{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5197817566349329, "eval_steps": 500, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.2994543915873322e-05, "grad_norm": 0.7394225001335144, "learning_rate": 1.25e-05, "loss": 2.0735, "step": 1 }, { "epoch": 2.5989087831746645e-05, "grad_norm": 1.3883064985275269, "learning_rate": 2.5e-05, "loss": 2.7196, "step": 2 }, { "epoch": 3.8983631747619965e-05, "grad_norm": 1.988787055015564, "learning_rate": 3.7500000000000003e-05, "loss": 3.517, "step": 3 }, { "epoch": 5.197817566349329e-05, "grad_norm": 0.7598587274551392, "learning_rate": 5e-05, "loss": 2.0302, "step": 4 }, { "epoch": 6.497271957936662e-05, "grad_norm": 0.9281490445137024, "learning_rate": 6.25e-05, "loss": 2.5406, "step": 5 }, { "epoch": 7.796726349523993e-05, "grad_norm": 1.0694752931594849, "learning_rate": 7.500000000000001e-05, "loss": 2.7612, "step": 6 }, { "epoch": 9.096180741111325e-05, "grad_norm": 0.8068076968193054, "learning_rate": 8.75e-05, "loss": 2.6643, "step": 7 }, { "epoch": 0.00010395635132698658, "grad_norm": 2.041363000869751, "learning_rate": 0.0001, "loss": 3.1038, "step": 8 }, { "epoch": 0.0001169508952428599, "grad_norm": 1.9163001775741577, "learning_rate": 0.00011250000000000001, "loss": 2.7784, "step": 9 }, { "epoch": 0.00012994543915873324, "grad_norm": 0.7601706385612488, "learning_rate": 0.000125, "loss": 2.0801, "step": 10 }, { "epoch": 0.00014293998307460655, "grad_norm": 0.6098636388778687, "learning_rate": 0.0001375, "loss": 1.8692, "step": 11 }, { "epoch": 0.00015593452699047986, "grad_norm": 2.0739760398864746, "learning_rate": 0.00015000000000000001, "loss": 2.4924, "step": 12 }, { "epoch": 0.0001689290709063532, "grad_norm": 0.5784039497375488, "learning_rate": 0.00016250000000000002, "loss": 1.8504, "step": 13 }, { "epoch": 0.0001819236148222265, "grad_norm": 0.9400786757469177, "learning_rate": 0.000175, "loss": 1.9691, "step": 14 }, { "epoch": 0.00019491815873809985, "grad_norm": 0.9181386232376099, "learning_rate": 0.0001875, "loss": 1.9537, "step": 15 }, { "epoch": 0.00020791270265397316, "grad_norm": 0.8861708641052246, "learning_rate": 0.0002, "loss": 2.1486, "step": 16 }, { "epoch": 0.0002209072465698465, "grad_norm": 1.134639859199524, "learning_rate": 0.0001999974005380886, "loss": 2.1537, "step": 17 }, { "epoch": 0.0002339017904857198, "grad_norm": 0.813442587852478, "learning_rate": 0.00019999480107617726, "loss": 2.0054, "step": 18 }, { "epoch": 0.00024689633440159314, "grad_norm": 0.8912744522094727, "learning_rate": 0.00019999220161426586, "loss": 1.8618, "step": 19 }, { "epoch": 0.0002598908783174665, "grad_norm": 0.5203714966773987, "learning_rate": 0.00019998960215235448, "loss": 1.6886, "step": 20 }, { "epoch": 0.00027288542223333976, "grad_norm": 0.6847788691520691, "learning_rate": 0.00019998700269044308, "loss": 1.8225, "step": 21 }, { "epoch": 0.0002858799661492131, "grad_norm": 0.5748642683029175, "learning_rate": 0.0001999844032285317, "loss": 1.9526, "step": 22 }, { "epoch": 0.00029887451006508644, "grad_norm": 0.501183271408081, "learning_rate": 0.00019998180376662033, "loss": 1.7452, "step": 23 }, { "epoch": 0.0003118690539809597, "grad_norm": 0.703778088092804, "learning_rate": 0.00019997920430470892, "loss": 1.8431, "step": 24 }, { "epoch": 0.00032486359789683306, "grad_norm": 0.5046797394752502, "learning_rate": 0.00019997660484279755, "loss": 1.8238, "step": 25 }, { "epoch": 0.0003378581418127064, "grad_norm": 0.7188125848770142, "learning_rate": 0.00019997400538088617, "loss": 1.9455, "step": 26 }, { "epoch": 0.00035085268572857973, "grad_norm": 0.5912610292434692, "learning_rate": 0.0001999714059189748, "loss": 1.6048, "step": 27 }, { "epoch": 0.000363847229644453, "grad_norm": 0.588297426700592, "learning_rate": 0.0001999688064570634, "loss": 1.809, "step": 28 }, { "epoch": 0.00037684177356032636, "grad_norm": 0.7199362516403198, "learning_rate": 0.000199966206995152, "loss": 1.5913, "step": 29 }, { "epoch": 0.0003898363174761997, "grad_norm": 0.5308181643486023, "learning_rate": 0.00019996360753324064, "loss": 1.7181, "step": 30 }, { "epoch": 0.000402830861392073, "grad_norm": 0.7171155214309692, "learning_rate": 0.00019996100807132924, "loss": 1.8663, "step": 31 }, { "epoch": 0.0004158254053079463, "grad_norm": 0.8405770659446716, "learning_rate": 0.00019995840860941787, "loss": 1.8455, "step": 32 }, { "epoch": 0.00042881994922381965, "grad_norm": 1.0783236026763916, "learning_rate": 0.00019995580914750646, "loss": 1.9135, "step": 33 }, { "epoch": 0.000441814493139693, "grad_norm": 0.5558031797409058, "learning_rate": 0.0001999532096855951, "loss": 1.611, "step": 34 }, { "epoch": 0.00045480903705556627, "grad_norm": 0.643465518951416, "learning_rate": 0.0001999506102236837, "loss": 1.6633, "step": 35 }, { "epoch": 0.0004678035809714396, "grad_norm": 0.5131320953369141, "learning_rate": 0.0001999480107617723, "loss": 1.4666, "step": 36 }, { "epoch": 0.00048079812488731295, "grad_norm": 0.7401881814002991, "learning_rate": 0.00019994541129986093, "loss": 1.5295, "step": 37 }, { "epoch": 0.0004937926688031863, "grad_norm": 0.5300176739692688, "learning_rate": 0.00019994281183794956, "loss": 1.7598, "step": 38 }, { "epoch": 0.0005067872127190596, "grad_norm": 0.7013173699378967, "learning_rate": 0.00019994021237603818, "loss": 1.9747, "step": 39 }, { "epoch": 0.000519781756634933, "grad_norm": 0.5502619743347168, "learning_rate": 0.00019993761291412678, "loss": 1.7822, "step": 40 }, { "epoch": 0.0005327763005508062, "grad_norm": 0.7401593923568726, "learning_rate": 0.0001999350134522154, "loss": 1.7382, "step": 41 }, { "epoch": 0.0005457708444666795, "grad_norm": 0.8741652965545654, "learning_rate": 0.00019993241399030403, "loss": 1.5306, "step": 42 }, { "epoch": 0.0005587653883825529, "grad_norm": 1.000598430633545, "learning_rate": 0.00019992981452839263, "loss": 1.5486, "step": 43 }, { "epoch": 0.0005717599322984262, "grad_norm": 0.9379377961158752, "learning_rate": 0.00019992721506648125, "loss": 1.8125, "step": 44 }, { "epoch": 0.0005847544762142995, "grad_norm": 1.193866491317749, "learning_rate": 0.00019992461560456985, "loss": 1.7332, "step": 45 }, { "epoch": 0.0005977490201301729, "grad_norm": 1.287405252456665, "learning_rate": 0.00019992201614265847, "loss": 1.9023, "step": 46 }, { "epoch": 0.0006107435640460462, "grad_norm": 0.5383558869361877, "learning_rate": 0.0001999194166807471, "loss": 1.5893, "step": 47 }, { "epoch": 0.0006237381079619194, "grad_norm": 0.7014543414115906, "learning_rate": 0.0001999168172188357, "loss": 1.6903, "step": 48 }, { "epoch": 0.0006367326518777928, "grad_norm": 0.3466140031814575, "learning_rate": 0.00019991421775692432, "loss": 1.5747, "step": 49 }, { "epoch": 0.0006497271957936661, "grad_norm": 0.39199456572532654, "learning_rate": 0.00019991161829501294, "loss": 1.5924, "step": 50 }, { "epoch": 0.0006627217397095395, "grad_norm": 0.6264086365699768, "learning_rate": 0.00019990901883310157, "loss": 1.7289, "step": 51 }, { "epoch": 0.0006757162836254128, "grad_norm": 0.774596631526947, "learning_rate": 0.00019990641937119017, "loss": 1.6478, "step": 52 }, { "epoch": 0.0006887108275412861, "grad_norm": 1.0306848287582397, "learning_rate": 0.0001999038199092788, "loss": 1.8524, "step": 53 }, { "epoch": 0.0007017053714571595, "grad_norm": 0.94454026222229, "learning_rate": 0.00019990122044736741, "loss": 1.401, "step": 54 }, { "epoch": 0.0007146999153730328, "grad_norm": 1.3383725881576538, "learning_rate": 0.000199898620985456, "loss": 1.6518, "step": 55 }, { "epoch": 0.000727694459288906, "grad_norm": 0.3218197226524353, "learning_rate": 0.00019989602152354464, "loss": 1.4379, "step": 56 }, { "epoch": 0.0007406890032047794, "grad_norm": 0.6366977691650391, "learning_rate": 0.00019989342206163326, "loss": 1.6121, "step": 57 }, { "epoch": 0.0007536835471206527, "grad_norm": 0.40514591336250305, "learning_rate": 0.00019989082259972186, "loss": 1.808, "step": 58 }, { "epoch": 0.000766678091036526, "grad_norm": 0.4810250699520111, "learning_rate": 0.00019988822313781048, "loss": 1.5069, "step": 59 }, { "epoch": 0.0007796726349523994, "grad_norm": 0.33548909425735474, "learning_rate": 0.00019988562367589908, "loss": 1.3861, "step": 60 }, { "epoch": 0.0007926671788682727, "grad_norm": 0.37393638491630554, "learning_rate": 0.00019988302421398773, "loss": 1.4715, "step": 61 }, { "epoch": 0.000805661722784146, "grad_norm": 0.5419512987136841, "learning_rate": 0.00019988042475207633, "loss": 1.5614, "step": 62 }, { "epoch": 0.0008186562667000193, "grad_norm": 0.6855679154396057, "learning_rate": 0.00019987782529016495, "loss": 1.575, "step": 63 }, { "epoch": 0.0008316508106158926, "grad_norm": 0.4336775541305542, "learning_rate": 0.00019987522582825355, "loss": 1.5964, "step": 64 }, { "epoch": 0.000844645354531766, "grad_norm": 0.36566340923309326, "learning_rate": 0.00019987262636634218, "loss": 1.5594, "step": 65 }, { "epoch": 0.0008576398984476393, "grad_norm": 0.36470988392829895, "learning_rate": 0.0001998700269044308, "loss": 1.5931, "step": 66 }, { "epoch": 0.0008706344423635126, "grad_norm": 0.3844112455844879, "learning_rate": 0.0001998674274425194, "loss": 1.5849, "step": 67 }, { "epoch": 0.000883628986279386, "grad_norm": 0.44522514939308167, "learning_rate": 0.00019986482798060802, "loss": 1.4193, "step": 68 }, { "epoch": 0.0008966235301952593, "grad_norm": 0.2723512351512909, "learning_rate": 0.00019986222851869665, "loss": 1.4587, "step": 69 }, { "epoch": 0.0009096180741111325, "grad_norm": 0.4704466760158539, "learning_rate": 0.00019985962905678527, "loss": 1.4766, "step": 70 }, { "epoch": 0.0009226126180270059, "grad_norm": 0.4864993691444397, "learning_rate": 0.00019985702959487387, "loss": 1.7165, "step": 71 }, { "epoch": 0.0009356071619428792, "grad_norm": 0.4624054431915283, "learning_rate": 0.00019985443013296247, "loss": 1.5708, "step": 72 }, { "epoch": 0.0009486017058587525, "grad_norm": 0.4995464086532593, "learning_rate": 0.00019985183067105112, "loss": 1.6012, "step": 73 }, { "epoch": 0.0009615962497746259, "grad_norm": 0.3099346160888672, "learning_rate": 0.00019984923120913971, "loss": 1.5089, "step": 74 }, { "epoch": 0.0009745907936904992, "grad_norm": 0.5344899296760559, "learning_rate": 0.00019984663174722834, "loss": 1.4251, "step": 75 }, { "epoch": 0.0009875853376063726, "grad_norm": 0.42957183718681335, "learning_rate": 0.00019984403228531694, "loss": 1.7298, "step": 76 }, { "epoch": 0.0010005798815222457, "grad_norm": 0.31365153193473816, "learning_rate": 0.00019984143282340556, "loss": 1.3713, "step": 77 }, { "epoch": 0.0010135744254381191, "grad_norm": 0.2500200867652893, "learning_rate": 0.00019983883336149419, "loss": 1.4603, "step": 78 }, { "epoch": 0.0010265689693539925, "grad_norm": 0.4030626118183136, "learning_rate": 0.00019983623389958278, "loss": 1.6264, "step": 79 }, { "epoch": 0.001039563513269866, "grad_norm": 0.5635514259338379, "learning_rate": 0.0001998336344376714, "loss": 1.7023, "step": 80 }, { "epoch": 0.001052558057185739, "grad_norm": 0.3587415814399719, "learning_rate": 0.00019983103497576003, "loss": 1.4858, "step": 81 }, { "epoch": 0.0010655526011016125, "grad_norm": 0.5135048031806946, "learning_rate": 0.00019982843551384866, "loss": 1.6953, "step": 82 }, { "epoch": 0.0010785471450174859, "grad_norm": 0.5444433093070984, "learning_rate": 0.00019982583605193725, "loss": 1.5036, "step": 83 }, { "epoch": 0.001091541688933359, "grad_norm": 0.36568179726600647, "learning_rate": 0.00019982323659002585, "loss": 1.6189, "step": 84 }, { "epoch": 0.0011045362328492324, "grad_norm": 0.36495357751846313, "learning_rate": 0.0001998206371281145, "loss": 1.5315, "step": 85 }, { "epoch": 0.0011175307767651058, "grad_norm": 0.42951181530952454, "learning_rate": 0.0001998180376662031, "loss": 1.5117, "step": 86 }, { "epoch": 0.001130525320680979, "grad_norm": 0.3365992605686188, "learning_rate": 0.00019981543820429172, "loss": 1.2442, "step": 87 }, { "epoch": 0.0011435198645968524, "grad_norm": 0.4303477704524994, "learning_rate": 0.00019981283874238032, "loss": 1.4972, "step": 88 }, { "epoch": 0.0011565144085127258, "grad_norm": 0.5333189964294434, "learning_rate": 0.00019981023928046895, "loss": 1.6606, "step": 89 }, { "epoch": 0.001169508952428599, "grad_norm": 0.45519933104515076, "learning_rate": 0.00019980763981855757, "loss": 1.2952, "step": 90 }, { "epoch": 0.0011825034963444724, "grad_norm": 0.31117764115333557, "learning_rate": 0.00019980504035664617, "loss": 1.3897, "step": 91 }, { "epoch": 0.0011954980402603458, "grad_norm": 0.4803959131240845, "learning_rate": 0.00019980244089473482, "loss": 1.5162, "step": 92 }, { "epoch": 0.001208492584176219, "grad_norm": 0.397958368062973, "learning_rate": 0.00019979984143282342, "loss": 1.4509, "step": 93 }, { "epoch": 0.0012214871280920923, "grad_norm": 0.4095950126647949, "learning_rate": 0.00019979724197091204, "loss": 1.519, "step": 94 }, { "epoch": 0.0012344816720079657, "grad_norm": 0.27611222863197327, "learning_rate": 0.00019979464250900064, "loss": 1.307, "step": 95 }, { "epoch": 0.0012474762159238389, "grad_norm": 0.47964999079704285, "learning_rate": 0.00019979204304708926, "loss": 1.5736, "step": 96 }, { "epoch": 0.0012604707598397123, "grad_norm": 0.3905075788497925, "learning_rate": 0.0001997894435851779, "loss": 1.5931, "step": 97 }, { "epoch": 0.0012734653037555857, "grad_norm": 0.3908260464668274, "learning_rate": 0.00019978684412326649, "loss": 1.3432, "step": 98 }, { "epoch": 0.0012864598476714588, "grad_norm": 0.2977055013179779, "learning_rate": 0.0001997842446613551, "loss": 1.6434, "step": 99 }, { "epoch": 0.0012994543915873322, "grad_norm": 0.41405099630355835, "learning_rate": 0.00019978164519944373, "loss": 1.5094, "step": 100 }, { "epoch": 0.0013124489355032056, "grad_norm": 0.29386085271835327, "learning_rate": 0.00019977904573753233, "loss": 1.4693, "step": 101 }, { "epoch": 0.001325443479419079, "grad_norm": 0.3862020969390869, "learning_rate": 0.00019977644627562096, "loss": 1.4884, "step": 102 }, { "epoch": 0.0013384380233349522, "grad_norm": 0.35656118392944336, "learning_rate": 0.00019977384681370955, "loss": 1.394, "step": 103 }, { "epoch": 0.0013514325672508256, "grad_norm": 0.4221237897872925, "learning_rate": 0.0001997712473517982, "loss": 1.6967, "step": 104 }, { "epoch": 0.001364427111166699, "grad_norm": 0.3853652775287628, "learning_rate": 0.0001997686478898868, "loss": 1.5752, "step": 105 }, { "epoch": 0.0013774216550825722, "grad_norm": 0.4113156199455261, "learning_rate": 0.00019976604842797543, "loss": 1.735, "step": 106 }, { "epoch": 0.0013904161989984455, "grad_norm": 0.35804283618927, "learning_rate": 0.00019976344896606402, "loss": 1.5583, "step": 107 }, { "epoch": 0.001403410742914319, "grad_norm": 0.36831387877464294, "learning_rate": 0.00019976084950415265, "loss": 1.34, "step": 108 }, { "epoch": 0.0014164052868301921, "grad_norm": 0.31828340888023376, "learning_rate": 0.00019975825004224127, "loss": 1.4227, "step": 109 }, { "epoch": 0.0014293998307460655, "grad_norm": 0.32072535157203674, "learning_rate": 0.00019975565058032987, "loss": 1.7136, "step": 110 }, { "epoch": 0.001442394374661939, "grad_norm": 0.3531497120857239, "learning_rate": 0.0001997530511184185, "loss": 1.6733, "step": 111 }, { "epoch": 0.001455388918577812, "grad_norm": 0.3910057246685028, "learning_rate": 0.00019975045165650712, "loss": 1.5452, "step": 112 }, { "epoch": 0.0014683834624936855, "grad_norm": 0.39372771978378296, "learning_rate": 0.00019974785219459572, "loss": 1.4553, "step": 113 }, { "epoch": 0.0014813780064095589, "grad_norm": 0.3863373398780823, "learning_rate": 0.00019974525273268434, "loss": 1.3832, "step": 114 }, { "epoch": 0.001494372550325432, "grad_norm": 0.3262098729610443, "learning_rate": 0.00019974265327077294, "loss": 1.4632, "step": 115 }, { "epoch": 0.0015073670942413054, "grad_norm": 0.3259226381778717, "learning_rate": 0.0001997400538088616, "loss": 1.474, "step": 116 }, { "epoch": 0.0015203616381571788, "grad_norm": 0.31307917833328247, "learning_rate": 0.0001997374543469502, "loss": 1.3626, "step": 117 }, { "epoch": 0.001533356182073052, "grad_norm": 0.3163079619407654, "learning_rate": 0.0001997348548850388, "loss": 1.3759, "step": 118 }, { "epoch": 0.0015463507259889254, "grad_norm": 0.44436267018318176, "learning_rate": 0.0001997322554231274, "loss": 1.4845, "step": 119 }, { "epoch": 0.0015593452699047988, "grad_norm": 0.4585951566696167, "learning_rate": 0.00019972965596121603, "loss": 1.5826, "step": 120 }, { "epoch": 0.001572339813820672, "grad_norm": 0.37757980823516846, "learning_rate": 0.00019972705649930466, "loss": 1.5665, "step": 121 }, { "epoch": 0.0015853343577365453, "grad_norm": 0.33806487917900085, "learning_rate": 0.00019972445703739326, "loss": 1.5042, "step": 122 }, { "epoch": 0.0015983289016524187, "grad_norm": 0.44263142347335815, "learning_rate": 0.00019972185757548188, "loss": 1.5388, "step": 123 }, { "epoch": 0.001611323445568292, "grad_norm": 0.31990745663642883, "learning_rate": 0.0001997192581135705, "loss": 1.5376, "step": 124 }, { "epoch": 0.0016243179894841653, "grad_norm": 0.32597091794013977, "learning_rate": 0.00019971665865165913, "loss": 1.457, "step": 125 }, { "epoch": 0.0016373125334000387, "grad_norm": 0.31376028060913086, "learning_rate": 0.00019971405918974773, "loss": 1.4279, "step": 126 }, { "epoch": 0.001650307077315912, "grad_norm": 0.27025485038757324, "learning_rate": 0.00019971145972783632, "loss": 1.4572, "step": 127 }, { "epoch": 0.0016633016212317853, "grad_norm": 0.22959962487220764, "learning_rate": 0.00019970886026592498, "loss": 1.2702, "step": 128 }, { "epoch": 0.0016762961651476586, "grad_norm": 0.24560846388339996, "learning_rate": 0.00019970626080401357, "loss": 1.2481, "step": 129 }, { "epoch": 0.001689290709063532, "grad_norm": 0.4410383105278015, "learning_rate": 0.0001997036613421022, "loss": 1.4782, "step": 130 }, { "epoch": 0.0017022852529794052, "grad_norm": 0.35910242795944214, "learning_rate": 0.00019970106188019082, "loss": 1.5368, "step": 131 }, { "epoch": 0.0017152797968952786, "grad_norm": 0.3717356026172638, "learning_rate": 0.00019969846241827942, "loss": 1.43, "step": 132 }, { "epoch": 0.001728274340811152, "grad_norm": 0.34773606061935425, "learning_rate": 0.00019969586295636804, "loss": 1.5728, "step": 133 }, { "epoch": 0.0017412688847270252, "grad_norm": 0.3884320557117462, "learning_rate": 0.00019969326349445664, "loss": 1.5506, "step": 134 }, { "epoch": 0.0017542634286428986, "grad_norm": 0.30978694558143616, "learning_rate": 0.0001996906640325453, "loss": 1.5241, "step": 135 }, { "epoch": 0.001767257972558772, "grad_norm": 0.36688876152038574, "learning_rate": 0.0001996880645706339, "loss": 1.4682, "step": 136 }, { "epoch": 0.0017802525164746451, "grad_norm": 0.4215044677257538, "learning_rate": 0.00019968546510872252, "loss": 1.609, "step": 137 }, { "epoch": 0.0017932470603905185, "grad_norm": 0.35323330760002136, "learning_rate": 0.0001996828656468111, "loss": 1.5944, "step": 138 }, { "epoch": 0.001806241604306392, "grad_norm": 0.38223913311958313, "learning_rate": 0.00019968026618489974, "loss": 1.5231, "step": 139 }, { "epoch": 0.001819236148222265, "grad_norm": 0.3340063989162445, "learning_rate": 0.00019967766672298836, "loss": 1.4547, "step": 140 }, { "epoch": 0.0018322306921381385, "grad_norm": 0.2879827916622162, "learning_rate": 0.00019967506726107696, "loss": 1.3654, "step": 141 }, { "epoch": 0.0018452252360540119, "grad_norm": 0.35720300674438477, "learning_rate": 0.00019967246779916558, "loss": 1.475, "step": 142 }, { "epoch": 0.001858219779969885, "grad_norm": 0.45305946469306946, "learning_rate": 0.0001996698683372542, "loss": 1.462, "step": 143 }, { "epoch": 0.0018712143238857584, "grad_norm": 0.41068610548973083, "learning_rate": 0.0001996672688753428, "loss": 1.6636, "step": 144 }, { "epoch": 0.0018842088678016318, "grad_norm": 0.3500629663467407, "learning_rate": 0.00019966466941343143, "loss": 1.5408, "step": 145 }, { "epoch": 0.001897203411717505, "grad_norm": 0.41315722465515137, "learning_rate": 0.00019966206995152003, "loss": 1.4691, "step": 146 }, { "epoch": 0.0019101979556333784, "grad_norm": 0.3140808045864105, "learning_rate": 0.00019965947048960868, "loss": 1.3598, "step": 147 }, { "epoch": 0.0019231924995492518, "grad_norm": 0.23439089953899384, "learning_rate": 0.00019965687102769728, "loss": 1.4139, "step": 148 }, { "epoch": 0.0019361870434651252, "grad_norm": 0.40360027551651, "learning_rate": 0.0001996542715657859, "loss": 1.5104, "step": 149 }, { "epoch": 0.0019491815873809984, "grad_norm": 0.3256491720676422, "learning_rate": 0.0001996516721038745, "loss": 1.6522, "step": 150 }, { "epoch": 0.0019621761312968717, "grad_norm": 0.3488617539405823, "learning_rate": 0.00019964907264196312, "loss": 1.4508, "step": 151 }, { "epoch": 0.001975170675212745, "grad_norm": 0.3948095440864563, "learning_rate": 0.00019964647318005175, "loss": 1.4633, "step": 152 }, { "epoch": 0.0019881652191286185, "grad_norm": 0.3381510078907013, "learning_rate": 0.00019964387371814034, "loss": 1.3295, "step": 153 }, { "epoch": 0.0020011597630444915, "grad_norm": 0.3446526527404785, "learning_rate": 0.00019964127425622897, "loss": 1.4926, "step": 154 }, { "epoch": 0.002014154306960365, "grad_norm": 0.38809219002723694, "learning_rate": 0.0001996386747943176, "loss": 1.4912, "step": 155 }, { "epoch": 0.0020271488508762383, "grad_norm": 0.363363653421402, "learning_rate": 0.0001996360753324062, "loss": 1.6432, "step": 156 }, { "epoch": 0.0020401433947921117, "grad_norm": 0.4307781159877777, "learning_rate": 0.00019963347587049482, "loss": 1.6573, "step": 157 }, { "epoch": 0.002053137938707985, "grad_norm": 0.4218001067638397, "learning_rate": 0.0001996308764085834, "loss": 1.6746, "step": 158 }, { "epoch": 0.0020661324826238585, "grad_norm": 0.3414730727672577, "learning_rate": 0.00019962827694667206, "loss": 1.5121, "step": 159 }, { "epoch": 0.002079127026539732, "grad_norm": 0.3359917104244232, "learning_rate": 0.00019962567748476066, "loss": 1.5004, "step": 160 }, { "epoch": 0.002092121570455605, "grad_norm": 0.2820243239402771, "learning_rate": 0.00019962307802284929, "loss": 1.4176, "step": 161 }, { "epoch": 0.002105116114371478, "grad_norm": 0.33084461092948914, "learning_rate": 0.00019962047856093788, "loss": 1.5205, "step": 162 }, { "epoch": 0.0021181106582873516, "grad_norm": 0.41144001483917236, "learning_rate": 0.0001996178790990265, "loss": 1.6402, "step": 163 }, { "epoch": 0.002131105202203225, "grad_norm": 0.3849426507949829, "learning_rate": 0.00019961527963711513, "loss": 1.6129, "step": 164 }, { "epoch": 0.0021440997461190984, "grad_norm": 0.18820500373840332, "learning_rate": 0.00019961268017520373, "loss": 1.32, "step": 165 }, { "epoch": 0.0021570942900349718, "grad_norm": 0.35013675689697266, "learning_rate": 0.00019961008071329238, "loss": 1.5776, "step": 166 }, { "epoch": 0.0021700888339508447, "grad_norm": 0.3043151795864105, "learning_rate": 0.00019960748125138098, "loss": 1.5799, "step": 167 }, { "epoch": 0.002183083377866718, "grad_norm": 0.3667580485343933, "learning_rate": 0.00019960488178946958, "loss": 1.6441, "step": 168 }, { "epoch": 0.0021960779217825915, "grad_norm": 0.36724984645843506, "learning_rate": 0.0001996022823275582, "loss": 1.6838, "step": 169 }, { "epoch": 0.002209072465698465, "grad_norm": 0.3321700990200043, "learning_rate": 0.00019959968286564682, "loss": 1.6015, "step": 170 }, { "epoch": 0.0022220670096143383, "grad_norm": 0.34856680035591125, "learning_rate": 0.00019959708340373545, "loss": 1.4171, "step": 171 }, { "epoch": 0.0022350615535302117, "grad_norm": 0.35499298572540283, "learning_rate": 0.00019959448394182405, "loss": 1.6336, "step": 172 }, { "epoch": 0.0022480560974460846, "grad_norm": 0.320279985666275, "learning_rate": 0.00019959188447991267, "loss": 1.4745, "step": 173 }, { "epoch": 0.002261050641361958, "grad_norm": 0.2296724021434784, "learning_rate": 0.0001995892850180013, "loss": 1.2762, "step": 174 }, { "epoch": 0.0022740451852778314, "grad_norm": 0.4183747470378876, "learning_rate": 0.0001995866855560899, "loss": 1.5977, "step": 175 }, { "epoch": 0.002287039729193705, "grad_norm": 0.3686603605747223, "learning_rate": 0.00019958408609417852, "loss": 1.5158, "step": 176 }, { "epoch": 0.002300034273109578, "grad_norm": 0.4603264629840851, "learning_rate": 0.00019958148663226711, "loss": 1.5406, "step": 177 }, { "epoch": 0.0023130288170254516, "grad_norm": 0.37625256180763245, "learning_rate": 0.00019957888717035577, "loss": 1.5891, "step": 178 }, { "epoch": 0.0023260233609413246, "grad_norm": 0.3858341872692108, "learning_rate": 0.00019957628770844436, "loss": 1.4942, "step": 179 }, { "epoch": 0.002339017904857198, "grad_norm": 0.36542120575904846, "learning_rate": 0.000199573688246533, "loss": 1.423, "step": 180 }, { "epoch": 0.0023520124487730713, "grad_norm": 0.35841289162635803, "learning_rate": 0.00019957108878462159, "loss": 1.4763, "step": 181 }, { "epoch": 0.0023650069926889447, "grad_norm": 0.35375916957855225, "learning_rate": 0.0001995684893227102, "loss": 1.5949, "step": 182 }, { "epoch": 0.002378001536604818, "grad_norm": 0.334729939699173, "learning_rate": 0.00019956588986079883, "loss": 1.6127, "step": 183 }, { "epoch": 0.0023909960805206915, "grad_norm": 0.283333957195282, "learning_rate": 0.00019956329039888743, "loss": 1.4284, "step": 184 }, { "epoch": 0.002403990624436565, "grad_norm": 0.4093594253063202, "learning_rate": 0.00019956069093697606, "loss": 1.4232, "step": 185 }, { "epoch": 0.002416985168352438, "grad_norm": 0.3956725299358368, "learning_rate": 0.00019955809147506468, "loss": 1.553, "step": 186 }, { "epoch": 0.0024299797122683113, "grad_norm": 0.3651161193847656, "learning_rate": 0.00019955549201315328, "loss": 1.5034, "step": 187 }, { "epoch": 0.0024429742561841846, "grad_norm": 0.39115169644355774, "learning_rate": 0.0001995528925512419, "loss": 1.6763, "step": 188 }, { "epoch": 0.002455968800100058, "grad_norm": 0.2925562560558319, "learning_rate": 0.0001995502930893305, "loss": 1.441, "step": 189 }, { "epoch": 0.0024689633440159314, "grad_norm": 0.37559300661087036, "learning_rate": 0.00019954769362741915, "loss": 1.4808, "step": 190 }, { "epoch": 0.002481957887931805, "grad_norm": 0.3514731228351593, "learning_rate": 0.00019954509416550775, "loss": 1.4988, "step": 191 }, { "epoch": 0.0024949524318476778, "grad_norm": 0.33247920870780945, "learning_rate": 0.00019954249470359637, "loss": 1.4973, "step": 192 }, { "epoch": 0.002507946975763551, "grad_norm": 0.3307076096534729, "learning_rate": 0.00019953989524168497, "loss": 1.3719, "step": 193 }, { "epoch": 0.0025209415196794246, "grad_norm": 0.33346524834632874, "learning_rate": 0.0001995372957797736, "loss": 1.5309, "step": 194 }, { "epoch": 0.002533936063595298, "grad_norm": 0.3161267936229706, "learning_rate": 0.00019953469631786222, "loss": 1.3535, "step": 195 }, { "epoch": 0.0025469306075111713, "grad_norm": 0.28719034790992737, "learning_rate": 0.00019953209685595082, "loss": 1.5653, "step": 196 }, { "epoch": 0.0025599251514270447, "grad_norm": 0.36087697744369507, "learning_rate": 0.00019952949739403944, "loss": 1.5175, "step": 197 }, { "epoch": 0.0025729196953429177, "grad_norm": 0.43930137157440186, "learning_rate": 0.00019952689793212807, "loss": 1.5531, "step": 198 }, { "epoch": 0.002585914239258791, "grad_norm": 0.6818702816963196, "learning_rate": 0.00019952429847021666, "loss": 1.592, "step": 199 }, { "epoch": 0.0025989087831746645, "grad_norm": 0.20841331779956818, "learning_rate": 0.0001995216990083053, "loss": 1.0124, "step": 200 }, { "epoch": 0.002611903327090538, "grad_norm": 0.3404110372066498, "learning_rate": 0.00019951909954639389, "loss": 1.6634, "step": 201 }, { "epoch": 0.0026248978710064113, "grad_norm": 0.39020222425460815, "learning_rate": 0.00019951650008448254, "loss": 1.4838, "step": 202 }, { "epoch": 0.0026378924149222847, "grad_norm": 0.3787286579608917, "learning_rate": 0.00019951390062257113, "loss": 1.6008, "step": 203 }, { "epoch": 0.002650886958838158, "grad_norm": 0.3658662438392639, "learning_rate": 0.00019951130116065976, "loss": 1.4642, "step": 204 }, { "epoch": 0.002663881502754031, "grad_norm": 0.3418336808681488, "learning_rate": 0.00019950870169874838, "loss": 1.4158, "step": 205 }, { "epoch": 0.0026768760466699044, "grad_norm": 0.3201123774051666, "learning_rate": 0.00019950610223683698, "loss": 1.6424, "step": 206 }, { "epoch": 0.002689870590585778, "grad_norm": 0.4045439064502716, "learning_rate": 0.0001995035027749256, "loss": 1.7248, "step": 207 }, { "epoch": 0.002702865134501651, "grad_norm": 0.30982545018196106, "learning_rate": 0.0001995009033130142, "loss": 1.348, "step": 208 }, { "epoch": 0.0027158596784175246, "grad_norm": 0.3888842761516571, "learning_rate": 0.00019949830385110285, "loss": 1.6432, "step": 209 }, { "epoch": 0.002728854222333398, "grad_norm": 0.37842032313346863, "learning_rate": 0.00019949570438919145, "loss": 1.56, "step": 210 }, { "epoch": 0.002741848766249271, "grad_norm": 0.37506985664367676, "learning_rate": 0.00019949310492728005, "loss": 1.6661, "step": 211 }, { "epoch": 0.0027548433101651443, "grad_norm": 0.36613574624061584, "learning_rate": 0.00019949050546536867, "loss": 1.538, "step": 212 }, { "epoch": 0.0027678378540810177, "grad_norm": 0.2759266197681427, "learning_rate": 0.0001994879060034573, "loss": 1.5326, "step": 213 }, { "epoch": 0.002780832397996891, "grad_norm": 0.28298652172088623, "learning_rate": 0.00019948530654154592, "loss": 1.5049, "step": 214 }, { "epoch": 0.0027938269419127645, "grad_norm": 0.236300528049469, "learning_rate": 0.00019948270707963452, "loss": 1.4315, "step": 215 }, { "epoch": 0.002806821485828638, "grad_norm": 0.36888444423675537, "learning_rate": 0.00019948010761772314, "loss": 1.4284, "step": 216 }, { "epoch": 0.002819816029744511, "grad_norm": 0.42646467685699463, "learning_rate": 0.00019947750815581177, "loss": 1.6482, "step": 217 }, { "epoch": 0.0028328105736603842, "grad_norm": 0.30057191848754883, "learning_rate": 0.00019947490869390037, "loss": 1.3229, "step": 218 }, { "epoch": 0.0028458051175762576, "grad_norm": 0.39201629161834717, "learning_rate": 0.000199472309231989, "loss": 1.566, "step": 219 }, { "epoch": 0.002858799661492131, "grad_norm": 0.32493525743484497, "learning_rate": 0.0001994697097700776, "loss": 1.4828, "step": 220 }, { "epoch": 0.0028717942054080044, "grad_norm": 0.27081894874572754, "learning_rate": 0.00019946711030816624, "loss": 1.5155, "step": 221 }, { "epoch": 0.002884788749323878, "grad_norm": 0.38155704736709595, "learning_rate": 0.00019946451084625484, "loss": 1.3287, "step": 222 }, { "epoch": 0.0028977832932397508, "grad_norm": 0.364666610956192, "learning_rate": 0.00019946191138434343, "loss": 1.6249, "step": 223 }, { "epoch": 0.002910777837155624, "grad_norm": 0.24264782667160034, "learning_rate": 0.00019945931192243206, "loss": 1.4499, "step": 224 }, { "epoch": 0.0029237723810714975, "grad_norm": 0.39832785725593567, "learning_rate": 0.00019945671246052068, "loss": 1.5181, "step": 225 }, { "epoch": 0.002936766924987371, "grad_norm": 0.351386696100235, "learning_rate": 0.0001994541129986093, "loss": 1.4117, "step": 226 }, { "epoch": 0.0029497614689032443, "grad_norm": 0.3663950562477112, "learning_rate": 0.0001994515135366979, "loss": 1.5284, "step": 227 }, { "epoch": 0.0029627560128191177, "grad_norm": 0.33718690276145935, "learning_rate": 0.00019944891407478653, "loss": 1.668, "step": 228 }, { "epoch": 0.002975750556734991, "grad_norm": 0.28165867924690247, "learning_rate": 0.00019944631461287515, "loss": 1.3657, "step": 229 }, { "epoch": 0.002988745100650864, "grad_norm": 0.4133395850658417, "learning_rate": 0.00019944371515096375, "loss": 1.4351, "step": 230 }, { "epoch": 0.0030017396445667375, "grad_norm": 0.31343212723731995, "learning_rate": 0.00019944111568905238, "loss": 1.5072, "step": 231 }, { "epoch": 0.003014734188482611, "grad_norm": 0.2588444650173187, "learning_rate": 0.00019943851622714097, "loss": 1.3898, "step": 232 }, { "epoch": 0.0030277287323984842, "grad_norm": 0.3805907964706421, "learning_rate": 0.00019943591676522963, "loss": 1.7189, "step": 233 }, { "epoch": 0.0030407232763143576, "grad_norm": 0.3293072581291199, "learning_rate": 0.00019943331730331822, "loss": 1.5578, "step": 234 }, { "epoch": 0.003053717820230231, "grad_norm": 0.4328724443912506, "learning_rate": 0.00019943071784140682, "loss": 1.477, "step": 235 }, { "epoch": 0.003066712364146104, "grad_norm": 0.32229161262512207, "learning_rate": 0.00019942811837949544, "loss": 1.3583, "step": 236 }, { "epoch": 0.0030797069080619774, "grad_norm": 0.35365840792655945, "learning_rate": 0.00019942551891758407, "loss": 1.5048, "step": 237 }, { "epoch": 0.0030927014519778508, "grad_norm": 0.3168087899684906, "learning_rate": 0.0001994229194556727, "loss": 1.239, "step": 238 }, { "epoch": 0.003105695995893724, "grad_norm": 0.3821578621864319, "learning_rate": 0.0001994203199937613, "loss": 1.4687, "step": 239 }, { "epoch": 0.0031186905398095975, "grad_norm": 0.3703784644603729, "learning_rate": 0.00019941772053184992, "loss": 1.4105, "step": 240 }, { "epoch": 0.003131685083725471, "grad_norm": 0.3951148986816406, "learning_rate": 0.00019941512106993854, "loss": 1.4942, "step": 241 }, { "epoch": 0.003144679627641344, "grad_norm": 0.32025468349456787, "learning_rate": 0.00019941252160802714, "loss": 1.7404, "step": 242 }, { "epoch": 0.0031576741715572173, "grad_norm": 0.30971238017082214, "learning_rate": 0.00019940992214611576, "loss": 1.2907, "step": 243 }, { "epoch": 0.0031706687154730907, "grad_norm": 0.34188809990882874, "learning_rate": 0.00019940732268420439, "loss": 1.4408, "step": 244 }, { "epoch": 0.003183663259388964, "grad_norm": 0.3238825798034668, "learning_rate": 0.000199404723222293, "loss": 1.6224, "step": 245 }, { "epoch": 0.0031966578033048375, "grad_norm": 0.5203685760498047, "learning_rate": 0.0001994021237603816, "loss": 1.5555, "step": 246 }, { "epoch": 0.003209652347220711, "grad_norm": 0.3210241198539734, "learning_rate": 0.00019939952429847023, "loss": 1.5639, "step": 247 }, { "epoch": 0.003222646891136584, "grad_norm": 0.29965224862098694, "learning_rate": 0.00019939692483655886, "loss": 1.5204, "step": 248 }, { "epoch": 0.003235641435052457, "grad_norm": 0.36836618185043335, "learning_rate": 0.00019939432537464745, "loss": 1.3735, "step": 249 }, { "epoch": 0.0032486359789683306, "grad_norm": 0.316834419965744, "learning_rate": 0.00019939172591273608, "loss": 1.6373, "step": 250 }, { "epoch": 0.003261630522884204, "grad_norm": 0.3484581410884857, "learning_rate": 0.00019938912645082468, "loss": 1.4217, "step": 251 }, { "epoch": 0.0032746250668000774, "grad_norm": 0.3081274628639221, "learning_rate": 0.0001993865269889133, "loss": 1.5174, "step": 252 }, { "epoch": 0.0032876196107159508, "grad_norm": 0.35599973797798157, "learning_rate": 0.00019938392752700193, "loss": 1.3621, "step": 253 }, { "epoch": 0.003300614154631824, "grad_norm": 0.3440588712692261, "learning_rate": 0.00019938132806509052, "loss": 1.6824, "step": 254 }, { "epoch": 0.003313608698547697, "grad_norm": 0.34352511167526245, "learning_rate": 0.00019937872860317915, "loss": 1.5523, "step": 255 }, { "epoch": 0.0033266032424635705, "grad_norm": 0.3553222119808197, "learning_rate": 0.00019937612914126777, "loss": 1.4915, "step": 256 }, { "epoch": 0.003339597786379444, "grad_norm": 0.32171377539634705, "learning_rate": 0.0001993735296793564, "loss": 1.5895, "step": 257 }, { "epoch": 0.0033525923302953173, "grad_norm": 0.48296815156936646, "learning_rate": 0.000199370930217445, "loss": 1.7365, "step": 258 }, { "epoch": 0.0033655868742111907, "grad_norm": 0.3553713262081146, "learning_rate": 0.00019936833075553362, "loss": 1.6974, "step": 259 }, { "epoch": 0.003378581418127064, "grad_norm": 0.2838060259819031, "learning_rate": 0.00019936573129362224, "loss": 1.3813, "step": 260 }, { "epoch": 0.003391575962042937, "grad_norm": 0.3649294078350067, "learning_rate": 0.00019936313183171084, "loss": 1.5391, "step": 261 }, { "epoch": 0.0034045705059588104, "grad_norm": 0.31763696670532227, "learning_rate": 0.00019936053236979946, "loss": 1.3523, "step": 262 }, { "epoch": 0.003417565049874684, "grad_norm": 0.38990485668182373, "learning_rate": 0.00019935793290788806, "loss": 1.6409, "step": 263 }, { "epoch": 0.003430559593790557, "grad_norm": 0.2968158721923828, "learning_rate": 0.00019935533344597669, "loss": 1.5148, "step": 264 }, { "epoch": 0.0034435541377064306, "grad_norm": 0.3040262460708618, "learning_rate": 0.0001993527339840653, "loss": 1.5458, "step": 265 }, { "epoch": 0.003456548681622304, "grad_norm": 0.4232870638370514, "learning_rate": 0.0001993501345221539, "loss": 1.6085, "step": 266 }, { "epoch": 0.003469543225538177, "grad_norm": 0.3867042660713196, "learning_rate": 0.00019934753506024253, "loss": 1.5473, "step": 267 }, { "epoch": 0.0034825377694540503, "grad_norm": 0.3499396741390228, "learning_rate": 0.00019934493559833116, "loss": 1.5009, "step": 268 }, { "epoch": 0.0034955323133699237, "grad_norm": 0.38908466696739197, "learning_rate": 0.00019934233613641978, "loss": 1.4159, "step": 269 }, { "epoch": 0.003508526857285797, "grad_norm": 0.29934537410736084, "learning_rate": 0.00019933973667450838, "loss": 1.2518, "step": 270 }, { "epoch": 0.0035215214012016705, "grad_norm": 0.31104952096939087, "learning_rate": 0.000199337137212597, "loss": 1.4959, "step": 271 }, { "epoch": 0.003534515945117544, "grad_norm": 0.4307188391685486, "learning_rate": 0.00019933453775068563, "loss": 1.675, "step": 272 }, { "epoch": 0.0035475104890334173, "grad_norm": 0.30613699555397034, "learning_rate": 0.00019933193828877423, "loss": 1.5123, "step": 273 }, { "epoch": 0.0035605050329492903, "grad_norm": 0.39022064208984375, "learning_rate": 0.00019932933882686285, "loss": 1.5492, "step": 274 }, { "epoch": 0.0035734995768651637, "grad_norm": 0.4059942960739136, "learning_rate": 0.00019932673936495145, "loss": 1.5862, "step": 275 }, { "epoch": 0.003586494120781037, "grad_norm": 0.33377179503440857, "learning_rate": 0.0001993241399030401, "loss": 1.6645, "step": 276 }, { "epoch": 0.0035994886646969104, "grad_norm": 0.3957732915878296, "learning_rate": 0.0001993215404411287, "loss": 1.4876, "step": 277 }, { "epoch": 0.003612483208612784, "grad_norm": 0.3599517047405243, "learning_rate": 0.0001993189409792173, "loss": 1.4725, "step": 278 }, { "epoch": 0.0036254777525286572, "grad_norm": 0.28503838181495667, "learning_rate": 0.00019931634151730594, "loss": 1.3983, "step": 279 }, { "epoch": 0.00363847229644453, "grad_norm": 0.3737494647502899, "learning_rate": 0.00019931374205539454, "loss": 1.5421, "step": 280 }, { "epoch": 0.0036514668403604036, "grad_norm": 0.32609713077545166, "learning_rate": 0.00019931114259348317, "loss": 1.4412, "step": 281 }, { "epoch": 0.003664461384276277, "grad_norm": 0.27020320296287537, "learning_rate": 0.00019930854313157176, "loss": 1.323, "step": 282 }, { "epoch": 0.0036774559281921504, "grad_norm": 0.3037111759185791, "learning_rate": 0.0001993059436696604, "loss": 1.6002, "step": 283 }, { "epoch": 0.0036904504721080237, "grad_norm": 0.319678395986557, "learning_rate": 0.000199303344207749, "loss": 1.6927, "step": 284 }, { "epoch": 0.003703445016023897, "grad_norm": 0.3375207781791687, "learning_rate": 0.0001993007447458376, "loss": 1.6532, "step": 285 }, { "epoch": 0.00371643955993977, "grad_norm": 0.43750518560409546, "learning_rate": 0.00019929814528392624, "loss": 1.5129, "step": 286 }, { "epoch": 0.0037294341038556435, "grad_norm": 0.5206657648086548, "learning_rate": 0.00019929554582201486, "loss": 1.4792, "step": 287 }, { "epoch": 0.003742428647771517, "grad_norm": 0.3427729904651642, "learning_rate": 0.00019929294636010348, "loss": 1.4153, "step": 288 }, { "epoch": 0.0037554231916873903, "grad_norm": 0.2810385227203369, "learning_rate": 0.00019929034689819208, "loss": 1.4501, "step": 289 }, { "epoch": 0.0037684177356032637, "grad_norm": 0.3164840340614319, "learning_rate": 0.00019928774743628068, "loss": 1.5437, "step": 290 }, { "epoch": 0.003781412279519137, "grad_norm": 0.40451282262802124, "learning_rate": 0.00019928514797436933, "loss": 1.5069, "step": 291 }, { "epoch": 0.00379440682343501, "grad_norm": 0.24449609220027924, "learning_rate": 0.00019928254851245793, "loss": 1.3157, "step": 292 }, { "epoch": 0.0038074013673508834, "grad_norm": 0.3006196916103363, "learning_rate": 0.00019927994905054655, "loss": 1.4647, "step": 293 }, { "epoch": 0.003820395911266757, "grad_norm": 0.2584126591682434, "learning_rate": 0.00019927734958863515, "loss": 1.5663, "step": 294 }, { "epoch": 0.00383339045518263, "grad_norm": 0.3571203649044037, "learning_rate": 0.00019927475012672377, "loss": 1.3851, "step": 295 }, { "epoch": 0.0038463849990985036, "grad_norm": 0.31314989924430847, "learning_rate": 0.0001992721506648124, "loss": 1.4645, "step": 296 }, { "epoch": 0.003859379543014377, "grad_norm": 0.3966493010520935, "learning_rate": 0.000199269551202901, "loss": 1.2261, "step": 297 }, { "epoch": 0.0038723740869302504, "grad_norm": 0.37772703170776367, "learning_rate": 0.00019926695174098962, "loss": 1.6829, "step": 298 }, { "epoch": 0.0038853686308461233, "grad_norm": 0.34503284096717834, "learning_rate": 0.00019926435227907824, "loss": 1.5732, "step": 299 }, { "epoch": 0.0038983631747619967, "grad_norm": 0.3639414608478546, "learning_rate": 0.00019926175281716687, "loss": 1.4561, "step": 300 }, { "epoch": 0.0039113577186778705, "grad_norm": 0.36225298047065735, "learning_rate": 0.00019925915335525547, "loss": 1.5391, "step": 301 }, { "epoch": 0.0039243522625937435, "grad_norm": 0.3410588204860687, "learning_rate": 0.0001992565538933441, "loss": 1.3652, "step": 302 }, { "epoch": 0.0039373468065096165, "grad_norm": 0.3484848141670227, "learning_rate": 0.00019925395443143272, "loss": 1.3219, "step": 303 }, { "epoch": 0.00395034135042549, "grad_norm": 0.3137170374393463, "learning_rate": 0.0001992513549695213, "loss": 1.7442, "step": 304 }, { "epoch": 0.003963335894341363, "grad_norm": 0.27639421820640564, "learning_rate": 0.00019924875550760994, "loss": 1.4587, "step": 305 }, { "epoch": 0.003976330438257237, "grad_norm": 0.4024495482444763, "learning_rate": 0.00019924615604569854, "loss": 1.4646, "step": 306 }, { "epoch": 0.00398932498217311, "grad_norm": 0.3829977214336395, "learning_rate": 0.00019924355658378716, "loss": 1.476, "step": 307 }, { "epoch": 0.004002319526088983, "grad_norm": 0.24776104092597961, "learning_rate": 0.00019924095712187578, "loss": 1.3956, "step": 308 }, { "epoch": 0.004015314070004857, "grad_norm": 0.30229294300079346, "learning_rate": 0.00019923835765996438, "loss": 1.4949, "step": 309 }, { "epoch": 0.00402830861392073, "grad_norm": 0.2815120220184326, "learning_rate": 0.000199235758198053, "loss": 1.4075, "step": 310 }, { "epoch": 0.004041303157836604, "grad_norm": 0.3126884698867798, "learning_rate": 0.00019923315873614163, "loss": 1.2761, "step": 311 }, { "epoch": 0.0040542977017524766, "grad_norm": 0.38102856278419495, "learning_rate": 0.00019923055927423025, "loss": 1.4739, "step": 312 }, { "epoch": 0.00406729224566835, "grad_norm": 0.2952413260936737, "learning_rate": 0.00019922795981231885, "loss": 1.3963, "step": 313 }, { "epoch": 0.004080286789584223, "grad_norm": 0.3740508556365967, "learning_rate": 0.00019922536035040748, "loss": 1.426, "step": 314 }, { "epoch": 0.004093281333500096, "grad_norm": 0.3956066370010376, "learning_rate": 0.0001992227608884961, "loss": 1.396, "step": 315 }, { "epoch": 0.00410627587741597, "grad_norm": 0.32432445883750916, "learning_rate": 0.0001992201614265847, "loss": 1.5289, "step": 316 }, { "epoch": 0.004119270421331843, "grad_norm": 0.3189620077610016, "learning_rate": 0.00019921756196467332, "loss": 1.6106, "step": 317 }, { "epoch": 0.004132264965247717, "grad_norm": 0.27923664450645447, "learning_rate": 0.00019921496250276195, "loss": 1.4541, "step": 318 }, { "epoch": 0.00414525950916359, "grad_norm": 0.33311569690704346, "learning_rate": 0.00019921236304085054, "loss": 1.4914, "step": 319 }, { "epoch": 0.004158254053079464, "grad_norm": 0.4120247960090637, "learning_rate": 0.00019920976357893917, "loss": 1.5054, "step": 320 }, { "epoch": 0.004171248596995337, "grad_norm": 0.37923744320869446, "learning_rate": 0.00019920716411702777, "loss": 1.5037, "step": 321 }, { "epoch": 0.00418424314091121, "grad_norm": 0.5349377989768982, "learning_rate": 0.00019920456465511642, "loss": 1.5083, "step": 322 }, { "epoch": 0.004197237684827083, "grad_norm": 0.3078610599040985, "learning_rate": 0.00019920196519320502, "loss": 1.2024, "step": 323 }, { "epoch": 0.004210232228742956, "grad_norm": 0.30870485305786133, "learning_rate": 0.00019919936573129364, "loss": 1.2176, "step": 324 }, { "epoch": 0.00422322677265883, "grad_norm": 0.3730783760547638, "learning_rate": 0.00019919676626938224, "loss": 1.549, "step": 325 }, { "epoch": 0.004236221316574703, "grad_norm": 0.4394433796405792, "learning_rate": 0.00019919416680747086, "loss": 1.6255, "step": 326 }, { "epoch": 0.004249215860490576, "grad_norm": 0.35662856698036194, "learning_rate": 0.0001991915673455595, "loss": 1.5798, "step": 327 }, { "epoch": 0.00426221040440645, "grad_norm": 0.38909202814102173, "learning_rate": 0.00019918896788364808, "loss": 1.7098, "step": 328 }, { "epoch": 0.004275204948322323, "grad_norm": 0.2588720917701721, "learning_rate": 0.0001991863684217367, "loss": 1.4841, "step": 329 }, { "epoch": 0.004288199492238197, "grad_norm": 0.26081642508506775, "learning_rate": 0.00019918376895982533, "loss": 1.3831, "step": 330 }, { "epoch": 0.00430119403615407, "grad_norm": 0.3421750068664551, "learning_rate": 0.00019918116949791396, "loss": 1.4811, "step": 331 }, { "epoch": 0.0043141885800699435, "grad_norm": 0.3355828821659088, "learning_rate": 0.00019917857003600255, "loss": 1.4944, "step": 332 }, { "epoch": 0.0043271831239858165, "grad_norm": 0.36427485942840576, "learning_rate": 0.00019917597057409115, "loss": 1.4734, "step": 333 }, { "epoch": 0.0043401776679016894, "grad_norm": 0.23450542986392975, "learning_rate": 0.0001991733711121798, "loss": 1.5442, "step": 334 }, { "epoch": 0.004353172211817563, "grad_norm": 0.33973386883735657, "learning_rate": 0.0001991707716502684, "loss": 1.3849, "step": 335 }, { "epoch": 0.004366166755733436, "grad_norm": 0.3666324019432068, "learning_rate": 0.00019916817218835703, "loss": 1.4994, "step": 336 }, { "epoch": 0.00437916129964931, "grad_norm": 0.3015466332435608, "learning_rate": 0.00019916557272644562, "loss": 1.3411, "step": 337 }, { "epoch": 0.004392155843565183, "grad_norm": 0.39283299446105957, "learning_rate": 0.00019916297326453425, "loss": 1.5291, "step": 338 }, { "epoch": 0.004405150387481056, "grad_norm": 0.33914270997047424, "learning_rate": 0.00019916037380262287, "loss": 1.5784, "step": 339 }, { "epoch": 0.00441814493139693, "grad_norm": 0.3376213014125824, "learning_rate": 0.00019915777434071147, "loss": 1.6643, "step": 340 }, { "epoch": 0.004431139475312803, "grad_norm": 0.3161202073097229, "learning_rate": 0.0001991551748788001, "loss": 1.581, "step": 341 }, { "epoch": 0.004444134019228677, "grad_norm": 0.281429260969162, "learning_rate": 0.00019915257541688872, "loss": 1.4395, "step": 342 }, { "epoch": 0.0044571285631445495, "grad_norm": 0.30229508876800537, "learning_rate": 0.00019914997595497734, "loss": 1.3674, "step": 343 }, { "epoch": 0.004470123107060423, "grad_norm": 0.38609886169433594, "learning_rate": 0.00019914737649306594, "loss": 1.4923, "step": 344 }, { "epoch": 0.004483117650976296, "grad_norm": 0.3662254512310028, "learning_rate": 0.00019914477703115454, "loss": 1.4679, "step": 345 }, { "epoch": 0.004496112194892169, "grad_norm": 0.41322511434555054, "learning_rate": 0.0001991421775692432, "loss": 1.6495, "step": 346 }, { "epoch": 0.004509106738808043, "grad_norm": 0.33841001987457275, "learning_rate": 0.0001991395781073318, "loss": 1.4648, "step": 347 }, { "epoch": 0.004522101282723916, "grad_norm": 0.2575288712978363, "learning_rate": 0.0001991369786454204, "loss": 1.4663, "step": 348 }, { "epoch": 0.00453509582663979, "grad_norm": 0.29992565512657166, "learning_rate": 0.000199134379183509, "loss": 1.6909, "step": 349 }, { "epoch": 0.004548090370555663, "grad_norm": 0.30328288674354553, "learning_rate": 0.00019913177972159763, "loss": 1.3658, "step": 350 }, { "epoch": 0.004561084914471537, "grad_norm": 0.35181304812431335, "learning_rate": 0.00019912918025968626, "loss": 1.7398, "step": 351 }, { "epoch": 0.00457407945838741, "grad_norm": 0.35897067189216614, "learning_rate": 0.00019912658079777485, "loss": 1.5741, "step": 352 }, { "epoch": 0.004587074002303283, "grad_norm": 0.3016517758369446, "learning_rate": 0.0001991239813358635, "loss": 1.448, "step": 353 }, { "epoch": 0.004600068546219156, "grad_norm": 0.4207736849784851, "learning_rate": 0.0001991213818739521, "loss": 1.6313, "step": 354 }, { "epoch": 0.004613063090135029, "grad_norm": 0.5491371154785156, "learning_rate": 0.00019911878241204073, "loss": 1.7278, "step": 355 }, { "epoch": 0.004626057634050903, "grad_norm": 0.4126145541667938, "learning_rate": 0.00019911618295012933, "loss": 1.4285, "step": 356 }, { "epoch": 0.004639052177966776, "grad_norm": 0.3121761679649353, "learning_rate": 0.00019911358348821795, "loss": 1.5382, "step": 357 }, { "epoch": 0.004652046721882649, "grad_norm": 0.35061636567115784, "learning_rate": 0.00019911098402630657, "loss": 1.5044, "step": 358 }, { "epoch": 0.004665041265798523, "grad_norm": 0.30849018692970276, "learning_rate": 0.00019910838456439517, "loss": 1.4529, "step": 359 }, { "epoch": 0.004678035809714396, "grad_norm": 0.33758828043937683, "learning_rate": 0.0001991057851024838, "loss": 1.489, "step": 360 }, { "epoch": 0.00469103035363027, "grad_norm": 0.24260103702545166, "learning_rate": 0.00019910318564057242, "loss": 1.2288, "step": 361 }, { "epoch": 0.004704024897546143, "grad_norm": 0.30140095949172974, "learning_rate": 0.00019910058617866102, "loss": 1.6953, "step": 362 }, { "epoch": 0.0047170194414620165, "grad_norm": 0.43354475498199463, "learning_rate": 0.00019909798671674964, "loss": 1.5043, "step": 363 }, { "epoch": 0.0047300139853778895, "grad_norm": 0.322436660528183, "learning_rate": 0.00019909538725483824, "loss": 1.4263, "step": 364 }, { "epoch": 0.004743008529293762, "grad_norm": 0.3592729866504669, "learning_rate": 0.0001990927877929269, "loss": 1.4993, "step": 365 }, { "epoch": 0.004756003073209636, "grad_norm": 0.3389509320259094, "learning_rate": 0.0001990901883310155, "loss": 1.5002, "step": 366 }, { "epoch": 0.004768997617125509, "grad_norm": 0.35353222489356995, "learning_rate": 0.00019908758886910411, "loss": 1.4997, "step": 367 }, { "epoch": 0.004781992161041383, "grad_norm": 0.2971359193325043, "learning_rate": 0.0001990849894071927, "loss": 1.5266, "step": 368 }, { "epoch": 0.004794986704957256, "grad_norm": 0.2539112865924835, "learning_rate": 0.00019908238994528134, "loss": 1.4109, "step": 369 }, { "epoch": 0.00480798124887313, "grad_norm": 0.3044717013835907, "learning_rate": 0.00019907979048336996, "loss": 1.3098, "step": 370 }, { "epoch": 0.004820975792789003, "grad_norm": 0.2754577100276947, "learning_rate": 0.00019907719102145856, "loss": 1.2542, "step": 371 }, { "epoch": 0.004833970336704876, "grad_norm": 0.4150601029396057, "learning_rate": 0.00019907459155954718, "loss": 1.629, "step": 372 }, { "epoch": 0.0048469648806207495, "grad_norm": 0.4287967085838318, "learning_rate": 0.0001990719920976358, "loss": 1.5594, "step": 373 }, { "epoch": 0.0048599594245366225, "grad_norm": 0.4358462691307068, "learning_rate": 0.0001990693926357244, "loss": 1.4678, "step": 374 }, { "epoch": 0.004872953968452496, "grad_norm": 0.36713430285453796, "learning_rate": 0.00019906679317381303, "loss": 1.5234, "step": 375 }, { "epoch": 0.004885948512368369, "grad_norm": 0.26237761974334717, "learning_rate": 0.00019906419371190163, "loss": 1.1133, "step": 376 }, { "epoch": 0.004898943056284242, "grad_norm": 0.34526896476745605, "learning_rate": 0.00019906159424999028, "loss": 1.5015, "step": 377 }, { "epoch": 0.004911937600200116, "grad_norm": 0.32094281911849976, "learning_rate": 0.00019905899478807887, "loss": 1.3854, "step": 378 }, { "epoch": 0.004924932144115989, "grad_norm": 0.36162108182907104, "learning_rate": 0.0001990563953261675, "loss": 1.4663, "step": 379 }, { "epoch": 0.004937926688031863, "grad_norm": 0.310148149728775, "learning_rate": 0.0001990537958642561, "loss": 1.3797, "step": 380 }, { "epoch": 0.004950921231947736, "grad_norm": 0.30552294850349426, "learning_rate": 0.00019905119640234472, "loss": 1.5114, "step": 381 }, { "epoch": 0.00496391577586361, "grad_norm": 0.4390285015106201, "learning_rate": 0.00019904859694043335, "loss": 1.6308, "step": 382 }, { "epoch": 0.004976910319779483, "grad_norm": 0.4116781949996948, "learning_rate": 0.00019904599747852194, "loss": 1.5106, "step": 383 }, { "epoch": 0.0049899048636953556, "grad_norm": 0.4091288149356842, "learning_rate": 0.00019904339801661057, "loss": 1.4398, "step": 384 }, { "epoch": 0.005002899407611229, "grad_norm": 0.3653791546821594, "learning_rate": 0.0001990407985546992, "loss": 1.6652, "step": 385 }, { "epoch": 0.005015893951527102, "grad_norm": 0.2884867191314697, "learning_rate": 0.00019903819909278782, "loss": 1.5647, "step": 386 }, { "epoch": 0.005028888495442976, "grad_norm": 0.4264460504055023, "learning_rate": 0.0001990355996308764, "loss": 1.57, "step": 387 }, { "epoch": 0.005041883039358849, "grad_norm": 0.418401837348938, "learning_rate": 0.00019903300016896504, "loss": 1.5994, "step": 388 }, { "epoch": 0.005054877583274723, "grad_norm": 0.3586342930793762, "learning_rate": 0.00019903040070705366, "loss": 1.4049, "step": 389 }, { "epoch": 0.005067872127190596, "grad_norm": 0.36481258273124695, "learning_rate": 0.00019902780124514226, "loss": 1.5148, "step": 390 }, { "epoch": 0.005080866671106469, "grad_norm": 0.2749533951282501, "learning_rate": 0.00019902520178323088, "loss": 1.5359, "step": 391 }, { "epoch": 0.005093861215022343, "grad_norm": 0.29476672410964966, "learning_rate": 0.0001990226023213195, "loss": 1.3621, "step": 392 }, { "epoch": 0.005106855758938216, "grad_norm": 0.382303923368454, "learning_rate": 0.0001990200028594081, "loss": 1.4162, "step": 393 }, { "epoch": 0.0051198503028540895, "grad_norm": 0.3639098107814789, "learning_rate": 0.00019901740339749673, "loss": 1.4234, "step": 394 }, { "epoch": 0.005132844846769962, "grad_norm": 0.3024827241897583, "learning_rate": 0.00019901480393558533, "loss": 1.4654, "step": 395 }, { "epoch": 0.005145839390685835, "grad_norm": 0.3591507375240326, "learning_rate": 0.00019901220447367398, "loss": 1.5982, "step": 396 }, { "epoch": 0.005158833934601709, "grad_norm": 0.2867201566696167, "learning_rate": 0.00019900960501176258, "loss": 1.4561, "step": 397 }, { "epoch": 0.005171828478517582, "grad_norm": 0.24428586661815643, "learning_rate": 0.0001990070055498512, "loss": 1.4842, "step": 398 }, { "epoch": 0.005184823022433456, "grad_norm": 0.3388294577598572, "learning_rate": 0.0001990044060879398, "loss": 1.4982, "step": 399 }, { "epoch": 0.005197817566349329, "grad_norm": 0.3624451458454132, "learning_rate": 0.00019900180662602842, "loss": 1.4539, "step": 400 }, { "epoch": 0.005210812110265203, "grad_norm": 0.4799981415271759, "learning_rate": 0.00019899920716411705, "loss": 1.5812, "step": 401 }, { "epoch": 0.005223806654181076, "grad_norm": 0.3657929301261902, "learning_rate": 0.00019899660770220565, "loss": 1.5602, "step": 402 }, { "epoch": 0.005236801198096949, "grad_norm": 0.38014063239097595, "learning_rate": 0.00019899400824029427, "loss": 1.6159, "step": 403 }, { "epoch": 0.0052497957420128225, "grad_norm": 0.392153263092041, "learning_rate": 0.0001989914087783829, "loss": 1.5834, "step": 404 }, { "epoch": 0.0052627902859286955, "grad_norm": 0.39197036623954773, "learning_rate": 0.0001989888093164715, "loss": 1.5262, "step": 405 }, { "epoch": 0.005275784829844569, "grad_norm": 0.36897343397140503, "learning_rate": 0.00019898620985456012, "loss": 1.4581, "step": 406 }, { "epoch": 0.005288779373760442, "grad_norm": 0.3160710632801056, "learning_rate": 0.0001989836103926487, "loss": 1.3241, "step": 407 }, { "epoch": 0.005301773917676316, "grad_norm": 0.32507070899009705, "learning_rate": 0.00019898101093073737, "loss": 1.4533, "step": 408 }, { "epoch": 0.005314768461592189, "grad_norm": 0.3860207498073578, "learning_rate": 0.00019897841146882596, "loss": 1.6103, "step": 409 }, { "epoch": 0.005327763005508062, "grad_norm": 0.28321462869644165, "learning_rate": 0.0001989758120069146, "loss": 1.4407, "step": 410 }, { "epoch": 0.005340757549423936, "grad_norm": 0.36039674282073975, "learning_rate": 0.00019897321254500318, "loss": 1.5651, "step": 411 }, { "epoch": 0.005353752093339809, "grad_norm": 0.38339999318122864, "learning_rate": 0.0001989706130830918, "loss": 1.4411, "step": 412 }, { "epoch": 0.005366746637255683, "grad_norm": 0.3544124960899353, "learning_rate": 0.00019896801362118043, "loss": 1.4977, "step": 413 }, { "epoch": 0.005379741181171556, "grad_norm": 0.3119560480117798, "learning_rate": 0.00019896541415926903, "loss": 1.5812, "step": 414 }, { "epoch": 0.0053927357250874285, "grad_norm": 0.41256198287010193, "learning_rate": 0.00019896281469735766, "loss": 1.5193, "step": 415 }, { "epoch": 0.005405730269003302, "grad_norm": 0.3362357020378113, "learning_rate": 0.00019896021523544628, "loss": 1.3828, "step": 416 }, { "epoch": 0.005418724812919175, "grad_norm": 0.28691500425338745, "learning_rate": 0.00019895761577353488, "loss": 1.3371, "step": 417 }, { "epoch": 0.005431719356835049, "grad_norm": 0.31148120760917664, "learning_rate": 0.0001989550163116235, "loss": 1.4256, "step": 418 }, { "epoch": 0.005444713900750922, "grad_norm": 0.3507455587387085, "learning_rate": 0.0001989524168497121, "loss": 1.5591, "step": 419 }, { "epoch": 0.005457708444666796, "grad_norm": 0.5161775946617126, "learning_rate": 0.00019894981738780075, "loss": 1.7213, "step": 420 }, { "epoch": 0.005470702988582669, "grad_norm": 0.4539799094200134, "learning_rate": 0.00019894721792588935, "loss": 1.5054, "step": 421 }, { "epoch": 0.005483697532498542, "grad_norm": 0.38446927070617676, "learning_rate": 0.00019894461846397797, "loss": 1.6458, "step": 422 }, { "epoch": 0.005496692076414416, "grad_norm": 0.2804405987262726, "learning_rate": 0.00019894201900206657, "loss": 1.2219, "step": 423 }, { "epoch": 0.005509686620330289, "grad_norm": 0.3562676012516022, "learning_rate": 0.0001989394195401552, "loss": 1.3673, "step": 424 }, { "epoch": 0.0055226811642461624, "grad_norm": 0.39644521474838257, "learning_rate": 0.00019893682007824382, "loss": 1.6778, "step": 425 }, { "epoch": 0.005535675708162035, "grad_norm": 0.35181400179862976, "learning_rate": 0.00019893422061633242, "loss": 1.4347, "step": 426 }, { "epoch": 0.005548670252077908, "grad_norm": 0.33172330260276794, "learning_rate": 0.00019893162115442107, "loss": 1.5032, "step": 427 }, { "epoch": 0.005561664795993782, "grad_norm": 0.29729771614074707, "learning_rate": 0.00019892902169250966, "loss": 1.3403, "step": 428 }, { "epoch": 0.005574659339909655, "grad_norm": 0.4405791461467743, "learning_rate": 0.00019892642223059826, "loss": 1.7167, "step": 429 }, { "epoch": 0.005587653883825529, "grad_norm": 0.31268784403800964, "learning_rate": 0.0001989238227686869, "loss": 1.3955, "step": 430 }, { "epoch": 0.005600648427741402, "grad_norm": 0.2711225748062134, "learning_rate": 0.0001989212233067755, "loss": 1.5813, "step": 431 }, { "epoch": 0.005613642971657276, "grad_norm": 0.3970961570739746, "learning_rate": 0.00019891862384486414, "loss": 1.4497, "step": 432 }, { "epoch": 0.005626637515573149, "grad_norm": 0.3672778308391571, "learning_rate": 0.00019891602438295273, "loss": 1.4871, "step": 433 }, { "epoch": 0.005639632059489022, "grad_norm": 0.3104008138179779, "learning_rate": 0.00019891342492104136, "loss": 1.4096, "step": 434 }, { "epoch": 0.0056526266034048955, "grad_norm": 0.4113682806491852, "learning_rate": 0.00019891082545912998, "loss": 1.599, "step": 435 }, { "epoch": 0.0056656211473207685, "grad_norm": 0.3929567337036133, "learning_rate": 0.00019890822599721858, "loss": 1.5196, "step": 436 }, { "epoch": 0.005678615691236642, "grad_norm": 0.47488945722579956, "learning_rate": 0.0001989056265353072, "loss": 1.6349, "step": 437 }, { "epoch": 0.005691610235152515, "grad_norm": 0.3263094127178192, "learning_rate": 0.0001989030270733958, "loss": 1.4518, "step": 438 }, { "epoch": 0.005704604779068389, "grad_norm": 0.3672201633453369, "learning_rate": 0.00019890042761148445, "loss": 1.5772, "step": 439 }, { "epoch": 0.005717599322984262, "grad_norm": 0.4115719199180603, "learning_rate": 0.00019889782814957305, "loss": 1.7124, "step": 440 }, { "epoch": 0.005730593866900135, "grad_norm": 0.31117019057273865, "learning_rate": 0.00019889522868766165, "loss": 1.5545, "step": 441 }, { "epoch": 0.005743588410816009, "grad_norm": 0.43034565448760986, "learning_rate": 0.00019889262922575027, "loss": 1.5853, "step": 442 }, { "epoch": 0.005756582954731882, "grad_norm": 0.3980412781238556, "learning_rate": 0.0001988900297638389, "loss": 1.5383, "step": 443 }, { "epoch": 0.005769577498647756, "grad_norm": 0.3163507282733917, "learning_rate": 0.00019888743030192752, "loss": 1.3385, "step": 444 }, { "epoch": 0.0057825720425636285, "grad_norm": 0.37393853068351746, "learning_rate": 0.00019888483084001612, "loss": 1.4841, "step": 445 }, { "epoch": 0.0057955665864795015, "grad_norm": 0.3804837763309479, "learning_rate": 0.00019888223137810474, "loss": 1.4943, "step": 446 }, { "epoch": 0.005808561130395375, "grad_norm": 0.3611343204975128, "learning_rate": 0.00019887963191619337, "loss": 1.5363, "step": 447 }, { "epoch": 0.005821555674311248, "grad_norm": 0.284523069858551, "learning_rate": 0.00019887703245428196, "loss": 1.5203, "step": 448 }, { "epoch": 0.005834550218227122, "grad_norm": 0.398366779088974, "learning_rate": 0.0001988744329923706, "loss": 1.6513, "step": 449 }, { "epoch": 0.005847544762142995, "grad_norm": 0.3861139416694641, "learning_rate": 0.0001988718335304592, "loss": 1.4824, "step": 450 }, { "epoch": 0.005860539306058869, "grad_norm": 0.32513299584388733, "learning_rate": 0.00019886923406854784, "loss": 1.409, "step": 451 }, { "epoch": 0.005873533849974742, "grad_norm": 0.3809260427951813, "learning_rate": 0.00019886663460663644, "loss": 1.5881, "step": 452 }, { "epoch": 0.005886528393890615, "grad_norm": 0.3487527072429657, "learning_rate": 0.00019886403514472506, "loss": 1.5019, "step": 453 }, { "epoch": 0.005899522937806489, "grad_norm": 0.31411534547805786, "learning_rate": 0.00019886143568281366, "loss": 1.5802, "step": 454 }, { "epoch": 0.005912517481722362, "grad_norm": 0.4067882001399994, "learning_rate": 0.00019885883622090228, "loss": 1.4341, "step": 455 }, { "epoch": 0.005925512025638235, "grad_norm": 0.32054728269577026, "learning_rate": 0.0001988562367589909, "loss": 1.4677, "step": 456 }, { "epoch": 0.005938506569554108, "grad_norm": 0.336537629365921, "learning_rate": 0.0001988536372970795, "loss": 1.7248, "step": 457 }, { "epoch": 0.005951501113469982, "grad_norm": 0.34672510623931885, "learning_rate": 0.00019885103783516813, "loss": 1.3866, "step": 458 }, { "epoch": 0.005964495657385855, "grad_norm": 0.38182681798934937, "learning_rate": 0.00019884843837325675, "loss": 1.5306, "step": 459 }, { "epoch": 0.005977490201301728, "grad_norm": 0.3976811468601227, "learning_rate": 0.00019884583891134535, "loss": 1.7088, "step": 460 }, { "epoch": 0.005990484745217602, "grad_norm": 0.34490296244621277, "learning_rate": 0.00019884323944943397, "loss": 1.6008, "step": 461 }, { "epoch": 0.006003479289133475, "grad_norm": 0.4401227831840515, "learning_rate": 0.0001988406399875226, "loss": 1.4556, "step": 462 }, { "epoch": 0.006016473833049349, "grad_norm": 0.32190635800361633, "learning_rate": 0.00019883804052561122, "loss": 1.436, "step": 463 }, { "epoch": 0.006029468376965222, "grad_norm": 0.39786359667778015, "learning_rate": 0.00019883544106369982, "loss": 1.9256, "step": 464 }, { "epoch": 0.006042462920881095, "grad_norm": 0.32083266973495483, "learning_rate": 0.00019883284160178845, "loss": 1.5449, "step": 465 }, { "epoch": 0.0060554574647969685, "grad_norm": 0.37164804339408875, "learning_rate": 0.00019883024213987707, "loss": 1.4283, "step": 466 }, { "epoch": 0.006068452008712841, "grad_norm": 0.32890164852142334, "learning_rate": 0.00019882764267796567, "loss": 1.3723, "step": 467 }, { "epoch": 0.006081446552628715, "grad_norm": 0.3165036737918854, "learning_rate": 0.0001988250432160543, "loss": 1.2878, "step": 468 }, { "epoch": 0.006094441096544588, "grad_norm": 0.287625789642334, "learning_rate": 0.0001988224437541429, "loss": 1.5451, "step": 469 }, { "epoch": 0.006107435640460462, "grad_norm": 0.31385481357574463, "learning_rate": 0.00019881984429223154, "loss": 1.3884, "step": 470 }, { "epoch": 0.006120430184376335, "grad_norm": 0.48251134157180786, "learning_rate": 0.00019881724483032014, "loss": 1.4744, "step": 471 }, { "epoch": 0.006133424728292208, "grad_norm": 0.38446444272994995, "learning_rate": 0.00019881464536840874, "loss": 1.5976, "step": 472 }, { "epoch": 0.006146419272208082, "grad_norm": 0.40736618638038635, "learning_rate": 0.00019881204590649736, "loss": 1.6116, "step": 473 }, { "epoch": 0.006159413816123955, "grad_norm": 0.3143475651741028, "learning_rate": 0.00019880944644458598, "loss": 1.5222, "step": 474 }, { "epoch": 0.006172408360039829, "grad_norm": 0.4445915222167969, "learning_rate": 0.0001988068469826746, "loss": 1.5871, "step": 475 }, { "epoch": 0.0061854029039557015, "grad_norm": 0.29055100679397583, "learning_rate": 0.0001988042475207632, "loss": 1.6007, "step": 476 }, { "epoch": 0.006198397447871575, "grad_norm": 0.3698204457759857, "learning_rate": 0.00019880164805885183, "loss": 1.5176, "step": 477 }, { "epoch": 0.006211391991787448, "grad_norm": 0.31237465143203735, "learning_rate": 0.00019879904859694046, "loss": 1.4398, "step": 478 }, { "epoch": 0.006224386535703321, "grad_norm": 0.3039858043193817, "learning_rate": 0.00019879644913502905, "loss": 1.3464, "step": 479 }, { "epoch": 0.006237381079619195, "grad_norm": 0.40471094846725464, "learning_rate": 0.00019879384967311768, "loss": 1.7087, "step": 480 }, { "epoch": 0.006250375623535068, "grad_norm": 0.360324889421463, "learning_rate": 0.00019879125021120627, "loss": 1.523, "step": 481 }, { "epoch": 0.006263370167450942, "grad_norm": 0.3731452226638794, "learning_rate": 0.00019878865074929493, "loss": 1.5483, "step": 482 }, { "epoch": 0.006276364711366815, "grad_norm": 0.1972702145576477, "learning_rate": 0.00019878605128738352, "loss": 1.2281, "step": 483 }, { "epoch": 0.006289359255282688, "grad_norm": 0.36422380805015564, "learning_rate": 0.00019878345182547212, "loss": 1.4624, "step": 484 }, { "epoch": 0.006302353799198562, "grad_norm": 0.366229772567749, "learning_rate": 0.00019878085236356075, "loss": 1.4592, "step": 485 }, { "epoch": 0.006315348343114435, "grad_norm": 0.295625239610672, "learning_rate": 0.00019877825290164937, "loss": 1.4777, "step": 486 }, { "epoch": 0.006328342887030308, "grad_norm": 0.29468804597854614, "learning_rate": 0.000198775653439738, "loss": 1.4056, "step": 487 }, { "epoch": 0.006341337430946181, "grad_norm": 0.4027608036994934, "learning_rate": 0.0001987730539778266, "loss": 1.5459, "step": 488 }, { "epoch": 0.006354331974862055, "grad_norm": 0.39129120111465454, "learning_rate": 0.00019877045451591522, "loss": 1.4901, "step": 489 }, { "epoch": 0.006367326518777928, "grad_norm": 0.2942184507846832, "learning_rate": 0.00019876785505400384, "loss": 1.5704, "step": 490 }, { "epoch": 0.006380321062693801, "grad_norm": 0.3314521014690399, "learning_rate": 0.00019876525559209244, "loss": 1.4116, "step": 491 }, { "epoch": 0.006393315606609675, "grad_norm": 0.3074198365211487, "learning_rate": 0.00019876265613018106, "loss": 1.1821, "step": 492 }, { "epoch": 0.006406310150525548, "grad_norm": 0.2810860574245453, "learning_rate": 0.00019876005666826966, "loss": 1.472, "step": 493 }, { "epoch": 0.006419304694441422, "grad_norm": 0.35208213329315186, "learning_rate": 0.0001987574572063583, "loss": 1.5873, "step": 494 }, { "epoch": 0.006432299238357295, "grad_norm": 0.3943978548049927, "learning_rate": 0.0001987548577444469, "loss": 1.6375, "step": 495 }, { "epoch": 0.006445293782273168, "grad_norm": 0.32735174894332886, "learning_rate": 0.0001987522582825355, "loss": 1.5516, "step": 496 }, { "epoch": 0.0064582883261890415, "grad_norm": 0.361736923456192, "learning_rate": 0.00019874965882062413, "loss": 1.5815, "step": 497 }, { "epoch": 0.006471282870104914, "grad_norm": 0.3275897800922394, "learning_rate": 0.00019874705935871276, "loss": 1.5671, "step": 498 }, { "epoch": 0.006484277414020788, "grad_norm": 0.27669093012809753, "learning_rate": 0.00019874445989680138, "loss": 1.5036, "step": 499 }, { "epoch": 0.006497271957936661, "grad_norm": 0.39098384976387024, "learning_rate": 0.00019874186043488998, "loss": 1.661, "step": 500 }, { "epoch": 0.006510266501852535, "grad_norm": 0.32691437005996704, "learning_rate": 0.0001987392609729786, "loss": 1.3988, "step": 501 }, { "epoch": 0.006523261045768408, "grad_norm": 0.37341952323913574, "learning_rate": 0.00019873666151106723, "loss": 1.4535, "step": 502 }, { "epoch": 0.006536255589684281, "grad_norm": 0.3480914235115051, "learning_rate": 0.00019873406204915582, "loss": 1.419, "step": 503 }, { "epoch": 0.006549250133600155, "grad_norm": 0.41433480381965637, "learning_rate": 0.00019873146258724445, "loss": 1.6009, "step": 504 }, { "epoch": 0.006562244677516028, "grad_norm": 0.31330931186676025, "learning_rate": 0.00019872886312533307, "loss": 1.2479, "step": 505 }, { "epoch": 0.0065752392214319015, "grad_norm": 0.32603132724761963, "learning_rate": 0.0001987262636634217, "loss": 1.3484, "step": 506 }, { "epoch": 0.0065882337653477745, "grad_norm": 0.33049696683883667, "learning_rate": 0.0001987236642015103, "loss": 1.5938, "step": 507 }, { "epoch": 0.006601228309263648, "grad_norm": 0.31383728981018066, "learning_rate": 0.00019872106473959892, "loss": 1.4817, "step": 508 }, { "epoch": 0.006614222853179521, "grad_norm": 0.35135915875434875, "learning_rate": 0.00019871846527768754, "loss": 1.34, "step": 509 }, { "epoch": 0.006627217397095394, "grad_norm": 0.32868465781211853, "learning_rate": 0.00019871586581577614, "loss": 1.5162, "step": 510 }, { "epoch": 0.006640211941011268, "grad_norm": 0.3015967011451721, "learning_rate": 0.00019871326635386477, "loss": 1.3572, "step": 511 }, { "epoch": 0.006653206484927141, "grad_norm": 0.4350413978099823, "learning_rate": 0.00019871066689195336, "loss": 1.5271, "step": 512 }, { "epoch": 0.006666201028843015, "grad_norm": 0.42811042070388794, "learning_rate": 0.000198708067430042, "loss": 1.4912, "step": 513 }, { "epoch": 0.006679195572758888, "grad_norm": 0.389029324054718, "learning_rate": 0.0001987054679681306, "loss": 1.5096, "step": 514 }, { "epoch": 0.006692190116674761, "grad_norm": 0.26462414860725403, "learning_rate": 0.0001987028685062192, "loss": 1.1706, "step": 515 }, { "epoch": 0.006705184660590635, "grad_norm": 0.3653124272823334, "learning_rate": 0.00019870026904430783, "loss": 1.5413, "step": 516 }, { "epoch": 0.0067181792045065076, "grad_norm": 0.35684728622436523, "learning_rate": 0.00019869766958239646, "loss": 1.5449, "step": 517 }, { "epoch": 0.006731173748422381, "grad_norm": 0.37269213795661926, "learning_rate": 0.00019869507012048508, "loss": 1.6315, "step": 518 }, { "epoch": 0.006744168292338254, "grad_norm": 0.4106650948524475, "learning_rate": 0.00019869247065857368, "loss": 1.4719, "step": 519 }, { "epoch": 0.006757162836254128, "grad_norm": 0.3294067680835724, "learning_rate": 0.0001986898711966623, "loss": 1.5744, "step": 520 }, { "epoch": 0.006770157380170001, "grad_norm": 0.3181130290031433, "learning_rate": 0.00019868727173475093, "loss": 1.5229, "step": 521 }, { "epoch": 0.006783151924085874, "grad_norm": 0.24196115136146545, "learning_rate": 0.00019868467227283953, "loss": 1.5383, "step": 522 }, { "epoch": 0.006796146468001748, "grad_norm": 0.28848740458488464, "learning_rate": 0.00019868207281092815, "loss": 1.517, "step": 523 }, { "epoch": 0.006809141011917621, "grad_norm": 0.37162500619888306, "learning_rate": 0.00019867947334901675, "loss": 1.4766, "step": 524 }, { "epoch": 0.006822135555833495, "grad_norm": 0.4149519205093384, "learning_rate": 0.00019867687388710537, "loss": 1.4985, "step": 525 }, { "epoch": 0.006835130099749368, "grad_norm": 0.4525688886642456, "learning_rate": 0.000198674274425194, "loss": 1.6895, "step": 526 }, { "epoch": 0.0068481246436652415, "grad_norm": 0.3126329481601715, "learning_rate": 0.0001986716749632826, "loss": 1.3427, "step": 527 }, { "epoch": 0.006861119187581114, "grad_norm": 0.49626997113227844, "learning_rate": 0.00019866907550137122, "loss": 1.5601, "step": 528 }, { "epoch": 0.006874113731496987, "grad_norm": 0.33315759897232056, "learning_rate": 0.00019866647603945984, "loss": 1.3841, "step": 529 }, { "epoch": 0.006887108275412861, "grad_norm": 0.33371663093566895, "learning_rate": 0.00019866387657754847, "loss": 1.5505, "step": 530 }, { "epoch": 0.006900102819328734, "grad_norm": 0.3767539858818054, "learning_rate": 0.00019866127711563707, "loss": 1.5361, "step": 531 }, { "epoch": 0.006913097363244608, "grad_norm": 0.38601014018058777, "learning_rate": 0.0001986586776537257, "loss": 1.469, "step": 532 }, { "epoch": 0.006926091907160481, "grad_norm": 0.31352558732032776, "learning_rate": 0.00019865607819181431, "loss": 1.4213, "step": 533 }, { "epoch": 0.006939086451076354, "grad_norm": 0.41270455718040466, "learning_rate": 0.0001986534787299029, "loss": 1.5892, "step": 534 }, { "epoch": 0.006952080994992228, "grad_norm": 0.3458991050720215, "learning_rate": 0.00019865087926799154, "loss": 1.3762, "step": 535 }, { "epoch": 0.006965075538908101, "grad_norm": 0.35102227330207825, "learning_rate": 0.00019864827980608016, "loss": 1.4757, "step": 536 }, { "epoch": 0.0069780700828239745, "grad_norm": 0.3958595395088196, "learning_rate": 0.00019864568034416879, "loss": 1.5713, "step": 537 }, { "epoch": 0.0069910646267398475, "grad_norm": 0.43266427516937256, "learning_rate": 0.00019864308088225738, "loss": 1.5958, "step": 538 }, { "epoch": 0.007004059170655721, "grad_norm": 0.30778467655181885, "learning_rate": 0.00019864048142034598, "loss": 1.3696, "step": 539 }, { "epoch": 0.007017053714571594, "grad_norm": 0.34286659955978394, "learning_rate": 0.00019863788195843463, "loss": 1.5121, "step": 540 }, { "epoch": 0.007030048258487467, "grad_norm": 0.370387464761734, "learning_rate": 0.00019863528249652323, "loss": 1.6041, "step": 541 }, { "epoch": 0.007043042802403341, "grad_norm": 0.35693231225013733, "learning_rate": 0.00019863268303461185, "loss": 1.2903, "step": 542 }, { "epoch": 0.007056037346319214, "grad_norm": 0.39616748690605164, "learning_rate": 0.00019863008357270045, "loss": 1.4348, "step": 543 }, { "epoch": 0.007069031890235088, "grad_norm": 0.4330069422721863, "learning_rate": 0.00019862748411078908, "loss": 1.581, "step": 544 }, { "epoch": 0.007082026434150961, "grad_norm": 0.38319265842437744, "learning_rate": 0.0001986248846488777, "loss": 1.4199, "step": 545 }, { "epoch": 0.007095020978066835, "grad_norm": 0.3927115201950073, "learning_rate": 0.0001986222851869663, "loss": 1.717, "step": 546 }, { "epoch": 0.007108015521982708, "grad_norm": 0.3346995711326599, "learning_rate": 0.00019861968572505492, "loss": 1.5346, "step": 547 }, { "epoch": 0.0071210100658985805, "grad_norm": 0.3665822744369507, "learning_rate": 0.00019861708626314355, "loss": 1.4155, "step": 548 }, { "epoch": 0.007134004609814454, "grad_norm": 0.3435610234737396, "learning_rate": 0.00019861448680123217, "loss": 1.5495, "step": 549 }, { "epoch": 0.007146999153730327, "grad_norm": 0.382671058177948, "learning_rate": 0.00019861188733932077, "loss": 1.445, "step": 550 }, { "epoch": 0.007159993697646201, "grad_norm": 0.29679736495018005, "learning_rate": 0.00019860928787740937, "loss": 1.5509, "step": 551 }, { "epoch": 0.007172988241562074, "grad_norm": 0.41203227639198303, "learning_rate": 0.00019860668841549802, "loss": 1.5446, "step": 552 }, { "epoch": 0.007185982785477947, "grad_norm": 0.4276414215564728, "learning_rate": 0.00019860408895358661, "loss": 1.556, "step": 553 }, { "epoch": 0.007198977329393821, "grad_norm": 0.30258363485336304, "learning_rate": 0.00019860148949167524, "loss": 1.3663, "step": 554 }, { "epoch": 0.007211971873309694, "grad_norm": 0.4531673192977905, "learning_rate": 0.00019859889002976384, "loss": 1.5232, "step": 555 }, { "epoch": 0.007224966417225568, "grad_norm": 0.38264134526252747, "learning_rate": 0.00019859629056785246, "loss": 1.4465, "step": 556 }, { "epoch": 0.007237960961141441, "grad_norm": 0.3618118464946747, "learning_rate": 0.00019859369110594109, "loss": 1.4752, "step": 557 }, { "epoch": 0.0072509555050573144, "grad_norm": 0.28785741329193115, "learning_rate": 0.00019859109164402968, "loss": 1.2222, "step": 558 }, { "epoch": 0.007263950048973187, "grad_norm": 0.38898271322250366, "learning_rate": 0.0001985884921821183, "loss": 1.3929, "step": 559 }, { "epoch": 0.00727694459288906, "grad_norm": 0.32042554020881653, "learning_rate": 0.00019858589272020693, "loss": 1.5251, "step": 560 }, { "epoch": 0.007289939136804934, "grad_norm": 0.3364934027194977, "learning_rate": 0.00019858329325829556, "loss": 1.5811, "step": 561 }, { "epoch": 0.007302933680720807, "grad_norm": 0.36081287264823914, "learning_rate": 0.00019858069379638415, "loss": 1.6106, "step": 562 }, { "epoch": 0.007315928224636681, "grad_norm": 0.33980289101600647, "learning_rate": 0.00019857809433447275, "loss": 1.3746, "step": 563 }, { "epoch": 0.007328922768552554, "grad_norm": 0.3250245749950409, "learning_rate": 0.0001985754948725614, "loss": 1.3789, "step": 564 }, { "epoch": 0.007341917312468427, "grad_norm": 0.3220341205596924, "learning_rate": 0.00019857289541065, "loss": 1.7125, "step": 565 }, { "epoch": 0.007354911856384301, "grad_norm": 0.3877193033695221, "learning_rate": 0.00019857029594873862, "loss": 1.6931, "step": 566 }, { "epoch": 0.007367906400300174, "grad_norm": 0.3251323997974396, "learning_rate": 0.00019856769648682722, "loss": 1.396, "step": 567 }, { "epoch": 0.0073809009442160475, "grad_norm": 0.294418066740036, "learning_rate": 0.00019856509702491585, "loss": 1.3975, "step": 568 }, { "epoch": 0.0073938954881319205, "grad_norm": 0.3654199540615082, "learning_rate": 0.00019856249756300447, "loss": 1.5838, "step": 569 }, { "epoch": 0.007406890032047794, "grad_norm": 0.4372889995574951, "learning_rate": 0.00019855989810109307, "loss": 1.6906, "step": 570 }, { "epoch": 0.007419884575963667, "grad_norm": 0.34681013226509094, "learning_rate": 0.0001985572986391817, "loss": 1.4219, "step": 571 }, { "epoch": 0.00743287911987954, "grad_norm": 0.28042349219322205, "learning_rate": 0.00019855469917727032, "loss": 1.3512, "step": 572 }, { "epoch": 0.007445873663795414, "grad_norm": 0.37795329093933105, "learning_rate": 0.00019855209971535894, "loss": 1.4061, "step": 573 }, { "epoch": 0.007458868207711287, "grad_norm": 0.3672502934932709, "learning_rate": 0.00019854950025344754, "loss": 1.5503, "step": 574 }, { "epoch": 0.007471862751627161, "grad_norm": 0.34373030066490173, "learning_rate": 0.00019854690079153616, "loss": 1.6254, "step": 575 }, { "epoch": 0.007484857295543034, "grad_norm": 0.40038031339645386, "learning_rate": 0.0001985443013296248, "loss": 1.5282, "step": 576 }, { "epoch": 0.007497851839458908, "grad_norm": 0.3042319118976593, "learning_rate": 0.00019854170186771338, "loss": 1.4934, "step": 577 }, { "epoch": 0.0075108463833747805, "grad_norm": 0.34824636578559875, "learning_rate": 0.000198539102405802, "loss": 1.6298, "step": 578 }, { "epoch": 0.0075238409272906535, "grad_norm": 0.21115775406360626, "learning_rate": 0.00019853650294389063, "loss": 1.1295, "step": 579 }, { "epoch": 0.007536835471206527, "grad_norm": 0.4801003932952881, "learning_rate": 0.00019853390348197923, "loss": 1.4094, "step": 580 }, { "epoch": 0.0075498300151224, "grad_norm": 0.3953585922718048, "learning_rate": 0.00019853130402006786, "loss": 1.532, "step": 581 }, { "epoch": 0.007562824559038274, "grad_norm": 0.4331101179122925, "learning_rate": 0.00019852870455815645, "loss": 1.5266, "step": 582 }, { "epoch": 0.007575819102954147, "grad_norm": 0.3411102890968323, "learning_rate": 0.0001985261050962451, "loss": 1.5216, "step": 583 }, { "epoch": 0.00758881364687002, "grad_norm": 0.3673463761806488, "learning_rate": 0.0001985235056343337, "loss": 1.5071, "step": 584 }, { "epoch": 0.007601808190785894, "grad_norm": 0.34652939438819885, "learning_rate": 0.00019852090617242233, "loss": 1.2927, "step": 585 }, { "epoch": 0.007614802734701767, "grad_norm": 0.3582465946674347, "learning_rate": 0.00019851830671051092, "loss": 1.4479, "step": 586 }, { "epoch": 0.007627797278617641, "grad_norm": 0.33398643136024475, "learning_rate": 0.00019851570724859955, "loss": 1.6125, "step": 587 }, { "epoch": 0.007640791822533514, "grad_norm": 0.4001072645187378, "learning_rate": 0.00019851310778668817, "loss": 1.4119, "step": 588 }, { "epoch": 0.007653786366449387, "grad_norm": 0.3611069619655609, "learning_rate": 0.00019851050832477677, "loss": 1.5331, "step": 589 }, { "epoch": 0.00766678091036526, "grad_norm": 0.36661073565483093, "learning_rate": 0.0001985079088628654, "loss": 1.5552, "step": 590 }, { "epoch": 0.007679775454281133, "grad_norm": 0.40941113233566284, "learning_rate": 0.00019850530940095402, "loss": 1.4044, "step": 591 }, { "epoch": 0.007692769998197007, "grad_norm": 0.3474884629249573, "learning_rate": 0.00019850270993904264, "loss": 1.3388, "step": 592 }, { "epoch": 0.00770576454211288, "grad_norm": 0.3682302236557007, "learning_rate": 0.00019850011047713124, "loss": 1.4577, "step": 593 }, { "epoch": 0.007718759086028754, "grad_norm": 0.4242841303348541, "learning_rate": 0.00019849751101521984, "loss": 1.824, "step": 594 }, { "epoch": 0.007731753629944627, "grad_norm": 0.3588680624961853, "learning_rate": 0.0001984949115533085, "loss": 1.4645, "step": 595 }, { "epoch": 0.007744748173860501, "grad_norm": 0.41413599252700806, "learning_rate": 0.0001984923120913971, "loss": 1.4754, "step": 596 }, { "epoch": 0.007757742717776374, "grad_norm": 0.44178664684295654, "learning_rate": 0.0001984897126294857, "loss": 1.6119, "step": 597 }, { "epoch": 0.007770737261692247, "grad_norm": 0.34459012746810913, "learning_rate": 0.0001984871131675743, "loss": 1.4424, "step": 598 }, { "epoch": 0.0077837318056081205, "grad_norm": 0.3490884304046631, "learning_rate": 0.00019848451370566293, "loss": 1.531, "step": 599 }, { "epoch": 0.007796726349523993, "grad_norm": 0.3423737585544586, "learning_rate": 0.00019848191424375156, "loss": 1.4427, "step": 600 }, { "epoch": 0.007809720893439867, "grad_norm": 0.4252668619155884, "learning_rate": 0.00019847931478184016, "loss": 1.4669, "step": 601 }, { "epoch": 0.007822715437355741, "grad_norm": 0.28277668356895447, "learning_rate": 0.00019847671531992878, "loss": 1.4039, "step": 602 }, { "epoch": 0.007835709981271614, "grad_norm": 0.3732830584049225, "learning_rate": 0.0001984741158580174, "loss": 1.4148, "step": 603 }, { "epoch": 0.007848704525187487, "grad_norm": 0.3279058337211609, "learning_rate": 0.00019847151639610603, "loss": 1.3893, "step": 604 }, { "epoch": 0.00786169906910336, "grad_norm": 0.25931790471076965, "learning_rate": 0.00019846891693419463, "loss": 1.3712, "step": 605 }, { "epoch": 0.007874693613019233, "grad_norm": 0.34375739097595215, "learning_rate": 0.00019846631747228322, "loss": 1.4624, "step": 606 }, { "epoch": 0.007887688156935108, "grad_norm": 0.3998558819293976, "learning_rate": 0.00019846371801037188, "loss": 1.518, "step": 607 }, { "epoch": 0.00790068270085098, "grad_norm": 0.31153321266174316, "learning_rate": 0.00019846111854846047, "loss": 1.4971, "step": 608 }, { "epoch": 0.007913677244766854, "grad_norm": 0.4275885224342346, "learning_rate": 0.0001984585190865491, "loss": 1.6311, "step": 609 }, { "epoch": 0.007926671788682726, "grad_norm": 0.3340792655944824, "learning_rate": 0.00019845591962463772, "loss": 1.6886, "step": 610 }, { "epoch": 0.0079396663325986, "grad_norm": 0.3447030782699585, "learning_rate": 0.00019845332016272632, "loss": 1.4708, "step": 611 }, { "epoch": 0.007952660876514474, "grad_norm": 0.33856722712516785, "learning_rate": 0.00019845072070081494, "loss": 1.5812, "step": 612 }, { "epoch": 0.007965655420430347, "grad_norm": 0.4070112407207489, "learning_rate": 0.00019844812123890354, "loss": 1.522, "step": 613 }, { "epoch": 0.00797864996434622, "grad_norm": 0.24768106639385223, "learning_rate": 0.0001984455217769922, "loss": 1.1529, "step": 614 }, { "epoch": 0.007991644508262093, "grad_norm": 0.24420788884162903, "learning_rate": 0.0001984429223150808, "loss": 1.2745, "step": 615 }, { "epoch": 0.008004639052177966, "grad_norm": 0.407650887966156, "learning_rate": 0.00019844032285316941, "loss": 1.4861, "step": 616 }, { "epoch": 0.00801763359609384, "grad_norm": 0.31649330258369446, "learning_rate": 0.000198437723391258, "loss": 1.3632, "step": 617 }, { "epoch": 0.008030628140009714, "grad_norm": 0.41149628162384033, "learning_rate": 0.00019843512392934664, "loss": 1.7538, "step": 618 }, { "epoch": 0.008043622683925587, "grad_norm": 0.31875699758529663, "learning_rate": 0.00019843252446743526, "loss": 1.66, "step": 619 }, { "epoch": 0.00805661722784146, "grad_norm": 0.3146108090877533, "learning_rate": 0.00019842992500552386, "loss": 1.6156, "step": 620 }, { "epoch": 0.008069611771757334, "grad_norm": 0.3958038091659546, "learning_rate": 0.00019842732554361248, "loss": 1.4977, "step": 621 }, { "epoch": 0.008082606315673207, "grad_norm": 0.419617235660553, "learning_rate": 0.0001984247260817011, "loss": 1.5449, "step": 622 }, { "epoch": 0.00809560085958908, "grad_norm": 0.31068864464759827, "learning_rate": 0.0001984221266197897, "loss": 1.406, "step": 623 }, { "epoch": 0.008108595403504953, "grad_norm": 0.3374605178833008, "learning_rate": 0.00019841952715787833, "loss": 1.5347, "step": 624 }, { "epoch": 0.008121589947420826, "grad_norm": 0.4293529689311981, "learning_rate": 0.00019841692769596693, "loss": 1.5466, "step": 625 }, { "epoch": 0.0081345844913367, "grad_norm": 0.3483525514602661, "learning_rate": 0.00019841432823405558, "loss": 1.3158, "step": 626 }, { "epoch": 0.008147579035252574, "grad_norm": 0.3526119589805603, "learning_rate": 0.00019841172877214418, "loss": 1.5133, "step": 627 }, { "epoch": 0.008160573579168447, "grad_norm": 0.40216967463493347, "learning_rate": 0.0001984091293102328, "loss": 1.4854, "step": 628 }, { "epoch": 0.00817356812308432, "grad_norm": 0.3084598183631897, "learning_rate": 0.0001984065298483214, "loss": 1.2879, "step": 629 }, { "epoch": 0.008186562667000193, "grad_norm": 0.37966227531433105, "learning_rate": 0.00019840393038641002, "loss": 1.6095, "step": 630 }, { "epoch": 0.008199557210916067, "grad_norm": 0.43067601323127747, "learning_rate": 0.00019840133092449865, "loss": 1.356, "step": 631 }, { "epoch": 0.00821255175483194, "grad_norm": 0.4464859962463379, "learning_rate": 0.00019839873146258724, "loss": 1.5336, "step": 632 }, { "epoch": 0.008225546298747813, "grad_norm": 0.35634273290634155, "learning_rate": 0.00019839613200067587, "loss": 1.4559, "step": 633 }, { "epoch": 0.008238540842663686, "grad_norm": 0.33753782510757446, "learning_rate": 0.0001983935325387645, "loss": 1.4193, "step": 634 }, { "epoch": 0.008251535386579559, "grad_norm": 0.3455103933811188, "learning_rate": 0.0001983909330768531, "loss": 1.4038, "step": 635 }, { "epoch": 0.008264529930495434, "grad_norm": 0.35408419370651245, "learning_rate": 0.00019838833361494171, "loss": 1.4354, "step": 636 }, { "epoch": 0.008277524474411307, "grad_norm": 0.421975314617157, "learning_rate": 0.0001983857341530303, "loss": 1.1963, "step": 637 }, { "epoch": 0.00829051901832718, "grad_norm": 0.42913416028022766, "learning_rate": 0.00019838313469111896, "loss": 1.3913, "step": 638 }, { "epoch": 0.008303513562243053, "grad_norm": 0.3791632354259491, "learning_rate": 0.00019838053522920756, "loss": 1.4676, "step": 639 }, { "epoch": 0.008316508106158927, "grad_norm": 0.31101372838020325, "learning_rate": 0.00019837793576729619, "loss": 1.5258, "step": 640 }, { "epoch": 0.0083295026500748, "grad_norm": 0.40362784266471863, "learning_rate": 0.00019837533630538478, "loss": 1.7226, "step": 641 }, { "epoch": 0.008342497193990673, "grad_norm": 0.2881763279438019, "learning_rate": 0.0001983727368434734, "loss": 1.3932, "step": 642 }, { "epoch": 0.008355491737906546, "grad_norm": 0.39208993315696716, "learning_rate": 0.00019837013738156203, "loss": 1.4546, "step": 643 }, { "epoch": 0.00836848628182242, "grad_norm": 0.38456860184669495, "learning_rate": 0.00019836753791965063, "loss": 1.5148, "step": 644 }, { "epoch": 0.008381480825738294, "grad_norm": 0.23708252608776093, "learning_rate": 0.00019836493845773925, "loss": 1.0973, "step": 645 }, { "epoch": 0.008394475369654167, "grad_norm": 0.4995286762714386, "learning_rate": 0.00019836233899582788, "loss": 1.4641, "step": 646 }, { "epoch": 0.00840746991357004, "grad_norm": 0.3843645751476288, "learning_rate": 0.00019835973953391648, "loss": 1.4017, "step": 647 }, { "epoch": 0.008420464457485913, "grad_norm": 0.3064028024673462, "learning_rate": 0.0001983571400720051, "loss": 1.5515, "step": 648 }, { "epoch": 0.008433459001401786, "grad_norm": 0.38051947951316833, "learning_rate": 0.00019835454061009372, "loss": 1.5171, "step": 649 }, { "epoch": 0.00844645354531766, "grad_norm": 0.36851462721824646, "learning_rate": 0.00019835194114818235, "loss": 1.6806, "step": 650 }, { "epoch": 0.008459448089233533, "grad_norm": 0.4453751742839813, "learning_rate": 0.00019834934168627095, "loss": 1.5515, "step": 651 }, { "epoch": 0.008472442633149406, "grad_norm": 0.415773868560791, "learning_rate": 0.00019834674222435957, "loss": 1.6913, "step": 652 }, { "epoch": 0.00848543717706528, "grad_norm": 0.4147622585296631, "learning_rate": 0.0001983441427624482, "loss": 1.5182, "step": 653 }, { "epoch": 0.008498431720981152, "grad_norm": 0.3704775869846344, "learning_rate": 0.0001983415433005368, "loss": 1.6625, "step": 654 }, { "epoch": 0.008511426264897027, "grad_norm": 0.33425942063331604, "learning_rate": 0.00019833894383862542, "loss": 1.401, "step": 655 }, { "epoch": 0.0085244208088129, "grad_norm": 0.4238104820251465, "learning_rate": 0.00019833634437671401, "loss": 1.4789, "step": 656 }, { "epoch": 0.008537415352728773, "grad_norm": 0.37472251057624817, "learning_rate": 0.00019833374491480267, "loss": 1.5653, "step": 657 }, { "epoch": 0.008550409896644646, "grad_norm": 0.33363428711891174, "learning_rate": 0.00019833114545289126, "loss": 1.3726, "step": 658 }, { "epoch": 0.00856340444056052, "grad_norm": 0.3292219042778015, "learning_rate": 0.0001983285459909799, "loss": 1.5948, "step": 659 }, { "epoch": 0.008576398984476393, "grad_norm": 0.41897693276405334, "learning_rate": 0.00019832594652906849, "loss": 1.534, "step": 660 }, { "epoch": 0.008589393528392266, "grad_norm": 0.38180336356163025, "learning_rate": 0.0001983233470671571, "loss": 1.4522, "step": 661 }, { "epoch": 0.00860238807230814, "grad_norm": 0.32229453325271606, "learning_rate": 0.00019832074760524573, "loss": 1.5321, "step": 662 }, { "epoch": 0.008615382616224012, "grad_norm": 0.31744512915611267, "learning_rate": 0.00019831814814333433, "loss": 1.2042, "step": 663 }, { "epoch": 0.008628377160139887, "grad_norm": 0.4684705138206482, "learning_rate": 0.00019831554868142296, "loss": 1.8078, "step": 664 }, { "epoch": 0.00864137170405576, "grad_norm": 0.34191256761550903, "learning_rate": 0.00019831294921951158, "loss": 1.445, "step": 665 }, { "epoch": 0.008654366247971633, "grad_norm": 0.33159396052360535, "learning_rate": 0.00019831034975760018, "loss": 1.4312, "step": 666 }, { "epoch": 0.008667360791887506, "grad_norm": 0.3650842308998108, "learning_rate": 0.0001983077502956888, "loss": 1.6764, "step": 667 }, { "epoch": 0.008680355335803379, "grad_norm": 0.40540340542793274, "learning_rate": 0.0001983051508337774, "loss": 1.4032, "step": 668 }, { "epoch": 0.008693349879719254, "grad_norm": 0.35292646288871765, "learning_rate": 0.00019830255137186605, "loss": 1.3865, "step": 669 }, { "epoch": 0.008706344423635127, "grad_norm": 0.42002010345458984, "learning_rate": 0.00019829995190995465, "loss": 1.4331, "step": 670 }, { "epoch": 0.008719338967551, "grad_norm": 0.3328249156475067, "learning_rate": 0.00019829735244804327, "loss": 1.565, "step": 671 }, { "epoch": 0.008732333511466872, "grad_norm": 0.3382279872894287, "learning_rate": 0.00019829475298613187, "loss": 1.3253, "step": 672 }, { "epoch": 0.008745328055382745, "grad_norm": 0.402645081281662, "learning_rate": 0.0001982921535242205, "loss": 1.4082, "step": 673 }, { "epoch": 0.00875832259929862, "grad_norm": 0.3478292226791382, "learning_rate": 0.00019828955406230912, "loss": 1.6943, "step": 674 }, { "epoch": 0.008771317143214493, "grad_norm": 0.31939834356307983, "learning_rate": 0.00019828695460039772, "loss": 1.5331, "step": 675 }, { "epoch": 0.008784311687130366, "grad_norm": 0.3915035128593445, "learning_rate": 0.00019828435513848634, "loss": 1.5543, "step": 676 }, { "epoch": 0.008797306231046239, "grad_norm": 0.39418208599090576, "learning_rate": 0.00019828175567657497, "loss": 1.4944, "step": 677 }, { "epoch": 0.008810300774962112, "grad_norm": 0.337128221988678, "learning_rate": 0.00019827915621466356, "loss": 1.3955, "step": 678 }, { "epoch": 0.008823295318877987, "grad_norm": 0.2792261242866516, "learning_rate": 0.0001982765567527522, "loss": 1.3216, "step": 679 }, { "epoch": 0.00883628986279386, "grad_norm": 0.310337632894516, "learning_rate": 0.00019827395729084079, "loss": 1.7405, "step": 680 }, { "epoch": 0.008849284406709733, "grad_norm": 0.3071850538253784, "learning_rate": 0.00019827135782892944, "loss": 1.4601, "step": 681 }, { "epoch": 0.008862278950625605, "grad_norm": 0.35270869731903076, "learning_rate": 0.00019826875836701803, "loss": 1.3447, "step": 682 }, { "epoch": 0.00887527349454148, "grad_norm": 0.43033266067504883, "learning_rate": 0.00019826615890510666, "loss": 1.5967, "step": 683 }, { "epoch": 0.008888268038457353, "grad_norm": 0.35486581921577454, "learning_rate": 0.00019826355944319526, "loss": 1.4615, "step": 684 }, { "epoch": 0.008901262582373226, "grad_norm": 0.36915886402130127, "learning_rate": 0.00019826095998128388, "loss": 1.5589, "step": 685 }, { "epoch": 0.008914257126289099, "grad_norm": 0.2822892367839813, "learning_rate": 0.0001982583605193725, "loss": 1.383, "step": 686 }, { "epoch": 0.008927251670204972, "grad_norm": 0.4104083180427551, "learning_rate": 0.0001982557610574611, "loss": 1.5449, "step": 687 }, { "epoch": 0.008940246214120847, "grad_norm": 0.3812151253223419, "learning_rate": 0.00019825316159554975, "loss": 1.5726, "step": 688 }, { "epoch": 0.00895324075803672, "grad_norm": 0.33798748254776, "learning_rate": 0.00019825056213363835, "loss": 1.5412, "step": 689 }, { "epoch": 0.008966235301952593, "grad_norm": 0.38268429040908813, "learning_rate": 0.00019824796267172695, "loss": 1.5783, "step": 690 }, { "epoch": 0.008979229845868466, "grad_norm": 0.33230215311050415, "learning_rate": 0.00019824536320981557, "loss": 1.4306, "step": 691 }, { "epoch": 0.008992224389784339, "grad_norm": 0.33637288212776184, "learning_rate": 0.0001982427637479042, "loss": 1.3414, "step": 692 }, { "epoch": 0.009005218933700213, "grad_norm": 0.444703072309494, "learning_rate": 0.00019824016428599282, "loss": 1.4449, "step": 693 }, { "epoch": 0.009018213477616086, "grad_norm": 0.30531391501426697, "learning_rate": 0.00019823756482408142, "loss": 1.4927, "step": 694 }, { "epoch": 0.00903120802153196, "grad_norm": 0.3695407211780548, "learning_rate": 0.00019823496536217004, "loss": 1.5052, "step": 695 }, { "epoch": 0.009044202565447832, "grad_norm": 0.3395458459854126, "learning_rate": 0.00019823236590025867, "loss": 1.3262, "step": 696 }, { "epoch": 0.009057197109363705, "grad_norm": 0.33390477299690247, "learning_rate": 0.00019822976643834727, "loss": 1.5135, "step": 697 }, { "epoch": 0.00907019165327958, "grad_norm": 0.33290162682533264, "learning_rate": 0.0001982271669764359, "loss": 1.2037, "step": 698 }, { "epoch": 0.009083186197195453, "grad_norm": 0.36634618043899536, "learning_rate": 0.0001982245675145245, "loss": 1.5616, "step": 699 }, { "epoch": 0.009096180741111326, "grad_norm": 0.3186809718608856, "learning_rate": 0.00019822196805261314, "loss": 1.4216, "step": 700 }, { "epoch": 0.009109175285027199, "grad_norm": 0.3685540556907654, "learning_rate": 0.00019821936859070174, "loss": 1.5776, "step": 701 }, { "epoch": 0.009122169828943073, "grad_norm": 0.35809457302093506, "learning_rate": 0.00019821676912879033, "loss": 1.5361, "step": 702 }, { "epoch": 0.009135164372858946, "grad_norm": 0.36902254819869995, "learning_rate": 0.00019821416966687896, "loss": 1.563, "step": 703 }, { "epoch": 0.00914815891677482, "grad_norm": 0.2929859757423401, "learning_rate": 0.00019821157020496758, "loss": 1.4686, "step": 704 }, { "epoch": 0.009161153460690692, "grad_norm": 0.34742724895477295, "learning_rate": 0.0001982089707430562, "loss": 1.6509, "step": 705 }, { "epoch": 0.009174148004606565, "grad_norm": 0.3738511800765991, "learning_rate": 0.0001982063712811448, "loss": 1.4771, "step": 706 }, { "epoch": 0.00918714254852244, "grad_norm": 0.29607006907463074, "learning_rate": 0.00019820377181923343, "loss": 1.324, "step": 707 }, { "epoch": 0.009200137092438313, "grad_norm": 0.3647070825099945, "learning_rate": 0.00019820117235732205, "loss": 1.549, "step": 708 }, { "epoch": 0.009213131636354186, "grad_norm": 0.22530755400657654, "learning_rate": 0.00019819857289541065, "loss": 1.3984, "step": 709 }, { "epoch": 0.009226126180270059, "grad_norm": 0.37032994627952576, "learning_rate": 0.00019819597343349928, "loss": 1.2236, "step": 710 }, { "epoch": 0.009239120724185932, "grad_norm": 0.32561302185058594, "learning_rate": 0.00019819337397158787, "loss": 1.4317, "step": 711 }, { "epoch": 0.009252115268101806, "grad_norm": 0.34994667768478394, "learning_rate": 0.00019819077450967652, "loss": 1.3987, "step": 712 }, { "epoch": 0.00926510981201768, "grad_norm": 0.367980033159256, "learning_rate": 0.00019818817504776512, "loss": 1.5015, "step": 713 }, { "epoch": 0.009278104355933552, "grad_norm": 0.4678630530834198, "learning_rate": 0.00019818557558585375, "loss": 1.5274, "step": 714 }, { "epoch": 0.009291098899849425, "grad_norm": 0.36450856924057007, "learning_rate": 0.00019818297612394234, "loss": 1.6098, "step": 715 }, { "epoch": 0.009304093443765298, "grad_norm": 0.36564064025878906, "learning_rate": 0.00019818037666203097, "loss": 1.3938, "step": 716 }, { "epoch": 0.009317087987681173, "grad_norm": 0.3113052248954773, "learning_rate": 0.0001981777772001196, "loss": 1.5942, "step": 717 }, { "epoch": 0.009330082531597046, "grad_norm": 0.28900638222694397, "learning_rate": 0.0001981751777382082, "loss": 1.487, "step": 718 }, { "epoch": 0.009343077075512919, "grad_norm": 0.3917618691921234, "learning_rate": 0.00019817257827629681, "loss": 1.5005, "step": 719 }, { "epoch": 0.009356071619428792, "grad_norm": 0.2771584093570709, "learning_rate": 0.00019816997881438544, "loss": 1.3067, "step": 720 }, { "epoch": 0.009369066163344666, "grad_norm": 0.45164725184440613, "learning_rate": 0.00019816737935247404, "loss": 1.5159, "step": 721 }, { "epoch": 0.00938206070726054, "grad_norm": 0.3692169785499573, "learning_rate": 0.00019816477989056266, "loss": 1.2412, "step": 722 }, { "epoch": 0.009395055251176412, "grad_norm": 0.49505218863487244, "learning_rate": 0.00019816218042865129, "loss": 1.7205, "step": 723 }, { "epoch": 0.009408049795092285, "grad_norm": 0.34234389662742615, "learning_rate": 0.0001981595809667399, "loss": 1.515, "step": 724 }, { "epoch": 0.009421044339008158, "grad_norm": 0.38328883051872253, "learning_rate": 0.0001981569815048285, "loss": 1.5291, "step": 725 }, { "epoch": 0.009434038882924033, "grad_norm": 0.29110851883888245, "learning_rate": 0.00019815438204291713, "loss": 1.4392, "step": 726 }, { "epoch": 0.009447033426839906, "grad_norm": 0.3245619833469391, "learning_rate": 0.00019815178258100576, "loss": 1.48, "step": 727 }, { "epoch": 0.009460027970755779, "grad_norm": 0.3311016261577606, "learning_rate": 0.00019814918311909435, "loss": 1.4422, "step": 728 }, { "epoch": 0.009473022514671652, "grad_norm": 0.3260456919670105, "learning_rate": 0.00019814658365718298, "loss": 1.3742, "step": 729 }, { "epoch": 0.009486017058587525, "grad_norm": 0.3841300904750824, "learning_rate": 0.00019814398419527158, "loss": 1.5685, "step": 730 }, { "epoch": 0.0094990116025034, "grad_norm": 0.36956533789634705, "learning_rate": 0.0001981413847333602, "loss": 1.4435, "step": 731 }, { "epoch": 0.009512006146419272, "grad_norm": 0.416090726852417, "learning_rate": 0.00019813878527144882, "loss": 1.5898, "step": 732 }, { "epoch": 0.009525000690335145, "grad_norm": 0.3416915535926819, "learning_rate": 0.00019813618580953742, "loss": 1.3792, "step": 733 }, { "epoch": 0.009537995234251018, "grad_norm": 0.35502490401268005, "learning_rate": 0.00019813358634762605, "loss": 1.5907, "step": 734 }, { "epoch": 0.009550989778166891, "grad_norm": 0.39543843269348145, "learning_rate": 0.00019813098688571467, "loss": 1.5287, "step": 735 }, { "epoch": 0.009563984322082766, "grad_norm": 0.3150179982185364, "learning_rate": 0.0001981283874238033, "loss": 1.4447, "step": 736 }, { "epoch": 0.009576978865998639, "grad_norm": 0.37571099400520325, "learning_rate": 0.0001981257879618919, "loss": 1.3266, "step": 737 }, { "epoch": 0.009589973409914512, "grad_norm": 0.36124467849731445, "learning_rate": 0.00019812318849998052, "loss": 1.4497, "step": 738 }, { "epoch": 0.009602967953830385, "grad_norm": 0.3628777265548706, "learning_rate": 0.00019812058903806914, "loss": 1.4794, "step": 739 }, { "epoch": 0.00961596249774626, "grad_norm": 0.3742762506008148, "learning_rate": 0.00019811798957615774, "loss": 1.7162, "step": 740 }, { "epoch": 0.009628957041662133, "grad_norm": 0.5336025953292847, "learning_rate": 0.00019811539011424636, "loss": 1.5559, "step": 741 }, { "epoch": 0.009641951585578006, "grad_norm": 0.4883899688720703, "learning_rate": 0.00019811279065233496, "loss": 1.5139, "step": 742 }, { "epoch": 0.009654946129493878, "grad_norm": 0.45548561215400696, "learning_rate": 0.0001981101911904236, "loss": 1.4212, "step": 743 }, { "epoch": 0.009667940673409751, "grad_norm": 0.31765982508659363, "learning_rate": 0.0001981075917285122, "loss": 1.4476, "step": 744 }, { "epoch": 0.009680935217325626, "grad_norm": 0.3143158555030823, "learning_rate": 0.0001981049922666008, "loss": 1.7234, "step": 745 }, { "epoch": 0.009693929761241499, "grad_norm": 0.3834339678287506, "learning_rate": 0.00019810239280468943, "loss": 1.7519, "step": 746 }, { "epoch": 0.009706924305157372, "grad_norm": 0.33390897512435913, "learning_rate": 0.00019809979334277806, "loss": 1.5717, "step": 747 }, { "epoch": 0.009719918849073245, "grad_norm": 0.40962928533554077, "learning_rate": 0.00019809719388086668, "loss": 1.6574, "step": 748 }, { "epoch": 0.009732913392989118, "grad_norm": 0.39467093348503113, "learning_rate": 0.00019809459441895528, "loss": 1.6905, "step": 749 }, { "epoch": 0.009745907936904993, "grad_norm": 0.3943708539009094, "learning_rate": 0.0001980919949570439, "loss": 1.5263, "step": 750 }, { "epoch": 0.009758902480820866, "grad_norm": 0.2936602830886841, "learning_rate": 0.00019808939549513253, "loss": 1.3639, "step": 751 }, { "epoch": 0.009771897024736739, "grad_norm": 0.36436378955841064, "learning_rate": 0.00019808679603322112, "loss": 1.522, "step": 752 }, { "epoch": 0.009784891568652612, "grad_norm": 0.39485645294189453, "learning_rate": 0.00019808419657130975, "loss": 1.4362, "step": 753 }, { "epoch": 0.009797886112568484, "grad_norm": 0.3032613694667816, "learning_rate": 0.00019808159710939835, "loss": 1.4838, "step": 754 }, { "epoch": 0.00981088065648436, "grad_norm": 0.3500721752643585, "learning_rate": 0.000198078997647487, "loss": 1.4274, "step": 755 }, { "epoch": 0.009823875200400232, "grad_norm": 0.3658673167228699, "learning_rate": 0.0001980763981855756, "loss": 1.422, "step": 756 }, { "epoch": 0.009836869744316105, "grad_norm": 0.3680466413497925, "learning_rate": 0.0001980737987236642, "loss": 1.6103, "step": 757 }, { "epoch": 0.009849864288231978, "grad_norm": 0.4412722885608673, "learning_rate": 0.00019807119926175282, "loss": 1.4037, "step": 758 }, { "epoch": 0.009862858832147853, "grad_norm": 0.3488806188106537, "learning_rate": 0.00019806859979984144, "loss": 1.6904, "step": 759 }, { "epoch": 0.009875853376063726, "grad_norm": 0.38538268208503723, "learning_rate": 0.00019806600033793007, "loss": 1.6448, "step": 760 }, { "epoch": 0.009888847919979599, "grad_norm": 0.3275357484817505, "learning_rate": 0.00019806340087601866, "loss": 1.3492, "step": 761 }, { "epoch": 0.009901842463895472, "grad_norm": 0.45382624864578247, "learning_rate": 0.0001980608014141073, "loss": 1.2303, "step": 762 }, { "epoch": 0.009914837007811345, "grad_norm": 0.32894131541252136, "learning_rate": 0.0001980582019521959, "loss": 1.6423, "step": 763 }, { "epoch": 0.00992783155172722, "grad_norm": 0.3140595853328705, "learning_rate": 0.0001980556024902845, "loss": 1.2432, "step": 764 }, { "epoch": 0.009940826095643092, "grad_norm": 0.2719400227069855, "learning_rate": 0.00019805300302837313, "loss": 1.6344, "step": 765 }, { "epoch": 0.009953820639558965, "grad_norm": 0.29642561078071594, "learning_rate": 0.00019805040356646176, "loss": 1.4362, "step": 766 }, { "epoch": 0.009966815183474838, "grad_norm": 0.38486072421073914, "learning_rate": 0.00019804780410455038, "loss": 1.6494, "step": 767 }, { "epoch": 0.009979809727390711, "grad_norm": 0.3146076798439026, "learning_rate": 0.00019804520464263898, "loss": 1.5003, "step": 768 }, { "epoch": 0.009992804271306586, "grad_norm": 0.2924637496471405, "learning_rate": 0.00019804260518072758, "loss": 1.4728, "step": 769 }, { "epoch": 0.010005798815222459, "grad_norm": 0.2947547435760498, "learning_rate": 0.00019804000571881623, "loss": 1.4447, "step": 770 }, { "epoch": 0.010018793359138332, "grad_norm": 0.3057027757167816, "learning_rate": 0.00019803740625690483, "loss": 1.2061, "step": 771 }, { "epoch": 0.010031787903054205, "grad_norm": 0.3925977647304535, "learning_rate": 0.00019803480679499345, "loss": 1.5638, "step": 772 }, { "epoch": 0.010044782446970078, "grad_norm": 0.3047654330730438, "learning_rate": 0.00019803220733308205, "loss": 1.6397, "step": 773 }, { "epoch": 0.010057776990885952, "grad_norm": 0.2800816297531128, "learning_rate": 0.00019802960787117067, "loss": 1.4873, "step": 774 }, { "epoch": 0.010070771534801825, "grad_norm": 0.6156725287437439, "learning_rate": 0.0001980270084092593, "loss": 1.6791, "step": 775 }, { "epoch": 0.010083766078717698, "grad_norm": 0.3589865565299988, "learning_rate": 0.0001980244089473479, "loss": 1.512, "step": 776 }, { "epoch": 0.010096760622633571, "grad_norm": 0.2741207778453827, "learning_rate": 0.00019802180948543652, "loss": 1.6018, "step": 777 }, { "epoch": 0.010109755166549446, "grad_norm": 0.31274279952049255, "learning_rate": 0.00019801921002352514, "loss": 1.3728, "step": 778 }, { "epoch": 0.010122749710465319, "grad_norm": 0.427493691444397, "learning_rate": 0.00019801661056161377, "loss": 1.4774, "step": 779 }, { "epoch": 0.010135744254381192, "grad_norm": 0.33083292841911316, "learning_rate": 0.00019801401109970237, "loss": 1.3489, "step": 780 }, { "epoch": 0.010148738798297065, "grad_norm": 0.33251968026161194, "learning_rate": 0.000198011411637791, "loss": 1.5482, "step": 781 }, { "epoch": 0.010161733342212938, "grad_norm": 0.2710573971271515, "learning_rate": 0.00019800881217587962, "loss": 1.1779, "step": 782 }, { "epoch": 0.010174727886128812, "grad_norm": 0.354383647441864, "learning_rate": 0.0001980062127139682, "loss": 1.4562, "step": 783 }, { "epoch": 0.010187722430044685, "grad_norm": 0.41652238368988037, "learning_rate": 0.00019800361325205684, "loss": 1.4855, "step": 784 }, { "epoch": 0.010200716973960558, "grad_norm": 0.3225327134132385, "learning_rate": 0.00019800101379014543, "loss": 1.6558, "step": 785 }, { "epoch": 0.010213711517876431, "grad_norm": 0.38271769881248474, "learning_rate": 0.00019799841432823406, "loss": 1.4385, "step": 786 }, { "epoch": 0.010226706061792304, "grad_norm": 0.3630881607532501, "learning_rate": 0.00019799581486632268, "loss": 1.2769, "step": 787 }, { "epoch": 0.010239700605708179, "grad_norm": 0.33851003646850586, "learning_rate": 0.00019799321540441128, "loss": 1.5789, "step": 788 }, { "epoch": 0.010252695149624052, "grad_norm": 0.4176260828971863, "learning_rate": 0.0001979906159424999, "loss": 1.4785, "step": 789 }, { "epoch": 0.010265689693539925, "grad_norm": 0.3471718430519104, "learning_rate": 0.00019798801648058853, "loss": 1.4152, "step": 790 }, { "epoch": 0.010278684237455798, "grad_norm": 0.3253538906574249, "learning_rate": 0.00019798541701867715, "loss": 1.4411, "step": 791 }, { "epoch": 0.01029167878137167, "grad_norm": 0.37727871537208557, "learning_rate": 0.00019798281755676575, "loss": 1.5329, "step": 792 }, { "epoch": 0.010304673325287545, "grad_norm": 0.38673219084739685, "learning_rate": 0.00019798021809485438, "loss": 1.4563, "step": 793 }, { "epoch": 0.010317667869203418, "grad_norm": 0.37884414196014404, "learning_rate": 0.000197977618632943, "loss": 1.4507, "step": 794 }, { "epoch": 0.010330662413119291, "grad_norm": 0.3687722980976105, "learning_rate": 0.0001979750191710316, "loss": 1.402, "step": 795 }, { "epoch": 0.010343656957035164, "grad_norm": 0.39976727962493896, "learning_rate": 0.00019797241970912022, "loss": 1.5013, "step": 796 }, { "epoch": 0.010356651500951039, "grad_norm": 0.31597524881362915, "learning_rate": 0.00019796982024720885, "loss": 1.5531, "step": 797 }, { "epoch": 0.010369646044866912, "grad_norm": 0.4013782739639282, "learning_rate": 0.00019796722078529747, "loss": 1.3796, "step": 798 }, { "epoch": 0.010382640588782785, "grad_norm": 0.3744184076786041, "learning_rate": 0.00019796462132338607, "loss": 1.3908, "step": 799 }, { "epoch": 0.010395635132698658, "grad_norm": 0.2818164527416229, "learning_rate": 0.00019796202186147467, "loss": 1.5691, "step": 800 }, { "epoch": 0.010408629676614531, "grad_norm": 0.31891414523124695, "learning_rate": 0.00019795942239956332, "loss": 1.3117, "step": 801 }, { "epoch": 0.010421624220530406, "grad_norm": 0.44532331824302673, "learning_rate": 0.00019795682293765192, "loss": 1.7584, "step": 802 }, { "epoch": 0.010434618764446279, "grad_norm": 0.2924066185951233, "learning_rate": 0.00019795422347574054, "loss": 1.3124, "step": 803 }, { "epoch": 0.010447613308362151, "grad_norm": 0.3202171325683594, "learning_rate": 0.00019795162401382914, "loss": 1.2046, "step": 804 }, { "epoch": 0.010460607852278024, "grad_norm": 0.37899860739707947, "learning_rate": 0.00019794902455191776, "loss": 1.5393, "step": 805 }, { "epoch": 0.010473602396193897, "grad_norm": 0.4298096299171448, "learning_rate": 0.00019794642509000639, "loss": 1.4334, "step": 806 }, { "epoch": 0.010486596940109772, "grad_norm": 0.2809349596500397, "learning_rate": 0.00019794382562809498, "loss": 1.4074, "step": 807 }, { "epoch": 0.010499591484025645, "grad_norm": 0.3504308760166168, "learning_rate": 0.0001979412261661836, "loss": 1.4385, "step": 808 }, { "epoch": 0.010512586027941518, "grad_norm": 0.4143553078174591, "learning_rate": 0.00019793862670427223, "loss": 1.5984, "step": 809 }, { "epoch": 0.010525580571857391, "grad_norm": 0.2987794876098633, "learning_rate": 0.00019793602724236086, "loss": 1.247, "step": 810 }, { "epoch": 0.010538575115773264, "grad_norm": 0.3241748511791229, "learning_rate": 0.00019793342778044945, "loss": 1.3828, "step": 811 }, { "epoch": 0.010551569659689139, "grad_norm": 0.4106390178203583, "learning_rate": 0.00019793082831853805, "loss": 1.6831, "step": 812 }, { "epoch": 0.010564564203605012, "grad_norm": 0.4032892882823944, "learning_rate": 0.0001979282288566267, "loss": 1.5073, "step": 813 }, { "epoch": 0.010577558747520885, "grad_norm": 0.378022164106369, "learning_rate": 0.0001979256293947153, "loss": 1.6767, "step": 814 }, { "epoch": 0.010590553291436757, "grad_norm": 0.3420693278312683, "learning_rate": 0.00019792302993280393, "loss": 1.5153, "step": 815 }, { "epoch": 0.010603547835352632, "grad_norm": 0.313639760017395, "learning_rate": 0.00019792043047089252, "loss": 1.339, "step": 816 }, { "epoch": 0.010616542379268505, "grad_norm": 0.39957714080810547, "learning_rate": 0.00019791783100898115, "loss": 1.4186, "step": 817 }, { "epoch": 0.010629536923184378, "grad_norm": 0.283723920583725, "learning_rate": 0.00019791523154706977, "loss": 1.2269, "step": 818 }, { "epoch": 0.010642531467100251, "grad_norm": 0.2821674048900604, "learning_rate": 0.00019791263208515837, "loss": 1.5801, "step": 819 }, { "epoch": 0.010655526011016124, "grad_norm": 0.4604765772819519, "learning_rate": 0.000197910032623247, "loss": 1.4114, "step": 820 }, { "epoch": 0.010668520554931999, "grad_norm": 0.41818079352378845, "learning_rate": 0.00019790743316133562, "loss": 1.4538, "step": 821 }, { "epoch": 0.010681515098847872, "grad_norm": 0.4463641941547394, "learning_rate": 0.00019790483369942424, "loss": 1.5206, "step": 822 }, { "epoch": 0.010694509642763745, "grad_norm": 0.2832520008087158, "learning_rate": 0.00019790223423751284, "loss": 1.3815, "step": 823 }, { "epoch": 0.010707504186679618, "grad_norm": 0.38248422741889954, "learning_rate": 0.00019789963477560144, "loss": 1.5137, "step": 824 }, { "epoch": 0.01072049873059549, "grad_norm": 0.3523542284965515, "learning_rate": 0.0001978970353136901, "loss": 1.4657, "step": 825 }, { "epoch": 0.010733493274511365, "grad_norm": 0.39274516701698303, "learning_rate": 0.00019789443585177869, "loss": 1.557, "step": 826 }, { "epoch": 0.010746487818427238, "grad_norm": 0.3270724415779114, "learning_rate": 0.0001978918363898673, "loss": 1.2439, "step": 827 }, { "epoch": 0.010759482362343111, "grad_norm": 0.4127451181411743, "learning_rate": 0.0001978892369279559, "loss": 1.631, "step": 828 }, { "epoch": 0.010772476906258984, "grad_norm": 0.5745660662651062, "learning_rate": 0.00019788663746604453, "loss": 1.4533, "step": 829 }, { "epoch": 0.010785471450174857, "grad_norm": 0.3350748121738434, "learning_rate": 0.00019788403800413316, "loss": 1.3469, "step": 830 }, { "epoch": 0.010798465994090732, "grad_norm": 0.3463400602340698, "learning_rate": 0.00019788143854222175, "loss": 1.4159, "step": 831 }, { "epoch": 0.010811460538006605, "grad_norm": 0.36435380578041077, "learning_rate": 0.00019787883908031038, "loss": 1.4794, "step": 832 }, { "epoch": 0.010824455081922478, "grad_norm": 0.3727715313434601, "learning_rate": 0.000197876239618399, "loss": 1.6761, "step": 833 }, { "epoch": 0.01083744962583835, "grad_norm": 0.49526360630989075, "learning_rate": 0.00019787364015648763, "loss": 1.4028, "step": 834 }, { "epoch": 0.010850444169754224, "grad_norm": 0.3774344325065613, "learning_rate": 0.00019787104069457623, "loss": 1.4989, "step": 835 }, { "epoch": 0.010863438713670098, "grad_norm": 0.297404408454895, "learning_rate": 0.00019786844123266485, "loss": 1.4746, "step": 836 }, { "epoch": 0.010876433257585971, "grad_norm": 0.3675737977027893, "learning_rate": 0.00019786584177075347, "loss": 1.6151, "step": 837 }, { "epoch": 0.010889427801501844, "grad_norm": 0.363771915435791, "learning_rate": 0.00019786324230884207, "loss": 1.4896, "step": 838 }, { "epoch": 0.010902422345417717, "grad_norm": 0.2401675581932068, "learning_rate": 0.0001978606428469307, "loss": 1.2489, "step": 839 }, { "epoch": 0.010915416889333592, "grad_norm": 0.39043956995010376, "learning_rate": 0.00019785804338501932, "loss": 1.6779, "step": 840 }, { "epoch": 0.010928411433249465, "grad_norm": 0.3350941240787506, "learning_rate": 0.00019785544392310792, "loss": 1.4197, "step": 841 }, { "epoch": 0.010941405977165338, "grad_norm": 0.3687950670719147, "learning_rate": 0.00019785284446119654, "loss": 1.2805, "step": 842 }, { "epoch": 0.01095440052108121, "grad_norm": 0.3335752487182617, "learning_rate": 0.00019785024499928514, "loss": 1.534, "step": 843 }, { "epoch": 0.010967395064997084, "grad_norm": 0.37645015120506287, "learning_rate": 0.0001978476455373738, "loss": 1.2933, "step": 844 }, { "epoch": 0.010980389608912958, "grad_norm": 0.2943829596042633, "learning_rate": 0.0001978450460754624, "loss": 1.4492, "step": 845 }, { "epoch": 0.010993384152828831, "grad_norm": 0.3535577654838562, "learning_rate": 0.000197842446613551, "loss": 1.1818, "step": 846 }, { "epoch": 0.011006378696744704, "grad_norm": 0.347501277923584, "learning_rate": 0.0001978398471516396, "loss": 1.3698, "step": 847 }, { "epoch": 0.011019373240660577, "grad_norm": 0.41602951288223267, "learning_rate": 0.00019783724768972823, "loss": 1.5016, "step": 848 }, { "epoch": 0.01103236778457645, "grad_norm": 0.33995816111564636, "learning_rate": 0.00019783464822781686, "loss": 1.3072, "step": 849 }, { "epoch": 0.011045362328492325, "grad_norm": 0.4060112237930298, "learning_rate": 0.00019783204876590546, "loss": 1.5691, "step": 850 }, { "epoch": 0.011058356872408198, "grad_norm": 0.4026666581630707, "learning_rate": 0.00019782944930399408, "loss": 1.4755, "step": 851 }, { "epoch": 0.01107135141632407, "grad_norm": 0.3719538450241089, "learning_rate": 0.0001978268498420827, "loss": 1.4684, "step": 852 }, { "epoch": 0.011084345960239944, "grad_norm": 0.3730430006980896, "learning_rate": 0.0001978242503801713, "loss": 1.5605, "step": 853 }, { "epoch": 0.011097340504155817, "grad_norm": 0.3669048547744751, "learning_rate": 0.00019782165091825993, "loss": 1.5665, "step": 854 }, { "epoch": 0.011110335048071691, "grad_norm": 0.41444581747055054, "learning_rate": 0.00019781905145634853, "loss": 1.5279, "step": 855 }, { "epoch": 0.011123329591987564, "grad_norm": 0.31362953782081604, "learning_rate": 0.00019781645199443718, "loss": 1.5469, "step": 856 }, { "epoch": 0.011136324135903437, "grad_norm": 0.2954976260662079, "learning_rate": 0.00019781385253252577, "loss": 1.4712, "step": 857 }, { "epoch": 0.01114931867981931, "grad_norm": 0.36397314071655273, "learning_rate": 0.0001978112530706144, "loss": 1.583, "step": 858 }, { "epoch": 0.011162313223735185, "grad_norm": 0.3063300549983978, "learning_rate": 0.000197808653608703, "loss": 1.2616, "step": 859 }, { "epoch": 0.011175307767651058, "grad_norm": 0.39166536927223206, "learning_rate": 0.00019780605414679162, "loss": 1.5711, "step": 860 }, { "epoch": 0.011188302311566931, "grad_norm": 0.3681032657623291, "learning_rate": 0.00019780345468488024, "loss": 1.482, "step": 861 }, { "epoch": 0.011201296855482804, "grad_norm": 0.30603691935539246, "learning_rate": 0.00019780085522296884, "loss": 1.3632, "step": 862 }, { "epoch": 0.011214291399398677, "grad_norm": 0.48085567355155945, "learning_rate": 0.00019779825576105747, "loss": 1.564, "step": 863 }, { "epoch": 0.011227285943314552, "grad_norm": 0.3470838963985443, "learning_rate": 0.0001977956562991461, "loss": 1.4226, "step": 864 }, { "epoch": 0.011240280487230424, "grad_norm": 0.3490869104862213, "learning_rate": 0.00019779305683723472, "loss": 1.5778, "step": 865 }, { "epoch": 0.011253275031146297, "grad_norm": 0.4007914066314697, "learning_rate": 0.0001977904573753233, "loss": 1.6429, "step": 866 }, { "epoch": 0.01126626957506217, "grad_norm": 0.3783588707447052, "learning_rate": 0.0001977878579134119, "loss": 1.4628, "step": 867 }, { "epoch": 0.011279264118978043, "grad_norm": 0.3523287773132324, "learning_rate": 0.00019778525845150056, "loss": 1.4835, "step": 868 }, { "epoch": 0.011292258662893918, "grad_norm": 0.3539884686470032, "learning_rate": 0.00019778265898958916, "loss": 1.524, "step": 869 }, { "epoch": 0.011305253206809791, "grad_norm": 0.3133487403392792, "learning_rate": 0.00019778005952767778, "loss": 1.2986, "step": 870 }, { "epoch": 0.011318247750725664, "grad_norm": 0.4288972318172455, "learning_rate": 0.0001977774600657664, "loss": 1.7114, "step": 871 }, { "epoch": 0.011331242294641537, "grad_norm": 0.2804165482521057, "learning_rate": 0.000197774860603855, "loss": 1.325, "step": 872 }, { "epoch": 0.01134423683855741, "grad_norm": 0.4230891466140747, "learning_rate": 0.00019777226114194363, "loss": 1.5955, "step": 873 }, { "epoch": 0.011357231382473285, "grad_norm": 0.40529075264930725, "learning_rate": 0.00019776966168003223, "loss": 1.4972, "step": 874 }, { "epoch": 0.011370225926389158, "grad_norm": 0.4664103388786316, "learning_rate": 0.00019776706221812088, "loss": 1.5273, "step": 875 }, { "epoch": 0.01138322047030503, "grad_norm": 0.37206152081489563, "learning_rate": 0.00019776446275620948, "loss": 1.4608, "step": 876 }, { "epoch": 0.011396215014220903, "grad_norm": 0.48054125905036926, "learning_rate": 0.0001977618632942981, "loss": 1.5868, "step": 877 }, { "epoch": 0.011409209558136778, "grad_norm": 0.3271986246109009, "learning_rate": 0.0001977592638323867, "loss": 1.3005, "step": 878 }, { "epoch": 0.011422204102052651, "grad_norm": 0.3968530297279358, "learning_rate": 0.00019775666437047532, "loss": 1.3459, "step": 879 }, { "epoch": 0.011435198645968524, "grad_norm": 0.32345154881477356, "learning_rate": 0.00019775406490856395, "loss": 1.5788, "step": 880 }, { "epoch": 0.011448193189884397, "grad_norm": 0.4595682621002197, "learning_rate": 0.00019775146544665254, "loss": 1.4036, "step": 881 }, { "epoch": 0.01146118773380027, "grad_norm": 0.34216704964637756, "learning_rate": 0.00019774886598474117, "loss": 1.591, "step": 882 }, { "epoch": 0.011474182277716145, "grad_norm": 0.40196308493614197, "learning_rate": 0.0001977462665228298, "loss": 1.4519, "step": 883 }, { "epoch": 0.011487176821632018, "grad_norm": 0.3939869701862335, "learning_rate": 0.0001977436670609184, "loss": 1.2319, "step": 884 }, { "epoch": 0.01150017136554789, "grad_norm": 0.42931994795799255, "learning_rate": 0.00019774106759900702, "loss": 1.5851, "step": 885 }, { "epoch": 0.011513165909463764, "grad_norm": 0.3681303560733795, "learning_rate": 0.0001977384681370956, "loss": 1.4846, "step": 886 }, { "epoch": 0.011526160453379636, "grad_norm": 0.23849858343601227, "learning_rate": 0.00019773586867518426, "loss": 1.1437, "step": 887 }, { "epoch": 0.011539154997295511, "grad_norm": 0.35961535573005676, "learning_rate": 0.00019773326921327286, "loss": 1.5136, "step": 888 }, { "epoch": 0.011552149541211384, "grad_norm": 0.4356399476528168, "learning_rate": 0.00019773066975136149, "loss": 1.6493, "step": 889 }, { "epoch": 0.011565144085127257, "grad_norm": 0.3308802843093872, "learning_rate": 0.00019772807028945008, "loss": 1.312, "step": 890 }, { "epoch": 0.01157813862904313, "grad_norm": 0.29127219319343567, "learning_rate": 0.0001977254708275387, "loss": 1.3896, "step": 891 }, { "epoch": 0.011591133172959003, "grad_norm": 0.2908242344856262, "learning_rate": 0.00019772287136562733, "loss": 1.4376, "step": 892 }, { "epoch": 0.011604127716874878, "grad_norm": 0.3977406919002533, "learning_rate": 0.00019772027190371593, "loss": 1.5558, "step": 893 }, { "epoch": 0.01161712226079075, "grad_norm": 0.26013070344924927, "learning_rate": 0.00019771767244180455, "loss": 1.2552, "step": 894 }, { "epoch": 0.011630116804706624, "grad_norm": 0.32726550102233887, "learning_rate": 0.00019771507297989318, "loss": 1.5186, "step": 895 }, { "epoch": 0.011643111348622497, "grad_norm": 0.39039891958236694, "learning_rate": 0.00019771247351798178, "loss": 1.3885, "step": 896 }, { "epoch": 0.011656105892538371, "grad_norm": 0.23171035945415497, "learning_rate": 0.0001977098740560704, "loss": 1.2481, "step": 897 }, { "epoch": 0.011669100436454244, "grad_norm": 0.3014744818210602, "learning_rate": 0.000197707274594159, "loss": 1.2552, "step": 898 }, { "epoch": 0.011682094980370117, "grad_norm": 0.3523556590080261, "learning_rate": 0.00019770467513224765, "loss": 1.426, "step": 899 }, { "epoch": 0.01169508952428599, "grad_norm": 0.4000703692436218, "learning_rate": 0.00019770207567033625, "loss": 1.3178, "step": 900 }, { "epoch": 0.011708084068201863, "grad_norm": 0.36981841921806335, "learning_rate": 0.00019769947620842487, "loss": 1.4109, "step": 901 }, { "epoch": 0.011721078612117738, "grad_norm": 0.36349907517433167, "learning_rate": 0.00019769687674651347, "loss": 1.5898, "step": 902 }, { "epoch": 0.01173407315603361, "grad_norm": 0.363073468208313, "learning_rate": 0.0001976942772846021, "loss": 1.4734, "step": 903 }, { "epoch": 0.011747067699949484, "grad_norm": 0.3224738538265228, "learning_rate": 0.00019769167782269072, "loss": 1.3555, "step": 904 }, { "epoch": 0.011760062243865357, "grad_norm": 0.42104336619377136, "learning_rate": 0.00019768907836077932, "loss": 1.5197, "step": 905 }, { "epoch": 0.01177305678778123, "grad_norm": 0.3163914084434509, "learning_rate": 0.00019768647889886794, "loss": 1.5051, "step": 906 }, { "epoch": 0.011786051331697104, "grad_norm": 0.307299941778183, "learning_rate": 0.00019768387943695656, "loss": 1.4167, "step": 907 }, { "epoch": 0.011799045875612977, "grad_norm": 0.3385258913040161, "learning_rate": 0.00019768127997504516, "loss": 1.3296, "step": 908 }, { "epoch": 0.01181204041952885, "grad_norm": 0.3504544794559479, "learning_rate": 0.00019767868051313379, "loss": 1.5734, "step": 909 }, { "epoch": 0.011825034963444723, "grad_norm": 0.28617730736732483, "learning_rate": 0.0001976760810512224, "loss": 1.458, "step": 910 }, { "epoch": 0.011838029507360596, "grad_norm": 0.3386898934841156, "learning_rate": 0.00019767348158931104, "loss": 1.3342, "step": 911 }, { "epoch": 0.01185102405127647, "grad_norm": 0.3196926712989807, "learning_rate": 0.00019767088212739963, "loss": 1.6146, "step": 912 }, { "epoch": 0.011864018595192344, "grad_norm": 0.3252779245376587, "learning_rate": 0.00019766828266548826, "loss": 1.4038, "step": 913 }, { "epoch": 0.011877013139108217, "grad_norm": 0.33069950342178345, "learning_rate": 0.00019766568320357688, "loss": 1.56, "step": 914 }, { "epoch": 0.01189000768302409, "grad_norm": 0.3373044729232788, "learning_rate": 0.00019766308374166548, "loss": 1.4666, "step": 915 }, { "epoch": 0.011903002226939964, "grad_norm": 0.33167558908462524, "learning_rate": 0.0001976604842797541, "loss": 1.5786, "step": 916 }, { "epoch": 0.011915996770855837, "grad_norm": 0.33954721689224243, "learning_rate": 0.0001976578848178427, "loss": 1.5849, "step": 917 }, { "epoch": 0.01192899131477171, "grad_norm": 0.3559896945953369, "learning_rate": 0.00019765528535593135, "loss": 1.3744, "step": 918 }, { "epoch": 0.011941985858687583, "grad_norm": 0.31615790724754333, "learning_rate": 0.00019765268589401995, "loss": 1.488, "step": 919 }, { "epoch": 0.011954980402603456, "grad_norm": 0.39266765117645264, "learning_rate": 0.00019765008643210857, "loss": 1.4314, "step": 920 }, { "epoch": 0.011967974946519331, "grad_norm": 0.3904924690723419, "learning_rate": 0.00019764748697019717, "loss": 1.5633, "step": 921 }, { "epoch": 0.011980969490435204, "grad_norm": 0.32989031076431274, "learning_rate": 0.0001976448875082858, "loss": 1.2155, "step": 922 }, { "epoch": 0.011993964034351077, "grad_norm": 0.34584271907806396, "learning_rate": 0.00019764228804637442, "loss": 1.4938, "step": 923 }, { "epoch": 0.01200695857826695, "grad_norm": 0.35245075821876526, "learning_rate": 0.00019763968858446302, "loss": 1.4404, "step": 924 }, { "epoch": 0.012019953122182823, "grad_norm": 0.330208420753479, "learning_rate": 0.00019763708912255164, "loss": 1.8242, "step": 925 }, { "epoch": 0.012032947666098697, "grad_norm": 0.3622221350669861, "learning_rate": 0.00019763448966064027, "loss": 1.6096, "step": 926 }, { "epoch": 0.01204594221001457, "grad_norm": 0.3644472360610962, "learning_rate": 0.00019763189019872886, "loss": 1.3615, "step": 927 }, { "epoch": 0.012058936753930443, "grad_norm": 0.33621031045913696, "learning_rate": 0.0001976292907368175, "loss": 1.6668, "step": 928 }, { "epoch": 0.012071931297846316, "grad_norm": 0.30896222591400146, "learning_rate": 0.00019762669127490609, "loss": 1.3955, "step": 929 }, { "epoch": 0.01208492584176219, "grad_norm": 0.38593050837516785, "learning_rate": 0.00019762409181299474, "loss": 1.5416, "step": 930 }, { "epoch": 0.012097920385678064, "grad_norm": 0.3486964702606201, "learning_rate": 0.00019762149235108334, "loss": 1.4852, "step": 931 }, { "epoch": 0.012110914929593937, "grad_norm": 0.4540570080280304, "learning_rate": 0.00019761889288917196, "loss": 1.6986, "step": 932 }, { "epoch": 0.01212390947350981, "grad_norm": 0.3150416314601898, "learning_rate": 0.00019761629342726056, "loss": 1.3562, "step": 933 }, { "epoch": 0.012136904017425683, "grad_norm": 0.2738734185695648, "learning_rate": 0.00019761369396534918, "loss": 1.4792, "step": 934 }, { "epoch": 0.012149898561341558, "grad_norm": 0.33239731192588806, "learning_rate": 0.0001976110945034378, "loss": 1.3922, "step": 935 }, { "epoch": 0.01216289310525743, "grad_norm": 0.3253883123397827, "learning_rate": 0.0001976084950415264, "loss": 1.443, "step": 936 }, { "epoch": 0.012175887649173303, "grad_norm": 0.39133119583129883, "learning_rate": 0.00019760589557961503, "loss": 1.4235, "step": 937 }, { "epoch": 0.012188882193089176, "grad_norm": 0.3229159414768219, "learning_rate": 0.00019760329611770365, "loss": 1.6038, "step": 938 }, { "epoch": 0.01220187673700505, "grad_norm": 0.3184714615345001, "learning_rate": 0.00019760069665579225, "loss": 1.5701, "step": 939 }, { "epoch": 0.012214871280920924, "grad_norm": 0.37220147252082825, "learning_rate": 0.00019759809719388087, "loss": 1.4988, "step": 940 }, { "epoch": 0.012227865824836797, "grad_norm": 0.320101797580719, "learning_rate": 0.00019759549773196947, "loss": 1.4489, "step": 941 }, { "epoch": 0.01224086036875267, "grad_norm": 0.3790021538734436, "learning_rate": 0.00019759289827005812, "loss": 1.4883, "step": 942 }, { "epoch": 0.012253854912668543, "grad_norm": 0.390257865190506, "learning_rate": 0.00019759029880814672, "loss": 1.4613, "step": 943 }, { "epoch": 0.012266849456584416, "grad_norm": 0.3869599401950836, "learning_rate": 0.00019758769934623535, "loss": 1.4212, "step": 944 }, { "epoch": 0.01227984400050029, "grad_norm": 0.38254520297050476, "learning_rate": 0.00019758509988432397, "loss": 1.414, "step": 945 }, { "epoch": 0.012292838544416164, "grad_norm": 0.36777254939079285, "learning_rate": 0.00019758250042241257, "loss": 1.4511, "step": 946 }, { "epoch": 0.012305833088332037, "grad_norm": 0.4114663898944855, "learning_rate": 0.0001975799009605012, "loss": 1.5631, "step": 947 }, { "epoch": 0.01231882763224791, "grad_norm": 0.4308132231235504, "learning_rate": 0.0001975773014985898, "loss": 1.4463, "step": 948 }, { "epoch": 0.012331822176163782, "grad_norm": 0.47477850317955017, "learning_rate": 0.00019757470203667844, "loss": 1.6277, "step": 949 }, { "epoch": 0.012344816720079657, "grad_norm": 0.37226349115371704, "learning_rate": 0.00019757210257476704, "loss": 1.5688, "step": 950 }, { "epoch": 0.01235781126399553, "grad_norm": 0.3418992757797241, "learning_rate": 0.00019756950311285564, "loss": 1.3014, "step": 951 }, { "epoch": 0.012370805807911403, "grad_norm": 0.32099708914756775, "learning_rate": 0.00019756690365094426, "loss": 1.4976, "step": 952 }, { "epoch": 0.012383800351827276, "grad_norm": 0.3482179641723633, "learning_rate": 0.00019756430418903288, "loss": 1.383, "step": 953 }, { "epoch": 0.01239679489574315, "grad_norm": 0.3348259925842285, "learning_rate": 0.0001975617047271215, "loss": 1.4764, "step": 954 }, { "epoch": 0.012409789439659024, "grad_norm": 0.37045207619667053, "learning_rate": 0.0001975591052652101, "loss": 1.416, "step": 955 }, { "epoch": 0.012422783983574897, "grad_norm": 0.4267137944698334, "learning_rate": 0.00019755650580329873, "loss": 1.5242, "step": 956 }, { "epoch": 0.01243577852749077, "grad_norm": 0.3423871397972107, "learning_rate": 0.00019755390634138736, "loss": 1.5163, "step": 957 }, { "epoch": 0.012448773071406643, "grad_norm": 0.4420553743839264, "learning_rate": 0.00019755130687947595, "loss": 1.3978, "step": 958 }, { "epoch": 0.012461767615322517, "grad_norm": 0.39365220069885254, "learning_rate": 0.00019754870741756458, "loss": 1.4297, "step": 959 }, { "epoch": 0.01247476215923839, "grad_norm": 0.3856015205383301, "learning_rate": 0.00019754610795565317, "loss": 1.3805, "step": 960 }, { "epoch": 0.012487756703154263, "grad_norm": 0.40324947237968445, "learning_rate": 0.00019754350849374183, "loss": 1.5517, "step": 961 }, { "epoch": 0.012500751247070136, "grad_norm": 0.2551697790622711, "learning_rate": 0.00019754090903183042, "loss": 1.3591, "step": 962 }, { "epoch": 0.012513745790986009, "grad_norm": 0.3336125314235687, "learning_rate": 0.00019753830956991902, "loss": 1.7566, "step": 963 }, { "epoch": 0.012526740334901884, "grad_norm": 0.3386547565460205, "learning_rate": 0.00019753571010800765, "loss": 1.5109, "step": 964 }, { "epoch": 0.012539734878817757, "grad_norm": 0.3088414669036865, "learning_rate": 0.00019753311064609627, "loss": 1.3733, "step": 965 }, { "epoch": 0.01255272942273363, "grad_norm": 0.3609013557434082, "learning_rate": 0.0001975305111841849, "loss": 1.3377, "step": 966 }, { "epoch": 0.012565723966649503, "grad_norm": 0.35096341371536255, "learning_rate": 0.0001975279117222735, "loss": 1.4581, "step": 967 }, { "epoch": 0.012578718510565376, "grad_norm": 0.3886059522628784, "learning_rate": 0.00019752531226036212, "loss": 1.4808, "step": 968 }, { "epoch": 0.01259171305448125, "grad_norm": 0.4206385314464569, "learning_rate": 0.00019752271279845074, "loss": 1.4298, "step": 969 }, { "epoch": 0.012604707598397123, "grad_norm": 0.31605201959609985, "learning_rate": 0.00019752011333653934, "loss": 1.3647, "step": 970 }, { "epoch": 0.012617702142312996, "grad_norm": 0.2802874445915222, "learning_rate": 0.00019751751387462796, "loss": 1.2144, "step": 971 }, { "epoch": 0.01263069668622887, "grad_norm": 0.2805991768836975, "learning_rate": 0.00019751491441271656, "loss": 1.3136, "step": 972 }, { "epoch": 0.012643691230144742, "grad_norm": 0.38155031204223633, "learning_rate": 0.0001975123149508052, "loss": 1.4381, "step": 973 }, { "epoch": 0.012656685774060617, "grad_norm": 0.38293060660362244, "learning_rate": 0.0001975097154888938, "loss": 1.4946, "step": 974 }, { "epoch": 0.01266968031797649, "grad_norm": 0.3291340172290802, "learning_rate": 0.0001975071160269824, "loss": 1.4199, "step": 975 }, { "epoch": 0.012682674861892363, "grad_norm": 0.401024729013443, "learning_rate": 0.00019750451656507103, "loss": 1.4644, "step": 976 }, { "epoch": 0.012695669405808236, "grad_norm": 0.30952614545822144, "learning_rate": 0.00019750191710315966, "loss": 1.6848, "step": 977 }, { "epoch": 0.01270866394972411, "grad_norm": 0.2506324052810669, "learning_rate": 0.00019749931764124828, "loss": 1.3772, "step": 978 }, { "epoch": 0.012721658493639983, "grad_norm": 0.47377681732177734, "learning_rate": 0.00019749671817933688, "loss": 1.5883, "step": 979 }, { "epoch": 0.012734653037555856, "grad_norm": 0.35284608602523804, "learning_rate": 0.0001974941187174255, "loss": 1.696, "step": 980 }, { "epoch": 0.01274764758147173, "grad_norm": 0.41148173809051514, "learning_rate": 0.00019749151925551413, "loss": 1.4473, "step": 981 }, { "epoch": 0.012760642125387602, "grad_norm": 0.3409004807472229, "learning_rate": 0.00019748891979360272, "loss": 1.3546, "step": 982 }, { "epoch": 0.012773636669303477, "grad_norm": 0.2892957627773285, "learning_rate": 0.00019748632033169135, "loss": 1.413, "step": 983 }, { "epoch": 0.01278663121321935, "grad_norm": 0.28190919756889343, "learning_rate": 0.00019748372086977997, "loss": 1.341, "step": 984 }, { "epoch": 0.012799625757135223, "grad_norm": 0.2929656505584717, "learning_rate": 0.0001974811214078686, "loss": 1.361, "step": 985 }, { "epoch": 0.012812620301051096, "grad_norm": 0.3396834433078766, "learning_rate": 0.0001974785219459572, "loss": 1.6593, "step": 986 }, { "epoch": 0.012825614844966969, "grad_norm": 0.35800784826278687, "learning_rate": 0.00019747592248404582, "loss": 1.5808, "step": 987 }, { "epoch": 0.012838609388882843, "grad_norm": 0.36989840865135193, "learning_rate": 0.00019747332302213444, "loss": 1.7048, "step": 988 }, { "epoch": 0.012851603932798716, "grad_norm": 0.4329179525375366, "learning_rate": 0.00019747072356022304, "loss": 1.5349, "step": 989 }, { "epoch": 0.01286459847671459, "grad_norm": 0.32291293144226074, "learning_rate": 0.00019746812409831166, "loss": 1.3178, "step": 990 }, { "epoch": 0.012877593020630462, "grad_norm": 0.2734065055847168, "learning_rate": 0.00019746552463640026, "loss": 1.419, "step": 991 }, { "epoch": 0.012890587564546335, "grad_norm": 0.32284560799598694, "learning_rate": 0.0001974629251744889, "loss": 1.221, "step": 992 }, { "epoch": 0.01290358210846221, "grad_norm": 0.37148168683052063, "learning_rate": 0.0001974603257125775, "loss": 1.5621, "step": 993 }, { "epoch": 0.012916576652378083, "grad_norm": 0.4166358709335327, "learning_rate": 0.0001974577262506661, "loss": 1.565, "step": 994 }, { "epoch": 0.012929571196293956, "grad_norm": 0.41298240423202515, "learning_rate": 0.00019745512678875473, "loss": 1.4933, "step": 995 }, { "epoch": 0.012942565740209829, "grad_norm": 0.41472184658050537, "learning_rate": 0.00019745252732684336, "loss": 1.5142, "step": 996 }, { "epoch": 0.012955560284125704, "grad_norm": 0.365089476108551, "learning_rate": 0.00019744992786493198, "loss": 1.4963, "step": 997 }, { "epoch": 0.012968554828041576, "grad_norm": 0.46369650959968567, "learning_rate": 0.00019744732840302058, "loss": 1.5349, "step": 998 }, { "epoch": 0.01298154937195745, "grad_norm": 0.2969778776168823, "learning_rate": 0.0001974447289411092, "loss": 1.3989, "step": 999 }, { "epoch": 0.012994543915873322, "grad_norm": 0.3416184186935425, "learning_rate": 0.00019744212947919783, "loss": 1.4271, "step": 1000 }, { "epoch": 0.013007538459789195, "grad_norm": 0.3964928686618805, "learning_rate": 0.00019743953001728643, "loss": 1.4873, "step": 1001 }, { "epoch": 0.01302053300370507, "grad_norm": 0.2853555977344513, "learning_rate": 0.00019743693055537505, "loss": 1.3868, "step": 1002 }, { "epoch": 0.013033527547620943, "grad_norm": 0.38141801953315735, "learning_rate": 0.00019743433109346365, "loss": 1.5366, "step": 1003 }, { "epoch": 0.013046522091536816, "grad_norm": 0.32994070649147034, "learning_rate": 0.0001974317316315523, "loss": 1.3386, "step": 1004 }, { "epoch": 0.013059516635452689, "grad_norm": 0.372125506401062, "learning_rate": 0.0001974291321696409, "loss": 1.6295, "step": 1005 }, { "epoch": 0.013072511179368562, "grad_norm": 0.357553094625473, "learning_rate": 0.0001974265327077295, "loss": 1.5544, "step": 1006 }, { "epoch": 0.013085505723284437, "grad_norm": 0.33602210879325867, "learning_rate": 0.00019742393324581812, "loss": 1.4295, "step": 1007 }, { "epoch": 0.01309850026720031, "grad_norm": 0.2846996486186981, "learning_rate": 0.00019742133378390674, "loss": 1.3163, "step": 1008 }, { "epoch": 0.013111494811116182, "grad_norm": 0.2966218888759613, "learning_rate": 0.00019741873432199537, "loss": 1.1789, "step": 1009 }, { "epoch": 0.013124489355032055, "grad_norm": 0.3746318519115448, "learning_rate": 0.00019741613486008396, "loss": 1.5497, "step": 1010 }, { "epoch": 0.013137483898947928, "grad_norm": 0.3484114110469818, "learning_rate": 0.0001974135353981726, "loss": 1.5893, "step": 1011 }, { "epoch": 0.013150478442863803, "grad_norm": 0.43390658497810364, "learning_rate": 0.00019741093593626121, "loss": 1.513, "step": 1012 }, { "epoch": 0.013163472986779676, "grad_norm": 0.3467201590538025, "learning_rate": 0.0001974083364743498, "loss": 1.2857, "step": 1013 }, { "epoch": 0.013176467530695549, "grad_norm": 0.2515032887458801, "learning_rate": 0.00019740573701243844, "loss": 1.5167, "step": 1014 }, { "epoch": 0.013189462074611422, "grad_norm": 0.3913467824459076, "learning_rate": 0.00019740313755052703, "loss": 1.4647, "step": 1015 }, { "epoch": 0.013202456618527297, "grad_norm": 0.4138876497745514, "learning_rate": 0.00019740053808861568, "loss": 1.5505, "step": 1016 }, { "epoch": 0.01321545116244317, "grad_norm": 0.36367595195770264, "learning_rate": 0.00019739793862670428, "loss": 1.548, "step": 1017 }, { "epoch": 0.013228445706359043, "grad_norm": 0.3937438726425171, "learning_rate": 0.00019739533916479288, "loss": 1.5888, "step": 1018 }, { "epoch": 0.013241440250274916, "grad_norm": 0.3310936391353607, "learning_rate": 0.00019739273970288153, "loss": 1.4138, "step": 1019 }, { "epoch": 0.013254434794190788, "grad_norm": 0.32583069801330566, "learning_rate": 0.00019739014024097013, "loss": 1.465, "step": 1020 }, { "epoch": 0.013267429338106663, "grad_norm": 0.6564087271690369, "learning_rate": 0.00019738754077905875, "loss": 1.4758, "step": 1021 }, { "epoch": 0.013280423882022536, "grad_norm": 0.4228411614894867, "learning_rate": 0.00019738494131714735, "loss": 1.7239, "step": 1022 }, { "epoch": 0.013293418425938409, "grad_norm": 0.38515767455101013, "learning_rate": 0.00019738234185523597, "loss": 1.3104, "step": 1023 }, { "epoch": 0.013306412969854282, "grad_norm": 0.4113703668117523, "learning_rate": 0.0001973797423933246, "loss": 1.5076, "step": 1024 }, { "epoch": 0.013319407513770155, "grad_norm": 0.30311328172683716, "learning_rate": 0.0001973771429314132, "loss": 1.6749, "step": 1025 }, { "epoch": 0.01333240205768603, "grad_norm": 0.38779404759407043, "learning_rate": 0.00019737454346950182, "loss": 1.5167, "step": 1026 }, { "epoch": 0.013345396601601903, "grad_norm": 0.3705992102622986, "learning_rate": 0.00019737194400759045, "loss": 1.6297, "step": 1027 }, { "epoch": 0.013358391145517776, "grad_norm": 0.3721454441547394, "learning_rate": 0.00019736934454567907, "loss": 1.6093, "step": 1028 }, { "epoch": 0.013371385689433649, "grad_norm": 0.27784106135368347, "learning_rate": 0.00019736674508376767, "loss": 1.4481, "step": 1029 }, { "epoch": 0.013384380233349522, "grad_norm": 0.4030505418777466, "learning_rate": 0.00019736414562185626, "loss": 1.571, "step": 1030 }, { "epoch": 0.013397374777265396, "grad_norm": 0.4560116231441498, "learning_rate": 0.00019736154615994492, "loss": 1.5739, "step": 1031 }, { "epoch": 0.01341036932118127, "grad_norm": 0.44230586290359497, "learning_rate": 0.00019735894669803351, "loss": 1.5975, "step": 1032 }, { "epoch": 0.013423363865097142, "grad_norm": 0.29934000968933105, "learning_rate": 0.00019735634723612214, "loss": 1.337, "step": 1033 }, { "epoch": 0.013436358409013015, "grad_norm": 0.36191225051879883, "learning_rate": 0.00019735374777421074, "loss": 1.5397, "step": 1034 }, { "epoch": 0.01344935295292889, "grad_norm": 0.20351794362068176, "learning_rate": 0.00019735114831229936, "loss": 1.3109, "step": 1035 }, { "epoch": 0.013462347496844763, "grad_norm": 0.39175131916999817, "learning_rate": 0.00019734854885038798, "loss": 1.5401, "step": 1036 }, { "epoch": 0.013475342040760636, "grad_norm": 0.3709506094455719, "learning_rate": 0.00019734594938847658, "loss": 1.4169, "step": 1037 }, { "epoch": 0.013488336584676509, "grad_norm": 0.34935545921325684, "learning_rate": 0.0001973433499265652, "loss": 1.6892, "step": 1038 }, { "epoch": 0.013501331128592382, "grad_norm": 0.38193994760513306, "learning_rate": 0.00019734075046465383, "loss": 1.5982, "step": 1039 }, { "epoch": 0.013514325672508256, "grad_norm": 0.41490989923477173, "learning_rate": 0.00019733815100274246, "loss": 1.5192, "step": 1040 }, { "epoch": 0.01352732021642413, "grad_norm": 0.4238545000553131, "learning_rate": 0.00019733555154083105, "loss": 1.4892, "step": 1041 }, { "epoch": 0.013540314760340002, "grad_norm": 0.35215967893600464, "learning_rate": 0.00019733295207891968, "loss": 1.3275, "step": 1042 }, { "epoch": 0.013553309304255875, "grad_norm": 0.39302146434783936, "learning_rate": 0.0001973303526170083, "loss": 1.5638, "step": 1043 }, { "epoch": 0.013566303848171748, "grad_norm": 0.3888627886772156, "learning_rate": 0.0001973277531550969, "loss": 1.7963, "step": 1044 }, { "epoch": 0.013579298392087623, "grad_norm": 0.3744841516017914, "learning_rate": 0.00019732515369318552, "loss": 1.4169, "step": 1045 }, { "epoch": 0.013592292936003496, "grad_norm": 0.36582887172698975, "learning_rate": 0.00019732255423127412, "loss": 1.4175, "step": 1046 }, { "epoch": 0.013605287479919369, "grad_norm": 0.40386176109313965, "learning_rate": 0.00019731995476936275, "loss": 1.4876, "step": 1047 }, { "epoch": 0.013618282023835242, "grad_norm": 0.38611915707588196, "learning_rate": 0.00019731735530745137, "loss": 1.5448, "step": 1048 }, { "epoch": 0.013631276567751115, "grad_norm": 0.3901870846748352, "learning_rate": 0.00019731475584553997, "loss": 1.4399, "step": 1049 }, { "epoch": 0.01364427111166699, "grad_norm": 0.4732416868209839, "learning_rate": 0.0001973121563836286, "loss": 1.4883, "step": 1050 }, { "epoch": 0.013657265655582862, "grad_norm": 0.39601173996925354, "learning_rate": 0.00019730955692171722, "loss": 1.4738, "step": 1051 }, { "epoch": 0.013670260199498735, "grad_norm": 0.3134896457195282, "learning_rate": 0.00019730695745980584, "loss": 1.5333, "step": 1052 }, { "epoch": 0.013683254743414608, "grad_norm": 0.32899072766304016, "learning_rate": 0.00019730435799789444, "loss": 1.4485, "step": 1053 }, { "epoch": 0.013696249287330483, "grad_norm": 0.33218157291412354, "learning_rate": 0.00019730175853598306, "loss": 1.2999, "step": 1054 }, { "epoch": 0.013709243831246356, "grad_norm": 0.37725093960762024, "learning_rate": 0.0001972991590740717, "loss": 1.6862, "step": 1055 }, { "epoch": 0.013722238375162229, "grad_norm": 0.3622485399246216, "learning_rate": 0.00019729655961216028, "loss": 1.3255, "step": 1056 }, { "epoch": 0.013735232919078102, "grad_norm": 0.23080238699913025, "learning_rate": 0.0001972939601502489, "loss": 1.326, "step": 1057 }, { "epoch": 0.013748227462993975, "grad_norm": 0.4250565469264984, "learning_rate": 0.00019729136068833753, "loss": 1.3157, "step": 1058 }, { "epoch": 0.01376122200690985, "grad_norm": 0.37228456139564514, "learning_rate": 0.00019728876122642613, "loss": 1.523, "step": 1059 }, { "epoch": 0.013774216550825722, "grad_norm": 0.432044118642807, "learning_rate": 0.00019728616176451476, "loss": 1.5878, "step": 1060 }, { "epoch": 0.013787211094741595, "grad_norm": 0.3209286630153656, "learning_rate": 0.00019728356230260335, "loss": 1.2784, "step": 1061 }, { "epoch": 0.013800205638657468, "grad_norm": 0.41788139939308167, "learning_rate": 0.000197280962840692, "loss": 1.5172, "step": 1062 }, { "epoch": 0.013813200182573341, "grad_norm": 0.375265896320343, "learning_rate": 0.0001972783633787806, "loss": 1.3935, "step": 1063 }, { "epoch": 0.013826194726489216, "grad_norm": 0.30664366483688354, "learning_rate": 0.00019727576391686923, "loss": 1.4017, "step": 1064 }, { "epoch": 0.013839189270405089, "grad_norm": 0.38297995924949646, "learning_rate": 0.00019727316445495782, "loss": 1.3393, "step": 1065 }, { "epoch": 0.013852183814320962, "grad_norm": 0.4578399956226349, "learning_rate": 0.00019727056499304645, "loss": 1.5749, "step": 1066 }, { "epoch": 0.013865178358236835, "grad_norm": 0.3051646053791046, "learning_rate": 0.00019726796553113507, "loss": 1.3907, "step": 1067 }, { "epoch": 0.013878172902152708, "grad_norm": 0.34466204047203064, "learning_rate": 0.00019726536606922367, "loss": 1.5721, "step": 1068 }, { "epoch": 0.013891167446068583, "grad_norm": 0.3795184791088104, "learning_rate": 0.0001972627666073123, "loss": 1.6227, "step": 1069 }, { "epoch": 0.013904161989984455, "grad_norm": 0.3206294775009155, "learning_rate": 0.00019726016714540092, "loss": 1.3982, "step": 1070 }, { "epoch": 0.013917156533900328, "grad_norm": 0.3425387144088745, "learning_rate": 0.00019725756768348954, "loss": 1.4367, "step": 1071 }, { "epoch": 0.013930151077816201, "grad_norm": 0.4226791560649872, "learning_rate": 0.00019725496822157814, "loss": 1.5182, "step": 1072 }, { "epoch": 0.013943145621732076, "grad_norm": 0.3256654739379883, "learning_rate": 0.00019725236875966674, "loss": 1.5103, "step": 1073 }, { "epoch": 0.013956140165647949, "grad_norm": 0.4334042966365814, "learning_rate": 0.0001972497692977554, "loss": 1.4865, "step": 1074 }, { "epoch": 0.013969134709563822, "grad_norm": 0.3617881238460541, "learning_rate": 0.000197247169835844, "loss": 1.3425, "step": 1075 }, { "epoch": 0.013982129253479695, "grad_norm": 0.35514402389526367, "learning_rate": 0.0001972445703739326, "loss": 1.3842, "step": 1076 }, { "epoch": 0.013995123797395568, "grad_norm": 0.3968522250652313, "learning_rate": 0.0001972419709120212, "loss": 1.565, "step": 1077 }, { "epoch": 0.014008118341311443, "grad_norm": 0.3910624086856842, "learning_rate": 0.00019723937145010983, "loss": 1.6575, "step": 1078 }, { "epoch": 0.014021112885227316, "grad_norm": 0.3052786588668823, "learning_rate": 0.00019723677198819846, "loss": 1.4456, "step": 1079 }, { "epoch": 0.014034107429143189, "grad_norm": 0.3568594455718994, "learning_rate": 0.00019723417252628706, "loss": 1.4607, "step": 1080 }, { "epoch": 0.014047101973059061, "grad_norm": 0.35911497473716736, "learning_rate": 0.00019723157306437568, "loss": 1.4244, "step": 1081 }, { "epoch": 0.014060096516974934, "grad_norm": 0.35595959424972534, "learning_rate": 0.0001972289736024643, "loss": 1.4078, "step": 1082 }, { "epoch": 0.01407309106089081, "grad_norm": 0.4164542555809021, "learning_rate": 0.00019722637414055293, "loss": 1.5674, "step": 1083 }, { "epoch": 0.014086085604806682, "grad_norm": 0.4948754608631134, "learning_rate": 0.00019722377467864153, "loss": 1.5765, "step": 1084 }, { "epoch": 0.014099080148722555, "grad_norm": 0.4149811863899231, "learning_rate": 0.00019722117521673012, "loss": 1.5938, "step": 1085 }, { "epoch": 0.014112074692638428, "grad_norm": 0.2832207679748535, "learning_rate": 0.00019721857575481878, "loss": 1.429, "step": 1086 }, { "epoch": 0.014125069236554301, "grad_norm": 0.46026912331581116, "learning_rate": 0.00019721597629290737, "loss": 1.6396, "step": 1087 }, { "epoch": 0.014138063780470176, "grad_norm": 0.4021912217140198, "learning_rate": 0.000197213376830996, "loss": 1.4451, "step": 1088 }, { "epoch": 0.014151058324386049, "grad_norm": 0.33511781692504883, "learning_rate": 0.0001972107773690846, "loss": 1.6081, "step": 1089 }, { "epoch": 0.014164052868301922, "grad_norm": 0.2907991409301758, "learning_rate": 0.00019720817790717322, "loss": 1.4162, "step": 1090 }, { "epoch": 0.014177047412217795, "grad_norm": 0.44519951939582825, "learning_rate": 0.00019720557844526184, "loss": 1.5443, "step": 1091 }, { "epoch": 0.01419004195613367, "grad_norm": 0.30131325125694275, "learning_rate": 0.00019720297898335044, "loss": 1.4325, "step": 1092 }, { "epoch": 0.014203036500049542, "grad_norm": 0.32041850686073303, "learning_rate": 0.0001972003795214391, "loss": 1.6483, "step": 1093 }, { "epoch": 0.014216031043965415, "grad_norm": 0.3435966372489929, "learning_rate": 0.0001971977800595277, "loss": 1.4087, "step": 1094 }, { "epoch": 0.014229025587881288, "grad_norm": 0.36322468519210815, "learning_rate": 0.00019719518059761631, "loss": 1.3984, "step": 1095 }, { "epoch": 0.014242020131797161, "grad_norm": 0.3201417028903961, "learning_rate": 0.0001971925811357049, "loss": 1.4159, "step": 1096 }, { "epoch": 0.014255014675713036, "grad_norm": 0.371951699256897, "learning_rate": 0.00019718998167379354, "loss": 1.4836, "step": 1097 }, { "epoch": 0.014268009219628909, "grad_norm": 0.5051089525222778, "learning_rate": 0.00019718738221188216, "loss": 1.6927, "step": 1098 }, { "epoch": 0.014281003763544782, "grad_norm": 0.36920613050460815, "learning_rate": 0.00019718478274997076, "loss": 1.4733, "step": 1099 }, { "epoch": 0.014293998307460655, "grad_norm": 0.42907023429870605, "learning_rate": 0.00019718218328805938, "loss": 1.5133, "step": 1100 }, { "epoch": 0.014306992851376528, "grad_norm": 0.34839338064193726, "learning_rate": 0.000197179583826148, "loss": 1.5482, "step": 1101 }, { "epoch": 0.014319987395292402, "grad_norm": 0.32832157611846924, "learning_rate": 0.0001971769843642366, "loss": 1.5275, "step": 1102 }, { "epoch": 0.014332981939208275, "grad_norm": 0.4097306728363037, "learning_rate": 0.00019717438490232523, "loss": 1.3461, "step": 1103 }, { "epoch": 0.014345976483124148, "grad_norm": 0.3983323276042938, "learning_rate": 0.00019717178544041383, "loss": 1.4898, "step": 1104 }, { "epoch": 0.014358971027040021, "grad_norm": 0.385093092918396, "learning_rate": 0.00019716918597850248, "loss": 1.4728, "step": 1105 }, { "epoch": 0.014371965570955894, "grad_norm": 0.42422205209732056, "learning_rate": 0.00019716658651659108, "loss": 1.6079, "step": 1106 }, { "epoch": 0.014384960114871769, "grad_norm": 0.33223721385002136, "learning_rate": 0.0001971639870546797, "loss": 1.3487, "step": 1107 }, { "epoch": 0.014397954658787642, "grad_norm": 0.34783658385276794, "learning_rate": 0.0001971613875927683, "loss": 1.6666, "step": 1108 }, { "epoch": 0.014410949202703515, "grad_norm": 0.42093703150749207, "learning_rate": 0.00019715878813085692, "loss": 1.7062, "step": 1109 }, { "epoch": 0.014423943746619388, "grad_norm": 0.3577468693256378, "learning_rate": 0.00019715618866894555, "loss": 1.4418, "step": 1110 }, { "epoch": 0.014436938290535262, "grad_norm": 0.3317162096500397, "learning_rate": 0.00019715358920703414, "loss": 1.5098, "step": 1111 }, { "epoch": 0.014449932834451135, "grad_norm": 0.39414602518081665, "learning_rate": 0.00019715098974512277, "loss": 1.5375, "step": 1112 }, { "epoch": 0.014462927378367008, "grad_norm": 0.3096737861633301, "learning_rate": 0.0001971483902832114, "loss": 1.5748, "step": 1113 }, { "epoch": 0.014475921922282881, "grad_norm": 0.31180667877197266, "learning_rate": 0.0001971457908213, "loss": 1.2854, "step": 1114 }, { "epoch": 0.014488916466198754, "grad_norm": 0.3488854765892029, "learning_rate": 0.00019714319135938861, "loss": 1.3896, "step": 1115 }, { "epoch": 0.014501911010114629, "grad_norm": 0.4322400689125061, "learning_rate": 0.0001971405918974772, "loss": 1.6063, "step": 1116 }, { "epoch": 0.014514905554030502, "grad_norm": 0.3862898647785187, "learning_rate": 0.00019713799243556586, "loss": 1.3693, "step": 1117 }, { "epoch": 0.014527900097946375, "grad_norm": 0.4383971095085144, "learning_rate": 0.00019713539297365446, "loss": 1.4469, "step": 1118 }, { "epoch": 0.014540894641862248, "grad_norm": 0.40656328201293945, "learning_rate": 0.00019713279351174308, "loss": 1.6469, "step": 1119 }, { "epoch": 0.01455388918577812, "grad_norm": 0.3286970257759094, "learning_rate": 0.00019713019404983168, "loss": 1.448, "step": 1120 }, { "epoch": 0.014566883729693995, "grad_norm": 0.3503006100654602, "learning_rate": 0.0001971275945879203, "loss": 1.4671, "step": 1121 }, { "epoch": 0.014579878273609868, "grad_norm": 0.4297201633453369, "learning_rate": 0.00019712499512600893, "loss": 1.3163, "step": 1122 }, { "epoch": 0.014592872817525741, "grad_norm": 0.3751363456249237, "learning_rate": 0.00019712239566409753, "loss": 1.4478, "step": 1123 }, { "epoch": 0.014605867361441614, "grad_norm": 0.38053035736083984, "learning_rate": 0.00019711979620218615, "loss": 1.2915, "step": 1124 }, { "epoch": 0.014618861905357487, "grad_norm": 0.3267087936401367, "learning_rate": 0.00019711719674027478, "loss": 1.4562, "step": 1125 }, { "epoch": 0.014631856449273362, "grad_norm": 0.3064032793045044, "learning_rate": 0.0001971145972783634, "loss": 1.4026, "step": 1126 }, { "epoch": 0.014644850993189235, "grad_norm": 0.360404372215271, "learning_rate": 0.000197111997816452, "loss": 1.3682, "step": 1127 }, { "epoch": 0.014657845537105108, "grad_norm": 0.5024319887161255, "learning_rate": 0.0001971093983545406, "loss": 1.6088, "step": 1128 }, { "epoch": 0.01467084008102098, "grad_norm": 0.38353124260902405, "learning_rate": 0.00019710679889262925, "loss": 1.4739, "step": 1129 }, { "epoch": 0.014683834624936854, "grad_norm": 0.35435613989830017, "learning_rate": 0.00019710419943071785, "loss": 1.3529, "step": 1130 }, { "epoch": 0.014696829168852728, "grad_norm": 0.43681633472442627, "learning_rate": 0.00019710159996880647, "loss": 1.3106, "step": 1131 }, { "epoch": 0.014709823712768601, "grad_norm": 0.4419565498828888, "learning_rate": 0.0001970990005068951, "loss": 1.5253, "step": 1132 }, { "epoch": 0.014722818256684474, "grad_norm": 0.3059302270412445, "learning_rate": 0.0001970964010449837, "loss": 1.3594, "step": 1133 }, { "epoch": 0.014735812800600347, "grad_norm": 0.38897067308425903, "learning_rate": 0.00019709380158307232, "loss": 1.4217, "step": 1134 }, { "epoch": 0.014748807344516222, "grad_norm": 0.34824851155281067, "learning_rate": 0.00019709120212116091, "loss": 1.4551, "step": 1135 }, { "epoch": 0.014761801888432095, "grad_norm": 0.3961305618286133, "learning_rate": 0.00019708860265924957, "loss": 1.6001, "step": 1136 }, { "epoch": 0.014774796432347968, "grad_norm": 0.3158442974090576, "learning_rate": 0.00019708600319733816, "loss": 1.4555, "step": 1137 }, { "epoch": 0.014787790976263841, "grad_norm": 0.3308698832988739, "learning_rate": 0.0001970834037354268, "loss": 1.5223, "step": 1138 }, { "epoch": 0.014800785520179714, "grad_norm": 0.31649670004844666, "learning_rate": 0.00019708080427351538, "loss": 1.4427, "step": 1139 }, { "epoch": 0.014813780064095589, "grad_norm": 0.30845457315444946, "learning_rate": 0.000197078204811604, "loss": 1.4735, "step": 1140 }, { "epoch": 0.014826774608011462, "grad_norm": 0.34705907106399536, "learning_rate": 0.00019707560534969263, "loss": 1.4968, "step": 1141 }, { "epoch": 0.014839769151927334, "grad_norm": 0.33273646235466003, "learning_rate": 0.00019707300588778123, "loss": 1.1341, "step": 1142 }, { "epoch": 0.014852763695843207, "grad_norm": 0.37411919236183167, "learning_rate": 0.00019707040642586986, "loss": 1.6221, "step": 1143 }, { "epoch": 0.01486575823975908, "grad_norm": 0.3228834271430969, "learning_rate": 0.00019706780696395848, "loss": 1.7656, "step": 1144 }, { "epoch": 0.014878752783674955, "grad_norm": 0.24861685931682587, "learning_rate": 0.00019706520750204708, "loss": 1.219, "step": 1145 }, { "epoch": 0.014891747327590828, "grad_norm": 0.3395216166973114, "learning_rate": 0.0001970626080401357, "loss": 1.3166, "step": 1146 }, { "epoch": 0.014904741871506701, "grad_norm": 0.28913232684135437, "learning_rate": 0.0001970600085782243, "loss": 1.4134, "step": 1147 }, { "epoch": 0.014917736415422574, "grad_norm": 0.399809330701828, "learning_rate": 0.00019705740911631295, "loss": 1.5225, "step": 1148 }, { "epoch": 0.014930730959338447, "grad_norm": 0.43894335627555847, "learning_rate": 0.00019705480965440155, "loss": 1.6128, "step": 1149 }, { "epoch": 0.014943725503254322, "grad_norm": 0.3003581166267395, "learning_rate": 0.00019705221019249017, "loss": 1.3618, "step": 1150 }, { "epoch": 0.014956720047170195, "grad_norm": 0.5539864301681519, "learning_rate": 0.00019704961073057877, "loss": 1.5579, "step": 1151 }, { "epoch": 0.014969714591086068, "grad_norm": 0.4360288083553314, "learning_rate": 0.0001970470112686674, "loss": 1.4598, "step": 1152 }, { "epoch": 0.01498270913500194, "grad_norm": 0.3488319218158722, "learning_rate": 0.00019704441180675602, "loss": 1.4878, "step": 1153 }, { "epoch": 0.014995703678917815, "grad_norm": 0.414284348487854, "learning_rate": 0.00019704181234484462, "loss": 1.3644, "step": 1154 }, { "epoch": 0.015008698222833688, "grad_norm": 0.3985223174095154, "learning_rate": 0.00019703921288293324, "loss": 1.6135, "step": 1155 }, { "epoch": 0.015021692766749561, "grad_norm": 0.3805556297302246, "learning_rate": 0.00019703661342102187, "loss": 1.5731, "step": 1156 }, { "epoch": 0.015034687310665434, "grad_norm": 0.3703943192958832, "learning_rate": 0.00019703401395911046, "loss": 1.5537, "step": 1157 }, { "epoch": 0.015047681854581307, "grad_norm": 0.3613834083080292, "learning_rate": 0.0001970314144971991, "loss": 1.6242, "step": 1158 }, { "epoch": 0.015060676398497182, "grad_norm": 0.3949839770793915, "learning_rate": 0.00019702881503528768, "loss": 1.4553, "step": 1159 }, { "epoch": 0.015073670942413055, "grad_norm": 0.34406524896621704, "learning_rate": 0.00019702621557337634, "loss": 1.1838, "step": 1160 }, { "epoch": 0.015086665486328928, "grad_norm": 0.3827522099018097, "learning_rate": 0.00019702361611146493, "loss": 1.4566, "step": 1161 }, { "epoch": 0.0150996600302448, "grad_norm": 0.2957545816898346, "learning_rate": 0.00019702101664955356, "loss": 1.7034, "step": 1162 }, { "epoch": 0.015112654574160674, "grad_norm": 0.42204660177230835, "learning_rate": 0.00019701841718764216, "loss": 1.4461, "step": 1163 }, { "epoch": 0.015125649118076548, "grad_norm": 0.44211307168006897, "learning_rate": 0.00019701581772573078, "loss": 1.6812, "step": 1164 }, { "epoch": 0.015138643661992421, "grad_norm": 0.42191219329833984, "learning_rate": 0.0001970132182638194, "loss": 1.6109, "step": 1165 }, { "epoch": 0.015151638205908294, "grad_norm": 0.4305635690689087, "learning_rate": 0.000197010618801908, "loss": 1.5388, "step": 1166 }, { "epoch": 0.015164632749824167, "grad_norm": 0.3535262644290924, "learning_rate": 0.00019700801933999665, "loss": 1.5471, "step": 1167 }, { "epoch": 0.01517762729374004, "grad_norm": 0.27894142270088196, "learning_rate": 0.00019700541987808525, "loss": 1.3392, "step": 1168 }, { "epoch": 0.015190621837655915, "grad_norm": 0.34142574667930603, "learning_rate": 0.00019700282041617385, "loss": 1.285, "step": 1169 }, { "epoch": 0.015203616381571788, "grad_norm": 0.30574700236320496, "learning_rate": 0.00019700022095426247, "loss": 1.1122, "step": 1170 }, { "epoch": 0.01521661092548766, "grad_norm": 0.34303486347198486, "learning_rate": 0.0001969976214923511, "loss": 1.4074, "step": 1171 }, { "epoch": 0.015229605469403534, "grad_norm": 0.39488857984542847, "learning_rate": 0.00019699502203043972, "loss": 1.5406, "step": 1172 }, { "epoch": 0.015242600013319408, "grad_norm": 0.362435907125473, "learning_rate": 0.00019699242256852832, "loss": 1.4349, "step": 1173 }, { "epoch": 0.015255594557235281, "grad_norm": 0.41773369908332825, "learning_rate": 0.00019698982310661694, "loss": 1.5431, "step": 1174 }, { "epoch": 0.015268589101151154, "grad_norm": 0.4345804750919342, "learning_rate": 0.00019698722364470557, "loss": 1.4943, "step": 1175 }, { "epoch": 0.015281583645067027, "grad_norm": 0.46782350540161133, "learning_rate": 0.00019698462418279417, "loss": 1.6564, "step": 1176 }, { "epoch": 0.0152945781889829, "grad_norm": 0.40089139342308044, "learning_rate": 0.0001969820247208828, "loss": 1.6876, "step": 1177 }, { "epoch": 0.015307572732898775, "grad_norm": 0.3707917034626007, "learning_rate": 0.0001969794252589714, "loss": 1.487, "step": 1178 }, { "epoch": 0.015320567276814648, "grad_norm": 0.37704959511756897, "learning_rate": 0.00019697682579706004, "loss": 1.609, "step": 1179 }, { "epoch": 0.01533356182073052, "grad_norm": 0.3373548090457916, "learning_rate": 0.00019697422633514864, "loss": 1.3733, "step": 1180 }, { "epoch": 0.015346556364646394, "grad_norm": 0.3421562612056732, "learning_rate": 0.00019697162687323723, "loss": 1.5377, "step": 1181 }, { "epoch": 0.015359550908562267, "grad_norm": 0.42313849925994873, "learning_rate": 0.00019696902741132586, "loss": 1.5373, "step": 1182 }, { "epoch": 0.015372545452478141, "grad_norm": 0.3664761483669281, "learning_rate": 0.00019696642794941448, "loss": 1.5097, "step": 1183 }, { "epoch": 0.015385539996394014, "grad_norm": 0.3784591257572174, "learning_rate": 0.0001969638284875031, "loss": 1.4573, "step": 1184 }, { "epoch": 0.015398534540309887, "grad_norm": 0.36157453060150146, "learning_rate": 0.0001969612290255917, "loss": 1.5686, "step": 1185 }, { "epoch": 0.01541152908422576, "grad_norm": 0.3741309642791748, "learning_rate": 0.00019695862956368033, "loss": 1.5118, "step": 1186 }, { "epoch": 0.015424523628141633, "grad_norm": 0.3733425438404083, "learning_rate": 0.00019695603010176895, "loss": 1.4915, "step": 1187 }, { "epoch": 0.015437518172057508, "grad_norm": 0.4223376214504242, "learning_rate": 0.00019695343063985755, "loss": 1.4806, "step": 1188 }, { "epoch": 0.01545051271597338, "grad_norm": 0.4280458092689514, "learning_rate": 0.00019695083117794618, "loss": 1.5442, "step": 1189 }, { "epoch": 0.015463507259889254, "grad_norm": 0.3798462152481079, "learning_rate": 0.00019694823171603477, "loss": 1.5422, "step": 1190 }, { "epoch": 0.015476501803805127, "grad_norm": 0.22774185240268707, "learning_rate": 0.00019694563225412342, "loss": 1.2608, "step": 1191 }, { "epoch": 0.015489496347721001, "grad_norm": 0.38605165481567383, "learning_rate": 0.00019694303279221202, "loss": 1.4247, "step": 1192 }, { "epoch": 0.015502490891636874, "grad_norm": 0.4194793999195099, "learning_rate": 0.00019694043333030065, "loss": 1.508, "step": 1193 }, { "epoch": 0.015515485435552747, "grad_norm": 0.3809635043144226, "learning_rate": 0.00019693783386838924, "loss": 1.6783, "step": 1194 }, { "epoch": 0.01552847997946862, "grad_norm": 0.34799525141716003, "learning_rate": 0.00019693523440647787, "loss": 1.3097, "step": 1195 }, { "epoch": 0.015541474523384493, "grad_norm": 0.30423760414123535, "learning_rate": 0.0001969326349445665, "loss": 1.215, "step": 1196 }, { "epoch": 0.015554469067300368, "grad_norm": 0.3915634751319885, "learning_rate": 0.0001969300354826551, "loss": 1.5735, "step": 1197 }, { "epoch": 0.015567463611216241, "grad_norm": 0.37613967061042786, "learning_rate": 0.00019692743602074371, "loss": 1.4058, "step": 1198 }, { "epoch": 0.015580458155132114, "grad_norm": 0.40080973505973816, "learning_rate": 0.00019692483655883234, "loss": 1.5197, "step": 1199 }, { "epoch": 0.015593452699047987, "grad_norm": 0.3446778357028961, "learning_rate": 0.00019692223709692094, "loss": 1.6041, "step": 1200 }, { "epoch": 0.01560644724296386, "grad_norm": 0.366235613822937, "learning_rate": 0.00019691963763500956, "loss": 1.383, "step": 1201 }, { "epoch": 0.015619441786879735, "grad_norm": 0.3766935467720032, "learning_rate": 0.00019691703817309816, "loss": 1.6052, "step": 1202 }, { "epoch": 0.015632436330795606, "grad_norm": 0.3511297404766083, "learning_rate": 0.0001969144387111868, "loss": 1.3423, "step": 1203 }, { "epoch": 0.015645430874711482, "grad_norm": 0.48626863956451416, "learning_rate": 0.0001969118392492754, "loss": 1.4677, "step": 1204 }, { "epoch": 0.015658425418627355, "grad_norm": 0.34959498047828674, "learning_rate": 0.00019690923978736403, "loss": 1.3027, "step": 1205 }, { "epoch": 0.015671419962543228, "grad_norm": 0.3932822644710541, "learning_rate": 0.00019690664032545266, "loss": 1.3391, "step": 1206 }, { "epoch": 0.0156844145064591, "grad_norm": 0.21857501566410065, "learning_rate": 0.00019690404086354125, "loss": 1.1553, "step": 1207 }, { "epoch": 0.015697409050374974, "grad_norm": 0.4064771831035614, "learning_rate": 0.00019690144140162988, "loss": 1.678, "step": 1208 }, { "epoch": 0.015710403594290847, "grad_norm": 0.3586530387401581, "learning_rate": 0.00019689884193971848, "loss": 1.3805, "step": 1209 }, { "epoch": 0.01572339813820672, "grad_norm": 0.3518249988555908, "learning_rate": 0.00019689624247780713, "loss": 1.2187, "step": 1210 }, { "epoch": 0.015736392682122593, "grad_norm": 0.27563443779945374, "learning_rate": 0.00019689364301589572, "loss": 1.1782, "step": 1211 }, { "epoch": 0.015749387226038466, "grad_norm": 0.3735935389995575, "learning_rate": 0.00019689104355398432, "loss": 1.5659, "step": 1212 }, { "epoch": 0.01576238176995434, "grad_norm": 0.3346775472164154, "learning_rate": 0.00019688844409207295, "loss": 1.2095, "step": 1213 }, { "epoch": 0.015775376313870215, "grad_norm": 0.3192962408065796, "learning_rate": 0.00019688584463016157, "loss": 1.2406, "step": 1214 }, { "epoch": 0.015788370857786088, "grad_norm": 0.44287389516830444, "learning_rate": 0.0001968832451682502, "loss": 1.4406, "step": 1215 }, { "epoch": 0.01580136540170196, "grad_norm": 0.2607724368572235, "learning_rate": 0.0001968806457063388, "loss": 1.6125, "step": 1216 }, { "epoch": 0.015814359945617834, "grad_norm": 0.35675251483917236, "learning_rate": 0.00019687804624442742, "loss": 1.4747, "step": 1217 }, { "epoch": 0.015827354489533707, "grad_norm": 0.45708343386650085, "learning_rate": 0.00019687544678251604, "loss": 1.4091, "step": 1218 }, { "epoch": 0.01584034903344958, "grad_norm": 0.43029478192329407, "learning_rate": 0.00019687284732060464, "loss": 1.4042, "step": 1219 }, { "epoch": 0.015853343577365453, "grad_norm": 0.3849797546863556, "learning_rate": 0.00019687024785869326, "loss": 1.4625, "step": 1220 }, { "epoch": 0.015866338121281326, "grad_norm": 0.300675630569458, "learning_rate": 0.00019686764839678186, "loss": 1.4155, "step": 1221 }, { "epoch": 0.0158793326651972, "grad_norm": 0.49140745401382446, "learning_rate": 0.0001968650489348705, "loss": 1.6745, "step": 1222 }, { "epoch": 0.015892327209113075, "grad_norm": 0.4113255739212036, "learning_rate": 0.0001968624494729591, "loss": 1.4548, "step": 1223 }, { "epoch": 0.01590532175302895, "grad_norm": 0.4348243474960327, "learning_rate": 0.0001968598500110477, "loss": 1.4758, "step": 1224 }, { "epoch": 0.01591831629694482, "grad_norm": 0.47167786955833435, "learning_rate": 0.00019685725054913633, "loss": 1.5269, "step": 1225 }, { "epoch": 0.015931310840860694, "grad_norm": 0.40507128834724426, "learning_rate": 0.00019685465108722496, "loss": 1.5768, "step": 1226 }, { "epoch": 0.015944305384776567, "grad_norm": 0.34047117829322815, "learning_rate": 0.00019685205162531358, "loss": 1.5666, "step": 1227 }, { "epoch": 0.01595729992869244, "grad_norm": 0.3125999867916107, "learning_rate": 0.00019684945216340218, "loss": 1.6384, "step": 1228 }, { "epoch": 0.015970294472608313, "grad_norm": 0.41916677355766296, "learning_rate": 0.0001968468527014908, "loss": 1.4802, "step": 1229 }, { "epoch": 0.015983289016524186, "grad_norm": 0.3740885853767395, "learning_rate": 0.00019684425323957943, "loss": 1.404, "step": 1230 }, { "epoch": 0.01599628356044006, "grad_norm": 0.3712579607963562, "learning_rate": 0.00019684165377766802, "loss": 1.5274, "step": 1231 }, { "epoch": 0.016009278104355932, "grad_norm": 0.4336252510547638, "learning_rate": 0.00019683905431575665, "loss": 1.5589, "step": 1232 }, { "epoch": 0.01602227264827181, "grad_norm": 0.337321013212204, "learning_rate": 0.00019683645485384525, "loss": 1.2543, "step": 1233 }, { "epoch": 0.01603526719218768, "grad_norm": 0.3992306590080261, "learning_rate": 0.0001968338553919339, "loss": 1.2617, "step": 1234 }, { "epoch": 0.016048261736103554, "grad_norm": 0.33321884274482727, "learning_rate": 0.0001968312559300225, "loss": 1.5899, "step": 1235 }, { "epoch": 0.016061256280019427, "grad_norm": 0.4244740307331085, "learning_rate": 0.0001968286564681111, "loss": 1.5195, "step": 1236 }, { "epoch": 0.0160742508239353, "grad_norm": 0.3898663818836212, "learning_rate": 0.00019682605700619972, "loss": 1.7735, "step": 1237 }, { "epoch": 0.016087245367851173, "grad_norm": 0.3646060526371002, "learning_rate": 0.00019682345754428834, "loss": 1.5273, "step": 1238 }, { "epoch": 0.016100239911767046, "grad_norm": 0.43874886631965637, "learning_rate": 0.00019682085808237697, "loss": 1.5299, "step": 1239 }, { "epoch": 0.01611323445568292, "grad_norm": 0.3499436378479004, "learning_rate": 0.00019681825862046556, "loss": 1.6245, "step": 1240 }, { "epoch": 0.016126228999598792, "grad_norm": 0.475818395614624, "learning_rate": 0.0001968156591585542, "loss": 1.3729, "step": 1241 }, { "epoch": 0.01613922354351467, "grad_norm": 0.3963949978351593, "learning_rate": 0.0001968130596966428, "loss": 1.3623, "step": 1242 }, { "epoch": 0.01615221808743054, "grad_norm": 0.37632128596305847, "learning_rate": 0.0001968104602347314, "loss": 1.419, "step": 1243 }, { "epoch": 0.016165212631346414, "grad_norm": 0.4357317090034485, "learning_rate": 0.00019680786077282003, "loss": 1.56, "step": 1244 }, { "epoch": 0.016178207175262287, "grad_norm": 0.33209028840065, "learning_rate": 0.00019680526131090866, "loss": 1.3852, "step": 1245 }, { "epoch": 0.01619120171917816, "grad_norm": 0.3596300482749939, "learning_rate": 0.00019680266184899728, "loss": 1.4985, "step": 1246 }, { "epoch": 0.016204196263094033, "grad_norm": 0.3832753598690033, "learning_rate": 0.00019680006238708588, "loss": 1.4908, "step": 1247 }, { "epoch": 0.016217190807009906, "grad_norm": 0.3142191767692566, "learning_rate": 0.0001967974629251745, "loss": 1.5067, "step": 1248 }, { "epoch": 0.01623018535092578, "grad_norm": 0.36329564452171326, "learning_rate": 0.00019679486346326313, "loss": 1.4688, "step": 1249 }, { "epoch": 0.016243179894841652, "grad_norm": 0.31218141317367554, "learning_rate": 0.00019679226400135173, "loss": 1.3132, "step": 1250 }, { "epoch": 0.016256174438757525, "grad_norm": 0.4516143500804901, "learning_rate": 0.00019678966453944035, "loss": 1.5997, "step": 1251 }, { "epoch": 0.0162691689826734, "grad_norm": 0.37211713194847107, "learning_rate": 0.00019678706507752895, "loss": 1.3798, "step": 1252 }, { "epoch": 0.016282163526589274, "grad_norm": 0.4344126582145691, "learning_rate": 0.00019678446561561757, "loss": 1.5206, "step": 1253 }, { "epoch": 0.016295158070505147, "grad_norm": 0.33741259574890137, "learning_rate": 0.0001967818661537062, "loss": 1.5059, "step": 1254 }, { "epoch": 0.01630815261442102, "grad_norm": 0.4300345182418823, "learning_rate": 0.0001967792666917948, "loss": 1.624, "step": 1255 }, { "epoch": 0.016321147158336893, "grad_norm": 0.33736440539360046, "learning_rate": 0.00019677666722988342, "loss": 1.4496, "step": 1256 }, { "epoch": 0.016334141702252766, "grad_norm": 0.3926686644554138, "learning_rate": 0.00019677406776797204, "loss": 1.5301, "step": 1257 }, { "epoch": 0.01634713624616864, "grad_norm": 0.426224023103714, "learning_rate": 0.00019677146830606067, "loss": 1.5169, "step": 1258 }, { "epoch": 0.016360130790084512, "grad_norm": 0.4742068946361542, "learning_rate": 0.00019676886884414927, "loss": 1.5163, "step": 1259 }, { "epoch": 0.016373125334000385, "grad_norm": 0.3262328505516052, "learning_rate": 0.0001967662693822379, "loss": 1.4143, "step": 1260 }, { "epoch": 0.01638611987791626, "grad_norm": 0.2593839466571808, "learning_rate": 0.00019676366992032651, "loss": 1.23, "step": 1261 }, { "epoch": 0.016399114421832135, "grad_norm": 0.38010913133621216, "learning_rate": 0.0001967610704584151, "loss": 1.6117, "step": 1262 }, { "epoch": 0.016412108965748008, "grad_norm": 0.267661452293396, "learning_rate": 0.00019675847099650374, "loss": 1.315, "step": 1263 }, { "epoch": 0.01642510350966388, "grad_norm": 0.3073880672454834, "learning_rate": 0.00019675587153459233, "loss": 1.4164, "step": 1264 }, { "epoch": 0.016438098053579753, "grad_norm": 0.3029155135154724, "learning_rate": 0.00019675327207268096, "loss": 1.6044, "step": 1265 }, { "epoch": 0.016451092597495626, "grad_norm": 0.31216323375701904, "learning_rate": 0.00019675067261076958, "loss": 1.4097, "step": 1266 }, { "epoch": 0.0164640871414115, "grad_norm": 0.41255277395248413, "learning_rate": 0.00019674807314885818, "loss": 1.5876, "step": 1267 }, { "epoch": 0.016477081685327372, "grad_norm": 0.3925461769104004, "learning_rate": 0.0001967454736869468, "loss": 1.4628, "step": 1268 }, { "epoch": 0.016490076229243245, "grad_norm": 0.47023651003837585, "learning_rate": 0.00019674287422503543, "loss": 1.4258, "step": 1269 }, { "epoch": 0.016503070773159118, "grad_norm": 0.3889276683330536, "learning_rate": 0.00019674027476312405, "loss": 1.4992, "step": 1270 }, { "epoch": 0.016516065317074995, "grad_norm": 0.5152811408042908, "learning_rate": 0.00019673767530121265, "loss": 1.5095, "step": 1271 }, { "epoch": 0.016529059860990868, "grad_norm": 0.3542300760746002, "learning_rate": 0.00019673507583930128, "loss": 1.5557, "step": 1272 }, { "epoch": 0.01654205440490674, "grad_norm": 0.3644115626811981, "learning_rate": 0.0001967324763773899, "loss": 1.3361, "step": 1273 }, { "epoch": 0.016555048948822614, "grad_norm": 0.4357520043849945, "learning_rate": 0.0001967298769154785, "loss": 1.4503, "step": 1274 }, { "epoch": 0.016568043492738486, "grad_norm": 0.3494506776332855, "learning_rate": 0.00019672727745356712, "loss": 1.2671, "step": 1275 }, { "epoch": 0.01658103803665436, "grad_norm": 0.39975765347480774, "learning_rate": 0.00019672467799165572, "loss": 1.5215, "step": 1276 }, { "epoch": 0.016594032580570232, "grad_norm": 0.373329222202301, "learning_rate": 0.00019672207852974437, "loss": 1.4909, "step": 1277 }, { "epoch": 0.016607027124486105, "grad_norm": 0.43159815669059753, "learning_rate": 0.00019671947906783297, "loss": 1.4468, "step": 1278 }, { "epoch": 0.01662002166840198, "grad_norm": 0.41933301091194153, "learning_rate": 0.00019671687960592157, "loss": 1.3893, "step": 1279 }, { "epoch": 0.016633016212317855, "grad_norm": 0.3944145739078522, "learning_rate": 0.00019671428014401022, "loss": 1.5484, "step": 1280 }, { "epoch": 0.016646010756233728, "grad_norm": 0.38021165132522583, "learning_rate": 0.00019671168068209881, "loss": 1.4971, "step": 1281 }, { "epoch": 0.0166590053001496, "grad_norm": 0.3604079782962799, "learning_rate": 0.00019670908122018744, "loss": 1.3135, "step": 1282 }, { "epoch": 0.016671999844065474, "grad_norm": 0.2936398386955261, "learning_rate": 0.00019670648175827604, "loss": 1.3214, "step": 1283 }, { "epoch": 0.016684994387981347, "grad_norm": 0.41944095492362976, "learning_rate": 0.00019670388229636466, "loss": 1.5769, "step": 1284 }, { "epoch": 0.01669798893189722, "grad_norm": 0.3244532346725464, "learning_rate": 0.00019670128283445329, "loss": 1.4632, "step": 1285 }, { "epoch": 0.016710983475813092, "grad_norm": 0.4025017023086548, "learning_rate": 0.00019669868337254188, "loss": 1.2391, "step": 1286 }, { "epoch": 0.016723978019728965, "grad_norm": 0.2932943105697632, "learning_rate": 0.0001966960839106305, "loss": 1.2617, "step": 1287 }, { "epoch": 0.01673697256364484, "grad_norm": 0.33363422751426697, "learning_rate": 0.00019669348444871913, "loss": 1.3775, "step": 1288 }, { "epoch": 0.01674996710756071, "grad_norm": 0.3420233428478241, "learning_rate": 0.00019669088498680776, "loss": 1.4534, "step": 1289 }, { "epoch": 0.016762961651476588, "grad_norm": 0.3920753002166748, "learning_rate": 0.00019668828552489635, "loss": 1.4119, "step": 1290 }, { "epoch": 0.01677595619539246, "grad_norm": 0.1987222135066986, "learning_rate": 0.00019668568606298495, "loss": 1.4032, "step": 1291 }, { "epoch": 0.016788950739308334, "grad_norm": 0.431986927986145, "learning_rate": 0.0001966830866010736, "loss": 1.4481, "step": 1292 }, { "epoch": 0.016801945283224207, "grad_norm": 0.36777424812316895, "learning_rate": 0.0001966804871391622, "loss": 1.4234, "step": 1293 }, { "epoch": 0.01681493982714008, "grad_norm": 0.41752204298973083, "learning_rate": 0.00019667788767725082, "loss": 1.4181, "step": 1294 }, { "epoch": 0.016827934371055953, "grad_norm": 0.36617961525917053, "learning_rate": 0.00019667528821533942, "loss": 1.4269, "step": 1295 }, { "epoch": 0.016840928914971826, "grad_norm": 0.35011550784111023, "learning_rate": 0.00019667268875342805, "loss": 1.3349, "step": 1296 }, { "epoch": 0.0168539234588877, "grad_norm": 0.46133318543434143, "learning_rate": 0.00019667008929151667, "loss": 1.6471, "step": 1297 }, { "epoch": 0.01686691800280357, "grad_norm": 0.31067320704460144, "learning_rate": 0.00019666748982960527, "loss": 1.4683, "step": 1298 }, { "epoch": 0.016879912546719448, "grad_norm": 0.37132853269577026, "learning_rate": 0.0001966648903676939, "loss": 1.4418, "step": 1299 }, { "epoch": 0.01689290709063532, "grad_norm": 0.4115432798862457, "learning_rate": 0.00019666229090578252, "loss": 1.473, "step": 1300 }, { "epoch": 0.016905901634551194, "grad_norm": 0.38667991757392883, "learning_rate": 0.00019665969144387114, "loss": 1.8367, "step": 1301 }, { "epoch": 0.016918896178467067, "grad_norm": 0.3053534924983978, "learning_rate": 0.00019665709198195974, "loss": 1.6643, "step": 1302 }, { "epoch": 0.01693189072238294, "grad_norm": 0.47252416610717773, "learning_rate": 0.00019665449252004834, "loss": 1.4148, "step": 1303 }, { "epoch": 0.016944885266298813, "grad_norm": 0.3782711327075958, "learning_rate": 0.000196651893058137, "loss": 1.5064, "step": 1304 }, { "epoch": 0.016957879810214686, "grad_norm": 0.3270961344242096, "learning_rate": 0.00019664929359622559, "loss": 1.5107, "step": 1305 }, { "epoch": 0.01697087435413056, "grad_norm": 0.46320730447769165, "learning_rate": 0.0001966466941343142, "loss": 1.5103, "step": 1306 }, { "epoch": 0.01698386889804643, "grad_norm": 0.4325108230113983, "learning_rate": 0.0001966440946724028, "loss": 1.4994, "step": 1307 }, { "epoch": 0.016996863441962305, "grad_norm": 0.4188390076160431, "learning_rate": 0.00019664149521049143, "loss": 1.6011, "step": 1308 }, { "epoch": 0.01700985798587818, "grad_norm": 0.3909618854522705, "learning_rate": 0.00019663889574858006, "loss": 1.3284, "step": 1309 }, { "epoch": 0.017022852529794054, "grad_norm": 0.33980676531791687, "learning_rate": 0.00019663629628666865, "loss": 1.4136, "step": 1310 }, { "epoch": 0.017035847073709927, "grad_norm": 0.4157145917415619, "learning_rate": 0.00019663369682475728, "loss": 1.4706, "step": 1311 }, { "epoch": 0.0170488416176258, "grad_norm": 0.38978928327560425, "learning_rate": 0.0001966310973628459, "loss": 1.4535, "step": 1312 }, { "epoch": 0.017061836161541673, "grad_norm": 0.3986358642578125, "learning_rate": 0.00019662849790093453, "loss": 1.4836, "step": 1313 }, { "epoch": 0.017074830705457546, "grad_norm": 0.35495102405548096, "learning_rate": 0.00019662589843902312, "loss": 1.468, "step": 1314 }, { "epoch": 0.01708782524937342, "grad_norm": 0.3181716501712799, "learning_rate": 0.00019662329897711175, "loss": 1.4632, "step": 1315 }, { "epoch": 0.01710081979328929, "grad_norm": 0.28140318393707275, "learning_rate": 0.00019662069951520037, "loss": 1.3824, "step": 1316 }, { "epoch": 0.017113814337205165, "grad_norm": 0.47010791301727295, "learning_rate": 0.00019661810005328897, "loss": 1.5767, "step": 1317 }, { "epoch": 0.01712680888112104, "grad_norm": 0.33159905672073364, "learning_rate": 0.0001966155005913776, "loss": 1.547, "step": 1318 }, { "epoch": 0.017139803425036914, "grad_norm": 0.39347562193870544, "learning_rate": 0.00019661290112946622, "loss": 1.4111, "step": 1319 }, { "epoch": 0.017152797968952787, "grad_norm": 0.38357362151145935, "learning_rate": 0.00019661030166755482, "loss": 1.5499, "step": 1320 }, { "epoch": 0.01716579251286866, "grad_norm": 0.4207701086997986, "learning_rate": 0.00019660770220564344, "loss": 1.6153, "step": 1321 }, { "epoch": 0.017178787056784533, "grad_norm": 0.3719491958618164, "learning_rate": 0.00019660510274373204, "loss": 1.323, "step": 1322 }, { "epoch": 0.017191781600700406, "grad_norm": 0.3435841500759125, "learning_rate": 0.0001966025032818207, "loss": 1.5274, "step": 1323 }, { "epoch": 0.01720477614461628, "grad_norm": 0.389879435300827, "learning_rate": 0.0001965999038199093, "loss": 1.3968, "step": 1324 }, { "epoch": 0.01721777068853215, "grad_norm": 0.42259082198143005, "learning_rate": 0.0001965973043579979, "loss": 1.5433, "step": 1325 }, { "epoch": 0.017230765232448025, "grad_norm": 0.35753822326660156, "learning_rate": 0.0001965947048960865, "loss": 1.709, "step": 1326 }, { "epoch": 0.017243759776363898, "grad_norm": 0.26413440704345703, "learning_rate": 0.00019659210543417513, "loss": 1.378, "step": 1327 }, { "epoch": 0.017256754320279774, "grad_norm": 0.42384201288223267, "learning_rate": 0.00019658950597226376, "loss": 1.4592, "step": 1328 }, { "epoch": 0.017269748864195647, "grad_norm": 0.7451703548431396, "learning_rate": 0.00019658690651035236, "loss": 1.6224, "step": 1329 }, { "epoch": 0.01728274340811152, "grad_norm": 0.32426488399505615, "learning_rate": 0.00019658430704844098, "loss": 1.2088, "step": 1330 }, { "epoch": 0.017295737952027393, "grad_norm": 0.5340223908424377, "learning_rate": 0.0001965817075865296, "loss": 1.4398, "step": 1331 }, { "epoch": 0.017308732495943266, "grad_norm": 0.3323982357978821, "learning_rate": 0.00019657910812461823, "loss": 1.5148, "step": 1332 }, { "epoch": 0.01732172703985914, "grad_norm": 0.511139988899231, "learning_rate": 0.00019657650866270683, "loss": 1.4744, "step": 1333 }, { "epoch": 0.017334721583775012, "grad_norm": 0.3836911618709564, "learning_rate": 0.00019657390920079542, "loss": 1.5983, "step": 1334 }, { "epoch": 0.017347716127690885, "grad_norm": 0.3472544252872467, "learning_rate": 0.00019657130973888408, "loss": 1.3834, "step": 1335 }, { "epoch": 0.017360710671606758, "grad_norm": 0.28959575295448303, "learning_rate": 0.00019656871027697267, "loss": 1.2538, "step": 1336 }, { "epoch": 0.017373705215522634, "grad_norm": 0.4705945551395416, "learning_rate": 0.0001965661108150613, "loss": 1.4277, "step": 1337 }, { "epoch": 0.017386699759438507, "grad_norm": 0.44887563586235046, "learning_rate": 0.0001965635113531499, "loss": 1.6429, "step": 1338 }, { "epoch": 0.01739969430335438, "grad_norm": 0.39057818055152893, "learning_rate": 0.00019656091189123852, "loss": 1.4686, "step": 1339 }, { "epoch": 0.017412688847270253, "grad_norm": 0.39304253458976746, "learning_rate": 0.00019655831242932714, "loss": 1.4984, "step": 1340 }, { "epoch": 0.017425683391186126, "grad_norm": 0.44281622767448425, "learning_rate": 0.00019655571296741574, "loss": 1.5449, "step": 1341 }, { "epoch": 0.017438677935102, "grad_norm": 0.38267067074775696, "learning_rate": 0.00019655311350550437, "loss": 1.4291, "step": 1342 }, { "epoch": 0.017451672479017872, "grad_norm": 0.31228458881378174, "learning_rate": 0.000196550514043593, "loss": 1.544, "step": 1343 }, { "epoch": 0.017464667022933745, "grad_norm": 0.3796466886997223, "learning_rate": 0.00019654791458168162, "loss": 1.5666, "step": 1344 }, { "epoch": 0.017477661566849618, "grad_norm": 0.2629081904888153, "learning_rate": 0.0001965453151197702, "loss": 1.4495, "step": 1345 }, { "epoch": 0.01749065611076549, "grad_norm": 0.44984155893325806, "learning_rate": 0.0001965427156578588, "loss": 1.6004, "step": 1346 }, { "epoch": 0.017503650654681367, "grad_norm": 0.3668937385082245, "learning_rate": 0.00019654011619594746, "loss": 1.5419, "step": 1347 }, { "epoch": 0.01751664519859724, "grad_norm": 0.41513144969940186, "learning_rate": 0.00019653751673403606, "loss": 1.5247, "step": 1348 }, { "epoch": 0.017529639742513113, "grad_norm": 0.36529871821403503, "learning_rate": 0.00019653491727212468, "loss": 1.3889, "step": 1349 }, { "epoch": 0.017542634286428986, "grad_norm": 0.272764652967453, "learning_rate": 0.00019653231781021328, "loss": 1.3127, "step": 1350 }, { "epoch": 0.01755562883034486, "grad_norm": 0.4416327178478241, "learning_rate": 0.0001965297183483019, "loss": 1.4581, "step": 1351 }, { "epoch": 0.017568623374260732, "grad_norm": 0.3854648470878601, "learning_rate": 0.00019652711888639053, "loss": 1.4115, "step": 1352 }, { "epoch": 0.017581617918176605, "grad_norm": 0.3894107937812805, "learning_rate": 0.00019652451942447913, "loss": 1.5245, "step": 1353 }, { "epoch": 0.017594612462092478, "grad_norm": 0.3270106613636017, "learning_rate": 0.00019652191996256778, "loss": 1.377, "step": 1354 }, { "epoch": 0.01760760700600835, "grad_norm": 0.35854044556617737, "learning_rate": 0.00019651932050065638, "loss": 1.3939, "step": 1355 }, { "epoch": 0.017620601549924224, "grad_norm": 0.391289621591568, "learning_rate": 0.000196516721038745, "loss": 1.4741, "step": 1356 }, { "epoch": 0.0176335960938401, "grad_norm": 0.4587692618370056, "learning_rate": 0.0001965141215768336, "loss": 1.6093, "step": 1357 }, { "epoch": 0.017646590637755973, "grad_norm": 0.5379094481468201, "learning_rate": 0.00019651152211492222, "loss": 1.526, "step": 1358 }, { "epoch": 0.017659585181671846, "grad_norm": 0.37917113304138184, "learning_rate": 0.00019650892265301085, "loss": 1.44, "step": 1359 }, { "epoch": 0.01767257972558772, "grad_norm": 0.48548614978790283, "learning_rate": 0.00019650632319109944, "loss": 1.5297, "step": 1360 }, { "epoch": 0.017685574269503592, "grad_norm": 0.3773084878921509, "learning_rate": 0.00019650372372918807, "loss": 1.3303, "step": 1361 }, { "epoch": 0.017698568813419465, "grad_norm": 0.30313122272491455, "learning_rate": 0.0001965011242672767, "loss": 1.3955, "step": 1362 }, { "epoch": 0.017711563357335338, "grad_norm": 0.3642292320728302, "learning_rate": 0.0001964985248053653, "loss": 1.4358, "step": 1363 }, { "epoch": 0.01772455790125121, "grad_norm": 0.34993597865104675, "learning_rate": 0.00019649592534345392, "loss": 1.4424, "step": 1364 }, { "epoch": 0.017737552445167084, "grad_norm": 0.38262784481048584, "learning_rate": 0.0001964933258815425, "loss": 1.4008, "step": 1365 }, { "epoch": 0.01775054698908296, "grad_norm": 0.36598148941993713, "learning_rate": 0.00019649072641963116, "loss": 1.3502, "step": 1366 }, { "epoch": 0.017763541532998833, "grad_norm": 0.32614684104919434, "learning_rate": 0.00019648812695771976, "loss": 1.2557, "step": 1367 }, { "epoch": 0.017776536076914706, "grad_norm": 0.2726497948169708, "learning_rate": 0.00019648552749580839, "loss": 1.4819, "step": 1368 }, { "epoch": 0.01778953062083058, "grad_norm": 0.41167518496513367, "learning_rate": 0.00019648292803389698, "loss": 1.5286, "step": 1369 }, { "epoch": 0.017802525164746452, "grad_norm": 0.42786210775375366, "learning_rate": 0.0001964803285719856, "loss": 1.4808, "step": 1370 }, { "epoch": 0.017815519708662325, "grad_norm": 0.3530051112174988, "learning_rate": 0.00019647772911007423, "loss": 1.2793, "step": 1371 }, { "epoch": 0.017828514252578198, "grad_norm": 0.6079438924789429, "learning_rate": 0.00019647512964816283, "loss": 1.5808, "step": 1372 }, { "epoch": 0.01784150879649407, "grad_norm": 0.3473976254463196, "learning_rate": 0.00019647253018625145, "loss": 1.4315, "step": 1373 }, { "epoch": 0.017854503340409944, "grad_norm": 0.3194151222705841, "learning_rate": 0.00019646993072434008, "loss": 1.5639, "step": 1374 }, { "epoch": 0.017867497884325817, "grad_norm": 0.3278243839740753, "learning_rate": 0.00019646733126242868, "loss": 1.5066, "step": 1375 }, { "epoch": 0.017880492428241693, "grad_norm": 0.4241548478603363, "learning_rate": 0.0001964647318005173, "loss": 1.5062, "step": 1376 }, { "epoch": 0.017893486972157566, "grad_norm": 0.40428072214126587, "learning_rate": 0.0001964621323386059, "loss": 1.5999, "step": 1377 }, { "epoch": 0.01790648151607344, "grad_norm": 0.402030348777771, "learning_rate": 0.00019645953287669455, "loss": 1.5519, "step": 1378 }, { "epoch": 0.017919476059989312, "grad_norm": 0.33562710881233215, "learning_rate": 0.00019645693341478315, "loss": 1.3547, "step": 1379 }, { "epoch": 0.017932470603905185, "grad_norm": 0.37471288442611694, "learning_rate": 0.00019645433395287177, "loss": 1.5516, "step": 1380 }, { "epoch": 0.017945465147821058, "grad_norm": 0.45008721947669983, "learning_rate": 0.00019645173449096037, "loss": 1.4721, "step": 1381 }, { "epoch": 0.01795845969173693, "grad_norm": 0.3916715979576111, "learning_rate": 0.000196449135029049, "loss": 1.3908, "step": 1382 }, { "epoch": 0.017971454235652804, "grad_norm": 0.364383727312088, "learning_rate": 0.00019644653556713762, "loss": 1.59, "step": 1383 }, { "epoch": 0.017984448779568677, "grad_norm": 0.41933467984199524, "learning_rate": 0.00019644393610522622, "loss": 1.6209, "step": 1384 }, { "epoch": 0.017997443323484554, "grad_norm": 0.37265875935554504, "learning_rate": 0.00019644133664331484, "loss": 1.4868, "step": 1385 }, { "epoch": 0.018010437867400426, "grad_norm": 0.3947279751300812, "learning_rate": 0.00019643873718140346, "loss": 1.3463, "step": 1386 }, { "epoch": 0.0180234324113163, "grad_norm": 0.38589316606521606, "learning_rate": 0.00019643613771949206, "loss": 1.416, "step": 1387 }, { "epoch": 0.018036426955232172, "grad_norm": 0.3667202889919281, "learning_rate": 0.00019643353825758069, "loss": 1.468, "step": 1388 }, { "epoch": 0.018049421499148045, "grad_norm": 0.47952884435653687, "learning_rate": 0.00019643093879566928, "loss": 1.5815, "step": 1389 }, { "epoch": 0.01806241604306392, "grad_norm": 0.4453055262565613, "learning_rate": 0.00019642833933375793, "loss": 1.601, "step": 1390 }, { "epoch": 0.01807541058697979, "grad_norm": 0.36778724193573, "learning_rate": 0.00019642573987184653, "loss": 1.4059, "step": 1391 }, { "epoch": 0.018088405130895664, "grad_norm": 0.3312399685382843, "learning_rate": 0.00019642314040993516, "loss": 1.6006, "step": 1392 }, { "epoch": 0.018101399674811537, "grad_norm": 0.29925376176834106, "learning_rate": 0.00019642054094802378, "loss": 1.3347, "step": 1393 }, { "epoch": 0.01811439421872741, "grad_norm": 0.32471317052841187, "learning_rate": 0.00019641794148611238, "loss": 1.6194, "step": 1394 }, { "epoch": 0.018127388762643287, "grad_norm": 0.3405779004096985, "learning_rate": 0.000196415342024201, "loss": 1.2661, "step": 1395 }, { "epoch": 0.01814038330655916, "grad_norm": 0.34910064935684204, "learning_rate": 0.0001964127425622896, "loss": 1.4643, "step": 1396 }, { "epoch": 0.018153377850475032, "grad_norm": 0.3806057572364807, "learning_rate": 0.00019641014310037825, "loss": 1.4939, "step": 1397 }, { "epoch": 0.018166372394390905, "grad_norm": 0.3816677927970886, "learning_rate": 0.00019640754363846685, "loss": 1.486, "step": 1398 }, { "epoch": 0.01817936693830678, "grad_norm": 0.38400983810424805, "learning_rate": 0.00019640494417655547, "loss": 1.6257, "step": 1399 }, { "epoch": 0.01819236148222265, "grad_norm": 0.4322017431259155, "learning_rate": 0.00019640234471464407, "loss": 1.6245, "step": 1400 }, { "epoch": 0.018205356026138524, "grad_norm": 0.3365495204925537, "learning_rate": 0.0001963997452527327, "loss": 1.4112, "step": 1401 }, { "epoch": 0.018218350570054397, "grad_norm": 0.32547080516815186, "learning_rate": 0.00019639714579082132, "loss": 1.5964, "step": 1402 }, { "epoch": 0.01823134511397027, "grad_norm": 0.32175591588020325, "learning_rate": 0.00019639454632890992, "loss": 1.5287, "step": 1403 }, { "epoch": 0.018244339657886147, "grad_norm": 0.5055533051490784, "learning_rate": 0.00019639194686699854, "loss": 1.5169, "step": 1404 }, { "epoch": 0.01825733420180202, "grad_norm": 0.43116527795791626, "learning_rate": 0.00019638934740508717, "loss": 1.6879, "step": 1405 }, { "epoch": 0.018270328745717893, "grad_norm": 0.44412800669670105, "learning_rate": 0.00019638674794317576, "loss": 1.5493, "step": 1406 }, { "epoch": 0.018283323289633766, "grad_norm": 0.3559592664241791, "learning_rate": 0.0001963841484812644, "loss": 1.431, "step": 1407 }, { "epoch": 0.01829631783354964, "grad_norm": 0.28410786390304565, "learning_rate": 0.00019638154901935299, "loss": 1.2386, "step": 1408 }, { "epoch": 0.01830931237746551, "grad_norm": 0.3983698785305023, "learning_rate": 0.00019637894955744164, "loss": 1.4429, "step": 1409 }, { "epoch": 0.018322306921381384, "grad_norm": 0.44524967670440674, "learning_rate": 0.00019637635009553023, "loss": 1.4997, "step": 1410 }, { "epoch": 0.018335301465297257, "grad_norm": 0.41650405526161194, "learning_rate": 0.00019637375063361886, "loss": 1.4228, "step": 1411 }, { "epoch": 0.01834829600921313, "grad_norm": 0.33032137155532837, "learning_rate": 0.00019637115117170746, "loss": 1.3543, "step": 1412 }, { "epoch": 0.018361290553129003, "grad_norm": 0.37099602818489075, "learning_rate": 0.00019636855170979608, "loss": 1.4397, "step": 1413 }, { "epoch": 0.01837428509704488, "grad_norm": 0.35451018810272217, "learning_rate": 0.0001963659522478847, "loss": 1.4941, "step": 1414 }, { "epoch": 0.018387279640960753, "grad_norm": 0.39484304189682007, "learning_rate": 0.0001963633527859733, "loss": 1.4442, "step": 1415 }, { "epoch": 0.018400274184876626, "grad_norm": 0.3444206118583679, "learning_rate": 0.00019636075332406193, "loss": 1.2892, "step": 1416 }, { "epoch": 0.0184132687287925, "grad_norm": 0.39459744095802307, "learning_rate": 0.00019635815386215055, "loss": 1.3448, "step": 1417 }, { "epoch": 0.01842626327270837, "grad_norm": 0.43979862332344055, "learning_rate": 0.00019635555440023915, "loss": 1.4319, "step": 1418 }, { "epoch": 0.018439257816624244, "grad_norm": 0.37974467873573303, "learning_rate": 0.00019635295493832777, "loss": 1.408, "step": 1419 }, { "epoch": 0.018452252360540117, "grad_norm": 0.4778790771961212, "learning_rate": 0.00019635035547641637, "loss": 1.5885, "step": 1420 }, { "epoch": 0.01846524690445599, "grad_norm": 0.4614560604095459, "learning_rate": 0.00019634775601450502, "loss": 1.4592, "step": 1421 }, { "epoch": 0.018478241448371863, "grad_norm": 0.3092544674873352, "learning_rate": 0.00019634515655259362, "loss": 1.4532, "step": 1422 }, { "epoch": 0.01849123599228774, "grad_norm": 0.4220763146877289, "learning_rate": 0.00019634255709068224, "loss": 1.4722, "step": 1423 }, { "epoch": 0.018504230536203613, "grad_norm": 0.3420048952102661, "learning_rate": 0.00019633995762877084, "loss": 1.6317, "step": 1424 }, { "epoch": 0.018517225080119486, "grad_norm": 0.3050045371055603, "learning_rate": 0.00019633735816685947, "loss": 1.3881, "step": 1425 }, { "epoch": 0.01853021962403536, "grad_norm": 0.34365832805633545, "learning_rate": 0.0001963347587049481, "loss": 1.4057, "step": 1426 }, { "epoch": 0.01854321416795123, "grad_norm": 0.6004374027252197, "learning_rate": 0.0001963321592430367, "loss": 1.5394, "step": 1427 }, { "epoch": 0.018556208711867105, "grad_norm": 0.318081259727478, "learning_rate": 0.00019632955978112534, "loss": 1.5718, "step": 1428 }, { "epoch": 0.018569203255782978, "grad_norm": 0.33333227038383484, "learning_rate": 0.00019632696031921394, "loss": 1.2994, "step": 1429 }, { "epoch": 0.01858219779969885, "grad_norm": 0.4516380727291107, "learning_rate": 0.00019632436085730253, "loss": 1.5457, "step": 1430 }, { "epoch": 0.018595192343614723, "grad_norm": 0.4420582056045532, "learning_rate": 0.00019632176139539116, "loss": 1.3913, "step": 1431 }, { "epoch": 0.018608186887530596, "grad_norm": 0.3033845126628876, "learning_rate": 0.00019631916193347978, "loss": 1.477, "step": 1432 }, { "epoch": 0.018621181431446473, "grad_norm": 0.4648483693599701, "learning_rate": 0.0001963165624715684, "loss": 1.6043, "step": 1433 }, { "epoch": 0.018634175975362346, "grad_norm": 0.28452369570732117, "learning_rate": 0.000196313963009657, "loss": 1.1945, "step": 1434 }, { "epoch": 0.01864717051927822, "grad_norm": 0.3563484251499176, "learning_rate": 0.00019631136354774563, "loss": 1.5169, "step": 1435 }, { "epoch": 0.01866016506319409, "grad_norm": 0.44836705923080444, "learning_rate": 0.00019630876408583425, "loss": 1.5318, "step": 1436 }, { "epoch": 0.018673159607109965, "grad_norm": 0.3512181341648102, "learning_rate": 0.00019630616462392285, "loss": 1.4591, "step": 1437 }, { "epoch": 0.018686154151025838, "grad_norm": 0.3802800476551056, "learning_rate": 0.00019630356516201148, "loss": 1.681, "step": 1438 }, { "epoch": 0.01869914869494171, "grad_norm": 0.3871404826641083, "learning_rate": 0.00019630096570010007, "loss": 1.315, "step": 1439 }, { "epoch": 0.018712143238857584, "grad_norm": 0.4375351369380951, "learning_rate": 0.00019629836623818873, "loss": 1.5924, "step": 1440 }, { "epoch": 0.018725137782773457, "grad_norm": 0.3527708649635315, "learning_rate": 0.00019629576677627732, "loss": 1.519, "step": 1441 }, { "epoch": 0.018738132326689333, "grad_norm": 0.48671025037765503, "learning_rate": 0.00019629316731436592, "loss": 1.4259, "step": 1442 }, { "epoch": 0.018751126870605206, "grad_norm": 0.3766919672489166, "learning_rate": 0.00019629056785245454, "loss": 1.5339, "step": 1443 }, { "epoch": 0.01876412141452108, "grad_norm": 0.40276792645454407, "learning_rate": 0.00019628796839054317, "loss": 1.4726, "step": 1444 }, { "epoch": 0.018777115958436952, "grad_norm": 0.3981993496417999, "learning_rate": 0.0001962853689286318, "loss": 1.6222, "step": 1445 }, { "epoch": 0.018790110502352825, "grad_norm": 0.41286009550094604, "learning_rate": 0.0001962827694667204, "loss": 1.4731, "step": 1446 }, { "epoch": 0.018803105046268698, "grad_norm": 0.3539413809776306, "learning_rate": 0.00019628017000480902, "loss": 1.619, "step": 1447 }, { "epoch": 0.01881609959018457, "grad_norm": 0.33750399947166443, "learning_rate": 0.00019627757054289764, "loss": 1.3662, "step": 1448 }, { "epoch": 0.018829094134100444, "grad_norm": 0.3177066445350647, "learning_rate": 0.00019627497108098624, "loss": 1.1867, "step": 1449 }, { "epoch": 0.018842088678016317, "grad_norm": 0.39732736349105835, "learning_rate": 0.00019627237161907486, "loss": 1.4744, "step": 1450 }, { "epoch": 0.01885508322193219, "grad_norm": 0.41208726167678833, "learning_rate": 0.00019626977215716346, "loss": 1.4427, "step": 1451 }, { "epoch": 0.018868077765848066, "grad_norm": 0.34757867455482483, "learning_rate": 0.0001962671726952521, "loss": 1.3379, "step": 1452 }, { "epoch": 0.01888107230976394, "grad_norm": 0.37493589520454407, "learning_rate": 0.0001962645732333407, "loss": 1.5556, "step": 1453 }, { "epoch": 0.018894066853679812, "grad_norm": 0.311355322599411, "learning_rate": 0.00019626197377142933, "loss": 1.1438, "step": 1454 }, { "epoch": 0.018907061397595685, "grad_norm": 0.3531663119792938, "learning_rate": 0.00019625937430951793, "loss": 1.399, "step": 1455 }, { "epoch": 0.018920055941511558, "grad_norm": 0.36858677864074707, "learning_rate": 0.00019625677484760655, "loss": 1.4933, "step": 1456 }, { "epoch": 0.01893305048542743, "grad_norm": 0.4147689938545227, "learning_rate": 0.00019625417538569518, "loss": 1.424, "step": 1457 }, { "epoch": 0.018946045029343304, "grad_norm": 0.3704078495502472, "learning_rate": 0.00019625157592378378, "loss": 1.4857, "step": 1458 }, { "epoch": 0.018959039573259177, "grad_norm": 0.38175466656684875, "learning_rate": 0.0001962489764618724, "loss": 1.4635, "step": 1459 }, { "epoch": 0.01897203411717505, "grad_norm": 0.4142589569091797, "learning_rate": 0.00019624637699996103, "loss": 1.3575, "step": 1460 }, { "epoch": 0.018985028661090926, "grad_norm": 0.37956202030181885, "learning_rate": 0.00019624377753804962, "loss": 1.3203, "step": 1461 }, { "epoch": 0.0189980232050068, "grad_norm": 0.34043657779693604, "learning_rate": 0.00019624117807613825, "loss": 1.6161, "step": 1462 }, { "epoch": 0.019011017748922672, "grad_norm": 0.4602002203464508, "learning_rate": 0.00019623857861422684, "loss": 1.4248, "step": 1463 }, { "epoch": 0.019024012292838545, "grad_norm": 0.39603501558303833, "learning_rate": 0.0001962359791523155, "loss": 1.6715, "step": 1464 }, { "epoch": 0.019037006836754418, "grad_norm": 0.357126384973526, "learning_rate": 0.0001962333796904041, "loss": 1.6395, "step": 1465 }, { "epoch": 0.01905000138067029, "grad_norm": 0.3500949442386627, "learning_rate": 0.00019623078022849272, "loss": 1.3647, "step": 1466 }, { "epoch": 0.019062995924586164, "grad_norm": 0.36586612462997437, "learning_rate": 0.00019622818076658134, "loss": 1.3783, "step": 1467 }, { "epoch": 0.019075990468502037, "grad_norm": 0.4651097357273102, "learning_rate": 0.00019622558130466994, "loss": 1.5214, "step": 1468 }, { "epoch": 0.01908898501241791, "grad_norm": 0.5161360502243042, "learning_rate": 0.00019622298184275856, "loss": 1.5843, "step": 1469 }, { "epoch": 0.019101979556333783, "grad_norm": 0.31639549136161804, "learning_rate": 0.00019622038238084716, "loss": 1.2787, "step": 1470 }, { "epoch": 0.01911497410024966, "grad_norm": 0.3406965434551239, "learning_rate": 0.00019621778291893579, "loss": 1.5458, "step": 1471 }, { "epoch": 0.019127968644165532, "grad_norm": 0.3634943664073944, "learning_rate": 0.0001962151834570244, "loss": 1.3922, "step": 1472 }, { "epoch": 0.019140963188081405, "grad_norm": 0.4297401010990143, "learning_rate": 0.000196212583995113, "loss": 1.6699, "step": 1473 }, { "epoch": 0.019153957731997278, "grad_norm": 0.3873206079006195, "learning_rate": 0.00019620998453320163, "loss": 1.3755, "step": 1474 }, { "epoch": 0.01916695227591315, "grad_norm": 0.35218915343284607, "learning_rate": 0.00019620738507129026, "loss": 1.5802, "step": 1475 }, { "epoch": 0.019179946819829024, "grad_norm": 0.4823857247829437, "learning_rate": 0.00019620478560937888, "loss": 1.3618, "step": 1476 }, { "epoch": 0.019192941363744897, "grad_norm": 0.33877623081207275, "learning_rate": 0.00019620218614746748, "loss": 1.5062, "step": 1477 }, { "epoch": 0.01920593590766077, "grad_norm": 0.33483630418777466, "learning_rate": 0.0001961995866855561, "loss": 1.6163, "step": 1478 }, { "epoch": 0.019218930451576643, "grad_norm": 0.37369218468666077, "learning_rate": 0.00019619698722364473, "loss": 1.5505, "step": 1479 }, { "epoch": 0.01923192499549252, "grad_norm": 0.3920283615589142, "learning_rate": 0.00019619438776173333, "loss": 1.6222, "step": 1480 }, { "epoch": 0.019244919539408392, "grad_norm": 0.35777056217193604, "learning_rate": 0.00019619178829982195, "loss": 1.7302, "step": 1481 }, { "epoch": 0.019257914083324265, "grad_norm": 0.4896351397037506, "learning_rate": 0.00019618918883791055, "loss": 1.5716, "step": 1482 }, { "epoch": 0.019270908627240138, "grad_norm": 0.3160429894924164, "learning_rate": 0.0001961865893759992, "loss": 1.4115, "step": 1483 }, { "epoch": 0.01928390317115601, "grad_norm": 0.3355870246887207, "learning_rate": 0.0001961839899140878, "loss": 1.3963, "step": 1484 }, { "epoch": 0.019296897715071884, "grad_norm": 0.39011481404304504, "learning_rate": 0.0001961813904521764, "loss": 1.6994, "step": 1485 }, { "epoch": 0.019309892258987757, "grad_norm": 0.3732297420501709, "learning_rate": 0.00019617879099026502, "loss": 1.4887, "step": 1486 }, { "epoch": 0.01932288680290363, "grad_norm": 0.3787655532360077, "learning_rate": 0.00019617619152835364, "loss": 1.7666, "step": 1487 }, { "epoch": 0.019335881346819503, "grad_norm": 0.45300403237342834, "learning_rate": 0.00019617359206644227, "loss": 1.5503, "step": 1488 }, { "epoch": 0.019348875890735376, "grad_norm": 0.36539945006370544, "learning_rate": 0.00019617099260453086, "loss": 1.3619, "step": 1489 }, { "epoch": 0.019361870434651252, "grad_norm": 0.38043802976608276, "learning_rate": 0.0001961683931426195, "loss": 1.5032, "step": 1490 }, { "epoch": 0.019374864978567125, "grad_norm": 0.3876885175704956, "learning_rate": 0.0001961657936807081, "loss": 1.5929, "step": 1491 }, { "epoch": 0.019387859522482998, "grad_norm": 0.31826695799827576, "learning_rate": 0.0001961631942187967, "loss": 1.2323, "step": 1492 }, { "epoch": 0.01940085406639887, "grad_norm": 0.45770037174224854, "learning_rate": 0.00019616059475688534, "loss": 1.4443, "step": 1493 }, { "epoch": 0.019413848610314744, "grad_norm": 0.39884278178215027, "learning_rate": 0.00019615799529497393, "loss": 1.3063, "step": 1494 }, { "epoch": 0.019426843154230617, "grad_norm": 0.28220826387405396, "learning_rate": 0.00019615539583306258, "loss": 1.5326, "step": 1495 }, { "epoch": 0.01943983769814649, "grad_norm": 0.47629642486572266, "learning_rate": 0.00019615279637115118, "loss": 1.5384, "step": 1496 }, { "epoch": 0.019452832242062363, "grad_norm": 0.417201429605484, "learning_rate": 0.00019615019690923978, "loss": 1.4289, "step": 1497 }, { "epoch": 0.019465826785978236, "grad_norm": 0.4086882770061493, "learning_rate": 0.0001961475974473284, "loss": 1.5168, "step": 1498 }, { "epoch": 0.019478821329894112, "grad_norm": 0.3688398003578186, "learning_rate": 0.00019614499798541703, "loss": 1.5101, "step": 1499 }, { "epoch": 0.019491815873809985, "grad_norm": 0.39062660932540894, "learning_rate": 0.00019614239852350565, "loss": 1.5113, "step": 1500 }, { "epoch": 0.01950481041772586, "grad_norm": 0.395280659198761, "learning_rate": 0.00019613979906159425, "loss": 1.45, "step": 1501 }, { "epoch": 0.01951780496164173, "grad_norm": 0.43302208185195923, "learning_rate": 0.00019613719959968287, "loss": 1.5149, "step": 1502 }, { "epoch": 0.019530799505557604, "grad_norm": 0.37377139925956726, "learning_rate": 0.0001961346001377715, "loss": 1.6256, "step": 1503 }, { "epoch": 0.019543794049473477, "grad_norm": 0.370684951543808, "learning_rate": 0.0001961320006758601, "loss": 1.5941, "step": 1504 }, { "epoch": 0.01955678859338935, "grad_norm": 0.33560818433761597, "learning_rate": 0.00019612940121394872, "loss": 1.2448, "step": 1505 }, { "epoch": 0.019569783137305223, "grad_norm": 0.3527664840221405, "learning_rate": 0.00019612680175203735, "loss": 1.4019, "step": 1506 }, { "epoch": 0.019582777681221096, "grad_norm": 0.4073215126991272, "learning_rate": 0.00019612420229012597, "loss": 1.5824, "step": 1507 }, { "epoch": 0.01959577222513697, "grad_norm": 0.3010920584201813, "learning_rate": 0.00019612160282821457, "loss": 1.3867, "step": 1508 }, { "epoch": 0.019608766769052845, "grad_norm": 0.44669947028160095, "learning_rate": 0.00019611900336630316, "loss": 1.7077, "step": 1509 }, { "epoch": 0.01962176131296872, "grad_norm": 0.28721189498901367, "learning_rate": 0.00019611640390439182, "loss": 1.2883, "step": 1510 }, { "epoch": 0.01963475585688459, "grad_norm": 0.4460401237010956, "learning_rate": 0.0001961138044424804, "loss": 1.4707, "step": 1511 }, { "epoch": 0.019647750400800464, "grad_norm": 0.4010676145553589, "learning_rate": 0.00019611120498056904, "loss": 1.7405, "step": 1512 }, { "epoch": 0.019660744944716337, "grad_norm": 0.40383365750312805, "learning_rate": 0.00019610860551865764, "loss": 1.5814, "step": 1513 }, { "epoch": 0.01967373948863221, "grad_norm": 0.37719887495040894, "learning_rate": 0.00019610600605674626, "loss": 1.4757, "step": 1514 }, { "epoch": 0.019686734032548083, "grad_norm": 0.3334643244743347, "learning_rate": 0.00019610340659483488, "loss": 1.2733, "step": 1515 }, { "epoch": 0.019699728576463956, "grad_norm": 0.2876072824001312, "learning_rate": 0.00019610080713292348, "loss": 1.54, "step": 1516 }, { "epoch": 0.01971272312037983, "grad_norm": 0.41408345103263855, "learning_rate": 0.0001960982076710121, "loss": 1.4716, "step": 1517 }, { "epoch": 0.019725717664295706, "grad_norm": 0.2929058372974396, "learning_rate": 0.00019609560820910073, "loss": 1.4984, "step": 1518 }, { "epoch": 0.01973871220821158, "grad_norm": 0.3934227228164673, "learning_rate": 0.00019609300874718935, "loss": 1.5725, "step": 1519 }, { "epoch": 0.01975170675212745, "grad_norm": 0.42947065830230713, "learning_rate": 0.00019609040928527795, "loss": 1.3895, "step": 1520 }, { "epoch": 0.019764701296043324, "grad_norm": 0.3334328830242157, "learning_rate": 0.00019608780982336658, "loss": 1.491, "step": 1521 }, { "epoch": 0.019777695839959197, "grad_norm": 0.494296133518219, "learning_rate": 0.0001960852103614552, "loss": 1.4017, "step": 1522 }, { "epoch": 0.01979069038387507, "grad_norm": 0.2985383868217468, "learning_rate": 0.0001960826108995438, "loss": 1.3115, "step": 1523 }, { "epoch": 0.019803684927790943, "grad_norm": 0.35311025381088257, "learning_rate": 0.00019608001143763242, "loss": 1.5029, "step": 1524 }, { "epoch": 0.019816679471706816, "grad_norm": 0.40701058506965637, "learning_rate": 0.00019607741197572102, "loss": 1.4916, "step": 1525 }, { "epoch": 0.01982967401562269, "grad_norm": 0.5032493472099304, "learning_rate": 0.00019607481251380965, "loss": 1.4567, "step": 1526 }, { "epoch": 0.019842668559538562, "grad_norm": 0.4273280203342438, "learning_rate": 0.00019607221305189827, "loss": 1.5937, "step": 1527 }, { "epoch": 0.01985566310345444, "grad_norm": 0.34755900502204895, "learning_rate": 0.00019606961358998687, "loss": 1.3849, "step": 1528 }, { "epoch": 0.01986865764737031, "grad_norm": 0.4055110812187195, "learning_rate": 0.0001960670141280755, "loss": 1.6312, "step": 1529 }, { "epoch": 0.019881652191286184, "grad_norm": 0.3750371038913727, "learning_rate": 0.00019606441466616412, "loss": 1.3506, "step": 1530 }, { "epoch": 0.019894646735202057, "grad_norm": 0.39874982833862305, "learning_rate": 0.00019606181520425274, "loss": 1.6667, "step": 1531 }, { "epoch": 0.01990764127911793, "grad_norm": 0.3380453288555145, "learning_rate": 0.00019605921574234134, "loss": 1.3847, "step": 1532 }, { "epoch": 0.019920635823033803, "grad_norm": 0.37355825304985046, "learning_rate": 0.00019605661628042996, "loss": 1.349, "step": 1533 }, { "epoch": 0.019933630366949676, "grad_norm": 0.36169975996017456, "learning_rate": 0.0001960540168185186, "loss": 1.3348, "step": 1534 }, { "epoch": 0.01994662491086555, "grad_norm": 0.4419514834880829, "learning_rate": 0.00019605141735660718, "loss": 1.7414, "step": 1535 }, { "epoch": 0.019959619454781422, "grad_norm": 0.37134990096092224, "learning_rate": 0.0001960488178946958, "loss": 1.4494, "step": 1536 }, { "epoch": 0.0199726139986973, "grad_norm": 0.40228742361068726, "learning_rate": 0.0001960462184327844, "loss": 1.2943, "step": 1537 }, { "epoch": 0.01998560854261317, "grad_norm": 0.3903040885925293, "learning_rate": 0.00019604361897087306, "loss": 1.5018, "step": 1538 }, { "epoch": 0.019998603086529045, "grad_norm": 0.25189146399497986, "learning_rate": 0.00019604101950896165, "loss": 1.1487, "step": 1539 }, { "epoch": 0.020011597630444918, "grad_norm": 0.3548290729522705, "learning_rate": 0.00019603842004705025, "loss": 1.2833, "step": 1540 }, { "epoch": 0.02002459217436079, "grad_norm": 0.3860187828540802, "learning_rate": 0.0001960358205851389, "loss": 1.5513, "step": 1541 }, { "epoch": 0.020037586718276663, "grad_norm": 0.3593614101409912, "learning_rate": 0.0001960332211232275, "loss": 1.3988, "step": 1542 }, { "epoch": 0.020050581262192536, "grad_norm": 0.42542481422424316, "learning_rate": 0.00019603062166131613, "loss": 1.5644, "step": 1543 }, { "epoch": 0.02006357580610841, "grad_norm": 0.4755573570728302, "learning_rate": 0.00019602802219940472, "loss": 1.6379, "step": 1544 }, { "epoch": 0.020076570350024282, "grad_norm": 0.4325181543827057, "learning_rate": 0.00019602542273749335, "loss": 1.4713, "step": 1545 }, { "epoch": 0.020089564893940155, "grad_norm": 0.4106459617614746, "learning_rate": 0.00019602282327558197, "loss": 1.5294, "step": 1546 }, { "epoch": 0.02010255943785603, "grad_norm": 0.4240623116493225, "learning_rate": 0.00019602022381367057, "loss": 1.4604, "step": 1547 }, { "epoch": 0.020115553981771905, "grad_norm": 0.4559037685394287, "learning_rate": 0.0001960176243517592, "loss": 1.6868, "step": 1548 }, { "epoch": 0.020128548525687778, "grad_norm": 0.4125285744667053, "learning_rate": 0.00019601502488984782, "loss": 1.5499, "step": 1549 }, { "epoch": 0.02014154306960365, "grad_norm": 0.3951132297515869, "learning_rate": 0.00019601242542793644, "loss": 1.3677, "step": 1550 }, { "epoch": 0.020154537613519524, "grad_norm": 0.3750077784061432, "learning_rate": 0.00019600982596602504, "loss": 1.2986, "step": 1551 }, { "epoch": 0.020167532157435396, "grad_norm": 0.3765091001987457, "learning_rate": 0.00019600722650411364, "loss": 1.3322, "step": 1552 }, { "epoch": 0.02018052670135127, "grad_norm": 0.465188205242157, "learning_rate": 0.0001960046270422023, "loss": 1.3976, "step": 1553 }, { "epoch": 0.020193521245267142, "grad_norm": 0.30513960123062134, "learning_rate": 0.0001960020275802909, "loss": 1.6173, "step": 1554 }, { "epoch": 0.020206515789183015, "grad_norm": 0.35195010900497437, "learning_rate": 0.0001959994281183795, "loss": 1.4578, "step": 1555 }, { "epoch": 0.020219510333098892, "grad_norm": 0.2966456711292267, "learning_rate": 0.0001959968286564681, "loss": 1.3418, "step": 1556 }, { "epoch": 0.020232504877014765, "grad_norm": 0.4176999628543854, "learning_rate": 0.00019599422919455673, "loss": 1.4104, "step": 1557 }, { "epoch": 0.020245499420930638, "grad_norm": 0.4266605079174042, "learning_rate": 0.00019599162973264536, "loss": 1.5092, "step": 1558 }, { "epoch": 0.02025849396484651, "grad_norm": 0.358388751745224, "learning_rate": 0.00019598903027073395, "loss": 1.6631, "step": 1559 }, { "epoch": 0.020271488508762384, "grad_norm": 0.30513235926628113, "learning_rate": 0.00019598643080882258, "loss": 1.1193, "step": 1560 }, { "epoch": 0.020284483052678257, "grad_norm": 0.45252203941345215, "learning_rate": 0.0001959838313469112, "loss": 1.3373, "step": 1561 }, { "epoch": 0.02029747759659413, "grad_norm": 0.41170284152030945, "learning_rate": 0.00019598123188499983, "loss": 1.4798, "step": 1562 }, { "epoch": 0.020310472140510002, "grad_norm": 0.33470121026039124, "learning_rate": 0.00019597863242308843, "loss": 1.6219, "step": 1563 }, { "epoch": 0.020323466684425875, "grad_norm": 0.3679245710372925, "learning_rate": 0.00019597603296117702, "loss": 1.4923, "step": 1564 }, { "epoch": 0.02033646122834175, "grad_norm": 0.41525131464004517, "learning_rate": 0.00019597343349926567, "loss": 1.3043, "step": 1565 }, { "epoch": 0.020349455772257625, "grad_norm": 0.3219340145587921, "learning_rate": 0.00019597083403735427, "loss": 1.3098, "step": 1566 }, { "epoch": 0.020362450316173498, "grad_norm": 0.3557848334312439, "learning_rate": 0.0001959682345754429, "loss": 1.4068, "step": 1567 }, { "epoch": 0.02037544486008937, "grad_norm": 0.34874600172042847, "learning_rate": 0.0001959656351135315, "loss": 1.4774, "step": 1568 }, { "epoch": 0.020388439404005244, "grad_norm": 0.3569698929786682, "learning_rate": 0.00019596303565162012, "loss": 1.3676, "step": 1569 }, { "epoch": 0.020401433947921117, "grad_norm": 0.42987772822380066, "learning_rate": 0.00019596043618970874, "loss": 1.5767, "step": 1570 }, { "epoch": 0.02041442849183699, "grad_norm": 0.3264305889606476, "learning_rate": 0.00019595783672779734, "loss": 1.4497, "step": 1571 }, { "epoch": 0.020427423035752863, "grad_norm": 0.36391645669937134, "learning_rate": 0.00019595523726588596, "loss": 1.3669, "step": 1572 }, { "epoch": 0.020440417579668736, "grad_norm": 0.28453534841537476, "learning_rate": 0.0001959526378039746, "loss": 1.2903, "step": 1573 }, { "epoch": 0.02045341212358461, "grad_norm": 0.43484920263290405, "learning_rate": 0.00019595003834206321, "loss": 1.5088, "step": 1574 }, { "epoch": 0.020466406667500485, "grad_norm": 0.42664283514022827, "learning_rate": 0.0001959474388801518, "loss": 1.4973, "step": 1575 }, { "epoch": 0.020479401211416358, "grad_norm": 0.3683311641216278, "learning_rate": 0.00019594483941824044, "loss": 1.4963, "step": 1576 }, { "epoch": 0.02049239575533223, "grad_norm": 0.44799497723579407, "learning_rate": 0.00019594223995632906, "loss": 1.6409, "step": 1577 }, { "epoch": 0.020505390299248104, "grad_norm": 0.47593674063682556, "learning_rate": 0.00019593964049441766, "loss": 1.4892, "step": 1578 }, { "epoch": 0.020518384843163977, "grad_norm": 0.3819369077682495, "learning_rate": 0.00019593704103250628, "loss": 1.3729, "step": 1579 }, { "epoch": 0.02053137938707985, "grad_norm": 0.40040919184684753, "learning_rate": 0.0001959344415705949, "loss": 1.5173, "step": 1580 }, { "epoch": 0.020544373930995723, "grad_norm": 0.35317182540893555, "learning_rate": 0.0001959318421086835, "loss": 1.4997, "step": 1581 }, { "epoch": 0.020557368474911596, "grad_norm": 0.3929605782032013, "learning_rate": 0.00019592924264677213, "loss": 1.6177, "step": 1582 }, { "epoch": 0.02057036301882747, "grad_norm": 0.2945997416973114, "learning_rate": 0.00019592664318486073, "loss": 1.4386, "step": 1583 }, { "epoch": 0.02058335756274334, "grad_norm": 0.3660469055175781, "learning_rate": 0.00019592404372294938, "loss": 1.4455, "step": 1584 }, { "epoch": 0.020596352106659218, "grad_norm": 0.3905063569545746, "learning_rate": 0.00019592144426103797, "loss": 1.5125, "step": 1585 }, { "epoch": 0.02060934665057509, "grad_norm": 0.36867886781692505, "learning_rate": 0.0001959188447991266, "loss": 1.3826, "step": 1586 }, { "epoch": 0.020622341194490964, "grad_norm": 0.4044784605503082, "learning_rate": 0.0001959162453372152, "loss": 1.4459, "step": 1587 }, { "epoch": 0.020635335738406837, "grad_norm": 0.3518185317516327, "learning_rate": 0.00019591364587530382, "loss": 1.3904, "step": 1588 }, { "epoch": 0.02064833028232271, "grad_norm": 0.386371910572052, "learning_rate": 0.00019591104641339245, "loss": 1.5087, "step": 1589 }, { "epoch": 0.020661324826238583, "grad_norm": 0.3203730881214142, "learning_rate": 0.00019590844695148104, "loss": 1.148, "step": 1590 }, { "epoch": 0.020674319370154456, "grad_norm": 0.2952377200126648, "learning_rate": 0.00019590584748956967, "loss": 1.4205, "step": 1591 }, { "epoch": 0.02068731391407033, "grad_norm": 0.3668016195297241, "learning_rate": 0.0001959032480276583, "loss": 1.4445, "step": 1592 }, { "epoch": 0.0207003084579862, "grad_norm": 0.3414691388607025, "learning_rate": 0.0001959006485657469, "loss": 1.4218, "step": 1593 }, { "epoch": 0.020713303001902078, "grad_norm": 0.39005324244499207, "learning_rate": 0.00019589804910383551, "loss": 1.6387, "step": 1594 }, { "epoch": 0.02072629754581795, "grad_norm": 0.3395806550979614, "learning_rate": 0.0001958954496419241, "loss": 1.3853, "step": 1595 }, { "epoch": 0.020739292089733824, "grad_norm": 0.3177931308746338, "learning_rate": 0.00019589285018001276, "loss": 1.2602, "step": 1596 }, { "epoch": 0.020752286633649697, "grad_norm": 0.4624570608139038, "learning_rate": 0.00019589025071810136, "loss": 1.4313, "step": 1597 }, { "epoch": 0.02076528117756557, "grad_norm": 0.3250756859779358, "learning_rate": 0.00019588765125618998, "loss": 1.4142, "step": 1598 }, { "epoch": 0.020778275721481443, "grad_norm": 0.3827402889728546, "learning_rate": 0.00019588505179427858, "loss": 1.4889, "step": 1599 }, { "epoch": 0.020791270265397316, "grad_norm": 0.3126867115497589, "learning_rate": 0.0001958824523323672, "loss": 1.3279, "step": 1600 }, { "epoch": 0.02080426480931319, "grad_norm": 0.3472033441066742, "learning_rate": 0.00019587985287045583, "loss": 1.4702, "step": 1601 }, { "epoch": 0.020817259353229062, "grad_norm": 0.4711418151855469, "learning_rate": 0.00019587725340854443, "loss": 1.4495, "step": 1602 }, { "epoch": 0.020830253897144935, "grad_norm": 0.36849603056907654, "learning_rate": 0.00019587465394663305, "loss": 1.748, "step": 1603 }, { "epoch": 0.02084324844106081, "grad_norm": 0.38734176754951477, "learning_rate": 0.00019587205448472168, "loss": 1.6044, "step": 1604 }, { "epoch": 0.020856242984976684, "grad_norm": 0.4070992171764374, "learning_rate": 0.0001958694550228103, "loss": 1.4472, "step": 1605 }, { "epoch": 0.020869237528892557, "grad_norm": 0.44407230615615845, "learning_rate": 0.0001958668555608989, "loss": 1.4856, "step": 1606 }, { "epoch": 0.02088223207280843, "grad_norm": 0.42099443078041077, "learning_rate": 0.0001958642560989875, "loss": 1.4157, "step": 1607 }, { "epoch": 0.020895226616724303, "grad_norm": 0.3330783545970917, "learning_rate": 0.00019586165663707615, "loss": 1.269, "step": 1608 }, { "epoch": 0.020908221160640176, "grad_norm": 0.3795165419578552, "learning_rate": 0.00019585905717516475, "loss": 1.6631, "step": 1609 }, { "epoch": 0.02092121570455605, "grad_norm": 0.3347870409488678, "learning_rate": 0.00019585645771325337, "loss": 1.39, "step": 1610 }, { "epoch": 0.020934210248471922, "grad_norm": 0.42705029249191284, "learning_rate": 0.00019585385825134197, "loss": 1.6396, "step": 1611 }, { "epoch": 0.020947204792387795, "grad_norm": 0.3586139976978302, "learning_rate": 0.0001958512587894306, "loss": 1.5277, "step": 1612 }, { "epoch": 0.02096019933630367, "grad_norm": 0.38245609402656555, "learning_rate": 0.00019584865932751922, "loss": 1.5861, "step": 1613 }, { "epoch": 0.020973193880219544, "grad_norm": 0.3106684684753418, "learning_rate": 0.00019584605986560781, "loss": 1.1715, "step": 1614 }, { "epoch": 0.020986188424135417, "grad_norm": 0.37953510880470276, "learning_rate": 0.00019584346040369647, "loss": 1.5584, "step": 1615 }, { "epoch": 0.02099918296805129, "grad_norm": 0.2933439016342163, "learning_rate": 0.00019584086094178506, "loss": 1.2928, "step": 1616 }, { "epoch": 0.021012177511967163, "grad_norm": 0.369454562664032, "learning_rate": 0.0001958382614798737, "loss": 1.453, "step": 1617 }, { "epoch": 0.021025172055883036, "grad_norm": 0.34735023975372314, "learning_rate": 0.00019583566201796228, "loss": 1.4575, "step": 1618 }, { "epoch": 0.02103816659979891, "grad_norm": 0.26747339963912964, "learning_rate": 0.0001958330625560509, "loss": 1.4802, "step": 1619 }, { "epoch": 0.021051161143714782, "grad_norm": 0.44794002175331116, "learning_rate": 0.00019583046309413953, "loss": 1.5177, "step": 1620 }, { "epoch": 0.021064155687630655, "grad_norm": 0.41062891483306885, "learning_rate": 0.00019582786363222813, "loss": 1.5903, "step": 1621 }, { "epoch": 0.021077150231546528, "grad_norm": 0.3192037343978882, "learning_rate": 0.00019582526417031676, "loss": 1.322, "step": 1622 }, { "epoch": 0.021090144775462404, "grad_norm": 0.47188833355903625, "learning_rate": 0.00019582266470840538, "loss": 1.5766, "step": 1623 }, { "epoch": 0.021103139319378277, "grad_norm": 0.329072505235672, "learning_rate": 0.00019582006524649398, "loss": 1.3272, "step": 1624 }, { "epoch": 0.02111613386329415, "grad_norm": 0.3218333125114441, "learning_rate": 0.0001958174657845826, "loss": 1.252, "step": 1625 }, { "epoch": 0.021129128407210023, "grad_norm": 0.44932013750076294, "learning_rate": 0.0001958148663226712, "loss": 1.2511, "step": 1626 }, { "epoch": 0.021142122951125896, "grad_norm": 0.4039768576622009, "learning_rate": 0.00019581226686075985, "loss": 1.3452, "step": 1627 }, { "epoch": 0.02115511749504177, "grad_norm": 0.3648715019226074, "learning_rate": 0.00019580966739884845, "loss": 1.5494, "step": 1628 }, { "epoch": 0.021168112038957642, "grad_norm": 0.30340489745140076, "learning_rate": 0.00019580706793693707, "loss": 1.3307, "step": 1629 }, { "epoch": 0.021181106582873515, "grad_norm": 0.3260335624217987, "learning_rate": 0.00019580446847502567, "loss": 1.2421, "step": 1630 }, { "epoch": 0.021194101126789388, "grad_norm": 0.39445722103118896, "learning_rate": 0.0001958018690131143, "loss": 1.5133, "step": 1631 }, { "epoch": 0.021207095670705264, "grad_norm": 0.4508373439311981, "learning_rate": 0.00019579926955120292, "loss": 1.497, "step": 1632 }, { "epoch": 0.021220090214621137, "grad_norm": 0.34700095653533936, "learning_rate": 0.00019579667008929152, "loss": 1.5436, "step": 1633 }, { "epoch": 0.02123308475853701, "grad_norm": 0.3277294635772705, "learning_rate": 0.00019579407062738014, "loss": 1.5377, "step": 1634 }, { "epoch": 0.021246079302452883, "grad_norm": 0.3692040741443634, "learning_rate": 0.00019579147116546877, "loss": 1.4643, "step": 1635 }, { "epoch": 0.021259073846368756, "grad_norm": 0.33273717761039734, "learning_rate": 0.00019578887170355736, "loss": 1.4873, "step": 1636 }, { "epoch": 0.02127206839028463, "grad_norm": 0.4648902714252472, "learning_rate": 0.000195786272241646, "loss": 1.5925, "step": 1637 }, { "epoch": 0.021285062934200502, "grad_norm": 0.32959115505218506, "learning_rate": 0.00019578367277973458, "loss": 1.4733, "step": 1638 }, { "epoch": 0.021298057478116375, "grad_norm": 0.3375896215438843, "learning_rate": 0.00019578107331782324, "loss": 1.3738, "step": 1639 }, { "epoch": 0.021311052022032248, "grad_norm": 0.397700697183609, "learning_rate": 0.00019577847385591183, "loss": 1.4783, "step": 1640 }, { "epoch": 0.02132404656594812, "grad_norm": 0.3494514226913452, "learning_rate": 0.00019577587439400046, "loss": 1.4699, "step": 1641 }, { "epoch": 0.021337041109863997, "grad_norm": 0.3787671625614166, "learning_rate": 0.00019577327493208906, "loss": 1.3895, "step": 1642 }, { "epoch": 0.02135003565377987, "grad_norm": 0.3944171965122223, "learning_rate": 0.00019577067547017768, "loss": 1.4478, "step": 1643 }, { "epoch": 0.021363030197695743, "grad_norm": 0.3857954442501068, "learning_rate": 0.0001957680760082663, "loss": 1.5263, "step": 1644 }, { "epoch": 0.021376024741611616, "grad_norm": 0.3964695334434509, "learning_rate": 0.0001957654765463549, "loss": 1.6049, "step": 1645 }, { "epoch": 0.02138901928552749, "grad_norm": 0.4778573215007782, "learning_rate": 0.00019576287708444353, "loss": 1.5705, "step": 1646 }, { "epoch": 0.021402013829443362, "grad_norm": 0.35294604301452637, "learning_rate": 0.00019576027762253215, "loss": 1.4666, "step": 1647 }, { "epoch": 0.021415008373359235, "grad_norm": 0.31928515434265137, "learning_rate": 0.00019575767816062075, "loss": 1.4431, "step": 1648 }, { "epoch": 0.021428002917275108, "grad_norm": 0.3613530099391937, "learning_rate": 0.00019575507869870937, "loss": 1.3176, "step": 1649 }, { "epoch": 0.02144099746119098, "grad_norm": 0.2900174558162689, "learning_rate": 0.000195752479236798, "loss": 1.4382, "step": 1650 }, { "epoch": 0.021453992005106854, "grad_norm": 0.3931158483028412, "learning_rate": 0.00019574987977488662, "loss": 1.5383, "step": 1651 }, { "epoch": 0.02146698654902273, "grad_norm": 0.4072851240634918, "learning_rate": 0.00019574728031297522, "loss": 1.7854, "step": 1652 }, { "epoch": 0.021479981092938603, "grad_norm": 0.3354068994522095, "learning_rate": 0.00019574468085106384, "loss": 1.4827, "step": 1653 }, { "epoch": 0.021492975636854476, "grad_norm": 0.25913044810295105, "learning_rate": 0.00019574208138915247, "loss": 1.291, "step": 1654 }, { "epoch": 0.02150597018077035, "grad_norm": 0.2384590059518814, "learning_rate": 0.00019573948192724107, "loss": 1.3029, "step": 1655 }, { "epoch": 0.021518964724686222, "grad_norm": 0.31463539600372314, "learning_rate": 0.0001957368824653297, "loss": 1.3735, "step": 1656 }, { "epoch": 0.021531959268602095, "grad_norm": 0.39537736773490906, "learning_rate": 0.0001957342830034183, "loss": 1.3781, "step": 1657 }, { "epoch": 0.021544953812517968, "grad_norm": 0.33102867007255554, "learning_rate": 0.00019573168354150694, "loss": 1.5955, "step": 1658 }, { "epoch": 0.02155794835643384, "grad_norm": 0.4276646077632904, "learning_rate": 0.00019572908407959554, "loss": 1.5761, "step": 1659 }, { "epoch": 0.021570942900349714, "grad_norm": 0.41354063153266907, "learning_rate": 0.00019572648461768416, "loss": 1.444, "step": 1660 }, { "epoch": 0.02158393744426559, "grad_norm": 0.37918925285339355, "learning_rate": 0.00019572388515577276, "loss": 1.4344, "step": 1661 }, { "epoch": 0.021596931988181464, "grad_norm": 0.5014055371284485, "learning_rate": 0.00019572128569386138, "loss": 1.5845, "step": 1662 }, { "epoch": 0.021609926532097336, "grad_norm": 0.42829424142837524, "learning_rate": 0.00019571868623195, "loss": 1.5156, "step": 1663 }, { "epoch": 0.02162292107601321, "grad_norm": 0.43786388635635376, "learning_rate": 0.0001957160867700386, "loss": 1.6055, "step": 1664 }, { "epoch": 0.021635915619929082, "grad_norm": 0.31251829862594604, "learning_rate": 0.00019571348730812723, "loss": 1.5896, "step": 1665 }, { "epoch": 0.021648910163844955, "grad_norm": 0.31247207522392273, "learning_rate": 0.00019571088784621585, "loss": 1.4182, "step": 1666 }, { "epoch": 0.02166190470776083, "grad_norm": 0.3295499086380005, "learning_rate": 0.00019570828838430445, "loss": 1.2062, "step": 1667 }, { "epoch": 0.0216748992516767, "grad_norm": 0.3025866746902466, "learning_rate": 0.00019570568892239307, "loss": 1.5354, "step": 1668 }, { "epoch": 0.021687893795592574, "grad_norm": 0.249764546751976, "learning_rate": 0.00019570308946048167, "loss": 1.346, "step": 1669 }, { "epoch": 0.021700888339508447, "grad_norm": 0.4976350963115692, "learning_rate": 0.00019570048999857032, "loss": 1.5642, "step": 1670 }, { "epoch": 0.021713882883424324, "grad_norm": 0.37651264667510986, "learning_rate": 0.00019569789053665892, "loss": 1.5268, "step": 1671 }, { "epoch": 0.021726877427340197, "grad_norm": 0.3025478720664978, "learning_rate": 0.00019569529107474755, "loss": 1.309, "step": 1672 }, { "epoch": 0.02173987197125607, "grad_norm": 0.5278252959251404, "learning_rate": 0.00019569269161283614, "loss": 1.5146, "step": 1673 }, { "epoch": 0.021752866515171942, "grad_norm": 0.3368765711784363, "learning_rate": 0.00019569009215092477, "loss": 1.4717, "step": 1674 }, { "epoch": 0.021765861059087815, "grad_norm": 0.37126728892326355, "learning_rate": 0.0001956874926890134, "loss": 1.3485, "step": 1675 }, { "epoch": 0.02177885560300369, "grad_norm": 0.4636981785297394, "learning_rate": 0.000195684893227102, "loss": 1.5706, "step": 1676 }, { "epoch": 0.02179185014691956, "grad_norm": 0.25153589248657227, "learning_rate": 0.00019568229376519061, "loss": 1.3659, "step": 1677 }, { "epoch": 0.021804844690835434, "grad_norm": 0.41266581416130066, "learning_rate": 0.00019567969430327924, "loss": 1.4365, "step": 1678 }, { "epoch": 0.021817839234751307, "grad_norm": 0.37196439504623413, "learning_rate": 0.00019567709484136784, "loss": 1.2218, "step": 1679 }, { "epoch": 0.021830833778667184, "grad_norm": 0.43542590737342834, "learning_rate": 0.00019567449537945646, "loss": 1.5324, "step": 1680 }, { "epoch": 0.021843828322583057, "grad_norm": 0.34926947951316833, "learning_rate": 0.00019567189591754506, "loss": 1.5377, "step": 1681 }, { "epoch": 0.02185682286649893, "grad_norm": 0.41107818484306335, "learning_rate": 0.0001956692964556337, "loss": 1.5147, "step": 1682 }, { "epoch": 0.021869817410414803, "grad_norm": 0.3859551250934601, "learning_rate": 0.0001956666969937223, "loss": 1.5414, "step": 1683 }, { "epoch": 0.021882811954330676, "grad_norm": 0.3669387996196747, "learning_rate": 0.00019566409753181093, "loss": 1.4922, "step": 1684 }, { "epoch": 0.02189580649824655, "grad_norm": 0.34603843092918396, "learning_rate": 0.00019566149806989953, "loss": 1.3241, "step": 1685 }, { "epoch": 0.02190880104216242, "grad_norm": 0.37981685996055603, "learning_rate": 0.00019565889860798815, "loss": 1.3466, "step": 1686 }, { "epoch": 0.021921795586078294, "grad_norm": 0.2920598089694977, "learning_rate": 0.00019565629914607678, "loss": 1.4988, "step": 1687 }, { "epoch": 0.021934790129994167, "grad_norm": 0.40843528509140015, "learning_rate": 0.00019565369968416537, "loss": 1.304, "step": 1688 }, { "epoch": 0.02194778467391004, "grad_norm": 0.48986881971359253, "learning_rate": 0.00019565110022225403, "loss": 1.3282, "step": 1689 }, { "epoch": 0.021960779217825917, "grad_norm": 0.35337305068969727, "learning_rate": 0.00019564850076034262, "loss": 1.5884, "step": 1690 }, { "epoch": 0.02197377376174179, "grad_norm": 0.37043288350105286, "learning_rate": 0.00019564590129843122, "loss": 1.3222, "step": 1691 }, { "epoch": 0.021986768305657663, "grad_norm": 0.30725833773612976, "learning_rate": 0.00019564330183651985, "loss": 1.4441, "step": 1692 }, { "epoch": 0.021999762849573536, "grad_norm": 0.3049827218055725, "learning_rate": 0.00019564070237460847, "loss": 1.3802, "step": 1693 }, { "epoch": 0.02201275739348941, "grad_norm": 0.3680458962917328, "learning_rate": 0.0001956381029126971, "loss": 1.3028, "step": 1694 }, { "epoch": 0.02202575193740528, "grad_norm": 0.3884005844593048, "learning_rate": 0.0001956355034507857, "loss": 1.5881, "step": 1695 }, { "epoch": 0.022038746481321154, "grad_norm": 0.386909544467926, "learning_rate": 0.00019563290398887432, "loss": 1.4967, "step": 1696 }, { "epoch": 0.022051741025237027, "grad_norm": 0.327727347612381, "learning_rate": 0.00019563030452696294, "loss": 1.4156, "step": 1697 }, { "epoch": 0.0220647355691529, "grad_norm": 0.36065375804901123, "learning_rate": 0.00019562770506505154, "loss": 1.434, "step": 1698 }, { "epoch": 0.022077730113068777, "grad_norm": 0.41146379709243774, "learning_rate": 0.00019562510560314016, "loss": 1.4004, "step": 1699 }, { "epoch": 0.02209072465698465, "grad_norm": 0.28855806589126587, "learning_rate": 0.00019562250614122876, "loss": 1.352, "step": 1700 }, { "epoch": 0.022103719200900523, "grad_norm": 0.35015758872032166, "learning_rate": 0.0001956199066793174, "loss": 1.4631, "step": 1701 }, { "epoch": 0.022116713744816396, "grad_norm": 0.3388160169124603, "learning_rate": 0.000195617307217406, "loss": 1.5847, "step": 1702 }, { "epoch": 0.02212970828873227, "grad_norm": 0.3232349157333374, "learning_rate": 0.0001956147077554946, "loss": 1.353, "step": 1703 }, { "epoch": 0.02214270283264814, "grad_norm": 0.3832527995109558, "learning_rate": 0.00019561210829358323, "loss": 1.5827, "step": 1704 }, { "epoch": 0.022155697376564015, "grad_norm": 0.321197509765625, "learning_rate": 0.00019560950883167186, "loss": 1.376, "step": 1705 }, { "epoch": 0.022168691920479888, "grad_norm": 0.2841126024723053, "learning_rate": 0.00019560690936976048, "loss": 1.262, "step": 1706 }, { "epoch": 0.02218168646439576, "grad_norm": 0.27810975909233093, "learning_rate": 0.00019560430990784908, "loss": 1.1901, "step": 1707 }, { "epoch": 0.022194681008311633, "grad_norm": 0.40710029006004333, "learning_rate": 0.0001956017104459377, "loss": 1.3795, "step": 1708 }, { "epoch": 0.02220767555222751, "grad_norm": 0.27838972210884094, "learning_rate": 0.00019559911098402633, "loss": 1.464, "step": 1709 }, { "epoch": 0.022220670096143383, "grad_norm": 0.37907466292381287, "learning_rate": 0.00019559651152211492, "loss": 1.3841, "step": 1710 }, { "epoch": 0.022233664640059256, "grad_norm": 0.4405648708343506, "learning_rate": 0.00019559391206020355, "loss": 1.4879, "step": 1711 }, { "epoch": 0.02224665918397513, "grad_norm": 0.30526280403137207, "learning_rate": 0.00019559131259829215, "loss": 1.3064, "step": 1712 }, { "epoch": 0.022259653727891, "grad_norm": 0.4097805619239807, "learning_rate": 0.0001955887131363808, "loss": 1.4996, "step": 1713 }, { "epoch": 0.022272648271806875, "grad_norm": 0.3696689009666443, "learning_rate": 0.0001955861136744694, "loss": 1.3542, "step": 1714 }, { "epoch": 0.022285642815722748, "grad_norm": 0.4285631775856018, "learning_rate": 0.000195583514212558, "loss": 1.465, "step": 1715 }, { "epoch": 0.02229863735963862, "grad_norm": 0.39842674136161804, "learning_rate": 0.00019558091475064662, "loss": 1.4928, "step": 1716 }, { "epoch": 0.022311631903554494, "grad_norm": 0.41399630904197693, "learning_rate": 0.00019557831528873524, "loss": 1.3706, "step": 1717 }, { "epoch": 0.02232462644747037, "grad_norm": 0.31001347303390503, "learning_rate": 0.00019557571582682387, "loss": 1.3039, "step": 1718 }, { "epoch": 0.022337620991386243, "grad_norm": 0.6012894511222839, "learning_rate": 0.00019557311636491246, "loss": 1.5732, "step": 1719 }, { "epoch": 0.022350615535302116, "grad_norm": 0.4421006739139557, "learning_rate": 0.0001955705169030011, "loss": 1.4559, "step": 1720 }, { "epoch": 0.02236361007921799, "grad_norm": 0.3570576310157776, "learning_rate": 0.0001955679174410897, "loss": 1.3467, "step": 1721 }, { "epoch": 0.022376604623133862, "grad_norm": 0.3897778391838074, "learning_rate": 0.0001955653179791783, "loss": 1.3302, "step": 1722 }, { "epoch": 0.022389599167049735, "grad_norm": 0.3074800968170166, "learning_rate": 0.00019556271851726693, "loss": 1.3689, "step": 1723 }, { "epoch": 0.022402593710965608, "grad_norm": 0.29305726289749146, "learning_rate": 0.00019556011905535556, "loss": 1.4544, "step": 1724 }, { "epoch": 0.02241558825488148, "grad_norm": 0.4414934515953064, "learning_rate": 0.00019555751959344418, "loss": 1.4851, "step": 1725 }, { "epoch": 0.022428582798797354, "grad_norm": 0.27918463945388794, "learning_rate": 0.00019555492013153278, "loss": 1.4063, "step": 1726 }, { "epoch": 0.022441577342713227, "grad_norm": 0.4117446541786194, "learning_rate": 0.0001955523206696214, "loss": 1.4085, "step": 1727 }, { "epoch": 0.022454571886629103, "grad_norm": 0.42266303300857544, "learning_rate": 0.00019554972120771003, "loss": 1.5035, "step": 1728 }, { "epoch": 0.022467566430544976, "grad_norm": 0.4759625792503357, "learning_rate": 0.00019554712174579863, "loss": 1.4085, "step": 1729 }, { "epoch": 0.02248056097446085, "grad_norm": 0.374464213848114, "learning_rate": 0.00019554452228388725, "loss": 1.5305, "step": 1730 }, { "epoch": 0.022493555518376722, "grad_norm": 0.31605151295661926, "learning_rate": 0.00019554192282197585, "loss": 1.5106, "step": 1731 }, { "epoch": 0.022506550062292595, "grad_norm": 0.3102051913738251, "learning_rate": 0.00019553932336006447, "loss": 1.598, "step": 1732 }, { "epoch": 0.022519544606208468, "grad_norm": 0.32919368147850037, "learning_rate": 0.0001955367238981531, "loss": 1.7339, "step": 1733 }, { "epoch": 0.02253253915012434, "grad_norm": 0.37161538004875183, "learning_rate": 0.0001955341244362417, "loss": 1.3533, "step": 1734 }, { "epoch": 0.022545533694040214, "grad_norm": 0.33915358781814575, "learning_rate": 0.00019553152497433032, "loss": 1.5635, "step": 1735 }, { "epoch": 0.022558528237956087, "grad_norm": 0.23286893963813782, "learning_rate": 0.00019552892551241894, "loss": 1.229, "step": 1736 }, { "epoch": 0.022571522781871963, "grad_norm": 0.30798688530921936, "learning_rate": 0.00019552632605050757, "loss": 1.4644, "step": 1737 }, { "epoch": 0.022584517325787836, "grad_norm": 0.3687688410282135, "learning_rate": 0.00019552372658859617, "loss": 1.3613, "step": 1738 }, { "epoch": 0.02259751186970371, "grad_norm": 0.3794326186180115, "learning_rate": 0.0001955211271266848, "loss": 1.5485, "step": 1739 }, { "epoch": 0.022610506413619582, "grad_norm": 0.42618221044540405, "learning_rate": 0.00019551852766477341, "loss": 1.6085, "step": 1740 }, { "epoch": 0.022623500957535455, "grad_norm": 0.4333156645298004, "learning_rate": 0.000195515928202862, "loss": 1.4613, "step": 1741 }, { "epoch": 0.022636495501451328, "grad_norm": 0.35553714632987976, "learning_rate": 0.00019551332874095064, "loss": 1.5204, "step": 1742 }, { "epoch": 0.0226494900453672, "grad_norm": 0.3447892367839813, "learning_rate": 0.00019551072927903923, "loss": 1.5232, "step": 1743 }, { "epoch": 0.022662484589283074, "grad_norm": 0.408682256937027, "learning_rate": 0.00019550812981712789, "loss": 1.5445, "step": 1744 }, { "epoch": 0.022675479133198947, "grad_norm": 0.28475674986839294, "learning_rate": 0.00019550553035521648, "loss": 1.4025, "step": 1745 }, { "epoch": 0.02268847367711482, "grad_norm": 0.4748742878437042, "learning_rate": 0.00019550293089330508, "loss": 1.3943, "step": 1746 }, { "epoch": 0.022701468221030696, "grad_norm": 0.30381184816360474, "learning_rate": 0.0001955003314313937, "loss": 1.3255, "step": 1747 }, { "epoch": 0.02271446276494657, "grad_norm": 0.4273858964443207, "learning_rate": 0.00019549773196948233, "loss": 1.4897, "step": 1748 }, { "epoch": 0.022727457308862442, "grad_norm": 0.44716984033584595, "learning_rate": 0.00019549513250757095, "loss": 1.5146, "step": 1749 }, { "epoch": 0.022740451852778315, "grad_norm": 0.3693029284477234, "learning_rate": 0.00019549253304565955, "loss": 1.5719, "step": 1750 }, { "epoch": 0.022753446396694188, "grad_norm": 0.3475962281227112, "learning_rate": 0.00019548993358374818, "loss": 1.5674, "step": 1751 }, { "epoch": 0.02276644094061006, "grad_norm": 0.43653982877731323, "learning_rate": 0.0001954873341218368, "loss": 1.6131, "step": 1752 }, { "epoch": 0.022779435484525934, "grad_norm": 0.2988130748271942, "learning_rate": 0.0001954847346599254, "loss": 1.4084, "step": 1753 }, { "epoch": 0.022792430028441807, "grad_norm": 0.33502525091171265, "learning_rate": 0.00019548213519801402, "loss": 1.6681, "step": 1754 }, { "epoch": 0.02280542457235768, "grad_norm": 0.3344237208366394, "learning_rate": 0.00019547953573610262, "loss": 1.4013, "step": 1755 }, { "epoch": 0.022818419116273556, "grad_norm": 0.27920055389404297, "learning_rate": 0.00019547693627419127, "loss": 1.2838, "step": 1756 }, { "epoch": 0.02283141366018943, "grad_norm": 0.3209424316883087, "learning_rate": 0.00019547433681227987, "loss": 1.3716, "step": 1757 }, { "epoch": 0.022844408204105302, "grad_norm": 0.36769208312034607, "learning_rate": 0.00019547173735036847, "loss": 1.6245, "step": 1758 }, { "epoch": 0.022857402748021175, "grad_norm": 0.33044371008872986, "learning_rate": 0.0001954691378884571, "loss": 1.4102, "step": 1759 }, { "epoch": 0.022870397291937048, "grad_norm": 0.4227673411369324, "learning_rate": 0.00019546653842654571, "loss": 1.5309, "step": 1760 }, { "epoch": 0.02288339183585292, "grad_norm": 0.3412001430988312, "learning_rate": 0.00019546393896463434, "loss": 1.453, "step": 1761 }, { "epoch": 0.022896386379768794, "grad_norm": 0.31443512439727783, "learning_rate": 0.00019546133950272294, "loss": 1.1764, "step": 1762 }, { "epoch": 0.022909380923684667, "grad_norm": 0.4690960645675659, "learning_rate": 0.00019545874004081156, "loss": 1.4996, "step": 1763 }, { "epoch": 0.02292237546760054, "grad_norm": 0.23522229492664337, "learning_rate": 0.00019545614057890019, "loss": 1.3415, "step": 1764 }, { "epoch": 0.022935370011516413, "grad_norm": 0.38632360100746155, "learning_rate": 0.00019545354111698878, "loss": 1.6867, "step": 1765 }, { "epoch": 0.02294836455543229, "grad_norm": 0.447410523891449, "learning_rate": 0.0001954509416550774, "loss": 1.6823, "step": 1766 }, { "epoch": 0.022961359099348162, "grad_norm": 0.3630903959274292, "learning_rate": 0.00019544834219316603, "loss": 1.456, "step": 1767 }, { "epoch": 0.022974353643264035, "grad_norm": 0.37147101759910583, "learning_rate": 0.00019544574273125466, "loss": 1.4898, "step": 1768 }, { "epoch": 0.022987348187179908, "grad_norm": 0.3553246557712555, "learning_rate": 0.00019544314326934325, "loss": 1.3276, "step": 1769 }, { "epoch": 0.02300034273109578, "grad_norm": 0.42636096477508545, "learning_rate": 0.00019544054380743185, "loss": 1.3457, "step": 1770 }, { "epoch": 0.023013337275011654, "grad_norm": 0.35853463411331177, "learning_rate": 0.0001954379443455205, "loss": 1.5554, "step": 1771 }, { "epoch": 0.023026331818927527, "grad_norm": 0.40227043628692627, "learning_rate": 0.0001954353448836091, "loss": 1.5046, "step": 1772 }, { "epoch": 0.0230393263628434, "grad_norm": 0.43165484070777893, "learning_rate": 0.00019543274542169772, "loss": 1.5448, "step": 1773 }, { "epoch": 0.023052320906759273, "grad_norm": 0.40751728415489197, "learning_rate": 0.00019543014595978632, "loss": 1.4163, "step": 1774 }, { "epoch": 0.02306531545067515, "grad_norm": 0.3644002377986908, "learning_rate": 0.00019542754649787495, "loss": 1.3576, "step": 1775 }, { "epoch": 0.023078309994591022, "grad_norm": 0.40311771631240845, "learning_rate": 0.00019542494703596357, "loss": 1.5754, "step": 1776 }, { "epoch": 0.023091304538506895, "grad_norm": 0.2770278751850128, "learning_rate": 0.00019542234757405217, "loss": 1.4552, "step": 1777 }, { "epoch": 0.02310429908242277, "grad_norm": 0.3735068440437317, "learning_rate": 0.0001954197481121408, "loss": 1.411, "step": 1778 }, { "epoch": 0.02311729362633864, "grad_norm": 0.40823933482170105, "learning_rate": 0.00019541714865022942, "loss": 1.3935, "step": 1779 }, { "epoch": 0.023130288170254514, "grad_norm": 0.4334075450897217, "learning_rate": 0.00019541454918831804, "loss": 1.5425, "step": 1780 }, { "epoch": 0.023143282714170387, "grad_norm": 0.4126792252063751, "learning_rate": 0.00019541194972640664, "loss": 1.5586, "step": 1781 }, { "epoch": 0.02315627725808626, "grad_norm": 0.39981988072395325, "learning_rate": 0.00019540935026449526, "loss": 1.5523, "step": 1782 }, { "epoch": 0.023169271802002133, "grad_norm": 0.34040069580078125, "learning_rate": 0.0001954067508025839, "loss": 1.3141, "step": 1783 }, { "epoch": 0.023182266345918006, "grad_norm": 0.3204672038555145, "learning_rate": 0.00019540415134067249, "loss": 1.5524, "step": 1784 }, { "epoch": 0.023195260889833882, "grad_norm": 0.394387811422348, "learning_rate": 0.0001954015518787611, "loss": 1.3762, "step": 1785 }, { "epoch": 0.023208255433749755, "grad_norm": 0.4125596880912781, "learning_rate": 0.0001953989524168497, "loss": 1.3622, "step": 1786 }, { "epoch": 0.02322124997766563, "grad_norm": 0.35644418001174927, "learning_rate": 0.00019539635295493833, "loss": 1.5096, "step": 1787 }, { "epoch": 0.0232342445215815, "grad_norm": 0.3932243883609772, "learning_rate": 0.00019539375349302696, "loss": 1.3592, "step": 1788 }, { "epoch": 0.023247239065497374, "grad_norm": 0.2546967566013336, "learning_rate": 0.00019539115403111555, "loss": 1.4257, "step": 1789 }, { "epoch": 0.023260233609413247, "grad_norm": 0.3977675139904022, "learning_rate": 0.00019538855456920418, "loss": 1.3011, "step": 1790 }, { "epoch": 0.02327322815332912, "grad_norm": 0.43489986658096313, "learning_rate": 0.0001953859551072928, "loss": 1.6674, "step": 1791 }, { "epoch": 0.023286222697244993, "grad_norm": 0.4233644902706146, "learning_rate": 0.00019538335564538143, "loss": 1.4111, "step": 1792 }, { "epoch": 0.023299217241160866, "grad_norm": 0.41177353262901306, "learning_rate": 0.00019538075618347002, "loss": 1.4682, "step": 1793 }, { "epoch": 0.023312211785076743, "grad_norm": 0.38274767994880676, "learning_rate": 0.00019537815672155865, "loss": 1.3928, "step": 1794 }, { "epoch": 0.023325206328992616, "grad_norm": 0.38090863823890686, "learning_rate": 0.00019537555725964727, "loss": 1.6614, "step": 1795 }, { "epoch": 0.02333820087290849, "grad_norm": 0.35949671268463135, "learning_rate": 0.00019537295779773587, "loss": 1.58, "step": 1796 }, { "epoch": 0.02335119541682436, "grad_norm": 0.3463672697544098, "learning_rate": 0.0001953703583358245, "loss": 1.5728, "step": 1797 }, { "epoch": 0.023364189960740234, "grad_norm": 0.4024708569049835, "learning_rate": 0.00019536775887391312, "loss": 1.4199, "step": 1798 }, { "epoch": 0.023377184504656107, "grad_norm": 0.3992582857608795, "learning_rate": 0.00019536515941200172, "loss": 1.4886, "step": 1799 }, { "epoch": 0.02339017904857198, "grad_norm": 0.32905566692352295, "learning_rate": 0.00019536255995009034, "loss": 1.4478, "step": 1800 }, { "epoch": 0.023403173592487853, "grad_norm": 0.5432623028755188, "learning_rate": 0.00019535996048817894, "loss": 1.3756, "step": 1801 }, { "epoch": 0.023416168136403726, "grad_norm": 0.27649611234664917, "learning_rate": 0.0001953573610262676, "loss": 1.3959, "step": 1802 }, { "epoch": 0.0234291626803196, "grad_norm": 0.39728596806526184, "learning_rate": 0.0001953547615643562, "loss": 1.3592, "step": 1803 }, { "epoch": 0.023442157224235476, "grad_norm": 0.3490980267524719, "learning_rate": 0.0001953521621024448, "loss": 1.493, "step": 1804 }, { "epoch": 0.02345515176815135, "grad_norm": 0.3728959858417511, "learning_rate": 0.0001953495626405334, "loss": 1.3163, "step": 1805 }, { "epoch": 0.02346814631206722, "grad_norm": 0.3092458248138428, "learning_rate": 0.00019534696317862203, "loss": 1.3774, "step": 1806 }, { "epoch": 0.023481140855983094, "grad_norm": 0.47971421480178833, "learning_rate": 0.00019534436371671066, "loss": 1.5847, "step": 1807 }, { "epoch": 0.023494135399898967, "grad_norm": 0.3586556315422058, "learning_rate": 0.00019534176425479926, "loss": 1.2791, "step": 1808 }, { "epoch": 0.02350712994381484, "grad_norm": 0.4068545699119568, "learning_rate": 0.00019533916479288788, "loss": 1.3263, "step": 1809 }, { "epoch": 0.023520124487730713, "grad_norm": 0.44036608934402466, "learning_rate": 0.0001953365653309765, "loss": 1.5351, "step": 1810 }, { "epoch": 0.023533119031646586, "grad_norm": 0.3685401976108551, "learning_rate": 0.00019533396586906513, "loss": 1.3645, "step": 1811 }, { "epoch": 0.02354611357556246, "grad_norm": 0.4026589095592499, "learning_rate": 0.00019533136640715373, "loss": 1.3607, "step": 1812 }, { "epoch": 0.023559108119478336, "grad_norm": 0.3264610469341278, "learning_rate": 0.00019532876694524232, "loss": 1.492, "step": 1813 }, { "epoch": 0.02357210266339421, "grad_norm": 0.38049957156181335, "learning_rate": 0.00019532616748333098, "loss": 1.3762, "step": 1814 }, { "epoch": 0.02358509720731008, "grad_norm": 0.44767656922340393, "learning_rate": 0.00019532356802141957, "loss": 1.5042, "step": 1815 }, { "epoch": 0.023598091751225955, "grad_norm": 0.41038426756858826, "learning_rate": 0.0001953209685595082, "loss": 1.4511, "step": 1816 }, { "epoch": 0.023611086295141828, "grad_norm": 0.45387715101242065, "learning_rate": 0.0001953183690975968, "loss": 1.5997, "step": 1817 }, { "epoch": 0.0236240808390577, "grad_norm": 0.4338409900665283, "learning_rate": 0.00019531576963568542, "loss": 1.5321, "step": 1818 }, { "epoch": 0.023637075382973573, "grad_norm": 0.3182096779346466, "learning_rate": 0.00019531317017377404, "loss": 1.331, "step": 1819 }, { "epoch": 0.023650069926889446, "grad_norm": 0.39733293652534485, "learning_rate": 0.00019531057071186264, "loss": 1.3856, "step": 1820 }, { "epoch": 0.02366306447080532, "grad_norm": 0.3503945469856262, "learning_rate": 0.00019530797124995127, "loss": 1.3635, "step": 1821 }, { "epoch": 0.023676059014721192, "grad_norm": 0.3881324231624603, "learning_rate": 0.0001953053717880399, "loss": 1.4314, "step": 1822 }, { "epoch": 0.02368905355863707, "grad_norm": 0.46018099784851074, "learning_rate": 0.00019530277232612851, "loss": 1.532, "step": 1823 }, { "epoch": 0.02370204810255294, "grad_norm": 0.3509732782840729, "learning_rate": 0.0001953001728642171, "loss": 1.4576, "step": 1824 }, { "epoch": 0.023715042646468815, "grad_norm": 0.38438957929611206, "learning_rate": 0.0001952975734023057, "loss": 1.4521, "step": 1825 }, { "epoch": 0.023728037190384688, "grad_norm": 0.4050957262516022, "learning_rate": 0.00019529497394039436, "loss": 1.5061, "step": 1826 }, { "epoch": 0.02374103173430056, "grad_norm": 0.33474844694137573, "learning_rate": 0.00019529237447848296, "loss": 1.5358, "step": 1827 }, { "epoch": 0.023754026278216434, "grad_norm": 0.4549270570278168, "learning_rate": 0.00019528977501657158, "loss": 1.3857, "step": 1828 }, { "epoch": 0.023767020822132306, "grad_norm": 0.4617195427417755, "learning_rate": 0.00019528717555466018, "loss": 1.5747, "step": 1829 }, { "epoch": 0.02378001536604818, "grad_norm": 0.3896535634994507, "learning_rate": 0.0001952845760927488, "loss": 1.3527, "step": 1830 }, { "epoch": 0.023793009909964052, "grad_norm": 0.43055301904678345, "learning_rate": 0.00019528197663083743, "loss": 1.5093, "step": 1831 }, { "epoch": 0.02380600445387993, "grad_norm": 0.36662182211875916, "learning_rate": 0.00019527937716892603, "loss": 1.3762, "step": 1832 }, { "epoch": 0.023818998997795802, "grad_norm": 0.2322726547718048, "learning_rate": 0.00019527677770701465, "loss": 1.3949, "step": 1833 }, { "epoch": 0.023831993541711675, "grad_norm": 0.3672502636909485, "learning_rate": 0.00019527417824510328, "loss": 1.4479, "step": 1834 }, { "epoch": 0.023844988085627548, "grad_norm": 0.2350219041109085, "learning_rate": 0.0001952715787831919, "loss": 1.2864, "step": 1835 }, { "epoch": 0.02385798262954342, "grad_norm": 0.4465300738811493, "learning_rate": 0.0001952689793212805, "loss": 1.6505, "step": 1836 }, { "epoch": 0.023870977173459294, "grad_norm": 0.4024832546710968, "learning_rate": 0.00019526637985936912, "loss": 1.5301, "step": 1837 }, { "epoch": 0.023883971717375167, "grad_norm": 0.42621225118637085, "learning_rate": 0.00019526378039745775, "loss": 1.4636, "step": 1838 }, { "epoch": 0.02389696626129104, "grad_norm": 0.5147488713264465, "learning_rate": 0.00019526118093554634, "loss": 1.6715, "step": 1839 }, { "epoch": 0.023909960805206912, "grad_norm": 0.24608314037322998, "learning_rate": 0.00019525858147363497, "loss": 1.3956, "step": 1840 }, { "epoch": 0.023922955349122785, "grad_norm": 0.35765427350997925, "learning_rate": 0.0001952559820117236, "loss": 1.4464, "step": 1841 }, { "epoch": 0.023935949893038662, "grad_norm": 0.4051652252674103, "learning_rate": 0.0001952533825498122, "loss": 1.4202, "step": 1842 }, { "epoch": 0.023948944436954535, "grad_norm": 0.35005876421928406, "learning_rate": 0.00019525078308790081, "loss": 1.6109, "step": 1843 }, { "epoch": 0.023961938980870408, "grad_norm": 0.44820502400398254, "learning_rate": 0.0001952481836259894, "loss": 1.5186, "step": 1844 }, { "epoch": 0.02397493352478628, "grad_norm": 0.5122503042221069, "learning_rate": 0.00019524558416407806, "loss": 1.6335, "step": 1845 }, { "epoch": 0.023987928068702154, "grad_norm": 0.37109479308128357, "learning_rate": 0.00019524298470216666, "loss": 1.3491, "step": 1846 }, { "epoch": 0.024000922612618027, "grad_norm": 0.3216516971588135, "learning_rate": 0.00019524038524025529, "loss": 1.4127, "step": 1847 }, { "epoch": 0.0240139171565339, "grad_norm": 0.35651183128356934, "learning_rate": 0.00019523778577834388, "loss": 1.6087, "step": 1848 }, { "epoch": 0.024026911700449773, "grad_norm": 0.3415221571922302, "learning_rate": 0.0001952351863164325, "loss": 1.4724, "step": 1849 }, { "epoch": 0.024039906244365646, "grad_norm": 0.5249360799789429, "learning_rate": 0.00019523258685452113, "loss": 1.5401, "step": 1850 }, { "epoch": 0.024052900788281522, "grad_norm": 0.40630602836608887, "learning_rate": 0.00019522998739260973, "loss": 1.5061, "step": 1851 }, { "epoch": 0.024065895332197395, "grad_norm": 0.46728020906448364, "learning_rate": 0.00019522738793069835, "loss": 1.5968, "step": 1852 }, { "epoch": 0.024078889876113268, "grad_norm": 0.41321757435798645, "learning_rate": 0.00019522478846878698, "loss": 1.6464, "step": 1853 }, { "epoch": 0.02409188442002914, "grad_norm": 0.3643966019153595, "learning_rate": 0.00019522218900687558, "loss": 1.3056, "step": 1854 }, { "epoch": 0.024104878963945014, "grad_norm": 0.3538874685764313, "learning_rate": 0.0001952195895449642, "loss": 1.4451, "step": 1855 }, { "epoch": 0.024117873507860887, "grad_norm": 0.4620124399662018, "learning_rate": 0.0001952169900830528, "loss": 1.5543, "step": 1856 }, { "epoch": 0.02413086805177676, "grad_norm": 0.3777616322040558, "learning_rate": 0.00019521439062114145, "loss": 1.7202, "step": 1857 }, { "epoch": 0.024143862595692633, "grad_norm": 0.38026413321495056, "learning_rate": 0.00019521179115923005, "loss": 1.6294, "step": 1858 }, { "epoch": 0.024156857139608506, "grad_norm": 0.3189028203487396, "learning_rate": 0.00019520919169731867, "loss": 1.3578, "step": 1859 }, { "epoch": 0.02416985168352438, "grad_norm": 0.3826671540737152, "learning_rate": 0.00019520659223540727, "loss": 1.5216, "step": 1860 }, { "epoch": 0.024182846227440255, "grad_norm": 0.44067713618278503, "learning_rate": 0.0001952039927734959, "loss": 1.6301, "step": 1861 }, { "epoch": 0.024195840771356128, "grad_norm": 0.43638303875923157, "learning_rate": 0.00019520139331158452, "loss": 1.5364, "step": 1862 }, { "epoch": 0.024208835315272, "grad_norm": 0.4482448101043701, "learning_rate": 0.00019519879384967311, "loss": 1.4069, "step": 1863 }, { "epoch": 0.024221829859187874, "grad_norm": 0.4252583384513855, "learning_rate": 0.00019519619438776174, "loss": 1.3853, "step": 1864 }, { "epoch": 0.024234824403103747, "grad_norm": 0.40959984064102173, "learning_rate": 0.00019519359492585036, "loss": 1.3477, "step": 1865 }, { "epoch": 0.02424781894701962, "grad_norm": 0.4321820139884949, "learning_rate": 0.000195190995463939, "loss": 1.6339, "step": 1866 }, { "epoch": 0.024260813490935493, "grad_norm": 0.37583255767822266, "learning_rate": 0.00019518839600202759, "loss": 1.5711, "step": 1867 }, { "epoch": 0.024273808034851366, "grad_norm": 0.3513713479042053, "learning_rate": 0.00019518579654011618, "loss": 1.416, "step": 1868 }, { "epoch": 0.02428680257876724, "grad_norm": 0.4160189628601074, "learning_rate": 0.00019518319707820483, "loss": 1.5614, "step": 1869 }, { "epoch": 0.024299797122683115, "grad_norm": 0.21549619734287262, "learning_rate": 0.00019518059761629343, "loss": 1.4135, "step": 1870 }, { "epoch": 0.024312791666598988, "grad_norm": 0.4574928879737854, "learning_rate": 0.00019517799815438206, "loss": 1.509, "step": 1871 }, { "epoch": 0.02432578621051486, "grad_norm": 0.29601985216140747, "learning_rate": 0.00019517539869247068, "loss": 1.4798, "step": 1872 }, { "epoch": 0.024338780754430734, "grad_norm": 0.33058714866638184, "learning_rate": 0.00019517279923055928, "loss": 1.5106, "step": 1873 }, { "epoch": 0.024351775298346607, "grad_norm": 0.553209662437439, "learning_rate": 0.0001951701997686479, "loss": 1.6211, "step": 1874 }, { "epoch": 0.02436476984226248, "grad_norm": 0.31298884749412537, "learning_rate": 0.0001951676003067365, "loss": 1.3592, "step": 1875 }, { "epoch": 0.024377764386178353, "grad_norm": 0.3478134870529175, "learning_rate": 0.00019516500084482515, "loss": 1.3123, "step": 1876 }, { "epoch": 0.024390758930094226, "grad_norm": 0.3965305984020233, "learning_rate": 0.00019516240138291375, "loss": 1.6281, "step": 1877 }, { "epoch": 0.0244037534740101, "grad_norm": 0.3518056869506836, "learning_rate": 0.00019515980192100237, "loss": 1.5344, "step": 1878 }, { "epoch": 0.024416748017925972, "grad_norm": 0.42089545726776123, "learning_rate": 0.00019515720245909097, "loss": 1.5098, "step": 1879 }, { "epoch": 0.024429742561841848, "grad_norm": 0.3613037168979645, "learning_rate": 0.0001951546029971796, "loss": 1.6852, "step": 1880 }, { "epoch": 0.02444273710575772, "grad_norm": 0.46798157691955566, "learning_rate": 0.00019515200353526822, "loss": 1.5023, "step": 1881 }, { "epoch": 0.024455731649673594, "grad_norm": 0.3810153603553772, "learning_rate": 0.00019514940407335682, "loss": 1.4599, "step": 1882 }, { "epoch": 0.024468726193589467, "grad_norm": 0.35787442326545715, "learning_rate": 0.00019514680461144544, "loss": 1.3997, "step": 1883 }, { "epoch": 0.02448172073750534, "grad_norm": 0.3975280225276947, "learning_rate": 0.00019514420514953407, "loss": 1.3629, "step": 1884 }, { "epoch": 0.024494715281421213, "grad_norm": 0.36241644620895386, "learning_rate": 0.00019514160568762266, "loss": 1.4633, "step": 1885 }, { "epoch": 0.024507709825337086, "grad_norm": 0.40416109561920166, "learning_rate": 0.0001951390062257113, "loss": 1.3648, "step": 1886 }, { "epoch": 0.02452070436925296, "grad_norm": 0.3390207290649414, "learning_rate": 0.00019513640676379989, "loss": 1.2775, "step": 1887 }, { "epoch": 0.024533698913168832, "grad_norm": 0.37671610713005066, "learning_rate": 0.00019513380730188854, "loss": 1.496, "step": 1888 }, { "epoch": 0.024546693457084708, "grad_norm": 0.3258741796016693, "learning_rate": 0.00019513120783997713, "loss": 1.3034, "step": 1889 }, { "epoch": 0.02455968800100058, "grad_norm": 0.3780488967895508, "learning_rate": 0.00019512860837806576, "loss": 1.518, "step": 1890 }, { "epoch": 0.024572682544916454, "grad_norm": 0.3583645820617676, "learning_rate": 0.00019512600891615436, "loss": 1.5031, "step": 1891 }, { "epoch": 0.024585677088832327, "grad_norm": 0.28971540927886963, "learning_rate": 0.00019512340945424298, "loss": 1.2767, "step": 1892 }, { "epoch": 0.0245986716327482, "grad_norm": 0.4159455895423889, "learning_rate": 0.0001951208099923316, "loss": 1.4147, "step": 1893 }, { "epoch": 0.024611666176664073, "grad_norm": 0.45120856165885925, "learning_rate": 0.0001951182105304202, "loss": 1.5091, "step": 1894 }, { "epoch": 0.024624660720579946, "grad_norm": 0.2567850649356842, "learning_rate": 0.00019511561106850883, "loss": 1.5043, "step": 1895 }, { "epoch": 0.02463765526449582, "grad_norm": 0.4277418851852417, "learning_rate": 0.00019511301160659745, "loss": 1.5215, "step": 1896 }, { "epoch": 0.024650649808411692, "grad_norm": 0.529853343963623, "learning_rate": 0.00019511041214468605, "loss": 1.607, "step": 1897 }, { "epoch": 0.024663644352327565, "grad_norm": 0.415848970413208, "learning_rate": 0.00019510781268277467, "loss": 1.5035, "step": 1898 }, { "epoch": 0.02467663889624344, "grad_norm": 0.3474975526332855, "learning_rate": 0.00019510521322086327, "loss": 1.4307, "step": 1899 }, { "epoch": 0.024689633440159314, "grad_norm": 0.40108722448349, "learning_rate": 0.00019510261375895192, "loss": 1.6045, "step": 1900 }, { "epoch": 0.024702627984075187, "grad_norm": 0.3662712574005127, "learning_rate": 0.00019510001429704052, "loss": 1.5101, "step": 1901 }, { "epoch": 0.02471562252799106, "grad_norm": 0.3879152536392212, "learning_rate": 0.00019509741483512914, "loss": 1.513, "step": 1902 }, { "epoch": 0.024728617071906933, "grad_norm": 0.4358268082141876, "learning_rate": 0.00019509481537321774, "loss": 1.4907, "step": 1903 }, { "epoch": 0.024741611615822806, "grad_norm": 0.2656150162220001, "learning_rate": 0.00019509221591130637, "loss": 1.2582, "step": 1904 }, { "epoch": 0.02475460615973868, "grad_norm": 0.3241492509841919, "learning_rate": 0.000195089616449395, "loss": 1.4139, "step": 1905 }, { "epoch": 0.024767600703654552, "grad_norm": 0.33699047565460205, "learning_rate": 0.0001950870169874836, "loss": 1.4947, "step": 1906 }, { "epoch": 0.024780595247570425, "grad_norm": 0.3858382999897003, "learning_rate": 0.0001950844175255722, "loss": 1.4898, "step": 1907 }, { "epoch": 0.0247935897914863, "grad_norm": 0.42251360416412354, "learning_rate": 0.00019508181806366084, "loss": 1.5405, "step": 1908 }, { "epoch": 0.024806584335402174, "grad_norm": 0.41407859325408936, "learning_rate": 0.00019507921860174943, "loss": 1.5812, "step": 1909 }, { "epoch": 0.024819578879318047, "grad_norm": 0.4241170287132263, "learning_rate": 0.00019507661913983806, "loss": 1.6296, "step": 1910 }, { "epoch": 0.02483257342323392, "grad_norm": 0.40611186623573303, "learning_rate": 0.00019507401967792668, "loss": 1.5777, "step": 1911 }, { "epoch": 0.024845567967149793, "grad_norm": 0.31183817982673645, "learning_rate": 0.0001950714202160153, "loss": 1.4478, "step": 1912 }, { "epoch": 0.024858562511065666, "grad_norm": 0.310866117477417, "learning_rate": 0.0001950688207541039, "loss": 1.393, "step": 1913 }, { "epoch": 0.02487155705498154, "grad_norm": 0.3437785804271698, "learning_rate": 0.00019506622129219253, "loss": 1.3354, "step": 1914 }, { "epoch": 0.024884551598897412, "grad_norm": 0.36962059140205383, "learning_rate": 0.00019506362183028115, "loss": 1.7343, "step": 1915 }, { "epoch": 0.024897546142813285, "grad_norm": 0.3808988630771637, "learning_rate": 0.00019506102236836975, "loss": 1.5288, "step": 1916 }, { "epoch": 0.024910540686729158, "grad_norm": 0.4140763282775879, "learning_rate": 0.00019505842290645838, "loss": 1.4053, "step": 1917 }, { "epoch": 0.024923535230645034, "grad_norm": 0.4288756549358368, "learning_rate": 0.00019505582344454697, "loss": 1.499, "step": 1918 }, { "epoch": 0.024936529774560907, "grad_norm": 0.3769247531890869, "learning_rate": 0.00019505322398263562, "loss": 1.3153, "step": 1919 }, { "epoch": 0.02494952431847678, "grad_norm": 0.3923449218273163, "learning_rate": 0.00019505062452072422, "loss": 1.3731, "step": 1920 }, { "epoch": 0.024962518862392653, "grad_norm": 0.40384578704833984, "learning_rate": 0.00019504802505881285, "loss": 1.6362, "step": 1921 }, { "epoch": 0.024975513406308526, "grad_norm": 0.25997865200042725, "learning_rate": 0.00019504542559690144, "loss": 1.4529, "step": 1922 }, { "epoch": 0.0249885079502244, "grad_norm": 0.32743462920188904, "learning_rate": 0.00019504282613499007, "loss": 1.318, "step": 1923 }, { "epoch": 0.025001502494140272, "grad_norm": 0.4509304463863373, "learning_rate": 0.0001950402266730787, "loss": 1.5391, "step": 1924 }, { "epoch": 0.025014497038056145, "grad_norm": 0.4460051953792572, "learning_rate": 0.0001950376272111673, "loss": 1.6095, "step": 1925 }, { "epoch": 0.025027491581972018, "grad_norm": 0.3727940618991852, "learning_rate": 0.00019503502774925592, "loss": 1.3623, "step": 1926 }, { "epoch": 0.025040486125887895, "grad_norm": 0.4771404564380646, "learning_rate": 0.00019503242828734454, "loss": 1.6147, "step": 1927 }, { "epoch": 0.025053480669803768, "grad_norm": 0.40584996342658997, "learning_rate": 0.00019502982882543314, "loss": 1.5194, "step": 1928 }, { "epoch": 0.02506647521371964, "grad_norm": 0.4106747806072235, "learning_rate": 0.00019502722936352176, "loss": 1.3099, "step": 1929 }, { "epoch": 0.025079469757635513, "grad_norm": 0.4870680868625641, "learning_rate": 0.00019502462990161036, "loss": 1.5416, "step": 1930 }, { "epoch": 0.025092464301551386, "grad_norm": 0.30915436148643494, "learning_rate": 0.000195022030439699, "loss": 1.3029, "step": 1931 }, { "epoch": 0.02510545884546726, "grad_norm": 0.30829089879989624, "learning_rate": 0.0001950194309777876, "loss": 1.3773, "step": 1932 }, { "epoch": 0.025118453389383132, "grad_norm": 0.3524402678012848, "learning_rate": 0.00019501683151587623, "loss": 1.4034, "step": 1933 }, { "epoch": 0.025131447933299005, "grad_norm": 0.3223772644996643, "learning_rate": 0.00019501423205396483, "loss": 1.486, "step": 1934 }, { "epoch": 0.025144442477214878, "grad_norm": 0.37729641795158386, "learning_rate": 0.00019501163259205345, "loss": 1.5304, "step": 1935 }, { "epoch": 0.02515743702113075, "grad_norm": 0.4305570423603058, "learning_rate": 0.00019500903313014208, "loss": 1.4949, "step": 1936 }, { "epoch": 0.025170431565046628, "grad_norm": 0.28514212369918823, "learning_rate": 0.00019500643366823068, "loss": 1.119, "step": 1937 }, { "epoch": 0.0251834261089625, "grad_norm": 0.2823854088783264, "learning_rate": 0.0001950038342063193, "loss": 1.411, "step": 1938 }, { "epoch": 0.025196420652878374, "grad_norm": 0.49034997820854187, "learning_rate": 0.00019500123474440792, "loss": 1.6508, "step": 1939 }, { "epoch": 0.025209415196794246, "grad_norm": 0.37515679001808167, "learning_rate": 0.00019499863528249652, "loss": 1.507, "step": 1940 }, { "epoch": 0.02522240974071012, "grad_norm": 0.4184080958366394, "learning_rate": 0.00019499603582058515, "loss": 1.4623, "step": 1941 }, { "epoch": 0.025235404284625992, "grad_norm": 0.35954853892326355, "learning_rate": 0.00019499343635867374, "loss": 1.3605, "step": 1942 }, { "epoch": 0.025248398828541865, "grad_norm": 0.39759063720703125, "learning_rate": 0.0001949908368967624, "loss": 1.4, "step": 1943 }, { "epoch": 0.02526139337245774, "grad_norm": 0.3427615165710449, "learning_rate": 0.000194988237434851, "loss": 1.294, "step": 1944 }, { "epoch": 0.02527438791637361, "grad_norm": 0.35400789976119995, "learning_rate": 0.00019498563797293962, "loss": 1.5946, "step": 1945 }, { "epoch": 0.025287382460289484, "grad_norm": 0.3394954800605774, "learning_rate": 0.00019498303851102824, "loss": 1.2008, "step": 1946 }, { "epoch": 0.02530037700420536, "grad_norm": 0.43507492542266846, "learning_rate": 0.00019498043904911684, "loss": 1.4453, "step": 1947 }, { "epoch": 0.025313371548121234, "grad_norm": 0.3879145383834839, "learning_rate": 0.00019497783958720546, "loss": 1.451, "step": 1948 }, { "epoch": 0.025326366092037107, "grad_norm": 0.4290544390678406, "learning_rate": 0.00019497524012529406, "loss": 1.5188, "step": 1949 }, { "epoch": 0.02533936063595298, "grad_norm": 0.45724618434906006, "learning_rate": 0.0001949726406633827, "loss": 1.5062, "step": 1950 }, { "epoch": 0.025352355179868852, "grad_norm": 0.3396241068840027, "learning_rate": 0.0001949700412014713, "loss": 1.4249, "step": 1951 }, { "epoch": 0.025365349723784725, "grad_norm": 0.4424882233142853, "learning_rate": 0.0001949674417395599, "loss": 1.5906, "step": 1952 }, { "epoch": 0.0253783442677006, "grad_norm": 0.49618202447891235, "learning_rate": 0.00019496484227764853, "loss": 1.6788, "step": 1953 }, { "epoch": 0.02539133881161647, "grad_norm": 0.37435975670814514, "learning_rate": 0.00019496224281573716, "loss": 1.4677, "step": 1954 }, { "epoch": 0.025404333355532344, "grad_norm": 0.3808096945285797, "learning_rate": 0.00019495964335382578, "loss": 1.5246, "step": 1955 }, { "epoch": 0.02541732789944822, "grad_norm": 0.3840477764606476, "learning_rate": 0.00019495704389191438, "loss": 1.5503, "step": 1956 }, { "epoch": 0.025430322443364094, "grad_norm": 0.3403218388557434, "learning_rate": 0.000194954444430003, "loss": 1.3534, "step": 1957 }, { "epoch": 0.025443316987279967, "grad_norm": 0.3999299108982086, "learning_rate": 0.00019495184496809163, "loss": 1.5026, "step": 1958 }, { "epoch": 0.02545631153119584, "grad_norm": 0.3052142858505249, "learning_rate": 0.00019494924550618022, "loss": 1.2833, "step": 1959 }, { "epoch": 0.025469306075111713, "grad_norm": 0.35920408368110657, "learning_rate": 0.00019494664604426885, "loss": 1.5176, "step": 1960 }, { "epoch": 0.025482300619027586, "grad_norm": 0.4335194528102875, "learning_rate": 0.00019494404658235745, "loss": 1.5712, "step": 1961 }, { "epoch": 0.02549529516294346, "grad_norm": 0.31558382511138916, "learning_rate": 0.0001949414471204461, "loss": 1.4686, "step": 1962 }, { "epoch": 0.02550828970685933, "grad_norm": 0.3751327693462372, "learning_rate": 0.0001949388476585347, "loss": 1.3645, "step": 1963 }, { "epoch": 0.025521284250775204, "grad_norm": 0.365709513425827, "learning_rate": 0.0001949362481966233, "loss": 1.3094, "step": 1964 }, { "epoch": 0.025534278794691077, "grad_norm": 0.35730892419815063, "learning_rate": 0.00019493364873471192, "loss": 1.4899, "step": 1965 }, { "epoch": 0.025547273338606954, "grad_norm": 0.37063705921173096, "learning_rate": 0.00019493104927280054, "loss": 1.6645, "step": 1966 }, { "epoch": 0.025560267882522827, "grad_norm": 0.4170796275138855, "learning_rate": 0.00019492844981088917, "loss": 1.2274, "step": 1967 }, { "epoch": 0.0255732624264387, "grad_norm": 0.4714570939540863, "learning_rate": 0.00019492585034897776, "loss": 1.5636, "step": 1968 }, { "epoch": 0.025586256970354573, "grad_norm": 0.41997700929641724, "learning_rate": 0.0001949232508870664, "loss": 1.3946, "step": 1969 }, { "epoch": 0.025599251514270446, "grad_norm": 0.3678540587425232, "learning_rate": 0.000194920651425155, "loss": 1.48, "step": 1970 }, { "epoch": 0.02561224605818632, "grad_norm": 0.39316561818122864, "learning_rate": 0.0001949180519632436, "loss": 1.53, "step": 1971 }, { "epoch": 0.02562524060210219, "grad_norm": 0.3689624071121216, "learning_rate": 0.00019491545250133223, "loss": 1.332, "step": 1972 }, { "epoch": 0.025638235146018064, "grad_norm": 0.3830846846103668, "learning_rate": 0.00019491285303942083, "loss": 1.4066, "step": 1973 }, { "epoch": 0.025651229689933937, "grad_norm": 0.3716714382171631, "learning_rate": 0.00019491025357750948, "loss": 1.3653, "step": 1974 }, { "epoch": 0.025664224233849814, "grad_norm": 0.3765179216861725, "learning_rate": 0.00019490765411559808, "loss": 1.5237, "step": 1975 }, { "epoch": 0.025677218777765687, "grad_norm": 0.33990898728370667, "learning_rate": 0.00019490505465368668, "loss": 1.3687, "step": 1976 }, { "epoch": 0.02569021332168156, "grad_norm": 0.5103486776351929, "learning_rate": 0.0001949024551917753, "loss": 1.4904, "step": 1977 }, { "epoch": 0.025703207865597433, "grad_norm": 0.49304041266441345, "learning_rate": 0.00019489985572986393, "loss": 1.5119, "step": 1978 }, { "epoch": 0.025716202409513306, "grad_norm": 0.33284151554107666, "learning_rate": 0.00019489725626795255, "loss": 1.4815, "step": 1979 }, { "epoch": 0.02572919695342918, "grad_norm": 0.4878380298614502, "learning_rate": 0.00019489465680604115, "loss": 1.6747, "step": 1980 }, { "epoch": 0.02574219149734505, "grad_norm": 0.607802152633667, "learning_rate": 0.00019489205734412977, "loss": 1.5806, "step": 1981 }, { "epoch": 0.025755186041260925, "grad_norm": 0.2903883457183838, "learning_rate": 0.0001948894578822184, "loss": 1.4749, "step": 1982 }, { "epoch": 0.025768180585176798, "grad_norm": 0.28316977620124817, "learning_rate": 0.000194886858420307, "loss": 1.4754, "step": 1983 }, { "epoch": 0.02578117512909267, "grad_norm": 0.40020421147346497, "learning_rate": 0.00019488425895839562, "loss": 1.634, "step": 1984 }, { "epoch": 0.025794169673008547, "grad_norm": 0.4239027500152588, "learning_rate": 0.00019488165949648424, "loss": 1.6033, "step": 1985 }, { "epoch": 0.02580716421692442, "grad_norm": 0.4065403938293457, "learning_rate": 0.00019487906003457287, "loss": 1.4784, "step": 1986 }, { "epoch": 0.025820158760840293, "grad_norm": 0.46506601572036743, "learning_rate": 0.00019487646057266147, "loss": 1.561, "step": 1987 }, { "epoch": 0.025833153304756166, "grad_norm": 0.2645089030265808, "learning_rate": 0.0001948738611107501, "loss": 1.4561, "step": 1988 }, { "epoch": 0.02584614784867204, "grad_norm": 0.34971901774406433, "learning_rate": 0.00019487126164883872, "loss": 1.4301, "step": 1989 }, { "epoch": 0.02585914239258791, "grad_norm": 0.3946782946586609, "learning_rate": 0.0001948686621869273, "loss": 1.5002, "step": 1990 }, { "epoch": 0.025872136936503785, "grad_norm": 0.3921728730201721, "learning_rate": 0.00019486606272501594, "loss": 1.5428, "step": 1991 }, { "epoch": 0.025885131480419658, "grad_norm": 0.4433472454547882, "learning_rate": 0.00019486346326310453, "loss": 1.4727, "step": 1992 }, { "epoch": 0.02589812602433553, "grad_norm": 0.42046865820884705, "learning_rate": 0.00019486086380119316, "loss": 1.5136, "step": 1993 }, { "epoch": 0.025911120568251407, "grad_norm": 0.3857981860637665, "learning_rate": 0.00019485826433928178, "loss": 1.4629, "step": 1994 }, { "epoch": 0.02592411511216728, "grad_norm": 0.43305647373199463, "learning_rate": 0.00019485566487737038, "loss": 1.4255, "step": 1995 }, { "epoch": 0.025937109656083153, "grad_norm": 0.40418142080307007, "learning_rate": 0.000194853065415459, "loss": 1.5196, "step": 1996 }, { "epoch": 0.025950104199999026, "grad_norm": 0.3565748333930969, "learning_rate": 0.00019485046595354763, "loss": 1.531, "step": 1997 }, { "epoch": 0.0259630987439149, "grad_norm": 0.3650085926055908, "learning_rate": 0.00019484786649163625, "loss": 1.3057, "step": 1998 }, { "epoch": 0.025976093287830772, "grad_norm": 0.3957287073135376, "learning_rate": 0.00019484526702972485, "loss": 1.4302, "step": 1999 }, { "epoch": 0.025989087831746645, "grad_norm": 0.37043535709381104, "learning_rate": 0.00019484266756781348, "loss": 1.525, "step": 2000 }, { "epoch": 0.026002082375662518, "grad_norm": 0.37735459208488464, "learning_rate": 0.0001948400681059021, "loss": 1.4637, "step": 2001 }, { "epoch": 0.02601507691957839, "grad_norm": 0.3438260853290558, "learning_rate": 0.0001948374686439907, "loss": 1.5209, "step": 2002 }, { "epoch": 0.026028071463494264, "grad_norm": 0.3096390664577484, "learning_rate": 0.00019483486918207932, "loss": 1.3843, "step": 2003 }, { "epoch": 0.02604106600741014, "grad_norm": 0.38412708044052124, "learning_rate": 0.00019483226972016792, "loss": 1.5368, "step": 2004 }, { "epoch": 0.026054060551326013, "grad_norm": 0.2826448976993561, "learning_rate": 0.00019482967025825654, "loss": 1.3217, "step": 2005 }, { "epoch": 0.026067055095241886, "grad_norm": 0.27543365955352783, "learning_rate": 0.00019482707079634517, "loss": 1.3478, "step": 2006 }, { "epoch": 0.02608004963915776, "grad_norm": 0.4210810363292694, "learning_rate": 0.00019482447133443377, "loss": 1.6229, "step": 2007 }, { "epoch": 0.026093044183073632, "grad_norm": 0.37372809648513794, "learning_rate": 0.0001948218718725224, "loss": 1.5294, "step": 2008 }, { "epoch": 0.026106038726989505, "grad_norm": 0.38795676827430725, "learning_rate": 0.00019481927241061102, "loss": 1.4597, "step": 2009 }, { "epoch": 0.026119033270905378, "grad_norm": 0.49610623717308044, "learning_rate": 0.00019481667294869964, "loss": 1.7004, "step": 2010 }, { "epoch": 0.02613202781482125, "grad_norm": 0.3665335476398468, "learning_rate": 0.00019481407348678824, "loss": 1.4932, "step": 2011 }, { "epoch": 0.026145022358737124, "grad_norm": 0.3332715332508087, "learning_rate": 0.00019481147402487686, "loss": 1.4019, "step": 2012 }, { "epoch": 0.026158016902653, "grad_norm": 0.47080639004707336, "learning_rate": 0.00019480887456296549, "loss": 1.5523, "step": 2013 }, { "epoch": 0.026171011446568873, "grad_norm": 0.39187318086624146, "learning_rate": 0.00019480627510105408, "loss": 1.6874, "step": 2014 }, { "epoch": 0.026184005990484746, "grad_norm": 0.3465287685394287, "learning_rate": 0.0001948036756391427, "loss": 1.2811, "step": 2015 }, { "epoch": 0.02619700053440062, "grad_norm": 0.4644133746623993, "learning_rate": 0.0001948010761772313, "loss": 1.5755, "step": 2016 }, { "epoch": 0.026209995078316492, "grad_norm": 0.37882065773010254, "learning_rate": 0.00019479847671531996, "loss": 1.5025, "step": 2017 }, { "epoch": 0.026222989622232365, "grad_norm": 0.31355947256088257, "learning_rate": 0.00019479587725340855, "loss": 1.2981, "step": 2018 }, { "epoch": 0.026235984166148238, "grad_norm": 0.39792558550834656, "learning_rate": 0.00019479327779149715, "loss": 1.4684, "step": 2019 }, { "epoch": 0.02624897871006411, "grad_norm": 0.38897544145584106, "learning_rate": 0.00019479067832958578, "loss": 1.4391, "step": 2020 }, { "epoch": 0.026261973253979984, "grad_norm": 0.33233538269996643, "learning_rate": 0.0001947880788676744, "loss": 1.4077, "step": 2021 }, { "epoch": 0.026274967797895857, "grad_norm": 0.2888289988040924, "learning_rate": 0.00019478547940576303, "loss": 1.1484, "step": 2022 }, { "epoch": 0.026287962341811733, "grad_norm": 0.3799711763858795, "learning_rate": 0.00019478287994385162, "loss": 1.6634, "step": 2023 }, { "epoch": 0.026300956885727606, "grad_norm": 0.3666038513183594, "learning_rate": 0.00019478028048194025, "loss": 1.5296, "step": 2024 }, { "epoch": 0.02631395142964348, "grad_norm": 0.3440206050872803, "learning_rate": 0.00019477768102002887, "loss": 1.6052, "step": 2025 }, { "epoch": 0.026326945973559352, "grad_norm": 0.4239407479763031, "learning_rate": 0.00019477508155811747, "loss": 1.7206, "step": 2026 }, { "epoch": 0.026339940517475225, "grad_norm": 0.37203747034072876, "learning_rate": 0.0001947724820962061, "loss": 1.5201, "step": 2027 }, { "epoch": 0.026352935061391098, "grad_norm": 0.40613853931427, "learning_rate": 0.00019476988263429472, "loss": 1.327, "step": 2028 }, { "epoch": 0.02636592960530697, "grad_norm": 0.3439273238182068, "learning_rate": 0.00019476728317238334, "loss": 1.4519, "step": 2029 }, { "epoch": 0.026378924149222844, "grad_norm": 0.32606256008148193, "learning_rate": 0.00019476468371047194, "loss": 1.4166, "step": 2030 }, { "epoch": 0.026391918693138717, "grad_norm": 0.41630515456199646, "learning_rate": 0.00019476208424856054, "loss": 1.4042, "step": 2031 }, { "epoch": 0.026404913237054593, "grad_norm": 0.30288708209991455, "learning_rate": 0.0001947594847866492, "loss": 1.3639, "step": 2032 }, { "epoch": 0.026417907780970466, "grad_norm": 0.3434416949748993, "learning_rate": 0.00019475688532473779, "loss": 1.4555, "step": 2033 }, { "epoch": 0.02643090232488634, "grad_norm": 0.4458916485309601, "learning_rate": 0.0001947542858628264, "loss": 1.4114, "step": 2034 }, { "epoch": 0.026443896868802212, "grad_norm": 0.3650971055030823, "learning_rate": 0.000194751686400915, "loss": 1.4294, "step": 2035 }, { "epoch": 0.026456891412718085, "grad_norm": 0.4173720180988312, "learning_rate": 0.00019474908693900363, "loss": 1.6647, "step": 2036 }, { "epoch": 0.026469885956633958, "grad_norm": 0.4249539375305176, "learning_rate": 0.00019474648747709226, "loss": 1.6043, "step": 2037 }, { "epoch": 0.02648288050054983, "grad_norm": 0.35150468349456787, "learning_rate": 0.00019474388801518085, "loss": 1.5264, "step": 2038 }, { "epoch": 0.026495875044465704, "grad_norm": 0.3060664236545563, "learning_rate": 0.00019474128855326948, "loss": 1.4176, "step": 2039 }, { "epoch": 0.026508869588381577, "grad_norm": 0.3721410632133484, "learning_rate": 0.0001947386890913581, "loss": 1.5991, "step": 2040 }, { "epoch": 0.02652186413229745, "grad_norm": 0.2834204435348511, "learning_rate": 0.00019473608962944673, "loss": 1.4996, "step": 2041 }, { "epoch": 0.026534858676213326, "grad_norm": 0.3835855722427368, "learning_rate": 0.00019473349016753533, "loss": 1.6001, "step": 2042 }, { "epoch": 0.0265478532201292, "grad_norm": 0.544060230255127, "learning_rate": 0.00019473089070562395, "loss": 1.4033, "step": 2043 }, { "epoch": 0.026560847764045072, "grad_norm": 0.4304278790950775, "learning_rate": 0.00019472829124371257, "loss": 1.4974, "step": 2044 }, { "epoch": 0.026573842307960945, "grad_norm": 0.36693310737609863, "learning_rate": 0.00019472569178180117, "loss": 1.4544, "step": 2045 }, { "epoch": 0.026586836851876818, "grad_norm": 0.3819798231124878, "learning_rate": 0.0001947230923198898, "loss": 1.4423, "step": 2046 }, { "epoch": 0.02659983139579269, "grad_norm": 0.43944051861763, "learning_rate": 0.0001947204928579784, "loss": 1.7041, "step": 2047 }, { "epoch": 0.026612825939708564, "grad_norm": 0.4430011808872223, "learning_rate": 0.00019471789339606702, "loss": 1.405, "step": 2048 }, { "epoch": 0.026625820483624437, "grad_norm": 0.4401518404483795, "learning_rate": 0.00019471529393415564, "loss": 1.6303, "step": 2049 }, { "epoch": 0.02663881502754031, "grad_norm": 0.32380321621894836, "learning_rate": 0.00019471269447224424, "loss": 1.5584, "step": 2050 }, { "epoch": 0.026651809571456186, "grad_norm": 0.3804262578487396, "learning_rate": 0.00019471009501033286, "loss": 1.4661, "step": 2051 }, { "epoch": 0.02666480411537206, "grad_norm": 0.35763663053512573, "learning_rate": 0.0001947074955484215, "loss": 1.2534, "step": 2052 }, { "epoch": 0.026677798659287932, "grad_norm": 0.4508529007434845, "learning_rate": 0.0001947048960865101, "loss": 1.5882, "step": 2053 }, { "epoch": 0.026690793203203805, "grad_norm": 0.3657906651496887, "learning_rate": 0.0001947022966245987, "loss": 1.4638, "step": 2054 }, { "epoch": 0.02670378774711968, "grad_norm": 0.3245506286621094, "learning_rate": 0.00019469969716268734, "loss": 1.3875, "step": 2055 }, { "epoch": 0.02671678229103555, "grad_norm": 0.27691057324409485, "learning_rate": 0.00019469709770077596, "loss": 1.1873, "step": 2056 }, { "epoch": 0.026729776834951424, "grad_norm": 0.29322341084480286, "learning_rate": 0.00019469449823886456, "loss": 1.3471, "step": 2057 }, { "epoch": 0.026742771378867297, "grad_norm": 0.34089669585227966, "learning_rate": 0.00019469189877695318, "loss": 1.3804, "step": 2058 }, { "epoch": 0.02675576592278317, "grad_norm": 0.3724285364151001, "learning_rate": 0.0001946892993150418, "loss": 1.5864, "step": 2059 }, { "epoch": 0.026768760466699043, "grad_norm": 0.3318098783493042, "learning_rate": 0.0001946866998531304, "loss": 1.4543, "step": 2060 }, { "epoch": 0.02678175501061492, "grad_norm": 0.3071412444114685, "learning_rate": 0.00019468410039121903, "loss": 1.249, "step": 2061 }, { "epoch": 0.026794749554530792, "grad_norm": 0.31382542848587036, "learning_rate": 0.00019468150092930763, "loss": 1.2915, "step": 2062 }, { "epoch": 0.026807744098446665, "grad_norm": 0.5058941841125488, "learning_rate": 0.00019467890146739628, "loss": 1.5519, "step": 2063 }, { "epoch": 0.02682073864236254, "grad_norm": 0.4811553359031677, "learning_rate": 0.00019467630200548487, "loss": 1.4646, "step": 2064 }, { "epoch": 0.02683373318627841, "grad_norm": 0.3544370234012604, "learning_rate": 0.0001946737025435735, "loss": 1.2672, "step": 2065 }, { "epoch": 0.026846727730194284, "grad_norm": 0.3728879988193512, "learning_rate": 0.0001946711030816621, "loss": 1.4612, "step": 2066 }, { "epoch": 0.026859722274110157, "grad_norm": 0.320218563079834, "learning_rate": 0.00019466850361975072, "loss": 1.4772, "step": 2067 }, { "epoch": 0.02687271681802603, "grad_norm": 0.3651033937931061, "learning_rate": 0.00019466590415783934, "loss": 1.3758, "step": 2068 }, { "epoch": 0.026885711361941903, "grad_norm": 0.32942336797714233, "learning_rate": 0.00019466330469592794, "loss": 1.3695, "step": 2069 }, { "epoch": 0.02689870590585778, "grad_norm": 0.3832577168941498, "learning_rate": 0.00019466070523401657, "loss": 1.5221, "step": 2070 }, { "epoch": 0.026911700449773653, "grad_norm": 0.4433085322380066, "learning_rate": 0.0001946581057721052, "loss": 1.3553, "step": 2071 }, { "epoch": 0.026924694993689526, "grad_norm": 0.3786352574825287, "learning_rate": 0.00019465550631019382, "loss": 1.3309, "step": 2072 }, { "epoch": 0.0269376895376054, "grad_norm": 0.22439265251159668, "learning_rate": 0.0001946529068482824, "loss": 1.1815, "step": 2073 }, { "epoch": 0.02695068408152127, "grad_norm": 0.3922896087169647, "learning_rate": 0.000194650307386371, "loss": 1.4499, "step": 2074 }, { "epoch": 0.026963678625437144, "grad_norm": 0.43656137585639954, "learning_rate": 0.00019464770792445966, "loss": 1.7509, "step": 2075 }, { "epoch": 0.026976673169353017, "grad_norm": 0.3852211833000183, "learning_rate": 0.00019464510846254826, "loss": 1.3327, "step": 2076 }, { "epoch": 0.02698966771326889, "grad_norm": 0.33573904633522034, "learning_rate": 0.00019464250900063688, "loss": 1.4515, "step": 2077 }, { "epoch": 0.027002662257184763, "grad_norm": 0.32149389386177063, "learning_rate": 0.00019463990953872548, "loss": 1.2627, "step": 2078 }, { "epoch": 0.027015656801100636, "grad_norm": 0.30632153153419495, "learning_rate": 0.0001946373100768141, "loss": 1.0881, "step": 2079 }, { "epoch": 0.027028651345016513, "grad_norm": 0.40625059604644775, "learning_rate": 0.00019463471061490273, "loss": 1.3931, "step": 2080 }, { "epoch": 0.027041645888932386, "grad_norm": 0.4922195076942444, "learning_rate": 0.00019463211115299133, "loss": 1.5984, "step": 2081 }, { "epoch": 0.02705464043284826, "grad_norm": 0.36153602600097656, "learning_rate": 0.00019462951169107995, "loss": 1.1035, "step": 2082 }, { "epoch": 0.02706763497676413, "grad_norm": 0.40944454073905945, "learning_rate": 0.00019462691222916858, "loss": 1.4335, "step": 2083 }, { "epoch": 0.027080629520680004, "grad_norm": 0.3449093699455261, "learning_rate": 0.0001946243127672572, "loss": 1.4577, "step": 2084 }, { "epoch": 0.027093624064595877, "grad_norm": 0.27790942788124084, "learning_rate": 0.0001946217133053458, "loss": 1.2703, "step": 2085 }, { "epoch": 0.02710661860851175, "grad_norm": 0.37999653816223145, "learning_rate": 0.0001946191138434344, "loss": 1.3875, "step": 2086 }, { "epoch": 0.027119613152427623, "grad_norm": 0.38547950983047485, "learning_rate": 0.00019461651438152305, "loss": 1.4481, "step": 2087 }, { "epoch": 0.027132607696343496, "grad_norm": 0.37379974126815796, "learning_rate": 0.00019461391491961164, "loss": 1.3661, "step": 2088 }, { "epoch": 0.027145602240259373, "grad_norm": 0.39012619853019714, "learning_rate": 0.00019461131545770027, "loss": 1.4049, "step": 2089 }, { "epoch": 0.027158596784175246, "grad_norm": 0.46945053339004517, "learning_rate": 0.00019460871599578887, "loss": 1.5731, "step": 2090 }, { "epoch": 0.02717159132809112, "grad_norm": 0.2752492427825928, "learning_rate": 0.0001946061165338775, "loss": 1.4742, "step": 2091 }, { "epoch": 0.02718458587200699, "grad_norm": 0.3483410179615021, "learning_rate": 0.00019460351707196612, "loss": 1.363, "step": 2092 }, { "epoch": 0.027197580415922865, "grad_norm": 0.3654286563396454, "learning_rate": 0.0001946009176100547, "loss": 1.6469, "step": 2093 }, { "epoch": 0.027210574959838738, "grad_norm": 0.4013746380805969, "learning_rate": 0.00019459831814814334, "loss": 1.4361, "step": 2094 }, { "epoch": 0.02722356950375461, "grad_norm": 0.33663803339004517, "learning_rate": 0.00019459571868623196, "loss": 1.343, "step": 2095 }, { "epoch": 0.027236564047670483, "grad_norm": 0.33509793877601624, "learning_rate": 0.0001945931192243206, "loss": 1.469, "step": 2096 }, { "epoch": 0.027249558591586356, "grad_norm": 0.36451658606529236, "learning_rate": 0.00019459051976240918, "loss": 1.4419, "step": 2097 }, { "epoch": 0.02726255313550223, "grad_norm": 0.45805126428604126, "learning_rate": 0.0001945879203004978, "loss": 1.4376, "step": 2098 }, { "epoch": 0.027275547679418106, "grad_norm": 0.3644254505634308, "learning_rate": 0.00019458532083858643, "loss": 1.4541, "step": 2099 }, { "epoch": 0.02728854222333398, "grad_norm": 0.544037938117981, "learning_rate": 0.00019458272137667503, "loss": 1.5757, "step": 2100 }, { "epoch": 0.02730153676724985, "grad_norm": 0.434401273727417, "learning_rate": 0.00019458012191476365, "loss": 1.5415, "step": 2101 }, { "epoch": 0.027314531311165725, "grad_norm": 0.21660558879375458, "learning_rate": 0.00019457752245285228, "loss": 1.4633, "step": 2102 }, { "epoch": 0.027327525855081598, "grad_norm": 0.5315440893173218, "learning_rate": 0.00019457492299094088, "loss": 1.5467, "step": 2103 }, { "epoch": 0.02734052039899747, "grad_norm": 0.31882423162460327, "learning_rate": 0.0001945723235290295, "loss": 1.3665, "step": 2104 }, { "epoch": 0.027353514942913344, "grad_norm": 0.3948776125907898, "learning_rate": 0.0001945697240671181, "loss": 1.477, "step": 2105 }, { "epoch": 0.027366509486829216, "grad_norm": 0.3557848036289215, "learning_rate": 0.00019456712460520675, "loss": 1.4599, "step": 2106 }, { "epoch": 0.02737950403074509, "grad_norm": 0.3059878945350647, "learning_rate": 0.00019456452514329535, "loss": 1.4175, "step": 2107 }, { "epoch": 0.027392498574660966, "grad_norm": 0.38770779967308044, "learning_rate": 0.00019456192568138397, "loss": 1.5319, "step": 2108 }, { "epoch": 0.02740549311857684, "grad_norm": 0.4011865258216858, "learning_rate": 0.00019455932621947257, "loss": 1.3311, "step": 2109 }, { "epoch": 0.027418487662492712, "grad_norm": 0.2876737713813782, "learning_rate": 0.0001945567267575612, "loss": 1.1813, "step": 2110 }, { "epoch": 0.027431482206408585, "grad_norm": 0.339067280292511, "learning_rate": 0.00019455412729564982, "loss": 1.2076, "step": 2111 }, { "epoch": 0.027444476750324458, "grad_norm": 0.43654006719589233, "learning_rate": 0.00019455152783373842, "loss": 1.5403, "step": 2112 }, { "epoch": 0.02745747129424033, "grad_norm": 0.4195444881916046, "learning_rate": 0.00019454892837182704, "loss": 1.5803, "step": 2113 }, { "epoch": 0.027470465838156204, "grad_norm": 0.37961822748184204, "learning_rate": 0.00019454632890991566, "loss": 1.3663, "step": 2114 }, { "epoch": 0.027483460382072077, "grad_norm": 0.46763333678245544, "learning_rate": 0.00019454372944800426, "loss": 1.6148, "step": 2115 }, { "epoch": 0.02749645492598795, "grad_norm": 0.3583020567893982, "learning_rate": 0.0001945411299860929, "loss": 1.4051, "step": 2116 }, { "epoch": 0.027509449469903823, "grad_norm": 0.3380371928215027, "learning_rate": 0.00019453853052418148, "loss": 1.2766, "step": 2117 }, { "epoch": 0.0275224440138197, "grad_norm": 0.3549646735191345, "learning_rate": 0.00019453593106227014, "loss": 1.4812, "step": 2118 }, { "epoch": 0.027535438557735572, "grad_norm": 0.39473992586135864, "learning_rate": 0.00019453333160035873, "loss": 1.4351, "step": 2119 }, { "epoch": 0.027548433101651445, "grad_norm": 0.3086841106414795, "learning_rate": 0.00019453073213844736, "loss": 1.498, "step": 2120 }, { "epoch": 0.027561427645567318, "grad_norm": 0.3433244526386261, "learning_rate": 0.00019452813267653595, "loss": 1.6794, "step": 2121 }, { "epoch": 0.02757442218948319, "grad_norm": 0.3646318316459656, "learning_rate": 0.00019452553321462458, "loss": 1.4646, "step": 2122 }, { "epoch": 0.027587416733399064, "grad_norm": 0.36260178685188293, "learning_rate": 0.0001945229337527132, "loss": 1.6132, "step": 2123 }, { "epoch": 0.027600411277314937, "grad_norm": 0.3652835488319397, "learning_rate": 0.0001945203342908018, "loss": 1.2324, "step": 2124 }, { "epoch": 0.02761340582123081, "grad_norm": 0.3084219992160797, "learning_rate": 0.00019451773482889043, "loss": 1.2231, "step": 2125 }, { "epoch": 0.027626400365146683, "grad_norm": 0.36105242371559143, "learning_rate": 0.00019451513536697905, "loss": 1.3947, "step": 2126 }, { "epoch": 0.02763939490906256, "grad_norm": 0.6619263887405396, "learning_rate": 0.00019451253590506767, "loss": 1.456, "step": 2127 }, { "epoch": 0.027652389452978432, "grad_norm": 0.3887653052806854, "learning_rate": 0.00019450993644315627, "loss": 1.6393, "step": 2128 }, { "epoch": 0.027665383996894305, "grad_norm": 0.32822638750076294, "learning_rate": 0.00019450733698124487, "loss": 1.3381, "step": 2129 }, { "epoch": 0.027678378540810178, "grad_norm": 0.3243531584739685, "learning_rate": 0.00019450473751933352, "loss": 1.2432, "step": 2130 }, { "epoch": 0.02769137308472605, "grad_norm": 0.38900068402290344, "learning_rate": 0.00019450213805742212, "loss": 1.6306, "step": 2131 }, { "epoch": 0.027704367628641924, "grad_norm": 0.39967986941337585, "learning_rate": 0.00019449953859551074, "loss": 1.5413, "step": 2132 }, { "epoch": 0.027717362172557797, "grad_norm": 0.3535040616989136, "learning_rate": 0.00019449693913359937, "loss": 1.4364, "step": 2133 }, { "epoch": 0.02773035671647367, "grad_norm": 0.34658634662628174, "learning_rate": 0.00019449433967168796, "loss": 1.341, "step": 2134 }, { "epoch": 0.027743351260389543, "grad_norm": 0.34029027819633484, "learning_rate": 0.0001944917402097766, "loss": 1.609, "step": 2135 }, { "epoch": 0.027756345804305416, "grad_norm": 0.354667603969574, "learning_rate": 0.0001944891407478652, "loss": 1.3905, "step": 2136 }, { "epoch": 0.027769340348221292, "grad_norm": 0.31113141775131226, "learning_rate": 0.00019448654128595384, "loss": 1.5166, "step": 2137 }, { "epoch": 0.027782334892137165, "grad_norm": 0.3507639467716217, "learning_rate": 0.00019448394182404244, "loss": 1.4244, "step": 2138 }, { "epoch": 0.027795329436053038, "grad_norm": 0.3752739727497101, "learning_rate": 0.00019448134236213106, "loss": 1.3482, "step": 2139 }, { "epoch": 0.02780832397996891, "grad_norm": 0.4519732892513275, "learning_rate": 0.00019447874290021966, "loss": 1.578, "step": 2140 }, { "epoch": 0.027821318523884784, "grad_norm": 0.4155772924423218, "learning_rate": 0.00019447614343830828, "loss": 1.4331, "step": 2141 }, { "epoch": 0.027834313067800657, "grad_norm": 0.34380027651786804, "learning_rate": 0.0001944735439763969, "loss": 1.2967, "step": 2142 }, { "epoch": 0.02784730761171653, "grad_norm": 0.4142053425312042, "learning_rate": 0.0001944709445144855, "loss": 1.4169, "step": 2143 }, { "epoch": 0.027860302155632403, "grad_norm": 0.45165592432022095, "learning_rate": 0.00019446834505257413, "loss": 1.617, "step": 2144 }, { "epoch": 0.027873296699548276, "grad_norm": 0.36684224009513855, "learning_rate": 0.00019446574559066275, "loss": 1.3973, "step": 2145 }, { "epoch": 0.027886291243464152, "grad_norm": 0.352329820394516, "learning_rate": 0.00019446314612875135, "loss": 1.3445, "step": 2146 }, { "epoch": 0.027899285787380025, "grad_norm": 0.3258627653121948, "learning_rate": 0.00019446054666683997, "loss": 1.3411, "step": 2147 }, { "epoch": 0.027912280331295898, "grad_norm": 0.4311891198158264, "learning_rate": 0.00019445794720492857, "loss": 1.3768, "step": 2148 }, { "epoch": 0.02792527487521177, "grad_norm": 0.33797669410705566, "learning_rate": 0.00019445534774301722, "loss": 1.4377, "step": 2149 }, { "epoch": 0.027938269419127644, "grad_norm": 0.44946396350860596, "learning_rate": 0.00019445274828110582, "loss": 1.2503, "step": 2150 }, { "epoch": 0.027951263963043517, "grad_norm": 0.395925372838974, "learning_rate": 0.00019445014881919445, "loss": 1.6571, "step": 2151 }, { "epoch": 0.02796425850695939, "grad_norm": 0.3170452117919922, "learning_rate": 0.00019444754935728304, "loss": 1.5332, "step": 2152 }, { "epoch": 0.027977253050875263, "grad_norm": 0.3092959523200989, "learning_rate": 0.00019444494989537167, "loss": 1.4253, "step": 2153 }, { "epoch": 0.027990247594791136, "grad_norm": 0.44351643323898315, "learning_rate": 0.0001944423504334603, "loss": 1.6285, "step": 2154 }, { "epoch": 0.02800324213870701, "grad_norm": 0.6147965788841248, "learning_rate": 0.0001944397509715489, "loss": 1.5539, "step": 2155 }, { "epoch": 0.028016236682622885, "grad_norm": 0.42599916458129883, "learning_rate": 0.0001944371515096375, "loss": 1.3832, "step": 2156 }, { "epoch": 0.028029231226538758, "grad_norm": 0.4279272258281708, "learning_rate": 0.00019443455204772614, "loss": 1.5309, "step": 2157 }, { "epoch": 0.02804222577045463, "grad_norm": 0.3968813717365265, "learning_rate": 0.00019443195258581474, "loss": 1.5024, "step": 2158 }, { "epoch": 0.028055220314370504, "grad_norm": 0.4060969352722168, "learning_rate": 0.00019442935312390336, "loss": 1.3264, "step": 2159 }, { "epoch": 0.028068214858286377, "grad_norm": 0.29550233483314514, "learning_rate": 0.00019442675366199196, "loss": 1.3736, "step": 2160 }, { "epoch": 0.02808120940220225, "grad_norm": 0.327310711145401, "learning_rate": 0.0001944241542000806, "loss": 1.4457, "step": 2161 }, { "epoch": 0.028094203946118123, "grad_norm": 0.2951511740684509, "learning_rate": 0.0001944215547381692, "loss": 1.4906, "step": 2162 }, { "epoch": 0.028107198490033996, "grad_norm": 0.46167200803756714, "learning_rate": 0.00019441895527625783, "loss": 1.5882, "step": 2163 }, { "epoch": 0.02812019303394987, "grad_norm": 0.36212441325187683, "learning_rate": 0.00019441635581434643, "loss": 1.3374, "step": 2164 }, { "epoch": 0.028133187577865745, "grad_norm": 0.4009593427181244, "learning_rate": 0.00019441375635243505, "loss": 1.3603, "step": 2165 }, { "epoch": 0.02814618212178162, "grad_norm": 0.45525768399238586, "learning_rate": 0.00019441115689052368, "loss": 1.4289, "step": 2166 }, { "epoch": 0.02815917666569749, "grad_norm": 0.3185199499130249, "learning_rate": 0.00019440855742861227, "loss": 1.4702, "step": 2167 }, { "epoch": 0.028172171209613364, "grad_norm": 0.428203821182251, "learning_rate": 0.0001944059579667009, "loss": 1.4742, "step": 2168 }, { "epoch": 0.028185165753529237, "grad_norm": 0.4030996561050415, "learning_rate": 0.00019440335850478952, "loss": 1.4453, "step": 2169 }, { "epoch": 0.02819816029744511, "grad_norm": 0.33252498507499695, "learning_rate": 0.00019440075904287812, "loss": 1.501, "step": 2170 }, { "epoch": 0.028211154841360983, "grad_norm": 0.35301727056503296, "learning_rate": 0.00019439815958096675, "loss": 1.4448, "step": 2171 }, { "epoch": 0.028224149385276856, "grad_norm": 0.5131669044494629, "learning_rate": 0.00019439556011905537, "loss": 1.5222, "step": 2172 }, { "epoch": 0.02823714392919273, "grad_norm": 0.4534998834133148, "learning_rate": 0.000194392960657144, "loss": 1.3583, "step": 2173 }, { "epoch": 0.028250138473108602, "grad_norm": 0.3587299585342407, "learning_rate": 0.0001943903611952326, "loss": 1.3828, "step": 2174 }, { "epoch": 0.02826313301702448, "grad_norm": 0.2546916604042053, "learning_rate": 0.00019438776173332122, "loss": 1.3169, "step": 2175 }, { "epoch": 0.02827612756094035, "grad_norm": 0.3397449851036072, "learning_rate": 0.00019438516227140984, "loss": 1.3687, "step": 2176 }, { "epoch": 0.028289122104856224, "grad_norm": 0.38214564323425293, "learning_rate": 0.00019438256280949844, "loss": 1.4583, "step": 2177 }, { "epoch": 0.028302116648772097, "grad_norm": 0.3576605021953583, "learning_rate": 0.00019437996334758706, "loss": 1.3904, "step": 2178 }, { "epoch": 0.02831511119268797, "grad_norm": 0.4208417534828186, "learning_rate": 0.00019437736388567566, "loss": 1.2989, "step": 2179 }, { "epoch": 0.028328105736603843, "grad_norm": 0.33130720257759094, "learning_rate": 0.0001943747644237643, "loss": 1.3052, "step": 2180 }, { "epoch": 0.028341100280519716, "grad_norm": 0.4892352223396301, "learning_rate": 0.0001943721649618529, "loss": 1.5524, "step": 2181 }, { "epoch": 0.02835409482443559, "grad_norm": 0.46355971693992615, "learning_rate": 0.0001943695654999415, "loss": 1.6365, "step": 2182 }, { "epoch": 0.028367089368351462, "grad_norm": 0.3080877959728241, "learning_rate": 0.00019436696603803013, "loss": 1.3449, "step": 2183 }, { "epoch": 0.02838008391226734, "grad_norm": 0.34956327080726624, "learning_rate": 0.00019436436657611876, "loss": 1.4234, "step": 2184 }, { "epoch": 0.02839307845618321, "grad_norm": 0.35542523860931396, "learning_rate": 0.00019436176711420738, "loss": 1.4423, "step": 2185 }, { "epoch": 0.028406073000099084, "grad_norm": 0.4644271433353424, "learning_rate": 0.00019435916765229598, "loss": 1.481, "step": 2186 }, { "epoch": 0.028419067544014957, "grad_norm": 0.4230409264564514, "learning_rate": 0.0001943565681903846, "loss": 1.5347, "step": 2187 }, { "epoch": 0.02843206208793083, "grad_norm": 0.39285415410995483, "learning_rate": 0.00019435396872847323, "loss": 1.4178, "step": 2188 }, { "epoch": 0.028445056631846703, "grad_norm": 0.41373884677886963, "learning_rate": 0.00019435136926656182, "loss": 1.47, "step": 2189 }, { "epoch": 0.028458051175762576, "grad_norm": 0.4094409644603729, "learning_rate": 0.00019434876980465045, "loss": 1.5291, "step": 2190 }, { "epoch": 0.02847104571967845, "grad_norm": 0.4535164535045624, "learning_rate": 0.00019434617034273905, "loss": 1.4188, "step": 2191 }, { "epoch": 0.028484040263594322, "grad_norm": 0.36925461888313293, "learning_rate": 0.0001943435708808277, "loss": 1.4442, "step": 2192 }, { "epoch": 0.028497034807510195, "grad_norm": 0.38038572669029236, "learning_rate": 0.0001943409714189163, "loss": 1.4644, "step": 2193 }, { "epoch": 0.02851002935142607, "grad_norm": 0.3370724320411682, "learning_rate": 0.00019433837195700492, "loss": 1.2589, "step": 2194 }, { "epoch": 0.028523023895341944, "grad_norm": 0.36902526021003723, "learning_rate": 0.00019433577249509352, "loss": 1.726, "step": 2195 }, { "epoch": 0.028536018439257817, "grad_norm": 0.40908193588256836, "learning_rate": 0.00019433317303318214, "loss": 1.4635, "step": 2196 }, { "epoch": 0.02854901298317369, "grad_norm": 0.36590951681137085, "learning_rate": 0.00019433057357127077, "loss": 1.4601, "step": 2197 }, { "epoch": 0.028562007527089563, "grad_norm": 0.37208840250968933, "learning_rate": 0.00019432797410935936, "loss": 1.6144, "step": 2198 }, { "epoch": 0.028575002071005436, "grad_norm": 0.35629284381866455, "learning_rate": 0.000194325374647448, "loss": 1.4232, "step": 2199 }, { "epoch": 0.02858799661492131, "grad_norm": 0.4063398241996765, "learning_rate": 0.0001943227751855366, "loss": 1.5298, "step": 2200 }, { "epoch": 0.028600991158837182, "grad_norm": 0.47579383850097656, "learning_rate": 0.0001943201757236252, "loss": 1.431, "step": 2201 }, { "epoch": 0.028613985702753055, "grad_norm": 0.3551229238510132, "learning_rate": 0.00019431757626171383, "loss": 1.4623, "step": 2202 }, { "epoch": 0.02862698024666893, "grad_norm": 0.3218754827976227, "learning_rate": 0.00019431497679980243, "loss": 1.5658, "step": 2203 }, { "epoch": 0.028639974790584805, "grad_norm": 0.360439658164978, "learning_rate": 0.00019431237733789108, "loss": 1.3714, "step": 2204 }, { "epoch": 0.028652969334500678, "grad_norm": 0.3467865586280823, "learning_rate": 0.00019430977787597968, "loss": 1.4742, "step": 2205 }, { "epoch": 0.02866596387841655, "grad_norm": 0.33811742067337036, "learning_rate": 0.0001943071784140683, "loss": 1.33, "step": 2206 }, { "epoch": 0.028678958422332423, "grad_norm": 0.41617485880851746, "learning_rate": 0.00019430457895215693, "loss": 1.5135, "step": 2207 }, { "epoch": 0.028691952966248296, "grad_norm": 0.3327701985836029, "learning_rate": 0.00019430197949024553, "loss": 1.5446, "step": 2208 }, { "epoch": 0.02870494751016417, "grad_norm": 0.3984528183937073, "learning_rate": 0.00019429938002833415, "loss": 1.3622, "step": 2209 }, { "epoch": 0.028717942054080042, "grad_norm": 0.4298560321331024, "learning_rate": 0.00019429678056642275, "loss": 1.4283, "step": 2210 }, { "epoch": 0.028730936597995915, "grad_norm": 0.35935840010643005, "learning_rate": 0.00019429418110451137, "loss": 1.4038, "step": 2211 }, { "epoch": 0.028743931141911788, "grad_norm": 0.4088001549243927, "learning_rate": 0.0001942915816426, "loss": 1.6949, "step": 2212 }, { "epoch": 0.028756925685827665, "grad_norm": 0.3564993739128113, "learning_rate": 0.0001942889821806886, "loss": 1.3841, "step": 2213 }, { "epoch": 0.028769920229743538, "grad_norm": 0.27074694633483887, "learning_rate": 0.00019428638271877722, "loss": 1.231, "step": 2214 }, { "epoch": 0.02878291477365941, "grad_norm": 0.3868809640407562, "learning_rate": 0.00019428378325686584, "loss": 1.4815, "step": 2215 }, { "epoch": 0.028795909317575284, "grad_norm": 0.43486249446868896, "learning_rate": 0.00019428118379495447, "loss": 1.4355, "step": 2216 }, { "epoch": 0.028808903861491156, "grad_norm": 0.5228997468948364, "learning_rate": 0.00019427858433304306, "loss": 1.5785, "step": 2217 }, { "epoch": 0.02882189840540703, "grad_norm": 0.4311128854751587, "learning_rate": 0.0001942759848711317, "loss": 1.5378, "step": 2218 }, { "epoch": 0.028834892949322902, "grad_norm": 0.36703139543533325, "learning_rate": 0.00019427338540922031, "loss": 1.4434, "step": 2219 }, { "epoch": 0.028847887493238775, "grad_norm": 0.39816349744796753, "learning_rate": 0.0001942707859473089, "loss": 1.3951, "step": 2220 }, { "epoch": 0.02886088203715465, "grad_norm": 0.4139325022697449, "learning_rate": 0.00019426818648539754, "loss": 1.591, "step": 2221 }, { "epoch": 0.028873876581070525, "grad_norm": 0.4262898862361908, "learning_rate": 0.00019426558702348613, "loss": 1.4914, "step": 2222 }, { "epoch": 0.028886871124986398, "grad_norm": 0.42953887581825256, "learning_rate": 0.00019426298756157478, "loss": 1.5241, "step": 2223 }, { "epoch": 0.02889986566890227, "grad_norm": 0.7413663864135742, "learning_rate": 0.00019426038809966338, "loss": 1.455, "step": 2224 }, { "epoch": 0.028912860212818144, "grad_norm": 0.35395124554634094, "learning_rate": 0.00019425778863775198, "loss": 1.5257, "step": 2225 }, { "epoch": 0.028925854756734017, "grad_norm": 0.4739680290222168, "learning_rate": 0.0001942551891758406, "loss": 1.5969, "step": 2226 }, { "epoch": 0.02893884930064989, "grad_norm": 0.3732389509677887, "learning_rate": 0.00019425258971392923, "loss": 1.4742, "step": 2227 }, { "epoch": 0.028951843844565762, "grad_norm": 0.40652787685394287, "learning_rate": 0.00019424999025201785, "loss": 1.3275, "step": 2228 }, { "epoch": 0.028964838388481635, "grad_norm": 0.3962986469268799, "learning_rate": 0.00019424739079010645, "loss": 1.6153, "step": 2229 }, { "epoch": 0.02897783293239751, "grad_norm": 0.37969836592674255, "learning_rate": 0.00019424479132819507, "loss": 1.5725, "step": 2230 }, { "epoch": 0.02899082747631338, "grad_norm": 0.6805177927017212, "learning_rate": 0.0001942421918662837, "loss": 1.4578, "step": 2231 }, { "epoch": 0.029003822020229258, "grad_norm": 0.4655977189540863, "learning_rate": 0.0001942395924043723, "loss": 1.3158, "step": 2232 }, { "epoch": 0.02901681656414513, "grad_norm": 0.5238486528396606, "learning_rate": 0.00019423699294246092, "loss": 1.4599, "step": 2233 }, { "epoch": 0.029029811108061004, "grad_norm": 0.4091179072856903, "learning_rate": 0.00019423439348054952, "loss": 1.5138, "step": 2234 }, { "epoch": 0.029042805651976877, "grad_norm": 0.3676810562610626, "learning_rate": 0.00019423179401863817, "loss": 1.6341, "step": 2235 }, { "epoch": 0.02905580019589275, "grad_norm": 0.37930363416671753, "learning_rate": 0.00019422919455672677, "loss": 1.4647, "step": 2236 }, { "epoch": 0.029068794739808623, "grad_norm": 0.3974941372871399, "learning_rate": 0.00019422659509481536, "loss": 1.6024, "step": 2237 }, { "epoch": 0.029081789283724496, "grad_norm": 0.39573827385902405, "learning_rate": 0.000194223995632904, "loss": 1.4298, "step": 2238 }, { "epoch": 0.02909478382764037, "grad_norm": 0.3913654685020447, "learning_rate": 0.00019422139617099261, "loss": 1.3641, "step": 2239 }, { "epoch": 0.02910777837155624, "grad_norm": 0.3153485059738159, "learning_rate": 0.00019421879670908124, "loss": 1.2573, "step": 2240 }, { "epoch": 0.029120772915472114, "grad_norm": 0.3856097161769867, "learning_rate": 0.00019421619724716984, "loss": 1.3456, "step": 2241 }, { "epoch": 0.02913376745938799, "grad_norm": 0.308120995759964, "learning_rate": 0.00019421359778525846, "loss": 1.3074, "step": 2242 }, { "epoch": 0.029146762003303864, "grad_norm": 0.37055110931396484, "learning_rate": 0.00019421099832334708, "loss": 1.5545, "step": 2243 }, { "epoch": 0.029159756547219737, "grad_norm": 0.3175742030143738, "learning_rate": 0.00019420839886143568, "loss": 1.1888, "step": 2244 }, { "epoch": 0.02917275109113561, "grad_norm": 0.32111236453056335, "learning_rate": 0.0001942057993995243, "loss": 1.6643, "step": 2245 }, { "epoch": 0.029185745635051483, "grad_norm": 0.3519747853279114, "learning_rate": 0.00019420319993761293, "loss": 1.4373, "step": 2246 }, { "epoch": 0.029198740178967356, "grad_norm": 0.3781045973300934, "learning_rate": 0.00019420060047570156, "loss": 1.3688, "step": 2247 }, { "epoch": 0.02921173472288323, "grad_norm": 0.3909277617931366, "learning_rate": 0.00019419800101379015, "loss": 1.4453, "step": 2248 }, { "epoch": 0.0292247292667991, "grad_norm": 0.41048064827919006, "learning_rate": 0.00019419540155187878, "loss": 1.3133, "step": 2249 }, { "epoch": 0.029237723810714975, "grad_norm": 0.3009323477745056, "learning_rate": 0.0001941928020899674, "loss": 1.5612, "step": 2250 }, { "epoch": 0.02925071835463085, "grad_norm": 0.4818994998931885, "learning_rate": 0.000194190202628056, "loss": 1.4417, "step": 2251 }, { "epoch": 0.029263712898546724, "grad_norm": 0.46832817792892456, "learning_rate": 0.00019418760316614462, "loss": 1.5761, "step": 2252 }, { "epoch": 0.029276707442462597, "grad_norm": 0.41898638010025024, "learning_rate": 0.00019418500370423322, "loss": 1.4214, "step": 2253 }, { "epoch": 0.02928970198637847, "grad_norm": 0.3925115466117859, "learning_rate": 0.00019418240424232185, "loss": 1.5422, "step": 2254 }, { "epoch": 0.029302696530294343, "grad_norm": 0.5036033987998962, "learning_rate": 0.00019417980478041047, "loss": 1.5198, "step": 2255 }, { "epoch": 0.029315691074210216, "grad_norm": 0.34338614344596863, "learning_rate": 0.00019417720531849907, "loss": 1.4765, "step": 2256 }, { "epoch": 0.02932868561812609, "grad_norm": 0.4155566394329071, "learning_rate": 0.0001941746058565877, "loss": 1.5128, "step": 2257 }, { "epoch": 0.02934168016204196, "grad_norm": 0.3929993212223053, "learning_rate": 0.00019417200639467632, "loss": 1.4375, "step": 2258 }, { "epoch": 0.029354674705957835, "grad_norm": 0.26651692390441895, "learning_rate": 0.00019416940693276494, "loss": 1.458, "step": 2259 }, { "epoch": 0.029367669249873708, "grad_norm": 0.41451215744018555, "learning_rate": 0.00019416680747085354, "loss": 1.538, "step": 2260 }, { "epoch": 0.029380663793789584, "grad_norm": 0.3747888207435608, "learning_rate": 0.00019416420800894216, "loss": 1.386, "step": 2261 }, { "epoch": 0.029393658337705457, "grad_norm": 0.29739704728126526, "learning_rate": 0.0001941616085470308, "loss": 1.3246, "step": 2262 }, { "epoch": 0.02940665288162133, "grad_norm": 0.49290746450424194, "learning_rate": 0.00019415900908511938, "loss": 1.407, "step": 2263 }, { "epoch": 0.029419647425537203, "grad_norm": 0.28033435344696045, "learning_rate": 0.000194156409623208, "loss": 1.367, "step": 2264 }, { "epoch": 0.029432641969453076, "grad_norm": 0.37781044840812683, "learning_rate": 0.0001941538101612966, "loss": 1.4717, "step": 2265 }, { "epoch": 0.02944563651336895, "grad_norm": 0.40272921323776245, "learning_rate": 0.00019415121069938523, "loss": 1.3663, "step": 2266 }, { "epoch": 0.02945863105728482, "grad_norm": 0.3746371567249298, "learning_rate": 0.00019414861123747386, "loss": 1.2733, "step": 2267 }, { "epoch": 0.029471625601200695, "grad_norm": 0.3725739121437073, "learning_rate": 0.00019414601177556245, "loss": 1.4286, "step": 2268 }, { "epoch": 0.029484620145116568, "grad_norm": 0.35759153962135315, "learning_rate": 0.00019414341231365108, "loss": 1.6504, "step": 2269 }, { "epoch": 0.029497614689032444, "grad_norm": 0.37699389457702637, "learning_rate": 0.0001941408128517397, "loss": 1.5343, "step": 2270 }, { "epoch": 0.029510609232948317, "grad_norm": 0.4187336564064026, "learning_rate": 0.00019413821338982833, "loss": 1.6269, "step": 2271 }, { "epoch": 0.02952360377686419, "grad_norm": 0.46491625905036926, "learning_rate": 0.00019413561392791692, "loss": 1.692, "step": 2272 }, { "epoch": 0.029536598320780063, "grad_norm": 0.3756006062030792, "learning_rate": 0.00019413301446600555, "loss": 1.5247, "step": 2273 }, { "epoch": 0.029549592864695936, "grad_norm": 0.354198157787323, "learning_rate": 0.00019413041500409417, "loss": 1.4363, "step": 2274 }, { "epoch": 0.02956258740861181, "grad_norm": 0.3795511722564697, "learning_rate": 0.00019412781554218277, "loss": 1.4965, "step": 2275 }, { "epoch": 0.029575581952527682, "grad_norm": 0.37611326575279236, "learning_rate": 0.0001941252160802714, "loss": 1.4589, "step": 2276 }, { "epoch": 0.029588576496443555, "grad_norm": 0.5443653464317322, "learning_rate": 0.00019412261661836, "loss": 1.4941, "step": 2277 }, { "epoch": 0.029601571040359428, "grad_norm": 0.3725961744785309, "learning_rate": 0.00019412001715644864, "loss": 1.3932, "step": 2278 }, { "epoch": 0.0296145655842753, "grad_norm": 0.3796549141407013, "learning_rate": 0.00019411741769453724, "loss": 1.3333, "step": 2279 }, { "epoch": 0.029627560128191177, "grad_norm": 0.3841266334056854, "learning_rate": 0.00019411481823262584, "loss": 1.5165, "step": 2280 }, { "epoch": 0.02964055467210705, "grad_norm": 0.3486854135990143, "learning_rate": 0.0001941122187707145, "loss": 1.2011, "step": 2281 }, { "epoch": 0.029653549216022923, "grad_norm": 0.4347458481788635, "learning_rate": 0.0001941096193088031, "loss": 1.5416, "step": 2282 }, { "epoch": 0.029666543759938796, "grad_norm": 0.3523584306240082, "learning_rate": 0.0001941070198468917, "loss": 1.5624, "step": 2283 }, { "epoch": 0.02967953830385467, "grad_norm": 0.4369819462299347, "learning_rate": 0.0001941044203849803, "loss": 1.4357, "step": 2284 }, { "epoch": 0.029692532847770542, "grad_norm": 0.47636428475379944, "learning_rate": 0.00019410182092306893, "loss": 1.6214, "step": 2285 }, { "epoch": 0.029705527391686415, "grad_norm": 0.42865052819252014, "learning_rate": 0.00019409922146115756, "loss": 1.6789, "step": 2286 }, { "epoch": 0.029718521935602288, "grad_norm": 0.3979128897190094, "learning_rate": 0.00019409662199924616, "loss": 1.4674, "step": 2287 }, { "epoch": 0.02973151647951816, "grad_norm": 0.30139976739883423, "learning_rate": 0.00019409402253733478, "loss": 1.4787, "step": 2288 }, { "epoch": 0.029744511023434037, "grad_norm": 0.38433244824409485, "learning_rate": 0.0001940914230754234, "loss": 1.3779, "step": 2289 }, { "epoch": 0.02975750556734991, "grad_norm": 0.3885755240917206, "learning_rate": 0.00019408882361351203, "loss": 1.4913, "step": 2290 }, { "epoch": 0.029770500111265783, "grad_norm": 0.35155102610588074, "learning_rate": 0.00019408622415160063, "loss": 1.4257, "step": 2291 }, { "epoch": 0.029783494655181656, "grad_norm": 0.3547854423522949, "learning_rate": 0.00019408362468968922, "loss": 1.388, "step": 2292 }, { "epoch": 0.02979648919909753, "grad_norm": 0.3712790310382843, "learning_rate": 0.00019408102522777788, "loss": 1.5405, "step": 2293 }, { "epoch": 0.029809483743013402, "grad_norm": 0.3487582504749298, "learning_rate": 0.00019407842576586647, "loss": 1.3696, "step": 2294 }, { "epoch": 0.029822478286929275, "grad_norm": 0.36178717017173767, "learning_rate": 0.0001940758263039551, "loss": 1.5208, "step": 2295 }, { "epoch": 0.029835472830845148, "grad_norm": 0.4386369585990906, "learning_rate": 0.0001940732268420437, "loss": 1.381, "step": 2296 }, { "epoch": 0.02984846737476102, "grad_norm": 0.36633753776550293, "learning_rate": 0.00019407062738013232, "loss": 1.4501, "step": 2297 }, { "epoch": 0.029861461918676894, "grad_norm": 0.37941429018974304, "learning_rate": 0.00019406802791822094, "loss": 1.5131, "step": 2298 }, { "epoch": 0.02987445646259277, "grad_norm": 0.3331683278083801, "learning_rate": 0.00019406542845630954, "loss": 1.3258, "step": 2299 }, { "epoch": 0.029887451006508643, "grad_norm": 0.3746281862258911, "learning_rate": 0.00019406282899439817, "loss": 1.5902, "step": 2300 }, { "epoch": 0.029900445550424516, "grad_norm": 0.41005784273147583, "learning_rate": 0.0001940602295324868, "loss": 1.5298, "step": 2301 }, { "epoch": 0.02991344009434039, "grad_norm": 0.3656270503997803, "learning_rate": 0.00019405763007057541, "loss": 1.425, "step": 2302 }, { "epoch": 0.029926434638256262, "grad_norm": 0.32205793261528015, "learning_rate": 0.000194055030608664, "loss": 1.5017, "step": 2303 }, { "epoch": 0.029939429182172135, "grad_norm": 0.37533479928970337, "learning_rate": 0.0001940524311467526, "loss": 1.4854, "step": 2304 }, { "epoch": 0.029952423726088008, "grad_norm": 0.364859014749527, "learning_rate": 0.00019404983168484126, "loss": 1.6089, "step": 2305 }, { "epoch": 0.02996541827000388, "grad_norm": 0.4076573848724365, "learning_rate": 0.00019404723222292986, "loss": 1.5191, "step": 2306 }, { "epoch": 0.029978412813919754, "grad_norm": 0.6191604137420654, "learning_rate": 0.00019404463276101848, "loss": 1.4567, "step": 2307 }, { "epoch": 0.02999140735783563, "grad_norm": 0.4787641167640686, "learning_rate": 0.00019404203329910708, "loss": 1.5462, "step": 2308 }, { "epoch": 0.030004401901751503, "grad_norm": 0.4141702651977539, "learning_rate": 0.0001940394338371957, "loss": 1.5666, "step": 2309 }, { "epoch": 0.030017396445667376, "grad_norm": 0.4188869893550873, "learning_rate": 0.00019403683437528433, "loss": 1.5711, "step": 2310 }, { "epoch": 0.03003039098958325, "grad_norm": 0.46232324838638306, "learning_rate": 0.00019403423491337293, "loss": 1.4781, "step": 2311 }, { "epoch": 0.030043385533499122, "grad_norm": 0.39016687870025635, "learning_rate": 0.00019403163545146155, "loss": 1.5628, "step": 2312 }, { "epoch": 0.030056380077414995, "grad_norm": 0.4115760922431946, "learning_rate": 0.00019402903598955018, "loss": 1.5846, "step": 2313 }, { "epoch": 0.030069374621330868, "grad_norm": 0.38630566000938416, "learning_rate": 0.0001940264365276388, "loss": 1.4349, "step": 2314 }, { "epoch": 0.03008236916524674, "grad_norm": 0.39953503012657166, "learning_rate": 0.0001940238370657274, "loss": 1.556, "step": 2315 }, { "epoch": 0.030095363709162614, "grad_norm": 0.3413309156894684, "learning_rate": 0.00019402123760381602, "loss": 1.4068, "step": 2316 }, { "epoch": 0.030108358253078487, "grad_norm": 0.43236130475997925, "learning_rate": 0.00019401863814190465, "loss": 1.5038, "step": 2317 }, { "epoch": 0.030121352796994363, "grad_norm": 0.42266950011253357, "learning_rate": 0.00019401603867999324, "loss": 1.6838, "step": 2318 }, { "epoch": 0.030134347340910236, "grad_norm": 0.385187029838562, "learning_rate": 0.00019401343921808187, "loss": 1.3582, "step": 2319 }, { "epoch": 0.03014734188482611, "grad_norm": 0.3673193156719208, "learning_rate": 0.0001940108397561705, "loss": 1.1196, "step": 2320 }, { "epoch": 0.030160336428741982, "grad_norm": 0.333638072013855, "learning_rate": 0.0001940082402942591, "loss": 1.616, "step": 2321 }, { "epoch": 0.030173330972657855, "grad_norm": 0.34829479455947876, "learning_rate": 0.00019400564083234771, "loss": 1.541, "step": 2322 }, { "epoch": 0.030186325516573728, "grad_norm": 0.4145924150943756, "learning_rate": 0.0001940030413704363, "loss": 1.6013, "step": 2323 }, { "epoch": 0.0301993200604896, "grad_norm": 0.3042910397052765, "learning_rate": 0.00019400044190852496, "loss": 1.4189, "step": 2324 }, { "epoch": 0.030212314604405474, "grad_norm": 0.4206189215183258, "learning_rate": 0.00019399784244661356, "loss": 1.4251, "step": 2325 }, { "epoch": 0.030225309148321347, "grad_norm": 0.4113405644893646, "learning_rate": 0.00019399524298470219, "loss": 1.4679, "step": 2326 }, { "epoch": 0.030238303692237224, "grad_norm": 0.4340955913066864, "learning_rate": 0.00019399264352279078, "loss": 1.4749, "step": 2327 }, { "epoch": 0.030251298236153096, "grad_norm": 0.39315587282180786, "learning_rate": 0.0001939900440608794, "loss": 1.4331, "step": 2328 }, { "epoch": 0.03026429278006897, "grad_norm": 0.39607760310173035, "learning_rate": 0.00019398744459896803, "loss": 1.6217, "step": 2329 }, { "epoch": 0.030277287323984842, "grad_norm": 0.41378265619277954, "learning_rate": 0.00019398484513705663, "loss": 1.3429, "step": 2330 }, { "epoch": 0.030290281867900715, "grad_norm": 0.3606838583946228, "learning_rate": 0.00019398224567514525, "loss": 1.5375, "step": 2331 }, { "epoch": 0.03030327641181659, "grad_norm": 0.5275381803512573, "learning_rate": 0.00019397964621323388, "loss": 1.5635, "step": 2332 }, { "epoch": 0.03031627095573246, "grad_norm": 0.2677003741264343, "learning_rate": 0.0001939770467513225, "loss": 1.213, "step": 2333 }, { "epoch": 0.030329265499648334, "grad_norm": 0.3109504282474518, "learning_rate": 0.0001939744472894111, "loss": 1.2838, "step": 2334 }, { "epoch": 0.030342260043564207, "grad_norm": 0.4251733720302582, "learning_rate": 0.0001939718478274997, "loss": 1.6817, "step": 2335 }, { "epoch": 0.03035525458748008, "grad_norm": 0.4478415846824646, "learning_rate": 0.00019396924836558835, "loss": 1.4814, "step": 2336 }, { "epoch": 0.030368249131395957, "grad_norm": 0.42527344822883606, "learning_rate": 0.00019396664890367695, "loss": 1.3118, "step": 2337 }, { "epoch": 0.03038124367531183, "grad_norm": 0.44282853603363037, "learning_rate": 0.00019396404944176557, "loss": 1.6291, "step": 2338 }, { "epoch": 0.030394238219227702, "grad_norm": 0.4052993655204773, "learning_rate": 0.00019396144997985417, "loss": 1.3936, "step": 2339 }, { "epoch": 0.030407232763143575, "grad_norm": 0.3035629987716675, "learning_rate": 0.0001939588505179428, "loss": 1.3489, "step": 2340 }, { "epoch": 0.03042022730705945, "grad_norm": 0.41601982712745667, "learning_rate": 0.00019395625105603142, "loss": 1.3657, "step": 2341 }, { "epoch": 0.03043322185097532, "grad_norm": 0.37834465503692627, "learning_rate": 0.00019395365159412001, "loss": 1.2281, "step": 2342 }, { "epoch": 0.030446216394891194, "grad_norm": 0.27632567286491394, "learning_rate": 0.00019395105213220864, "loss": 1.3653, "step": 2343 }, { "epoch": 0.030459210938807067, "grad_norm": 0.4285818934440613, "learning_rate": 0.00019394845267029726, "loss": 1.3668, "step": 2344 }, { "epoch": 0.03047220548272294, "grad_norm": 0.39858153462409973, "learning_rate": 0.0001939458532083859, "loss": 1.5637, "step": 2345 }, { "epoch": 0.030485200026638817, "grad_norm": 0.40720924735069275, "learning_rate": 0.00019394325374647449, "loss": 1.5489, "step": 2346 }, { "epoch": 0.03049819457055469, "grad_norm": 0.3333108723163605, "learning_rate": 0.00019394065428456308, "loss": 1.2593, "step": 2347 }, { "epoch": 0.030511189114470563, "grad_norm": 0.3936966359615326, "learning_rate": 0.00019393805482265173, "loss": 1.4027, "step": 2348 }, { "epoch": 0.030524183658386436, "grad_norm": 0.3745768368244171, "learning_rate": 0.00019393545536074033, "loss": 1.3695, "step": 2349 }, { "epoch": 0.03053717820230231, "grad_norm": 0.357470840215683, "learning_rate": 0.00019393285589882896, "loss": 1.4901, "step": 2350 }, { "epoch": 0.03055017274621818, "grad_norm": 0.4375747740268707, "learning_rate": 0.00019393025643691755, "loss": 1.499, "step": 2351 }, { "epoch": 0.030563167290134054, "grad_norm": 0.40660470724105835, "learning_rate": 0.00019392765697500618, "loss": 1.3784, "step": 2352 }, { "epoch": 0.030576161834049927, "grad_norm": 0.38084301352500916, "learning_rate": 0.0001939250575130948, "loss": 1.466, "step": 2353 }, { "epoch": 0.0305891563779658, "grad_norm": 0.3898766040802002, "learning_rate": 0.0001939224580511834, "loss": 1.4877, "step": 2354 }, { "epoch": 0.030602150921881673, "grad_norm": 0.5616552233695984, "learning_rate": 0.00019391985858927205, "loss": 1.7029, "step": 2355 }, { "epoch": 0.03061514546579755, "grad_norm": 0.3264707028865814, "learning_rate": 0.00019391725912736065, "loss": 1.282, "step": 2356 }, { "epoch": 0.030628140009713423, "grad_norm": 0.41132742166519165, "learning_rate": 0.00019391465966544927, "loss": 1.6009, "step": 2357 }, { "epoch": 0.030641134553629296, "grad_norm": 0.3672133982181549, "learning_rate": 0.00019391206020353787, "loss": 1.4757, "step": 2358 }, { "epoch": 0.03065412909754517, "grad_norm": 0.400573194026947, "learning_rate": 0.0001939094607416265, "loss": 1.4037, "step": 2359 }, { "epoch": 0.03066712364146104, "grad_norm": 0.3660773038864136, "learning_rate": 0.00019390686127971512, "loss": 1.4139, "step": 2360 }, { "epoch": 0.030680118185376914, "grad_norm": 0.38157641887664795, "learning_rate": 0.00019390426181780372, "loss": 1.4627, "step": 2361 }, { "epoch": 0.030693112729292787, "grad_norm": 0.4337979853153229, "learning_rate": 0.00019390166235589234, "loss": 1.6154, "step": 2362 }, { "epoch": 0.03070610727320866, "grad_norm": 0.3866462707519531, "learning_rate": 0.00019389906289398097, "loss": 1.5673, "step": 2363 }, { "epoch": 0.030719101817124533, "grad_norm": 0.39175018668174744, "learning_rate": 0.00019389646343206956, "loss": 1.507, "step": 2364 }, { "epoch": 0.03073209636104041, "grad_norm": 0.36119142174720764, "learning_rate": 0.0001938938639701582, "loss": 1.6573, "step": 2365 }, { "epoch": 0.030745090904956283, "grad_norm": 0.40435540676116943, "learning_rate": 0.00019389126450824678, "loss": 1.4668, "step": 2366 }, { "epoch": 0.030758085448872156, "grad_norm": 0.4037638306617737, "learning_rate": 0.00019388866504633544, "loss": 1.1692, "step": 2367 }, { "epoch": 0.03077107999278803, "grad_norm": 0.4345413148403168, "learning_rate": 0.00019388606558442403, "loss": 1.5138, "step": 2368 }, { "epoch": 0.0307840745367039, "grad_norm": 0.4004787802696228, "learning_rate": 0.00019388346612251266, "loss": 1.6534, "step": 2369 }, { "epoch": 0.030797069080619775, "grad_norm": 0.3963693678379059, "learning_rate": 0.00019388086666060126, "loss": 1.4003, "step": 2370 }, { "epoch": 0.030810063624535648, "grad_norm": 0.3286566138267517, "learning_rate": 0.00019387826719868988, "loss": 1.5337, "step": 2371 }, { "epoch": 0.03082305816845152, "grad_norm": 0.31235527992248535, "learning_rate": 0.0001938756677367785, "loss": 1.3039, "step": 2372 }, { "epoch": 0.030836052712367393, "grad_norm": 0.39822515845298767, "learning_rate": 0.0001938730682748671, "loss": 1.3909, "step": 2373 }, { "epoch": 0.030849047256283266, "grad_norm": 0.4359123706817627, "learning_rate": 0.00019387046881295573, "loss": 1.4659, "step": 2374 }, { "epoch": 0.030862041800199143, "grad_norm": 0.34830644726753235, "learning_rate": 0.00019386786935104435, "loss": 1.4766, "step": 2375 }, { "epoch": 0.030875036344115016, "grad_norm": 0.42656728625297546, "learning_rate": 0.00019386526988913295, "loss": 1.4657, "step": 2376 }, { "epoch": 0.03088803088803089, "grad_norm": 0.4401993155479431, "learning_rate": 0.00019386267042722157, "loss": 1.4028, "step": 2377 }, { "epoch": 0.03090102543194676, "grad_norm": 0.37953370809555054, "learning_rate": 0.00019386007096531017, "loss": 1.3901, "step": 2378 }, { "epoch": 0.030914019975862635, "grad_norm": 0.47079914808273315, "learning_rate": 0.00019385747150339882, "loss": 1.5176, "step": 2379 }, { "epoch": 0.030927014519778508, "grad_norm": 0.39718374609947205, "learning_rate": 0.00019385487204148742, "loss": 1.4507, "step": 2380 }, { "epoch": 0.03094000906369438, "grad_norm": 0.42498964071273804, "learning_rate": 0.00019385227257957604, "loss": 1.6366, "step": 2381 }, { "epoch": 0.030953003607610254, "grad_norm": 0.4166160821914673, "learning_rate": 0.00019384967311766464, "loss": 1.6349, "step": 2382 }, { "epoch": 0.030965998151526127, "grad_norm": 0.3568093180656433, "learning_rate": 0.00019384707365575327, "loss": 1.4379, "step": 2383 }, { "epoch": 0.030978992695442003, "grad_norm": 0.41537830233573914, "learning_rate": 0.0001938444741938419, "loss": 1.5079, "step": 2384 }, { "epoch": 0.030991987239357876, "grad_norm": 0.32123616337776184, "learning_rate": 0.0001938418747319305, "loss": 1.3794, "step": 2385 }, { "epoch": 0.03100498178327375, "grad_norm": 0.3980812132358551, "learning_rate": 0.0001938392752700191, "loss": 1.4997, "step": 2386 }, { "epoch": 0.031017976327189622, "grad_norm": 0.359386682510376, "learning_rate": 0.00019383667580810774, "loss": 1.5401, "step": 2387 }, { "epoch": 0.031030970871105495, "grad_norm": 0.30126532912254333, "learning_rate": 0.00019383407634619633, "loss": 1.4162, "step": 2388 }, { "epoch": 0.031043965415021368, "grad_norm": 0.2829444408416748, "learning_rate": 0.00019383147688428496, "loss": 1.5548, "step": 2389 }, { "epoch": 0.03105695995893724, "grad_norm": 0.49797871708869934, "learning_rate": 0.00019382887742237356, "loss": 1.5038, "step": 2390 }, { "epoch": 0.031069954502853114, "grad_norm": 0.3714198172092438, "learning_rate": 0.0001938262779604622, "loss": 1.4442, "step": 2391 }, { "epoch": 0.031082949046768987, "grad_norm": 0.3066392242908478, "learning_rate": 0.0001938236784985508, "loss": 1.4485, "step": 2392 }, { "epoch": 0.03109594359068486, "grad_norm": 0.4711465835571289, "learning_rate": 0.00019382107903663943, "loss": 1.4427, "step": 2393 }, { "epoch": 0.031108938134600736, "grad_norm": 0.4103511869907379, "learning_rate": 0.00019381847957472805, "loss": 1.653, "step": 2394 }, { "epoch": 0.03112193267851661, "grad_norm": 0.3232939541339874, "learning_rate": 0.00019381588011281665, "loss": 1.4108, "step": 2395 }, { "epoch": 0.031134927222432482, "grad_norm": 0.3427291214466095, "learning_rate": 0.00019381328065090528, "loss": 1.5509, "step": 2396 }, { "epoch": 0.031147921766348355, "grad_norm": 0.36916622519493103, "learning_rate": 0.00019381068118899387, "loss": 1.5009, "step": 2397 }, { "epoch": 0.031160916310264228, "grad_norm": 0.38322317600250244, "learning_rate": 0.00019380808172708252, "loss": 1.36, "step": 2398 }, { "epoch": 0.0311739108541801, "grad_norm": 0.5019955039024353, "learning_rate": 0.00019380548226517112, "loss": 1.4435, "step": 2399 }, { "epoch": 0.031186905398095974, "grad_norm": 0.4351155161857605, "learning_rate": 0.00019380288280325975, "loss": 1.5429, "step": 2400 }, { "epoch": 0.031199899942011847, "grad_norm": 0.3205655813217163, "learning_rate": 0.00019380028334134834, "loss": 1.558, "step": 2401 }, { "epoch": 0.03121289448592772, "grad_norm": 0.4249109625816345, "learning_rate": 0.00019379768387943697, "loss": 1.3846, "step": 2402 }, { "epoch": 0.031225889029843596, "grad_norm": 0.43097853660583496, "learning_rate": 0.0001937950844175256, "loss": 1.73, "step": 2403 }, { "epoch": 0.03123888357375947, "grad_norm": 0.3964199125766754, "learning_rate": 0.0001937924849556142, "loss": 1.4712, "step": 2404 }, { "epoch": 0.03125187811767534, "grad_norm": 0.40434059500694275, "learning_rate": 0.00019378988549370281, "loss": 1.4685, "step": 2405 }, { "epoch": 0.03126487266159121, "grad_norm": 0.330955445766449, "learning_rate": 0.00019378728603179144, "loss": 1.3936, "step": 2406 }, { "epoch": 0.031277867205507084, "grad_norm": 0.36570027470588684, "learning_rate": 0.00019378468656988004, "loss": 1.5513, "step": 2407 }, { "epoch": 0.031290861749422964, "grad_norm": 0.31739145517349243, "learning_rate": 0.00019378208710796866, "loss": 1.5095, "step": 2408 }, { "epoch": 0.03130385629333884, "grad_norm": 0.3888550102710724, "learning_rate": 0.00019377948764605726, "loss": 1.3702, "step": 2409 }, { "epoch": 0.03131685083725471, "grad_norm": 0.3065565824508667, "learning_rate": 0.0001937768881841459, "loss": 1.3797, "step": 2410 }, { "epoch": 0.03132984538117058, "grad_norm": 0.2960432767868042, "learning_rate": 0.0001937742887222345, "loss": 1.3027, "step": 2411 }, { "epoch": 0.031342839925086456, "grad_norm": 0.3159444034099579, "learning_rate": 0.00019377168926032313, "loss": 1.3071, "step": 2412 }, { "epoch": 0.03135583446900233, "grad_norm": 0.41638821363449097, "learning_rate": 0.00019376908979841173, "loss": 1.5492, "step": 2413 }, { "epoch": 0.0313688290129182, "grad_norm": 0.42141085863113403, "learning_rate": 0.00019376649033650035, "loss": 1.4054, "step": 2414 }, { "epoch": 0.031381823556834075, "grad_norm": 0.43707937002182007, "learning_rate": 0.00019376389087458898, "loss": 1.4755, "step": 2415 }, { "epoch": 0.03139481810074995, "grad_norm": 0.3669463098049164, "learning_rate": 0.00019376129141267758, "loss": 1.4415, "step": 2416 }, { "epoch": 0.03140781264466582, "grad_norm": 0.3679063022136688, "learning_rate": 0.0001937586919507662, "loss": 1.4681, "step": 2417 }, { "epoch": 0.031420807188581694, "grad_norm": 0.33323216438293457, "learning_rate": 0.00019375609248885482, "loss": 1.5661, "step": 2418 }, { "epoch": 0.03143380173249757, "grad_norm": 0.3823724687099457, "learning_rate": 0.00019375349302694342, "loss": 1.4424, "step": 2419 }, { "epoch": 0.03144679627641344, "grad_norm": 0.37714922428131104, "learning_rate": 0.00019375089356503205, "loss": 1.5948, "step": 2420 }, { "epoch": 0.03145979082032931, "grad_norm": 0.36402538418769836, "learning_rate": 0.00019374829410312064, "loss": 1.4183, "step": 2421 }, { "epoch": 0.031472785364245186, "grad_norm": 0.5110093355178833, "learning_rate": 0.0001937456946412093, "loss": 1.4378, "step": 2422 }, { "epoch": 0.03148577990816106, "grad_norm": 0.3924713730812073, "learning_rate": 0.0001937430951792979, "loss": 1.5022, "step": 2423 }, { "epoch": 0.03149877445207693, "grad_norm": 0.3390837013721466, "learning_rate": 0.00019374049571738652, "loss": 1.3778, "step": 2424 }, { "epoch": 0.031511768995992805, "grad_norm": 0.38642051815986633, "learning_rate": 0.00019373789625547511, "loss": 1.4276, "step": 2425 }, { "epoch": 0.03152476353990868, "grad_norm": 0.36511752009391785, "learning_rate": 0.00019373529679356374, "loss": 1.2479, "step": 2426 }, { "epoch": 0.03153775808382456, "grad_norm": 0.4753246009349823, "learning_rate": 0.00019373269733165236, "loss": 1.4581, "step": 2427 }, { "epoch": 0.03155075262774043, "grad_norm": 0.507246732711792, "learning_rate": 0.00019373009786974096, "loss": 1.4634, "step": 2428 }, { "epoch": 0.0315637471716563, "grad_norm": 0.4189542829990387, "learning_rate": 0.0001937274984078296, "loss": 1.5611, "step": 2429 }, { "epoch": 0.031576741715572176, "grad_norm": 0.5271665453910828, "learning_rate": 0.0001937248989459182, "loss": 1.3618, "step": 2430 }, { "epoch": 0.03158973625948805, "grad_norm": 0.30904582142829895, "learning_rate": 0.0001937222994840068, "loss": 1.3887, "step": 2431 }, { "epoch": 0.03160273080340392, "grad_norm": 0.29169711470603943, "learning_rate": 0.00019371970002209543, "loss": 1.53, "step": 2432 }, { "epoch": 0.031615725347319795, "grad_norm": 0.36168599128723145, "learning_rate": 0.00019371710056018406, "loss": 1.3156, "step": 2433 }, { "epoch": 0.03162871989123567, "grad_norm": 0.3162071108818054, "learning_rate": 0.00019371450109827268, "loss": 1.4583, "step": 2434 }, { "epoch": 0.03164171443515154, "grad_norm": 0.24736791849136353, "learning_rate": 0.00019371190163636128, "loss": 1.3349, "step": 2435 }, { "epoch": 0.031654708979067414, "grad_norm": 0.2598811984062195, "learning_rate": 0.0001937093021744499, "loss": 1.2131, "step": 2436 }, { "epoch": 0.03166770352298329, "grad_norm": 0.45681071281433105, "learning_rate": 0.00019370670271253853, "loss": 1.5545, "step": 2437 }, { "epoch": 0.03168069806689916, "grad_norm": 0.3820514678955078, "learning_rate": 0.00019370410325062712, "loss": 1.3836, "step": 2438 }, { "epoch": 0.03169369261081503, "grad_norm": 0.39064905047416687, "learning_rate": 0.00019370150378871575, "loss": 1.4368, "step": 2439 }, { "epoch": 0.031706687154730906, "grad_norm": 0.4174213707447052, "learning_rate": 0.00019369890432680435, "loss": 1.587, "step": 2440 }, { "epoch": 0.03171968169864678, "grad_norm": 0.3780830204486847, "learning_rate": 0.000193696304864893, "loss": 1.7156, "step": 2441 }, { "epoch": 0.03173267624256265, "grad_norm": 0.29549428820610046, "learning_rate": 0.0001936937054029816, "loss": 1.4188, "step": 2442 }, { "epoch": 0.031745670786478525, "grad_norm": 0.30807724595069885, "learning_rate": 0.0001936911059410702, "loss": 1.4814, "step": 2443 }, { "epoch": 0.0317586653303944, "grad_norm": 0.38823434710502625, "learning_rate": 0.00019368850647915882, "loss": 1.5692, "step": 2444 }, { "epoch": 0.03177165987431027, "grad_norm": 0.3719289004802704, "learning_rate": 0.00019368590701724744, "loss": 1.4046, "step": 2445 }, { "epoch": 0.03178465441822615, "grad_norm": 0.4645630419254303, "learning_rate": 0.00019368330755533607, "loss": 1.4787, "step": 2446 }, { "epoch": 0.031797648962142024, "grad_norm": 0.32714515924453735, "learning_rate": 0.00019368070809342466, "loss": 1.4412, "step": 2447 }, { "epoch": 0.0318106435060579, "grad_norm": 0.3628447353839874, "learning_rate": 0.0001936781086315133, "loss": 1.4802, "step": 2448 }, { "epoch": 0.03182363804997377, "grad_norm": 0.2753009498119354, "learning_rate": 0.0001936755091696019, "loss": 1.5107, "step": 2449 }, { "epoch": 0.03183663259388964, "grad_norm": 0.36268672347068787, "learning_rate": 0.0001936729097076905, "loss": 1.5527, "step": 2450 }, { "epoch": 0.031849627137805515, "grad_norm": 0.4151109457015991, "learning_rate": 0.00019367031024577913, "loss": 1.2486, "step": 2451 }, { "epoch": 0.03186262168172139, "grad_norm": 0.36598706245422363, "learning_rate": 0.00019366771078386773, "loss": 1.6202, "step": 2452 }, { "epoch": 0.03187561622563726, "grad_norm": 0.47833046317100525, "learning_rate": 0.00019366511132195638, "loss": 1.5974, "step": 2453 }, { "epoch": 0.031888610769553134, "grad_norm": 0.392170786857605, "learning_rate": 0.00019366251186004498, "loss": 1.3854, "step": 2454 }, { "epoch": 0.03190160531346901, "grad_norm": 0.4662458002567291, "learning_rate": 0.0001936599123981336, "loss": 1.6315, "step": 2455 }, { "epoch": 0.03191459985738488, "grad_norm": 0.39337852597236633, "learning_rate": 0.0001936573129362222, "loss": 1.5138, "step": 2456 }, { "epoch": 0.03192759440130075, "grad_norm": 0.3348299562931061, "learning_rate": 0.00019365471347431083, "loss": 1.4841, "step": 2457 }, { "epoch": 0.031940588945216626, "grad_norm": 1.329984188079834, "learning_rate": 0.00019365211401239945, "loss": 1.2603, "step": 2458 }, { "epoch": 0.0319535834891325, "grad_norm": 0.41095101833343506, "learning_rate": 0.00019364951455048805, "loss": 1.5679, "step": 2459 }, { "epoch": 0.03196657803304837, "grad_norm": 0.42867493629455566, "learning_rate": 0.00019364691508857667, "loss": 1.3936, "step": 2460 }, { "epoch": 0.031979572576964245, "grad_norm": 0.4014851152896881, "learning_rate": 0.0001936443156266653, "loss": 1.5077, "step": 2461 }, { "epoch": 0.03199256712088012, "grad_norm": 0.32092902064323425, "learning_rate": 0.0001936417161647539, "loss": 1.39, "step": 2462 }, { "epoch": 0.03200556166479599, "grad_norm": 0.31203359365463257, "learning_rate": 0.00019363911670284252, "loss": 1.4216, "step": 2463 }, { "epoch": 0.032018556208711864, "grad_norm": 0.4010063707828522, "learning_rate": 0.00019363651724093112, "loss": 1.5438, "step": 2464 }, { "epoch": 0.032031550752627744, "grad_norm": 0.45139941573143005, "learning_rate": 0.00019363391777901977, "loss": 1.5588, "step": 2465 }, { "epoch": 0.03204454529654362, "grad_norm": 0.3555641770362854, "learning_rate": 0.00019363131831710837, "loss": 1.489, "step": 2466 }, { "epoch": 0.03205753984045949, "grad_norm": 0.37799158692359924, "learning_rate": 0.000193628718855197, "loss": 1.3104, "step": 2467 }, { "epoch": 0.03207053438437536, "grad_norm": 0.32376259565353394, "learning_rate": 0.00019362611939328561, "loss": 1.299, "step": 2468 }, { "epoch": 0.032083528928291236, "grad_norm": 0.2891981303691864, "learning_rate": 0.0001936235199313742, "loss": 1.2869, "step": 2469 }, { "epoch": 0.03209652347220711, "grad_norm": 0.30140420794487, "learning_rate": 0.00019362092046946284, "loss": 1.2806, "step": 2470 }, { "epoch": 0.03210951801612298, "grad_norm": 0.3630262017250061, "learning_rate": 0.00019361832100755143, "loss": 1.4137, "step": 2471 }, { "epoch": 0.032122512560038854, "grad_norm": 0.3762016296386719, "learning_rate": 0.00019361572154564006, "loss": 1.4943, "step": 2472 }, { "epoch": 0.03213550710395473, "grad_norm": 0.34979432821273804, "learning_rate": 0.00019361312208372868, "loss": 1.2977, "step": 2473 }, { "epoch": 0.0321485016478706, "grad_norm": 0.3361506462097168, "learning_rate": 0.00019361052262181728, "loss": 1.267, "step": 2474 }, { "epoch": 0.03216149619178647, "grad_norm": 0.38225317001342773, "learning_rate": 0.0001936079231599059, "loss": 1.4333, "step": 2475 }, { "epoch": 0.032174490735702346, "grad_norm": 0.4617854058742523, "learning_rate": 0.00019360532369799453, "loss": 1.5878, "step": 2476 }, { "epoch": 0.03218748527961822, "grad_norm": 0.3995307385921478, "learning_rate": 0.00019360272423608315, "loss": 1.5246, "step": 2477 }, { "epoch": 0.03220047982353409, "grad_norm": 0.37244245409965515, "learning_rate": 0.00019360012477417175, "loss": 1.2833, "step": 2478 }, { "epoch": 0.032213474367449965, "grad_norm": 0.30733734369277954, "learning_rate": 0.00019359752531226038, "loss": 1.3623, "step": 2479 }, { "epoch": 0.03222646891136584, "grad_norm": 0.4627993404865265, "learning_rate": 0.000193594925850349, "loss": 1.4876, "step": 2480 }, { "epoch": 0.03223946345528171, "grad_norm": 0.33499887585639954, "learning_rate": 0.0001935923263884376, "loss": 1.5466, "step": 2481 }, { "epoch": 0.032252457999197584, "grad_norm": 0.3215530514717102, "learning_rate": 0.00019358972692652622, "loss": 1.4848, "step": 2482 }, { "epoch": 0.03226545254311346, "grad_norm": 0.3846237361431122, "learning_rate": 0.00019358712746461482, "loss": 1.4881, "step": 2483 }, { "epoch": 0.03227844708702934, "grad_norm": 0.40432724356651306, "learning_rate": 0.00019358452800270347, "loss": 1.6301, "step": 2484 }, { "epoch": 0.03229144163094521, "grad_norm": 0.4204750955104828, "learning_rate": 0.00019358192854079207, "loss": 1.5951, "step": 2485 }, { "epoch": 0.03230443617486108, "grad_norm": 0.33970895409584045, "learning_rate": 0.00019357932907888067, "loss": 1.3571, "step": 2486 }, { "epoch": 0.032317430718776956, "grad_norm": 0.3833889067173004, "learning_rate": 0.0001935767296169693, "loss": 1.6484, "step": 2487 }, { "epoch": 0.03233042526269283, "grad_norm": 0.5447184443473816, "learning_rate": 0.00019357413015505791, "loss": 1.5994, "step": 2488 }, { "epoch": 0.0323434198066087, "grad_norm": 0.39822641015052795, "learning_rate": 0.00019357153069314654, "loss": 1.3907, "step": 2489 }, { "epoch": 0.032356414350524575, "grad_norm": 0.36578643321990967, "learning_rate": 0.00019356893123123514, "loss": 1.3455, "step": 2490 }, { "epoch": 0.03236940889444045, "grad_norm": 0.31123805046081543, "learning_rate": 0.00019356633176932376, "loss": 1.289, "step": 2491 }, { "epoch": 0.03238240343835632, "grad_norm": 0.6136845350265503, "learning_rate": 0.00019356373230741239, "loss": 1.639, "step": 2492 }, { "epoch": 0.032395397982272194, "grad_norm": 0.32089829444885254, "learning_rate": 0.00019356113284550098, "loss": 1.3375, "step": 2493 }, { "epoch": 0.032408392526188066, "grad_norm": 0.3012245297431946, "learning_rate": 0.0001935585333835896, "loss": 1.5459, "step": 2494 }, { "epoch": 0.03242138707010394, "grad_norm": 0.3286607265472412, "learning_rate": 0.0001935559339216782, "loss": 1.4914, "step": 2495 }, { "epoch": 0.03243438161401981, "grad_norm": 0.2875760793685913, "learning_rate": 0.00019355333445976686, "loss": 1.3503, "step": 2496 }, { "epoch": 0.032447376157935685, "grad_norm": 0.3188101053237915, "learning_rate": 0.00019355073499785545, "loss": 1.3547, "step": 2497 }, { "epoch": 0.03246037070185156, "grad_norm": 0.3665832579135895, "learning_rate": 0.00019354813553594405, "loss": 1.2182, "step": 2498 }, { "epoch": 0.03247336524576743, "grad_norm": 0.3593955934047699, "learning_rate": 0.00019354553607403268, "loss": 1.3949, "step": 2499 }, { "epoch": 0.032486359789683304, "grad_norm": 0.4981686472892761, "learning_rate": 0.0001935429366121213, "loss": 1.4597, "step": 2500 }, { "epoch": 0.03249935433359918, "grad_norm": 0.29916468262672424, "learning_rate": 0.00019354033715020992, "loss": 1.4686, "step": 2501 }, { "epoch": 0.03251234887751505, "grad_norm": 0.5272825956344604, "learning_rate": 0.00019353773768829852, "loss": 1.478, "step": 2502 }, { "epoch": 0.03252534342143093, "grad_norm": 0.39652758836746216, "learning_rate": 0.00019353513822638715, "loss": 1.4928, "step": 2503 }, { "epoch": 0.0325383379653468, "grad_norm": 0.43363118171691895, "learning_rate": 0.00019353253876447577, "loss": 1.6528, "step": 2504 }, { "epoch": 0.032551332509262676, "grad_norm": 0.374214768409729, "learning_rate": 0.00019352993930256437, "loss": 1.5646, "step": 2505 }, { "epoch": 0.03256432705317855, "grad_norm": 0.2884669303894043, "learning_rate": 0.000193527339840653, "loss": 1.4726, "step": 2506 }, { "epoch": 0.03257732159709442, "grad_norm": 0.2782253324985504, "learning_rate": 0.00019352474037874162, "loss": 1.4378, "step": 2507 }, { "epoch": 0.032590316141010295, "grad_norm": 0.4006049633026123, "learning_rate": 0.00019352214091683024, "loss": 1.6467, "step": 2508 }, { "epoch": 0.03260331068492617, "grad_norm": 0.38108521699905396, "learning_rate": 0.00019351954145491884, "loss": 1.5763, "step": 2509 }, { "epoch": 0.03261630522884204, "grad_norm": 0.3799011707305908, "learning_rate": 0.00019351694199300744, "loss": 1.4375, "step": 2510 }, { "epoch": 0.032629299772757914, "grad_norm": 0.3035220205783844, "learning_rate": 0.0001935143425310961, "loss": 1.5254, "step": 2511 }, { "epoch": 0.03264229431667379, "grad_norm": 0.36107808351516724, "learning_rate": 0.00019351174306918469, "loss": 1.561, "step": 2512 }, { "epoch": 0.03265528886058966, "grad_norm": 0.42562344670295715, "learning_rate": 0.0001935091436072733, "loss": 1.4518, "step": 2513 }, { "epoch": 0.03266828340450553, "grad_norm": 0.3229316473007202, "learning_rate": 0.0001935065441453619, "loss": 1.5941, "step": 2514 }, { "epoch": 0.032681277948421406, "grad_norm": 0.27872514724731445, "learning_rate": 0.00019350394468345053, "loss": 1.1342, "step": 2515 }, { "epoch": 0.03269427249233728, "grad_norm": 0.4430004060268402, "learning_rate": 0.00019350134522153916, "loss": 1.568, "step": 2516 }, { "epoch": 0.03270726703625315, "grad_norm": 0.438882440328598, "learning_rate": 0.00019349874575962775, "loss": 1.5194, "step": 2517 }, { "epoch": 0.032720261580169024, "grad_norm": 0.34737488627433777, "learning_rate": 0.00019349614629771638, "loss": 1.478, "step": 2518 }, { "epoch": 0.0327332561240849, "grad_norm": 0.32929858565330505, "learning_rate": 0.000193493546835805, "loss": 1.4294, "step": 2519 }, { "epoch": 0.03274625066800077, "grad_norm": 0.3582665026187897, "learning_rate": 0.00019349094737389363, "loss": 1.2878, "step": 2520 }, { "epoch": 0.03275924521191664, "grad_norm": 0.4200032949447632, "learning_rate": 0.00019348834791198222, "loss": 1.6029, "step": 2521 }, { "epoch": 0.03277223975583252, "grad_norm": 0.31368643045425415, "learning_rate": 0.00019348574845007085, "loss": 1.4605, "step": 2522 }, { "epoch": 0.032785234299748396, "grad_norm": 0.3194805085659027, "learning_rate": 0.00019348314898815947, "loss": 1.3755, "step": 2523 }, { "epoch": 0.03279822884366427, "grad_norm": 0.2415430098772049, "learning_rate": 0.00019348054952624807, "loss": 1.3457, "step": 2524 }, { "epoch": 0.03281122338758014, "grad_norm": 0.3878587484359741, "learning_rate": 0.0001934779500643367, "loss": 1.4192, "step": 2525 }, { "epoch": 0.032824217931496015, "grad_norm": 0.3380589485168457, "learning_rate": 0.0001934753506024253, "loss": 1.5376, "step": 2526 }, { "epoch": 0.03283721247541189, "grad_norm": 0.3906152546405792, "learning_rate": 0.00019347275114051392, "loss": 1.4204, "step": 2527 }, { "epoch": 0.03285020701932776, "grad_norm": 0.41287320852279663, "learning_rate": 0.00019347015167860254, "loss": 1.3862, "step": 2528 }, { "epoch": 0.032863201563243634, "grad_norm": 0.27187255024909973, "learning_rate": 0.00019346755221669114, "loss": 1.282, "step": 2529 }, { "epoch": 0.03287619610715951, "grad_norm": 0.41603827476501465, "learning_rate": 0.00019346495275477976, "loss": 1.6202, "step": 2530 }, { "epoch": 0.03288919065107538, "grad_norm": 0.2614023685455322, "learning_rate": 0.0001934623532928684, "loss": 1.32, "step": 2531 }, { "epoch": 0.03290218519499125, "grad_norm": 0.3348638117313385, "learning_rate": 0.000193459753830957, "loss": 1.349, "step": 2532 }, { "epoch": 0.032915179738907126, "grad_norm": 0.42554065585136414, "learning_rate": 0.0001934571543690456, "loss": 1.4376, "step": 2533 }, { "epoch": 0.032928174282823, "grad_norm": 0.4203813970088959, "learning_rate": 0.00019345455490713423, "loss": 1.5713, "step": 2534 }, { "epoch": 0.03294116882673887, "grad_norm": 0.30855053663253784, "learning_rate": 0.00019345195544522286, "loss": 1.3682, "step": 2535 }, { "epoch": 0.032954163370654745, "grad_norm": 0.3360995650291443, "learning_rate": 0.00019344935598331146, "loss": 1.3917, "step": 2536 }, { "epoch": 0.03296715791457062, "grad_norm": 0.34986838698387146, "learning_rate": 0.00019344675652140008, "loss": 1.3935, "step": 2537 }, { "epoch": 0.03298015245848649, "grad_norm": 0.3301599621772766, "learning_rate": 0.00019344415705948868, "loss": 1.4279, "step": 2538 }, { "epoch": 0.032993147002402363, "grad_norm": 0.28528234362602234, "learning_rate": 0.00019344155759757733, "loss": 1.2924, "step": 2539 }, { "epoch": 0.033006141546318236, "grad_norm": 0.404764324426651, "learning_rate": 0.00019343895813566593, "loss": 1.4343, "step": 2540 }, { "epoch": 0.033019136090234116, "grad_norm": 0.3436245620250702, "learning_rate": 0.00019343635867375452, "loss": 1.2012, "step": 2541 }, { "epoch": 0.03303213063414999, "grad_norm": 0.28586044907569885, "learning_rate": 0.00019343375921184318, "loss": 1.4012, "step": 2542 }, { "epoch": 0.03304512517806586, "grad_norm": 0.3174670338630676, "learning_rate": 0.00019343115974993177, "loss": 1.1817, "step": 2543 }, { "epoch": 0.033058119721981735, "grad_norm": 0.4420662224292755, "learning_rate": 0.0001934285602880204, "loss": 1.407, "step": 2544 }, { "epoch": 0.03307111426589761, "grad_norm": 0.4695225954055786, "learning_rate": 0.000193425960826109, "loss": 1.5113, "step": 2545 }, { "epoch": 0.03308410880981348, "grad_norm": 0.3479248583316803, "learning_rate": 0.00019342336136419762, "loss": 1.4837, "step": 2546 }, { "epoch": 0.033097103353729354, "grad_norm": 0.42536017298698425, "learning_rate": 0.00019342076190228624, "loss": 1.4728, "step": 2547 }, { "epoch": 0.03311009789764523, "grad_norm": 0.3892519474029541, "learning_rate": 0.00019341816244037484, "loss": 1.5892, "step": 2548 }, { "epoch": 0.0331230924415611, "grad_norm": 0.5114793181419373, "learning_rate": 0.00019341556297846347, "loss": 1.5139, "step": 2549 }, { "epoch": 0.03313608698547697, "grad_norm": 0.34405434131622314, "learning_rate": 0.0001934129635165521, "loss": 1.3389, "step": 2550 }, { "epoch": 0.033149081529392846, "grad_norm": 0.38240694999694824, "learning_rate": 0.00019341036405464072, "loss": 1.437, "step": 2551 }, { "epoch": 0.03316207607330872, "grad_norm": 0.35863661766052246, "learning_rate": 0.0001934077645927293, "loss": 1.4334, "step": 2552 }, { "epoch": 0.03317507061722459, "grad_norm": 0.3211493492126465, "learning_rate": 0.0001934051651308179, "loss": 1.2768, "step": 2553 }, { "epoch": 0.033188065161140465, "grad_norm": 0.3290875256061554, "learning_rate": 0.00019340256566890656, "loss": 1.1401, "step": 2554 }, { "epoch": 0.03320105970505634, "grad_norm": 0.3791804611682892, "learning_rate": 0.00019339996620699516, "loss": 1.4081, "step": 2555 }, { "epoch": 0.03321405424897221, "grad_norm": 0.2770865261554718, "learning_rate": 0.00019339736674508378, "loss": 1.3397, "step": 2556 }, { "epoch": 0.033227048792888084, "grad_norm": 0.3526403307914734, "learning_rate": 0.00019339476728317238, "loss": 1.388, "step": 2557 }, { "epoch": 0.03324004333680396, "grad_norm": 0.37430208921432495, "learning_rate": 0.000193392167821261, "loss": 1.5212, "step": 2558 }, { "epoch": 0.03325303788071983, "grad_norm": 0.3494778275489807, "learning_rate": 0.00019338956835934963, "loss": 1.3671, "step": 2559 }, { "epoch": 0.03326603242463571, "grad_norm": 0.4079028069972992, "learning_rate": 0.00019338696889743823, "loss": 1.402, "step": 2560 }, { "epoch": 0.03327902696855158, "grad_norm": 0.46199265122413635, "learning_rate": 0.00019338436943552685, "loss": 1.4798, "step": 2561 }, { "epoch": 0.033292021512467455, "grad_norm": 0.32505708932876587, "learning_rate": 0.00019338176997361548, "loss": 1.4432, "step": 2562 }, { "epoch": 0.03330501605638333, "grad_norm": 0.3568376302719116, "learning_rate": 0.0001933791705117041, "loss": 1.3947, "step": 2563 }, { "epoch": 0.0333180106002992, "grad_norm": 0.3708251118659973, "learning_rate": 0.0001933765710497927, "loss": 1.2981, "step": 2564 }, { "epoch": 0.033331005144215074, "grad_norm": 0.39222443103790283, "learning_rate": 0.0001933739715878813, "loss": 1.3699, "step": 2565 }, { "epoch": 0.03334399968813095, "grad_norm": 0.2827359139919281, "learning_rate": 0.00019337137212596995, "loss": 1.3626, "step": 2566 }, { "epoch": 0.03335699423204682, "grad_norm": 0.3819025158882141, "learning_rate": 0.00019336877266405854, "loss": 1.3338, "step": 2567 }, { "epoch": 0.03336998877596269, "grad_norm": 0.38024330139160156, "learning_rate": 0.00019336617320214717, "loss": 1.4123, "step": 2568 }, { "epoch": 0.033382983319878566, "grad_norm": 0.3383539915084839, "learning_rate": 0.00019336357374023577, "loss": 1.391, "step": 2569 }, { "epoch": 0.03339597786379444, "grad_norm": 0.2682119905948639, "learning_rate": 0.0001933609742783244, "loss": 1.575, "step": 2570 }, { "epoch": 0.03340897240771031, "grad_norm": 0.40894395112991333, "learning_rate": 0.00019335837481641302, "loss": 1.3491, "step": 2571 }, { "epoch": 0.033421966951626185, "grad_norm": 0.4955017864704132, "learning_rate": 0.0001933557753545016, "loss": 1.3903, "step": 2572 }, { "epoch": 0.03343496149554206, "grad_norm": 0.40897059440612793, "learning_rate": 0.00019335317589259024, "loss": 1.2006, "step": 2573 }, { "epoch": 0.03344795603945793, "grad_norm": 0.31796297430992126, "learning_rate": 0.00019335057643067886, "loss": 1.2861, "step": 2574 }, { "epoch": 0.033460950583373804, "grad_norm": 0.3871162235736847, "learning_rate": 0.00019334797696876749, "loss": 1.5619, "step": 2575 }, { "epoch": 0.03347394512728968, "grad_norm": 0.33858761191368103, "learning_rate": 0.00019334537750685608, "loss": 1.4107, "step": 2576 }, { "epoch": 0.03348693967120555, "grad_norm": 0.35653814673423767, "learning_rate": 0.0001933427780449447, "loss": 1.6332, "step": 2577 }, { "epoch": 0.03349993421512142, "grad_norm": 0.3362171947956085, "learning_rate": 0.00019334017858303333, "loss": 1.425, "step": 2578 }, { "epoch": 0.0335129287590373, "grad_norm": 0.4000280797481537, "learning_rate": 0.00019333757912112193, "loss": 1.3239, "step": 2579 }, { "epoch": 0.033525923302953176, "grad_norm": 0.42451217770576477, "learning_rate": 0.00019333497965921055, "loss": 1.5075, "step": 2580 }, { "epoch": 0.03353891784686905, "grad_norm": 0.3276481032371521, "learning_rate": 0.00019333238019729918, "loss": 1.2976, "step": 2581 }, { "epoch": 0.03355191239078492, "grad_norm": 0.4846782088279724, "learning_rate": 0.00019332978073538778, "loss": 1.5055, "step": 2582 }, { "epoch": 0.033564906934700794, "grad_norm": 0.31280940771102905, "learning_rate": 0.0001933271812734764, "loss": 1.3257, "step": 2583 }, { "epoch": 0.03357790147861667, "grad_norm": 0.3765902817249298, "learning_rate": 0.000193324581811565, "loss": 1.496, "step": 2584 }, { "epoch": 0.03359089602253254, "grad_norm": 0.3513646721839905, "learning_rate": 0.00019332198234965365, "loss": 1.4519, "step": 2585 }, { "epoch": 0.03360389056644841, "grad_norm": 0.3542924225330353, "learning_rate": 0.00019331938288774225, "loss": 1.4657, "step": 2586 }, { "epoch": 0.033616885110364286, "grad_norm": 0.27782920002937317, "learning_rate": 0.00019331678342583087, "loss": 1.2721, "step": 2587 }, { "epoch": 0.03362987965428016, "grad_norm": 0.4466197192668915, "learning_rate": 0.00019331418396391947, "loss": 1.4218, "step": 2588 }, { "epoch": 0.03364287419819603, "grad_norm": 0.3801550269126892, "learning_rate": 0.0001933115845020081, "loss": 1.4564, "step": 2589 }, { "epoch": 0.033655868742111905, "grad_norm": 0.5039145946502686, "learning_rate": 0.00019330898504009672, "loss": 1.5975, "step": 2590 }, { "epoch": 0.03366886328602778, "grad_norm": 0.326337605714798, "learning_rate": 0.00019330638557818532, "loss": 1.3116, "step": 2591 }, { "epoch": 0.03368185782994365, "grad_norm": 0.5112564563751221, "learning_rate": 0.00019330378611627394, "loss": 1.5561, "step": 2592 }, { "epoch": 0.033694852373859524, "grad_norm": 0.404899924993515, "learning_rate": 0.00019330118665436256, "loss": 1.4605, "step": 2593 }, { "epoch": 0.0337078469177754, "grad_norm": 0.2979227304458618, "learning_rate": 0.00019329858719245116, "loss": 1.2656, "step": 2594 }, { "epoch": 0.03372084146169127, "grad_norm": 0.42224523425102234, "learning_rate": 0.00019329598773053979, "loss": 1.6135, "step": 2595 }, { "epoch": 0.03373383600560714, "grad_norm": 0.2901502549648285, "learning_rate": 0.00019329338826862838, "loss": 1.4867, "step": 2596 }, { "epoch": 0.033746830549523016, "grad_norm": 0.3446262776851654, "learning_rate": 0.00019329078880671704, "loss": 1.242, "step": 2597 }, { "epoch": 0.033759825093438896, "grad_norm": 0.45019254088401794, "learning_rate": 0.00019328818934480563, "loss": 1.3692, "step": 2598 }, { "epoch": 0.03377281963735477, "grad_norm": 0.49961885809898376, "learning_rate": 0.00019328558988289426, "loss": 1.5872, "step": 2599 }, { "epoch": 0.03378581418127064, "grad_norm": 0.36805474758148193, "learning_rate": 0.00019328299042098285, "loss": 1.4729, "step": 2600 }, { "epoch": 0.033798808725186515, "grad_norm": 0.4167197346687317, "learning_rate": 0.00019328039095907148, "loss": 1.6615, "step": 2601 }, { "epoch": 0.03381180326910239, "grad_norm": 0.35889965295791626, "learning_rate": 0.0001932777914971601, "loss": 1.4583, "step": 2602 }, { "epoch": 0.03382479781301826, "grad_norm": 0.3481581211090088, "learning_rate": 0.0001932751920352487, "loss": 1.4279, "step": 2603 }, { "epoch": 0.033837792356934134, "grad_norm": 0.42533284425735474, "learning_rate": 0.00019327259257333733, "loss": 1.6325, "step": 2604 }, { "epoch": 0.033850786900850006, "grad_norm": 0.43109431862831116, "learning_rate": 0.00019326999311142595, "loss": 1.3109, "step": 2605 }, { "epoch": 0.03386378144476588, "grad_norm": 0.40868356823921204, "learning_rate": 0.00019326739364951457, "loss": 1.4989, "step": 2606 }, { "epoch": 0.03387677598868175, "grad_norm": 0.3893938958644867, "learning_rate": 0.00019326479418760317, "loss": 1.4323, "step": 2607 }, { "epoch": 0.033889770532597625, "grad_norm": 0.4579794406890869, "learning_rate": 0.00019326219472569177, "loss": 1.5663, "step": 2608 }, { "epoch": 0.0339027650765135, "grad_norm": 0.3872573971748352, "learning_rate": 0.00019325959526378042, "loss": 1.6074, "step": 2609 }, { "epoch": 0.03391575962042937, "grad_norm": 0.27037733793258667, "learning_rate": 0.00019325699580186902, "loss": 1.321, "step": 2610 }, { "epoch": 0.033928754164345244, "grad_norm": 0.38498052954673767, "learning_rate": 0.00019325439633995764, "loss": 1.3941, "step": 2611 }, { "epoch": 0.03394174870826112, "grad_norm": 0.2826327979564667, "learning_rate": 0.00019325179687804624, "loss": 1.3917, "step": 2612 }, { "epoch": 0.03395474325217699, "grad_norm": 0.39286717772483826, "learning_rate": 0.00019324919741613486, "loss": 1.4006, "step": 2613 }, { "epoch": 0.03396773779609286, "grad_norm": 0.32138383388519287, "learning_rate": 0.0001932465979542235, "loss": 1.3965, "step": 2614 }, { "epoch": 0.033980732340008736, "grad_norm": 0.3826918303966522, "learning_rate": 0.00019324399849231209, "loss": 1.5142, "step": 2615 }, { "epoch": 0.03399372688392461, "grad_norm": 0.44314083456993103, "learning_rate": 0.00019324139903040074, "loss": 1.4034, "step": 2616 }, { "epoch": 0.03400672142784049, "grad_norm": 0.31664013862609863, "learning_rate": 0.00019323879956848933, "loss": 1.3188, "step": 2617 }, { "epoch": 0.03401971597175636, "grad_norm": 0.5277805924415588, "learning_rate": 0.00019323620010657796, "loss": 1.3762, "step": 2618 }, { "epoch": 0.034032710515672235, "grad_norm": 0.5130351781845093, "learning_rate": 0.00019323360064466656, "loss": 1.4866, "step": 2619 }, { "epoch": 0.03404570505958811, "grad_norm": 0.3151296377182007, "learning_rate": 0.00019323100118275518, "loss": 1.389, "step": 2620 }, { "epoch": 0.03405869960350398, "grad_norm": 0.535396933555603, "learning_rate": 0.0001932284017208438, "loss": 1.5263, "step": 2621 }, { "epoch": 0.034071694147419854, "grad_norm": 0.48304587602615356, "learning_rate": 0.0001932258022589324, "loss": 1.5219, "step": 2622 }, { "epoch": 0.03408468869133573, "grad_norm": 0.41263094544410706, "learning_rate": 0.00019322320279702103, "loss": 1.5241, "step": 2623 }, { "epoch": 0.0340976832352516, "grad_norm": 0.41084033250808716, "learning_rate": 0.00019322060333510965, "loss": 1.3622, "step": 2624 }, { "epoch": 0.03411067777916747, "grad_norm": 0.28344786167144775, "learning_rate": 0.00019321800387319825, "loss": 1.3885, "step": 2625 }, { "epoch": 0.034123672323083346, "grad_norm": 0.37179088592529297, "learning_rate": 0.00019321540441128687, "loss": 1.5059, "step": 2626 }, { "epoch": 0.03413666686699922, "grad_norm": 0.3244517147541046, "learning_rate": 0.00019321280494937547, "loss": 1.4318, "step": 2627 }, { "epoch": 0.03414966141091509, "grad_norm": 0.4654742181301117, "learning_rate": 0.00019321020548746412, "loss": 1.4745, "step": 2628 }, { "epoch": 0.034162655954830964, "grad_norm": 0.3580836355686188, "learning_rate": 0.00019320760602555272, "loss": 1.4278, "step": 2629 }, { "epoch": 0.03417565049874684, "grad_norm": 0.45845532417297363, "learning_rate": 0.00019320500656364134, "loss": 1.5502, "step": 2630 }, { "epoch": 0.03418864504266271, "grad_norm": 0.38971734046936035, "learning_rate": 0.00019320240710172994, "loss": 1.5486, "step": 2631 }, { "epoch": 0.03420163958657858, "grad_norm": 0.3922237157821655, "learning_rate": 0.00019319980763981857, "loss": 1.3719, "step": 2632 }, { "epoch": 0.034214634130494456, "grad_norm": 0.37390995025634766, "learning_rate": 0.0001931972081779072, "loss": 1.2856, "step": 2633 }, { "epoch": 0.03422762867441033, "grad_norm": 0.46617215871810913, "learning_rate": 0.0001931946087159958, "loss": 1.4395, "step": 2634 }, { "epoch": 0.0342406232183262, "grad_norm": 0.3563721477985382, "learning_rate": 0.0001931920092540844, "loss": 1.4457, "step": 2635 }, { "epoch": 0.03425361776224208, "grad_norm": 0.409978449344635, "learning_rate": 0.00019318940979217304, "loss": 1.4947, "step": 2636 }, { "epoch": 0.034266612306157955, "grad_norm": 0.4160659611225128, "learning_rate": 0.00019318681033026163, "loss": 1.4029, "step": 2637 }, { "epoch": 0.03427960685007383, "grad_norm": 0.4420088827610016, "learning_rate": 0.00019318421086835026, "loss": 1.4389, "step": 2638 }, { "epoch": 0.0342926013939897, "grad_norm": 0.40361663699150085, "learning_rate": 0.00019318161140643886, "loss": 1.4556, "step": 2639 }, { "epoch": 0.034305595937905574, "grad_norm": 0.3836807906627655, "learning_rate": 0.0001931790119445275, "loss": 1.5375, "step": 2640 }, { "epoch": 0.03431859048182145, "grad_norm": 0.21775692701339722, "learning_rate": 0.0001931764124826161, "loss": 1.3929, "step": 2641 }, { "epoch": 0.03433158502573732, "grad_norm": 0.4751773476600647, "learning_rate": 0.00019317381302070473, "loss": 1.533, "step": 2642 }, { "epoch": 0.03434457956965319, "grad_norm": 0.3994961380958557, "learning_rate": 0.00019317121355879333, "loss": 1.3528, "step": 2643 }, { "epoch": 0.034357574113569066, "grad_norm": 0.33339062333106995, "learning_rate": 0.00019316861409688195, "loss": 1.4777, "step": 2644 }, { "epoch": 0.03437056865748494, "grad_norm": 0.4146357774734497, "learning_rate": 0.00019316601463497058, "loss": 1.5178, "step": 2645 }, { "epoch": 0.03438356320140081, "grad_norm": 0.4500183165073395, "learning_rate": 0.00019316341517305917, "loss": 1.5561, "step": 2646 }, { "epoch": 0.034396557745316685, "grad_norm": 0.38291653990745544, "learning_rate": 0.0001931608157111478, "loss": 1.2276, "step": 2647 }, { "epoch": 0.03440955228923256, "grad_norm": 0.41809695959091187, "learning_rate": 0.00019315821624923642, "loss": 1.5846, "step": 2648 }, { "epoch": 0.03442254683314843, "grad_norm": 0.32560357451438904, "learning_rate": 0.00019315561678732502, "loss": 1.5845, "step": 2649 }, { "epoch": 0.0344355413770643, "grad_norm": 0.46995043754577637, "learning_rate": 0.00019315301732541364, "loss": 1.5644, "step": 2650 }, { "epoch": 0.034448535920980176, "grad_norm": 0.344234436750412, "learning_rate": 0.00019315041786350224, "loss": 1.3925, "step": 2651 }, { "epoch": 0.03446153046489605, "grad_norm": 0.3973907232284546, "learning_rate": 0.0001931478184015909, "loss": 1.4672, "step": 2652 }, { "epoch": 0.03447452500881192, "grad_norm": 0.4125625491142273, "learning_rate": 0.0001931452189396795, "loss": 1.3658, "step": 2653 }, { "epoch": 0.034487519552727795, "grad_norm": 0.32643839716911316, "learning_rate": 0.00019314261947776812, "loss": 1.4776, "step": 2654 }, { "epoch": 0.034500514096643675, "grad_norm": 0.3233036398887634, "learning_rate": 0.00019314002001585674, "loss": 1.4777, "step": 2655 }, { "epoch": 0.03451350864055955, "grad_norm": 0.4476162791252136, "learning_rate": 0.00019313742055394534, "loss": 1.5353, "step": 2656 }, { "epoch": 0.03452650318447542, "grad_norm": 0.42174360156059265, "learning_rate": 0.00019313482109203396, "loss": 1.3087, "step": 2657 }, { "epoch": 0.034539497728391294, "grad_norm": 0.3945503830909729, "learning_rate": 0.00019313222163012256, "loss": 1.5998, "step": 2658 }, { "epoch": 0.03455249227230717, "grad_norm": 0.4233253300189972, "learning_rate": 0.0001931296221682112, "loss": 1.638, "step": 2659 }, { "epoch": 0.03456548681622304, "grad_norm": 0.4910193085670471, "learning_rate": 0.0001931270227062998, "loss": 1.4125, "step": 2660 }, { "epoch": 0.03457848136013891, "grad_norm": 0.47878560423851013, "learning_rate": 0.00019312442324438843, "loss": 1.4589, "step": 2661 }, { "epoch": 0.034591475904054786, "grad_norm": 0.3505452871322632, "learning_rate": 0.00019312182378247703, "loss": 1.2657, "step": 2662 }, { "epoch": 0.03460447044797066, "grad_norm": 0.3399491012096405, "learning_rate": 0.00019311922432056565, "loss": 1.4723, "step": 2663 }, { "epoch": 0.03461746499188653, "grad_norm": 0.38019537925720215, "learning_rate": 0.00019311662485865428, "loss": 1.5725, "step": 2664 }, { "epoch": 0.034630459535802405, "grad_norm": 0.36103230714797974, "learning_rate": 0.00019311402539674288, "loss": 1.405, "step": 2665 }, { "epoch": 0.03464345407971828, "grad_norm": 0.41101959347724915, "learning_rate": 0.0001931114259348315, "loss": 1.4975, "step": 2666 }, { "epoch": 0.03465644862363415, "grad_norm": 0.4802800118923187, "learning_rate": 0.00019310882647292013, "loss": 1.4648, "step": 2667 }, { "epoch": 0.034669443167550024, "grad_norm": 0.4229055345058441, "learning_rate": 0.00019310622701100872, "loss": 1.5491, "step": 2668 }, { "epoch": 0.0346824377114659, "grad_norm": 0.3899267911911011, "learning_rate": 0.00019310362754909735, "loss": 1.3745, "step": 2669 }, { "epoch": 0.03469543225538177, "grad_norm": 0.5346493124961853, "learning_rate": 0.00019310102808718594, "loss": 1.4355, "step": 2670 }, { "epoch": 0.03470842679929764, "grad_norm": 0.425605833530426, "learning_rate": 0.0001930984286252746, "loss": 1.6465, "step": 2671 }, { "epoch": 0.034721421343213515, "grad_norm": 0.4313183128833771, "learning_rate": 0.0001930958291633632, "loss": 1.4226, "step": 2672 }, { "epoch": 0.03473441588712939, "grad_norm": 0.33624106645584106, "learning_rate": 0.00019309322970145182, "loss": 1.1646, "step": 2673 }, { "epoch": 0.03474741043104527, "grad_norm": 0.37093326449394226, "learning_rate": 0.00019309063023954042, "loss": 1.526, "step": 2674 }, { "epoch": 0.03476040497496114, "grad_norm": 0.44133490324020386, "learning_rate": 0.00019308803077762904, "loss": 1.5556, "step": 2675 }, { "epoch": 0.034773399518877014, "grad_norm": 0.3195987045764923, "learning_rate": 0.00019308543131571766, "loss": 1.3604, "step": 2676 }, { "epoch": 0.03478639406279289, "grad_norm": 0.41076597571372986, "learning_rate": 0.00019308283185380626, "loss": 1.4536, "step": 2677 }, { "epoch": 0.03479938860670876, "grad_norm": 0.501983106136322, "learning_rate": 0.00019308023239189489, "loss": 1.4381, "step": 2678 }, { "epoch": 0.03481238315062463, "grad_norm": 0.4325046241283417, "learning_rate": 0.0001930776329299835, "loss": 1.5159, "step": 2679 }, { "epoch": 0.034825377694540506, "grad_norm": 0.32546481490135193, "learning_rate": 0.0001930750334680721, "loss": 1.2549, "step": 2680 }, { "epoch": 0.03483837223845638, "grad_norm": 0.3376631736755371, "learning_rate": 0.00019307243400616073, "loss": 1.6918, "step": 2681 }, { "epoch": 0.03485136678237225, "grad_norm": 0.40217649936676025, "learning_rate": 0.00019306983454424933, "loss": 1.3748, "step": 2682 }, { "epoch": 0.034864361326288125, "grad_norm": 0.24640783667564392, "learning_rate": 0.00019306723508233798, "loss": 1.1395, "step": 2683 }, { "epoch": 0.034877355870204, "grad_norm": 0.43814030289649963, "learning_rate": 0.00019306463562042658, "loss": 1.5176, "step": 2684 }, { "epoch": 0.03489035041411987, "grad_norm": 0.2910681664943695, "learning_rate": 0.0001930620361585152, "loss": 1.4568, "step": 2685 }, { "epoch": 0.034903344958035744, "grad_norm": 0.4540664851665497, "learning_rate": 0.0001930594366966038, "loss": 1.4671, "step": 2686 }, { "epoch": 0.03491633950195162, "grad_norm": 0.34096863865852356, "learning_rate": 0.00019305683723469243, "loss": 1.2896, "step": 2687 }, { "epoch": 0.03492933404586749, "grad_norm": 0.4042552709579468, "learning_rate": 0.00019305423777278105, "loss": 1.4341, "step": 2688 }, { "epoch": 0.03494232858978336, "grad_norm": 0.33614152669906616, "learning_rate": 0.00019305163831086965, "loss": 1.3547, "step": 2689 }, { "epoch": 0.034955323133699236, "grad_norm": 0.35462722182273865, "learning_rate": 0.0001930490388489583, "loss": 1.23, "step": 2690 }, { "epoch": 0.03496831767761511, "grad_norm": 0.3538316786289215, "learning_rate": 0.0001930464393870469, "loss": 1.5511, "step": 2691 }, { "epoch": 0.03498131222153098, "grad_norm": 0.2901199758052826, "learning_rate": 0.0001930438399251355, "loss": 1.346, "step": 2692 }, { "epoch": 0.034994306765446855, "grad_norm": 0.40211308002471924, "learning_rate": 0.00019304124046322412, "loss": 1.6341, "step": 2693 }, { "epoch": 0.035007301309362734, "grad_norm": 0.3229469358921051, "learning_rate": 0.00019303864100131274, "loss": 1.5874, "step": 2694 }, { "epoch": 0.03502029585327861, "grad_norm": 0.4672679603099823, "learning_rate": 0.00019303604153940137, "loss": 1.3592, "step": 2695 }, { "epoch": 0.03503329039719448, "grad_norm": 0.3832331597805023, "learning_rate": 0.00019303344207748996, "loss": 1.6367, "step": 2696 }, { "epoch": 0.03504628494111035, "grad_norm": 0.4546045660972595, "learning_rate": 0.0001930308426155786, "loss": 1.3327, "step": 2697 }, { "epoch": 0.035059279485026226, "grad_norm": 0.4661729037761688, "learning_rate": 0.0001930282431536672, "loss": 1.6591, "step": 2698 }, { "epoch": 0.0350722740289421, "grad_norm": 0.4349871575832367, "learning_rate": 0.0001930256436917558, "loss": 1.4524, "step": 2699 }, { "epoch": 0.03508526857285797, "grad_norm": 0.38665902614593506, "learning_rate": 0.00019302304422984444, "loss": 1.4058, "step": 2700 }, { "epoch": 0.035098263116773845, "grad_norm": 0.38995254039764404, "learning_rate": 0.00019302044476793303, "loss": 1.3594, "step": 2701 }, { "epoch": 0.03511125766068972, "grad_norm": 0.2993116080760956, "learning_rate": 0.00019301784530602168, "loss": 1.5534, "step": 2702 }, { "epoch": 0.03512425220460559, "grad_norm": 0.3170860707759857, "learning_rate": 0.00019301524584411028, "loss": 1.413, "step": 2703 }, { "epoch": 0.035137246748521464, "grad_norm": 0.4542633891105652, "learning_rate": 0.00019301264638219888, "loss": 1.5157, "step": 2704 }, { "epoch": 0.03515024129243734, "grad_norm": 0.30418646335601807, "learning_rate": 0.0001930100469202875, "loss": 1.4775, "step": 2705 }, { "epoch": 0.03516323583635321, "grad_norm": 0.4070758819580078, "learning_rate": 0.00019300744745837613, "loss": 1.3137, "step": 2706 }, { "epoch": 0.03517623038026908, "grad_norm": 0.40993425250053406, "learning_rate": 0.00019300484799646475, "loss": 1.395, "step": 2707 }, { "epoch": 0.035189224924184956, "grad_norm": 0.43533939123153687, "learning_rate": 0.00019300224853455335, "loss": 1.5093, "step": 2708 }, { "epoch": 0.03520221946810083, "grad_norm": 0.34060242772102356, "learning_rate": 0.00019299964907264197, "loss": 1.5795, "step": 2709 }, { "epoch": 0.0352152140120167, "grad_norm": 0.45110419392585754, "learning_rate": 0.0001929970496107306, "loss": 1.3807, "step": 2710 }, { "epoch": 0.035228208555932575, "grad_norm": 0.2704315483570099, "learning_rate": 0.0001929944501488192, "loss": 1.3191, "step": 2711 }, { "epoch": 0.03524120309984845, "grad_norm": 0.46195530891418457, "learning_rate": 0.00019299185068690782, "loss": 1.5613, "step": 2712 }, { "epoch": 0.03525419764376433, "grad_norm": 0.40167585015296936, "learning_rate": 0.00019298925122499642, "loss": 1.3393, "step": 2713 }, { "epoch": 0.0352671921876802, "grad_norm": 0.44363099336624146, "learning_rate": 0.00019298665176308507, "loss": 1.4231, "step": 2714 }, { "epoch": 0.035280186731596073, "grad_norm": 0.40086689591407776, "learning_rate": 0.00019298405230117367, "loss": 1.3922, "step": 2715 }, { "epoch": 0.035293181275511946, "grad_norm": 0.3890444040298462, "learning_rate": 0.00019298145283926226, "loss": 1.4934, "step": 2716 }, { "epoch": 0.03530617581942782, "grad_norm": 0.35410887002944946, "learning_rate": 0.0001929788533773509, "loss": 1.4242, "step": 2717 }, { "epoch": 0.03531917036334369, "grad_norm": 0.4561476707458496, "learning_rate": 0.0001929762539154395, "loss": 1.5194, "step": 2718 }, { "epoch": 0.035332164907259565, "grad_norm": 0.3089006245136261, "learning_rate": 0.00019297365445352814, "loss": 1.5368, "step": 2719 }, { "epoch": 0.03534515945117544, "grad_norm": 0.38267242908477783, "learning_rate": 0.00019297105499161674, "loss": 1.4586, "step": 2720 }, { "epoch": 0.03535815399509131, "grad_norm": 0.32861995697021484, "learning_rate": 0.00019296845552970536, "loss": 1.4387, "step": 2721 }, { "epoch": 0.035371148539007184, "grad_norm": 0.37558478116989136, "learning_rate": 0.00019296585606779398, "loss": 1.5579, "step": 2722 }, { "epoch": 0.03538414308292306, "grad_norm": 0.3885676860809326, "learning_rate": 0.00019296325660588258, "loss": 1.5473, "step": 2723 }, { "epoch": 0.03539713762683893, "grad_norm": 0.3887832462787628, "learning_rate": 0.0001929606571439712, "loss": 1.4257, "step": 2724 }, { "epoch": 0.0354101321707548, "grad_norm": 0.25129011273384094, "learning_rate": 0.0001929580576820598, "loss": 1.2151, "step": 2725 }, { "epoch": 0.035423126714670676, "grad_norm": 0.34469395875930786, "learning_rate": 0.00019295545822014846, "loss": 1.3625, "step": 2726 }, { "epoch": 0.03543612125858655, "grad_norm": 0.29271090030670166, "learning_rate": 0.00019295285875823705, "loss": 1.6102, "step": 2727 }, { "epoch": 0.03544911580250242, "grad_norm": 0.35284462571144104, "learning_rate": 0.00019295025929632568, "loss": 1.5273, "step": 2728 }, { "epoch": 0.035462110346418295, "grad_norm": 0.30605068802833557, "learning_rate": 0.0001929476598344143, "loss": 1.322, "step": 2729 }, { "epoch": 0.03547510489033417, "grad_norm": 0.380879670381546, "learning_rate": 0.0001929450603725029, "loss": 1.4431, "step": 2730 }, { "epoch": 0.03548809943425004, "grad_norm": 0.3157254755496979, "learning_rate": 0.00019294246091059152, "loss": 1.5266, "step": 2731 }, { "epoch": 0.03550109397816592, "grad_norm": 0.4257451295852661, "learning_rate": 0.00019293986144868012, "loss": 1.5895, "step": 2732 }, { "epoch": 0.035514088522081794, "grad_norm": 0.4311240613460541, "learning_rate": 0.00019293726198676875, "loss": 1.5401, "step": 2733 }, { "epoch": 0.03552708306599767, "grad_norm": 0.3798077404499054, "learning_rate": 0.00019293466252485737, "loss": 1.2725, "step": 2734 }, { "epoch": 0.03554007760991354, "grad_norm": 0.38537341356277466, "learning_rate": 0.00019293206306294597, "loss": 1.5666, "step": 2735 }, { "epoch": 0.03555307215382941, "grad_norm": 0.4559878706932068, "learning_rate": 0.0001929294636010346, "loss": 1.5698, "step": 2736 }, { "epoch": 0.035566066697745286, "grad_norm": 0.3879867196083069, "learning_rate": 0.00019292686413912322, "loss": 1.3961, "step": 2737 }, { "epoch": 0.03557906124166116, "grad_norm": 0.3555436432361603, "learning_rate": 0.00019292426467721184, "loss": 1.5106, "step": 2738 }, { "epoch": 0.03559205578557703, "grad_norm": 0.4483688175678253, "learning_rate": 0.00019292166521530044, "loss": 1.5846, "step": 2739 }, { "epoch": 0.035605050329492904, "grad_norm": 0.3611527383327484, "learning_rate": 0.00019291906575338906, "loss": 1.4275, "step": 2740 }, { "epoch": 0.03561804487340878, "grad_norm": 0.3116562068462372, "learning_rate": 0.0001929164662914777, "loss": 1.2558, "step": 2741 }, { "epoch": 0.03563103941732465, "grad_norm": 0.37978363037109375, "learning_rate": 0.00019291386682956628, "loss": 1.4643, "step": 2742 }, { "epoch": 0.03564403396124052, "grad_norm": 0.24704952538013458, "learning_rate": 0.0001929112673676549, "loss": 1.146, "step": 2743 }, { "epoch": 0.035657028505156396, "grad_norm": 0.3831765949726105, "learning_rate": 0.0001929086679057435, "loss": 1.5438, "step": 2744 }, { "epoch": 0.03567002304907227, "grad_norm": 0.30055537819862366, "learning_rate": 0.00019290606844383216, "loss": 1.4412, "step": 2745 }, { "epoch": 0.03568301759298814, "grad_norm": 0.2527606189250946, "learning_rate": 0.00019290346898192076, "loss": 1.4767, "step": 2746 }, { "epoch": 0.035696012136904015, "grad_norm": 0.42268630862236023, "learning_rate": 0.00019290086952000935, "loss": 1.6555, "step": 2747 }, { "epoch": 0.03570900668081989, "grad_norm": 0.47067180275917053, "learning_rate": 0.00019289827005809798, "loss": 1.4822, "step": 2748 }, { "epoch": 0.03572200122473576, "grad_norm": 0.411670058965683, "learning_rate": 0.0001928956705961866, "loss": 1.4746, "step": 2749 }, { "epoch": 0.035734995768651634, "grad_norm": 0.3741244971752167, "learning_rate": 0.00019289307113427523, "loss": 1.4604, "step": 2750 }, { "epoch": 0.035747990312567514, "grad_norm": 0.4290016293525696, "learning_rate": 0.00019289047167236382, "loss": 1.4158, "step": 2751 }, { "epoch": 0.03576098485648339, "grad_norm": 0.364467054605484, "learning_rate": 0.00019288787221045245, "loss": 1.3861, "step": 2752 }, { "epoch": 0.03577397940039926, "grad_norm": 0.35490918159484863, "learning_rate": 0.00019288527274854107, "loss": 1.5629, "step": 2753 }, { "epoch": 0.03578697394431513, "grad_norm": 0.2794831097126007, "learning_rate": 0.00019288267328662967, "loss": 1.4614, "step": 2754 }, { "epoch": 0.035799968488231006, "grad_norm": 0.287809818983078, "learning_rate": 0.0001928800738247183, "loss": 1.2182, "step": 2755 }, { "epoch": 0.03581296303214688, "grad_norm": 0.3844785690307617, "learning_rate": 0.0001928774743628069, "loss": 1.391, "step": 2756 }, { "epoch": 0.03582595757606275, "grad_norm": 0.32365888357162476, "learning_rate": 0.00019287487490089554, "loss": 1.5859, "step": 2757 }, { "epoch": 0.035838952119978625, "grad_norm": 0.37656038999557495, "learning_rate": 0.00019287227543898414, "loss": 1.4207, "step": 2758 }, { "epoch": 0.0358519466638945, "grad_norm": 0.3249402642250061, "learning_rate": 0.00019286967597707274, "loss": 1.2407, "step": 2759 }, { "epoch": 0.03586494120781037, "grad_norm": 0.6949344873428345, "learning_rate": 0.00019286707651516136, "loss": 1.718, "step": 2760 }, { "epoch": 0.03587793575172624, "grad_norm": 0.3642655611038208, "learning_rate": 0.00019286447705325, "loss": 1.4663, "step": 2761 }, { "epoch": 0.035890930295642116, "grad_norm": 0.3835026025772095, "learning_rate": 0.0001928618775913386, "loss": 1.6773, "step": 2762 }, { "epoch": 0.03590392483955799, "grad_norm": 0.3899837136268616, "learning_rate": 0.0001928592781294272, "loss": 1.5875, "step": 2763 }, { "epoch": 0.03591691938347386, "grad_norm": 1.368149995803833, "learning_rate": 0.00019285667866751583, "loss": 1.6732, "step": 2764 }, { "epoch": 0.035929913927389735, "grad_norm": 0.449677437543869, "learning_rate": 0.00019285407920560446, "loss": 1.6178, "step": 2765 }, { "epoch": 0.03594290847130561, "grad_norm": 0.34505805373191833, "learning_rate": 0.00019285147974369305, "loss": 1.487, "step": 2766 }, { "epoch": 0.03595590301522148, "grad_norm": 0.3163648247718811, "learning_rate": 0.00019284888028178168, "loss": 1.307, "step": 2767 }, { "epoch": 0.035968897559137354, "grad_norm": 0.43610504269599915, "learning_rate": 0.0001928462808198703, "loss": 1.4207, "step": 2768 }, { "epoch": 0.03598189210305323, "grad_norm": 0.3650064468383789, "learning_rate": 0.00019284368135795893, "loss": 1.5816, "step": 2769 }, { "epoch": 0.03599488664696911, "grad_norm": 0.45498794317245483, "learning_rate": 0.00019284108189604753, "loss": 1.6289, "step": 2770 }, { "epoch": 0.03600788119088498, "grad_norm": 0.38969722390174866, "learning_rate": 0.00019283848243413612, "loss": 1.2897, "step": 2771 }, { "epoch": 0.03602087573480085, "grad_norm": 0.3977135419845581, "learning_rate": 0.00019283588297222477, "loss": 1.3847, "step": 2772 }, { "epoch": 0.036033870278716726, "grad_norm": 0.41410887241363525, "learning_rate": 0.00019283328351031337, "loss": 1.4496, "step": 2773 }, { "epoch": 0.0360468648226326, "grad_norm": 0.3857017457485199, "learning_rate": 0.000192830684048402, "loss": 1.4659, "step": 2774 }, { "epoch": 0.03605985936654847, "grad_norm": 0.49290332198143005, "learning_rate": 0.0001928280845864906, "loss": 1.3607, "step": 2775 }, { "epoch": 0.036072853910464345, "grad_norm": 0.31616663932800293, "learning_rate": 0.00019282548512457922, "loss": 1.3968, "step": 2776 }, { "epoch": 0.03608584845438022, "grad_norm": 0.5127238035202026, "learning_rate": 0.00019282288566266784, "loss": 1.5859, "step": 2777 }, { "epoch": 0.03609884299829609, "grad_norm": 0.3986349403858185, "learning_rate": 0.00019282028620075644, "loss": 1.4152, "step": 2778 }, { "epoch": 0.036111837542211964, "grad_norm": 0.32311007380485535, "learning_rate": 0.00019281768673884506, "loss": 1.2462, "step": 2779 }, { "epoch": 0.03612483208612784, "grad_norm": 0.33841562271118164, "learning_rate": 0.0001928150872769337, "loss": 1.2382, "step": 2780 }, { "epoch": 0.03613782663004371, "grad_norm": 0.30147987604141235, "learning_rate": 0.00019281248781502231, "loss": 1.3171, "step": 2781 }, { "epoch": 0.03615082117395958, "grad_norm": 0.2974450886249542, "learning_rate": 0.0001928098883531109, "loss": 1.2797, "step": 2782 }, { "epoch": 0.036163815717875455, "grad_norm": 0.452944815158844, "learning_rate": 0.00019280728889119954, "loss": 1.5388, "step": 2783 }, { "epoch": 0.03617681026179133, "grad_norm": 0.35432595014572144, "learning_rate": 0.00019280468942928816, "loss": 1.4834, "step": 2784 }, { "epoch": 0.0361898048057072, "grad_norm": 0.3833582401275635, "learning_rate": 0.00019280208996737676, "loss": 1.4581, "step": 2785 }, { "epoch": 0.036202799349623074, "grad_norm": 0.438627153635025, "learning_rate": 0.00019279949050546538, "loss": 1.5574, "step": 2786 }, { "epoch": 0.03621579389353895, "grad_norm": 0.4910653233528137, "learning_rate": 0.00019279689104355398, "loss": 1.2658, "step": 2787 }, { "epoch": 0.03622878843745482, "grad_norm": 0.4092201292514801, "learning_rate": 0.0001927942915816426, "loss": 1.4815, "step": 2788 }, { "epoch": 0.0362417829813707, "grad_norm": 0.36961793899536133, "learning_rate": 0.00019279169211973123, "loss": 1.4692, "step": 2789 }, { "epoch": 0.03625477752528657, "grad_norm": 0.3870302736759186, "learning_rate": 0.00019278909265781983, "loss": 1.3644, "step": 2790 }, { "epoch": 0.036267772069202446, "grad_norm": 0.4005250036716461, "learning_rate": 0.00019278649319590845, "loss": 1.4207, "step": 2791 }, { "epoch": 0.03628076661311832, "grad_norm": 0.33879703283309937, "learning_rate": 0.00019278389373399707, "loss": 1.4814, "step": 2792 }, { "epoch": 0.03629376115703419, "grad_norm": 0.39506030082702637, "learning_rate": 0.0001927812942720857, "loss": 1.4696, "step": 2793 }, { "epoch": 0.036306755700950065, "grad_norm": 0.42061692476272583, "learning_rate": 0.0001927786948101743, "loss": 1.4733, "step": 2794 }, { "epoch": 0.03631975024486594, "grad_norm": 0.42268291115760803, "learning_rate": 0.00019277609534826292, "loss": 1.4487, "step": 2795 }, { "epoch": 0.03633274478878181, "grad_norm": 0.3141692876815796, "learning_rate": 0.00019277349588635155, "loss": 1.3469, "step": 2796 }, { "epoch": 0.036345739332697684, "grad_norm": 0.3309340476989746, "learning_rate": 0.00019277089642444014, "loss": 1.0896, "step": 2797 }, { "epoch": 0.03635873387661356, "grad_norm": 0.39377573132514954, "learning_rate": 0.00019276829696252877, "loss": 1.3871, "step": 2798 }, { "epoch": 0.03637172842052943, "grad_norm": 0.36125418543815613, "learning_rate": 0.00019276569750061736, "loss": 1.4044, "step": 2799 }, { "epoch": 0.0363847229644453, "grad_norm": 0.37107643485069275, "learning_rate": 0.000192763098038706, "loss": 1.4066, "step": 2800 }, { "epoch": 0.036397717508361176, "grad_norm": 0.514762818813324, "learning_rate": 0.00019276049857679461, "loss": 1.4408, "step": 2801 }, { "epoch": 0.03641071205227705, "grad_norm": 0.4846345782279968, "learning_rate": 0.0001927578991148832, "loss": 1.6813, "step": 2802 }, { "epoch": 0.03642370659619292, "grad_norm": 0.36676642298698425, "learning_rate": 0.00019275529965297186, "loss": 1.4402, "step": 2803 }, { "epoch": 0.036436701140108795, "grad_norm": 0.38358089327812195, "learning_rate": 0.00019275270019106046, "loss": 1.4207, "step": 2804 }, { "epoch": 0.03644969568402467, "grad_norm": 0.43707358837127686, "learning_rate": 0.00019275010072914908, "loss": 1.6785, "step": 2805 }, { "epoch": 0.03646269022794054, "grad_norm": 0.393779993057251, "learning_rate": 0.00019274750126723768, "loss": 1.428, "step": 2806 }, { "epoch": 0.03647568477185641, "grad_norm": 0.3945956528186798, "learning_rate": 0.0001927449018053263, "loss": 1.3964, "step": 2807 }, { "epoch": 0.03648867931577229, "grad_norm": 0.2623893618583679, "learning_rate": 0.00019274230234341493, "loss": 1.4994, "step": 2808 }, { "epoch": 0.036501673859688166, "grad_norm": 0.3510804772377014, "learning_rate": 0.00019273970288150353, "loss": 1.271, "step": 2809 }, { "epoch": 0.03651466840360404, "grad_norm": 0.4407918453216553, "learning_rate": 0.00019273710341959215, "loss": 1.3439, "step": 2810 }, { "epoch": 0.03652766294751991, "grad_norm": 0.4164028465747833, "learning_rate": 0.00019273450395768078, "loss": 1.4009, "step": 2811 }, { "epoch": 0.036540657491435785, "grad_norm": 0.38145989179611206, "learning_rate": 0.0001927319044957694, "loss": 1.3704, "step": 2812 }, { "epoch": 0.03655365203535166, "grad_norm": 0.3335427939891815, "learning_rate": 0.000192729305033858, "loss": 1.3729, "step": 2813 }, { "epoch": 0.03656664657926753, "grad_norm": 0.37175679206848145, "learning_rate": 0.0001927267055719466, "loss": 1.4238, "step": 2814 }, { "epoch": 0.036579641123183404, "grad_norm": 0.34029579162597656, "learning_rate": 0.00019272410611003525, "loss": 1.3745, "step": 2815 }, { "epoch": 0.03659263566709928, "grad_norm": 0.44117608666419983, "learning_rate": 0.00019272150664812385, "loss": 1.3927, "step": 2816 }, { "epoch": 0.03660563021101515, "grad_norm": 0.37611329555511475, "learning_rate": 0.00019271890718621247, "loss": 1.3691, "step": 2817 }, { "epoch": 0.03661862475493102, "grad_norm": 0.3684469163417816, "learning_rate": 0.00019271630772430107, "loss": 1.3714, "step": 2818 }, { "epoch": 0.036631619298846896, "grad_norm": 0.41082513332366943, "learning_rate": 0.0001927137082623897, "loss": 1.4639, "step": 2819 }, { "epoch": 0.03664461384276277, "grad_norm": 0.4682472348213196, "learning_rate": 0.00019271110880047832, "loss": 1.3318, "step": 2820 }, { "epoch": 0.03665760838667864, "grad_norm": 0.34688398241996765, "learning_rate": 0.00019270850933856691, "loss": 1.3724, "step": 2821 }, { "epoch": 0.036670602930594515, "grad_norm": 0.38562142848968506, "learning_rate": 0.00019270590987665554, "loss": 1.3346, "step": 2822 }, { "epoch": 0.03668359747451039, "grad_norm": 0.3660123646259308, "learning_rate": 0.00019270331041474416, "loss": 1.5847, "step": 2823 }, { "epoch": 0.03669659201842626, "grad_norm": 0.3025285303592682, "learning_rate": 0.0001927007109528328, "loss": 1.577, "step": 2824 }, { "epoch": 0.036709586562342134, "grad_norm": 0.35528770089149475, "learning_rate": 0.00019269811149092138, "loss": 1.3957, "step": 2825 }, { "epoch": 0.03672258110625801, "grad_norm": 0.3572424352169037, "learning_rate": 0.00019269551202900998, "loss": 1.5052, "step": 2826 }, { "epoch": 0.036735575650173886, "grad_norm": 0.30051520466804504, "learning_rate": 0.00019269291256709863, "loss": 1.4398, "step": 2827 }, { "epoch": 0.03674857019408976, "grad_norm": 0.42542964220046997, "learning_rate": 0.00019269031310518723, "loss": 1.4406, "step": 2828 }, { "epoch": 0.03676156473800563, "grad_norm": 0.35860592126846313, "learning_rate": 0.00019268771364327586, "loss": 1.3644, "step": 2829 }, { "epoch": 0.036774559281921505, "grad_norm": 0.43359726667404175, "learning_rate": 0.00019268511418136445, "loss": 1.4387, "step": 2830 }, { "epoch": 0.03678755382583738, "grad_norm": 0.3515236973762512, "learning_rate": 0.00019268251471945308, "loss": 1.3023, "step": 2831 }, { "epoch": 0.03680054836975325, "grad_norm": 0.28295955061912537, "learning_rate": 0.0001926799152575417, "loss": 1.2749, "step": 2832 }, { "epoch": 0.036813542913669124, "grad_norm": 0.36652517318725586, "learning_rate": 0.0001926773157956303, "loss": 1.4286, "step": 2833 }, { "epoch": 0.036826537457585, "grad_norm": 0.3952235281467438, "learning_rate": 0.00019267471633371892, "loss": 1.5777, "step": 2834 }, { "epoch": 0.03683953200150087, "grad_norm": 0.31595343351364136, "learning_rate": 0.00019267211687180755, "loss": 1.4189, "step": 2835 }, { "epoch": 0.03685252654541674, "grad_norm": 0.33102521300315857, "learning_rate": 0.00019266951740989617, "loss": 1.394, "step": 2836 }, { "epoch": 0.036865521089332616, "grad_norm": 0.2974259555339813, "learning_rate": 0.00019266691794798477, "loss": 1.357, "step": 2837 }, { "epoch": 0.03687851563324849, "grad_norm": 0.6384289264678955, "learning_rate": 0.0001926643184860734, "loss": 1.4339, "step": 2838 }, { "epoch": 0.03689151017716436, "grad_norm": 0.4564565122127533, "learning_rate": 0.00019266171902416202, "loss": 1.5453, "step": 2839 }, { "epoch": 0.036904504721080235, "grad_norm": 0.38966381549835205, "learning_rate": 0.00019265911956225062, "loss": 1.5627, "step": 2840 }, { "epoch": 0.03691749926499611, "grad_norm": 0.4062577188014984, "learning_rate": 0.00019265652010033924, "loss": 1.4098, "step": 2841 }, { "epoch": 0.03693049380891198, "grad_norm": 0.29419323801994324, "learning_rate": 0.00019265392063842787, "loss": 1.2312, "step": 2842 }, { "epoch": 0.036943488352827854, "grad_norm": 0.4806249737739563, "learning_rate": 0.00019265132117651646, "loss": 1.2879, "step": 2843 }, { "epoch": 0.03695648289674373, "grad_norm": 0.32719606161117554, "learning_rate": 0.0001926487217146051, "loss": 1.4757, "step": 2844 }, { "epoch": 0.0369694774406596, "grad_norm": 0.4196498394012451, "learning_rate": 0.00019264612225269368, "loss": 1.2361, "step": 2845 }, { "epoch": 0.03698247198457548, "grad_norm": 0.5088819861412048, "learning_rate": 0.00019264352279078234, "loss": 1.5954, "step": 2846 }, { "epoch": 0.03699546652849135, "grad_norm": 0.378284215927124, "learning_rate": 0.00019264092332887093, "loss": 1.3907, "step": 2847 }, { "epoch": 0.037008461072407225, "grad_norm": 0.442389577627182, "learning_rate": 0.00019263832386695956, "loss": 1.5548, "step": 2848 }, { "epoch": 0.0370214556163231, "grad_norm": 0.39782026410102844, "learning_rate": 0.00019263572440504816, "loss": 1.4675, "step": 2849 }, { "epoch": 0.03703445016023897, "grad_norm": 0.43371039628982544, "learning_rate": 0.00019263312494313678, "loss": 1.4222, "step": 2850 }, { "epoch": 0.037047444704154844, "grad_norm": 0.41124409437179565, "learning_rate": 0.0001926305254812254, "loss": 1.4427, "step": 2851 }, { "epoch": 0.03706043924807072, "grad_norm": 0.3725537061691284, "learning_rate": 0.000192627926019314, "loss": 1.4465, "step": 2852 }, { "epoch": 0.03707343379198659, "grad_norm": 0.3172045648097992, "learning_rate": 0.00019262532655740263, "loss": 1.3537, "step": 2853 }, { "epoch": 0.03708642833590246, "grad_norm": 0.35795867443084717, "learning_rate": 0.00019262272709549125, "loss": 1.7009, "step": 2854 }, { "epoch": 0.037099422879818336, "grad_norm": 0.43789443373680115, "learning_rate": 0.00019262012763357985, "loss": 1.6143, "step": 2855 }, { "epoch": 0.03711241742373421, "grad_norm": 0.33214282989501953, "learning_rate": 0.00019261752817166847, "loss": 1.4922, "step": 2856 }, { "epoch": 0.03712541196765008, "grad_norm": 0.4682319760322571, "learning_rate": 0.00019261492870975707, "loss": 1.6605, "step": 2857 }, { "epoch": 0.037138406511565955, "grad_norm": 0.48196035623550415, "learning_rate": 0.00019261232924784572, "loss": 1.5506, "step": 2858 }, { "epoch": 0.03715140105548183, "grad_norm": 0.4416365325450897, "learning_rate": 0.00019260972978593432, "loss": 1.7541, "step": 2859 }, { "epoch": 0.0371643955993977, "grad_norm": 0.41896694898605347, "learning_rate": 0.00019260713032402294, "loss": 1.4409, "step": 2860 }, { "epoch": 0.037177390143313574, "grad_norm": 0.35514992475509644, "learning_rate": 0.00019260453086211154, "loss": 1.421, "step": 2861 }, { "epoch": 0.03719038468722945, "grad_norm": 0.2940196692943573, "learning_rate": 0.00019260193140020017, "loss": 1.2734, "step": 2862 }, { "epoch": 0.03720337923114532, "grad_norm": 0.3980550467967987, "learning_rate": 0.0001925993319382888, "loss": 1.7478, "step": 2863 }, { "epoch": 0.03721637377506119, "grad_norm": 0.4403688907623291, "learning_rate": 0.0001925967324763774, "loss": 1.5199, "step": 2864 }, { "epoch": 0.03722936831897707, "grad_norm": 0.38370171189308167, "learning_rate": 0.000192594133014466, "loss": 1.4741, "step": 2865 }, { "epoch": 0.037242362862892946, "grad_norm": 0.3681994080543518, "learning_rate": 0.00019259153355255464, "loss": 1.4456, "step": 2866 }, { "epoch": 0.03725535740680882, "grad_norm": 0.35437920689582825, "learning_rate": 0.00019258893409064326, "loss": 1.5705, "step": 2867 }, { "epoch": 0.03726835195072469, "grad_norm": 0.49699491262435913, "learning_rate": 0.00019258633462873186, "loss": 1.4665, "step": 2868 }, { "epoch": 0.037281346494640565, "grad_norm": 0.356911301612854, "learning_rate": 0.00019258373516682046, "loss": 1.4914, "step": 2869 }, { "epoch": 0.03729434103855644, "grad_norm": 0.26467427611351013, "learning_rate": 0.0001925811357049091, "loss": 1.2276, "step": 2870 }, { "epoch": 0.03730733558247231, "grad_norm": 0.4024367034435272, "learning_rate": 0.0001925785362429977, "loss": 1.476, "step": 2871 }, { "epoch": 0.03732033012638818, "grad_norm": 0.4283798336982727, "learning_rate": 0.00019257593678108633, "loss": 1.4843, "step": 2872 }, { "epoch": 0.037333324670304056, "grad_norm": 0.38268956542015076, "learning_rate": 0.00019257333731917493, "loss": 1.3179, "step": 2873 }, { "epoch": 0.03734631921421993, "grad_norm": 0.4232378602027893, "learning_rate": 0.00019257073785726355, "loss": 1.5569, "step": 2874 }, { "epoch": 0.0373593137581358, "grad_norm": 0.4544670581817627, "learning_rate": 0.00019256813839535218, "loss": 1.5626, "step": 2875 }, { "epoch": 0.037372308302051675, "grad_norm": 0.34140169620513916, "learning_rate": 0.00019256553893344077, "loss": 1.4478, "step": 2876 }, { "epoch": 0.03738530284596755, "grad_norm": 0.42812231183052063, "learning_rate": 0.00019256293947152942, "loss": 1.4363, "step": 2877 }, { "epoch": 0.03739829738988342, "grad_norm": 0.3961136043071747, "learning_rate": 0.00019256034000961802, "loss": 1.3606, "step": 2878 }, { "epoch": 0.037411291933799294, "grad_norm": 0.28746384382247925, "learning_rate": 0.00019255774054770665, "loss": 1.0565, "step": 2879 }, { "epoch": 0.03742428647771517, "grad_norm": 0.37665361166000366, "learning_rate": 0.00019255514108579524, "loss": 1.5813, "step": 2880 }, { "epoch": 0.03743728102163104, "grad_norm": 0.47615721821784973, "learning_rate": 0.00019255254162388387, "loss": 1.3915, "step": 2881 }, { "epoch": 0.03745027556554691, "grad_norm": 0.40794748067855835, "learning_rate": 0.0001925499421619725, "loss": 1.5256, "step": 2882 }, { "epoch": 0.037463270109462786, "grad_norm": 0.39038363099098206, "learning_rate": 0.0001925473427000611, "loss": 1.3619, "step": 2883 }, { "epoch": 0.037476264653378666, "grad_norm": 0.4532313644886017, "learning_rate": 0.00019254474323814971, "loss": 1.5355, "step": 2884 }, { "epoch": 0.03748925919729454, "grad_norm": 0.41911494731903076, "learning_rate": 0.00019254214377623834, "loss": 1.6976, "step": 2885 }, { "epoch": 0.03750225374121041, "grad_norm": 0.45028582215309143, "learning_rate": 0.00019253954431432694, "loss": 1.5581, "step": 2886 }, { "epoch": 0.037515248285126285, "grad_norm": 0.4192914664745331, "learning_rate": 0.00019253694485241556, "loss": 1.3167, "step": 2887 }, { "epoch": 0.03752824282904216, "grad_norm": 0.38193854689598083, "learning_rate": 0.00019253434539050416, "loss": 1.4941, "step": 2888 }, { "epoch": 0.03754123737295803, "grad_norm": 0.3629629611968994, "learning_rate": 0.0001925317459285928, "loss": 1.4815, "step": 2889 }, { "epoch": 0.037554231916873904, "grad_norm": 0.33992719650268555, "learning_rate": 0.0001925291464666814, "loss": 1.5113, "step": 2890 }, { "epoch": 0.03756722646078978, "grad_norm": 0.3796389400959015, "learning_rate": 0.00019252654700477003, "loss": 1.4956, "step": 2891 }, { "epoch": 0.03758022100470565, "grad_norm": 0.3908306658267975, "learning_rate": 0.00019252394754285863, "loss": 1.4478, "step": 2892 }, { "epoch": 0.03759321554862152, "grad_norm": 0.34537550806999207, "learning_rate": 0.00019252134808094725, "loss": 1.4948, "step": 2893 }, { "epoch": 0.037606210092537395, "grad_norm": 0.43783748149871826, "learning_rate": 0.00019251874861903588, "loss": 1.3109, "step": 2894 }, { "epoch": 0.03761920463645327, "grad_norm": 0.30554085969924927, "learning_rate": 0.00019251614915712448, "loss": 1.5971, "step": 2895 }, { "epoch": 0.03763219918036914, "grad_norm": 0.38141849637031555, "learning_rate": 0.0001925135496952131, "loss": 1.5607, "step": 2896 }, { "epoch": 0.037645193724285014, "grad_norm": 0.3407510817050934, "learning_rate": 0.00019251095023330172, "loss": 1.5818, "step": 2897 }, { "epoch": 0.03765818826820089, "grad_norm": 0.39207011461257935, "learning_rate": 0.00019250835077139032, "loss": 1.4283, "step": 2898 }, { "epoch": 0.03767118281211676, "grad_norm": 0.34534555673599243, "learning_rate": 0.00019250575130947895, "loss": 1.4144, "step": 2899 }, { "epoch": 0.03768417735603263, "grad_norm": 0.4672044515609741, "learning_rate": 0.00019250315184756754, "loss": 1.7198, "step": 2900 }, { "epoch": 0.037697171899948506, "grad_norm": 0.36498475074768066, "learning_rate": 0.0001925005523856562, "loss": 1.3947, "step": 2901 }, { "epoch": 0.03771016644386438, "grad_norm": 0.4158748984336853, "learning_rate": 0.0001924979529237448, "loss": 1.5405, "step": 2902 }, { "epoch": 0.03772316098778026, "grad_norm": 0.3219645619392395, "learning_rate": 0.00019249535346183342, "loss": 1.5222, "step": 2903 }, { "epoch": 0.03773615553169613, "grad_norm": 0.38233116269111633, "learning_rate": 0.00019249275399992201, "loss": 1.3962, "step": 2904 }, { "epoch": 0.037749150075612005, "grad_norm": 0.35942554473876953, "learning_rate": 0.00019249015453801064, "loss": 1.3824, "step": 2905 }, { "epoch": 0.03776214461952788, "grad_norm": 0.36550280451774597, "learning_rate": 0.00019248755507609926, "loss": 1.48, "step": 2906 }, { "epoch": 0.03777513916344375, "grad_norm": 0.4292823374271393, "learning_rate": 0.00019248495561418786, "loss": 1.5711, "step": 2907 }, { "epoch": 0.037788133707359624, "grad_norm": 0.4042067229747772, "learning_rate": 0.00019248235615227648, "loss": 1.6332, "step": 2908 }, { "epoch": 0.0378011282512755, "grad_norm": 0.31133267283439636, "learning_rate": 0.0001924797566903651, "loss": 1.3715, "step": 2909 }, { "epoch": 0.03781412279519137, "grad_norm": 0.4283090829849243, "learning_rate": 0.0001924771572284537, "loss": 1.2436, "step": 2910 }, { "epoch": 0.03782711733910724, "grad_norm": 0.4492538869380951, "learning_rate": 0.00019247455776654233, "loss": 1.5023, "step": 2911 }, { "epoch": 0.037840111883023116, "grad_norm": 0.37573447823524475, "learning_rate": 0.00019247195830463096, "loss": 1.4719, "step": 2912 }, { "epoch": 0.03785310642693899, "grad_norm": 0.3890881836414337, "learning_rate": 0.00019246935884271958, "loss": 1.3534, "step": 2913 }, { "epoch": 0.03786610097085486, "grad_norm": 0.3864831030368805, "learning_rate": 0.00019246675938080818, "loss": 1.3381, "step": 2914 }, { "epoch": 0.037879095514770734, "grad_norm": 0.3690125644207001, "learning_rate": 0.0001924641599188968, "loss": 1.4198, "step": 2915 }, { "epoch": 0.03789209005868661, "grad_norm": 0.3906749188899994, "learning_rate": 0.00019246156045698543, "loss": 1.5224, "step": 2916 }, { "epoch": 0.03790508460260248, "grad_norm": 0.49216577410697937, "learning_rate": 0.00019245896099507402, "loss": 1.4364, "step": 2917 }, { "epoch": 0.03791807914651835, "grad_norm": 0.3060561418533325, "learning_rate": 0.00019245636153316265, "loss": 1.5562, "step": 2918 }, { "epoch": 0.037931073690434226, "grad_norm": 0.37658512592315674, "learning_rate": 0.00019245376207125125, "loss": 1.4912, "step": 2919 }, { "epoch": 0.0379440682343501, "grad_norm": 0.3828648030757904, "learning_rate": 0.0001924511626093399, "loss": 1.3431, "step": 2920 }, { "epoch": 0.03795706277826597, "grad_norm": 0.45422253012657166, "learning_rate": 0.0001924485631474285, "loss": 1.5319, "step": 2921 }, { "epoch": 0.03797005732218185, "grad_norm": 0.4186297357082367, "learning_rate": 0.0001924459636855171, "loss": 1.3389, "step": 2922 }, { "epoch": 0.037983051866097725, "grad_norm": 0.4232546091079712, "learning_rate": 0.00019244336422360572, "loss": 1.3911, "step": 2923 }, { "epoch": 0.0379960464100136, "grad_norm": 0.33594444394111633, "learning_rate": 0.00019244076476169434, "loss": 1.5508, "step": 2924 }, { "epoch": 0.03800904095392947, "grad_norm": 0.37918147444725037, "learning_rate": 0.00019243816529978297, "loss": 1.4947, "step": 2925 }, { "epoch": 0.038022035497845344, "grad_norm": 0.3648119866847992, "learning_rate": 0.00019243556583787156, "loss": 1.4514, "step": 2926 }, { "epoch": 0.03803503004176122, "grad_norm": 0.3229885697364807, "learning_rate": 0.0001924329663759602, "loss": 1.3821, "step": 2927 }, { "epoch": 0.03804802458567709, "grad_norm": 0.3869847357273102, "learning_rate": 0.0001924303669140488, "loss": 1.3408, "step": 2928 }, { "epoch": 0.03806101912959296, "grad_norm": 0.3429639935493469, "learning_rate": 0.0001924277674521374, "loss": 1.4586, "step": 2929 }, { "epoch": 0.038074013673508836, "grad_norm": 0.3165348768234253, "learning_rate": 0.00019242516799022603, "loss": 1.3305, "step": 2930 }, { "epoch": 0.03808700821742471, "grad_norm": 0.42802226543426514, "learning_rate": 0.00019242256852831463, "loss": 1.4054, "step": 2931 }, { "epoch": 0.03810000276134058, "grad_norm": 0.3656299412250519, "learning_rate": 0.00019241996906640328, "loss": 1.604, "step": 2932 }, { "epoch": 0.038112997305256455, "grad_norm": 0.38018178939819336, "learning_rate": 0.00019241736960449188, "loss": 1.5177, "step": 2933 }, { "epoch": 0.03812599184917233, "grad_norm": 0.4416995048522949, "learning_rate": 0.0001924147701425805, "loss": 1.4017, "step": 2934 }, { "epoch": 0.0381389863930882, "grad_norm": 0.4500894844532013, "learning_rate": 0.0001924121706806691, "loss": 1.4532, "step": 2935 }, { "epoch": 0.038151980937004074, "grad_norm": 0.27240195870399475, "learning_rate": 0.00019240957121875773, "loss": 1.4876, "step": 2936 }, { "epoch": 0.038164975480919947, "grad_norm": 0.3285284638404846, "learning_rate": 0.00019240697175684635, "loss": 1.5121, "step": 2937 }, { "epoch": 0.03817797002483582, "grad_norm": 0.37454378604888916, "learning_rate": 0.00019240437229493495, "loss": 1.2427, "step": 2938 }, { "epoch": 0.03819096456875169, "grad_norm": 0.375453919172287, "learning_rate": 0.00019240177283302357, "loss": 1.3329, "step": 2939 }, { "epoch": 0.038203959112667565, "grad_norm": 0.34264907240867615, "learning_rate": 0.0001923991733711122, "loss": 1.4839, "step": 2940 }, { "epoch": 0.038216953656583445, "grad_norm": 0.3332982659339905, "learning_rate": 0.0001923965739092008, "loss": 1.6241, "step": 2941 }, { "epoch": 0.03822994820049932, "grad_norm": 0.4663456082344055, "learning_rate": 0.00019239397444728942, "loss": 1.4251, "step": 2942 }, { "epoch": 0.03824294274441519, "grad_norm": 0.29390889406204224, "learning_rate": 0.00019239137498537802, "loss": 1.3889, "step": 2943 }, { "epoch": 0.038255937288331064, "grad_norm": 0.3493329584598541, "learning_rate": 0.00019238877552346667, "loss": 1.2042, "step": 2944 }, { "epoch": 0.03826893183224694, "grad_norm": 0.41765937209129333, "learning_rate": 0.00019238617606155527, "loss": 1.4362, "step": 2945 }, { "epoch": 0.03828192637616281, "grad_norm": 0.4229629635810852, "learning_rate": 0.0001923835765996439, "loss": 1.5947, "step": 2946 }, { "epoch": 0.03829492092007868, "grad_norm": 0.39950308203697205, "learning_rate": 0.0001923809771377325, "loss": 1.3957, "step": 2947 }, { "epoch": 0.038307915463994556, "grad_norm": 0.3801060616970062, "learning_rate": 0.0001923783776758211, "loss": 1.0505, "step": 2948 }, { "epoch": 0.03832091000791043, "grad_norm": 0.3297862708568573, "learning_rate": 0.00019237577821390974, "loss": 1.3285, "step": 2949 }, { "epoch": 0.0383339045518263, "grad_norm": 0.42620059847831726, "learning_rate": 0.00019237317875199833, "loss": 1.5758, "step": 2950 }, { "epoch": 0.038346899095742175, "grad_norm": 0.40891823172569275, "learning_rate": 0.00019237057929008699, "loss": 1.4526, "step": 2951 }, { "epoch": 0.03835989363965805, "grad_norm": 0.35171499848365784, "learning_rate": 0.00019236797982817558, "loss": 1.4504, "step": 2952 }, { "epoch": 0.03837288818357392, "grad_norm": 0.30551573634147644, "learning_rate": 0.00019236538036626418, "loss": 1.4851, "step": 2953 }, { "epoch": 0.038385882727489794, "grad_norm": 0.3448511064052582, "learning_rate": 0.0001923627809043528, "loss": 1.3106, "step": 2954 }, { "epoch": 0.03839887727140567, "grad_norm": 0.4777507483959198, "learning_rate": 0.00019236018144244143, "loss": 1.6141, "step": 2955 }, { "epoch": 0.03841187181532154, "grad_norm": 0.3326804041862488, "learning_rate": 0.00019235758198053005, "loss": 1.2672, "step": 2956 }, { "epoch": 0.03842486635923741, "grad_norm": 0.4908417761325836, "learning_rate": 0.00019235498251861865, "loss": 1.6664, "step": 2957 }, { "epoch": 0.038437860903153286, "grad_norm": 0.36089977622032166, "learning_rate": 0.00019235238305670728, "loss": 1.3454, "step": 2958 }, { "epoch": 0.03845085544706916, "grad_norm": 0.3847794830799103, "learning_rate": 0.0001923497835947959, "loss": 1.4589, "step": 2959 }, { "epoch": 0.03846384999098504, "grad_norm": 0.24992746114730835, "learning_rate": 0.0001923471841328845, "loss": 1.3842, "step": 2960 }, { "epoch": 0.03847684453490091, "grad_norm": 0.38662829995155334, "learning_rate": 0.00019234458467097312, "loss": 1.4272, "step": 2961 }, { "epoch": 0.038489839078816784, "grad_norm": 0.46076446771621704, "learning_rate": 0.00019234198520906172, "loss": 1.4969, "step": 2962 }, { "epoch": 0.03850283362273266, "grad_norm": 0.30887776613235474, "learning_rate": 0.00019233938574715037, "loss": 1.4812, "step": 2963 }, { "epoch": 0.03851582816664853, "grad_norm": 0.37549760937690735, "learning_rate": 0.00019233678628523897, "loss": 1.4241, "step": 2964 }, { "epoch": 0.0385288227105644, "grad_norm": 0.3620559275150299, "learning_rate": 0.00019233418682332757, "loss": 1.5063, "step": 2965 }, { "epoch": 0.038541817254480276, "grad_norm": 0.35685279965400696, "learning_rate": 0.0001923315873614162, "loss": 1.458, "step": 2966 }, { "epoch": 0.03855481179839615, "grad_norm": 0.3742183744907379, "learning_rate": 0.00019232898789950481, "loss": 1.5527, "step": 2967 }, { "epoch": 0.03856780634231202, "grad_norm": 0.3448207378387451, "learning_rate": 0.00019232638843759344, "loss": 1.5387, "step": 2968 }, { "epoch": 0.038580800886227895, "grad_norm": 0.2890182435512543, "learning_rate": 0.00019232378897568204, "loss": 1.4247, "step": 2969 }, { "epoch": 0.03859379543014377, "grad_norm": 0.38998499512672424, "learning_rate": 0.00019232118951377066, "loss": 1.3513, "step": 2970 }, { "epoch": 0.03860678997405964, "grad_norm": 0.31292712688446045, "learning_rate": 0.00019231859005185929, "loss": 1.4684, "step": 2971 }, { "epoch": 0.038619784517975514, "grad_norm": 0.3543795943260193, "learning_rate": 0.00019231599058994788, "loss": 1.6187, "step": 2972 }, { "epoch": 0.03863277906189139, "grad_norm": 0.3005315661430359, "learning_rate": 0.0001923133911280365, "loss": 1.2564, "step": 2973 }, { "epoch": 0.03864577360580726, "grad_norm": 0.40173181891441345, "learning_rate": 0.0001923107916661251, "loss": 1.6105, "step": 2974 }, { "epoch": 0.03865876814972313, "grad_norm": 0.34406977891921997, "learning_rate": 0.00019230819220421376, "loss": 1.473, "step": 2975 }, { "epoch": 0.038671762693639006, "grad_norm": 0.3398469090461731, "learning_rate": 0.00019230559274230235, "loss": 1.344, "step": 2976 }, { "epoch": 0.03868475723755488, "grad_norm": 0.36122235655784607, "learning_rate": 0.00019230299328039095, "loss": 1.2835, "step": 2977 }, { "epoch": 0.03869775178147075, "grad_norm": 0.38948532938957214, "learning_rate": 0.00019230039381847958, "loss": 1.3939, "step": 2978 }, { "epoch": 0.03871074632538663, "grad_norm": 0.2958020567893982, "learning_rate": 0.0001922977943565682, "loss": 1.326, "step": 2979 }, { "epoch": 0.038723740869302505, "grad_norm": 0.336001455783844, "learning_rate": 0.00019229519489465682, "loss": 1.4251, "step": 2980 }, { "epoch": 0.03873673541321838, "grad_norm": 0.35965749621391296, "learning_rate": 0.00019229259543274542, "loss": 1.6125, "step": 2981 }, { "epoch": 0.03874972995713425, "grad_norm": 0.3999517261981964, "learning_rate": 0.00019228999597083405, "loss": 1.2803, "step": 2982 }, { "epoch": 0.03876272450105012, "grad_norm": 0.3699052035808563, "learning_rate": 0.00019228739650892267, "loss": 1.689, "step": 2983 }, { "epoch": 0.038775719044965996, "grad_norm": 0.42240333557128906, "learning_rate": 0.00019228479704701127, "loss": 1.5546, "step": 2984 }, { "epoch": 0.03878871358888187, "grad_norm": 0.3998093008995056, "learning_rate": 0.0001922821975850999, "loss": 1.5531, "step": 2985 }, { "epoch": 0.03880170813279774, "grad_norm": 0.3260725438594818, "learning_rate": 0.00019227959812318852, "loss": 1.4899, "step": 2986 }, { "epoch": 0.038814702676713615, "grad_norm": 0.3507775664329529, "learning_rate": 0.00019227699866127714, "loss": 1.3648, "step": 2987 }, { "epoch": 0.03882769722062949, "grad_norm": 0.35958436131477356, "learning_rate": 0.00019227439919936574, "loss": 1.5094, "step": 2988 }, { "epoch": 0.03884069176454536, "grad_norm": 0.28536874055862427, "learning_rate": 0.00019227179973745436, "loss": 1.4603, "step": 2989 }, { "epoch": 0.038853686308461234, "grad_norm": 0.5363749861717224, "learning_rate": 0.000192269200275543, "loss": 1.4896, "step": 2990 }, { "epoch": 0.03886668085237711, "grad_norm": 0.4216437339782715, "learning_rate": 0.00019226660081363159, "loss": 1.5247, "step": 2991 }, { "epoch": 0.03887967539629298, "grad_norm": 0.2677660584449768, "learning_rate": 0.0001922640013517202, "loss": 1.4846, "step": 2992 }, { "epoch": 0.03889266994020885, "grad_norm": 0.4548943042755127, "learning_rate": 0.0001922614018898088, "loss": 1.6278, "step": 2993 }, { "epoch": 0.038905664484124726, "grad_norm": 0.3277578353881836, "learning_rate": 0.00019225880242789743, "loss": 1.4106, "step": 2994 }, { "epoch": 0.0389186590280406, "grad_norm": 0.3157084286212921, "learning_rate": 0.00019225620296598606, "loss": 1.4095, "step": 2995 }, { "epoch": 0.03893165357195647, "grad_norm": 0.36140143871307373, "learning_rate": 0.00019225360350407465, "loss": 1.4858, "step": 2996 }, { "epoch": 0.038944648115872345, "grad_norm": 0.3800543546676636, "learning_rate": 0.00019225100404216328, "loss": 1.5974, "step": 2997 }, { "epoch": 0.038957642659788225, "grad_norm": 0.36502912640571594, "learning_rate": 0.0001922484045802519, "loss": 1.5445, "step": 2998 }, { "epoch": 0.0389706372037041, "grad_norm": 0.35446375608444214, "learning_rate": 0.00019224580511834053, "loss": 1.4025, "step": 2999 }, { "epoch": 0.03898363174761997, "grad_norm": 0.3510723412036896, "learning_rate": 0.00019224320565642912, "loss": 1.2342, "step": 3000 }, { "epoch": 0.038996626291535844, "grad_norm": 0.31108078360557556, "learning_rate": 0.00019224060619451775, "loss": 1.3674, "step": 3001 }, { "epoch": 0.03900962083545172, "grad_norm": 0.45004478096961975, "learning_rate": 0.00019223800673260637, "loss": 1.585, "step": 3002 }, { "epoch": 0.03902261537936759, "grad_norm": 0.372096449136734, "learning_rate": 0.00019223540727069497, "loss": 1.451, "step": 3003 }, { "epoch": 0.03903560992328346, "grad_norm": 0.3879518508911133, "learning_rate": 0.0001922328078087836, "loss": 1.5369, "step": 3004 }, { "epoch": 0.039048604467199335, "grad_norm": 0.39250648021698, "learning_rate": 0.0001922302083468722, "loss": 1.3992, "step": 3005 }, { "epoch": 0.03906159901111521, "grad_norm": 0.4562216103076935, "learning_rate": 0.00019222760888496082, "loss": 1.555, "step": 3006 }, { "epoch": 0.03907459355503108, "grad_norm": 0.32097911834716797, "learning_rate": 0.00019222500942304944, "loss": 1.4113, "step": 3007 }, { "epoch": 0.039087588098946954, "grad_norm": 0.3371255099773407, "learning_rate": 0.00019222240996113804, "loss": 1.4397, "step": 3008 }, { "epoch": 0.03910058264286283, "grad_norm": 0.35715731978416443, "learning_rate": 0.00019221981049922666, "loss": 1.4138, "step": 3009 }, { "epoch": 0.0391135771867787, "grad_norm": 0.37619277834892273, "learning_rate": 0.0001922172110373153, "loss": 1.2273, "step": 3010 }, { "epoch": 0.03912657173069457, "grad_norm": 0.36379989981651306, "learning_rate": 0.0001922146115754039, "loss": 1.4565, "step": 3011 }, { "epoch": 0.039139566274610446, "grad_norm": 0.3899136185646057, "learning_rate": 0.0001922120121134925, "loss": 1.5098, "step": 3012 }, { "epoch": 0.03915256081852632, "grad_norm": 0.32606860995292664, "learning_rate": 0.00019220941265158113, "loss": 1.2806, "step": 3013 }, { "epoch": 0.03916555536244219, "grad_norm": 0.5540496706962585, "learning_rate": 0.00019220681318966976, "loss": 1.5175, "step": 3014 }, { "epoch": 0.039178549906358065, "grad_norm": 0.4048491418361664, "learning_rate": 0.00019220421372775836, "loss": 1.4804, "step": 3015 }, { "epoch": 0.03919154445027394, "grad_norm": 0.36790144443511963, "learning_rate": 0.00019220161426584698, "loss": 1.5979, "step": 3016 }, { "epoch": 0.03920453899418982, "grad_norm": 0.5192331671714783, "learning_rate": 0.00019219901480393558, "loss": 1.5445, "step": 3017 }, { "epoch": 0.03921753353810569, "grad_norm": 0.3224785327911377, "learning_rate": 0.00019219641534202423, "loss": 1.2415, "step": 3018 }, { "epoch": 0.039230528082021564, "grad_norm": 0.4109537601470947, "learning_rate": 0.00019219381588011283, "loss": 1.4474, "step": 3019 }, { "epoch": 0.03924352262593744, "grad_norm": 0.4873676896095276, "learning_rate": 0.00019219121641820142, "loss": 1.5665, "step": 3020 }, { "epoch": 0.03925651716985331, "grad_norm": 0.36733099818229675, "learning_rate": 0.00019218861695629005, "loss": 1.4073, "step": 3021 }, { "epoch": 0.03926951171376918, "grad_norm": 0.4428819417953491, "learning_rate": 0.00019218601749437867, "loss": 1.4578, "step": 3022 }, { "epoch": 0.039282506257685056, "grad_norm": 0.41870808601379395, "learning_rate": 0.0001921834180324673, "loss": 1.524, "step": 3023 }, { "epoch": 0.03929550080160093, "grad_norm": 0.4455831050872803, "learning_rate": 0.0001921808185705559, "loss": 1.3535, "step": 3024 }, { "epoch": 0.0393084953455168, "grad_norm": 0.4161718189716339, "learning_rate": 0.00019217821910864452, "loss": 1.4771, "step": 3025 }, { "epoch": 0.039321489889432674, "grad_norm": 0.3113771080970764, "learning_rate": 0.00019217561964673314, "loss": 1.3269, "step": 3026 }, { "epoch": 0.03933448443334855, "grad_norm": 0.4568064212799072, "learning_rate": 0.00019217302018482174, "loss": 1.36, "step": 3027 }, { "epoch": 0.03934747897726442, "grad_norm": 0.36629778146743774, "learning_rate": 0.00019217042072291037, "loss": 1.4161, "step": 3028 }, { "epoch": 0.03936047352118029, "grad_norm": 0.4069276750087738, "learning_rate": 0.000192167821260999, "loss": 1.3755, "step": 3029 }, { "epoch": 0.039373468065096166, "grad_norm": 0.26498275995254517, "learning_rate": 0.00019216522179908761, "loss": 1.1617, "step": 3030 }, { "epoch": 0.03938646260901204, "grad_norm": 0.3571862578392029, "learning_rate": 0.0001921626223371762, "loss": 1.4352, "step": 3031 }, { "epoch": 0.03939945715292791, "grad_norm": 0.3694959580898285, "learning_rate": 0.0001921600228752648, "loss": 1.4404, "step": 3032 }, { "epoch": 0.039412451696843785, "grad_norm": 0.45220714807510376, "learning_rate": 0.00019215742341335346, "loss": 1.5745, "step": 3033 }, { "epoch": 0.03942544624075966, "grad_norm": 0.37689951062202454, "learning_rate": 0.00019215482395144206, "loss": 1.3817, "step": 3034 }, { "epoch": 0.03943844078467553, "grad_norm": 0.29421502351760864, "learning_rate": 0.00019215222448953068, "loss": 1.5907, "step": 3035 }, { "epoch": 0.03945143532859141, "grad_norm": 0.3483215272426605, "learning_rate": 0.00019214962502761928, "loss": 1.4719, "step": 3036 }, { "epoch": 0.039464429872507284, "grad_norm": 0.5257629752159119, "learning_rate": 0.0001921470255657079, "loss": 1.2928, "step": 3037 }, { "epoch": 0.03947742441642316, "grad_norm": 0.42938506603240967, "learning_rate": 0.00019214442610379653, "loss": 1.4492, "step": 3038 }, { "epoch": 0.03949041896033903, "grad_norm": 0.4321722686290741, "learning_rate": 0.00019214182664188513, "loss": 1.4239, "step": 3039 }, { "epoch": 0.0395034135042549, "grad_norm": 0.3258979022502899, "learning_rate": 0.00019213922717997375, "loss": 1.5091, "step": 3040 }, { "epoch": 0.039516408048170776, "grad_norm": 0.35038477182388306, "learning_rate": 0.00019213662771806238, "loss": 1.5664, "step": 3041 }, { "epoch": 0.03952940259208665, "grad_norm": 0.38343197107315063, "learning_rate": 0.000192134028256151, "loss": 1.5419, "step": 3042 }, { "epoch": 0.03954239713600252, "grad_norm": 0.317745566368103, "learning_rate": 0.0001921314287942396, "loss": 1.5216, "step": 3043 }, { "epoch": 0.039555391679918395, "grad_norm": 0.3610340654850006, "learning_rate": 0.0001921288293323282, "loss": 1.378, "step": 3044 }, { "epoch": 0.03956838622383427, "grad_norm": 0.5125117897987366, "learning_rate": 0.00019212622987041685, "loss": 1.5753, "step": 3045 }, { "epoch": 0.03958138076775014, "grad_norm": 0.38834917545318604, "learning_rate": 0.00019212363040850544, "loss": 1.4358, "step": 3046 }, { "epoch": 0.039594375311666014, "grad_norm": 0.39003828167915344, "learning_rate": 0.00019212103094659407, "loss": 1.505, "step": 3047 }, { "epoch": 0.039607369855581886, "grad_norm": 0.3908798396587372, "learning_rate": 0.00019211843148468267, "loss": 1.4697, "step": 3048 }, { "epoch": 0.03962036439949776, "grad_norm": 0.33871498703956604, "learning_rate": 0.0001921158320227713, "loss": 1.3073, "step": 3049 }, { "epoch": 0.03963335894341363, "grad_norm": 0.5763903260231018, "learning_rate": 0.00019211323256085991, "loss": 1.5585, "step": 3050 }, { "epoch": 0.039646353487329505, "grad_norm": 0.35756200551986694, "learning_rate": 0.0001921106330989485, "loss": 1.6285, "step": 3051 }, { "epoch": 0.03965934803124538, "grad_norm": 0.29677605628967285, "learning_rate": 0.00019210803363703714, "loss": 1.4501, "step": 3052 }, { "epoch": 0.03967234257516125, "grad_norm": 0.3514330983161926, "learning_rate": 0.00019210543417512576, "loss": 1.3728, "step": 3053 }, { "epoch": 0.039685337119077124, "grad_norm": 0.33042457699775696, "learning_rate": 0.00019210283471321439, "loss": 1.5604, "step": 3054 }, { "epoch": 0.039698331662993004, "grad_norm": 0.369961142539978, "learning_rate": 0.00019210023525130298, "loss": 1.4684, "step": 3055 }, { "epoch": 0.03971132620690888, "grad_norm": 0.42035093903541565, "learning_rate": 0.0001920976357893916, "loss": 1.6284, "step": 3056 }, { "epoch": 0.03972432075082475, "grad_norm": 0.44291654229164124, "learning_rate": 0.00019209503632748023, "loss": 1.385, "step": 3057 }, { "epoch": 0.03973731529474062, "grad_norm": 0.42376652359962463, "learning_rate": 0.00019209243686556883, "loss": 1.7623, "step": 3058 }, { "epoch": 0.039750309838656496, "grad_norm": 0.47812968492507935, "learning_rate": 0.00019208983740365745, "loss": 1.2825, "step": 3059 }, { "epoch": 0.03976330438257237, "grad_norm": 0.40783625841140747, "learning_rate": 0.00019208723794174608, "loss": 1.4694, "step": 3060 }, { "epoch": 0.03977629892648824, "grad_norm": 0.4069369435310364, "learning_rate": 0.00019208463847983468, "loss": 1.7472, "step": 3061 }, { "epoch": 0.039789293470404115, "grad_norm": 0.33194565773010254, "learning_rate": 0.0001920820390179233, "loss": 1.3744, "step": 3062 }, { "epoch": 0.03980228801431999, "grad_norm": 0.2969339191913605, "learning_rate": 0.0001920794395560119, "loss": 1.5694, "step": 3063 }, { "epoch": 0.03981528255823586, "grad_norm": 0.3042122721672058, "learning_rate": 0.00019207684009410055, "loss": 1.4554, "step": 3064 }, { "epoch": 0.039828277102151734, "grad_norm": 0.3624174892902374, "learning_rate": 0.00019207424063218915, "loss": 1.3019, "step": 3065 }, { "epoch": 0.03984127164606761, "grad_norm": 0.36166122555732727, "learning_rate": 0.00019207164117027777, "loss": 1.4189, "step": 3066 }, { "epoch": 0.03985426618998348, "grad_norm": 0.37450551986694336, "learning_rate": 0.00019206904170836637, "loss": 1.2247, "step": 3067 }, { "epoch": 0.03986726073389935, "grad_norm": 0.44436436891555786, "learning_rate": 0.000192066442246455, "loss": 1.4524, "step": 3068 }, { "epoch": 0.039880255277815226, "grad_norm": 0.23996558785438538, "learning_rate": 0.00019206384278454362, "loss": 1.3004, "step": 3069 }, { "epoch": 0.0398932498217311, "grad_norm": 0.3490460515022278, "learning_rate": 0.00019206124332263221, "loss": 1.5343, "step": 3070 }, { "epoch": 0.03990624436564697, "grad_norm": 0.3472658395767212, "learning_rate": 0.00019205864386072084, "loss": 1.5033, "step": 3071 }, { "epoch": 0.039919238909562844, "grad_norm": 0.4355182647705078, "learning_rate": 0.00019205604439880946, "loss": 1.3936, "step": 3072 }, { "epoch": 0.03993223345347872, "grad_norm": 0.41063302755355835, "learning_rate": 0.0001920534449368981, "loss": 1.4734, "step": 3073 }, { "epoch": 0.0399452279973946, "grad_norm": 0.4038110673427582, "learning_rate": 0.00019205084547498669, "loss": 1.6693, "step": 3074 }, { "epoch": 0.03995822254131047, "grad_norm": 0.41437745094299316, "learning_rate": 0.00019204824601307528, "loss": 1.4717, "step": 3075 }, { "epoch": 0.03997121708522634, "grad_norm": 0.33500972390174866, "learning_rate": 0.00019204564655116393, "loss": 1.3303, "step": 3076 }, { "epoch": 0.039984211629142216, "grad_norm": 0.3980722725391388, "learning_rate": 0.00019204304708925253, "loss": 1.4424, "step": 3077 }, { "epoch": 0.03999720617305809, "grad_norm": 0.39202064275741577, "learning_rate": 0.00019204044762734116, "loss": 1.4207, "step": 3078 }, { "epoch": 0.04001020071697396, "grad_norm": 0.448565274477005, "learning_rate": 0.00019203784816542975, "loss": 1.3661, "step": 3079 }, { "epoch": 0.040023195260889835, "grad_norm": 0.29417359828948975, "learning_rate": 0.00019203524870351838, "loss": 1.2719, "step": 3080 }, { "epoch": 0.04003618980480571, "grad_norm": 0.37672901153564453, "learning_rate": 0.000192032649241607, "loss": 1.4153, "step": 3081 }, { "epoch": 0.04004918434872158, "grad_norm": 0.4291362464427948, "learning_rate": 0.0001920300497796956, "loss": 1.509, "step": 3082 }, { "epoch": 0.040062178892637454, "grad_norm": 0.4519380033016205, "learning_rate": 0.00019202745031778422, "loss": 1.5726, "step": 3083 }, { "epoch": 0.04007517343655333, "grad_norm": 0.33586302399635315, "learning_rate": 0.00019202485085587285, "loss": 1.5352, "step": 3084 }, { "epoch": 0.0400881679804692, "grad_norm": 0.379517138004303, "learning_rate": 0.00019202225139396147, "loss": 1.5744, "step": 3085 }, { "epoch": 0.04010116252438507, "grad_norm": 0.47037217020988464, "learning_rate": 0.00019201965193205007, "loss": 1.6572, "step": 3086 }, { "epoch": 0.040114157068300946, "grad_norm": 0.3559758961200714, "learning_rate": 0.00019201705247013867, "loss": 1.3829, "step": 3087 }, { "epoch": 0.04012715161221682, "grad_norm": 0.45659565925598145, "learning_rate": 0.00019201445300822732, "loss": 1.6783, "step": 3088 }, { "epoch": 0.04014014615613269, "grad_norm": 0.29577526450157166, "learning_rate": 0.00019201185354631592, "loss": 1.3802, "step": 3089 }, { "epoch": 0.040153140700048565, "grad_norm": 0.4399329125881195, "learning_rate": 0.00019200925408440454, "loss": 1.7204, "step": 3090 }, { "epoch": 0.04016613524396444, "grad_norm": 0.5615129470825195, "learning_rate": 0.00019200665462249314, "loss": 1.4306, "step": 3091 }, { "epoch": 0.04017912978788031, "grad_norm": 0.39894187450408936, "learning_rate": 0.00019200405516058176, "loss": 1.6013, "step": 3092 }, { "epoch": 0.04019212433179619, "grad_norm": 0.49865931272506714, "learning_rate": 0.0001920014556986704, "loss": 1.5064, "step": 3093 }, { "epoch": 0.04020511887571206, "grad_norm": 0.3973310887813568, "learning_rate": 0.00019199885623675899, "loss": 1.5184, "step": 3094 }, { "epoch": 0.040218113419627936, "grad_norm": 0.3640269339084625, "learning_rate": 0.0001919962567748476, "loss": 1.3856, "step": 3095 }, { "epoch": 0.04023110796354381, "grad_norm": 0.3153294324874878, "learning_rate": 0.00019199365731293623, "loss": 1.3646, "step": 3096 }, { "epoch": 0.04024410250745968, "grad_norm": 0.42748406529426575, "learning_rate": 0.00019199105785102486, "loss": 1.3435, "step": 3097 }, { "epoch": 0.040257097051375555, "grad_norm": 0.33205223083496094, "learning_rate": 0.00019198845838911346, "loss": 1.4495, "step": 3098 }, { "epoch": 0.04027009159529143, "grad_norm": 0.3578440248966217, "learning_rate": 0.00019198585892720208, "loss": 1.2762, "step": 3099 }, { "epoch": 0.0402830861392073, "grad_norm": 0.33170372247695923, "learning_rate": 0.0001919832594652907, "loss": 1.3112, "step": 3100 }, { "epoch": 0.040296080683123174, "grad_norm": 0.3646683692932129, "learning_rate": 0.0001919806600033793, "loss": 1.6309, "step": 3101 }, { "epoch": 0.04030907522703905, "grad_norm": 0.4076707363128662, "learning_rate": 0.00019197806054146793, "loss": 1.5324, "step": 3102 }, { "epoch": 0.04032206977095492, "grad_norm": 0.38818368315696716, "learning_rate": 0.00019197546107955655, "loss": 1.4028, "step": 3103 }, { "epoch": 0.04033506431487079, "grad_norm": 0.40092992782592773, "learning_rate": 0.00019197286161764515, "loss": 1.2861, "step": 3104 }, { "epoch": 0.040348058858786666, "grad_norm": 0.3484092950820923, "learning_rate": 0.00019197026215573377, "loss": 1.5126, "step": 3105 }, { "epoch": 0.04036105340270254, "grad_norm": 0.44686079025268555, "learning_rate": 0.00019196766269382237, "loss": 1.4899, "step": 3106 }, { "epoch": 0.04037404794661841, "grad_norm": 1.2319358587265015, "learning_rate": 0.00019196506323191102, "loss": 1.4878, "step": 3107 }, { "epoch": 0.040387042490534285, "grad_norm": 0.401244580745697, "learning_rate": 0.00019196246376999962, "loss": 1.3403, "step": 3108 }, { "epoch": 0.04040003703445016, "grad_norm": 0.3605179488658905, "learning_rate": 0.00019195986430808824, "loss": 1.6123, "step": 3109 }, { "epoch": 0.04041303157836603, "grad_norm": 0.449707567691803, "learning_rate": 0.00019195726484617684, "loss": 1.4513, "step": 3110 }, { "epoch": 0.040426026122281904, "grad_norm": 0.437468022108078, "learning_rate": 0.00019195466538426547, "loss": 1.4019, "step": 3111 }, { "epoch": 0.040439020666197784, "grad_norm": 0.38820868730545044, "learning_rate": 0.0001919520659223541, "loss": 1.3386, "step": 3112 }, { "epoch": 0.040452015210113657, "grad_norm": 0.3916557729244232, "learning_rate": 0.0001919494664604427, "loss": 1.5633, "step": 3113 }, { "epoch": 0.04046500975402953, "grad_norm": 0.2851991653442383, "learning_rate": 0.0001919468669985313, "loss": 1.3573, "step": 3114 }, { "epoch": 0.0404780042979454, "grad_norm": 0.41225916147232056, "learning_rate": 0.00019194426753661994, "loss": 1.5041, "step": 3115 }, { "epoch": 0.040490998841861275, "grad_norm": 0.43771931529045105, "learning_rate": 0.00019194166807470853, "loss": 1.5264, "step": 3116 }, { "epoch": 0.04050399338577715, "grad_norm": 0.4076649844646454, "learning_rate": 0.00019193906861279716, "loss": 1.5627, "step": 3117 }, { "epoch": 0.04051698792969302, "grad_norm": 0.4111360013484955, "learning_rate": 0.00019193646915088576, "loss": 1.497, "step": 3118 }, { "epoch": 0.040529982473608894, "grad_norm": 0.3873785138130188, "learning_rate": 0.0001919338696889744, "loss": 1.4727, "step": 3119 }, { "epoch": 0.04054297701752477, "grad_norm": 0.4220013916492462, "learning_rate": 0.000191931270227063, "loss": 1.4145, "step": 3120 }, { "epoch": 0.04055597156144064, "grad_norm": 0.4144245684146881, "learning_rate": 0.00019192867076515163, "loss": 1.4805, "step": 3121 }, { "epoch": 0.04056896610535651, "grad_norm": 0.4178270399570465, "learning_rate": 0.00019192607130324023, "loss": 1.3928, "step": 3122 }, { "epoch": 0.040581960649272386, "grad_norm": 0.4145921468734741, "learning_rate": 0.00019192347184132885, "loss": 1.4666, "step": 3123 }, { "epoch": 0.04059495519318826, "grad_norm": 0.40947577357292175, "learning_rate": 0.00019192087237941748, "loss": 1.4129, "step": 3124 }, { "epoch": 0.04060794973710413, "grad_norm": 0.4217536747455597, "learning_rate": 0.00019191827291750607, "loss": 1.4506, "step": 3125 }, { "epoch": 0.040620944281020005, "grad_norm": 0.43890392780303955, "learning_rate": 0.0001919156734555947, "loss": 1.4512, "step": 3126 }, { "epoch": 0.04063393882493588, "grad_norm": 0.3964190185070038, "learning_rate": 0.00019191307399368332, "loss": 1.3763, "step": 3127 }, { "epoch": 0.04064693336885175, "grad_norm": 0.4450424015522003, "learning_rate": 0.00019191047453177192, "loss": 1.5343, "step": 3128 }, { "epoch": 0.040659927912767624, "grad_norm": 0.3186704218387604, "learning_rate": 0.00019190787506986054, "loss": 1.3483, "step": 3129 }, { "epoch": 0.0406729224566835, "grad_norm": 0.4094642996788025, "learning_rate": 0.00019190527560794914, "loss": 1.5211, "step": 3130 }, { "epoch": 0.04068591700059938, "grad_norm": 0.44076094031333923, "learning_rate": 0.0001919026761460378, "loss": 1.4394, "step": 3131 }, { "epoch": 0.04069891154451525, "grad_norm": 0.5064018368721008, "learning_rate": 0.0001919000766841264, "loss": 1.5669, "step": 3132 }, { "epoch": 0.04071190608843112, "grad_norm": 0.39235207438468933, "learning_rate": 0.00019189747722221502, "loss": 1.4464, "step": 3133 }, { "epoch": 0.040724900632346996, "grad_norm": 0.3050629794597626, "learning_rate": 0.00019189487776030364, "loss": 1.1383, "step": 3134 }, { "epoch": 0.04073789517626287, "grad_norm": 0.4484102129936218, "learning_rate": 0.00019189227829839224, "loss": 1.3698, "step": 3135 }, { "epoch": 0.04075088972017874, "grad_norm": 0.46817225217819214, "learning_rate": 0.00019188967883648086, "loss": 1.4105, "step": 3136 }, { "epoch": 0.040763884264094614, "grad_norm": 0.3172460198402405, "learning_rate": 0.00019188707937456946, "loss": 1.4185, "step": 3137 }, { "epoch": 0.04077687880801049, "grad_norm": 0.3920261263847351, "learning_rate": 0.0001918844799126581, "loss": 1.2577, "step": 3138 }, { "epoch": 0.04078987335192636, "grad_norm": 0.37091803550720215, "learning_rate": 0.0001918818804507467, "loss": 1.533, "step": 3139 }, { "epoch": 0.04080286789584223, "grad_norm": 0.38436436653137207, "learning_rate": 0.00019187928098883533, "loss": 1.3562, "step": 3140 }, { "epoch": 0.040815862439758106, "grad_norm": 0.34745246171951294, "learning_rate": 0.00019187668152692393, "loss": 1.4727, "step": 3141 }, { "epoch": 0.04082885698367398, "grad_norm": 0.30579110980033875, "learning_rate": 0.00019187408206501255, "loss": 1.185, "step": 3142 }, { "epoch": 0.04084185152758985, "grad_norm": 0.3694552481174469, "learning_rate": 0.00019187148260310118, "loss": 1.5686, "step": 3143 }, { "epoch": 0.040854846071505725, "grad_norm": 0.3438752591609955, "learning_rate": 0.00019186888314118978, "loss": 1.4681, "step": 3144 }, { "epoch": 0.0408678406154216, "grad_norm": 0.4971218705177307, "learning_rate": 0.0001918662836792784, "loss": 1.4305, "step": 3145 }, { "epoch": 0.04088083515933747, "grad_norm": 0.31639137864112854, "learning_rate": 0.00019186368421736703, "loss": 1.3869, "step": 3146 }, { "epoch": 0.040893829703253344, "grad_norm": 0.42337992787361145, "learning_rate": 0.00019186108475545562, "loss": 1.4166, "step": 3147 }, { "epoch": 0.04090682424716922, "grad_norm": 0.43055403232574463, "learning_rate": 0.00019185848529354425, "loss": 1.4959, "step": 3148 }, { "epoch": 0.04091981879108509, "grad_norm": 0.43309667706489563, "learning_rate": 0.00019185588583163284, "loss": 1.7316, "step": 3149 }, { "epoch": 0.04093281333500097, "grad_norm": 0.46645283699035645, "learning_rate": 0.0001918532863697215, "loss": 1.4909, "step": 3150 }, { "epoch": 0.04094580787891684, "grad_norm": 0.4567408263683319, "learning_rate": 0.0001918506869078101, "loss": 1.5053, "step": 3151 }, { "epoch": 0.040958802422832716, "grad_norm": 0.4238909184932709, "learning_rate": 0.00019184808744589872, "loss": 1.594, "step": 3152 }, { "epoch": 0.04097179696674859, "grad_norm": 0.3949819505214691, "learning_rate": 0.00019184548798398732, "loss": 1.5029, "step": 3153 }, { "epoch": 0.04098479151066446, "grad_norm": 0.4502496123313904, "learning_rate": 0.00019184288852207594, "loss": 1.5892, "step": 3154 }, { "epoch": 0.040997786054580335, "grad_norm": 0.3974790573120117, "learning_rate": 0.00019184028906016456, "loss": 1.6014, "step": 3155 }, { "epoch": 0.04101078059849621, "grad_norm": 0.4246217608451843, "learning_rate": 0.00019183768959825316, "loss": 1.3469, "step": 3156 }, { "epoch": 0.04102377514241208, "grad_norm": 0.3463131785392761, "learning_rate": 0.00019183509013634179, "loss": 1.5015, "step": 3157 }, { "epoch": 0.041036769686327954, "grad_norm": 0.5264678597450256, "learning_rate": 0.0001918324906744304, "loss": 1.6415, "step": 3158 }, { "epoch": 0.041049764230243826, "grad_norm": 0.4146251380443573, "learning_rate": 0.000191829891212519, "loss": 1.4009, "step": 3159 }, { "epoch": 0.0410627587741597, "grad_norm": 0.3029043972492218, "learning_rate": 0.00019182729175060763, "loss": 1.4445, "step": 3160 }, { "epoch": 0.04107575331807557, "grad_norm": 0.4535422623157501, "learning_rate": 0.00019182469228869623, "loss": 1.5333, "step": 3161 }, { "epoch": 0.041088747861991445, "grad_norm": 0.37516871094703674, "learning_rate": 0.00019182209282678488, "loss": 1.6223, "step": 3162 }, { "epoch": 0.04110174240590732, "grad_norm": 0.5137490630149841, "learning_rate": 0.00019181949336487348, "loss": 1.5471, "step": 3163 }, { "epoch": 0.04111473694982319, "grad_norm": 0.4472859799861908, "learning_rate": 0.0001918168939029621, "loss": 1.4141, "step": 3164 }, { "epoch": 0.041127731493739064, "grad_norm": 0.40392884612083435, "learning_rate": 0.0001918142944410507, "loss": 1.4995, "step": 3165 }, { "epoch": 0.04114072603765494, "grad_norm": 0.390100359916687, "learning_rate": 0.00019181169497913932, "loss": 1.4495, "step": 3166 }, { "epoch": 0.04115372058157081, "grad_norm": 0.3772204518318176, "learning_rate": 0.00019180909551722795, "loss": 1.4012, "step": 3167 }, { "epoch": 0.04116671512548668, "grad_norm": 0.3527357876300812, "learning_rate": 0.00019180649605531655, "loss": 1.5031, "step": 3168 }, { "epoch": 0.04117970966940256, "grad_norm": 0.43985220789909363, "learning_rate": 0.00019180389659340517, "loss": 1.7382, "step": 3169 }, { "epoch": 0.041192704213318436, "grad_norm": 0.30340802669525146, "learning_rate": 0.0001918012971314938, "loss": 1.4302, "step": 3170 }, { "epoch": 0.04120569875723431, "grad_norm": 0.43774303793907166, "learning_rate": 0.0001917986976695824, "loss": 1.4333, "step": 3171 }, { "epoch": 0.04121869330115018, "grad_norm": 0.3428357243537903, "learning_rate": 0.00019179609820767102, "loss": 1.4309, "step": 3172 }, { "epoch": 0.041231687845066055, "grad_norm": 0.3634168207645416, "learning_rate": 0.00019179349874575964, "loss": 1.4616, "step": 3173 }, { "epoch": 0.04124468238898193, "grad_norm": 0.3627302646636963, "learning_rate": 0.00019179089928384827, "loss": 1.6259, "step": 3174 }, { "epoch": 0.0412576769328978, "grad_norm": 0.2947049140930176, "learning_rate": 0.00019178829982193686, "loss": 1.4478, "step": 3175 }, { "epoch": 0.041270671476813674, "grad_norm": 0.22655540704727173, "learning_rate": 0.0001917857003600255, "loss": 1.5733, "step": 3176 }, { "epoch": 0.04128366602072955, "grad_norm": 0.44975316524505615, "learning_rate": 0.0001917831008981141, "loss": 1.5573, "step": 3177 }, { "epoch": 0.04129666056464542, "grad_norm": 0.37253931164741516, "learning_rate": 0.0001917805014362027, "loss": 1.3003, "step": 3178 }, { "epoch": 0.04130965510856129, "grad_norm": 0.42107266187667847, "learning_rate": 0.00019177790197429133, "loss": 1.4648, "step": 3179 }, { "epoch": 0.041322649652477166, "grad_norm": 0.395936518907547, "learning_rate": 0.00019177530251237993, "loss": 1.6666, "step": 3180 }, { "epoch": 0.04133564419639304, "grad_norm": 0.3395724892616272, "learning_rate": 0.00019177270305046858, "loss": 1.4189, "step": 3181 }, { "epoch": 0.04134863874030891, "grad_norm": 0.34199321269989014, "learning_rate": 0.00019177010358855718, "loss": 1.3027, "step": 3182 }, { "epoch": 0.041361633284224784, "grad_norm": 0.40141457319259644, "learning_rate": 0.00019176750412664578, "loss": 1.4258, "step": 3183 }, { "epoch": 0.04137462782814066, "grad_norm": 0.2920992076396942, "learning_rate": 0.0001917649046647344, "loss": 1.3822, "step": 3184 }, { "epoch": 0.04138762237205653, "grad_norm": 0.41035526990890503, "learning_rate": 0.00019176230520282303, "loss": 1.3713, "step": 3185 }, { "epoch": 0.0414006169159724, "grad_norm": 0.5116333365440369, "learning_rate": 0.00019175970574091165, "loss": 1.5178, "step": 3186 }, { "epoch": 0.041413611459888276, "grad_norm": 0.34667888283729553, "learning_rate": 0.00019175710627900025, "loss": 1.2822, "step": 3187 }, { "epoch": 0.041426606003804156, "grad_norm": 0.35072648525238037, "learning_rate": 0.00019175450681708887, "loss": 1.2398, "step": 3188 }, { "epoch": 0.04143960054772003, "grad_norm": 0.37003394961357117, "learning_rate": 0.0001917519073551775, "loss": 1.3389, "step": 3189 }, { "epoch": 0.0414525950916359, "grad_norm": 0.37191593647003174, "learning_rate": 0.0001917493078932661, "loss": 1.4589, "step": 3190 }, { "epoch": 0.041465589635551775, "grad_norm": 0.4373728930950165, "learning_rate": 0.00019174670843135472, "loss": 1.5607, "step": 3191 }, { "epoch": 0.04147858417946765, "grad_norm": 0.37511971592903137, "learning_rate": 0.00019174410896944332, "loss": 1.4171, "step": 3192 }, { "epoch": 0.04149157872338352, "grad_norm": 0.2950449287891388, "learning_rate": 0.00019174150950753197, "loss": 1.4469, "step": 3193 }, { "epoch": 0.041504573267299394, "grad_norm": 0.3146478831768036, "learning_rate": 0.00019173891004562057, "loss": 1.4147, "step": 3194 }, { "epoch": 0.04151756781121527, "grad_norm": 0.32311949133872986, "learning_rate": 0.0001917363105837092, "loss": 1.4398, "step": 3195 }, { "epoch": 0.04153056235513114, "grad_norm": 0.41635018587112427, "learning_rate": 0.0001917337111217978, "loss": 1.4912, "step": 3196 }, { "epoch": 0.04154355689904701, "grad_norm": 0.25399893522262573, "learning_rate": 0.0001917311116598864, "loss": 1.1599, "step": 3197 }, { "epoch": 0.041556551442962886, "grad_norm": 0.4764833152294159, "learning_rate": 0.00019172851219797504, "loss": 1.5619, "step": 3198 }, { "epoch": 0.04156954598687876, "grad_norm": 0.37993738055229187, "learning_rate": 0.00019172591273606363, "loss": 1.5158, "step": 3199 }, { "epoch": 0.04158254053079463, "grad_norm": 0.37712740898132324, "learning_rate": 0.00019172331327415226, "loss": 1.2766, "step": 3200 }, { "epoch": 0.041595535074710505, "grad_norm": 0.2939991056919098, "learning_rate": 0.00019172071381224088, "loss": 1.3083, "step": 3201 }, { "epoch": 0.04160852961862638, "grad_norm": 0.41009974479675293, "learning_rate": 0.00019171811435032948, "loss": 1.4472, "step": 3202 }, { "epoch": 0.04162152416254225, "grad_norm": 0.30324918031692505, "learning_rate": 0.0001917155148884181, "loss": 1.2577, "step": 3203 }, { "epoch": 0.041634518706458123, "grad_norm": 0.346285879611969, "learning_rate": 0.0001917129154265067, "loss": 1.5461, "step": 3204 }, { "epoch": 0.041647513250373996, "grad_norm": 0.3356819748878479, "learning_rate": 0.00019171031596459535, "loss": 1.5499, "step": 3205 }, { "epoch": 0.04166050779428987, "grad_norm": 0.39320090413093567, "learning_rate": 0.00019170771650268395, "loss": 1.5392, "step": 3206 }, { "epoch": 0.04167350233820575, "grad_norm": 0.42837026715278625, "learning_rate": 0.00019170511704077258, "loss": 1.635, "step": 3207 }, { "epoch": 0.04168649688212162, "grad_norm": 0.4710414409637451, "learning_rate": 0.0001917025175788612, "loss": 1.3536, "step": 3208 }, { "epoch": 0.041699491426037495, "grad_norm": 0.37710267305374146, "learning_rate": 0.0001916999181169498, "loss": 1.5275, "step": 3209 }, { "epoch": 0.04171248596995337, "grad_norm": 0.5383466482162476, "learning_rate": 0.00019169731865503842, "loss": 1.3972, "step": 3210 }, { "epoch": 0.04172548051386924, "grad_norm": 0.4248463213443756, "learning_rate": 0.00019169471919312702, "loss": 1.5377, "step": 3211 }, { "epoch": 0.041738475057785114, "grad_norm": 0.3452415466308594, "learning_rate": 0.00019169211973121564, "loss": 1.4607, "step": 3212 }, { "epoch": 0.04175146960170099, "grad_norm": 0.3828495442867279, "learning_rate": 0.00019168952026930427, "loss": 1.4264, "step": 3213 }, { "epoch": 0.04176446414561686, "grad_norm": 0.40293848514556885, "learning_rate": 0.00019168692080739287, "loss": 1.5344, "step": 3214 }, { "epoch": 0.04177745868953273, "grad_norm": 0.3619150221347809, "learning_rate": 0.0001916843213454815, "loss": 1.4832, "step": 3215 }, { "epoch": 0.041790453233448606, "grad_norm": 0.40804359316825867, "learning_rate": 0.00019168172188357012, "loss": 1.5644, "step": 3216 }, { "epoch": 0.04180344777736448, "grad_norm": 0.4149736166000366, "learning_rate": 0.00019167912242165874, "loss": 1.437, "step": 3217 }, { "epoch": 0.04181644232128035, "grad_norm": 0.31111371517181396, "learning_rate": 0.00019167652295974734, "loss": 1.4111, "step": 3218 }, { "epoch": 0.041829436865196225, "grad_norm": 0.3087538182735443, "learning_rate": 0.00019167392349783596, "loss": 1.3734, "step": 3219 }, { "epoch": 0.0418424314091121, "grad_norm": 0.4850727617740631, "learning_rate": 0.00019167132403592459, "loss": 1.4706, "step": 3220 }, { "epoch": 0.04185542595302797, "grad_norm": 0.38028115034103394, "learning_rate": 0.00019166872457401318, "loss": 1.3597, "step": 3221 }, { "epoch": 0.041868420496943844, "grad_norm": 0.3625474274158478, "learning_rate": 0.0001916661251121018, "loss": 1.4015, "step": 3222 }, { "epoch": 0.04188141504085972, "grad_norm": 0.4645829498767853, "learning_rate": 0.0001916635256501904, "loss": 1.5933, "step": 3223 }, { "epoch": 0.04189440958477559, "grad_norm": 0.42849549651145935, "learning_rate": 0.00019166092618827906, "loss": 1.5038, "step": 3224 }, { "epoch": 0.04190740412869146, "grad_norm": 0.38927534222602844, "learning_rate": 0.00019165832672636765, "loss": 1.4694, "step": 3225 }, { "epoch": 0.04192039867260734, "grad_norm": 0.3418194353580475, "learning_rate": 0.00019165572726445625, "loss": 1.3263, "step": 3226 }, { "epoch": 0.041933393216523215, "grad_norm": 0.28867998719215393, "learning_rate": 0.00019165312780254488, "loss": 1.2728, "step": 3227 }, { "epoch": 0.04194638776043909, "grad_norm": 0.7567052841186523, "learning_rate": 0.0001916505283406335, "loss": 1.535, "step": 3228 }, { "epoch": 0.04195938230435496, "grad_norm": 0.3170691728591919, "learning_rate": 0.00019164792887872213, "loss": 1.4031, "step": 3229 }, { "epoch": 0.041972376848270834, "grad_norm": 0.4613756835460663, "learning_rate": 0.00019164532941681072, "loss": 1.4951, "step": 3230 }, { "epoch": 0.04198537139218671, "grad_norm": 0.42037251591682434, "learning_rate": 0.00019164272995489935, "loss": 1.4238, "step": 3231 }, { "epoch": 0.04199836593610258, "grad_norm": 0.39461565017700195, "learning_rate": 0.00019164013049298797, "loss": 1.5215, "step": 3232 }, { "epoch": 0.04201136048001845, "grad_norm": 0.3436048924922943, "learning_rate": 0.00019163753103107657, "loss": 1.555, "step": 3233 }, { "epoch": 0.042024355023934326, "grad_norm": 0.42129844427108765, "learning_rate": 0.0001916349315691652, "loss": 1.5112, "step": 3234 }, { "epoch": 0.0420373495678502, "grad_norm": 0.39394626021385193, "learning_rate": 0.0001916323321072538, "loss": 1.254, "step": 3235 }, { "epoch": 0.04205034411176607, "grad_norm": 0.4991368055343628, "learning_rate": 0.00019162973264534244, "loss": 1.509, "step": 3236 }, { "epoch": 0.042063338655681945, "grad_norm": 0.4191257357597351, "learning_rate": 0.00019162713318343104, "loss": 1.4626, "step": 3237 }, { "epoch": 0.04207633319959782, "grad_norm": 0.3565533459186554, "learning_rate": 0.00019162453372151964, "loss": 1.3298, "step": 3238 }, { "epoch": 0.04208932774351369, "grad_norm": 0.3717363178730011, "learning_rate": 0.00019162193425960826, "loss": 1.4278, "step": 3239 }, { "epoch": 0.042102322287429564, "grad_norm": 0.5267666578292847, "learning_rate": 0.00019161933479769689, "loss": 1.4161, "step": 3240 }, { "epoch": 0.04211531683134544, "grad_norm": 0.3773806691169739, "learning_rate": 0.0001916167353357855, "loss": 1.388, "step": 3241 }, { "epoch": 0.04212831137526131, "grad_norm": 0.4267769455909729, "learning_rate": 0.0001916141358738741, "loss": 1.3399, "step": 3242 }, { "epoch": 0.04214130591917718, "grad_norm": 0.3078964948654175, "learning_rate": 0.00019161153641196273, "loss": 1.3957, "step": 3243 }, { "epoch": 0.042154300463093056, "grad_norm": 0.44167131185531616, "learning_rate": 0.00019160893695005136, "loss": 1.6181, "step": 3244 }, { "epoch": 0.042167295007008936, "grad_norm": 0.41440489888191223, "learning_rate": 0.00019160633748813995, "loss": 1.3941, "step": 3245 }, { "epoch": 0.04218028955092481, "grad_norm": 0.3722870349884033, "learning_rate": 0.00019160373802622858, "loss": 1.3619, "step": 3246 }, { "epoch": 0.04219328409484068, "grad_norm": 0.4042980372905731, "learning_rate": 0.0001916011385643172, "loss": 1.4128, "step": 3247 }, { "epoch": 0.042206278638756554, "grad_norm": 0.415513813495636, "learning_rate": 0.00019159853910240583, "loss": 1.6317, "step": 3248 }, { "epoch": 0.04221927318267243, "grad_norm": 0.4566916823387146, "learning_rate": 0.00019159593964049443, "loss": 1.4313, "step": 3249 }, { "epoch": 0.0422322677265883, "grad_norm": 0.4161822497844696, "learning_rate": 0.00019159334017858302, "loss": 1.3289, "step": 3250 }, { "epoch": 0.04224526227050417, "grad_norm": 0.319856196641922, "learning_rate": 0.00019159074071667167, "loss": 1.4485, "step": 3251 }, { "epoch": 0.042258256814420046, "grad_norm": 0.37036123871803284, "learning_rate": 0.00019158814125476027, "loss": 1.4701, "step": 3252 }, { "epoch": 0.04227125135833592, "grad_norm": 0.31518563628196716, "learning_rate": 0.0001915855417928489, "loss": 1.4222, "step": 3253 }, { "epoch": 0.04228424590225179, "grad_norm": 0.38721492886543274, "learning_rate": 0.0001915829423309375, "loss": 1.5214, "step": 3254 }, { "epoch": 0.042297240446167665, "grad_norm": 0.3673882782459259, "learning_rate": 0.00019158034286902612, "loss": 1.5217, "step": 3255 }, { "epoch": 0.04231023499008354, "grad_norm": 0.3944181203842163, "learning_rate": 0.00019157774340711474, "loss": 1.3793, "step": 3256 }, { "epoch": 0.04232322953399941, "grad_norm": 0.38716012239456177, "learning_rate": 0.00019157514394520334, "loss": 1.6818, "step": 3257 }, { "epoch": 0.042336224077915284, "grad_norm": 0.4616550803184509, "learning_rate": 0.00019157254448329196, "loss": 1.2827, "step": 3258 }, { "epoch": 0.04234921862183116, "grad_norm": 0.3793615698814392, "learning_rate": 0.0001915699450213806, "loss": 1.4161, "step": 3259 }, { "epoch": 0.04236221316574703, "grad_norm": 0.24249446392059326, "learning_rate": 0.0001915673455594692, "loss": 1.1609, "step": 3260 }, { "epoch": 0.0423752077096629, "grad_norm": 0.3935641050338745, "learning_rate": 0.0001915647460975578, "loss": 1.4688, "step": 3261 }, { "epoch": 0.042388202253578776, "grad_norm": 0.4153493046760559, "learning_rate": 0.00019156214663564644, "loss": 1.4151, "step": 3262 }, { "epoch": 0.04240119679749465, "grad_norm": 0.4026470482349396, "learning_rate": 0.00019155954717373506, "loss": 1.5416, "step": 3263 }, { "epoch": 0.04241419134141053, "grad_norm": 0.3599541485309601, "learning_rate": 0.00019155694771182366, "loss": 1.4973, "step": 3264 }, { "epoch": 0.0424271858853264, "grad_norm": 0.3831346929073334, "learning_rate": 0.00019155434824991228, "loss": 1.284, "step": 3265 }, { "epoch": 0.042440180429242275, "grad_norm": 0.3427187204360962, "learning_rate": 0.00019155174878800088, "loss": 1.5802, "step": 3266 }, { "epoch": 0.04245317497315815, "grad_norm": 0.4238213002681732, "learning_rate": 0.0001915491493260895, "loss": 1.4432, "step": 3267 }, { "epoch": 0.04246616951707402, "grad_norm": 0.3959029018878937, "learning_rate": 0.00019154654986417813, "loss": 1.492, "step": 3268 }, { "epoch": 0.042479164060989894, "grad_norm": 0.3928011655807495, "learning_rate": 0.00019154395040226673, "loss": 1.6269, "step": 3269 }, { "epoch": 0.042492158604905766, "grad_norm": 0.4751276969909668, "learning_rate": 0.00019154135094035535, "loss": 1.451, "step": 3270 }, { "epoch": 0.04250515314882164, "grad_norm": 0.3577234148979187, "learning_rate": 0.00019153875147844397, "loss": 1.383, "step": 3271 }, { "epoch": 0.04251814769273751, "grad_norm": 0.3346809446811676, "learning_rate": 0.0001915361520165326, "loss": 1.442, "step": 3272 }, { "epoch": 0.042531142236653385, "grad_norm": 0.39318138360977173, "learning_rate": 0.0001915335525546212, "loss": 1.2091, "step": 3273 }, { "epoch": 0.04254413678056926, "grad_norm": 0.40168461203575134, "learning_rate": 0.00019153095309270982, "loss": 1.2523, "step": 3274 }, { "epoch": 0.04255713132448513, "grad_norm": 0.3460966944694519, "learning_rate": 0.00019152835363079845, "loss": 1.2636, "step": 3275 }, { "epoch": 0.042570125868401004, "grad_norm": 0.48361414670944214, "learning_rate": 0.00019152575416888704, "loss": 1.4194, "step": 3276 }, { "epoch": 0.04258312041231688, "grad_norm": 0.31816691160202026, "learning_rate": 0.00019152315470697567, "loss": 1.1946, "step": 3277 }, { "epoch": 0.04259611495623275, "grad_norm": 0.42274630069732666, "learning_rate": 0.00019152055524506426, "loss": 1.5731, "step": 3278 }, { "epoch": 0.04260910950014862, "grad_norm": 0.35717618465423584, "learning_rate": 0.00019151795578315292, "loss": 1.313, "step": 3279 }, { "epoch": 0.042622104044064496, "grad_norm": 0.3694474399089813, "learning_rate": 0.0001915153563212415, "loss": 1.2986, "step": 3280 }, { "epoch": 0.04263509858798037, "grad_norm": 0.396898090839386, "learning_rate": 0.0001915127568593301, "loss": 1.4396, "step": 3281 }, { "epoch": 0.04264809313189624, "grad_norm": 0.4371946454048157, "learning_rate": 0.00019151015739741874, "loss": 1.5678, "step": 3282 }, { "epoch": 0.04266108767581212, "grad_norm": 0.4931833744049072, "learning_rate": 0.00019150755793550736, "loss": 1.4774, "step": 3283 }, { "epoch": 0.042674082219727995, "grad_norm": 0.37313833832740784, "learning_rate": 0.00019150495847359598, "loss": 1.3354, "step": 3284 }, { "epoch": 0.04268707676364387, "grad_norm": 0.3993110656738281, "learning_rate": 0.00019150235901168458, "loss": 1.4563, "step": 3285 }, { "epoch": 0.04270007130755974, "grad_norm": 0.47115734219551086, "learning_rate": 0.0001914997595497732, "loss": 1.5814, "step": 3286 }, { "epoch": 0.042713065851475614, "grad_norm": 0.39446935057640076, "learning_rate": 0.00019149716008786183, "loss": 1.48, "step": 3287 }, { "epoch": 0.04272606039539149, "grad_norm": 0.38951924443244934, "learning_rate": 0.00019149456062595043, "loss": 1.519, "step": 3288 }, { "epoch": 0.04273905493930736, "grad_norm": 0.4462415874004364, "learning_rate": 0.00019149196116403905, "loss": 1.4445, "step": 3289 }, { "epoch": 0.04275204948322323, "grad_norm": 0.3566194176673889, "learning_rate": 0.00019148936170212768, "loss": 1.4556, "step": 3290 }, { "epoch": 0.042765044027139106, "grad_norm": 0.4223826229572296, "learning_rate": 0.0001914867622402163, "loss": 1.6536, "step": 3291 }, { "epoch": 0.04277803857105498, "grad_norm": 0.36028677225112915, "learning_rate": 0.0001914841627783049, "loss": 1.3713, "step": 3292 }, { "epoch": 0.04279103311497085, "grad_norm": 0.3774397373199463, "learning_rate": 0.0001914815633163935, "loss": 1.1729, "step": 3293 }, { "epoch": 0.042804027658886724, "grad_norm": 0.42678093910217285, "learning_rate": 0.00019147896385448215, "loss": 1.5057, "step": 3294 }, { "epoch": 0.0428170222028026, "grad_norm": 0.4033164978027344, "learning_rate": 0.00019147636439257075, "loss": 1.3776, "step": 3295 }, { "epoch": 0.04283001674671847, "grad_norm": 0.37973758578300476, "learning_rate": 0.00019147376493065937, "loss": 1.2883, "step": 3296 }, { "epoch": 0.04284301129063434, "grad_norm": 0.4481126666069031, "learning_rate": 0.00019147116546874797, "loss": 1.5364, "step": 3297 }, { "epoch": 0.042856005834550216, "grad_norm": 0.40619781613349915, "learning_rate": 0.0001914685660068366, "loss": 1.49, "step": 3298 }, { "epoch": 0.04286900037846609, "grad_norm": 0.3777209222316742, "learning_rate": 0.00019146596654492522, "loss": 1.2836, "step": 3299 }, { "epoch": 0.04288199492238196, "grad_norm": 0.39847204089164734, "learning_rate": 0.0001914633670830138, "loss": 1.3011, "step": 3300 }, { "epoch": 0.042894989466297835, "grad_norm": 0.43236109614372253, "learning_rate": 0.00019146076762110244, "loss": 1.6102, "step": 3301 }, { "epoch": 0.04290798401021371, "grad_norm": 0.4709397852420807, "learning_rate": 0.00019145816815919106, "loss": 1.6345, "step": 3302 }, { "epoch": 0.04292097855412959, "grad_norm": 0.36808282136917114, "learning_rate": 0.0001914555686972797, "loss": 1.4226, "step": 3303 }, { "epoch": 0.04293397309804546, "grad_norm": 0.39973029494285583, "learning_rate": 0.00019145296923536828, "loss": 1.4584, "step": 3304 }, { "epoch": 0.042946967641961334, "grad_norm": 0.3749482333660126, "learning_rate": 0.00019145036977345688, "loss": 1.2796, "step": 3305 }, { "epoch": 0.04295996218587721, "grad_norm": 0.32088154554367065, "learning_rate": 0.00019144777031154553, "loss": 1.4063, "step": 3306 }, { "epoch": 0.04297295672979308, "grad_norm": 0.40059664845466614, "learning_rate": 0.00019144517084963413, "loss": 1.5157, "step": 3307 }, { "epoch": 0.04298595127370895, "grad_norm": 0.40178564190864563, "learning_rate": 0.00019144257138772275, "loss": 1.5518, "step": 3308 }, { "epoch": 0.042998945817624826, "grad_norm": 0.42714112997055054, "learning_rate": 0.00019143997192581135, "loss": 1.5906, "step": 3309 }, { "epoch": 0.0430119403615407, "grad_norm": 0.4414735734462738, "learning_rate": 0.00019143737246389998, "loss": 1.556, "step": 3310 }, { "epoch": 0.04302493490545657, "grad_norm": 0.34161970019340515, "learning_rate": 0.0001914347730019886, "loss": 1.3804, "step": 3311 }, { "epoch": 0.043037929449372445, "grad_norm": 0.389321506023407, "learning_rate": 0.0001914321735400772, "loss": 1.3994, "step": 3312 }, { "epoch": 0.04305092399328832, "grad_norm": 0.40419989824295044, "learning_rate": 0.00019142957407816582, "loss": 1.4814, "step": 3313 }, { "epoch": 0.04306391853720419, "grad_norm": 0.3991282880306244, "learning_rate": 0.00019142697461625445, "loss": 1.5364, "step": 3314 }, { "epoch": 0.04307691308112006, "grad_norm": 0.24831891059875488, "learning_rate": 0.00019142437515434307, "loss": 1.3061, "step": 3315 }, { "epoch": 0.043089907625035936, "grad_norm": 0.32519426941871643, "learning_rate": 0.00019142177569243167, "loss": 1.428, "step": 3316 }, { "epoch": 0.04310290216895181, "grad_norm": 0.3940295875072479, "learning_rate": 0.0001914191762305203, "loss": 1.2883, "step": 3317 }, { "epoch": 0.04311589671286768, "grad_norm": 0.4799407720565796, "learning_rate": 0.00019141657676860892, "loss": 1.4101, "step": 3318 }, { "epoch": 0.043128891256783555, "grad_norm": 0.4417022168636322, "learning_rate": 0.00019141397730669752, "loss": 1.4683, "step": 3319 }, { "epoch": 0.04314188580069943, "grad_norm": 0.37678059935569763, "learning_rate": 0.00019141137784478614, "loss": 1.4166, "step": 3320 }, { "epoch": 0.0431548803446153, "grad_norm": 0.3916042149066925, "learning_rate": 0.00019140877838287476, "loss": 1.5433, "step": 3321 }, { "epoch": 0.04316787488853118, "grad_norm": 0.2916439473628998, "learning_rate": 0.00019140617892096336, "loss": 1.3216, "step": 3322 }, { "epoch": 0.043180869432447054, "grad_norm": 0.43066203594207764, "learning_rate": 0.000191403579459052, "loss": 1.3716, "step": 3323 }, { "epoch": 0.04319386397636293, "grad_norm": 0.3231244683265686, "learning_rate": 0.00019140097999714058, "loss": 1.4794, "step": 3324 }, { "epoch": 0.0432068585202788, "grad_norm": 0.39126765727996826, "learning_rate": 0.00019139838053522924, "loss": 1.236, "step": 3325 }, { "epoch": 0.04321985306419467, "grad_norm": 0.36046120524406433, "learning_rate": 0.00019139578107331783, "loss": 1.4241, "step": 3326 }, { "epoch": 0.043232847608110546, "grad_norm": 0.3886171877384186, "learning_rate": 0.00019139318161140646, "loss": 1.3316, "step": 3327 }, { "epoch": 0.04324584215202642, "grad_norm": 0.35811758041381836, "learning_rate": 0.00019139058214949505, "loss": 1.3552, "step": 3328 }, { "epoch": 0.04325883669594229, "grad_norm": 0.480874627828598, "learning_rate": 0.00019138798268758368, "loss": 1.6968, "step": 3329 }, { "epoch": 0.043271831239858165, "grad_norm": 0.3552871644496918, "learning_rate": 0.0001913853832256723, "loss": 1.4936, "step": 3330 }, { "epoch": 0.04328482578377404, "grad_norm": 0.3589709401130676, "learning_rate": 0.0001913827837637609, "loss": 1.449, "step": 3331 }, { "epoch": 0.04329782032768991, "grad_norm": 0.43328332901000977, "learning_rate": 0.00019138018430184953, "loss": 1.4585, "step": 3332 }, { "epoch": 0.043310814871605784, "grad_norm": 0.3951812982559204, "learning_rate": 0.00019137758483993815, "loss": 1.5951, "step": 3333 }, { "epoch": 0.04332380941552166, "grad_norm": 0.3889358639717102, "learning_rate": 0.00019137498537802675, "loss": 1.3383, "step": 3334 }, { "epoch": 0.04333680395943753, "grad_norm": 0.4324074983596802, "learning_rate": 0.00019137238591611537, "loss": 1.3453, "step": 3335 }, { "epoch": 0.0433497985033534, "grad_norm": 0.28207793831825256, "learning_rate": 0.00019136978645420397, "loss": 1.5449, "step": 3336 }, { "epoch": 0.043362793047269275, "grad_norm": 0.3594921827316284, "learning_rate": 0.00019136718699229262, "loss": 1.442, "step": 3337 }, { "epoch": 0.04337578759118515, "grad_norm": 0.40041711926460266, "learning_rate": 0.00019136458753038122, "loss": 1.4303, "step": 3338 }, { "epoch": 0.04338878213510102, "grad_norm": 0.387516587972641, "learning_rate": 0.00019136198806846984, "loss": 1.2913, "step": 3339 }, { "epoch": 0.043401776679016894, "grad_norm": 0.34604567289352417, "learning_rate": 0.00019135938860655844, "loss": 1.4383, "step": 3340 }, { "epoch": 0.043414771222932774, "grad_norm": 0.45010215044021606, "learning_rate": 0.00019135678914464706, "loss": 1.424, "step": 3341 }, { "epoch": 0.04342776576684865, "grad_norm": 0.37968873977661133, "learning_rate": 0.0001913541896827357, "loss": 1.5163, "step": 3342 }, { "epoch": 0.04344076031076452, "grad_norm": 0.2532168924808502, "learning_rate": 0.0001913515902208243, "loss": 1.2122, "step": 3343 }, { "epoch": 0.04345375485468039, "grad_norm": 0.40517109632492065, "learning_rate": 0.0001913489907589129, "loss": 1.475, "step": 3344 }, { "epoch": 0.043466749398596266, "grad_norm": 0.3487946391105652, "learning_rate": 0.00019134639129700154, "loss": 1.3463, "step": 3345 }, { "epoch": 0.04347974394251214, "grad_norm": 0.4634891748428345, "learning_rate": 0.00019134379183509016, "loss": 1.4547, "step": 3346 }, { "epoch": 0.04349273848642801, "grad_norm": 0.6074512004852295, "learning_rate": 0.00019134119237317876, "loss": 1.5215, "step": 3347 }, { "epoch": 0.043505733030343885, "grad_norm": 0.3533388376235962, "learning_rate": 0.00019133859291126735, "loss": 1.1557, "step": 3348 }, { "epoch": 0.04351872757425976, "grad_norm": 0.3960515558719635, "learning_rate": 0.000191335993449356, "loss": 1.3195, "step": 3349 }, { "epoch": 0.04353172211817563, "grad_norm": 0.436777800321579, "learning_rate": 0.0001913333939874446, "loss": 1.3511, "step": 3350 }, { "epoch": 0.043544716662091504, "grad_norm": 0.36303094029426575, "learning_rate": 0.00019133079452553323, "loss": 1.6913, "step": 3351 }, { "epoch": 0.04355771120600738, "grad_norm": 0.339926153421402, "learning_rate": 0.00019132819506362183, "loss": 1.4129, "step": 3352 }, { "epoch": 0.04357070574992325, "grad_norm": 0.37749627232551575, "learning_rate": 0.00019132559560171045, "loss": 1.4347, "step": 3353 }, { "epoch": 0.04358370029383912, "grad_norm": 0.4598933756351471, "learning_rate": 0.00019132299613979907, "loss": 1.507, "step": 3354 }, { "epoch": 0.043596694837754996, "grad_norm": 0.34212225675582886, "learning_rate": 0.00019132039667788767, "loss": 1.3819, "step": 3355 }, { "epoch": 0.04360968938167087, "grad_norm": 0.4093504548072815, "learning_rate": 0.0001913177972159763, "loss": 1.5085, "step": 3356 }, { "epoch": 0.04362268392558674, "grad_norm": 0.3737906515598297, "learning_rate": 0.00019131519775406492, "loss": 1.462, "step": 3357 }, { "epoch": 0.043635678469502615, "grad_norm": 0.35606417059898376, "learning_rate": 0.00019131259829215355, "loss": 1.3589, "step": 3358 }, { "epoch": 0.04364867301341849, "grad_norm": 0.39090147614479065, "learning_rate": 0.00019130999883024214, "loss": 1.2332, "step": 3359 }, { "epoch": 0.04366166755733437, "grad_norm": 0.3723491430282593, "learning_rate": 0.00019130739936833077, "loss": 1.3077, "step": 3360 }, { "epoch": 0.04367466210125024, "grad_norm": 0.30465856194496155, "learning_rate": 0.0001913047999064194, "loss": 1.4543, "step": 3361 }, { "epoch": 0.04368765664516611, "grad_norm": 0.4320400655269623, "learning_rate": 0.000191302200444508, "loss": 1.518, "step": 3362 }, { "epoch": 0.043700651189081986, "grad_norm": 0.33320820331573486, "learning_rate": 0.00019129960098259661, "loss": 1.5668, "step": 3363 }, { "epoch": 0.04371364573299786, "grad_norm": 0.6051658391952515, "learning_rate": 0.00019129700152068524, "loss": 1.5598, "step": 3364 }, { "epoch": 0.04372664027691373, "grad_norm": 0.43451371788978577, "learning_rate": 0.00019129440205877384, "loss": 1.5338, "step": 3365 }, { "epoch": 0.043739634820829605, "grad_norm": 0.38585007190704346, "learning_rate": 0.00019129180259686246, "loss": 1.4364, "step": 3366 }, { "epoch": 0.04375262936474548, "grad_norm": 0.345638245344162, "learning_rate": 0.00019128920313495106, "loss": 1.3738, "step": 3367 }, { "epoch": 0.04376562390866135, "grad_norm": 0.4160021245479584, "learning_rate": 0.0001912866036730397, "loss": 1.4794, "step": 3368 }, { "epoch": 0.043778618452577224, "grad_norm": 0.4704032838344574, "learning_rate": 0.0001912840042111283, "loss": 1.6299, "step": 3369 }, { "epoch": 0.0437916129964931, "grad_norm": 0.34709498286247253, "learning_rate": 0.00019128140474921693, "loss": 1.3436, "step": 3370 }, { "epoch": 0.04380460754040897, "grad_norm": 0.2992079257965088, "learning_rate": 0.00019127880528730553, "loss": 1.2318, "step": 3371 }, { "epoch": 0.04381760208432484, "grad_norm": 0.37219589948654175, "learning_rate": 0.00019127620582539415, "loss": 1.6489, "step": 3372 }, { "epoch": 0.043830596628240716, "grad_norm": 0.43788623809814453, "learning_rate": 0.00019127360636348278, "loss": 1.4366, "step": 3373 }, { "epoch": 0.04384359117215659, "grad_norm": 0.3767496347427368, "learning_rate": 0.00019127100690157137, "loss": 1.4562, "step": 3374 }, { "epoch": 0.04385658571607246, "grad_norm": 0.30844634771347046, "learning_rate": 0.00019126840743966, "loss": 1.2874, "step": 3375 }, { "epoch": 0.043869580259988335, "grad_norm": 0.3502744138240814, "learning_rate": 0.00019126580797774862, "loss": 1.3586, "step": 3376 }, { "epoch": 0.04388257480390421, "grad_norm": 0.36088719964027405, "learning_rate": 0.00019126320851583722, "loss": 1.614, "step": 3377 }, { "epoch": 0.04389556934782008, "grad_norm": 0.3557823896408081, "learning_rate": 0.00019126060905392585, "loss": 1.5289, "step": 3378 }, { "epoch": 0.04390856389173596, "grad_norm": 0.3625345826148987, "learning_rate": 0.00019125800959201444, "loss": 1.5533, "step": 3379 }, { "epoch": 0.043921558435651833, "grad_norm": 0.29409506916999817, "learning_rate": 0.0001912554101301031, "loss": 1.4743, "step": 3380 }, { "epoch": 0.043934552979567706, "grad_norm": 0.33340227603912354, "learning_rate": 0.0001912528106681917, "loss": 1.4126, "step": 3381 }, { "epoch": 0.04394754752348358, "grad_norm": 0.43819737434387207, "learning_rate": 0.00019125021120628032, "loss": 1.5138, "step": 3382 }, { "epoch": 0.04396054206739945, "grad_norm": 0.3450305163860321, "learning_rate": 0.00019124761174436891, "loss": 1.3265, "step": 3383 }, { "epoch": 0.043973536611315325, "grad_norm": 0.38587579131126404, "learning_rate": 0.00019124501228245754, "loss": 1.415, "step": 3384 }, { "epoch": 0.0439865311552312, "grad_norm": 0.3960534334182739, "learning_rate": 0.00019124241282054616, "loss": 1.6012, "step": 3385 }, { "epoch": 0.04399952569914707, "grad_norm": 0.398406058549881, "learning_rate": 0.00019123981335863476, "loss": 1.2995, "step": 3386 }, { "epoch": 0.044012520243062944, "grad_norm": 0.33824533224105835, "learning_rate": 0.00019123721389672338, "loss": 1.4585, "step": 3387 }, { "epoch": 0.04402551478697882, "grad_norm": 0.33364346623420715, "learning_rate": 0.000191234614434812, "loss": 1.3822, "step": 3388 }, { "epoch": 0.04403850933089469, "grad_norm": 0.4895845651626587, "learning_rate": 0.0001912320149729006, "loss": 1.4984, "step": 3389 }, { "epoch": 0.04405150387481056, "grad_norm": 0.39063429832458496, "learning_rate": 0.00019122941551098923, "loss": 1.5546, "step": 3390 }, { "epoch": 0.044064498418726436, "grad_norm": 0.3552950322628021, "learning_rate": 0.00019122681604907783, "loss": 1.5222, "step": 3391 }, { "epoch": 0.04407749296264231, "grad_norm": 0.3311936557292938, "learning_rate": 0.00019122421658716648, "loss": 1.4323, "step": 3392 }, { "epoch": 0.04409048750655818, "grad_norm": 0.3944788873195648, "learning_rate": 0.00019122161712525508, "loss": 1.5376, "step": 3393 }, { "epoch": 0.044103482050474055, "grad_norm": 0.529399573802948, "learning_rate": 0.0001912190176633437, "loss": 1.7481, "step": 3394 }, { "epoch": 0.04411647659438993, "grad_norm": 0.35166001319885254, "learning_rate": 0.00019121641820143233, "loss": 1.3484, "step": 3395 }, { "epoch": 0.0441294711383058, "grad_norm": 0.3795880079269409, "learning_rate": 0.00019121381873952092, "loss": 1.5504, "step": 3396 }, { "epoch": 0.044142465682221674, "grad_norm": 0.3294336795806885, "learning_rate": 0.00019121121927760955, "loss": 1.3608, "step": 3397 }, { "epoch": 0.044155460226137554, "grad_norm": 0.282772421836853, "learning_rate": 0.00019120861981569815, "loss": 1.1645, "step": 3398 }, { "epoch": 0.04416845477005343, "grad_norm": 0.45824310183525085, "learning_rate": 0.0001912060203537868, "loss": 1.4217, "step": 3399 }, { "epoch": 0.0441814493139693, "grad_norm": 0.37385669350624084, "learning_rate": 0.0001912034208918754, "loss": 1.4098, "step": 3400 }, { "epoch": 0.04419444385788517, "grad_norm": 0.3293857276439667, "learning_rate": 0.00019120082142996402, "loss": 1.3769, "step": 3401 }, { "epoch": 0.044207438401801046, "grad_norm": 0.6088602542877197, "learning_rate": 0.00019119822196805262, "loss": 1.3635, "step": 3402 }, { "epoch": 0.04422043294571692, "grad_norm": 0.40845146775245667, "learning_rate": 0.00019119562250614124, "loss": 1.5731, "step": 3403 }, { "epoch": 0.04423342748963279, "grad_norm": 0.30555814504623413, "learning_rate": 0.00019119302304422987, "loss": 1.4244, "step": 3404 }, { "epoch": 0.044246422033548664, "grad_norm": 0.4098779857158661, "learning_rate": 0.00019119042358231846, "loss": 1.4273, "step": 3405 }, { "epoch": 0.04425941657746454, "grad_norm": 0.31717047095298767, "learning_rate": 0.0001911878241204071, "loss": 1.5747, "step": 3406 }, { "epoch": 0.04427241112138041, "grad_norm": 0.4416353702545166, "learning_rate": 0.0001911852246584957, "loss": 1.5328, "step": 3407 }, { "epoch": 0.04428540566529628, "grad_norm": 0.45297935605049133, "learning_rate": 0.0001911826251965843, "loss": 1.5303, "step": 3408 }, { "epoch": 0.044298400209212156, "grad_norm": 0.36077067255973816, "learning_rate": 0.00019118002573467293, "loss": 1.3611, "step": 3409 }, { "epoch": 0.04431139475312803, "grad_norm": 0.311235636472702, "learning_rate": 0.00019117742627276153, "loss": 1.3419, "step": 3410 }, { "epoch": 0.0443243892970439, "grad_norm": 0.38337618112564087, "learning_rate": 0.00019117482681085018, "loss": 1.5022, "step": 3411 }, { "epoch": 0.044337383840959775, "grad_norm": 0.3651482164859772, "learning_rate": 0.00019117222734893878, "loss": 1.5406, "step": 3412 }, { "epoch": 0.04435037838487565, "grad_norm": 0.3558707535266876, "learning_rate": 0.0001911696278870274, "loss": 1.5622, "step": 3413 }, { "epoch": 0.04436337292879152, "grad_norm": 0.3880421221256256, "learning_rate": 0.000191167028425116, "loss": 1.5068, "step": 3414 }, { "epoch": 0.044376367472707394, "grad_norm": 0.4164212942123413, "learning_rate": 0.00019116442896320463, "loss": 1.3439, "step": 3415 }, { "epoch": 0.04438936201662327, "grad_norm": 0.36084792017936707, "learning_rate": 0.00019116182950129325, "loss": 1.6476, "step": 3416 }, { "epoch": 0.04440235656053915, "grad_norm": 0.38160771131515503, "learning_rate": 0.00019115923003938185, "loss": 1.3344, "step": 3417 }, { "epoch": 0.04441535110445502, "grad_norm": 0.39801520109176636, "learning_rate": 0.00019115663057747047, "loss": 1.4835, "step": 3418 }, { "epoch": 0.04442834564837089, "grad_norm": 0.4460076689720154, "learning_rate": 0.0001911540311155591, "loss": 1.5934, "step": 3419 }, { "epoch": 0.044441340192286766, "grad_norm": 0.3776906430721283, "learning_rate": 0.0001911514316536477, "loss": 1.3509, "step": 3420 }, { "epoch": 0.04445433473620264, "grad_norm": 0.37701916694641113, "learning_rate": 0.00019114883219173632, "loss": 1.4444, "step": 3421 }, { "epoch": 0.04446732928011851, "grad_norm": 0.39519202709198, "learning_rate": 0.00019114623272982492, "loss": 1.4658, "step": 3422 }, { "epoch": 0.044480323824034385, "grad_norm": 0.4301571547985077, "learning_rate": 0.00019114363326791357, "loss": 1.5473, "step": 3423 }, { "epoch": 0.04449331836795026, "grad_norm": 0.33932965993881226, "learning_rate": 0.00019114103380600217, "loss": 1.4612, "step": 3424 }, { "epoch": 0.04450631291186613, "grad_norm": 0.4156722128391266, "learning_rate": 0.0001911384343440908, "loss": 1.4608, "step": 3425 }, { "epoch": 0.044519307455782, "grad_norm": 0.32418495416641235, "learning_rate": 0.0001911358348821794, "loss": 1.4705, "step": 3426 }, { "epoch": 0.044532301999697876, "grad_norm": 0.3849780559539795, "learning_rate": 0.000191133235420268, "loss": 1.2606, "step": 3427 }, { "epoch": 0.04454529654361375, "grad_norm": 0.37302151322364807, "learning_rate": 0.00019113063595835664, "loss": 1.5298, "step": 3428 }, { "epoch": 0.04455829108752962, "grad_norm": 0.43634387850761414, "learning_rate": 0.00019112803649644523, "loss": 1.7202, "step": 3429 }, { "epoch": 0.044571285631445495, "grad_norm": 0.2565285861492157, "learning_rate": 0.00019112543703453386, "loss": 1.4194, "step": 3430 }, { "epoch": 0.04458428017536137, "grad_norm": 0.41815894842147827, "learning_rate": 0.00019112283757262248, "loss": 1.1767, "step": 3431 }, { "epoch": 0.04459727471927724, "grad_norm": 0.42732083797454834, "learning_rate": 0.00019112023811071108, "loss": 1.4933, "step": 3432 }, { "epoch": 0.044610269263193114, "grad_norm": 0.288154661655426, "learning_rate": 0.0001911176386487997, "loss": 1.2661, "step": 3433 }, { "epoch": 0.04462326380710899, "grad_norm": 0.43552613258361816, "learning_rate": 0.00019111503918688833, "loss": 1.6755, "step": 3434 }, { "epoch": 0.04463625835102486, "grad_norm": 0.38512101769447327, "learning_rate": 0.00019111243972497695, "loss": 1.4476, "step": 3435 }, { "epoch": 0.04464925289494074, "grad_norm": 0.4210730493068695, "learning_rate": 0.00019110984026306555, "loss": 1.5755, "step": 3436 }, { "epoch": 0.04466224743885661, "grad_norm": 0.3965669870376587, "learning_rate": 0.00019110724080115417, "loss": 1.3531, "step": 3437 }, { "epoch": 0.044675241982772486, "grad_norm": 0.3597378134727478, "learning_rate": 0.0001911046413392428, "loss": 1.1933, "step": 3438 }, { "epoch": 0.04468823652668836, "grad_norm": 0.550243616104126, "learning_rate": 0.0001911020418773314, "loss": 1.5067, "step": 3439 }, { "epoch": 0.04470123107060423, "grad_norm": 0.3286186456680298, "learning_rate": 0.00019109944241542002, "loss": 1.4354, "step": 3440 }, { "epoch": 0.044714225614520105, "grad_norm": 0.37238383293151855, "learning_rate": 0.00019109684295350862, "loss": 1.5725, "step": 3441 }, { "epoch": 0.04472722015843598, "grad_norm": 0.43052932620048523, "learning_rate": 0.00019109424349159727, "loss": 1.4398, "step": 3442 }, { "epoch": 0.04474021470235185, "grad_norm": 0.397022545337677, "learning_rate": 0.00019109164402968587, "loss": 1.4153, "step": 3443 }, { "epoch": 0.044753209246267724, "grad_norm": 0.4465096890926361, "learning_rate": 0.00019108904456777447, "loss": 1.8237, "step": 3444 }, { "epoch": 0.0447662037901836, "grad_norm": 0.34367677569389343, "learning_rate": 0.0001910864451058631, "loss": 1.2262, "step": 3445 }, { "epoch": 0.04477919833409947, "grad_norm": 0.368748277425766, "learning_rate": 0.00019108384564395171, "loss": 1.3616, "step": 3446 }, { "epoch": 0.04479219287801534, "grad_norm": 0.3148956894874573, "learning_rate": 0.00019108124618204034, "loss": 1.4302, "step": 3447 }, { "epoch": 0.044805187421931215, "grad_norm": 0.3931005597114563, "learning_rate": 0.00019107864672012894, "loss": 1.4085, "step": 3448 }, { "epoch": 0.04481818196584709, "grad_norm": 0.40465056896209717, "learning_rate": 0.00019107604725821756, "loss": 1.4855, "step": 3449 }, { "epoch": 0.04483117650976296, "grad_norm": 0.5275431871414185, "learning_rate": 0.00019107344779630618, "loss": 1.5041, "step": 3450 }, { "epoch": 0.044844171053678834, "grad_norm": 0.42719629406929016, "learning_rate": 0.00019107084833439478, "loss": 1.5919, "step": 3451 }, { "epoch": 0.04485716559759471, "grad_norm": 0.46026965975761414, "learning_rate": 0.0001910682488724834, "loss": 1.5621, "step": 3452 }, { "epoch": 0.04487016014151058, "grad_norm": 0.3713780343532562, "learning_rate": 0.000191065649410572, "loss": 1.4741, "step": 3453 }, { "epoch": 0.04488315468542645, "grad_norm": 0.4859156608581543, "learning_rate": 0.00019106304994866066, "loss": 1.532, "step": 3454 }, { "epoch": 0.04489614922934233, "grad_norm": 0.3467864692211151, "learning_rate": 0.00019106045048674925, "loss": 1.1647, "step": 3455 }, { "epoch": 0.044909143773258206, "grad_norm": 0.3873916566371918, "learning_rate": 0.00019105785102483785, "loss": 1.5057, "step": 3456 }, { "epoch": 0.04492213831717408, "grad_norm": 0.397622674703598, "learning_rate": 0.00019105525156292647, "loss": 1.5431, "step": 3457 }, { "epoch": 0.04493513286108995, "grad_norm": 0.36541593074798584, "learning_rate": 0.0001910526521010151, "loss": 1.5761, "step": 3458 }, { "epoch": 0.044948127405005825, "grad_norm": 0.40878790616989136, "learning_rate": 0.00019105005263910372, "loss": 1.3912, "step": 3459 }, { "epoch": 0.0449611219489217, "grad_norm": 0.451894074678421, "learning_rate": 0.00019104745317719232, "loss": 1.5588, "step": 3460 }, { "epoch": 0.04497411649283757, "grad_norm": 0.43258681893348694, "learning_rate": 0.00019104485371528095, "loss": 1.5533, "step": 3461 }, { "epoch": 0.044987111036753444, "grad_norm": 0.4070456624031067, "learning_rate": 0.00019104225425336957, "loss": 1.4362, "step": 3462 }, { "epoch": 0.04500010558066932, "grad_norm": 0.4204276502132416, "learning_rate": 0.00019103965479145817, "loss": 1.6424, "step": 3463 }, { "epoch": 0.04501310012458519, "grad_norm": 0.3918856382369995, "learning_rate": 0.0001910370553295468, "loss": 1.427, "step": 3464 }, { "epoch": 0.04502609466850106, "grad_norm": 0.4557289481163025, "learning_rate": 0.0001910344558676354, "loss": 1.3844, "step": 3465 }, { "epoch": 0.045039089212416936, "grad_norm": 0.47764307260513306, "learning_rate": 0.00019103185640572404, "loss": 1.4944, "step": 3466 }, { "epoch": 0.04505208375633281, "grad_norm": 0.4500553607940674, "learning_rate": 0.00019102925694381264, "loss": 1.5151, "step": 3467 }, { "epoch": 0.04506507830024868, "grad_norm": 0.36121290922164917, "learning_rate": 0.00019102665748190126, "loss": 1.279, "step": 3468 }, { "epoch": 0.045078072844164555, "grad_norm": 0.3657602071762085, "learning_rate": 0.0001910240580199899, "loss": 1.2671, "step": 3469 }, { "epoch": 0.04509106738808043, "grad_norm": 0.3611789047718048, "learning_rate": 0.00019102145855807848, "loss": 1.3417, "step": 3470 }, { "epoch": 0.0451040619319963, "grad_norm": 0.34144169092178345, "learning_rate": 0.0001910188590961671, "loss": 1.1636, "step": 3471 }, { "epoch": 0.04511705647591217, "grad_norm": 0.349431574344635, "learning_rate": 0.0001910162596342557, "loss": 1.5859, "step": 3472 }, { "epoch": 0.045130051019828046, "grad_norm": 0.499302476644516, "learning_rate": 0.00019101366017234433, "loss": 1.49, "step": 3473 }, { "epoch": 0.045143045563743926, "grad_norm": 0.2950809895992279, "learning_rate": 0.00019101106071043296, "loss": 1.3242, "step": 3474 }, { "epoch": 0.0451560401076598, "grad_norm": 0.35766127705574036, "learning_rate": 0.00019100846124852155, "loss": 1.5298, "step": 3475 }, { "epoch": 0.04516903465157567, "grad_norm": 0.30416497588157654, "learning_rate": 0.00019100586178661018, "loss": 1.5598, "step": 3476 }, { "epoch": 0.045182029195491545, "grad_norm": 0.4109727740287781, "learning_rate": 0.0001910032623246988, "loss": 1.224, "step": 3477 }, { "epoch": 0.04519502373940742, "grad_norm": 0.3664378523826599, "learning_rate": 0.00019100066286278743, "loss": 1.4657, "step": 3478 }, { "epoch": 0.04520801828332329, "grad_norm": 0.5163008570671082, "learning_rate": 0.00019099806340087602, "loss": 1.4298, "step": 3479 }, { "epoch": 0.045221012827239164, "grad_norm": 0.4767324924468994, "learning_rate": 0.00019099546393896465, "loss": 1.4132, "step": 3480 }, { "epoch": 0.04523400737115504, "grad_norm": 0.35452350974082947, "learning_rate": 0.00019099286447705327, "loss": 1.4272, "step": 3481 }, { "epoch": 0.04524700191507091, "grad_norm": 0.4030720293521881, "learning_rate": 0.00019099026501514187, "loss": 1.6696, "step": 3482 }, { "epoch": 0.04525999645898678, "grad_norm": 0.30554822087287903, "learning_rate": 0.0001909876655532305, "loss": 1.3461, "step": 3483 }, { "epoch": 0.045272991002902656, "grad_norm": 0.4898455739021301, "learning_rate": 0.0001909850660913191, "loss": 1.5115, "step": 3484 }, { "epoch": 0.04528598554681853, "grad_norm": 0.4397023022174835, "learning_rate": 0.00019098246662940774, "loss": 1.3619, "step": 3485 }, { "epoch": 0.0452989800907344, "grad_norm": 0.44009312987327576, "learning_rate": 0.00019097986716749634, "loss": 1.411, "step": 3486 }, { "epoch": 0.045311974634650275, "grad_norm": 0.33126917481422424, "learning_rate": 0.00019097726770558494, "loss": 1.3827, "step": 3487 }, { "epoch": 0.04532496917856615, "grad_norm": 0.3113521337509155, "learning_rate": 0.00019097466824367356, "loss": 1.3504, "step": 3488 }, { "epoch": 0.04533796372248202, "grad_norm": 0.4283364713191986, "learning_rate": 0.0001909720687817622, "loss": 1.4902, "step": 3489 }, { "epoch": 0.045350958266397894, "grad_norm": 0.3907061219215393, "learning_rate": 0.0001909694693198508, "loss": 1.4679, "step": 3490 }, { "epoch": 0.045363952810313767, "grad_norm": 0.4915041923522949, "learning_rate": 0.0001909668698579394, "loss": 1.4044, "step": 3491 }, { "epoch": 0.04537694735422964, "grad_norm": 0.3264465630054474, "learning_rate": 0.00019096427039602803, "loss": 1.4853, "step": 3492 }, { "epoch": 0.04538994189814552, "grad_norm": 0.39820829033851624, "learning_rate": 0.00019096167093411666, "loss": 1.4823, "step": 3493 }, { "epoch": 0.04540293644206139, "grad_norm": 0.32205459475517273, "learning_rate": 0.00019095907147220526, "loss": 1.2417, "step": 3494 }, { "epoch": 0.045415930985977265, "grad_norm": 0.38776856660842896, "learning_rate": 0.00019095647201029388, "loss": 1.4646, "step": 3495 }, { "epoch": 0.04542892552989314, "grad_norm": 0.33155444264411926, "learning_rate": 0.00019095387254838248, "loss": 1.1417, "step": 3496 }, { "epoch": 0.04544192007380901, "grad_norm": 0.3750079870223999, "learning_rate": 0.00019095127308647113, "loss": 1.4263, "step": 3497 }, { "epoch": 0.045454914617724884, "grad_norm": 0.3497723937034607, "learning_rate": 0.00019094867362455973, "loss": 1.4163, "step": 3498 }, { "epoch": 0.04546790916164076, "grad_norm": 0.4343213737010956, "learning_rate": 0.00019094607416264832, "loss": 1.4147, "step": 3499 }, { "epoch": 0.04548090370555663, "grad_norm": 0.46293163299560547, "learning_rate": 0.00019094347470073695, "loss": 1.4799, "step": 3500 }, { "epoch": 0.0454938982494725, "grad_norm": 0.43661198019981384, "learning_rate": 0.00019094087523882557, "loss": 1.4232, "step": 3501 }, { "epoch": 0.045506892793388376, "grad_norm": 0.4032571613788605, "learning_rate": 0.0001909382757769142, "loss": 1.322, "step": 3502 }, { "epoch": 0.04551988733730425, "grad_norm": 0.3524533212184906, "learning_rate": 0.0001909356763150028, "loss": 1.2897, "step": 3503 }, { "epoch": 0.04553288188122012, "grad_norm": 0.3699171543121338, "learning_rate": 0.00019093307685309142, "loss": 1.5074, "step": 3504 }, { "epoch": 0.045545876425135995, "grad_norm": 0.40244174003601074, "learning_rate": 0.00019093047739118004, "loss": 1.5089, "step": 3505 }, { "epoch": 0.04555887096905187, "grad_norm": 0.37908700108528137, "learning_rate": 0.00019092787792926864, "loss": 1.5981, "step": 3506 }, { "epoch": 0.04557186551296774, "grad_norm": 0.3592241108417511, "learning_rate": 0.00019092527846735727, "loss": 1.38, "step": 3507 }, { "epoch": 0.045584860056883614, "grad_norm": 0.45989441871643066, "learning_rate": 0.0001909226790054459, "loss": 1.3989, "step": 3508 }, { "epoch": 0.04559785460079949, "grad_norm": 0.3594523072242737, "learning_rate": 0.00019092007954353451, "loss": 1.4228, "step": 3509 }, { "epoch": 0.04561084914471536, "grad_norm": 0.37525588274002075, "learning_rate": 0.0001909174800816231, "loss": 1.4879, "step": 3510 }, { "epoch": 0.04562384368863123, "grad_norm": 0.4640478193759918, "learning_rate": 0.0001909148806197117, "loss": 1.5419, "step": 3511 }, { "epoch": 0.04563683823254711, "grad_norm": 0.34540554881095886, "learning_rate": 0.00019091228115780036, "loss": 1.596, "step": 3512 }, { "epoch": 0.045649832776462985, "grad_norm": 0.35171157121658325, "learning_rate": 0.00019090968169588896, "loss": 1.4258, "step": 3513 }, { "epoch": 0.04566282732037886, "grad_norm": 0.3607141077518463, "learning_rate": 0.00019090708223397758, "loss": 1.4261, "step": 3514 }, { "epoch": 0.04567582186429473, "grad_norm": 0.31665804982185364, "learning_rate": 0.00019090448277206618, "loss": 1.3936, "step": 3515 }, { "epoch": 0.045688816408210604, "grad_norm": 0.3263287842273712, "learning_rate": 0.0001909018833101548, "loss": 1.4128, "step": 3516 }, { "epoch": 0.04570181095212648, "grad_norm": 0.4578251242637634, "learning_rate": 0.00019089928384824343, "loss": 1.4633, "step": 3517 }, { "epoch": 0.04571480549604235, "grad_norm": 0.39254575967788696, "learning_rate": 0.00019089668438633203, "loss": 1.4463, "step": 3518 }, { "epoch": 0.04572780003995822, "grad_norm": 0.4729418456554413, "learning_rate": 0.00019089408492442065, "loss": 1.4424, "step": 3519 }, { "epoch": 0.045740794583874096, "grad_norm": 0.3395959138870239, "learning_rate": 0.00019089148546250928, "loss": 1.5223, "step": 3520 }, { "epoch": 0.04575378912778997, "grad_norm": 0.4151439070701599, "learning_rate": 0.0001908888860005979, "loss": 1.5108, "step": 3521 }, { "epoch": 0.04576678367170584, "grad_norm": 0.31169265508651733, "learning_rate": 0.0001908862865386865, "loss": 1.339, "step": 3522 }, { "epoch": 0.045779778215621715, "grad_norm": 0.38498082756996155, "learning_rate": 0.00019088368707677512, "loss": 1.5744, "step": 3523 }, { "epoch": 0.04579277275953759, "grad_norm": 0.46308454871177673, "learning_rate": 0.00019088108761486375, "loss": 1.5169, "step": 3524 }, { "epoch": 0.04580576730345346, "grad_norm": 0.36932235956192017, "learning_rate": 0.00019087848815295234, "loss": 1.5982, "step": 3525 }, { "epoch": 0.045818761847369334, "grad_norm": 0.4504307806491852, "learning_rate": 0.00019087588869104097, "loss": 1.2685, "step": 3526 }, { "epoch": 0.04583175639128521, "grad_norm": 0.4500713646411896, "learning_rate": 0.00019087328922912957, "loss": 1.5148, "step": 3527 }, { "epoch": 0.04584475093520108, "grad_norm": 0.43191471695899963, "learning_rate": 0.0001908706897672182, "loss": 1.3907, "step": 3528 }, { "epoch": 0.04585774547911695, "grad_norm": 0.23284229636192322, "learning_rate": 0.00019086809030530681, "loss": 1.4453, "step": 3529 }, { "epoch": 0.045870740023032826, "grad_norm": 0.39071956276893616, "learning_rate": 0.0001908654908433954, "loss": 1.5298, "step": 3530 }, { "epoch": 0.045883734566948706, "grad_norm": 0.4016498327255249, "learning_rate": 0.00019086289138148404, "loss": 1.3926, "step": 3531 }, { "epoch": 0.04589672911086458, "grad_norm": 0.39591899514198303, "learning_rate": 0.00019086029191957266, "loss": 1.7326, "step": 3532 }, { "epoch": 0.04590972365478045, "grad_norm": 0.5015997290611267, "learning_rate": 0.00019085769245766129, "loss": 1.5801, "step": 3533 }, { "epoch": 0.045922718198696325, "grad_norm": 0.37236592173576355, "learning_rate": 0.00019085509299574988, "loss": 1.3658, "step": 3534 }, { "epoch": 0.0459357127426122, "grad_norm": 0.3775648772716522, "learning_rate": 0.0001908524935338385, "loss": 1.4203, "step": 3535 }, { "epoch": 0.04594870728652807, "grad_norm": 0.41073334217071533, "learning_rate": 0.00019084989407192713, "loss": 1.3554, "step": 3536 }, { "epoch": 0.04596170183044394, "grad_norm": 0.38080254197120667, "learning_rate": 0.00019084729461001573, "loss": 1.3259, "step": 3537 }, { "epoch": 0.045974696374359816, "grad_norm": 0.3639844059944153, "learning_rate": 0.00019084469514810435, "loss": 1.5634, "step": 3538 }, { "epoch": 0.04598769091827569, "grad_norm": 0.4045422673225403, "learning_rate": 0.00019084209568619295, "loss": 1.6289, "step": 3539 }, { "epoch": 0.04600068546219156, "grad_norm": 0.3872968554496765, "learning_rate": 0.00019083949622428158, "loss": 1.3977, "step": 3540 }, { "epoch": 0.046013680006107435, "grad_norm": 0.33250367641448975, "learning_rate": 0.0001908368967623702, "loss": 1.4352, "step": 3541 }, { "epoch": 0.04602667455002331, "grad_norm": 0.6230947971343994, "learning_rate": 0.0001908342973004588, "loss": 1.3423, "step": 3542 }, { "epoch": 0.04603966909393918, "grad_norm": 0.3451698422431946, "learning_rate": 0.00019083169783854745, "loss": 1.4538, "step": 3543 }, { "epoch": 0.046052663637855054, "grad_norm": 0.4396614730358124, "learning_rate": 0.00019082909837663605, "loss": 1.4726, "step": 3544 }, { "epoch": 0.04606565818177093, "grad_norm": 0.4091491997241974, "learning_rate": 0.00019082649891472467, "loss": 1.7656, "step": 3545 }, { "epoch": 0.0460786527256868, "grad_norm": 0.341570109128952, "learning_rate": 0.00019082389945281327, "loss": 1.2968, "step": 3546 }, { "epoch": 0.04609164726960267, "grad_norm": 0.3608258366584778, "learning_rate": 0.0001908212999909019, "loss": 1.361, "step": 3547 }, { "epoch": 0.046104641813518546, "grad_norm": 0.44565585255622864, "learning_rate": 0.00019081870052899052, "loss": 1.4502, "step": 3548 }, { "epoch": 0.04611763635743442, "grad_norm": 0.4324675500392914, "learning_rate": 0.00019081610106707911, "loss": 1.5883, "step": 3549 }, { "epoch": 0.0461306309013503, "grad_norm": 0.41858798265457153, "learning_rate": 0.00019081350160516774, "loss": 1.5429, "step": 3550 }, { "epoch": 0.04614362544526617, "grad_norm": 0.41675299406051636, "learning_rate": 0.00019081090214325636, "loss": 1.3907, "step": 3551 }, { "epoch": 0.046156619989182045, "grad_norm": 0.45589423179626465, "learning_rate": 0.000190808302681345, "loss": 1.4325, "step": 3552 }, { "epoch": 0.04616961453309792, "grad_norm": 0.37698423862457275, "learning_rate": 0.00019080570321943359, "loss": 1.5058, "step": 3553 }, { "epoch": 0.04618260907701379, "grad_norm": 0.29102256894111633, "learning_rate": 0.00019080310375752218, "loss": 1.2826, "step": 3554 }, { "epoch": 0.046195603620929664, "grad_norm": 0.3150346875190735, "learning_rate": 0.00019080050429561083, "loss": 1.4803, "step": 3555 }, { "epoch": 0.04620859816484554, "grad_norm": 0.42382198572158813, "learning_rate": 0.00019079790483369943, "loss": 1.4414, "step": 3556 }, { "epoch": 0.04622159270876141, "grad_norm": 0.48112672567367554, "learning_rate": 0.00019079530537178806, "loss": 1.3901, "step": 3557 }, { "epoch": 0.04623458725267728, "grad_norm": 0.2613075375556946, "learning_rate": 0.00019079270590987665, "loss": 1.334, "step": 3558 }, { "epoch": 0.046247581796593155, "grad_norm": 0.5119329690933228, "learning_rate": 0.00019079010644796528, "loss": 1.5087, "step": 3559 }, { "epoch": 0.04626057634050903, "grad_norm": 0.4582822620868683, "learning_rate": 0.0001907875069860539, "loss": 1.3544, "step": 3560 }, { "epoch": 0.0462735708844249, "grad_norm": 0.38838452100753784, "learning_rate": 0.0001907849075241425, "loss": 1.5, "step": 3561 }, { "epoch": 0.046286565428340774, "grad_norm": 0.41043031215667725, "learning_rate": 0.00019078230806223112, "loss": 1.4815, "step": 3562 }, { "epoch": 0.04629955997225665, "grad_norm": 0.3754003345966339, "learning_rate": 0.00019077970860031975, "loss": 1.4818, "step": 3563 }, { "epoch": 0.04631255451617252, "grad_norm": 0.30324748158454895, "learning_rate": 0.00019077710913840837, "loss": 1.3506, "step": 3564 }, { "epoch": 0.04632554906008839, "grad_norm": 0.3835824131965637, "learning_rate": 0.00019077450967649697, "loss": 1.5176, "step": 3565 }, { "epoch": 0.046338543604004266, "grad_norm": 0.34142348170280457, "learning_rate": 0.00019077191021458557, "loss": 1.3284, "step": 3566 }, { "epoch": 0.04635153814792014, "grad_norm": 0.4325087368488312, "learning_rate": 0.00019076931075267422, "loss": 1.5274, "step": 3567 }, { "epoch": 0.04636453269183601, "grad_norm": 0.41978904604911804, "learning_rate": 0.00019076671129076282, "loss": 1.3381, "step": 3568 }, { "epoch": 0.04637752723575189, "grad_norm": 0.43123379349708557, "learning_rate": 0.00019076411182885144, "loss": 1.5037, "step": 3569 }, { "epoch": 0.046390521779667765, "grad_norm": 0.3783569037914276, "learning_rate": 0.00019076151236694004, "loss": 1.3391, "step": 3570 }, { "epoch": 0.04640351632358364, "grad_norm": 0.4903333783149719, "learning_rate": 0.00019075891290502866, "loss": 1.5181, "step": 3571 }, { "epoch": 0.04641651086749951, "grad_norm": 0.383445143699646, "learning_rate": 0.0001907563134431173, "loss": 1.0928, "step": 3572 }, { "epoch": 0.046429505411415384, "grad_norm": 0.4517488181591034, "learning_rate": 0.00019075371398120589, "loss": 1.6872, "step": 3573 }, { "epoch": 0.04644249995533126, "grad_norm": 0.3907638192176819, "learning_rate": 0.0001907511145192945, "loss": 1.2761, "step": 3574 }, { "epoch": 0.04645549449924713, "grad_norm": 0.4366452991962433, "learning_rate": 0.00019074851505738313, "loss": 1.39, "step": 3575 }, { "epoch": 0.046468489043163, "grad_norm": 0.46755126118659973, "learning_rate": 0.00019074591559547176, "loss": 1.5274, "step": 3576 }, { "epoch": 0.046481483587078876, "grad_norm": 0.4617554545402527, "learning_rate": 0.00019074331613356036, "loss": 1.5557, "step": 3577 }, { "epoch": 0.04649447813099475, "grad_norm": 0.38301703333854675, "learning_rate": 0.00019074071667164898, "loss": 1.3252, "step": 3578 }, { "epoch": 0.04650747267491062, "grad_norm": 0.4291174113750458, "learning_rate": 0.0001907381172097376, "loss": 1.3795, "step": 3579 }, { "epoch": 0.046520467218826494, "grad_norm": 0.4066070318222046, "learning_rate": 0.0001907355177478262, "loss": 1.4005, "step": 3580 }, { "epoch": 0.04653346176274237, "grad_norm": 0.3907028138637543, "learning_rate": 0.00019073291828591483, "loss": 1.3814, "step": 3581 }, { "epoch": 0.04654645630665824, "grad_norm": 0.4663504660129547, "learning_rate": 0.00019073031882400345, "loss": 1.5346, "step": 3582 }, { "epoch": 0.04655945085057411, "grad_norm": 0.32876548171043396, "learning_rate": 0.00019072771936209205, "loss": 1.5176, "step": 3583 }, { "epoch": 0.046572445394489986, "grad_norm": 0.45175355672836304, "learning_rate": 0.00019072511990018067, "loss": 1.3537, "step": 3584 }, { "epoch": 0.04658543993840586, "grad_norm": 0.35481515526771545, "learning_rate": 0.00019072252043826927, "loss": 1.5478, "step": 3585 }, { "epoch": 0.04659843448232173, "grad_norm": 0.29223692417144775, "learning_rate": 0.00019071992097635792, "loss": 1.6045, "step": 3586 }, { "epoch": 0.046611429026237605, "grad_norm": 0.3558688759803772, "learning_rate": 0.00019071732151444652, "loss": 1.4885, "step": 3587 }, { "epoch": 0.046624423570153485, "grad_norm": 0.4412180781364441, "learning_rate": 0.00019071472205253514, "loss": 1.4081, "step": 3588 }, { "epoch": 0.04663741811406936, "grad_norm": 0.44046077132225037, "learning_rate": 0.00019071212259062374, "loss": 1.4469, "step": 3589 }, { "epoch": 0.04665041265798523, "grad_norm": 0.3765508532524109, "learning_rate": 0.00019070952312871237, "loss": 1.2601, "step": 3590 }, { "epoch": 0.046663407201901104, "grad_norm": 0.4437933564186096, "learning_rate": 0.000190706923666801, "loss": 1.6313, "step": 3591 }, { "epoch": 0.04667640174581698, "grad_norm": 0.4332149624824524, "learning_rate": 0.0001907043242048896, "loss": 1.4368, "step": 3592 }, { "epoch": 0.04668939628973285, "grad_norm": 0.5110267996788025, "learning_rate": 0.0001907017247429782, "loss": 1.4827, "step": 3593 }, { "epoch": 0.04670239083364872, "grad_norm": 0.3366774916648865, "learning_rate": 0.00019069912528106684, "loss": 1.4352, "step": 3594 }, { "epoch": 0.046715385377564596, "grad_norm": 0.35826683044433594, "learning_rate": 0.00019069652581915543, "loss": 1.512, "step": 3595 }, { "epoch": 0.04672837992148047, "grad_norm": 0.4029589593410492, "learning_rate": 0.00019069392635724406, "loss": 1.4252, "step": 3596 }, { "epoch": 0.04674137446539634, "grad_norm": 0.4411650002002716, "learning_rate": 0.00019069132689533266, "loss": 1.5004, "step": 3597 }, { "epoch": 0.046754369009312215, "grad_norm": 0.4946557581424713, "learning_rate": 0.0001906887274334213, "loss": 1.3942, "step": 3598 }, { "epoch": 0.04676736355322809, "grad_norm": 0.4434962868690491, "learning_rate": 0.0001906861279715099, "loss": 1.4991, "step": 3599 }, { "epoch": 0.04678035809714396, "grad_norm": 0.338807076215744, "learning_rate": 0.00019068352850959853, "loss": 1.1774, "step": 3600 }, { "epoch": 0.046793352641059834, "grad_norm": 0.40168771147727966, "learning_rate": 0.00019068092904768713, "loss": 1.5569, "step": 3601 }, { "epoch": 0.046806347184975707, "grad_norm": 0.3353491425514221, "learning_rate": 0.00019067832958577575, "loss": 1.4, "step": 3602 }, { "epoch": 0.04681934172889158, "grad_norm": 0.4418715238571167, "learning_rate": 0.00019067573012386438, "loss": 1.4268, "step": 3603 }, { "epoch": 0.04683233627280745, "grad_norm": 0.3415584862232208, "learning_rate": 0.00019067313066195297, "loss": 1.3771, "step": 3604 }, { "epoch": 0.046845330816723325, "grad_norm": 0.48909637331962585, "learning_rate": 0.0001906705312000416, "loss": 1.3793, "step": 3605 }, { "epoch": 0.0468583253606392, "grad_norm": 0.3500750958919525, "learning_rate": 0.00019066793173813022, "loss": 1.4094, "step": 3606 }, { "epoch": 0.04687131990455508, "grad_norm": 0.3301793038845062, "learning_rate": 0.00019066533227621885, "loss": 1.388, "step": 3607 }, { "epoch": 0.04688431444847095, "grad_norm": 0.4042547345161438, "learning_rate": 0.00019066273281430744, "loss": 1.4307, "step": 3608 }, { "epoch": 0.046897308992386824, "grad_norm": 0.31483331322669983, "learning_rate": 0.00019066013335239604, "loss": 1.3205, "step": 3609 }, { "epoch": 0.0469103035363027, "grad_norm": 0.40097078680992126, "learning_rate": 0.0001906575338904847, "loss": 1.6694, "step": 3610 }, { "epoch": 0.04692329808021857, "grad_norm": 0.3837755620479584, "learning_rate": 0.0001906549344285733, "loss": 1.5345, "step": 3611 }, { "epoch": 0.04693629262413444, "grad_norm": 0.4778191149234772, "learning_rate": 0.00019065233496666191, "loss": 1.5407, "step": 3612 }, { "epoch": 0.046949287168050316, "grad_norm": 0.415060430765152, "learning_rate": 0.0001906497355047505, "loss": 1.5277, "step": 3613 }, { "epoch": 0.04696228171196619, "grad_norm": 0.2776385247707367, "learning_rate": 0.00019064713604283914, "loss": 1.361, "step": 3614 }, { "epoch": 0.04697527625588206, "grad_norm": 0.3970852792263031, "learning_rate": 0.00019064453658092776, "loss": 1.2939, "step": 3615 }, { "epoch": 0.046988270799797935, "grad_norm": 0.36459505558013916, "learning_rate": 0.00019064193711901636, "loss": 1.4153, "step": 3616 }, { "epoch": 0.04700126534371381, "grad_norm": 0.45243555307388306, "learning_rate": 0.000190639337657105, "loss": 1.5853, "step": 3617 }, { "epoch": 0.04701425988762968, "grad_norm": 0.37392571568489075, "learning_rate": 0.0001906367381951936, "loss": 1.4884, "step": 3618 }, { "epoch": 0.047027254431545554, "grad_norm": 0.3919658660888672, "learning_rate": 0.00019063413873328223, "loss": 1.4168, "step": 3619 }, { "epoch": 0.04704024897546143, "grad_norm": 0.3406011462211609, "learning_rate": 0.00019063153927137083, "loss": 1.4847, "step": 3620 }, { "epoch": 0.0470532435193773, "grad_norm": 0.34002724289894104, "learning_rate": 0.00019062893980945945, "loss": 1.4379, "step": 3621 }, { "epoch": 0.04706623806329317, "grad_norm": 0.3958209753036499, "learning_rate": 0.00019062634034754808, "loss": 1.526, "step": 3622 }, { "epoch": 0.047079232607209046, "grad_norm": 0.3342420160770416, "learning_rate": 0.00019062374088563668, "loss": 1.5268, "step": 3623 }, { "epoch": 0.04709222715112492, "grad_norm": 0.45454952120780945, "learning_rate": 0.0001906211414237253, "loss": 1.6099, "step": 3624 }, { "epoch": 0.04710522169504079, "grad_norm": 0.3253743648529053, "learning_rate": 0.00019061854196181392, "loss": 1.6407, "step": 3625 }, { "epoch": 0.04711821623895667, "grad_norm": 0.4331594705581665, "learning_rate": 0.00019061594249990252, "loss": 1.6284, "step": 3626 }, { "epoch": 0.047131210782872544, "grad_norm": 0.41288936138153076, "learning_rate": 0.00019061334303799115, "loss": 1.5875, "step": 3627 }, { "epoch": 0.04714420532678842, "grad_norm": 0.22082705795764923, "learning_rate": 0.00019061074357607974, "loss": 1.2422, "step": 3628 }, { "epoch": 0.04715719987070429, "grad_norm": 0.450970321893692, "learning_rate": 0.0001906081441141684, "loss": 1.5674, "step": 3629 }, { "epoch": 0.04717019441462016, "grad_norm": 0.3594655990600586, "learning_rate": 0.000190605544652257, "loss": 1.3607, "step": 3630 }, { "epoch": 0.047183188958536036, "grad_norm": 0.4247593581676483, "learning_rate": 0.00019060294519034562, "loss": 1.352, "step": 3631 }, { "epoch": 0.04719618350245191, "grad_norm": 0.36219149827957153, "learning_rate": 0.00019060034572843421, "loss": 1.4978, "step": 3632 }, { "epoch": 0.04720917804636778, "grad_norm": 0.36923158168792725, "learning_rate": 0.00019059774626652284, "loss": 1.3403, "step": 3633 }, { "epoch": 0.047222172590283655, "grad_norm": 0.414307177066803, "learning_rate": 0.00019059514680461146, "loss": 1.4527, "step": 3634 }, { "epoch": 0.04723516713419953, "grad_norm": 0.330245703458786, "learning_rate": 0.00019059254734270006, "loss": 1.2773, "step": 3635 }, { "epoch": 0.0472481616781154, "grad_norm": 0.35710257291793823, "learning_rate": 0.00019058994788078869, "loss": 1.2525, "step": 3636 }, { "epoch": 0.047261156222031274, "grad_norm": 0.34154680371284485, "learning_rate": 0.0001905873484188773, "loss": 1.3289, "step": 3637 }, { "epoch": 0.04727415076594715, "grad_norm": 0.4402523338794708, "learning_rate": 0.0001905847489569659, "loss": 1.5782, "step": 3638 }, { "epoch": 0.04728714530986302, "grad_norm": 0.37777984142303467, "learning_rate": 0.00019058214949505453, "loss": 1.4294, "step": 3639 }, { "epoch": 0.04730013985377889, "grad_norm": 0.43438291549682617, "learning_rate": 0.00019057955003314313, "loss": 1.4628, "step": 3640 }, { "epoch": 0.047313134397694766, "grad_norm": 0.3909463882446289, "learning_rate": 0.00019057695057123178, "loss": 1.4606, "step": 3641 }, { "epoch": 0.04732612894161064, "grad_norm": 0.3320329189300537, "learning_rate": 0.00019057435110932038, "loss": 1.3508, "step": 3642 }, { "epoch": 0.04733912348552651, "grad_norm": 0.32663288712501526, "learning_rate": 0.000190571751647409, "loss": 1.4622, "step": 3643 }, { "epoch": 0.047352118029442385, "grad_norm": 0.3893803358078003, "learning_rate": 0.0001905691521854976, "loss": 1.3823, "step": 3644 }, { "epoch": 0.047365112573358265, "grad_norm": 0.3809978663921356, "learning_rate": 0.00019056655272358622, "loss": 1.4029, "step": 3645 }, { "epoch": 0.04737810711727414, "grad_norm": 0.4102168381214142, "learning_rate": 0.00019056395326167485, "loss": 1.3259, "step": 3646 }, { "epoch": 0.04739110166119001, "grad_norm": 0.3692684471607208, "learning_rate": 0.00019056135379976345, "loss": 1.4453, "step": 3647 }, { "epoch": 0.04740409620510588, "grad_norm": 0.3841249942779541, "learning_rate": 0.00019055875433785207, "loss": 1.5368, "step": 3648 }, { "epoch": 0.047417090749021756, "grad_norm": 0.35412758588790894, "learning_rate": 0.0001905561548759407, "loss": 1.6147, "step": 3649 }, { "epoch": 0.04743008529293763, "grad_norm": 0.38396957516670227, "learning_rate": 0.0001905535554140293, "loss": 1.4088, "step": 3650 }, { "epoch": 0.0474430798368535, "grad_norm": 0.45067089796066284, "learning_rate": 0.00019055095595211792, "loss": 1.5147, "step": 3651 }, { "epoch": 0.047456074380769375, "grad_norm": 0.40251994132995605, "learning_rate": 0.00019054835649020651, "loss": 1.3527, "step": 3652 }, { "epoch": 0.04746906892468525, "grad_norm": 0.30425411462783813, "learning_rate": 0.00019054575702829517, "loss": 1.2581, "step": 3653 }, { "epoch": 0.04748206346860112, "grad_norm": 0.3596998155117035, "learning_rate": 0.00019054315756638376, "loss": 1.508, "step": 3654 }, { "epoch": 0.047495058012516994, "grad_norm": 0.3922152519226074, "learning_rate": 0.0001905405581044724, "loss": 1.3969, "step": 3655 }, { "epoch": 0.04750805255643287, "grad_norm": 0.3967946767807007, "learning_rate": 0.000190537958642561, "loss": 1.3867, "step": 3656 }, { "epoch": 0.04752104710034874, "grad_norm": 0.39123407006263733, "learning_rate": 0.0001905353591806496, "loss": 1.4557, "step": 3657 }, { "epoch": 0.04753404164426461, "grad_norm": 0.33910247683525085, "learning_rate": 0.00019053275971873823, "loss": 1.5389, "step": 3658 }, { "epoch": 0.047547036188180486, "grad_norm": 0.3255355656147003, "learning_rate": 0.00019053016025682683, "loss": 1.4173, "step": 3659 }, { "epoch": 0.04756003073209636, "grad_norm": 0.38558611273765564, "learning_rate": 0.00019052756079491548, "loss": 1.635, "step": 3660 }, { "epoch": 0.04757302527601223, "grad_norm": 0.318462610244751, "learning_rate": 0.00019052496133300408, "loss": 1.3435, "step": 3661 }, { "epoch": 0.047586019819928105, "grad_norm": 0.4146612584590912, "learning_rate": 0.00019052236187109268, "loss": 1.1471, "step": 3662 }, { "epoch": 0.04759901436384398, "grad_norm": 0.5100176930427551, "learning_rate": 0.0001905197624091813, "loss": 1.5207, "step": 3663 }, { "epoch": 0.04761200890775986, "grad_norm": 0.2977929413318634, "learning_rate": 0.00019051716294726993, "loss": 1.3735, "step": 3664 }, { "epoch": 0.04762500345167573, "grad_norm": 0.38414695858955383, "learning_rate": 0.00019051456348535855, "loss": 1.4907, "step": 3665 }, { "epoch": 0.047637997995591604, "grad_norm": 0.2535100281238556, "learning_rate": 0.00019051196402344715, "loss": 1.2081, "step": 3666 }, { "epoch": 0.04765099253950748, "grad_norm": 0.34876739978790283, "learning_rate": 0.00019050936456153577, "loss": 1.3364, "step": 3667 }, { "epoch": 0.04766398708342335, "grad_norm": 0.3137843906879425, "learning_rate": 0.0001905067650996244, "loss": 1.1271, "step": 3668 }, { "epoch": 0.04767698162733922, "grad_norm": 0.44447457790374756, "learning_rate": 0.000190504165637713, "loss": 1.4365, "step": 3669 }, { "epoch": 0.047689976171255095, "grad_norm": 0.3810134828090668, "learning_rate": 0.00019050156617580162, "loss": 1.6645, "step": 3670 }, { "epoch": 0.04770297071517097, "grad_norm": 0.42089030146598816, "learning_rate": 0.00019049896671389022, "loss": 1.4278, "step": 3671 }, { "epoch": 0.04771596525908684, "grad_norm": 0.45074015855789185, "learning_rate": 0.00019049636725197887, "loss": 1.4899, "step": 3672 }, { "epoch": 0.047728959803002714, "grad_norm": 0.41166314482688904, "learning_rate": 0.00019049376779006747, "loss": 1.6006, "step": 3673 }, { "epoch": 0.04774195434691859, "grad_norm": 0.47895753383636475, "learning_rate": 0.0001904911683281561, "loss": 1.5603, "step": 3674 }, { "epoch": 0.04775494889083446, "grad_norm": 0.36243027448654175, "learning_rate": 0.0001904885688662447, "loss": 1.5536, "step": 3675 }, { "epoch": 0.04776794343475033, "grad_norm": 0.4041252136230469, "learning_rate": 0.0001904859694043333, "loss": 1.4372, "step": 3676 }, { "epoch": 0.047780937978666206, "grad_norm": 0.3591209948062897, "learning_rate": 0.00019048336994242194, "loss": 1.3041, "step": 3677 }, { "epoch": 0.04779393252258208, "grad_norm": 0.4013978838920593, "learning_rate": 0.00019048077048051053, "loss": 1.6379, "step": 3678 }, { "epoch": 0.04780692706649795, "grad_norm": 0.39858371019363403, "learning_rate": 0.00019047817101859916, "loss": 1.4992, "step": 3679 }, { "epoch": 0.047819921610413825, "grad_norm": 0.43754443526268005, "learning_rate": 0.00019047557155668778, "loss": 1.3867, "step": 3680 }, { "epoch": 0.0478329161543297, "grad_norm": 0.37209850549697876, "learning_rate": 0.00019047297209477638, "loss": 1.3733, "step": 3681 }, { "epoch": 0.04784591069824557, "grad_norm": 0.38896167278289795, "learning_rate": 0.000190470372632865, "loss": 1.5887, "step": 3682 }, { "epoch": 0.04785890524216145, "grad_norm": 0.36297520995140076, "learning_rate": 0.0001904677731709536, "loss": 1.4573, "step": 3683 }, { "epoch": 0.047871899786077324, "grad_norm": 0.2979295253753662, "learning_rate": 0.00019046517370904225, "loss": 1.3184, "step": 3684 }, { "epoch": 0.0478848943299932, "grad_norm": 0.3434945344924927, "learning_rate": 0.00019046257424713085, "loss": 1.3364, "step": 3685 }, { "epoch": 0.04789788887390907, "grad_norm": 0.4513181746006012, "learning_rate": 0.00019045997478521948, "loss": 1.5704, "step": 3686 }, { "epoch": 0.04791088341782494, "grad_norm": 0.43385082483291626, "learning_rate": 0.00019045737532330807, "loss": 1.4051, "step": 3687 }, { "epoch": 0.047923877961740816, "grad_norm": 0.391757607460022, "learning_rate": 0.0001904547758613967, "loss": 1.5644, "step": 3688 }, { "epoch": 0.04793687250565669, "grad_norm": 0.3542194068431854, "learning_rate": 0.00019045217639948532, "loss": 1.5415, "step": 3689 }, { "epoch": 0.04794986704957256, "grad_norm": 0.3492177426815033, "learning_rate": 0.00019044957693757392, "loss": 1.3601, "step": 3690 }, { "epoch": 0.047962861593488434, "grad_norm": 0.3090565800666809, "learning_rate": 0.00019044697747566257, "loss": 1.3153, "step": 3691 }, { "epoch": 0.04797585613740431, "grad_norm": 0.4005926549434662, "learning_rate": 0.00019044437801375117, "loss": 1.7639, "step": 3692 }, { "epoch": 0.04798885068132018, "grad_norm": 0.691261887550354, "learning_rate": 0.00019044177855183977, "loss": 1.5533, "step": 3693 }, { "epoch": 0.04800184522523605, "grad_norm": 0.4005010724067688, "learning_rate": 0.0001904391790899284, "loss": 1.4576, "step": 3694 }, { "epoch": 0.048014839769151926, "grad_norm": 0.4017079472541809, "learning_rate": 0.00019043657962801702, "loss": 1.4879, "step": 3695 }, { "epoch": 0.0480278343130678, "grad_norm": 0.39686429500579834, "learning_rate": 0.00019043398016610564, "loss": 1.4527, "step": 3696 }, { "epoch": 0.04804082885698367, "grad_norm": 0.40051910281181335, "learning_rate": 0.00019043138070419424, "loss": 1.4086, "step": 3697 }, { "epoch": 0.048053823400899545, "grad_norm": 0.3536207377910614, "learning_rate": 0.00019042878124228286, "loss": 1.2281, "step": 3698 }, { "epoch": 0.04806681794481542, "grad_norm": 0.3526003658771515, "learning_rate": 0.00019042618178037149, "loss": 1.2182, "step": 3699 }, { "epoch": 0.04807981248873129, "grad_norm": 0.37056267261505127, "learning_rate": 0.00019042358231846008, "loss": 1.463, "step": 3700 }, { "epoch": 0.048092807032647164, "grad_norm": 0.33729904890060425, "learning_rate": 0.0001904209828565487, "loss": 1.4765, "step": 3701 }, { "epoch": 0.048105801576563044, "grad_norm": 0.31484049558639526, "learning_rate": 0.0001904183833946373, "loss": 1.3468, "step": 3702 }, { "epoch": 0.04811879612047892, "grad_norm": 0.6188753843307495, "learning_rate": 0.00019041578393272596, "loss": 1.4841, "step": 3703 }, { "epoch": 0.04813179066439479, "grad_norm": 0.36766317486763, "learning_rate": 0.00019041318447081455, "loss": 1.4597, "step": 3704 }, { "epoch": 0.04814478520831066, "grad_norm": 0.3956480026245117, "learning_rate": 0.00019041058500890315, "loss": 1.5674, "step": 3705 }, { "epoch": 0.048157779752226536, "grad_norm": 0.466078519821167, "learning_rate": 0.00019040798554699178, "loss": 1.4406, "step": 3706 }, { "epoch": 0.04817077429614241, "grad_norm": 0.4860243797302246, "learning_rate": 0.0001904053860850804, "loss": 1.4233, "step": 3707 }, { "epoch": 0.04818376884005828, "grad_norm": 0.35091403126716614, "learning_rate": 0.00019040278662316902, "loss": 1.5212, "step": 3708 }, { "epoch": 0.048196763383974155, "grad_norm": 0.4561355412006378, "learning_rate": 0.00019040018716125762, "loss": 1.4089, "step": 3709 }, { "epoch": 0.04820975792789003, "grad_norm": 0.3972288966178894, "learning_rate": 0.00019039758769934625, "loss": 1.4688, "step": 3710 }, { "epoch": 0.0482227524718059, "grad_norm": 0.35800743103027344, "learning_rate": 0.00019039498823743487, "loss": 1.3091, "step": 3711 }, { "epoch": 0.048235747015721774, "grad_norm": 0.3681759834289551, "learning_rate": 0.00019039238877552347, "loss": 1.4073, "step": 3712 }, { "epoch": 0.048248741559637646, "grad_norm": 0.3054344952106476, "learning_rate": 0.0001903897893136121, "loss": 1.3646, "step": 3713 }, { "epoch": 0.04826173610355352, "grad_norm": 0.37973904609680176, "learning_rate": 0.0001903871898517007, "loss": 1.5879, "step": 3714 }, { "epoch": 0.04827473064746939, "grad_norm": 0.46718481183052063, "learning_rate": 0.00019038459038978934, "loss": 1.5299, "step": 3715 }, { "epoch": 0.048287725191385265, "grad_norm": 0.39083757996559143, "learning_rate": 0.00019038199092787794, "loss": 1.295, "step": 3716 }, { "epoch": 0.04830071973530114, "grad_norm": 0.2630516588687897, "learning_rate": 0.00019037939146596654, "loss": 1.3067, "step": 3717 }, { "epoch": 0.04831371427921701, "grad_norm": 0.3986029028892517, "learning_rate": 0.00019037679200405516, "loss": 1.4904, "step": 3718 }, { "epoch": 0.048326708823132884, "grad_norm": 0.5132040977478027, "learning_rate": 0.00019037419254214379, "loss": 1.5321, "step": 3719 }, { "epoch": 0.04833970336704876, "grad_norm": 0.37517425417900085, "learning_rate": 0.0001903715930802324, "loss": 1.4871, "step": 3720 }, { "epoch": 0.04835269791096464, "grad_norm": 0.3649660646915436, "learning_rate": 0.000190368993618321, "loss": 1.1234, "step": 3721 }, { "epoch": 0.04836569245488051, "grad_norm": 0.4588010907173157, "learning_rate": 0.00019036639415640963, "loss": 1.7344, "step": 3722 }, { "epoch": 0.04837868699879638, "grad_norm": 0.3188712000846863, "learning_rate": 0.00019036379469449826, "loss": 1.519, "step": 3723 }, { "epoch": 0.048391681542712256, "grad_norm": 0.451236367225647, "learning_rate": 0.00019036119523258685, "loss": 1.5545, "step": 3724 }, { "epoch": 0.04840467608662813, "grad_norm": 0.38023120164871216, "learning_rate": 0.00019035859577067548, "loss": 1.513, "step": 3725 }, { "epoch": 0.048417670630544, "grad_norm": 0.3073883056640625, "learning_rate": 0.00019035599630876408, "loss": 1.4846, "step": 3726 }, { "epoch": 0.048430665174459875, "grad_norm": 0.32462555170059204, "learning_rate": 0.00019035339684685273, "loss": 1.5351, "step": 3727 }, { "epoch": 0.04844365971837575, "grad_norm": 0.38581156730651855, "learning_rate": 0.00019035079738494132, "loss": 1.5359, "step": 3728 }, { "epoch": 0.04845665426229162, "grad_norm": 0.46308737993240356, "learning_rate": 0.00019034819792302995, "loss": 1.471, "step": 3729 }, { "epoch": 0.048469648806207494, "grad_norm": 0.49333426356315613, "learning_rate": 0.00019034559846111857, "loss": 1.4374, "step": 3730 }, { "epoch": 0.04848264335012337, "grad_norm": 0.4720149338245392, "learning_rate": 0.00019034299899920717, "loss": 1.4151, "step": 3731 }, { "epoch": 0.04849563789403924, "grad_norm": 0.43370410799980164, "learning_rate": 0.0001903403995372958, "loss": 1.4138, "step": 3732 }, { "epoch": 0.04850863243795511, "grad_norm": 0.46720731258392334, "learning_rate": 0.0001903378000753844, "loss": 1.5979, "step": 3733 }, { "epoch": 0.048521626981870986, "grad_norm": 0.3942004144191742, "learning_rate": 0.00019033520061347302, "loss": 1.8305, "step": 3734 }, { "epoch": 0.04853462152578686, "grad_norm": 0.44630008935928345, "learning_rate": 0.00019033260115156164, "loss": 1.6237, "step": 3735 }, { "epoch": 0.04854761606970273, "grad_norm": 0.38367313146591187, "learning_rate": 0.00019033000168965024, "loss": 1.3709, "step": 3736 }, { "epoch": 0.048560610613618604, "grad_norm": 0.3858979642391205, "learning_rate": 0.00019032740222773886, "loss": 1.334, "step": 3737 }, { "epoch": 0.04857360515753448, "grad_norm": 0.3400343358516693, "learning_rate": 0.0001903248027658275, "loss": 1.3174, "step": 3738 }, { "epoch": 0.04858659970145035, "grad_norm": 0.39535218477249146, "learning_rate": 0.0001903222033039161, "loss": 1.2091, "step": 3739 }, { "epoch": 0.04859959424536623, "grad_norm": 0.32496944069862366, "learning_rate": 0.0001903196038420047, "loss": 1.3763, "step": 3740 }, { "epoch": 0.0486125887892821, "grad_norm": 0.3461853265762329, "learning_rate": 0.00019031700438009333, "loss": 1.4223, "step": 3741 }, { "epoch": 0.048625583333197976, "grad_norm": 0.4377201795578003, "learning_rate": 0.00019031440491818196, "loss": 1.5245, "step": 3742 }, { "epoch": 0.04863857787711385, "grad_norm": 0.4547326862812042, "learning_rate": 0.00019031180545627056, "loss": 1.6038, "step": 3743 }, { "epoch": 0.04865157242102972, "grad_norm": 0.43616053462028503, "learning_rate": 0.00019030920599435918, "loss": 1.6193, "step": 3744 }, { "epoch": 0.048664566964945595, "grad_norm": 0.3790490925312042, "learning_rate": 0.00019030660653244778, "loss": 1.6556, "step": 3745 }, { "epoch": 0.04867756150886147, "grad_norm": 0.4055534303188324, "learning_rate": 0.0001903040070705364, "loss": 1.5272, "step": 3746 }, { "epoch": 0.04869055605277734, "grad_norm": 0.5028093457221985, "learning_rate": 0.00019030140760862503, "loss": 1.4023, "step": 3747 }, { "epoch": 0.048703550596693214, "grad_norm": 0.37545719742774963, "learning_rate": 0.00019029880814671362, "loss": 1.4415, "step": 3748 }, { "epoch": 0.04871654514060909, "grad_norm": 0.41033586859703064, "learning_rate": 0.00019029620868480225, "loss": 1.4422, "step": 3749 }, { "epoch": 0.04872953968452496, "grad_norm": 0.41760483384132385, "learning_rate": 0.00019029360922289087, "loss": 1.4391, "step": 3750 }, { "epoch": 0.04874253422844083, "grad_norm": 0.4780189096927643, "learning_rate": 0.0001902910097609795, "loss": 1.3392, "step": 3751 }, { "epoch": 0.048755528772356706, "grad_norm": 0.33102235198020935, "learning_rate": 0.0001902884102990681, "loss": 1.3838, "step": 3752 }, { "epoch": 0.04876852331627258, "grad_norm": 0.3358710706233978, "learning_rate": 0.00019028581083715672, "loss": 1.5932, "step": 3753 }, { "epoch": 0.04878151786018845, "grad_norm": 0.5055046677589417, "learning_rate": 0.00019028321137524534, "loss": 1.5045, "step": 3754 }, { "epoch": 0.048794512404104325, "grad_norm": 0.34506577253341675, "learning_rate": 0.00019028061191333394, "loss": 1.4033, "step": 3755 }, { "epoch": 0.0488075069480202, "grad_norm": 0.5171628594398499, "learning_rate": 0.00019027801245142257, "loss": 1.3833, "step": 3756 }, { "epoch": 0.04882050149193607, "grad_norm": 0.4783114492893219, "learning_rate": 0.00019027541298951116, "loss": 1.457, "step": 3757 }, { "epoch": 0.048833496035851943, "grad_norm": 0.32838794589042664, "learning_rate": 0.00019027281352759982, "loss": 1.2866, "step": 3758 }, { "epoch": 0.04884649057976782, "grad_norm": 0.405598521232605, "learning_rate": 0.0001902702140656884, "loss": 1.4387, "step": 3759 }, { "epoch": 0.048859485123683696, "grad_norm": 0.3459217846393585, "learning_rate": 0.000190267614603777, "loss": 1.4465, "step": 3760 }, { "epoch": 0.04887247966759957, "grad_norm": 0.42128950357437134, "learning_rate": 0.00019026501514186563, "loss": 1.6357, "step": 3761 }, { "epoch": 0.04888547421151544, "grad_norm": 0.44771215319633484, "learning_rate": 0.00019026241567995426, "loss": 1.5112, "step": 3762 }, { "epoch": 0.048898468755431315, "grad_norm": 0.32228025794029236, "learning_rate": 0.00019025981621804288, "loss": 1.3816, "step": 3763 }, { "epoch": 0.04891146329934719, "grad_norm": 0.38464412093162537, "learning_rate": 0.00019025721675613148, "loss": 1.4335, "step": 3764 }, { "epoch": 0.04892445784326306, "grad_norm": 0.38852474093437195, "learning_rate": 0.0001902546172942201, "loss": 1.3744, "step": 3765 }, { "epoch": 0.048937452387178934, "grad_norm": 0.46775123476982117, "learning_rate": 0.00019025201783230873, "loss": 1.5269, "step": 3766 }, { "epoch": 0.04895044693109481, "grad_norm": 0.3528216779232025, "learning_rate": 0.00019024941837039733, "loss": 1.4835, "step": 3767 }, { "epoch": 0.04896344147501068, "grad_norm": 0.4044293165206909, "learning_rate": 0.00019024681890848595, "loss": 1.322, "step": 3768 }, { "epoch": 0.04897643601892655, "grad_norm": 0.3173816204071045, "learning_rate": 0.00019024421944657458, "loss": 1.3836, "step": 3769 }, { "epoch": 0.048989430562842426, "grad_norm": 0.34366855025291443, "learning_rate": 0.0001902416199846632, "loss": 1.3584, "step": 3770 }, { "epoch": 0.0490024251067583, "grad_norm": 0.5185137987136841, "learning_rate": 0.0001902390205227518, "loss": 1.7088, "step": 3771 }, { "epoch": 0.04901541965067417, "grad_norm": 0.41273248195648193, "learning_rate": 0.0001902364210608404, "loss": 1.2926, "step": 3772 }, { "epoch": 0.049028414194590045, "grad_norm": 0.39725032448768616, "learning_rate": 0.00019023382159892905, "loss": 1.6967, "step": 3773 }, { "epoch": 0.04904140873850592, "grad_norm": 0.3438766896724701, "learning_rate": 0.00019023122213701764, "loss": 1.4632, "step": 3774 }, { "epoch": 0.04905440328242179, "grad_norm": 0.48754197359085083, "learning_rate": 0.00019022862267510627, "loss": 1.3423, "step": 3775 }, { "epoch": 0.049067397826337664, "grad_norm": 0.44422802329063416, "learning_rate": 0.00019022602321319487, "loss": 1.2952, "step": 3776 }, { "epoch": 0.04908039237025354, "grad_norm": 0.40650635957717896, "learning_rate": 0.0001902234237512835, "loss": 1.4863, "step": 3777 }, { "epoch": 0.049093386914169417, "grad_norm": 0.3226718604564667, "learning_rate": 0.00019022082428937212, "loss": 1.3477, "step": 3778 }, { "epoch": 0.04910638145808529, "grad_norm": 0.3460264801979065, "learning_rate": 0.0001902182248274607, "loss": 1.3279, "step": 3779 }, { "epoch": 0.04911937600200116, "grad_norm": 0.3752950429916382, "learning_rate": 0.00019021562536554934, "loss": 1.473, "step": 3780 }, { "epoch": 0.049132370545917035, "grad_norm": 0.42022013664245605, "learning_rate": 0.00019021302590363796, "loss": 1.5363, "step": 3781 }, { "epoch": 0.04914536508983291, "grad_norm": 0.3701004683971405, "learning_rate": 0.00019021042644172659, "loss": 1.6591, "step": 3782 }, { "epoch": 0.04915835963374878, "grad_norm": 0.4206155836582184, "learning_rate": 0.00019020782697981518, "loss": 1.4595, "step": 3783 }, { "epoch": 0.049171354177664654, "grad_norm": 0.37528252601623535, "learning_rate": 0.0001902052275179038, "loss": 1.5484, "step": 3784 }, { "epoch": 0.04918434872158053, "grad_norm": 0.4147874414920807, "learning_rate": 0.00019020262805599243, "loss": 1.4868, "step": 3785 }, { "epoch": 0.0491973432654964, "grad_norm": 0.30775102972984314, "learning_rate": 0.00019020002859408103, "loss": 1.4969, "step": 3786 }, { "epoch": 0.04921033780941227, "grad_norm": 0.33989468216896057, "learning_rate": 0.00019019742913216965, "loss": 1.3719, "step": 3787 }, { "epoch": 0.049223332353328146, "grad_norm": 0.3552214801311493, "learning_rate": 0.00019019482967025825, "loss": 1.4006, "step": 3788 }, { "epoch": 0.04923632689724402, "grad_norm": 0.3304178714752197, "learning_rate": 0.00019019223020834688, "loss": 1.422, "step": 3789 }, { "epoch": 0.04924932144115989, "grad_norm": 0.3558228015899658, "learning_rate": 0.0001901896307464355, "loss": 1.6192, "step": 3790 }, { "epoch": 0.049262315985075765, "grad_norm": 0.4193790555000305, "learning_rate": 0.0001901870312845241, "loss": 1.4656, "step": 3791 }, { "epoch": 0.04927531052899164, "grad_norm": 0.3567087650299072, "learning_rate": 0.00019018443182261272, "loss": 1.2557, "step": 3792 }, { "epoch": 0.04928830507290751, "grad_norm": 0.4742031395435333, "learning_rate": 0.00019018183236070135, "loss": 1.5403, "step": 3793 }, { "epoch": 0.049301299616823384, "grad_norm": 0.4044789969921112, "learning_rate": 0.00019017923289878997, "loss": 1.3158, "step": 3794 }, { "epoch": 0.04931429416073926, "grad_norm": 0.37397974729537964, "learning_rate": 0.00019017663343687857, "loss": 1.3066, "step": 3795 }, { "epoch": 0.04932728870465513, "grad_norm": 0.39239782094955444, "learning_rate": 0.0001901740339749672, "loss": 1.4822, "step": 3796 }, { "epoch": 0.04934028324857101, "grad_norm": 0.2867686152458191, "learning_rate": 0.00019017143451305582, "loss": 1.1689, "step": 3797 }, { "epoch": 0.04935327779248688, "grad_norm": 0.4478296637535095, "learning_rate": 0.00019016883505114442, "loss": 1.4884, "step": 3798 }, { "epoch": 0.049366272336402756, "grad_norm": 0.4071597456932068, "learning_rate": 0.00019016623558923304, "loss": 1.4253, "step": 3799 }, { "epoch": 0.04937926688031863, "grad_norm": 0.3840247392654419, "learning_rate": 0.00019016363612732164, "loss": 1.4365, "step": 3800 }, { "epoch": 0.0493922614242345, "grad_norm": 0.35399726033210754, "learning_rate": 0.00019016103666541026, "loss": 1.5576, "step": 3801 }, { "epoch": 0.049405255968150374, "grad_norm": 0.45631012320518494, "learning_rate": 0.00019015843720349889, "loss": 1.5579, "step": 3802 }, { "epoch": 0.04941825051206625, "grad_norm": 0.3735182583332062, "learning_rate": 0.00019015583774158748, "loss": 1.3921, "step": 3803 }, { "epoch": 0.04943124505598212, "grad_norm": 0.3003441095352173, "learning_rate": 0.00019015323827967614, "loss": 1.4741, "step": 3804 }, { "epoch": 0.04944423959989799, "grad_norm": 0.5309226512908936, "learning_rate": 0.00019015063881776473, "loss": 1.499, "step": 3805 }, { "epoch": 0.049457234143813866, "grad_norm": 0.32520508766174316, "learning_rate": 0.00019014803935585336, "loss": 1.5014, "step": 3806 }, { "epoch": 0.04947022868772974, "grad_norm": 0.435548335313797, "learning_rate": 0.00019014543989394195, "loss": 1.4792, "step": 3807 }, { "epoch": 0.04948322323164561, "grad_norm": 0.46274563670158386, "learning_rate": 0.00019014284043203058, "loss": 1.6184, "step": 3808 }, { "epoch": 0.049496217775561485, "grad_norm": 0.36255529522895813, "learning_rate": 0.0001901402409701192, "loss": 1.3842, "step": 3809 }, { "epoch": 0.04950921231947736, "grad_norm": 0.3969224989414215, "learning_rate": 0.0001901376415082078, "loss": 1.582, "step": 3810 }, { "epoch": 0.04952220686339323, "grad_norm": 0.4023350179195404, "learning_rate": 0.00019013504204629643, "loss": 1.5543, "step": 3811 }, { "epoch": 0.049535201407309104, "grad_norm": 0.3140353560447693, "learning_rate": 0.00019013244258438505, "loss": 1.3916, "step": 3812 }, { "epoch": 0.04954819595122498, "grad_norm": 0.29110926389694214, "learning_rate": 0.00019012984312247367, "loss": 1.471, "step": 3813 }, { "epoch": 0.04956119049514085, "grad_norm": 0.41599124670028687, "learning_rate": 0.00019012724366056227, "loss": 1.4365, "step": 3814 }, { "epoch": 0.04957418503905672, "grad_norm": 0.367975652217865, "learning_rate": 0.00019012464419865087, "loss": 1.5376, "step": 3815 }, { "epoch": 0.0495871795829726, "grad_norm": 0.3864184021949768, "learning_rate": 0.00019012204473673952, "loss": 1.5685, "step": 3816 }, { "epoch": 0.049600174126888476, "grad_norm": 0.399983286857605, "learning_rate": 0.00019011944527482812, "loss": 1.4027, "step": 3817 }, { "epoch": 0.04961316867080435, "grad_norm": 0.3606490194797516, "learning_rate": 0.00019011684581291674, "loss": 1.3922, "step": 3818 }, { "epoch": 0.04962616321472022, "grad_norm": 0.293867290019989, "learning_rate": 0.00019011424635100534, "loss": 1.3493, "step": 3819 }, { "epoch": 0.049639157758636095, "grad_norm": 0.3390860855579376, "learning_rate": 0.00019011164688909396, "loss": 1.4754, "step": 3820 }, { "epoch": 0.04965215230255197, "grad_norm": 0.5028812289237976, "learning_rate": 0.0001901090474271826, "loss": 1.4913, "step": 3821 }, { "epoch": 0.04966514684646784, "grad_norm": 0.4193522036075592, "learning_rate": 0.00019010644796527119, "loss": 1.5433, "step": 3822 }, { "epoch": 0.049678141390383714, "grad_norm": 0.4153229594230652, "learning_rate": 0.0001901038485033598, "loss": 1.5895, "step": 3823 }, { "epoch": 0.049691135934299586, "grad_norm": 0.4533325433731079, "learning_rate": 0.00019010124904144844, "loss": 1.4521, "step": 3824 }, { "epoch": 0.04970413047821546, "grad_norm": 0.38161057233810425, "learning_rate": 0.00019009864957953706, "loss": 1.3993, "step": 3825 }, { "epoch": 0.04971712502213133, "grad_norm": 0.3211041986942291, "learning_rate": 0.00019009605011762566, "loss": 1.6267, "step": 3826 }, { "epoch": 0.049730119566047205, "grad_norm": 0.26964321732521057, "learning_rate": 0.00019009345065571425, "loss": 1.2429, "step": 3827 }, { "epoch": 0.04974311410996308, "grad_norm": 0.35638460516929626, "learning_rate": 0.0001900908511938029, "loss": 1.6416, "step": 3828 }, { "epoch": 0.04975610865387895, "grad_norm": 0.3692905008792877, "learning_rate": 0.0001900882517318915, "loss": 1.4969, "step": 3829 }, { "epoch": 0.049769103197794824, "grad_norm": 0.307699978351593, "learning_rate": 0.00019008565226998013, "loss": 1.2532, "step": 3830 }, { "epoch": 0.0497820977417107, "grad_norm": 0.3573067784309387, "learning_rate": 0.00019008305280806873, "loss": 1.6465, "step": 3831 }, { "epoch": 0.04979509228562657, "grad_norm": 0.3379407227039337, "learning_rate": 0.00019008045334615735, "loss": 1.28, "step": 3832 }, { "epoch": 0.04980808682954244, "grad_norm": 0.37374475598335266, "learning_rate": 0.00019007785388424597, "loss": 1.3597, "step": 3833 }, { "epoch": 0.049821081373458316, "grad_norm": 0.3123195171356201, "learning_rate": 0.00019007525442233457, "loss": 1.3405, "step": 3834 }, { "epoch": 0.049834075917374196, "grad_norm": 0.3965378999710083, "learning_rate": 0.0001900726549604232, "loss": 1.351, "step": 3835 }, { "epoch": 0.04984707046129007, "grad_norm": 0.48493969440460205, "learning_rate": 0.00019007005549851182, "loss": 1.742, "step": 3836 }, { "epoch": 0.04986006500520594, "grad_norm": 0.4489361643791199, "learning_rate": 0.00019006745603660044, "loss": 1.5255, "step": 3837 }, { "epoch": 0.049873059549121815, "grad_norm": 0.39542123675346375, "learning_rate": 0.00019006485657468904, "loss": 1.6101, "step": 3838 }, { "epoch": 0.04988605409303769, "grad_norm": 0.3340208828449249, "learning_rate": 0.00019006225711277767, "loss": 1.2449, "step": 3839 }, { "epoch": 0.04989904863695356, "grad_norm": 0.3478074371814728, "learning_rate": 0.0001900596576508663, "loss": 1.5239, "step": 3840 }, { "epoch": 0.049912043180869434, "grad_norm": 0.37966689467430115, "learning_rate": 0.0001900570581889549, "loss": 1.4362, "step": 3841 }, { "epoch": 0.04992503772478531, "grad_norm": 0.39213261008262634, "learning_rate": 0.0001900544587270435, "loss": 1.4932, "step": 3842 }, { "epoch": 0.04993803226870118, "grad_norm": 0.4360811412334442, "learning_rate": 0.00019005185926513214, "loss": 1.5797, "step": 3843 }, { "epoch": 0.04995102681261705, "grad_norm": 0.40591880679130554, "learning_rate": 0.00019004925980322074, "loss": 1.3499, "step": 3844 }, { "epoch": 0.049964021356532926, "grad_norm": 0.42182737588882446, "learning_rate": 0.00019004666034130936, "loss": 1.5688, "step": 3845 }, { "epoch": 0.0499770159004488, "grad_norm": 0.44136422872543335, "learning_rate": 0.00019004406087939796, "loss": 1.4248, "step": 3846 }, { "epoch": 0.04999001044436467, "grad_norm": 0.4252159595489502, "learning_rate": 0.0001900414614174866, "loss": 1.5371, "step": 3847 }, { "epoch": 0.050003004988280544, "grad_norm": 0.406872421503067, "learning_rate": 0.0001900388619555752, "loss": 1.6734, "step": 3848 }, { "epoch": 0.05001599953219642, "grad_norm": 0.3588224947452545, "learning_rate": 0.00019003626249366383, "loss": 1.3372, "step": 3849 }, { "epoch": 0.05002899407611229, "grad_norm": 0.25963282585144043, "learning_rate": 0.00019003366303175243, "loss": 1.2794, "step": 3850 }, { "epoch": 0.05004198862002816, "grad_norm": 0.4401414394378662, "learning_rate": 0.00019003106356984105, "loss": 1.4461, "step": 3851 }, { "epoch": 0.050054983163944036, "grad_norm": 0.43792974948883057, "learning_rate": 0.00019002846410792968, "loss": 1.581, "step": 3852 }, { "epoch": 0.05006797770785991, "grad_norm": 0.36800432205200195, "learning_rate": 0.00019002586464601827, "loss": 1.3583, "step": 3853 }, { "epoch": 0.05008097225177579, "grad_norm": 0.4142528772354126, "learning_rate": 0.0001900232651841069, "loss": 1.3109, "step": 3854 }, { "epoch": 0.05009396679569166, "grad_norm": 0.32176101207733154, "learning_rate": 0.00019002066572219552, "loss": 1.3163, "step": 3855 }, { "epoch": 0.050106961339607535, "grad_norm": 0.41339778900146484, "learning_rate": 0.00019001806626028412, "loss": 1.4049, "step": 3856 }, { "epoch": 0.05011995588352341, "grad_norm": 0.404988557100296, "learning_rate": 0.00019001546679837274, "loss": 1.423, "step": 3857 }, { "epoch": 0.05013295042743928, "grad_norm": 0.4195566177368164, "learning_rate": 0.00019001286733646134, "loss": 1.4085, "step": 3858 }, { "epoch": 0.050145944971355154, "grad_norm": 0.39707931876182556, "learning_rate": 0.00019001026787455, "loss": 1.22, "step": 3859 }, { "epoch": 0.05015893951527103, "grad_norm": 0.4275943636894226, "learning_rate": 0.0001900076684126386, "loss": 1.4051, "step": 3860 }, { "epoch": 0.0501719340591869, "grad_norm": 0.31092458963394165, "learning_rate": 0.00019000506895072722, "loss": 1.0827, "step": 3861 }, { "epoch": 0.05018492860310277, "grad_norm": 0.4529190957546234, "learning_rate": 0.0001900024694888158, "loss": 1.442, "step": 3862 }, { "epoch": 0.050197923147018646, "grad_norm": 0.49008408188819885, "learning_rate": 0.00018999987002690444, "loss": 1.4841, "step": 3863 }, { "epoch": 0.05021091769093452, "grad_norm": 0.348882257938385, "learning_rate": 0.00018999727056499306, "loss": 1.3866, "step": 3864 }, { "epoch": 0.05022391223485039, "grad_norm": 0.44368821382522583, "learning_rate": 0.00018999467110308166, "loss": 1.4542, "step": 3865 }, { "epoch": 0.050236906778766265, "grad_norm": 0.41098013520240784, "learning_rate": 0.00018999207164117028, "loss": 1.4977, "step": 3866 }, { "epoch": 0.05024990132268214, "grad_norm": 0.4766836166381836, "learning_rate": 0.0001899894721792589, "loss": 1.2225, "step": 3867 }, { "epoch": 0.05026289586659801, "grad_norm": 0.4762088656425476, "learning_rate": 0.0001899868727173475, "loss": 1.4545, "step": 3868 }, { "epoch": 0.05027589041051388, "grad_norm": 0.20205679535865784, "learning_rate": 0.00018998427325543613, "loss": 1.4255, "step": 3869 }, { "epoch": 0.050288884954429756, "grad_norm": 0.3192203938961029, "learning_rate": 0.00018998167379352473, "loss": 1.4206, "step": 3870 }, { "epoch": 0.05030187949834563, "grad_norm": 0.3639497756958008, "learning_rate": 0.00018997907433161338, "loss": 1.4292, "step": 3871 }, { "epoch": 0.0503148740422615, "grad_norm": 0.35752227902412415, "learning_rate": 0.00018997647486970198, "loss": 1.3998, "step": 3872 }, { "epoch": 0.05032786858617738, "grad_norm": 0.43642255663871765, "learning_rate": 0.0001899738754077906, "loss": 1.5194, "step": 3873 }, { "epoch": 0.050340863130093255, "grad_norm": 0.4110788106918335, "learning_rate": 0.0001899712759458792, "loss": 1.375, "step": 3874 }, { "epoch": 0.05035385767400913, "grad_norm": 0.4246370792388916, "learning_rate": 0.00018996867648396782, "loss": 1.4195, "step": 3875 }, { "epoch": 0.050366852217925, "grad_norm": 0.43559038639068604, "learning_rate": 0.00018996607702205645, "loss": 1.3269, "step": 3876 }, { "epoch": 0.050379846761840874, "grad_norm": 0.2696673572063446, "learning_rate": 0.00018996347756014504, "loss": 1.2111, "step": 3877 }, { "epoch": 0.05039284130575675, "grad_norm": 0.34256792068481445, "learning_rate": 0.0001899608780982337, "loss": 1.4508, "step": 3878 }, { "epoch": 0.05040583584967262, "grad_norm": 0.3418741226196289, "learning_rate": 0.0001899582786363223, "loss": 1.2265, "step": 3879 }, { "epoch": 0.05041883039358849, "grad_norm": 0.4165349304676056, "learning_rate": 0.00018995567917441092, "loss": 1.4036, "step": 3880 }, { "epoch": 0.050431824937504366, "grad_norm": 0.3854735791683197, "learning_rate": 0.00018995307971249952, "loss": 1.2993, "step": 3881 }, { "epoch": 0.05044481948142024, "grad_norm": 0.3382399082183838, "learning_rate": 0.00018995048025058814, "loss": 1.5128, "step": 3882 }, { "epoch": 0.05045781402533611, "grad_norm": 0.4080575108528137, "learning_rate": 0.00018994788078867676, "loss": 1.5723, "step": 3883 }, { "epoch": 0.050470808569251985, "grad_norm": 0.3790281116962433, "learning_rate": 0.00018994528132676536, "loss": 1.4365, "step": 3884 }, { "epoch": 0.05048380311316786, "grad_norm": 0.3492949306964874, "learning_rate": 0.000189942681864854, "loss": 1.3486, "step": 3885 }, { "epoch": 0.05049679765708373, "grad_norm": 0.40551507472991943, "learning_rate": 0.0001899400824029426, "loss": 1.4734, "step": 3886 }, { "epoch": 0.050509792200999604, "grad_norm": 0.2826780676841736, "learning_rate": 0.0001899374829410312, "loss": 1.4101, "step": 3887 }, { "epoch": 0.05052278674491548, "grad_norm": 0.3535975217819214, "learning_rate": 0.00018993488347911983, "loss": 1.3429, "step": 3888 }, { "epoch": 0.05053578128883135, "grad_norm": 0.34710127115249634, "learning_rate": 0.00018993228401720843, "loss": 1.5799, "step": 3889 }, { "epoch": 0.05054877583274722, "grad_norm": 0.3437153697013855, "learning_rate": 0.00018992968455529708, "loss": 1.3689, "step": 3890 }, { "epoch": 0.050561770376663095, "grad_norm": 0.32699424028396606, "learning_rate": 0.00018992708509338568, "loss": 1.2133, "step": 3891 }, { "epoch": 0.05057476492057897, "grad_norm": 0.2864934206008911, "learning_rate": 0.0001899244856314743, "loss": 1.3931, "step": 3892 }, { "epoch": 0.05058775946449485, "grad_norm": 0.3372269570827484, "learning_rate": 0.0001899218861695629, "loss": 1.5607, "step": 3893 }, { "epoch": 0.05060075400841072, "grad_norm": 0.3295058012008667, "learning_rate": 0.00018991928670765153, "loss": 1.6334, "step": 3894 }, { "epoch": 0.050613748552326594, "grad_norm": 0.45339012145996094, "learning_rate": 0.00018991668724574015, "loss": 1.4967, "step": 3895 }, { "epoch": 0.05062674309624247, "grad_norm": 0.47987493872642517, "learning_rate": 0.00018991408778382875, "loss": 1.6288, "step": 3896 }, { "epoch": 0.05063973764015834, "grad_norm": 0.3702436089515686, "learning_rate": 0.00018991148832191737, "loss": 1.6368, "step": 3897 }, { "epoch": 0.05065273218407421, "grad_norm": 0.3788699209690094, "learning_rate": 0.000189908888860006, "loss": 1.2819, "step": 3898 }, { "epoch": 0.050665726727990086, "grad_norm": 0.39456719160079956, "learning_rate": 0.0001899062893980946, "loss": 1.4102, "step": 3899 }, { "epoch": 0.05067872127190596, "grad_norm": 0.49345386028289795, "learning_rate": 0.00018990368993618322, "loss": 1.5114, "step": 3900 }, { "epoch": 0.05069171581582183, "grad_norm": 0.30331170558929443, "learning_rate": 0.00018990109047427182, "loss": 1.419, "step": 3901 }, { "epoch": 0.050704710359737705, "grad_norm": 0.3756665885448456, "learning_rate": 0.00018989849101236047, "loss": 1.3318, "step": 3902 }, { "epoch": 0.05071770490365358, "grad_norm": 0.3780282735824585, "learning_rate": 0.00018989589155044906, "loss": 1.2521, "step": 3903 }, { "epoch": 0.05073069944756945, "grad_norm": 0.3055044412612915, "learning_rate": 0.0001898932920885377, "loss": 1.4419, "step": 3904 }, { "epoch": 0.050743693991485324, "grad_norm": 0.3280501365661621, "learning_rate": 0.0001898906926266263, "loss": 1.3271, "step": 3905 }, { "epoch": 0.0507566885354012, "grad_norm": 0.5151320099830627, "learning_rate": 0.0001898880931647149, "loss": 1.6363, "step": 3906 }, { "epoch": 0.05076968307931707, "grad_norm": 0.4122999310493469, "learning_rate": 0.00018988549370280354, "loss": 1.5251, "step": 3907 }, { "epoch": 0.05078267762323294, "grad_norm": 0.3506038188934326, "learning_rate": 0.00018988289424089213, "loss": 1.4699, "step": 3908 }, { "epoch": 0.050795672167148816, "grad_norm": 0.3838462829589844, "learning_rate": 0.00018988029477898076, "loss": 1.4741, "step": 3909 }, { "epoch": 0.05080866671106469, "grad_norm": 0.36845508217811584, "learning_rate": 0.00018987769531706938, "loss": 1.498, "step": 3910 }, { "epoch": 0.05082166125498056, "grad_norm": 0.4354405105113983, "learning_rate": 0.00018987509585515798, "loss": 1.2742, "step": 3911 }, { "epoch": 0.05083465579889644, "grad_norm": 0.37164774537086487, "learning_rate": 0.0001898724963932466, "loss": 1.5356, "step": 3912 }, { "epoch": 0.050847650342812314, "grad_norm": 0.40080708265304565, "learning_rate": 0.0001898698969313352, "loss": 1.3931, "step": 3913 }, { "epoch": 0.05086064488672819, "grad_norm": 0.33808889985084534, "learning_rate": 0.00018986729746942385, "loss": 1.6375, "step": 3914 }, { "epoch": 0.05087363943064406, "grad_norm": 0.4188748896121979, "learning_rate": 0.00018986469800751245, "loss": 1.3959, "step": 3915 }, { "epoch": 0.05088663397455993, "grad_norm": 0.39286088943481445, "learning_rate": 0.00018986209854560107, "loss": 1.5286, "step": 3916 }, { "epoch": 0.050899628518475806, "grad_norm": 0.3928526043891907, "learning_rate": 0.0001898594990836897, "loss": 1.4883, "step": 3917 }, { "epoch": 0.05091262306239168, "grad_norm": 0.31225576996803284, "learning_rate": 0.0001898568996217783, "loss": 1.3666, "step": 3918 }, { "epoch": 0.05092561760630755, "grad_norm": 0.35683757066726685, "learning_rate": 0.00018985430015986692, "loss": 1.397, "step": 3919 }, { "epoch": 0.050938612150223425, "grad_norm": 0.4413927495479584, "learning_rate": 0.00018985170069795552, "loss": 1.607, "step": 3920 }, { "epoch": 0.0509516066941393, "grad_norm": 0.37743958830833435, "learning_rate": 0.00018984910123604417, "loss": 1.3102, "step": 3921 }, { "epoch": 0.05096460123805517, "grad_norm": 0.3739142417907715, "learning_rate": 0.00018984650177413277, "loss": 1.3585, "step": 3922 }, { "epoch": 0.050977595781971044, "grad_norm": 0.397958904504776, "learning_rate": 0.00018984390231222136, "loss": 1.3426, "step": 3923 }, { "epoch": 0.05099059032588692, "grad_norm": 0.4387616217136383, "learning_rate": 0.00018984130285031, "loss": 1.6153, "step": 3924 }, { "epoch": 0.05100358486980279, "grad_norm": 0.4138893485069275, "learning_rate": 0.0001898387033883986, "loss": 1.4823, "step": 3925 }, { "epoch": 0.05101657941371866, "grad_norm": 0.4145199656486511, "learning_rate": 0.00018983610392648724, "loss": 1.4494, "step": 3926 }, { "epoch": 0.051029573957634536, "grad_norm": 0.39016634225845337, "learning_rate": 0.00018983350446457584, "loss": 1.456, "step": 3927 }, { "epoch": 0.05104256850155041, "grad_norm": 0.40434640645980835, "learning_rate": 0.00018983090500266446, "loss": 1.4514, "step": 3928 }, { "epoch": 0.05105556304546628, "grad_norm": 0.2926351726055145, "learning_rate": 0.00018982830554075308, "loss": 1.4219, "step": 3929 }, { "epoch": 0.051068557589382155, "grad_norm": 0.3985045850276947, "learning_rate": 0.00018982570607884168, "loss": 1.5569, "step": 3930 }, { "epoch": 0.051081552133298035, "grad_norm": 0.35467809438705444, "learning_rate": 0.0001898231066169303, "loss": 1.513, "step": 3931 }, { "epoch": 0.05109454667721391, "grad_norm": 0.3070789575576782, "learning_rate": 0.0001898205071550189, "loss": 1.5246, "step": 3932 }, { "epoch": 0.05110754122112978, "grad_norm": 0.40189459919929504, "learning_rate": 0.00018981790769310756, "loss": 1.4707, "step": 3933 }, { "epoch": 0.051120535765045653, "grad_norm": 0.41328689455986023, "learning_rate": 0.00018981530823119615, "loss": 1.4124, "step": 3934 }, { "epoch": 0.051133530308961526, "grad_norm": 0.37467867136001587, "learning_rate": 0.00018981270876928478, "loss": 1.475, "step": 3935 }, { "epoch": 0.0511465248528774, "grad_norm": 0.39252179861068726, "learning_rate": 0.00018981010930737337, "loss": 1.564, "step": 3936 }, { "epoch": 0.05115951939679327, "grad_norm": 0.40778642892837524, "learning_rate": 0.000189807509845462, "loss": 1.539, "step": 3937 }, { "epoch": 0.051172513940709145, "grad_norm": 0.33827200531959534, "learning_rate": 0.00018980491038355062, "loss": 1.302, "step": 3938 }, { "epoch": 0.05118550848462502, "grad_norm": 0.3841506838798523, "learning_rate": 0.00018980231092163922, "loss": 1.5352, "step": 3939 }, { "epoch": 0.05119850302854089, "grad_norm": 0.4162614345550537, "learning_rate": 0.00018979971145972785, "loss": 1.4781, "step": 3940 }, { "epoch": 0.051211497572456764, "grad_norm": 0.38057389855384827, "learning_rate": 0.00018979711199781647, "loss": 1.3907, "step": 3941 }, { "epoch": 0.05122449211637264, "grad_norm": 0.4606248140335083, "learning_rate": 0.00018979451253590507, "loss": 1.7181, "step": 3942 }, { "epoch": 0.05123748666028851, "grad_norm": 0.35811400413513184, "learning_rate": 0.0001897919130739937, "loss": 1.3935, "step": 3943 }, { "epoch": 0.05125048120420438, "grad_norm": 0.34928786754608154, "learning_rate": 0.0001897893136120823, "loss": 1.5874, "step": 3944 }, { "epoch": 0.051263475748120256, "grad_norm": 0.6878196001052856, "learning_rate": 0.00018978671415017094, "loss": 1.2778, "step": 3945 }, { "epoch": 0.05127647029203613, "grad_norm": 0.42672884464263916, "learning_rate": 0.00018978411468825954, "loss": 1.5405, "step": 3946 }, { "epoch": 0.051289464835952, "grad_norm": 0.4493511915206909, "learning_rate": 0.00018978151522634816, "loss": 1.4918, "step": 3947 }, { "epoch": 0.051302459379867875, "grad_norm": 0.42197975516319275, "learning_rate": 0.00018977891576443676, "loss": 1.473, "step": 3948 }, { "epoch": 0.05131545392378375, "grad_norm": 0.43470677733421326, "learning_rate": 0.00018977631630252538, "loss": 1.4323, "step": 3949 }, { "epoch": 0.05132844846769963, "grad_norm": 0.33630824089050293, "learning_rate": 0.000189773716840614, "loss": 1.3864, "step": 3950 }, { "epoch": 0.0513414430116155, "grad_norm": 0.4269065260887146, "learning_rate": 0.0001897711173787026, "loss": 1.4342, "step": 3951 }, { "epoch": 0.051354437555531374, "grad_norm": 0.3277847170829773, "learning_rate": 0.00018976851791679123, "loss": 1.3853, "step": 3952 }, { "epoch": 0.05136743209944725, "grad_norm": 0.4418373703956604, "learning_rate": 0.00018976591845487986, "loss": 1.4092, "step": 3953 }, { "epoch": 0.05138042664336312, "grad_norm": 0.4023517370223999, "learning_rate": 0.00018976331899296845, "loss": 1.5127, "step": 3954 }, { "epoch": 0.05139342118727899, "grad_norm": 0.4355026185512543, "learning_rate": 0.00018976071953105708, "loss": 1.4716, "step": 3955 }, { "epoch": 0.051406415731194866, "grad_norm": 0.3902953863143921, "learning_rate": 0.0001897581200691457, "loss": 1.4001, "step": 3956 }, { "epoch": 0.05141941027511074, "grad_norm": 0.40391770005226135, "learning_rate": 0.00018975552060723433, "loss": 1.5625, "step": 3957 }, { "epoch": 0.05143240481902661, "grad_norm": 0.45951566100120544, "learning_rate": 0.00018975292114532292, "loss": 1.478, "step": 3958 }, { "epoch": 0.051445399362942484, "grad_norm": 0.42912647128105164, "learning_rate": 0.00018975032168341155, "loss": 1.6006, "step": 3959 }, { "epoch": 0.05145839390685836, "grad_norm": 0.45635467767715454, "learning_rate": 0.00018974772222150017, "loss": 1.4409, "step": 3960 }, { "epoch": 0.05147138845077423, "grad_norm": 0.35998058319091797, "learning_rate": 0.00018974512275958877, "loss": 1.5093, "step": 3961 }, { "epoch": 0.0514843829946901, "grad_norm": 0.3792210817337036, "learning_rate": 0.0001897425232976774, "loss": 1.4528, "step": 3962 }, { "epoch": 0.051497377538605976, "grad_norm": 0.4066675007343292, "learning_rate": 0.000189739923835766, "loss": 1.7153, "step": 3963 }, { "epoch": 0.05151037208252185, "grad_norm": 0.41340136528015137, "learning_rate": 0.00018973732437385464, "loss": 1.4482, "step": 3964 }, { "epoch": 0.05152336662643772, "grad_norm": 0.38980579376220703, "learning_rate": 0.00018973472491194324, "loss": 1.4207, "step": 3965 }, { "epoch": 0.051536361170353595, "grad_norm": 0.36849039793014526, "learning_rate": 0.00018973212545003184, "loss": 1.3176, "step": 3966 }, { "epoch": 0.05154935571426947, "grad_norm": 0.39238834381103516, "learning_rate": 0.00018972952598812046, "loss": 1.5189, "step": 3967 }, { "epoch": 0.05156235025818534, "grad_norm": 0.33392074704170227, "learning_rate": 0.0001897269265262091, "loss": 1.5565, "step": 3968 }, { "epoch": 0.05157534480210122, "grad_norm": 0.44543009996414185, "learning_rate": 0.0001897243270642977, "loss": 1.3318, "step": 3969 }, { "epoch": 0.051588339346017094, "grad_norm": 0.45427942276000977, "learning_rate": 0.0001897217276023863, "loss": 1.3999, "step": 3970 }, { "epoch": 0.05160133388993297, "grad_norm": 0.3277691602706909, "learning_rate": 0.00018971912814047493, "loss": 1.4067, "step": 3971 }, { "epoch": 0.05161432843384884, "grad_norm": 0.2940301299095154, "learning_rate": 0.00018971652867856356, "loss": 1.3412, "step": 3972 }, { "epoch": 0.05162732297776471, "grad_norm": 0.5000894665718079, "learning_rate": 0.00018971392921665216, "loss": 1.541, "step": 3973 }, { "epoch": 0.051640317521680586, "grad_norm": 0.36390557885169983, "learning_rate": 0.00018971132975474078, "loss": 1.3446, "step": 3974 }, { "epoch": 0.05165331206559646, "grad_norm": 0.3943788707256317, "learning_rate": 0.00018970873029282938, "loss": 1.4573, "step": 3975 }, { "epoch": 0.05166630660951233, "grad_norm": 0.29128462076187134, "learning_rate": 0.00018970613083091803, "loss": 1.4002, "step": 3976 }, { "epoch": 0.051679301153428205, "grad_norm": 0.39258241653442383, "learning_rate": 0.00018970353136900663, "loss": 1.4483, "step": 3977 }, { "epoch": 0.05169229569734408, "grad_norm": 0.44540131092071533, "learning_rate": 0.00018970093190709522, "loss": 1.3354, "step": 3978 }, { "epoch": 0.05170529024125995, "grad_norm": 0.291391521692276, "learning_rate": 0.00018969833244518385, "loss": 1.3594, "step": 3979 }, { "epoch": 0.05171828478517582, "grad_norm": 0.401706725358963, "learning_rate": 0.00018969573298327247, "loss": 1.3292, "step": 3980 }, { "epoch": 0.051731279329091696, "grad_norm": 0.3497712314128876, "learning_rate": 0.0001896931335213611, "loss": 1.5336, "step": 3981 }, { "epoch": 0.05174427387300757, "grad_norm": 0.3641929626464844, "learning_rate": 0.0001896905340594497, "loss": 1.3193, "step": 3982 }, { "epoch": 0.05175726841692344, "grad_norm": 0.3684643507003784, "learning_rate": 0.00018968793459753832, "loss": 1.5871, "step": 3983 }, { "epoch": 0.051770262960839315, "grad_norm": 0.3841712772846222, "learning_rate": 0.00018968533513562694, "loss": 1.3227, "step": 3984 }, { "epoch": 0.05178325750475519, "grad_norm": 0.319903165102005, "learning_rate": 0.00018968273567371554, "loss": 1.3485, "step": 3985 }, { "epoch": 0.05179625204867106, "grad_norm": 0.428154319524765, "learning_rate": 0.00018968013621180416, "loss": 1.2606, "step": 3986 }, { "epoch": 0.051809246592586934, "grad_norm": 0.3619686961174011, "learning_rate": 0.00018967753674989276, "loss": 1.5088, "step": 3987 }, { "epoch": 0.051822241136502814, "grad_norm": 0.2955183982849121, "learning_rate": 0.00018967493728798141, "loss": 1.3389, "step": 3988 }, { "epoch": 0.05183523568041869, "grad_norm": 0.2890011966228485, "learning_rate": 0.00018967233782607, "loss": 1.2731, "step": 3989 }, { "epoch": 0.05184823022433456, "grad_norm": 0.4698973000049591, "learning_rate": 0.00018966973836415864, "loss": 1.6323, "step": 3990 }, { "epoch": 0.05186122476825043, "grad_norm": 0.45719438791275024, "learning_rate": 0.00018966713890224726, "loss": 1.5064, "step": 3991 }, { "epoch": 0.051874219312166306, "grad_norm": 0.40210840106010437, "learning_rate": 0.00018966453944033586, "loss": 1.4794, "step": 3992 }, { "epoch": 0.05188721385608218, "grad_norm": 0.42772597074508667, "learning_rate": 0.00018966193997842448, "loss": 1.252, "step": 3993 }, { "epoch": 0.05190020839999805, "grad_norm": 0.46020588278770447, "learning_rate": 0.00018965934051651308, "loss": 1.6665, "step": 3994 }, { "epoch": 0.051913202943913925, "grad_norm": 0.400939017534256, "learning_rate": 0.0001896567410546017, "loss": 1.6223, "step": 3995 }, { "epoch": 0.0519261974878298, "grad_norm": 0.3720465898513794, "learning_rate": 0.00018965414159269033, "loss": 1.3402, "step": 3996 }, { "epoch": 0.05193919203174567, "grad_norm": 0.26941317319869995, "learning_rate": 0.00018965154213077893, "loss": 1.2729, "step": 3997 }, { "epoch": 0.051952186575661544, "grad_norm": 0.24724151194095612, "learning_rate": 0.00018964894266886755, "loss": 1.2738, "step": 3998 }, { "epoch": 0.05196518111957742, "grad_norm": 0.4975112974643707, "learning_rate": 0.00018964634320695617, "loss": 1.5769, "step": 3999 }, { "epoch": 0.05197817566349329, "grad_norm": 0.45040571689605713, "learning_rate": 0.0001896437437450448, "loss": 1.4282, "step": 4000 }, { "epoch": 0.05199117020740916, "grad_norm": 0.47491106390953064, "learning_rate": 0.0001896411442831334, "loss": 1.3699, "step": 4001 }, { "epoch": 0.052004164751325035, "grad_norm": 0.3222854435443878, "learning_rate": 0.00018963854482122202, "loss": 1.2931, "step": 4002 }, { "epoch": 0.05201715929524091, "grad_norm": 0.3565429747104645, "learning_rate": 0.00018963594535931065, "loss": 1.5645, "step": 4003 }, { "epoch": 0.05203015383915678, "grad_norm": 0.4117390811443329, "learning_rate": 0.00018963334589739924, "loss": 1.4915, "step": 4004 }, { "epoch": 0.052043148383072654, "grad_norm": 0.48919737339019775, "learning_rate": 0.00018963074643548787, "loss": 1.5253, "step": 4005 }, { "epoch": 0.05205614292698853, "grad_norm": 0.2919909358024597, "learning_rate": 0.00018962814697357646, "loss": 1.4976, "step": 4006 }, { "epoch": 0.05206913747090441, "grad_norm": 0.4114655554294586, "learning_rate": 0.0001896255475116651, "loss": 1.6495, "step": 4007 }, { "epoch": 0.05208213201482028, "grad_norm": 0.38783255219459534, "learning_rate": 0.00018962294804975371, "loss": 1.5461, "step": 4008 }, { "epoch": 0.05209512655873615, "grad_norm": 0.36005041003227234, "learning_rate": 0.0001896203485878423, "loss": 1.5542, "step": 4009 }, { "epoch": 0.052108121102652026, "grad_norm": 0.31175991892814636, "learning_rate": 0.00018961774912593094, "loss": 1.531, "step": 4010 }, { "epoch": 0.0521211156465679, "grad_norm": 0.336824506521225, "learning_rate": 0.00018961514966401956, "loss": 1.5141, "step": 4011 }, { "epoch": 0.05213411019048377, "grad_norm": 0.41131842136383057, "learning_rate": 0.00018961255020210818, "loss": 1.4053, "step": 4012 }, { "epoch": 0.052147104734399645, "grad_norm": 0.45289406180381775, "learning_rate": 0.00018960995074019678, "loss": 1.3242, "step": 4013 }, { "epoch": 0.05216009927831552, "grad_norm": 0.3515275716781616, "learning_rate": 0.0001896073512782854, "loss": 1.4834, "step": 4014 }, { "epoch": 0.05217309382223139, "grad_norm": 0.3492477536201477, "learning_rate": 0.00018960475181637403, "loss": 1.3464, "step": 4015 }, { "epoch": 0.052186088366147264, "grad_norm": 0.3573140799999237, "learning_rate": 0.00018960215235446263, "loss": 1.3785, "step": 4016 }, { "epoch": 0.05219908291006314, "grad_norm": 0.4475495219230652, "learning_rate": 0.00018959955289255125, "loss": 1.6641, "step": 4017 }, { "epoch": 0.05221207745397901, "grad_norm": 0.29575610160827637, "learning_rate": 0.00018959695343063985, "loss": 1.485, "step": 4018 }, { "epoch": 0.05222507199789488, "grad_norm": 0.35003504157066345, "learning_rate": 0.0001895943539687285, "loss": 1.3035, "step": 4019 }, { "epoch": 0.052238066541810756, "grad_norm": 0.4605973958969116, "learning_rate": 0.0001895917545068171, "loss": 1.5717, "step": 4020 }, { "epoch": 0.05225106108572663, "grad_norm": 0.41698935627937317, "learning_rate": 0.0001895891550449057, "loss": 1.6018, "step": 4021 }, { "epoch": 0.0522640556296425, "grad_norm": 0.3836056590080261, "learning_rate": 0.00018958655558299432, "loss": 1.5877, "step": 4022 }, { "epoch": 0.052277050173558375, "grad_norm": 0.4108414947986603, "learning_rate": 0.00018958395612108295, "loss": 1.599, "step": 4023 }, { "epoch": 0.05229004471747425, "grad_norm": 0.3855954110622406, "learning_rate": 0.00018958135665917157, "loss": 1.4276, "step": 4024 }, { "epoch": 0.05230303926139012, "grad_norm": 0.35487011075019836, "learning_rate": 0.00018957875719726017, "loss": 1.4927, "step": 4025 }, { "epoch": 0.052316033805306, "grad_norm": 0.3558792769908905, "learning_rate": 0.0001895761577353488, "loss": 1.2971, "step": 4026 }, { "epoch": 0.05232902834922187, "grad_norm": 0.2872849404811859, "learning_rate": 0.00018957355827343742, "loss": 1.3233, "step": 4027 }, { "epoch": 0.052342022893137746, "grad_norm": 0.3171432614326477, "learning_rate": 0.00018957095881152601, "loss": 1.394, "step": 4028 }, { "epoch": 0.05235501743705362, "grad_norm": 0.40315118432044983, "learning_rate": 0.00018956835934961464, "loss": 1.5473, "step": 4029 }, { "epoch": 0.05236801198096949, "grad_norm": 0.44339045882225037, "learning_rate": 0.00018956575988770326, "loss": 1.5307, "step": 4030 }, { "epoch": 0.052381006524885365, "grad_norm": 0.37378010153770447, "learning_rate": 0.0001895631604257919, "loss": 1.4751, "step": 4031 }, { "epoch": 0.05239400106880124, "grad_norm": 0.4320535659790039, "learning_rate": 0.00018956056096388048, "loss": 1.3061, "step": 4032 }, { "epoch": 0.05240699561271711, "grad_norm": 0.2966800332069397, "learning_rate": 0.00018955796150196908, "loss": 1.2925, "step": 4033 }, { "epoch": 0.052419990156632984, "grad_norm": 0.39737069606781006, "learning_rate": 0.00018955536204005773, "loss": 1.5461, "step": 4034 }, { "epoch": 0.05243298470054886, "grad_norm": 0.39521902799606323, "learning_rate": 0.00018955276257814633, "loss": 1.5108, "step": 4035 }, { "epoch": 0.05244597924446473, "grad_norm": 0.438451886177063, "learning_rate": 0.00018955016311623496, "loss": 1.2581, "step": 4036 }, { "epoch": 0.0524589737883806, "grad_norm": 0.3777187466621399, "learning_rate": 0.00018954756365432355, "loss": 1.3956, "step": 4037 }, { "epoch": 0.052471968332296476, "grad_norm": 0.5382497310638428, "learning_rate": 0.00018954496419241218, "loss": 1.4657, "step": 4038 }, { "epoch": 0.05248496287621235, "grad_norm": 0.38454824686050415, "learning_rate": 0.0001895423647305008, "loss": 1.3285, "step": 4039 }, { "epoch": 0.05249795742012822, "grad_norm": 0.361906498670578, "learning_rate": 0.0001895397652685894, "loss": 1.4909, "step": 4040 }, { "epoch": 0.052510951964044095, "grad_norm": 0.3523094654083252, "learning_rate": 0.00018953716580667802, "loss": 1.4241, "step": 4041 }, { "epoch": 0.05252394650795997, "grad_norm": 0.3995682895183563, "learning_rate": 0.00018953456634476665, "loss": 1.5484, "step": 4042 }, { "epoch": 0.05253694105187584, "grad_norm": 0.4963608980178833, "learning_rate": 0.00018953196688285527, "loss": 1.5385, "step": 4043 }, { "epoch": 0.052549935595791714, "grad_norm": 0.4471227824687958, "learning_rate": 0.00018952936742094387, "loss": 1.519, "step": 4044 }, { "epoch": 0.052562930139707593, "grad_norm": 0.3839107155799866, "learning_rate": 0.00018952676795903247, "loss": 1.2874, "step": 4045 }, { "epoch": 0.052575924683623466, "grad_norm": 0.33835527300834656, "learning_rate": 0.00018952416849712112, "loss": 1.5024, "step": 4046 }, { "epoch": 0.05258891922753934, "grad_norm": 0.41741132736206055, "learning_rate": 0.00018952156903520972, "loss": 1.5582, "step": 4047 }, { "epoch": 0.05260191377145521, "grad_norm": 0.47670289874076843, "learning_rate": 0.00018951896957329834, "loss": 1.6401, "step": 4048 }, { "epoch": 0.052614908315371085, "grad_norm": 0.3288404643535614, "learning_rate": 0.00018951637011138694, "loss": 1.3547, "step": 4049 }, { "epoch": 0.05262790285928696, "grad_norm": 1.391832709312439, "learning_rate": 0.00018951377064947556, "loss": 1.3922, "step": 4050 }, { "epoch": 0.05264089740320283, "grad_norm": 0.4483433663845062, "learning_rate": 0.0001895111711875642, "loss": 1.2929, "step": 4051 }, { "epoch": 0.052653891947118704, "grad_norm": 0.4345998466014862, "learning_rate": 0.00018950857172565278, "loss": 1.3383, "step": 4052 }, { "epoch": 0.05266688649103458, "grad_norm": 0.39746445417404175, "learning_rate": 0.0001895059722637414, "loss": 1.4204, "step": 4053 }, { "epoch": 0.05267988103495045, "grad_norm": 0.387657105922699, "learning_rate": 0.00018950337280183003, "loss": 1.3751, "step": 4054 }, { "epoch": 0.05269287557886632, "grad_norm": 0.38720956444740295, "learning_rate": 0.00018950077333991866, "loss": 1.4092, "step": 4055 }, { "epoch": 0.052705870122782196, "grad_norm": 0.4390738010406494, "learning_rate": 0.00018949817387800726, "loss": 1.5691, "step": 4056 }, { "epoch": 0.05271886466669807, "grad_norm": 0.47112616896629333, "learning_rate": 0.00018949557441609588, "loss": 1.5116, "step": 4057 }, { "epoch": 0.05273185921061394, "grad_norm": 0.2595531642436981, "learning_rate": 0.0001894929749541845, "loss": 1.3734, "step": 4058 }, { "epoch": 0.052744853754529815, "grad_norm": 0.3311459720134735, "learning_rate": 0.0001894903754922731, "loss": 1.4041, "step": 4059 }, { "epoch": 0.05275784829844569, "grad_norm": 0.5230029225349426, "learning_rate": 0.00018948777603036173, "loss": 1.5171, "step": 4060 }, { "epoch": 0.05277084284236156, "grad_norm": 0.3773336112499237, "learning_rate": 0.00018948517656845032, "loss": 1.2762, "step": 4061 }, { "epoch": 0.052783837386277434, "grad_norm": 0.3544774651527405, "learning_rate": 0.00018948257710653895, "loss": 1.3548, "step": 4062 }, { "epoch": 0.05279683193019331, "grad_norm": 0.4684385061264038, "learning_rate": 0.00018947997764462757, "loss": 1.5289, "step": 4063 }, { "epoch": 0.05280982647410919, "grad_norm": 0.5378061532974243, "learning_rate": 0.00018947737818271617, "loss": 1.602, "step": 4064 }, { "epoch": 0.05282282101802506, "grad_norm": 0.435875803232193, "learning_rate": 0.00018947477872080482, "loss": 1.5745, "step": 4065 }, { "epoch": 0.05283581556194093, "grad_norm": 0.40728333592414856, "learning_rate": 0.00018947217925889342, "loss": 1.3974, "step": 4066 }, { "epoch": 0.052848810105856805, "grad_norm": 0.32386717200279236, "learning_rate": 0.00018946957979698204, "loss": 1.5727, "step": 4067 }, { "epoch": 0.05286180464977268, "grad_norm": 0.3410390615463257, "learning_rate": 0.00018946698033507064, "loss": 1.5648, "step": 4068 }, { "epoch": 0.05287479919368855, "grad_norm": 0.2897005081176758, "learning_rate": 0.00018946438087315927, "loss": 1.3153, "step": 4069 }, { "epoch": 0.052887793737604424, "grad_norm": 0.4447380602359772, "learning_rate": 0.0001894617814112479, "loss": 1.4752, "step": 4070 }, { "epoch": 0.0529007882815203, "grad_norm": 0.4160134494304657, "learning_rate": 0.0001894591819493365, "loss": 1.3817, "step": 4071 }, { "epoch": 0.05291378282543617, "grad_norm": 0.4001814126968384, "learning_rate": 0.0001894565824874251, "loss": 1.4828, "step": 4072 }, { "epoch": 0.05292677736935204, "grad_norm": 0.3617863953113556, "learning_rate": 0.00018945398302551374, "loss": 1.3612, "step": 4073 }, { "epoch": 0.052939771913267916, "grad_norm": 0.3365032374858856, "learning_rate": 0.00018945138356360233, "loss": 1.3973, "step": 4074 }, { "epoch": 0.05295276645718379, "grad_norm": 0.38645651936531067, "learning_rate": 0.00018944878410169096, "loss": 1.2994, "step": 4075 }, { "epoch": 0.05296576100109966, "grad_norm": 0.42820626497268677, "learning_rate": 0.00018944618463977956, "loss": 1.4455, "step": 4076 }, { "epoch": 0.052978755545015535, "grad_norm": 0.3983556926250458, "learning_rate": 0.0001894435851778682, "loss": 1.5465, "step": 4077 }, { "epoch": 0.05299175008893141, "grad_norm": 0.4455036520957947, "learning_rate": 0.0001894409857159568, "loss": 1.5994, "step": 4078 }, { "epoch": 0.05300474463284728, "grad_norm": 0.4224860668182373, "learning_rate": 0.00018943838625404543, "loss": 1.4456, "step": 4079 }, { "epoch": 0.053017739176763154, "grad_norm": 0.38385462760925293, "learning_rate": 0.00018943578679213403, "loss": 1.2557, "step": 4080 }, { "epoch": 0.05303073372067903, "grad_norm": 0.3887278139591217, "learning_rate": 0.00018943318733022265, "loss": 1.4912, "step": 4081 }, { "epoch": 0.0530437282645949, "grad_norm": 0.296936959028244, "learning_rate": 0.00018943058786831128, "loss": 1.3998, "step": 4082 }, { "epoch": 0.05305672280851078, "grad_norm": 0.3960835635662079, "learning_rate": 0.00018942798840639987, "loss": 1.3654, "step": 4083 }, { "epoch": 0.05306971735242665, "grad_norm": 0.29843807220458984, "learning_rate": 0.0001894253889444885, "loss": 1.3795, "step": 4084 }, { "epoch": 0.053082711896342526, "grad_norm": 0.4079810380935669, "learning_rate": 0.00018942278948257712, "loss": 1.3624, "step": 4085 }, { "epoch": 0.0530957064402584, "grad_norm": 0.4206112027168274, "learning_rate": 0.00018942019002066575, "loss": 1.3792, "step": 4086 }, { "epoch": 0.05310870098417427, "grad_norm": 0.3746127784252167, "learning_rate": 0.00018941759055875434, "loss": 1.578, "step": 4087 }, { "epoch": 0.053121695528090145, "grad_norm": 0.3708513081073761, "learning_rate": 0.00018941499109684294, "loss": 1.5625, "step": 4088 }, { "epoch": 0.05313469007200602, "grad_norm": 0.3918353021144867, "learning_rate": 0.0001894123916349316, "loss": 1.4684, "step": 4089 }, { "epoch": 0.05314768461592189, "grad_norm": 0.3920225203037262, "learning_rate": 0.0001894097921730202, "loss": 1.2668, "step": 4090 }, { "epoch": 0.05316067915983776, "grad_norm": 0.4823001027107239, "learning_rate": 0.00018940719271110881, "loss": 1.5541, "step": 4091 }, { "epoch": 0.053173673703753636, "grad_norm": 0.35504278540611267, "learning_rate": 0.0001894045932491974, "loss": 1.4308, "step": 4092 }, { "epoch": 0.05318666824766951, "grad_norm": 0.4522918462753296, "learning_rate": 0.00018940199378728604, "loss": 1.5973, "step": 4093 }, { "epoch": 0.05319966279158538, "grad_norm": 0.4165440797805786, "learning_rate": 0.00018939939432537466, "loss": 1.4632, "step": 4094 }, { "epoch": 0.053212657335501255, "grad_norm": 0.5297533273696899, "learning_rate": 0.00018939679486346326, "loss": 1.5272, "step": 4095 }, { "epoch": 0.05322565187941713, "grad_norm": 0.38570156693458557, "learning_rate": 0.00018939419540155188, "loss": 1.2312, "step": 4096 }, { "epoch": 0.053238646423333, "grad_norm": 0.3549193739891052, "learning_rate": 0.0001893915959396405, "loss": 1.2436, "step": 4097 }, { "epoch": 0.053251640967248874, "grad_norm": 0.4733002483844757, "learning_rate": 0.00018938899647772913, "loss": 1.5235, "step": 4098 }, { "epoch": 0.05326463551116475, "grad_norm": 0.3299548327922821, "learning_rate": 0.00018938639701581773, "loss": 1.4887, "step": 4099 }, { "epoch": 0.05327763005508062, "grad_norm": 0.37195706367492676, "learning_rate": 0.00018938379755390635, "loss": 1.4369, "step": 4100 }, { "epoch": 0.05329062459899649, "grad_norm": 0.4118349254131317, "learning_rate": 0.00018938119809199498, "loss": 1.4259, "step": 4101 }, { "epoch": 0.05330361914291237, "grad_norm": 0.4443964660167694, "learning_rate": 0.00018937859863008358, "loss": 1.5751, "step": 4102 }, { "epoch": 0.053316613686828246, "grad_norm": 0.19295981526374817, "learning_rate": 0.0001893759991681722, "loss": 1.2777, "step": 4103 }, { "epoch": 0.05332960823074412, "grad_norm": 0.3431297540664673, "learning_rate": 0.00018937339970626082, "loss": 1.4713, "step": 4104 }, { "epoch": 0.05334260277465999, "grad_norm": 0.3754962682723999, "learning_rate": 0.00018937080024434942, "loss": 1.5343, "step": 4105 }, { "epoch": 0.053355597318575865, "grad_norm": 0.41893696784973145, "learning_rate": 0.00018936820078243805, "loss": 1.3261, "step": 4106 }, { "epoch": 0.05336859186249174, "grad_norm": 0.3288760781288147, "learning_rate": 0.00018936560132052664, "loss": 1.4207, "step": 4107 }, { "epoch": 0.05338158640640761, "grad_norm": 0.5319237112998962, "learning_rate": 0.0001893630018586153, "loss": 1.5312, "step": 4108 }, { "epoch": 0.053394580950323484, "grad_norm": 0.42209652066230774, "learning_rate": 0.0001893604023967039, "loss": 1.5887, "step": 4109 }, { "epoch": 0.05340757549423936, "grad_norm": 0.4267975389957428, "learning_rate": 0.00018935780293479252, "loss": 1.578, "step": 4110 }, { "epoch": 0.05342057003815523, "grad_norm": 0.39445337653160095, "learning_rate": 0.00018935520347288111, "loss": 1.3847, "step": 4111 }, { "epoch": 0.0534335645820711, "grad_norm": 0.41110867261886597, "learning_rate": 0.00018935260401096974, "loss": 1.5021, "step": 4112 }, { "epoch": 0.053446559125986975, "grad_norm": 0.5017188787460327, "learning_rate": 0.00018935000454905836, "loss": 1.5875, "step": 4113 }, { "epoch": 0.05345955366990285, "grad_norm": 0.365742951631546, "learning_rate": 0.00018934740508714696, "loss": 1.4253, "step": 4114 }, { "epoch": 0.05347254821381872, "grad_norm": 0.37313637137413025, "learning_rate": 0.00018934480562523559, "loss": 1.5807, "step": 4115 }, { "epoch": 0.053485542757734594, "grad_norm": 0.32133063673973083, "learning_rate": 0.0001893422061633242, "loss": 1.3378, "step": 4116 }, { "epoch": 0.05349853730165047, "grad_norm": 0.36928263306617737, "learning_rate": 0.0001893396067014128, "loss": 1.361, "step": 4117 }, { "epoch": 0.05351153184556634, "grad_norm": 0.3204761743545532, "learning_rate": 0.00018933700723950143, "loss": 1.5236, "step": 4118 }, { "epoch": 0.05352452638948221, "grad_norm": 0.34568074345588684, "learning_rate": 0.00018933440777759003, "loss": 1.4459, "step": 4119 }, { "epoch": 0.053537520933398086, "grad_norm": 0.4119112193584442, "learning_rate": 0.00018933180831567868, "loss": 1.426, "step": 4120 }, { "epoch": 0.053550515477313966, "grad_norm": 0.32223182916641235, "learning_rate": 0.00018932920885376728, "loss": 1.3968, "step": 4121 }, { "epoch": 0.05356351002122984, "grad_norm": 0.37836843729019165, "learning_rate": 0.0001893266093918559, "loss": 1.4545, "step": 4122 }, { "epoch": 0.05357650456514571, "grad_norm": 0.3146507740020752, "learning_rate": 0.0001893240099299445, "loss": 1.1039, "step": 4123 }, { "epoch": 0.053589499109061585, "grad_norm": 0.39077362418174744, "learning_rate": 0.00018932141046803312, "loss": 1.3596, "step": 4124 }, { "epoch": 0.05360249365297746, "grad_norm": 0.44414275884628296, "learning_rate": 0.00018931881100612175, "loss": 1.3353, "step": 4125 }, { "epoch": 0.05361548819689333, "grad_norm": 0.35662269592285156, "learning_rate": 0.00018931621154421035, "loss": 1.3744, "step": 4126 }, { "epoch": 0.053628482740809204, "grad_norm": 0.3510833978652954, "learning_rate": 0.00018931361208229897, "loss": 1.4567, "step": 4127 }, { "epoch": 0.05364147728472508, "grad_norm": 0.4733801484107971, "learning_rate": 0.0001893110126203876, "loss": 1.571, "step": 4128 }, { "epoch": 0.05365447182864095, "grad_norm": 0.46760106086730957, "learning_rate": 0.0001893084131584762, "loss": 1.4941, "step": 4129 }, { "epoch": 0.05366746637255682, "grad_norm": 0.3219572603702545, "learning_rate": 0.00018930581369656482, "loss": 1.4493, "step": 4130 }, { "epoch": 0.053680460916472696, "grad_norm": 0.32618248462677, "learning_rate": 0.00018930321423465341, "loss": 1.3629, "step": 4131 }, { "epoch": 0.05369345546038857, "grad_norm": 0.28745201230049133, "learning_rate": 0.00018930061477274207, "loss": 1.1966, "step": 4132 }, { "epoch": 0.05370645000430444, "grad_norm": 0.41781213879585266, "learning_rate": 0.00018929801531083066, "loss": 1.4458, "step": 4133 }, { "epoch": 0.053719444548220315, "grad_norm": 0.42739737033843994, "learning_rate": 0.0001892954158489193, "loss": 1.4283, "step": 4134 }, { "epoch": 0.05373243909213619, "grad_norm": 0.33842942118644714, "learning_rate": 0.00018929281638700788, "loss": 1.5339, "step": 4135 }, { "epoch": 0.05374543363605206, "grad_norm": 0.31668564677238464, "learning_rate": 0.0001892902169250965, "loss": 1.5259, "step": 4136 }, { "epoch": 0.05375842817996793, "grad_norm": 0.44656166434288025, "learning_rate": 0.00018928761746318513, "loss": 1.4575, "step": 4137 }, { "epoch": 0.053771422723883806, "grad_norm": 0.35060915350914, "learning_rate": 0.00018928501800127373, "loss": 1.3341, "step": 4138 }, { "epoch": 0.05378441726779968, "grad_norm": 0.42004090547561646, "learning_rate": 0.00018928241853936238, "loss": 1.3517, "step": 4139 }, { "epoch": 0.05379741181171556, "grad_norm": 0.37880387902259827, "learning_rate": 0.00018927981907745098, "loss": 1.3493, "step": 4140 }, { "epoch": 0.05381040635563143, "grad_norm": 0.4265282452106476, "learning_rate": 0.0001892772196155396, "loss": 1.4873, "step": 4141 }, { "epoch": 0.053823400899547305, "grad_norm": 0.24523285031318665, "learning_rate": 0.0001892746201536282, "loss": 1.4249, "step": 4142 }, { "epoch": 0.05383639544346318, "grad_norm": 0.4270896017551422, "learning_rate": 0.00018927202069171683, "loss": 1.4341, "step": 4143 }, { "epoch": 0.05384938998737905, "grad_norm": 0.46497875452041626, "learning_rate": 0.00018926942122980545, "loss": 1.4216, "step": 4144 }, { "epoch": 0.053862384531294924, "grad_norm": 0.3323686122894287, "learning_rate": 0.00018926682176789405, "loss": 1.36, "step": 4145 }, { "epoch": 0.0538753790752108, "grad_norm": 0.43498021364212036, "learning_rate": 0.00018926422230598267, "loss": 1.3576, "step": 4146 }, { "epoch": 0.05388837361912667, "grad_norm": 0.4388672411441803, "learning_rate": 0.0001892616228440713, "loss": 1.4025, "step": 4147 }, { "epoch": 0.05390136816304254, "grad_norm": 0.3581877052783966, "learning_rate": 0.0001892590233821599, "loss": 1.5512, "step": 4148 }, { "epoch": 0.053914362706958416, "grad_norm": 0.24979346990585327, "learning_rate": 0.00018925642392024852, "loss": 1.4426, "step": 4149 }, { "epoch": 0.05392735725087429, "grad_norm": 0.2491956204175949, "learning_rate": 0.00018925382445833712, "loss": 1.3142, "step": 4150 }, { "epoch": 0.05394035179479016, "grad_norm": 0.4160778820514679, "learning_rate": 0.00018925122499642577, "loss": 1.4305, "step": 4151 }, { "epoch": 0.053953346338706035, "grad_norm": 0.37397119402885437, "learning_rate": 0.00018924862553451437, "loss": 1.3388, "step": 4152 }, { "epoch": 0.05396634088262191, "grad_norm": 0.21446822583675385, "learning_rate": 0.000189246026072603, "loss": 1.0965, "step": 4153 }, { "epoch": 0.05397933542653778, "grad_norm": 0.5060781240463257, "learning_rate": 0.0001892434266106916, "loss": 1.478, "step": 4154 }, { "epoch": 0.053992329970453654, "grad_norm": 0.39640262722969055, "learning_rate": 0.0001892408271487802, "loss": 1.3321, "step": 4155 }, { "epoch": 0.054005324514369527, "grad_norm": 0.4792666435241699, "learning_rate": 0.00018923822768686884, "loss": 1.399, "step": 4156 }, { "epoch": 0.0540183190582854, "grad_norm": 0.2827445864677429, "learning_rate": 0.00018923562822495743, "loss": 1.4774, "step": 4157 }, { "epoch": 0.05403131360220127, "grad_norm": 0.41037750244140625, "learning_rate": 0.00018923302876304606, "loss": 1.7294, "step": 4158 }, { "epoch": 0.05404430814611715, "grad_norm": 0.3872053623199463, "learning_rate": 0.00018923042930113468, "loss": 1.7031, "step": 4159 }, { "epoch": 0.054057302690033025, "grad_norm": 0.40729260444641113, "learning_rate": 0.00018922782983922328, "loss": 1.5746, "step": 4160 }, { "epoch": 0.0540702972339489, "grad_norm": 0.2942042052745819, "learning_rate": 0.0001892252303773119, "loss": 1.366, "step": 4161 }, { "epoch": 0.05408329177786477, "grad_norm": 0.3768075704574585, "learning_rate": 0.0001892226309154005, "loss": 1.394, "step": 4162 }, { "epoch": 0.054096286321780644, "grad_norm": 0.33810779452323914, "learning_rate": 0.00018922003145348915, "loss": 1.3207, "step": 4163 }, { "epoch": 0.05410928086569652, "grad_norm": 0.3797534704208374, "learning_rate": 0.00018921743199157775, "loss": 1.3949, "step": 4164 }, { "epoch": 0.05412227540961239, "grad_norm": 0.3625997006893158, "learning_rate": 0.00018921483252966638, "loss": 1.4761, "step": 4165 }, { "epoch": 0.05413526995352826, "grad_norm": 0.31058254837989807, "learning_rate": 0.00018921223306775497, "loss": 1.4738, "step": 4166 }, { "epoch": 0.054148264497444136, "grad_norm": 0.31756383180618286, "learning_rate": 0.0001892096336058436, "loss": 1.4252, "step": 4167 }, { "epoch": 0.05416125904136001, "grad_norm": 0.4506288468837738, "learning_rate": 0.00018920703414393222, "loss": 1.3989, "step": 4168 }, { "epoch": 0.05417425358527588, "grad_norm": 0.30721551179885864, "learning_rate": 0.00018920443468202082, "loss": 1.5109, "step": 4169 }, { "epoch": 0.054187248129191755, "grad_norm": 0.34977883100509644, "learning_rate": 0.00018920183522010944, "loss": 1.4504, "step": 4170 }, { "epoch": 0.05420024267310763, "grad_norm": 0.3549085855484009, "learning_rate": 0.00018919923575819807, "loss": 1.5236, "step": 4171 }, { "epoch": 0.0542132372170235, "grad_norm": 0.45250678062438965, "learning_rate": 0.00018919663629628667, "loss": 1.3842, "step": 4172 }, { "epoch": 0.054226231760939374, "grad_norm": 0.39982518553733826, "learning_rate": 0.0001891940368343753, "loss": 1.4139, "step": 4173 }, { "epoch": 0.05423922630485525, "grad_norm": 0.3891928195953369, "learning_rate": 0.00018919143737246391, "loss": 1.4246, "step": 4174 }, { "epoch": 0.05425222084877112, "grad_norm": 0.417416512966156, "learning_rate": 0.00018918883791055254, "loss": 1.7258, "step": 4175 }, { "epoch": 0.05426521539268699, "grad_norm": 0.3698502779006958, "learning_rate": 0.00018918623844864114, "loss": 1.4572, "step": 4176 }, { "epoch": 0.054278209936602866, "grad_norm": 0.32752782106399536, "learning_rate": 0.00018918363898672976, "loss": 1.5054, "step": 4177 }, { "epoch": 0.054291204480518745, "grad_norm": 0.34790733456611633, "learning_rate": 0.00018918103952481839, "loss": 1.5698, "step": 4178 }, { "epoch": 0.05430419902443462, "grad_norm": 0.42732593417167664, "learning_rate": 0.00018917844006290698, "loss": 1.5238, "step": 4179 }, { "epoch": 0.05431719356835049, "grad_norm": 0.3276183009147644, "learning_rate": 0.0001891758406009956, "loss": 1.3563, "step": 4180 }, { "epoch": 0.054330188112266364, "grad_norm": 0.38544175028800964, "learning_rate": 0.0001891732411390842, "loss": 1.613, "step": 4181 }, { "epoch": 0.05434318265618224, "grad_norm": 0.4363090693950653, "learning_rate": 0.00018917064167717286, "loss": 1.302, "step": 4182 }, { "epoch": 0.05435617720009811, "grad_norm": 0.39787256717681885, "learning_rate": 0.00018916804221526145, "loss": 1.4739, "step": 4183 }, { "epoch": 0.05436917174401398, "grad_norm": 0.4567318856716156, "learning_rate": 0.00018916544275335005, "loss": 1.538, "step": 4184 }, { "epoch": 0.054382166287929856, "grad_norm": 0.35571911931037903, "learning_rate": 0.00018916284329143868, "loss": 1.229, "step": 4185 }, { "epoch": 0.05439516083184573, "grad_norm": 0.31598547101020813, "learning_rate": 0.0001891602438295273, "loss": 1.3032, "step": 4186 }, { "epoch": 0.0544081553757616, "grad_norm": 0.4143746793270111, "learning_rate": 0.00018915764436761592, "loss": 1.181, "step": 4187 }, { "epoch": 0.054421149919677475, "grad_norm": 0.32978639006614685, "learning_rate": 0.00018915504490570452, "loss": 1.4337, "step": 4188 }, { "epoch": 0.05443414446359335, "grad_norm": 0.3699577748775482, "learning_rate": 0.00018915244544379315, "loss": 1.4999, "step": 4189 }, { "epoch": 0.05444713900750922, "grad_norm": 0.43622177839279175, "learning_rate": 0.00018914984598188177, "loss": 1.5525, "step": 4190 }, { "epoch": 0.054460133551425094, "grad_norm": 0.6816190481185913, "learning_rate": 0.00018914724651997037, "loss": 1.5219, "step": 4191 }, { "epoch": 0.05447312809534097, "grad_norm": 0.42073604464530945, "learning_rate": 0.000189144647058059, "loss": 1.4336, "step": 4192 }, { "epoch": 0.05448612263925684, "grad_norm": 0.6639722585678101, "learning_rate": 0.0001891420475961476, "loss": 1.4878, "step": 4193 }, { "epoch": 0.05449911718317271, "grad_norm": 0.40993350744247437, "learning_rate": 0.00018913944813423624, "loss": 1.4694, "step": 4194 }, { "epoch": 0.054512111727088586, "grad_norm": 0.39893394708633423, "learning_rate": 0.00018913684867232484, "loss": 1.4709, "step": 4195 }, { "epoch": 0.05452510627100446, "grad_norm": 0.4202471971511841, "learning_rate": 0.00018913424921041346, "loss": 1.5103, "step": 4196 }, { "epoch": 0.05453810081492034, "grad_norm": 0.38577938079833984, "learning_rate": 0.00018913164974850206, "loss": 1.4811, "step": 4197 }, { "epoch": 0.05455109535883621, "grad_norm": 0.40890538692474365, "learning_rate": 0.00018912905028659069, "loss": 1.1876, "step": 4198 }, { "epoch": 0.054564089902752085, "grad_norm": 0.37090209126472473, "learning_rate": 0.0001891264508246793, "loss": 1.505, "step": 4199 }, { "epoch": 0.05457708444666796, "grad_norm": 0.36616623401641846, "learning_rate": 0.0001891238513627679, "loss": 1.4369, "step": 4200 }, { "epoch": 0.05459007899058383, "grad_norm": 0.33773836493492126, "learning_rate": 0.00018912125190085653, "loss": 1.4114, "step": 4201 }, { "epoch": 0.0546030735344997, "grad_norm": 0.44164177775382996, "learning_rate": 0.00018911865243894516, "loss": 1.3541, "step": 4202 }, { "epoch": 0.054616068078415576, "grad_norm": 0.3994191586971283, "learning_rate": 0.00018911605297703375, "loss": 1.4149, "step": 4203 }, { "epoch": 0.05462906262233145, "grad_norm": 0.40110909938812256, "learning_rate": 0.00018911345351512238, "loss": 1.5296, "step": 4204 }, { "epoch": 0.05464205716624732, "grad_norm": 0.44197770953178406, "learning_rate": 0.00018911085405321098, "loss": 1.3985, "step": 4205 }, { "epoch": 0.054655051710163195, "grad_norm": 0.333254337310791, "learning_rate": 0.00018910825459129963, "loss": 1.2687, "step": 4206 }, { "epoch": 0.05466804625407907, "grad_norm": 0.38280072808265686, "learning_rate": 0.00018910565512938822, "loss": 1.4165, "step": 4207 }, { "epoch": 0.05468104079799494, "grad_norm": 0.43307724595069885, "learning_rate": 0.00018910305566747685, "loss": 1.5463, "step": 4208 }, { "epoch": 0.054694035341910814, "grad_norm": 0.39417174458503723, "learning_rate": 0.00018910045620556545, "loss": 1.4257, "step": 4209 }, { "epoch": 0.05470702988582669, "grad_norm": 0.4331287443637848, "learning_rate": 0.00018909785674365407, "loss": 1.453, "step": 4210 }, { "epoch": 0.05472002442974256, "grad_norm": 0.33602091670036316, "learning_rate": 0.0001890952572817427, "loss": 1.2553, "step": 4211 }, { "epoch": 0.05473301897365843, "grad_norm": 0.43651923537254333, "learning_rate": 0.0001890926578198313, "loss": 1.4727, "step": 4212 }, { "epoch": 0.054746013517574306, "grad_norm": 0.47823551297187805, "learning_rate": 0.00018909005835791992, "loss": 1.5448, "step": 4213 }, { "epoch": 0.05475900806149018, "grad_norm": 0.3251281976699829, "learning_rate": 0.00018908745889600854, "loss": 1.3866, "step": 4214 }, { "epoch": 0.05477200260540605, "grad_norm": 0.3628029227256775, "learning_rate": 0.00018908485943409714, "loss": 1.3968, "step": 4215 }, { "epoch": 0.05478499714932193, "grad_norm": 0.35794857144355774, "learning_rate": 0.00018908225997218576, "loss": 1.4602, "step": 4216 }, { "epoch": 0.054797991693237805, "grad_norm": 0.4379797875881195, "learning_rate": 0.0001890796605102744, "loss": 1.5374, "step": 4217 }, { "epoch": 0.05481098623715368, "grad_norm": 0.47001224756240845, "learning_rate": 0.000189077061048363, "loss": 1.6732, "step": 4218 }, { "epoch": 0.05482398078106955, "grad_norm": 0.38813579082489014, "learning_rate": 0.0001890744615864516, "loss": 1.3578, "step": 4219 }, { "epoch": 0.054836975324985424, "grad_norm": 0.4324837923049927, "learning_rate": 0.00018907186212454023, "loss": 1.7067, "step": 4220 }, { "epoch": 0.0548499698689013, "grad_norm": 0.4201784133911133, "learning_rate": 0.00018906926266262886, "loss": 1.5515, "step": 4221 }, { "epoch": 0.05486296441281717, "grad_norm": 0.4128369987010956, "learning_rate": 0.00018906666320071746, "loss": 1.4877, "step": 4222 }, { "epoch": 0.05487595895673304, "grad_norm": 0.42768174409866333, "learning_rate": 0.00018906406373880608, "loss": 1.3831, "step": 4223 }, { "epoch": 0.054888953500648915, "grad_norm": 0.3972453773021698, "learning_rate": 0.00018906146427689468, "loss": 1.5632, "step": 4224 }, { "epoch": 0.05490194804456479, "grad_norm": 0.45946013927459717, "learning_rate": 0.00018905886481498333, "loss": 1.5957, "step": 4225 }, { "epoch": 0.05491494258848066, "grad_norm": 0.4405013918876648, "learning_rate": 0.00018905626535307193, "loss": 1.562, "step": 4226 }, { "epoch": 0.054927937132396534, "grad_norm": 0.4791746139526367, "learning_rate": 0.00018905366589116052, "loss": 1.5667, "step": 4227 }, { "epoch": 0.05494093167631241, "grad_norm": 0.3574109971523285, "learning_rate": 0.00018905106642924915, "loss": 1.3823, "step": 4228 }, { "epoch": 0.05495392622022828, "grad_norm": 0.41632288694381714, "learning_rate": 0.00018904846696733777, "loss": 1.4108, "step": 4229 }, { "epoch": 0.05496692076414415, "grad_norm": 0.3771124482154846, "learning_rate": 0.0001890458675054264, "loss": 1.3334, "step": 4230 }, { "epoch": 0.054979915308060026, "grad_norm": 0.3680572211742401, "learning_rate": 0.000189043268043515, "loss": 1.561, "step": 4231 }, { "epoch": 0.0549929098519759, "grad_norm": 0.4603727459907532, "learning_rate": 0.00018904066858160362, "loss": 1.5374, "step": 4232 }, { "epoch": 0.05500590439589177, "grad_norm": 0.3189932107925415, "learning_rate": 0.00018903806911969224, "loss": 1.4432, "step": 4233 }, { "epoch": 0.055018898939807645, "grad_norm": 0.23667339980602264, "learning_rate": 0.00018903546965778084, "loss": 1.3938, "step": 4234 }, { "epoch": 0.055031893483723525, "grad_norm": 0.37380659580230713, "learning_rate": 0.00018903287019586947, "loss": 1.3736, "step": 4235 }, { "epoch": 0.0550448880276394, "grad_norm": 0.45473358035087585, "learning_rate": 0.00018903027073395806, "loss": 1.4154, "step": 4236 }, { "epoch": 0.05505788257155527, "grad_norm": 0.43753811717033386, "learning_rate": 0.00018902767127204671, "loss": 1.5389, "step": 4237 }, { "epoch": 0.055070877115471144, "grad_norm": 0.42147397994995117, "learning_rate": 0.0001890250718101353, "loss": 1.4252, "step": 4238 }, { "epoch": 0.05508387165938702, "grad_norm": 0.396310955286026, "learning_rate": 0.0001890224723482239, "loss": 1.3903, "step": 4239 }, { "epoch": 0.05509686620330289, "grad_norm": 0.4641113579273224, "learning_rate": 0.00018901987288631253, "loss": 1.3335, "step": 4240 }, { "epoch": 0.05510986074721876, "grad_norm": 0.32203537225723267, "learning_rate": 0.00018901727342440116, "loss": 1.3022, "step": 4241 }, { "epoch": 0.055122855291134636, "grad_norm": 0.41443485021591187, "learning_rate": 0.00018901467396248978, "loss": 1.4563, "step": 4242 }, { "epoch": 0.05513584983505051, "grad_norm": 0.3391130566596985, "learning_rate": 0.00018901207450057838, "loss": 1.5188, "step": 4243 }, { "epoch": 0.05514884437896638, "grad_norm": 0.41159477829933167, "learning_rate": 0.000189009475038667, "loss": 1.5471, "step": 4244 }, { "epoch": 0.055161838922882254, "grad_norm": 0.35348883271217346, "learning_rate": 0.00018900687557675563, "loss": 1.3703, "step": 4245 }, { "epoch": 0.05517483346679813, "grad_norm": 0.5232519507408142, "learning_rate": 0.00018900427611484423, "loss": 1.3852, "step": 4246 }, { "epoch": 0.055187828010714, "grad_norm": 0.34217482805252075, "learning_rate": 0.00018900167665293285, "loss": 1.353, "step": 4247 }, { "epoch": 0.05520082255462987, "grad_norm": 0.5088396072387695, "learning_rate": 0.00018899907719102148, "loss": 1.4171, "step": 4248 }, { "epoch": 0.055213817098545746, "grad_norm": 0.42737191915512085, "learning_rate": 0.0001889964777291101, "loss": 1.3982, "step": 4249 }, { "epoch": 0.05522681164246162, "grad_norm": 0.34471267461776733, "learning_rate": 0.0001889938782671987, "loss": 1.4787, "step": 4250 }, { "epoch": 0.05523980618637749, "grad_norm": 0.2757571339607239, "learning_rate": 0.0001889912788052873, "loss": 1.4539, "step": 4251 }, { "epoch": 0.055252800730293365, "grad_norm": 0.3801814317703247, "learning_rate": 0.00018898867934337595, "loss": 1.2948, "step": 4252 }, { "epoch": 0.05526579527420924, "grad_norm": 0.4791862666606903, "learning_rate": 0.00018898607988146454, "loss": 1.4388, "step": 4253 }, { "epoch": 0.05527878981812512, "grad_norm": 0.39182424545288086, "learning_rate": 0.00018898348041955317, "loss": 1.3614, "step": 4254 }, { "epoch": 0.05529178436204099, "grad_norm": 0.39498671889305115, "learning_rate": 0.00018898088095764177, "loss": 1.4235, "step": 4255 }, { "epoch": 0.055304778905956864, "grad_norm": 0.3955695331096649, "learning_rate": 0.0001889782814957304, "loss": 1.3083, "step": 4256 }, { "epoch": 0.05531777344987274, "grad_norm": 0.34899240732192993, "learning_rate": 0.00018897568203381901, "loss": 1.4599, "step": 4257 }, { "epoch": 0.05533076799378861, "grad_norm": 0.3629237115383148, "learning_rate": 0.0001889730825719076, "loss": 1.4181, "step": 4258 }, { "epoch": 0.05534376253770448, "grad_norm": 0.46008944511413574, "learning_rate": 0.00018897048310999624, "loss": 1.7422, "step": 4259 }, { "epoch": 0.055356757081620356, "grad_norm": 0.38336610794067383, "learning_rate": 0.00018896788364808486, "loss": 1.3975, "step": 4260 }, { "epoch": 0.05536975162553623, "grad_norm": 0.38027679920196533, "learning_rate": 0.00018896528418617349, "loss": 1.493, "step": 4261 }, { "epoch": 0.0553827461694521, "grad_norm": 0.356699138879776, "learning_rate": 0.00018896268472426208, "loss": 1.382, "step": 4262 }, { "epoch": 0.055395740713367975, "grad_norm": 0.4037978947162628, "learning_rate": 0.0001889600852623507, "loss": 1.2951, "step": 4263 }, { "epoch": 0.05540873525728385, "grad_norm": 0.42546093463897705, "learning_rate": 0.00018895748580043933, "loss": 1.3919, "step": 4264 }, { "epoch": 0.05542172980119972, "grad_norm": 0.4108228087425232, "learning_rate": 0.00018895488633852793, "loss": 1.7336, "step": 4265 }, { "epoch": 0.055434724345115594, "grad_norm": 0.37069782614707947, "learning_rate": 0.00018895228687661655, "loss": 1.3827, "step": 4266 }, { "epoch": 0.055447718889031467, "grad_norm": 0.3522305488586426, "learning_rate": 0.00018894968741470515, "loss": 1.3179, "step": 4267 }, { "epoch": 0.05546071343294734, "grad_norm": 0.3672562837600708, "learning_rate": 0.00018894708795279378, "loss": 1.3482, "step": 4268 }, { "epoch": 0.05547370797686321, "grad_norm": 0.40353137254714966, "learning_rate": 0.0001889444884908824, "loss": 1.3486, "step": 4269 }, { "epoch": 0.055486702520779085, "grad_norm": 0.471489280462265, "learning_rate": 0.000188941889028971, "loss": 1.5052, "step": 4270 }, { "epoch": 0.05549969706469496, "grad_norm": 0.3540598154067993, "learning_rate": 0.00018893928956705962, "loss": 1.3245, "step": 4271 }, { "epoch": 0.05551269160861083, "grad_norm": 0.3782809376716614, "learning_rate": 0.00018893669010514825, "loss": 1.3392, "step": 4272 }, { "epoch": 0.05552568615252671, "grad_norm": 0.3543851375579834, "learning_rate": 0.00018893409064323687, "loss": 1.3055, "step": 4273 }, { "epoch": 0.055538680696442584, "grad_norm": 0.4358716309070587, "learning_rate": 0.00018893149118132547, "loss": 1.3783, "step": 4274 }, { "epoch": 0.05555167524035846, "grad_norm": 0.3005434572696686, "learning_rate": 0.0001889288917194141, "loss": 1.2415, "step": 4275 }, { "epoch": 0.05556466978427433, "grad_norm": 0.2737489342689514, "learning_rate": 0.00018892629225750272, "loss": 1.3817, "step": 4276 }, { "epoch": 0.0555776643281902, "grad_norm": 0.3879261314868927, "learning_rate": 0.00018892369279559131, "loss": 1.3333, "step": 4277 }, { "epoch": 0.055590658872106076, "grad_norm": 0.3781391382217407, "learning_rate": 0.00018892109333367994, "loss": 1.5116, "step": 4278 }, { "epoch": 0.05560365341602195, "grad_norm": 0.43596363067626953, "learning_rate": 0.00018891849387176854, "loss": 1.3875, "step": 4279 }, { "epoch": 0.05561664795993782, "grad_norm": 0.22721873223781586, "learning_rate": 0.0001889158944098572, "loss": 1.1289, "step": 4280 }, { "epoch": 0.055629642503853695, "grad_norm": 0.3520500659942627, "learning_rate": 0.00018891329494794579, "loss": 1.4431, "step": 4281 }, { "epoch": 0.05564263704776957, "grad_norm": 0.47316721081733704, "learning_rate": 0.00018891069548603438, "loss": 1.651, "step": 4282 }, { "epoch": 0.05565563159168544, "grad_norm": 0.4087876081466675, "learning_rate": 0.000188908096024123, "loss": 1.4293, "step": 4283 }, { "epoch": 0.055668626135601314, "grad_norm": 0.46811285614967346, "learning_rate": 0.00018890549656221163, "loss": 1.4035, "step": 4284 }, { "epoch": 0.05568162067951719, "grad_norm": 0.31009799242019653, "learning_rate": 0.00018890289710030026, "loss": 1.3051, "step": 4285 }, { "epoch": 0.05569461522343306, "grad_norm": 0.32416224479675293, "learning_rate": 0.00018890029763838885, "loss": 1.3752, "step": 4286 }, { "epoch": 0.05570760976734893, "grad_norm": 0.2768734395503998, "learning_rate": 0.00018889769817647748, "loss": 1.5295, "step": 4287 }, { "epoch": 0.055720604311264806, "grad_norm": 0.3529180586338043, "learning_rate": 0.0001888950987145661, "loss": 1.3711, "step": 4288 }, { "epoch": 0.05573359885518068, "grad_norm": 0.31314295530319214, "learning_rate": 0.0001888924992526547, "loss": 1.1908, "step": 4289 }, { "epoch": 0.05574659339909655, "grad_norm": 0.3992103040218353, "learning_rate": 0.00018888989979074332, "loss": 1.3531, "step": 4290 }, { "epoch": 0.055759587943012424, "grad_norm": 0.3720290958881378, "learning_rate": 0.00018888730032883195, "loss": 1.5652, "step": 4291 }, { "epoch": 0.055772582486928304, "grad_norm": 0.38190796971321106, "learning_rate": 0.00018888470086692057, "loss": 1.4262, "step": 4292 }, { "epoch": 0.05578557703084418, "grad_norm": 0.39032599329948425, "learning_rate": 0.00018888210140500917, "loss": 1.4588, "step": 4293 }, { "epoch": 0.05579857157476005, "grad_norm": 0.4417930245399475, "learning_rate": 0.00018887950194309777, "loss": 1.4527, "step": 4294 }, { "epoch": 0.05581156611867592, "grad_norm": 0.25574052333831787, "learning_rate": 0.00018887690248118642, "loss": 1.4443, "step": 4295 }, { "epoch": 0.055824560662591796, "grad_norm": 0.4334050714969635, "learning_rate": 0.00018887430301927502, "loss": 1.2305, "step": 4296 }, { "epoch": 0.05583755520650767, "grad_norm": 0.4551560878753662, "learning_rate": 0.00018887170355736364, "loss": 1.4285, "step": 4297 }, { "epoch": 0.05585054975042354, "grad_norm": 0.29558664560317993, "learning_rate": 0.00018886910409545224, "loss": 1.2468, "step": 4298 }, { "epoch": 0.055863544294339415, "grad_norm": 0.3928694427013397, "learning_rate": 0.00018886650463354086, "loss": 1.4435, "step": 4299 }, { "epoch": 0.05587653883825529, "grad_norm": 0.3278985917568207, "learning_rate": 0.0001888639051716295, "loss": 1.3559, "step": 4300 }, { "epoch": 0.05588953338217116, "grad_norm": 0.3517388701438904, "learning_rate": 0.00018886130570971809, "loss": 1.2926, "step": 4301 }, { "epoch": 0.055902527926087034, "grad_norm": 0.41983991861343384, "learning_rate": 0.0001888587062478067, "loss": 1.2926, "step": 4302 }, { "epoch": 0.05591552247000291, "grad_norm": 0.35589921474456787, "learning_rate": 0.00018885610678589533, "loss": 1.6002, "step": 4303 }, { "epoch": 0.05592851701391878, "grad_norm": 0.3442300856113434, "learning_rate": 0.00018885350732398396, "loss": 1.3369, "step": 4304 }, { "epoch": 0.05594151155783465, "grad_norm": 0.38496631383895874, "learning_rate": 0.00018885090786207256, "loss": 1.4899, "step": 4305 }, { "epoch": 0.055954506101750526, "grad_norm": 0.3305857479572296, "learning_rate": 0.00018884830840016115, "loss": 1.4139, "step": 4306 }, { "epoch": 0.0559675006456664, "grad_norm": 0.3925778865814209, "learning_rate": 0.0001888457089382498, "loss": 1.4576, "step": 4307 }, { "epoch": 0.05598049518958227, "grad_norm": 0.4273711144924164, "learning_rate": 0.0001888431094763384, "loss": 1.2212, "step": 4308 }, { "epoch": 0.055993489733498145, "grad_norm": 0.3588268756866455, "learning_rate": 0.00018884051001442703, "loss": 1.5006, "step": 4309 }, { "epoch": 0.05600648427741402, "grad_norm": 0.33522024750709534, "learning_rate": 0.00018883791055251562, "loss": 1.2204, "step": 4310 }, { "epoch": 0.0560194788213299, "grad_norm": 0.4092654287815094, "learning_rate": 0.00018883531109060425, "loss": 1.359, "step": 4311 }, { "epoch": 0.05603247336524577, "grad_norm": 0.40513813495635986, "learning_rate": 0.00018883271162869287, "loss": 1.2697, "step": 4312 }, { "epoch": 0.05604546790916164, "grad_norm": 0.4017346501350403, "learning_rate": 0.00018883011216678147, "loss": 1.4096, "step": 4313 }, { "epoch": 0.056058462453077516, "grad_norm": 0.39384058117866516, "learning_rate": 0.0001888275127048701, "loss": 1.3911, "step": 4314 }, { "epoch": 0.05607145699699339, "grad_norm": 0.3191656172275543, "learning_rate": 0.00018882491324295872, "loss": 1.3967, "step": 4315 }, { "epoch": 0.05608445154090926, "grad_norm": 0.30934426188468933, "learning_rate": 0.00018882231378104734, "loss": 1.5199, "step": 4316 }, { "epoch": 0.056097446084825135, "grad_norm": 0.3824143707752228, "learning_rate": 0.00018881971431913594, "loss": 1.3707, "step": 4317 }, { "epoch": 0.05611044062874101, "grad_norm": 0.42951467633247375, "learning_rate": 0.00018881711485722457, "loss": 1.4281, "step": 4318 }, { "epoch": 0.05612343517265688, "grad_norm": 0.39471718668937683, "learning_rate": 0.0001888145153953132, "loss": 1.4594, "step": 4319 }, { "epoch": 0.056136429716572754, "grad_norm": 0.4263366758823395, "learning_rate": 0.0001888119159334018, "loss": 1.4854, "step": 4320 }, { "epoch": 0.05614942426048863, "grad_norm": 0.3760986030101776, "learning_rate": 0.0001888093164714904, "loss": 1.5372, "step": 4321 }, { "epoch": 0.0561624188044045, "grad_norm": 0.3160140812397003, "learning_rate": 0.00018880671700957904, "loss": 1.3305, "step": 4322 }, { "epoch": 0.05617541334832037, "grad_norm": 0.32864001393318176, "learning_rate": 0.00018880411754766763, "loss": 1.5169, "step": 4323 }, { "epoch": 0.056188407892236246, "grad_norm": 0.3781583309173584, "learning_rate": 0.00018880151808575626, "loss": 1.5385, "step": 4324 }, { "epoch": 0.05620140243615212, "grad_norm": 0.25325852632522583, "learning_rate": 0.00018879891862384486, "loss": 1.3887, "step": 4325 }, { "epoch": 0.05621439698006799, "grad_norm": 0.3994160592556, "learning_rate": 0.0001887963191619335, "loss": 1.3673, "step": 4326 }, { "epoch": 0.056227391523983865, "grad_norm": 0.36684292554855347, "learning_rate": 0.0001887937197000221, "loss": 1.6225, "step": 4327 }, { "epoch": 0.05624038606789974, "grad_norm": 0.36858800053596497, "learning_rate": 0.00018879112023811073, "loss": 1.2697, "step": 4328 }, { "epoch": 0.05625338061181561, "grad_norm": 0.2822013199329376, "learning_rate": 0.00018878852077619933, "loss": 1.3315, "step": 4329 }, { "epoch": 0.05626637515573149, "grad_norm": 0.34770163893699646, "learning_rate": 0.00018878592131428795, "loss": 1.2821, "step": 4330 }, { "epoch": 0.056279369699647364, "grad_norm": 0.3689379096031189, "learning_rate": 0.00018878332185237658, "loss": 1.5098, "step": 4331 }, { "epoch": 0.05629236424356324, "grad_norm": 0.3937860429286957, "learning_rate": 0.00018878072239046517, "loss": 1.3241, "step": 4332 }, { "epoch": 0.05630535878747911, "grad_norm": 0.3446391820907593, "learning_rate": 0.0001887781229285538, "loss": 1.5134, "step": 4333 }, { "epoch": 0.05631835333139498, "grad_norm": 0.32316312193870544, "learning_rate": 0.00018877552346664242, "loss": 1.3648, "step": 4334 }, { "epoch": 0.056331347875310855, "grad_norm": 0.4080737829208374, "learning_rate": 0.00018877292400473102, "loss": 1.4906, "step": 4335 }, { "epoch": 0.05634434241922673, "grad_norm": 0.4372391402721405, "learning_rate": 0.00018877032454281964, "loss": 1.4577, "step": 4336 }, { "epoch": 0.0563573369631426, "grad_norm": 0.3978181481361389, "learning_rate": 0.00018876772508090824, "loss": 1.5947, "step": 4337 }, { "epoch": 0.056370331507058474, "grad_norm": 0.4092581272125244, "learning_rate": 0.0001887651256189969, "loss": 1.5452, "step": 4338 }, { "epoch": 0.05638332605097435, "grad_norm": 0.29107651114463806, "learning_rate": 0.0001887625261570855, "loss": 1.4862, "step": 4339 }, { "epoch": 0.05639632059489022, "grad_norm": 0.36635684967041016, "learning_rate": 0.00018875992669517412, "loss": 1.4533, "step": 4340 }, { "epoch": 0.05640931513880609, "grad_norm": 0.3783576488494873, "learning_rate": 0.0001887573272332627, "loss": 1.4592, "step": 4341 }, { "epoch": 0.056422309682721966, "grad_norm": 0.47449570894241333, "learning_rate": 0.00018875472777135134, "loss": 1.4946, "step": 4342 }, { "epoch": 0.05643530422663784, "grad_norm": 0.3573222756385803, "learning_rate": 0.00018875212830943996, "loss": 1.1712, "step": 4343 }, { "epoch": 0.05644829877055371, "grad_norm": 0.35999277234077454, "learning_rate": 0.00018874952884752856, "loss": 1.3245, "step": 4344 }, { "epoch": 0.056461293314469585, "grad_norm": 0.4604869782924652, "learning_rate": 0.00018874692938561718, "loss": 1.5313, "step": 4345 }, { "epoch": 0.05647428785838546, "grad_norm": 0.40419793128967285, "learning_rate": 0.0001887443299237058, "loss": 1.5753, "step": 4346 }, { "epoch": 0.05648728240230133, "grad_norm": 0.43127113580703735, "learning_rate": 0.00018874173046179443, "loss": 1.3463, "step": 4347 }, { "epoch": 0.056500276946217204, "grad_norm": 0.38444408774375916, "learning_rate": 0.00018873913099988303, "loss": 1.3402, "step": 4348 }, { "epoch": 0.056513271490133084, "grad_norm": 0.3234100937843323, "learning_rate": 0.00018873653153797163, "loss": 1.6239, "step": 4349 }, { "epoch": 0.05652626603404896, "grad_norm": 0.3555113673210144, "learning_rate": 0.00018873393207606028, "loss": 1.3021, "step": 4350 }, { "epoch": 0.05653926057796483, "grad_norm": 0.36611834168434143, "learning_rate": 0.00018873133261414888, "loss": 1.5364, "step": 4351 }, { "epoch": 0.0565522551218807, "grad_norm": 0.4237237572669983, "learning_rate": 0.0001887287331522375, "loss": 1.5019, "step": 4352 }, { "epoch": 0.056565249665796576, "grad_norm": 0.38082775473594666, "learning_rate": 0.0001887261336903261, "loss": 1.5492, "step": 4353 }, { "epoch": 0.05657824420971245, "grad_norm": 0.3991524279117584, "learning_rate": 0.00018872353422841472, "loss": 1.4544, "step": 4354 }, { "epoch": 0.05659123875362832, "grad_norm": 0.4199187457561493, "learning_rate": 0.00018872093476650335, "loss": 1.5366, "step": 4355 }, { "epoch": 0.056604233297544194, "grad_norm": 0.35470521450042725, "learning_rate": 0.00018871833530459194, "loss": 1.5311, "step": 4356 }, { "epoch": 0.05661722784146007, "grad_norm": 0.2809489667415619, "learning_rate": 0.00018871573584268057, "loss": 1.2625, "step": 4357 }, { "epoch": 0.05663022238537594, "grad_norm": 0.3196016550064087, "learning_rate": 0.0001887131363807692, "loss": 1.4976, "step": 4358 }, { "epoch": 0.05664321692929181, "grad_norm": 0.408279687166214, "learning_rate": 0.00018871053691885782, "loss": 1.5503, "step": 4359 }, { "epoch": 0.056656211473207686, "grad_norm": 0.36303767561912537, "learning_rate": 0.00018870793745694642, "loss": 1.493, "step": 4360 }, { "epoch": 0.05666920601712356, "grad_norm": 0.3275662064552307, "learning_rate": 0.00018870533799503504, "loss": 1.3344, "step": 4361 }, { "epoch": 0.05668220056103943, "grad_norm": 0.38077402114868164, "learning_rate": 0.00018870273853312366, "loss": 1.4943, "step": 4362 }, { "epoch": 0.056695195104955305, "grad_norm": 0.34123852849006653, "learning_rate": 0.00018870013907121226, "loss": 1.3335, "step": 4363 }, { "epoch": 0.05670818964887118, "grad_norm": 0.4996756315231323, "learning_rate": 0.00018869753960930089, "loss": 1.4703, "step": 4364 }, { "epoch": 0.05672118419278705, "grad_norm": 0.3341628313064575, "learning_rate": 0.0001886949401473895, "loss": 1.5521, "step": 4365 }, { "epoch": 0.056734178736702924, "grad_norm": 0.45765554904937744, "learning_rate": 0.0001886923406854781, "loss": 1.5338, "step": 4366 }, { "epoch": 0.0567471732806188, "grad_norm": 0.24268069863319397, "learning_rate": 0.00018868974122356673, "loss": 1.0957, "step": 4367 }, { "epoch": 0.05676016782453468, "grad_norm": 0.36286821961402893, "learning_rate": 0.00018868714176165533, "loss": 1.6127, "step": 4368 }, { "epoch": 0.05677316236845055, "grad_norm": 0.3851850926876068, "learning_rate": 0.00018868454229974398, "loss": 1.4118, "step": 4369 }, { "epoch": 0.05678615691236642, "grad_norm": 0.2679467499256134, "learning_rate": 0.00018868194283783258, "loss": 1.4785, "step": 4370 }, { "epoch": 0.056799151456282296, "grad_norm": 0.26473966240882874, "learning_rate": 0.0001886793433759212, "loss": 1.5598, "step": 4371 }, { "epoch": 0.05681214600019817, "grad_norm": 0.3831224739551544, "learning_rate": 0.0001886767439140098, "loss": 1.4606, "step": 4372 }, { "epoch": 0.05682514054411404, "grad_norm": 0.3660658895969391, "learning_rate": 0.00018867414445209843, "loss": 1.3516, "step": 4373 }, { "epoch": 0.056838135088029915, "grad_norm": 0.40516167879104614, "learning_rate": 0.00018867154499018705, "loss": 1.5301, "step": 4374 }, { "epoch": 0.05685112963194579, "grad_norm": 0.44480594992637634, "learning_rate": 0.00018866894552827565, "loss": 1.4486, "step": 4375 }, { "epoch": 0.05686412417586166, "grad_norm": 0.33053305745124817, "learning_rate": 0.00018866634606636427, "loss": 1.3866, "step": 4376 }, { "epoch": 0.056877118719777534, "grad_norm": 0.37060365080833435, "learning_rate": 0.0001886637466044529, "loss": 1.3789, "step": 4377 }, { "epoch": 0.056890113263693406, "grad_norm": 0.3704473078250885, "learning_rate": 0.0001886611471425415, "loss": 1.3734, "step": 4378 }, { "epoch": 0.05690310780760928, "grad_norm": 0.39931410551071167, "learning_rate": 0.00018865854768063012, "loss": 1.5498, "step": 4379 }, { "epoch": 0.05691610235152515, "grad_norm": 0.5383016467094421, "learning_rate": 0.00018865594821871872, "loss": 1.4388, "step": 4380 }, { "epoch": 0.056929096895441025, "grad_norm": 0.24925829470157623, "learning_rate": 0.00018865334875680737, "loss": 1.2727, "step": 4381 }, { "epoch": 0.0569420914393569, "grad_norm": 0.35634124279022217, "learning_rate": 0.00018865074929489596, "loss": 1.396, "step": 4382 }, { "epoch": 0.05695508598327277, "grad_norm": 0.4414868652820587, "learning_rate": 0.0001886481498329846, "loss": 1.558, "step": 4383 }, { "epoch": 0.056968080527188644, "grad_norm": 0.32493627071380615, "learning_rate": 0.00018864555037107319, "loss": 1.6839, "step": 4384 }, { "epoch": 0.05698107507110452, "grad_norm": 0.3880787193775177, "learning_rate": 0.0001886429509091618, "loss": 1.4107, "step": 4385 }, { "epoch": 0.05699406961502039, "grad_norm": 0.4654039740562439, "learning_rate": 0.00018864035144725043, "loss": 1.6062, "step": 4386 }, { "epoch": 0.05700706415893627, "grad_norm": 0.38659539818763733, "learning_rate": 0.00018863775198533903, "loss": 1.5001, "step": 4387 }, { "epoch": 0.05702005870285214, "grad_norm": 0.35698825120925903, "learning_rate": 0.00018863515252342766, "loss": 1.3173, "step": 4388 }, { "epoch": 0.057033053246768016, "grad_norm": 0.38120898604393005, "learning_rate": 0.00018863255306151628, "loss": 1.4852, "step": 4389 }, { "epoch": 0.05704604779068389, "grad_norm": 0.37367287278175354, "learning_rate": 0.00018862995359960488, "loss": 1.4621, "step": 4390 }, { "epoch": 0.05705904233459976, "grad_norm": 0.39866533875465393, "learning_rate": 0.0001886273541376935, "loss": 1.2533, "step": 4391 }, { "epoch": 0.057072036878515635, "grad_norm": 0.4202077090740204, "learning_rate": 0.0001886247546757821, "loss": 1.4478, "step": 4392 }, { "epoch": 0.05708503142243151, "grad_norm": 0.2696082592010498, "learning_rate": 0.00018862215521387075, "loss": 1.4689, "step": 4393 }, { "epoch": 0.05709802596634738, "grad_norm": 0.4467935562133789, "learning_rate": 0.00018861955575195935, "loss": 1.3469, "step": 4394 }, { "epoch": 0.057111020510263254, "grad_norm": 0.3542415201663971, "learning_rate": 0.00018861695629004797, "loss": 1.614, "step": 4395 }, { "epoch": 0.05712401505417913, "grad_norm": 0.35639870166778564, "learning_rate": 0.0001886143568281366, "loss": 1.5655, "step": 4396 }, { "epoch": 0.057137009598095, "grad_norm": 0.40242400765419006, "learning_rate": 0.0001886117573662252, "loss": 1.4791, "step": 4397 }, { "epoch": 0.05715000414201087, "grad_norm": 0.34428870677948, "learning_rate": 0.00018860915790431382, "loss": 1.3569, "step": 4398 }, { "epoch": 0.057162998685926746, "grad_norm": 0.391963928937912, "learning_rate": 0.00018860655844240242, "loss": 1.4659, "step": 4399 }, { "epoch": 0.05717599322984262, "grad_norm": 0.4823959767818451, "learning_rate": 0.00018860395898049107, "loss": 1.4824, "step": 4400 }, { "epoch": 0.05718898777375849, "grad_norm": 0.49892473220825195, "learning_rate": 0.00018860135951857967, "loss": 1.5869, "step": 4401 }, { "epoch": 0.057201982317674364, "grad_norm": 0.3645821213722229, "learning_rate": 0.0001885987600566683, "loss": 1.607, "step": 4402 }, { "epoch": 0.05721497686159024, "grad_norm": 0.384893000125885, "learning_rate": 0.0001885961605947569, "loss": 1.5141, "step": 4403 }, { "epoch": 0.05722797140550611, "grad_norm": 0.37683653831481934, "learning_rate": 0.0001885935611328455, "loss": 1.2531, "step": 4404 }, { "epoch": 0.05724096594942198, "grad_norm": 0.3627872169017792, "learning_rate": 0.00018859096167093414, "loss": 1.3925, "step": 4405 }, { "epoch": 0.05725396049333786, "grad_norm": 0.3693505823612213, "learning_rate": 0.00018858836220902273, "loss": 1.6687, "step": 4406 }, { "epoch": 0.057266955037253736, "grad_norm": 0.3219207525253296, "learning_rate": 0.00018858576274711136, "loss": 1.4408, "step": 4407 }, { "epoch": 0.05727994958116961, "grad_norm": 0.2685849070549011, "learning_rate": 0.00018858316328519998, "loss": 1.4384, "step": 4408 }, { "epoch": 0.05729294412508548, "grad_norm": 0.3409925699234009, "learning_rate": 0.00018858056382328858, "loss": 1.3818, "step": 4409 }, { "epoch": 0.057305938669001355, "grad_norm": 0.4180390238761902, "learning_rate": 0.0001885779643613772, "loss": 1.4226, "step": 4410 }, { "epoch": 0.05731893321291723, "grad_norm": 0.48234209418296814, "learning_rate": 0.0001885753648994658, "loss": 1.556, "step": 4411 }, { "epoch": 0.0573319277568331, "grad_norm": 0.5108433961868286, "learning_rate": 0.00018857276543755445, "loss": 1.466, "step": 4412 }, { "epoch": 0.057344922300748974, "grad_norm": 0.4192737638950348, "learning_rate": 0.00018857016597564305, "loss": 1.4339, "step": 4413 }, { "epoch": 0.05735791684466485, "grad_norm": 0.3814343512058258, "learning_rate": 0.00018856756651373168, "loss": 1.3235, "step": 4414 }, { "epoch": 0.05737091138858072, "grad_norm": 0.22901009023189545, "learning_rate": 0.00018856496705182027, "loss": 1.2285, "step": 4415 }, { "epoch": 0.05738390593249659, "grad_norm": 0.26425036787986755, "learning_rate": 0.0001885623675899089, "loss": 1.3256, "step": 4416 }, { "epoch": 0.057396900476412466, "grad_norm": 0.3967977464199066, "learning_rate": 0.00018855976812799752, "loss": 1.3776, "step": 4417 }, { "epoch": 0.05740989502032834, "grad_norm": 0.4345617890357971, "learning_rate": 0.00018855716866608612, "loss": 1.5153, "step": 4418 }, { "epoch": 0.05742288956424421, "grad_norm": 0.423673540353775, "learning_rate": 0.00018855456920417474, "loss": 1.4166, "step": 4419 }, { "epoch": 0.057435884108160085, "grad_norm": 0.374982625246048, "learning_rate": 0.00018855196974226337, "loss": 1.314, "step": 4420 }, { "epoch": 0.05744887865207596, "grad_norm": 0.4199756979942322, "learning_rate": 0.00018854937028035197, "loss": 1.5202, "step": 4421 }, { "epoch": 0.05746187319599183, "grad_norm": 0.32804566621780396, "learning_rate": 0.0001885467708184406, "loss": 1.5451, "step": 4422 }, { "epoch": 0.057474867739907703, "grad_norm": 0.3952144682407379, "learning_rate": 0.0001885441713565292, "loss": 1.5651, "step": 4423 }, { "epoch": 0.057487862283823576, "grad_norm": 0.5229212045669556, "learning_rate": 0.00018854157189461784, "loss": 1.2889, "step": 4424 }, { "epoch": 0.057500856827739456, "grad_norm": 0.30185407400131226, "learning_rate": 0.00018853897243270644, "loss": 1.4835, "step": 4425 }, { "epoch": 0.05751385137165533, "grad_norm": 0.4042168855667114, "learning_rate": 0.00018853637297079506, "loss": 1.4768, "step": 4426 }, { "epoch": 0.0575268459155712, "grad_norm": 0.4536532759666443, "learning_rate": 0.00018853377350888366, "loss": 1.4839, "step": 4427 }, { "epoch": 0.057539840459487075, "grad_norm": 0.4703330397605896, "learning_rate": 0.00018853117404697228, "loss": 1.4641, "step": 4428 }, { "epoch": 0.05755283500340295, "grad_norm": 0.3574981689453125, "learning_rate": 0.0001885285745850609, "loss": 1.3675, "step": 4429 }, { "epoch": 0.05756582954731882, "grad_norm": 0.2977541387081146, "learning_rate": 0.0001885259751231495, "loss": 1.3422, "step": 4430 }, { "epoch": 0.057578824091234694, "grad_norm": 0.4809090197086334, "learning_rate": 0.00018852337566123813, "loss": 1.5843, "step": 4431 }, { "epoch": 0.05759181863515057, "grad_norm": 0.37623995542526245, "learning_rate": 0.00018852077619932675, "loss": 1.4277, "step": 4432 }, { "epoch": 0.05760481317906644, "grad_norm": 0.2894614040851593, "learning_rate": 0.00018851817673741535, "loss": 1.3494, "step": 4433 }, { "epoch": 0.05761780772298231, "grad_norm": 0.3871980309486389, "learning_rate": 0.00018851557727550398, "loss": 1.426, "step": 4434 }, { "epoch": 0.057630802266898186, "grad_norm": 0.3628866970539093, "learning_rate": 0.0001885129778135926, "loss": 1.2789, "step": 4435 }, { "epoch": 0.05764379681081406, "grad_norm": 0.46236157417297363, "learning_rate": 0.00018851037835168123, "loss": 1.604, "step": 4436 }, { "epoch": 0.05765679135472993, "grad_norm": 0.41646429896354675, "learning_rate": 0.00018850777888976982, "loss": 1.6024, "step": 4437 }, { "epoch": 0.057669785898645805, "grad_norm": 0.3667775094509125, "learning_rate": 0.00018850517942785845, "loss": 1.4866, "step": 4438 }, { "epoch": 0.05768278044256168, "grad_norm": 0.3537268340587616, "learning_rate": 0.00018850257996594707, "loss": 1.3893, "step": 4439 }, { "epoch": 0.05769577498647755, "grad_norm": 0.4517529606819153, "learning_rate": 0.00018849998050403567, "loss": 1.3818, "step": 4440 }, { "epoch": 0.057708769530393424, "grad_norm": 0.374284029006958, "learning_rate": 0.0001884973810421243, "loss": 1.5122, "step": 4441 }, { "epoch": 0.0577217640743093, "grad_norm": 0.49993324279785156, "learning_rate": 0.0001884947815802129, "loss": 1.5521, "step": 4442 }, { "epoch": 0.05773475861822517, "grad_norm": 0.367410272359848, "learning_rate": 0.00018849218211830154, "loss": 1.4932, "step": 4443 }, { "epoch": 0.05774775316214105, "grad_norm": 0.42041441798210144, "learning_rate": 0.00018848958265639014, "loss": 1.5762, "step": 4444 }, { "epoch": 0.05776074770605692, "grad_norm": 0.3849473297595978, "learning_rate": 0.00018848698319447874, "loss": 1.4216, "step": 4445 }, { "epoch": 0.057773742249972795, "grad_norm": 0.40465423464775085, "learning_rate": 0.00018848438373256736, "loss": 1.421, "step": 4446 }, { "epoch": 0.05778673679388867, "grad_norm": 0.4473832845687866, "learning_rate": 0.00018848178427065599, "loss": 1.4875, "step": 4447 }, { "epoch": 0.05779973133780454, "grad_norm": 0.39657723903656006, "learning_rate": 0.0001884791848087446, "loss": 1.5944, "step": 4448 }, { "epoch": 0.057812725881720414, "grad_norm": 0.40098837018013, "learning_rate": 0.0001884765853468332, "loss": 1.3575, "step": 4449 }, { "epoch": 0.05782572042563629, "grad_norm": 0.4412459135055542, "learning_rate": 0.00018847398588492183, "loss": 1.4385, "step": 4450 }, { "epoch": 0.05783871496955216, "grad_norm": 0.3459238111972809, "learning_rate": 0.00018847138642301046, "loss": 1.4425, "step": 4451 }, { "epoch": 0.05785170951346803, "grad_norm": 0.39290276169776917, "learning_rate": 0.00018846878696109905, "loss": 1.6497, "step": 4452 }, { "epoch": 0.057864704057383906, "grad_norm": 0.3830406069755554, "learning_rate": 0.00018846618749918768, "loss": 1.3845, "step": 4453 }, { "epoch": 0.05787769860129978, "grad_norm": 0.4009338617324829, "learning_rate": 0.00018846358803727628, "loss": 1.6374, "step": 4454 }, { "epoch": 0.05789069314521565, "grad_norm": 0.4065341651439667, "learning_rate": 0.00018846098857536493, "loss": 1.2858, "step": 4455 }, { "epoch": 0.057903687689131525, "grad_norm": 0.4415000081062317, "learning_rate": 0.00018845838911345353, "loss": 1.4675, "step": 4456 }, { "epoch": 0.0579166822330474, "grad_norm": 0.42395147681236267, "learning_rate": 0.00018845578965154212, "loss": 1.5597, "step": 4457 }, { "epoch": 0.05792967677696327, "grad_norm": 0.3737245798110962, "learning_rate": 0.00018845319018963075, "loss": 1.2627, "step": 4458 }, { "epoch": 0.057942671320879144, "grad_norm": 0.30794718861579895, "learning_rate": 0.00018845059072771937, "loss": 1.4222, "step": 4459 }, { "epoch": 0.05795566586479502, "grad_norm": 0.2816654145717621, "learning_rate": 0.000188447991265808, "loss": 1.353, "step": 4460 }, { "epoch": 0.05796866040871089, "grad_norm": 0.3791772425174713, "learning_rate": 0.0001884453918038966, "loss": 1.4008, "step": 4461 }, { "epoch": 0.05798165495262676, "grad_norm": 0.43883711099624634, "learning_rate": 0.00018844279234198522, "loss": 1.5184, "step": 4462 }, { "epoch": 0.05799464949654264, "grad_norm": 0.38996151089668274, "learning_rate": 0.00018844019288007384, "loss": 1.3953, "step": 4463 }, { "epoch": 0.058007644040458516, "grad_norm": 0.4525420665740967, "learning_rate": 0.00018843759341816244, "loss": 1.5363, "step": 4464 }, { "epoch": 0.05802063858437439, "grad_norm": 0.35998621582984924, "learning_rate": 0.00018843499395625106, "loss": 1.542, "step": 4465 }, { "epoch": 0.05803363312829026, "grad_norm": 0.322517991065979, "learning_rate": 0.00018843239449433966, "loss": 1.4447, "step": 4466 }, { "epoch": 0.058046627672206134, "grad_norm": 0.4839661419391632, "learning_rate": 0.0001884297950324283, "loss": 1.5355, "step": 4467 }, { "epoch": 0.05805962221612201, "grad_norm": 0.48378366231918335, "learning_rate": 0.0001884271955705169, "loss": 1.464, "step": 4468 }, { "epoch": 0.05807261676003788, "grad_norm": 0.4123730957508087, "learning_rate": 0.00018842459610860554, "loss": 1.4833, "step": 4469 }, { "epoch": 0.05808561130395375, "grad_norm": 0.38946062326431274, "learning_rate": 0.00018842199664669416, "loss": 1.3464, "step": 4470 }, { "epoch": 0.058098605847869626, "grad_norm": 0.37049511075019836, "learning_rate": 0.00018841939718478276, "loss": 1.4161, "step": 4471 }, { "epoch": 0.0581116003917855, "grad_norm": 0.42215830087661743, "learning_rate": 0.00018841679772287138, "loss": 1.6647, "step": 4472 }, { "epoch": 0.05812459493570137, "grad_norm": 0.45599594712257385, "learning_rate": 0.00018841419826095998, "loss": 1.5973, "step": 4473 }, { "epoch": 0.058137589479617245, "grad_norm": 0.30034714937210083, "learning_rate": 0.0001884115987990486, "loss": 1.2091, "step": 4474 }, { "epoch": 0.05815058402353312, "grad_norm": 0.3550974726676941, "learning_rate": 0.00018840899933713723, "loss": 1.3227, "step": 4475 }, { "epoch": 0.05816357856744899, "grad_norm": 0.4022202789783478, "learning_rate": 0.00018840639987522583, "loss": 1.5537, "step": 4476 }, { "epoch": 0.058176573111364864, "grad_norm": 0.39612770080566406, "learning_rate": 0.00018840380041331445, "loss": 1.6405, "step": 4477 }, { "epoch": 0.05818956765528074, "grad_norm": 0.46339184045791626, "learning_rate": 0.00018840120095140307, "loss": 1.4874, "step": 4478 }, { "epoch": 0.05820256219919661, "grad_norm": 0.3612900972366333, "learning_rate": 0.0001883986014894917, "loss": 1.3682, "step": 4479 }, { "epoch": 0.05821555674311248, "grad_norm": 0.45932045578956604, "learning_rate": 0.0001883960020275803, "loss": 1.4133, "step": 4480 }, { "epoch": 0.058228551287028356, "grad_norm": 0.4143752157688141, "learning_rate": 0.00018839340256566892, "loss": 1.4237, "step": 4481 }, { "epoch": 0.05824154583094423, "grad_norm": 0.4116329848766327, "learning_rate": 0.00018839080310375755, "loss": 1.5742, "step": 4482 }, { "epoch": 0.05825454037486011, "grad_norm": 0.42525044083595276, "learning_rate": 0.00018838820364184614, "loss": 1.233, "step": 4483 }, { "epoch": 0.05826753491877598, "grad_norm": 0.33023613691329956, "learning_rate": 0.00018838560417993477, "loss": 1.3341, "step": 4484 }, { "epoch": 0.058280529462691855, "grad_norm": 0.3632299304008484, "learning_rate": 0.00018838300471802336, "loss": 1.4923, "step": 4485 }, { "epoch": 0.05829352400660773, "grad_norm": 0.35917314887046814, "learning_rate": 0.00018838040525611202, "loss": 1.4307, "step": 4486 }, { "epoch": 0.0583065185505236, "grad_norm": 0.3781297206878662, "learning_rate": 0.0001883778057942006, "loss": 1.4574, "step": 4487 }, { "epoch": 0.058319513094439474, "grad_norm": 0.3325479328632355, "learning_rate": 0.0001883752063322892, "loss": 1.4971, "step": 4488 }, { "epoch": 0.058332507638355346, "grad_norm": 0.4469044804573059, "learning_rate": 0.00018837260687037784, "loss": 1.4701, "step": 4489 }, { "epoch": 0.05834550218227122, "grad_norm": 0.4646832346916199, "learning_rate": 0.00018837000740846646, "loss": 1.4618, "step": 4490 }, { "epoch": 0.05835849672618709, "grad_norm": 0.4738560616970062, "learning_rate": 0.00018836740794655508, "loss": 1.3937, "step": 4491 }, { "epoch": 0.058371491270102965, "grad_norm": 0.37531280517578125, "learning_rate": 0.00018836480848464368, "loss": 1.3389, "step": 4492 }, { "epoch": 0.05838448581401884, "grad_norm": 0.3102249205112457, "learning_rate": 0.0001883622090227323, "loss": 1.2293, "step": 4493 }, { "epoch": 0.05839748035793471, "grad_norm": 0.32316166162490845, "learning_rate": 0.00018835960956082093, "loss": 1.5093, "step": 4494 }, { "epoch": 0.058410474901850584, "grad_norm": 0.38363924622535706, "learning_rate": 0.00018835701009890953, "loss": 1.2756, "step": 4495 }, { "epoch": 0.05842346944576646, "grad_norm": 0.3228376507759094, "learning_rate": 0.00018835441063699815, "loss": 1.4595, "step": 4496 }, { "epoch": 0.05843646398968233, "grad_norm": 0.3308725357055664, "learning_rate": 0.00018835181117508675, "loss": 1.3428, "step": 4497 }, { "epoch": 0.0584494585335982, "grad_norm": 0.4232613444328308, "learning_rate": 0.0001883492117131754, "loss": 1.4279, "step": 4498 }, { "epoch": 0.058462453077514076, "grad_norm": 0.38894835114479065, "learning_rate": 0.000188346612251264, "loss": 1.4692, "step": 4499 }, { "epoch": 0.05847544762142995, "grad_norm": 0.3871758282184601, "learning_rate": 0.0001883440127893526, "loss": 1.516, "step": 4500 }, { "epoch": 0.05848844216534582, "grad_norm": 0.46854329109191895, "learning_rate": 0.00018834141332744122, "loss": 1.5492, "step": 4501 }, { "epoch": 0.0585014367092617, "grad_norm": 0.3060224950313568, "learning_rate": 0.00018833881386552985, "loss": 1.3563, "step": 4502 }, { "epoch": 0.058514431253177575, "grad_norm": 0.46446001529693604, "learning_rate": 0.00018833621440361847, "loss": 1.6972, "step": 4503 }, { "epoch": 0.05852742579709345, "grad_norm": 0.3680477738380432, "learning_rate": 0.00018833361494170707, "loss": 1.6149, "step": 4504 }, { "epoch": 0.05854042034100932, "grad_norm": 0.44431188702583313, "learning_rate": 0.0001883310154797957, "loss": 1.5737, "step": 4505 }, { "epoch": 0.058553414884925194, "grad_norm": 0.44540688395500183, "learning_rate": 0.00018832841601788432, "loss": 1.348, "step": 4506 }, { "epoch": 0.05856640942884107, "grad_norm": 0.3630962371826172, "learning_rate": 0.0001883258165559729, "loss": 1.4643, "step": 4507 }, { "epoch": 0.05857940397275694, "grad_norm": 0.4329513609409332, "learning_rate": 0.00018832321709406154, "loss": 1.5085, "step": 4508 }, { "epoch": 0.05859239851667281, "grad_norm": 0.44908472895622253, "learning_rate": 0.00018832061763215016, "loss": 1.3614, "step": 4509 }, { "epoch": 0.058605393060588686, "grad_norm": 0.3840656876564026, "learning_rate": 0.0001883180181702388, "loss": 1.6086, "step": 4510 }, { "epoch": 0.05861838760450456, "grad_norm": 0.4723275601863861, "learning_rate": 0.00018831541870832738, "loss": 1.4794, "step": 4511 }, { "epoch": 0.05863138214842043, "grad_norm": 0.3561088442802429, "learning_rate": 0.00018831281924641598, "loss": 1.5629, "step": 4512 }, { "epoch": 0.058644376692336304, "grad_norm": 0.5417634844779968, "learning_rate": 0.00018831021978450463, "loss": 1.5771, "step": 4513 }, { "epoch": 0.05865737123625218, "grad_norm": 0.4606473445892334, "learning_rate": 0.00018830762032259323, "loss": 1.4187, "step": 4514 }, { "epoch": 0.05867036578016805, "grad_norm": 0.3861846327781677, "learning_rate": 0.00018830502086068186, "loss": 1.3589, "step": 4515 }, { "epoch": 0.05868336032408392, "grad_norm": 0.4596537947654724, "learning_rate": 0.00018830242139877045, "loss": 1.4268, "step": 4516 }, { "epoch": 0.058696354867999796, "grad_norm": 0.43084535002708435, "learning_rate": 0.00018829982193685908, "loss": 1.5676, "step": 4517 }, { "epoch": 0.05870934941191567, "grad_norm": 0.4324735105037689, "learning_rate": 0.0001882972224749477, "loss": 1.3924, "step": 4518 }, { "epoch": 0.05872234395583154, "grad_norm": 0.4066638946533203, "learning_rate": 0.0001882946230130363, "loss": 1.4511, "step": 4519 }, { "epoch": 0.058735338499747415, "grad_norm": 0.2944854199886322, "learning_rate": 0.00018829202355112492, "loss": 1.4473, "step": 4520 }, { "epoch": 0.058748333043663295, "grad_norm": 0.518361508846283, "learning_rate": 0.00018828942408921355, "loss": 1.5047, "step": 4521 }, { "epoch": 0.05876132758757917, "grad_norm": 0.5447002649307251, "learning_rate": 0.00018828682462730217, "loss": 1.3459, "step": 4522 }, { "epoch": 0.05877432213149504, "grad_norm": 0.43478384613990784, "learning_rate": 0.00018828422516539077, "loss": 1.3106, "step": 4523 }, { "epoch": 0.058787316675410914, "grad_norm": 0.2438018023967743, "learning_rate": 0.0001882816257034794, "loss": 1.342, "step": 4524 }, { "epoch": 0.05880031121932679, "grad_norm": 0.3428383767604828, "learning_rate": 0.00018827902624156802, "loss": 1.5545, "step": 4525 }, { "epoch": 0.05881330576324266, "grad_norm": 0.4866192638874054, "learning_rate": 0.00018827642677965662, "loss": 1.4466, "step": 4526 }, { "epoch": 0.05882630030715853, "grad_norm": 0.43104222416877747, "learning_rate": 0.00018827382731774524, "loss": 1.386, "step": 4527 }, { "epoch": 0.058839294851074406, "grad_norm": 0.46314769983291626, "learning_rate": 0.00018827122785583384, "loss": 1.4909, "step": 4528 }, { "epoch": 0.05885228939499028, "grad_norm": 0.3360287845134735, "learning_rate": 0.00018826862839392246, "loss": 1.4474, "step": 4529 }, { "epoch": 0.05886528393890615, "grad_norm": 0.41563481092453003, "learning_rate": 0.0001882660289320111, "loss": 1.4183, "step": 4530 }, { "epoch": 0.058878278482822025, "grad_norm": 0.3972434997558594, "learning_rate": 0.00018826342947009968, "loss": 1.5192, "step": 4531 }, { "epoch": 0.0588912730267379, "grad_norm": 0.4367322027683258, "learning_rate": 0.0001882608300081883, "loss": 1.7404, "step": 4532 }, { "epoch": 0.05890426757065377, "grad_norm": 0.365024596452713, "learning_rate": 0.00018825823054627693, "loss": 1.4461, "step": 4533 }, { "epoch": 0.05891726211456964, "grad_norm": 0.329790860414505, "learning_rate": 0.00018825563108436556, "loss": 1.2034, "step": 4534 }, { "epoch": 0.058930256658485516, "grad_norm": 0.3816514015197754, "learning_rate": 0.00018825303162245415, "loss": 1.4533, "step": 4535 }, { "epoch": 0.05894325120240139, "grad_norm": 0.3474125266075134, "learning_rate": 0.00018825043216054278, "loss": 1.3997, "step": 4536 }, { "epoch": 0.05895624574631726, "grad_norm": 0.46969178318977356, "learning_rate": 0.0001882478326986314, "loss": 1.5531, "step": 4537 }, { "epoch": 0.058969240290233135, "grad_norm": 0.33096548914909363, "learning_rate": 0.00018824523323672, "loss": 1.443, "step": 4538 }, { "epoch": 0.05898223483414901, "grad_norm": 0.34960368275642395, "learning_rate": 0.00018824263377480863, "loss": 1.3415, "step": 4539 }, { "epoch": 0.05899522937806489, "grad_norm": 0.4240265190601349, "learning_rate": 0.00018824003431289722, "loss": 1.5412, "step": 4540 }, { "epoch": 0.05900822392198076, "grad_norm": 0.4648091793060303, "learning_rate": 0.00018823743485098585, "loss": 1.4664, "step": 4541 }, { "epoch": 0.059021218465896634, "grad_norm": 0.34888583421707153, "learning_rate": 0.00018823483538907447, "loss": 1.354, "step": 4542 }, { "epoch": 0.05903421300981251, "grad_norm": 0.4759249687194824, "learning_rate": 0.00018823223592716307, "loss": 1.4718, "step": 4543 }, { "epoch": 0.05904720755372838, "grad_norm": 0.3777037262916565, "learning_rate": 0.0001882296364652517, "loss": 1.3386, "step": 4544 }, { "epoch": 0.05906020209764425, "grad_norm": 0.4605162739753723, "learning_rate": 0.00018822703700334032, "loss": 1.5282, "step": 4545 }, { "epoch": 0.059073196641560126, "grad_norm": 0.436426043510437, "learning_rate": 0.00018822443754142894, "loss": 1.5363, "step": 4546 }, { "epoch": 0.059086191185476, "grad_norm": 0.40732067823410034, "learning_rate": 0.00018822183807951754, "loss": 1.6434, "step": 4547 }, { "epoch": 0.05909918572939187, "grad_norm": 0.37293821573257446, "learning_rate": 0.00018821923861760616, "loss": 1.3742, "step": 4548 }, { "epoch": 0.059112180273307745, "grad_norm": 0.42027273774147034, "learning_rate": 0.0001882166391556948, "loss": 1.4205, "step": 4549 }, { "epoch": 0.05912517481722362, "grad_norm": 0.40116173028945923, "learning_rate": 0.0001882140396937834, "loss": 1.6846, "step": 4550 }, { "epoch": 0.05913816936113949, "grad_norm": 0.48561277985572815, "learning_rate": 0.000188211440231872, "loss": 1.4225, "step": 4551 }, { "epoch": 0.059151163905055364, "grad_norm": 0.3712165355682373, "learning_rate": 0.00018820884076996064, "loss": 1.2646, "step": 4552 }, { "epoch": 0.05916415844897124, "grad_norm": 0.2929782271385193, "learning_rate": 0.00018820624130804926, "loss": 1.3504, "step": 4553 }, { "epoch": 0.05917715299288711, "grad_norm": 0.4422702491283417, "learning_rate": 0.00018820364184613786, "loss": 1.5324, "step": 4554 }, { "epoch": 0.05919014753680298, "grad_norm": 0.31200239062309265, "learning_rate": 0.00018820104238422645, "loss": 1.5455, "step": 4555 }, { "epoch": 0.059203142080718855, "grad_norm": 0.32378989458084106, "learning_rate": 0.0001881984429223151, "loss": 1.2866, "step": 4556 }, { "epoch": 0.05921613662463473, "grad_norm": 0.4474591016769409, "learning_rate": 0.0001881958434604037, "loss": 1.3814, "step": 4557 }, { "epoch": 0.0592291311685506, "grad_norm": 0.39132970571517944, "learning_rate": 0.00018819324399849233, "loss": 1.3229, "step": 4558 }, { "epoch": 0.05924212571246648, "grad_norm": 0.3722696900367737, "learning_rate": 0.00018819064453658093, "loss": 1.5352, "step": 4559 }, { "epoch": 0.059255120256382354, "grad_norm": 0.36158639192581177, "learning_rate": 0.00018818804507466955, "loss": 1.4934, "step": 4560 }, { "epoch": 0.05926811480029823, "grad_norm": 0.3869750201702118, "learning_rate": 0.00018818544561275817, "loss": 1.4109, "step": 4561 }, { "epoch": 0.0592811093442141, "grad_norm": 0.28921765089035034, "learning_rate": 0.00018818284615084677, "loss": 1.4839, "step": 4562 }, { "epoch": 0.05929410388812997, "grad_norm": 0.2798103988170624, "learning_rate": 0.0001881802466889354, "loss": 1.4997, "step": 4563 }, { "epoch": 0.059307098432045846, "grad_norm": 0.4336410462856293, "learning_rate": 0.00018817764722702402, "loss": 1.4585, "step": 4564 }, { "epoch": 0.05932009297596172, "grad_norm": 0.3858420252799988, "learning_rate": 0.00018817504776511265, "loss": 1.3882, "step": 4565 }, { "epoch": 0.05933308751987759, "grad_norm": 0.346164345741272, "learning_rate": 0.00018817244830320124, "loss": 1.428, "step": 4566 }, { "epoch": 0.059346082063793465, "grad_norm": 0.36873191595077515, "learning_rate": 0.00018816984884128984, "loss": 1.434, "step": 4567 }, { "epoch": 0.05935907660770934, "grad_norm": 0.4173000156879425, "learning_rate": 0.0001881672493793785, "loss": 1.2551, "step": 4568 }, { "epoch": 0.05937207115162521, "grad_norm": 0.47325408458709717, "learning_rate": 0.0001881646499174671, "loss": 1.3303, "step": 4569 }, { "epoch": 0.059385065695541084, "grad_norm": 0.3723583519458771, "learning_rate": 0.00018816205045555571, "loss": 1.5589, "step": 4570 }, { "epoch": 0.05939806023945696, "grad_norm": 0.4007658064365387, "learning_rate": 0.0001881594509936443, "loss": 1.6914, "step": 4571 }, { "epoch": 0.05941105478337283, "grad_norm": 0.30833274126052856, "learning_rate": 0.00018815685153173294, "loss": 1.3943, "step": 4572 }, { "epoch": 0.0594240493272887, "grad_norm": 0.38788431882858276, "learning_rate": 0.00018815425206982156, "loss": 1.4956, "step": 4573 }, { "epoch": 0.059437043871204576, "grad_norm": 0.3323206603527069, "learning_rate": 0.00018815165260791016, "loss": 1.4467, "step": 4574 }, { "epoch": 0.05945003841512045, "grad_norm": 0.32013601064682007, "learning_rate": 0.00018814905314599878, "loss": 1.294, "step": 4575 }, { "epoch": 0.05946303295903632, "grad_norm": 0.3274105191230774, "learning_rate": 0.0001881464536840874, "loss": 1.5338, "step": 4576 }, { "epoch": 0.059476027502952195, "grad_norm": 0.3312860429286957, "learning_rate": 0.00018814385422217603, "loss": 1.3967, "step": 4577 }, { "epoch": 0.059489022046868074, "grad_norm": 0.3890517055988312, "learning_rate": 0.00018814125476026463, "loss": 1.3403, "step": 4578 }, { "epoch": 0.05950201659078395, "grad_norm": 0.33841392397880554, "learning_rate": 0.00018813865529835323, "loss": 1.5476, "step": 4579 }, { "epoch": 0.05951501113469982, "grad_norm": 0.3727584183216095, "learning_rate": 0.00018813605583644188, "loss": 1.6292, "step": 4580 }, { "epoch": 0.05952800567861569, "grad_norm": 0.4647679328918457, "learning_rate": 0.00018813345637453047, "loss": 1.4954, "step": 4581 }, { "epoch": 0.059541000222531566, "grad_norm": 0.4308343231678009, "learning_rate": 0.0001881308569126191, "loss": 1.4879, "step": 4582 }, { "epoch": 0.05955399476644744, "grad_norm": 0.32289424538612366, "learning_rate": 0.00018812825745070772, "loss": 1.3234, "step": 4583 }, { "epoch": 0.05956698931036331, "grad_norm": 0.25068745017051697, "learning_rate": 0.00018812565798879632, "loss": 1.3987, "step": 4584 }, { "epoch": 0.059579983854279185, "grad_norm": 0.32612547278404236, "learning_rate": 0.00018812305852688495, "loss": 1.3596, "step": 4585 }, { "epoch": 0.05959297839819506, "grad_norm": 0.3579429090023041, "learning_rate": 0.00018812045906497354, "loss": 1.2533, "step": 4586 }, { "epoch": 0.05960597294211093, "grad_norm": 0.4372074604034424, "learning_rate": 0.0001881178596030622, "loss": 1.4473, "step": 4587 }, { "epoch": 0.059618967486026804, "grad_norm": 0.30114609003067017, "learning_rate": 0.0001881152601411508, "loss": 1.276, "step": 4588 }, { "epoch": 0.05963196202994268, "grad_norm": 0.4335648715496063, "learning_rate": 0.00018811266067923942, "loss": 1.2533, "step": 4589 }, { "epoch": 0.05964495657385855, "grad_norm": 0.40057137608528137, "learning_rate": 0.00018811006121732801, "loss": 1.4782, "step": 4590 }, { "epoch": 0.05965795111777442, "grad_norm": 0.43965011835098267, "learning_rate": 0.00018810746175541664, "loss": 1.4541, "step": 4591 }, { "epoch": 0.059670945661690296, "grad_norm": 0.3410152792930603, "learning_rate": 0.00018810486229350526, "loss": 1.2469, "step": 4592 }, { "epoch": 0.05968394020560617, "grad_norm": 0.3688991665840149, "learning_rate": 0.00018810226283159386, "loss": 1.6052, "step": 4593 }, { "epoch": 0.05969693474952204, "grad_norm": 0.3528572916984558, "learning_rate": 0.00018809966336968248, "loss": 1.5222, "step": 4594 }, { "epoch": 0.059709929293437915, "grad_norm": 0.4465098977088928, "learning_rate": 0.0001880970639077711, "loss": 1.4465, "step": 4595 }, { "epoch": 0.05972292383735379, "grad_norm": 0.34993624687194824, "learning_rate": 0.0001880944644458597, "loss": 1.3766, "step": 4596 }, { "epoch": 0.05973591838126967, "grad_norm": 0.4656573534011841, "learning_rate": 0.00018809186498394833, "loss": 1.359, "step": 4597 }, { "epoch": 0.05974891292518554, "grad_norm": 0.3948744237422943, "learning_rate": 0.00018808926552203693, "loss": 1.4361, "step": 4598 }, { "epoch": 0.059761907469101413, "grad_norm": 0.5141533613204956, "learning_rate": 0.00018808666606012558, "loss": 1.3905, "step": 4599 }, { "epoch": 0.059774902013017286, "grad_norm": 0.3613419532775879, "learning_rate": 0.00018808406659821418, "loss": 1.2921, "step": 4600 }, { "epoch": 0.05978789655693316, "grad_norm": 0.39984235167503357, "learning_rate": 0.0001880814671363028, "loss": 1.5183, "step": 4601 }, { "epoch": 0.05980089110084903, "grad_norm": 0.3041442632675171, "learning_rate": 0.0001880788676743914, "loss": 1.4087, "step": 4602 }, { "epoch": 0.059813885644764905, "grad_norm": 0.42544159293174744, "learning_rate": 0.00018807626821248002, "loss": 1.4614, "step": 4603 }, { "epoch": 0.05982688018868078, "grad_norm": 0.4615595042705536, "learning_rate": 0.00018807366875056865, "loss": 1.6792, "step": 4604 }, { "epoch": 0.05983987473259665, "grad_norm": 0.35686227679252625, "learning_rate": 0.00018807106928865725, "loss": 1.6299, "step": 4605 }, { "epoch": 0.059852869276512524, "grad_norm": 0.3619399070739746, "learning_rate": 0.00018806846982674587, "loss": 1.2132, "step": 4606 }, { "epoch": 0.0598658638204284, "grad_norm": 0.4124472141265869, "learning_rate": 0.0001880658703648345, "loss": 1.5429, "step": 4607 }, { "epoch": 0.05987885836434427, "grad_norm": 0.22899313271045685, "learning_rate": 0.00018806327090292312, "loss": 1.0735, "step": 4608 }, { "epoch": 0.05989185290826014, "grad_norm": 0.3973156213760376, "learning_rate": 0.00018806067144101172, "loss": 1.2878, "step": 4609 }, { "epoch": 0.059904847452176016, "grad_norm": 0.38161152601242065, "learning_rate": 0.00018805807197910031, "loss": 1.2518, "step": 4610 }, { "epoch": 0.05991784199609189, "grad_norm": 0.37226733565330505, "learning_rate": 0.00018805547251718897, "loss": 1.4684, "step": 4611 }, { "epoch": 0.05993083654000776, "grad_norm": 0.30384692549705505, "learning_rate": 0.00018805287305527756, "loss": 1.4512, "step": 4612 }, { "epoch": 0.059943831083923635, "grad_norm": 0.46904847025871277, "learning_rate": 0.0001880502735933662, "loss": 1.5138, "step": 4613 }, { "epoch": 0.05995682562783951, "grad_norm": 0.41440069675445557, "learning_rate": 0.00018804767413145478, "loss": 1.4284, "step": 4614 }, { "epoch": 0.05996982017175538, "grad_norm": 0.42137232422828674, "learning_rate": 0.0001880450746695434, "loss": 1.5833, "step": 4615 }, { "epoch": 0.05998281471567126, "grad_norm": 0.40327513217926025, "learning_rate": 0.00018804247520763203, "loss": 1.6324, "step": 4616 }, { "epoch": 0.059995809259587134, "grad_norm": 0.3549708425998688, "learning_rate": 0.00018803987574572063, "loss": 1.4376, "step": 4617 }, { "epoch": 0.06000880380350301, "grad_norm": 0.3755921423435211, "learning_rate": 0.00018803727628380926, "loss": 1.4155, "step": 4618 }, { "epoch": 0.06002179834741888, "grad_norm": 0.5553861856460571, "learning_rate": 0.00018803467682189788, "loss": 1.4298, "step": 4619 }, { "epoch": 0.06003479289133475, "grad_norm": 0.40438908338546753, "learning_rate": 0.0001880320773599865, "loss": 1.6984, "step": 4620 }, { "epoch": 0.060047787435250626, "grad_norm": 0.4536943733692169, "learning_rate": 0.0001880294778980751, "loss": 1.5542, "step": 4621 }, { "epoch": 0.0600607819791665, "grad_norm": 0.3599386215209961, "learning_rate": 0.00018802687843616373, "loss": 1.3511, "step": 4622 }, { "epoch": 0.06007377652308237, "grad_norm": 0.42840346693992615, "learning_rate": 0.00018802427897425235, "loss": 1.4751, "step": 4623 }, { "epoch": 0.060086771066998244, "grad_norm": 0.3442873954772949, "learning_rate": 0.00018802167951234095, "loss": 1.2768, "step": 4624 }, { "epoch": 0.06009976561091412, "grad_norm": 0.42270854115486145, "learning_rate": 0.00018801908005042957, "loss": 1.5792, "step": 4625 }, { "epoch": 0.06011276015482999, "grad_norm": 0.4190753400325775, "learning_rate": 0.0001880164805885182, "loss": 1.2809, "step": 4626 }, { "epoch": 0.06012575469874586, "grad_norm": 0.37923577427864075, "learning_rate": 0.0001880138811266068, "loss": 1.4505, "step": 4627 }, { "epoch": 0.060138749242661736, "grad_norm": 0.3711230754852295, "learning_rate": 0.00018801128166469542, "loss": 1.5396, "step": 4628 }, { "epoch": 0.06015174378657761, "grad_norm": 0.41335341334342957, "learning_rate": 0.00018800868220278402, "loss": 1.4764, "step": 4629 }, { "epoch": 0.06016473833049348, "grad_norm": 0.3963650166988373, "learning_rate": 0.00018800608274087267, "loss": 1.4486, "step": 4630 }, { "epoch": 0.060177732874409355, "grad_norm": 0.43489956855773926, "learning_rate": 0.00018800348327896127, "loss": 1.3373, "step": 4631 }, { "epoch": 0.06019072741832523, "grad_norm": 0.3788653612136841, "learning_rate": 0.0001880008838170499, "loss": 1.6634, "step": 4632 }, { "epoch": 0.0602037219622411, "grad_norm": 0.28406089544296265, "learning_rate": 0.0001879982843551385, "loss": 1.2853, "step": 4633 }, { "epoch": 0.060216716506156974, "grad_norm": 0.3892311155796051, "learning_rate": 0.0001879956848932271, "loss": 1.3714, "step": 4634 }, { "epoch": 0.060229711050072854, "grad_norm": 0.3391830623149872, "learning_rate": 0.00018799308543131574, "loss": 1.5051, "step": 4635 }, { "epoch": 0.06024270559398873, "grad_norm": 0.37689408659935, "learning_rate": 0.00018799048596940433, "loss": 1.4626, "step": 4636 }, { "epoch": 0.0602557001379046, "grad_norm": 0.4322948455810547, "learning_rate": 0.00018798788650749296, "loss": 1.345, "step": 4637 }, { "epoch": 0.06026869468182047, "grad_norm": 0.34250393509864807, "learning_rate": 0.00018798528704558158, "loss": 1.5787, "step": 4638 }, { "epoch": 0.060281689225736346, "grad_norm": 0.35673874616622925, "learning_rate": 0.00018798268758367018, "loss": 1.5439, "step": 4639 }, { "epoch": 0.06029468376965222, "grad_norm": 0.3739342987537384, "learning_rate": 0.0001879800881217588, "loss": 1.3467, "step": 4640 }, { "epoch": 0.06030767831356809, "grad_norm": 0.38367798924446106, "learning_rate": 0.0001879774886598474, "loss": 1.422, "step": 4641 }, { "epoch": 0.060320672857483965, "grad_norm": 0.3680630326271057, "learning_rate": 0.00018797488919793605, "loss": 1.4557, "step": 4642 }, { "epoch": 0.06033366740139984, "grad_norm": 0.3883708715438843, "learning_rate": 0.00018797228973602465, "loss": 1.4711, "step": 4643 }, { "epoch": 0.06034666194531571, "grad_norm": 0.4156469404697418, "learning_rate": 0.00018796969027411328, "loss": 1.4911, "step": 4644 }, { "epoch": 0.06035965648923158, "grad_norm": 0.3638401925563812, "learning_rate": 0.00018796709081220187, "loss": 1.3364, "step": 4645 }, { "epoch": 0.060372651033147456, "grad_norm": 0.3887464702129364, "learning_rate": 0.0001879644913502905, "loss": 1.5257, "step": 4646 }, { "epoch": 0.06038564557706333, "grad_norm": 0.3279300034046173, "learning_rate": 0.00018796189188837912, "loss": 1.4309, "step": 4647 }, { "epoch": 0.0603986401209792, "grad_norm": 0.4504035711288452, "learning_rate": 0.00018795929242646772, "loss": 1.236, "step": 4648 }, { "epoch": 0.060411634664895075, "grad_norm": 0.3183561861515045, "learning_rate": 0.00018795669296455634, "loss": 1.6423, "step": 4649 }, { "epoch": 0.06042462920881095, "grad_norm": 0.37958523631095886, "learning_rate": 0.00018795409350264497, "loss": 1.3208, "step": 4650 }, { "epoch": 0.06043762375272682, "grad_norm": 0.359591007232666, "learning_rate": 0.00018795149404073357, "loss": 1.3348, "step": 4651 }, { "epoch": 0.060450618296642694, "grad_norm": 0.3713827133178711, "learning_rate": 0.0001879488945788222, "loss": 1.5255, "step": 4652 }, { "epoch": 0.06046361284055857, "grad_norm": 0.5505270957946777, "learning_rate": 0.0001879462951169108, "loss": 1.3536, "step": 4653 }, { "epoch": 0.06047660738447445, "grad_norm": 0.43987274169921875, "learning_rate": 0.00018794369565499944, "loss": 1.4315, "step": 4654 }, { "epoch": 0.06048960192839032, "grad_norm": 0.5327964425086975, "learning_rate": 0.00018794109619308804, "loss": 1.4154, "step": 4655 }, { "epoch": 0.06050259647230619, "grad_norm": 0.43529361486434937, "learning_rate": 0.00018793849673117666, "loss": 1.4928, "step": 4656 }, { "epoch": 0.060515591016222066, "grad_norm": 0.36240583658218384, "learning_rate": 0.00018793589726926528, "loss": 1.3947, "step": 4657 }, { "epoch": 0.06052858556013794, "grad_norm": 0.4684526026248932, "learning_rate": 0.00018793329780735388, "loss": 1.5496, "step": 4658 }, { "epoch": 0.06054158010405381, "grad_norm": 0.29860028624534607, "learning_rate": 0.0001879306983454425, "loss": 1.5375, "step": 4659 }, { "epoch": 0.060554574647969685, "grad_norm": 0.501872181892395, "learning_rate": 0.0001879280988835311, "loss": 1.5208, "step": 4660 }, { "epoch": 0.06056756919188556, "grad_norm": 0.4405615031719208, "learning_rate": 0.00018792549942161976, "loss": 1.5717, "step": 4661 }, { "epoch": 0.06058056373580143, "grad_norm": 0.3775745928287506, "learning_rate": 0.00018792289995970835, "loss": 1.3513, "step": 4662 }, { "epoch": 0.060593558279717304, "grad_norm": 0.33774715662002563, "learning_rate": 0.00018792030049779695, "loss": 1.2951, "step": 4663 }, { "epoch": 0.06060655282363318, "grad_norm": 0.43124672770500183, "learning_rate": 0.00018791770103588558, "loss": 1.589, "step": 4664 }, { "epoch": 0.06061954736754905, "grad_norm": 0.3844517767429352, "learning_rate": 0.0001879151015739742, "loss": 1.3519, "step": 4665 }, { "epoch": 0.06063254191146492, "grad_norm": 0.356585294008255, "learning_rate": 0.00018791250211206282, "loss": 1.3697, "step": 4666 }, { "epoch": 0.060645536455380795, "grad_norm": 0.3608217239379883, "learning_rate": 0.00018790990265015142, "loss": 1.5346, "step": 4667 }, { "epoch": 0.06065853099929667, "grad_norm": 0.5022678375244141, "learning_rate": 0.00018790730318824005, "loss": 1.5213, "step": 4668 }, { "epoch": 0.06067152554321254, "grad_norm": 0.39539477229118347, "learning_rate": 0.00018790470372632867, "loss": 1.5392, "step": 4669 }, { "epoch": 0.060684520087128414, "grad_norm": 0.3768894076347351, "learning_rate": 0.00018790210426441727, "loss": 1.5174, "step": 4670 }, { "epoch": 0.06069751463104429, "grad_norm": 0.40725386142730713, "learning_rate": 0.0001878995048025059, "loss": 1.4684, "step": 4671 }, { "epoch": 0.06071050917496016, "grad_norm": 0.5199422240257263, "learning_rate": 0.0001878969053405945, "loss": 1.4579, "step": 4672 }, { "epoch": 0.06072350371887604, "grad_norm": 0.35900917649269104, "learning_rate": 0.00018789430587868314, "loss": 1.6107, "step": 4673 }, { "epoch": 0.06073649826279191, "grad_norm": 0.37490200996398926, "learning_rate": 0.00018789170641677174, "loss": 1.4037, "step": 4674 }, { "epoch": 0.060749492806707786, "grad_norm": 0.3832722008228302, "learning_rate": 0.00018788910695486036, "loss": 1.5209, "step": 4675 }, { "epoch": 0.06076248735062366, "grad_norm": 0.4584009647369385, "learning_rate": 0.00018788650749294896, "loss": 1.4505, "step": 4676 }, { "epoch": 0.06077548189453953, "grad_norm": 0.39893820881843567, "learning_rate": 0.00018788390803103758, "loss": 1.6227, "step": 4677 }, { "epoch": 0.060788476438455405, "grad_norm": 0.4154282212257385, "learning_rate": 0.0001878813085691262, "loss": 1.53, "step": 4678 }, { "epoch": 0.06080147098237128, "grad_norm": 0.4616670310497284, "learning_rate": 0.0001878787091072148, "loss": 1.5009, "step": 4679 }, { "epoch": 0.06081446552628715, "grad_norm": 0.39531344175338745, "learning_rate": 0.00018787610964530343, "loss": 1.3302, "step": 4680 }, { "epoch": 0.060827460070203024, "grad_norm": 0.41708120703697205, "learning_rate": 0.00018787351018339206, "loss": 1.4394, "step": 4681 }, { "epoch": 0.0608404546141189, "grad_norm": 0.4020169675350189, "learning_rate": 0.00018787091072148065, "loss": 1.7117, "step": 4682 }, { "epoch": 0.06085344915803477, "grad_norm": 0.42392462491989136, "learning_rate": 0.00018786831125956928, "loss": 1.481, "step": 4683 }, { "epoch": 0.06086644370195064, "grad_norm": 0.476747989654541, "learning_rate": 0.00018786571179765787, "loss": 1.6551, "step": 4684 }, { "epoch": 0.060879438245866516, "grad_norm": 0.339716374874115, "learning_rate": 0.00018786311233574653, "loss": 1.369, "step": 4685 }, { "epoch": 0.06089243278978239, "grad_norm": 0.3359491229057312, "learning_rate": 0.00018786051287383512, "loss": 1.4248, "step": 4686 }, { "epoch": 0.06090542733369826, "grad_norm": 0.4091237485408783, "learning_rate": 0.00018785791341192375, "loss": 1.5515, "step": 4687 }, { "epoch": 0.060918421877614135, "grad_norm": 0.3720499277114868, "learning_rate": 0.00018785531395001235, "loss": 1.6032, "step": 4688 }, { "epoch": 0.06093141642153001, "grad_norm": 0.4408172369003296, "learning_rate": 0.00018785271448810097, "loss": 1.4856, "step": 4689 }, { "epoch": 0.06094441096544588, "grad_norm": 0.358181893825531, "learning_rate": 0.0001878501150261896, "loss": 1.4114, "step": 4690 }, { "epoch": 0.06095740550936175, "grad_norm": 0.42284950613975525, "learning_rate": 0.0001878475155642782, "loss": 1.4355, "step": 4691 }, { "epoch": 0.06097040005327763, "grad_norm": 0.34294018149375916, "learning_rate": 0.00018784491610236682, "loss": 1.425, "step": 4692 }, { "epoch": 0.060983394597193506, "grad_norm": 0.3853449523448944, "learning_rate": 0.00018784231664045544, "loss": 1.553, "step": 4693 }, { "epoch": 0.06099638914110938, "grad_norm": 0.44293487071990967, "learning_rate": 0.00018783971717854404, "loss": 1.605, "step": 4694 }, { "epoch": 0.06100938368502525, "grad_norm": 0.30496087670326233, "learning_rate": 0.00018783711771663266, "loss": 1.323, "step": 4695 }, { "epoch": 0.061022378228941125, "grad_norm": 0.37509262561798096, "learning_rate": 0.0001878345182547213, "loss": 1.3581, "step": 4696 }, { "epoch": 0.061035372772857, "grad_norm": 0.3697527050971985, "learning_rate": 0.0001878319187928099, "loss": 1.44, "step": 4697 }, { "epoch": 0.06104836731677287, "grad_norm": 0.3637860417366028, "learning_rate": 0.0001878293193308985, "loss": 1.5373, "step": 4698 }, { "epoch": 0.061061361860688744, "grad_norm": 0.4187600612640381, "learning_rate": 0.00018782671986898713, "loss": 1.5917, "step": 4699 }, { "epoch": 0.06107435640460462, "grad_norm": 0.40024304389953613, "learning_rate": 0.00018782412040707576, "loss": 1.4136, "step": 4700 }, { "epoch": 0.06108735094852049, "grad_norm": 0.4038996696472168, "learning_rate": 0.00018782152094516436, "loss": 1.4541, "step": 4701 }, { "epoch": 0.06110034549243636, "grad_norm": 0.42494386434555054, "learning_rate": 0.00018781892148325298, "loss": 1.3921, "step": 4702 }, { "epoch": 0.061113340036352236, "grad_norm": 0.3595023453235626, "learning_rate": 0.00018781632202134158, "loss": 1.5208, "step": 4703 }, { "epoch": 0.06112633458026811, "grad_norm": 0.4407199025154114, "learning_rate": 0.00018781372255943023, "loss": 1.6047, "step": 4704 }, { "epoch": 0.06113932912418398, "grad_norm": 0.3866972029209137, "learning_rate": 0.00018781112309751883, "loss": 1.4929, "step": 4705 }, { "epoch": 0.061152323668099855, "grad_norm": 0.38850200176239014, "learning_rate": 0.00018780852363560742, "loss": 1.5825, "step": 4706 }, { "epoch": 0.06116531821201573, "grad_norm": 0.4282163977622986, "learning_rate": 0.00018780592417369605, "loss": 1.4532, "step": 4707 }, { "epoch": 0.0611783127559316, "grad_norm": 0.4159630835056305, "learning_rate": 0.00018780332471178467, "loss": 1.2992, "step": 4708 }, { "epoch": 0.061191307299847474, "grad_norm": 0.3920672535896301, "learning_rate": 0.0001878007252498733, "loss": 1.5302, "step": 4709 }, { "epoch": 0.06120430184376335, "grad_norm": 0.4455764591693878, "learning_rate": 0.0001877981257879619, "loss": 1.559, "step": 4710 }, { "epoch": 0.061217296387679226, "grad_norm": 0.3193241357803345, "learning_rate": 0.00018779552632605052, "loss": 1.1913, "step": 4711 }, { "epoch": 0.0612302909315951, "grad_norm": 0.3334447741508484, "learning_rate": 0.00018779292686413914, "loss": 1.3724, "step": 4712 }, { "epoch": 0.06124328547551097, "grad_norm": 0.31845778226852417, "learning_rate": 0.00018779032740222774, "loss": 1.3204, "step": 4713 }, { "epoch": 0.061256280019426845, "grad_norm": 0.3759171664714813, "learning_rate": 0.00018778772794031637, "loss": 1.5403, "step": 4714 }, { "epoch": 0.06126927456334272, "grad_norm": 0.41342827677726746, "learning_rate": 0.00018778512847840496, "loss": 1.2863, "step": 4715 }, { "epoch": 0.06128226910725859, "grad_norm": 0.3663657009601593, "learning_rate": 0.00018778252901649361, "loss": 1.5629, "step": 4716 }, { "epoch": 0.061295263651174464, "grad_norm": 0.41584208607673645, "learning_rate": 0.0001877799295545822, "loss": 1.4875, "step": 4717 }, { "epoch": 0.06130825819509034, "grad_norm": 0.3094605505466461, "learning_rate": 0.0001877773300926708, "loss": 1.4665, "step": 4718 }, { "epoch": 0.06132125273900621, "grad_norm": 0.29234790802001953, "learning_rate": 0.00018777473063075943, "loss": 1.2721, "step": 4719 }, { "epoch": 0.06133424728292208, "grad_norm": 0.3660517930984497, "learning_rate": 0.00018777213116884806, "loss": 1.4399, "step": 4720 }, { "epoch": 0.061347241826837956, "grad_norm": 0.40468522906303406, "learning_rate": 0.00018776953170693668, "loss": 1.4982, "step": 4721 }, { "epoch": 0.06136023637075383, "grad_norm": 0.34839415550231934, "learning_rate": 0.00018776693224502528, "loss": 1.3703, "step": 4722 }, { "epoch": 0.0613732309146697, "grad_norm": 0.36361679434776306, "learning_rate": 0.0001877643327831139, "loss": 1.4074, "step": 4723 }, { "epoch": 0.061386225458585575, "grad_norm": 0.3819040060043335, "learning_rate": 0.00018776173332120253, "loss": 1.3619, "step": 4724 }, { "epoch": 0.06139922000250145, "grad_norm": 0.4625639319419861, "learning_rate": 0.00018775913385929113, "loss": 1.4666, "step": 4725 }, { "epoch": 0.06141221454641732, "grad_norm": 0.38273537158966064, "learning_rate": 0.00018775653439737975, "loss": 1.4758, "step": 4726 }, { "epoch": 0.061425209090333194, "grad_norm": 0.32814735174179077, "learning_rate": 0.00018775393493546835, "loss": 1.3391, "step": 4727 }, { "epoch": 0.06143820363424907, "grad_norm": 0.4361976087093353, "learning_rate": 0.000187751335473557, "loss": 1.386, "step": 4728 }, { "epoch": 0.06145119817816494, "grad_norm": 0.470562607049942, "learning_rate": 0.0001877487360116456, "loss": 1.3806, "step": 4729 }, { "epoch": 0.06146419272208082, "grad_norm": 0.48570799827575684, "learning_rate": 0.00018774613654973422, "loss": 1.2079, "step": 4730 }, { "epoch": 0.06147718726599669, "grad_norm": 0.5506556630134583, "learning_rate": 0.00018774353708782285, "loss": 1.4277, "step": 4731 }, { "epoch": 0.061490181809912565, "grad_norm": 0.37174728512763977, "learning_rate": 0.00018774093762591144, "loss": 1.2928, "step": 4732 }, { "epoch": 0.06150317635382844, "grad_norm": 0.3608209788799286, "learning_rate": 0.00018773833816400007, "loss": 1.1566, "step": 4733 }, { "epoch": 0.06151617089774431, "grad_norm": 0.4654795825481415, "learning_rate": 0.00018773573870208867, "loss": 1.3341, "step": 4734 }, { "epoch": 0.061529165441660184, "grad_norm": 0.26536867022514343, "learning_rate": 0.0001877331392401773, "loss": 1.3326, "step": 4735 }, { "epoch": 0.06154215998557606, "grad_norm": 0.44267329573631287, "learning_rate": 0.00018773053977826591, "loss": 1.4208, "step": 4736 }, { "epoch": 0.06155515452949193, "grad_norm": 0.3890420198440552, "learning_rate": 0.0001877279403163545, "loss": 1.4988, "step": 4737 }, { "epoch": 0.0615681490734078, "grad_norm": 0.3623405992984772, "learning_rate": 0.00018772534085444314, "loss": 1.3181, "step": 4738 }, { "epoch": 0.061581143617323676, "grad_norm": 0.46809765696525574, "learning_rate": 0.00018772274139253176, "loss": 1.5969, "step": 4739 }, { "epoch": 0.06159413816123955, "grad_norm": 0.4159347116947174, "learning_rate": 0.00018772014193062039, "loss": 1.4688, "step": 4740 }, { "epoch": 0.06160713270515542, "grad_norm": 0.36170199513435364, "learning_rate": 0.00018771754246870898, "loss": 1.5292, "step": 4741 }, { "epoch": 0.061620127249071295, "grad_norm": 0.4478611350059509, "learning_rate": 0.0001877149430067976, "loss": 1.4147, "step": 4742 }, { "epoch": 0.06163312179298717, "grad_norm": 0.33543694019317627, "learning_rate": 0.00018771234354488623, "loss": 1.3701, "step": 4743 }, { "epoch": 0.06164611633690304, "grad_norm": 0.43642449378967285, "learning_rate": 0.00018770974408297483, "loss": 1.5713, "step": 4744 }, { "epoch": 0.061659110880818914, "grad_norm": 0.4053778350353241, "learning_rate": 0.00018770714462106345, "loss": 1.5266, "step": 4745 }, { "epoch": 0.06167210542473479, "grad_norm": 0.41506728529930115, "learning_rate": 0.00018770454515915205, "loss": 1.2939, "step": 4746 }, { "epoch": 0.06168509996865066, "grad_norm": 0.30604878067970276, "learning_rate": 0.00018770194569724068, "loss": 1.2528, "step": 4747 }, { "epoch": 0.06169809451256653, "grad_norm": 0.48034870624542236, "learning_rate": 0.0001876993462353293, "loss": 1.5633, "step": 4748 }, { "epoch": 0.06171108905648241, "grad_norm": 0.3947703242301941, "learning_rate": 0.0001876967467734179, "loss": 1.4401, "step": 4749 }, { "epoch": 0.061724083600398286, "grad_norm": 0.43567919731140137, "learning_rate": 0.00018769414731150652, "loss": 1.5715, "step": 4750 }, { "epoch": 0.06173707814431416, "grad_norm": 0.37029874324798584, "learning_rate": 0.00018769154784959515, "loss": 1.3434, "step": 4751 }, { "epoch": 0.06175007268823003, "grad_norm": 0.3966732919216156, "learning_rate": 0.00018768894838768377, "loss": 1.3285, "step": 4752 }, { "epoch": 0.061763067232145905, "grad_norm": 0.3607752025127411, "learning_rate": 0.00018768634892577237, "loss": 1.4862, "step": 4753 }, { "epoch": 0.06177606177606178, "grad_norm": 0.37333714962005615, "learning_rate": 0.000187683749463861, "loss": 1.493, "step": 4754 }, { "epoch": 0.06178905631997765, "grad_norm": 0.38883668184280396, "learning_rate": 0.00018768115000194962, "loss": 1.3699, "step": 4755 }, { "epoch": 0.06180205086389352, "grad_norm": 0.4387046694755554, "learning_rate": 0.00018767855054003821, "loss": 1.7208, "step": 4756 }, { "epoch": 0.061815045407809396, "grad_norm": 0.33561962842941284, "learning_rate": 0.00018767595107812684, "loss": 1.4431, "step": 4757 }, { "epoch": 0.06182803995172527, "grad_norm": 0.40161532163619995, "learning_rate": 0.00018767335161621544, "loss": 1.4407, "step": 4758 }, { "epoch": 0.06184103449564114, "grad_norm": 0.4498758614063263, "learning_rate": 0.0001876707521543041, "loss": 1.4239, "step": 4759 }, { "epoch": 0.061854029039557015, "grad_norm": 0.5349181890487671, "learning_rate": 0.00018766815269239269, "loss": 1.4826, "step": 4760 }, { "epoch": 0.06186702358347289, "grad_norm": 0.44247332215309143, "learning_rate": 0.00018766555323048128, "loss": 1.4883, "step": 4761 }, { "epoch": 0.06188001812738876, "grad_norm": 0.40534019470214844, "learning_rate": 0.0001876629537685699, "loss": 1.5636, "step": 4762 }, { "epoch": 0.061893012671304634, "grad_norm": 0.4621119201183319, "learning_rate": 0.00018766035430665853, "loss": 1.5419, "step": 4763 }, { "epoch": 0.06190600721522051, "grad_norm": 0.3977503478527069, "learning_rate": 0.00018765775484474716, "loss": 1.2825, "step": 4764 }, { "epoch": 0.06191900175913638, "grad_norm": 0.40935274958610535, "learning_rate": 0.00018765515538283575, "loss": 1.423, "step": 4765 }, { "epoch": 0.06193199630305225, "grad_norm": 0.35663458704948425, "learning_rate": 0.00018765255592092438, "loss": 1.3097, "step": 4766 }, { "epoch": 0.061944990846968126, "grad_norm": 0.34516972303390503, "learning_rate": 0.000187649956459013, "loss": 1.4726, "step": 4767 }, { "epoch": 0.061957985390884006, "grad_norm": 0.3514779806137085, "learning_rate": 0.0001876473569971016, "loss": 1.3543, "step": 4768 }, { "epoch": 0.06197097993479988, "grad_norm": 0.3431565761566162, "learning_rate": 0.00018764475753519022, "loss": 1.4397, "step": 4769 }, { "epoch": 0.06198397447871575, "grad_norm": 0.29300588369369507, "learning_rate": 0.00018764215807327885, "loss": 1.4131, "step": 4770 }, { "epoch": 0.061996969022631625, "grad_norm": 0.42205876111984253, "learning_rate": 0.00018763955861136747, "loss": 1.5009, "step": 4771 }, { "epoch": 0.0620099635665475, "grad_norm": 0.43114832043647766, "learning_rate": 0.00018763695914945607, "loss": 1.4078, "step": 4772 }, { "epoch": 0.06202295811046337, "grad_norm": 0.3251851201057434, "learning_rate": 0.00018763435968754467, "loss": 1.3527, "step": 4773 }, { "epoch": 0.062035952654379244, "grad_norm": 0.4490157663822174, "learning_rate": 0.00018763176022563332, "loss": 1.4597, "step": 4774 }, { "epoch": 0.06204894719829512, "grad_norm": 0.36808133125305176, "learning_rate": 0.00018762916076372192, "loss": 1.4885, "step": 4775 }, { "epoch": 0.06206194174221099, "grad_norm": 0.4256531298160553, "learning_rate": 0.00018762656130181054, "loss": 1.4016, "step": 4776 }, { "epoch": 0.06207493628612686, "grad_norm": 0.3940003216266632, "learning_rate": 0.00018762396183989914, "loss": 1.5645, "step": 4777 }, { "epoch": 0.062087930830042735, "grad_norm": 0.4098092019557953, "learning_rate": 0.00018762136237798776, "loss": 1.3158, "step": 4778 }, { "epoch": 0.06210092537395861, "grad_norm": 0.4216630458831787, "learning_rate": 0.0001876187629160764, "loss": 1.5037, "step": 4779 }, { "epoch": 0.06211391991787448, "grad_norm": 0.2802329659461975, "learning_rate": 0.00018761616345416499, "loss": 1.2852, "step": 4780 }, { "epoch": 0.062126914461790354, "grad_norm": 0.4184139668941498, "learning_rate": 0.0001876135639922536, "loss": 1.4164, "step": 4781 }, { "epoch": 0.06213990900570623, "grad_norm": 0.4360269606113434, "learning_rate": 0.00018761096453034223, "loss": 1.6717, "step": 4782 }, { "epoch": 0.0621529035496221, "grad_norm": 0.37473347783088684, "learning_rate": 0.00018760836506843086, "loss": 1.3978, "step": 4783 }, { "epoch": 0.06216589809353797, "grad_norm": 0.5111908912658691, "learning_rate": 0.00018760576560651946, "loss": 1.4218, "step": 4784 }, { "epoch": 0.062178892637453846, "grad_norm": 0.5529409646987915, "learning_rate": 0.00018760316614460805, "loss": 1.5151, "step": 4785 }, { "epoch": 0.06219188718136972, "grad_norm": 0.42840296030044556, "learning_rate": 0.0001876005666826967, "loss": 1.5215, "step": 4786 }, { "epoch": 0.0622048817252856, "grad_norm": 0.39297595620155334, "learning_rate": 0.0001875979672207853, "loss": 1.5035, "step": 4787 }, { "epoch": 0.06221787626920147, "grad_norm": 0.3373584449291229, "learning_rate": 0.00018759536775887393, "loss": 1.38, "step": 4788 }, { "epoch": 0.062230870813117345, "grad_norm": 0.36476561427116394, "learning_rate": 0.00018759276829696252, "loss": 1.363, "step": 4789 }, { "epoch": 0.06224386535703322, "grad_norm": 0.3945012092590332, "learning_rate": 0.00018759016883505115, "loss": 1.4091, "step": 4790 }, { "epoch": 0.06225685990094909, "grad_norm": 0.3533738851547241, "learning_rate": 0.00018758756937313977, "loss": 1.1834, "step": 4791 }, { "epoch": 0.062269854444864964, "grad_norm": 0.3350159227848053, "learning_rate": 0.00018758496991122837, "loss": 1.2534, "step": 4792 }, { "epoch": 0.06228284898878084, "grad_norm": 0.38589489459991455, "learning_rate": 0.000187582370449317, "loss": 1.3611, "step": 4793 }, { "epoch": 0.06229584353269671, "grad_norm": 0.4569569230079651, "learning_rate": 0.00018757977098740562, "loss": 1.4845, "step": 4794 }, { "epoch": 0.06230883807661258, "grad_norm": 0.38989877700805664, "learning_rate": 0.00018757717152549424, "loss": 1.2864, "step": 4795 }, { "epoch": 0.062321832620528456, "grad_norm": 0.3836038410663605, "learning_rate": 0.00018757457206358284, "loss": 1.4262, "step": 4796 }, { "epoch": 0.06233482716444433, "grad_norm": 0.3546046018600464, "learning_rate": 0.00018757197260167147, "loss": 1.4234, "step": 4797 }, { "epoch": 0.0623478217083602, "grad_norm": 0.3120918273925781, "learning_rate": 0.0001875693731397601, "loss": 1.4014, "step": 4798 }, { "epoch": 0.062360816252276075, "grad_norm": 0.5139092206954956, "learning_rate": 0.0001875667736778487, "loss": 1.6836, "step": 4799 }, { "epoch": 0.06237381079619195, "grad_norm": 0.3254159986972809, "learning_rate": 0.0001875641742159373, "loss": 1.3511, "step": 4800 }, { "epoch": 0.06238680534010782, "grad_norm": 0.3734841048717499, "learning_rate": 0.0001875615747540259, "loss": 1.5588, "step": 4801 }, { "epoch": 0.06239979988402369, "grad_norm": 0.38808658719062805, "learning_rate": 0.00018755897529211453, "loss": 1.5629, "step": 4802 }, { "epoch": 0.062412794427939566, "grad_norm": 0.48189741373062134, "learning_rate": 0.00018755637583020316, "loss": 1.4998, "step": 4803 }, { "epoch": 0.06242578897185544, "grad_norm": 0.4589759409427643, "learning_rate": 0.00018755377636829176, "loss": 1.5244, "step": 4804 }, { "epoch": 0.06243878351577131, "grad_norm": 0.38827449083328247, "learning_rate": 0.0001875511769063804, "loss": 1.5605, "step": 4805 }, { "epoch": 0.06245177805968719, "grad_norm": 0.35549396276474, "learning_rate": 0.000187548577444469, "loss": 1.4121, "step": 4806 }, { "epoch": 0.062464772603603065, "grad_norm": 0.36884772777557373, "learning_rate": 0.00018754597798255763, "loss": 1.4399, "step": 4807 }, { "epoch": 0.06247776714751894, "grad_norm": 0.41779768466949463, "learning_rate": 0.00018754337852064623, "loss": 1.6428, "step": 4808 }, { "epoch": 0.06249076169143481, "grad_norm": 0.345687597990036, "learning_rate": 0.00018754077905873485, "loss": 1.3752, "step": 4809 }, { "epoch": 0.06250375623535068, "grad_norm": 0.29694631695747375, "learning_rate": 0.00018753817959682348, "loss": 1.4213, "step": 4810 }, { "epoch": 0.06251675077926655, "grad_norm": 0.41125616431236267, "learning_rate": 0.00018753558013491207, "loss": 1.3525, "step": 4811 }, { "epoch": 0.06252974532318242, "grad_norm": 0.4661087393760681, "learning_rate": 0.0001875329806730007, "loss": 1.6321, "step": 4812 }, { "epoch": 0.0625427398670983, "grad_norm": 0.40610751509666443, "learning_rate": 0.00018753038121108932, "loss": 1.4283, "step": 4813 }, { "epoch": 0.06255573441101417, "grad_norm": 0.3044849634170532, "learning_rate": 0.00018752778174917795, "loss": 1.3231, "step": 4814 }, { "epoch": 0.06256872895493004, "grad_norm": 0.44375497102737427, "learning_rate": 0.00018752518228726654, "loss": 1.5793, "step": 4815 }, { "epoch": 0.06258172349884593, "grad_norm": 0.463663250207901, "learning_rate": 0.00018752258282535514, "loss": 1.5444, "step": 4816 }, { "epoch": 0.0625947180427618, "grad_norm": 0.34532085061073303, "learning_rate": 0.0001875199833634438, "loss": 1.4096, "step": 4817 }, { "epoch": 0.06260771258667767, "grad_norm": 0.4654233753681183, "learning_rate": 0.0001875173839015324, "loss": 1.7286, "step": 4818 }, { "epoch": 0.06262070713059355, "grad_norm": 0.4802190065383911, "learning_rate": 0.00018751478443962101, "loss": 1.4878, "step": 4819 }, { "epoch": 0.06263370167450942, "grad_norm": 0.3427649438381195, "learning_rate": 0.0001875121849777096, "loss": 1.6239, "step": 4820 }, { "epoch": 0.0626466962184253, "grad_norm": 0.31176993250846863, "learning_rate": 0.00018750958551579824, "loss": 1.4806, "step": 4821 }, { "epoch": 0.06265969076234117, "grad_norm": 0.43891656398773193, "learning_rate": 0.00018750698605388686, "loss": 1.4473, "step": 4822 }, { "epoch": 0.06267268530625704, "grad_norm": 0.3852303624153137, "learning_rate": 0.00018750438659197546, "loss": 1.2812, "step": 4823 }, { "epoch": 0.06268567985017291, "grad_norm": 0.3743630349636078, "learning_rate": 0.00018750178713006408, "loss": 1.3848, "step": 4824 }, { "epoch": 0.06269867439408879, "grad_norm": 0.3716904819011688, "learning_rate": 0.0001874991876681527, "loss": 1.3388, "step": 4825 }, { "epoch": 0.06271166893800466, "grad_norm": 0.44633615016937256, "learning_rate": 0.00018749658820624133, "loss": 1.5431, "step": 4826 }, { "epoch": 0.06272466348192053, "grad_norm": 0.36774638295173645, "learning_rate": 0.00018749398874432993, "loss": 1.247, "step": 4827 }, { "epoch": 0.0627376580258364, "grad_norm": 0.3416709303855896, "learning_rate": 0.00018749138928241853, "loss": 1.4417, "step": 4828 }, { "epoch": 0.06275065256975228, "grad_norm": 0.39817753434181213, "learning_rate": 0.00018748878982050718, "loss": 1.4612, "step": 4829 }, { "epoch": 0.06276364711366815, "grad_norm": 0.4371894299983978, "learning_rate": 0.00018748619035859578, "loss": 1.3331, "step": 4830 }, { "epoch": 0.06277664165758402, "grad_norm": 0.3970278203487396, "learning_rate": 0.0001874835908966844, "loss": 1.3768, "step": 4831 }, { "epoch": 0.0627896362014999, "grad_norm": 0.4924564063549042, "learning_rate": 0.000187480991434773, "loss": 1.4808, "step": 4832 }, { "epoch": 0.06280263074541577, "grad_norm": 0.4114846885204315, "learning_rate": 0.00018747839197286162, "loss": 1.479, "step": 4833 }, { "epoch": 0.06281562528933164, "grad_norm": 0.4020672142505646, "learning_rate": 0.00018747579251095025, "loss": 1.6144, "step": 4834 }, { "epoch": 0.06282861983324751, "grad_norm": 0.34900376200675964, "learning_rate": 0.00018747319304903884, "loss": 1.2328, "step": 4835 }, { "epoch": 0.06284161437716339, "grad_norm": 0.3972448706626892, "learning_rate": 0.00018747059358712747, "loss": 1.3904, "step": 4836 }, { "epoch": 0.06285460892107926, "grad_norm": 0.49019351601600647, "learning_rate": 0.0001874679941252161, "loss": 1.7153, "step": 4837 }, { "epoch": 0.06286760346499513, "grad_norm": 0.2971283793449402, "learning_rate": 0.00018746539466330472, "loss": 1.4029, "step": 4838 }, { "epoch": 0.062880598008911, "grad_norm": 0.2929637134075165, "learning_rate": 0.00018746279520139331, "loss": 1.4402, "step": 4839 }, { "epoch": 0.06289359255282688, "grad_norm": 0.34337177872657776, "learning_rate": 0.0001874601957394819, "loss": 1.3277, "step": 4840 }, { "epoch": 0.06290658709674275, "grad_norm": 0.49686381220817566, "learning_rate": 0.00018745759627757056, "loss": 1.3806, "step": 4841 }, { "epoch": 0.06291958164065863, "grad_norm": 0.4317689538002014, "learning_rate": 0.00018745499681565916, "loss": 1.4662, "step": 4842 }, { "epoch": 0.0629325761845745, "grad_norm": 0.35114434361457825, "learning_rate": 0.00018745239735374779, "loss": 1.6053, "step": 4843 }, { "epoch": 0.06294557072849037, "grad_norm": 0.46290478110313416, "learning_rate": 0.0001874497978918364, "loss": 1.6538, "step": 4844 }, { "epoch": 0.06295856527240624, "grad_norm": 0.38073739409446716, "learning_rate": 0.000187447198429925, "loss": 1.5796, "step": 4845 }, { "epoch": 0.06297155981632212, "grad_norm": 0.3745887875556946, "learning_rate": 0.00018744459896801363, "loss": 1.3767, "step": 4846 }, { "epoch": 0.06298455436023799, "grad_norm": 0.35628604888916016, "learning_rate": 0.00018744199950610223, "loss": 1.5253, "step": 4847 }, { "epoch": 0.06299754890415386, "grad_norm": 0.39481082558631897, "learning_rate": 0.00018743940004419088, "loss": 1.5285, "step": 4848 }, { "epoch": 0.06301054344806974, "grad_norm": 0.40641120076179504, "learning_rate": 0.00018743680058227948, "loss": 1.6706, "step": 4849 }, { "epoch": 0.06302353799198561, "grad_norm": 0.41250911355018616, "learning_rate": 0.0001874342011203681, "loss": 1.4065, "step": 4850 }, { "epoch": 0.06303653253590148, "grad_norm": 0.465817928314209, "learning_rate": 0.0001874316016584567, "loss": 1.4455, "step": 4851 }, { "epoch": 0.06304952707981736, "grad_norm": 0.31983256340026855, "learning_rate": 0.00018742900219654532, "loss": 1.4688, "step": 4852 }, { "epoch": 0.06306252162373323, "grad_norm": 0.3200644254684448, "learning_rate": 0.00018742640273463395, "loss": 1.2597, "step": 4853 }, { "epoch": 0.06307551616764911, "grad_norm": 0.38094374537467957, "learning_rate": 0.00018742380327272255, "loss": 1.3426, "step": 4854 }, { "epoch": 0.06308851071156499, "grad_norm": 0.3403262495994568, "learning_rate": 0.00018742120381081117, "loss": 1.5393, "step": 4855 }, { "epoch": 0.06310150525548086, "grad_norm": 0.318154901266098, "learning_rate": 0.0001874186043488998, "loss": 1.403, "step": 4856 }, { "epoch": 0.06311449979939673, "grad_norm": 0.4239601790904999, "learning_rate": 0.0001874160048869884, "loss": 1.6201, "step": 4857 }, { "epoch": 0.0631274943433126, "grad_norm": 0.3950299322605133, "learning_rate": 0.00018741340542507702, "loss": 1.3967, "step": 4858 }, { "epoch": 0.06314048888722848, "grad_norm": 0.35630932450294495, "learning_rate": 0.00018741080596316561, "loss": 1.4808, "step": 4859 }, { "epoch": 0.06315348343114435, "grad_norm": 0.5480958223342896, "learning_rate": 0.00018740820650125427, "loss": 1.5293, "step": 4860 }, { "epoch": 0.06316647797506023, "grad_norm": 0.4242057204246521, "learning_rate": 0.00018740560703934286, "loss": 1.356, "step": 4861 }, { "epoch": 0.0631794725189761, "grad_norm": 0.4509803056716919, "learning_rate": 0.0001874030075774315, "loss": 1.4957, "step": 4862 }, { "epoch": 0.06319246706289197, "grad_norm": 0.3295024037361145, "learning_rate": 0.00018740040811552009, "loss": 1.2792, "step": 4863 }, { "epoch": 0.06320546160680784, "grad_norm": 0.3918381333351135, "learning_rate": 0.0001873978086536087, "loss": 1.4447, "step": 4864 }, { "epoch": 0.06321845615072372, "grad_norm": 0.383027046918869, "learning_rate": 0.00018739520919169733, "loss": 1.5594, "step": 4865 }, { "epoch": 0.06323145069463959, "grad_norm": 0.36572399735450745, "learning_rate": 0.00018739260972978593, "loss": 1.3749, "step": 4866 }, { "epoch": 0.06324444523855546, "grad_norm": 0.3963125944137573, "learning_rate": 0.00018739001026787456, "loss": 1.4538, "step": 4867 }, { "epoch": 0.06325743978247134, "grad_norm": 0.3682536482810974, "learning_rate": 0.00018738741080596318, "loss": 1.4723, "step": 4868 }, { "epoch": 0.06327043432638721, "grad_norm": 0.406673789024353, "learning_rate": 0.00018738481134405178, "loss": 1.3904, "step": 4869 }, { "epoch": 0.06328342887030308, "grad_norm": 0.41151633858680725, "learning_rate": 0.0001873822118821404, "loss": 1.5182, "step": 4870 }, { "epoch": 0.06329642341421896, "grad_norm": 0.405517041683197, "learning_rate": 0.000187379612420229, "loss": 1.5509, "step": 4871 }, { "epoch": 0.06330941795813483, "grad_norm": 0.4210527539253235, "learning_rate": 0.00018737701295831765, "loss": 1.5795, "step": 4872 }, { "epoch": 0.0633224125020507, "grad_norm": 0.413746178150177, "learning_rate": 0.00018737441349640625, "loss": 1.4749, "step": 4873 }, { "epoch": 0.06333540704596657, "grad_norm": 0.40400955080986023, "learning_rate": 0.00018737181403449487, "loss": 1.3607, "step": 4874 }, { "epoch": 0.06334840158988245, "grad_norm": 0.46322938799858093, "learning_rate": 0.00018736921457258347, "loss": 1.3933, "step": 4875 }, { "epoch": 0.06336139613379832, "grad_norm": 0.4640101194381714, "learning_rate": 0.0001873666151106721, "loss": 1.4439, "step": 4876 }, { "epoch": 0.06337439067771419, "grad_norm": 0.40370509028434753, "learning_rate": 0.00018736401564876072, "loss": 1.474, "step": 4877 }, { "epoch": 0.06338738522163007, "grad_norm": 0.4165574908256531, "learning_rate": 0.00018736141618684932, "loss": 1.5289, "step": 4878 }, { "epoch": 0.06340037976554594, "grad_norm": 0.4392988085746765, "learning_rate": 0.00018735881672493797, "loss": 1.3942, "step": 4879 }, { "epoch": 0.06341337430946181, "grad_norm": 0.33102694153785706, "learning_rate": 0.00018735621726302657, "loss": 1.4154, "step": 4880 }, { "epoch": 0.06342636885337768, "grad_norm": 0.3873150646686554, "learning_rate": 0.0001873536178011152, "loss": 1.5786, "step": 4881 }, { "epoch": 0.06343936339729356, "grad_norm": 0.32687070965766907, "learning_rate": 0.0001873510183392038, "loss": 1.3004, "step": 4882 }, { "epoch": 0.06345235794120943, "grad_norm": 0.3952428698539734, "learning_rate": 0.0001873484188772924, "loss": 1.3218, "step": 4883 }, { "epoch": 0.0634653524851253, "grad_norm": 0.2773917019367218, "learning_rate": 0.00018734581941538104, "loss": 1.3843, "step": 4884 }, { "epoch": 0.06347834702904118, "grad_norm": 0.3833611011505127, "learning_rate": 0.00018734321995346963, "loss": 1.5324, "step": 4885 }, { "epoch": 0.06349134157295705, "grad_norm": 0.29361867904663086, "learning_rate": 0.00018734062049155826, "loss": 1.1574, "step": 4886 }, { "epoch": 0.06350433611687292, "grad_norm": 0.38995978236198425, "learning_rate": 0.00018733802102964688, "loss": 1.4576, "step": 4887 }, { "epoch": 0.0635173306607888, "grad_norm": 0.3644857108592987, "learning_rate": 0.00018733542156773548, "loss": 1.5604, "step": 4888 }, { "epoch": 0.06353032520470467, "grad_norm": 0.2922137677669525, "learning_rate": 0.0001873328221058241, "loss": 1.2234, "step": 4889 }, { "epoch": 0.06354331974862054, "grad_norm": 0.3658216595649719, "learning_rate": 0.0001873302226439127, "loss": 1.4905, "step": 4890 }, { "epoch": 0.06355631429253641, "grad_norm": 0.3737642765045166, "learning_rate": 0.00018732762318200135, "loss": 1.6124, "step": 4891 }, { "epoch": 0.0635693088364523, "grad_norm": 0.33959582448005676, "learning_rate": 0.00018732502372008995, "loss": 1.5312, "step": 4892 }, { "epoch": 0.06358230338036817, "grad_norm": 0.3634147346019745, "learning_rate": 0.00018732242425817858, "loss": 1.2265, "step": 4893 }, { "epoch": 0.06359529792428405, "grad_norm": 0.38192006945610046, "learning_rate": 0.00018731982479626717, "loss": 1.158, "step": 4894 }, { "epoch": 0.06360829246819992, "grad_norm": 0.384304940700531, "learning_rate": 0.0001873172253343558, "loss": 1.4773, "step": 4895 }, { "epoch": 0.0636212870121158, "grad_norm": 0.41935840249061584, "learning_rate": 0.00018731462587244442, "loss": 1.4632, "step": 4896 }, { "epoch": 0.06363428155603167, "grad_norm": 0.4345855116844177, "learning_rate": 0.00018731202641053302, "loss": 1.4467, "step": 4897 }, { "epoch": 0.06364727609994754, "grad_norm": 0.4804510772228241, "learning_rate": 0.00018730942694862164, "loss": 1.6504, "step": 4898 }, { "epoch": 0.06366027064386341, "grad_norm": 0.40293917059898376, "learning_rate": 0.00018730682748671027, "loss": 1.3154, "step": 4899 }, { "epoch": 0.06367326518777928, "grad_norm": 0.35224154591560364, "learning_rate": 0.00018730422802479887, "loss": 1.5255, "step": 4900 }, { "epoch": 0.06368625973169516, "grad_norm": 0.3401183784008026, "learning_rate": 0.0001873016285628875, "loss": 1.399, "step": 4901 }, { "epoch": 0.06369925427561103, "grad_norm": 0.3785831928253174, "learning_rate": 0.0001872990291009761, "loss": 1.3195, "step": 4902 }, { "epoch": 0.0637122488195269, "grad_norm": 0.2865942418575287, "learning_rate": 0.00018729642963906474, "loss": 1.4083, "step": 4903 }, { "epoch": 0.06372524336344278, "grad_norm": 0.3530195653438568, "learning_rate": 0.00018729383017715334, "loss": 1.4825, "step": 4904 }, { "epoch": 0.06373823790735865, "grad_norm": 0.2818562984466553, "learning_rate": 0.00018729123071524196, "loss": 1.4423, "step": 4905 }, { "epoch": 0.06375123245127452, "grad_norm": 0.36763685941696167, "learning_rate": 0.00018728863125333056, "loss": 1.3577, "step": 4906 }, { "epoch": 0.0637642269951904, "grad_norm": 0.4166032373905182, "learning_rate": 0.00018728603179141918, "loss": 1.5294, "step": 4907 }, { "epoch": 0.06377722153910627, "grad_norm": 0.32174357771873474, "learning_rate": 0.0001872834323295078, "loss": 1.3242, "step": 4908 }, { "epoch": 0.06379021608302214, "grad_norm": 0.37954533100128174, "learning_rate": 0.0001872808328675964, "loss": 1.525, "step": 4909 }, { "epoch": 0.06380321062693801, "grad_norm": 0.41271960735321045, "learning_rate": 0.00018727823340568503, "loss": 1.2738, "step": 4910 }, { "epoch": 0.06381620517085389, "grad_norm": 0.2798667252063751, "learning_rate": 0.00018727563394377365, "loss": 1.4685, "step": 4911 }, { "epoch": 0.06382919971476976, "grad_norm": 0.3457575738430023, "learning_rate": 0.00018727303448186225, "loss": 1.5371, "step": 4912 }, { "epoch": 0.06384219425868563, "grad_norm": 0.32217657566070557, "learning_rate": 0.00018727043501995088, "loss": 1.3464, "step": 4913 }, { "epoch": 0.0638551888026015, "grad_norm": 0.3394484221935272, "learning_rate": 0.00018726783555803947, "loss": 1.4517, "step": 4914 }, { "epoch": 0.06386818334651738, "grad_norm": 0.3870427906513214, "learning_rate": 0.00018726523609612813, "loss": 1.4706, "step": 4915 }, { "epoch": 0.06388117789043325, "grad_norm": 0.4184185564517975, "learning_rate": 0.00018726263663421672, "loss": 1.3265, "step": 4916 }, { "epoch": 0.06389417243434913, "grad_norm": 0.33759355545043945, "learning_rate": 0.00018726003717230535, "loss": 1.3971, "step": 4917 }, { "epoch": 0.063907166978265, "grad_norm": 0.42973077297210693, "learning_rate": 0.00018725743771039397, "loss": 1.5604, "step": 4918 }, { "epoch": 0.06392016152218087, "grad_norm": 0.45261356234550476, "learning_rate": 0.00018725483824848257, "loss": 1.5697, "step": 4919 }, { "epoch": 0.06393315606609674, "grad_norm": 0.40429526567459106, "learning_rate": 0.0001872522387865712, "loss": 1.3049, "step": 4920 }, { "epoch": 0.06394615061001262, "grad_norm": 0.3268931210041046, "learning_rate": 0.0001872496393246598, "loss": 1.5144, "step": 4921 }, { "epoch": 0.06395914515392849, "grad_norm": 0.462516725063324, "learning_rate": 0.00018724703986274844, "loss": 1.6261, "step": 4922 }, { "epoch": 0.06397213969784436, "grad_norm": 0.40660592913627625, "learning_rate": 0.00018724444040083704, "loss": 1.5333, "step": 4923 }, { "epoch": 0.06398513424176024, "grad_norm": 0.43708881735801697, "learning_rate": 0.00018724184093892564, "loss": 1.5775, "step": 4924 }, { "epoch": 0.06399812878567611, "grad_norm": 0.46050506830215454, "learning_rate": 0.00018723924147701426, "loss": 1.4223, "step": 4925 }, { "epoch": 0.06401112332959198, "grad_norm": 0.3936122953891754, "learning_rate": 0.00018723664201510289, "loss": 1.4894, "step": 4926 }, { "epoch": 0.06402411787350785, "grad_norm": 0.32210901379585266, "learning_rate": 0.0001872340425531915, "loss": 1.3553, "step": 4927 }, { "epoch": 0.06403711241742373, "grad_norm": 0.29751095175743103, "learning_rate": 0.0001872314430912801, "loss": 1.291, "step": 4928 }, { "epoch": 0.0640501069613396, "grad_norm": 0.3196150064468384, "learning_rate": 0.00018722884362936873, "loss": 1.3245, "step": 4929 }, { "epoch": 0.06406310150525549, "grad_norm": 0.3867841064929962, "learning_rate": 0.00018722624416745736, "loss": 1.3225, "step": 4930 }, { "epoch": 0.06407609604917136, "grad_norm": 0.5138772130012512, "learning_rate": 0.00018722364470554595, "loss": 1.4339, "step": 4931 }, { "epoch": 0.06408909059308723, "grad_norm": 0.41582852602005005, "learning_rate": 0.00018722104524363458, "loss": 1.4092, "step": 4932 }, { "epoch": 0.0641020851370031, "grad_norm": 0.4686858057975769, "learning_rate": 0.00018721844578172318, "loss": 1.4387, "step": 4933 }, { "epoch": 0.06411507968091898, "grad_norm": 0.44573119282722473, "learning_rate": 0.00018721584631981183, "loss": 1.7254, "step": 4934 }, { "epoch": 0.06412807422483485, "grad_norm": 0.3969566524028778, "learning_rate": 0.00018721324685790043, "loss": 1.4787, "step": 4935 }, { "epoch": 0.06414106876875073, "grad_norm": 0.3597899377346039, "learning_rate": 0.00018721064739598905, "loss": 1.3418, "step": 4936 }, { "epoch": 0.0641540633126666, "grad_norm": 0.3663119673728943, "learning_rate": 0.00018720804793407765, "loss": 1.4231, "step": 4937 }, { "epoch": 0.06416705785658247, "grad_norm": 0.35539206862449646, "learning_rate": 0.00018720544847216627, "loss": 1.4016, "step": 4938 }, { "epoch": 0.06418005240049834, "grad_norm": 0.3831118643283844, "learning_rate": 0.0001872028490102549, "loss": 1.5359, "step": 4939 }, { "epoch": 0.06419304694441422, "grad_norm": 0.3788885772228241, "learning_rate": 0.0001872002495483435, "loss": 1.4351, "step": 4940 }, { "epoch": 0.06420604148833009, "grad_norm": 0.3635537922382355, "learning_rate": 0.00018719765008643212, "loss": 1.5072, "step": 4941 }, { "epoch": 0.06421903603224596, "grad_norm": 0.4678174555301666, "learning_rate": 0.00018719505062452074, "loss": 1.5083, "step": 4942 }, { "epoch": 0.06423203057616184, "grad_norm": 0.3268563747406006, "learning_rate": 0.00018719245116260934, "loss": 1.5077, "step": 4943 }, { "epoch": 0.06424502512007771, "grad_norm": 0.315374493598938, "learning_rate": 0.00018718985170069796, "loss": 1.4007, "step": 4944 }, { "epoch": 0.06425801966399358, "grad_norm": 0.4014391601085663, "learning_rate": 0.00018718725223878656, "loss": 1.678, "step": 4945 }, { "epoch": 0.06427101420790945, "grad_norm": 0.36102932691574097, "learning_rate": 0.0001871846527768752, "loss": 1.3457, "step": 4946 }, { "epoch": 0.06428400875182533, "grad_norm": 0.4070459008216858, "learning_rate": 0.0001871820533149638, "loss": 1.4825, "step": 4947 }, { "epoch": 0.0642970032957412, "grad_norm": 0.42442283034324646, "learning_rate": 0.00018717945385305243, "loss": 1.5607, "step": 4948 }, { "epoch": 0.06430999783965707, "grad_norm": 0.33655229210853577, "learning_rate": 0.00018717685439114103, "loss": 1.5214, "step": 4949 }, { "epoch": 0.06432299238357295, "grad_norm": 0.40103280544281006, "learning_rate": 0.00018717425492922966, "loss": 1.6607, "step": 4950 }, { "epoch": 0.06433598692748882, "grad_norm": 0.33195415139198303, "learning_rate": 0.00018717165546731828, "loss": 1.5514, "step": 4951 }, { "epoch": 0.06434898147140469, "grad_norm": 0.3654930591583252, "learning_rate": 0.00018716905600540688, "loss": 1.4176, "step": 4952 }, { "epoch": 0.06436197601532057, "grad_norm": 0.3469924032688141, "learning_rate": 0.0001871664565434955, "loss": 1.336, "step": 4953 }, { "epoch": 0.06437497055923644, "grad_norm": 0.36465591192245483, "learning_rate": 0.00018716385708158413, "loss": 1.4397, "step": 4954 }, { "epoch": 0.06438796510315231, "grad_norm": 0.5298256874084473, "learning_rate": 0.00018716125761967272, "loss": 1.388, "step": 4955 }, { "epoch": 0.06440095964706818, "grad_norm": 0.3695477843284607, "learning_rate": 0.00018715865815776135, "loss": 1.4277, "step": 4956 }, { "epoch": 0.06441395419098406, "grad_norm": 0.39149484038352966, "learning_rate": 0.00018715605869584997, "loss": 1.4951, "step": 4957 }, { "epoch": 0.06442694873489993, "grad_norm": 0.4806424677371979, "learning_rate": 0.0001871534592339386, "loss": 1.3813, "step": 4958 }, { "epoch": 0.0644399432788158, "grad_norm": 0.4368738830089569, "learning_rate": 0.0001871508597720272, "loss": 1.4217, "step": 4959 }, { "epoch": 0.06445293782273168, "grad_norm": 0.4076714813709259, "learning_rate": 0.00018714826031011582, "loss": 1.4966, "step": 4960 }, { "epoch": 0.06446593236664755, "grad_norm": 0.35776248574256897, "learning_rate": 0.00018714566084820444, "loss": 1.3683, "step": 4961 }, { "epoch": 0.06447892691056342, "grad_norm": 0.5128650665283203, "learning_rate": 0.00018714306138629304, "loss": 1.4594, "step": 4962 }, { "epoch": 0.0644919214544793, "grad_norm": 0.47895219922065735, "learning_rate": 0.00018714046192438167, "loss": 1.4747, "step": 4963 }, { "epoch": 0.06450491599839517, "grad_norm": 0.4154643416404724, "learning_rate": 0.00018713786246247026, "loss": 1.421, "step": 4964 }, { "epoch": 0.06451791054231104, "grad_norm": 0.40340059995651245, "learning_rate": 0.00018713526300055892, "loss": 1.582, "step": 4965 }, { "epoch": 0.06453090508622691, "grad_norm": 0.39383891224861145, "learning_rate": 0.0001871326635386475, "loss": 1.4579, "step": 4966 }, { "epoch": 0.06454389963014279, "grad_norm": 0.3662666082382202, "learning_rate": 0.0001871300640767361, "loss": 1.2288, "step": 4967 }, { "epoch": 0.06455689417405867, "grad_norm": 0.5457305908203125, "learning_rate": 0.00018712746461482473, "loss": 1.4215, "step": 4968 }, { "epoch": 0.06456988871797455, "grad_norm": 0.3704676628112793, "learning_rate": 0.00018712486515291336, "loss": 1.4879, "step": 4969 }, { "epoch": 0.06458288326189042, "grad_norm": 0.40221697092056274, "learning_rate": 0.00018712226569100198, "loss": 1.4518, "step": 4970 }, { "epoch": 0.06459587780580629, "grad_norm": 0.3336734473705292, "learning_rate": 0.00018711966622909058, "loss": 1.2597, "step": 4971 }, { "epoch": 0.06460887234972217, "grad_norm": 0.403178334236145, "learning_rate": 0.0001871170667671792, "loss": 1.5475, "step": 4972 }, { "epoch": 0.06462186689363804, "grad_norm": 0.34967759251594543, "learning_rate": 0.00018711446730526783, "loss": 1.3389, "step": 4973 }, { "epoch": 0.06463486143755391, "grad_norm": 0.44066518545150757, "learning_rate": 0.00018711186784335643, "loss": 1.425, "step": 4974 }, { "epoch": 0.06464785598146978, "grad_norm": 0.3541397452354431, "learning_rate": 0.00018710926838144505, "loss": 1.3289, "step": 4975 }, { "epoch": 0.06466085052538566, "grad_norm": 0.2975107729434967, "learning_rate": 0.00018710666891953365, "loss": 1.3044, "step": 4976 }, { "epoch": 0.06467384506930153, "grad_norm": 0.4533711373806, "learning_rate": 0.0001871040694576223, "loss": 1.612, "step": 4977 }, { "epoch": 0.0646868396132174, "grad_norm": 0.34213998913764954, "learning_rate": 0.0001871014699957109, "loss": 1.4785, "step": 4978 }, { "epoch": 0.06469983415713328, "grad_norm": 0.41275545954704285, "learning_rate": 0.0001870988705337995, "loss": 1.4921, "step": 4979 }, { "epoch": 0.06471282870104915, "grad_norm": 0.2710539400577545, "learning_rate": 0.00018709627107188812, "loss": 1.5003, "step": 4980 }, { "epoch": 0.06472582324496502, "grad_norm": 0.31130334734916687, "learning_rate": 0.00018709367160997674, "loss": 1.3272, "step": 4981 }, { "epoch": 0.0647388177888809, "grad_norm": 0.4436469078063965, "learning_rate": 0.00018709107214806537, "loss": 1.5013, "step": 4982 }, { "epoch": 0.06475181233279677, "grad_norm": 0.42660483717918396, "learning_rate": 0.00018708847268615397, "loss": 1.3936, "step": 4983 }, { "epoch": 0.06476480687671264, "grad_norm": 0.397128164768219, "learning_rate": 0.0001870858732242426, "loss": 1.5172, "step": 4984 }, { "epoch": 0.06477780142062851, "grad_norm": 0.3946099579334259, "learning_rate": 0.00018708327376233122, "loss": 1.5568, "step": 4985 }, { "epoch": 0.06479079596454439, "grad_norm": 0.43961185216903687, "learning_rate": 0.0001870806743004198, "loss": 1.4902, "step": 4986 }, { "epoch": 0.06480379050846026, "grad_norm": 0.4488104283809662, "learning_rate": 0.00018707807483850844, "loss": 1.5189, "step": 4987 }, { "epoch": 0.06481678505237613, "grad_norm": 0.34242379665374756, "learning_rate": 0.00018707547537659703, "loss": 1.47, "step": 4988 }, { "epoch": 0.064829779596292, "grad_norm": 0.38616856932640076, "learning_rate": 0.00018707287591468569, "loss": 1.4936, "step": 4989 }, { "epoch": 0.06484277414020788, "grad_norm": 0.4312630891799927, "learning_rate": 0.00018707027645277428, "loss": 1.3957, "step": 4990 }, { "epoch": 0.06485576868412375, "grad_norm": 0.3538651764392853, "learning_rate": 0.00018706767699086288, "loss": 1.3587, "step": 4991 }, { "epoch": 0.06486876322803962, "grad_norm": 0.32463616132736206, "learning_rate": 0.00018706507752895153, "loss": 1.3117, "step": 4992 }, { "epoch": 0.0648817577719555, "grad_norm": 0.38083845376968384, "learning_rate": 0.00018706247806704013, "loss": 1.6409, "step": 4993 }, { "epoch": 0.06489475231587137, "grad_norm": 0.395946741104126, "learning_rate": 0.00018705987860512875, "loss": 1.7357, "step": 4994 }, { "epoch": 0.06490774685978724, "grad_norm": 0.4680713415145874, "learning_rate": 0.00018705727914321735, "loss": 1.5717, "step": 4995 }, { "epoch": 0.06492074140370312, "grad_norm": 0.3958815932273865, "learning_rate": 0.00018705467968130598, "loss": 1.4041, "step": 4996 }, { "epoch": 0.06493373594761899, "grad_norm": 0.3951125741004944, "learning_rate": 0.0001870520802193946, "loss": 1.5398, "step": 4997 }, { "epoch": 0.06494673049153486, "grad_norm": 0.3468087613582611, "learning_rate": 0.0001870494807574832, "loss": 1.428, "step": 4998 }, { "epoch": 0.06495972503545074, "grad_norm": 0.439179927110672, "learning_rate": 0.00018704688129557182, "loss": 1.358, "step": 4999 }, { "epoch": 0.06497271957936661, "grad_norm": 0.31045451760292053, "learning_rate": 0.00018704428183366045, "loss": 1.3327, "step": 5000 }, { "epoch": 0.06498571412328248, "grad_norm": 0.4269821345806122, "learning_rate": 0.00018704168237174907, "loss": 1.5544, "step": 5001 }, { "epoch": 0.06499870866719835, "grad_norm": 0.2961575388908386, "learning_rate": 0.00018703908290983767, "loss": 1.4255, "step": 5002 }, { "epoch": 0.06501170321111423, "grad_norm": 0.3565014600753784, "learning_rate": 0.0001870364834479263, "loss": 1.3879, "step": 5003 }, { "epoch": 0.0650246977550301, "grad_norm": 0.41762495040893555, "learning_rate": 0.00018703388398601492, "loss": 1.475, "step": 5004 }, { "epoch": 0.06503769229894597, "grad_norm": 0.35624971985816956, "learning_rate": 0.00018703128452410352, "loss": 1.4048, "step": 5005 }, { "epoch": 0.06505068684286186, "grad_norm": 0.43210282921791077, "learning_rate": 0.00018702868506219214, "loss": 1.4183, "step": 5006 }, { "epoch": 0.06506368138677773, "grad_norm": 0.3515591621398926, "learning_rate": 0.00018702608560028074, "loss": 1.3787, "step": 5007 }, { "epoch": 0.0650766759306936, "grad_norm": 0.3989673852920532, "learning_rate": 0.00018702348613836936, "loss": 1.4394, "step": 5008 }, { "epoch": 0.06508967047460948, "grad_norm": 0.3967299163341522, "learning_rate": 0.00018702088667645799, "loss": 1.3999, "step": 5009 }, { "epoch": 0.06510266501852535, "grad_norm": 0.40923601388931274, "learning_rate": 0.00018701828721454658, "loss": 1.4759, "step": 5010 }, { "epoch": 0.06511565956244122, "grad_norm": 0.41464051604270935, "learning_rate": 0.0001870156877526352, "loss": 1.7134, "step": 5011 }, { "epoch": 0.0651286541063571, "grad_norm": 0.3874533772468567, "learning_rate": 0.00018701308829072383, "loss": 1.5878, "step": 5012 }, { "epoch": 0.06514164865027297, "grad_norm": 0.4026485085487366, "learning_rate": 0.00018701048882881246, "loss": 1.3231, "step": 5013 }, { "epoch": 0.06515464319418884, "grad_norm": 0.40182870626449585, "learning_rate": 0.00018700788936690105, "loss": 1.5892, "step": 5014 }, { "epoch": 0.06516763773810472, "grad_norm": 0.369368314743042, "learning_rate": 0.00018700528990498968, "loss": 1.2536, "step": 5015 }, { "epoch": 0.06518063228202059, "grad_norm": 0.2835204601287842, "learning_rate": 0.0001870026904430783, "loss": 1.4752, "step": 5016 }, { "epoch": 0.06519362682593646, "grad_norm": 0.4133825898170471, "learning_rate": 0.0001870000909811669, "loss": 1.3276, "step": 5017 }, { "epoch": 0.06520662136985234, "grad_norm": 0.5406321883201599, "learning_rate": 0.00018699749151925553, "loss": 1.3796, "step": 5018 }, { "epoch": 0.06521961591376821, "grad_norm": 0.4201764762401581, "learning_rate": 0.00018699489205734412, "loss": 1.4892, "step": 5019 }, { "epoch": 0.06523261045768408, "grad_norm": 0.43299543857574463, "learning_rate": 0.00018699229259543277, "loss": 1.4428, "step": 5020 }, { "epoch": 0.06524560500159995, "grad_norm": 0.4272761642932892, "learning_rate": 0.00018698969313352137, "loss": 1.6236, "step": 5021 }, { "epoch": 0.06525859954551583, "grad_norm": 0.2663511037826538, "learning_rate": 0.00018698709367160997, "loss": 1.3557, "step": 5022 }, { "epoch": 0.0652715940894317, "grad_norm": 0.45003563165664673, "learning_rate": 0.0001869844942096986, "loss": 1.6476, "step": 5023 }, { "epoch": 0.06528458863334757, "grad_norm": 0.38335496187210083, "learning_rate": 0.00018698189474778722, "loss": 1.4705, "step": 5024 }, { "epoch": 0.06529758317726345, "grad_norm": 0.43141335248947144, "learning_rate": 0.00018697929528587584, "loss": 1.5704, "step": 5025 }, { "epoch": 0.06531057772117932, "grad_norm": 0.4025283455848694, "learning_rate": 0.00018697669582396444, "loss": 1.4308, "step": 5026 }, { "epoch": 0.06532357226509519, "grad_norm": 0.3948580324649811, "learning_rate": 0.00018697409636205306, "loss": 1.6566, "step": 5027 }, { "epoch": 0.06533656680901107, "grad_norm": 0.45215436816215515, "learning_rate": 0.0001869714969001417, "loss": 1.6829, "step": 5028 }, { "epoch": 0.06534956135292694, "grad_norm": 0.38435620069503784, "learning_rate": 0.00018696889743823029, "loss": 1.3634, "step": 5029 }, { "epoch": 0.06536255589684281, "grad_norm": 0.38024795055389404, "learning_rate": 0.0001869662979763189, "loss": 1.5269, "step": 5030 }, { "epoch": 0.06537555044075868, "grad_norm": 0.37334343791007996, "learning_rate": 0.00018696369851440754, "loss": 1.2901, "step": 5031 }, { "epoch": 0.06538854498467456, "grad_norm": 0.3237695097923279, "learning_rate": 0.00018696109905249616, "loss": 1.523, "step": 5032 }, { "epoch": 0.06540153952859043, "grad_norm": 0.35873982310295105, "learning_rate": 0.00018695849959058476, "loss": 1.4954, "step": 5033 }, { "epoch": 0.0654145340725063, "grad_norm": 0.37050142884254456, "learning_rate": 0.00018695590012867335, "loss": 1.5798, "step": 5034 }, { "epoch": 0.06542752861642218, "grad_norm": 0.37431302666664124, "learning_rate": 0.000186953300666762, "loss": 1.3788, "step": 5035 }, { "epoch": 0.06544052316033805, "grad_norm": 0.37319809198379517, "learning_rate": 0.0001869507012048506, "loss": 1.4956, "step": 5036 }, { "epoch": 0.06545351770425392, "grad_norm": 0.3907186686992645, "learning_rate": 0.00018694810174293923, "loss": 1.3038, "step": 5037 }, { "epoch": 0.0654665122481698, "grad_norm": 0.3807026445865631, "learning_rate": 0.00018694550228102783, "loss": 1.5015, "step": 5038 }, { "epoch": 0.06547950679208567, "grad_norm": 0.37635213136672974, "learning_rate": 0.00018694290281911645, "loss": 1.3797, "step": 5039 }, { "epoch": 0.06549250133600154, "grad_norm": 0.4360508322715759, "learning_rate": 0.00018694030335720507, "loss": 1.5969, "step": 5040 }, { "epoch": 0.06550549587991741, "grad_norm": 0.3133851885795593, "learning_rate": 0.00018693770389529367, "loss": 1.4129, "step": 5041 }, { "epoch": 0.06551849042383329, "grad_norm": 0.32416558265686035, "learning_rate": 0.0001869351044333823, "loss": 1.4297, "step": 5042 }, { "epoch": 0.06553148496774916, "grad_norm": 0.42722848057746887, "learning_rate": 0.00018693250497147092, "loss": 1.6206, "step": 5043 }, { "epoch": 0.06554447951166505, "grad_norm": 0.4987455904483795, "learning_rate": 0.00018692990550955955, "loss": 1.4992, "step": 5044 }, { "epoch": 0.06555747405558092, "grad_norm": 0.43340280652046204, "learning_rate": 0.00018692730604764814, "loss": 1.6011, "step": 5045 }, { "epoch": 0.06557046859949679, "grad_norm": 0.4113078713417053, "learning_rate": 0.00018692470658573674, "loss": 1.5793, "step": 5046 }, { "epoch": 0.06558346314341267, "grad_norm": 0.4991094470024109, "learning_rate": 0.0001869221071238254, "loss": 1.5646, "step": 5047 }, { "epoch": 0.06559645768732854, "grad_norm": 0.48501214385032654, "learning_rate": 0.000186919507661914, "loss": 1.3584, "step": 5048 }, { "epoch": 0.06560945223124441, "grad_norm": 0.3096579611301422, "learning_rate": 0.0001869169082000026, "loss": 1.4875, "step": 5049 }, { "epoch": 0.06562244677516028, "grad_norm": 0.3580510914325714, "learning_rate": 0.0001869143087380912, "loss": 1.2825, "step": 5050 }, { "epoch": 0.06563544131907616, "grad_norm": 0.3211670517921448, "learning_rate": 0.00018691170927617984, "loss": 1.2917, "step": 5051 }, { "epoch": 0.06564843586299203, "grad_norm": 0.3310427963733673, "learning_rate": 0.00018690910981426846, "loss": 1.4332, "step": 5052 }, { "epoch": 0.0656614304069079, "grad_norm": 0.3947664797306061, "learning_rate": 0.00018690651035235706, "loss": 1.4708, "step": 5053 }, { "epoch": 0.06567442495082378, "grad_norm": 0.44028183817863464, "learning_rate": 0.00018690391089044568, "loss": 1.5272, "step": 5054 }, { "epoch": 0.06568741949473965, "grad_norm": 0.27063998579978943, "learning_rate": 0.0001869013114285343, "loss": 1.2619, "step": 5055 }, { "epoch": 0.06570041403865552, "grad_norm": 0.3743489384651184, "learning_rate": 0.00018689871196662293, "loss": 1.4847, "step": 5056 }, { "epoch": 0.0657134085825714, "grad_norm": 0.38224536180496216, "learning_rate": 0.00018689611250471153, "loss": 1.3754, "step": 5057 }, { "epoch": 0.06572640312648727, "grad_norm": 0.44809606671333313, "learning_rate": 0.00018689351304280015, "loss": 1.5811, "step": 5058 }, { "epoch": 0.06573939767040314, "grad_norm": 0.38005557656288147, "learning_rate": 0.00018689091358088878, "loss": 1.408, "step": 5059 }, { "epoch": 0.06575239221431901, "grad_norm": 0.4105590581893921, "learning_rate": 0.00018688831411897737, "loss": 1.242, "step": 5060 }, { "epoch": 0.06576538675823489, "grad_norm": 0.3711411952972412, "learning_rate": 0.000186885714657066, "loss": 1.4263, "step": 5061 }, { "epoch": 0.06577838130215076, "grad_norm": 0.4252431094646454, "learning_rate": 0.0001868831151951546, "loss": 1.4642, "step": 5062 }, { "epoch": 0.06579137584606663, "grad_norm": 0.45010408759117126, "learning_rate": 0.00018688051573324322, "loss": 1.5068, "step": 5063 }, { "epoch": 0.0658043703899825, "grad_norm": 0.3801364302635193, "learning_rate": 0.00018687791627133185, "loss": 1.5146, "step": 5064 }, { "epoch": 0.06581736493389838, "grad_norm": 0.36441367864608765, "learning_rate": 0.00018687531680942044, "loss": 1.3055, "step": 5065 }, { "epoch": 0.06583035947781425, "grad_norm": 0.3163621127605438, "learning_rate": 0.0001868727173475091, "loss": 1.3386, "step": 5066 }, { "epoch": 0.06584335402173012, "grad_norm": 0.35099443793296814, "learning_rate": 0.0001868701178855977, "loss": 1.1686, "step": 5067 }, { "epoch": 0.065856348565646, "grad_norm": 0.28214946389198303, "learning_rate": 0.00018686751842368632, "loss": 1.5059, "step": 5068 }, { "epoch": 0.06586934310956187, "grad_norm": 0.36046352982521057, "learning_rate": 0.0001868649189617749, "loss": 1.3996, "step": 5069 }, { "epoch": 0.06588233765347774, "grad_norm": 0.331374853849411, "learning_rate": 0.00018686231949986354, "loss": 1.3427, "step": 5070 }, { "epoch": 0.06589533219739362, "grad_norm": 0.2996813654899597, "learning_rate": 0.00018685972003795216, "loss": 1.4744, "step": 5071 }, { "epoch": 0.06590832674130949, "grad_norm": 0.4613484740257263, "learning_rate": 0.00018685712057604076, "loss": 1.6503, "step": 5072 }, { "epoch": 0.06592132128522536, "grad_norm": 0.35255923867225647, "learning_rate": 0.00018685452111412938, "loss": 1.4307, "step": 5073 }, { "epoch": 0.06593431582914124, "grad_norm": 0.6994350552558899, "learning_rate": 0.000186851921652218, "loss": 1.5619, "step": 5074 }, { "epoch": 0.06594731037305711, "grad_norm": 0.39828792214393616, "learning_rate": 0.0001868493221903066, "loss": 1.4517, "step": 5075 }, { "epoch": 0.06596030491697298, "grad_norm": 0.37385642528533936, "learning_rate": 0.00018684672272839523, "loss": 1.5417, "step": 5076 }, { "epoch": 0.06597329946088885, "grad_norm": 0.33690065145492554, "learning_rate": 0.00018684412326648383, "loss": 1.3128, "step": 5077 }, { "epoch": 0.06598629400480473, "grad_norm": 0.404055655002594, "learning_rate": 0.00018684152380457248, "loss": 1.6508, "step": 5078 }, { "epoch": 0.0659992885487206, "grad_norm": 0.41085606813430786, "learning_rate": 0.00018683892434266108, "loss": 1.4099, "step": 5079 }, { "epoch": 0.06601228309263647, "grad_norm": 0.34779030084609985, "learning_rate": 0.0001868363248807497, "loss": 1.2436, "step": 5080 }, { "epoch": 0.06602527763655235, "grad_norm": 0.45767742395401, "learning_rate": 0.0001868337254188383, "loss": 1.5184, "step": 5081 }, { "epoch": 0.06603827218046823, "grad_norm": 0.45148172974586487, "learning_rate": 0.00018683112595692692, "loss": 1.496, "step": 5082 }, { "epoch": 0.0660512667243841, "grad_norm": 0.45361578464508057, "learning_rate": 0.00018682852649501555, "loss": 1.3767, "step": 5083 }, { "epoch": 0.06606426126829998, "grad_norm": 0.39419567584991455, "learning_rate": 0.00018682592703310414, "loss": 1.4638, "step": 5084 }, { "epoch": 0.06607725581221585, "grad_norm": 0.4580013155937195, "learning_rate": 0.00018682332757119277, "loss": 1.5722, "step": 5085 }, { "epoch": 0.06609025035613172, "grad_norm": 0.45496541261672974, "learning_rate": 0.0001868207281092814, "loss": 1.4134, "step": 5086 }, { "epoch": 0.0661032449000476, "grad_norm": 0.3905915319919586, "learning_rate": 0.00018681812864737002, "loss": 1.467, "step": 5087 }, { "epoch": 0.06611623944396347, "grad_norm": 0.359813392162323, "learning_rate": 0.00018681552918545862, "loss": 1.2496, "step": 5088 }, { "epoch": 0.06612923398787934, "grad_norm": 0.4537199139595032, "learning_rate": 0.0001868129297235472, "loss": 1.4641, "step": 5089 }, { "epoch": 0.06614222853179522, "grad_norm": 0.39269357919692993, "learning_rate": 0.00018681033026163586, "loss": 1.569, "step": 5090 }, { "epoch": 0.06615522307571109, "grad_norm": 0.3269616663455963, "learning_rate": 0.00018680773079972446, "loss": 1.3089, "step": 5091 }, { "epoch": 0.06616821761962696, "grad_norm": 0.4135110676288605, "learning_rate": 0.0001868051313378131, "loss": 1.5335, "step": 5092 }, { "epoch": 0.06618121216354284, "grad_norm": 0.35924768447875977, "learning_rate": 0.00018680253187590168, "loss": 1.4796, "step": 5093 }, { "epoch": 0.06619420670745871, "grad_norm": 0.4137924015522003, "learning_rate": 0.0001867999324139903, "loss": 1.5997, "step": 5094 }, { "epoch": 0.06620720125137458, "grad_norm": 0.4537808299064636, "learning_rate": 0.00018679733295207893, "loss": 1.5673, "step": 5095 }, { "epoch": 0.06622019579529045, "grad_norm": 0.5089097619056702, "learning_rate": 0.00018679473349016753, "loss": 1.4853, "step": 5096 }, { "epoch": 0.06623319033920633, "grad_norm": 0.42744994163513184, "learning_rate": 0.00018679213402825615, "loss": 1.4846, "step": 5097 }, { "epoch": 0.0662461848831222, "grad_norm": 0.4083409905433655, "learning_rate": 0.00018678953456634478, "loss": 1.4917, "step": 5098 }, { "epoch": 0.06625917942703807, "grad_norm": 0.41509148478507996, "learning_rate": 0.0001867869351044334, "loss": 1.51, "step": 5099 }, { "epoch": 0.06627217397095395, "grad_norm": 0.404069721698761, "learning_rate": 0.000186784335642522, "loss": 1.4931, "step": 5100 }, { "epoch": 0.06628516851486982, "grad_norm": 0.4125731289386749, "learning_rate": 0.00018678173618061063, "loss": 1.364, "step": 5101 }, { "epoch": 0.06629816305878569, "grad_norm": 0.48383399844169617, "learning_rate": 0.00018677913671869925, "loss": 1.6206, "step": 5102 }, { "epoch": 0.06631115760270156, "grad_norm": 0.44578373432159424, "learning_rate": 0.00018677653725678785, "loss": 1.3747, "step": 5103 }, { "epoch": 0.06632415214661744, "grad_norm": 0.4086229205131531, "learning_rate": 0.00018677393779487647, "loss": 1.4139, "step": 5104 }, { "epoch": 0.06633714669053331, "grad_norm": 0.3682081997394562, "learning_rate": 0.0001867713383329651, "loss": 1.5932, "step": 5105 }, { "epoch": 0.06635014123444918, "grad_norm": 0.37063175439834595, "learning_rate": 0.0001867687388710537, "loss": 1.3922, "step": 5106 }, { "epoch": 0.06636313577836506, "grad_norm": 0.3529486060142517, "learning_rate": 0.00018676613940914232, "loss": 1.3967, "step": 5107 }, { "epoch": 0.06637613032228093, "grad_norm": 0.3288261592388153, "learning_rate": 0.00018676353994723092, "loss": 1.5192, "step": 5108 }, { "epoch": 0.0663891248661968, "grad_norm": 0.41075578331947327, "learning_rate": 0.00018676094048531957, "loss": 1.4928, "step": 5109 }, { "epoch": 0.06640211941011268, "grad_norm": 0.3678074777126312, "learning_rate": 0.00018675834102340816, "loss": 1.3007, "step": 5110 }, { "epoch": 0.06641511395402855, "grad_norm": 0.39846938848495483, "learning_rate": 0.0001867557415614968, "loss": 1.3693, "step": 5111 }, { "epoch": 0.06642810849794442, "grad_norm": 0.3051009178161621, "learning_rate": 0.0001867531420995854, "loss": 1.4999, "step": 5112 }, { "epoch": 0.0664411030418603, "grad_norm": 0.3826873004436493, "learning_rate": 0.000186750542637674, "loss": 1.5415, "step": 5113 }, { "epoch": 0.06645409758577617, "grad_norm": 0.3207344710826874, "learning_rate": 0.00018674794317576264, "loss": 1.4091, "step": 5114 }, { "epoch": 0.06646709212969204, "grad_norm": 0.3725905418395996, "learning_rate": 0.00018674534371385123, "loss": 1.5888, "step": 5115 }, { "epoch": 0.06648008667360791, "grad_norm": 0.3939463496208191, "learning_rate": 0.00018674274425193986, "loss": 1.3029, "step": 5116 }, { "epoch": 0.06649308121752379, "grad_norm": 0.3740473687648773, "learning_rate": 0.00018674014479002848, "loss": 1.3662, "step": 5117 }, { "epoch": 0.06650607576143966, "grad_norm": 0.4016275107860565, "learning_rate": 0.00018673754532811708, "loss": 1.42, "step": 5118 }, { "epoch": 0.06651907030535553, "grad_norm": 0.368172824382782, "learning_rate": 0.0001867349458662057, "loss": 1.6754, "step": 5119 }, { "epoch": 0.06653206484927142, "grad_norm": 0.4605575501918793, "learning_rate": 0.0001867323464042943, "loss": 1.6073, "step": 5120 }, { "epoch": 0.06654505939318729, "grad_norm": 0.3691301643848419, "learning_rate": 0.00018672974694238295, "loss": 1.2124, "step": 5121 }, { "epoch": 0.06655805393710316, "grad_norm": 0.28400954604148865, "learning_rate": 0.00018672714748047155, "loss": 1.3996, "step": 5122 }, { "epoch": 0.06657104848101904, "grad_norm": 0.37806907296180725, "learning_rate": 0.00018672454801856017, "loss": 1.3488, "step": 5123 }, { "epoch": 0.06658404302493491, "grad_norm": 0.4382490813732147, "learning_rate": 0.00018672194855664877, "loss": 1.5047, "step": 5124 }, { "epoch": 0.06659703756885078, "grad_norm": 0.4405001997947693, "learning_rate": 0.0001867193490947374, "loss": 1.4759, "step": 5125 }, { "epoch": 0.06661003211276666, "grad_norm": 0.35975533723831177, "learning_rate": 0.00018671674963282602, "loss": 1.3524, "step": 5126 }, { "epoch": 0.06662302665668253, "grad_norm": 0.2759242057800293, "learning_rate": 0.00018671415017091462, "loss": 1.2414, "step": 5127 }, { "epoch": 0.0666360212005984, "grad_norm": 0.44217249751091003, "learning_rate": 0.00018671155070900324, "loss": 1.5851, "step": 5128 }, { "epoch": 0.06664901574451428, "grad_norm": 0.3929182291030884, "learning_rate": 0.00018670895124709187, "loss": 1.575, "step": 5129 }, { "epoch": 0.06666201028843015, "grad_norm": 0.35938742756843567, "learning_rate": 0.00018670635178518046, "loss": 1.4744, "step": 5130 }, { "epoch": 0.06667500483234602, "grad_norm": 0.3668377995491028, "learning_rate": 0.0001867037523232691, "loss": 1.5975, "step": 5131 }, { "epoch": 0.0666879993762619, "grad_norm": 0.3105641305446625, "learning_rate": 0.0001867011528613577, "loss": 1.4407, "step": 5132 }, { "epoch": 0.06670099392017777, "grad_norm": 0.43602290749549866, "learning_rate": 0.00018669855339944634, "loss": 1.4216, "step": 5133 }, { "epoch": 0.06671398846409364, "grad_norm": 0.3823474049568176, "learning_rate": 0.00018669595393753494, "loss": 1.3277, "step": 5134 }, { "epoch": 0.06672698300800951, "grad_norm": 0.3559263050556183, "learning_rate": 0.00018669335447562356, "loss": 1.344, "step": 5135 }, { "epoch": 0.06673997755192539, "grad_norm": 0.4177055060863495, "learning_rate": 0.00018669075501371216, "loss": 1.5246, "step": 5136 }, { "epoch": 0.06675297209584126, "grad_norm": 0.4542948603630066, "learning_rate": 0.00018668815555180078, "loss": 1.4636, "step": 5137 }, { "epoch": 0.06676596663975713, "grad_norm": 0.3753998875617981, "learning_rate": 0.0001866855560898894, "loss": 1.4454, "step": 5138 }, { "epoch": 0.066778961183673, "grad_norm": 0.3499920070171356, "learning_rate": 0.000186682956627978, "loss": 1.4657, "step": 5139 }, { "epoch": 0.06679195572758888, "grad_norm": 0.3218679130077362, "learning_rate": 0.00018668035716606666, "loss": 1.2491, "step": 5140 }, { "epoch": 0.06680495027150475, "grad_norm": 0.43401187658309937, "learning_rate": 0.00018667775770415525, "loss": 1.5627, "step": 5141 }, { "epoch": 0.06681794481542062, "grad_norm": 0.4200804531574249, "learning_rate": 0.00018667515824224388, "loss": 1.282, "step": 5142 }, { "epoch": 0.0668309393593365, "grad_norm": 0.41693368554115295, "learning_rate": 0.00018667255878033247, "loss": 1.5221, "step": 5143 }, { "epoch": 0.06684393390325237, "grad_norm": 0.48830854892730713, "learning_rate": 0.0001866699593184211, "loss": 1.6156, "step": 5144 }, { "epoch": 0.06685692844716824, "grad_norm": 0.4205325245857239, "learning_rate": 0.00018666735985650972, "loss": 1.3908, "step": 5145 }, { "epoch": 0.06686992299108412, "grad_norm": 0.4797542989253998, "learning_rate": 0.00018666476039459832, "loss": 1.4816, "step": 5146 }, { "epoch": 0.06688291753499999, "grad_norm": 0.6492830514907837, "learning_rate": 0.00018666216093268695, "loss": 1.5275, "step": 5147 }, { "epoch": 0.06689591207891586, "grad_norm": 0.3697177767753601, "learning_rate": 0.00018665956147077557, "loss": 1.302, "step": 5148 }, { "epoch": 0.06690890662283173, "grad_norm": 0.381051629781723, "learning_rate": 0.00018665696200886417, "loss": 1.4579, "step": 5149 }, { "epoch": 0.06692190116674761, "grad_norm": 0.4169452488422394, "learning_rate": 0.0001866543625469528, "loss": 1.5325, "step": 5150 }, { "epoch": 0.06693489571066348, "grad_norm": 0.5180598497390747, "learning_rate": 0.0001866517630850414, "loss": 1.5506, "step": 5151 }, { "epoch": 0.06694789025457935, "grad_norm": 0.5124133229255676, "learning_rate": 0.00018664916362313004, "loss": 1.558, "step": 5152 }, { "epoch": 0.06696088479849523, "grad_norm": 0.4223629832267761, "learning_rate": 0.00018664656416121864, "loss": 1.4085, "step": 5153 }, { "epoch": 0.0669738793424111, "grad_norm": 0.4464322328567505, "learning_rate": 0.00018664396469930726, "loss": 1.5744, "step": 5154 }, { "epoch": 0.06698687388632697, "grad_norm": 0.39805474877357483, "learning_rate": 0.00018664136523739586, "loss": 1.3028, "step": 5155 }, { "epoch": 0.06699986843024285, "grad_norm": 0.5118453502655029, "learning_rate": 0.00018663876577548448, "loss": 1.5302, "step": 5156 }, { "epoch": 0.06701286297415872, "grad_norm": 0.3416501581668854, "learning_rate": 0.0001866361663135731, "loss": 1.2731, "step": 5157 }, { "epoch": 0.0670258575180746, "grad_norm": 0.3870924115180969, "learning_rate": 0.0001866335668516617, "loss": 1.3796, "step": 5158 }, { "epoch": 0.06703885206199048, "grad_norm": 0.3704221844673157, "learning_rate": 0.00018663096738975033, "loss": 1.3454, "step": 5159 }, { "epoch": 0.06705184660590635, "grad_norm": 0.3537854850292206, "learning_rate": 0.00018662836792783896, "loss": 1.4649, "step": 5160 }, { "epoch": 0.06706484114982222, "grad_norm": 0.4003196358680725, "learning_rate": 0.00018662576846592755, "loss": 1.3104, "step": 5161 }, { "epoch": 0.0670778356937381, "grad_norm": 0.3782847225666046, "learning_rate": 0.00018662316900401618, "loss": 1.407, "step": 5162 }, { "epoch": 0.06709083023765397, "grad_norm": 0.30927857756614685, "learning_rate": 0.00018662056954210477, "loss": 1.3548, "step": 5163 }, { "epoch": 0.06710382478156984, "grad_norm": 0.44752243161201477, "learning_rate": 0.00018661797008019343, "loss": 1.3521, "step": 5164 }, { "epoch": 0.06711681932548572, "grad_norm": 0.4513384997844696, "learning_rate": 0.00018661537061828202, "loss": 1.6778, "step": 5165 }, { "epoch": 0.06712981386940159, "grad_norm": 0.43821585178375244, "learning_rate": 0.00018661277115637065, "loss": 1.6521, "step": 5166 }, { "epoch": 0.06714280841331746, "grad_norm": 0.3929871618747711, "learning_rate": 0.00018661017169445925, "loss": 1.2764, "step": 5167 }, { "epoch": 0.06715580295723333, "grad_norm": 0.4311833381652832, "learning_rate": 0.00018660757223254787, "loss": 1.463, "step": 5168 }, { "epoch": 0.06716879750114921, "grad_norm": 0.3531707227230072, "learning_rate": 0.0001866049727706365, "loss": 1.4412, "step": 5169 }, { "epoch": 0.06718179204506508, "grad_norm": 0.44328662753105164, "learning_rate": 0.0001866023733087251, "loss": 1.507, "step": 5170 }, { "epoch": 0.06719478658898095, "grad_norm": 0.32282599806785583, "learning_rate": 0.00018659977384681372, "loss": 1.3373, "step": 5171 }, { "epoch": 0.06720778113289683, "grad_norm": 0.3568762540817261, "learning_rate": 0.00018659717438490234, "loss": 1.2362, "step": 5172 }, { "epoch": 0.0672207756768127, "grad_norm": 0.32072365283966064, "learning_rate": 0.00018659457492299094, "loss": 1.5883, "step": 5173 }, { "epoch": 0.06723377022072857, "grad_norm": 0.33857256174087524, "learning_rate": 0.00018659197546107956, "loss": 1.5246, "step": 5174 }, { "epoch": 0.06724676476464445, "grad_norm": 0.35791605710983276, "learning_rate": 0.0001865893759991682, "loss": 1.5009, "step": 5175 }, { "epoch": 0.06725975930856032, "grad_norm": 0.31736233830451965, "learning_rate": 0.0001865867765372568, "loss": 1.406, "step": 5176 }, { "epoch": 0.06727275385247619, "grad_norm": 0.5177508592605591, "learning_rate": 0.0001865841770753454, "loss": 1.4898, "step": 5177 }, { "epoch": 0.06728574839639206, "grad_norm": 0.27760931849479675, "learning_rate": 0.00018658157761343403, "loss": 1.1981, "step": 5178 }, { "epoch": 0.06729874294030794, "grad_norm": 0.46979695558547974, "learning_rate": 0.00018657897815152266, "loss": 1.5755, "step": 5179 }, { "epoch": 0.06731173748422381, "grad_norm": 0.4669855237007141, "learning_rate": 0.00018657637868961126, "loss": 1.4488, "step": 5180 }, { "epoch": 0.06732473202813968, "grad_norm": 0.19816741347312927, "learning_rate": 0.00018657377922769988, "loss": 1.2818, "step": 5181 }, { "epoch": 0.06733772657205556, "grad_norm": 0.3971433639526367, "learning_rate": 0.00018657117976578848, "loss": 1.4341, "step": 5182 }, { "epoch": 0.06735072111597143, "grad_norm": 0.2910483777523041, "learning_rate": 0.00018656858030387713, "loss": 1.3133, "step": 5183 }, { "epoch": 0.0673637156598873, "grad_norm": 0.3295382261276245, "learning_rate": 0.00018656598084196573, "loss": 1.3246, "step": 5184 }, { "epoch": 0.06737671020380318, "grad_norm": 0.3677801787853241, "learning_rate": 0.00018656338138005432, "loss": 1.1968, "step": 5185 }, { "epoch": 0.06738970474771905, "grad_norm": 0.3704491853713989, "learning_rate": 0.00018656078191814295, "loss": 1.3783, "step": 5186 }, { "epoch": 0.06740269929163492, "grad_norm": 0.30574753880500793, "learning_rate": 0.00018655818245623157, "loss": 1.3477, "step": 5187 }, { "epoch": 0.0674156938355508, "grad_norm": 0.3611847758293152, "learning_rate": 0.0001865555829943202, "loss": 1.4376, "step": 5188 }, { "epoch": 0.06742868837946667, "grad_norm": 0.3521232604980469, "learning_rate": 0.0001865529835324088, "loss": 1.4912, "step": 5189 }, { "epoch": 0.06744168292338254, "grad_norm": 0.33566415309906006, "learning_rate": 0.00018655038407049742, "loss": 1.4116, "step": 5190 }, { "epoch": 0.06745467746729841, "grad_norm": 0.3886506259441376, "learning_rate": 0.00018654778460858604, "loss": 1.4885, "step": 5191 }, { "epoch": 0.06746767201121429, "grad_norm": 0.3376752734184265, "learning_rate": 0.00018654518514667464, "loss": 1.3143, "step": 5192 }, { "epoch": 0.06748066655513016, "grad_norm": 0.45552459359169006, "learning_rate": 0.00018654258568476327, "loss": 1.472, "step": 5193 }, { "epoch": 0.06749366109904603, "grad_norm": 0.3657516539096832, "learning_rate": 0.00018653998622285186, "loss": 1.4285, "step": 5194 }, { "epoch": 0.0675066556429619, "grad_norm": 0.4129646420478821, "learning_rate": 0.00018653738676094051, "loss": 1.4733, "step": 5195 }, { "epoch": 0.06751965018687779, "grad_norm": 0.4120321571826935, "learning_rate": 0.0001865347872990291, "loss": 1.5079, "step": 5196 }, { "epoch": 0.06753264473079366, "grad_norm": 0.4931769371032715, "learning_rate": 0.0001865321878371177, "loss": 1.4502, "step": 5197 }, { "epoch": 0.06754563927470954, "grad_norm": 0.31527647376060486, "learning_rate": 0.00018652958837520633, "loss": 1.4159, "step": 5198 }, { "epoch": 0.06755863381862541, "grad_norm": 0.42977553606033325, "learning_rate": 0.00018652698891329496, "loss": 1.277, "step": 5199 }, { "epoch": 0.06757162836254128, "grad_norm": 0.3936081528663635, "learning_rate": 0.00018652438945138358, "loss": 1.3278, "step": 5200 }, { "epoch": 0.06758462290645716, "grad_norm": 0.38466107845306396, "learning_rate": 0.00018652178998947218, "loss": 1.3467, "step": 5201 }, { "epoch": 0.06759761745037303, "grad_norm": 0.3167068362236023, "learning_rate": 0.0001865191905275608, "loss": 1.3721, "step": 5202 }, { "epoch": 0.0676106119942889, "grad_norm": 0.4390086233615875, "learning_rate": 0.00018651659106564943, "loss": 1.4633, "step": 5203 }, { "epoch": 0.06762360653820478, "grad_norm": 0.3464265763759613, "learning_rate": 0.00018651399160373803, "loss": 1.5348, "step": 5204 }, { "epoch": 0.06763660108212065, "grad_norm": 0.46605727076530457, "learning_rate": 0.00018651139214182665, "loss": 1.4266, "step": 5205 }, { "epoch": 0.06764959562603652, "grad_norm": 0.31618010997772217, "learning_rate": 0.00018650879267991525, "loss": 1.4348, "step": 5206 }, { "epoch": 0.0676625901699524, "grad_norm": 0.41530296206474304, "learning_rate": 0.0001865061932180039, "loss": 1.366, "step": 5207 }, { "epoch": 0.06767558471386827, "grad_norm": 0.5661311149597168, "learning_rate": 0.0001865035937560925, "loss": 1.564, "step": 5208 }, { "epoch": 0.06768857925778414, "grad_norm": 0.3350794315338135, "learning_rate": 0.00018650099429418112, "loss": 1.2329, "step": 5209 }, { "epoch": 0.06770157380170001, "grad_norm": 0.3374537527561188, "learning_rate": 0.00018649839483226972, "loss": 1.2643, "step": 5210 }, { "epoch": 0.06771456834561589, "grad_norm": 0.4399222433567047, "learning_rate": 0.00018649579537035834, "loss": 1.3484, "step": 5211 }, { "epoch": 0.06772756288953176, "grad_norm": 0.40509864687919617, "learning_rate": 0.00018649319590844697, "loss": 1.7185, "step": 5212 }, { "epoch": 0.06774055743344763, "grad_norm": 0.3245294392108917, "learning_rate": 0.00018649059644653557, "loss": 1.3554, "step": 5213 }, { "epoch": 0.0677535519773635, "grad_norm": 0.32566478848457336, "learning_rate": 0.0001864879969846242, "loss": 1.4119, "step": 5214 }, { "epoch": 0.06776654652127938, "grad_norm": 0.38132572174072266, "learning_rate": 0.00018648539752271281, "loss": 1.4767, "step": 5215 }, { "epoch": 0.06777954106519525, "grad_norm": 0.3775690495967865, "learning_rate": 0.0001864827980608014, "loss": 1.4809, "step": 5216 }, { "epoch": 0.06779253560911112, "grad_norm": 0.4051850736141205, "learning_rate": 0.00018648019859889004, "loss": 1.5445, "step": 5217 }, { "epoch": 0.067805530153027, "grad_norm": 0.44299229979515076, "learning_rate": 0.00018647759913697866, "loss": 1.4679, "step": 5218 }, { "epoch": 0.06781852469694287, "grad_norm": 0.4380272626876831, "learning_rate": 0.00018647499967506728, "loss": 1.2535, "step": 5219 }, { "epoch": 0.06783151924085874, "grad_norm": 0.44665494561195374, "learning_rate": 0.00018647240021315588, "loss": 1.3511, "step": 5220 }, { "epoch": 0.06784451378477462, "grad_norm": 0.33580636978149414, "learning_rate": 0.0001864698007512445, "loss": 1.231, "step": 5221 }, { "epoch": 0.06785750832869049, "grad_norm": 0.3992922306060791, "learning_rate": 0.00018646720128933313, "loss": 1.212, "step": 5222 }, { "epoch": 0.06787050287260636, "grad_norm": 0.44707828760147095, "learning_rate": 0.00018646460182742173, "loss": 1.294, "step": 5223 }, { "epoch": 0.06788349741652223, "grad_norm": 0.3758302330970764, "learning_rate": 0.00018646200236551035, "loss": 1.2983, "step": 5224 }, { "epoch": 0.06789649196043811, "grad_norm": 0.38771140575408936, "learning_rate": 0.00018645940290359895, "loss": 1.4407, "step": 5225 }, { "epoch": 0.06790948650435398, "grad_norm": 0.2942237854003906, "learning_rate": 0.0001864568034416876, "loss": 1.3648, "step": 5226 }, { "epoch": 0.06792248104826985, "grad_norm": 0.3121523857116699, "learning_rate": 0.0001864542039797762, "loss": 1.3511, "step": 5227 }, { "epoch": 0.06793547559218573, "grad_norm": 0.44604796171188354, "learning_rate": 0.0001864516045178648, "loss": 1.4367, "step": 5228 }, { "epoch": 0.0679484701361016, "grad_norm": 0.4803444743156433, "learning_rate": 0.00018644900505595342, "loss": 1.4321, "step": 5229 }, { "epoch": 0.06796146468001747, "grad_norm": 0.4141957759857178, "learning_rate": 0.00018644640559404205, "loss": 1.5444, "step": 5230 }, { "epoch": 0.06797445922393335, "grad_norm": 0.364609032869339, "learning_rate": 0.00018644380613213067, "loss": 1.363, "step": 5231 }, { "epoch": 0.06798745376784922, "grad_norm": 0.44600462913513184, "learning_rate": 0.00018644120667021927, "loss": 1.5828, "step": 5232 }, { "epoch": 0.06800044831176509, "grad_norm": 0.3881787657737732, "learning_rate": 0.0001864386072083079, "loss": 1.4084, "step": 5233 }, { "epoch": 0.06801344285568098, "grad_norm": 0.5400509238243103, "learning_rate": 0.00018643600774639652, "loss": 1.5342, "step": 5234 }, { "epoch": 0.06802643739959685, "grad_norm": 0.4942297041416168, "learning_rate": 0.00018643340828448511, "loss": 1.6631, "step": 5235 }, { "epoch": 0.06803943194351272, "grad_norm": 0.33731839060783386, "learning_rate": 0.00018643080882257374, "loss": 1.5178, "step": 5236 }, { "epoch": 0.0680524264874286, "grad_norm": 0.3498685956001282, "learning_rate": 0.00018642820936066234, "loss": 1.3351, "step": 5237 }, { "epoch": 0.06806542103134447, "grad_norm": 0.4106043875217438, "learning_rate": 0.000186425609898751, "loss": 1.4977, "step": 5238 }, { "epoch": 0.06807841557526034, "grad_norm": 0.36881181597709656, "learning_rate": 0.00018642301043683958, "loss": 1.4291, "step": 5239 }, { "epoch": 0.06809141011917622, "grad_norm": 0.42651116847991943, "learning_rate": 0.00018642041097492818, "loss": 1.4475, "step": 5240 }, { "epoch": 0.06810440466309209, "grad_norm": 0.38923752307891846, "learning_rate": 0.0001864178115130168, "loss": 1.3683, "step": 5241 }, { "epoch": 0.06811739920700796, "grad_norm": 0.3672979772090912, "learning_rate": 0.00018641521205110543, "loss": 1.3896, "step": 5242 }, { "epoch": 0.06813039375092383, "grad_norm": 0.3511219620704651, "learning_rate": 0.00018641261258919406, "loss": 1.291, "step": 5243 }, { "epoch": 0.06814338829483971, "grad_norm": 0.3356342017650604, "learning_rate": 0.00018641001312728265, "loss": 1.4553, "step": 5244 }, { "epoch": 0.06815638283875558, "grad_norm": 0.36938512325286865, "learning_rate": 0.00018640741366537128, "loss": 1.282, "step": 5245 }, { "epoch": 0.06816937738267145, "grad_norm": 0.37965676188468933, "learning_rate": 0.0001864048142034599, "loss": 1.3674, "step": 5246 }, { "epoch": 0.06818237192658733, "grad_norm": 0.32190224528312683, "learning_rate": 0.0001864022147415485, "loss": 1.475, "step": 5247 }, { "epoch": 0.0681953664705032, "grad_norm": 0.377675324678421, "learning_rate": 0.00018639961527963712, "loss": 1.6903, "step": 5248 }, { "epoch": 0.06820836101441907, "grad_norm": 0.45027559995651245, "learning_rate": 0.00018639701581772572, "loss": 1.6195, "step": 5249 }, { "epoch": 0.06822135555833495, "grad_norm": 0.38764241337776184, "learning_rate": 0.00018639441635581437, "loss": 1.3975, "step": 5250 }, { "epoch": 0.06823435010225082, "grad_norm": 0.3362537622451782, "learning_rate": 0.00018639181689390297, "loss": 1.2832, "step": 5251 }, { "epoch": 0.06824734464616669, "grad_norm": 0.48953282833099365, "learning_rate": 0.00018638921743199157, "loss": 1.7233, "step": 5252 }, { "epoch": 0.06826033919008256, "grad_norm": 0.3870137631893158, "learning_rate": 0.00018638661797008022, "loss": 1.5522, "step": 5253 }, { "epoch": 0.06827333373399844, "grad_norm": 0.4046284258365631, "learning_rate": 0.00018638401850816882, "loss": 1.3826, "step": 5254 }, { "epoch": 0.06828632827791431, "grad_norm": 0.45206111669540405, "learning_rate": 0.00018638141904625744, "loss": 1.5993, "step": 5255 }, { "epoch": 0.06829932282183018, "grad_norm": 0.35778701305389404, "learning_rate": 0.00018637881958434604, "loss": 1.6784, "step": 5256 }, { "epoch": 0.06831231736574606, "grad_norm": 0.4111640453338623, "learning_rate": 0.00018637622012243466, "loss": 1.3314, "step": 5257 }, { "epoch": 0.06832531190966193, "grad_norm": 0.4049829840660095, "learning_rate": 0.0001863736206605233, "loss": 1.5166, "step": 5258 }, { "epoch": 0.0683383064535778, "grad_norm": 0.4178505837917328, "learning_rate": 0.00018637102119861188, "loss": 1.4619, "step": 5259 }, { "epoch": 0.06835130099749367, "grad_norm": 0.3455483317375183, "learning_rate": 0.0001863684217367005, "loss": 1.5682, "step": 5260 }, { "epoch": 0.06836429554140955, "grad_norm": 0.34195348620414734, "learning_rate": 0.00018636582227478913, "loss": 1.2803, "step": 5261 }, { "epoch": 0.06837729008532542, "grad_norm": 0.464097261428833, "learning_rate": 0.00018636322281287776, "loss": 1.4458, "step": 5262 }, { "epoch": 0.0683902846292413, "grad_norm": 0.47718513011932373, "learning_rate": 0.00018636062335096636, "loss": 1.6724, "step": 5263 }, { "epoch": 0.06840327917315717, "grad_norm": 0.4567102789878845, "learning_rate": 0.00018635802388905498, "loss": 1.5884, "step": 5264 }, { "epoch": 0.06841627371707304, "grad_norm": 0.3462975025177002, "learning_rate": 0.0001863554244271436, "loss": 1.297, "step": 5265 }, { "epoch": 0.06842926826098891, "grad_norm": 0.3322049677371979, "learning_rate": 0.0001863528249652322, "loss": 1.385, "step": 5266 }, { "epoch": 0.06844226280490479, "grad_norm": 0.4266486167907715, "learning_rate": 0.00018635022550332083, "loss": 1.4837, "step": 5267 }, { "epoch": 0.06845525734882066, "grad_norm": 0.4028988778591156, "learning_rate": 0.00018634762604140942, "loss": 1.4101, "step": 5268 }, { "epoch": 0.06846825189273653, "grad_norm": 0.46043241024017334, "learning_rate": 0.00018634502657949805, "loss": 1.3826, "step": 5269 }, { "epoch": 0.0684812464366524, "grad_norm": 0.3606872260570526, "learning_rate": 0.00018634242711758667, "loss": 1.629, "step": 5270 }, { "epoch": 0.06849424098056828, "grad_norm": 0.36665815114974976, "learning_rate": 0.00018633982765567527, "loss": 1.4191, "step": 5271 }, { "epoch": 0.06850723552448416, "grad_norm": 0.3493126630783081, "learning_rate": 0.0001863372281937639, "loss": 1.4933, "step": 5272 }, { "epoch": 0.06852023006840004, "grad_norm": 0.35804951190948486, "learning_rate": 0.00018633462873185252, "loss": 1.3218, "step": 5273 }, { "epoch": 0.06853322461231591, "grad_norm": 0.41491150856018066, "learning_rate": 0.00018633202926994114, "loss": 1.3729, "step": 5274 }, { "epoch": 0.06854621915623178, "grad_norm": 0.43768370151519775, "learning_rate": 0.00018632942980802974, "loss": 1.5797, "step": 5275 }, { "epoch": 0.06855921370014766, "grad_norm": 0.308243066072464, "learning_rate": 0.00018632683034611837, "loss": 1.4119, "step": 5276 }, { "epoch": 0.06857220824406353, "grad_norm": 0.44702255725860596, "learning_rate": 0.000186324230884207, "loss": 1.3934, "step": 5277 }, { "epoch": 0.0685852027879794, "grad_norm": 0.2788557708263397, "learning_rate": 0.0001863216314222956, "loss": 1.2067, "step": 5278 }, { "epoch": 0.06859819733189527, "grad_norm": 0.37357908487319946, "learning_rate": 0.0001863190319603842, "loss": 1.4587, "step": 5279 }, { "epoch": 0.06861119187581115, "grad_norm": 0.42723459005355835, "learning_rate": 0.0001863164324984728, "loss": 1.4007, "step": 5280 }, { "epoch": 0.06862418641972702, "grad_norm": 0.5236778855323792, "learning_rate": 0.00018631383303656143, "loss": 1.4214, "step": 5281 }, { "epoch": 0.0686371809636429, "grad_norm": 0.36598870158195496, "learning_rate": 0.00018631123357465006, "loss": 1.4405, "step": 5282 }, { "epoch": 0.06865017550755877, "grad_norm": 0.3029235601425171, "learning_rate": 0.00018630863411273866, "loss": 1.5244, "step": 5283 }, { "epoch": 0.06866317005147464, "grad_norm": 0.40026795864105225, "learning_rate": 0.00018630603465082728, "loss": 1.5395, "step": 5284 }, { "epoch": 0.06867616459539051, "grad_norm": 0.2823532223701477, "learning_rate": 0.0001863034351889159, "loss": 1.2154, "step": 5285 }, { "epoch": 0.06868915913930639, "grad_norm": 0.3997417390346527, "learning_rate": 0.00018630083572700453, "loss": 1.4944, "step": 5286 }, { "epoch": 0.06870215368322226, "grad_norm": 0.43358761072158813, "learning_rate": 0.00018629823626509313, "loss": 1.5712, "step": 5287 }, { "epoch": 0.06871514822713813, "grad_norm": 0.4592825770378113, "learning_rate": 0.00018629563680318175, "loss": 1.3432, "step": 5288 }, { "epoch": 0.068728142771054, "grad_norm": 0.3673540949821472, "learning_rate": 0.00018629303734127038, "loss": 1.3306, "step": 5289 }, { "epoch": 0.06874113731496988, "grad_norm": 0.42722219228744507, "learning_rate": 0.00018629043787935897, "loss": 1.425, "step": 5290 }, { "epoch": 0.06875413185888575, "grad_norm": 0.35838770866394043, "learning_rate": 0.0001862878384174476, "loss": 1.4759, "step": 5291 }, { "epoch": 0.06876712640280162, "grad_norm": 0.29523012042045593, "learning_rate": 0.00018628523895553622, "loss": 1.4983, "step": 5292 }, { "epoch": 0.0687801209467175, "grad_norm": 0.4709910750389099, "learning_rate": 0.00018628263949362485, "loss": 1.3785, "step": 5293 }, { "epoch": 0.06879311549063337, "grad_norm": 0.47797682881355286, "learning_rate": 0.00018628004003171344, "loss": 1.526, "step": 5294 }, { "epoch": 0.06880611003454924, "grad_norm": 0.28782710433006287, "learning_rate": 0.00018627744056980204, "loss": 1.3666, "step": 5295 }, { "epoch": 0.06881910457846512, "grad_norm": 0.39402514696121216, "learning_rate": 0.0001862748411078907, "loss": 1.3459, "step": 5296 }, { "epoch": 0.06883209912238099, "grad_norm": 0.3537048101425171, "learning_rate": 0.0001862722416459793, "loss": 1.4959, "step": 5297 }, { "epoch": 0.06884509366629686, "grad_norm": 0.32225099205970764, "learning_rate": 0.00018626964218406791, "loss": 1.2476, "step": 5298 }, { "epoch": 0.06885808821021273, "grad_norm": 0.38983437418937683, "learning_rate": 0.0001862670427221565, "loss": 1.5324, "step": 5299 }, { "epoch": 0.0688710827541286, "grad_norm": 0.3756939470767975, "learning_rate": 0.00018626444326024514, "loss": 1.4344, "step": 5300 }, { "epoch": 0.06888407729804448, "grad_norm": 0.34896355867385864, "learning_rate": 0.00018626184379833376, "loss": 1.3561, "step": 5301 }, { "epoch": 0.06889707184196035, "grad_norm": 0.42439353466033936, "learning_rate": 0.00018625924433642236, "loss": 1.3665, "step": 5302 }, { "epoch": 0.06891006638587623, "grad_norm": 0.45926398038864136, "learning_rate": 0.00018625664487451098, "loss": 1.3234, "step": 5303 }, { "epoch": 0.0689230609297921, "grad_norm": 0.34778714179992676, "learning_rate": 0.0001862540454125996, "loss": 1.387, "step": 5304 }, { "epoch": 0.06893605547370797, "grad_norm": 0.4572735130786896, "learning_rate": 0.00018625144595068823, "loss": 1.4087, "step": 5305 }, { "epoch": 0.06894905001762384, "grad_norm": 0.3111006021499634, "learning_rate": 0.00018624884648877683, "loss": 1.1695, "step": 5306 }, { "epoch": 0.06896204456153972, "grad_norm": 0.35482633113861084, "learning_rate": 0.00018624624702686543, "loss": 1.4407, "step": 5307 }, { "epoch": 0.06897503910545559, "grad_norm": 0.47263920307159424, "learning_rate": 0.00018624364756495408, "loss": 1.5393, "step": 5308 }, { "epoch": 0.06898803364937146, "grad_norm": 0.4116426408290863, "learning_rate": 0.00018624104810304268, "loss": 1.4939, "step": 5309 }, { "epoch": 0.06900102819328735, "grad_norm": 0.4379853904247284, "learning_rate": 0.0001862384486411313, "loss": 1.4931, "step": 5310 }, { "epoch": 0.06901402273720322, "grad_norm": 0.3272196352481842, "learning_rate": 0.0001862358491792199, "loss": 1.3136, "step": 5311 }, { "epoch": 0.0690270172811191, "grad_norm": 0.3781753182411194, "learning_rate": 0.00018623324971730852, "loss": 1.3917, "step": 5312 }, { "epoch": 0.06904001182503497, "grad_norm": 0.3424317538738251, "learning_rate": 0.00018623065025539715, "loss": 1.4196, "step": 5313 }, { "epoch": 0.06905300636895084, "grad_norm": 0.4061320126056671, "learning_rate": 0.00018622805079348574, "loss": 1.3784, "step": 5314 }, { "epoch": 0.06906600091286672, "grad_norm": 0.2918657660484314, "learning_rate": 0.00018622545133157437, "loss": 1.4452, "step": 5315 }, { "epoch": 0.06907899545678259, "grad_norm": 0.36582809686660767, "learning_rate": 0.000186222851869663, "loss": 1.413, "step": 5316 }, { "epoch": 0.06909199000069846, "grad_norm": 0.3369402587413788, "learning_rate": 0.00018622025240775162, "loss": 1.5082, "step": 5317 }, { "epoch": 0.06910498454461433, "grad_norm": 0.48518049716949463, "learning_rate": 0.00018621765294584021, "loss": 1.3053, "step": 5318 }, { "epoch": 0.06911797908853021, "grad_norm": 0.42002245783805847, "learning_rate": 0.0001862150534839288, "loss": 1.5164, "step": 5319 }, { "epoch": 0.06913097363244608, "grad_norm": 0.32677462697029114, "learning_rate": 0.00018621245402201746, "loss": 1.2893, "step": 5320 }, { "epoch": 0.06914396817636195, "grad_norm": 0.3690161108970642, "learning_rate": 0.00018620985456010606, "loss": 1.5781, "step": 5321 }, { "epoch": 0.06915696272027783, "grad_norm": 0.4283229410648346, "learning_rate": 0.00018620725509819469, "loss": 1.3131, "step": 5322 }, { "epoch": 0.0691699572641937, "grad_norm": 0.4192677140235901, "learning_rate": 0.00018620465563628328, "loss": 1.6219, "step": 5323 }, { "epoch": 0.06918295180810957, "grad_norm": 0.27933207154273987, "learning_rate": 0.0001862020561743719, "loss": 1.2215, "step": 5324 }, { "epoch": 0.06919594635202544, "grad_norm": 0.37069013714790344, "learning_rate": 0.00018619945671246053, "loss": 1.4709, "step": 5325 }, { "epoch": 0.06920894089594132, "grad_norm": 0.47909682989120483, "learning_rate": 0.00018619685725054913, "loss": 1.6683, "step": 5326 }, { "epoch": 0.06922193543985719, "grad_norm": 0.38398486375808716, "learning_rate": 0.00018619425778863778, "loss": 1.4308, "step": 5327 }, { "epoch": 0.06923492998377306, "grad_norm": 0.5003829002380371, "learning_rate": 0.00018619165832672638, "loss": 1.58, "step": 5328 }, { "epoch": 0.06924792452768894, "grad_norm": 0.42183107137680054, "learning_rate": 0.000186189058864815, "loss": 1.3787, "step": 5329 }, { "epoch": 0.06926091907160481, "grad_norm": 0.4104776084423065, "learning_rate": 0.0001861864594029036, "loss": 1.3821, "step": 5330 }, { "epoch": 0.06927391361552068, "grad_norm": 0.3703576326370239, "learning_rate": 0.00018618385994099222, "loss": 1.4411, "step": 5331 }, { "epoch": 0.06928690815943656, "grad_norm": 0.4419730007648468, "learning_rate": 0.00018618126047908085, "loss": 1.4491, "step": 5332 }, { "epoch": 0.06929990270335243, "grad_norm": 0.4102279543876648, "learning_rate": 0.00018617866101716945, "loss": 1.3687, "step": 5333 }, { "epoch": 0.0693128972472683, "grad_norm": 0.43752139806747437, "learning_rate": 0.00018617606155525807, "loss": 1.5106, "step": 5334 }, { "epoch": 0.06932589179118417, "grad_norm": 0.3585141897201538, "learning_rate": 0.0001861734620933467, "loss": 1.5743, "step": 5335 }, { "epoch": 0.06933888633510005, "grad_norm": 0.475147008895874, "learning_rate": 0.0001861708626314353, "loss": 1.5227, "step": 5336 }, { "epoch": 0.06935188087901592, "grad_norm": 0.40625354647636414, "learning_rate": 0.00018616826316952392, "loss": 1.6361, "step": 5337 }, { "epoch": 0.0693648754229318, "grad_norm": 0.34236976504325867, "learning_rate": 0.00018616566370761251, "loss": 1.4576, "step": 5338 }, { "epoch": 0.06937786996684767, "grad_norm": 0.38249075412750244, "learning_rate": 0.00018616306424570117, "loss": 1.1979, "step": 5339 }, { "epoch": 0.06939086451076354, "grad_norm": 0.3851509690284729, "learning_rate": 0.00018616046478378976, "loss": 1.5422, "step": 5340 }, { "epoch": 0.06940385905467941, "grad_norm": 0.3492293059825897, "learning_rate": 0.0001861578653218784, "loss": 1.4181, "step": 5341 }, { "epoch": 0.06941685359859529, "grad_norm": 0.5009096264839172, "learning_rate": 0.00018615526585996699, "loss": 1.5556, "step": 5342 }, { "epoch": 0.06942984814251116, "grad_norm": 0.3898882269859314, "learning_rate": 0.0001861526663980556, "loss": 1.5137, "step": 5343 }, { "epoch": 0.06944284268642703, "grad_norm": 0.3906683027744293, "learning_rate": 0.00018615006693614423, "loss": 1.3354, "step": 5344 }, { "epoch": 0.0694558372303429, "grad_norm": 0.3135431706905365, "learning_rate": 0.00018614746747423283, "loss": 1.398, "step": 5345 }, { "epoch": 0.06946883177425878, "grad_norm": 0.3445374071598053, "learning_rate": 0.00018614486801232146, "loss": 1.3836, "step": 5346 }, { "epoch": 0.06948182631817465, "grad_norm": 0.42195379734039307, "learning_rate": 0.00018614226855041008, "loss": 1.399, "step": 5347 }, { "epoch": 0.06949482086209054, "grad_norm": 0.3718344569206238, "learning_rate": 0.0001861396690884987, "loss": 1.3381, "step": 5348 }, { "epoch": 0.06950781540600641, "grad_norm": 0.4483731687068939, "learning_rate": 0.0001861370696265873, "loss": 1.5374, "step": 5349 }, { "epoch": 0.06952080994992228, "grad_norm": 0.41100406646728516, "learning_rate": 0.0001861344701646759, "loss": 1.593, "step": 5350 }, { "epoch": 0.06953380449383816, "grad_norm": 0.30381742119789124, "learning_rate": 0.00018613187070276455, "loss": 1.3517, "step": 5351 }, { "epoch": 0.06954679903775403, "grad_norm": 0.35975003242492676, "learning_rate": 0.00018612927124085315, "loss": 1.4362, "step": 5352 }, { "epoch": 0.0695597935816699, "grad_norm": 0.46031081676483154, "learning_rate": 0.00018612667177894177, "loss": 1.6122, "step": 5353 }, { "epoch": 0.06957278812558577, "grad_norm": 0.4003441333770752, "learning_rate": 0.00018612407231703037, "loss": 1.4936, "step": 5354 }, { "epoch": 0.06958578266950165, "grad_norm": 0.310420423746109, "learning_rate": 0.000186121472855119, "loss": 1.5211, "step": 5355 }, { "epoch": 0.06959877721341752, "grad_norm": 0.46783626079559326, "learning_rate": 0.00018611887339320762, "loss": 1.3485, "step": 5356 }, { "epoch": 0.0696117717573334, "grad_norm": 0.46449097990989685, "learning_rate": 0.00018611627393129622, "loss": 1.6073, "step": 5357 }, { "epoch": 0.06962476630124927, "grad_norm": 0.404897540807724, "learning_rate": 0.00018611367446938484, "loss": 1.4101, "step": 5358 }, { "epoch": 0.06963776084516514, "grad_norm": 0.46192020177841187, "learning_rate": 0.00018611107500747347, "loss": 1.4044, "step": 5359 }, { "epoch": 0.06965075538908101, "grad_norm": 0.36133280396461487, "learning_rate": 0.0001861084755455621, "loss": 1.4238, "step": 5360 }, { "epoch": 0.06966374993299689, "grad_norm": 0.24162161350250244, "learning_rate": 0.0001861058760836507, "loss": 1.1458, "step": 5361 }, { "epoch": 0.06967674447691276, "grad_norm": 0.3562941551208496, "learning_rate": 0.0001861032766217393, "loss": 1.3348, "step": 5362 }, { "epoch": 0.06968973902082863, "grad_norm": 0.2858433127403259, "learning_rate": 0.00018610067715982794, "loss": 1.4673, "step": 5363 }, { "epoch": 0.0697027335647445, "grad_norm": 0.33209657669067383, "learning_rate": 0.00018609807769791653, "loss": 1.449, "step": 5364 }, { "epoch": 0.06971572810866038, "grad_norm": 0.3816041052341461, "learning_rate": 0.00018609547823600516, "loss": 1.5225, "step": 5365 }, { "epoch": 0.06972872265257625, "grad_norm": 0.4275282025337219, "learning_rate": 0.00018609287877409378, "loss": 1.4657, "step": 5366 }, { "epoch": 0.06974171719649212, "grad_norm": 0.5272087454795837, "learning_rate": 0.00018609027931218238, "loss": 1.58, "step": 5367 }, { "epoch": 0.069754711740408, "grad_norm": 0.3843916952610016, "learning_rate": 0.000186087679850271, "loss": 1.368, "step": 5368 }, { "epoch": 0.06976770628432387, "grad_norm": 0.5260601043701172, "learning_rate": 0.0001860850803883596, "loss": 1.3693, "step": 5369 }, { "epoch": 0.06978070082823974, "grad_norm": 0.35798612236976624, "learning_rate": 0.00018608248092644825, "loss": 1.5617, "step": 5370 }, { "epoch": 0.06979369537215561, "grad_norm": 0.37765389680862427, "learning_rate": 0.00018607988146453685, "loss": 1.5099, "step": 5371 }, { "epoch": 0.06980668991607149, "grad_norm": 0.37394413352012634, "learning_rate": 0.00018607728200262548, "loss": 1.261, "step": 5372 }, { "epoch": 0.06981968445998736, "grad_norm": 0.339111328125, "learning_rate": 0.00018607468254071407, "loss": 1.2864, "step": 5373 }, { "epoch": 0.06983267900390323, "grad_norm": 0.38882938027381897, "learning_rate": 0.0001860720830788027, "loss": 1.3353, "step": 5374 }, { "epoch": 0.0698456735478191, "grad_norm": 0.42675748467445374, "learning_rate": 0.00018606948361689132, "loss": 1.4789, "step": 5375 }, { "epoch": 0.06985866809173498, "grad_norm": 0.35793524980545044, "learning_rate": 0.00018606688415497992, "loss": 1.3242, "step": 5376 }, { "epoch": 0.06987166263565085, "grad_norm": 0.4599798321723938, "learning_rate": 0.00018606428469306854, "loss": 1.5116, "step": 5377 }, { "epoch": 0.06988465717956673, "grad_norm": 0.5006440281867981, "learning_rate": 0.00018606168523115717, "loss": 1.4687, "step": 5378 }, { "epoch": 0.0698976517234826, "grad_norm": 0.4113091826438904, "learning_rate": 0.00018605908576924577, "loss": 1.3881, "step": 5379 }, { "epoch": 0.06991064626739847, "grad_norm": 0.4981347322463989, "learning_rate": 0.0001860564863073344, "loss": 1.5034, "step": 5380 }, { "epoch": 0.06992364081131434, "grad_norm": 0.2606261670589447, "learning_rate": 0.000186053886845423, "loss": 1.3155, "step": 5381 }, { "epoch": 0.06993663535523022, "grad_norm": 0.43710580468177795, "learning_rate": 0.00018605128738351164, "loss": 1.4758, "step": 5382 }, { "epoch": 0.06994962989914609, "grad_norm": 0.4450712203979492, "learning_rate": 0.00018604868792160024, "loss": 1.3672, "step": 5383 }, { "epoch": 0.06996262444306196, "grad_norm": 0.43217140436172485, "learning_rate": 0.00018604608845968886, "loss": 1.5189, "step": 5384 }, { "epoch": 0.06997561898697784, "grad_norm": 0.4858933687210083, "learning_rate": 0.00018604348899777746, "loss": 1.4209, "step": 5385 }, { "epoch": 0.06998861353089371, "grad_norm": 0.4423260986804962, "learning_rate": 0.00018604088953586608, "loss": 1.4636, "step": 5386 }, { "epoch": 0.0700016080748096, "grad_norm": 0.43281981348991394, "learning_rate": 0.0001860382900739547, "loss": 1.4459, "step": 5387 }, { "epoch": 0.07001460261872547, "grad_norm": 0.5359558463096619, "learning_rate": 0.0001860356906120433, "loss": 1.5114, "step": 5388 }, { "epoch": 0.07002759716264134, "grad_norm": 0.39268165826797485, "learning_rate": 0.00018603309115013193, "loss": 1.3415, "step": 5389 }, { "epoch": 0.07004059170655721, "grad_norm": 0.4200558364391327, "learning_rate": 0.00018603049168822055, "loss": 1.3982, "step": 5390 }, { "epoch": 0.07005358625047309, "grad_norm": 0.5264400243759155, "learning_rate": 0.00018602789222630915, "loss": 1.6304, "step": 5391 }, { "epoch": 0.07006658079438896, "grad_norm": 0.4133051931858063, "learning_rate": 0.00018602529276439778, "loss": 1.5549, "step": 5392 }, { "epoch": 0.07007957533830483, "grad_norm": 0.30834338068962097, "learning_rate": 0.00018602269330248637, "loss": 1.4716, "step": 5393 }, { "epoch": 0.0700925698822207, "grad_norm": 0.45827698707580566, "learning_rate": 0.00018602009384057502, "loss": 1.5348, "step": 5394 }, { "epoch": 0.07010556442613658, "grad_norm": 0.30906587839126587, "learning_rate": 0.00018601749437866362, "loss": 1.3635, "step": 5395 }, { "epoch": 0.07011855897005245, "grad_norm": 0.38631874322891235, "learning_rate": 0.00018601489491675225, "loss": 1.4959, "step": 5396 }, { "epoch": 0.07013155351396833, "grad_norm": 0.3548017144203186, "learning_rate": 0.00018601229545484084, "loss": 1.4748, "step": 5397 }, { "epoch": 0.0701445480578842, "grad_norm": 0.35564255714416504, "learning_rate": 0.00018600969599292947, "loss": 1.4983, "step": 5398 }, { "epoch": 0.07015754260180007, "grad_norm": 0.38662031292915344, "learning_rate": 0.0001860070965310181, "loss": 1.5811, "step": 5399 }, { "epoch": 0.07017053714571594, "grad_norm": 0.40547817945480347, "learning_rate": 0.0001860044970691067, "loss": 1.6064, "step": 5400 }, { "epoch": 0.07018353168963182, "grad_norm": 0.3626398742198944, "learning_rate": 0.00018600189760719534, "loss": 1.0841, "step": 5401 }, { "epoch": 0.07019652623354769, "grad_norm": 0.3960350453853607, "learning_rate": 0.00018599929814528394, "loss": 1.5816, "step": 5402 }, { "epoch": 0.07020952077746356, "grad_norm": 0.3598216772079468, "learning_rate": 0.00018599669868337254, "loss": 1.4682, "step": 5403 }, { "epoch": 0.07022251532137944, "grad_norm": 0.43670007586479187, "learning_rate": 0.00018599409922146116, "loss": 1.503, "step": 5404 }, { "epoch": 0.07023550986529531, "grad_norm": 0.4934794306755066, "learning_rate": 0.00018599149975954979, "loss": 1.5594, "step": 5405 }, { "epoch": 0.07024850440921118, "grad_norm": 0.35070568323135376, "learning_rate": 0.0001859889002976384, "loss": 1.4915, "step": 5406 }, { "epoch": 0.07026149895312706, "grad_norm": 0.2919926345348358, "learning_rate": 0.000185986300835727, "loss": 1.5776, "step": 5407 }, { "epoch": 0.07027449349704293, "grad_norm": 0.3852614760398865, "learning_rate": 0.00018598370137381563, "loss": 1.3357, "step": 5408 }, { "epoch": 0.0702874880409588, "grad_norm": 0.402317076921463, "learning_rate": 0.00018598110191190426, "loss": 1.4288, "step": 5409 }, { "epoch": 0.07030048258487467, "grad_norm": 0.3355379104614258, "learning_rate": 0.00018597850244999285, "loss": 1.5372, "step": 5410 }, { "epoch": 0.07031347712879055, "grad_norm": 0.5210515260696411, "learning_rate": 0.00018597590298808148, "loss": 1.5411, "step": 5411 }, { "epoch": 0.07032647167270642, "grad_norm": 0.3981787860393524, "learning_rate": 0.00018597330352617008, "loss": 1.5348, "step": 5412 }, { "epoch": 0.07033946621662229, "grad_norm": 0.40836101770401, "learning_rate": 0.00018597070406425873, "loss": 1.3734, "step": 5413 }, { "epoch": 0.07035246076053817, "grad_norm": 0.5196245312690735, "learning_rate": 0.00018596810460234732, "loss": 1.4643, "step": 5414 }, { "epoch": 0.07036545530445404, "grad_norm": 0.34003061056137085, "learning_rate": 0.00018596550514043595, "loss": 1.2051, "step": 5415 }, { "epoch": 0.07037844984836991, "grad_norm": 0.40599769353866577, "learning_rate": 0.00018596290567852455, "loss": 1.5264, "step": 5416 }, { "epoch": 0.07039144439228578, "grad_norm": 0.379014790058136, "learning_rate": 0.00018596030621661317, "loss": 1.3553, "step": 5417 }, { "epoch": 0.07040443893620166, "grad_norm": 0.41018831729888916, "learning_rate": 0.0001859577067547018, "loss": 1.3308, "step": 5418 }, { "epoch": 0.07041743348011753, "grad_norm": 0.40476205945014954, "learning_rate": 0.0001859551072927904, "loss": 1.4389, "step": 5419 }, { "epoch": 0.0704304280240334, "grad_norm": 0.3424795866012573, "learning_rate": 0.00018595250783087902, "loss": 1.5422, "step": 5420 }, { "epoch": 0.07044342256794928, "grad_norm": 0.35810622572898865, "learning_rate": 0.00018594990836896764, "loss": 1.4295, "step": 5421 }, { "epoch": 0.07045641711186515, "grad_norm": 0.4544055759906769, "learning_rate": 0.00018594730890705624, "loss": 1.6031, "step": 5422 }, { "epoch": 0.07046941165578102, "grad_norm": 0.3985508978366852, "learning_rate": 0.00018594470944514486, "loss": 1.4231, "step": 5423 }, { "epoch": 0.0704824061996969, "grad_norm": 0.49580472707748413, "learning_rate": 0.00018594210998323346, "loss": 1.4039, "step": 5424 }, { "epoch": 0.07049540074361278, "grad_norm": 0.391928493976593, "learning_rate": 0.0001859395105213221, "loss": 1.5262, "step": 5425 }, { "epoch": 0.07050839528752866, "grad_norm": 0.36121219396591187, "learning_rate": 0.0001859369110594107, "loss": 1.3354, "step": 5426 }, { "epoch": 0.07052138983144453, "grad_norm": 0.27666881680488586, "learning_rate": 0.00018593431159749933, "loss": 1.4533, "step": 5427 }, { "epoch": 0.0705343843753604, "grad_norm": 0.28745222091674805, "learning_rate": 0.00018593171213558793, "loss": 1.3047, "step": 5428 }, { "epoch": 0.07054737891927627, "grad_norm": 0.44759461283683777, "learning_rate": 0.00018592911267367656, "loss": 1.6501, "step": 5429 }, { "epoch": 0.07056037346319215, "grad_norm": 0.49797555804252625, "learning_rate": 0.00018592651321176518, "loss": 1.6201, "step": 5430 }, { "epoch": 0.07057336800710802, "grad_norm": 0.29849007725715637, "learning_rate": 0.00018592391374985378, "loss": 1.4536, "step": 5431 }, { "epoch": 0.07058636255102389, "grad_norm": 0.3546353280544281, "learning_rate": 0.0001859213142879424, "loss": 1.3535, "step": 5432 }, { "epoch": 0.07059935709493977, "grad_norm": 0.4548199474811554, "learning_rate": 0.00018591871482603103, "loss": 1.4198, "step": 5433 }, { "epoch": 0.07061235163885564, "grad_norm": 0.43979954719543457, "learning_rate": 0.00018591611536411962, "loss": 1.3925, "step": 5434 }, { "epoch": 0.07062534618277151, "grad_norm": 0.39429983496665955, "learning_rate": 0.00018591351590220825, "loss": 1.5399, "step": 5435 }, { "epoch": 0.07063834072668738, "grad_norm": 0.24984683096408844, "learning_rate": 0.00018591091644029687, "loss": 1.3133, "step": 5436 }, { "epoch": 0.07065133527060326, "grad_norm": 0.357465535402298, "learning_rate": 0.0001859083169783855, "loss": 1.5052, "step": 5437 }, { "epoch": 0.07066432981451913, "grad_norm": 0.40855807065963745, "learning_rate": 0.0001859057175164741, "loss": 1.5294, "step": 5438 }, { "epoch": 0.070677324358435, "grad_norm": 0.3908853530883789, "learning_rate": 0.00018590311805456272, "loss": 1.319, "step": 5439 }, { "epoch": 0.07069031890235088, "grad_norm": 0.3614378273487091, "learning_rate": 0.00018590051859265134, "loss": 1.4397, "step": 5440 }, { "epoch": 0.07070331344626675, "grad_norm": 0.3503163754940033, "learning_rate": 0.00018589791913073994, "loss": 1.5543, "step": 5441 }, { "epoch": 0.07071630799018262, "grad_norm": 0.4148585796356201, "learning_rate": 0.00018589531966882857, "loss": 1.4212, "step": 5442 }, { "epoch": 0.0707293025340985, "grad_norm": 0.31987398862838745, "learning_rate": 0.00018589272020691716, "loss": 1.3397, "step": 5443 }, { "epoch": 0.07074229707801437, "grad_norm": 0.329733282327652, "learning_rate": 0.00018589012074500582, "loss": 1.4183, "step": 5444 }, { "epoch": 0.07075529162193024, "grad_norm": 0.4021393060684204, "learning_rate": 0.0001858875212830944, "loss": 1.3736, "step": 5445 }, { "epoch": 0.07076828616584611, "grad_norm": 0.4750361740589142, "learning_rate": 0.000185884921821183, "loss": 1.6133, "step": 5446 }, { "epoch": 0.07078128070976199, "grad_norm": 0.34743133187294006, "learning_rate": 0.00018588232235927163, "loss": 1.4817, "step": 5447 }, { "epoch": 0.07079427525367786, "grad_norm": 0.3615758419036865, "learning_rate": 0.00018587972289736026, "loss": 1.2433, "step": 5448 }, { "epoch": 0.07080726979759373, "grad_norm": 0.427296906709671, "learning_rate": 0.00018587712343544888, "loss": 1.4121, "step": 5449 }, { "epoch": 0.0708202643415096, "grad_norm": 0.3961687386035919, "learning_rate": 0.00018587452397353748, "loss": 1.541, "step": 5450 }, { "epoch": 0.07083325888542548, "grad_norm": 0.2941243648529053, "learning_rate": 0.0001858719245116261, "loss": 1.2386, "step": 5451 }, { "epoch": 0.07084625342934135, "grad_norm": 0.4702562689781189, "learning_rate": 0.00018586932504971473, "loss": 1.5811, "step": 5452 }, { "epoch": 0.07085924797325723, "grad_norm": 0.44686421751976013, "learning_rate": 0.00018586672558780333, "loss": 1.375, "step": 5453 }, { "epoch": 0.0708722425171731, "grad_norm": 0.3779712915420532, "learning_rate": 0.00018586412612589195, "loss": 1.266, "step": 5454 }, { "epoch": 0.07088523706108897, "grad_norm": 0.4184970557689667, "learning_rate": 0.00018586152666398055, "loss": 1.4821, "step": 5455 }, { "epoch": 0.07089823160500484, "grad_norm": 0.43823352456092834, "learning_rate": 0.0001858589272020692, "loss": 1.5553, "step": 5456 }, { "epoch": 0.07091122614892072, "grad_norm": 0.37257346510887146, "learning_rate": 0.0001858563277401578, "loss": 1.5413, "step": 5457 }, { "epoch": 0.07092422069283659, "grad_norm": 0.38129737973213196, "learning_rate": 0.0001858537282782464, "loss": 1.3405, "step": 5458 }, { "epoch": 0.07093721523675246, "grad_norm": 0.3697986304759979, "learning_rate": 0.00018585112881633502, "loss": 1.3921, "step": 5459 }, { "epoch": 0.07095020978066834, "grad_norm": 0.3374973237514496, "learning_rate": 0.00018584852935442364, "loss": 1.4575, "step": 5460 }, { "epoch": 0.07096320432458421, "grad_norm": 0.46403875946998596, "learning_rate": 0.00018584592989251227, "loss": 1.6582, "step": 5461 }, { "epoch": 0.07097619886850008, "grad_norm": 0.3866918981075287, "learning_rate": 0.00018584333043060087, "loss": 1.5336, "step": 5462 }, { "epoch": 0.07098919341241597, "grad_norm": 0.3675229251384735, "learning_rate": 0.0001858407309686895, "loss": 1.3309, "step": 5463 }, { "epoch": 0.07100218795633184, "grad_norm": 0.3723551034927368, "learning_rate": 0.00018583813150677812, "loss": 1.5619, "step": 5464 }, { "epoch": 0.07101518250024771, "grad_norm": 0.3850755989551544, "learning_rate": 0.0001858355320448667, "loss": 1.523, "step": 5465 }, { "epoch": 0.07102817704416359, "grad_norm": 0.34363529086112976, "learning_rate": 0.00018583293258295534, "loss": 1.2147, "step": 5466 }, { "epoch": 0.07104117158807946, "grad_norm": 0.35843032598495483, "learning_rate": 0.00018583033312104393, "loss": 1.5313, "step": 5467 }, { "epoch": 0.07105416613199533, "grad_norm": 0.3088741600513458, "learning_rate": 0.00018582773365913259, "loss": 1.5659, "step": 5468 }, { "epoch": 0.0710671606759112, "grad_norm": 0.5088632702827454, "learning_rate": 0.00018582513419722118, "loss": 1.5353, "step": 5469 }, { "epoch": 0.07108015521982708, "grad_norm": 0.4652015268802643, "learning_rate": 0.0001858225347353098, "loss": 1.2798, "step": 5470 }, { "epoch": 0.07109314976374295, "grad_norm": 0.316891074180603, "learning_rate": 0.0001858199352733984, "loss": 1.2449, "step": 5471 }, { "epoch": 0.07110614430765883, "grad_norm": 0.34538426995277405, "learning_rate": 0.00018581733581148703, "loss": 1.2856, "step": 5472 }, { "epoch": 0.0711191388515747, "grad_norm": 0.4169134795665741, "learning_rate": 0.00018581473634957565, "loss": 1.3852, "step": 5473 }, { "epoch": 0.07113213339549057, "grad_norm": 0.4016588628292084, "learning_rate": 0.00018581213688766425, "loss": 1.5823, "step": 5474 }, { "epoch": 0.07114512793940644, "grad_norm": 0.3618713915348053, "learning_rate": 0.00018580953742575288, "loss": 1.4659, "step": 5475 }, { "epoch": 0.07115812248332232, "grad_norm": 0.4007403552532196, "learning_rate": 0.0001858069379638415, "loss": 1.4936, "step": 5476 }, { "epoch": 0.07117111702723819, "grad_norm": 0.34492501616477966, "learning_rate": 0.0001858043385019301, "loss": 1.5609, "step": 5477 }, { "epoch": 0.07118411157115406, "grad_norm": 0.34819379448890686, "learning_rate": 0.00018580173904001872, "loss": 1.4435, "step": 5478 }, { "epoch": 0.07119710611506994, "grad_norm": 0.4405934810638428, "learning_rate": 0.00018579913957810735, "loss": 1.381, "step": 5479 }, { "epoch": 0.07121010065898581, "grad_norm": 0.40937164425849915, "learning_rate": 0.00018579654011619597, "loss": 1.5527, "step": 5480 }, { "epoch": 0.07122309520290168, "grad_norm": 0.3682154715061188, "learning_rate": 0.00018579394065428457, "loss": 1.3222, "step": 5481 }, { "epoch": 0.07123608974681755, "grad_norm": 0.38335657119750977, "learning_rate": 0.0001857913411923732, "loss": 1.5465, "step": 5482 }, { "epoch": 0.07124908429073343, "grad_norm": 0.3870220482349396, "learning_rate": 0.00018578874173046182, "loss": 1.3894, "step": 5483 }, { "epoch": 0.0712620788346493, "grad_norm": 0.47531741857528687, "learning_rate": 0.00018578614226855042, "loss": 1.5172, "step": 5484 }, { "epoch": 0.07127507337856517, "grad_norm": 0.38174280524253845, "learning_rate": 0.00018578354280663904, "loss": 1.4115, "step": 5485 }, { "epoch": 0.07128806792248105, "grad_norm": 0.35346317291259766, "learning_rate": 0.00018578094334472764, "loss": 1.3289, "step": 5486 }, { "epoch": 0.07130106246639692, "grad_norm": 0.31188464164733887, "learning_rate": 0.00018577834388281626, "loss": 1.2731, "step": 5487 }, { "epoch": 0.07131405701031279, "grad_norm": 0.41615059971809387, "learning_rate": 0.00018577574442090489, "loss": 1.5308, "step": 5488 }, { "epoch": 0.07132705155422867, "grad_norm": 0.5589435696601868, "learning_rate": 0.00018577314495899348, "loss": 1.5682, "step": 5489 }, { "epoch": 0.07134004609814454, "grad_norm": 0.36484724283218384, "learning_rate": 0.0001857705454970821, "loss": 1.7109, "step": 5490 }, { "epoch": 0.07135304064206041, "grad_norm": 0.4378516376018524, "learning_rate": 0.00018576794603517073, "loss": 1.5515, "step": 5491 }, { "epoch": 0.07136603518597628, "grad_norm": 0.31119734048843384, "learning_rate": 0.00018576534657325936, "loss": 1.3169, "step": 5492 }, { "epoch": 0.07137902972989216, "grad_norm": 0.49236518144607544, "learning_rate": 0.00018576274711134795, "loss": 1.598, "step": 5493 }, { "epoch": 0.07139202427380803, "grad_norm": 0.5020756721496582, "learning_rate": 0.00018576014764943658, "loss": 1.5953, "step": 5494 }, { "epoch": 0.0714050188177239, "grad_norm": 0.3653267025947571, "learning_rate": 0.0001857575481875252, "loss": 1.3867, "step": 5495 }, { "epoch": 0.07141801336163978, "grad_norm": 0.42779818177223206, "learning_rate": 0.0001857549487256138, "loss": 1.4839, "step": 5496 }, { "epoch": 0.07143100790555565, "grad_norm": 0.4221138060092926, "learning_rate": 0.00018575234926370242, "loss": 1.4289, "step": 5497 }, { "epoch": 0.07144400244947152, "grad_norm": 0.4305471181869507, "learning_rate": 0.00018574974980179102, "loss": 1.4869, "step": 5498 }, { "epoch": 0.0714569969933874, "grad_norm": 0.7394503355026245, "learning_rate": 0.00018574715033987967, "loss": 1.4199, "step": 5499 }, { "epoch": 0.07146999153730327, "grad_norm": 0.42988282442092896, "learning_rate": 0.00018574455087796827, "loss": 1.4626, "step": 5500 }, { "epoch": 0.07148298608121915, "grad_norm": 0.4476601183414459, "learning_rate": 0.00018574195141605687, "loss": 1.4446, "step": 5501 }, { "epoch": 0.07149598062513503, "grad_norm": 0.3432336449623108, "learning_rate": 0.0001857393519541455, "loss": 1.3539, "step": 5502 }, { "epoch": 0.0715089751690509, "grad_norm": 0.30796200037002563, "learning_rate": 0.00018573675249223412, "loss": 1.458, "step": 5503 }, { "epoch": 0.07152196971296677, "grad_norm": 0.35000425577163696, "learning_rate": 0.00018573415303032274, "loss": 1.5302, "step": 5504 }, { "epoch": 0.07153496425688265, "grad_norm": 0.3962832987308502, "learning_rate": 0.00018573155356841134, "loss": 1.3661, "step": 5505 }, { "epoch": 0.07154795880079852, "grad_norm": 0.4164280891418457, "learning_rate": 0.00018572895410649996, "loss": 1.4931, "step": 5506 }, { "epoch": 0.07156095334471439, "grad_norm": 0.4541429579257965, "learning_rate": 0.0001857263546445886, "loss": 1.2475, "step": 5507 }, { "epoch": 0.07157394788863027, "grad_norm": 0.32912299036979675, "learning_rate": 0.00018572375518267719, "loss": 1.5443, "step": 5508 }, { "epoch": 0.07158694243254614, "grad_norm": 0.661169171333313, "learning_rate": 0.0001857211557207658, "loss": 1.6283, "step": 5509 }, { "epoch": 0.07159993697646201, "grad_norm": 0.3548290729522705, "learning_rate": 0.00018571855625885443, "loss": 1.3872, "step": 5510 }, { "epoch": 0.07161293152037788, "grad_norm": 0.36101076006889343, "learning_rate": 0.00018571595679694306, "loss": 1.5111, "step": 5511 }, { "epoch": 0.07162592606429376, "grad_norm": 0.3813101649284363, "learning_rate": 0.00018571335733503166, "loss": 1.3474, "step": 5512 }, { "epoch": 0.07163892060820963, "grad_norm": 0.3508604168891907, "learning_rate": 0.00018571075787312025, "loss": 1.4458, "step": 5513 }, { "epoch": 0.0716519151521255, "grad_norm": 0.3383382558822632, "learning_rate": 0.0001857081584112089, "loss": 1.3505, "step": 5514 }, { "epoch": 0.07166490969604138, "grad_norm": 0.36480727791786194, "learning_rate": 0.0001857055589492975, "loss": 1.4672, "step": 5515 }, { "epoch": 0.07167790423995725, "grad_norm": 0.36195939779281616, "learning_rate": 0.00018570295948738613, "loss": 1.4437, "step": 5516 }, { "epoch": 0.07169089878387312, "grad_norm": 0.3500955402851105, "learning_rate": 0.00018570036002547472, "loss": 1.3328, "step": 5517 }, { "epoch": 0.071703893327789, "grad_norm": 0.4806077778339386, "learning_rate": 0.00018569776056356335, "loss": 1.6179, "step": 5518 }, { "epoch": 0.07171688787170487, "grad_norm": 0.38188299536705017, "learning_rate": 0.00018569516110165197, "loss": 1.4384, "step": 5519 }, { "epoch": 0.07172988241562074, "grad_norm": 0.32269108295440674, "learning_rate": 0.00018569256163974057, "loss": 1.2824, "step": 5520 }, { "epoch": 0.07174287695953661, "grad_norm": 0.3889369070529938, "learning_rate": 0.0001856899621778292, "loss": 1.3971, "step": 5521 }, { "epoch": 0.07175587150345249, "grad_norm": 0.38597962260246277, "learning_rate": 0.00018568736271591782, "loss": 1.4444, "step": 5522 }, { "epoch": 0.07176886604736836, "grad_norm": 0.38747406005859375, "learning_rate": 0.00018568476325400644, "loss": 1.5089, "step": 5523 }, { "epoch": 0.07178186059128423, "grad_norm": 0.5148333311080933, "learning_rate": 0.00018568216379209504, "loss": 1.4134, "step": 5524 }, { "epoch": 0.0717948551352001, "grad_norm": 0.3707602322101593, "learning_rate": 0.00018567956433018364, "loss": 1.3292, "step": 5525 }, { "epoch": 0.07180784967911598, "grad_norm": 0.37553104758262634, "learning_rate": 0.0001856769648682723, "loss": 1.3988, "step": 5526 }, { "epoch": 0.07182084422303185, "grad_norm": 0.3768678605556488, "learning_rate": 0.0001856743654063609, "loss": 1.3855, "step": 5527 }, { "epoch": 0.07183383876694772, "grad_norm": 0.4996545612812042, "learning_rate": 0.0001856717659444495, "loss": 1.4192, "step": 5528 }, { "epoch": 0.0718468333108636, "grad_norm": 0.4021969437599182, "learning_rate": 0.0001856691664825381, "loss": 1.4751, "step": 5529 }, { "epoch": 0.07185982785477947, "grad_norm": 0.357889324426651, "learning_rate": 0.00018566656702062673, "loss": 1.4382, "step": 5530 }, { "epoch": 0.07187282239869534, "grad_norm": 0.413897305727005, "learning_rate": 0.00018566396755871536, "loss": 1.4548, "step": 5531 }, { "epoch": 0.07188581694261122, "grad_norm": 0.3731609284877777, "learning_rate": 0.00018566136809680396, "loss": 1.3424, "step": 5532 }, { "epoch": 0.07189881148652709, "grad_norm": 0.386899471282959, "learning_rate": 0.00018565876863489258, "loss": 1.4701, "step": 5533 }, { "epoch": 0.07191180603044296, "grad_norm": 0.5114560127258301, "learning_rate": 0.0001856561691729812, "loss": 1.5957, "step": 5534 }, { "epoch": 0.07192480057435884, "grad_norm": 0.31541940569877625, "learning_rate": 0.00018565356971106983, "loss": 1.3177, "step": 5535 }, { "epoch": 0.07193779511827471, "grad_norm": 0.34982484579086304, "learning_rate": 0.00018565097024915843, "loss": 1.4722, "step": 5536 }, { "epoch": 0.07195078966219058, "grad_norm": 0.3410489857196808, "learning_rate": 0.00018564837078724705, "loss": 1.5004, "step": 5537 }, { "epoch": 0.07196378420610645, "grad_norm": 0.44715091586112976, "learning_rate": 0.00018564577132533568, "loss": 1.3261, "step": 5538 }, { "epoch": 0.07197677875002234, "grad_norm": 0.324921190738678, "learning_rate": 0.00018564317186342427, "loss": 1.4688, "step": 5539 }, { "epoch": 0.07198977329393821, "grad_norm": 0.4115283489227295, "learning_rate": 0.0001856405724015129, "loss": 1.5109, "step": 5540 }, { "epoch": 0.07200276783785409, "grad_norm": 0.37124302983283997, "learning_rate": 0.0001856379729396015, "loss": 1.2954, "step": 5541 }, { "epoch": 0.07201576238176996, "grad_norm": 0.5044050216674805, "learning_rate": 0.00018563537347769012, "loss": 1.5364, "step": 5542 }, { "epoch": 0.07202875692568583, "grad_norm": 0.3772490620613098, "learning_rate": 0.00018563277401577874, "loss": 1.4122, "step": 5543 }, { "epoch": 0.0720417514696017, "grad_norm": 0.4495158791542053, "learning_rate": 0.00018563017455386734, "loss": 1.5468, "step": 5544 }, { "epoch": 0.07205474601351758, "grad_norm": 0.4341451823711395, "learning_rate": 0.00018562757509195597, "loss": 1.4036, "step": 5545 }, { "epoch": 0.07206774055743345, "grad_norm": 0.4424019753932953, "learning_rate": 0.0001856249756300446, "loss": 1.4686, "step": 5546 }, { "epoch": 0.07208073510134932, "grad_norm": 0.4379132390022278, "learning_rate": 0.00018562237616813322, "loss": 1.5167, "step": 5547 }, { "epoch": 0.0720937296452652, "grad_norm": 0.2422821968793869, "learning_rate": 0.0001856197767062218, "loss": 1.2305, "step": 5548 }, { "epoch": 0.07210672418918107, "grad_norm": 0.427615761756897, "learning_rate": 0.00018561717724431044, "loss": 1.5418, "step": 5549 }, { "epoch": 0.07211971873309694, "grad_norm": 0.34096798300743103, "learning_rate": 0.00018561457778239906, "loss": 1.2817, "step": 5550 }, { "epoch": 0.07213271327701282, "grad_norm": 0.3905801773071289, "learning_rate": 0.00018561197832048766, "loss": 1.3466, "step": 5551 }, { "epoch": 0.07214570782092869, "grad_norm": 0.3452693819999695, "learning_rate": 0.00018560937885857628, "loss": 1.3596, "step": 5552 }, { "epoch": 0.07215870236484456, "grad_norm": 0.4102634787559509, "learning_rate": 0.0001856067793966649, "loss": 1.5921, "step": 5553 }, { "epoch": 0.07217169690876044, "grad_norm": 0.4027297794818878, "learning_rate": 0.00018560417993475353, "loss": 1.5723, "step": 5554 }, { "epoch": 0.07218469145267631, "grad_norm": 0.47069665789604187, "learning_rate": 0.00018560158047284213, "loss": 1.505, "step": 5555 }, { "epoch": 0.07219768599659218, "grad_norm": 0.33498018980026245, "learning_rate": 0.00018559898101093073, "loss": 1.2608, "step": 5556 }, { "epoch": 0.07221068054050805, "grad_norm": 0.2888638973236084, "learning_rate": 0.00018559638154901938, "loss": 1.3408, "step": 5557 }, { "epoch": 0.07222367508442393, "grad_norm": 0.3327373266220093, "learning_rate": 0.00018559378208710798, "loss": 1.3849, "step": 5558 }, { "epoch": 0.0722366696283398, "grad_norm": 0.4662718176841736, "learning_rate": 0.0001855911826251966, "loss": 1.4976, "step": 5559 }, { "epoch": 0.07224966417225567, "grad_norm": 0.30613040924072266, "learning_rate": 0.0001855885831632852, "loss": 1.2854, "step": 5560 }, { "epoch": 0.07226265871617155, "grad_norm": 0.35056453943252563, "learning_rate": 0.00018558598370137382, "loss": 1.3573, "step": 5561 }, { "epoch": 0.07227565326008742, "grad_norm": 0.4638853967189789, "learning_rate": 0.00018558338423946245, "loss": 1.6123, "step": 5562 }, { "epoch": 0.07228864780400329, "grad_norm": 0.466193825006485, "learning_rate": 0.00018558078477755104, "loss": 1.3917, "step": 5563 }, { "epoch": 0.07230164234791916, "grad_norm": 0.5195732712745667, "learning_rate": 0.00018557818531563967, "loss": 1.5221, "step": 5564 }, { "epoch": 0.07231463689183504, "grad_norm": 0.39762115478515625, "learning_rate": 0.0001855755858537283, "loss": 1.5463, "step": 5565 }, { "epoch": 0.07232763143575091, "grad_norm": 0.44320446252822876, "learning_rate": 0.00018557298639181692, "loss": 1.5412, "step": 5566 }, { "epoch": 0.07234062597966678, "grad_norm": 0.3181615173816681, "learning_rate": 0.00018557038692990552, "loss": 1.2224, "step": 5567 }, { "epoch": 0.07235362052358266, "grad_norm": 0.2894691526889801, "learning_rate": 0.0001855677874679941, "loss": 1.3671, "step": 5568 }, { "epoch": 0.07236661506749853, "grad_norm": 0.4079062044620514, "learning_rate": 0.00018556518800608276, "loss": 1.5234, "step": 5569 }, { "epoch": 0.0723796096114144, "grad_norm": 0.3911947011947632, "learning_rate": 0.00018556258854417136, "loss": 1.6498, "step": 5570 }, { "epoch": 0.07239260415533028, "grad_norm": 0.3745634853839874, "learning_rate": 0.00018555998908225999, "loss": 1.3886, "step": 5571 }, { "epoch": 0.07240559869924615, "grad_norm": 0.39828798174858093, "learning_rate": 0.00018555738962034858, "loss": 1.5264, "step": 5572 }, { "epoch": 0.07241859324316202, "grad_norm": 0.4185575246810913, "learning_rate": 0.0001855547901584372, "loss": 1.4389, "step": 5573 }, { "epoch": 0.0724315877870779, "grad_norm": 0.3066312074661255, "learning_rate": 0.00018555219069652583, "loss": 1.4756, "step": 5574 }, { "epoch": 0.07244458233099377, "grad_norm": 0.3252162039279938, "learning_rate": 0.00018554959123461443, "loss": 1.4824, "step": 5575 }, { "epoch": 0.07245757687490964, "grad_norm": 0.41581249237060547, "learning_rate": 0.00018554699177270305, "loss": 1.4503, "step": 5576 }, { "epoch": 0.07247057141882553, "grad_norm": 0.41156283020973206, "learning_rate": 0.00018554439231079168, "loss": 1.6927, "step": 5577 }, { "epoch": 0.0724835659627414, "grad_norm": 0.4363971948623657, "learning_rate": 0.0001855417928488803, "loss": 1.5528, "step": 5578 }, { "epoch": 0.07249656050665727, "grad_norm": 0.42701852321624756, "learning_rate": 0.0001855391933869689, "loss": 1.4775, "step": 5579 }, { "epoch": 0.07250955505057315, "grad_norm": 0.4176211655139923, "learning_rate": 0.0001855365939250575, "loss": 1.4082, "step": 5580 }, { "epoch": 0.07252254959448902, "grad_norm": 0.39570191502571106, "learning_rate": 0.00018553399446314615, "loss": 1.4394, "step": 5581 }, { "epoch": 0.07253554413840489, "grad_norm": 0.4649661183357239, "learning_rate": 0.00018553139500123475, "loss": 1.5078, "step": 5582 }, { "epoch": 0.07254853868232077, "grad_norm": 0.3875357210636139, "learning_rate": 0.00018552879553932337, "loss": 1.301, "step": 5583 }, { "epoch": 0.07256153322623664, "grad_norm": 0.48106566071510315, "learning_rate": 0.000185526196077412, "loss": 1.3939, "step": 5584 }, { "epoch": 0.07257452777015251, "grad_norm": 0.3127942979335785, "learning_rate": 0.0001855235966155006, "loss": 1.4939, "step": 5585 }, { "epoch": 0.07258752231406838, "grad_norm": 0.40679267048835754, "learning_rate": 0.00018552099715358922, "loss": 1.349, "step": 5586 }, { "epoch": 0.07260051685798426, "grad_norm": 0.4243844151496887, "learning_rate": 0.00018551839769167782, "loss": 1.6021, "step": 5587 }, { "epoch": 0.07261351140190013, "grad_norm": 0.4676162004470825, "learning_rate": 0.00018551579822976647, "loss": 1.5655, "step": 5588 }, { "epoch": 0.072626505945816, "grad_norm": 0.4225911796092987, "learning_rate": 0.00018551319876785506, "loss": 1.2942, "step": 5589 }, { "epoch": 0.07263950048973188, "grad_norm": 0.35572972893714905, "learning_rate": 0.0001855105993059437, "loss": 1.3874, "step": 5590 }, { "epoch": 0.07265249503364775, "grad_norm": 0.46690237522125244, "learning_rate": 0.00018550799984403229, "loss": 1.5002, "step": 5591 }, { "epoch": 0.07266548957756362, "grad_norm": 0.38411280512809753, "learning_rate": 0.0001855054003821209, "loss": 1.5267, "step": 5592 }, { "epoch": 0.0726784841214795, "grad_norm": 0.30819186568260193, "learning_rate": 0.00018550280092020954, "loss": 1.3883, "step": 5593 }, { "epoch": 0.07269147866539537, "grad_norm": 0.41401219367980957, "learning_rate": 0.00018550020145829813, "loss": 1.6515, "step": 5594 }, { "epoch": 0.07270447320931124, "grad_norm": 0.4541913866996765, "learning_rate": 0.00018549760199638676, "loss": 1.4572, "step": 5595 }, { "epoch": 0.07271746775322711, "grad_norm": 0.3997112214565277, "learning_rate": 0.00018549500253447538, "loss": 1.4728, "step": 5596 }, { "epoch": 0.07273046229714299, "grad_norm": 0.3244662284851074, "learning_rate": 0.00018549240307256398, "loss": 1.3256, "step": 5597 }, { "epoch": 0.07274345684105886, "grad_norm": 0.4154819846153259, "learning_rate": 0.0001854898036106526, "loss": 1.6368, "step": 5598 }, { "epoch": 0.07275645138497473, "grad_norm": 0.3281448781490326, "learning_rate": 0.0001854872041487412, "loss": 1.3244, "step": 5599 }, { "epoch": 0.0727694459288906, "grad_norm": 0.27969348430633545, "learning_rate": 0.00018548460468682985, "loss": 1.4892, "step": 5600 }, { "epoch": 0.07278244047280648, "grad_norm": 0.45478615164756775, "learning_rate": 0.00018548200522491845, "loss": 1.5494, "step": 5601 }, { "epoch": 0.07279543501672235, "grad_norm": 0.4144267737865448, "learning_rate": 0.00018547940576300707, "loss": 1.4138, "step": 5602 }, { "epoch": 0.07280842956063822, "grad_norm": 0.3249765932559967, "learning_rate": 0.00018547680630109567, "loss": 1.4079, "step": 5603 }, { "epoch": 0.0728214241045541, "grad_norm": 0.49898579716682434, "learning_rate": 0.0001854742068391843, "loss": 1.6388, "step": 5604 }, { "epoch": 0.07283441864846997, "grad_norm": 0.40876245498657227, "learning_rate": 0.00018547160737727292, "loss": 1.2897, "step": 5605 }, { "epoch": 0.07284741319238584, "grad_norm": 0.4395081102848053, "learning_rate": 0.00018546900791536152, "loss": 1.5956, "step": 5606 }, { "epoch": 0.07286040773630172, "grad_norm": 0.6372933387756348, "learning_rate": 0.00018546640845345014, "loss": 1.6542, "step": 5607 }, { "epoch": 0.07287340228021759, "grad_norm": 0.39420461654663086, "learning_rate": 0.00018546380899153877, "loss": 1.4588, "step": 5608 }, { "epoch": 0.07288639682413346, "grad_norm": 0.3278575837612152, "learning_rate": 0.00018546120952962736, "loss": 1.489, "step": 5609 }, { "epoch": 0.07289939136804933, "grad_norm": 0.3301399350166321, "learning_rate": 0.000185458610067716, "loss": 1.3192, "step": 5610 }, { "epoch": 0.07291238591196521, "grad_norm": 0.4959639310836792, "learning_rate": 0.00018545601060580459, "loss": 1.5534, "step": 5611 }, { "epoch": 0.07292538045588108, "grad_norm": 0.47583460807800293, "learning_rate": 0.00018545341114389324, "loss": 1.4835, "step": 5612 }, { "epoch": 0.07293837499979695, "grad_norm": 0.3644411265850067, "learning_rate": 0.00018545081168198184, "loss": 1.5143, "step": 5613 }, { "epoch": 0.07295136954371283, "grad_norm": 0.42564284801483154, "learning_rate": 0.00018544821222007046, "loss": 1.553, "step": 5614 }, { "epoch": 0.07296436408762871, "grad_norm": 0.41172221302986145, "learning_rate": 0.00018544561275815906, "loss": 1.5168, "step": 5615 }, { "epoch": 0.07297735863154459, "grad_norm": 0.4057060480117798, "learning_rate": 0.00018544301329624768, "loss": 1.4829, "step": 5616 }, { "epoch": 0.07299035317546046, "grad_norm": 0.4562079906463623, "learning_rate": 0.0001854404138343363, "loss": 1.4217, "step": 5617 }, { "epoch": 0.07300334771937633, "grad_norm": 0.45361238718032837, "learning_rate": 0.0001854378143724249, "loss": 1.3939, "step": 5618 }, { "epoch": 0.0730163422632922, "grad_norm": 0.3313571512699127, "learning_rate": 0.00018543521491051353, "loss": 1.3955, "step": 5619 }, { "epoch": 0.07302933680720808, "grad_norm": 0.399566650390625, "learning_rate": 0.00018543261544860215, "loss": 1.5584, "step": 5620 }, { "epoch": 0.07304233135112395, "grad_norm": 0.46305033564567566, "learning_rate": 0.00018543001598669078, "loss": 1.5445, "step": 5621 }, { "epoch": 0.07305532589503982, "grad_norm": 0.5216182470321655, "learning_rate": 0.00018542741652477937, "loss": 1.4129, "step": 5622 }, { "epoch": 0.0730683204389557, "grad_norm": 0.3481959402561188, "learning_rate": 0.000185424817062868, "loss": 1.5363, "step": 5623 }, { "epoch": 0.07308131498287157, "grad_norm": 0.38653409481048584, "learning_rate": 0.00018542221760095662, "loss": 1.4856, "step": 5624 }, { "epoch": 0.07309430952678744, "grad_norm": 0.40774691104888916, "learning_rate": 0.00018541961813904522, "loss": 1.4834, "step": 5625 }, { "epoch": 0.07310730407070332, "grad_norm": 0.46210741996765137, "learning_rate": 0.00018541701867713384, "loss": 1.3434, "step": 5626 }, { "epoch": 0.07312029861461919, "grad_norm": 0.25466418266296387, "learning_rate": 0.00018541441921522247, "loss": 1.3635, "step": 5627 }, { "epoch": 0.07313329315853506, "grad_norm": 0.4233796298503876, "learning_rate": 0.00018541181975331107, "loss": 1.4875, "step": 5628 }, { "epoch": 0.07314628770245094, "grad_norm": 0.3973749577999115, "learning_rate": 0.0001854092202913997, "loss": 1.5705, "step": 5629 }, { "epoch": 0.07315928224636681, "grad_norm": 0.41903677582740784, "learning_rate": 0.0001854066208294883, "loss": 1.6226, "step": 5630 }, { "epoch": 0.07317227679028268, "grad_norm": 0.4141426384449005, "learning_rate": 0.00018540402136757694, "loss": 1.5467, "step": 5631 }, { "epoch": 0.07318527133419855, "grad_norm": 0.4214317202568054, "learning_rate": 0.00018540142190566554, "loss": 1.4372, "step": 5632 }, { "epoch": 0.07319826587811443, "grad_norm": 0.3303471803665161, "learning_rate": 0.00018539882244375416, "loss": 1.5253, "step": 5633 }, { "epoch": 0.0732112604220303, "grad_norm": 0.4142480194568634, "learning_rate": 0.00018539622298184276, "loss": 1.4846, "step": 5634 }, { "epoch": 0.07322425496594617, "grad_norm": 0.4120120108127594, "learning_rate": 0.00018539362351993138, "loss": 1.5881, "step": 5635 }, { "epoch": 0.07323724950986205, "grad_norm": 0.36254847049713135, "learning_rate": 0.00018539102405802, "loss": 1.5508, "step": 5636 }, { "epoch": 0.07325024405377792, "grad_norm": 0.34341758489608765, "learning_rate": 0.0001853884245961086, "loss": 1.4403, "step": 5637 }, { "epoch": 0.07326323859769379, "grad_norm": 0.4617409110069275, "learning_rate": 0.00018538582513419723, "loss": 1.3383, "step": 5638 }, { "epoch": 0.07327623314160966, "grad_norm": 0.37782910466194153, "learning_rate": 0.00018538322567228585, "loss": 1.5037, "step": 5639 }, { "epoch": 0.07328922768552554, "grad_norm": 0.3224794566631317, "learning_rate": 0.00018538062621037445, "loss": 1.593, "step": 5640 }, { "epoch": 0.07330222222944141, "grad_norm": 0.4115370512008667, "learning_rate": 0.00018537802674846308, "loss": 1.3537, "step": 5641 }, { "epoch": 0.07331521677335728, "grad_norm": 0.4420442581176758, "learning_rate": 0.00018537542728655167, "loss": 1.5839, "step": 5642 }, { "epoch": 0.07332821131727316, "grad_norm": 0.4861597418785095, "learning_rate": 0.00018537282782464033, "loss": 1.4875, "step": 5643 }, { "epoch": 0.07334120586118903, "grad_norm": 0.3862830400466919, "learning_rate": 0.00018537022836272892, "loss": 1.4328, "step": 5644 }, { "epoch": 0.0733542004051049, "grad_norm": 0.38285693526268005, "learning_rate": 0.00018536762890081755, "loss": 1.3033, "step": 5645 }, { "epoch": 0.07336719494902078, "grad_norm": 0.6086478233337402, "learning_rate": 0.00018536502943890614, "loss": 1.3785, "step": 5646 }, { "epoch": 0.07338018949293665, "grad_norm": 0.46374431252479553, "learning_rate": 0.00018536242997699477, "loss": 1.5682, "step": 5647 }, { "epoch": 0.07339318403685252, "grad_norm": 0.5707323551177979, "learning_rate": 0.0001853598305150834, "loss": 1.212, "step": 5648 }, { "epoch": 0.0734061785807684, "grad_norm": 0.3908240795135498, "learning_rate": 0.000185357231053172, "loss": 1.4904, "step": 5649 }, { "epoch": 0.07341917312468427, "grad_norm": 0.39183786511421204, "learning_rate": 0.00018535463159126062, "loss": 1.4241, "step": 5650 }, { "epoch": 0.07343216766860014, "grad_norm": 0.28787100315093994, "learning_rate": 0.00018535203212934924, "loss": 1.3047, "step": 5651 }, { "epoch": 0.07344516221251601, "grad_norm": 0.4687195420265198, "learning_rate": 0.00018534943266743784, "loss": 1.4699, "step": 5652 }, { "epoch": 0.0734581567564319, "grad_norm": 0.3689233958721161, "learning_rate": 0.00018534683320552646, "loss": 1.3981, "step": 5653 }, { "epoch": 0.07347115130034777, "grad_norm": 0.47054871916770935, "learning_rate": 0.00018534423374361506, "loss": 1.3714, "step": 5654 }, { "epoch": 0.07348414584426365, "grad_norm": 0.40562546253204346, "learning_rate": 0.0001853416342817037, "loss": 1.4845, "step": 5655 }, { "epoch": 0.07349714038817952, "grad_norm": 0.371001660823822, "learning_rate": 0.0001853390348197923, "loss": 1.4136, "step": 5656 }, { "epoch": 0.07351013493209539, "grad_norm": 0.41035521030426025, "learning_rate": 0.00018533643535788093, "loss": 1.3432, "step": 5657 }, { "epoch": 0.07352312947601126, "grad_norm": 0.3619242012500763, "learning_rate": 0.00018533383589596956, "loss": 1.3709, "step": 5658 }, { "epoch": 0.07353612401992714, "grad_norm": 0.4026356041431427, "learning_rate": 0.00018533123643405815, "loss": 1.5246, "step": 5659 }, { "epoch": 0.07354911856384301, "grad_norm": 0.3411107063293457, "learning_rate": 0.00018532863697214678, "loss": 1.3789, "step": 5660 }, { "epoch": 0.07356211310775888, "grad_norm": 0.4111210107803345, "learning_rate": 0.00018532603751023538, "loss": 1.6607, "step": 5661 }, { "epoch": 0.07357510765167476, "grad_norm": 0.38018786907196045, "learning_rate": 0.00018532343804832403, "loss": 1.1654, "step": 5662 }, { "epoch": 0.07358810219559063, "grad_norm": 0.4601939022541046, "learning_rate": 0.00018532083858641263, "loss": 1.5294, "step": 5663 }, { "epoch": 0.0736010967395065, "grad_norm": 0.45119649171829224, "learning_rate": 0.00018531823912450122, "loss": 1.5943, "step": 5664 }, { "epoch": 0.07361409128342238, "grad_norm": 0.4009825587272644, "learning_rate": 0.00018531563966258985, "loss": 1.3158, "step": 5665 }, { "epoch": 0.07362708582733825, "grad_norm": 0.3716208338737488, "learning_rate": 0.00018531304020067847, "loss": 1.491, "step": 5666 }, { "epoch": 0.07364008037125412, "grad_norm": 0.4477281868457794, "learning_rate": 0.0001853104407387671, "loss": 1.5405, "step": 5667 }, { "epoch": 0.07365307491517, "grad_norm": 0.37739893794059753, "learning_rate": 0.0001853078412768557, "loss": 1.3925, "step": 5668 }, { "epoch": 0.07366606945908587, "grad_norm": 0.3842066824436188, "learning_rate": 0.00018530524181494432, "loss": 1.5007, "step": 5669 }, { "epoch": 0.07367906400300174, "grad_norm": 0.33403629064559937, "learning_rate": 0.00018530264235303294, "loss": 1.2318, "step": 5670 }, { "epoch": 0.07369205854691761, "grad_norm": 0.37254855036735535, "learning_rate": 0.00018530004289112154, "loss": 1.4311, "step": 5671 }, { "epoch": 0.07370505309083349, "grad_norm": 0.4163953363895416, "learning_rate": 0.00018529744342921016, "loss": 1.625, "step": 5672 }, { "epoch": 0.07371804763474936, "grad_norm": 0.37085282802581787, "learning_rate": 0.00018529484396729876, "loss": 1.3594, "step": 5673 }, { "epoch": 0.07373104217866523, "grad_norm": 0.42054829001426697, "learning_rate": 0.00018529224450538741, "loss": 1.5976, "step": 5674 }, { "epoch": 0.0737440367225811, "grad_norm": 0.3884391188621521, "learning_rate": 0.000185289645043476, "loss": 1.4202, "step": 5675 }, { "epoch": 0.07375703126649698, "grad_norm": 0.4759387969970703, "learning_rate": 0.00018528704558156464, "loss": 1.4894, "step": 5676 }, { "epoch": 0.07377002581041285, "grad_norm": 0.5548647046089172, "learning_rate": 0.00018528444611965323, "loss": 1.5356, "step": 5677 }, { "epoch": 0.07378302035432872, "grad_norm": 0.45505598187446594, "learning_rate": 0.00018528184665774186, "loss": 1.488, "step": 5678 }, { "epoch": 0.0737960148982446, "grad_norm": 0.39482781291007996, "learning_rate": 0.00018527924719583048, "loss": 1.2231, "step": 5679 }, { "epoch": 0.07380900944216047, "grad_norm": 0.39929914474487305, "learning_rate": 0.00018527664773391908, "loss": 1.2839, "step": 5680 }, { "epoch": 0.07382200398607634, "grad_norm": 0.4383854866027832, "learning_rate": 0.0001852740482720077, "loss": 1.6429, "step": 5681 }, { "epoch": 0.07383499852999222, "grad_norm": 0.3644176721572876, "learning_rate": 0.00018527144881009633, "loss": 1.31, "step": 5682 }, { "epoch": 0.07384799307390809, "grad_norm": 0.30723321437835693, "learning_rate": 0.00018526884934818493, "loss": 1.419, "step": 5683 }, { "epoch": 0.07386098761782396, "grad_norm": 0.3782745599746704, "learning_rate": 0.00018526624988627355, "loss": 1.4171, "step": 5684 }, { "epoch": 0.07387398216173983, "grad_norm": 0.49302423000335693, "learning_rate": 0.00018526365042436215, "loss": 1.515, "step": 5685 }, { "epoch": 0.07388697670565571, "grad_norm": 0.34835150837898254, "learning_rate": 0.0001852610509624508, "loss": 1.3484, "step": 5686 }, { "epoch": 0.07389997124957158, "grad_norm": 0.407061368227005, "learning_rate": 0.0001852584515005394, "loss": 1.4315, "step": 5687 }, { "epoch": 0.07391296579348745, "grad_norm": 0.38006383180618286, "learning_rate": 0.00018525585203862802, "loss": 1.5133, "step": 5688 }, { "epoch": 0.07392596033740333, "grad_norm": 0.5021685361862183, "learning_rate": 0.00018525325257671662, "loss": 1.4815, "step": 5689 }, { "epoch": 0.0739389548813192, "grad_norm": 0.4555334150791168, "learning_rate": 0.00018525065311480524, "loss": 1.4961, "step": 5690 }, { "epoch": 0.07395194942523509, "grad_norm": 0.3869423270225525, "learning_rate": 0.00018524805365289387, "loss": 1.3448, "step": 5691 }, { "epoch": 0.07396494396915096, "grad_norm": 0.33090898394584656, "learning_rate": 0.00018524545419098246, "loss": 1.3476, "step": 5692 }, { "epoch": 0.07397793851306683, "grad_norm": 0.46437868475914, "learning_rate": 0.0001852428547290711, "loss": 1.4696, "step": 5693 }, { "epoch": 0.0739909330569827, "grad_norm": 0.3153212070465088, "learning_rate": 0.00018524025526715971, "loss": 1.5102, "step": 5694 }, { "epoch": 0.07400392760089858, "grad_norm": 0.31786441802978516, "learning_rate": 0.0001852376558052483, "loss": 1.4115, "step": 5695 }, { "epoch": 0.07401692214481445, "grad_norm": 0.3887665569782257, "learning_rate": 0.00018523505634333694, "loss": 1.4906, "step": 5696 }, { "epoch": 0.07402991668873032, "grad_norm": 0.49712473154067993, "learning_rate": 0.00018523245688142556, "loss": 1.5448, "step": 5697 }, { "epoch": 0.0740429112326462, "grad_norm": 0.4528498947620392, "learning_rate": 0.00018522985741951418, "loss": 1.5413, "step": 5698 }, { "epoch": 0.07405590577656207, "grad_norm": 0.35879814624786377, "learning_rate": 0.00018522725795760278, "loss": 1.3134, "step": 5699 }, { "epoch": 0.07406890032047794, "grad_norm": 0.3808993697166443, "learning_rate": 0.0001852246584956914, "loss": 1.522, "step": 5700 }, { "epoch": 0.07408189486439382, "grad_norm": 0.44484058022499084, "learning_rate": 0.00018522205903378003, "loss": 1.5045, "step": 5701 }, { "epoch": 0.07409488940830969, "grad_norm": 0.46724170446395874, "learning_rate": 0.00018521945957186863, "loss": 1.5769, "step": 5702 }, { "epoch": 0.07410788395222556, "grad_norm": 0.25938349962234497, "learning_rate": 0.00018521686010995725, "loss": 1.2574, "step": 5703 }, { "epoch": 0.07412087849614143, "grad_norm": 0.34519582986831665, "learning_rate": 0.00018521426064804585, "loss": 1.5257, "step": 5704 }, { "epoch": 0.07413387304005731, "grad_norm": 0.3566371500492096, "learning_rate": 0.0001852116611861345, "loss": 1.3797, "step": 5705 }, { "epoch": 0.07414686758397318, "grad_norm": 0.330496609210968, "learning_rate": 0.0001852090617242231, "loss": 1.3353, "step": 5706 }, { "epoch": 0.07415986212788905, "grad_norm": 0.37345677614212036, "learning_rate": 0.0001852064622623117, "loss": 1.4017, "step": 5707 }, { "epoch": 0.07417285667180493, "grad_norm": 0.36138954758644104, "learning_rate": 0.00018520386280040032, "loss": 1.3175, "step": 5708 }, { "epoch": 0.0741858512157208, "grad_norm": 0.3797924816608429, "learning_rate": 0.00018520126333848895, "loss": 1.3777, "step": 5709 }, { "epoch": 0.07419884575963667, "grad_norm": 0.3644832968711853, "learning_rate": 0.00018519866387657757, "loss": 1.2839, "step": 5710 }, { "epoch": 0.07421184030355255, "grad_norm": 0.3977486491203308, "learning_rate": 0.00018519606441466617, "loss": 1.4154, "step": 5711 }, { "epoch": 0.07422483484746842, "grad_norm": 0.4330911338329315, "learning_rate": 0.0001851934649527548, "loss": 1.304, "step": 5712 }, { "epoch": 0.07423782939138429, "grad_norm": 0.44224247336387634, "learning_rate": 0.00018519086549084342, "loss": 1.4277, "step": 5713 }, { "epoch": 0.07425082393530016, "grad_norm": 0.3843836188316345, "learning_rate": 0.000185188266028932, "loss": 1.5312, "step": 5714 }, { "epoch": 0.07426381847921604, "grad_norm": 0.4149883985519409, "learning_rate": 0.00018518566656702064, "loss": 1.3724, "step": 5715 }, { "epoch": 0.07427681302313191, "grad_norm": 0.4066537022590637, "learning_rate": 0.00018518306710510924, "loss": 1.3919, "step": 5716 }, { "epoch": 0.07428980756704778, "grad_norm": 0.3956160843372345, "learning_rate": 0.0001851804676431979, "loss": 1.5682, "step": 5717 }, { "epoch": 0.07430280211096366, "grad_norm": 0.414580762386322, "learning_rate": 0.00018517786818128648, "loss": 1.4342, "step": 5718 }, { "epoch": 0.07431579665487953, "grad_norm": 0.40832704305648804, "learning_rate": 0.00018517526871937508, "loss": 1.5079, "step": 5719 }, { "epoch": 0.0743287911987954, "grad_norm": 0.33055317401885986, "learning_rate": 0.0001851726692574637, "loss": 1.3293, "step": 5720 }, { "epoch": 0.07434178574271127, "grad_norm": 0.3359217047691345, "learning_rate": 0.00018517006979555233, "loss": 1.445, "step": 5721 }, { "epoch": 0.07435478028662715, "grad_norm": 0.4215114116668701, "learning_rate": 0.00018516747033364096, "loss": 1.504, "step": 5722 }, { "epoch": 0.07436777483054302, "grad_norm": 0.42226025462150574, "learning_rate": 0.00018516487087172955, "loss": 1.4326, "step": 5723 }, { "epoch": 0.0743807693744589, "grad_norm": 0.3280055820941925, "learning_rate": 0.00018516227140981818, "loss": 1.3928, "step": 5724 }, { "epoch": 0.07439376391837477, "grad_norm": 0.33235517144203186, "learning_rate": 0.0001851596719479068, "loss": 1.4994, "step": 5725 }, { "epoch": 0.07440675846229064, "grad_norm": 0.36907488107681274, "learning_rate": 0.0001851570724859954, "loss": 1.2933, "step": 5726 }, { "epoch": 0.07441975300620651, "grad_norm": 0.3370386064052582, "learning_rate": 0.00018515447302408402, "loss": 1.4913, "step": 5727 }, { "epoch": 0.07443274755012239, "grad_norm": 0.3899488151073456, "learning_rate": 0.00018515187356217262, "loss": 1.4666, "step": 5728 }, { "epoch": 0.07444574209403827, "grad_norm": 0.42508041858673096, "learning_rate": 0.00018514927410026127, "loss": 1.4094, "step": 5729 }, { "epoch": 0.07445873663795415, "grad_norm": 0.37834376096725464, "learning_rate": 0.00018514667463834987, "loss": 1.5049, "step": 5730 }, { "epoch": 0.07447173118187002, "grad_norm": 0.41443678736686707, "learning_rate": 0.0001851440751764385, "loss": 1.6022, "step": 5731 }, { "epoch": 0.07448472572578589, "grad_norm": 0.3229401111602783, "learning_rate": 0.00018514147571452712, "loss": 1.496, "step": 5732 }, { "epoch": 0.07449772026970176, "grad_norm": 0.45629793405532837, "learning_rate": 0.00018513887625261572, "loss": 1.3344, "step": 5733 }, { "epoch": 0.07451071481361764, "grad_norm": 0.44251513481140137, "learning_rate": 0.00018513627679070434, "loss": 1.6878, "step": 5734 }, { "epoch": 0.07452370935753351, "grad_norm": 0.41825950145721436, "learning_rate": 0.00018513367732879294, "loss": 1.6729, "step": 5735 }, { "epoch": 0.07453670390144938, "grad_norm": 0.47430917620658875, "learning_rate": 0.00018513107786688156, "loss": 1.5414, "step": 5736 }, { "epoch": 0.07454969844536526, "grad_norm": 0.38670629262924194, "learning_rate": 0.0001851284784049702, "loss": 1.619, "step": 5737 }, { "epoch": 0.07456269298928113, "grad_norm": 0.48887214064598083, "learning_rate": 0.00018512587894305878, "loss": 1.428, "step": 5738 }, { "epoch": 0.074575687533197, "grad_norm": 0.4115327298641205, "learning_rate": 0.0001851232794811474, "loss": 1.4086, "step": 5739 }, { "epoch": 0.07458868207711288, "grad_norm": 0.4153934121131897, "learning_rate": 0.00018512068001923603, "loss": 1.4183, "step": 5740 }, { "epoch": 0.07460167662102875, "grad_norm": 0.37232258915901184, "learning_rate": 0.00018511808055732466, "loss": 1.3497, "step": 5741 }, { "epoch": 0.07461467116494462, "grad_norm": 0.4177737236022949, "learning_rate": 0.00018511548109541326, "loss": 1.4034, "step": 5742 }, { "epoch": 0.0746276657088605, "grad_norm": 0.436621755361557, "learning_rate": 0.00018511288163350188, "loss": 1.3205, "step": 5743 }, { "epoch": 0.07464066025277637, "grad_norm": 0.37911200523376465, "learning_rate": 0.0001851102821715905, "loss": 1.451, "step": 5744 }, { "epoch": 0.07465365479669224, "grad_norm": 0.4468877911567688, "learning_rate": 0.0001851076827096791, "loss": 1.3117, "step": 5745 }, { "epoch": 0.07466664934060811, "grad_norm": 0.40068939328193665, "learning_rate": 0.00018510508324776773, "loss": 1.4544, "step": 5746 }, { "epoch": 0.07467964388452399, "grad_norm": 0.3930702805519104, "learning_rate": 0.00018510248378585632, "loss": 1.4813, "step": 5747 }, { "epoch": 0.07469263842843986, "grad_norm": 0.3526495099067688, "learning_rate": 0.00018509988432394495, "loss": 1.5122, "step": 5748 }, { "epoch": 0.07470563297235573, "grad_norm": 0.4161950945854187, "learning_rate": 0.00018509728486203357, "loss": 1.3742, "step": 5749 }, { "epoch": 0.0747186275162716, "grad_norm": 0.4628022313117981, "learning_rate": 0.00018509468540012217, "loss": 1.4864, "step": 5750 }, { "epoch": 0.07473162206018748, "grad_norm": 0.46159228682518005, "learning_rate": 0.0001850920859382108, "loss": 1.3268, "step": 5751 }, { "epoch": 0.07474461660410335, "grad_norm": 0.405312180519104, "learning_rate": 0.00018508948647629942, "loss": 1.4594, "step": 5752 }, { "epoch": 0.07475761114801922, "grad_norm": 0.3584649860858917, "learning_rate": 0.00018508688701438804, "loss": 1.4297, "step": 5753 }, { "epoch": 0.0747706056919351, "grad_norm": 0.5019659996032715, "learning_rate": 0.00018508428755247664, "loss": 1.6759, "step": 5754 }, { "epoch": 0.07478360023585097, "grad_norm": 0.19369813799858093, "learning_rate": 0.00018508168809056526, "loss": 1.195, "step": 5755 }, { "epoch": 0.07479659477976684, "grad_norm": 0.3313782811164856, "learning_rate": 0.0001850790886286539, "loss": 1.4215, "step": 5756 }, { "epoch": 0.07480958932368272, "grad_norm": 0.4623451828956604, "learning_rate": 0.0001850764891667425, "loss": 1.5191, "step": 5757 }, { "epoch": 0.07482258386759859, "grad_norm": 0.5040561556816101, "learning_rate": 0.0001850738897048311, "loss": 1.4171, "step": 5758 }, { "epoch": 0.07483557841151446, "grad_norm": 0.4187166094779968, "learning_rate": 0.0001850712902429197, "loss": 1.4973, "step": 5759 }, { "epoch": 0.07484857295543033, "grad_norm": 0.5405794978141785, "learning_rate": 0.00018506869078100836, "loss": 1.4428, "step": 5760 }, { "epoch": 0.07486156749934621, "grad_norm": 0.3933601379394531, "learning_rate": 0.00018506609131909696, "loss": 1.3624, "step": 5761 }, { "epoch": 0.07487456204326208, "grad_norm": 0.38803520798683167, "learning_rate": 0.00018506349185718556, "loss": 1.3182, "step": 5762 }, { "epoch": 0.07488755658717795, "grad_norm": 0.3997972011566162, "learning_rate": 0.00018506089239527418, "loss": 1.4482, "step": 5763 }, { "epoch": 0.07490055113109383, "grad_norm": 0.36882323026657104, "learning_rate": 0.0001850582929333628, "loss": 1.3833, "step": 5764 }, { "epoch": 0.0749135456750097, "grad_norm": 0.42174553871154785, "learning_rate": 0.00018505569347145143, "loss": 1.584, "step": 5765 }, { "epoch": 0.07492654021892557, "grad_norm": 0.33125194907188416, "learning_rate": 0.00018505309400954003, "loss": 1.2519, "step": 5766 }, { "epoch": 0.07493953476284146, "grad_norm": 0.35263311862945557, "learning_rate": 0.00018505049454762865, "loss": 1.4555, "step": 5767 }, { "epoch": 0.07495252930675733, "grad_norm": 0.3132159113883972, "learning_rate": 0.00018504789508571727, "loss": 1.3918, "step": 5768 }, { "epoch": 0.0749655238506732, "grad_norm": 0.3519570529460907, "learning_rate": 0.00018504529562380587, "loss": 1.1826, "step": 5769 }, { "epoch": 0.07497851839458908, "grad_norm": 0.3637275695800781, "learning_rate": 0.0001850426961618945, "loss": 1.4613, "step": 5770 }, { "epoch": 0.07499151293850495, "grad_norm": 0.33911579847335815, "learning_rate": 0.00018504009669998312, "loss": 1.3359, "step": 5771 }, { "epoch": 0.07500450748242082, "grad_norm": 0.4679122567176819, "learning_rate": 0.00018503749723807175, "loss": 1.4188, "step": 5772 }, { "epoch": 0.0750175020263367, "grad_norm": 0.38590025901794434, "learning_rate": 0.00018503489777616034, "loss": 1.4469, "step": 5773 }, { "epoch": 0.07503049657025257, "grad_norm": 0.3744296431541443, "learning_rate": 0.00018503229831424894, "loss": 1.4446, "step": 5774 }, { "epoch": 0.07504349111416844, "grad_norm": 0.4199884831905365, "learning_rate": 0.0001850296988523376, "loss": 1.3196, "step": 5775 }, { "epoch": 0.07505648565808432, "grad_norm": 0.3628632128238678, "learning_rate": 0.0001850270993904262, "loss": 1.415, "step": 5776 }, { "epoch": 0.07506948020200019, "grad_norm": 0.3270149230957031, "learning_rate": 0.00018502449992851481, "loss": 1.4124, "step": 5777 }, { "epoch": 0.07508247474591606, "grad_norm": 0.4369531273841858, "learning_rate": 0.0001850219004666034, "loss": 1.4002, "step": 5778 }, { "epoch": 0.07509546928983193, "grad_norm": 0.3593451976776123, "learning_rate": 0.00018501930100469204, "loss": 1.4635, "step": 5779 }, { "epoch": 0.07510846383374781, "grad_norm": 0.43367254734039307, "learning_rate": 0.00018501670154278066, "loss": 1.4829, "step": 5780 }, { "epoch": 0.07512145837766368, "grad_norm": 0.40663108229637146, "learning_rate": 0.00018501410208086926, "loss": 1.4948, "step": 5781 }, { "epoch": 0.07513445292157955, "grad_norm": 0.4318493902683258, "learning_rate": 0.00018501150261895788, "loss": 1.5239, "step": 5782 }, { "epoch": 0.07514744746549543, "grad_norm": 0.40720197558403015, "learning_rate": 0.0001850089031570465, "loss": 1.4984, "step": 5783 }, { "epoch": 0.0751604420094113, "grad_norm": 0.32058578729629517, "learning_rate": 0.00018500630369513513, "loss": 1.1605, "step": 5784 }, { "epoch": 0.07517343655332717, "grad_norm": 0.39151209592819214, "learning_rate": 0.00018500370423322373, "loss": 1.3383, "step": 5785 }, { "epoch": 0.07518643109724304, "grad_norm": 0.42630091309547424, "learning_rate": 0.00018500110477131233, "loss": 1.4948, "step": 5786 }, { "epoch": 0.07519942564115892, "grad_norm": 0.3706749975681305, "learning_rate": 0.00018499850530940098, "loss": 1.2889, "step": 5787 }, { "epoch": 0.07521242018507479, "grad_norm": 0.37756261229515076, "learning_rate": 0.00018499590584748957, "loss": 1.387, "step": 5788 }, { "epoch": 0.07522541472899066, "grad_norm": 0.39900779724121094, "learning_rate": 0.0001849933063855782, "loss": 1.509, "step": 5789 }, { "epoch": 0.07523840927290654, "grad_norm": 0.34100595116615295, "learning_rate": 0.0001849907069236668, "loss": 1.3943, "step": 5790 }, { "epoch": 0.07525140381682241, "grad_norm": 0.4117633104324341, "learning_rate": 0.00018498810746175542, "loss": 1.6532, "step": 5791 }, { "epoch": 0.07526439836073828, "grad_norm": 0.38742175698280334, "learning_rate": 0.00018498550799984405, "loss": 1.501, "step": 5792 }, { "epoch": 0.07527739290465416, "grad_norm": 0.323934942483902, "learning_rate": 0.00018498290853793264, "loss": 1.327, "step": 5793 }, { "epoch": 0.07529038744857003, "grad_norm": 0.351481169462204, "learning_rate": 0.00018498030907602127, "loss": 1.1411, "step": 5794 }, { "epoch": 0.0753033819924859, "grad_norm": 0.3165893256664276, "learning_rate": 0.0001849777096141099, "loss": 1.4399, "step": 5795 }, { "epoch": 0.07531637653640177, "grad_norm": 0.34249910712242126, "learning_rate": 0.00018497511015219852, "loss": 1.397, "step": 5796 }, { "epoch": 0.07532937108031765, "grad_norm": 0.44883275032043457, "learning_rate": 0.00018497251069028711, "loss": 1.4407, "step": 5797 }, { "epoch": 0.07534236562423352, "grad_norm": 0.35347557067871094, "learning_rate": 0.00018496991122837574, "loss": 1.2652, "step": 5798 }, { "epoch": 0.0753553601681494, "grad_norm": 0.4140808880329132, "learning_rate": 0.00018496731176646436, "loss": 1.3446, "step": 5799 }, { "epoch": 0.07536835471206527, "grad_norm": 0.4276021718978882, "learning_rate": 0.00018496471230455296, "loss": 1.2137, "step": 5800 }, { "epoch": 0.07538134925598114, "grad_norm": 0.34881800413131714, "learning_rate": 0.00018496211284264158, "loss": 1.3496, "step": 5801 }, { "epoch": 0.07539434379989701, "grad_norm": 0.3739721477031708, "learning_rate": 0.00018495951338073018, "loss": 1.4815, "step": 5802 }, { "epoch": 0.07540733834381289, "grad_norm": 0.41915491223335266, "learning_rate": 0.0001849569139188188, "loss": 1.3621, "step": 5803 }, { "epoch": 0.07542033288772876, "grad_norm": 0.3745441734790802, "learning_rate": 0.00018495431445690743, "loss": 1.4732, "step": 5804 }, { "epoch": 0.07543332743164465, "grad_norm": 0.6090517640113831, "learning_rate": 0.00018495171499499603, "loss": 1.6198, "step": 5805 }, { "epoch": 0.07544632197556052, "grad_norm": 0.34556782245635986, "learning_rate": 0.00018494911553308468, "loss": 1.2102, "step": 5806 }, { "epoch": 0.07545931651947639, "grad_norm": 0.3727145791053772, "learning_rate": 0.00018494651607117328, "loss": 1.3996, "step": 5807 }, { "epoch": 0.07547231106339226, "grad_norm": 0.3725121021270752, "learning_rate": 0.0001849439166092619, "loss": 1.5234, "step": 5808 }, { "epoch": 0.07548530560730814, "grad_norm": 0.39851897954940796, "learning_rate": 0.0001849413171473505, "loss": 1.3714, "step": 5809 }, { "epoch": 0.07549830015122401, "grad_norm": 0.3840694725513458, "learning_rate": 0.00018493871768543912, "loss": 1.4691, "step": 5810 }, { "epoch": 0.07551129469513988, "grad_norm": 0.4172472059726715, "learning_rate": 0.00018493611822352775, "loss": 1.525, "step": 5811 }, { "epoch": 0.07552428923905576, "grad_norm": 0.2960830330848694, "learning_rate": 0.00018493351876161635, "loss": 1.4679, "step": 5812 }, { "epoch": 0.07553728378297163, "grad_norm": 0.35858914256095886, "learning_rate": 0.00018493091929970497, "loss": 1.4521, "step": 5813 }, { "epoch": 0.0755502783268875, "grad_norm": 0.34739160537719727, "learning_rate": 0.0001849283198377936, "loss": 1.4745, "step": 5814 }, { "epoch": 0.07556327287080337, "grad_norm": 0.44756895303726196, "learning_rate": 0.0001849257203758822, "loss": 1.5411, "step": 5815 }, { "epoch": 0.07557626741471925, "grad_norm": 0.3778274655342102, "learning_rate": 0.00018492312091397082, "loss": 1.419, "step": 5816 }, { "epoch": 0.07558926195863512, "grad_norm": 0.49816054105758667, "learning_rate": 0.00018492052145205941, "loss": 1.5912, "step": 5817 }, { "epoch": 0.075602256502551, "grad_norm": 0.42864638566970825, "learning_rate": 0.00018491792199014807, "loss": 1.3594, "step": 5818 }, { "epoch": 0.07561525104646687, "grad_norm": 0.42342302203178406, "learning_rate": 0.00018491532252823666, "loss": 1.5458, "step": 5819 }, { "epoch": 0.07562824559038274, "grad_norm": 0.3598827123641968, "learning_rate": 0.0001849127230663253, "loss": 1.249, "step": 5820 }, { "epoch": 0.07564124013429861, "grad_norm": 0.3846488296985626, "learning_rate": 0.00018491012360441388, "loss": 1.4677, "step": 5821 }, { "epoch": 0.07565423467821449, "grad_norm": 0.3567892014980316, "learning_rate": 0.0001849075241425025, "loss": 1.4233, "step": 5822 }, { "epoch": 0.07566722922213036, "grad_norm": 0.3446444272994995, "learning_rate": 0.00018490492468059113, "loss": 1.5045, "step": 5823 }, { "epoch": 0.07568022376604623, "grad_norm": 0.33591076731681824, "learning_rate": 0.00018490232521867973, "loss": 1.4461, "step": 5824 }, { "epoch": 0.0756932183099621, "grad_norm": 0.4599846303462982, "learning_rate": 0.00018489972575676836, "loss": 1.4785, "step": 5825 }, { "epoch": 0.07570621285387798, "grad_norm": 0.3949660062789917, "learning_rate": 0.00018489712629485698, "loss": 1.6074, "step": 5826 }, { "epoch": 0.07571920739779385, "grad_norm": 0.4107274115085602, "learning_rate": 0.0001848945268329456, "loss": 1.4772, "step": 5827 }, { "epoch": 0.07573220194170972, "grad_norm": 0.4312751889228821, "learning_rate": 0.0001848919273710342, "loss": 1.5427, "step": 5828 }, { "epoch": 0.0757451964856256, "grad_norm": 0.38404184579849243, "learning_rate": 0.0001848893279091228, "loss": 1.3894, "step": 5829 }, { "epoch": 0.07575819102954147, "grad_norm": 0.43782028555870056, "learning_rate": 0.00018488672844721145, "loss": 1.4754, "step": 5830 }, { "epoch": 0.07577118557345734, "grad_norm": 0.4280025064945221, "learning_rate": 0.00018488412898530005, "loss": 1.3574, "step": 5831 }, { "epoch": 0.07578418011737321, "grad_norm": 0.4050753712654114, "learning_rate": 0.00018488152952338867, "loss": 1.3057, "step": 5832 }, { "epoch": 0.07579717466128909, "grad_norm": 0.3534747064113617, "learning_rate": 0.00018487893006147727, "loss": 1.3667, "step": 5833 }, { "epoch": 0.07581016920520496, "grad_norm": 0.3065432906150818, "learning_rate": 0.0001848763305995659, "loss": 1.2162, "step": 5834 }, { "epoch": 0.07582316374912083, "grad_norm": 0.2661783993244171, "learning_rate": 0.00018487373113765452, "loss": 1.4638, "step": 5835 }, { "epoch": 0.0758361582930367, "grad_norm": 0.4889761209487915, "learning_rate": 0.00018487113167574312, "loss": 1.5143, "step": 5836 }, { "epoch": 0.07584915283695258, "grad_norm": 0.3838978707790375, "learning_rate": 0.00018486853221383174, "loss": 1.3842, "step": 5837 }, { "epoch": 0.07586214738086845, "grad_norm": 0.4237801432609558, "learning_rate": 0.00018486593275192037, "loss": 1.6655, "step": 5838 }, { "epoch": 0.07587514192478433, "grad_norm": 0.39514413475990295, "learning_rate": 0.000184863333290009, "loss": 1.3252, "step": 5839 }, { "epoch": 0.0758881364687002, "grad_norm": 0.36931896209716797, "learning_rate": 0.0001848607338280976, "loss": 1.5721, "step": 5840 }, { "epoch": 0.07590113101261607, "grad_norm": 0.4811924695968628, "learning_rate": 0.00018485813436618618, "loss": 1.4095, "step": 5841 }, { "epoch": 0.07591412555653194, "grad_norm": 0.3904770314693451, "learning_rate": 0.00018485553490427484, "loss": 1.6637, "step": 5842 }, { "epoch": 0.07592712010044783, "grad_norm": 0.3167823255062103, "learning_rate": 0.00018485293544236343, "loss": 1.3825, "step": 5843 }, { "epoch": 0.0759401146443637, "grad_norm": 0.3343684673309326, "learning_rate": 0.00018485033598045206, "loss": 1.1775, "step": 5844 }, { "epoch": 0.07595310918827958, "grad_norm": 0.422659695148468, "learning_rate": 0.00018484773651854068, "loss": 1.3561, "step": 5845 }, { "epoch": 0.07596610373219545, "grad_norm": 0.35066717863082886, "learning_rate": 0.00018484513705662928, "loss": 1.1534, "step": 5846 }, { "epoch": 0.07597909827611132, "grad_norm": 0.47269636392593384, "learning_rate": 0.0001848425375947179, "loss": 1.4737, "step": 5847 }, { "epoch": 0.0759920928200272, "grad_norm": 0.3691405653953552, "learning_rate": 0.0001848399381328065, "loss": 1.3411, "step": 5848 }, { "epoch": 0.07600508736394307, "grad_norm": 0.4624173045158386, "learning_rate": 0.00018483733867089515, "loss": 1.5863, "step": 5849 }, { "epoch": 0.07601808190785894, "grad_norm": 0.40431129932403564, "learning_rate": 0.00018483473920898375, "loss": 1.2269, "step": 5850 }, { "epoch": 0.07603107645177481, "grad_norm": 0.46895161271095276, "learning_rate": 0.00018483213974707238, "loss": 1.4745, "step": 5851 }, { "epoch": 0.07604407099569069, "grad_norm": 0.3865452706813812, "learning_rate": 0.00018482954028516097, "loss": 1.2737, "step": 5852 }, { "epoch": 0.07605706553960656, "grad_norm": 0.48697805404663086, "learning_rate": 0.0001848269408232496, "loss": 1.5581, "step": 5853 }, { "epoch": 0.07607006008352243, "grad_norm": 0.45192742347717285, "learning_rate": 0.00018482434136133822, "loss": 1.4669, "step": 5854 }, { "epoch": 0.0760830546274383, "grad_norm": 0.3490251898765564, "learning_rate": 0.00018482174189942682, "loss": 1.2573, "step": 5855 }, { "epoch": 0.07609604917135418, "grad_norm": 0.3953474760055542, "learning_rate": 0.00018481914243751544, "loss": 1.3058, "step": 5856 }, { "epoch": 0.07610904371527005, "grad_norm": 0.397747278213501, "learning_rate": 0.00018481654297560407, "loss": 1.37, "step": 5857 }, { "epoch": 0.07612203825918593, "grad_norm": 0.46927931904792786, "learning_rate": 0.00018481394351369267, "loss": 1.3264, "step": 5858 }, { "epoch": 0.0761350328031018, "grad_norm": 0.3955818712711334, "learning_rate": 0.0001848113440517813, "loss": 1.5555, "step": 5859 }, { "epoch": 0.07614802734701767, "grad_norm": 0.39604973793029785, "learning_rate": 0.0001848087445898699, "loss": 1.6205, "step": 5860 }, { "epoch": 0.07616102189093354, "grad_norm": 0.37378475069999695, "learning_rate": 0.00018480614512795854, "loss": 1.3903, "step": 5861 }, { "epoch": 0.07617401643484942, "grad_norm": 0.3710018992424011, "learning_rate": 0.00018480354566604714, "loss": 1.4082, "step": 5862 }, { "epoch": 0.07618701097876529, "grad_norm": 0.48273733258247375, "learning_rate": 0.00018480094620413576, "loss": 1.2398, "step": 5863 }, { "epoch": 0.07620000552268116, "grad_norm": 0.4252732992172241, "learning_rate": 0.00018479834674222436, "loss": 1.4478, "step": 5864 }, { "epoch": 0.07621300006659704, "grad_norm": 0.40777069330215454, "learning_rate": 0.00018479574728031298, "loss": 1.5822, "step": 5865 }, { "epoch": 0.07622599461051291, "grad_norm": 0.3742409944534302, "learning_rate": 0.0001847931478184016, "loss": 1.1418, "step": 5866 }, { "epoch": 0.07623898915442878, "grad_norm": 0.4629398286342621, "learning_rate": 0.0001847905483564902, "loss": 1.5865, "step": 5867 }, { "epoch": 0.07625198369834466, "grad_norm": 0.21505504846572876, "learning_rate": 0.00018478794889457883, "loss": 1.1823, "step": 5868 }, { "epoch": 0.07626497824226053, "grad_norm": 0.4295143783092499, "learning_rate": 0.00018478534943266745, "loss": 1.349, "step": 5869 }, { "epoch": 0.0762779727861764, "grad_norm": 0.4053129553794861, "learning_rate": 0.00018478274997075605, "loss": 1.4786, "step": 5870 }, { "epoch": 0.07629096733009227, "grad_norm": 0.3658570349216461, "learning_rate": 0.00018478015050884468, "loss": 1.297, "step": 5871 }, { "epoch": 0.07630396187400815, "grad_norm": 0.4330212473869324, "learning_rate": 0.00018477755104693327, "loss": 1.378, "step": 5872 }, { "epoch": 0.07631695641792402, "grad_norm": 0.45120710134506226, "learning_rate": 0.00018477495158502192, "loss": 1.3482, "step": 5873 }, { "epoch": 0.07632995096183989, "grad_norm": 0.4001186192035675, "learning_rate": 0.00018477235212311052, "loss": 1.5825, "step": 5874 }, { "epoch": 0.07634294550575577, "grad_norm": 0.37141090631484985, "learning_rate": 0.00018476975266119915, "loss": 1.4463, "step": 5875 }, { "epoch": 0.07635594004967164, "grad_norm": 0.36818942427635193, "learning_rate": 0.00018476715319928774, "loss": 1.4735, "step": 5876 }, { "epoch": 0.07636893459358751, "grad_norm": 0.40000078082084656, "learning_rate": 0.00018476455373737637, "loss": 1.102, "step": 5877 }, { "epoch": 0.07638192913750338, "grad_norm": 0.428722083568573, "learning_rate": 0.000184761954275465, "loss": 1.5632, "step": 5878 }, { "epoch": 0.07639492368141926, "grad_norm": 0.373018741607666, "learning_rate": 0.0001847593548135536, "loss": 1.1342, "step": 5879 }, { "epoch": 0.07640791822533513, "grad_norm": 0.46091723442077637, "learning_rate": 0.00018475675535164221, "loss": 1.5319, "step": 5880 }, { "epoch": 0.07642091276925102, "grad_norm": 0.5123398900032043, "learning_rate": 0.00018475415588973084, "loss": 1.3243, "step": 5881 }, { "epoch": 0.07643390731316689, "grad_norm": 0.468747079372406, "learning_rate": 0.00018475155642781946, "loss": 1.474, "step": 5882 }, { "epoch": 0.07644690185708276, "grad_norm": 0.5109299421310425, "learning_rate": 0.00018474895696590806, "loss": 1.6138, "step": 5883 }, { "epoch": 0.07645989640099864, "grad_norm": 0.373140424489975, "learning_rate": 0.00018474635750399669, "loss": 1.4796, "step": 5884 }, { "epoch": 0.07647289094491451, "grad_norm": 0.39965105056762695, "learning_rate": 0.0001847437580420853, "loss": 1.5189, "step": 5885 }, { "epoch": 0.07648588548883038, "grad_norm": 0.4661354422569275, "learning_rate": 0.0001847411585801739, "loss": 1.5229, "step": 5886 }, { "epoch": 0.07649888003274626, "grad_norm": 0.33783531188964844, "learning_rate": 0.00018473855911826253, "loss": 1.4403, "step": 5887 }, { "epoch": 0.07651187457666213, "grad_norm": 0.350143700838089, "learning_rate": 0.00018473595965635116, "loss": 1.4552, "step": 5888 }, { "epoch": 0.076524869120578, "grad_norm": 0.37084877490997314, "learning_rate": 0.00018473336019443975, "loss": 1.1972, "step": 5889 }, { "epoch": 0.07653786366449387, "grad_norm": 0.4950472414493561, "learning_rate": 0.00018473076073252838, "loss": 1.4101, "step": 5890 }, { "epoch": 0.07655085820840975, "grad_norm": 0.3145788609981537, "learning_rate": 0.00018472816127061698, "loss": 1.4652, "step": 5891 }, { "epoch": 0.07656385275232562, "grad_norm": 0.3568994104862213, "learning_rate": 0.00018472556180870563, "loss": 1.517, "step": 5892 }, { "epoch": 0.0765768472962415, "grad_norm": 0.37646493315696716, "learning_rate": 0.00018472296234679422, "loss": 1.4366, "step": 5893 }, { "epoch": 0.07658984184015737, "grad_norm": 0.3793256878852844, "learning_rate": 0.00018472036288488285, "loss": 1.4065, "step": 5894 }, { "epoch": 0.07660283638407324, "grad_norm": 0.4935625195503235, "learning_rate": 0.00018471776342297145, "loss": 1.4447, "step": 5895 }, { "epoch": 0.07661583092798911, "grad_norm": 0.4019348919391632, "learning_rate": 0.00018471516396106007, "loss": 1.5234, "step": 5896 }, { "epoch": 0.07662882547190498, "grad_norm": 0.40772294998168945, "learning_rate": 0.0001847125644991487, "loss": 1.3832, "step": 5897 }, { "epoch": 0.07664182001582086, "grad_norm": 0.3134511411190033, "learning_rate": 0.0001847099650372373, "loss": 1.3266, "step": 5898 }, { "epoch": 0.07665481455973673, "grad_norm": 0.3904426693916321, "learning_rate": 0.00018470736557532592, "loss": 1.4104, "step": 5899 }, { "epoch": 0.0766678091036526, "grad_norm": 0.25366300344467163, "learning_rate": 0.00018470476611341454, "loss": 1.3223, "step": 5900 }, { "epoch": 0.07668080364756848, "grad_norm": 0.40504834055900574, "learning_rate": 0.00018470216665150314, "loss": 1.3474, "step": 5901 }, { "epoch": 0.07669379819148435, "grad_norm": 0.43310731649398804, "learning_rate": 0.00018469956718959176, "loss": 1.5406, "step": 5902 }, { "epoch": 0.07670679273540022, "grad_norm": 0.4102226793766022, "learning_rate": 0.00018469696772768036, "loss": 1.3684, "step": 5903 }, { "epoch": 0.0767197872793161, "grad_norm": 0.41729775071144104, "learning_rate": 0.000184694368265769, "loss": 1.3798, "step": 5904 }, { "epoch": 0.07673278182323197, "grad_norm": 0.4303189814090729, "learning_rate": 0.0001846917688038576, "loss": 1.4608, "step": 5905 }, { "epoch": 0.07674577636714784, "grad_norm": 0.3884187936782837, "learning_rate": 0.00018468916934194623, "loss": 1.6917, "step": 5906 }, { "epoch": 0.07675877091106371, "grad_norm": 0.38658177852630615, "learning_rate": 0.00018468656988003483, "loss": 1.543, "step": 5907 }, { "epoch": 0.07677176545497959, "grad_norm": 0.3554634153842926, "learning_rate": 0.00018468397041812346, "loss": 1.5053, "step": 5908 }, { "epoch": 0.07678475999889546, "grad_norm": 0.445719838142395, "learning_rate": 0.00018468137095621208, "loss": 1.4064, "step": 5909 }, { "epoch": 0.07679775454281133, "grad_norm": 0.3174709975719452, "learning_rate": 0.00018467877149430068, "loss": 1.2606, "step": 5910 }, { "epoch": 0.0768107490867272, "grad_norm": 0.4045545756816864, "learning_rate": 0.0001846761720323893, "loss": 1.3734, "step": 5911 }, { "epoch": 0.07682374363064308, "grad_norm": 0.39586082100868225, "learning_rate": 0.00018467357257047793, "loss": 1.4758, "step": 5912 }, { "epoch": 0.07683673817455895, "grad_norm": 0.5619513988494873, "learning_rate": 0.00018467097310856652, "loss": 1.7004, "step": 5913 }, { "epoch": 0.07684973271847483, "grad_norm": 0.37469327449798584, "learning_rate": 0.00018466837364665515, "loss": 1.4063, "step": 5914 }, { "epoch": 0.0768627272623907, "grad_norm": 0.3134441673755646, "learning_rate": 0.00018466577418474375, "loss": 1.2579, "step": 5915 }, { "epoch": 0.07687572180630657, "grad_norm": 0.38641074299812317, "learning_rate": 0.0001846631747228324, "loss": 1.6522, "step": 5916 }, { "epoch": 0.07688871635022244, "grad_norm": 0.3021990656852722, "learning_rate": 0.000184660575260921, "loss": 1.3556, "step": 5917 }, { "epoch": 0.07690171089413832, "grad_norm": 0.3889233469963074, "learning_rate": 0.00018465797579900962, "loss": 1.4543, "step": 5918 }, { "epoch": 0.0769147054380542, "grad_norm": 0.3740723133087158, "learning_rate": 0.00018465537633709824, "loss": 1.5567, "step": 5919 }, { "epoch": 0.07692769998197008, "grad_norm": 0.37010201811790466, "learning_rate": 0.00018465277687518684, "loss": 1.4651, "step": 5920 }, { "epoch": 0.07694069452588595, "grad_norm": 0.3395724892616272, "learning_rate": 0.00018465017741327547, "loss": 1.2977, "step": 5921 }, { "epoch": 0.07695368906980182, "grad_norm": 0.4764532446861267, "learning_rate": 0.00018464757795136406, "loss": 1.4808, "step": 5922 }, { "epoch": 0.0769666836137177, "grad_norm": 0.4063996374607086, "learning_rate": 0.00018464497848945271, "loss": 1.5211, "step": 5923 }, { "epoch": 0.07697967815763357, "grad_norm": 0.5106183290481567, "learning_rate": 0.0001846423790275413, "loss": 1.6894, "step": 5924 }, { "epoch": 0.07699267270154944, "grad_norm": 0.2898086607456207, "learning_rate": 0.0001846397795656299, "loss": 1.4411, "step": 5925 }, { "epoch": 0.07700566724546531, "grad_norm": 0.48833972215652466, "learning_rate": 0.00018463718010371853, "loss": 1.6266, "step": 5926 }, { "epoch": 0.07701866178938119, "grad_norm": 0.3334541320800781, "learning_rate": 0.00018463458064180716, "loss": 1.2624, "step": 5927 }, { "epoch": 0.07703165633329706, "grad_norm": 0.4016132652759552, "learning_rate": 0.00018463198117989578, "loss": 1.3208, "step": 5928 }, { "epoch": 0.07704465087721293, "grad_norm": 0.33149516582489014, "learning_rate": 0.00018462938171798438, "loss": 1.4494, "step": 5929 }, { "epoch": 0.0770576454211288, "grad_norm": 0.45958682894706726, "learning_rate": 0.000184626782256073, "loss": 1.363, "step": 5930 }, { "epoch": 0.07707063996504468, "grad_norm": 0.42671599984169006, "learning_rate": 0.00018462418279416163, "loss": 1.3863, "step": 5931 }, { "epoch": 0.07708363450896055, "grad_norm": 0.4863154888153076, "learning_rate": 0.00018462158333225023, "loss": 1.3409, "step": 5932 }, { "epoch": 0.07709662905287643, "grad_norm": 0.4399794936180115, "learning_rate": 0.00018461898387033885, "loss": 1.3409, "step": 5933 }, { "epoch": 0.0771096235967923, "grad_norm": 0.4456668198108673, "learning_rate": 0.00018461638440842745, "loss": 1.4864, "step": 5934 }, { "epoch": 0.07712261814070817, "grad_norm": 0.45837193727493286, "learning_rate": 0.0001846137849465161, "loss": 1.4673, "step": 5935 }, { "epoch": 0.07713561268462404, "grad_norm": 0.46039247512817383, "learning_rate": 0.0001846111854846047, "loss": 1.2641, "step": 5936 }, { "epoch": 0.07714860722853992, "grad_norm": 0.34568727016448975, "learning_rate": 0.00018460858602269332, "loss": 1.215, "step": 5937 }, { "epoch": 0.07716160177245579, "grad_norm": 0.40586352348327637, "learning_rate": 0.00018460598656078192, "loss": 1.3489, "step": 5938 }, { "epoch": 0.07717459631637166, "grad_norm": 0.33365893363952637, "learning_rate": 0.00018460338709887054, "loss": 1.2618, "step": 5939 }, { "epoch": 0.07718759086028754, "grad_norm": 0.35039955377578735, "learning_rate": 0.00018460078763695917, "loss": 1.4472, "step": 5940 }, { "epoch": 0.07720058540420341, "grad_norm": 0.5006925463676453, "learning_rate": 0.00018459818817504777, "loss": 1.4732, "step": 5941 }, { "epoch": 0.07721357994811928, "grad_norm": 0.4903702139854431, "learning_rate": 0.0001845955887131364, "loss": 1.2832, "step": 5942 }, { "epoch": 0.07722657449203515, "grad_norm": 0.4398631155490875, "learning_rate": 0.00018459298925122501, "loss": 1.4457, "step": 5943 }, { "epoch": 0.07723956903595103, "grad_norm": 0.36402255296707153, "learning_rate": 0.0001845903897893136, "loss": 1.567, "step": 5944 }, { "epoch": 0.0772525635798669, "grad_norm": 0.3949216306209564, "learning_rate": 0.00018458779032740224, "loss": 1.4803, "step": 5945 }, { "epoch": 0.07726555812378277, "grad_norm": 0.3183668851852417, "learning_rate": 0.00018458519086549083, "loss": 1.2234, "step": 5946 }, { "epoch": 0.07727855266769865, "grad_norm": 0.39650097489356995, "learning_rate": 0.00018458259140357949, "loss": 1.4524, "step": 5947 }, { "epoch": 0.07729154721161452, "grad_norm": 0.38234174251556396, "learning_rate": 0.00018457999194166808, "loss": 1.4546, "step": 5948 }, { "epoch": 0.07730454175553039, "grad_norm": 0.40995270013809204, "learning_rate": 0.0001845773924797567, "loss": 1.5724, "step": 5949 }, { "epoch": 0.07731753629944627, "grad_norm": 0.4977561831474304, "learning_rate": 0.0001845747930178453, "loss": 1.539, "step": 5950 }, { "epoch": 0.07733053084336214, "grad_norm": 0.3710271716117859, "learning_rate": 0.00018457219355593393, "loss": 1.4109, "step": 5951 }, { "epoch": 0.07734352538727801, "grad_norm": 0.4911384582519531, "learning_rate": 0.00018456959409402255, "loss": 1.5608, "step": 5952 }, { "epoch": 0.07735651993119388, "grad_norm": 0.3211837112903595, "learning_rate": 0.00018456699463211115, "loss": 1.3303, "step": 5953 }, { "epoch": 0.07736951447510976, "grad_norm": 0.361427366733551, "learning_rate": 0.00018456439517019978, "loss": 1.5518, "step": 5954 }, { "epoch": 0.07738250901902563, "grad_norm": 0.3623093366622925, "learning_rate": 0.0001845617957082884, "loss": 1.559, "step": 5955 }, { "epoch": 0.0773955035629415, "grad_norm": 0.44917863607406616, "learning_rate": 0.000184559196246377, "loss": 1.346, "step": 5956 }, { "epoch": 0.07740849810685739, "grad_norm": 0.34671348333358765, "learning_rate": 0.00018455659678446562, "loss": 1.2791, "step": 5957 }, { "epoch": 0.07742149265077326, "grad_norm": 0.39676445722579956, "learning_rate": 0.00018455399732255425, "loss": 1.5143, "step": 5958 }, { "epoch": 0.07743448719468914, "grad_norm": 0.5092577934265137, "learning_rate": 0.00018455139786064287, "loss": 1.5055, "step": 5959 }, { "epoch": 0.07744748173860501, "grad_norm": 0.38445591926574707, "learning_rate": 0.00018454879839873147, "loss": 1.5349, "step": 5960 }, { "epoch": 0.07746047628252088, "grad_norm": 0.3044701814651489, "learning_rate": 0.0001845461989368201, "loss": 1.2968, "step": 5961 }, { "epoch": 0.07747347082643675, "grad_norm": 0.3887306749820709, "learning_rate": 0.00018454359947490872, "loss": 1.4493, "step": 5962 }, { "epoch": 0.07748646537035263, "grad_norm": 0.24546630680561066, "learning_rate": 0.00018454100001299731, "loss": 1.1963, "step": 5963 }, { "epoch": 0.0774994599142685, "grad_norm": 0.4075829088687897, "learning_rate": 0.00018453840055108594, "loss": 1.4309, "step": 5964 }, { "epoch": 0.07751245445818437, "grad_norm": 0.3502098321914673, "learning_rate": 0.00018453580108917454, "loss": 1.2734, "step": 5965 }, { "epoch": 0.07752544900210025, "grad_norm": 0.3049415051937103, "learning_rate": 0.0001845332016272632, "loss": 1.3011, "step": 5966 }, { "epoch": 0.07753844354601612, "grad_norm": 0.5176023244857788, "learning_rate": 0.00018453060216535179, "loss": 1.5442, "step": 5967 }, { "epoch": 0.07755143808993199, "grad_norm": 0.30824077129364014, "learning_rate": 0.00018452800270344038, "loss": 1.2717, "step": 5968 }, { "epoch": 0.07756443263384787, "grad_norm": 0.2755676507949829, "learning_rate": 0.000184525403241529, "loss": 1.4089, "step": 5969 }, { "epoch": 0.07757742717776374, "grad_norm": 0.2947712242603302, "learning_rate": 0.00018452280377961763, "loss": 1.3732, "step": 5970 }, { "epoch": 0.07759042172167961, "grad_norm": 0.33350735902786255, "learning_rate": 0.00018452020431770626, "loss": 1.4573, "step": 5971 }, { "epoch": 0.07760341626559548, "grad_norm": 0.390143483877182, "learning_rate": 0.00018451760485579485, "loss": 1.6165, "step": 5972 }, { "epoch": 0.07761641080951136, "grad_norm": 0.4501892626285553, "learning_rate": 0.00018451500539388348, "loss": 1.4443, "step": 5973 }, { "epoch": 0.07762940535342723, "grad_norm": 0.37454545497894287, "learning_rate": 0.0001845124059319721, "loss": 1.4505, "step": 5974 }, { "epoch": 0.0776423998973431, "grad_norm": 0.35362398624420166, "learning_rate": 0.0001845098064700607, "loss": 1.3278, "step": 5975 }, { "epoch": 0.07765539444125898, "grad_norm": 0.37507548928260803, "learning_rate": 0.00018450720700814932, "loss": 1.1848, "step": 5976 }, { "epoch": 0.07766838898517485, "grad_norm": 0.4120621085166931, "learning_rate": 0.00018450460754623792, "loss": 1.483, "step": 5977 }, { "epoch": 0.07768138352909072, "grad_norm": 0.40024903416633606, "learning_rate": 0.00018450200808432657, "loss": 1.5498, "step": 5978 }, { "epoch": 0.0776943780730066, "grad_norm": 0.3831518888473511, "learning_rate": 0.00018449940862241517, "loss": 1.4385, "step": 5979 }, { "epoch": 0.07770737261692247, "grad_norm": 0.4055730104446411, "learning_rate": 0.00018449680916050377, "loss": 1.6143, "step": 5980 }, { "epoch": 0.07772036716083834, "grad_norm": 0.38118797540664673, "learning_rate": 0.0001844942096985924, "loss": 1.3368, "step": 5981 }, { "epoch": 0.07773336170475421, "grad_norm": 0.4233747124671936, "learning_rate": 0.00018449161023668102, "loss": 1.5142, "step": 5982 }, { "epoch": 0.07774635624867009, "grad_norm": 0.3702712059020996, "learning_rate": 0.00018448901077476964, "loss": 1.3708, "step": 5983 }, { "epoch": 0.07775935079258596, "grad_norm": 0.5815667510032654, "learning_rate": 0.00018448641131285824, "loss": 1.2719, "step": 5984 }, { "epoch": 0.07777234533650183, "grad_norm": 0.428805410861969, "learning_rate": 0.00018448381185094686, "loss": 1.433, "step": 5985 }, { "epoch": 0.0777853398804177, "grad_norm": 0.4008171558380127, "learning_rate": 0.0001844812123890355, "loss": 1.599, "step": 5986 }, { "epoch": 0.07779833442433358, "grad_norm": 0.4541243612766266, "learning_rate": 0.00018447861292712409, "loss": 1.4879, "step": 5987 }, { "epoch": 0.07781132896824945, "grad_norm": 0.28324034810066223, "learning_rate": 0.0001844760134652127, "loss": 1.2413, "step": 5988 }, { "epoch": 0.07782432351216532, "grad_norm": 0.43518519401550293, "learning_rate": 0.0001844734140033013, "loss": 1.4548, "step": 5989 }, { "epoch": 0.0778373180560812, "grad_norm": 0.3788944184780121, "learning_rate": 0.00018447081454138996, "loss": 1.3762, "step": 5990 }, { "epoch": 0.07785031259999707, "grad_norm": 0.41933387517929077, "learning_rate": 0.00018446821507947856, "loss": 1.4301, "step": 5991 }, { "epoch": 0.07786330714391294, "grad_norm": 0.45617789030075073, "learning_rate": 0.00018446561561756715, "loss": 1.4336, "step": 5992 }, { "epoch": 0.07787630168782882, "grad_norm": 0.38238054513931274, "learning_rate": 0.0001844630161556558, "loss": 1.3785, "step": 5993 }, { "epoch": 0.07788929623174469, "grad_norm": 0.3966638743877411, "learning_rate": 0.0001844604166937444, "loss": 1.4539, "step": 5994 }, { "epoch": 0.07790229077566056, "grad_norm": 0.29679030179977417, "learning_rate": 0.00018445781723183303, "loss": 1.1991, "step": 5995 }, { "epoch": 0.07791528531957645, "grad_norm": 0.30962076783180237, "learning_rate": 0.00018445521776992162, "loss": 1.4212, "step": 5996 }, { "epoch": 0.07792827986349232, "grad_norm": 0.33151495456695557, "learning_rate": 0.00018445261830801025, "loss": 1.3329, "step": 5997 }, { "epoch": 0.0779412744074082, "grad_norm": 0.39591848850250244, "learning_rate": 0.00018445001884609887, "loss": 1.4453, "step": 5998 }, { "epoch": 0.07795426895132407, "grad_norm": 0.31794121861457825, "learning_rate": 0.00018444741938418747, "loss": 1.408, "step": 5999 }, { "epoch": 0.07796726349523994, "grad_norm": 0.4208926260471344, "learning_rate": 0.0001844448199222761, "loss": 1.3413, "step": 6000 }, { "epoch": 0.07798025803915581, "grad_norm": 0.34509938955307007, "learning_rate": 0.00018444222046036472, "loss": 1.3611, "step": 6001 }, { "epoch": 0.07799325258307169, "grad_norm": 0.5411408543586731, "learning_rate": 0.00018443962099845334, "loss": 1.2923, "step": 6002 }, { "epoch": 0.07800624712698756, "grad_norm": 0.3810083270072937, "learning_rate": 0.00018443702153654194, "loss": 1.5298, "step": 6003 }, { "epoch": 0.07801924167090343, "grad_norm": 0.4454694092273712, "learning_rate": 0.00018443442207463057, "loss": 1.4519, "step": 6004 }, { "epoch": 0.0780322362148193, "grad_norm": 0.48522186279296875, "learning_rate": 0.0001844318226127192, "loss": 1.4947, "step": 6005 }, { "epoch": 0.07804523075873518, "grad_norm": 0.41381701827049255, "learning_rate": 0.0001844292231508078, "loss": 1.5144, "step": 6006 }, { "epoch": 0.07805822530265105, "grad_norm": 0.35586321353912354, "learning_rate": 0.0001844266236888964, "loss": 1.4649, "step": 6007 }, { "epoch": 0.07807121984656692, "grad_norm": 0.3811730742454529, "learning_rate": 0.000184424024226985, "loss": 1.3283, "step": 6008 }, { "epoch": 0.0780842143904828, "grad_norm": 0.34810492396354675, "learning_rate": 0.00018442142476507363, "loss": 1.2199, "step": 6009 }, { "epoch": 0.07809720893439867, "grad_norm": 0.4614677429199219, "learning_rate": 0.00018441882530316226, "loss": 1.2591, "step": 6010 }, { "epoch": 0.07811020347831454, "grad_norm": 0.362655907869339, "learning_rate": 0.00018441622584125086, "loss": 1.3368, "step": 6011 }, { "epoch": 0.07812319802223042, "grad_norm": 0.42832571268081665, "learning_rate": 0.00018441362637933948, "loss": 1.6327, "step": 6012 }, { "epoch": 0.07813619256614629, "grad_norm": 0.37692761421203613, "learning_rate": 0.0001844110269174281, "loss": 1.5255, "step": 6013 }, { "epoch": 0.07814918711006216, "grad_norm": 0.3350902795791626, "learning_rate": 0.00018440842745551673, "loss": 1.4163, "step": 6014 }, { "epoch": 0.07816218165397804, "grad_norm": 0.3296547830104828, "learning_rate": 0.00018440582799360533, "loss": 1.4614, "step": 6015 }, { "epoch": 0.07817517619789391, "grad_norm": 0.39048123359680176, "learning_rate": 0.00018440322853169395, "loss": 1.4951, "step": 6016 }, { "epoch": 0.07818817074180978, "grad_norm": 0.3843384087085724, "learning_rate": 0.00018440062906978258, "loss": 1.4064, "step": 6017 }, { "epoch": 0.07820116528572565, "grad_norm": 0.41673725843429565, "learning_rate": 0.00018439802960787117, "loss": 1.4817, "step": 6018 }, { "epoch": 0.07821415982964153, "grad_norm": 0.41697585582733154, "learning_rate": 0.0001843954301459598, "loss": 1.5021, "step": 6019 }, { "epoch": 0.0782271543735574, "grad_norm": 0.3676753342151642, "learning_rate": 0.0001843928306840484, "loss": 1.5594, "step": 6020 }, { "epoch": 0.07824014891747327, "grad_norm": 0.41512975096702576, "learning_rate": 0.00018439023122213702, "loss": 1.5761, "step": 6021 }, { "epoch": 0.07825314346138915, "grad_norm": 0.43105581402778625, "learning_rate": 0.00018438763176022564, "loss": 1.4796, "step": 6022 }, { "epoch": 0.07826613800530502, "grad_norm": 0.3050001561641693, "learning_rate": 0.00018438503229831424, "loss": 1.2084, "step": 6023 }, { "epoch": 0.07827913254922089, "grad_norm": 0.43741321563720703, "learning_rate": 0.00018438243283640287, "loss": 1.5064, "step": 6024 }, { "epoch": 0.07829212709313677, "grad_norm": 0.38264143466949463, "learning_rate": 0.0001843798333744915, "loss": 1.5653, "step": 6025 }, { "epoch": 0.07830512163705264, "grad_norm": 0.3128499686717987, "learning_rate": 0.00018437723391258011, "loss": 1.3427, "step": 6026 }, { "epoch": 0.07831811618096851, "grad_norm": 0.49272000789642334, "learning_rate": 0.0001843746344506687, "loss": 1.4129, "step": 6027 }, { "epoch": 0.07833111072488438, "grad_norm": 0.36527642607688904, "learning_rate": 0.00018437203498875734, "loss": 1.6205, "step": 6028 }, { "epoch": 0.07834410526880026, "grad_norm": 0.3390181362628937, "learning_rate": 0.00018436943552684596, "loss": 1.5177, "step": 6029 }, { "epoch": 0.07835709981271613, "grad_norm": 0.4812953770160675, "learning_rate": 0.00018436683606493456, "loss": 1.5312, "step": 6030 }, { "epoch": 0.078370094356632, "grad_norm": 0.7524536848068237, "learning_rate": 0.00018436423660302318, "loss": 1.4617, "step": 6031 }, { "epoch": 0.07838308890054788, "grad_norm": 0.2742747962474823, "learning_rate": 0.0001843616371411118, "loss": 1.232, "step": 6032 }, { "epoch": 0.07839608344446375, "grad_norm": 0.4105358123779297, "learning_rate": 0.00018435903767920043, "loss": 1.3249, "step": 6033 }, { "epoch": 0.07840907798837964, "grad_norm": 0.38426193594932556, "learning_rate": 0.00018435643821728903, "loss": 1.4264, "step": 6034 }, { "epoch": 0.07842207253229551, "grad_norm": 0.491949200630188, "learning_rate": 0.00018435383875537763, "loss": 1.5094, "step": 6035 }, { "epoch": 0.07843506707621138, "grad_norm": 0.4059925675392151, "learning_rate": 0.00018435123929346628, "loss": 1.449, "step": 6036 }, { "epoch": 0.07844806162012725, "grad_norm": 0.35963520407676697, "learning_rate": 0.00018434863983155488, "loss": 1.4089, "step": 6037 }, { "epoch": 0.07846105616404313, "grad_norm": 0.46164417266845703, "learning_rate": 0.0001843460403696435, "loss": 1.4802, "step": 6038 }, { "epoch": 0.078474050707959, "grad_norm": 0.4237184524536133, "learning_rate": 0.0001843434409077321, "loss": 1.3275, "step": 6039 }, { "epoch": 0.07848704525187487, "grad_norm": 0.38426315784454346, "learning_rate": 0.00018434084144582072, "loss": 1.6909, "step": 6040 }, { "epoch": 0.07850003979579075, "grad_norm": 0.44933873414993286, "learning_rate": 0.00018433824198390935, "loss": 1.6576, "step": 6041 }, { "epoch": 0.07851303433970662, "grad_norm": 0.4057685136795044, "learning_rate": 0.00018433564252199794, "loss": 1.3317, "step": 6042 }, { "epoch": 0.07852602888362249, "grad_norm": 0.31768089532852173, "learning_rate": 0.00018433304306008657, "loss": 1.4909, "step": 6043 }, { "epoch": 0.07853902342753837, "grad_norm": 0.3535803556442261, "learning_rate": 0.0001843304435981752, "loss": 1.4807, "step": 6044 }, { "epoch": 0.07855201797145424, "grad_norm": 0.6468108892440796, "learning_rate": 0.00018432784413626382, "loss": 1.4055, "step": 6045 }, { "epoch": 0.07856501251537011, "grad_norm": 0.3221374452114105, "learning_rate": 0.00018432524467435241, "loss": 1.2863, "step": 6046 }, { "epoch": 0.07857800705928598, "grad_norm": 0.3894992470741272, "learning_rate": 0.000184322645212441, "loss": 1.3439, "step": 6047 }, { "epoch": 0.07859100160320186, "grad_norm": 0.3688139319419861, "learning_rate": 0.00018432004575052966, "loss": 1.4272, "step": 6048 }, { "epoch": 0.07860399614711773, "grad_norm": 0.3546498417854309, "learning_rate": 0.00018431744628861826, "loss": 1.2453, "step": 6049 }, { "epoch": 0.0786169906910336, "grad_norm": 0.2959712743759155, "learning_rate": 0.00018431484682670689, "loss": 1.3423, "step": 6050 }, { "epoch": 0.07862998523494948, "grad_norm": 0.36510032415390015, "learning_rate": 0.00018431224736479548, "loss": 1.4213, "step": 6051 }, { "epoch": 0.07864297977886535, "grad_norm": 0.34032556414604187, "learning_rate": 0.0001843096479028841, "loss": 1.2443, "step": 6052 }, { "epoch": 0.07865597432278122, "grad_norm": 0.3144850730895996, "learning_rate": 0.00018430704844097273, "loss": 1.1622, "step": 6053 }, { "epoch": 0.0786689688666971, "grad_norm": 0.32013940811157227, "learning_rate": 0.00018430444897906133, "loss": 1.4364, "step": 6054 }, { "epoch": 0.07868196341061297, "grad_norm": 0.511988639831543, "learning_rate": 0.00018430184951714995, "loss": 1.3648, "step": 6055 }, { "epoch": 0.07869495795452884, "grad_norm": 0.5024500489234924, "learning_rate": 0.00018429925005523858, "loss": 1.5292, "step": 6056 }, { "epoch": 0.07870795249844471, "grad_norm": 0.2716609537601471, "learning_rate": 0.0001842966505933272, "loss": 1.4871, "step": 6057 }, { "epoch": 0.07872094704236059, "grad_norm": 0.2951996326446533, "learning_rate": 0.0001842940511314158, "loss": 1.4362, "step": 6058 }, { "epoch": 0.07873394158627646, "grad_norm": 0.3120375871658325, "learning_rate": 0.00018429145166950442, "loss": 1.4638, "step": 6059 }, { "epoch": 0.07874693613019233, "grad_norm": 0.4412476420402527, "learning_rate": 0.00018428885220759305, "loss": 1.3742, "step": 6060 }, { "epoch": 0.0787599306741082, "grad_norm": 0.38948512077331543, "learning_rate": 0.00018428625274568165, "loss": 1.2452, "step": 6061 }, { "epoch": 0.07877292521802408, "grad_norm": 0.3852015435695648, "learning_rate": 0.00018428365328377027, "loss": 1.5265, "step": 6062 }, { "epoch": 0.07878591976193995, "grad_norm": 0.422014057636261, "learning_rate": 0.00018428105382185887, "loss": 1.6678, "step": 6063 }, { "epoch": 0.07879891430585582, "grad_norm": 0.3978329598903656, "learning_rate": 0.0001842784543599475, "loss": 1.31, "step": 6064 }, { "epoch": 0.0788119088497717, "grad_norm": 0.4004749655723572, "learning_rate": 0.00018427585489803612, "loss": 1.4686, "step": 6065 }, { "epoch": 0.07882490339368757, "grad_norm": 0.3805711567401886, "learning_rate": 0.00018427325543612471, "loss": 1.557, "step": 6066 }, { "epoch": 0.07883789793760344, "grad_norm": 0.3802225589752197, "learning_rate": 0.00018427065597421337, "loss": 1.3069, "step": 6067 }, { "epoch": 0.07885089248151932, "grad_norm": 0.3975139558315277, "learning_rate": 0.00018426805651230196, "loss": 1.5216, "step": 6068 }, { "epoch": 0.07886388702543519, "grad_norm": 0.42604002356529236, "learning_rate": 0.0001842654570503906, "loss": 1.4016, "step": 6069 }, { "epoch": 0.07887688156935106, "grad_norm": 0.5064747929573059, "learning_rate": 0.00018426285758847919, "loss": 1.4061, "step": 6070 }, { "epoch": 0.07888987611326694, "grad_norm": 0.4241625666618347, "learning_rate": 0.0001842602581265678, "loss": 1.6493, "step": 6071 }, { "epoch": 0.07890287065718282, "grad_norm": 0.40913069248199463, "learning_rate": 0.00018425765866465643, "loss": 1.4782, "step": 6072 }, { "epoch": 0.0789158652010987, "grad_norm": 0.3326561152935028, "learning_rate": 0.00018425505920274503, "loss": 1.5368, "step": 6073 }, { "epoch": 0.07892885974501457, "grad_norm": 0.43161246180534363, "learning_rate": 0.00018425245974083366, "loss": 1.6333, "step": 6074 }, { "epoch": 0.07894185428893044, "grad_norm": 0.5879168510437012, "learning_rate": 0.00018424986027892228, "loss": 1.5427, "step": 6075 }, { "epoch": 0.07895484883284631, "grad_norm": 0.41375404596328735, "learning_rate": 0.00018424726081701088, "loss": 1.5386, "step": 6076 }, { "epoch": 0.07896784337676219, "grad_norm": 0.4308698773384094, "learning_rate": 0.0001842446613550995, "loss": 1.4401, "step": 6077 }, { "epoch": 0.07898083792067806, "grad_norm": 0.453360915184021, "learning_rate": 0.0001842420618931881, "loss": 1.5339, "step": 6078 }, { "epoch": 0.07899383246459393, "grad_norm": 0.4292598366737366, "learning_rate": 0.00018423946243127675, "loss": 1.4217, "step": 6079 }, { "epoch": 0.0790068270085098, "grad_norm": 0.2839990556240082, "learning_rate": 0.00018423686296936535, "loss": 1.5659, "step": 6080 }, { "epoch": 0.07901982155242568, "grad_norm": 0.3806931674480438, "learning_rate": 0.00018423426350745397, "loss": 1.4281, "step": 6081 }, { "epoch": 0.07903281609634155, "grad_norm": 0.38026121258735657, "learning_rate": 0.00018423166404554257, "loss": 1.3982, "step": 6082 }, { "epoch": 0.07904581064025742, "grad_norm": 0.36227384209632874, "learning_rate": 0.0001842290645836312, "loss": 1.3195, "step": 6083 }, { "epoch": 0.0790588051841733, "grad_norm": 0.38675713539123535, "learning_rate": 0.00018422646512171982, "loss": 1.2942, "step": 6084 }, { "epoch": 0.07907179972808917, "grad_norm": 0.3891057074069977, "learning_rate": 0.00018422386565980842, "loss": 1.5729, "step": 6085 }, { "epoch": 0.07908479427200504, "grad_norm": 0.47500666975975037, "learning_rate": 0.00018422126619789704, "loss": 1.5495, "step": 6086 }, { "epoch": 0.07909778881592092, "grad_norm": 0.4468681216239929, "learning_rate": 0.00018421866673598567, "loss": 1.4148, "step": 6087 }, { "epoch": 0.07911078335983679, "grad_norm": 0.46201348304748535, "learning_rate": 0.0001842160672740743, "loss": 1.3881, "step": 6088 }, { "epoch": 0.07912377790375266, "grad_norm": 0.4509305953979492, "learning_rate": 0.0001842134678121629, "loss": 1.4759, "step": 6089 }, { "epoch": 0.07913677244766854, "grad_norm": 0.3844403326511383, "learning_rate": 0.00018421086835025149, "loss": 1.5496, "step": 6090 }, { "epoch": 0.07914976699158441, "grad_norm": 0.3434497117996216, "learning_rate": 0.00018420826888834014, "loss": 1.4548, "step": 6091 }, { "epoch": 0.07916276153550028, "grad_norm": 0.4298562705516815, "learning_rate": 0.00018420566942642873, "loss": 1.2458, "step": 6092 }, { "epoch": 0.07917575607941615, "grad_norm": 0.3077503442764282, "learning_rate": 0.00018420306996451736, "loss": 1.3318, "step": 6093 }, { "epoch": 0.07918875062333203, "grad_norm": 0.46109291911125183, "learning_rate": 0.00018420047050260596, "loss": 1.5237, "step": 6094 }, { "epoch": 0.0792017451672479, "grad_norm": 0.36462199687957764, "learning_rate": 0.00018419787104069458, "loss": 1.3325, "step": 6095 }, { "epoch": 0.07921473971116377, "grad_norm": 0.3409494459629059, "learning_rate": 0.0001841952715787832, "loss": 1.477, "step": 6096 }, { "epoch": 0.07922773425507965, "grad_norm": 0.3770604431629181, "learning_rate": 0.0001841926721168718, "loss": 1.4151, "step": 6097 }, { "epoch": 0.07924072879899552, "grad_norm": 0.44813933968544006, "learning_rate": 0.00018419007265496043, "loss": 1.3529, "step": 6098 }, { "epoch": 0.07925372334291139, "grad_norm": 0.42887306213378906, "learning_rate": 0.00018418747319304905, "loss": 1.5434, "step": 6099 }, { "epoch": 0.07926671788682726, "grad_norm": 0.3355839252471924, "learning_rate": 0.00018418487373113768, "loss": 1.5068, "step": 6100 }, { "epoch": 0.07927971243074314, "grad_norm": 0.345017671585083, "learning_rate": 0.00018418227426922627, "loss": 1.5166, "step": 6101 }, { "epoch": 0.07929270697465901, "grad_norm": 0.3053815960884094, "learning_rate": 0.00018417967480731487, "loss": 1.3564, "step": 6102 }, { "epoch": 0.07930570151857488, "grad_norm": 0.4291446805000305, "learning_rate": 0.00018417707534540352, "loss": 1.5859, "step": 6103 }, { "epoch": 0.07931869606249076, "grad_norm": 0.2839497923851013, "learning_rate": 0.00018417447588349212, "loss": 1.1365, "step": 6104 }, { "epoch": 0.07933169060640663, "grad_norm": 0.32945430278778076, "learning_rate": 0.00018417187642158074, "loss": 1.3896, "step": 6105 }, { "epoch": 0.0793446851503225, "grad_norm": 0.3918827772140503, "learning_rate": 0.00018416927695966937, "loss": 1.5631, "step": 6106 }, { "epoch": 0.07935767969423838, "grad_norm": 0.3880796730518341, "learning_rate": 0.00018416667749775797, "loss": 1.5472, "step": 6107 }, { "epoch": 0.07937067423815425, "grad_norm": 0.49858877062797546, "learning_rate": 0.0001841640780358466, "loss": 1.448, "step": 6108 }, { "epoch": 0.07938366878207012, "grad_norm": 0.3416162431240082, "learning_rate": 0.0001841614785739352, "loss": 1.4361, "step": 6109 }, { "epoch": 0.07939666332598601, "grad_norm": 0.27496257424354553, "learning_rate": 0.00018415887911202384, "loss": 1.4356, "step": 6110 }, { "epoch": 0.07940965786990188, "grad_norm": 0.38354769349098206, "learning_rate": 0.00018415627965011244, "loss": 1.4506, "step": 6111 }, { "epoch": 0.07942265241381775, "grad_norm": 0.37779149413108826, "learning_rate": 0.00018415368018820106, "loss": 1.5162, "step": 6112 }, { "epoch": 0.07943564695773363, "grad_norm": 0.3469626307487488, "learning_rate": 0.00018415108072628966, "loss": 1.3967, "step": 6113 }, { "epoch": 0.0794486415016495, "grad_norm": 0.4428900480270386, "learning_rate": 0.00018414848126437828, "loss": 1.3716, "step": 6114 }, { "epoch": 0.07946163604556537, "grad_norm": 0.36185234785079956, "learning_rate": 0.0001841458818024669, "loss": 1.3165, "step": 6115 }, { "epoch": 0.07947463058948125, "grad_norm": 0.5801428556442261, "learning_rate": 0.0001841432823405555, "loss": 1.5218, "step": 6116 }, { "epoch": 0.07948762513339712, "grad_norm": 0.3739064335823059, "learning_rate": 0.00018414068287864413, "loss": 1.3396, "step": 6117 }, { "epoch": 0.07950061967731299, "grad_norm": 0.4277260899543762, "learning_rate": 0.00018413808341673275, "loss": 1.5768, "step": 6118 }, { "epoch": 0.07951361422122886, "grad_norm": 0.4436528980731964, "learning_rate": 0.00018413548395482135, "loss": 1.4144, "step": 6119 }, { "epoch": 0.07952660876514474, "grad_norm": 0.3718056082725525, "learning_rate": 0.00018413288449290998, "loss": 1.5007, "step": 6120 }, { "epoch": 0.07953960330906061, "grad_norm": 0.4448679983615875, "learning_rate": 0.00018413028503099857, "loss": 1.5605, "step": 6121 }, { "epoch": 0.07955259785297648, "grad_norm": 0.28929075598716736, "learning_rate": 0.00018412768556908723, "loss": 1.3381, "step": 6122 }, { "epoch": 0.07956559239689236, "grad_norm": 0.4016954004764557, "learning_rate": 0.00018412508610717582, "loss": 1.5228, "step": 6123 }, { "epoch": 0.07957858694080823, "grad_norm": 0.3283591866493225, "learning_rate": 0.00018412248664526445, "loss": 1.2988, "step": 6124 }, { "epoch": 0.0795915814847241, "grad_norm": 0.3484986424446106, "learning_rate": 0.00018411988718335304, "loss": 1.4909, "step": 6125 }, { "epoch": 0.07960457602863998, "grad_norm": 0.4112135171890259, "learning_rate": 0.00018411728772144167, "loss": 1.3978, "step": 6126 }, { "epoch": 0.07961757057255585, "grad_norm": 0.39837971329689026, "learning_rate": 0.0001841146882595303, "loss": 1.4346, "step": 6127 }, { "epoch": 0.07963056511647172, "grad_norm": 0.4017678499221802, "learning_rate": 0.0001841120887976189, "loss": 1.5039, "step": 6128 }, { "epoch": 0.0796435596603876, "grad_norm": 0.44885116815567017, "learning_rate": 0.00018410948933570752, "loss": 1.6208, "step": 6129 }, { "epoch": 0.07965655420430347, "grad_norm": 0.4098648726940155, "learning_rate": 0.00018410688987379614, "loss": 1.4538, "step": 6130 }, { "epoch": 0.07966954874821934, "grad_norm": 0.4489709436893463, "learning_rate": 0.00018410429041188474, "loss": 1.4321, "step": 6131 }, { "epoch": 0.07968254329213521, "grad_norm": 0.40003854036331177, "learning_rate": 0.00018410169094997336, "loss": 1.4442, "step": 6132 }, { "epoch": 0.07969553783605109, "grad_norm": 0.37231209874153137, "learning_rate": 0.00018409909148806196, "loss": 1.4277, "step": 6133 }, { "epoch": 0.07970853237996696, "grad_norm": 0.43695852160453796, "learning_rate": 0.0001840964920261506, "loss": 1.4894, "step": 6134 }, { "epoch": 0.07972152692388283, "grad_norm": 0.35560116171836853, "learning_rate": 0.0001840938925642392, "loss": 1.3351, "step": 6135 }, { "epoch": 0.0797345214677987, "grad_norm": 0.26695865392684937, "learning_rate": 0.00018409129310232783, "loss": 1.3985, "step": 6136 }, { "epoch": 0.07974751601171458, "grad_norm": 0.36609166860580444, "learning_rate": 0.00018408869364041643, "loss": 1.4149, "step": 6137 }, { "epoch": 0.07976051055563045, "grad_norm": 0.3258199691772461, "learning_rate": 0.00018408609417850505, "loss": 1.3311, "step": 6138 }, { "epoch": 0.07977350509954632, "grad_norm": 0.5638648867607117, "learning_rate": 0.00018408349471659368, "loss": 1.517, "step": 6139 }, { "epoch": 0.0797864996434622, "grad_norm": 0.37880295515060425, "learning_rate": 0.00018408089525468228, "loss": 1.4107, "step": 6140 }, { "epoch": 0.07979949418737807, "grad_norm": 0.43471813201904297, "learning_rate": 0.00018407829579277093, "loss": 1.4891, "step": 6141 }, { "epoch": 0.07981248873129394, "grad_norm": 0.46257299184799194, "learning_rate": 0.00018407569633085953, "loss": 1.5192, "step": 6142 }, { "epoch": 0.07982548327520982, "grad_norm": 0.48966357111930847, "learning_rate": 0.00018407309686894815, "loss": 1.3936, "step": 6143 }, { "epoch": 0.07983847781912569, "grad_norm": 0.4501968324184418, "learning_rate": 0.00018407049740703675, "loss": 1.5821, "step": 6144 }, { "epoch": 0.07985147236304156, "grad_norm": 0.502448558807373, "learning_rate": 0.00018406789794512537, "loss": 1.4745, "step": 6145 }, { "epoch": 0.07986446690695743, "grad_norm": 0.37218302488327026, "learning_rate": 0.000184065298483214, "loss": 1.4213, "step": 6146 }, { "epoch": 0.07987746145087331, "grad_norm": 0.4011833071708679, "learning_rate": 0.0001840626990213026, "loss": 1.3675, "step": 6147 }, { "epoch": 0.0798904559947892, "grad_norm": 0.4688166081905365, "learning_rate": 0.00018406009955939122, "loss": 1.5187, "step": 6148 }, { "epoch": 0.07990345053870507, "grad_norm": 0.3566301167011261, "learning_rate": 0.00018405750009747984, "loss": 1.3601, "step": 6149 }, { "epoch": 0.07991644508262094, "grad_norm": 0.41925936937332153, "learning_rate": 0.00018405490063556844, "loss": 1.434, "step": 6150 }, { "epoch": 0.07992943962653681, "grad_norm": 0.3408973813056946, "learning_rate": 0.00018405230117365706, "loss": 1.4679, "step": 6151 }, { "epoch": 0.07994243417045269, "grad_norm": 0.42943698167800903, "learning_rate": 0.00018404970171174566, "loss": 1.4241, "step": 6152 }, { "epoch": 0.07995542871436856, "grad_norm": 0.33581170439720154, "learning_rate": 0.0001840471022498343, "loss": 1.5286, "step": 6153 }, { "epoch": 0.07996842325828443, "grad_norm": 0.32352644205093384, "learning_rate": 0.0001840445027879229, "loss": 1.2904, "step": 6154 }, { "epoch": 0.0799814178022003, "grad_norm": 0.37474170327186584, "learning_rate": 0.00018404190332601154, "loss": 1.3927, "step": 6155 }, { "epoch": 0.07999441234611618, "grad_norm": 0.4087858200073242, "learning_rate": 0.00018403930386410013, "loss": 1.3849, "step": 6156 }, { "epoch": 0.08000740689003205, "grad_norm": 0.29384076595306396, "learning_rate": 0.00018403670440218876, "loss": 1.6336, "step": 6157 }, { "epoch": 0.08002040143394792, "grad_norm": 0.4185171127319336, "learning_rate": 0.00018403410494027738, "loss": 1.4786, "step": 6158 }, { "epoch": 0.0800333959778638, "grad_norm": 0.3421257436275482, "learning_rate": 0.00018403150547836598, "loss": 1.4873, "step": 6159 }, { "epoch": 0.08004639052177967, "grad_norm": 0.3407057225704193, "learning_rate": 0.0001840289060164546, "loss": 1.3454, "step": 6160 }, { "epoch": 0.08005938506569554, "grad_norm": 0.5290220379829407, "learning_rate": 0.00018402630655454323, "loss": 1.4196, "step": 6161 }, { "epoch": 0.08007237960961142, "grad_norm": 0.5162290930747986, "learning_rate": 0.00018402370709263183, "loss": 1.5756, "step": 6162 }, { "epoch": 0.08008537415352729, "grad_norm": 0.3592604100704193, "learning_rate": 0.00018402110763072045, "loss": 1.5719, "step": 6163 }, { "epoch": 0.08009836869744316, "grad_norm": 0.41995272040367126, "learning_rate": 0.00018401850816880905, "loss": 1.361, "step": 6164 }, { "epoch": 0.08011136324135903, "grad_norm": 0.44744640588760376, "learning_rate": 0.0001840159087068977, "loss": 1.4387, "step": 6165 }, { "epoch": 0.08012435778527491, "grad_norm": 0.48496049642562866, "learning_rate": 0.0001840133092449863, "loss": 1.4521, "step": 6166 }, { "epoch": 0.08013735232919078, "grad_norm": 0.3519055247306824, "learning_rate": 0.00018401070978307492, "loss": 1.4123, "step": 6167 }, { "epoch": 0.08015034687310665, "grad_norm": 0.38523778319358826, "learning_rate": 0.00018400811032116352, "loss": 1.3916, "step": 6168 }, { "epoch": 0.08016334141702253, "grad_norm": 0.38707244396209717, "learning_rate": 0.00018400551085925214, "loss": 1.4847, "step": 6169 }, { "epoch": 0.0801763359609384, "grad_norm": 0.3970610201358795, "learning_rate": 0.00018400291139734077, "loss": 1.4294, "step": 6170 }, { "epoch": 0.08018933050485427, "grad_norm": 0.3914804756641388, "learning_rate": 0.00018400031193542936, "loss": 1.339, "step": 6171 }, { "epoch": 0.08020232504877015, "grad_norm": 0.35249534249305725, "learning_rate": 0.000183997712473518, "loss": 1.5041, "step": 6172 }, { "epoch": 0.08021531959268602, "grad_norm": 0.3310585916042328, "learning_rate": 0.0001839951130116066, "loss": 1.459, "step": 6173 }, { "epoch": 0.08022831413660189, "grad_norm": 0.20819032192230225, "learning_rate": 0.0001839925135496952, "loss": 1.157, "step": 6174 }, { "epoch": 0.08024130868051776, "grad_norm": 0.2881905138492584, "learning_rate": 0.00018398991408778383, "loss": 1.1212, "step": 6175 }, { "epoch": 0.08025430322443364, "grad_norm": 0.4127061069011688, "learning_rate": 0.00018398731462587243, "loss": 1.3076, "step": 6176 }, { "epoch": 0.08026729776834951, "grad_norm": 0.34341922402381897, "learning_rate": 0.00018398471516396108, "loss": 1.6453, "step": 6177 }, { "epoch": 0.08028029231226538, "grad_norm": 0.4085890054702759, "learning_rate": 0.00018398211570204968, "loss": 1.452, "step": 6178 }, { "epoch": 0.08029328685618126, "grad_norm": 0.3887009024620056, "learning_rate": 0.0001839795162401383, "loss": 1.3789, "step": 6179 }, { "epoch": 0.08030628140009713, "grad_norm": 0.40998849272727966, "learning_rate": 0.00018397691677822693, "loss": 1.4491, "step": 6180 }, { "epoch": 0.080319275944013, "grad_norm": 0.3676016330718994, "learning_rate": 0.00018397431731631553, "loss": 1.1966, "step": 6181 }, { "epoch": 0.08033227048792888, "grad_norm": 0.44718894362449646, "learning_rate": 0.00018397171785440415, "loss": 1.624, "step": 6182 }, { "epoch": 0.08034526503184475, "grad_norm": 0.4542218744754791, "learning_rate": 0.00018396911839249275, "loss": 1.3156, "step": 6183 }, { "epoch": 0.08035825957576062, "grad_norm": 0.3227229714393616, "learning_rate": 0.0001839665189305814, "loss": 1.3696, "step": 6184 }, { "epoch": 0.0803712541196765, "grad_norm": 0.4354618489742279, "learning_rate": 0.00018396391946867, "loss": 1.4932, "step": 6185 }, { "epoch": 0.08038424866359238, "grad_norm": 0.35693785548210144, "learning_rate": 0.0001839613200067586, "loss": 1.3006, "step": 6186 }, { "epoch": 0.08039724320750825, "grad_norm": 0.3768044710159302, "learning_rate": 0.00018395872054484722, "loss": 1.4651, "step": 6187 }, { "epoch": 0.08041023775142413, "grad_norm": 0.31924551725387573, "learning_rate": 0.00018395612108293584, "loss": 1.6138, "step": 6188 }, { "epoch": 0.08042323229534, "grad_norm": 0.4328700602054596, "learning_rate": 0.00018395352162102447, "loss": 1.6016, "step": 6189 }, { "epoch": 0.08043622683925587, "grad_norm": 0.358801931142807, "learning_rate": 0.00018395092215911307, "loss": 1.3202, "step": 6190 }, { "epoch": 0.08044922138317175, "grad_norm": 0.612026035785675, "learning_rate": 0.0001839483226972017, "loss": 1.2749, "step": 6191 }, { "epoch": 0.08046221592708762, "grad_norm": 0.3530118465423584, "learning_rate": 0.00018394572323529032, "loss": 1.4187, "step": 6192 }, { "epoch": 0.08047521047100349, "grad_norm": 0.49632585048675537, "learning_rate": 0.0001839431237733789, "loss": 1.6824, "step": 6193 }, { "epoch": 0.08048820501491936, "grad_norm": 0.3985925018787384, "learning_rate": 0.00018394052431146754, "loss": 1.416, "step": 6194 }, { "epoch": 0.08050119955883524, "grad_norm": 0.5029274821281433, "learning_rate": 0.00018393792484955613, "loss": 1.3748, "step": 6195 }, { "epoch": 0.08051419410275111, "grad_norm": 0.3749791085720062, "learning_rate": 0.0001839353253876448, "loss": 1.4471, "step": 6196 }, { "epoch": 0.08052718864666698, "grad_norm": 0.4623998701572418, "learning_rate": 0.00018393272592573338, "loss": 1.3695, "step": 6197 }, { "epoch": 0.08054018319058286, "grad_norm": 0.3979378938674927, "learning_rate": 0.00018393012646382198, "loss": 1.448, "step": 6198 }, { "epoch": 0.08055317773449873, "grad_norm": 0.47189393639564514, "learning_rate": 0.0001839275270019106, "loss": 1.6139, "step": 6199 }, { "epoch": 0.0805661722784146, "grad_norm": 0.33067888021469116, "learning_rate": 0.00018392492753999923, "loss": 1.623, "step": 6200 }, { "epoch": 0.08057916682233048, "grad_norm": 0.3573934733867645, "learning_rate": 0.00018392232807808785, "loss": 1.3681, "step": 6201 }, { "epoch": 0.08059216136624635, "grad_norm": 0.3497483730316162, "learning_rate": 0.00018391972861617645, "loss": 1.3895, "step": 6202 }, { "epoch": 0.08060515591016222, "grad_norm": 0.4011506140232086, "learning_rate": 0.00018391712915426508, "loss": 1.5495, "step": 6203 }, { "epoch": 0.0806181504540781, "grad_norm": 0.39159879088401794, "learning_rate": 0.0001839145296923537, "loss": 1.4483, "step": 6204 }, { "epoch": 0.08063114499799397, "grad_norm": 0.3504270017147064, "learning_rate": 0.0001839119302304423, "loss": 1.5802, "step": 6205 }, { "epoch": 0.08064413954190984, "grad_norm": 0.41678738594055176, "learning_rate": 0.00018390933076853092, "loss": 1.4952, "step": 6206 }, { "epoch": 0.08065713408582571, "grad_norm": 0.3560841381549835, "learning_rate": 0.00018390673130661952, "loss": 1.4196, "step": 6207 }, { "epoch": 0.08067012862974159, "grad_norm": 0.32116806507110596, "learning_rate": 0.00018390413184470817, "loss": 1.2481, "step": 6208 }, { "epoch": 0.08068312317365746, "grad_norm": 0.39083969593048096, "learning_rate": 0.00018390153238279677, "loss": 1.385, "step": 6209 }, { "epoch": 0.08069611771757333, "grad_norm": 0.4282075762748718, "learning_rate": 0.0001838989329208854, "loss": 1.5725, "step": 6210 }, { "epoch": 0.0807091122614892, "grad_norm": 0.3888271152973175, "learning_rate": 0.000183896333458974, "loss": 1.475, "step": 6211 }, { "epoch": 0.08072210680540508, "grad_norm": 0.4699666202068329, "learning_rate": 0.00018389373399706262, "loss": 1.4977, "step": 6212 }, { "epoch": 0.08073510134932095, "grad_norm": 0.38986507058143616, "learning_rate": 0.00018389113453515124, "loss": 1.2864, "step": 6213 }, { "epoch": 0.08074809589323682, "grad_norm": 0.3026287257671356, "learning_rate": 0.00018388853507323984, "loss": 1.2347, "step": 6214 }, { "epoch": 0.0807610904371527, "grad_norm": 0.40429025888442993, "learning_rate": 0.00018388593561132846, "loss": 1.5108, "step": 6215 }, { "epoch": 0.08077408498106857, "grad_norm": 0.3776196837425232, "learning_rate": 0.00018388333614941709, "loss": 1.7085, "step": 6216 }, { "epoch": 0.08078707952498444, "grad_norm": 0.37580227851867676, "learning_rate": 0.00018388073668750568, "loss": 1.3545, "step": 6217 }, { "epoch": 0.08080007406890032, "grad_norm": 0.27544987201690674, "learning_rate": 0.0001838781372255943, "loss": 1.2992, "step": 6218 }, { "epoch": 0.08081306861281619, "grad_norm": 0.37173834443092346, "learning_rate": 0.00018387553776368293, "loss": 1.4749, "step": 6219 }, { "epoch": 0.08082606315673206, "grad_norm": 0.29365089535713196, "learning_rate": 0.00018387293830177156, "loss": 1.5172, "step": 6220 }, { "epoch": 0.08083905770064793, "grad_norm": 0.39724504947662354, "learning_rate": 0.00018387033883986015, "loss": 1.4817, "step": 6221 }, { "epoch": 0.08085205224456381, "grad_norm": 0.43359705805778503, "learning_rate": 0.00018386773937794878, "loss": 1.643, "step": 6222 }, { "epoch": 0.08086504678847968, "grad_norm": 0.32017529010772705, "learning_rate": 0.0001838651399160374, "loss": 1.6074, "step": 6223 }, { "epoch": 0.08087804133239557, "grad_norm": 0.33276161551475525, "learning_rate": 0.000183862540454126, "loss": 1.2605, "step": 6224 }, { "epoch": 0.08089103587631144, "grad_norm": 0.36590129137039185, "learning_rate": 0.00018385994099221463, "loss": 1.3109, "step": 6225 }, { "epoch": 0.08090403042022731, "grad_norm": 0.38914740085601807, "learning_rate": 0.00018385734153030322, "loss": 1.6086, "step": 6226 }, { "epoch": 0.08091702496414319, "grad_norm": 0.34516239166259766, "learning_rate": 0.00018385474206839185, "loss": 1.439, "step": 6227 }, { "epoch": 0.08093001950805906, "grad_norm": 0.4240453243255615, "learning_rate": 0.00018385214260648047, "loss": 1.4602, "step": 6228 }, { "epoch": 0.08094301405197493, "grad_norm": 0.41503041982650757, "learning_rate": 0.00018384954314456907, "loss": 1.3663, "step": 6229 }, { "epoch": 0.0809560085958908, "grad_norm": 0.35573306679725647, "learning_rate": 0.0001838469436826577, "loss": 1.6092, "step": 6230 }, { "epoch": 0.08096900313980668, "grad_norm": 0.3753318190574646, "learning_rate": 0.00018384434422074632, "loss": 1.4279, "step": 6231 }, { "epoch": 0.08098199768372255, "grad_norm": 0.4131934344768524, "learning_rate": 0.00018384174475883494, "loss": 1.2911, "step": 6232 }, { "epoch": 0.08099499222763842, "grad_norm": 0.38943934440612793, "learning_rate": 0.00018383914529692354, "loss": 1.3937, "step": 6233 }, { "epoch": 0.0810079867715543, "grad_norm": 0.44479724764823914, "learning_rate": 0.00018383654583501216, "loss": 1.5545, "step": 6234 }, { "epoch": 0.08102098131547017, "grad_norm": 0.34958744049072266, "learning_rate": 0.0001838339463731008, "loss": 1.2487, "step": 6235 }, { "epoch": 0.08103397585938604, "grad_norm": 0.2523711919784546, "learning_rate": 0.00018383134691118939, "loss": 1.4417, "step": 6236 }, { "epoch": 0.08104697040330192, "grad_norm": 0.49556130170822144, "learning_rate": 0.000183828747449278, "loss": 1.3459, "step": 6237 }, { "epoch": 0.08105996494721779, "grad_norm": 0.43096816539764404, "learning_rate": 0.0001838261479873666, "loss": 1.5894, "step": 6238 }, { "epoch": 0.08107295949113366, "grad_norm": 0.2899806797504425, "learning_rate": 0.00018382354852545526, "loss": 1.4316, "step": 6239 }, { "epoch": 0.08108595403504953, "grad_norm": 0.35303351283073425, "learning_rate": 0.00018382094906354386, "loss": 1.3656, "step": 6240 }, { "epoch": 0.08109894857896541, "grad_norm": 0.3945399224758148, "learning_rate": 0.00018381834960163245, "loss": 1.4313, "step": 6241 }, { "epoch": 0.08111194312288128, "grad_norm": 0.45361238718032837, "learning_rate": 0.00018381575013972108, "loss": 1.2535, "step": 6242 }, { "epoch": 0.08112493766679715, "grad_norm": 0.46176430583000183, "learning_rate": 0.0001838131506778097, "loss": 1.517, "step": 6243 }, { "epoch": 0.08113793221071303, "grad_norm": 0.4771147072315216, "learning_rate": 0.00018381055121589833, "loss": 1.3434, "step": 6244 }, { "epoch": 0.0811509267546289, "grad_norm": 0.30673474073410034, "learning_rate": 0.00018380795175398693, "loss": 1.3649, "step": 6245 }, { "epoch": 0.08116392129854477, "grad_norm": 0.3452615439891815, "learning_rate": 0.00018380535229207555, "loss": 1.3725, "step": 6246 }, { "epoch": 0.08117691584246065, "grad_norm": 0.416964590549469, "learning_rate": 0.00018380275283016417, "loss": 1.4851, "step": 6247 }, { "epoch": 0.08118991038637652, "grad_norm": 0.38670051097869873, "learning_rate": 0.00018380015336825277, "loss": 1.3859, "step": 6248 }, { "epoch": 0.08120290493029239, "grad_norm": 0.4432021975517273, "learning_rate": 0.0001837975539063414, "loss": 1.4931, "step": 6249 }, { "epoch": 0.08121589947420826, "grad_norm": 0.4541003108024597, "learning_rate": 0.00018379495444443, "loss": 1.5087, "step": 6250 }, { "epoch": 0.08122889401812414, "grad_norm": 0.45787960290908813, "learning_rate": 0.00018379235498251865, "loss": 1.5582, "step": 6251 }, { "epoch": 0.08124188856204001, "grad_norm": 0.30369269847869873, "learning_rate": 0.00018378975552060724, "loss": 1.3281, "step": 6252 }, { "epoch": 0.08125488310595588, "grad_norm": 0.3906219005584717, "learning_rate": 0.00018378715605869584, "loss": 1.4755, "step": 6253 }, { "epoch": 0.08126787764987176, "grad_norm": 0.3645680248737335, "learning_rate": 0.0001837845565967845, "loss": 1.66, "step": 6254 }, { "epoch": 0.08128087219378763, "grad_norm": 0.3993740975856781, "learning_rate": 0.0001837819571348731, "loss": 1.2824, "step": 6255 }, { "epoch": 0.0812938667377035, "grad_norm": 0.38091859221458435, "learning_rate": 0.0001837793576729617, "loss": 1.556, "step": 6256 }, { "epoch": 0.08130686128161937, "grad_norm": 0.4366264045238495, "learning_rate": 0.0001837767582110503, "loss": 1.3614, "step": 6257 }, { "epoch": 0.08131985582553525, "grad_norm": 0.4241064786911011, "learning_rate": 0.00018377415874913894, "loss": 1.5158, "step": 6258 }, { "epoch": 0.08133285036945112, "grad_norm": 0.38198429346084595, "learning_rate": 0.00018377155928722756, "loss": 1.4414, "step": 6259 }, { "epoch": 0.081345844913367, "grad_norm": 0.4117804169654846, "learning_rate": 0.00018376895982531616, "loss": 1.4767, "step": 6260 }, { "epoch": 0.08135883945728287, "grad_norm": 0.4990704357624054, "learning_rate": 0.00018376636036340478, "loss": 1.2187, "step": 6261 }, { "epoch": 0.08137183400119875, "grad_norm": 0.3977000117301941, "learning_rate": 0.0001837637609014934, "loss": 1.5617, "step": 6262 }, { "epoch": 0.08138482854511463, "grad_norm": 0.398823618888855, "learning_rate": 0.00018376116143958203, "loss": 1.5426, "step": 6263 }, { "epoch": 0.0813978230890305, "grad_norm": 0.4289095103740692, "learning_rate": 0.00018375856197767063, "loss": 1.5082, "step": 6264 }, { "epoch": 0.08141081763294637, "grad_norm": 0.3886120319366455, "learning_rate": 0.00018375596251575925, "loss": 1.7187, "step": 6265 }, { "epoch": 0.08142381217686225, "grad_norm": 0.34749701619148254, "learning_rate": 0.00018375336305384788, "loss": 1.3236, "step": 6266 }, { "epoch": 0.08143680672077812, "grad_norm": 0.367283433675766, "learning_rate": 0.00018375076359193647, "loss": 1.4599, "step": 6267 }, { "epoch": 0.08144980126469399, "grad_norm": 0.3778984248638153, "learning_rate": 0.0001837481641300251, "loss": 1.5789, "step": 6268 }, { "epoch": 0.08146279580860986, "grad_norm": 0.6308656930923462, "learning_rate": 0.0001837455646681137, "loss": 1.3317, "step": 6269 }, { "epoch": 0.08147579035252574, "grad_norm": 0.38500407338142395, "learning_rate": 0.00018374296520620232, "loss": 1.437, "step": 6270 }, { "epoch": 0.08148878489644161, "grad_norm": 0.415232390165329, "learning_rate": 0.00018374036574429095, "loss": 1.4343, "step": 6271 }, { "epoch": 0.08150177944035748, "grad_norm": 0.3456633687019348, "learning_rate": 0.00018373776628237954, "loss": 1.3905, "step": 6272 }, { "epoch": 0.08151477398427336, "grad_norm": 0.40678590536117554, "learning_rate": 0.00018373516682046817, "loss": 1.4074, "step": 6273 }, { "epoch": 0.08152776852818923, "grad_norm": 0.3896959125995636, "learning_rate": 0.0001837325673585568, "loss": 1.3847, "step": 6274 }, { "epoch": 0.0815407630721051, "grad_norm": 0.41664302349090576, "learning_rate": 0.00018372996789664542, "loss": 1.6356, "step": 6275 }, { "epoch": 0.08155375761602097, "grad_norm": 0.3730113208293915, "learning_rate": 0.000183727368434734, "loss": 1.5023, "step": 6276 }, { "epoch": 0.08156675215993685, "grad_norm": 0.30903345346450806, "learning_rate": 0.00018372476897282264, "loss": 1.3195, "step": 6277 }, { "epoch": 0.08157974670385272, "grad_norm": 0.4093523323535919, "learning_rate": 0.00018372216951091126, "loss": 1.5907, "step": 6278 }, { "epoch": 0.0815927412477686, "grad_norm": 0.49078160524368286, "learning_rate": 0.00018371957004899986, "loss": 1.6554, "step": 6279 }, { "epoch": 0.08160573579168447, "grad_norm": 0.49689167737960815, "learning_rate": 0.00018371697058708848, "loss": 1.5196, "step": 6280 }, { "epoch": 0.08161873033560034, "grad_norm": 0.43951088190078735, "learning_rate": 0.00018371437112517708, "loss": 1.5346, "step": 6281 }, { "epoch": 0.08163172487951621, "grad_norm": 0.3622475862503052, "learning_rate": 0.0001837117716632657, "loss": 1.609, "step": 6282 }, { "epoch": 0.08164471942343209, "grad_norm": 0.5064297318458557, "learning_rate": 0.00018370917220135433, "loss": 1.3538, "step": 6283 }, { "epoch": 0.08165771396734796, "grad_norm": 0.39422526955604553, "learning_rate": 0.00018370657273944293, "loss": 1.542, "step": 6284 }, { "epoch": 0.08167070851126383, "grad_norm": 0.4281027615070343, "learning_rate": 0.00018370397327753155, "loss": 1.5572, "step": 6285 }, { "epoch": 0.0816837030551797, "grad_norm": 0.4165332317352295, "learning_rate": 0.00018370137381562018, "loss": 1.4751, "step": 6286 }, { "epoch": 0.08169669759909558, "grad_norm": 0.30757102370262146, "learning_rate": 0.0001836987743537088, "loss": 1.3309, "step": 6287 }, { "epoch": 0.08170969214301145, "grad_norm": 0.4747482240200043, "learning_rate": 0.0001836961748917974, "loss": 1.575, "step": 6288 }, { "epoch": 0.08172268668692732, "grad_norm": 0.3442952334880829, "learning_rate": 0.00018369357542988602, "loss": 1.2855, "step": 6289 }, { "epoch": 0.0817356812308432, "grad_norm": 0.27735987305641174, "learning_rate": 0.00018369097596797465, "loss": 1.2472, "step": 6290 }, { "epoch": 0.08174867577475907, "grad_norm": 0.30388858914375305, "learning_rate": 0.00018368837650606325, "loss": 1.4579, "step": 6291 }, { "epoch": 0.08176167031867494, "grad_norm": 0.3185122311115265, "learning_rate": 0.00018368577704415187, "loss": 1.482, "step": 6292 }, { "epoch": 0.08177466486259082, "grad_norm": 0.4130038619041443, "learning_rate": 0.0001836831775822405, "loss": 1.403, "step": 6293 }, { "epoch": 0.08178765940650669, "grad_norm": 0.4267940819263458, "learning_rate": 0.00018368057812032912, "loss": 1.5247, "step": 6294 }, { "epoch": 0.08180065395042256, "grad_norm": 0.3583746552467346, "learning_rate": 0.00018367797865841772, "loss": 1.4195, "step": 6295 }, { "epoch": 0.08181364849433843, "grad_norm": 0.37809985876083374, "learning_rate": 0.0001836753791965063, "loss": 1.4028, "step": 6296 }, { "epoch": 0.08182664303825431, "grad_norm": 0.4940185248851776, "learning_rate": 0.00018367277973459496, "loss": 1.4484, "step": 6297 }, { "epoch": 0.08183963758217018, "grad_norm": 0.3952077329158783, "learning_rate": 0.00018367018027268356, "loss": 1.4168, "step": 6298 }, { "epoch": 0.08185263212608605, "grad_norm": 0.4756681025028229, "learning_rate": 0.0001836675808107722, "loss": 1.6063, "step": 6299 }, { "epoch": 0.08186562667000194, "grad_norm": 0.31230786442756653, "learning_rate": 0.00018366498134886078, "loss": 1.5183, "step": 6300 }, { "epoch": 0.08187862121391781, "grad_norm": 0.3104645311832428, "learning_rate": 0.0001836623818869494, "loss": 1.275, "step": 6301 }, { "epoch": 0.08189161575783369, "grad_norm": 0.36013808846473694, "learning_rate": 0.00018365978242503803, "loss": 1.5446, "step": 6302 }, { "epoch": 0.08190461030174956, "grad_norm": 0.4131903052330017, "learning_rate": 0.00018365718296312663, "loss": 1.3805, "step": 6303 }, { "epoch": 0.08191760484566543, "grad_norm": 0.41663122177124023, "learning_rate": 0.00018365458350121526, "loss": 1.6242, "step": 6304 }, { "epoch": 0.0819305993895813, "grad_norm": 0.379584938287735, "learning_rate": 0.00018365198403930388, "loss": 1.4697, "step": 6305 }, { "epoch": 0.08194359393349718, "grad_norm": 0.3859310746192932, "learning_rate": 0.0001836493845773925, "loss": 1.6415, "step": 6306 }, { "epoch": 0.08195658847741305, "grad_norm": 0.40626102685928345, "learning_rate": 0.0001836467851154811, "loss": 1.519, "step": 6307 }, { "epoch": 0.08196958302132892, "grad_norm": 0.4381747245788574, "learning_rate": 0.0001836441856535697, "loss": 1.4524, "step": 6308 }, { "epoch": 0.0819825775652448, "grad_norm": 0.42206859588623047, "learning_rate": 0.00018364158619165835, "loss": 1.6181, "step": 6309 }, { "epoch": 0.08199557210916067, "grad_norm": 0.4496839642524719, "learning_rate": 0.00018363898672974695, "loss": 1.4177, "step": 6310 }, { "epoch": 0.08200856665307654, "grad_norm": 0.4506014585494995, "learning_rate": 0.00018363638726783557, "loss": 1.5302, "step": 6311 }, { "epoch": 0.08202156119699242, "grad_norm": 0.4212636947631836, "learning_rate": 0.00018363378780592417, "loss": 1.6268, "step": 6312 }, { "epoch": 0.08203455574090829, "grad_norm": 0.4072587192058563, "learning_rate": 0.0001836311883440128, "loss": 1.3955, "step": 6313 }, { "epoch": 0.08204755028482416, "grad_norm": 0.3774198889732361, "learning_rate": 0.00018362858888210142, "loss": 1.3902, "step": 6314 }, { "epoch": 0.08206054482874003, "grad_norm": 0.40328261256217957, "learning_rate": 0.00018362598942019002, "loss": 1.4437, "step": 6315 }, { "epoch": 0.08207353937265591, "grad_norm": 0.32378828525543213, "learning_rate": 0.00018362338995827864, "loss": 1.5592, "step": 6316 }, { "epoch": 0.08208653391657178, "grad_norm": 0.42563384771347046, "learning_rate": 0.00018362079049636726, "loss": 1.5266, "step": 6317 }, { "epoch": 0.08209952846048765, "grad_norm": 0.4518590569496155, "learning_rate": 0.0001836181910344559, "loss": 1.4812, "step": 6318 }, { "epoch": 0.08211252300440353, "grad_norm": 0.36745765805244446, "learning_rate": 0.0001836155915725445, "loss": 1.2738, "step": 6319 }, { "epoch": 0.0821255175483194, "grad_norm": 0.44639402627944946, "learning_rate": 0.00018361299211063308, "loss": 1.3873, "step": 6320 }, { "epoch": 0.08213851209223527, "grad_norm": 0.36386215686798096, "learning_rate": 0.00018361039264872174, "loss": 1.4629, "step": 6321 }, { "epoch": 0.08215150663615114, "grad_norm": 0.30434849858283997, "learning_rate": 0.00018360779318681033, "loss": 1.2594, "step": 6322 }, { "epoch": 0.08216450118006702, "grad_norm": 0.34231579303741455, "learning_rate": 0.00018360519372489896, "loss": 1.5994, "step": 6323 }, { "epoch": 0.08217749572398289, "grad_norm": 0.43207627534866333, "learning_rate": 0.00018360259426298755, "loss": 1.4636, "step": 6324 }, { "epoch": 0.08219049026789876, "grad_norm": 0.4585356116294861, "learning_rate": 0.00018359999480107618, "loss": 1.6959, "step": 6325 }, { "epoch": 0.08220348481181464, "grad_norm": 0.28777074813842773, "learning_rate": 0.0001835973953391648, "loss": 1.5394, "step": 6326 }, { "epoch": 0.08221647935573051, "grad_norm": 0.4813830554485321, "learning_rate": 0.0001835947958772534, "loss": 1.6296, "step": 6327 }, { "epoch": 0.08222947389964638, "grad_norm": 0.33295485377311707, "learning_rate": 0.00018359219641534205, "loss": 1.2077, "step": 6328 }, { "epoch": 0.08224246844356226, "grad_norm": 0.3948964476585388, "learning_rate": 0.00018358959695343065, "loss": 1.458, "step": 6329 }, { "epoch": 0.08225546298747813, "grad_norm": 0.6034369468688965, "learning_rate": 0.00018358699749151927, "loss": 1.5965, "step": 6330 }, { "epoch": 0.082268457531394, "grad_norm": 0.39594656229019165, "learning_rate": 0.00018358439802960787, "loss": 1.494, "step": 6331 }, { "epoch": 0.08228145207530987, "grad_norm": 0.36615267395973206, "learning_rate": 0.0001835817985676965, "loss": 1.3831, "step": 6332 }, { "epoch": 0.08229444661922575, "grad_norm": 0.4499756693840027, "learning_rate": 0.00018357919910578512, "loss": 1.5744, "step": 6333 }, { "epoch": 0.08230744116314162, "grad_norm": 0.33017051219940186, "learning_rate": 0.00018357659964387372, "loss": 1.6343, "step": 6334 }, { "epoch": 0.0823204357070575, "grad_norm": 0.3306119441986084, "learning_rate": 0.00018357400018196234, "loss": 1.394, "step": 6335 }, { "epoch": 0.08233343025097337, "grad_norm": 0.3868931531906128, "learning_rate": 0.00018357140072005097, "loss": 1.5379, "step": 6336 }, { "epoch": 0.08234642479488924, "grad_norm": 0.42174673080444336, "learning_rate": 0.00018356880125813956, "loss": 1.3303, "step": 6337 }, { "epoch": 0.08235941933880513, "grad_norm": 0.4145820438861847, "learning_rate": 0.0001835662017962282, "loss": 1.3131, "step": 6338 }, { "epoch": 0.082372413882721, "grad_norm": 0.43100664019584656, "learning_rate": 0.0001835636023343168, "loss": 1.2742, "step": 6339 }, { "epoch": 0.08238540842663687, "grad_norm": 0.33210310339927673, "learning_rate": 0.00018356100287240544, "loss": 1.4114, "step": 6340 }, { "epoch": 0.08239840297055274, "grad_norm": 0.4400142431259155, "learning_rate": 0.00018355840341049404, "loss": 1.5164, "step": 6341 }, { "epoch": 0.08241139751446862, "grad_norm": 0.2951825261116028, "learning_rate": 0.00018355580394858266, "loss": 1.3462, "step": 6342 }, { "epoch": 0.08242439205838449, "grad_norm": 0.35411882400512695, "learning_rate": 0.00018355320448667126, "loss": 1.4355, "step": 6343 }, { "epoch": 0.08243738660230036, "grad_norm": 0.3669249713420868, "learning_rate": 0.00018355060502475988, "loss": 1.5282, "step": 6344 }, { "epoch": 0.08245038114621624, "grad_norm": 0.5035409927368164, "learning_rate": 0.0001835480055628485, "loss": 1.3305, "step": 6345 }, { "epoch": 0.08246337569013211, "grad_norm": 0.3865646421909332, "learning_rate": 0.0001835454061009371, "loss": 1.4901, "step": 6346 }, { "epoch": 0.08247637023404798, "grad_norm": 0.44800180196762085, "learning_rate": 0.00018354280663902573, "loss": 1.31, "step": 6347 }, { "epoch": 0.08248936477796386, "grad_norm": 0.3423851728439331, "learning_rate": 0.00018354020717711435, "loss": 1.4015, "step": 6348 }, { "epoch": 0.08250235932187973, "grad_norm": 0.5188177227973938, "learning_rate": 0.00018353760771520298, "loss": 1.6804, "step": 6349 }, { "epoch": 0.0825153538657956, "grad_norm": 0.3972238600254059, "learning_rate": 0.00018353500825329157, "loss": 1.3281, "step": 6350 }, { "epoch": 0.08252834840971147, "grad_norm": 0.40426206588745117, "learning_rate": 0.00018353240879138017, "loss": 1.6123, "step": 6351 }, { "epoch": 0.08254134295362735, "grad_norm": 0.4127432703971863, "learning_rate": 0.00018352980932946882, "loss": 1.4924, "step": 6352 }, { "epoch": 0.08255433749754322, "grad_norm": 0.280304878950119, "learning_rate": 0.00018352720986755742, "loss": 1.392, "step": 6353 }, { "epoch": 0.0825673320414591, "grad_norm": 0.45447084307670593, "learning_rate": 0.00018352461040564605, "loss": 1.4239, "step": 6354 }, { "epoch": 0.08258032658537497, "grad_norm": 0.3501328229904175, "learning_rate": 0.00018352201094373464, "loss": 1.3715, "step": 6355 }, { "epoch": 0.08259332112929084, "grad_norm": 0.40163177251815796, "learning_rate": 0.00018351941148182327, "loss": 1.2484, "step": 6356 }, { "epoch": 0.08260631567320671, "grad_norm": 0.4098997116088867, "learning_rate": 0.0001835168120199119, "loss": 1.4671, "step": 6357 }, { "epoch": 0.08261931021712259, "grad_norm": 0.3810834288597107, "learning_rate": 0.0001835142125580005, "loss": 1.5171, "step": 6358 }, { "epoch": 0.08263230476103846, "grad_norm": 0.41706377267837524, "learning_rate": 0.00018351161309608911, "loss": 1.4981, "step": 6359 }, { "epoch": 0.08264529930495433, "grad_norm": 0.4371885061264038, "learning_rate": 0.00018350901363417774, "loss": 1.4537, "step": 6360 }, { "epoch": 0.0826582938488702, "grad_norm": 0.3304905295372009, "learning_rate": 0.00018350641417226636, "loss": 1.337, "step": 6361 }, { "epoch": 0.08267128839278608, "grad_norm": 0.41819995641708374, "learning_rate": 0.00018350381471035496, "loss": 1.2867, "step": 6362 }, { "epoch": 0.08268428293670195, "grad_norm": 0.4859756827354431, "learning_rate": 0.00018350121524844358, "loss": 1.4621, "step": 6363 }, { "epoch": 0.08269727748061782, "grad_norm": 0.32665860652923584, "learning_rate": 0.0001834986157865322, "loss": 1.3724, "step": 6364 }, { "epoch": 0.0827102720245337, "grad_norm": 0.3746100664138794, "learning_rate": 0.0001834960163246208, "loss": 1.4845, "step": 6365 }, { "epoch": 0.08272326656844957, "grad_norm": 0.3771091103553772, "learning_rate": 0.00018349341686270943, "loss": 1.4149, "step": 6366 }, { "epoch": 0.08273626111236544, "grad_norm": 0.38788601756095886, "learning_rate": 0.00018349081740079806, "loss": 1.3736, "step": 6367 }, { "epoch": 0.08274925565628131, "grad_norm": 0.42723456025123596, "learning_rate": 0.00018348821793888665, "loss": 1.5531, "step": 6368 }, { "epoch": 0.08276225020019719, "grad_norm": 0.3780154287815094, "learning_rate": 0.00018348561847697528, "loss": 1.5811, "step": 6369 }, { "epoch": 0.08277524474411306, "grad_norm": 0.4152792990207672, "learning_rate": 0.00018348301901506387, "loss": 1.4255, "step": 6370 }, { "epoch": 0.08278823928802893, "grad_norm": 0.34465155005455017, "learning_rate": 0.00018348041955315253, "loss": 1.4392, "step": 6371 }, { "epoch": 0.0828012338319448, "grad_norm": 0.379810094833374, "learning_rate": 0.00018347782009124112, "loss": 1.5304, "step": 6372 }, { "epoch": 0.08281422837586068, "grad_norm": 0.40812456607818604, "learning_rate": 0.00018347522062932975, "loss": 1.3356, "step": 6373 }, { "epoch": 0.08282722291977655, "grad_norm": 0.4013248383998871, "learning_rate": 0.00018347262116741835, "loss": 1.7343, "step": 6374 }, { "epoch": 0.08284021746369243, "grad_norm": 0.3170357644557953, "learning_rate": 0.00018347002170550697, "loss": 1.4234, "step": 6375 }, { "epoch": 0.08285321200760831, "grad_norm": 0.38626012206077576, "learning_rate": 0.0001834674222435956, "loss": 1.6129, "step": 6376 }, { "epoch": 0.08286620655152419, "grad_norm": 0.44640329480171204, "learning_rate": 0.0001834648227816842, "loss": 1.5105, "step": 6377 }, { "epoch": 0.08287920109544006, "grad_norm": 0.3134946823120117, "learning_rate": 0.00018346222331977282, "loss": 1.413, "step": 6378 }, { "epoch": 0.08289219563935593, "grad_norm": 0.3978760838508606, "learning_rate": 0.00018345962385786144, "loss": 1.327, "step": 6379 }, { "epoch": 0.0829051901832718, "grad_norm": 0.4487209618091583, "learning_rate": 0.00018345702439595004, "loss": 1.4105, "step": 6380 }, { "epoch": 0.08291818472718768, "grad_norm": 0.3922748565673828, "learning_rate": 0.00018345442493403866, "loss": 1.5619, "step": 6381 }, { "epoch": 0.08293117927110355, "grad_norm": 0.35467037558555603, "learning_rate": 0.00018345182547212726, "loss": 1.5109, "step": 6382 }, { "epoch": 0.08294417381501942, "grad_norm": 0.35295793414115906, "learning_rate": 0.0001834492260102159, "loss": 1.4991, "step": 6383 }, { "epoch": 0.0829571683589353, "grad_norm": 0.408783882856369, "learning_rate": 0.0001834466265483045, "loss": 1.4983, "step": 6384 }, { "epoch": 0.08297016290285117, "grad_norm": 0.39997902512550354, "learning_rate": 0.00018344402708639313, "loss": 1.3185, "step": 6385 }, { "epoch": 0.08298315744676704, "grad_norm": 0.36605584621429443, "learning_rate": 0.00018344142762448173, "loss": 1.3637, "step": 6386 }, { "epoch": 0.08299615199068291, "grad_norm": 0.48475515842437744, "learning_rate": 0.00018343882816257036, "loss": 1.4331, "step": 6387 }, { "epoch": 0.08300914653459879, "grad_norm": 0.37995797395706177, "learning_rate": 0.00018343622870065898, "loss": 1.4912, "step": 6388 }, { "epoch": 0.08302214107851466, "grad_norm": 0.35502561926841736, "learning_rate": 0.00018343362923874758, "loss": 1.3799, "step": 6389 }, { "epoch": 0.08303513562243053, "grad_norm": 0.2844708561897278, "learning_rate": 0.0001834310297768362, "loss": 1.6807, "step": 6390 }, { "epoch": 0.0830481301663464, "grad_norm": 0.42832931876182556, "learning_rate": 0.00018342843031492483, "loss": 1.2539, "step": 6391 }, { "epoch": 0.08306112471026228, "grad_norm": 0.4181867241859436, "learning_rate": 0.00018342583085301342, "loss": 1.5233, "step": 6392 }, { "epoch": 0.08307411925417815, "grad_norm": 0.4286670684814453, "learning_rate": 0.00018342323139110205, "loss": 1.381, "step": 6393 }, { "epoch": 0.08308711379809403, "grad_norm": 0.37081727385520935, "learning_rate": 0.00018342063192919065, "loss": 1.539, "step": 6394 }, { "epoch": 0.0831001083420099, "grad_norm": 0.3459774851799011, "learning_rate": 0.0001834180324672793, "loss": 1.2504, "step": 6395 }, { "epoch": 0.08311310288592577, "grad_norm": 0.4026981294155121, "learning_rate": 0.0001834154330053679, "loss": 1.394, "step": 6396 }, { "epoch": 0.08312609742984164, "grad_norm": 0.421860009431839, "learning_rate": 0.00018341283354345652, "loss": 1.4205, "step": 6397 }, { "epoch": 0.08313909197375752, "grad_norm": 0.4410203993320465, "learning_rate": 0.00018341023408154512, "loss": 1.4552, "step": 6398 }, { "epoch": 0.08315208651767339, "grad_norm": 0.3262028694152832, "learning_rate": 0.00018340763461963374, "loss": 1.4587, "step": 6399 }, { "epoch": 0.08316508106158926, "grad_norm": 0.39461982250213623, "learning_rate": 0.00018340503515772237, "loss": 1.3985, "step": 6400 }, { "epoch": 0.08317807560550514, "grad_norm": 0.308521032333374, "learning_rate": 0.00018340243569581096, "loss": 1.4266, "step": 6401 }, { "epoch": 0.08319107014942101, "grad_norm": 0.39089030027389526, "learning_rate": 0.00018339983623389961, "loss": 1.4905, "step": 6402 }, { "epoch": 0.08320406469333688, "grad_norm": 0.478932648897171, "learning_rate": 0.0001833972367719882, "loss": 1.6396, "step": 6403 }, { "epoch": 0.08321705923725276, "grad_norm": 0.4224386215209961, "learning_rate": 0.0001833946373100768, "loss": 1.4816, "step": 6404 }, { "epoch": 0.08323005378116863, "grad_norm": 0.38532745838165283, "learning_rate": 0.00018339203784816543, "loss": 1.4052, "step": 6405 }, { "epoch": 0.0832430483250845, "grad_norm": 0.24512577056884766, "learning_rate": 0.00018338943838625406, "loss": 1.1233, "step": 6406 }, { "epoch": 0.08325604286900037, "grad_norm": 0.3907359540462494, "learning_rate": 0.00018338683892434268, "loss": 1.4548, "step": 6407 }, { "epoch": 0.08326903741291625, "grad_norm": 0.4116802513599396, "learning_rate": 0.00018338423946243128, "loss": 1.5438, "step": 6408 }, { "epoch": 0.08328203195683212, "grad_norm": 0.38361355662345886, "learning_rate": 0.0001833816400005199, "loss": 1.7092, "step": 6409 }, { "epoch": 0.08329502650074799, "grad_norm": 0.4690987169742584, "learning_rate": 0.00018337904053860853, "loss": 1.6206, "step": 6410 }, { "epoch": 0.08330802104466387, "grad_norm": 0.43358904123306274, "learning_rate": 0.00018337644107669713, "loss": 1.5192, "step": 6411 }, { "epoch": 0.08332101558857974, "grad_norm": 0.4032171368598938, "learning_rate": 0.00018337384161478575, "loss": 1.4371, "step": 6412 }, { "epoch": 0.08333401013249561, "grad_norm": 0.483635276556015, "learning_rate": 0.00018337124215287435, "loss": 1.4422, "step": 6413 }, { "epoch": 0.0833470046764115, "grad_norm": 0.3206835985183716, "learning_rate": 0.000183368642690963, "loss": 1.2842, "step": 6414 }, { "epoch": 0.08335999922032737, "grad_norm": 0.4473925530910492, "learning_rate": 0.0001833660432290516, "loss": 1.5486, "step": 6415 }, { "epoch": 0.08337299376424324, "grad_norm": 0.39831212162971497, "learning_rate": 0.00018336344376714022, "loss": 1.4663, "step": 6416 }, { "epoch": 0.08338598830815912, "grad_norm": 0.3915517032146454, "learning_rate": 0.00018336084430522882, "loss": 1.3953, "step": 6417 }, { "epoch": 0.08339898285207499, "grad_norm": 0.37744471430778503, "learning_rate": 0.00018335824484331744, "loss": 1.3147, "step": 6418 }, { "epoch": 0.08341197739599086, "grad_norm": 0.27386778593063354, "learning_rate": 0.00018335564538140607, "loss": 1.4143, "step": 6419 }, { "epoch": 0.08342497193990674, "grad_norm": 0.3985297679901123, "learning_rate": 0.00018335304591949467, "loss": 1.4344, "step": 6420 }, { "epoch": 0.08343796648382261, "grad_norm": 0.35788625478744507, "learning_rate": 0.0001833504464575833, "loss": 1.4988, "step": 6421 }, { "epoch": 0.08345096102773848, "grad_norm": 0.6142004132270813, "learning_rate": 0.00018334784699567191, "loss": 1.554, "step": 6422 }, { "epoch": 0.08346395557165436, "grad_norm": 0.4944033920764923, "learning_rate": 0.0001833452475337605, "loss": 1.6212, "step": 6423 }, { "epoch": 0.08347695011557023, "grad_norm": 0.4056105315685272, "learning_rate": 0.00018334264807184914, "loss": 1.581, "step": 6424 }, { "epoch": 0.0834899446594861, "grad_norm": 0.3387428820133209, "learning_rate": 0.00018334004860993773, "loss": 1.3207, "step": 6425 }, { "epoch": 0.08350293920340197, "grad_norm": 0.36850714683532715, "learning_rate": 0.00018333744914802638, "loss": 1.4192, "step": 6426 }, { "epoch": 0.08351593374731785, "grad_norm": 0.40807291865348816, "learning_rate": 0.00018333484968611498, "loss": 1.5072, "step": 6427 }, { "epoch": 0.08352892829123372, "grad_norm": 0.3865683078765869, "learning_rate": 0.0001833322502242036, "loss": 1.7671, "step": 6428 }, { "epoch": 0.08354192283514959, "grad_norm": 0.46008211374282837, "learning_rate": 0.0001833296507622922, "loss": 1.3162, "step": 6429 }, { "epoch": 0.08355491737906547, "grad_norm": 0.3837754428386688, "learning_rate": 0.00018332705130038083, "loss": 1.5041, "step": 6430 }, { "epoch": 0.08356791192298134, "grad_norm": 0.43874359130859375, "learning_rate": 0.00018332445183846945, "loss": 1.6245, "step": 6431 }, { "epoch": 0.08358090646689721, "grad_norm": 0.3807770907878876, "learning_rate": 0.00018332185237655805, "loss": 1.4719, "step": 6432 }, { "epoch": 0.08359390101081308, "grad_norm": 0.3583511710166931, "learning_rate": 0.00018331925291464668, "loss": 1.4884, "step": 6433 }, { "epoch": 0.08360689555472896, "grad_norm": 0.3377654254436493, "learning_rate": 0.0001833166534527353, "loss": 1.4229, "step": 6434 }, { "epoch": 0.08361989009864483, "grad_norm": 0.42893752455711365, "learning_rate": 0.0001833140539908239, "loss": 1.4813, "step": 6435 }, { "epoch": 0.0836328846425607, "grad_norm": 0.3380570113658905, "learning_rate": 0.00018331145452891252, "loss": 1.4832, "step": 6436 }, { "epoch": 0.08364587918647658, "grad_norm": 0.4045338034629822, "learning_rate": 0.00018330885506700115, "loss": 1.5704, "step": 6437 }, { "epoch": 0.08365887373039245, "grad_norm": 0.3734631836414337, "learning_rate": 0.00018330625560508977, "loss": 1.3702, "step": 6438 }, { "epoch": 0.08367186827430832, "grad_norm": 0.4069790244102478, "learning_rate": 0.00018330365614317837, "loss": 1.4478, "step": 6439 }, { "epoch": 0.0836848628182242, "grad_norm": 0.370533287525177, "learning_rate": 0.000183301056681267, "loss": 1.2567, "step": 6440 }, { "epoch": 0.08369785736214007, "grad_norm": 0.4504433274269104, "learning_rate": 0.00018329845721935562, "loss": 1.38, "step": 6441 }, { "epoch": 0.08371085190605594, "grad_norm": 0.44486233592033386, "learning_rate": 0.00018329585775744421, "loss": 1.3701, "step": 6442 }, { "epoch": 0.08372384644997181, "grad_norm": 0.39275482296943665, "learning_rate": 0.00018329325829553284, "loss": 1.4081, "step": 6443 }, { "epoch": 0.08373684099388769, "grad_norm": 0.42928028106689453, "learning_rate": 0.00018329065883362144, "loss": 1.4704, "step": 6444 }, { "epoch": 0.08374983553780356, "grad_norm": 0.3356246054172516, "learning_rate": 0.0001832880593717101, "loss": 1.3732, "step": 6445 }, { "epoch": 0.08376283008171943, "grad_norm": 0.3775327205657959, "learning_rate": 0.00018328545990979868, "loss": 1.4392, "step": 6446 }, { "epoch": 0.0837758246256353, "grad_norm": 0.4768427014350891, "learning_rate": 0.00018328286044788728, "loss": 1.4577, "step": 6447 }, { "epoch": 0.08378881916955118, "grad_norm": 0.3144374489784241, "learning_rate": 0.0001832802609859759, "loss": 1.2489, "step": 6448 }, { "epoch": 0.08380181371346705, "grad_norm": 0.41254571080207825, "learning_rate": 0.00018327766152406453, "loss": 1.2922, "step": 6449 }, { "epoch": 0.08381480825738293, "grad_norm": 0.4477587938308716, "learning_rate": 0.00018327506206215316, "loss": 1.476, "step": 6450 }, { "epoch": 0.0838278028012988, "grad_norm": 0.4534163177013397, "learning_rate": 0.00018327246260024175, "loss": 1.5073, "step": 6451 }, { "epoch": 0.08384079734521468, "grad_norm": 0.4524908661842346, "learning_rate": 0.00018326986313833038, "loss": 1.6396, "step": 6452 }, { "epoch": 0.08385379188913056, "grad_norm": 0.4170534014701843, "learning_rate": 0.000183267263676419, "loss": 1.3806, "step": 6453 }, { "epoch": 0.08386678643304643, "grad_norm": 0.3801972568035126, "learning_rate": 0.0001832646642145076, "loss": 1.3386, "step": 6454 }, { "epoch": 0.0838797809769623, "grad_norm": 0.43365636467933655, "learning_rate": 0.00018326206475259622, "loss": 1.5673, "step": 6455 }, { "epoch": 0.08389277552087818, "grad_norm": 0.3590330183506012, "learning_rate": 0.00018325946529068482, "loss": 1.4961, "step": 6456 }, { "epoch": 0.08390577006479405, "grad_norm": 0.42626532912254333, "learning_rate": 0.00018325686582877347, "loss": 1.449, "step": 6457 }, { "epoch": 0.08391876460870992, "grad_norm": 0.45199307799339294, "learning_rate": 0.00018325426636686207, "loss": 1.348, "step": 6458 }, { "epoch": 0.0839317591526258, "grad_norm": 0.3183334767818451, "learning_rate": 0.00018325166690495067, "loss": 1.406, "step": 6459 }, { "epoch": 0.08394475369654167, "grad_norm": 0.6126296520233154, "learning_rate": 0.0001832490674430393, "loss": 1.4263, "step": 6460 }, { "epoch": 0.08395774824045754, "grad_norm": 0.3479015827178955, "learning_rate": 0.00018324646798112792, "loss": 1.4934, "step": 6461 }, { "epoch": 0.08397074278437341, "grad_norm": 0.4620133936405182, "learning_rate": 0.00018324386851921654, "loss": 1.2783, "step": 6462 }, { "epoch": 0.08398373732828929, "grad_norm": 0.3367346227169037, "learning_rate": 0.00018324126905730514, "loss": 1.3929, "step": 6463 }, { "epoch": 0.08399673187220516, "grad_norm": 0.4636990427970886, "learning_rate": 0.00018323866959539376, "loss": 1.6359, "step": 6464 }, { "epoch": 0.08400972641612103, "grad_norm": 0.39021503925323486, "learning_rate": 0.0001832360701334824, "loss": 1.3567, "step": 6465 }, { "epoch": 0.0840227209600369, "grad_norm": 0.4318285286426544, "learning_rate": 0.00018323347067157098, "loss": 1.4567, "step": 6466 }, { "epoch": 0.08403571550395278, "grad_norm": 0.3595968782901764, "learning_rate": 0.0001832308712096596, "loss": 1.4315, "step": 6467 }, { "epoch": 0.08404871004786865, "grad_norm": 0.3814601004123688, "learning_rate": 0.0001832282717477482, "loss": 1.4064, "step": 6468 }, { "epoch": 0.08406170459178453, "grad_norm": 0.404971182346344, "learning_rate": 0.00018322567228583686, "loss": 1.4376, "step": 6469 }, { "epoch": 0.0840746991357004, "grad_norm": 0.3815779685974121, "learning_rate": 0.00018322307282392546, "loss": 1.3255, "step": 6470 }, { "epoch": 0.08408769367961627, "grad_norm": 0.3937915563583374, "learning_rate": 0.00018322047336201408, "loss": 1.293, "step": 6471 }, { "epoch": 0.08410068822353214, "grad_norm": 0.43878594040870667, "learning_rate": 0.00018321787390010268, "loss": 1.5132, "step": 6472 }, { "epoch": 0.08411368276744802, "grad_norm": 0.3743495047092438, "learning_rate": 0.0001832152744381913, "loss": 1.3853, "step": 6473 }, { "epoch": 0.08412667731136389, "grad_norm": 0.43098658323287964, "learning_rate": 0.00018321267497627993, "loss": 1.4325, "step": 6474 }, { "epoch": 0.08413967185527976, "grad_norm": 0.43854820728302, "learning_rate": 0.00018321007551436852, "loss": 1.5207, "step": 6475 }, { "epoch": 0.08415266639919564, "grad_norm": 0.3657275140285492, "learning_rate": 0.00018320747605245715, "loss": 1.4217, "step": 6476 }, { "epoch": 0.08416566094311151, "grad_norm": 0.38142380118370056, "learning_rate": 0.00018320487659054577, "loss": 1.3738, "step": 6477 }, { "epoch": 0.08417865548702738, "grad_norm": 0.38894620537757874, "learning_rate": 0.00018320227712863437, "loss": 1.4504, "step": 6478 }, { "epoch": 0.08419165003094325, "grad_norm": 0.3506476879119873, "learning_rate": 0.000183199677666723, "loss": 1.1223, "step": 6479 }, { "epoch": 0.08420464457485913, "grad_norm": 0.44102421402931213, "learning_rate": 0.00018319707820481162, "loss": 1.4269, "step": 6480 }, { "epoch": 0.084217639118775, "grad_norm": 0.4569186568260193, "learning_rate": 0.00018319447874290024, "loss": 1.596, "step": 6481 }, { "epoch": 0.08423063366269087, "grad_norm": 0.38186657428741455, "learning_rate": 0.00018319187928098884, "loss": 1.2877, "step": 6482 }, { "epoch": 0.08424362820660675, "grad_norm": 0.45444315671920776, "learning_rate": 0.00018318927981907747, "loss": 1.4535, "step": 6483 }, { "epoch": 0.08425662275052262, "grad_norm": 0.32623738050460815, "learning_rate": 0.0001831866803571661, "loss": 1.582, "step": 6484 }, { "epoch": 0.08426961729443849, "grad_norm": 0.3647509813308716, "learning_rate": 0.0001831840808952547, "loss": 1.3403, "step": 6485 }, { "epoch": 0.08428261183835437, "grad_norm": 0.4334845542907715, "learning_rate": 0.0001831814814333433, "loss": 1.6941, "step": 6486 }, { "epoch": 0.08429560638227024, "grad_norm": 0.2596302926540375, "learning_rate": 0.0001831788819714319, "loss": 1.1878, "step": 6487 }, { "epoch": 0.08430860092618611, "grad_norm": 0.3581424355506897, "learning_rate": 0.00018317628250952053, "loss": 1.5037, "step": 6488 }, { "epoch": 0.08432159547010198, "grad_norm": 0.36386409401893616, "learning_rate": 0.00018317368304760916, "loss": 1.7406, "step": 6489 }, { "epoch": 0.08433459001401787, "grad_norm": 0.5269473791122437, "learning_rate": 0.00018317108358569776, "loss": 1.4095, "step": 6490 }, { "epoch": 0.08434758455793374, "grad_norm": 0.36427703499794006, "learning_rate": 0.00018316848412378638, "loss": 1.5148, "step": 6491 }, { "epoch": 0.08436057910184962, "grad_norm": 0.4502542316913605, "learning_rate": 0.000183165884661875, "loss": 1.4436, "step": 6492 }, { "epoch": 0.08437357364576549, "grad_norm": 0.41653943061828613, "learning_rate": 0.00018316328519996363, "loss": 1.3564, "step": 6493 }, { "epoch": 0.08438656818968136, "grad_norm": 0.40431392192840576, "learning_rate": 0.00018316068573805223, "loss": 1.4104, "step": 6494 }, { "epoch": 0.08439956273359724, "grad_norm": 0.37855595350265503, "learning_rate": 0.00018315808627614085, "loss": 1.3603, "step": 6495 }, { "epoch": 0.08441255727751311, "grad_norm": 0.37523722648620605, "learning_rate": 0.00018315548681422948, "loss": 1.4139, "step": 6496 }, { "epoch": 0.08442555182142898, "grad_norm": 0.42329201102256775, "learning_rate": 0.00018315288735231807, "loss": 1.5167, "step": 6497 }, { "epoch": 0.08443854636534485, "grad_norm": 0.47706955671310425, "learning_rate": 0.0001831502878904067, "loss": 1.3451, "step": 6498 }, { "epoch": 0.08445154090926073, "grad_norm": 0.37929004430770874, "learning_rate": 0.0001831476884284953, "loss": 1.4028, "step": 6499 }, { "epoch": 0.0844645354531766, "grad_norm": 0.2957339286804199, "learning_rate": 0.00018314508896658395, "loss": 1.5917, "step": 6500 }, { "epoch": 0.08447752999709247, "grad_norm": 0.3257904350757599, "learning_rate": 0.00018314248950467254, "loss": 1.5287, "step": 6501 }, { "epoch": 0.08449052454100835, "grad_norm": 0.3705645203590393, "learning_rate": 0.00018313989004276114, "loss": 1.2935, "step": 6502 }, { "epoch": 0.08450351908492422, "grad_norm": 0.37605494260787964, "learning_rate": 0.00018313729058084977, "loss": 1.4802, "step": 6503 }, { "epoch": 0.08451651362884009, "grad_norm": 0.3834712505340576, "learning_rate": 0.0001831346911189384, "loss": 1.5491, "step": 6504 }, { "epoch": 0.08452950817275597, "grad_norm": 0.31999561190605164, "learning_rate": 0.00018313209165702701, "loss": 1.4002, "step": 6505 }, { "epoch": 0.08454250271667184, "grad_norm": 0.31522393226623535, "learning_rate": 0.0001831294921951156, "loss": 1.392, "step": 6506 }, { "epoch": 0.08455549726058771, "grad_norm": 0.4409068524837494, "learning_rate": 0.00018312689273320424, "loss": 1.6956, "step": 6507 }, { "epoch": 0.08456849180450358, "grad_norm": 0.38314947485923767, "learning_rate": 0.00018312429327129286, "loss": 1.3847, "step": 6508 }, { "epoch": 0.08458148634841946, "grad_norm": 0.39943504333496094, "learning_rate": 0.00018312169380938146, "loss": 1.6174, "step": 6509 }, { "epoch": 0.08459448089233533, "grad_norm": 0.32379797101020813, "learning_rate": 0.00018311909434747008, "loss": 1.2652, "step": 6510 }, { "epoch": 0.0846074754362512, "grad_norm": 0.4151378273963928, "learning_rate": 0.00018311649488555868, "loss": 1.1655, "step": 6511 }, { "epoch": 0.08462046998016708, "grad_norm": 0.4282408654689789, "learning_rate": 0.00018311389542364733, "loss": 1.5631, "step": 6512 }, { "epoch": 0.08463346452408295, "grad_norm": 0.4001627266407013, "learning_rate": 0.00018311129596173593, "loss": 1.2653, "step": 6513 }, { "epoch": 0.08464645906799882, "grad_norm": 0.3585038483142853, "learning_rate": 0.00018310869649982453, "loss": 1.4528, "step": 6514 }, { "epoch": 0.0846594536119147, "grad_norm": 0.379959374666214, "learning_rate": 0.00018310609703791318, "loss": 1.5179, "step": 6515 }, { "epoch": 0.08467244815583057, "grad_norm": 0.40313899517059326, "learning_rate": 0.00018310349757600178, "loss": 1.4173, "step": 6516 }, { "epoch": 0.08468544269974644, "grad_norm": 0.26642388105392456, "learning_rate": 0.0001831008981140904, "loss": 1.4209, "step": 6517 }, { "epoch": 0.08469843724366231, "grad_norm": 0.41755956411361694, "learning_rate": 0.000183098298652179, "loss": 1.3722, "step": 6518 }, { "epoch": 0.08471143178757819, "grad_norm": 0.3805711567401886, "learning_rate": 0.00018309569919026762, "loss": 1.4338, "step": 6519 }, { "epoch": 0.08472442633149406, "grad_norm": 0.35494542121887207, "learning_rate": 0.00018309309972835625, "loss": 1.5149, "step": 6520 }, { "epoch": 0.08473742087540993, "grad_norm": 0.35381510853767395, "learning_rate": 0.00018309050026644484, "loss": 1.633, "step": 6521 }, { "epoch": 0.0847504154193258, "grad_norm": 0.4472545385360718, "learning_rate": 0.00018308790080453347, "loss": 1.5209, "step": 6522 }, { "epoch": 0.08476340996324168, "grad_norm": 0.4020017385482788, "learning_rate": 0.0001830853013426221, "loss": 1.404, "step": 6523 }, { "epoch": 0.08477640450715755, "grad_norm": 0.5307603478431702, "learning_rate": 0.00018308270188071072, "loss": 1.4422, "step": 6524 }, { "epoch": 0.08478939905107342, "grad_norm": 0.32418203353881836, "learning_rate": 0.00018308010241879931, "loss": 1.528, "step": 6525 }, { "epoch": 0.0848023935949893, "grad_norm": 0.47254547476768494, "learning_rate": 0.0001830775029568879, "loss": 1.5067, "step": 6526 }, { "epoch": 0.08481538813890517, "grad_norm": 0.3759053647518158, "learning_rate": 0.00018307490349497656, "loss": 1.4258, "step": 6527 }, { "epoch": 0.08482838268282106, "grad_norm": 0.35423743724823, "learning_rate": 0.00018307230403306516, "loss": 1.5367, "step": 6528 }, { "epoch": 0.08484137722673693, "grad_norm": 0.5136507749557495, "learning_rate": 0.00018306970457115379, "loss": 1.4426, "step": 6529 }, { "epoch": 0.0848543717706528, "grad_norm": 0.3568829596042633, "learning_rate": 0.00018306710510924238, "loss": 1.4597, "step": 6530 }, { "epoch": 0.08486736631456868, "grad_norm": 0.41523584723472595, "learning_rate": 0.000183064505647331, "loss": 1.527, "step": 6531 }, { "epoch": 0.08488036085848455, "grad_norm": 0.33089902997016907, "learning_rate": 0.00018306190618541963, "loss": 1.149, "step": 6532 }, { "epoch": 0.08489335540240042, "grad_norm": 0.38755321502685547, "learning_rate": 0.00018305930672350823, "loss": 1.5139, "step": 6533 }, { "epoch": 0.0849063499463163, "grad_norm": 0.3957598805427551, "learning_rate": 0.00018305670726159685, "loss": 1.3406, "step": 6534 }, { "epoch": 0.08491934449023217, "grad_norm": 0.4124167859554291, "learning_rate": 0.00018305410779968548, "loss": 1.5872, "step": 6535 }, { "epoch": 0.08493233903414804, "grad_norm": 0.51162189245224, "learning_rate": 0.0001830515083377741, "loss": 1.386, "step": 6536 }, { "epoch": 0.08494533357806391, "grad_norm": 0.34675490856170654, "learning_rate": 0.0001830489088758627, "loss": 1.5843, "step": 6537 }, { "epoch": 0.08495832812197979, "grad_norm": 0.38475319743156433, "learning_rate": 0.00018304630941395132, "loss": 1.3556, "step": 6538 }, { "epoch": 0.08497132266589566, "grad_norm": 0.40646085143089294, "learning_rate": 0.00018304370995203995, "loss": 1.3462, "step": 6539 }, { "epoch": 0.08498431720981153, "grad_norm": 0.40540021657943726, "learning_rate": 0.00018304111049012855, "loss": 1.4431, "step": 6540 }, { "epoch": 0.0849973117537274, "grad_norm": 0.4556567370891571, "learning_rate": 0.00018303851102821717, "loss": 1.5604, "step": 6541 }, { "epoch": 0.08501030629764328, "grad_norm": 0.35327455401420593, "learning_rate": 0.00018303591156630577, "loss": 1.2263, "step": 6542 }, { "epoch": 0.08502330084155915, "grad_norm": 0.4268738329410553, "learning_rate": 0.0001830333121043944, "loss": 1.5877, "step": 6543 }, { "epoch": 0.08503629538547502, "grad_norm": 0.43034985661506653, "learning_rate": 0.00018303071264248302, "loss": 1.4343, "step": 6544 }, { "epoch": 0.0850492899293909, "grad_norm": 0.4247622787952423, "learning_rate": 0.00018302811318057161, "loss": 1.4409, "step": 6545 }, { "epoch": 0.08506228447330677, "grad_norm": 0.3663536012172699, "learning_rate": 0.00018302551371866024, "loss": 1.2992, "step": 6546 }, { "epoch": 0.08507527901722264, "grad_norm": 0.35559043288230896, "learning_rate": 0.00018302291425674886, "loss": 1.4171, "step": 6547 }, { "epoch": 0.08508827356113852, "grad_norm": 0.4016110897064209, "learning_rate": 0.0001830203147948375, "loss": 1.2979, "step": 6548 }, { "epoch": 0.08510126810505439, "grad_norm": 0.36676278710365295, "learning_rate": 0.00018301771533292609, "loss": 1.34, "step": 6549 }, { "epoch": 0.08511426264897026, "grad_norm": 0.44288933277130127, "learning_rate": 0.0001830151158710147, "loss": 1.5184, "step": 6550 }, { "epoch": 0.08512725719288614, "grad_norm": 0.4219822287559509, "learning_rate": 0.00018301251640910333, "loss": 1.4674, "step": 6551 }, { "epoch": 0.08514025173680201, "grad_norm": 0.3047439754009247, "learning_rate": 0.00018300991694719193, "loss": 1.4308, "step": 6552 }, { "epoch": 0.08515324628071788, "grad_norm": 0.395720899105072, "learning_rate": 0.00018300731748528056, "loss": 1.4747, "step": 6553 }, { "epoch": 0.08516624082463375, "grad_norm": 0.3163815140724182, "learning_rate": 0.00018300471802336918, "loss": 1.3703, "step": 6554 }, { "epoch": 0.08517923536854963, "grad_norm": 0.4249371886253357, "learning_rate": 0.0001830021185614578, "loss": 1.6033, "step": 6555 }, { "epoch": 0.0851922299124655, "grad_norm": 0.3310534954071045, "learning_rate": 0.0001829995190995464, "loss": 1.3846, "step": 6556 }, { "epoch": 0.08520522445638137, "grad_norm": 0.3335309326648712, "learning_rate": 0.000182996919637635, "loss": 1.3639, "step": 6557 }, { "epoch": 0.08521821900029725, "grad_norm": 0.3078988492488861, "learning_rate": 0.00018299432017572365, "loss": 1.2313, "step": 6558 }, { "epoch": 0.08523121354421312, "grad_norm": 0.4341719150543213, "learning_rate": 0.00018299172071381225, "loss": 1.5524, "step": 6559 }, { "epoch": 0.08524420808812899, "grad_norm": 0.368182897567749, "learning_rate": 0.00018298912125190087, "loss": 1.394, "step": 6560 }, { "epoch": 0.08525720263204487, "grad_norm": 0.4454631209373474, "learning_rate": 0.00018298652178998947, "loss": 1.287, "step": 6561 }, { "epoch": 0.08527019717596074, "grad_norm": 0.3346441090106964, "learning_rate": 0.0001829839223280781, "loss": 1.3334, "step": 6562 }, { "epoch": 0.08528319171987661, "grad_norm": 0.4472450315952301, "learning_rate": 0.00018298132286616672, "loss": 1.5126, "step": 6563 }, { "epoch": 0.08529618626379248, "grad_norm": 0.42458298802375793, "learning_rate": 0.00018297872340425532, "loss": 1.5703, "step": 6564 }, { "epoch": 0.08530918080770836, "grad_norm": 0.41712358593940735, "learning_rate": 0.00018297612394234394, "loss": 1.2264, "step": 6565 }, { "epoch": 0.08532217535162424, "grad_norm": 0.36192449927330017, "learning_rate": 0.00018297352448043257, "loss": 1.4235, "step": 6566 }, { "epoch": 0.08533516989554012, "grad_norm": 0.5095565319061279, "learning_rate": 0.0001829709250185212, "loss": 1.341, "step": 6567 }, { "epoch": 0.08534816443945599, "grad_norm": 0.2955765426158905, "learning_rate": 0.0001829683255566098, "loss": 1.2465, "step": 6568 }, { "epoch": 0.08536115898337186, "grad_norm": 0.4438478648662567, "learning_rate": 0.00018296572609469839, "loss": 1.3453, "step": 6569 }, { "epoch": 0.08537415352728774, "grad_norm": 0.42083898186683655, "learning_rate": 0.00018296312663278704, "loss": 1.3034, "step": 6570 }, { "epoch": 0.08538714807120361, "grad_norm": 0.4801914691925049, "learning_rate": 0.00018296052717087563, "loss": 1.7428, "step": 6571 }, { "epoch": 0.08540014261511948, "grad_norm": 0.37993961572647095, "learning_rate": 0.00018295792770896426, "loss": 1.5196, "step": 6572 }, { "epoch": 0.08541313715903535, "grad_norm": 0.4432593286037445, "learning_rate": 0.00018295532824705286, "loss": 1.5703, "step": 6573 }, { "epoch": 0.08542613170295123, "grad_norm": 0.3644528388977051, "learning_rate": 0.00018295272878514148, "loss": 1.3511, "step": 6574 }, { "epoch": 0.0854391262468671, "grad_norm": 0.4239424765110016, "learning_rate": 0.0001829501293232301, "loss": 1.5522, "step": 6575 }, { "epoch": 0.08545212079078297, "grad_norm": 0.3994225263595581, "learning_rate": 0.0001829475298613187, "loss": 1.4388, "step": 6576 }, { "epoch": 0.08546511533469885, "grad_norm": 0.35156315565109253, "learning_rate": 0.00018294493039940733, "loss": 1.6481, "step": 6577 }, { "epoch": 0.08547810987861472, "grad_norm": 0.4217776358127594, "learning_rate": 0.00018294233093749595, "loss": 1.5105, "step": 6578 }, { "epoch": 0.08549110442253059, "grad_norm": 0.44565704464912415, "learning_rate": 0.00018293973147558458, "loss": 1.3199, "step": 6579 }, { "epoch": 0.08550409896644647, "grad_norm": 0.46253079175949097, "learning_rate": 0.00018293713201367317, "loss": 1.5615, "step": 6580 }, { "epoch": 0.08551709351036234, "grad_norm": 0.44732171297073364, "learning_rate": 0.00018293453255176177, "loss": 1.493, "step": 6581 }, { "epoch": 0.08553008805427821, "grad_norm": 0.5240910649299622, "learning_rate": 0.00018293193308985042, "loss": 1.6053, "step": 6582 }, { "epoch": 0.08554308259819408, "grad_norm": 0.3878539204597473, "learning_rate": 0.00018292933362793902, "loss": 1.4246, "step": 6583 }, { "epoch": 0.08555607714210996, "grad_norm": 0.40849217772483826, "learning_rate": 0.00018292673416602764, "loss": 1.4402, "step": 6584 }, { "epoch": 0.08556907168602583, "grad_norm": 0.46559247374534607, "learning_rate": 0.00018292413470411624, "loss": 1.4489, "step": 6585 }, { "epoch": 0.0855820662299417, "grad_norm": 0.35966387391090393, "learning_rate": 0.00018292153524220487, "loss": 1.3184, "step": 6586 }, { "epoch": 0.08559506077385758, "grad_norm": 0.36290839314460754, "learning_rate": 0.0001829189357802935, "loss": 1.4918, "step": 6587 }, { "epoch": 0.08560805531777345, "grad_norm": 0.4738025665283203, "learning_rate": 0.0001829163363183821, "loss": 1.6859, "step": 6588 }, { "epoch": 0.08562104986168932, "grad_norm": 0.3524314761161804, "learning_rate": 0.00018291373685647074, "loss": 1.316, "step": 6589 }, { "epoch": 0.0856340444056052, "grad_norm": 0.26684197783470154, "learning_rate": 0.00018291113739455934, "loss": 1.3911, "step": 6590 }, { "epoch": 0.08564703894952107, "grad_norm": 0.5597006678581238, "learning_rate": 0.00018290853793264796, "loss": 1.4495, "step": 6591 }, { "epoch": 0.08566003349343694, "grad_norm": 0.4325263202190399, "learning_rate": 0.00018290593847073656, "loss": 1.4761, "step": 6592 }, { "epoch": 0.08567302803735281, "grad_norm": 0.34806618094444275, "learning_rate": 0.00018290333900882518, "loss": 1.2459, "step": 6593 }, { "epoch": 0.08568602258126869, "grad_norm": 0.3460070490837097, "learning_rate": 0.0001829007395469138, "loss": 1.3498, "step": 6594 }, { "epoch": 0.08569901712518456, "grad_norm": 0.3247237205505371, "learning_rate": 0.0001828981400850024, "loss": 1.2516, "step": 6595 }, { "epoch": 0.08571201166910043, "grad_norm": 0.36009061336517334, "learning_rate": 0.00018289554062309103, "loss": 1.4388, "step": 6596 }, { "epoch": 0.0857250062130163, "grad_norm": 0.41921988129615784, "learning_rate": 0.00018289294116117965, "loss": 1.466, "step": 6597 }, { "epoch": 0.08573800075693218, "grad_norm": 0.5115149021148682, "learning_rate": 0.00018289034169926825, "loss": 1.651, "step": 6598 }, { "epoch": 0.08575099530084805, "grad_norm": 0.3894090950489044, "learning_rate": 0.00018288774223735688, "loss": 1.531, "step": 6599 }, { "epoch": 0.08576398984476392, "grad_norm": 0.32461825013160706, "learning_rate": 0.00018288514277544547, "loss": 1.3326, "step": 6600 }, { "epoch": 0.0857769843886798, "grad_norm": 0.3585717976093292, "learning_rate": 0.00018288254331353412, "loss": 1.4792, "step": 6601 }, { "epoch": 0.08578997893259567, "grad_norm": 0.43605196475982666, "learning_rate": 0.00018287994385162272, "loss": 1.4011, "step": 6602 }, { "epoch": 0.08580297347651154, "grad_norm": 0.288531094789505, "learning_rate": 0.00018287734438971135, "loss": 1.5059, "step": 6603 }, { "epoch": 0.08581596802042742, "grad_norm": 0.34259143471717834, "learning_rate": 0.00018287474492779994, "loss": 1.2856, "step": 6604 }, { "epoch": 0.0858289625643433, "grad_norm": 0.3507554531097412, "learning_rate": 0.00018287214546588857, "loss": 1.4691, "step": 6605 }, { "epoch": 0.08584195710825918, "grad_norm": 0.32275745272636414, "learning_rate": 0.0001828695460039772, "loss": 1.6526, "step": 6606 }, { "epoch": 0.08585495165217505, "grad_norm": 0.3872429430484772, "learning_rate": 0.0001828669465420658, "loss": 1.2231, "step": 6607 }, { "epoch": 0.08586794619609092, "grad_norm": 0.4679528474807739, "learning_rate": 0.00018286434708015441, "loss": 1.4568, "step": 6608 }, { "epoch": 0.0858809407400068, "grad_norm": 0.4792744815349579, "learning_rate": 0.00018286174761824304, "loss": 1.4876, "step": 6609 }, { "epoch": 0.08589393528392267, "grad_norm": 0.45394331216812134, "learning_rate": 0.00018285914815633164, "loss": 1.4684, "step": 6610 }, { "epoch": 0.08590692982783854, "grad_norm": 0.31654372811317444, "learning_rate": 0.00018285654869442026, "loss": 1.4346, "step": 6611 }, { "epoch": 0.08591992437175441, "grad_norm": 0.34356799721717834, "learning_rate": 0.00018285394923250886, "loss": 1.4469, "step": 6612 }, { "epoch": 0.08593291891567029, "grad_norm": 0.3233030140399933, "learning_rate": 0.0001828513497705975, "loss": 1.276, "step": 6613 }, { "epoch": 0.08594591345958616, "grad_norm": 0.4390173554420471, "learning_rate": 0.0001828487503086861, "loss": 1.5669, "step": 6614 }, { "epoch": 0.08595890800350203, "grad_norm": 0.3441517651081085, "learning_rate": 0.00018284615084677473, "loss": 1.2979, "step": 6615 }, { "epoch": 0.0859719025474179, "grad_norm": 0.32941415905952454, "learning_rate": 0.00018284355138486333, "loss": 1.3249, "step": 6616 }, { "epoch": 0.08598489709133378, "grad_norm": 0.40751445293426514, "learning_rate": 0.00018284095192295195, "loss": 1.1955, "step": 6617 }, { "epoch": 0.08599789163524965, "grad_norm": 0.27998894453048706, "learning_rate": 0.00018283835246104058, "loss": 1.4467, "step": 6618 }, { "epoch": 0.08601088617916552, "grad_norm": 0.5176675319671631, "learning_rate": 0.00018283575299912918, "loss": 1.568, "step": 6619 }, { "epoch": 0.0860238807230814, "grad_norm": 0.4798547923564911, "learning_rate": 0.0001828331535372178, "loss": 1.5515, "step": 6620 }, { "epoch": 0.08603687526699727, "grad_norm": 0.40374356508255005, "learning_rate": 0.00018283055407530642, "loss": 1.4655, "step": 6621 }, { "epoch": 0.08604986981091314, "grad_norm": 0.3490309715270996, "learning_rate": 0.00018282795461339505, "loss": 1.4753, "step": 6622 }, { "epoch": 0.08606286435482902, "grad_norm": 0.5119740962982178, "learning_rate": 0.00018282535515148365, "loss": 1.5082, "step": 6623 }, { "epoch": 0.08607585889874489, "grad_norm": 0.459770143032074, "learning_rate": 0.00018282275568957227, "loss": 1.3987, "step": 6624 }, { "epoch": 0.08608885344266076, "grad_norm": 0.2700651288032532, "learning_rate": 0.0001828201562276609, "loss": 1.5007, "step": 6625 }, { "epoch": 0.08610184798657664, "grad_norm": 0.2813178598880768, "learning_rate": 0.0001828175567657495, "loss": 1.2854, "step": 6626 }, { "epoch": 0.08611484253049251, "grad_norm": 0.3808160126209259, "learning_rate": 0.00018281495730383812, "loss": 1.4859, "step": 6627 }, { "epoch": 0.08612783707440838, "grad_norm": 0.33384811878204346, "learning_rate": 0.00018281235784192674, "loss": 1.611, "step": 6628 }, { "epoch": 0.08614083161832425, "grad_norm": 0.3770199716091156, "learning_rate": 0.00018280975838001534, "loss": 1.5353, "step": 6629 }, { "epoch": 0.08615382616224013, "grad_norm": 0.35864925384521484, "learning_rate": 0.00018280715891810396, "loss": 1.3598, "step": 6630 }, { "epoch": 0.086166820706156, "grad_norm": 0.4076600670814514, "learning_rate": 0.00018280455945619256, "loss": 1.6174, "step": 6631 }, { "epoch": 0.08617981525007187, "grad_norm": 0.42534613609313965, "learning_rate": 0.0001828019599942812, "loss": 1.3148, "step": 6632 }, { "epoch": 0.08619280979398775, "grad_norm": 0.4146581292152405, "learning_rate": 0.0001827993605323698, "loss": 1.4736, "step": 6633 }, { "epoch": 0.08620580433790362, "grad_norm": 0.3903135657310486, "learning_rate": 0.00018279676107045843, "loss": 1.3434, "step": 6634 }, { "epoch": 0.08621879888181949, "grad_norm": 0.3914014399051666, "learning_rate": 0.00018279416160854703, "loss": 1.3335, "step": 6635 }, { "epoch": 0.08623179342573536, "grad_norm": 0.3830832839012146, "learning_rate": 0.00018279156214663566, "loss": 1.2612, "step": 6636 }, { "epoch": 0.08624478796965124, "grad_norm": 0.34652161598205566, "learning_rate": 0.00018278896268472428, "loss": 1.3799, "step": 6637 }, { "epoch": 0.08625778251356711, "grad_norm": 0.4538786709308624, "learning_rate": 0.00018278636322281288, "loss": 1.4622, "step": 6638 }, { "epoch": 0.08627077705748298, "grad_norm": 0.341753214597702, "learning_rate": 0.0001827837637609015, "loss": 1.3347, "step": 6639 }, { "epoch": 0.08628377160139886, "grad_norm": 0.4236353635787964, "learning_rate": 0.00018278116429899013, "loss": 1.4453, "step": 6640 }, { "epoch": 0.08629676614531473, "grad_norm": 0.3563282787799835, "learning_rate": 0.00018277856483707872, "loss": 1.3889, "step": 6641 }, { "epoch": 0.0863097606892306, "grad_norm": 0.4225514829158783, "learning_rate": 0.00018277596537516735, "loss": 1.312, "step": 6642 }, { "epoch": 0.08632275523314649, "grad_norm": 0.5219054222106934, "learning_rate": 0.00018277336591325595, "loss": 1.5966, "step": 6643 }, { "epoch": 0.08633574977706236, "grad_norm": 0.4198419451713562, "learning_rate": 0.0001827707664513446, "loss": 1.5836, "step": 6644 }, { "epoch": 0.08634874432097824, "grad_norm": 0.4462706744670868, "learning_rate": 0.0001827681669894332, "loss": 1.3672, "step": 6645 }, { "epoch": 0.08636173886489411, "grad_norm": 0.46152985095977783, "learning_rate": 0.00018276556752752182, "loss": 1.157, "step": 6646 }, { "epoch": 0.08637473340880998, "grad_norm": 0.3734128773212433, "learning_rate": 0.00018276296806561042, "loss": 1.4892, "step": 6647 }, { "epoch": 0.08638772795272585, "grad_norm": 0.3328731656074524, "learning_rate": 0.00018276036860369904, "loss": 1.4599, "step": 6648 }, { "epoch": 0.08640072249664173, "grad_norm": 0.4166589379310608, "learning_rate": 0.00018275776914178767, "loss": 1.3006, "step": 6649 }, { "epoch": 0.0864137170405576, "grad_norm": 0.3915296494960785, "learning_rate": 0.00018275516967987626, "loss": 1.5149, "step": 6650 }, { "epoch": 0.08642671158447347, "grad_norm": 0.4036220610141754, "learning_rate": 0.0001827525702179649, "loss": 1.3709, "step": 6651 }, { "epoch": 0.08643970612838935, "grad_norm": 0.38509851694107056, "learning_rate": 0.0001827499707560535, "loss": 1.2501, "step": 6652 }, { "epoch": 0.08645270067230522, "grad_norm": 0.3693358600139618, "learning_rate": 0.0001827473712941421, "loss": 1.6177, "step": 6653 }, { "epoch": 0.08646569521622109, "grad_norm": 0.4955030083656311, "learning_rate": 0.00018274477183223073, "loss": 1.523, "step": 6654 }, { "epoch": 0.08647868976013696, "grad_norm": 0.40535542368888855, "learning_rate": 0.00018274217237031933, "loss": 1.3908, "step": 6655 }, { "epoch": 0.08649168430405284, "grad_norm": 0.3674229681491852, "learning_rate": 0.00018273957290840798, "loss": 1.3266, "step": 6656 }, { "epoch": 0.08650467884796871, "grad_norm": 0.3828207850456238, "learning_rate": 0.00018273697344649658, "loss": 1.5555, "step": 6657 }, { "epoch": 0.08651767339188458, "grad_norm": 0.4368997812271118, "learning_rate": 0.0001827343739845852, "loss": 1.3704, "step": 6658 }, { "epoch": 0.08653066793580046, "grad_norm": 0.3853335380554199, "learning_rate": 0.0001827317745226738, "loss": 1.1551, "step": 6659 }, { "epoch": 0.08654366247971633, "grad_norm": 0.37162187695503235, "learning_rate": 0.00018272917506076243, "loss": 1.3298, "step": 6660 }, { "epoch": 0.0865566570236322, "grad_norm": 0.4518080949783325, "learning_rate": 0.00018272657559885105, "loss": 1.3398, "step": 6661 }, { "epoch": 0.08656965156754808, "grad_norm": 0.3125367760658264, "learning_rate": 0.00018272397613693965, "loss": 1.3816, "step": 6662 }, { "epoch": 0.08658264611146395, "grad_norm": 0.3811508119106293, "learning_rate": 0.0001827213766750283, "loss": 1.321, "step": 6663 }, { "epoch": 0.08659564065537982, "grad_norm": 0.47511252760887146, "learning_rate": 0.0001827187772131169, "loss": 1.4614, "step": 6664 }, { "epoch": 0.0866086351992957, "grad_norm": 0.41604506969451904, "learning_rate": 0.0001827161777512055, "loss": 1.5482, "step": 6665 }, { "epoch": 0.08662162974321157, "grad_norm": 0.41973546147346497, "learning_rate": 0.00018271357828929412, "loss": 1.5492, "step": 6666 }, { "epoch": 0.08663462428712744, "grad_norm": 0.43922296166419983, "learning_rate": 0.00018271097882738274, "loss": 1.5224, "step": 6667 }, { "epoch": 0.08664761883104331, "grad_norm": 0.3093338906764984, "learning_rate": 0.00018270837936547137, "loss": 1.3158, "step": 6668 }, { "epoch": 0.08666061337495919, "grad_norm": 0.37729695439338684, "learning_rate": 0.00018270577990355997, "loss": 1.5179, "step": 6669 }, { "epoch": 0.08667360791887506, "grad_norm": 0.40877071022987366, "learning_rate": 0.0001827031804416486, "loss": 1.3835, "step": 6670 }, { "epoch": 0.08668660246279093, "grad_norm": 0.3912234604358673, "learning_rate": 0.00018270058097973722, "loss": 1.4194, "step": 6671 }, { "epoch": 0.0866995970067068, "grad_norm": 0.481755793094635, "learning_rate": 0.0001826979815178258, "loss": 1.4019, "step": 6672 }, { "epoch": 0.08671259155062268, "grad_norm": 0.3227919638156891, "learning_rate": 0.00018269538205591444, "loss": 1.3923, "step": 6673 }, { "epoch": 0.08672558609453855, "grad_norm": 0.33725088834762573, "learning_rate": 0.00018269278259400303, "loss": 1.4424, "step": 6674 }, { "epoch": 0.08673858063845442, "grad_norm": 0.36098626255989075, "learning_rate": 0.00018269018313209169, "loss": 1.4174, "step": 6675 }, { "epoch": 0.0867515751823703, "grad_norm": 0.40109825134277344, "learning_rate": 0.00018268758367018028, "loss": 1.5953, "step": 6676 }, { "epoch": 0.08676456972628617, "grad_norm": 0.4429379999637604, "learning_rate": 0.0001826849842082689, "loss": 1.5633, "step": 6677 }, { "epoch": 0.08677756427020204, "grad_norm": 0.3582462668418884, "learning_rate": 0.0001826823847463575, "loss": 1.5346, "step": 6678 }, { "epoch": 0.08679055881411792, "grad_norm": 0.35185185074806213, "learning_rate": 0.00018267978528444613, "loss": 1.2513, "step": 6679 }, { "epoch": 0.08680355335803379, "grad_norm": 0.40533891320228577, "learning_rate": 0.00018267718582253475, "loss": 1.4454, "step": 6680 }, { "epoch": 0.08681654790194968, "grad_norm": 0.3830435574054718, "learning_rate": 0.00018267458636062335, "loss": 1.4817, "step": 6681 }, { "epoch": 0.08682954244586555, "grad_norm": 0.4009658694267273, "learning_rate": 0.00018267198689871198, "loss": 1.4674, "step": 6682 }, { "epoch": 0.08684253698978142, "grad_norm": 0.44393613934516907, "learning_rate": 0.0001826693874368006, "loss": 1.4079, "step": 6683 }, { "epoch": 0.0868555315336973, "grad_norm": 0.4655272662639618, "learning_rate": 0.0001826667879748892, "loss": 1.4485, "step": 6684 }, { "epoch": 0.08686852607761317, "grad_norm": 0.5212623476982117, "learning_rate": 0.00018266418851297782, "loss": 1.3869, "step": 6685 }, { "epoch": 0.08688152062152904, "grad_norm": 0.46332216262817383, "learning_rate": 0.00018266158905106642, "loss": 1.5046, "step": 6686 }, { "epoch": 0.08689451516544491, "grad_norm": 0.45600855350494385, "learning_rate": 0.00018265898958915507, "loss": 1.5166, "step": 6687 }, { "epoch": 0.08690750970936079, "grad_norm": 0.4539586901664734, "learning_rate": 0.00018265639012724367, "loss": 1.5981, "step": 6688 }, { "epoch": 0.08692050425327666, "grad_norm": 0.38734015822410583, "learning_rate": 0.0001826537906653323, "loss": 1.3194, "step": 6689 }, { "epoch": 0.08693349879719253, "grad_norm": 0.38938552141189575, "learning_rate": 0.0001826511912034209, "loss": 1.4787, "step": 6690 }, { "epoch": 0.0869464933411084, "grad_norm": 0.4348485767841339, "learning_rate": 0.00018264859174150952, "loss": 1.3383, "step": 6691 }, { "epoch": 0.08695948788502428, "grad_norm": 0.3537873327732086, "learning_rate": 0.00018264599227959814, "loss": 1.4022, "step": 6692 }, { "epoch": 0.08697248242894015, "grad_norm": 0.4237958490848541, "learning_rate": 0.00018264339281768674, "loss": 1.4953, "step": 6693 }, { "epoch": 0.08698547697285602, "grad_norm": 0.39175277948379517, "learning_rate": 0.00018264079335577536, "loss": 1.402, "step": 6694 }, { "epoch": 0.0869984715167719, "grad_norm": 0.40237271785736084, "learning_rate": 0.00018263819389386399, "loss": 1.2712, "step": 6695 }, { "epoch": 0.08701146606068777, "grad_norm": 0.42883434891700745, "learning_rate": 0.00018263559443195258, "loss": 1.5904, "step": 6696 }, { "epoch": 0.08702446060460364, "grad_norm": 0.28929761052131653, "learning_rate": 0.0001826329949700412, "loss": 1.3678, "step": 6697 }, { "epoch": 0.08703745514851952, "grad_norm": 0.3892543911933899, "learning_rate": 0.00018263039550812983, "loss": 1.5108, "step": 6698 }, { "epoch": 0.08705044969243539, "grad_norm": 0.41149577498435974, "learning_rate": 0.00018262779604621846, "loss": 1.4441, "step": 6699 }, { "epoch": 0.08706344423635126, "grad_norm": 0.49987760186195374, "learning_rate": 0.00018262519658430705, "loss": 1.4826, "step": 6700 }, { "epoch": 0.08707643878026713, "grad_norm": 0.3278210163116455, "learning_rate": 0.00018262259712239568, "loss": 1.5166, "step": 6701 }, { "epoch": 0.08708943332418301, "grad_norm": 0.5012274980545044, "learning_rate": 0.0001826199976604843, "loss": 1.4114, "step": 6702 }, { "epoch": 0.08710242786809888, "grad_norm": 0.37377458810806274, "learning_rate": 0.0001826173981985729, "loss": 1.4066, "step": 6703 }, { "epoch": 0.08711542241201475, "grad_norm": 0.3846884071826935, "learning_rate": 0.00018261479873666153, "loss": 1.4282, "step": 6704 }, { "epoch": 0.08712841695593063, "grad_norm": 0.48730963468551636, "learning_rate": 0.00018261219927475012, "loss": 1.4026, "step": 6705 }, { "epoch": 0.0871414114998465, "grad_norm": 0.4235099256038666, "learning_rate": 0.00018260959981283877, "loss": 1.6324, "step": 6706 }, { "epoch": 0.08715440604376237, "grad_norm": 0.42631927132606506, "learning_rate": 0.00018260700035092737, "loss": 1.618, "step": 6707 }, { "epoch": 0.08716740058767825, "grad_norm": 0.38488584756851196, "learning_rate": 0.00018260440088901597, "loss": 1.4508, "step": 6708 }, { "epoch": 0.08718039513159412, "grad_norm": 0.3621501326560974, "learning_rate": 0.0001826018014271046, "loss": 1.436, "step": 6709 }, { "epoch": 0.08719338967550999, "grad_norm": 0.41321900486946106, "learning_rate": 0.00018259920196519322, "loss": 1.4415, "step": 6710 }, { "epoch": 0.08720638421942586, "grad_norm": 0.366578608751297, "learning_rate": 0.00018259660250328184, "loss": 1.4146, "step": 6711 }, { "epoch": 0.08721937876334174, "grad_norm": 0.4188486933708191, "learning_rate": 0.00018259400304137044, "loss": 1.6776, "step": 6712 }, { "epoch": 0.08723237330725761, "grad_norm": 0.3642802834510803, "learning_rate": 0.00018259140357945906, "loss": 1.4507, "step": 6713 }, { "epoch": 0.08724536785117348, "grad_norm": 0.38941413164138794, "learning_rate": 0.0001825888041175477, "loss": 1.301, "step": 6714 }, { "epoch": 0.08725836239508936, "grad_norm": 0.47522616386413574, "learning_rate": 0.00018258620465563629, "loss": 1.3847, "step": 6715 }, { "epoch": 0.08727135693900523, "grad_norm": 0.32577309012413025, "learning_rate": 0.0001825836051937249, "loss": 1.4071, "step": 6716 }, { "epoch": 0.0872843514829211, "grad_norm": 0.31356143951416016, "learning_rate": 0.0001825810057318135, "loss": 1.3019, "step": 6717 }, { "epoch": 0.08729734602683697, "grad_norm": 0.38095274567604065, "learning_rate": 0.00018257840626990216, "loss": 1.3849, "step": 6718 }, { "epoch": 0.08731034057075286, "grad_norm": 0.4623626470565796, "learning_rate": 0.00018257580680799076, "loss": 1.6373, "step": 6719 }, { "epoch": 0.08732333511466873, "grad_norm": 0.33871403336524963, "learning_rate": 0.00018257320734607935, "loss": 1.4132, "step": 6720 }, { "epoch": 0.08733632965858461, "grad_norm": 0.35408031940460205, "learning_rate": 0.00018257060788416798, "loss": 1.4725, "step": 6721 }, { "epoch": 0.08734932420250048, "grad_norm": 0.4097619354724884, "learning_rate": 0.0001825680084222566, "loss": 1.5569, "step": 6722 }, { "epoch": 0.08736231874641635, "grad_norm": 0.45207032561302185, "learning_rate": 0.00018256540896034523, "loss": 1.5888, "step": 6723 }, { "epoch": 0.08737531329033223, "grad_norm": 0.4350290894508362, "learning_rate": 0.00018256280949843382, "loss": 1.3835, "step": 6724 }, { "epoch": 0.0873883078342481, "grad_norm": 0.43973809480667114, "learning_rate": 0.00018256021003652245, "loss": 1.3678, "step": 6725 }, { "epoch": 0.08740130237816397, "grad_norm": 0.44525089859962463, "learning_rate": 0.00018255761057461107, "loss": 1.5277, "step": 6726 }, { "epoch": 0.08741429692207985, "grad_norm": 0.3814772069454193, "learning_rate": 0.00018255501111269967, "loss": 1.2925, "step": 6727 }, { "epoch": 0.08742729146599572, "grad_norm": 0.42106109857559204, "learning_rate": 0.0001825524116507883, "loss": 1.2959, "step": 6728 }, { "epoch": 0.08744028600991159, "grad_norm": 0.4496477246284485, "learning_rate": 0.0001825498121888769, "loss": 1.5256, "step": 6729 }, { "epoch": 0.08745328055382746, "grad_norm": 0.3612123429775238, "learning_rate": 0.00018254721272696554, "loss": 1.2477, "step": 6730 }, { "epoch": 0.08746627509774334, "grad_norm": 0.33892276883125305, "learning_rate": 0.00018254461326505414, "loss": 1.5615, "step": 6731 }, { "epoch": 0.08747926964165921, "grad_norm": 0.40976324677467346, "learning_rate": 0.00018254201380314274, "loss": 1.6113, "step": 6732 }, { "epoch": 0.08749226418557508, "grad_norm": 0.4102124273777008, "learning_rate": 0.00018253941434123136, "loss": 1.4434, "step": 6733 }, { "epoch": 0.08750525872949096, "grad_norm": 0.4042928218841553, "learning_rate": 0.00018253681487932, "loss": 1.6459, "step": 6734 }, { "epoch": 0.08751825327340683, "grad_norm": 0.32147514820098877, "learning_rate": 0.0001825342154174086, "loss": 1.4155, "step": 6735 }, { "epoch": 0.0875312478173227, "grad_norm": 0.4061089754104614, "learning_rate": 0.0001825316159554972, "loss": 1.2141, "step": 6736 }, { "epoch": 0.08754424236123858, "grad_norm": 0.37023404240608215, "learning_rate": 0.00018252901649358583, "loss": 1.4745, "step": 6737 }, { "epoch": 0.08755723690515445, "grad_norm": 0.38748273253440857, "learning_rate": 0.00018252641703167446, "loss": 1.41, "step": 6738 }, { "epoch": 0.08757023144907032, "grad_norm": 0.44647854566574097, "learning_rate": 0.00018252381756976306, "loss": 1.2981, "step": 6739 }, { "epoch": 0.0875832259929862, "grad_norm": 0.3964136838912964, "learning_rate": 0.00018252121810785168, "loss": 1.2843, "step": 6740 }, { "epoch": 0.08759622053690207, "grad_norm": 0.4006388187408447, "learning_rate": 0.0001825186186459403, "loss": 1.6067, "step": 6741 }, { "epoch": 0.08760921508081794, "grad_norm": 0.40677353739738464, "learning_rate": 0.00018251601918402893, "loss": 1.5652, "step": 6742 }, { "epoch": 0.08762220962473381, "grad_norm": 0.36035212874412537, "learning_rate": 0.00018251341972211753, "loss": 1.3936, "step": 6743 }, { "epoch": 0.08763520416864969, "grad_norm": 0.36891618371009827, "learning_rate": 0.00018251082026020615, "loss": 1.3743, "step": 6744 }, { "epoch": 0.08764819871256556, "grad_norm": 0.4133085310459137, "learning_rate": 0.00018250822079829478, "loss": 1.4251, "step": 6745 }, { "epoch": 0.08766119325648143, "grad_norm": 0.3932144343852997, "learning_rate": 0.00018250562133638337, "loss": 1.3792, "step": 6746 }, { "epoch": 0.0876741878003973, "grad_norm": 0.3970622718334198, "learning_rate": 0.000182503021874472, "loss": 1.292, "step": 6747 }, { "epoch": 0.08768718234431318, "grad_norm": 0.3572845757007599, "learning_rate": 0.0001825004224125606, "loss": 1.3365, "step": 6748 }, { "epoch": 0.08770017688822905, "grad_norm": 0.3556772768497467, "learning_rate": 0.00018249782295064922, "loss": 1.5918, "step": 6749 }, { "epoch": 0.08771317143214492, "grad_norm": 0.363410621881485, "learning_rate": 0.00018249522348873784, "loss": 1.3931, "step": 6750 }, { "epoch": 0.0877261659760608, "grad_norm": 0.41273099184036255, "learning_rate": 0.00018249262402682644, "loss": 1.2968, "step": 6751 }, { "epoch": 0.08773916051997667, "grad_norm": 0.3791990876197815, "learning_rate": 0.00018249002456491507, "loss": 1.4417, "step": 6752 }, { "epoch": 0.08775215506389254, "grad_norm": 0.48342934250831604, "learning_rate": 0.0001824874251030037, "loss": 1.5098, "step": 6753 }, { "epoch": 0.08776514960780842, "grad_norm": 0.5464855432510376, "learning_rate": 0.00018248482564109232, "loss": 1.5592, "step": 6754 }, { "epoch": 0.08777814415172429, "grad_norm": 0.35458904504776, "learning_rate": 0.0001824822261791809, "loss": 1.5243, "step": 6755 }, { "epoch": 0.08779113869564016, "grad_norm": 0.4396544098854065, "learning_rate": 0.00018247962671726954, "loss": 1.4359, "step": 6756 }, { "epoch": 0.08780413323955605, "grad_norm": 0.39417317509651184, "learning_rate": 0.00018247702725535816, "loss": 1.4294, "step": 6757 }, { "epoch": 0.08781712778347192, "grad_norm": 0.355384886264801, "learning_rate": 0.00018247442779344676, "loss": 1.3781, "step": 6758 }, { "epoch": 0.0878301223273878, "grad_norm": 0.35375678539276123, "learning_rate": 0.00018247182833153538, "loss": 1.4783, "step": 6759 }, { "epoch": 0.08784311687130367, "grad_norm": 0.3800255358219147, "learning_rate": 0.00018246922886962398, "loss": 1.5268, "step": 6760 }, { "epoch": 0.08785611141521954, "grad_norm": 0.44282281398773193, "learning_rate": 0.00018246662940771263, "loss": 1.3767, "step": 6761 }, { "epoch": 0.08786910595913541, "grad_norm": 0.3774675726890564, "learning_rate": 0.00018246402994580123, "loss": 1.3743, "step": 6762 }, { "epoch": 0.08788210050305129, "grad_norm": 0.36646127700805664, "learning_rate": 0.00018246143048388983, "loss": 1.5078, "step": 6763 }, { "epoch": 0.08789509504696716, "grad_norm": 0.3378525972366333, "learning_rate": 0.00018245883102197845, "loss": 1.3718, "step": 6764 }, { "epoch": 0.08790808959088303, "grad_norm": 0.38771650195121765, "learning_rate": 0.00018245623156006708, "loss": 1.3849, "step": 6765 }, { "epoch": 0.0879210841347989, "grad_norm": 0.3553890883922577, "learning_rate": 0.0001824536320981557, "loss": 1.4497, "step": 6766 }, { "epoch": 0.08793407867871478, "grad_norm": 0.342769980430603, "learning_rate": 0.0001824510326362443, "loss": 1.2222, "step": 6767 }, { "epoch": 0.08794707322263065, "grad_norm": 0.39237168431282043, "learning_rate": 0.00018244843317433292, "loss": 1.1589, "step": 6768 }, { "epoch": 0.08796006776654652, "grad_norm": 0.4822896718978882, "learning_rate": 0.00018244583371242155, "loss": 1.6161, "step": 6769 }, { "epoch": 0.0879730623104624, "grad_norm": 0.40048688650131226, "learning_rate": 0.00018244323425051014, "loss": 1.3556, "step": 6770 }, { "epoch": 0.08798605685437827, "grad_norm": 0.4437680244445801, "learning_rate": 0.00018244063478859877, "loss": 1.5057, "step": 6771 }, { "epoch": 0.08799905139829414, "grad_norm": 0.2981235682964325, "learning_rate": 0.0001824380353266874, "loss": 1.2565, "step": 6772 }, { "epoch": 0.08801204594221002, "grad_norm": 0.33592531085014343, "learning_rate": 0.00018243543586477602, "loss": 1.4356, "step": 6773 }, { "epoch": 0.08802504048612589, "grad_norm": 0.3112647831439972, "learning_rate": 0.00018243283640286462, "loss": 1.4179, "step": 6774 }, { "epoch": 0.08803803503004176, "grad_norm": 0.48134511709213257, "learning_rate": 0.0001824302369409532, "loss": 1.65, "step": 6775 }, { "epoch": 0.08805102957395763, "grad_norm": 0.3852820098400116, "learning_rate": 0.00018242763747904186, "loss": 1.5032, "step": 6776 }, { "epoch": 0.08806402411787351, "grad_norm": 0.4001849293708801, "learning_rate": 0.00018242503801713046, "loss": 1.5401, "step": 6777 }, { "epoch": 0.08807701866178938, "grad_norm": 0.3402118980884552, "learning_rate": 0.00018242243855521909, "loss": 1.4296, "step": 6778 }, { "epoch": 0.08809001320570525, "grad_norm": 0.412910133600235, "learning_rate": 0.00018241983909330768, "loss": 1.4326, "step": 6779 }, { "epoch": 0.08810300774962113, "grad_norm": 0.4577445983886719, "learning_rate": 0.0001824172396313963, "loss": 1.4972, "step": 6780 }, { "epoch": 0.088116002293537, "grad_norm": 0.31557148694992065, "learning_rate": 0.00018241464016948493, "loss": 1.3197, "step": 6781 }, { "epoch": 0.08812899683745287, "grad_norm": 0.3885459899902344, "learning_rate": 0.00018241204070757353, "loss": 1.7714, "step": 6782 }, { "epoch": 0.08814199138136875, "grad_norm": 0.4835717976093292, "learning_rate": 0.00018240944124566215, "loss": 1.4426, "step": 6783 }, { "epoch": 0.08815498592528462, "grad_norm": 0.31692183017730713, "learning_rate": 0.00018240684178375078, "loss": 1.3472, "step": 6784 }, { "epoch": 0.08816798046920049, "grad_norm": 0.31482797861099243, "learning_rate": 0.0001824042423218394, "loss": 1.31, "step": 6785 }, { "epoch": 0.08818097501311636, "grad_norm": 0.4475362300872803, "learning_rate": 0.000182401642859928, "loss": 1.5553, "step": 6786 }, { "epoch": 0.08819396955703224, "grad_norm": 0.2428712546825409, "learning_rate": 0.0001823990433980166, "loss": 1.1972, "step": 6787 }, { "epoch": 0.08820696410094811, "grad_norm": 0.3964831829071045, "learning_rate": 0.00018239644393610525, "loss": 1.4751, "step": 6788 }, { "epoch": 0.08821995864486398, "grad_norm": 0.4768216609954834, "learning_rate": 0.00018239384447419385, "loss": 1.6285, "step": 6789 }, { "epoch": 0.08823295318877986, "grad_norm": 0.43892496824264526, "learning_rate": 0.00018239124501228247, "loss": 1.5025, "step": 6790 }, { "epoch": 0.08824594773269573, "grad_norm": 0.3282887935638428, "learning_rate": 0.00018238864555037107, "loss": 1.3379, "step": 6791 }, { "epoch": 0.0882589422766116, "grad_norm": 0.4435223937034607, "learning_rate": 0.0001823860460884597, "loss": 1.7546, "step": 6792 }, { "epoch": 0.08827193682052747, "grad_norm": 0.4093644320964813, "learning_rate": 0.00018238344662654832, "loss": 1.3932, "step": 6793 }, { "epoch": 0.08828493136444335, "grad_norm": 0.424146831035614, "learning_rate": 0.00018238084716463692, "loss": 1.6236, "step": 6794 }, { "epoch": 0.08829792590835923, "grad_norm": 0.5087403059005737, "learning_rate": 0.00018237824770272554, "loss": 1.4988, "step": 6795 }, { "epoch": 0.08831092045227511, "grad_norm": 0.34016329050064087, "learning_rate": 0.00018237564824081416, "loss": 1.3984, "step": 6796 }, { "epoch": 0.08832391499619098, "grad_norm": 0.307283878326416, "learning_rate": 0.0001823730487789028, "loss": 1.5193, "step": 6797 }, { "epoch": 0.08833690954010685, "grad_norm": 0.3525094985961914, "learning_rate": 0.00018237044931699139, "loss": 1.3198, "step": 6798 }, { "epoch": 0.08834990408402273, "grad_norm": 0.3240290582180023, "learning_rate": 0.00018236784985508, "loss": 1.533, "step": 6799 }, { "epoch": 0.0883628986279386, "grad_norm": 0.30903682112693787, "learning_rate": 0.00018236525039316864, "loss": 1.2742, "step": 6800 }, { "epoch": 0.08837589317185447, "grad_norm": 0.3859502971172333, "learning_rate": 0.00018236265093125723, "loss": 1.3709, "step": 6801 }, { "epoch": 0.08838888771577035, "grad_norm": 0.3207455575466156, "learning_rate": 0.00018236005146934586, "loss": 1.2321, "step": 6802 }, { "epoch": 0.08840188225968622, "grad_norm": 0.4492981433868408, "learning_rate": 0.00018235745200743445, "loss": 1.6, "step": 6803 }, { "epoch": 0.08841487680360209, "grad_norm": 0.4954877197742462, "learning_rate": 0.00018235485254552308, "loss": 1.4338, "step": 6804 }, { "epoch": 0.08842787134751796, "grad_norm": 0.4125249981880188, "learning_rate": 0.0001823522530836117, "loss": 1.3939, "step": 6805 }, { "epoch": 0.08844086589143384, "grad_norm": 0.38339829444885254, "learning_rate": 0.0001823496536217003, "loss": 1.649, "step": 6806 }, { "epoch": 0.08845386043534971, "grad_norm": 0.4332326054573059, "learning_rate": 0.00018234705415978893, "loss": 1.3569, "step": 6807 }, { "epoch": 0.08846685497926558, "grad_norm": 0.3514329493045807, "learning_rate": 0.00018234445469787755, "loss": 1.4779, "step": 6808 }, { "epoch": 0.08847984952318146, "grad_norm": 0.39225804805755615, "learning_rate": 0.00018234185523596617, "loss": 1.4586, "step": 6809 }, { "epoch": 0.08849284406709733, "grad_norm": 0.4566524028778076, "learning_rate": 0.00018233925577405477, "loss": 1.4375, "step": 6810 }, { "epoch": 0.0885058386110132, "grad_norm": 0.38308635354042053, "learning_rate": 0.0001823366563121434, "loss": 1.4494, "step": 6811 }, { "epoch": 0.08851883315492907, "grad_norm": 0.32838448882102966, "learning_rate": 0.00018233405685023202, "loss": 1.4249, "step": 6812 }, { "epoch": 0.08853182769884495, "grad_norm": 0.36618572473526, "learning_rate": 0.00018233145738832062, "loss": 1.3167, "step": 6813 }, { "epoch": 0.08854482224276082, "grad_norm": 0.45102056860923767, "learning_rate": 0.00018232885792640924, "loss": 1.4872, "step": 6814 }, { "epoch": 0.0885578167866767, "grad_norm": 0.3948284089565277, "learning_rate": 0.00018232625846449787, "loss": 1.4123, "step": 6815 }, { "epoch": 0.08857081133059257, "grad_norm": 0.4315451979637146, "learning_rate": 0.00018232365900258646, "loss": 1.3051, "step": 6816 }, { "epoch": 0.08858380587450844, "grad_norm": 0.4110200107097626, "learning_rate": 0.0001823210595406751, "loss": 1.416, "step": 6817 }, { "epoch": 0.08859680041842431, "grad_norm": 0.3555764853954315, "learning_rate": 0.00018231846007876369, "loss": 1.2121, "step": 6818 }, { "epoch": 0.08860979496234019, "grad_norm": 0.4137984812259674, "learning_rate": 0.00018231586061685234, "loss": 1.375, "step": 6819 }, { "epoch": 0.08862278950625606, "grad_norm": 0.39686208963394165, "learning_rate": 0.00018231326115494094, "loss": 1.4284, "step": 6820 }, { "epoch": 0.08863578405017193, "grad_norm": 0.3907967805862427, "learning_rate": 0.00018231066169302956, "loss": 1.4825, "step": 6821 }, { "epoch": 0.0886487785940878, "grad_norm": 0.3236677348613739, "learning_rate": 0.00018230806223111816, "loss": 1.351, "step": 6822 }, { "epoch": 0.08866177313800368, "grad_norm": 0.3468323349952698, "learning_rate": 0.00018230546276920678, "loss": 1.3833, "step": 6823 }, { "epoch": 0.08867476768191955, "grad_norm": 0.3934495449066162, "learning_rate": 0.0001823028633072954, "loss": 1.4241, "step": 6824 }, { "epoch": 0.08868776222583542, "grad_norm": 0.49817419052124023, "learning_rate": 0.000182300263845384, "loss": 1.5475, "step": 6825 }, { "epoch": 0.0887007567697513, "grad_norm": 0.43562814593315125, "learning_rate": 0.00018229766438347263, "loss": 1.5361, "step": 6826 }, { "epoch": 0.08871375131366717, "grad_norm": 0.42393261194229126, "learning_rate": 0.00018229506492156125, "loss": 1.4752, "step": 6827 }, { "epoch": 0.08872674585758304, "grad_norm": 0.44226598739624023, "learning_rate": 0.00018229246545964988, "loss": 1.5798, "step": 6828 }, { "epoch": 0.08873974040149891, "grad_norm": 0.7142444849014282, "learning_rate": 0.00018228986599773847, "loss": 1.7151, "step": 6829 }, { "epoch": 0.08875273494541479, "grad_norm": 0.42160525918006897, "learning_rate": 0.00018228726653582707, "loss": 1.4608, "step": 6830 }, { "epoch": 0.08876572948933066, "grad_norm": 0.3964773118495941, "learning_rate": 0.00018228466707391572, "loss": 1.5238, "step": 6831 }, { "epoch": 0.08877872403324653, "grad_norm": 0.4013957679271698, "learning_rate": 0.00018228206761200432, "loss": 1.569, "step": 6832 }, { "epoch": 0.08879171857716242, "grad_norm": 0.32581770420074463, "learning_rate": 0.00018227946815009295, "loss": 1.5966, "step": 6833 }, { "epoch": 0.0888047131210783, "grad_norm": 0.39206647872924805, "learning_rate": 0.00018227686868818154, "loss": 1.3957, "step": 6834 }, { "epoch": 0.08881770766499417, "grad_norm": 0.4365825951099396, "learning_rate": 0.00018227426922627017, "loss": 1.4571, "step": 6835 }, { "epoch": 0.08883070220891004, "grad_norm": 0.31897208094596863, "learning_rate": 0.0001822716697643588, "loss": 1.2062, "step": 6836 }, { "epoch": 0.08884369675282591, "grad_norm": 0.3728909492492676, "learning_rate": 0.0001822690703024474, "loss": 1.4331, "step": 6837 }, { "epoch": 0.08885669129674179, "grad_norm": 0.3852331340312958, "learning_rate": 0.000182266470840536, "loss": 1.421, "step": 6838 }, { "epoch": 0.08886968584065766, "grad_norm": 0.4345470368862152, "learning_rate": 0.00018226387137862464, "loss": 1.6635, "step": 6839 }, { "epoch": 0.08888268038457353, "grad_norm": 0.3771885335445404, "learning_rate": 0.00018226127191671326, "loss": 1.4296, "step": 6840 }, { "epoch": 0.0888956749284894, "grad_norm": 0.31102120876312256, "learning_rate": 0.00018225867245480186, "loss": 1.3999, "step": 6841 }, { "epoch": 0.08890866947240528, "grad_norm": 0.3644242584705353, "learning_rate": 0.00018225607299289046, "loss": 1.5674, "step": 6842 }, { "epoch": 0.08892166401632115, "grad_norm": 0.38540053367614746, "learning_rate": 0.0001822534735309791, "loss": 1.5188, "step": 6843 }, { "epoch": 0.08893465856023702, "grad_norm": 0.43788740038871765, "learning_rate": 0.0001822508740690677, "loss": 1.3869, "step": 6844 }, { "epoch": 0.0889476531041529, "grad_norm": 0.3545567989349365, "learning_rate": 0.00018224827460715633, "loss": 1.3809, "step": 6845 }, { "epoch": 0.08896064764806877, "grad_norm": 0.2941083610057831, "learning_rate": 0.00018224567514524495, "loss": 1.4417, "step": 6846 }, { "epoch": 0.08897364219198464, "grad_norm": 0.3817788064479828, "learning_rate": 0.00018224307568333355, "loss": 1.5529, "step": 6847 }, { "epoch": 0.08898663673590052, "grad_norm": 0.4662015438079834, "learning_rate": 0.00018224047622142218, "loss": 1.328, "step": 6848 }, { "epoch": 0.08899963127981639, "grad_norm": 0.37792739272117615, "learning_rate": 0.00018223787675951077, "loss": 1.3555, "step": 6849 }, { "epoch": 0.08901262582373226, "grad_norm": 0.43304669857025146, "learning_rate": 0.00018223527729759943, "loss": 1.4969, "step": 6850 }, { "epoch": 0.08902562036764813, "grad_norm": 0.3964652717113495, "learning_rate": 0.00018223267783568802, "loss": 1.429, "step": 6851 }, { "epoch": 0.089038614911564, "grad_norm": 0.35465994477272034, "learning_rate": 0.00018223007837377665, "loss": 1.3504, "step": 6852 }, { "epoch": 0.08905160945547988, "grad_norm": 0.4410751163959503, "learning_rate": 0.00018222747891186525, "loss": 1.4947, "step": 6853 }, { "epoch": 0.08906460399939575, "grad_norm": 0.3674108684062958, "learning_rate": 0.00018222487944995387, "loss": 1.3659, "step": 6854 }, { "epoch": 0.08907759854331163, "grad_norm": 0.49862414598464966, "learning_rate": 0.0001822222799880425, "loss": 1.3169, "step": 6855 }, { "epoch": 0.0890905930872275, "grad_norm": 0.4159758985042572, "learning_rate": 0.0001822196805261311, "loss": 1.1606, "step": 6856 }, { "epoch": 0.08910358763114337, "grad_norm": 0.3795209228992462, "learning_rate": 0.00018221708106421972, "loss": 1.3068, "step": 6857 }, { "epoch": 0.08911658217505924, "grad_norm": 0.4399988353252411, "learning_rate": 0.00018221448160230834, "loss": 1.3575, "step": 6858 }, { "epoch": 0.08912957671897512, "grad_norm": 0.3988720774650574, "learning_rate": 0.00018221188214039694, "loss": 1.3137, "step": 6859 }, { "epoch": 0.08914257126289099, "grad_norm": 0.3311937153339386, "learning_rate": 0.00018220928267848556, "loss": 1.3278, "step": 6860 }, { "epoch": 0.08915556580680686, "grad_norm": 0.38437026739120483, "learning_rate": 0.00018220668321657416, "loss": 1.3375, "step": 6861 }, { "epoch": 0.08916856035072274, "grad_norm": 0.4377189874649048, "learning_rate": 0.0001822040837546628, "loss": 1.5218, "step": 6862 }, { "epoch": 0.08918155489463861, "grad_norm": 0.47967737913131714, "learning_rate": 0.0001822014842927514, "loss": 1.4893, "step": 6863 }, { "epoch": 0.08919454943855448, "grad_norm": 0.4246910512447357, "learning_rate": 0.00018219888483084003, "loss": 1.5762, "step": 6864 }, { "epoch": 0.08920754398247036, "grad_norm": 0.4137103259563446, "learning_rate": 0.00018219628536892863, "loss": 1.4194, "step": 6865 }, { "epoch": 0.08922053852638623, "grad_norm": 0.24333456158638, "learning_rate": 0.00018219368590701725, "loss": 1.2215, "step": 6866 }, { "epoch": 0.0892335330703021, "grad_norm": 0.3583102524280548, "learning_rate": 0.00018219108644510588, "loss": 1.5534, "step": 6867 }, { "epoch": 0.08924652761421797, "grad_norm": 0.45948895812034607, "learning_rate": 0.00018218848698319448, "loss": 1.3769, "step": 6868 }, { "epoch": 0.08925952215813385, "grad_norm": 0.34668073058128357, "learning_rate": 0.0001821858875212831, "loss": 1.5531, "step": 6869 }, { "epoch": 0.08927251670204972, "grad_norm": 0.4705469608306885, "learning_rate": 0.00018218328805937173, "loss": 1.5348, "step": 6870 }, { "epoch": 0.08928551124596561, "grad_norm": 0.29723280668258667, "learning_rate": 0.00018218068859746032, "loss": 1.3343, "step": 6871 }, { "epoch": 0.08929850578988148, "grad_norm": 0.4309476613998413, "learning_rate": 0.00018217808913554895, "loss": 1.4278, "step": 6872 }, { "epoch": 0.08931150033379735, "grad_norm": 0.4949832856655121, "learning_rate": 0.00018217548967363754, "loss": 1.5741, "step": 6873 }, { "epoch": 0.08932449487771323, "grad_norm": 0.4408215284347534, "learning_rate": 0.0001821728902117262, "loss": 1.4931, "step": 6874 }, { "epoch": 0.0893374894216291, "grad_norm": 0.49203070998191833, "learning_rate": 0.0001821702907498148, "loss": 1.5534, "step": 6875 }, { "epoch": 0.08935048396554497, "grad_norm": 0.5074947476387024, "learning_rate": 0.00018216769128790342, "loss": 1.6048, "step": 6876 }, { "epoch": 0.08936347850946084, "grad_norm": 0.4955956041812897, "learning_rate": 0.00018216509182599202, "loss": 1.3616, "step": 6877 }, { "epoch": 0.08937647305337672, "grad_norm": 0.4054872393608093, "learning_rate": 0.00018216249236408064, "loss": 1.2805, "step": 6878 }, { "epoch": 0.08938946759729259, "grad_norm": 0.3785209059715271, "learning_rate": 0.00018215989290216926, "loss": 1.4633, "step": 6879 }, { "epoch": 0.08940246214120846, "grad_norm": 0.3802170157432556, "learning_rate": 0.00018215729344025786, "loss": 1.4995, "step": 6880 }, { "epoch": 0.08941545668512434, "grad_norm": 0.39720088243484497, "learning_rate": 0.0001821546939783465, "loss": 1.5033, "step": 6881 }, { "epoch": 0.08942845122904021, "grad_norm": 0.42358294129371643, "learning_rate": 0.0001821520945164351, "loss": 1.3757, "step": 6882 }, { "epoch": 0.08944144577295608, "grad_norm": 0.3463008403778076, "learning_rate": 0.00018214949505452374, "loss": 1.1762, "step": 6883 }, { "epoch": 0.08945444031687196, "grad_norm": 0.48838555812835693, "learning_rate": 0.00018214689559261233, "loss": 1.6427, "step": 6884 }, { "epoch": 0.08946743486078783, "grad_norm": 0.3624010980129242, "learning_rate": 0.00018214429613070096, "loss": 1.6316, "step": 6885 }, { "epoch": 0.0894804294047037, "grad_norm": 0.36835822463035583, "learning_rate": 0.00018214169666878958, "loss": 1.3811, "step": 6886 }, { "epoch": 0.08949342394861957, "grad_norm": 0.36487331986427307, "learning_rate": 0.00018213909720687818, "loss": 1.2815, "step": 6887 }, { "epoch": 0.08950641849253545, "grad_norm": 0.3315284848213196, "learning_rate": 0.0001821364977449668, "loss": 1.4272, "step": 6888 }, { "epoch": 0.08951941303645132, "grad_norm": 0.3720794916152954, "learning_rate": 0.00018213389828305543, "loss": 1.5472, "step": 6889 }, { "epoch": 0.0895324075803672, "grad_norm": 0.4438225328922272, "learning_rate": 0.00018213129882114403, "loss": 1.6675, "step": 6890 }, { "epoch": 0.08954540212428307, "grad_norm": 0.43497613072395325, "learning_rate": 0.00018212869935923265, "loss": 1.4811, "step": 6891 }, { "epoch": 0.08955839666819894, "grad_norm": 0.375722199678421, "learning_rate": 0.00018212609989732125, "loss": 1.5981, "step": 6892 }, { "epoch": 0.08957139121211481, "grad_norm": 0.35424378514289856, "learning_rate": 0.0001821235004354099, "loss": 1.4233, "step": 6893 }, { "epoch": 0.08958438575603068, "grad_norm": 0.4285990297794342, "learning_rate": 0.0001821209009734985, "loss": 1.4388, "step": 6894 }, { "epoch": 0.08959738029994656, "grad_norm": 0.3616275489330292, "learning_rate": 0.00018211830151158712, "loss": 1.3027, "step": 6895 }, { "epoch": 0.08961037484386243, "grad_norm": 0.42806243896484375, "learning_rate": 0.00018211570204967572, "loss": 1.5357, "step": 6896 }, { "epoch": 0.0896233693877783, "grad_norm": 0.424429714679718, "learning_rate": 0.00018211310258776434, "loss": 1.5203, "step": 6897 }, { "epoch": 0.08963636393169418, "grad_norm": 0.45211121439933777, "learning_rate": 0.00018211050312585297, "loss": 1.35, "step": 6898 }, { "epoch": 0.08964935847561005, "grad_norm": 0.4621683955192566, "learning_rate": 0.00018210790366394156, "loss": 1.3788, "step": 6899 }, { "epoch": 0.08966235301952592, "grad_norm": 0.34341955184936523, "learning_rate": 0.0001821053042020302, "loss": 1.4968, "step": 6900 }, { "epoch": 0.0896753475634418, "grad_norm": 0.5100769996643066, "learning_rate": 0.00018210270474011881, "loss": 1.5317, "step": 6901 }, { "epoch": 0.08968834210735767, "grad_norm": 0.4372366964817047, "learning_rate": 0.0001821001052782074, "loss": 1.5806, "step": 6902 }, { "epoch": 0.08970133665127354, "grad_norm": 0.4510011672973633, "learning_rate": 0.00018209750581629604, "loss": 1.5886, "step": 6903 }, { "epoch": 0.08971433119518941, "grad_norm": 0.5962079167366028, "learning_rate": 0.00018209490635438463, "loss": 1.5202, "step": 6904 }, { "epoch": 0.08972732573910529, "grad_norm": 0.38910624384880066, "learning_rate": 0.00018209230689247328, "loss": 1.3992, "step": 6905 }, { "epoch": 0.08974032028302116, "grad_norm": 0.3711726665496826, "learning_rate": 0.00018208970743056188, "loss": 1.4978, "step": 6906 }, { "epoch": 0.08975331482693703, "grad_norm": 0.39889922738075256, "learning_rate": 0.0001820871079686505, "loss": 1.4703, "step": 6907 }, { "epoch": 0.0897663093708529, "grad_norm": 0.35066187381744385, "learning_rate": 0.0001820845085067391, "loss": 1.5049, "step": 6908 }, { "epoch": 0.0897793039147688, "grad_norm": 0.33957386016845703, "learning_rate": 0.00018208190904482773, "loss": 1.2951, "step": 6909 }, { "epoch": 0.08979229845868467, "grad_norm": 0.3135322630405426, "learning_rate": 0.00018207930958291635, "loss": 1.6078, "step": 6910 }, { "epoch": 0.08980529300260054, "grad_norm": 0.3385043144226074, "learning_rate": 0.00018207671012100495, "loss": 1.1911, "step": 6911 }, { "epoch": 0.08981828754651641, "grad_norm": 0.4538031816482544, "learning_rate": 0.00018207411065909357, "loss": 1.7088, "step": 6912 }, { "epoch": 0.08983128209043229, "grad_norm": 0.41059112548828125, "learning_rate": 0.0001820715111971822, "loss": 1.3147, "step": 6913 }, { "epoch": 0.08984427663434816, "grad_norm": 0.3995553255081177, "learning_rate": 0.0001820689117352708, "loss": 1.4009, "step": 6914 }, { "epoch": 0.08985727117826403, "grad_norm": 0.34051433205604553, "learning_rate": 0.00018206631227335942, "loss": 1.3919, "step": 6915 }, { "epoch": 0.0898702657221799, "grad_norm": 0.4123254418373108, "learning_rate": 0.00018206371281144802, "loss": 1.5242, "step": 6916 }, { "epoch": 0.08988326026609578, "grad_norm": 0.3846631944179535, "learning_rate": 0.00018206111334953667, "loss": 1.5889, "step": 6917 }, { "epoch": 0.08989625481001165, "grad_norm": 0.4947319030761719, "learning_rate": 0.00018205851388762527, "loss": 1.5242, "step": 6918 }, { "epoch": 0.08990924935392752, "grad_norm": 0.3631800711154938, "learning_rate": 0.0001820559144257139, "loss": 1.2717, "step": 6919 }, { "epoch": 0.0899222438978434, "grad_norm": 0.4155273735523224, "learning_rate": 0.00018205331496380252, "loss": 1.5665, "step": 6920 }, { "epoch": 0.08993523844175927, "grad_norm": 0.3829532265663147, "learning_rate": 0.00018205071550189111, "loss": 1.4345, "step": 6921 }, { "epoch": 0.08994823298567514, "grad_norm": 0.3948131799697876, "learning_rate": 0.00018204811603997974, "loss": 1.448, "step": 6922 }, { "epoch": 0.08996122752959101, "grad_norm": 0.3429071307182312, "learning_rate": 0.00018204551657806834, "loss": 1.4406, "step": 6923 }, { "epoch": 0.08997422207350689, "grad_norm": 0.324846476316452, "learning_rate": 0.000182042917116157, "loss": 1.356, "step": 6924 }, { "epoch": 0.08998721661742276, "grad_norm": 0.4990135133266449, "learning_rate": 0.00018204031765424558, "loss": 1.4544, "step": 6925 }, { "epoch": 0.09000021116133863, "grad_norm": 0.39653822779655457, "learning_rate": 0.00018203771819233418, "loss": 1.5492, "step": 6926 }, { "epoch": 0.0900132057052545, "grad_norm": 0.45440101623535156, "learning_rate": 0.0001820351187304228, "loss": 1.4791, "step": 6927 }, { "epoch": 0.09002620024917038, "grad_norm": 0.44539371132850647, "learning_rate": 0.00018203251926851143, "loss": 1.6066, "step": 6928 }, { "epoch": 0.09003919479308625, "grad_norm": 0.32898858189582825, "learning_rate": 0.00018202991980660006, "loss": 1.3568, "step": 6929 }, { "epoch": 0.09005218933700213, "grad_norm": 0.4969126284122467, "learning_rate": 0.00018202732034468865, "loss": 1.6048, "step": 6930 }, { "epoch": 0.090065183880918, "grad_norm": 0.43364131450653076, "learning_rate": 0.00018202472088277728, "loss": 1.345, "step": 6931 }, { "epoch": 0.09007817842483387, "grad_norm": 0.42342495918273926, "learning_rate": 0.0001820221214208659, "loss": 1.5351, "step": 6932 }, { "epoch": 0.09009117296874974, "grad_norm": 0.33170658349990845, "learning_rate": 0.0001820195219589545, "loss": 1.4517, "step": 6933 }, { "epoch": 0.09010416751266562, "grad_norm": 0.3703378438949585, "learning_rate": 0.00018201692249704312, "loss": 1.5418, "step": 6934 }, { "epoch": 0.09011716205658149, "grad_norm": 0.32989075779914856, "learning_rate": 0.00018201432303513172, "loss": 1.2628, "step": 6935 }, { "epoch": 0.09013015660049736, "grad_norm": 0.3939376473426819, "learning_rate": 0.00018201172357322037, "loss": 1.4046, "step": 6936 }, { "epoch": 0.09014315114441324, "grad_norm": 0.46229249238967896, "learning_rate": 0.00018200912411130897, "loss": 1.5823, "step": 6937 }, { "epoch": 0.09015614568832911, "grad_norm": 0.36480844020843506, "learning_rate": 0.00018200652464939757, "loss": 1.3736, "step": 6938 }, { "epoch": 0.09016914023224498, "grad_norm": 0.4342983663082123, "learning_rate": 0.0001820039251874862, "loss": 1.4448, "step": 6939 }, { "epoch": 0.09018213477616085, "grad_norm": 0.4324263036251068, "learning_rate": 0.00018200132572557482, "loss": 1.5807, "step": 6940 }, { "epoch": 0.09019512932007673, "grad_norm": 0.3215867877006531, "learning_rate": 0.00018199872626366344, "loss": 1.445, "step": 6941 }, { "epoch": 0.0902081238639926, "grad_norm": 0.34165892004966736, "learning_rate": 0.00018199612680175204, "loss": 1.4501, "step": 6942 }, { "epoch": 0.09022111840790847, "grad_norm": 0.41335347294807434, "learning_rate": 0.00018199352733984066, "loss": 1.5691, "step": 6943 }, { "epoch": 0.09023411295182435, "grad_norm": 0.4693707525730133, "learning_rate": 0.0001819909278779293, "loss": 1.3992, "step": 6944 }, { "epoch": 0.09024710749574022, "grad_norm": 0.453662633895874, "learning_rate": 0.00018198832841601788, "loss": 1.6101, "step": 6945 }, { "epoch": 0.09026010203965609, "grad_norm": 0.3513772189617157, "learning_rate": 0.0001819857289541065, "loss": 1.3074, "step": 6946 }, { "epoch": 0.09027309658357198, "grad_norm": 0.4163765609264374, "learning_rate": 0.0001819831294921951, "loss": 1.5037, "step": 6947 }, { "epoch": 0.09028609112748785, "grad_norm": 0.44242116808891296, "learning_rate": 0.00018198053003028376, "loss": 1.5549, "step": 6948 }, { "epoch": 0.09029908567140373, "grad_norm": 0.43407416343688965, "learning_rate": 0.00018197793056837236, "loss": 1.7314, "step": 6949 }, { "epoch": 0.0903120802153196, "grad_norm": 0.3851221799850464, "learning_rate": 0.00018197533110646098, "loss": 1.4527, "step": 6950 }, { "epoch": 0.09032507475923547, "grad_norm": 0.2697647511959076, "learning_rate": 0.00018197273164454958, "loss": 1.2771, "step": 6951 }, { "epoch": 0.09033806930315134, "grad_norm": 0.36101463437080383, "learning_rate": 0.0001819701321826382, "loss": 1.3333, "step": 6952 }, { "epoch": 0.09035106384706722, "grad_norm": 0.4358506202697754, "learning_rate": 0.00018196753272072683, "loss": 1.5135, "step": 6953 }, { "epoch": 0.09036405839098309, "grad_norm": 0.45592454075813293, "learning_rate": 0.00018196493325881542, "loss": 1.5169, "step": 6954 }, { "epoch": 0.09037705293489896, "grad_norm": 0.4009954631328583, "learning_rate": 0.00018196233379690405, "loss": 1.426, "step": 6955 }, { "epoch": 0.09039004747881484, "grad_norm": 0.3629184365272522, "learning_rate": 0.00018195973433499267, "loss": 1.453, "step": 6956 }, { "epoch": 0.09040304202273071, "grad_norm": 0.3912285566329956, "learning_rate": 0.00018195713487308127, "loss": 1.3439, "step": 6957 }, { "epoch": 0.09041603656664658, "grad_norm": 0.44960349798202515, "learning_rate": 0.0001819545354111699, "loss": 1.4849, "step": 6958 }, { "epoch": 0.09042903111056246, "grad_norm": 0.41558364033699036, "learning_rate": 0.00018195193594925852, "loss": 1.4932, "step": 6959 }, { "epoch": 0.09044202565447833, "grad_norm": 0.3597593307495117, "learning_rate": 0.00018194933648734714, "loss": 1.3039, "step": 6960 }, { "epoch": 0.0904550201983942, "grad_norm": 0.40801116824150085, "learning_rate": 0.00018194673702543574, "loss": 1.4394, "step": 6961 }, { "epoch": 0.09046801474231007, "grad_norm": 0.45404523611068726, "learning_rate": 0.00018194413756352437, "loss": 1.5567, "step": 6962 }, { "epoch": 0.09048100928622595, "grad_norm": 0.32136112451553345, "learning_rate": 0.000181941538101613, "loss": 1.3397, "step": 6963 }, { "epoch": 0.09049400383014182, "grad_norm": 0.44544461369514465, "learning_rate": 0.0001819389386397016, "loss": 1.3232, "step": 6964 }, { "epoch": 0.09050699837405769, "grad_norm": 0.36751747131347656, "learning_rate": 0.0001819363391777902, "loss": 1.2491, "step": 6965 }, { "epoch": 0.09051999291797357, "grad_norm": 0.40375760197639465, "learning_rate": 0.0001819337397158788, "loss": 1.6012, "step": 6966 }, { "epoch": 0.09053298746188944, "grad_norm": 0.3991863429546356, "learning_rate": 0.00018193114025396746, "loss": 1.4175, "step": 6967 }, { "epoch": 0.09054598200580531, "grad_norm": 0.5024530291557312, "learning_rate": 0.00018192854079205606, "loss": 1.5153, "step": 6968 }, { "epoch": 0.09055897654972118, "grad_norm": 0.4122013449668884, "learning_rate": 0.00018192594133014466, "loss": 1.3864, "step": 6969 }, { "epoch": 0.09057197109363706, "grad_norm": 0.39671802520751953, "learning_rate": 0.00018192334186823328, "loss": 1.3102, "step": 6970 }, { "epoch": 0.09058496563755293, "grad_norm": 0.3554450273513794, "learning_rate": 0.0001819207424063219, "loss": 1.4223, "step": 6971 }, { "epoch": 0.0905979601814688, "grad_norm": 0.4238445460796356, "learning_rate": 0.00018191814294441053, "loss": 1.4315, "step": 6972 }, { "epoch": 0.09061095472538468, "grad_norm": 0.36172258853912354, "learning_rate": 0.00018191554348249913, "loss": 1.2792, "step": 6973 }, { "epoch": 0.09062394926930055, "grad_norm": 0.3543929159641266, "learning_rate": 0.00018191294402058775, "loss": 1.3975, "step": 6974 }, { "epoch": 0.09063694381321642, "grad_norm": 0.3743111193180084, "learning_rate": 0.00018191034455867637, "loss": 1.4346, "step": 6975 }, { "epoch": 0.0906499383571323, "grad_norm": 0.455879271030426, "learning_rate": 0.00018190774509676497, "loss": 1.5487, "step": 6976 }, { "epoch": 0.09066293290104817, "grad_norm": 0.33087995648384094, "learning_rate": 0.0001819051456348536, "loss": 1.5044, "step": 6977 }, { "epoch": 0.09067592744496404, "grad_norm": 0.4066532850265503, "learning_rate": 0.0001819025461729422, "loss": 1.5154, "step": 6978 }, { "epoch": 0.09068892198887991, "grad_norm": 0.41942641139030457, "learning_rate": 0.00018189994671103085, "loss": 1.5276, "step": 6979 }, { "epoch": 0.09070191653279579, "grad_norm": 0.43709370493888855, "learning_rate": 0.00018189734724911944, "loss": 1.666, "step": 6980 }, { "epoch": 0.09071491107671166, "grad_norm": 0.34738513827323914, "learning_rate": 0.00018189474778720804, "loss": 1.415, "step": 6981 }, { "epoch": 0.09072790562062753, "grad_norm": 0.3906846046447754, "learning_rate": 0.00018189214832529667, "loss": 1.243, "step": 6982 }, { "epoch": 0.0907409001645434, "grad_norm": 0.4436666965484619, "learning_rate": 0.0001818895488633853, "loss": 1.242, "step": 6983 }, { "epoch": 0.09075389470845928, "grad_norm": 0.4382869005203247, "learning_rate": 0.00018188694940147391, "loss": 1.3336, "step": 6984 }, { "epoch": 0.09076688925237517, "grad_norm": 0.3783816397190094, "learning_rate": 0.0001818843499395625, "loss": 1.3196, "step": 6985 }, { "epoch": 0.09077988379629104, "grad_norm": 0.44984304904937744, "learning_rate": 0.00018188175047765114, "loss": 1.4837, "step": 6986 }, { "epoch": 0.09079287834020691, "grad_norm": 0.7355769872665405, "learning_rate": 0.00018187915101573976, "loss": 1.496, "step": 6987 }, { "epoch": 0.09080587288412278, "grad_norm": 0.340820848941803, "learning_rate": 0.00018187655155382836, "loss": 1.2732, "step": 6988 }, { "epoch": 0.09081886742803866, "grad_norm": 0.3869789242744446, "learning_rate": 0.00018187395209191698, "loss": 1.5891, "step": 6989 }, { "epoch": 0.09083186197195453, "grad_norm": 0.3887910544872284, "learning_rate": 0.00018187135263000558, "loss": 1.4475, "step": 6990 }, { "epoch": 0.0908448565158704, "grad_norm": 0.4561319947242737, "learning_rate": 0.00018186875316809423, "loss": 1.3507, "step": 6991 }, { "epoch": 0.09085785105978628, "grad_norm": 0.34259194135665894, "learning_rate": 0.00018186615370618283, "loss": 1.4105, "step": 6992 }, { "epoch": 0.09087084560370215, "grad_norm": 0.41069769859313965, "learning_rate": 0.00018186355424427143, "loss": 1.4076, "step": 6993 }, { "epoch": 0.09088384014761802, "grad_norm": 0.5440664887428284, "learning_rate": 0.00018186095478236008, "loss": 1.4778, "step": 6994 }, { "epoch": 0.0908968346915339, "grad_norm": 0.44725194573402405, "learning_rate": 0.00018185835532044867, "loss": 1.539, "step": 6995 }, { "epoch": 0.09090982923544977, "grad_norm": 0.35201412439346313, "learning_rate": 0.0001818557558585373, "loss": 1.5734, "step": 6996 }, { "epoch": 0.09092282377936564, "grad_norm": 0.4043336510658264, "learning_rate": 0.0001818531563966259, "loss": 1.5282, "step": 6997 }, { "epoch": 0.09093581832328151, "grad_norm": 0.43089672923088074, "learning_rate": 0.00018185055693471452, "loss": 1.5351, "step": 6998 }, { "epoch": 0.09094881286719739, "grad_norm": 0.33127808570861816, "learning_rate": 0.00018184795747280315, "loss": 1.3167, "step": 6999 }, { "epoch": 0.09096180741111326, "grad_norm": 0.35555338859558105, "learning_rate": 0.00018184535801089174, "loss": 1.4014, "step": 7000 }, { "epoch": 0.09097480195502913, "grad_norm": 0.47664740681648254, "learning_rate": 0.00018184275854898037, "loss": 1.231, "step": 7001 }, { "epoch": 0.090987796498945, "grad_norm": 0.37155023217201233, "learning_rate": 0.000181840159087069, "loss": 1.3657, "step": 7002 }, { "epoch": 0.09100079104286088, "grad_norm": 0.3100747764110565, "learning_rate": 0.00018183755962515762, "loss": 1.1602, "step": 7003 }, { "epoch": 0.09101378558677675, "grad_norm": 0.3802303075790405, "learning_rate": 0.00018183496016324621, "loss": 1.3541, "step": 7004 }, { "epoch": 0.09102678013069262, "grad_norm": 0.43504342436790466, "learning_rate": 0.00018183236070133484, "loss": 1.3814, "step": 7005 }, { "epoch": 0.0910397746746085, "grad_norm": 0.26990699768066406, "learning_rate": 0.00018182976123942346, "loss": 1.2038, "step": 7006 }, { "epoch": 0.09105276921852437, "grad_norm": 0.38820093870162964, "learning_rate": 0.00018182716177751206, "loss": 1.4681, "step": 7007 }, { "epoch": 0.09106576376244024, "grad_norm": 0.4903755784034729, "learning_rate": 0.00018182456231560068, "loss": 1.5494, "step": 7008 }, { "epoch": 0.09107875830635612, "grad_norm": 0.35954418778419495, "learning_rate": 0.00018182196285368928, "loss": 1.4889, "step": 7009 }, { "epoch": 0.09109175285027199, "grad_norm": 0.29346954822540283, "learning_rate": 0.0001818193633917779, "loss": 1.1728, "step": 7010 }, { "epoch": 0.09110474739418786, "grad_norm": 0.3912215530872345, "learning_rate": 0.00018181676392986653, "loss": 1.5047, "step": 7011 }, { "epoch": 0.09111774193810374, "grad_norm": 0.4584551751613617, "learning_rate": 0.00018181416446795513, "loss": 1.54, "step": 7012 }, { "epoch": 0.09113073648201961, "grad_norm": 0.4383290410041809, "learning_rate": 0.00018181156500604375, "loss": 1.3977, "step": 7013 }, { "epoch": 0.09114373102593548, "grad_norm": 0.45443469285964966, "learning_rate": 0.00018180896554413238, "loss": 1.5307, "step": 7014 }, { "epoch": 0.09115672556985135, "grad_norm": 0.34525009989738464, "learning_rate": 0.000181806366082221, "loss": 1.5244, "step": 7015 }, { "epoch": 0.09116972011376723, "grad_norm": 0.4214661121368408, "learning_rate": 0.0001818037666203096, "loss": 1.5493, "step": 7016 }, { "epoch": 0.0911827146576831, "grad_norm": 0.35950130224227905, "learning_rate": 0.00018180116715839822, "loss": 1.4499, "step": 7017 }, { "epoch": 0.09119570920159897, "grad_norm": 0.360706627368927, "learning_rate": 0.00018179856769648685, "loss": 1.5785, "step": 7018 }, { "epoch": 0.09120870374551485, "grad_norm": 0.37173810601234436, "learning_rate": 0.00018179596823457545, "loss": 1.4272, "step": 7019 }, { "epoch": 0.09122169828943072, "grad_norm": 0.36263522505760193, "learning_rate": 0.00018179336877266407, "loss": 1.3951, "step": 7020 }, { "epoch": 0.09123469283334659, "grad_norm": 0.42371153831481934, "learning_rate": 0.00018179076931075267, "loss": 1.4595, "step": 7021 }, { "epoch": 0.09124768737726247, "grad_norm": 0.3105991780757904, "learning_rate": 0.0001817881698488413, "loss": 1.4995, "step": 7022 }, { "epoch": 0.09126068192117835, "grad_norm": 0.3982296288013458, "learning_rate": 0.00018178557038692992, "loss": 1.5567, "step": 7023 }, { "epoch": 0.09127367646509423, "grad_norm": 0.4463663399219513, "learning_rate": 0.00018178297092501851, "loss": 1.6307, "step": 7024 }, { "epoch": 0.0912866710090101, "grad_norm": 0.4056328535079956, "learning_rate": 0.00018178037146310714, "loss": 1.445, "step": 7025 }, { "epoch": 0.09129966555292597, "grad_norm": 0.30898308753967285, "learning_rate": 0.00018177777200119576, "loss": 1.2924, "step": 7026 }, { "epoch": 0.09131266009684184, "grad_norm": 0.4046187996864319, "learning_rate": 0.0001817751725392844, "loss": 1.4658, "step": 7027 }, { "epoch": 0.09132565464075772, "grad_norm": 0.4392760992050171, "learning_rate": 0.00018177257307737298, "loss": 1.4515, "step": 7028 }, { "epoch": 0.09133864918467359, "grad_norm": 0.4211624264717102, "learning_rate": 0.0001817699736154616, "loss": 1.394, "step": 7029 }, { "epoch": 0.09135164372858946, "grad_norm": 0.3351362645626068, "learning_rate": 0.00018176737415355023, "loss": 1.2156, "step": 7030 }, { "epoch": 0.09136463827250534, "grad_norm": 0.4164751470088959, "learning_rate": 0.00018176477469163883, "loss": 1.3609, "step": 7031 }, { "epoch": 0.09137763281642121, "grad_norm": 0.492680162191391, "learning_rate": 0.00018176217522972746, "loss": 1.2473, "step": 7032 }, { "epoch": 0.09139062736033708, "grad_norm": 0.44297999143600464, "learning_rate": 0.00018175957576781608, "loss": 1.4983, "step": 7033 }, { "epoch": 0.09140362190425295, "grad_norm": 0.3613723814487457, "learning_rate": 0.0001817569763059047, "loss": 1.4359, "step": 7034 }, { "epoch": 0.09141661644816883, "grad_norm": 0.45229169726371765, "learning_rate": 0.0001817543768439933, "loss": 1.5633, "step": 7035 }, { "epoch": 0.0914296109920847, "grad_norm": 0.39727187156677246, "learning_rate": 0.0001817517773820819, "loss": 1.3757, "step": 7036 }, { "epoch": 0.09144260553600057, "grad_norm": 0.39390307664871216, "learning_rate": 0.00018174917792017055, "loss": 1.4118, "step": 7037 }, { "epoch": 0.09145560007991645, "grad_norm": 0.40995270013809204, "learning_rate": 0.00018174657845825915, "loss": 1.529, "step": 7038 }, { "epoch": 0.09146859462383232, "grad_norm": 0.5152929425239563, "learning_rate": 0.00018174397899634777, "loss": 1.3893, "step": 7039 }, { "epoch": 0.09148158916774819, "grad_norm": 0.40617021918296814, "learning_rate": 0.00018174137953443637, "loss": 1.4569, "step": 7040 }, { "epoch": 0.09149458371166407, "grad_norm": 0.4657137989997864, "learning_rate": 0.000181738780072525, "loss": 1.651, "step": 7041 }, { "epoch": 0.09150757825557994, "grad_norm": 0.36911675333976746, "learning_rate": 0.00018173618061061362, "loss": 1.5006, "step": 7042 }, { "epoch": 0.09152057279949581, "grad_norm": 0.4068413972854614, "learning_rate": 0.00018173358114870222, "loss": 1.4879, "step": 7043 }, { "epoch": 0.09153356734341168, "grad_norm": 0.40428224205970764, "learning_rate": 0.00018173098168679084, "loss": 1.269, "step": 7044 }, { "epoch": 0.09154656188732756, "grad_norm": 0.48272445797920227, "learning_rate": 0.00018172838222487947, "loss": 1.5141, "step": 7045 }, { "epoch": 0.09155955643124343, "grad_norm": 0.3999854326248169, "learning_rate": 0.0001817257827629681, "loss": 1.3822, "step": 7046 }, { "epoch": 0.0915725509751593, "grad_norm": 0.34400972723960876, "learning_rate": 0.0001817231833010567, "loss": 1.5411, "step": 7047 }, { "epoch": 0.09158554551907518, "grad_norm": 0.4068968594074249, "learning_rate": 0.00018172058383914528, "loss": 1.3241, "step": 7048 }, { "epoch": 0.09159854006299105, "grad_norm": 0.4398244321346283, "learning_rate": 0.00018171798437723394, "loss": 1.513, "step": 7049 }, { "epoch": 0.09161153460690692, "grad_norm": 0.48099762201309204, "learning_rate": 0.00018171538491532253, "loss": 1.4429, "step": 7050 }, { "epoch": 0.0916245291508228, "grad_norm": 0.4356433153152466, "learning_rate": 0.00018171278545341116, "loss": 1.342, "step": 7051 }, { "epoch": 0.09163752369473867, "grad_norm": 0.3434799313545227, "learning_rate": 0.00018171018599149976, "loss": 1.4022, "step": 7052 }, { "epoch": 0.09165051823865454, "grad_norm": 0.3334213197231293, "learning_rate": 0.00018170758652958838, "loss": 1.4959, "step": 7053 }, { "epoch": 0.09166351278257041, "grad_norm": 0.43539541959762573, "learning_rate": 0.000181704987067677, "loss": 1.466, "step": 7054 }, { "epoch": 0.09167650732648629, "grad_norm": 0.3760571777820587, "learning_rate": 0.0001817023876057656, "loss": 1.3674, "step": 7055 }, { "epoch": 0.09168950187040216, "grad_norm": 0.4352891743183136, "learning_rate": 0.00018169978814385423, "loss": 1.6493, "step": 7056 }, { "epoch": 0.09170249641431803, "grad_norm": 0.40841996669769287, "learning_rate": 0.00018169718868194285, "loss": 1.3558, "step": 7057 }, { "epoch": 0.0917154909582339, "grad_norm": 0.3723299503326416, "learning_rate": 0.00018169458922003148, "loss": 1.1837, "step": 7058 }, { "epoch": 0.09172848550214978, "grad_norm": 0.3892412483692169, "learning_rate": 0.00018169198975812007, "loss": 1.5824, "step": 7059 }, { "epoch": 0.09174148004606565, "grad_norm": 0.35799074172973633, "learning_rate": 0.00018168939029620867, "loss": 1.5078, "step": 7060 }, { "epoch": 0.09175447458998154, "grad_norm": 0.3643108606338501, "learning_rate": 0.00018168679083429732, "loss": 1.4008, "step": 7061 }, { "epoch": 0.09176746913389741, "grad_norm": 0.44489216804504395, "learning_rate": 0.00018168419137238592, "loss": 1.4752, "step": 7062 }, { "epoch": 0.09178046367781328, "grad_norm": 0.33809971809387207, "learning_rate": 0.00018168159191047454, "loss": 1.52, "step": 7063 }, { "epoch": 0.09179345822172916, "grad_norm": 0.30471858382225037, "learning_rate": 0.00018167899244856314, "loss": 1.6783, "step": 7064 }, { "epoch": 0.09180645276564503, "grad_norm": 0.39556384086608887, "learning_rate": 0.00018167639298665177, "loss": 1.3709, "step": 7065 }, { "epoch": 0.0918194473095609, "grad_norm": 0.3694120943546295, "learning_rate": 0.0001816737935247404, "loss": 1.4105, "step": 7066 }, { "epoch": 0.09183244185347678, "grad_norm": 0.4004029929637909, "learning_rate": 0.000181671194062829, "loss": 1.4499, "step": 7067 }, { "epoch": 0.09184543639739265, "grad_norm": 0.31606435775756836, "learning_rate": 0.00018166859460091764, "loss": 1.3162, "step": 7068 }, { "epoch": 0.09185843094130852, "grad_norm": 0.31936192512512207, "learning_rate": 0.00018166599513900624, "loss": 1.4311, "step": 7069 }, { "epoch": 0.0918714254852244, "grad_norm": 0.4151539206504822, "learning_rate": 0.00018166339567709486, "loss": 1.3412, "step": 7070 }, { "epoch": 0.09188442002914027, "grad_norm": 0.338312566280365, "learning_rate": 0.00018166079621518346, "loss": 1.4083, "step": 7071 }, { "epoch": 0.09189741457305614, "grad_norm": 0.35674360394477844, "learning_rate": 0.00018165819675327208, "loss": 1.3069, "step": 7072 }, { "epoch": 0.09191040911697201, "grad_norm": 0.4314648509025574, "learning_rate": 0.0001816555972913607, "loss": 1.549, "step": 7073 }, { "epoch": 0.09192340366088789, "grad_norm": 0.39260637760162354, "learning_rate": 0.0001816529978294493, "loss": 1.4833, "step": 7074 }, { "epoch": 0.09193639820480376, "grad_norm": 0.35555902123451233, "learning_rate": 0.00018165039836753793, "loss": 1.2901, "step": 7075 }, { "epoch": 0.09194939274871963, "grad_norm": 0.3010965883731842, "learning_rate": 0.00018164779890562655, "loss": 1.5237, "step": 7076 }, { "epoch": 0.0919623872926355, "grad_norm": 0.40476569533348083, "learning_rate": 0.00018164519944371515, "loss": 1.3212, "step": 7077 }, { "epoch": 0.09197538183655138, "grad_norm": 0.3887787163257599, "learning_rate": 0.00018164259998180378, "loss": 1.3762, "step": 7078 }, { "epoch": 0.09198837638046725, "grad_norm": 0.42832162976264954, "learning_rate": 0.00018164000051989237, "loss": 1.5362, "step": 7079 }, { "epoch": 0.09200137092438312, "grad_norm": 0.3696151077747345, "learning_rate": 0.00018163740105798102, "loss": 1.2748, "step": 7080 }, { "epoch": 0.092014365468299, "grad_norm": 0.3990439474582672, "learning_rate": 0.00018163480159606962, "loss": 1.2019, "step": 7081 }, { "epoch": 0.09202736001221487, "grad_norm": 0.34416481852531433, "learning_rate": 0.00018163220213415825, "loss": 1.496, "step": 7082 }, { "epoch": 0.09204035455613074, "grad_norm": 0.3878422975540161, "learning_rate": 0.00018162960267224684, "loss": 1.4295, "step": 7083 }, { "epoch": 0.09205334910004662, "grad_norm": 0.40328705310821533, "learning_rate": 0.00018162700321033547, "loss": 1.4789, "step": 7084 }, { "epoch": 0.09206634364396249, "grad_norm": 0.3785564601421356, "learning_rate": 0.0001816244037484241, "loss": 1.3298, "step": 7085 }, { "epoch": 0.09207933818787836, "grad_norm": 0.42818620800971985, "learning_rate": 0.0001816218042865127, "loss": 1.3423, "step": 7086 }, { "epoch": 0.09209233273179424, "grad_norm": 0.4125883877277374, "learning_rate": 0.00018161920482460131, "loss": 1.3533, "step": 7087 }, { "epoch": 0.09210532727571011, "grad_norm": 0.4832891523838043, "learning_rate": 0.00018161660536268994, "loss": 1.4342, "step": 7088 }, { "epoch": 0.09211832181962598, "grad_norm": 0.37861329317092896, "learning_rate": 0.00018161400590077856, "loss": 1.407, "step": 7089 }, { "epoch": 0.09213131636354185, "grad_norm": 0.6877596378326416, "learning_rate": 0.00018161140643886716, "loss": 1.4898, "step": 7090 }, { "epoch": 0.09214431090745773, "grad_norm": 0.3705253601074219, "learning_rate": 0.00018160880697695576, "loss": 1.5152, "step": 7091 }, { "epoch": 0.0921573054513736, "grad_norm": 0.4205756187438965, "learning_rate": 0.0001816062075150444, "loss": 1.4117, "step": 7092 }, { "epoch": 0.09217029999528947, "grad_norm": 0.35524222254753113, "learning_rate": 0.000181603608053133, "loss": 1.5007, "step": 7093 }, { "epoch": 0.09218329453920535, "grad_norm": 0.367727667093277, "learning_rate": 0.00018160100859122163, "loss": 1.2419, "step": 7094 }, { "epoch": 0.09219628908312122, "grad_norm": 0.4637907147407532, "learning_rate": 0.00018159840912931023, "loss": 1.4009, "step": 7095 }, { "epoch": 0.09220928362703709, "grad_norm": 0.30720487236976624, "learning_rate": 0.00018159580966739885, "loss": 1.4144, "step": 7096 }, { "epoch": 0.09222227817095296, "grad_norm": 0.44006332755088806, "learning_rate": 0.00018159321020548748, "loss": 1.285, "step": 7097 }, { "epoch": 0.09223527271486884, "grad_norm": 0.3205036222934723, "learning_rate": 0.00018159061074357608, "loss": 1.4792, "step": 7098 }, { "epoch": 0.09224826725878472, "grad_norm": 0.41404226422309875, "learning_rate": 0.0001815880112816647, "loss": 1.4057, "step": 7099 }, { "epoch": 0.0922612618027006, "grad_norm": 0.3069048523902893, "learning_rate": 0.00018158541181975332, "loss": 1.3685, "step": 7100 }, { "epoch": 0.09227425634661647, "grad_norm": 0.3949953615665436, "learning_rate": 0.00018158281235784195, "loss": 1.4948, "step": 7101 }, { "epoch": 0.09228725089053234, "grad_norm": 0.4210084080696106, "learning_rate": 0.00018158021289593055, "loss": 1.4039, "step": 7102 }, { "epoch": 0.09230024543444822, "grad_norm": 0.31463754177093506, "learning_rate": 0.00018157761343401914, "loss": 1.3567, "step": 7103 }, { "epoch": 0.09231323997836409, "grad_norm": 0.3610028326511383, "learning_rate": 0.0001815750139721078, "loss": 1.4224, "step": 7104 }, { "epoch": 0.09232623452227996, "grad_norm": 0.4031345844268799, "learning_rate": 0.0001815724145101964, "loss": 1.4431, "step": 7105 }, { "epoch": 0.09233922906619584, "grad_norm": 0.39141783118247986, "learning_rate": 0.00018156981504828502, "loss": 1.3225, "step": 7106 }, { "epoch": 0.09235222361011171, "grad_norm": 0.429343044757843, "learning_rate": 0.00018156721558637364, "loss": 1.4191, "step": 7107 }, { "epoch": 0.09236521815402758, "grad_norm": 0.4602467119693756, "learning_rate": 0.00018156461612446224, "loss": 1.5815, "step": 7108 }, { "epoch": 0.09237821269794345, "grad_norm": 0.349427193403244, "learning_rate": 0.00018156201666255086, "loss": 1.381, "step": 7109 }, { "epoch": 0.09239120724185933, "grad_norm": 0.34029173851013184, "learning_rate": 0.00018155941720063946, "loss": 1.5318, "step": 7110 }, { "epoch": 0.0924042017857752, "grad_norm": 0.42074456810951233, "learning_rate": 0.0001815568177387281, "loss": 1.3697, "step": 7111 }, { "epoch": 0.09241719632969107, "grad_norm": 0.3333298861980438, "learning_rate": 0.0001815542182768167, "loss": 1.3375, "step": 7112 }, { "epoch": 0.09243019087360695, "grad_norm": 0.45479950308799744, "learning_rate": 0.00018155161881490533, "loss": 1.4995, "step": 7113 }, { "epoch": 0.09244318541752282, "grad_norm": 0.34272897243499756, "learning_rate": 0.00018154901935299393, "loss": 1.4244, "step": 7114 }, { "epoch": 0.09245617996143869, "grad_norm": 0.5107108950614929, "learning_rate": 0.00018154641989108256, "loss": 1.4776, "step": 7115 }, { "epoch": 0.09246917450535456, "grad_norm": 0.32694002985954285, "learning_rate": 0.00018154382042917118, "loss": 1.3601, "step": 7116 }, { "epoch": 0.09248216904927044, "grad_norm": 0.42429476976394653, "learning_rate": 0.00018154122096725978, "loss": 1.4091, "step": 7117 }, { "epoch": 0.09249516359318631, "grad_norm": 0.48859018087387085, "learning_rate": 0.0001815386215053484, "loss": 1.5322, "step": 7118 }, { "epoch": 0.09250815813710218, "grad_norm": 0.33436352014541626, "learning_rate": 0.00018153602204343703, "loss": 1.1238, "step": 7119 }, { "epoch": 0.09252115268101806, "grad_norm": 0.40995797514915466, "learning_rate": 0.00018153342258152562, "loss": 1.514, "step": 7120 }, { "epoch": 0.09253414722493393, "grad_norm": 0.36481329798698425, "learning_rate": 0.00018153082311961425, "loss": 1.5133, "step": 7121 }, { "epoch": 0.0925471417688498, "grad_norm": 0.602674126625061, "learning_rate": 0.00018152822365770285, "loss": 1.6651, "step": 7122 }, { "epoch": 0.09256013631276568, "grad_norm": 0.3543475270271301, "learning_rate": 0.0001815256241957915, "loss": 1.2827, "step": 7123 }, { "epoch": 0.09257313085668155, "grad_norm": 0.3600478768348694, "learning_rate": 0.0001815230247338801, "loss": 1.5326, "step": 7124 }, { "epoch": 0.09258612540059742, "grad_norm": 0.3686290979385376, "learning_rate": 0.00018152042527196872, "loss": 1.2841, "step": 7125 }, { "epoch": 0.0925991199445133, "grad_norm": 0.46782857179641724, "learning_rate": 0.00018151782581005732, "loss": 1.5128, "step": 7126 }, { "epoch": 0.09261211448842917, "grad_norm": 0.4918583929538727, "learning_rate": 0.00018151522634814594, "loss": 1.4921, "step": 7127 }, { "epoch": 0.09262510903234504, "grad_norm": 0.37741222977638245, "learning_rate": 0.00018151262688623457, "loss": 1.3863, "step": 7128 }, { "epoch": 0.09263810357626091, "grad_norm": 0.5578149557113647, "learning_rate": 0.00018151002742432316, "loss": 1.5841, "step": 7129 }, { "epoch": 0.09265109812017679, "grad_norm": 0.38426831364631653, "learning_rate": 0.0001815074279624118, "loss": 1.3376, "step": 7130 }, { "epoch": 0.09266409266409266, "grad_norm": 0.36330536007881165, "learning_rate": 0.0001815048285005004, "loss": 1.5015, "step": 7131 }, { "epoch": 0.09267708720800853, "grad_norm": 0.4214705228805542, "learning_rate": 0.000181502229038589, "loss": 1.4123, "step": 7132 }, { "epoch": 0.0926900817519244, "grad_norm": 0.3297528326511383, "learning_rate": 0.00018149962957667763, "loss": 1.3856, "step": 7133 }, { "epoch": 0.09270307629584028, "grad_norm": 0.45045676827430725, "learning_rate": 0.00018149703011476623, "loss": 1.3108, "step": 7134 }, { "epoch": 0.09271607083975615, "grad_norm": 0.4501149654388428, "learning_rate": 0.00018149443065285488, "loss": 1.589, "step": 7135 }, { "epoch": 0.09272906538367202, "grad_norm": 0.25426357984542847, "learning_rate": 0.00018149183119094348, "loss": 1.3711, "step": 7136 }, { "epoch": 0.09274205992758791, "grad_norm": 0.41150906682014465, "learning_rate": 0.0001814892317290321, "loss": 1.3483, "step": 7137 }, { "epoch": 0.09275505447150378, "grad_norm": 0.40769341588020325, "learning_rate": 0.0001814866322671207, "loss": 1.4589, "step": 7138 }, { "epoch": 0.09276804901541966, "grad_norm": 0.3981645703315735, "learning_rate": 0.00018148403280520933, "loss": 1.6916, "step": 7139 }, { "epoch": 0.09278104355933553, "grad_norm": 0.28859761357307434, "learning_rate": 0.00018148143334329795, "loss": 1.2536, "step": 7140 }, { "epoch": 0.0927940381032514, "grad_norm": 0.3678811490535736, "learning_rate": 0.00018147883388138655, "loss": 1.5757, "step": 7141 }, { "epoch": 0.09280703264716728, "grad_norm": 0.4214949905872345, "learning_rate": 0.00018147623441947517, "loss": 1.5823, "step": 7142 }, { "epoch": 0.09282002719108315, "grad_norm": 0.2991885542869568, "learning_rate": 0.0001814736349575638, "loss": 1.4296, "step": 7143 }, { "epoch": 0.09283302173499902, "grad_norm": 0.45115718245506287, "learning_rate": 0.0001814710354956524, "loss": 1.3936, "step": 7144 }, { "epoch": 0.0928460162789149, "grad_norm": 0.38903236389160156, "learning_rate": 0.00018146843603374102, "loss": 1.3686, "step": 7145 }, { "epoch": 0.09285901082283077, "grad_norm": 0.4278908371925354, "learning_rate": 0.00018146583657182964, "loss": 1.4786, "step": 7146 }, { "epoch": 0.09287200536674664, "grad_norm": 0.42673051357269287, "learning_rate": 0.00018146323710991827, "loss": 1.6307, "step": 7147 }, { "epoch": 0.09288499991066251, "grad_norm": 0.39841514825820923, "learning_rate": 0.00018146063764800687, "loss": 1.6144, "step": 7148 }, { "epoch": 0.09289799445457839, "grad_norm": 0.239227294921875, "learning_rate": 0.0001814580381860955, "loss": 1.3622, "step": 7149 }, { "epoch": 0.09291098899849426, "grad_norm": 0.31507638096809387, "learning_rate": 0.00018145543872418411, "loss": 1.3801, "step": 7150 }, { "epoch": 0.09292398354241013, "grad_norm": 0.3635648190975189, "learning_rate": 0.0001814528392622727, "loss": 1.5107, "step": 7151 }, { "epoch": 0.092936978086326, "grad_norm": 0.398466020822525, "learning_rate": 0.00018145023980036134, "loss": 1.6659, "step": 7152 }, { "epoch": 0.09294997263024188, "grad_norm": 0.4346812665462494, "learning_rate": 0.00018144764033844993, "loss": 1.496, "step": 7153 }, { "epoch": 0.09296296717415775, "grad_norm": 0.35877594351768494, "learning_rate": 0.00018144504087653859, "loss": 1.342, "step": 7154 }, { "epoch": 0.09297596171807362, "grad_norm": 0.42430737614631653, "learning_rate": 0.00018144244141462718, "loss": 1.2827, "step": 7155 }, { "epoch": 0.0929889562619895, "grad_norm": 0.3588751554489136, "learning_rate": 0.0001814398419527158, "loss": 1.5225, "step": 7156 }, { "epoch": 0.09300195080590537, "grad_norm": 0.3559873402118683, "learning_rate": 0.0001814372424908044, "loss": 1.3338, "step": 7157 }, { "epoch": 0.09301494534982124, "grad_norm": 0.41714149713516235, "learning_rate": 0.00018143464302889303, "loss": 1.4866, "step": 7158 }, { "epoch": 0.09302793989373712, "grad_norm": 0.4089769124984741, "learning_rate": 0.00018143204356698165, "loss": 1.3828, "step": 7159 }, { "epoch": 0.09304093443765299, "grad_norm": 0.3431582450866699, "learning_rate": 0.00018142944410507025, "loss": 1.5926, "step": 7160 }, { "epoch": 0.09305392898156886, "grad_norm": 0.35248029232025146, "learning_rate": 0.00018142684464315888, "loss": 1.5597, "step": 7161 }, { "epoch": 0.09306692352548473, "grad_norm": 0.47301214933395386, "learning_rate": 0.0001814242451812475, "loss": 1.5375, "step": 7162 }, { "epoch": 0.09307991806940061, "grad_norm": 0.4430922567844391, "learning_rate": 0.0001814216457193361, "loss": 1.4473, "step": 7163 }, { "epoch": 0.09309291261331648, "grad_norm": 0.28790032863616943, "learning_rate": 0.00018141904625742472, "loss": 1.1821, "step": 7164 }, { "epoch": 0.09310590715723235, "grad_norm": 0.41752859950065613, "learning_rate": 0.00018141644679551332, "loss": 1.5598, "step": 7165 }, { "epoch": 0.09311890170114823, "grad_norm": 0.4206375181674957, "learning_rate": 0.00018141384733360197, "loss": 1.5172, "step": 7166 }, { "epoch": 0.0931318962450641, "grad_norm": 0.3069722652435303, "learning_rate": 0.00018141124787169057, "loss": 1.4652, "step": 7167 }, { "epoch": 0.09314489078897997, "grad_norm": 0.3917238116264343, "learning_rate": 0.0001814086484097792, "loss": 1.4294, "step": 7168 }, { "epoch": 0.09315788533289585, "grad_norm": 0.2199425846338272, "learning_rate": 0.0001814060489478678, "loss": 1.2917, "step": 7169 }, { "epoch": 0.09317087987681172, "grad_norm": 0.3223063051700592, "learning_rate": 0.00018140344948595641, "loss": 1.3479, "step": 7170 }, { "epoch": 0.09318387442072759, "grad_norm": 0.37226495146751404, "learning_rate": 0.00018140085002404504, "loss": 1.5153, "step": 7171 }, { "epoch": 0.09319686896464346, "grad_norm": 0.351835697889328, "learning_rate": 0.00018139825056213364, "loss": 1.3814, "step": 7172 }, { "epoch": 0.09320986350855934, "grad_norm": 0.4412347972393036, "learning_rate": 0.00018139565110022226, "loss": 1.4869, "step": 7173 }, { "epoch": 0.09322285805247521, "grad_norm": 0.4145277142524719, "learning_rate": 0.00018139305163831089, "loss": 1.5661, "step": 7174 }, { "epoch": 0.09323585259639108, "grad_norm": 0.4118339419364929, "learning_rate": 0.00018139045217639948, "loss": 1.2829, "step": 7175 }, { "epoch": 0.09324884714030697, "grad_norm": 0.27595028281211853, "learning_rate": 0.0001813878527144881, "loss": 1.3253, "step": 7176 }, { "epoch": 0.09326184168422284, "grad_norm": 0.2849619686603546, "learning_rate": 0.0001813852532525767, "loss": 1.3401, "step": 7177 }, { "epoch": 0.09327483622813872, "grad_norm": 0.32255294919013977, "learning_rate": 0.00018138265379066536, "loss": 1.3041, "step": 7178 }, { "epoch": 0.09328783077205459, "grad_norm": 0.33268553018569946, "learning_rate": 0.00018138005432875395, "loss": 1.4912, "step": 7179 }, { "epoch": 0.09330082531597046, "grad_norm": 0.41254350543022156, "learning_rate": 0.00018137745486684258, "loss": 1.632, "step": 7180 }, { "epoch": 0.09331381985988633, "grad_norm": 0.4047669768333435, "learning_rate": 0.0001813748554049312, "loss": 1.4541, "step": 7181 }, { "epoch": 0.09332681440380221, "grad_norm": 0.4384784698486328, "learning_rate": 0.0001813722559430198, "loss": 1.5041, "step": 7182 }, { "epoch": 0.09333980894771808, "grad_norm": 0.40634799003601074, "learning_rate": 0.00018136965648110842, "loss": 1.5847, "step": 7183 }, { "epoch": 0.09335280349163395, "grad_norm": 0.2665691077709198, "learning_rate": 0.00018136705701919702, "loss": 1.1773, "step": 7184 }, { "epoch": 0.09336579803554983, "grad_norm": 0.40051954984664917, "learning_rate": 0.00018136445755728567, "loss": 1.5293, "step": 7185 }, { "epoch": 0.0933787925794657, "grad_norm": 0.40561601519584656, "learning_rate": 0.00018136185809537427, "loss": 1.2482, "step": 7186 }, { "epoch": 0.09339178712338157, "grad_norm": 0.35493436455726624, "learning_rate": 0.00018135925863346287, "loss": 1.371, "step": 7187 }, { "epoch": 0.09340478166729745, "grad_norm": 0.5505121350288391, "learning_rate": 0.0001813566591715515, "loss": 1.6067, "step": 7188 }, { "epoch": 0.09341777621121332, "grad_norm": 0.37096771597862244, "learning_rate": 0.00018135405970964012, "loss": 1.5045, "step": 7189 }, { "epoch": 0.09343077075512919, "grad_norm": 0.3663386106491089, "learning_rate": 0.00018135146024772874, "loss": 1.3917, "step": 7190 }, { "epoch": 0.09344376529904506, "grad_norm": 0.331105500459671, "learning_rate": 0.00018134886078581734, "loss": 1.3407, "step": 7191 }, { "epoch": 0.09345675984296094, "grad_norm": 0.3691966235637665, "learning_rate": 0.00018134626132390596, "loss": 1.4897, "step": 7192 }, { "epoch": 0.09346975438687681, "grad_norm": 0.5245912075042725, "learning_rate": 0.0001813436618619946, "loss": 1.4214, "step": 7193 }, { "epoch": 0.09348274893079268, "grad_norm": 0.4015735387802124, "learning_rate": 0.00018134106240008319, "loss": 1.2476, "step": 7194 }, { "epoch": 0.09349574347470856, "grad_norm": 0.40817928314208984, "learning_rate": 0.0001813384629381718, "loss": 1.2945, "step": 7195 }, { "epoch": 0.09350873801862443, "grad_norm": 0.5424723029136658, "learning_rate": 0.0001813358634762604, "loss": 1.508, "step": 7196 }, { "epoch": 0.0935217325625403, "grad_norm": 0.4100075960159302, "learning_rate": 0.00018133326401434906, "loss": 1.5291, "step": 7197 }, { "epoch": 0.09353472710645618, "grad_norm": 0.2901502847671509, "learning_rate": 0.00018133066455243766, "loss": 1.6145, "step": 7198 }, { "epoch": 0.09354772165037205, "grad_norm": 0.368080198764801, "learning_rate": 0.00018132806509052625, "loss": 1.2638, "step": 7199 }, { "epoch": 0.09356071619428792, "grad_norm": 0.4281146228313446, "learning_rate": 0.00018132546562861488, "loss": 1.4845, "step": 7200 }, { "epoch": 0.0935737107382038, "grad_norm": 0.3882058262825012, "learning_rate": 0.0001813228661667035, "loss": 1.3361, "step": 7201 }, { "epoch": 0.09358670528211967, "grad_norm": 0.3651081323623657, "learning_rate": 0.00018132026670479213, "loss": 1.515, "step": 7202 }, { "epoch": 0.09359969982603554, "grad_norm": 0.34518468379974365, "learning_rate": 0.00018131766724288072, "loss": 1.3506, "step": 7203 }, { "epoch": 0.09361269436995141, "grad_norm": 0.4185968041419983, "learning_rate": 0.00018131506778096935, "loss": 1.5779, "step": 7204 }, { "epoch": 0.09362568891386729, "grad_norm": 0.4731001555919647, "learning_rate": 0.00018131246831905797, "loss": 1.5335, "step": 7205 }, { "epoch": 0.09363868345778316, "grad_norm": 0.3844606280326843, "learning_rate": 0.00018130986885714657, "loss": 1.3826, "step": 7206 }, { "epoch": 0.09365167800169903, "grad_norm": 0.3645429313182831, "learning_rate": 0.0001813072693952352, "loss": 1.4334, "step": 7207 }, { "epoch": 0.0936646725456149, "grad_norm": 0.44046902656555176, "learning_rate": 0.0001813046699333238, "loss": 1.2695, "step": 7208 }, { "epoch": 0.09367766708953078, "grad_norm": 0.4989940822124481, "learning_rate": 0.00018130207047141244, "loss": 1.4647, "step": 7209 }, { "epoch": 0.09369066163344665, "grad_norm": 0.3138985335826874, "learning_rate": 0.00018129947100950104, "loss": 1.3611, "step": 7210 }, { "epoch": 0.09370365617736252, "grad_norm": 0.4873834550380707, "learning_rate": 0.00018129687154758967, "loss": 1.4739, "step": 7211 }, { "epoch": 0.0937166507212784, "grad_norm": 0.4353213608264923, "learning_rate": 0.00018129427208567826, "loss": 1.2071, "step": 7212 }, { "epoch": 0.09372964526519427, "grad_norm": 0.43387168645858765, "learning_rate": 0.0001812916726237669, "loss": 1.4833, "step": 7213 }, { "epoch": 0.09374263980911016, "grad_norm": 0.4404866695404053, "learning_rate": 0.0001812890731618555, "loss": 1.3925, "step": 7214 }, { "epoch": 0.09375563435302603, "grad_norm": 0.48707398772239685, "learning_rate": 0.0001812864736999441, "loss": 1.5795, "step": 7215 }, { "epoch": 0.0937686288969419, "grad_norm": 0.3479972183704376, "learning_rate": 0.00018128387423803273, "loss": 1.4622, "step": 7216 }, { "epoch": 0.09378162344085778, "grad_norm": 0.24355284869670868, "learning_rate": 0.00018128127477612136, "loss": 1.2919, "step": 7217 }, { "epoch": 0.09379461798477365, "grad_norm": 0.39244765043258667, "learning_rate": 0.00018127867531420996, "loss": 1.5117, "step": 7218 }, { "epoch": 0.09380761252868952, "grad_norm": 0.32687613368034363, "learning_rate": 0.00018127607585229858, "loss": 1.1883, "step": 7219 }, { "epoch": 0.0938206070726054, "grad_norm": 0.4393558204174042, "learning_rate": 0.0001812734763903872, "loss": 1.4521, "step": 7220 }, { "epoch": 0.09383360161652127, "grad_norm": 0.34508129954338074, "learning_rate": 0.00018127087692847583, "loss": 1.2808, "step": 7221 }, { "epoch": 0.09384659616043714, "grad_norm": 0.4363642930984497, "learning_rate": 0.00018126827746656443, "loss": 1.4579, "step": 7222 }, { "epoch": 0.09385959070435301, "grad_norm": 0.404473215341568, "learning_rate": 0.00018126567800465305, "loss": 1.2808, "step": 7223 }, { "epoch": 0.09387258524826889, "grad_norm": 0.4129363000392914, "learning_rate": 0.00018126307854274168, "loss": 1.2411, "step": 7224 }, { "epoch": 0.09388557979218476, "grad_norm": 0.39193013310432434, "learning_rate": 0.00018126047908083027, "loss": 1.4219, "step": 7225 }, { "epoch": 0.09389857433610063, "grad_norm": 0.4417882263660431, "learning_rate": 0.0001812578796189189, "loss": 1.3255, "step": 7226 }, { "epoch": 0.0939115688800165, "grad_norm": 0.31806811690330505, "learning_rate": 0.0001812552801570075, "loss": 1.2958, "step": 7227 }, { "epoch": 0.09392456342393238, "grad_norm": 0.3990212380886078, "learning_rate": 0.00018125268069509612, "loss": 1.4539, "step": 7228 }, { "epoch": 0.09393755796784825, "grad_norm": 0.3806932270526886, "learning_rate": 0.00018125008123318474, "loss": 1.6653, "step": 7229 }, { "epoch": 0.09395055251176412, "grad_norm": 0.3945797383785248, "learning_rate": 0.00018124748177127334, "loss": 1.4329, "step": 7230 }, { "epoch": 0.09396354705568, "grad_norm": 0.4346785247325897, "learning_rate": 0.00018124488230936197, "loss": 1.5304, "step": 7231 }, { "epoch": 0.09397654159959587, "grad_norm": 0.32155469059944153, "learning_rate": 0.0001812422828474506, "loss": 1.0665, "step": 7232 }, { "epoch": 0.09398953614351174, "grad_norm": 0.394033282995224, "learning_rate": 0.00018123968338553922, "loss": 1.365, "step": 7233 }, { "epoch": 0.09400253068742762, "grad_norm": 0.46820324659347534, "learning_rate": 0.0001812370839236278, "loss": 1.4929, "step": 7234 }, { "epoch": 0.09401552523134349, "grad_norm": 0.3518800437450409, "learning_rate": 0.00018123448446171644, "loss": 1.4794, "step": 7235 }, { "epoch": 0.09402851977525936, "grad_norm": 0.5230505466461182, "learning_rate": 0.00018123188499980506, "loss": 1.447, "step": 7236 }, { "epoch": 0.09404151431917523, "grad_norm": 0.42552244663238525, "learning_rate": 0.00018122928553789366, "loss": 1.2292, "step": 7237 }, { "epoch": 0.09405450886309111, "grad_norm": 0.49563997983932495, "learning_rate": 0.00018122668607598228, "loss": 1.4937, "step": 7238 }, { "epoch": 0.09406750340700698, "grad_norm": 0.33242788910865784, "learning_rate": 0.00018122408661407088, "loss": 1.4184, "step": 7239 }, { "epoch": 0.09408049795092285, "grad_norm": 0.41565611958503723, "learning_rate": 0.00018122148715215953, "loss": 1.5235, "step": 7240 }, { "epoch": 0.09409349249483873, "grad_norm": 0.38495129346847534, "learning_rate": 0.00018121888769024813, "loss": 1.3662, "step": 7241 }, { "epoch": 0.0941064870387546, "grad_norm": 0.432624876499176, "learning_rate": 0.00018121628822833673, "loss": 1.4292, "step": 7242 }, { "epoch": 0.09411948158267047, "grad_norm": 0.3683008849620819, "learning_rate": 0.00018121368876642535, "loss": 1.5875, "step": 7243 }, { "epoch": 0.09413247612658635, "grad_norm": 0.35654228925704956, "learning_rate": 0.00018121108930451398, "loss": 1.5008, "step": 7244 }, { "epoch": 0.09414547067050222, "grad_norm": 0.36029231548309326, "learning_rate": 0.0001812084898426026, "loss": 1.4172, "step": 7245 }, { "epoch": 0.09415846521441809, "grad_norm": 0.414405882358551, "learning_rate": 0.0001812058903806912, "loss": 1.5133, "step": 7246 }, { "epoch": 0.09417145975833396, "grad_norm": 0.4873195290565491, "learning_rate": 0.00018120329091877982, "loss": 1.5078, "step": 7247 }, { "epoch": 0.09418445430224984, "grad_norm": 0.5806477665901184, "learning_rate": 0.00018120069145686845, "loss": 1.4059, "step": 7248 }, { "epoch": 0.09419744884616571, "grad_norm": 0.41904518008232117, "learning_rate": 0.00018119809199495704, "loss": 1.5204, "step": 7249 }, { "epoch": 0.09421044339008158, "grad_norm": 0.421613872051239, "learning_rate": 0.00018119549253304567, "loss": 1.4697, "step": 7250 }, { "epoch": 0.09422343793399746, "grad_norm": 0.3459349572658539, "learning_rate": 0.00018119289307113427, "loss": 1.3191, "step": 7251 }, { "epoch": 0.09423643247791334, "grad_norm": 0.42944464087486267, "learning_rate": 0.00018119029360922292, "loss": 1.3137, "step": 7252 }, { "epoch": 0.09424942702182922, "grad_norm": 0.3202815651893616, "learning_rate": 0.00018118769414731152, "loss": 1.4005, "step": 7253 }, { "epoch": 0.09426242156574509, "grad_norm": 0.32814428210258484, "learning_rate": 0.0001811850946854001, "loss": 1.5978, "step": 7254 }, { "epoch": 0.09427541610966096, "grad_norm": 0.44766125082969666, "learning_rate": 0.00018118249522348876, "loss": 1.3655, "step": 7255 }, { "epoch": 0.09428841065357683, "grad_norm": 0.2914086878299713, "learning_rate": 0.00018117989576157736, "loss": 1.4049, "step": 7256 }, { "epoch": 0.09430140519749271, "grad_norm": 0.39988675713539124, "learning_rate": 0.00018117729629966599, "loss": 1.4526, "step": 7257 }, { "epoch": 0.09431439974140858, "grad_norm": 0.3311614394187927, "learning_rate": 0.00018117469683775458, "loss": 1.3424, "step": 7258 }, { "epoch": 0.09432739428532445, "grad_norm": 0.4980418384075165, "learning_rate": 0.0001811720973758432, "loss": 1.4031, "step": 7259 }, { "epoch": 0.09434038882924033, "grad_norm": 0.6409164071083069, "learning_rate": 0.00018116949791393183, "loss": 1.5083, "step": 7260 }, { "epoch": 0.0943533833731562, "grad_norm": 0.6993975043296814, "learning_rate": 0.00018116689845202043, "loss": 1.557, "step": 7261 }, { "epoch": 0.09436637791707207, "grad_norm": 0.4055137634277344, "learning_rate": 0.00018116429899010905, "loss": 1.2874, "step": 7262 }, { "epoch": 0.09437937246098795, "grad_norm": 0.36568740010261536, "learning_rate": 0.00018116169952819768, "loss": 1.7407, "step": 7263 }, { "epoch": 0.09439236700490382, "grad_norm": 0.2845642566680908, "learning_rate": 0.0001811591000662863, "loss": 1.3145, "step": 7264 }, { "epoch": 0.09440536154881969, "grad_norm": 0.45284831523895264, "learning_rate": 0.0001811565006043749, "loss": 1.4203, "step": 7265 }, { "epoch": 0.09441835609273556, "grad_norm": 0.46527132391929626, "learning_rate": 0.0001811539011424635, "loss": 1.5508, "step": 7266 }, { "epoch": 0.09443135063665144, "grad_norm": 0.3700307309627533, "learning_rate": 0.00018115130168055215, "loss": 1.4195, "step": 7267 }, { "epoch": 0.09444434518056731, "grad_norm": 0.45265263319015503, "learning_rate": 0.00018114870221864075, "loss": 1.3754, "step": 7268 }, { "epoch": 0.09445733972448318, "grad_norm": 0.44388577342033386, "learning_rate": 0.00018114610275672937, "loss": 1.39, "step": 7269 }, { "epoch": 0.09447033426839906, "grad_norm": 0.3856041431427002, "learning_rate": 0.00018114350329481797, "loss": 1.3591, "step": 7270 }, { "epoch": 0.09448332881231493, "grad_norm": 0.37693923711776733, "learning_rate": 0.0001811409038329066, "loss": 1.4226, "step": 7271 }, { "epoch": 0.0944963233562308, "grad_norm": 0.368068128824234, "learning_rate": 0.00018113830437099522, "loss": 1.4905, "step": 7272 }, { "epoch": 0.09450931790014667, "grad_norm": 0.4132508337497711, "learning_rate": 0.00018113570490908381, "loss": 1.3154, "step": 7273 }, { "epoch": 0.09452231244406255, "grad_norm": 0.39588838815689087, "learning_rate": 0.00018113310544717244, "loss": 1.3746, "step": 7274 }, { "epoch": 0.09453530698797842, "grad_norm": 0.4163910150527954, "learning_rate": 0.00018113050598526106, "loss": 1.2441, "step": 7275 }, { "epoch": 0.0945483015318943, "grad_norm": 0.35650166869163513, "learning_rate": 0.0001811279065233497, "loss": 1.2851, "step": 7276 }, { "epoch": 0.09456129607581017, "grad_norm": 0.4032966196537018, "learning_rate": 0.00018112530706143829, "loss": 1.5843, "step": 7277 }, { "epoch": 0.09457429061972604, "grad_norm": 0.3548662066459656, "learning_rate": 0.0001811227075995269, "loss": 1.5866, "step": 7278 }, { "epoch": 0.09458728516364191, "grad_norm": 0.4279075860977173, "learning_rate": 0.00018112010813761553, "loss": 1.3916, "step": 7279 }, { "epoch": 0.09460027970755779, "grad_norm": 0.31919071078300476, "learning_rate": 0.00018111750867570413, "loss": 1.535, "step": 7280 }, { "epoch": 0.09461327425147366, "grad_norm": 0.40728500485420227, "learning_rate": 0.00018111490921379276, "loss": 1.6466, "step": 7281 }, { "epoch": 0.09462626879538953, "grad_norm": 0.25449395179748535, "learning_rate": 0.00018111230975188135, "loss": 1.4623, "step": 7282 }, { "epoch": 0.0946392633393054, "grad_norm": 0.4637458324432373, "learning_rate": 0.00018110971028996998, "loss": 1.4263, "step": 7283 }, { "epoch": 0.09465225788322128, "grad_norm": 0.329838365316391, "learning_rate": 0.0001811071108280586, "loss": 1.5491, "step": 7284 }, { "epoch": 0.09466525242713715, "grad_norm": 0.29775992035865784, "learning_rate": 0.0001811045113661472, "loss": 1.5777, "step": 7285 }, { "epoch": 0.09467824697105302, "grad_norm": 0.41244420409202576, "learning_rate": 0.00018110191190423582, "loss": 1.3067, "step": 7286 }, { "epoch": 0.0946912415149689, "grad_norm": 0.318005234003067, "learning_rate": 0.00018109931244232445, "loss": 1.3772, "step": 7287 }, { "epoch": 0.09470423605888477, "grad_norm": 0.38731497526168823, "learning_rate": 0.00018109671298041307, "loss": 1.4829, "step": 7288 }, { "epoch": 0.09471723060280064, "grad_norm": 0.35857513546943665, "learning_rate": 0.00018109411351850167, "loss": 1.4593, "step": 7289 }, { "epoch": 0.09473022514671653, "grad_norm": 0.4021919369697571, "learning_rate": 0.0001810915140565903, "loss": 1.4017, "step": 7290 }, { "epoch": 0.0947432196906324, "grad_norm": 0.4414508044719696, "learning_rate": 0.00018108891459467892, "loss": 1.5618, "step": 7291 }, { "epoch": 0.09475621423454827, "grad_norm": 0.4374082088470459, "learning_rate": 0.00018108631513276752, "loss": 1.4636, "step": 7292 }, { "epoch": 0.09476920877846415, "grad_norm": 0.41006237268447876, "learning_rate": 0.00018108371567085614, "loss": 1.4143, "step": 7293 }, { "epoch": 0.09478220332238002, "grad_norm": 0.30947592854499817, "learning_rate": 0.00018108111620894477, "loss": 1.4089, "step": 7294 }, { "epoch": 0.0947951978662959, "grad_norm": 0.4499688148498535, "learning_rate": 0.0001810785167470334, "loss": 1.5636, "step": 7295 }, { "epoch": 0.09480819241021177, "grad_norm": 0.6653492450714111, "learning_rate": 0.000181075917285122, "loss": 1.4635, "step": 7296 }, { "epoch": 0.09482118695412764, "grad_norm": 0.3968139588832855, "learning_rate": 0.00018107331782321059, "loss": 1.4746, "step": 7297 }, { "epoch": 0.09483418149804351, "grad_norm": 0.3583230972290039, "learning_rate": 0.00018107071836129924, "loss": 1.4722, "step": 7298 }, { "epoch": 0.09484717604195939, "grad_norm": 0.4312600791454315, "learning_rate": 0.00018106811889938783, "loss": 1.3817, "step": 7299 }, { "epoch": 0.09486017058587526, "grad_norm": 0.406304270029068, "learning_rate": 0.00018106551943747646, "loss": 1.5599, "step": 7300 }, { "epoch": 0.09487316512979113, "grad_norm": 0.36707812547683716, "learning_rate": 0.00018106291997556506, "loss": 1.2522, "step": 7301 }, { "epoch": 0.094886159673707, "grad_norm": 0.43151021003723145, "learning_rate": 0.00018106032051365368, "loss": 1.4555, "step": 7302 }, { "epoch": 0.09489915421762288, "grad_norm": 0.5606217384338379, "learning_rate": 0.0001810577210517423, "loss": 1.5608, "step": 7303 }, { "epoch": 0.09491214876153875, "grad_norm": 0.297919899225235, "learning_rate": 0.0001810551215898309, "loss": 1.2231, "step": 7304 }, { "epoch": 0.09492514330545462, "grad_norm": 0.5842865705490112, "learning_rate": 0.00018105252212791953, "loss": 1.5461, "step": 7305 }, { "epoch": 0.0949381378493705, "grad_norm": 0.40130487084388733, "learning_rate": 0.00018104992266600815, "loss": 1.3524, "step": 7306 }, { "epoch": 0.09495113239328637, "grad_norm": 0.3985798954963684, "learning_rate": 0.00018104732320409678, "loss": 1.2793, "step": 7307 }, { "epoch": 0.09496412693720224, "grad_norm": 0.35852622985839844, "learning_rate": 0.00018104472374218537, "loss": 1.3091, "step": 7308 }, { "epoch": 0.09497712148111812, "grad_norm": 0.39018869400024414, "learning_rate": 0.00018104212428027397, "loss": 1.3751, "step": 7309 }, { "epoch": 0.09499011602503399, "grad_norm": 0.5982402563095093, "learning_rate": 0.00018103952481836262, "loss": 1.4635, "step": 7310 }, { "epoch": 0.09500311056894986, "grad_norm": 0.4077194631099701, "learning_rate": 0.00018103692535645122, "loss": 1.4362, "step": 7311 }, { "epoch": 0.09501610511286573, "grad_norm": 0.3744657039642334, "learning_rate": 0.00018103432589453984, "loss": 1.2755, "step": 7312 }, { "epoch": 0.09502909965678161, "grad_norm": 0.4277615547180176, "learning_rate": 0.00018103172643262844, "loss": 1.4048, "step": 7313 }, { "epoch": 0.09504209420069748, "grad_norm": 0.3324318528175354, "learning_rate": 0.00018102912697071707, "loss": 1.3802, "step": 7314 }, { "epoch": 0.09505508874461335, "grad_norm": 0.3124167025089264, "learning_rate": 0.0001810265275088057, "loss": 1.2795, "step": 7315 }, { "epoch": 0.09506808328852923, "grad_norm": 0.4548947513103485, "learning_rate": 0.0001810239280468943, "loss": 1.6053, "step": 7316 }, { "epoch": 0.0950810778324451, "grad_norm": 0.4082011580467224, "learning_rate": 0.0001810213285849829, "loss": 1.6244, "step": 7317 }, { "epoch": 0.09509407237636097, "grad_norm": 0.3603448271751404, "learning_rate": 0.00018101872912307154, "loss": 1.278, "step": 7318 }, { "epoch": 0.09510706692027684, "grad_norm": 0.4153482913970947, "learning_rate": 0.00018101612966116016, "loss": 1.4195, "step": 7319 }, { "epoch": 0.09512006146419272, "grad_norm": 0.4001554548740387, "learning_rate": 0.00018101353019924876, "loss": 1.328, "step": 7320 }, { "epoch": 0.09513305600810859, "grad_norm": 0.46756473183631897, "learning_rate": 0.00018101093073733736, "loss": 1.292, "step": 7321 }, { "epoch": 0.09514605055202446, "grad_norm": 0.39417633414268494, "learning_rate": 0.000181008331275426, "loss": 1.3832, "step": 7322 }, { "epoch": 0.09515904509594034, "grad_norm": 0.45010629296302795, "learning_rate": 0.0001810057318135146, "loss": 1.5333, "step": 7323 }, { "epoch": 0.09517203963985621, "grad_norm": 0.3506676256656647, "learning_rate": 0.00018100313235160323, "loss": 1.5742, "step": 7324 }, { "epoch": 0.09518503418377208, "grad_norm": 0.4224172532558441, "learning_rate": 0.00018100053288969183, "loss": 1.4842, "step": 7325 }, { "epoch": 0.09519802872768796, "grad_norm": 0.34364381432533264, "learning_rate": 0.00018099793342778045, "loss": 1.3448, "step": 7326 }, { "epoch": 0.09521102327160383, "grad_norm": 0.43910667300224304, "learning_rate": 0.00018099533396586908, "loss": 1.3444, "step": 7327 }, { "epoch": 0.09522401781551972, "grad_norm": 0.39456990361213684, "learning_rate": 0.00018099273450395767, "loss": 1.5078, "step": 7328 }, { "epoch": 0.09523701235943559, "grad_norm": 0.3839728832244873, "learning_rate": 0.00018099013504204633, "loss": 1.4328, "step": 7329 }, { "epoch": 0.09525000690335146, "grad_norm": 0.3956484794616699, "learning_rate": 0.00018098753558013492, "loss": 1.4098, "step": 7330 }, { "epoch": 0.09526300144726733, "grad_norm": 0.4149603545665741, "learning_rate": 0.00018098493611822355, "loss": 1.3573, "step": 7331 }, { "epoch": 0.09527599599118321, "grad_norm": 0.3212421238422394, "learning_rate": 0.00018098233665631214, "loss": 1.4073, "step": 7332 }, { "epoch": 0.09528899053509908, "grad_norm": 0.4270807206630707, "learning_rate": 0.00018097973719440077, "loss": 1.4798, "step": 7333 }, { "epoch": 0.09530198507901495, "grad_norm": 0.48940223455429077, "learning_rate": 0.0001809771377324894, "loss": 1.4807, "step": 7334 }, { "epoch": 0.09531497962293083, "grad_norm": 0.39676734805107117, "learning_rate": 0.000180974538270578, "loss": 1.5772, "step": 7335 }, { "epoch": 0.0953279741668467, "grad_norm": 0.3974304795265198, "learning_rate": 0.00018097193880866662, "loss": 1.4628, "step": 7336 }, { "epoch": 0.09534096871076257, "grad_norm": 0.3763043284416199, "learning_rate": 0.00018096933934675524, "loss": 1.4777, "step": 7337 }, { "epoch": 0.09535396325467844, "grad_norm": 0.4016934335231781, "learning_rate": 0.00018096673988484384, "loss": 1.4591, "step": 7338 }, { "epoch": 0.09536695779859432, "grad_norm": 0.4639422297477722, "learning_rate": 0.00018096414042293246, "loss": 1.5875, "step": 7339 }, { "epoch": 0.09537995234251019, "grad_norm": 0.47356316447257996, "learning_rate": 0.00018096154096102106, "loss": 1.4073, "step": 7340 }, { "epoch": 0.09539294688642606, "grad_norm": 0.4202554523944855, "learning_rate": 0.0001809589414991097, "loss": 1.1655, "step": 7341 }, { "epoch": 0.09540594143034194, "grad_norm": 0.3120211362838745, "learning_rate": 0.0001809563420371983, "loss": 1.3712, "step": 7342 }, { "epoch": 0.09541893597425781, "grad_norm": 0.30092450976371765, "learning_rate": 0.00018095374257528693, "loss": 1.5021, "step": 7343 }, { "epoch": 0.09543193051817368, "grad_norm": 0.33890169858932495, "learning_rate": 0.00018095114311337553, "loss": 1.4425, "step": 7344 }, { "epoch": 0.09544492506208956, "grad_norm": 0.44607123732566833, "learning_rate": 0.00018094854365146415, "loss": 1.6243, "step": 7345 }, { "epoch": 0.09545791960600543, "grad_norm": 0.41282761096954346, "learning_rate": 0.00018094594418955278, "loss": 1.4266, "step": 7346 }, { "epoch": 0.0954709141499213, "grad_norm": 0.375723659992218, "learning_rate": 0.00018094334472764138, "loss": 1.5753, "step": 7347 }, { "epoch": 0.09548390869383717, "grad_norm": 0.3935757279396057, "learning_rate": 0.00018094074526573, "loss": 1.4751, "step": 7348 }, { "epoch": 0.09549690323775305, "grad_norm": 0.3137151896953583, "learning_rate": 0.00018093814580381863, "loss": 1.4364, "step": 7349 }, { "epoch": 0.09550989778166892, "grad_norm": 0.44521477818489075, "learning_rate": 0.00018093554634190722, "loss": 1.4635, "step": 7350 }, { "epoch": 0.0955228923255848, "grad_norm": 0.5662813186645508, "learning_rate": 0.00018093294687999585, "loss": 1.5626, "step": 7351 }, { "epoch": 0.09553588686950067, "grad_norm": 0.3745267689228058, "learning_rate": 0.00018093034741808444, "loss": 1.4033, "step": 7352 }, { "epoch": 0.09554888141341654, "grad_norm": 0.402737021446228, "learning_rate": 0.0001809277479561731, "loss": 1.5217, "step": 7353 }, { "epoch": 0.09556187595733241, "grad_norm": 0.44217994809150696, "learning_rate": 0.0001809251484942617, "loss": 1.5257, "step": 7354 }, { "epoch": 0.09557487050124829, "grad_norm": 0.33841899037361145, "learning_rate": 0.00018092254903235032, "loss": 1.3847, "step": 7355 }, { "epoch": 0.09558786504516416, "grad_norm": 0.3889140188694, "learning_rate": 0.00018091994957043892, "loss": 1.4086, "step": 7356 }, { "epoch": 0.09560085958908003, "grad_norm": 0.42446374893188477, "learning_rate": 0.00018091735010852754, "loss": 1.3973, "step": 7357 }, { "epoch": 0.0956138541329959, "grad_norm": 0.406424880027771, "learning_rate": 0.00018091475064661616, "loss": 1.3708, "step": 7358 }, { "epoch": 0.09562684867691178, "grad_norm": 0.3968759775161743, "learning_rate": 0.00018091215118470476, "loss": 1.1888, "step": 7359 }, { "epoch": 0.09563984322082765, "grad_norm": 0.3304159939289093, "learning_rate": 0.00018090955172279339, "loss": 1.196, "step": 7360 }, { "epoch": 0.09565283776474352, "grad_norm": 0.34259432554244995, "learning_rate": 0.000180906952260882, "loss": 1.3919, "step": 7361 }, { "epoch": 0.0956658323086594, "grad_norm": 0.41082262992858887, "learning_rate": 0.00018090435279897064, "loss": 1.3382, "step": 7362 }, { "epoch": 0.09567882685257527, "grad_norm": 0.46010255813598633, "learning_rate": 0.00018090175333705923, "loss": 1.5625, "step": 7363 }, { "epoch": 0.09569182139649114, "grad_norm": 0.3312811255455017, "learning_rate": 0.00018089915387514783, "loss": 1.3329, "step": 7364 }, { "epoch": 0.09570481594040701, "grad_norm": 0.3977181613445282, "learning_rate": 0.00018089655441323648, "loss": 1.3733, "step": 7365 }, { "epoch": 0.0957178104843229, "grad_norm": 0.398436963558197, "learning_rate": 0.00018089395495132508, "loss": 1.4563, "step": 7366 }, { "epoch": 0.09573080502823877, "grad_norm": 0.43061181902885437, "learning_rate": 0.0001808913554894137, "loss": 1.5571, "step": 7367 }, { "epoch": 0.09574379957215465, "grad_norm": 0.35109207034111023, "learning_rate": 0.00018088875602750233, "loss": 1.302, "step": 7368 }, { "epoch": 0.09575679411607052, "grad_norm": 0.34773433208465576, "learning_rate": 0.00018088615656559093, "loss": 1.4575, "step": 7369 }, { "epoch": 0.0957697886599864, "grad_norm": 0.4366167485713959, "learning_rate": 0.00018088355710367955, "loss": 1.3934, "step": 7370 }, { "epoch": 0.09578278320390227, "grad_norm": 0.37397125363349915, "learning_rate": 0.00018088095764176815, "loss": 1.5105, "step": 7371 }, { "epoch": 0.09579577774781814, "grad_norm": 0.47733554244041443, "learning_rate": 0.0001808783581798568, "loss": 1.4562, "step": 7372 }, { "epoch": 0.09580877229173401, "grad_norm": 0.4071448743343353, "learning_rate": 0.0001808757587179454, "loss": 1.5588, "step": 7373 }, { "epoch": 0.09582176683564989, "grad_norm": 0.2880563735961914, "learning_rate": 0.00018087315925603402, "loss": 1.3191, "step": 7374 }, { "epoch": 0.09583476137956576, "grad_norm": 0.3139370083808899, "learning_rate": 0.00018087055979412262, "loss": 1.2754, "step": 7375 }, { "epoch": 0.09584775592348163, "grad_norm": 0.4224952161312103, "learning_rate": 0.00018086796033221124, "loss": 1.5666, "step": 7376 }, { "epoch": 0.0958607504673975, "grad_norm": 0.3922746777534485, "learning_rate": 0.00018086536087029987, "loss": 1.671, "step": 7377 }, { "epoch": 0.09587374501131338, "grad_norm": 0.41716238856315613, "learning_rate": 0.00018086276140838846, "loss": 1.3269, "step": 7378 }, { "epoch": 0.09588673955522925, "grad_norm": 0.45813536643981934, "learning_rate": 0.0001808601619464771, "loss": 1.6024, "step": 7379 }, { "epoch": 0.09589973409914512, "grad_norm": 0.33751386404037476, "learning_rate": 0.0001808575624845657, "loss": 1.2527, "step": 7380 }, { "epoch": 0.095912728643061, "grad_norm": 0.4570043981075287, "learning_rate": 0.0001808549630226543, "loss": 1.5184, "step": 7381 }, { "epoch": 0.09592572318697687, "grad_norm": 0.35360071063041687, "learning_rate": 0.00018085236356074294, "loss": 1.4118, "step": 7382 }, { "epoch": 0.09593871773089274, "grad_norm": 0.2964922785758972, "learning_rate": 0.00018084976409883153, "loss": 1.5343, "step": 7383 }, { "epoch": 0.09595171227480861, "grad_norm": 0.40963271260261536, "learning_rate": 0.00018084716463692018, "loss": 1.4879, "step": 7384 }, { "epoch": 0.09596470681872449, "grad_norm": 0.4206002354621887, "learning_rate": 0.00018084456517500878, "loss": 1.3153, "step": 7385 }, { "epoch": 0.09597770136264036, "grad_norm": 0.3872007727622986, "learning_rate": 0.0001808419657130974, "loss": 1.5442, "step": 7386 }, { "epoch": 0.09599069590655623, "grad_norm": 0.3645806312561035, "learning_rate": 0.000180839366251186, "loss": 1.4758, "step": 7387 }, { "epoch": 0.0960036904504721, "grad_norm": 0.46816349029541016, "learning_rate": 0.00018083676678927463, "loss": 1.4461, "step": 7388 }, { "epoch": 0.09601668499438798, "grad_norm": 0.3602139353752136, "learning_rate": 0.00018083416732736325, "loss": 1.4969, "step": 7389 }, { "epoch": 0.09602967953830385, "grad_norm": 0.3550172448158264, "learning_rate": 0.00018083156786545185, "loss": 1.518, "step": 7390 }, { "epoch": 0.09604267408221973, "grad_norm": 0.46977195143699646, "learning_rate": 0.00018082896840354047, "loss": 1.6275, "step": 7391 }, { "epoch": 0.0960556686261356, "grad_norm": 0.5217018127441406, "learning_rate": 0.0001808263689416291, "loss": 1.6169, "step": 7392 }, { "epoch": 0.09606866317005147, "grad_norm": 0.36064958572387695, "learning_rate": 0.0001808237694797177, "loss": 1.5654, "step": 7393 }, { "epoch": 0.09608165771396734, "grad_norm": 0.37483900785446167, "learning_rate": 0.00018082117001780632, "loss": 1.3788, "step": 7394 }, { "epoch": 0.09609465225788322, "grad_norm": 0.3947065472602844, "learning_rate": 0.00018081857055589492, "loss": 1.539, "step": 7395 }, { "epoch": 0.09610764680179909, "grad_norm": 0.40937402844429016, "learning_rate": 0.00018081597109398357, "loss": 1.33, "step": 7396 }, { "epoch": 0.09612064134571496, "grad_norm": 0.375588595867157, "learning_rate": 0.00018081337163207217, "loss": 1.3802, "step": 7397 }, { "epoch": 0.09613363588963084, "grad_norm": 0.4242369830608368, "learning_rate": 0.0001808107721701608, "loss": 1.2283, "step": 7398 }, { "epoch": 0.09614663043354671, "grad_norm": 0.31093209981918335, "learning_rate": 0.0001808081727082494, "loss": 1.1685, "step": 7399 }, { "epoch": 0.09615962497746258, "grad_norm": 0.33873701095581055, "learning_rate": 0.000180805573246338, "loss": 1.2617, "step": 7400 }, { "epoch": 0.09617261952137846, "grad_norm": 0.4038071632385254, "learning_rate": 0.00018080297378442664, "loss": 1.2833, "step": 7401 }, { "epoch": 0.09618561406529433, "grad_norm": 0.43444883823394775, "learning_rate": 0.00018080037432251524, "loss": 1.4952, "step": 7402 }, { "epoch": 0.0961986086092102, "grad_norm": 0.37988609075546265, "learning_rate": 0.0001807977748606039, "loss": 1.2989, "step": 7403 }, { "epoch": 0.09621160315312609, "grad_norm": 0.39441198110580444, "learning_rate": 0.00018079517539869248, "loss": 1.327, "step": 7404 }, { "epoch": 0.09622459769704196, "grad_norm": 0.385147362947464, "learning_rate": 0.00018079257593678108, "loss": 1.4291, "step": 7405 }, { "epoch": 0.09623759224095783, "grad_norm": 0.34766459465026855, "learning_rate": 0.0001807899764748697, "loss": 1.3645, "step": 7406 }, { "epoch": 0.0962505867848737, "grad_norm": 0.28275880217552185, "learning_rate": 0.00018078737701295833, "loss": 1.2419, "step": 7407 }, { "epoch": 0.09626358132878958, "grad_norm": 0.4010525047779083, "learning_rate": 0.00018078477755104695, "loss": 1.3837, "step": 7408 }, { "epoch": 0.09627657587270545, "grad_norm": 0.5371651649475098, "learning_rate": 0.00018078217808913555, "loss": 1.3649, "step": 7409 }, { "epoch": 0.09628957041662133, "grad_norm": 0.35657212138175964, "learning_rate": 0.00018077957862722418, "loss": 1.2928, "step": 7410 }, { "epoch": 0.0963025649605372, "grad_norm": 0.33672940731048584, "learning_rate": 0.0001807769791653128, "loss": 1.481, "step": 7411 }, { "epoch": 0.09631555950445307, "grad_norm": 0.31562361121177673, "learning_rate": 0.0001807743797034014, "loss": 1.3423, "step": 7412 }, { "epoch": 0.09632855404836894, "grad_norm": 0.5343045592308044, "learning_rate": 0.00018077178024149002, "loss": 1.466, "step": 7413 }, { "epoch": 0.09634154859228482, "grad_norm": 0.43652138113975525, "learning_rate": 0.00018076918077957862, "loss": 1.4483, "step": 7414 }, { "epoch": 0.09635454313620069, "grad_norm": 0.4236850142478943, "learning_rate": 0.00018076658131766727, "loss": 1.5935, "step": 7415 }, { "epoch": 0.09636753768011656, "grad_norm": 0.4643765985965729, "learning_rate": 0.00018076398185575587, "loss": 1.4371, "step": 7416 }, { "epoch": 0.09638053222403244, "grad_norm": 0.3924958109855652, "learning_rate": 0.0001807613823938445, "loss": 1.4879, "step": 7417 }, { "epoch": 0.09639352676794831, "grad_norm": 0.3421379327774048, "learning_rate": 0.0001807587829319331, "loss": 1.5619, "step": 7418 }, { "epoch": 0.09640652131186418, "grad_norm": 0.4625434875488281, "learning_rate": 0.00018075618347002172, "loss": 1.3606, "step": 7419 }, { "epoch": 0.09641951585578006, "grad_norm": 0.5802971124649048, "learning_rate": 0.00018075358400811034, "loss": 1.4982, "step": 7420 }, { "epoch": 0.09643251039969593, "grad_norm": 0.3458288609981537, "learning_rate": 0.00018075098454619894, "loss": 1.562, "step": 7421 }, { "epoch": 0.0964455049436118, "grad_norm": 0.3565884530544281, "learning_rate": 0.00018074838508428756, "loss": 1.3796, "step": 7422 }, { "epoch": 0.09645849948752767, "grad_norm": 0.37184450030326843, "learning_rate": 0.0001807457856223762, "loss": 1.2933, "step": 7423 }, { "epoch": 0.09647149403144355, "grad_norm": 0.396337628364563, "learning_rate": 0.00018074318616046478, "loss": 1.5114, "step": 7424 }, { "epoch": 0.09648448857535942, "grad_norm": 0.4396108090877533, "learning_rate": 0.0001807405866985534, "loss": 1.6913, "step": 7425 }, { "epoch": 0.09649748311927529, "grad_norm": 0.3083972632884979, "learning_rate": 0.000180737987236642, "loss": 1.4721, "step": 7426 }, { "epoch": 0.09651047766319117, "grad_norm": 0.29246222972869873, "learning_rate": 0.00018073538777473066, "loss": 1.375, "step": 7427 }, { "epoch": 0.09652347220710704, "grad_norm": 0.5322912335395813, "learning_rate": 0.00018073278831281925, "loss": 1.63, "step": 7428 }, { "epoch": 0.09653646675102291, "grad_norm": 0.3530120551586151, "learning_rate": 0.00018073018885090788, "loss": 1.3668, "step": 7429 }, { "epoch": 0.09654946129493878, "grad_norm": 0.44654831290245056, "learning_rate": 0.00018072758938899648, "loss": 1.465, "step": 7430 }, { "epoch": 0.09656245583885466, "grad_norm": 0.35736167430877686, "learning_rate": 0.0001807249899270851, "loss": 1.2781, "step": 7431 }, { "epoch": 0.09657545038277053, "grad_norm": 0.5238146781921387, "learning_rate": 0.00018072239046517373, "loss": 1.4013, "step": 7432 }, { "epoch": 0.0965884449266864, "grad_norm": 0.35692641139030457, "learning_rate": 0.00018071979100326232, "loss": 1.3866, "step": 7433 }, { "epoch": 0.09660143947060228, "grad_norm": 0.4682738780975342, "learning_rate": 0.00018071719154135095, "loss": 1.3896, "step": 7434 }, { "epoch": 0.09661443401451815, "grad_norm": 0.45371633768081665, "learning_rate": 0.00018071459207943957, "loss": 1.587, "step": 7435 }, { "epoch": 0.09662742855843402, "grad_norm": 0.4439394772052765, "learning_rate": 0.00018071199261752817, "loss": 1.5158, "step": 7436 }, { "epoch": 0.0966404231023499, "grad_norm": 0.3876210153102875, "learning_rate": 0.0001807093931556168, "loss": 1.3619, "step": 7437 }, { "epoch": 0.09665341764626577, "grad_norm": 0.389203280210495, "learning_rate": 0.0001807067936937054, "loss": 1.4799, "step": 7438 }, { "epoch": 0.09666641219018164, "grad_norm": 0.44843536615371704, "learning_rate": 0.00018070419423179404, "loss": 1.6466, "step": 7439 }, { "epoch": 0.09667940673409751, "grad_norm": 0.5099455714225769, "learning_rate": 0.00018070159476988264, "loss": 1.511, "step": 7440 }, { "epoch": 0.09669240127801339, "grad_norm": 0.46425628662109375, "learning_rate": 0.00018069899530797126, "loss": 1.35, "step": 7441 }, { "epoch": 0.09670539582192927, "grad_norm": 0.35282662510871887, "learning_rate": 0.0001806963958460599, "loss": 1.3496, "step": 7442 }, { "epoch": 0.09671839036584515, "grad_norm": 0.4124460816383362, "learning_rate": 0.0001806937963841485, "loss": 1.4794, "step": 7443 }, { "epoch": 0.09673138490976102, "grad_norm": 0.3811986446380615, "learning_rate": 0.0001806911969222371, "loss": 1.4536, "step": 7444 }, { "epoch": 0.0967443794536769, "grad_norm": 0.5014336705207825, "learning_rate": 0.0001806885974603257, "loss": 1.5964, "step": 7445 }, { "epoch": 0.09675737399759277, "grad_norm": 0.38406094908714294, "learning_rate": 0.00018068599799841436, "loss": 1.3432, "step": 7446 }, { "epoch": 0.09677036854150864, "grad_norm": 0.50011146068573, "learning_rate": 0.00018068339853650296, "loss": 1.6317, "step": 7447 }, { "epoch": 0.09678336308542451, "grad_norm": 0.5149485468864441, "learning_rate": 0.00018068079907459155, "loss": 1.5929, "step": 7448 }, { "epoch": 0.09679635762934038, "grad_norm": 0.29857826232910156, "learning_rate": 0.00018067819961268018, "loss": 1.3656, "step": 7449 }, { "epoch": 0.09680935217325626, "grad_norm": 0.38642942905426025, "learning_rate": 0.0001806756001507688, "loss": 1.4969, "step": 7450 }, { "epoch": 0.09682234671717213, "grad_norm": 0.4397829473018646, "learning_rate": 0.00018067300068885743, "loss": 1.4019, "step": 7451 }, { "epoch": 0.096835341261088, "grad_norm": 0.3764725923538208, "learning_rate": 0.00018067040122694603, "loss": 1.297, "step": 7452 }, { "epoch": 0.09684833580500388, "grad_norm": 0.41457849740982056, "learning_rate": 0.00018066780176503465, "loss": 1.5439, "step": 7453 }, { "epoch": 0.09686133034891975, "grad_norm": 0.3851878345012665, "learning_rate": 0.00018066520230312327, "loss": 1.6111, "step": 7454 }, { "epoch": 0.09687432489283562, "grad_norm": 0.5073195099830627, "learning_rate": 0.00018066260284121187, "loss": 1.6866, "step": 7455 }, { "epoch": 0.0968873194367515, "grad_norm": 0.3910175561904907, "learning_rate": 0.0001806600033793005, "loss": 1.4723, "step": 7456 }, { "epoch": 0.09690031398066737, "grad_norm": 0.3264296054840088, "learning_rate": 0.0001806574039173891, "loss": 1.2507, "step": 7457 }, { "epoch": 0.09691330852458324, "grad_norm": 0.39491769671440125, "learning_rate": 0.00018065480445547775, "loss": 1.5712, "step": 7458 }, { "epoch": 0.09692630306849911, "grad_norm": 0.3605521023273468, "learning_rate": 0.00018065220499356634, "loss": 1.4412, "step": 7459 }, { "epoch": 0.09693929761241499, "grad_norm": 0.40367692708969116, "learning_rate": 0.00018064960553165494, "loss": 1.4422, "step": 7460 }, { "epoch": 0.09695229215633086, "grad_norm": 0.3287511467933655, "learning_rate": 0.00018064700606974356, "loss": 1.5895, "step": 7461 }, { "epoch": 0.09696528670024673, "grad_norm": 0.49243098497390747, "learning_rate": 0.0001806444066078322, "loss": 1.438, "step": 7462 }, { "epoch": 0.0969782812441626, "grad_norm": 0.4011189341545105, "learning_rate": 0.00018064180714592081, "loss": 1.3119, "step": 7463 }, { "epoch": 0.09699127578807848, "grad_norm": 0.3325624465942383, "learning_rate": 0.0001806392076840094, "loss": 1.3323, "step": 7464 }, { "epoch": 0.09700427033199435, "grad_norm": 0.4215577244758606, "learning_rate": 0.00018063660822209804, "loss": 1.3532, "step": 7465 }, { "epoch": 0.09701726487591023, "grad_norm": 0.3219201862812042, "learning_rate": 0.00018063400876018666, "loss": 1.3114, "step": 7466 }, { "epoch": 0.0970302594198261, "grad_norm": 0.481382817029953, "learning_rate": 0.00018063140929827526, "loss": 1.5181, "step": 7467 }, { "epoch": 0.09704325396374197, "grad_norm": 0.40177300572395325, "learning_rate": 0.00018062880983636388, "loss": 1.1998, "step": 7468 }, { "epoch": 0.09705624850765784, "grad_norm": 0.41246241331100464, "learning_rate": 0.00018062621037445248, "loss": 1.29, "step": 7469 }, { "epoch": 0.09706924305157372, "grad_norm": 0.37534165382385254, "learning_rate": 0.00018062361091254113, "loss": 1.2805, "step": 7470 }, { "epoch": 0.09708223759548959, "grad_norm": 0.3613832890987396, "learning_rate": 0.00018062101145062973, "loss": 1.5733, "step": 7471 }, { "epoch": 0.09709523213940546, "grad_norm": 0.43854978680610657, "learning_rate": 0.00018061841198871833, "loss": 1.3541, "step": 7472 }, { "epoch": 0.09710822668332134, "grad_norm": 0.40474042296409607, "learning_rate": 0.00018061581252680695, "loss": 1.3872, "step": 7473 }, { "epoch": 0.09712122122723721, "grad_norm": 0.37860623002052307, "learning_rate": 0.00018061321306489557, "loss": 1.4542, "step": 7474 }, { "epoch": 0.09713421577115308, "grad_norm": 0.42968985438346863, "learning_rate": 0.0001806106136029842, "loss": 1.4972, "step": 7475 }, { "epoch": 0.09714721031506895, "grad_norm": 0.32527509331703186, "learning_rate": 0.0001806080141410728, "loss": 1.4433, "step": 7476 }, { "epoch": 0.09716020485898483, "grad_norm": 0.49563100934028625, "learning_rate": 0.00018060541467916142, "loss": 1.4954, "step": 7477 }, { "epoch": 0.0971731994029007, "grad_norm": 0.4498002529144287, "learning_rate": 0.00018060281521725005, "loss": 1.415, "step": 7478 }, { "epoch": 0.09718619394681657, "grad_norm": 0.41119900345802307, "learning_rate": 0.00018060021575533864, "loss": 1.4771, "step": 7479 }, { "epoch": 0.09719918849073246, "grad_norm": 0.3612847924232483, "learning_rate": 0.00018059761629342727, "loss": 1.4699, "step": 7480 }, { "epoch": 0.09721218303464833, "grad_norm": 0.38155508041381836, "learning_rate": 0.0001805950168315159, "loss": 1.2713, "step": 7481 }, { "epoch": 0.0972251775785642, "grad_norm": 0.40645986795425415, "learning_rate": 0.00018059241736960452, "loss": 1.5713, "step": 7482 }, { "epoch": 0.09723817212248008, "grad_norm": 0.42810964584350586, "learning_rate": 0.0001805898179076931, "loss": 1.458, "step": 7483 }, { "epoch": 0.09725116666639595, "grad_norm": 0.4785873293876648, "learning_rate": 0.00018058721844578174, "loss": 1.4083, "step": 7484 }, { "epoch": 0.09726416121031183, "grad_norm": 0.3351441025733948, "learning_rate": 0.00018058461898387036, "loss": 1.4183, "step": 7485 }, { "epoch": 0.0972771557542277, "grad_norm": 0.3357161581516266, "learning_rate": 0.00018058201952195896, "loss": 1.3946, "step": 7486 }, { "epoch": 0.09729015029814357, "grad_norm": 0.3906060755252838, "learning_rate": 0.00018057942006004758, "loss": 1.4933, "step": 7487 }, { "epoch": 0.09730314484205944, "grad_norm": 0.3968433439731598, "learning_rate": 0.00018057682059813618, "loss": 1.4347, "step": 7488 }, { "epoch": 0.09731613938597532, "grad_norm": 0.3303782343864441, "learning_rate": 0.0001805742211362248, "loss": 1.2685, "step": 7489 }, { "epoch": 0.09732913392989119, "grad_norm": 0.3775410056114197, "learning_rate": 0.00018057162167431343, "loss": 1.3588, "step": 7490 }, { "epoch": 0.09734212847380706, "grad_norm": 0.3607504665851593, "learning_rate": 0.00018056902221240203, "loss": 1.4946, "step": 7491 }, { "epoch": 0.09735512301772294, "grad_norm": 0.41206371784210205, "learning_rate": 0.00018056642275049065, "loss": 1.4833, "step": 7492 }, { "epoch": 0.09736811756163881, "grad_norm": 0.37164849042892456, "learning_rate": 0.00018056382328857928, "loss": 1.6235, "step": 7493 }, { "epoch": 0.09738111210555468, "grad_norm": 0.4014964997768402, "learning_rate": 0.0001805612238266679, "loss": 1.3329, "step": 7494 }, { "epoch": 0.09739410664947055, "grad_norm": 0.3379157781600952, "learning_rate": 0.0001805586243647565, "loss": 1.3853, "step": 7495 }, { "epoch": 0.09740710119338643, "grad_norm": 0.37434065341949463, "learning_rate": 0.00018055602490284512, "loss": 1.3878, "step": 7496 }, { "epoch": 0.0974200957373023, "grad_norm": 0.5373088121414185, "learning_rate": 0.00018055342544093375, "loss": 1.5024, "step": 7497 }, { "epoch": 0.09743309028121817, "grad_norm": 0.3984873592853546, "learning_rate": 0.00018055082597902235, "loss": 1.5179, "step": 7498 }, { "epoch": 0.09744608482513405, "grad_norm": 0.4356929659843445, "learning_rate": 0.00018054822651711097, "loss": 1.5313, "step": 7499 }, { "epoch": 0.09745907936904992, "grad_norm": 0.39235201478004456, "learning_rate": 0.00018054562705519957, "loss": 1.4062, "step": 7500 }, { "epoch": 0.09747207391296579, "grad_norm": 0.38887616991996765, "learning_rate": 0.00018054302759328822, "loss": 1.625, "step": 7501 }, { "epoch": 0.09748506845688167, "grad_norm": 0.3356170654296875, "learning_rate": 0.00018054042813137682, "loss": 1.4263, "step": 7502 }, { "epoch": 0.09749806300079754, "grad_norm": 0.4639907777309418, "learning_rate": 0.0001805378286694654, "loss": 1.4609, "step": 7503 }, { "epoch": 0.09751105754471341, "grad_norm": 0.39358067512512207, "learning_rate": 0.00018053522920755404, "loss": 1.411, "step": 7504 }, { "epoch": 0.09752405208862928, "grad_norm": 0.40222474932670593, "learning_rate": 0.00018053262974564266, "loss": 1.3899, "step": 7505 }, { "epoch": 0.09753704663254516, "grad_norm": 0.4041571319103241, "learning_rate": 0.0001805300302837313, "loss": 1.5465, "step": 7506 }, { "epoch": 0.09755004117646103, "grad_norm": 0.3240668475627899, "learning_rate": 0.00018052743082181988, "loss": 1.1432, "step": 7507 }, { "epoch": 0.0975630357203769, "grad_norm": 0.3331120014190674, "learning_rate": 0.0001805248313599085, "loss": 1.5669, "step": 7508 }, { "epoch": 0.09757603026429278, "grad_norm": 0.5190784931182861, "learning_rate": 0.00018052223189799713, "loss": 1.538, "step": 7509 }, { "epoch": 0.09758902480820865, "grad_norm": 0.34999462962150574, "learning_rate": 0.00018051963243608573, "loss": 1.1415, "step": 7510 }, { "epoch": 0.09760201935212452, "grad_norm": 0.4424222409725189, "learning_rate": 0.00018051703297417436, "loss": 1.3399, "step": 7511 }, { "epoch": 0.0976150138960404, "grad_norm": 0.40078315138816833, "learning_rate": 0.00018051443351226295, "loss": 1.5825, "step": 7512 }, { "epoch": 0.09762800843995627, "grad_norm": 0.43246933817863464, "learning_rate": 0.0001805118340503516, "loss": 1.4266, "step": 7513 }, { "epoch": 0.09764100298387214, "grad_norm": 0.3761407136917114, "learning_rate": 0.0001805092345884402, "loss": 1.4836, "step": 7514 }, { "epoch": 0.09765399752778801, "grad_norm": 0.3609583377838135, "learning_rate": 0.0001805066351265288, "loss": 1.3117, "step": 7515 }, { "epoch": 0.09766699207170389, "grad_norm": 0.47262704372406006, "learning_rate": 0.00018050403566461745, "loss": 1.4304, "step": 7516 }, { "epoch": 0.09767998661561976, "grad_norm": 0.3673385977745056, "learning_rate": 0.00018050143620270605, "loss": 1.4625, "step": 7517 }, { "epoch": 0.09769298115953565, "grad_norm": 0.42160317301750183, "learning_rate": 0.00018049883674079467, "loss": 1.3847, "step": 7518 }, { "epoch": 0.09770597570345152, "grad_norm": 0.3719385266304016, "learning_rate": 0.00018049623727888327, "loss": 1.3846, "step": 7519 }, { "epoch": 0.09771897024736739, "grad_norm": 0.39151620864868164, "learning_rate": 0.0001804936378169719, "loss": 1.3828, "step": 7520 }, { "epoch": 0.09773196479128327, "grad_norm": 0.2977345585823059, "learning_rate": 0.00018049103835506052, "loss": 1.2172, "step": 7521 }, { "epoch": 0.09774495933519914, "grad_norm": 0.4201314449310303, "learning_rate": 0.00018048843889314912, "loss": 1.3374, "step": 7522 }, { "epoch": 0.09775795387911501, "grad_norm": 0.38257303833961487, "learning_rate": 0.00018048583943123774, "loss": 1.549, "step": 7523 }, { "epoch": 0.09777094842303088, "grad_norm": 0.4054087698459625, "learning_rate": 0.00018048323996932637, "loss": 1.4241, "step": 7524 }, { "epoch": 0.09778394296694676, "grad_norm": 0.360950231552124, "learning_rate": 0.000180480640507415, "loss": 1.2819, "step": 7525 }, { "epoch": 0.09779693751086263, "grad_norm": 0.35810574889183044, "learning_rate": 0.0001804780410455036, "loss": 1.3003, "step": 7526 }, { "epoch": 0.0978099320547785, "grad_norm": 0.3271980285644531, "learning_rate": 0.00018047544158359218, "loss": 1.4801, "step": 7527 }, { "epoch": 0.09782292659869438, "grad_norm": 0.47151824831962585, "learning_rate": 0.00018047284212168084, "loss": 1.5772, "step": 7528 }, { "epoch": 0.09783592114261025, "grad_norm": 0.28331610560417175, "learning_rate": 0.00018047024265976943, "loss": 1.4071, "step": 7529 }, { "epoch": 0.09784891568652612, "grad_norm": 0.3555101752281189, "learning_rate": 0.00018046764319785806, "loss": 1.4646, "step": 7530 }, { "epoch": 0.097861910230442, "grad_norm": 0.3308420181274414, "learning_rate": 0.00018046504373594666, "loss": 1.3502, "step": 7531 }, { "epoch": 0.09787490477435787, "grad_norm": 0.2997888922691345, "learning_rate": 0.00018046244427403528, "loss": 1.2462, "step": 7532 }, { "epoch": 0.09788789931827374, "grad_norm": 0.35908517241477966, "learning_rate": 0.0001804598448121239, "loss": 1.3868, "step": 7533 }, { "epoch": 0.09790089386218961, "grad_norm": 0.35377171635627747, "learning_rate": 0.0001804572453502125, "loss": 1.4193, "step": 7534 }, { "epoch": 0.09791388840610549, "grad_norm": 0.39672043919563293, "learning_rate": 0.00018045464588830113, "loss": 1.2546, "step": 7535 }, { "epoch": 0.09792688295002136, "grad_norm": 0.3332906663417816, "learning_rate": 0.00018045204642638975, "loss": 1.2704, "step": 7536 }, { "epoch": 0.09793987749393723, "grad_norm": 0.3438867926597595, "learning_rate": 0.00018044944696447837, "loss": 1.4203, "step": 7537 }, { "epoch": 0.0979528720378531, "grad_norm": 0.31091347336769104, "learning_rate": 0.00018044684750256697, "loss": 1.2197, "step": 7538 }, { "epoch": 0.09796586658176898, "grad_norm": 0.42031732201576233, "learning_rate": 0.0001804442480406556, "loss": 1.4666, "step": 7539 }, { "epoch": 0.09797886112568485, "grad_norm": 0.3627092242240906, "learning_rate": 0.00018044164857874422, "loss": 1.4846, "step": 7540 }, { "epoch": 0.09799185566960072, "grad_norm": 0.36253103613853455, "learning_rate": 0.00018043904911683282, "loss": 1.5323, "step": 7541 }, { "epoch": 0.0980048502135166, "grad_norm": 0.3460425138473511, "learning_rate": 0.00018043644965492144, "loss": 1.5784, "step": 7542 }, { "epoch": 0.09801784475743247, "grad_norm": 0.4711621105670929, "learning_rate": 0.00018043385019301004, "loss": 1.4127, "step": 7543 }, { "epoch": 0.09803083930134834, "grad_norm": 0.41815438866615295, "learning_rate": 0.00018043125073109866, "loss": 1.4559, "step": 7544 }, { "epoch": 0.09804383384526422, "grad_norm": 0.44216975569725037, "learning_rate": 0.0001804286512691873, "loss": 1.4986, "step": 7545 }, { "epoch": 0.09805682838918009, "grad_norm": 0.3501591980457306, "learning_rate": 0.0001804260518072759, "loss": 1.5426, "step": 7546 }, { "epoch": 0.09806982293309596, "grad_norm": 0.4122762084007263, "learning_rate": 0.0001804234523453645, "loss": 1.5177, "step": 7547 }, { "epoch": 0.09808281747701184, "grad_norm": 0.3824808597564697, "learning_rate": 0.00018042085288345314, "loss": 1.6754, "step": 7548 }, { "epoch": 0.09809581202092771, "grad_norm": 0.4453409016132355, "learning_rate": 0.00018041825342154176, "loss": 1.4781, "step": 7549 }, { "epoch": 0.09810880656484358, "grad_norm": 0.39347773790359497, "learning_rate": 0.00018041565395963036, "loss": 1.433, "step": 7550 }, { "epoch": 0.09812180110875945, "grad_norm": 0.3569260537624359, "learning_rate": 0.00018041305449771898, "loss": 1.2342, "step": 7551 }, { "epoch": 0.09813479565267533, "grad_norm": 0.3456551134586334, "learning_rate": 0.0001804104550358076, "loss": 1.5651, "step": 7552 }, { "epoch": 0.0981477901965912, "grad_norm": 0.3950441777706146, "learning_rate": 0.0001804078555738962, "loss": 1.3125, "step": 7553 }, { "epoch": 0.09816078474050707, "grad_norm": 0.5004082918167114, "learning_rate": 0.00018040525611198483, "loss": 1.6303, "step": 7554 }, { "epoch": 0.09817377928442295, "grad_norm": 0.3228236436843872, "learning_rate": 0.00018040265665007345, "loss": 1.2768, "step": 7555 }, { "epoch": 0.09818677382833883, "grad_norm": 0.21827465295791626, "learning_rate": 0.00018040005718816205, "loss": 1.3292, "step": 7556 }, { "epoch": 0.0981997683722547, "grad_norm": 0.32572606205940247, "learning_rate": 0.00018039745772625067, "loss": 1.5443, "step": 7557 }, { "epoch": 0.09821276291617058, "grad_norm": 0.446177214384079, "learning_rate": 0.00018039485826433927, "loss": 1.2286, "step": 7558 }, { "epoch": 0.09822575746008645, "grad_norm": 0.35685598850250244, "learning_rate": 0.00018039225880242792, "loss": 1.355, "step": 7559 }, { "epoch": 0.09823875200400232, "grad_norm": 0.44513094425201416, "learning_rate": 0.00018038965934051652, "loss": 1.5031, "step": 7560 }, { "epoch": 0.0982517465479182, "grad_norm": 0.47552233934402466, "learning_rate": 0.00018038705987860515, "loss": 1.3778, "step": 7561 }, { "epoch": 0.09826474109183407, "grad_norm": 0.4690370559692383, "learning_rate": 0.00018038446041669374, "loss": 1.5849, "step": 7562 }, { "epoch": 0.09827773563574994, "grad_norm": 0.35861924290657043, "learning_rate": 0.00018038186095478237, "loss": 1.5092, "step": 7563 }, { "epoch": 0.09829073017966582, "grad_norm": 0.35612034797668457, "learning_rate": 0.000180379261492871, "loss": 1.2118, "step": 7564 }, { "epoch": 0.09830372472358169, "grad_norm": 0.4245584309101105, "learning_rate": 0.0001803766620309596, "loss": 1.7199, "step": 7565 }, { "epoch": 0.09831671926749756, "grad_norm": 0.2901960015296936, "learning_rate": 0.00018037406256904821, "loss": 1.3535, "step": 7566 }, { "epoch": 0.09832971381141344, "grad_norm": 0.4192603826522827, "learning_rate": 0.00018037146310713684, "loss": 1.4169, "step": 7567 }, { "epoch": 0.09834270835532931, "grad_norm": 0.44621896743774414, "learning_rate": 0.00018036886364522546, "loss": 1.4539, "step": 7568 }, { "epoch": 0.09835570289924518, "grad_norm": 0.34592095017433167, "learning_rate": 0.00018036626418331406, "loss": 1.4665, "step": 7569 }, { "epoch": 0.09836869744316105, "grad_norm": 0.32394349575042725, "learning_rate": 0.00018036366472140266, "loss": 1.4129, "step": 7570 }, { "epoch": 0.09838169198707693, "grad_norm": 0.43621963262557983, "learning_rate": 0.0001803610652594913, "loss": 1.5761, "step": 7571 }, { "epoch": 0.0983946865309928, "grad_norm": 0.3277306854724884, "learning_rate": 0.0001803584657975799, "loss": 1.6591, "step": 7572 }, { "epoch": 0.09840768107490867, "grad_norm": 0.37760117650032043, "learning_rate": 0.00018035586633566853, "loss": 1.5335, "step": 7573 }, { "epoch": 0.09842067561882455, "grad_norm": 0.3746792674064636, "learning_rate": 0.00018035326687375713, "loss": 1.5167, "step": 7574 }, { "epoch": 0.09843367016274042, "grad_norm": 0.38139402866363525, "learning_rate": 0.00018035066741184575, "loss": 1.4593, "step": 7575 }, { "epoch": 0.09844666470665629, "grad_norm": 0.3655568063259125, "learning_rate": 0.00018034806794993438, "loss": 1.4886, "step": 7576 }, { "epoch": 0.09845965925057217, "grad_norm": 0.37853625416755676, "learning_rate": 0.00018034546848802297, "loss": 1.3427, "step": 7577 }, { "epoch": 0.09847265379448804, "grad_norm": 0.390889048576355, "learning_rate": 0.0001803428690261116, "loss": 1.6101, "step": 7578 }, { "epoch": 0.09848564833840391, "grad_norm": 0.9414922595024109, "learning_rate": 0.00018034026956420022, "loss": 1.4874, "step": 7579 }, { "epoch": 0.09849864288231978, "grad_norm": 0.42668992280960083, "learning_rate": 0.00018033767010228885, "loss": 1.4178, "step": 7580 }, { "epoch": 0.09851163742623566, "grad_norm": 0.3904257118701935, "learning_rate": 0.00018033507064037745, "loss": 1.371, "step": 7581 }, { "epoch": 0.09852463197015153, "grad_norm": 0.4323786497116089, "learning_rate": 0.00018033247117846604, "loss": 1.4144, "step": 7582 }, { "epoch": 0.0985376265140674, "grad_norm": 0.40542373061180115, "learning_rate": 0.0001803298717165547, "loss": 1.4599, "step": 7583 }, { "epoch": 0.09855062105798328, "grad_norm": 0.37801429629325867, "learning_rate": 0.0001803272722546433, "loss": 1.3522, "step": 7584 }, { "epoch": 0.09856361560189915, "grad_norm": 0.35894036293029785, "learning_rate": 0.00018032467279273192, "loss": 1.385, "step": 7585 }, { "epoch": 0.09857661014581502, "grad_norm": 0.44921109080314636, "learning_rate": 0.00018032207333082051, "loss": 1.4985, "step": 7586 }, { "epoch": 0.0985896046897309, "grad_norm": 0.4041857421398163, "learning_rate": 0.00018031947386890914, "loss": 1.5342, "step": 7587 }, { "epoch": 0.09860259923364677, "grad_norm": 0.3700587749481201, "learning_rate": 0.00018031687440699776, "loss": 1.3321, "step": 7588 }, { "epoch": 0.09861559377756264, "grad_norm": 0.3191107213497162, "learning_rate": 0.00018031427494508636, "loss": 1.3033, "step": 7589 }, { "epoch": 0.09862858832147851, "grad_norm": 0.2953815162181854, "learning_rate": 0.000180311675483175, "loss": 1.2716, "step": 7590 }, { "epoch": 0.09864158286539439, "grad_norm": 0.44157710671424866, "learning_rate": 0.0001803090760212636, "loss": 1.6003, "step": 7591 }, { "epoch": 0.09865457740931026, "grad_norm": 0.35140421986579895, "learning_rate": 0.00018030647655935223, "loss": 1.4758, "step": 7592 }, { "epoch": 0.09866757195322613, "grad_norm": 0.4715821444988251, "learning_rate": 0.00018030387709744083, "loss": 1.4399, "step": 7593 }, { "epoch": 0.09868056649714202, "grad_norm": 0.3662639260292053, "learning_rate": 0.00018030127763552946, "loss": 1.4089, "step": 7594 }, { "epoch": 0.09869356104105789, "grad_norm": 0.4767250418663025, "learning_rate": 0.00018029867817361808, "loss": 1.3589, "step": 7595 }, { "epoch": 0.09870655558497377, "grad_norm": 0.49728071689605713, "learning_rate": 0.00018029607871170668, "loss": 1.4808, "step": 7596 }, { "epoch": 0.09871955012888964, "grad_norm": 0.32903313636779785, "learning_rate": 0.0001802934792497953, "loss": 1.2998, "step": 7597 }, { "epoch": 0.09873254467280551, "grad_norm": 0.34764564037323, "learning_rate": 0.00018029087978788393, "loss": 1.4103, "step": 7598 }, { "epoch": 0.09874553921672138, "grad_norm": 0.40784770250320435, "learning_rate": 0.00018028828032597252, "loss": 1.5499, "step": 7599 }, { "epoch": 0.09875853376063726, "grad_norm": 0.3321189284324646, "learning_rate": 0.00018028568086406115, "loss": 1.3849, "step": 7600 }, { "epoch": 0.09877152830455313, "grad_norm": 0.43755555152893066, "learning_rate": 0.00018028308140214975, "loss": 1.2838, "step": 7601 }, { "epoch": 0.098784522848469, "grad_norm": 0.3918802738189697, "learning_rate": 0.0001802804819402384, "loss": 1.4765, "step": 7602 }, { "epoch": 0.09879751739238488, "grad_norm": 0.4191012382507324, "learning_rate": 0.000180277882478327, "loss": 1.4286, "step": 7603 }, { "epoch": 0.09881051193630075, "grad_norm": 0.5088168978691101, "learning_rate": 0.00018027528301641562, "loss": 1.6129, "step": 7604 }, { "epoch": 0.09882350648021662, "grad_norm": 0.35823875665664673, "learning_rate": 0.00018027268355450422, "loss": 1.5607, "step": 7605 }, { "epoch": 0.0988365010241325, "grad_norm": 0.39868295192718506, "learning_rate": 0.00018027008409259284, "loss": 1.4932, "step": 7606 }, { "epoch": 0.09884949556804837, "grad_norm": 0.35696572065353394, "learning_rate": 0.00018026748463068147, "loss": 1.5195, "step": 7607 }, { "epoch": 0.09886249011196424, "grad_norm": 0.4137752652168274, "learning_rate": 0.00018026488516877006, "loss": 1.6511, "step": 7608 }, { "epoch": 0.09887548465588011, "grad_norm": 0.37321752309799194, "learning_rate": 0.0001802622857068587, "loss": 1.3618, "step": 7609 }, { "epoch": 0.09888847919979599, "grad_norm": 0.49691078066825867, "learning_rate": 0.0001802596862449473, "loss": 1.5592, "step": 7610 }, { "epoch": 0.09890147374371186, "grad_norm": 0.4516826868057251, "learning_rate": 0.0001802570867830359, "loss": 1.4276, "step": 7611 }, { "epoch": 0.09891446828762773, "grad_norm": 0.3362126648426056, "learning_rate": 0.00018025448732112453, "loss": 1.4064, "step": 7612 }, { "epoch": 0.0989274628315436, "grad_norm": 0.34383925795555115, "learning_rate": 0.00018025188785921313, "loss": 1.3941, "step": 7613 }, { "epoch": 0.09894045737545948, "grad_norm": 0.3464807868003845, "learning_rate": 0.00018024928839730178, "loss": 1.1543, "step": 7614 }, { "epoch": 0.09895345191937535, "grad_norm": 0.39652219414711, "learning_rate": 0.00018024668893539038, "loss": 1.3663, "step": 7615 }, { "epoch": 0.09896644646329122, "grad_norm": 0.4803014397621155, "learning_rate": 0.000180244089473479, "loss": 1.4493, "step": 7616 }, { "epoch": 0.0989794410072071, "grad_norm": 0.37208932638168335, "learning_rate": 0.0001802414900115676, "loss": 1.5482, "step": 7617 }, { "epoch": 0.09899243555112297, "grad_norm": 0.34949222207069397, "learning_rate": 0.00018023889054965623, "loss": 1.5523, "step": 7618 }, { "epoch": 0.09900543009503884, "grad_norm": 0.3843694031238556, "learning_rate": 0.00018023629108774485, "loss": 1.5452, "step": 7619 }, { "epoch": 0.09901842463895472, "grad_norm": 0.4375974237918854, "learning_rate": 0.00018023369162583345, "loss": 1.4226, "step": 7620 }, { "epoch": 0.09903141918287059, "grad_norm": 0.42773744463920593, "learning_rate": 0.00018023109216392207, "loss": 1.4304, "step": 7621 }, { "epoch": 0.09904441372678646, "grad_norm": 0.42004647850990295, "learning_rate": 0.0001802284927020107, "loss": 1.4345, "step": 7622 }, { "epoch": 0.09905740827070234, "grad_norm": 0.6433254480361938, "learning_rate": 0.00018022589324009932, "loss": 1.3391, "step": 7623 }, { "epoch": 0.09907040281461821, "grad_norm": 0.358011394739151, "learning_rate": 0.00018022329377818792, "loss": 1.4138, "step": 7624 }, { "epoch": 0.09908339735853408, "grad_norm": 0.4215996265411377, "learning_rate": 0.00018022069431627654, "loss": 1.2198, "step": 7625 }, { "epoch": 0.09909639190244995, "grad_norm": 0.33352354168891907, "learning_rate": 0.00018021809485436517, "loss": 1.3744, "step": 7626 }, { "epoch": 0.09910938644636583, "grad_norm": 0.4371497333049774, "learning_rate": 0.00018021549539245377, "loss": 1.4866, "step": 7627 }, { "epoch": 0.0991223809902817, "grad_norm": 0.2593614459037781, "learning_rate": 0.0001802128959305424, "loss": 1.3998, "step": 7628 }, { "epoch": 0.09913537553419757, "grad_norm": 0.42470139265060425, "learning_rate": 0.00018021029646863101, "loss": 1.5663, "step": 7629 }, { "epoch": 0.09914837007811345, "grad_norm": 0.4210742115974426, "learning_rate": 0.0001802076970067196, "loss": 1.5433, "step": 7630 }, { "epoch": 0.09916136462202932, "grad_norm": 0.27131569385528564, "learning_rate": 0.00018020509754480824, "loss": 1.2315, "step": 7631 }, { "epoch": 0.0991743591659452, "grad_norm": 0.44708698987960815, "learning_rate": 0.00018020249808289683, "loss": 1.3469, "step": 7632 }, { "epoch": 0.09918735370986108, "grad_norm": 0.42890846729278564, "learning_rate": 0.00018019989862098549, "loss": 1.4295, "step": 7633 }, { "epoch": 0.09920034825377695, "grad_norm": 0.45680341124534607, "learning_rate": 0.00018019729915907408, "loss": 1.6792, "step": 7634 }, { "epoch": 0.09921334279769282, "grad_norm": 0.3280755281448364, "learning_rate": 0.0001801946996971627, "loss": 1.3708, "step": 7635 }, { "epoch": 0.0992263373416087, "grad_norm": 0.328364759683609, "learning_rate": 0.0001801921002352513, "loss": 1.3109, "step": 7636 }, { "epoch": 0.09923933188552457, "grad_norm": 0.46605971455574036, "learning_rate": 0.00018018950077333993, "loss": 1.4123, "step": 7637 }, { "epoch": 0.09925232642944044, "grad_norm": 0.5017272233963013, "learning_rate": 0.00018018690131142855, "loss": 1.4994, "step": 7638 }, { "epoch": 0.09926532097335632, "grad_norm": 0.43509921431541443, "learning_rate": 0.00018018430184951715, "loss": 1.5142, "step": 7639 }, { "epoch": 0.09927831551727219, "grad_norm": 0.29439496994018555, "learning_rate": 0.00018018170238760578, "loss": 1.5676, "step": 7640 }, { "epoch": 0.09929131006118806, "grad_norm": 0.44283726811408997, "learning_rate": 0.0001801791029256944, "loss": 1.3522, "step": 7641 }, { "epoch": 0.09930430460510394, "grad_norm": 0.3328530192375183, "learning_rate": 0.000180176503463783, "loss": 1.5117, "step": 7642 }, { "epoch": 0.09931729914901981, "grad_norm": 0.4323212504386902, "learning_rate": 0.00018017390400187162, "loss": 1.3786, "step": 7643 }, { "epoch": 0.09933029369293568, "grad_norm": 0.3416399657726288, "learning_rate": 0.00018017130453996022, "loss": 1.4117, "step": 7644 }, { "epoch": 0.09934328823685155, "grad_norm": 0.42340588569641113, "learning_rate": 0.00018016870507804887, "loss": 1.3224, "step": 7645 }, { "epoch": 0.09935628278076743, "grad_norm": 0.3885568082332611, "learning_rate": 0.00018016610561613747, "loss": 1.3456, "step": 7646 }, { "epoch": 0.0993692773246833, "grad_norm": 0.41005587577819824, "learning_rate": 0.0001801635061542261, "loss": 1.4821, "step": 7647 }, { "epoch": 0.09938227186859917, "grad_norm": 0.4532742500305176, "learning_rate": 0.0001801609066923147, "loss": 1.4531, "step": 7648 }, { "epoch": 0.09939526641251505, "grad_norm": 0.48052161931991577, "learning_rate": 0.00018015830723040331, "loss": 1.4078, "step": 7649 }, { "epoch": 0.09940826095643092, "grad_norm": 0.31460922956466675, "learning_rate": 0.00018015570776849194, "loss": 1.3792, "step": 7650 }, { "epoch": 0.09942125550034679, "grad_norm": 0.37324583530426025, "learning_rate": 0.00018015310830658054, "loss": 1.2037, "step": 7651 }, { "epoch": 0.09943425004426266, "grad_norm": 0.5292695760726929, "learning_rate": 0.00018015050884466916, "loss": 1.4458, "step": 7652 }, { "epoch": 0.09944724458817854, "grad_norm": 0.3979688286781311, "learning_rate": 0.00018014790938275779, "loss": 1.4226, "step": 7653 }, { "epoch": 0.09946023913209441, "grad_norm": 0.3596692681312561, "learning_rate": 0.00018014530992084638, "loss": 1.266, "step": 7654 }, { "epoch": 0.09947323367601028, "grad_norm": 0.35680097341537476, "learning_rate": 0.000180142710458935, "loss": 1.561, "step": 7655 }, { "epoch": 0.09948622821992616, "grad_norm": 0.366475373506546, "learning_rate": 0.0001801401109970236, "loss": 1.4271, "step": 7656 }, { "epoch": 0.09949922276384203, "grad_norm": 0.3848341703414917, "learning_rate": 0.00018013751153511226, "loss": 1.3237, "step": 7657 }, { "epoch": 0.0995122173077579, "grad_norm": 0.3629254698753357, "learning_rate": 0.00018013491207320085, "loss": 1.5101, "step": 7658 }, { "epoch": 0.09952521185167378, "grad_norm": 0.41521772742271423, "learning_rate": 0.00018013231261128948, "loss": 1.3345, "step": 7659 }, { "epoch": 0.09953820639558965, "grad_norm": 0.4822794497013092, "learning_rate": 0.00018012971314937808, "loss": 1.5778, "step": 7660 }, { "epoch": 0.09955120093950552, "grad_norm": 0.3347254693508148, "learning_rate": 0.0001801271136874667, "loss": 1.5753, "step": 7661 }, { "epoch": 0.0995641954834214, "grad_norm": 0.5010448694229126, "learning_rate": 0.00018012451422555532, "loss": 1.5201, "step": 7662 }, { "epoch": 0.09957719002733727, "grad_norm": 0.40447068214416504, "learning_rate": 0.00018012191476364392, "loss": 1.5344, "step": 7663 }, { "epoch": 0.09959018457125314, "grad_norm": 0.4594039022922516, "learning_rate": 0.00018011931530173257, "loss": 1.6741, "step": 7664 }, { "epoch": 0.09960317911516901, "grad_norm": 0.3824000358581543, "learning_rate": 0.00018011671583982117, "loss": 1.3474, "step": 7665 }, { "epoch": 0.09961617365908489, "grad_norm": 0.33877086639404297, "learning_rate": 0.00018011411637790977, "loss": 1.3428, "step": 7666 }, { "epoch": 0.09962916820300076, "grad_norm": 0.3358142673969269, "learning_rate": 0.0001801115169159984, "loss": 1.395, "step": 7667 }, { "epoch": 0.09964216274691663, "grad_norm": 0.46106573939323425, "learning_rate": 0.00018010891745408702, "loss": 1.4206, "step": 7668 }, { "epoch": 0.0996551572908325, "grad_norm": 0.3157424330711365, "learning_rate": 0.00018010631799217564, "loss": 1.4562, "step": 7669 }, { "epoch": 0.09966815183474839, "grad_norm": 0.3862575888633728, "learning_rate": 0.00018010371853026424, "loss": 1.4042, "step": 7670 }, { "epoch": 0.09968114637866426, "grad_norm": 0.48718324303627014, "learning_rate": 0.00018010111906835286, "loss": 1.4858, "step": 7671 }, { "epoch": 0.09969414092258014, "grad_norm": 0.37498462200164795, "learning_rate": 0.0001800985196064415, "loss": 1.6164, "step": 7672 }, { "epoch": 0.09970713546649601, "grad_norm": 0.42233625054359436, "learning_rate": 0.00018009592014453008, "loss": 1.5249, "step": 7673 }, { "epoch": 0.09972013001041188, "grad_norm": 0.3537615239620209, "learning_rate": 0.0001800933206826187, "loss": 1.4671, "step": 7674 }, { "epoch": 0.09973312455432776, "grad_norm": 0.34899771213531494, "learning_rate": 0.0001800907212207073, "loss": 1.4183, "step": 7675 }, { "epoch": 0.09974611909824363, "grad_norm": 0.5116575360298157, "learning_rate": 0.00018008812175879596, "loss": 1.313, "step": 7676 }, { "epoch": 0.0997591136421595, "grad_norm": 0.3554348051548004, "learning_rate": 0.00018008552229688456, "loss": 1.3488, "step": 7677 }, { "epoch": 0.09977210818607538, "grad_norm": 0.4460490345954895, "learning_rate": 0.00018008292283497315, "loss": 1.3481, "step": 7678 }, { "epoch": 0.09978510272999125, "grad_norm": 0.3297146260738373, "learning_rate": 0.00018008032337306178, "loss": 1.4292, "step": 7679 }, { "epoch": 0.09979809727390712, "grad_norm": 0.3345957100391388, "learning_rate": 0.0001800777239111504, "loss": 1.4054, "step": 7680 }, { "epoch": 0.099811091817823, "grad_norm": 0.4210030436515808, "learning_rate": 0.00018007512444923903, "loss": 1.4803, "step": 7681 }, { "epoch": 0.09982408636173887, "grad_norm": 0.46942102909088135, "learning_rate": 0.00018007252498732762, "loss": 1.4172, "step": 7682 }, { "epoch": 0.09983708090565474, "grad_norm": 0.4845230281352997, "learning_rate": 0.00018006992552541625, "loss": 1.497, "step": 7683 }, { "epoch": 0.09985007544957061, "grad_norm": 0.35429152846336365, "learning_rate": 0.00018006732606350487, "loss": 1.2605, "step": 7684 }, { "epoch": 0.09986306999348649, "grad_norm": 0.4821487069129944, "learning_rate": 0.00018006472660159347, "loss": 1.4393, "step": 7685 }, { "epoch": 0.09987606453740236, "grad_norm": 0.5426856279373169, "learning_rate": 0.0001800621271396821, "loss": 1.2874, "step": 7686 }, { "epoch": 0.09988905908131823, "grad_norm": 0.35171741247177124, "learning_rate": 0.0001800595276777707, "loss": 1.3084, "step": 7687 }, { "epoch": 0.0999020536252341, "grad_norm": 0.40299689769744873, "learning_rate": 0.00018005692821585934, "loss": 1.3285, "step": 7688 }, { "epoch": 0.09991504816914998, "grad_norm": 0.4185236990451813, "learning_rate": 0.00018005432875394794, "loss": 1.4843, "step": 7689 }, { "epoch": 0.09992804271306585, "grad_norm": 0.3998733162879944, "learning_rate": 0.00018005172929203657, "loss": 1.5653, "step": 7690 }, { "epoch": 0.09994103725698172, "grad_norm": 0.40453624725341797, "learning_rate": 0.00018004912983012516, "loss": 1.1748, "step": 7691 }, { "epoch": 0.0999540318008976, "grad_norm": 0.41360390186309814, "learning_rate": 0.0001800465303682138, "loss": 1.3347, "step": 7692 }, { "epoch": 0.09996702634481347, "grad_norm": 0.4500105082988739, "learning_rate": 0.0001800439309063024, "loss": 1.6253, "step": 7693 }, { "epoch": 0.09998002088872934, "grad_norm": 0.40081730484962463, "learning_rate": 0.000180041331444391, "loss": 1.4126, "step": 7694 }, { "epoch": 0.09999301543264522, "grad_norm": 0.5991983413696289, "learning_rate": 0.00018003873198247963, "loss": 1.4585, "step": 7695 }, { "epoch": 0.10000600997656109, "grad_norm": 0.40953150391578674, "learning_rate": 0.00018003613252056826, "loss": 1.4681, "step": 7696 }, { "epoch": 0.10001900452047696, "grad_norm": 0.4214688241481781, "learning_rate": 0.00018003353305865686, "loss": 1.2906, "step": 7697 }, { "epoch": 0.10003199906439283, "grad_norm": 0.38336676359176636, "learning_rate": 0.00018003093359674548, "loss": 1.4432, "step": 7698 }, { "epoch": 0.10004499360830871, "grad_norm": 0.4267819821834564, "learning_rate": 0.0001800283341348341, "loss": 1.4618, "step": 7699 }, { "epoch": 0.10005798815222458, "grad_norm": 0.3744755685329437, "learning_rate": 0.00018002573467292273, "loss": 1.4235, "step": 7700 }, { "epoch": 0.10007098269614045, "grad_norm": 0.4582114517688751, "learning_rate": 0.00018002313521101133, "loss": 1.4422, "step": 7701 }, { "epoch": 0.10008397724005633, "grad_norm": 0.4534589648246765, "learning_rate": 0.00018002053574909995, "loss": 1.4194, "step": 7702 }, { "epoch": 0.1000969717839722, "grad_norm": 0.34885936975479126, "learning_rate": 0.00018001793628718858, "loss": 1.4276, "step": 7703 }, { "epoch": 0.10010996632788807, "grad_norm": 0.3478038012981415, "learning_rate": 0.00018001533682527717, "loss": 1.3986, "step": 7704 }, { "epoch": 0.10012296087180395, "grad_norm": 0.36455038189888, "learning_rate": 0.0001800127373633658, "loss": 1.5459, "step": 7705 }, { "epoch": 0.10013595541571982, "grad_norm": 0.4637551009654999, "learning_rate": 0.0001800101379014544, "loss": 1.5051, "step": 7706 }, { "epoch": 0.10014894995963569, "grad_norm": 0.4914877712726593, "learning_rate": 0.00018000753843954305, "loss": 1.333, "step": 7707 }, { "epoch": 0.10016194450355158, "grad_norm": 0.39947807788848877, "learning_rate": 0.00018000493897763164, "loss": 1.5386, "step": 7708 }, { "epoch": 0.10017493904746745, "grad_norm": 0.34416329860687256, "learning_rate": 0.00018000233951572024, "loss": 1.32, "step": 7709 }, { "epoch": 0.10018793359138332, "grad_norm": 0.43152153491973877, "learning_rate": 0.00017999974005380887, "loss": 1.7124, "step": 7710 }, { "epoch": 0.1002009281352992, "grad_norm": 0.31289172172546387, "learning_rate": 0.0001799971405918975, "loss": 1.4115, "step": 7711 }, { "epoch": 0.10021392267921507, "grad_norm": 0.48261481523513794, "learning_rate": 0.00017999454112998611, "loss": 1.4367, "step": 7712 }, { "epoch": 0.10022691722313094, "grad_norm": 0.3611449599266052, "learning_rate": 0.0001799919416680747, "loss": 1.4975, "step": 7713 }, { "epoch": 0.10023991176704682, "grad_norm": 0.3715337812900543, "learning_rate": 0.00017998934220616334, "loss": 1.3302, "step": 7714 }, { "epoch": 0.10025290631096269, "grad_norm": 0.35607150197029114, "learning_rate": 0.00017998674274425196, "loss": 1.3665, "step": 7715 }, { "epoch": 0.10026590085487856, "grad_norm": 0.5352275371551514, "learning_rate": 0.00017998414328234056, "loss": 1.6504, "step": 7716 }, { "epoch": 0.10027889539879443, "grad_norm": 0.34869760274887085, "learning_rate": 0.00017998154382042918, "loss": 1.411, "step": 7717 }, { "epoch": 0.10029188994271031, "grad_norm": 0.3344050645828247, "learning_rate": 0.00017997894435851778, "loss": 1.2106, "step": 7718 }, { "epoch": 0.10030488448662618, "grad_norm": 0.4052220284938812, "learning_rate": 0.00017997634489660643, "loss": 1.4318, "step": 7719 }, { "epoch": 0.10031787903054205, "grad_norm": 0.36047059297561646, "learning_rate": 0.00017997374543469503, "loss": 1.3013, "step": 7720 }, { "epoch": 0.10033087357445793, "grad_norm": 0.4065697491168976, "learning_rate": 0.00017997114597278363, "loss": 1.3914, "step": 7721 }, { "epoch": 0.1003438681183738, "grad_norm": 0.3502679169178009, "learning_rate": 0.00017996854651087225, "loss": 1.3115, "step": 7722 }, { "epoch": 0.10035686266228967, "grad_norm": 0.3656959533691406, "learning_rate": 0.00017996594704896088, "loss": 1.3683, "step": 7723 }, { "epoch": 0.10036985720620555, "grad_norm": 0.41776686906814575, "learning_rate": 0.0001799633475870495, "loss": 1.5869, "step": 7724 }, { "epoch": 0.10038285175012142, "grad_norm": 0.2601430416107178, "learning_rate": 0.0001799607481251381, "loss": 1.4407, "step": 7725 }, { "epoch": 0.10039584629403729, "grad_norm": 0.41610637307167053, "learning_rate": 0.00017995814866322672, "loss": 1.3239, "step": 7726 }, { "epoch": 0.10040884083795316, "grad_norm": 0.401491641998291, "learning_rate": 0.00017995554920131535, "loss": 1.5508, "step": 7727 }, { "epoch": 0.10042183538186904, "grad_norm": 0.44771575927734375, "learning_rate": 0.00017995294973940394, "loss": 1.337, "step": 7728 }, { "epoch": 0.10043482992578491, "grad_norm": 0.4742160141468048, "learning_rate": 0.00017995035027749257, "loss": 1.407, "step": 7729 }, { "epoch": 0.10044782446970078, "grad_norm": 0.3995453417301178, "learning_rate": 0.00017994775081558117, "loss": 1.432, "step": 7730 }, { "epoch": 0.10046081901361666, "grad_norm": 0.3786431550979614, "learning_rate": 0.00017994515135366982, "loss": 1.3565, "step": 7731 }, { "epoch": 0.10047381355753253, "grad_norm": 0.3390199840068817, "learning_rate": 0.00017994255189175841, "loss": 1.3396, "step": 7732 }, { "epoch": 0.1004868081014484, "grad_norm": 0.3517451882362366, "learning_rate": 0.000179939952429847, "loss": 1.4114, "step": 7733 }, { "epoch": 0.10049980264536428, "grad_norm": 0.32465922832489014, "learning_rate": 0.00017993735296793564, "loss": 1.264, "step": 7734 }, { "epoch": 0.10051279718928015, "grad_norm": 0.34513556957244873, "learning_rate": 0.00017993475350602426, "loss": 1.3132, "step": 7735 }, { "epoch": 0.10052579173319602, "grad_norm": 0.3557755947113037, "learning_rate": 0.00017993215404411289, "loss": 1.4187, "step": 7736 }, { "epoch": 0.1005387862771119, "grad_norm": 0.44158443808555603, "learning_rate": 0.00017992955458220148, "loss": 1.451, "step": 7737 }, { "epoch": 0.10055178082102777, "grad_norm": 0.4244614243507385, "learning_rate": 0.0001799269551202901, "loss": 1.3511, "step": 7738 }, { "epoch": 0.10056477536494364, "grad_norm": 0.3721020221710205, "learning_rate": 0.00017992435565837873, "loss": 1.3173, "step": 7739 }, { "epoch": 0.10057776990885951, "grad_norm": 0.44494977593421936, "learning_rate": 0.00017992175619646733, "loss": 1.4004, "step": 7740 }, { "epoch": 0.10059076445277539, "grad_norm": 0.41851723194122314, "learning_rate": 0.00017991915673455595, "loss": 1.3677, "step": 7741 }, { "epoch": 0.10060375899669126, "grad_norm": 0.31408342719078064, "learning_rate": 0.00017991655727264458, "loss": 1.3433, "step": 7742 }, { "epoch": 0.10061675354060713, "grad_norm": 0.48315465450286865, "learning_rate": 0.0001799139578107332, "loss": 1.4698, "step": 7743 }, { "epoch": 0.100629748084523, "grad_norm": 0.42054128646850586, "learning_rate": 0.0001799113583488218, "loss": 1.5336, "step": 7744 }, { "epoch": 0.10064274262843888, "grad_norm": 0.33167505264282227, "learning_rate": 0.00017990875888691042, "loss": 1.2733, "step": 7745 }, { "epoch": 0.10065573717235476, "grad_norm": 0.46904489398002625, "learning_rate": 0.00017990615942499905, "loss": 1.5083, "step": 7746 }, { "epoch": 0.10066873171627064, "grad_norm": 0.41800230741500854, "learning_rate": 0.00017990355996308765, "loss": 1.3772, "step": 7747 }, { "epoch": 0.10068172626018651, "grad_norm": 0.3952338993549347, "learning_rate": 0.00017990096050117627, "loss": 1.44, "step": 7748 }, { "epoch": 0.10069472080410238, "grad_norm": 0.42936819791793823, "learning_rate": 0.00017989836103926487, "loss": 1.3384, "step": 7749 }, { "epoch": 0.10070771534801826, "grad_norm": 0.45824122428894043, "learning_rate": 0.0001798957615773535, "loss": 1.5126, "step": 7750 }, { "epoch": 0.10072070989193413, "grad_norm": 0.36568549275398254, "learning_rate": 0.00017989316211544212, "loss": 1.4435, "step": 7751 }, { "epoch": 0.10073370443585, "grad_norm": 0.2619154155254364, "learning_rate": 0.00017989056265353071, "loss": 1.4428, "step": 7752 }, { "epoch": 0.10074669897976588, "grad_norm": 0.30440089106559753, "learning_rate": 0.00017988796319161934, "loss": 1.4661, "step": 7753 }, { "epoch": 0.10075969352368175, "grad_norm": 0.3850434422492981, "learning_rate": 0.00017988536372970796, "loss": 1.5891, "step": 7754 }, { "epoch": 0.10077268806759762, "grad_norm": 0.40767812728881836, "learning_rate": 0.0001798827642677966, "loss": 1.3217, "step": 7755 }, { "epoch": 0.1007856826115135, "grad_norm": 0.3729136288166046, "learning_rate": 0.00017988016480588519, "loss": 1.3227, "step": 7756 }, { "epoch": 0.10079867715542937, "grad_norm": 0.40594127774238586, "learning_rate": 0.0001798775653439738, "loss": 1.5095, "step": 7757 }, { "epoch": 0.10081167169934524, "grad_norm": 0.39236366748809814, "learning_rate": 0.00017987496588206243, "loss": 1.4879, "step": 7758 }, { "epoch": 0.10082466624326111, "grad_norm": 0.40335825085639954, "learning_rate": 0.00017987236642015103, "loss": 1.7277, "step": 7759 }, { "epoch": 0.10083766078717699, "grad_norm": 0.36751919984817505, "learning_rate": 0.00017986976695823966, "loss": 1.4349, "step": 7760 }, { "epoch": 0.10085065533109286, "grad_norm": 0.35334184765815735, "learning_rate": 0.00017986716749632825, "loss": 1.4215, "step": 7761 }, { "epoch": 0.10086364987500873, "grad_norm": 0.3580816388130188, "learning_rate": 0.00017986456803441688, "loss": 1.5138, "step": 7762 }, { "epoch": 0.1008766444189246, "grad_norm": 0.41510021686553955, "learning_rate": 0.0001798619685725055, "loss": 1.3897, "step": 7763 }, { "epoch": 0.10088963896284048, "grad_norm": 0.3836803734302521, "learning_rate": 0.0001798593691105941, "loss": 1.5846, "step": 7764 }, { "epoch": 0.10090263350675635, "grad_norm": 0.3758074939250946, "learning_rate": 0.00017985676964868272, "loss": 1.543, "step": 7765 }, { "epoch": 0.10091562805067222, "grad_norm": 0.45739293098449707, "learning_rate": 0.00017985417018677135, "loss": 1.4393, "step": 7766 }, { "epoch": 0.1009286225945881, "grad_norm": 0.3502725660800934, "learning_rate": 0.00017985157072485997, "loss": 1.3627, "step": 7767 }, { "epoch": 0.10094161713850397, "grad_norm": 0.37774160504341125, "learning_rate": 0.00017984897126294857, "loss": 1.3531, "step": 7768 }, { "epoch": 0.10095461168241984, "grad_norm": 0.40376293659210205, "learning_rate": 0.0001798463718010372, "loss": 1.3984, "step": 7769 }, { "epoch": 0.10096760622633572, "grad_norm": 0.3908384442329407, "learning_rate": 0.00017984377233912582, "loss": 1.4472, "step": 7770 }, { "epoch": 0.10098060077025159, "grad_norm": 0.41029441356658936, "learning_rate": 0.00017984117287721442, "loss": 1.4773, "step": 7771 }, { "epoch": 0.10099359531416746, "grad_norm": 0.4586721658706665, "learning_rate": 0.00017983857341530304, "loss": 1.5646, "step": 7772 }, { "epoch": 0.10100658985808333, "grad_norm": 0.39528968930244446, "learning_rate": 0.00017983597395339167, "loss": 1.3538, "step": 7773 }, { "epoch": 0.10101958440199921, "grad_norm": 0.3632925748825073, "learning_rate": 0.0001798333744914803, "loss": 1.6117, "step": 7774 }, { "epoch": 0.10103257894591508, "grad_norm": 0.3847543001174927, "learning_rate": 0.0001798307750295689, "loss": 1.4012, "step": 7775 }, { "epoch": 0.10104557348983095, "grad_norm": 0.43651658296585083, "learning_rate": 0.00017982817556765749, "loss": 1.3917, "step": 7776 }, { "epoch": 0.10105856803374683, "grad_norm": 0.4280056953430176, "learning_rate": 0.00017982557610574614, "loss": 1.4205, "step": 7777 }, { "epoch": 0.1010715625776627, "grad_norm": 0.4749160706996918, "learning_rate": 0.00017982297664383473, "loss": 1.5303, "step": 7778 }, { "epoch": 0.10108455712157857, "grad_norm": 0.3926313519477844, "learning_rate": 0.00017982037718192336, "loss": 1.5686, "step": 7779 }, { "epoch": 0.10109755166549445, "grad_norm": 0.44535452127456665, "learning_rate": 0.00017981777772001196, "loss": 1.5519, "step": 7780 }, { "epoch": 0.10111054620941032, "grad_norm": 0.33920952677726746, "learning_rate": 0.00017981517825810058, "loss": 1.2957, "step": 7781 }, { "epoch": 0.10112354075332619, "grad_norm": 0.365041047334671, "learning_rate": 0.0001798125787961892, "loss": 1.5364, "step": 7782 }, { "epoch": 0.10113653529724206, "grad_norm": 0.4695407748222351, "learning_rate": 0.0001798099793342778, "loss": 1.4655, "step": 7783 }, { "epoch": 0.10114952984115794, "grad_norm": 0.3340899646282196, "learning_rate": 0.00017980737987236643, "loss": 1.398, "step": 7784 }, { "epoch": 0.10116252438507382, "grad_norm": 0.38033172488212585, "learning_rate": 0.00017980478041045505, "loss": 1.428, "step": 7785 }, { "epoch": 0.1011755189289897, "grad_norm": 0.37100502848625183, "learning_rate": 0.00017980218094854368, "loss": 1.4597, "step": 7786 }, { "epoch": 0.10118851347290557, "grad_norm": 0.3285274803638458, "learning_rate": 0.00017979958148663227, "loss": 1.3332, "step": 7787 }, { "epoch": 0.10120150801682144, "grad_norm": 0.43906766176223755, "learning_rate": 0.00017979698202472087, "loss": 1.4413, "step": 7788 }, { "epoch": 0.10121450256073732, "grad_norm": 0.42923998832702637, "learning_rate": 0.00017979438256280952, "loss": 1.455, "step": 7789 }, { "epoch": 0.10122749710465319, "grad_norm": 0.4535680413246155, "learning_rate": 0.00017979178310089812, "loss": 1.2622, "step": 7790 }, { "epoch": 0.10124049164856906, "grad_norm": 0.4653768241405487, "learning_rate": 0.00017978918363898674, "loss": 1.5935, "step": 7791 }, { "epoch": 0.10125348619248493, "grad_norm": 0.4389393627643585, "learning_rate": 0.00017978658417707534, "loss": 1.485, "step": 7792 }, { "epoch": 0.10126648073640081, "grad_norm": 0.4844987690448761, "learning_rate": 0.00017978398471516397, "loss": 1.4351, "step": 7793 }, { "epoch": 0.10127947528031668, "grad_norm": 0.4055352210998535, "learning_rate": 0.0001797813852532526, "loss": 1.5297, "step": 7794 }, { "epoch": 0.10129246982423255, "grad_norm": 0.40472620725631714, "learning_rate": 0.0001797787857913412, "loss": 1.6019, "step": 7795 }, { "epoch": 0.10130546436814843, "grad_norm": 0.390473335981369, "learning_rate": 0.0001797761863294298, "loss": 1.3117, "step": 7796 }, { "epoch": 0.1013184589120643, "grad_norm": 0.3171139657497406, "learning_rate": 0.00017977358686751844, "loss": 1.3259, "step": 7797 }, { "epoch": 0.10133145345598017, "grad_norm": 0.38119930028915405, "learning_rate": 0.00017977098740560706, "loss": 1.5352, "step": 7798 }, { "epoch": 0.10134444799989605, "grad_norm": 0.39812228083610535, "learning_rate": 0.00017976838794369566, "loss": 1.4322, "step": 7799 }, { "epoch": 0.10135744254381192, "grad_norm": 0.34248262643814087, "learning_rate": 0.00017976578848178428, "loss": 1.2128, "step": 7800 }, { "epoch": 0.10137043708772779, "grad_norm": 0.3865338861942291, "learning_rate": 0.0001797631890198729, "loss": 1.4702, "step": 7801 }, { "epoch": 0.10138343163164366, "grad_norm": 0.3255064785480499, "learning_rate": 0.0001797605895579615, "loss": 1.435, "step": 7802 }, { "epoch": 0.10139642617555954, "grad_norm": 0.33372730016708374, "learning_rate": 0.00017975799009605013, "loss": 1.2388, "step": 7803 }, { "epoch": 0.10140942071947541, "grad_norm": 0.5129263997077942, "learning_rate": 0.00017975539063413873, "loss": 1.5348, "step": 7804 }, { "epoch": 0.10142241526339128, "grad_norm": 0.38935065269470215, "learning_rate": 0.00017975279117222735, "loss": 1.4739, "step": 7805 }, { "epoch": 0.10143540980730716, "grad_norm": 0.43530070781707764, "learning_rate": 0.00017975019171031598, "loss": 1.4767, "step": 7806 }, { "epoch": 0.10144840435122303, "grad_norm": 0.4198201298713684, "learning_rate": 0.00017974759224840457, "loss": 1.5228, "step": 7807 }, { "epoch": 0.1014613988951389, "grad_norm": 0.3787910044193268, "learning_rate": 0.0001797449927864932, "loss": 1.2797, "step": 7808 }, { "epoch": 0.10147439343905477, "grad_norm": 0.38960161805152893, "learning_rate": 0.00017974239332458182, "loss": 1.3406, "step": 7809 }, { "epoch": 0.10148738798297065, "grad_norm": 0.4343084692955017, "learning_rate": 0.00017973979386267045, "loss": 1.6734, "step": 7810 }, { "epoch": 0.10150038252688652, "grad_norm": 0.4567416310310364, "learning_rate": 0.00017973719440075904, "loss": 1.4247, "step": 7811 }, { "epoch": 0.1015133770708024, "grad_norm": 0.4708667993545532, "learning_rate": 0.00017973459493884767, "loss": 1.5348, "step": 7812 }, { "epoch": 0.10152637161471827, "grad_norm": 0.42531076073646545, "learning_rate": 0.0001797319954769363, "loss": 1.483, "step": 7813 }, { "epoch": 0.10153936615863414, "grad_norm": 0.44487708806991577, "learning_rate": 0.0001797293960150249, "loss": 1.4065, "step": 7814 }, { "epoch": 0.10155236070255001, "grad_norm": 0.3435768187046051, "learning_rate": 0.00017972679655311351, "loss": 1.1367, "step": 7815 }, { "epoch": 0.10156535524646589, "grad_norm": 0.4042853116989136, "learning_rate": 0.00017972419709120214, "loss": 1.4232, "step": 7816 }, { "epoch": 0.10157834979038176, "grad_norm": 0.3916345238685608, "learning_rate": 0.00017972159762929074, "loss": 1.3807, "step": 7817 }, { "epoch": 0.10159134433429763, "grad_norm": 0.37747761607170105, "learning_rate": 0.00017971899816737936, "loss": 1.2671, "step": 7818 }, { "epoch": 0.1016043388782135, "grad_norm": 0.45680347084999084, "learning_rate": 0.00017971639870546796, "loss": 1.5233, "step": 7819 }, { "epoch": 0.10161733342212938, "grad_norm": 0.39336326718330383, "learning_rate": 0.0001797137992435566, "loss": 1.4689, "step": 7820 }, { "epoch": 0.10163032796604525, "grad_norm": 0.39701905846595764, "learning_rate": 0.0001797111997816452, "loss": 1.4432, "step": 7821 }, { "epoch": 0.10164332250996112, "grad_norm": 0.5037115216255188, "learning_rate": 0.00017970860031973383, "loss": 1.4934, "step": 7822 }, { "epoch": 0.10165631705387701, "grad_norm": 0.4388052225112915, "learning_rate": 0.00017970600085782243, "loss": 1.4042, "step": 7823 }, { "epoch": 0.10166931159779288, "grad_norm": 0.34577223658561707, "learning_rate": 0.00017970340139591105, "loss": 1.3769, "step": 7824 }, { "epoch": 0.10168230614170876, "grad_norm": 0.39711371064186096, "learning_rate": 0.00017970080193399968, "loss": 1.2014, "step": 7825 }, { "epoch": 0.10169530068562463, "grad_norm": 0.32872632145881653, "learning_rate": 0.00017969820247208828, "loss": 1.1028, "step": 7826 }, { "epoch": 0.1017082952295405, "grad_norm": 0.3827008306980133, "learning_rate": 0.0001796956030101769, "loss": 1.4979, "step": 7827 }, { "epoch": 0.10172128977345637, "grad_norm": 0.30252885818481445, "learning_rate": 0.00017969300354826552, "loss": 1.1797, "step": 7828 }, { "epoch": 0.10173428431737225, "grad_norm": 0.4236536920070648, "learning_rate": 0.00017969040408635415, "loss": 1.4234, "step": 7829 }, { "epoch": 0.10174727886128812, "grad_norm": 0.41328996419906616, "learning_rate": 0.00017968780462444275, "loss": 1.4877, "step": 7830 }, { "epoch": 0.101760273405204, "grad_norm": 0.39690878987312317, "learning_rate": 0.00017968520516253134, "loss": 1.3703, "step": 7831 }, { "epoch": 0.10177326794911987, "grad_norm": 0.3942885398864746, "learning_rate": 0.00017968260570062, "loss": 1.4388, "step": 7832 }, { "epoch": 0.10178626249303574, "grad_norm": 0.3404872715473175, "learning_rate": 0.0001796800062387086, "loss": 1.4037, "step": 7833 }, { "epoch": 0.10179925703695161, "grad_norm": 0.3025733232498169, "learning_rate": 0.00017967740677679722, "loss": 1.3139, "step": 7834 }, { "epoch": 0.10181225158086749, "grad_norm": 0.38775646686553955, "learning_rate": 0.00017967480731488581, "loss": 1.5021, "step": 7835 }, { "epoch": 0.10182524612478336, "grad_norm": 0.45014211535453796, "learning_rate": 0.00017967220785297444, "loss": 1.4182, "step": 7836 }, { "epoch": 0.10183824066869923, "grad_norm": 0.4211207330226898, "learning_rate": 0.00017966960839106306, "loss": 1.5032, "step": 7837 }, { "epoch": 0.1018512352126151, "grad_norm": 0.35970160365104675, "learning_rate": 0.00017966700892915166, "loss": 1.3707, "step": 7838 }, { "epoch": 0.10186422975653098, "grad_norm": 0.32968997955322266, "learning_rate": 0.00017966440946724029, "loss": 1.3483, "step": 7839 }, { "epoch": 0.10187722430044685, "grad_norm": 0.40386343002319336, "learning_rate": 0.0001796618100053289, "loss": 1.486, "step": 7840 }, { "epoch": 0.10189021884436272, "grad_norm": 0.41209647059440613, "learning_rate": 0.00017965921054341753, "loss": 1.4984, "step": 7841 }, { "epoch": 0.1019032133882786, "grad_norm": 0.4551544487476349, "learning_rate": 0.00017965661108150613, "loss": 1.7353, "step": 7842 }, { "epoch": 0.10191620793219447, "grad_norm": 0.28845903277397156, "learning_rate": 0.00017965401161959473, "loss": 1.1945, "step": 7843 }, { "epoch": 0.10192920247611034, "grad_norm": 0.4547567069530487, "learning_rate": 0.00017965141215768338, "loss": 1.4428, "step": 7844 }, { "epoch": 0.10194219702002622, "grad_norm": 0.3907301425933838, "learning_rate": 0.00017964881269577198, "loss": 1.4306, "step": 7845 }, { "epoch": 0.10195519156394209, "grad_norm": 0.3539254367351532, "learning_rate": 0.0001796462132338606, "loss": 1.25, "step": 7846 }, { "epoch": 0.10196818610785796, "grad_norm": 0.42573782801628113, "learning_rate": 0.0001796436137719492, "loss": 1.4194, "step": 7847 }, { "epoch": 0.10198118065177383, "grad_norm": 0.5222511887550354, "learning_rate": 0.00017964101431003782, "loss": 1.5892, "step": 7848 }, { "epoch": 0.1019941751956897, "grad_norm": 0.4325457513332367, "learning_rate": 0.00017963841484812645, "loss": 1.4501, "step": 7849 }, { "epoch": 0.10200716973960558, "grad_norm": 0.42050960659980774, "learning_rate": 0.00017963581538621505, "loss": 1.5406, "step": 7850 }, { "epoch": 0.10202016428352145, "grad_norm": 0.34707149863243103, "learning_rate": 0.0001796332159243037, "loss": 1.3757, "step": 7851 }, { "epoch": 0.10203315882743733, "grad_norm": 0.36952534317970276, "learning_rate": 0.0001796306164623923, "loss": 1.6663, "step": 7852 }, { "epoch": 0.1020461533713532, "grad_norm": 0.5112026929855347, "learning_rate": 0.00017962801700048092, "loss": 1.5723, "step": 7853 }, { "epoch": 0.10205914791526907, "grad_norm": 0.4922695457935333, "learning_rate": 0.00017962541753856952, "loss": 1.3254, "step": 7854 }, { "epoch": 0.10207214245918494, "grad_norm": 0.3786063492298126, "learning_rate": 0.00017962281807665814, "loss": 1.5976, "step": 7855 }, { "epoch": 0.10208513700310082, "grad_norm": 0.4102255403995514, "learning_rate": 0.00017962021861474677, "loss": 1.4643, "step": 7856 }, { "epoch": 0.10209813154701669, "grad_norm": 0.4170873165130615, "learning_rate": 0.00017961761915283536, "loss": 1.4876, "step": 7857 }, { "epoch": 0.10211112609093256, "grad_norm": 0.33146771788597107, "learning_rate": 0.000179615019690924, "loss": 1.425, "step": 7858 }, { "epoch": 0.10212412063484844, "grad_norm": 0.4506660997867584, "learning_rate": 0.0001796124202290126, "loss": 1.5549, "step": 7859 }, { "epoch": 0.10213711517876431, "grad_norm": 0.4250401258468628, "learning_rate": 0.0001796098207671012, "loss": 1.2596, "step": 7860 }, { "epoch": 0.1021501097226802, "grad_norm": 0.3729168176651001, "learning_rate": 0.00017960722130518983, "loss": 1.3355, "step": 7861 }, { "epoch": 0.10216310426659607, "grad_norm": 0.42832502722740173, "learning_rate": 0.00017960462184327843, "loss": 1.4575, "step": 7862 }, { "epoch": 0.10217609881051194, "grad_norm": 0.40425217151641846, "learning_rate": 0.00017960202238136708, "loss": 1.4841, "step": 7863 }, { "epoch": 0.10218909335442782, "grad_norm": 0.5028495788574219, "learning_rate": 0.00017959942291945568, "loss": 1.4228, "step": 7864 }, { "epoch": 0.10220208789834369, "grad_norm": 0.41667184233665466, "learning_rate": 0.0001795968234575443, "loss": 1.4579, "step": 7865 }, { "epoch": 0.10221508244225956, "grad_norm": 0.34058699011802673, "learning_rate": 0.0001795942239956329, "loss": 1.2667, "step": 7866 }, { "epoch": 0.10222807698617543, "grad_norm": 0.4319571554660797, "learning_rate": 0.00017959162453372153, "loss": 1.3744, "step": 7867 }, { "epoch": 0.10224107153009131, "grad_norm": 0.4323987662792206, "learning_rate": 0.00017958902507181015, "loss": 1.4948, "step": 7868 }, { "epoch": 0.10225406607400718, "grad_norm": 0.4942198395729065, "learning_rate": 0.00017958642560989875, "loss": 1.6714, "step": 7869 }, { "epoch": 0.10226706061792305, "grad_norm": 0.4277282655239105, "learning_rate": 0.00017958382614798737, "loss": 1.4999, "step": 7870 }, { "epoch": 0.10228005516183893, "grad_norm": 0.4215066432952881, "learning_rate": 0.000179581226686076, "loss": 1.4917, "step": 7871 }, { "epoch": 0.1022930497057548, "grad_norm": 0.4057972729206085, "learning_rate": 0.0001795786272241646, "loss": 1.5589, "step": 7872 }, { "epoch": 0.10230604424967067, "grad_norm": 0.5839102268218994, "learning_rate": 0.00017957602776225322, "loss": 1.5924, "step": 7873 }, { "epoch": 0.10231903879358654, "grad_norm": 0.3789006769657135, "learning_rate": 0.00017957342830034182, "loss": 1.4465, "step": 7874 }, { "epoch": 0.10233203333750242, "grad_norm": 0.3740769326686859, "learning_rate": 0.00017957082883843047, "loss": 1.5131, "step": 7875 }, { "epoch": 0.10234502788141829, "grad_norm": 0.3642594814300537, "learning_rate": 0.00017956822937651907, "loss": 1.5794, "step": 7876 }, { "epoch": 0.10235802242533416, "grad_norm": 0.38273754715919495, "learning_rate": 0.0001795656299146077, "loss": 1.4333, "step": 7877 }, { "epoch": 0.10237101696925004, "grad_norm": 0.45849257707595825, "learning_rate": 0.0001795630304526963, "loss": 1.5496, "step": 7878 }, { "epoch": 0.10238401151316591, "grad_norm": 0.438909649848938, "learning_rate": 0.0001795604309907849, "loss": 1.6207, "step": 7879 }, { "epoch": 0.10239700605708178, "grad_norm": 0.468718022108078, "learning_rate": 0.00017955783152887354, "loss": 1.3413, "step": 7880 }, { "epoch": 0.10241000060099766, "grad_norm": 0.41679978370666504, "learning_rate": 0.00017955523206696213, "loss": 1.4155, "step": 7881 }, { "epoch": 0.10242299514491353, "grad_norm": 0.41075843572616577, "learning_rate": 0.00017955263260505076, "loss": 1.5617, "step": 7882 }, { "epoch": 0.1024359896888294, "grad_norm": 0.35567933320999146, "learning_rate": 0.00017955003314313938, "loss": 1.4127, "step": 7883 }, { "epoch": 0.10244898423274527, "grad_norm": 0.42304596304893494, "learning_rate": 0.00017954743368122798, "loss": 1.5871, "step": 7884 }, { "epoch": 0.10246197877666115, "grad_norm": 0.5064476132392883, "learning_rate": 0.0001795448342193166, "loss": 1.4753, "step": 7885 }, { "epoch": 0.10247497332057702, "grad_norm": 0.3652750551700592, "learning_rate": 0.00017954223475740523, "loss": 1.4736, "step": 7886 }, { "epoch": 0.1024879678644929, "grad_norm": 0.38267767429351807, "learning_rate": 0.00017953963529549385, "loss": 1.4946, "step": 7887 }, { "epoch": 0.10250096240840877, "grad_norm": 0.42121654748916626, "learning_rate": 0.00017953703583358245, "loss": 1.4531, "step": 7888 }, { "epoch": 0.10251395695232464, "grad_norm": 0.2655147612094879, "learning_rate": 0.00017953443637167108, "loss": 1.5136, "step": 7889 }, { "epoch": 0.10252695149624051, "grad_norm": 0.37908679246902466, "learning_rate": 0.0001795318369097597, "loss": 1.2953, "step": 7890 }, { "epoch": 0.10253994604015639, "grad_norm": 0.37963083386421204, "learning_rate": 0.0001795292374478483, "loss": 1.446, "step": 7891 }, { "epoch": 0.10255294058407226, "grad_norm": 0.3967644274234772, "learning_rate": 0.00017952663798593692, "loss": 1.2688, "step": 7892 }, { "epoch": 0.10256593512798813, "grad_norm": 0.3967345356941223, "learning_rate": 0.00017952403852402552, "loss": 1.5164, "step": 7893 }, { "epoch": 0.102578929671904, "grad_norm": 0.43092790246009827, "learning_rate": 0.00017952143906211417, "loss": 1.3236, "step": 7894 }, { "epoch": 0.10259192421581988, "grad_norm": 0.4328557848930359, "learning_rate": 0.00017951883960020277, "loss": 1.5222, "step": 7895 }, { "epoch": 0.10260491875973575, "grad_norm": 0.3354707956314087, "learning_rate": 0.0001795162401382914, "loss": 1.3445, "step": 7896 }, { "epoch": 0.10261791330365162, "grad_norm": 0.44896838068962097, "learning_rate": 0.00017951364067638, "loss": 1.4368, "step": 7897 }, { "epoch": 0.1026309078475675, "grad_norm": 0.42886048555374146, "learning_rate": 0.00017951104121446862, "loss": 1.4952, "step": 7898 }, { "epoch": 0.10264390239148338, "grad_norm": 0.4371505379676819, "learning_rate": 0.00017950844175255724, "loss": 1.6611, "step": 7899 }, { "epoch": 0.10265689693539926, "grad_norm": 0.4039475917816162, "learning_rate": 0.00017950584229064584, "loss": 1.4485, "step": 7900 }, { "epoch": 0.10266989147931513, "grad_norm": 0.37279394268989563, "learning_rate": 0.00017950324282873446, "loss": 1.541, "step": 7901 }, { "epoch": 0.102682886023231, "grad_norm": 0.4172093868255615, "learning_rate": 0.00017950064336682309, "loss": 1.558, "step": 7902 }, { "epoch": 0.10269588056714687, "grad_norm": 0.4678516387939453, "learning_rate": 0.00017949804390491168, "loss": 1.4673, "step": 7903 }, { "epoch": 0.10270887511106275, "grad_norm": 0.39488333463668823, "learning_rate": 0.0001794954444430003, "loss": 1.5715, "step": 7904 }, { "epoch": 0.10272186965497862, "grad_norm": 0.4110446274280548, "learning_rate": 0.0001794928449810889, "loss": 1.418, "step": 7905 }, { "epoch": 0.1027348641988945, "grad_norm": 0.3216738700866699, "learning_rate": 0.00017949024551917756, "loss": 1.4773, "step": 7906 }, { "epoch": 0.10274785874281037, "grad_norm": 0.4549819231033325, "learning_rate": 0.00017948764605726615, "loss": 1.4852, "step": 7907 }, { "epoch": 0.10276085328672624, "grad_norm": 0.5669338703155518, "learning_rate": 0.00017948504659535478, "loss": 1.4325, "step": 7908 }, { "epoch": 0.10277384783064211, "grad_norm": 0.433988094329834, "learning_rate": 0.00017948244713344338, "loss": 1.4724, "step": 7909 }, { "epoch": 0.10278684237455799, "grad_norm": 0.40066781640052795, "learning_rate": 0.000179479847671532, "loss": 1.5209, "step": 7910 }, { "epoch": 0.10279983691847386, "grad_norm": 0.39563825726509094, "learning_rate": 0.00017947724820962063, "loss": 1.4775, "step": 7911 }, { "epoch": 0.10281283146238973, "grad_norm": 0.45007336139678955, "learning_rate": 0.00017947464874770922, "loss": 1.5504, "step": 7912 }, { "epoch": 0.1028258260063056, "grad_norm": 0.4451403319835663, "learning_rate": 0.00017947204928579785, "loss": 1.4718, "step": 7913 }, { "epoch": 0.10283882055022148, "grad_norm": 0.34433048963546753, "learning_rate": 0.00017946944982388647, "loss": 1.3367, "step": 7914 }, { "epoch": 0.10285181509413735, "grad_norm": 0.3291279971599579, "learning_rate": 0.00017946685036197507, "loss": 1.3682, "step": 7915 }, { "epoch": 0.10286480963805322, "grad_norm": 0.2266838252544403, "learning_rate": 0.0001794642509000637, "loss": 1.2422, "step": 7916 }, { "epoch": 0.1028778041819691, "grad_norm": 0.3619321286678314, "learning_rate": 0.0001794616514381523, "loss": 1.2012, "step": 7917 }, { "epoch": 0.10289079872588497, "grad_norm": 0.3631254732608795, "learning_rate": 0.00017945905197624094, "loss": 1.3714, "step": 7918 }, { "epoch": 0.10290379326980084, "grad_norm": 0.37563759088516235, "learning_rate": 0.00017945645251432954, "loss": 1.4261, "step": 7919 }, { "epoch": 0.10291678781371671, "grad_norm": 0.43486857414245605, "learning_rate": 0.00017945385305241816, "loss": 1.4363, "step": 7920 }, { "epoch": 0.10292978235763259, "grad_norm": 0.42600587010383606, "learning_rate": 0.00017945125359050676, "loss": 1.5778, "step": 7921 }, { "epoch": 0.10294277690154846, "grad_norm": 0.5117783546447754, "learning_rate": 0.00017944865412859539, "loss": 1.4536, "step": 7922 }, { "epoch": 0.10295577144546433, "grad_norm": 0.2974124252796173, "learning_rate": 0.000179446054666684, "loss": 1.3523, "step": 7923 }, { "epoch": 0.1029687659893802, "grad_norm": 0.49731770157814026, "learning_rate": 0.0001794434552047726, "loss": 1.3632, "step": 7924 }, { "epoch": 0.10298176053329608, "grad_norm": 0.2756224572658539, "learning_rate": 0.00017944085574286126, "loss": 1.3703, "step": 7925 }, { "epoch": 0.10299475507721195, "grad_norm": 0.4568815529346466, "learning_rate": 0.00017943825628094986, "loss": 1.4597, "step": 7926 }, { "epoch": 0.10300774962112783, "grad_norm": 0.2808983325958252, "learning_rate": 0.00017943565681903845, "loss": 1.2667, "step": 7927 }, { "epoch": 0.1030207441650437, "grad_norm": 0.4003196358680725, "learning_rate": 0.00017943305735712708, "loss": 1.3522, "step": 7928 }, { "epoch": 0.10303373870895957, "grad_norm": 0.3558260202407837, "learning_rate": 0.0001794304578952157, "loss": 1.3169, "step": 7929 }, { "epoch": 0.10304673325287544, "grad_norm": 0.43108677864074707, "learning_rate": 0.00017942785843330433, "loss": 1.4761, "step": 7930 }, { "epoch": 0.10305972779679132, "grad_norm": 0.3817146122455597, "learning_rate": 0.00017942525897139293, "loss": 1.5432, "step": 7931 }, { "epoch": 0.10307272234070719, "grad_norm": 0.39911437034606934, "learning_rate": 0.00017942265950948155, "loss": 1.3403, "step": 7932 }, { "epoch": 0.10308571688462306, "grad_norm": 0.41367942094802856, "learning_rate": 0.00017942006004757017, "loss": 1.5102, "step": 7933 }, { "epoch": 0.10309871142853894, "grad_norm": 0.411382794380188, "learning_rate": 0.00017941746058565877, "loss": 1.4226, "step": 7934 }, { "epoch": 0.10311170597245481, "grad_norm": 0.45562857389450073, "learning_rate": 0.0001794148611237474, "loss": 1.5076, "step": 7935 }, { "epoch": 0.10312470051637068, "grad_norm": 0.35147354006767273, "learning_rate": 0.000179412261661836, "loss": 1.3266, "step": 7936 }, { "epoch": 0.10313769506028657, "grad_norm": 0.3584577143192291, "learning_rate": 0.00017940966219992464, "loss": 1.4739, "step": 7937 }, { "epoch": 0.10315068960420244, "grad_norm": 0.37035125494003296, "learning_rate": 0.00017940706273801324, "loss": 1.481, "step": 7938 }, { "epoch": 0.10316368414811831, "grad_norm": 0.414473295211792, "learning_rate": 0.00017940446327610184, "loss": 1.3736, "step": 7939 }, { "epoch": 0.10317667869203419, "grad_norm": 0.4054807424545288, "learning_rate": 0.00017940186381419046, "loss": 1.4648, "step": 7940 }, { "epoch": 0.10318967323595006, "grad_norm": 0.46387138962745667, "learning_rate": 0.0001793992643522791, "loss": 1.4871, "step": 7941 }, { "epoch": 0.10320266777986593, "grad_norm": 0.4025227129459381, "learning_rate": 0.0001793966648903677, "loss": 1.476, "step": 7942 }, { "epoch": 0.1032156623237818, "grad_norm": 0.39503058791160583, "learning_rate": 0.0001793940654284563, "loss": 1.5687, "step": 7943 }, { "epoch": 0.10322865686769768, "grad_norm": 0.3631810247898102, "learning_rate": 0.00017939146596654493, "loss": 1.3722, "step": 7944 }, { "epoch": 0.10324165141161355, "grad_norm": 0.37845462560653687, "learning_rate": 0.00017938886650463356, "loss": 1.2985, "step": 7945 }, { "epoch": 0.10325464595552943, "grad_norm": 0.3191598951816559, "learning_rate": 0.00017938626704272216, "loss": 1.4712, "step": 7946 }, { "epoch": 0.1032676404994453, "grad_norm": 0.3889971077442169, "learning_rate": 0.00017938366758081078, "loss": 1.5827, "step": 7947 }, { "epoch": 0.10328063504336117, "grad_norm": 0.4626803398132324, "learning_rate": 0.00017938106811889938, "loss": 1.5201, "step": 7948 }, { "epoch": 0.10329362958727704, "grad_norm": 0.27911537885665894, "learning_rate": 0.00017937846865698803, "loss": 1.3734, "step": 7949 }, { "epoch": 0.10330662413119292, "grad_norm": 0.5439074635505676, "learning_rate": 0.00017937586919507663, "loss": 1.4754, "step": 7950 }, { "epoch": 0.10331961867510879, "grad_norm": 0.45616888999938965, "learning_rate": 0.00017937326973316525, "loss": 1.4651, "step": 7951 }, { "epoch": 0.10333261321902466, "grad_norm": 0.4382798373699188, "learning_rate": 0.00017937067027125385, "loss": 1.2766, "step": 7952 }, { "epoch": 0.10334560776294054, "grad_norm": 0.43490156531333923, "learning_rate": 0.00017936807080934247, "loss": 1.5375, "step": 7953 }, { "epoch": 0.10335860230685641, "grad_norm": 0.469547301530838, "learning_rate": 0.0001793654713474311, "loss": 1.6105, "step": 7954 }, { "epoch": 0.10337159685077228, "grad_norm": 0.43007007241249084, "learning_rate": 0.0001793628718855197, "loss": 1.5242, "step": 7955 }, { "epoch": 0.10338459139468816, "grad_norm": 0.3522557318210602, "learning_rate": 0.00017936027242360832, "loss": 1.388, "step": 7956 }, { "epoch": 0.10339758593860403, "grad_norm": 0.40999171137809753, "learning_rate": 0.00017935767296169694, "loss": 1.2348, "step": 7957 }, { "epoch": 0.1034105804825199, "grad_norm": 0.36213162541389465, "learning_rate": 0.00017935507349978554, "loss": 1.5623, "step": 7958 }, { "epoch": 0.10342357502643577, "grad_norm": 0.4497208595275879, "learning_rate": 0.00017935247403787417, "loss": 1.5633, "step": 7959 }, { "epoch": 0.10343656957035165, "grad_norm": 0.3032718598842621, "learning_rate": 0.0001793498745759628, "loss": 1.3471, "step": 7960 }, { "epoch": 0.10344956411426752, "grad_norm": 0.40988150238990784, "learning_rate": 0.00017934727511405142, "loss": 1.3207, "step": 7961 }, { "epoch": 0.10346255865818339, "grad_norm": 0.3369917869567871, "learning_rate": 0.00017934467565214, "loss": 1.5725, "step": 7962 }, { "epoch": 0.10347555320209927, "grad_norm": 0.48962143063545227, "learning_rate": 0.00017934207619022864, "loss": 1.3241, "step": 7963 }, { "epoch": 0.10348854774601514, "grad_norm": 0.38944628834724426, "learning_rate": 0.00017933947672831726, "loss": 1.4909, "step": 7964 }, { "epoch": 0.10350154228993101, "grad_norm": 0.3094650208950043, "learning_rate": 0.00017933687726640586, "loss": 1.3422, "step": 7965 }, { "epoch": 0.10351453683384688, "grad_norm": 0.3750936985015869, "learning_rate": 0.00017933427780449448, "loss": 1.6183, "step": 7966 }, { "epoch": 0.10352753137776276, "grad_norm": 0.36818066239356995, "learning_rate": 0.00017933167834258308, "loss": 1.3194, "step": 7967 }, { "epoch": 0.10354052592167863, "grad_norm": 0.33165282011032104, "learning_rate": 0.0001793290788806717, "loss": 1.4134, "step": 7968 }, { "epoch": 0.1035535204655945, "grad_norm": 0.2703864872455597, "learning_rate": 0.00017932647941876033, "loss": 1.1932, "step": 7969 }, { "epoch": 0.10356651500951038, "grad_norm": 0.37373217940330505, "learning_rate": 0.00017932387995684893, "loss": 1.5205, "step": 7970 }, { "epoch": 0.10357950955342625, "grad_norm": 0.42540067434310913, "learning_rate": 0.00017932128049493755, "loss": 1.4115, "step": 7971 }, { "epoch": 0.10359250409734212, "grad_norm": 0.39851224422454834, "learning_rate": 0.00017931868103302618, "loss": 1.4232, "step": 7972 }, { "epoch": 0.103605498641258, "grad_norm": 0.4035250246524811, "learning_rate": 0.0001793160815711148, "loss": 1.414, "step": 7973 }, { "epoch": 0.10361849318517387, "grad_norm": 0.39069944620132446, "learning_rate": 0.0001793134821092034, "loss": 1.4404, "step": 7974 }, { "epoch": 0.10363148772908976, "grad_norm": 0.36573952436447144, "learning_rate": 0.00017931088264729202, "loss": 1.5088, "step": 7975 }, { "epoch": 0.10364448227300563, "grad_norm": 0.347029447555542, "learning_rate": 0.00017930828318538065, "loss": 1.6356, "step": 7976 }, { "epoch": 0.1036574768169215, "grad_norm": 0.48322170972824097, "learning_rate": 0.00017930568372346924, "loss": 1.5193, "step": 7977 }, { "epoch": 0.10367047136083737, "grad_norm": 0.4446842670440674, "learning_rate": 0.00017930308426155787, "loss": 1.3507, "step": 7978 }, { "epoch": 0.10368346590475325, "grad_norm": 0.3830401599407196, "learning_rate": 0.00017930048479964647, "loss": 1.5086, "step": 7979 }, { "epoch": 0.10369646044866912, "grad_norm": 0.4655143916606903, "learning_rate": 0.00017929788533773512, "loss": 1.5273, "step": 7980 }, { "epoch": 0.10370945499258499, "grad_norm": 0.38228240609169006, "learning_rate": 0.00017929528587582372, "loss": 1.3069, "step": 7981 }, { "epoch": 0.10372244953650087, "grad_norm": 0.37955889105796814, "learning_rate": 0.0001792926864139123, "loss": 1.3866, "step": 7982 }, { "epoch": 0.10373544408041674, "grad_norm": 0.3734840452671051, "learning_rate": 0.00017929008695200094, "loss": 1.5102, "step": 7983 }, { "epoch": 0.10374843862433261, "grad_norm": 0.3247419595718384, "learning_rate": 0.00017928748749008956, "loss": 1.3812, "step": 7984 }, { "epoch": 0.10376143316824848, "grad_norm": 0.3345896005630493, "learning_rate": 0.0001792848880281782, "loss": 1.4327, "step": 7985 }, { "epoch": 0.10377442771216436, "grad_norm": 0.39056822657585144, "learning_rate": 0.00017928228856626678, "loss": 1.4673, "step": 7986 }, { "epoch": 0.10378742225608023, "grad_norm": 0.4080594778060913, "learning_rate": 0.0001792796891043554, "loss": 1.4138, "step": 7987 }, { "epoch": 0.1038004167999961, "grad_norm": 0.38304218649864197, "learning_rate": 0.00017927708964244403, "loss": 1.4701, "step": 7988 }, { "epoch": 0.10381341134391198, "grad_norm": 0.4749680459499359, "learning_rate": 0.00017927449018053263, "loss": 1.5491, "step": 7989 }, { "epoch": 0.10382640588782785, "grad_norm": 0.4998060464859009, "learning_rate": 0.00017927189071862125, "loss": 1.4176, "step": 7990 }, { "epoch": 0.10383940043174372, "grad_norm": 0.43049871921539307, "learning_rate": 0.00017926929125670985, "loss": 1.3857, "step": 7991 }, { "epoch": 0.1038523949756596, "grad_norm": 0.3602466583251953, "learning_rate": 0.0001792666917947985, "loss": 1.4252, "step": 7992 }, { "epoch": 0.10386538951957547, "grad_norm": 0.31236013770103455, "learning_rate": 0.0001792640923328871, "loss": 1.125, "step": 7993 }, { "epoch": 0.10387838406349134, "grad_norm": 0.42238807678222656, "learning_rate": 0.0001792614928709757, "loss": 1.4078, "step": 7994 }, { "epoch": 0.10389137860740721, "grad_norm": 0.4486556649208069, "learning_rate": 0.00017925889340906432, "loss": 1.3463, "step": 7995 }, { "epoch": 0.10390437315132309, "grad_norm": 0.3729092478752136, "learning_rate": 0.00017925629394715295, "loss": 1.366, "step": 7996 }, { "epoch": 0.10391736769523896, "grad_norm": 0.4954756498336792, "learning_rate": 0.00017925369448524157, "loss": 1.5263, "step": 7997 }, { "epoch": 0.10393036223915483, "grad_norm": 0.40927091240882874, "learning_rate": 0.00017925109502333017, "loss": 1.5341, "step": 7998 }, { "epoch": 0.1039433567830707, "grad_norm": 0.4058709442615509, "learning_rate": 0.0001792484955614188, "loss": 1.1035, "step": 7999 }, { "epoch": 0.10395635132698658, "grad_norm": 0.4331141412258148, "learning_rate": 0.00017924589609950742, "loss": 1.4959, "step": 8000 }, { "epoch": 0.10396934587090245, "grad_norm": 0.45258501172065735, "learning_rate": 0.00017924329663759602, "loss": 1.5827, "step": 8001 }, { "epoch": 0.10398234041481833, "grad_norm": 0.37838178873062134, "learning_rate": 0.00017924069717568464, "loss": 1.5544, "step": 8002 }, { "epoch": 0.1039953349587342, "grad_norm": 0.3194632828235626, "learning_rate": 0.00017923809771377326, "loss": 1.2325, "step": 8003 }, { "epoch": 0.10400832950265007, "grad_norm": 0.3741583228111267, "learning_rate": 0.0001792354982518619, "loss": 1.5401, "step": 8004 }, { "epoch": 0.10402132404656594, "grad_norm": 0.4134669303894043, "learning_rate": 0.00017923289878995049, "loss": 1.3613, "step": 8005 }, { "epoch": 0.10403431859048182, "grad_norm": 0.4060792922973633, "learning_rate": 0.0001792302993280391, "loss": 1.5222, "step": 8006 }, { "epoch": 0.10404731313439769, "grad_norm": 0.47166624665260315, "learning_rate": 0.00017922769986612774, "loss": 1.6411, "step": 8007 }, { "epoch": 0.10406030767831356, "grad_norm": 0.38318881392478943, "learning_rate": 0.00017922510040421633, "loss": 1.3628, "step": 8008 }, { "epoch": 0.10407330222222944, "grad_norm": 0.31945088505744934, "learning_rate": 0.00017922250094230496, "loss": 1.2308, "step": 8009 }, { "epoch": 0.10408629676614531, "grad_norm": 0.41478046774864197, "learning_rate": 0.00017921990148039355, "loss": 1.3938, "step": 8010 }, { "epoch": 0.10409929131006118, "grad_norm": 0.3366563022136688, "learning_rate": 0.00017921730201848218, "loss": 1.4673, "step": 8011 }, { "epoch": 0.10411228585397705, "grad_norm": 0.40028998255729675, "learning_rate": 0.0001792147025565708, "loss": 1.2632, "step": 8012 }, { "epoch": 0.10412528039789294, "grad_norm": 0.3477810323238373, "learning_rate": 0.0001792121030946594, "loss": 1.3746, "step": 8013 }, { "epoch": 0.10413827494180881, "grad_norm": 0.26680251955986023, "learning_rate": 0.00017920950363274803, "loss": 1.2252, "step": 8014 }, { "epoch": 0.10415126948572469, "grad_norm": 0.4064973294734955, "learning_rate": 0.00017920690417083665, "loss": 1.453, "step": 8015 }, { "epoch": 0.10416426402964056, "grad_norm": 0.4885926842689514, "learning_rate": 0.00017920430470892527, "loss": 1.5816, "step": 8016 }, { "epoch": 0.10417725857355643, "grad_norm": 0.3536035418510437, "learning_rate": 0.00017920170524701387, "loss": 1.474, "step": 8017 }, { "epoch": 0.1041902531174723, "grad_norm": 0.3967699706554413, "learning_rate": 0.0001791991057851025, "loss": 1.3436, "step": 8018 }, { "epoch": 0.10420324766138818, "grad_norm": 0.4234827160835266, "learning_rate": 0.00017919650632319112, "loss": 1.4573, "step": 8019 }, { "epoch": 0.10421624220530405, "grad_norm": 0.4700900614261627, "learning_rate": 0.00017919390686127972, "loss": 1.5331, "step": 8020 }, { "epoch": 0.10422923674921993, "grad_norm": 0.30281639099121094, "learning_rate": 0.00017919130739936834, "loss": 1.3903, "step": 8021 }, { "epoch": 0.1042422312931358, "grad_norm": 0.34131497144699097, "learning_rate": 0.00017918870793745694, "loss": 1.3567, "step": 8022 }, { "epoch": 0.10425522583705167, "grad_norm": 0.3436199426651001, "learning_rate": 0.00017918610847554556, "loss": 1.3489, "step": 8023 }, { "epoch": 0.10426822038096754, "grad_norm": 0.359914094209671, "learning_rate": 0.0001791835090136342, "loss": 1.5661, "step": 8024 }, { "epoch": 0.10428121492488342, "grad_norm": 0.3884362578392029, "learning_rate": 0.00017918090955172279, "loss": 1.4661, "step": 8025 }, { "epoch": 0.10429420946879929, "grad_norm": 0.36077505350112915, "learning_rate": 0.0001791783100898114, "loss": 1.3918, "step": 8026 }, { "epoch": 0.10430720401271516, "grad_norm": 0.3498428463935852, "learning_rate": 0.00017917571062790004, "loss": 1.3795, "step": 8027 }, { "epoch": 0.10432019855663104, "grad_norm": 0.3328615427017212, "learning_rate": 0.00017917311116598866, "loss": 1.4904, "step": 8028 }, { "epoch": 0.10433319310054691, "grad_norm": 0.30838197469711304, "learning_rate": 0.00017917051170407726, "loss": 1.4022, "step": 8029 }, { "epoch": 0.10434618764446278, "grad_norm": 0.37146902084350586, "learning_rate": 0.00017916791224216588, "loss": 1.4213, "step": 8030 }, { "epoch": 0.10435918218837865, "grad_norm": 0.41162291169166565, "learning_rate": 0.0001791653127802545, "loss": 1.4455, "step": 8031 }, { "epoch": 0.10437217673229453, "grad_norm": 0.37905123829841614, "learning_rate": 0.0001791627133183431, "loss": 1.3856, "step": 8032 }, { "epoch": 0.1043851712762104, "grad_norm": 0.3954203128814697, "learning_rate": 0.00017916011385643173, "loss": 1.3571, "step": 8033 }, { "epoch": 0.10439816582012627, "grad_norm": 0.3242570459842682, "learning_rate": 0.00017915751439452035, "loss": 1.4654, "step": 8034 }, { "epoch": 0.10441116036404215, "grad_norm": 0.39476215839385986, "learning_rate": 0.00017915491493260898, "loss": 1.4966, "step": 8035 }, { "epoch": 0.10442415490795802, "grad_norm": 0.4378581941127777, "learning_rate": 0.00017915231547069757, "loss": 1.434, "step": 8036 }, { "epoch": 0.10443714945187389, "grad_norm": 0.42644456028938293, "learning_rate": 0.00017914971600878617, "loss": 1.5247, "step": 8037 }, { "epoch": 0.10445014399578977, "grad_norm": 0.2627384066581726, "learning_rate": 0.00017914711654687482, "loss": 1.4398, "step": 8038 }, { "epoch": 0.10446313853970564, "grad_norm": 0.35542765259742737, "learning_rate": 0.00017914451708496342, "loss": 1.3226, "step": 8039 }, { "epoch": 0.10447613308362151, "grad_norm": 0.33493709564208984, "learning_rate": 0.00017914191762305205, "loss": 1.4565, "step": 8040 }, { "epoch": 0.10448912762753738, "grad_norm": 0.3139078617095947, "learning_rate": 0.00017913931816114064, "loss": 1.2585, "step": 8041 }, { "epoch": 0.10450212217145326, "grad_norm": 0.37238168716430664, "learning_rate": 0.00017913671869922927, "loss": 1.4998, "step": 8042 }, { "epoch": 0.10451511671536913, "grad_norm": 0.3731575310230255, "learning_rate": 0.0001791341192373179, "loss": 1.5094, "step": 8043 }, { "epoch": 0.104528111259285, "grad_norm": 0.23553532361984253, "learning_rate": 0.0001791315197754065, "loss": 1.3379, "step": 8044 }, { "epoch": 0.10454110580320088, "grad_norm": 0.32989755272865295, "learning_rate": 0.0001791289203134951, "loss": 1.3699, "step": 8045 }, { "epoch": 0.10455410034711675, "grad_norm": 0.3031196594238281, "learning_rate": 0.00017912632085158374, "loss": 1.3595, "step": 8046 }, { "epoch": 0.10456709489103262, "grad_norm": 0.4284035563468933, "learning_rate": 0.00017912372138967236, "loss": 1.7581, "step": 8047 }, { "epoch": 0.1045800894349485, "grad_norm": 0.3994840383529663, "learning_rate": 0.00017912112192776096, "loss": 1.3112, "step": 8048 }, { "epoch": 0.10459308397886437, "grad_norm": 0.4671224057674408, "learning_rate": 0.00017911852246584956, "loss": 1.5354, "step": 8049 }, { "epoch": 0.10460607852278024, "grad_norm": 0.4269476532936096, "learning_rate": 0.0001791159230039382, "loss": 1.6347, "step": 8050 }, { "epoch": 0.10461907306669613, "grad_norm": 0.39409470558166504, "learning_rate": 0.0001791133235420268, "loss": 1.5037, "step": 8051 }, { "epoch": 0.104632067610612, "grad_norm": 0.3732718229293823, "learning_rate": 0.00017911072408011543, "loss": 1.3904, "step": 8052 }, { "epoch": 0.10464506215452787, "grad_norm": 0.44791218638420105, "learning_rate": 0.00017910812461820403, "loss": 1.446, "step": 8053 }, { "epoch": 0.10465805669844375, "grad_norm": 0.4334414005279541, "learning_rate": 0.00017910552515629265, "loss": 1.4468, "step": 8054 }, { "epoch": 0.10467105124235962, "grad_norm": 0.38104677200317383, "learning_rate": 0.00017910292569438128, "loss": 1.5195, "step": 8055 }, { "epoch": 0.10468404578627549, "grad_norm": 0.3623162508010864, "learning_rate": 0.00017910032623246987, "loss": 1.5297, "step": 8056 }, { "epoch": 0.10469704033019137, "grad_norm": 0.42105117440223694, "learning_rate": 0.0001790977267705585, "loss": 1.4329, "step": 8057 }, { "epoch": 0.10471003487410724, "grad_norm": 0.37765073776245117, "learning_rate": 0.00017909512730864712, "loss": 1.3613, "step": 8058 }, { "epoch": 0.10472302941802311, "grad_norm": 0.4411623179912567, "learning_rate": 0.00017909252784673575, "loss": 1.4635, "step": 8059 }, { "epoch": 0.10473602396193898, "grad_norm": 0.36713117361068726, "learning_rate": 0.00017908992838482435, "loss": 1.3269, "step": 8060 }, { "epoch": 0.10474901850585486, "grad_norm": 0.43734514713287354, "learning_rate": 0.00017908732892291294, "loss": 1.2507, "step": 8061 }, { "epoch": 0.10476201304977073, "grad_norm": 0.45628947019577026, "learning_rate": 0.0001790847294610016, "loss": 1.3814, "step": 8062 }, { "epoch": 0.1047750075936866, "grad_norm": 0.4164862632751465, "learning_rate": 0.0001790821299990902, "loss": 1.3777, "step": 8063 }, { "epoch": 0.10478800213760248, "grad_norm": 0.4260309934616089, "learning_rate": 0.00017907953053717882, "loss": 1.5667, "step": 8064 }, { "epoch": 0.10480099668151835, "grad_norm": 0.4057696461677551, "learning_rate": 0.0001790769310752674, "loss": 1.4148, "step": 8065 }, { "epoch": 0.10481399122543422, "grad_norm": 0.4715280830860138, "learning_rate": 0.00017907433161335604, "loss": 1.6246, "step": 8066 }, { "epoch": 0.1048269857693501, "grad_norm": 0.4925602376461029, "learning_rate": 0.00017907173215144466, "loss": 1.4226, "step": 8067 }, { "epoch": 0.10483998031326597, "grad_norm": 0.32506945729255676, "learning_rate": 0.00017906913268953326, "loss": 1.614, "step": 8068 }, { "epoch": 0.10485297485718184, "grad_norm": 0.3706367015838623, "learning_rate": 0.00017906653322762188, "loss": 1.3106, "step": 8069 }, { "epoch": 0.10486596940109771, "grad_norm": 0.31357234716415405, "learning_rate": 0.0001790639337657105, "loss": 1.2569, "step": 8070 }, { "epoch": 0.10487896394501359, "grad_norm": 0.35913679003715515, "learning_rate": 0.00017906133430379913, "loss": 1.6568, "step": 8071 }, { "epoch": 0.10489195848892946, "grad_norm": 0.42747074365615845, "learning_rate": 0.00017905873484188773, "loss": 1.4414, "step": 8072 }, { "epoch": 0.10490495303284533, "grad_norm": 0.34334713220596313, "learning_rate": 0.00017905613537997636, "loss": 1.3707, "step": 8073 }, { "epoch": 0.1049179475767612, "grad_norm": 0.432465136051178, "learning_rate": 0.00017905353591806498, "loss": 1.5078, "step": 8074 }, { "epoch": 0.10493094212067708, "grad_norm": 0.490108847618103, "learning_rate": 0.00017905093645615358, "loss": 1.5214, "step": 8075 }, { "epoch": 0.10494393666459295, "grad_norm": 0.276267409324646, "learning_rate": 0.0001790483369942422, "loss": 1.4508, "step": 8076 }, { "epoch": 0.10495693120850882, "grad_norm": 0.41033869981765747, "learning_rate": 0.00017904573753233083, "loss": 1.4749, "step": 8077 }, { "epoch": 0.1049699257524247, "grad_norm": 0.404231458902359, "learning_rate": 0.00017904313807041942, "loss": 1.3342, "step": 8078 }, { "epoch": 0.10498292029634057, "grad_norm": 0.35010117292404175, "learning_rate": 0.00017904053860850805, "loss": 1.3754, "step": 8079 }, { "epoch": 0.10499591484025644, "grad_norm": 0.45433515310287476, "learning_rate": 0.00017903793914659665, "loss": 1.3815, "step": 8080 }, { "epoch": 0.10500890938417232, "grad_norm": 0.3908640444278717, "learning_rate": 0.0001790353396846853, "loss": 1.4581, "step": 8081 }, { "epoch": 0.10502190392808819, "grad_norm": 0.4251435101032257, "learning_rate": 0.0001790327402227739, "loss": 1.3893, "step": 8082 }, { "epoch": 0.10503489847200406, "grad_norm": 0.42225953936576843, "learning_rate": 0.00017903014076086252, "loss": 1.4656, "step": 8083 }, { "epoch": 0.10504789301591994, "grad_norm": 0.29635852575302124, "learning_rate": 0.00017902754129895112, "loss": 1.2572, "step": 8084 }, { "epoch": 0.10506088755983581, "grad_norm": 0.33479174971580505, "learning_rate": 0.00017902494183703974, "loss": 1.2901, "step": 8085 }, { "epoch": 0.10507388210375168, "grad_norm": 0.4221325218677521, "learning_rate": 0.00017902234237512836, "loss": 1.507, "step": 8086 }, { "epoch": 0.10508687664766755, "grad_norm": 0.47237905859947205, "learning_rate": 0.00017901974291321696, "loss": 1.5308, "step": 8087 }, { "epoch": 0.10509987119158343, "grad_norm": 0.4242340326309204, "learning_rate": 0.0001790171434513056, "loss": 1.5421, "step": 8088 }, { "epoch": 0.10511286573549931, "grad_norm": 0.3724267780780792, "learning_rate": 0.0001790145439893942, "loss": 1.3626, "step": 8089 }, { "epoch": 0.10512586027941519, "grad_norm": 0.3293302655220032, "learning_rate": 0.00017901194452748284, "loss": 1.4489, "step": 8090 }, { "epoch": 0.10513885482333106, "grad_norm": 0.3883960545063019, "learning_rate": 0.00017900934506557143, "loss": 1.5761, "step": 8091 }, { "epoch": 0.10515184936724693, "grad_norm": 0.30380895733833313, "learning_rate": 0.00017900674560366003, "loss": 1.3394, "step": 8092 }, { "epoch": 0.1051648439111628, "grad_norm": 0.3718329071998596, "learning_rate": 0.00017900414614174868, "loss": 1.4642, "step": 8093 }, { "epoch": 0.10517783845507868, "grad_norm": 0.4548642337322235, "learning_rate": 0.00017900154667983728, "loss": 1.6218, "step": 8094 }, { "epoch": 0.10519083299899455, "grad_norm": 0.3894287347793579, "learning_rate": 0.0001789989472179259, "loss": 1.3941, "step": 8095 }, { "epoch": 0.10520382754291042, "grad_norm": 0.49693650007247925, "learning_rate": 0.0001789963477560145, "loss": 1.3501, "step": 8096 }, { "epoch": 0.1052168220868263, "grad_norm": 0.4253354072570801, "learning_rate": 0.00017899374829410313, "loss": 1.5231, "step": 8097 }, { "epoch": 0.10522981663074217, "grad_norm": 0.4086737036705017, "learning_rate": 0.00017899114883219175, "loss": 1.4767, "step": 8098 }, { "epoch": 0.10524281117465804, "grad_norm": 0.42117011547088623, "learning_rate": 0.00017898854937028035, "loss": 1.5377, "step": 8099 }, { "epoch": 0.10525580571857392, "grad_norm": 0.3988659977912903, "learning_rate": 0.00017898594990836897, "loss": 1.6711, "step": 8100 }, { "epoch": 0.10526880026248979, "grad_norm": 0.4064216911792755, "learning_rate": 0.0001789833504464576, "loss": 1.4378, "step": 8101 }, { "epoch": 0.10528179480640566, "grad_norm": 0.3536483347415924, "learning_rate": 0.00017898075098454622, "loss": 1.3556, "step": 8102 }, { "epoch": 0.10529478935032154, "grad_norm": 0.3714431822299957, "learning_rate": 0.00017897815152263482, "loss": 1.6121, "step": 8103 }, { "epoch": 0.10530778389423741, "grad_norm": 0.35359179973602295, "learning_rate": 0.00017897555206072342, "loss": 1.3843, "step": 8104 }, { "epoch": 0.10532077843815328, "grad_norm": 0.39783602952957153, "learning_rate": 0.00017897295259881207, "loss": 1.3086, "step": 8105 }, { "epoch": 0.10533377298206915, "grad_norm": 0.4251030683517456, "learning_rate": 0.00017897035313690066, "loss": 1.5838, "step": 8106 }, { "epoch": 0.10534676752598503, "grad_norm": 0.4016352593898773, "learning_rate": 0.0001789677536749893, "loss": 1.4287, "step": 8107 }, { "epoch": 0.1053597620699009, "grad_norm": 0.41159847378730774, "learning_rate": 0.00017896515421307791, "loss": 1.3477, "step": 8108 }, { "epoch": 0.10537275661381677, "grad_norm": 0.42181316018104553, "learning_rate": 0.0001789625547511665, "loss": 1.5682, "step": 8109 }, { "epoch": 0.10538575115773265, "grad_norm": 0.4328581690788269, "learning_rate": 0.00017895995528925514, "loss": 1.2462, "step": 8110 }, { "epoch": 0.10539874570164852, "grad_norm": 0.4267323613166809, "learning_rate": 0.00017895735582734373, "loss": 1.3571, "step": 8111 }, { "epoch": 0.10541174024556439, "grad_norm": 0.38363146781921387, "learning_rate": 0.00017895475636543238, "loss": 1.2698, "step": 8112 }, { "epoch": 0.10542473478948026, "grad_norm": 0.35397276282310486, "learning_rate": 0.00017895215690352098, "loss": 1.3735, "step": 8113 }, { "epoch": 0.10543772933339614, "grad_norm": 0.42066776752471924, "learning_rate": 0.0001789495574416096, "loss": 1.4774, "step": 8114 }, { "epoch": 0.10545072387731201, "grad_norm": 0.372273325920105, "learning_rate": 0.0001789469579796982, "loss": 1.4501, "step": 8115 }, { "epoch": 0.10546371842122788, "grad_norm": 0.3861474394798279, "learning_rate": 0.00017894435851778683, "loss": 1.5177, "step": 8116 }, { "epoch": 0.10547671296514376, "grad_norm": 0.3844112455844879, "learning_rate": 0.00017894175905587545, "loss": 1.4317, "step": 8117 }, { "epoch": 0.10548970750905963, "grad_norm": 0.3366958498954773, "learning_rate": 0.00017893915959396405, "loss": 1.1885, "step": 8118 }, { "epoch": 0.1055027020529755, "grad_norm": 0.42144110798835754, "learning_rate": 0.00017893656013205267, "loss": 1.5554, "step": 8119 }, { "epoch": 0.10551569659689138, "grad_norm": 0.4657565951347351, "learning_rate": 0.0001789339606701413, "loss": 1.599, "step": 8120 }, { "epoch": 0.10552869114080725, "grad_norm": 0.2501119375228882, "learning_rate": 0.0001789313612082299, "loss": 1.2468, "step": 8121 }, { "epoch": 0.10554168568472312, "grad_norm": 0.4707627296447754, "learning_rate": 0.00017892876174631852, "loss": 1.4058, "step": 8122 }, { "epoch": 0.105554680228639, "grad_norm": 0.4245232343673706, "learning_rate": 0.00017892616228440712, "loss": 1.4248, "step": 8123 }, { "epoch": 0.10556767477255487, "grad_norm": 0.3967687785625458, "learning_rate": 0.00017892356282249577, "loss": 1.5913, "step": 8124 }, { "epoch": 0.10558066931647074, "grad_norm": 0.37496134638786316, "learning_rate": 0.00017892096336058437, "loss": 1.4844, "step": 8125 }, { "epoch": 0.10559366386038661, "grad_norm": 0.38550230860710144, "learning_rate": 0.000178918363898673, "loss": 1.6091, "step": 8126 }, { "epoch": 0.1056066584043025, "grad_norm": 0.39407652616500854, "learning_rate": 0.0001789157644367616, "loss": 1.3621, "step": 8127 }, { "epoch": 0.10561965294821837, "grad_norm": 0.433112770318985, "learning_rate": 0.00017891316497485021, "loss": 1.4456, "step": 8128 }, { "epoch": 0.10563264749213425, "grad_norm": 0.44419875741004944, "learning_rate": 0.00017891056551293884, "loss": 1.3648, "step": 8129 }, { "epoch": 0.10564564203605012, "grad_norm": 0.38489314913749695, "learning_rate": 0.00017890796605102744, "loss": 1.2905, "step": 8130 }, { "epoch": 0.10565863657996599, "grad_norm": 0.33085474371910095, "learning_rate": 0.00017890536658911606, "loss": 1.3691, "step": 8131 }, { "epoch": 0.10567163112388187, "grad_norm": 0.4659140408039093, "learning_rate": 0.00017890276712720468, "loss": 1.5332, "step": 8132 }, { "epoch": 0.10568462566779774, "grad_norm": 0.3782356381416321, "learning_rate": 0.00017890016766529328, "loss": 1.5257, "step": 8133 }, { "epoch": 0.10569762021171361, "grad_norm": 0.4764174520969391, "learning_rate": 0.0001788975682033819, "loss": 1.5432, "step": 8134 }, { "epoch": 0.10571061475562948, "grad_norm": 0.30911892652511597, "learning_rate": 0.0001788949687414705, "loss": 1.3439, "step": 8135 }, { "epoch": 0.10572360929954536, "grad_norm": 0.43696072697639465, "learning_rate": 0.00017889236927955916, "loss": 1.2969, "step": 8136 }, { "epoch": 0.10573660384346123, "grad_norm": 0.3797033131122589, "learning_rate": 0.00017888976981764775, "loss": 1.5047, "step": 8137 }, { "epoch": 0.1057495983873771, "grad_norm": 0.3823290169239044, "learning_rate": 0.00017888717035573638, "loss": 1.4263, "step": 8138 }, { "epoch": 0.10576259293129298, "grad_norm": 0.4489169120788574, "learning_rate": 0.00017888457089382497, "loss": 1.5657, "step": 8139 }, { "epoch": 0.10577558747520885, "grad_norm": 0.3395143449306488, "learning_rate": 0.0001788819714319136, "loss": 1.5834, "step": 8140 }, { "epoch": 0.10578858201912472, "grad_norm": 0.34869250655174255, "learning_rate": 0.00017887937197000222, "loss": 1.272, "step": 8141 }, { "epoch": 0.1058015765630406, "grad_norm": 0.44896647334098816, "learning_rate": 0.00017887677250809082, "loss": 1.3581, "step": 8142 }, { "epoch": 0.10581457110695647, "grad_norm": 0.7160615921020508, "learning_rate": 0.00017887417304617945, "loss": 1.663, "step": 8143 }, { "epoch": 0.10582756565087234, "grad_norm": 0.38803112506866455, "learning_rate": 0.00017887157358426807, "loss": 1.4343, "step": 8144 }, { "epoch": 0.10584056019478821, "grad_norm": 0.31806567311286926, "learning_rate": 0.00017886897412235667, "loss": 1.3541, "step": 8145 }, { "epoch": 0.10585355473870409, "grad_norm": 0.331630140542984, "learning_rate": 0.0001788663746604453, "loss": 1.1125, "step": 8146 }, { "epoch": 0.10586654928261996, "grad_norm": 0.4458574950695038, "learning_rate": 0.00017886377519853392, "loss": 1.4972, "step": 8147 }, { "epoch": 0.10587954382653583, "grad_norm": 0.31424760818481445, "learning_rate": 0.00017886117573662254, "loss": 1.159, "step": 8148 }, { "epoch": 0.1058925383704517, "grad_norm": 0.43325430154800415, "learning_rate": 0.00017885857627471114, "loss": 1.4445, "step": 8149 }, { "epoch": 0.10590553291436758, "grad_norm": 0.37639760971069336, "learning_rate": 0.00017885597681279976, "loss": 1.5666, "step": 8150 }, { "epoch": 0.10591852745828345, "grad_norm": 0.38434675335884094, "learning_rate": 0.0001788533773508884, "loss": 1.374, "step": 8151 }, { "epoch": 0.10593152200219932, "grad_norm": 0.34173765778541565, "learning_rate": 0.00017885077788897698, "loss": 1.2533, "step": 8152 }, { "epoch": 0.1059445165461152, "grad_norm": 0.3970118761062622, "learning_rate": 0.0001788481784270656, "loss": 1.5193, "step": 8153 }, { "epoch": 0.10595751109003107, "grad_norm": 0.43598806858062744, "learning_rate": 0.0001788455789651542, "loss": 1.4516, "step": 8154 }, { "epoch": 0.10597050563394694, "grad_norm": 0.3351729214191437, "learning_rate": 0.00017884297950324286, "loss": 1.2827, "step": 8155 }, { "epoch": 0.10598350017786282, "grad_norm": 0.43314504623413086, "learning_rate": 0.00017884038004133146, "loss": 1.4287, "step": 8156 }, { "epoch": 0.10599649472177869, "grad_norm": 0.506985604763031, "learning_rate": 0.00017883778057942008, "loss": 1.467, "step": 8157 }, { "epoch": 0.10600948926569456, "grad_norm": 0.4688164293766022, "learning_rate": 0.00017883518111750868, "loss": 1.3713, "step": 8158 }, { "epoch": 0.10602248380961043, "grad_norm": 0.3287079930305481, "learning_rate": 0.0001788325816555973, "loss": 1.2446, "step": 8159 }, { "epoch": 0.10603547835352631, "grad_norm": 0.4234907031059265, "learning_rate": 0.00017882998219368593, "loss": 1.4933, "step": 8160 }, { "epoch": 0.10604847289744218, "grad_norm": 0.44690683484077454, "learning_rate": 0.00017882738273177452, "loss": 1.5702, "step": 8161 }, { "epoch": 0.10606146744135805, "grad_norm": 0.44422945380210876, "learning_rate": 0.00017882478326986315, "loss": 1.6069, "step": 8162 }, { "epoch": 0.10607446198527393, "grad_norm": 0.26249459385871887, "learning_rate": 0.00017882218380795177, "loss": 1.4724, "step": 8163 }, { "epoch": 0.1060874565291898, "grad_norm": 0.4622667729854584, "learning_rate": 0.00017881958434604037, "loss": 1.4932, "step": 8164 }, { "epoch": 0.10610045107310569, "grad_norm": 0.44710975885391235, "learning_rate": 0.000178816984884129, "loss": 1.386, "step": 8165 }, { "epoch": 0.10611344561702156, "grad_norm": 0.3701595664024353, "learning_rate": 0.0001788143854222176, "loss": 1.4438, "step": 8166 }, { "epoch": 0.10612644016093743, "grad_norm": 0.19898907840251923, "learning_rate": 0.00017881178596030624, "loss": 1.2453, "step": 8167 }, { "epoch": 0.1061394347048533, "grad_norm": 0.3883827030658722, "learning_rate": 0.00017880918649839484, "loss": 1.5868, "step": 8168 }, { "epoch": 0.10615242924876918, "grad_norm": 0.4100607633590698, "learning_rate": 0.00017880658703648347, "loss": 1.5252, "step": 8169 }, { "epoch": 0.10616542379268505, "grad_norm": 0.4724958539009094, "learning_rate": 0.00017880398757457206, "loss": 1.4307, "step": 8170 }, { "epoch": 0.10617841833660092, "grad_norm": 0.5259565114974976, "learning_rate": 0.0001788013881126607, "loss": 1.3125, "step": 8171 }, { "epoch": 0.1061914128805168, "grad_norm": 0.4943244457244873, "learning_rate": 0.0001787987886507493, "loss": 1.5503, "step": 8172 }, { "epoch": 0.10620440742443267, "grad_norm": 0.29483816027641296, "learning_rate": 0.0001787961891888379, "loss": 1.3411, "step": 8173 }, { "epoch": 0.10621740196834854, "grad_norm": 0.32326146960258484, "learning_rate": 0.00017879358972692653, "loss": 1.4104, "step": 8174 }, { "epoch": 0.10623039651226442, "grad_norm": 0.33578529953956604, "learning_rate": 0.00017879099026501516, "loss": 1.5157, "step": 8175 }, { "epoch": 0.10624339105618029, "grad_norm": 0.42963072657585144, "learning_rate": 0.00017878839080310376, "loss": 1.3957, "step": 8176 }, { "epoch": 0.10625638560009616, "grad_norm": 0.434430330991745, "learning_rate": 0.00017878579134119238, "loss": 1.5243, "step": 8177 }, { "epoch": 0.10626938014401204, "grad_norm": 0.3892306983470917, "learning_rate": 0.00017878319187928098, "loss": 1.5756, "step": 8178 }, { "epoch": 0.10628237468792791, "grad_norm": 0.400583952665329, "learning_rate": 0.00017878059241736963, "loss": 1.4903, "step": 8179 }, { "epoch": 0.10629536923184378, "grad_norm": 0.3598881959915161, "learning_rate": 0.00017877799295545823, "loss": 1.6074, "step": 8180 }, { "epoch": 0.10630836377575965, "grad_norm": 0.4258742332458496, "learning_rate": 0.00017877539349354685, "loss": 1.1456, "step": 8181 }, { "epoch": 0.10632135831967553, "grad_norm": 0.4188748002052307, "learning_rate": 0.00017877279403163548, "loss": 1.5721, "step": 8182 }, { "epoch": 0.1063343528635914, "grad_norm": 0.3962958753108978, "learning_rate": 0.00017877019456972407, "loss": 1.7283, "step": 8183 }, { "epoch": 0.10634734740750727, "grad_norm": 0.3618302345275879, "learning_rate": 0.0001787675951078127, "loss": 1.5692, "step": 8184 }, { "epoch": 0.10636034195142315, "grad_norm": 0.3765220642089844, "learning_rate": 0.0001787649956459013, "loss": 1.4312, "step": 8185 }, { "epoch": 0.10637333649533902, "grad_norm": 0.43553921580314636, "learning_rate": 0.00017876239618398995, "loss": 1.4953, "step": 8186 }, { "epoch": 0.10638633103925489, "grad_norm": 0.3680141270160675, "learning_rate": 0.00017875979672207854, "loss": 1.5256, "step": 8187 }, { "epoch": 0.10639932558317076, "grad_norm": 0.34125861525535583, "learning_rate": 0.00017875719726016714, "loss": 1.3869, "step": 8188 }, { "epoch": 0.10641232012708664, "grad_norm": 0.3506383001804352, "learning_rate": 0.00017875459779825577, "loss": 1.6094, "step": 8189 }, { "epoch": 0.10642531467100251, "grad_norm": 0.31801915168762207, "learning_rate": 0.0001787519983363444, "loss": 1.5161, "step": 8190 }, { "epoch": 0.10643830921491838, "grad_norm": 0.283669650554657, "learning_rate": 0.00017874939887443301, "loss": 1.3582, "step": 8191 }, { "epoch": 0.10645130375883426, "grad_norm": 0.3953026235103607, "learning_rate": 0.0001787467994125216, "loss": 1.4716, "step": 8192 }, { "epoch": 0.10646429830275013, "grad_norm": 0.4820462763309479, "learning_rate": 0.00017874419995061024, "loss": 1.5582, "step": 8193 }, { "epoch": 0.106477292846666, "grad_norm": 0.500321090221405, "learning_rate": 0.00017874160048869886, "loss": 1.5507, "step": 8194 }, { "epoch": 0.10649028739058188, "grad_norm": 0.25349944829940796, "learning_rate": 0.00017873900102678746, "loss": 1.1298, "step": 8195 }, { "epoch": 0.10650328193449775, "grad_norm": 0.32448187470436096, "learning_rate": 0.00017873640156487608, "loss": 1.4198, "step": 8196 }, { "epoch": 0.10651627647841362, "grad_norm": 0.5821177959442139, "learning_rate": 0.00017873380210296468, "loss": 1.3302, "step": 8197 }, { "epoch": 0.1065292710223295, "grad_norm": 0.3544238209724426, "learning_rate": 0.00017873120264105333, "loss": 1.4634, "step": 8198 }, { "epoch": 0.10654226556624537, "grad_norm": 0.3090178966522217, "learning_rate": 0.00017872860317914193, "loss": 1.2475, "step": 8199 }, { "epoch": 0.10655526011016124, "grad_norm": 0.39294150471687317, "learning_rate": 0.00017872600371723053, "loss": 1.5182, "step": 8200 }, { "epoch": 0.10656825465407711, "grad_norm": 0.4525615870952606, "learning_rate": 0.00017872340425531915, "loss": 1.4657, "step": 8201 }, { "epoch": 0.10658124919799299, "grad_norm": 0.3070647120475769, "learning_rate": 0.00017872080479340778, "loss": 1.3586, "step": 8202 }, { "epoch": 0.10659424374190887, "grad_norm": 0.42253831028938293, "learning_rate": 0.0001787182053314964, "loss": 1.7267, "step": 8203 }, { "epoch": 0.10660723828582475, "grad_norm": 0.4536573886871338, "learning_rate": 0.000178715605869585, "loss": 1.4703, "step": 8204 }, { "epoch": 0.10662023282974062, "grad_norm": 0.41535767912864685, "learning_rate": 0.00017871300640767362, "loss": 1.4469, "step": 8205 }, { "epoch": 0.10663322737365649, "grad_norm": 0.40323224663734436, "learning_rate": 0.00017871040694576225, "loss": 1.5483, "step": 8206 }, { "epoch": 0.10664622191757236, "grad_norm": 0.39425545930862427, "learning_rate": 0.00017870780748385084, "loss": 1.5584, "step": 8207 }, { "epoch": 0.10665921646148824, "grad_norm": 0.33285897970199585, "learning_rate": 0.00017870520802193947, "loss": 1.5746, "step": 8208 }, { "epoch": 0.10667221100540411, "grad_norm": 0.46968698501586914, "learning_rate": 0.00017870260856002807, "loss": 1.5728, "step": 8209 }, { "epoch": 0.10668520554931998, "grad_norm": 0.4709252119064331, "learning_rate": 0.00017870000909811672, "loss": 1.5017, "step": 8210 }, { "epoch": 0.10669820009323586, "grad_norm": 0.3548968732357025, "learning_rate": 0.00017869740963620531, "loss": 1.194, "step": 8211 }, { "epoch": 0.10671119463715173, "grad_norm": 0.2755083441734314, "learning_rate": 0.00017869481017429394, "loss": 1.3951, "step": 8212 }, { "epoch": 0.1067241891810676, "grad_norm": 0.43299031257629395, "learning_rate": 0.00017869221071238254, "loss": 1.7226, "step": 8213 }, { "epoch": 0.10673718372498348, "grad_norm": 0.4304381310939789, "learning_rate": 0.00017868961125047116, "loss": 1.5715, "step": 8214 }, { "epoch": 0.10675017826889935, "grad_norm": 0.4301910996437073, "learning_rate": 0.00017868701178855978, "loss": 1.3712, "step": 8215 }, { "epoch": 0.10676317281281522, "grad_norm": 0.4068301320075989, "learning_rate": 0.00017868441232664838, "loss": 1.2109, "step": 8216 }, { "epoch": 0.1067761673567311, "grad_norm": 0.3920714259147644, "learning_rate": 0.000178681812864737, "loss": 1.3769, "step": 8217 }, { "epoch": 0.10678916190064697, "grad_norm": 0.43683186173439026, "learning_rate": 0.00017867921340282563, "loss": 1.5601, "step": 8218 }, { "epoch": 0.10680215644456284, "grad_norm": 0.414109468460083, "learning_rate": 0.00017867661394091423, "loss": 1.7161, "step": 8219 }, { "epoch": 0.10681515098847871, "grad_norm": 0.44408103823661804, "learning_rate": 0.00017867401447900285, "loss": 1.4109, "step": 8220 }, { "epoch": 0.10682814553239459, "grad_norm": 0.34131303429603577, "learning_rate": 0.00017867141501709148, "loss": 1.3086, "step": 8221 }, { "epoch": 0.10684114007631046, "grad_norm": 0.35469838976860046, "learning_rate": 0.0001786688155551801, "loss": 1.1541, "step": 8222 }, { "epoch": 0.10685413462022633, "grad_norm": 0.41554033756256104, "learning_rate": 0.0001786662160932687, "loss": 1.47, "step": 8223 }, { "epoch": 0.1068671291641422, "grad_norm": 0.4296305179595947, "learning_rate": 0.00017866361663135732, "loss": 1.2554, "step": 8224 }, { "epoch": 0.10688012370805808, "grad_norm": 0.4148561358451843, "learning_rate": 0.00017866101716944595, "loss": 1.5861, "step": 8225 }, { "epoch": 0.10689311825197395, "grad_norm": 0.5389063954353333, "learning_rate": 0.00017865841770753455, "loss": 1.5225, "step": 8226 }, { "epoch": 0.10690611279588982, "grad_norm": 0.4192093312740326, "learning_rate": 0.00017865581824562317, "loss": 1.3588, "step": 8227 }, { "epoch": 0.1069191073398057, "grad_norm": 0.36082568764686584, "learning_rate": 0.00017865321878371177, "loss": 1.5087, "step": 8228 }, { "epoch": 0.10693210188372157, "grad_norm": 0.3729625940322876, "learning_rate": 0.0001786506193218004, "loss": 1.275, "step": 8229 }, { "epoch": 0.10694509642763744, "grad_norm": 0.3628804385662079, "learning_rate": 0.00017864801985988902, "loss": 1.3786, "step": 8230 }, { "epoch": 0.10695809097155332, "grad_norm": 0.443193644285202, "learning_rate": 0.00017864542039797761, "loss": 1.5793, "step": 8231 }, { "epoch": 0.10697108551546919, "grad_norm": 0.4084622263908386, "learning_rate": 0.00017864282093606624, "loss": 1.6569, "step": 8232 }, { "epoch": 0.10698408005938506, "grad_norm": 0.3738953471183777, "learning_rate": 0.00017864022147415486, "loss": 1.2883, "step": 8233 }, { "epoch": 0.10699707460330093, "grad_norm": 0.3393458127975464, "learning_rate": 0.0001786376220122435, "loss": 1.2927, "step": 8234 }, { "epoch": 0.10701006914721681, "grad_norm": 0.5116984248161316, "learning_rate": 0.00017863502255033208, "loss": 1.6635, "step": 8235 }, { "epoch": 0.10702306369113268, "grad_norm": 0.3769250214099884, "learning_rate": 0.0001786324230884207, "loss": 1.361, "step": 8236 }, { "epoch": 0.10703605823504855, "grad_norm": 0.39683759212493896, "learning_rate": 0.00017862982362650933, "loss": 1.4072, "step": 8237 }, { "epoch": 0.10704905277896443, "grad_norm": 0.40951600670814514, "learning_rate": 0.00017862722416459793, "loss": 1.4581, "step": 8238 }, { "epoch": 0.1070620473228803, "grad_norm": 0.4117775559425354, "learning_rate": 0.00017862462470268656, "loss": 1.5397, "step": 8239 }, { "epoch": 0.10707504186679617, "grad_norm": 0.37748417258262634, "learning_rate": 0.00017862202524077515, "loss": 1.3814, "step": 8240 }, { "epoch": 0.10708803641071206, "grad_norm": 0.381867378950119, "learning_rate": 0.0001786194257788638, "loss": 1.3362, "step": 8241 }, { "epoch": 0.10710103095462793, "grad_norm": 0.40221303701400757, "learning_rate": 0.0001786168263169524, "loss": 1.4331, "step": 8242 }, { "epoch": 0.1071140254985438, "grad_norm": 0.3956340551376343, "learning_rate": 0.000178614226855041, "loss": 1.4068, "step": 8243 }, { "epoch": 0.10712702004245968, "grad_norm": 0.43356987833976746, "learning_rate": 0.00017861162739312962, "loss": 1.411, "step": 8244 }, { "epoch": 0.10714001458637555, "grad_norm": 0.3969554901123047, "learning_rate": 0.00017860902793121825, "loss": 1.5321, "step": 8245 }, { "epoch": 0.10715300913029142, "grad_norm": 0.39121213555336, "learning_rate": 0.00017860642846930687, "loss": 1.4016, "step": 8246 }, { "epoch": 0.1071660036742073, "grad_norm": 0.3624888062477112, "learning_rate": 0.00017860382900739547, "loss": 1.3461, "step": 8247 }, { "epoch": 0.10717899821812317, "grad_norm": 0.3919220268726349, "learning_rate": 0.0001786012295454841, "loss": 1.4161, "step": 8248 }, { "epoch": 0.10719199276203904, "grad_norm": 0.4599630832672119, "learning_rate": 0.00017859863008357272, "loss": 1.4957, "step": 8249 }, { "epoch": 0.10720498730595492, "grad_norm": 0.3882891833782196, "learning_rate": 0.00017859603062166132, "loss": 1.4307, "step": 8250 }, { "epoch": 0.10721798184987079, "grad_norm": 0.31193241477012634, "learning_rate": 0.00017859343115974994, "loss": 1.2459, "step": 8251 }, { "epoch": 0.10723097639378666, "grad_norm": 0.3721153140068054, "learning_rate": 0.00017859083169783854, "loss": 1.4672, "step": 8252 }, { "epoch": 0.10724397093770253, "grad_norm": 0.43315741419792175, "learning_rate": 0.0001785882322359272, "loss": 1.5726, "step": 8253 }, { "epoch": 0.10725696548161841, "grad_norm": 0.38219547271728516, "learning_rate": 0.0001785856327740158, "loss": 1.5057, "step": 8254 }, { "epoch": 0.10726996002553428, "grad_norm": 0.3723348379135132, "learning_rate": 0.00017858303331210438, "loss": 1.3849, "step": 8255 }, { "epoch": 0.10728295456945015, "grad_norm": 0.3644058406352997, "learning_rate": 0.00017858043385019304, "loss": 1.2639, "step": 8256 }, { "epoch": 0.10729594911336603, "grad_norm": 0.4135352373123169, "learning_rate": 0.00017857783438828163, "loss": 1.624, "step": 8257 }, { "epoch": 0.1073089436572819, "grad_norm": 0.39943966269493103, "learning_rate": 0.00017857523492637026, "loss": 1.5365, "step": 8258 }, { "epoch": 0.10732193820119777, "grad_norm": 0.49921396374702454, "learning_rate": 0.00017857263546445886, "loss": 1.601, "step": 8259 }, { "epoch": 0.10733493274511365, "grad_norm": 0.34018537402153015, "learning_rate": 0.00017857003600254748, "loss": 1.5129, "step": 8260 }, { "epoch": 0.10734792728902952, "grad_norm": 0.33740413188934326, "learning_rate": 0.0001785674365406361, "loss": 1.1765, "step": 8261 }, { "epoch": 0.10736092183294539, "grad_norm": 0.3108792006969452, "learning_rate": 0.0001785648370787247, "loss": 1.1707, "step": 8262 }, { "epoch": 0.10737391637686126, "grad_norm": 0.39253249764442444, "learning_rate": 0.00017856223761681333, "loss": 1.5172, "step": 8263 }, { "epoch": 0.10738691092077714, "grad_norm": 0.34953510761260986, "learning_rate": 0.00017855963815490195, "loss": 1.3751, "step": 8264 }, { "epoch": 0.10739990546469301, "grad_norm": 0.3397866487503052, "learning_rate": 0.00017855703869299058, "loss": 1.2223, "step": 8265 }, { "epoch": 0.10741290000860888, "grad_norm": 0.34169650077819824, "learning_rate": 0.00017855443923107917, "loss": 1.365, "step": 8266 }, { "epoch": 0.10742589455252476, "grad_norm": 0.492722749710083, "learning_rate": 0.00017855183976916777, "loss": 1.4904, "step": 8267 }, { "epoch": 0.10743888909644063, "grad_norm": 0.3122013807296753, "learning_rate": 0.00017854924030725642, "loss": 1.241, "step": 8268 }, { "epoch": 0.1074518836403565, "grad_norm": 0.36099573969841003, "learning_rate": 0.00017854664084534502, "loss": 1.6654, "step": 8269 }, { "epoch": 0.10746487818427237, "grad_norm": 0.3124205470085144, "learning_rate": 0.00017854404138343364, "loss": 1.3006, "step": 8270 }, { "epoch": 0.10747787272818825, "grad_norm": 0.4837087392807007, "learning_rate": 0.00017854144192152224, "loss": 1.4524, "step": 8271 }, { "epoch": 0.10749086727210412, "grad_norm": 0.38991719484329224, "learning_rate": 0.00017853884245961087, "loss": 1.3326, "step": 8272 }, { "epoch": 0.10750386181602, "grad_norm": 0.41984185576438904, "learning_rate": 0.0001785362429976995, "loss": 1.5051, "step": 8273 }, { "epoch": 0.10751685635993587, "grad_norm": 0.34884050488471985, "learning_rate": 0.0001785336435357881, "loss": 1.3276, "step": 8274 }, { "epoch": 0.10752985090385174, "grad_norm": 0.44545289874076843, "learning_rate": 0.0001785310440738767, "loss": 1.2967, "step": 8275 }, { "epoch": 0.10754284544776761, "grad_norm": 0.45583096146583557, "learning_rate": 0.00017852844461196534, "loss": 1.5243, "step": 8276 }, { "epoch": 0.10755583999168349, "grad_norm": 0.3560146391391754, "learning_rate": 0.00017852584515005396, "loss": 1.4024, "step": 8277 }, { "epoch": 0.10756883453559936, "grad_norm": 0.42076122760772705, "learning_rate": 0.00017852324568814256, "loss": 1.36, "step": 8278 }, { "epoch": 0.10758182907951525, "grad_norm": 0.39722198247909546, "learning_rate": 0.00017852064622623118, "loss": 1.4957, "step": 8279 }, { "epoch": 0.10759482362343112, "grad_norm": 0.4036417603492737, "learning_rate": 0.0001785180467643198, "loss": 1.5451, "step": 8280 }, { "epoch": 0.10760781816734699, "grad_norm": 0.4436379373073578, "learning_rate": 0.0001785154473024084, "loss": 1.361, "step": 8281 }, { "epoch": 0.10762081271126286, "grad_norm": 0.5275803804397583, "learning_rate": 0.00017851284784049703, "loss": 1.4568, "step": 8282 }, { "epoch": 0.10763380725517874, "grad_norm": 0.5393362045288086, "learning_rate": 0.00017851024837858563, "loss": 1.5041, "step": 8283 }, { "epoch": 0.10764680179909461, "grad_norm": 0.3804057538509369, "learning_rate": 0.00017850764891667425, "loss": 1.3058, "step": 8284 }, { "epoch": 0.10765979634301048, "grad_norm": 0.5642374157905579, "learning_rate": 0.00017850504945476288, "loss": 1.4152, "step": 8285 }, { "epoch": 0.10767279088692636, "grad_norm": 0.3726355731487274, "learning_rate": 0.00017850244999285147, "loss": 1.6072, "step": 8286 }, { "epoch": 0.10768578543084223, "grad_norm": 0.4349422752857208, "learning_rate": 0.0001784998505309401, "loss": 1.5623, "step": 8287 }, { "epoch": 0.1076987799747581, "grad_norm": 0.39895492792129517, "learning_rate": 0.00017849725106902872, "loss": 1.4752, "step": 8288 }, { "epoch": 0.10771177451867398, "grad_norm": 0.2515937089920044, "learning_rate": 0.00017849465160711735, "loss": 1.1081, "step": 8289 }, { "epoch": 0.10772476906258985, "grad_norm": 0.4791084825992584, "learning_rate": 0.00017849205214520594, "loss": 1.7281, "step": 8290 }, { "epoch": 0.10773776360650572, "grad_norm": 0.5024106502532959, "learning_rate": 0.00017848945268329457, "loss": 1.3555, "step": 8291 }, { "epoch": 0.1077507581504216, "grad_norm": 0.5199039578437805, "learning_rate": 0.0001784868532213832, "loss": 1.5712, "step": 8292 }, { "epoch": 0.10776375269433747, "grad_norm": 0.34852391481399536, "learning_rate": 0.0001784842537594718, "loss": 1.292, "step": 8293 }, { "epoch": 0.10777674723825334, "grad_norm": 0.4339490234851837, "learning_rate": 0.00017848165429756041, "loss": 1.4883, "step": 8294 }, { "epoch": 0.10778974178216921, "grad_norm": 0.5038226246833801, "learning_rate": 0.00017847905483564904, "loss": 1.5377, "step": 8295 }, { "epoch": 0.10780273632608509, "grad_norm": 0.3878563940525055, "learning_rate": 0.00017847645537373766, "loss": 1.4318, "step": 8296 }, { "epoch": 0.10781573087000096, "grad_norm": 0.2645588517189026, "learning_rate": 0.00017847385591182626, "loss": 1.4389, "step": 8297 }, { "epoch": 0.10782872541391683, "grad_norm": 0.44635987281799316, "learning_rate": 0.00017847125644991486, "loss": 1.5234, "step": 8298 }, { "epoch": 0.1078417199578327, "grad_norm": 0.4055132567882538, "learning_rate": 0.0001784686569880035, "loss": 1.352, "step": 8299 }, { "epoch": 0.10785471450174858, "grad_norm": 0.3837491571903229, "learning_rate": 0.0001784660575260921, "loss": 1.7072, "step": 8300 }, { "epoch": 0.10786770904566445, "grad_norm": 0.47413310408592224, "learning_rate": 0.00017846345806418073, "loss": 1.5348, "step": 8301 }, { "epoch": 0.10788070358958032, "grad_norm": 0.36519861221313477, "learning_rate": 0.00017846085860226933, "loss": 1.4536, "step": 8302 }, { "epoch": 0.1078936981334962, "grad_norm": 0.3981136679649353, "learning_rate": 0.00017845825914035795, "loss": 1.3759, "step": 8303 }, { "epoch": 0.10790669267741207, "grad_norm": 0.3445344865322113, "learning_rate": 0.00017845565967844658, "loss": 1.556, "step": 8304 }, { "epoch": 0.10791968722132794, "grad_norm": 0.40029338002204895, "learning_rate": 0.00017845306021653518, "loss": 1.6734, "step": 8305 }, { "epoch": 0.10793268176524382, "grad_norm": 0.45845022797584534, "learning_rate": 0.0001784504607546238, "loss": 1.3827, "step": 8306 }, { "epoch": 0.10794567630915969, "grad_norm": 0.41059330105781555, "learning_rate": 0.00017844786129271242, "loss": 1.4094, "step": 8307 }, { "epoch": 0.10795867085307556, "grad_norm": 0.45285895466804504, "learning_rate": 0.00017844526183080105, "loss": 1.414, "step": 8308 }, { "epoch": 0.10797166539699143, "grad_norm": 0.3477665185928345, "learning_rate": 0.00017844266236888965, "loss": 1.4796, "step": 8309 }, { "epoch": 0.10798465994090731, "grad_norm": 0.2362794131040573, "learning_rate": 0.00017844006290697824, "loss": 1.1584, "step": 8310 }, { "epoch": 0.10799765448482318, "grad_norm": 0.38085436820983887, "learning_rate": 0.0001784374634450669, "loss": 1.186, "step": 8311 }, { "epoch": 0.10801064902873905, "grad_norm": 0.38006147742271423, "learning_rate": 0.0001784348639831555, "loss": 1.4155, "step": 8312 }, { "epoch": 0.10802364357265493, "grad_norm": 0.3679640591144562, "learning_rate": 0.00017843226452124412, "loss": 1.4904, "step": 8313 }, { "epoch": 0.1080366381165708, "grad_norm": 0.46301308274269104, "learning_rate": 0.00017842966505933271, "loss": 1.588, "step": 8314 }, { "epoch": 0.10804963266048667, "grad_norm": 0.35534656047821045, "learning_rate": 0.00017842706559742134, "loss": 1.2514, "step": 8315 }, { "epoch": 0.10806262720440254, "grad_norm": 0.3737853169441223, "learning_rate": 0.00017842446613550996, "loss": 1.2733, "step": 8316 }, { "epoch": 0.10807562174831843, "grad_norm": 0.4065062701702118, "learning_rate": 0.00017842186667359856, "loss": 1.3768, "step": 8317 }, { "epoch": 0.1080886162922343, "grad_norm": 0.42382216453552246, "learning_rate": 0.00017841926721168719, "loss": 1.5366, "step": 8318 }, { "epoch": 0.10810161083615018, "grad_norm": 0.3473309576511383, "learning_rate": 0.0001784166677497758, "loss": 1.3417, "step": 8319 }, { "epoch": 0.10811460538006605, "grad_norm": 0.388875275850296, "learning_rate": 0.00017841406828786443, "loss": 1.5457, "step": 8320 }, { "epoch": 0.10812759992398192, "grad_norm": 0.5575242638587952, "learning_rate": 0.00017841146882595303, "loss": 1.4715, "step": 8321 }, { "epoch": 0.1081405944678978, "grad_norm": 0.43632930517196655, "learning_rate": 0.00017840886936404163, "loss": 1.4983, "step": 8322 }, { "epoch": 0.10815358901181367, "grad_norm": 0.5354071259498596, "learning_rate": 0.00017840626990213028, "loss": 1.444, "step": 8323 }, { "epoch": 0.10816658355572954, "grad_norm": 0.4031495749950409, "learning_rate": 0.00017840367044021888, "loss": 1.4035, "step": 8324 }, { "epoch": 0.10817957809964542, "grad_norm": 0.355292946100235, "learning_rate": 0.0001784010709783075, "loss": 1.4213, "step": 8325 }, { "epoch": 0.10819257264356129, "grad_norm": 0.44800370931625366, "learning_rate": 0.0001783984715163961, "loss": 1.3318, "step": 8326 }, { "epoch": 0.10820556718747716, "grad_norm": 0.3072616457939148, "learning_rate": 0.00017839587205448472, "loss": 1.3076, "step": 8327 }, { "epoch": 0.10821856173139303, "grad_norm": 0.30312517285346985, "learning_rate": 0.00017839327259257335, "loss": 1.4326, "step": 8328 }, { "epoch": 0.10823155627530891, "grad_norm": 0.4308004081249237, "learning_rate": 0.00017839067313066195, "loss": 1.4755, "step": 8329 }, { "epoch": 0.10824455081922478, "grad_norm": 0.40817609429359436, "learning_rate": 0.0001783880736687506, "loss": 1.3326, "step": 8330 }, { "epoch": 0.10825754536314065, "grad_norm": 0.3512544631958008, "learning_rate": 0.0001783854742068392, "loss": 1.4777, "step": 8331 }, { "epoch": 0.10827053990705653, "grad_norm": 0.4642842710018158, "learning_rate": 0.00017838287474492782, "loss": 1.4219, "step": 8332 }, { "epoch": 0.1082835344509724, "grad_norm": 0.2809307277202606, "learning_rate": 0.00017838027528301642, "loss": 1.0385, "step": 8333 }, { "epoch": 0.10829652899488827, "grad_norm": 0.44858652353286743, "learning_rate": 0.00017837767582110504, "loss": 1.4492, "step": 8334 }, { "epoch": 0.10830952353880414, "grad_norm": 0.3510497808456421, "learning_rate": 0.00017837507635919367, "loss": 1.5068, "step": 8335 }, { "epoch": 0.10832251808272002, "grad_norm": 0.35914674401283264, "learning_rate": 0.00017837247689728226, "loss": 1.3606, "step": 8336 }, { "epoch": 0.10833551262663589, "grad_norm": 0.3239414095878601, "learning_rate": 0.0001783698774353709, "loss": 1.2075, "step": 8337 }, { "epoch": 0.10834850717055176, "grad_norm": 0.45280778408050537, "learning_rate": 0.0001783672779734595, "loss": 1.4402, "step": 8338 }, { "epoch": 0.10836150171446764, "grad_norm": 0.41107428073883057, "learning_rate": 0.0001783646785115481, "loss": 1.4391, "step": 8339 }, { "epoch": 0.10837449625838351, "grad_norm": 0.47195643186569214, "learning_rate": 0.00017836207904963673, "loss": 1.2798, "step": 8340 }, { "epoch": 0.10838749080229938, "grad_norm": 0.3820756673812866, "learning_rate": 0.00017835947958772533, "loss": 1.2492, "step": 8341 }, { "epoch": 0.10840048534621526, "grad_norm": 0.3791525363922119, "learning_rate": 0.00017835688012581398, "loss": 1.3166, "step": 8342 }, { "epoch": 0.10841347989013113, "grad_norm": 0.3632073700428009, "learning_rate": 0.00017835428066390258, "loss": 1.308, "step": 8343 }, { "epoch": 0.108426474434047, "grad_norm": 0.40637534856796265, "learning_rate": 0.0001783516812019912, "loss": 1.3872, "step": 8344 }, { "epoch": 0.10843946897796287, "grad_norm": 0.40175411105155945, "learning_rate": 0.0001783490817400798, "loss": 1.3106, "step": 8345 }, { "epoch": 0.10845246352187875, "grad_norm": 0.5319552421569824, "learning_rate": 0.00017834648227816843, "loss": 1.3413, "step": 8346 }, { "epoch": 0.10846545806579462, "grad_norm": 0.36020201444625854, "learning_rate": 0.00017834388281625705, "loss": 1.3822, "step": 8347 }, { "epoch": 0.1084784526097105, "grad_norm": 0.3974641263484955, "learning_rate": 0.00017834128335434565, "loss": 1.5008, "step": 8348 }, { "epoch": 0.10849144715362637, "grad_norm": 0.4384458661079407, "learning_rate": 0.00017833868389243427, "loss": 1.3096, "step": 8349 }, { "epoch": 0.10850444169754224, "grad_norm": 0.33949577808380127, "learning_rate": 0.0001783360844305229, "loss": 1.4741, "step": 8350 }, { "epoch": 0.10851743624145811, "grad_norm": 0.47890397906303406, "learning_rate": 0.0001783334849686115, "loss": 1.5686, "step": 8351 }, { "epoch": 0.10853043078537399, "grad_norm": 0.3752754032611847, "learning_rate": 0.00017833088550670012, "loss": 1.2255, "step": 8352 }, { "epoch": 0.10854342532928986, "grad_norm": 0.24401240050792694, "learning_rate": 0.00017832828604478872, "loss": 1.2197, "step": 8353 }, { "epoch": 0.10855641987320573, "grad_norm": 0.3515971899032593, "learning_rate": 0.00017832568658287737, "loss": 1.3718, "step": 8354 }, { "epoch": 0.10856941441712162, "grad_norm": 0.44225677847862244, "learning_rate": 0.00017832308712096597, "loss": 1.5992, "step": 8355 }, { "epoch": 0.10858240896103749, "grad_norm": 0.47988206148147583, "learning_rate": 0.0001783204876590546, "loss": 1.3909, "step": 8356 }, { "epoch": 0.10859540350495336, "grad_norm": 0.4157367944717407, "learning_rate": 0.0001783178881971432, "loss": 1.5578, "step": 8357 }, { "epoch": 0.10860839804886924, "grad_norm": 0.38189607858657837, "learning_rate": 0.0001783152887352318, "loss": 1.5537, "step": 8358 }, { "epoch": 0.10862139259278511, "grad_norm": 0.4212566316127777, "learning_rate": 0.00017831268927332044, "loss": 1.4137, "step": 8359 }, { "epoch": 0.10863438713670098, "grad_norm": 0.31272128224372864, "learning_rate": 0.00017831008981140903, "loss": 1.2933, "step": 8360 }, { "epoch": 0.10864738168061686, "grad_norm": 0.4786490797996521, "learning_rate": 0.00017830749034949766, "loss": 1.5483, "step": 8361 }, { "epoch": 0.10866037622453273, "grad_norm": 0.3467690348625183, "learning_rate": 0.00017830489088758628, "loss": 1.2842, "step": 8362 }, { "epoch": 0.1086733707684486, "grad_norm": 0.4524156451225281, "learning_rate": 0.0001783022914256749, "loss": 1.4121, "step": 8363 }, { "epoch": 0.10868636531236447, "grad_norm": 0.41800379753112793, "learning_rate": 0.0001782996919637635, "loss": 1.5609, "step": 8364 }, { "epoch": 0.10869935985628035, "grad_norm": 0.3651736080646515, "learning_rate": 0.0001782970925018521, "loss": 1.5597, "step": 8365 }, { "epoch": 0.10871235440019622, "grad_norm": 0.3153468668460846, "learning_rate": 0.00017829449303994075, "loss": 1.4299, "step": 8366 }, { "epoch": 0.1087253489441121, "grad_norm": 0.4016343951225281, "learning_rate": 0.00017829189357802935, "loss": 1.421, "step": 8367 }, { "epoch": 0.10873834348802797, "grad_norm": 0.37183037400245667, "learning_rate": 0.00017828929411611798, "loss": 1.3028, "step": 8368 }, { "epoch": 0.10875133803194384, "grad_norm": 0.3810477554798126, "learning_rate": 0.0001782866946542066, "loss": 1.5618, "step": 8369 }, { "epoch": 0.10876433257585971, "grad_norm": 0.348891943693161, "learning_rate": 0.0001782840951922952, "loss": 1.4816, "step": 8370 }, { "epoch": 0.10877732711977559, "grad_norm": 0.45606300234794617, "learning_rate": 0.00017828149573038382, "loss": 1.3803, "step": 8371 }, { "epoch": 0.10879032166369146, "grad_norm": 0.4514254629611969, "learning_rate": 0.00017827889626847242, "loss": 1.5694, "step": 8372 }, { "epoch": 0.10880331620760733, "grad_norm": 0.332529753446579, "learning_rate": 0.00017827629680656107, "loss": 1.3346, "step": 8373 }, { "epoch": 0.1088163107515232, "grad_norm": 0.3527812361717224, "learning_rate": 0.00017827369734464967, "loss": 1.4949, "step": 8374 }, { "epoch": 0.10882930529543908, "grad_norm": 0.43754810094833374, "learning_rate": 0.0001782710978827383, "loss": 1.4718, "step": 8375 }, { "epoch": 0.10884229983935495, "grad_norm": 0.3545166254043579, "learning_rate": 0.0001782684984208269, "loss": 1.5941, "step": 8376 }, { "epoch": 0.10885529438327082, "grad_norm": 0.4640529453754425, "learning_rate": 0.00017826589895891551, "loss": 1.521, "step": 8377 }, { "epoch": 0.1088682889271867, "grad_norm": 0.39585447311401367, "learning_rate": 0.00017826329949700414, "loss": 1.4753, "step": 8378 }, { "epoch": 0.10888128347110257, "grad_norm": 0.3528577387332916, "learning_rate": 0.00017826070003509274, "loss": 1.5648, "step": 8379 }, { "epoch": 0.10889427801501844, "grad_norm": 0.30275285243988037, "learning_rate": 0.00017825810057318136, "loss": 1.5874, "step": 8380 }, { "epoch": 0.10890727255893431, "grad_norm": 0.4410100281238556, "learning_rate": 0.00017825550111126999, "loss": 1.7, "step": 8381 }, { "epoch": 0.10892026710285019, "grad_norm": 0.40226468443870544, "learning_rate": 0.00017825290164935858, "loss": 1.4831, "step": 8382 }, { "epoch": 0.10893326164676606, "grad_norm": 0.33901968598365784, "learning_rate": 0.0001782503021874472, "loss": 1.4329, "step": 8383 }, { "epoch": 0.10894625619068193, "grad_norm": 0.28389546275138855, "learning_rate": 0.0001782477027255358, "loss": 1.2968, "step": 8384 }, { "epoch": 0.1089592507345978, "grad_norm": 0.3036123216152191, "learning_rate": 0.00017824510326362446, "loss": 1.2778, "step": 8385 }, { "epoch": 0.10897224527851368, "grad_norm": 0.40709248185157776, "learning_rate": 0.00017824250380171305, "loss": 1.6276, "step": 8386 }, { "epoch": 0.10898523982242955, "grad_norm": 0.6016970872879028, "learning_rate": 0.00017823990433980168, "loss": 1.4721, "step": 8387 }, { "epoch": 0.10899823436634543, "grad_norm": 0.4364887475967407, "learning_rate": 0.00017823730487789028, "loss": 1.5284, "step": 8388 }, { "epoch": 0.1090112289102613, "grad_norm": 0.33313050866127014, "learning_rate": 0.0001782347054159789, "loss": 1.493, "step": 8389 }, { "epoch": 0.10902422345417717, "grad_norm": 0.4251091182231903, "learning_rate": 0.00017823210595406752, "loss": 1.4608, "step": 8390 }, { "epoch": 0.10903721799809304, "grad_norm": 0.49666833877563477, "learning_rate": 0.00017822950649215612, "loss": 1.5754, "step": 8391 }, { "epoch": 0.10905021254200892, "grad_norm": 0.4272218644618988, "learning_rate": 0.00017822690703024475, "loss": 1.3725, "step": 8392 }, { "epoch": 0.10906320708592479, "grad_norm": 0.322427362203598, "learning_rate": 0.00017822430756833337, "loss": 1.3875, "step": 8393 }, { "epoch": 0.10907620162984068, "grad_norm": 0.3300779461860657, "learning_rate": 0.00017822170810642197, "loss": 1.3297, "step": 8394 }, { "epoch": 0.10908919617375655, "grad_norm": 0.341291606426239, "learning_rate": 0.0001782191086445106, "loss": 1.3353, "step": 8395 }, { "epoch": 0.10910219071767242, "grad_norm": 0.4103115499019623, "learning_rate": 0.0001782165091825992, "loss": 1.5571, "step": 8396 }, { "epoch": 0.1091151852615883, "grad_norm": 0.4109514653682709, "learning_rate": 0.00017821390972068784, "loss": 1.2875, "step": 8397 }, { "epoch": 0.10912817980550417, "grad_norm": 0.4343460500240326, "learning_rate": 0.00017821131025877644, "loss": 1.2937, "step": 8398 }, { "epoch": 0.10914117434942004, "grad_norm": 0.32695797085762024, "learning_rate": 0.00017820871079686506, "loss": 1.3604, "step": 8399 }, { "epoch": 0.10915416889333591, "grad_norm": 0.31041043996810913, "learning_rate": 0.00017820611133495366, "loss": 1.2872, "step": 8400 }, { "epoch": 0.10916716343725179, "grad_norm": 0.4419708549976349, "learning_rate": 0.00017820351187304229, "loss": 1.5659, "step": 8401 }, { "epoch": 0.10918015798116766, "grad_norm": 0.4739778935909271, "learning_rate": 0.0001782009124111309, "loss": 1.3573, "step": 8402 }, { "epoch": 0.10919315252508353, "grad_norm": 0.41323843598365784, "learning_rate": 0.0001781983129492195, "loss": 1.4687, "step": 8403 }, { "epoch": 0.1092061470689994, "grad_norm": 0.377329021692276, "learning_rate": 0.00017819571348730816, "loss": 1.1409, "step": 8404 }, { "epoch": 0.10921914161291528, "grad_norm": 0.324496865272522, "learning_rate": 0.00017819311402539676, "loss": 1.4109, "step": 8405 }, { "epoch": 0.10923213615683115, "grad_norm": 0.3908027708530426, "learning_rate": 0.00017819051456348535, "loss": 1.5341, "step": 8406 }, { "epoch": 0.10924513070074703, "grad_norm": 0.41613420844078064, "learning_rate": 0.00017818791510157398, "loss": 1.3407, "step": 8407 }, { "epoch": 0.1092581252446629, "grad_norm": 0.5755199790000916, "learning_rate": 0.0001781853156396626, "loss": 1.6164, "step": 8408 }, { "epoch": 0.10927111978857877, "grad_norm": 0.42957574129104614, "learning_rate": 0.00017818271617775123, "loss": 1.3906, "step": 8409 }, { "epoch": 0.10928411433249464, "grad_norm": 0.31957948207855225, "learning_rate": 0.00017818011671583982, "loss": 1.2539, "step": 8410 }, { "epoch": 0.10929710887641052, "grad_norm": 0.38507527112960815, "learning_rate": 0.00017817751725392845, "loss": 1.488, "step": 8411 }, { "epoch": 0.10931010342032639, "grad_norm": 0.47600457072257996, "learning_rate": 0.00017817491779201707, "loss": 1.5102, "step": 8412 }, { "epoch": 0.10932309796424226, "grad_norm": 0.37596261501312256, "learning_rate": 0.00017817231833010567, "loss": 1.4317, "step": 8413 }, { "epoch": 0.10933609250815814, "grad_norm": 0.49288859963417053, "learning_rate": 0.0001781697188681943, "loss": 1.5681, "step": 8414 }, { "epoch": 0.10934908705207401, "grad_norm": 0.3904706835746765, "learning_rate": 0.0001781671194062829, "loss": 1.3318, "step": 8415 }, { "epoch": 0.10936208159598988, "grad_norm": 0.29792383313179016, "learning_rate": 0.00017816451994437154, "loss": 1.2497, "step": 8416 }, { "epoch": 0.10937507613990576, "grad_norm": 0.3291919231414795, "learning_rate": 0.00017816192048246014, "loss": 1.2146, "step": 8417 }, { "epoch": 0.10938807068382163, "grad_norm": 0.37436097860336304, "learning_rate": 0.00017815932102054877, "loss": 1.4496, "step": 8418 }, { "epoch": 0.1094010652277375, "grad_norm": 0.33629274368286133, "learning_rate": 0.00017815672155863736, "loss": 1.4727, "step": 8419 }, { "epoch": 0.10941405977165337, "grad_norm": 0.36726537346839905, "learning_rate": 0.000178154122096726, "loss": 1.3965, "step": 8420 }, { "epoch": 0.10942705431556925, "grad_norm": 0.32399168610572815, "learning_rate": 0.0001781515226348146, "loss": 1.2605, "step": 8421 }, { "epoch": 0.10944004885948512, "grad_norm": 0.3672553598880768, "learning_rate": 0.0001781489231729032, "loss": 1.4345, "step": 8422 }, { "epoch": 0.10945304340340099, "grad_norm": 0.378427118062973, "learning_rate": 0.00017814632371099183, "loss": 1.3482, "step": 8423 }, { "epoch": 0.10946603794731687, "grad_norm": 0.4666999876499176, "learning_rate": 0.00017814372424908046, "loss": 1.6318, "step": 8424 }, { "epoch": 0.10947903249123274, "grad_norm": 0.3476693630218506, "learning_rate": 0.00017814112478716906, "loss": 1.316, "step": 8425 }, { "epoch": 0.10949202703514861, "grad_norm": 0.35928329825401306, "learning_rate": 0.00017813852532525768, "loss": 1.3788, "step": 8426 }, { "epoch": 0.10950502157906448, "grad_norm": 0.4357830286026001, "learning_rate": 0.00017813592586334628, "loss": 1.521, "step": 8427 }, { "epoch": 0.10951801612298036, "grad_norm": 0.4237259328365326, "learning_rate": 0.00017813332640143493, "loss": 1.3429, "step": 8428 }, { "epoch": 0.10953101066689623, "grad_norm": 0.3974064886569977, "learning_rate": 0.00017813072693952353, "loss": 1.3985, "step": 8429 }, { "epoch": 0.1095440052108121, "grad_norm": 0.42458170652389526, "learning_rate": 0.00017812812747761215, "loss": 1.535, "step": 8430 }, { "epoch": 0.10955699975472798, "grad_norm": 0.404532253742218, "learning_rate": 0.00017812552801570075, "loss": 1.3499, "step": 8431 }, { "epoch": 0.10956999429864386, "grad_norm": 0.3189915120601654, "learning_rate": 0.00017812292855378937, "loss": 1.3588, "step": 8432 }, { "epoch": 0.10958298884255974, "grad_norm": 0.33536967635154724, "learning_rate": 0.000178120329091878, "loss": 1.4059, "step": 8433 }, { "epoch": 0.10959598338647561, "grad_norm": 0.26062992215156555, "learning_rate": 0.0001781177296299666, "loss": 1.3249, "step": 8434 }, { "epoch": 0.10960897793039148, "grad_norm": 0.35935214161872864, "learning_rate": 0.00017811513016805522, "loss": 1.4979, "step": 8435 }, { "epoch": 0.10962197247430736, "grad_norm": 0.4060344994068146, "learning_rate": 0.00017811253070614384, "loss": 1.2814, "step": 8436 }, { "epoch": 0.10963496701822323, "grad_norm": 0.37158462405204773, "learning_rate": 0.00017810993124423244, "loss": 1.476, "step": 8437 }, { "epoch": 0.1096479615621391, "grad_norm": 0.3771154582500458, "learning_rate": 0.00017810733178232107, "loss": 1.5873, "step": 8438 }, { "epoch": 0.10966095610605497, "grad_norm": 0.447327584028244, "learning_rate": 0.00017810473232040966, "loss": 1.6553, "step": 8439 }, { "epoch": 0.10967395064997085, "grad_norm": 0.45415040850639343, "learning_rate": 0.00017810213285849832, "loss": 1.4273, "step": 8440 }, { "epoch": 0.10968694519388672, "grad_norm": 0.4562950134277344, "learning_rate": 0.0001780995333965869, "loss": 1.5406, "step": 8441 }, { "epoch": 0.1096999397378026, "grad_norm": 0.2560071051120758, "learning_rate": 0.00017809693393467554, "loss": 1.3796, "step": 8442 }, { "epoch": 0.10971293428171847, "grad_norm": 0.4355359375476837, "learning_rate": 0.00017809433447276416, "loss": 1.5637, "step": 8443 }, { "epoch": 0.10972592882563434, "grad_norm": 0.3415091633796692, "learning_rate": 0.00017809173501085276, "loss": 1.419, "step": 8444 }, { "epoch": 0.10973892336955021, "grad_norm": 0.5199602246284485, "learning_rate": 0.00017808913554894138, "loss": 1.679, "step": 8445 }, { "epoch": 0.10975191791346608, "grad_norm": 0.3922266364097595, "learning_rate": 0.00017808653608702998, "loss": 1.4026, "step": 8446 }, { "epoch": 0.10976491245738196, "grad_norm": 0.4205973446369171, "learning_rate": 0.00017808393662511863, "loss": 1.6379, "step": 8447 }, { "epoch": 0.10977790700129783, "grad_norm": 0.4112226366996765, "learning_rate": 0.00017808133716320723, "loss": 1.5078, "step": 8448 }, { "epoch": 0.1097909015452137, "grad_norm": 0.5164234638214111, "learning_rate": 0.00017807873770129583, "loss": 1.4231, "step": 8449 }, { "epoch": 0.10980389608912958, "grad_norm": 0.4740740656852722, "learning_rate": 0.00017807613823938445, "loss": 1.5024, "step": 8450 }, { "epoch": 0.10981689063304545, "grad_norm": 0.4100686311721802, "learning_rate": 0.00017807353877747308, "loss": 1.5633, "step": 8451 }, { "epoch": 0.10982988517696132, "grad_norm": 0.38827216625213623, "learning_rate": 0.0001780709393155617, "loss": 1.5038, "step": 8452 }, { "epoch": 0.1098428797208772, "grad_norm": 0.5411360263824463, "learning_rate": 0.0001780683398536503, "loss": 1.4401, "step": 8453 }, { "epoch": 0.10985587426479307, "grad_norm": 0.3749694526195526, "learning_rate": 0.00017806574039173892, "loss": 1.4677, "step": 8454 }, { "epoch": 0.10986886880870894, "grad_norm": 0.35603395104408264, "learning_rate": 0.00017806314092982755, "loss": 1.5813, "step": 8455 }, { "epoch": 0.10988186335262481, "grad_norm": 0.3705896735191345, "learning_rate": 0.00017806054146791614, "loss": 1.4599, "step": 8456 }, { "epoch": 0.10989485789654069, "grad_norm": 0.3408558666706085, "learning_rate": 0.00017805794200600477, "loss": 1.3976, "step": 8457 }, { "epoch": 0.10990785244045656, "grad_norm": 0.40790504217147827, "learning_rate": 0.00017805534254409337, "loss": 1.5811, "step": 8458 }, { "epoch": 0.10992084698437243, "grad_norm": 0.4712159037590027, "learning_rate": 0.00017805274308218202, "loss": 1.3591, "step": 8459 }, { "epoch": 0.1099338415282883, "grad_norm": 0.4193955361843109, "learning_rate": 0.00017805014362027062, "loss": 1.4916, "step": 8460 }, { "epoch": 0.10994683607220418, "grad_norm": 0.3751177489757538, "learning_rate": 0.0001780475441583592, "loss": 1.3984, "step": 8461 }, { "epoch": 0.10995983061612005, "grad_norm": 0.300302118062973, "learning_rate": 0.00017804494469644784, "loss": 1.4287, "step": 8462 }, { "epoch": 0.10997282516003593, "grad_norm": 0.3934277594089508, "learning_rate": 0.00017804234523453646, "loss": 1.2978, "step": 8463 }, { "epoch": 0.1099858197039518, "grad_norm": 0.33747467398643494, "learning_rate": 0.00017803974577262509, "loss": 1.2051, "step": 8464 }, { "epoch": 0.10999881424786767, "grad_norm": 0.40012240409851074, "learning_rate": 0.00017803714631071368, "loss": 1.5453, "step": 8465 }, { "epoch": 0.11001180879178354, "grad_norm": 0.4303434491157532, "learning_rate": 0.0001780345468488023, "loss": 1.1881, "step": 8466 }, { "epoch": 0.11002480333569942, "grad_norm": 0.39507564902305603, "learning_rate": 0.00017803194738689093, "loss": 1.4508, "step": 8467 }, { "epoch": 0.11003779787961529, "grad_norm": 0.35819846391677856, "learning_rate": 0.00017802934792497953, "loss": 1.4652, "step": 8468 }, { "epoch": 0.11005079242353116, "grad_norm": 0.3903351426124573, "learning_rate": 0.00017802674846306815, "loss": 1.4496, "step": 8469 }, { "epoch": 0.11006378696744705, "grad_norm": 0.3091757893562317, "learning_rate": 0.00017802414900115675, "loss": 1.3738, "step": 8470 }, { "epoch": 0.11007678151136292, "grad_norm": 0.36216795444488525, "learning_rate": 0.0001780215495392454, "loss": 1.4149, "step": 8471 }, { "epoch": 0.1100897760552788, "grad_norm": 0.34850308299064636, "learning_rate": 0.000178018950077334, "loss": 1.4391, "step": 8472 }, { "epoch": 0.11010277059919467, "grad_norm": 0.3682047426700592, "learning_rate": 0.0001780163506154226, "loss": 1.4511, "step": 8473 }, { "epoch": 0.11011576514311054, "grad_norm": 0.3127826750278473, "learning_rate": 0.00017801375115351122, "loss": 1.3262, "step": 8474 }, { "epoch": 0.11012875968702641, "grad_norm": 0.39212527871131897, "learning_rate": 0.00017801115169159985, "loss": 1.4265, "step": 8475 }, { "epoch": 0.11014175423094229, "grad_norm": 0.6448709964752197, "learning_rate": 0.00017800855222968847, "loss": 1.4576, "step": 8476 }, { "epoch": 0.11015474877485816, "grad_norm": 0.48138076066970825, "learning_rate": 0.00017800595276777707, "loss": 1.3788, "step": 8477 }, { "epoch": 0.11016774331877403, "grad_norm": 0.3340052366256714, "learning_rate": 0.0001780033533058657, "loss": 1.2852, "step": 8478 }, { "epoch": 0.1101807378626899, "grad_norm": 0.45999959111213684, "learning_rate": 0.00017800075384395432, "loss": 1.4337, "step": 8479 }, { "epoch": 0.11019373240660578, "grad_norm": 0.4603680670261383, "learning_rate": 0.00017799815438204292, "loss": 1.3663, "step": 8480 }, { "epoch": 0.11020672695052165, "grad_norm": 0.417120099067688, "learning_rate": 0.00017799555492013154, "loss": 1.4079, "step": 8481 }, { "epoch": 0.11021972149443753, "grad_norm": 0.37151020765304565, "learning_rate": 0.00017799295545822016, "loss": 1.331, "step": 8482 }, { "epoch": 0.1102327160383534, "grad_norm": 0.5135869383811951, "learning_rate": 0.0001779903559963088, "loss": 1.0712, "step": 8483 }, { "epoch": 0.11024571058226927, "grad_norm": 0.3920194208621979, "learning_rate": 0.00017798775653439739, "loss": 1.5737, "step": 8484 }, { "epoch": 0.11025870512618514, "grad_norm": 0.32528769969940186, "learning_rate": 0.000177985157072486, "loss": 1.3838, "step": 8485 }, { "epoch": 0.11027169967010102, "grad_norm": 0.4005073606967926, "learning_rate": 0.00017798255761057463, "loss": 1.4042, "step": 8486 }, { "epoch": 0.11028469421401689, "grad_norm": 0.295510470867157, "learning_rate": 0.00017797995814866323, "loss": 1.2493, "step": 8487 }, { "epoch": 0.11029768875793276, "grad_norm": 0.46690911054611206, "learning_rate": 0.00017797735868675186, "loss": 1.4751, "step": 8488 }, { "epoch": 0.11031068330184864, "grad_norm": 0.4221472144126892, "learning_rate": 0.00017797475922484045, "loss": 1.3042, "step": 8489 }, { "epoch": 0.11032367784576451, "grad_norm": 0.313406378030777, "learning_rate": 0.00017797215976292908, "loss": 1.3951, "step": 8490 }, { "epoch": 0.11033667238968038, "grad_norm": 0.44129878282546997, "learning_rate": 0.0001779695603010177, "loss": 1.4626, "step": 8491 }, { "epoch": 0.11034966693359625, "grad_norm": 0.30615466833114624, "learning_rate": 0.0001779669608391063, "loss": 1.446, "step": 8492 }, { "epoch": 0.11036266147751213, "grad_norm": 0.3778288662433624, "learning_rate": 0.00017796436137719492, "loss": 1.6043, "step": 8493 }, { "epoch": 0.110375656021428, "grad_norm": 0.30573880672454834, "learning_rate": 0.00017796176191528355, "loss": 1.3341, "step": 8494 }, { "epoch": 0.11038865056534387, "grad_norm": 0.37661105394363403, "learning_rate": 0.00017795916245337217, "loss": 1.5529, "step": 8495 }, { "epoch": 0.11040164510925975, "grad_norm": 0.33200299739837646, "learning_rate": 0.00017795656299146077, "loss": 1.3194, "step": 8496 }, { "epoch": 0.11041463965317562, "grad_norm": 0.4370069205760956, "learning_rate": 0.0001779539635295494, "loss": 1.4061, "step": 8497 }, { "epoch": 0.11042763419709149, "grad_norm": 0.3966250717639923, "learning_rate": 0.00017795136406763802, "loss": 1.5674, "step": 8498 }, { "epoch": 0.11044062874100737, "grad_norm": 0.33314865827560425, "learning_rate": 0.00017794876460572662, "loss": 1.2829, "step": 8499 }, { "epoch": 0.11045362328492324, "grad_norm": 0.32898661494255066, "learning_rate": 0.00017794616514381524, "loss": 1.4767, "step": 8500 }, { "epoch": 0.11046661782883911, "grad_norm": 0.3853452205657959, "learning_rate": 0.00017794356568190384, "loss": 1.3856, "step": 8501 }, { "epoch": 0.11047961237275498, "grad_norm": 0.4081169366836548, "learning_rate": 0.0001779409662199925, "loss": 1.354, "step": 8502 }, { "epoch": 0.11049260691667086, "grad_norm": 0.6198732852935791, "learning_rate": 0.0001779383667580811, "loss": 1.5742, "step": 8503 }, { "epoch": 0.11050560146058673, "grad_norm": 0.44481897354125977, "learning_rate": 0.00017793576729616969, "loss": 1.324, "step": 8504 }, { "epoch": 0.1105185960045026, "grad_norm": 0.36927396059036255, "learning_rate": 0.0001779331678342583, "loss": 1.3693, "step": 8505 }, { "epoch": 0.11053159054841848, "grad_norm": 0.3820902109146118, "learning_rate": 0.00017793056837234693, "loss": 1.4511, "step": 8506 }, { "epoch": 0.11054458509233435, "grad_norm": 0.37168335914611816, "learning_rate": 0.00017792796891043556, "loss": 1.4119, "step": 8507 }, { "epoch": 0.11055757963625024, "grad_norm": 0.4066098928451538, "learning_rate": 0.00017792536944852416, "loss": 1.3242, "step": 8508 }, { "epoch": 0.11057057418016611, "grad_norm": 0.31800830364227295, "learning_rate": 0.00017792276998661278, "loss": 1.354, "step": 8509 }, { "epoch": 0.11058356872408198, "grad_norm": 0.4043053388595581, "learning_rate": 0.0001779201705247014, "loss": 1.2733, "step": 8510 }, { "epoch": 0.11059656326799785, "grad_norm": 0.3298899531364441, "learning_rate": 0.00017791757106279, "loss": 1.4046, "step": 8511 }, { "epoch": 0.11060955781191373, "grad_norm": 0.4717923104763031, "learning_rate": 0.00017791497160087863, "loss": 1.3724, "step": 8512 }, { "epoch": 0.1106225523558296, "grad_norm": 0.41223472356796265, "learning_rate": 0.00017791237213896722, "loss": 1.4516, "step": 8513 }, { "epoch": 0.11063554689974547, "grad_norm": 0.3834102153778076, "learning_rate": 0.00017790977267705588, "loss": 1.6291, "step": 8514 }, { "epoch": 0.11064854144366135, "grad_norm": 0.3822353184223175, "learning_rate": 0.00017790717321514447, "loss": 1.2732, "step": 8515 }, { "epoch": 0.11066153598757722, "grad_norm": 0.4058038294315338, "learning_rate": 0.00017790457375323307, "loss": 1.4128, "step": 8516 }, { "epoch": 0.11067453053149309, "grad_norm": 0.29582715034484863, "learning_rate": 0.00017790197429132172, "loss": 1.1204, "step": 8517 }, { "epoch": 0.11068752507540897, "grad_norm": 0.3588052988052368, "learning_rate": 0.00017789937482941032, "loss": 1.2235, "step": 8518 }, { "epoch": 0.11070051961932484, "grad_norm": 0.4162876009941101, "learning_rate": 0.00017789677536749894, "loss": 1.5949, "step": 8519 }, { "epoch": 0.11071351416324071, "grad_norm": 0.3948515057563782, "learning_rate": 0.00017789417590558754, "loss": 1.388, "step": 8520 }, { "epoch": 0.11072650870715658, "grad_norm": 0.37233200669288635, "learning_rate": 0.00017789157644367617, "loss": 1.4276, "step": 8521 }, { "epoch": 0.11073950325107246, "grad_norm": 0.4185180962085724, "learning_rate": 0.0001778889769817648, "loss": 1.5226, "step": 8522 }, { "epoch": 0.11075249779498833, "grad_norm": 0.39894646406173706, "learning_rate": 0.0001778863775198534, "loss": 1.5131, "step": 8523 }, { "epoch": 0.1107654923389042, "grad_norm": 0.3927561342716217, "learning_rate": 0.000177883778057942, "loss": 1.4621, "step": 8524 }, { "epoch": 0.11077848688282008, "grad_norm": 0.3699970245361328, "learning_rate": 0.00017788117859603064, "loss": 1.3531, "step": 8525 }, { "epoch": 0.11079148142673595, "grad_norm": 0.39307940006256104, "learning_rate": 0.00017787857913411926, "loss": 1.4239, "step": 8526 }, { "epoch": 0.11080447597065182, "grad_norm": 0.39279013872146606, "learning_rate": 0.00017787597967220786, "loss": 1.4238, "step": 8527 }, { "epoch": 0.1108174705145677, "grad_norm": 0.39066559076309204, "learning_rate": 0.00017787338021029646, "loss": 1.4186, "step": 8528 }, { "epoch": 0.11083046505848357, "grad_norm": 0.2830652892589569, "learning_rate": 0.0001778707807483851, "loss": 1.4811, "step": 8529 }, { "epoch": 0.11084345960239944, "grad_norm": 0.3115213215351105, "learning_rate": 0.0001778681812864737, "loss": 1.1241, "step": 8530 }, { "epoch": 0.11085645414631531, "grad_norm": 0.46800488233566284, "learning_rate": 0.00017786558182456233, "loss": 1.6362, "step": 8531 }, { "epoch": 0.11086944869023119, "grad_norm": 0.3846072256565094, "learning_rate": 0.00017786298236265093, "loss": 1.4724, "step": 8532 }, { "epoch": 0.11088244323414706, "grad_norm": 0.30631932616233826, "learning_rate": 0.00017786038290073955, "loss": 1.3344, "step": 8533 }, { "epoch": 0.11089543777806293, "grad_norm": 0.38891199231147766, "learning_rate": 0.00017785778343882818, "loss": 1.4796, "step": 8534 }, { "epoch": 0.1109084323219788, "grad_norm": 0.3535824716091156, "learning_rate": 0.00017785518397691677, "loss": 1.3722, "step": 8535 }, { "epoch": 0.11092142686589468, "grad_norm": 0.489162802696228, "learning_rate": 0.0001778525845150054, "loss": 1.5587, "step": 8536 }, { "epoch": 0.11093442140981055, "grad_norm": 0.482291042804718, "learning_rate": 0.00017784998505309402, "loss": 1.3759, "step": 8537 }, { "epoch": 0.11094741595372642, "grad_norm": 0.44002512097358704, "learning_rate": 0.00017784738559118265, "loss": 1.5708, "step": 8538 }, { "epoch": 0.1109604104976423, "grad_norm": 0.31956562399864197, "learning_rate": 0.00017784478612927124, "loss": 1.2889, "step": 8539 }, { "epoch": 0.11097340504155817, "grad_norm": 0.34225621819496155, "learning_rate": 0.00017784218666735987, "loss": 1.3836, "step": 8540 }, { "epoch": 0.11098639958547404, "grad_norm": 0.39811187982559204, "learning_rate": 0.0001778395872054485, "loss": 1.5996, "step": 8541 }, { "epoch": 0.11099939412938992, "grad_norm": 0.3725215494632721, "learning_rate": 0.0001778369877435371, "loss": 1.5099, "step": 8542 }, { "epoch": 0.11101238867330579, "grad_norm": 0.3944075107574463, "learning_rate": 0.00017783438828162572, "loss": 1.6023, "step": 8543 }, { "epoch": 0.11102538321722166, "grad_norm": 0.39231187105178833, "learning_rate": 0.0001778317888197143, "loss": 1.5115, "step": 8544 }, { "epoch": 0.11103837776113754, "grad_norm": 0.41220220923423767, "learning_rate": 0.00017782918935780294, "loss": 1.3902, "step": 8545 }, { "epoch": 0.11105137230505342, "grad_norm": 0.4165779948234558, "learning_rate": 0.00017782658989589156, "loss": 1.5146, "step": 8546 }, { "epoch": 0.1110643668489693, "grad_norm": 0.27829891443252563, "learning_rate": 0.00017782399043398016, "loss": 1.2942, "step": 8547 }, { "epoch": 0.11107736139288517, "grad_norm": 0.41873669624328613, "learning_rate": 0.00017782139097206878, "loss": 1.3266, "step": 8548 }, { "epoch": 0.11109035593680104, "grad_norm": 0.4187389314174652, "learning_rate": 0.0001778187915101574, "loss": 1.5471, "step": 8549 }, { "epoch": 0.11110335048071691, "grad_norm": 0.4204435348510742, "learning_rate": 0.00017781619204824603, "loss": 1.3635, "step": 8550 }, { "epoch": 0.11111634502463279, "grad_norm": 0.4332634508609772, "learning_rate": 0.00017781359258633463, "loss": 1.439, "step": 8551 }, { "epoch": 0.11112933956854866, "grad_norm": 0.4367465078830719, "learning_rate": 0.00017781099312442325, "loss": 1.4757, "step": 8552 }, { "epoch": 0.11114233411246453, "grad_norm": 0.32651135325431824, "learning_rate": 0.00017780839366251188, "loss": 1.4655, "step": 8553 }, { "epoch": 0.1111553286563804, "grad_norm": 0.33064085245132446, "learning_rate": 0.00017780579420060048, "loss": 1.2961, "step": 8554 }, { "epoch": 0.11116832320029628, "grad_norm": 0.3711194097995758, "learning_rate": 0.0001778031947386891, "loss": 1.1332, "step": 8555 }, { "epoch": 0.11118131774421215, "grad_norm": 0.46861642599105835, "learning_rate": 0.00017780059527677773, "loss": 1.3883, "step": 8556 }, { "epoch": 0.11119431228812802, "grad_norm": 0.4704437255859375, "learning_rate": 0.00017779799581486632, "loss": 1.5139, "step": 8557 }, { "epoch": 0.1112073068320439, "grad_norm": 0.4395003616809845, "learning_rate": 0.00017779539635295495, "loss": 1.4169, "step": 8558 }, { "epoch": 0.11122030137595977, "grad_norm": 0.4101468622684479, "learning_rate": 0.00017779279689104354, "loss": 1.416, "step": 8559 }, { "epoch": 0.11123329591987564, "grad_norm": 0.3807888329029083, "learning_rate": 0.0001777901974291322, "loss": 1.5845, "step": 8560 }, { "epoch": 0.11124629046379152, "grad_norm": 0.424621045589447, "learning_rate": 0.0001777875979672208, "loss": 1.4007, "step": 8561 }, { "epoch": 0.11125928500770739, "grad_norm": 0.37832802534103394, "learning_rate": 0.00017778499850530942, "loss": 1.5257, "step": 8562 }, { "epoch": 0.11127227955162326, "grad_norm": 0.4164350628852844, "learning_rate": 0.00017778239904339802, "loss": 1.5499, "step": 8563 }, { "epoch": 0.11128527409553914, "grad_norm": 0.4691551923751831, "learning_rate": 0.00017777979958148664, "loss": 1.5241, "step": 8564 }, { "epoch": 0.11129826863945501, "grad_norm": 0.33752667903900146, "learning_rate": 0.00017777720011957526, "loss": 1.374, "step": 8565 }, { "epoch": 0.11131126318337088, "grad_norm": 0.44429147243499756, "learning_rate": 0.00017777460065766386, "loss": 1.36, "step": 8566 }, { "epoch": 0.11132425772728675, "grad_norm": 0.3906398117542267, "learning_rate": 0.00017777200119575249, "loss": 1.4907, "step": 8567 }, { "epoch": 0.11133725227120263, "grad_norm": 0.4127035439014435, "learning_rate": 0.0001777694017338411, "loss": 1.252, "step": 8568 }, { "epoch": 0.1113502468151185, "grad_norm": 0.37651756405830383, "learning_rate": 0.00017776680227192974, "loss": 1.3173, "step": 8569 }, { "epoch": 0.11136324135903437, "grad_norm": 0.4683295786380768, "learning_rate": 0.00017776420281001833, "loss": 1.6301, "step": 8570 }, { "epoch": 0.11137623590295025, "grad_norm": 0.42249608039855957, "learning_rate": 0.00017776160334810693, "loss": 1.5849, "step": 8571 }, { "epoch": 0.11138923044686612, "grad_norm": 0.4293670058250427, "learning_rate": 0.00017775900388619558, "loss": 1.3854, "step": 8572 }, { "epoch": 0.11140222499078199, "grad_norm": 0.4635482430458069, "learning_rate": 0.00017775640442428418, "loss": 1.4164, "step": 8573 }, { "epoch": 0.11141521953469787, "grad_norm": 0.4318121075630188, "learning_rate": 0.0001777538049623728, "loss": 1.4145, "step": 8574 }, { "epoch": 0.11142821407861374, "grad_norm": 0.3186655640602112, "learning_rate": 0.0001777512055004614, "loss": 1.2911, "step": 8575 }, { "epoch": 0.11144120862252961, "grad_norm": 0.3924480974674225, "learning_rate": 0.00017774860603855003, "loss": 1.4145, "step": 8576 }, { "epoch": 0.11145420316644548, "grad_norm": 0.4285651445388794, "learning_rate": 0.00017774600657663865, "loss": 1.386, "step": 8577 }, { "epoch": 0.11146719771036136, "grad_norm": 0.3569808900356293, "learning_rate": 0.00017774340711472725, "loss": 1.3373, "step": 8578 }, { "epoch": 0.11148019225427723, "grad_norm": 0.4820652902126312, "learning_rate": 0.00017774080765281587, "loss": 1.5354, "step": 8579 }, { "epoch": 0.1114931867981931, "grad_norm": 0.48390087485313416, "learning_rate": 0.0001777382081909045, "loss": 1.5638, "step": 8580 }, { "epoch": 0.11150618134210898, "grad_norm": 0.3615210950374603, "learning_rate": 0.00017773560872899312, "loss": 1.3449, "step": 8581 }, { "epoch": 0.11151917588602485, "grad_norm": 0.41461074352264404, "learning_rate": 0.00017773300926708172, "loss": 1.4682, "step": 8582 }, { "epoch": 0.11153217042994072, "grad_norm": 0.4105028510093689, "learning_rate": 0.00017773040980517032, "loss": 1.4419, "step": 8583 }, { "epoch": 0.11154516497385661, "grad_norm": 0.3765884339809418, "learning_rate": 0.00017772781034325897, "loss": 1.2889, "step": 8584 }, { "epoch": 0.11155815951777248, "grad_norm": 0.34632161259651184, "learning_rate": 0.00017772521088134756, "loss": 1.41, "step": 8585 }, { "epoch": 0.11157115406168835, "grad_norm": 0.4560492932796478, "learning_rate": 0.0001777226114194362, "loss": 1.425, "step": 8586 }, { "epoch": 0.11158414860560423, "grad_norm": 0.3899783194065094, "learning_rate": 0.00017772001195752479, "loss": 1.2785, "step": 8587 }, { "epoch": 0.1115971431495201, "grad_norm": 0.4155313968658447, "learning_rate": 0.0001777174124956134, "loss": 1.2635, "step": 8588 }, { "epoch": 0.11161013769343597, "grad_norm": 0.4057666063308716, "learning_rate": 0.00017771481303370204, "loss": 1.4463, "step": 8589 }, { "epoch": 0.11162313223735185, "grad_norm": 0.5535851120948792, "learning_rate": 0.00017771221357179063, "loss": 1.2467, "step": 8590 }, { "epoch": 0.11163612678126772, "grad_norm": 0.5103722810745239, "learning_rate": 0.00017770961410987928, "loss": 1.4143, "step": 8591 }, { "epoch": 0.11164912132518359, "grad_norm": 0.3232981264591217, "learning_rate": 0.00017770701464796788, "loss": 1.5859, "step": 8592 }, { "epoch": 0.11166211586909947, "grad_norm": 0.4483446180820465, "learning_rate": 0.0001777044151860565, "loss": 1.41, "step": 8593 }, { "epoch": 0.11167511041301534, "grad_norm": 0.3665595054626465, "learning_rate": 0.0001777018157241451, "loss": 1.4495, "step": 8594 }, { "epoch": 0.11168810495693121, "grad_norm": 0.49292799830436707, "learning_rate": 0.00017769921626223373, "loss": 1.4515, "step": 8595 }, { "epoch": 0.11170109950084708, "grad_norm": 0.40318048000335693, "learning_rate": 0.00017769661680032235, "loss": 1.2241, "step": 8596 }, { "epoch": 0.11171409404476296, "grad_norm": 0.38092324137687683, "learning_rate": 0.00017769401733841095, "loss": 1.271, "step": 8597 }, { "epoch": 0.11172708858867883, "grad_norm": 0.34880295395851135, "learning_rate": 0.00017769141787649957, "loss": 1.3237, "step": 8598 }, { "epoch": 0.1117400831325947, "grad_norm": 0.3966793417930603, "learning_rate": 0.0001776888184145882, "loss": 1.3332, "step": 8599 }, { "epoch": 0.11175307767651058, "grad_norm": 0.3927079439163208, "learning_rate": 0.0001776862189526768, "loss": 1.3222, "step": 8600 }, { "epoch": 0.11176607222042645, "grad_norm": 0.4302537441253662, "learning_rate": 0.00017768361949076542, "loss": 1.6084, "step": 8601 }, { "epoch": 0.11177906676434232, "grad_norm": 0.46645569801330566, "learning_rate": 0.00017768102002885402, "loss": 1.4463, "step": 8602 }, { "epoch": 0.1117920613082582, "grad_norm": 0.3670461177825928, "learning_rate": 0.00017767842056694267, "loss": 1.3618, "step": 8603 }, { "epoch": 0.11180505585217407, "grad_norm": 0.37556034326553345, "learning_rate": 0.00017767582110503127, "loss": 1.4138, "step": 8604 }, { "epoch": 0.11181805039608994, "grad_norm": 0.3809594511985779, "learning_rate": 0.0001776732216431199, "loss": 1.3004, "step": 8605 }, { "epoch": 0.11183104494000581, "grad_norm": 0.36627018451690674, "learning_rate": 0.0001776706221812085, "loss": 1.4118, "step": 8606 }, { "epoch": 0.11184403948392169, "grad_norm": 0.42096495628356934, "learning_rate": 0.0001776680227192971, "loss": 1.7041, "step": 8607 }, { "epoch": 0.11185703402783756, "grad_norm": 0.3480881452560425, "learning_rate": 0.00017766542325738574, "loss": 1.3083, "step": 8608 }, { "epoch": 0.11187002857175343, "grad_norm": 0.378113329410553, "learning_rate": 0.00017766282379547434, "loss": 1.467, "step": 8609 }, { "epoch": 0.1118830231156693, "grad_norm": 0.3729805052280426, "learning_rate": 0.00017766022433356296, "loss": 1.1988, "step": 8610 }, { "epoch": 0.11189601765958518, "grad_norm": 0.4683253765106201, "learning_rate": 0.00017765762487165158, "loss": 1.4543, "step": 8611 }, { "epoch": 0.11190901220350105, "grad_norm": 0.3739033341407776, "learning_rate": 0.00017765502540974018, "loss": 1.5065, "step": 8612 }, { "epoch": 0.11192200674741692, "grad_norm": 0.46230778098106384, "learning_rate": 0.0001776524259478288, "loss": 1.6942, "step": 8613 }, { "epoch": 0.1119350012913328, "grad_norm": 0.3907121419906616, "learning_rate": 0.0001776498264859174, "loss": 1.5587, "step": 8614 }, { "epoch": 0.11194799583524867, "grad_norm": 0.3419528603553772, "learning_rate": 0.00017764722702400605, "loss": 1.4183, "step": 8615 }, { "epoch": 0.11196099037916454, "grad_norm": 0.306140273809433, "learning_rate": 0.00017764462756209465, "loss": 1.5105, "step": 8616 }, { "epoch": 0.11197398492308042, "grad_norm": 0.33101925253868103, "learning_rate": 0.00017764202810018328, "loss": 1.4979, "step": 8617 }, { "epoch": 0.11198697946699629, "grad_norm": 0.3967652916908264, "learning_rate": 0.00017763942863827187, "loss": 1.4418, "step": 8618 }, { "epoch": 0.11199997401091216, "grad_norm": 0.36774036288261414, "learning_rate": 0.0001776368291763605, "loss": 1.5051, "step": 8619 }, { "epoch": 0.11201296855482804, "grad_norm": 0.3067176342010498, "learning_rate": 0.00017763422971444912, "loss": 1.108, "step": 8620 }, { "epoch": 0.11202596309874391, "grad_norm": 0.39619627594947815, "learning_rate": 0.00017763163025253772, "loss": 1.4802, "step": 8621 }, { "epoch": 0.1120389576426598, "grad_norm": 0.3856840133666992, "learning_rate": 0.00017762903079062635, "loss": 1.4216, "step": 8622 }, { "epoch": 0.11205195218657567, "grad_norm": 0.4031469523906708, "learning_rate": 0.00017762643132871497, "loss": 1.4308, "step": 8623 }, { "epoch": 0.11206494673049154, "grad_norm": 0.3182077705860138, "learning_rate": 0.0001776238318668036, "loss": 1.6204, "step": 8624 }, { "epoch": 0.11207794127440741, "grad_norm": 0.3879203796386719, "learning_rate": 0.0001776212324048922, "loss": 1.4453, "step": 8625 }, { "epoch": 0.11209093581832329, "grad_norm": 0.40703073143959045, "learning_rate": 0.0001776186329429808, "loss": 1.5185, "step": 8626 }, { "epoch": 0.11210393036223916, "grad_norm": 0.40695518255233765, "learning_rate": 0.00017761603348106944, "loss": 1.2622, "step": 8627 }, { "epoch": 0.11211692490615503, "grad_norm": 0.35042908787727356, "learning_rate": 0.00017761343401915804, "loss": 1.4216, "step": 8628 }, { "epoch": 0.1121299194500709, "grad_norm": 0.2891005277633667, "learning_rate": 0.00017761083455724666, "loss": 1.3575, "step": 8629 }, { "epoch": 0.11214291399398678, "grad_norm": 0.4249033033847809, "learning_rate": 0.0001776082350953353, "loss": 1.6655, "step": 8630 }, { "epoch": 0.11215590853790265, "grad_norm": 0.4816652536392212, "learning_rate": 0.00017760563563342388, "loss": 1.5725, "step": 8631 }, { "epoch": 0.11216890308181852, "grad_norm": 0.3626181483268738, "learning_rate": 0.0001776030361715125, "loss": 1.4344, "step": 8632 }, { "epoch": 0.1121818976257344, "grad_norm": 0.39744338393211365, "learning_rate": 0.0001776004367096011, "loss": 1.517, "step": 8633 }, { "epoch": 0.11219489216965027, "grad_norm": 0.3981624245643616, "learning_rate": 0.00017759783724768976, "loss": 1.4557, "step": 8634 }, { "epoch": 0.11220788671356614, "grad_norm": 0.4194968640804291, "learning_rate": 0.00017759523778577835, "loss": 1.403, "step": 8635 }, { "epoch": 0.11222088125748202, "grad_norm": 0.37882140278816223, "learning_rate": 0.00017759263832386698, "loss": 1.4211, "step": 8636 }, { "epoch": 0.11223387580139789, "grad_norm": 0.44159385561943054, "learning_rate": 0.00017759003886195558, "loss": 1.5235, "step": 8637 }, { "epoch": 0.11224687034531376, "grad_norm": 0.376777321100235, "learning_rate": 0.0001775874394000442, "loss": 1.5527, "step": 8638 }, { "epoch": 0.11225986488922964, "grad_norm": 0.3993493914604187, "learning_rate": 0.00017758483993813283, "loss": 1.6731, "step": 8639 }, { "epoch": 0.11227285943314551, "grad_norm": 0.3251712918281555, "learning_rate": 0.00017758224047622142, "loss": 1.4856, "step": 8640 }, { "epoch": 0.11228585397706138, "grad_norm": 0.5070064663887024, "learning_rate": 0.00017757964101431005, "loss": 1.3719, "step": 8641 }, { "epoch": 0.11229884852097725, "grad_norm": 0.39667999744415283, "learning_rate": 0.00017757704155239867, "loss": 1.4109, "step": 8642 }, { "epoch": 0.11231184306489313, "grad_norm": 0.35416334867477417, "learning_rate": 0.00017757444209048727, "loss": 1.3849, "step": 8643 }, { "epoch": 0.112324837608809, "grad_norm": 0.4591248631477356, "learning_rate": 0.0001775718426285759, "loss": 1.5327, "step": 8644 }, { "epoch": 0.11233783215272487, "grad_norm": 0.3754018247127533, "learning_rate": 0.0001775692431666645, "loss": 1.3392, "step": 8645 }, { "epoch": 0.11235082669664075, "grad_norm": 0.41714969277381897, "learning_rate": 0.00017756664370475314, "loss": 1.4186, "step": 8646 }, { "epoch": 0.11236382124055662, "grad_norm": 0.4091562330722809, "learning_rate": 0.00017756404424284174, "loss": 1.3283, "step": 8647 }, { "epoch": 0.11237681578447249, "grad_norm": 0.4639502465724945, "learning_rate": 0.00017756144478093036, "loss": 1.5955, "step": 8648 }, { "epoch": 0.11238981032838836, "grad_norm": 0.2942112982273102, "learning_rate": 0.00017755884531901896, "loss": 1.2763, "step": 8649 }, { "epoch": 0.11240280487230424, "grad_norm": 0.4005592465400696, "learning_rate": 0.0001775562458571076, "loss": 1.4896, "step": 8650 }, { "epoch": 0.11241579941622011, "grad_norm": 0.4401690363883972, "learning_rate": 0.0001775536463951962, "loss": 1.4973, "step": 8651 }, { "epoch": 0.11242879396013598, "grad_norm": 0.3246303200721741, "learning_rate": 0.0001775510469332848, "loss": 1.2462, "step": 8652 }, { "epoch": 0.11244178850405186, "grad_norm": 0.4185452163219452, "learning_rate": 0.00017754844747137343, "loss": 1.4796, "step": 8653 }, { "epoch": 0.11245478304796773, "grad_norm": 0.3429237902164459, "learning_rate": 0.00017754584800946206, "loss": 1.6688, "step": 8654 }, { "epoch": 0.1124677775918836, "grad_norm": 0.3326869010925293, "learning_rate": 0.00017754324854755065, "loss": 1.3756, "step": 8655 }, { "epoch": 0.11248077213579948, "grad_norm": 0.4681721329689026, "learning_rate": 0.00017754064908563928, "loss": 1.4612, "step": 8656 }, { "epoch": 0.11249376667971535, "grad_norm": 0.44035854935646057, "learning_rate": 0.00017753804962372788, "loss": 1.4571, "step": 8657 }, { "epoch": 0.11250676122363122, "grad_norm": 0.3797861933708191, "learning_rate": 0.00017753545016181653, "loss": 1.4258, "step": 8658 }, { "epoch": 0.1125197557675471, "grad_norm": 0.5091722011566162, "learning_rate": 0.00017753285069990513, "loss": 1.6727, "step": 8659 }, { "epoch": 0.11253275031146298, "grad_norm": 0.3938788175582886, "learning_rate": 0.00017753025123799375, "loss": 1.4774, "step": 8660 }, { "epoch": 0.11254574485537885, "grad_norm": 0.32285359501838684, "learning_rate": 0.00017752765177608235, "loss": 1.3694, "step": 8661 }, { "epoch": 0.11255873939929473, "grad_norm": 0.3990798890590668, "learning_rate": 0.00017752505231417097, "loss": 1.3121, "step": 8662 }, { "epoch": 0.1125717339432106, "grad_norm": 0.39969685673713684, "learning_rate": 0.0001775224528522596, "loss": 1.5714, "step": 8663 }, { "epoch": 0.11258472848712647, "grad_norm": 0.36271074414253235, "learning_rate": 0.0001775198533903482, "loss": 1.494, "step": 8664 }, { "epoch": 0.11259772303104235, "grad_norm": 0.43183159828186035, "learning_rate": 0.00017751725392843685, "loss": 1.401, "step": 8665 }, { "epoch": 0.11261071757495822, "grad_norm": 0.3798542022705078, "learning_rate": 0.00017751465446652544, "loss": 1.2555, "step": 8666 }, { "epoch": 0.11262371211887409, "grad_norm": 0.42847663164138794, "learning_rate": 0.00017751205500461404, "loss": 1.5422, "step": 8667 }, { "epoch": 0.11263670666278996, "grad_norm": 0.47489461302757263, "learning_rate": 0.00017750945554270266, "loss": 1.3911, "step": 8668 }, { "epoch": 0.11264970120670584, "grad_norm": 0.4326644241809845, "learning_rate": 0.0001775068560807913, "loss": 1.4057, "step": 8669 }, { "epoch": 0.11266269575062171, "grad_norm": 0.35288766026496887, "learning_rate": 0.00017750425661887991, "loss": 1.2756, "step": 8670 }, { "epoch": 0.11267569029453758, "grad_norm": 0.38767990469932556, "learning_rate": 0.0001775016571569685, "loss": 1.3891, "step": 8671 }, { "epoch": 0.11268868483845346, "grad_norm": 0.4462668299674988, "learning_rate": 0.00017749905769505714, "loss": 1.4228, "step": 8672 }, { "epoch": 0.11270167938236933, "grad_norm": 0.2589188814163208, "learning_rate": 0.00017749645823314576, "loss": 1.2878, "step": 8673 }, { "epoch": 0.1127146739262852, "grad_norm": 0.4760963022708893, "learning_rate": 0.00017749385877123436, "loss": 1.5268, "step": 8674 }, { "epoch": 0.11272766847020108, "grad_norm": 0.4072748124599457, "learning_rate": 0.00017749125930932298, "loss": 1.4719, "step": 8675 }, { "epoch": 0.11274066301411695, "grad_norm": 0.40276533365249634, "learning_rate": 0.00017748865984741158, "loss": 1.3391, "step": 8676 }, { "epoch": 0.11275365755803282, "grad_norm": 0.3967902660369873, "learning_rate": 0.00017748606038550023, "loss": 1.4678, "step": 8677 }, { "epoch": 0.1127666521019487, "grad_norm": 0.40766680240631104, "learning_rate": 0.00017748346092358883, "loss": 1.6556, "step": 8678 }, { "epoch": 0.11277964664586457, "grad_norm": 0.4481208622455597, "learning_rate": 0.00017748086146167743, "loss": 1.4944, "step": 8679 }, { "epoch": 0.11279264118978044, "grad_norm": 0.39495933055877686, "learning_rate": 0.00017747826199976605, "loss": 1.3384, "step": 8680 }, { "epoch": 0.11280563573369631, "grad_norm": 0.41857582330703735, "learning_rate": 0.00017747566253785467, "loss": 1.5347, "step": 8681 }, { "epoch": 0.11281863027761219, "grad_norm": 0.43404144048690796, "learning_rate": 0.0001774730630759433, "loss": 1.5017, "step": 8682 }, { "epoch": 0.11283162482152806, "grad_norm": 0.36908257007598877, "learning_rate": 0.0001774704636140319, "loss": 1.4221, "step": 8683 }, { "epoch": 0.11284461936544393, "grad_norm": 0.3650283217430115, "learning_rate": 0.00017746786415212052, "loss": 1.4195, "step": 8684 }, { "epoch": 0.1128576139093598, "grad_norm": 0.49003270268440247, "learning_rate": 0.00017746526469020915, "loss": 1.524, "step": 8685 }, { "epoch": 0.11287060845327568, "grad_norm": 0.38387563824653625, "learning_rate": 0.00017746266522829774, "loss": 1.4145, "step": 8686 }, { "epoch": 0.11288360299719155, "grad_norm": 0.35055360198020935, "learning_rate": 0.00017746006576638637, "loss": 1.4636, "step": 8687 }, { "epoch": 0.11289659754110742, "grad_norm": 0.42743873596191406, "learning_rate": 0.00017745746630447496, "loss": 1.6415, "step": 8688 }, { "epoch": 0.1129095920850233, "grad_norm": 0.45148128271102905, "learning_rate": 0.00017745486684256362, "loss": 1.4739, "step": 8689 }, { "epoch": 0.11292258662893917, "grad_norm": 0.4883705675601959, "learning_rate": 0.00017745226738065221, "loss": 1.5701, "step": 8690 }, { "epoch": 0.11293558117285504, "grad_norm": 0.4535878896713257, "learning_rate": 0.00017744966791874084, "loss": 1.5729, "step": 8691 }, { "epoch": 0.11294857571677092, "grad_norm": 0.31692615151405334, "learning_rate": 0.00017744706845682944, "loss": 1.3584, "step": 8692 }, { "epoch": 0.11296157026068679, "grad_norm": 0.4068675637245178, "learning_rate": 0.00017744446899491806, "loss": 1.3121, "step": 8693 }, { "epoch": 0.11297456480460266, "grad_norm": 0.3965752124786377, "learning_rate": 0.00017744186953300668, "loss": 1.4902, "step": 8694 }, { "epoch": 0.11298755934851853, "grad_norm": 0.3603360950946808, "learning_rate": 0.00017743927007109528, "loss": 1.4944, "step": 8695 }, { "epoch": 0.11300055389243441, "grad_norm": 0.34367141127586365, "learning_rate": 0.0001774366706091839, "loss": 1.4593, "step": 8696 }, { "epoch": 0.11301354843635028, "grad_norm": 0.4477933645248413, "learning_rate": 0.00017743407114727253, "loss": 1.5064, "step": 8697 }, { "epoch": 0.11302654298026617, "grad_norm": 0.48470833897590637, "learning_rate": 0.00017743147168536113, "loss": 1.4904, "step": 8698 }, { "epoch": 0.11303953752418204, "grad_norm": 0.3278193771839142, "learning_rate": 0.00017742887222344975, "loss": 1.2976, "step": 8699 }, { "epoch": 0.11305253206809791, "grad_norm": 0.3547235429286957, "learning_rate": 0.00017742627276153835, "loss": 1.4448, "step": 8700 }, { "epoch": 0.11306552661201379, "grad_norm": 0.4378271996974945, "learning_rate": 0.000177423673299627, "loss": 1.3966, "step": 8701 }, { "epoch": 0.11307852115592966, "grad_norm": 0.4148450195789337, "learning_rate": 0.0001774210738377156, "loss": 1.6076, "step": 8702 }, { "epoch": 0.11309151569984553, "grad_norm": 0.2955113649368286, "learning_rate": 0.00017741847437580422, "loss": 1.4415, "step": 8703 }, { "epoch": 0.1131045102437614, "grad_norm": 0.3896235525608063, "learning_rate": 0.00017741587491389285, "loss": 1.4444, "step": 8704 }, { "epoch": 0.11311750478767728, "grad_norm": 0.37741780281066895, "learning_rate": 0.00017741327545198145, "loss": 1.3442, "step": 8705 }, { "epoch": 0.11313049933159315, "grad_norm": 0.4967813491821289, "learning_rate": 0.00017741067599007007, "loss": 1.4187, "step": 8706 }, { "epoch": 0.11314349387550902, "grad_norm": 0.3762017786502838, "learning_rate": 0.00017740807652815867, "loss": 1.4215, "step": 8707 }, { "epoch": 0.1131564884194249, "grad_norm": 0.3131179213523865, "learning_rate": 0.00017740547706624732, "loss": 1.3398, "step": 8708 }, { "epoch": 0.11316948296334077, "grad_norm": 0.3077961504459381, "learning_rate": 0.00017740287760433592, "loss": 1.3629, "step": 8709 }, { "epoch": 0.11318247750725664, "grad_norm": 0.41269850730895996, "learning_rate": 0.00017740027814242451, "loss": 1.3385, "step": 8710 }, { "epoch": 0.11319547205117252, "grad_norm": 0.5147266387939453, "learning_rate": 0.00017739767868051314, "loss": 1.5745, "step": 8711 }, { "epoch": 0.11320846659508839, "grad_norm": 0.3871222734451294, "learning_rate": 0.00017739507921860176, "loss": 1.2539, "step": 8712 }, { "epoch": 0.11322146113900426, "grad_norm": 0.423583060503006, "learning_rate": 0.0001773924797566904, "loss": 1.367, "step": 8713 }, { "epoch": 0.11323445568292013, "grad_norm": 0.39959803223609924, "learning_rate": 0.00017738988029477898, "loss": 1.57, "step": 8714 }, { "epoch": 0.11324745022683601, "grad_norm": 0.4388839900493622, "learning_rate": 0.0001773872808328676, "loss": 1.3366, "step": 8715 }, { "epoch": 0.11326044477075188, "grad_norm": 0.3355761468410492, "learning_rate": 0.00017738468137095623, "loss": 1.5388, "step": 8716 }, { "epoch": 0.11327343931466775, "grad_norm": 0.293712317943573, "learning_rate": 0.00017738208190904483, "loss": 1.2502, "step": 8717 }, { "epoch": 0.11328643385858363, "grad_norm": 0.380664587020874, "learning_rate": 0.00017737948244713346, "loss": 1.6161, "step": 8718 }, { "epoch": 0.1132994284024995, "grad_norm": 0.4230097234249115, "learning_rate": 0.00017737688298522205, "loss": 1.5456, "step": 8719 }, { "epoch": 0.11331242294641537, "grad_norm": 0.34559720754623413, "learning_rate": 0.0001773742835233107, "loss": 1.3284, "step": 8720 }, { "epoch": 0.11332541749033125, "grad_norm": 0.3350925147533417, "learning_rate": 0.0001773716840613993, "loss": 1.3247, "step": 8721 }, { "epoch": 0.11333841203424712, "grad_norm": 0.2867088317871094, "learning_rate": 0.0001773690845994879, "loss": 1.5199, "step": 8722 }, { "epoch": 0.11335140657816299, "grad_norm": 0.3675834834575653, "learning_rate": 0.00017736648513757652, "loss": 1.4505, "step": 8723 }, { "epoch": 0.11336440112207886, "grad_norm": 0.33889830112457275, "learning_rate": 0.00017736388567566515, "loss": 1.3594, "step": 8724 }, { "epoch": 0.11337739566599474, "grad_norm": 0.4452855587005615, "learning_rate": 0.00017736128621375377, "loss": 1.3238, "step": 8725 }, { "epoch": 0.11339039020991061, "grad_norm": 0.3662298321723938, "learning_rate": 0.00017735868675184237, "loss": 1.1203, "step": 8726 }, { "epoch": 0.11340338475382648, "grad_norm": 0.3540753722190857, "learning_rate": 0.000177356087289931, "loss": 1.3663, "step": 8727 }, { "epoch": 0.11341637929774236, "grad_norm": 0.39487308263778687, "learning_rate": 0.00017735348782801962, "loss": 1.4743, "step": 8728 }, { "epoch": 0.11342937384165823, "grad_norm": 0.38472577929496765, "learning_rate": 0.00017735088836610822, "loss": 1.504, "step": 8729 }, { "epoch": 0.1134423683855741, "grad_norm": 0.35093024373054504, "learning_rate": 0.00017734828890419684, "loss": 1.317, "step": 8730 }, { "epoch": 0.11345536292948998, "grad_norm": 0.44766491651535034, "learning_rate": 0.00017734568944228544, "loss": 1.1707, "step": 8731 }, { "epoch": 0.11346835747340585, "grad_norm": 0.35561200976371765, "learning_rate": 0.0001773430899803741, "loss": 1.429, "step": 8732 }, { "epoch": 0.11348135201732172, "grad_norm": 0.3999424874782562, "learning_rate": 0.0001773404905184627, "loss": 1.4971, "step": 8733 }, { "epoch": 0.1134943465612376, "grad_norm": 0.3015002906322479, "learning_rate": 0.00017733789105655128, "loss": 1.317, "step": 8734 }, { "epoch": 0.11350734110515347, "grad_norm": 0.3526372015476227, "learning_rate": 0.0001773352915946399, "loss": 1.2748, "step": 8735 }, { "epoch": 0.11352033564906935, "grad_norm": 0.3884231746196747, "learning_rate": 0.00017733269213272853, "loss": 1.3825, "step": 8736 }, { "epoch": 0.11353333019298523, "grad_norm": 0.3941156566143036, "learning_rate": 0.00017733009267081716, "loss": 1.4156, "step": 8737 }, { "epoch": 0.1135463247369011, "grad_norm": 0.37532535195350647, "learning_rate": 0.00017732749320890576, "loss": 1.438, "step": 8738 }, { "epoch": 0.11355931928081697, "grad_norm": 0.35799306631088257, "learning_rate": 0.00017732489374699438, "loss": 1.3502, "step": 8739 }, { "epoch": 0.11357231382473285, "grad_norm": 0.33526861667633057, "learning_rate": 0.000177322294285083, "loss": 1.5803, "step": 8740 }, { "epoch": 0.11358530836864872, "grad_norm": 0.22466392815113068, "learning_rate": 0.0001773196948231716, "loss": 1.1175, "step": 8741 }, { "epoch": 0.11359830291256459, "grad_norm": 0.45935171842575073, "learning_rate": 0.00017731709536126023, "loss": 1.4189, "step": 8742 }, { "epoch": 0.11361129745648046, "grad_norm": 0.3255417048931122, "learning_rate": 0.00017731449589934885, "loss": 1.2526, "step": 8743 }, { "epoch": 0.11362429200039634, "grad_norm": 0.44808411598205566, "learning_rate": 0.00017731189643743748, "loss": 1.2929, "step": 8744 }, { "epoch": 0.11363728654431221, "grad_norm": 0.48643577098846436, "learning_rate": 0.00017730929697552607, "loss": 1.4578, "step": 8745 }, { "epoch": 0.11365028108822808, "grad_norm": 0.36762893199920654, "learning_rate": 0.0001773066975136147, "loss": 1.5275, "step": 8746 }, { "epoch": 0.11366327563214396, "grad_norm": 0.4509170949459076, "learning_rate": 0.00017730409805170332, "loss": 1.5922, "step": 8747 }, { "epoch": 0.11367627017605983, "grad_norm": 0.43448859453201294, "learning_rate": 0.00017730149858979192, "loss": 1.3632, "step": 8748 }, { "epoch": 0.1136892647199757, "grad_norm": 0.34186384081840515, "learning_rate": 0.00017729889912788054, "loss": 1.4937, "step": 8749 }, { "epoch": 0.11370225926389158, "grad_norm": 0.3646673262119293, "learning_rate": 0.00017729629966596914, "loss": 1.4673, "step": 8750 }, { "epoch": 0.11371525380780745, "grad_norm": 0.23201428353786469, "learning_rate": 0.00017729370020405777, "loss": 1.342, "step": 8751 }, { "epoch": 0.11372824835172332, "grad_norm": 0.3720986247062683, "learning_rate": 0.0001772911007421464, "loss": 1.6353, "step": 8752 }, { "epoch": 0.1137412428956392, "grad_norm": 0.3540084958076477, "learning_rate": 0.000177288501280235, "loss": 1.3618, "step": 8753 }, { "epoch": 0.11375423743955507, "grad_norm": 0.47163715958595276, "learning_rate": 0.0001772859018183236, "loss": 1.4355, "step": 8754 }, { "epoch": 0.11376723198347094, "grad_norm": 0.3831759989261627, "learning_rate": 0.00017728330235641224, "loss": 1.5364, "step": 8755 }, { "epoch": 0.11378022652738681, "grad_norm": 0.41464853286743164, "learning_rate": 0.00017728070289450086, "loss": 1.4174, "step": 8756 }, { "epoch": 0.11379322107130269, "grad_norm": 0.4090052843093872, "learning_rate": 0.00017727810343258946, "loss": 1.5311, "step": 8757 }, { "epoch": 0.11380621561521856, "grad_norm": 0.37021541595458984, "learning_rate": 0.00017727550397067808, "loss": 1.3689, "step": 8758 }, { "epoch": 0.11381921015913443, "grad_norm": 0.4000149667263031, "learning_rate": 0.0001772729045087667, "loss": 1.4935, "step": 8759 }, { "epoch": 0.1138322047030503, "grad_norm": 0.30711108446121216, "learning_rate": 0.0001772703050468553, "loss": 1.1174, "step": 8760 }, { "epoch": 0.11384519924696618, "grad_norm": 0.49210160970687866, "learning_rate": 0.00017726770558494393, "loss": 1.5122, "step": 8761 }, { "epoch": 0.11385819379088205, "grad_norm": 0.36736223101615906, "learning_rate": 0.00017726510612303253, "loss": 1.3047, "step": 8762 }, { "epoch": 0.11387118833479792, "grad_norm": 0.42107829451560974, "learning_rate": 0.00017726250666112115, "loss": 1.6142, "step": 8763 }, { "epoch": 0.1138841828787138, "grad_norm": 0.30236494541168213, "learning_rate": 0.00017725990719920977, "loss": 1.3697, "step": 8764 }, { "epoch": 0.11389717742262967, "grad_norm": 0.3880172371864319, "learning_rate": 0.00017725730773729837, "loss": 1.3695, "step": 8765 }, { "epoch": 0.11391017196654554, "grad_norm": 0.4311392307281494, "learning_rate": 0.000177254708275387, "loss": 1.4307, "step": 8766 }, { "epoch": 0.11392316651046142, "grad_norm": 0.36553239822387695, "learning_rate": 0.00017725210881347562, "loss": 1.2901, "step": 8767 }, { "epoch": 0.11393616105437729, "grad_norm": 0.3549526035785675, "learning_rate": 0.00017724950935156425, "loss": 1.1591, "step": 8768 }, { "epoch": 0.11394915559829316, "grad_norm": 0.40318581461906433, "learning_rate": 0.00017724690988965284, "loss": 1.4488, "step": 8769 }, { "epoch": 0.11396215014220903, "grad_norm": 0.32147684693336487, "learning_rate": 0.00017724431042774147, "loss": 1.4482, "step": 8770 }, { "epoch": 0.11397514468612491, "grad_norm": 0.3440645933151245, "learning_rate": 0.0001772417109658301, "loss": 1.5292, "step": 8771 }, { "epoch": 0.11398813923004078, "grad_norm": 0.40621593594551086, "learning_rate": 0.0001772391115039187, "loss": 1.5201, "step": 8772 }, { "epoch": 0.11400113377395665, "grad_norm": 0.3528688848018646, "learning_rate": 0.00017723651204200731, "loss": 1.3687, "step": 8773 }, { "epoch": 0.11401412831787254, "grad_norm": 0.31640294194221497, "learning_rate": 0.0001772339125800959, "loss": 1.3212, "step": 8774 }, { "epoch": 0.11402712286178841, "grad_norm": 0.3772437274456024, "learning_rate": 0.00017723131311818456, "loss": 1.4486, "step": 8775 }, { "epoch": 0.11404011740570429, "grad_norm": 0.350006639957428, "learning_rate": 0.00017722871365627316, "loss": 1.42, "step": 8776 }, { "epoch": 0.11405311194962016, "grad_norm": 0.5429489612579346, "learning_rate": 0.00017722611419436176, "loss": 1.4749, "step": 8777 }, { "epoch": 0.11406610649353603, "grad_norm": 0.45498207211494446, "learning_rate": 0.0001772235147324504, "loss": 1.569, "step": 8778 }, { "epoch": 0.1140791010374519, "grad_norm": 0.3631664216518402, "learning_rate": 0.000177220915270539, "loss": 1.4255, "step": 8779 }, { "epoch": 0.11409209558136778, "grad_norm": 0.4022939205169678, "learning_rate": 0.00017721831580862763, "loss": 1.5694, "step": 8780 }, { "epoch": 0.11410509012528365, "grad_norm": 0.2507469952106476, "learning_rate": 0.00017721571634671623, "loss": 1.4725, "step": 8781 }, { "epoch": 0.11411808466919952, "grad_norm": 0.40444067120552063, "learning_rate": 0.00017721311688480485, "loss": 1.5507, "step": 8782 }, { "epoch": 0.1141310792131154, "grad_norm": 0.32774579524993896, "learning_rate": 0.00017721051742289348, "loss": 1.3757, "step": 8783 }, { "epoch": 0.11414407375703127, "grad_norm": 0.4960706830024719, "learning_rate": 0.00017720791796098207, "loss": 1.3014, "step": 8784 }, { "epoch": 0.11415706830094714, "grad_norm": 0.4631768763065338, "learning_rate": 0.0001772053184990707, "loss": 1.4516, "step": 8785 }, { "epoch": 0.11417006284486302, "grad_norm": 0.5482436418533325, "learning_rate": 0.00017720271903715932, "loss": 1.4805, "step": 8786 }, { "epoch": 0.11418305738877889, "grad_norm": 0.5119069814682007, "learning_rate": 0.00017720011957524795, "loss": 1.5827, "step": 8787 }, { "epoch": 0.11419605193269476, "grad_norm": 0.3748325705528259, "learning_rate": 0.00017719752011333655, "loss": 1.1814, "step": 8788 }, { "epoch": 0.11420904647661063, "grad_norm": 0.36338698863983154, "learning_rate": 0.00017719492065142514, "loss": 1.4111, "step": 8789 }, { "epoch": 0.11422204102052651, "grad_norm": 0.53028404712677, "learning_rate": 0.0001771923211895138, "loss": 1.5906, "step": 8790 }, { "epoch": 0.11423503556444238, "grad_norm": 0.38513025641441345, "learning_rate": 0.0001771897217276024, "loss": 1.6797, "step": 8791 }, { "epoch": 0.11424803010835825, "grad_norm": 0.4013994634151459, "learning_rate": 0.00017718712226569102, "loss": 1.3541, "step": 8792 }, { "epoch": 0.11426102465227413, "grad_norm": 0.31763675808906555, "learning_rate": 0.00017718452280377961, "loss": 1.1484, "step": 8793 }, { "epoch": 0.11427401919619, "grad_norm": 0.3755086660385132, "learning_rate": 0.00017718192334186824, "loss": 1.5035, "step": 8794 }, { "epoch": 0.11428701374010587, "grad_norm": 0.4056829810142517, "learning_rate": 0.00017717932387995686, "loss": 1.3224, "step": 8795 }, { "epoch": 0.11430000828402175, "grad_norm": 0.45021751523017883, "learning_rate": 0.00017717672441804546, "loss": 1.4002, "step": 8796 }, { "epoch": 0.11431300282793762, "grad_norm": 0.32861268520355225, "learning_rate": 0.00017717412495613408, "loss": 1.2972, "step": 8797 }, { "epoch": 0.11432599737185349, "grad_norm": 0.5190981030464172, "learning_rate": 0.0001771715254942227, "loss": 1.581, "step": 8798 }, { "epoch": 0.11433899191576936, "grad_norm": 0.4130539894104004, "learning_rate": 0.00017716892603231133, "loss": 1.385, "step": 8799 }, { "epoch": 0.11435198645968524, "grad_norm": 0.39239710569381714, "learning_rate": 0.00017716632657039993, "loss": 1.531, "step": 8800 }, { "epoch": 0.11436498100360111, "grad_norm": 0.36715611815452576, "learning_rate": 0.00017716372710848853, "loss": 1.1537, "step": 8801 }, { "epoch": 0.11437797554751698, "grad_norm": 0.42312273383140564, "learning_rate": 0.00017716112764657718, "loss": 1.3779, "step": 8802 }, { "epoch": 0.11439097009143286, "grad_norm": 0.3798922896385193, "learning_rate": 0.00017715852818466578, "loss": 1.6103, "step": 8803 }, { "epoch": 0.11440396463534873, "grad_norm": 0.4298017919063568, "learning_rate": 0.0001771559287227544, "loss": 1.362, "step": 8804 }, { "epoch": 0.1144169591792646, "grad_norm": 0.3507894277572632, "learning_rate": 0.000177153329260843, "loss": 1.4814, "step": 8805 }, { "epoch": 0.11442995372318047, "grad_norm": 0.46142932772636414, "learning_rate": 0.00017715072979893162, "loss": 1.5395, "step": 8806 }, { "epoch": 0.11444294826709635, "grad_norm": 0.3149317800998688, "learning_rate": 0.00017714813033702025, "loss": 1.4222, "step": 8807 }, { "epoch": 0.11445594281101222, "grad_norm": 0.4920963943004608, "learning_rate": 0.00017714553087510885, "loss": 1.603, "step": 8808 }, { "epoch": 0.1144689373549281, "grad_norm": 0.3889062702655792, "learning_rate": 0.00017714293141319747, "loss": 1.3815, "step": 8809 }, { "epoch": 0.11448193189884397, "grad_norm": 0.4440886676311493, "learning_rate": 0.0001771403319512861, "loss": 1.6133, "step": 8810 }, { "epoch": 0.11449492644275984, "grad_norm": 0.4331966042518616, "learning_rate": 0.00017713773248937472, "loss": 1.6185, "step": 8811 }, { "epoch": 0.11450792098667573, "grad_norm": 0.4049389064311981, "learning_rate": 0.00017713513302746332, "loss": 1.3872, "step": 8812 }, { "epoch": 0.1145209155305916, "grad_norm": 0.26543596386909485, "learning_rate": 0.00017713253356555194, "loss": 1.4367, "step": 8813 }, { "epoch": 0.11453391007450747, "grad_norm": 0.2800551652908325, "learning_rate": 0.00017712993410364057, "loss": 1.5072, "step": 8814 }, { "epoch": 0.11454690461842335, "grad_norm": 0.3064061105251312, "learning_rate": 0.00017712733464172916, "loss": 1.2181, "step": 8815 }, { "epoch": 0.11455989916233922, "grad_norm": 0.33081939816474915, "learning_rate": 0.0001771247351798178, "loss": 1.3777, "step": 8816 }, { "epoch": 0.11457289370625509, "grad_norm": 0.42478451132774353, "learning_rate": 0.0001771221357179064, "loss": 1.542, "step": 8817 }, { "epoch": 0.11458588825017096, "grad_norm": 0.41032856702804565, "learning_rate": 0.000177119536255995, "loss": 1.4939, "step": 8818 }, { "epoch": 0.11459888279408684, "grad_norm": 0.3695392310619354, "learning_rate": 0.00017711693679408363, "loss": 1.5077, "step": 8819 }, { "epoch": 0.11461187733800271, "grad_norm": 0.38681721687316895, "learning_rate": 0.00017711433733217223, "loss": 1.1793, "step": 8820 }, { "epoch": 0.11462487188191858, "grad_norm": 0.4438772201538086, "learning_rate": 0.00017711173787026088, "loss": 1.5969, "step": 8821 }, { "epoch": 0.11463786642583446, "grad_norm": 0.351814329624176, "learning_rate": 0.00017710913840834948, "loss": 1.5354, "step": 8822 }, { "epoch": 0.11465086096975033, "grad_norm": 0.4256584346294403, "learning_rate": 0.0001771065389464381, "loss": 1.3641, "step": 8823 }, { "epoch": 0.1146638555136662, "grad_norm": 0.3491363823413849, "learning_rate": 0.0001771039394845267, "loss": 1.2797, "step": 8824 }, { "epoch": 0.11467685005758207, "grad_norm": 0.3753140866756439, "learning_rate": 0.00017710134002261533, "loss": 1.2732, "step": 8825 }, { "epoch": 0.11468984460149795, "grad_norm": 0.35834160447120667, "learning_rate": 0.00017709874056070395, "loss": 1.4378, "step": 8826 }, { "epoch": 0.11470283914541382, "grad_norm": 0.3439320921897888, "learning_rate": 0.00017709614109879255, "loss": 1.606, "step": 8827 }, { "epoch": 0.1147158336893297, "grad_norm": 0.3852708041667938, "learning_rate": 0.00017709354163688117, "loss": 1.1904, "step": 8828 }, { "epoch": 0.11472882823324557, "grad_norm": 0.3906395733356476, "learning_rate": 0.0001770909421749698, "loss": 1.6437, "step": 8829 }, { "epoch": 0.11474182277716144, "grad_norm": 0.3611818850040436, "learning_rate": 0.00017708834271305842, "loss": 1.3814, "step": 8830 }, { "epoch": 0.11475481732107731, "grad_norm": 0.38790372014045715, "learning_rate": 0.00017708574325114702, "loss": 1.407, "step": 8831 }, { "epoch": 0.11476781186499319, "grad_norm": 0.40654081106185913, "learning_rate": 0.00017708314378923562, "loss": 1.3125, "step": 8832 }, { "epoch": 0.11478080640890906, "grad_norm": 0.5135018825531006, "learning_rate": 0.00017708054432732427, "loss": 1.3447, "step": 8833 }, { "epoch": 0.11479380095282493, "grad_norm": 0.45891162753105164, "learning_rate": 0.00017707794486541287, "loss": 1.382, "step": 8834 }, { "epoch": 0.1148067954967408, "grad_norm": 0.3153199255466461, "learning_rate": 0.0001770753454035015, "loss": 1.466, "step": 8835 }, { "epoch": 0.11481979004065668, "grad_norm": 0.41551390290260315, "learning_rate": 0.0001770727459415901, "loss": 1.4179, "step": 8836 }, { "epoch": 0.11483278458457255, "grad_norm": 0.44601374864578247, "learning_rate": 0.0001770701464796787, "loss": 1.3488, "step": 8837 }, { "epoch": 0.11484577912848842, "grad_norm": 0.34145426750183105, "learning_rate": 0.00017706754701776734, "loss": 1.4425, "step": 8838 }, { "epoch": 0.1148587736724043, "grad_norm": 0.45654135942459106, "learning_rate": 0.00017706494755585593, "loss": 1.4304, "step": 8839 }, { "epoch": 0.11487176821632017, "grad_norm": 0.4220637083053589, "learning_rate": 0.00017706234809394456, "loss": 1.5045, "step": 8840 }, { "epoch": 0.11488476276023604, "grad_norm": 0.31946858763694763, "learning_rate": 0.00017705974863203318, "loss": 1.4616, "step": 8841 }, { "epoch": 0.11489775730415192, "grad_norm": 0.38705259561538696, "learning_rate": 0.0001770571491701218, "loss": 1.2423, "step": 8842 }, { "epoch": 0.11491075184806779, "grad_norm": 0.40289783477783203, "learning_rate": 0.0001770545497082104, "loss": 1.3599, "step": 8843 }, { "epoch": 0.11492374639198366, "grad_norm": 0.37104278802871704, "learning_rate": 0.000177051950246299, "loss": 1.55, "step": 8844 }, { "epoch": 0.11493674093589953, "grad_norm": 0.37584975361824036, "learning_rate": 0.00017704935078438765, "loss": 1.3435, "step": 8845 }, { "epoch": 0.11494973547981541, "grad_norm": 0.48514819145202637, "learning_rate": 0.00017704675132247625, "loss": 1.3335, "step": 8846 }, { "epoch": 0.11496273002373128, "grad_norm": 0.36433327198028564, "learning_rate": 0.00017704415186056488, "loss": 1.4699, "step": 8847 }, { "epoch": 0.11497572456764715, "grad_norm": 0.378147155046463, "learning_rate": 0.00017704155239865347, "loss": 1.5004, "step": 8848 }, { "epoch": 0.11498871911156303, "grad_norm": 0.44031819701194763, "learning_rate": 0.0001770389529367421, "loss": 1.5224, "step": 8849 }, { "epoch": 0.11500171365547891, "grad_norm": 0.3756456673145294, "learning_rate": 0.00017703635347483072, "loss": 1.4458, "step": 8850 }, { "epoch": 0.11501470819939479, "grad_norm": 0.4542354941368103, "learning_rate": 0.00017703375401291932, "loss": 1.4985, "step": 8851 }, { "epoch": 0.11502770274331066, "grad_norm": 0.3495444357395172, "learning_rate": 0.00017703115455100797, "loss": 1.2886, "step": 8852 }, { "epoch": 0.11504069728722653, "grad_norm": 0.3535845875740051, "learning_rate": 0.00017702855508909657, "loss": 1.2623, "step": 8853 }, { "epoch": 0.1150536918311424, "grad_norm": 0.4975256323814392, "learning_rate": 0.0001770259556271852, "loss": 1.5397, "step": 8854 }, { "epoch": 0.11506668637505828, "grad_norm": 0.3407978415489197, "learning_rate": 0.0001770233561652738, "loss": 1.3114, "step": 8855 }, { "epoch": 0.11507968091897415, "grad_norm": 0.4966141879558563, "learning_rate": 0.00017702075670336241, "loss": 1.6036, "step": 8856 }, { "epoch": 0.11509267546289002, "grad_norm": 0.24291011691093445, "learning_rate": 0.00017701815724145104, "loss": 1.4086, "step": 8857 }, { "epoch": 0.1151056700068059, "grad_norm": 0.4362110197544098, "learning_rate": 0.00017701555777953964, "loss": 1.6148, "step": 8858 }, { "epoch": 0.11511866455072177, "grad_norm": 0.4057730734348297, "learning_rate": 0.00017701295831762826, "loss": 1.456, "step": 8859 }, { "epoch": 0.11513165909463764, "grad_norm": 0.40696412324905396, "learning_rate": 0.00017701035885571689, "loss": 1.4372, "step": 8860 }, { "epoch": 0.11514465363855352, "grad_norm": 0.41950613260269165, "learning_rate": 0.00017700775939380548, "loss": 1.6611, "step": 8861 }, { "epoch": 0.11515764818246939, "grad_norm": 0.3580048680305481, "learning_rate": 0.0001770051599318941, "loss": 1.3704, "step": 8862 }, { "epoch": 0.11517064272638526, "grad_norm": 0.434225469827652, "learning_rate": 0.0001770025604699827, "loss": 1.4208, "step": 8863 }, { "epoch": 0.11518363727030113, "grad_norm": 0.43868011236190796, "learning_rate": 0.00017699996100807136, "loss": 1.6859, "step": 8864 }, { "epoch": 0.11519663181421701, "grad_norm": 0.4498310685157776, "learning_rate": 0.00017699736154615995, "loss": 1.4112, "step": 8865 }, { "epoch": 0.11520962635813288, "grad_norm": 0.31805649399757385, "learning_rate": 0.00017699476208424858, "loss": 1.6124, "step": 8866 }, { "epoch": 0.11522262090204875, "grad_norm": 0.37745043635368347, "learning_rate": 0.00017699216262233718, "loss": 1.4802, "step": 8867 }, { "epoch": 0.11523561544596463, "grad_norm": 0.40003910660743713, "learning_rate": 0.0001769895631604258, "loss": 1.4032, "step": 8868 }, { "epoch": 0.1152486099898805, "grad_norm": 0.5220471024513245, "learning_rate": 0.00017698696369851442, "loss": 1.5611, "step": 8869 }, { "epoch": 0.11526160453379637, "grad_norm": 0.4814424216747284, "learning_rate": 0.00017698436423660302, "loss": 1.5152, "step": 8870 }, { "epoch": 0.11527459907771224, "grad_norm": 0.4091566205024719, "learning_rate": 0.00017698176477469165, "loss": 1.4474, "step": 8871 }, { "epoch": 0.11528759362162812, "grad_norm": 0.44456708431243896, "learning_rate": 0.00017697916531278027, "loss": 1.4016, "step": 8872 }, { "epoch": 0.11530058816554399, "grad_norm": 0.4723033010959625, "learning_rate": 0.00017697656585086887, "loss": 1.6079, "step": 8873 }, { "epoch": 0.11531358270945986, "grad_norm": 0.4143528938293457, "learning_rate": 0.0001769739663889575, "loss": 1.4639, "step": 8874 }, { "epoch": 0.11532657725337574, "grad_norm": 0.4051390588283539, "learning_rate": 0.0001769713669270461, "loss": 1.4984, "step": 8875 }, { "epoch": 0.11533957179729161, "grad_norm": 0.4169769883155823, "learning_rate": 0.00017696876746513474, "loss": 1.6132, "step": 8876 }, { "epoch": 0.11535256634120748, "grad_norm": 0.42784371972084045, "learning_rate": 0.00017696616800322334, "loss": 1.4837, "step": 8877 }, { "epoch": 0.11536556088512336, "grad_norm": 0.46404895186424255, "learning_rate": 0.00017696356854131196, "loss": 1.4151, "step": 8878 }, { "epoch": 0.11537855542903923, "grad_norm": 0.38801050186157227, "learning_rate": 0.00017696096907940056, "loss": 1.5737, "step": 8879 }, { "epoch": 0.1153915499729551, "grad_norm": 0.3865167796611786, "learning_rate": 0.00017695836961748919, "loss": 1.5625, "step": 8880 }, { "epoch": 0.11540454451687097, "grad_norm": 0.3784886300563812, "learning_rate": 0.0001769557701555778, "loss": 1.5683, "step": 8881 }, { "epoch": 0.11541753906078685, "grad_norm": 0.3542916178703308, "learning_rate": 0.0001769531706936664, "loss": 1.3851, "step": 8882 }, { "epoch": 0.11543053360470272, "grad_norm": 0.34836384654045105, "learning_rate": 0.00017695057123175503, "loss": 1.3651, "step": 8883 }, { "epoch": 0.1154435281486186, "grad_norm": 0.31087860465049744, "learning_rate": 0.00017694797176984366, "loss": 1.4249, "step": 8884 }, { "epoch": 0.11545652269253447, "grad_norm": 0.38339918851852417, "learning_rate": 0.00017694537230793225, "loss": 1.4929, "step": 8885 }, { "epoch": 0.11546951723645034, "grad_norm": 0.37504562735557556, "learning_rate": 0.00017694277284602088, "loss": 1.4332, "step": 8886 }, { "epoch": 0.11548251178036621, "grad_norm": 0.40253403782844543, "learning_rate": 0.0001769401733841095, "loss": 1.6358, "step": 8887 }, { "epoch": 0.1154955063242821, "grad_norm": 0.3863559663295746, "learning_rate": 0.00017693757392219813, "loss": 1.2662, "step": 8888 }, { "epoch": 0.11550850086819797, "grad_norm": 0.33933955430984497, "learning_rate": 0.00017693497446028672, "loss": 1.6058, "step": 8889 }, { "epoch": 0.11552149541211384, "grad_norm": 0.38857215642929077, "learning_rate": 0.00017693237499837535, "loss": 1.4662, "step": 8890 }, { "epoch": 0.11553448995602972, "grad_norm": 0.34573081135749817, "learning_rate": 0.00017692977553646397, "loss": 1.3575, "step": 8891 }, { "epoch": 0.11554748449994559, "grad_norm": 0.4519613981246948, "learning_rate": 0.00017692717607455257, "loss": 1.5525, "step": 8892 }, { "epoch": 0.11556047904386146, "grad_norm": 0.38523808121681213, "learning_rate": 0.0001769245766126412, "loss": 1.5181, "step": 8893 }, { "epoch": 0.11557347358777734, "grad_norm": 0.3955749273300171, "learning_rate": 0.0001769219771507298, "loss": 1.6206, "step": 8894 }, { "epoch": 0.11558646813169321, "grad_norm": 0.319232314825058, "learning_rate": 0.00017691937768881844, "loss": 1.3868, "step": 8895 }, { "epoch": 0.11559946267560908, "grad_norm": 0.3183676302433014, "learning_rate": 0.00017691677822690704, "loss": 1.3049, "step": 8896 }, { "epoch": 0.11561245721952496, "grad_norm": 0.3820331394672394, "learning_rate": 0.00017691417876499567, "loss": 1.3616, "step": 8897 }, { "epoch": 0.11562545176344083, "grad_norm": 0.40897414088249207, "learning_rate": 0.00017691157930308426, "loss": 1.4553, "step": 8898 }, { "epoch": 0.1156384463073567, "grad_norm": 0.43336331844329834, "learning_rate": 0.0001769089798411729, "loss": 1.3499, "step": 8899 }, { "epoch": 0.11565144085127257, "grad_norm": 0.35033878684043884, "learning_rate": 0.0001769063803792615, "loss": 1.458, "step": 8900 }, { "epoch": 0.11566443539518845, "grad_norm": 0.35629796981811523, "learning_rate": 0.0001769037809173501, "loss": 1.5141, "step": 8901 }, { "epoch": 0.11567742993910432, "grad_norm": 0.34182658791542053, "learning_rate": 0.00017690118145543873, "loss": 1.4882, "step": 8902 }, { "epoch": 0.1156904244830202, "grad_norm": 0.29156792163848877, "learning_rate": 0.00017689858199352736, "loss": 1.3431, "step": 8903 }, { "epoch": 0.11570341902693607, "grad_norm": 0.42234691977500916, "learning_rate": 0.00017689598253161596, "loss": 1.5054, "step": 8904 }, { "epoch": 0.11571641357085194, "grad_norm": 0.3064664304256439, "learning_rate": 0.00017689338306970458, "loss": 1.2828, "step": 8905 }, { "epoch": 0.11572940811476781, "grad_norm": 0.44407403469085693, "learning_rate": 0.00017689078360779318, "loss": 1.5759, "step": 8906 }, { "epoch": 0.11574240265868369, "grad_norm": 0.42143547534942627, "learning_rate": 0.00017688818414588183, "loss": 1.4991, "step": 8907 }, { "epoch": 0.11575539720259956, "grad_norm": 0.4423423111438751, "learning_rate": 0.00017688558468397043, "loss": 1.3935, "step": 8908 }, { "epoch": 0.11576839174651543, "grad_norm": 0.505020260810852, "learning_rate": 0.00017688298522205905, "loss": 1.4539, "step": 8909 }, { "epoch": 0.1157813862904313, "grad_norm": 0.4939204156398773, "learning_rate": 0.00017688038576014765, "loss": 1.5619, "step": 8910 }, { "epoch": 0.11579438083434718, "grad_norm": 0.3910490870475769, "learning_rate": 0.00017687778629823627, "loss": 1.3836, "step": 8911 }, { "epoch": 0.11580737537826305, "grad_norm": 0.3882463872432709, "learning_rate": 0.0001768751868363249, "loss": 1.4598, "step": 8912 }, { "epoch": 0.11582036992217892, "grad_norm": 0.33080700039863586, "learning_rate": 0.0001768725873744135, "loss": 1.3029, "step": 8913 }, { "epoch": 0.1158333644660948, "grad_norm": 0.3902685344219208, "learning_rate": 0.00017686998791250212, "loss": 1.1121, "step": 8914 }, { "epoch": 0.11584635901001067, "grad_norm": 0.2802811861038208, "learning_rate": 0.00017686738845059074, "loss": 1.4127, "step": 8915 }, { "epoch": 0.11585935355392654, "grad_norm": 0.38256385922431946, "learning_rate": 0.00017686478898867934, "loss": 1.5203, "step": 8916 }, { "epoch": 0.11587234809784241, "grad_norm": 0.3764645755290985, "learning_rate": 0.00017686218952676797, "loss": 1.4504, "step": 8917 }, { "epoch": 0.11588534264175829, "grad_norm": 0.391086220741272, "learning_rate": 0.00017685959006485656, "loss": 1.4046, "step": 8918 }, { "epoch": 0.11589833718567416, "grad_norm": 0.42881014943122864, "learning_rate": 0.00017685699060294521, "loss": 1.4219, "step": 8919 }, { "epoch": 0.11591133172959003, "grad_norm": 0.3621368706226349, "learning_rate": 0.0001768543911410338, "loss": 1.4589, "step": 8920 }, { "epoch": 0.1159243262735059, "grad_norm": 0.40348464250564575, "learning_rate": 0.00017685179167912244, "loss": 1.3321, "step": 8921 }, { "epoch": 0.11593732081742178, "grad_norm": 0.3442576229572296, "learning_rate": 0.00017684919221721103, "loss": 1.5715, "step": 8922 }, { "epoch": 0.11595031536133765, "grad_norm": 0.42147359251976013, "learning_rate": 0.00017684659275529966, "loss": 1.3348, "step": 8923 }, { "epoch": 0.11596330990525353, "grad_norm": 0.44710928201675415, "learning_rate": 0.00017684399329338828, "loss": 1.4736, "step": 8924 }, { "epoch": 0.1159763044491694, "grad_norm": 0.3510482907295227, "learning_rate": 0.00017684139383147688, "loss": 1.4809, "step": 8925 }, { "epoch": 0.11598929899308529, "grad_norm": 0.3672427535057068, "learning_rate": 0.00017683879436956553, "loss": 1.4735, "step": 8926 }, { "epoch": 0.11600229353700116, "grad_norm": 0.3786901533603668, "learning_rate": 0.00017683619490765413, "loss": 1.2821, "step": 8927 }, { "epoch": 0.11601528808091703, "grad_norm": 0.42218825221061707, "learning_rate": 0.00017683359544574273, "loss": 1.1771, "step": 8928 }, { "epoch": 0.1160282826248329, "grad_norm": 0.5461128950119019, "learning_rate": 0.00017683099598383135, "loss": 1.537, "step": 8929 }, { "epoch": 0.11604127716874878, "grad_norm": 0.39218270778656006, "learning_rate": 0.00017682839652191998, "loss": 1.6629, "step": 8930 }, { "epoch": 0.11605427171266465, "grad_norm": 0.48970291018486023, "learning_rate": 0.0001768257970600086, "loss": 1.465, "step": 8931 }, { "epoch": 0.11606726625658052, "grad_norm": 0.36469948291778564, "learning_rate": 0.0001768231975980972, "loss": 1.1396, "step": 8932 }, { "epoch": 0.1160802608004964, "grad_norm": 0.3875502347946167, "learning_rate": 0.00017682059813618582, "loss": 1.4774, "step": 8933 }, { "epoch": 0.11609325534441227, "grad_norm": 0.35956117510795593, "learning_rate": 0.00017681799867427445, "loss": 1.3158, "step": 8934 }, { "epoch": 0.11610624988832814, "grad_norm": 0.3810860514640808, "learning_rate": 0.00017681539921236304, "loss": 1.3578, "step": 8935 }, { "epoch": 0.11611924443224401, "grad_norm": 0.3534294068813324, "learning_rate": 0.00017681279975045167, "loss": 1.4307, "step": 8936 }, { "epoch": 0.11613223897615989, "grad_norm": 0.3832017779350281, "learning_rate": 0.00017681020028854027, "loss": 1.1423, "step": 8937 }, { "epoch": 0.11614523352007576, "grad_norm": 0.35043561458587646, "learning_rate": 0.00017680760082662892, "loss": 1.3967, "step": 8938 }, { "epoch": 0.11615822806399163, "grad_norm": 0.3551879823207855, "learning_rate": 0.00017680500136471751, "loss": 1.4977, "step": 8939 }, { "epoch": 0.1161712226079075, "grad_norm": 0.34447479248046875, "learning_rate": 0.0001768024019028061, "loss": 1.5291, "step": 8940 }, { "epoch": 0.11618421715182338, "grad_norm": 0.396355003118515, "learning_rate": 0.00017679980244089474, "loss": 1.4142, "step": 8941 }, { "epoch": 0.11619721169573925, "grad_norm": 0.5634851455688477, "learning_rate": 0.00017679720297898336, "loss": 1.3662, "step": 8942 }, { "epoch": 0.11621020623965513, "grad_norm": 0.3756301701068878, "learning_rate": 0.00017679460351707199, "loss": 1.1948, "step": 8943 }, { "epoch": 0.116223200783571, "grad_norm": 0.33726197481155396, "learning_rate": 0.00017679200405516058, "loss": 1.4539, "step": 8944 }, { "epoch": 0.11623619532748687, "grad_norm": 0.399055540561676, "learning_rate": 0.0001767894045932492, "loss": 1.4658, "step": 8945 }, { "epoch": 0.11624918987140274, "grad_norm": 0.3979785144329071, "learning_rate": 0.00017678680513133783, "loss": 1.5467, "step": 8946 }, { "epoch": 0.11626218441531862, "grad_norm": 0.3564103841781616, "learning_rate": 0.00017678420566942643, "loss": 1.5565, "step": 8947 }, { "epoch": 0.11627517895923449, "grad_norm": 0.31619182229042053, "learning_rate": 0.00017678160620751505, "loss": 1.2381, "step": 8948 }, { "epoch": 0.11628817350315036, "grad_norm": 0.3950020670890808, "learning_rate": 0.00017677900674560365, "loss": 1.4118, "step": 8949 }, { "epoch": 0.11630116804706624, "grad_norm": 0.35931581258773804, "learning_rate": 0.0001767764072836923, "loss": 1.3007, "step": 8950 }, { "epoch": 0.11631416259098211, "grad_norm": 0.3802351951599121, "learning_rate": 0.0001767738078217809, "loss": 1.4365, "step": 8951 }, { "epoch": 0.11632715713489798, "grad_norm": 0.354037880897522, "learning_rate": 0.00017677120835986952, "loss": 1.4072, "step": 8952 }, { "epoch": 0.11634015167881386, "grad_norm": 0.3977590799331665, "learning_rate": 0.00017676860889795812, "loss": 1.4536, "step": 8953 }, { "epoch": 0.11635314622272973, "grad_norm": 0.4268656373023987, "learning_rate": 0.00017676600943604675, "loss": 1.583, "step": 8954 }, { "epoch": 0.1163661407666456, "grad_norm": 0.32860028743743896, "learning_rate": 0.00017676340997413537, "loss": 1.4924, "step": 8955 }, { "epoch": 0.11637913531056147, "grad_norm": 0.3602045178413391, "learning_rate": 0.00017676081051222397, "loss": 1.4033, "step": 8956 }, { "epoch": 0.11639212985447735, "grad_norm": 0.31551361083984375, "learning_rate": 0.0001767582110503126, "loss": 1.4526, "step": 8957 }, { "epoch": 0.11640512439839322, "grad_norm": 0.3984453082084656, "learning_rate": 0.00017675561158840122, "loss": 1.2386, "step": 8958 }, { "epoch": 0.11641811894230909, "grad_norm": 0.39161843061447144, "learning_rate": 0.00017675301212648981, "loss": 1.4361, "step": 8959 }, { "epoch": 0.11643111348622497, "grad_norm": 0.37419062852859497, "learning_rate": 0.00017675041266457844, "loss": 1.6336, "step": 8960 }, { "epoch": 0.11644410803014084, "grad_norm": 0.35908767580986023, "learning_rate": 0.00017674781320266706, "loss": 1.3653, "step": 8961 }, { "epoch": 0.11645710257405671, "grad_norm": 0.40294522047042847, "learning_rate": 0.0001767452137407557, "loss": 1.4608, "step": 8962 }, { "epoch": 0.11647009711797258, "grad_norm": 0.3234565258026123, "learning_rate": 0.00017674261427884429, "loss": 1.5228, "step": 8963 }, { "epoch": 0.11648309166188846, "grad_norm": 0.4464538097381592, "learning_rate": 0.0001767400148169329, "loss": 1.6113, "step": 8964 }, { "epoch": 0.11649608620580434, "grad_norm": 0.36843040585517883, "learning_rate": 0.00017673741535502153, "loss": 1.5668, "step": 8965 }, { "epoch": 0.11650908074972022, "grad_norm": 0.47493094205856323, "learning_rate": 0.00017673481589311013, "loss": 1.5702, "step": 8966 }, { "epoch": 0.11652207529363609, "grad_norm": 0.3580007553100586, "learning_rate": 0.00017673221643119876, "loss": 1.4124, "step": 8967 }, { "epoch": 0.11653506983755196, "grad_norm": 0.4454807937145233, "learning_rate": 0.00017672961696928735, "loss": 1.3978, "step": 8968 }, { "epoch": 0.11654806438146784, "grad_norm": 0.34001752734184265, "learning_rate": 0.00017672701750737598, "loss": 1.3282, "step": 8969 }, { "epoch": 0.11656105892538371, "grad_norm": 0.37206077575683594, "learning_rate": 0.0001767244180454646, "loss": 1.3462, "step": 8970 }, { "epoch": 0.11657405346929958, "grad_norm": 0.3516480624675751, "learning_rate": 0.0001767218185835532, "loss": 1.3093, "step": 8971 }, { "epoch": 0.11658704801321546, "grad_norm": 0.4320347309112549, "learning_rate": 0.00017671921912164182, "loss": 1.4906, "step": 8972 }, { "epoch": 0.11660004255713133, "grad_norm": 0.30538275837898254, "learning_rate": 0.00017671661965973045, "loss": 1.267, "step": 8973 }, { "epoch": 0.1166130371010472, "grad_norm": 0.4605169892311096, "learning_rate": 0.00017671402019781907, "loss": 1.3431, "step": 8974 }, { "epoch": 0.11662603164496307, "grad_norm": 0.4159318804740906, "learning_rate": 0.00017671142073590767, "loss": 1.5798, "step": 8975 }, { "epoch": 0.11663902618887895, "grad_norm": 0.40308213233947754, "learning_rate": 0.0001767088212739963, "loss": 1.483, "step": 8976 }, { "epoch": 0.11665202073279482, "grad_norm": 0.3748931586742401, "learning_rate": 0.00017670622181208492, "loss": 1.3179, "step": 8977 }, { "epoch": 0.11666501527671069, "grad_norm": 0.5202804803848267, "learning_rate": 0.00017670362235017352, "loss": 1.5477, "step": 8978 }, { "epoch": 0.11667800982062657, "grad_norm": 0.3610033690929413, "learning_rate": 0.00017670102288826214, "loss": 1.2878, "step": 8979 }, { "epoch": 0.11669100436454244, "grad_norm": 0.37570205330848694, "learning_rate": 0.00017669842342635074, "loss": 1.3448, "step": 8980 }, { "epoch": 0.11670399890845831, "grad_norm": 0.3589829206466675, "learning_rate": 0.0001766958239644394, "loss": 1.2419, "step": 8981 }, { "epoch": 0.11671699345237418, "grad_norm": 0.4157191216945648, "learning_rate": 0.000176693224502528, "loss": 1.4887, "step": 8982 }, { "epoch": 0.11672998799629006, "grad_norm": 0.488595187664032, "learning_rate": 0.00017669062504061659, "loss": 1.6534, "step": 8983 }, { "epoch": 0.11674298254020593, "grad_norm": 0.3122594356536865, "learning_rate": 0.0001766880255787052, "loss": 1.3417, "step": 8984 }, { "epoch": 0.1167559770841218, "grad_norm": 0.3656911253929138, "learning_rate": 0.00017668542611679383, "loss": 1.4776, "step": 8985 }, { "epoch": 0.11676897162803768, "grad_norm": 0.4155288636684418, "learning_rate": 0.00017668282665488246, "loss": 1.5027, "step": 8986 }, { "epoch": 0.11678196617195355, "grad_norm": 0.410571813583374, "learning_rate": 0.00017668022719297106, "loss": 1.5129, "step": 8987 }, { "epoch": 0.11679496071586942, "grad_norm": 0.4012853801250458, "learning_rate": 0.00017667762773105968, "loss": 1.2757, "step": 8988 }, { "epoch": 0.1168079552597853, "grad_norm": 0.3849453628063202, "learning_rate": 0.0001766750282691483, "loss": 1.5095, "step": 8989 }, { "epoch": 0.11682094980370117, "grad_norm": 0.45215800404548645, "learning_rate": 0.0001766724288072369, "loss": 1.4937, "step": 8990 }, { "epoch": 0.11683394434761704, "grad_norm": 0.42277297377586365, "learning_rate": 0.00017666982934532553, "loss": 1.6123, "step": 8991 }, { "epoch": 0.11684693889153291, "grad_norm": 0.37789762020111084, "learning_rate": 0.00017666722988341412, "loss": 1.7941, "step": 8992 }, { "epoch": 0.11685993343544879, "grad_norm": 0.34148523211479187, "learning_rate": 0.00017666463042150278, "loss": 1.5682, "step": 8993 }, { "epoch": 0.11687292797936466, "grad_norm": 0.38256609439849854, "learning_rate": 0.00017666203095959137, "loss": 1.435, "step": 8994 }, { "epoch": 0.11688592252328053, "grad_norm": 0.4391743838787079, "learning_rate": 0.00017665943149767997, "loss": 1.4486, "step": 8995 }, { "epoch": 0.1168989170671964, "grad_norm": 0.3742262125015259, "learning_rate": 0.0001766568320357686, "loss": 1.5345, "step": 8996 }, { "epoch": 0.11691191161111228, "grad_norm": 0.27352893352508545, "learning_rate": 0.00017665423257385722, "loss": 1.3189, "step": 8997 }, { "epoch": 0.11692490615502815, "grad_norm": 0.41723528504371643, "learning_rate": 0.00017665163311194584, "loss": 1.3694, "step": 8998 }, { "epoch": 0.11693790069894403, "grad_norm": 0.4188641607761383, "learning_rate": 0.00017664903365003444, "loss": 1.4569, "step": 8999 }, { "epoch": 0.1169508952428599, "grad_norm": 0.4057868421077728, "learning_rate": 0.00017664643418812307, "loss": 1.5233, "step": 9000 }, { "epoch": 0.11696388978677577, "grad_norm": 0.3293067514896393, "learning_rate": 0.0001766438347262117, "loss": 1.3608, "step": 9001 }, { "epoch": 0.11697688433069164, "grad_norm": 0.4646606743335724, "learning_rate": 0.0001766412352643003, "loss": 1.6135, "step": 9002 }, { "epoch": 0.11698987887460753, "grad_norm": 0.4501727819442749, "learning_rate": 0.0001766386358023889, "loss": 1.4578, "step": 9003 }, { "epoch": 0.1170028734185234, "grad_norm": 0.389575719833374, "learning_rate": 0.00017663603634047754, "loss": 1.3977, "step": 9004 }, { "epoch": 0.11701586796243928, "grad_norm": 0.35004299879074097, "learning_rate": 0.00017663343687856616, "loss": 1.556, "step": 9005 }, { "epoch": 0.11702886250635515, "grad_norm": 0.36122962832450867, "learning_rate": 0.00017663083741665476, "loss": 1.3904, "step": 9006 }, { "epoch": 0.11704185705027102, "grad_norm": 0.4256083071231842, "learning_rate": 0.00017662823795474336, "loss": 1.474, "step": 9007 }, { "epoch": 0.1170548515941869, "grad_norm": 0.43045875430107117, "learning_rate": 0.000176625638492832, "loss": 1.4376, "step": 9008 }, { "epoch": 0.11706784613810277, "grad_norm": 0.3174442946910858, "learning_rate": 0.0001766230390309206, "loss": 1.4488, "step": 9009 }, { "epoch": 0.11708084068201864, "grad_norm": 0.3972882926464081, "learning_rate": 0.00017662043956900923, "loss": 1.4777, "step": 9010 }, { "epoch": 0.11709383522593451, "grad_norm": 0.37631741166114807, "learning_rate": 0.00017661784010709783, "loss": 1.3369, "step": 9011 }, { "epoch": 0.11710682976985039, "grad_norm": 0.33900636434555054, "learning_rate": 0.00017661524064518645, "loss": 1.4271, "step": 9012 }, { "epoch": 0.11711982431376626, "grad_norm": 0.3936507999897003, "learning_rate": 0.00017661264118327508, "loss": 1.4139, "step": 9013 }, { "epoch": 0.11713281885768213, "grad_norm": 0.33837345242500305, "learning_rate": 0.00017661004172136367, "loss": 1.2523, "step": 9014 }, { "epoch": 0.117145813401598, "grad_norm": 0.4173916280269623, "learning_rate": 0.0001766074422594523, "loss": 1.3606, "step": 9015 }, { "epoch": 0.11715880794551388, "grad_norm": 0.41455206274986267, "learning_rate": 0.00017660484279754092, "loss": 1.5347, "step": 9016 }, { "epoch": 0.11717180248942975, "grad_norm": 0.4208053648471832, "learning_rate": 0.00017660224333562955, "loss": 1.277, "step": 9017 }, { "epoch": 0.11718479703334563, "grad_norm": 0.34625452756881714, "learning_rate": 0.00017659964387371814, "loss": 1.3009, "step": 9018 }, { "epoch": 0.1171977915772615, "grad_norm": 0.3499453067779541, "learning_rate": 0.00017659704441180677, "loss": 1.4951, "step": 9019 }, { "epoch": 0.11721078612117737, "grad_norm": 0.3977719843387604, "learning_rate": 0.0001765944449498954, "loss": 1.3714, "step": 9020 }, { "epoch": 0.11722378066509324, "grad_norm": 0.34811314940452576, "learning_rate": 0.000176591845487984, "loss": 1.4242, "step": 9021 }, { "epoch": 0.11723677520900912, "grad_norm": 0.39048734307289124, "learning_rate": 0.00017658924602607262, "loss": 1.5532, "step": 9022 }, { "epoch": 0.11724976975292499, "grad_norm": 0.29212865233421326, "learning_rate": 0.0001765866465641612, "loss": 1.1341, "step": 9023 }, { "epoch": 0.11726276429684086, "grad_norm": 0.3969223201274872, "learning_rate": 0.00017658404710224984, "loss": 1.3572, "step": 9024 }, { "epoch": 0.11727575884075674, "grad_norm": 0.31001126766204834, "learning_rate": 0.00017658144764033846, "loss": 1.3244, "step": 9025 }, { "epoch": 0.11728875338467261, "grad_norm": 0.3509131073951721, "learning_rate": 0.00017657884817842706, "loss": 1.129, "step": 9026 }, { "epoch": 0.11730174792858848, "grad_norm": 0.4359632134437561, "learning_rate": 0.00017657624871651568, "loss": 1.4418, "step": 9027 }, { "epoch": 0.11731474247250435, "grad_norm": 0.3695792853832245, "learning_rate": 0.0001765736492546043, "loss": 1.5434, "step": 9028 }, { "epoch": 0.11732773701642023, "grad_norm": 0.36861875653266907, "learning_rate": 0.00017657104979269293, "loss": 1.5469, "step": 9029 }, { "epoch": 0.1173407315603361, "grad_norm": 0.3502877354621887, "learning_rate": 0.00017656845033078153, "loss": 1.3562, "step": 9030 }, { "epoch": 0.11735372610425197, "grad_norm": 0.4547896981239319, "learning_rate": 0.00017656585086887015, "loss": 1.3683, "step": 9031 }, { "epoch": 0.11736672064816785, "grad_norm": 0.4016554355621338, "learning_rate": 0.00017656325140695878, "loss": 1.4324, "step": 9032 }, { "epoch": 0.11737971519208372, "grad_norm": 0.43591275811195374, "learning_rate": 0.00017656065194504738, "loss": 1.5161, "step": 9033 }, { "epoch": 0.11739270973599959, "grad_norm": 0.397156685590744, "learning_rate": 0.000176558052483136, "loss": 1.2897, "step": 9034 }, { "epoch": 0.11740570427991547, "grad_norm": 0.4603898227214813, "learning_rate": 0.00017655545302122462, "loss": 1.3899, "step": 9035 }, { "epoch": 0.11741869882383134, "grad_norm": 0.3756342828273773, "learning_rate": 0.00017655285355931325, "loss": 1.3801, "step": 9036 }, { "epoch": 0.11743169336774721, "grad_norm": 0.5173438787460327, "learning_rate": 0.00017655025409740185, "loss": 1.3651, "step": 9037 }, { "epoch": 0.11744468791166308, "grad_norm": 0.3822080194950104, "learning_rate": 0.00017654765463549044, "loss": 1.6543, "step": 9038 }, { "epoch": 0.11745768245557896, "grad_norm": 0.35000085830688477, "learning_rate": 0.0001765450551735791, "loss": 1.313, "step": 9039 }, { "epoch": 0.11747067699949483, "grad_norm": 0.32739773392677307, "learning_rate": 0.0001765424557116677, "loss": 1.4679, "step": 9040 }, { "epoch": 0.11748367154341072, "grad_norm": 0.4821600317955017, "learning_rate": 0.00017653985624975632, "loss": 1.5763, "step": 9041 }, { "epoch": 0.11749666608732659, "grad_norm": 0.26479119062423706, "learning_rate": 0.00017653725678784491, "loss": 1.3357, "step": 9042 }, { "epoch": 0.11750966063124246, "grad_norm": 0.47364068031311035, "learning_rate": 0.00017653465732593354, "loss": 1.4487, "step": 9043 }, { "epoch": 0.11752265517515834, "grad_norm": 0.34486469626426697, "learning_rate": 0.00017653205786402216, "loss": 1.2702, "step": 9044 }, { "epoch": 0.11753564971907421, "grad_norm": 0.5029379725456238, "learning_rate": 0.00017652945840211076, "loss": 1.4134, "step": 9045 }, { "epoch": 0.11754864426299008, "grad_norm": 0.4247707724571228, "learning_rate": 0.00017652685894019939, "loss": 1.3643, "step": 9046 }, { "epoch": 0.11756163880690595, "grad_norm": 0.4294482469558716, "learning_rate": 0.000176524259478288, "loss": 1.4631, "step": 9047 }, { "epoch": 0.11757463335082183, "grad_norm": 0.33735018968582153, "learning_rate": 0.00017652166001637663, "loss": 1.3189, "step": 9048 }, { "epoch": 0.1175876278947377, "grad_norm": 0.46224460005760193, "learning_rate": 0.00017651906055446523, "loss": 1.3773, "step": 9049 }, { "epoch": 0.11760062243865357, "grad_norm": 0.4145694673061371, "learning_rate": 0.00017651646109255383, "loss": 1.5058, "step": 9050 }, { "epoch": 0.11761361698256945, "grad_norm": 0.3737334609031677, "learning_rate": 0.00017651386163064248, "loss": 1.3586, "step": 9051 }, { "epoch": 0.11762661152648532, "grad_norm": 0.38131749629974365, "learning_rate": 0.00017651126216873108, "loss": 1.5761, "step": 9052 }, { "epoch": 0.11763960607040119, "grad_norm": 0.38808903098106384, "learning_rate": 0.0001765086627068197, "loss": 1.3509, "step": 9053 }, { "epoch": 0.11765260061431707, "grad_norm": 0.3853827714920044, "learning_rate": 0.0001765060632449083, "loss": 1.5033, "step": 9054 }, { "epoch": 0.11766559515823294, "grad_norm": 0.34500375390052795, "learning_rate": 0.00017650346378299692, "loss": 1.5851, "step": 9055 }, { "epoch": 0.11767858970214881, "grad_norm": 0.3166362941265106, "learning_rate": 0.00017650086432108555, "loss": 1.173, "step": 9056 }, { "epoch": 0.11769158424606468, "grad_norm": 0.387178510427475, "learning_rate": 0.00017649826485917415, "loss": 1.712, "step": 9057 }, { "epoch": 0.11770457878998056, "grad_norm": 0.425152063369751, "learning_rate": 0.00017649566539726277, "loss": 1.3726, "step": 9058 }, { "epoch": 0.11771757333389643, "grad_norm": 0.45254212617874146, "learning_rate": 0.0001764930659353514, "loss": 1.5894, "step": 9059 }, { "epoch": 0.1177305678778123, "grad_norm": 0.37838661670684814, "learning_rate": 0.00017649046647344002, "loss": 1.5149, "step": 9060 }, { "epoch": 0.11774356242172818, "grad_norm": 0.3081110417842865, "learning_rate": 0.00017648786701152862, "loss": 1.3828, "step": 9061 }, { "epoch": 0.11775655696564405, "grad_norm": 0.42954570055007935, "learning_rate": 0.00017648526754961721, "loss": 1.5931, "step": 9062 }, { "epoch": 0.11776955150955992, "grad_norm": 0.399687796831131, "learning_rate": 0.00017648266808770587, "loss": 1.4147, "step": 9063 }, { "epoch": 0.1177825460534758, "grad_norm": 0.39408156275749207, "learning_rate": 0.00017648006862579446, "loss": 1.6926, "step": 9064 }, { "epoch": 0.11779554059739167, "grad_norm": 0.442289799451828, "learning_rate": 0.0001764774691638831, "loss": 1.3548, "step": 9065 }, { "epoch": 0.11780853514130754, "grad_norm": 0.5029153823852539, "learning_rate": 0.00017647486970197169, "loss": 1.4643, "step": 9066 }, { "epoch": 0.11782152968522341, "grad_norm": 0.4636981785297394, "learning_rate": 0.0001764722702400603, "loss": 1.5374, "step": 9067 }, { "epoch": 0.11783452422913929, "grad_norm": 0.4107612073421478, "learning_rate": 0.00017646967077814893, "loss": 1.5267, "step": 9068 }, { "epoch": 0.11784751877305516, "grad_norm": 0.3928777277469635, "learning_rate": 0.00017646707131623753, "loss": 1.6081, "step": 9069 }, { "epoch": 0.11786051331697103, "grad_norm": 0.3787069618701935, "learning_rate": 0.00017646447185432616, "loss": 1.2859, "step": 9070 }, { "epoch": 0.1178735078608869, "grad_norm": 0.35391828417778015, "learning_rate": 0.00017646187239241478, "loss": 1.4555, "step": 9071 }, { "epoch": 0.11788650240480278, "grad_norm": 0.45430445671081543, "learning_rate": 0.0001764592729305034, "loss": 1.3373, "step": 9072 }, { "epoch": 0.11789949694871865, "grad_norm": 0.45637673139572144, "learning_rate": 0.000176456673468592, "loss": 1.5001, "step": 9073 }, { "epoch": 0.11791249149263452, "grad_norm": 0.5029851198196411, "learning_rate": 0.00017645407400668063, "loss": 1.5598, "step": 9074 }, { "epoch": 0.1179254860365504, "grad_norm": 0.32405078411102295, "learning_rate": 0.00017645147454476925, "loss": 1.4992, "step": 9075 }, { "epoch": 0.11793848058046627, "grad_norm": 0.4317871630191803, "learning_rate": 0.00017644887508285785, "loss": 1.1203, "step": 9076 }, { "epoch": 0.11795147512438214, "grad_norm": 0.3266223073005676, "learning_rate": 0.00017644627562094647, "loss": 1.2223, "step": 9077 }, { "epoch": 0.11796446966829802, "grad_norm": 0.38675281405448914, "learning_rate": 0.0001764436761590351, "loss": 1.5496, "step": 9078 }, { "epoch": 0.1179774642122139, "grad_norm": 0.3666847050189972, "learning_rate": 0.0001764410766971237, "loss": 1.2681, "step": 9079 }, { "epoch": 0.11799045875612978, "grad_norm": 0.4113713204860687, "learning_rate": 0.00017643847723521232, "loss": 1.3278, "step": 9080 }, { "epoch": 0.11800345330004565, "grad_norm": 0.3360549211502075, "learning_rate": 0.00017643587777330092, "loss": 1.2036, "step": 9081 }, { "epoch": 0.11801644784396152, "grad_norm": 0.3180699348449707, "learning_rate": 0.00017643327831138957, "loss": 1.4236, "step": 9082 }, { "epoch": 0.1180294423878774, "grad_norm": 0.36815357208251953, "learning_rate": 0.00017643067884947817, "loss": 1.3284, "step": 9083 }, { "epoch": 0.11804243693179327, "grad_norm": 0.4223071336746216, "learning_rate": 0.0001764280793875668, "loss": 1.6531, "step": 9084 }, { "epoch": 0.11805543147570914, "grad_norm": 0.43429213762283325, "learning_rate": 0.0001764254799256554, "loss": 1.5114, "step": 9085 }, { "epoch": 0.11806842601962501, "grad_norm": 0.4690849483013153, "learning_rate": 0.000176422880463744, "loss": 1.5304, "step": 9086 }, { "epoch": 0.11808142056354089, "grad_norm": 0.33998048305511475, "learning_rate": 0.00017642028100183264, "loss": 1.1628, "step": 9087 }, { "epoch": 0.11809441510745676, "grad_norm": 0.40953803062438965, "learning_rate": 0.00017641768153992123, "loss": 1.693, "step": 9088 }, { "epoch": 0.11810740965137263, "grad_norm": 0.446580708026886, "learning_rate": 0.00017641508207800986, "loss": 1.4581, "step": 9089 }, { "epoch": 0.1181204041952885, "grad_norm": 0.32885387539863586, "learning_rate": 0.00017641248261609848, "loss": 1.3159, "step": 9090 }, { "epoch": 0.11813339873920438, "grad_norm": 0.4315154254436493, "learning_rate": 0.00017640988315418708, "loss": 1.4291, "step": 9091 }, { "epoch": 0.11814639328312025, "grad_norm": 0.4724661409854889, "learning_rate": 0.0001764072836922757, "loss": 1.3778, "step": 9092 }, { "epoch": 0.11815938782703612, "grad_norm": 0.36929646134376526, "learning_rate": 0.0001764046842303643, "loss": 1.5197, "step": 9093 }, { "epoch": 0.118172382370952, "grad_norm": 0.3613959550857544, "learning_rate": 0.00017640208476845295, "loss": 1.5014, "step": 9094 }, { "epoch": 0.11818537691486787, "grad_norm": 0.4160013496875763, "learning_rate": 0.00017639948530654155, "loss": 1.5534, "step": 9095 }, { "epoch": 0.11819837145878374, "grad_norm": 0.3813033998012543, "learning_rate": 0.00017639688584463018, "loss": 1.3654, "step": 9096 }, { "epoch": 0.11821136600269962, "grad_norm": 0.451663613319397, "learning_rate": 0.00017639428638271877, "loss": 1.567, "step": 9097 }, { "epoch": 0.11822436054661549, "grad_norm": 0.3539445698261261, "learning_rate": 0.0001763916869208074, "loss": 1.5162, "step": 9098 }, { "epoch": 0.11823735509053136, "grad_norm": 0.4470173418521881, "learning_rate": 0.00017638908745889602, "loss": 1.5748, "step": 9099 }, { "epoch": 0.11825034963444724, "grad_norm": 0.3758106529712677, "learning_rate": 0.00017638648799698462, "loss": 1.4006, "step": 9100 }, { "epoch": 0.11826334417836311, "grad_norm": 0.3752334415912628, "learning_rate": 0.00017638388853507324, "loss": 1.3266, "step": 9101 }, { "epoch": 0.11827633872227898, "grad_norm": 0.3364852964878082, "learning_rate": 0.00017638128907316187, "loss": 1.2271, "step": 9102 }, { "epoch": 0.11828933326619485, "grad_norm": 0.4314340353012085, "learning_rate": 0.0001763786896112505, "loss": 1.4655, "step": 9103 }, { "epoch": 0.11830232781011073, "grad_norm": 0.32277750968933105, "learning_rate": 0.0001763760901493391, "loss": 1.3103, "step": 9104 }, { "epoch": 0.1183153223540266, "grad_norm": 0.403269499540329, "learning_rate": 0.0001763734906874277, "loss": 1.5042, "step": 9105 }, { "epoch": 0.11832831689794247, "grad_norm": 0.360903263092041, "learning_rate": 0.00017637089122551634, "loss": 1.4842, "step": 9106 }, { "epoch": 0.11834131144185835, "grad_norm": 0.33754587173461914, "learning_rate": 0.00017636829176360494, "loss": 1.3197, "step": 9107 }, { "epoch": 0.11835430598577422, "grad_norm": 0.4857274293899536, "learning_rate": 0.00017636569230169356, "loss": 1.4843, "step": 9108 }, { "epoch": 0.11836730052969009, "grad_norm": 0.3593307137489319, "learning_rate": 0.00017636309283978216, "loss": 1.4489, "step": 9109 }, { "epoch": 0.11838029507360597, "grad_norm": 0.3252532184123993, "learning_rate": 0.00017636049337787078, "loss": 1.398, "step": 9110 }, { "epoch": 0.11839328961752184, "grad_norm": 0.5149595737457275, "learning_rate": 0.0001763578939159594, "loss": 1.5012, "step": 9111 }, { "epoch": 0.11840628416143771, "grad_norm": 0.4347781538963318, "learning_rate": 0.000176355294454048, "loss": 1.4523, "step": 9112 }, { "epoch": 0.11841927870535358, "grad_norm": 0.4299534559249878, "learning_rate": 0.00017635269499213666, "loss": 1.6555, "step": 9113 }, { "epoch": 0.11843227324926946, "grad_norm": 0.38847315311431885, "learning_rate": 0.00017635009553022525, "loss": 1.2422, "step": 9114 }, { "epoch": 0.11844526779318533, "grad_norm": 0.3944111764431, "learning_rate": 0.00017634749606831388, "loss": 1.3886, "step": 9115 }, { "epoch": 0.1184582623371012, "grad_norm": 0.5636235475540161, "learning_rate": 0.00017634489660640248, "loss": 1.5124, "step": 9116 }, { "epoch": 0.11847125688101709, "grad_norm": 0.4249912202358246, "learning_rate": 0.0001763422971444911, "loss": 1.4151, "step": 9117 }, { "epoch": 0.11848425142493296, "grad_norm": 0.45633092522621155, "learning_rate": 0.00017633969768257973, "loss": 1.3655, "step": 9118 }, { "epoch": 0.11849724596884884, "grad_norm": 0.3120082914829254, "learning_rate": 0.00017633709822066832, "loss": 1.4502, "step": 9119 }, { "epoch": 0.11851024051276471, "grad_norm": 0.423132985830307, "learning_rate": 0.00017633449875875695, "loss": 1.4398, "step": 9120 }, { "epoch": 0.11852323505668058, "grad_norm": 0.46242639422416687, "learning_rate": 0.00017633189929684557, "loss": 1.6068, "step": 9121 }, { "epoch": 0.11853622960059645, "grad_norm": 0.3650546669960022, "learning_rate": 0.00017632929983493417, "loss": 1.5062, "step": 9122 }, { "epoch": 0.11854922414451233, "grad_norm": 0.3244951069355011, "learning_rate": 0.0001763267003730228, "loss": 1.1703, "step": 9123 }, { "epoch": 0.1185622186884282, "grad_norm": 0.40561574697494507, "learning_rate": 0.0001763241009111114, "loss": 1.5337, "step": 9124 }, { "epoch": 0.11857521323234407, "grad_norm": 0.5181355476379395, "learning_rate": 0.00017632150144920004, "loss": 1.54, "step": 9125 }, { "epoch": 0.11858820777625995, "grad_norm": 0.23585772514343262, "learning_rate": 0.00017631890198728864, "loss": 1.2874, "step": 9126 }, { "epoch": 0.11860120232017582, "grad_norm": 0.42218998074531555, "learning_rate": 0.00017631630252537726, "loss": 1.5099, "step": 9127 }, { "epoch": 0.11861419686409169, "grad_norm": 0.4974414110183716, "learning_rate": 0.00017631370306346586, "loss": 1.5862, "step": 9128 }, { "epoch": 0.11862719140800757, "grad_norm": 0.37885788083076477, "learning_rate": 0.00017631110360155449, "loss": 1.4945, "step": 9129 }, { "epoch": 0.11864018595192344, "grad_norm": 0.37644726037979126, "learning_rate": 0.0001763085041396431, "loss": 1.7116, "step": 9130 }, { "epoch": 0.11865318049583931, "grad_norm": 0.42313623428344727, "learning_rate": 0.0001763059046777317, "loss": 1.3986, "step": 9131 }, { "epoch": 0.11866617503975518, "grad_norm": 0.4575001895427704, "learning_rate": 0.00017630330521582033, "loss": 1.514, "step": 9132 }, { "epoch": 0.11867916958367106, "grad_norm": 0.38589704036712646, "learning_rate": 0.00017630070575390896, "loss": 1.44, "step": 9133 }, { "epoch": 0.11869216412758693, "grad_norm": 0.3714844584465027, "learning_rate": 0.00017629810629199755, "loss": 1.4082, "step": 9134 }, { "epoch": 0.1187051586715028, "grad_norm": 0.46000030636787415, "learning_rate": 0.00017629550683008618, "loss": 1.6049, "step": 9135 }, { "epoch": 0.11871815321541868, "grad_norm": 0.4333815276622772, "learning_rate": 0.00017629290736817478, "loss": 1.352, "step": 9136 }, { "epoch": 0.11873114775933455, "grad_norm": 0.36204734444618225, "learning_rate": 0.00017629030790626343, "loss": 1.5806, "step": 9137 }, { "epoch": 0.11874414230325042, "grad_norm": 0.3939478099346161, "learning_rate": 0.00017628770844435203, "loss": 1.5185, "step": 9138 }, { "epoch": 0.1187571368471663, "grad_norm": 0.4169251620769501, "learning_rate": 0.00017628510898244065, "loss": 1.3396, "step": 9139 }, { "epoch": 0.11877013139108217, "grad_norm": 0.4443172216415405, "learning_rate": 0.00017628250952052925, "loss": 1.4886, "step": 9140 }, { "epoch": 0.11878312593499804, "grad_norm": 0.4923625588417053, "learning_rate": 0.00017627991005861787, "loss": 1.4416, "step": 9141 }, { "epoch": 0.11879612047891391, "grad_norm": 0.4224836826324463, "learning_rate": 0.0001762773105967065, "loss": 1.4631, "step": 9142 }, { "epoch": 0.11880911502282979, "grad_norm": 0.348072350025177, "learning_rate": 0.0001762747111347951, "loss": 1.4693, "step": 9143 }, { "epoch": 0.11882210956674566, "grad_norm": 0.3817839026451111, "learning_rate": 0.00017627211167288372, "loss": 1.3953, "step": 9144 }, { "epoch": 0.11883510411066153, "grad_norm": 0.4439152777194977, "learning_rate": 0.00017626951221097234, "loss": 1.3352, "step": 9145 }, { "epoch": 0.1188480986545774, "grad_norm": 0.399453341960907, "learning_rate": 0.00017626691274906094, "loss": 1.4849, "step": 9146 }, { "epoch": 0.11886109319849328, "grad_norm": 0.2881518602371216, "learning_rate": 0.00017626431328714956, "loss": 1.1897, "step": 9147 }, { "epoch": 0.11887408774240915, "grad_norm": 0.41193607449531555, "learning_rate": 0.0001762617138252382, "loss": 1.3936, "step": 9148 }, { "epoch": 0.11888708228632502, "grad_norm": 0.28240767121315, "learning_rate": 0.0001762591143633268, "loss": 1.3064, "step": 9149 }, { "epoch": 0.1189000768302409, "grad_norm": 0.4817437529563904, "learning_rate": 0.0001762565149014154, "loss": 1.4227, "step": 9150 }, { "epoch": 0.11891307137415677, "grad_norm": 0.3776431381702423, "learning_rate": 0.00017625391543950404, "loss": 1.1917, "step": 9151 }, { "epoch": 0.11892606591807264, "grad_norm": 0.48175594210624695, "learning_rate": 0.00017625131597759266, "loss": 1.4662, "step": 9152 }, { "epoch": 0.11893906046198852, "grad_norm": 0.46940624713897705, "learning_rate": 0.00017624871651568126, "loss": 1.2709, "step": 9153 }, { "epoch": 0.11895205500590439, "grad_norm": 0.33565062284469604, "learning_rate": 0.00017624611705376988, "loss": 1.2616, "step": 9154 }, { "epoch": 0.11896504954982028, "grad_norm": 0.4791678190231323, "learning_rate": 0.00017624351759185848, "loss": 1.4862, "step": 9155 }, { "epoch": 0.11897804409373615, "grad_norm": 0.47324106097221375, "learning_rate": 0.00017624091812994713, "loss": 1.5412, "step": 9156 }, { "epoch": 0.11899103863765202, "grad_norm": 0.4095821678638458, "learning_rate": 0.00017623831866803573, "loss": 1.652, "step": 9157 }, { "epoch": 0.1190040331815679, "grad_norm": 0.42597275972366333, "learning_rate": 0.00017623571920612435, "loss": 1.5127, "step": 9158 }, { "epoch": 0.11901702772548377, "grad_norm": 0.4836615324020386, "learning_rate": 0.00017623311974421295, "loss": 1.5348, "step": 9159 }, { "epoch": 0.11903002226939964, "grad_norm": 0.34499868750572205, "learning_rate": 0.00017623052028230157, "loss": 1.3959, "step": 9160 }, { "epoch": 0.11904301681331551, "grad_norm": 0.33290573954582214, "learning_rate": 0.0001762279208203902, "loss": 1.1943, "step": 9161 }, { "epoch": 0.11905601135723139, "grad_norm": 0.3397541046142578, "learning_rate": 0.0001762253213584788, "loss": 1.1803, "step": 9162 }, { "epoch": 0.11906900590114726, "grad_norm": 0.408047080039978, "learning_rate": 0.00017622272189656742, "loss": 1.5064, "step": 9163 }, { "epoch": 0.11908200044506313, "grad_norm": 0.4218473434448242, "learning_rate": 0.00017622012243465604, "loss": 1.4539, "step": 9164 }, { "epoch": 0.119094994988979, "grad_norm": 0.36197763681411743, "learning_rate": 0.00017621752297274464, "loss": 1.5051, "step": 9165 }, { "epoch": 0.11910798953289488, "grad_norm": 0.3601924777030945, "learning_rate": 0.00017621492351083327, "loss": 1.5871, "step": 9166 }, { "epoch": 0.11912098407681075, "grad_norm": 0.4823008179664612, "learning_rate": 0.00017621232404892186, "loss": 1.2832, "step": 9167 }, { "epoch": 0.11913397862072662, "grad_norm": 0.4834491014480591, "learning_rate": 0.00017620972458701052, "loss": 1.4861, "step": 9168 }, { "epoch": 0.1191469731646425, "grad_norm": 0.47572794556617737, "learning_rate": 0.0001762071251250991, "loss": 1.5033, "step": 9169 }, { "epoch": 0.11915996770855837, "grad_norm": 0.38881343603134155, "learning_rate": 0.00017620452566318774, "loss": 1.5676, "step": 9170 }, { "epoch": 0.11917296225247424, "grad_norm": 0.3057236671447754, "learning_rate": 0.00017620192620127634, "loss": 1.3916, "step": 9171 }, { "epoch": 0.11918595679639012, "grad_norm": 0.3584456145763397, "learning_rate": 0.00017619932673936496, "loss": 1.3828, "step": 9172 }, { "epoch": 0.11919895134030599, "grad_norm": 0.3690851032733917, "learning_rate": 0.00017619672727745358, "loss": 1.4101, "step": 9173 }, { "epoch": 0.11921194588422186, "grad_norm": 0.4371185600757599, "learning_rate": 0.00017619412781554218, "loss": 1.5609, "step": 9174 }, { "epoch": 0.11922494042813774, "grad_norm": 0.4170822203159332, "learning_rate": 0.0001761915283536308, "loss": 1.3929, "step": 9175 }, { "epoch": 0.11923793497205361, "grad_norm": 0.24932368099689484, "learning_rate": 0.00017618892889171943, "loss": 1.1849, "step": 9176 }, { "epoch": 0.11925092951596948, "grad_norm": 0.5735031962394714, "learning_rate": 0.00017618632942980803, "loss": 1.6123, "step": 9177 }, { "epoch": 0.11926392405988535, "grad_norm": 0.3706362545490265, "learning_rate": 0.00017618372996789665, "loss": 1.3372, "step": 9178 }, { "epoch": 0.11927691860380123, "grad_norm": 0.3488906919956207, "learning_rate": 0.00017618113050598525, "loss": 1.2939, "step": 9179 }, { "epoch": 0.1192899131477171, "grad_norm": 0.46113717555999756, "learning_rate": 0.0001761785310440739, "loss": 1.5784, "step": 9180 }, { "epoch": 0.11930290769163297, "grad_norm": 0.38700899481773376, "learning_rate": 0.0001761759315821625, "loss": 1.4882, "step": 9181 }, { "epoch": 0.11931590223554885, "grad_norm": 0.40001922845840454, "learning_rate": 0.00017617333212025112, "loss": 1.2753, "step": 9182 }, { "epoch": 0.11932889677946472, "grad_norm": 0.48631253838539124, "learning_rate": 0.00017617073265833972, "loss": 1.6322, "step": 9183 }, { "epoch": 0.11934189132338059, "grad_norm": 0.35049542784690857, "learning_rate": 0.00017616813319642834, "loss": 1.4875, "step": 9184 }, { "epoch": 0.11935488586729646, "grad_norm": 0.3739171624183655, "learning_rate": 0.00017616553373451697, "loss": 1.1823, "step": 9185 }, { "epoch": 0.11936788041121234, "grad_norm": 0.4527442157268524, "learning_rate": 0.00017616293427260557, "loss": 1.5332, "step": 9186 }, { "epoch": 0.11938087495512821, "grad_norm": 0.48978838324546814, "learning_rate": 0.00017616033481069422, "loss": 1.5907, "step": 9187 }, { "epoch": 0.11939386949904408, "grad_norm": 0.312648743391037, "learning_rate": 0.00017615773534878282, "loss": 1.3738, "step": 9188 }, { "epoch": 0.11940686404295996, "grad_norm": 0.31487321853637695, "learning_rate": 0.0001761551358868714, "loss": 1.538, "step": 9189 }, { "epoch": 0.11941985858687583, "grad_norm": 0.3423134684562683, "learning_rate": 0.00017615253642496004, "loss": 1.463, "step": 9190 }, { "epoch": 0.1194328531307917, "grad_norm": 0.27038952708244324, "learning_rate": 0.00017614993696304866, "loss": 1.3668, "step": 9191 }, { "epoch": 0.11944584767470758, "grad_norm": 0.37435290217399597, "learning_rate": 0.0001761473375011373, "loss": 1.2847, "step": 9192 }, { "epoch": 0.11945884221862346, "grad_norm": 0.32118090987205505, "learning_rate": 0.00017614473803922588, "loss": 1.2147, "step": 9193 }, { "epoch": 0.11947183676253934, "grad_norm": 0.38151276111602783, "learning_rate": 0.0001761421385773145, "loss": 1.3207, "step": 9194 }, { "epoch": 0.11948483130645521, "grad_norm": 0.39752355217933655, "learning_rate": 0.00017613953911540313, "loss": 1.5357, "step": 9195 }, { "epoch": 0.11949782585037108, "grad_norm": 0.3887764811515808, "learning_rate": 0.00017613693965349173, "loss": 1.208, "step": 9196 }, { "epoch": 0.11951082039428695, "grad_norm": 0.4786817133426666, "learning_rate": 0.00017613434019158035, "loss": 1.599, "step": 9197 }, { "epoch": 0.11952381493820283, "grad_norm": 0.38299015164375305, "learning_rate": 0.00017613174072966895, "loss": 1.3418, "step": 9198 }, { "epoch": 0.1195368094821187, "grad_norm": 0.3579920530319214, "learning_rate": 0.0001761291412677576, "loss": 1.5076, "step": 9199 }, { "epoch": 0.11954980402603457, "grad_norm": 0.37152478098869324, "learning_rate": 0.0001761265418058462, "loss": 1.2421, "step": 9200 }, { "epoch": 0.11956279856995045, "grad_norm": 0.28056010603904724, "learning_rate": 0.0001761239423439348, "loss": 1.6692, "step": 9201 }, { "epoch": 0.11957579311386632, "grad_norm": 0.4593879282474518, "learning_rate": 0.00017612134288202342, "loss": 1.393, "step": 9202 }, { "epoch": 0.11958878765778219, "grad_norm": 0.36429837346076965, "learning_rate": 0.00017611874342011205, "loss": 1.5133, "step": 9203 }, { "epoch": 0.11960178220169806, "grad_norm": 0.3880722224712372, "learning_rate": 0.00017611614395820067, "loss": 1.4259, "step": 9204 }, { "epoch": 0.11961477674561394, "grad_norm": 0.3845650851726532, "learning_rate": 0.00017611354449628927, "loss": 1.6412, "step": 9205 }, { "epoch": 0.11962777128952981, "grad_norm": 0.34730517864227295, "learning_rate": 0.0001761109450343779, "loss": 1.3237, "step": 9206 }, { "epoch": 0.11964076583344568, "grad_norm": 0.3938792049884796, "learning_rate": 0.00017610834557246652, "loss": 1.534, "step": 9207 }, { "epoch": 0.11965376037736156, "grad_norm": 0.3831009566783905, "learning_rate": 0.00017610574611055512, "loss": 1.3745, "step": 9208 }, { "epoch": 0.11966675492127743, "grad_norm": 0.4241017997264862, "learning_rate": 0.00017610314664864374, "loss": 1.417, "step": 9209 }, { "epoch": 0.1196797494651933, "grad_norm": 0.4668117165565491, "learning_rate": 0.00017610054718673234, "loss": 1.3501, "step": 9210 }, { "epoch": 0.11969274400910918, "grad_norm": 0.3994259834289551, "learning_rate": 0.000176097947724821, "loss": 1.5354, "step": 9211 }, { "epoch": 0.11970573855302505, "grad_norm": 0.3899083137512207, "learning_rate": 0.0001760953482629096, "loss": 1.465, "step": 9212 }, { "epoch": 0.11971873309694092, "grad_norm": 0.3840634822845459, "learning_rate": 0.00017609274880099818, "loss": 1.2357, "step": 9213 }, { "epoch": 0.1197317276408568, "grad_norm": 0.34098339080810547, "learning_rate": 0.0001760901493390868, "loss": 1.5441, "step": 9214 }, { "epoch": 0.11974472218477267, "grad_norm": 0.4204641282558441, "learning_rate": 0.00017608754987717543, "loss": 1.3739, "step": 9215 }, { "epoch": 0.11975771672868854, "grad_norm": 0.38086310029029846, "learning_rate": 0.00017608495041526406, "loss": 1.5106, "step": 9216 }, { "epoch": 0.11977071127260441, "grad_norm": 0.4430563151836395, "learning_rate": 0.00017608235095335265, "loss": 1.6276, "step": 9217 }, { "epoch": 0.11978370581652029, "grad_norm": 0.3624548316001892, "learning_rate": 0.00017607975149144128, "loss": 1.3159, "step": 9218 }, { "epoch": 0.11979670036043616, "grad_norm": 0.3226706385612488, "learning_rate": 0.0001760771520295299, "loss": 1.4212, "step": 9219 }, { "epoch": 0.11980969490435203, "grad_norm": 0.3631337285041809, "learning_rate": 0.0001760745525676185, "loss": 1.438, "step": 9220 }, { "epoch": 0.1198226894482679, "grad_norm": 0.3964703381061554, "learning_rate": 0.00017607195310570713, "loss": 1.5525, "step": 9221 }, { "epoch": 0.11983568399218378, "grad_norm": 0.4067436754703522, "learning_rate": 0.00017606935364379575, "loss": 1.4267, "step": 9222 }, { "epoch": 0.11984867853609965, "grad_norm": 0.37554192543029785, "learning_rate": 0.00017606675418188437, "loss": 1.5489, "step": 9223 }, { "epoch": 0.11986167308001552, "grad_norm": 0.38695022463798523, "learning_rate": 0.00017606415471997297, "loss": 1.4888, "step": 9224 }, { "epoch": 0.1198746676239314, "grad_norm": 0.27086248993873596, "learning_rate": 0.0001760615552580616, "loss": 1.1403, "step": 9225 }, { "epoch": 0.11988766216784727, "grad_norm": 0.3320566415786743, "learning_rate": 0.00017605895579615022, "loss": 1.3235, "step": 9226 }, { "epoch": 0.11990065671176314, "grad_norm": 0.4477056860923767, "learning_rate": 0.00017605635633423882, "loss": 1.3832, "step": 9227 }, { "epoch": 0.11991365125567902, "grad_norm": 0.42116689682006836, "learning_rate": 0.00017605375687232744, "loss": 1.3223, "step": 9228 }, { "epoch": 0.11992664579959489, "grad_norm": 0.3749730587005615, "learning_rate": 0.00017605115741041604, "loss": 1.4251, "step": 9229 }, { "epoch": 0.11993964034351076, "grad_norm": 0.3425008952617645, "learning_rate": 0.00017604855794850466, "loss": 1.2577, "step": 9230 }, { "epoch": 0.11995263488742665, "grad_norm": 0.3652496039867401, "learning_rate": 0.0001760459584865933, "loss": 1.2502, "step": 9231 }, { "epoch": 0.11996562943134252, "grad_norm": 0.4033985435962677, "learning_rate": 0.0001760433590246819, "loss": 1.407, "step": 9232 }, { "epoch": 0.1199786239752584, "grad_norm": 0.38904184103012085, "learning_rate": 0.0001760407595627705, "loss": 1.3089, "step": 9233 }, { "epoch": 0.11999161851917427, "grad_norm": 0.4373151659965515, "learning_rate": 0.00017603816010085914, "loss": 1.4005, "step": 9234 }, { "epoch": 0.12000461306309014, "grad_norm": 0.39823117852211, "learning_rate": 0.00017603556063894776, "loss": 1.4045, "step": 9235 }, { "epoch": 0.12001760760700601, "grad_norm": 0.3663402199745178, "learning_rate": 0.00017603296117703636, "loss": 1.3554, "step": 9236 }, { "epoch": 0.12003060215092189, "grad_norm": 0.44000574946403503, "learning_rate": 0.00017603036171512498, "loss": 1.3884, "step": 9237 }, { "epoch": 0.12004359669483776, "grad_norm": 0.38616931438446045, "learning_rate": 0.0001760277622532136, "loss": 1.4719, "step": 9238 }, { "epoch": 0.12005659123875363, "grad_norm": 0.40794098377227783, "learning_rate": 0.0001760251627913022, "loss": 1.3611, "step": 9239 }, { "epoch": 0.1200695857826695, "grad_norm": 0.473959743976593, "learning_rate": 0.00017602256332939083, "loss": 1.4182, "step": 9240 }, { "epoch": 0.12008258032658538, "grad_norm": 0.30298808217048645, "learning_rate": 0.00017601996386747943, "loss": 1.3097, "step": 9241 }, { "epoch": 0.12009557487050125, "grad_norm": 0.32721996307373047, "learning_rate": 0.00017601736440556808, "loss": 1.4447, "step": 9242 }, { "epoch": 0.12010856941441712, "grad_norm": 0.40660449862480164, "learning_rate": 0.00017601476494365667, "loss": 1.4161, "step": 9243 }, { "epoch": 0.120121563958333, "grad_norm": 0.7570306658744812, "learning_rate": 0.00017601216548174527, "loss": 1.4576, "step": 9244 }, { "epoch": 0.12013455850224887, "grad_norm": 0.4389442801475525, "learning_rate": 0.0001760095660198339, "loss": 1.5235, "step": 9245 }, { "epoch": 0.12014755304616474, "grad_norm": 0.39088016748428345, "learning_rate": 0.00017600696655792252, "loss": 1.3278, "step": 9246 }, { "epoch": 0.12016054759008062, "grad_norm": 0.3125208020210266, "learning_rate": 0.00017600436709601115, "loss": 1.3421, "step": 9247 }, { "epoch": 0.12017354213399649, "grad_norm": 0.3390772342681885, "learning_rate": 0.00017600176763409974, "loss": 1.18, "step": 9248 }, { "epoch": 0.12018653667791236, "grad_norm": 0.35902246832847595, "learning_rate": 0.00017599916817218837, "loss": 1.2974, "step": 9249 }, { "epoch": 0.12019953122182823, "grad_norm": 0.41039159893989563, "learning_rate": 0.000175996568710277, "loss": 1.552, "step": 9250 }, { "epoch": 0.12021252576574411, "grad_norm": 0.7680051922798157, "learning_rate": 0.0001759939692483656, "loss": 1.3846, "step": 9251 }, { "epoch": 0.12022552030965998, "grad_norm": 0.3319466710090637, "learning_rate": 0.0001759913697864542, "loss": 1.3807, "step": 9252 }, { "epoch": 0.12023851485357585, "grad_norm": 0.3757462501525879, "learning_rate": 0.0001759887703245428, "loss": 1.4435, "step": 9253 }, { "epoch": 0.12025150939749173, "grad_norm": 0.32348567247390747, "learning_rate": 0.00017598617086263146, "loss": 1.4973, "step": 9254 }, { "epoch": 0.1202645039414076, "grad_norm": 0.4984109699726105, "learning_rate": 0.00017598357140072006, "loss": 1.5998, "step": 9255 }, { "epoch": 0.12027749848532347, "grad_norm": 0.4879307746887207, "learning_rate": 0.00017598097193880866, "loss": 1.3849, "step": 9256 }, { "epoch": 0.12029049302923935, "grad_norm": 0.46921801567077637, "learning_rate": 0.00017597837247689728, "loss": 1.5048, "step": 9257 }, { "epoch": 0.12030348757315522, "grad_norm": 0.5488225221633911, "learning_rate": 0.0001759757730149859, "loss": 1.5263, "step": 9258 }, { "epoch": 0.12031648211707109, "grad_norm": 0.46183568239212036, "learning_rate": 0.00017597317355307453, "loss": 1.6255, "step": 9259 }, { "epoch": 0.12032947666098696, "grad_norm": 0.35899120569229126, "learning_rate": 0.00017597057409116313, "loss": 1.3729, "step": 9260 }, { "epoch": 0.12034247120490284, "grad_norm": 0.36731332540512085, "learning_rate": 0.00017596797462925175, "loss": 1.2305, "step": 9261 }, { "epoch": 0.12035546574881871, "grad_norm": 0.3999217450618744, "learning_rate": 0.00017596537516734038, "loss": 1.7356, "step": 9262 }, { "epoch": 0.12036846029273458, "grad_norm": 0.4213993549346924, "learning_rate": 0.00017596277570542897, "loss": 1.4108, "step": 9263 }, { "epoch": 0.12038145483665046, "grad_norm": 0.3461703956127167, "learning_rate": 0.0001759601762435176, "loss": 1.5847, "step": 9264 }, { "epoch": 0.12039444938056633, "grad_norm": 0.36527061462402344, "learning_rate": 0.00017595757678160622, "loss": 1.3304, "step": 9265 }, { "epoch": 0.1204074439244822, "grad_norm": 0.4013476073741913, "learning_rate": 0.00017595497731969485, "loss": 1.2884, "step": 9266 }, { "epoch": 0.12042043846839807, "grad_norm": 0.4246699810028076, "learning_rate": 0.00017595237785778345, "loss": 1.5034, "step": 9267 }, { "epoch": 0.12043343301231395, "grad_norm": 0.38888460397720337, "learning_rate": 0.00017594977839587204, "loss": 1.3504, "step": 9268 }, { "epoch": 0.12044642755622983, "grad_norm": 0.3762776553630829, "learning_rate": 0.0001759471789339607, "loss": 1.5032, "step": 9269 }, { "epoch": 0.12045942210014571, "grad_norm": 0.40783512592315674, "learning_rate": 0.0001759445794720493, "loss": 1.4541, "step": 9270 }, { "epoch": 0.12047241664406158, "grad_norm": 0.33674609661102295, "learning_rate": 0.00017594198001013792, "loss": 1.3576, "step": 9271 }, { "epoch": 0.12048541118797745, "grad_norm": 0.3624383807182312, "learning_rate": 0.0001759393805482265, "loss": 1.4659, "step": 9272 }, { "epoch": 0.12049840573189333, "grad_norm": 0.4997468590736389, "learning_rate": 0.00017593678108631514, "loss": 1.568, "step": 9273 }, { "epoch": 0.1205114002758092, "grad_norm": 0.49450770020484924, "learning_rate": 0.00017593418162440376, "loss": 1.4277, "step": 9274 }, { "epoch": 0.12052439481972507, "grad_norm": 0.21475233137607574, "learning_rate": 0.00017593158216249236, "loss": 1.2039, "step": 9275 }, { "epoch": 0.12053738936364095, "grad_norm": 0.3248988389968872, "learning_rate": 0.00017592898270058098, "loss": 1.2936, "step": 9276 }, { "epoch": 0.12055038390755682, "grad_norm": 0.41979753971099854, "learning_rate": 0.0001759263832386696, "loss": 1.3144, "step": 9277 }, { "epoch": 0.12056337845147269, "grad_norm": 0.4150199294090271, "learning_rate": 0.00017592378377675823, "loss": 1.3194, "step": 9278 }, { "epoch": 0.12057637299538856, "grad_norm": 0.36927545070648193, "learning_rate": 0.00017592118431484683, "loss": 1.5334, "step": 9279 }, { "epoch": 0.12058936753930444, "grad_norm": 0.38874977827072144, "learning_rate": 0.00017591858485293546, "loss": 1.3427, "step": 9280 }, { "epoch": 0.12060236208322031, "grad_norm": 0.4251580238342285, "learning_rate": 0.00017591598539102408, "loss": 1.2466, "step": 9281 }, { "epoch": 0.12061535662713618, "grad_norm": 0.32135188579559326, "learning_rate": 0.00017591338592911268, "loss": 1.3673, "step": 9282 }, { "epoch": 0.12062835117105206, "grad_norm": 0.4495638608932495, "learning_rate": 0.0001759107864672013, "loss": 1.4018, "step": 9283 }, { "epoch": 0.12064134571496793, "grad_norm": 0.3772464990615845, "learning_rate": 0.0001759081870052899, "loss": 1.5787, "step": 9284 }, { "epoch": 0.1206543402588838, "grad_norm": 0.4275606870651245, "learning_rate": 0.00017590558754337852, "loss": 1.4332, "step": 9285 }, { "epoch": 0.12066733480279968, "grad_norm": 0.3489293158054352, "learning_rate": 0.00017590298808146715, "loss": 1.356, "step": 9286 }, { "epoch": 0.12068032934671555, "grad_norm": 0.4110635221004486, "learning_rate": 0.00017590038861955575, "loss": 1.4342, "step": 9287 }, { "epoch": 0.12069332389063142, "grad_norm": 0.44617557525634766, "learning_rate": 0.00017589778915764437, "loss": 1.4562, "step": 9288 }, { "epoch": 0.1207063184345473, "grad_norm": 0.3555718660354614, "learning_rate": 0.000175895189695733, "loss": 1.1468, "step": 9289 }, { "epoch": 0.12071931297846317, "grad_norm": 0.4795862138271332, "learning_rate": 0.00017589259023382162, "loss": 1.5313, "step": 9290 }, { "epoch": 0.12073230752237904, "grad_norm": 0.44497111439704895, "learning_rate": 0.00017588999077191022, "loss": 1.5787, "step": 9291 }, { "epoch": 0.12074530206629491, "grad_norm": 0.3562009930610657, "learning_rate": 0.00017588739130999884, "loss": 1.6434, "step": 9292 }, { "epoch": 0.12075829661021079, "grad_norm": 0.43425676226615906, "learning_rate": 0.00017588479184808747, "loss": 1.3577, "step": 9293 }, { "epoch": 0.12077129115412666, "grad_norm": 0.3656645119190216, "learning_rate": 0.00017588219238617606, "loss": 1.3952, "step": 9294 }, { "epoch": 0.12078428569804253, "grad_norm": 0.3730233609676361, "learning_rate": 0.0001758795929242647, "loss": 1.474, "step": 9295 }, { "epoch": 0.1207972802419584, "grad_norm": 0.39313989877700806, "learning_rate": 0.0001758769934623533, "loss": 1.3837, "step": 9296 }, { "epoch": 0.12081027478587428, "grad_norm": 0.415451318025589, "learning_rate": 0.0001758743940004419, "loss": 1.4818, "step": 9297 }, { "epoch": 0.12082326932979015, "grad_norm": 0.40186449885368347, "learning_rate": 0.00017587179453853053, "loss": 1.412, "step": 9298 }, { "epoch": 0.12083626387370602, "grad_norm": 0.32585060596466064, "learning_rate": 0.00017586919507661913, "loss": 1.4182, "step": 9299 }, { "epoch": 0.1208492584176219, "grad_norm": 0.36978092789649963, "learning_rate": 0.00017586659561470778, "loss": 1.3818, "step": 9300 }, { "epoch": 0.12086225296153777, "grad_norm": 0.3578946590423584, "learning_rate": 0.00017586399615279638, "loss": 1.2783, "step": 9301 }, { "epoch": 0.12087524750545364, "grad_norm": 0.2977055311203003, "learning_rate": 0.000175861396690885, "loss": 1.5007, "step": 9302 }, { "epoch": 0.12088824204936952, "grad_norm": 0.37292179465293884, "learning_rate": 0.0001758587972289736, "loss": 1.3219, "step": 9303 }, { "epoch": 0.12090123659328539, "grad_norm": 0.39701586961746216, "learning_rate": 0.00017585619776706223, "loss": 1.4429, "step": 9304 }, { "epoch": 0.12091423113720126, "grad_norm": 0.27553433179855347, "learning_rate": 0.00017585359830515085, "loss": 1.3378, "step": 9305 }, { "epoch": 0.12092722568111713, "grad_norm": 0.30529335141181946, "learning_rate": 0.00017585099884323945, "loss": 1.2722, "step": 9306 }, { "epoch": 0.12094022022503302, "grad_norm": 0.3753760755062103, "learning_rate": 0.00017584839938132807, "loss": 1.3034, "step": 9307 }, { "epoch": 0.1209532147689489, "grad_norm": 0.38662222027778625, "learning_rate": 0.0001758457999194167, "loss": 1.3716, "step": 9308 }, { "epoch": 0.12096620931286477, "grad_norm": 0.3851815164089203, "learning_rate": 0.00017584320045750532, "loss": 1.2685, "step": 9309 }, { "epoch": 0.12097920385678064, "grad_norm": 0.3915620744228363, "learning_rate": 0.00017584060099559392, "loss": 1.3465, "step": 9310 }, { "epoch": 0.12099219840069651, "grad_norm": 0.4091010093688965, "learning_rate": 0.00017583800153368252, "loss": 1.1862, "step": 9311 }, { "epoch": 0.12100519294461239, "grad_norm": 0.360620379447937, "learning_rate": 0.00017583540207177117, "loss": 1.3402, "step": 9312 }, { "epoch": 0.12101818748852826, "grad_norm": 0.4678625464439392, "learning_rate": 0.00017583280260985976, "loss": 1.6014, "step": 9313 }, { "epoch": 0.12103118203244413, "grad_norm": 0.4157090187072754, "learning_rate": 0.0001758302031479484, "loss": 1.245, "step": 9314 }, { "epoch": 0.12104417657636, "grad_norm": 0.2855381965637207, "learning_rate": 0.000175827603686037, "loss": 1.289, "step": 9315 }, { "epoch": 0.12105717112027588, "grad_norm": 0.4155445992946625, "learning_rate": 0.0001758250042241256, "loss": 1.4041, "step": 9316 }, { "epoch": 0.12107016566419175, "grad_norm": 0.4189385175704956, "learning_rate": 0.00017582240476221424, "loss": 1.4988, "step": 9317 }, { "epoch": 0.12108316020810762, "grad_norm": 0.5436775088310242, "learning_rate": 0.00017581980530030283, "loss": 1.5024, "step": 9318 }, { "epoch": 0.1210961547520235, "grad_norm": 0.38973748683929443, "learning_rate": 0.00017581720583839146, "loss": 1.3585, "step": 9319 }, { "epoch": 0.12110914929593937, "grad_norm": 0.4831514358520508, "learning_rate": 0.00017581460637648008, "loss": 1.4914, "step": 9320 }, { "epoch": 0.12112214383985524, "grad_norm": 0.4586810767650604, "learning_rate": 0.0001758120069145687, "loss": 1.4678, "step": 9321 }, { "epoch": 0.12113513838377112, "grad_norm": 0.3998999297618866, "learning_rate": 0.0001758094074526573, "loss": 1.5773, "step": 9322 }, { "epoch": 0.12114813292768699, "grad_norm": 0.41111308336257935, "learning_rate": 0.0001758068079907459, "loss": 1.3625, "step": 9323 }, { "epoch": 0.12116112747160286, "grad_norm": 0.32451263070106506, "learning_rate": 0.00017580420852883455, "loss": 1.2822, "step": 9324 }, { "epoch": 0.12117412201551873, "grad_norm": 0.42317408323287964, "learning_rate": 0.00017580160906692315, "loss": 1.3132, "step": 9325 }, { "epoch": 0.12118711655943461, "grad_norm": 0.33010753989219666, "learning_rate": 0.00017579900960501177, "loss": 1.3395, "step": 9326 }, { "epoch": 0.12120011110335048, "grad_norm": 0.4237150251865387, "learning_rate": 0.00017579641014310037, "loss": 1.5932, "step": 9327 }, { "epoch": 0.12121310564726635, "grad_norm": 0.33284929394721985, "learning_rate": 0.000175793810681189, "loss": 1.2039, "step": 9328 }, { "epoch": 0.12122610019118223, "grad_norm": 0.4798779785633087, "learning_rate": 0.00017579121121927762, "loss": 1.5158, "step": 9329 }, { "epoch": 0.1212390947350981, "grad_norm": 0.3473212420940399, "learning_rate": 0.00017578861175736622, "loss": 1.3428, "step": 9330 }, { "epoch": 0.12125208927901397, "grad_norm": 0.2981327176094055, "learning_rate": 0.00017578601229545484, "loss": 1.3875, "step": 9331 }, { "epoch": 0.12126508382292985, "grad_norm": 0.42622026801109314, "learning_rate": 0.00017578341283354347, "loss": 1.4639, "step": 9332 }, { "epoch": 0.12127807836684572, "grad_norm": 0.29069605469703674, "learning_rate": 0.0001757808133716321, "loss": 1.2565, "step": 9333 }, { "epoch": 0.12129107291076159, "grad_norm": 0.3991733193397522, "learning_rate": 0.0001757782139097207, "loss": 1.4763, "step": 9334 }, { "epoch": 0.12130406745467746, "grad_norm": 0.42441391944885254, "learning_rate": 0.00017577561444780931, "loss": 1.5025, "step": 9335 }, { "epoch": 0.12131706199859334, "grad_norm": 0.36271902918815613, "learning_rate": 0.00017577301498589794, "loss": 1.6598, "step": 9336 }, { "epoch": 0.12133005654250921, "grad_norm": 0.4363735318183899, "learning_rate": 0.00017577041552398654, "loss": 1.4993, "step": 9337 }, { "epoch": 0.12134305108642508, "grad_norm": 0.3632548749446869, "learning_rate": 0.00017576781606207516, "loss": 1.2148, "step": 9338 }, { "epoch": 0.12135604563034096, "grad_norm": 0.48507094383239746, "learning_rate": 0.00017576521660016378, "loss": 1.526, "step": 9339 }, { "epoch": 0.12136904017425683, "grad_norm": 0.3822447955608368, "learning_rate": 0.00017576261713825238, "loss": 1.3814, "step": 9340 }, { "epoch": 0.1213820347181727, "grad_norm": 0.43666285276412964, "learning_rate": 0.000175760017676341, "loss": 1.4766, "step": 9341 }, { "epoch": 0.12139502926208857, "grad_norm": 0.46178942918777466, "learning_rate": 0.0001757574182144296, "loss": 1.4913, "step": 9342 }, { "epoch": 0.12140802380600445, "grad_norm": 0.4100555181503296, "learning_rate": 0.00017575481875251826, "loss": 1.4562, "step": 9343 }, { "epoch": 0.12142101834992032, "grad_norm": 0.3818536102771759, "learning_rate": 0.00017575221929060685, "loss": 1.5386, "step": 9344 }, { "epoch": 0.12143401289383621, "grad_norm": 0.25779303908348083, "learning_rate": 0.00017574961982869548, "loss": 1.1867, "step": 9345 }, { "epoch": 0.12144700743775208, "grad_norm": 0.3203864097595215, "learning_rate": 0.00017574702036678407, "loss": 1.2648, "step": 9346 }, { "epoch": 0.12146000198166795, "grad_norm": 0.487255722284317, "learning_rate": 0.0001757444209048727, "loss": 1.5688, "step": 9347 }, { "epoch": 0.12147299652558383, "grad_norm": 0.33795788884162903, "learning_rate": 0.00017574182144296132, "loss": 1.4224, "step": 9348 }, { "epoch": 0.1214859910694997, "grad_norm": 0.4090534448623657, "learning_rate": 0.00017573922198104992, "loss": 1.3815, "step": 9349 }, { "epoch": 0.12149898561341557, "grad_norm": 0.4001588821411133, "learning_rate": 0.00017573662251913855, "loss": 1.4646, "step": 9350 }, { "epoch": 0.12151198015733145, "grad_norm": 0.4228440523147583, "learning_rate": 0.00017573402305722717, "loss": 1.393, "step": 9351 }, { "epoch": 0.12152497470124732, "grad_norm": 0.45497748255729675, "learning_rate": 0.00017573142359531577, "loss": 1.4689, "step": 9352 }, { "epoch": 0.12153796924516319, "grad_norm": 0.7702919840812683, "learning_rate": 0.0001757288241334044, "loss": 1.4536, "step": 9353 }, { "epoch": 0.12155096378907906, "grad_norm": 0.4107643663883209, "learning_rate": 0.000175726224671493, "loss": 1.5099, "step": 9354 }, { "epoch": 0.12156395833299494, "grad_norm": 0.3273683488368988, "learning_rate": 0.00017572362520958164, "loss": 1.3992, "step": 9355 }, { "epoch": 0.12157695287691081, "grad_norm": 0.26075479388237, "learning_rate": 0.00017572102574767024, "loss": 1.3166, "step": 9356 }, { "epoch": 0.12158994742082668, "grad_norm": 0.3445676863193512, "learning_rate": 0.00017571842628575886, "loss": 1.2702, "step": 9357 }, { "epoch": 0.12160294196474256, "grad_norm": 0.3990176320075989, "learning_rate": 0.00017571582682384746, "loss": 1.4989, "step": 9358 }, { "epoch": 0.12161593650865843, "grad_norm": 0.4130074977874756, "learning_rate": 0.00017571322736193608, "loss": 1.4303, "step": 9359 }, { "epoch": 0.1216289310525743, "grad_norm": 0.39101868867874146, "learning_rate": 0.0001757106279000247, "loss": 1.406, "step": 9360 }, { "epoch": 0.12164192559649017, "grad_norm": 0.3175065219402313, "learning_rate": 0.0001757080284381133, "loss": 1.4506, "step": 9361 }, { "epoch": 0.12165492014040605, "grad_norm": 0.32256609201431274, "learning_rate": 0.00017570542897620193, "loss": 1.5279, "step": 9362 }, { "epoch": 0.12166791468432192, "grad_norm": 0.4326832890510559, "learning_rate": 0.00017570282951429056, "loss": 1.4225, "step": 9363 }, { "epoch": 0.1216809092282378, "grad_norm": 0.33629029989242554, "learning_rate": 0.00017570023005237918, "loss": 1.374, "step": 9364 }, { "epoch": 0.12169390377215367, "grad_norm": 0.414267897605896, "learning_rate": 0.00017569763059046778, "loss": 1.4348, "step": 9365 }, { "epoch": 0.12170689831606954, "grad_norm": 0.3454011380672455, "learning_rate": 0.00017569503112855637, "loss": 1.3953, "step": 9366 }, { "epoch": 0.12171989285998541, "grad_norm": 0.523872971534729, "learning_rate": 0.00017569243166664503, "loss": 1.5102, "step": 9367 }, { "epoch": 0.12173288740390129, "grad_norm": 0.4031912088394165, "learning_rate": 0.00017568983220473362, "loss": 1.6535, "step": 9368 }, { "epoch": 0.12174588194781716, "grad_norm": 0.3654964864253998, "learning_rate": 0.00017568723274282225, "loss": 1.396, "step": 9369 }, { "epoch": 0.12175887649173303, "grad_norm": 0.37155187129974365, "learning_rate": 0.00017568463328091087, "loss": 1.5124, "step": 9370 }, { "epoch": 0.1217718710356489, "grad_norm": 0.31775298714637756, "learning_rate": 0.00017568203381899947, "loss": 1.3741, "step": 9371 }, { "epoch": 0.12178486557956478, "grad_norm": 0.303800106048584, "learning_rate": 0.0001756794343570881, "loss": 1.2745, "step": 9372 }, { "epoch": 0.12179786012348065, "grad_norm": 0.3304106593132019, "learning_rate": 0.0001756768348951767, "loss": 1.5423, "step": 9373 }, { "epoch": 0.12181085466739652, "grad_norm": 0.42666420340538025, "learning_rate": 0.00017567423543326534, "loss": 1.4133, "step": 9374 }, { "epoch": 0.1218238492113124, "grad_norm": 0.43140193819999695, "learning_rate": 0.00017567163597135394, "loss": 1.5115, "step": 9375 }, { "epoch": 0.12183684375522827, "grad_norm": 0.3730902373790741, "learning_rate": 0.00017566903650944257, "loss": 1.3174, "step": 9376 }, { "epoch": 0.12184983829914414, "grad_norm": 0.36808860301971436, "learning_rate": 0.00017566643704753116, "loss": 1.4683, "step": 9377 }, { "epoch": 0.12186283284306001, "grad_norm": 0.3478783667087555, "learning_rate": 0.0001756638375856198, "loss": 1.3244, "step": 9378 }, { "epoch": 0.12187582738697589, "grad_norm": 0.42890840768814087, "learning_rate": 0.0001756612381237084, "loss": 1.5046, "step": 9379 }, { "epoch": 0.12188882193089176, "grad_norm": 0.2624491751194, "learning_rate": 0.000175658638661797, "loss": 1.3872, "step": 9380 }, { "epoch": 0.12190181647480763, "grad_norm": 0.29321375489234924, "learning_rate": 0.00017565603919988563, "loss": 1.4468, "step": 9381 }, { "epoch": 0.1219148110187235, "grad_norm": 0.4059242606163025, "learning_rate": 0.00017565343973797426, "loss": 1.4384, "step": 9382 }, { "epoch": 0.1219278055626394, "grad_norm": 0.48989370465278625, "learning_rate": 0.00017565084027606286, "loss": 1.2889, "step": 9383 }, { "epoch": 0.12194080010655527, "grad_norm": 0.3938312232494354, "learning_rate": 0.00017564824081415148, "loss": 1.4097, "step": 9384 }, { "epoch": 0.12195379465047114, "grad_norm": 0.4003935754299164, "learning_rate": 0.00017564564135224008, "loss": 1.3756, "step": 9385 }, { "epoch": 0.12196678919438701, "grad_norm": 0.4381786584854126, "learning_rate": 0.00017564304189032873, "loss": 1.2769, "step": 9386 }, { "epoch": 0.12197978373830289, "grad_norm": 0.36698466539382935, "learning_rate": 0.00017564044242841733, "loss": 1.5093, "step": 9387 }, { "epoch": 0.12199277828221876, "grad_norm": 0.31886789202690125, "learning_rate": 0.00017563784296650595, "loss": 1.416, "step": 9388 }, { "epoch": 0.12200577282613463, "grad_norm": 0.37381190061569214, "learning_rate": 0.00017563524350459455, "loss": 1.4511, "step": 9389 }, { "epoch": 0.1220187673700505, "grad_norm": 0.43987271189689636, "learning_rate": 0.00017563264404268317, "loss": 1.5223, "step": 9390 }, { "epoch": 0.12203176191396638, "grad_norm": 0.37794679403305054, "learning_rate": 0.0001756300445807718, "loss": 1.28, "step": 9391 }, { "epoch": 0.12204475645788225, "grad_norm": 0.37859833240509033, "learning_rate": 0.0001756274451188604, "loss": 1.2429, "step": 9392 }, { "epoch": 0.12205775100179812, "grad_norm": 0.4435123801231384, "learning_rate": 0.00017562484565694902, "loss": 1.4836, "step": 9393 }, { "epoch": 0.122070745545714, "grad_norm": 0.5046146512031555, "learning_rate": 0.00017562224619503764, "loss": 1.5646, "step": 9394 }, { "epoch": 0.12208374008962987, "grad_norm": 0.4131261706352234, "learning_rate": 0.00017561964673312624, "loss": 1.4854, "step": 9395 }, { "epoch": 0.12209673463354574, "grad_norm": 0.370067298412323, "learning_rate": 0.00017561704727121487, "loss": 1.348, "step": 9396 }, { "epoch": 0.12210972917746162, "grad_norm": 0.3547513484954834, "learning_rate": 0.00017561444780930346, "loss": 1.3926, "step": 9397 }, { "epoch": 0.12212272372137749, "grad_norm": 0.36864474415779114, "learning_rate": 0.00017561184834739211, "loss": 1.3426, "step": 9398 }, { "epoch": 0.12213571826529336, "grad_norm": 0.39651089906692505, "learning_rate": 0.0001756092488854807, "loss": 1.4726, "step": 9399 }, { "epoch": 0.12214871280920923, "grad_norm": 0.37016063928604126, "learning_rate": 0.00017560664942356934, "loss": 1.2783, "step": 9400 }, { "epoch": 0.1221617073531251, "grad_norm": 0.4092501103878021, "learning_rate": 0.00017560404996165793, "loss": 1.3685, "step": 9401 }, { "epoch": 0.12217470189704098, "grad_norm": 0.3386761546134949, "learning_rate": 0.00017560145049974656, "loss": 1.5491, "step": 9402 }, { "epoch": 0.12218769644095685, "grad_norm": 0.5029440522193909, "learning_rate": 0.00017559885103783518, "loss": 1.5938, "step": 9403 }, { "epoch": 0.12220069098487273, "grad_norm": 0.36059367656707764, "learning_rate": 0.00017559625157592378, "loss": 1.2096, "step": 9404 }, { "epoch": 0.1222136855287886, "grad_norm": 0.49594607949256897, "learning_rate": 0.0001755936521140124, "loss": 1.4915, "step": 9405 }, { "epoch": 0.12222668007270447, "grad_norm": 0.30236825346946716, "learning_rate": 0.00017559105265210103, "loss": 1.2743, "step": 9406 }, { "epoch": 0.12223967461662034, "grad_norm": 0.37394291162490845, "learning_rate": 0.00017558845319018963, "loss": 1.2983, "step": 9407 }, { "epoch": 0.12225266916053622, "grad_norm": 0.42718207836151123, "learning_rate": 0.00017558585372827825, "loss": 1.3058, "step": 9408 }, { "epoch": 0.12226566370445209, "grad_norm": 0.4009205996990204, "learning_rate": 0.00017558325426636688, "loss": 1.4841, "step": 9409 }, { "epoch": 0.12227865824836796, "grad_norm": 0.3838242292404175, "learning_rate": 0.0001755806548044555, "loss": 1.4362, "step": 9410 }, { "epoch": 0.12229165279228384, "grad_norm": 0.4049939811229706, "learning_rate": 0.0001755780553425441, "loss": 1.4122, "step": 9411 }, { "epoch": 0.12230464733619971, "grad_norm": 0.3356418013572693, "learning_rate": 0.00017557545588063272, "loss": 1.3598, "step": 9412 }, { "epoch": 0.12231764188011558, "grad_norm": 0.5174401998519897, "learning_rate": 0.00017557285641872135, "loss": 1.5235, "step": 9413 }, { "epoch": 0.12233063642403146, "grad_norm": 0.5014281868934631, "learning_rate": 0.00017557025695680994, "loss": 1.5247, "step": 9414 }, { "epoch": 0.12234363096794733, "grad_norm": 0.3203625977039337, "learning_rate": 0.00017556765749489857, "loss": 1.2185, "step": 9415 }, { "epoch": 0.1223566255118632, "grad_norm": 0.3388095200061798, "learning_rate": 0.00017556505803298717, "loss": 1.2516, "step": 9416 }, { "epoch": 0.12236962005577907, "grad_norm": 0.3947952091693878, "learning_rate": 0.00017556245857107582, "loss": 1.5154, "step": 9417 }, { "epoch": 0.12238261459969495, "grad_norm": 0.4261745512485504, "learning_rate": 0.00017555985910916441, "loss": 1.5087, "step": 9418 }, { "epoch": 0.12239560914361082, "grad_norm": 0.4744755029678345, "learning_rate": 0.000175557259647253, "loss": 1.4432, "step": 9419 }, { "epoch": 0.1224086036875267, "grad_norm": 0.4018121659755707, "learning_rate": 0.00017555466018534164, "loss": 1.466, "step": 9420 }, { "epoch": 0.12242159823144258, "grad_norm": 0.39566123485565186, "learning_rate": 0.00017555206072343026, "loss": 1.2452, "step": 9421 }, { "epoch": 0.12243459277535845, "grad_norm": 0.42603591084480286, "learning_rate": 0.00017554946126151889, "loss": 1.4223, "step": 9422 }, { "epoch": 0.12244758731927433, "grad_norm": 0.3795925974845886, "learning_rate": 0.00017554686179960748, "loss": 1.4818, "step": 9423 }, { "epoch": 0.1224605818631902, "grad_norm": 0.4446429908275604, "learning_rate": 0.0001755442623376961, "loss": 1.4246, "step": 9424 }, { "epoch": 0.12247357640710607, "grad_norm": 0.336588978767395, "learning_rate": 0.00017554166287578473, "loss": 1.2808, "step": 9425 }, { "epoch": 0.12248657095102194, "grad_norm": 0.39158517122268677, "learning_rate": 0.00017553906341387333, "loss": 1.3705, "step": 9426 }, { "epoch": 0.12249956549493782, "grad_norm": 0.390872985124588, "learning_rate": 0.00017553646395196195, "loss": 1.2758, "step": 9427 }, { "epoch": 0.12251256003885369, "grad_norm": 0.455569326877594, "learning_rate": 0.00017553386449005055, "loss": 1.5882, "step": 9428 }, { "epoch": 0.12252555458276956, "grad_norm": 0.42535704374313354, "learning_rate": 0.0001755312650281392, "loss": 1.4453, "step": 9429 }, { "epoch": 0.12253854912668544, "grad_norm": 0.3217112421989441, "learning_rate": 0.0001755286655662278, "loss": 1.4702, "step": 9430 }, { "epoch": 0.12255154367060131, "grad_norm": 0.39075425267219543, "learning_rate": 0.00017552606610431642, "loss": 1.4497, "step": 9431 }, { "epoch": 0.12256453821451718, "grad_norm": 0.3755894601345062, "learning_rate": 0.00017552346664240502, "loss": 1.4311, "step": 9432 }, { "epoch": 0.12257753275843306, "grad_norm": 0.4219174385070801, "learning_rate": 0.00017552086718049365, "loss": 1.3618, "step": 9433 }, { "epoch": 0.12259052730234893, "grad_norm": 0.47242096066474915, "learning_rate": 0.00017551826771858227, "loss": 1.3837, "step": 9434 }, { "epoch": 0.1226035218462648, "grad_norm": 0.5205214619636536, "learning_rate": 0.00017551566825667087, "loss": 1.4809, "step": 9435 }, { "epoch": 0.12261651639018067, "grad_norm": 0.4229324162006378, "learning_rate": 0.0001755130687947595, "loss": 1.4869, "step": 9436 }, { "epoch": 0.12262951093409655, "grad_norm": 0.4389857053756714, "learning_rate": 0.00017551046933284812, "loss": 1.4979, "step": 9437 }, { "epoch": 0.12264250547801242, "grad_norm": 0.4339556097984314, "learning_rate": 0.00017550786987093671, "loss": 1.5938, "step": 9438 }, { "epoch": 0.1226555000219283, "grad_norm": 0.2765056788921356, "learning_rate": 0.00017550527040902534, "loss": 1.327, "step": 9439 }, { "epoch": 0.12266849456584417, "grad_norm": 0.46839138865470886, "learning_rate": 0.00017550267094711394, "loss": 1.3228, "step": 9440 }, { "epoch": 0.12268148910976004, "grad_norm": 0.4560263454914093, "learning_rate": 0.0001755000714852026, "loss": 1.255, "step": 9441 }, { "epoch": 0.12269448365367591, "grad_norm": 0.3007943034172058, "learning_rate": 0.00017549747202329119, "loss": 1.4302, "step": 9442 }, { "epoch": 0.12270747819759178, "grad_norm": 0.31359604001045227, "learning_rate": 0.0001754948725613798, "loss": 1.3717, "step": 9443 }, { "epoch": 0.12272047274150766, "grad_norm": 0.3923957347869873, "learning_rate": 0.00017549227309946843, "loss": 1.5479, "step": 9444 }, { "epoch": 0.12273346728542353, "grad_norm": 0.37875622510910034, "learning_rate": 0.00017548967363755703, "loss": 1.4428, "step": 9445 }, { "epoch": 0.1227464618293394, "grad_norm": 0.2996625006198883, "learning_rate": 0.00017548707417564566, "loss": 1.3771, "step": 9446 }, { "epoch": 0.12275945637325528, "grad_norm": 0.36629050970077515, "learning_rate": 0.00017548447471373425, "loss": 1.2443, "step": 9447 }, { "epoch": 0.12277245091717115, "grad_norm": 0.39929649233818054, "learning_rate": 0.0001754818752518229, "loss": 1.3785, "step": 9448 }, { "epoch": 0.12278544546108702, "grad_norm": 0.43769311904907227, "learning_rate": 0.0001754792757899115, "loss": 1.5107, "step": 9449 }, { "epoch": 0.1227984400050029, "grad_norm": 0.5155845284461975, "learning_rate": 0.0001754766763280001, "loss": 1.5704, "step": 9450 }, { "epoch": 0.12281143454891877, "grad_norm": 0.43582573533058167, "learning_rate": 0.00017547407686608872, "loss": 1.3126, "step": 9451 }, { "epoch": 0.12282442909283464, "grad_norm": 0.3846193850040436, "learning_rate": 0.00017547147740417735, "loss": 1.4231, "step": 9452 }, { "epoch": 0.12283742363675051, "grad_norm": 0.41088631749153137, "learning_rate": 0.00017546887794226597, "loss": 1.4574, "step": 9453 }, { "epoch": 0.12285041818066639, "grad_norm": 0.45022234320640564, "learning_rate": 0.00017546627848035457, "loss": 1.343, "step": 9454 }, { "epoch": 0.12286341272458226, "grad_norm": 0.34805867075920105, "learning_rate": 0.0001754636790184432, "loss": 1.4487, "step": 9455 }, { "epoch": 0.12287640726849813, "grad_norm": 0.2945215404033661, "learning_rate": 0.00017546107955653182, "loss": 1.3538, "step": 9456 }, { "epoch": 0.122889401812414, "grad_norm": 0.428950697183609, "learning_rate": 0.00017545848009462042, "loss": 1.5344, "step": 9457 }, { "epoch": 0.12290239635632988, "grad_norm": 0.39568817615509033, "learning_rate": 0.00017545588063270904, "loss": 1.5052, "step": 9458 }, { "epoch": 0.12291539090024577, "grad_norm": 0.49282294511795044, "learning_rate": 0.00017545328117079764, "loss": 1.3926, "step": 9459 }, { "epoch": 0.12292838544416164, "grad_norm": 0.4138358533382416, "learning_rate": 0.0001754506817088863, "loss": 1.4615, "step": 9460 }, { "epoch": 0.12294137998807751, "grad_norm": 0.4411356747150421, "learning_rate": 0.0001754480822469749, "loss": 1.6069, "step": 9461 }, { "epoch": 0.12295437453199339, "grad_norm": 0.41140323877334595, "learning_rate": 0.00017544548278506348, "loss": 1.4461, "step": 9462 }, { "epoch": 0.12296736907590926, "grad_norm": 0.41371530294418335, "learning_rate": 0.0001754428833231521, "loss": 1.4174, "step": 9463 }, { "epoch": 0.12298036361982513, "grad_norm": 0.3792431950569153, "learning_rate": 0.00017544028386124073, "loss": 1.3999, "step": 9464 }, { "epoch": 0.122993358163741, "grad_norm": 0.38682985305786133, "learning_rate": 0.00017543768439932936, "loss": 1.4607, "step": 9465 }, { "epoch": 0.12300635270765688, "grad_norm": 0.37916719913482666, "learning_rate": 0.00017543508493741796, "loss": 1.3804, "step": 9466 }, { "epoch": 0.12301934725157275, "grad_norm": 0.37214428186416626, "learning_rate": 0.00017543248547550658, "loss": 1.5521, "step": 9467 }, { "epoch": 0.12303234179548862, "grad_norm": 0.44244682788848877, "learning_rate": 0.0001754298860135952, "loss": 1.3643, "step": 9468 }, { "epoch": 0.1230453363394045, "grad_norm": 0.3487483263015747, "learning_rate": 0.0001754272865516838, "loss": 1.2952, "step": 9469 }, { "epoch": 0.12305833088332037, "grad_norm": 0.4238225817680359, "learning_rate": 0.00017542468708977243, "loss": 1.3952, "step": 9470 }, { "epoch": 0.12307132542723624, "grad_norm": 0.26047229766845703, "learning_rate": 0.00017542208762786102, "loss": 1.2802, "step": 9471 }, { "epoch": 0.12308431997115211, "grad_norm": 0.34330442547798157, "learning_rate": 0.00017541948816594968, "loss": 1.3557, "step": 9472 }, { "epoch": 0.12309731451506799, "grad_norm": 0.42483723163604736, "learning_rate": 0.00017541688870403827, "loss": 1.5422, "step": 9473 }, { "epoch": 0.12311030905898386, "grad_norm": 0.3888275623321533, "learning_rate": 0.00017541428924212687, "loss": 1.394, "step": 9474 }, { "epoch": 0.12312330360289973, "grad_norm": 0.42202091217041016, "learning_rate": 0.0001754116897802155, "loss": 1.4337, "step": 9475 }, { "epoch": 0.1231362981468156, "grad_norm": 0.3551664650440216, "learning_rate": 0.00017540909031830412, "loss": 1.3497, "step": 9476 }, { "epoch": 0.12314929269073148, "grad_norm": 0.38255226612091064, "learning_rate": 0.00017540649085639274, "loss": 1.3497, "step": 9477 }, { "epoch": 0.12316228723464735, "grad_norm": 0.3706818222999573, "learning_rate": 0.00017540389139448134, "loss": 1.5531, "step": 9478 }, { "epoch": 0.12317528177856323, "grad_norm": 0.45490768551826477, "learning_rate": 0.00017540129193256997, "loss": 1.6672, "step": 9479 }, { "epoch": 0.1231882763224791, "grad_norm": 0.2934543490409851, "learning_rate": 0.0001753986924706586, "loss": 1.2269, "step": 9480 }, { "epoch": 0.12320127086639497, "grad_norm": 0.36701881885528564, "learning_rate": 0.0001753960930087472, "loss": 1.414, "step": 9481 }, { "epoch": 0.12321426541031084, "grad_norm": 0.37207210063934326, "learning_rate": 0.0001753934935468358, "loss": 1.3823, "step": 9482 }, { "epoch": 0.12322725995422672, "grad_norm": 0.3229626715183258, "learning_rate": 0.00017539089408492444, "loss": 1.2548, "step": 9483 }, { "epoch": 0.12324025449814259, "grad_norm": 0.3672535717487335, "learning_rate": 0.00017538829462301306, "loss": 1.3426, "step": 9484 }, { "epoch": 0.12325324904205846, "grad_norm": 0.25910496711730957, "learning_rate": 0.00017538569516110166, "loss": 1.3688, "step": 9485 }, { "epoch": 0.12326624358597434, "grad_norm": 0.3056235611438751, "learning_rate": 0.00017538309569919028, "loss": 1.22, "step": 9486 }, { "epoch": 0.12327923812989021, "grad_norm": 0.482042521238327, "learning_rate": 0.0001753804962372789, "loss": 1.669, "step": 9487 }, { "epoch": 0.12329223267380608, "grad_norm": 0.45445653796195984, "learning_rate": 0.0001753778967753675, "loss": 1.5081, "step": 9488 }, { "epoch": 0.12330522721772195, "grad_norm": 0.3391783535480499, "learning_rate": 0.00017537529731345613, "loss": 1.2828, "step": 9489 }, { "epoch": 0.12331822176163783, "grad_norm": 0.2934155762195587, "learning_rate": 0.00017537269785154473, "loss": 1.248, "step": 9490 }, { "epoch": 0.1233312163055537, "grad_norm": 0.3356635868549347, "learning_rate": 0.00017537009838963335, "loss": 1.4247, "step": 9491 }, { "epoch": 0.12334421084946957, "grad_norm": 0.4243333339691162, "learning_rate": 0.00017536749892772198, "loss": 1.517, "step": 9492 }, { "epoch": 0.12335720539338545, "grad_norm": 0.3923400044441223, "learning_rate": 0.00017536489946581057, "loss": 1.5942, "step": 9493 }, { "epoch": 0.12337019993730132, "grad_norm": 0.2597026526927948, "learning_rate": 0.0001753623000038992, "loss": 1.2606, "step": 9494 }, { "epoch": 0.12338319448121719, "grad_norm": 0.4306289553642273, "learning_rate": 0.00017535970054198782, "loss": 1.3489, "step": 9495 }, { "epoch": 0.12339618902513307, "grad_norm": 0.5493301749229431, "learning_rate": 0.00017535710108007645, "loss": 1.5796, "step": 9496 }, { "epoch": 0.12340918356904895, "grad_norm": 0.2613542377948761, "learning_rate": 0.00017535450161816504, "loss": 1.0414, "step": 9497 }, { "epoch": 0.12342217811296483, "grad_norm": 0.40695062279701233, "learning_rate": 0.00017535190215625367, "loss": 1.4048, "step": 9498 }, { "epoch": 0.1234351726568807, "grad_norm": 0.3995246887207031, "learning_rate": 0.0001753493026943423, "loss": 1.4324, "step": 9499 }, { "epoch": 0.12344816720079657, "grad_norm": 0.35809874534606934, "learning_rate": 0.0001753467032324309, "loss": 1.5628, "step": 9500 }, { "epoch": 0.12346116174471244, "grad_norm": 0.4493583142757416, "learning_rate": 0.00017534410377051951, "loss": 1.5692, "step": 9501 }, { "epoch": 0.12347415628862832, "grad_norm": 0.33677470684051514, "learning_rate": 0.0001753415043086081, "loss": 1.4222, "step": 9502 }, { "epoch": 0.12348715083254419, "grad_norm": 0.41809993982315063, "learning_rate": 0.00017533890484669674, "loss": 1.5264, "step": 9503 }, { "epoch": 0.12350014537646006, "grad_norm": 0.32507792115211487, "learning_rate": 0.00017533630538478536, "loss": 1.328, "step": 9504 }, { "epoch": 0.12351313992037594, "grad_norm": 0.430988073348999, "learning_rate": 0.00017533370592287396, "loss": 1.5181, "step": 9505 }, { "epoch": 0.12352613446429181, "grad_norm": 0.38258063793182373, "learning_rate": 0.00017533110646096258, "loss": 1.3622, "step": 9506 }, { "epoch": 0.12353912900820768, "grad_norm": 0.2594582438468933, "learning_rate": 0.0001753285069990512, "loss": 1.2754, "step": 9507 }, { "epoch": 0.12355212355212356, "grad_norm": 0.4643630385398865, "learning_rate": 0.00017532590753713983, "loss": 1.362, "step": 9508 }, { "epoch": 0.12356511809603943, "grad_norm": 0.37695011496543884, "learning_rate": 0.00017532330807522843, "loss": 1.5886, "step": 9509 }, { "epoch": 0.1235781126399553, "grad_norm": 0.388703852891922, "learning_rate": 0.00017532070861331705, "loss": 1.4588, "step": 9510 }, { "epoch": 0.12359110718387117, "grad_norm": 0.4319486916065216, "learning_rate": 0.00017531810915140568, "loss": 1.5042, "step": 9511 }, { "epoch": 0.12360410172778705, "grad_norm": 0.41553017497062683, "learning_rate": 0.00017531550968949428, "loss": 1.4554, "step": 9512 }, { "epoch": 0.12361709627170292, "grad_norm": 0.36376845836639404, "learning_rate": 0.0001753129102275829, "loss": 1.6371, "step": 9513 }, { "epoch": 0.12363009081561879, "grad_norm": 0.3340856432914734, "learning_rate": 0.0001753103107656715, "loss": 1.3669, "step": 9514 }, { "epoch": 0.12364308535953467, "grad_norm": 0.3058205544948578, "learning_rate": 0.00017530771130376015, "loss": 1.5662, "step": 9515 }, { "epoch": 0.12365607990345054, "grad_norm": 0.48043322563171387, "learning_rate": 0.00017530511184184875, "loss": 1.6312, "step": 9516 }, { "epoch": 0.12366907444736641, "grad_norm": 0.4191220998764038, "learning_rate": 0.00017530251237993734, "loss": 1.6763, "step": 9517 }, { "epoch": 0.12368206899128228, "grad_norm": 0.3762301802635193, "learning_rate": 0.000175299912918026, "loss": 1.2855, "step": 9518 }, { "epoch": 0.12369506353519816, "grad_norm": 0.424789696931839, "learning_rate": 0.0001752973134561146, "loss": 1.4668, "step": 9519 }, { "epoch": 0.12370805807911403, "grad_norm": 0.36871305108070374, "learning_rate": 0.00017529471399420322, "loss": 1.3455, "step": 9520 }, { "epoch": 0.1237210526230299, "grad_norm": 0.46252450346946716, "learning_rate": 0.00017529211453229181, "loss": 1.6386, "step": 9521 }, { "epoch": 0.12373404716694578, "grad_norm": 0.3832029104232788, "learning_rate": 0.00017528951507038044, "loss": 1.2696, "step": 9522 }, { "epoch": 0.12374704171086165, "grad_norm": 0.37716394662857056, "learning_rate": 0.00017528691560846906, "loss": 1.389, "step": 9523 }, { "epoch": 0.12376003625477752, "grad_norm": 0.3911168575286865, "learning_rate": 0.00017528431614655766, "loss": 1.4422, "step": 9524 }, { "epoch": 0.1237730307986934, "grad_norm": 0.4176744818687439, "learning_rate": 0.00017528171668464629, "loss": 1.5345, "step": 9525 }, { "epoch": 0.12378602534260927, "grad_norm": 0.41857510805130005, "learning_rate": 0.0001752791172227349, "loss": 1.361, "step": 9526 }, { "epoch": 0.12379901988652514, "grad_norm": 0.3802216947078705, "learning_rate": 0.00017527651776082353, "loss": 1.5399, "step": 9527 }, { "epoch": 0.12381201443044101, "grad_norm": 0.38757428526878357, "learning_rate": 0.00017527391829891213, "loss": 1.2467, "step": 9528 }, { "epoch": 0.12382500897435689, "grad_norm": 0.49100178480148315, "learning_rate": 0.00017527131883700073, "loss": 1.4281, "step": 9529 }, { "epoch": 0.12383800351827276, "grad_norm": 0.39563506841659546, "learning_rate": 0.00017526871937508938, "loss": 1.4232, "step": 9530 }, { "epoch": 0.12385099806218863, "grad_norm": 0.3539801239967346, "learning_rate": 0.00017526611991317798, "loss": 1.6213, "step": 9531 }, { "epoch": 0.1238639926061045, "grad_norm": 0.33889925479888916, "learning_rate": 0.0001752635204512666, "loss": 1.4746, "step": 9532 }, { "epoch": 0.12387698715002038, "grad_norm": 0.32110705971717834, "learning_rate": 0.0001752609209893552, "loss": 1.2862, "step": 9533 }, { "epoch": 0.12388998169393625, "grad_norm": 0.2726416289806366, "learning_rate": 0.00017525832152744382, "loss": 1.3376, "step": 9534 }, { "epoch": 0.12390297623785214, "grad_norm": 0.40119504928588867, "learning_rate": 0.00017525572206553245, "loss": 1.3445, "step": 9535 }, { "epoch": 0.12391597078176801, "grad_norm": 0.4073296785354614, "learning_rate": 0.00017525312260362105, "loss": 1.4134, "step": 9536 }, { "epoch": 0.12392896532568388, "grad_norm": 0.4655570387840271, "learning_rate": 0.00017525052314170967, "loss": 1.3833, "step": 9537 }, { "epoch": 0.12394195986959976, "grad_norm": 0.44354668259620667, "learning_rate": 0.0001752479236797983, "loss": 1.3378, "step": 9538 }, { "epoch": 0.12395495441351563, "grad_norm": 0.3735615313053131, "learning_rate": 0.00017524532421788692, "loss": 1.1322, "step": 9539 }, { "epoch": 0.1239679489574315, "grad_norm": 0.49971988797187805, "learning_rate": 0.00017524272475597552, "loss": 1.2174, "step": 9540 }, { "epoch": 0.12398094350134738, "grad_norm": 0.3243158161640167, "learning_rate": 0.00017524012529406414, "loss": 1.577, "step": 9541 }, { "epoch": 0.12399393804526325, "grad_norm": 0.2929551601409912, "learning_rate": 0.00017523752583215277, "loss": 1.3187, "step": 9542 }, { "epoch": 0.12400693258917912, "grad_norm": 0.37671124935150146, "learning_rate": 0.00017523492637024136, "loss": 1.4367, "step": 9543 }, { "epoch": 0.124019927133095, "grad_norm": 0.44659480452537537, "learning_rate": 0.00017523232690833, "loss": 1.4277, "step": 9544 }, { "epoch": 0.12403292167701087, "grad_norm": 0.36298638582229614, "learning_rate": 0.00017522972744641859, "loss": 1.6034, "step": 9545 }, { "epoch": 0.12404591622092674, "grad_norm": 0.3981684744358063, "learning_rate": 0.0001752271279845072, "loss": 1.6356, "step": 9546 }, { "epoch": 0.12405891076484261, "grad_norm": 0.39422181248664856, "learning_rate": 0.00017522452852259583, "loss": 1.4045, "step": 9547 }, { "epoch": 0.12407190530875849, "grad_norm": 0.43765023350715637, "learning_rate": 0.00017522192906068443, "loss": 1.4791, "step": 9548 }, { "epoch": 0.12408489985267436, "grad_norm": 0.2757258117198944, "learning_rate": 0.00017521932959877306, "loss": 1.4103, "step": 9549 }, { "epoch": 0.12409789439659023, "grad_norm": 0.34877264499664307, "learning_rate": 0.00017521673013686168, "loss": 1.261, "step": 9550 }, { "epoch": 0.1241108889405061, "grad_norm": 0.4051900804042816, "learning_rate": 0.0001752141306749503, "loss": 1.4377, "step": 9551 }, { "epoch": 0.12412388348442198, "grad_norm": 0.4281129539012909, "learning_rate": 0.0001752115312130389, "loss": 1.4058, "step": 9552 }, { "epoch": 0.12413687802833785, "grad_norm": 0.37326550483703613, "learning_rate": 0.00017520893175112753, "loss": 1.4333, "step": 9553 }, { "epoch": 0.12414987257225372, "grad_norm": 0.44275426864624023, "learning_rate": 0.00017520633228921615, "loss": 1.4008, "step": 9554 }, { "epoch": 0.1241628671161696, "grad_norm": 0.30423763394355774, "learning_rate": 0.00017520373282730475, "loss": 1.287, "step": 9555 }, { "epoch": 0.12417586166008547, "grad_norm": 0.38611316680908203, "learning_rate": 0.00017520113336539337, "loss": 1.2952, "step": 9556 }, { "epoch": 0.12418885620400134, "grad_norm": 0.47034040093421936, "learning_rate": 0.000175198533903482, "loss": 1.5033, "step": 9557 }, { "epoch": 0.12420185074791722, "grad_norm": 0.4749831259250641, "learning_rate": 0.0001751959344415706, "loss": 1.3976, "step": 9558 }, { "epoch": 0.12421484529183309, "grad_norm": 0.41762426495552063, "learning_rate": 0.00017519333497965922, "loss": 1.5608, "step": 9559 }, { "epoch": 0.12422783983574896, "grad_norm": 0.41292503476142883, "learning_rate": 0.00017519073551774782, "loss": 1.6435, "step": 9560 }, { "epoch": 0.12424083437966484, "grad_norm": 0.3542183041572571, "learning_rate": 0.00017518813605583647, "loss": 1.3538, "step": 9561 }, { "epoch": 0.12425382892358071, "grad_norm": 0.44358035922050476, "learning_rate": 0.00017518553659392507, "loss": 1.483, "step": 9562 }, { "epoch": 0.12426682346749658, "grad_norm": 0.3438158333301544, "learning_rate": 0.0001751829371320137, "loss": 1.147, "step": 9563 }, { "epoch": 0.12427981801141245, "grad_norm": 0.4154832065105438, "learning_rate": 0.0001751803376701023, "loss": 1.6448, "step": 9564 }, { "epoch": 0.12429281255532833, "grad_norm": 0.37668576836586, "learning_rate": 0.0001751777382081909, "loss": 1.405, "step": 9565 }, { "epoch": 0.1243058070992442, "grad_norm": 0.4438360035419464, "learning_rate": 0.00017517513874627954, "loss": 1.3371, "step": 9566 }, { "epoch": 0.12431880164316007, "grad_norm": 0.3932449221611023, "learning_rate": 0.00017517253928436813, "loss": 1.5031, "step": 9567 }, { "epoch": 0.12433179618707595, "grad_norm": 0.42104554176330566, "learning_rate": 0.00017516993982245676, "loss": 1.4451, "step": 9568 }, { "epoch": 0.12434479073099182, "grad_norm": 0.36716482043266296, "learning_rate": 0.00017516734036054538, "loss": 1.4091, "step": 9569 }, { "epoch": 0.12435778527490769, "grad_norm": 0.3912370204925537, "learning_rate": 0.000175164740898634, "loss": 1.4318, "step": 9570 }, { "epoch": 0.12437077981882357, "grad_norm": 0.37993699312210083, "learning_rate": 0.0001751621414367226, "loss": 1.6242, "step": 9571 }, { "epoch": 0.12438377436273944, "grad_norm": 0.4448337256908417, "learning_rate": 0.0001751595419748112, "loss": 1.5356, "step": 9572 }, { "epoch": 0.12439676890665531, "grad_norm": 0.49726277589797974, "learning_rate": 0.00017515694251289985, "loss": 1.5825, "step": 9573 }, { "epoch": 0.1244097634505712, "grad_norm": 0.39247578382492065, "learning_rate": 0.00017515434305098845, "loss": 1.5825, "step": 9574 }, { "epoch": 0.12442275799448707, "grad_norm": 0.4429816007614136, "learning_rate": 0.00017515174358907708, "loss": 1.579, "step": 9575 }, { "epoch": 0.12443575253840294, "grad_norm": 0.365694135427475, "learning_rate": 0.00017514914412716567, "loss": 1.4903, "step": 9576 }, { "epoch": 0.12444874708231882, "grad_norm": 0.34383833408355713, "learning_rate": 0.0001751465446652543, "loss": 1.417, "step": 9577 }, { "epoch": 0.12446174162623469, "grad_norm": 0.3965412676334381, "learning_rate": 0.00017514394520334292, "loss": 1.3773, "step": 9578 }, { "epoch": 0.12447473617015056, "grad_norm": 0.29918986558914185, "learning_rate": 0.00017514134574143152, "loss": 1.2956, "step": 9579 }, { "epoch": 0.12448773071406644, "grad_norm": 0.4160357415676117, "learning_rate": 0.00017513874627952014, "loss": 1.5154, "step": 9580 }, { "epoch": 0.12450072525798231, "grad_norm": 0.42975103855133057, "learning_rate": 0.00017513614681760877, "loss": 1.4572, "step": 9581 }, { "epoch": 0.12451371980189818, "grad_norm": 0.4243880808353424, "learning_rate": 0.0001751335473556974, "loss": 1.6084, "step": 9582 }, { "epoch": 0.12452671434581405, "grad_norm": 0.44164714217185974, "learning_rate": 0.000175130947893786, "loss": 1.4222, "step": 9583 }, { "epoch": 0.12453970888972993, "grad_norm": 0.32200759649276733, "learning_rate": 0.0001751283484318746, "loss": 1.3142, "step": 9584 }, { "epoch": 0.1245527034336458, "grad_norm": 0.39200395345687866, "learning_rate": 0.00017512574896996324, "loss": 1.4469, "step": 9585 }, { "epoch": 0.12456569797756167, "grad_norm": 0.4106068015098572, "learning_rate": 0.00017512314950805184, "loss": 1.6844, "step": 9586 }, { "epoch": 0.12457869252147755, "grad_norm": 0.33713802695274353, "learning_rate": 0.00017512055004614046, "loss": 1.4453, "step": 9587 }, { "epoch": 0.12459168706539342, "grad_norm": 0.4372261166572571, "learning_rate": 0.00017511795058422906, "loss": 1.4759, "step": 9588 }, { "epoch": 0.12460468160930929, "grad_norm": 0.4063788652420044, "learning_rate": 0.00017511535112231768, "loss": 1.6063, "step": 9589 }, { "epoch": 0.12461767615322517, "grad_norm": 0.3728642165660858, "learning_rate": 0.0001751127516604063, "loss": 1.4006, "step": 9590 }, { "epoch": 0.12463067069714104, "grad_norm": 0.3007648289203644, "learning_rate": 0.0001751101521984949, "loss": 1.281, "step": 9591 }, { "epoch": 0.12464366524105691, "grad_norm": 0.3721601068973541, "learning_rate": 0.00017510755273658356, "loss": 1.5699, "step": 9592 }, { "epoch": 0.12465665978497278, "grad_norm": 0.4571717381477356, "learning_rate": 0.00017510495327467215, "loss": 1.4366, "step": 9593 }, { "epoch": 0.12466965432888866, "grad_norm": 0.4183889329433441, "learning_rate": 0.00017510235381276078, "loss": 1.4985, "step": 9594 }, { "epoch": 0.12468264887280453, "grad_norm": 0.4092417061328888, "learning_rate": 0.00017509975435084938, "loss": 1.3064, "step": 9595 }, { "epoch": 0.1246956434167204, "grad_norm": 0.3628922402858734, "learning_rate": 0.000175097154888938, "loss": 1.4016, "step": 9596 }, { "epoch": 0.12470863796063628, "grad_norm": 0.37899014353752136, "learning_rate": 0.00017509455542702662, "loss": 1.2399, "step": 9597 }, { "epoch": 0.12472163250455215, "grad_norm": 0.3253507912158966, "learning_rate": 0.00017509195596511522, "loss": 1.3236, "step": 9598 }, { "epoch": 0.12473462704846802, "grad_norm": 0.43177762627601624, "learning_rate": 0.00017508935650320385, "loss": 1.3943, "step": 9599 }, { "epoch": 0.1247476215923839, "grad_norm": 0.42290180921554565, "learning_rate": 0.00017508675704129247, "loss": 1.5283, "step": 9600 }, { "epoch": 0.12476061613629977, "grad_norm": 0.4956355094909668, "learning_rate": 0.00017508415757938107, "loss": 1.4605, "step": 9601 }, { "epoch": 0.12477361068021564, "grad_norm": 0.40923964977264404, "learning_rate": 0.0001750815581174697, "loss": 1.2837, "step": 9602 }, { "epoch": 0.12478660522413151, "grad_norm": 0.41952595114707947, "learning_rate": 0.0001750789586555583, "loss": 1.428, "step": 9603 }, { "epoch": 0.12479959976804739, "grad_norm": 0.40707823634147644, "learning_rate": 0.00017507635919364694, "loss": 1.3958, "step": 9604 }, { "epoch": 0.12481259431196326, "grad_norm": 0.21762244403362274, "learning_rate": 0.00017507375973173554, "loss": 1.3969, "step": 9605 }, { "epoch": 0.12482558885587913, "grad_norm": 0.3387683033943176, "learning_rate": 0.00017507116026982416, "loss": 1.2537, "step": 9606 }, { "epoch": 0.124838583399795, "grad_norm": 0.3670079708099365, "learning_rate": 0.00017506856080791276, "loss": 1.376, "step": 9607 }, { "epoch": 0.12485157794371088, "grad_norm": 0.39507466554641724, "learning_rate": 0.00017506596134600139, "loss": 1.3001, "step": 9608 }, { "epoch": 0.12486457248762675, "grad_norm": 0.2887982130050659, "learning_rate": 0.00017506336188409, "loss": 1.2367, "step": 9609 }, { "epoch": 0.12487756703154262, "grad_norm": 0.3801294267177582, "learning_rate": 0.0001750607624221786, "loss": 1.4169, "step": 9610 }, { "epoch": 0.1248905615754585, "grad_norm": 0.33027201890945435, "learning_rate": 0.00017505816296026723, "loss": 1.534, "step": 9611 }, { "epoch": 0.12490355611937438, "grad_norm": 0.4124357998371124, "learning_rate": 0.00017505556349835586, "loss": 1.477, "step": 9612 }, { "epoch": 0.12491655066329026, "grad_norm": 0.3271085023880005, "learning_rate": 0.00017505296403644445, "loss": 1.2346, "step": 9613 }, { "epoch": 0.12492954520720613, "grad_norm": 0.36430051922798157, "learning_rate": 0.00017505036457453308, "loss": 1.3238, "step": 9614 }, { "epoch": 0.124942539751122, "grad_norm": 0.4155019223690033, "learning_rate": 0.00017504776511262168, "loss": 1.3999, "step": 9615 }, { "epoch": 0.12495553429503788, "grad_norm": 0.31151819229125977, "learning_rate": 0.00017504516565071033, "loss": 1.3762, "step": 9616 }, { "epoch": 0.12496852883895375, "grad_norm": 0.4495059847831726, "learning_rate": 0.00017504256618879892, "loss": 1.4062, "step": 9617 }, { "epoch": 0.12498152338286962, "grad_norm": 0.4299633800983429, "learning_rate": 0.00017503996672688755, "loss": 1.434, "step": 9618 }, { "epoch": 0.1249945179267855, "grad_norm": 0.42572519183158875, "learning_rate": 0.00017503736726497615, "loss": 1.2537, "step": 9619 }, { "epoch": 0.12500751247070135, "grad_norm": 0.43866828083992004, "learning_rate": 0.00017503476780306477, "loss": 1.321, "step": 9620 }, { "epoch": 0.12502050701461723, "grad_norm": 0.3476933538913727, "learning_rate": 0.0001750321683411534, "loss": 1.3085, "step": 9621 }, { "epoch": 0.1250335015585331, "grad_norm": 0.33552300930023193, "learning_rate": 0.000175029568879242, "loss": 1.3884, "step": 9622 }, { "epoch": 0.12504649610244897, "grad_norm": 0.40971824526786804, "learning_rate": 0.00017502696941733062, "loss": 1.299, "step": 9623 }, { "epoch": 0.12505949064636485, "grad_norm": 0.42949429154396057, "learning_rate": 0.00017502436995541924, "loss": 1.5686, "step": 9624 }, { "epoch": 0.12507248519028072, "grad_norm": 0.37826862931251526, "learning_rate": 0.00017502177049350784, "loss": 1.4853, "step": 9625 }, { "epoch": 0.1250854797341966, "grad_norm": 0.44199806451797485, "learning_rate": 0.00017501917103159646, "loss": 1.4439, "step": 9626 }, { "epoch": 0.12509847427811246, "grad_norm": 0.4022911787033081, "learning_rate": 0.00017501657156968506, "loss": 1.574, "step": 9627 }, { "epoch": 0.12511146882202834, "grad_norm": 0.3098083436489105, "learning_rate": 0.0001750139721077737, "loss": 1.4625, "step": 9628 }, { "epoch": 0.1251244633659442, "grad_norm": 0.4486669898033142, "learning_rate": 0.0001750113726458623, "loss": 1.481, "step": 9629 }, { "epoch": 0.12513745790986008, "grad_norm": 0.35100775957107544, "learning_rate": 0.00017500877318395093, "loss": 1.3312, "step": 9630 }, { "epoch": 0.12515045245377598, "grad_norm": 0.34200555086135864, "learning_rate": 0.00017500617372203956, "loss": 1.4985, "step": 9631 }, { "epoch": 0.12516344699769186, "grad_norm": 0.3374740481376648, "learning_rate": 0.00017500357426012816, "loss": 1.3524, "step": 9632 }, { "epoch": 0.12517644154160773, "grad_norm": 0.3939988613128662, "learning_rate": 0.00017500097479821678, "loss": 1.3465, "step": 9633 }, { "epoch": 0.1251894360855236, "grad_norm": 0.345689594745636, "learning_rate": 0.00017499837533630538, "loss": 1.3531, "step": 9634 }, { "epoch": 0.12520243062943948, "grad_norm": 0.4319462478160858, "learning_rate": 0.00017499577587439403, "loss": 1.5274, "step": 9635 }, { "epoch": 0.12521542517335535, "grad_norm": 0.4508669078350067, "learning_rate": 0.00017499317641248263, "loss": 1.4374, "step": 9636 }, { "epoch": 0.12522841971727122, "grad_norm": 0.37562060356140137, "learning_rate": 0.00017499057695057125, "loss": 1.5023, "step": 9637 }, { "epoch": 0.1252414142611871, "grad_norm": 0.3058767020702362, "learning_rate": 0.00017498797748865985, "loss": 1.2453, "step": 9638 }, { "epoch": 0.12525440880510297, "grad_norm": 0.38932371139526367, "learning_rate": 0.00017498537802674847, "loss": 1.303, "step": 9639 }, { "epoch": 0.12526740334901884, "grad_norm": 0.39255431294441223, "learning_rate": 0.0001749827785648371, "loss": 1.3559, "step": 9640 }, { "epoch": 0.12528039789293471, "grad_norm": 0.3753657042980194, "learning_rate": 0.0001749801791029257, "loss": 1.5122, "step": 9641 }, { "epoch": 0.1252933924368506, "grad_norm": 0.31538498401641846, "learning_rate": 0.00017497757964101432, "loss": 1.2619, "step": 9642 }, { "epoch": 0.12530638698076646, "grad_norm": 0.3877624571323395, "learning_rate": 0.00017497498017910294, "loss": 1.5394, "step": 9643 }, { "epoch": 0.12531938152468233, "grad_norm": 0.24258852005004883, "learning_rate": 0.00017497238071719154, "loss": 1.3273, "step": 9644 }, { "epoch": 0.1253323760685982, "grad_norm": 0.3393879234790802, "learning_rate": 0.00017496978125528017, "loss": 1.3401, "step": 9645 }, { "epoch": 0.12534537061251408, "grad_norm": 0.44138243794441223, "learning_rate": 0.00017496718179336876, "loss": 1.3758, "step": 9646 }, { "epoch": 0.12535836515642995, "grad_norm": 0.3094342052936554, "learning_rate": 0.00017496458233145742, "loss": 1.3767, "step": 9647 }, { "epoch": 0.12537135970034582, "grad_norm": 0.3506278991699219, "learning_rate": 0.000174961982869546, "loss": 1.5701, "step": 9648 }, { "epoch": 0.1253843542442617, "grad_norm": 0.4021117687225342, "learning_rate": 0.00017495938340763464, "loss": 1.3221, "step": 9649 }, { "epoch": 0.12539734878817757, "grad_norm": 0.37280309200286865, "learning_rate": 0.00017495678394572323, "loss": 1.2719, "step": 9650 }, { "epoch": 0.12541034333209344, "grad_norm": 0.3920639157295227, "learning_rate": 0.00017495418448381186, "loss": 1.2147, "step": 9651 }, { "epoch": 0.12542333787600932, "grad_norm": 0.39731329679489136, "learning_rate": 0.00017495158502190048, "loss": 1.6081, "step": 9652 }, { "epoch": 0.1254363324199252, "grad_norm": 0.46653154492378235, "learning_rate": 0.00017494898555998908, "loss": 1.4074, "step": 9653 }, { "epoch": 0.12544932696384106, "grad_norm": 0.3996526598930359, "learning_rate": 0.0001749463860980777, "loss": 1.4357, "step": 9654 }, { "epoch": 0.12546232150775694, "grad_norm": 0.413546085357666, "learning_rate": 0.00017494378663616633, "loss": 1.5657, "step": 9655 }, { "epoch": 0.1254753160516728, "grad_norm": 0.4259064495563507, "learning_rate": 0.00017494118717425493, "loss": 1.3984, "step": 9656 }, { "epoch": 0.12548831059558868, "grad_norm": 0.3858471214771271, "learning_rate": 0.00017493858771234355, "loss": 1.3308, "step": 9657 }, { "epoch": 0.12550130513950455, "grad_norm": 0.46405744552612305, "learning_rate": 0.00017493598825043215, "loss": 1.5752, "step": 9658 }, { "epoch": 0.12551429968342043, "grad_norm": 0.47689926624298096, "learning_rate": 0.0001749333887885208, "loss": 1.5239, "step": 9659 }, { "epoch": 0.1255272942273363, "grad_norm": 0.42176753282546997, "learning_rate": 0.0001749307893266094, "loss": 1.6362, "step": 9660 }, { "epoch": 0.12554028877125217, "grad_norm": 0.4492424428462982, "learning_rate": 0.00017492818986469802, "loss": 1.3585, "step": 9661 }, { "epoch": 0.12555328331516805, "grad_norm": 0.3477698266506195, "learning_rate": 0.00017492559040278662, "loss": 1.2526, "step": 9662 }, { "epoch": 0.12556627785908392, "grad_norm": 0.39830154180526733, "learning_rate": 0.00017492299094087524, "loss": 1.4401, "step": 9663 }, { "epoch": 0.1255792724029998, "grad_norm": 0.39245128631591797, "learning_rate": 0.00017492039147896387, "loss": 1.6153, "step": 9664 }, { "epoch": 0.12559226694691566, "grad_norm": 0.39771386981010437, "learning_rate": 0.00017491779201705247, "loss": 1.4228, "step": 9665 }, { "epoch": 0.12560526149083154, "grad_norm": 0.24116118252277374, "learning_rate": 0.00017491519255514112, "loss": 1.3096, "step": 9666 }, { "epoch": 0.1256182560347474, "grad_norm": 0.44289201498031616, "learning_rate": 0.00017491259309322972, "loss": 1.4459, "step": 9667 }, { "epoch": 0.12563125057866328, "grad_norm": 0.43484336137771606, "learning_rate": 0.0001749099936313183, "loss": 1.446, "step": 9668 }, { "epoch": 0.12564424512257916, "grad_norm": 0.2405206859111786, "learning_rate": 0.00017490739416940694, "loss": 1.3298, "step": 9669 }, { "epoch": 0.12565723966649503, "grad_norm": 0.35324826836586, "learning_rate": 0.00017490479470749556, "loss": 1.5308, "step": 9670 }, { "epoch": 0.1256702342104109, "grad_norm": 0.31959229707717896, "learning_rate": 0.00017490219524558419, "loss": 1.3147, "step": 9671 }, { "epoch": 0.12568322875432678, "grad_norm": 0.4756709933280945, "learning_rate": 0.00017489959578367278, "loss": 1.4761, "step": 9672 }, { "epoch": 0.12569622329824265, "grad_norm": 0.3300042152404785, "learning_rate": 0.0001748969963217614, "loss": 1.4613, "step": 9673 }, { "epoch": 0.12570921784215852, "grad_norm": 0.260833203792572, "learning_rate": 0.00017489439685985003, "loss": 1.2715, "step": 9674 }, { "epoch": 0.1257222123860744, "grad_norm": 0.4841623306274414, "learning_rate": 0.00017489179739793863, "loss": 1.3387, "step": 9675 }, { "epoch": 0.12573520692999027, "grad_norm": 0.353515088558197, "learning_rate": 0.00017488919793602725, "loss": 1.6013, "step": 9676 }, { "epoch": 0.12574820147390614, "grad_norm": 0.45149222016334534, "learning_rate": 0.00017488659847411585, "loss": 1.5653, "step": 9677 }, { "epoch": 0.125761196017822, "grad_norm": 0.4398069381713867, "learning_rate": 0.0001748839990122045, "loss": 1.5515, "step": 9678 }, { "epoch": 0.1257741905617379, "grad_norm": 0.37375950813293457, "learning_rate": 0.0001748813995502931, "loss": 1.3408, "step": 9679 }, { "epoch": 0.12578718510565376, "grad_norm": 0.276103675365448, "learning_rate": 0.0001748788000883817, "loss": 1.3096, "step": 9680 }, { "epoch": 0.12580017964956963, "grad_norm": 0.442453533411026, "learning_rate": 0.00017487620062647032, "loss": 1.5137, "step": 9681 }, { "epoch": 0.1258131741934855, "grad_norm": 0.48259907960891724, "learning_rate": 0.00017487360116455895, "loss": 1.5208, "step": 9682 }, { "epoch": 0.12582616873740138, "grad_norm": 0.37423524260520935, "learning_rate": 0.00017487100170264757, "loss": 1.4551, "step": 9683 }, { "epoch": 0.12583916328131725, "grad_norm": 0.34281760454177856, "learning_rate": 0.00017486840224073617, "loss": 1.5296, "step": 9684 }, { "epoch": 0.12585215782523312, "grad_norm": 0.3728395402431488, "learning_rate": 0.0001748658027788248, "loss": 1.3543, "step": 9685 }, { "epoch": 0.125865152369149, "grad_norm": 0.3292859196662903, "learning_rate": 0.00017486320331691342, "loss": 1.2164, "step": 9686 }, { "epoch": 0.12587814691306487, "grad_norm": 0.4014461934566498, "learning_rate": 0.00017486060385500202, "loss": 1.4251, "step": 9687 }, { "epoch": 0.12589114145698074, "grad_norm": 0.4269237518310547, "learning_rate": 0.00017485800439309064, "loss": 1.4323, "step": 9688 }, { "epoch": 0.12590413600089662, "grad_norm": 0.3459317088127136, "learning_rate": 0.00017485540493117924, "loss": 1.3725, "step": 9689 }, { "epoch": 0.1259171305448125, "grad_norm": 0.33514484763145447, "learning_rate": 0.0001748528054692679, "loss": 1.4692, "step": 9690 }, { "epoch": 0.12593012508872836, "grad_norm": 0.3969482481479645, "learning_rate": 0.00017485020600735649, "loss": 1.4396, "step": 9691 }, { "epoch": 0.12594311963264423, "grad_norm": 0.31107670068740845, "learning_rate": 0.0001748476065454451, "loss": 1.2722, "step": 9692 }, { "epoch": 0.1259561141765601, "grad_norm": 0.4391982853412628, "learning_rate": 0.0001748450070835337, "loss": 1.443, "step": 9693 }, { "epoch": 0.12596910872047598, "grad_norm": 0.3729909360408783, "learning_rate": 0.00017484240762162233, "loss": 1.424, "step": 9694 }, { "epoch": 0.12598210326439185, "grad_norm": 0.5445814728736877, "learning_rate": 0.00017483980815971096, "loss": 1.4115, "step": 9695 }, { "epoch": 0.12599509780830773, "grad_norm": 0.45242586731910706, "learning_rate": 0.00017483720869779955, "loss": 1.3058, "step": 9696 }, { "epoch": 0.1260080923522236, "grad_norm": 0.38592544198036194, "learning_rate": 0.00017483460923588818, "loss": 1.4362, "step": 9697 }, { "epoch": 0.12602108689613947, "grad_norm": 0.5082756280899048, "learning_rate": 0.0001748320097739768, "loss": 1.3649, "step": 9698 }, { "epoch": 0.12603408144005535, "grad_norm": 0.2916025221347809, "learning_rate": 0.0001748294103120654, "loss": 1.3652, "step": 9699 }, { "epoch": 0.12604707598397122, "grad_norm": 0.319277286529541, "learning_rate": 0.00017482681085015403, "loss": 1.47, "step": 9700 }, { "epoch": 0.1260600705278871, "grad_norm": 0.3986632227897644, "learning_rate": 0.00017482421138824262, "loss": 1.4361, "step": 9701 }, { "epoch": 0.12607306507180296, "grad_norm": 0.41115862131118774, "learning_rate": 0.00017482161192633127, "loss": 1.4418, "step": 9702 }, { "epoch": 0.12608605961571884, "grad_norm": 0.39329981803894043, "learning_rate": 0.00017481901246441987, "loss": 1.554, "step": 9703 }, { "epoch": 0.1260990541596347, "grad_norm": 0.3860429525375366, "learning_rate": 0.0001748164130025085, "loss": 1.4948, "step": 9704 }, { "epoch": 0.12611204870355058, "grad_norm": 0.3884877860546112, "learning_rate": 0.00017481381354059712, "loss": 1.4031, "step": 9705 }, { "epoch": 0.12612504324746646, "grad_norm": 0.3482835590839386, "learning_rate": 0.00017481121407868572, "loss": 1.444, "step": 9706 }, { "epoch": 0.12613803779138236, "grad_norm": 0.5124087929725647, "learning_rate": 0.00017480861461677434, "loss": 1.5332, "step": 9707 }, { "epoch": 0.12615103233529823, "grad_norm": 0.5126358270645142, "learning_rate": 0.00017480601515486294, "loss": 1.385, "step": 9708 }, { "epoch": 0.1261640268792141, "grad_norm": 0.4038727879524231, "learning_rate": 0.00017480341569295156, "loss": 1.4983, "step": 9709 }, { "epoch": 0.12617702142312998, "grad_norm": 2.4577696323394775, "learning_rate": 0.0001748008162310402, "loss": 1.3601, "step": 9710 }, { "epoch": 0.12619001596704585, "grad_norm": 0.3879411816596985, "learning_rate": 0.00017479821676912879, "loss": 1.3718, "step": 9711 }, { "epoch": 0.12620301051096172, "grad_norm": 0.40619683265686035, "learning_rate": 0.0001747956173072174, "loss": 1.5004, "step": 9712 }, { "epoch": 0.1262160050548776, "grad_norm": 0.43570661544799805, "learning_rate": 0.00017479301784530603, "loss": 1.4814, "step": 9713 }, { "epoch": 0.12622899959879347, "grad_norm": 0.3898126184940338, "learning_rate": 0.00017479041838339466, "loss": 1.3759, "step": 9714 }, { "epoch": 0.12624199414270934, "grad_norm": 0.4663819968700409, "learning_rate": 0.00017478781892148326, "loss": 1.5347, "step": 9715 }, { "epoch": 0.1262549886866252, "grad_norm": 0.3044021725654602, "learning_rate": 0.00017478521945957188, "loss": 1.1682, "step": 9716 }, { "epoch": 0.1262679832305411, "grad_norm": 0.43841981887817383, "learning_rate": 0.0001747826199976605, "loss": 1.2314, "step": 9717 }, { "epoch": 0.12628097777445696, "grad_norm": 0.4041503071784973, "learning_rate": 0.0001747800205357491, "loss": 1.5978, "step": 9718 }, { "epoch": 0.12629397231837283, "grad_norm": 0.38805675506591797, "learning_rate": 0.00017477742107383773, "loss": 1.4442, "step": 9719 }, { "epoch": 0.1263069668622887, "grad_norm": 0.37587684392929077, "learning_rate": 0.00017477482161192633, "loss": 1.4096, "step": 9720 }, { "epoch": 0.12631996140620458, "grad_norm": 0.39248234033584595, "learning_rate": 0.00017477222215001498, "loss": 1.4625, "step": 9721 }, { "epoch": 0.12633295595012045, "grad_norm": 0.4851655066013336, "learning_rate": 0.00017476962268810357, "loss": 1.5584, "step": 9722 }, { "epoch": 0.12634595049403632, "grad_norm": 0.30397647619247437, "learning_rate": 0.00017476702322619217, "loss": 1.3512, "step": 9723 }, { "epoch": 0.1263589450379522, "grad_norm": 0.3748270571231842, "learning_rate": 0.0001747644237642808, "loss": 1.4888, "step": 9724 }, { "epoch": 0.12637193958186807, "grad_norm": 0.4174167215824127, "learning_rate": 0.00017476182430236942, "loss": 1.4032, "step": 9725 }, { "epoch": 0.12638493412578394, "grad_norm": 0.3229725658893585, "learning_rate": 0.00017475922484045804, "loss": 1.4386, "step": 9726 }, { "epoch": 0.12639792866969982, "grad_norm": 0.4116579592227936, "learning_rate": 0.00017475662537854664, "loss": 1.3328, "step": 9727 }, { "epoch": 0.1264109232136157, "grad_norm": 0.46099111437797546, "learning_rate": 0.00017475402591663527, "loss": 1.482, "step": 9728 }, { "epoch": 0.12642391775753156, "grad_norm": 0.35294049978256226, "learning_rate": 0.0001747514264547239, "loss": 1.396, "step": 9729 }, { "epoch": 0.12643691230144743, "grad_norm": 0.37404778599739075, "learning_rate": 0.0001747488269928125, "loss": 1.4334, "step": 9730 }, { "epoch": 0.1264499068453633, "grad_norm": 0.34945693612098694, "learning_rate": 0.0001747462275309011, "loss": 1.5552, "step": 9731 }, { "epoch": 0.12646290138927918, "grad_norm": 0.3755609691143036, "learning_rate": 0.0001747436280689897, "loss": 1.3793, "step": 9732 }, { "epoch": 0.12647589593319505, "grad_norm": 0.4764070212841034, "learning_rate": 0.00017474102860707836, "loss": 1.4632, "step": 9733 }, { "epoch": 0.12648889047711093, "grad_norm": 0.3914155960083008, "learning_rate": 0.00017473842914516696, "loss": 1.4597, "step": 9734 }, { "epoch": 0.1265018850210268, "grad_norm": 0.39013493061065674, "learning_rate": 0.00017473582968325556, "loss": 1.3596, "step": 9735 }, { "epoch": 0.12651487956494267, "grad_norm": 0.30755171179771423, "learning_rate": 0.00017473323022134418, "loss": 1.3081, "step": 9736 }, { "epoch": 0.12652787410885855, "grad_norm": 0.39651721715927124, "learning_rate": 0.0001747306307594328, "loss": 1.3449, "step": 9737 }, { "epoch": 0.12654086865277442, "grad_norm": 0.36479347944259644, "learning_rate": 0.00017472803129752143, "loss": 1.3956, "step": 9738 }, { "epoch": 0.1265538631966903, "grad_norm": 0.3559912145137787, "learning_rate": 0.00017472543183561003, "loss": 1.1267, "step": 9739 }, { "epoch": 0.12656685774060616, "grad_norm": 0.44560301303863525, "learning_rate": 0.00017472283237369865, "loss": 1.4641, "step": 9740 }, { "epoch": 0.12657985228452204, "grad_norm": 0.39466798305511475, "learning_rate": 0.00017472023291178728, "loss": 1.5841, "step": 9741 }, { "epoch": 0.1265928468284379, "grad_norm": 0.4178992807865143, "learning_rate": 0.00017471763344987587, "loss": 1.2845, "step": 9742 }, { "epoch": 0.12660584137235378, "grad_norm": 0.3627108037471771, "learning_rate": 0.0001747150339879645, "loss": 1.4178, "step": 9743 }, { "epoch": 0.12661883591626966, "grad_norm": 0.3000160753726959, "learning_rate": 0.00017471243452605312, "loss": 1.2315, "step": 9744 }, { "epoch": 0.12663183046018553, "grad_norm": 0.38646963238716125, "learning_rate": 0.00017470983506414175, "loss": 1.5533, "step": 9745 }, { "epoch": 0.1266448250041014, "grad_norm": 0.44327566027641296, "learning_rate": 0.00017470723560223034, "loss": 1.477, "step": 9746 }, { "epoch": 0.12665781954801728, "grad_norm": 0.46295878291130066, "learning_rate": 0.00017470463614031897, "loss": 1.4691, "step": 9747 }, { "epoch": 0.12667081409193315, "grad_norm": 0.35043972730636597, "learning_rate": 0.0001747020366784076, "loss": 1.2843, "step": 9748 }, { "epoch": 0.12668380863584902, "grad_norm": 0.4215966761112213, "learning_rate": 0.0001746994372164962, "loss": 1.5864, "step": 9749 }, { "epoch": 0.1266968031797649, "grad_norm": 0.3857872188091278, "learning_rate": 0.00017469683775458482, "loss": 1.2177, "step": 9750 }, { "epoch": 0.12670979772368077, "grad_norm": 0.45712223649024963, "learning_rate": 0.0001746942382926734, "loss": 1.4889, "step": 9751 }, { "epoch": 0.12672279226759664, "grad_norm": 0.397826224565506, "learning_rate": 0.00017469163883076204, "loss": 1.3779, "step": 9752 }, { "epoch": 0.1267357868115125, "grad_norm": 0.45743003487586975, "learning_rate": 0.00017468903936885066, "loss": 1.6859, "step": 9753 }, { "epoch": 0.12674878135542839, "grad_norm": 0.31271249055862427, "learning_rate": 0.00017468643990693926, "loss": 1.1679, "step": 9754 }, { "epoch": 0.12676177589934426, "grad_norm": 0.3818182051181793, "learning_rate": 0.00017468384044502788, "loss": 1.2417, "step": 9755 }, { "epoch": 0.12677477044326013, "grad_norm": 0.3628392219543457, "learning_rate": 0.0001746812409831165, "loss": 1.7035, "step": 9756 }, { "epoch": 0.126787764987176, "grad_norm": 0.3837031424045563, "learning_rate": 0.00017467864152120513, "loss": 1.4246, "step": 9757 }, { "epoch": 0.12680075953109188, "grad_norm": 0.36988452076911926, "learning_rate": 0.00017467604205929373, "loss": 1.5692, "step": 9758 }, { "epoch": 0.12681375407500775, "grad_norm": 0.4251534640789032, "learning_rate": 0.00017467344259738235, "loss": 1.3156, "step": 9759 }, { "epoch": 0.12682674861892362, "grad_norm": 0.3459431231021881, "learning_rate": 0.00017467084313547098, "loss": 1.3139, "step": 9760 }, { "epoch": 0.1268397431628395, "grad_norm": 0.4471019506454468, "learning_rate": 0.00017466824367355958, "loss": 1.3948, "step": 9761 }, { "epoch": 0.12685273770675537, "grad_norm": 0.28526002168655396, "learning_rate": 0.0001746656442116482, "loss": 1.3949, "step": 9762 }, { "epoch": 0.12686573225067124, "grad_norm": 0.40695279836654663, "learning_rate": 0.0001746630447497368, "loss": 1.3802, "step": 9763 }, { "epoch": 0.12687872679458712, "grad_norm": 0.36840522289276123, "learning_rate": 0.00017466044528782542, "loss": 1.6343, "step": 9764 }, { "epoch": 0.126891721338503, "grad_norm": 0.48099154233932495, "learning_rate": 0.00017465784582591405, "loss": 1.3398, "step": 9765 }, { "epoch": 0.12690471588241886, "grad_norm": 0.3887917995452881, "learning_rate": 0.00017465524636400264, "loss": 1.3394, "step": 9766 }, { "epoch": 0.12691771042633473, "grad_norm": 0.3510952889919281, "learning_rate": 0.00017465264690209127, "loss": 1.3629, "step": 9767 }, { "epoch": 0.1269307049702506, "grad_norm": 0.46056118607521057, "learning_rate": 0.0001746500474401799, "loss": 1.5036, "step": 9768 }, { "epoch": 0.12694369951416648, "grad_norm": 0.4226197600364685, "learning_rate": 0.00017464744797826852, "loss": 1.4027, "step": 9769 }, { "epoch": 0.12695669405808235, "grad_norm": 0.4660278856754303, "learning_rate": 0.00017464484851635712, "loss": 1.4996, "step": 9770 }, { "epoch": 0.12696968860199823, "grad_norm": 0.39086925983428955, "learning_rate": 0.00017464224905444574, "loss": 1.2653, "step": 9771 }, { "epoch": 0.1269826831459141, "grad_norm": 0.39842262864112854, "learning_rate": 0.00017463964959253436, "loss": 1.3166, "step": 9772 }, { "epoch": 0.12699567768982997, "grad_norm": 0.4808734357357025, "learning_rate": 0.00017463705013062296, "loss": 1.4062, "step": 9773 }, { "epoch": 0.12700867223374585, "grad_norm": 0.36386552453041077, "learning_rate": 0.00017463445066871159, "loss": 1.2046, "step": 9774 }, { "epoch": 0.12702166677766172, "grad_norm": 0.42672857642173767, "learning_rate": 0.00017463185120680018, "loss": 1.7419, "step": 9775 }, { "epoch": 0.1270346613215776, "grad_norm": 0.4021878242492676, "learning_rate": 0.00017462925174488884, "loss": 1.2812, "step": 9776 }, { "epoch": 0.12704765586549346, "grad_norm": 0.33598777651786804, "learning_rate": 0.00017462665228297743, "loss": 1.0491, "step": 9777 }, { "epoch": 0.12706065040940934, "grad_norm": 0.4053421914577484, "learning_rate": 0.00017462405282106603, "loss": 1.419, "step": 9778 }, { "epoch": 0.1270736449533252, "grad_norm": 0.4082357585430145, "learning_rate": 0.00017462145335915468, "loss": 1.7965, "step": 9779 }, { "epoch": 0.12708663949724108, "grad_norm": 0.286214143037796, "learning_rate": 0.00017461885389724328, "loss": 1.4666, "step": 9780 }, { "epoch": 0.12709963404115696, "grad_norm": 0.5199337601661682, "learning_rate": 0.0001746162544353319, "loss": 1.472, "step": 9781 }, { "epoch": 0.12711262858507283, "grad_norm": 0.41897672414779663, "learning_rate": 0.0001746136549734205, "loss": 1.332, "step": 9782 }, { "epoch": 0.12712562312898873, "grad_norm": 0.38286423683166504, "learning_rate": 0.00017461105551150913, "loss": 1.4898, "step": 9783 }, { "epoch": 0.1271386176729046, "grad_norm": 0.3432929813861847, "learning_rate": 0.00017460845604959775, "loss": 1.4053, "step": 9784 }, { "epoch": 0.12715161221682048, "grad_norm": 0.34561246633529663, "learning_rate": 0.00017460585658768635, "loss": 1.5973, "step": 9785 }, { "epoch": 0.12716460676073635, "grad_norm": 0.4448879659175873, "learning_rate": 0.00017460325712577497, "loss": 1.5423, "step": 9786 }, { "epoch": 0.12717760130465222, "grad_norm": 0.43982186913490295, "learning_rate": 0.0001746006576638636, "loss": 1.3203, "step": 9787 }, { "epoch": 0.1271905958485681, "grad_norm": 0.3380548357963562, "learning_rate": 0.00017459805820195222, "loss": 1.3999, "step": 9788 }, { "epoch": 0.12720359039248397, "grad_norm": 0.410491943359375, "learning_rate": 0.00017459545874004082, "loss": 1.373, "step": 9789 }, { "epoch": 0.12721658493639984, "grad_norm": 0.412641316652298, "learning_rate": 0.00017459285927812942, "loss": 1.4604, "step": 9790 }, { "epoch": 0.1272295794803157, "grad_norm": 0.34360596537590027, "learning_rate": 0.00017459025981621807, "loss": 1.3508, "step": 9791 }, { "epoch": 0.1272425740242316, "grad_norm": 0.3051782548427582, "learning_rate": 0.00017458766035430666, "loss": 1.5529, "step": 9792 }, { "epoch": 0.12725556856814746, "grad_norm": 0.3569876551628113, "learning_rate": 0.0001745850608923953, "loss": 1.5069, "step": 9793 }, { "epoch": 0.12726856311206333, "grad_norm": 0.4056491255760193, "learning_rate": 0.00017458246143048389, "loss": 1.2813, "step": 9794 }, { "epoch": 0.1272815576559792, "grad_norm": 0.45286914706230164, "learning_rate": 0.0001745798619685725, "loss": 1.5803, "step": 9795 }, { "epoch": 0.12729455219989508, "grad_norm": 0.3876975178718567, "learning_rate": 0.00017457726250666114, "loss": 1.1811, "step": 9796 }, { "epoch": 0.12730754674381095, "grad_norm": 0.3707972466945648, "learning_rate": 0.00017457466304474973, "loss": 1.4004, "step": 9797 }, { "epoch": 0.12732054128772682, "grad_norm": 0.3774029016494751, "learning_rate": 0.00017457206358283836, "loss": 1.3448, "step": 9798 }, { "epoch": 0.1273335358316427, "grad_norm": 0.4498341381549835, "learning_rate": 0.00017456946412092698, "loss": 1.4729, "step": 9799 }, { "epoch": 0.12734653037555857, "grad_norm": 0.6029853224754333, "learning_rate": 0.0001745668646590156, "loss": 1.4736, "step": 9800 }, { "epoch": 0.12735952491947444, "grad_norm": 0.3127281665802002, "learning_rate": 0.0001745642651971042, "loss": 1.2452, "step": 9801 }, { "epoch": 0.12737251946339032, "grad_norm": 0.39024844765663147, "learning_rate": 0.0001745616657351928, "loss": 1.584, "step": 9802 }, { "epoch": 0.1273855140073062, "grad_norm": 0.3598044812679291, "learning_rate": 0.00017455906627328145, "loss": 1.5844, "step": 9803 }, { "epoch": 0.12739850855122206, "grad_norm": 0.4696773290634155, "learning_rate": 0.00017455646681137005, "loss": 1.288, "step": 9804 }, { "epoch": 0.12741150309513793, "grad_norm": 0.4604615271091461, "learning_rate": 0.00017455386734945867, "loss": 1.5048, "step": 9805 }, { "epoch": 0.1274244976390538, "grad_norm": 0.3901355266571045, "learning_rate": 0.00017455126788754727, "loss": 1.2994, "step": 9806 }, { "epoch": 0.12743749218296968, "grad_norm": 0.39078086614608765, "learning_rate": 0.0001745486684256359, "loss": 1.4265, "step": 9807 }, { "epoch": 0.12745048672688555, "grad_norm": 0.48265746235847473, "learning_rate": 0.00017454606896372452, "loss": 1.7173, "step": 9808 }, { "epoch": 0.12746348127080143, "grad_norm": 0.42275094985961914, "learning_rate": 0.00017454346950181312, "loss": 1.3499, "step": 9809 }, { "epoch": 0.1274764758147173, "grad_norm": 0.354317843914032, "learning_rate": 0.00017454087003990174, "loss": 1.3284, "step": 9810 }, { "epoch": 0.12748947035863317, "grad_norm": 0.4367891848087311, "learning_rate": 0.00017453827057799037, "loss": 1.4068, "step": 9811 }, { "epoch": 0.12750246490254905, "grad_norm": 0.3880905210971832, "learning_rate": 0.000174535671116079, "loss": 1.4614, "step": 9812 }, { "epoch": 0.12751545944646492, "grad_norm": 0.3903173506259918, "learning_rate": 0.0001745330716541676, "loss": 1.5189, "step": 9813 }, { "epoch": 0.1275284539903808, "grad_norm": 0.3960549831390381, "learning_rate": 0.0001745304721922562, "loss": 1.6009, "step": 9814 }, { "epoch": 0.12754144853429666, "grad_norm": 0.5237694382667542, "learning_rate": 0.00017452787273034484, "loss": 1.5611, "step": 9815 }, { "epoch": 0.12755444307821254, "grad_norm": 0.45572733879089355, "learning_rate": 0.00017452527326843344, "loss": 1.3569, "step": 9816 }, { "epoch": 0.1275674376221284, "grad_norm": 0.25524795055389404, "learning_rate": 0.00017452267380652206, "loss": 1.4838, "step": 9817 }, { "epoch": 0.12758043216604428, "grad_norm": 0.4669489562511444, "learning_rate": 0.00017452007434461068, "loss": 1.4431, "step": 9818 }, { "epoch": 0.12759342670996016, "grad_norm": 0.4880148470401764, "learning_rate": 0.00017451747488269928, "loss": 1.6159, "step": 9819 }, { "epoch": 0.12760642125387603, "grad_norm": 0.3587469756603241, "learning_rate": 0.0001745148754207879, "loss": 1.3839, "step": 9820 }, { "epoch": 0.1276194157977919, "grad_norm": 0.42655476927757263, "learning_rate": 0.0001745122759588765, "loss": 1.4199, "step": 9821 }, { "epoch": 0.12763241034170777, "grad_norm": 0.33567482233047485, "learning_rate": 0.00017450967649696516, "loss": 1.3931, "step": 9822 }, { "epoch": 0.12764540488562365, "grad_norm": 0.3328567147254944, "learning_rate": 0.00017450707703505375, "loss": 1.3867, "step": 9823 }, { "epoch": 0.12765839942953952, "grad_norm": 0.33665406703948975, "learning_rate": 0.00017450447757314238, "loss": 1.2934, "step": 9824 }, { "epoch": 0.1276713939734554, "grad_norm": 0.4406411945819855, "learning_rate": 0.00017450187811123097, "loss": 1.3838, "step": 9825 }, { "epoch": 0.12768438851737127, "grad_norm": 0.38822394609451294, "learning_rate": 0.0001744992786493196, "loss": 1.5633, "step": 9826 }, { "epoch": 0.12769738306128714, "grad_norm": 0.4578480124473572, "learning_rate": 0.00017449667918740822, "loss": 1.4932, "step": 9827 }, { "epoch": 0.127710377605203, "grad_norm": 0.4058312773704529, "learning_rate": 0.00017449407972549682, "loss": 1.4168, "step": 9828 }, { "epoch": 0.12772337214911889, "grad_norm": 0.4173702597618103, "learning_rate": 0.00017449148026358545, "loss": 1.3421, "step": 9829 }, { "epoch": 0.12773636669303476, "grad_norm": 0.44695279002189636, "learning_rate": 0.00017448888080167407, "loss": 1.5197, "step": 9830 }, { "epoch": 0.12774936123695063, "grad_norm": 0.36493515968322754, "learning_rate": 0.00017448628133976267, "loss": 1.4921, "step": 9831 }, { "epoch": 0.1277623557808665, "grad_norm": 0.356090784072876, "learning_rate": 0.0001744836818778513, "loss": 1.5286, "step": 9832 }, { "epoch": 0.12777535032478238, "grad_norm": 0.34752827882766724, "learning_rate": 0.0001744810824159399, "loss": 1.2365, "step": 9833 }, { "epoch": 0.12778834486869825, "grad_norm": 0.33421093225479126, "learning_rate": 0.00017447848295402854, "loss": 1.417, "step": 9834 }, { "epoch": 0.12780133941261412, "grad_norm": 0.4228314757347107, "learning_rate": 0.00017447588349211714, "loss": 1.6549, "step": 9835 }, { "epoch": 0.12781433395653, "grad_norm": 0.4263027012348175, "learning_rate": 0.00017447328403020576, "loss": 1.3583, "step": 9836 }, { "epoch": 0.12782732850044587, "grad_norm": 0.45514747500419617, "learning_rate": 0.00017447068456829436, "loss": 1.6082, "step": 9837 }, { "epoch": 0.12784032304436174, "grad_norm": 0.3952755928039551, "learning_rate": 0.00017446808510638298, "loss": 1.5577, "step": 9838 }, { "epoch": 0.12785331758827762, "grad_norm": 0.40582403540611267, "learning_rate": 0.0001744654856444716, "loss": 1.4233, "step": 9839 }, { "epoch": 0.1278663121321935, "grad_norm": 0.44542303681373596, "learning_rate": 0.0001744628861825602, "loss": 1.493, "step": 9840 }, { "epoch": 0.12787930667610936, "grad_norm": 0.3275494873523712, "learning_rate": 0.00017446028672064883, "loss": 1.2484, "step": 9841 }, { "epoch": 0.12789230122002523, "grad_norm": 0.39033234119415283, "learning_rate": 0.00017445768725873746, "loss": 1.4901, "step": 9842 }, { "epoch": 0.1279052957639411, "grad_norm": 0.37724947929382324, "learning_rate": 0.00017445508779682608, "loss": 1.4831, "step": 9843 }, { "epoch": 0.12791829030785698, "grad_norm": 0.42083632946014404, "learning_rate": 0.00017445248833491468, "loss": 1.5955, "step": 9844 }, { "epoch": 0.12793128485177285, "grad_norm": 0.3097129762172699, "learning_rate": 0.00017444988887300327, "loss": 1.3957, "step": 9845 }, { "epoch": 0.12794427939568873, "grad_norm": 0.27840664982795715, "learning_rate": 0.00017444728941109193, "loss": 1.2559, "step": 9846 }, { "epoch": 0.1279572739396046, "grad_norm": 0.3947417140007019, "learning_rate": 0.00017444468994918052, "loss": 1.5292, "step": 9847 }, { "epoch": 0.12797026848352047, "grad_norm": 0.37195923924446106, "learning_rate": 0.00017444209048726915, "loss": 1.3016, "step": 9848 }, { "epoch": 0.12798326302743634, "grad_norm": 0.41136813163757324, "learning_rate": 0.00017443949102535775, "loss": 1.4146, "step": 9849 }, { "epoch": 0.12799625757135222, "grad_norm": 0.45513203740119934, "learning_rate": 0.00017443689156344637, "loss": 1.4977, "step": 9850 }, { "epoch": 0.1280092521152681, "grad_norm": 0.46818891167640686, "learning_rate": 0.000174434292101535, "loss": 1.6209, "step": 9851 }, { "epoch": 0.12802224665918396, "grad_norm": 0.46940889954566956, "learning_rate": 0.0001744316926396236, "loss": 1.2997, "step": 9852 }, { "epoch": 0.12803524120309984, "grad_norm": 0.4131140410900116, "learning_rate": 0.00017442909317771224, "loss": 1.5038, "step": 9853 }, { "epoch": 0.1280482357470157, "grad_norm": 0.3882255554199219, "learning_rate": 0.00017442649371580084, "loss": 1.3948, "step": 9854 }, { "epoch": 0.12806123029093158, "grad_norm": 0.3601725101470947, "learning_rate": 0.00017442389425388946, "loss": 1.3917, "step": 9855 }, { "epoch": 0.12807422483484746, "grad_norm": 0.375658243894577, "learning_rate": 0.00017442129479197806, "loss": 1.2736, "step": 9856 }, { "epoch": 0.12808721937876333, "grad_norm": 1.2153809070587158, "learning_rate": 0.0001744186953300667, "loss": 1.2592, "step": 9857 }, { "epoch": 0.1281002139226792, "grad_norm": 0.424893319606781, "learning_rate": 0.0001744160958681553, "loss": 1.4105, "step": 9858 }, { "epoch": 0.1281132084665951, "grad_norm": 0.3003944456577301, "learning_rate": 0.0001744134964062439, "loss": 1.6327, "step": 9859 }, { "epoch": 0.12812620301051098, "grad_norm": 0.4156136214733124, "learning_rate": 0.00017441089694433253, "loss": 1.4591, "step": 9860 }, { "epoch": 0.12813919755442685, "grad_norm": 0.4319014549255371, "learning_rate": 0.00017440829748242116, "loss": 1.3651, "step": 9861 }, { "epoch": 0.12815219209834272, "grad_norm": 0.4158675968647003, "learning_rate": 0.00017440569802050975, "loss": 1.4095, "step": 9862 }, { "epoch": 0.1281651866422586, "grad_norm": 0.42379266023635864, "learning_rate": 0.00017440309855859838, "loss": 1.5282, "step": 9863 }, { "epoch": 0.12817818118617447, "grad_norm": 0.38992419838905334, "learning_rate": 0.00017440049909668698, "loss": 1.492, "step": 9864 }, { "epoch": 0.12819117573009034, "grad_norm": 0.43003717064857483, "learning_rate": 0.00017439789963477563, "loss": 1.461, "step": 9865 }, { "epoch": 0.1282041702740062, "grad_norm": 0.39112550020217896, "learning_rate": 0.00017439530017286423, "loss": 1.4819, "step": 9866 }, { "epoch": 0.12821716481792209, "grad_norm": 0.46558400988578796, "learning_rate": 0.00017439270071095285, "loss": 1.5735, "step": 9867 }, { "epoch": 0.12823015936183796, "grad_norm": 0.3939507305622101, "learning_rate": 0.00017439010124904145, "loss": 1.457, "step": 9868 }, { "epoch": 0.12824315390575383, "grad_norm": 0.38000988960266113, "learning_rate": 0.00017438750178713007, "loss": 1.5126, "step": 9869 }, { "epoch": 0.1282561484496697, "grad_norm": 0.346591055393219, "learning_rate": 0.0001743849023252187, "loss": 1.3331, "step": 9870 }, { "epoch": 0.12826914299358558, "grad_norm": 0.3416518568992615, "learning_rate": 0.0001743823028633073, "loss": 1.2106, "step": 9871 }, { "epoch": 0.12828213753750145, "grad_norm": 0.45469239354133606, "learning_rate": 0.00017437970340139592, "loss": 1.4145, "step": 9872 }, { "epoch": 0.12829513208141732, "grad_norm": 0.32440483570098877, "learning_rate": 0.00017437710393948454, "loss": 1.1556, "step": 9873 }, { "epoch": 0.1283081266253332, "grad_norm": 0.40842241048812866, "learning_rate": 0.00017437450447757314, "loss": 1.5499, "step": 9874 }, { "epoch": 0.12832112116924907, "grad_norm": 0.49925798177719116, "learning_rate": 0.00017437190501566176, "loss": 1.5201, "step": 9875 }, { "epoch": 0.12833411571316494, "grad_norm": 0.41571882367134094, "learning_rate": 0.00017436930555375036, "loss": 1.4696, "step": 9876 }, { "epoch": 0.12834711025708082, "grad_norm": 0.410710871219635, "learning_rate": 0.00017436670609183901, "loss": 1.3628, "step": 9877 }, { "epoch": 0.1283601048009967, "grad_norm": 0.37453174591064453, "learning_rate": 0.0001743641066299276, "loss": 1.3728, "step": 9878 }, { "epoch": 0.12837309934491256, "grad_norm": 0.4252074062824249, "learning_rate": 0.00017436150716801624, "loss": 1.6187, "step": 9879 }, { "epoch": 0.12838609388882843, "grad_norm": 0.5142189264297485, "learning_rate": 0.00017435890770610483, "loss": 1.6014, "step": 9880 }, { "epoch": 0.1283990884327443, "grad_norm": 0.3541359305381775, "learning_rate": 0.00017435630824419346, "loss": 1.6009, "step": 9881 }, { "epoch": 0.12841208297666018, "grad_norm": 0.3904385566711426, "learning_rate": 0.00017435370878228208, "loss": 1.5398, "step": 9882 }, { "epoch": 0.12842507752057605, "grad_norm": 0.4350864589214325, "learning_rate": 0.00017435110932037068, "loss": 1.4435, "step": 9883 }, { "epoch": 0.12843807206449193, "grad_norm": 0.4111993908882141, "learning_rate": 0.0001743485098584593, "loss": 1.2768, "step": 9884 }, { "epoch": 0.1284510666084078, "grad_norm": 0.37192025780677795, "learning_rate": 0.00017434591039654793, "loss": 1.5097, "step": 9885 }, { "epoch": 0.12846406115232367, "grad_norm": 0.5264346599578857, "learning_rate": 0.00017434331093463653, "loss": 1.5803, "step": 9886 }, { "epoch": 0.12847705569623954, "grad_norm": 0.3304254114627838, "learning_rate": 0.00017434071147272515, "loss": 1.4239, "step": 9887 }, { "epoch": 0.12849005024015542, "grad_norm": 0.5335957407951355, "learning_rate": 0.00017433811201081375, "loss": 1.4026, "step": 9888 }, { "epoch": 0.1285030447840713, "grad_norm": 0.42111849784851074, "learning_rate": 0.0001743355125489024, "loss": 1.5571, "step": 9889 }, { "epoch": 0.12851603932798716, "grad_norm": 0.3414677083492279, "learning_rate": 0.000174332913086991, "loss": 1.4748, "step": 9890 }, { "epoch": 0.12852903387190304, "grad_norm": 0.43178120255470276, "learning_rate": 0.00017433031362507962, "loss": 1.4642, "step": 9891 }, { "epoch": 0.1285420284158189, "grad_norm": 0.3523310422897339, "learning_rate": 0.00017432771416316825, "loss": 1.484, "step": 9892 }, { "epoch": 0.12855502295973478, "grad_norm": 0.3393974006175995, "learning_rate": 0.00017432511470125684, "loss": 1.3141, "step": 9893 }, { "epoch": 0.12856801750365066, "grad_norm": 0.4137031137943268, "learning_rate": 0.00017432251523934547, "loss": 1.4515, "step": 9894 }, { "epoch": 0.12858101204756653, "grad_norm": 0.43366238474845886, "learning_rate": 0.00017431991577743406, "loss": 1.5058, "step": 9895 }, { "epoch": 0.1285940065914824, "grad_norm": 0.3363425135612488, "learning_rate": 0.00017431731631552272, "loss": 1.4453, "step": 9896 }, { "epoch": 0.12860700113539827, "grad_norm": 0.47601714730262756, "learning_rate": 0.00017431471685361131, "loss": 1.5211, "step": 9897 }, { "epoch": 0.12861999567931415, "grad_norm": 0.2401895374059677, "learning_rate": 0.00017431211739169994, "loss": 1.2929, "step": 9898 }, { "epoch": 0.12863299022323002, "grad_norm": 0.44325509667396545, "learning_rate": 0.00017430951792978854, "loss": 1.4449, "step": 9899 }, { "epoch": 0.1286459847671459, "grad_norm": 0.38812899589538574, "learning_rate": 0.00017430691846787716, "loss": 1.5306, "step": 9900 }, { "epoch": 0.12865897931106177, "grad_norm": 0.5528951287269592, "learning_rate": 0.00017430431900596578, "loss": 1.392, "step": 9901 }, { "epoch": 0.12867197385497764, "grad_norm": 0.3680340349674225, "learning_rate": 0.00017430171954405438, "loss": 1.2677, "step": 9902 }, { "epoch": 0.1286849683988935, "grad_norm": 0.31848806142807007, "learning_rate": 0.000174299120082143, "loss": 1.2974, "step": 9903 }, { "epoch": 0.12869796294280939, "grad_norm": 0.3728020489215851, "learning_rate": 0.00017429652062023163, "loss": 1.5088, "step": 9904 }, { "epoch": 0.12871095748672526, "grad_norm": 0.41009002923965454, "learning_rate": 0.00017429392115832023, "loss": 1.4047, "step": 9905 }, { "epoch": 0.12872395203064113, "grad_norm": 0.46786561608314514, "learning_rate": 0.00017429132169640885, "loss": 1.547, "step": 9906 }, { "epoch": 0.128736946574557, "grad_norm": 0.4904294013977051, "learning_rate": 0.00017428872223449745, "loss": 1.5006, "step": 9907 }, { "epoch": 0.12874994111847288, "grad_norm": 0.3381388187408447, "learning_rate": 0.0001742861227725861, "loss": 1.3418, "step": 9908 }, { "epoch": 0.12876293566238875, "grad_norm": 0.3874700665473938, "learning_rate": 0.0001742835233106747, "loss": 1.3174, "step": 9909 }, { "epoch": 0.12877593020630462, "grad_norm": 0.39168882369995117, "learning_rate": 0.00017428092384876332, "loss": 1.4861, "step": 9910 }, { "epoch": 0.1287889247502205, "grad_norm": 0.4563981890678406, "learning_rate": 0.00017427832438685192, "loss": 1.4282, "step": 9911 }, { "epoch": 0.12880191929413637, "grad_norm": 0.41339311003685, "learning_rate": 0.00017427572492494055, "loss": 1.3893, "step": 9912 }, { "epoch": 0.12881491383805224, "grad_norm": 0.48420026898384094, "learning_rate": 0.00017427312546302917, "loss": 1.5723, "step": 9913 }, { "epoch": 0.12882790838196811, "grad_norm": 0.29279735684394836, "learning_rate": 0.00017427052600111777, "loss": 1.3857, "step": 9914 }, { "epoch": 0.128840902925884, "grad_norm": 0.39467430114746094, "learning_rate": 0.0001742679265392064, "loss": 1.6277, "step": 9915 }, { "epoch": 0.12885389746979986, "grad_norm": 0.41499289870262146, "learning_rate": 0.00017426532707729502, "loss": 1.3248, "step": 9916 }, { "epoch": 0.12886689201371573, "grad_norm": 0.3549771010875702, "learning_rate": 0.00017426272761538361, "loss": 1.5646, "step": 9917 }, { "epoch": 0.1288798865576316, "grad_norm": 0.42362797260284424, "learning_rate": 0.00017426012815347224, "loss": 1.4338, "step": 9918 }, { "epoch": 0.12889288110154748, "grad_norm": 0.406766414642334, "learning_rate": 0.00017425752869156084, "loss": 1.6041, "step": 9919 }, { "epoch": 0.12890587564546335, "grad_norm": 0.4528195261955261, "learning_rate": 0.0001742549292296495, "loss": 1.3579, "step": 9920 }, { "epoch": 0.12891887018937923, "grad_norm": 0.4112229645252228, "learning_rate": 0.00017425232976773808, "loss": 1.5462, "step": 9921 }, { "epoch": 0.1289318647332951, "grad_norm": 0.4516632556915283, "learning_rate": 0.0001742497303058267, "loss": 1.6634, "step": 9922 }, { "epoch": 0.12894485927721097, "grad_norm": 0.4296059012413025, "learning_rate": 0.0001742471308439153, "loss": 1.4795, "step": 9923 }, { "epoch": 0.12895785382112684, "grad_norm": 0.43611133098602295, "learning_rate": 0.00017424453138200393, "loss": 1.4189, "step": 9924 }, { "epoch": 0.12897084836504272, "grad_norm": 0.39404794573783875, "learning_rate": 0.00017424193192009256, "loss": 1.2678, "step": 9925 }, { "epoch": 0.1289838429089586, "grad_norm": 0.3659047484397888, "learning_rate": 0.00017423933245818115, "loss": 1.3423, "step": 9926 }, { "epoch": 0.12899683745287446, "grad_norm": 0.32098567485809326, "learning_rate": 0.0001742367329962698, "loss": 1.6296, "step": 9927 }, { "epoch": 0.12900983199679034, "grad_norm": 0.42100879549980164, "learning_rate": 0.0001742341335343584, "loss": 1.4443, "step": 9928 }, { "epoch": 0.1290228265407062, "grad_norm": 0.2853875458240509, "learning_rate": 0.000174231534072447, "loss": 1.3316, "step": 9929 }, { "epoch": 0.12903582108462208, "grad_norm": 0.3158057630062103, "learning_rate": 0.00017422893461053562, "loss": 1.5502, "step": 9930 }, { "epoch": 0.12904881562853796, "grad_norm": 0.3264307975769043, "learning_rate": 0.00017422633514862425, "loss": 1.4234, "step": 9931 }, { "epoch": 0.12906181017245383, "grad_norm": 0.4035046696662903, "learning_rate": 0.00017422373568671287, "loss": 1.5789, "step": 9932 }, { "epoch": 0.1290748047163697, "grad_norm": 0.3322938084602356, "learning_rate": 0.00017422113622480147, "loss": 1.2747, "step": 9933 }, { "epoch": 0.12908779926028557, "grad_norm": 0.35565537214279175, "learning_rate": 0.0001742185367628901, "loss": 1.3475, "step": 9934 }, { "epoch": 0.12910079380420147, "grad_norm": 0.44883137941360474, "learning_rate": 0.00017421593730097872, "loss": 1.5142, "step": 9935 }, { "epoch": 0.12911378834811735, "grad_norm": 0.49093690514564514, "learning_rate": 0.00017421333783906732, "loss": 1.5117, "step": 9936 }, { "epoch": 0.12912678289203322, "grad_norm": 0.3607786297798157, "learning_rate": 0.00017421073837715594, "loss": 1.611, "step": 9937 }, { "epoch": 0.1291397774359491, "grad_norm": 0.38029131293296814, "learning_rate": 0.00017420813891524454, "loss": 1.5231, "step": 9938 }, { "epoch": 0.12915277197986497, "grad_norm": 0.3842584490776062, "learning_rate": 0.0001742055394533332, "loss": 1.5305, "step": 9939 }, { "epoch": 0.12916576652378084, "grad_norm": 0.37519174814224243, "learning_rate": 0.0001742029399914218, "loss": 1.265, "step": 9940 }, { "epoch": 0.1291787610676967, "grad_norm": 0.3791878819465637, "learning_rate": 0.00017420034052951038, "loss": 1.3648, "step": 9941 }, { "epoch": 0.12919175561161259, "grad_norm": 0.31812354922294617, "learning_rate": 0.000174197741067599, "loss": 1.3863, "step": 9942 }, { "epoch": 0.12920475015552846, "grad_norm": 0.2940219044685364, "learning_rate": 0.00017419514160568763, "loss": 1.3034, "step": 9943 }, { "epoch": 0.12921774469944433, "grad_norm": 0.38797011971473694, "learning_rate": 0.00017419254214377626, "loss": 1.3865, "step": 9944 }, { "epoch": 0.1292307392433602, "grad_norm": 0.33331772685050964, "learning_rate": 0.00017418994268186486, "loss": 1.4643, "step": 9945 }, { "epoch": 0.12924373378727608, "grad_norm": 0.3813553750514984, "learning_rate": 0.00017418734321995348, "loss": 1.4999, "step": 9946 }, { "epoch": 0.12925672833119195, "grad_norm": 0.4414568245410919, "learning_rate": 0.0001741847437580421, "loss": 1.3252, "step": 9947 }, { "epoch": 0.12926972287510782, "grad_norm": 0.3919098377227783, "learning_rate": 0.0001741821442961307, "loss": 1.3675, "step": 9948 }, { "epoch": 0.1292827174190237, "grad_norm": 0.37384888529777527, "learning_rate": 0.00017417954483421933, "loss": 1.3339, "step": 9949 }, { "epoch": 0.12929571196293957, "grad_norm": 0.4755634367465973, "learning_rate": 0.00017417694537230792, "loss": 1.4288, "step": 9950 }, { "epoch": 0.12930870650685544, "grad_norm": 0.41291344165802, "learning_rate": 0.00017417434591039658, "loss": 1.6188, "step": 9951 }, { "epoch": 0.12932170105077131, "grad_norm": 0.30861541628837585, "learning_rate": 0.00017417174644848517, "loss": 1.3917, "step": 9952 }, { "epoch": 0.1293346955946872, "grad_norm": 0.40257593989372253, "learning_rate": 0.0001741691469865738, "loss": 1.3579, "step": 9953 }, { "epoch": 0.12934769013860306, "grad_norm": 0.38112974166870117, "learning_rate": 0.0001741665475246624, "loss": 1.3009, "step": 9954 }, { "epoch": 0.12936068468251893, "grad_norm": 0.41760537028312683, "learning_rate": 0.00017416394806275102, "loss": 1.2413, "step": 9955 }, { "epoch": 0.1293736792264348, "grad_norm": 0.42220789194107056, "learning_rate": 0.00017416134860083964, "loss": 1.3973, "step": 9956 }, { "epoch": 0.12938667377035068, "grad_norm": 0.41747868061065674, "learning_rate": 0.00017415874913892824, "loss": 1.4522, "step": 9957 }, { "epoch": 0.12939966831426655, "grad_norm": 0.44334641098976135, "learning_rate": 0.00017415614967701687, "loss": 1.6256, "step": 9958 }, { "epoch": 0.12941266285818243, "grad_norm": 0.3481699824333191, "learning_rate": 0.0001741535502151055, "loss": 1.4451, "step": 9959 }, { "epoch": 0.1294256574020983, "grad_norm": 0.333882212638855, "learning_rate": 0.0001741509507531941, "loss": 1.4569, "step": 9960 }, { "epoch": 0.12943865194601417, "grad_norm": 0.4541371464729309, "learning_rate": 0.0001741483512912827, "loss": 1.279, "step": 9961 }, { "epoch": 0.12945164648993004, "grad_norm": 0.3667670786380768, "learning_rate": 0.0001741457518293713, "loss": 1.3596, "step": 9962 }, { "epoch": 0.12946464103384592, "grad_norm": 0.4397205412387848, "learning_rate": 0.00017414315236745996, "loss": 1.5906, "step": 9963 }, { "epoch": 0.1294776355777618, "grad_norm": 0.3494838774204254, "learning_rate": 0.00017414055290554856, "loss": 1.5216, "step": 9964 }, { "epoch": 0.12949063012167766, "grad_norm": 0.41109204292297363, "learning_rate": 0.00017413795344363718, "loss": 1.5668, "step": 9965 }, { "epoch": 0.12950362466559354, "grad_norm": 0.46567806601524353, "learning_rate": 0.0001741353539817258, "loss": 1.3667, "step": 9966 }, { "epoch": 0.1295166192095094, "grad_norm": 0.43416059017181396, "learning_rate": 0.0001741327545198144, "loss": 1.5459, "step": 9967 }, { "epoch": 0.12952961375342528, "grad_norm": 0.2909247875213623, "learning_rate": 0.00017413015505790303, "loss": 1.2324, "step": 9968 }, { "epoch": 0.12954260829734116, "grad_norm": 0.42831721901893616, "learning_rate": 0.00017412755559599163, "loss": 1.3707, "step": 9969 }, { "epoch": 0.12955560284125703, "grad_norm": 0.4365481734275818, "learning_rate": 0.00017412495613408025, "loss": 1.3582, "step": 9970 }, { "epoch": 0.1295685973851729, "grad_norm": 0.351633757352829, "learning_rate": 0.00017412235667216888, "loss": 1.4046, "step": 9971 }, { "epoch": 0.12958159192908877, "grad_norm": 0.35377952456474304, "learning_rate": 0.00017411975721025747, "loss": 1.238, "step": 9972 }, { "epoch": 0.12959458647300465, "grad_norm": 0.38907644152641296, "learning_rate": 0.0001741171577483461, "loss": 1.4636, "step": 9973 }, { "epoch": 0.12960758101692052, "grad_norm": 0.4175402820110321, "learning_rate": 0.00017411455828643472, "loss": 1.6342, "step": 9974 }, { "epoch": 0.1296205755608364, "grad_norm": 0.2818557918071747, "learning_rate": 0.00017411195882452335, "loss": 1.4161, "step": 9975 }, { "epoch": 0.12963357010475227, "grad_norm": 0.4198802411556244, "learning_rate": 0.00017410935936261194, "loss": 1.4363, "step": 9976 }, { "epoch": 0.12964656464866814, "grad_norm": 0.39951491355895996, "learning_rate": 0.00017410675990070057, "loss": 1.5615, "step": 9977 }, { "epoch": 0.129659559192584, "grad_norm": 0.38862529397010803, "learning_rate": 0.0001741041604387892, "loss": 1.4479, "step": 9978 }, { "epoch": 0.12967255373649988, "grad_norm": 0.3623588979244232, "learning_rate": 0.0001741015609768778, "loss": 1.4353, "step": 9979 }, { "epoch": 0.12968554828041576, "grad_norm": 0.35875847935676575, "learning_rate": 0.00017409896151496641, "loss": 1.2707, "step": 9980 }, { "epoch": 0.12969854282433163, "grad_norm": 0.34811297059059143, "learning_rate": 0.000174096362053055, "loss": 1.5755, "step": 9981 }, { "epoch": 0.1297115373682475, "grad_norm": 0.40699586272239685, "learning_rate": 0.00017409376259114366, "loss": 1.6184, "step": 9982 }, { "epoch": 0.12972453191216338, "grad_norm": 0.3738786578178406, "learning_rate": 0.00017409116312923226, "loss": 1.2635, "step": 9983 }, { "epoch": 0.12973752645607925, "grad_norm": 0.29468458890914917, "learning_rate": 0.00017408856366732086, "loss": 1.6569, "step": 9984 }, { "epoch": 0.12975052099999512, "grad_norm": 0.43510788679122925, "learning_rate": 0.00017408596420540948, "loss": 1.4216, "step": 9985 }, { "epoch": 0.129763515543911, "grad_norm": 0.33560457825660706, "learning_rate": 0.0001740833647434981, "loss": 1.3781, "step": 9986 }, { "epoch": 0.12977651008782687, "grad_norm": 0.3269752562046051, "learning_rate": 0.00017408076528158673, "loss": 1.1667, "step": 9987 }, { "epoch": 0.12978950463174274, "grad_norm": 0.4033019542694092, "learning_rate": 0.00017407816581967533, "loss": 1.3924, "step": 9988 }, { "epoch": 0.12980249917565861, "grad_norm": 0.5156260132789612, "learning_rate": 0.00017407556635776395, "loss": 1.2991, "step": 9989 }, { "epoch": 0.1298154937195745, "grad_norm": 0.4047981798648834, "learning_rate": 0.00017407296689585258, "loss": 1.4773, "step": 9990 }, { "epoch": 0.12982848826349036, "grad_norm": 0.3604089915752411, "learning_rate": 0.00017407036743394118, "loss": 1.5106, "step": 9991 }, { "epoch": 0.12984148280740623, "grad_norm": 0.4189310371875763, "learning_rate": 0.0001740677679720298, "loss": 1.3032, "step": 9992 }, { "epoch": 0.1298544773513221, "grad_norm": 0.38563403487205505, "learning_rate": 0.0001740651685101184, "loss": 1.5565, "step": 9993 }, { "epoch": 0.12986747189523798, "grad_norm": 0.371124804019928, "learning_rate": 0.00017406256904820705, "loss": 1.3489, "step": 9994 }, { "epoch": 0.12988046643915385, "grad_norm": 0.40574729442596436, "learning_rate": 0.00017405996958629565, "loss": 1.3917, "step": 9995 }, { "epoch": 0.12989346098306973, "grad_norm": 0.2749800682067871, "learning_rate": 0.00017405737012438424, "loss": 1.3252, "step": 9996 }, { "epoch": 0.1299064555269856, "grad_norm": 0.3030362129211426, "learning_rate": 0.00017405477066247287, "loss": 1.2645, "step": 9997 }, { "epoch": 0.12991945007090147, "grad_norm": 0.4077836871147156, "learning_rate": 0.0001740521712005615, "loss": 1.6004, "step": 9998 }, { "epoch": 0.12993244461481734, "grad_norm": 0.47647544741630554, "learning_rate": 0.00017404957173865012, "loss": 1.4612, "step": 9999 }, { "epoch": 0.12994543915873322, "grad_norm": 0.3247867524623871, "learning_rate": 0.00017404697227673871, "loss": 1.4825, "step": 10000 }, { "epoch": 0.1299584337026491, "grad_norm": 0.3616940379142761, "learning_rate": 0.00017404437281482734, "loss": 1.299, "step": 10001 }, { "epoch": 0.12997142824656496, "grad_norm": 0.4881681501865387, "learning_rate": 0.00017404177335291596, "loss": 1.5746, "step": 10002 }, { "epoch": 0.12998442279048084, "grad_norm": 0.4217986762523651, "learning_rate": 0.00017403917389100456, "loss": 1.4649, "step": 10003 }, { "epoch": 0.1299974173343967, "grad_norm": 0.5113387703895569, "learning_rate": 0.00017403657442909318, "loss": 1.7252, "step": 10004 }, { "epoch": 0.13001041187831258, "grad_norm": 0.32327333092689514, "learning_rate": 0.0001740339749671818, "loss": 1.2406, "step": 10005 }, { "epoch": 0.13002340642222845, "grad_norm": 0.3602115213871002, "learning_rate": 0.00017403137550527043, "loss": 1.569, "step": 10006 }, { "epoch": 0.13003640096614433, "grad_norm": 0.43408840894699097, "learning_rate": 0.00017402877604335903, "loss": 1.5304, "step": 10007 }, { "epoch": 0.1300493955100602, "grad_norm": 0.47221341729164124, "learning_rate": 0.00017402617658144763, "loss": 1.5453, "step": 10008 }, { "epoch": 0.13006239005397607, "grad_norm": 0.45013290643692017, "learning_rate": 0.00017402357711953628, "loss": 1.4231, "step": 10009 }, { "epoch": 0.13007538459789195, "grad_norm": 0.32998397946357727, "learning_rate": 0.00017402097765762488, "loss": 1.3098, "step": 10010 }, { "epoch": 0.13008837914180785, "grad_norm": 0.29858359694480896, "learning_rate": 0.0001740183781957135, "loss": 1.3373, "step": 10011 }, { "epoch": 0.13010137368572372, "grad_norm": 0.45778200030326843, "learning_rate": 0.0001740157787338021, "loss": 1.4403, "step": 10012 }, { "epoch": 0.1301143682296396, "grad_norm": 0.397909939289093, "learning_rate": 0.00017401317927189072, "loss": 1.4327, "step": 10013 }, { "epoch": 0.13012736277355547, "grad_norm": 0.35794296860694885, "learning_rate": 0.00017401057980997935, "loss": 1.2409, "step": 10014 }, { "epoch": 0.13014035731747134, "grad_norm": 0.40209275484085083, "learning_rate": 0.00017400798034806795, "loss": 1.2208, "step": 10015 }, { "epoch": 0.1301533518613872, "grad_norm": 0.39884623885154724, "learning_rate": 0.00017400538088615657, "loss": 1.3237, "step": 10016 }, { "epoch": 0.13016634640530308, "grad_norm": 0.3505159020423889, "learning_rate": 0.0001740027814242452, "loss": 1.4201, "step": 10017 }, { "epoch": 0.13017934094921896, "grad_norm": 0.48346638679504395, "learning_rate": 0.00017400018196233382, "loss": 1.6036, "step": 10018 }, { "epoch": 0.13019233549313483, "grad_norm": 0.41650640964508057, "learning_rate": 0.00017399758250042242, "loss": 1.3955, "step": 10019 }, { "epoch": 0.1302053300370507, "grad_norm": 0.3803153336048126, "learning_rate": 0.00017399498303851104, "loss": 1.333, "step": 10020 }, { "epoch": 0.13021832458096658, "grad_norm": 0.4341132938861847, "learning_rate": 0.00017399238357659967, "loss": 1.4136, "step": 10021 }, { "epoch": 0.13023131912488245, "grad_norm": 0.44876110553741455, "learning_rate": 0.00017398978411468826, "loss": 1.4719, "step": 10022 }, { "epoch": 0.13024431366879832, "grad_norm": 0.43796607851982117, "learning_rate": 0.0001739871846527769, "loss": 1.5937, "step": 10023 }, { "epoch": 0.1302573082127142, "grad_norm": 0.3934550881385803, "learning_rate": 0.00017398458519086548, "loss": 1.5697, "step": 10024 }, { "epoch": 0.13027030275663007, "grad_norm": 0.3820855915546417, "learning_rate": 0.0001739819857289541, "loss": 1.476, "step": 10025 }, { "epoch": 0.13028329730054594, "grad_norm": 0.405866801738739, "learning_rate": 0.00017397938626704273, "loss": 1.3468, "step": 10026 }, { "epoch": 0.13029629184446181, "grad_norm": 0.3781905174255371, "learning_rate": 0.00017397678680513133, "loss": 1.6149, "step": 10027 }, { "epoch": 0.1303092863883777, "grad_norm": 0.40196311473846436, "learning_rate": 0.00017397418734321996, "loss": 1.502, "step": 10028 }, { "epoch": 0.13032228093229356, "grad_norm": 0.3302616775035858, "learning_rate": 0.00017397158788130858, "loss": 1.4482, "step": 10029 }, { "epoch": 0.13033527547620943, "grad_norm": 0.3582909107208252, "learning_rate": 0.0001739689884193972, "loss": 1.2684, "step": 10030 }, { "epoch": 0.1303482700201253, "grad_norm": 0.5488734841346741, "learning_rate": 0.0001739663889574858, "loss": 1.348, "step": 10031 }, { "epoch": 0.13036126456404118, "grad_norm": 0.42265790700912476, "learning_rate": 0.00017396378949557443, "loss": 1.6309, "step": 10032 }, { "epoch": 0.13037425910795705, "grad_norm": 0.3424038887023926, "learning_rate": 0.00017396119003366305, "loss": 1.6459, "step": 10033 }, { "epoch": 0.13038725365187293, "grad_norm": 0.31561824679374695, "learning_rate": 0.00017395859057175165, "loss": 1.5422, "step": 10034 }, { "epoch": 0.1304002481957888, "grad_norm": 0.3706008195877075, "learning_rate": 0.00017395599110984027, "loss": 1.654, "step": 10035 }, { "epoch": 0.13041324273970467, "grad_norm": 0.3678598999977112, "learning_rate": 0.00017395339164792887, "loss": 1.4911, "step": 10036 }, { "epoch": 0.13042623728362054, "grad_norm": 0.4187855124473572, "learning_rate": 0.0001739507921860175, "loss": 1.2834, "step": 10037 }, { "epoch": 0.13043923182753642, "grad_norm": 0.28725212812423706, "learning_rate": 0.00017394819272410612, "loss": 1.4427, "step": 10038 }, { "epoch": 0.1304522263714523, "grad_norm": 0.4345038831233978, "learning_rate": 0.00017394559326219472, "loss": 1.5042, "step": 10039 }, { "epoch": 0.13046522091536816, "grad_norm": 0.4611448645591736, "learning_rate": 0.00017394299380028337, "loss": 1.6384, "step": 10040 }, { "epoch": 0.13047821545928404, "grad_norm": 0.3396282196044922, "learning_rate": 0.00017394039433837197, "loss": 1.2994, "step": 10041 }, { "epoch": 0.1304912100031999, "grad_norm": 0.44800370931625366, "learning_rate": 0.0001739377948764606, "loss": 1.5327, "step": 10042 }, { "epoch": 0.13050420454711578, "grad_norm": 0.36406949162483215, "learning_rate": 0.0001739351954145492, "loss": 1.5999, "step": 10043 }, { "epoch": 0.13051719909103165, "grad_norm": 0.3297083377838135, "learning_rate": 0.0001739325959526378, "loss": 1.302, "step": 10044 }, { "epoch": 0.13053019363494753, "grad_norm": 0.3537190854549408, "learning_rate": 0.00017392999649072644, "loss": 1.3884, "step": 10045 }, { "epoch": 0.1305431881788634, "grad_norm": 0.388339102268219, "learning_rate": 0.00017392739702881503, "loss": 1.3715, "step": 10046 }, { "epoch": 0.13055618272277927, "grad_norm": 0.2931365370750427, "learning_rate": 0.00017392479756690366, "loss": 1.2103, "step": 10047 }, { "epoch": 0.13056917726669515, "grad_norm": 0.37645572423934937, "learning_rate": 0.00017392219810499228, "loss": 1.3214, "step": 10048 }, { "epoch": 0.13058217181061102, "grad_norm": 0.3196539878845215, "learning_rate": 0.0001739195986430809, "loss": 1.528, "step": 10049 }, { "epoch": 0.1305951663545269, "grad_norm": 0.45146259665489197, "learning_rate": 0.0001739169991811695, "loss": 1.4401, "step": 10050 }, { "epoch": 0.13060816089844277, "grad_norm": 0.3216227889060974, "learning_rate": 0.0001739143997192581, "loss": 1.5061, "step": 10051 }, { "epoch": 0.13062115544235864, "grad_norm": 0.3479582667350769, "learning_rate": 0.00017391180025734675, "loss": 1.5469, "step": 10052 }, { "epoch": 0.1306341499862745, "grad_norm": 0.3404166102409363, "learning_rate": 0.00017390920079543535, "loss": 1.4499, "step": 10053 }, { "epoch": 0.13064714453019038, "grad_norm": 0.37766242027282715, "learning_rate": 0.00017390660133352398, "loss": 1.4437, "step": 10054 }, { "epoch": 0.13066013907410626, "grad_norm": 0.2526474893093109, "learning_rate": 0.00017390400187161257, "loss": 1.2312, "step": 10055 }, { "epoch": 0.13067313361802213, "grad_norm": 0.4539259970188141, "learning_rate": 0.0001739014024097012, "loss": 1.5017, "step": 10056 }, { "epoch": 0.130686128161938, "grad_norm": 0.35707470774650574, "learning_rate": 0.00017389880294778982, "loss": 1.3938, "step": 10057 }, { "epoch": 0.13069912270585388, "grad_norm": 0.37519896030426025, "learning_rate": 0.00017389620348587842, "loss": 1.3482, "step": 10058 }, { "epoch": 0.13071211724976975, "grad_norm": 0.4118571877479553, "learning_rate": 0.00017389360402396704, "loss": 1.392, "step": 10059 }, { "epoch": 0.13072511179368562, "grad_norm": 0.29952940344810486, "learning_rate": 0.00017389100456205567, "loss": 1.1987, "step": 10060 }, { "epoch": 0.1307381063376015, "grad_norm": 0.39584389328956604, "learning_rate": 0.0001738884051001443, "loss": 1.4993, "step": 10061 }, { "epoch": 0.13075110088151737, "grad_norm": 0.30917489528656006, "learning_rate": 0.0001738858056382329, "loss": 1.1698, "step": 10062 }, { "epoch": 0.13076409542543324, "grad_norm": 0.4434426724910736, "learning_rate": 0.0001738832061763215, "loss": 1.3465, "step": 10063 }, { "epoch": 0.13077708996934911, "grad_norm": 0.4485422670841217, "learning_rate": 0.00017388060671441014, "loss": 1.5882, "step": 10064 }, { "epoch": 0.130790084513265, "grad_norm": 0.4047817289829254, "learning_rate": 0.00017387800725249874, "loss": 1.3376, "step": 10065 }, { "epoch": 0.13080307905718086, "grad_norm": 0.3567257225513458, "learning_rate": 0.00017387540779058736, "loss": 1.4853, "step": 10066 }, { "epoch": 0.13081607360109673, "grad_norm": 0.3692239820957184, "learning_rate": 0.00017387280832867596, "loss": 1.3567, "step": 10067 }, { "epoch": 0.1308290681450126, "grad_norm": 0.511049211025238, "learning_rate": 0.00017387020886676458, "loss": 1.4765, "step": 10068 }, { "epoch": 0.13084206268892848, "grad_norm": 0.40648844838142395, "learning_rate": 0.0001738676094048532, "loss": 1.3206, "step": 10069 }, { "epoch": 0.13085505723284435, "grad_norm": 0.2965278625488281, "learning_rate": 0.0001738650099429418, "loss": 1.4545, "step": 10070 }, { "epoch": 0.13086805177676022, "grad_norm": 0.3403579592704773, "learning_rate": 0.00017386241048103043, "loss": 1.6177, "step": 10071 }, { "epoch": 0.1308810463206761, "grad_norm": 0.3705838620662689, "learning_rate": 0.00017385981101911905, "loss": 1.5671, "step": 10072 }, { "epoch": 0.13089404086459197, "grad_norm": 0.43603697419166565, "learning_rate": 0.00017385721155720768, "loss": 1.4139, "step": 10073 }, { "epoch": 0.13090703540850784, "grad_norm": 0.3996585011482239, "learning_rate": 0.00017385461209529628, "loss": 1.4051, "step": 10074 }, { "epoch": 0.13092002995242372, "grad_norm": 0.44317492842674255, "learning_rate": 0.0001738520126333849, "loss": 1.5417, "step": 10075 }, { "epoch": 0.1309330244963396, "grad_norm": 0.3266716003417969, "learning_rate": 0.00017384941317147352, "loss": 1.719, "step": 10076 }, { "epoch": 0.13094601904025546, "grad_norm": 0.45574870705604553, "learning_rate": 0.00017384681370956212, "loss": 1.4382, "step": 10077 }, { "epoch": 0.13095901358417134, "grad_norm": 0.45127570629119873, "learning_rate": 0.00017384421424765075, "loss": 1.5466, "step": 10078 }, { "epoch": 0.1309720081280872, "grad_norm": 0.43713030219078064, "learning_rate": 0.00017384161478573937, "loss": 1.4455, "step": 10079 }, { "epoch": 0.13098500267200308, "grad_norm": 0.42944011092185974, "learning_rate": 0.00017383901532382797, "loss": 1.2745, "step": 10080 }, { "epoch": 0.13099799721591895, "grad_norm": 0.43258917331695557, "learning_rate": 0.0001738364158619166, "loss": 1.5097, "step": 10081 }, { "epoch": 0.13101099175983483, "grad_norm": 0.3020515441894531, "learning_rate": 0.0001738338164000052, "loss": 1.2863, "step": 10082 }, { "epoch": 0.1310239863037507, "grad_norm": 0.4114360213279724, "learning_rate": 0.00017383121693809384, "loss": 1.4895, "step": 10083 }, { "epoch": 0.13103698084766657, "grad_norm": 0.44200456142425537, "learning_rate": 0.00017382861747618244, "loss": 1.4812, "step": 10084 }, { "epoch": 0.13104997539158245, "grad_norm": 0.3922804892063141, "learning_rate": 0.00017382601801427106, "loss": 1.4677, "step": 10085 }, { "epoch": 0.13106296993549832, "grad_norm": 0.4374137818813324, "learning_rate": 0.00017382341855235966, "loss": 1.4032, "step": 10086 }, { "epoch": 0.13107596447941422, "grad_norm": 0.3786354660987854, "learning_rate": 0.00017382081909044829, "loss": 1.2245, "step": 10087 }, { "epoch": 0.1310889590233301, "grad_norm": 0.3816983699798584, "learning_rate": 0.0001738182196285369, "loss": 1.4987, "step": 10088 }, { "epoch": 0.13110195356724597, "grad_norm": 0.4352808892726898, "learning_rate": 0.0001738156201666255, "loss": 1.3087, "step": 10089 }, { "epoch": 0.13111494811116184, "grad_norm": 0.3937617242336273, "learning_rate": 0.00017381302070471413, "loss": 1.297, "step": 10090 }, { "epoch": 0.1311279426550777, "grad_norm": 0.3940543830394745, "learning_rate": 0.00017381042124280276, "loss": 1.5594, "step": 10091 }, { "epoch": 0.13114093719899358, "grad_norm": 0.3190916180610657, "learning_rate": 0.00017380782178089135, "loss": 1.4878, "step": 10092 }, { "epoch": 0.13115393174290946, "grad_norm": 0.3550054132938385, "learning_rate": 0.00017380522231897998, "loss": 1.3759, "step": 10093 }, { "epoch": 0.13116692628682533, "grad_norm": 0.3647286593914032, "learning_rate": 0.00017380262285706858, "loss": 1.4697, "step": 10094 }, { "epoch": 0.1311799208307412, "grad_norm": 0.43294957280158997, "learning_rate": 0.00017380002339515723, "loss": 1.2944, "step": 10095 }, { "epoch": 0.13119291537465708, "grad_norm": 0.4288536012172699, "learning_rate": 0.00017379742393324582, "loss": 1.322, "step": 10096 }, { "epoch": 0.13120590991857295, "grad_norm": 0.4165160357952118, "learning_rate": 0.00017379482447133445, "loss": 1.4039, "step": 10097 }, { "epoch": 0.13121890446248882, "grad_norm": 0.3345651626586914, "learning_rate": 0.00017379222500942305, "loss": 1.3406, "step": 10098 }, { "epoch": 0.1312318990064047, "grad_norm": 0.45656460523605347, "learning_rate": 0.00017378962554751167, "loss": 1.571, "step": 10099 }, { "epoch": 0.13124489355032057, "grad_norm": 0.45965951681137085, "learning_rate": 0.0001737870260856003, "loss": 1.5504, "step": 10100 }, { "epoch": 0.13125788809423644, "grad_norm": 0.3732752799987793, "learning_rate": 0.0001737844266236889, "loss": 1.5779, "step": 10101 }, { "epoch": 0.13127088263815231, "grad_norm": 0.42539912462234497, "learning_rate": 0.00017378182716177752, "loss": 1.4566, "step": 10102 }, { "epoch": 0.1312838771820682, "grad_norm": 0.3683663010597229, "learning_rate": 0.00017377922769986614, "loss": 1.5273, "step": 10103 }, { "epoch": 0.13129687172598406, "grad_norm": 0.4218456447124481, "learning_rate": 0.00017377662823795477, "loss": 1.4019, "step": 10104 }, { "epoch": 0.13130986626989993, "grad_norm": 0.36343470215797424, "learning_rate": 0.00017377402877604336, "loss": 1.4652, "step": 10105 }, { "epoch": 0.1313228608138158, "grad_norm": 0.37586092948913574, "learning_rate": 0.00017377142931413196, "loss": 1.7567, "step": 10106 }, { "epoch": 0.13133585535773168, "grad_norm": 0.3357359766960144, "learning_rate": 0.0001737688298522206, "loss": 1.46, "step": 10107 }, { "epoch": 0.13134884990164755, "grad_norm": 0.41328558325767517, "learning_rate": 0.0001737662303903092, "loss": 1.342, "step": 10108 }, { "epoch": 0.13136184444556342, "grad_norm": 0.40142837166786194, "learning_rate": 0.00017376363092839783, "loss": 1.4464, "step": 10109 }, { "epoch": 0.1313748389894793, "grad_norm": 0.3585902452468872, "learning_rate": 0.00017376103146648643, "loss": 1.4277, "step": 10110 }, { "epoch": 0.13138783353339517, "grad_norm": 0.4308318495750427, "learning_rate": 0.00017375843200457506, "loss": 1.403, "step": 10111 }, { "epoch": 0.13140082807731104, "grad_norm": 0.40160951018333435, "learning_rate": 0.00017375583254266368, "loss": 1.5691, "step": 10112 }, { "epoch": 0.13141382262122692, "grad_norm": 0.3902919590473175, "learning_rate": 0.00017375323308075228, "loss": 1.5813, "step": 10113 }, { "epoch": 0.1314268171651428, "grad_norm": 0.39474159479141235, "learning_rate": 0.00017375063361884093, "loss": 1.4, "step": 10114 }, { "epoch": 0.13143981170905866, "grad_norm": 0.43503084778785706, "learning_rate": 0.00017374803415692953, "loss": 1.3456, "step": 10115 }, { "epoch": 0.13145280625297454, "grad_norm": 0.33803674578666687, "learning_rate": 0.00017374543469501815, "loss": 1.3012, "step": 10116 }, { "epoch": 0.1314658007968904, "grad_norm": 0.35437917709350586, "learning_rate": 0.00017374283523310675, "loss": 1.451, "step": 10117 }, { "epoch": 0.13147879534080628, "grad_norm": 0.4315512478351593, "learning_rate": 0.00017374023577119537, "loss": 1.6047, "step": 10118 }, { "epoch": 0.13149178988472215, "grad_norm": 0.39365872740745544, "learning_rate": 0.000173737636309284, "loss": 1.4927, "step": 10119 }, { "epoch": 0.13150478442863803, "grad_norm": 0.2758640646934509, "learning_rate": 0.0001737350368473726, "loss": 1.3181, "step": 10120 }, { "epoch": 0.1315177789725539, "grad_norm": 0.374590665102005, "learning_rate": 0.00017373243738546122, "loss": 1.4964, "step": 10121 }, { "epoch": 0.13153077351646977, "grad_norm": 0.3375817835330963, "learning_rate": 0.00017372983792354984, "loss": 1.3943, "step": 10122 }, { "epoch": 0.13154376806038565, "grad_norm": 0.3470326364040375, "learning_rate": 0.00017372723846163844, "loss": 1.3671, "step": 10123 }, { "epoch": 0.13155676260430152, "grad_norm": 0.39509227871894836, "learning_rate": 0.00017372463899972707, "loss": 1.4218, "step": 10124 }, { "epoch": 0.1315697571482174, "grad_norm": 0.398009717464447, "learning_rate": 0.00017372203953781566, "loss": 1.3567, "step": 10125 }, { "epoch": 0.13158275169213327, "grad_norm": 0.39217978715896606, "learning_rate": 0.00017371944007590431, "loss": 1.5198, "step": 10126 }, { "epoch": 0.13159574623604914, "grad_norm": 0.4248711168766022, "learning_rate": 0.0001737168406139929, "loss": 1.5381, "step": 10127 }, { "epoch": 0.131608740779965, "grad_norm": 0.40015125274658203, "learning_rate": 0.00017371424115208154, "loss": 1.1266, "step": 10128 }, { "epoch": 0.13162173532388088, "grad_norm": 0.42026111483573914, "learning_rate": 0.00017371164169017013, "loss": 1.575, "step": 10129 }, { "epoch": 0.13163472986779676, "grad_norm": 0.3741748332977295, "learning_rate": 0.00017370904222825876, "loss": 1.4243, "step": 10130 }, { "epoch": 0.13164772441171263, "grad_norm": 0.43683138489723206, "learning_rate": 0.00017370644276634738, "loss": 1.3241, "step": 10131 }, { "epoch": 0.1316607189556285, "grad_norm": 0.4618951976299286, "learning_rate": 0.00017370384330443598, "loss": 1.4304, "step": 10132 }, { "epoch": 0.13167371349954438, "grad_norm": 0.4014121890068054, "learning_rate": 0.0001737012438425246, "loss": 1.5852, "step": 10133 }, { "epoch": 0.13168670804346025, "grad_norm": 0.365263968706131, "learning_rate": 0.00017369864438061323, "loss": 1.6531, "step": 10134 }, { "epoch": 0.13169970258737612, "grad_norm": 0.27088990807533264, "learning_rate": 0.00017369604491870183, "loss": 1.2816, "step": 10135 }, { "epoch": 0.131712697131292, "grad_norm": 0.41688206791877747, "learning_rate": 0.00017369344545679045, "loss": 1.3097, "step": 10136 }, { "epoch": 0.13172569167520787, "grad_norm": 0.40518784523010254, "learning_rate": 0.00017369084599487905, "loss": 1.3726, "step": 10137 }, { "epoch": 0.13173868621912374, "grad_norm": 0.38007235527038574, "learning_rate": 0.0001736882465329677, "loss": 1.3544, "step": 10138 }, { "epoch": 0.1317516807630396, "grad_norm": 0.40002375841140747, "learning_rate": 0.0001736856470710563, "loss": 1.6149, "step": 10139 }, { "epoch": 0.1317646753069555, "grad_norm": 0.36495813727378845, "learning_rate": 0.00017368304760914492, "loss": 1.4099, "step": 10140 }, { "epoch": 0.13177766985087136, "grad_norm": 0.33296138048171997, "learning_rate": 0.00017368044814723352, "loss": 1.3445, "step": 10141 }, { "epoch": 0.13179066439478723, "grad_norm": 0.5464882254600525, "learning_rate": 0.00017367784868532214, "loss": 1.4817, "step": 10142 }, { "epoch": 0.1318036589387031, "grad_norm": 0.3261016011238098, "learning_rate": 0.00017367524922341077, "loss": 1.3142, "step": 10143 }, { "epoch": 0.13181665348261898, "grad_norm": 0.47110143303871155, "learning_rate": 0.00017367264976149937, "loss": 1.3247, "step": 10144 }, { "epoch": 0.13182964802653485, "grad_norm": 0.3830544054508209, "learning_rate": 0.000173670050299588, "loss": 1.5533, "step": 10145 }, { "epoch": 0.13184264257045072, "grad_norm": 0.3269497752189636, "learning_rate": 0.00017366745083767661, "loss": 1.4121, "step": 10146 }, { "epoch": 0.1318556371143666, "grad_norm": 0.35049694776535034, "learning_rate": 0.0001736648513757652, "loss": 1.1665, "step": 10147 }, { "epoch": 0.13186863165828247, "grad_norm": 0.4158876836299896, "learning_rate": 0.00017366225191385384, "loss": 1.4094, "step": 10148 }, { "epoch": 0.13188162620219834, "grad_norm": 0.43320849537849426, "learning_rate": 0.00017365965245194246, "loss": 1.5473, "step": 10149 }, { "epoch": 0.13189462074611422, "grad_norm": 0.35757991671562195, "learning_rate": 0.00017365705299003109, "loss": 1.436, "step": 10150 }, { "epoch": 0.1319076152900301, "grad_norm": 0.4002746343612671, "learning_rate": 0.00017365445352811968, "loss": 1.3635, "step": 10151 }, { "epoch": 0.13192060983394596, "grad_norm": 0.29216960072517395, "learning_rate": 0.0001736518540662083, "loss": 1.3389, "step": 10152 }, { "epoch": 0.13193360437786184, "grad_norm": 0.325194776058197, "learning_rate": 0.00017364925460429693, "loss": 1.1925, "step": 10153 }, { "epoch": 0.1319465989217777, "grad_norm": 0.5074470043182373, "learning_rate": 0.00017364665514238553, "loss": 1.432, "step": 10154 }, { "epoch": 0.13195959346569358, "grad_norm": 0.451858788728714, "learning_rate": 0.00017364405568047415, "loss": 1.3861, "step": 10155 }, { "epoch": 0.13197258800960945, "grad_norm": 0.3618403971195221, "learning_rate": 0.00017364145621856275, "loss": 1.6644, "step": 10156 }, { "epoch": 0.13198558255352533, "grad_norm": 0.3296149671077728, "learning_rate": 0.0001736388567566514, "loss": 1.3162, "step": 10157 }, { "epoch": 0.1319985770974412, "grad_norm": 0.34646278619766235, "learning_rate": 0.00017363625729474, "loss": 1.5117, "step": 10158 }, { "epoch": 0.13201157164135707, "grad_norm": 0.3559836745262146, "learning_rate": 0.00017363365783282862, "loss": 1.3612, "step": 10159 }, { "epoch": 0.13202456618527295, "grad_norm": 0.4512968957424164, "learning_rate": 0.00017363105837091722, "loss": 1.4755, "step": 10160 }, { "epoch": 0.13203756072918882, "grad_norm": 0.38232871890068054, "learning_rate": 0.00017362845890900585, "loss": 1.4515, "step": 10161 }, { "epoch": 0.1320505552731047, "grad_norm": 0.38601744174957275, "learning_rate": 0.00017362585944709447, "loss": 1.4488, "step": 10162 }, { "epoch": 0.13206354981702056, "grad_norm": 0.5873156189918518, "learning_rate": 0.00017362325998518307, "loss": 1.5512, "step": 10163 }, { "epoch": 0.13207654436093647, "grad_norm": 0.4278324842453003, "learning_rate": 0.0001736206605232717, "loss": 1.517, "step": 10164 }, { "epoch": 0.13208953890485234, "grad_norm": 0.3717762231826782, "learning_rate": 0.00017361806106136032, "loss": 1.3807, "step": 10165 }, { "epoch": 0.1321025334487682, "grad_norm": 0.3839687407016754, "learning_rate": 0.00017361546159944891, "loss": 1.5757, "step": 10166 }, { "epoch": 0.13211552799268408, "grad_norm": 0.3228435516357422, "learning_rate": 0.00017361286213753754, "loss": 1.4229, "step": 10167 }, { "epoch": 0.13212852253659996, "grad_norm": 0.312141090631485, "learning_rate": 0.00017361026267562614, "loss": 1.2159, "step": 10168 }, { "epoch": 0.13214151708051583, "grad_norm": 0.46125009655952454, "learning_rate": 0.0001736076632137148, "loss": 1.3299, "step": 10169 }, { "epoch": 0.1321545116244317, "grad_norm": 0.2772444486618042, "learning_rate": 0.00017360506375180339, "loss": 1.2596, "step": 10170 }, { "epoch": 0.13216750616834758, "grad_norm": 0.4322095513343811, "learning_rate": 0.000173602464289892, "loss": 1.5936, "step": 10171 }, { "epoch": 0.13218050071226345, "grad_norm": 0.31296905875205994, "learning_rate": 0.0001735998648279806, "loss": 1.4962, "step": 10172 }, { "epoch": 0.13219349525617932, "grad_norm": 0.44079339504241943, "learning_rate": 0.00017359726536606923, "loss": 1.3271, "step": 10173 }, { "epoch": 0.1322064898000952, "grad_norm": 0.4436073899269104, "learning_rate": 0.00017359466590415786, "loss": 1.3907, "step": 10174 }, { "epoch": 0.13221948434401107, "grad_norm": 0.3870185315608978, "learning_rate": 0.00017359206644224645, "loss": 1.4656, "step": 10175 }, { "epoch": 0.13223247888792694, "grad_norm": 0.370878666639328, "learning_rate": 0.00017358946698033508, "loss": 1.5911, "step": 10176 }, { "epoch": 0.1322454734318428, "grad_norm": 0.43744736909866333, "learning_rate": 0.0001735868675184237, "loss": 1.3463, "step": 10177 }, { "epoch": 0.1322584679757587, "grad_norm": 0.4157949984073639, "learning_rate": 0.0001735842680565123, "loss": 1.3609, "step": 10178 }, { "epoch": 0.13227146251967456, "grad_norm": 0.4446072280406952, "learning_rate": 0.00017358166859460092, "loss": 1.4509, "step": 10179 }, { "epoch": 0.13228445706359043, "grad_norm": 0.8006092309951782, "learning_rate": 0.00017357906913268952, "loss": 1.3285, "step": 10180 }, { "epoch": 0.1322974516075063, "grad_norm": 0.48992887139320374, "learning_rate": 0.00017357646967077817, "loss": 1.3868, "step": 10181 }, { "epoch": 0.13231044615142218, "grad_norm": 0.38604074716567993, "learning_rate": 0.00017357387020886677, "loss": 1.4719, "step": 10182 }, { "epoch": 0.13232344069533805, "grad_norm": 0.4488598704338074, "learning_rate": 0.0001735712707469554, "loss": 1.5205, "step": 10183 }, { "epoch": 0.13233643523925392, "grad_norm": 0.37880659103393555, "learning_rate": 0.000173568671285044, "loss": 1.5112, "step": 10184 }, { "epoch": 0.1323494297831698, "grad_norm": 0.43228811025619507, "learning_rate": 0.00017356607182313262, "loss": 1.4397, "step": 10185 }, { "epoch": 0.13236242432708567, "grad_norm": 0.44876420497894287, "learning_rate": 0.00017356347236122124, "loss": 1.4611, "step": 10186 }, { "epoch": 0.13237541887100154, "grad_norm": 0.33343929052352905, "learning_rate": 0.00017356087289930984, "loss": 1.5622, "step": 10187 }, { "epoch": 0.13238841341491742, "grad_norm": 0.3798485994338989, "learning_rate": 0.0001735582734373985, "loss": 1.3399, "step": 10188 }, { "epoch": 0.1324014079588333, "grad_norm": 0.47137361764907837, "learning_rate": 0.0001735556739754871, "loss": 1.564, "step": 10189 }, { "epoch": 0.13241440250274916, "grad_norm": 0.3483763635158539, "learning_rate": 0.00017355307451357569, "loss": 1.2146, "step": 10190 }, { "epoch": 0.13242739704666504, "grad_norm": 0.43212610483169556, "learning_rate": 0.0001735504750516643, "loss": 1.5085, "step": 10191 }, { "epoch": 0.1324403915905809, "grad_norm": 0.351666659116745, "learning_rate": 0.00017354787558975293, "loss": 1.3518, "step": 10192 }, { "epoch": 0.13245338613449678, "grad_norm": 0.41554951667785645, "learning_rate": 0.00017354527612784156, "loss": 1.467, "step": 10193 }, { "epoch": 0.13246638067841265, "grad_norm": 0.2740756571292877, "learning_rate": 0.00017354267666593016, "loss": 1.1586, "step": 10194 }, { "epoch": 0.13247937522232853, "grad_norm": 0.5781879425048828, "learning_rate": 0.00017354007720401878, "loss": 1.4672, "step": 10195 }, { "epoch": 0.1324923697662444, "grad_norm": 0.3172120451927185, "learning_rate": 0.0001735374777421074, "loss": 1.171, "step": 10196 }, { "epoch": 0.13250536431016027, "grad_norm": 0.45059317350387573, "learning_rate": 0.000173534878280196, "loss": 1.3814, "step": 10197 }, { "epoch": 0.13251835885407615, "grad_norm": 0.5048568248748779, "learning_rate": 0.00017353227881828463, "loss": 1.4982, "step": 10198 }, { "epoch": 0.13253135339799202, "grad_norm": 0.3370574116706848, "learning_rate": 0.00017352967935637322, "loss": 1.3753, "step": 10199 }, { "epoch": 0.1325443479419079, "grad_norm": 0.39954623579978943, "learning_rate": 0.00017352707989446188, "loss": 1.468, "step": 10200 }, { "epoch": 0.13255734248582376, "grad_norm": 0.3422948718070984, "learning_rate": 0.00017352448043255047, "loss": 1.4215, "step": 10201 }, { "epoch": 0.13257033702973964, "grad_norm": 0.3474291265010834, "learning_rate": 0.00017352188097063907, "loss": 1.3762, "step": 10202 }, { "epoch": 0.1325833315736555, "grad_norm": 0.34193500876426697, "learning_rate": 0.0001735192815087277, "loss": 1.3661, "step": 10203 }, { "epoch": 0.13259632611757138, "grad_norm": 0.4035217761993408, "learning_rate": 0.00017351668204681632, "loss": 1.4916, "step": 10204 }, { "epoch": 0.13260932066148726, "grad_norm": 0.43940484523773193, "learning_rate": 0.00017351408258490494, "loss": 1.4685, "step": 10205 }, { "epoch": 0.13262231520540313, "grad_norm": 0.3900766670703888, "learning_rate": 0.00017351148312299354, "loss": 1.4861, "step": 10206 }, { "epoch": 0.132635309749319, "grad_norm": 0.373460590839386, "learning_rate": 0.00017350888366108217, "loss": 1.3934, "step": 10207 }, { "epoch": 0.13264830429323488, "grad_norm": 0.38645410537719727, "learning_rate": 0.0001735062841991708, "loss": 1.3767, "step": 10208 }, { "epoch": 0.13266129883715075, "grad_norm": 0.5162149667739868, "learning_rate": 0.0001735036847372594, "loss": 1.5425, "step": 10209 }, { "epoch": 0.13267429338106662, "grad_norm": 0.46621623635292053, "learning_rate": 0.000173501085275348, "loss": 1.4586, "step": 10210 }, { "epoch": 0.1326872879249825, "grad_norm": 0.39738011360168457, "learning_rate": 0.0001734984858134366, "loss": 1.4309, "step": 10211 }, { "epoch": 0.13270028246889837, "grad_norm": 0.45434921979904175, "learning_rate": 0.00017349588635152526, "loss": 1.5152, "step": 10212 }, { "epoch": 0.13271327701281424, "grad_norm": 0.3409869372844696, "learning_rate": 0.00017349328688961386, "loss": 1.3408, "step": 10213 }, { "epoch": 0.1327262715567301, "grad_norm": 0.36123141646385193, "learning_rate": 0.00017349068742770246, "loss": 1.444, "step": 10214 }, { "epoch": 0.132739266100646, "grad_norm": 0.4403824806213379, "learning_rate": 0.00017348808796579108, "loss": 1.4582, "step": 10215 }, { "epoch": 0.13275226064456186, "grad_norm": 0.532588005065918, "learning_rate": 0.0001734854885038797, "loss": 1.6039, "step": 10216 }, { "epoch": 0.13276525518847773, "grad_norm": 0.35847392678260803, "learning_rate": 0.00017348288904196833, "loss": 1.4615, "step": 10217 }, { "epoch": 0.1327782497323936, "grad_norm": 0.3607536852359772, "learning_rate": 0.00017348028958005693, "loss": 1.292, "step": 10218 }, { "epoch": 0.13279124427630948, "grad_norm": 0.4733213782310486, "learning_rate": 0.00017347769011814555, "loss": 1.6282, "step": 10219 }, { "epoch": 0.13280423882022535, "grad_norm": 0.4726121127605438, "learning_rate": 0.00017347509065623418, "loss": 1.3695, "step": 10220 }, { "epoch": 0.13281723336414122, "grad_norm": 0.3787165582180023, "learning_rate": 0.00017347249119432277, "loss": 1.4199, "step": 10221 }, { "epoch": 0.1328302279080571, "grad_norm": 0.38981640338897705, "learning_rate": 0.0001734698917324114, "loss": 1.3939, "step": 10222 }, { "epoch": 0.13284322245197297, "grad_norm": 0.3238557279109955, "learning_rate": 0.00017346729227050002, "loss": 1.292, "step": 10223 }, { "epoch": 0.13285621699588884, "grad_norm": 0.18271692097187042, "learning_rate": 0.00017346469280858865, "loss": 1.1872, "step": 10224 }, { "epoch": 0.13286921153980472, "grad_norm": 0.3624701499938965, "learning_rate": 0.00017346209334667724, "loss": 1.5763, "step": 10225 }, { "epoch": 0.1328822060837206, "grad_norm": 0.3814622163772583, "learning_rate": 0.00017345949388476587, "loss": 1.4388, "step": 10226 }, { "epoch": 0.13289520062763646, "grad_norm": 0.4015159010887146, "learning_rate": 0.0001734568944228545, "loss": 1.4904, "step": 10227 }, { "epoch": 0.13290819517155233, "grad_norm": 0.4241006076335907, "learning_rate": 0.0001734542949609431, "loss": 1.3432, "step": 10228 }, { "epoch": 0.1329211897154682, "grad_norm": 0.4571399986743927, "learning_rate": 0.00017345169549903172, "loss": 1.4652, "step": 10229 }, { "epoch": 0.13293418425938408, "grad_norm": 0.6004480123519897, "learning_rate": 0.0001734490960371203, "loss": 1.4914, "step": 10230 }, { "epoch": 0.13294717880329995, "grad_norm": 0.3231486976146698, "learning_rate": 0.00017344649657520894, "loss": 1.4043, "step": 10231 }, { "epoch": 0.13296017334721583, "grad_norm": 0.45555949211120605, "learning_rate": 0.00017344389711329756, "loss": 1.5629, "step": 10232 }, { "epoch": 0.1329731678911317, "grad_norm": 0.3109978139400482, "learning_rate": 0.00017344129765138616, "loss": 1.3871, "step": 10233 }, { "epoch": 0.13298616243504757, "grad_norm": 0.4051888883113861, "learning_rate": 0.00017343869818947478, "loss": 1.3399, "step": 10234 }, { "epoch": 0.13299915697896345, "grad_norm": 0.40826016664505005, "learning_rate": 0.0001734360987275634, "loss": 1.5187, "step": 10235 }, { "epoch": 0.13301215152287932, "grad_norm": 0.32955506443977356, "learning_rate": 0.00017343349926565203, "loss": 1.4244, "step": 10236 }, { "epoch": 0.1330251460667952, "grad_norm": 0.3049336075782776, "learning_rate": 0.00017343089980374063, "loss": 1.3437, "step": 10237 }, { "epoch": 0.13303814061071106, "grad_norm": 0.3286857008934021, "learning_rate": 0.00017342830034182925, "loss": 1.2963, "step": 10238 }, { "epoch": 0.13305113515462694, "grad_norm": 0.3328917920589447, "learning_rate": 0.00017342570087991788, "loss": 1.4539, "step": 10239 }, { "epoch": 0.13306412969854284, "grad_norm": 0.4485374093055725, "learning_rate": 0.00017342310141800648, "loss": 1.4527, "step": 10240 }, { "epoch": 0.1330771242424587, "grad_norm": 0.398308128118515, "learning_rate": 0.0001734205019560951, "loss": 1.496, "step": 10241 }, { "epoch": 0.13309011878637458, "grad_norm": 0.4124026894569397, "learning_rate": 0.0001734179024941837, "loss": 1.4088, "step": 10242 }, { "epoch": 0.13310311333029046, "grad_norm": 0.32842618227005005, "learning_rate": 0.00017341530303227235, "loss": 1.4355, "step": 10243 }, { "epoch": 0.13311610787420633, "grad_norm": 0.372936487197876, "learning_rate": 0.00017341270357036095, "loss": 1.4122, "step": 10244 }, { "epoch": 0.1331291024181222, "grad_norm": 0.3864259719848633, "learning_rate": 0.00017341010410844954, "loss": 1.4042, "step": 10245 }, { "epoch": 0.13314209696203808, "grad_norm": 0.415939062833786, "learning_rate": 0.00017340750464653817, "loss": 1.2855, "step": 10246 }, { "epoch": 0.13315509150595395, "grad_norm": 0.37138351798057556, "learning_rate": 0.0001734049051846268, "loss": 1.7111, "step": 10247 }, { "epoch": 0.13316808604986982, "grad_norm": 0.41065409779548645, "learning_rate": 0.00017340230572271542, "loss": 1.4176, "step": 10248 }, { "epoch": 0.1331810805937857, "grad_norm": 0.3087286055088043, "learning_rate": 0.00017339970626080402, "loss": 1.465, "step": 10249 }, { "epoch": 0.13319407513770157, "grad_norm": 0.38127902150154114, "learning_rate": 0.00017339710679889264, "loss": 1.489, "step": 10250 }, { "epoch": 0.13320706968161744, "grad_norm": 0.47871479392051697, "learning_rate": 0.00017339450733698126, "loss": 1.4475, "step": 10251 }, { "epoch": 0.1332200642255333, "grad_norm": 0.39952757954597473, "learning_rate": 0.00017339190787506986, "loss": 1.4572, "step": 10252 }, { "epoch": 0.1332330587694492, "grad_norm": 0.4555143415927887, "learning_rate": 0.00017338930841315849, "loss": 1.2898, "step": 10253 }, { "epoch": 0.13324605331336506, "grad_norm": 0.35699325799942017, "learning_rate": 0.00017338670895124708, "loss": 1.4545, "step": 10254 }, { "epoch": 0.13325904785728093, "grad_norm": 0.4163183569908142, "learning_rate": 0.00017338410948933573, "loss": 1.2866, "step": 10255 }, { "epoch": 0.1332720424011968, "grad_norm": 0.35340219736099243, "learning_rate": 0.00017338151002742433, "loss": 1.4647, "step": 10256 }, { "epoch": 0.13328503694511268, "grad_norm": 0.4699966013431549, "learning_rate": 0.00017337891056551293, "loss": 1.5466, "step": 10257 }, { "epoch": 0.13329803148902855, "grad_norm": 0.5489582419395447, "learning_rate": 0.00017337631110360155, "loss": 1.6783, "step": 10258 }, { "epoch": 0.13331102603294442, "grad_norm": 0.41623225808143616, "learning_rate": 0.00017337371164169018, "loss": 1.3705, "step": 10259 }, { "epoch": 0.1333240205768603, "grad_norm": 0.4936605393886566, "learning_rate": 0.0001733711121797788, "loss": 1.3937, "step": 10260 }, { "epoch": 0.13333701512077617, "grad_norm": 0.3930872082710266, "learning_rate": 0.0001733685127178674, "loss": 1.3575, "step": 10261 }, { "epoch": 0.13335000966469204, "grad_norm": 0.34105145931243896, "learning_rate": 0.00017336591325595602, "loss": 1.2825, "step": 10262 }, { "epoch": 0.13336300420860792, "grad_norm": 0.4458957314491272, "learning_rate": 0.00017336331379404465, "loss": 1.4232, "step": 10263 }, { "epoch": 0.1333759987525238, "grad_norm": 0.45651087164878845, "learning_rate": 0.00017336071433213325, "loss": 1.5728, "step": 10264 }, { "epoch": 0.13338899329643966, "grad_norm": 0.4242545962333679, "learning_rate": 0.00017335811487022187, "loss": 1.476, "step": 10265 }, { "epoch": 0.13340198784035553, "grad_norm": 0.4469432532787323, "learning_rate": 0.0001733555154083105, "loss": 1.4461, "step": 10266 }, { "epoch": 0.1334149823842714, "grad_norm": 0.35513532161712646, "learning_rate": 0.00017335291594639912, "loss": 1.3621, "step": 10267 }, { "epoch": 0.13342797692818728, "grad_norm": 0.4214397668838501, "learning_rate": 0.00017335031648448772, "loss": 1.4888, "step": 10268 }, { "epoch": 0.13344097147210315, "grad_norm": 0.4333958625793457, "learning_rate": 0.00017334771702257632, "loss": 1.3795, "step": 10269 }, { "epoch": 0.13345396601601903, "grad_norm": 0.43227511644363403, "learning_rate": 0.00017334511756066497, "loss": 1.3898, "step": 10270 }, { "epoch": 0.1334669605599349, "grad_norm": 0.38267451524734497, "learning_rate": 0.00017334251809875356, "loss": 1.5822, "step": 10271 }, { "epoch": 0.13347995510385077, "grad_norm": 0.36044323444366455, "learning_rate": 0.0001733399186368422, "loss": 1.2313, "step": 10272 }, { "epoch": 0.13349294964776665, "grad_norm": 0.37287914752960205, "learning_rate": 0.00017333731917493079, "loss": 1.4127, "step": 10273 }, { "epoch": 0.13350594419168252, "grad_norm": 0.441448837518692, "learning_rate": 0.0001733347197130194, "loss": 1.5315, "step": 10274 }, { "epoch": 0.1335189387355984, "grad_norm": 0.4397992491722107, "learning_rate": 0.00017333212025110803, "loss": 1.3751, "step": 10275 }, { "epoch": 0.13353193327951426, "grad_norm": 0.48151400685310364, "learning_rate": 0.00017332952078919663, "loss": 1.5896, "step": 10276 }, { "epoch": 0.13354492782343014, "grad_norm": 0.3794490098953247, "learning_rate": 0.00017332692132728526, "loss": 1.4475, "step": 10277 }, { "epoch": 0.133557922367346, "grad_norm": 0.4280606210231781, "learning_rate": 0.00017332432186537388, "loss": 1.5373, "step": 10278 }, { "epoch": 0.13357091691126188, "grad_norm": 0.4502997100353241, "learning_rate": 0.0001733217224034625, "loss": 1.5484, "step": 10279 }, { "epoch": 0.13358391145517776, "grad_norm": 0.37605345249176025, "learning_rate": 0.0001733191229415511, "loss": 1.5437, "step": 10280 }, { "epoch": 0.13359690599909363, "grad_norm": 0.2983585000038147, "learning_rate": 0.00017331652347963973, "loss": 1.2876, "step": 10281 }, { "epoch": 0.1336099005430095, "grad_norm": 0.5259339213371277, "learning_rate": 0.00017331392401772835, "loss": 1.4547, "step": 10282 }, { "epoch": 0.13362289508692538, "grad_norm": 0.49637162685394287, "learning_rate": 0.00017331132455581695, "loss": 1.6262, "step": 10283 }, { "epoch": 0.13363588963084125, "grad_norm": 0.3729323744773865, "learning_rate": 0.00017330872509390557, "loss": 1.4826, "step": 10284 }, { "epoch": 0.13364888417475712, "grad_norm": 0.34967002272605896, "learning_rate": 0.00017330612563199417, "loss": 1.3662, "step": 10285 }, { "epoch": 0.133661878718673, "grad_norm": 0.3983106315135956, "learning_rate": 0.0001733035261700828, "loss": 1.5116, "step": 10286 }, { "epoch": 0.13367487326258887, "grad_norm": 0.403053343296051, "learning_rate": 0.00017330092670817142, "loss": 1.4737, "step": 10287 }, { "epoch": 0.13368786780650474, "grad_norm": 0.4467945396900177, "learning_rate": 0.00017329832724626002, "loss": 1.3548, "step": 10288 }, { "epoch": 0.1337008623504206, "grad_norm": 0.4631403684616089, "learning_rate": 0.00017329572778434864, "loss": 1.3214, "step": 10289 }, { "epoch": 0.13371385689433649, "grad_norm": 0.5224770307540894, "learning_rate": 0.00017329312832243727, "loss": 1.3509, "step": 10290 }, { "epoch": 0.13372685143825236, "grad_norm": 0.4467334449291229, "learning_rate": 0.0001732905288605259, "loss": 1.597, "step": 10291 }, { "epoch": 0.13373984598216823, "grad_norm": 0.4227731227874756, "learning_rate": 0.0001732879293986145, "loss": 1.5186, "step": 10292 }, { "epoch": 0.1337528405260841, "grad_norm": 0.4261920750141144, "learning_rate": 0.0001732853299367031, "loss": 1.7286, "step": 10293 }, { "epoch": 0.13376583506999998, "grad_norm": 0.41498351097106934, "learning_rate": 0.00017328273047479174, "loss": 1.4786, "step": 10294 }, { "epoch": 0.13377882961391585, "grad_norm": 0.39950403571128845, "learning_rate": 0.00017328013101288033, "loss": 1.6114, "step": 10295 }, { "epoch": 0.13379182415783172, "grad_norm": 0.38255569338798523, "learning_rate": 0.00017327753155096896, "loss": 1.5391, "step": 10296 }, { "epoch": 0.1338048187017476, "grad_norm": 0.3950129449367523, "learning_rate": 0.00017327493208905758, "loss": 1.6083, "step": 10297 }, { "epoch": 0.13381781324566347, "grad_norm": 0.4778757095336914, "learning_rate": 0.00017327233262714618, "loss": 1.3761, "step": 10298 }, { "epoch": 0.13383080778957934, "grad_norm": 0.4529207646846771, "learning_rate": 0.0001732697331652348, "loss": 1.5515, "step": 10299 }, { "epoch": 0.13384380233349522, "grad_norm": 0.38711661100387573, "learning_rate": 0.0001732671337033234, "loss": 1.2721, "step": 10300 }, { "epoch": 0.1338567968774111, "grad_norm": 0.3175511360168457, "learning_rate": 0.00017326453424141205, "loss": 1.4429, "step": 10301 }, { "epoch": 0.13386979142132696, "grad_norm": 0.36104458570480347, "learning_rate": 0.00017326193477950065, "loss": 1.4208, "step": 10302 }, { "epoch": 0.13388278596524283, "grad_norm": 0.3169723451137543, "learning_rate": 0.00017325933531758928, "loss": 1.4414, "step": 10303 }, { "epoch": 0.1338957805091587, "grad_norm": 0.4668019711971283, "learning_rate": 0.00017325673585567787, "loss": 1.3821, "step": 10304 }, { "epoch": 0.13390877505307458, "grad_norm": 0.319766640663147, "learning_rate": 0.0001732541363937665, "loss": 1.3179, "step": 10305 }, { "epoch": 0.13392176959699045, "grad_norm": 0.46580541133880615, "learning_rate": 0.00017325153693185512, "loss": 1.4061, "step": 10306 }, { "epoch": 0.13393476414090633, "grad_norm": 0.41201043128967285, "learning_rate": 0.00017324893746994372, "loss": 1.4647, "step": 10307 }, { "epoch": 0.1339477586848222, "grad_norm": 0.4252960681915283, "learning_rate": 0.00017324633800803234, "loss": 1.3306, "step": 10308 }, { "epoch": 0.13396075322873807, "grad_norm": 0.5184053778648376, "learning_rate": 0.00017324373854612097, "loss": 1.3388, "step": 10309 }, { "epoch": 0.13397374777265394, "grad_norm": 0.380666047334671, "learning_rate": 0.0001732411390842096, "loss": 1.3891, "step": 10310 }, { "epoch": 0.13398674231656982, "grad_norm": 0.41391023993492126, "learning_rate": 0.0001732385396222982, "loss": 1.7379, "step": 10311 }, { "epoch": 0.1339997368604857, "grad_norm": 0.402710884809494, "learning_rate": 0.0001732359401603868, "loss": 1.4074, "step": 10312 }, { "epoch": 0.13401273140440156, "grad_norm": 0.39145785570144653, "learning_rate": 0.00017323334069847544, "loss": 1.3624, "step": 10313 }, { "epoch": 0.13402572594831744, "grad_norm": 0.43482527136802673, "learning_rate": 0.00017323074123656404, "loss": 1.3909, "step": 10314 }, { "epoch": 0.1340387204922333, "grad_norm": 0.38763582706451416, "learning_rate": 0.00017322814177465266, "loss": 1.4342, "step": 10315 }, { "epoch": 0.1340517150361492, "grad_norm": 0.40869957208633423, "learning_rate": 0.00017322554231274126, "loss": 1.5143, "step": 10316 }, { "epoch": 0.13406470958006508, "grad_norm": 0.3941170573234558, "learning_rate": 0.00017322294285082988, "loss": 1.5095, "step": 10317 }, { "epoch": 0.13407770412398096, "grad_norm": 0.4547525942325592, "learning_rate": 0.0001732203433889185, "loss": 1.3317, "step": 10318 }, { "epoch": 0.13409069866789683, "grad_norm": 0.3826883137226105, "learning_rate": 0.0001732177439270071, "loss": 1.5269, "step": 10319 }, { "epoch": 0.1341036932118127, "grad_norm": 0.392856240272522, "learning_rate": 0.00017321514446509573, "loss": 1.5489, "step": 10320 }, { "epoch": 0.13411668775572858, "grad_norm": 0.452817440032959, "learning_rate": 0.00017321254500318435, "loss": 1.4722, "step": 10321 }, { "epoch": 0.13412968229964445, "grad_norm": 0.3699339032173157, "learning_rate": 0.00017320994554127298, "loss": 1.2521, "step": 10322 }, { "epoch": 0.13414267684356032, "grad_norm": 0.40437933802604675, "learning_rate": 0.00017320734607936158, "loss": 1.4651, "step": 10323 }, { "epoch": 0.1341556713874762, "grad_norm": 0.4699774980545044, "learning_rate": 0.00017320474661745017, "loss": 1.4492, "step": 10324 }, { "epoch": 0.13416866593139207, "grad_norm": 0.3139057159423828, "learning_rate": 0.00017320214715553883, "loss": 1.3312, "step": 10325 }, { "epoch": 0.13418166047530794, "grad_norm": 0.4362099766731262, "learning_rate": 0.00017319954769362742, "loss": 1.4336, "step": 10326 }, { "epoch": 0.1341946550192238, "grad_norm": 0.38473936915397644, "learning_rate": 0.00017319694823171605, "loss": 1.4587, "step": 10327 }, { "epoch": 0.13420764956313969, "grad_norm": 0.4335325062274933, "learning_rate": 0.00017319434876980464, "loss": 1.3493, "step": 10328 }, { "epoch": 0.13422064410705556, "grad_norm": 0.41719844937324524, "learning_rate": 0.00017319174930789327, "loss": 1.434, "step": 10329 }, { "epoch": 0.13423363865097143, "grad_norm": 0.46054530143737793, "learning_rate": 0.0001731891498459819, "loss": 1.4347, "step": 10330 }, { "epoch": 0.1342466331948873, "grad_norm": 0.4843178391456604, "learning_rate": 0.0001731865503840705, "loss": 1.4339, "step": 10331 }, { "epoch": 0.13425962773880318, "grad_norm": 0.38382700085639954, "learning_rate": 0.00017318395092215912, "loss": 1.4265, "step": 10332 }, { "epoch": 0.13427262228271905, "grad_norm": 0.43007034063339233, "learning_rate": 0.00017318135146024774, "loss": 1.7029, "step": 10333 }, { "epoch": 0.13428561682663492, "grad_norm": 0.42118921875953674, "learning_rate": 0.00017317875199833636, "loss": 1.4431, "step": 10334 }, { "epoch": 0.1342986113705508, "grad_norm": 0.43030717968940735, "learning_rate": 0.00017317615253642496, "loss": 1.5413, "step": 10335 }, { "epoch": 0.13431160591446667, "grad_norm": 0.46663835644721985, "learning_rate": 0.00017317355307451359, "loss": 1.4771, "step": 10336 }, { "epoch": 0.13432460045838254, "grad_norm": 0.39839795231819153, "learning_rate": 0.0001731709536126022, "loss": 1.3301, "step": 10337 }, { "epoch": 0.13433759500229842, "grad_norm": 0.41001981496810913, "learning_rate": 0.0001731683541506908, "loss": 1.4955, "step": 10338 }, { "epoch": 0.1343505895462143, "grad_norm": 0.41984260082244873, "learning_rate": 0.00017316575468877943, "loss": 1.506, "step": 10339 }, { "epoch": 0.13436358409013016, "grad_norm": 0.4015372395515442, "learning_rate": 0.00017316315522686806, "loss": 1.4426, "step": 10340 }, { "epoch": 0.13437657863404603, "grad_norm": 0.4550345838069916, "learning_rate": 0.00017316055576495665, "loss": 1.343, "step": 10341 }, { "epoch": 0.1343895731779619, "grad_norm": 0.33817529678344727, "learning_rate": 0.00017315795630304528, "loss": 1.6399, "step": 10342 }, { "epoch": 0.13440256772187778, "grad_norm": 0.3979460895061493, "learning_rate": 0.00017315535684113388, "loss": 1.5058, "step": 10343 }, { "epoch": 0.13441556226579365, "grad_norm": 0.34761905670166016, "learning_rate": 0.00017315275737922253, "loss": 1.4553, "step": 10344 }, { "epoch": 0.13442855680970953, "grad_norm": 0.4102572798728943, "learning_rate": 0.00017315015791731113, "loss": 1.4372, "step": 10345 }, { "epoch": 0.1344415513536254, "grad_norm": 0.4973316788673401, "learning_rate": 0.00017314755845539975, "loss": 1.3344, "step": 10346 }, { "epoch": 0.13445454589754127, "grad_norm": 0.3183603286743164, "learning_rate": 0.00017314495899348835, "loss": 1.1312, "step": 10347 }, { "epoch": 0.13446754044145715, "grad_norm": 0.360584020614624, "learning_rate": 0.00017314235953157697, "loss": 1.4803, "step": 10348 }, { "epoch": 0.13448053498537302, "grad_norm": 0.44296449422836304, "learning_rate": 0.0001731397600696656, "loss": 1.5199, "step": 10349 }, { "epoch": 0.1344935295292889, "grad_norm": 0.25250524282455444, "learning_rate": 0.0001731371606077542, "loss": 1.0543, "step": 10350 }, { "epoch": 0.13450652407320476, "grad_norm": 0.3233751058578491, "learning_rate": 0.00017313456114584282, "loss": 1.4248, "step": 10351 }, { "epoch": 0.13451951861712064, "grad_norm": 0.32315072417259216, "learning_rate": 0.00017313196168393144, "loss": 1.4422, "step": 10352 }, { "epoch": 0.1345325131610365, "grad_norm": 0.3747360110282898, "learning_rate": 0.00017312936222202004, "loss": 1.3942, "step": 10353 }, { "epoch": 0.13454550770495238, "grad_norm": 0.33627772331237793, "learning_rate": 0.00017312676276010866, "loss": 1.4478, "step": 10354 }, { "epoch": 0.13455850224886826, "grad_norm": 0.38291966915130615, "learning_rate": 0.00017312416329819726, "loss": 1.3497, "step": 10355 }, { "epoch": 0.13457149679278413, "grad_norm": 0.4060259461402893, "learning_rate": 0.0001731215638362859, "loss": 1.3532, "step": 10356 }, { "epoch": 0.1345844913367, "grad_norm": 0.4041518270969391, "learning_rate": 0.0001731189643743745, "loss": 1.4656, "step": 10357 }, { "epoch": 0.13459748588061587, "grad_norm": 0.4120069742202759, "learning_rate": 0.00017311636491246314, "loss": 1.3757, "step": 10358 }, { "epoch": 0.13461048042453175, "grad_norm": 0.3214429020881653, "learning_rate": 0.00017311376545055173, "loss": 1.2607, "step": 10359 }, { "epoch": 0.13462347496844762, "grad_norm": 0.4030624330043793, "learning_rate": 0.00017311116598864036, "loss": 1.5785, "step": 10360 }, { "epoch": 0.1346364695123635, "grad_norm": 0.35320013761520386, "learning_rate": 0.00017310856652672898, "loss": 1.269, "step": 10361 }, { "epoch": 0.13464946405627937, "grad_norm": 0.35841625928878784, "learning_rate": 0.00017310596706481758, "loss": 1.548, "step": 10362 }, { "epoch": 0.13466245860019524, "grad_norm": 0.3769742250442505, "learning_rate": 0.0001731033676029062, "loss": 1.3, "step": 10363 }, { "epoch": 0.1346754531441111, "grad_norm": 0.45740842819213867, "learning_rate": 0.00017310076814099483, "loss": 1.3847, "step": 10364 }, { "epoch": 0.13468844768802699, "grad_norm": 0.335723876953125, "learning_rate": 0.00017309816867908345, "loss": 1.4011, "step": 10365 }, { "epoch": 0.13470144223194286, "grad_norm": 0.3708224296569824, "learning_rate": 0.00017309556921717205, "loss": 1.3776, "step": 10366 }, { "epoch": 0.13471443677585873, "grad_norm": 0.4020022451877594, "learning_rate": 0.00017309296975526065, "loss": 1.4931, "step": 10367 }, { "epoch": 0.1347274313197746, "grad_norm": 0.42780259251594543, "learning_rate": 0.0001730903702933493, "loss": 1.5831, "step": 10368 }, { "epoch": 0.13474042586369048, "grad_norm": 0.41389259696006775, "learning_rate": 0.0001730877708314379, "loss": 1.4241, "step": 10369 }, { "epoch": 0.13475342040760635, "grad_norm": 0.3301853835582733, "learning_rate": 0.00017308517136952652, "loss": 1.4376, "step": 10370 }, { "epoch": 0.13476641495152222, "grad_norm": 0.3607798218727112, "learning_rate": 0.00017308257190761512, "loss": 1.2714, "step": 10371 }, { "epoch": 0.1347794094954381, "grad_norm": 0.39608651399612427, "learning_rate": 0.00017307997244570374, "loss": 1.4355, "step": 10372 }, { "epoch": 0.13479240403935397, "grad_norm": 0.4363602101802826, "learning_rate": 0.00017307737298379237, "loss": 1.3418, "step": 10373 }, { "epoch": 0.13480539858326984, "grad_norm": 0.4446527659893036, "learning_rate": 0.00017307477352188096, "loss": 1.4276, "step": 10374 }, { "epoch": 0.13481839312718571, "grad_norm": 0.3339526057243347, "learning_rate": 0.00017307217405996962, "loss": 1.3378, "step": 10375 }, { "epoch": 0.1348313876711016, "grad_norm": 0.33654919266700745, "learning_rate": 0.0001730695745980582, "loss": 1.2298, "step": 10376 }, { "epoch": 0.13484438221501746, "grad_norm": 0.4624748229980469, "learning_rate": 0.00017306697513614684, "loss": 1.4168, "step": 10377 }, { "epoch": 0.13485737675893333, "grad_norm": 0.47972872853279114, "learning_rate": 0.00017306437567423544, "loss": 1.4545, "step": 10378 }, { "epoch": 0.1348703713028492, "grad_norm": 0.49401047825813293, "learning_rate": 0.00017306177621232406, "loss": 1.4005, "step": 10379 }, { "epoch": 0.13488336584676508, "grad_norm": 0.511713445186615, "learning_rate": 0.00017305917675041268, "loss": 1.3228, "step": 10380 }, { "epoch": 0.13489636039068095, "grad_norm": 0.458068311214447, "learning_rate": 0.00017305657728850128, "loss": 1.3656, "step": 10381 }, { "epoch": 0.13490935493459683, "grad_norm": 0.43266889452934265, "learning_rate": 0.0001730539778265899, "loss": 1.375, "step": 10382 }, { "epoch": 0.1349223494785127, "grad_norm": 0.395559698343277, "learning_rate": 0.00017305137836467853, "loss": 1.3088, "step": 10383 }, { "epoch": 0.13493534402242857, "grad_norm": 0.4026636779308319, "learning_rate": 0.00017304877890276713, "loss": 1.2945, "step": 10384 }, { "epoch": 0.13494833856634444, "grad_norm": 0.5190314650535583, "learning_rate": 0.00017304617944085575, "loss": 1.4983, "step": 10385 }, { "epoch": 0.13496133311026032, "grad_norm": 0.49648547172546387, "learning_rate": 0.00017304357997894435, "loss": 1.5121, "step": 10386 }, { "epoch": 0.1349743276541762, "grad_norm": 0.46948403120040894, "learning_rate": 0.000173040980517033, "loss": 1.449, "step": 10387 }, { "epoch": 0.13498732219809206, "grad_norm": 0.40046605467796326, "learning_rate": 0.0001730383810551216, "loss": 1.4601, "step": 10388 }, { "epoch": 0.13500031674200794, "grad_norm": 0.3224344849586487, "learning_rate": 0.00017303578159321022, "loss": 1.4101, "step": 10389 }, { "epoch": 0.1350133112859238, "grad_norm": 0.3160516619682312, "learning_rate": 0.00017303318213129882, "loss": 1.4129, "step": 10390 }, { "epoch": 0.13502630582983968, "grad_norm": 0.46160274744033813, "learning_rate": 0.00017303058266938745, "loss": 1.4075, "step": 10391 }, { "epoch": 0.13503930037375558, "grad_norm": 0.3166397213935852, "learning_rate": 0.00017302798320747607, "loss": 1.0969, "step": 10392 }, { "epoch": 0.13505229491767146, "grad_norm": 0.4604710042476654, "learning_rate": 0.00017302538374556467, "loss": 1.4373, "step": 10393 }, { "epoch": 0.13506528946158733, "grad_norm": 0.452091783285141, "learning_rate": 0.0001730227842836533, "loss": 1.5174, "step": 10394 }, { "epoch": 0.1350782840055032, "grad_norm": 0.4143034517765045, "learning_rate": 0.00017302018482174192, "loss": 1.5032, "step": 10395 }, { "epoch": 0.13509127854941907, "grad_norm": 0.30584678053855896, "learning_rate": 0.0001730175853598305, "loss": 1.1431, "step": 10396 }, { "epoch": 0.13510427309333495, "grad_norm": 0.35098373889923096, "learning_rate": 0.00017301498589791914, "loss": 1.3377, "step": 10397 }, { "epoch": 0.13511726763725082, "grad_norm": 0.4178418219089508, "learning_rate": 0.00017301238643600774, "loss": 1.4858, "step": 10398 }, { "epoch": 0.1351302621811667, "grad_norm": 0.45049336552619934, "learning_rate": 0.0001730097869740964, "loss": 1.4288, "step": 10399 }, { "epoch": 0.13514325672508257, "grad_norm": 0.6616224646568298, "learning_rate": 0.00017300718751218498, "loss": 1.5191, "step": 10400 }, { "epoch": 0.13515625126899844, "grad_norm": 0.44530558586120605, "learning_rate": 0.0001730045880502736, "loss": 1.3849, "step": 10401 }, { "epoch": 0.1351692458129143, "grad_norm": 0.22822310030460358, "learning_rate": 0.0001730019885883622, "loss": 1.0842, "step": 10402 }, { "epoch": 0.13518224035683019, "grad_norm": 0.2947538495063782, "learning_rate": 0.00017299938912645083, "loss": 1.4456, "step": 10403 }, { "epoch": 0.13519523490074606, "grad_norm": 0.4881010353565216, "learning_rate": 0.00017299678966453945, "loss": 1.4939, "step": 10404 }, { "epoch": 0.13520822944466193, "grad_norm": 0.40051162242889404, "learning_rate": 0.00017299419020262805, "loss": 1.5808, "step": 10405 }, { "epoch": 0.1352212239885778, "grad_norm": 0.3905968964099884, "learning_rate": 0.00017299159074071668, "loss": 1.1902, "step": 10406 }, { "epoch": 0.13523421853249368, "grad_norm": 0.43358638882637024, "learning_rate": 0.0001729889912788053, "loss": 1.3914, "step": 10407 }, { "epoch": 0.13524721307640955, "grad_norm": 0.2835332453250885, "learning_rate": 0.0001729863918168939, "loss": 1.3044, "step": 10408 }, { "epoch": 0.13526020762032542, "grad_norm": 0.41867509484291077, "learning_rate": 0.00017298379235498252, "loss": 1.483, "step": 10409 }, { "epoch": 0.1352732021642413, "grad_norm": 0.4036106467247009, "learning_rate": 0.00017298119289307115, "loss": 1.2726, "step": 10410 }, { "epoch": 0.13528619670815717, "grad_norm": 0.2868950068950653, "learning_rate": 0.00017297859343115977, "loss": 1.3164, "step": 10411 }, { "epoch": 0.13529919125207304, "grad_norm": 0.3779778778553009, "learning_rate": 0.00017297599396924837, "loss": 1.5453, "step": 10412 }, { "epoch": 0.13531218579598892, "grad_norm": 0.5382668972015381, "learning_rate": 0.000172973394507337, "loss": 1.5733, "step": 10413 }, { "epoch": 0.1353251803399048, "grad_norm": 0.4318283796310425, "learning_rate": 0.00017297079504542562, "loss": 1.6126, "step": 10414 }, { "epoch": 0.13533817488382066, "grad_norm": 0.3862241208553314, "learning_rate": 0.00017296819558351422, "loss": 1.2677, "step": 10415 }, { "epoch": 0.13535116942773653, "grad_norm": 0.46455109119415283, "learning_rate": 0.00017296559612160284, "loss": 1.351, "step": 10416 }, { "epoch": 0.1353641639716524, "grad_norm": 0.4568712115287781, "learning_rate": 0.00017296299665969144, "loss": 1.5687, "step": 10417 }, { "epoch": 0.13537715851556828, "grad_norm": 0.3928455710411072, "learning_rate": 0.0001729603971977801, "loss": 1.3934, "step": 10418 }, { "epoch": 0.13539015305948415, "grad_norm": 0.4216494560241699, "learning_rate": 0.0001729577977358687, "loss": 1.4433, "step": 10419 }, { "epoch": 0.13540314760340003, "grad_norm": 0.4168281555175781, "learning_rate": 0.00017295519827395728, "loss": 1.6246, "step": 10420 }, { "epoch": 0.1354161421473159, "grad_norm": 0.5439172387123108, "learning_rate": 0.0001729525988120459, "loss": 1.6783, "step": 10421 }, { "epoch": 0.13542913669123177, "grad_norm": 0.40333667397499084, "learning_rate": 0.00017294999935013453, "loss": 1.4696, "step": 10422 }, { "epoch": 0.13544213123514764, "grad_norm": 0.3222063183784485, "learning_rate": 0.00017294739988822316, "loss": 1.5009, "step": 10423 }, { "epoch": 0.13545512577906352, "grad_norm": 0.36979150772094727, "learning_rate": 0.00017294480042631175, "loss": 1.3801, "step": 10424 }, { "epoch": 0.1354681203229794, "grad_norm": 0.3979223072528839, "learning_rate": 0.00017294220096440038, "loss": 1.3626, "step": 10425 }, { "epoch": 0.13548111486689526, "grad_norm": 0.4525166451931, "learning_rate": 0.000172939601502489, "loss": 1.4386, "step": 10426 }, { "epoch": 0.13549410941081114, "grad_norm": 0.24766109883785248, "learning_rate": 0.0001729370020405776, "loss": 1.3634, "step": 10427 }, { "epoch": 0.135507103954727, "grad_norm": 0.34726038575172424, "learning_rate": 0.00017293440257866623, "loss": 1.3956, "step": 10428 }, { "epoch": 0.13552009849864288, "grad_norm": 0.3226400315761566, "learning_rate": 0.00017293180311675482, "loss": 1.1377, "step": 10429 }, { "epoch": 0.13553309304255876, "grad_norm": 0.25845417380332947, "learning_rate": 0.00017292920365484347, "loss": 1.185, "step": 10430 }, { "epoch": 0.13554608758647463, "grad_norm": 0.41717925667762756, "learning_rate": 0.00017292660419293207, "loss": 1.4206, "step": 10431 }, { "epoch": 0.1355590821303905, "grad_norm": 0.35647621750831604, "learning_rate": 0.0001729240047310207, "loss": 1.5524, "step": 10432 }, { "epoch": 0.13557207667430637, "grad_norm": 0.37825509905815125, "learning_rate": 0.0001729214052691093, "loss": 1.4484, "step": 10433 }, { "epoch": 0.13558507121822225, "grad_norm": 0.35352712869644165, "learning_rate": 0.00017291880580719792, "loss": 1.4822, "step": 10434 }, { "epoch": 0.13559806576213812, "grad_norm": 0.3956758379936218, "learning_rate": 0.00017291620634528654, "loss": 1.475, "step": 10435 }, { "epoch": 0.135611060306054, "grad_norm": 0.22947891056537628, "learning_rate": 0.00017291360688337514, "loss": 1.3214, "step": 10436 }, { "epoch": 0.13562405484996987, "grad_norm": 0.380686491727829, "learning_rate": 0.00017291100742146376, "loss": 1.3786, "step": 10437 }, { "epoch": 0.13563704939388574, "grad_norm": 0.39232754707336426, "learning_rate": 0.0001729084079595524, "loss": 1.3554, "step": 10438 }, { "epoch": 0.1356500439378016, "grad_norm": 0.45147377252578735, "learning_rate": 0.000172905808497641, "loss": 1.5893, "step": 10439 }, { "epoch": 0.13566303848171749, "grad_norm": 0.4316250681877136, "learning_rate": 0.0001729032090357296, "loss": 1.6121, "step": 10440 }, { "epoch": 0.13567603302563336, "grad_norm": 0.4013034999370575, "learning_rate": 0.0001729006095738182, "loss": 1.4084, "step": 10441 }, { "epoch": 0.13568902756954923, "grad_norm": 0.3859449326992035, "learning_rate": 0.00017289801011190686, "loss": 1.4563, "step": 10442 }, { "epoch": 0.1357020221134651, "grad_norm": 0.44385725259780884, "learning_rate": 0.00017289541064999546, "loss": 1.5617, "step": 10443 }, { "epoch": 0.13571501665738098, "grad_norm": 0.30905887484550476, "learning_rate": 0.00017289281118808408, "loss": 1.3442, "step": 10444 }, { "epoch": 0.13572801120129685, "grad_norm": 0.3928665518760681, "learning_rate": 0.00017289021172617268, "loss": 1.2986, "step": 10445 }, { "epoch": 0.13574100574521272, "grad_norm": 0.4879981279373169, "learning_rate": 0.0001728876122642613, "loss": 1.6302, "step": 10446 }, { "epoch": 0.1357540002891286, "grad_norm": 0.3093537986278534, "learning_rate": 0.00017288501280234993, "loss": 1.4506, "step": 10447 }, { "epoch": 0.13576699483304447, "grad_norm": 0.3475709557533264, "learning_rate": 0.00017288241334043853, "loss": 1.3921, "step": 10448 }, { "epoch": 0.13577998937696034, "grad_norm": 0.3754947781562805, "learning_rate": 0.00017287981387852718, "loss": 1.3088, "step": 10449 }, { "epoch": 0.13579298392087621, "grad_norm": 0.4017201066017151, "learning_rate": 0.00017287721441661577, "loss": 1.5099, "step": 10450 }, { "epoch": 0.1358059784647921, "grad_norm": 0.42656177282333374, "learning_rate": 0.00017287461495470437, "loss": 1.5031, "step": 10451 }, { "epoch": 0.13581897300870796, "grad_norm": 0.42006027698516846, "learning_rate": 0.000172872015492793, "loss": 1.465, "step": 10452 }, { "epoch": 0.13583196755262383, "grad_norm": 0.44549089670181274, "learning_rate": 0.00017286941603088162, "loss": 1.3782, "step": 10453 }, { "epoch": 0.1358449620965397, "grad_norm": 0.340950608253479, "learning_rate": 0.00017286681656897025, "loss": 1.2409, "step": 10454 }, { "epoch": 0.13585795664045558, "grad_norm": 0.46853703260421753, "learning_rate": 0.00017286421710705884, "loss": 1.4525, "step": 10455 }, { "epoch": 0.13587095118437145, "grad_norm": 0.28742673993110657, "learning_rate": 0.00017286161764514747, "loss": 1.2775, "step": 10456 }, { "epoch": 0.13588394572828733, "grad_norm": 0.7063469886779785, "learning_rate": 0.0001728590181832361, "loss": 1.4862, "step": 10457 }, { "epoch": 0.1358969402722032, "grad_norm": 0.4160999357700348, "learning_rate": 0.0001728564187213247, "loss": 1.5103, "step": 10458 }, { "epoch": 0.13590993481611907, "grad_norm": 0.2786518335342407, "learning_rate": 0.00017285381925941331, "loss": 1.4151, "step": 10459 }, { "epoch": 0.13592292936003494, "grad_norm": 0.37167608737945557, "learning_rate": 0.0001728512197975019, "loss": 1.4621, "step": 10460 }, { "epoch": 0.13593592390395082, "grad_norm": 0.3835524916648865, "learning_rate": 0.00017284862033559056, "loss": 1.4056, "step": 10461 }, { "epoch": 0.1359489184478667, "grad_norm": 0.43963563442230225, "learning_rate": 0.00017284602087367916, "loss": 1.3055, "step": 10462 }, { "epoch": 0.13596191299178256, "grad_norm": 0.3543713092803955, "learning_rate": 0.00017284342141176776, "loss": 1.3345, "step": 10463 }, { "epoch": 0.13597490753569844, "grad_norm": 0.3504061698913574, "learning_rate": 0.00017284082194985638, "loss": 1.6044, "step": 10464 }, { "epoch": 0.1359879020796143, "grad_norm": 0.36942625045776367, "learning_rate": 0.000172838222487945, "loss": 1.4402, "step": 10465 }, { "epoch": 0.13600089662353018, "grad_norm": 0.38377702236175537, "learning_rate": 0.00017283562302603363, "loss": 1.444, "step": 10466 }, { "epoch": 0.13601389116744605, "grad_norm": 0.3686785101890564, "learning_rate": 0.00017283302356412223, "loss": 1.4189, "step": 10467 }, { "epoch": 0.13602688571136196, "grad_norm": 0.4503157436847687, "learning_rate": 0.00017283042410221085, "loss": 1.4786, "step": 10468 }, { "epoch": 0.13603988025527783, "grad_norm": 0.37379541993141174, "learning_rate": 0.00017282782464029948, "loss": 1.665, "step": 10469 }, { "epoch": 0.1360528747991937, "grad_norm": 0.53169184923172, "learning_rate": 0.00017282522517838807, "loss": 1.3902, "step": 10470 }, { "epoch": 0.13606586934310957, "grad_norm": 0.34697386622428894, "learning_rate": 0.0001728226257164767, "loss": 1.3928, "step": 10471 }, { "epoch": 0.13607886388702545, "grad_norm": 0.3834492266178131, "learning_rate": 0.0001728200262545653, "loss": 1.3315, "step": 10472 }, { "epoch": 0.13609185843094132, "grad_norm": 0.3892078697681427, "learning_rate": 0.00017281742679265395, "loss": 1.4145, "step": 10473 }, { "epoch": 0.1361048529748572, "grad_norm": 0.38613563776016235, "learning_rate": 0.00017281482733074255, "loss": 1.4233, "step": 10474 }, { "epoch": 0.13611784751877307, "grad_norm": 0.34394288063049316, "learning_rate": 0.00017281222786883114, "loss": 1.3178, "step": 10475 }, { "epoch": 0.13613084206268894, "grad_norm": 0.4280282258987427, "learning_rate": 0.00017280962840691977, "loss": 1.4976, "step": 10476 }, { "epoch": 0.1361438366066048, "grad_norm": 0.38222944736480713, "learning_rate": 0.0001728070289450084, "loss": 1.4249, "step": 10477 }, { "epoch": 0.13615683115052069, "grad_norm": 0.4223169684410095, "learning_rate": 0.00017280442948309702, "loss": 1.4968, "step": 10478 }, { "epoch": 0.13616982569443656, "grad_norm": 0.3264272212982178, "learning_rate": 0.00017280183002118561, "loss": 1.3399, "step": 10479 }, { "epoch": 0.13618282023835243, "grad_norm": 0.41677117347717285, "learning_rate": 0.00017279923055927424, "loss": 1.5063, "step": 10480 }, { "epoch": 0.1361958147822683, "grad_norm": 0.36260271072387695, "learning_rate": 0.00017279663109736286, "loss": 1.3966, "step": 10481 }, { "epoch": 0.13620880932618418, "grad_norm": 0.3674204349517822, "learning_rate": 0.00017279403163545146, "loss": 1.383, "step": 10482 }, { "epoch": 0.13622180387010005, "grad_norm": 0.37665751576423645, "learning_rate": 0.00017279143217354008, "loss": 1.4083, "step": 10483 }, { "epoch": 0.13623479841401592, "grad_norm": 0.426018089056015, "learning_rate": 0.0001727888327116287, "loss": 1.3924, "step": 10484 }, { "epoch": 0.1362477929579318, "grad_norm": 0.3398778736591339, "learning_rate": 0.00017278623324971733, "loss": 1.5797, "step": 10485 }, { "epoch": 0.13626078750184767, "grad_norm": 0.29407134652137756, "learning_rate": 0.00017278363378780593, "loss": 1.509, "step": 10486 }, { "epoch": 0.13627378204576354, "grad_norm": 0.44475987553596497, "learning_rate": 0.00017278103432589456, "loss": 1.2657, "step": 10487 }, { "epoch": 0.13628677658967941, "grad_norm": 0.3878095746040344, "learning_rate": 0.00017277843486398318, "loss": 1.3901, "step": 10488 }, { "epoch": 0.1362997711335953, "grad_norm": 0.4573918879032135, "learning_rate": 0.00017277583540207178, "loss": 1.6271, "step": 10489 }, { "epoch": 0.13631276567751116, "grad_norm": 0.4283890426158905, "learning_rate": 0.0001727732359401604, "loss": 1.4373, "step": 10490 }, { "epoch": 0.13632576022142703, "grad_norm": 0.29495999217033386, "learning_rate": 0.000172770636478249, "loss": 1.3181, "step": 10491 }, { "epoch": 0.1363387547653429, "grad_norm": 0.41229575872421265, "learning_rate": 0.00017276803701633762, "loss": 1.313, "step": 10492 }, { "epoch": 0.13635174930925878, "grad_norm": 0.35758906602859497, "learning_rate": 0.00017276543755442625, "loss": 1.4246, "step": 10493 }, { "epoch": 0.13636474385317465, "grad_norm": 0.35581740736961365, "learning_rate": 0.00017276283809251485, "loss": 1.2879, "step": 10494 }, { "epoch": 0.13637773839709053, "grad_norm": 0.4496549963951111, "learning_rate": 0.00017276023863060347, "loss": 1.5218, "step": 10495 }, { "epoch": 0.1363907329410064, "grad_norm": 0.4742807447910309, "learning_rate": 0.0001727576391686921, "loss": 1.4893, "step": 10496 }, { "epoch": 0.13640372748492227, "grad_norm": 0.4038223922252655, "learning_rate": 0.00017275503970678072, "loss": 1.4817, "step": 10497 }, { "epoch": 0.13641672202883814, "grad_norm": 0.45823538303375244, "learning_rate": 0.00017275244024486932, "loss": 1.5909, "step": 10498 }, { "epoch": 0.13642971657275402, "grad_norm": 0.397400826215744, "learning_rate": 0.00017274984078295794, "loss": 1.3527, "step": 10499 }, { "epoch": 0.1364427111166699, "grad_norm": 0.40433722734451294, "learning_rate": 0.00017274724132104657, "loss": 1.2691, "step": 10500 }, { "epoch": 0.13645570566058576, "grad_norm": 0.2724015712738037, "learning_rate": 0.00017274464185913516, "loss": 1.461, "step": 10501 }, { "epoch": 0.13646870020450164, "grad_norm": 0.399953693151474, "learning_rate": 0.0001727420423972238, "loss": 1.3822, "step": 10502 }, { "epoch": 0.1364816947484175, "grad_norm": 0.5578676462173462, "learning_rate": 0.00017273944293531238, "loss": 1.335, "step": 10503 }, { "epoch": 0.13649468929233338, "grad_norm": 0.3851067125797272, "learning_rate": 0.000172736843473401, "loss": 1.488, "step": 10504 }, { "epoch": 0.13650768383624926, "grad_norm": 0.2920408248901367, "learning_rate": 0.00017273424401148963, "loss": 1.2559, "step": 10505 }, { "epoch": 0.13652067838016513, "grad_norm": 0.3744671046733856, "learning_rate": 0.00017273164454957823, "loss": 1.3797, "step": 10506 }, { "epoch": 0.136533672924081, "grad_norm": 0.24825863540172577, "learning_rate": 0.00017272904508766686, "loss": 1.4313, "step": 10507 }, { "epoch": 0.13654666746799687, "grad_norm": 0.42083391547203064, "learning_rate": 0.00017272644562575548, "loss": 1.5479, "step": 10508 }, { "epoch": 0.13655966201191275, "grad_norm": 0.4425850808620453, "learning_rate": 0.0001727238461638441, "loss": 1.417, "step": 10509 }, { "epoch": 0.13657265655582862, "grad_norm": 0.4810177981853485, "learning_rate": 0.0001727212467019327, "loss": 1.4951, "step": 10510 }, { "epoch": 0.1365856510997445, "grad_norm": 0.28087952733039856, "learning_rate": 0.00017271864724002133, "loss": 1.2765, "step": 10511 }, { "epoch": 0.13659864564366037, "grad_norm": 0.47911617159843445, "learning_rate": 0.00017271604777810995, "loss": 1.4801, "step": 10512 }, { "epoch": 0.13661164018757624, "grad_norm": 0.4173252582550049, "learning_rate": 0.00017271344831619855, "loss": 1.3148, "step": 10513 }, { "epoch": 0.1366246347314921, "grad_norm": 0.35897526144981384, "learning_rate": 0.00017271084885428717, "loss": 1.4882, "step": 10514 }, { "epoch": 0.13663762927540798, "grad_norm": 0.43524637818336487, "learning_rate": 0.00017270824939237577, "loss": 1.2251, "step": 10515 }, { "epoch": 0.13665062381932386, "grad_norm": 0.38038232922554016, "learning_rate": 0.00017270564993046442, "loss": 1.4763, "step": 10516 }, { "epoch": 0.13666361836323973, "grad_norm": 0.39000430703163147, "learning_rate": 0.00017270305046855302, "loss": 1.498, "step": 10517 }, { "epoch": 0.1366766129071556, "grad_norm": 0.8693828582763672, "learning_rate": 0.00017270045100664162, "loss": 1.4689, "step": 10518 }, { "epoch": 0.13668960745107148, "grad_norm": 0.49815839529037476, "learning_rate": 0.00017269785154473024, "loss": 1.4892, "step": 10519 }, { "epoch": 0.13670260199498735, "grad_norm": 0.3810057044029236, "learning_rate": 0.00017269525208281887, "loss": 1.3877, "step": 10520 }, { "epoch": 0.13671559653890322, "grad_norm": 0.2651503086090088, "learning_rate": 0.0001726926526209075, "loss": 1.2183, "step": 10521 }, { "epoch": 0.1367285910828191, "grad_norm": 0.3170636296272278, "learning_rate": 0.0001726900531589961, "loss": 1.5981, "step": 10522 }, { "epoch": 0.13674158562673497, "grad_norm": 0.3709258437156677, "learning_rate": 0.0001726874536970847, "loss": 1.4153, "step": 10523 }, { "epoch": 0.13675458017065084, "grad_norm": 0.48535898327827454, "learning_rate": 0.00017268485423517334, "loss": 1.2872, "step": 10524 }, { "epoch": 0.13676757471456671, "grad_norm": 0.3966870605945587, "learning_rate": 0.00017268225477326193, "loss": 1.5348, "step": 10525 }, { "epoch": 0.1367805692584826, "grad_norm": 0.304897665977478, "learning_rate": 0.00017267965531135056, "loss": 1.2594, "step": 10526 }, { "epoch": 0.13679356380239846, "grad_norm": 0.39303287863731384, "learning_rate": 0.00017267705584943918, "loss": 1.6777, "step": 10527 }, { "epoch": 0.13680655834631433, "grad_norm": 0.4536191523075104, "learning_rate": 0.0001726744563875278, "loss": 1.6083, "step": 10528 }, { "epoch": 0.1368195528902302, "grad_norm": 0.4273262321949005, "learning_rate": 0.0001726718569256164, "loss": 1.4775, "step": 10529 }, { "epoch": 0.13683254743414608, "grad_norm": 0.4868556261062622, "learning_rate": 0.000172669257463705, "loss": 1.4794, "step": 10530 }, { "epoch": 0.13684554197806195, "grad_norm": 0.4230353534221649, "learning_rate": 0.00017266665800179365, "loss": 1.3272, "step": 10531 }, { "epoch": 0.13685853652197782, "grad_norm": 0.3850875794887543, "learning_rate": 0.00017266405853988225, "loss": 1.3766, "step": 10532 }, { "epoch": 0.1368715310658937, "grad_norm": 0.38219889998435974, "learning_rate": 0.00017266145907797087, "loss": 1.3608, "step": 10533 }, { "epoch": 0.13688452560980957, "grad_norm": 0.580220103263855, "learning_rate": 0.00017265885961605947, "loss": 1.4533, "step": 10534 }, { "epoch": 0.13689752015372544, "grad_norm": 0.4341967701911926, "learning_rate": 0.0001726562601541481, "loss": 1.5912, "step": 10535 }, { "epoch": 0.13691051469764132, "grad_norm": 0.38223201036453247, "learning_rate": 0.00017265366069223672, "loss": 1.422, "step": 10536 }, { "epoch": 0.1369235092415572, "grad_norm": 0.38255515694618225, "learning_rate": 0.00017265106123032532, "loss": 1.3093, "step": 10537 }, { "epoch": 0.13693650378547306, "grad_norm": 0.3335675895214081, "learning_rate": 0.00017264846176841394, "loss": 1.3909, "step": 10538 }, { "epoch": 0.13694949832938894, "grad_norm": 0.3044624924659729, "learning_rate": 0.00017264586230650257, "loss": 1.3799, "step": 10539 }, { "epoch": 0.1369624928733048, "grad_norm": 0.31655076146125793, "learning_rate": 0.0001726432628445912, "loss": 1.1527, "step": 10540 }, { "epoch": 0.13697548741722068, "grad_norm": 0.40791475772857666, "learning_rate": 0.0001726406633826798, "loss": 1.5779, "step": 10541 }, { "epoch": 0.13698848196113655, "grad_norm": 0.2784699499607086, "learning_rate": 0.0001726380639207684, "loss": 1.3844, "step": 10542 }, { "epoch": 0.13700147650505243, "grad_norm": 0.3830648958683014, "learning_rate": 0.00017263546445885704, "loss": 1.5756, "step": 10543 }, { "epoch": 0.13701447104896833, "grad_norm": 0.38863110542297363, "learning_rate": 0.00017263286499694564, "loss": 1.3717, "step": 10544 }, { "epoch": 0.1370274655928842, "grad_norm": 0.4381507933139801, "learning_rate": 0.00017263026553503426, "loss": 1.5142, "step": 10545 }, { "epoch": 0.13704046013680007, "grad_norm": 0.36823081970214844, "learning_rate": 0.00017262766607312286, "loss": 1.4017, "step": 10546 }, { "epoch": 0.13705345468071595, "grad_norm": 0.38603949546813965, "learning_rate": 0.00017262506661121148, "loss": 1.4844, "step": 10547 }, { "epoch": 0.13706644922463182, "grad_norm": 0.30869975686073303, "learning_rate": 0.0001726224671493001, "loss": 1.2908, "step": 10548 }, { "epoch": 0.1370794437685477, "grad_norm": 0.4072871506214142, "learning_rate": 0.0001726198676873887, "loss": 1.4849, "step": 10549 }, { "epoch": 0.13709243831246357, "grad_norm": 0.322182297706604, "learning_rate": 0.00017261726822547733, "loss": 1.1986, "step": 10550 }, { "epoch": 0.13710543285637944, "grad_norm": 0.43072229623794556, "learning_rate": 0.00017261466876356595, "loss": 1.672, "step": 10551 }, { "epoch": 0.1371184274002953, "grad_norm": 0.3653903007507324, "learning_rate": 0.00017261206930165458, "loss": 1.4151, "step": 10552 }, { "epoch": 0.13713142194421118, "grad_norm": 0.42655181884765625, "learning_rate": 0.00017260946983974317, "loss": 1.4845, "step": 10553 }, { "epoch": 0.13714441648812706, "grad_norm": 0.4901348948478699, "learning_rate": 0.0001726068703778318, "loss": 1.2358, "step": 10554 }, { "epoch": 0.13715741103204293, "grad_norm": 0.3265390396118164, "learning_rate": 0.00017260427091592042, "loss": 1.3265, "step": 10555 }, { "epoch": 0.1371704055759588, "grad_norm": 0.35710471868515015, "learning_rate": 0.00017260167145400902, "loss": 1.2568, "step": 10556 }, { "epoch": 0.13718340011987468, "grad_norm": 0.5008095502853394, "learning_rate": 0.00017259907199209765, "loss": 1.5164, "step": 10557 }, { "epoch": 0.13719639466379055, "grad_norm": 0.4730646014213562, "learning_rate": 0.00017259647253018627, "loss": 1.4292, "step": 10558 }, { "epoch": 0.13720938920770642, "grad_norm": 0.34148985147476196, "learning_rate": 0.00017259387306827487, "loss": 1.3327, "step": 10559 }, { "epoch": 0.1372223837516223, "grad_norm": 0.4202231466770172, "learning_rate": 0.0001725912736063635, "loss": 1.5288, "step": 10560 }, { "epoch": 0.13723537829553817, "grad_norm": 0.3581228256225586, "learning_rate": 0.0001725886741444521, "loss": 1.306, "step": 10561 }, { "epoch": 0.13724837283945404, "grad_norm": 0.3379080891609192, "learning_rate": 0.00017258607468254074, "loss": 1.2369, "step": 10562 }, { "epoch": 0.13726136738336991, "grad_norm": 0.3051481544971466, "learning_rate": 0.00017258347522062934, "loss": 1.2441, "step": 10563 }, { "epoch": 0.1372743619272858, "grad_norm": 0.413403183221817, "learning_rate": 0.00017258087575871796, "loss": 1.3255, "step": 10564 }, { "epoch": 0.13728735647120166, "grad_norm": 0.42408865690231323, "learning_rate": 0.00017257827629680656, "loss": 1.4321, "step": 10565 }, { "epoch": 0.13730035101511753, "grad_norm": 0.3705972135066986, "learning_rate": 0.00017257567683489518, "loss": 1.4341, "step": 10566 }, { "epoch": 0.1373133455590334, "grad_norm": 0.5651363134384155, "learning_rate": 0.0001725730773729838, "loss": 1.4846, "step": 10567 }, { "epoch": 0.13732634010294928, "grad_norm": 0.3010171353816986, "learning_rate": 0.0001725704779110724, "loss": 1.2273, "step": 10568 }, { "epoch": 0.13733933464686515, "grad_norm": 0.5309604406356812, "learning_rate": 0.00017256787844916103, "loss": 1.4023, "step": 10569 }, { "epoch": 0.13735232919078103, "grad_norm": 0.3206302523612976, "learning_rate": 0.00017256527898724966, "loss": 1.3123, "step": 10570 }, { "epoch": 0.1373653237346969, "grad_norm": 0.3580516278743744, "learning_rate": 0.00017256267952533828, "loss": 1.4181, "step": 10571 }, { "epoch": 0.13737831827861277, "grad_norm": 0.38778382539749146, "learning_rate": 0.00017256008006342688, "loss": 1.3119, "step": 10572 }, { "epoch": 0.13739131282252864, "grad_norm": 0.43884924054145813, "learning_rate": 0.00017255748060151547, "loss": 1.4495, "step": 10573 }, { "epoch": 0.13740430736644452, "grad_norm": 0.7913076877593994, "learning_rate": 0.00017255488113960413, "loss": 1.5133, "step": 10574 }, { "epoch": 0.1374173019103604, "grad_norm": 0.4408593773841858, "learning_rate": 0.00017255228167769272, "loss": 1.3516, "step": 10575 }, { "epoch": 0.13743029645427626, "grad_norm": 0.3535459339618683, "learning_rate": 0.00017254968221578135, "loss": 1.6191, "step": 10576 }, { "epoch": 0.13744329099819214, "grad_norm": 0.5241795778274536, "learning_rate": 0.00017254708275386995, "loss": 1.5931, "step": 10577 }, { "epoch": 0.137456285542108, "grad_norm": 0.3066680133342743, "learning_rate": 0.00017254448329195857, "loss": 1.3775, "step": 10578 }, { "epoch": 0.13746928008602388, "grad_norm": 0.46135973930358887, "learning_rate": 0.0001725418838300472, "loss": 1.4092, "step": 10579 }, { "epoch": 0.13748227462993975, "grad_norm": 0.3281068205833435, "learning_rate": 0.0001725392843681358, "loss": 1.3127, "step": 10580 }, { "epoch": 0.13749526917385563, "grad_norm": 0.42797136306762695, "learning_rate": 0.00017253668490622442, "loss": 1.6918, "step": 10581 }, { "epoch": 0.1375082637177715, "grad_norm": 0.4591136872768402, "learning_rate": 0.00017253408544431304, "loss": 1.494, "step": 10582 }, { "epoch": 0.13752125826168737, "grad_norm": 0.3453805148601532, "learning_rate": 0.00017253148598240167, "loss": 1.3688, "step": 10583 }, { "epoch": 0.13753425280560325, "grad_norm": 0.4060210883617401, "learning_rate": 0.00017252888652049026, "loss": 1.5277, "step": 10584 }, { "epoch": 0.13754724734951912, "grad_norm": 0.6141288876533508, "learning_rate": 0.00017252628705857886, "loss": 1.3349, "step": 10585 }, { "epoch": 0.137560241893435, "grad_norm": 0.4474853277206421, "learning_rate": 0.0001725236875966675, "loss": 1.6698, "step": 10586 }, { "epoch": 0.13757323643735087, "grad_norm": 0.3038195073604584, "learning_rate": 0.0001725210881347561, "loss": 1.4218, "step": 10587 }, { "epoch": 0.13758623098126674, "grad_norm": 0.43007174134254456, "learning_rate": 0.00017251848867284473, "loss": 1.5399, "step": 10588 }, { "epoch": 0.1375992255251826, "grad_norm": 0.49214497208595276, "learning_rate": 0.00017251588921093333, "loss": 1.5708, "step": 10589 }, { "epoch": 0.13761222006909848, "grad_norm": 0.3328685164451599, "learning_rate": 0.00017251328974902196, "loss": 1.2818, "step": 10590 }, { "epoch": 0.13762521461301436, "grad_norm": 0.3670251667499542, "learning_rate": 0.00017251069028711058, "loss": 1.2983, "step": 10591 }, { "epoch": 0.13763820915693023, "grad_norm": 0.2783401608467102, "learning_rate": 0.00017250809082519918, "loss": 1.3688, "step": 10592 }, { "epoch": 0.1376512037008461, "grad_norm": 0.3808026909828186, "learning_rate": 0.0001725054913632878, "loss": 1.4213, "step": 10593 }, { "epoch": 0.13766419824476198, "grad_norm": 0.3565732538700104, "learning_rate": 0.00017250289190137643, "loss": 1.4686, "step": 10594 }, { "epoch": 0.13767719278867785, "grad_norm": 0.44309598207473755, "learning_rate": 0.00017250029243946505, "loss": 1.1618, "step": 10595 }, { "epoch": 0.13769018733259372, "grad_norm": 0.3477417528629303, "learning_rate": 0.00017249769297755365, "loss": 1.5659, "step": 10596 }, { "epoch": 0.1377031818765096, "grad_norm": 0.423973947763443, "learning_rate": 0.00017249509351564227, "loss": 1.4957, "step": 10597 }, { "epoch": 0.13771617642042547, "grad_norm": 0.3800935745239258, "learning_rate": 0.0001724924940537309, "loss": 1.4856, "step": 10598 }, { "epoch": 0.13772917096434134, "grad_norm": 0.4738127589225769, "learning_rate": 0.0001724898945918195, "loss": 1.5107, "step": 10599 }, { "epoch": 0.1377421655082572, "grad_norm": 0.39662086963653564, "learning_rate": 0.00017248729512990812, "loss": 1.4037, "step": 10600 }, { "epoch": 0.1377551600521731, "grad_norm": 0.47219574451446533, "learning_rate": 0.00017248469566799674, "loss": 1.4837, "step": 10601 }, { "epoch": 0.13776815459608896, "grad_norm": 0.3509545922279358, "learning_rate": 0.00017248209620608534, "loss": 1.551, "step": 10602 }, { "epoch": 0.13778114914000483, "grad_norm": 0.4001471698284149, "learning_rate": 0.00017247949674417397, "loss": 1.3914, "step": 10603 }, { "epoch": 0.1377941436839207, "grad_norm": 0.35533377528190613, "learning_rate": 0.00017247689728226256, "loss": 1.6054, "step": 10604 }, { "epoch": 0.13780713822783658, "grad_norm": 0.30173033475875854, "learning_rate": 0.00017247429782035121, "loss": 1.41, "step": 10605 }, { "epoch": 0.13782013277175245, "grad_norm": 0.4919365346431732, "learning_rate": 0.0001724716983584398, "loss": 1.3973, "step": 10606 }, { "epoch": 0.13783312731566832, "grad_norm": 0.338100790977478, "learning_rate": 0.00017246909889652844, "loss": 1.1979, "step": 10607 }, { "epoch": 0.1378461218595842, "grad_norm": 0.361561119556427, "learning_rate": 0.00017246649943461703, "loss": 1.4515, "step": 10608 }, { "epoch": 0.13785911640350007, "grad_norm": 0.4210669994354248, "learning_rate": 0.00017246389997270566, "loss": 1.4154, "step": 10609 }, { "epoch": 0.13787211094741594, "grad_norm": 0.38256022334098816, "learning_rate": 0.00017246130051079428, "loss": 1.3763, "step": 10610 }, { "epoch": 0.13788510549133182, "grad_norm": 0.4099956750869751, "learning_rate": 0.00017245870104888288, "loss": 1.4154, "step": 10611 }, { "epoch": 0.1378981000352477, "grad_norm": 0.42823123931884766, "learning_rate": 0.0001724561015869715, "loss": 1.2585, "step": 10612 }, { "epoch": 0.13791109457916356, "grad_norm": 0.43820980191230774, "learning_rate": 0.00017245350212506013, "loss": 1.58, "step": 10613 }, { "epoch": 0.13792408912307944, "grad_norm": 0.3689756393432617, "learning_rate": 0.00017245090266314873, "loss": 1.3779, "step": 10614 }, { "epoch": 0.1379370836669953, "grad_norm": 0.4533342719078064, "learning_rate": 0.00017244830320123735, "loss": 1.3371, "step": 10615 }, { "epoch": 0.13795007821091118, "grad_norm": 0.3453167676925659, "learning_rate": 0.00017244570373932595, "loss": 1.4459, "step": 10616 }, { "epoch": 0.13796307275482705, "grad_norm": 0.42374488711357117, "learning_rate": 0.0001724431042774146, "loss": 1.4445, "step": 10617 }, { "epoch": 0.13797606729874293, "grad_norm": 0.4540706276893616, "learning_rate": 0.0001724405048155032, "loss": 1.4461, "step": 10618 }, { "epoch": 0.1379890618426588, "grad_norm": 0.4351465106010437, "learning_rate": 0.00017243790535359182, "loss": 1.4642, "step": 10619 }, { "epoch": 0.1380020563865747, "grad_norm": 0.4485321640968323, "learning_rate": 0.00017243530589168042, "loss": 1.6215, "step": 10620 }, { "epoch": 0.13801505093049057, "grad_norm": 0.37194985151290894, "learning_rate": 0.00017243270642976904, "loss": 1.4377, "step": 10621 }, { "epoch": 0.13802804547440645, "grad_norm": 0.4043198227882385, "learning_rate": 0.00017243010696785767, "loss": 1.2744, "step": 10622 }, { "epoch": 0.13804104001832232, "grad_norm": 0.4267769157886505, "learning_rate": 0.00017242750750594627, "loss": 1.3616, "step": 10623 }, { "epoch": 0.1380540345622382, "grad_norm": 0.46974459290504456, "learning_rate": 0.0001724249080440349, "loss": 1.5613, "step": 10624 }, { "epoch": 0.13806702910615407, "grad_norm": 0.3130131661891937, "learning_rate": 0.00017242230858212351, "loss": 1.1999, "step": 10625 }, { "epoch": 0.13808002365006994, "grad_norm": 0.4928038716316223, "learning_rate": 0.0001724197091202121, "loss": 1.5424, "step": 10626 }, { "epoch": 0.1380930181939858, "grad_norm": 0.35475754737854004, "learning_rate": 0.00017241710965830074, "loss": 1.4689, "step": 10627 }, { "epoch": 0.13810601273790168, "grad_norm": 0.4033093750476837, "learning_rate": 0.00017241451019638933, "loss": 1.2252, "step": 10628 }, { "epoch": 0.13811900728181756, "grad_norm": 0.34757670760154724, "learning_rate": 0.00017241191073447799, "loss": 1.3315, "step": 10629 }, { "epoch": 0.13813200182573343, "grad_norm": 0.39496028423309326, "learning_rate": 0.00017240931127256658, "loss": 1.4887, "step": 10630 }, { "epoch": 0.1381449963696493, "grad_norm": 0.3175010085105896, "learning_rate": 0.0001724067118106552, "loss": 1.474, "step": 10631 }, { "epoch": 0.13815799091356518, "grad_norm": 0.44320324063301086, "learning_rate": 0.00017240411234874383, "loss": 1.4776, "step": 10632 }, { "epoch": 0.13817098545748105, "grad_norm": 0.4860862195491791, "learning_rate": 0.00017240151288683243, "loss": 1.3037, "step": 10633 }, { "epoch": 0.13818398000139692, "grad_norm": 0.4313218891620636, "learning_rate": 0.00017239891342492105, "loss": 1.3952, "step": 10634 }, { "epoch": 0.1381969745453128, "grad_norm": 0.4191610813140869, "learning_rate": 0.00017239631396300965, "loss": 1.3451, "step": 10635 }, { "epoch": 0.13820996908922867, "grad_norm": 0.35789498686790466, "learning_rate": 0.0001723937145010983, "loss": 1.4343, "step": 10636 }, { "epoch": 0.13822296363314454, "grad_norm": 0.3415585458278656, "learning_rate": 0.0001723911150391869, "loss": 1.358, "step": 10637 }, { "epoch": 0.13823595817706041, "grad_norm": 0.4315042495727539, "learning_rate": 0.00017238851557727552, "loss": 1.5833, "step": 10638 }, { "epoch": 0.1382489527209763, "grad_norm": 0.44580018520355225, "learning_rate": 0.00017238591611536412, "loss": 1.5413, "step": 10639 }, { "epoch": 0.13826194726489216, "grad_norm": 0.48365074396133423, "learning_rate": 0.00017238331665345275, "loss": 1.4997, "step": 10640 }, { "epoch": 0.13827494180880803, "grad_norm": 0.3096107542514801, "learning_rate": 0.00017238071719154137, "loss": 1.4661, "step": 10641 }, { "epoch": 0.1382879363527239, "grad_norm": 0.39686885476112366, "learning_rate": 0.00017237811772962997, "loss": 1.5045, "step": 10642 }, { "epoch": 0.13830093089663978, "grad_norm": 0.5136502981185913, "learning_rate": 0.0001723755182677186, "loss": 1.4981, "step": 10643 }, { "epoch": 0.13831392544055565, "grad_norm": 0.4076172411441803, "learning_rate": 0.00017237291880580722, "loss": 1.4703, "step": 10644 }, { "epoch": 0.13832691998447152, "grad_norm": 0.33252274990081787, "learning_rate": 0.00017237031934389581, "loss": 1.4858, "step": 10645 }, { "epoch": 0.1383399145283874, "grad_norm": 0.3941386044025421, "learning_rate": 0.00017236771988198444, "loss": 1.5372, "step": 10646 }, { "epoch": 0.13835290907230327, "grad_norm": 0.3625786304473877, "learning_rate": 0.00017236512042007304, "loss": 1.4991, "step": 10647 }, { "epoch": 0.13836590361621914, "grad_norm": 0.3857329189777374, "learning_rate": 0.0001723625209581617, "loss": 1.5909, "step": 10648 }, { "epoch": 0.13837889816013502, "grad_norm": 0.36059844493865967, "learning_rate": 0.00017235992149625029, "loss": 1.6087, "step": 10649 }, { "epoch": 0.1383918927040509, "grad_norm": 0.3763032853603363, "learning_rate": 0.0001723573220343389, "loss": 1.3809, "step": 10650 }, { "epoch": 0.13840488724796676, "grad_norm": 0.37493008375167847, "learning_rate": 0.0001723547225724275, "loss": 1.4514, "step": 10651 }, { "epoch": 0.13841788179188264, "grad_norm": 0.33912596106529236, "learning_rate": 0.00017235212311051613, "loss": 1.3984, "step": 10652 }, { "epoch": 0.1384308763357985, "grad_norm": 0.29063013195991516, "learning_rate": 0.00017234952364860476, "loss": 1.3259, "step": 10653 }, { "epoch": 0.13844387087971438, "grad_norm": 0.3569600284099579, "learning_rate": 0.00017234692418669335, "loss": 1.2968, "step": 10654 }, { "epoch": 0.13845686542363025, "grad_norm": 0.389163076877594, "learning_rate": 0.00017234432472478198, "loss": 1.1315, "step": 10655 }, { "epoch": 0.13846985996754613, "grad_norm": 0.4171965718269348, "learning_rate": 0.0001723417252628706, "loss": 1.4694, "step": 10656 }, { "epoch": 0.138482854511462, "grad_norm": 0.35065358877182007, "learning_rate": 0.0001723391258009592, "loss": 1.3226, "step": 10657 }, { "epoch": 0.13849584905537787, "grad_norm": 0.2886101305484772, "learning_rate": 0.00017233652633904782, "loss": 1.3109, "step": 10658 }, { "epoch": 0.13850884359929375, "grad_norm": 0.41169869899749756, "learning_rate": 0.00017233392687713642, "loss": 1.572, "step": 10659 }, { "epoch": 0.13852183814320962, "grad_norm": 0.31052806973457336, "learning_rate": 0.00017233132741522507, "loss": 1.3383, "step": 10660 }, { "epoch": 0.1385348326871255, "grad_norm": 0.37719324231147766, "learning_rate": 0.00017232872795331367, "loss": 1.5925, "step": 10661 }, { "epoch": 0.13854782723104136, "grad_norm": 0.4659688174724579, "learning_rate": 0.0001723261284914023, "loss": 1.5769, "step": 10662 }, { "epoch": 0.13856082177495724, "grad_norm": 0.3831879794597626, "learning_rate": 0.0001723235290294909, "loss": 1.3675, "step": 10663 }, { "epoch": 0.1385738163188731, "grad_norm": 0.41098740696907043, "learning_rate": 0.00017232092956757952, "loss": 1.5494, "step": 10664 }, { "epoch": 0.13858681086278898, "grad_norm": 0.33471986651420593, "learning_rate": 0.00017231833010566814, "loss": 1.3501, "step": 10665 }, { "epoch": 0.13859980540670486, "grad_norm": 0.455466628074646, "learning_rate": 0.00017231573064375674, "loss": 1.7076, "step": 10666 }, { "epoch": 0.13861279995062073, "grad_norm": 0.3916175365447998, "learning_rate": 0.00017231313118184536, "loss": 1.5835, "step": 10667 }, { "epoch": 0.1386257944945366, "grad_norm": 0.3723258972167969, "learning_rate": 0.000172310531719934, "loss": 1.406, "step": 10668 }, { "epoch": 0.13863878903845248, "grad_norm": 0.4556010067462921, "learning_rate": 0.00017230793225802259, "loss": 1.5664, "step": 10669 }, { "epoch": 0.13865178358236835, "grad_norm": 0.4294789731502533, "learning_rate": 0.0001723053327961112, "loss": 1.5854, "step": 10670 }, { "epoch": 0.13866477812628422, "grad_norm": 0.4607834815979004, "learning_rate": 0.00017230273333419983, "loss": 1.4694, "step": 10671 }, { "epoch": 0.1386777726702001, "grad_norm": 0.4007158875465393, "learning_rate": 0.00017230013387228846, "loss": 1.3922, "step": 10672 }, { "epoch": 0.13869076721411597, "grad_norm": 0.4190846085548401, "learning_rate": 0.00017229753441037706, "loss": 1.5175, "step": 10673 }, { "epoch": 0.13870376175803184, "grad_norm": 0.4587985873222351, "learning_rate": 0.00017229493494846568, "loss": 1.3897, "step": 10674 }, { "epoch": 0.1387167563019477, "grad_norm": 0.37550240755081177, "learning_rate": 0.0001722923354865543, "loss": 1.4088, "step": 10675 }, { "epoch": 0.1387297508458636, "grad_norm": 0.38696759939193726, "learning_rate": 0.0001722897360246429, "loss": 1.3405, "step": 10676 }, { "epoch": 0.13874274538977946, "grad_norm": 0.35055679082870483, "learning_rate": 0.00017228713656273153, "loss": 1.5386, "step": 10677 }, { "epoch": 0.13875573993369533, "grad_norm": 0.34917086362838745, "learning_rate": 0.00017228453710082012, "loss": 1.3728, "step": 10678 }, { "epoch": 0.1387687344776112, "grad_norm": 0.44074827432632446, "learning_rate": 0.00017228193763890878, "loss": 1.4399, "step": 10679 }, { "epoch": 0.13878172902152708, "grad_norm": 0.347922146320343, "learning_rate": 0.00017227933817699737, "loss": 1.401, "step": 10680 }, { "epoch": 0.13879472356544295, "grad_norm": 0.44942864775657654, "learning_rate": 0.00017227673871508597, "loss": 1.4835, "step": 10681 }, { "epoch": 0.13880771810935882, "grad_norm": 0.44252923130989075, "learning_rate": 0.0001722741392531746, "loss": 1.4024, "step": 10682 }, { "epoch": 0.1388207126532747, "grad_norm": 0.3438055217266083, "learning_rate": 0.00017227153979126322, "loss": 1.5072, "step": 10683 }, { "epoch": 0.13883370719719057, "grad_norm": 0.43290746212005615, "learning_rate": 0.00017226894032935184, "loss": 1.3606, "step": 10684 }, { "epoch": 0.13884670174110644, "grad_norm": 0.4248257279396057, "learning_rate": 0.00017226634086744044, "loss": 1.5866, "step": 10685 }, { "epoch": 0.13885969628502232, "grad_norm": 0.41684690117836, "learning_rate": 0.00017226374140552907, "loss": 1.3661, "step": 10686 }, { "epoch": 0.1388726908289382, "grad_norm": 0.4077261984348297, "learning_rate": 0.0001722611419436177, "loss": 1.2825, "step": 10687 }, { "epoch": 0.13888568537285406, "grad_norm": 0.3705204129219055, "learning_rate": 0.0001722585424817063, "loss": 1.4974, "step": 10688 }, { "epoch": 0.13889867991676993, "grad_norm": 0.5640507340431213, "learning_rate": 0.0001722559430197949, "loss": 1.4143, "step": 10689 }, { "epoch": 0.1389116744606858, "grad_norm": 0.42965078353881836, "learning_rate": 0.0001722533435578835, "loss": 1.6264, "step": 10690 }, { "epoch": 0.13892466900460168, "grad_norm": 0.4460815489292145, "learning_rate": 0.00017225074409597216, "loss": 1.7322, "step": 10691 }, { "epoch": 0.13893766354851755, "grad_norm": 0.35348957777023315, "learning_rate": 0.00017224814463406076, "loss": 1.2189, "step": 10692 }, { "epoch": 0.13895065809243343, "grad_norm": 0.42552369832992554, "learning_rate": 0.00017224554517214938, "loss": 1.4319, "step": 10693 }, { "epoch": 0.1389636526363493, "grad_norm": 0.3813215494155884, "learning_rate": 0.00017224294571023798, "loss": 1.5396, "step": 10694 }, { "epoch": 0.13897664718026517, "grad_norm": 0.43992581963539124, "learning_rate": 0.0001722403462483266, "loss": 1.4858, "step": 10695 }, { "epoch": 0.13898964172418107, "grad_norm": 0.4361250102519989, "learning_rate": 0.00017223774678641523, "loss": 1.5107, "step": 10696 }, { "epoch": 0.13900263626809695, "grad_norm": 0.3931949734687805, "learning_rate": 0.00017223514732450383, "loss": 1.3818, "step": 10697 }, { "epoch": 0.13901563081201282, "grad_norm": 0.35805174708366394, "learning_rate": 0.00017223254786259245, "loss": 1.3802, "step": 10698 }, { "epoch": 0.1390286253559287, "grad_norm": 0.378232479095459, "learning_rate": 0.00017222994840068108, "loss": 1.5178, "step": 10699 }, { "epoch": 0.13904161989984457, "grad_norm": 0.34583643078804016, "learning_rate": 0.00017222734893876967, "loss": 1.3225, "step": 10700 }, { "epoch": 0.13905461444376044, "grad_norm": 0.3947950303554535, "learning_rate": 0.0001722247494768583, "loss": 1.4634, "step": 10701 }, { "epoch": 0.1390676089876763, "grad_norm": 0.4488944113254547, "learning_rate": 0.0001722221500149469, "loss": 1.5424, "step": 10702 }, { "epoch": 0.13908060353159218, "grad_norm": 0.4175129234790802, "learning_rate": 0.00017221955055303555, "loss": 1.4955, "step": 10703 }, { "epoch": 0.13909359807550806, "grad_norm": 0.4612016975879669, "learning_rate": 0.00017221695109112414, "loss": 1.5747, "step": 10704 }, { "epoch": 0.13910659261942393, "grad_norm": 0.3313022255897522, "learning_rate": 0.00017221435162921277, "loss": 1.3595, "step": 10705 }, { "epoch": 0.1391195871633398, "grad_norm": 0.31256556510925293, "learning_rate": 0.0001722117521673014, "loss": 1.2647, "step": 10706 }, { "epoch": 0.13913258170725568, "grad_norm": 0.3766583800315857, "learning_rate": 0.00017220915270539, "loss": 1.5235, "step": 10707 }, { "epoch": 0.13914557625117155, "grad_norm": 0.40331658720970154, "learning_rate": 0.00017220655324347861, "loss": 1.4741, "step": 10708 }, { "epoch": 0.13915857079508742, "grad_norm": 0.4173697233200073, "learning_rate": 0.0001722039537815672, "loss": 1.738, "step": 10709 }, { "epoch": 0.1391715653390033, "grad_norm": 0.4242304861545563, "learning_rate": 0.00017220135431965584, "loss": 1.5726, "step": 10710 }, { "epoch": 0.13918455988291917, "grad_norm": 0.39564964175224304, "learning_rate": 0.00017219875485774446, "loss": 1.5576, "step": 10711 }, { "epoch": 0.13919755442683504, "grad_norm": 0.3382103443145752, "learning_rate": 0.00017219615539583306, "loss": 1.2134, "step": 10712 }, { "epoch": 0.1392105489707509, "grad_norm": 0.4230686128139496, "learning_rate": 0.00017219355593392168, "loss": 1.6145, "step": 10713 }, { "epoch": 0.1392235435146668, "grad_norm": 0.34732839465141296, "learning_rate": 0.0001721909564720103, "loss": 1.4446, "step": 10714 }, { "epoch": 0.13923653805858266, "grad_norm": 0.4478197991847992, "learning_rate": 0.00017218835701009893, "loss": 1.4869, "step": 10715 }, { "epoch": 0.13924953260249853, "grad_norm": 0.35791486501693726, "learning_rate": 0.00017218575754818753, "loss": 1.256, "step": 10716 }, { "epoch": 0.1392625271464144, "grad_norm": 0.3732423782348633, "learning_rate": 0.00017218315808627615, "loss": 1.4686, "step": 10717 }, { "epoch": 0.13927552169033028, "grad_norm": 0.3404478132724762, "learning_rate": 0.00017218055862436478, "loss": 1.479, "step": 10718 }, { "epoch": 0.13928851623424615, "grad_norm": 0.4723266065120697, "learning_rate": 0.00017217795916245338, "loss": 1.6898, "step": 10719 }, { "epoch": 0.13930151077816202, "grad_norm": 0.3965282440185547, "learning_rate": 0.000172175359700542, "loss": 1.4205, "step": 10720 }, { "epoch": 0.1393145053220779, "grad_norm": 0.5039243102073669, "learning_rate": 0.0001721727602386306, "loss": 1.4879, "step": 10721 }, { "epoch": 0.13932749986599377, "grad_norm": 0.4013881981372833, "learning_rate": 0.00017217016077671925, "loss": 1.4122, "step": 10722 }, { "epoch": 0.13934049440990964, "grad_norm": 0.33086255192756653, "learning_rate": 0.00017216756131480785, "loss": 1.2714, "step": 10723 }, { "epoch": 0.13935348895382552, "grad_norm": 0.34979256987571716, "learning_rate": 0.00017216496185289644, "loss": 1.3973, "step": 10724 }, { "epoch": 0.1393664834977414, "grad_norm": 0.8638617992401123, "learning_rate": 0.00017216236239098507, "loss": 1.5524, "step": 10725 }, { "epoch": 0.13937947804165726, "grad_norm": 0.41470256447792053, "learning_rate": 0.0001721597629290737, "loss": 1.4213, "step": 10726 }, { "epoch": 0.13939247258557314, "grad_norm": 0.4871446490287781, "learning_rate": 0.00017215716346716232, "loss": 1.4645, "step": 10727 }, { "epoch": 0.139405467129489, "grad_norm": 0.35828638076782227, "learning_rate": 0.00017215456400525091, "loss": 1.4868, "step": 10728 }, { "epoch": 0.13941846167340488, "grad_norm": 0.39944687485694885, "learning_rate": 0.00017215196454333954, "loss": 1.4677, "step": 10729 }, { "epoch": 0.13943145621732075, "grad_norm": 0.3520928919315338, "learning_rate": 0.00017214936508142816, "loss": 1.2735, "step": 10730 }, { "epoch": 0.13944445076123663, "grad_norm": 0.39557158946990967, "learning_rate": 0.00017214676561951676, "loss": 1.2845, "step": 10731 }, { "epoch": 0.1394574453051525, "grad_norm": 0.3717704713344574, "learning_rate": 0.00017214416615760539, "loss": 1.5166, "step": 10732 }, { "epoch": 0.13947043984906837, "grad_norm": 0.4318528175354004, "learning_rate": 0.00017214156669569398, "loss": 1.5827, "step": 10733 }, { "epoch": 0.13948343439298425, "grad_norm": 0.3730836510658264, "learning_rate": 0.00017213896723378263, "loss": 1.3915, "step": 10734 }, { "epoch": 0.13949642893690012, "grad_norm": 0.47108346223831177, "learning_rate": 0.00017213636777187123, "loss": 1.2788, "step": 10735 }, { "epoch": 0.139509423480816, "grad_norm": 0.4092840850353241, "learning_rate": 0.00017213376830995983, "loss": 1.4124, "step": 10736 }, { "epoch": 0.13952241802473186, "grad_norm": 0.46748873591423035, "learning_rate": 0.00017213116884804845, "loss": 1.5797, "step": 10737 }, { "epoch": 0.13953541256864774, "grad_norm": 0.4005483090877533, "learning_rate": 0.00017212856938613708, "loss": 1.5289, "step": 10738 }, { "epoch": 0.1395484071125636, "grad_norm": 0.41816186904907227, "learning_rate": 0.0001721259699242257, "loss": 1.3852, "step": 10739 }, { "epoch": 0.13956140165647948, "grad_norm": 0.2817828357219696, "learning_rate": 0.0001721233704623143, "loss": 1.4576, "step": 10740 }, { "epoch": 0.13957439620039536, "grad_norm": 0.398346483707428, "learning_rate": 0.00017212077100040292, "loss": 1.2757, "step": 10741 }, { "epoch": 0.13958739074431123, "grad_norm": 0.4326547682285309, "learning_rate": 0.00017211817153849155, "loss": 1.3765, "step": 10742 }, { "epoch": 0.1396003852882271, "grad_norm": 0.4393986463546753, "learning_rate": 0.00017211557207658015, "loss": 1.6248, "step": 10743 }, { "epoch": 0.13961337983214298, "grad_norm": 0.38088586926460266, "learning_rate": 0.00017211297261466877, "loss": 1.2487, "step": 10744 }, { "epoch": 0.13962637437605885, "grad_norm": 0.4307241439819336, "learning_rate": 0.0001721103731527574, "loss": 1.4015, "step": 10745 }, { "epoch": 0.13963936891997472, "grad_norm": 0.43914666771888733, "learning_rate": 0.00017210777369084602, "loss": 1.4995, "step": 10746 }, { "epoch": 0.1396523634638906, "grad_norm": 0.4074976444244385, "learning_rate": 0.00017210517422893462, "loss": 1.5153, "step": 10747 }, { "epoch": 0.13966535800780647, "grad_norm": 0.40763652324676514, "learning_rate": 0.00017210257476702321, "loss": 1.4273, "step": 10748 }, { "epoch": 0.13967835255172234, "grad_norm": 0.4152686297893524, "learning_rate": 0.00017209997530511187, "loss": 1.4594, "step": 10749 }, { "epoch": 0.1396913470956382, "grad_norm": 0.2346455156803131, "learning_rate": 0.00017209737584320046, "loss": 1.4827, "step": 10750 }, { "epoch": 0.13970434163955409, "grad_norm": 0.39084431529045105, "learning_rate": 0.0001720947763812891, "loss": 1.6069, "step": 10751 }, { "epoch": 0.13971733618346996, "grad_norm": 0.4696912169456482, "learning_rate": 0.00017209217691937769, "loss": 1.4265, "step": 10752 }, { "epoch": 0.13973033072738583, "grad_norm": 0.357042133808136, "learning_rate": 0.0001720895774574663, "loss": 1.3044, "step": 10753 }, { "epoch": 0.1397433252713017, "grad_norm": 0.4950782358646393, "learning_rate": 0.00017208697799555493, "loss": 1.4042, "step": 10754 }, { "epoch": 0.13975631981521758, "grad_norm": 0.4539402425289154, "learning_rate": 0.00017208437853364353, "loss": 1.5494, "step": 10755 }, { "epoch": 0.13976931435913345, "grad_norm": 0.39369234442710876, "learning_rate": 0.00017208177907173216, "loss": 1.2591, "step": 10756 }, { "epoch": 0.13978230890304932, "grad_norm": 0.3353974223136902, "learning_rate": 0.00017207917960982078, "loss": 1.3499, "step": 10757 }, { "epoch": 0.1397953034469652, "grad_norm": 0.39832958579063416, "learning_rate": 0.0001720765801479094, "loss": 1.4912, "step": 10758 }, { "epoch": 0.13980829799088107, "grad_norm": 0.33993783593177795, "learning_rate": 0.000172073980685998, "loss": 1.2443, "step": 10759 }, { "epoch": 0.13982129253479694, "grad_norm": 0.30756813287734985, "learning_rate": 0.00017207138122408663, "loss": 1.2784, "step": 10760 }, { "epoch": 0.13983428707871282, "grad_norm": 0.4411168694496155, "learning_rate": 0.00017206878176217525, "loss": 1.4152, "step": 10761 }, { "epoch": 0.1398472816226287, "grad_norm": 0.40573054552078247, "learning_rate": 0.00017206618230026385, "loss": 1.4497, "step": 10762 }, { "epoch": 0.13986027616654456, "grad_norm": 0.352725625038147, "learning_rate": 0.00017206358283835247, "loss": 1.4119, "step": 10763 }, { "epoch": 0.13987327071046043, "grad_norm": 0.3691973090171814, "learning_rate": 0.00017206098337644107, "loss": 1.4265, "step": 10764 }, { "epoch": 0.1398862652543763, "grad_norm": 0.36644649505615234, "learning_rate": 0.0001720583839145297, "loss": 1.276, "step": 10765 }, { "epoch": 0.13989925979829218, "grad_norm": 0.40595853328704834, "learning_rate": 0.00017205578445261832, "loss": 1.5091, "step": 10766 }, { "epoch": 0.13991225434220805, "grad_norm": 0.39276009798049927, "learning_rate": 0.00017205318499070692, "loss": 1.3175, "step": 10767 }, { "epoch": 0.13992524888612393, "grad_norm": 0.5398718118667603, "learning_rate": 0.00017205058552879554, "loss": 1.3301, "step": 10768 }, { "epoch": 0.1399382434300398, "grad_norm": 0.38218799233436584, "learning_rate": 0.00017204798606688417, "loss": 1.3951, "step": 10769 }, { "epoch": 0.13995123797395567, "grad_norm": 0.428398460149765, "learning_rate": 0.0001720453866049728, "loss": 1.553, "step": 10770 }, { "epoch": 0.13996423251787155, "grad_norm": 0.4070543944835663, "learning_rate": 0.0001720427871430614, "loss": 1.4543, "step": 10771 }, { "epoch": 0.13997722706178742, "grad_norm": 0.465413361787796, "learning_rate": 0.00017204018768115, "loss": 1.2245, "step": 10772 }, { "epoch": 0.13999022160570332, "grad_norm": 0.3781396746635437, "learning_rate": 0.00017203758821923864, "loss": 1.488, "step": 10773 }, { "epoch": 0.1400032161496192, "grad_norm": 0.3066791892051697, "learning_rate": 0.00017203498875732723, "loss": 1.6712, "step": 10774 }, { "epoch": 0.14001621069353506, "grad_norm": 0.3620041310787201, "learning_rate": 0.00017203238929541586, "loss": 1.4716, "step": 10775 }, { "epoch": 0.14002920523745094, "grad_norm": 0.4115479290485382, "learning_rate": 0.00017202978983350446, "loss": 1.3786, "step": 10776 }, { "epoch": 0.1400421997813668, "grad_norm": 0.3244473934173584, "learning_rate": 0.0001720271903715931, "loss": 1.3423, "step": 10777 }, { "epoch": 0.14005519432528268, "grad_norm": 0.4513790011405945, "learning_rate": 0.0001720245909096817, "loss": 1.3319, "step": 10778 }, { "epoch": 0.14006818886919856, "grad_norm": 0.35393035411834717, "learning_rate": 0.0001720219914477703, "loss": 1.3531, "step": 10779 }, { "epoch": 0.14008118341311443, "grad_norm": 0.36377277970314026, "learning_rate": 0.00017201939198585895, "loss": 1.3916, "step": 10780 }, { "epoch": 0.1400941779570303, "grad_norm": 0.4200163185596466, "learning_rate": 0.00017201679252394755, "loss": 1.3245, "step": 10781 }, { "epoch": 0.14010717250094618, "grad_norm": 0.3571647107601166, "learning_rate": 0.00017201419306203618, "loss": 1.3705, "step": 10782 }, { "epoch": 0.14012016704486205, "grad_norm": 0.4349687695503235, "learning_rate": 0.00017201159360012477, "loss": 1.4128, "step": 10783 }, { "epoch": 0.14013316158877792, "grad_norm": 0.41650599241256714, "learning_rate": 0.0001720089941382134, "loss": 1.3617, "step": 10784 }, { "epoch": 0.1401461561326938, "grad_norm": 0.4022287428379059, "learning_rate": 0.00017200639467630202, "loss": 1.4494, "step": 10785 }, { "epoch": 0.14015915067660967, "grad_norm": 0.33680790662765503, "learning_rate": 0.00017200379521439062, "loss": 1.5274, "step": 10786 }, { "epoch": 0.14017214522052554, "grad_norm": 0.46293941140174866, "learning_rate": 0.00017200119575247924, "loss": 1.2085, "step": 10787 }, { "epoch": 0.1401851397644414, "grad_norm": 0.3471514582633972, "learning_rate": 0.00017199859629056787, "loss": 1.5912, "step": 10788 }, { "epoch": 0.1401981343083573, "grad_norm": 0.32524195313453674, "learning_rate": 0.0001719959968286565, "loss": 1.4603, "step": 10789 }, { "epoch": 0.14021112885227316, "grad_norm": 0.360619455575943, "learning_rate": 0.0001719933973667451, "loss": 1.3013, "step": 10790 }, { "epoch": 0.14022412339618903, "grad_norm": 0.43202275037765503, "learning_rate": 0.0001719907979048337, "loss": 1.4518, "step": 10791 }, { "epoch": 0.1402371179401049, "grad_norm": 0.42665886878967285, "learning_rate": 0.00017198819844292234, "loss": 1.4011, "step": 10792 }, { "epoch": 0.14025011248402078, "grad_norm": 0.3868856728076935, "learning_rate": 0.00017198559898101094, "loss": 1.3267, "step": 10793 }, { "epoch": 0.14026310702793665, "grad_norm": 0.3203757107257843, "learning_rate": 0.00017198299951909956, "loss": 1.4568, "step": 10794 }, { "epoch": 0.14027610157185252, "grad_norm": 0.35420846939086914, "learning_rate": 0.00017198040005718816, "loss": 1.5337, "step": 10795 }, { "epoch": 0.1402890961157684, "grad_norm": 0.4244528114795685, "learning_rate": 0.00017197780059527678, "loss": 1.3677, "step": 10796 }, { "epoch": 0.14030209065968427, "grad_norm": 0.35529932379722595, "learning_rate": 0.0001719752011333654, "loss": 1.3323, "step": 10797 }, { "epoch": 0.14031508520360014, "grad_norm": 0.37506797909736633, "learning_rate": 0.000171972601671454, "loss": 1.4979, "step": 10798 }, { "epoch": 0.14032807974751602, "grad_norm": 0.32244589924812317, "learning_rate": 0.00017197000220954263, "loss": 1.5285, "step": 10799 }, { "epoch": 0.1403410742914319, "grad_norm": 0.3708314597606659, "learning_rate": 0.00017196740274763125, "loss": 1.2533, "step": 10800 }, { "epoch": 0.14035406883534776, "grad_norm": 0.3478354513645172, "learning_rate": 0.00017196480328571988, "loss": 1.4054, "step": 10801 }, { "epoch": 0.14036706337926363, "grad_norm": 0.3617178201675415, "learning_rate": 0.00017196220382380848, "loss": 1.2874, "step": 10802 }, { "epoch": 0.1403800579231795, "grad_norm": 0.3668794631958008, "learning_rate": 0.00017195960436189707, "loss": 1.4735, "step": 10803 }, { "epoch": 0.14039305246709538, "grad_norm": 0.39561164379119873, "learning_rate": 0.00017195700489998572, "loss": 1.1695, "step": 10804 }, { "epoch": 0.14040604701101125, "grad_norm": 0.45432788133621216, "learning_rate": 0.00017195440543807432, "loss": 1.4439, "step": 10805 }, { "epoch": 0.14041904155492713, "grad_norm": 0.28599485754966736, "learning_rate": 0.00017195180597616295, "loss": 1.4012, "step": 10806 }, { "epoch": 0.140432036098843, "grad_norm": 0.375226229429245, "learning_rate": 0.00017194920651425154, "loss": 1.2792, "step": 10807 }, { "epoch": 0.14044503064275887, "grad_norm": 0.3512086272239685, "learning_rate": 0.00017194660705234017, "loss": 1.505, "step": 10808 }, { "epoch": 0.14045802518667475, "grad_norm": 0.5066471695899963, "learning_rate": 0.0001719440075904288, "loss": 1.5636, "step": 10809 }, { "epoch": 0.14047101973059062, "grad_norm": 0.42726922035217285, "learning_rate": 0.0001719414081285174, "loss": 1.5015, "step": 10810 }, { "epoch": 0.1404840142745065, "grad_norm": 0.3772279620170593, "learning_rate": 0.00017193880866660602, "loss": 1.3784, "step": 10811 }, { "epoch": 0.14049700881842236, "grad_norm": 0.38701918721199036, "learning_rate": 0.00017193620920469464, "loss": 1.3881, "step": 10812 }, { "epoch": 0.14051000336233824, "grad_norm": 0.29993683099746704, "learning_rate": 0.00017193360974278326, "loss": 1.4011, "step": 10813 }, { "epoch": 0.1405229979062541, "grad_norm": 0.45598283410072327, "learning_rate": 0.00017193101028087186, "loss": 1.4538, "step": 10814 }, { "epoch": 0.14053599245016998, "grad_norm": 0.6004972457885742, "learning_rate": 0.00017192841081896049, "loss": 1.2832, "step": 10815 }, { "epoch": 0.14054898699408586, "grad_norm": 0.2675066888332367, "learning_rate": 0.0001719258113570491, "loss": 1.3657, "step": 10816 }, { "epoch": 0.14056198153800173, "grad_norm": 0.3949751853942871, "learning_rate": 0.0001719232118951377, "loss": 1.4446, "step": 10817 }, { "epoch": 0.1405749760819176, "grad_norm": 0.41369563341140747, "learning_rate": 0.00017192061243322633, "loss": 1.3497, "step": 10818 }, { "epoch": 0.14058797062583347, "grad_norm": 0.4654668867588043, "learning_rate": 0.00017191801297131496, "loss": 1.6589, "step": 10819 }, { "epoch": 0.14060096516974935, "grad_norm": 0.39784547686576843, "learning_rate": 0.00017191541350940355, "loss": 1.4265, "step": 10820 }, { "epoch": 0.14061395971366522, "grad_norm": 0.421874076128006, "learning_rate": 0.00017191281404749218, "loss": 1.6356, "step": 10821 }, { "epoch": 0.1406269542575811, "grad_norm": 0.3859943449497223, "learning_rate": 0.00017191021458558078, "loss": 1.2623, "step": 10822 }, { "epoch": 0.14063994880149697, "grad_norm": 0.3922678828239441, "learning_rate": 0.00017190761512366943, "loss": 1.3465, "step": 10823 }, { "epoch": 0.14065294334541284, "grad_norm": 0.39367225766181946, "learning_rate": 0.00017190501566175802, "loss": 1.5162, "step": 10824 }, { "epoch": 0.1406659378893287, "grad_norm": 0.3528103530406952, "learning_rate": 0.00017190241619984665, "loss": 1.2572, "step": 10825 }, { "epoch": 0.14067893243324459, "grad_norm": 0.38575342297554016, "learning_rate": 0.00017189981673793525, "loss": 1.5632, "step": 10826 }, { "epoch": 0.14069192697716046, "grad_norm": 0.44456741213798523, "learning_rate": 0.00017189721727602387, "loss": 1.3928, "step": 10827 }, { "epoch": 0.14070492152107633, "grad_norm": 0.4356706142425537, "learning_rate": 0.0001718946178141125, "loss": 1.5353, "step": 10828 }, { "epoch": 0.1407179160649922, "grad_norm": 0.5134885311126709, "learning_rate": 0.0001718920183522011, "loss": 1.6023, "step": 10829 }, { "epoch": 0.14073091060890808, "grad_norm": 0.4360247254371643, "learning_rate": 0.00017188941889028972, "loss": 1.4886, "step": 10830 }, { "epoch": 0.14074390515282395, "grad_norm": 0.3940284550189972, "learning_rate": 0.00017188681942837834, "loss": 1.2592, "step": 10831 }, { "epoch": 0.14075689969673982, "grad_norm": 0.4224902093410492, "learning_rate": 0.00017188421996646694, "loss": 1.4618, "step": 10832 }, { "epoch": 0.1407698942406557, "grad_norm": 0.3279314935207367, "learning_rate": 0.00017188162050455556, "loss": 1.4254, "step": 10833 }, { "epoch": 0.14078288878457157, "grad_norm": 0.36889636516571045, "learning_rate": 0.00017187902104264416, "loss": 1.3752, "step": 10834 }, { "epoch": 0.14079588332848744, "grad_norm": 0.38372352719306946, "learning_rate": 0.0001718764215807328, "loss": 1.5421, "step": 10835 }, { "epoch": 0.14080887787240332, "grad_norm": 0.4109812080860138, "learning_rate": 0.0001718738221188214, "loss": 1.2519, "step": 10836 }, { "epoch": 0.1408218724163192, "grad_norm": 0.3611900210380554, "learning_rate": 0.00017187122265691003, "loss": 1.5434, "step": 10837 }, { "epoch": 0.14083486696023506, "grad_norm": 0.3826359212398529, "learning_rate": 0.00017186862319499863, "loss": 1.2481, "step": 10838 }, { "epoch": 0.14084786150415093, "grad_norm": 0.5015074610710144, "learning_rate": 0.00017186602373308726, "loss": 1.3544, "step": 10839 }, { "epoch": 0.1408608560480668, "grad_norm": 0.39184704422950745, "learning_rate": 0.00017186342427117588, "loss": 1.3709, "step": 10840 }, { "epoch": 0.14087385059198268, "grad_norm": 0.46706393361091614, "learning_rate": 0.00017186082480926448, "loss": 1.45, "step": 10841 }, { "epoch": 0.14088684513589855, "grad_norm": 0.43448421359062195, "learning_rate": 0.0001718582253473531, "loss": 1.5921, "step": 10842 }, { "epoch": 0.14089983967981443, "grad_norm": 0.4248597025871277, "learning_rate": 0.00017185562588544173, "loss": 1.5616, "step": 10843 }, { "epoch": 0.1409128342237303, "grad_norm": 0.39774712920188904, "learning_rate": 0.00017185302642353035, "loss": 1.4397, "step": 10844 }, { "epoch": 0.14092582876764617, "grad_norm": 0.3106837868690491, "learning_rate": 0.00017185042696161895, "loss": 1.4959, "step": 10845 }, { "epoch": 0.14093882331156204, "grad_norm": 0.45596882700920105, "learning_rate": 0.00017184782749970755, "loss": 1.3741, "step": 10846 }, { "epoch": 0.14095181785547792, "grad_norm": 0.4356490969657898, "learning_rate": 0.0001718452280377962, "loss": 1.3784, "step": 10847 }, { "epoch": 0.1409648123993938, "grad_norm": 0.3726494014263153, "learning_rate": 0.0001718426285758848, "loss": 1.5095, "step": 10848 }, { "epoch": 0.1409778069433097, "grad_norm": 0.4151771366596222, "learning_rate": 0.00017184002911397342, "loss": 1.2665, "step": 10849 }, { "epoch": 0.14099080148722556, "grad_norm": 0.32824963331222534, "learning_rate": 0.00017183742965206202, "loss": 1.3579, "step": 10850 }, { "epoch": 0.14100379603114144, "grad_norm": 0.3854600191116333, "learning_rate": 0.00017183483019015064, "loss": 1.5345, "step": 10851 }, { "epoch": 0.1410167905750573, "grad_norm": 0.32627803087234497, "learning_rate": 0.00017183223072823927, "loss": 1.3422, "step": 10852 }, { "epoch": 0.14102978511897318, "grad_norm": 0.4469495117664337, "learning_rate": 0.00017182963126632786, "loss": 1.4616, "step": 10853 }, { "epoch": 0.14104277966288906, "grad_norm": 0.29169851541519165, "learning_rate": 0.00017182703180441652, "loss": 1.3603, "step": 10854 }, { "epoch": 0.14105577420680493, "grad_norm": 0.29822713136672974, "learning_rate": 0.0001718244323425051, "loss": 1.4908, "step": 10855 }, { "epoch": 0.1410687687507208, "grad_norm": 0.3460671901702881, "learning_rate": 0.00017182183288059374, "loss": 1.1356, "step": 10856 }, { "epoch": 0.14108176329463668, "grad_norm": 0.4160926342010498, "learning_rate": 0.00017181923341868233, "loss": 1.403, "step": 10857 }, { "epoch": 0.14109475783855255, "grad_norm": 0.42342308163642883, "learning_rate": 0.00017181663395677096, "loss": 1.5801, "step": 10858 }, { "epoch": 0.14110775238246842, "grad_norm": 0.42777422070503235, "learning_rate": 0.00017181403449485958, "loss": 1.4779, "step": 10859 }, { "epoch": 0.1411207469263843, "grad_norm": 0.4467618763446808, "learning_rate": 0.00017181143503294818, "loss": 1.5562, "step": 10860 }, { "epoch": 0.14113374147030017, "grad_norm": 0.3858185112476349, "learning_rate": 0.0001718088355710368, "loss": 1.5842, "step": 10861 }, { "epoch": 0.14114673601421604, "grad_norm": 0.39428919553756714, "learning_rate": 0.00017180623610912543, "loss": 1.5314, "step": 10862 }, { "epoch": 0.1411597305581319, "grad_norm": 0.335600882768631, "learning_rate": 0.00017180363664721403, "loss": 1.6467, "step": 10863 }, { "epoch": 0.14117272510204779, "grad_norm": 0.5017620921134949, "learning_rate": 0.00017180103718530265, "loss": 1.414, "step": 10864 }, { "epoch": 0.14118571964596366, "grad_norm": 0.4224272668361664, "learning_rate": 0.00017179843772339125, "loss": 1.5467, "step": 10865 }, { "epoch": 0.14119871418987953, "grad_norm": 0.39809706807136536, "learning_rate": 0.0001717958382614799, "loss": 1.4449, "step": 10866 }, { "epoch": 0.1412117087337954, "grad_norm": 0.2664085030555725, "learning_rate": 0.0001717932387995685, "loss": 1.2187, "step": 10867 }, { "epoch": 0.14122470327771128, "grad_norm": 0.36554619669914246, "learning_rate": 0.00017179063933765712, "loss": 1.3219, "step": 10868 }, { "epoch": 0.14123769782162715, "grad_norm": 0.4317478835582733, "learning_rate": 0.00017178803987574572, "loss": 1.4364, "step": 10869 }, { "epoch": 0.14125069236554302, "grad_norm": 0.46758297085762024, "learning_rate": 0.00017178544041383434, "loss": 1.5511, "step": 10870 }, { "epoch": 0.1412636869094589, "grad_norm": 0.3566475212574005, "learning_rate": 0.00017178284095192297, "loss": 1.048, "step": 10871 }, { "epoch": 0.14127668145337477, "grad_norm": 0.42115387320518494, "learning_rate": 0.00017178024149001157, "loss": 1.4822, "step": 10872 }, { "epoch": 0.14128967599729064, "grad_norm": 0.4404464662075043, "learning_rate": 0.0001717776420281002, "loss": 1.5487, "step": 10873 }, { "epoch": 0.14130267054120652, "grad_norm": 0.3825858235359192, "learning_rate": 0.00017177504256618882, "loss": 1.3018, "step": 10874 }, { "epoch": 0.1413156650851224, "grad_norm": 0.3493032455444336, "learning_rate": 0.0001717724431042774, "loss": 1.4301, "step": 10875 }, { "epoch": 0.14132865962903826, "grad_norm": 0.44915756583213806, "learning_rate": 0.00017176984364236604, "loss": 1.6921, "step": 10876 }, { "epoch": 0.14134165417295413, "grad_norm": 0.3446107506752014, "learning_rate": 0.00017176724418045463, "loss": 1.3504, "step": 10877 }, { "epoch": 0.14135464871687, "grad_norm": 0.31326380372047424, "learning_rate": 0.00017176464471854329, "loss": 1.5635, "step": 10878 }, { "epoch": 0.14136764326078588, "grad_norm": 0.46126633882522583, "learning_rate": 0.00017176204525663188, "loss": 1.4412, "step": 10879 }, { "epoch": 0.14138063780470175, "grad_norm": 0.40074828267097473, "learning_rate": 0.0001717594457947205, "loss": 1.3619, "step": 10880 }, { "epoch": 0.14139363234861763, "grad_norm": 0.3470706045627594, "learning_rate": 0.0001717568463328091, "loss": 1.4544, "step": 10881 }, { "epoch": 0.1414066268925335, "grad_norm": 0.3700906038284302, "learning_rate": 0.00017175424687089773, "loss": 1.2816, "step": 10882 }, { "epoch": 0.14141962143644937, "grad_norm": 0.41927361488342285, "learning_rate": 0.00017175164740898635, "loss": 1.4843, "step": 10883 }, { "epoch": 0.14143261598036524, "grad_norm": 0.38305652141571045, "learning_rate": 0.00017174904794707495, "loss": 1.3507, "step": 10884 }, { "epoch": 0.14144561052428112, "grad_norm": 0.4511977732181549, "learning_rate": 0.00017174644848516358, "loss": 1.3806, "step": 10885 }, { "epoch": 0.141458605068197, "grad_norm": 0.3517056107521057, "learning_rate": 0.0001717438490232522, "loss": 1.3782, "step": 10886 }, { "epoch": 0.14147159961211286, "grad_norm": 0.35727646946907043, "learning_rate": 0.0001717412495613408, "loss": 1.1464, "step": 10887 }, { "epoch": 0.14148459415602874, "grad_norm": 0.3452603816986084, "learning_rate": 0.00017173865009942942, "loss": 1.3215, "step": 10888 }, { "epoch": 0.1414975886999446, "grad_norm": 0.4418522119522095, "learning_rate": 0.00017173605063751802, "loss": 1.4409, "step": 10889 }, { "epoch": 0.14151058324386048, "grad_norm": 0.47208675742149353, "learning_rate": 0.00017173345117560667, "loss": 1.4753, "step": 10890 }, { "epoch": 0.14152357778777636, "grad_norm": 0.4535081684589386, "learning_rate": 0.00017173085171369527, "loss": 1.3373, "step": 10891 }, { "epoch": 0.14153657233169223, "grad_norm": 0.323885977268219, "learning_rate": 0.0001717282522517839, "loss": 1.2169, "step": 10892 }, { "epoch": 0.1415495668756081, "grad_norm": 0.4445568919181824, "learning_rate": 0.00017172565278987252, "loss": 1.4632, "step": 10893 }, { "epoch": 0.14156256141952397, "grad_norm": 0.4691617488861084, "learning_rate": 0.00017172305332796112, "loss": 1.5746, "step": 10894 }, { "epoch": 0.14157555596343985, "grad_norm": 0.33492690324783325, "learning_rate": 0.00017172045386604974, "loss": 1.2249, "step": 10895 }, { "epoch": 0.14158855050735572, "grad_norm": 0.4575447738170624, "learning_rate": 0.00017171785440413834, "loss": 1.44, "step": 10896 }, { "epoch": 0.1416015450512716, "grad_norm": 0.4821552038192749, "learning_rate": 0.000171715254942227, "loss": 1.5711, "step": 10897 }, { "epoch": 0.14161453959518747, "grad_norm": 0.4753732681274414, "learning_rate": 0.00017171265548031559, "loss": 1.5572, "step": 10898 }, { "epoch": 0.14162753413910334, "grad_norm": 0.3616781532764435, "learning_rate": 0.0001717100560184042, "loss": 1.5594, "step": 10899 }, { "epoch": 0.1416405286830192, "grad_norm": 0.42938709259033203, "learning_rate": 0.0001717074565564928, "loss": 1.3992, "step": 10900 }, { "epoch": 0.14165352322693509, "grad_norm": 0.44628751277923584, "learning_rate": 0.00017170485709458143, "loss": 1.5046, "step": 10901 }, { "epoch": 0.14166651777085096, "grad_norm": 0.4351179003715515, "learning_rate": 0.00017170225763267006, "loss": 1.6107, "step": 10902 }, { "epoch": 0.14167951231476683, "grad_norm": 0.4173390865325928, "learning_rate": 0.00017169965817075865, "loss": 1.377, "step": 10903 }, { "epoch": 0.1416925068586827, "grad_norm": 0.39643236994743347, "learning_rate": 0.00017169705870884728, "loss": 1.5887, "step": 10904 }, { "epoch": 0.14170550140259858, "grad_norm": 0.38675668835639954, "learning_rate": 0.0001716944592469359, "loss": 1.6578, "step": 10905 }, { "epoch": 0.14171849594651445, "grad_norm": 0.4231336712837219, "learning_rate": 0.0001716918597850245, "loss": 1.5683, "step": 10906 }, { "epoch": 0.14173149049043032, "grad_norm": 0.3367353677749634, "learning_rate": 0.00017168926032311313, "loss": 1.4348, "step": 10907 }, { "epoch": 0.1417444850343462, "grad_norm": 0.3846409320831299, "learning_rate": 0.00017168666086120172, "loss": 1.5294, "step": 10908 }, { "epoch": 0.14175747957826207, "grad_norm": 0.46416211128234863, "learning_rate": 0.00017168406139929037, "loss": 1.3925, "step": 10909 }, { "epoch": 0.14177047412217794, "grad_norm": 0.28698790073394775, "learning_rate": 0.00017168146193737897, "loss": 1.373, "step": 10910 }, { "epoch": 0.14178346866609381, "grad_norm": 0.4230034649372101, "learning_rate": 0.0001716788624754676, "loss": 1.632, "step": 10911 }, { "epoch": 0.1417964632100097, "grad_norm": 0.4080103039741516, "learning_rate": 0.0001716762630135562, "loss": 1.5997, "step": 10912 }, { "epoch": 0.14180945775392556, "grad_norm": 0.3559096157550812, "learning_rate": 0.00017167366355164482, "loss": 1.3581, "step": 10913 }, { "epoch": 0.14182245229784143, "grad_norm": 0.4752035140991211, "learning_rate": 0.00017167106408973344, "loss": 1.6462, "step": 10914 }, { "epoch": 0.1418354468417573, "grad_norm": 0.45039257407188416, "learning_rate": 0.00017166846462782204, "loss": 1.3962, "step": 10915 }, { "epoch": 0.14184844138567318, "grad_norm": 0.46960341930389404, "learning_rate": 0.00017166586516591066, "loss": 1.5447, "step": 10916 }, { "epoch": 0.14186143592958905, "grad_norm": 0.5272099375724792, "learning_rate": 0.0001716632657039993, "loss": 1.2417, "step": 10917 }, { "epoch": 0.14187443047350493, "grad_norm": 0.3738939166069031, "learning_rate": 0.00017166066624208789, "loss": 1.5318, "step": 10918 }, { "epoch": 0.1418874250174208, "grad_norm": 0.3941914737224579, "learning_rate": 0.0001716580667801765, "loss": 1.4321, "step": 10919 }, { "epoch": 0.14190041956133667, "grad_norm": 0.39023685455322266, "learning_rate": 0.0001716554673182651, "loss": 1.3207, "step": 10920 }, { "epoch": 0.14191341410525254, "grad_norm": 0.40618133544921875, "learning_rate": 0.00017165286785635376, "loss": 1.5365, "step": 10921 }, { "epoch": 0.14192640864916842, "grad_norm": 0.4446902573108673, "learning_rate": 0.00017165026839444236, "loss": 1.5207, "step": 10922 }, { "epoch": 0.1419394031930843, "grad_norm": 0.35658472776412964, "learning_rate": 0.00017164766893253098, "loss": 1.4366, "step": 10923 }, { "epoch": 0.14195239773700016, "grad_norm": 0.4167204201221466, "learning_rate": 0.00017164506947061958, "loss": 1.4769, "step": 10924 }, { "epoch": 0.14196539228091606, "grad_norm": 0.3677447438240051, "learning_rate": 0.0001716424700087082, "loss": 1.5478, "step": 10925 }, { "epoch": 0.14197838682483194, "grad_norm": 0.39147135615348816, "learning_rate": 0.00017163987054679683, "loss": 1.3728, "step": 10926 }, { "epoch": 0.1419913813687478, "grad_norm": 0.5226168632507324, "learning_rate": 0.00017163727108488543, "loss": 1.3327, "step": 10927 }, { "epoch": 0.14200437591266368, "grad_norm": 0.5036211609840393, "learning_rate": 0.00017163467162297408, "loss": 1.3846, "step": 10928 }, { "epoch": 0.14201737045657956, "grad_norm": 0.41347211599349976, "learning_rate": 0.00017163207216106267, "loss": 1.5415, "step": 10929 }, { "epoch": 0.14203036500049543, "grad_norm": 0.4115000367164612, "learning_rate": 0.00017162947269915127, "loss": 1.4409, "step": 10930 }, { "epoch": 0.1420433595444113, "grad_norm": 0.27023792266845703, "learning_rate": 0.0001716268732372399, "loss": 1.2019, "step": 10931 }, { "epoch": 0.14205635408832717, "grad_norm": 0.42166751623153687, "learning_rate": 0.00017162427377532852, "loss": 1.4693, "step": 10932 }, { "epoch": 0.14206934863224305, "grad_norm": 0.33947116136550903, "learning_rate": 0.00017162167431341714, "loss": 1.3955, "step": 10933 }, { "epoch": 0.14208234317615892, "grad_norm": 0.4271692633628845, "learning_rate": 0.00017161907485150574, "loss": 1.4593, "step": 10934 }, { "epoch": 0.1420953377200748, "grad_norm": 0.4756326973438263, "learning_rate": 0.00017161647538959437, "loss": 1.3656, "step": 10935 }, { "epoch": 0.14210833226399067, "grad_norm": 0.3282777667045593, "learning_rate": 0.000171613875927683, "loss": 1.2668, "step": 10936 }, { "epoch": 0.14212132680790654, "grad_norm": 0.2732298672199249, "learning_rate": 0.0001716112764657716, "loss": 1.3854, "step": 10937 }, { "epoch": 0.1421343213518224, "grad_norm": 0.34180983901023865, "learning_rate": 0.0001716086770038602, "loss": 1.3432, "step": 10938 }, { "epoch": 0.14214731589573829, "grad_norm": 0.2745610177516937, "learning_rate": 0.0001716060775419488, "loss": 1.2864, "step": 10939 }, { "epoch": 0.14216031043965416, "grad_norm": 0.561955988407135, "learning_rate": 0.00017160347808003746, "loss": 1.6208, "step": 10940 }, { "epoch": 0.14217330498357003, "grad_norm": 0.3340148329734802, "learning_rate": 0.00017160087861812606, "loss": 1.4183, "step": 10941 }, { "epoch": 0.1421862995274859, "grad_norm": 0.4886804521083832, "learning_rate": 0.00017159827915621466, "loss": 1.4969, "step": 10942 }, { "epoch": 0.14219929407140178, "grad_norm": 0.3795613646507263, "learning_rate": 0.00017159567969430328, "loss": 1.4586, "step": 10943 }, { "epoch": 0.14221228861531765, "grad_norm": 0.4117739200592041, "learning_rate": 0.0001715930802323919, "loss": 1.4792, "step": 10944 }, { "epoch": 0.14222528315923352, "grad_norm": 0.4263666570186615, "learning_rate": 0.00017159048077048053, "loss": 1.4526, "step": 10945 }, { "epoch": 0.1422382777031494, "grad_norm": 0.5333686470985413, "learning_rate": 0.00017158788130856913, "loss": 1.5833, "step": 10946 }, { "epoch": 0.14225127224706527, "grad_norm": 0.3140769898891449, "learning_rate": 0.00017158528184665775, "loss": 1.4418, "step": 10947 }, { "epoch": 0.14226426679098114, "grad_norm": 0.3371421992778778, "learning_rate": 0.00017158268238474638, "loss": 1.385, "step": 10948 }, { "epoch": 0.14227726133489701, "grad_norm": 0.4432927370071411, "learning_rate": 0.00017158008292283497, "loss": 1.2335, "step": 10949 }, { "epoch": 0.1422902558788129, "grad_norm": 0.4457622170448303, "learning_rate": 0.0001715774834609236, "loss": 1.3749, "step": 10950 }, { "epoch": 0.14230325042272876, "grad_norm": 0.43068206310272217, "learning_rate": 0.0001715748839990122, "loss": 1.3275, "step": 10951 }, { "epoch": 0.14231624496664463, "grad_norm": 0.49113088846206665, "learning_rate": 0.00017157228453710085, "loss": 1.3536, "step": 10952 }, { "epoch": 0.1423292395105605, "grad_norm": 0.312968909740448, "learning_rate": 0.00017156968507518944, "loss": 1.2682, "step": 10953 }, { "epoch": 0.14234223405447638, "grad_norm": 0.3179893493652344, "learning_rate": 0.00017156708561327804, "loss": 1.5962, "step": 10954 }, { "epoch": 0.14235522859839225, "grad_norm": 0.4181991517543793, "learning_rate": 0.00017156448615136667, "loss": 1.4341, "step": 10955 }, { "epoch": 0.14236822314230813, "grad_norm": 0.34897053241729736, "learning_rate": 0.0001715618866894553, "loss": 1.2624, "step": 10956 }, { "epoch": 0.142381217686224, "grad_norm": 0.4222790598869324, "learning_rate": 0.00017155928722754392, "loss": 1.3553, "step": 10957 }, { "epoch": 0.14239421223013987, "grad_norm": 0.4330940544605255, "learning_rate": 0.0001715566877656325, "loss": 1.2607, "step": 10958 }, { "epoch": 0.14240720677405574, "grad_norm": 0.44673553109169006, "learning_rate": 0.00017155408830372114, "loss": 1.4648, "step": 10959 }, { "epoch": 0.14242020131797162, "grad_norm": 0.38661807775497437, "learning_rate": 0.00017155148884180976, "loss": 1.445, "step": 10960 }, { "epoch": 0.1424331958618875, "grad_norm": 0.3997431993484497, "learning_rate": 0.00017154888937989836, "loss": 1.2605, "step": 10961 }, { "epoch": 0.14244619040580336, "grad_norm": 0.362303227186203, "learning_rate": 0.00017154628991798698, "loss": 1.2934, "step": 10962 }, { "epoch": 0.14245918494971924, "grad_norm": 0.39247629046440125, "learning_rate": 0.00017154369045607558, "loss": 1.5652, "step": 10963 }, { "epoch": 0.1424721794936351, "grad_norm": 0.5087887644767761, "learning_rate": 0.00017154109099416423, "loss": 1.6123, "step": 10964 }, { "epoch": 0.14248517403755098, "grad_norm": 0.43665438890457153, "learning_rate": 0.00017153849153225283, "loss": 1.5289, "step": 10965 }, { "epoch": 0.14249816858146686, "grad_norm": 0.27714627981185913, "learning_rate": 0.00017153589207034145, "loss": 1.2534, "step": 10966 }, { "epoch": 0.14251116312538273, "grad_norm": 0.46377792954444885, "learning_rate": 0.00017153329260843008, "loss": 1.5276, "step": 10967 }, { "epoch": 0.1425241576692986, "grad_norm": 0.46302077174186707, "learning_rate": 0.00017153069314651868, "loss": 1.5346, "step": 10968 }, { "epoch": 0.14253715221321447, "grad_norm": 0.44581708312034607, "learning_rate": 0.0001715280936846073, "loss": 1.4782, "step": 10969 }, { "epoch": 0.14255014675713035, "grad_norm": 0.497243195772171, "learning_rate": 0.0001715254942226959, "loss": 1.5687, "step": 10970 }, { "epoch": 0.14256314130104622, "grad_norm": 0.3940199315547943, "learning_rate": 0.00017152289476078452, "loss": 1.3456, "step": 10971 }, { "epoch": 0.1425761358449621, "grad_norm": 0.48827141523361206, "learning_rate": 0.00017152029529887315, "loss": 1.4532, "step": 10972 }, { "epoch": 0.14258913038887797, "grad_norm": 0.32716068625450134, "learning_rate": 0.00017151769583696174, "loss": 1.4463, "step": 10973 }, { "epoch": 0.14260212493279384, "grad_norm": 0.35811272263526917, "learning_rate": 0.00017151509637505037, "loss": 1.3541, "step": 10974 }, { "epoch": 0.1426151194767097, "grad_norm": 0.37943723797798157, "learning_rate": 0.000171512496913139, "loss": 1.5692, "step": 10975 }, { "epoch": 0.14262811402062558, "grad_norm": 0.3421405255794525, "learning_rate": 0.00017150989745122762, "loss": 1.5162, "step": 10976 }, { "epoch": 0.14264110856454146, "grad_norm": 0.4516383111476898, "learning_rate": 0.00017150729798931622, "loss": 1.4494, "step": 10977 }, { "epoch": 0.14265410310845733, "grad_norm": 0.493931382894516, "learning_rate": 0.00017150469852740484, "loss": 1.3791, "step": 10978 }, { "epoch": 0.1426670976523732, "grad_norm": 0.26790735125541687, "learning_rate": 0.00017150209906549346, "loss": 1.2092, "step": 10979 }, { "epoch": 0.14268009219628908, "grad_norm": 0.3421579599380493, "learning_rate": 0.00017149949960358206, "loss": 1.481, "step": 10980 }, { "epoch": 0.14269308674020495, "grad_norm": 0.25323083996772766, "learning_rate": 0.0001714969001416707, "loss": 1.3159, "step": 10981 }, { "epoch": 0.14270608128412082, "grad_norm": 0.42508524656295776, "learning_rate": 0.00017149430067975928, "loss": 1.5003, "step": 10982 }, { "epoch": 0.1427190758280367, "grad_norm": 0.34492215514183044, "learning_rate": 0.00017149170121784794, "loss": 1.5328, "step": 10983 }, { "epoch": 0.14273207037195257, "grad_norm": 0.35682275891304016, "learning_rate": 0.00017148910175593653, "loss": 1.3526, "step": 10984 }, { "epoch": 0.14274506491586844, "grad_norm": 0.5120967030525208, "learning_rate": 0.00017148650229402513, "loss": 1.5462, "step": 10985 }, { "epoch": 0.14275805945978431, "grad_norm": 0.30757835507392883, "learning_rate": 0.00017148390283211375, "loss": 1.3035, "step": 10986 }, { "epoch": 0.1427710540037002, "grad_norm": 0.5287626385688782, "learning_rate": 0.00017148130337020238, "loss": 1.475, "step": 10987 }, { "epoch": 0.14278404854761606, "grad_norm": 0.3142072260379791, "learning_rate": 0.000171478703908291, "loss": 1.2456, "step": 10988 }, { "epoch": 0.14279704309153193, "grad_norm": 0.38019683957099915, "learning_rate": 0.0001714761044463796, "loss": 1.3963, "step": 10989 }, { "epoch": 0.1428100376354478, "grad_norm": 0.39742204546928406, "learning_rate": 0.00017147350498446823, "loss": 1.5119, "step": 10990 }, { "epoch": 0.14282303217936368, "grad_norm": 0.3889755606651306, "learning_rate": 0.00017147090552255685, "loss": 1.1885, "step": 10991 }, { "epoch": 0.14283602672327955, "grad_norm": 0.4565885663032532, "learning_rate": 0.00017146830606064545, "loss": 1.2739, "step": 10992 }, { "epoch": 0.14284902126719543, "grad_norm": 0.44561681151390076, "learning_rate": 0.00017146570659873407, "loss": 1.484, "step": 10993 }, { "epoch": 0.1428620158111113, "grad_norm": 0.4350430369377136, "learning_rate": 0.00017146310713682267, "loss": 1.4873, "step": 10994 }, { "epoch": 0.14287501035502717, "grad_norm": 0.33578014373779297, "learning_rate": 0.00017146050767491132, "loss": 1.5052, "step": 10995 }, { "epoch": 0.14288800489894304, "grad_norm": 0.44567304849624634, "learning_rate": 0.00017145790821299992, "loss": 1.5442, "step": 10996 }, { "epoch": 0.14290099944285892, "grad_norm": 0.4245055317878723, "learning_rate": 0.00017145530875108852, "loss": 1.3197, "step": 10997 }, { "epoch": 0.1429139939867748, "grad_norm": 0.3837421238422394, "learning_rate": 0.00017145270928917714, "loss": 1.5555, "step": 10998 }, { "epoch": 0.14292698853069066, "grad_norm": 0.3903602063655853, "learning_rate": 0.00017145010982726576, "loss": 1.2573, "step": 10999 }, { "epoch": 0.14293998307460654, "grad_norm": 0.4907805025577545, "learning_rate": 0.0001714475103653544, "loss": 1.5547, "step": 11000 }, { "epoch": 0.14295297761852244, "grad_norm": 0.44125404953956604, "learning_rate": 0.000171444910903443, "loss": 1.6019, "step": 11001 }, { "epoch": 0.1429659721624383, "grad_norm": 0.3582591712474823, "learning_rate": 0.0001714423114415316, "loss": 1.3749, "step": 11002 }, { "epoch": 0.14297896670635418, "grad_norm": 0.42408159375190735, "learning_rate": 0.00017143971197962024, "loss": 1.4933, "step": 11003 }, { "epoch": 0.14299196125027006, "grad_norm": 0.3925914764404297, "learning_rate": 0.00017143711251770883, "loss": 1.3263, "step": 11004 }, { "epoch": 0.14300495579418593, "grad_norm": 0.40110456943511963, "learning_rate": 0.00017143451305579746, "loss": 1.4584, "step": 11005 }, { "epoch": 0.1430179503381018, "grad_norm": 0.3904884159564972, "learning_rate": 0.00017143191359388608, "loss": 1.4286, "step": 11006 }, { "epoch": 0.14303094488201767, "grad_norm": 0.4744924306869507, "learning_rate": 0.0001714293141319747, "loss": 1.5802, "step": 11007 }, { "epoch": 0.14304393942593355, "grad_norm": 0.3206443786621094, "learning_rate": 0.0001714267146700633, "loss": 1.3874, "step": 11008 }, { "epoch": 0.14305693396984942, "grad_norm": 0.4607909321784973, "learning_rate": 0.0001714241152081519, "loss": 1.4407, "step": 11009 }, { "epoch": 0.1430699285137653, "grad_norm": 0.43808385729789734, "learning_rate": 0.00017142151574624055, "loss": 1.4162, "step": 11010 }, { "epoch": 0.14308292305768117, "grad_norm": 0.41574355959892273, "learning_rate": 0.00017141891628432915, "loss": 1.4881, "step": 11011 }, { "epoch": 0.14309591760159704, "grad_norm": 0.38654017448425293, "learning_rate": 0.00017141631682241777, "loss": 1.4732, "step": 11012 }, { "epoch": 0.1431089121455129, "grad_norm": 0.4168238639831543, "learning_rate": 0.00017141371736050637, "loss": 1.4363, "step": 11013 }, { "epoch": 0.14312190668942879, "grad_norm": 0.4135061800479889, "learning_rate": 0.000171411117898595, "loss": 1.4598, "step": 11014 }, { "epoch": 0.14313490123334466, "grad_norm": 0.3929010331630707, "learning_rate": 0.00017140851843668362, "loss": 1.3049, "step": 11015 }, { "epoch": 0.14314789577726053, "grad_norm": 0.47263097763061523, "learning_rate": 0.00017140591897477222, "loss": 1.4306, "step": 11016 }, { "epoch": 0.1431608903211764, "grad_norm": 0.36611026525497437, "learning_rate": 0.00017140331951286084, "loss": 1.2967, "step": 11017 }, { "epoch": 0.14317388486509228, "grad_norm": 0.3584540784358978, "learning_rate": 0.00017140072005094947, "loss": 1.208, "step": 11018 }, { "epoch": 0.14318687940900815, "grad_norm": 0.34560224413871765, "learning_rate": 0.0001713981205890381, "loss": 1.4214, "step": 11019 }, { "epoch": 0.14319987395292402, "grad_norm": 0.46460917592048645, "learning_rate": 0.0001713955211271267, "loss": 1.5404, "step": 11020 }, { "epoch": 0.1432128684968399, "grad_norm": 0.44852039217948914, "learning_rate": 0.0001713929216652153, "loss": 1.4811, "step": 11021 }, { "epoch": 0.14322586304075577, "grad_norm": 0.3241423964500427, "learning_rate": 0.00017139032220330394, "loss": 1.4997, "step": 11022 }, { "epoch": 0.14323885758467164, "grad_norm": 0.3574792444705963, "learning_rate": 0.00017138772274139254, "loss": 1.3769, "step": 11023 }, { "epoch": 0.14325185212858751, "grad_norm": 0.4007195830345154, "learning_rate": 0.00017138512327948116, "loss": 1.4272, "step": 11024 }, { "epoch": 0.1432648466725034, "grad_norm": 0.41601213812828064, "learning_rate": 0.00017138252381756976, "loss": 1.4171, "step": 11025 }, { "epoch": 0.14327784121641926, "grad_norm": 0.34559139609336853, "learning_rate": 0.00017137992435565838, "loss": 1.1943, "step": 11026 }, { "epoch": 0.14329083576033513, "grad_norm": 0.4047466814517975, "learning_rate": 0.000171377324893747, "loss": 1.4327, "step": 11027 }, { "epoch": 0.143303830304251, "grad_norm": 0.3817596137523651, "learning_rate": 0.0001713747254318356, "loss": 1.4415, "step": 11028 }, { "epoch": 0.14331682484816688, "grad_norm": 0.45390865206718445, "learning_rate": 0.00017137212596992423, "loss": 1.3426, "step": 11029 }, { "epoch": 0.14332981939208275, "grad_norm": 0.4162684381008148, "learning_rate": 0.00017136952650801285, "loss": 1.3738, "step": 11030 }, { "epoch": 0.14334281393599863, "grad_norm": 0.4135703444480896, "learning_rate": 0.00017136692704610148, "loss": 1.5929, "step": 11031 }, { "epoch": 0.1433558084799145, "grad_norm": 0.39735686779022217, "learning_rate": 0.00017136432758419007, "loss": 1.5254, "step": 11032 }, { "epoch": 0.14336880302383037, "grad_norm": 0.39109891653060913, "learning_rate": 0.0001713617281222787, "loss": 1.3437, "step": 11033 }, { "epoch": 0.14338179756774624, "grad_norm": 0.37252798676490784, "learning_rate": 0.00017135912866036732, "loss": 1.3626, "step": 11034 }, { "epoch": 0.14339479211166212, "grad_norm": 0.33316025137901306, "learning_rate": 0.00017135652919845592, "loss": 1.4341, "step": 11035 }, { "epoch": 0.143407786655578, "grad_norm": 0.561901330947876, "learning_rate": 0.00017135392973654455, "loss": 1.5039, "step": 11036 }, { "epoch": 0.14342078119949386, "grad_norm": 0.48901546001434326, "learning_rate": 0.00017135133027463314, "loss": 1.4492, "step": 11037 }, { "epoch": 0.14343377574340974, "grad_norm": 0.3802975118160248, "learning_rate": 0.00017134873081272177, "loss": 1.3288, "step": 11038 }, { "epoch": 0.1434467702873256, "grad_norm": 0.43023818731307983, "learning_rate": 0.0001713461313508104, "loss": 1.3944, "step": 11039 }, { "epoch": 0.14345976483124148, "grad_norm": 0.40096986293792725, "learning_rate": 0.000171343531888899, "loss": 1.5239, "step": 11040 }, { "epoch": 0.14347275937515735, "grad_norm": 0.281429260969162, "learning_rate": 0.00017134093242698764, "loss": 1.4416, "step": 11041 }, { "epoch": 0.14348575391907323, "grad_norm": 0.4465419352054596, "learning_rate": 0.00017133833296507624, "loss": 1.5091, "step": 11042 }, { "epoch": 0.1434987484629891, "grad_norm": 0.36565929651260376, "learning_rate": 0.00017133573350316486, "loss": 1.3416, "step": 11043 }, { "epoch": 0.14351174300690497, "grad_norm": 0.38336122035980225, "learning_rate": 0.00017133313404125346, "loss": 1.477, "step": 11044 }, { "epoch": 0.14352473755082085, "grad_norm": 0.39667901396751404, "learning_rate": 0.00017133053457934208, "loss": 1.5861, "step": 11045 }, { "epoch": 0.14353773209473672, "grad_norm": 0.37267830967903137, "learning_rate": 0.0001713279351174307, "loss": 1.5611, "step": 11046 }, { "epoch": 0.1435507266386526, "grad_norm": 0.4420000910758972, "learning_rate": 0.0001713253356555193, "loss": 1.4206, "step": 11047 }, { "epoch": 0.14356372118256847, "grad_norm": 0.3165684640407562, "learning_rate": 0.00017132273619360793, "loss": 1.3244, "step": 11048 }, { "epoch": 0.14357671572648434, "grad_norm": 0.35696810483932495, "learning_rate": 0.00017132013673169656, "loss": 1.3325, "step": 11049 }, { "epoch": 0.1435897102704002, "grad_norm": 0.3536531329154968, "learning_rate": 0.00017131753726978518, "loss": 1.4568, "step": 11050 }, { "epoch": 0.14360270481431608, "grad_norm": 0.33488917350769043, "learning_rate": 0.00017131493780787378, "loss": 1.4413, "step": 11051 }, { "epoch": 0.14361569935823196, "grad_norm": 0.42920148372650146, "learning_rate": 0.00017131233834596237, "loss": 1.528, "step": 11052 }, { "epoch": 0.14362869390214783, "grad_norm": 0.3805094063282013, "learning_rate": 0.00017130973888405103, "loss": 1.3961, "step": 11053 }, { "epoch": 0.1436416884460637, "grad_norm": 0.5175474882125854, "learning_rate": 0.00017130713942213962, "loss": 1.3858, "step": 11054 }, { "epoch": 0.14365468298997958, "grad_norm": 0.3624133765697479, "learning_rate": 0.00017130453996022825, "loss": 1.3044, "step": 11055 }, { "epoch": 0.14366767753389545, "grad_norm": 0.4375150203704834, "learning_rate": 0.00017130194049831685, "loss": 1.4163, "step": 11056 }, { "epoch": 0.14368067207781132, "grad_norm": 0.4642685651779175, "learning_rate": 0.00017129934103640547, "loss": 1.5782, "step": 11057 }, { "epoch": 0.1436936666217272, "grad_norm": 0.3251955211162567, "learning_rate": 0.0001712967415744941, "loss": 1.5498, "step": 11058 }, { "epoch": 0.14370666116564307, "grad_norm": 0.5314301252365112, "learning_rate": 0.0001712941421125827, "loss": 1.4687, "step": 11059 }, { "epoch": 0.14371965570955894, "grad_norm": 0.2936308681964874, "learning_rate": 0.00017129154265067132, "loss": 1.435, "step": 11060 }, { "epoch": 0.14373265025347481, "grad_norm": 0.380831241607666, "learning_rate": 0.00017128894318875994, "loss": 1.3785, "step": 11061 }, { "epoch": 0.1437456447973907, "grad_norm": 0.35102686285972595, "learning_rate": 0.00017128634372684857, "loss": 1.3924, "step": 11062 }, { "epoch": 0.14375863934130656, "grad_norm": 0.373811274766922, "learning_rate": 0.00017128374426493716, "loss": 1.3483, "step": 11063 }, { "epoch": 0.14377163388522243, "grad_norm": 0.42298251390457153, "learning_rate": 0.00017128114480302576, "loss": 1.3907, "step": 11064 }, { "epoch": 0.1437846284291383, "grad_norm": 0.41398540139198303, "learning_rate": 0.0001712785453411144, "loss": 1.3628, "step": 11065 }, { "epoch": 0.14379762297305418, "grad_norm": 0.411158949136734, "learning_rate": 0.000171275945879203, "loss": 1.4593, "step": 11066 }, { "epoch": 0.14381061751697005, "grad_norm": 0.4771862328052521, "learning_rate": 0.00017127334641729163, "loss": 1.5053, "step": 11067 }, { "epoch": 0.14382361206088592, "grad_norm": 0.38249608874320984, "learning_rate": 0.00017127074695538023, "loss": 1.312, "step": 11068 }, { "epoch": 0.1438366066048018, "grad_norm": 0.38712644577026367, "learning_rate": 0.00017126814749346886, "loss": 1.2148, "step": 11069 }, { "epoch": 0.14384960114871767, "grad_norm": 0.36538711190223694, "learning_rate": 0.00017126554803155748, "loss": 1.3879, "step": 11070 }, { "epoch": 0.14386259569263354, "grad_norm": 0.23660413920879364, "learning_rate": 0.00017126294856964608, "loss": 1.3647, "step": 11071 }, { "epoch": 0.14387559023654942, "grad_norm": 0.45454829931259155, "learning_rate": 0.0001712603491077347, "loss": 1.5353, "step": 11072 }, { "epoch": 0.1438885847804653, "grad_norm": 0.3014482855796814, "learning_rate": 0.00017125774964582333, "loss": 1.2862, "step": 11073 }, { "epoch": 0.14390157932438116, "grad_norm": 0.36239662766456604, "learning_rate": 0.00017125515018391195, "loss": 1.5245, "step": 11074 }, { "epoch": 0.14391457386829704, "grad_norm": 0.41044533252716064, "learning_rate": 0.00017125255072200055, "loss": 1.4635, "step": 11075 }, { "epoch": 0.1439275684122129, "grad_norm": 0.2653467059135437, "learning_rate": 0.00017124995126008915, "loss": 1.0017, "step": 11076 }, { "epoch": 0.1439405629561288, "grad_norm": 0.3259505033493042, "learning_rate": 0.0001712473517981778, "loss": 1.4711, "step": 11077 }, { "epoch": 0.14395355750004468, "grad_norm": 0.3671286404132843, "learning_rate": 0.0001712447523362664, "loss": 1.1098, "step": 11078 }, { "epoch": 0.14396655204396056, "grad_norm": 0.3763706088066101, "learning_rate": 0.00017124215287435502, "loss": 1.3169, "step": 11079 }, { "epoch": 0.14397954658787643, "grad_norm": 0.31940358877182007, "learning_rate": 0.00017123955341244364, "loss": 1.202, "step": 11080 }, { "epoch": 0.1439925411317923, "grad_norm": 0.3961261808872223, "learning_rate": 0.00017123695395053224, "loss": 1.5025, "step": 11081 }, { "epoch": 0.14400553567570817, "grad_norm": 0.5282770991325378, "learning_rate": 0.00017123435448862086, "loss": 1.5227, "step": 11082 }, { "epoch": 0.14401853021962405, "grad_norm": 0.36336007714271545, "learning_rate": 0.00017123175502670946, "loss": 1.5521, "step": 11083 }, { "epoch": 0.14403152476353992, "grad_norm": 0.27255767583847046, "learning_rate": 0.00017122915556479811, "loss": 1.2776, "step": 11084 }, { "epoch": 0.1440445193074558, "grad_norm": 0.30391019582748413, "learning_rate": 0.0001712265561028867, "loss": 1.3658, "step": 11085 }, { "epoch": 0.14405751385137167, "grad_norm": 0.36819812655448914, "learning_rate": 0.00017122395664097534, "loss": 1.6366, "step": 11086 }, { "epoch": 0.14407050839528754, "grad_norm": 0.3538173735141754, "learning_rate": 0.00017122135717906393, "loss": 1.4214, "step": 11087 }, { "epoch": 0.1440835029392034, "grad_norm": 0.4467506408691406, "learning_rate": 0.00017121875771715256, "loss": 1.6023, "step": 11088 }, { "epoch": 0.14409649748311928, "grad_norm": 0.3107706606388092, "learning_rate": 0.00017121615825524118, "loss": 1.495, "step": 11089 }, { "epoch": 0.14410949202703516, "grad_norm": 0.44247889518737793, "learning_rate": 0.00017121355879332978, "loss": 1.4899, "step": 11090 }, { "epoch": 0.14412248657095103, "grad_norm": 0.41364026069641113, "learning_rate": 0.0001712109593314184, "loss": 1.4425, "step": 11091 }, { "epoch": 0.1441354811148669, "grad_norm": 0.5177464485168457, "learning_rate": 0.00017120835986950703, "loss": 1.47, "step": 11092 }, { "epoch": 0.14414847565878278, "grad_norm": 0.325764924287796, "learning_rate": 0.00017120576040759563, "loss": 1.4266, "step": 11093 }, { "epoch": 0.14416147020269865, "grad_norm": 0.3556436002254486, "learning_rate": 0.00017120316094568425, "loss": 1.2437, "step": 11094 }, { "epoch": 0.14417446474661452, "grad_norm": 0.4278867244720459, "learning_rate": 0.00017120056148377285, "loss": 1.29, "step": 11095 }, { "epoch": 0.1441874592905304, "grad_norm": 0.376253217458725, "learning_rate": 0.0001711979620218615, "loss": 1.3918, "step": 11096 }, { "epoch": 0.14420045383444627, "grad_norm": 0.3262886106967926, "learning_rate": 0.0001711953625599501, "loss": 1.6173, "step": 11097 }, { "epoch": 0.14421344837836214, "grad_norm": 0.4144711196422577, "learning_rate": 0.00017119276309803872, "loss": 1.5227, "step": 11098 }, { "epoch": 0.14422644292227801, "grad_norm": 0.39721980690956116, "learning_rate": 0.00017119016363612732, "loss": 1.3542, "step": 11099 }, { "epoch": 0.1442394374661939, "grad_norm": 0.34060022234916687, "learning_rate": 0.00017118756417421594, "loss": 1.3545, "step": 11100 }, { "epoch": 0.14425243201010976, "grad_norm": 0.4440550208091736, "learning_rate": 0.00017118496471230457, "loss": 1.549, "step": 11101 }, { "epoch": 0.14426542655402563, "grad_norm": 0.3309880197048187, "learning_rate": 0.00017118236525039316, "loss": 1.3861, "step": 11102 }, { "epoch": 0.1442784210979415, "grad_norm": 0.40864211320877075, "learning_rate": 0.0001711797657884818, "loss": 1.3038, "step": 11103 }, { "epoch": 0.14429141564185738, "grad_norm": 0.38104912638664246, "learning_rate": 0.00017117716632657041, "loss": 1.3511, "step": 11104 }, { "epoch": 0.14430441018577325, "grad_norm": 0.4476778507232666, "learning_rate": 0.00017117456686465904, "loss": 1.4264, "step": 11105 }, { "epoch": 0.14431740472968912, "grad_norm": 0.42206504940986633, "learning_rate": 0.00017117196740274764, "loss": 1.6553, "step": 11106 }, { "epoch": 0.144330399273605, "grad_norm": 0.3561924397945404, "learning_rate": 0.00017116936794083623, "loss": 1.38, "step": 11107 }, { "epoch": 0.14434339381752087, "grad_norm": 0.37079834938049316, "learning_rate": 0.00017116676847892488, "loss": 1.2549, "step": 11108 }, { "epoch": 0.14435638836143674, "grad_norm": 0.4306679368019104, "learning_rate": 0.00017116416901701348, "loss": 1.5187, "step": 11109 }, { "epoch": 0.14436938290535262, "grad_norm": 0.38881582021713257, "learning_rate": 0.0001711615695551021, "loss": 1.3777, "step": 11110 }, { "epoch": 0.1443823774492685, "grad_norm": 0.5320708155632019, "learning_rate": 0.0001711589700931907, "loss": 1.6033, "step": 11111 }, { "epoch": 0.14439537199318436, "grad_norm": 0.3984955847263336, "learning_rate": 0.00017115637063127933, "loss": 1.3257, "step": 11112 }, { "epoch": 0.14440836653710024, "grad_norm": 0.39208194613456726, "learning_rate": 0.00017115377116936795, "loss": 1.3624, "step": 11113 }, { "epoch": 0.1444213610810161, "grad_norm": 0.3975045084953308, "learning_rate": 0.00017115117170745655, "loss": 1.5256, "step": 11114 }, { "epoch": 0.14443435562493198, "grad_norm": 0.38531166315078735, "learning_rate": 0.0001711485722455452, "loss": 1.3806, "step": 11115 }, { "epoch": 0.14444735016884785, "grad_norm": 0.40180933475494385, "learning_rate": 0.0001711459727836338, "loss": 1.6383, "step": 11116 }, { "epoch": 0.14446034471276373, "grad_norm": 0.4208340048789978, "learning_rate": 0.00017114337332172242, "loss": 1.3653, "step": 11117 }, { "epoch": 0.1444733392566796, "grad_norm": 0.4378970265388489, "learning_rate": 0.00017114077385981102, "loss": 1.4139, "step": 11118 }, { "epoch": 0.14448633380059547, "grad_norm": 0.37437704205513, "learning_rate": 0.00017113817439789965, "loss": 1.4935, "step": 11119 }, { "epoch": 0.14449932834451135, "grad_norm": 0.4415818154811859, "learning_rate": 0.00017113557493598827, "loss": 1.3587, "step": 11120 }, { "epoch": 0.14451232288842722, "grad_norm": 0.4054763615131378, "learning_rate": 0.00017113297547407687, "loss": 1.4784, "step": 11121 }, { "epoch": 0.1445253174323431, "grad_norm": 0.39209458231925964, "learning_rate": 0.0001711303760121655, "loss": 1.25, "step": 11122 }, { "epoch": 0.14453831197625897, "grad_norm": 0.40410366654396057, "learning_rate": 0.00017112777655025412, "loss": 1.3344, "step": 11123 }, { "epoch": 0.14455130652017484, "grad_norm": 0.3767906129360199, "learning_rate": 0.00017112517708834271, "loss": 1.4141, "step": 11124 }, { "epoch": 0.1445643010640907, "grad_norm": 0.36057430505752563, "learning_rate": 0.00017112257762643134, "loss": 1.3687, "step": 11125 }, { "epoch": 0.14457729560800658, "grad_norm": 0.4281517267227173, "learning_rate": 0.00017111997816451994, "loss": 1.516, "step": 11126 }, { "epoch": 0.14459029015192246, "grad_norm": 0.4196946620941162, "learning_rate": 0.0001711173787026086, "loss": 1.6903, "step": 11127 }, { "epoch": 0.14460328469583833, "grad_norm": 0.3593243360519409, "learning_rate": 0.00017111477924069718, "loss": 1.4337, "step": 11128 }, { "epoch": 0.1446162792397542, "grad_norm": 0.4050748348236084, "learning_rate": 0.0001711121797787858, "loss": 1.4971, "step": 11129 }, { "epoch": 0.14462927378367008, "grad_norm": 0.39851024746894836, "learning_rate": 0.0001711095803168744, "loss": 1.5997, "step": 11130 }, { "epoch": 0.14464226832758595, "grad_norm": 0.414989173412323, "learning_rate": 0.00017110698085496303, "loss": 1.3923, "step": 11131 }, { "epoch": 0.14465526287150182, "grad_norm": 0.40924960374832153, "learning_rate": 0.00017110438139305166, "loss": 1.3856, "step": 11132 }, { "epoch": 0.1446682574154177, "grad_norm": 0.41865432262420654, "learning_rate": 0.00017110178193114025, "loss": 1.4682, "step": 11133 }, { "epoch": 0.14468125195933357, "grad_norm": 0.34652113914489746, "learning_rate": 0.00017109918246922888, "loss": 1.4289, "step": 11134 }, { "epoch": 0.14469424650324944, "grad_norm": 0.3899216055870056, "learning_rate": 0.0001710965830073175, "loss": 1.2774, "step": 11135 }, { "epoch": 0.1447072410471653, "grad_norm": 0.47964945435523987, "learning_rate": 0.0001710939835454061, "loss": 1.4399, "step": 11136 }, { "epoch": 0.1447202355910812, "grad_norm": 0.4223995506763458, "learning_rate": 0.00017109138408349472, "loss": 1.4325, "step": 11137 }, { "epoch": 0.14473323013499706, "grad_norm": 0.36780813336372375, "learning_rate": 0.00017108878462158332, "loss": 1.4635, "step": 11138 }, { "epoch": 0.14474622467891293, "grad_norm": 0.30381128191947937, "learning_rate": 0.00017108618515967197, "loss": 1.3566, "step": 11139 }, { "epoch": 0.1447592192228288, "grad_norm": 0.34869423508644104, "learning_rate": 0.00017108358569776057, "loss": 1.3611, "step": 11140 }, { "epoch": 0.14477221376674468, "grad_norm": 0.3324102461338043, "learning_rate": 0.0001710809862358492, "loss": 1.4183, "step": 11141 }, { "epoch": 0.14478520831066055, "grad_norm": 0.4056366980075836, "learning_rate": 0.0001710783867739378, "loss": 1.4025, "step": 11142 }, { "epoch": 0.14479820285457642, "grad_norm": 0.377490371465683, "learning_rate": 0.00017107578731202642, "loss": 1.4025, "step": 11143 }, { "epoch": 0.1448111973984923, "grad_norm": 0.3825136721134186, "learning_rate": 0.00017107318785011504, "loss": 1.3839, "step": 11144 }, { "epoch": 0.14482419194240817, "grad_norm": 0.32290422916412354, "learning_rate": 0.00017107058838820364, "loss": 1.477, "step": 11145 }, { "epoch": 0.14483718648632404, "grad_norm": 0.422802597284317, "learning_rate": 0.00017106798892629226, "loss": 1.3897, "step": 11146 }, { "epoch": 0.14485018103023992, "grad_norm": 0.3886449337005615, "learning_rate": 0.0001710653894643809, "loss": 1.4643, "step": 11147 }, { "epoch": 0.1448631755741558, "grad_norm": 0.349416583776474, "learning_rate": 0.00017106279000246948, "loss": 1.4922, "step": 11148 }, { "epoch": 0.14487617011807166, "grad_norm": 0.32664382457733154, "learning_rate": 0.0001710601905405581, "loss": 1.3626, "step": 11149 }, { "epoch": 0.14488916466198754, "grad_norm": 0.40521445870399475, "learning_rate": 0.0001710575910786467, "loss": 1.3644, "step": 11150 }, { "epoch": 0.1449021592059034, "grad_norm": 0.3790925443172455, "learning_rate": 0.00017105499161673536, "loss": 1.4366, "step": 11151 }, { "epoch": 0.14491515374981928, "grad_norm": 0.3130298852920532, "learning_rate": 0.00017105239215482396, "loss": 1.6264, "step": 11152 }, { "epoch": 0.14492814829373518, "grad_norm": 0.34523412585258484, "learning_rate": 0.00017104979269291258, "loss": 1.1371, "step": 11153 }, { "epoch": 0.14494114283765105, "grad_norm": 0.34358522295951843, "learning_rate": 0.0001710471932310012, "loss": 1.2934, "step": 11154 }, { "epoch": 0.14495413738156693, "grad_norm": 0.3753041923046112, "learning_rate": 0.0001710445937690898, "loss": 1.5024, "step": 11155 }, { "epoch": 0.1449671319254828, "grad_norm": 0.5765540599822998, "learning_rate": 0.00017104199430717843, "loss": 1.6194, "step": 11156 }, { "epoch": 0.14498012646939867, "grad_norm": 0.3787371814250946, "learning_rate": 0.00017103939484526702, "loss": 1.4714, "step": 11157 }, { "epoch": 0.14499312101331455, "grad_norm": 0.36760982871055603, "learning_rate": 0.00017103679538335568, "loss": 1.484, "step": 11158 }, { "epoch": 0.14500611555723042, "grad_norm": 0.4599936306476593, "learning_rate": 0.00017103419592144427, "loss": 1.3481, "step": 11159 }, { "epoch": 0.1450191101011463, "grad_norm": 0.40311580896377563, "learning_rate": 0.00017103159645953287, "loss": 1.3385, "step": 11160 }, { "epoch": 0.14503210464506217, "grad_norm": 0.4979571998119354, "learning_rate": 0.0001710289969976215, "loss": 1.4405, "step": 11161 }, { "epoch": 0.14504509918897804, "grad_norm": 0.37653255462646484, "learning_rate": 0.00017102639753571012, "loss": 1.4526, "step": 11162 }, { "epoch": 0.1450580937328939, "grad_norm": 0.41032832860946655, "learning_rate": 0.00017102379807379874, "loss": 1.4929, "step": 11163 }, { "epoch": 0.14507108827680978, "grad_norm": 0.4043903946876526, "learning_rate": 0.00017102119861188734, "loss": 1.4022, "step": 11164 }, { "epoch": 0.14508408282072566, "grad_norm": 0.27943894267082214, "learning_rate": 0.00017101859914997597, "loss": 1.42, "step": 11165 }, { "epoch": 0.14509707736464153, "grad_norm": 0.5411479473114014, "learning_rate": 0.0001710159996880646, "loss": 1.4382, "step": 11166 }, { "epoch": 0.1451100719085574, "grad_norm": 0.4582824110984802, "learning_rate": 0.0001710134002261532, "loss": 1.5043, "step": 11167 }, { "epoch": 0.14512306645247328, "grad_norm": 0.46927210688591003, "learning_rate": 0.0001710108007642418, "loss": 1.4868, "step": 11168 }, { "epoch": 0.14513606099638915, "grad_norm": 0.3008632957935333, "learning_rate": 0.0001710082013023304, "loss": 1.293, "step": 11169 }, { "epoch": 0.14514905554030502, "grad_norm": 0.36784055829048157, "learning_rate": 0.00017100560184041906, "loss": 1.5808, "step": 11170 }, { "epoch": 0.1451620500842209, "grad_norm": 0.41808003187179565, "learning_rate": 0.00017100300237850766, "loss": 1.3989, "step": 11171 }, { "epoch": 0.14517504462813677, "grad_norm": 0.42032837867736816, "learning_rate": 0.00017100040291659628, "loss": 1.4308, "step": 11172 }, { "epoch": 0.14518803917205264, "grad_norm": 0.5430816411972046, "learning_rate": 0.00017099780345468488, "loss": 1.4639, "step": 11173 }, { "epoch": 0.1452010337159685, "grad_norm": 0.3855245113372803, "learning_rate": 0.0001709952039927735, "loss": 1.5902, "step": 11174 }, { "epoch": 0.1452140282598844, "grad_norm": 0.3944976031780243, "learning_rate": 0.00017099260453086213, "loss": 1.3476, "step": 11175 }, { "epoch": 0.14522702280380026, "grad_norm": 0.4476650357246399, "learning_rate": 0.00017099000506895073, "loss": 1.4947, "step": 11176 }, { "epoch": 0.14524001734771613, "grad_norm": 0.411156564950943, "learning_rate": 0.00017098740560703935, "loss": 1.5132, "step": 11177 }, { "epoch": 0.145253011891632, "grad_norm": 0.36436235904693604, "learning_rate": 0.00017098480614512798, "loss": 1.2626, "step": 11178 }, { "epoch": 0.14526600643554788, "grad_norm": 0.42791301012039185, "learning_rate": 0.00017098220668321657, "loss": 1.4867, "step": 11179 }, { "epoch": 0.14527900097946375, "grad_norm": 0.4045431911945343, "learning_rate": 0.0001709796072213052, "loss": 1.4253, "step": 11180 }, { "epoch": 0.14529199552337962, "grad_norm": 0.37130504846572876, "learning_rate": 0.0001709770077593938, "loss": 1.358, "step": 11181 }, { "epoch": 0.1453049900672955, "grad_norm": 0.3352845311164856, "learning_rate": 0.00017097440829748245, "loss": 1.4942, "step": 11182 }, { "epoch": 0.14531798461121137, "grad_norm": 0.37348663806915283, "learning_rate": 0.00017097180883557104, "loss": 1.4177, "step": 11183 }, { "epoch": 0.14533097915512724, "grad_norm": 0.37484636902809143, "learning_rate": 0.00017096920937365967, "loss": 1.4759, "step": 11184 }, { "epoch": 0.14534397369904312, "grad_norm": 0.3262447416782379, "learning_rate": 0.00017096660991174827, "loss": 1.575, "step": 11185 }, { "epoch": 0.145356968242959, "grad_norm": 0.47388604283332825, "learning_rate": 0.0001709640104498369, "loss": 1.5107, "step": 11186 }, { "epoch": 0.14536996278687486, "grad_norm": 0.5634368658065796, "learning_rate": 0.00017096141098792551, "loss": 1.6389, "step": 11187 }, { "epoch": 0.14538295733079074, "grad_norm": 0.2917027771472931, "learning_rate": 0.0001709588115260141, "loss": 1.3179, "step": 11188 }, { "epoch": 0.1453959518747066, "grad_norm": 0.38248276710510254, "learning_rate": 0.00017095621206410276, "loss": 1.4292, "step": 11189 }, { "epoch": 0.14540894641862248, "grad_norm": 0.3398478329181671, "learning_rate": 0.00017095361260219136, "loss": 1.5298, "step": 11190 }, { "epoch": 0.14542194096253835, "grad_norm": 0.4128149151802063, "learning_rate": 0.00017095101314027996, "loss": 1.3958, "step": 11191 }, { "epoch": 0.14543493550645423, "grad_norm": 0.4019889235496521, "learning_rate": 0.00017094841367836858, "loss": 1.3419, "step": 11192 }, { "epoch": 0.1454479300503701, "grad_norm": 0.3496367931365967, "learning_rate": 0.0001709458142164572, "loss": 1.5676, "step": 11193 }, { "epoch": 0.14546092459428597, "grad_norm": 0.38252392411231995, "learning_rate": 0.00017094321475454583, "loss": 1.4532, "step": 11194 }, { "epoch": 0.14547391913820185, "grad_norm": 0.684743344783783, "learning_rate": 0.00017094061529263443, "loss": 1.587, "step": 11195 }, { "epoch": 0.14548691368211772, "grad_norm": 0.4842517077922821, "learning_rate": 0.00017093801583072305, "loss": 1.4501, "step": 11196 }, { "epoch": 0.1454999082260336, "grad_norm": 0.43303027749061584, "learning_rate": 0.00017093541636881168, "loss": 1.4915, "step": 11197 }, { "epoch": 0.14551290276994946, "grad_norm": 0.4664687216281891, "learning_rate": 0.00017093281690690028, "loss": 1.4023, "step": 11198 }, { "epoch": 0.14552589731386534, "grad_norm": 0.4656442701816559, "learning_rate": 0.0001709302174449889, "loss": 1.5368, "step": 11199 }, { "epoch": 0.1455388918577812, "grad_norm": 0.4284462034702301, "learning_rate": 0.0001709276179830775, "loss": 1.4093, "step": 11200 }, { "epoch": 0.14555188640169708, "grad_norm": 0.42092394828796387, "learning_rate": 0.00017092501852116615, "loss": 1.3105, "step": 11201 }, { "epoch": 0.14556488094561296, "grad_norm": 0.4908880293369293, "learning_rate": 0.00017092241905925475, "loss": 1.5216, "step": 11202 }, { "epoch": 0.14557787548952883, "grad_norm": 0.4073388874530792, "learning_rate": 0.00017091981959734334, "loss": 1.5007, "step": 11203 }, { "epoch": 0.1455908700334447, "grad_norm": 0.32807299494743347, "learning_rate": 0.00017091722013543197, "loss": 1.3313, "step": 11204 }, { "epoch": 0.14560386457736058, "grad_norm": 0.5015289187431335, "learning_rate": 0.0001709146206735206, "loss": 1.4676, "step": 11205 }, { "epoch": 0.14561685912127645, "grad_norm": 0.304504930973053, "learning_rate": 0.00017091202121160922, "loss": 1.3186, "step": 11206 }, { "epoch": 0.14562985366519232, "grad_norm": 0.3211117386817932, "learning_rate": 0.00017090942174969781, "loss": 1.4544, "step": 11207 }, { "epoch": 0.1456428482091082, "grad_norm": 0.3818785548210144, "learning_rate": 0.00017090682228778644, "loss": 1.4742, "step": 11208 }, { "epoch": 0.14565584275302407, "grad_norm": 0.40314486622810364, "learning_rate": 0.00017090422282587506, "loss": 1.4474, "step": 11209 }, { "epoch": 0.14566883729693994, "grad_norm": 0.4286314845085144, "learning_rate": 0.00017090162336396366, "loss": 1.6516, "step": 11210 }, { "epoch": 0.1456818318408558, "grad_norm": 0.5021548867225647, "learning_rate": 0.00017089902390205229, "loss": 1.413, "step": 11211 }, { "epoch": 0.1456948263847717, "grad_norm": 0.3518587648868561, "learning_rate": 0.00017089642444014088, "loss": 1.3294, "step": 11212 }, { "epoch": 0.14570782092868756, "grad_norm": 0.37559816241264343, "learning_rate": 0.00017089382497822953, "loss": 1.5423, "step": 11213 }, { "epoch": 0.14572081547260343, "grad_norm": 0.4267975091934204, "learning_rate": 0.00017089122551631813, "loss": 1.4666, "step": 11214 }, { "epoch": 0.1457338100165193, "grad_norm": 0.3794823884963989, "learning_rate": 0.00017088862605440673, "loss": 1.4704, "step": 11215 }, { "epoch": 0.14574680456043518, "grad_norm": 0.40062078833580017, "learning_rate": 0.00017088602659249535, "loss": 1.3516, "step": 11216 }, { "epoch": 0.14575979910435105, "grad_norm": 0.4705944061279297, "learning_rate": 0.00017088342713058398, "loss": 1.583, "step": 11217 }, { "epoch": 0.14577279364826692, "grad_norm": 0.3194669485092163, "learning_rate": 0.0001708808276686726, "loss": 1.4838, "step": 11218 }, { "epoch": 0.1457857881921828, "grad_norm": 0.2934216856956482, "learning_rate": 0.0001708782282067612, "loss": 1.3715, "step": 11219 }, { "epoch": 0.14579878273609867, "grad_norm": 0.45715153217315674, "learning_rate": 0.00017087562874484982, "loss": 1.6046, "step": 11220 }, { "epoch": 0.14581177728001454, "grad_norm": 0.32778844237327576, "learning_rate": 0.00017087302928293845, "loss": 1.4126, "step": 11221 }, { "epoch": 0.14582477182393042, "grad_norm": 0.38202613592147827, "learning_rate": 0.00017087042982102705, "loss": 1.322, "step": 11222 }, { "epoch": 0.1458377663678463, "grad_norm": 0.40567055344581604, "learning_rate": 0.00017086783035911567, "loss": 1.4551, "step": 11223 }, { "epoch": 0.14585076091176216, "grad_norm": 0.23350529372692108, "learning_rate": 0.00017086523089720427, "loss": 1.3017, "step": 11224 }, { "epoch": 0.14586375545567803, "grad_norm": 0.3749062418937683, "learning_rate": 0.00017086263143529292, "loss": 1.3058, "step": 11225 }, { "epoch": 0.1458767499995939, "grad_norm": 0.2677316963672638, "learning_rate": 0.00017086003197338152, "loss": 1.3903, "step": 11226 }, { "epoch": 0.14588974454350978, "grad_norm": 0.39584994316101074, "learning_rate": 0.00017085743251147014, "loss": 1.5382, "step": 11227 }, { "epoch": 0.14590273908742565, "grad_norm": 0.34881478548049927, "learning_rate": 0.00017085483304955877, "loss": 1.4239, "step": 11228 }, { "epoch": 0.14591573363134155, "grad_norm": 0.4115470051765442, "learning_rate": 0.00017085223358764736, "loss": 1.4182, "step": 11229 }, { "epoch": 0.14592872817525743, "grad_norm": 0.3866683840751648, "learning_rate": 0.000170849634125736, "loss": 1.4995, "step": 11230 }, { "epoch": 0.1459417227191733, "grad_norm": 0.4981044828891754, "learning_rate": 0.00017084703466382458, "loss": 1.4853, "step": 11231 }, { "epoch": 0.14595471726308917, "grad_norm": 0.408642441034317, "learning_rate": 0.0001708444352019132, "loss": 1.41, "step": 11232 }, { "epoch": 0.14596771180700505, "grad_norm": 0.4571438729763031, "learning_rate": 0.00017084183574000183, "loss": 1.623, "step": 11233 }, { "epoch": 0.14598070635092092, "grad_norm": 0.27386486530303955, "learning_rate": 0.00017083923627809043, "loss": 1.1693, "step": 11234 }, { "epoch": 0.1459937008948368, "grad_norm": 0.44032835960388184, "learning_rate": 0.00017083663681617906, "loss": 1.5072, "step": 11235 }, { "epoch": 0.14600669543875266, "grad_norm": 0.4462278485298157, "learning_rate": 0.00017083403735426768, "loss": 1.4152, "step": 11236 }, { "epoch": 0.14601968998266854, "grad_norm": 0.2907092273235321, "learning_rate": 0.0001708314378923563, "loss": 1.6447, "step": 11237 }, { "epoch": 0.1460326845265844, "grad_norm": 0.39260178804397583, "learning_rate": 0.0001708288384304449, "loss": 1.5782, "step": 11238 }, { "epoch": 0.14604567907050028, "grad_norm": 0.33923542499542236, "learning_rate": 0.00017082623896853353, "loss": 1.49, "step": 11239 }, { "epoch": 0.14605867361441616, "grad_norm": 0.3640190660953522, "learning_rate": 0.00017082363950662215, "loss": 1.5123, "step": 11240 }, { "epoch": 0.14607166815833203, "grad_norm": 0.4283309578895569, "learning_rate": 0.00017082104004471075, "loss": 1.4022, "step": 11241 }, { "epoch": 0.1460846627022479, "grad_norm": 0.4036342203617096, "learning_rate": 0.00017081844058279937, "loss": 1.457, "step": 11242 }, { "epoch": 0.14609765724616378, "grad_norm": 0.38892436027526855, "learning_rate": 0.00017081584112088797, "loss": 1.4649, "step": 11243 }, { "epoch": 0.14611065179007965, "grad_norm": 0.4600119888782501, "learning_rate": 0.0001708132416589766, "loss": 1.3541, "step": 11244 }, { "epoch": 0.14612364633399552, "grad_norm": 0.37838712334632874, "learning_rate": 0.00017081064219706522, "loss": 1.5102, "step": 11245 }, { "epoch": 0.1461366408779114, "grad_norm": 0.4538290798664093, "learning_rate": 0.00017080804273515382, "loss": 1.4415, "step": 11246 }, { "epoch": 0.14614963542182727, "grad_norm": 0.4385407567024231, "learning_rate": 0.00017080544327324244, "loss": 1.388, "step": 11247 }, { "epoch": 0.14616262996574314, "grad_norm": 0.4056902825832367, "learning_rate": 0.00017080284381133107, "loss": 1.3527, "step": 11248 }, { "epoch": 0.146175624509659, "grad_norm": 0.5407351851463318, "learning_rate": 0.0001708002443494197, "loss": 1.4754, "step": 11249 }, { "epoch": 0.1461886190535749, "grad_norm": 0.3910459280014038, "learning_rate": 0.0001707976448875083, "loss": 1.2567, "step": 11250 }, { "epoch": 0.14620161359749076, "grad_norm": 0.37198469042778015, "learning_rate": 0.0001707950454255969, "loss": 1.3948, "step": 11251 }, { "epoch": 0.14621460814140663, "grad_norm": 0.39405572414398193, "learning_rate": 0.00017079244596368554, "loss": 1.4035, "step": 11252 }, { "epoch": 0.1462276026853225, "grad_norm": 0.49109235405921936, "learning_rate": 0.00017078984650177413, "loss": 1.5257, "step": 11253 }, { "epoch": 0.14624059722923838, "grad_norm": 0.4589422345161438, "learning_rate": 0.00017078724703986276, "loss": 1.367, "step": 11254 }, { "epoch": 0.14625359177315425, "grad_norm": 0.47225743532180786, "learning_rate": 0.00017078464757795136, "loss": 1.3805, "step": 11255 }, { "epoch": 0.14626658631707012, "grad_norm": 0.44636714458465576, "learning_rate": 0.00017078204811604, "loss": 1.4947, "step": 11256 }, { "epoch": 0.146279580860986, "grad_norm": 0.4095315933227539, "learning_rate": 0.0001707794486541286, "loss": 1.4525, "step": 11257 }, { "epoch": 0.14629257540490187, "grad_norm": 0.4779440462589264, "learning_rate": 0.0001707768491922172, "loss": 1.3759, "step": 11258 }, { "epoch": 0.14630556994881774, "grad_norm": 0.40051358938217163, "learning_rate": 0.00017077424973030583, "loss": 1.4801, "step": 11259 }, { "epoch": 0.14631856449273362, "grad_norm": 0.35656315088272095, "learning_rate": 0.00017077165026839445, "loss": 1.2252, "step": 11260 }, { "epoch": 0.1463315590366495, "grad_norm": 0.4320249557495117, "learning_rate": 0.00017076905080648308, "loss": 1.5085, "step": 11261 }, { "epoch": 0.14634455358056536, "grad_norm": 0.4325583279132843, "learning_rate": 0.00017076645134457167, "loss": 1.5621, "step": 11262 }, { "epoch": 0.14635754812448123, "grad_norm": 0.3113324046134949, "learning_rate": 0.0001707638518826603, "loss": 1.4537, "step": 11263 }, { "epoch": 0.1463705426683971, "grad_norm": 0.32814615964889526, "learning_rate": 0.00017076125242074892, "loss": 1.3648, "step": 11264 }, { "epoch": 0.14638353721231298, "grad_norm": 0.42125335335731506, "learning_rate": 0.00017075865295883752, "loss": 1.3706, "step": 11265 }, { "epoch": 0.14639653175622885, "grad_norm": 0.4062483012676239, "learning_rate": 0.00017075605349692614, "loss": 1.5214, "step": 11266 }, { "epoch": 0.14640952630014473, "grad_norm": 0.3844647705554962, "learning_rate": 0.00017075345403501477, "loss": 1.4872, "step": 11267 }, { "epoch": 0.1464225208440606, "grad_norm": 0.3286508619785309, "learning_rate": 0.0001707508545731034, "loss": 1.3626, "step": 11268 }, { "epoch": 0.14643551538797647, "grad_norm": 0.3928113877773285, "learning_rate": 0.000170748255111192, "loss": 1.4427, "step": 11269 }, { "epoch": 0.14644850993189235, "grad_norm": 0.45838379859924316, "learning_rate": 0.0001707456556492806, "loss": 1.372, "step": 11270 }, { "epoch": 0.14646150447580822, "grad_norm": 0.4003114700317383, "learning_rate": 0.00017074305618736924, "loss": 1.2152, "step": 11271 }, { "epoch": 0.1464744990197241, "grad_norm": 0.45826438069343567, "learning_rate": 0.00017074045672545784, "loss": 1.3034, "step": 11272 }, { "epoch": 0.14648749356363996, "grad_norm": 0.4394860863685608, "learning_rate": 0.00017073785726354646, "loss": 1.4534, "step": 11273 }, { "epoch": 0.14650048810755584, "grad_norm": 0.26345837116241455, "learning_rate": 0.00017073525780163506, "loss": 1.4064, "step": 11274 }, { "epoch": 0.1465134826514717, "grad_norm": 0.37141886353492737, "learning_rate": 0.00017073265833972368, "loss": 1.3198, "step": 11275 }, { "epoch": 0.14652647719538758, "grad_norm": 0.42236199975013733, "learning_rate": 0.0001707300588778123, "loss": 1.4317, "step": 11276 }, { "epoch": 0.14653947173930346, "grad_norm": 0.38771164417266846, "learning_rate": 0.0001707274594159009, "loss": 1.3816, "step": 11277 }, { "epoch": 0.14655246628321933, "grad_norm": 0.383962482213974, "learning_rate": 0.00017072485995398953, "loss": 1.4785, "step": 11278 }, { "epoch": 0.1465654608271352, "grad_norm": 0.3914499580860138, "learning_rate": 0.00017072226049207815, "loss": 1.4628, "step": 11279 }, { "epoch": 0.14657845537105108, "grad_norm": 0.36763709783554077, "learning_rate": 0.00017071966103016678, "loss": 1.5323, "step": 11280 }, { "epoch": 0.14659144991496695, "grad_norm": 0.5018381476402283, "learning_rate": 0.00017071706156825538, "loss": 1.5501, "step": 11281 }, { "epoch": 0.14660444445888282, "grad_norm": 0.39596062898635864, "learning_rate": 0.00017071446210634397, "loss": 1.259, "step": 11282 }, { "epoch": 0.1466174390027987, "grad_norm": 0.4241613745689392, "learning_rate": 0.00017071186264443262, "loss": 1.3641, "step": 11283 }, { "epoch": 0.14663043354671457, "grad_norm": 0.3441568613052368, "learning_rate": 0.00017070926318252122, "loss": 1.3624, "step": 11284 }, { "epoch": 0.14664342809063044, "grad_norm": 0.43853938579559326, "learning_rate": 0.00017070666372060985, "loss": 1.3387, "step": 11285 }, { "epoch": 0.1466564226345463, "grad_norm": 0.3544641435146332, "learning_rate": 0.00017070406425869844, "loss": 1.4138, "step": 11286 }, { "epoch": 0.14666941717846219, "grad_norm": 0.3245483934879303, "learning_rate": 0.00017070146479678707, "loss": 1.2798, "step": 11287 }, { "epoch": 0.14668241172237806, "grad_norm": 0.44017425179481506, "learning_rate": 0.0001706988653348757, "loss": 1.4239, "step": 11288 }, { "epoch": 0.14669540626629393, "grad_norm": 0.40762537717819214, "learning_rate": 0.0001706962658729643, "loss": 1.4932, "step": 11289 }, { "epoch": 0.1467084008102098, "grad_norm": 0.38577723503112793, "learning_rate": 0.00017069366641105291, "loss": 1.294, "step": 11290 }, { "epoch": 0.14672139535412568, "grad_norm": 0.31441450119018555, "learning_rate": 0.00017069106694914154, "loss": 1.3226, "step": 11291 }, { "epoch": 0.14673438989804155, "grad_norm": 0.3920429050922394, "learning_rate": 0.00017068846748723016, "loss": 1.5302, "step": 11292 }, { "epoch": 0.14674738444195742, "grad_norm": 0.36099323630332947, "learning_rate": 0.00017068586802531876, "loss": 1.389, "step": 11293 }, { "epoch": 0.1467603789858733, "grad_norm": 0.4814140200614929, "learning_rate": 0.00017068326856340739, "loss": 1.4357, "step": 11294 }, { "epoch": 0.14677337352978917, "grad_norm": 0.38020220398902893, "learning_rate": 0.000170680669101496, "loss": 1.3838, "step": 11295 }, { "epoch": 0.14678636807370504, "grad_norm": 0.38883206248283386, "learning_rate": 0.0001706780696395846, "loss": 1.4297, "step": 11296 }, { "epoch": 0.14679936261762092, "grad_norm": 0.5336456894874573, "learning_rate": 0.00017067547017767323, "loss": 1.2553, "step": 11297 }, { "epoch": 0.1468123571615368, "grad_norm": 0.4061763882637024, "learning_rate": 0.00017067287071576183, "loss": 1.587, "step": 11298 }, { "epoch": 0.14682535170545266, "grad_norm": 0.3841736316680908, "learning_rate": 0.00017067027125385045, "loss": 1.3514, "step": 11299 }, { "epoch": 0.14683834624936853, "grad_norm": 0.4954317510128021, "learning_rate": 0.00017066767179193908, "loss": 1.4853, "step": 11300 }, { "epoch": 0.1468513407932844, "grad_norm": 0.3520929217338562, "learning_rate": 0.00017066507233002768, "loss": 1.4049, "step": 11301 }, { "epoch": 0.14686433533720028, "grad_norm": 0.3793904185295105, "learning_rate": 0.00017066247286811633, "loss": 1.4555, "step": 11302 }, { "epoch": 0.14687732988111615, "grad_norm": 0.4387750029563904, "learning_rate": 0.00017065987340620492, "loss": 1.3121, "step": 11303 }, { "epoch": 0.14689032442503203, "grad_norm": 0.466605007648468, "learning_rate": 0.00017065727394429355, "loss": 1.2954, "step": 11304 }, { "epoch": 0.14690331896894793, "grad_norm": 0.4261796474456787, "learning_rate": 0.00017065467448238215, "loss": 1.5242, "step": 11305 }, { "epoch": 0.1469163135128638, "grad_norm": 0.3150370717048645, "learning_rate": 0.00017065207502047077, "loss": 1.3216, "step": 11306 }, { "epoch": 0.14692930805677967, "grad_norm": 0.47326594591140747, "learning_rate": 0.0001706494755585594, "loss": 1.5901, "step": 11307 }, { "epoch": 0.14694230260069555, "grad_norm": 0.35577261447906494, "learning_rate": 0.000170646876096648, "loss": 1.1514, "step": 11308 }, { "epoch": 0.14695529714461142, "grad_norm": 0.4099962115287781, "learning_rate": 0.00017064427663473662, "loss": 1.4694, "step": 11309 }, { "epoch": 0.1469682916885273, "grad_norm": 0.38423559069633484, "learning_rate": 0.00017064167717282524, "loss": 1.4282, "step": 11310 }, { "epoch": 0.14698128623244316, "grad_norm": 0.38542187213897705, "learning_rate": 0.00017063907771091387, "loss": 1.2436, "step": 11311 }, { "epoch": 0.14699428077635904, "grad_norm": 0.39691177010536194, "learning_rate": 0.00017063647824900246, "loss": 1.4153, "step": 11312 }, { "epoch": 0.1470072753202749, "grad_norm": 0.37217992544174194, "learning_rate": 0.00017063387878709106, "loss": 1.3109, "step": 11313 }, { "epoch": 0.14702026986419078, "grad_norm": 0.3418477773666382, "learning_rate": 0.0001706312793251797, "loss": 1.48, "step": 11314 }, { "epoch": 0.14703326440810666, "grad_norm": 0.382400780916214, "learning_rate": 0.0001706286798632683, "loss": 1.4198, "step": 11315 }, { "epoch": 0.14704625895202253, "grad_norm": 0.4471965432167053, "learning_rate": 0.00017062608040135693, "loss": 1.4009, "step": 11316 }, { "epoch": 0.1470592534959384, "grad_norm": 0.7015073299407959, "learning_rate": 0.00017062348093944553, "loss": 1.4773, "step": 11317 }, { "epoch": 0.14707224803985428, "grad_norm": 0.29717767238616943, "learning_rate": 0.00017062088147753416, "loss": 1.381, "step": 11318 }, { "epoch": 0.14708524258377015, "grad_norm": 0.4319836497306824, "learning_rate": 0.00017061828201562278, "loss": 1.4696, "step": 11319 }, { "epoch": 0.14709823712768602, "grad_norm": 0.2944984436035156, "learning_rate": 0.00017061568255371138, "loss": 1.3719, "step": 11320 }, { "epoch": 0.1471112316716019, "grad_norm": 0.41287854313850403, "learning_rate": 0.0001706130830918, "loss": 1.466, "step": 11321 }, { "epoch": 0.14712422621551777, "grad_norm": 0.48095208406448364, "learning_rate": 0.00017061048362988863, "loss": 1.4417, "step": 11322 }, { "epoch": 0.14713722075943364, "grad_norm": 0.3037012219429016, "learning_rate": 0.00017060788416797725, "loss": 1.4666, "step": 11323 }, { "epoch": 0.1471502153033495, "grad_norm": 0.34090596437454224, "learning_rate": 0.00017060528470606585, "loss": 1.3608, "step": 11324 }, { "epoch": 0.14716320984726539, "grad_norm": 0.3609218895435333, "learning_rate": 0.00017060268524415445, "loss": 1.2406, "step": 11325 }, { "epoch": 0.14717620439118126, "grad_norm": 0.42388495802879333, "learning_rate": 0.0001706000857822431, "loss": 1.3757, "step": 11326 }, { "epoch": 0.14718919893509713, "grad_norm": 0.41894280910491943, "learning_rate": 0.0001705974863203317, "loss": 1.34, "step": 11327 }, { "epoch": 0.147202193479013, "grad_norm": 0.4209027886390686, "learning_rate": 0.00017059488685842032, "loss": 1.4093, "step": 11328 }, { "epoch": 0.14721518802292888, "grad_norm": 0.4063383638858795, "learning_rate": 0.00017059228739650892, "loss": 1.595, "step": 11329 }, { "epoch": 0.14722818256684475, "grad_norm": 0.4827539920806885, "learning_rate": 0.00017058968793459754, "loss": 1.6648, "step": 11330 }, { "epoch": 0.14724117711076062, "grad_norm": 0.4543558657169342, "learning_rate": 0.00017058708847268617, "loss": 1.5291, "step": 11331 }, { "epoch": 0.1472541716546765, "grad_norm": 0.3768217861652374, "learning_rate": 0.00017058448901077476, "loss": 1.5283, "step": 11332 }, { "epoch": 0.14726716619859237, "grad_norm": 0.8364927768707275, "learning_rate": 0.0001705818895488634, "loss": 1.2543, "step": 11333 }, { "epoch": 0.14728016074250824, "grad_norm": 0.32101795077323914, "learning_rate": 0.000170579290086952, "loss": 1.2807, "step": 11334 }, { "epoch": 0.14729315528642412, "grad_norm": 0.31307604908943176, "learning_rate": 0.00017057669062504064, "loss": 1.4749, "step": 11335 }, { "epoch": 0.14730614983034, "grad_norm": 0.39819425344467163, "learning_rate": 0.00017057409116312923, "loss": 1.5478, "step": 11336 }, { "epoch": 0.14731914437425586, "grad_norm": 0.4148833453655243, "learning_rate": 0.00017057149170121786, "loss": 1.5194, "step": 11337 }, { "epoch": 0.14733213891817173, "grad_norm": 0.42625489830970764, "learning_rate": 0.00017056889223930648, "loss": 1.3707, "step": 11338 }, { "epoch": 0.1473451334620876, "grad_norm": 0.34176045656204224, "learning_rate": 0.00017056629277739508, "loss": 1.2678, "step": 11339 }, { "epoch": 0.14735812800600348, "grad_norm": 0.3996594250202179, "learning_rate": 0.0001705636933154837, "loss": 1.5279, "step": 11340 }, { "epoch": 0.14737112254991935, "grad_norm": 0.40667155385017395, "learning_rate": 0.00017056109385357233, "loss": 1.4916, "step": 11341 }, { "epoch": 0.14738411709383523, "grad_norm": 0.2968926429748535, "learning_rate": 0.00017055849439166093, "loss": 1.4355, "step": 11342 }, { "epoch": 0.1473971116377511, "grad_norm": 0.3608457148075104, "learning_rate": 0.00017055589492974955, "loss": 1.4887, "step": 11343 }, { "epoch": 0.14741010618166697, "grad_norm": 0.37557971477508545, "learning_rate": 0.00017055329546783815, "loss": 1.241, "step": 11344 }, { "epoch": 0.14742310072558285, "grad_norm": 0.3155742287635803, "learning_rate": 0.0001705506960059268, "loss": 1.3422, "step": 11345 }, { "epoch": 0.14743609526949872, "grad_norm": 0.48092982172966003, "learning_rate": 0.0001705480965440154, "loss": 1.641, "step": 11346 }, { "epoch": 0.1474490898134146, "grad_norm": 0.39166221022605896, "learning_rate": 0.00017054549708210402, "loss": 1.4743, "step": 11347 }, { "epoch": 0.14746208435733046, "grad_norm": 0.3319631814956665, "learning_rate": 0.00017054289762019262, "loss": 1.5296, "step": 11348 }, { "epoch": 0.14747507890124634, "grad_norm": 0.37180498242378235, "learning_rate": 0.00017054029815828124, "loss": 1.5253, "step": 11349 }, { "epoch": 0.1474880734451622, "grad_norm": 0.5395805835723877, "learning_rate": 0.00017053769869636987, "loss": 1.5573, "step": 11350 }, { "epoch": 0.14750106798907808, "grad_norm": 0.3219076097011566, "learning_rate": 0.00017053509923445847, "loss": 1.3363, "step": 11351 }, { "epoch": 0.14751406253299396, "grad_norm": 0.4966753423213959, "learning_rate": 0.0001705324997725471, "loss": 1.4895, "step": 11352 }, { "epoch": 0.14752705707690983, "grad_norm": 0.3192373514175415, "learning_rate": 0.00017052990031063571, "loss": 1.0323, "step": 11353 }, { "epoch": 0.1475400516208257, "grad_norm": 0.3798332214355469, "learning_rate": 0.0001705273008487243, "loss": 1.4693, "step": 11354 }, { "epoch": 0.14755304616474157, "grad_norm": 0.4342315196990967, "learning_rate": 0.00017052470138681294, "loss": 1.4513, "step": 11355 }, { "epoch": 0.14756604070865745, "grad_norm": 0.4443801939487457, "learning_rate": 0.00017052210192490153, "loss": 1.3978, "step": 11356 }, { "epoch": 0.14757903525257332, "grad_norm": 0.4159848988056183, "learning_rate": 0.00017051950246299019, "loss": 1.3956, "step": 11357 }, { "epoch": 0.1475920297964892, "grad_norm": 0.39567968249320984, "learning_rate": 0.00017051690300107878, "loss": 1.4222, "step": 11358 }, { "epoch": 0.14760502434040507, "grad_norm": 0.49021586775779724, "learning_rate": 0.0001705143035391674, "loss": 1.5596, "step": 11359 }, { "epoch": 0.14761801888432094, "grad_norm": 0.41064900159835815, "learning_rate": 0.000170511704077256, "loss": 1.3305, "step": 11360 }, { "epoch": 0.1476310134282368, "grad_norm": 0.31437233090400696, "learning_rate": 0.00017050910461534463, "loss": 1.4809, "step": 11361 }, { "epoch": 0.14764400797215269, "grad_norm": 0.5019233822822571, "learning_rate": 0.00017050650515343325, "loss": 1.3856, "step": 11362 }, { "epoch": 0.14765700251606856, "grad_norm": 0.3403305411338806, "learning_rate": 0.00017050390569152185, "loss": 1.2366, "step": 11363 }, { "epoch": 0.14766999705998443, "grad_norm": 0.47358494997024536, "learning_rate": 0.00017050130622961048, "loss": 1.4457, "step": 11364 }, { "epoch": 0.1476829916039003, "grad_norm": 0.45909085869789124, "learning_rate": 0.0001704987067676991, "loss": 1.3399, "step": 11365 }, { "epoch": 0.14769598614781618, "grad_norm": 0.2728167176246643, "learning_rate": 0.0001704961073057877, "loss": 1.1756, "step": 11366 }, { "epoch": 0.14770898069173205, "grad_norm": 0.3456907272338867, "learning_rate": 0.00017049350784387632, "loss": 1.1714, "step": 11367 }, { "epoch": 0.14772197523564792, "grad_norm": 0.29394423961639404, "learning_rate": 0.00017049090838196492, "loss": 1.4336, "step": 11368 }, { "epoch": 0.1477349697795638, "grad_norm": 0.3596062958240509, "learning_rate": 0.00017048830892005357, "loss": 1.3993, "step": 11369 }, { "epoch": 0.14774796432347967, "grad_norm": 0.430662602186203, "learning_rate": 0.00017048570945814217, "loss": 1.3741, "step": 11370 }, { "epoch": 0.14776095886739554, "grad_norm": 0.6005839109420776, "learning_rate": 0.0001704831099962308, "loss": 1.631, "step": 11371 }, { "epoch": 0.14777395341131142, "grad_norm": 0.3450881242752075, "learning_rate": 0.0001704805105343194, "loss": 1.2928, "step": 11372 }, { "epoch": 0.1477869479552273, "grad_norm": 0.3537757396697998, "learning_rate": 0.00017047791107240801, "loss": 1.3556, "step": 11373 }, { "epoch": 0.14779994249914316, "grad_norm": 0.380252480506897, "learning_rate": 0.00017047531161049664, "loss": 1.5213, "step": 11374 }, { "epoch": 0.14781293704305903, "grad_norm": 0.30626946687698364, "learning_rate": 0.00017047271214858524, "loss": 1.1617, "step": 11375 }, { "epoch": 0.1478259315869749, "grad_norm": 0.37865880131721497, "learning_rate": 0.0001704701126866739, "loss": 1.6296, "step": 11376 }, { "epoch": 0.14783892613089078, "grad_norm": 0.40330734848976135, "learning_rate": 0.00017046751322476249, "loss": 1.5046, "step": 11377 }, { "epoch": 0.14785192067480665, "grad_norm": 0.33202293515205383, "learning_rate": 0.0001704649137628511, "loss": 1.496, "step": 11378 }, { "epoch": 0.14786491521872253, "grad_norm": 0.4565335214138031, "learning_rate": 0.0001704623143009397, "loss": 1.4885, "step": 11379 }, { "epoch": 0.1478779097626384, "grad_norm": 0.4210394322872162, "learning_rate": 0.00017045971483902833, "loss": 1.4602, "step": 11380 }, { "epoch": 0.14789090430655427, "grad_norm": 0.29736971855163574, "learning_rate": 0.00017045711537711696, "loss": 1.4655, "step": 11381 }, { "epoch": 0.14790389885047017, "grad_norm": 0.45637187361717224, "learning_rate": 0.00017045451591520555, "loss": 1.4731, "step": 11382 }, { "epoch": 0.14791689339438605, "grad_norm": 0.47508472204208374, "learning_rate": 0.00017045191645329418, "loss": 1.4168, "step": 11383 }, { "epoch": 0.14792988793830192, "grad_norm": 0.3757266402244568, "learning_rate": 0.0001704493169913828, "loss": 1.484, "step": 11384 }, { "epoch": 0.1479428824822178, "grad_norm": 0.45095619559288025, "learning_rate": 0.0001704467175294714, "loss": 1.4376, "step": 11385 }, { "epoch": 0.14795587702613366, "grad_norm": 0.3567884564399719, "learning_rate": 0.00017044411806756002, "loss": 1.253, "step": 11386 }, { "epoch": 0.14796887157004954, "grad_norm": 0.4230720102787018, "learning_rate": 0.00017044151860564862, "loss": 1.4911, "step": 11387 }, { "epoch": 0.1479818661139654, "grad_norm": 0.3521578907966614, "learning_rate": 0.00017043891914373727, "loss": 1.3722, "step": 11388 }, { "epoch": 0.14799486065788128, "grad_norm": 0.3804466426372528, "learning_rate": 0.00017043631968182587, "loss": 1.5331, "step": 11389 }, { "epoch": 0.14800785520179716, "grad_norm": 0.3658037781715393, "learning_rate": 0.0001704337202199145, "loss": 1.3457, "step": 11390 }, { "epoch": 0.14802084974571303, "grad_norm": 0.3560846447944641, "learning_rate": 0.0001704311207580031, "loss": 1.2726, "step": 11391 }, { "epoch": 0.1480338442896289, "grad_norm": 0.3416954576969147, "learning_rate": 0.00017042852129609172, "loss": 1.4962, "step": 11392 }, { "epoch": 0.14804683883354477, "grad_norm": 0.38784071803092957, "learning_rate": 0.00017042592183418034, "loss": 1.3989, "step": 11393 }, { "epoch": 0.14805983337746065, "grad_norm": 0.4625445306301117, "learning_rate": 0.00017042332237226894, "loss": 1.4303, "step": 11394 }, { "epoch": 0.14807282792137652, "grad_norm": 0.7778862714767456, "learning_rate": 0.00017042072291035756, "loss": 1.5541, "step": 11395 }, { "epoch": 0.1480858224652924, "grad_norm": 0.3546242415904999, "learning_rate": 0.0001704181234484462, "loss": 1.5026, "step": 11396 }, { "epoch": 0.14809881700920827, "grad_norm": 0.3760068714618683, "learning_rate": 0.00017041552398653479, "loss": 1.4071, "step": 11397 }, { "epoch": 0.14811181155312414, "grad_norm": 0.48493143916130066, "learning_rate": 0.0001704129245246234, "loss": 1.4072, "step": 11398 }, { "epoch": 0.14812480609704, "grad_norm": 0.40202566981315613, "learning_rate": 0.000170410325062712, "loss": 1.4443, "step": 11399 }, { "epoch": 0.14813780064095589, "grad_norm": 0.3808056712150574, "learning_rate": 0.00017040772560080066, "loss": 1.4696, "step": 11400 }, { "epoch": 0.14815079518487176, "grad_norm": 0.3787548243999481, "learning_rate": 0.00017040512613888926, "loss": 1.5949, "step": 11401 }, { "epoch": 0.14816378972878763, "grad_norm": 0.3699875771999359, "learning_rate": 0.00017040252667697788, "loss": 1.6121, "step": 11402 }, { "epoch": 0.1481767842727035, "grad_norm": 0.4473171830177307, "learning_rate": 0.00017039992721506648, "loss": 1.3451, "step": 11403 }, { "epoch": 0.14818977881661938, "grad_norm": 0.3499412536621094, "learning_rate": 0.0001703973277531551, "loss": 1.2668, "step": 11404 }, { "epoch": 0.14820277336053525, "grad_norm": 0.4555814862251282, "learning_rate": 0.00017039472829124373, "loss": 1.55, "step": 11405 }, { "epoch": 0.14821576790445112, "grad_norm": 0.459335595369339, "learning_rate": 0.00017039212882933232, "loss": 1.4368, "step": 11406 }, { "epoch": 0.148228762448367, "grad_norm": 0.3566705286502838, "learning_rate": 0.00017038952936742095, "loss": 1.5282, "step": 11407 }, { "epoch": 0.14824175699228287, "grad_norm": 0.38004159927368164, "learning_rate": 0.00017038692990550957, "loss": 1.5673, "step": 11408 }, { "epoch": 0.14825475153619874, "grad_norm": 0.37175101041793823, "learning_rate": 0.00017038433044359817, "loss": 1.5419, "step": 11409 }, { "epoch": 0.14826774608011462, "grad_norm": 0.4951498806476593, "learning_rate": 0.0001703817309816868, "loss": 1.2901, "step": 11410 }, { "epoch": 0.1482807406240305, "grad_norm": 0.3955337107181549, "learning_rate": 0.00017037913151977542, "loss": 1.3176, "step": 11411 }, { "epoch": 0.14829373516794636, "grad_norm": 0.42036792635917664, "learning_rate": 0.00017037653205786404, "loss": 1.6788, "step": 11412 }, { "epoch": 0.14830672971186223, "grad_norm": 0.3640468418598175, "learning_rate": 0.00017037393259595264, "loss": 1.4525, "step": 11413 }, { "epoch": 0.1483197242557781, "grad_norm": 0.408597469329834, "learning_rate": 0.00017037133313404127, "loss": 1.4319, "step": 11414 }, { "epoch": 0.14833271879969398, "grad_norm": 0.3696690499782562, "learning_rate": 0.0001703687336721299, "loss": 1.7039, "step": 11415 }, { "epoch": 0.14834571334360985, "grad_norm": 0.35438260436058044, "learning_rate": 0.0001703661342102185, "loss": 1.3458, "step": 11416 }, { "epoch": 0.14835870788752573, "grad_norm": 0.37571418285369873, "learning_rate": 0.0001703635347483071, "loss": 1.299, "step": 11417 }, { "epoch": 0.1483717024314416, "grad_norm": 0.4100554585456848, "learning_rate": 0.0001703609352863957, "loss": 1.544, "step": 11418 }, { "epoch": 0.14838469697535747, "grad_norm": 0.3780863583087921, "learning_rate": 0.00017035833582448436, "loss": 1.2972, "step": 11419 }, { "epoch": 0.14839769151927334, "grad_norm": 0.5090693235397339, "learning_rate": 0.00017035573636257296, "loss": 1.3581, "step": 11420 }, { "epoch": 0.14841068606318922, "grad_norm": 0.3582127094268799, "learning_rate": 0.00017035313690066156, "loss": 1.5535, "step": 11421 }, { "epoch": 0.1484236806071051, "grad_norm": 0.4482356309890747, "learning_rate": 0.00017035053743875018, "loss": 1.2912, "step": 11422 }, { "epoch": 0.14843667515102096, "grad_norm": 0.4512385129928589, "learning_rate": 0.0001703479379768388, "loss": 1.5941, "step": 11423 }, { "epoch": 0.14844966969493684, "grad_norm": 0.263071745634079, "learning_rate": 0.00017034533851492743, "loss": 1.3899, "step": 11424 }, { "epoch": 0.1484626642388527, "grad_norm": 0.4690077304840088, "learning_rate": 0.00017034273905301603, "loss": 1.4529, "step": 11425 }, { "epoch": 0.14847565878276858, "grad_norm": 0.4605686664581299, "learning_rate": 0.00017034013959110465, "loss": 1.4893, "step": 11426 }, { "epoch": 0.14848865332668446, "grad_norm": 0.42092588543891907, "learning_rate": 0.00017033754012919328, "loss": 1.5955, "step": 11427 }, { "epoch": 0.14850164787060033, "grad_norm": 0.4872777760028839, "learning_rate": 0.00017033494066728187, "loss": 1.3893, "step": 11428 }, { "epoch": 0.1485146424145162, "grad_norm": 0.4220643639564514, "learning_rate": 0.0001703323412053705, "loss": 1.4826, "step": 11429 }, { "epoch": 0.14852763695843207, "grad_norm": 0.41432252526283264, "learning_rate": 0.0001703297417434591, "loss": 1.6453, "step": 11430 }, { "epoch": 0.14854063150234795, "grad_norm": 0.3603918254375458, "learning_rate": 0.00017032714228154775, "loss": 1.5115, "step": 11431 }, { "epoch": 0.14855362604626382, "grad_norm": 0.4360947608947754, "learning_rate": 0.00017032454281963634, "loss": 1.306, "step": 11432 }, { "epoch": 0.1485666205901797, "grad_norm": 0.40506333112716675, "learning_rate": 0.00017032194335772497, "loss": 1.529, "step": 11433 }, { "epoch": 0.14857961513409557, "grad_norm": 0.32672175765037537, "learning_rate": 0.00017031934389581357, "loss": 1.3138, "step": 11434 }, { "epoch": 0.14859260967801144, "grad_norm": 0.31174424290657043, "learning_rate": 0.0001703167444339022, "loss": 1.4927, "step": 11435 }, { "epoch": 0.1486056042219273, "grad_norm": 0.4484836757183075, "learning_rate": 0.00017031414497199082, "loss": 1.5058, "step": 11436 }, { "epoch": 0.14861859876584319, "grad_norm": 0.38390782475471497, "learning_rate": 0.0001703115455100794, "loss": 1.2884, "step": 11437 }, { "epoch": 0.14863159330975906, "grad_norm": 0.39949488639831543, "learning_rate": 0.00017030894604816804, "loss": 1.5338, "step": 11438 }, { "epoch": 0.14864458785367493, "grad_norm": 0.2830457389354706, "learning_rate": 0.00017030634658625666, "loss": 1.2745, "step": 11439 }, { "epoch": 0.1486575823975908, "grad_norm": 0.48456454277038574, "learning_rate": 0.00017030374712434526, "loss": 1.51, "step": 11440 }, { "epoch": 0.14867057694150668, "grad_norm": 0.39934781193733215, "learning_rate": 0.00017030114766243388, "loss": 1.4383, "step": 11441 }, { "epoch": 0.14868357148542255, "grad_norm": 0.34529444575309753, "learning_rate": 0.00017029854820052248, "loss": 1.3632, "step": 11442 }, { "epoch": 0.14869656602933842, "grad_norm": 0.36620810627937317, "learning_rate": 0.00017029594873861113, "loss": 1.359, "step": 11443 }, { "epoch": 0.1487095605732543, "grad_norm": 0.3614499568939209, "learning_rate": 0.00017029334927669973, "loss": 1.3205, "step": 11444 }, { "epoch": 0.14872255511717017, "grad_norm": 0.4184872508049011, "learning_rate": 0.00017029074981478835, "loss": 1.7229, "step": 11445 }, { "epoch": 0.14873554966108604, "grad_norm": 0.39357790350914, "learning_rate": 0.00017028815035287695, "loss": 1.6542, "step": 11446 }, { "epoch": 0.14874854420500191, "grad_norm": 0.456364244222641, "learning_rate": 0.00017028555089096558, "loss": 1.4659, "step": 11447 }, { "epoch": 0.1487615387489178, "grad_norm": 0.450084388256073, "learning_rate": 0.0001702829514290542, "loss": 1.4164, "step": 11448 }, { "epoch": 0.14877453329283366, "grad_norm": 0.4823257625102997, "learning_rate": 0.0001702803519671428, "loss": 1.4727, "step": 11449 }, { "epoch": 0.14878752783674953, "grad_norm": 0.4477674961090088, "learning_rate": 0.00017027775250523142, "loss": 1.4606, "step": 11450 }, { "epoch": 0.1488005223806654, "grad_norm": 0.3501686751842499, "learning_rate": 0.00017027515304332005, "loss": 1.5221, "step": 11451 }, { "epoch": 0.14881351692458128, "grad_norm": 0.4303988516330719, "learning_rate": 0.00017027255358140864, "loss": 1.4635, "step": 11452 }, { "epoch": 0.14882651146849715, "grad_norm": 0.42449280619621277, "learning_rate": 0.00017026995411949727, "loss": 1.6963, "step": 11453 }, { "epoch": 0.14883950601241303, "grad_norm": 0.34983667731285095, "learning_rate": 0.0001702673546575859, "loss": 1.3887, "step": 11454 }, { "epoch": 0.1488525005563289, "grad_norm": 0.3292420506477356, "learning_rate": 0.00017026475519567452, "loss": 1.2677, "step": 11455 }, { "epoch": 0.14886549510024477, "grad_norm": 0.36140671372413635, "learning_rate": 0.00017026215573376312, "loss": 1.3379, "step": 11456 }, { "epoch": 0.14887848964416064, "grad_norm": 0.2870061993598938, "learning_rate": 0.00017025955627185174, "loss": 1.4775, "step": 11457 }, { "epoch": 0.14889148418807654, "grad_norm": 0.5135142207145691, "learning_rate": 0.00017025695680994036, "loss": 1.5304, "step": 11458 }, { "epoch": 0.14890447873199242, "grad_norm": 0.3882032036781311, "learning_rate": 0.00017025435734802896, "loss": 1.3712, "step": 11459 }, { "epoch": 0.1489174732759083, "grad_norm": 0.5394908785820007, "learning_rate": 0.00017025175788611759, "loss": 1.5066, "step": 11460 }, { "epoch": 0.14893046781982416, "grad_norm": 0.4915997087955475, "learning_rate": 0.00017024915842420618, "loss": 1.4292, "step": 11461 }, { "epoch": 0.14894346236374004, "grad_norm": 0.4425760507583618, "learning_rate": 0.00017024655896229484, "loss": 1.4188, "step": 11462 }, { "epoch": 0.1489564569076559, "grad_norm": 0.358690470457077, "learning_rate": 0.00017024395950038343, "loss": 1.4782, "step": 11463 }, { "epoch": 0.14896945145157178, "grad_norm": 0.29866454005241394, "learning_rate": 0.00017024136003847203, "loss": 1.2896, "step": 11464 }, { "epoch": 0.14898244599548766, "grad_norm": 0.3622207045555115, "learning_rate": 0.00017023876057656065, "loss": 1.4822, "step": 11465 }, { "epoch": 0.14899544053940353, "grad_norm": 0.441483736038208, "learning_rate": 0.00017023616111464928, "loss": 1.6407, "step": 11466 }, { "epoch": 0.1490084350833194, "grad_norm": 0.39719903469085693, "learning_rate": 0.0001702335616527379, "loss": 1.6008, "step": 11467 }, { "epoch": 0.14902142962723527, "grad_norm": 0.43006718158721924, "learning_rate": 0.0001702309621908265, "loss": 1.4292, "step": 11468 }, { "epoch": 0.14903442417115115, "grad_norm": 0.4161652624607086, "learning_rate": 0.00017022836272891513, "loss": 1.3961, "step": 11469 }, { "epoch": 0.14904741871506702, "grad_norm": 0.30316856503486633, "learning_rate": 0.00017022576326700375, "loss": 1.2608, "step": 11470 }, { "epoch": 0.1490604132589829, "grad_norm": 0.3793087303638458, "learning_rate": 0.00017022316380509235, "loss": 1.3831, "step": 11471 }, { "epoch": 0.14907340780289877, "grad_norm": 0.4868999123573303, "learning_rate": 0.00017022056434318097, "loss": 1.4315, "step": 11472 }, { "epoch": 0.14908640234681464, "grad_norm": 0.34300515055656433, "learning_rate": 0.00017021796488126957, "loss": 1.2452, "step": 11473 }, { "epoch": 0.1490993968907305, "grad_norm": 0.3368987441062927, "learning_rate": 0.00017021536541935822, "loss": 1.3229, "step": 11474 }, { "epoch": 0.14911239143464639, "grad_norm": 0.37760478258132935, "learning_rate": 0.00017021276595744682, "loss": 1.2469, "step": 11475 }, { "epoch": 0.14912538597856226, "grad_norm": 0.3617461025714874, "learning_rate": 0.00017021016649553542, "loss": 1.5085, "step": 11476 }, { "epoch": 0.14913838052247813, "grad_norm": 0.35681548714637756, "learning_rate": 0.00017020756703362404, "loss": 1.3706, "step": 11477 }, { "epoch": 0.149151375066394, "grad_norm": 0.4595588445663452, "learning_rate": 0.00017020496757171266, "loss": 1.4449, "step": 11478 }, { "epoch": 0.14916436961030988, "grad_norm": 0.5224841833114624, "learning_rate": 0.0001702023681098013, "loss": 1.4452, "step": 11479 }, { "epoch": 0.14917736415422575, "grad_norm": 0.41214656829833984, "learning_rate": 0.00017019976864788989, "loss": 1.5993, "step": 11480 }, { "epoch": 0.14919035869814162, "grad_norm": 0.3594188392162323, "learning_rate": 0.0001701971691859785, "loss": 1.4421, "step": 11481 }, { "epoch": 0.1492033532420575, "grad_norm": 0.5174275040626526, "learning_rate": 0.00017019456972406714, "loss": 1.3417, "step": 11482 }, { "epoch": 0.14921634778597337, "grad_norm": 0.23060114681720734, "learning_rate": 0.00017019197026215573, "loss": 1.1345, "step": 11483 }, { "epoch": 0.14922934232988924, "grad_norm": 0.30914217233657837, "learning_rate": 0.00017018937080024436, "loss": 1.3373, "step": 11484 }, { "epoch": 0.14924233687380511, "grad_norm": 0.3719383776187897, "learning_rate": 0.00017018677133833298, "loss": 1.527, "step": 11485 }, { "epoch": 0.149255331417721, "grad_norm": 0.4321482479572296, "learning_rate": 0.0001701841718764216, "loss": 1.321, "step": 11486 }, { "epoch": 0.14926832596163686, "grad_norm": 0.2888380289077759, "learning_rate": 0.0001701815724145102, "loss": 1.3297, "step": 11487 }, { "epoch": 0.14928132050555273, "grad_norm": 0.4421655535697937, "learning_rate": 0.0001701789729525988, "loss": 1.5391, "step": 11488 }, { "epoch": 0.1492943150494686, "grad_norm": 0.3577654957771301, "learning_rate": 0.00017017637349068745, "loss": 1.368, "step": 11489 }, { "epoch": 0.14930730959338448, "grad_norm": 0.3633783161640167, "learning_rate": 0.00017017377402877605, "loss": 1.5692, "step": 11490 }, { "epoch": 0.14932030413730035, "grad_norm": 0.3944384753704071, "learning_rate": 0.00017017117456686467, "loss": 1.3401, "step": 11491 }, { "epoch": 0.14933329868121623, "grad_norm": 0.34175336360931396, "learning_rate": 0.00017016857510495327, "loss": 1.3792, "step": 11492 }, { "epoch": 0.1493462932251321, "grad_norm": 0.3473910689353943, "learning_rate": 0.0001701659756430419, "loss": 1.3927, "step": 11493 }, { "epoch": 0.14935928776904797, "grad_norm": 0.412923663854599, "learning_rate": 0.00017016337618113052, "loss": 1.462, "step": 11494 }, { "epoch": 0.14937228231296384, "grad_norm": 0.46928200125694275, "learning_rate": 0.00017016077671921912, "loss": 1.5559, "step": 11495 }, { "epoch": 0.14938527685687972, "grad_norm": 0.36658424139022827, "learning_rate": 0.00017015817725730774, "loss": 1.4706, "step": 11496 }, { "epoch": 0.1493982714007956, "grad_norm": 0.44214513897895813, "learning_rate": 0.00017015557779539637, "loss": 1.5378, "step": 11497 }, { "epoch": 0.14941126594471146, "grad_norm": 0.40049588680267334, "learning_rate": 0.000170152978333485, "loss": 1.3949, "step": 11498 }, { "epoch": 0.14942426048862734, "grad_norm": 0.4464159607887268, "learning_rate": 0.0001701503788715736, "loss": 1.4155, "step": 11499 }, { "epoch": 0.1494372550325432, "grad_norm": 0.4204886257648468, "learning_rate": 0.0001701477794096622, "loss": 1.5775, "step": 11500 }, { "epoch": 0.14945024957645908, "grad_norm": 0.41453877091407776, "learning_rate": 0.00017014517994775084, "loss": 1.4874, "step": 11501 }, { "epoch": 0.14946324412037496, "grad_norm": 0.43641197681427, "learning_rate": 0.00017014258048583943, "loss": 1.3632, "step": 11502 }, { "epoch": 0.14947623866429083, "grad_norm": 0.4206138551235199, "learning_rate": 0.00017013998102392806, "loss": 1.5258, "step": 11503 }, { "epoch": 0.1494892332082067, "grad_norm": 0.4245132803916931, "learning_rate": 0.00017013738156201666, "loss": 1.3837, "step": 11504 }, { "epoch": 0.14950222775212257, "grad_norm": 0.4638732969760895, "learning_rate": 0.00017013478210010528, "loss": 1.4121, "step": 11505 }, { "epoch": 0.14951522229603845, "grad_norm": 0.36251822113990784, "learning_rate": 0.0001701321826381939, "loss": 1.4755, "step": 11506 }, { "epoch": 0.14952821683995432, "grad_norm": 0.3813481330871582, "learning_rate": 0.0001701295831762825, "loss": 1.5329, "step": 11507 }, { "epoch": 0.1495412113838702, "grad_norm": 0.5225658416748047, "learning_rate": 0.00017012698371437113, "loss": 1.5728, "step": 11508 }, { "epoch": 0.14955420592778607, "grad_norm": 0.35174351930618286, "learning_rate": 0.00017012438425245975, "loss": 1.1789, "step": 11509 }, { "epoch": 0.14956720047170194, "grad_norm": 0.3173539340496063, "learning_rate": 0.00017012178479054838, "loss": 1.3163, "step": 11510 }, { "epoch": 0.1495801950156178, "grad_norm": 0.395747572183609, "learning_rate": 0.00017011918532863697, "loss": 1.4944, "step": 11511 }, { "epoch": 0.14959318955953368, "grad_norm": 0.3002728521823883, "learning_rate": 0.0001701165858667256, "loss": 1.2828, "step": 11512 }, { "epoch": 0.14960618410344956, "grad_norm": 0.43162137269973755, "learning_rate": 0.00017011398640481422, "loss": 1.4107, "step": 11513 }, { "epoch": 0.14961917864736543, "grad_norm": 0.41468486189842224, "learning_rate": 0.00017011138694290282, "loss": 1.5094, "step": 11514 }, { "epoch": 0.1496321731912813, "grad_norm": 0.44490954279899597, "learning_rate": 0.00017010878748099144, "loss": 1.3995, "step": 11515 }, { "epoch": 0.14964516773519718, "grad_norm": 0.399789959192276, "learning_rate": 0.00017010618801908004, "loss": 1.3026, "step": 11516 }, { "epoch": 0.14965816227911305, "grad_norm": 0.5348469614982605, "learning_rate": 0.0001701035885571687, "loss": 1.3991, "step": 11517 }, { "epoch": 0.14967115682302892, "grad_norm": 0.38565200567245483, "learning_rate": 0.0001701009890952573, "loss": 1.3996, "step": 11518 }, { "epoch": 0.1496841513669448, "grad_norm": 0.38135018944740295, "learning_rate": 0.0001700983896333459, "loss": 1.3403, "step": 11519 }, { "epoch": 0.14969714591086067, "grad_norm": 0.3521561622619629, "learning_rate": 0.0001700957901714345, "loss": 1.0993, "step": 11520 }, { "epoch": 0.14971014045477654, "grad_norm": 0.4569131135940552, "learning_rate": 0.00017009319070952314, "loss": 1.3737, "step": 11521 }, { "epoch": 0.14972313499869241, "grad_norm": 0.6189255118370056, "learning_rate": 0.00017009059124761176, "loss": 1.5228, "step": 11522 }, { "epoch": 0.1497361295426083, "grad_norm": 0.4383171498775482, "learning_rate": 0.00017008799178570036, "loss": 1.3263, "step": 11523 }, { "epoch": 0.14974912408652416, "grad_norm": 0.5443047285079956, "learning_rate": 0.00017008539232378898, "loss": 1.4963, "step": 11524 }, { "epoch": 0.14976211863044003, "grad_norm": 0.32509666681289673, "learning_rate": 0.0001700827928618776, "loss": 1.4382, "step": 11525 }, { "epoch": 0.1497751131743559, "grad_norm": 0.35972630977630615, "learning_rate": 0.0001700801933999662, "loss": 1.34, "step": 11526 }, { "epoch": 0.14978810771827178, "grad_norm": 0.3555624783039093, "learning_rate": 0.00017007759393805483, "loss": 1.2037, "step": 11527 }, { "epoch": 0.14980110226218765, "grad_norm": 0.4087228775024414, "learning_rate": 0.00017007499447614345, "loss": 1.4519, "step": 11528 }, { "epoch": 0.14981409680610352, "grad_norm": 0.34961187839508057, "learning_rate": 0.00017007239501423208, "loss": 1.4393, "step": 11529 }, { "epoch": 0.1498270913500194, "grad_norm": 0.3724414110183716, "learning_rate": 0.00017006979555232068, "loss": 1.3521, "step": 11530 }, { "epoch": 0.14984008589393527, "grad_norm": 0.4106716513633728, "learning_rate": 0.00017006719609040927, "loss": 1.4609, "step": 11531 }, { "epoch": 0.14985308043785114, "grad_norm": 0.4787338972091675, "learning_rate": 0.00017006459662849793, "loss": 1.473, "step": 11532 }, { "epoch": 0.14986607498176702, "grad_norm": 0.3432345390319824, "learning_rate": 0.00017006199716658652, "loss": 1.2589, "step": 11533 }, { "epoch": 0.14987906952568292, "grad_norm": 0.29949235916137695, "learning_rate": 0.00017005939770467515, "loss": 1.4328, "step": 11534 }, { "epoch": 0.1498920640695988, "grad_norm": 0.383391410112381, "learning_rate": 0.00017005679824276374, "loss": 1.4559, "step": 11535 }, { "epoch": 0.14990505861351466, "grad_norm": 0.36394205689430237, "learning_rate": 0.00017005419878085237, "loss": 1.572, "step": 11536 }, { "epoch": 0.14991805315743054, "grad_norm": 0.4608973562717438, "learning_rate": 0.000170051599318941, "loss": 1.581, "step": 11537 }, { "epoch": 0.1499310477013464, "grad_norm": 0.5038651823997498, "learning_rate": 0.0001700489998570296, "loss": 1.4742, "step": 11538 }, { "epoch": 0.14994404224526228, "grad_norm": 0.32855066657066345, "learning_rate": 0.00017004640039511822, "loss": 1.4412, "step": 11539 }, { "epoch": 0.14995703678917816, "grad_norm": 0.36024582386016846, "learning_rate": 0.00017004380093320684, "loss": 1.2248, "step": 11540 }, { "epoch": 0.14997003133309403, "grad_norm": 0.46043330430984497, "learning_rate": 0.00017004120147129546, "loss": 1.5752, "step": 11541 }, { "epoch": 0.1499830258770099, "grad_norm": 0.41294220089912415, "learning_rate": 0.00017003860200938406, "loss": 1.2996, "step": 11542 }, { "epoch": 0.14999602042092577, "grad_norm": 0.32781121134757996, "learning_rate": 0.00017003600254747266, "loss": 1.3951, "step": 11543 }, { "epoch": 0.15000901496484165, "grad_norm": 0.4537646472454071, "learning_rate": 0.0001700334030855613, "loss": 1.4994, "step": 11544 }, { "epoch": 0.15002200950875752, "grad_norm": 0.4044649004936218, "learning_rate": 0.0001700308036236499, "loss": 1.4393, "step": 11545 }, { "epoch": 0.1500350040526734, "grad_norm": 0.4367256760597229, "learning_rate": 0.00017002820416173853, "loss": 1.2856, "step": 11546 }, { "epoch": 0.15004799859658927, "grad_norm": 0.3509795665740967, "learning_rate": 0.00017002560469982713, "loss": 1.3965, "step": 11547 }, { "epoch": 0.15006099314050514, "grad_norm": 0.3995439112186432, "learning_rate": 0.00017002300523791575, "loss": 1.4932, "step": 11548 }, { "epoch": 0.150073987684421, "grad_norm": 0.5210452079772949, "learning_rate": 0.00017002040577600438, "loss": 1.3718, "step": 11549 }, { "epoch": 0.15008698222833688, "grad_norm": 0.3905256390571594, "learning_rate": 0.00017001780631409298, "loss": 1.5177, "step": 11550 }, { "epoch": 0.15009997677225276, "grad_norm": 0.4281039237976074, "learning_rate": 0.0001700152068521816, "loss": 1.5604, "step": 11551 }, { "epoch": 0.15011297131616863, "grad_norm": 0.34358009696006775, "learning_rate": 0.00017001260739027023, "loss": 1.5522, "step": 11552 }, { "epoch": 0.1501259658600845, "grad_norm": 0.4277726411819458, "learning_rate": 0.00017001000792835885, "loss": 1.3577, "step": 11553 }, { "epoch": 0.15013896040400038, "grad_norm": 0.3907712399959564, "learning_rate": 0.00017000740846644745, "loss": 1.4417, "step": 11554 }, { "epoch": 0.15015195494791625, "grad_norm": 0.29075127840042114, "learning_rate": 0.00017000480900453607, "loss": 1.4091, "step": 11555 }, { "epoch": 0.15016494949183212, "grad_norm": 0.34866446256637573, "learning_rate": 0.0001700022095426247, "loss": 1.369, "step": 11556 }, { "epoch": 0.150177944035748, "grad_norm": 0.41150814294815063, "learning_rate": 0.0001699996100807133, "loss": 1.6419, "step": 11557 }, { "epoch": 0.15019093857966387, "grad_norm": 0.3709924817085266, "learning_rate": 0.00016999701061880192, "loss": 1.2633, "step": 11558 }, { "epoch": 0.15020393312357974, "grad_norm": 0.43236422538757324, "learning_rate": 0.00016999441115689054, "loss": 1.527, "step": 11559 }, { "epoch": 0.15021692766749561, "grad_norm": 0.39735615253448486, "learning_rate": 0.00016999181169497914, "loss": 1.4665, "step": 11560 }, { "epoch": 0.1502299222114115, "grad_norm": 0.3811829686164856, "learning_rate": 0.00016998921223306776, "loss": 1.472, "step": 11561 }, { "epoch": 0.15024291675532736, "grad_norm": 0.447645902633667, "learning_rate": 0.00016998661277115636, "loss": 1.4971, "step": 11562 }, { "epoch": 0.15025591129924323, "grad_norm": 0.4006495773792267, "learning_rate": 0.000169984013309245, "loss": 1.3629, "step": 11563 }, { "epoch": 0.1502689058431591, "grad_norm": 0.44452422857284546, "learning_rate": 0.0001699814138473336, "loss": 1.409, "step": 11564 }, { "epoch": 0.15028190038707498, "grad_norm": 0.33562028408050537, "learning_rate": 0.00016997881438542224, "loss": 1.4957, "step": 11565 }, { "epoch": 0.15029489493099085, "grad_norm": 0.3849475383758545, "learning_rate": 0.00016997621492351083, "loss": 1.3658, "step": 11566 }, { "epoch": 0.15030788947490673, "grad_norm": 0.5096610188484192, "learning_rate": 0.00016997361546159946, "loss": 1.4549, "step": 11567 }, { "epoch": 0.1503208840188226, "grad_norm": 0.5005442500114441, "learning_rate": 0.00016997101599968808, "loss": 1.5121, "step": 11568 }, { "epoch": 0.15033387856273847, "grad_norm": 0.4770018458366394, "learning_rate": 0.00016996841653777668, "loss": 1.4404, "step": 11569 }, { "epoch": 0.15034687310665434, "grad_norm": 0.3391443192958832, "learning_rate": 0.0001699658170758653, "loss": 1.4948, "step": 11570 }, { "epoch": 0.15035986765057022, "grad_norm": 0.31590428948402405, "learning_rate": 0.00016996321761395393, "loss": 1.2762, "step": 11571 }, { "epoch": 0.1503728621944861, "grad_norm": 0.5151616930961609, "learning_rate": 0.00016996061815204253, "loss": 1.4638, "step": 11572 }, { "epoch": 0.15038585673840196, "grad_norm": 0.37689313292503357, "learning_rate": 0.00016995801869013115, "loss": 1.3971, "step": 11573 }, { "epoch": 0.15039885128231784, "grad_norm": 0.34364745020866394, "learning_rate": 0.00016995541922821975, "loss": 1.3721, "step": 11574 }, { "epoch": 0.1504118458262337, "grad_norm": 0.3163674473762512, "learning_rate": 0.0001699528197663084, "loss": 1.4001, "step": 11575 }, { "epoch": 0.15042484037014958, "grad_norm": 0.29825130105018616, "learning_rate": 0.000169950220304397, "loss": 1.3976, "step": 11576 }, { "epoch": 0.15043783491406545, "grad_norm": 0.36027073860168457, "learning_rate": 0.00016994762084248562, "loss": 1.4552, "step": 11577 }, { "epoch": 0.15045082945798133, "grad_norm": 0.44277259707450867, "learning_rate": 0.00016994502138057422, "loss": 1.4799, "step": 11578 }, { "epoch": 0.1504638240018972, "grad_norm": 0.3719746470451355, "learning_rate": 0.00016994242191866284, "loss": 1.3541, "step": 11579 }, { "epoch": 0.15047681854581307, "grad_norm": 0.288047730922699, "learning_rate": 0.00016993982245675147, "loss": 1.4721, "step": 11580 }, { "epoch": 0.15048981308972895, "grad_norm": 0.45356979966163635, "learning_rate": 0.00016993722299484006, "loss": 1.3371, "step": 11581 }, { "epoch": 0.15050280763364482, "grad_norm": 0.3906473219394684, "learning_rate": 0.0001699346235329287, "loss": 1.2715, "step": 11582 }, { "epoch": 0.1505158021775607, "grad_norm": 0.36312851309776306, "learning_rate": 0.0001699320240710173, "loss": 1.3731, "step": 11583 }, { "epoch": 0.15052879672147657, "grad_norm": 0.4284430146217346, "learning_rate": 0.00016992942460910594, "loss": 1.4051, "step": 11584 }, { "epoch": 0.15054179126539244, "grad_norm": 0.35854005813598633, "learning_rate": 0.00016992682514719454, "loss": 1.4289, "step": 11585 }, { "epoch": 0.1505547858093083, "grad_norm": 0.4714095890522003, "learning_rate": 0.00016992422568528313, "loss": 1.4351, "step": 11586 }, { "epoch": 0.15056778035322418, "grad_norm": 0.4110665023326874, "learning_rate": 0.00016992162622337178, "loss": 1.3518, "step": 11587 }, { "epoch": 0.15058077489714006, "grad_norm": 0.3543650209903717, "learning_rate": 0.00016991902676146038, "loss": 1.2744, "step": 11588 }, { "epoch": 0.15059376944105593, "grad_norm": 0.3380261957645416, "learning_rate": 0.000169916427299549, "loss": 1.3543, "step": 11589 }, { "epoch": 0.1506067639849718, "grad_norm": 0.3972093462944031, "learning_rate": 0.0001699138278376376, "loss": 1.3247, "step": 11590 }, { "epoch": 0.15061975852888768, "grad_norm": 0.39904487133026123, "learning_rate": 0.00016991122837572623, "loss": 1.4365, "step": 11591 }, { "epoch": 0.15063275307280355, "grad_norm": 0.4320739805698395, "learning_rate": 0.00016990862891381485, "loss": 1.4437, "step": 11592 }, { "epoch": 0.15064574761671942, "grad_norm": 0.3298642039299011, "learning_rate": 0.00016990602945190345, "loss": 1.4883, "step": 11593 }, { "epoch": 0.1506587421606353, "grad_norm": 0.3774215281009674, "learning_rate": 0.00016990342998999207, "loss": 1.2819, "step": 11594 }, { "epoch": 0.15067173670455117, "grad_norm": 0.42426952719688416, "learning_rate": 0.0001699008305280807, "loss": 1.4879, "step": 11595 }, { "epoch": 0.15068473124846704, "grad_norm": 0.28732702136039734, "learning_rate": 0.00016989823106616932, "loss": 1.3825, "step": 11596 }, { "epoch": 0.15069772579238291, "grad_norm": 0.34764164686203003, "learning_rate": 0.00016989563160425792, "loss": 1.2436, "step": 11597 }, { "epoch": 0.1507107203362988, "grad_norm": 0.29732435941696167, "learning_rate": 0.00016989303214234655, "loss": 1.3263, "step": 11598 }, { "epoch": 0.15072371488021466, "grad_norm": 0.4686795473098755, "learning_rate": 0.00016989043268043517, "loss": 1.4554, "step": 11599 }, { "epoch": 0.15073670942413053, "grad_norm": 0.3847302496433258, "learning_rate": 0.00016988783321852377, "loss": 1.3348, "step": 11600 }, { "epoch": 0.1507497039680464, "grad_norm": 0.37850356101989746, "learning_rate": 0.0001698852337566124, "loss": 1.5803, "step": 11601 }, { "epoch": 0.15076269851196228, "grad_norm": 0.49775758385658264, "learning_rate": 0.00016988263429470102, "loss": 1.4388, "step": 11602 }, { "epoch": 0.15077569305587815, "grad_norm": 0.39869606494903564, "learning_rate": 0.0001698800348327896, "loss": 1.2424, "step": 11603 }, { "epoch": 0.15078868759979402, "grad_norm": 0.4403208792209625, "learning_rate": 0.00016987743537087824, "loss": 1.485, "step": 11604 }, { "epoch": 0.1508016821437099, "grad_norm": 0.42019885778427124, "learning_rate": 0.00016987483590896684, "loss": 1.2227, "step": 11605 }, { "epoch": 0.15081467668762577, "grad_norm": 0.4300461411476135, "learning_rate": 0.0001698722364470555, "loss": 1.4772, "step": 11606 }, { "epoch": 0.15082767123154164, "grad_norm": 0.31564974784851074, "learning_rate": 0.00016986963698514408, "loss": 1.3623, "step": 11607 }, { "epoch": 0.15084066577545752, "grad_norm": 0.4545326232910156, "learning_rate": 0.0001698670375232327, "loss": 1.5086, "step": 11608 }, { "epoch": 0.1508536603193734, "grad_norm": 0.447229266166687, "learning_rate": 0.0001698644380613213, "loss": 1.3773, "step": 11609 }, { "epoch": 0.1508666548632893, "grad_norm": 0.593318521976471, "learning_rate": 0.00016986183859940993, "loss": 1.6173, "step": 11610 }, { "epoch": 0.15087964940720516, "grad_norm": 0.3710833787918091, "learning_rate": 0.00016985923913749856, "loss": 1.4388, "step": 11611 }, { "epoch": 0.15089264395112104, "grad_norm": 0.32597455382347107, "learning_rate": 0.00016985663967558715, "loss": 1.2032, "step": 11612 }, { "epoch": 0.1509056384950369, "grad_norm": 0.5803954005241394, "learning_rate": 0.00016985404021367578, "loss": 1.526, "step": 11613 }, { "epoch": 0.15091863303895278, "grad_norm": 0.33099353313446045, "learning_rate": 0.0001698514407517644, "loss": 1.4271, "step": 11614 }, { "epoch": 0.15093162758286865, "grad_norm": 0.40249207615852356, "learning_rate": 0.000169848841289853, "loss": 1.5788, "step": 11615 }, { "epoch": 0.15094462212678453, "grad_norm": 0.3555307686328888, "learning_rate": 0.00016984624182794162, "loss": 1.3229, "step": 11616 }, { "epoch": 0.1509576166707004, "grad_norm": 0.3716249465942383, "learning_rate": 0.00016984364236603022, "loss": 1.4209, "step": 11617 }, { "epoch": 0.15097061121461627, "grad_norm": 0.4033333957195282, "learning_rate": 0.00016984104290411887, "loss": 1.368, "step": 11618 }, { "epoch": 0.15098360575853215, "grad_norm": 0.4291049540042877, "learning_rate": 0.00016983844344220747, "loss": 1.583, "step": 11619 }, { "epoch": 0.15099660030244802, "grad_norm": 0.21523873507976532, "learning_rate": 0.0001698358439802961, "loss": 1.3157, "step": 11620 }, { "epoch": 0.1510095948463639, "grad_norm": 0.6846172213554382, "learning_rate": 0.0001698332445183847, "loss": 1.4644, "step": 11621 }, { "epoch": 0.15102258939027977, "grad_norm": 0.4366627335548401, "learning_rate": 0.00016983064505647332, "loss": 1.3652, "step": 11622 }, { "epoch": 0.15103558393419564, "grad_norm": 0.3246694505214691, "learning_rate": 0.00016982804559456194, "loss": 1.4393, "step": 11623 }, { "epoch": 0.1510485784781115, "grad_norm": 0.3185274302959442, "learning_rate": 0.00016982544613265054, "loss": 1.3833, "step": 11624 }, { "epoch": 0.15106157302202738, "grad_norm": 0.428157240152359, "learning_rate": 0.00016982284667073916, "loss": 1.439, "step": 11625 }, { "epoch": 0.15107456756594326, "grad_norm": 0.41474050283432007, "learning_rate": 0.0001698202472088278, "loss": 1.4438, "step": 11626 }, { "epoch": 0.15108756210985913, "grad_norm": 0.4155484139919281, "learning_rate": 0.00016981764774691638, "loss": 1.6136, "step": 11627 }, { "epoch": 0.151100556653775, "grad_norm": 0.36345165967941284, "learning_rate": 0.000169815048285005, "loss": 1.3957, "step": 11628 }, { "epoch": 0.15111355119769088, "grad_norm": 0.3928201198577881, "learning_rate": 0.0001698124488230936, "loss": 1.4091, "step": 11629 }, { "epoch": 0.15112654574160675, "grad_norm": 0.38821154832839966, "learning_rate": 0.00016980984936118226, "loss": 1.3059, "step": 11630 }, { "epoch": 0.15113954028552262, "grad_norm": 0.4513520300388336, "learning_rate": 0.00016980724989927085, "loss": 1.5232, "step": 11631 }, { "epoch": 0.1511525348294385, "grad_norm": 0.38104453682899475, "learning_rate": 0.00016980465043735948, "loss": 1.4161, "step": 11632 }, { "epoch": 0.15116552937335437, "grad_norm": 0.32530683279037476, "learning_rate": 0.0001698020509754481, "loss": 1.6381, "step": 11633 }, { "epoch": 0.15117852391727024, "grad_norm": 0.4283129870891571, "learning_rate": 0.0001697994515135367, "loss": 1.4463, "step": 11634 }, { "epoch": 0.15119151846118611, "grad_norm": 0.36654505133628845, "learning_rate": 0.00016979685205162533, "loss": 1.3663, "step": 11635 }, { "epoch": 0.151204513005102, "grad_norm": 0.3840312957763672, "learning_rate": 0.00016979425258971392, "loss": 1.4194, "step": 11636 }, { "epoch": 0.15121750754901786, "grad_norm": 0.4775658845901489, "learning_rate": 0.00016979165312780257, "loss": 1.326, "step": 11637 }, { "epoch": 0.15123050209293373, "grad_norm": 0.4977417588233948, "learning_rate": 0.00016978905366589117, "loss": 1.4097, "step": 11638 }, { "epoch": 0.1512434966368496, "grad_norm": 0.45721298456192017, "learning_rate": 0.0001697864542039798, "loss": 1.6385, "step": 11639 }, { "epoch": 0.15125649118076548, "grad_norm": 0.38940730690956116, "learning_rate": 0.0001697838547420684, "loss": 1.527, "step": 11640 }, { "epoch": 0.15126948572468135, "grad_norm": 0.29324549436569214, "learning_rate": 0.00016978125528015702, "loss": 1.347, "step": 11641 }, { "epoch": 0.15128248026859722, "grad_norm": 0.4306432604789734, "learning_rate": 0.00016977865581824564, "loss": 1.4768, "step": 11642 }, { "epoch": 0.1512954748125131, "grad_norm": 0.33141615986824036, "learning_rate": 0.00016977605635633424, "loss": 1.5374, "step": 11643 }, { "epoch": 0.15130846935642897, "grad_norm": 0.4426579475402832, "learning_rate": 0.00016977345689442286, "loss": 1.5092, "step": 11644 }, { "epoch": 0.15132146390034484, "grad_norm": 0.40388157963752747, "learning_rate": 0.0001697708574325115, "loss": 1.4579, "step": 11645 }, { "epoch": 0.15133445844426072, "grad_norm": 0.37738269567489624, "learning_rate": 0.0001697682579706001, "loss": 1.2104, "step": 11646 }, { "epoch": 0.1513474529881766, "grad_norm": 0.3412838578224182, "learning_rate": 0.0001697656585086887, "loss": 1.4118, "step": 11647 }, { "epoch": 0.15136044753209246, "grad_norm": 0.315868616104126, "learning_rate": 0.0001697630590467773, "loss": 1.189, "step": 11648 }, { "epoch": 0.15137344207600834, "grad_norm": 0.4698161780834198, "learning_rate": 0.00016976045958486596, "loss": 1.3925, "step": 11649 }, { "epoch": 0.1513864366199242, "grad_norm": 0.4214162528514862, "learning_rate": 0.00016975786012295456, "loss": 1.3597, "step": 11650 }, { "epoch": 0.15139943116384008, "grad_norm": 0.35944873094558716, "learning_rate": 0.00016975526066104318, "loss": 1.3452, "step": 11651 }, { "epoch": 0.15141242570775595, "grad_norm": 0.42097073793411255, "learning_rate": 0.00016975266119913178, "loss": 1.5193, "step": 11652 }, { "epoch": 0.15142542025167183, "grad_norm": 0.36438876390457153, "learning_rate": 0.0001697500617372204, "loss": 1.508, "step": 11653 }, { "epoch": 0.1514384147955877, "grad_norm": 0.3361772894859314, "learning_rate": 0.00016974746227530903, "loss": 1.4607, "step": 11654 }, { "epoch": 0.15145140933950357, "grad_norm": 0.34069326519966125, "learning_rate": 0.00016974486281339763, "loss": 1.4599, "step": 11655 }, { "epoch": 0.15146440388341945, "grad_norm": 0.3773195445537567, "learning_rate": 0.00016974226335148625, "loss": 1.365, "step": 11656 }, { "epoch": 0.15147739842733532, "grad_norm": 0.40503740310668945, "learning_rate": 0.00016973966388957487, "loss": 1.37, "step": 11657 }, { "epoch": 0.1514903929712512, "grad_norm": 0.4238918721675873, "learning_rate": 0.00016973706442766347, "loss": 1.3574, "step": 11658 }, { "epoch": 0.15150338751516707, "grad_norm": 0.3853955864906311, "learning_rate": 0.0001697344649657521, "loss": 1.485, "step": 11659 }, { "epoch": 0.15151638205908294, "grad_norm": 0.32171550393104553, "learning_rate": 0.0001697318655038407, "loss": 1.4155, "step": 11660 }, { "epoch": 0.1515293766029988, "grad_norm": 0.361360639333725, "learning_rate": 0.00016972926604192935, "loss": 1.45, "step": 11661 }, { "epoch": 0.15154237114691468, "grad_norm": 0.3709021508693695, "learning_rate": 0.00016972666658001794, "loss": 1.4208, "step": 11662 }, { "epoch": 0.15155536569083056, "grad_norm": 0.3821188807487488, "learning_rate": 0.00016972406711810657, "loss": 1.3785, "step": 11663 }, { "epoch": 0.15156836023474643, "grad_norm": 0.45584356784820557, "learning_rate": 0.00016972146765619516, "loss": 1.4151, "step": 11664 }, { "epoch": 0.1515813547786623, "grad_norm": 0.427600234746933, "learning_rate": 0.0001697188681942838, "loss": 1.4295, "step": 11665 }, { "epoch": 0.15159434932257818, "grad_norm": 0.4677857458591461, "learning_rate": 0.00016971626873237241, "loss": 1.5649, "step": 11666 }, { "epoch": 0.15160734386649405, "grad_norm": 0.44161033630371094, "learning_rate": 0.000169713669270461, "loss": 1.6056, "step": 11667 }, { "epoch": 0.15162033841040992, "grad_norm": 0.40123143792152405, "learning_rate": 0.00016971106980854964, "loss": 1.571, "step": 11668 }, { "epoch": 0.1516333329543258, "grad_norm": 0.35370585322380066, "learning_rate": 0.00016970847034663826, "loss": 1.4027, "step": 11669 }, { "epoch": 0.15164632749824167, "grad_norm": 0.41544097661972046, "learning_rate": 0.00016970587088472686, "loss": 1.3507, "step": 11670 }, { "epoch": 0.15165932204215754, "grad_norm": 0.3244834542274475, "learning_rate": 0.00016970327142281548, "loss": 1.2671, "step": 11671 }, { "epoch": 0.1516723165860734, "grad_norm": 0.4362773597240448, "learning_rate": 0.0001697006719609041, "loss": 1.4427, "step": 11672 }, { "epoch": 0.1516853111299893, "grad_norm": 0.3408445119857788, "learning_rate": 0.00016969807249899273, "loss": 1.4695, "step": 11673 }, { "epoch": 0.15169830567390516, "grad_norm": 0.40236592292785645, "learning_rate": 0.00016969547303708133, "loss": 1.3771, "step": 11674 }, { "epoch": 0.15171130021782103, "grad_norm": 0.4245149791240692, "learning_rate": 0.00016969287357516995, "loss": 1.4197, "step": 11675 }, { "epoch": 0.1517242947617369, "grad_norm": 0.333930641412735, "learning_rate": 0.00016969027411325858, "loss": 1.3159, "step": 11676 }, { "epoch": 0.15173728930565278, "grad_norm": 0.4928986728191376, "learning_rate": 0.00016968767465134717, "loss": 1.5399, "step": 11677 }, { "epoch": 0.15175028384956865, "grad_norm": 0.4258807599544525, "learning_rate": 0.0001696850751894358, "loss": 1.4626, "step": 11678 }, { "epoch": 0.15176327839348452, "grad_norm": 0.31232404708862305, "learning_rate": 0.0001696824757275244, "loss": 1.3532, "step": 11679 }, { "epoch": 0.1517762729374004, "grad_norm": 0.3483731746673584, "learning_rate": 0.00016967987626561305, "loss": 1.3621, "step": 11680 }, { "epoch": 0.15178926748131627, "grad_norm": 0.37102842330932617, "learning_rate": 0.00016967727680370165, "loss": 1.461, "step": 11681 }, { "epoch": 0.15180226202523214, "grad_norm": 0.2596631646156311, "learning_rate": 0.00016967467734179024, "loss": 1.3379, "step": 11682 }, { "epoch": 0.15181525656914802, "grad_norm": 0.3717079162597656, "learning_rate": 0.00016967207787987887, "loss": 1.5471, "step": 11683 }, { "epoch": 0.1518282511130639, "grad_norm": 0.43462130427360535, "learning_rate": 0.0001696694784179675, "loss": 1.2458, "step": 11684 }, { "epoch": 0.15184124565697976, "grad_norm": 0.3457672595977783, "learning_rate": 0.00016966687895605612, "loss": 1.5248, "step": 11685 }, { "epoch": 0.15185424020089566, "grad_norm": 0.4820672273635864, "learning_rate": 0.00016966427949414471, "loss": 1.4057, "step": 11686 }, { "epoch": 0.15186723474481154, "grad_norm": 0.43865731358528137, "learning_rate": 0.00016966168003223334, "loss": 1.5902, "step": 11687 }, { "epoch": 0.1518802292887274, "grad_norm": 0.4584430456161499, "learning_rate": 0.00016965908057032196, "loss": 1.4073, "step": 11688 }, { "epoch": 0.15189322383264328, "grad_norm": 0.42312484979629517, "learning_rate": 0.00016965648110841056, "loss": 1.5019, "step": 11689 }, { "epoch": 0.15190621837655915, "grad_norm": 0.37992262840270996, "learning_rate": 0.00016965388164649918, "loss": 1.7851, "step": 11690 }, { "epoch": 0.15191921292047503, "grad_norm": 0.424396812915802, "learning_rate": 0.00016965128218458778, "loss": 1.3634, "step": 11691 }, { "epoch": 0.1519322074643909, "grad_norm": 0.48533403873443604, "learning_rate": 0.00016964868272267643, "loss": 1.4662, "step": 11692 }, { "epoch": 0.15194520200830677, "grad_norm": 0.48497989773750305, "learning_rate": 0.00016964608326076503, "loss": 1.3883, "step": 11693 }, { "epoch": 0.15195819655222265, "grad_norm": 0.412862628698349, "learning_rate": 0.00016964348379885363, "loss": 1.4506, "step": 11694 }, { "epoch": 0.15197119109613852, "grad_norm": 0.3911252021789551, "learning_rate": 0.00016964088433694225, "loss": 1.3657, "step": 11695 }, { "epoch": 0.1519841856400544, "grad_norm": 0.4373502731323242, "learning_rate": 0.00016963828487503088, "loss": 1.4065, "step": 11696 }, { "epoch": 0.15199718018397027, "grad_norm": 0.33631205558776855, "learning_rate": 0.0001696356854131195, "loss": 1.3583, "step": 11697 }, { "epoch": 0.15201017472788614, "grad_norm": 0.2947554290294647, "learning_rate": 0.0001696330859512081, "loss": 1.3599, "step": 11698 }, { "epoch": 0.152023169271802, "grad_norm": 0.3604918122291565, "learning_rate": 0.00016963048648929672, "loss": 1.3281, "step": 11699 }, { "epoch": 0.15203616381571788, "grad_norm": 0.47640731930732727, "learning_rate": 0.00016962788702738535, "loss": 1.5404, "step": 11700 }, { "epoch": 0.15204915835963376, "grad_norm": 0.43649137020111084, "learning_rate": 0.00016962528756547395, "loss": 1.6743, "step": 11701 }, { "epoch": 0.15206215290354963, "grad_norm": 0.3978079557418823, "learning_rate": 0.00016962268810356257, "loss": 1.4434, "step": 11702 }, { "epoch": 0.1520751474474655, "grad_norm": 0.3781042993068695, "learning_rate": 0.00016962008864165117, "loss": 1.6099, "step": 11703 }, { "epoch": 0.15208814199138138, "grad_norm": 0.3335297405719757, "learning_rate": 0.00016961748917973982, "loss": 1.3613, "step": 11704 }, { "epoch": 0.15210113653529725, "grad_norm": 0.3304100036621094, "learning_rate": 0.00016961488971782842, "loss": 1.2748, "step": 11705 }, { "epoch": 0.15211413107921312, "grad_norm": 0.34727466106414795, "learning_rate": 0.00016961229025591704, "loss": 1.333, "step": 11706 }, { "epoch": 0.152127125623129, "grad_norm": 0.3454049229621887, "learning_rate": 0.00016960969079400564, "loss": 1.2993, "step": 11707 }, { "epoch": 0.15214012016704487, "grad_norm": 0.35378775000572205, "learning_rate": 0.00016960709133209426, "loss": 1.2406, "step": 11708 }, { "epoch": 0.15215311471096074, "grad_norm": 0.5171632170677185, "learning_rate": 0.0001696044918701829, "loss": 1.404, "step": 11709 }, { "epoch": 0.1521661092548766, "grad_norm": 0.37615811824798584, "learning_rate": 0.00016960189240827148, "loss": 1.3631, "step": 11710 }, { "epoch": 0.1521791037987925, "grad_norm": 0.3766386806964874, "learning_rate": 0.0001695992929463601, "loss": 1.2087, "step": 11711 }, { "epoch": 0.15219209834270836, "grad_norm": 0.37699365615844727, "learning_rate": 0.00016959669348444873, "loss": 1.3586, "step": 11712 }, { "epoch": 0.15220509288662423, "grad_norm": 0.35546258091926575, "learning_rate": 0.00016959409402253733, "loss": 1.3371, "step": 11713 }, { "epoch": 0.1522180874305401, "grad_norm": 0.4114397466182709, "learning_rate": 0.00016959149456062596, "loss": 1.4927, "step": 11714 }, { "epoch": 0.15223108197445598, "grad_norm": 0.38973891735076904, "learning_rate": 0.00016958889509871458, "loss": 1.4476, "step": 11715 }, { "epoch": 0.15224407651837185, "grad_norm": 0.496116042137146, "learning_rate": 0.0001695862956368032, "loss": 1.3409, "step": 11716 }, { "epoch": 0.15225707106228772, "grad_norm": 0.4139866232872009, "learning_rate": 0.0001695836961748918, "loss": 1.361, "step": 11717 }, { "epoch": 0.1522700656062036, "grad_norm": 0.4198775887489319, "learning_rate": 0.00016958109671298043, "loss": 1.5599, "step": 11718 }, { "epoch": 0.15228306015011947, "grad_norm": 0.49194204807281494, "learning_rate": 0.00016957849725106905, "loss": 1.5704, "step": 11719 }, { "epoch": 0.15229605469403534, "grad_norm": 0.3449385166168213, "learning_rate": 0.00016957589778915765, "loss": 1.3414, "step": 11720 }, { "epoch": 0.15230904923795122, "grad_norm": 0.47164613008499146, "learning_rate": 0.00016957329832724627, "loss": 1.6382, "step": 11721 }, { "epoch": 0.1523220437818671, "grad_norm": 0.4473242163658142, "learning_rate": 0.00016957069886533487, "loss": 1.3647, "step": 11722 }, { "epoch": 0.15233503832578296, "grad_norm": 0.4058029055595398, "learning_rate": 0.00016956809940342352, "loss": 1.5811, "step": 11723 }, { "epoch": 0.15234803286969884, "grad_norm": 0.4079439043998718, "learning_rate": 0.00016956549994151212, "loss": 1.4383, "step": 11724 }, { "epoch": 0.1523610274136147, "grad_norm": 0.41152986884117126, "learning_rate": 0.00016956290047960072, "loss": 1.5773, "step": 11725 }, { "epoch": 0.15237402195753058, "grad_norm": 0.3442077040672302, "learning_rate": 0.00016956030101768934, "loss": 1.4294, "step": 11726 }, { "epoch": 0.15238701650144645, "grad_norm": 0.3956660330295563, "learning_rate": 0.00016955770155577797, "loss": 1.4581, "step": 11727 }, { "epoch": 0.15240001104536233, "grad_norm": 0.42144888639450073, "learning_rate": 0.0001695551020938666, "loss": 1.4309, "step": 11728 }, { "epoch": 0.1524130055892782, "grad_norm": 0.3697633147239685, "learning_rate": 0.0001695525026319552, "loss": 1.515, "step": 11729 }, { "epoch": 0.15242600013319407, "grad_norm": 0.40641388297080994, "learning_rate": 0.0001695499031700438, "loss": 1.3713, "step": 11730 }, { "epoch": 0.15243899467710995, "grad_norm": 0.2823372781276703, "learning_rate": 0.00016954730370813244, "loss": 1.2594, "step": 11731 }, { "epoch": 0.15245198922102582, "grad_norm": 0.47480398416519165, "learning_rate": 0.00016954470424622103, "loss": 1.6418, "step": 11732 }, { "epoch": 0.1524649837649417, "grad_norm": 0.504581868648529, "learning_rate": 0.00016954210478430966, "loss": 1.4622, "step": 11733 }, { "epoch": 0.15247797830885756, "grad_norm": 0.4075296223163605, "learning_rate": 0.00016953950532239826, "loss": 1.4376, "step": 11734 }, { "epoch": 0.15249097285277344, "grad_norm": 0.2864323854446411, "learning_rate": 0.0001695369058604869, "loss": 1.601, "step": 11735 }, { "epoch": 0.1525039673966893, "grad_norm": 0.42500028014183044, "learning_rate": 0.0001695343063985755, "loss": 1.6395, "step": 11736 }, { "epoch": 0.15251696194060518, "grad_norm": 0.36792105436325073, "learning_rate": 0.0001695317069366641, "loss": 1.3489, "step": 11737 }, { "epoch": 0.15252995648452106, "grad_norm": 0.27140694856643677, "learning_rate": 0.00016952910747475273, "loss": 1.3624, "step": 11738 }, { "epoch": 0.15254295102843693, "grad_norm": 0.3548365831375122, "learning_rate": 0.00016952650801284135, "loss": 1.4093, "step": 11739 }, { "epoch": 0.1525559455723528, "grad_norm": 0.3475998640060425, "learning_rate": 0.00016952390855092998, "loss": 1.3448, "step": 11740 }, { "epoch": 0.15256894011626868, "grad_norm": 0.43726444244384766, "learning_rate": 0.00016952130908901857, "loss": 1.4867, "step": 11741 }, { "epoch": 0.15258193466018455, "grad_norm": 0.4378892779350281, "learning_rate": 0.0001695187096271072, "loss": 1.6379, "step": 11742 }, { "epoch": 0.15259492920410042, "grad_norm": 0.45598649978637695, "learning_rate": 0.00016951611016519582, "loss": 1.3628, "step": 11743 }, { "epoch": 0.1526079237480163, "grad_norm": 0.3495231866836548, "learning_rate": 0.00016951351070328442, "loss": 1.6197, "step": 11744 }, { "epoch": 0.15262091829193217, "grad_norm": 0.5096492767333984, "learning_rate": 0.00016951091124137304, "loss": 1.4921, "step": 11745 }, { "epoch": 0.15263391283584804, "grad_norm": 0.41441166400909424, "learning_rate": 0.00016950831177946167, "loss": 1.5289, "step": 11746 }, { "epoch": 0.1526469073797639, "grad_norm": 0.48872193694114685, "learning_rate": 0.0001695057123175503, "loss": 1.4264, "step": 11747 }, { "epoch": 0.15265990192367979, "grad_norm": 0.3623164892196655, "learning_rate": 0.0001695031128556389, "loss": 1.4951, "step": 11748 }, { "epoch": 0.15267289646759566, "grad_norm": 0.43154725432395935, "learning_rate": 0.0001695005133937275, "loss": 1.4624, "step": 11749 }, { "epoch": 0.15268589101151153, "grad_norm": 0.34462079405784607, "learning_rate": 0.00016949791393181614, "loss": 1.339, "step": 11750 }, { "epoch": 0.1526988855554274, "grad_norm": 0.37993985414505005, "learning_rate": 0.00016949531446990474, "loss": 1.3516, "step": 11751 }, { "epoch": 0.15271188009934328, "grad_norm": 0.4231403172016144, "learning_rate": 0.00016949271500799336, "loss": 1.4528, "step": 11752 }, { "epoch": 0.15272487464325915, "grad_norm": 0.3255899250507355, "learning_rate": 0.00016949011554608196, "loss": 1.3643, "step": 11753 }, { "epoch": 0.15273786918717502, "grad_norm": 0.45418569445610046, "learning_rate": 0.00016948751608417058, "loss": 1.476, "step": 11754 }, { "epoch": 0.1527508637310909, "grad_norm": 0.37670525908470154, "learning_rate": 0.0001694849166222592, "loss": 1.5341, "step": 11755 }, { "epoch": 0.15276385827500677, "grad_norm": 0.35125041007995605, "learning_rate": 0.0001694823171603478, "loss": 1.3668, "step": 11756 }, { "epoch": 0.15277685281892264, "grad_norm": 0.498047411441803, "learning_rate": 0.00016947971769843643, "loss": 1.52, "step": 11757 }, { "epoch": 0.15278984736283852, "grad_norm": 0.4123964309692383, "learning_rate": 0.00016947711823652505, "loss": 1.4486, "step": 11758 }, { "epoch": 0.1528028419067544, "grad_norm": 0.49128657579421997, "learning_rate": 0.00016947451877461368, "loss": 1.5993, "step": 11759 }, { "epoch": 0.15281583645067026, "grad_norm": 0.32412177324295044, "learning_rate": 0.00016947191931270228, "loss": 1.2347, "step": 11760 }, { "epoch": 0.15282883099458613, "grad_norm": 0.2931101322174072, "learning_rate": 0.0001694693198507909, "loss": 1.4559, "step": 11761 }, { "epoch": 0.15284182553850204, "grad_norm": 0.43628764152526855, "learning_rate": 0.00016946672038887952, "loss": 1.4967, "step": 11762 }, { "epoch": 0.1528548200824179, "grad_norm": 0.339720219373703, "learning_rate": 0.00016946412092696812, "loss": 1.2241, "step": 11763 }, { "epoch": 0.15286781462633378, "grad_norm": 0.39957964420318604, "learning_rate": 0.00016946152146505675, "loss": 1.7905, "step": 11764 }, { "epoch": 0.15288080917024965, "grad_norm": 0.48330721259117126, "learning_rate": 0.00016945892200314534, "loss": 1.3863, "step": 11765 }, { "epoch": 0.15289380371416553, "grad_norm": 0.31594640016555786, "learning_rate": 0.00016945632254123397, "loss": 1.2484, "step": 11766 }, { "epoch": 0.1529067982580814, "grad_norm": 0.35658761858940125, "learning_rate": 0.0001694537230793226, "loss": 1.5249, "step": 11767 }, { "epoch": 0.15291979280199727, "grad_norm": 0.3486155867576599, "learning_rate": 0.0001694511236174112, "loss": 1.4423, "step": 11768 }, { "epoch": 0.15293278734591315, "grad_norm": 0.36469170451164246, "learning_rate": 0.00016944852415549981, "loss": 1.3934, "step": 11769 }, { "epoch": 0.15294578188982902, "grad_norm": 0.44081711769104004, "learning_rate": 0.00016944592469358844, "loss": 1.4846, "step": 11770 }, { "epoch": 0.1529587764337449, "grad_norm": 0.35378196835517883, "learning_rate": 0.00016944332523167706, "loss": 1.2729, "step": 11771 }, { "epoch": 0.15297177097766076, "grad_norm": 0.42618003487586975, "learning_rate": 0.00016944072576976566, "loss": 1.4347, "step": 11772 }, { "epoch": 0.15298476552157664, "grad_norm": 0.2815198600292206, "learning_rate": 0.00016943812630785428, "loss": 1.1926, "step": 11773 }, { "epoch": 0.1529977600654925, "grad_norm": 0.3783093988895416, "learning_rate": 0.0001694355268459429, "loss": 1.4634, "step": 11774 }, { "epoch": 0.15301075460940838, "grad_norm": 0.41654062271118164, "learning_rate": 0.0001694329273840315, "loss": 1.2944, "step": 11775 }, { "epoch": 0.15302374915332426, "grad_norm": 0.4205237329006195, "learning_rate": 0.00016943032792212013, "loss": 1.5138, "step": 11776 }, { "epoch": 0.15303674369724013, "grad_norm": 0.3374442458152771, "learning_rate": 0.00016942772846020873, "loss": 1.3233, "step": 11777 }, { "epoch": 0.153049738241156, "grad_norm": 0.36444100737571716, "learning_rate": 0.00016942512899829735, "loss": 1.3894, "step": 11778 }, { "epoch": 0.15306273278507188, "grad_norm": 0.37599268555641174, "learning_rate": 0.00016942252953638598, "loss": 1.3029, "step": 11779 }, { "epoch": 0.15307572732898775, "grad_norm": 0.38979172706604004, "learning_rate": 0.00016941993007447457, "loss": 1.382, "step": 11780 }, { "epoch": 0.15308872187290362, "grad_norm": 0.48710331320762634, "learning_rate": 0.0001694173306125632, "loss": 1.4296, "step": 11781 }, { "epoch": 0.1531017164168195, "grad_norm": 0.45828455686569214, "learning_rate": 0.00016941473115065182, "loss": 1.281, "step": 11782 }, { "epoch": 0.15311471096073537, "grad_norm": 0.36745476722717285, "learning_rate": 0.00016941213168874045, "loss": 1.548, "step": 11783 }, { "epoch": 0.15312770550465124, "grad_norm": 0.3311762809753418, "learning_rate": 0.00016940953222682905, "loss": 1.2836, "step": 11784 }, { "epoch": 0.1531407000485671, "grad_norm": 0.3976030647754669, "learning_rate": 0.00016940693276491767, "loss": 1.3015, "step": 11785 }, { "epoch": 0.153153694592483, "grad_norm": 0.43746116757392883, "learning_rate": 0.0001694043333030063, "loss": 1.4303, "step": 11786 }, { "epoch": 0.15316668913639886, "grad_norm": 0.4561688303947449, "learning_rate": 0.0001694017338410949, "loss": 1.5099, "step": 11787 }, { "epoch": 0.15317968368031473, "grad_norm": 0.41558125615119934, "learning_rate": 0.00016939913437918352, "loss": 1.3331, "step": 11788 }, { "epoch": 0.1531926782242306, "grad_norm": 0.40876078605651855, "learning_rate": 0.00016939653491727214, "loss": 1.6399, "step": 11789 }, { "epoch": 0.15320567276814648, "grad_norm": 0.45276597142219543, "learning_rate": 0.00016939393545536077, "loss": 1.5795, "step": 11790 }, { "epoch": 0.15321866731206235, "grad_norm": 0.3830238878726959, "learning_rate": 0.00016939133599344936, "loss": 1.4626, "step": 11791 }, { "epoch": 0.15323166185597822, "grad_norm": 0.3617945909500122, "learning_rate": 0.00016938873653153796, "loss": 1.4513, "step": 11792 }, { "epoch": 0.1532446563998941, "grad_norm": 0.31999918818473816, "learning_rate": 0.0001693861370696266, "loss": 1.448, "step": 11793 }, { "epoch": 0.15325765094380997, "grad_norm": 0.36458662152290344, "learning_rate": 0.0001693835376077152, "loss": 1.5335, "step": 11794 }, { "epoch": 0.15327064548772584, "grad_norm": 0.3933316767215729, "learning_rate": 0.00016938093814580383, "loss": 1.2247, "step": 11795 }, { "epoch": 0.15328364003164172, "grad_norm": 0.40105941891670227, "learning_rate": 0.00016937833868389243, "loss": 1.4213, "step": 11796 }, { "epoch": 0.1532966345755576, "grad_norm": 0.35096871852874756, "learning_rate": 0.00016937573922198106, "loss": 1.4141, "step": 11797 }, { "epoch": 0.15330962911947346, "grad_norm": 0.40356430411338806, "learning_rate": 0.00016937313976006968, "loss": 1.4835, "step": 11798 }, { "epoch": 0.15332262366338933, "grad_norm": 0.3647235333919525, "learning_rate": 0.00016937054029815828, "loss": 1.3013, "step": 11799 }, { "epoch": 0.1533356182073052, "grad_norm": 0.43235769867897034, "learning_rate": 0.0001693679408362469, "loss": 1.4495, "step": 11800 }, { "epoch": 0.15334861275122108, "grad_norm": 0.3538236916065216, "learning_rate": 0.00016936534137433553, "loss": 1.3958, "step": 11801 }, { "epoch": 0.15336160729513695, "grad_norm": 0.42808666825294495, "learning_rate": 0.00016936274191242415, "loss": 1.4402, "step": 11802 }, { "epoch": 0.15337460183905283, "grad_norm": 0.3915475606918335, "learning_rate": 0.00016936014245051275, "loss": 1.2853, "step": 11803 }, { "epoch": 0.1533875963829687, "grad_norm": 0.27652060985565186, "learning_rate": 0.00016935754298860135, "loss": 1.4558, "step": 11804 }, { "epoch": 0.15340059092688457, "grad_norm": 0.4507613182067871, "learning_rate": 0.00016935494352669, "loss": 1.4878, "step": 11805 }, { "epoch": 0.15341358547080045, "grad_norm": 0.40186434984207153, "learning_rate": 0.0001693523440647786, "loss": 1.75, "step": 11806 }, { "epoch": 0.15342658001471632, "grad_norm": 0.35875099897384644, "learning_rate": 0.00016934974460286722, "loss": 1.2244, "step": 11807 }, { "epoch": 0.1534395745586322, "grad_norm": 0.435232013463974, "learning_rate": 0.00016934714514095582, "loss": 1.5072, "step": 11808 }, { "epoch": 0.15345256910254806, "grad_norm": 0.38161787390708923, "learning_rate": 0.00016934454567904444, "loss": 1.5401, "step": 11809 }, { "epoch": 0.15346556364646394, "grad_norm": 0.37121862173080444, "learning_rate": 0.00016934194621713307, "loss": 1.4201, "step": 11810 }, { "epoch": 0.1534785581903798, "grad_norm": 0.35048747062683105, "learning_rate": 0.00016933934675522166, "loss": 1.4342, "step": 11811 }, { "epoch": 0.15349155273429568, "grad_norm": 0.3794581890106201, "learning_rate": 0.0001693367472933103, "loss": 1.2232, "step": 11812 }, { "epoch": 0.15350454727821156, "grad_norm": 0.39692410826683044, "learning_rate": 0.0001693341478313989, "loss": 1.5388, "step": 11813 }, { "epoch": 0.15351754182212743, "grad_norm": 0.4707062244415283, "learning_rate": 0.00016933154836948754, "loss": 1.4907, "step": 11814 }, { "epoch": 0.1535305363660433, "grad_norm": 0.3456328511238098, "learning_rate": 0.00016932894890757613, "loss": 1.5022, "step": 11815 }, { "epoch": 0.15354353090995917, "grad_norm": 0.32065534591674805, "learning_rate": 0.00016932634944566476, "loss": 1.3006, "step": 11816 }, { "epoch": 0.15355652545387505, "grad_norm": 0.38260048627853394, "learning_rate": 0.00016932374998375338, "loss": 1.2331, "step": 11817 }, { "epoch": 0.15356951999779092, "grad_norm": 0.38718950748443604, "learning_rate": 0.00016932115052184198, "loss": 1.526, "step": 11818 }, { "epoch": 0.1535825145417068, "grad_norm": 0.3998222053050995, "learning_rate": 0.0001693185510599306, "loss": 1.6439, "step": 11819 }, { "epoch": 0.15359550908562267, "grad_norm": 0.5104019045829773, "learning_rate": 0.00016931595159801923, "loss": 1.3729, "step": 11820 }, { "epoch": 0.15360850362953854, "grad_norm": 0.351725310087204, "learning_rate": 0.00016931335213610783, "loss": 1.374, "step": 11821 }, { "epoch": 0.1536214981734544, "grad_norm": 0.4281952679157257, "learning_rate": 0.00016931075267419645, "loss": 1.4901, "step": 11822 }, { "epoch": 0.15363449271737029, "grad_norm": 0.3969263434410095, "learning_rate": 0.00016930815321228505, "loss": 1.1343, "step": 11823 }, { "epoch": 0.15364748726128616, "grad_norm": 0.26854419708251953, "learning_rate": 0.0001693055537503737, "loss": 1.3934, "step": 11824 }, { "epoch": 0.15366048180520203, "grad_norm": 0.35883522033691406, "learning_rate": 0.0001693029542884623, "loss": 1.5546, "step": 11825 }, { "epoch": 0.1536734763491179, "grad_norm": 0.3865770399570465, "learning_rate": 0.00016930035482655092, "loss": 1.4676, "step": 11826 }, { "epoch": 0.15368647089303378, "grad_norm": 0.382995069026947, "learning_rate": 0.00016929775536463952, "loss": 1.4358, "step": 11827 }, { "epoch": 0.15369946543694965, "grad_norm": 0.4226953387260437, "learning_rate": 0.00016929515590272814, "loss": 1.4437, "step": 11828 }, { "epoch": 0.15371245998086552, "grad_norm": 0.3526670038700104, "learning_rate": 0.00016929255644081677, "loss": 1.5229, "step": 11829 }, { "epoch": 0.1537254545247814, "grad_norm": 0.34893375635147095, "learning_rate": 0.00016928995697890537, "loss": 1.0607, "step": 11830 }, { "epoch": 0.15373844906869727, "grad_norm": 0.44681602716445923, "learning_rate": 0.000169287357516994, "loss": 1.2742, "step": 11831 }, { "epoch": 0.15375144361261314, "grad_norm": 0.3585251271724701, "learning_rate": 0.00016928475805508261, "loss": 1.3792, "step": 11832 }, { "epoch": 0.15376443815652902, "grad_norm": 0.2582624554634094, "learning_rate": 0.0001692821585931712, "loss": 1.4141, "step": 11833 }, { "epoch": 0.1537774327004449, "grad_norm": 0.4682348966598511, "learning_rate": 0.00016927955913125984, "loss": 1.4077, "step": 11834 }, { "epoch": 0.15379042724436076, "grad_norm": 0.27334079146385193, "learning_rate": 0.00016927695966934843, "loss": 1.2248, "step": 11835 }, { "epoch": 0.15380342178827663, "grad_norm": 0.4253043830394745, "learning_rate": 0.00016927436020743709, "loss": 1.4339, "step": 11836 }, { "epoch": 0.1538164163321925, "grad_norm": 0.4004599153995514, "learning_rate": 0.00016927176074552568, "loss": 1.2909, "step": 11837 }, { "epoch": 0.1538294108761084, "grad_norm": 0.4284515380859375, "learning_rate": 0.0001692691612836143, "loss": 1.5672, "step": 11838 }, { "epoch": 0.15384240542002428, "grad_norm": 0.3077544867992401, "learning_rate": 0.0001692665618217029, "loss": 1.2494, "step": 11839 }, { "epoch": 0.15385539996394015, "grad_norm": 0.39761441946029663, "learning_rate": 0.00016926396235979153, "loss": 1.4684, "step": 11840 }, { "epoch": 0.15386839450785603, "grad_norm": 0.4121223986148834, "learning_rate": 0.00016926136289788015, "loss": 1.4011, "step": 11841 }, { "epoch": 0.1538813890517719, "grad_norm": 0.4146476089954376, "learning_rate": 0.00016925876343596875, "loss": 1.505, "step": 11842 }, { "epoch": 0.15389438359568777, "grad_norm": 0.38500046730041504, "learning_rate": 0.00016925616397405738, "loss": 1.5509, "step": 11843 }, { "epoch": 0.15390737813960365, "grad_norm": 0.4074002802371979, "learning_rate": 0.000169253564512146, "loss": 1.2974, "step": 11844 }, { "epoch": 0.15392037268351952, "grad_norm": 0.3999421298503876, "learning_rate": 0.00016925096505023462, "loss": 1.5522, "step": 11845 }, { "epoch": 0.1539333672274354, "grad_norm": 0.34143367409706116, "learning_rate": 0.00016924836558832322, "loss": 1.6049, "step": 11846 }, { "epoch": 0.15394636177135126, "grad_norm": 0.35129424929618835, "learning_rate": 0.00016924576612641182, "loss": 1.1661, "step": 11847 }, { "epoch": 0.15395935631526714, "grad_norm": 0.3280280828475952, "learning_rate": 0.00016924316666450047, "loss": 1.3978, "step": 11848 }, { "epoch": 0.153972350859183, "grad_norm": 0.3519212603569031, "learning_rate": 0.00016924056720258907, "loss": 1.3831, "step": 11849 }, { "epoch": 0.15398534540309888, "grad_norm": 0.38983094692230225, "learning_rate": 0.0001692379677406777, "loss": 1.4393, "step": 11850 }, { "epoch": 0.15399833994701476, "grad_norm": 0.42494019865989685, "learning_rate": 0.0001692353682787663, "loss": 1.4972, "step": 11851 }, { "epoch": 0.15401133449093063, "grad_norm": 0.4433857798576355, "learning_rate": 0.00016923276881685491, "loss": 1.5799, "step": 11852 }, { "epoch": 0.1540243290348465, "grad_norm": 0.40632128715515137, "learning_rate": 0.00016923016935494354, "loss": 1.551, "step": 11853 }, { "epoch": 0.15403732357876238, "grad_norm": 0.4582209885120392, "learning_rate": 0.00016922756989303214, "loss": 1.3856, "step": 11854 }, { "epoch": 0.15405031812267825, "grad_norm": 0.4091321527957916, "learning_rate": 0.00016922497043112076, "loss": 1.4386, "step": 11855 }, { "epoch": 0.15406331266659412, "grad_norm": 0.3921829164028168, "learning_rate": 0.00016922237096920939, "loss": 1.4022, "step": 11856 }, { "epoch": 0.15407630721051, "grad_norm": 0.44247809052467346, "learning_rate": 0.000169219771507298, "loss": 1.4853, "step": 11857 }, { "epoch": 0.15408930175442587, "grad_norm": 0.41530054807662964, "learning_rate": 0.0001692171720453866, "loss": 1.4713, "step": 11858 }, { "epoch": 0.15410229629834174, "grad_norm": 0.4238353371620178, "learning_rate": 0.00016921457258347523, "loss": 1.5081, "step": 11859 }, { "epoch": 0.1541152908422576, "grad_norm": 0.44257503747940063, "learning_rate": 0.00016921197312156386, "loss": 1.3601, "step": 11860 }, { "epoch": 0.15412828538617349, "grad_norm": 0.28186023235321045, "learning_rate": 0.00016920937365965245, "loss": 1.3482, "step": 11861 }, { "epoch": 0.15414127993008936, "grad_norm": 0.36280325055122375, "learning_rate": 0.00016920677419774108, "loss": 1.4553, "step": 11862 }, { "epoch": 0.15415427447400523, "grad_norm": 0.3822360038757324, "learning_rate": 0.0001692041747358297, "loss": 1.4787, "step": 11863 }, { "epoch": 0.1541672690179211, "grad_norm": 0.3648161292076111, "learning_rate": 0.0001692015752739183, "loss": 1.4321, "step": 11864 }, { "epoch": 0.15418026356183698, "grad_norm": 0.3299514651298523, "learning_rate": 0.00016919897581200692, "loss": 1.107, "step": 11865 }, { "epoch": 0.15419325810575285, "grad_norm": 0.401964396238327, "learning_rate": 0.00016919637635009552, "loss": 1.4733, "step": 11866 }, { "epoch": 0.15420625264966872, "grad_norm": 0.36504676938056946, "learning_rate": 0.00016919377688818417, "loss": 1.4544, "step": 11867 }, { "epoch": 0.1542192471935846, "grad_norm": 0.44176867604255676, "learning_rate": 0.00016919117742627277, "loss": 1.4806, "step": 11868 }, { "epoch": 0.15423224173750047, "grad_norm": 0.33480894565582275, "learning_rate": 0.0001691885779643614, "loss": 1.2861, "step": 11869 }, { "epoch": 0.15424523628141634, "grad_norm": 0.4212414622306824, "learning_rate": 0.00016918597850245, "loss": 1.2678, "step": 11870 }, { "epoch": 0.15425823082533222, "grad_norm": 0.31657376885414124, "learning_rate": 0.00016918337904053862, "loss": 1.6445, "step": 11871 }, { "epoch": 0.1542712253692481, "grad_norm": 0.33360037207603455, "learning_rate": 0.00016918077957862724, "loss": 1.3473, "step": 11872 }, { "epoch": 0.15428421991316396, "grad_norm": 0.3970799148082733, "learning_rate": 0.00016917818011671584, "loss": 1.344, "step": 11873 }, { "epoch": 0.15429721445707983, "grad_norm": 0.42508774995803833, "learning_rate": 0.00016917558065480446, "loss": 1.3699, "step": 11874 }, { "epoch": 0.1543102090009957, "grad_norm": 0.43957820534706116, "learning_rate": 0.0001691729811928931, "loss": 1.4833, "step": 11875 }, { "epoch": 0.15432320354491158, "grad_norm": 0.34737902879714966, "learning_rate": 0.00016917038173098169, "loss": 1.4269, "step": 11876 }, { "epoch": 0.15433619808882745, "grad_norm": 0.484232634305954, "learning_rate": 0.0001691677822690703, "loss": 1.2752, "step": 11877 }, { "epoch": 0.15434919263274333, "grad_norm": 0.44650790095329285, "learning_rate": 0.0001691651828071589, "loss": 1.5006, "step": 11878 }, { "epoch": 0.1543621871766592, "grad_norm": 0.40729624032974243, "learning_rate": 0.00016916258334524756, "loss": 1.3514, "step": 11879 }, { "epoch": 0.15437518172057507, "grad_norm": 0.3570656180381775, "learning_rate": 0.00016915998388333616, "loss": 1.3146, "step": 11880 }, { "epoch": 0.15438817626449094, "grad_norm": 0.3242524266242981, "learning_rate": 0.00016915738442142478, "loss": 1.2045, "step": 11881 }, { "epoch": 0.15440117080840682, "grad_norm": 0.2881937623023987, "learning_rate": 0.00016915478495951338, "loss": 1.6563, "step": 11882 }, { "epoch": 0.1544141653523227, "grad_norm": 0.39352545142173767, "learning_rate": 0.000169152185497602, "loss": 1.5157, "step": 11883 }, { "epoch": 0.15442715989623856, "grad_norm": 0.4529942274093628, "learning_rate": 0.00016914958603569063, "loss": 1.4842, "step": 11884 }, { "epoch": 0.15444015444015444, "grad_norm": 0.43497994542121887, "learning_rate": 0.00016914698657377922, "loss": 1.4339, "step": 11885 }, { "epoch": 0.1544531489840703, "grad_norm": 0.3621883690357208, "learning_rate": 0.00016914438711186785, "loss": 1.3941, "step": 11886 }, { "epoch": 0.15446614352798618, "grad_norm": 0.4022464156150818, "learning_rate": 0.00016914178764995647, "loss": 1.3937, "step": 11887 }, { "epoch": 0.15447913807190206, "grad_norm": 0.4090622663497925, "learning_rate": 0.00016913918818804507, "loss": 1.4717, "step": 11888 }, { "epoch": 0.15449213261581793, "grad_norm": 0.3842346966266632, "learning_rate": 0.0001691365887261337, "loss": 1.3914, "step": 11889 }, { "epoch": 0.1545051271597338, "grad_norm": 0.39419880509376526, "learning_rate": 0.0001691339892642223, "loss": 1.2498, "step": 11890 }, { "epoch": 0.15451812170364967, "grad_norm": 0.3775467276573181, "learning_rate": 0.00016913138980231094, "loss": 1.3555, "step": 11891 }, { "epoch": 0.15453111624756555, "grad_norm": 0.3680625259876251, "learning_rate": 0.00016912879034039954, "loss": 1.3571, "step": 11892 }, { "epoch": 0.15454411079148142, "grad_norm": 0.42519471049308777, "learning_rate": 0.00016912619087848817, "loss": 1.3928, "step": 11893 }, { "epoch": 0.1545571053353973, "grad_norm": 0.41862019896507263, "learning_rate": 0.0001691235914165768, "loss": 1.4091, "step": 11894 }, { "epoch": 0.15457009987931317, "grad_norm": 0.343043714761734, "learning_rate": 0.0001691209919546654, "loss": 1.5125, "step": 11895 }, { "epoch": 0.15458309442322904, "grad_norm": 0.4209970533847809, "learning_rate": 0.000169118392492754, "loss": 1.4047, "step": 11896 }, { "epoch": 0.1545960889671449, "grad_norm": 0.39638662338256836, "learning_rate": 0.0001691157930308426, "loss": 1.6015, "step": 11897 }, { "epoch": 0.15460908351106079, "grad_norm": 0.412302166223526, "learning_rate": 0.00016911319356893126, "loss": 1.5197, "step": 11898 }, { "epoch": 0.15462207805497666, "grad_norm": 0.3367950916290283, "learning_rate": 0.00016911059410701986, "loss": 1.4427, "step": 11899 }, { "epoch": 0.15463507259889253, "grad_norm": 0.33590689301490784, "learning_rate": 0.00016910799464510848, "loss": 1.2768, "step": 11900 }, { "epoch": 0.1546480671428084, "grad_norm": 0.312641978263855, "learning_rate": 0.00016910539518319708, "loss": 1.3458, "step": 11901 }, { "epoch": 0.15466106168672428, "grad_norm": 0.39236322045326233, "learning_rate": 0.0001691027957212857, "loss": 1.5649, "step": 11902 }, { "epoch": 0.15467405623064015, "grad_norm": 0.29848867654800415, "learning_rate": 0.00016910019625937433, "loss": 1.3152, "step": 11903 }, { "epoch": 0.15468705077455602, "grad_norm": 0.4709525406360626, "learning_rate": 0.00016909759679746293, "loss": 1.6501, "step": 11904 }, { "epoch": 0.1547000453184719, "grad_norm": 0.27534300088882446, "learning_rate": 0.00016909499733555155, "loss": 1.3424, "step": 11905 }, { "epoch": 0.15471303986238777, "grad_norm": 0.39742717146873474, "learning_rate": 0.00016909239787364018, "loss": 1.4036, "step": 11906 }, { "epoch": 0.15472603440630364, "grad_norm": 0.36763429641723633, "learning_rate": 0.00016908979841172877, "loss": 1.5504, "step": 11907 }, { "epoch": 0.15473902895021951, "grad_norm": 0.489873468875885, "learning_rate": 0.0001690871989498174, "loss": 1.5053, "step": 11908 }, { "epoch": 0.1547520234941354, "grad_norm": 0.3825457990169525, "learning_rate": 0.000169084599487906, "loss": 1.4927, "step": 11909 }, { "epoch": 0.15476501803805126, "grad_norm": 0.384034126996994, "learning_rate": 0.00016908200002599465, "loss": 1.1581, "step": 11910 }, { "epoch": 0.15477801258196713, "grad_norm": 0.44588908553123474, "learning_rate": 0.00016907940056408324, "loss": 1.5723, "step": 11911 }, { "epoch": 0.154791007125883, "grad_norm": 0.3585994839668274, "learning_rate": 0.00016907680110217187, "loss": 1.5926, "step": 11912 }, { "epoch": 0.15480400166979888, "grad_norm": 0.4727434515953064, "learning_rate": 0.00016907420164026047, "loss": 1.643, "step": 11913 }, { "epoch": 0.15481699621371478, "grad_norm": 0.321334570646286, "learning_rate": 0.0001690716021783491, "loss": 1.315, "step": 11914 }, { "epoch": 0.15482999075763065, "grad_norm": 0.45254164934158325, "learning_rate": 0.00016906900271643771, "loss": 1.458, "step": 11915 }, { "epoch": 0.15484298530154653, "grad_norm": 0.28681501746177673, "learning_rate": 0.0001690664032545263, "loss": 1.5302, "step": 11916 }, { "epoch": 0.1548559798454624, "grad_norm": 0.28579723834991455, "learning_rate": 0.00016906380379261494, "loss": 1.2712, "step": 11917 }, { "epoch": 0.15486897438937827, "grad_norm": 0.35989296436309814, "learning_rate": 0.00016906120433070356, "loss": 1.5095, "step": 11918 }, { "epoch": 0.15488196893329415, "grad_norm": 0.3587045967578888, "learning_rate": 0.00016905860486879216, "loss": 1.1496, "step": 11919 }, { "epoch": 0.15489496347721002, "grad_norm": 0.4513356685638428, "learning_rate": 0.00016905600540688078, "loss": 1.5327, "step": 11920 }, { "epoch": 0.1549079580211259, "grad_norm": 0.36633196473121643, "learning_rate": 0.00016905340594496938, "loss": 1.3146, "step": 11921 }, { "epoch": 0.15492095256504176, "grad_norm": 0.37696900963783264, "learning_rate": 0.00016905080648305803, "loss": 1.5891, "step": 11922 }, { "epoch": 0.15493394710895764, "grad_norm": 0.41157111525535583, "learning_rate": 0.00016904820702114663, "loss": 1.3362, "step": 11923 }, { "epoch": 0.1549469416528735, "grad_norm": 0.4482050836086273, "learning_rate": 0.00016904560755923525, "loss": 1.3105, "step": 11924 }, { "epoch": 0.15495993619678938, "grad_norm": 0.42405182123184204, "learning_rate": 0.00016904300809732385, "loss": 1.6842, "step": 11925 }, { "epoch": 0.15497293074070526, "grad_norm": 0.4213549494743347, "learning_rate": 0.00016904040863541248, "loss": 1.4985, "step": 11926 }, { "epoch": 0.15498592528462113, "grad_norm": 0.38105490803718567, "learning_rate": 0.0001690378091735011, "loss": 1.504, "step": 11927 }, { "epoch": 0.154998919828537, "grad_norm": 0.32744768261909485, "learning_rate": 0.0001690352097115897, "loss": 1.5141, "step": 11928 }, { "epoch": 0.15501191437245287, "grad_norm": 0.38247156143188477, "learning_rate": 0.00016903261024967832, "loss": 1.4567, "step": 11929 }, { "epoch": 0.15502490891636875, "grad_norm": 0.297108918428421, "learning_rate": 0.00016903001078776695, "loss": 1.1905, "step": 11930 }, { "epoch": 0.15503790346028462, "grad_norm": 0.39979055523872375, "learning_rate": 0.00016902741132585554, "loss": 1.3984, "step": 11931 }, { "epoch": 0.1550508980042005, "grad_norm": 0.3583003580570221, "learning_rate": 0.00016902481186394417, "loss": 1.6012, "step": 11932 }, { "epoch": 0.15506389254811637, "grad_norm": 0.4437173008918762, "learning_rate": 0.0001690222124020328, "loss": 1.2219, "step": 11933 }, { "epoch": 0.15507688709203224, "grad_norm": 0.3262316882610321, "learning_rate": 0.00016901961294012142, "loss": 1.3229, "step": 11934 }, { "epoch": 0.1550898816359481, "grad_norm": 0.3768066167831421, "learning_rate": 0.00016901701347821001, "loss": 1.3877, "step": 11935 }, { "epoch": 0.15510287617986399, "grad_norm": 0.4145079255104065, "learning_rate": 0.00016901441401629864, "loss": 1.332, "step": 11936 }, { "epoch": 0.15511587072377986, "grad_norm": 0.4187820851802826, "learning_rate": 0.00016901181455438726, "loss": 1.4219, "step": 11937 }, { "epoch": 0.15512886526769573, "grad_norm": 0.3847416043281555, "learning_rate": 0.00016900921509247586, "loss": 1.2818, "step": 11938 }, { "epoch": 0.1551418598116116, "grad_norm": 0.41393569111824036, "learning_rate": 0.00016900661563056449, "loss": 1.4979, "step": 11939 }, { "epoch": 0.15515485435552748, "grad_norm": 0.4220622181892395, "learning_rate": 0.00016900401616865308, "loss": 1.4021, "step": 11940 }, { "epoch": 0.15516784889944335, "grad_norm": 0.4015654921531677, "learning_rate": 0.00016900141670674173, "loss": 1.6743, "step": 11941 }, { "epoch": 0.15518084344335922, "grad_norm": 0.40798476338386536, "learning_rate": 0.00016899881724483033, "loss": 1.5833, "step": 11942 }, { "epoch": 0.1551938379872751, "grad_norm": 0.3616056740283966, "learning_rate": 0.00016899621778291893, "loss": 1.1696, "step": 11943 }, { "epoch": 0.15520683253119097, "grad_norm": 0.39458411931991577, "learning_rate": 0.00016899361832100755, "loss": 1.4008, "step": 11944 }, { "epoch": 0.15521982707510684, "grad_norm": 0.51478111743927, "learning_rate": 0.00016899101885909618, "loss": 1.3113, "step": 11945 }, { "epoch": 0.15523282161902272, "grad_norm": 0.45048725605010986, "learning_rate": 0.0001689884193971848, "loss": 1.4865, "step": 11946 }, { "epoch": 0.1552458161629386, "grad_norm": 0.44991689920425415, "learning_rate": 0.0001689858199352734, "loss": 1.4505, "step": 11947 }, { "epoch": 0.15525881070685446, "grad_norm": 0.34293219447135925, "learning_rate": 0.00016898322047336202, "loss": 1.2344, "step": 11948 }, { "epoch": 0.15527180525077033, "grad_norm": 0.32282909750938416, "learning_rate": 0.00016898062101145065, "loss": 1.4802, "step": 11949 }, { "epoch": 0.1552847997946862, "grad_norm": 0.3538459241390228, "learning_rate": 0.00016897802154953925, "loss": 1.2857, "step": 11950 }, { "epoch": 0.15529779433860208, "grad_norm": 0.5327444076538086, "learning_rate": 0.00016897542208762787, "loss": 1.5016, "step": 11951 }, { "epoch": 0.15531078888251795, "grad_norm": 0.48369458317756653, "learning_rate": 0.00016897282262571647, "loss": 1.5615, "step": 11952 }, { "epoch": 0.15532378342643383, "grad_norm": 0.33527451753616333, "learning_rate": 0.00016897022316380512, "loss": 1.4315, "step": 11953 }, { "epoch": 0.1553367779703497, "grad_norm": 0.503851592540741, "learning_rate": 0.00016896762370189372, "loss": 1.5008, "step": 11954 }, { "epoch": 0.15534977251426557, "grad_norm": 0.4223160147666931, "learning_rate": 0.00016896502423998231, "loss": 1.5847, "step": 11955 }, { "epoch": 0.15536276705818144, "grad_norm": 0.4810388684272766, "learning_rate": 0.00016896242477807094, "loss": 1.3925, "step": 11956 }, { "epoch": 0.15537576160209732, "grad_norm": 0.36106744408607483, "learning_rate": 0.00016895982531615956, "loss": 1.1159, "step": 11957 }, { "epoch": 0.1553887561460132, "grad_norm": 0.3481575548648834, "learning_rate": 0.0001689572258542482, "loss": 1.3377, "step": 11958 }, { "epoch": 0.15540175068992906, "grad_norm": 0.41145047545433044, "learning_rate": 0.00016895462639233679, "loss": 1.4227, "step": 11959 }, { "epoch": 0.15541474523384494, "grad_norm": 0.3502742350101471, "learning_rate": 0.0001689520269304254, "loss": 1.4869, "step": 11960 }, { "epoch": 0.1554277397777608, "grad_norm": 0.4538623094558716, "learning_rate": 0.00016894942746851403, "loss": 1.3256, "step": 11961 }, { "epoch": 0.15544073432167668, "grad_norm": 0.43536028265953064, "learning_rate": 0.00016894682800660263, "loss": 1.5321, "step": 11962 }, { "epoch": 0.15545372886559256, "grad_norm": 0.41437843441963196, "learning_rate": 0.00016894422854469126, "loss": 1.5899, "step": 11963 }, { "epoch": 0.15546672340950843, "grad_norm": 0.4876006245613098, "learning_rate": 0.00016894162908277985, "loss": 1.5311, "step": 11964 }, { "epoch": 0.1554797179534243, "grad_norm": 0.45469412207603455, "learning_rate": 0.0001689390296208685, "loss": 1.6302, "step": 11965 }, { "epoch": 0.15549271249734017, "grad_norm": 0.3620186746120453, "learning_rate": 0.0001689364301589571, "loss": 1.5112, "step": 11966 }, { "epoch": 0.15550570704125605, "grad_norm": 0.44205158948898315, "learning_rate": 0.00016893383069704573, "loss": 1.2681, "step": 11967 }, { "epoch": 0.15551870158517192, "grad_norm": 0.5003493428230286, "learning_rate": 0.00016893123123513435, "loss": 1.4616, "step": 11968 }, { "epoch": 0.1555316961290878, "grad_norm": 0.3435812294483185, "learning_rate": 0.00016892863177322295, "loss": 1.3523, "step": 11969 }, { "epoch": 0.15554469067300367, "grad_norm": 0.35033366084098816, "learning_rate": 0.00016892603231131157, "loss": 1.471, "step": 11970 }, { "epoch": 0.15555768521691954, "grad_norm": 0.3777720332145691, "learning_rate": 0.00016892343284940017, "loss": 1.3449, "step": 11971 }, { "epoch": 0.1555706797608354, "grad_norm": 0.44455766677856445, "learning_rate": 0.0001689208333874888, "loss": 1.7027, "step": 11972 }, { "epoch": 0.15558367430475128, "grad_norm": 0.3895317316055298, "learning_rate": 0.00016891823392557742, "loss": 1.4081, "step": 11973 }, { "epoch": 0.15559666884866716, "grad_norm": 0.3123410642147064, "learning_rate": 0.00016891563446366602, "loss": 1.3425, "step": 11974 }, { "epoch": 0.15560966339258303, "grad_norm": 0.337747722864151, "learning_rate": 0.00016891303500175464, "loss": 1.4477, "step": 11975 }, { "epoch": 0.1556226579364989, "grad_norm": 0.4136996269226074, "learning_rate": 0.00016891043553984327, "loss": 1.4475, "step": 11976 }, { "epoch": 0.15563565248041478, "grad_norm": 0.39037227630615234, "learning_rate": 0.0001689078360779319, "loss": 1.3774, "step": 11977 }, { "epoch": 0.15564864702433065, "grad_norm": 0.3910379111766815, "learning_rate": 0.0001689052366160205, "loss": 1.4258, "step": 11978 }, { "epoch": 0.15566164156824652, "grad_norm": 0.40478605031967163, "learning_rate": 0.0001689026371541091, "loss": 1.4454, "step": 11979 }, { "epoch": 0.1556746361121624, "grad_norm": 0.4217974841594696, "learning_rate": 0.00016890003769219774, "loss": 1.4353, "step": 11980 }, { "epoch": 0.15568763065607827, "grad_norm": 0.28577250242233276, "learning_rate": 0.00016889743823028633, "loss": 1.502, "step": 11981 }, { "epoch": 0.15570062519999414, "grad_norm": 0.5484791994094849, "learning_rate": 0.00016889483876837496, "loss": 1.6585, "step": 11982 }, { "epoch": 0.15571361974391001, "grad_norm": 0.33722198009490967, "learning_rate": 0.00016889223930646356, "loss": 1.4272, "step": 11983 }, { "epoch": 0.1557266142878259, "grad_norm": 0.47989392280578613, "learning_rate": 0.00016888963984455218, "loss": 1.4959, "step": 11984 }, { "epoch": 0.15573960883174176, "grad_norm": 0.5010594725608826, "learning_rate": 0.0001688870403826408, "loss": 1.4862, "step": 11985 }, { "epoch": 0.15575260337565763, "grad_norm": 0.44075724482536316, "learning_rate": 0.0001688844409207294, "loss": 1.4108, "step": 11986 }, { "epoch": 0.1557655979195735, "grad_norm": 0.3535269498825073, "learning_rate": 0.00016888184145881803, "loss": 1.3329, "step": 11987 }, { "epoch": 0.15577859246348938, "grad_norm": 0.31797629594802856, "learning_rate": 0.00016887924199690665, "loss": 1.4949, "step": 11988 }, { "epoch": 0.15579158700740525, "grad_norm": 0.5116481184959412, "learning_rate": 0.00016887664253499528, "loss": 1.6497, "step": 11989 }, { "epoch": 0.15580458155132113, "grad_norm": 0.35807478427886963, "learning_rate": 0.00016887404307308387, "loss": 1.2568, "step": 11990 }, { "epoch": 0.15581757609523703, "grad_norm": 0.43894821405410767, "learning_rate": 0.0001688714436111725, "loss": 1.6529, "step": 11991 }, { "epoch": 0.1558305706391529, "grad_norm": 0.3731173276901245, "learning_rate": 0.00016886884414926112, "loss": 1.4194, "step": 11992 }, { "epoch": 0.15584356518306877, "grad_norm": 0.44934573769569397, "learning_rate": 0.00016886624468734972, "loss": 1.4915, "step": 11993 }, { "epoch": 0.15585655972698464, "grad_norm": 0.3778323531150818, "learning_rate": 0.00016886364522543834, "loss": 1.4198, "step": 11994 }, { "epoch": 0.15586955427090052, "grad_norm": 0.3148915767669678, "learning_rate": 0.00016886104576352694, "loss": 1.2636, "step": 11995 }, { "epoch": 0.1558825488148164, "grad_norm": 0.4303494691848755, "learning_rate": 0.0001688584463016156, "loss": 1.4403, "step": 11996 }, { "epoch": 0.15589554335873226, "grad_norm": 0.45718246698379517, "learning_rate": 0.0001688558468397042, "loss": 1.557, "step": 11997 }, { "epoch": 0.15590853790264814, "grad_norm": 0.40785664319992065, "learning_rate": 0.0001688532473777928, "loss": 1.5745, "step": 11998 }, { "epoch": 0.155921532446564, "grad_norm": 0.3330020606517792, "learning_rate": 0.0001688506479158814, "loss": 1.4788, "step": 11999 }, { "epoch": 0.15593452699047988, "grad_norm": 0.4892558455467224, "learning_rate": 0.00016884804845397004, "loss": 1.6078, "step": 12000 }, { "epoch": 0.15594752153439576, "grad_norm": 0.40915924310684204, "learning_rate": 0.00016884544899205866, "loss": 1.2677, "step": 12001 }, { "epoch": 0.15596051607831163, "grad_norm": 0.3810061812400818, "learning_rate": 0.00016884284953014726, "loss": 1.5153, "step": 12002 }, { "epoch": 0.1559735106222275, "grad_norm": 0.42884185910224915, "learning_rate": 0.00016884025006823588, "loss": 1.4682, "step": 12003 }, { "epoch": 0.15598650516614337, "grad_norm": 0.4271387457847595, "learning_rate": 0.0001688376506063245, "loss": 1.457, "step": 12004 }, { "epoch": 0.15599949971005925, "grad_norm": 0.45230433344841003, "learning_rate": 0.0001688350511444131, "loss": 1.3025, "step": 12005 }, { "epoch": 0.15601249425397512, "grad_norm": 0.35235241055488586, "learning_rate": 0.00016883245168250173, "loss": 1.4811, "step": 12006 }, { "epoch": 0.156025488797891, "grad_norm": 0.4068051874637604, "learning_rate": 0.00016882985222059035, "loss": 1.5421, "step": 12007 }, { "epoch": 0.15603848334180687, "grad_norm": 0.35475414991378784, "learning_rate": 0.00016882725275867898, "loss": 1.3846, "step": 12008 }, { "epoch": 0.15605147788572274, "grad_norm": 0.3560280203819275, "learning_rate": 0.00016882465329676758, "loss": 1.3331, "step": 12009 }, { "epoch": 0.1560644724296386, "grad_norm": 0.3426785171031952, "learning_rate": 0.00016882205383485617, "loss": 1.3675, "step": 12010 }, { "epoch": 0.15607746697355449, "grad_norm": 0.45811954140663147, "learning_rate": 0.00016881945437294483, "loss": 1.5147, "step": 12011 }, { "epoch": 0.15609046151747036, "grad_norm": 0.31036385893821716, "learning_rate": 0.00016881685491103342, "loss": 1.4167, "step": 12012 }, { "epoch": 0.15610345606138623, "grad_norm": 0.44240742921829224, "learning_rate": 0.00016881425544912205, "loss": 1.5844, "step": 12013 }, { "epoch": 0.1561164506053021, "grad_norm": 0.45457398891448975, "learning_rate": 0.00016881165598721064, "loss": 1.4403, "step": 12014 }, { "epoch": 0.15612944514921798, "grad_norm": 0.3350835144519806, "learning_rate": 0.00016880905652529927, "loss": 1.6916, "step": 12015 }, { "epoch": 0.15614243969313385, "grad_norm": 0.37067195773124695, "learning_rate": 0.0001688064570633879, "loss": 1.2549, "step": 12016 }, { "epoch": 0.15615543423704972, "grad_norm": 0.3972967565059662, "learning_rate": 0.0001688038576014765, "loss": 1.3549, "step": 12017 }, { "epoch": 0.1561684287809656, "grad_norm": 0.35073786973953247, "learning_rate": 0.00016880125813956512, "loss": 1.3421, "step": 12018 }, { "epoch": 0.15618142332488147, "grad_norm": 0.35622137784957886, "learning_rate": 0.00016879865867765374, "loss": 1.5753, "step": 12019 }, { "epoch": 0.15619441786879734, "grad_norm": 0.4132542908191681, "learning_rate": 0.00016879605921574236, "loss": 1.5363, "step": 12020 }, { "epoch": 0.15620741241271321, "grad_norm": 0.4252627491950989, "learning_rate": 0.00016879345975383096, "loss": 1.34, "step": 12021 }, { "epoch": 0.1562204069566291, "grad_norm": 0.41447702050209045, "learning_rate": 0.00016879086029191959, "loss": 1.3011, "step": 12022 }, { "epoch": 0.15623340150054496, "grad_norm": 0.44659537076950073, "learning_rate": 0.0001687882608300082, "loss": 1.5662, "step": 12023 }, { "epoch": 0.15624639604446083, "grad_norm": 0.3066611886024475, "learning_rate": 0.0001687856613680968, "loss": 1.3217, "step": 12024 }, { "epoch": 0.1562593905883767, "grad_norm": 0.46207112073898315, "learning_rate": 0.00016878306190618543, "loss": 1.5429, "step": 12025 }, { "epoch": 0.15627238513229258, "grad_norm": 0.4428323805332184, "learning_rate": 0.00016878046244427403, "loss": 1.3047, "step": 12026 }, { "epoch": 0.15628537967620845, "grad_norm": 0.36988434195518494, "learning_rate": 0.00016877786298236265, "loss": 1.2062, "step": 12027 }, { "epoch": 0.15629837422012433, "grad_norm": 0.3586268424987793, "learning_rate": 0.00016877526352045128, "loss": 1.3487, "step": 12028 }, { "epoch": 0.1563113687640402, "grad_norm": 0.26521074771881104, "learning_rate": 0.00016877266405853988, "loss": 1.3636, "step": 12029 }, { "epoch": 0.15632436330795607, "grad_norm": 0.5100510120391846, "learning_rate": 0.0001687700645966285, "loss": 1.5946, "step": 12030 }, { "epoch": 0.15633735785187194, "grad_norm": 0.4092614948749542, "learning_rate": 0.00016876746513471713, "loss": 1.2236, "step": 12031 }, { "epoch": 0.15635035239578782, "grad_norm": 0.3568085730075836, "learning_rate": 0.00016876486567280575, "loss": 1.4141, "step": 12032 }, { "epoch": 0.1563633469397037, "grad_norm": 0.36383116245269775, "learning_rate": 0.00016876226621089435, "loss": 1.7182, "step": 12033 }, { "epoch": 0.15637634148361956, "grad_norm": 0.43455275893211365, "learning_rate": 0.00016875966674898297, "loss": 1.2831, "step": 12034 }, { "epoch": 0.15638933602753544, "grad_norm": 0.38674166798591614, "learning_rate": 0.0001687570672870716, "loss": 1.5929, "step": 12035 }, { "epoch": 0.1564023305714513, "grad_norm": 0.44080469012260437, "learning_rate": 0.0001687544678251602, "loss": 1.4638, "step": 12036 }, { "epoch": 0.15641532511536718, "grad_norm": 0.4807296097278595, "learning_rate": 0.00016875186836324882, "loss": 1.5531, "step": 12037 }, { "epoch": 0.15642831965928305, "grad_norm": 0.3581717014312744, "learning_rate": 0.00016874926890133742, "loss": 1.2694, "step": 12038 }, { "epoch": 0.15644131420319893, "grad_norm": 0.3941170573234558, "learning_rate": 0.00016874666943942604, "loss": 1.3648, "step": 12039 }, { "epoch": 0.1564543087471148, "grad_norm": 0.3562326431274414, "learning_rate": 0.00016874406997751466, "loss": 1.3535, "step": 12040 }, { "epoch": 0.15646730329103067, "grad_norm": 0.3954648971557617, "learning_rate": 0.00016874147051560326, "loss": 1.5573, "step": 12041 }, { "epoch": 0.15648029783494655, "grad_norm": 0.26697438955307007, "learning_rate": 0.0001687388710536919, "loss": 1.434, "step": 12042 }, { "epoch": 0.15649329237886242, "grad_norm": 0.42756736278533936, "learning_rate": 0.0001687362715917805, "loss": 1.3733, "step": 12043 }, { "epoch": 0.1565062869227783, "grad_norm": 0.4607290029525757, "learning_rate": 0.00016873367212986913, "loss": 1.5074, "step": 12044 }, { "epoch": 0.15651928146669417, "grad_norm": 0.3384189009666443, "learning_rate": 0.00016873107266795773, "loss": 1.3675, "step": 12045 }, { "epoch": 0.15653227601061004, "grad_norm": 0.21360808610916138, "learning_rate": 0.00016872847320604636, "loss": 1.1779, "step": 12046 }, { "epoch": 0.1565452705545259, "grad_norm": 0.37245047092437744, "learning_rate": 0.00016872587374413498, "loss": 1.3669, "step": 12047 }, { "epoch": 0.15655826509844178, "grad_norm": 0.5019447803497314, "learning_rate": 0.00016872327428222358, "loss": 1.668, "step": 12048 }, { "epoch": 0.15657125964235766, "grad_norm": 0.4975603222846985, "learning_rate": 0.0001687206748203122, "loss": 1.6643, "step": 12049 }, { "epoch": 0.15658425418627353, "grad_norm": 0.41646575927734375, "learning_rate": 0.00016871807535840083, "loss": 1.4798, "step": 12050 }, { "epoch": 0.1565972487301894, "grad_norm": 0.3129939138889313, "learning_rate": 0.00016871547589648945, "loss": 1.2571, "step": 12051 }, { "epoch": 0.15661024327410528, "grad_norm": 0.46845167875289917, "learning_rate": 0.00016871287643457805, "loss": 1.4047, "step": 12052 }, { "epoch": 0.15662323781802115, "grad_norm": 0.34697386622428894, "learning_rate": 0.00016871027697266665, "loss": 1.5065, "step": 12053 }, { "epoch": 0.15663623236193702, "grad_norm": 0.3365623652935028, "learning_rate": 0.0001687076775107553, "loss": 1.3844, "step": 12054 }, { "epoch": 0.1566492269058529, "grad_norm": 0.33361899852752686, "learning_rate": 0.0001687050780488439, "loss": 1.3084, "step": 12055 }, { "epoch": 0.15666222144976877, "grad_norm": 0.42262473702430725, "learning_rate": 0.00016870247858693252, "loss": 1.3859, "step": 12056 }, { "epoch": 0.15667521599368464, "grad_norm": 0.44393181800842285, "learning_rate": 0.00016869987912502112, "loss": 1.5549, "step": 12057 }, { "epoch": 0.15668821053760051, "grad_norm": 0.36272329092025757, "learning_rate": 0.00016869727966310974, "loss": 1.4526, "step": 12058 }, { "epoch": 0.1567012050815164, "grad_norm": 0.34070709347724915, "learning_rate": 0.00016869468020119837, "loss": 1.1817, "step": 12059 }, { "epoch": 0.15671419962543226, "grad_norm": 0.3324626386165619, "learning_rate": 0.00016869208073928696, "loss": 1.4151, "step": 12060 }, { "epoch": 0.15672719416934813, "grad_norm": 0.43929848074913025, "learning_rate": 0.0001686894812773756, "loss": 1.405, "step": 12061 }, { "epoch": 0.156740188713264, "grad_norm": 0.37687528133392334, "learning_rate": 0.0001686868818154642, "loss": 1.5245, "step": 12062 }, { "epoch": 0.15675318325717988, "grad_norm": 0.34735623002052307, "learning_rate": 0.00016868428235355284, "loss": 1.433, "step": 12063 }, { "epoch": 0.15676617780109575, "grad_norm": 0.28883078694343567, "learning_rate": 0.00016868168289164143, "loss": 1.2274, "step": 12064 }, { "epoch": 0.15677917234501162, "grad_norm": 0.48190370202064514, "learning_rate": 0.00016867908342973003, "loss": 1.3238, "step": 12065 }, { "epoch": 0.1567921668889275, "grad_norm": 0.4140412211418152, "learning_rate": 0.00016867648396781868, "loss": 1.3874, "step": 12066 }, { "epoch": 0.1568051614328434, "grad_norm": 0.39700308442115784, "learning_rate": 0.00016867388450590728, "loss": 1.3749, "step": 12067 }, { "epoch": 0.15681815597675927, "grad_norm": 0.37823575735092163, "learning_rate": 0.0001686712850439959, "loss": 1.5489, "step": 12068 }, { "epoch": 0.15683115052067514, "grad_norm": 0.33525389432907104, "learning_rate": 0.0001686686855820845, "loss": 1.4614, "step": 12069 }, { "epoch": 0.15684414506459102, "grad_norm": 0.3624768555164337, "learning_rate": 0.00016866608612017313, "loss": 1.2793, "step": 12070 }, { "epoch": 0.1568571396085069, "grad_norm": 0.42806684970855713, "learning_rate": 0.00016866348665826175, "loss": 1.7052, "step": 12071 }, { "epoch": 0.15687013415242276, "grad_norm": 0.3201879560947418, "learning_rate": 0.00016866088719635035, "loss": 1.2095, "step": 12072 }, { "epoch": 0.15688312869633864, "grad_norm": 0.3719903528690338, "learning_rate": 0.00016865828773443897, "loss": 1.6028, "step": 12073 }, { "epoch": 0.1568961232402545, "grad_norm": 0.38908639550209045, "learning_rate": 0.0001686556882725276, "loss": 1.4535, "step": 12074 }, { "epoch": 0.15690911778417038, "grad_norm": 0.41018176078796387, "learning_rate": 0.00016865308881061622, "loss": 1.2228, "step": 12075 }, { "epoch": 0.15692211232808626, "grad_norm": 0.4546106457710266, "learning_rate": 0.00016865048934870482, "loss": 1.5348, "step": 12076 }, { "epoch": 0.15693510687200213, "grad_norm": 0.41519367694854736, "learning_rate": 0.00016864788988679342, "loss": 1.3985, "step": 12077 }, { "epoch": 0.156948101415918, "grad_norm": 0.43420615792274475, "learning_rate": 0.00016864529042488207, "loss": 1.4531, "step": 12078 }, { "epoch": 0.15696109595983387, "grad_norm": 0.2989868223667145, "learning_rate": 0.00016864269096297067, "loss": 1.4021, "step": 12079 }, { "epoch": 0.15697409050374975, "grad_norm": 0.421214759349823, "learning_rate": 0.0001686400915010593, "loss": 1.5715, "step": 12080 }, { "epoch": 0.15698708504766562, "grad_norm": 0.3859822750091553, "learning_rate": 0.00016863749203914792, "loss": 1.4295, "step": 12081 }, { "epoch": 0.1570000795915815, "grad_norm": 0.38460224866867065, "learning_rate": 0.0001686348925772365, "loss": 1.3309, "step": 12082 }, { "epoch": 0.15701307413549737, "grad_norm": 0.37776118516921997, "learning_rate": 0.00016863229311532514, "loss": 1.524, "step": 12083 }, { "epoch": 0.15702606867941324, "grad_norm": 0.35567814111709595, "learning_rate": 0.00016862969365341373, "loss": 1.5778, "step": 12084 }, { "epoch": 0.1570390632233291, "grad_norm": 0.4244820773601532, "learning_rate": 0.00016862709419150239, "loss": 1.6133, "step": 12085 }, { "epoch": 0.15705205776724498, "grad_norm": 0.42508867383003235, "learning_rate": 0.00016862449472959098, "loss": 1.4931, "step": 12086 }, { "epoch": 0.15706505231116086, "grad_norm": 0.4721623659133911, "learning_rate": 0.0001686218952676796, "loss": 1.5827, "step": 12087 }, { "epoch": 0.15707804685507673, "grad_norm": 0.36248722672462463, "learning_rate": 0.0001686192958057682, "loss": 1.3802, "step": 12088 }, { "epoch": 0.1570910413989926, "grad_norm": 0.39904311299324036, "learning_rate": 0.00016861669634385683, "loss": 1.3497, "step": 12089 }, { "epoch": 0.15710403594290848, "grad_norm": 0.4013567566871643, "learning_rate": 0.00016861409688194545, "loss": 1.5756, "step": 12090 }, { "epoch": 0.15711703048682435, "grad_norm": 0.3746693730354309, "learning_rate": 0.00016861149742003405, "loss": 1.5049, "step": 12091 }, { "epoch": 0.15713002503074022, "grad_norm": 0.3480197489261627, "learning_rate": 0.00016860889795812268, "loss": 1.3034, "step": 12092 }, { "epoch": 0.1571430195746561, "grad_norm": 0.4265461564064026, "learning_rate": 0.0001686062984962113, "loss": 1.5288, "step": 12093 }, { "epoch": 0.15715601411857197, "grad_norm": 0.4015458822250366, "learning_rate": 0.0001686036990342999, "loss": 1.3763, "step": 12094 }, { "epoch": 0.15716900866248784, "grad_norm": 0.3355420231819153, "learning_rate": 0.00016860109957238852, "loss": 1.4359, "step": 12095 }, { "epoch": 0.15718200320640371, "grad_norm": 0.5164967179298401, "learning_rate": 0.00016859850011047712, "loss": 1.5925, "step": 12096 }, { "epoch": 0.1571949977503196, "grad_norm": 0.39553114771842957, "learning_rate": 0.00016859590064856577, "loss": 1.5647, "step": 12097 }, { "epoch": 0.15720799229423546, "grad_norm": 0.4172547459602356, "learning_rate": 0.00016859330118665437, "loss": 1.4438, "step": 12098 }, { "epoch": 0.15722098683815133, "grad_norm": 0.4288550615310669, "learning_rate": 0.000168590701724743, "loss": 1.4382, "step": 12099 }, { "epoch": 0.1572339813820672, "grad_norm": 0.39426884055137634, "learning_rate": 0.0001685881022628316, "loss": 1.4978, "step": 12100 }, { "epoch": 0.15724697592598308, "grad_norm": 0.4198729991912842, "learning_rate": 0.00016858550280092022, "loss": 1.5107, "step": 12101 }, { "epoch": 0.15725997046989895, "grad_norm": 0.37695348262786865, "learning_rate": 0.00016858290333900884, "loss": 1.3203, "step": 12102 }, { "epoch": 0.15727296501381482, "grad_norm": 0.4280935823917389, "learning_rate": 0.00016858030387709744, "loss": 1.2111, "step": 12103 }, { "epoch": 0.1572859595577307, "grad_norm": 0.4634476900100708, "learning_rate": 0.00016857770441518606, "loss": 1.5782, "step": 12104 }, { "epoch": 0.15729895410164657, "grad_norm": 0.4004434049129486, "learning_rate": 0.00016857510495327469, "loss": 1.3733, "step": 12105 }, { "epoch": 0.15731194864556244, "grad_norm": 0.43846994638442993, "learning_rate": 0.0001685725054913633, "loss": 1.6742, "step": 12106 }, { "epoch": 0.15732494318947832, "grad_norm": 0.4106936752796173, "learning_rate": 0.0001685699060294519, "loss": 1.3449, "step": 12107 }, { "epoch": 0.1573379377333942, "grad_norm": 0.40755710005760193, "learning_rate": 0.0001685673065675405, "loss": 1.4562, "step": 12108 }, { "epoch": 0.15735093227731006, "grad_norm": 0.37867283821105957, "learning_rate": 0.00016856470710562916, "loss": 1.3525, "step": 12109 }, { "epoch": 0.15736392682122594, "grad_norm": 0.38126394152641296, "learning_rate": 0.00016856210764371775, "loss": 1.253, "step": 12110 }, { "epoch": 0.1573769213651418, "grad_norm": 0.4599338471889496, "learning_rate": 0.00016855950818180638, "loss": 1.4677, "step": 12111 }, { "epoch": 0.15738991590905768, "grad_norm": 0.42347508668899536, "learning_rate": 0.00016855690871989498, "loss": 1.3983, "step": 12112 }, { "epoch": 0.15740291045297355, "grad_norm": 0.3543795645236969, "learning_rate": 0.0001685543092579836, "loss": 1.5169, "step": 12113 }, { "epoch": 0.15741590499688943, "grad_norm": 0.4439345598220825, "learning_rate": 0.00016855170979607223, "loss": 1.4053, "step": 12114 }, { "epoch": 0.1574288995408053, "grad_norm": 0.4256225526332855, "learning_rate": 0.00016854911033416082, "loss": 1.6394, "step": 12115 }, { "epoch": 0.15744189408472117, "grad_norm": 0.3350345492362976, "learning_rate": 0.00016854651087224947, "loss": 1.348, "step": 12116 }, { "epoch": 0.15745488862863705, "grad_norm": 0.3530762791633606, "learning_rate": 0.00016854391141033807, "loss": 1.3662, "step": 12117 }, { "epoch": 0.15746788317255292, "grad_norm": 0.43185654282569885, "learning_rate": 0.0001685413119484267, "loss": 1.4478, "step": 12118 }, { "epoch": 0.1574808777164688, "grad_norm": 0.3207387626171112, "learning_rate": 0.0001685387124865153, "loss": 1.332, "step": 12119 }, { "epoch": 0.15749387226038467, "grad_norm": 0.45395734906196594, "learning_rate": 0.00016853611302460392, "loss": 1.5576, "step": 12120 }, { "epoch": 0.15750686680430054, "grad_norm": 0.5471612215042114, "learning_rate": 0.00016853351356269254, "loss": 1.3549, "step": 12121 }, { "epoch": 0.1575198613482164, "grad_norm": 0.24745294451713562, "learning_rate": 0.00016853091410078114, "loss": 1.3708, "step": 12122 }, { "epoch": 0.15753285589213228, "grad_norm": 0.3814707100391388, "learning_rate": 0.00016852831463886976, "loss": 1.513, "step": 12123 }, { "epoch": 0.15754585043604816, "grad_norm": 0.3231234550476074, "learning_rate": 0.0001685257151769584, "loss": 1.3583, "step": 12124 }, { "epoch": 0.15755884497996403, "grad_norm": 0.30630233883857727, "learning_rate": 0.00016852311571504699, "loss": 1.4103, "step": 12125 }, { "epoch": 0.1575718395238799, "grad_norm": 0.39077651500701904, "learning_rate": 0.0001685205162531356, "loss": 1.3963, "step": 12126 }, { "epoch": 0.15758483406779578, "grad_norm": 0.2572426497936249, "learning_rate": 0.0001685179167912242, "loss": 1.3737, "step": 12127 }, { "epoch": 0.15759782861171165, "grad_norm": 0.35180747509002686, "learning_rate": 0.00016851531732931286, "loss": 1.5199, "step": 12128 }, { "epoch": 0.15761082315562752, "grad_norm": 0.37713390588760376, "learning_rate": 0.00016851271786740146, "loss": 1.4161, "step": 12129 }, { "epoch": 0.1576238176995434, "grad_norm": 0.30682334303855896, "learning_rate": 0.00016851011840549008, "loss": 1.4216, "step": 12130 }, { "epoch": 0.15763681224345927, "grad_norm": 0.42932432889938354, "learning_rate": 0.00016850751894357868, "loss": 1.3388, "step": 12131 }, { "epoch": 0.15764980678737514, "grad_norm": 0.40550339221954346, "learning_rate": 0.0001685049194816673, "loss": 1.4025, "step": 12132 }, { "epoch": 0.157662801331291, "grad_norm": 0.27826327085494995, "learning_rate": 0.00016850232001975593, "loss": 1.3198, "step": 12133 }, { "epoch": 0.1576757958752069, "grad_norm": 0.422445684671402, "learning_rate": 0.00016849972055784453, "loss": 1.4361, "step": 12134 }, { "epoch": 0.15768879041912276, "grad_norm": 0.3424423336982727, "learning_rate": 0.00016849712109593315, "loss": 1.2521, "step": 12135 }, { "epoch": 0.15770178496303863, "grad_norm": 0.40800026059150696, "learning_rate": 0.00016849452163402177, "loss": 1.3826, "step": 12136 }, { "epoch": 0.1577147795069545, "grad_norm": 0.23556606471538544, "learning_rate": 0.00016849192217211037, "loss": 1.3703, "step": 12137 }, { "epoch": 0.15772777405087038, "grad_norm": 0.3475390374660492, "learning_rate": 0.000168489322710199, "loss": 1.4566, "step": 12138 }, { "epoch": 0.15774076859478625, "grad_norm": 0.3246475160121918, "learning_rate": 0.0001684867232482876, "loss": 1.2551, "step": 12139 }, { "epoch": 0.15775376313870212, "grad_norm": 0.35173729062080383, "learning_rate": 0.00016848412378637625, "loss": 1.401, "step": 12140 }, { "epoch": 0.157766757682618, "grad_norm": 0.3947131931781769, "learning_rate": 0.00016848152432446484, "loss": 1.4912, "step": 12141 }, { "epoch": 0.15777975222653387, "grad_norm": 0.43315646052360535, "learning_rate": 0.00016847892486255347, "loss": 1.4778, "step": 12142 }, { "epoch": 0.15779274677044977, "grad_norm": 0.413069486618042, "learning_rate": 0.00016847632540064206, "loss": 1.5419, "step": 12143 }, { "epoch": 0.15780574131436564, "grad_norm": 0.3021068274974823, "learning_rate": 0.0001684737259387307, "loss": 1.368, "step": 12144 }, { "epoch": 0.15781873585828152, "grad_norm": 0.3128720819950104, "learning_rate": 0.0001684711264768193, "loss": 1.3739, "step": 12145 }, { "epoch": 0.1578317304021974, "grad_norm": 0.3862049877643585, "learning_rate": 0.0001684685270149079, "loss": 1.4451, "step": 12146 }, { "epoch": 0.15784472494611326, "grad_norm": 0.3226591944694519, "learning_rate": 0.00016846592755299654, "loss": 1.3165, "step": 12147 }, { "epoch": 0.15785771949002914, "grad_norm": 0.3609507381916046, "learning_rate": 0.00016846332809108516, "loss": 1.3225, "step": 12148 }, { "epoch": 0.157870714033945, "grad_norm": 0.38621002435684204, "learning_rate": 0.00016846072862917376, "loss": 1.5426, "step": 12149 }, { "epoch": 0.15788370857786088, "grad_norm": 0.4232640564441681, "learning_rate": 0.00016845812916726238, "loss": 1.5035, "step": 12150 }, { "epoch": 0.15789670312177675, "grad_norm": 0.390117883682251, "learning_rate": 0.00016845552970535098, "loss": 1.5398, "step": 12151 }, { "epoch": 0.15790969766569263, "grad_norm": 0.35571977496147156, "learning_rate": 0.00016845293024343963, "loss": 1.5468, "step": 12152 }, { "epoch": 0.1579226922096085, "grad_norm": 0.3840009272098541, "learning_rate": 0.00016845033078152823, "loss": 1.301, "step": 12153 }, { "epoch": 0.15793568675352437, "grad_norm": 0.3309866786003113, "learning_rate": 0.00016844773131961685, "loss": 1.4471, "step": 12154 }, { "epoch": 0.15794868129744025, "grad_norm": 0.3633089065551758, "learning_rate": 0.00016844513185770548, "loss": 1.3995, "step": 12155 }, { "epoch": 0.15796167584135612, "grad_norm": 0.4083336293697357, "learning_rate": 0.00016844253239579407, "loss": 1.4613, "step": 12156 }, { "epoch": 0.157974670385272, "grad_norm": 0.3916897475719452, "learning_rate": 0.0001684399329338827, "loss": 1.454, "step": 12157 }, { "epoch": 0.15798766492918787, "grad_norm": 0.3418882489204407, "learning_rate": 0.0001684373334719713, "loss": 1.2817, "step": 12158 }, { "epoch": 0.15800065947310374, "grad_norm": 0.35763347148895264, "learning_rate": 0.00016843473401005995, "loss": 1.3088, "step": 12159 }, { "epoch": 0.1580136540170196, "grad_norm": 0.41593673825263977, "learning_rate": 0.00016843213454814855, "loss": 1.4513, "step": 12160 }, { "epoch": 0.15802664856093548, "grad_norm": 0.3025377094745636, "learning_rate": 0.00016842953508623714, "loss": 1.3647, "step": 12161 }, { "epoch": 0.15803964310485136, "grad_norm": 0.37623780965805054, "learning_rate": 0.00016842693562432577, "loss": 1.4612, "step": 12162 }, { "epoch": 0.15805263764876723, "grad_norm": 0.3976060450077057, "learning_rate": 0.0001684243361624144, "loss": 1.3837, "step": 12163 }, { "epoch": 0.1580656321926831, "grad_norm": 0.4235958755016327, "learning_rate": 0.00016842173670050302, "loss": 1.514, "step": 12164 }, { "epoch": 0.15807862673659898, "grad_norm": 0.4469101130962372, "learning_rate": 0.0001684191372385916, "loss": 1.4825, "step": 12165 }, { "epoch": 0.15809162128051485, "grad_norm": 0.4407886266708374, "learning_rate": 0.00016841653777668024, "loss": 1.5262, "step": 12166 }, { "epoch": 0.15810461582443072, "grad_norm": 0.468839168548584, "learning_rate": 0.00016841393831476886, "loss": 1.4787, "step": 12167 }, { "epoch": 0.1581176103683466, "grad_norm": 0.3731576204299927, "learning_rate": 0.00016841133885285746, "loss": 1.3271, "step": 12168 }, { "epoch": 0.15813060491226247, "grad_norm": 0.31835997104644775, "learning_rate": 0.00016840873939094608, "loss": 1.342, "step": 12169 }, { "epoch": 0.15814359945617834, "grad_norm": 0.43828311562538147, "learning_rate": 0.00016840613992903468, "loss": 1.4252, "step": 12170 }, { "epoch": 0.1581565940000942, "grad_norm": 0.3683306872844696, "learning_rate": 0.00016840354046712333, "loss": 1.5245, "step": 12171 }, { "epoch": 0.1581695885440101, "grad_norm": 0.4457702934741974, "learning_rate": 0.00016840094100521193, "loss": 1.4573, "step": 12172 }, { "epoch": 0.15818258308792596, "grad_norm": 0.3749345541000366, "learning_rate": 0.00016839834154330055, "loss": 1.503, "step": 12173 }, { "epoch": 0.15819557763184183, "grad_norm": 0.43286803364753723, "learning_rate": 0.00016839574208138915, "loss": 1.3979, "step": 12174 }, { "epoch": 0.1582085721757577, "grad_norm": 0.43005579710006714, "learning_rate": 0.00016839314261947778, "loss": 1.4188, "step": 12175 }, { "epoch": 0.15822156671967358, "grad_norm": 0.5076369643211365, "learning_rate": 0.0001683905431575664, "loss": 1.4817, "step": 12176 }, { "epoch": 0.15823456126358945, "grad_norm": 0.4112192988395691, "learning_rate": 0.000168387943695655, "loss": 1.6797, "step": 12177 }, { "epoch": 0.15824755580750532, "grad_norm": 0.38447749614715576, "learning_rate": 0.00016838534423374362, "loss": 1.3085, "step": 12178 }, { "epoch": 0.1582605503514212, "grad_norm": 0.34840282797813416, "learning_rate": 0.00016838274477183225, "loss": 1.1225, "step": 12179 }, { "epoch": 0.15827354489533707, "grad_norm": 0.3794090747833252, "learning_rate": 0.00016838014530992085, "loss": 1.554, "step": 12180 }, { "epoch": 0.15828653943925294, "grad_norm": 0.3945227265357971, "learning_rate": 0.00016837754584800947, "loss": 1.4316, "step": 12181 }, { "epoch": 0.15829953398316882, "grad_norm": 0.4256705343723297, "learning_rate": 0.00016837494638609807, "loss": 1.386, "step": 12182 }, { "epoch": 0.1583125285270847, "grad_norm": 0.3695443570613861, "learning_rate": 0.00016837234692418672, "loss": 1.3649, "step": 12183 }, { "epoch": 0.15832552307100056, "grad_norm": 0.39672568440437317, "learning_rate": 0.00016836974746227532, "loss": 1.277, "step": 12184 }, { "epoch": 0.15833851761491644, "grad_norm": 0.31588834524154663, "learning_rate": 0.00016836714800036394, "loss": 1.3848, "step": 12185 }, { "epoch": 0.1583515121588323, "grad_norm": 0.45509183406829834, "learning_rate": 0.00016836454853845254, "loss": 1.2637, "step": 12186 }, { "epoch": 0.15836450670274818, "grad_norm": 0.38124537467956543, "learning_rate": 0.00016836194907654116, "loss": 1.2465, "step": 12187 }, { "epoch": 0.15837750124666405, "grad_norm": 0.46467000246047974, "learning_rate": 0.0001683593496146298, "loss": 1.6668, "step": 12188 }, { "epoch": 0.15839049579057993, "grad_norm": 0.29997479915618896, "learning_rate": 0.00016835675015271838, "loss": 1.1691, "step": 12189 }, { "epoch": 0.1584034903344958, "grad_norm": 0.4041958153247833, "learning_rate": 0.000168354150690807, "loss": 1.3437, "step": 12190 }, { "epoch": 0.15841648487841167, "grad_norm": 0.4309135377407074, "learning_rate": 0.00016835155122889563, "loss": 1.5585, "step": 12191 }, { "epoch": 0.15842947942232755, "grad_norm": 0.4675934314727783, "learning_rate": 0.00016834895176698423, "loss": 1.4627, "step": 12192 }, { "epoch": 0.15844247396624342, "grad_norm": 0.4504415690898895, "learning_rate": 0.00016834635230507285, "loss": 1.501, "step": 12193 }, { "epoch": 0.1584554685101593, "grad_norm": 0.3013511896133423, "learning_rate": 0.00016834375284316148, "loss": 1.3931, "step": 12194 }, { "epoch": 0.15846846305407516, "grad_norm": 0.33756259083747864, "learning_rate": 0.0001683411533812501, "loss": 1.5259, "step": 12195 }, { "epoch": 0.15848145759799104, "grad_norm": 0.30408650636672974, "learning_rate": 0.0001683385539193387, "loss": 1.3791, "step": 12196 }, { "epoch": 0.1584944521419069, "grad_norm": 0.5211697816848755, "learning_rate": 0.00016833595445742733, "loss": 1.6588, "step": 12197 }, { "epoch": 0.15850744668582278, "grad_norm": 0.41396358609199524, "learning_rate": 0.00016833335499551595, "loss": 1.1439, "step": 12198 }, { "epoch": 0.15852044122973866, "grad_norm": 0.4607744812965393, "learning_rate": 0.00016833075553360455, "loss": 1.5323, "step": 12199 }, { "epoch": 0.15853343577365453, "grad_norm": 0.4046708047389984, "learning_rate": 0.00016832815607169317, "loss": 1.215, "step": 12200 }, { "epoch": 0.1585464303175704, "grad_norm": 0.4478952884674072, "learning_rate": 0.00016832555660978177, "loss": 1.4066, "step": 12201 }, { "epoch": 0.15855942486148628, "grad_norm": 0.3196367621421814, "learning_rate": 0.00016832295714787042, "loss": 1.3782, "step": 12202 }, { "epoch": 0.15857241940540215, "grad_norm": 0.32422423362731934, "learning_rate": 0.00016832035768595902, "loss": 1.3825, "step": 12203 }, { "epoch": 0.15858541394931802, "grad_norm": 0.4274030327796936, "learning_rate": 0.00016831775822404762, "loss": 1.2455, "step": 12204 }, { "epoch": 0.1585984084932339, "grad_norm": 0.39410191774368286, "learning_rate": 0.00016831515876213624, "loss": 1.3206, "step": 12205 }, { "epoch": 0.15861140303714977, "grad_norm": 0.3155837059020996, "learning_rate": 0.00016831255930022486, "loss": 1.3546, "step": 12206 }, { "epoch": 0.15862439758106564, "grad_norm": 0.3832182288169861, "learning_rate": 0.0001683099598383135, "loss": 1.4002, "step": 12207 }, { "epoch": 0.1586373921249815, "grad_norm": 0.44513365626335144, "learning_rate": 0.0001683073603764021, "loss": 1.4265, "step": 12208 }, { "epoch": 0.1586503866688974, "grad_norm": 0.344574511051178, "learning_rate": 0.0001683047609144907, "loss": 1.1895, "step": 12209 }, { "epoch": 0.15866338121281326, "grad_norm": 0.37313321232795715, "learning_rate": 0.00016830216145257934, "loss": 1.2722, "step": 12210 }, { "epoch": 0.15867637575672913, "grad_norm": 0.4319547414779663, "learning_rate": 0.00016829956199066793, "loss": 1.4179, "step": 12211 }, { "epoch": 0.158689370300645, "grad_norm": 0.36353787779808044, "learning_rate": 0.00016829696252875656, "loss": 1.4473, "step": 12212 }, { "epoch": 0.15870236484456088, "grad_norm": 0.38511598110198975, "learning_rate": 0.00016829436306684515, "loss": 1.1778, "step": 12213 }, { "epoch": 0.15871535938847675, "grad_norm": 0.4999373257160187, "learning_rate": 0.0001682917636049338, "loss": 1.4818, "step": 12214 }, { "epoch": 0.15872835393239262, "grad_norm": 0.39421847462654114, "learning_rate": 0.0001682891641430224, "loss": 1.4275, "step": 12215 }, { "epoch": 0.1587413484763085, "grad_norm": 0.849568247795105, "learning_rate": 0.000168286564681111, "loss": 1.3637, "step": 12216 }, { "epoch": 0.15875434302022437, "grad_norm": 0.42851266264915466, "learning_rate": 0.00016828396521919963, "loss": 1.5218, "step": 12217 }, { "epoch": 0.15876733756414024, "grad_norm": 0.38274115324020386, "learning_rate": 0.00016828136575728825, "loss": 1.3916, "step": 12218 }, { "epoch": 0.15878033210805614, "grad_norm": 0.41079720854759216, "learning_rate": 0.00016827876629537687, "loss": 1.4406, "step": 12219 }, { "epoch": 0.15879332665197202, "grad_norm": 0.3841322660446167, "learning_rate": 0.00016827616683346547, "loss": 1.2904, "step": 12220 }, { "epoch": 0.1588063211958879, "grad_norm": 0.4471125304698944, "learning_rate": 0.0001682735673715541, "loss": 1.4745, "step": 12221 }, { "epoch": 0.15881931573980376, "grad_norm": 0.42396268248558044, "learning_rate": 0.00016827096790964272, "loss": 1.4187, "step": 12222 }, { "epoch": 0.15883231028371964, "grad_norm": 0.34756559133529663, "learning_rate": 0.00016826836844773132, "loss": 1.423, "step": 12223 }, { "epoch": 0.1588453048276355, "grad_norm": 0.3663460910320282, "learning_rate": 0.00016826576898581994, "loss": 1.4851, "step": 12224 }, { "epoch": 0.15885829937155138, "grad_norm": 0.4245162904262543, "learning_rate": 0.00016826316952390854, "loss": 1.4524, "step": 12225 }, { "epoch": 0.15887129391546725, "grad_norm": 0.3648967146873474, "learning_rate": 0.0001682605700619972, "loss": 1.452, "step": 12226 }, { "epoch": 0.15888428845938313, "grad_norm": 0.4318394362926483, "learning_rate": 0.0001682579706000858, "loss": 1.36, "step": 12227 }, { "epoch": 0.158897283003299, "grad_norm": 0.4152277708053589, "learning_rate": 0.00016825537113817441, "loss": 1.5309, "step": 12228 }, { "epoch": 0.15891027754721487, "grad_norm": 0.3483196198940277, "learning_rate": 0.00016825277167626304, "loss": 1.3308, "step": 12229 }, { "epoch": 0.15892327209113075, "grad_norm": 0.3724815547466278, "learning_rate": 0.00016825017221435164, "loss": 1.3413, "step": 12230 }, { "epoch": 0.15893626663504662, "grad_norm": 0.3755175769329071, "learning_rate": 0.00016824757275244026, "loss": 1.6108, "step": 12231 }, { "epoch": 0.1589492611789625, "grad_norm": 0.37909120321273804, "learning_rate": 0.00016824497329052886, "loss": 1.4606, "step": 12232 }, { "epoch": 0.15896225572287837, "grad_norm": 0.3077991306781769, "learning_rate": 0.00016824237382861748, "loss": 1.3839, "step": 12233 }, { "epoch": 0.15897525026679424, "grad_norm": 0.4607841372489929, "learning_rate": 0.0001682397743667061, "loss": 1.6681, "step": 12234 }, { "epoch": 0.1589882448107101, "grad_norm": 0.47513526678085327, "learning_rate": 0.0001682371749047947, "loss": 1.4253, "step": 12235 }, { "epoch": 0.15900123935462598, "grad_norm": 0.4572325348854065, "learning_rate": 0.00016823457544288333, "loss": 1.3791, "step": 12236 }, { "epoch": 0.15901423389854186, "grad_norm": 0.32650768756866455, "learning_rate": 0.00016823197598097195, "loss": 1.5368, "step": 12237 }, { "epoch": 0.15902722844245773, "grad_norm": 0.3811675012111664, "learning_rate": 0.00016822937651906058, "loss": 1.4804, "step": 12238 }, { "epoch": 0.1590402229863736, "grad_norm": 0.33600297570228577, "learning_rate": 0.00016822677705714917, "loss": 1.34, "step": 12239 }, { "epoch": 0.15905321753028948, "grad_norm": 0.4331541955471039, "learning_rate": 0.0001682241775952378, "loss": 1.3042, "step": 12240 }, { "epoch": 0.15906621207420535, "grad_norm": 0.4234504997730255, "learning_rate": 0.00016822157813332642, "loss": 1.4482, "step": 12241 }, { "epoch": 0.15907920661812122, "grad_norm": 0.36658522486686707, "learning_rate": 0.00016821897867141502, "loss": 1.3592, "step": 12242 }, { "epoch": 0.1590922011620371, "grad_norm": 0.4279352128505707, "learning_rate": 0.00016821637920950365, "loss": 1.3467, "step": 12243 }, { "epoch": 0.15910519570595297, "grad_norm": 0.36761078238487244, "learning_rate": 0.00016821377974759224, "loss": 1.317, "step": 12244 }, { "epoch": 0.15911819024986884, "grad_norm": 0.4478946626186371, "learning_rate": 0.00016821118028568087, "loss": 1.5632, "step": 12245 }, { "epoch": 0.1591311847937847, "grad_norm": 0.3182785212993622, "learning_rate": 0.0001682085808237695, "loss": 1.543, "step": 12246 }, { "epoch": 0.1591441793377006, "grad_norm": 0.4061199128627777, "learning_rate": 0.0001682059813618581, "loss": 1.4911, "step": 12247 }, { "epoch": 0.15915717388161646, "grad_norm": 0.43322935700416565, "learning_rate": 0.00016820338189994671, "loss": 1.4278, "step": 12248 }, { "epoch": 0.15917016842553233, "grad_norm": 0.30969804525375366, "learning_rate": 0.00016820078243803534, "loss": 1.3471, "step": 12249 }, { "epoch": 0.1591831629694482, "grad_norm": 0.36357054114341736, "learning_rate": 0.00016819818297612396, "loss": 1.5466, "step": 12250 }, { "epoch": 0.15919615751336408, "grad_norm": 0.44624578952789307, "learning_rate": 0.00016819558351421256, "loss": 1.4011, "step": 12251 }, { "epoch": 0.15920915205727995, "grad_norm": 0.35145607590675354, "learning_rate": 0.00016819298405230118, "loss": 1.369, "step": 12252 }, { "epoch": 0.15922214660119582, "grad_norm": 0.2584977447986603, "learning_rate": 0.0001681903845903898, "loss": 1.4514, "step": 12253 }, { "epoch": 0.1592351411451117, "grad_norm": 0.4205785393714905, "learning_rate": 0.0001681877851284784, "loss": 1.3796, "step": 12254 }, { "epoch": 0.15924813568902757, "grad_norm": 0.32457953691482544, "learning_rate": 0.00016818518566656703, "loss": 1.3525, "step": 12255 }, { "epoch": 0.15926113023294344, "grad_norm": 0.4039461016654968, "learning_rate": 0.00016818258620465563, "loss": 1.3716, "step": 12256 }, { "epoch": 0.15927412477685932, "grad_norm": 0.3441017270088196, "learning_rate": 0.00016817998674274428, "loss": 1.4265, "step": 12257 }, { "epoch": 0.1592871193207752, "grad_norm": 0.3918721079826355, "learning_rate": 0.00016817738728083288, "loss": 1.4946, "step": 12258 }, { "epoch": 0.15930011386469106, "grad_norm": 0.45541828870773315, "learning_rate": 0.00016817478781892147, "loss": 1.3818, "step": 12259 }, { "epoch": 0.15931310840860693, "grad_norm": 0.3422129452228546, "learning_rate": 0.0001681721883570101, "loss": 1.373, "step": 12260 }, { "epoch": 0.1593261029525228, "grad_norm": 0.43476754426956177, "learning_rate": 0.00016816958889509872, "loss": 1.5534, "step": 12261 }, { "epoch": 0.15933909749643868, "grad_norm": 0.3889281153678894, "learning_rate": 0.00016816698943318735, "loss": 1.4018, "step": 12262 }, { "epoch": 0.15935209204035455, "grad_norm": 0.360443651676178, "learning_rate": 0.00016816438997127595, "loss": 1.3878, "step": 12263 }, { "epoch": 0.15936508658427043, "grad_norm": 0.4079136848449707, "learning_rate": 0.00016816179050936457, "loss": 1.433, "step": 12264 }, { "epoch": 0.1593780811281863, "grad_norm": 0.4729766249656677, "learning_rate": 0.0001681591910474532, "loss": 1.5027, "step": 12265 }, { "epoch": 0.15939107567210217, "grad_norm": 0.3890770971775055, "learning_rate": 0.0001681565915855418, "loss": 1.4746, "step": 12266 }, { "epoch": 0.15940407021601805, "grad_norm": 0.4352772533893585, "learning_rate": 0.00016815399212363042, "loss": 1.3511, "step": 12267 }, { "epoch": 0.15941706475993392, "grad_norm": 0.4645844101905823, "learning_rate": 0.00016815139266171904, "loss": 1.5011, "step": 12268 }, { "epoch": 0.1594300593038498, "grad_norm": 0.4356909692287445, "learning_rate": 0.00016814879319980767, "loss": 1.3281, "step": 12269 }, { "epoch": 0.15944305384776566, "grad_norm": 0.45108431577682495, "learning_rate": 0.00016814619373789626, "loss": 1.4642, "step": 12270 }, { "epoch": 0.15945604839168154, "grad_norm": 0.4070507287979126, "learning_rate": 0.00016814359427598486, "loss": 1.4571, "step": 12271 }, { "epoch": 0.1594690429355974, "grad_norm": 0.4008220434188843, "learning_rate": 0.0001681409948140735, "loss": 1.5671, "step": 12272 }, { "epoch": 0.15948203747951328, "grad_norm": 0.44126230478286743, "learning_rate": 0.0001681383953521621, "loss": 1.3447, "step": 12273 }, { "epoch": 0.15949503202342916, "grad_norm": 0.4537971317768097, "learning_rate": 0.00016813579589025073, "loss": 1.4687, "step": 12274 }, { "epoch": 0.15950802656734503, "grad_norm": 0.4635203182697296, "learning_rate": 0.00016813319642833933, "loss": 1.3791, "step": 12275 }, { "epoch": 0.1595210211112609, "grad_norm": 0.4206159710884094, "learning_rate": 0.00016813059696642796, "loss": 1.3304, "step": 12276 }, { "epoch": 0.15953401565517678, "grad_norm": 0.40822598338127136, "learning_rate": 0.00016812799750451658, "loss": 1.3977, "step": 12277 }, { "epoch": 0.15954701019909265, "grad_norm": 0.3308940827846527, "learning_rate": 0.00016812539804260518, "loss": 1.3779, "step": 12278 }, { "epoch": 0.15956000474300852, "grad_norm": 0.38944390416145325, "learning_rate": 0.0001681227985806938, "loss": 1.5612, "step": 12279 }, { "epoch": 0.1595729992869244, "grad_norm": 0.38299763202667236, "learning_rate": 0.00016812019911878243, "loss": 1.4782, "step": 12280 }, { "epoch": 0.15958599383084027, "grad_norm": 0.3599013686180115, "learning_rate": 0.00016811759965687105, "loss": 1.2701, "step": 12281 }, { "epoch": 0.15959898837475614, "grad_norm": 0.30971989035606384, "learning_rate": 0.00016811500019495965, "loss": 1.1869, "step": 12282 }, { "epoch": 0.159611982918672, "grad_norm": 0.2928314208984375, "learning_rate": 0.00016811240073304825, "loss": 0.9674, "step": 12283 }, { "epoch": 0.15962497746258789, "grad_norm": 0.36406126618385315, "learning_rate": 0.0001681098012711369, "loss": 1.3816, "step": 12284 }, { "epoch": 0.15963797200650376, "grad_norm": 0.4082266092300415, "learning_rate": 0.0001681072018092255, "loss": 1.3682, "step": 12285 }, { "epoch": 0.15965096655041963, "grad_norm": 0.35065507888793945, "learning_rate": 0.00016810460234731412, "loss": 1.3165, "step": 12286 }, { "epoch": 0.1596639610943355, "grad_norm": 0.47152179479599, "learning_rate": 0.00016810200288540272, "loss": 1.594, "step": 12287 }, { "epoch": 0.15967695563825138, "grad_norm": 0.42153266072273254, "learning_rate": 0.00016809940342349134, "loss": 1.5297, "step": 12288 }, { "epoch": 0.15968995018216725, "grad_norm": 0.4511811435222626, "learning_rate": 0.00016809680396157997, "loss": 1.2811, "step": 12289 }, { "epoch": 0.15970294472608312, "grad_norm": 0.487887978553772, "learning_rate": 0.00016809420449966856, "loss": 1.3768, "step": 12290 }, { "epoch": 0.159715939269999, "grad_norm": 0.3068332076072693, "learning_rate": 0.0001680916050377572, "loss": 1.2984, "step": 12291 }, { "epoch": 0.15972893381391487, "grad_norm": 0.3803914785385132, "learning_rate": 0.0001680890055758458, "loss": 1.4536, "step": 12292 }, { "epoch": 0.15974192835783074, "grad_norm": 0.2833569645881653, "learning_rate": 0.00016808640611393444, "loss": 1.4077, "step": 12293 }, { "epoch": 0.15975492290174662, "grad_norm": 0.43343907594680786, "learning_rate": 0.00016808380665202303, "loss": 1.4809, "step": 12294 }, { "epoch": 0.15976791744566252, "grad_norm": 0.5081380605697632, "learning_rate": 0.00016808120719011166, "loss": 1.4445, "step": 12295 }, { "epoch": 0.1597809119895784, "grad_norm": 0.341413676738739, "learning_rate": 0.00016807860772820028, "loss": 1.5356, "step": 12296 }, { "epoch": 0.15979390653349426, "grad_norm": 0.3639095425605774, "learning_rate": 0.00016807600826628888, "loss": 1.2932, "step": 12297 }, { "epoch": 0.15980690107741014, "grad_norm": 0.41499632596969604, "learning_rate": 0.0001680734088043775, "loss": 1.3549, "step": 12298 }, { "epoch": 0.159819895621326, "grad_norm": 0.390863835811615, "learning_rate": 0.0001680708093424661, "loss": 1.3866, "step": 12299 }, { "epoch": 0.15983289016524188, "grad_norm": 0.48037195205688477, "learning_rate": 0.00016806820988055473, "loss": 1.582, "step": 12300 }, { "epoch": 0.15984588470915775, "grad_norm": 0.337342232465744, "learning_rate": 0.00016806561041864335, "loss": 1.4569, "step": 12301 }, { "epoch": 0.15985887925307363, "grad_norm": 0.3409208059310913, "learning_rate": 0.00016806301095673195, "loss": 1.4153, "step": 12302 }, { "epoch": 0.1598718737969895, "grad_norm": 0.9956346154212952, "learning_rate": 0.0001680604114948206, "loss": 1.3828, "step": 12303 }, { "epoch": 0.15988486834090537, "grad_norm": 0.29615992307662964, "learning_rate": 0.0001680578120329092, "loss": 1.4366, "step": 12304 }, { "epoch": 0.15989786288482125, "grad_norm": 0.347982794046402, "learning_rate": 0.00016805521257099782, "loss": 1.3476, "step": 12305 }, { "epoch": 0.15991085742873712, "grad_norm": 0.4028204083442688, "learning_rate": 0.00016805261310908642, "loss": 1.3924, "step": 12306 }, { "epoch": 0.159923851972653, "grad_norm": 0.38938236236572266, "learning_rate": 0.00016805001364717504, "loss": 1.5419, "step": 12307 }, { "epoch": 0.15993684651656886, "grad_norm": 0.5025116801261902, "learning_rate": 0.00016804741418526367, "loss": 1.5345, "step": 12308 }, { "epoch": 0.15994984106048474, "grad_norm": 0.5307797789573669, "learning_rate": 0.00016804481472335227, "loss": 1.3458, "step": 12309 }, { "epoch": 0.1599628356044006, "grad_norm": 0.422865092754364, "learning_rate": 0.0001680422152614409, "loss": 1.3635, "step": 12310 }, { "epoch": 0.15997583014831648, "grad_norm": 0.4449157416820526, "learning_rate": 0.00016803961579952951, "loss": 1.5415, "step": 12311 }, { "epoch": 0.15998882469223236, "grad_norm": 0.3656712770462036, "learning_rate": 0.00016803701633761814, "loss": 1.375, "step": 12312 }, { "epoch": 0.16000181923614823, "grad_norm": 0.3500148355960846, "learning_rate": 0.00016803441687570674, "loss": 1.3688, "step": 12313 }, { "epoch": 0.1600148137800641, "grad_norm": 0.5006784200668335, "learning_rate": 0.00016803181741379533, "loss": 1.4751, "step": 12314 }, { "epoch": 0.16002780832397998, "grad_norm": 0.4967231750488281, "learning_rate": 0.00016802921795188398, "loss": 1.454, "step": 12315 }, { "epoch": 0.16004080286789585, "grad_norm": 0.4023984372615814, "learning_rate": 0.00016802661848997258, "loss": 1.4578, "step": 12316 }, { "epoch": 0.16005379741181172, "grad_norm": 0.4796692728996277, "learning_rate": 0.0001680240190280612, "loss": 1.4378, "step": 12317 }, { "epoch": 0.1600667919557276, "grad_norm": 0.39401862025260925, "learning_rate": 0.0001680214195661498, "loss": 1.363, "step": 12318 }, { "epoch": 0.16007978649964347, "grad_norm": 0.41930028796195984, "learning_rate": 0.00016801882010423843, "loss": 1.4318, "step": 12319 }, { "epoch": 0.16009278104355934, "grad_norm": 0.3851275146007538, "learning_rate": 0.00016801622064232705, "loss": 1.3894, "step": 12320 }, { "epoch": 0.1601057755874752, "grad_norm": 0.3232443034648895, "learning_rate": 0.00016801362118041565, "loss": 1.3706, "step": 12321 }, { "epoch": 0.16011877013139109, "grad_norm": 0.39164260029792786, "learning_rate": 0.00016801102171850427, "loss": 1.5531, "step": 12322 }, { "epoch": 0.16013176467530696, "grad_norm": 0.4041592478752136, "learning_rate": 0.0001680084222565929, "loss": 1.5064, "step": 12323 }, { "epoch": 0.16014475921922283, "grad_norm": 0.4000818431377411, "learning_rate": 0.00016800582279468152, "loss": 1.5579, "step": 12324 }, { "epoch": 0.1601577537631387, "grad_norm": 0.40931230783462524, "learning_rate": 0.00016800322333277012, "loss": 1.2782, "step": 12325 }, { "epoch": 0.16017074830705458, "grad_norm": 0.3512624204158783, "learning_rate": 0.00016800062387085872, "loss": 1.3011, "step": 12326 }, { "epoch": 0.16018374285097045, "grad_norm": 0.3761516511440277, "learning_rate": 0.00016799802440894737, "loss": 1.4862, "step": 12327 }, { "epoch": 0.16019673739488632, "grad_norm": 0.32938069105148315, "learning_rate": 0.00016799542494703597, "loss": 1.37, "step": 12328 }, { "epoch": 0.1602097319388022, "grad_norm": 0.4327149987220764, "learning_rate": 0.0001679928254851246, "loss": 1.3948, "step": 12329 }, { "epoch": 0.16022272648271807, "grad_norm": 0.3925078511238098, "learning_rate": 0.0001679902260232132, "loss": 1.4822, "step": 12330 }, { "epoch": 0.16023572102663394, "grad_norm": 0.36957135796546936, "learning_rate": 0.00016798762656130181, "loss": 1.3715, "step": 12331 }, { "epoch": 0.16024871557054982, "grad_norm": 0.47471681237220764, "learning_rate": 0.00016798502709939044, "loss": 1.4818, "step": 12332 }, { "epoch": 0.1602617101144657, "grad_norm": 0.4172595739364624, "learning_rate": 0.00016798242763747904, "loss": 1.6152, "step": 12333 }, { "epoch": 0.16027470465838156, "grad_norm": 0.43155524134635925, "learning_rate": 0.00016797982817556766, "loss": 1.5537, "step": 12334 }, { "epoch": 0.16028769920229743, "grad_norm": 0.319709450006485, "learning_rate": 0.00016797722871365628, "loss": 1.3403, "step": 12335 }, { "epoch": 0.1603006937462133, "grad_norm": 0.37101930379867554, "learning_rate": 0.0001679746292517449, "loss": 1.3522, "step": 12336 }, { "epoch": 0.16031368829012918, "grad_norm": 0.3466882109642029, "learning_rate": 0.0001679720297898335, "loss": 1.2713, "step": 12337 }, { "epoch": 0.16032668283404505, "grad_norm": 0.4527888000011444, "learning_rate": 0.0001679694303279221, "loss": 1.5937, "step": 12338 }, { "epoch": 0.16033967737796093, "grad_norm": 0.3342328369617462, "learning_rate": 0.00016796683086601076, "loss": 1.2712, "step": 12339 }, { "epoch": 0.1603526719218768, "grad_norm": 0.448095440864563, "learning_rate": 0.00016796423140409935, "loss": 1.3891, "step": 12340 }, { "epoch": 0.16036566646579267, "grad_norm": 0.4362563192844391, "learning_rate": 0.00016796163194218798, "loss": 1.2109, "step": 12341 }, { "epoch": 0.16037866100970855, "grad_norm": 0.3978665769100189, "learning_rate": 0.0001679590324802766, "loss": 1.2336, "step": 12342 }, { "epoch": 0.16039165555362442, "grad_norm": 0.5934082865715027, "learning_rate": 0.0001679564330183652, "loss": 1.5638, "step": 12343 }, { "epoch": 0.1604046500975403, "grad_norm": 0.3339918851852417, "learning_rate": 0.00016795383355645382, "loss": 1.3778, "step": 12344 }, { "epoch": 0.16041764464145616, "grad_norm": 0.4134880304336548, "learning_rate": 0.00016795123409454242, "loss": 1.5087, "step": 12345 }, { "epoch": 0.16043063918537204, "grad_norm": 0.5018779039382935, "learning_rate": 0.00016794863463263107, "loss": 1.2578, "step": 12346 }, { "epoch": 0.1604436337292879, "grad_norm": 0.3423566222190857, "learning_rate": 0.00016794603517071967, "loss": 1.3727, "step": 12347 }, { "epoch": 0.16045662827320378, "grad_norm": 0.442096471786499, "learning_rate": 0.0001679434357088083, "loss": 1.6549, "step": 12348 }, { "epoch": 0.16046962281711966, "grad_norm": 0.42707157135009766, "learning_rate": 0.0001679408362468969, "loss": 1.3967, "step": 12349 }, { "epoch": 0.16048261736103553, "grad_norm": 0.34618017077445984, "learning_rate": 0.00016793823678498552, "loss": 1.3041, "step": 12350 }, { "epoch": 0.1604956119049514, "grad_norm": 0.4824414551258087, "learning_rate": 0.00016793563732307414, "loss": 1.4628, "step": 12351 }, { "epoch": 0.16050860644886727, "grad_norm": 0.41753262281417847, "learning_rate": 0.00016793303786116274, "loss": 1.3927, "step": 12352 }, { "epoch": 0.16052160099278315, "grad_norm": 0.49709153175354004, "learning_rate": 0.00016793043839925136, "loss": 1.4807, "step": 12353 }, { "epoch": 0.16053459553669902, "grad_norm": 0.4568771719932556, "learning_rate": 0.00016792783893734, "loss": 1.4595, "step": 12354 }, { "epoch": 0.1605475900806149, "grad_norm": 0.4320828914642334, "learning_rate": 0.00016792523947542858, "loss": 1.5625, "step": 12355 }, { "epoch": 0.16056058462453077, "grad_norm": 0.38529759645462036, "learning_rate": 0.0001679226400135172, "loss": 1.439, "step": 12356 }, { "epoch": 0.16057357916844664, "grad_norm": 0.37635865807533264, "learning_rate": 0.0001679200405516058, "loss": 1.3404, "step": 12357 }, { "epoch": 0.1605865737123625, "grad_norm": 0.42272937297821045, "learning_rate": 0.00016791744108969446, "loss": 1.5292, "step": 12358 }, { "epoch": 0.16059956825627839, "grad_norm": 0.40684419870376587, "learning_rate": 0.00016791484162778306, "loss": 1.2902, "step": 12359 }, { "epoch": 0.16061256280019426, "grad_norm": 0.4250040650367737, "learning_rate": 0.00016791224216587168, "loss": 1.4581, "step": 12360 }, { "epoch": 0.16062555734411013, "grad_norm": 0.46793752908706665, "learning_rate": 0.00016790964270396028, "loss": 1.4947, "step": 12361 }, { "epoch": 0.160638551888026, "grad_norm": 0.3696674406528473, "learning_rate": 0.0001679070432420489, "loss": 1.3373, "step": 12362 }, { "epoch": 0.16065154643194188, "grad_norm": 0.3296555280685425, "learning_rate": 0.00016790444378013753, "loss": 1.47, "step": 12363 }, { "epoch": 0.16066454097585775, "grad_norm": 0.37473469972610474, "learning_rate": 0.00016790184431822612, "loss": 1.4424, "step": 12364 }, { "epoch": 0.16067753551977362, "grad_norm": 0.31683987379074097, "learning_rate": 0.00016789924485631475, "loss": 1.2346, "step": 12365 }, { "epoch": 0.1606905300636895, "grad_norm": 0.32565101981163025, "learning_rate": 0.00016789664539440337, "loss": 1.3305, "step": 12366 }, { "epoch": 0.16070352460760537, "grad_norm": 0.37582066655158997, "learning_rate": 0.00016789404593249197, "loss": 1.414, "step": 12367 }, { "epoch": 0.16071651915152124, "grad_norm": 0.3694254159927368, "learning_rate": 0.0001678914464705806, "loss": 1.1283, "step": 12368 }, { "epoch": 0.16072951369543712, "grad_norm": 0.38893142342567444, "learning_rate": 0.0001678888470086692, "loss": 1.2784, "step": 12369 }, { "epoch": 0.160742508239353, "grad_norm": 0.3975680470466614, "learning_rate": 0.00016788624754675784, "loss": 1.475, "step": 12370 }, { "epoch": 0.1607555027832689, "grad_norm": 0.4705055356025696, "learning_rate": 0.00016788364808484644, "loss": 1.5295, "step": 12371 }, { "epoch": 0.16076849732718476, "grad_norm": 0.38787126541137695, "learning_rate": 0.00016788104862293507, "loss": 1.5269, "step": 12372 }, { "epoch": 0.16078149187110063, "grad_norm": 0.44869711995124817, "learning_rate": 0.00016787844916102366, "loss": 1.5004, "step": 12373 }, { "epoch": 0.1607944864150165, "grad_norm": 0.27974042296409607, "learning_rate": 0.0001678758496991123, "loss": 1.2975, "step": 12374 }, { "epoch": 0.16080748095893238, "grad_norm": 0.38531556725502014, "learning_rate": 0.0001678732502372009, "loss": 1.5095, "step": 12375 }, { "epoch": 0.16082047550284825, "grad_norm": 0.4017702341079712, "learning_rate": 0.0001678706507752895, "loss": 1.4914, "step": 12376 }, { "epoch": 0.16083347004676413, "grad_norm": 0.4168252646923065, "learning_rate": 0.00016786805131337816, "loss": 1.6041, "step": 12377 }, { "epoch": 0.16084646459068, "grad_norm": 0.35655486583709717, "learning_rate": 0.00016786545185146676, "loss": 1.6019, "step": 12378 }, { "epoch": 0.16085945913459587, "grad_norm": 0.4495946764945984, "learning_rate": 0.00016786285238955538, "loss": 1.3708, "step": 12379 }, { "epoch": 0.16087245367851175, "grad_norm": 0.4912163317203522, "learning_rate": 0.00016786025292764398, "loss": 1.4714, "step": 12380 }, { "epoch": 0.16088544822242762, "grad_norm": 0.4345300495624542, "learning_rate": 0.0001678576534657326, "loss": 1.5672, "step": 12381 }, { "epoch": 0.1608984427663435, "grad_norm": 0.38896751403808594, "learning_rate": 0.00016785505400382123, "loss": 1.6449, "step": 12382 }, { "epoch": 0.16091143731025936, "grad_norm": 0.3724277913570404, "learning_rate": 0.00016785245454190983, "loss": 1.3915, "step": 12383 }, { "epoch": 0.16092443185417524, "grad_norm": 0.3206885755062103, "learning_rate": 0.00016784985507999845, "loss": 1.2872, "step": 12384 }, { "epoch": 0.1609374263980911, "grad_norm": 0.4560156762599945, "learning_rate": 0.00016784725561808708, "loss": 1.535, "step": 12385 }, { "epoch": 0.16095042094200698, "grad_norm": 0.4259679615497589, "learning_rate": 0.00016784465615617567, "loss": 1.4153, "step": 12386 }, { "epoch": 0.16096341548592286, "grad_norm": 0.4947696626186371, "learning_rate": 0.0001678420566942643, "loss": 1.3949, "step": 12387 }, { "epoch": 0.16097641002983873, "grad_norm": 0.41922178864479065, "learning_rate": 0.0001678394572323529, "loss": 1.3765, "step": 12388 }, { "epoch": 0.1609894045737546, "grad_norm": 0.41384056210517883, "learning_rate": 0.00016783685777044155, "loss": 1.6216, "step": 12389 }, { "epoch": 0.16100239911767047, "grad_norm": 0.43948566913604736, "learning_rate": 0.00016783425830853014, "loss": 1.6283, "step": 12390 }, { "epoch": 0.16101539366158635, "grad_norm": 0.47521379590034485, "learning_rate": 0.00016783165884661877, "loss": 1.5599, "step": 12391 }, { "epoch": 0.16102838820550222, "grad_norm": 0.3941165804862976, "learning_rate": 0.00016782905938470737, "loss": 1.4446, "step": 12392 }, { "epoch": 0.1610413827494181, "grad_norm": 0.33074066042900085, "learning_rate": 0.000167826459922796, "loss": 1.2219, "step": 12393 }, { "epoch": 0.16105437729333397, "grad_norm": 0.38846495747566223, "learning_rate": 0.00016782386046088461, "loss": 1.3052, "step": 12394 }, { "epoch": 0.16106737183724984, "grad_norm": 0.3925749957561493, "learning_rate": 0.0001678212609989732, "loss": 1.6176, "step": 12395 }, { "epoch": 0.1610803663811657, "grad_norm": 0.3784163296222687, "learning_rate": 0.00016781866153706184, "loss": 1.391, "step": 12396 }, { "epoch": 0.16109336092508159, "grad_norm": 0.44950613379478455, "learning_rate": 0.00016781606207515046, "loss": 1.3998, "step": 12397 }, { "epoch": 0.16110635546899746, "grad_norm": 0.16205565631389618, "learning_rate": 0.00016781346261323906, "loss": 1.2534, "step": 12398 }, { "epoch": 0.16111935001291333, "grad_norm": 0.361299991607666, "learning_rate": 0.00016781086315132768, "loss": 1.5207, "step": 12399 }, { "epoch": 0.1611323445568292, "grad_norm": 0.3345767855644226, "learning_rate": 0.00016780826368941628, "loss": 1.5089, "step": 12400 }, { "epoch": 0.16114533910074508, "grad_norm": 0.41074296832084656, "learning_rate": 0.00016780566422750493, "loss": 1.5026, "step": 12401 }, { "epoch": 0.16115833364466095, "grad_norm": 0.4379768967628479, "learning_rate": 0.00016780306476559353, "loss": 1.3023, "step": 12402 }, { "epoch": 0.16117132818857682, "grad_norm": 0.4849367141723633, "learning_rate": 0.00016780046530368215, "loss": 1.3242, "step": 12403 }, { "epoch": 0.1611843227324927, "grad_norm": 0.3181443214416504, "learning_rate": 0.00016779786584177075, "loss": 1.4434, "step": 12404 }, { "epoch": 0.16119731727640857, "grad_norm": 0.47958919405937195, "learning_rate": 0.00016779526637985938, "loss": 1.4558, "step": 12405 }, { "epoch": 0.16121031182032444, "grad_norm": 0.3560980558395386, "learning_rate": 0.000167792666917948, "loss": 1.4342, "step": 12406 }, { "epoch": 0.16122330636424032, "grad_norm": 0.4329281449317932, "learning_rate": 0.0001677900674560366, "loss": 1.3874, "step": 12407 }, { "epoch": 0.1612363009081562, "grad_norm": 0.4536091983318329, "learning_rate": 0.00016778746799412522, "loss": 1.5581, "step": 12408 }, { "epoch": 0.16124929545207206, "grad_norm": 0.3778620958328247, "learning_rate": 0.00016778486853221385, "loss": 1.3726, "step": 12409 }, { "epoch": 0.16126228999598793, "grad_norm": 0.3825274109840393, "learning_rate": 0.00016778226907030244, "loss": 1.2303, "step": 12410 }, { "epoch": 0.1612752845399038, "grad_norm": 0.5703203678131104, "learning_rate": 0.00016777966960839107, "loss": 1.5041, "step": 12411 }, { "epoch": 0.16128827908381968, "grad_norm": 0.41571110486984253, "learning_rate": 0.00016777707014647967, "loss": 1.2696, "step": 12412 }, { "epoch": 0.16130127362773555, "grad_norm": 0.4587453305721283, "learning_rate": 0.00016777447068456832, "loss": 1.5076, "step": 12413 }, { "epoch": 0.16131426817165143, "grad_norm": 0.39475521445274353, "learning_rate": 0.00016777187122265691, "loss": 1.5531, "step": 12414 }, { "epoch": 0.1613272627155673, "grad_norm": 0.44247695803642273, "learning_rate": 0.00016776927176074554, "loss": 1.4369, "step": 12415 }, { "epoch": 0.16134025725948317, "grad_norm": 0.5244262218475342, "learning_rate": 0.00016776667229883416, "loss": 1.456, "step": 12416 }, { "epoch": 0.16135325180339904, "grad_norm": 0.39449018239974976, "learning_rate": 0.00016776407283692276, "loss": 1.3718, "step": 12417 }, { "epoch": 0.16136624634731492, "grad_norm": 0.3736203908920288, "learning_rate": 0.00016776147337501139, "loss": 1.5875, "step": 12418 }, { "epoch": 0.1613792408912308, "grad_norm": 0.3177570402622223, "learning_rate": 0.00016775887391309998, "loss": 1.2373, "step": 12419 }, { "epoch": 0.16139223543514666, "grad_norm": 0.3923036456108093, "learning_rate": 0.00016775627445118863, "loss": 1.4983, "step": 12420 }, { "epoch": 0.16140522997906254, "grad_norm": 0.31973084807395935, "learning_rate": 0.00016775367498927723, "loss": 1.274, "step": 12421 }, { "epoch": 0.1614182245229784, "grad_norm": 0.5027869939804077, "learning_rate": 0.00016775107552736583, "loss": 1.376, "step": 12422 }, { "epoch": 0.16143121906689428, "grad_norm": 0.3654315173625946, "learning_rate": 0.00016774847606545445, "loss": 1.2902, "step": 12423 }, { "epoch": 0.16144421361081016, "grad_norm": 0.42000365257263184, "learning_rate": 0.00016774587660354308, "loss": 1.3067, "step": 12424 }, { "epoch": 0.16145720815472603, "grad_norm": 0.37522441148757935, "learning_rate": 0.0001677432771416317, "loss": 1.4023, "step": 12425 }, { "epoch": 0.1614702026986419, "grad_norm": 0.4151870012283325, "learning_rate": 0.0001677406776797203, "loss": 1.4056, "step": 12426 }, { "epoch": 0.16148319724255777, "grad_norm": 0.32252201437950134, "learning_rate": 0.00016773807821780892, "loss": 1.5134, "step": 12427 }, { "epoch": 0.16149619178647365, "grad_norm": 0.3820458650588989, "learning_rate": 0.00016773547875589755, "loss": 1.4787, "step": 12428 }, { "epoch": 0.16150918633038952, "grad_norm": 0.41924530267715454, "learning_rate": 0.00016773287929398615, "loss": 1.1841, "step": 12429 }, { "epoch": 0.1615221808743054, "grad_norm": 0.35878345370292664, "learning_rate": 0.00016773027983207477, "loss": 1.3852, "step": 12430 }, { "epoch": 0.16153517541822127, "grad_norm": 0.36791571974754333, "learning_rate": 0.00016772768037016337, "loss": 1.4378, "step": 12431 }, { "epoch": 0.16154816996213714, "grad_norm": 0.3435977101325989, "learning_rate": 0.00016772508090825202, "loss": 1.2819, "step": 12432 }, { "epoch": 0.161561164506053, "grad_norm": 0.42321762442588806, "learning_rate": 0.00016772248144634062, "loss": 1.3297, "step": 12433 }, { "epoch": 0.16157415904996889, "grad_norm": 0.37435030937194824, "learning_rate": 0.00016771988198442924, "loss": 1.3103, "step": 12434 }, { "epoch": 0.16158715359388476, "grad_norm": 0.38132244348526, "learning_rate": 0.00016771728252251784, "loss": 1.4204, "step": 12435 }, { "epoch": 0.16160014813780063, "grad_norm": 0.3555836081504822, "learning_rate": 0.00016771468306060646, "loss": 1.3865, "step": 12436 }, { "epoch": 0.1616131426817165, "grad_norm": 0.3584928512573242, "learning_rate": 0.0001677120835986951, "loss": 1.583, "step": 12437 }, { "epoch": 0.16162613722563238, "grad_norm": 0.5064800977706909, "learning_rate": 0.00016770948413678369, "loss": 1.4038, "step": 12438 }, { "epoch": 0.16163913176954825, "grad_norm": 0.44835808873176575, "learning_rate": 0.0001677068846748723, "loss": 1.3392, "step": 12439 }, { "epoch": 0.16165212631346412, "grad_norm": 0.49193501472473145, "learning_rate": 0.00016770428521296093, "loss": 1.5003, "step": 12440 }, { "epoch": 0.16166512085738, "grad_norm": 0.4471583664417267, "learning_rate": 0.00016770168575104953, "loss": 1.4338, "step": 12441 }, { "epoch": 0.16167811540129587, "grad_norm": 0.3750327527523041, "learning_rate": 0.00016769908628913816, "loss": 1.322, "step": 12442 }, { "epoch": 0.16169110994521174, "grad_norm": 0.4402744472026825, "learning_rate": 0.00016769648682722675, "loss": 1.2689, "step": 12443 }, { "epoch": 0.16170410448912761, "grad_norm": 0.3910582363605499, "learning_rate": 0.0001676938873653154, "loss": 1.4286, "step": 12444 }, { "epoch": 0.1617170990330435, "grad_norm": 0.40360188484191895, "learning_rate": 0.000167691287903404, "loss": 1.4026, "step": 12445 }, { "epoch": 0.16173009357695936, "grad_norm": 0.39934396743774414, "learning_rate": 0.00016768868844149263, "loss": 1.3837, "step": 12446 }, { "epoch": 0.16174308812087526, "grad_norm": 0.38112446665763855, "learning_rate": 0.00016768608897958122, "loss": 1.3256, "step": 12447 }, { "epoch": 0.16175608266479113, "grad_norm": 0.3562523126602173, "learning_rate": 0.00016768348951766985, "loss": 1.4838, "step": 12448 }, { "epoch": 0.161769077208707, "grad_norm": 0.31360092759132385, "learning_rate": 0.00016768089005575847, "loss": 1.405, "step": 12449 }, { "epoch": 0.16178207175262288, "grad_norm": 0.3318835198879242, "learning_rate": 0.00016767829059384707, "loss": 1.341, "step": 12450 }, { "epoch": 0.16179506629653875, "grad_norm": 0.3418152630329132, "learning_rate": 0.0001676756911319357, "loss": 1.6521, "step": 12451 }, { "epoch": 0.16180806084045463, "grad_norm": 0.5165520310401917, "learning_rate": 0.00016767309167002432, "loss": 1.6486, "step": 12452 }, { "epoch": 0.1618210553843705, "grad_norm": 0.4152046740055084, "learning_rate": 0.00016767049220811292, "loss": 1.5561, "step": 12453 }, { "epoch": 0.16183404992828637, "grad_norm": 0.27496403455734253, "learning_rate": 0.00016766789274620154, "loss": 1.2952, "step": 12454 }, { "epoch": 0.16184704447220224, "grad_norm": 0.3715158700942993, "learning_rate": 0.00016766529328429017, "loss": 1.344, "step": 12455 }, { "epoch": 0.16186003901611812, "grad_norm": 0.2977321445941925, "learning_rate": 0.0001676626938223788, "loss": 1.2016, "step": 12456 }, { "epoch": 0.161873033560034, "grad_norm": 0.3309260904788971, "learning_rate": 0.0001676600943604674, "loss": 1.2894, "step": 12457 }, { "epoch": 0.16188602810394986, "grad_norm": 0.3265513777732849, "learning_rate": 0.000167657494898556, "loss": 1.5233, "step": 12458 }, { "epoch": 0.16189902264786574, "grad_norm": 0.4204879403114319, "learning_rate": 0.00016765489543664464, "loss": 1.3116, "step": 12459 }, { "epoch": 0.1619120171917816, "grad_norm": 0.3536544144153595, "learning_rate": 0.00016765229597473323, "loss": 1.6288, "step": 12460 }, { "epoch": 0.16192501173569748, "grad_norm": 0.4077003598213196, "learning_rate": 0.00016764969651282186, "loss": 1.2719, "step": 12461 }, { "epoch": 0.16193800627961336, "grad_norm": 0.35277050733566284, "learning_rate": 0.00016764709705091046, "loss": 1.5044, "step": 12462 }, { "epoch": 0.16195100082352923, "grad_norm": 0.35302749276161194, "learning_rate": 0.0001676444975889991, "loss": 1.1694, "step": 12463 }, { "epoch": 0.1619639953674451, "grad_norm": 0.39460182189941406, "learning_rate": 0.0001676418981270877, "loss": 1.2709, "step": 12464 }, { "epoch": 0.16197698991136097, "grad_norm": 0.3833228349685669, "learning_rate": 0.0001676392986651763, "loss": 1.4144, "step": 12465 }, { "epoch": 0.16198998445527685, "grad_norm": 0.42872926592826843, "learning_rate": 0.00016763669920326493, "loss": 1.4237, "step": 12466 }, { "epoch": 0.16200297899919272, "grad_norm": 0.387791246175766, "learning_rate": 0.00016763409974135355, "loss": 1.3357, "step": 12467 }, { "epoch": 0.1620159735431086, "grad_norm": 0.34608373045921326, "learning_rate": 0.00016763150027944218, "loss": 1.3956, "step": 12468 }, { "epoch": 0.16202896808702447, "grad_norm": 0.3558301031589508, "learning_rate": 0.00016762890081753077, "loss": 1.4846, "step": 12469 }, { "epoch": 0.16204196263094034, "grad_norm": 0.4031662046909332, "learning_rate": 0.0001676263013556194, "loss": 1.4586, "step": 12470 }, { "epoch": 0.1620549571748562, "grad_norm": 0.2614748775959015, "learning_rate": 0.00016762370189370802, "loss": 1.3889, "step": 12471 }, { "epoch": 0.16206795171877209, "grad_norm": 0.4343404471874237, "learning_rate": 0.00016762110243179662, "loss": 1.5688, "step": 12472 }, { "epoch": 0.16208094626268796, "grad_norm": 0.38676416873931885, "learning_rate": 0.00016761850296988524, "loss": 1.3723, "step": 12473 }, { "epoch": 0.16209394080660383, "grad_norm": 0.34105879068374634, "learning_rate": 0.00016761590350797384, "loss": 1.4464, "step": 12474 }, { "epoch": 0.1621069353505197, "grad_norm": 0.4008832573890686, "learning_rate": 0.0001676133040460625, "loss": 1.4875, "step": 12475 }, { "epoch": 0.16211992989443558, "grad_norm": 0.3638954162597656, "learning_rate": 0.0001676107045841511, "loss": 1.3394, "step": 12476 }, { "epoch": 0.16213292443835145, "grad_norm": 0.2658711373806, "learning_rate": 0.0001676081051222397, "loss": 1.2147, "step": 12477 }, { "epoch": 0.16214591898226732, "grad_norm": 0.33120301365852356, "learning_rate": 0.0001676055056603283, "loss": 1.44, "step": 12478 }, { "epoch": 0.1621589135261832, "grad_norm": 0.42474862933158875, "learning_rate": 0.00016760290619841694, "loss": 1.4726, "step": 12479 }, { "epoch": 0.16217190807009907, "grad_norm": 0.43191882967948914, "learning_rate": 0.00016760030673650556, "loss": 1.3718, "step": 12480 }, { "epoch": 0.16218490261401494, "grad_norm": 0.27691856026649475, "learning_rate": 0.00016759770727459416, "loss": 1.3691, "step": 12481 }, { "epoch": 0.16219789715793081, "grad_norm": 0.4043032228946686, "learning_rate": 0.00016759510781268278, "loss": 1.524, "step": 12482 }, { "epoch": 0.1622108917018467, "grad_norm": 0.36862054467201233, "learning_rate": 0.0001675925083507714, "loss": 1.361, "step": 12483 }, { "epoch": 0.16222388624576256, "grad_norm": 0.4421246647834778, "learning_rate": 0.00016758990888886, "loss": 1.6674, "step": 12484 }, { "epoch": 0.16223688078967843, "grad_norm": 0.38296762108802795, "learning_rate": 0.00016758730942694863, "loss": 1.3305, "step": 12485 }, { "epoch": 0.1622498753335943, "grad_norm": 0.45953384041786194, "learning_rate": 0.00016758470996503723, "loss": 1.4367, "step": 12486 }, { "epoch": 0.16226286987751018, "grad_norm": 0.2831425368785858, "learning_rate": 0.00016758211050312588, "loss": 1.2269, "step": 12487 }, { "epoch": 0.16227586442142605, "grad_norm": 0.4589655101299286, "learning_rate": 0.00016757951104121448, "loss": 1.5357, "step": 12488 }, { "epoch": 0.16228885896534193, "grad_norm": 0.42549723386764526, "learning_rate": 0.00016757691157930307, "loss": 1.4394, "step": 12489 }, { "epoch": 0.1623018535092578, "grad_norm": 0.37913697957992554, "learning_rate": 0.00016757431211739172, "loss": 1.3221, "step": 12490 }, { "epoch": 0.16231484805317367, "grad_norm": 0.39310240745544434, "learning_rate": 0.00016757171265548032, "loss": 1.4348, "step": 12491 }, { "epoch": 0.16232784259708954, "grad_norm": 0.3992055058479309, "learning_rate": 0.00016756911319356895, "loss": 1.4737, "step": 12492 }, { "epoch": 0.16234083714100542, "grad_norm": 0.40475642681121826, "learning_rate": 0.00016756651373165754, "loss": 1.4655, "step": 12493 }, { "epoch": 0.1623538316849213, "grad_norm": 0.5873568058013916, "learning_rate": 0.00016756391426974617, "loss": 1.6717, "step": 12494 }, { "epoch": 0.16236682622883716, "grad_norm": 0.41350436210632324, "learning_rate": 0.0001675613148078348, "loss": 1.5462, "step": 12495 }, { "epoch": 0.16237982077275304, "grad_norm": 0.34503740072250366, "learning_rate": 0.0001675587153459234, "loss": 1.4019, "step": 12496 }, { "epoch": 0.1623928153166689, "grad_norm": 0.6243388056755066, "learning_rate": 0.00016755611588401201, "loss": 1.3853, "step": 12497 }, { "epoch": 0.16240580986058478, "grad_norm": 0.6286265254020691, "learning_rate": 0.00016755351642210064, "loss": 1.5795, "step": 12498 }, { "epoch": 0.16241880440450066, "grad_norm": 0.43260112404823303, "learning_rate": 0.00016755091696018926, "loss": 1.4648, "step": 12499 }, { "epoch": 0.16243179894841653, "grad_norm": 0.4709896147251129, "learning_rate": 0.00016754831749827786, "loss": 1.5809, "step": 12500 }, { "epoch": 0.1624447934923324, "grad_norm": 0.3622850775718689, "learning_rate": 0.00016754571803636649, "loss": 1.4312, "step": 12501 }, { "epoch": 0.16245778803624827, "grad_norm": 0.373507022857666, "learning_rate": 0.0001675431185744551, "loss": 1.4647, "step": 12502 }, { "epoch": 0.16247078258016415, "grad_norm": 0.37186363339424133, "learning_rate": 0.0001675405191125437, "loss": 1.4544, "step": 12503 }, { "epoch": 0.16248377712408002, "grad_norm": 0.4374506175518036, "learning_rate": 0.00016753791965063233, "loss": 1.5011, "step": 12504 }, { "epoch": 0.1624967716679959, "grad_norm": 0.3648233413696289, "learning_rate": 0.00016753532018872093, "loss": 1.4016, "step": 12505 }, { "epoch": 0.16250976621191177, "grad_norm": 0.40256452560424805, "learning_rate": 0.00016753272072680955, "loss": 1.4461, "step": 12506 }, { "epoch": 0.16252276075582764, "grad_norm": 0.3151097595691681, "learning_rate": 0.00016753012126489818, "loss": 1.4472, "step": 12507 }, { "epoch": 0.1625357552997435, "grad_norm": 0.5215485095977783, "learning_rate": 0.00016752752180298678, "loss": 1.5156, "step": 12508 }, { "epoch": 0.16254874984365938, "grad_norm": 0.3705575466156006, "learning_rate": 0.0001675249223410754, "loss": 1.5263, "step": 12509 }, { "epoch": 0.16256174438757526, "grad_norm": 0.3705456852912903, "learning_rate": 0.00016752232287916402, "loss": 1.5039, "step": 12510 }, { "epoch": 0.16257473893149113, "grad_norm": 0.3548305034637451, "learning_rate": 0.00016751972341725265, "loss": 1.4415, "step": 12511 }, { "epoch": 0.162587733475407, "grad_norm": 0.4036993086338043, "learning_rate": 0.00016751712395534125, "loss": 1.3671, "step": 12512 }, { "epoch": 0.16260072801932288, "grad_norm": 0.4407697021961212, "learning_rate": 0.00016751452449342987, "loss": 1.4945, "step": 12513 }, { "epoch": 0.16261372256323875, "grad_norm": 0.4536076784133911, "learning_rate": 0.0001675119250315185, "loss": 1.6528, "step": 12514 }, { "epoch": 0.16262671710715462, "grad_norm": 0.34685930609703064, "learning_rate": 0.0001675093255696071, "loss": 1.2501, "step": 12515 }, { "epoch": 0.1626397116510705, "grad_norm": 0.3214898407459259, "learning_rate": 0.00016750672610769572, "loss": 1.3126, "step": 12516 }, { "epoch": 0.16265270619498637, "grad_norm": 0.4461318552494049, "learning_rate": 0.00016750412664578431, "loss": 1.6137, "step": 12517 }, { "epoch": 0.16266570073890224, "grad_norm": 0.38026106357574463, "learning_rate": 0.00016750152718387297, "loss": 1.2828, "step": 12518 }, { "epoch": 0.16267869528281811, "grad_norm": 0.42775630950927734, "learning_rate": 0.00016749892772196156, "loss": 1.3382, "step": 12519 }, { "epoch": 0.162691689826734, "grad_norm": 0.31045591831207275, "learning_rate": 0.00016749632826005016, "loss": 1.2946, "step": 12520 }, { "epoch": 0.16270468437064986, "grad_norm": 0.4147631525993347, "learning_rate": 0.00016749372879813879, "loss": 1.5143, "step": 12521 }, { "epoch": 0.16271767891456573, "grad_norm": 0.4874553680419922, "learning_rate": 0.0001674911293362274, "loss": 1.5505, "step": 12522 }, { "epoch": 0.16273067345848163, "grad_norm": 0.5340845584869385, "learning_rate": 0.00016748852987431603, "loss": 1.5953, "step": 12523 }, { "epoch": 0.1627436680023975, "grad_norm": 0.4391802251338959, "learning_rate": 0.00016748593041240463, "loss": 1.2792, "step": 12524 }, { "epoch": 0.16275666254631338, "grad_norm": 0.48946189880371094, "learning_rate": 0.00016748333095049326, "loss": 1.5752, "step": 12525 }, { "epoch": 0.16276965709022925, "grad_norm": 0.4446520209312439, "learning_rate": 0.00016748073148858188, "loss": 1.3333, "step": 12526 }, { "epoch": 0.16278265163414513, "grad_norm": 0.31156834959983826, "learning_rate": 0.00016747813202667048, "loss": 1.4307, "step": 12527 }, { "epoch": 0.162795646178061, "grad_norm": 0.42377573251724243, "learning_rate": 0.0001674755325647591, "loss": 1.5881, "step": 12528 }, { "epoch": 0.16280864072197687, "grad_norm": 0.3825845420360565, "learning_rate": 0.00016747293310284773, "loss": 1.4812, "step": 12529 }, { "epoch": 0.16282163526589274, "grad_norm": 0.3442310094833374, "learning_rate": 0.00016747033364093635, "loss": 1.4224, "step": 12530 }, { "epoch": 0.16283462980980862, "grad_norm": 0.24969570338726044, "learning_rate": 0.00016746773417902495, "loss": 1.2607, "step": 12531 }, { "epoch": 0.1628476243537245, "grad_norm": 0.4235866963863373, "learning_rate": 0.00016746513471711355, "loss": 1.4143, "step": 12532 }, { "epoch": 0.16286061889764036, "grad_norm": 0.3431743085384369, "learning_rate": 0.0001674625352552022, "loss": 1.2748, "step": 12533 }, { "epoch": 0.16287361344155624, "grad_norm": 0.3926869034767151, "learning_rate": 0.0001674599357932908, "loss": 1.2887, "step": 12534 }, { "epoch": 0.1628866079854721, "grad_norm": 0.2974802553653717, "learning_rate": 0.00016745733633137942, "loss": 1.2394, "step": 12535 }, { "epoch": 0.16289960252938798, "grad_norm": 0.4136240482330322, "learning_rate": 0.00016745473686946802, "loss": 1.3044, "step": 12536 }, { "epoch": 0.16291259707330386, "grad_norm": 0.3392886519432068, "learning_rate": 0.00016745213740755664, "loss": 1.5434, "step": 12537 }, { "epoch": 0.16292559161721973, "grad_norm": 0.35140377283096313, "learning_rate": 0.00016744953794564527, "loss": 1.4271, "step": 12538 }, { "epoch": 0.1629385861611356, "grad_norm": 0.4330267012119293, "learning_rate": 0.00016744693848373386, "loss": 1.2443, "step": 12539 }, { "epoch": 0.16295158070505147, "grad_norm": 0.4170609712600708, "learning_rate": 0.0001674443390218225, "loss": 1.4508, "step": 12540 }, { "epoch": 0.16296457524896735, "grad_norm": 0.4131290316581726, "learning_rate": 0.0001674417395599111, "loss": 1.3705, "step": 12541 }, { "epoch": 0.16297756979288322, "grad_norm": 0.3768766224384308, "learning_rate": 0.00016743914009799974, "loss": 1.3165, "step": 12542 }, { "epoch": 0.1629905643367991, "grad_norm": 0.41880467534065247, "learning_rate": 0.00016743654063608833, "loss": 1.2335, "step": 12543 }, { "epoch": 0.16300355888071497, "grad_norm": 0.4210159480571747, "learning_rate": 0.00016743394117417693, "loss": 1.6584, "step": 12544 }, { "epoch": 0.16301655342463084, "grad_norm": 0.464741051197052, "learning_rate": 0.00016743134171226558, "loss": 1.4828, "step": 12545 }, { "epoch": 0.1630295479685467, "grad_norm": 0.24336490035057068, "learning_rate": 0.00016742874225035418, "loss": 1.5262, "step": 12546 }, { "epoch": 0.16304254251246258, "grad_norm": 0.406823992729187, "learning_rate": 0.0001674261427884428, "loss": 1.4437, "step": 12547 }, { "epoch": 0.16305553705637846, "grad_norm": 0.30817320942878723, "learning_rate": 0.0001674235433265314, "loss": 1.3974, "step": 12548 }, { "epoch": 0.16306853160029433, "grad_norm": 0.39377352595329285, "learning_rate": 0.00016742094386462003, "loss": 1.2797, "step": 12549 }, { "epoch": 0.1630815261442102, "grad_norm": 0.38271719217300415, "learning_rate": 0.00016741834440270865, "loss": 1.505, "step": 12550 }, { "epoch": 0.16309452068812608, "grad_norm": 0.48419830203056335, "learning_rate": 0.00016741574494079725, "loss": 1.4236, "step": 12551 }, { "epoch": 0.16310751523204195, "grad_norm": 0.37128719687461853, "learning_rate": 0.00016741314547888587, "loss": 1.3847, "step": 12552 }, { "epoch": 0.16312050977595782, "grad_norm": 0.339509516954422, "learning_rate": 0.0001674105460169745, "loss": 1.2947, "step": 12553 }, { "epoch": 0.1631335043198737, "grad_norm": 0.3616383969783783, "learning_rate": 0.00016740794655506312, "loss": 1.4312, "step": 12554 }, { "epoch": 0.16314649886378957, "grad_norm": 0.48535311222076416, "learning_rate": 0.00016740534709315172, "loss": 1.4295, "step": 12555 }, { "epoch": 0.16315949340770544, "grad_norm": 0.540007472038269, "learning_rate": 0.00016740274763124034, "loss": 1.5298, "step": 12556 }, { "epoch": 0.16317248795162131, "grad_norm": 0.36977851390838623, "learning_rate": 0.00016740014816932897, "loss": 1.3399, "step": 12557 }, { "epoch": 0.1631854824955372, "grad_norm": 0.4357253313064575, "learning_rate": 0.00016739754870741757, "loss": 1.3493, "step": 12558 }, { "epoch": 0.16319847703945306, "grad_norm": 0.3888311982154846, "learning_rate": 0.0001673949492455062, "loss": 1.414, "step": 12559 }, { "epoch": 0.16321147158336893, "grad_norm": 0.42950886487960815, "learning_rate": 0.0001673923497835948, "loss": 1.5973, "step": 12560 }, { "epoch": 0.1632244661272848, "grad_norm": 0.36851391196250916, "learning_rate": 0.0001673897503216834, "loss": 1.4451, "step": 12561 }, { "epoch": 0.16323746067120068, "grad_norm": 0.4680193066596985, "learning_rate": 0.00016738715085977204, "loss": 1.398, "step": 12562 }, { "epoch": 0.16325045521511655, "grad_norm": 0.3904152512550354, "learning_rate": 0.00016738455139786063, "loss": 1.2553, "step": 12563 }, { "epoch": 0.16326344975903243, "grad_norm": 0.4095899164676666, "learning_rate": 0.00016738195193594929, "loss": 1.5629, "step": 12564 }, { "epoch": 0.1632764443029483, "grad_norm": 0.3752861022949219, "learning_rate": 0.00016737935247403788, "loss": 1.3462, "step": 12565 }, { "epoch": 0.16328943884686417, "grad_norm": 0.46612271666526794, "learning_rate": 0.0001673767530121265, "loss": 1.4625, "step": 12566 }, { "epoch": 0.16330243339078004, "grad_norm": 0.38140806555747986, "learning_rate": 0.0001673741535502151, "loss": 1.1929, "step": 12567 }, { "epoch": 0.16331542793469592, "grad_norm": 0.3754453957080841, "learning_rate": 0.00016737155408830373, "loss": 1.5937, "step": 12568 }, { "epoch": 0.1633284224786118, "grad_norm": 0.2939801514148712, "learning_rate": 0.00016736895462639235, "loss": 1.1274, "step": 12569 }, { "epoch": 0.16334141702252766, "grad_norm": 0.3077393174171448, "learning_rate": 0.00016736635516448095, "loss": 1.3211, "step": 12570 }, { "epoch": 0.16335441156644354, "grad_norm": 0.30053406953811646, "learning_rate": 0.00016736375570256958, "loss": 1.4289, "step": 12571 }, { "epoch": 0.1633674061103594, "grad_norm": 0.4110419750213623, "learning_rate": 0.0001673611562406582, "loss": 1.2429, "step": 12572 }, { "epoch": 0.16338040065427528, "grad_norm": 0.4024268090724945, "learning_rate": 0.0001673585567787468, "loss": 1.4757, "step": 12573 }, { "epoch": 0.16339339519819115, "grad_norm": 0.42453181743621826, "learning_rate": 0.00016735595731683542, "loss": 1.5472, "step": 12574 }, { "epoch": 0.16340638974210703, "grad_norm": 0.41031795740127563, "learning_rate": 0.00016735335785492402, "loss": 1.5385, "step": 12575 }, { "epoch": 0.1634193842860229, "grad_norm": 0.6229802966117859, "learning_rate": 0.00016735075839301267, "loss": 1.5193, "step": 12576 }, { "epoch": 0.16343237882993877, "grad_norm": 0.45157402753829956, "learning_rate": 0.00016734815893110127, "loss": 1.4181, "step": 12577 }, { "epoch": 0.16344537337385465, "grad_norm": 0.32237404584884644, "learning_rate": 0.0001673455594691899, "loss": 1.4816, "step": 12578 }, { "epoch": 0.16345836791777052, "grad_norm": 0.4542950391769409, "learning_rate": 0.0001673429600072785, "loss": 1.7294, "step": 12579 }, { "epoch": 0.1634713624616864, "grad_norm": 0.462037056684494, "learning_rate": 0.00016734036054536712, "loss": 1.4178, "step": 12580 }, { "epoch": 0.16348435700560227, "grad_norm": 0.37257617712020874, "learning_rate": 0.00016733776108345574, "loss": 1.4599, "step": 12581 }, { "epoch": 0.16349735154951814, "grad_norm": 0.4069055914878845, "learning_rate": 0.00016733516162154434, "loss": 1.2239, "step": 12582 }, { "epoch": 0.163510346093434, "grad_norm": 0.435127854347229, "learning_rate": 0.00016733256215963296, "loss": 1.4567, "step": 12583 }, { "epoch": 0.16352334063734988, "grad_norm": 0.4554899036884308, "learning_rate": 0.00016732996269772159, "loss": 1.4514, "step": 12584 }, { "epoch": 0.16353633518126576, "grad_norm": 0.4384957551956177, "learning_rate": 0.0001673273632358102, "loss": 1.348, "step": 12585 }, { "epoch": 0.16354932972518163, "grad_norm": 0.42155084013938904, "learning_rate": 0.0001673247637738988, "loss": 1.545, "step": 12586 }, { "epoch": 0.1635623242690975, "grad_norm": 0.5434795618057251, "learning_rate": 0.0001673221643119874, "loss": 1.4157, "step": 12587 }, { "epoch": 0.16357531881301338, "grad_norm": 0.46022364497184753, "learning_rate": 0.00016731956485007606, "loss": 1.4784, "step": 12588 }, { "epoch": 0.16358831335692925, "grad_norm": 0.4144461452960968, "learning_rate": 0.00016731696538816465, "loss": 1.6507, "step": 12589 }, { "epoch": 0.16360130790084512, "grad_norm": 0.4117562472820282, "learning_rate": 0.00016731436592625328, "loss": 1.4959, "step": 12590 }, { "epoch": 0.163614302444761, "grad_norm": 0.30628302693367004, "learning_rate": 0.00016731176646434188, "loss": 1.2468, "step": 12591 }, { "epoch": 0.16362729698867687, "grad_norm": 0.4234026372432709, "learning_rate": 0.0001673091670024305, "loss": 1.4373, "step": 12592 }, { "epoch": 0.16364029153259274, "grad_norm": 0.43162137269973755, "learning_rate": 0.00016730656754051912, "loss": 1.4762, "step": 12593 }, { "epoch": 0.16365328607650861, "grad_norm": 0.39841943979263306, "learning_rate": 0.00016730396807860772, "loss": 1.3299, "step": 12594 }, { "epoch": 0.1636662806204245, "grad_norm": 0.4639006555080414, "learning_rate": 0.00016730136861669635, "loss": 1.4119, "step": 12595 }, { "epoch": 0.16367927516434036, "grad_norm": 0.34936848282814026, "learning_rate": 0.00016729876915478497, "loss": 1.4184, "step": 12596 }, { "epoch": 0.16369226970825623, "grad_norm": 0.43725940585136414, "learning_rate": 0.0001672961696928736, "loss": 1.4743, "step": 12597 }, { "epoch": 0.1637052642521721, "grad_norm": 0.43568554520606995, "learning_rate": 0.0001672935702309622, "loss": 1.4848, "step": 12598 }, { "epoch": 0.16371825879608798, "grad_norm": 0.31008097529411316, "learning_rate": 0.00016729097076905082, "loss": 1.1339, "step": 12599 }, { "epoch": 0.16373125334000388, "grad_norm": 0.4076475203037262, "learning_rate": 0.00016728837130713944, "loss": 1.5483, "step": 12600 }, { "epoch": 0.16374424788391975, "grad_norm": 0.3901926279067993, "learning_rate": 0.00016728577184522804, "loss": 1.4576, "step": 12601 }, { "epoch": 0.16375724242783563, "grad_norm": 0.41686126589775085, "learning_rate": 0.00016728317238331666, "loss": 1.2471, "step": 12602 }, { "epoch": 0.1637702369717515, "grad_norm": 0.36490100622177124, "learning_rate": 0.0001672805729214053, "loss": 1.3633, "step": 12603 }, { "epoch": 0.16378323151566737, "grad_norm": 0.4355268180370331, "learning_rate": 0.00016727797345949389, "loss": 1.5152, "step": 12604 }, { "epoch": 0.16379622605958324, "grad_norm": 0.49196478724479675, "learning_rate": 0.0001672753739975825, "loss": 1.3193, "step": 12605 }, { "epoch": 0.16380922060349912, "grad_norm": 0.4712654948234558, "learning_rate": 0.0001672727745356711, "loss": 1.4477, "step": 12606 }, { "epoch": 0.163822215147415, "grad_norm": 0.4347473680973053, "learning_rate": 0.00016727017507375976, "loss": 1.4417, "step": 12607 }, { "epoch": 0.16383520969133086, "grad_norm": 0.41889631748199463, "learning_rate": 0.00016726757561184836, "loss": 1.5078, "step": 12608 }, { "epoch": 0.16384820423524674, "grad_norm": 0.3379688858985901, "learning_rate": 0.00016726497614993698, "loss": 1.4814, "step": 12609 }, { "epoch": 0.1638611987791626, "grad_norm": 0.28536832332611084, "learning_rate": 0.00016726237668802558, "loss": 1.193, "step": 12610 }, { "epoch": 0.16387419332307848, "grad_norm": 0.4006774127483368, "learning_rate": 0.0001672597772261142, "loss": 1.5469, "step": 12611 }, { "epoch": 0.16388718786699435, "grad_norm": 0.4076038897037506, "learning_rate": 0.00016725717776420283, "loss": 1.3567, "step": 12612 }, { "epoch": 0.16390018241091023, "grad_norm": 0.2930568754673004, "learning_rate": 0.00016725457830229142, "loss": 1.3811, "step": 12613 }, { "epoch": 0.1639131769548261, "grad_norm": 0.35494542121887207, "learning_rate": 0.00016725197884038005, "loss": 1.4878, "step": 12614 }, { "epoch": 0.16392617149874197, "grad_norm": 0.33522146940231323, "learning_rate": 0.00016724937937846867, "loss": 1.4318, "step": 12615 }, { "epoch": 0.16393916604265785, "grad_norm": 0.3044874370098114, "learning_rate": 0.00016724677991655727, "loss": 1.3189, "step": 12616 }, { "epoch": 0.16395216058657372, "grad_norm": 0.3664427101612091, "learning_rate": 0.0001672441804546459, "loss": 1.4674, "step": 12617 }, { "epoch": 0.1639651551304896, "grad_norm": 0.3620952367782593, "learning_rate": 0.0001672415809927345, "loss": 1.4301, "step": 12618 }, { "epoch": 0.16397814967440547, "grad_norm": 0.3472168445587158, "learning_rate": 0.00016723898153082314, "loss": 1.2693, "step": 12619 }, { "epoch": 0.16399114421832134, "grad_norm": 0.402240514755249, "learning_rate": 0.00016723638206891174, "loss": 1.4768, "step": 12620 }, { "epoch": 0.1640041387622372, "grad_norm": 0.38466158509254456, "learning_rate": 0.00016723378260700037, "loss": 1.4592, "step": 12621 }, { "epoch": 0.16401713330615308, "grad_norm": 0.49159517884254456, "learning_rate": 0.00016723118314508896, "loss": 1.5502, "step": 12622 }, { "epoch": 0.16403012785006896, "grad_norm": 0.43623292446136475, "learning_rate": 0.0001672285836831776, "loss": 1.2735, "step": 12623 }, { "epoch": 0.16404312239398483, "grad_norm": 0.35589292645454407, "learning_rate": 0.0001672259842212662, "loss": 1.2686, "step": 12624 }, { "epoch": 0.1640561169379007, "grad_norm": 0.43091779947280884, "learning_rate": 0.0001672233847593548, "loss": 1.264, "step": 12625 }, { "epoch": 0.16406911148181658, "grad_norm": 0.4590790569782257, "learning_rate": 0.00016722078529744343, "loss": 1.4688, "step": 12626 }, { "epoch": 0.16408210602573245, "grad_norm": 0.31129980087280273, "learning_rate": 0.00016721818583553206, "loss": 1.3618, "step": 12627 }, { "epoch": 0.16409510056964832, "grad_norm": 0.4001850485801697, "learning_rate": 0.00016721558637362066, "loss": 1.4362, "step": 12628 }, { "epoch": 0.1641080951135642, "grad_norm": 0.4880580008029938, "learning_rate": 0.00016721298691170928, "loss": 1.6125, "step": 12629 }, { "epoch": 0.16412108965748007, "grad_norm": 0.35294270515441895, "learning_rate": 0.00016721038744979788, "loss": 1.4813, "step": 12630 }, { "epoch": 0.16413408420139594, "grad_norm": 0.3448569178581238, "learning_rate": 0.00016720778798788653, "loss": 1.3985, "step": 12631 }, { "epoch": 0.16414707874531181, "grad_norm": 0.32524916529655457, "learning_rate": 0.00016720518852597513, "loss": 1.2129, "step": 12632 }, { "epoch": 0.1641600732892277, "grad_norm": 0.45836636424064636, "learning_rate": 0.00016720258906406375, "loss": 1.5342, "step": 12633 }, { "epoch": 0.16417306783314356, "grad_norm": 0.4628974199295044, "learning_rate": 0.00016719998960215235, "loss": 1.435, "step": 12634 }, { "epoch": 0.16418606237705943, "grad_norm": 0.30716392397880554, "learning_rate": 0.00016719739014024097, "loss": 1.1494, "step": 12635 }, { "epoch": 0.1641990569209753, "grad_norm": 0.39553263783454895, "learning_rate": 0.0001671947906783296, "loss": 1.5279, "step": 12636 }, { "epoch": 0.16421205146489118, "grad_norm": 0.3989872932434082, "learning_rate": 0.0001671921912164182, "loss": 1.551, "step": 12637 }, { "epoch": 0.16422504600880705, "grad_norm": 0.32105305790901184, "learning_rate": 0.00016718959175450685, "loss": 1.4091, "step": 12638 }, { "epoch": 0.16423804055272292, "grad_norm": 0.45716384053230286, "learning_rate": 0.00016718699229259544, "loss": 1.4811, "step": 12639 }, { "epoch": 0.1642510350966388, "grad_norm": 0.4184134006500244, "learning_rate": 0.00016718439283068407, "loss": 1.3924, "step": 12640 }, { "epoch": 0.16426402964055467, "grad_norm": 0.44609779119491577, "learning_rate": 0.00016718179336877267, "loss": 1.334, "step": 12641 }, { "epoch": 0.16427702418447054, "grad_norm": 0.37901467084884644, "learning_rate": 0.0001671791939068613, "loss": 1.4078, "step": 12642 }, { "epoch": 0.16429001872838642, "grad_norm": 0.3853563964366913, "learning_rate": 0.00016717659444494992, "loss": 1.5297, "step": 12643 }, { "epoch": 0.1643030132723023, "grad_norm": 0.4908764958381653, "learning_rate": 0.0001671739949830385, "loss": 1.505, "step": 12644 }, { "epoch": 0.16431600781621816, "grad_norm": 0.32138046622276306, "learning_rate": 0.00016717139552112714, "loss": 1.3888, "step": 12645 }, { "epoch": 0.16432900236013404, "grad_norm": 0.37270838022232056, "learning_rate": 0.00016716879605921576, "loss": 1.4012, "step": 12646 }, { "epoch": 0.1643419969040499, "grad_norm": 0.5202423334121704, "learning_rate": 0.00016716619659730436, "loss": 1.2435, "step": 12647 }, { "epoch": 0.16435499144796578, "grad_norm": 0.41264837980270386, "learning_rate": 0.00016716359713539298, "loss": 1.701, "step": 12648 }, { "epoch": 0.16436798599188165, "grad_norm": 0.338055819272995, "learning_rate": 0.00016716099767348158, "loss": 1.4998, "step": 12649 }, { "epoch": 0.16438098053579753, "grad_norm": 0.32998618483543396, "learning_rate": 0.00016715839821157023, "loss": 1.5474, "step": 12650 }, { "epoch": 0.1643939750797134, "grad_norm": 0.39601635932922363, "learning_rate": 0.00016715579874965883, "loss": 1.6777, "step": 12651 }, { "epoch": 0.16440696962362927, "grad_norm": 0.39117392897605896, "learning_rate": 0.00016715319928774745, "loss": 1.3039, "step": 12652 }, { "epoch": 0.16441996416754515, "grad_norm": 0.41321861743927, "learning_rate": 0.00016715059982583605, "loss": 1.2456, "step": 12653 }, { "epoch": 0.16443295871146102, "grad_norm": 0.36340978741645813, "learning_rate": 0.00016714800036392468, "loss": 1.6684, "step": 12654 }, { "epoch": 0.1644459532553769, "grad_norm": 0.4573553800582886, "learning_rate": 0.0001671454009020133, "loss": 1.4145, "step": 12655 }, { "epoch": 0.16445894779929277, "grad_norm": 0.35159313678741455, "learning_rate": 0.0001671428014401019, "loss": 1.2721, "step": 12656 }, { "epoch": 0.16447194234320864, "grad_norm": 0.3576256334781647, "learning_rate": 0.00016714020197819052, "loss": 1.3786, "step": 12657 }, { "epoch": 0.1644849368871245, "grad_norm": 0.4518357813358307, "learning_rate": 0.00016713760251627915, "loss": 1.3445, "step": 12658 }, { "epoch": 0.16449793143104038, "grad_norm": 0.3353070318698883, "learning_rate": 0.00016713500305436774, "loss": 1.4189, "step": 12659 }, { "epoch": 0.16451092597495626, "grad_norm": 0.5224003195762634, "learning_rate": 0.00016713240359245637, "loss": 1.3229, "step": 12660 }, { "epoch": 0.16452392051887213, "grad_norm": 0.45401838421821594, "learning_rate": 0.00016712980413054497, "loss": 1.6328, "step": 12661 }, { "epoch": 0.164536915062788, "grad_norm": 0.48674678802490234, "learning_rate": 0.00016712720466863362, "loss": 1.5087, "step": 12662 }, { "epoch": 0.16454990960670388, "grad_norm": 0.4273641109466553, "learning_rate": 0.00016712460520672222, "loss": 1.3423, "step": 12663 }, { "epoch": 0.16456290415061975, "grad_norm": 0.3729701638221741, "learning_rate": 0.00016712200574481084, "loss": 1.2581, "step": 12664 }, { "epoch": 0.16457589869453562, "grad_norm": 0.36620792746543884, "learning_rate": 0.00016711940628289944, "loss": 1.5081, "step": 12665 }, { "epoch": 0.1645888932384515, "grad_norm": 0.38684314489364624, "learning_rate": 0.00016711680682098806, "loss": 1.5505, "step": 12666 }, { "epoch": 0.16460188778236737, "grad_norm": 0.43507975339889526, "learning_rate": 0.00016711420735907669, "loss": 1.4525, "step": 12667 }, { "epoch": 0.16461488232628324, "grad_norm": 0.44767484068870544, "learning_rate": 0.00016711160789716528, "loss": 1.4059, "step": 12668 }, { "epoch": 0.1646278768701991, "grad_norm": 0.43577510118484497, "learning_rate": 0.0001671090084352539, "loss": 1.5334, "step": 12669 }, { "epoch": 0.164640871414115, "grad_norm": 0.4192003905773163, "learning_rate": 0.00016710640897334253, "loss": 1.4228, "step": 12670 }, { "epoch": 0.16465386595803086, "grad_norm": 0.3818548321723938, "learning_rate": 0.00016710380951143113, "loss": 1.4217, "step": 12671 }, { "epoch": 0.16466686050194673, "grad_norm": 0.45438137650489807, "learning_rate": 0.00016710121004951975, "loss": 1.4976, "step": 12672 }, { "epoch": 0.1646798550458626, "grad_norm": 0.4225194752216339, "learning_rate": 0.00016709861058760838, "loss": 1.4947, "step": 12673 }, { "epoch": 0.16469284958977848, "grad_norm": 0.299696147441864, "learning_rate": 0.000167096011125697, "loss": 1.2598, "step": 12674 }, { "epoch": 0.16470584413369435, "grad_norm": 0.41225069761276245, "learning_rate": 0.0001670934116637856, "loss": 1.4895, "step": 12675 }, { "epoch": 0.16471883867761025, "grad_norm": 0.38099080324172974, "learning_rate": 0.00016709081220187423, "loss": 1.4745, "step": 12676 }, { "epoch": 0.16473183322152612, "grad_norm": 0.28796982765197754, "learning_rate": 0.00016708821273996285, "loss": 1.2963, "step": 12677 }, { "epoch": 0.164744827765442, "grad_norm": 0.35307663679122925, "learning_rate": 0.00016708561327805145, "loss": 1.3488, "step": 12678 }, { "epoch": 0.16475782230935787, "grad_norm": 0.39195382595062256, "learning_rate": 0.00016708301381614007, "loss": 1.226, "step": 12679 }, { "epoch": 0.16477081685327374, "grad_norm": 0.38366764783859253, "learning_rate": 0.00016708041435422867, "loss": 1.2977, "step": 12680 }, { "epoch": 0.16478381139718962, "grad_norm": 0.4170806109905243, "learning_rate": 0.00016707781489231732, "loss": 1.6368, "step": 12681 }, { "epoch": 0.1647968059411055, "grad_norm": 0.41293367743492126, "learning_rate": 0.00016707521543040592, "loss": 1.2814, "step": 12682 }, { "epoch": 0.16480980048502136, "grad_norm": 0.48112955689430237, "learning_rate": 0.00016707261596849452, "loss": 1.3622, "step": 12683 }, { "epoch": 0.16482279502893724, "grad_norm": 0.3877650201320648, "learning_rate": 0.00016707001650658314, "loss": 1.5362, "step": 12684 }, { "epoch": 0.1648357895728531, "grad_norm": 0.4014728367328644, "learning_rate": 0.00016706741704467176, "loss": 1.56, "step": 12685 }, { "epoch": 0.16484878411676898, "grad_norm": 0.49827906489372253, "learning_rate": 0.0001670648175827604, "loss": 1.5652, "step": 12686 }, { "epoch": 0.16486177866068485, "grad_norm": 0.4770209491252899, "learning_rate": 0.00016706221812084899, "loss": 1.506, "step": 12687 }, { "epoch": 0.16487477320460073, "grad_norm": 0.39580288529396057, "learning_rate": 0.0001670596186589376, "loss": 1.2318, "step": 12688 }, { "epoch": 0.1648877677485166, "grad_norm": 0.34847956895828247, "learning_rate": 0.00016705701919702624, "loss": 1.4048, "step": 12689 }, { "epoch": 0.16490076229243247, "grad_norm": 0.3450503349304199, "learning_rate": 0.00016705441973511483, "loss": 1.2685, "step": 12690 }, { "epoch": 0.16491375683634835, "grad_norm": 0.38976818323135376, "learning_rate": 0.00016705182027320346, "loss": 1.5662, "step": 12691 }, { "epoch": 0.16492675138026422, "grad_norm": 0.4271141290664673, "learning_rate": 0.00016704922081129205, "loss": 1.5289, "step": 12692 }, { "epoch": 0.1649397459241801, "grad_norm": 0.39912524819374084, "learning_rate": 0.0001670466213493807, "loss": 1.3907, "step": 12693 }, { "epoch": 0.16495274046809597, "grad_norm": 0.4122074246406555, "learning_rate": 0.0001670440218874693, "loss": 1.4457, "step": 12694 }, { "epoch": 0.16496573501201184, "grad_norm": 0.36283794045448303, "learning_rate": 0.0001670414224255579, "loss": 1.4269, "step": 12695 }, { "epoch": 0.1649787295559277, "grad_norm": 0.34611138701438904, "learning_rate": 0.00016703882296364653, "loss": 1.2887, "step": 12696 }, { "epoch": 0.16499172409984358, "grad_norm": 0.4332440197467804, "learning_rate": 0.00016703622350173515, "loss": 1.5546, "step": 12697 }, { "epoch": 0.16500471864375946, "grad_norm": 0.4708455502986908, "learning_rate": 0.00016703362403982377, "loss": 1.4198, "step": 12698 }, { "epoch": 0.16501771318767533, "grad_norm": 0.404793918132782, "learning_rate": 0.00016703102457791237, "loss": 1.342, "step": 12699 }, { "epoch": 0.1650307077315912, "grad_norm": 0.4312038719654083, "learning_rate": 0.000167028425116001, "loss": 1.3826, "step": 12700 }, { "epoch": 0.16504370227550708, "grad_norm": 0.35647881031036377, "learning_rate": 0.00016702582565408962, "loss": 1.2897, "step": 12701 }, { "epoch": 0.16505669681942295, "grad_norm": 0.3905855417251587, "learning_rate": 0.00016702322619217822, "loss": 1.3434, "step": 12702 }, { "epoch": 0.16506969136333882, "grad_norm": 0.33579957485198975, "learning_rate": 0.00016702062673026684, "loss": 1.4348, "step": 12703 }, { "epoch": 0.1650826859072547, "grad_norm": 0.3677779734134674, "learning_rate": 0.00016701802726835544, "loss": 1.4049, "step": 12704 }, { "epoch": 0.16509568045117057, "grad_norm": 0.3726382255554199, "learning_rate": 0.0001670154278064441, "loss": 1.4293, "step": 12705 }, { "epoch": 0.16510867499508644, "grad_norm": 0.5283243656158447, "learning_rate": 0.0001670128283445327, "loss": 1.4101, "step": 12706 }, { "epoch": 0.1651216695390023, "grad_norm": 0.27916380763053894, "learning_rate": 0.0001670102288826213, "loss": 1.3427, "step": 12707 }, { "epoch": 0.1651346640829182, "grad_norm": 0.41635891795158386, "learning_rate": 0.0001670076294207099, "loss": 1.3672, "step": 12708 }, { "epoch": 0.16514765862683406, "grad_norm": 0.3994525671005249, "learning_rate": 0.00016700502995879854, "loss": 1.5141, "step": 12709 }, { "epoch": 0.16516065317074993, "grad_norm": 0.3423674404621124, "learning_rate": 0.00016700243049688716, "loss": 1.4823, "step": 12710 }, { "epoch": 0.1651736477146658, "grad_norm": 0.3354313373565674, "learning_rate": 0.00016699983103497576, "loss": 1.3609, "step": 12711 }, { "epoch": 0.16518664225858168, "grad_norm": 0.3780425786972046, "learning_rate": 0.00016699723157306438, "loss": 1.3445, "step": 12712 }, { "epoch": 0.16519963680249755, "grad_norm": 0.42627015709877014, "learning_rate": 0.000166994632111153, "loss": 1.4527, "step": 12713 }, { "epoch": 0.16521263134641342, "grad_norm": 0.33906233310699463, "learning_rate": 0.0001669920326492416, "loss": 1.7223, "step": 12714 }, { "epoch": 0.1652256258903293, "grad_norm": 0.43348586559295654, "learning_rate": 0.00016698943318733023, "loss": 1.2718, "step": 12715 }, { "epoch": 0.16523862043424517, "grad_norm": 0.38032829761505127, "learning_rate": 0.00016698683372541885, "loss": 1.4151, "step": 12716 }, { "epoch": 0.16525161497816104, "grad_norm": 0.43958452343940735, "learning_rate": 0.00016698423426350748, "loss": 1.5789, "step": 12717 }, { "epoch": 0.16526460952207692, "grad_norm": 0.4106976091861725, "learning_rate": 0.00016698163480159607, "loss": 1.3721, "step": 12718 }, { "epoch": 0.1652776040659928, "grad_norm": 0.3732120990753174, "learning_rate": 0.0001669790353396847, "loss": 1.4823, "step": 12719 }, { "epoch": 0.16529059860990866, "grad_norm": 0.3490251898765564, "learning_rate": 0.00016697643587777332, "loss": 1.5651, "step": 12720 }, { "epoch": 0.16530359315382454, "grad_norm": 0.31975293159484863, "learning_rate": 0.00016697383641586192, "loss": 1.4319, "step": 12721 }, { "epoch": 0.1653165876977404, "grad_norm": 0.29429569840431213, "learning_rate": 0.00016697123695395054, "loss": 1.4278, "step": 12722 }, { "epoch": 0.16532958224165628, "grad_norm": 0.40375661849975586, "learning_rate": 0.00016696863749203914, "loss": 1.5753, "step": 12723 }, { "epoch": 0.16534257678557215, "grad_norm": 0.3255603313446045, "learning_rate": 0.0001669660380301278, "loss": 1.3613, "step": 12724 }, { "epoch": 0.16535557132948803, "grad_norm": 0.6354274153709412, "learning_rate": 0.0001669634385682164, "loss": 1.4492, "step": 12725 }, { "epoch": 0.1653685658734039, "grad_norm": 0.35847610235214233, "learning_rate": 0.000166960839106305, "loss": 1.3535, "step": 12726 }, { "epoch": 0.16538156041731977, "grad_norm": 0.2997526228427887, "learning_rate": 0.0001669582396443936, "loss": 1.5066, "step": 12727 }, { "epoch": 0.16539455496123565, "grad_norm": 0.25073716044425964, "learning_rate": 0.00016695564018248224, "loss": 1.4377, "step": 12728 }, { "epoch": 0.16540754950515152, "grad_norm": 0.4339577853679657, "learning_rate": 0.00016695304072057086, "loss": 1.4269, "step": 12729 }, { "epoch": 0.1654205440490674, "grad_norm": 0.3620936870574951, "learning_rate": 0.00016695044125865946, "loss": 1.4394, "step": 12730 }, { "epoch": 0.16543353859298326, "grad_norm": 0.3854120969772339, "learning_rate": 0.00016694784179674808, "loss": 1.5163, "step": 12731 }, { "epoch": 0.16544653313689914, "grad_norm": 0.3299930989742279, "learning_rate": 0.0001669452423348367, "loss": 1.5508, "step": 12732 }, { "epoch": 0.165459527680815, "grad_norm": 0.39410945773124695, "learning_rate": 0.0001669426428729253, "loss": 1.4698, "step": 12733 }, { "epoch": 0.16547252222473088, "grad_norm": 0.41791895031929016, "learning_rate": 0.00016694004341101393, "loss": 1.3089, "step": 12734 }, { "epoch": 0.16548551676864676, "grad_norm": 0.3998549282550812, "learning_rate": 0.00016693744394910253, "loss": 1.352, "step": 12735 }, { "epoch": 0.16549851131256263, "grad_norm": 0.43996545672416687, "learning_rate": 0.00016693484448719118, "loss": 1.1485, "step": 12736 }, { "epoch": 0.1655115058564785, "grad_norm": 0.4087182879447937, "learning_rate": 0.00016693224502527978, "loss": 1.4929, "step": 12737 }, { "epoch": 0.16552450040039438, "grad_norm": 0.38097959756851196, "learning_rate": 0.00016692964556336837, "loss": 1.4011, "step": 12738 }, { "epoch": 0.16553749494431025, "grad_norm": 0.2987470328807831, "learning_rate": 0.000166927046101457, "loss": 1.436, "step": 12739 }, { "epoch": 0.16555048948822612, "grad_norm": 0.3647497296333313, "learning_rate": 0.00016692444663954562, "loss": 1.4595, "step": 12740 }, { "epoch": 0.165563484032142, "grad_norm": 0.4313162565231323, "learning_rate": 0.00016692184717763425, "loss": 1.4956, "step": 12741 }, { "epoch": 0.16557647857605787, "grad_norm": 0.40411344170570374, "learning_rate": 0.00016691924771572284, "loss": 1.4129, "step": 12742 }, { "epoch": 0.16558947311997374, "grad_norm": 0.38567715883255005, "learning_rate": 0.00016691664825381147, "loss": 1.5529, "step": 12743 }, { "epoch": 0.1656024676638896, "grad_norm": 0.36858832836151123, "learning_rate": 0.0001669140487919001, "loss": 1.4827, "step": 12744 }, { "epoch": 0.16561546220780549, "grad_norm": 0.5153440237045288, "learning_rate": 0.0001669114493299887, "loss": 1.4709, "step": 12745 }, { "epoch": 0.16562845675172136, "grad_norm": 0.36753207445144653, "learning_rate": 0.00016690884986807732, "loss": 1.4611, "step": 12746 }, { "epoch": 0.16564145129563723, "grad_norm": 0.3692333996295929, "learning_rate": 0.00016690625040616594, "loss": 1.4302, "step": 12747 }, { "epoch": 0.1656544458395531, "grad_norm": 0.42418134212493896, "learning_rate": 0.00016690365094425456, "loss": 1.5146, "step": 12748 }, { "epoch": 0.16566744038346898, "grad_norm": 0.5124931335449219, "learning_rate": 0.00016690105148234316, "loss": 1.7057, "step": 12749 }, { "epoch": 0.16568043492738485, "grad_norm": 0.4228874146938324, "learning_rate": 0.00016689845202043176, "loss": 1.4672, "step": 12750 }, { "epoch": 0.16569342947130072, "grad_norm": 0.4007064402103424, "learning_rate": 0.0001668958525585204, "loss": 1.3748, "step": 12751 }, { "epoch": 0.16570642401521662, "grad_norm": 0.42129477858543396, "learning_rate": 0.000166893253096609, "loss": 1.511, "step": 12752 }, { "epoch": 0.1657194185591325, "grad_norm": 0.5369883179664612, "learning_rate": 0.00016689065363469763, "loss": 1.55, "step": 12753 }, { "epoch": 0.16573241310304837, "grad_norm": 0.37164899706840515, "learning_rate": 0.00016688805417278623, "loss": 1.429, "step": 12754 }, { "epoch": 0.16574540764696424, "grad_norm": 0.35931336879730225, "learning_rate": 0.00016688545471087485, "loss": 1.2623, "step": 12755 }, { "epoch": 0.16575840219088012, "grad_norm": 0.3817209005355835, "learning_rate": 0.00016688285524896348, "loss": 1.0998, "step": 12756 }, { "epoch": 0.165771396734796, "grad_norm": 0.3386791944503784, "learning_rate": 0.00016688025578705208, "loss": 1.4165, "step": 12757 }, { "epoch": 0.16578439127871186, "grad_norm": 0.4538019895553589, "learning_rate": 0.0001668776563251407, "loss": 1.5655, "step": 12758 }, { "epoch": 0.16579738582262774, "grad_norm": 0.45190227031707764, "learning_rate": 0.00016687505686322933, "loss": 1.4953, "step": 12759 }, { "epoch": 0.1658103803665436, "grad_norm": 0.3689805865287781, "learning_rate": 0.00016687245740131795, "loss": 1.5334, "step": 12760 }, { "epoch": 0.16582337491045948, "grad_norm": 0.5716325044631958, "learning_rate": 0.00016686985793940655, "loss": 1.4974, "step": 12761 }, { "epoch": 0.16583636945437535, "grad_norm": 0.3748626708984375, "learning_rate": 0.00016686725847749517, "loss": 1.4128, "step": 12762 }, { "epoch": 0.16584936399829123, "grad_norm": 0.4868242144584656, "learning_rate": 0.0001668646590155838, "loss": 1.5548, "step": 12763 }, { "epoch": 0.1658623585422071, "grad_norm": 0.31340986490249634, "learning_rate": 0.0001668620595536724, "loss": 1.3049, "step": 12764 }, { "epoch": 0.16587535308612297, "grad_norm": 0.3670234680175781, "learning_rate": 0.00016685946009176102, "loss": 1.5032, "step": 12765 }, { "epoch": 0.16588834763003885, "grad_norm": 0.34331753849983215, "learning_rate": 0.00016685686062984962, "loss": 1.3376, "step": 12766 }, { "epoch": 0.16590134217395472, "grad_norm": 0.2816208600997925, "learning_rate": 0.00016685426116793824, "loss": 1.3386, "step": 12767 }, { "epoch": 0.1659143367178706, "grad_norm": 0.43149030208587646, "learning_rate": 0.00016685166170602686, "loss": 1.5037, "step": 12768 }, { "epoch": 0.16592733126178646, "grad_norm": 0.4204499125480652, "learning_rate": 0.00016684906224411546, "loss": 1.3985, "step": 12769 }, { "epoch": 0.16594032580570234, "grad_norm": 0.4354959726333618, "learning_rate": 0.0001668464627822041, "loss": 1.3395, "step": 12770 }, { "epoch": 0.1659533203496182, "grad_norm": 0.38286638259887695, "learning_rate": 0.0001668438633202927, "loss": 1.3766, "step": 12771 }, { "epoch": 0.16596631489353408, "grad_norm": 0.31627151370048523, "learning_rate": 0.00016684126385838134, "loss": 1.3678, "step": 12772 }, { "epoch": 0.16597930943744996, "grad_norm": 0.4957989454269409, "learning_rate": 0.00016683866439646993, "loss": 1.5672, "step": 12773 }, { "epoch": 0.16599230398136583, "grad_norm": 0.3770240843296051, "learning_rate": 0.00016683606493455856, "loss": 1.3653, "step": 12774 }, { "epoch": 0.1660052985252817, "grad_norm": 0.3548699617385864, "learning_rate": 0.00016683346547264718, "loss": 1.2602, "step": 12775 }, { "epoch": 0.16601829306919758, "grad_norm": 0.4624626934528351, "learning_rate": 0.00016683086601073578, "loss": 1.3246, "step": 12776 }, { "epoch": 0.16603128761311345, "grad_norm": 0.5028133392333984, "learning_rate": 0.0001668282665488244, "loss": 1.4082, "step": 12777 }, { "epoch": 0.16604428215702932, "grad_norm": 0.4477551579475403, "learning_rate": 0.000166825667086913, "loss": 1.6132, "step": 12778 }, { "epoch": 0.1660572767009452, "grad_norm": 0.3564830422401428, "learning_rate": 0.00016682306762500163, "loss": 1.2895, "step": 12779 }, { "epoch": 0.16607027124486107, "grad_norm": 0.37649983167648315, "learning_rate": 0.00016682046816309025, "loss": 1.6371, "step": 12780 }, { "epoch": 0.16608326578877694, "grad_norm": 0.3895941972732544, "learning_rate": 0.00016681786870117885, "loss": 1.3311, "step": 12781 }, { "epoch": 0.1660962603326928, "grad_norm": 0.3195134997367859, "learning_rate": 0.00016681526923926747, "loss": 1.3647, "step": 12782 }, { "epoch": 0.1661092548766087, "grad_norm": 0.42182788252830505, "learning_rate": 0.0001668126697773561, "loss": 1.4729, "step": 12783 }, { "epoch": 0.16612224942052456, "grad_norm": 0.4348873198032379, "learning_rate": 0.00016681007031544472, "loss": 1.3943, "step": 12784 }, { "epoch": 0.16613524396444043, "grad_norm": 0.3574744760990143, "learning_rate": 0.00016680747085353332, "loss": 1.2052, "step": 12785 }, { "epoch": 0.1661482385083563, "grad_norm": 0.4844158887863159, "learning_rate": 0.00016680487139162194, "loss": 1.57, "step": 12786 }, { "epoch": 0.16616123305227218, "grad_norm": 0.5127004384994507, "learning_rate": 0.00016680227192971057, "loss": 1.6192, "step": 12787 }, { "epoch": 0.16617422759618805, "grad_norm": 0.43348947167396545, "learning_rate": 0.00016679967246779916, "loss": 1.6037, "step": 12788 }, { "epoch": 0.16618722214010392, "grad_norm": 0.3308321237564087, "learning_rate": 0.0001667970730058878, "loss": 1.5887, "step": 12789 }, { "epoch": 0.1662002166840198, "grad_norm": 0.3559459447860718, "learning_rate": 0.00016679447354397641, "loss": 1.3745, "step": 12790 }, { "epoch": 0.16621321122793567, "grad_norm": 0.3071127235889435, "learning_rate": 0.00016679187408206504, "loss": 1.4145, "step": 12791 }, { "epoch": 0.16622620577185154, "grad_norm": 0.3767787218093872, "learning_rate": 0.00016678927462015364, "loss": 1.5357, "step": 12792 }, { "epoch": 0.16623920031576742, "grad_norm": 0.38860946893692017, "learning_rate": 0.00016678667515824223, "loss": 1.5691, "step": 12793 }, { "epoch": 0.1662521948596833, "grad_norm": 0.39276090264320374, "learning_rate": 0.00016678407569633088, "loss": 1.551, "step": 12794 }, { "epoch": 0.16626518940359916, "grad_norm": 0.34387215971946716, "learning_rate": 0.00016678147623441948, "loss": 1.3966, "step": 12795 }, { "epoch": 0.16627818394751503, "grad_norm": 0.24353717267513275, "learning_rate": 0.0001667788767725081, "loss": 1.1984, "step": 12796 }, { "epoch": 0.1662911784914309, "grad_norm": 0.374902606010437, "learning_rate": 0.0001667762773105967, "loss": 1.6019, "step": 12797 }, { "epoch": 0.16630417303534678, "grad_norm": 0.3707432746887207, "learning_rate": 0.00016677367784868533, "loss": 1.4587, "step": 12798 }, { "epoch": 0.16631716757926265, "grad_norm": 0.42449063062667847, "learning_rate": 0.00016677107838677395, "loss": 1.6654, "step": 12799 }, { "epoch": 0.16633016212317853, "grad_norm": 0.37490546703338623, "learning_rate": 0.00016676847892486255, "loss": 1.3744, "step": 12800 }, { "epoch": 0.1663431566670944, "grad_norm": 0.4316680133342743, "learning_rate": 0.00016676587946295117, "loss": 1.4633, "step": 12801 }, { "epoch": 0.16635615121101027, "grad_norm": 0.42527732253074646, "learning_rate": 0.0001667632800010398, "loss": 1.4212, "step": 12802 }, { "epoch": 0.16636914575492615, "grad_norm": 0.34765082597732544, "learning_rate": 0.00016676068053912842, "loss": 1.4254, "step": 12803 }, { "epoch": 0.16638214029884202, "grad_norm": 0.44545778632164, "learning_rate": 0.00016675808107721702, "loss": 1.5502, "step": 12804 }, { "epoch": 0.1663951348427579, "grad_norm": 0.42165297269821167, "learning_rate": 0.00016675548161530562, "loss": 1.4112, "step": 12805 }, { "epoch": 0.16640812938667376, "grad_norm": 0.42091259360313416, "learning_rate": 0.00016675288215339427, "loss": 1.4712, "step": 12806 }, { "epoch": 0.16642112393058964, "grad_norm": 0.4956926703453064, "learning_rate": 0.00016675028269148287, "loss": 1.3942, "step": 12807 }, { "epoch": 0.1664341184745055, "grad_norm": 0.4420771598815918, "learning_rate": 0.0001667476832295715, "loss": 1.4173, "step": 12808 }, { "epoch": 0.16644711301842138, "grad_norm": 0.21815244853496552, "learning_rate": 0.0001667450837676601, "loss": 1.2988, "step": 12809 }, { "epoch": 0.16646010756233726, "grad_norm": 0.3586496412754059, "learning_rate": 0.0001667424843057487, "loss": 1.4763, "step": 12810 }, { "epoch": 0.16647310210625313, "grad_norm": 0.3256484270095825, "learning_rate": 0.00016673988484383734, "loss": 1.461, "step": 12811 }, { "epoch": 0.166486096650169, "grad_norm": 0.36909791827201843, "learning_rate": 0.00016673728538192594, "loss": 1.4461, "step": 12812 }, { "epoch": 0.16649909119408488, "grad_norm": 0.396945059299469, "learning_rate": 0.00016673468592001456, "loss": 1.4764, "step": 12813 }, { "epoch": 0.16651208573800075, "grad_norm": 0.47493255138397217, "learning_rate": 0.00016673208645810318, "loss": 1.2814, "step": 12814 }, { "epoch": 0.16652508028191662, "grad_norm": 0.41407179832458496, "learning_rate": 0.0001667294869961918, "loss": 1.2914, "step": 12815 }, { "epoch": 0.1665380748258325, "grad_norm": 0.41274240612983704, "learning_rate": 0.0001667268875342804, "loss": 1.4875, "step": 12816 }, { "epoch": 0.16655106936974837, "grad_norm": 0.2747964560985565, "learning_rate": 0.000166724288072369, "loss": 1.3357, "step": 12817 }, { "epoch": 0.16656406391366424, "grad_norm": 0.3709779679775238, "learning_rate": 0.00016672168861045766, "loss": 1.4881, "step": 12818 }, { "epoch": 0.1665770584575801, "grad_norm": 0.3713243305683136, "learning_rate": 0.00016671908914854625, "loss": 1.4259, "step": 12819 }, { "epoch": 0.16659005300149599, "grad_norm": 0.5128543972969055, "learning_rate": 0.00016671648968663488, "loss": 1.5151, "step": 12820 }, { "epoch": 0.16660304754541186, "grad_norm": 0.42065709829330444, "learning_rate": 0.0001667138902247235, "loss": 1.3927, "step": 12821 }, { "epoch": 0.16661604208932773, "grad_norm": 0.453928142786026, "learning_rate": 0.0001667112907628121, "loss": 1.3186, "step": 12822 }, { "epoch": 0.1666290366332436, "grad_norm": 0.4526253342628479, "learning_rate": 0.00016670869130090072, "loss": 1.3925, "step": 12823 }, { "epoch": 0.16664203117715948, "grad_norm": 0.540834367275238, "learning_rate": 0.00016670609183898932, "loss": 1.4528, "step": 12824 }, { "epoch": 0.16665502572107535, "grad_norm": 0.3812747299671173, "learning_rate": 0.00016670349237707797, "loss": 1.52, "step": 12825 }, { "epoch": 0.16666802026499122, "grad_norm": 0.2953941226005554, "learning_rate": 0.00016670089291516657, "loss": 1.3892, "step": 12826 }, { "epoch": 0.1666810148089071, "grad_norm": 0.47402068972587585, "learning_rate": 0.0001666982934532552, "loss": 1.4324, "step": 12827 }, { "epoch": 0.166694009352823, "grad_norm": 0.42593613266944885, "learning_rate": 0.0001666956939913438, "loss": 1.5167, "step": 12828 }, { "epoch": 0.16670700389673887, "grad_norm": 0.3507402241230011, "learning_rate": 0.00016669309452943242, "loss": 1.2295, "step": 12829 }, { "epoch": 0.16671999844065474, "grad_norm": 0.369003564119339, "learning_rate": 0.00016669049506752104, "loss": 1.381, "step": 12830 }, { "epoch": 0.16673299298457062, "grad_norm": 0.43361109495162964, "learning_rate": 0.00016668789560560964, "loss": 1.4831, "step": 12831 }, { "epoch": 0.1667459875284865, "grad_norm": 0.33255043625831604, "learning_rate": 0.00016668529614369826, "loss": 1.3915, "step": 12832 }, { "epoch": 0.16675898207240236, "grad_norm": 0.34869447350502014, "learning_rate": 0.0001666826966817869, "loss": 1.2207, "step": 12833 }, { "epoch": 0.16677197661631823, "grad_norm": 0.4056282639503479, "learning_rate": 0.00016668009721987548, "loss": 1.5521, "step": 12834 }, { "epoch": 0.1667849711602341, "grad_norm": 0.4887652099132538, "learning_rate": 0.0001666774977579641, "loss": 1.543, "step": 12835 }, { "epoch": 0.16679796570414998, "grad_norm": 0.34442567825317383, "learning_rate": 0.0001666748982960527, "loss": 1.2209, "step": 12836 }, { "epoch": 0.16681096024806585, "grad_norm": 0.41982781887054443, "learning_rate": 0.00016667229883414136, "loss": 1.6639, "step": 12837 }, { "epoch": 0.16682395479198173, "grad_norm": 0.4775190055370331, "learning_rate": 0.00016666969937222996, "loss": 1.4735, "step": 12838 }, { "epoch": 0.1668369493358976, "grad_norm": 0.35715991258621216, "learning_rate": 0.00016666709991031858, "loss": 1.6269, "step": 12839 }, { "epoch": 0.16684994387981347, "grad_norm": 0.43140268325805664, "learning_rate": 0.00016666450044840718, "loss": 1.3078, "step": 12840 }, { "epoch": 0.16686293842372935, "grad_norm": 0.3623979985713959, "learning_rate": 0.0001666619009864958, "loss": 1.1823, "step": 12841 }, { "epoch": 0.16687593296764522, "grad_norm": 0.3775346875190735, "learning_rate": 0.00016665930152458443, "loss": 1.5372, "step": 12842 }, { "epoch": 0.1668889275115611, "grad_norm": 0.3679216206073761, "learning_rate": 0.00016665670206267302, "loss": 1.6282, "step": 12843 }, { "epoch": 0.16690192205547696, "grad_norm": 0.40435758233070374, "learning_rate": 0.00016665410260076165, "loss": 1.1387, "step": 12844 }, { "epoch": 0.16691491659939284, "grad_norm": 0.38754045963287354, "learning_rate": 0.00016665150313885027, "loss": 1.4503, "step": 12845 }, { "epoch": 0.1669279111433087, "grad_norm": 0.47942426800727844, "learning_rate": 0.0001666489036769389, "loss": 1.4406, "step": 12846 }, { "epoch": 0.16694090568722458, "grad_norm": 0.49686306715011597, "learning_rate": 0.0001666463042150275, "loss": 1.5014, "step": 12847 }, { "epoch": 0.16695390023114046, "grad_norm": 0.33438125252723694, "learning_rate": 0.0001666437047531161, "loss": 1.6046, "step": 12848 }, { "epoch": 0.16696689477505633, "grad_norm": 0.36495068669319153, "learning_rate": 0.00016664110529120474, "loss": 1.4154, "step": 12849 }, { "epoch": 0.1669798893189722, "grad_norm": 0.41140130162239075, "learning_rate": 0.00016663850582929334, "loss": 1.2698, "step": 12850 }, { "epoch": 0.16699288386288808, "grad_norm": 0.38917186856269836, "learning_rate": 0.00016663590636738196, "loss": 1.526, "step": 12851 }, { "epoch": 0.16700587840680395, "grad_norm": 0.40571409463882446, "learning_rate": 0.00016663330690547056, "loss": 1.307, "step": 12852 }, { "epoch": 0.16701887295071982, "grad_norm": 0.33642643690109253, "learning_rate": 0.0001666307074435592, "loss": 1.4184, "step": 12853 }, { "epoch": 0.1670318674946357, "grad_norm": 0.4248434007167816, "learning_rate": 0.0001666281079816478, "loss": 1.4063, "step": 12854 }, { "epoch": 0.16704486203855157, "grad_norm": 0.36782601475715637, "learning_rate": 0.0001666255085197364, "loss": 1.5157, "step": 12855 }, { "epoch": 0.16705785658246744, "grad_norm": 0.3769363760948181, "learning_rate": 0.00016662290905782503, "loss": 1.3521, "step": 12856 }, { "epoch": 0.1670708511263833, "grad_norm": 0.4285936951637268, "learning_rate": 0.00016662030959591366, "loss": 1.4896, "step": 12857 }, { "epoch": 0.16708384567029919, "grad_norm": 0.4447292983531952, "learning_rate": 0.00016661771013400228, "loss": 1.5812, "step": 12858 }, { "epoch": 0.16709684021421506, "grad_norm": 0.3940449059009552, "learning_rate": 0.00016661511067209088, "loss": 1.5996, "step": 12859 }, { "epoch": 0.16710983475813093, "grad_norm": 0.37623709440231323, "learning_rate": 0.0001666125112101795, "loss": 1.5044, "step": 12860 }, { "epoch": 0.1671228293020468, "grad_norm": 0.37312057614326477, "learning_rate": 0.00016660991174826813, "loss": 1.5332, "step": 12861 }, { "epoch": 0.16713582384596268, "grad_norm": 0.3506205081939697, "learning_rate": 0.00016660731228635673, "loss": 1.3821, "step": 12862 }, { "epoch": 0.16714881838987855, "grad_norm": 0.3562324345111847, "learning_rate": 0.00016660471282444535, "loss": 1.4178, "step": 12863 }, { "epoch": 0.16716181293379442, "grad_norm": 0.4145570695400238, "learning_rate": 0.00016660211336253397, "loss": 1.4428, "step": 12864 }, { "epoch": 0.1671748074777103, "grad_norm": 0.3577467203140259, "learning_rate": 0.00016659951390062257, "loss": 1.4778, "step": 12865 }, { "epoch": 0.16718780202162617, "grad_norm": 0.37337398529052734, "learning_rate": 0.0001665969144387112, "loss": 1.4985, "step": 12866 }, { "epoch": 0.16720079656554204, "grad_norm": 0.3283328711986542, "learning_rate": 0.0001665943149767998, "loss": 1.5841, "step": 12867 }, { "epoch": 0.16721379110945792, "grad_norm": 0.40434518456459045, "learning_rate": 0.00016659171551488845, "loss": 1.3934, "step": 12868 }, { "epoch": 0.1672267856533738, "grad_norm": 0.4591914713382721, "learning_rate": 0.00016658911605297704, "loss": 1.3366, "step": 12869 }, { "epoch": 0.16723978019728966, "grad_norm": 0.33408480882644653, "learning_rate": 0.00016658651659106567, "loss": 1.3697, "step": 12870 }, { "epoch": 0.16725277474120553, "grad_norm": 0.3955046832561493, "learning_rate": 0.00016658391712915426, "loss": 1.4126, "step": 12871 }, { "epoch": 0.1672657692851214, "grad_norm": 0.4294576644897461, "learning_rate": 0.0001665813176672429, "loss": 1.3959, "step": 12872 }, { "epoch": 0.16727876382903728, "grad_norm": 0.30360329151153564, "learning_rate": 0.00016657871820533151, "loss": 1.2906, "step": 12873 }, { "epoch": 0.16729175837295315, "grad_norm": 0.42848408222198486, "learning_rate": 0.0001665761187434201, "loss": 1.2307, "step": 12874 }, { "epoch": 0.16730475291686903, "grad_norm": 0.45242586731910706, "learning_rate": 0.00016657351928150874, "loss": 1.3869, "step": 12875 }, { "epoch": 0.1673177474607849, "grad_norm": 0.47310593724250793, "learning_rate": 0.00016657091981959736, "loss": 1.3105, "step": 12876 }, { "epoch": 0.16733074200470077, "grad_norm": 0.3324052393436432, "learning_rate": 0.00016656832035768596, "loss": 1.3804, "step": 12877 }, { "epoch": 0.16734373654861665, "grad_norm": 0.4319552481174469, "learning_rate": 0.00016656572089577458, "loss": 1.3998, "step": 12878 }, { "epoch": 0.16735673109253252, "grad_norm": 0.31943750381469727, "learning_rate": 0.00016656312143386318, "loss": 1.2418, "step": 12879 }, { "epoch": 0.1673697256364484, "grad_norm": 0.40766334533691406, "learning_rate": 0.00016656052197195183, "loss": 1.5104, "step": 12880 }, { "epoch": 0.16738272018036426, "grad_norm": 0.46422079205513, "learning_rate": 0.00016655792251004043, "loss": 1.5967, "step": 12881 }, { "epoch": 0.16739571472428014, "grad_norm": 0.3993177115917206, "learning_rate": 0.00016655532304812905, "loss": 1.3782, "step": 12882 }, { "epoch": 0.167408709268196, "grad_norm": 0.4317792057991028, "learning_rate": 0.00016655272358621765, "loss": 1.3474, "step": 12883 }, { "epoch": 0.16742170381211188, "grad_norm": 0.45836618542671204, "learning_rate": 0.00016655012412430627, "loss": 1.3878, "step": 12884 }, { "epoch": 0.16743469835602776, "grad_norm": 0.44449880719184875, "learning_rate": 0.0001665475246623949, "loss": 1.4617, "step": 12885 }, { "epoch": 0.16744769289994363, "grad_norm": 0.4375962018966675, "learning_rate": 0.0001665449252004835, "loss": 1.392, "step": 12886 }, { "epoch": 0.1674606874438595, "grad_norm": 0.4703550934791565, "learning_rate": 0.00016654232573857212, "loss": 1.5668, "step": 12887 }, { "epoch": 0.16747368198777537, "grad_norm": 0.31866562366485596, "learning_rate": 0.00016653972627666075, "loss": 1.5196, "step": 12888 }, { "epoch": 0.16748667653169125, "grad_norm": 0.312232106924057, "learning_rate": 0.00016653712681474934, "loss": 1.4614, "step": 12889 }, { "epoch": 0.16749967107560712, "grad_norm": 0.4223799407482147, "learning_rate": 0.00016653452735283797, "loss": 1.5073, "step": 12890 }, { "epoch": 0.167512665619523, "grad_norm": 0.41408222913742065, "learning_rate": 0.00016653192789092656, "loss": 1.5173, "step": 12891 }, { "epoch": 0.16752566016343887, "grad_norm": 0.40249311923980713, "learning_rate": 0.00016652932842901522, "loss": 1.4593, "step": 12892 }, { "epoch": 0.16753865470735474, "grad_norm": 0.39199817180633545, "learning_rate": 0.00016652672896710381, "loss": 1.544, "step": 12893 }, { "epoch": 0.1675516492512706, "grad_norm": 0.41567808389663696, "learning_rate": 0.00016652412950519244, "loss": 1.3974, "step": 12894 }, { "epoch": 0.16756464379518649, "grad_norm": 0.40071341395378113, "learning_rate": 0.00016652153004328106, "loss": 1.3259, "step": 12895 }, { "epoch": 0.16757763833910236, "grad_norm": 0.3968471586704254, "learning_rate": 0.00016651893058136966, "loss": 1.4359, "step": 12896 }, { "epoch": 0.16759063288301823, "grad_norm": 0.28312477469444275, "learning_rate": 0.00016651633111945828, "loss": 1.4723, "step": 12897 }, { "epoch": 0.1676036274269341, "grad_norm": 0.3450145423412323, "learning_rate": 0.00016651373165754688, "loss": 1.6804, "step": 12898 }, { "epoch": 0.16761662197084998, "grad_norm": 0.3740321099758148, "learning_rate": 0.00016651113219563553, "loss": 1.3322, "step": 12899 }, { "epoch": 0.16762961651476585, "grad_norm": 0.37756139039993286, "learning_rate": 0.00016650853273372413, "loss": 1.2638, "step": 12900 }, { "epoch": 0.16764261105868172, "grad_norm": 0.3208197057247162, "learning_rate": 0.00016650593327181273, "loss": 1.3465, "step": 12901 }, { "epoch": 0.1676556056025976, "grad_norm": 0.2987249791622162, "learning_rate": 0.00016650333380990135, "loss": 1.1555, "step": 12902 }, { "epoch": 0.16766860014651347, "grad_norm": 0.28416958451271057, "learning_rate": 0.00016650073434798998, "loss": 1.4346, "step": 12903 }, { "epoch": 0.16768159469042937, "grad_norm": 0.36585891246795654, "learning_rate": 0.0001664981348860786, "loss": 1.4997, "step": 12904 }, { "epoch": 0.16769458923434524, "grad_norm": 0.33395785093307495, "learning_rate": 0.0001664955354241672, "loss": 1.2841, "step": 12905 }, { "epoch": 0.16770758377826112, "grad_norm": 0.3879500925540924, "learning_rate": 0.00016649293596225582, "loss": 1.3641, "step": 12906 }, { "epoch": 0.167720578322177, "grad_norm": 0.27617931365966797, "learning_rate": 0.00016649033650034445, "loss": 1.4174, "step": 12907 }, { "epoch": 0.16773357286609286, "grad_norm": 0.35746923089027405, "learning_rate": 0.00016648773703843305, "loss": 1.3993, "step": 12908 }, { "epoch": 0.16774656741000873, "grad_norm": 0.39186882972717285, "learning_rate": 0.00016648513757652167, "loss": 1.2611, "step": 12909 }, { "epoch": 0.1677595619539246, "grad_norm": 0.5087866187095642, "learning_rate": 0.00016648253811461027, "loss": 1.4859, "step": 12910 }, { "epoch": 0.16777255649784048, "grad_norm": 0.4065325856208801, "learning_rate": 0.00016647993865269892, "loss": 1.4694, "step": 12911 }, { "epoch": 0.16778555104175635, "grad_norm": 0.4485664367675781, "learning_rate": 0.00016647733919078752, "loss": 1.47, "step": 12912 }, { "epoch": 0.16779854558567223, "grad_norm": 0.42203405499458313, "learning_rate": 0.00016647473972887614, "loss": 1.3472, "step": 12913 }, { "epoch": 0.1678115401295881, "grad_norm": 0.3816760778427124, "learning_rate": 0.00016647214026696474, "loss": 1.3832, "step": 12914 }, { "epoch": 0.16782453467350397, "grad_norm": 0.36738601326942444, "learning_rate": 0.00016646954080505336, "loss": 1.5662, "step": 12915 }, { "epoch": 0.16783752921741985, "grad_norm": 0.32766804099082947, "learning_rate": 0.000166466941343142, "loss": 1.4689, "step": 12916 }, { "epoch": 0.16785052376133572, "grad_norm": 0.4251675009727478, "learning_rate": 0.00016646434188123058, "loss": 1.5494, "step": 12917 }, { "epoch": 0.1678635183052516, "grad_norm": 0.31626254320144653, "learning_rate": 0.0001664617424193192, "loss": 1.3572, "step": 12918 }, { "epoch": 0.16787651284916746, "grad_norm": 0.40903937816619873, "learning_rate": 0.00016645914295740783, "loss": 1.4695, "step": 12919 }, { "epoch": 0.16788950739308334, "grad_norm": 0.352841317653656, "learning_rate": 0.00016645654349549643, "loss": 1.3018, "step": 12920 }, { "epoch": 0.1679025019369992, "grad_norm": 0.6975099444389343, "learning_rate": 0.00016645394403358506, "loss": 1.4839, "step": 12921 }, { "epoch": 0.16791549648091508, "grad_norm": 0.3246796429157257, "learning_rate": 0.00016645134457167365, "loss": 1.4653, "step": 12922 }, { "epoch": 0.16792849102483096, "grad_norm": 0.4402077794075012, "learning_rate": 0.0001664487451097623, "loss": 1.4932, "step": 12923 }, { "epoch": 0.16794148556874683, "grad_norm": 0.3910205662250519, "learning_rate": 0.0001664461456478509, "loss": 1.6219, "step": 12924 }, { "epoch": 0.1679544801126627, "grad_norm": 0.38421013951301575, "learning_rate": 0.00016644354618593953, "loss": 1.3568, "step": 12925 }, { "epoch": 0.16796747465657857, "grad_norm": 0.36789801716804504, "learning_rate": 0.00016644094672402812, "loss": 1.3636, "step": 12926 }, { "epoch": 0.16798046920049445, "grad_norm": 0.4123135507106781, "learning_rate": 0.00016643834726211675, "loss": 1.5264, "step": 12927 }, { "epoch": 0.16799346374441032, "grad_norm": 0.4110749661922455, "learning_rate": 0.00016643574780020537, "loss": 1.4197, "step": 12928 }, { "epoch": 0.1680064582883262, "grad_norm": 0.4219306409358978, "learning_rate": 0.00016643314833829397, "loss": 1.4166, "step": 12929 }, { "epoch": 0.16801945283224207, "grad_norm": 0.4811237156391144, "learning_rate": 0.0001664305488763826, "loss": 1.5247, "step": 12930 }, { "epoch": 0.16803244737615794, "grad_norm": 0.5712220072746277, "learning_rate": 0.00016642794941447122, "loss": 1.4353, "step": 12931 }, { "epoch": 0.1680454419200738, "grad_norm": 0.3985210955142975, "learning_rate": 0.00016642534995255982, "loss": 1.3716, "step": 12932 }, { "epoch": 0.16805843646398969, "grad_norm": 0.4543681740760803, "learning_rate": 0.00016642275049064844, "loss": 1.4218, "step": 12933 }, { "epoch": 0.16807143100790556, "grad_norm": 0.40007272362709045, "learning_rate": 0.00016642015102873707, "loss": 1.3386, "step": 12934 }, { "epoch": 0.16808442555182143, "grad_norm": 0.33320870995521545, "learning_rate": 0.0001664175515668257, "loss": 1.4486, "step": 12935 }, { "epoch": 0.1680974200957373, "grad_norm": 0.33012160658836365, "learning_rate": 0.0001664149521049143, "loss": 1.4514, "step": 12936 }, { "epoch": 0.16811041463965318, "grad_norm": 0.31427663564682007, "learning_rate": 0.0001664123526430029, "loss": 1.2062, "step": 12937 }, { "epoch": 0.16812340918356905, "grad_norm": 0.2677564322948456, "learning_rate": 0.00016640975318109154, "loss": 1.4618, "step": 12938 }, { "epoch": 0.16813640372748492, "grad_norm": 0.3974771201610565, "learning_rate": 0.00016640715371918013, "loss": 1.3538, "step": 12939 }, { "epoch": 0.1681493982714008, "grad_norm": 0.4523717164993286, "learning_rate": 0.00016640455425726876, "loss": 1.4396, "step": 12940 }, { "epoch": 0.16816239281531667, "grad_norm": 0.39971163868904114, "learning_rate": 0.00016640195479535736, "loss": 1.3753, "step": 12941 }, { "epoch": 0.16817538735923254, "grad_norm": 0.34252262115478516, "learning_rate": 0.000166399355333446, "loss": 1.4618, "step": 12942 }, { "epoch": 0.16818838190314842, "grad_norm": 0.39884689450263977, "learning_rate": 0.0001663967558715346, "loss": 1.3216, "step": 12943 }, { "epoch": 0.1682013764470643, "grad_norm": 0.3307528495788574, "learning_rate": 0.0001663941564096232, "loss": 1.4386, "step": 12944 }, { "epoch": 0.16821437099098016, "grad_norm": 0.39804431796073914, "learning_rate": 0.00016639155694771183, "loss": 1.3465, "step": 12945 }, { "epoch": 0.16822736553489603, "grad_norm": 0.38360700011253357, "learning_rate": 0.00016638895748580045, "loss": 1.4103, "step": 12946 }, { "epoch": 0.1682403600788119, "grad_norm": 0.35822516679763794, "learning_rate": 0.00016638635802388908, "loss": 1.3075, "step": 12947 }, { "epoch": 0.16825335462272778, "grad_norm": 0.3822590708732605, "learning_rate": 0.00016638375856197767, "loss": 1.3297, "step": 12948 }, { "epoch": 0.16826634916664365, "grad_norm": 0.37314456701278687, "learning_rate": 0.0001663811591000663, "loss": 1.4815, "step": 12949 }, { "epoch": 0.16827934371055953, "grad_norm": 0.2879674732685089, "learning_rate": 0.00016637855963815492, "loss": 1.2061, "step": 12950 }, { "epoch": 0.1682923382544754, "grad_norm": 0.39561727643013, "learning_rate": 0.00016637596017624352, "loss": 1.3816, "step": 12951 }, { "epoch": 0.16830533279839127, "grad_norm": 0.3500477373600006, "learning_rate": 0.00016637336071433214, "loss": 1.3735, "step": 12952 }, { "epoch": 0.16831832734230714, "grad_norm": 0.4114043414592743, "learning_rate": 0.00016637076125242074, "loss": 1.4901, "step": 12953 }, { "epoch": 0.16833132188622302, "grad_norm": 0.3913855254650116, "learning_rate": 0.0001663681617905094, "loss": 1.4516, "step": 12954 }, { "epoch": 0.1683443164301389, "grad_norm": 0.375450074672699, "learning_rate": 0.000166365562328598, "loss": 1.382, "step": 12955 }, { "epoch": 0.16835731097405476, "grad_norm": 0.3838854730129242, "learning_rate": 0.0001663629628666866, "loss": 1.4039, "step": 12956 }, { "epoch": 0.16837030551797064, "grad_norm": 0.3887314796447754, "learning_rate": 0.0001663603634047752, "loss": 1.2659, "step": 12957 }, { "epoch": 0.1683833000618865, "grad_norm": 0.34468498826026917, "learning_rate": 0.00016635776394286384, "loss": 1.29, "step": 12958 }, { "epoch": 0.16839629460580238, "grad_norm": 0.3886233866214752, "learning_rate": 0.00016635516448095246, "loss": 1.2587, "step": 12959 }, { "epoch": 0.16840928914971826, "grad_norm": 0.4780377745628357, "learning_rate": 0.00016635256501904106, "loss": 1.3597, "step": 12960 }, { "epoch": 0.16842228369363413, "grad_norm": 0.45188426971435547, "learning_rate": 0.00016634996555712968, "loss": 1.4089, "step": 12961 }, { "epoch": 0.16843527823755, "grad_norm": 0.39710554480552673, "learning_rate": 0.0001663473660952183, "loss": 1.4722, "step": 12962 }, { "epoch": 0.16844827278146587, "grad_norm": 0.3980942368507385, "learning_rate": 0.0001663447666333069, "loss": 1.4086, "step": 12963 }, { "epoch": 0.16846126732538175, "grad_norm": 0.31052473187446594, "learning_rate": 0.00016634216717139553, "loss": 1.3962, "step": 12964 }, { "epoch": 0.16847426186929762, "grad_norm": 0.3566180169582367, "learning_rate": 0.00016633956770948413, "loss": 1.444, "step": 12965 }, { "epoch": 0.1684872564132135, "grad_norm": 0.3781813085079193, "learning_rate": 0.00016633696824757278, "loss": 1.5075, "step": 12966 }, { "epoch": 0.16850025095712937, "grad_norm": 0.358777791261673, "learning_rate": 0.00016633436878566138, "loss": 1.343, "step": 12967 }, { "epoch": 0.16851324550104524, "grad_norm": 0.4308653175830841, "learning_rate": 0.00016633176932375, "loss": 1.4658, "step": 12968 }, { "epoch": 0.1685262400449611, "grad_norm": 0.4362732470035553, "learning_rate": 0.0001663291698618386, "loss": 1.4236, "step": 12969 }, { "epoch": 0.16853923458887698, "grad_norm": 0.3547877371311188, "learning_rate": 0.00016632657039992722, "loss": 1.5566, "step": 12970 }, { "epoch": 0.16855222913279286, "grad_norm": 0.4114231467247009, "learning_rate": 0.00016632397093801585, "loss": 1.3791, "step": 12971 }, { "epoch": 0.16856522367670873, "grad_norm": 0.5442303419113159, "learning_rate": 0.00016632137147610444, "loss": 1.249, "step": 12972 }, { "epoch": 0.1685782182206246, "grad_norm": 0.3428789973258972, "learning_rate": 0.00016631877201419307, "loss": 1.367, "step": 12973 }, { "epoch": 0.16859121276454048, "grad_norm": 0.4587998390197754, "learning_rate": 0.0001663161725522817, "loss": 1.7032, "step": 12974 }, { "epoch": 0.16860420730845635, "grad_norm": 0.47045060992240906, "learning_rate": 0.0001663135730903703, "loss": 1.3904, "step": 12975 }, { "epoch": 0.16861720185237222, "grad_norm": 0.4513276517391205, "learning_rate": 0.00016631097362845891, "loss": 1.5989, "step": 12976 }, { "epoch": 0.1686301963962881, "grad_norm": 0.3916197419166565, "learning_rate": 0.00016630837416654754, "loss": 1.3805, "step": 12977 }, { "epoch": 0.16864319094020397, "grad_norm": 0.3675021827220917, "learning_rate": 0.00016630577470463616, "loss": 1.4053, "step": 12978 }, { "epoch": 0.16865618548411984, "grad_norm": 0.41621658205986023, "learning_rate": 0.00016630317524272476, "loss": 1.4082, "step": 12979 }, { "epoch": 0.16866918002803574, "grad_norm": 0.40411582589149475, "learning_rate": 0.00016630057578081339, "loss": 1.576, "step": 12980 }, { "epoch": 0.16868217457195162, "grad_norm": 0.44377803802490234, "learning_rate": 0.000166297976318902, "loss": 1.4596, "step": 12981 }, { "epoch": 0.1686951691158675, "grad_norm": 0.3872390389442444, "learning_rate": 0.0001662953768569906, "loss": 1.4144, "step": 12982 }, { "epoch": 0.16870816365978336, "grad_norm": 0.37950897216796875, "learning_rate": 0.00016629277739507923, "loss": 1.4474, "step": 12983 }, { "epoch": 0.16872115820369923, "grad_norm": 0.3403869867324829, "learning_rate": 0.00016629017793316783, "loss": 1.316, "step": 12984 }, { "epoch": 0.1687341527476151, "grad_norm": 0.31518515944480896, "learning_rate": 0.00016628757847125645, "loss": 1.1081, "step": 12985 }, { "epoch": 0.16874714729153098, "grad_norm": 0.44658586382865906, "learning_rate": 0.00016628497900934508, "loss": 1.453, "step": 12986 }, { "epoch": 0.16876014183544685, "grad_norm": 0.4312993288040161, "learning_rate": 0.00016628237954743368, "loss": 1.5359, "step": 12987 }, { "epoch": 0.16877313637936273, "grad_norm": 0.3020199239253998, "learning_rate": 0.0001662797800855223, "loss": 1.4486, "step": 12988 }, { "epoch": 0.1687861309232786, "grad_norm": 0.36508649587631226, "learning_rate": 0.00016627718062361092, "loss": 1.4935, "step": 12989 }, { "epoch": 0.16879912546719447, "grad_norm": 0.38302719593048096, "learning_rate": 0.00016627458116169955, "loss": 1.3812, "step": 12990 }, { "epoch": 0.16881212001111034, "grad_norm": 0.4328066110610962, "learning_rate": 0.00016627198169978815, "loss": 1.3152, "step": 12991 }, { "epoch": 0.16882511455502622, "grad_norm": 0.3908267021179199, "learning_rate": 0.00016626938223787677, "loss": 1.4277, "step": 12992 }, { "epoch": 0.1688381090989421, "grad_norm": 0.37785065174102783, "learning_rate": 0.0001662667827759654, "loss": 1.3782, "step": 12993 }, { "epoch": 0.16885110364285796, "grad_norm": 0.37397968769073486, "learning_rate": 0.000166264183314054, "loss": 1.3038, "step": 12994 }, { "epoch": 0.16886409818677384, "grad_norm": 0.46899425983428955, "learning_rate": 0.00016626158385214262, "loss": 1.6436, "step": 12995 }, { "epoch": 0.1688770927306897, "grad_norm": 0.4795193374156952, "learning_rate": 0.00016625898439023121, "loss": 1.5185, "step": 12996 }, { "epoch": 0.16889008727460558, "grad_norm": 0.4219282567501068, "learning_rate": 0.00016625638492831987, "loss": 1.7168, "step": 12997 }, { "epoch": 0.16890308181852146, "grad_norm": 0.4655729830265045, "learning_rate": 0.00016625378546640846, "loss": 1.3464, "step": 12998 }, { "epoch": 0.16891607636243733, "grad_norm": 0.4344814121723175, "learning_rate": 0.00016625118600449706, "loss": 1.3381, "step": 12999 }, { "epoch": 0.1689290709063532, "grad_norm": 0.45287421345710754, "learning_rate": 0.00016624858654258568, "loss": 1.6687, "step": 13000 }, { "epoch": 0.16894206545026907, "grad_norm": 0.35197800397872925, "learning_rate": 0.0001662459870806743, "loss": 1.2894, "step": 13001 }, { "epoch": 0.16895505999418495, "grad_norm": 0.3689841330051422, "learning_rate": 0.00016624338761876293, "loss": 1.4353, "step": 13002 }, { "epoch": 0.16896805453810082, "grad_norm": 0.40450453758239746, "learning_rate": 0.00016624078815685153, "loss": 1.6761, "step": 13003 }, { "epoch": 0.1689810490820167, "grad_norm": 0.4451766610145569, "learning_rate": 0.00016623818869494016, "loss": 1.5902, "step": 13004 }, { "epoch": 0.16899404362593257, "grad_norm": 0.4453631639480591, "learning_rate": 0.00016623558923302878, "loss": 1.4594, "step": 13005 }, { "epoch": 0.16900703816984844, "grad_norm": 0.4766538143157959, "learning_rate": 0.00016623298977111738, "loss": 1.3703, "step": 13006 }, { "epoch": 0.1690200327137643, "grad_norm": 0.3757951557636261, "learning_rate": 0.000166230390309206, "loss": 1.2751, "step": 13007 }, { "epoch": 0.16903302725768019, "grad_norm": 0.48573431372642517, "learning_rate": 0.00016622779084729463, "loss": 1.6351, "step": 13008 }, { "epoch": 0.16904602180159606, "grad_norm": 0.4519944489002228, "learning_rate": 0.00016622519138538325, "loss": 1.4012, "step": 13009 }, { "epoch": 0.16905901634551193, "grad_norm": 0.35359400510787964, "learning_rate": 0.00016622259192347185, "loss": 1.2539, "step": 13010 }, { "epoch": 0.1690720108894278, "grad_norm": 0.4511498212814331, "learning_rate": 0.00016621999246156045, "loss": 1.5149, "step": 13011 }, { "epoch": 0.16908500543334368, "grad_norm": 0.46017739176750183, "learning_rate": 0.0001662173929996491, "loss": 1.5946, "step": 13012 }, { "epoch": 0.16909799997725955, "grad_norm": 0.3584032654762268, "learning_rate": 0.0001662147935377377, "loss": 1.4686, "step": 13013 }, { "epoch": 0.16911099452117542, "grad_norm": 0.42846420407295227, "learning_rate": 0.00016621219407582632, "loss": 1.3919, "step": 13014 }, { "epoch": 0.1691239890650913, "grad_norm": 0.43575266003608704, "learning_rate": 0.00016620959461391492, "loss": 1.3496, "step": 13015 }, { "epoch": 0.16913698360900717, "grad_norm": 0.3435360789299011, "learning_rate": 0.00016620699515200354, "loss": 1.2168, "step": 13016 }, { "epoch": 0.16914997815292304, "grad_norm": 0.4018901288509369, "learning_rate": 0.00016620439569009217, "loss": 1.5358, "step": 13017 }, { "epoch": 0.16916297269683891, "grad_norm": 0.4270346164703369, "learning_rate": 0.00016620179622818076, "loss": 1.3092, "step": 13018 }, { "epoch": 0.1691759672407548, "grad_norm": 0.35556191205978394, "learning_rate": 0.0001661991967662694, "loss": 1.5059, "step": 13019 }, { "epoch": 0.16918896178467066, "grad_norm": 0.3976903557777405, "learning_rate": 0.000166196597304358, "loss": 1.5113, "step": 13020 }, { "epoch": 0.16920195632858653, "grad_norm": 0.38232460618019104, "learning_rate": 0.00016619399784244664, "loss": 1.4498, "step": 13021 }, { "epoch": 0.1692149508725024, "grad_norm": 0.33682799339294434, "learning_rate": 0.00016619139838053523, "loss": 1.4111, "step": 13022 }, { "epoch": 0.16922794541641828, "grad_norm": 0.4582914412021637, "learning_rate": 0.00016618879891862383, "loss": 1.478, "step": 13023 }, { "epoch": 0.16924093996033415, "grad_norm": 0.40171658992767334, "learning_rate": 0.00016618619945671248, "loss": 1.3487, "step": 13024 }, { "epoch": 0.16925393450425003, "grad_norm": 0.4668387472629547, "learning_rate": 0.00016618359999480108, "loss": 1.4507, "step": 13025 }, { "epoch": 0.1692669290481659, "grad_norm": 0.34170088171958923, "learning_rate": 0.0001661810005328897, "loss": 1.3844, "step": 13026 }, { "epoch": 0.16927992359208177, "grad_norm": 0.4102608561515808, "learning_rate": 0.0001661784010709783, "loss": 1.2803, "step": 13027 }, { "epoch": 0.16929291813599764, "grad_norm": 0.36769479513168335, "learning_rate": 0.00016617580160906693, "loss": 1.4923, "step": 13028 }, { "epoch": 0.16930591267991352, "grad_norm": 0.42638328671455383, "learning_rate": 0.00016617320214715555, "loss": 1.4586, "step": 13029 }, { "epoch": 0.1693189072238294, "grad_norm": 0.5077546238899231, "learning_rate": 0.00016617060268524415, "loss": 1.5017, "step": 13030 }, { "epoch": 0.16933190176774526, "grad_norm": 0.5049060583114624, "learning_rate": 0.00016616800322333277, "loss": 1.4212, "step": 13031 }, { "epoch": 0.16934489631166114, "grad_norm": 0.37264689803123474, "learning_rate": 0.0001661654037614214, "loss": 1.2791, "step": 13032 }, { "epoch": 0.169357890855577, "grad_norm": 0.37991753220558167, "learning_rate": 0.00016616280429951002, "loss": 1.4667, "step": 13033 }, { "epoch": 0.16937088539949288, "grad_norm": 0.42088231444358826, "learning_rate": 0.00016616020483759862, "loss": 1.4358, "step": 13034 }, { "epoch": 0.16938387994340875, "grad_norm": 0.37237268686294556, "learning_rate": 0.00016615760537568724, "loss": 1.2656, "step": 13035 }, { "epoch": 0.16939687448732463, "grad_norm": 0.3532378375530243, "learning_rate": 0.00016615500591377587, "loss": 1.1733, "step": 13036 }, { "epoch": 0.1694098690312405, "grad_norm": 0.38380929827690125, "learning_rate": 0.00016615240645186447, "loss": 1.4433, "step": 13037 }, { "epoch": 0.16942286357515637, "grad_norm": 0.4379131495952606, "learning_rate": 0.0001661498069899531, "loss": 1.3661, "step": 13038 }, { "epoch": 0.16943585811907225, "grad_norm": 0.2939518690109253, "learning_rate": 0.0001661472075280417, "loss": 1.3203, "step": 13039 }, { "epoch": 0.16944885266298812, "grad_norm": 0.34767088294029236, "learning_rate": 0.0001661446080661303, "loss": 1.3178, "step": 13040 }, { "epoch": 0.169461847206904, "grad_norm": 0.34375861287117004, "learning_rate": 0.00016614200860421894, "loss": 1.1991, "step": 13041 }, { "epoch": 0.16947484175081987, "grad_norm": 0.36901646852493286, "learning_rate": 0.00016613940914230753, "loss": 1.3708, "step": 13042 }, { "epoch": 0.16948783629473574, "grad_norm": 0.4402235150337219, "learning_rate": 0.00016613680968039616, "loss": 1.4395, "step": 13043 }, { "epoch": 0.1695008308386516, "grad_norm": 0.47073665261268616, "learning_rate": 0.00016613421021848478, "loss": 1.5272, "step": 13044 }, { "epoch": 0.16951382538256748, "grad_norm": 0.4363754391670227, "learning_rate": 0.0001661316107565734, "loss": 1.4797, "step": 13045 }, { "epoch": 0.16952681992648336, "grad_norm": 0.3937673568725586, "learning_rate": 0.000166129011294662, "loss": 1.2299, "step": 13046 }, { "epoch": 0.16953981447039923, "grad_norm": 0.4259859323501587, "learning_rate": 0.00016612641183275063, "loss": 1.5098, "step": 13047 }, { "epoch": 0.1695528090143151, "grad_norm": 0.4275612235069275, "learning_rate": 0.00016612381237083925, "loss": 1.5159, "step": 13048 }, { "epoch": 0.16956580355823098, "grad_norm": 0.4734569489955902, "learning_rate": 0.00016612121290892785, "loss": 1.4805, "step": 13049 }, { "epoch": 0.16957879810214685, "grad_norm": 0.3092706799507141, "learning_rate": 0.00016611861344701648, "loss": 1.1996, "step": 13050 }, { "epoch": 0.16959179264606272, "grad_norm": 0.35904547572135925, "learning_rate": 0.0001661160139851051, "loss": 1.4469, "step": 13051 }, { "epoch": 0.1696047871899786, "grad_norm": 0.4766669273376465, "learning_rate": 0.00016611341452319372, "loss": 1.4596, "step": 13052 }, { "epoch": 0.16961778173389447, "grad_norm": 0.36199504137039185, "learning_rate": 0.00016611081506128232, "loss": 1.6569, "step": 13053 }, { "epoch": 0.16963077627781034, "grad_norm": 0.43424689769744873, "learning_rate": 0.00016610821559937092, "loss": 1.6603, "step": 13054 }, { "epoch": 0.16964377082172621, "grad_norm": 0.4443923234939575, "learning_rate": 0.00016610561613745957, "loss": 1.4116, "step": 13055 }, { "epoch": 0.16965676536564211, "grad_norm": 0.40876296162605286, "learning_rate": 0.00016610301667554817, "loss": 1.5402, "step": 13056 }, { "epoch": 0.169669759909558, "grad_norm": 0.4384091794490814, "learning_rate": 0.0001661004172136368, "loss": 1.4127, "step": 13057 }, { "epoch": 0.16968275445347386, "grad_norm": 0.5392494201660156, "learning_rate": 0.0001660978177517254, "loss": 1.5602, "step": 13058 }, { "epoch": 0.16969574899738973, "grad_norm": 0.41336789727211, "learning_rate": 0.00016609521828981401, "loss": 1.5778, "step": 13059 }, { "epoch": 0.1697087435413056, "grad_norm": 0.5546835660934448, "learning_rate": 0.00016609261882790264, "loss": 1.6286, "step": 13060 }, { "epoch": 0.16972173808522148, "grad_norm": 0.43889087438583374, "learning_rate": 0.00016609001936599124, "loss": 1.3447, "step": 13061 }, { "epoch": 0.16973473262913735, "grad_norm": 0.4774346947669983, "learning_rate": 0.00016608741990407986, "loss": 1.5451, "step": 13062 }, { "epoch": 0.16974772717305323, "grad_norm": 0.3462209105491638, "learning_rate": 0.00016608482044216849, "loss": 1.5146, "step": 13063 }, { "epoch": 0.1697607217169691, "grad_norm": 0.4561622440814972, "learning_rate": 0.0001660822209802571, "loss": 1.5189, "step": 13064 }, { "epoch": 0.16977371626088497, "grad_norm": 0.4071427285671234, "learning_rate": 0.0001660796215183457, "loss": 1.3883, "step": 13065 }, { "epoch": 0.16978671080480084, "grad_norm": 0.3243250548839569, "learning_rate": 0.0001660770220564343, "loss": 1.4454, "step": 13066 }, { "epoch": 0.16979970534871672, "grad_norm": 0.4391162097454071, "learning_rate": 0.00016607442259452296, "loss": 1.4878, "step": 13067 }, { "epoch": 0.1698126998926326, "grad_norm": 0.4476023316383362, "learning_rate": 0.00016607182313261155, "loss": 1.5835, "step": 13068 }, { "epoch": 0.16982569443654846, "grad_norm": 0.384269118309021, "learning_rate": 0.00016606922367070018, "loss": 1.4852, "step": 13069 }, { "epoch": 0.16983868898046434, "grad_norm": 0.41587504744529724, "learning_rate": 0.00016606662420878878, "loss": 1.371, "step": 13070 }, { "epoch": 0.1698516835243802, "grad_norm": 0.3882206380367279, "learning_rate": 0.0001660640247468774, "loss": 1.4205, "step": 13071 }, { "epoch": 0.16986467806829608, "grad_norm": 0.4553848206996918, "learning_rate": 0.00016606142528496602, "loss": 1.4247, "step": 13072 }, { "epoch": 0.16987767261221196, "grad_norm": 0.4057481586933136, "learning_rate": 0.00016605882582305462, "loss": 1.3769, "step": 13073 }, { "epoch": 0.16989066715612783, "grad_norm": 0.5231629014015198, "learning_rate": 0.00016605622636114325, "loss": 1.5658, "step": 13074 }, { "epoch": 0.1699036617000437, "grad_norm": 0.4660027325153351, "learning_rate": 0.00016605362689923187, "loss": 1.6079, "step": 13075 }, { "epoch": 0.16991665624395957, "grad_norm": 0.40947598218917847, "learning_rate": 0.0001660510274373205, "loss": 1.4387, "step": 13076 }, { "epoch": 0.16992965078787545, "grad_norm": 0.430973082780838, "learning_rate": 0.0001660484279754091, "loss": 1.5157, "step": 13077 }, { "epoch": 0.16994264533179132, "grad_norm": 0.4244171679019928, "learning_rate": 0.0001660458285134977, "loss": 1.3798, "step": 13078 }, { "epoch": 0.1699556398757072, "grad_norm": 0.4093726575374603, "learning_rate": 0.00016604322905158634, "loss": 1.3113, "step": 13079 }, { "epoch": 0.16996863441962307, "grad_norm": 0.4701089859008789, "learning_rate": 0.00016604062958967494, "loss": 1.4074, "step": 13080 }, { "epoch": 0.16998162896353894, "grad_norm": 0.394441694021225, "learning_rate": 0.00016603803012776356, "loss": 1.6285, "step": 13081 }, { "epoch": 0.1699946235074548, "grad_norm": 0.3873908519744873, "learning_rate": 0.0001660354306658522, "loss": 1.5151, "step": 13082 }, { "epoch": 0.17000761805137068, "grad_norm": 0.4067518413066864, "learning_rate": 0.00016603283120394079, "loss": 1.3673, "step": 13083 }, { "epoch": 0.17002061259528656, "grad_norm": 0.41336995363235474, "learning_rate": 0.0001660302317420294, "loss": 1.5925, "step": 13084 }, { "epoch": 0.17003360713920243, "grad_norm": 0.3967403471469879, "learning_rate": 0.000166027632280118, "loss": 1.4412, "step": 13085 }, { "epoch": 0.1700466016831183, "grad_norm": 0.4271251857280731, "learning_rate": 0.00016602503281820666, "loss": 1.4962, "step": 13086 }, { "epoch": 0.17005959622703418, "grad_norm": 0.3277058005332947, "learning_rate": 0.00016602243335629526, "loss": 1.1996, "step": 13087 }, { "epoch": 0.17007259077095005, "grad_norm": 0.32344329357147217, "learning_rate": 0.00016601983389438388, "loss": 1.3144, "step": 13088 }, { "epoch": 0.17008558531486592, "grad_norm": 0.30760663747787476, "learning_rate": 0.00016601723443247248, "loss": 1.3494, "step": 13089 }, { "epoch": 0.1700985798587818, "grad_norm": 0.39171960949897766, "learning_rate": 0.0001660146349705611, "loss": 1.3846, "step": 13090 }, { "epoch": 0.17011157440269767, "grad_norm": 0.37440428137779236, "learning_rate": 0.00016601203550864973, "loss": 1.5354, "step": 13091 }, { "epoch": 0.17012456894661354, "grad_norm": 0.2898044288158417, "learning_rate": 0.00016600943604673832, "loss": 1.2046, "step": 13092 }, { "epoch": 0.17013756349052941, "grad_norm": 0.3481447696685791, "learning_rate": 0.00016600683658482695, "loss": 1.4129, "step": 13093 }, { "epoch": 0.1701505580344453, "grad_norm": 0.24416305124759674, "learning_rate": 0.00016600423712291557, "loss": 1.4474, "step": 13094 }, { "epoch": 0.17016355257836116, "grad_norm": 0.3698457181453705, "learning_rate": 0.00016600163766100417, "loss": 1.4244, "step": 13095 }, { "epoch": 0.17017654712227703, "grad_norm": 0.4061986207962036, "learning_rate": 0.0001659990381990928, "loss": 1.5318, "step": 13096 }, { "epoch": 0.1701895416661929, "grad_norm": 0.3631649613380432, "learning_rate": 0.0001659964387371814, "loss": 1.3014, "step": 13097 }, { "epoch": 0.17020253621010878, "grad_norm": 0.40322378277778625, "learning_rate": 0.00016599383927527004, "loss": 1.3754, "step": 13098 }, { "epoch": 0.17021553075402465, "grad_norm": 0.3894997537136078, "learning_rate": 0.00016599123981335864, "loss": 1.5042, "step": 13099 }, { "epoch": 0.17022852529794053, "grad_norm": 0.34744152426719666, "learning_rate": 0.00016598864035144727, "loss": 1.4292, "step": 13100 }, { "epoch": 0.1702415198418564, "grad_norm": 0.34252381324768066, "learning_rate": 0.00016598604088953586, "loss": 1.5083, "step": 13101 }, { "epoch": 0.17025451438577227, "grad_norm": 0.4321986138820648, "learning_rate": 0.0001659834414276245, "loss": 1.2633, "step": 13102 }, { "epoch": 0.17026750892968814, "grad_norm": 0.5124055743217468, "learning_rate": 0.0001659808419657131, "loss": 1.6757, "step": 13103 }, { "epoch": 0.17028050347360402, "grad_norm": 0.31914374232292175, "learning_rate": 0.0001659782425038017, "loss": 1.5143, "step": 13104 }, { "epoch": 0.1702934980175199, "grad_norm": 0.3535726070404053, "learning_rate": 0.00016597564304189033, "loss": 1.5525, "step": 13105 }, { "epoch": 0.17030649256143576, "grad_norm": 0.42167311906814575, "learning_rate": 0.00016597304357997896, "loss": 1.4386, "step": 13106 }, { "epoch": 0.17031948710535164, "grad_norm": 0.41847625374794006, "learning_rate": 0.00016597044411806756, "loss": 1.2622, "step": 13107 }, { "epoch": 0.1703324816492675, "grad_norm": 0.41937389969825745, "learning_rate": 0.00016596784465615618, "loss": 1.4325, "step": 13108 }, { "epoch": 0.17034547619318338, "grad_norm": 0.3507000505924225, "learning_rate": 0.00016596524519424478, "loss": 1.4062, "step": 13109 }, { "epoch": 0.17035847073709925, "grad_norm": 0.40511760115623474, "learning_rate": 0.00016596264573233343, "loss": 1.517, "step": 13110 }, { "epoch": 0.17037146528101513, "grad_norm": 0.3216192424297333, "learning_rate": 0.00016596004627042203, "loss": 1.3859, "step": 13111 }, { "epoch": 0.170384459824931, "grad_norm": 0.5736033320426941, "learning_rate": 0.00016595744680851065, "loss": 1.2398, "step": 13112 }, { "epoch": 0.17039745436884687, "grad_norm": 0.3535250425338745, "learning_rate": 0.00016595484734659925, "loss": 1.646, "step": 13113 }, { "epoch": 0.17041044891276275, "grad_norm": 0.41037517786026, "learning_rate": 0.00016595224788468787, "loss": 1.3661, "step": 13114 }, { "epoch": 0.17042344345667862, "grad_norm": 0.35530975461006165, "learning_rate": 0.0001659496484227765, "loss": 1.2946, "step": 13115 }, { "epoch": 0.1704364380005945, "grad_norm": 0.4560537338256836, "learning_rate": 0.0001659470489608651, "loss": 1.4758, "step": 13116 }, { "epoch": 0.17044943254451037, "grad_norm": 0.45485448837280273, "learning_rate": 0.00016594444949895372, "loss": 1.4339, "step": 13117 }, { "epoch": 0.17046242708842624, "grad_norm": 0.40731996297836304, "learning_rate": 0.00016594185003704234, "loss": 1.4631, "step": 13118 }, { "epoch": 0.1704754216323421, "grad_norm": 0.40122923254966736, "learning_rate": 0.00016593925057513097, "loss": 1.5834, "step": 13119 }, { "epoch": 0.17048841617625798, "grad_norm": 0.46182429790496826, "learning_rate": 0.00016593665111321957, "loss": 1.3853, "step": 13120 }, { "epoch": 0.17050141072017386, "grad_norm": 0.27336835861206055, "learning_rate": 0.0001659340516513082, "loss": 1.3728, "step": 13121 }, { "epoch": 0.17051440526408973, "grad_norm": 0.38571515679359436, "learning_rate": 0.00016593145218939681, "loss": 1.3127, "step": 13122 }, { "epoch": 0.1705273998080056, "grad_norm": 0.36559775471687317, "learning_rate": 0.0001659288527274854, "loss": 1.2105, "step": 13123 }, { "epoch": 0.17054039435192148, "grad_norm": 0.35533106327056885, "learning_rate": 0.00016592625326557404, "loss": 1.3879, "step": 13124 }, { "epoch": 0.17055338889583735, "grad_norm": 0.3331126272678375, "learning_rate": 0.00016592365380366266, "loss": 1.5676, "step": 13125 }, { "epoch": 0.17056638343975322, "grad_norm": 0.38350364565849304, "learning_rate": 0.00016592105434175126, "loss": 1.5012, "step": 13126 }, { "epoch": 0.1705793779836691, "grad_norm": 0.29600998759269714, "learning_rate": 0.00016591845487983988, "loss": 1.3653, "step": 13127 }, { "epoch": 0.17059237252758497, "grad_norm": 0.35822761058807373, "learning_rate": 0.00016591585541792848, "loss": 1.5574, "step": 13128 }, { "epoch": 0.17060536707150084, "grad_norm": 0.26348254084587097, "learning_rate": 0.00016591325595601713, "loss": 1.376, "step": 13129 }, { "epoch": 0.1706183616154167, "grad_norm": 0.4692091643810272, "learning_rate": 0.00016591065649410573, "loss": 1.4453, "step": 13130 }, { "epoch": 0.1706313561593326, "grad_norm": 0.3583778738975525, "learning_rate": 0.00016590805703219435, "loss": 1.4775, "step": 13131 }, { "epoch": 0.1706443507032485, "grad_norm": 0.40448296070098877, "learning_rate": 0.00016590545757028295, "loss": 1.4801, "step": 13132 }, { "epoch": 0.17065734524716436, "grad_norm": 0.37129494547843933, "learning_rate": 0.00016590285810837158, "loss": 1.3942, "step": 13133 }, { "epoch": 0.17067033979108023, "grad_norm": 0.38922998309135437, "learning_rate": 0.0001659002586464602, "loss": 1.4409, "step": 13134 }, { "epoch": 0.1706833343349961, "grad_norm": 0.4431775212287903, "learning_rate": 0.0001658976591845488, "loss": 1.4489, "step": 13135 }, { "epoch": 0.17069632887891198, "grad_norm": 0.3151502311229706, "learning_rate": 0.00016589505972263742, "loss": 1.1253, "step": 13136 }, { "epoch": 0.17070932342282785, "grad_norm": 0.4834553301334381, "learning_rate": 0.00016589246026072605, "loss": 1.4064, "step": 13137 }, { "epoch": 0.17072231796674373, "grad_norm": 0.4206945300102234, "learning_rate": 0.00016588986079881464, "loss": 1.601, "step": 13138 }, { "epoch": 0.1707353125106596, "grad_norm": 0.3786860406398773, "learning_rate": 0.00016588726133690327, "loss": 1.3215, "step": 13139 }, { "epoch": 0.17074830705457547, "grad_norm": 0.44053781032562256, "learning_rate": 0.00016588466187499187, "loss": 1.3995, "step": 13140 }, { "epoch": 0.17076130159849134, "grad_norm": 0.4013817310333252, "learning_rate": 0.00016588206241308052, "loss": 1.6734, "step": 13141 }, { "epoch": 0.17077429614240722, "grad_norm": 0.30979812145233154, "learning_rate": 0.00016587946295116911, "loss": 1.3133, "step": 13142 }, { "epoch": 0.1707872906863231, "grad_norm": 0.399179071187973, "learning_rate": 0.00016587686348925774, "loss": 1.5437, "step": 13143 }, { "epoch": 0.17080028523023896, "grad_norm": 0.45547887682914734, "learning_rate": 0.00016587426402734634, "loss": 1.5763, "step": 13144 }, { "epoch": 0.17081327977415484, "grad_norm": 0.33724868297576904, "learning_rate": 0.00016587166456543496, "loss": 1.575, "step": 13145 }, { "epoch": 0.1708262743180707, "grad_norm": 0.281260222196579, "learning_rate": 0.00016586906510352359, "loss": 1.3803, "step": 13146 }, { "epoch": 0.17083926886198658, "grad_norm": 0.46044909954071045, "learning_rate": 0.00016586646564161218, "loss": 1.5038, "step": 13147 }, { "epoch": 0.17085226340590245, "grad_norm": 0.44409069418907166, "learning_rate": 0.0001658638661797008, "loss": 1.4848, "step": 13148 }, { "epoch": 0.17086525794981833, "grad_norm": 0.41668617725372314, "learning_rate": 0.00016586126671778943, "loss": 1.4433, "step": 13149 }, { "epoch": 0.1708782524937342, "grad_norm": 0.36708563566207886, "learning_rate": 0.00016585866725587803, "loss": 1.517, "step": 13150 }, { "epoch": 0.17089124703765007, "grad_norm": 0.49574822187423706, "learning_rate": 0.00016585606779396665, "loss": 1.51, "step": 13151 }, { "epoch": 0.17090424158156595, "grad_norm": 0.4861152470111847, "learning_rate": 0.00016585346833205525, "loss": 1.4301, "step": 13152 }, { "epoch": 0.17091723612548182, "grad_norm": 0.3129928708076477, "learning_rate": 0.0001658508688701439, "loss": 1.4281, "step": 13153 }, { "epoch": 0.1709302306693977, "grad_norm": 0.4674815237522125, "learning_rate": 0.0001658482694082325, "loss": 1.3359, "step": 13154 }, { "epoch": 0.17094322521331357, "grad_norm": 0.43875250220298767, "learning_rate": 0.00016584566994632112, "loss": 1.3378, "step": 13155 }, { "epoch": 0.17095621975722944, "grad_norm": 0.32091060280799866, "learning_rate": 0.00016584307048440975, "loss": 1.4266, "step": 13156 }, { "epoch": 0.1709692143011453, "grad_norm": 0.33862367272377014, "learning_rate": 0.00016584047102249835, "loss": 1.2224, "step": 13157 }, { "epoch": 0.17098220884506118, "grad_norm": 0.42534947395324707, "learning_rate": 0.00016583787156058697, "loss": 1.4238, "step": 13158 }, { "epoch": 0.17099520338897706, "grad_norm": 0.3321935534477234, "learning_rate": 0.00016583527209867557, "loss": 1.34, "step": 13159 }, { "epoch": 0.17100819793289293, "grad_norm": 0.45860472321510315, "learning_rate": 0.00016583267263676422, "loss": 1.5475, "step": 13160 }, { "epoch": 0.1710211924768088, "grad_norm": 0.42171356081962585, "learning_rate": 0.00016583007317485282, "loss": 1.2746, "step": 13161 }, { "epoch": 0.17103418702072468, "grad_norm": 0.3615332543849945, "learning_rate": 0.00016582747371294141, "loss": 1.5291, "step": 13162 }, { "epoch": 0.17104718156464055, "grad_norm": 0.3345520794391632, "learning_rate": 0.00016582487425103004, "loss": 1.2914, "step": 13163 }, { "epoch": 0.17106017610855642, "grad_norm": 0.36885762214660645, "learning_rate": 0.00016582227478911866, "loss": 1.4094, "step": 13164 }, { "epoch": 0.1710731706524723, "grad_norm": 0.40950146317481995, "learning_rate": 0.0001658196753272073, "loss": 1.3136, "step": 13165 }, { "epoch": 0.17108616519638817, "grad_norm": 0.4001840054988861, "learning_rate": 0.00016581707586529589, "loss": 1.4957, "step": 13166 }, { "epoch": 0.17109915974030404, "grad_norm": 0.361457884311676, "learning_rate": 0.0001658144764033845, "loss": 1.6166, "step": 13167 }, { "epoch": 0.17111215428421991, "grad_norm": 0.3711101710796356, "learning_rate": 0.00016581187694147313, "loss": 1.6462, "step": 13168 }, { "epoch": 0.1711251488281358, "grad_norm": 0.4070250391960144, "learning_rate": 0.00016580927747956173, "loss": 1.3827, "step": 13169 }, { "epoch": 0.17113814337205166, "grad_norm": 0.35707810521125793, "learning_rate": 0.00016580667801765036, "loss": 1.4192, "step": 13170 }, { "epoch": 0.17115113791596753, "grad_norm": 0.5058854818344116, "learning_rate": 0.00016580407855573895, "loss": 1.5332, "step": 13171 }, { "epoch": 0.1711641324598834, "grad_norm": 0.40400612354278564, "learning_rate": 0.0001658014790938276, "loss": 1.5948, "step": 13172 }, { "epoch": 0.17117712700379928, "grad_norm": 0.4434622526168823, "learning_rate": 0.0001657988796319162, "loss": 1.4775, "step": 13173 }, { "epoch": 0.17119012154771515, "grad_norm": 0.3293512165546417, "learning_rate": 0.00016579628017000483, "loss": 1.4274, "step": 13174 }, { "epoch": 0.17120311609163102, "grad_norm": 0.3103547692298889, "learning_rate": 0.00016579368070809342, "loss": 1.4477, "step": 13175 }, { "epoch": 0.1712161106355469, "grad_norm": 0.4046972692012787, "learning_rate": 0.00016579108124618205, "loss": 1.3526, "step": 13176 }, { "epoch": 0.17122910517946277, "grad_norm": 0.35560137033462524, "learning_rate": 0.00016578848178427067, "loss": 1.5781, "step": 13177 }, { "epoch": 0.17124209972337864, "grad_norm": 0.44884198904037476, "learning_rate": 0.00016578588232235927, "loss": 1.4761, "step": 13178 }, { "epoch": 0.17125509426729452, "grad_norm": 0.42357537150382996, "learning_rate": 0.0001657832828604479, "loss": 1.5649, "step": 13179 }, { "epoch": 0.1712680888112104, "grad_norm": 0.35290294885635376, "learning_rate": 0.00016578068339853652, "loss": 1.2905, "step": 13180 }, { "epoch": 0.17128108335512626, "grad_norm": 0.37031111121177673, "learning_rate": 0.00016577808393662512, "loss": 1.5097, "step": 13181 }, { "epoch": 0.17129407789904214, "grad_norm": 0.5222713947296143, "learning_rate": 0.00016577548447471374, "loss": 1.4259, "step": 13182 }, { "epoch": 0.171307072442958, "grad_norm": 0.3635243773460388, "learning_rate": 0.00016577288501280234, "loss": 1.3436, "step": 13183 }, { "epoch": 0.17132006698687388, "grad_norm": 0.4176403284072876, "learning_rate": 0.000165770285550891, "loss": 1.5, "step": 13184 }, { "epoch": 0.17133306153078975, "grad_norm": 0.4752853214740753, "learning_rate": 0.0001657676860889796, "loss": 1.3882, "step": 13185 }, { "epoch": 0.17134605607470563, "grad_norm": 0.40398016571998596, "learning_rate": 0.0001657650866270682, "loss": 1.403, "step": 13186 }, { "epoch": 0.1713590506186215, "grad_norm": 0.3192632496356964, "learning_rate": 0.0001657624871651568, "loss": 1.2473, "step": 13187 }, { "epoch": 0.17137204516253737, "grad_norm": 0.40304452180862427, "learning_rate": 0.00016575988770324543, "loss": 1.2444, "step": 13188 }, { "epoch": 0.17138503970645325, "grad_norm": 0.4249211847782135, "learning_rate": 0.00016575728824133406, "loss": 1.4169, "step": 13189 }, { "epoch": 0.17139803425036912, "grad_norm": 0.309519499540329, "learning_rate": 0.00016575468877942266, "loss": 1.2616, "step": 13190 }, { "epoch": 0.171411028794285, "grad_norm": 0.41727471351623535, "learning_rate": 0.00016575208931751128, "loss": 1.3114, "step": 13191 }, { "epoch": 0.17142402333820086, "grad_norm": 0.4680091142654419, "learning_rate": 0.0001657494898555999, "loss": 1.3036, "step": 13192 }, { "epoch": 0.17143701788211674, "grad_norm": 0.5041329264640808, "learning_rate": 0.0001657468903936885, "loss": 1.3392, "step": 13193 }, { "epoch": 0.1714500124260326, "grad_norm": 0.36246755719184875, "learning_rate": 0.00016574429093177713, "loss": 1.5403, "step": 13194 }, { "epoch": 0.17146300696994848, "grad_norm": 0.36042118072509766, "learning_rate": 0.00016574169146986575, "loss": 1.4594, "step": 13195 }, { "epoch": 0.17147600151386436, "grad_norm": 0.4207163453102112, "learning_rate": 0.00016573909200795438, "loss": 1.556, "step": 13196 }, { "epoch": 0.17148899605778023, "grad_norm": 0.48187360167503357, "learning_rate": 0.00016573649254604297, "loss": 1.6193, "step": 13197 }, { "epoch": 0.1715019906016961, "grad_norm": 0.28130674362182617, "learning_rate": 0.0001657338930841316, "loss": 1.4549, "step": 13198 }, { "epoch": 0.17151498514561198, "grad_norm": 0.32879185676574707, "learning_rate": 0.00016573129362222022, "loss": 1.3952, "step": 13199 }, { "epoch": 0.17152797968952785, "grad_norm": 0.43828660249710083, "learning_rate": 0.00016572869416030882, "loss": 1.7164, "step": 13200 }, { "epoch": 0.17154097423344372, "grad_norm": 0.2906350791454315, "learning_rate": 0.00016572609469839744, "loss": 1.4835, "step": 13201 }, { "epoch": 0.1715539687773596, "grad_norm": 0.354322612285614, "learning_rate": 0.00016572349523648604, "loss": 1.4717, "step": 13202 }, { "epoch": 0.17156696332127547, "grad_norm": 0.4343889057636261, "learning_rate": 0.0001657208957745747, "loss": 1.5283, "step": 13203 }, { "epoch": 0.17157995786519134, "grad_norm": 0.40784355998039246, "learning_rate": 0.0001657182963126633, "loss": 1.2878, "step": 13204 }, { "epoch": 0.1715929524091072, "grad_norm": 0.4645921289920807, "learning_rate": 0.0001657156968507519, "loss": 1.3383, "step": 13205 }, { "epoch": 0.1716059469530231, "grad_norm": 0.24410507082939148, "learning_rate": 0.0001657130973888405, "loss": 1.3735, "step": 13206 }, { "epoch": 0.17161894149693896, "grad_norm": 0.25429943203926086, "learning_rate": 0.00016571049792692914, "loss": 1.2981, "step": 13207 }, { "epoch": 0.17163193604085483, "grad_norm": 0.4111470878124237, "learning_rate": 0.00016570789846501776, "loss": 1.5104, "step": 13208 }, { "epoch": 0.17164493058477073, "grad_norm": 0.43327146768569946, "learning_rate": 0.00016570529900310636, "loss": 1.5432, "step": 13209 }, { "epoch": 0.1716579251286866, "grad_norm": 0.39463886618614197, "learning_rate": 0.00016570269954119498, "loss": 1.3528, "step": 13210 }, { "epoch": 0.17167091967260248, "grad_norm": 0.35991427302360535, "learning_rate": 0.0001657001000792836, "loss": 1.441, "step": 13211 }, { "epoch": 0.17168391421651835, "grad_norm": 0.232219859957695, "learning_rate": 0.0001656975006173722, "loss": 1.2687, "step": 13212 }, { "epoch": 0.17169690876043422, "grad_norm": 0.43521979451179504, "learning_rate": 0.00016569490115546083, "loss": 1.4167, "step": 13213 }, { "epoch": 0.1717099033043501, "grad_norm": 0.33573973178863525, "learning_rate": 0.00016569230169354943, "loss": 1.3387, "step": 13214 }, { "epoch": 0.17172289784826597, "grad_norm": 0.3633989989757538, "learning_rate": 0.00016568970223163808, "loss": 1.3057, "step": 13215 }, { "epoch": 0.17173589239218184, "grad_norm": 0.39607352018356323, "learning_rate": 0.00016568710276972668, "loss": 1.3566, "step": 13216 }, { "epoch": 0.17174888693609772, "grad_norm": 0.5490531325340271, "learning_rate": 0.00016568450330781527, "loss": 1.4677, "step": 13217 }, { "epoch": 0.1717618814800136, "grad_norm": 0.4028678238391876, "learning_rate": 0.0001656819038459039, "loss": 1.4905, "step": 13218 }, { "epoch": 0.17177487602392946, "grad_norm": 0.3589508533477783, "learning_rate": 0.00016567930438399252, "loss": 1.5017, "step": 13219 }, { "epoch": 0.17178787056784534, "grad_norm": 0.32853490114212036, "learning_rate": 0.00016567670492208115, "loss": 1.3615, "step": 13220 }, { "epoch": 0.1718008651117612, "grad_norm": 0.38775429129600525, "learning_rate": 0.00016567410546016974, "loss": 1.3167, "step": 13221 }, { "epoch": 0.17181385965567708, "grad_norm": 0.3832327723503113, "learning_rate": 0.00016567150599825837, "loss": 1.518, "step": 13222 }, { "epoch": 0.17182685419959295, "grad_norm": 0.47670498490333557, "learning_rate": 0.000165668906536347, "loss": 1.3783, "step": 13223 }, { "epoch": 0.17183984874350883, "grad_norm": 0.46496906876564026, "learning_rate": 0.0001656663070744356, "loss": 1.2044, "step": 13224 }, { "epoch": 0.1718528432874247, "grad_norm": 0.434190034866333, "learning_rate": 0.00016566370761252422, "loss": 1.4648, "step": 13225 }, { "epoch": 0.17186583783134057, "grad_norm": 0.4518909156322479, "learning_rate": 0.0001656611081506128, "loss": 1.4685, "step": 13226 }, { "epoch": 0.17187883237525645, "grad_norm": 0.4945921003818512, "learning_rate": 0.00016565850868870146, "loss": 1.5399, "step": 13227 }, { "epoch": 0.17189182691917232, "grad_norm": 0.3644948899745941, "learning_rate": 0.00016565590922679006, "loss": 1.3921, "step": 13228 }, { "epoch": 0.1719048214630882, "grad_norm": 0.49629640579223633, "learning_rate": 0.00016565330976487866, "loss": 1.3186, "step": 13229 }, { "epoch": 0.17191781600700407, "grad_norm": 0.24035559594631195, "learning_rate": 0.0001656507103029673, "loss": 1.1132, "step": 13230 }, { "epoch": 0.17193081055091994, "grad_norm": 0.4147299826145172, "learning_rate": 0.0001656481108410559, "loss": 1.4242, "step": 13231 }, { "epoch": 0.1719438050948358, "grad_norm": 0.45291200280189514, "learning_rate": 0.00016564551137914453, "loss": 1.5819, "step": 13232 }, { "epoch": 0.17195679963875168, "grad_norm": 0.42972901463508606, "learning_rate": 0.00016564291191723313, "loss": 1.446, "step": 13233 }, { "epoch": 0.17196979418266756, "grad_norm": 0.47062039375305176, "learning_rate": 0.00016564031245532175, "loss": 1.6251, "step": 13234 }, { "epoch": 0.17198278872658343, "grad_norm": 0.33339330554008484, "learning_rate": 0.00016563771299341038, "loss": 1.3773, "step": 13235 }, { "epoch": 0.1719957832704993, "grad_norm": 0.3569968640804291, "learning_rate": 0.00016563511353149898, "loss": 1.5002, "step": 13236 }, { "epoch": 0.17200877781441518, "grad_norm": 0.3116925060749054, "learning_rate": 0.0001656325140695876, "loss": 1.4727, "step": 13237 }, { "epoch": 0.17202177235833105, "grad_norm": 0.40232232213020325, "learning_rate": 0.00016562991460767623, "loss": 1.5291, "step": 13238 }, { "epoch": 0.17203476690224692, "grad_norm": 0.3872472941875458, "learning_rate": 0.00016562731514576485, "loss": 1.5817, "step": 13239 }, { "epoch": 0.1720477614461628, "grad_norm": 0.4164724051952362, "learning_rate": 0.00016562471568385345, "loss": 1.4418, "step": 13240 }, { "epoch": 0.17206075599007867, "grad_norm": 0.3428027331829071, "learning_rate": 0.00016562211622194207, "loss": 1.3743, "step": 13241 }, { "epoch": 0.17207375053399454, "grad_norm": 0.3754356801509857, "learning_rate": 0.0001656195167600307, "loss": 1.2897, "step": 13242 }, { "epoch": 0.1720867450779104, "grad_norm": 0.43637245893478394, "learning_rate": 0.0001656169172981193, "loss": 1.374, "step": 13243 }, { "epoch": 0.1720997396218263, "grad_norm": 0.4943617284297943, "learning_rate": 0.00016561431783620792, "loss": 1.3587, "step": 13244 }, { "epoch": 0.17211273416574216, "grad_norm": 0.3823917508125305, "learning_rate": 0.00016561171837429652, "loss": 1.3661, "step": 13245 }, { "epoch": 0.17212572870965803, "grad_norm": 0.3014433681964874, "learning_rate": 0.00016560911891238514, "loss": 1.4207, "step": 13246 }, { "epoch": 0.1721387232535739, "grad_norm": 0.43272411823272705, "learning_rate": 0.00016560651945047376, "loss": 1.3471, "step": 13247 }, { "epoch": 0.17215171779748978, "grad_norm": 0.4171760082244873, "learning_rate": 0.00016560391998856236, "loss": 1.4447, "step": 13248 }, { "epoch": 0.17216471234140565, "grad_norm": 0.44503775238990784, "learning_rate": 0.00016560132052665099, "loss": 1.3682, "step": 13249 }, { "epoch": 0.17217770688532152, "grad_norm": 0.392220139503479, "learning_rate": 0.0001655987210647396, "loss": 1.3014, "step": 13250 }, { "epoch": 0.1721907014292374, "grad_norm": 0.44181498885154724, "learning_rate": 0.00016559612160282824, "loss": 1.4563, "step": 13251 }, { "epoch": 0.17220369597315327, "grad_norm": 0.3580576181411743, "learning_rate": 0.00016559352214091683, "loss": 1.4462, "step": 13252 }, { "epoch": 0.17221669051706914, "grad_norm": 0.44580137729644775, "learning_rate": 0.00016559092267900546, "loss": 1.316, "step": 13253 }, { "epoch": 0.17222968506098502, "grad_norm": 0.40787971019744873, "learning_rate": 0.00016558832321709408, "loss": 1.3787, "step": 13254 }, { "epoch": 0.1722426796049009, "grad_norm": 0.2945028245449066, "learning_rate": 0.00016558572375518268, "loss": 1.1279, "step": 13255 }, { "epoch": 0.17225567414881676, "grad_norm": 0.4573705196380615, "learning_rate": 0.0001655831242932713, "loss": 1.5413, "step": 13256 }, { "epoch": 0.17226866869273263, "grad_norm": 0.35718896985054016, "learning_rate": 0.0001655805248313599, "loss": 1.4945, "step": 13257 }, { "epoch": 0.1722816632366485, "grad_norm": 0.35502755641937256, "learning_rate": 0.00016557792536944855, "loss": 1.3139, "step": 13258 }, { "epoch": 0.17229465778056438, "grad_norm": 0.4150936007499695, "learning_rate": 0.00016557532590753715, "loss": 1.3093, "step": 13259 }, { "epoch": 0.17230765232448025, "grad_norm": 0.4627138078212738, "learning_rate": 0.00016557272644562575, "loss": 1.4308, "step": 13260 }, { "epoch": 0.17232064686839613, "grad_norm": 0.4367901086807251, "learning_rate": 0.00016557012698371437, "loss": 1.6893, "step": 13261 }, { "epoch": 0.172333641412312, "grad_norm": 0.36571958661079407, "learning_rate": 0.000165567527521803, "loss": 1.1728, "step": 13262 }, { "epoch": 0.17234663595622787, "grad_norm": 0.4008215069770813, "learning_rate": 0.00016556492805989162, "loss": 1.2705, "step": 13263 }, { "epoch": 0.17235963050014375, "grad_norm": 0.2850292921066284, "learning_rate": 0.00016556232859798022, "loss": 1.3028, "step": 13264 }, { "epoch": 0.17237262504405962, "grad_norm": 0.29150980710983276, "learning_rate": 0.00016555972913606884, "loss": 1.4276, "step": 13265 }, { "epoch": 0.1723856195879755, "grad_norm": 0.4610038995742798, "learning_rate": 0.00016555712967415747, "loss": 1.2558, "step": 13266 }, { "epoch": 0.17239861413189136, "grad_norm": 0.37384361028671265, "learning_rate": 0.00016555453021224606, "loss": 1.6667, "step": 13267 }, { "epoch": 0.17241160867580724, "grad_norm": 0.30299925804138184, "learning_rate": 0.0001655519307503347, "loss": 1.3843, "step": 13268 }, { "epoch": 0.1724246032197231, "grad_norm": 0.4450954496860504, "learning_rate": 0.0001655493312884233, "loss": 1.3142, "step": 13269 }, { "epoch": 0.17243759776363898, "grad_norm": 0.4767892062664032, "learning_rate": 0.00016554673182651194, "loss": 1.5149, "step": 13270 }, { "epoch": 0.17245059230755486, "grad_norm": 0.3988877236843109, "learning_rate": 0.00016554413236460053, "loss": 1.3396, "step": 13271 }, { "epoch": 0.17246358685147073, "grad_norm": 0.45012447237968445, "learning_rate": 0.00016554153290268913, "loss": 1.4525, "step": 13272 }, { "epoch": 0.1724765813953866, "grad_norm": 0.4664531648159027, "learning_rate": 0.00016553893344077778, "loss": 1.4385, "step": 13273 }, { "epoch": 0.17248957593930248, "grad_norm": 0.45344552397727966, "learning_rate": 0.00016553633397886638, "loss": 1.4004, "step": 13274 }, { "epoch": 0.17250257048321835, "grad_norm": 0.4349400997161865, "learning_rate": 0.000165533734516955, "loss": 1.3669, "step": 13275 }, { "epoch": 0.17251556502713422, "grad_norm": 0.4356987774372101, "learning_rate": 0.0001655311350550436, "loss": 1.4662, "step": 13276 }, { "epoch": 0.1725285595710501, "grad_norm": 0.4695001542568207, "learning_rate": 0.00016552853559313223, "loss": 1.4871, "step": 13277 }, { "epoch": 0.17254155411496597, "grad_norm": 0.5414415001869202, "learning_rate": 0.00016552593613122085, "loss": 1.5076, "step": 13278 }, { "epoch": 0.17255454865888184, "grad_norm": 0.39165061712265015, "learning_rate": 0.00016552333666930945, "loss": 1.6107, "step": 13279 }, { "epoch": 0.1725675432027977, "grad_norm": 0.5397108197212219, "learning_rate": 0.00016552073720739807, "loss": 1.3159, "step": 13280 }, { "epoch": 0.17258053774671359, "grad_norm": 0.5476521253585815, "learning_rate": 0.0001655181377454867, "loss": 1.3692, "step": 13281 }, { "epoch": 0.17259353229062946, "grad_norm": 0.40746042132377625, "learning_rate": 0.00016551553828357532, "loss": 1.3033, "step": 13282 }, { "epoch": 0.17260652683454533, "grad_norm": 0.39313873648643494, "learning_rate": 0.00016551293882166392, "loss": 1.4005, "step": 13283 }, { "epoch": 0.1726195213784612, "grad_norm": 0.3358429968357086, "learning_rate": 0.00016551033935975252, "loss": 1.4321, "step": 13284 }, { "epoch": 0.1726325159223771, "grad_norm": 0.322401225566864, "learning_rate": 0.00016550773989784117, "loss": 1.5043, "step": 13285 }, { "epoch": 0.17264551046629298, "grad_norm": 0.4391809403896332, "learning_rate": 0.00016550514043592977, "loss": 1.4891, "step": 13286 }, { "epoch": 0.17265850501020885, "grad_norm": 0.44552457332611084, "learning_rate": 0.0001655025409740184, "loss": 1.4174, "step": 13287 }, { "epoch": 0.17267149955412472, "grad_norm": 0.43498408794403076, "learning_rate": 0.000165499941512107, "loss": 1.4961, "step": 13288 }, { "epoch": 0.1726844940980406, "grad_norm": 0.3670695126056671, "learning_rate": 0.0001654973420501956, "loss": 1.3749, "step": 13289 }, { "epoch": 0.17269748864195647, "grad_norm": 0.38664329051971436, "learning_rate": 0.00016549474258828424, "loss": 1.4293, "step": 13290 }, { "epoch": 0.17271048318587234, "grad_norm": 0.41639307141304016, "learning_rate": 0.00016549214312637283, "loss": 1.3655, "step": 13291 }, { "epoch": 0.17272347772978822, "grad_norm": 0.27676376700401306, "learning_rate": 0.00016548954366446146, "loss": 1.2425, "step": 13292 }, { "epoch": 0.1727364722737041, "grad_norm": 0.34870126843452454, "learning_rate": 0.00016548694420255008, "loss": 1.3445, "step": 13293 }, { "epoch": 0.17274946681761996, "grad_norm": 0.40599924325942993, "learning_rate": 0.0001654843447406387, "loss": 1.422, "step": 13294 }, { "epoch": 0.17276246136153584, "grad_norm": 0.3603878915309906, "learning_rate": 0.0001654817452787273, "loss": 1.5414, "step": 13295 }, { "epoch": 0.1727754559054517, "grad_norm": 0.37636372447013855, "learning_rate": 0.00016547914581681593, "loss": 1.4838, "step": 13296 }, { "epoch": 0.17278845044936758, "grad_norm": 0.3460656702518463, "learning_rate": 0.00016547654635490455, "loss": 1.3337, "step": 13297 }, { "epoch": 0.17280144499328345, "grad_norm": 0.45613473653793335, "learning_rate": 0.00016547394689299315, "loss": 1.5599, "step": 13298 }, { "epoch": 0.17281443953719933, "grad_norm": 0.36926427483558655, "learning_rate": 0.00016547134743108178, "loss": 1.4375, "step": 13299 }, { "epoch": 0.1728274340811152, "grad_norm": 0.37286102771759033, "learning_rate": 0.00016546874796917037, "loss": 1.1608, "step": 13300 }, { "epoch": 0.17284042862503107, "grad_norm": 0.3439924120903015, "learning_rate": 0.000165466148507259, "loss": 1.2277, "step": 13301 }, { "epoch": 0.17285342316894695, "grad_norm": 0.4017575681209564, "learning_rate": 0.00016546354904534762, "loss": 1.5146, "step": 13302 }, { "epoch": 0.17286641771286282, "grad_norm": 0.3311365842819214, "learning_rate": 0.00016546094958343622, "loss": 1.626, "step": 13303 }, { "epoch": 0.1728794122567787, "grad_norm": 0.4362022578716278, "learning_rate": 0.00016545835012152487, "loss": 1.4609, "step": 13304 }, { "epoch": 0.17289240680069456, "grad_norm": 0.4214492738246918, "learning_rate": 0.00016545575065961347, "loss": 1.4682, "step": 13305 }, { "epoch": 0.17290540134461044, "grad_norm": 0.4576879143714905, "learning_rate": 0.0001654531511977021, "loss": 1.3129, "step": 13306 }, { "epoch": 0.1729183958885263, "grad_norm": 0.422690212726593, "learning_rate": 0.0001654505517357907, "loss": 1.5334, "step": 13307 }, { "epoch": 0.17293139043244218, "grad_norm": 0.32679685950279236, "learning_rate": 0.00016544795227387932, "loss": 1.2894, "step": 13308 }, { "epoch": 0.17294438497635806, "grad_norm": 0.32620567083358765, "learning_rate": 0.00016544535281196794, "loss": 1.2126, "step": 13309 }, { "epoch": 0.17295737952027393, "grad_norm": 0.46092817187309265, "learning_rate": 0.00016544275335005654, "loss": 1.4209, "step": 13310 }, { "epoch": 0.1729703740641898, "grad_norm": 0.3583175241947174, "learning_rate": 0.00016544015388814516, "loss": 1.3962, "step": 13311 }, { "epoch": 0.17298336860810568, "grad_norm": 0.31701523065567017, "learning_rate": 0.0001654375544262338, "loss": 1.1837, "step": 13312 }, { "epoch": 0.17299636315202155, "grad_norm": 0.4170938730239868, "learning_rate": 0.00016543495496432238, "loss": 1.3438, "step": 13313 }, { "epoch": 0.17300935769593742, "grad_norm": 0.3459280729293823, "learning_rate": 0.000165432355502411, "loss": 1.4976, "step": 13314 }, { "epoch": 0.1730223522398533, "grad_norm": 0.4367409944534302, "learning_rate": 0.0001654297560404996, "loss": 1.4108, "step": 13315 }, { "epoch": 0.17303534678376917, "grad_norm": 0.399282306432724, "learning_rate": 0.00016542715657858826, "loss": 1.6142, "step": 13316 }, { "epoch": 0.17304834132768504, "grad_norm": 0.3932700455188751, "learning_rate": 0.00016542455711667685, "loss": 1.3773, "step": 13317 }, { "epoch": 0.1730613358716009, "grad_norm": 0.41390523314476013, "learning_rate": 0.00016542195765476548, "loss": 1.3757, "step": 13318 }, { "epoch": 0.17307433041551679, "grad_norm": 0.48950520157814026, "learning_rate": 0.00016541935819285408, "loss": 1.5238, "step": 13319 }, { "epoch": 0.17308732495943266, "grad_norm": 0.3633231520652771, "learning_rate": 0.0001654167587309427, "loss": 1.4904, "step": 13320 }, { "epoch": 0.17310031950334853, "grad_norm": 0.3302246034145355, "learning_rate": 0.00016541415926903133, "loss": 1.3426, "step": 13321 }, { "epoch": 0.1731133140472644, "grad_norm": 0.36626505851745605, "learning_rate": 0.00016541155980711992, "loss": 1.3724, "step": 13322 }, { "epoch": 0.17312630859118028, "grad_norm": 0.3938538134098053, "learning_rate": 0.00016540896034520855, "loss": 1.3163, "step": 13323 }, { "epoch": 0.17313930313509615, "grad_norm": 0.3940238952636719, "learning_rate": 0.00016540636088329717, "loss": 1.7296, "step": 13324 }, { "epoch": 0.17315229767901202, "grad_norm": 0.38923120498657227, "learning_rate": 0.0001654037614213858, "loss": 1.5316, "step": 13325 }, { "epoch": 0.1731652922229279, "grad_norm": 0.4201051890850067, "learning_rate": 0.0001654011619594744, "loss": 1.542, "step": 13326 }, { "epoch": 0.17317828676684377, "grad_norm": 0.4189986288547516, "learning_rate": 0.000165398562497563, "loss": 1.4463, "step": 13327 }, { "epoch": 0.17319128131075964, "grad_norm": 0.4199727177619934, "learning_rate": 0.00016539596303565164, "loss": 1.392, "step": 13328 }, { "epoch": 0.17320427585467552, "grad_norm": 0.4013659954071045, "learning_rate": 0.00016539336357374024, "loss": 1.4156, "step": 13329 }, { "epoch": 0.1732172703985914, "grad_norm": 0.44378623366355896, "learning_rate": 0.00016539076411182886, "loss": 1.3054, "step": 13330 }, { "epoch": 0.17323026494250726, "grad_norm": 0.43498170375823975, "learning_rate": 0.00016538816464991746, "loss": 1.3975, "step": 13331 }, { "epoch": 0.17324325948642313, "grad_norm": 0.4229772388935089, "learning_rate": 0.00016538556518800609, "loss": 1.5115, "step": 13332 }, { "epoch": 0.173256254030339, "grad_norm": 0.4326575994491577, "learning_rate": 0.0001653829657260947, "loss": 1.6879, "step": 13333 }, { "epoch": 0.17326924857425488, "grad_norm": 0.41954946517944336, "learning_rate": 0.0001653803662641833, "loss": 1.3056, "step": 13334 }, { "epoch": 0.17328224311817075, "grad_norm": 0.47150903940200806, "learning_rate": 0.00016537776680227193, "loss": 1.598, "step": 13335 }, { "epoch": 0.17329523766208663, "grad_norm": 0.45423462986946106, "learning_rate": 0.00016537516734036056, "loss": 1.5666, "step": 13336 }, { "epoch": 0.1733082322060025, "grad_norm": 0.46473002433776855, "learning_rate": 0.00016537256787844918, "loss": 1.4679, "step": 13337 }, { "epoch": 0.17332122674991837, "grad_norm": 0.44201385974884033, "learning_rate": 0.00016536996841653778, "loss": 1.4588, "step": 13338 }, { "epoch": 0.17333422129383425, "grad_norm": 0.6297826170921326, "learning_rate": 0.00016536736895462638, "loss": 1.4134, "step": 13339 }, { "epoch": 0.17334721583775012, "grad_norm": 0.4003432095050812, "learning_rate": 0.00016536476949271503, "loss": 1.4075, "step": 13340 }, { "epoch": 0.173360210381666, "grad_norm": 0.40075111389160156, "learning_rate": 0.00016536217003080363, "loss": 1.3696, "step": 13341 }, { "epoch": 0.17337320492558186, "grad_norm": 0.4209158718585968, "learning_rate": 0.00016535957056889225, "loss": 1.4707, "step": 13342 }, { "epoch": 0.17338619946949774, "grad_norm": 0.4144658148288727, "learning_rate": 0.00016535697110698087, "loss": 1.3246, "step": 13343 }, { "epoch": 0.1733991940134136, "grad_norm": 0.3935602903366089, "learning_rate": 0.00016535437164506947, "loss": 1.3723, "step": 13344 }, { "epoch": 0.17341218855732948, "grad_norm": 0.3846154510974884, "learning_rate": 0.0001653517721831581, "loss": 1.7502, "step": 13345 }, { "epoch": 0.17342518310124536, "grad_norm": 0.41360217332839966, "learning_rate": 0.0001653491727212467, "loss": 1.3585, "step": 13346 }, { "epoch": 0.17343817764516123, "grad_norm": 0.4347723722457886, "learning_rate": 0.00016534657325933535, "loss": 1.3222, "step": 13347 }, { "epoch": 0.1734511721890771, "grad_norm": 0.5524615049362183, "learning_rate": 0.00016534397379742394, "loss": 1.5136, "step": 13348 }, { "epoch": 0.17346416673299297, "grad_norm": 0.3776842951774597, "learning_rate": 0.00016534137433551257, "loss": 1.2762, "step": 13349 }, { "epoch": 0.17347716127690885, "grad_norm": 0.39902234077453613, "learning_rate": 0.00016533877487360116, "loss": 1.5468, "step": 13350 }, { "epoch": 0.17349015582082472, "grad_norm": 0.3801536560058594, "learning_rate": 0.0001653361754116898, "loss": 1.4054, "step": 13351 }, { "epoch": 0.1735031503647406, "grad_norm": 0.34295907616615295, "learning_rate": 0.0001653335759497784, "loss": 1.3759, "step": 13352 }, { "epoch": 0.17351614490865647, "grad_norm": 0.49465489387512207, "learning_rate": 0.000165330976487867, "loss": 1.3806, "step": 13353 }, { "epoch": 0.17352913945257234, "grad_norm": 0.33638831973075867, "learning_rate": 0.00016532837702595564, "loss": 1.6792, "step": 13354 }, { "epoch": 0.1735421339964882, "grad_norm": 0.36213600635528564, "learning_rate": 0.00016532577756404426, "loss": 1.5455, "step": 13355 }, { "epoch": 0.17355512854040409, "grad_norm": 0.37444978952407837, "learning_rate": 0.00016532317810213286, "loss": 1.4258, "step": 13356 }, { "epoch": 0.17356812308431996, "grad_norm": 0.371562659740448, "learning_rate": 0.00016532057864022148, "loss": 1.4555, "step": 13357 }, { "epoch": 0.17358111762823583, "grad_norm": 0.4598786234855652, "learning_rate": 0.00016531797917831008, "loss": 1.4655, "step": 13358 }, { "epoch": 0.1735941121721517, "grad_norm": 0.3862901031970978, "learning_rate": 0.00016531537971639873, "loss": 1.5347, "step": 13359 }, { "epoch": 0.17360710671606758, "grad_norm": 0.3729879856109619, "learning_rate": 0.00016531278025448733, "loss": 1.4025, "step": 13360 }, { "epoch": 0.17362010125998348, "grad_norm": 0.30724814534187317, "learning_rate": 0.00016531018079257595, "loss": 1.3901, "step": 13361 }, { "epoch": 0.17363309580389935, "grad_norm": 0.3591684401035309, "learning_rate": 0.00016530758133066455, "loss": 1.2564, "step": 13362 }, { "epoch": 0.17364609034781522, "grad_norm": 0.37339314818382263, "learning_rate": 0.00016530498186875317, "loss": 1.4111, "step": 13363 }, { "epoch": 0.1736590848917311, "grad_norm": 0.34402546286582947, "learning_rate": 0.0001653023824068418, "loss": 1.4524, "step": 13364 }, { "epoch": 0.17367207943564697, "grad_norm": 0.45641106367111206, "learning_rate": 0.0001652997829449304, "loss": 1.4755, "step": 13365 }, { "epoch": 0.17368507397956284, "grad_norm": 0.34375521540641785, "learning_rate": 0.00016529718348301902, "loss": 1.393, "step": 13366 }, { "epoch": 0.17369806852347872, "grad_norm": 0.3787298798561096, "learning_rate": 0.00016529458402110765, "loss": 1.2405, "step": 13367 }, { "epoch": 0.1737110630673946, "grad_norm": 0.3178604245185852, "learning_rate": 0.00016529198455919624, "loss": 1.289, "step": 13368 }, { "epoch": 0.17372405761131046, "grad_norm": 0.46018078923225403, "learning_rate": 0.00016528938509728487, "loss": 1.5789, "step": 13369 }, { "epoch": 0.17373705215522633, "grad_norm": 0.43831494450569153, "learning_rate": 0.00016528678563537346, "loss": 1.4416, "step": 13370 }, { "epoch": 0.1737500466991422, "grad_norm": 0.30505529046058655, "learning_rate": 0.00016528418617346212, "loss": 1.2265, "step": 13371 }, { "epoch": 0.17376304124305808, "grad_norm": 0.4703518748283386, "learning_rate": 0.0001652815867115507, "loss": 1.2355, "step": 13372 }, { "epoch": 0.17377603578697395, "grad_norm": 0.40107858180999756, "learning_rate": 0.00016527898724963934, "loss": 1.4418, "step": 13373 }, { "epoch": 0.17378903033088983, "grad_norm": 0.3823404014110565, "learning_rate": 0.00016527638778772794, "loss": 1.3932, "step": 13374 }, { "epoch": 0.1738020248748057, "grad_norm": 0.4739859700202942, "learning_rate": 0.00016527378832581656, "loss": 1.2348, "step": 13375 }, { "epoch": 0.17381501941872157, "grad_norm": 0.34157299995422363, "learning_rate": 0.00016527118886390518, "loss": 1.3998, "step": 13376 }, { "epoch": 0.17382801396263745, "grad_norm": 0.3854232132434845, "learning_rate": 0.00016526858940199378, "loss": 1.5666, "step": 13377 }, { "epoch": 0.17384100850655332, "grad_norm": 0.3532569706439972, "learning_rate": 0.00016526598994008243, "loss": 1.3913, "step": 13378 }, { "epoch": 0.1738540030504692, "grad_norm": 0.4467248022556305, "learning_rate": 0.00016526339047817103, "loss": 1.4405, "step": 13379 }, { "epoch": 0.17386699759438506, "grad_norm": 0.3107064366340637, "learning_rate": 0.00016526079101625966, "loss": 1.3317, "step": 13380 }, { "epoch": 0.17387999213830094, "grad_norm": 0.43914172053337097, "learning_rate": 0.00016525819155434825, "loss": 1.493, "step": 13381 }, { "epoch": 0.1738929866822168, "grad_norm": 0.39585360884666443, "learning_rate": 0.00016525559209243688, "loss": 1.2851, "step": 13382 }, { "epoch": 0.17390598122613268, "grad_norm": 0.3452441990375519, "learning_rate": 0.0001652529926305255, "loss": 1.4804, "step": 13383 }, { "epoch": 0.17391897577004856, "grad_norm": 0.37664923071861267, "learning_rate": 0.0001652503931686141, "loss": 1.3659, "step": 13384 }, { "epoch": 0.17393197031396443, "grad_norm": 0.36215031147003174, "learning_rate": 0.00016524779370670272, "loss": 1.542, "step": 13385 }, { "epoch": 0.1739449648578803, "grad_norm": 0.3891845643520355, "learning_rate": 0.00016524519424479135, "loss": 1.3174, "step": 13386 }, { "epoch": 0.17395795940179618, "grad_norm": 0.3435726761817932, "learning_rate": 0.00016524259478287995, "loss": 1.4056, "step": 13387 }, { "epoch": 0.17397095394571205, "grad_norm": 0.36685875058174133, "learning_rate": 0.00016523999532096857, "loss": 1.2921, "step": 13388 }, { "epoch": 0.17398394848962792, "grad_norm": 0.4003696143627167, "learning_rate": 0.00016523739585905717, "loss": 1.4938, "step": 13389 }, { "epoch": 0.1739969430335438, "grad_norm": 0.48055654764175415, "learning_rate": 0.00016523479639714582, "loss": 1.5115, "step": 13390 }, { "epoch": 0.17400993757745967, "grad_norm": 0.41080594062805176, "learning_rate": 0.00016523219693523442, "loss": 1.3036, "step": 13391 }, { "epoch": 0.17402293212137554, "grad_norm": 0.4288446605205536, "learning_rate": 0.00016522959747332304, "loss": 1.3061, "step": 13392 }, { "epoch": 0.1740359266652914, "grad_norm": 0.34696251153945923, "learning_rate": 0.00016522699801141164, "loss": 1.2724, "step": 13393 }, { "epoch": 0.17404892120920729, "grad_norm": 0.4945676028728485, "learning_rate": 0.00016522439854950026, "loss": 1.4038, "step": 13394 }, { "epoch": 0.17406191575312316, "grad_norm": 0.36798331141471863, "learning_rate": 0.0001652217990875889, "loss": 1.3993, "step": 13395 }, { "epoch": 0.17407491029703903, "grad_norm": 0.3245902955532074, "learning_rate": 0.00016521919962567748, "loss": 1.2825, "step": 13396 }, { "epoch": 0.1740879048409549, "grad_norm": 0.42146414518356323, "learning_rate": 0.0001652166001637661, "loss": 1.3793, "step": 13397 }, { "epoch": 0.17410089938487078, "grad_norm": 0.547269344329834, "learning_rate": 0.00016521400070185473, "loss": 1.3652, "step": 13398 }, { "epoch": 0.17411389392878665, "grad_norm": 0.3122113049030304, "learning_rate": 0.00016521140123994333, "loss": 1.501, "step": 13399 }, { "epoch": 0.17412688847270252, "grad_norm": 0.3969956338405609, "learning_rate": 0.00016520880177803196, "loss": 1.4332, "step": 13400 }, { "epoch": 0.1741398830166184, "grad_norm": 0.4108419120311737, "learning_rate": 0.00016520620231612055, "loss": 1.303, "step": 13401 }, { "epoch": 0.17415287756053427, "grad_norm": 0.33740755915641785, "learning_rate": 0.0001652036028542092, "loss": 1.4279, "step": 13402 }, { "epoch": 0.17416587210445014, "grad_norm": 0.5096627473831177, "learning_rate": 0.0001652010033922978, "loss": 1.4009, "step": 13403 }, { "epoch": 0.17417886664836602, "grad_norm": 0.380125492811203, "learning_rate": 0.00016519840393038643, "loss": 1.4589, "step": 13404 }, { "epoch": 0.1741918611922819, "grad_norm": 0.412034273147583, "learning_rate": 0.00016519580446847502, "loss": 1.4604, "step": 13405 }, { "epoch": 0.17420485573619776, "grad_norm": 0.3821055293083191, "learning_rate": 0.00016519320500656365, "loss": 1.4195, "step": 13406 }, { "epoch": 0.17421785028011363, "grad_norm": 0.4046683609485626, "learning_rate": 0.00016519060554465227, "loss": 1.2702, "step": 13407 }, { "epoch": 0.1742308448240295, "grad_norm": 0.35326266288757324, "learning_rate": 0.00016518800608274087, "loss": 1.304, "step": 13408 }, { "epoch": 0.17424383936794538, "grad_norm": 0.35064899921417236, "learning_rate": 0.0001651854066208295, "loss": 1.4338, "step": 13409 }, { "epoch": 0.17425683391186125, "grad_norm": 0.38213178515434265, "learning_rate": 0.00016518280715891812, "loss": 1.359, "step": 13410 }, { "epoch": 0.17426982845577713, "grad_norm": 0.28947770595550537, "learning_rate": 0.00016518020769700672, "loss": 1.5043, "step": 13411 }, { "epoch": 0.174282822999693, "grad_norm": 0.28834593296051025, "learning_rate": 0.00016517760823509534, "loss": 1.3545, "step": 13412 }, { "epoch": 0.17429581754360887, "grad_norm": 0.4509648382663727, "learning_rate": 0.00016517500877318394, "loss": 1.6549, "step": 13413 }, { "epoch": 0.17430881208752474, "grad_norm": 0.480600506067276, "learning_rate": 0.0001651724093112726, "loss": 1.4065, "step": 13414 }, { "epoch": 0.17432180663144062, "grad_norm": 0.37305060029029846, "learning_rate": 0.0001651698098493612, "loss": 1.4353, "step": 13415 }, { "epoch": 0.1743348011753565, "grad_norm": 0.4292123019695282, "learning_rate": 0.0001651672103874498, "loss": 1.3883, "step": 13416 }, { "epoch": 0.17434779571927236, "grad_norm": 0.2961142361164093, "learning_rate": 0.00016516461092553844, "loss": 1.3846, "step": 13417 }, { "epoch": 0.17436079026318824, "grad_norm": 0.45750001072883606, "learning_rate": 0.00016516201146362703, "loss": 1.5323, "step": 13418 }, { "epoch": 0.1743737848071041, "grad_norm": 0.43683168292045593, "learning_rate": 0.00016515941200171566, "loss": 1.4173, "step": 13419 }, { "epoch": 0.17438677935101998, "grad_norm": 0.46560683846473694, "learning_rate": 0.00016515681253980425, "loss": 1.5242, "step": 13420 }, { "epoch": 0.17439977389493586, "grad_norm": 0.41565653681755066, "learning_rate": 0.0001651542130778929, "loss": 1.4636, "step": 13421 }, { "epoch": 0.17441276843885173, "grad_norm": 0.3468264043331146, "learning_rate": 0.0001651516136159815, "loss": 1.2744, "step": 13422 }, { "epoch": 0.1744257629827676, "grad_norm": 0.35076987743377686, "learning_rate": 0.0001651490141540701, "loss": 1.4505, "step": 13423 }, { "epoch": 0.17443875752668347, "grad_norm": 0.38492074608802795, "learning_rate": 0.00016514641469215873, "loss": 1.4471, "step": 13424 }, { "epoch": 0.17445175207059935, "grad_norm": 0.44133260846138, "learning_rate": 0.00016514381523024735, "loss": 1.5224, "step": 13425 }, { "epoch": 0.17446474661451522, "grad_norm": 0.41571658849716187, "learning_rate": 0.00016514121576833597, "loss": 1.5849, "step": 13426 }, { "epoch": 0.1744777411584311, "grad_norm": 0.35711586475372314, "learning_rate": 0.00016513861630642457, "loss": 1.3589, "step": 13427 }, { "epoch": 0.17449073570234697, "grad_norm": 0.27254778146743774, "learning_rate": 0.0001651360168445132, "loss": 1.2383, "step": 13428 }, { "epoch": 0.17450373024626284, "grad_norm": 0.2841547131538391, "learning_rate": 0.00016513341738260182, "loss": 1.295, "step": 13429 }, { "epoch": 0.1745167247901787, "grad_norm": 0.24505028128623962, "learning_rate": 0.00016513081792069042, "loss": 1.3533, "step": 13430 }, { "epoch": 0.17452971933409459, "grad_norm": 0.46519798040390015, "learning_rate": 0.00016512821845877904, "loss": 1.5446, "step": 13431 }, { "epoch": 0.17454271387801046, "grad_norm": 0.3529621660709381, "learning_rate": 0.00016512561899686764, "loss": 1.4733, "step": 13432 }, { "epoch": 0.17455570842192633, "grad_norm": 0.4122268855571747, "learning_rate": 0.0001651230195349563, "loss": 1.5913, "step": 13433 }, { "epoch": 0.1745687029658422, "grad_norm": 0.4126298725605011, "learning_rate": 0.0001651204200730449, "loss": 1.3624, "step": 13434 }, { "epoch": 0.17458169750975808, "grad_norm": 0.4127906858921051, "learning_rate": 0.0001651178206111335, "loss": 1.5192, "step": 13435 }, { "epoch": 0.17459469205367395, "grad_norm": 0.6212459802627563, "learning_rate": 0.0001651152211492221, "loss": 1.5355, "step": 13436 }, { "epoch": 0.17460768659758985, "grad_norm": 0.5126368403434753, "learning_rate": 0.00016511262168731074, "loss": 1.4107, "step": 13437 }, { "epoch": 0.17462068114150572, "grad_norm": 0.4047090709209442, "learning_rate": 0.00016511002222539936, "loss": 1.1911, "step": 13438 }, { "epoch": 0.1746336756854216, "grad_norm": 0.343588650226593, "learning_rate": 0.00016510742276348796, "loss": 1.3364, "step": 13439 }, { "epoch": 0.17464667022933747, "grad_norm": 0.38149598240852356, "learning_rate": 0.00016510482330157658, "loss": 1.3004, "step": 13440 }, { "epoch": 0.17465966477325334, "grad_norm": 0.3995436131954193, "learning_rate": 0.0001651022238396652, "loss": 1.2263, "step": 13441 }, { "epoch": 0.17467265931716922, "grad_norm": 0.330473929643631, "learning_rate": 0.0001650996243777538, "loss": 1.2429, "step": 13442 }, { "epoch": 0.1746856538610851, "grad_norm": 0.44789496064186096, "learning_rate": 0.00016509702491584243, "loss": 1.3022, "step": 13443 }, { "epoch": 0.17469864840500096, "grad_norm": 0.2974104583263397, "learning_rate": 0.00016509442545393103, "loss": 1.2184, "step": 13444 }, { "epoch": 0.17471164294891683, "grad_norm": 0.38194137811660767, "learning_rate": 0.00016509182599201968, "loss": 1.446, "step": 13445 }, { "epoch": 0.1747246374928327, "grad_norm": 0.2928636372089386, "learning_rate": 0.00016508922653010827, "loss": 1.4253, "step": 13446 }, { "epoch": 0.17473763203674858, "grad_norm": 0.36509057879447937, "learning_rate": 0.0001650866270681969, "loss": 1.4523, "step": 13447 }, { "epoch": 0.17475062658066445, "grad_norm": 0.36516791582107544, "learning_rate": 0.0001650840276062855, "loss": 1.5678, "step": 13448 }, { "epoch": 0.17476362112458033, "grad_norm": 0.3799486756324768, "learning_rate": 0.00016508142814437412, "loss": 1.4619, "step": 13449 }, { "epoch": 0.1747766156684962, "grad_norm": 0.45014092326164246, "learning_rate": 0.00016507882868246275, "loss": 1.5263, "step": 13450 }, { "epoch": 0.17478961021241207, "grad_norm": 0.40081489086151123, "learning_rate": 0.00016507622922055134, "loss": 1.4657, "step": 13451 }, { "epoch": 0.17480260475632795, "grad_norm": 0.46514081954956055, "learning_rate": 0.00016507362975863997, "loss": 1.6489, "step": 13452 }, { "epoch": 0.17481559930024382, "grad_norm": 0.358997106552124, "learning_rate": 0.0001650710302967286, "loss": 1.6649, "step": 13453 }, { "epoch": 0.1748285938441597, "grad_norm": 0.4227268695831299, "learning_rate": 0.0001650684308348172, "loss": 1.2737, "step": 13454 }, { "epoch": 0.17484158838807556, "grad_norm": 0.3060193955898285, "learning_rate": 0.00016506583137290581, "loss": 1.3838, "step": 13455 }, { "epoch": 0.17485458293199144, "grad_norm": 0.42281782627105713, "learning_rate": 0.00016506323191099444, "loss": 1.5166, "step": 13456 }, { "epoch": 0.1748675774759073, "grad_norm": 0.439098060131073, "learning_rate": 0.00016506063244908306, "loss": 1.5466, "step": 13457 }, { "epoch": 0.17488057201982318, "grad_norm": 0.4470057487487793, "learning_rate": 0.00016505803298717166, "loss": 1.3987, "step": 13458 }, { "epoch": 0.17489356656373906, "grad_norm": 0.4449363052845001, "learning_rate": 0.00016505543352526028, "loss": 1.4259, "step": 13459 }, { "epoch": 0.17490656110765493, "grad_norm": 0.4303593635559082, "learning_rate": 0.0001650528340633489, "loss": 1.4017, "step": 13460 }, { "epoch": 0.1749195556515708, "grad_norm": 0.37148261070251465, "learning_rate": 0.0001650502346014375, "loss": 1.3952, "step": 13461 }, { "epoch": 0.17493255019548667, "grad_norm": 0.361826628446579, "learning_rate": 0.00016504763513952613, "loss": 1.4783, "step": 13462 }, { "epoch": 0.17494554473940255, "grad_norm": 0.3926715850830078, "learning_rate": 0.00016504503567761473, "loss": 1.4838, "step": 13463 }, { "epoch": 0.17495853928331842, "grad_norm": 0.3533923327922821, "learning_rate": 0.00016504243621570338, "loss": 1.4042, "step": 13464 }, { "epoch": 0.1749715338272343, "grad_norm": 0.4848467707633972, "learning_rate": 0.00016503983675379198, "loss": 1.3995, "step": 13465 }, { "epoch": 0.17498452837115017, "grad_norm": 0.35159027576446533, "learning_rate": 0.00016503723729188057, "loss": 1.5477, "step": 13466 }, { "epoch": 0.17499752291506604, "grad_norm": 0.34370023012161255, "learning_rate": 0.0001650346378299692, "loss": 1.3882, "step": 13467 }, { "epoch": 0.1750105174589819, "grad_norm": 0.49847400188446045, "learning_rate": 0.00016503203836805782, "loss": 1.4393, "step": 13468 }, { "epoch": 0.17502351200289779, "grad_norm": 0.48788490891456604, "learning_rate": 0.00016502943890614645, "loss": 1.3966, "step": 13469 }, { "epoch": 0.17503650654681366, "grad_norm": 0.3483833372592926, "learning_rate": 0.00016502683944423505, "loss": 1.3532, "step": 13470 }, { "epoch": 0.17504950109072953, "grad_norm": 0.4486258924007416, "learning_rate": 0.00016502423998232367, "loss": 1.4069, "step": 13471 }, { "epoch": 0.1750624956346454, "grad_norm": 0.40895354747772217, "learning_rate": 0.0001650216405204123, "loss": 1.4384, "step": 13472 }, { "epoch": 0.17507549017856128, "grad_norm": 0.35748904943466187, "learning_rate": 0.0001650190410585009, "loss": 1.2782, "step": 13473 }, { "epoch": 0.17508848472247715, "grad_norm": 0.3825523853302002, "learning_rate": 0.00016501644159658952, "loss": 1.687, "step": 13474 }, { "epoch": 0.17510147926639302, "grad_norm": 0.4871703088283539, "learning_rate": 0.00016501384213467811, "loss": 1.4416, "step": 13475 }, { "epoch": 0.1751144738103089, "grad_norm": 0.4077610373497009, "learning_rate": 0.00016501124267276677, "loss": 1.486, "step": 13476 }, { "epoch": 0.17512746835422477, "grad_norm": 0.43831121921539307, "learning_rate": 0.00016500864321085536, "loss": 1.338, "step": 13477 }, { "epoch": 0.17514046289814064, "grad_norm": 0.42199379205703735, "learning_rate": 0.00016500604374894396, "loss": 1.3239, "step": 13478 }, { "epoch": 0.17515345744205651, "grad_norm": 0.3701782524585724, "learning_rate": 0.00016500344428703258, "loss": 1.3909, "step": 13479 }, { "epoch": 0.1751664519859724, "grad_norm": 0.7420125603675842, "learning_rate": 0.0001650008448251212, "loss": 1.5017, "step": 13480 }, { "epoch": 0.17517944652988826, "grad_norm": 0.37119871377944946, "learning_rate": 0.00016499824536320983, "loss": 1.4445, "step": 13481 }, { "epoch": 0.17519244107380413, "grad_norm": 0.32587212324142456, "learning_rate": 0.00016499564590129843, "loss": 1.3808, "step": 13482 }, { "epoch": 0.17520543561772, "grad_norm": 0.34763771295547485, "learning_rate": 0.00016499304643938706, "loss": 1.2203, "step": 13483 }, { "epoch": 0.17521843016163588, "grad_norm": 0.4741244912147522, "learning_rate": 0.00016499044697747568, "loss": 1.379, "step": 13484 }, { "epoch": 0.17523142470555175, "grad_norm": 0.6077156662940979, "learning_rate": 0.00016498784751556428, "loss": 1.6124, "step": 13485 }, { "epoch": 0.17524441924946763, "grad_norm": 0.3056173324584961, "learning_rate": 0.0001649852480536529, "loss": 1.1708, "step": 13486 }, { "epoch": 0.1752574137933835, "grad_norm": 0.38942110538482666, "learning_rate": 0.0001649826485917415, "loss": 1.4529, "step": 13487 }, { "epoch": 0.17527040833729937, "grad_norm": 0.397161066532135, "learning_rate": 0.00016498004912983015, "loss": 1.4906, "step": 13488 }, { "epoch": 0.17528340288121524, "grad_norm": 0.38064852356910706, "learning_rate": 0.00016497744966791875, "loss": 1.3094, "step": 13489 }, { "epoch": 0.17529639742513112, "grad_norm": 0.37729570269584656, "learning_rate": 0.00016497485020600735, "loss": 1.5922, "step": 13490 }, { "epoch": 0.175309391969047, "grad_norm": 0.49073395133018494, "learning_rate": 0.000164972250744096, "loss": 1.5308, "step": 13491 }, { "epoch": 0.17532238651296286, "grad_norm": 0.4614732563495636, "learning_rate": 0.0001649696512821846, "loss": 1.483, "step": 13492 }, { "epoch": 0.17533538105687874, "grad_norm": 0.3456222712993622, "learning_rate": 0.00016496705182027322, "loss": 1.2901, "step": 13493 }, { "epoch": 0.1753483756007946, "grad_norm": 0.3475855886936188, "learning_rate": 0.00016496445235836182, "loss": 1.4711, "step": 13494 }, { "epoch": 0.17536137014471048, "grad_norm": 0.44296783208847046, "learning_rate": 0.00016496185289645044, "loss": 1.3944, "step": 13495 }, { "epoch": 0.17537436468862636, "grad_norm": 0.4115827679634094, "learning_rate": 0.00016495925343453907, "loss": 1.4954, "step": 13496 }, { "epoch": 0.17538735923254223, "grad_norm": 0.33640995621681213, "learning_rate": 0.00016495665397262766, "loss": 1.3453, "step": 13497 }, { "epoch": 0.1754003537764581, "grad_norm": 0.5466011166572571, "learning_rate": 0.0001649540545107163, "loss": 1.5637, "step": 13498 }, { "epoch": 0.17541334832037397, "grad_norm": 0.3716542422771454, "learning_rate": 0.0001649514550488049, "loss": 1.4289, "step": 13499 }, { "epoch": 0.17542634286428985, "grad_norm": 0.349242627620697, "learning_rate": 0.00016494885558689354, "loss": 1.4617, "step": 13500 }, { "epoch": 0.17543933740820572, "grad_norm": 0.36021289229393005, "learning_rate": 0.00016494625612498213, "loss": 1.1995, "step": 13501 }, { "epoch": 0.1754523319521216, "grad_norm": 0.29966408014297485, "learning_rate": 0.00016494365666307076, "loss": 1.3276, "step": 13502 }, { "epoch": 0.17546532649603747, "grad_norm": 0.4373058080673218, "learning_rate": 0.00016494105720115938, "loss": 1.3889, "step": 13503 }, { "epoch": 0.17547832103995334, "grad_norm": 0.3564509451389313, "learning_rate": 0.00016493845773924798, "loss": 1.5267, "step": 13504 }, { "epoch": 0.1754913155838692, "grad_norm": 0.3977997899055481, "learning_rate": 0.0001649358582773366, "loss": 1.4256, "step": 13505 }, { "epoch": 0.17550431012778508, "grad_norm": 0.32826700806617737, "learning_rate": 0.0001649332588154252, "loss": 1.3825, "step": 13506 }, { "epoch": 0.17551730467170096, "grad_norm": 0.4034384787082672, "learning_rate": 0.00016493065935351383, "loss": 1.2995, "step": 13507 }, { "epoch": 0.17553029921561683, "grad_norm": 0.37431657314300537, "learning_rate": 0.00016492805989160245, "loss": 1.4742, "step": 13508 }, { "epoch": 0.1755432937595327, "grad_norm": 0.40061405301094055, "learning_rate": 0.00016492546042969105, "loss": 1.49, "step": 13509 }, { "epoch": 0.17555628830344858, "grad_norm": 0.36853882670402527, "learning_rate": 0.00016492286096777967, "loss": 1.3964, "step": 13510 }, { "epoch": 0.17556928284736445, "grad_norm": 0.4105042517185211, "learning_rate": 0.0001649202615058683, "loss": 1.2653, "step": 13511 }, { "epoch": 0.17558227739128032, "grad_norm": 0.42176076769828796, "learning_rate": 0.00016491766204395692, "loss": 1.4333, "step": 13512 }, { "epoch": 0.17559527193519622, "grad_norm": 0.42144495248794556, "learning_rate": 0.00016491506258204552, "loss": 1.5783, "step": 13513 }, { "epoch": 0.1756082664791121, "grad_norm": 0.40591567754745483, "learning_rate": 0.00016491246312013414, "loss": 1.5444, "step": 13514 }, { "epoch": 0.17562126102302797, "grad_norm": 0.29232579469680786, "learning_rate": 0.00016490986365822277, "loss": 1.3192, "step": 13515 }, { "epoch": 0.17563425556694384, "grad_norm": 0.48433569073677063, "learning_rate": 0.00016490726419631137, "loss": 1.3967, "step": 13516 }, { "epoch": 0.17564725011085972, "grad_norm": 0.3832167685031891, "learning_rate": 0.0001649046647344, "loss": 1.6544, "step": 13517 }, { "epoch": 0.1756602446547756, "grad_norm": 0.48258134722709656, "learning_rate": 0.0001649020652724886, "loss": 1.3863, "step": 13518 }, { "epoch": 0.17567323919869146, "grad_norm": 0.4282076954841614, "learning_rate": 0.0001648994658105772, "loss": 1.3261, "step": 13519 }, { "epoch": 0.17568623374260733, "grad_norm": 0.3869810104370117, "learning_rate": 0.00016489686634866584, "loss": 1.3552, "step": 13520 }, { "epoch": 0.1756992282865232, "grad_norm": 0.4020592272281647, "learning_rate": 0.00016489426688675443, "loss": 1.5436, "step": 13521 }, { "epoch": 0.17571222283043908, "grad_norm": 0.347240686416626, "learning_rate": 0.00016489166742484306, "loss": 1.4601, "step": 13522 }, { "epoch": 0.17572521737435495, "grad_norm": 0.5202689170837402, "learning_rate": 0.00016488906796293168, "loss": 1.6156, "step": 13523 }, { "epoch": 0.17573821191827083, "grad_norm": 0.3712344765663147, "learning_rate": 0.0001648864685010203, "loss": 1.3057, "step": 13524 }, { "epoch": 0.1757512064621867, "grad_norm": 0.37039464712142944, "learning_rate": 0.0001648838690391089, "loss": 1.3453, "step": 13525 }, { "epoch": 0.17576420100610257, "grad_norm": 0.3911130726337433, "learning_rate": 0.00016488126957719753, "loss": 1.4548, "step": 13526 }, { "epoch": 0.17577719555001844, "grad_norm": 0.397586464881897, "learning_rate": 0.00016487867011528615, "loss": 1.3052, "step": 13527 }, { "epoch": 0.17579019009393432, "grad_norm": 0.39860421419143677, "learning_rate": 0.00016487607065337475, "loss": 1.3872, "step": 13528 }, { "epoch": 0.1758031846378502, "grad_norm": 0.47928884625434875, "learning_rate": 0.00016487347119146338, "loss": 1.3522, "step": 13529 }, { "epoch": 0.17581617918176606, "grad_norm": 0.34829577803611755, "learning_rate": 0.000164870871729552, "loss": 1.471, "step": 13530 }, { "epoch": 0.17582917372568194, "grad_norm": 0.4381202459335327, "learning_rate": 0.00016486827226764062, "loss": 1.4197, "step": 13531 }, { "epoch": 0.1758421682695978, "grad_norm": 0.408735990524292, "learning_rate": 0.00016486567280572922, "loss": 1.4568, "step": 13532 }, { "epoch": 0.17585516281351368, "grad_norm": 0.3802216053009033, "learning_rate": 0.00016486307334381782, "loss": 1.7062, "step": 13533 }, { "epoch": 0.17586815735742956, "grad_norm": 0.39596137404441833, "learning_rate": 0.00016486047388190647, "loss": 1.5423, "step": 13534 }, { "epoch": 0.17588115190134543, "grad_norm": 0.33654001355171204, "learning_rate": 0.00016485787441999507, "loss": 1.4071, "step": 13535 }, { "epoch": 0.1758941464452613, "grad_norm": 0.3452851176261902, "learning_rate": 0.0001648552749580837, "loss": 1.265, "step": 13536 }, { "epoch": 0.17590714098917717, "grad_norm": 0.30356869101524353, "learning_rate": 0.0001648526754961723, "loss": 1.2295, "step": 13537 }, { "epoch": 0.17592013553309305, "grad_norm": 0.31200847029685974, "learning_rate": 0.00016485007603426091, "loss": 1.3852, "step": 13538 }, { "epoch": 0.17593313007700892, "grad_norm": 0.49106618762016296, "learning_rate": 0.00016484747657234954, "loss": 1.4197, "step": 13539 }, { "epoch": 0.1759461246209248, "grad_norm": 0.412903368473053, "learning_rate": 0.00016484487711043814, "loss": 1.4273, "step": 13540 }, { "epoch": 0.17595911916484067, "grad_norm": 0.32957229018211365, "learning_rate": 0.00016484227764852676, "loss": 1.4691, "step": 13541 }, { "epoch": 0.17597211370875654, "grad_norm": 0.4434698224067688, "learning_rate": 0.00016483967818661538, "loss": 1.5286, "step": 13542 }, { "epoch": 0.1759851082526724, "grad_norm": 0.3510220944881439, "learning_rate": 0.000164837078724704, "loss": 1.4435, "step": 13543 }, { "epoch": 0.17599810279658828, "grad_norm": 0.42501917481422424, "learning_rate": 0.0001648344792627926, "loss": 1.427, "step": 13544 }, { "epoch": 0.17601109734050416, "grad_norm": 0.41684237122535706, "learning_rate": 0.0001648318798008812, "loss": 1.2161, "step": 13545 }, { "epoch": 0.17602409188442003, "grad_norm": 0.5242071151733398, "learning_rate": 0.00016482928033896986, "loss": 1.4906, "step": 13546 }, { "epoch": 0.1760370864283359, "grad_norm": 0.4684368073940277, "learning_rate": 0.00016482668087705845, "loss": 1.423, "step": 13547 }, { "epoch": 0.17605008097225178, "grad_norm": 0.5643107891082764, "learning_rate": 0.00016482408141514708, "loss": 1.6422, "step": 13548 }, { "epoch": 0.17606307551616765, "grad_norm": 0.43667447566986084, "learning_rate": 0.00016482148195323568, "loss": 1.3703, "step": 13549 }, { "epoch": 0.17607607006008352, "grad_norm": 0.42366620898246765, "learning_rate": 0.0001648188824913243, "loss": 1.3363, "step": 13550 }, { "epoch": 0.1760890646039994, "grad_norm": 0.3786791265010834, "learning_rate": 0.00016481628302941292, "loss": 1.4411, "step": 13551 }, { "epoch": 0.17610205914791527, "grad_norm": 0.434643030166626, "learning_rate": 0.00016481368356750152, "loss": 1.4437, "step": 13552 }, { "epoch": 0.17611505369183114, "grad_norm": 0.34708961844444275, "learning_rate": 0.00016481108410559015, "loss": 1.4127, "step": 13553 }, { "epoch": 0.17612804823574701, "grad_norm": 0.3480059802532196, "learning_rate": 0.00016480848464367877, "loss": 1.4018, "step": 13554 }, { "epoch": 0.1761410427796629, "grad_norm": 0.472269743680954, "learning_rate": 0.0001648058851817674, "loss": 1.4841, "step": 13555 }, { "epoch": 0.17615403732357876, "grad_norm": 0.3097432255744934, "learning_rate": 0.000164803285719856, "loss": 1.1991, "step": 13556 }, { "epoch": 0.17616703186749463, "grad_norm": 0.4113951623439789, "learning_rate": 0.00016480068625794462, "loss": 1.6214, "step": 13557 }, { "epoch": 0.1761800264114105, "grad_norm": 0.4583827257156372, "learning_rate": 0.00016479808679603324, "loss": 1.5132, "step": 13558 }, { "epoch": 0.17619302095532638, "grad_norm": 0.49137359857559204, "learning_rate": 0.00016479548733412184, "loss": 1.422, "step": 13559 }, { "epoch": 0.17620601549924225, "grad_norm": 0.5129138231277466, "learning_rate": 0.00016479288787221046, "loss": 1.3676, "step": 13560 }, { "epoch": 0.17621901004315813, "grad_norm": 0.4639744758605957, "learning_rate": 0.00016479028841029906, "loss": 1.4898, "step": 13561 }, { "epoch": 0.176232004587074, "grad_norm": 0.39463192224502563, "learning_rate": 0.00016478768894838768, "loss": 1.5868, "step": 13562 }, { "epoch": 0.17624499913098987, "grad_norm": 0.40299704670906067, "learning_rate": 0.0001647850894864763, "loss": 1.3985, "step": 13563 }, { "epoch": 0.17625799367490574, "grad_norm": 0.3154836893081665, "learning_rate": 0.0001647824900245649, "loss": 1.4936, "step": 13564 }, { "epoch": 0.17627098821882162, "grad_norm": 0.535276472568512, "learning_rate": 0.00016477989056265356, "loss": 1.2896, "step": 13565 }, { "epoch": 0.1762839827627375, "grad_norm": 0.4607764482498169, "learning_rate": 0.00016477729110074216, "loss": 1.5687, "step": 13566 }, { "epoch": 0.17629697730665336, "grad_norm": 0.3937928378582001, "learning_rate": 0.00016477469163883078, "loss": 1.3595, "step": 13567 }, { "epoch": 0.17630997185056924, "grad_norm": 0.433514267206192, "learning_rate": 0.00016477209217691938, "loss": 1.6069, "step": 13568 }, { "epoch": 0.1763229663944851, "grad_norm": 0.4924296736717224, "learning_rate": 0.000164769492715008, "loss": 1.4961, "step": 13569 }, { "epoch": 0.17633596093840098, "grad_norm": 0.43467968702316284, "learning_rate": 0.00016476689325309663, "loss": 1.2885, "step": 13570 }, { "epoch": 0.17634895548231685, "grad_norm": 0.29190871119499207, "learning_rate": 0.00016476429379118522, "loss": 1.067, "step": 13571 }, { "epoch": 0.17636195002623273, "grad_norm": 0.39473944902420044, "learning_rate": 0.00016476169432927385, "loss": 1.4965, "step": 13572 }, { "epoch": 0.1763749445701486, "grad_norm": 0.5320652723312378, "learning_rate": 0.00016475909486736247, "loss": 1.3424, "step": 13573 }, { "epoch": 0.17638793911406447, "grad_norm": 0.32814016938209534, "learning_rate": 0.00016475649540545107, "loss": 1.1422, "step": 13574 }, { "epoch": 0.17640093365798035, "grad_norm": 0.33688774704933167, "learning_rate": 0.0001647538959435397, "loss": 1.3045, "step": 13575 }, { "epoch": 0.17641392820189622, "grad_norm": 0.31415408849716187, "learning_rate": 0.0001647512964816283, "loss": 1.4122, "step": 13576 }, { "epoch": 0.1764269227458121, "grad_norm": 0.42619383335113525, "learning_rate": 0.00016474869701971694, "loss": 1.2647, "step": 13577 }, { "epoch": 0.17643991728972797, "grad_norm": 0.36367809772491455, "learning_rate": 0.00016474609755780554, "loss": 1.4734, "step": 13578 }, { "epoch": 0.17645291183364384, "grad_norm": 0.4452381432056427, "learning_rate": 0.00016474349809589417, "loss": 1.6356, "step": 13579 }, { "epoch": 0.1764659063775597, "grad_norm": 0.4153081178665161, "learning_rate": 0.00016474089863398276, "loss": 1.2722, "step": 13580 }, { "epoch": 0.17647890092147558, "grad_norm": 0.4036629796028137, "learning_rate": 0.0001647382991720714, "loss": 1.1678, "step": 13581 }, { "epoch": 0.17649189546539146, "grad_norm": 0.4922664761543274, "learning_rate": 0.00016473569971016, "loss": 1.5953, "step": 13582 }, { "epoch": 0.17650489000930733, "grad_norm": 0.4043973982334137, "learning_rate": 0.0001647331002482486, "loss": 1.3069, "step": 13583 }, { "epoch": 0.1765178845532232, "grad_norm": 0.38580867648124695, "learning_rate": 0.00016473050078633723, "loss": 1.4947, "step": 13584 }, { "epoch": 0.17653087909713908, "grad_norm": 0.352580189704895, "learning_rate": 0.00016472790132442586, "loss": 1.4729, "step": 13585 }, { "epoch": 0.17654387364105495, "grad_norm": 0.44040560722351074, "learning_rate": 0.00016472530186251448, "loss": 1.2951, "step": 13586 }, { "epoch": 0.17655686818497082, "grad_norm": 0.4771287739276886, "learning_rate": 0.00016472270240060308, "loss": 1.4535, "step": 13587 }, { "epoch": 0.1765698627288867, "grad_norm": 0.42403465509414673, "learning_rate": 0.00016472010293869168, "loss": 1.2877, "step": 13588 }, { "epoch": 0.1765828572728026, "grad_norm": 0.3243350684642792, "learning_rate": 0.00016471750347678033, "loss": 1.2671, "step": 13589 }, { "epoch": 0.17659585181671847, "grad_norm": 0.3546353280544281, "learning_rate": 0.00016471490401486893, "loss": 1.4155, "step": 13590 }, { "epoch": 0.17660884636063434, "grad_norm": 0.4194067716598511, "learning_rate": 0.00016471230455295755, "loss": 1.54, "step": 13591 }, { "epoch": 0.17662184090455021, "grad_norm": 0.33934760093688965, "learning_rate": 0.00016470970509104615, "loss": 1.5222, "step": 13592 }, { "epoch": 0.1766348354484661, "grad_norm": 0.39700618386268616, "learning_rate": 0.00016470710562913477, "loss": 1.4634, "step": 13593 }, { "epoch": 0.17664782999238196, "grad_norm": 0.37450742721557617, "learning_rate": 0.0001647045061672234, "loss": 1.647, "step": 13594 }, { "epoch": 0.17666082453629783, "grad_norm": 0.42002931237220764, "learning_rate": 0.000164701906705312, "loss": 1.5675, "step": 13595 }, { "epoch": 0.1766738190802137, "grad_norm": 0.34220069646835327, "learning_rate": 0.00016469930724340062, "loss": 1.3423, "step": 13596 }, { "epoch": 0.17668681362412958, "grad_norm": 0.6377004981040955, "learning_rate": 0.00016469670778148924, "loss": 1.5027, "step": 13597 }, { "epoch": 0.17669980816804545, "grad_norm": 0.3749501407146454, "learning_rate": 0.00016469410831957787, "loss": 1.5834, "step": 13598 }, { "epoch": 0.17671280271196133, "grad_norm": 0.4093885123729706, "learning_rate": 0.00016469150885766647, "loss": 1.3875, "step": 13599 }, { "epoch": 0.1767257972558772, "grad_norm": 0.24530300498008728, "learning_rate": 0.00016468890939575506, "loss": 1.2604, "step": 13600 }, { "epoch": 0.17673879179979307, "grad_norm": 0.41008853912353516, "learning_rate": 0.00016468630993384371, "loss": 1.4578, "step": 13601 }, { "epoch": 0.17675178634370894, "grad_norm": 0.4438818097114563, "learning_rate": 0.0001646837104719323, "loss": 1.3622, "step": 13602 }, { "epoch": 0.17676478088762482, "grad_norm": 0.43295395374298096, "learning_rate": 0.00016468111101002094, "loss": 1.5506, "step": 13603 }, { "epoch": 0.1767777754315407, "grad_norm": 0.32715073227882385, "learning_rate": 0.00016467851154810956, "loss": 1.3477, "step": 13604 }, { "epoch": 0.17679076997545656, "grad_norm": 0.40638142824172974, "learning_rate": 0.00016467591208619816, "loss": 1.1918, "step": 13605 }, { "epoch": 0.17680376451937244, "grad_norm": 0.31783148646354675, "learning_rate": 0.00016467331262428678, "loss": 1.3725, "step": 13606 }, { "epoch": 0.1768167590632883, "grad_norm": 0.6016378402709961, "learning_rate": 0.00016467071316237538, "loss": 1.6826, "step": 13607 }, { "epoch": 0.17682975360720418, "grad_norm": 0.403513640165329, "learning_rate": 0.00016466811370046403, "loss": 1.3186, "step": 13608 }, { "epoch": 0.17684274815112005, "grad_norm": 0.4337422847747803, "learning_rate": 0.00016466551423855263, "loss": 1.3861, "step": 13609 }, { "epoch": 0.17685574269503593, "grad_norm": 0.4710150957107544, "learning_rate": 0.00016466291477664125, "loss": 1.4649, "step": 13610 }, { "epoch": 0.1768687372389518, "grad_norm": 0.3504270315170288, "learning_rate": 0.00016466031531472985, "loss": 1.1975, "step": 13611 }, { "epoch": 0.17688173178286767, "grad_norm": 0.482486367225647, "learning_rate": 0.00016465771585281848, "loss": 1.5034, "step": 13612 }, { "epoch": 0.17689472632678355, "grad_norm": 0.40145424008369446, "learning_rate": 0.0001646551163909071, "loss": 1.4967, "step": 13613 }, { "epoch": 0.17690772087069942, "grad_norm": 0.4193040132522583, "learning_rate": 0.0001646525169289957, "loss": 1.4751, "step": 13614 }, { "epoch": 0.1769207154146153, "grad_norm": 0.40405330061912537, "learning_rate": 0.00016464991746708432, "loss": 1.2807, "step": 13615 }, { "epoch": 0.17693370995853117, "grad_norm": 0.40608423948287964, "learning_rate": 0.00016464731800517295, "loss": 1.6081, "step": 13616 }, { "epoch": 0.17694670450244704, "grad_norm": 0.3507035970687866, "learning_rate": 0.00016464471854326154, "loss": 1.1923, "step": 13617 }, { "epoch": 0.1769596990463629, "grad_norm": 0.30400189757347107, "learning_rate": 0.00016464211908135017, "loss": 1.2624, "step": 13618 }, { "epoch": 0.17697269359027878, "grad_norm": 0.3268266022205353, "learning_rate": 0.00016463951961943877, "loss": 1.0787, "step": 13619 }, { "epoch": 0.17698568813419466, "grad_norm": 0.464998722076416, "learning_rate": 0.00016463692015752742, "loss": 1.4647, "step": 13620 }, { "epoch": 0.17699868267811053, "grad_norm": 0.2747749090194702, "learning_rate": 0.00016463432069561601, "loss": 1.3683, "step": 13621 }, { "epoch": 0.1770116772220264, "grad_norm": 0.45030391216278076, "learning_rate": 0.00016463172123370464, "loss": 1.4602, "step": 13622 }, { "epoch": 0.17702467176594228, "grad_norm": 0.42780452966690063, "learning_rate": 0.00016462912177179324, "loss": 1.4929, "step": 13623 }, { "epoch": 0.17703766630985815, "grad_norm": 0.27499017119407654, "learning_rate": 0.00016462652230988186, "loss": 1.398, "step": 13624 }, { "epoch": 0.17705066085377402, "grad_norm": 0.40039119124412537, "learning_rate": 0.00016462392284797049, "loss": 1.5109, "step": 13625 }, { "epoch": 0.1770636553976899, "grad_norm": 0.41366317868232727, "learning_rate": 0.00016462132338605908, "loss": 1.4753, "step": 13626 }, { "epoch": 0.17707664994160577, "grad_norm": 0.3705471158027649, "learning_rate": 0.0001646187239241477, "loss": 1.4735, "step": 13627 }, { "epoch": 0.17708964448552164, "grad_norm": 0.4617614448070526, "learning_rate": 0.00016461612446223633, "loss": 1.6215, "step": 13628 }, { "epoch": 0.17710263902943751, "grad_norm": 0.3661966025829315, "learning_rate": 0.00016461352500032493, "loss": 1.228, "step": 13629 }, { "epoch": 0.1771156335733534, "grad_norm": 0.33977580070495605, "learning_rate": 0.00016461092553841355, "loss": 1.3822, "step": 13630 }, { "epoch": 0.17712862811726926, "grad_norm": 0.3308474123477936, "learning_rate": 0.00016460832607650215, "loss": 1.32, "step": 13631 }, { "epoch": 0.17714162266118513, "grad_norm": 0.40739473700523376, "learning_rate": 0.0001646057266145908, "loss": 1.5853, "step": 13632 }, { "epoch": 0.177154617205101, "grad_norm": 0.31952229142189026, "learning_rate": 0.0001646031271526794, "loss": 1.3924, "step": 13633 }, { "epoch": 0.17716761174901688, "grad_norm": 0.3937205374240875, "learning_rate": 0.00016460052769076802, "loss": 1.1896, "step": 13634 }, { "epoch": 0.17718060629293275, "grad_norm": 0.3613029718399048, "learning_rate": 0.00016459792822885662, "loss": 1.2623, "step": 13635 }, { "epoch": 0.17719360083684862, "grad_norm": 0.34531939029693604, "learning_rate": 0.00016459532876694525, "loss": 1.4017, "step": 13636 }, { "epoch": 0.1772065953807645, "grad_norm": 0.37922030687332153, "learning_rate": 0.00016459272930503387, "loss": 1.5051, "step": 13637 }, { "epoch": 0.17721958992468037, "grad_norm": 0.3644823431968689, "learning_rate": 0.00016459012984312247, "loss": 1.4082, "step": 13638 }, { "epoch": 0.17723258446859624, "grad_norm": 0.3796321153640747, "learning_rate": 0.00016458753038121112, "loss": 1.4114, "step": 13639 }, { "epoch": 0.17724557901251212, "grad_norm": 0.5171111226081848, "learning_rate": 0.00016458493091929972, "loss": 1.3827, "step": 13640 }, { "epoch": 0.177258573556428, "grad_norm": 0.3387519121170044, "learning_rate": 0.00016458233145738831, "loss": 1.3418, "step": 13641 }, { "epoch": 0.17727156810034386, "grad_norm": 0.3412686288356781, "learning_rate": 0.00016457973199547694, "loss": 1.471, "step": 13642 }, { "epoch": 0.17728456264425974, "grad_norm": 0.3255894184112549, "learning_rate": 0.00016457713253356556, "loss": 1.231, "step": 13643 }, { "epoch": 0.1772975571881756, "grad_norm": 0.4041486382484436, "learning_rate": 0.0001645745330716542, "loss": 1.3806, "step": 13644 }, { "epoch": 0.17731055173209148, "grad_norm": 0.3939409852027893, "learning_rate": 0.00016457193360974279, "loss": 1.4591, "step": 13645 }, { "epoch": 0.17732354627600735, "grad_norm": 0.35969486832618713, "learning_rate": 0.0001645693341478314, "loss": 1.5352, "step": 13646 }, { "epoch": 0.17733654081992323, "grad_norm": 0.4259096384048462, "learning_rate": 0.00016456673468592003, "loss": 1.4844, "step": 13647 }, { "epoch": 0.1773495353638391, "grad_norm": 0.35051289200782776, "learning_rate": 0.00016456413522400863, "loss": 1.2712, "step": 13648 }, { "epoch": 0.17736252990775497, "grad_norm": 0.4646059572696686, "learning_rate": 0.00016456153576209726, "loss": 1.5192, "step": 13649 }, { "epoch": 0.17737552445167085, "grad_norm": 0.44001391530036926, "learning_rate": 0.00016455893630018585, "loss": 1.4213, "step": 13650 }, { "epoch": 0.17738851899558672, "grad_norm": 0.37329018115997314, "learning_rate": 0.0001645563368382745, "loss": 1.5207, "step": 13651 }, { "epoch": 0.1774015135395026, "grad_norm": 0.4457497000694275, "learning_rate": 0.0001645537373763631, "loss": 1.4993, "step": 13652 }, { "epoch": 0.17741450808341847, "grad_norm": 0.40998464822769165, "learning_rate": 0.00016455113791445173, "loss": 1.2091, "step": 13653 }, { "epoch": 0.17742750262733434, "grad_norm": 0.3715917468070984, "learning_rate": 0.00016454853845254032, "loss": 1.3589, "step": 13654 }, { "epoch": 0.1774404971712502, "grad_norm": 0.46409469842910767, "learning_rate": 0.00016454593899062895, "loss": 1.5047, "step": 13655 }, { "epoch": 0.17745349171516608, "grad_norm": 0.40728530287742615, "learning_rate": 0.00016454333952871757, "loss": 1.4768, "step": 13656 }, { "epoch": 0.17746648625908196, "grad_norm": 0.43321725726127625, "learning_rate": 0.00016454074006680617, "loss": 1.6089, "step": 13657 }, { "epoch": 0.17747948080299783, "grad_norm": 0.438004732131958, "learning_rate": 0.0001645381406048948, "loss": 1.4013, "step": 13658 }, { "epoch": 0.1774924753469137, "grad_norm": 0.4390339255332947, "learning_rate": 0.00016453554114298342, "loss": 1.3902, "step": 13659 }, { "epoch": 0.17750546989082958, "grad_norm": 0.4157491624355316, "learning_rate": 0.00016453294168107202, "loss": 1.5744, "step": 13660 }, { "epoch": 0.17751846443474545, "grad_norm": 0.3626139760017395, "learning_rate": 0.00016453034221916064, "loss": 1.3843, "step": 13661 }, { "epoch": 0.17753145897866132, "grad_norm": 0.49568676948547363, "learning_rate": 0.00016452774275724924, "loss": 1.4975, "step": 13662 }, { "epoch": 0.1775444535225772, "grad_norm": 0.4177223742008209, "learning_rate": 0.0001645251432953379, "loss": 1.5525, "step": 13663 }, { "epoch": 0.17755744806649307, "grad_norm": 0.3607726991176605, "learning_rate": 0.0001645225438334265, "loss": 1.278, "step": 13664 }, { "epoch": 0.17757044261040897, "grad_norm": 0.3979356288909912, "learning_rate": 0.0001645199443715151, "loss": 1.3086, "step": 13665 }, { "epoch": 0.17758343715432484, "grad_norm": 0.42499083280563354, "learning_rate": 0.0001645173449096037, "loss": 1.5858, "step": 13666 }, { "epoch": 0.17759643169824071, "grad_norm": 0.3166114389896393, "learning_rate": 0.00016451474544769233, "loss": 1.4167, "step": 13667 }, { "epoch": 0.1776094262421566, "grad_norm": 0.440518856048584, "learning_rate": 0.00016451214598578096, "loss": 1.5791, "step": 13668 }, { "epoch": 0.17762242078607246, "grad_norm": 0.3920338451862335, "learning_rate": 0.00016450954652386956, "loss": 1.3097, "step": 13669 }, { "epoch": 0.17763541532998833, "grad_norm": 0.41914042830467224, "learning_rate": 0.00016450694706195818, "loss": 1.4959, "step": 13670 }, { "epoch": 0.1776484098739042, "grad_norm": 0.38990136981010437, "learning_rate": 0.0001645043476000468, "loss": 1.2791, "step": 13671 }, { "epoch": 0.17766140441782008, "grad_norm": 0.3316994309425354, "learning_rate": 0.0001645017481381354, "loss": 1.3783, "step": 13672 }, { "epoch": 0.17767439896173595, "grad_norm": 0.4271516501903534, "learning_rate": 0.00016449914867622403, "loss": 1.3465, "step": 13673 }, { "epoch": 0.17768739350565182, "grad_norm": 0.3148038685321808, "learning_rate": 0.00016449654921431262, "loss": 1.3453, "step": 13674 }, { "epoch": 0.1777003880495677, "grad_norm": 0.25042101740837097, "learning_rate": 0.00016449394975240128, "loss": 1.228, "step": 13675 }, { "epoch": 0.17771338259348357, "grad_norm": 0.28322187066078186, "learning_rate": 0.00016449135029048987, "loss": 1.5356, "step": 13676 }, { "epoch": 0.17772637713739944, "grad_norm": 0.33902743458747864, "learning_rate": 0.0001644887508285785, "loss": 1.5147, "step": 13677 }, { "epoch": 0.17773937168131532, "grad_norm": 0.3827899396419525, "learning_rate": 0.00016448615136666712, "loss": 1.5787, "step": 13678 }, { "epoch": 0.1777523662252312, "grad_norm": 0.43593862652778625, "learning_rate": 0.00016448355190475572, "loss": 1.4679, "step": 13679 }, { "epoch": 0.17776536076914706, "grad_norm": 0.40292251110076904, "learning_rate": 0.00016448095244284434, "loss": 1.2723, "step": 13680 }, { "epoch": 0.17777835531306294, "grad_norm": 0.38633421063423157, "learning_rate": 0.00016447835298093294, "loss": 1.5552, "step": 13681 }, { "epoch": 0.1777913498569788, "grad_norm": 0.23915137350559235, "learning_rate": 0.0001644757535190216, "loss": 1.1348, "step": 13682 }, { "epoch": 0.17780434440089468, "grad_norm": 0.4916902482509613, "learning_rate": 0.0001644731540571102, "loss": 1.4298, "step": 13683 }, { "epoch": 0.17781733894481055, "grad_norm": 0.3672642409801483, "learning_rate": 0.0001644705545951988, "loss": 1.3906, "step": 13684 }, { "epoch": 0.17783033348872643, "grad_norm": 0.3869721293449402, "learning_rate": 0.0001644679551332874, "loss": 1.4877, "step": 13685 }, { "epoch": 0.1778433280326423, "grad_norm": 0.32864007353782654, "learning_rate": 0.00016446535567137604, "loss": 1.392, "step": 13686 }, { "epoch": 0.17785632257655817, "grad_norm": 0.5088270902633667, "learning_rate": 0.00016446275620946466, "loss": 1.3626, "step": 13687 }, { "epoch": 0.17786931712047405, "grad_norm": 0.45448508858680725, "learning_rate": 0.00016446015674755326, "loss": 1.3931, "step": 13688 }, { "epoch": 0.17788231166438992, "grad_norm": 0.4488750696182251, "learning_rate": 0.00016445755728564188, "loss": 1.3493, "step": 13689 }, { "epoch": 0.1778953062083058, "grad_norm": 0.3026556074619293, "learning_rate": 0.0001644549578237305, "loss": 1.2031, "step": 13690 }, { "epoch": 0.17790830075222167, "grad_norm": 0.3764384984970093, "learning_rate": 0.0001644523583618191, "loss": 1.4912, "step": 13691 }, { "epoch": 0.17792129529613754, "grad_norm": 0.47016799449920654, "learning_rate": 0.00016444975889990773, "loss": 1.5177, "step": 13692 }, { "epoch": 0.1779342898400534, "grad_norm": 0.4489028751850128, "learning_rate": 0.00016444715943799633, "loss": 1.3652, "step": 13693 }, { "epoch": 0.17794728438396928, "grad_norm": 0.40877869725227356, "learning_rate": 0.00016444455997608498, "loss": 1.4849, "step": 13694 }, { "epoch": 0.17796027892788516, "grad_norm": 0.3565262258052826, "learning_rate": 0.00016444196051417358, "loss": 1.4428, "step": 13695 }, { "epoch": 0.17797327347180103, "grad_norm": 0.43148869276046753, "learning_rate": 0.00016443936105226217, "loss": 1.3438, "step": 13696 }, { "epoch": 0.1779862680157169, "grad_norm": 0.30440086126327515, "learning_rate": 0.0001644367615903508, "loss": 1.3172, "step": 13697 }, { "epoch": 0.17799926255963278, "grad_norm": 0.46799495816230774, "learning_rate": 0.00016443416212843942, "loss": 1.6404, "step": 13698 }, { "epoch": 0.17801225710354865, "grad_norm": 0.4725026786327362, "learning_rate": 0.00016443156266652805, "loss": 1.4119, "step": 13699 }, { "epoch": 0.17802525164746452, "grad_norm": 0.3942779302597046, "learning_rate": 0.00016442896320461664, "loss": 1.4096, "step": 13700 }, { "epoch": 0.1780382461913804, "grad_norm": 0.4225706160068512, "learning_rate": 0.00016442636374270527, "loss": 1.4275, "step": 13701 }, { "epoch": 0.17805124073529627, "grad_norm": 0.3985162675380707, "learning_rate": 0.0001644237642807939, "loss": 1.3276, "step": 13702 }, { "epoch": 0.17806423527921214, "grad_norm": 0.3767097294330597, "learning_rate": 0.0001644211648188825, "loss": 1.3346, "step": 13703 }, { "epoch": 0.178077229823128, "grad_norm": 0.42983728647232056, "learning_rate": 0.00016441856535697111, "loss": 1.3921, "step": 13704 }, { "epoch": 0.1780902243670439, "grad_norm": 0.35475462675094604, "learning_rate": 0.0001644159658950597, "loss": 1.375, "step": 13705 }, { "epoch": 0.17810321891095976, "grad_norm": 0.3303016424179077, "learning_rate": 0.00016441336643314836, "loss": 1.3996, "step": 13706 }, { "epoch": 0.17811621345487563, "grad_norm": 0.3781614601612091, "learning_rate": 0.00016441076697123696, "loss": 1.5475, "step": 13707 }, { "epoch": 0.1781292079987915, "grad_norm": 0.6083277463912964, "learning_rate": 0.00016440816750932559, "loss": 1.5927, "step": 13708 }, { "epoch": 0.17814220254270738, "grad_norm": 0.3208395838737488, "learning_rate": 0.00016440556804741418, "loss": 1.1576, "step": 13709 }, { "epoch": 0.17815519708662325, "grad_norm": 0.43532049655914307, "learning_rate": 0.0001644029685855028, "loss": 1.3186, "step": 13710 }, { "epoch": 0.17816819163053912, "grad_norm": 0.3627203106880188, "learning_rate": 0.00016440036912359143, "loss": 1.3851, "step": 13711 }, { "epoch": 0.178181186174455, "grad_norm": 0.36681053042411804, "learning_rate": 0.00016439776966168003, "loss": 1.5454, "step": 13712 }, { "epoch": 0.17819418071837087, "grad_norm": 0.4069535732269287, "learning_rate": 0.00016439517019976865, "loss": 1.6454, "step": 13713 }, { "epoch": 0.17820717526228674, "grad_norm": 0.4214186370372772, "learning_rate": 0.00016439257073785728, "loss": 1.5185, "step": 13714 }, { "epoch": 0.17822016980620262, "grad_norm": 0.45307931303977966, "learning_rate": 0.00016438997127594588, "loss": 1.418, "step": 13715 }, { "epoch": 0.1782331643501185, "grad_norm": 0.3571094274520874, "learning_rate": 0.0001643873718140345, "loss": 1.4635, "step": 13716 }, { "epoch": 0.17824615889403436, "grad_norm": 0.35167446732521057, "learning_rate": 0.00016438477235212312, "loss": 1.2989, "step": 13717 }, { "epoch": 0.17825915343795024, "grad_norm": 0.34684211015701294, "learning_rate": 0.00016438217289021175, "loss": 1.4367, "step": 13718 }, { "epoch": 0.1782721479818661, "grad_norm": 0.4268544912338257, "learning_rate": 0.00016437957342830035, "loss": 1.4932, "step": 13719 }, { "epoch": 0.17828514252578198, "grad_norm": 0.4498889148235321, "learning_rate": 0.00016437697396638897, "loss": 1.5777, "step": 13720 }, { "epoch": 0.17829813706969785, "grad_norm": 0.37742704153060913, "learning_rate": 0.0001643743745044776, "loss": 1.5398, "step": 13721 }, { "epoch": 0.17831113161361373, "grad_norm": 0.3591799736022949, "learning_rate": 0.0001643717750425662, "loss": 1.3604, "step": 13722 }, { "epoch": 0.1783241261575296, "grad_norm": 0.36309027671813965, "learning_rate": 0.00016436917558065482, "loss": 1.3675, "step": 13723 }, { "epoch": 0.17833712070144547, "grad_norm": 0.374479740858078, "learning_rate": 0.00016436657611874341, "loss": 1.3342, "step": 13724 }, { "epoch": 0.17835011524536135, "grad_norm": 0.42510488629341125, "learning_rate": 0.00016436397665683204, "loss": 1.4327, "step": 13725 }, { "epoch": 0.17836310978927722, "grad_norm": 0.44269806146621704, "learning_rate": 0.00016436137719492066, "loss": 1.4045, "step": 13726 }, { "epoch": 0.1783761043331931, "grad_norm": 0.4799162447452545, "learning_rate": 0.00016435877773300926, "loss": 1.4628, "step": 13727 }, { "epoch": 0.17838909887710896, "grad_norm": 0.4027549624443054, "learning_rate": 0.00016435617827109789, "loss": 1.373, "step": 13728 }, { "epoch": 0.17840209342102484, "grad_norm": 0.35438939929008484, "learning_rate": 0.0001643535788091865, "loss": 1.4518, "step": 13729 }, { "epoch": 0.1784150879649407, "grad_norm": 0.4149875342845917, "learning_rate": 0.00016435097934727513, "loss": 1.4657, "step": 13730 }, { "epoch": 0.17842808250885658, "grad_norm": 0.4245990216732025, "learning_rate": 0.00016434837988536373, "loss": 1.4014, "step": 13731 }, { "epoch": 0.17844107705277246, "grad_norm": 0.3770747780799866, "learning_rate": 0.00016434578042345236, "loss": 1.5124, "step": 13732 }, { "epoch": 0.17845407159668833, "grad_norm": 0.36359724402427673, "learning_rate": 0.00016434318096154098, "loss": 1.3344, "step": 13733 }, { "epoch": 0.1784670661406042, "grad_norm": 0.3413032591342926, "learning_rate": 0.00016434058149962958, "loss": 1.3387, "step": 13734 }, { "epoch": 0.17848006068452008, "grad_norm": 0.43239447474479675, "learning_rate": 0.0001643379820377182, "loss": 1.6183, "step": 13735 }, { "epoch": 0.17849305522843595, "grad_norm": 0.4184035062789917, "learning_rate": 0.0001643353825758068, "loss": 1.4185, "step": 13736 }, { "epoch": 0.17850604977235182, "grad_norm": 0.4724022150039673, "learning_rate": 0.00016433278311389545, "loss": 1.3884, "step": 13737 }, { "epoch": 0.1785190443162677, "grad_norm": 0.35345447063446045, "learning_rate": 0.00016433018365198405, "loss": 1.3651, "step": 13738 }, { "epoch": 0.17853203886018357, "grad_norm": 0.36239373683929443, "learning_rate": 0.00016432758419007265, "loss": 1.4274, "step": 13739 }, { "epoch": 0.17854503340409944, "grad_norm": 0.45474785566329956, "learning_rate": 0.00016432498472816127, "loss": 1.6063, "step": 13740 }, { "epoch": 0.1785580279480153, "grad_norm": 0.4569450914859772, "learning_rate": 0.0001643223852662499, "loss": 1.2744, "step": 13741 }, { "epoch": 0.17857102249193121, "grad_norm": 0.38433629274368286, "learning_rate": 0.00016431978580433852, "loss": 1.5087, "step": 13742 }, { "epoch": 0.1785840170358471, "grad_norm": 0.39262667298316956, "learning_rate": 0.00016431718634242712, "loss": 1.5212, "step": 13743 }, { "epoch": 0.17859701157976296, "grad_norm": 0.4967557489871979, "learning_rate": 0.00016431458688051574, "loss": 1.5266, "step": 13744 }, { "epoch": 0.17861000612367883, "grad_norm": 0.42679092288017273, "learning_rate": 0.00016431198741860437, "loss": 1.3931, "step": 13745 }, { "epoch": 0.1786230006675947, "grad_norm": 0.45991623401641846, "learning_rate": 0.00016430938795669296, "loss": 1.3164, "step": 13746 }, { "epoch": 0.17863599521151058, "grad_norm": 0.3419206440448761, "learning_rate": 0.0001643067884947816, "loss": 1.5314, "step": 13747 }, { "epoch": 0.17864898975542645, "grad_norm": 0.3307262361049652, "learning_rate": 0.00016430418903287019, "loss": 1.2903, "step": 13748 }, { "epoch": 0.17866198429934232, "grad_norm": 0.4099598824977875, "learning_rate": 0.00016430158957095884, "loss": 1.4847, "step": 13749 }, { "epoch": 0.1786749788432582, "grad_norm": 0.516158401966095, "learning_rate": 0.00016429899010904743, "loss": 1.4336, "step": 13750 }, { "epoch": 0.17868797338717407, "grad_norm": 0.4255308210849762, "learning_rate": 0.00016429639064713603, "loss": 1.4437, "step": 13751 }, { "epoch": 0.17870096793108994, "grad_norm": 0.430316686630249, "learning_rate": 0.00016429379118522468, "loss": 1.4715, "step": 13752 }, { "epoch": 0.17871396247500582, "grad_norm": 0.4875301420688629, "learning_rate": 0.00016429119172331328, "loss": 1.4921, "step": 13753 }, { "epoch": 0.1787269570189217, "grad_norm": 0.3707745671272278, "learning_rate": 0.0001642885922614019, "loss": 1.5666, "step": 13754 }, { "epoch": 0.17873995156283756, "grad_norm": 0.43877026438713074, "learning_rate": 0.0001642859927994905, "loss": 1.3701, "step": 13755 }, { "epoch": 0.17875294610675344, "grad_norm": 0.43068212270736694, "learning_rate": 0.00016428339333757913, "loss": 1.5801, "step": 13756 }, { "epoch": 0.1787659406506693, "grad_norm": 0.3238990902900696, "learning_rate": 0.00016428079387566775, "loss": 1.4267, "step": 13757 }, { "epoch": 0.17877893519458518, "grad_norm": 0.38150307536125183, "learning_rate": 0.00016427819441375635, "loss": 1.4572, "step": 13758 }, { "epoch": 0.17879192973850105, "grad_norm": 0.45798560976982117, "learning_rate": 0.00016427559495184497, "loss": 1.504, "step": 13759 }, { "epoch": 0.17880492428241693, "grad_norm": 0.36258891224861145, "learning_rate": 0.0001642729954899336, "loss": 1.5949, "step": 13760 }, { "epoch": 0.1788179188263328, "grad_norm": 0.40525761246681213, "learning_rate": 0.00016427039602802222, "loss": 1.4535, "step": 13761 }, { "epoch": 0.17883091337024867, "grad_norm": 0.43064939975738525, "learning_rate": 0.00016426779656611082, "loss": 1.4622, "step": 13762 }, { "epoch": 0.17884390791416455, "grad_norm": 0.43405669927597046, "learning_rate": 0.00016426519710419944, "loss": 1.4771, "step": 13763 }, { "epoch": 0.17885690245808042, "grad_norm": 0.39857834577560425, "learning_rate": 0.00016426259764228807, "loss": 1.5152, "step": 13764 }, { "epoch": 0.1788698970019963, "grad_norm": 0.547569215297699, "learning_rate": 0.00016425999818037667, "loss": 1.5585, "step": 13765 }, { "epoch": 0.17888289154591216, "grad_norm": 0.45613643527030945, "learning_rate": 0.0001642573987184653, "loss": 1.4797, "step": 13766 }, { "epoch": 0.17889588608982804, "grad_norm": 0.3804607093334198, "learning_rate": 0.0001642547992565539, "loss": 1.6104, "step": 13767 }, { "epoch": 0.1789088806337439, "grad_norm": 0.4312661588191986, "learning_rate": 0.0001642521997946425, "loss": 1.3192, "step": 13768 }, { "epoch": 0.17892187517765978, "grad_norm": 0.4098782539367676, "learning_rate": 0.00016424960033273114, "loss": 1.3174, "step": 13769 }, { "epoch": 0.17893486972157566, "grad_norm": 0.38290780782699585, "learning_rate": 0.00016424700087081973, "loss": 1.4901, "step": 13770 }, { "epoch": 0.17894786426549153, "grad_norm": 0.4679395258426666, "learning_rate": 0.00016424440140890836, "loss": 1.4962, "step": 13771 }, { "epoch": 0.1789608588094074, "grad_norm": 0.3485274910926819, "learning_rate": 0.00016424180194699698, "loss": 1.4394, "step": 13772 }, { "epoch": 0.17897385335332328, "grad_norm": 0.32977503538131714, "learning_rate": 0.0001642392024850856, "loss": 1.3033, "step": 13773 }, { "epoch": 0.17898684789723915, "grad_norm": 0.373714417219162, "learning_rate": 0.0001642366030231742, "loss": 1.3787, "step": 13774 }, { "epoch": 0.17899984244115502, "grad_norm": 0.2925184965133667, "learning_rate": 0.00016423400356126283, "loss": 1.0962, "step": 13775 }, { "epoch": 0.1790128369850709, "grad_norm": 0.3441365659236908, "learning_rate": 0.00016423140409935145, "loss": 1.4411, "step": 13776 }, { "epoch": 0.17902583152898677, "grad_norm": 0.35370850563049316, "learning_rate": 0.00016422880463744005, "loss": 1.589, "step": 13777 }, { "epoch": 0.17903882607290264, "grad_norm": 0.43551695346832275, "learning_rate": 0.00016422620517552868, "loss": 1.3024, "step": 13778 }, { "epoch": 0.1790518206168185, "grad_norm": 0.5173757076263428, "learning_rate": 0.00016422360571361727, "loss": 1.446, "step": 13779 }, { "epoch": 0.1790648151607344, "grad_norm": 0.3859349191188812, "learning_rate": 0.0001642210062517059, "loss": 1.3633, "step": 13780 }, { "epoch": 0.17907780970465026, "grad_norm": 0.3870016932487488, "learning_rate": 0.00016421840678979452, "loss": 1.3529, "step": 13781 }, { "epoch": 0.17909080424856613, "grad_norm": 0.48903608322143555, "learning_rate": 0.00016421580732788312, "loss": 1.5851, "step": 13782 }, { "epoch": 0.179103798792482, "grad_norm": 0.4143184423446655, "learning_rate": 0.00016421320786597174, "loss": 1.4583, "step": 13783 }, { "epoch": 0.17911679333639788, "grad_norm": 0.40734627842903137, "learning_rate": 0.00016421060840406037, "loss": 1.4948, "step": 13784 }, { "epoch": 0.17912978788031375, "grad_norm": 0.402630478143692, "learning_rate": 0.000164208008942149, "loss": 1.3483, "step": 13785 }, { "epoch": 0.17914278242422962, "grad_norm": 0.3838624656200409, "learning_rate": 0.0001642054094802376, "loss": 1.4212, "step": 13786 }, { "epoch": 0.1791557769681455, "grad_norm": 0.44989466667175293, "learning_rate": 0.00016420281001832622, "loss": 1.5962, "step": 13787 }, { "epoch": 0.17916877151206137, "grad_norm": 0.3460371792316437, "learning_rate": 0.00016420021055641484, "loss": 1.3895, "step": 13788 }, { "epoch": 0.17918176605597724, "grad_norm": 0.39832690358161926, "learning_rate": 0.00016419761109450344, "loss": 1.269, "step": 13789 }, { "epoch": 0.17919476059989312, "grad_norm": 0.47547417879104614, "learning_rate": 0.00016419501163259206, "loss": 1.5116, "step": 13790 }, { "epoch": 0.179207755143809, "grad_norm": 0.4040995240211487, "learning_rate": 0.00016419241217068069, "loss": 1.3665, "step": 13791 }, { "epoch": 0.17922074968772486, "grad_norm": 0.310497909784317, "learning_rate": 0.0001641898127087693, "loss": 1.4201, "step": 13792 }, { "epoch": 0.17923374423164073, "grad_norm": 0.36639413237571716, "learning_rate": 0.0001641872132468579, "loss": 1.3808, "step": 13793 }, { "epoch": 0.1792467387755566, "grad_norm": 0.407367467880249, "learning_rate": 0.0001641846137849465, "loss": 1.5367, "step": 13794 }, { "epoch": 0.17925973331947248, "grad_norm": 0.3933084011077881, "learning_rate": 0.00016418201432303516, "loss": 1.2892, "step": 13795 }, { "epoch": 0.17927272786338835, "grad_norm": 0.28527238965034485, "learning_rate": 0.00016417941486112375, "loss": 1.2736, "step": 13796 }, { "epoch": 0.17928572240730423, "grad_norm": 0.46660131216049194, "learning_rate": 0.00016417681539921238, "loss": 1.4808, "step": 13797 }, { "epoch": 0.1792987169512201, "grad_norm": 0.3512675166130066, "learning_rate": 0.00016417421593730098, "loss": 1.4368, "step": 13798 }, { "epoch": 0.17931171149513597, "grad_norm": 0.3426361680030823, "learning_rate": 0.0001641716164753896, "loss": 1.3443, "step": 13799 }, { "epoch": 0.17932470603905185, "grad_norm": 0.48628151416778564, "learning_rate": 0.00016416901701347823, "loss": 1.6091, "step": 13800 }, { "epoch": 0.17933770058296772, "grad_norm": 0.40451952815055847, "learning_rate": 0.00016416641755156682, "loss": 1.4953, "step": 13801 }, { "epoch": 0.1793506951268836, "grad_norm": 0.3639897108078003, "learning_rate": 0.00016416381808965545, "loss": 1.3484, "step": 13802 }, { "epoch": 0.17936368967079946, "grad_norm": 0.3489830195903778, "learning_rate": 0.00016416121862774407, "loss": 1.2712, "step": 13803 }, { "epoch": 0.17937668421471534, "grad_norm": 0.4058685004711151, "learning_rate": 0.0001641586191658327, "loss": 1.4822, "step": 13804 }, { "epoch": 0.1793896787586312, "grad_norm": 0.4718252420425415, "learning_rate": 0.0001641560197039213, "loss": 1.551, "step": 13805 }, { "epoch": 0.17940267330254708, "grad_norm": 0.4722570776939392, "learning_rate": 0.0001641534202420099, "loss": 1.4756, "step": 13806 }, { "epoch": 0.17941566784646296, "grad_norm": 0.43057000637054443, "learning_rate": 0.00016415082078009854, "loss": 1.5222, "step": 13807 }, { "epoch": 0.17942866239037883, "grad_norm": 0.4507659375667572, "learning_rate": 0.00016414822131818714, "loss": 1.5635, "step": 13808 }, { "epoch": 0.1794416569342947, "grad_norm": 0.39975231885910034, "learning_rate": 0.00016414562185627576, "loss": 1.3116, "step": 13809 }, { "epoch": 0.17945465147821058, "grad_norm": 0.38917291164398193, "learning_rate": 0.00016414302239436436, "loss": 1.3201, "step": 13810 }, { "epoch": 0.17946764602212645, "grad_norm": 0.2852829694747925, "learning_rate": 0.00016414042293245299, "loss": 1.332, "step": 13811 }, { "epoch": 0.17948064056604232, "grad_norm": 0.3799082338809967, "learning_rate": 0.0001641378234705416, "loss": 1.4331, "step": 13812 }, { "epoch": 0.1794936351099582, "grad_norm": 0.32822662591934204, "learning_rate": 0.0001641352240086302, "loss": 1.2213, "step": 13813 }, { "epoch": 0.17950662965387407, "grad_norm": 0.44622769951820374, "learning_rate": 0.00016413262454671883, "loss": 1.5485, "step": 13814 }, { "epoch": 0.17951962419778994, "grad_norm": 0.44788530468940735, "learning_rate": 0.00016413002508480746, "loss": 1.5579, "step": 13815 }, { "epoch": 0.1795326187417058, "grad_norm": 0.4718187153339386, "learning_rate": 0.00016412742562289608, "loss": 1.3605, "step": 13816 }, { "epoch": 0.17954561328562169, "grad_norm": 0.4571569859981537, "learning_rate": 0.00016412482616098468, "loss": 1.375, "step": 13817 }, { "epoch": 0.1795586078295376, "grad_norm": 0.36007070541381836, "learning_rate": 0.00016412222669907328, "loss": 1.2141, "step": 13818 }, { "epoch": 0.17957160237345346, "grad_norm": 0.40235635638237, "learning_rate": 0.00016411962723716193, "loss": 1.2654, "step": 13819 }, { "epoch": 0.17958459691736933, "grad_norm": 0.45574745535850525, "learning_rate": 0.00016411702777525052, "loss": 1.4978, "step": 13820 }, { "epoch": 0.1795975914612852, "grad_norm": 0.4910139739513397, "learning_rate": 0.00016411442831333915, "loss": 1.3389, "step": 13821 }, { "epoch": 0.17961058600520108, "grad_norm": 0.41516998410224915, "learning_rate": 0.00016411182885142775, "loss": 1.4916, "step": 13822 }, { "epoch": 0.17962358054911695, "grad_norm": 0.41109657287597656, "learning_rate": 0.00016410922938951637, "loss": 1.5574, "step": 13823 }, { "epoch": 0.17963657509303282, "grad_norm": 0.39612483978271484, "learning_rate": 0.000164106629927605, "loss": 1.2068, "step": 13824 }, { "epoch": 0.1796495696369487, "grad_norm": 0.4362066090106964, "learning_rate": 0.0001641040304656936, "loss": 1.6603, "step": 13825 }, { "epoch": 0.17966256418086457, "grad_norm": 0.4348241984844208, "learning_rate": 0.00016410143100378224, "loss": 1.6296, "step": 13826 }, { "epoch": 0.17967555872478044, "grad_norm": 0.48206841945648193, "learning_rate": 0.00016409883154187084, "loss": 1.54, "step": 13827 }, { "epoch": 0.17968855326869632, "grad_norm": 0.3649912178516388, "learning_rate": 0.00016409623207995947, "loss": 1.2774, "step": 13828 }, { "epoch": 0.1797015478126122, "grad_norm": 0.3826022446155548, "learning_rate": 0.00016409363261804806, "loss": 1.562, "step": 13829 }, { "epoch": 0.17971454235652806, "grad_norm": 0.5306301712989807, "learning_rate": 0.0001640910331561367, "loss": 1.465, "step": 13830 }, { "epoch": 0.17972753690044393, "grad_norm": 0.38976162672042847, "learning_rate": 0.0001640884336942253, "loss": 1.4673, "step": 13831 }, { "epoch": 0.1797405314443598, "grad_norm": 0.49456754326820374, "learning_rate": 0.0001640858342323139, "loss": 1.5426, "step": 13832 }, { "epoch": 0.17975352598827568, "grad_norm": 0.34266799688339233, "learning_rate": 0.00016408323477040253, "loss": 1.4015, "step": 13833 }, { "epoch": 0.17976652053219155, "grad_norm": 0.35109132528305054, "learning_rate": 0.00016408063530849116, "loss": 1.4474, "step": 13834 }, { "epoch": 0.17977951507610743, "grad_norm": 0.4558723270893097, "learning_rate": 0.00016407803584657976, "loss": 1.4873, "step": 13835 }, { "epoch": 0.1797925096200233, "grad_norm": 0.4390784204006195, "learning_rate": 0.00016407543638466838, "loss": 1.4802, "step": 13836 }, { "epoch": 0.17980550416393917, "grad_norm": 0.41165438294410706, "learning_rate": 0.00016407283692275698, "loss": 1.4237, "step": 13837 }, { "epoch": 0.17981849870785505, "grad_norm": 0.3708517253398895, "learning_rate": 0.00016407023746084563, "loss": 1.4113, "step": 13838 }, { "epoch": 0.17983149325177092, "grad_norm": 0.30848339200019836, "learning_rate": 0.00016406763799893423, "loss": 1.4877, "step": 13839 }, { "epoch": 0.1798444877956868, "grad_norm": 0.3068132698535919, "learning_rate": 0.00016406503853702285, "loss": 1.5416, "step": 13840 }, { "epoch": 0.17985748233960266, "grad_norm": 0.32952257990837097, "learning_rate": 0.00016406243907511145, "loss": 1.4833, "step": 13841 }, { "epoch": 0.17987047688351854, "grad_norm": 0.44472792744636536, "learning_rate": 0.00016405983961320007, "loss": 1.427, "step": 13842 }, { "epoch": 0.1798834714274344, "grad_norm": 0.3594261109828949, "learning_rate": 0.0001640572401512887, "loss": 1.4627, "step": 13843 }, { "epoch": 0.17989646597135028, "grad_norm": 0.3843877613544464, "learning_rate": 0.0001640546406893773, "loss": 1.3977, "step": 13844 }, { "epoch": 0.17990946051526616, "grad_norm": 0.38599681854248047, "learning_rate": 0.00016405204122746592, "loss": 1.4046, "step": 13845 }, { "epoch": 0.17992245505918203, "grad_norm": 0.4580393433570862, "learning_rate": 0.00016404944176555454, "loss": 1.4546, "step": 13846 }, { "epoch": 0.1799354496030979, "grad_norm": 0.3166486918926239, "learning_rate": 0.00016404684230364314, "loss": 1.3271, "step": 13847 }, { "epoch": 0.17994844414701378, "grad_norm": 0.3559587895870209, "learning_rate": 0.00016404424284173177, "loss": 1.413, "step": 13848 }, { "epoch": 0.17996143869092965, "grad_norm": 0.48100772500038147, "learning_rate": 0.00016404164337982036, "loss": 1.4306, "step": 13849 }, { "epoch": 0.17997443323484552, "grad_norm": 0.45428118109703064, "learning_rate": 0.00016403904391790902, "loss": 1.6888, "step": 13850 }, { "epoch": 0.1799874277787614, "grad_norm": 0.433809369802475, "learning_rate": 0.0001640364444559976, "loss": 1.5382, "step": 13851 }, { "epoch": 0.18000042232267727, "grad_norm": 0.4594508111476898, "learning_rate": 0.00016403384499408624, "loss": 1.5995, "step": 13852 }, { "epoch": 0.18001341686659314, "grad_norm": 0.4555775821208954, "learning_rate": 0.00016403124553217483, "loss": 1.439, "step": 13853 }, { "epoch": 0.180026411410509, "grad_norm": 0.32143816351890564, "learning_rate": 0.00016402864607026346, "loss": 1.375, "step": 13854 }, { "epoch": 0.18003940595442489, "grad_norm": 0.3113643229007721, "learning_rate": 0.00016402604660835208, "loss": 1.5854, "step": 13855 }, { "epoch": 0.18005240049834076, "grad_norm": 0.4634411036968231, "learning_rate": 0.00016402344714644068, "loss": 1.5706, "step": 13856 }, { "epoch": 0.18006539504225663, "grad_norm": 0.32296934723854065, "learning_rate": 0.0001640208476845293, "loss": 1.3125, "step": 13857 }, { "epoch": 0.1800783895861725, "grad_norm": 0.3722306489944458, "learning_rate": 0.00016401824822261793, "loss": 1.4272, "step": 13858 }, { "epoch": 0.18009138413008838, "grad_norm": 0.4071848690509796, "learning_rate": 0.00016401564876070655, "loss": 1.5021, "step": 13859 }, { "epoch": 0.18010437867400425, "grad_norm": 0.36169856786727905, "learning_rate": 0.00016401304929879515, "loss": 1.4682, "step": 13860 }, { "epoch": 0.18011737321792012, "grad_norm": 0.44762492179870605, "learning_rate": 0.00016401044983688378, "loss": 1.5594, "step": 13861 }, { "epoch": 0.180130367761836, "grad_norm": 0.46865421533584595, "learning_rate": 0.0001640078503749724, "loss": 1.5174, "step": 13862 }, { "epoch": 0.18014336230575187, "grad_norm": 0.33863139152526855, "learning_rate": 0.000164005250913061, "loss": 1.3611, "step": 13863 }, { "epoch": 0.18015635684966774, "grad_norm": 0.29973089694976807, "learning_rate": 0.00016400265145114962, "loss": 1.3293, "step": 13864 }, { "epoch": 0.18016935139358362, "grad_norm": 0.4617586135864258, "learning_rate": 0.00016400005198923825, "loss": 1.4859, "step": 13865 }, { "epoch": 0.1801823459374995, "grad_norm": 0.3091930150985718, "learning_rate": 0.00016399745252732684, "loss": 1.489, "step": 13866 }, { "epoch": 0.18019534048141536, "grad_norm": 0.3450116813182831, "learning_rate": 0.00016399485306541547, "loss": 1.4479, "step": 13867 }, { "epoch": 0.18020833502533123, "grad_norm": 0.3327886760234833, "learning_rate": 0.00016399225360350407, "loss": 1.3031, "step": 13868 }, { "epoch": 0.1802213295692471, "grad_norm": 0.36577045917510986, "learning_rate": 0.00016398965414159272, "loss": 1.3405, "step": 13869 }, { "epoch": 0.18023432411316298, "grad_norm": 0.39328888058662415, "learning_rate": 0.00016398705467968132, "loss": 1.4669, "step": 13870 }, { "epoch": 0.18024731865707885, "grad_norm": 0.3182171583175659, "learning_rate": 0.00016398445521776994, "loss": 1.4693, "step": 13871 }, { "epoch": 0.18026031320099473, "grad_norm": 0.3632968068122864, "learning_rate": 0.00016398185575585854, "loss": 1.2892, "step": 13872 }, { "epoch": 0.1802733077449106, "grad_norm": 0.45355263352394104, "learning_rate": 0.00016397925629394716, "loss": 1.3309, "step": 13873 }, { "epoch": 0.18028630228882647, "grad_norm": 0.3434823453426361, "learning_rate": 0.00016397665683203579, "loss": 1.4543, "step": 13874 }, { "epoch": 0.18029929683274235, "grad_norm": 0.39346957206726074, "learning_rate": 0.00016397405737012438, "loss": 1.5127, "step": 13875 }, { "epoch": 0.18031229137665822, "grad_norm": 0.3791983425617218, "learning_rate": 0.000163971457908213, "loss": 1.5049, "step": 13876 }, { "epoch": 0.1803252859205741, "grad_norm": 0.40748652815818787, "learning_rate": 0.00016396885844630163, "loss": 1.3163, "step": 13877 }, { "epoch": 0.18033828046448996, "grad_norm": 0.365323007106781, "learning_rate": 0.00016396625898439023, "loss": 1.6092, "step": 13878 }, { "epoch": 0.18035127500840584, "grad_norm": 0.30958792567253113, "learning_rate": 0.00016396365952247885, "loss": 1.5301, "step": 13879 }, { "epoch": 0.1803642695523217, "grad_norm": 0.35711079835891724, "learning_rate": 0.00016396106006056745, "loss": 1.4603, "step": 13880 }, { "epoch": 0.18037726409623758, "grad_norm": 0.34600594639778137, "learning_rate": 0.0001639584605986561, "loss": 1.5782, "step": 13881 }, { "epoch": 0.18039025864015346, "grad_norm": 0.31940600275993347, "learning_rate": 0.0001639558611367447, "loss": 1.2671, "step": 13882 }, { "epoch": 0.18040325318406933, "grad_norm": 0.428899347782135, "learning_rate": 0.00016395326167483333, "loss": 1.3482, "step": 13883 }, { "epoch": 0.1804162477279852, "grad_norm": 0.3702729642391205, "learning_rate": 0.00016395066221292192, "loss": 1.4955, "step": 13884 }, { "epoch": 0.18042924227190107, "grad_norm": 0.42614781856536865, "learning_rate": 0.00016394806275101055, "loss": 1.469, "step": 13885 }, { "epoch": 0.18044223681581695, "grad_norm": 0.3947365880012512, "learning_rate": 0.00016394546328909917, "loss": 1.345, "step": 13886 }, { "epoch": 0.18045523135973282, "grad_norm": 0.38064542412757874, "learning_rate": 0.00016394286382718777, "loss": 1.4187, "step": 13887 }, { "epoch": 0.1804682259036487, "grad_norm": 0.36633992195129395, "learning_rate": 0.0001639402643652764, "loss": 1.4348, "step": 13888 }, { "epoch": 0.18048122044756457, "grad_norm": 0.43246686458587646, "learning_rate": 0.00016393766490336502, "loss": 1.3842, "step": 13889 }, { "epoch": 0.18049421499148044, "grad_norm": 0.32606256008148193, "learning_rate": 0.00016393506544145362, "loss": 1.5689, "step": 13890 }, { "epoch": 0.1805072095353963, "grad_norm": 0.3458372950553894, "learning_rate": 0.00016393246597954224, "loss": 1.5251, "step": 13891 }, { "epoch": 0.18052020407931219, "grad_norm": 0.32178136706352234, "learning_rate": 0.00016392986651763084, "loss": 1.3493, "step": 13892 }, { "epoch": 0.18053319862322806, "grad_norm": 0.45217379927635193, "learning_rate": 0.0001639272670557195, "loss": 1.3539, "step": 13893 }, { "epoch": 0.18054619316714396, "grad_norm": 0.30768677592277527, "learning_rate": 0.00016392466759380809, "loss": 1.5169, "step": 13894 }, { "epoch": 0.18055918771105983, "grad_norm": 0.4948274791240692, "learning_rate": 0.0001639220681318967, "loss": 1.2032, "step": 13895 }, { "epoch": 0.1805721822549757, "grad_norm": 0.3379811942577362, "learning_rate": 0.0001639194686699853, "loss": 1.44, "step": 13896 }, { "epoch": 0.18058517679889158, "grad_norm": 0.34871724247932434, "learning_rate": 0.00016391686920807393, "loss": 1.2331, "step": 13897 }, { "epoch": 0.18059817134280745, "grad_norm": 0.41146326065063477, "learning_rate": 0.00016391426974616256, "loss": 1.3666, "step": 13898 }, { "epoch": 0.18061116588672332, "grad_norm": 0.3320792019367218, "learning_rate": 0.00016391167028425115, "loss": 1.341, "step": 13899 }, { "epoch": 0.1806241604306392, "grad_norm": 0.37427496910095215, "learning_rate": 0.0001639090708223398, "loss": 1.5761, "step": 13900 }, { "epoch": 0.18063715497455507, "grad_norm": 0.33989691734313965, "learning_rate": 0.0001639064713604284, "loss": 1.3395, "step": 13901 }, { "epoch": 0.18065014951847094, "grad_norm": 0.44891902804374695, "learning_rate": 0.000163903871898517, "loss": 1.4473, "step": 13902 }, { "epoch": 0.18066314406238682, "grad_norm": 0.36803674697875977, "learning_rate": 0.00016390127243660563, "loss": 1.4268, "step": 13903 }, { "epoch": 0.1806761386063027, "grad_norm": 0.3450038731098175, "learning_rate": 0.00016389867297469425, "loss": 1.5073, "step": 13904 }, { "epoch": 0.18068913315021856, "grad_norm": 0.462792307138443, "learning_rate": 0.00016389607351278287, "loss": 1.3914, "step": 13905 }, { "epoch": 0.18070212769413443, "grad_norm": 0.4687727689743042, "learning_rate": 0.00016389347405087147, "loss": 1.5746, "step": 13906 }, { "epoch": 0.1807151222380503, "grad_norm": 0.36433643102645874, "learning_rate": 0.0001638908745889601, "loss": 1.3829, "step": 13907 }, { "epoch": 0.18072811678196618, "grad_norm": 0.41371214389801025, "learning_rate": 0.00016388827512704872, "loss": 1.3827, "step": 13908 }, { "epoch": 0.18074111132588205, "grad_norm": 0.33636152744293213, "learning_rate": 0.00016388567566513732, "loss": 1.37, "step": 13909 }, { "epoch": 0.18075410586979793, "grad_norm": 0.3626081645488739, "learning_rate": 0.00016388307620322594, "loss": 1.2576, "step": 13910 }, { "epoch": 0.1807671004137138, "grad_norm": 0.4206397831439972, "learning_rate": 0.00016388047674131454, "loss": 1.3836, "step": 13911 }, { "epoch": 0.18078009495762967, "grad_norm": 0.49211814999580383, "learning_rate": 0.0001638778772794032, "loss": 1.5328, "step": 13912 }, { "epoch": 0.18079308950154555, "grad_norm": 0.3886708915233612, "learning_rate": 0.0001638752778174918, "loss": 1.3879, "step": 13913 }, { "epoch": 0.18080608404546142, "grad_norm": 0.40076062083244324, "learning_rate": 0.0001638726783555804, "loss": 1.4102, "step": 13914 }, { "epoch": 0.1808190785893773, "grad_norm": 0.42153453826904297, "learning_rate": 0.000163870078893669, "loss": 1.5717, "step": 13915 }, { "epoch": 0.18083207313329316, "grad_norm": 0.403816282749176, "learning_rate": 0.00016386747943175764, "loss": 1.3718, "step": 13916 }, { "epoch": 0.18084506767720904, "grad_norm": 0.36507534980773926, "learning_rate": 0.00016386487996984626, "loss": 1.2626, "step": 13917 }, { "epoch": 0.1808580622211249, "grad_norm": 0.3647579550743103, "learning_rate": 0.00016386228050793486, "loss": 1.2863, "step": 13918 }, { "epoch": 0.18087105676504078, "grad_norm": 0.3325686752796173, "learning_rate": 0.00016385968104602348, "loss": 1.4135, "step": 13919 }, { "epoch": 0.18088405130895666, "grad_norm": 0.47475892305374146, "learning_rate": 0.0001638570815841121, "loss": 1.574, "step": 13920 }, { "epoch": 0.18089704585287253, "grad_norm": 0.5276363492012024, "learning_rate": 0.0001638544821222007, "loss": 1.4119, "step": 13921 }, { "epoch": 0.1809100403967884, "grad_norm": 0.4015517830848694, "learning_rate": 0.00016385188266028933, "loss": 1.3681, "step": 13922 }, { "epoch": 0.18092303494070427, "grad_norm": 0.3477279841899872, "learning_rate": 0.00016384928319837793, "loss": 1.3906, "step": 13923 }, { "epoch": 0.18093602948462015, "grad_norm": 0.4474170506000519, "learning_rate": 0.00016384668373646658, "loss": 1.352, "step": 13924 }, { "epoch": 0.18094902402853602, "grad_norm": 0.44052085280418396, "learning_rate": 0.00016384408427455517, "loss": 1.3615, "step": 13925 }, { "epoch": 0.1809620185724519, "grad_norm": 0.3612934350967407, "learning_rate": 0.0001638414848126438, "loss": 1.2988, "step": 13926 }, { "epoch": 0.18097501311636777, "grad_norm": 0.4232841432094574, "learning_rate": 0.0001638388853507324, "loss": 1.4153, "step": 13927 }, { "epoch": 0.18098800766028364, "grad_norm": 0.2938156723976135, "learning_rate": 0.00016383628588882102, "loss": 1.1903, "step": 13928 }, { "epoch": 0.1810010022041995, "grad_norm": 0.45036935806274414, "learning_rate": 0.00016383368642690965, "loss": 1.6003, "step": 13929 }, { "epoch": 0.18101399674811539, "grad_norm": 0.41746649146080017, "learning_rate": 0.00016383108696499824, "loss": 1.3764, "step": 13930 }, { "epoch": 0.18102699129203126, "grad_norm": 0.35695648193359375, "learning_rate": 0.00016382848750308687, "loss": 1.4215, "step": 13931 }, { "epoch": 0.18103998583594713, "grad_norm": 0.4255262613296509, "learning_rate": 0.0001638258880411755, "loss": 1.4189, "step": 13932 }, { "epoch": 0.181052980379863, "grad_norm": 0.36589908599853516, "learning_rate": 0.0001638232885792641, "loss": 1.3418, "step": 13933 }, { "epoch": 0.18106597492377888, "grad_norm": 0.39140254259109497, "learning_rate": 0.0001638206891173527, "loss": 1.3956, "step": 13934 }, { "epoch": 0.18107896946769475, "grad_norm": 0.39601925015449524, "learning_rate": 0.00016381808965544134, "loss": 1.4962, "step": 13935 }, { "epoch": 0.18109196401161062, "grad_norm": 0.3855540156364441, "learning_rate": 0.00016381549019352996, "loss": 1.5114, "step": 13936 }, { "epoch": 0.1811049585555265, "grad_norm": 0.44705599546432495, "learning_rate": 0.00016381289073161856, "loss": 1.4459, "step": 13937 }, { "epoch": 0.18111795309944237, "grad_norm": 0.3450058102607727, "learning_rate": 0.00016381029126970718, "loss": 1.3676, "step": 13938 }, { "epoch": 0.18113094764335824, "grad_norm": 0.3786931037902832, "learning_rate": 0.0001638076918077958, "loss": 1.4215, "step": 13939 }, { "epoch": 0.18114394218727412, "grad_norm": 0.3350026309490204, "learning_rate": 0.0001638050923458844, "loss": 1.2822, "step": 13940 }, { "epoch": 0.18115693673119, "grad_norm": 0.5217697620391846, "learning_rate": 0.00016380249288397303, "loss": 1.3663, "step": 13941 }, { "epoch": 0.18116993127510586, "grad_norm": 0.41333919763565063, "learning_rate": 0.00016379989342206163, "loss": 1.5799, "step": 13942 }, { "epoch": 0.18118292581902173, "grad_norm": 0.38928717374801636, "learning_rate": 0.00016379729396015028, "loss": 1.3081, "step": 13943 }, { "epoch": 0.1811959203629376, "grad_norm": 0.38982003927230835, "learning_rate": 0.00016379469449823888, "loss": 1.3577, "step": 13944 }, { "epoch": 0.18120891490685348, "grad_norm": 0.25227731466293335, "learning_rate": 0.00016379209503632747, "loss": 1.4509, "step": 13945 }, { "epoch": 0.18122190945076935, "grad_norm": 0.39276865124702454, "learning_rate": 0.0001637894955744161, "loss": 1.4878, "step": 13946 }, { "epoch": 0.18123490399468523, "grad_norm": 0.45176300406455994, "learning_rate": 0.00016378689611250472, "loss": 1.2566, "step": 13947 }, { "epoch": 0.1812478985386011, "grad_norm": 0.38843151926994324, "learning_rate": 0.00016378429665059335, "loss": 1.4452, "step": 13948 }, { "epoch": 0.18126089308251697, "grad_norm": 0.31233280897140503, "learning_rate": 0.00016378169718868195, "loss": 1.4721, "step": 13949 }, { "epoch": 0.18127388762643284, "grad_norm": 0.4567350447177887, "learning_rate": 0.00016377909772677057, "loss": 1.4581, "step": 13950 }, { "epoch": 0.18128688217034872, "grad_norm": 0.3333583474159241, "learning_rate": 0.0001637764982648592, "loss": 1.3596, "step": 13951 }, { "epoch": 0.1812998767142646, "grad_norm": 0.3850037753582001, "learning_rate": 0.0001637738988029478, "loss": 1.4526, "step": 13952 }, { "epoch": 0.18131287125818046, "grad_norm": 0.39848461747169495, "learning_rate": 0.00016377129934103642, "loss": 1.4833, "step": 13953 }, { "epoch": 0.18132586580209634, "grad_norm": 0.4641290605068207, "learning_rate": 0.000163768699879125, "loss": 1.3582, "step": 13954 }, { "epoch": 0.1813388603460122, "grad_norm": 0.43027442693710327, "learning_rate": 0.00016376610041721366, "loss": 1.5244, "step": 13955 }, { "epoch": 0.18135185488992808, "grad_norm": 0.38939008116722107, "learning_rate": 0.00016376350095530226, "loss": 1.5802, "step": 13956 }, { "epoch": 0.18136484943384396, "grad_norm": 0.3373205363750458, "learning_rate": 0.00016376090149339086, "loss": 1.2878, "step": 13957 }, { "epoch": 0.18137784397775983, "grad_norm": 0.3444799780845642, "learning_rate": 0.00016375830203147948, "loss": 1.4647, "step": 13958 }, { "epoch": 0.1813908385216757, "grad_norm": 0.34580856561660767, "learning_rate": 0.0001637557025695681, "loss": 1.4113, "step": 13959 }, { "epoch": 0.18140383306559157, "grad_norm": 0.3667464852333069, "learning_rate": 0.00016375310310765673, "loss": 1.2927, "step": 13960 }, { "epoch": 0.18141682760950745, "grad_norm": 0.5043931603431702, "learning_rate": 0.00016375050364574533, "loss": 1.1624, "step": 13961 }, { "epoch": 0.18142982215342332, "grad_norm": 0.42321163415908813, "learning_rate": 0.00016374790418383395, "loss": 1.2044, "step": 13962 }, { "epoch": 0.1814428166973392, "grad_norm": 0.3954523503780365, "learning_rate": 0.00016374530472192258, "loss": 1.5502, "step": 13963 }, { "epoch": 0.18145581124125507, "grad_norm": 0.4727545380592346, "learning_rate": 0.00016374270526001118, "loss": 1.4951, "step": 13964 }, { "epoch": 0.18146880578517094, "grad_norm": 0.45332348346710205, "learning_rate": 0.0001637401057980998, "loss": 1.3232, "step": 13965 }, { "epoch": 0.1814818003290868, "grad_norm": 0.4639752209186554, "learning_rate": 0.0001637375063361884, "loss": 1.4182, "step": 13966 }, { "epoch": 0.18149479487300268, "grad_norm": 0.2845968008041382, "learning_rate": 0.00016373490687427705, "loss": 1.4525, "step": 13967 }, { "epoch": 0.18150778941691856, "grad_norm": 0.42732781171798706, "learning_rate": 0.00016373230741236565, "loss": 1.3563, "step": 13968 }, { "epoch": 0.18152078396083443, "grad_norm": 0.33735087513923645, "learning_rate": 0.00016372970795045427, "loss": 1.2831, "step": 13969 }, { "epoch": 0.18153377850475033, "grad_norm": 0.4555298984050751, "learning_rate": 0.00016372710848854287, "loss": 1.4099, "step": 13970 }, { "epoch": 0.1815467730486662, "grad_norm": 0.4065535366535187, "learning_rate": 0.0001637245090266315, "loss": 1.4597, "step": 13971 }, { "epoch": 0.18155976759258208, "grad_norm": 0.367956280708313, "learning_rate": 0.00016372190956472012, "loss": 1.3735, "step": 13972 }, { "epoch": 0.18157276213649795, "grad_norm": 0.40117332339286804, "learning_rate": 0.00016371931010280872, "loss": 1.401, "step": 13973 }, { "epoch": 0.18158575668041382, "grad_norm": 0.31792551279067993, "learning_rate": 0.00016371671064089734, "loss": 1.323, "step": 13974 }, { "epoch": 0.1815987512243297, "grad_norm": 0.42645546793937683, "learning_rate": 0.00016371411117898596, "loss": 1.3871, "step": 13975 }, { "epoch": 0.18161174576824557, "grad_norm": 0.40832582116127014, "learning_rate": 0.00016371151171707456, "loss": 1.6539, "step": 13976 }, { "epoch": 0.18162474031216144, "grad_norm": 0.4843306839466095, "learning_rate": 0.0001637089122551632, "loss": 1.5694, "step": 13977 }, { "epoch": 0.18163773485607732, "grad_norm": 0.34088650345802307, "learning_rate": 0.0001637063127932518, "loss": 1.4222, "step": 13978 }, { "epoch": 0.1816507293999932, "grad_norm": 0.42761799693107605, "learning_rate": 0.00016370371333134044, "loss": 1.3876, "step": 13979 }, { "epoch": 0.18166372394390906, "grad_norm": 0.3573712408542633, "learning_rate": 0.00016370111386942903, "loss": 1.5206, "step": 13980 }, { "epoch": 0.18167671848782493, "grad_norm": 0.3561965227127075, "learning_rate": 0.00016369851440751766, "loss": 1.5019, "step": 13981 }, { "epoch": 0.1816897130317408, "grad_norm": 0.33115676045417786, "learning_rate": 0.00016369591494560628, "loss": 1.246, "step": 13982 }, { "epoch": 0.18170270757565668, "grad_norm": 0.3669005334377289, "learning_rate": 0.00016369331548369488, "loss": 1.3176, "step": 13983 }, { "epoch": 0.18171570211957255, "grad_norm": 0.3958088755607605, "learning_rate": 0.0001636907160217835, "loss": 1.6497, "step": 13984 }, { "epoch": 0.18172869666348843, "grad_norm": 0.38397201895713806, "learning_rate": 0.0001636881165598721, "loss": 1.3685, "step": 13985 }, { "epoch": 0.1817416912074043, "grad_norm": 0.3442166745662689, "learning_rate": 0.00016368551709796073, "loss": 1.5329, "step": 13986 }, { "epoch": 0.18175468575132017, "grad_norm": 0.36517348885536194, "learning_rate": 0.00016368291763604935, "loss": 1.3943, "step": 13987 }, { "epoch": 0.18176768029523604, "grad_norm": 0.4144994616508484, "learning_rate": 0.00016368031817413795, "loss": 1.6596, "step": 13988 }, { "epoch": 0.18178067483915192, "grad_norm": 0.4141557216644287, "learning_rate": 0.00016367771871222657, "loss": 1.4506, "step": 13989 }, { "epoch": 0.1817936693830678, "grad_norm": 0.34273284673690796, "learning_rate": 0.0001636751192503152, "loss": 1.5336, "step": 13990 }, { "epoch": 0.18180666392698366, "grad_norm": 0.3870570957660675, "learning_rate": 0.00016367251978840382, "loss": 1.4961, "step": 13991 }, { "epoch": 0.18181965847089954, "grad_norm": 0.36208274960517883, "learning_rate": 0.00016366992032649242, "loss": 1.2687, "step": 13992 }, { "epoch": 0.1818326530148154, "grad_norm": 0.4293138384819031, "learning_rate": 0.00016366732086458104, "loss": 1.3902, "step": 13993 }, { "epoch": 0.18184564755873128, "grad_norm": 0.38129618763923645, "learning_rate": 0.00016366472140266967, "loss": 1.4509, "step": 13994 }, { "epoch": 0.18185864210264716, "grad_norm": 0.39229297637939453, "learning_rate": 0.00016366212194075826, "loss": 1.5316, "step": 13995 }, { "epoch": 0.18187163664656303, "grad_norm": 0.4997608959674835, "learning_rate": 0.0001636595224788469, "loss": 1.6033, "step": 13996 }, { "epoch": 0.1818846311904789, "grad_norm": 0.36065179109573364, "learning_rate": 0.0001636569230169355, "loss": 1.2498, "step": 13997 }, { "epoch": 0.18189762573439477, "grad_norm": 0.38008543848991394, "learning_rate": 0.00016365432355502414, "loss": 1.3301, "step": 13998 }, { "epoch": 0.18191062027831065, "grad_norm": 0.44220849871635437, "learning_rate": 0.00016365172409311274, "loss": 1.5326, "step": 13999 }, { "epoch": 0.18192361482222652, "grad_norm": 0.431658536195755, "learning_rate": 0.00016364912463120133, "loss": 1.5058, "step": 14000 }, { "epoch": 0.1819366093661424, "grad_norm": 0.43918317556381226, "learning_rate": 0.00016364652516928996, "loss": 1.4801, "step": 14001 }, { "epoch": 0.18194960391005827, "grad_norm": 0.47162097692489624, "learning_rate": 0.00016364392570737858, "loss": 1.3416, "step": 14002 }, { "epoch": 0.18196259845397414, "grad_norm": 0.38275420665740967, "learning_rate": 0.0001636413262454672, "loss": 1.4922, "step": 14003 }, { "epoch": 0.18197559299789, "grad_norm": 0.385733962059021, "learning_rate": 0.0001636387267835558, "loss": 1.529, "step": 14004 }, { "epoch": 0.18198858754180589, "grad_norm": 0.2653253376483917, "learning_rate": 0.00016363612732164443, "loss": 1.3918, "step": 14005 }, { "epoch": 0.18200158208572176, "grad_norm": 0.3419744372367859, "learning_rate": 0.00016363352785973305, "loss": 1.4181, "step": 14006 }, { "epoch": 0.18201457662963763, "grad_norm": 0.41318562626838684, "learning_rate": 0.00016363092839782165, "loss": 1.4796, "step": 14007 }, { "epoch": 0.1820275711735535, "grad_norm": 0.389669805765152, "learning_rate": 0.00016362832893591027, "loss": 1.396, "step": 14008 }, { "epoch": 0.18204056571746938, "grad_norm": 0.3908182382583618, "learning_rate": 0.0001636257294739989, "loss": 1.4028, "step": 14009 }, { "epoch": 0.18205356026138525, "grad_norm": 0.401261568069458, "learning_rate": 0.00016362313001208752, "loss": 1.632, "step": 14010 }, { "epoch": 0.18206655480530112, "grad_norm": 0.4321623146533966, "learning_rate": 0.00016362053055017612, "loss": 1.5315, "step": 14011 }, { "epoch": 0.182079549349217, "grad_norm": 0.40462014079093933, "learning_rate": 0.00016361793108826472, "loss": 1.4829, "step": 14012 }, { "epoch": 0.18209254389313287, "grad_norm": 0.37876957654953003, "learning_rate": 0.00016361533162635337, "loss": 1.288, "step": 14013 }, { "epoch": 0.18210553843704874, "grad_norm": 0.38370659947395325, "learning_rate": 0.00016361273216444197, "loss": 1.3761, "step": 14014 }, { "epoch": 0.18211853298096461, "grad_norm": 0.3439337909221649, "learning_rate": 0.0001636101327025306, "loss": 1.4422, "step": 14015 }, { "epoch": 0.1821315275248805, "grad_norm": 0.42591163516044617, "learning_rate": 0.0001636075332406192, "loss": 1.4578, "step": 14016 }, { "epoch": 0.18214452206879636, "grad_norm": 0.41921764612197876, "learning_rate": 0.00016360493377870781, "loss": 1.3393, "step": 14017 }, { "epoch": 0.18215751661271223, "grad_norm": 0.3973502218723297, "learning_rate": 0.00016360233431679644, "loss": 1.2588, "step": 14018 }, { "epoch": 0.1821705111566281, "grad_norm": 0.4169273376464844, "learning_rate": 0.00016359973485488504, "loss": 1.4287, "step": 14019 }, { "epoch": 0.18218350570054398, "grad_norm": 0.3799017667770386, "learning_rate": 0.00016359713539297366, "loss": 1.5446, "step": 14020 }, { "epoch": 0.18219650024445985, "grad_norm": 0.43941307067871094, "learning_rate": 0.00016359453593106228, "loss": 1.4363, "step": 14021 }, { "epoch": 0.18220949478837573, "grad_norm": 0.3740796148777008, "learning_rate": 0.0001635919364691509, "loss": 1.3879, "step": 14022 }, { "epoch": 0.1822224893322916, "grad_norm": 0.44395968317985535, "learning_rate": 0.0001635893370072395, "loss": 1.526, "step": 14023 }, { "epoch": 0.18223548387620747, "grad_norm": 0.3495246171951294, "learning_rate": 0.0001635867375453281, "loss": 1.3491, "step": 14024 }, { "epoch": 0.18224847842012334, "grad_norm": 0.3668956756591797, "learning_rate": 0.00016358413808341676, "loss": 1.4034, "step": 14025 }, { "epoch": 0.18226147296403922, "grad_norm": 0.44895651936531067, "learning_rate": 0.00016358153862150535, "loss": 1.4097, "step": 14026 }, { "epoch": 0.1822744675079551, "grad_norm": 0.36002567410469055, "learning_rate": 0.00016357893915959398, "loss": 1.157, "step": 14027 }, { "epoch": 0.18228746205187096, "grad_norm": 0.31939542293548584, "learning_rate": 0.00016357633969768257, "loss": 1.4068, "step": 14028 }, { "epoch": 0.18230045659578684, "grad_norm": 0.45844006538391113, "learning_rate": 0.0001635737402357712, "loss": 1.4876, "step": 14029 }, { "epoch": 0.1823134511397027, "grad_norm": 0.3604048490524292, "learning_rate": 0.00016357114077385982, "loss": 1.5303, "step": 14030 }, { "epoch": 0.18232644568361858, "grad_norm": 0.46258118748664856, "learning_rate": 0.00016356854131194842, "loss": 1.4353, "step": 14031 }, { "epoch": 0.18233944022753446, "grad_norm": 0.3510545492172241, "learning_rate": 0.00016356594185003705, "loss": 1.444, "step": 14032 }, { "epoch": 0.18235243477145033, "grad_norm": 0.3985535204410553, "learning_rate": 0.00016356334238812567, "loss": 1.4398, "step": 14033 }, { "epoch": 0.1823654293153662, "grad_norm": 0.45214900374412537, "learning_rate": 0.0001635607429262143, "loss": 1.5935, "step": 14034 }, { "epoch": 0.18237842385928207, "grad_norm": 0.43656328320503235, "learning_rate": 0.0001635581434643029, "loss": 1.6284, "step": 14035 }, { "epoch": 0.18239141840319795, "grad_norm": 0.3589610755443573, "learning_rate": 0.00016355554400239152, "loss": 1.3415, "step": 14036 }, { "epoch": 0.18240441294711382, "grad_norm": 0.4076949656009674, "learning_rate": 0.00016355294454048014, "loss": 1.3789, "step": 14037 }, { "epoch": 0.1824174074910297, "grad_norm": 0.40643566846847534, "learning_rate": 0.00016355034507856874, "loss": 1.4208, "step": 14038 }, { "epoch": 0.18243040203494557, "grad_norm": 0.376617431640625, "learning_rate": 0.00016354774561665736, "loss": 1.6287, "step": 14039 }, { "epoch": 0.18244339657886144, "grad_norm": 0.4039861857891083, "learning_rate": 0.00016354514615474596, "loss": 1.4394, "step": 14040 }, { "epoch": 0.1824563911227773, "grad_norm": 0.2979445159435272, "learning_rate": 0.00016354254669283458, "loss": 1.0381, "step": 14041 }, { "epoch": 0.18246938566669318, "grad_norm": 0.33211493492126465, "learning_rate": 0.0001635399472309232, "loss": 1.484, "step": 14042 }, { "epoch": 0.18248238021060906, "grad_norm": 0.3744337558746338, "learning_rate": 0.0001635373477690118, "loss": 1.5327, "step": 14043 }, { "epoch": 0.18249537475452493, "grad_norm": 0.3544542193412781, "learning_rate": 0.00016353474830710043, "loss": 1.5058, "step": 14044 }, { "epoch": 0.1825083692984408, "grad_norm": 0.29757222533226013, "learning_rate": 0.00016353214884518906, "loss": 1.3475, "step": 14045 }, { "epoch": 0.1825213638423567, "grad_norm": 0.41322824358940125, "learning_rate": 0.00016352954938327768, "loss": 1.5791, "step": 14046 }, { "epoch": 0.18253435838627258, "grad_norm": 0.35056132078170776, "learning_rate": 0.00016352694992136628, "loss": 1.4366, "step": 14047 }, { "epoch": 0.18254735293018845, "grad_norm": 0.42155611515045166, "learning_rate": 0.0001635243504594549, "loss": 1.5535, "step": 14048 }, { "epoch": 0.18256034747410432, "grad_norm": 0.34348419308662415, "learning_rate": 0.00016352175099754353, "loss": 1.4098, "step": 14049 }, { "epoch": 0.1825733420180202, "grad_norm": 0.32147687673568726, "learning_rate": 0.00016351915153563212, "loss": 1.4183, "step": 14050 }, { "epoch": 0.18258633656193607, "grad_norm": 0.504754900932312, "learning_rate": 0.00016351655207372075, "loss": 1.5397, "step": 14051 }, { "epoch": 0.18259933110585194, "grad_norm": 0.3047609031200409, "learning_rate": 0.00016351395261180937, "loss": 1.5062, "step": 14052 }, { "epoch": 0.18261232564976781, "grad_norm": 0.29748672246932983, "learning_rate": 0.000163511353149898, "loss": 1.3646, "step": 14053 }, { "epoch": 0.1826253201936837, "grad_norm": 0.4541914761066437, "learning_rate": 0.0001635087536879866, "loss": 1.5072, "step": 14054 }, { "epoch": 0.18263831473759956, "grad_norm": 0.46341708302497864, "learning_rate": 0.0001635061542260752, "loss": 1.4969, "step": 14055 }, { "epoch": 0.18265130928151543, "grad_norm": 0.4568157494068146, "learning_rate": 0.00016350355476416384, "loss": 1.4673, "step": 14056 }, { "epoch": 0.1826643038254313, "grad_norm": 0.3995761573314667, "learning_rate": 0.00016350095530225244, "loss": 1.3008, "step": 14057 }, { "epoch": 0.18267729836934718, "grad_norm": 0.39155545830726624, "learning_rate": 0.00016349835584034107, "loss": 1.5998, "step": 14058 }, { "epoch": 0.18269029291326305, "grad_norm": 0.34831470251083374, "learning_rate": 0.00016349575637842966, "loss": 1.3119, "step": 14059 }, { "epoch": 0.18270328745717893, "grad_norm": 0.37073448300361633, "learning_rate": 0.0001634931569165183, "loss": 1.3918, "step": 14060 }, { "epoch": 0.1827162820010948, "grad_norm": 0.32926833629608154, "learning_rate": 0.0001634905574546069, "loss": 1.568, "step": 14061 }, { "epoch": 0.18272927654501067, "grad_norm": 0.3862396776676178, "learning_rate": 0.0001634879579926955, "loss": 1.4659, "step": 14062 }, { "epoch": 0.18274227108892654, "grad_norm": 0.44544902443885803, "learning_rate": 0.00016348535853078413, "loss": 1.4969, "step": 14063 }, { "epoch": 0.18275526563284242, "grad_norm": 0.28099730610847473, "learning_rate": 0.00016348275906887276, "loss": 1.3014, "step": 14064 }, { "epoch": 0.1827682601767583, "grad_norm": 0.41786128282546997, "learning_rate": 0.00016348015960696138, "loss": 1.5926, "step": 14065 }, { "epoch": 0.18278125472067416, "grad_norm": 0.3417384624481201, "learning_rate": 0.00016347756014504998, "loss": 1.2703, "step": 14066 }, { "epoch": 0.18279424926459004, "grad_norm": 0.45354530215263367, "learning_rate": 0.00016347496068313858, "loss": 1.1899, "step": 14067 }, { "epoch": 0.1828072438085059, "grad_norm": 0.3581564724445343, "learning_rate": 0.00016347236122122723, "loss": 1.4701, "step": 14068 }, { "epoch": 0.18282023835242178, "grad_norm": 0.4057212471961975, "learning_rate": 0.00016346976175931583, "loss": 1.6823, "step": 14069 }, { "epoch": 0.18283323289633766, "grad_norm": 0.40115946531295776, "learning_rate": 0.00016346716229740445, "loss": 1.4272, "step": 14070 }, { "epoch": 0.18284622744025353, "grad_norm": 0.38686510920524597, "learning_rate": 0.00016346456283549305, "loss": 1.4186, "step": 14071 }, { "epoch": 0.1828592219841694, "grad_norm": 0.40734225511550903, "learning_rate": 0.00016346196337358167, "loss": 1.2761, "step": 14072 }, { "epoch": 0.18287221652808527, "grad_norm": 0.5120412111282349, "learning_rate": 0.0001634593639116703, "loss": 1.4514, "step": 14073 }, { "epoch": 0.18288521107200115, "grad_norm": 0.3747871220111847, "learning_rate": 0.0001634567644497589, "loss": 1.432, "step": 14074 }, { "epoch": 0.18289820561591702, "grad_norm": 0.3794386386871338, "learning_rate": 0.00016345416498784752, "loss": 1.402, "step": 14075 }, { "epoch": 0.1829112001598329, "grad_norm": 0.42419809103012085, "learning_rate": 0.00016345156552593614, "loss": 1.5567, "step": 14076 }, { "epoch": 0.18292419470374877, "grad_norm": 0.38697177171707153, "learning_rate": 0.00016344896606402477, "loss": 1.4489, "step": 14077 }, { "epoch": 0.18293718924766464, "grad_norm": 0.3752872049808502, "learning_rate": 0.00016344636660211337, "loss": 1.3389, "step": 14078 }, { "epoch": 0.1829501837915805, "grad_norm": 0.44811201095581055, "learning_rate": 0.00016344376714020196, "loss": 1.4964, "step": 14079 }, { "epoch": 0.18296317833549638, "grad_norm": 0.36299264430999756, "learning_rate": 0.00016344116767829061, "loss": 1.2521, "step": 14080 }, { "epoch": 0.18297617287941226, "grad_norm": 0.4067133069038391, "learning_rate": 0.0001634385682163792, "loss": 1.3476, "step": 14081 }, { "epoch": 0.18298916742332813, "grad_norm": 0.3378688395023346, "learning_rate": 0.00016343596875446784, "loss": 1.5133, "step": 14082 }, { "epoch": 0.183002161967244, "grad_norm": 0.3995343744754791, "learning_rate": 0.00016343336929255646, "loss": 1.5854, "step": 14083 }, { "epoch": 0.18301515651115988, "grad_norm": 0.4022303521633148, "learning_rate": 0.00016343076983064506, "loss": 1.4406, "step": 14084 }, { "epoch": 0.18302815105507575, "grad_norm": 0.4465762972831726, "learning_rate": 0.00016342817036873368, "loss": 1.3251, "step": 14085 }, { "epoch": 0.18304114559899162, "grad_norm": 0.4093140959739685, "learning_rate": 0.00016342557090682228, "loss": 1.4383, "step": 14086 }, { "epoch": 0.1830541401429075, "grad_norm": 0.29103437066078186, "learning_rate": 0.00016342297144491093, "loss": 1.4526, "step": 14087 }, { "epoch": 0.18306713468682337, "grad_norm": 0.3808046281337738, "learning_rate": 0.00016342037198299953, "loss": 1.5579, "step": 14088 }, { "epoch": 0.18308012923073924, "grad_norm": 0.2769884467124939, "learning_rate": 0.00016341777252108815, "loss": 1.3528, "step": 14089 }, { "epoch": 0.18309312377465511, "grad_norm": 0.42562422156333923, "learning_rate": 0.00016341517305917675, "loss": 1.51, "step": 14090 }, { "epoch": 0.183106118318571, "grad_norm": 0.3662552535533905, "learning_rate": 0.00016341257359726537, "loss": 1.2242, "step": 14091 }, { "epoch": 0.18311911286248686, "grad_norm": 0.37842535972595215, "learning_rate": 0.000163409974135354, "loss": 1.3649, "step": 14092 }, { "epoch": 0.18313210740640273, "grad_norm": 0.29212892055511475, "learning_rate": 0.0001634073746734426, "loss": 1.2562, "step": 14093 }, { "epoch": 0.1831451019503186, "grad_norm": 0.3692339360713959, "learning_rate": 0.00016340477521153122, "loss": 1.5351, "step": 14094 }, { "epoch": 0.18315809649423448, "grad_norm": 0.36979204416275024, "learning_rate": 0.00016340217574961985, "loss": 1.2865, "step": 14095 }, { "epoch": 0.18317109103815035, "grad_norm": 0.4215281307697296, "learning_rate": 0.00016339957628770844, "loss": 1.3955, "step": 14096 }, { "epoch": 0.18318408558206623, "grad_norm": 0.3555452823638916, "learning_rate": 0.00016339697682579707, "loss": 1.5199, "step": 14097 }, { "epoch": 0.1831970801259821, "grad_norm": 0.5135360956192017, "learning_rate": 0.00016339437736388567, "loss": 1.3644, "step": 14098 }, { "epoch": 0.18321007466989797, "grad_norm": 0.385479211807251, "learning_rate": 0.00016339177790197432, "loss": 1.4385, "step": 14099 }, { "epoch": 0.18322306921381384, "grad_norm": 0.3867081105709076, "learning_rate": 0.00016338917844006291, "loss": 1.5387, "step": 14100 }, { "epoch": 0.18323606375772972, "grad_norm": 0.43120014667510986, "learning_rate": 0.00016338657897815154, "loss": 1.3749, "step": 14101 }, { "epoch": 0.1832490583016456, "grad_norm": 0.3811238408088684, "learning_rate": 0.00016338397951624014, "loss": 1.3773, "step": 14102 }, { "epoch": 0.18326205284556146, "grad_norm": 0.3594764173030853, "learning_rate": 0.00016338138005432876, "loss": 1.5016, "step": 14103 }, { "epoch": 0.18327504738947734, "grad_norm": 0.29420915246009827, "learning_rate": 0.00016337878059241738, "loss": 1.2895, "step": 14104 }, { "epoch": 0.1832880419333932, "grad_norm": 0.4095514714717865, "learning_rate": 0.00016337618113050598, "loss": 1.5777, "step": 14105 }, { "epoch": 0.18330103647730908, "grad_norm": 0.2775671184062958, "learning_rate": 0.0001633735816685946, "loss": 1.3844, "step": 14106 }, { "epoch": 0.18331403102122495, "grad_norm": 0.4624216556549072, "learning_rate": 0.00016337098220668323, "loss": 1.4317, "step": 14107 }, { "epoch": 0.18332702556514083, "grad_norm": 0.46793991327285767, "learning_rate": 0.00016336838274477183, "loss": 1.3629, "step": 14108 }, { "epoch": 0.1833400201090567, "grad_norm": 0.37692970037460327, "learning_rate": 0.00016336578328286045, "loss": 1.4633, "step": 14109 }, { "epoch": 0.18335301465297257, "grad_norm": 0.4139084219932556, "learning_rate": 0.00016336318382094905, "loss": 1.4245, "step": 14110 }, { "epoch": 0.18336600919688845, "grad_norm": 0.39490747451782227, "learning_rate": 0.0001633605843590377, "loss": 1.4193, "step": 14111 }, { "epoch": 0.18337900374080432, "grad_norm": 0.48330605030059814, "learning_rate": 0.0001633579848971263, "loss": 1.3912, "step": 14112 }, { "epoch": 0.1833919982847202, "grad_norm": 0.345071017742157, "learning_rate": 0.00016335538543521492, "loss": 1.5614, "step": 14113 }, { "epoch": 0.18340499282863607, "grad_norm": 0.37234464287757874, "learning_rate": 0.00016335278597330352, "loss": 1.4033, "step": 14114 }, { "epoch": 0.18341798737255194, "grad_norm": 0.38158395886421204, "learning_rate": 0.00016335018651139215, "loss": 1.4478, "step": 14115 }, { "epoch": 0.1834309819164678, "grad_norm": 0.423510879278183, "learning_rate": 0.00016334758704948077, "loss": 1.2816, "step": 14116 }, { "epoch": 0.18344397646038368, "grad_norm": 0.2975612282752991, "learning_rate": 0.00016334498758756937, "loss": 1.3716, "step": 14117 }, { "epoch": 0.18345697100429956, "grad_norm": 0.38476189970970154, "learning_rate": 0.000163342388125658, "loss": 1.4032, "step": 14118 }, { "epoch": 0.18346996554821543, "grad_norm": 0.3861877918243408, "learning_rate": 0.00016333978866374662, "loss": 1.4978, "step": 14119 }, { "epoch": 0.1834829600921313, "grad_norm": 0.31106966733932495, "learning_rate": 0.00016333718920183524, "loss": 1.3745, "step": 14120 }, { "epoch": 0.18349595463604718, "grad_norm": 0.3159928619861603, "learning_rate": 0.00016333458973992384, "loss": 1.5574, "step": 14121 }, { "epoch": 0.18350894917996308, "grad_norm": 0.4266694486141205, "learning_rate": 0.00016333199027801246, "loss": 1.6152, "step": 14122 }, { "epoch": 0.18352194372387895, "grad_norm": 0.4439978301525116, "learning_rate": 0.0001633293908161011, "loss": 1.5469, "step": 14123 }, { "epoch": 0.18353493826779482, "grad_norm": 0.5029610395431519, "learning_rate": 0.00016332679135418968, "loss": 1.5436, "step": 14124 }, { "epoch": 0.1835479328117107, "grad_norm": 0.45036670565605164, "learning_rate": 0.0001633241918922783, "loss": 1.328, "step": 14125 }, { "epoch": 0.18356092735562657, "grad_norm": 0.29177844524383545, "learning_rate": 0.00016332159243036693, "loss": 1.5317, "step": 14126 }, { "epoch": 0.18357392189954244, "grad_norm": 0.4240396320819855, "learning_rate": 0.00016331899296845553, "loss": 1.5785, "step": 14127 }, { "epoch": 0.18358691644345831, "grad_norm": 0.42567187547683716, "learning_rate": 0.00016331639350654416, "loss": 1.6303, "step": 14128 }, { "epoch": 0.1835999109873742, "grad_norm": 0.3158164620399475, "learning_rate": 0.00016331379404463275, "loss": 1.2832, "step": 14129 }, { "epoch": 0.18361290553129006, "grad_norm": 0.3543689250946045, "learning_rate": 0.0001633111945827214, "loss": 1.3565, "step": 14130 }, { "epoch": 0.18362590007520593, "grad_norm": 0.442054808139801, "learning_rate": 0.00016330859512081, "loss": 1.3854, "step": 14131 }, { "epoch": 0.1836388946191218, "grad_norm": 0.40369337797164917, "learning_rate": 0.00016330599565889863, "loss": 1.4277, "step": 14132 }, { "epoch": 0.18365188916303768, "grad_norm": 0.4353345036506653, "learning_rate": 0.00016330339619698722, "loss": 1.603, "step": 14133 }, { "epoch": 0.18366488370695355, "grad_norm": 0.3719423711299896, "learning_rate": 0.00016330079673507585, "loss": 1.419, "step": 14134 }, { "epoch": 0.18367787825086943, "grad_norm": 0.3628728985786438, "learning_rate": 0.00016329819727316447, "loss": 1.3113, "step": 14135 }, { "epoch": 0.1836908727947853, "grad_norm": 0.47335267066955566, "learning_rate": 0.00016329559781125307, "loss": 1.5012, "step": 14136 }, { "epoch": 0.18370386733870117, "grad_norm": 0.4623568654060364, "learning_rate": 0.0001632929983493417, "loss": 1.5038, "step": 14137 }, { "epoch": 0.18371686188261704, "grad_norm": 0.42240414023399353, "learning_rate": 0.00016329039888743032, "loss": 1.4782, "step": 14138 }, { "epoch": 0.18372985642653292, "grad_norm": 0.41479647159576416, "learning_rate": 0.00016328779942551892, "loss": 1.3718, "step": 14139 }, { "epoch": 0.1837428509704488, "grad_norm": 0.42063257098197937, "learning_rate": 0.00016328519996360754, "loss": 1.3496, "step": 14140 }, { "epoch": 0.18375584551436466, "grad_norm": 0.3319729268550873, "learning_rate": 0.00016328260050169614, "loss": 1.5304, "step": 14141 }, { "epoch": 0.18376884005828054, "grad_norm": 0.39472097158432007, "learning_rate": 0.0001632800010397848, "loss": 1.4674, "step": 14142 }, { "epoch": 0.1837818346021964, "grad_norm": 0.35716456174850464, "learning_rate": 0.0001632774015778734, "loss": 1.2327, "step": 14143 }, { "epoch": 0.18379482914611228, "grad_norm": 0.36894872784614563, "learning_rate": 0.000163274802115962, "loss": 1.5294, "step": 14144 }, { "epoch": 0.18380782369002815, "grad_norm": 0.3244621455669403, "learning_rate": 0.0001632722026540506, "loss": 1.3939, "step": 14145 }, { "epoch": 0.18382081823394403, "grad_norm": 0.3625744879245758, "learning_rate": 0.00016326960319213923, "loss": 1.4218, "step": 14146 }, { "epoch": 0.1838338127778599, "grad_norm": 0.4209868907928467, "learning_rate": 0.00016326700373022786, "loss": 1.5122, "step": 14147 }, { "epoch": 0.18384680732177577, "grad_norm": 0.45798903703689575, "learning_rate": 0.00016326440426831646, "loss": 1.528, "step": 14148 }, { "epoch": 0.18385980186569165, "grad_norm": 0.37147215008735657, "learning_rate": 0.00016326180480640508, "loss": 1.3215, "step": 14149 }, { "epoch": 0.18387279640960752, "grad_norm": 0.4124443531036377, "learning_rate": 0.0001632592053444937, "loss": 1.4528, "step": 14150 }, { "epoch": 0.1838857909535234, "grad_norm": 0.4228821396827698, "learning_rate": 0.0001632566058825823, "loss": 1.5896, "step": 14151 }, { "epoch": 0.18389878549743927, "grad_norm": 0.4723481237888336, "learning_rate": 0.00016325400642067093, "loss": 1.4238, "step": 14152 }, { "epoch": 0.18391178004135514, "grad_norm": 0.4504651427268982, "learning_rate": 0.00016325140695875952, "loss": 1.5265, "step": 14153 }, { "epoch": 0.183924774585271, "grad_norm": 0.34285762906074524, "learning_rate": 0.00016324880749684818, "loss": 1.297, "step": 14154 }, { "epoch": 0.18393776912918688, "grad_norm": 0.422423392534256, "learning_rate": 0.00016324620803493677, "loss": 1.3698, "step": 14155 }, { "epoch": 0.18395076367310276, "grad_norm": 0.41746264696121216, "learning_rate": 0.0001632436085730254, "loss": 1.4437, "step": 14156 }, { "epoch": 0.18396375821701863, "grad_norm": 0.3864283263683319, "learning_rate": 0.00016324100911111402, "loss": 1.3427, "step": 14157 }, { "epoch": 0.1839767527609345, "grad_norm": 0.37371590733528137, "learning_rate": 0.00016323840964920262, "loss": 1.4361, "step": 14158 }, { "epoch": 0.18398974730485038, "grad_norm": 0.3760530352592468, "learning_rate": 0.00016323581018729124, "loss": 1.5752, "step": 14159 }, { "epoch": 0.18400274184876625, "grad_norm": 0.3577599823474884, "learning_rate": 0.00016323321072537984, "loss": 1.2996, "step": 14160 }, { "epoch": 0.18401573639268212, "grad_norm": 0.2551228404045105, "learning_rate": 0.0001632306112634685, "loss": 1.2791, "step": 14161 }, { "epoch": 0.184028730936598, "grad_norm": 0.2513895332813263, "learning_rate": 0.0001632280118015571, "loss": 1.1956, "step": 14162 }, { "epoch": 0.18404172548051387, "grad_norm": 0.4167480170726776, "learning_rate": 0.0001632254123396457, "loss": 1.5575, "step": 14163 }, { "epoch": 0.18405472002442974, "grad_norm": 0.3538648784160614, "learning_rate": 0.0001632228128777343, "loss": 1.5567, "step": 14164 }, { "epoch": 0.18406771456834561, "grad_norm": 0.3859049677848816, "learning_rate": 0.00016322021341582294, "loss": 1.3137, "step": 14165 }, { "epoch": 0.1840807091122615, "grad_norm": 0.361909955739975, "learning_rate": 0.00016321761395391156, "loss": 1.2688, "step": 14166 }, { "epoch": 0.18409370365617736, "grad_norm": 0.469959557056427, "learning_rate": 0.00016321501449200016, "loss": 1.5363, "step": 14167 }, { "epoch": 0.18410669820009323, "grad_norm": 0.339082807302475, "learning_rate": 0.00016321241503008878, "loss": 1.3991, "step": 14168 }, { "epoch": 0.1841196927440091, "grad_norm": 0.38139861822128296, "learning_rate": 0.0001632098155681774, "loss": 1.4373, "step": 14169 }, { "epoch": 0.18413268728792498, "grad_norm": 0.33445730805397034, "learning_rate": 0.000163207216106266, "loss": 1.4306, "step": 14170 }, { "epoch": 0.18414568183184085, "grad_norm": 0.3137668967247009, "learning_rate": 0.00016320461664435463, "loss": 1.3268, "step": 14171 }, { "epoch": 0.18415867637575672, "grad_norm": 0.3523469567298889, "learning_rate": 0.00016320201718244323, "loss": 1.2153, "step": 14172 }, { "epoch": 0.1841716709196726, "grad_norm": 0.35693836212158203, "learning_rate": 0.00016319941772053188, "loss": 1.3563, "step": 14173 }, { "epoch": 0.18418466546358847, "grad_norm": 0.3365146517753601, "learning_rate": 0.00016319681825862048, "loss": 1.313, "step": 14174 }, { "epoch": 0.18419766000750434, "grad_norm": 0.25826820731163025, "learning_rate": 0.0001631942187967091, "loss": 1.2878, "step": 14175 }, { "epoch": 0.18421065455142022, "grad_norm": 0.5472778081893921, "learning_rate": 0.0001631916193347977, "loss": 1.6011, "step": 14176 }, { "epoch": 0.1842236490953361, "grad_norm": 0.33301112055778503, "learning_rate": 0.00016318901987288632, "loss": 1.3935, "step": 14177 }, { "epoch": 0.18423664363925196, "grad_norm": 0.3766372501850128, "learning_rate": 0.00016318642041097495, "loss": 1.4649, "step": 14178 }, { "epoch": 0.18424963818316784, "grad_norm": 0.37197381258010864, "learning_rate": 0.00016318382094906354, "loss": 1.5793, "step": 14179 }, { "epoch": 0.1842626327270837, "grad_norm": 0.38353264331817627, "learning_rate": 0.00016318122148715217, "loss": 1.3998, "step": 14180 }, { "epoch": 0.18427562727099958, "grad_norm": 0.4202147424221039, "learning_rate": 0.0001631786220252408, "loss": 1.4258, "step": 14181 }, { "epoch": 0.18428862181491545, "grad_norm": 0.43637004494667053, "learning_rate": 0.0001631760225633294, "loss": 1.4895, "step": 14182 }, { "epoch": 0.18430161635883133, "grad_norm": 0.4439176917076111, "learning_rate": 0.00016317342310141801, "loss": 1.4651, "step": 14183 }, { "epoch": 0.1843146109027472, "grad_norm": 0.33773234486579895, "learning_rate": 0.0001631708236395066, "loss": 1.3833, "step": 14184 }, { "epoch": 0.18432760544666307, "grad_norm": 0.373038649559021, "learning_rate": 0.00016316822417759526, "loss": 1.2611, "step": 14185 }, { "epoch": 0.18434059999057895, "grad_norm": 0.3922519385814667, "learning_rate": 0.00016316562471568386, "loss": 1.4736, "step": 14186 }, { "epoch": 0.18435359453449482, "grad_norm": 0.33782196044921875, "learning_rate": 0.00016316302525377249, "loss": 1.3659, "step": 14187 }, { "epoch": 0.1843665890784107, "grad_norm": 0.3193321228027344, "learning_rate": 0.00016316042579186108, "loss": 1.1122, "step": 14188 }, { "epoch": 0.18437958362232656, "grad_norm": 0.391689270734787, "learning_rate": 0.0001631578263299497, "loss": 1.3567, "step": 14189 }, { "epoch": 0.18439257816624244, "grad_norm": 0.23014414310455322, "learning_rate": 0.00016315522686803833, "loss": 1.2416, "step": 14190 }, { "epoch": 0.1844055727101583, "grad_norm": 0.3476603925228119, "learning_rate": 0.00016315262740612693, "loss": 1.1089, "step": 14191 }, { "epoch": 0.18441856725407418, "grad_norm": 0.5365970134735107, "learning_rate": 0.00016315002794421555, "loss": 1.3553, "step": 14192 }, { "epoch": 0.18443156179799006, "grad_norm": 0.315402626991272, "learning_rate": 0.00016314742848230418, "loss": 1.5172, "step": 14193 }, { "epoch": 0.18444455634190593, "grad_norm": 0.3669466972351074, "learning_rate": 0.00016314482902039278, "loss": 1.5686, "step": 14194 }, { "epoch": 0.1844575508858218, "grad_norm": 0.4669295847415924, "learning_rate": 0.0001631422295584814, "loss": 1.4138, "step": 14195 }, { "epoch": 0.18447054542973768, "grad_norm": 0.36518460512161255, "learning_rate": 0.00016313963009657002, "loss": 1.3373, "step": 14196 }, { "epoch": 0.18448353997365355, "grad_norm": 0.437038391828537, "learning_rate": 0.00016313703063465865, "loss": 1.4447, "step": 14197 }, { "epoch": 0.18449653451756945, "grad_norm": 0.2697826325893402, "learning_rate": 0.00016313443117274725, "loss": 1.4632, "step": 14198 }, { "epoch": 0.18450952906148532, "grad_norm": 0.4508165419101715, "learning_rate": 0.00016313183171083587, "loss": 1.5388, "step": 14199 }, { "epoch": 0.1845225236054012, "grad_norm": 0.33104583621025085, "learning_rate": 0.0001631292322489245, "loss": 1.429, "step": 14200 }, { "epoch": 0.18453551814931707, "grad_norm": 0.3331906199455261, "learning_rate": 0.0001631266327870131, "loss": 1.4997, "step": 14201 }, { "epoch": 0.18454851269323294, "grad_norm": 0.4687112271785736, "learning_rate": 0.00016312403332510172, "loss": 1.4175, "step": 14202 }, { "epoch": 0.18456150723714881, "grad_norm": 0.37302765250205994, "learning_rate": 0.00016312143386319031, "loss": 1.404, "step": 14203 }, { "epoch": 0.1845745017810647, "grad_norm": 0.3988381326198578, "learning_rate": 0.00016311883440127897, "loss": 1.6268, "step": 14204 }, { "epoch": 0.18458749632498056, "grad_norm": 0.4780881702899933, "learning_rate": 0.00016311623493936756, "loss": 1.6107, "step": 14205 }, { "epoch": 0.18460049086889643, "grad_norm": 0.2883290648460388, "learning_rate": 0.00016311363547745616, "loss": 1.2355, "step": 14206 }, { "epoch": 0.1846134854128123, "grad_norm": 0.4382941424846649, "learning_rate": 0.00016311103601554479, "loss": 1.3941, "step": 14207 }, { "epoch": 0.18462647995672818, "grad_norm": 0.3439268171787262, "learning_rate": 0.0001631084365536334, "loss": 1.142, "step": 14208 }, { "epoch": 0.18463947450064405, "grad_norm": 0.2982230484485626, "learning_rate": 0.00016310583709172203, "loss": 1.3042, "step": 14209 }, { "epoch": 0.18465246904455992, "grad_norm": 0.4891691207885742, "learning_rate": 0.00016310323762981063, "loss": 1.4856, "step": 14210 }, { "epoch": 0.1846654635884758, "grad_norm": 0.4565635919570923, "learning_rate": 0.00016310063816789926, "loss": 1.4687, "step": 14211 }, { "epoch": 0.18467845813239167, "grad_norm": 0.32642844319343567, "learning_rate": 0.00016309803870598788, "loss": 1.4441, "step": 14212 }, { "epoch": 0.18469145267630754, "grad_norm": 0.3676673173904419, "learning_rate": 0.00016309543924407648, "loss": 1.2649, "step": 14213 }, { "epoch": 0.18470444722022342, "grad_norm": 0.3801582157611847, "learning_rate": 0.0001630928397821651, "loss": 1.3451, "step": 14214 }, { "epoch": 0.1847174417641393, "grad_norm": 0.3406851887702942, "learning_rate": 0.0001630902403202537, "loss": 1.4178, "step": 14215 }, { "epoch": 0.18473043630805516, "grad_norm": 0.43474966287612915, "learning_rate": 0.00016308764085834235, "loss": 1.465, "step": 14216 }, { "epoch": 0.18474343085197104, "grad_norm": 0.38164186477661133, "learning_rate": 0.00016308504139643095, "loss": 1.6319, "step": 14217 }, { "epoch": 0.1847564253958869, "grad_norm": 0.34267258644104004, "learning_rate": 0.00016308244193451955, "loss": 1.2316, "step": 14218 }, { "epoch": 0.18476941993980278, "grad_norm": 0.31737038493156433, "learning_rate": 0.00016307984247260817, "loss": 1.3975, "step": 14219 }, { "epoch": 0.18478241448371865, "grad_norm": 0.398255854845047, "learning_rate": 0.0001630772430106968, "loss": 1.4754, "step": 14220 }, { "epoch": 0.18479540902763453, "grad_norm": 0.3205759823322296, "learning_rate": 0.00016307464354878542, "loss": 1.2871, "step": 14221 }, { "epoch": 0.1848084035715504, "grad_norm": 0.4778819680213928, "learning_rate": 0.00016307204408687402, "loss": 1.5783, "step": 14222 }, { "epoch": 0.18482139811546627, "grad_norm": 0.40417802333831787, "learning_rate": 0.00016306944462496264, "loss": 1.4569, "step": 14223 }, { "epoch": 0.18483439265938215, "grad_norm": 0.321976900100708, "learning_rate": 0.00016306684516305127, "loss": 1.4991, "step": 14224 }, { "epoch": 0.18484738720329802, "grad_norm": 0.27175503969192505, "learning_rate": 0.00016306424570113986, "loss": 1.276, "step": 14225 }, { "epoch": 0.1848603817472139, "grad_norm": 0.49025505781173706, "learning_rate": 0.0001630616462392285, "loss": 1.5754, "step": 14226 }, { "epoch": 0.18487337629112977, "grad_norm": 0.37089693546295166, "learning_rate": 0.00016305904677731709, "loss": 1.3411, "step": 14227 }, { "epoch": 0.18488637083504564, "grad_norm": 0.3145904839038849, "learning_rate": 0.00016305644731540574, "loss": 1.4623, "step": 14228 }, { "epoch": 0.1848993653789615, "grad_norm": 0.4463021755218506, "learning_rate": 0.00016305384785349433, "loss": 1.4372, "step": 14229 }, { "epoch": 0.18491235992287738, "grad_norm": 0.3998275101184845, "learning_rate": 0.00016305124839158293, "loss": 1.4106, "step": 14230 }, { "epoch": 0.18492535446679326, "grad_norm": 0.33859017491340637, "learning_rate": 0.00016304864892967158, "loss": 1.4446, "step": 14231 }, { "epoch": 0.18493834901070913, "grad_norm": 0.33995580673217773, "learning_rate": 0.00016304604946776018, "loss": 1.4584, "step": 14232 }, { "epoch": 0.184951343554625, "grad_norm": 0.36465945839881897, "learning_rate": 0.0001630434500058488, "loss": 1.4677, "step": 14233 }, { "epoch": 0.18496433809854088, "grad_norm": 0.3799170255661011, "learning_rate": 0.0001630408505439374, "loss": 1.395, "step": 14234 }, { "epoch": 0.18497733264245675, "grad_norm": 0.4696342945098877, "learning_rate": 0.00016303825108202603, "loss": 1.2541, "step": 14235 }, { "epoch": 0.18499032718637262, "grad_norm": 0.31894004344940186, "learning_rate": 0.00016303565162011465, "loss": 1.4096, "step": 14236 }, { "epoch": 0.1850033217302885, "grad_norm": 0.441837340593338, "learning_rate": 0.00016303305215820325, "loss": 1.4345, "step": 14237 }, { "epoch": 0.18501631627420437, "grad_norm": 0.4292527735233307, "learning_rate": 0.00016303045269629187, "loss": 1.3966, "step": 14238 }, { "epoch": 0.18502931081812024, "grad_norm": 0.34820497035980225, "learning_rate": 0.0001630278532343805, "loss": 1.4185, "step": 14239 }, { "epoch": 0.1850423053620361, "grad_norm": 0.4338149130344391, "learning_rate": 0.00016302525377246912, "loss": 1.5342, "step": 14240 }, { "epoch": 0.185055299905952, "grad_norm": 0.45493677258491516, "learning_rate": 0.00016302265431055772, "loss": 1.4273, "step": 14241 }, { "epoch": 0.18506829444986786, "grad_norm": 0.39177313446998596, "learning_rate": 0.00016302005484864634, "loss": 1.3888, "step": 14242 }, { "epoch": 0.18508128899378373, "grad_norm": 0.4118193984031677, "learning_rate": 0.00016301745538673497, "loss": 1.537, "step": 14243 }, { "epoch": 0.1850942835376996, "grad_norm": 0.34462904930114746, "learning_rate": 0.00016301485592482357, "loss": 1.2559, "step": 14244 }, { "epoch": 0.18510727808161548, "grad_norm": 0.3598201870918274, "learning_rate": 0.0001630122564629122, "loss": 1.4949, "step": 14245 }, { "epoch": 0.18512027262553135, "grad_norm": 0.39716100692749023, "learning_rate": 0.0001630096570010008, "loss": 1.345, "step": 14246 }, { "epoch": 0.18513326716944722, "grad_norm": 0.35244104266166687, "learning_rate": 0.0001630070575390894, "loss": 1.294, "step": 14247 }, { "epoch": 0.1851462617133631, "grad_norm": 0.4715386629104614, "learning_rate": 0.00016300445807717804, "loss": 1.6238, "step": 14248 }, { "epoch": 0.18515925625727897, "grad_norm": 0.346696138381958, "learning_rate": 0.00016300185861526663, "loss": 1.5361, "step": 14249 }, { "epoch": 0.18517225080119484, "grad_norm": 0.3919036090373993, "learning_rate": 0.00016299925915335526, "loss": 1.4763, "step": 14250 }, { "epoch": 0.18518524534511072, "grad_norm": 0.560883104801178, "learning_rate": 0.00016299665969144388, "loss": 1.6108, "step": 14251 }, { "epoch": 0.1851982398890266, "grad_norm": 0.3743640184402466, "learning_rate": 0.0001629940602295325, "loss": 1.4283, "step": 14252 }, { "epoch": 0.18521123443294246, "grad_norm": 0.41247740387916565, "learning_rate": 0.0001629914607676211, "loss": 1.6239, "step": 14253 }, { "epoch": 0.18522422897685833, "grad_norm": 0.4529300332069397, "learning_rate": 0.00016298886130570973, "loss": 1.3986, "step": 14254 }, { "epoch": 0.1852372235207742, "grad_norm": 0.38263243436813354, "learning_rate": 0.00016298626184379835, "loss": 1.4298, "step": 14255 }, { "epoch": 0.18525021806469008, "grad_norm": 0.43897202610969543, "learning_rate": 0.00016298366238188695, "loss": 1.5402, "step": 14256 }, { "epoch": 0.18526321260860595, "grad_norm": 0.35338664054870605, "learning_rate": 0.00016298106291997558, "loss": 1.4421, "step": 14257 }, { "epoch": 0.18527620715252183, "grad_norm": 0.32810527086257935, "learning_rate": 0.00016297846345806417, "loss": 1.2929, "step": 14258 }, { "epoch": 0.1852892016964377, "grad_norm": 0.36444562673568726, "learning_rate": 0.00016297586399615282, "loss": 1.5201, "step": 14259 }, { "epoch": 0.18530219624035357, "grad_norm": 0.3933938443660736, "learning_rate": 0.00016297326453424142, "loss": 1.5183, "step": 14260 }, { "epoch": 0.18531519078426945, "grad_norm": 0.37150898575782776, "learning_rate": 0.00016297066507233002, "loss": 1.3755, "step": 14261 }, { "epoch": 0.18532818532818532, "grad_norm": 0.5009927153587341, "learning_rate": 0.00016296806561041864, "loss": 1.5118, "step": 14262 }, { "epoch": 0.1853411798721012, "grad_norm": 0.3658714294433594, "learning_rate": 0.00016296546614850727, "loss": 1.643, "step": 14263 }, { "epoch": 0.18535417441601706, "grad_norm": 0.4005360007286072, "learning_rate": 0.0001629628666865959, "loss": 1.4701, "step": 14264 }, { "epoch": 0.18536716895993294, "grad_norm": 0.4908580780029297, "learning_rate": 0.0001629602672246845, "loss": 1.5584, "step": 14265 }, { "epoch": 0.1853801635038488, "grad_norm": 0.3978935480117798, "learning_rate": 0.00016295766776277311, "loss": 1.3195, "step": 14266 }, { "epoch": 0.18539315804776468, "grad_norm": 0.5323774218559265, "learning_rate": 0.00016295506830086174, "loss": 1.378, "step": 14267 }, { "epoch": 0.18540615259168056, "grad_norm": 0.35051625967025757, "learning_rate": 0.00016295246883895034, "loss": 1.4612, "step": 14268 }, { "epoch": 0.18541914713559643, "grad_norm": 0.4646984338760376, "learning_rate": 0.00016294986937703896, "loss": 1.5986, "step": 14269 }, { "epoch": 0.1854321416795123, "grad_norm": 0.45621147751808167, "learning_rate": 0.00016294726991512759, "loss": 1.4939, "step": 14270 }, { "epoch": 0.18544513622342818, "grad_norm": 0.39475017786026, "learning_rate": 0.0001629446704532162, "loss": 1.4582, "step": 14271 }, { "epoch": 0.18545813076734405, "grad_norm": 0.3017983138561249, "learning_rate": 0.0001629420709913048, "loss": 1.252, "step": 14272 }, { "epoch": 0.18547112531125992, "grad_norm": 0.3345814049243927, "learning_rate": 0.0001629394715293934, "loss": 1.3758, "step": 14273 }, { "epoch": 0.18548411985517582, "grad_norm": 0.3642207086086273, "learning_rate": 0.00016293687206748206, "loss": 1.4862, "step": 14274 }, { "epoch": 0.1854971143990917, "grad_norm": 0.45335185527801514, "learning_rate": 0.00016293427260557065, "loss": 1.3258, "step": 14275 }, { "epoch": 0.18551010894300757, "grad_norm": 0.32101088762283325, "learning_rate": 0.00016293167314365928, "loss": 1.4992, "step": 14276 }, { "epoch": 0.18552310348692344, "grad_norm": 0.4141268730163574, "learning_rate": 0.00016292907368174788, "loss": 1.6381, "step": 14277 }, { "epoch": 0.1855360980308393, "grad_norm": 0.46873944997787476, "learning_rate": 0.0001629264742198365, "loss": 1.3337, "step": 14278 }, { "epoch": 0.1855490925747552, "grad_norm": 0.38198965787887573, "learning_rate": 0.00016292387475792512, "loss": 1.4683, "step": 14279 }, { "epoch": 0.18556208711867106, "grad_norm": 0.3092433214187622, "learning_rate": 0.00016292127529601372, "loss": 1.3181, "step": 14280 }, { "epoch": 0.18557508166258693, "grad_norm": 0.421966016292572, "learning_rate": 0.00016291867583410235, "loss": 1.4173, "step": 14281 }, { "epoch": 0.1855880762065028, "grad_norm": 0.38572409749031067, "learning_rate": 0.00016291607637219097, "loss": 1.3703, "step": 14282 }, { "epoch": 0.18560107075041868, "grad_norm": 0.39017271995544434, "learning_rate": 0.0001629134769102796, "loss": 1.2168, "step": 14283 }, { "epoch": 0.18561406529433455, "grad_norm": 0.35076531767845154, "learning_rate": 0.0001629108774483682, "loss": 1.4448, "step": 14284 }, { "epoch": 0.18562705983825042, "grad_norm": 0.4101915657520294, "learning_rate": 0.0001629082779864568, "loss": 1.5174, "step": 14285 }, { "epoch": 0.1856400543821663, "grad_norm": 0.40193435549736023, "learning_rate": 0.00016290567852454544, "loss": 1.3288, "step": 14286 }, { "epoch": 0.18565304892608217, "grad_norm": 0.4339751601219177, "learning_rate": 0.00016290307906263404, "loss": 1.3335, "step": 14287 }, { "epoch": 0.18566604346999804, "grad_norm": 0.432715505361557, "learning_rate": 0.00016290047960072266, "loss": 1.2862, "step": 14288 }, { "epoch": 0.18567903801391392, "grad_norm": 0.34752845764160156, "learning_rate": 0.00016289788013881126, "loss": 1.4881, "step": 14289 }, { "epoch": 0.1856920325578298, "grad_norm": 0.330517053604126, "learning_rate": 0.00016289528067689989, "loss": 1.3736, "step": 14290 }, { "epoch": 0.18570502710174566, "grad_norm": 0.3264653980731964, "learning_rate": 0.0001628926812149885, "loss": 1.473, "step": 14291 }, { "epoch": 0.18571802164566154, "grad_norm": 0.2794967293739319, "learning_rate": 0.0001628900817530771, "loss": 1.5245, "step": 14292 }, { "epoch": 0.1857310161895774, "grad_norm": 0.3297751843929291, "learning_rate": 0.00016288748229116573, "loss": 1.4719, "step": 14293 }, { "epoch": 0.18574401073349328, "grad_norm": 0.36630377173423767, "learning_rate": 0.00016288488282925436, "loss": 1.3589, "step": 14294 }, { "epoch": 0.18575700527740915, "grad_norm": 0.3053281009197235, "learning_rate": 0.00016288228336734298, "loss": 1.4403, "step": 14295 }, { "epoch": 0.18576999982132503, "grad_norm": 0.3138176202774048, "learning_rate": 0.00016287968390543158, "loss": 1.2616, "step": 14296 }, { "epoch": 0.1857829943652409, "grad_norm": 0.422550767660141, "learning_rate": 0.0001628770844435202, "loss": 1.4104, "step": 14297 }, { "epoch": 0.18579598890915677, "grad_norm": 0.33258068561553955, "learning_rate": 0.00016287448498160883, "loss": 1.5382, "step": 14298 }, { "epoch": 0.18580898345307265, "grad_norm": 0.4508451223373413, "learning_rate": 0.00016287188551969742, "loss": 1.3465, "step": 14299 }, { "epoch": 0.18582197799698852, "grad_norm": 0.47415196895599365, "learning_rate": 0.00016286928605778605, "loss": 1.4864, "step": 14300 }, { "epoch": 0.1858349725409044, "grad_norm": 0.4498703181743622, "learning_rate": 0.00016286668659587465, "loss": 1.5972, "step": 14301 }, { "epoch": 0.18584796708482026, "grad_norm": 0.3581358790397644, "learning_rate": 0.00016286408713396327, "loss": 1.4269, "step": 14302 }, { "epoch": 0.18586096162873614, "grad_norm": 0.2862187922000885, "learning_rate": 0.0001628614876720519, "loss": 1.316, "step": 14303 }, { "epoch": 0.185873956172652, "grad_norm": 0.32734933495521545, "learning_rate": 0.0001628588882101405, "loss": 1.3223, "step": 14304 }, { "epoch": 0.18588695071656788, "grad_norm": 0.3670196831226349, "learning_rate": 0.00016285628874822912, "loss": 1.3676, "step": 14305 }, { "epoch": 0.18589994526048376, "grad_norm": 0.43077024817466736, "learning_rate": 0.00016285368928631774, "loss": 1.4851, "step": 14306 }, { "epoch": 0.18591293980439963, "grad_norm": 0.3802175223827362, "learning_rate": 0.00016285108982440637, "loss": 1.4537, "step": 14307 }, { "epoch": 0.1859259343483155, "grad_norm": 0.39573338627815247, "learning_rate": 0.00016284849036249496, "loss": 1.3061, "step": 14308 }, { "epoch": 0.18593892889223138, "grad_norm": 0.3352431356906891, "learning_rate": 0.0001628458909005836, "loss": 1.4291, "step": 14309 }, { "epoch": 0.18595192343614725, "grad_norm": 0.44189995527267456, "learning_rate": 0.0001628432914386722, "loss": 1.4303, "step": 14310 }, { "epoch": 0.18596491798006312, "grad_norm": 0.3592013716697693, "learning_rate": 0.0001628406919767608, "loss": 1.4524, "step": 14311 }, { "epoch": 0.185977912523979, "grad_norm": 0.34093964099884033, "learning_rate": 0.00016283809251484943, "loss": 1.3018, "step": 14312 }, { "epoch": 0.18599090706789487, "grad_norm": 0.41372033953666687, "learning_rate": 0.00016283549305293806, "loss": 1.4411, "step": 14313 }, { "epoch": 0.18600390161181074, "grad_norm": 0.43197929859161377, "learning_rate": 0.00016283289359102666, "loss": 1.29, "step": 14314 }, { "epoch": 0.1860168961557266, "grad_norm": 0.3922059237957001, "learning_rate": 0.00016283029412911528, "loss": 1.3814, "step": 14315 }, { "epoch": 0.1860298906996425, "grad_norm": 0.4541341960430145, "learning_rate": 0.00016282769466720388, "loss": 1.1392, "step": 14316 }, { "epoch": 0.18604288524355836, "grad_norm": 0.3847472667694092, "learning_rate": 0.00016282509520529253, "loss": 1.3816, "step": 14317 }, { "epoch": 0.18605587978747423, "grad_norm": 0.38595613837242126, "learning_rate": 0.00016282249574338113, "loss": 1.4065, "step": 14318 }, { "epoch": 0.1860688743313901, "grad_norm": 0.46247953176498413, "learning_rate": 0.00016281989628146975, "loss": 1.5003, "step": 14319 }, { "epoch": 0.18608186887530598, "grad_norm": 0.4265228807926178, "learning_rate": 0.00016281729681955835, "loss": 1.4465, "step": 14320 }, { "epoch": 0.18609486341922185, "grad_norm": 0.4624263048171997, "learning_rate": 0.00016281469735764697, "loss": 1.4938, "step": 14321 }, { "epoch": 0.18610785796313772, "grad_norm": 0.43953514099121094, "learning_rate": 0.0001628120978957356, "loss": 1.5869, "step": 14322 }, { "epoch": 0.1861208525070536, "grad_norm": 0.49323806166648865, "learning_rate": 0.0001628094984338242, "loss": 1.3974, "step": 14323 }, { "epoch": 0.18613384705096947, "grad_norm": 0.3058515191078186, "learning_rate": 0.00016280689897191282, "loss": 1.4803, "step": 14324 }, { "epoch": 0.18614684159488534, "grad_norm": 0.3950141668319702, "learning_rate": 0.00016280429951000144, "loss": 1.5046, "step": 14325 }, { "epoch": 0.18615983613880122, "grad_norm": 0.39663898944854736, "learning_rate": 0.00016280170004809007, "loss": 1.3636, "step": 14326 }, { "epoch": 0.1861728306827171, "grad_norm": 0.30346032977104187, "learning_rate": 0.00016279910058617867, "loss": 1.4355, "step": 14327 }, { "epoch": 0.18618582522663296, "grad_norm": 0.3703216314315796, "learning_rate": 0.00016279650112426726, "loss": 1.3973, "step": 14328 }, { "epoch": 0.18619881977054883, "grad_norm": 0.4186863601207733, "learning_rate": 0.00016279390166235592, "loss": 1.3924, "step": 14329 }, { "epoch": 0.1862118143144647, "grad_norm": 0.36253488063812256, "learning_rate": 0.0001627913022004445, "loss": 1.2816, "step": 14330 }, { "epoch": 0.18622480885838058, "grad_norm": 0.4027514159679413, "learning_rate": 0.00016278870273853314, "loss": 1.5649, "step": 14331 }, { "epoch": 0.18623780340229645, "grad_norm": 0.4339659810066223, "learning_rate": 0.00016278610327662173, "loss": 1.425, "step": 14332 }, { "epoch": 0.18625079794621233, "grad_norm": 0.3830181658267975, "learning_rate": 0.00016278350381471036, "loss": 1.2091, "step": 14333 }, { "epoch": 0.1862637924901282, "grad_norm": 0.42852750420570374, "learning_rate": 0.00016278090435279898, "loss": 1.4521, "step": 14334 }, { "epoch": 0.18627678703404407, "grad_norm": 0.3467976450920105, "learning_rate": 0.00016277830489088758, "loss": 1.5321, "step": 14335 }, { "epoch": 0.18628978157795995, "grad_norm": 0.4158039093017578, "learning_rate": 0.0001627757054289762, "loss": 1.3732, "step": 14336 }, { "epoch": 0.18630277612187582, "grad_norm": 0.4933732748031616, "learning_rate": 0.00016277310596706483, "loss": 1.5187, "step": 14337 }, { "epoch": 0.1863157706657917, "grad_norm": 0.37910646200180054, "learning_rate": 0.00016277050650515345, "loss": 1.3129, "step": 14338 }, { "epoch": 0.18632876520970756, "grad_norm": 0.42034706473350525, "learning_rate": 0.00016276790704324205, "loss": 1.5287, "step": 14339 }, { "epoch": 0.18634175975362344, "grad_norm": 0.3714899718761444, "learning_rate": 0.00016276530758133065, "loss": 1.1593, "step": 14340 }, { "epoch": 0.1863547542975393, "grad_norm": 0.4030725359916687, "learning_rate": 0.0001627627081194193, "loss": 1.3238, "step": 14341 }, { "epoch": 0.18636774884145518, "grad_norm": 0.3666721284389496, "learning_rate": 0.0001627601086575079, "loss": 1.399, "step": 14342 }, { "epoch": 0.18638074338537106, "grad_norm": 0.3854379653930664, "learning_rate": 0.00016275750919559652, "loss": 1.3854, "step": 14343 }, { "epoch": 0.18639373792928693, "grad_norm": 0.4046310484409332, "learning_rate": 0.00016275490973368515, "loss": 1.4014, "step": 14344 }, { "epoch": 0.1864067324732028, "grad_norm": 0.4487934708595276, "learning_rate": 0.00016275231027177374, "loss": 1.6155, "step": 14345 }, { "epoch": 0.18641972701711867, "grad_norm": 0.46088072657585144, "learning_rate": 0.00016274971080986237, "loss": 1.3763, "step": 14346 }, { "epoch": 0.18643272156103455, "grad_norm": 0.45239901542663574, "learning_rate": 0.00016274711134795097, "loss": 1.5041, "step": 14347 }, { "epoch": 0.18644571610495042, "grad_norm": 0.37555330991744995, "learning_rate": 0.00016274451188603962, "loss": 1.5502, "step": 14348 }, { "epoch": 0.1864587106488663, "grad_norm": 0.4384852647781372, "learning_rate": 0.00016274191242412822, "loss": 1.5333, "step": 14349 }, { "epoch": 0.18647170519278217, "grad_norm": 0.334956556558609, "learning_rate": 0.00016273931296221684, "loss": 1.3302, "step": 14350 }, { "epoch": 0.18648469973669807, "grad_norm": 0.35770031809806824, "learning_rate": 0.00016273671350030544, "loss": 1.4343, "step": 14351 }, { "epoch": 0.18649769428061394, "grad_norm": 0.29289156198501587, "learning_rate": 0.00016273411403839406, "loss": 0.9746, "step": 14352 }, { "epoch": 0.1865106888245298, "grad_norm": 0.40831005573272705, "learning_rate": 0.00016273151457648269, "loss": 1.6699, "step": 14353 }, { "epoch": 0.1865236833684457, "grad_norm": 0.3534592390060425, "learning_rate": 0.00016272891511457128, "loss": 1.4727, "step": 14354 }, { "epoch": 0.18653667791236156, "grad_norm": 0.3372885584831238, "learning_rate": 0.0001627263156526599, "loss": 1.3443, "step": 14355 }, { "epoch": 0.18654967245627743, "grad_norm": 0.43740254640579224, "learning_rate": 0.00016272371619074853, "loss": 1.4162, "step": 14356 }, { "epoch": 0.1865626670001933, "grad_norm": 0.3833792209625244, "learning_rate": 0.00016272111672883713, "loss": 1.2537, "step": 14357 }, { "epoch": 0.18657566154410918, "grad_norm": 0.3932992219924927, "learning_rate": 0.00016271851726692575, "loss": 1.4399, "step": 14358 }, { "epoch": 0.18658865608802505, "grad_norm": 0.3103752136230469, "learning_rate": 0.00016271591780501435, "loss": 1.2438, "step": 14359 }, { "epoch": 0.18660165063194092, "grad_norm": 0.4436561167240143, "learning_rate": 0.000162713318343103, "loss": 1.5726, "step": 14360 }, { "epoch": 0.1866146451758568, "grad_norm": 0.3723568320274353, "learning_rate": 0.0001627107188811916, "loss": 1.5834, "step": 14361 }, { "epoch": 0.18662763971977267, "grad_norm": 0.3735470175743103, "learning_rate": 0.00016270811941928022, "loss": 1.4049, "step": 14362 }, { "epoch": 0.18664063426368854, "grad_norm": 0.4717046916484833, "learning_rate": 0.00016270551995736882, "loss": 1.4987, "step": 14363 }, { "epoch": 0.18665362880760442, "grad_norm": 0.3478568494319916, "learning_rate": 0.00016270292049545745, "loss": 1.45, "step": 14364 }, { "epoch": 0.1866666233515203, "grad_norm": 0.3395802676677704, "learning_rate": 0.00016270032103354607, "loss": 1.5079, "step": 14365 }, { "epoch": 0.18667961789543616, "grad_norm": 0.3740818500518799, "learning_rate": 0.00016269772157163467, "loss": 1.552, "step": 14366 }, { "epoch": 0.18669261243935203, "grad_norm": 0.4298551380634308, "learning_rate": 0.0001626951221097233, "loss": 1.4899, "step": 14367 }, { "epoch": 0.1867056069832679, "grad_norm": 0.28909531235694885, "learning_rate": 0.00016269252264781192, "loss": 1.5072, "step": 14368 }, { "epoch": 0.18671860152718378, "grad_norm": 0.3859047591686249, "learning_rate": 0.00016268992318590051, "loss": 1.4512, "step": 14369 }, { "epoch": 0.18673159607109965, "grad_norm": 0.40439948439598083, "learning_rate": 0.00016268732372398914, "loss": 1.4448, "step": 14370 }, { "epoch": 0.18674459061501553, "grad_norm": 0.41528624296188354, "learning_rate": 0.00016268472426207774, "loss": 1.4992, "step": 14371 }, { "epoch": 0.1867575851589314, "grad_norm": 0.4132675528526306, "learning_rate": 0.0001626821248001664, "loss": 1.5386, "step": 14372 }, { "epoch": 0.18677057970284727, "grad_norm": 0.4855875074863434, "learning_rate": 0.00016267952533825499, "loss": 1.2689, "step": 14373 }, { "epoch": 0.18678357424676315, "grad_norm": 0.36735275387763977, "learning_rate": 0.0001626769258763436, "loss": 1.3126, "step": 14374 }, { "epoch": 0.18679656879067902, "grad_norm": 0.44390103220939636, "learning_rate": 0.0001626743264144322, "loss": 1.5056, "step": 14375 }, { "epoch": 0.1868095633345949, "grad_norm": 0.29486754536628723, "learning_rate": 0.00016267172695252083, "loss": 1.2375, "step": 14376 }, { "epoch": 0.18682255787851076, "grad_norm": 0.3379875719547272, "learning_rate": 0.00016266912749060946, "loss": 1.4989, "step": 14377 }, { "epoch": 0.18683555242242664, "grad_norm": 0.3537369668483734, "learning_rate": 0.00016266652802869805, "loss": 1.4326, "step": 14378 }, { "epoch": 0.1868485469663425, "grad_norm": 0.2884156405925751, "learning_rate": 0.00016266392856678668, "loss": 1.4428, "step": 14379 }, { "epoch": 0.18686154151025838, "grad_norm": 0.35906651616096497, "learning_rate": 0.0001626613291048753, "loss": 1.2599, "step": 14380 }, { "epoch": 0.18687453605417426, "grad_norm": 0.44593536853790283, "learning_rate": 0.00016265872964296393, "loss": 1.4354, "step": 14381 }, { "epoch": 0.18688753059809013, "grad_norm": 0.3976133167743683, "learning_rate": 0.00016265613018105252, "loss": 1.4556, "step": 14382 }, { "epoch": 0.186900525142006, "grad_norm": 0.38843801617622375, "learning_rate": 0.00016265353071914115, "loss": 1.5049, "step": 14383 }, { "epoch": 0.18691351968592188, "grad_norm": 0.30734995007514954, "learning_rate": 0.00016265093125722977, "loss": 1.4126, "step": 14384 }, { "epoch": 0.18692651422983775, "grad_norm": 0.42527928948402405, "learning_rate": 0.00016264833179531837, "loss": 1.6251, "step": 14385 }, { "epoch": 0.18693950877375362, "grad_norm": 0.3487423062324524, "learning_rate": 0.000162645732333407, "loss": 1.3567, "step": 14386 }, { "epoch": 0.1869525033176695, "grad_norm": 0.45996689796447754, "learning_rate": 0.00016264313287149562, "loss": 1.3291, "step": 14387 }, { "epoch": 0.18696549786158537, "grad_norm": 0.40402936935424805, "learning_rate": 0.00016264053340958422, "loss": 1.5707, "step": 14388 }, { "epoch": 0.18697849240550124, "grad_norm": 0.38168686628341675, "learning_rate": 0.00016263793394767284, "loss": 1.3254, "step": 14389 }, { "epoch": 0.1869914869494171, "grad_norm": 0.4388217628002167, "learning_rate": 0.00016263533448576144, "loss": 1.3776, "step": 14390 }, { "epoch": 0.18700448149333299, "grad_norm": 0.4481607973575592, "learning_rate": 0.0001626327350238501, "loss": 1.346, "step": 14391 }, { "epoch": 0.18701747603724886, "grad_norm": 0.30641359090805054, "learning_rate": 0.0001626301355619387, "loss": 1.3329, "step": 14392 }, { "epoch": 0.18703047058116473, "grad_norm": 0.37614983320236206, "learning_rate": 0.0001626275361000273, "loss": 1.5578, "step": 14393 }, { "epoch": 0.1870434651250806, "grad_norm": 0.4834055006504059, "learning_rate": 0.0001626249366381159, "loss": 1.4689, "step": 14394 }, { "epoch": 0.18705645966899648, "grad_norm": 0.3917204439640045, "learning_rate": 0.00016262233717620453, "loss": 1.584, "step": 14395 }, { "epoch": 0.18706945421291235, "grad_norm": 0.44108593463897705, "learning_rate": 0.00016261973771429316, "loss": 1.5352, "step": 14396 }, { "epoch": 0.18708244875682822, "grad_norm": 0.2887263000011444, "learning_rate": 0.00016261713825238176, "loss": 1.3455, "step": 14397 }, { "epoch": 0.1870954433007441, "grad_norm": 0.38707253336906433, "learning_rate": 0.00016261453879047038, "loss": 1.3652, "step": 14398 }, { "epoch": 0.18710843784465997, "grad_norm": 0.4041129946708679, "learning_rate": 0.000162611939328559, "loss": 1.1025, "step": 14399 }, { "epoch": 0.18712143238857584, "grad_norm": 0.35989147424697876, "learning_rate": 0.0001626093398666476, "loss": 1.3621, "step": 14400 }, { "epoch": 0.18713442693249172, "grad_norm": 0.3689303994178772, "learning_rate": 0.00016260674040473623, "loss": 1.5493, "step": 14401 }, { "epoch": 0.1871474214764076, "grad_norm": 0.4389260411262512, "learning_rate": 0.00016260414094282482, "loss": 1.541, "step": 14402 }, { "epoch": 0.18716041602032346, "grad_norm": 0.3757264316082001, "learning_rate": 0.00016260154148091348, "loss": 1.3747, "step": 14403 }, { "epoch": 0.18717341056423933, "grad_norm": 0.3165930509567261, "learning_rate": 0.00016259894201900207, "loss": 1.3815, "step": 14404 }, { "epoch": 0.1871864051081552, "grad_norm": 0.5496413707733154, "learning_rate": 0.0001625963425570907, "loss": 1.545, "step": 14405 }, { "epoch": 0.18719939965207108, "grad_norm": 0.3831750750541687, "learning_rate": 0.0001625937430951793, "loss": 1.4059, "step": 14406 }, { "epoch": 0.18721239419598695, "grad_norm": 0.3455681800842285, "learning_rate": 0.00016259114363326792, "loss": 1.3575, "step": 14407 }, { "epoch": 0.18722538873990283, "grad_norm": 0.3597778081893921, "learning_rate": 0.00016258854417135654, "loss": 1.2048, "step": 14408 }, { "epoch": 0.1872383832838187, "grad_norm": 0.3858657479286194, "learning_rate": 0.00016258594470944514, "loss": 1.4789, "step": 14409 }, { "epoch": 0.18725137782773457, "grad_norm": 0.420121431350708, "learning_rate": 0.00016258334524753377, "loss": 1.6188, "step": 14410 }, { "epoch": 0.18726437237165044, "grad_norm": 0.39170965552330017, "learning_rate": 0.0001625807457856224, "loss": 1.4187, "step": 14411 }, { "epoch": 0.18727736691556632, "grad_norm": 0.4454212188720703, "learning_rate": 0.000162578146323711, "loss": 1.4111, "step": 14412 }, { "epoch": 0.1872903614594822, "grad_norm": 0.3327889144420624, "learning_rate": 0.0001625755468617996, "loss": 1.5136, "step": 14413 }, { "epoch": 0.18730335600339806, "grad_norm": 0.5569693446159363, "learning_rate": 0.0001625729473998882, "loss": 1.4477, "step": 14414 }, { "epoch": 0.18731635054731394, "grad_norm": 0.4914078116416931, "learning_rate": 0.00016257034793797686, "loss": 1.4584, "step": 14415 }, { "epoch": 0.1873293450912298, "grad_norm": 0.3696451485157013, "learning_rate": 0.00016256774847606546, "loss": 1.4937, "step": 14416 }, { "epoch": 0.18734233963514568, "grad_norm": 0.295354425907135, "learning_rate": 0.00016256514901415408, "loss": 1.2159, "step": 14417 }, { "epoch": 0.18735533417906156, "grad_norm": 0.4005788266658783, "learning_rate": 0.0001625625495522427, "loss": 1.4283, "step": 14418 }, { "epoch": 0.18736832872297743, "grad_norm": 0.4608438014984131, "learning_rate": 0.0001625599500903313, "loss": 1.4152, "step": 14419 }, { "epoch": 0.1873813232668933, "grad_norm": 0.5253211259841919, "learning_rate": 0.00016255735062841993, "loss": 1.5659, "step": 14420 }, { "epoch": 0.18739431781080917, "grad_norm": 0.4151586890220642, "learning_rate": 0.00016255475116650853, "loss": 1.359, "step": 14421 }, { "epoch": 0.18740731235472505, "grad_norm": 0.43396228551864624, "learning_rate": 0.00016255215170459718, "loss": 1.3933, "step": 14422 }, { "epoch": 0.18742030689864092, "grad_norm": 0.4491978883743286, "learning_rate": 0.00016254955224268578, "loss": 1.5948, "step": 14423 }, { "epoch": 0.1874333014425568, "grad_norm": 0.4526844620704651, "learning_rate": 0.00016254695278077437, "loss": 1.371, "step": 14424 }, { "epoch": 0.18744629598647267, "grad_norm": 0.3974749445915222, "learning_rate": 0.000162544353318863, "loss": 1.2999, "step": 14425 }, { "epoch": 0.18745929053038854, "grad_norm": 0.43171507120132446, "learning_rate": 0.00016254175385695162, "loss": 1.4081, "step": 14426 }, { "epoch": 0.18747228507430444, "grad_norm": 0.40456482768058777, "learning_rate": 0.00016253915439504025, "loss": 1.3136, "step": 14427 }, { "epoch": 0.1874852796182203, "grad_norm": 0.45029914379119873, "learning_rate": 0.00016253655493312884, "loss": 1.4957, "step": 14428 }, { "epoch": 0.18749827416213619, "grad_norm": 0.5150418281555176, "learning_rate": 0.00016253395547121747, "loss": 1.4739, "step": 14429 }, { "epoch": 0.18751126870605206, "grad_norm": 0.42873334884643555, "learning_rate": 0.0001625313560093061, "loss": 1.5303, "step": 14430 }, { "epoch": 0.18752426324996793, "grad_norm": 0.3903109133243561, "learning_rate": 0.0001625287565473947, "loss": 1.3689, "step": 14431 }, { "epoch": 0.1875372577938838, "grad_norm": 0.3819105327129364, "learning_rate": 0.00016252615708548332, "loss": 1.4961, "step": 14432 }, { "epoch": 0.18755025233779968, "grad_norm": 0.36253443360328674, "learning_rate": 0.0001625235576235719, "loss": 1.5181, "step": 14433 }, { "epoch": 0.18756324688171555, "grad_norm": 0.4301897883415222, "learning_rate": 0.00016252095816166056, "loss": 1.3342, "step": 14434 }, { "epoch": 0.18757624142563142, "grad_norm": 0.37700337171554565, "learning_rate": 0.00016251835869974916, "loss": 1.3786, "step": 14435 }, { "epoch": 0.1875892359695473, "grad_norm": 0.3972987234592438, "learning_rate": 0.00016251575923783776, "loss": 1.618, "step": 14436 }, { "epoch": 0.18760223051346317, "grad_norm": 0.42468562722206116, "learning_rate": 0.00016251315977592638, "loss": 1.4217, "step": 14437 }, { "epoch": 0.18761522505737904, "grad_norm": 0.3062169849872589, "learning_rate": 0.000162510560314015, "loss": 1.298, "step": 14438 }, { "epoch": 0.18762821960129492, "grad_norm": 0.4621412456035614, "learning_rate": 0.00016250796085210363, "loss": 1.3647, "step": 14439 }, { "epoch": 0.1876412141452108, "grad_norm": 0.40942925214767456, "learning_rate": 0.00016250536139019223, "loss": 1.3456, "step": 14440 }, { "epoch": 0.18765420868912666, "grad_norm": 0.4475214183330536, "learning_rate": 0.00016250276192828085, "loss": 1.6426, "step": 14441 }, { "epoch": 0.18766720323304253, "grad_norm": 0.3709256649017334, "learning_rate": 0.00016250016246636948, "loss": 1.3788, "step": 14442 }, { "epoch": 0.1876801977769584, "grad_norm": 0.3314778208732605, "learning_rate": 0.00016249756300445808, "loss": 1.2837, "step": 14443 }, { "epoch": 0.18769319232087428, "grad_norm": 0.47179046273231506, "learning_rate": 0.0001624949635425467, "loss": 1.5521, "step": 14444 }, { "epoch": 0.18770618686479015, "grad_norm": 0.43415403366088867, "learning_rate": 0.0001624923640806353, "loss": 1.4386, "step": 14445 }, { "epoch": 0.18771918140870603, "grad_norm": 0.35949957370758057, "learning_rate": 0.00016248976461872395, "loss": 1.4333, "step": 14446 }, { "epoch": 0.1877321759526219, "grad_norm": 0.4087499976158142, "learning_rate": 0.00016248716515681255, "loss": 1.4038, "step": 14447 }, { "epoch": 0.18774517049653777, "grad_norm": 0.4740716814994812, "learning_rate": 0.00016248456569490117, "loss": 1.6589, "step": 14448 }, { "epoch": 0.18775816504045365, "grad_norm": 0.522384524345398, "learning_rate": 0.00016248196623298977, "loss": 1.3671, "step": 14449 }, { "epoch": 0.18777115958436952, "grad_norm": 0.41290026903152466, "learning_rate": 0.0001624793667710784, "loss": 1.5605, "step": 14450 }, { "epoch": 0.1877841541282854, "grad_norm": 0.3898678123950958, "learning_rate": 0.00016247676730916702, "loss": 1.4625, "step": 14451 }, { "epoch": 0.18779714867220126, "grad_norm": 0.3673611879348755, "learning_rate": 0.00016247416784725562, "loss": 1.5046, "step": 14452 }, { "epoch": 0.18781014321611714, "grad_norm": 0.36359888315200806, "learning_rate": 0.00016247156838534424, "loss": 1.3245, "step": 14453 }, { "epoch": 0.187823137760033, "grad_norm": 0.394386351108551, "learning_rate": 0.00016246896892343286, "loss": 1.4362, "step": 14454 }, { "epoch": 0.18783613230394888, "grad_norm": 0.38717418909072876, "learning_rate": 0.00016246636946152146, "loss": 1.389, "step": 14455 }, { "epoch": 0.18784912684786476, "grad_norm": 0.3920443058013916, "learning_rate": 0.00016246376999961009, "loss": 1.4478, "step": 14456 }, { "epoch": 0.18786212139178063, "grad_norm": 0.3487685024738312, "learning_rate": 0.0001624611705376987, "loss": 1.4958, "step": 14457 }, { "epoch": 0.1878751159356965, "grad_norm": 0.2473049908876419, "learning_rate": 0.00016245857107578734, "loss": 1.2694, "step": 14458 }, { "epoch": 0.18788811047961237, "grad_norm": 0.3660510778427124, "learning_rate": 0.00016245597161387593, "loss": 1.4073, "step": 14459 }, { "epoch": 0.18790110502352825, "grad_norm": 0.4749138057231903, "learning_rate": 0.00016245337215196456, "loss": 1.3103, "step": 14460 }, { "epoch": 0.18791409956744412, "grad_norm": 0.3831072151660919, "learning_rate": 0.00016245077269005318, "loss": 1.5522, "step": 14461 }, { "epoch": 0.18792709411136, "grad_norm": 0.43080803751945496, "learning_rate": 0.00016244817322814178, "loss": 1.535, "step": 14462 }, { "epoch": 0.18794008865527587, "grad_norm": 0.3907145857810974, "learning_rate": 0.0001624455737662304, "loss": 1.4529, "step": 14463 }, { "epoch": 0.18795308319919174, "grad_norm": 0.47572651505470276, "learning_rate": 0.000162442974304319, "loss": 1.3927, "step": 14464 }, { "epoch": 0.1879660777431076, "grad_norm": 0.4210858643054962, "learning_rate": 0.00016244037484240765, "loss": 1.321, "step": 14465 }, { "epoch": 0.18797907228702349, "grad_norm": 0.36122605204582214, "learning_rate": 0.00016243777538049625, "loss": 1.522, "step": 14466 }, { "epoch": 0.18799206683093936, "grad_norm": 0.4624292850494385, "learning_rate": 0.00016243517591858485, "loss": 1.4885, "step": 14467 }, { "epoch": 0.18800506137485523, "grad_norm": 0.46274593472480774, "learning_rate": 0.00016243257645667347, "loss": 1.3888, "step": 14468 }, { "epoch": 0.1880180559187711, "grad_norm": 0.4571024179458618, "learning_rate": 0.0001624299769947621, "loss": 1.3338, "step": 14469 }, { "epoch": 0.18803105046268698, "grad_norm": 0.22090663015842438, "learning_rate": 0.00016242737753285072, "loss": 1.437, "step": 14470 }, { "epoch": 0.18804404500660285, "grad_norm": 0.5126134753227234, "learning_rate": 0.00016242477807093932, "loss": 1.5309, "step": 14471 }, { "epoch": 0.18805703955051872, "grad_norm": 0.3658894896507263, "learning_rate": 0.00016242217860902794, "loss": 1.3647, "step": 14472 }, { "epoch": 0.1880700340944346, "grad_norm": 0.36310434341430664, "learning_rate": 0.00016241957914711657, "loss": 1.2909, "step": 14473 }, { "epoch": 0.18808302863835047, "grad_norm": 0.39679476618766785, "learning_rate": 0.00016241697968520516, "loss": 1.4251, "step": 14474 }, { "epoch": 0.18809602318226634, "grad_norm": 0.294733464717865, "learning_rate": 0.0001624143802232938, "loss": 1.5591, "step": 14475 }, { "epoch": 0.18810901772618221, "grad_norm": 0.39871710538864136, "learning_rate": 0.00016241178076138239, "loss": 1.2823, "step": 14476 }, { "epoch": 0.1881220122700981, "grad_norm": 0.4935499429702759, "learning_rate": 0.00016240918129947104, "loss": 1.3357, "step": 14477 }, { "epoch": 0.18813500681401396, "grad_norm": 0.48948514461517334, "learning_rate": 0.00016240658183755964, "loss": 1.4179, "step": 14478 }, { "epoch": 0.18814800135792983, "grad_norm": 0.38988742232322693, "learning_rate": 0.00016240398237564823, "loss": 1.4717, "step": 14479 }, { "epoch": 0.1881609959018457, "grad_norm": 0.3298640847206116, "learning_rate": 0.00016240138291373686, "loss": 1.2856, "step": 14480 }, { "epoch": 0.18817399044576158, "grad_norm": 0.3414093554019928, "learning_rate": 0.00016239878345182548, "loss": 1.3592, "step": 14481 }, { "epoch": 0.18818698498967745, "grad_norm": 0.3263503611087799, "learning_rate": 0.0001623961839899141, "loss": 1.6188, "step": 14482 }, { "epoch": 0.18819997953359333, "grad_norm": 0.40886253118515015, "learning_rate": 0.0001623935845280027, "loss": 1.4467, "step": 14483 }, { "epoch": 0.1882129740775092, "grad_norm": 0.47259044647216797, "learning_rate": 0.00016239098506609133, "loss": 1.2118, "step": 14484 }, { "epoch": 0.18822596862142507, "grad_norm": 0.36327096819877625, "learning_rate": 0.00016238838560417995, "loss": 1.4744, "step": 14485 }, { "epoch": 0.18823896316534094, "grad_norm": 0.44845834374427795, "learning_rate": 0.00016238578614226855, "loss": 1.1621, "step": 14486 }, { "epoch": 0.18825195770925682, "grad_norm": 0.3794812262058258, "learning_rate": 0.00016238318668035717, "loss": 1.4718, "step": 14487 }, { "epoch": 0.1882649522531727, "grad_norm": 0.48565933108329773, "learning_rate": 0.00016238058721844577, "loss": 1.5504, "step": 14488 }, { "epoch": 0.18827794679708856, "grad_norm": 0.44529977440834045, "learning_rate": 0.00016237798775653442, "loss": 1.5978, "step": 14489 }, { "epoch": 0.18829094134100444, "grad_norm": 0.42387479543685913, "learning_rate": 0.00016237538829462302, "loss": 1.2925, "step": 14490 }, { "epoch": 0.1883039358849203, "grad_norm": 0.3885887861251831, "learning_rate": 0.00016237278883271162, "loss": 1.6111, "step": 14491 }, { "epoch": 0.18831693042883618, "grad_norm": 0.42726266384124756, "learning_rate": 0.00016237018937080027, "loss": 1.4592, "step": 14492 }, { "epoch": 0.18832992497275206, "grad_norm": 0.414634644985199, "learning_rate": 0.00016236758990888887, "loss": 1.566, "step": 14493 }, { "epoch": 0.18834291951666793, "grad_norm": 0.4234277009963989, "learning_rate": 0.0001623649904469775, "loss": 1.3832, "step": 14494 }, { "epoch": 0.1883559140605838, "grad_norm": 0.3759523332118988, "learning_rate": 0.0001623623909850661, "loss": 1.5865, "step": 14495 }, { "epoch": 0.18836890860449967, "grad_norm": 0.2468254268169403, "learning_rate": 0.0001623597915231547, "loss": 1.2233, "step": 14496 }, { "epoch": 0.18838190314841555, "grad_norm": 0.4683910608291626, "learning_rate": 0.00016235719206124334, "loss": 1.3723, "step": 14497 }, { "epoch": 0.18839489769233142, "grad_norm": 0.4523530602455139, "learning_rate": 0.00016235459259933194, "loss": 1.4103, "step": 14498 }, { "epoch": 0.1884078922362473, "grad_norm": 0.4392837584018707, "learning_rate": 0.00016235199313742056, "loss": 1.4252, "step": 14499 }, { "epoch": 0.18842088678016317, "grad_norm": 0.386633038520813, "learning_rate": 0.00016234939367550918, "loss": 1.4198, "step": 14500 }, { "epoch": 0.18843388132407904, "grad_norm": 0.3381975591182709, "learning_rate": 0.0001623467942135978, "loss": 1.2635, "step": 14501 }, { "epoch": 0.1884468758679949, "grad_norm": 0.42978349328041077, "learning_rate": 0.0001623441947516864, "loss": 1.7845, "step": 14502 }, { "epoch": 0.1884598704119108, "grad_norm": 0.3976183235645294, "learning_rate": 0.00016234159528977503, "loss": 1.4765, "step": 14503 }, { "epoch": 0.18847286495582669, "grad_norm": 0.44078388810157776, "learning_rate": 0.00016233899582786365, "loss": 1.2836, "step": 14504 }, { "epoch": 0.18848585949974256, "grad_norm": 0.390920490026474, "learning_rate": 0.00016233639636595225, "loss": 1.4417, "step": 14505 }, { "epoch": 0.18849885404365843, "grad_norm": 0.40321066975593567, "learning_rate": 0.00016233379690404088, "loss": 1.5436, "step": 14506 }, { "epoch": 0.1885118485875743, "grad_norm": 0.4346846044063568, "learning_rate": 0.00016233119744212947, "loss": 1.4673, "step": 14507 }, { "epoch": 0.18852484313149018, "grad_norm": 0.3635194003582001, "learning_rate": 0.0001623285979802181, "loss": 1.5856, "step": 14508 }, { "epoch": 0.18853783767540605, "grad_norm": 0.39587870240211487, "learning_rate": 0.00016232599851830672, "loss": 1.4979, "step": 14509 }, { "epoch": 0.18855083221932192, "grad_norm": 0.327092707157135, "learning_rate": 0.00016232339905639532, "loss": 1.3366, "step": 14510 }, { "epoch": 0.1885638267632378, "grad_norm": 0.4528246223926544, "learning_rate": 0.00016232079959448394, "loss": 1.592, "step": 14511 }, { "epoch": 0.18857682130715367, "grad_norm": 0.4175698459148407, "learning_rate": 0.00016231820013257257, "loss": 1.5478, "step": 14512 }, { "epoch": 0.18858981585106954, "grad_norm": 0.38876259326934814, "learning_rate": 0.0001623156006706612, "loss": 1.36, "step": 14513 }, { "epoch": 0.18860281039498542, "grad_norm": 0.3338746726512909, "learning_rate": 0.0001623130012087498, "loss": 1.3279, "step": 14514 }, { "epoch": 0.1886158049389013, "grad_norm": 0.33175089955329895, "learning_rate": 0.00016231040174683842, "loss": 1.5069, "step": 14515 }, { "epoch": 0.18862879948281716, "grad_norm": 0.434696227312088, "learning_rate": 0.00016230780228492704, "loss": 1.5451, "step": 14516 }, { "epoch": 0.18864179402673303, "grad_norm": 0.3537701666355133, "learning_rate": 0.00016230520282301564, "loss": 1.3026, "step": 14517 }, { "epoch": 0.1886547885706489, "grad_norm": 0.3466985821723938, "learning_rate": 0.00016230260336110426, "loss": 1.5071, "step": 14518 }, { "epoch": 0.18866778311456478, "grad_norm": 0.4111585021018982, "learning_rate": 0.00016230000389919286, "loss": 1.6204, "step": 14519 }, { "epoch": 0.18868077765848065, "grad_norm": 0.36198440194129944, "learning_rate": 0.00016229740443728148, "loss": 1.2028, "step": 14520 }, { "epoch": 0.18869377220239653, "grad_norm": 0.44787272810935974, "learning_rate": 0.0001622948049753701, "loss": 1.5003, "step": 14521 }, { "epoch": 0.1887067667463124, "grad_norm": 0.37772712111473083, "learning_rate": 0.0001622922055134587, "loss": 1.4645, "step": 14522 }, { "epoch": 0.18871976129022827, "grad_norm": 0.4513583183288574, "learning_rate": 0.00016228960605154733, "loss": 1.409, "step": 14523 }, { "epoch": 0.18873275583414414, "grad_norm": 0.40195319056510925, "learning_rate": 0.00016228700658963595, "loss": 1.4865, "step": 14524 }, { "epoch": 0.18874575037806002, "grad_norm": 0.3088810443878174, "learning_rate": 0.00016228440712772458, "loss": 1.5114, "step": 14525 }, { "epoch": 0.1887587449219759, "grad_norm": 0.36387673020362854, "learning_rate": 0.00016228180766581318, "loss": 1.2078, "step": 14526 }, { "epoch": 0.18877173946589176, "grad_norm": 0.397029846906662, "learning_rate": 0.0001622792082039018, "loss": 1.4947, "step": 14527 }, { "epoch": 0.18878473400980764, "grad_norm": 0.3769640326499939, "learning_rate": 0.00016227660874199043, "loss": 1.4792, "step": 14528 }, { "epoch": 0.1887977285537235, "grad_norm": 0.3839905858039856, "learning_rate": 0.00016227400928007902, "loss": 1.227, "step": 14529 }, { "epoch": 0.18881072309763938, "grad_norm": 0.3760770261287689, "learning_rate": 0.00016227140981816765, "loss": 1.5946, "step": 14530 }, { "epoch": 0.18882371764155526, "grad_norm": 0.3543286919593811, "learning_rate": 0.00016226881035625627, "loss": 1.3848, "step": 14531 }, { "epoch": 0.18883671218547113, "grad_norm": 0.47472327947616577, "learning_rate": 0.0001622662108943449, "loss": 1.3272, "step": 14532 }, { "epoch": 0.188849706729387, "grad_norm": 0.33231085538864136, "learning_rate": 0.0001622636114324335, "loss": 1.2445, "step": 14533 }, { "epoch": 0.18886270127330287, "grad_norm": 0.3936046361923218, "learning_rate": 0.0001622610119705221, "loss": 1.5161, "step": 14534 }, { "epoch": 0.18887569581721875, "grad_norm": 0.46012234687805176, "learning_rate": 0.00016225841250861074, "loss": 1.5214, "step": 14535 }, { "epoch": 0.18888869036113462, "grad_norm": 0.3569498658180237, "learning_rate": 0.00016225581304669934, "loss": 1.2607, "step": 14536 }, { "epoch": 0.1889016849050505, "grad_norm": 0.4469507038593292, "learning_rate": 0.00016225321358478796, "loss": 1.5127, "step": 14537 }, { "epoch": 0.18891467944896637, "grad_norm": 0.32847365736961365, "learning_rate": 0.00016225061412287656, "loss": 1.4589, "step": 14538 }, { "epoch": 0.18892767399288224, "grad_norm": 0.3133804202079773, "learning_rate": 0.0001622480146609652, "loss": 1.3264, "step": 14539 }, { "epoch": 0.1889406685367981, "grad_norm": 0.38544103503227234, "learning_rate": 0.0001622454151990538, "loss": 1.2701, "step": 14540 }, { "epoch": 0.18895366308071398, "grad_norm": 0.29341474175453186, "learning_rate": 0.0001622428157371424, "loss": 1.3892, "step": 14541 }, { "epoch": 0.18896665762462986, "grad_norm": 0.3488509953022003, "learning_rate": 0.00016224021627523103, "loss": 1.3131, "step": 14542 }, { "epoch": 0.18897965216854573, "grad_norm": 0.39652740955352783, "learning_rate": 0.00016223761681331966, "loss": 1.3547, "step": 14543 }, { "epoch": 0.1889926467124616, "grad_norm": 0.3201472759246826, "learning_rate": 0.00016223501735140828, "loss": 1.4861, "step": 14544 }, { "epoch": 0.18900564125637748, "grad_norm": 0.47403889894485474, "learning_rate": 0.00016223241788949688, "loss": 1.6844, "step": 14545 }, { "epoch": 0.18901863580029335, "grad_norm": 0.3672645390033722, "learning_rate": 0.00016222981842758548, "loss": 1.5306, "step": 14546 }, { "epoch": 0.18903163034420922, "grad_norm": 0.3776554763317108, "learning_rate": 0.00016222721896567413, "loss": 1.3687, "step": 14547 }, { "epoch": 0.1890446248881251, "grad_norm": 0.36863601207733154, "learning_rate": 0.00016222461950376273, "loss": 1.5099, "step": 14548 }, { "epoch": 0.18905761943204097, "grad_norm": 0.3465145230293274, "learning_rate": 0.00016222202004185135, "loss": 1.3275, "step": 14549 }, { "epoch": 0.18907061397595684, "grad_norm": 0.45081740617752075, "learning_rate": 0.00016221942057993995, "loss": 1.3354, "step": 14550 }, { "epoch": 0.18908360851987271, "grad_norm": 0.4660468101501465, "learning_rate": 0.00016221682111802857, "loss": 1.5158, "step": 14551 }, { "epoch": 0.1890966030637886, "grad_norm": 0.41518107056617737, "learning_rate": 0.0001622142216561172, "loss": 1.3131, "step": 14552 }, { "epoch": 0.18910959760770446, "grad_norm": 0.5014123320579529, "learning_rate": 0.0001622116221942058, "loss": 1.5265, "step": 14553 }, { "epoch": 0.18912259215162033, "grad_norm": 0.4716885983943939, "learning_rate": 0.00016220902273229442, "loss": 1.4843, "step": 14554 }, { "epoch": 0.1891355866955362, "grad_norm": 0.3320855498313904, "learning_rate": 0.00016220642327038304, "loss": 1.3151, "step": 14555 }, { "epoch": 0.18914858123945208, "grad_norm": 0.491390198469162, "learning_rate": 0.00016220382380847167, "loss": 1.4699, "step": 14556 }, { "epoch": 0.18916157578336795, "grad_norm": 0.38898661732673645, "learning_rate": 0.00016220122434656026, "loss": 1.1798, "step": 14557 }, { "epoch": 0.18917457032728383, "grad_norm": 0.40661144256591797, "learning_rate": 0.00016219862488464886, "loss": 1.4118, "step": 14558 }, { "epoch": 0.1891875648711997, "grad_norm": 0.4319323003292084, "learning_rate": 0.00016219602542273751, "loss": 1.511, "step": 14559 }, { "epoch": 0.18920055941511557, "grad_norm": 0.38511526584625244, "learning_rate": 0.0001621934259608261, "loss": 1.3516, "step": 14560 }, { "epoch": 0.18921355395903144, "grad_norm": 0.32243314385414124, "learning_rate": 0.00016219082649891474, "loss": 1.3032, "step": 14561 }, { "epoch": 0.18922654850294732, "grad_norm": 0.47862011194229126, "learning_rate": 0.00016218822703700333, "loss": 1.4037, "step": 14562 }, { "epoch": 0.1892395430468632, "grad_norm": 0.39550742506980896, "learning_rate": 0.00016218562757509196, "loss": 1.2888, "step": 14563 }, { "epoch": 0.18925253759077906, "grad_norm": 0.442732572555542, "learning_rate": 0.00016218302811318058, "loss": 1.5183, "step": 14564 }, { "epoch": 0.18926553213469494, "grad_norm": 0.361819863319397, "learning_rate": 0.00016218042865126918, "loss": 1.4079, "step": 14565 }, { "epoch": 0.1892785266786108, "grad_norm": 0.3531530201435089, "learning_rate": 0.00016217782918935783, "loss": 1.2965, "step": 14566 }, { "epoch": 0.18929152122252668, "grad_norm": 0.38613361120224, "learning_rate": 0.00016217522972744643, "loss": 1.2648, "step": 14567 }, { "epoch": 0.18930451576644255, "grad_norm": 0.3453353941440582, "learning_rate": 0.00016217263026553505, "loss": 1.409, "step": 14568 }, { "epoch": 0.18931751031035843, "grad_norm": 0.5640254020690918, "learning_rate": 0.00016217003080362365, "loss": 1.532, "step": 14569 }, { "epoch": 0.1893305048542743, "grad_norm": 0.33535224199295044, "learning_rate": 0.00016216743134171227, "loss": 1.2854, "step": 14570 }, { "epoch": 0.18934349939819017, "grad_norm": 0.32907330989837646, "learning_rate": 0.0001621648318798009, "loss": 1.4261, "step": 14571 }, { "epoch": 0.18935649394210605, "grad_norm": 0.36334043741226196, "learning_rate": 0.0001621622324178895, "loss": 1.4106, "step": 14572 }, { "epoch": 0.18936948848602192, "grad_norm": 0.4157494008541107, "learning_rate": 0.00016215963295597812, "loss": 1.4241, "step": 14573 }, { "epoch": 0.1893824830299378, "grad_norm": 0.39280542731285095, "learning_rate": 0.00016215703349406675, "loss": 1.3342, "step": 14574 }, { "epoch": 0.18939547757385367, "grad_norm": 0.32324910163879395, "learning_rate": 0.00016215443403215534, "loss": 1.5257, "step": 14575 }, { "epoch": 0.18940847211776954, "grad_norm": 0.4282236695289612, "learning_rate": 0.00016215183457024397, "loss": 1.4957, "step": 14576 }, { "epoch": 0.1894214666616854, "grad_norm": 0.37148046493530273, "learning_rate": 0.00016214923510833256, "loss": 1.394, "step": 14577 }, { "epoch": 0.18943446120560128, "grad_norm": 0.41627007722854614, "learning_rate": 0.00016214663564642122, "loss": 1.4561, "step": 14578 }, { "epoch": 0.18944745574951719, "grad_norm": 0.44311755895614624, "learning_rate": 0.0001621440361845098, "loss": 1.5579, "step": 14579 }, { "epoch": 0.18946045029343306, "grad_norm": 0.35448744893074036, "learning_rate": 0.00016214143672259844, "loss": 1.2271, "step": 14580 }, { "epoch": 0.18947344483734893, "grad_norm": 0.4766080677509308, "learning_rate": 0.00016213883726068704, "loss": 1.4918, "step": 14581 }, { "epoch": 0.1894864393812648, "grad_norm": 0.3092266917228699, "learning_rate": 0.00016213623779877566, "loss": 1.3528, "step": 14582 }, { "epoch": 0.18949943392518068, "grad_norm": 0.38101866841316223, "learning_rate": 0.00016213363833686428, "loss": 1.5183, "step": 14583 }, { "epoch": 0.18951242846909655, "grad_norm": 0.49301955103874207, "learning_rate": 0.00016213103887495288, "loss": 1.5361, "step": 14584 }, { "epoch": 0.18952542301301242, "grad_norm": 0.4606168568134308, "learning_rate": 0.0001621284394130415, "loss": 1.4337, "step": 14585 }, { "epoch": 0.1895384175569283, "grad_norm": 0.3919869661331177, "learning_rate": 0.00016212583995113013, "loss": 1.4166, "step": 14586 }, { "epoch": 0.18955141210084417, "grad_norm": 0.4393739700317383, "learning_rate": 0.00016212324048921876, "loss": 1.4197, "step": 14587 }, { "epoch": 0.18956440664476004, "grad_norm": 0.4152737855911255, "learning_rate": 0.00016212064102730735, "loss": 1.5014, "step": 14588 }, { "epoch": 0.18957740118867591, "grad_norm": 0.29390281438827515, "learning_rate": 0.00016211804156539595, "loss": 1.3837, "step": 14589 }, { "epoch": 0.1895903957325918, "grad_norm": 0.409537136554718, "learning_rate": 0.0001621154421034846, "loss": 1.3489, "step": 14590 }, { "epoch": 0.18960339027650766, "grad_norm": 0.2716923654079437, "learning_rate": 0.0001621128426415732, "loss": 1.5005, "step": 14591 }, { "epoch": 0.18961638482042353, "grad_norm": 0.356402188539505, "learning_rate": 0.00016211024317966182, "loss": 1.386, "step": 14592 }, { "epoch": 0.1896293793643394, "grad_norm": 0.5671102404594421, "learning_rate": 0.00016210764371775042, "loss": 1.4691, "step": 14593 }, { "epoch": 0.18964237390825528, "grad_norm": 0.41866335272789, "learning_rate": 0.00016210504425583905, "loss": 1.5245, "step": 14594 }, { "epoch": 0.18965536845217115, "grad_norm": 0.39142248034477234, "learning_rate": 0.00016210244479392767, "loss": 1.3377, "step": 14595 }, { "epoch": 0.18966836299608703, "grad_norm": 0.33140134811401367, "learning_rate": 0.00016209984533201627, "loss": 1.4644, "step": 14596 }, { "epoch": 0.1896813575400029, "grad_norm": 0.4844195544719696, "learning_rate": 0.0001620972458701049, "loss": 1.5659, "step": 14597 }, { "epoch": 0.18969435208391877, "grad_norm": 0.471505731344223, "learning_rate": 0.00016209464640819352, "loss": 1.4393, "step": 14598 }, { "epoch": 0.18970734662783464, "grad_norm": 0.46963560581207275, "learning_rate": 0.00016209204694628214, "loss": 1.3687, "step": 14599 }, { "epoch": 0.18972034117175052, "grad_norm": 0.3357981741428375, "learning_rate": 0.00016208944748437074, "loss": 1.392, "step": 14600 }, { "epoch": 0.1897333357156664, "grad_norm": 0.27924999594688416, "learning_rate": 0.00016208684802245934, "loss": 1.3199, "step": 14601 }, { "epoch": 0.18974633025958226, "grad_norm": 0.4085221588611603, "learning_rate": 0.000162084248560548, "loss": 1.2778, "step": 14602 }, { "epoch": 0.18975932480349814, "grad_norm": 0.3391517102718353, "learning_rate": 0.00016208164909863658, "loss": 1.3443, "step": 14603 }, { "epoch": 0.189772319347414, "grad_norm": 0.3961552083492279, "learning_rate": 0.0001620790496367252, "loss": 1.2885, "step": 14604 }, { "epoch": 0.18978531389132988, "grad_norm": 0.34779369831085205, "learning_rate": 0.00016207645017481383, "loss": 1.5441, "step": 14605 }, { "epoch": 0.18979830843524576, "grad_norm": 0.35380983352661133, "learning_rate": 0.00016207385071290243, "loss": 1.3273, "step": 14606 }, { "epoch": 0.18981130297916163, "grad_norm": 0.35235559940338135, "learning_rate": 0.00016207125125099106, "loss": 1.4866, "step": 14607 }, { "epoch": 0.1898242975230775, "grad_norm": 0.38930296897888184, "learning_rate": 0.00016206865178907965, "loss": 1.3539, "step": 14608 }, { "epoch": 0.18983729206699337, "grad_norm": 0.42879459261894226, "learning_rate": 0.0001620660523271683, "loss": 1.4561, "step": 14609 }, { "epoch": 0.18985028661090925, "grad_norm": 0.37104061245918274, "learning_rate": 0.0001620634528652569, "loss": 1.447, "step": 14610 }, { "epoch": 0.18986328115482512, "grad_norm": 0.44099271297454834, "learning_rate": 0.00016206085340334553, "loss": 1.5425, "step": 14611 }, { "epoch": 0.189876275698741, "grad_norm": 0.4174787104129791, "learning_rate": 0.00016205825394143412, "loss": 1.3412, "step": 14612 }, { "epoch": 0.18988927024265687, "grad_norm": 0.33044126629829407, "learning_rate": 0.00016205565447952275, "loss": 1.4188, "step": 14613 }, { "epoch": 0.18990226478657274, "grad_norm": 0.430999755859375, "learning_rate": 0.00016205305501761137, "loss": 1.4897, "step": 14614 }, { "epoch": 0.1899152593304886, "grad_norm": 0.45416224002838135, "learning_rate": 0.00016205045555569997, "loss": 1.4092, "step": 14615 }, { "epoch": 0.18992825387440448, "grad_norm": 0.3304848372936249, "learning_rate": 0.0001620478560937886, "loss": 1.1832, "step": 14616 }, { "epoch": 0.18994124841832036, "grad_norm": 0.28313982486724854, "learning_rate": 0.00016204525663187722, "loss": 1.2612, "step": 14617 }, { "epoch": 0.18995424296223623, "grad_norm": 0.41987302899360657, "learning_rate": 0.00016204265716996582, "loss": 1.5434, "step": 14618 }, { "epoch": 0.1899672375061521, "grad_norm": 0.3710688352584839, "learning_rate": 0.00016204005770805444, "loss": 1.5838, "step": 14619 }, { "epoch": 0.18998023205006798, "grad_norm": 0.4317571818828583, "learning_rate": 0.00016203745824614304, "loss": 1.4844, "step": 14620 }, { "epoch": 0.18999322659398385, "grad_norm": 0.4230884313583374, "learning_rate": 0.0001620348587842317, "loss": 1.3868, "step": 14621 }, { "epoch": 0.19000622113789972, "grad_norm": 0.3468683063983917, "learning_rate": 0.0001620322593223203, "loss": 1.3814, "step": 14622 }, { "epoch": 0.1900192156818156, "grad_norm": 0.42843106389045715, "learning_rate": 0.0001620296598604089, "loss": 1.4123, "step": 14623 }, { "epoch": 0.19003221022573147, "grad_norm": 0.36498942971229553, "learning_rate": 0.0001620270603984975, "loss": 1.3291, "step": 14624 }, { "epoch": 0.19004520476964734, "grad_norm": 0.3694465458393097, "learning_rate": 0.00016202446093658613, "loss": 1.4638, "step": 14625 }, { "epoch": 0.19005819931356321, "grad_norm": 0.5853530168533325, "learning_rate": 0.00016202186147467476, "loss": 1.4738, "step": 14626 }, { "epoch": 0.1900711938574791, "grad_norm": 0.40372177958488464, "learning_rate": 0.00016201926201276336, "loss": 1.4716, "step": 14627 }, { "epoch": 0.19008418840139496, "grad_norm": 0.39939644932746887, "learning_rate": 0.00016201666255085198, "loss": 1.3647, "step": 14628 }, { "epoch": 0.19009718294531083, "grad_norm": 0.7032272815704346, "learning_rate": 0.0001620140630889406, "loss": 1.2941, "step": 14629 }, { "epoch": 0.1901101774892267, "grad_norm": 0.4274415373802185, "learning_rate": 0.0001620114636270292, "loss": 1.2683, "step": 14630 }, { "epoch": 0.19012317203314258, "grad_norm": 0.3828834593296051, "learning_rate": 0.00016200886416511783, "loss": 1.4966, "step": 14631 }, { "epoch": 0.19013616657705845, "grad_norm": 0.41696980595588684, "learning_rate": 0.00016200626470320642, "loss": 1.57, "step": 14632 }, { "epoch": 0.19014916112097432, "grad_norm": 0.3982534110546112, "learning_rate": 0.00016200366524129507, "loss": 1.5723, "step": 14633 }, { "epoch": 0.1901621556648902, "grad_norm": 0.40588676929473877, "learning_rate": 0.00016200106577938367, "loss": 1.3448, "step": 14634 }, { "epoch": 0.19017515020880607, "grad_norm": 0.3777240514755249, "learning_rate": 0.0001619984663174723, "loss": 1.2801, "step": 14635 }, { "epoch": 0.19018814475272194, "grad_norm": 0.4189203381538391, "learning_rate": 0.0001619958668555609, "loss": 1.3555, "step": 14636 }, { "epoch": 0.19020113929663782, "grad_norm": 0.35768625140190125, "learning_rate": 0.00016199326739364952, "loss": 1.407, "step": 14637 }, { "epoch": 0.1902141338405537, "grad_norm": 0.4081965982913971, "learning_rate": 0.00016199066793173814, "loss": 1.4959, "step": 14638 }, { "epoch": 0.19022712838446956, "grad_norm": 0.37342196702957153, "learning_rate": 0.00016198806846982674, "loss": 1.5566, "step": 14639 }, { "epoch": 0.19024012292838544, "grad_norm": 0.3264179527759552, "learning_rate": 0.0001619854690079154, "loss": 1.4074, "step": 14640 }, { "epoch": 0.1902531174723013, "grad_norm": 0.3984972834587097, "learning_rate": 0.000161982869546004, "loss": 1.4893, "step": 14641 }, { "epoch": 0.19026611201621718, "grad_norm": 0.4176251292228699, "learning_rate": 0.0001619802700840926, "loss": 1.4782, "step": 14642 }, { "epoch": 0.19027910656013305, "grad_norm": 0.38510435819625854, "learning_rate": 0.0001619776706221812, "loss": 1.4315, "step": 14643 }, { "epoch": 0.19029210110404893, "grad_norm": 0.3662984371185303, "learning_rate": 0.00016197507116026984, "loss": 1.3816, "step": 14644 }, { "epoch": 0.1903050956479648, "grad_norm": 0.34030601382255554, "learning_rate": 0.00016197247169835846, "loss": 1.2881, "step": 14645 }, { "epoch": 0.19031809019188067, "grad_norm": 0.40683484077453613, "learning_rate": 0.00016196987223644706, "loss": 1.4184, "step": 14646 }, { "epoch": 0.19033108473579655, "grad_norm": 0.4211297929286957, "learning_rate": 0.00016196727277453568, "loss": 1.4294, "step": 14647 }, { "epoch": 0.19034407927971242, "grad_norm": 0.5057207942008972, "learning_rate": 0.0001619646733126243, "loss": 1.4789, "step": 14648 }, { "epoch": 0.1903570738236283, "grad_norm": 0.4111502170562744, "learning_rate": 0.0001619620738507129, "loss": 1.3301, "step": 14649 }, { "epoch": 0.19037006836754417, "grad_norm": 0.31516367197036743, "learning_rate": 0.00016195947438880153, "loss": 1.4038, "step": 14650 }, { "epoch": 0.19038306291146004, "grad_norm": 0.38898375630378723, "learning_rate": 0.00016195687492689013, "loss": 1.7325, "step": 14651 }, { "epoch": 0.1903960574553759, "grad_norm": 0.3711279332637787, "learning_rate": 0.00016195427546497878, "loss": 1.3553, "step": 14652 }, { "epoch": 0.19040905199929178, "grad_norm": 0.4377843141555786, "learning_rate": 0.00016195167600306737, "loss": 1.5221, "step": 14653 }, { "epoch": 0.19042204654320766, "grad_norm": 0.4867298901081085, "learning_rate": 0.000161949076541156, "loss": 1.4911, "step": 14654 }, { "epoch": 0.19043504108712356, "grad_norm": 0.41919898986816406, "learning_rate": 0.0001619464770792446, "loss": 1.2737, "step": 14655 }, { "epoch": 0.19044803563103943, "grad_norm": 0.46002867817878723, "learning_rate": 0.00016194387761733322, "loss": 1.4875, "step": 14656 }, { "epoch": 0.1904610301749553, "grad_norm": 0.3393423557281494, "learning_rate": 0.00016194127815542185, "loss": 1.4528, "step": 14657 }, { "epoch": 0.19047402471887118, "grad_norm": 0.37846317887306213, "learning_rate": 0.00016193867869351044, "loss": 1.2983, "step": 14658 }, { "epoch": 0.19048701926278705, "grad_norm": 0.38445013761520386, "learning_rate": 0.00016193607923159907, "loss": 1.4298, "step": 14659 }, { "epoch": 0.19050001380670292, "grad_norm": 0.3944438099861145, "learning_rate": 0.0001619334797696877, "loss": 1.4748, "step": 14660 }, { "epoch": 0.1905130083506188, "grad_norm": 0.45238086581230164, "learning_rate": 0.0001619308803077763, "loss": 1.3561, "step": 14661 }, { "epoch": 0.19052600289453467, "grad_norm": 0.3778780996799469, "learning_rate": 0.00016192828084586491, "loss": 1.3976, "step": 14662 }, { "epoch": 0.19053899743845054, "grad_norm": 0.436320424079895, "learning_rate": 0.0001619256813839535, "loss": 1.4859, "step": 14663 }, { "epoch": 0.19055199198236641, "grad_norm": 0.5251297354698181, "learning_rate": 0.00016192308192204216, "loss": 1.6171, "step": 14664 }, { "epoch": 0.1905649865262823, "grad_norm": 0.4117533564567566, "learning_rate": 0.00016192048246013076, "loss": 1.3532, "step": 14665 }, { "epoch": 0.19057798107019816, "grad_norm": 0.36731234192848206, "learning_rate": 0.00016191788299821938, "loss": 1.3374, "step": 14666 }, { "epoch": 0.19059097561411403, "grad_norm": 0.3928210437297821, "learning_rate": 0.00016191528353630798, "loss": 1.5527, "step": 14667 }, { "epoch": 0.1906039701580299, "grad_norm": 0.40122100710868835, "learning_rate": 0.0001619126840743966, "loss": 1.559, "step": 14668 }, { "epoch": 0.19061696470194578, "grad_norm": 0.4511236846446991, "learning_rate": 0.00016191008461248523, "loss": 1.6414, "step": 14669 }, { "epoch": 0.19062995924586165, "grad_norm": 0.46413132548332214, "learning_rate": 0.00016190748515057383, "loss": 1.3942, "step": 14670 }, { "epoch": 0.19064295378977753, "grad_norm": 0.41753649711608887, "learning_rate": 0.00016190488568866245, "loss": 1.4066, "step": 14671 }, { "epoch": 0.1906559483336934, "grad_norm": 0.43236780166625977, "learning_rate": 0.00016190228622675108, "loss": 1.3647, "step": 14672 }, { "epoch": 0.19066894287760927, "grad_norm": 0.41736575961112976, "learning_rate": 0.00016189968676483967, "loss": 1.514, "step": 14673 }, { "epoch": 0.19068193742152514, "grad_norm": 0.4440431296825409, "learning_rate": 0.0001618970873029283, "loss": 1.5375, "step": 14674 }, { "epoch": 0.19069493196544102, "grad_norm": 0.4027050733566284, "learning_rate": 0.0001618944878410169, "loss": 1.2582, "step": 14675 }, { "epoch": 0.1907079265093569, "grad_norm": 0.4415140151977539, "learning_rate": 0.00016189188837910555, "loss": 1.3887, "step": 14676 }, { "epoch": 0.19072092105327276, "grad_norm": 0.4337286651134491, "learning_rate": 0.00016188928891719415, "loss": 1.3946, "step": 14677 }, { "epoch": 0.19073391559718864, "grad_norm": 0.4080226719379425, "learning_rate": 0.00016188668945528277, "loss": 1.4649, "step": 14678 }, { "epoch": 0.1907469101411045, "grad_norm": 0.31970280408859253, "learning_rate": 0.0001618840899933714, "loss": 1.2141, "step": 14679 }, { "epoch": 0.19075990468502038, "grad_norm": 0.41169095039367676, "learning_rate": 0.00016188149053146, "loss": 1.4313, "step": 14680 }, { "epoch": 0.19077289922893625, "grad_norm": 0.3573702871799469, "learning_rate": 0.00016187889106954862, "loss": 1.4711, "step": 14681 }, { "epoch": 0.19078589377285213, "grad_norm": 0.3207574784755707, "learning_rate": 0.00016187629160763721, "loss": 1.2906, "step": 14682 }, { "epoch": 0.190798888316768, "grad_norm": 0.392945796251297, "learning_rate": 0.00016187369214572587, "loss": 1.3037, "step": 14683 }, { "epoch": 0.19081188286068387, "grad_norm": 0.3822785019874573, "learning_rate": 0.00016187109268381446, "loss": 1.3718, "step": 14684 }, { "epoch": 0.19082487740459975, "grad_norm": 0.35723164677619934, "learning_rate": 0.00016186849322190306, "loss": 1.3592, "step": 14685 }, { "epoch": 0.19083787194851562, "grad_norm": 0.40905579924583435, "learning_rate": 0.00016186589375999168, "loss": 1.5063, "step": 14686 }, { "epoch": 0.1908508664924315, "grad_norm": 0.2638438045978546, "learning_rate": 0.0001618632942980803, "loss": 1.2266, "step": 14687 }, { "epoch": 0.19086386103634737, "grad_norm": 0.32551392912864685, "learning_rate": 0.00016186069483616893, "loss": 1.3832, "step": 14688 }, { "epoch": 0.19087685558026324, "grad_norm": 0.3668440580368042, "learning_rate": 0.00016185809537425753, "loss": 1.3449, "step": 14689 }, { "epoch": 0.1908898501241791, "grad_norm": 0.3846147954463959, "learning_rate": 0.00016185549591234616, "loss": 1.4486, "step": 14690 }, { "epoch": 0.19090284466809498, "grad_norm": 0.48661091923713684, "learning_rate": 0.00016185289645043478, "loss": 1.4397, "step": 14691 }, { "epoch": 0.19091583921201086, "grad_norm": 0.42199310660362244, "learning_rate": 0.00016185029698852338, "loss": 1.5245, "step": 14692 }, { "epoch": 0.19092883375592673, "grad_norm": 0.40835314989089966, "learning_rate": 0.000161847697526612, "loss": 1.4548, "step": 14693 }, { "epoch": 0.1909418282998426, "grad_norm": 0.3708430230617523, "learning_rate": 0.0001618450980647006, "loss": 1.3529, "step": 14694 }, { "epoch": 0.19095482284375848, "grad_norm": 0.4395867586135864, "learning_rate": 0.00016184249860278925, "loss": 1.5337, "step": 14695 }, { "epoch": 0.19096781738767435, "grad_norm": 0.34248197078704834, "learning_rate": 0.00016183989914087785, "loss": 1.3031, "step": 14696 }, { "epoch": 0.19098081193159022, "grad_norm": 0.4046511948108673, "learning_rate": 0.00016183729967896645, "loss": 1.3646, "step": 14697 }, { "epoch": 0.1909938064755061, "grad_norm": 0.40073636174201965, "learning_rate": 0.00016183470021705507, "loss": 1.5307, "step": 14698 }, { "epoch": 0.19100680101942197, "grad_norm": 0.4188086688518524, "learning_rate": 0.0001618321007551437, "loss": 1.3921, "step": 14699 }, { "epoch": 0.19101979556333784, "grad_norm": 0.42957553267478943, "learning_rate": 0.00016182950129323232, "loss": 1.2958, "step": 14700 }, { "epoch": 0.1910327901072537, "grad_norm": 0.40109172463417053, "learning_rate": 0.00016182690183132092, "loss": 1.5406, "step": 14701 }, { "epoch": 0.1910457846511696, "grad_norm": 0.46192339062690735, "learning_rate": 0.00016182430236940954, "loss": 1.4243, "step": 14702 }, { "epoch": 0.19105877919508546, "grad_norm": 0.4547237455844879, "learning_rate": 0.00016182170290749817, "loss": 1.5191, "step": 14703 }, { "epoch": 0.19107177373900133, "grad_norm": 0.37635189294815063, "learning_rate": 0.00016181910344558676, "loss": 1.3985, "step": 14704 }, { "epoch": 0.1910847682829172, "grad_norm": 0.41376179456710815, "learning_rate": 0.0001618165039836754, "loss": 1.5099, "step": 14705 }, { "epoch": 0.19109776282683308, "grad_norm": 0.4531638026237488, "learning_rate": 0.00016181390452176398, "loss": 1.4845, "step": 14706 }, { "epoch": 0.19111075737074895, "grad_norm": 0.37797045707702637, "learning_rate": 0.00016181130505985264, "loss": 1.5374, "step": 14707 }, { "epoch": 0.19112375191466482, "grad_norm": 0.3327268064022064, "learning_rate": 0.00016180870559794123, "loss": 1.4572, "step": 14708 }, { "epoch": 0.1911367464585807, "grad_norm": 0.3709682524204254, "learning_rate": 0.00016180610613602986, "loss": 1.493, "step": 14709 }, { "epoch": 0.19114974100249657, "grad_norm": 0.41870760917663574, "learning_rate": 0.00016180350667411846, "loss": 1.6566, "step": 14710 }, { "epoch": 0.19116273554641244, "grad_norm": 0.43770626187324524, "learning_rate": 0.00016180090721220708, "loss": 1.2873, "step": 14711 }, { "epoch": 0.19117573009032832, "grad_norm": 0.3818780481815338, "learning_rate": 0.0001617983077502957, "loss": 1.2588, "step": 14712 }, { "epoch": 0.1911887246342442, "grad_norm": 0.3426341414451599, "learning_rate": 0.0001617957082883843, "loss": 1.4366, "step": 14713 }, { "epoch": 0.19120171917816006, "grad_norm": 0.43491262197494507, "learning_rate": 0.00016179310882647293, "loss": 1.5368, "step": 14714 }, { "epoch": 0.19121471372207594, "grad_norm": 0.4278816878795624, "learning_rate": 0.00016179050936456155, "loss": 1.3731, "step": 14715 }, { "epoch": 0.1912277082659918, "grad_norm": 0.46045053005218506, "learning_rate": 0.00016178790990265015, "loss": 1.5695, "step": 14716 }, { "epoch": 0.19124070280990768, "grad_norm": 0.7254002094268799, "learning_rate": 0.00016178531044073877, "loss": 1.4771, "step": 14717 }, { "epoch": 0.19125369735382355, "grad_norm": 0.3770655393600464, "learning_rate": 0.0001617827109788274, "loss": 1.2624, "step": 14718 }, { "epoch": 0.19126669189773943, "grad_norm": 0.3826221227645874, "learning_rate": 0.00016178011151691602, "loss": 1.1633, "step": 14719 }, { "epoch": 0.1912796864416553, "grad_norm": 0.44069963693618774, "learning_rate": 0.00016177751205500462, "loss": 1.5034, "step": 14720 }, { "epoch": 0.19129268098557117, "grad_norm": 0.399186372756958, "learning_rate": 0.00016177491259309324, "loss": 1.3803, "step": 14721 }, { "epoch": 0.19130567552948705, "grad_norm": 0.3506208658218384, "learning_rate": 0.00016177231313118187, "loss": 1.2111, "step": 14722 }, { "epoch": 0.19131867007340292, "grad_norm": 0.3993605077266693, "learning_rate": 0.00016176971366927047, "loss": 1.5176, "step": 14723 }, { "epoch": 0.1913316646173188, "grad_norm": 0.376873254776001, "learning_rate": 0.0001617671142073591, "loss": 1.3856, "step": 14724 }, { "epoch": 0.19134465916123466, "grad_norm": 0.3938893973827362, "learning_rate": 0.0001617645147454477, "loss": 1.483, "step": 14725 }, { "epoch": 0.19135765370515054, "grad_norm": 0.4488093852996826, "learning_rate": 0.0001617619152835363, "loss": 1.4998, "step": 14726 }, { "epoch": 0.1913706482490664, "grad_norm": 0.4942302703857422, "learning_rate": 0.00016175931582162494, "loss": 1.4478, "step": 14727 }, { "epoch": 0.19138364279298228, "grad_norm": 0.47813063859939575, "learning_rate": 0.00016175671635971353, "loss": 1.3778, "step": 14728 }, { "epoch": 0.19139663733689816, "grad_norm": 0.35897913575172424, "learning_rate": 0.00016175411689780216, "loss": 1.4086, "step": 14729 }, { "epoch": 0.19140963188081403, "grad_norm": 0.42157989740371704, "learning_rate": 0.00016175151743589078, "loss": 1.4952, "step": 14730 }, { "epoch": 0.19142262642472993, "grad_norm": 0.38916924595832825, "learning_rate": 0.0001617489179739794, "loss": 1.3164, "step": 14731 }, { "epoch": 0.1914356209686458, "grad_norm": 0.4732559323310852, "learning_rate": 0.000161746318512068, "loss": 1.4816, "step": 14732 }, { "epoch": 0.19144861551256168, "grad_norm": 0.3626325726509094, "learning_rate": 0.00016174371905015663, "loss": 1.3921, "step": 14733 }, { "epoch": 0.19146161005647755, "grad_norm": 0.4242331385612488, "learning_rate": 0.00016174111958824525, "loss": 1.2925, "step": 14734 }, { "epoch": 0.19147460460039342, "grad_norm": 0.3638611137866974, "learning_rate": 0.00016173852012633385, "loss": 1.4722, "step": 14735 }, { "epoch": 0.1914875991443093, "grad_norm": 0.4010542333126068, "learning_rate": 0.00016173592066442248, "loss": 1.4748, "step": 14736 }, { "epoch": 0.19150059368822517, "grad_norm": 0.41190001368522644, "learning_rate": 0.00016173332120251107, "loss": 1.3564, "step": 14737 }, { "epoch": 0.19151358823214104, "grad_norm": 0.38537663221359253, "learning_rate": 0.00016173072174059972, "loss": 1.3698, "step": 14738 }, { "epoch": 0.19152658277605691, "grad_norm": 0.3804991841316223, "learning_rate": 0.00016172812227868832, "loss": 1.2688, "step": 14739 }, { "epoch": 0.1915395773199728, "grad_norm": 0.37376806139945984, "learning_rate": 0.00016172552281677692, "loss": 1.5053, "step": 14740 }, { "epoch": 0.19155257186388866, "grad_norm": 0.4185049831867218, "learning_rate": 0.00016172292335486554, "loss": 1.4177, "step": 14741 }, { "epoch": 0.19156556640780453, "grad_norm": 0.43594786524772644, "learning_rate": 0.00016172032389295417, "loss": 1.4985, "step": 14742 }, { "epoch": 0.1915785609517204, "grad_norm": 0.3320576250553131, "learning_rate": 0.0001617177244310428, "loss": 1.3372, "step": 14743 }, { "epoch": 0.19159155549563628, "grad_norm": 0.40826481580734253, "learning_rate": 0.0001617151249691314, "loss": 1.4166, "step": 14744 }, { "epoch": 0.19160455003955215, "grad_norm": 0.4043372869491577, "learning_rate": 0.00016171252550722001, "loss": 1.5113, "step": 14745 }, { "epoch": 0.19161754458346802, "grad_norm": 0.567789614200592, "learning_rate": 0.00016170992604530864, "loss": 1.4549, "step": 14746 }, { "epoch": 0.1916305391273839, "grad_norm": 0.444301038980484, "learning_rate": 0.00016170732658339724, "loss": 1.3388, "step": 14747 }, { "epoch": 0.19164353367129977, "grad_norm": 0.3947491943836212, "learning_rate": 0.00016170472712148586, "loss": 1.3371, "step": 14748 }, { "epoch": 0.19165652821521564, "grad_norm": 0.3155144155025482, "learning_rate": 0.00016170212765957446, "loss": 1.312, "step": 14749 }, { "epoch": 0.19166952275913152, "grad_norm": 0.30175110697746277, "learning_rate": 0.0001616995281976631, "loss": 1.3784, "step": 14750 }, { "epoch": 0.1916825173030474, "grad_norm": 0.37675991654396057, "learning_rate": 0.0001616969287357517, "loss": 1.5213, "step": 14751 }, { "epoch": 0.19169551184696326, "grad_norm": 0.4649588167667389, "learning_rate": 0.0001616943292738403, "loss": 1.3588, "step": 14752 }, { "epoch": 0.19170850639087914, "grad_norm": 0.38730716705322266, "learning_rate": 0.00016169172981192896, "loss": 1.4931, "step": 14753 }, { "epoch": 0.191721500934795, "grad_norm": 0.3740478456020355, "learning_rate": 0.00016168913035001755, "loss": 1.4747, "step": 14754 }, { "epoch": 0.19173449547871088, "grad_norm": 0.3958975672721863, "learning_rate": 0.00016168653088810618, "loss": 1.4363, "step": 14755 }, { "epoch": 0.19174749002262675, "grad_norm": 0.44136694073677063, "learning_rate": 0.00016168393142619478, "loss": 1.4861, "step": 14756 }, { "epoch": 0.19176048456654263, "grad_norm": 0.34408873319625854, "learning_rate": 0.0001616813319642834, "loss": 1.3286, "step": 14757 }, { "epoch": 0.1917734791104585, "grad_norm": 0.3613181412220001, "learning_rate": 0.00016167873250237202, "loss": 1.348, "step": 14758 }, { "epoch": 0.19178647365437437, "grad_norm": 0.39268746972084045, "learning_rate": 0.00016167613304046062, "loss": 1.5056, "step": 14759 }, { "epoch": 0.19179946819829025, "grad_norm": 0.43165770173072815, "learning_rate": 0.00016167353357854925, "loss": 1.5264, "step": 14760 }, { "epoch": 0.19181246274220612, "grad_norm": 0.44122982025146484, "learning_rate": 0.00016167093411663787, "loss": 1.4459, "step": 14761 }, { "epoch": 0.191825457286122, "grad_norm": 0.406219482421875, "learning_rate": 0.0001616683346547265, "loss": 1.4387, "step": 14762 }, { "epoch": 0.19183845183003786, "grad_norm": 0.575989305973053, "learning_rate": 0.0001616657351928151, "loss": 1.3879, "step": 14763 }, { "epoch": 0.19185144637395374, "grad_norm": 0.3489566445350647, "learning_rate": 0.0001616631357309037, "loss": 1.4151, "step": 14764 }, { "epoch": 0.1918644409178696, "grad_norm": 0.36041444540023804, "learning_rate": 0.00016166053626899234, "loss": 1.3354, "step": 14765 }, { "epoch": 0.19187743546178548, "grad_norm": 0.49323713779449463, "learning_rate": 0.00016165793680708094, "loss": 1.4864, "step": 14766 }, { "epoch": 0.19189043000570136, "grad_norm": 0.3746762275695801, "learning_rate": 0.00016165533734516956, "loss": 1.425, "step": 14767 }, { "epoch": 0.19190342454961723, "grad_norm": 0.30793729424476624, "learning_rate": 0.00016165273788325816, "loss": 1.359, "step": 14768 }, { "epoch": 0.1919164190935331, "grad_norm": 0.46501293778419495, "learning_rate": 0.00016165013842134679, "loss": 1.304, "step": 14769 }, { "epoch": 0.19192941363744898, "grad_norm": 0.39953964948654175, "learning_rate": 0.0001616475389594354, "loss": 1.4446, "step": 14770 }, { "epoch": 0.19194240818136485, "grad_norm": 0.39432424306869507, "learning_rate": 0.000161644939497524, "loss": 1.3085, "step": 14771 }, { "epoch": 0.19195540272528072, "grad_norm": 0.3681644797325134, "learning_rate": 0.00016164234003561263, "loss": 1.4651, "step": 14772 }, { "epoch": 0.1919683972691966, "grad_norm": 0.43860921263694763, "learning_rate": 0.00016163974057370126, "loss": 1.3085, "step": 14773 }, { "epoch": 0.19198139181311247, "grad_norm": 0.43673309683799744, "learning_rate": 0.00016163714111178988, "loss": 1.3206, "step": 14774 }, { "epoch": 0.19199438635702834, "grad_norm": 0.35295191407203674, "learning_rate": 0.00016163454164987848, "loss": 1.2792, "step": 14775 }, { "epoch": 0.1920073809009442, "grad_norm": 0.46073010563850403, "learning_rate": 0.0001616319421879671, "loss": 1.4596, "step": 14776 }, { "epoch": 0.1920203754448601, "grad_norm": 0.47358232736587524, "learning_rate": 0.00016162934272605573, "loss": 1.4292, "step": 14777 }, { "epoch": 0.19203336998877596, "grad_norm": 0.46355482935905457, "learning_rate": 0.00016162674326414432, "loss": 1.3921, "step": 14778 }, { "epoch": 0.19204636453269183, "grad_norm": 0.3571394681930542, "learning_rate": 0.00016162414380223295, "loss": 1.3749, "step": 14779 }, { "epoch": 0.1920593590766077, "grad_norm": 0.44956809282302856, "learning_rate": 0.00016162154434032155, "loss": 1.3928, "step": 14780 }, { "epoch": 0.19207235362052358, "grad_norm": 0.385164350271225, "learning_rate": 0.00016161894487841017, "loss": 1.254, "step": 14781 }, { "epoch": 0.19208534816443945, "grad_norm": 0.3551305830478668, "learning_rate": 0.0001616163454164988, "loss": 1.5076, "step": 14782 }, { "epoch": 0.19209834270835532, "grad_norm": 0.40480339527130127, "learning_rate": 0.0001616137459545874, "loss": 1.2248, "step": 14783 }, { "epoch": 0.1921113372522712, "grad_norm": 0.3807767927646637, "learning_rate": 0.00016161114649267602, "loss": 1.4591, "step": 14784 }, { "epoch": 0.19212433179618707, "grad_norm": 0.4305060803890228, "learning_rate": 0.00016160854703076464, "loss": 1.4321, "step": 14785 }, { "epoch": 0.19213732634010294, "grad_norm": 0.4142296016216278, "learning_rate": 0.00016160594756885327, "loss": 1.4799, "step": 14786 }, { "epoch": 0.19215032088401882, "grad_norm": 0.3961220979690552, "learning_rate": 0.00016160334810694186, "loss": 1.5275, "step": 14787 }, { "epoch": 0.1921633154279347, "grad_norm": 0.37783369421958923, "learning_rate": 0.0001616007486450305, "loss": 1.5385, "step": 14788 }, { "epoch": 0.19217630997185056, "grad_norm": 0.3969023525714874, "learning_rate": 0.0001615981491831191, "loss": 1.6432, "step": 14789 }, { "epoch": 0.19218930451576643, "grad_norm": 0.37032586336135864, "learning_rate": 0.0001615955497212077, "loss": 1.3727, "step": 14790 }, { "epoch": 0.1922022990596823, "grad_norm": 0.4205678105354309, "learning_rate": 0.00016159295025929633, "loss": 1.3817, "step": 14791 }, { "epoch": 0.19221529360359818, "grad_norm": 0.3681456744670868, "learning_rate": 0.00016159035079738496, "loss": 1.3027, "step": 14792 }, { "epoch": 0.19222828814751405, "grad_norm": 0.36517757177352905, "learning_rate": 0.00016158775133547358, "loss": 1.3506, "step": 14793 }, { "epoch": 0.19224128269142993, "grad_norm": 0.4429134428501129, "learning_rate": 0.00016158515187356218, "loss": 1.4592, "step": 14794 }, { "epoch": 0.1922542772353458, "grad_norm": 0.4614850580692291, "learning_rate": 0.00016158255241165078, "loss": 1.604, "step": 14795 }, { "epoch": 0.19226727177926167, "grad_norm": 0.4379009008407593, "learning_rate": 0.00016157995294973943, "loss": 1.5071, "step": 14796 }, { "epoch": 0.19228026632317755, "grad_norm": 0.28308600187301636, "learning_rate": 0.00016157735348782803, "loss": 1.3241, "step": 14797 }, { "epoch": 0.19229326086709342, "grad_norm": 0.42078229784965515, "learning_rate": 0.00016157475402591665, "loss": 1.4385, "step": 14798 }, { "epoch": 0.1923062554110093, "grad_norm": 0.3954154849052429, "learning_rate": 0.00016157215456400525, "loss": 1.3316, "step": 14799 }, { "epoch": 0.19231924995492516, "grad_norm": 0.29797929525375366, "learning_rate": 0.00016156955510209387, "loss": 1.418, "step": 14800 }, { "epoch": 0.19233224449884104, "grad_norm": 0.3662402033805847, "learning_rate": 0.0001615669556401825, "loss": 1.6391, "step": 14801 }, { "epoch": 0.1923452390427569, "grad_norm": 0.41779810190200806, "learning_rate": 0.0001615643561782711, "loss": 1.4004, "step": 14802 }, { "epoch": 0.19235823358667278, "grad_norm": 0.34955495595932007, "learning_rate": 0.00016156175671635972, "loss": 1.4351, "step": 14803 }, { "epoch": 0.19237122813058866, "grad_norm": 0.35122936964035034, "learning_rate": 0.00016155915725444834, "loss": 1.3991, "step": 14804 }, { "epoch": 0.19238422267450453, "grad_norm": 0.4929906129837036, "learning_rate": 0.00016155655779253697, "loss": 1.4813, "step": 14805 }, { "epoch": 0.1923972172184204, "grad_norm": 0.3611014485359192, "learning_rate": 0.00016155395833062557, "loss": 1.3937, "step": 14806 }, { "epoch": 0.1924102117623363, "grad_norm": 0.2804672122001648, "learning_rate": 0.00016155135886871416, "loss": 1.273, "step": 14807 }, { "epoch": 0.19242320630625218, "grad_norm": 0.3807078003883362, "learning_rate": 0.00016154875940680281, "loss": 1.2595, "step": 14808 }, { "epoch": 0.19243620085016805, "grad_norm": 0.3427078425884247, "learning_rate": 0.0001615461599448914, "loss": 1.2713, "step": 14809 }, { "epoch": 0.19244919539408392, "grad_norm": 0.44836607575416565, "learning_rate": 0.00016154356048298004, "loss": 1.2625, "step": 14810 }, { "epoch": 0.1924621899379998, "grad_norm": 0.4267934560775757, "learning_rate": 0.00016154096102106863, "loss": 1.2648, "step": 14811 }, { "epoch": 0.19247518448191567, "grad_norm": 0.461042195558548, "learning_rate": 0.00016153836155915726, "loss": 1.4129, "step": 14812 }, { "epoch": 0.19248817902583154, "grad_norm": 0.430828720331192, "learning_rate": 0.00016153576209724588, "loss": 1.3693, "step": 14813 }, { "epoch": 0.1925011735697474, "grad_norm": 0.3735158145427704, "learning_rate": 0.00016153316263533448, "loss": 1.4946, "step": 14814 }, { "epoch": 0.1925141681136633, "grad_norm": 0.2744368016719818, "learning_rate": 0.0001615305631734231, "loss": 1.3076, "step": 14815 }, { "epoch": 0.19252716265757916, "grad_norm": 0.4095487594604492, "learning_rate": 0.00016152796371151173, "loss": 1.3416, "step": 14816 }, { "epoch": 0.19254015720149503, "grad_norm": 0.35569536685943604, "learning_rate": 0.00016152536424960035, "loss": 1.3008, "step": 14817 }, { "epoch": 0.1925531517454109, "grad_norm": 0.3805360496044159, "learning_rate": 0.00016152276478768895, "loss": 1.3214, "step": 14818 }, { "epoch": 0.19256614628932678, "grad_norm": 0.3798256814479828, "learning_rate": 0.00016152016532577755, "loss": 1.4774, "step": 14819 }, { "epoch": 0.19257914083324265, "grad_norm": 0.3908930718898773, "learning_rate": 0.0001615175658638662, "loss": 1.5745, "step": 14820 }, { "epoch": 0.19259213537715852, "grad_norm": 0.37838214635849, "learning_rate": 0.0001615149664019548, "loss": 1.5448, "step": 14821 }, { "epoch": 0.1926051299210744, "grad_norm": 0.4039660096168518, "learning_rate": 0.00016151236694004342, "loss": 1.6515, "step": 14822 }, { "epoch": 0.19261812446499027, "grad_norm": 0.44893568754196167, "learning_rate": 0.00016150976747813202, "loss": 1.484, "step": 14823 }, { "epoch": 0.19263111900890614, "grad_norm": 0.36752158403396606, "learning_rate": 0.00016150716801622064, "loss": 1.367, "step": 14824 }, { "epoch": 0.19264411355282202, "grad_norm": 0.4139344096183777, "learning_rate": 0.00016150456855430927, "loss": 1.4476, "step": 14825 }, { "epoch": 0.1926571080967379, "grad_norm": 0.4148998558521271, "learning_rate": 0.00016150196909239787, "loss": 1.3479, "step": 14826 }, { "epoch": 0.19267010264065376, "grad_norm": 0.41702941060066223, "learning_rate": 0.00016149936963048652, "loss": 1.46, "step": 14827 }, { "epoch": 0.19268309718456963, "grad_norm": 0.3881840407848358, "learning_rate": 0.00016149677016857511, "loss": 1.4713, "step": 14828 }, { "epoch": 0.1926960917284855, "grad_norm": 0.3611997067928314, "learning_rate": 0.00016149417070666374, "loss": 1.3885, "step": 14829 }, { "epoch": 0.19270908627240138, "grad_norm": 0.36825549602508545, "learning_rate": 0.00016149157124475234, "loss": 1.363, "step": 14830 }, { "epoch": 0.19272208081631725, "grad_norm": 0.4498448967933655, "learning_rate": 0.00016148897178284096, "loss": 1.3699, "step": 14831 }, { "epoch": 0.19273507536023313, "grad_norm": 0.40507617592811584, "learning_rate": 0.00016148637232092959, "loss": 1.4294, "step": 14832 }, { "epoch": 0.192748069904149, "grad_norm": 0.3195624053478241, "learning_rate": 0.00016148377285901818, "loss": 1.4273, "step": 14833 }, { "epoch": 0.19276106444806487, "grad_norm": 0.24283470213413239, "learning_rate": 0.0001614811733971068, "loss": 1.1657, "step": 14834 }, { "epoch": 0.19277405899198075, "grad_norm": 0.29763153195381165, "learning_rate": 0.00016147857393519543, "loss": 1.3572, "step": 14835 }, { "epoch": 0.19278705353589662, "grad_norm": 0.3514959514141083, "learning_rate": 0.00016147597447328403, "loss": 1.2956, "step": 14836 }, { "epoch": 0.1928000480798125, "grad_norm": 0.44645237922668457, "learning_rate": 0.00016147337501137265, "loss": 1.6183, "step": 14837 }, { "epoch": 0.19281304262372836, "grad_norm": 0.4316481947898865, "learning_rate": 0.00016147077554946125, "loss": 1.4404, "step": 14838 }, { "epoch": 0.19282603716764424, "grad_norm": 0.3804508149623871, "learning_rate": 0.0001614681760875499, "loss": 1.4531, "step": 14839 }, { "epoch": 0.1928390317115601, "grad_norm": 0.29477280378341675, "learning_rate": 0.0001614655766256385, "loss": 1.2613, "step": 14840 }, { "epoch": 0.19285202625547598, "grad_norm": 0.4052661061286926, "learning_rate": 0.00016146297716372712, "loss": 1.2303, "step": 14841 }, { "epoch": 0.19286502079939186, "grad_norm": 0.38528206944465637, "learning_rate": 0.00016146037770181572, "loss": 1.4822, "step": 14842 }, { "epoch": 0.19287801534330773, "grad_norm": 0.4656522274017334, "learning_rate": 0.00016145777823990435, "loss": 1.4365, "step": 14843 }, { "epoch": 0.1928910098872236, "grad_norm": 0.38580530881881714, "learning_rate": 0.00016145517877799297, "loss": 1.3121, "step": 14844 }, { "epoch": 0.19290400443113948, "grad_norm": 0.4122304320335388, "learning_rate": 0.00016145257931608157, "loss": 1.5367, "step": 14845 }, { "epoch": 0.19291699897505535, "grad_norm": 0.5036138892173767, "learning_rate": 0.0001614499798541702, "loss": 1.5542, "step": 14846 }, { "epoch": 0.19292999351897122, "grad_norm": 0.4620932638645172, "learning_rate": 0.00016144738039225882, "loss": 1.3894, "step": 14847 }, { "epoch": 0.1929429880628871, "grad_norm": 0.395526260137558, "learning_rate": 0.00016144478093034741, "loss": 1.2675, "step": 14848 }, { "epoch": 0.19295598260680297, "grad_norm": 0.42580515146255493, "learning_rate": 0.00016144218146843604, "loss": 1.4859, "step": 14849 }, { "epoch": 0.19296897715071884, "grad_norm": 0.3319178819656372, "learning_rate": 0.00016143958200652464, "loss": 1.0941, "step": 14850 }, { "epoch": 0.1929819716946347, "grad_norm": 0.41081032156944275, "learning_rate": 0.0001614369825446133, "loss": 1.2713, "step": 14851 }, { "epoch": 0.19299496623855059, "grad_norm": 0.42496830224990845, "learning_rate": 0.00016143438308270189, "loss": 1.5343, "step": 14852 }, { "epoch": 0.19300796078246646, "grad_norm": 0.4384685158729553, "learning_rate": 0.0001614317836207905, "loss": 1.5021, "step": 14853 }, { "epoch": 0.19302095532638233, "grad_norm": 0.4159129858016968, "learning_rate": 0.0001614291841588791, "loss": 1.5032, "step": 14854 }, { "epoch": 0.1930339498702982, "grad_norm": 0.3636203408241272, "learning_rate": 0.00016142658469696773, "loss": 1.4775, "step": 14855 }, { "epoch": 0.19304694441421408, "grad_norm": 0.3794700503349304, "learning_rate": 0.00016142398523505636, "loss": 1.4778, "step": 14856 }, { "epoch": 0.19305993895812995, "grad_norm": 0.41769009828567505, "learning_rate": 0.00016142138577314495, "loss": 1.4356, "step": 14857 }, { "epoch": 0.19307293350204582, "grad_norm": 0.41391444206237793, "learning_rate": 0.00016141878631123358, "loss": 1.5713, "step": 14858 }, { "epoch": 0.1930859280459617, "grad_norm": 0.34555503726005554, "learning_rate": 0.0001614161868493222, "loss": 1.3823, "step": 14859 }, { "epoch": 0.19309892258987757, "grad_norm": 0.48385536670684814, "learning_rate": 0.00016141358738741083, "loss": 1.3848, "step": 14860 }, { "epoch": 0.19311191713379344, "grad_norm": 0.3248080909252167, "learning_rate": 0.00016141098792549942, "loss": 1.1726, "step": 14861 }, { "epoch": 0.19312491167770932, "grad_norm": 0.3720283508300781, "learning_rate": 0.00016140838846358805, "loss": 1.2739, "step": 14862 }, { "epoch": 0.1931379062216252, "grad_norm": 0.36990800499916077, "learning_rate": 0.00016140578900167667, "loss": 1.4363, "step": 14863 }, { "epoch": 0.19315090076554106, "grad_norm": 0.46047475934028625, "learning_rate": 0.00016140318953976527, "loss": 1.3494, "step": 14864 }, { "epoch": 0.19316389530945693, "grad_norm": 0.40693244338035583, "learning_rate": 0.0001614005900778539, "loss": 1.3564, "step": 14865 }, { "epoch": 0.1931768898533728, "grad_norm": 0.40175721049308777, "learning_rate": 0.00016139799061594252, "loss": 1.6216, "step": 14866 }, { "epoch": 0.19318988439728868, "grad_norm": 0.49909675121307373, "learning_rate": 0.00016139539115403112, "loss": 1.4139, "step": 14867 }, { "epoch": 0.19320287894120455, "grad_norm": 0.45593997836112976, "learning_rate": 0.00016139279169211974, "loss": 1.3887, "step": 14868 }, { "epoch": 0.19321587348512043, "grad_norm": 0.3343084752559662, "learning_rate": 0.00016139019223020834, "loss": 1.2647, "step": 14869 }, { "epoch": 0.1932288680290363, "grad_norm": 0.34654197096824646, "learning_rate": 0.000161387592768297, "loss": 1.3814, "step": 14870 }, { "epoch": 0.19324186257295217, "grad_norm": 0.3975386619567871, "learning_rate": 0.0001613849933063856, "loss": 1.5305, "step": 14871 }, { "epoch": 0.19325485711686805, "grad_norm": 0.3944903314113617, "learning_rate": 0.0001613823938444742, "loss": 1.3117, "step": 14872 }, { "epoch": 0.19326785166078392, "grad_norm": 0.3762023150920868, "learning_rate": 0.0001613797943825628, "loss": 1.3454, "step": 14873 }, { "epoch": 0.1932808462046998, "grad_norm": 0.451759397983551, "learning_rate": 0.00016137719492065143, "loss": 1.5639, "step": 14874 }, { "epoch": 0.19329384074861566, "grad_norm": 0.375691682100296, "learning_rate": 0.00016137459545874006, "loss": 1.3765, "step": 14875 }, { "epoch": 0.19330683529253154, "grad_norm": 0.4128933250904083, "learning_rate": 0.00016137199599682866, "loss": 1.3414, "step": 14876 }, { "epoch": 0.1933198298364474, "grad_norm": 0.3383465111255646, "learning_rate": 0.00016136939653491728, "loss": 1.3813, "step": 14877 }, { "epoch": 0.19333282438036328, "grad_norm": 0.2938116788864136, "learning_rate": 0.0001613667970730059, "loss": 1.2532, "step": 14878 }, { "epoch": 0.19334581892427916, "grad_norm": 0.4449491798877716, "learning_rate": 0.0001613641976110945, "loss": 1.4268, "step": 14879 }, { "epoch": 0.19335881346819503, "grad_norm": 0.4733844995498657, "learning_rate": 0.00016136159814918313, "loss": 1.4647, "step": 14880 }, { "epoch": 0.1933718080121109, "grad_norm": 0.34632447361946106, "learning_rate": 0.00016135899868727172, "loss": 1.1606, "step": 14881 }, { "epoch": 0.19338480255602677, "grad_norm": 0.3805374801158905, "learning_rate": 0.00016135639922536038, "loss": 1.3975, "step": 14882 }, { "epoch": 0.19339779709994268, "grad_norm": 0.407209187746048, "learning_rate": 0.00016135379976344897, "loss": 1.5714, "step": 14883 }, { "epoch": 0.19341079164385855, "grad_norm": 0.4528035819530487, "learning_rate": 0.0001613512003015376, "loss": 1.3497, "step": 14884 }, { "epoch": 0.19342378618777442, "grad_norm": 0.5349000096321106, "learning_rate": 0.0001613486008396262, "loss": 1.5769, "step": 14885 }, { "epoch": 0.1934367807316903, "grad_norm": 0.3710574209690094, "learning_rate": 0.00016134600137771482, "loss": 1.4136, "step": 14886 }, { "epoch": 0.19344977527560617, "grad_norm": 0.3422017991542816, "learning_rate": 0.00016134340191580344, "loss": 1.2903, "step": 14887 }, { "epoch": 0.19346276981952204, "grad_norm": 0.37780115008354187, "learning_rate": 0.00016134080245389204, "loss": 1.2997, "step": 14888 }, { "epoch": 0.1934757643634379, "grad_norm": 0.41516053676605225, "learning_rate": 0.00016133820299198067, "loss": 1.3218, "step": 14889 }, { "epoch": 0.1934887589073538, "grad_norm": 0.28825294971466064, "learning_rate": 0.0001613356035300693, "loss": 1.45, "step": 14890 }, { "epoch": 0.19350175345126966, "grad_norm": 0.34397125244140625, "learning_rate": 0.0001613330040681579, "loss": 1.4276, "step": 14891 }, { "epoch": 0.19351474799518553, "grad_norm": 0.47288596630096436, "learning_rate": 0.0001613304046062465, "loss": 1.3474, "step": 14892 }, { "epoch": 0.1935277425391014, "grad_norm": 0.43025270104408264, "learning_rate": 0.0001613278051443351, "loss": 1.2521, "step": 14893 }, { "epoch": 0.19354073708301728, "grad_norm": 0.4127647876739502, "learning_rate": 0.00016132520568242376, "loss": 1.3128, "step": 14894 }, { "epoch": 0.19355373162693315, "grad_norm": 0.42666390538215637, "learning_rate": 0.00016132260622051236, "loss": 1.4336, "step": 14895 }, { "epoch": 0.19356672617084902, "grad_norm": 0.36437827348709106, "learning_rate": 0.00016132000675860098, "loss": 1.3463, "step": 14896 }, { "epoch": 0.1935797207147649, "grad_norm": 0.5230821371078491, "learning_rate": 0.00016131740729668958, "loss": 1.4383, "step": 14897 }, { "epoch": 0.19359271525868077, "grad_norm": 0.38183239102363586, "learning_rate": 0.0001613148078347782, "loss": 1.458, "step": 14898 }, { "epoch": 0.19360570980259664, "grad_norm": 0.3007412552833557, "learning_rate": 0.00016131220837286683, "loss": 1.4062, "step": 14899 }, { "epoch": 0.19361870434651252, "grad_norm": 0.41883182525634766, "learning_rate": 0.00016130960891095543, "loss": 1.4188, "step": 14900 }, { "epoch": 0.1936316988904284, "grad_norm": 0.39784562587738037, "learning_rate": 0.00016130700944904408, "loss": 1.3255, "step": 14901 }, { "epoch": 0.19364469343434426, "grad_norm": 0.4048933684825897, "learning_rate": 0.00016130440998713268, "loss": 1.4297, "step": 14902 }, { "epoch": 0.19365768797826013, "grad_norm": 0.47147148847579956, "learning_rate": 0.00016130181052522127, "loss": 1.4944, "step": 14903 }, { "epoch": 0.193670682522176, "grad_norm": 0.42690977454185486, "learning_rate": 0.0001612992110633099, "loss": 1.4924, "step": 14904 }, { "epoch": 0.19368367706609188, "grad_norm": 0.43294331431388855, "learning_rate": 0.00016129661160139852, "loss": 1.3688, "step": 14905 }, { "epoch": 0.19369667161000775, "grad_norm": 0.4685990810394287, "learning_rate": 0.00016129401213948715, "loss": 1.6216, "step": 14906 }, { "epoch": 0.19370966615392363, "grad_norm": 0.48875540494918823, "learning_rate": 0.00016129141267757574, "loss": 1.5112, "step": 14907 }, { "epoch": 0.1937226606978395, "grad_norm": 0.3637704849243164, "learning_rate": 0.00016128881321566437, "loss": 1.3556, "step": 14908 }, { "epoch": 0.19373565524175537, "grad_norm": 0.3244069516658783, "learning_rate": 0.000161286213753753, "loss": 1.323, "step": 14909 }, { "epoch": 0.19374864978567125, "grad_norm": 0.4190044105052948, "learning_rate": 0.0001612836142918416, "loss": 1.2397, "step": 14910 }, { "epoch": 0.19376164432958712, "grad_norm": 0.40611886978149414, "learning_rate": 0.00016128101482993021, "loss": 1.5755, "step": 14911 }, { "epoch": 0.193774638873503, "grad_norm": 0.47749456763267517, "learning_rate": 0.0001612784153680188, "loss": 1.595, "step": 14912 }, { "epoch": 0.19378763341741886, "grad_norm": 0.45079129934310913, "learning_rate": 0.00016127581590610746, "loss": 1.2838, "step": 14913 }, { "epoch": 0.19380062796133474, "grad_norm": 0.415567547082901, "learning_rate": 0.00016127321644419606, "loss": 1.4747, "step": 14914 }, { "epoch": 0.1938136225052506, "grad_norm": 0.3816990554332733, "learning_rate": 0.00016127061698228469, "loss": 1.397, "step": 14915 }, { "epoch": 0.19382661704916648, "grad_norm": 0.46385645866394043, "learning_rate": 0.00016126801752037328, "loss": 1.3613, "step": 14916 }, { "epoch": 0.19383961159308236, "grad_norm": 0.5078310370445251, "learning_rate": 0.0001612654180584619, "loss": 1.4508, "step": 14917 }, { "epoch": 0.19385260613699823, "grad_norm": 0.4516267776489258, "learning_rate": 0.00016126281859655053, "loss": 1.5428, "step": 14918 }, { "epoch": 0.1938656006809141, "grad_norm": 0.5169050097465515, "learning_rate": 0.00016126021913463913, "loss": 1.5714, "step": 14919 }, { "epoch": 0.19387859522482997, "grad_norm": 0.31570351123809814, "learning_rate": 0.00016125761967272775, "loss": 1.3927, "step": 14920 }, { "epoch": 0.19389158976874585, "grad_norm": 0.43506646156311035, "learning_rate": 0.00016125502021081638, "loss": 1.2868, "step": 14921 }, { "epoch": 0.19390458431266172, "grad_norm": 0.4190494120121002, "learning_rate": 0.00016125242074890498, "loss": 1.3835, "step": 14922 }, { "epoch": 0.1939175788565776, "grad_norm": 0.40744659304618835, "learning_rate": 0.0001612498212869936, "loss": 1.3033, "step": 14923 }, { "epoch": 0.19393057340049347, "grad_norm": 0.5022830963134766, "learning_rate": 0.0001612472218250822, "loss": 1.6206, "step": 14924 }, { "epoch": 0.19394356794440934, "grad_norm": 0.43683817982673645, "learning_rate": 0.00016124462236317085, "loss": 1.4002, "step": 14925 }, { "epoch": 0.1939565624883252, "grad_norm": 0.31244176626205444, "learning_rate": 0.00016124202290125945, "loss": 1.255, "step": 14926 }, { "epoch": 0.19396955703224109, "grad_norm": 0.44326674938201904, "learning_rate": 0.00016123942343934807, "loss": 1.6125, "step": 14927 }, { "epoch": 0.19398255157615696, "grad_norm": 0.3563145101070404, "learning_rate": 0.00016123682397743667, "loss": 1.3766, "step": 14928 }, { "epoch": 0.19399554612007283, "grad_norm": 0.395197331905365, "learning_rate": 0.0001612342245155253, "loss": 1.3243, "step": 14929 }, { "epoch": 0.1940085406639887, "grad_norm": 0.40622276067733765, "learning_rate": 0.00016123162505361392, "loss": 1.3819, "step": 14930 }, { "epoch": 0.19402153520790458, "grad_norm": 0.4030214846134186, "learning_rate": 0.00016122902559170251, "loss": 1.3286, "step": 14931 }, { "epoch": 0.19403452975182045, "grad_norm": 0.38832899928092957, "learning_rate": 0.00016122642612979114, "loss": 1.4969, "step": 14932 }, { "epoch": 0.19404752429573632, "grad_norm": 0.4044993817806244, "learning_rate": 0.00016122382666787976, "loss": 1.3746, "step": 14933 }, { "epoch": 0.1940605188396522, "grad_norm": 0.47294238209724426, "learning_rate": 0.00016122122720596836, "loss": 1.4719, "step": 14934 }, { "epoch": 0.19407351338356807, "grad_norm": 0.4514651894569397, "learning_rate": 0.00016121862774405699, "loss": 1.542, "step": 14935 }, { "epoch": 0.19408650792748394, "grad_norm": 0.41973602771759033, "learning_rate": 0.00016121602828214558, "loss": 1.4591, "step": 14936 }, { "epoch": 0.19409950247139982, "grad_norm": 0.42308345437049866, "learning_rate": 0.00016121342882023423, "loss": 1.3356, "step": 14937 }, { "epoch": 0.1941124970153157, "grad_norm": 0.4389118254184723, "learning_rate": 0.00016121082935832283, "loss": 1.3293, "step": 14938 }, { "epoch": 0.19412549155923156, "grad_norm": 0.3561875522136688, "learning_rate": 0.00016120822989641146, "loss": 1.6497, "step": 14939 }, { "epoch": 0.19413848610314743, "grad_norm": 0.4700963497161865, "learning_rate": 0.00016120563043450008, "loss": 1.5215, "step": 14940 }, { "epoch": 0.1941514806470633, "grad_norm": 0.2432563751935959, "learning_rate": 0.00016120303097258868, "loss": 1.241, "step": 14941 }, { "epoch": 0.19416447519097918, "grad_norm": 0.4091291129589081, "learning_rate": 0.0001612004315106773, "loss": 1.3623, "step": 14942 }, { "epoch": 0.19417746973489505, "grad_norm": 0.4197945296764374, "learning_rate": 0.0001611978320487659, "loss": 1.3261, "step": 14943 }, { "epoch": 0.19419046427881093, "grad_norm": 0.4064938724040985, "learning_rate": 0.00016119523258685455, "loss": 1.3506, "step": 14944 }, { "epoch": 0.1942034588227268, "grad_norm": 0.5298076868057251, "learning_rate": 0.00016119263312494315, "loss": 1.5875, "step": 14945 }, { "epoch": 0.19421645336664267, "grad_norm": 0.39641743898391724, "learning_rate": 0.00016119003366303175, "loss": 1.3535, "step": 14946 }, { "epoch": 0.19422944791055854, "grad_norm": 0.3830353319644928, "learning_rate": 0.00016118743420112037, "loss": 1.3586, "step": 14947 }, { "epoch": 0.19424244245447442, "grad_norm": 0.3914673328399658, "learning_rate": 0.000161184834739209, "loss": 1.4979, "step": 14948 }, { "epoch": 0.1942554369983903, "grad_norm": 0.36403918266296387, "learning_rate": 0.00016118223527729762, "loss": 1.5723, "step": 14949 }, { "epoch": 0.19426843154230616, "grad_norm": 0.3627685010433197, "learning_rate": 0.00016117963581538622, "loss": 1.5653, "step": 14950 }, { "epoch": 0.19428142608622204, "grad_norm": 0.3853529095649719, "learning_rate": 0.00016117703635347484, "loss": 1.4284, "step": 14951 }, { "epoch": 0.1942944206301379, "grad_norm": 0.38471460342407227, "learning_rate": 0.00016117443689156347, "loss": 1.4382, "step": 14952 }, { "epoch": 0.19430741517405378, "grad_norm": 0.4330098330974579, "learning_rate": 0.00016117183742965206, "loss": 1.404, "step": 14953 }, { "epoch": 0.19432040971796966, "grad_norm": 0.49237698316574097, "learning_rate": 0.0001611692379677407, "loss": 1.5298, "step": 14954 }, { "epoch": 0.19433340426188553, "grad_norm": 0.42953816056251526, "learning_rate": 0.00016116663850582929, "loss": 1.4421, "step": 14955 }, { "epoch": 0.1943463988058014, "grad_norm": 0.31219932436943054, "learning_rate": 0.00016116403904391794, "loss": 1.3428, "step": 14956 }, { "epoch": 0.19435939334971727, "grad_norm": 0.43414148688316345, "learning_rate": 0.00016116143958200653, "loss": 1.3496, "step": 14957 }, { "epoch": 0.19437238789363315, "grad_norm": 0.3261825740337372, "learning_rate": 0.00016115884012009513, "loss": 1.1845, "step": 14958 }, { "epoch": 0.19438538243754902, "grad_norm": 0.4357633590698242, "learning_rate": 0.00016115624065818376, "loss": 1.4187, "step": 14959 }, { "epoch": 0.19439837698146492, "grad_norm": 0.34273025393486023, "learning_rate": 0.00016115364119627238, "loss": 1.4127, "step": 14960 }, { "epoch": 0.1944113715253808, "grad_norm": 0.45404061675071716, "learning_rate": 0.000161151041734361, "loss": 1.5838, "step": 14961 }, { "epoch": 0.19442436606929667, "grad_norm": 0.4121553897857666, "learning_rate": 0.0001611484422724496, "loss": 1.4094, "step": 14962 }, { "epoch": 0.19443736061321254, "grad_norm": 0.5067926645278931, "learning_rate": 0.00016114584281053823, "loss": 1.4486, "step": 14963 }, { "epoch": 0.1944503551571284, "grad_norm": 0.4152904450893402, "learning_rate": 0.00016114324334862685, "loss": 1.3304, "step": 14964 }, { "epoch": 0.19446334970104429, "grad_norm": 0.4469233751296997, "learning_rate": 0.00016114064388671545, "loss": 1.4429, "step": 14965 }, { "epoch": 0.19447634424496016, "grad_norm": 0.38391441106796265, "learning_rate": 0.00016113804442480407, "loss": 1.2534, "step": 14966 }, { "epoch": 0.19448933878887603, "grad_norm": 0.39998140931129456, "learning_rate": 0.00016113544496289267, "loss": 1.3039, "step": 14967 }, { "epoch": 0.1945023333327919, "grad_norm": 0.38620835542678833, "learning_rate": 0.00016113284550098132, "loss": 1.4754, "step": 14968 }, { "epoch": 0.19451532787670778, "grad_norm": 0.39291083812713623, "learning_rate": 0.00016113024603906992, "loss": 1.4612, "step": 14969 }, { "epoch": 0.19452832242062365, "grad_norm": 0.4708918035030365, "learning_rate": 0.00016112764657715852, "loss": 1.358, "step": 14970 }, { "epoch": 0.19454131696453952, "grad_norm": 0.42590638995170593, "learning_rate": 0.00016112504711524714, "loss": 1.2989, "step": 14971 }, { "epoch": 0.1945543115084554, "grad_norm": 0.37809309363365173, "learning_rate": 0.00016112244765333577, "loss": 1.8283, "step": 14972 }, { "epoch": 0.19456730605237127, "grad_norm": 0.3768952488899231, "learning_rate": 0.0001611198481914244, "loss": 1.4062, "step": 14973 }, { "epoch": 0.19458030059628714, "grad_norm": 0.3749127984046936, "learning_rate": 0.000161117248729513, "loss": 1.4083, "step": 14974 }, { "epoch": 0.19459329514020302, "grad_norm": 0.4495859146118164, "learning_rate": 0.0001611146492676016, "loss": 1.5011, "step": 14975 }, { "epoch": 0.1946062896841189, "grad_norm": 0.31008484959602356, "learning_rate": 0.00016111204980569024, "loss": 1.3625, "step": 14976 }, { "epoch": 0.19461928422803476, "grad_norm": 0.3724196255207062, "learning_rate": 0.00016110945034377883, "loss": 1.4536, "step": 14977 }, { "epoch": 0.19463227877195063, "grad_norm": 0.41101840138435364, "learning_rate": 0.00016110685088186746, "loss": 1.4428, "step": 14978 }, { "epoch": 0.1946452733158665, "grad_norm": 0.41465938091278076, "learning_rate": 0.00016110425141995608, "loss": 1.3214, "step": 14979 }, { "epoch": 0.19465826785978238, "grad_norm": 0.3552533686161041, "learning_rate": 0.0001611016519580447, "loss": 1.3875, "step": 14980 }, { "epoch": 0.19467126240369825, "grad_norm": 0.3392327129840851, "learning_rate": 0.0001610990524961333, "loss": 1.503, "step": 14981 }, { "epoch": 0.19468425694761413, "grad_norm": 0.4082401990890503, "learning_rate": 0.00016109645303422193, "loss": 1.4145, "step": 14982 }, { "epoch": 0.19469725149153, "grad_norm": 0.29725563526153564, "learning_rate": 0.00016109385357231055, "loss": 1.3869, "step": 14983 }, { "epoch": 0.19471024603544587, "grad_norm": 0.4168282151222229, "learning_rate": 0.00016109125411039915, "loss": 1.4667, "step": 14984 }, { "epoch": 0.19472324057936174, "grad_norm": 0.3943009078502655, "learning_rate": 0.00016108865464848778, "loss": 1.4355, "step": 14985 }, { "epoch": 0.19473623512327762, "grad_norm": 0.4274914264678955, "learning_rate": 0.00016108605518657637, "loss": 1.5612, "step": 14986 }, { "epoch": 0.1947492296671935, "grad_norm": 0.44223588705062866, "learning_rate": 0.000161083455724665, "loss": 1.4419, "step": 14987 }, { "epoch": 0.19476222421110936, "grad_norm": 0.41913723945617676, "learning_rate": 0.00016108085626275362, "loss": 1.464, "step": 14988 }, { "epoch": 0.19477521875502524, "grad_norm": 0.4552180767059326, "learning_rate": 0.00016107825680084222, "loss": 1.4621, "step": 14989 }, { "epoch": 0.1947882132989411, "grad_norm": 0.43758639693260193, "learning_rate": 0.00016107565733893084, "loss": 1.5732, "step": 14990 }, { "epoch": 0.19480120784285698, "grad_norm": 0.41083887219429016, "learning_rate": 0.00016107305787701947, "loss": 1.5109, "step": 14991 }, { "epoch": 0.19481420238677286, "grad_norm": 0.4736224412918091, "learning_rate": 0.0001610704584151081, "loss": 1.6214, "step": 14992 }, { "epoch": 0.19482719693068873, "grad_norm": 0.38998010754585266, "learning_rate": 0.0001610678589531967, "loss": 1.4728, "step": 14993 }, { "epoch": 0.1948401914746046, "grad_norm": 0.4466601312160492, "learning_rate": 0.00016106525949128532, "loss": 1.4204, "step": 14994 }, { "epoch": 0.19485318601852047, "grad_norm": 0.369575172662735, "learning_rate": 0.00016106266002937394, "loss": 1.2967, "step": 14995 }, { "epoch": 0.19486618056243635, "grad_norm": 0.3789312243461609, "learning_rate": 0.00016106006056746254, "loss": 1.3714, "step": 14996 }, { "epoch": 0.19487917510635222, "grad_norm": 0.4120161831378937, "learning_rate": 0.00016105746110555116, "loss": 1.3069, "step": 14997 }, { "epoch": 0.1948921696502681, "grad_norm": 0.44996580481529236, "learning_rate": 0.00016105486164363976, "loss": 1.3407, "step": 14998 }, { "epoch": 0.19490516419418397, "grad_norm": 0.367512583732605, "learning_rate": 0.0001610522621817284, "loss": 1.4125, "step": 14999 }, { "epoch": 0.19491815873809984, "grad_norm": 0.4112405776977539, "learning_rate": 0.000161049662719817, "loss": 1.3201, "step": 15000 }, { "epoch": 0.1949311532820157, "grad_norm": 0.29522496461868286, "learning_rate": 0.0001610470632579056, "loss": 1.453, "step": 15001 }, { "epoch": 0.19494414782593159, "grad_norm": 0.3313886225223541, "learning_rate": 0.00016104446379599423, "loss": 1.2701, "step": 15002 }, { "epoch": 0.19495714236984746, "grad_norm": 0.4674217998981476, "learning_rate": 0.00016104186433408285, "loss": 1.4279, "step": 15003 }, { "epoch": 0.19497013691376333, "grad_norm": 0.3128993809223175, "learning_rate": 0.00016103926487217148, "loss": 1.4752, "step": 15004 }, { "epoch": 0.1949831314576792, "grad_norm": 0.37602829933166504, "learning_rate": 0.00016103666541026008, "loss": 1.5826, "step": 15005 }, { "epoch": 0.19499612600159508, "grad_norm": 0.4832821488380432, "learning_rate": 0.0001610340659483487, "loss": 1.4449, "step": 15006 }, { "epoch": 0.19500912054551095, "grad_norm": 0.4294029772281647, "learning_rate": 0.00016103146648643733, "loss": 1.586, "step": 15007 }, { "epoch": 0.19502211508942682, "grad_norm": 0.31111422181129456, "learning_rate": 0.00016102886702452592, "loss": 1.319, "step": 15008 }, { "epoch": 0.1950351096333427, "grad_norm": 0.46707379817962646, "learning_rate": 0.00016102626756261455, "loss": 1.3575, "step": 15009 }, { "epoch": 0.19504810417725857, "grad_norm": 0.35692548751831055, "learning_rate": 0.00016102366810070314, "loss": 1.3841, "step": 15010 }, { "epoch": 0.19506109872117444, "grad_norm": 0.4133400022983551, "learning_rate": 0.0001610210686387918, "loss": 1.5599, "step": 15011 }, { "epoch": 0.19507409326509031, "grad_norm": 0.3109988272190094, "learning_rate": 0.0001610184691768804, "loss": 1.3967, "step": 15012 }, { "epoch": 0.1950870878090062, "grad_norm": 0.49542033672332764, "learning_rate": 0.000161015869714969, "loss": 1.5919, "step": 15013 }, { "epoch": 0.19510008235292206, "grad_norm": 0.40462857484817505, "learning_rate": 0.00016101327025305764, "loss": 1.2506, "step": 15014 }, { "epoch": 0.19511307689683793, "grad_norm": 0.4302003085613251, "learning_rate": 0.00016101067079114624, "loss": 1.4859, "step": 15015 }, { "epoch": 0.1951260714407538, "grad_norm": 0.38717302680015564, "learning_rate": 0.00016100807132923486, "loss": 1.2318, "step": 15016 }, { "epoch": 0.19513906598466968, "grad_norm": 0.3128765821456909, "learning_rate": 0.00016100547186732346, "loss": 1.2826, "step": 15017 }, { "epoch": 0.19515206052858555, "grad_norm": 0.45582106709480286, "learning_rate": 0.00016100287240541209, "loss": 1.3975, "step": 15018 }, { "epoch": 0.19516505507250143, "grad_norm": 0.37630948424339294, "learning_rate": 0.0001610002729435007, "loss": 1.3727, "step": 15019 }, { "epoch": 0.1951780496164173, "grad_norm": 0.4631521701812744, "learning_rate": 0.0001609976734815893, "loss": 1.5121, "step": 15020 }, { "epoch": 0.19519104416033317, "grad_norm": 0.4156006872653961, "learning_rate": 0.00016099507401967793, "loss": 1.3354, "step": 15021 }, { "epoch": 0.19520403870424904, "grad_norm": 0.4105610251426697, "learning_rate": 0.00016099247455776656, "loss": 1.7009, "step": 15022 }, { "epoch": 0.19521703324816492, "grad_norm": 0.46963226795196533, "learning_rate": 0.00016098987509585518, "loss": 1.528, "step": 15023 }, { "epoch": 0.1952300277920808, "grad_norm": 0.4761582612991333, "learning_rate": 0.00016098727563394378, "loss": 1.3929, "step": 15024 }, { "epoch": 0.19524302233599666, "grad_norm": 0.41948556900024414, "learning_rate": 0.00016098467617203238, "loss": 1.2504, "step": 15025 }, { "epoch": 0.19525601687991254, "grad_norm": 0.3190854787826538, "learning_rate": 0.00016098207671012103, "loss": 1.3906, "step": 15026 }, { "epoch": 0.1952690114238284, "grad_norm": 0.46148914098739624, "learning_rate": 0.00016097947724820963, "loss": 1.4814, "step": 15027 }, { "epoch": 0.19528200596774428, "grad_norm": 0.33796706795692444, "learning_rate": 0.00016097687778629825, "loss": 1.2624, "step": 15028 }, { "epoch": 0.19529500051166016, "grad_norm": 0.37835144996643066, "learning_rate": 0.00016097427832438685, "loss": 1.419, "step": 15029 }, { "epoch": 0.19530799505557603, "grad_norm": 0.3936806619167328, "learning_rate": 0.00016097167886247547, "loss": 1.423, "step": 15030 }, { "epoch": 0.1953209895994919, "grad_norm": 0.48103076219558716, "learning_rate": 0.0001609690794005641, "loss": 1.555, "step": 15031 }, { "epoch": 0.19533398414340777, "grad_norm": 0.4297008216381073, "learning_rate": 0.0001609664799386527, "loss": 1.5039, "step": 15032 }, { "epoch": 0.19534697868732365, "grad_norm": 0.5032608509063721, "learning_rate": 0.00016096388047674132, "loss": 1.6337, "step": 15033 }, { "epoch": 0.19535997323123952, "grad_norm": 0.3664565086364746, "learning_rate": 0.00016096128101482994, "loss": 1.5047, "step": 15034 }, { "epoch": 0.1953729677751554, "grad_norm": 0.32763874530792236, "learning_rate": 0.00016095868155291857, "loss": 1.2648, "step": 15035 }, { "epoch": 0.1953859623190713, "grad_norm": 0.39580076932907104, "learning_rate": 0.00016095608209100716, "loss": 1.385, "step": 15036 }, { "epoch": 0.19539895686298717, "grad_norm": 0.3973984122276306, "learning_rate": 0.0001609534826290958, "loss": 1.3607, "step": 15037 }, { "epoch": 0.19541195140690304, "grad_norm": 0.3765105903148651, "learning_rate": 0.0001609508831671844, "loss": 1.4883, "step": 15038 }, { "epoch": 0.1954249459508189, "grad_norm": 0.40490561723709106, "learning_rate": 0.000160948283705273, "loss": 1.4128, "step": 15039 }, { "epoch": 0.19543794049473479, "grad_norm": 0.37630191445350647, "learning_rate": 0.00016094568424336163, "loss": 1.3293, "step": 15040 }, { "epoch": 0.19545093503865066, "grad_norm": 0.36157554388046265, "learning_rate": 0.00016094308478145023, "loss": 1.2252, "step": 15041 }, { "epoch": 0.19546392958256653, "grad_norm": 0.4231138825416565, "learning_rate": 0.00016094048531953886, "loss": 1.5418, "step": 15042 }, { "epoch": 0.1954769241264824, "grad_norm": 0.38538050651550293, "learning_rate": 0.00016093788585762748, "loss": 1.5232, "step": 15043 }, { "epoch": 0.19548991867039828, "grad_norm": 0.47394877672195435, "learning_rate": 0.00016093528639571608, "loss": 1.3685, "step": 15044 }, { "epoch": 0.19550291321431415, "grad_norm": 0.4126327931880951, "learning_rate": 0.0001609326869338047, "loss": 1.575, "step": 15045 }, { "epoch": 0.19551590775823002, "grad_norm": 0.5006478428840637, "learning_rate": 0.00016093008747189333, "loss": 1.6771, "step": 15046 }, { "epoch": 0.1955289023021459, "grad_norm": 0.3928852081298828, "learning_rate": 0.00016092748800998195, "loss": 1.314, "step": 15047 }, { "epoch": 0.19554189684606177, "grad_norm": 0.4020337462425232, "learning_rate": 0.00016092488854807055, "loss": 1.4295, "step": 15048 }, { "epoch": 0.19555489138997764, "grad_norm": 0.3356950581073761, "learning_rate": 0.00016092228908615917, "loss": 1.5733, "step": 15049 }, { "epoch": 0.19556788593389351, "grad_norm": 0.4210571348667145, "learning_rate": 0.0001609196896242478, "loss": 1.4687, "step": 15050 }, { "epoch": 0.1955808804778094, "grad_norm": 0.41777604818344116, "learning_rate": 0.0001609170901623364, "loss": 1.2899, "step": 15051 }, { "epoch": 0.19559387502172526, "grad_norm": 0.32231879234313965, "learning_rate": 0.00016091449070042502, "loss": 1.1813, "step": 15052 }, { "epoch": 0.19560686956564113, "grad_norm": 0.4138682186603546, "learning_rate": 0.00016091189123851364, "loss": 1.5524, "step": 15053 }, { "epoch": 0.195619864109557, "grad_norm": 0.37750229239463806, "learning_rate": 0.00016090929177660224, "loss": 1.3417, "step": 15054 }, { "epoch": 0.19563285865347288, "grad_norm": 0.40623587369918823, "learning_rate": 0.00016090669231469087, "loss": 1.391, "step": 15055 }, { "epoch": 0.19564585319738875, "grad_norm": 0.5168881416320801, "learning_rate": 0.00016090409285277946, "loss": 1.4582, "step": 15056 }, { "epoch": 0.19565884774130463, "grad_norm": 0.49380379915237427, "learning_rate": 0.00016090149339086812, "loss": 1.5014, "step": 15057 }, { "epoch": 0.1956718422852205, "grad_norm": 0.4175034761428833, "learning_rate": 0.0001608988939289567, "loss": 1.3414, "step": 15058 }, { "epoch": 0.19568483682913637, "grad_norm": 0.32605743408203125, "learning_rate": 0.00016089629446704534, "loss": 1.2939, "step": 15059 }, { "epoch": 0.19569783137305224, "grad_norm": 0.39813730120658875, "learning_rate": 0.00016089369500513393, "loss": 1.6173, "step": 15060 }, { "epoch": 0.19571082591696812, "grad_norm": 0.41415518522262573, "learning_rate": 0.00016089109554322256, "loss": 1.4739, "step": 15061 }, { "epoch": 0.195723820460884, "grad_norm": 0.5410938262939453, "learning_rate": 0.00016088849608131118, "loss": 1.4342, "step": 15062 }, { "epoch": 0.19573681500479986, "grad_norm": 0.4245575964450836, "learning_rate": 0.00016088589661939978, "loss": 1.4338, "step": 15063 }, { "epoch": 0.19574980954871574, "grad_norm": 0.4145648777484894, "learning_rate": 0.0001608832971574884, "loss": 1.5126, "step": 15064 }, { "epoch": 0.1957628040926316, "grad_norm": 0.4183512330055237, "learning_rate": 0.00016088069769557703, "loss": 1.4174, "step": 15065 }, { "epoch": 0.19577579863654748, "grad_norm": 0.30564042925834656, "learning_rate": 0.00016087809823366565, "loss": 1.4225, "step": 15066 }, { "epoch": 0.19578879318046336, "grad_norm": 0.3914017677307129, "learning_rate": 0.00016087549877175425, "loss": 1.4402, "step": 15067 }, { "epoch": 0.19580178772437923, "grad_norm": 0.4162934422492981, "learning_rate": 0.00016087289930984285, "loss": 1.3736, "step": 15068 }, { "epoch": 0.1958147822682951, "grad_norm": 0.40347203612327576, "learning_rate": 0.0001608702998479315, "loss": 1.3863, "step": 15069 }, { "epoch": 0.19582777681221097, "grad_norm": 0.30318590998649597, "learning_rate": 0.0001608677003860201, "loss": 1.2831, "step": 15070 }, { "epoch": 0.19584077135612685, "grad_norm": 0.2869959771633148, "learning_rate": 0.00016086510092410872, "loss": 1.3392, "step": 15071 }, { "epoch": 0.19585376590004272, "grad_norm": 0.3996869921684265, "learning_rate": 0.00016086250146219732, "loss": 1.3383, "step": 15072 }, { "epoch": 0.1958667604439586, "grad_norm": 0.43461596965789795, "learning_rate": 0.00016085990200028594, "loss": 1.436, "step": 15073 }, { "epoch": 0.19587975498787447, "grad_norm": 0.4679774343967438, "learning_rate": 0.00016085730253837457, "loss": 1.5102, "step": 15074 }, { "epoch": 0.19589274953179034, "grad_norm": 0.4502407908439636, "learning_rate": 0.00016085470307646317, "loss": 1.5869, "step": 15075 }, { "epoch": 0.1959057440757062, "grad_norm": 0.40007972717285156, "learning_rate": 0.0001608521036145518, "loss": 1.4142, "step": 15076 }, { "epoch": 0.19591873861962208, "grad_norm": 0.4622056186199188, "learning_rate": 0.00016084950415264042, "loss": 1.4487, "step": 15077 }, { "epoch": 0.19593173316353796, "grad_norm": 0.40014514327049255, "learning_rate": 0.00016084690469072904, "loss": 1.4344, "step": 15078 }, { "epoch": 0.19594472770745383, "grad_norm": 0.4946998655796051, "learning_rate": 0.00016084430522881764, "loss": 1.4279, "step": 15079 }, { "epoch": 0.1959577222513697, "grad_norm": 0.40331342816352844, "learning_rate": 0.00016084170576690623, "loss": 1.2849, "step": 15080 }, { "epoch": 0.19597071679528558, "grad_norm": 0.37406259775161743, "learning_rate": 0.0001608391063049949, "loss": 1.3799, "step": 15081 }, { "epoch": 0.19598371133920145, "grad_norm": 0.40779539942741394, "learning_rate": 0.00016083650684308348, "loss": 1.4894, "step": 15082 }, { "epoch": 0.19599670588311732, "grad_norm": 0.43439584970474243, "learning_rate": 0.0001608339073811721, "loss": 1.3144, "step": 15083 }, { "epoch": 0.1960097004270332, "grad_norm": 0.371105432510376, "learning_rate": 0.0001608313079192607, "loss": 1.3048, "step": 15084 }, { "epoch": 0.19602269497094907, "grad_norm": 0.4025285840034485, "learning_rate": 0.00016082870845734933, "loss": 1.3729, "step": 15085 }, { "epoch": 0.19603568951486494, "grad_norm": 0.28473252058029175, "learning_rate": 0.00016082610899543795, "loss": 1.4099, "step": 15086 }, { "epoch": 0.19604868405878081, "grad_norm": 0.5011013150215149, "learning_rate": 0.00016082350953352655, "loss": 1.6652, "step": 15087 }, { "epoch": 0.1960616786026967, "grad_norm": 0.4537227749824524, "learning_rate": 0.0001608209100716152, "loss": 1.6578, "step": 15088 }, { "epoch": 0.19607467314661256, "grad_norm": 0.3511347472667694, "learning_rate": 0.0001608183106097038, "loss": 1.3642, "step": 15089 }, { "epoch": 0.19608766769052843, "grad_norm": 0.26621341705322266, "learning_rate": 0.00016081571114779243, "loss": 1.1196, "step": 15090 }, { "epoch": 0.1961006622344443, "grad_norm": 0.41732555627822876, "learning_rate": 0.00016081311168588102, "loss": 1.4199, "step": 15091 }, { "epoch": 0.19611365677836018, "grad_norm": 0.36707526445388794, "learning_rate": 0.00016081051222396965, "loss": 1.3991, "step": 15092 }, { "epoch": 0.19612665132227605, "grad_norm": 0.4805380403995514, "learning_rate": 0.00016080791276205827, "loss": 1.4135, "step": 15093 }, { "epoch": 0.19613964586619193, "grad_norm": 0.4907483160495758, "learning_rate": 0.00016080531330014687, "loss": 1.4601, "step": 15094 }, { "epoch": 0.1961526404101078, "grad_norm": 0.419883131980896, "learning_rate": 0.0001608027138382355, "loss": 1.5845, "step": 15095 }, { "epoch": 0.19616563495402367, "grad_norm": 0.4388166666030884, "learning_rate": 0.00016080011437632412, "loss": 1.3623, "step": 15096 }, { "epoch": 0.19617862949793954, "grad_norm": 0.3660356402397156, "learning_rate": 0.00016079751491441272, "loss": 1.5341, "step": 15097 }, { "epoch": 0.19619162404185542, "grad_norm": 0.39928776025772095, "learning_rate": 0.00016079491545250134, "loss": 1.5847, "step": 15098 }, { "epoch": 0.1962046185857713, "grad_norm": 0.3672029674053192, "learning_rate": 0.00016079231599058994, "loss": 1.3771, "step": 15099 }, { "epoch": 0.19621761312968716, "grad_norm": 0.3349177837371826, "learning_rate": 0.0001607897165286786, "loss": 1.6002, "step": 15100 }, { "epoch": 0.19623060767360304, "grad_norm": 0.4589473009109497, "learning_rate": 0.00016078711706676719, "loss": 1.4055, "step": 15101 }, { "epoch": 0.1962436022175189, "grad_norm": 0.4271509051322937, "learning_rate": 0.0001607845176048558, "loss": 1.3003, "step": 15102 }, { "epoch": 0.19625659676143478, "grad_norm": 0.3963555693626404, "learning_rate": 0.0001607819181429444, "loss": 1.5422, "step": 15103 }, { "epoch": 0.19626959130535065, "grad_norm": 0.4377681016921997, "learning_rate": 0.00016077931868103303, "loss": 1.3185, "step": 15104 }, { "epoch": 0.19628258584926653, "grad_norm": 0.3426659405231476, "learning_rate": 0.00016077671921912166, "loss": 1.3945, "step": 15105 }, { "epoch": 0.1962955803931824, "grad_norm": 0.3157619833946228, "learning_rate": 0.00016077411975721025, "loss": 1.2127, "step": 15106 }, { "epoch": 0.19630857493709827, "grad_norm": 0.40778079628944397, "learning_rate": 0.00016077152029529888, "loss": 1.5935, "step": 15107 }, { "epoch": 0.19632156948101415, "grad_norm": 0.2817988097667694, "learning_rate": 0.0001607689208333875, "loss": 1.173, "step": 15108 }, { "epoch": 0.19633456402493002, "grad_norm": 0.44415712356567383, "learning_rate": 0.0001607663213714761, "loss": 1.5046, "step": 15109 }, { "epoch": 0.1963475585688459, "grad_norm": 0.36379292607307434, "learning_rate": 0.00016076372190956473, "loss": 1.4882, "step": 15110 }, { "epoch": 0.19636055311276177, "grad_norm": 0.35104623436927795, "learning_rate": 0.00016076112244765332, "loss": 1.4546, "step": 15111 }, { "epoch": 0.19637354765667767, "grad_norm": 0.31093838810920715, "learning_rate": 0.00016075852298574197, "loss": 1.2872, "step": 15112 }, { "epoch": 0.19638654220059354, "grad_norm": 0.38891029357910156, "learning_rate": 0.00016075592352383057, "loss": 1.5188, "step": 15113 }, { "epoch": 0.1963995367445094, "grad_norm": 0.40772297978401184, "learning_rate": 0.0001607533240619192, "loss": 1.4282, "step": 15114 }, { "epoch": 0.19641253128842528, "grad_norm": 0.3571866750717163, "learning_rate": 0.0001607507246000078, "loss": 1.206, "step": 15115 }, { "epoch": 0.19642552583234116, "grad_norm": 0.535038948059082, "learning_rate": 0.00016074812513809642, "loss": 1.4726, "step": 15116 }, { "epoch": 0.19643852037625703, "grad_norm": 0.4346778094768524, "learning_rate": 0.00016074552567618504, "loss": 1.3508, "step": 15117 }, { "epoch": 0.1964515149201729, "grad_norm": 0.4804379940032959, "learning_rate": 0.00016074292621427364, "loss": 1.3864, "step": 15118 }, { "epoch": 0.19646450946408878, "grad_norm": 0.4321335256099701, "learning_rate": 0.00016074032675236226, "loss": 1.2403, "step": 15119 }, { "epoch": 0.19647750400800465, "grad_norm": 0.49203550815582275, "learning_rate": 0.0001607377272904509, "loss": 1.5687, "step": 15120 }, { "epoch": 0.19649049855192052, "grad_norm": 0.39814361929893494, "learning_rate": 0.0001607351278285395, "loss": 1.4176, "step": 15121 }, { "epoch": 0.1965034930958364, "grad_norm": 0.4023113548755646, "learning_rate": 0.0001607325283666281, "loss": 1.4427, "step": 15122 }, { "epoch": 0.19651648763975227, "grad_norm": 0.3334875702857971, "learning_rate": 0.00016072992890471674, "loss": 1.4648, "step": 15123 }, { "epoch": 0.19652948218366814, "grad_norm": 0.4837316572666168, "learning_rate": 0.00016072732944280536, "loss": 1.4269, "step": 15124 }, { "epoch": 0.19654247672758401, "grad_norm": 0.33407703042030334, "learning_rate": 0.00016072472998089396, "loss": 1.2473, "step": 15125 }, { "epoch": 0.1965554712714999, "grad_norm": 0.4090714454650879, "learning_rate": 0.00016072213051898258, "loss": 1.4877, "step": 15126 }, { "epoch": 0.19656846581541576, "grad_norm": 0.2927691340446472, "learning_rate": 0.0001607195310570712, "loss": 1.2998, "step": 15127 }, { "epoch": 0.19658146035933163, "grad_norm": 0.465712308883667, "learning_rate": 0.0001607169315951598, "loss": 1.3434, "step": 15128 }, { "epoch": 0.1965944549032475, "grad_norm": 0.3964065611362457, "learning_rate": 0.00016071433213324843, "loss": 1.5113, "step": 15129 }, { "epoch": 0.19660744944716338, "grad_norm": 0.37474626302719116, "learning_rate": 0.00016071173267133703, "loss": 1.3561, "step": 15130 }, { "epoch": 0.19662044399107925, "grad_norm": 0.31888917088508606, "learning_rate": 0.00016070913320942568, "loss": 1.2511, "step": 15131 }, { "epoch": 0.19663343853499513, "grad_norm": 0.37481632828712463, "learning_rate": 0.00016070653374751427, "loss": 1.2369, "step": 15132 }, { "epoch": 0.196646433078911, "grad_norm": 0.35175007581710815, "learning_rate": 0.0001607039342856029, "loss": 1.2775, "step": 15133 }, { "epoch": 0.19665942762282687, "grad_norm": 0.4542943239212036, "learning_rate": 0.0001607013348236915, "loss": 1.2889, "step": 15134 }, { "epoch": 0.19667242216674274, "grad_norm": 0.36963728070259094, "learning_rate": 0.00016069873536178012, "loss": 1.3964, "step": 15135 }, { "epoch": 0.19668541671065862, "grad_norm": 0.417575478553772, "learning_rate": 0.00016069613589986875, "loss": 1.6066, "step": 15136 }, { "epoch": 0.1966984112545745, "grad_norm": 0.3544687032699585, "learning_rate": 0.00016069353643795734, "loss": 1.3855, "step": 15137 }, { "epoch": 0.19671140579849036, "grad_norm": 0.3952132761478424, "learning_rate": 0.00016069093697604597, "loss": 1.5143, "step": 15138 }, { "epoch": 0.19672440034240624, "grad_norm": 0.3200976848602295, "learning_rate": 0.0001606883375141346, "loss": 1.2969, "step": 15139 }, { "epoch": 0.1967373948863221, "grad_norm": 0.34209194779396057, "learning_rate": 0.0001606857380522232, "loss": 1.1786, "step": 15140 }, { "epoch": 0.19675038943023798, "grad_norm": 0.3913453221321106, "learning_rate": 0.0001606831385903118, "loss": 1.3601, "step": 15141 }, { "epoch": 0.19676338397415385, "grad_norm": 0.342191606760025, "learning_rate": 0.0001606805391284004, "loss": 1.3319, "step": 15142 }, { "epoch": 0.19677637851806973, "grad_norm": 0.43631264567375183, "learning_rate": 0.00016067793966648906, "loss": 1.4964, "step": 15143 }, { "epoch": 0.1967893730619856, "grad_norm": 0.37610581517219543, "learning_rate": 0.00016067534020457766, "loss": 1.4363, "step": 15144 }, { "epoch": 0.19680236760590147, "grad_norm": 0.33718085289001465, "learning_rate": 0.00016067274074266628, "loss": 1.5437, "step": 15145 }, { "epoch": 0.19681536214981735, "grad_norm": 0.3893381655216217, "learning_rate": 0.00016067014128075488, "loss": 1.331, "step": 15146 }, { "epoch": 0.19682835669373322, "grad_norm": 0.40297672152519226, "learning_rate": 0.0001606675418188435, "loss": 1.3131, "step": 15147 }, { "epoch": 0.1968413512376491, "grad_norm": 0.4165307879447937, "learning_rate": 0.00016066494235693213, "loss": 1.3983, "step": 15148 }, { "epoch": 0.19685434578156497, "grad_norm": 0.38147854804992676, "learning_rate": 0.00016066234289502073, "loss": 1.3775, "step": 15149 }, { "epoch": 0.19686734032548084, "grad_norm": 0.3278824985027313, "learning_rate": 0.00016065974343310935, "loss": 1.4299, "step": 15150 }, { "epoch": 0.1968803348693967, "grad_norm": 0.38771364092826843, "learning_rate": 0.00016065714397119798, "loss": 1.3929, "step": 15151 }, { "epoch": 0.19689332941331258, "grad_norm": 0.4045740067958832, "learning_rate": 0.00016065454450928657, "loss": 1.2267, "step": 15152 }, { "epoch": 0.19690632395722846, "grad_norm": 0.3722262978553772, "learning_rate": 0.0001606519450473752, "loss": 1.2616, "step": 15153 }, { "epoch": 0.19691931850114433, "grad_norm": 0.35703858733177185, "learning_rate": 0.0001606493455854638, "loss": 1.3488, "step": 15154 }, { "epoch": 0.1969323130450602, "grad_norm": 0.3989024758338928, "learning_rate": 0.00016064674612355245, "loss": 1.5359, "step": 15155 }, { "epoch": 0.19694530758897608, "grad_norm": 0.4781392216682434, "learning_rate": 0.00016064414666164105, "loss": 1.5791, "step": 15156 }, { "epoch": 0.19695830213289195, "grad_norm": 0.4096522331237793, "learning_rate": 0.00016064154719972967, "loss": 1.4269, "step": 15157 }, { "epoch": 0.19697129667680782, "grad_norm": 0.351851224899292, "learning_rate": 0.00016063894773781827, "loss": 1.3586, "step": 15158 }, { "epoch": 0.1969842912207237, "grad_norm": 0.35385170578956604, "learning_rate": 0.0001606363482759069, "loss": 1.2723, "step": 15159 }, { "epoch": 0.19699728576463957, "grad_norm": 0.38648566603660583, "learning_rate": 0.00016063374881399552, "loss": 1.4522, "step": 15160 }, { "epoch": 0.19701028030855544, "grad_norm": 0.32445815205574036, "learning_rate": 0.0001606311493520841, "loss": 1.4577, "step": 15161 }, { "epoch": 0.19702327485247131, "grad_norm": 0.327451229095459, "learning_rate": 0.00016062854989017276, "loss": 1.192, "step": 15162 }, { "epoch": 0.1970362693963872, "grad_norm": 0.4033142924308777, "learning_rate": 0.00016062595042826136, "loss": 1.2914, "step": 15163 }, { "epoch": 0.19704926394030306, "grad_norm": 0.45929038524627686, "learning_rate": 0.00016062335096634996, "loss": 1.4588, "step": 15164 }, { "epoch": 0.19706225848421893, "grad_norm": 0.3005059063434601, "learning_rate": 0.00016062075150443858, "loss": 1.4982, "step": 15165 }, { "epoch": 0.1970752530281348, "grad_norm": 0.38960590958595276, "learning_rate": 0.0001606181520425272, "loss": 1.2371, "step": 15166 }, { "epoch": 0.19708824757205068, "grad_norm": 0.4554326832294464, "learning_rate": 0.00016061555258061583, "loss": 1.5176, "step": 15167 }, { "epoch": 0.19710124211596655, "grad_norm": 0.4252159595489502, "learning_rate": 0.00016061295311870443, "loss": 1.2863, "step": 15168 }, { "epoch": 0.19711423665988242, "grad_norm": 0.43140292167663574, "learning_rate": 0.00016061035365679306, "loss": 1.4859, "step": 15169 }, { "epoch": 0.1971272312037983, "grad_norm": 0.3556015193462372, "learning_rate": 0.00016060775419488168, "loss": 1.2549, "step": 15170 }, { "epoch": 0.19714022574771417, "grad_norm": 0.4013342559337616, "learning_rate": 0.00016060515473297028, "loss": 1.5748, "step": 15171 }, { "epoch": 0.19715322029163004, "grad_norm": 0.517819881439209, "learning_rate": 0.0001606025552710589, "loss": 1.4189, "step": 15172 }, { "epoch": 0.19716621483554592, "grad_norm": 0.49728506803512573, "learning_rate": 0.0001605999558091475, "loss": 1.5233, "step": 15173 }, { "epoch": 0.1971792093794618, "grad_norm": 0.4505499303340912, "learning_rate": 0.00016059735634723615, "loss": 1.5079, "step": 15174 }, { "epoch": 0.19719220392337766, "grad_norm": 0.4149855673313141, "learning_rate": 0.00016059475688532475, "loss": 1.364, "step": 15175 }, { "epoch": 0.19720519846729354, "grad_norm": 0.3838134706020355, "learning_rate": 0.00016059215742341335, "loss": 1.4092, "step": 15176 }, { "epoch": 0.1972181930112094, "grad_norm": 0.3868907690048218, "learning_rate": 0.00016058955796150197, "loss": 1.3183, "step": 15177 }, { "epoch": 0.19723118755512528, "grad_norm": 0.469340980052948, "learning_rate": 0.0001605869584995906, "loss": 1.4049, "step": 15178 }, { "epoch": 0.19724418209904115, "grad_norm": 0.4031793475151062, "learning_rate": 0.00016058435903767922, "loss": 1.5555, "step": 15179 }, { "epoch": 0.19725717664295703, "grad_norm": 0.3342891335487366, "learning_rate": 0.00016058175957576782, "loss": 1.4964, "step": 15180 }, { "epoch": 0.1972701711868729, "grad_norm": 0.40358588099479675, "learning_rate": 0.00016057916011385644, "loss": 1.375, "step": 15181 }, { "epoch": 0.19728316573078877, "grad_norm": 0.39205893874168396, "learning_rate": 0.00016057656065194506, "loss": 1.5246, "step": 15182 }, { "epoch": 0.19729616027470465, "grad_norm": 0.36472901701927185, "learning_rate": 0.00016057396119003366, "loss": 1.3433, "step": 15183 }, { "epoch": 0.19730915481862052, "grad_norm": 0.45858439803123474, "learning_rate": 0.0001605713617281223, "loss": 1.5803, "step": 15184 }, { "epoch": 0.1973221493625364, "grad_norm": 0.47431516647338867, "learning_rate": 0.00016056876226621088, "loss": 1.4762, "step": 15185 }, { "epoch": 0.19733514390645226, "grad_norm": 0.4197148382663727, "learning_rate": 0.00016056616280429954, "loss": 1.441, "step": 15186 }, { "epoch": 0.19734813845036814, "grad_norm": 0.3786357343196869, "learning_rate": 0.00016056356334238813, "loss": 1.2299, "step": 15187 }, { "epoch": 0.19736113299428404, "grad_norm": 0.43684062361717224, "learning_rate": 0.00016056096388047676, "loss": 1.5556, "step": 15188 }, { "epoch": 0.1973741275381999, "grad_norm": 0.37042975425720215, "learning_rate": 0.00016055836441856535, "loss": 1.5341, "step": 15189 }, { "epoch": 0.19738712208211578, "grad_norm": 0.4426417648792267, "learning_rate": 0.00016055576495665398, "loss": 1.4656, "step": 15190 }, { "epoch": 0.19740011662603166, "grad_norm": 0.38509050011634827, "learning_rate": 0.0001605531654947426, "loss": 1.588, "step": 15191 }, { "epoch": 0.19741311116994753, "grad_norm": 0.3662339150905609, "learning_rate": 0.0001605505660328312, "loss": 1.4387, "step": 15192 }, { "epoch": 0.1974261057138634, "grad_norm": 0.42003586888313293, "learning_rate": 0.00016054796657091983, "loss": 1.3125, "step": 15193 }, { "epoch": 0.19743910025777928, "grad_norm": 0.3900770843029022, "learning_rate": 0.00016054536710900845, "loss": 1.3786, "step": 15194 }, { "epoch": 0.19745209480169515, "grad_norm": 0.37473246455192566, "learning_rate": 0.00016054276764709705, "loss": 1.3634, "step": 15195 }, { "epoch": 0.19746508934561102, "grad_norm": 0.43817541003227234, "learning_rate": 0.00016054016818518567, "loss": 1.3275, "step": 15196 }, { "epoch": 0.1974780838895269, "grad_norm": 0.4224016070365906, "learning_rate": 0.0001605375687232743, "loss": 1.5399, "step": 15197 }, { "epoch": 0.19749107843344277, "grad_norm": 0.40805330872535706, "learning_rate": 0.00016053496926136292, "loss": 1.3911, "step": 15198 }, { "epoch": 0.19750407297735864, "grad_norm": 0.42046448588371277, "learning_rate": 0.00016053236979945152, "loss": 1.4295, "step": 15199 }, { "epoch": 0.19751706752127451, "grad_norm": 0.37618348002433777, "learning_rate": 0.00016052977033754014, "loss": 1.3308, "step": 15200 }, { "epoch": 0.1975300620651904, "grad_norm": 0.3812530040740967, "learning_rate": 0.00016052717087562877, "loss": 1.5153, "step": 15201 }, { "epoch": 0.19754305660910626, "grad_norm": 0.3528536558151245, "learning_rate": 0.00016052457141371736, "loss": 1.4887, "step": 15202 }, { "epoch": 0.19755605115302213, "grad_norm": 0.4155598282814026, "learning_rate": 0.000160521971951806, "loss": 1.5151, "step": 15203 }, { "epoch": 0.197569045696938, "grad_norm": 0.4857502579689026, "learning_rate": 0.0001605193724898946, "loss": 1.6726, "step": 15204 }, { "epoch": 0.19758204024085388, "grad_norm": 0.4043503999710083, "learning_rate": 0.00016051677302798324, "loss": 1.4527, "step": 15205 }, { "epoch": 0.19759503478476975, "grad_norm": 0.36850136518478394, "learning_rate": 0.00016051417356607184, "loss": 1.478, "step": 15206 }, { "epoch": 0.19760802932868562, "grad_norm": 0.44881299138069153, "learning_rate": 0.00016051157410416043, "loss": 1.3344, "step": 15207 }, { "epoch": 0.1976210238726015, "grad_norm": 0.45037394762039185, "learning_rate": 0.00016050897464224906, "loss": 1.4538, "step": 15208 }, { "epoch": 0.19763401841651737, "grad_norm": 0.3963901698589325, "learning_rate": 0.00016050637518033768, "loss": 1.4962, "step": 15209 }, { "epoch": 0.19764701296043324, "grad_norm": 0.3214680850505829, "learning_rate": 0.0001605037757184263, "loss": 1.2978, "step": 15210 }, { "epoch": 0.19766000750434912, "grad_norm": 0.38941630721092224, "learning_rate": 0.0001605011762565149, "loss": 1.4117, "step": 15211 }, { "epoch": 0.197673002048265, "grad_norm": 0.31193453073501587, "learning_rate": 0.00016049857679460353, "loss": 1.5375, "step": 15212 }, { "epoch": 0.19768599659218086, "grad_norm": 0.4008481502532959, "learning_rate": 0.00016049597733269215, "loss": 1.2097, "step": 15213 }, { "epoch": 0.19769899113609674, "grad_norm": 0.4754476845264435, "learning_rate": 0.00016049337787078075, "loss": 1.4644, "step": 15214 }, { "epoch": 0.1977119856800126, "grad_norm": 0.4401509165763855, "learning_rate": 0.00016049077840886937, "loss": 1.3521, "step": 15215 }, { "epoch": 0.19772498022392848, "grad_norm": 0.40085768699645996, "learning_rate": 0.00016048817894695797, "loss": 1.4934, "step": 15216 }, { "epoch": 0.19773797476784435, "grad_norm": 0.4902122914791107, "learning_rate": 0.00016048557948504662, "loss": 1.4409, "step": 15217 }, { "epoch": 0.19775096931176023, "grad_norm": 0.4231351613998413, "learning_rate": 0.00016048298002313522, "loss": 1.4791, "step": 15218 }, { "epoch": 0.1977639638556761, "grad_norm": 0.3941724896430969, "learning_rate": 0.00016048038056122382, "loss": 1.4948, "step": 15219 }, { "epoch": 0.19777695839959197, "grad_norm": 0.47850003838539124, "learning_rate": 0.00016047778109931244, "loss": 1.3973, "step": 15220 }, { "epoch": 0.19778995294350785, "grad_norm": 0.4435640871524811, "learning_rate": 0.00016047518163740107, "loss": 1.4406, "step": 15221 }, { "epoch": 0.19780294748742372, "grad_norm": 0.3991568684577942, "learning_rate": 0.0001604725821754897, "loss": 1.5071, "step": 15222 }, { "epoch": 0.1978159420313396, "grad_norm": 0.4084078371524811, "learning_rate": 0.0001604699827135783, "loss": 1.4639, "step": 15223 }, { "epoch": 0.19782893657525547, "grad_norm": 0.5762602090835571, "learning_rate": 0.00016046738325166691, "loss": 1.6933, "step": 15224 }, { "epoch": 0.19784193111917134, "grad_norm": 0.45471665263175964, "learning_rate": 0.00016046478378975554, "loss": 1.4782, "step": 15225 }, { "epoch": 0.1978549256630872, "grad_norm": 0.4198942184448242, "learning_rate": 0.00016046218432784414, "loss": 1.4061, "step": 15226 }, { "epoch": 0.19786792020700308, "grad_norm": 0.4814091920852661, "learning_rate": 0.00016045958486593276, "loss": 1.5305, "step": 15227 }, { "epoch": 0.19788091475091896, "grad_norm": 0.4235839247703552, "learning_rate": 0.00016045698540402136, "loss": 1.4895, "step": 15228 }, { "epoch": 0.19789390929483483, "grad_norm": 0.5342458486557007, "learning_rate": 0.00016045438594211, "loss": 1.458, "step": 15229 }, { "epoch": 0.1979069038387507, "grad_norm": 0.3506269156932831, "learning_rate": 0.0001604517864801986, "loss": 1.5893, "step": 15230 }, { "epoch": 0.19791989838266658, "grad_norm": 0.49221667647361755, "learning_rate": 0.0001604491870182872, "loss": 1.4676, "step": 15231 }, { "epoch": 0.19793289292658245, "grad_norm": 0.44165927171707153, "learning_rate": 0.00016044658755637583, "loss": 1.3752, "step": 15232 }, { "epoch": 0.19794588747049832, "grad_norm": 0.36550450325012207, "learning_rate": 0.00016044398809446445, "loss": 1.4414, "step": 15233 }, { "epoch": 0.1979588820144142, "grad_norm": 0.377628892660141, "learning_rate": 0.00016044138863255308, "loss": 1.3878, "step": 15234 }, { "epoch": 0.19797187655833007, "grad_norm": 0.47497355937957764, "learning_rate": 0.00016043878917064167, "loss": 1.4643, "step": 15235 }, { "epoch": 0.19798487110224594, "grad_norm": 0.46771544218063354, "learning_rate": 0.0001604361897087303, "loss": 1.4994, "step": 15236 }, { "epoch": 0.1979978656461618, "grad_norm": 0.45935195684432983, "learning_rate": 0.00016043359024681892, "loss": 1.4672, "step": 15237 }, { "epoch": 0.1980108601900777, "grad_norm": 0.43917736411094666, "learning_rate": 0.00016043099078490752, "loss": 1.5373, "step": 15238 }, { "epoch": 0.19802385473399356, "grad_norm": 0.2885652482509613, "learning_rate": 0.00016042839132299615, "loss": 1.2032, "step": 15239 }, { "epoch": 0.19803684927790943, "grad_norm": 0.39125555753707886, "learning_rate": 0.00016042579186108477, "loss": 1.5456, "step": 15240 }, { "epoch": 0.1980498438218253, "grad_norm": 0.3073153793811798, "learning_rate": 0.0001604231923991734, "loss": 1.4896, "step": 15241 }, { "epoch": 0.19806283836574118, "grad_norm": 0.37376752495765686, "learning_rate": 0.000160420592937262, "loss": 1.4096, "step": 15242 }, { "epoch": 0.19807583290965705, "grad_norm": 0.37771785259246826, "learning_rate": 0.00016041799347535062, "loss": 1.4587, "step": 15243 }, { "epoch": 0.19808882745357292, "grad_norm": 0.5241137742996216, "learning_rate": 0.00016041539401343924, "loss": 1.4467, "step": 15244 }, { "epoch": 0.1981018219974888, "grad_norm": 0.44248658418655396, "learning_rate": 0.00016041279455152784, "loss": 1.3923, "step": 15245 }, { "epoch": 0.19811481654140467, "grad_norm": 0.4415137469768524, "learning_rate": 0.00016041019508961646, "loss": 1.4749, "step": 15246 }, { "epoch": 0.19812781108532054, "grad_norm": 0.41170668601989746, "learning_rate": 0.00016040759562770506, "loss": 1.6811, "step": 15247 }, { "epoch": 0.19814080562923642, "grad_norm": 0.3179848790168762, "learning_rate": 0.00016040499616579368, "loss": 1.2587, "step": 15248 }, { "epoch": 0.1981538001731523, "grad_norm": 0.3288537859916687, "learning_rate": 0.0001604023967038823, "loss": 1.4788, "step": 15249 }, { "epoch": 0.19816679471706816, "grad_norm": 0.4839995801448822, "learning_rate": 0.0001603997972419709, "loss": 1.3582, "step": 15250 }, { "epoch": 0.19817978926098404, "grad_norm": 0.3626045882701874, "learning_rate": 0.00016039719778005953, "loss": 1.4185, "step": 15251 }, { "epoch": 0.1981927838048999, "grad_norm": 0.31482669711112976, "learning_rate": 0.00016039459831814816, "loss": 1.3867, "step": 15252 }, { "epoch": 0.19820577834881578, "grad_norm": 0.5015284419059753, "learning_rate": 0.00016039199885623678, "loss": 1.3923, "step": 15253 }, { "epoch": 0.19821877289273165, "grad_norm": 0.43372347950935364, "learning_rate": 0.00016038939939432538, "loss": 1.6296, "step": 15254 }, { "epoch": 0.19823176743664753, "grad_norm": 0.38777047395706177, "learning_rate": 0.000160386799932414, "loss": 1.4918, "step": 15255 }, { "epoch": 0.1982447619805634, "grad_norm": 0.3947080075740814, "learning_rate": 0.00016038420047050263, "loss": 1.4615, "step": 15256 }, { "epoch": 0.19825775652447927, "grad_norm": 0.44883888959884644, "learning_rate": 0.00016038160100859122, "loss": 1.3895, "step": 15257 }, { "epoch": 0.19827075106839515, "grad_norm": 0.3320392370223999, "learning_rate": 0.00016037900154667985, "loss": 1.1723, "step": 15258 }, { "epoch": 0.19828374561231102, "grad_norm": 0.47458067536354065, "learning_rate": 0.00016037640208476845, "loss": 1.3871, "step": 15259 }, { "epoch": 0.1982967401562269, "grad_norm": 0.3882908821105957, "learning_rate": 0.00016037380262285707, "loss": 1.5049, "step": 15260 }, { "epoch": 0.19830973470014276, "grad_norm": 0.46307921409606934, "learning_rate": 0.0001603712031609457, "loss": 1.4342, "step": 15261 }, { "epoch": 0.19832272924405864, "grad_norm": 0.3385463058948517, "learning_rate": 0.0001603686036990343, "loss": 1.4359, "step": 15262 }, { "epoch": 0.1983357237879745, "grad_norm": 0.4020019769668579, "learning_rate": 0.00016036600423712292, "loss": 1.4977, "step": 15263 }, { "epoch": 0.1983487183318904, "grad_norm": 0.3457539677619934, "learning_rate": 0.00016036340477521154, "loss": 1.4751, "step": 15264 }, { "epoch": 0.19836171287580628, "grad_norm": 0.3555501103401184, "learning_rate": 0.00016036080531330017, "loss": 1.4812, "step": 15265 }, { "epoch": 0.19837470741972216, "grad_norm": 0.3446584939956665, "learning_rate": 0.00016035820585138876, "loss": 1.2154, "step": 15266 }, { "epoch": 0.19838770196363803, "grad_norm": 0.39608034491539, "learning_rate": 0.0001603556063894774, "loss": 1.588, "step": 15267 }, { "epoch": 0.1984006965075539, "grad_norm": 0.30151817202568054, "learning_rate": 0.000160353006927566, "loss": 1.5172, "step": 15268 }, { "epoch": 0.19841369105146978, "grad_norm": 0.40922778844833374, "learning_rate": 0.0001603504074656546, "loss": 1.3745, "step": 15269 }, { "epoch": 0.19842668559538565, "grad_norm": 0.45608213543891907, "learning_rate": 0.00016034780800374323, "loss": 1.5089, "step": 15270 }, { "epoch": 0.19843968013930152, "grad_norm": 0.4785734713077545, "learning_rate": 0.00016034520854183186, "loss": 1.4504, "step": 15271 }, { "epoch": 0.1984526746832174, "grad_norm": 0.4256673753261566, "learning_rate": 0.00016034260907992048, "loss": 1.2851, "step": 15272 }, { "epoch": 0.19846566922713327, "grad_norm": 0.328163743019104, "learning_rate": 0.00016034000961800908, "loss": 1.4876, "step": 15273 }, { "epoch": 0.19847866377104914, "grad_norm": 0.4179215133190155, "learning_rate": 0.00016033741015609768, "loss": 1.4193, "step": 15274 }, { "epoch": 0.198491658314965, "grad_norm": 0.44137489795684814, "learning_rate": 0.00016033481069418633, "loss": 1.5518, "step": 15275 }, { "epoch": 0.1985046528588809, "grad_norm": 0.4182802140712738, "learning_rate": 0.00016033221123227493, "loss": 1.4004, "step": 15276 }, { "epoch": 0.19851764740279676, "grad_norm": 0.3562569320201874, "learning_rate": 0.00016032961177036355, "loss": 1.2554, "step": 15277 }, { "epoch": 0.19853064194671263, "grad_norm": 0.35423633456230164, "learning_rate": 0.00016032701230845215, "loss": 1.2387, "step": 15278 }, { "epoch": 0.1985436364906285, "grad_norm": 0.4391341209411621, "learning_rate": 0.00016032441284654077, "loss": 1.4935, "step": 15279 }, { "epoch": 0.19855663103454438, "grad_norm": 0.36307451128959656, "learning_rate": 0.0001603218133846294, "loss": 1.236, "step": 15280 }, { "epoch": 0.19856962557846025, "grad_norm": 0.3874627351760864, "learning_rate": 0.000160319213922718, "loss": 1.3927, "step": 15281 }, { "epoch": 0.19858262012237612, "grad_norm": 0.39018988609313965, "learning_rate": 0.00016031661446080662, "loss": 1.3894, "step": 15282 }, { "epoch": 0.198595614666292, "grad_norm": 0.33459872007369995, "learning_rate": 0.00016031401499889524, "loss": 1.2518, "step": 15283 }, { "epoch": 0.19860860921020787, "grad_norm": 0.49354103207588196, "learning_rate": 0.00016031141553698387, "loss": 1.4767, "step": 15284 }, { "epoch": 0.19862160375412374, "grad_norm": 0.35415980219841003, "learning_rate": 0.00016030881607507247, "loss": 1.3861, "step": 15285 }, { "epoch": 0.19863459829803962, "grad_norm": 0.2480810284614563, "learning_rate": 0.00016030621661316106, "loss": 1.3568, "step": 15286 }, { "epoch": 0.1986475928419555, "grad_norm": 0.3108460009098053, "learning_rate": 0.00016030361715124971, "loss": 1.3442, "step": 15287 }, { "epoch": 0.19866058738587136, "grad_norm": 0.2840315103530884, "learning_rate": 0.0001603010176893383, "loss": 1.4583, "step": 15288 }, { "epoch": 0.19867358192978724, "grad_norm": 0.37063631415367126, "learning_rate": 0.00016029841822742694, "loss": 1.4405, "step": 15289 }, { "epoch": 0.1986865764737031, "grad_norm": 0.4510897099971771, "learning_rate": 0.00016029581876551553, "loss": 1.3659, "step": 15290 }, { "epoch": 0.19869957101761898, "grad_norm": 0.3901059031486511, "learning_rate": 0.00016029321930360416, "loss": 1.6081, "step": 15291 }, { "epoch": 0.19871256556153485, "grad_norm": 0.5823162198066711, "learning_rate": 0.00016029061984169278, "loss": 1.4775, "step": 15292 }, { "epoch": 0.19872556010545073, "grad_norm": 0.37442460656166077, "learning_rate": 0.00016028802037978138, "loss": 1.368, "step": 15293 }, { "epoch": 0.1987385546493666, "grad_norm": 0.3791543245315552, "learning_rate": 0.00016028542091787, "loss": 1.2698, "step": 15294 }, { "epoch": 0.19875154919328247, "grad_norm": 0.4248009920120239, "learning_rate": 0.00016028282145595863, "loss": 1.4038, "step": 15295 }, { "epoch": 0.19876454373719835, "grad_norm": 0.41493403911590576, "learning_rate": 0.00016028022199404725, "loss": 1.6313, "step": 15296 }, { "epoch": 0.19877753828111422, "grad_norm": 0.42783305048942566, "learning_rate": 0.00016027762253213585, "loss": 1.4309, "step": 15297 }, { "epoch": 0.1987905328250301, "grad_norm": 0.32946762442588806, "learning_rate": 0.00016027502307022445, "loss": 1.1649, "step": 15298 }, { "epoch": 0.19880352736894596, "grad_norm": 0.4396952688694, "learning_rate": 0.0001602724236083131, "loss": 1.4214, "step": 15299 }, { "epoch": 0.19881652191286184, "grad_norm": 0.5071067810058594, "learning_rate": 0.0001602698241464017, "loss": 1.405, "step": 15300 }, { "epoch": 0.1988295164567777, "grad_norm": 0.4332043528556824, "learning_rate": 0.00016026722468449032, "loss": 1.4329, "step": 15301 }, { "epoch": 0.19884251100069358, "grad_norm": 0.42541804909706116, "learning_rate": 0.00016026462522257892, "loss": 1.4254, "step": 15302 }, { "epoch": 0.19885550554460946, "grad_norm": 0.4576720893383026, "learning_rate": 0.00016026202576066754, "loss": 1.3789, "step": 15303 }, { "epoch": 0.19886850008852533, "grad_norm": 0.39556246995925903, "learning_rate": 0.00016025942629875617, "loss": 1.3404, "step": 15304 }, { "epoch": 0.1988814946324412, "grad_norm": 0.41974642872810364, "learning_rate": 0.00016025682683684477, "loss": 1.4725, "step": 15305 }, { "epoch": 0.19889448917635708, "grad_norm": 0.437761515378952, "learning_rate": 0.0001602542273749334, "loss": 1.5507, "step": 15306 }, { "epoch": 0.19890748372027295, "grad_norm": 0.42709267139434814, "learning_rate": 0.00016025162791302201, "loss": 1.5817, "step": 15307 }, { "epoch": 0.19892047826418882, "grad_norm": 0.41304272413253784, "learning_rate": 0.00016024902845111064, "loss": 1.5118, "step": 15308 }, { "epoch": 0.1989334728081047, "grad_norm": 0.32319214940071106, "learning_rate": 0.00016024642898919924, "loss": 1.4065, "step": 15309 }, { "epoch": 0.19894646735202057, "grad_norm": 0.680975615978241, "learning_rate": 0.00016024382952728786, "loss": 1.3247, "step": 15310 }, { "epoch": 0.19895946189593644, "grad_norm": 0.4191313683986664, "learning_rate": 0.00016024123006537648, "loss": 1.4534, "step": 15311 }, { "epoch": 0.1989724564398523, "grad_norm": 0.31099042296409607, "learning_rate": 0.00016023863060346508, "loss": 1.3029, "step": 15312 }, { "epoch": 0.1989854509837682, "grad_norm": 0.3521687984466553, "learning_rate": 0.0001602360311415537, "loss": 1.2817, "step": 15313 }, { "epoch": 0.19899844552768406, "grad_norm": 0.3875702917575836, "learning_rate": 0.00016023343167964233, "loss": 1.2578, "step": 15314 }, { "epoch": 0.19901144007159993, "grad_norm": 0.3726058304309845, "learning_rate": 0.00016023083221773093, "loss": 1.4163, "step": 15315 }, { "epoch": 0.1990244346155158, "grad_norm": 0.4327508211135864, "learning_rate": 0.00016022823275581955, "loss": 1.633, "step": 15316 }, { "epoch": 0.19903742915943168, "grad_norm": 0.3837486505508423, "learning_rate": 0.00016022563329390815, "loss": 1.4178, "step": 15317 }, { "epoch": 0.19905042370334755, "grad_norm": 0.32990044355392456, "learning_rate": 0.0001602230338319968, "loss": 1.4929, "step": 15318 }, { "epoch": 0.19906341824726342, "grad_norm": 0.36149898171424866, "learning_rate": 0.0001602204343700854, "loss": 1.3582, "step": 15319 }, { "epoch": 0.1990764127911793, "grad_norm": 0.41414421796798706, "learning_rate": 0.00016021783490817402, "loss": 1.4847, "step": 15320 }, { "epoch": 0.19908940733509517, "grad_norm": 0.42448264360427856, "learning_rate": 0.00016021523544626262, "loss": 1.584, "step": 15321 }, { "epoch": 0.19910240187901104, "grad_norm": 0.3937159776687622, "learning_rate": 0.00016021263598435125, "loss": 1.4891, "step": 15322 }, { "epoch": 0.19911539642292692, "grad_norm": 0.396188348531723, "learning_rate": 0.00016021003652243987, "loss": 1.4622, "step": 15323 }, { "epoch": 0.1991283909668428, "grad_norm": 0.40531226992607117, "learning_rate": 0.00016020743706052847, "loss": 1.5891, "step": 15324 }, { "epoch": 0.19914138551075866, "grad_norm": 0.43410012125968933, "learning_rate": 0.0001602048375986171, "loss": 1.5348, "step": 15325 }, { "epoch": 0.19915438005467453, "grad_norm": 0.288671612739563, "learning_rate": 0.00016020223813670572, "loss": 1.3558, "step": 15326 }, { "epoch": 0.1991673745985904, "grad_norm": 0.413058340549469, "learning_rate": 0.00016019963867479434, "loss": 1.1742, "step": 15327 }, { "epoch": 0.19918036914250628, "grad_norm": 0.42460256814956665, "learning_rate": 0.00016019703921288294, "loss": 1.5886, "step": 15328 }, { "epoch": 0.19919336368642215, "grad_norm": 0.4351479411125183, "learning_rate": 0.00016019443975097154, "loss": 1.429, "step": 15329 }, { "epoch": 0.19920635823033803, "grad_norm": 0.41776150465011597, "learning_rate": 0.0001601918402890602, "loss": 1.4502, "step": 15330 }, { "epoch": 0.1992193527742539, "grad_norm": 0.31935542821884155, "learning_rate": 0.00016018924082714878, "loss": 1.3763, "step": 15331 }, { "epoch": 0.19923234731816977, "grad_norm": 0.4103107750415802, "learning_rate": 0.0001601866413652374, "loss": 1.3706, "step": 15332 }, { "epoch": 0.19924534186208565, "grad_norm": 0.4254096746444702, "learning_rate": 0.000160184041903326, "loss": 1.341, "step": 15333 }, { "epoch": 0.19925833640600152, "grad_norm": 0.3568406403064728, "learning_rate": 0.00016018144244141463, "loss": 1.4953, "step": 15334 }, { "epoch": 0.1992713309499174, "grad_norm": 0.36509087681770325, "learning_rate": 0.00016017884297950326, "loss": 1.2538, "step": 15335 }, { "epoch": 0.19928432549383326, "grad_norm": 0.3267819881439209, "learning_rate": 0.00016017624351759185, "loss": 1.4212, "step": 15336 }, { "epoch": 0.19929732003774914, "grad_norm": 0.44438377022743225, "learning_rate": 0.00016017364405568048, "loss": 1.3985, "step": 15337 }, { "epoch": 0.199310314581665, "grad_norm": 0.4287395775318146, "learning_rate": 0.0001601710445937691, "loss": 1.4115, "step": 15338 }, { "epoch": 0.19932330912558088, "grad_norm": 0.4248035252094269, "learning_rate": 0.00016016844513185773, "loss": 1.5455, "step": 15339 }, { "epoch": 0.19933630366949678, "grad_norm": 0.44686010479927063, "learning_rate": 0.00016016584566994632, "loss": 1.42, "step": 15340 }, { "epoch": 0.19934929821341266, "grad_norm": 0.2797262966632843, "learning_rate": 0.00016016324620803492, "loss": 1.3089, "step": 15341 }, { "epoch": 0.19936229275732853, "grad_norm": 0.37204888463020325, "learning_rate": 0.00016016064674612357, "loss": 1.4282, "step": 15342 }, { "epoch": 0.1993752873012444, "grad_norm": 0.3677528202533722, "learning_rate": 0.00016015804728421217, "loss": 1.4471, "step": 15343 }, { "epoch": 0.19938828184516028, "grad_norm": 0.3339022397994995, "learning_rate": 0.0001601554478223008, "loss": 1.4126, "step": 15344 }, { "epoch": 0.19940127638907615, "grad_norm": 0.36243629455566406, "learning_rate": 0.00016015284836038942, "loss": 1.3091, "step": 15345 }, { "epoch": 0.19941427093299202, "grad_norm": 0.3422911763191223, "learning_rate": 0.00016015024889847802, "loss": 1.1941, "step": 15346 }, { "epoch": 0.1994272654769079, "grad_norm": 0.42427539825439453, "learning_rate": 0.00016014764943656664, "loss": 1.4216, "step": 15347 }, { "epoch": 0.19944026002082377, "grad_norm": 0.3341255784034729, "learning_rate": 0.00016014504997465524, "loss": 1.3904, "step": 15348 }, { "epoch": 0.19945325456473964, "grad_norm": 0.3369588553905487, "learning_rate": 0.0001601424505127439, "loss": 1.2856, "step": 15349 }, { "epoch": 0.1994662491086555, "grad_norm": 0.2476450800895691, "learning_rate": 0.0001601398510508325, "loss": 1.1781, "step": 15350 }, { "epoch": 0.1994792436525714, "grad_norm": 0.4013558030128479, "learning_rate": 0.0001601372515889211, "loss": 1.3791, "step": 15351 }, { "epoch": 0.19949223819648726, "grad_norm": 0.39862382411956787, "learning_rate": 0.0001601346521270097, "loss": 1.3654, "step": 15352 }, { "epoch": 0.19950523274040313, "grad_norm": 0.4367014169692993, "learning_rate": 0.00016013205266509833, "loss": 1.3027, "step": 15353 }, { "epoch": 0.199518227284319, "grad_norm": 0.33646225929260254, "learning_rate": 0.00016012945320318696, "loss": 1.4137, "step": 15354 }, { "epoch": 0.19953122182823488, "grad_norm": 0.3747285008430481, "learning_rate": 0.00016012685374127556, "loss": 1.5891, "step": 15355 }, { "epoch": 0.19954421637215075, "grad_norm": 0.3984653353691101, "learning_rate": 0.00016012425427936418, "loss": 1.5199, "step": 15356 }, { "epoch": 0.19955721091606662, "grad_norm": 0.4099576473236084, "learning_rate": 0.0001601216548174528, "loss": 1.4076, "step": 15357 }, { "epoch": 0.1995702054599825, "grad_norm": 0.2693162262439728, "learning_rate": 0.0001601190553555414, "loss": 1.406, "step": 15358 }, { "epoch": 0.19958320000389837, "grad_norm": 0.3406940996646881, "learning_rate": 0.00016011645589363003, "loss": 1.5985, "step": 15359 }, { "epoch": 0.19959619454781424, "grad_norm": 0.4689192771911621, "learning_rate": 0.00016011385643171862, "loss": 1.4187, "step": 15360 }, { "epoch": 0.19960918909173012, "grad_norm": 0.34262481331825256, "learning_rate": 0.00016011125696980728, "loss": 1.3294, "step": 15361 }, { "epoch": 0.199622183635646, "grad_norm": 0.41267237067222595, "learning_rate": 0.00016010865750789587, "loss": 1.3925, "step": 15362 }, { "epoch": 0.19963517817956186, "grad_norm": 0.41196075081825256, "learning_rate": 0.0001601060580459845, "loss": 1.2361, "step": 15363 }, { "epoch": 0.19964817272347773, "grad_norm": 0.405814528465271, "learning_rate": 0.0001601034585840731, "loss": 1.4084, "step": 15364 }, { "epoch": 0.1996611672673936, "grad_norm": 0.44442397356033325, "learning_rate": 0.00016010085912216172, "loss": 1.5637, "step": 15365 }, { "epoch": 0.19967416181130948, "grad_norm": 0.35996049642562866, "learning_rate": 0.00016009825966025034, "loss": 1.5295, "step": 15366 }, { "epoch": 0.19968715635522535, "grad_norm": 0.3599914014339447, "learning_rate": 0.00016009566019833894, "loss": 1.2685, "step": 15367 }, { "epoch": 0.19970015089914123, "grad_norm": 0.3731040358543396, "learning_rate": 0.00016009306073642757, "loss": 1.3927, "step": 15368 }, { "epoch": 0.1997131454430571, "grad_norm": 0.37993544340133667, "learning_rate": 0.0001600904612745162, "loss": 1.6071, "step": 15369 }, { "epoch": 0.19972613998697297, "grad_norm": 0.41570788621902466, "learning_rate": 0.0001600878618126048, "loss": 1.3826, "step": 15370 }, { "epoch": 0.19973913453088885, "grad_norm": 0.43232133984565735, "learning_rate": 0.0001600852623506934, "loss": 1.3585, "step": 15371 }, { "epoch": 0.19975212907480472, "grad_norm": 0.42845484614372253, "learning_rate": 0.000160082662888782, "loss": 1.4164, "step": 15372 }, { "epoch": 0.1997651236187206, "grad_norm": 0.4609348177909851, "learning_rate": 0.00016008006342687066, "loss": 1.4316, "step": 15373 }, { "epoch": 0.19977811816263646, "grad_norm": 0.34090402722358704, "learning_rate": 0.00016007746396495926, "loss": 1.3717, "step": 15374 }, { "epoch": 0.19979111270655234, "grad_norm": 0.38243624567985535, "learning_rate": 0.00016007486450304788, "loss": 1.4322, "step": 15375 }, { "epoch": 0.1998041072504682, "grad_norm": 0.3448341488838196, "learning_rate": 0.00016007226504113648, "loss": 1.0993, "step": 15376 }, { "epoch": 0.19981710179438408, "grad_norm": 0.374994158744812, "learning_rate": 0.0001600696655792251, "loss": 1.5967, "step": 15377 }, { "epoch": 0.19983009633829996, "grad_norm": 0.4522508978843689, "learning_rate": 0.00016006706611731373, "loss": 1.4916, "step": 15378 }, { "epoch": 0.19984309088221583, "grad_norm": 0.4896794855594635, "learning_rate": 0.00016006446665540233, "loss": 1.584, "step": 15379 }, { "epoch": 0.1998560854261317, "grad_norm": 0.3767539858818054, "learning_rate": 0.00016006186719349095, "loss": 1.4583, "step": 15380 }, { "epoch": 0.19986907997004758, "grad_norm": 0.40603840351104736, "learning_rate": 0.00016005926773157958, "loss": 1.3325, "step": 15381 }, { "epoch": 0.19988207451396345, "grad_norm": 0.4316093921661377, "learning_rate": 0.00016005666826966817, "loss": 1.3926, "step": 15382 }, { "epoch": 0.19989506905787932, "grad_norm": 0.4680945575237274, "learning_rate": 0.0001600540688077568, "loss": 1.4563, "step": 15383 }, { "epoch": 0.1999080636017952, "grad_norm": 0.46094948053359985, "learning_rate": 0.00016005146934584542, "loss": 1.3507, "step": 15384 }, { "epoch": 0.19992105814571107, "grad_norm": 0.38625067472457886, "learning_rate": 0.00016004886988393405, "loss": 1.7112, "step": 15385 }, { "epoch": 0.19993405268962694, "grad_norm": 0.391434907913208, "learning_rate": 0.00016004627042202264, "loss": 1.4305, "step": 15386 }, { "epoch": 0.1999470472335428, "grad_norm": 0.32393285632133484, "learning_rate": 0.00016004367096011127, "loss": 1.2506, "step": 15387 }, { "epoch": 0.19996004177745869, "grad_norm": 0.4099315106868744, "learning_rate": 0.0001600410714981999, "loss": 1.6296, "step": 15388 }, { "epoch": 0.19997303632137456, "grad_norm": 0.4354977011680603, "learning_rate": 0.0001600384720362885, "loss": 1.4602, "step": 15389 }, { "epoch": 0.19998603086529043, "grad_norm": 0.2789519131183624, "learning_rate": 0.00016003587257437711, "loss": 1.2519, "step": 15390 }, { "epoch": 0.1999990254092063, "grad_norm": 0.3328530192375183, "learning_rate": 0.0001600332731124657, "loss": 1.316, "step": 15391 }, { "epoch": 0.20001201995312218, "grad_norm": 0.3718877136707306, "learning_rate": 0.00016003067365055436, "loss": 1.4504, "step": 15392 }, { "epoch": 0.20002501449703805, "grad_norm": 0.34923285245895386, "learning_rate": 0.00016002807418864296, "loss": 1.4455, "step": 15393 }, { "epoch": 0.20003800904095392, "grad_norm": 0.4272012710571289, "learning_rate": 0.00016002547472673159, "loss": 1.4843, "step": 15394 }, { "epoch": 0.2000510035848698, "grad_norm": 0.44626718759536743, "learning_rate": 0.00016002287526482018, "loss": 1.4089, "step": 15395 }, { "epoch": 0.20006399812878567, "grad_norm": 0.4409841299057007, "learning_rate": 0.0001600202758029088, "loss": 1.4024, "step": 15396 }, { "epoch": 0.20007699267270154, "grad_norm": 0.31712839007377625, "learning_rate": 0.00016001767634099743, "loss": 1.401, "step": 15397 }, { "epoch": 0.20008998721661742, "grad_norm": 0.330126017332077, "learning_rate": 0.00016001507687908603, "loss": 1.4193, "step": 15398 }, { "epoch": 0.2001029817605333, "grad_norm": 0.4245782792568207, "learning_rate": 0.00016001247741717465, "loss": 1.3154, "step": 15399 }, { "epoch": 0.20011597630444916, "grad_norm": 0.3618156909942627, "learning_rate": 0.00016000987795526328, "loss": 1.4052, "step": 15400 }, { "epoch": 0.20012897084836503, "grad_norm": 0.4344066381454468, "learning_rate": 0.00016000727849335188, "loss": 1.5139, "step": 15401 }, { "epoch": 0.2001419653922809, "grad_norm": 0.40536969900131226, "learning_rate": 0.0001600046790314405, "loss": 1.5962, "step": 15402 }, { "epoch": 0.20015495993619678, "grad_norm": 0.358058899641037, "learning_rate": 0.0001600020795695291, "loss": 1.4791, "step": 15403 }, { "epoch": 0.20016795448011265, "grad_norm": 0.29033300280570984, "learning_rate": 0.00015999948010761775, "loss": 1.3785, "step": 15404 }, { "epoch": 0.20018094902402853, "grad_norm": 0.4340335428714752, "learning_rate": 0.00015999688064570635, "loss": 1.4549, "step": 15405 }, { "epoch": 0.2001939435679444, "grad_norm": 0.473200261592865, "learning_rate": 0.00015999428118379497, "loss": 1.459, "step": 15406 }, { "epoch": 0.20020693811186027, "grad_norm": 0.3932296633720398, "learning_rate": 0.00015999168172188357, "loss": 1.3656, "step": 15407 }, { "epoch": 0.20021993265577614, "grad_norm": 0.42950358986854553, "learning_rate": 0.0001599890822599722, "loss": 1.5815, "step": 15408 }, { "epoch": 0.20023292719969202, "grad_norm": 0.40682581067085266, "learning_rate": 0.00015998648279806082, "loss": 1.416, "step": 15409 }, { "epoch": 0.2002459217436079, "grad_norm": 0.4486320912837982, "learning_rate": 0.00015998388333614941, "loss": 1.2984, "step": 15410 }, { "epoch": 0.20025891628752376, "grad_norm": 0.6088532209396362, "learning_rate": 0.00015998128387423804, "loss": 1.6367, "step": 15411 }, { "epoch": 0.20027191083143964, "grad_norm": 0.4755866527557373, "learning_rate": 0.00015997868441232666, "loss": 1.5699, "step": 15412 }, { "epoch": 0.2002849053753555, "grad_norm": 0.4551866054534912, "learning_rate": 0.00015997608495041526, "loss": 1.467, "step": 15413 }, { "epoch": 0.20029789991927138, "grad_norm": 0.5039704442024231, "learning_rate": 0.00015997348548850389, "loss": 1.4165, "step": 15414 }, { "epoch": 0.20031089446318726, "grad_norm": 0.3217463493347168, "learning_rate": 0.00015997088602659248, "loss": 1.315, "step": 15415 }, { "epoch": 0.20032388900710316, "grad_norm": 0.44191160798072815, "learning_rate": 0.00015996828656468113, "loss": 1.4067, "step": 15416 }, { "epoch": 0.20033688355101903, "grad_norm": 0.31735658645629883, "learning_rate": 0.00015996568710276973, "loss": 1.3719, "step": 15417 }, { "epoch": 0.2003498780949349, "grad_norm": 0.3791908621788025, "learning_rate": 0.00015996308764085836, "loss": 1.3287, "step": 15418 }, { "epoch": 0.20036287263885078, "grad_norm": 0.32334086298942566, "learning_rate": 0.00015996048817894698, "loss": 1.6866, "step": 15419 }, { "epoch": 0.20037586718276665, "grad_norm": 0.460700660943985, "learning_rate": 0.00015995788871703558, "loss": 1.4302, "step": 15420 }, { "epoch": 0.20038886172668252, "grad_norm": 0.5116479396820068, "learning_rate": 0.0001599552892551242, "loss": 1.3797, "step": 15421 }, { "epoch": 0.2004018562705984, "grad_norm": 0.360805481672287, "learning_rate": 0.0001599526897932128, "loss": 1.33, "step": 15422 }, { "epoch": 0.20041485081451427, "grad_norm": 0.40143775939941406, "learning_rate": 0.00015995009033130145, "loss": 1.5545, "step": 15423 }, { "epoch": 0.20042784535843014, "grad_norm": 0.345742404460907, "learning_rate": 0.00015994749086939005, "loss": 1.3964, "step": 15424 }, { "epoch": 0.200440839902346, "grad_norm": 0.40881749987602234, "learning_rate": 0.00015994489140747865, "loss": 1.7012, "step": 15425 }, { "epoch": 0.20045383444626189, "grad_norm": 0.46395188570022583, "learning_rate": 0.00015994229194556727, "loss": 1.3633, "step": 15426 }, { "epoch": 0.20046682899017776, "grad_norm": 0.4470542371273041, "learning_rate": 0.0001599396924836559, "loss": 1.4938, "step": 15427 }, { "epoch": 0.20047982353409363, "grad_norm": 0.43475550413131714, "learning_rate": 0.00015993709302174452, "loss": 1.4408, "step": 15428 }, { "epoch": 0.2004928180780095, "grad_norm": 0.41542017459869385, "learning_rate": 0.00015993449355983312, "loss": 1.305, "step": 15429 }, { "epoch": 0.20050581262192538, "grad_norm": 0.3461635410785675, "learning_rate": 0.00015993189409792174, "loss": 1.3386, "step": 15430 }, { "epoch": 0.20051880716584125, "grad_norm": 0.39723336696624756, "learning_rate": 0.00015992929463601037, "loss": 1.3459, "step": 15431 }, { "epoch": 0.20053180170975712, "grad_norm": 0.41502058506011963, "learning_rate": 0.00015992669517409896, "loss": 1.4575, "step": 15432 }, { "epoch": 0.200544796253673, "grad_norm": 0.455619752407074, "learning_rate": 0.0001599240957121876, "loss": 1.561, "step": 15433 }, { "epoch": 0.20055779079758887, "grad_norm": 0.48920756578445435, "learning_rate": 0.00015992149625027619, "loss": 1.5112, "step": 15434 }, { "epoch": 0.20057078534150474, "grad_norm": 0.4552110433578491, "learning_rate": 0.00015991889678836484, "loss": 1.3981, "step": 15435 }, { "epoch": 0.20058377988542062, "grad_norm": 0.38848674297332764, "learning_rate": 0.00015991629732645343, "loss": 1.4132, "step": 15436 }, { "epoch": 0.2005967744293365, "grad_norm": 0.38771361112594604, "learning_rate": 0.00015991369786454203, "loss": 1.157, "step": 15437 }, { "epoch": 0.20060976897325236, "grad_norm": 0.3963020145893097, "learning_rate": 0.00015991109840263066, "loss": 1.3774, "step": 15438 }, { "epoch": 0.20062276351716823, "grad_norm": 0.4398963153362274, "learning_rate": 0.00015990849894071928, "loss": 1.4806, "step": 15439 }, { "epoch": 0.2006357580610841, "grad_norm": 0.42457035183906555, "learning_rate": 0.0001599058994788079, "loss": 1.3053, "step": 15440 }, { "epoch": 0.20064875260499998, "grad_norm": 0.34232112765312195, "learning_rate": 0.0001599033000168965, "loss": 1.4357, "step": 15441 }, { "epoch": 0.20066174714891585, "grad_norm": 0.4253518283367157, "learning_rate": 0.00015990070055498513, "loss": 1.4779, "step": 15442 }, { "epoch": 0.20067474169283173, "grad_norm": 0.4288158118724823, "learning_rate": 0.00015989810109307375, "loss": 1.4467, "step": 15443 }, { "epoch": 0.2006877362367476, "grad_norm": 0.3745271861553192, "learning_rate": 0.00015989550163116235, "loss": 1.4147, "step": 15444 }, { "epoch": 0.20070073078066347, "grad_norm": 0.4292162358760834, "learning_rate": 0.00015989290216925097, "loss": 1.5221, "step": 15445 }, { "epoch": 0.20071372532457935, "grad_norm": 0.4208000600337982, "learning_rate": 0.00015989030270733957, "loss": 1.5414, "step": 15446 }, { "epoch": 0.20072671986849522, "grad_norm": 0.41436871886253357, "learning_rate": 0.00015988770324542822, "loss": 1.4846, "step": 15447 }, { "epoch": 0.2007397144124111, "grad_norm": 0.42770326137542725, "learning_rate": 0.00015988510378351682, "loss": 1.4128, "step": 15448 }, { "epoch": 0.20075270895632696, "grad_norm": 0.3122018575668335, "learning_rate": 0.00015988250432160544, "loss": 1.0307, "step": 15449 }, { "epoch": 0.20076570350024284, "grad_norm": 0.3554200232028961, "learning_rate": 0.00015987990485969404, "loss": 1.5785, "step": 15450 }, { "epoch": 0.2007786980441587, "grad_norm": 0.35506123304367065, "learning_rate": 0.00015987730539778267, "loss": 1.3464, "step": 15451 }, { "epoch": 0.20079169258807458, "grad_norm": 0.3930074870586395, "learning_rate": 0.0001598747059358713, "loss": 1.393, "step": 15452 }, { "epoch": 0.20080468713199046, "grad_norm": 0.3954486548900604, "learning_rate": 0.0001598721064739599, "loss": 1.3006, "step": 15453 }, { "epoch": 0.20081768167590633, "grad_norm": 0.4204241931438446, "learning_rate": 0.0001598695070120485, "loss": 1.373, "step": 15454 }, { "epoch": 0.2008306762198222, "grad_norm": 0.3705895245075226, "learning_rate": 0.00015986690755013714, "loss": 1.348, "step": 15455 }, { "epoch": 0.20084367076373807, "grad_norm": 0.37882500886917114, "learning_rate": 0.00015986430808822573, "loss": 1.4137, "step": 15456 }, { "epoch": 0.20085666530765395, "grad_norm": 0.3344724476337433, "learning_rate": 0.00015986170862631436, "loss": 1.5115, "step": 15457 }, { "epoch": 0.20086965985156982, "grad_norm": 0.3381964862346649, "learning_rate": 0.00015985910916440298, "loss": 1.4018, "step": 15458 }, { "epoch": 0.2008826543954857, "grad_norm": 0.4564279019832611, "learning_rate": 0.0001598565097024916, "loss": 1.4675, "step": 15459 }, { "epoch": 0.20089564893940157, "grad_norm": 0.38492849469184875, "learning_rate": 0.0001598539102405802, "loss": 1.4442, "step": 15460 }, { "epoch": 0.20090864348331744, "grad_norm": 0.46018293499946594, "learning_rate": 0.00015985131077866883, "loss": 1.3129, "step": 15461 }, { "epoch": 0.2009216380272333, "grad_norm": 0.31527480483055115, "learning_rate": 0.00015984871131675745, "loss": 1.5662, "step": 15462 }, { "epoch": 0.20093463257114919, "grad_norm": 0.4076431095600128, "learning_rate": 0.00015984611185484605, "loss": 1.5504, "step": 15463 }, { "epoch": 0.20094762711506506, "grad_norm": 0.342613160610199, "learning_rate": 0.00015984351239293468, "loss": 1.3724, "step": 15464 }, { "epoch": 0.20096062165898093, "grad_norm": 0.3431416451931, "learning_rate": 0.00015984091293102327, "loss": 1.1886, "step": 15465 }, { "epoch": 0.2009736162028968, "grad_norm": 0.3727257251739502, "learning_rate": 0.0001598383134691119, "loss": 1.3544, "step": 15466 }, { "epoch": 0.20098661074681268, "grad_norm": 0.5540410876274109, "learning_rate": 0.00015983571400720052, "loss": 1.2226, "step": 15467 }, { "epoch": 0.20099960529072855, "grad_norm": 0.5565593242645264, "learning_rate": 0.00015983311454528912, "loss": 1.4975, "step": 15468 }, { "epoch": 0.20101259983464442, "grad_norm": 0.3361845910549164, "learning_rate": 0.00015983051508337774, "loss": 1.4783, "step": 15469 }, { "epoch": 0.2010255943785603, "grad_norm": 0.3931155502796173, "learning_rate": 0.00015982791562146637, "loss": 1.3817, "step": 15470 }, { "epoch": 0.20103858892247617, "grad_norm": 0.4010556638240814, "learning_rate": 0.000159825316159555, "loss": 1.3986, "step": 15471 }, { "epoch": 0.20105158346639204, "grad_norm": 0.3854156732559204, "learning_rate": 0.0001598227166976436, "loss": 1.2552, "step": 15472 }, { "epoch": 0.20106457801030791, "grad_norm": 0.3804440200328827, "learning_rate": 0.00015982011723573221, "loss": 1.3033, "step": 15473 }, { "epoch": 0.2010775725542238, "grad_norm": 0.36511868238449097, "learning_rate": 0.00015981751777382084, "loss": 1.5625, "step": 15474 }, { "epoch": 0.20109056709813966, "grad_norm": 0.3993107080459595, "learning_rate": 0.00015981491831190944, "loss": 1.4384, "step": 15475 }, { "epoch": 0.20110356164205553, "grad_norm": 0.3993953764438629, "learning_rate": 0.00015981231884999806, "loss": 1.3731, "step": 15476 }, { "epoch": 0.2011165561859714, "grad_norm": 0.3434096574783325, "learning_rate": 0.00015980971938808666, "loss": 1.1904, "step": 15477 }, { "epoch": 0.20112955072988728, "grad_norm": 0.3749016523361206, "learning_rate": 0.0001598071199261753, "loss": 1.4604, "step": 15478 }, { "epoch": 0.20114254527380315, "grad_norm": 0.37235578894615173, "learning_rate": 0.0001598045204642639, "loss": 1.2544, "step": 15479 }, { "epoch": 0.20115553981771903, "grad_norm": 0.45200005173683167, "learning_rate": 0.0001598019210023525, "loss": 1.2509, "step": 15480 }, { "epoch": 0.2011685343616349, "grad_norm": 0.3638080358505249, "learning_rate": 0.00015979932154044113, "loss": 1.4136, "step": 15481 }, { "epoch": 0.20118152890555077, "grad_norm": 0.3104375898838043, "learning_rate": 0.00015979672207852975, "loss": 1.2958, "step": 15482 }, { "epoch": 0.20119452344946664, "grad_norm": 0.3581565320491791, "learning_rate": 0.00015979412261661838, "loss": 1.0592, "step": 15483 }, { "epoch": 0.20120751799338252, "grad_norm": 0.34259718656539917, "learning_rate": 0.00015979152315470698, "loss": 1.3443, "step": 15484 }, { "epoch": 0.2012205125372984, "grad_norm": 0.5038226246833801, "learning_rate": 0.0001597889236927956, "loss": 1.4838, "step": 15485 }, { "epoch": 0.20123350708121426, "grad_norm": 0.3625810444355011, "learning_rate": 0.00015978632423088422, "loss": 1.26, "step": 15486 }, { "epoch": 0.20124650162513014, "grad_norm": 0.4064147472381592, "learning_rate": 0.00015978372476897282, "loss": 1.3402, "step": 15487 }, { "epoch": 0.201259496169046, "grad_norm": 0.3508455455303192, "learning_rate": 0.00015978112530706145, "loss": 1.3568, "step": 15488 }, { "epoch": 0.20127249071296188, "grad_norm": 0.32590991258621216, "learning_rate": 0.00015977852584515004, "loss": 1.4675, "step": 15489 }, { "epoch": 0.20128548525687776, "grad_norm": 0.3837073743343353, "learning_rate": 0.0001597759263832387, "loss": 1.5279, "step": 15490 }, { "epoch": 0.20129847980079363, "grad_norm": 0.35764941573143005, "learning_rate": 0.0001597733269213273, "loss": 1.52, "step": 15491 }, { "epoch": 0.20131147434470953, "grad_norm": 0.38586580753326416, "learning_rate": 0.0001597707274594159, "loss": 1.4397, "step": 15492 }, { "epoch": 0.2013244688886254, "grad_norm": 0.42878884077072144, "learning_rate": 0.00015976812799750454, "loss": 1.3246, "step": 15493 }, { "epoch": 0.20133746343254127, "grad_norm": 0.31376323103904724, "learning_rate": 0.00015976552853559314, "loss": 1.3979, "step": 15494 }, { "epoch": 0.20135045797645715, "grad_norm": 0.42834824323654175, "learning_rate": 0.00015976292907368176, "loss": 1.4761, "step": 15495 }, { "epoch": 0.20136345252037302, "grad_norm": 0.36774513125419617, "learning_rate": 0.00015976032961177036, "loss": 1.2995, "step": 15496 }, { "epoch": 0.2013764470642889, "grad_norm": 0.38691818714141846, "learning_rate": 0.00015975773014985899, "loss": 1.4661, "step": 15497 }, { "epoch": 0.20138944160820477, "grad_norm": 0.3923111855983734, "learning_rate": 0.0001597551306879476, "loss": 1.4596, "step": 15498 }, { "epoch": 0.20140243615212064, "grad_norm": 0.3583773672580719, "learning_rate": 0.0001597525312260362, "loss": 1.382, "step": 15499 }, { "epoch": 0.2014154306960365, "grad_norm": 0.30078837275505066, "learning_rate": 0.00015974993176412483, "loss": 1.4106, "step": 15500 }, { "epoch": 0.20142842523995239, "grad_norm": 0.36736586689949036, "learning_rate": 0.00015974733230221346, "loss": 1.4908, "step": 15501 }, { "epoch": 0.20144141978386826, "grad_norm": 0.35619914531707764, "learning_rate": 0.00015974473284030208, "loss": 1.4938, "step": 15502 }, { "epoch": 0.20145441432778413, "grad_norm": 0.37158042192459106, "learning_rate": 0.00015974213337839068, "loss": 1.3052, "step": 15503 }, { "epoch": 0.2014674088717, "grad_norm": 0.5701084733009338, "learning_rate": 0.0001597395339164793, "loss": 1.3253, "step": 15504 }, { "epoch": 0.20148040341561588, "grad_norm": 0.458329439163208, "learning_rate": 0.00015973693445456793, "loss": 1.4172, "step": 15505 }, { "epoch": 0.20149339795953175, "grad_norm": 0.40700650215148926, "learning_rate": 0.00015973433499265652, "loss": 1.5024, "step": 15506 }, { "epoch": 0.20150639250344762, "grad_norm": 0.46246078610420227, "learning_rate": 0.00015973173553074515, "loss": 1.4115, "step": 15507 }, { "epoch": 0.2015193870473635, "grad_norm": 0.3385193645954132, "learning_rate": 0.00015972913606883375, "loss": 1.5335, "step": 15508 }, { "epoch": 0.20153238159127937, "grad_norm": 0.42362180352211, "learning_rate": 0.00015972653660692237, "loss": 1.3271, "step": 15509 }, { "epoch": 0.20154537613519524, "grad_norm": 0.37288254499435425, "learning_rate": 0.000159723937145011, "loss": 1.2622, "step": 15510 }, { "epoch": 0.20155837067911112, "grad_norm": 0.43298378586769104, "learning_rate": 0.0001597213376830996, "loss": 1.5571, "step": 15511 }, { "epoch": 0.201571365223027, "grad_norm": 0.3844226002693176, "learning_rate": 0.00015971873822118822, "loss": 1.4003, "step": 15512 }, { "epoch": 0.20158435976694286, "grad_norm": 0.3851691484451294, "learning_rate": 0.00015971613875927684, "loss": 1.3184, "step": 15513 }, { "epoch": 0.20159735431085873, "grad_norm": 0.4106464982032776, "learning_rate": 0.00015971353929736547, "loss": 1.3351, "step": 15514 }, { "epoch": 0.2016103488547746, "grad_norm": 0.41100969910621643, "learning_rate": 0.00015971093983545406, "loss": 1.4826, "step": 15515 }, { "epoch": 0.20162334339869048, "grad_norm": 0.34024062752723694, "learning_rate": 0.0001597083403735427, "loss": 1.5004, "step": 15516 }, { "epoch": 0.20163633794260635, "grad_norm": 0.4197511672973633, "learning_rate": 0.0001597057409116313, "loss": 1.4641, "step": 15517 }, { "epoch": 0.20164933248652223, "grad_norm": 0.45267030596733093, "learning_rate": 0.0001597031414497199, "loss": 1.3783, "step": 15518 }, { "epoch": 0.2016623270304381, "grad_norm": 0.446397989988327, "learning_rate": 0.00015970054198780853, "loss": 1.5595, "step": 15519 }, { "epoch": 0.20167532157435397, "grad_norm": 0.37602293491363525, "learning_rate": 0.00015969794252589713, "loss": 1.3057, "step": 15520 }, { "epoch": 0.20168831611826984, "grad_norm": 0.3605562746524811, "learning_rate": 0.00015969534306398576, "loss": 1.428, "step": 15521 }, { "epoch": 0.20170131066218572, "grad_norm": 0.4639723300933838, "learning_rate": 0.00015969274360207438, "loss": 1.418, "step": 15522 }, { "epoch": 0.2017143052061016, "grad_norm": 0.4100245237350464, "learning_rate": 0.00015969014414016298, "loss": 1.6012, "step": 15523 }, { "epoch": 0.20172729975001746, "grad_norm": 0.38920027017593384, "learning_rate": 0.0001596875446782516, "loss": 1.4594, "step": 15524 }, { "epoch": 0.20174029429393334, "grad_norm": 0.47984549403190613, "learning_rate": 0.00015968494521634023, "loss": 1.572, "step": 15525 }, { "epoch": 0.2017532888378492, "grad_norm": 0.4243656098842621, "learning_rate": 0.00015968234575442885, "loss": 1.4721, "step": 15526 }, { "epoch": 0.20176628338176508, "grad_norm": 0.38825806975364685, "learning_rate": 0.00015967974629251745, "loss": 1.3754, "step": 15527 }, { "epoch": 0.20177927792568096, "grad_norm": 0.3386104106903076, "learning_rate": 0.00015967714683060607, "loss": 1.2337, "step": 15528 }, { "epoch": 0.20179227246959683, "grad_norm": 0.3906554877758026, "learning_rate": 0.0001596745473686947, "loss": 1.433, "step": 15529 }, { "epoch": 0.2018052670135127, "grad_norm": 0.3716062009334564, "learning_rate": 0.0001596719479067833, "loss": 1.6036, "step": 15530 }, { "epoch": 0.20181826155742857, "grad_norm": 0.3616575598716736, "learning_rate": 0.00015966934844487192, "loss": 1.6153, "step": 15531 }, { "epoch": 0.20183125610134445, "grad_norm": 0.426098108291626, "learning_rate": 0.00015966674898296054, "loss": 1.3482, "step": 15532 }, { "epoch": 0.20184425064526032, "grad_norm": 0.3066435754299164, "learning_rate": 0.00015966414952104917, "loss": 1.1552, "step": 15533 }, { "epoch": 0.2018572451891762, "grad_norm": 0.3735445439815521, "learning_rate": 0.00015966155005913777, "loss": 1.4611, "step": 15534 }, { "epoch": 0.20187023973309207, "grad_norm": 0.3736024796962738, "learning_rate": 0.00015965895059722636, "loss": 1.4677, "step": 15535 }, { "epoch": 0.20188323427700794, "grad_norm": 0.43967676162719727, "learning_rate": 0.00015965635113531502, "loss": 1.5224, "step": 15536 }, { "epoch": 0.2018962288209238, "grad_norm": 0.3615036904811859, "learning_rate": 0.0001596537516734036, "loss": 1.2323, "step": 15537 }, { "epoch": 0.20190922336483969, "grad_norm": 0.18317799270153046, "learning_rate": 0.00015965115221149224, "loss": 1.2085, "step": 15538 }, { "epoch": 0.20192221790875556, "grad_norm": 0.3674650490283966, "learning_rate": 0.00015964855274958083, "loss": 1.5367, "step": 15539 }, { "epoch": 0.20193521245267143, "grad_norm": 0.37121474742889404, "learning_rate": 0.00015964595328766946, "loss": 1.3428, "step": 15540 }, { "epoch": 0.2019482069965873, "grad_norm": 0.39096876978874207, "learning_rate": 0.00015964335382575808, "loss": 1.5221, "step": 15541 }, { "epoch": 0.20196120154050318, "grad_norm": 0.48480314016342163, "learning_rate": 0.00015964075436384668, "loss": 1.6317, "step": 15542 }, { "epoch": 0.20197419608441905, "grad_norm": 0.44641631841659546, "learning_rate": 0.0001596381549019353, "loss": 1.3509, "step": 15543 }, { "epoch": 0.20198719062833492, "grad_norm": 0.39969927072525024, "learning_rate": 0.00015963555544002393, "loss": 1.4268, "step": 15544 }, { "epoch": 0.2020001851722508, "grad_norm": 0.38742053508758545, "learning_rate": 0.00015963295597811255, "loss": 1.2812, "step": 15545 }, { "epoch": 0.20201317971616667, "grad_norm": 0.4840254783630371, "learning_rate": 0.00015963035651620115, "loss": 1.4762, "step": 15546 }, { "epoch": 0.20202617426008254, "grad_norm": 0.33920347690582275, "learning_rate": 0.00015962775705428975, "loss": 1.4521, "step": 15547 }, { "epoch": 0.20203916880399841, "grad_norm": 0.4188118278980255, "learning_rate": 0.0001596251575923784, "loss": 1.5022, "step": 15548 }, { "epoch": 0.2020521633479143, "grad_norm": 0.32119986414909363, "learning_rate": 0.000159622558130467, "loss": 1.3524, "step": 15549 }, { "epoch": 0.20206515789183016, "grad_norm": 0.32455724477767944, "learning_rate": 0.00015961995866855562, "loss": 1.5207, "step": 15550 }, { "epoch": 0.20207815243574603, "grad_norm": 0.3758303225040436, "learning_rate": 0.00015961735920664422, "loss": 1.3958, "step": 15551 }, { "epoch": 0.2020911469796619, "grad_norm": 0.44503000378608704, "learning_rate": 0.00015961475974473284, "loss": 1.5735, "step": 15552 }, { "epoch": 0.20210414152357778, "grad_norm": 0.39568275213241577, "learning_rate": 0.00015961216028282147, "loss": 1.4604, "step": 15553 }, { "epoch": 0.20211713606749365, "grad_norm": 0.28650161623954773, "learning_rate": 0.00015960956082091007, "loss": 1.3105, "step": 15554 }, { "epoch": 0.20213013061140953, "grad_norm": 0.39524441957473755, "learning_rate": 0.0001596069613589987, "loss": 1.4664, "step": 15555 }, { "epoch": 0.2021431251553254, "grad_norm": 0.48658257722854614, "learning_rate": 0.00015960436189708732, "loss": 1.3023, "step": 15556 }, { "epoch": 0.20215611969924127, "grad_norm": 0.2924402356147766, "learning_rate": 0.00015960176243517594, "loss": 1.2134, "step": 15557 }, { "epoch": 0.20216911424315714, "grad_norm": 0.39157775044441223, "learning_rate": 0.00015959916297326454, "loss": 1.3529, "step": 15558 }, { "epoch": 0.20218210878707302, "grad_norm": 0.31740477681159973, "learning_rate": 0.00015959656351135313, "loss": 1.4197, "step": 15559 }, { "epoch": 0.2021951033309889, "grad_norm": 0.37640008330345154, "learning_rate": 0.00015959396404944179, "loss": 1.3638, "step": 15560 }, { "epoch": 0.20220809787490476, "grad_norm": 0.23595954477787018, "learning_rate": 0.00015959136458753038, "loss": 1.3842, "step": 15561 }, { "epoch": 0.20222109241882064, "grad_norm": 0.5004596710205078, "learning_rate": 0.000159588765125619, "loss": 1.5304, "step": 15562 }, { "epoch": 0.2022340869627365, "grad_norm": 0.3433200716972351, "learning_rate": 0.0001595861656637076, "loss": 1.3388, "step": 15563 }, { "epoch": 0.20224708150665238, "grad_norm": 0.2922031283378601, "learning_rate": 0.00015958356620179623, "loss": 1.1781, "step": 15564 }, { "epoch": 0.20226007605056825, "grad_norm": 0.37811943888664246, "learning_rate": 0.00015958096673988485, "loss": 1.4627, "step": 15565 }, { "epoch": 0.20227307059448413, "grad_norm": 0.48024192452430725, "learning_rate": 0.00015957836727797345, "loss": 1.5468, "step": 15566 }, { "epoch": 0.2022860651384, "grad_norm": 0.3539566695690155, "learning_rate": 0.00015957576781606208, "loss": 1.4408, "step": 15567 }, { "epoch": 0.20229905968231587, "grad_norm": 0.3906601071357727, "learning_rate": 0.0001595731683541507, "loss": 1.5796, "step": 15568 }, { "epoch": 0.20231205422623177, "grad_norm": 0.44790250062942505, "learning_rate": 0.00015957056889223933, "loss": 1.3717, "step": 15569 }, { "epoch": 0.20232504877014765, "grad_norm": 0.39754799008369446, "learning_rate": 0.00015956796943032792, "loss": 1.2721, "step": 15570 }, { "epoch": 0.20233804331406352, "grad_norm": 0.4742945730686188, "learning_rate": 0.00015956536996841655, "loss": 1.4732, "step": 15571 }, { "epoch": 0.2023510378579794, "grad_norm": 0.35886383056640625, "learning_rate": 0.00015956277050650517, "loss": 1.4878, "step": 15572 }, { "epoch": 0.20236403240189527, "grad_norm": 0.3766234219074249, "learning_rate": 0.00015956017104459377, "loss": 1.5001, "step": 15573 }, { "epoch": 0.20237702694581114, "grad_norm": 0.4675527811050415, "learning_rate": 0.0001595575715826824, "loss": 1.3493, "step": 15574 }, { "epoch": 0.202390021489727, "grad_norm": 0.3647250831127167, "learning_rate": 0.00015955497212077102, "loss": 1.5048, "step": 15575 }, { "epoch": 0.20240301603364289, "grad_norm": 0.4306110143661499, "learning_rate": 0.00015955237265885962, "loss": 1.4666, "step": 15576 }, { "epoch": 0.20241601057755876, "grad_norm": 0.34354308247566223, "learning_rate": 0.00015954977319694824, "loss": 1.5806, "step": 15577 }, { "epoch": 0.20242900512147463, "grad_norm": 0.4518747627735138, "learning_rate": 0.00015954717373503684, "loss": 1.4257, "step": 15578 }, { "epoch": 0.2024419996653905, "grad_norm": 0.2816008925437927, "learning_rate": 0.0001595445742731255, "loss": 1.1589, "step": 15579 }, { "epoch": 0.20245499420930638, "grad_norm": 0.43849489092826843, "learning_rate": 0.00015954197481121409, "loss": 1.4997, "step": 15580 }, { "epoch": 0.20246798875322225, "grad_norm": 0.31130221486091614, "learning_rate": 0.0001595393753493027, "loss": 1.2816, "step": 15581 }, { "epoch": 0.20248098329713812, "grad_norm": 0.3955734372138977, "learning_rate": 0.0001595367758873913, "loss": 1.4013, "step": 15582 }, { "epoch": 0.202493977841054, "grad_norm": 0.39053240418434143, "learning_rate": 0.00015953417642547993, "loss": 1.4422, "step": 15583 }, { "epoch": 0.20250697238496987, "grad_norm": 0.3094363808631897, "learning_rate": 0.00015953157696356856, "loss": 1.2012, "step": 15584 }, { "epoch": 0.20251996692888574, "grad_norm": 0.4734448492527008, "learning_rate": 0.00015952897750165715, "loss": 1.4293, "step": 15585 }, { "epoch": 0.20253296147280161, "grad_norm": 0.5012496113777161, "learning_rate": 0.00015952637803974578, "loss": 1.4869, "step": 15586 }, { "epoch": 0.2025459560167175, "grad_norm": 0.42707526683807373, "learning_rate": 0.0001595237785778344, "loss": 1.4535, "step": 15587 }, { "epoch": 0.20255895056063336, "grad_norm": 0.42233243584632874, "learning_rate": 0.000159521179115923, "loss": 1.5284, "step": 15588 }, { "epoch": 0.20257194510454923, "grad_norm": 0.29213064908981323, "learning_rate": 0.00015951857965401162, "loss": 1.3369, "step": 15589 }, { "epoch": 0.2025849396484651, "grad_norm": 0.5354099273681641, "learning_rate": 0.00015951598019210022, "loss": 1.6117, "step": 15590 }, { "epoch": 0.20259793419238098, "grad_norm": 0.4830285906791687, "learning_rate": 0.00015951338073018887, "loss": 1.52, "step": 15591 }, { "epoch": 0.20261092873629685, "grad_norm": 0.3432127833366394, "learning_rate": 0.00015951078126827747, "loss": 1.2603, "step": 15592 }, { "epoch": 0.20262392328021273, "grad_norm": 0.4127909541130066, "learning_rate": 0.0001595081818063661, "loss": 1.4846, "step": 15593 }, { "epoch": 0.2026369178241286, "grad_norm": 0.3981674611568451, "learning_rate": 0.0001595055823444547, "loss": 1.3194, "step": 15594 }, { "epoch": 0.20264991236804447, "grad_norm": 0.3570355772972107, "learning_rate": 0.00015950298288254332, "loss": 1.6007, "step": 15595 }, { "epoch": 0.20266290691196034, "grad_norm": 0.48730647563934326, "learning_rate": 0.00015950038342063194, "loss": 1.3962, "step": 15596 }, { "epoch": 0.20267590145587622, "grad_norm": 0.4497148096561432, "learning_rate": 0.00015949778395872054, "loss": 1.5353, "step": 15597 }, { "epoch": 0.2026888959997921, "grad_norm": 0.31450557708740234, "learning_rate": 0.00015949518449680916, "loss": 1.3312, "step": 15598 }, { "epoch": 0.20270189054370796, "grad_norm": 0.3757617771625519, "learning_rate": 0.0001594925850348978, "loss": 1.3624, "step": 15599 }, { "epoch": 0.20271488508762384, "grad_norm": 0.45655694603919983, "learning_rate": 0.0001594899855729864, "loss": 1.334, "step": 15600 }, { "epoch": 0.2027278796315397, "grad_norm": 0.44738686084747314, "learning_rate": 0.000159487386111075, "loss": 1.4487, "step": 15601 }, { "epoch": 0.20274087417545558, "grad_norm": 0.39667317271232605, "learning_rate": 0.0001594847866491636, "loss": 1.5499, "step": 15602 }, { "epoch": 0.20275386871937146, "grad_norm": 0.37171974778175354, "learning_rate": 0.00015948218718725226, "loss": 1.3191, "step": 15603 }, { "epoch": 0.20276686326328733, "grad_norm": 0.4180026948451996, "learning_rate": 0.00015947958772534086, "loss": 1.3679, "step": 15604 }, { "epoch": 0.2027798578072032, "grad_norm": 0.26227790117263794, "learning_rate": 0.00015947698826342948, "loss": 1.2342, "step": 15605 }, { "epoch": 0.20279285235111907, "grad_norm": 0.3400615155696869, "learning_rate": 0.0001594743888015181, "loss": 1.5662, "step": 15606 }, { "epoch": 0.20280584689503495, "grad_norm": 0.38658204674720764, "learning_rate": 0.0001594717893396067, "loss": 1.2616, "step": 15607 }, { "epoch": 0.20281884143895082, "grad_norm": 0.4290766716003418, "learning_rate": 0.00015946918987769533, "loss": 1.3712, "step": 15608 }, { "epoch": 0.2028318359828667, "grad_norm": 0.4338560700416565, "learning_rate": 0.00015946659041578392, "loss": 1.5662, "step": 15609 }, { "epoch": 0.20284483052678257, "grad_norm": 0.3897329568862915, "learning_rate": 0.00015946399095387258, "loss": 1.3752, "step": 15610 }, { "epoch": 0.20285782507069844, "grad_norm": 0.3282984793186188, "learning_rate": 0.00015946139149196117, "loss": 1.2311, "step": 15611 }, { "epoch": 0.2028708196146143, "grad_norm": 0.43870893120765686, "learning_rate": 0.0001594587920300498, "loss": 1.516, "step": 15612 }, { "epoch": 0.20288381415853018, "grad_norm": 0.4248964190483093, "learning_rate": 0.0001594561925681384, "loss": 1.5338, "step": 15613 }, { "epoch": 0.20289680870244606, "grad_norm": 0.3369921147823334, "learning_rate": 0.00015945359310622702, "loss": 1.3801, "step": 15614 }, { "epoch": 0.20290980324636193, "grad_norm": 0.43938079476356506, "learning_rate": 0.00015945099364431564, "loss": 1.583, "step": 15615 }, { "epoch": 0.2029227977902778, "grad_norm": 0.3859417140483856, "learning_rate": 0.00015944839418240424, "loss": 1.6263, "step": 15616 }, { "epoch": 0.20293579233419368, "grad_norm": 0.4081466495990753, "learning_rate": 0.00015944579472049287, "loss": 1.3804, "step": 15617 }, { "epoch": 0.20294878687810955, "grad_norm": 0.5416001081466675, "learning_rate": 0.0001594431952585815, "loss": 1.4011, "step": 15618 }, { "epoch": 0.20296178142202542, "grad_norm": 0.4448879361152649, "learning_rate": 0.0001594405957966701, "loss": 1.4706, "step": 15619 }, { "epoch": 0.2029747759659413, "grad_norm": 0.3218129277229309, "learning_rate": 0.0001594379963347587, "loss": 1.1609, "step": 15620 }, { "epoch": 0.20298777050985717, "grad_norm": 0.25834921002388, "learning_rate": 0.0001594353968728473, "loss": 1.2834, "step": 15621 }, { "epoch": 0.20300076505377304, "grad_norm": 0.3170090913772583, "learning_rate": 0.00015943279741093596, "loss": 1.1739, "step": 15622 }, { "epoch": 0.20301375959768891, "grad_norm": 0.34562620520591736, "learning_rate": 0.00015943019794902456, "loss": 1.5567, "step": 15623 }, { "epoch": 0.2030267541416048, "grad_norm": 0.31423917412757874, "learning_rate": 0.00015942759848711318, "loss": 1.2251, "step": 15624 }, { "epoch": 0.20303974868552066, "grad_norm": 0.4045373797416687, "learning_rate": 0.00015942499902520178, "loss": 1.6074, "step": 15625 }, { "epoch": 0.20305274322943653, "grad_norm": 0.4055655002593994, "learning_rate": 0.0001594223995632904, "loss": 1.4157, "step": 15626 }, { "epoch": 0.2030657377733524, "grad_norm": 0.4327966868877411, "learning_rate": 0.00015941980010137903, "loss": 1.454, "step": 15627 }, { "epoch": 0.20307873231726828, "grad_norm": 0.40168339014053345, "learning_rate": 0.00015941720063946763, "loss": 1.595, "step": 15628 }, { "epoch": 0.20309172686118415, "grad_norm": 0.3811909258365631, "learning_rate": 0.00015941460117755625, "loss": 1.3004, "step": 15629 }, { "epoch": 0.20310472140510002, "grad_norm": 0.4503130614757538, "learning_rate": 0.00015941200171564488, "loss": 1.4717, "step": 15630 }, { "epoch": 0.2031177159490159, "grad_norm": 0.4343057870864868, "learning_rate": 0.00015940940225373347, "loss": 1.3414, "step": 15631 }, { "epoch": 0.20313071049293177, "grad_norm": 0.4421515166759491, "learning_rate": 0.0001594068027918221, "loss": 1.4232, "step": 15632 }, { "epoch": 0.20314370503684764, "grad_norm": 0.3260214626789093, "learning_rate": 0.0001594042033299107, "loss": 1.3136, "step": 15633 }, { "epoch": 0.20315669958076352, "grad_norm": 0.31766918301582336, "learning_rate": 0.00015940160386799935, "loss": 1.293, "step": 15634 }, { "epoch": 0.2031696941246794, "grad_norm": 0.4145980179309845, "learning_rate": 0.00015939900440608794, "loss": 1.473, "step": 15635 }, { "epoch": 0.20318268866859526, "grad_norm": 0.4416239261627197, "learning_rate": 0.00015939640494417657, "loss": 1.5645, "step": 15636 }, { "epoch": 0.20319568321251114, "grad_norm": 0.3710215985774994, "learning_rate": 0.00015939380548226517, "loss": 1.3829, "step": 15637 }, { "epoch": 0.203208677756427, "grad_norm": 0.4692445993423462, "learning_rate": 0.0001593912060203538, "loss": 1.4375, "step": 15638 }, { "epoch": 0.20322167230034288, "grad_norm": 0.4876609146595001, "learning_rate": 0.00015938860655844242, "loss": 1.3926, "step": 15639 }, { "epoch": 0.20323466684425875, "grad_norm": 0.4070512354373932, "learning_rate": 0.000159386007096531, "loss": 1.3986, "step": 15640 }, { "epoch": 0.20324766138817463, "grad_norm": 0.4974505603313446, "learning_rate": 0.00015938340763461964, "loss": 1.5225, "step": 15641 }, { "epoch": 0.2032606559320905, "grad_norm": 0.33264318108558655, "learning_rate": 0.00015938080817270826, "loss": 1.3339, "step": 15642 }, { "epoch": 0.20327365047600637, "grad_norm": 0.4418434500694275, "learning_rate": 0.00015937820871079686, "loss": 1.5256, "step": 15643 }, { "epoch": 0.20328664501992225, "grad_norm": 0.38963791728019714, "learning_rate": 0.00015937560924888548, "loss": 1.4744, "step": 15644 }, { "epoch": 0.20329963956383815, "grad_norm": 0.3890082538127899, "learning_rate": 0.0001593730097869741, "loss": 1.6533, "step": 15645 }, { "epoch": 0.20331263410775402, "grad_norm": 0.3245887756347656, "learning_rate": 0.00015937041032506273, "loss": 1.4314, "step": 15646 }, { "epoch": 0.2033256286516699, "grad_norm": 0.3747952878475189, "learning_rate": 0.00015936781086315133, "loss": 1.24, "step": 15647 }, { "epoch": 0.20333862319558577, "grad_norm": 0.3432357907295227, "learning_rate": 0.00015936521140123995, "loss": 1.4552, "step": 15648 }, { "epoch": 0.20335161773950164, "grad_norm": 0.4468333423137665, "learning_rate": 0.00015936261193932858, "loss": 1.5789, "step": 15649 }, { "epoch": 0.2033646122834175, "grad_norm": 0.42616572976112366, "learning_rate": 0.00015936001247741718, "loss": 1.5048, "step": 15650 }, { "epoch": 0.20337760682733338, "grad_norm": 0.4014796316623688, "learning_rate": 0.0001593574130155058, "loss": 1.5122, "step": 15651 }, { "epoch": 0.20339060137124926, "grad_norm": 0.36720192432403564, "learning_rate": 0.0001593548135535944, "loss": 1.3338, "step": 15652 }, { "epoch": 0.20340359591516513, "grad_norm": 0.4661295711994171, "learning_rate": 0.00015935221409168305, "loss": 1.5724, "step": 15653 }, { "epoch": 0.203416590459081, "grad_norm": 0.44531241059303284, "learning_rate": 0.00015934961462977165, "loss": 1.4317, "step": 15654 }, { "epoch": 0.20342958500299688, "grad_norm": 0.37221646308898926, "learning_rate": 0.00015934701516786027, "loss": 1.3614, "step": 15655 }, { "epoch": 0.20344257954691275, "grad_norm": 0.4490053653717041, "learning_rate": 0.00015934441570594887, "loss": 1.385, "step": 15656 }, { "epoch": 0.20345557409082862, "grad_norm": 0.27513256669044495, "learning_rate": 0.0001593418162440375, "loss": 1.4627, "step": 15657 }, { "epoch": 0.2034685686347445, "grad_norm": 0.40304383635520935, "learning_rate": 0.00015933921678212612, "loss": 1.3386, "step": 15658 }, { "epoch": 0.20348156317866037, "grad_norm": 0.30084583163261414, "learning_rate": 0.00015933661732021472, "loss": 1.2755, "step": 15659 }, { "epoch": 0.20349455772257624, "grad_norm": 0.5392645001411438, "learning_rate": 0.00015933401785830334, "loss": 1.5687, "step": 15660 }, { "epoch": 0.20350755226649211, "grad_norm": 0.2891553044319153, "learning_rate": 0.00015933141839639196, "loss": 1.2629, "step": 15661 }, { "epoch": 0.203520546810408, "grad_norm": 0.41944870352745056, "learning_rate": 0.00015932881893448056, "loss": 1.2568, "step": 15662 }, { "epoch": 0.20353354135432386, "grad_norm": 0.40333545207977295, "learning_rate": 0.00015932621947256919, "loss": 1.3555, "step": 15663 }, { "epoch": 0.20354653589823973, "grad_norm": 0.42014238238334656, "learning_rate": 0.00015932362001065778, "loss": 1.4136, "step": 15664 }, { "epoch": 0.2035595304421556, "grad_norm": 0.36154791712760925, "learning_rate": 0.00015932102054874644, "loss": 1.489, "step": 15665 }, { "epoch": 0.20357252498607148, "grad_norm": 0.47213754057884216, "learning_rate": 0.00015931842108683503, "loss": 1.3316, "step": 15666 }, { "epoch": 0.20358551952998735, "grad_norm": 0.3318568170070648, "learning_rate": 0.00015931582162492366, "loss": 1.5078, "step": 15667 }, { "epoch": 0.20359851407390323, "grad_norm": 0.36425235867500305, "learning_rate": 0.00015931322216301225, "loss": 1.4387, "step": 15668 }, { "epoch": 0.2036115086178191, "grad_norm": 0.37598493695259094, "learning_rate": 0.00015931062270110088, "loss": 1.2973, "step": 15669 }, { "epoch": 0.20362450316173497, "grad_norm": 0.4518984258174896, "learning_rate": 0.0001593080232391895, "loss": 1.3447, "step": 15670 }, { "epoch": 0.20363749770565084, "grad_norm": 0.39861059188842773, "learning_rate": 0.0001593054237772781, "loss": 1.411, "step": 15671 }, { "epoch": 0.20365049224956672, "grad_norm": 0.34336212277412415, "learning_rate": 0.00015930282431536673, "loss": 1.3022, "step": 15672 }, { "epoch": 0.2036634867934826, "grad_norm": 0.4216004014015198, "learning_rate": 0.00015930022485345535, "loss": 1.3542, "step": 15673 }, { "epoch": 0.20367648133739846, "grad_norm": 0.39204496145248413, "learning_rate": 0.00015929762539154395, "loss": 1.6154, "step": 15674 }, { "epoch": 0.20368947588131434, "grad_norm": 0.4893943965435028, "learning_rate": 0.00015929502592963257, "loss": 1.4186, "step": 15675 }, { "epoch": 0.2037024704252302, "grad_norm": 0.4786006808280945, "learning_rate": 0.00015929242646772117, "loss": 1.5974, "step": 15676 }, { "epoch": 0.20371546496914608, "grad_norm": 0.35081303119659424, "learning_rate": 0.00015928982700580982, "loss": 1.3947, "step": 15677 }, { "epoch": 0.20372845951306195, "grad_norm": 0.30897438526153564, "learning_rate": 0.00015928722754389842, "loss": 1.1852, "step": 15678 }, { "epoch": 0.20374145405697783, "grad_norm": 0.4839898347854614, "learning_rate": 0.00015928462808198704, "loss": 1.4347, "step": 15679 }, { "epoch": 0.2037544486008937, "grad_norm": 0.4220789074897766, "learning_rate": 0.00015928202862007567, "loss": 1.4932, "step": 15680 }, { "epoch": 0.20376744314480957, "grad_norm": 0.3967871367931366, "learning_rate": 0.00015927942915816426, "loss": 1.661, "step": 15681 }, { "epoch": 0.20378043768872545, "grad_norm": 0.4438914954662323, "learning_rate": 0.0001592768296962529, "loss": 1.3474, "step": 15682 }, { "epoch": 0.20379343223264132, "grad_norm": 0.35931092500686646, "learning_rate": 0.00015927423023434149, "loss": 1.4284, "step": 15683 }, { "epoch": 0.2038064267765572, "grad_norm": 0.45528286695480347, "learning_rate": 0.00015927163077243014, "loss": 1.6341, "step": 15684 }, { "epoch": 0.20381942132047307, "grad_norm": 0.39101269841194153, "learning_rate": 0.00015926903131051874, "loss": 1.2844, "step": 15685 }, { "epoch": 0.20383241586438894, "grad_norm": 0.4158966839313507, "learning_rate": 0.00015926643184860733, "loss": 1.4993, "step": 15686 }, { "epoch": 0.2038454104083048, "grad_norm": 0.3594337999820709, "learning_rate": 0.00015926383238669596, "loss": 1.3699, "step": 15687 }, { "epoch": 0.20385840495222068, "grad_norm": 0.39116764068603516, "learning_rate": 0.00015926123292478458, "loss": 1.236, "step": 15688 }, { "epoch": 0.20387139949613656, "grad_norm": 0.3533034026622772, "learning_rate": 0.0001592586334628732, "loss": 1.4514, "step": 15689 }, { "epoch": 0.20388439404005243, "grad_norm": 0.24810311198234558, "learning_rate": 0.0001592560340009618, "loss": 1.1727, "step": 15690 }, { "epoch": 0.2038973885839683, "grad_norm": 0.29851341247558594, "learning_rate": 0.00015925343453905043, "loss": 1.233, "step": 15691 }, { "epoch": 0.20391038312788418, "grad_norm": 0.36616063117980957, "learning_rate": 0.00015925083507713905, "loss": 1.5684, "step": 15692 }, { "epoch": 0.20392337767180005, "grad_norm": 0.33179977536201477, "learning_rate": 0.00015924823561522765, "loss": 1.4019, "step": 15693 }, { "epoch": 0.20393637221571592, "grad_norm": 0.394796758890152, "learning_rate": 0.00015924563615331627, "loss": 1.272, "step": 15694 }, { "epoch": 0.2039493667596318, "grad_norm": 0.27692022919654846, "learning_rate": 0.00015924303669140487, "loss": 1.3884, "step": 15695 }, { "epoch": 0.20396236130354767, "grad_norm": 0.40785714983940125, "learning_rate": 0.00015924043722949352, "loss": 1.3902, "step": 15696 }, { "epoch": 0.20397535584746354, "grad_norm": 0.37398409843444824, "learning_rate": 0.00015923783776758212, "loss": 1.3423, "step": 15697 }, { "epoch": 0.2039883503913794, "grad_norm": 0.35514935851097107, "learning_rate": 0.00015923523830567072, "loss": 1.3234, "step": 15698 }, { "epoch": 0.2040013449352953, "grad_norm": 0.3922223746776581, "learning_rate": 0.00015923263884375934, "loss": 1.5144, "step": 15699 }, { "epoch": 0.20401433947921116, "grad_norm": 0.4430294632911682, "learning_rate": 0.00015923003938184797, "loss": 1.5148, "step": 15700 }, { "epoch": 0.20402733402312703, "grad_norm": 0.4479601979255676, "learning_rate": 0.0001592274399199366, "loss": 1.49, "step": 15701 }, { "epoch": 0.2040403285670429, "grad_norm": 0.3259592056274414, "learning_rate": 0.0001592248404580252, "loss": 1.3565, "step": 15702 }, { "epoch": 0.20405332311095878, "grad_norm": 0.44806450605392456, "learning_rate": 0.0001592222409961138, "loss": 1.4721, "step": 15703 }, { "epoch": 0.20406631765487465, "grad_norm": 0.4268580973148346, "learning_rate": 0.00015921964153420244, "loss": 1.5731, "step": 15704 }, { "epoch": 0.20407931219879052, "grad_norm": 0.43186575174331665, "learning_rate": 0.00015921704207229104, "loss": 1.5134, "step": 15705 }, { "epoch": 0.2040923067427064, "grad_norm": 0.40926510095596313, "learning_rate": 0.00015921444261037966, "loss": 1.3174, "step": 15706 }, { "epoch": 0.20410530128662227, "grad_norm": 0.4243534207344055, "learning_rate": 0.00015921184314846826, "loss": 1.3368, "step": 15707 }, { "epoch": 0.20411829583053814, "grad_norm": 0.4110370874404907, "learning_rate": 0.0001592092436865569, "loss": 1.4053, "step": 15708 }, { "epoch": 0.20413129037445402, "grad_norm": 0.4195098578929901, "learning_rate": 0.0001592066442246455, "loss": 1.4566, "step": 15709 }, { "epoch": 0.2041442849183699, "grad_norm": 0.29919928312301636, "learning_rate": 0.00015920404476273413, "loss": 1.4793, "step": 15710 }, { "epoch": 0.20415727946228576, "grad_norm": 0.45262691378593445, "learning_rate": 0.00015920144530082273, "loss": 1.3782, "step": 15711 }, { "epoch": 0.20417027400620164, "grad_norm": 0.5835135579109192, "learning_rate": 0.00015919884583891135, "loss": 1.5884, "step": 15712 }, { "epoch": 0.2041832685501175, "grad_norm": 0.4178208112716675, "learning_rate": 0.00015919624637699998, "loss": 1.4329, "step": 15713 }, { "epoch": 0.20419626309403338, "grad_norm": 0.2904089689254761, "learning_rate": 0.00015919364691508857, "loss": 1.2274, "step": 15714 }, { "epoch": 0.20420925763794925, "grad_norm": 0.4081857204437256, "learning_rate": 0.0001591910474531772, "loss": 1.4441, "step": 15715 }, { "epoch": 0.20422225218186513, "grad_norm": 0.36956870555877686, "learning_rate": 0.00015918844799126582, "loss": 1.3105, "step": 15716 }, { "epoch": 0.204235246725781, "grad_norm": 0.35003021359443665, "learning_rate": 0.00015918584852935442, "loss": 1.4961, "step": 15717 }, { "epoch": 0.20424824126969687, "grad_norm": 0.3857676088809967, "learning_rate": 0.00015918324906744305, "loss": 1.2679, "step": 15718 }, { "epoch": 0.20426123581361275, "grad_norm": 0.3591368496417999, "learning_rate": 0.00015918064960553167, "loss": 1.4774, "step": 15719 }, { "epoch": 0.20427423035752862, "grad_norm": 0.4455430209636688, "learning_rate": 0.0001591780501436203, "loss": 1.5148, "step": 15720 }, { "epoch": 0.20428722490144452, "grad_norm": 0.32895219326019287, "learning_rate": 0.0001591754506817089, "loss": 1.2892, "step": 15721 }, { "epoch": 0.2043002194453604, "grad_norm": 0.35661959648132324, "learning_rate": 0.00015917285121979752, "loss": 1.4415, "step": 15722 }, { "epoch": 0.20431321398927627, "grad_norm": 0.42002347111701965, "learning_rate": 0.00015917025175788614, "loss": 1.6175, "step": 15723 }, { "epoch": 0.20432620853319214, "grad_norm": 0.4138714373111725, "learning_rate": 0.00015916765229597474, "loss": 1.586, "step": 15724 }, { "epoch": 0.204339203077108, "grad_norm": 0.36805209517478943, "learning_rate": 0.00015916505283406336, "loss": 1.2892, "step": 15725 }, { "epoch": 0.20435219762102388, "grad_norm": 0.41735419631004333, "learning_rate": 0.00015916245337215196, "loss": 1.4875, "step": 15726 }, { "epoch": 0.20436519216493976, "grad_norm": 0.28439757227897644, "learning_rate": 0.00015915985391024058, "loss": 1.5228, "step": 15727 }, { "epoch": 0.20437818670885563, "grad_norm": 0.400066077709198, "learning_rate": 0.0001591572544483292, "loss": 1.4091, "step": 15728 }, { "epoch": 0.2043911812527715, "grad_norm": 0.41826218366622925, "learning_rate": 0.0001591546549864178, "loss": 1.3983, "step": 15729 }, { "epoch": 0.20440417579668738, "grad_norm": 0.3373241126537323, "learning_rate": 0.00015915205552450643, "loss": 1.4874, "step": 15730 }, { "epoch": 0.20441717034060325, "grad_norm": 0.4516903758049011, "learning_rate": 0.00015914945606259505, "loss": 1.4143, "step": 15731 }, { "epoch": 0.20443016488451912, "grad_norm": 0.456332802772522, "learning_rate": 0.00015914685660068368, "loss": 1.5287, "step": 15732 }, { "epoch": 0.204443159428435, "grad_norm": 0.5830400586128235, "learning_rate": 0.00015914425713877228, "loss": 1.7161, "step": 15733 }, { "epoch": 0.20445615397235087, "grad_norm": 0.4263465404510498, "learning_rate": 0.0001591416576768609, "loss": 1.5081, "step": 15734 }, { "epoch": 0.20446914851626674, "grad_norm": 0.4236100912094116, "learning_rate": 0.00015913905821494953, "loss": 1.2357, "step": 15735 }, { "epoch": 0.20448214306018261, "grad_norm": 0.36477360129356384, "learning_rate": 0.00015913645875303812, "loss": 1.3883, "step": 15736 }, { "epoch": 0.2044951376040985, "grad_norm": 0.44050678610801697, "learning_rate": 0.00015913385929112675, "loss": 1.4581, "step": 15737 }, { "epoch": 0.20450813214801436, "grad_norm": 0.3738628029823303, "learning_rate": 0.00015913125982921534, "loss": 1.4019, "step": 15738 }, { "epoch": 0.20452112669193023, "grad_norm": 0.4606102406978607, "learning_rate": 0.000159128660367304, "loss": 1.5841, "step": 15739 }, { "epoch": 0.2045341212358461, "grad_norm": 0.3182002902030945, "learning_rate": 0.0001591260609053926, "loss": 1.65, "step": 15740 }, { "epoch": 0.20454711577976198, "grad_norm": 0.3573318123817444, "learning_rate": 0.0001591234614434812, "loss": 1.2803, "step": 15741 }, { "epoch": 0.20456011032367785, "grad_norm": 0.30597618222236633, "learning_rate": 0.00015912086198156982, "loss": 1.2344, "step": 15742 }, { "epoch": 0.20457310486759372, "grad_norm": 0.41995641589164734, "learning_rate": 0.00015911826251965844, "loss": 1.2867, "step": 15743 }, { "epoch": 0.2045860994115096, "grad_norm": 0.41299331188201904, "learning_rate": 0.00015911566305774706, "loss": 1.403, "step": 15744 }, { "epoch": 0.20459909395542547, "grad_norm": 0.4078940451145172, "learning_rate": 0.00015911306359583566, "loss": 1.3966, "step": 15745 }, { "epoch": 0.20461208849934134, "grad_norm": 0.46846461296081543, "learning_rate": 0.0001591104641339243, "loss": 1.2318, "step": 15746 }, { "epoch": 0.20462508304325722, "grad_norm": 0.46171319484710693, "learning_rate": 0.0001591078646720129, "loss": 1.6357, "step": 15747 }, { "epoch": 0.2046380775871731, "grad_norm": 0.3639085590839386, "learning_rate": 0.0001591052652101015, "loss": 1.5535, "step": 15748 }, { "epoch": 0.20465107213108896, "grad_norm": 0.4720836579799652, "learning_rate": 0.00015910266574819013, "loss": 1.4096, "step": 15749 }, { "epoch": 0.20466406667500484, "grad_norm": 0.45323488116264343, "learning_rate": 0.00015910006628627873, "loss": 1.3315, "step": 15750 }, { "epoch": 0.2046770612189207, "grad_norm": 0.46483924984931946, "learning_rate": 0.00015909746682436738, "loss": 1.5884, "step": 15751 }, { "epoch": 0.20469005576283658, "grad_norm": 0.42523857951164246, "learning_rate": 0.00015909486736245598, "loss": 1.4434, "step": 15752 }, { "epoch": 0.20470305030675245, "grad_norm": 0.3048466742038727, "learning_rate": 0.00015909226790054458, "loss": 1.4924, "step": 15753 }, { "epoch": 0.20471604485066833, "grad_norm": 0.5041714310646057, "learning_rate": 0.00015908966843863323, "loss": 1.5424, "step": 15754 }, { "epoch": 0.2047290393945842, "grad_norm": 0.4051111936569214, "learning_rate": 0.00015908706897672183, "loss": 1.3879, "step": 15755 }, { "epoch": 0.20474203393850007, "grad_norm": 0.44219812750816345, "learning_rate": 0.00015908446951481045, "loss": 1.3498, "step": 15756 }, { "epoch": 0.20475502848241595, "grad_norm": 0.28013938665390015, "learning_rate": 0.00015908187005289905, "loss": 1.1812, "step": 15757 }, { "epoch": 0.20476802302633182, "grad_norm": 0.3454078733921051, "learning_rate": 0.00015907927059098767, "loss": 1.4764, "step": 15758 }, { "epoch": 0.2047810175702477, "grad_norm": 0.362848699092865, "learning_rate": 0.0001590766711290763, "loss": 1.3763, "step": 15759 }, { "epoch": 0.20479401211416356, "grad_norm": 0.40795400738716125, "learning_rate": 0.0001590740716671649, "loss": 1.4743, "step": 15760 }, { "epoch": 0.20480700665807944, "grad_norm": 0.34853318333625793, "learning_rate": 0.00015907147220525352, "loss": 1.2877, "step": 15761 }, { "epoch": 0.2048200012019953, "grad_norm": 0.4030263125896454, "learning_rate": 0.00015906887274334214, "loss": 1.46, "step": 15762 }, { "epoch": 0.20483299574591118, "grad_norm": 0.5251719355583191, "learning_rate": 0.00015906627328143077, "loss": 1.4268, "step": 15763 }, { "epoch": 0.20484599028982706, "grad_norm": 0.3947938084602356, "learning_rate": 0.00015906367381951936, "loss": 1.4655, "step": 15764 }, { "epoch": 0.20485898483374293, "grad_norm": 0.447459876537323, "learning_rate": 0.00015906107435760796, "loss": 1.321, "step": 15765 }, { "epoch": 0.2048719793776588, "grad_norm": 0.5051454901695251, "learning_rate": 0.00015905847489569661, "loss": 1.5749, "step": 15766 }, { "epoch": 0.20488497392157468, "grad_norm": 0.4089381992816925, "learning_rate": 0.0001590558754337852, "loss": 1.4098, "step": 15767 }, { "epoch": 0.20489796846549055, "grad_norm": 0.36150145530700684, "learning_rate": 0.00015905327597187384, "loss": 1.3307, "step": 15768 }, { "epoch": 0.20491096300940642, "grad_norm": 0.45095813274383545, "learning_rate": 0.00015905067650996243, "loss": 1.3518, "step": 15769 }, { "epoch": 0.2049239575533223, "grad_norm": 0.28538087010383606, "learning_rate": 0.00015904807704805106, "loss": 1.2048, "step": 15770 }, { "epoch": 0.20493695209723817, "grad_norm": 0.3877955675125122, "learning_rate": 0.00015904547758613968, "loss": 1.3066, "step": 15771 }, { "epoch": 0.20494994664115404, "grad_norm": 0.36222320795059204, "learning_rate": 0.00015904287812422828, "loss": 1.3091, "step": 15772 }, { "epoch": 0.2049629411850699, "grad_norm": 0.3057667315006256, "learning_rate": 0.0001590402786623169, "loss": 1.6036, "step": 15773 }, { "epoch": 0.2049759357289858, "grad_norm": 0.3966691792011261, "learning_rate": 0.00015903767920040553, "loss": 1.6149, "step": 15774 }, { "epoch": 0.20498893027290166, "grad_norm": 0.6498134136199951, "learning_rate": 0.00015903507973849415, "loss": 1.3092, "step": 15775 }, { "epoch": 0.20500192481681753, "grad_norm": 0.32731446623802185, "learning_rate": 0.00015903248027658275, "loss": 1.2598, "step": 15776 }, { "epoch": 0.2050149193607334, "grad_norm": 0.3012624979019165, "learning_rate": 0.00015902988081467137, "loss": 1.3868, "step": 15777 }, { "epoch": 0.20502791390464928, "grad_norm": 0.5168237686157227, "learning_rate": 0.00015902728135276, "loss": 1.3713, "step": 15778 }, { "epoch": 0.20504090844856515, "grad_norm": 0.4670848250389099, "learning_rate": 0.0001590246818908486, "loss": 1.5733, "step": 15779 }, { "epoch": 0.20505390299248102, "grad_norm": 0.3513728678226471, "learning_rate": 0.00015902208242893722, "loss": 1.3412, "step": 15780 }, { "epoch": 0.2050668975363969, "grad_norm": 0.473827600479126, "learning_rate": 0.00015901948296702582, "loss": 1.3986, "step": 15781 }, { "epoch": 0.20507989208031277, "grad_norm": 0.40078842639923096, "learning_rate": 0.00015901688350511444, "loss": 1.2068, "step": 15782 }, { "epoch": 0.20509288662422864, "grad_norm": 0.36712297797203064, "learning_rate": 0.00015901428404320307, "loss": 1.3649, "step": 15783 }, { "epoch": 0.20510588116814452, "grad_norm": 0.44345995783805847, "learning_rate": 0.00015901168458129166, "loss": 1.6381, "step": 15784 }, { "epoch": 0.2051188757120604, "grad_norm": 0.391267865896225, "learning_rate": 0.0001590090851193803, "loss": 1.4177, "step": 15785 }, { "epoch": 0.20513187025597626, "grad_norm": 0.2519415020942688, "learning_rate": 0.00015900648565746891, "loss": 1.4768, "step": 15786 }, { "epoch": 0.20514486479989213, "grad_norm": 0.37938761711120605, "learning_rate": 0.00015900388619555754, "loss": 1.4536, "step": 15787 }, { "epoch": 0.205157859343808, "grad_norm": 0.3375428020954132, "learning_rate": 0.00015900128673364614, "loss": 1.4751, "step": 15788 }, { "epoch": 0.20517085388772388, "grad_norm": 0.34928786754608154, "learning_rate": 0.00015899868727173476, "loss": 1.1602, "step": 15789 }, { "epoch": 0.20518384843163975, "grad_norm": 0.3935679495334625, "learning_rate": 0.00015899608780982338, "loss": 1.7161, "step": 15790 }, { "epoch": 0.20519684297555563, "grad_norm": 0.2962068021297455, "learning_rate": 0.00015899348834791198, "loss": 1.4203, "step": 15791 }, { "epoch": 0.2052098375194715, "grad_norm": 0.3858349621295929, "learning_rate": 0.0001589908888860006, "loss": 1.364, "step": 15792 }, { "epoch": 0.20522283206338737, "grad_norm": 0.39436033368110657, "learning_rate": 0.00015898828942408923, "loss": 1.5025, "step": 15793 }, { "epoch": 0.20523582660730325, "grad_norm": 0.3853914737701416, "learning_rate": 0.00015898568996217783, "loss": 1.5118, "step": 15794 }, { "epoch": 0.20524882115121912, "grad_norm": 0.48946279287338257, "learning_rate": 0.00015898309050026645, "loss": 1.4366, "step": 15795 }, { "epoch": 0.205261815695135, "grad_norm": 0.4551539719104767, "learning_rate": 0.00015898049103835505, "loss": 1.4611, "step": 15796 }, { "epoch": 0.2052748102390509, "grad_norm": 0.43925783038139343, "learning_rate": 0.0001589778915764437, "loss": 1.496, "step": 15797 }, { "epoch": 0.20528780478296677, "grad_norm": 0.37782543897628784, "learning_rate": 0.0001589752921145323, "loss": 1.2591, "step": 15798 }, { "epoch": 0.20530079932688264, "grad_norm": 0.45286956429481506, "learning_rate": 0.00015897269265262092, "loss": 1.469, "step": 15799 }, { "epoch": 0.2053137938707985, "grad_norm": 0.3681240379810333, "learning_rate": 0.00015897009319070952, "loss": 1.3431, "step": 15800 }, { "epoch": 0.20532678841471438, "grad_norm": 0.3992835283279419, "learning_rate": 0.00015896749372879815, "loss": 1.5222, "step": 15801 }, { "epoch": 0.20533978295863026, "grad_norm": 0.34749746322631836, "learning_rate": 0.00015896489426688677, "loss": 1.3768, "step": 15802 }, { "epoch": 0.20535277750254613, "grad_norm": 0.49276745319366455, "learning_rate": 0.00015896229480497537, "loss": 1.5404, "step": 15803 }, { "epoch": 0.205365772046462, "grad_norm": 0.4035680592060089, "learning_rate": 0.000158959695343064, "loss": 1.3819, "step": 15804 }, { "epoch": 0.20537876659037788, "grad_norm": 0.38701581954956055, "learning_rate": 0.00015895709588115262, "loss": 1.4325, "step": 15805 }, { "epoch": 0.20539176113429375, "grad_norm": 0.3915572464466095, "learning_rate": 0.00015895449641924124, "loss": 1.2669, "step": 15806 }, { "epoch": 0.20540475567820962, "grad_norm": 0.5313198566436768, "learning_rate": 0.00015895189695732984, "loss": 1.442, "step": 15807 }, { "epoch": 0.2054177502221255, "grad_norm": 0.3440777659416199, "learning_rate": 0.00015894929749541844, "loss": 1.2321, "step": 15808 }, { "epoch": 0.20543074476604137, "grad_norm": 0.42358773946762085, "learning_rate": 0.0001589466980335071, "loss": 1.4623, "step": 15809 }, { "epoch": 0.20544373930995724, "grad_norm": 0.3406578600406647, "learning_rate": 0.00015894409857159568, "loss": 1.3457, "step": 15810 }, { "epoch": 0.2054567338538731, "grad_norm": 0.32047003507614136, "learning_rate": 0.0001589414991096843, "loss": 1.2605, "step": 15811 }, { "epoch": 0.205469728397789, "grad_norm": 0.44259655475616455, "learning_rate": 0.0001589388996477729, "loss": 1.2508, "step": 15812 }, { "epoch": 0.20548272294170486, "grad_norm": 0.41553908586502075, "learning_rate": 0.00015893630018586153, "loss": 1.5704, "step": 15813 }, { "epoch": 0.20549571748562073, "grad_norm": 0.3841545879840851, "learning_rate": 0.00015893370072395016, "loss": 1.4083, "step": 15814 }, { "epoch": 0.2055087120295366, "grad_norm": 0.42117422819137573, "learning_rate": 0.00015893110126203875, "loss": 1.3001, "step": 15815 }, { "epoch": 0.20552170657345248, "grad_norm": 0.4432094395160675, "learning_rate": 0.00015892850180012738, "loss": 1.4535, "step": 15816 }, { "epoch": 0.20553470111736835, "grad_norm": 0.36133062839508057, "learning_rate": 0.000158925902338216, "loss": 1.36, "step": 15817 }, { "epoch": 0.20554769566128422, "grad_norm": 0.3729369044303894, "learning_rate": 0.00015892330287630463, "loss": 1.4188, "step": 15818 }, { "epoch": 0.2055606902052001, "grad_norm": 0.3679851293563843, "learning_rate": 0.00015892070341439322, "loss": 1.4607, "step": 15819 }, { "epoch": 0.20557368474911597, "grad_norm": 0.4136151969432831, "learning_rate": 0.00015891810395248182, "loss": 1.459, "step": 15820 }, { "epoch": 0.20558667929303184, "grad_norm": 0.31538987159729004, "learning_rate": 0.00015891550449057047, "loss": 1.3662, "step": 15821 }, { "epoch": 0.20559967383694772, "grad_norm": 0.37139299511909485, "learning_rate": 0.00015891290502865907, "loss": 1.3113, "step": 15822 }, { "epoch": 0.2056126683808636, "grad_norm": 0.39008355140686035, "learning_rate": 0.0001589103055667477, "loss": 1.5512, "step": 15823 }, { "epoch": 0.20562566292477946, "grad_norm": 0.3811801075935364, "learning_rate": 0.0001589077061048363, "loss": 1.5279, "step": 15824 }, { "epoch": 0.20563865746869534, "grad_norm": 0.42425671219825745, "learning_rate": 0.00015890510664292492, "loss": 1.223, "step": 15825 }, { "epoch": 0.2056516520126112, "grad_norm": 0.3898901641368866, "learning_rate": 0.00015890250718101354, "loss": 1.3233, "step": 15826 }, { "epoch": 0.20566464655652708, "grad_norm": 0.47206342220306396, "learning_rate": 0.00015889990771910214, "loss": 1.375, "step": 15827 }, { "epoch": 0.20567764110044295, "grad_norm": 0.383807897567749, "learning_rate": 0.0001588973082571908, "loss": 1.6269, "step": 15828 }, { "epoch": 0.20569063564435883, "grad_norm": 0.4191513955593109, "learning_rate": 0.0001588947087952794, "loss": 1.4365, "step": 15829 }, { "epoch": 0.2057036301882747, "grad_norm": 0.3272859454154968, "learning_rate": 0.000158892109333368, "loss": 1.3244, "step": 15830 }, { "epoch": 0.20571662473219057, "grad_norm": 0.32198211550712585, "learning_rate": 0.0001588895098714566, "loss": 1.2497, "step": 15831 }, { "epoch": 0.20572961927610645, "grad_norm": 0.2837758958339691, "learning_rate": 0.00015888691040954523, "loss": 1.2955, "step": 15832 }, { "epoch": 0.20574261382002232, "grad_norm": 0.40911129117012024, "learning_rate": 0.00015888431094763386, "loss": 1.6714, "step": 15833 }, { "epoch": 0.2057556083639382, "grad_norm": 0.4043314754962921, "learning_rate": 0.00015888171148572246, "loss": 1.2847, "step": 15834 }, { "epoch": 0.20576860290785406, "grad_norm": 0.4200332462787628, "learning_rate": 0.00015887911202381108, "loss": 1.3008, "step": 15835 }, { "epoch": 0.20578159745176994, "grad_norm": 0.3216518759727478, "learning_rate": 0.0001588765125618997, "loss": 1.2906, "step": 15836 }, { "epoch": 0.2057945919956858, "grad_norm": 0.2856453061103821, "learning_rate": 0.0001588739130999883, "loss": 1.4698, "step": 15837 }, { "epoch": 0.20580758653960168, "grad_norm": 0.35720422863960266, "learning_rate": 0.00015887131363807693, "loss": 1.1851, "step": 15838 }, { "epoch": 0.20582058108351756, "grad_norm": 0.44327622652053833, "learning_rate": 0.00015886871417616552, "loss": 1.6635, "step": 15839 }, { "epoch": 0.20583357562743343, "grad_norm": 0.4015038013458252, "learning_rate": 0.00015886611471425418, "loss": 1.4849, "step": 15840 }, { "epoch": 0.2058465701713493, "grad_norm": 0.29389163851737976, "learning_rate": 0.00015886351525234277, "loss": 1.3409, "step": 15841 }, { "epoch": 0.20585956471526518, "grad_norm": 0.39749184250831604, "learning_rate": 0.0001588609157904314, "loss": 1.4076, "step": 15842 }, { "epoch": 0.20587255925918105, "grad_norm": 0.7407920360565186, "learning_rate": 0.00015885831632852, "loss": 1.5349, "step": 15843 }, { "epoch": 0.20588555380309692, "grad_norm": 0.3987780511379242, "learning_rate": 0.00015885571686660862, "loss": 1.5439, "step": 15844 }, { "epoch": 0.2058985483470128, "grad_norm": 0.46152356266975403, "learning_rate": 0.00015885311740469724, "loss": 1.5296, "step": 15845 }, { "epoch": 0.20591154289092867, "grad_norm": 0.38446956872940063, "learning_rate": 0.00015885051794278584, "loss": 1.4374, "step": 15846 }, { "epoch": 0.20592453743484454, "grad_norm": 0.5016198754310608, "learning_rate": 0.00015884791848087447, "loss": 1.4926, "step": 15847 }, { "epoch": 0.2059375319787604, "grad_norm": 0.48141491413116455, "learning_rate": 0.0001588453190189631, "loss": 1.3809, "step": 15848 }, { "epoch": 0.20595052652267629, "grad_norm": 0.3519539535045624, "learning_rate": 0.0001588427195570517, "loss": 1.348, "step": 15849 }, { "epoch": 0.20596352106659216, "grad_norm": 0.37036508321762085, "learning_rate": 0.0001588401200951403, "loss": 1.3443, "step": 15850 }, { "epoch": 0.20597651561050803, "grad_norm": 0.47400668263435364, "learning_rate": 0.0001588375206332289, "loss": 1.6047, "step": 15851 }, { "epoch": 0.2059895101544239, "grad_norm": 0.360757052898407, "learning_rate": 0.00015883492117131756, "loss": 1.4429, "step": 15852 }, { "epoch": 0.20600250469833978, "grad_norm": 0.3367481827735901, "learning_rate": 0.00015883232170940616, "loss": 1.3385, "step": 15853 }, { "epoch": 0.20601549924225565, "grad_norm": 0.3506537675857544, "learning_rate": 0.00015882972224749478, "loss": 1.3692, "step": 15854 }, { "epoch": 0.20602849378617152, "grad_norm": 0.40682289004325867, "learning_rate": 0.00015882712278558338, "loss": 1.4108, "step": 15855 }, { "epoch": 0.2060414883300874, "grad_norm": 0.4694855213165283, "learning_rate": 0.000158824523323672, "loss": 1.5517, "step": 15856 }, { "epoch": 0.20605448287400327, "grad_norm": 0.346362441778183, "learning_rate": 0.00015882192386176063, "loss": 1.3311, "step": 15857 }, { "epoch": 0.20606747741791914, "grad_norm": 0.26887205243110657, "learning_rate": 0.00015881932439984923, "loss": 1.438, "step": 15858 }, { "epoch": 0.20608047196183502, "grad_norm": 0.3908768892288208, "learning_rate": 0.00015881672493793785, "loss": 1.3651, "step": 15859 }, { "epoch": 0.2060934665057509, "grad_norm": 0.506881833076477, "learning_rate": 0.00015881412547602647, "loss": 1.4457, "step": 15860 }, { "epoch": 0.20610646104966676, "grad_norm": 0.41534358263015747, "learning_rate": 0.0001588115260141151, "loss": 1.4301, "step": 15861 }, { "epoch": 0.20611945559358263, "grad_norm": 0.3881515860557556, "learning_rate": 0.0001588089265522037, "loss": 1.3328, "step": 15862 }, { "epoch": 0.2061324501374985, "grad_norm": 0.4791701138019562, "learning_rate": 0.0001588063270902923, "loss": 1.4697, "step": 15863 }, { "epoch": 0.20614544468141438, "grad_norm": 0.4101627767086029, "learning_rate": 0.00015880372762838095, "loss": 1.3185, "step": 15864 }, { "epoch": 0.20615843922533025, "grad_norm": 0.4083032011985779, "learning_rate": 0.00015880112816646954, "loss": 1.5074, "step": 15865 }, { "epoch": 0.20617143376924613, "grad_norm": 0.39344367384910583, "learning_rate": 0.00015879852870455817, "loss": 1.2071, "step": 15866 }, { "epoch": 0.206184428313162, "grad_norm": 0.35956549644470215, "learning_rate": 0.0001587959292426468, "loss": 1.4655, "step": 15867 }, { "epoch": 0.20619742285707787, "grad_norm": 0.3508695662021637, "learning_rate": 0.0001587933297807354, "loss": 1.4405, "step": 15868 }, { "epoch": 0.20621041740099375, "grad_norm": 0.4173499345779419, "learning_rate": 0.00015879073031882401, "loss": 1.3928, "step": 15869 }, { "epoch": 0.20622341194490962, "grad_norm": 0.3480115532875061, "learning_rate": 0.0001587881308569126, "loss": 1.31, "step": 15870 }, { "epoch": 0.2062364064888255, "grad_norm": 0.42981192469596863, "learning_rate": 0.00015878553139500126, "loss": 1.4739, "step": 15871 }, { "epoch": 0.20624940103274136, "grad_norm": 0.34881457686424255, "learning_rate": 0.00015878293193308986, "loss": 1.3708, "step": 15872 }, { "epoch": 0.20626239557665726, "grad_norm": 0.35697826743125916, "learning_rate": 0.00015878033247117848, "loss": 1.5492, "step": 15873 }, { "epoch": 0.20627539012057314, "grad_norm": 0.315758615732193, "learning_rate": 0.00015877773300926708, "loss": 1.4323, "step": 15874 }, { "epoch": 0.206288384664489, "grad_norm": 0.4722059667110443, "learning_rate": 0.0001587751335473557, "loss": 1.5743, "step": 15875 }, { "epoch": 0.20630137920840488, "grad_norm": 0.42276933789253235, "learning_rate": 0.00015877253408544433, "loss": 1.3927, "step": 15876 }, { "epoch": 0.20631437375232076, "grad_norm": 0.2741607129573822, "learning_rate": 0.00015876993462353293, "loss": 1.2414, "step": 15877 }, { "epoch": 0.20632736829623663, "grad_norm": 0.3732401430606842, "learning_rate": 0.00015876733516162155, "loss": 1.3106, "step": 15878 }, { "epoch": 0.2063403628401525, "grad_norm": 0.35604941844940186, "learning_rate": 0.00015876473569971018, "loss": 1.4573, "step": 15879 }, { "epoch": 0.20635335738406838, "grad_norm": 0.42389288544654846, "learning_rate": 0.00015876213623779877, "loss": 1.4561, "step": 15880 }, { "epoch": 0.20636635192798425, "grad_norm": 0.36977311968803406, "learning_rate": 0.0001587595367758874, "loss": 1.4669, "step": 15881 }, { "epoch": 0.20637934647190012, "grad_norm": 0.3273666203022003, "learning_rate": 0.000158756937313976, "loss": 1.2684, "step": 15882 }, { "epoch": 0.206392341015816, "grad_norm": 0.49288409948349, "learning_rate": 0.00015875433785206465, "loss": 1.4825, "step": 15883 }, { "epoch": 0.20640533555973187, "grad_norm": 0.4416276514530182, "learning_rate": 0.00015875173839015325, "loss": 1.5172, "step": 15884 }, { "epoch": 0.20641833010364774, "grad_norm": 0.3795471787452698, "learning_rate": 0.00015874913892824187, "loss": 1.5336, "step": 15885 }, { "epoch": 0.2064313246475636, "grad_norm": 0.3744145929813385, "learning_rate": 0.00015874653946633047, "loss": 1.4966, "step": 15886 }, { "epoch": 0.2064443191914795, "grad_norm": 0.41878148913383484, "learning_rate": 0.0001587439400044191, "loss": 1.4001, "step": 15887 }, { "epoch": 0.20645731373539536, "grad_norm": 0.38130518794059753, "learning_rate": 0.00015874134054250772, "loss": 1.4254, "step": 15888 }, { "epoch": 0.20647030827931123, "grad_norm": 0.36912328004837036, "learning_rate": 0.00015873874108059631, "loss": 1.3318, "step": 15889 }, { "epoch": 0.2064833028232271, "grad_norm": 0.34443825483322144, "learning_rate": 0.00015873614161868494, "loss": 1.368, "step": 15890 }, { "epoch": 0.20649629736714298, "grad_norm": 0.4547821283340454, "learning_rate": 0.00015873354215677356, "loss": 1.2034, "step": 15891 }, { "epoch": 0.20650929191105885, "grad_norm": 0.36991581320762634, "learning_rate": 0.00015873094269486216, "loss": 1.3629, "step": 15892 }, { "epoch": 0.20652228645497472, "grad_norm": 0.4537360668182373, "learning_rate": 0.00015872834323295078, "loss": 1.3154, "step": 15893 }, { "epoch": 0.2065352809988906, "grad_norm": 0.37049147486686707, "learning_rate": 0.00015872574377103938, "loss": 1.2739, "step": 15894 }, { "epoch": 0.20654827554280647, "grad_norm": 0.39780426025390625, "learning_rate": 0.00015872314430912803, "loss": 1.5707, "step": 15895 }, { "epoch": 0.20656127008672234, "grad_norm": 0.34157872200012207, "learning_rate": 0.00015872054484721663, "loss": 1.2745, "step": 15896 }, { "epoch": 0.20657426463063822, "grad_norm": 0.40147995948791504, "learning_rate": 0.00015871794538530526, "loss": 1.1553, "step": 15897 }, { "epoch": 0.2065872591745541, "grad_norm": 0.6175597310066223, "learning_rate": 0.00015871534592339385, "loss": 1.4381, "step": 15898 }, { "epoch": 0.20660025371846996, "grad_norm": 0.3995334208011627, "learning_rate": 0.00015871274646148248, "loss": 1.535, "step": 15899 }, { "epoch": 0.20661324826238583, "grad_norm": 0.4335523247718811, "learning_rate": 0.0001587101469995711, "loss": 1.6232, "step": 15900 }, { "epoch": 0.2066262428063017, "grad_norm": 0.41150805354118347, "learning_rate": 0.0001587075475376597, "loss": 1.2491, "step": 15901 }, { "epoch": 0.20663923735021758, "grad_norm": 0.4173851013183594, "learning_rate": 0.00015870494807574835, "loss": 1.5438, "step": 15902 }, { "epoch": 0.20665223189413345, "grad_norm": 0.38663020730018616, "learning_rate": 0.00015870234861383695, "loss": 1.2649, "step": 15903 }, { "epoch": 0.20666522643804933, "grad_norm": 0.28352129459381104, "learning_rate": 0.00015869974915192555, "loss": 1.323, "step": 15904 }, { "epoch": 0.2066782209819652, "grad_norm": 0.4399521052837372, "learning_rate": 0.00015869714969001417, "loss": 1.3059, "step": 15905 }, { "epoch": 0.20669121552588107, "grad_norm": 0.3807695806026459, "learning_rate": 0.0001586945502281028, "loss": 1.382, "step": 15906 }, { "epoch": 0.20670421006979695, "grad_norm": 0.36949726939201355, "learning_rate": 0.00015869195076619142, "loss": 1.5614, "step": 15907 }, { "epoch": 0.20671720461371282, "grad_norm": 0.3936013877391815, "learning_rate": 0.00015868935130428002, "loss": 1.4939, "step": 15908 }, { "epoch": 0.2067301991576287, "grad_norm": 0.4262370765209198, "learning_rate": 0.00015868675184236864, "loss": 1.3336, "step": 15909 }, { "epoch": 0.20674319370154456, "grad_norm": 0.3740140497684479, "learning_rate": 0.00015868415238045727, "loss": 1.2651, "step": 15910 }, { "epoch": 0.20675618824546044, "grad_norm": 0.3990071713924408, "learning_rate": 0.00015868155291854586, "loss": 1.2441, "step": 15911 }, { "epoch": 0.2067691827893763, "grad_norm": 0.30727601051330566, "learning_rate": 0.0001586789534566345, "loss": 1.1888, "step": 15912 }, { "epoch": 0.20678217733329218, "grad_norm": 0.38075730204582214, "learning_rate": 0.00015867635399472308, "loss": 1.4345, "step": 15913 }, { "epoch": 0.20679517187720806, "grad_norm": 0.43977880477905273, "learning_rate": 0.00015867375453281174, "loss": 1.4487, "step": 15914 }, { "epoch": 0.20680816642112393, "grad_norm": 0.45685312151908875, "learning_rate": 0.00015867115507090033, "loss": 1.4278, "step": 15915 }, { "epoch": 0.2068211609650398, "grad_norm": 0.42141231894493103, "learning_rate": 0.00015866855560898896, "loss": 1.3266, "step": 15916 }, { "epoch": 0.20683415550895567, "grad_norm": 0.4219370484352112, "learning_rate": 0.00015866595614707756, "loss": 1.5276, "step": 15917 }, { "epoch": 0.20684715005287155, "grad_norm": 0.3282308876514435, "learning_rate": 0.00015866335668516618, "loss": 1.315, "step": 15918 }, { "epoch": 0.20686014459678742, "grad_norm": 0.3735176920890808, "learning_rate": 0.0001586607572232548, "loss": 1.3812, "step": 15919 }, { "epoch": 0.2068731391407033, "grad_norm": 0.49324479699134827, "learning_rate": 0.0001586581577613434, "loss": 1.313, "step": 15920 }, { "epoch": 0.20688613368461917, "grad_norm": 0.4210513234138489, "learning_rate": 0.00015865555829943203, "loss": 1.3932, "step": 15921 }, { "epoch": 0.20689912822853504, "grad_norm": 0.4304329752922058, "learning_rate": 0.00015865295883752065, "loss": 1.5712, "step": 15922 }, { "epoch": 0.2069121227724509, "grad_norm": 0.41186749935150146, "learning_rate": 0.00015865035937560925, "loss": 1.4385, "step": 15923 }, { "epoch": 0.20692511731636679, "grad_norm": 0.38800108432769775, "learning_rate": 0.00015864775991369787, "loss": 1.5389, "step": 15924 }, { "epoch": 0.20693811186028266, "grad_norm": 0.3718966841697693, "learning_rate": 0.00015864516045178647, "loss": 1.2611, "step": 15925 }, { "epoch": 0.20695110640419853, "grad_norm": 0.41373559832572937, "learning_rate": 0.00015864256098987512, "loss": 1.503, "step": 15926 }, { "epoch": 0.2069641009481144, "grad_norm": 0.47427499294281006, "learning_rate": 0.00015863996152796372, "loss": 1.5411, "step": 15927 }, { "epoch": 0.20697709549203028, "grad_norm": 0.42541879415512085, "learning_rate": 0.00015863736206605234, "loss": 1.4241, "step": 15928 }, { "epoch": 0.20699009003594615, "grad_norm": 0.4966329336166382, "learning_rate": 0.00015863476260414094, "loss": 1.5141, "step": 15929 }, { "epoch": 0.20700308457986202, "grad_norm": 0.5133181810379028, "learning_rate": 0.00015863216314222957, "loss": 1.4732, "step": 15930 }, { "epoch": 0.2070160791237779, "grad_norm": 0.42081692814826965, "learning_rate": 0.0001586295636803182, "loss": 1.6751, "step": 15931 }, { "epoch": 0.20702907366769377, "grad_norm": 0.43273743987083435, "learning_rate": 0.0001586269642184068, "loss": 1.3903, "step": 15932 }, { "epoch": 0.20704206821160964, "grad_norm": 0.43079596757888794, "learning_rate": 0.0001586243647564954, "loss": 1.4664, "step": 15933 }, { "epoch": 0.20705506275552552, "grad_norm": 0.4602683186531067, "learning_rate": 0.00015862176529458404, "loss": 1.3738, "step": 15934 }, { "epoch": 0.2070680572994414, "grad_norm": 0.2862103283405304, "learning_rate": 0.00015861916583267263, "loss": 1.6209, "step": 15935 }, { "epoch": 0.20708105184335726, "grad_norm": 0.4206918776035309, "learning_rate": 0.00015861656637076126, "loss": 1.6406, "step": 15936 }, { "epoch": 0.20709404638727313, "grad_norm": 0.3876684308052063, "learning_rate": 0.00015861396690884986, "loss": 1.3282, "step": 15937 }, { "epoch": 0.207107040931189, "grad_norm": 0.38095077872276306, "learning_rate": 0.0001586113674469385, "loss": 1.3787, "step": 15938 }, { "epoch": 0.20712003547510488, "grad_norm": 0.45426690578460693, "learning_rate": 0.0001586087679850271, "loss": 1.3792, "step": 15939 }, { "epoch": 0.20713303001902075, "grad_norm": 0.3864433765411377, "learning_rate": 0.00015860616852311573, "loss": 1.4522, "step": 15940 }, { "epoch": 0.20714602456293663, "grad_norm": 0.42905518412590027, "learning_rate": 0.00015860356906120435, "loss": 1.3308, "step": 15941 }, { "epoch": 0.2071590191068525, "grad_norm": 0.3632519543170929, "learning_rate": 0.00015860096959929295, "loss": 1.404, "step": 15942 }, { "epoch": 0.20717201365076837, "grad_norm": 0.33392533659935, "learning_rate": 0.00015859837013738158, "loss": 1.3958, "step": 15943 }, { "epoch": 0.20718500819468424, "grad_norm": 0.39345288276672363, "learning_rate": 0.00015859577067547017, "loss": 1.4429, "step": 15944 }, { "epoch": 0.20719800273860012, "grad_norm": 0.5228486061096191, "learning_rate": 0.00015859317121355882, "loss": 1.4701, "step": 15945 }, { "epoch": 0.207210997282516, "grad_norm": 0.38974788784980774, "learning_rate": 0.00015859057175164742, "loss": 1.4143, "step": 15946 }, { "epoch": 0.20722399182643186, "grad_norm": 0.33740234375, "learning_rate": 0.00015858797228973602, "loss": 1.5008, "step": 15947 }, { "epoch": 0.20723698637034774, "grad_norm": 0.47325247526168823, "learning_rate": 0.00015858537282782464, "loss": 1.4823, "step": 15948 }, { "epoch": 0.20724998091426364, "grad_norm": 0.4428384602069855, "learning_rate": 0.00015858277336591327, "loss": 1.3386, "step": 15949 }, { "epoch": 0.2072629754581795, "grad_norm": 0.4490267038345337, "learning_rate": 0.0001585801739040019, "loss": 1.4046, "step": 15950 }, { "epoch": 0.20727597000209538, "grad_norm": 0.3699183464050293, "learning_rate": 0.0001585775744420905, "loss": 1.2574, "step": 15951 }, { "epoch": 0.20728896454601126, "grad_norm": 0.4242355525493622, "learning_rate": 0.00015857497498017911, "loss": 1.3812, "step": 15952 }, { "epoch": 0.20730195908992713, "grad_norm": 0.4202848970890045, "learning_rate": 0.00015857237551826774, "loss": 1.2855, "step": 15953 }, { "epoch": 0.207314953633843, "grad_norm": 0.32425323128700256, "learning_rate": 0.00015856977605635634, "loss": 1.34, "step": 15954 }, { "epoch": 0.20732794817775888, "grad_norm": 0.4508119523525238, "learning_rate": 0.00015856717659444496, "loss": 1.4274, "step": 15955 }, { "epoch": 0.20734094272167475, "grad_norm": 0.3245319426059723, "learning_rate": 0.00015856457713253356, "loss": 1.0483, "step": 15956 }, { "epoch": 0.20735393726559062, "grad_norm": 0.41097545623779297, "learning_rate": 0.0001585619776706222, "loss": 1.3924, "step": 15957 }, { "epoch": 0.2073669318095065, "grad_norm": 0.4216390550136566, "learning_rate": 0.0001585593782087108, "loss": 1.4084, "step": 15958 }, { "epoch": 0.20737992635342237, "grad_norm": 0.5487633943557739, "learning_rate": 0.0001585567787467994, "loss": 1.5324, "step": 15959 }, { "epoch": 0.20739292089733824, "grad_norm": 0.4324282109737396, "learning_rate": 0.00015855417928488803, "loss": 1.5385, "step": 15960 }, { "epoch": 0.2074059154412541, "grad_norm": 0.4272029399871826, "learning_rate": 0.00015855157982297665, "loss": 1.5907, "step": 15961 }, { "epoch": 0.20741890998516999, "grad_norm": 0.43973782658576965, "learning_rate": 0.00015854898036106528, "loss": 1.6421, "step": 15962 }, { "epoch": 0.20743190452908586, "grad_norm": 0.5025953054428101, "learning_rate": 0.00015854638089915388, "loss": 1.5181, "step": 15963 }, { "epoch": 0.20744489907300173, "grad_norm": 0.43513232469558716, "learning_rate": 0.0001585437814372425, "loss": 1.4248, "step": 15964 }, { "epoch": 0.2074578936169176, "grad_norm": 0.4654023349285126, "learning_rate": 0.00015854118197533112, "loss": 1.4621, "step": 15965 }, { "epoch": 0.20747088816083348, "grad_norm": 0.34579113125801086, "learning_rate": 0.00015853858251341972, "loss": 1.5525, "step": 15966 }, { "epoch": 0.20748388270474935, "grad_norm": 0.38553041219711304, "learning_rate": 0.00015853598305150835, "loss": 1.3002, "step": 15967 }, { "epoch": 0.20749687724866522, "grad_norm": 0.3341819941997528, "learning_rate": 0.00015853338358959694, "loss": 1.5916, "step": 15968 }, { "epoch": 0.2075098717925811, "grad_norm": 0.4365738332271576, "learning_rate": 0.0001585307841276856, "loss": 1.4613, "step": 15969 }, { "epoch": 0.20752286633649697, "grad_norm": 0.30693507194519043, "learning_rate": 0.0001585281846657742, "loss": 1.3327, "step": 15970 }, { "epoch": 0.20753586088041284, "grad_norm": 0.6149924397468567, "learning_rate": 0.0001585255852038628, "loss": 1.3987, "step": 15971 }, { "epoch": 0.20754885542432872, "grad_norm": 0.3154333233833313, "learning_rate": 0.00015852298574195141, "loss": 1.3273, "step": 15972 }, { "epoch": 0.2075618499682446, "grad_norm": 0.3717316687107086, "learning_rate": 0.00015852038628004004, "loss": 1.3175, "step": 15973 }, { "epoch": 0.20757484451216046, "grad_norm": 0.3503427505493164, "learning_rate": 0.00015851778681812866, "loss": 1.6451, "step": 15974 }, { "epoch": 0.20758783905607633, "grad_norm": 0.4445754289627075, "learning_rate": 0.00015851518735621726, "loss": 1.4261, "step": 15975 }, { "epoch": 0.2076008335999922, "grad_norm": 0.48500898480415344, "learning_rate": 0.00015851258789430589, "loss": 1.3555, "step": 15976 }, { "epoch": 0.20761382814390808, "grad_norm": 0.4229230284690857, "learning_rate": 0.0001585099884323945, "loss": 1.4876, "step": 15977 }, { "epoch": 0.20762682268782395, "grad_norm": 0.3553781509399414, "learning_rate": 0.0001585073889704831, "loss": 1.2741, "step": 15978 }, { "epoch": 0.20763981723173983, "grad_norm": 0.35326969623565674, "learning_rate": 0.00015850478950857173, "loss": 1.6283, "step": 15979 }, { "epoch": 0.2076528117756557, "grad_norm": 0.27169209718704224, "learning_rate": 0.00015850219004666036, "loss": 1.5028, "step": 15980 }, { "epoch": 0.20766580631957157, "grad_norm": 0.38618168234825134, "learning_rate": 0.00015849959058474898, "loss": 1.5922, "step": 15981 }, { "epoch": 0.20767880086348744, "grad_norm": 0.35482385754585266, "learning_rate": 0.00015849699112283758, "loss": 1.4253, "step": 15982 }, { "epoch": 0.20769179540740332, "grad_norm": 0.43640565872192383, "learning_rate": 0.0001584943916609262, "loss": 1.4547, "step": 15983 }, { "epoch": 0.2077047899513192, "grad_norm": 0.2577308416366577, "learning_rate": 0.00015849179219901483, "loss": 1.3898, "step": 15984 }, { "epoch": 0.20771778449523506, "grad_norm": 0.3763541877269745, "learning_rate": 0.00015848919273710342, "loss": 1.6398, "step": 15985 }, { "epoch": 0.20773077903915094, "grad_norm": 0.38937291502952576, "learning_rate": 0.00015848659327519205, "loss": 1.6141, "step": 15986 }, { "epoch": 0.2077437735830668, "grad_norm": 0.35316237807273865, "learning_rate": 0.00015848399381328065, "loss": 1.462, "step": 15987 }, { "epoch": 0.20775676812698268, "grad_norm": 0.5038059949874878, "learning_rate": 0.00015848139435136927, "loss": 1.5502, "step": 15988 }, { "epoch": 0.20776976267089856, "grad_norm": 0.2985912561416626, "learning_rate": 0.0001584787948894579, "loss": 1.3208, "step": 15989 }, { "epoch": 0.20778275721481443, "grad_norm": 0.42337754368782043, "learning_rate": 0.0001584761954275465, "loss": 1.5212, "step": 15990 }, { "epoch": 0.2077957517587303, "grad_norm": 0.38767901062965393, "learning_rate": 0.00015847359596563512, "loss": 1.5582, "step": 15991 }, { "epoch": 0.20780874630264617, "grad_norm": 0.4252013862133026, "learning_rate": 0.00015847099650372374, "loss": 1.4976, "step": 15992 }, { "epoch": 0.20782174084656205, "grad_norm": 0.5667216181755066, "learning_rate": 0.00015846839704181237, "loss": 1.3592, "step": 15993 }, { "epoch": 0.20783473539047792, "grad_norm": 0.39167776703834534, "learning_rate": 0.00015846579757990096, "loss": 1.4129, "step": 15994 }, { "epoch": 0.2078477299343938, "grad_norm": 0.3853174149990082, "learning_rate": 0.0001584631981179896, "loss": 1.4487, "step": 15995 }, { "epoch": 0.20786072447830967, "grad_norm": 0.3639722764492035, "learning_rate": 0.0001584605986560782, "loss": 1.4497, "step": 15996 }, { "epoch": 0.20787371902222554, "grad_norm": 0.44041547179222107, "learning_rate": 0.0001584579991941668, "loss": 1.4591, "step": 15997 }, { "epoch": 0.2078867135661414, "grad_norm": 0.34470033645629883, "learning_rate": 0.00015845539973225543, "loss": 1.2498, "step": 15998 }, { "epoch": 0.20789970811005729, "grad_norm": 0.5876336693763733, "learning_rate": 0.00015845280027034403, "loss": 1.3802, "step": 15999 }, { "epoch": 0.20791270265397316, "grad_norm": 0.3623909652233124, "learning_rate": 0.00015845020080843266, "loss": 1.299, "step": 16000 }, { "epoch": 0.20792569719788903, "grad_norm": 0.5420477390289307, "learning_rate": 0.00015844760134652128, "loss": 1.4149, "step": 16001 }, { "epoch": 0.2079386917418049, "grad_norm": 0.34653428196907043, "learning_rate": 0.00015844500188460988, "loss": 1.4183, "step": 16002 }, { "epoch": 0.20795168628572078, "grad_norm": 1.1590534448623657, "learning_rate": 0.0001584424024226985, "loss": 1.4341, "step": 16003 }, { "epoch": 0.20796468082963665, "grad_norm": 0.43542009592056274, "learning_rate": 0.00015843980296078713, "loss": 1.5338, "step": 16004 }, { "epoch": 0.20797767537355252, "grad_norm": 0.4745631515979767, "learning_rate": 0.00015843720349887575, "loss": 1.451, "step": 16005 }, { "epoch": 0.2079906699174684, "grad_norm": 0.30314990878105164, "learning_rate": 0.00015843460403696435, "loss": 1.2994, "step": 16006 }, { "epoch": 0.20800366446138427, "grad_norm": 0.38124462962150574, "learning_rate": 0.00015843200457505297, "loss": 1.5497, "step": 16007 }, { "epoch": 0.20801665900530014, "grad_norm": 0.44628238677978516, "learning_rate": 0.0001584294051131416, "loss": 1.4621, "step": 16008 }, { "epoch": 0.20802965354921601, "grad_norm": 0.3599419593811035, "learning_rate": 0.0001584268056512302, "loss": 1.3801, "step": 16009 }, { "epoch": 0.2080426480931319, "grad_norm": 0.3340938091278076, "learning_rate": 0.00015842420618931882, "loss": 1.3018, "step": 16010 }, { "epoch": 0.20805564263704776, "grad_norm": 0.4244717061519623, "learning_rate": 0.00015842160672740742, "loss": 1.4893, "step": 16011 }, { "epoch": 0.20806863718096363, "grad_norm": 0.37749183177948, "learning_rate": 0.00015841900726549607, "loss": 1.4971, "step": 16012 }, { "epoch": 0.2080816317248795, "grad_norm": 0.38569486141204834, "learning_rate": 0.00015841640780358467, "loss": 1.5133, "step": 16013 }, { "epoch": 0.20809462626879538, "grad_norm": 0.4032568633556366, "learning_rate": 0.00015841380834167326, "loss": 1.5366, "step": 16014 }, { "epoch": 0.20810762081271125, "grad_norm": 0.3759194016456604, "learning_rate": 0.00015841120887976191, "loss": 1.497, "step": 16015 }, { "epoch": 0.20812061535662713, "grad_norm": 0.3568772077560425, "learning_rate": 0.0001584086094178505, "loss": 1.4852, "step": 16016 }, { "epoch": 0.208133609900543, "grad_norm": 0.38058537244796753, "learning_rate": 0.00015840600995593914, "loss": 1.3279, "step": 16017 }, { "epoch": 0.20814660444445887, "grad_norm": 0.4988006353378296, "learning_rate": 0.00015840341049402773, "loss": 1.4481, "step": 16018 }, { "epoch": 0.20815959898837474, "grad_norm": 0.33993902802467346, "learning_rate": 0.00015840081103211636, "loss": 1.4293, "step": 16019 }, { "epoch": 0.20817259353229062, "grad_norm": 0.3664860725402832, "learning_rate": 0.00015839821157020498, "loss": 1.3491, "step": 16020 }, { "epoch": 0.2081855880762065, "grad_norm": 0.34311312437057495, "learning_rate": 0.00015839561210829358, "loss": 1.2992, "step": 16021 }, { "epoch": 0.20819858262012236, "grad_norm": 0.37073156237602234, "learning_rate": 0.0001583930126463822, "loss": 1.5062, "step": 16022 }, { "epoch": 0.20821157716403824, "grad_norm": 0.44511500000953674, "learning_rate": 0.00015839041318447083, "loss": 1.3081, "step": 16023 }, { "epoch": 0.2082245717079541, "grad_norm": 0.3527889549732208, "learning_rate": 0.00015838781372255945, "loss": 1.224, "step": 16024 }, { "epoch": 0.20823756625187, "grad_norm": 0.4156639575958252, "learning_rate": 0.00015838521426064805, "loss": 1.3504, "step": 16025 }, { "epoch": 0.20825056079578588, "grad_norm": 0.43120813369750977, "learning_rate": 0.00015838261479873665, "loss": 1.5303, "step": 16026 }, { "epoch": 0.20826355533970176, "grad_norm": 0.37040260434150696, "learning_rate": 0.0001583800153368253, "loss": 1.3712, "step": 16027 }, { "epoch": 0.20827654988361763, "grad_norm": 0.38942664861679077, "learning_rate": 0.0001583774158749139, "loss": 1.4237, "step": 16028 }, { "epoch": 0.2082895444275335, "grad_norm": 0.36321499943733215, "learning_rate": 0.00015837481641300252, "loss": 1.4585, "step": 16029 }, { "epoch": 0.20830253897144937, "grad_norm": 0.4463197588920593, "learning_rate": 0.00015837221695109112, "loss": 1.4577, "step": 16030 }, { "epoch": 0.20831553351536525, "grad_norm": 0.443386971950531, "learning_rate": 0.00015836961748917974, "loss": 1.5799, "step": 16031 }, { "epoch": 0.20832852805928112, "grad_norm": 0.3910764753818512, "learning_rate": 0.00015836701802726837, "loss": 1.5709, "step": 16032 }, { "epoch": 0.208341522603197, "grad_norm": 0.4198080003261566, "learning_rate": 0.00015836441856535697, "loss": 1.3891, "step": 16033 }, { "epoch": 0.20835451714711287, "grad_norm": 0.24765633046627045, "learning_rate": 0.0001583618191034456, "loss": 1.3661, "step": 16034 }, { "epoch": 0.20836751169102874, "grad_norm": 0.43960055708885193, "learning_rate": 0.00015835921964153421, "loss": 1.6753, "step": 16035 }, { "epoch": 0.2083805062349446, "grad_norm": 0.6276617050170898, "learning_rate": 0.00015835662017962284, "loss": 1.3542, "step": 16036 }, { "epoch": 0.20839350077886049, "grad_norm": 0.40269678831100464, "learning_rate": 0.00015835402071771144, "loss": 1.4307, "step": 16037 }, { "epoch": 0.20840649532277636, "grad_norm": 0.34608420729637146, "learning_rate": 0.00015835142125580006, "loss": 1.2931, "step": 16038 }, { "epoch": 0.20841948986669223, "grad_norm": 0.3153967559337616, "learning_rate": 0.00015834882179388869, "loss": 1.3352, "step": 16039 }, { "epoch": 0.2084324844106081, "grad_norm": 0.3433372974395752, "learning_rate": 0.00015834622233197728, "loss": 1.3119, "step": 16040 }, { "epoch": 0.20844547895452398, "grad_norm": 0.2960616648197174, "learning_rate": 0.0001583436228700659, "loss": 1.3269, "step": 16041 }, { "epoch": 0.20845847349843985, "grad_norm": 0.3713153898715973, "learning_rate": 0.0001583410234081545, "loss": 1.2676, "step": 16042 }, { "epoch": 0.20847146804235572, "grad_norm": 0.4613809585571289, "learning_rate": 0.00015833842394624313, "loss": 1.6384, "step": 16043 }, { "epoch": 0.2084844625862716, "grad_norm": 0.3246658742427826, "learning_rate": 0.00015833582448433175, "loss": 1.3396, "step": 16044 }, { "epoch": 0.20849745713018747, "grad_norm": 0.34756481647491455, "learning_rate": 0.00015833322502242035, "loss": 1.2093, "step": 16045 }, { "epoch": 0.20851045167410334, "grad_norm": 0.48932504653930664, "learning_rate": 0.00015833062556050898, "loss": 1.4613, "step": 16046 }, { "epoch": 0.20852344621801921, "grad_norm": 0.4023049771785736, "learning_rate": 0.0001583280260985976, "loss": 1.27, "step": 16047 }, { "epoch": 0.2085364407619351, "grad_norm": 0.4189609885215759, "learning_rate": 0.00015832542663668622, "loss": 1.311, "step": 16048 }, { "epoch": 0.20854943530585096, "grad_norm": 0.36577218770980835, "learning_rate": 0.00015832282717477482, "loss": 1.3969, "step": 16049 }, { "epoch": 0.20856242984976683, "grad_norm": 0.35754260420799255, "learning_rate": 0.00015832022771286345, "loss": 1.4532, "step": 16050 }, { "epoch": 0.2085754243936827, "grad_norm": 0.392670601606369, "learning_rate": 0.00015831762825095207, "loss": 1.3658, "step": 16051 }, { "epoch": 0.20858841893759858, "grad_norm": 0.3320735692977905, "learning_rate": 0.00015831502878904067, "loss": 1.1229, "step": 16052 }, { "epoch": 0.20860141348151445, "grad_norm": 0.44898515939712524, "learning_rate": 0.0001583124293271293, "loss": 1.4091, "step": 16053 }, { "epoch": 0.20861440802543033, "grad_norm": 0.35932672023773193, "learning_rate": 0.00015830982986521792, "loss": 1.3058, "step": 16054 }, { "epoch": 0.2086274025693462, "grad_norm": 0.42297473549842834, "learning_rate": 0.00015830723040330651, "loss": 1.4127, "step": 16055 }, { "epoch": 0.20864039711326207, "grad_norm": 0.38213151693344116, "learning_rate": 0.00015830463094139514, "loss": 1.4545, "step": 16056 }, { "epoch": 0.20865339165717794, "grad_norm": 0.36874356865882874, "learning_rate": 0.00015830203147948374, "loss": 1.3049, "step": 16057 }, { "epoch": 0.20866638620109382, "grad_norm": 0.37892380356788635, "learning_rate": 0.0001582994320175724, "loss": 1.3975, "step": 16058 }, { "epoch": 0.2086793807450097, "grad_norm": 0.48240986466407776, "learning_rate": 0.00015829683255566099, "loss": 1.5236, "step": 16059 }, { "epoch": 0.20869237528892556, "grad_norm": 0.35460755228996277, "learning_rate": 0.0001582942330937496, "loss": 1.3819, "step": 16060 }, { "epoch": 0.20870536983284144, "grad_norm": 0.3681202232837677, "learning_rate": 0.0001582916336318382, "loss": 1.2837, "step": 16061 }, { "epoch": 0.2087183643767573, "grad_norm": 0.4416978359222412, "learning_rate": 0.00015828903416992683, "loss": 1.5869, "step": 16062 }, { "epoch": 0.20873135892067318, "grad_norm": 0.4337884485721588, "learning_rate": 0.00015828643470801546, "loss": 1.5075, "step": 16063 }, { "epoch": 0.20874435346458906, "grad_norm": 0.41698238253593445, "learning_rate": 0.00015828383524610405, "loss": 1.5484, "step": 16064 }, { "epoch": 0.20875734800850493, "grad_norm": 0.40398716926574707, "learning_rate": 0.00015828123578419268, "loss": 1.3974, "step": 16065 }, { "epoch": 0.2087703425524208, "grad_norm": 0.4739348292350769, "learning_rate": 0.0001582786363222813, "loss": 1.2976, "step": 16066 }, { "epoch": 0.20878333709633667, "grad_norm": 0.4163246750831604, "learning_rate": 0.00015827603686036993, "loss": 1.2366, "step": 16067 }, { "epoch": 0.20879633164025255, "grad_norm": 0.3740679919719696, "learning_rate": 0.00015827343739845852, "loss": 1.3101, "step": 16068 }, { "epoch": 0.20880932618416842, "grad_norm": 0.3800796866416931, "learning_rate": 0.00015827083793654712, "loss": 1.5359, "step": 16069 }, { "epoch": 0.2088223207280843, "grad_norm": 0.4328382909297943, "learning_rate": 0.00015826823847463577, "loss": 1.3673, "step": 16070 }, { "epoch": 0.20883531527200017, "grad_norm": 0.3781449794769287, "learning_rate": 0.00015826563901272437, "loss": 1.4101, "step": 16071 }, { "epoch": 0.20884830981591604, "grad_norm": 0.3667421042919159, "learning_rate": 0.000158263039550813, "loss": 1.4905, "step": 16072 }, { "epoch": 0.2088613043598319, "grad_norm": 0.47123926877975464, "learning_rate": 0.0001582604400889016, "loss": 1.4843, "step": 16073 }, { "epoch": 0.20887429890374778, "grad_norm": 0.3746497631072998, "learning_rate": 0.00015825784062699022, "loss": 1.4777, "step": 16074 }, { "epoch": 0.20888729344766366, "grad_norm": 0.3833242654800415, "learning_rate": 0.00015825524116507884, "loss": 1.2077, "step": 16075 }, { "epoch": 0.20890028799157953, "grad_norm": 0.3413725197315216, "learning_rate": 0.00015825264170316744, "loss": 1.3196, "step": 16076 }, { "epoch": 0.2089132825354954, "grad_norm": 0.3846360445022583, "learning_rate": 0.00015825004224125606, "loss": 1.5497, "step": 16077 }, { "epoch": 0.20892627707941128, "grad_norm": 0.3661326467990875, "learning_rate": 0.0001582474427793447, "loss": 1.4268, "step": 16078 }, { "epoch": 0.20893927162332715, "grad_norm": 0.36104902625083923, "learning_rate": 0.0001582448433174333, "loss": 1.4486, "step": 16079 }, { "epoch": 0.20895226616724302, "grad_norm": 0.34755033254623413, "learning_rate": 0.0001582422438555219, "loss": 1.5399, "step": 16080 }, { "epoch": 0.2089652607111589, "grad_norm": 0.43129974603652954, "learning_rate": 0.0001582396443936105, "loss": 1.585, "step": 16081 }, { "epoch": 0.20897825525507477, "grad_norm": 0.35307538509368896, "learning_rate": 0.00015823704493169916, "loss": 1.5001, "step": 16082 }, { "epoch": 0.20899124979899064, "grad_norm": 0.35758987069129944, "learning_rate": 0.00015823444546978776, "loss": 1.3192, "step": 16083 }, { "epoch": 0.20900424434290651, "grad_norm": 0.3619868755340576, "learning_rate": 0.00015823184600787638, "loss": 1.5394, "step": 16084 }, { "epoch": 0.2090172388868224, "grad_norm": 0.4265950918197632, "learning_rate": 0.00015822924654596498, "loss": 1.5089, "step": 16085 }, { "epoch": 0.20903023343073826, "grad_norm": 0.3617009222507477, "learning_rate": 0.0001582266470840536, "loss": 1.1267, "step": 16086 }, { "epoch": 0.20904322797465413, "grad_norm": 0.37554478645324707, "learning_rate": 0.00015822404762214223, "loss": 1.4605, "step": 16087 }, { "epoch": 0.20905622251857, "grad_norm": 0.3618440330028534, "learning_rate": 0.00015822144816023082, "loss": 1.2524, "step": 16088 }, { "epoch": 0.20906921706248588, "grad_norm": 0.3825511932373047, "learning_rate": 0.00015821884869831948, "loss": 1.3665, "step": 16089 }, { "epoch": 0.20908221160640175, "grad_norm": 0.3011224865913391, "learning_rate": 0.00015821624923640807, "loss": 1.4324, "step": 16090 }, { "epoch": 0.20909520615031763, "grad_norm": 0.33794263005256653, "learning_rate": 0.0001582136497744967, "loss": 1.4026, "step": 16091 }, { "epoch": 0.2091082006942335, "grad_norm": 0.3835112452507019, "learning_rate": 0.0001582110503125853, "loss": 1.4223, "step": 16092 }, { "epoch": 0.20912119523814937, "grad_norm": 0.39376211166381836, "learning_rate": 0.00015820845085067392, "loss": 1.4813, "step": 16093 }, { "epoch": 0.20913418978206524, "grad_norm": 0.3421460688114166, "learning_rate": 0.00015820585138876254, "loss": 1.3319, "step": 16094 }, { "epoch": 0.20914718432598112, "grad_norm": 0.37054017186164856, "learning_rate": 0.00015820325192685114, "loss": 1.2539, "step": 16095 }, { "epoch": 0.209160178869897, "grad_norm": 0.3377704620361328, "learning_rate": 0.00015820065246493977, "loss": 1.3083, "step": 16096 }, { "epoch": 0.20917317341381286, "grad_norm": 0.3811125159263611, "learning_rate": 0.0001581980530030284, "loss": 1.5962, "step": 16097 }, { "epoch": 0.20918616795772874, "grad_norm": 0.42365556955337524, "learning_rate": 0.000158195453541117, "loss": 1.3844, "step": 16098 }, { "epoch": 0.2091991625016446, "grad_norm": 0.34604668617248535, "learning_rate": 0.0001581928540792056, "loss": 1.3526, "step": 16099 }, { "epoch": 0.20921215704556048, "grad_norm": 0.534712016582489, "learning_rate": 0.0001581902546172942, "loss": 1.5126, "step": 16100 }, { "epoch": 0.20922515158947638, "grad_norm": 0.4300841689109802, "learning_rate": 0.00015818765515538286, "loss": 1.4797, "step": 16101 }, { "epoch": 0.20923814613339226, "grad_norm": 0.36704885959625244, "learning_rate": 0.00015818505569347146, "loss": 1.3589, "step": 16102 }, { "epoch": 0.20925114067730813, "grad_norm": 0.31938013434410095, "learning_rate": 0.00015818245623156008, "loss": 1.4357, "step": 16103 }, { "epoch": 0.209264135221224, "grad_norm": 0.43198609352111816, "learning_rate": 0.00015817985676964868, "loss": 1.3706, "step": 16104 }, { "epoch": 0.20927712976513987, "grad_norm": 0.36224010586738586, "learning_rate": 0.0001581772573077373, "loss": 1.6895, "step": 16105 }, { "epoch": 0.20929012430905575, "grad_norm": 0.3959023356437683, "learning_rate": 0.00015817465784582593, "loss": 1.4057, "step": 16106 }, { "epoch": 0.20930311885297162, "grad_norm": 0.32151564955711365, "learning_rate": 0.00015817205838391453, "loss": 1.2412, "step": 16107 }, { "epoch": 0.2093161133968875, "grad_norm": 0.3085571825504303, "learning_rate": 0.00015816945892200315, "loss": 1.5794, "step": 16108 }, { "epoch": 0.20932910794080337, "grad_norm": 0.4798315465450287, "learning_rate": 0.00015816685946009178, "loss": 1.4049, "step": 16109 }, { "epoch": 0.20934210248471924, "grad_norm": 0.29774633049964905, "learning_rate": 0.00015816425999818037, "loss": 1.3269, "step": 16110 }, { "epoch": 0.2093550970286351, "grad_norm": 0.2533249258995056, "learning_rate": 0.000158161660536269, "loss": 1.2082, "step": 16111 }, { "epoch": 0.20936809157255099, "grad_norm": 0.4961012601852417, "learning_rate": 0.0001581590610743576, "loss": 1.4736, "step": 16112 }, { "epoch": 0.20938108611646686, "grad_norm": 0.4445107877254486, "learning_rate": 0.00015815646161244625, "loss": 1.3557, "step": 16113 }, { "epoch": 0.20939408066038273, "grad_norm": 0.41197922825813293, "learning_rate": 0.00015815386215053484, "loss": 1.4149, "step": 16114 }, { "epoch": 0.2094070752042986, "grad_norm": 0.444100022315979, "learning_rate": 0.00015815126268862347, "loss": 1.3203, "step": 16115 }, { "epoch": 0.20942006974821448, "grad_norm": 0.3689761459827423, "learning_rate": 0.00015814866322671207, "loss": 1.5023, "step": 16116 }, { "epoch": 0.20943306429213035, "grad_norm": 0.33834308385849, "learning_rate": 0.0001581460637648007, "loss": 1.3092, "step": 16117 }, { "epoch": 0.20944605883604622, "grad_norm": 0.4474155306816101, "learning_rate": 0.00015814346430288932, "loss": 1.5661, "step": 16118 }, { "epoch": 0.2094590533799621, "grad_norm": 0.507619321346283, "learning_rate": 0.0001581408648409779, "loss": 1.5395, "step": 16119 }, { "epoch": 0.20947204792387797, "grad_norm": 0.3917638063430786, "learning_rate": 0.00015813826537906654, "loss": 1.481, "step": 16120 }, { "epoch": 0.20948504246779384, "grad_norm": 0.40150707960128784, "learning_rate": 0.00015813566591715516, "loss": 1.3217, "step": 16121 }, { "epoch": 0.20949803701170971, "grad_norm": 0.4090917706489563, "learning_rate": 0.00015813306645524379, "loss": 1.3591, "step": 16122 }, { "epoch": 0.2095110315556256, "grad_norm": 0.4967215359210968, "learning_rate": 0.00015813046699333238, "loss": 1.393, "step": 16123 }, { "epoch": 0.20952402609954146, "grad_norm": 0.4418075680732727, "learning_rate": 0.000158127867531421, "loss": 1.4935, "step": 16124 }, { "epoch": 0.20953702064345733, "grad_norm": 0.37651708722114563, "learning_rate": 0.00015812526806950963, "loss": 1.4646, "step": 16125 }, { "epoch": 0.2095500151873732, "grad_norm": 0.4067695140838623, "learning_rate": 0.00015812266860759823, "loss": 1.3542, "step": 16126 }, { "epoch": 0.20956300973128908, "grad_norm": 0.33154159784317017, "learning_rate": 0.00015812006914568685, "loss": 1.5, "step": 16127 }, { "epoch": 0.20957600427520495, "grad_norm": 0.3993632197380066, "learning_rate": 0.00015811746968377548, "loss": 1.4224, "step": 16128 }, { "epoch": 0.20958899881912083, "grad_norm": 0.45638805627822876, "learning_rate": 0.00015811487022186408, "loss": 1.5868, "step": 16129 }, { "epoch": 0.2096019933630367, "grad_norm": 0.4839898943901062, "learning_rate": 0.0001581122707599527, "loss": 1.6074, "step": 16130 }, { "epoch": 0.20961498790695257, "grad_norm": 0.4412135183811188, "learning_rate": 0.0001581096712980413, "loss": 1.3248, "step": 16131 }, { "epoch": 0.20962798245086844, "grad_norm": 0.30279359221458435, "learning_rate": 0.00015810707183612995, "loss": 1.168, "step": 16132 }, { "epoch": 0.20964097699478432, "grad_norm": 0.5604149699211121, "learning_rate": 0.00015810447237421855, "loss": 1.5378, "step": 16133 }, { "epoch": 0.2096539715387002, "grad_norm": 0.34835976362228394, "learning_rate": 0.00015810187291230717, "loss": 1.3418, "step": 16134 }, { "epoch": 0.20966696608261606, "grad_norm": 0.3554418087005615, "learning_rate": 0.00015809927345039577, "loss": 1.3782, "step": 16135 }, { "epoch": 0.20967996062653194, "grad_norm": 0.3810730278491974, "learning_rate": 0.0001580966739884844, "loss": 1.6233, "step": 16136 }, { "epoch": 0.2096929551704478, "grad_norm": 0.8057996034622192, "learning_rate": 0.00015809407452657302, "loss": 1.4464, "step": 16137 }, { "epoch": 0.20970594971436368, "grad_norm": 0.35921770334243774, "learning_rate": 0.00015809147506466162, "loss": 1.4362, "step": 16138 }, { "epoch": 0.20971894425827955, "grad_norm": 0.33673444390296936, "learning_rate": 0.00015808887560275024, "loss": 1.2865, "step": 16139 }, { "epoch": 0.20973193880219543, "grad_norm": 0.32769522070884705, "learning_rate": 0.00015808627614083886, "loss": 1.4023, "step": 16140 }, { "epoch": 0.2097449333461113, "grad_norm": 0.40654343366622925, "learning_rate": 0.00015808367667892746, "loss": 1.3868, "step": 16141 }, { "epoch": 0.20975792789002717, "grad_norm": 0.5318293571472168, "learning_rate": 0.00015808107721701609, "loss": 1.5092, "step": 16142 }, { "epoch": 0.20977092243394305, "grad_norm": 0.37608638405799866, "learning_rate": 0.00015807847775510468, "loss": 1.4289, "step": 16143 }, { "epoch": 0.20978391697785892, "grad_norm": 0.30139660835266113, "learning_rate": 0.00015807587829319333, "loss": 1.1744, "step": 16144 }, { "epoch": 0.2097969115217748, "grad_norm": 0.33328428864479065, "learning_rate": 0.00015807327883128193, "loss": 1.4049, "step": 16145 }, { "epoch": 0.20980990606569067, "grad_norm": 0.3661699593067169, "learning_rate": 0.00015807067936937056, "loss": 1.3411, "step": 16146 }, { "epoch": 0.20982290060960654, "grad_norm": 0.33218416571617126, "learning_rate": 0.00015806807990745915, "loss": 1.3809, "step": 16147 }, { "epoch": 0.2098358951535224, "grad_norm": 0.5098658800125122, "learning_rate": 0.00015806548044554778, "loss": 1.2054, "step": 16148 }, { "epoch": 0.20984888969743828, "grad_norm": 0.4795396029949188, "learning_rate": 0.0001580628809836364, "loss": 1.5008, "step": 16149 }, { "epoch": 0.20986188424135416, "grad_norm": 0.4537249207496643, "learning_rate": 0.000158060281521725, "loss": 1.4523, "step": 16150 }, { "epoch": 0.20987487878527003, "grad_norm": 0.4230712354183197, "learning_rate": 0.00015805768205981362, "loss": 1.4202, "step": 16151 }, { "epoch": 0.2098878733291859, "grad_norm": 0.419634073972702, "learning_rate": 0.00015805508259790225, "loss": 1.4281, "step": 16152 }, { "epoch": 0.20990086787310178, "grad_norm": 0.5495619773864746, "learning_rate": 0.00015805248313599085, "loss": 1.4266, "step": 16153 }, { "epoch": 0.20991386241701765, "grad_norm": 0.43823331594467163, "learning_rate": 0.00015804988367407947, "loss": 1.4767, "step": 16154 }, { "epoch": 0.20992685696093352, "grad_norm": 0.3293590247631073, "learning_rate": 0.00015804728421216807, "loss": 1.2809, "step": 16155 }, { "epoch": 0.2099398515048494, "grad_norm": 0.4741308391094208, "learning_rate": 0.00015804468475025672, "loss": 1.41, "step": 16156 }, { "epoch": 0.20995284604876527, "grad_norm": 0.5016207098960876, "learning_rate": 0.00015804208528834532, "loss": 1.4976, "step": 16157 }, { "epoch": 0.20996584059268114, "grad_norm": 0.2731064260005951, "learning_rate": 0.00015803948582643394, "loss": 1.416, "step": 16158 }, { "epoch": 0.20997883513659701, "grad_norm": 0.30615097284317017, "learning_rate": 0.00015803688636452254, "loss": 1.3727, "step": 16159 }, { "epoch": 0.2099918296805129, "grad_norm": 0.3715973198413849, "learning_rate": 0.00015803428690261116, "loss": 1.5319, "step": 16160 }, { "epoch": 0.21000482422442876, "grad_norm": 0.39406460523605347, "learning_rate": 0.0001580316874406998, "loss": 1.3951, "step": 16161 }, { "epoch": 0.21001781876834463, "grad_norm": 0.4085550606250763, "learning_rate": 0.00015802908797878839, "loss": 1.3726, "step": 16162 }, { "epoch": 0.2100308133122605, "grad_norm": 0.4924324154853821, "learning_rate": 0.00015802648851687704, "loss": 1.5622, "step": 16163 }, { "epoch": 0.21004380785617638, "grad_norm": 0.3017157316207886, "learning_rate": 0.00015802388905496563, "loss": 1.327, "step": 16164 }, { "epoch": 0.21005680240009225, "grad_norm": 0.35309597849845886, "learning_rate": 0.00015802128959305423, "loss": 1.354, "step": 16165 }, { "epoch": 0.21006979694400812, "grad_norm": 0.34255531430244446, "learning_rate": 0.00015801869013114286, "loss": 1.2859, "step": 16166 }, { "epoch": 0.210082791487924, "grad_norm": 0.4562980532646179, "learning_rate": 0.00015801609066923148, "loss": 1.3558, "step": 16167 }, { "epoch": 0.21009578603183987, "grad_norm": 0.3897833526134491, "learning_rate": 0.0001580134912073201, "loss": 1.256, "step": 16168 }, { "epoch": 0.21010878057575574, "grad_norm": 0.526243269443512, "learning_rate": 0.0001580108917454087, "loss": 1.4002, "step": 16169 }, { "epoch": 0.21012177511967162, "grad_norm": 0.41489970684051514, "learning_rate": 0.00015800829228349733, "loss": 1.3833, "step": 16170 }, { "epoch": 0.2101347696635875, "grad_norm": 0.4673166871070862, "learning_rate": 0.00015800569282158595, "loss": 1.5615, "step": 16171 }, { "epoch": 0.21014776420750336, "grad_norm": 0.5549204349517822, "learning_rate": 0.00015800309335967455, "loss": 1.442, "step": 16172 }, { "epoch": 0.21016075875141924, "grad_norm": 0.279826819896698, "learning_rate": 0.00015800049389776317, "loss": 1.3215, "step": 16173 }, { "epoch": 0.2101737532953351, "grad_norm": 0.4905039966106415, "learning_rate": 0.00015799789443585177, "loss": 1.4875, "step": 16174 }, { "epoch": 0.21018674783925098, "grad_norm": 0.40426307916641235, "learning_rate": 0.00015799529497394042, "loss": 1.4631, "step": 16175 }, { "epoch": 0.21019974238316685, "grad_norm": 0.42143338918685913, "learning_rate": 0.00015799269551202902, "loss": 1.4643, "step": 16176 }, { "epoch": 0.21021273692708273, "grad_norm": 0.485944539308548, "learning_rate": 0.00015799009605011762, "loss": 1.4182, "step": 16177 }, { "epoch": 0.21022573147099863, "grad_norm": 0.3476139307022095, "learning_rate": 0.00015798749658820624, "loss": 1.3245, "step": 16178 }, { "epoch": 0.2102387260149145, "grad_norm": 0.35941123962402344, "learning_rate": 0.00015798489712629487, "loss": 1.3055, "step": 16179 }, { "epoch": 0.21025172055883037, "grad_norm": 0.4703357517719269, "learning_rate": 0.0001579822976643835, "loss": 1.4774, "step": 16180 }, { "epoch": 0.21026471510274625, "grad_norm": 0.41082531213760376, "learning_rate": 0.0001579796982024721, "loss": 1.2505, "step": 16181 }, { "epoch": 0.21027770964666212, "grad_norm": 0.42048898339271545, "learning_rate": 0.0001579770987405607, "loss": 1.4652, "step": 16182 }, { "epoch": 0.210290704190578, "grad_norm": 0.42284929752349854, "learning_rate": 0.00015797449927864934, "loss": 1.4355, "step": 16183 }, { "epoch": 0.21030369873449387, "grad_norm": 0.30913910269737244, "learning_rate": 0.00015797189981673793, "loss": 1.3227, "step": 16184 }, { "epoch": 0.21031669327840974, "grad_norm": 0.338111013174057, "learning_rate": 0.00015796930035482656, "loss": 1.2936, "step": 16185 }, { "epoch": 0.2103296878223256, "grad_norm": 0.3680938184261322, "learning_rate": 0.00015796670089291516, "loss": 1.4755, "step": 16186 }, { "epoch": 0.21034268236624148, "grad_norm": 0.3610798120498657, "learning_rate": 0.0001579641014310038, "loss": 1.3156, "step": 16187 }, { "epoch": 0.21035567691015736, "grad_norm": 0.3957568109035492, "learning_rate": 0.0001579615019690924, "loss": 1.5311, "step": 16188 }, { "epoch": 0.21036867145407323, "grad_norm": 0.39040637016296387, "learning_rate": 0.00015795890250718103, "loss": 1.3733, "step": 16189 }, { "epoch": 0.2103816659979891, "grad_norm": 0.3577485680580139, "learning_rate": 0.00015795630304526963, "loss": 1.5836, "step": 16190 }, { "epoch": 0.21039466054190498, "grad_norm": 0.33625850081443787, "learning_rate": 0.00015795370358335825, "loss": 1.4179, "step": 16191 }, { "epoch": 0.21040765508582085, "grad_norm": 0.46243566274642944, "learning_rate": 0.00015795110412144688, "loss": 1.4302, "step": 16192 }, { "epoch": 0.21042064962973672, "grad_norm": 0.391183078289032, "learning_rate": 0.00015794850465953547, "loss": 1.309, "step": 16193 }, { "epoch": 0.2104336441736526, "grad_norm": 0.3739601671695709, "learning_rate": 0.0001579459051976241, "loss": 1.3358, "step": 16194 }, { "epoch": 0.21044663871756847, "grad_norm": 0.4088958203792572, "learning_rate": 0.00015794330573571272, "loss": 1.5672, "step": 16195 }, { "epoch": 0.21045963326148434, "grad_norm": 0.4685206115245819, "learning_rate": 0.00015794070627380132, "loss": 1.4968, "step": 16196 }, { "epoch": 0.21047262780540021, "grad_norm": 0.410035640001297, "learning_rate": 0.00015793810681188994, "loss": 1.3905, "step": 16197 }, { "epoch": 0.2104856223493161, "grad_norm": 0.3784518539905548, "learning_rate": 0.00015793550734997854, "loss": 1.4736, "step": 16198 }, { "epoch": 0.21049861689323196, "grad_norm": 0.33516770601272583, "learning_rate": 0.0001579329078880672, "loss": 1.4937, "step": 16199 }, { "epoch": 0.21051161143714783, "grad_norm": 0.3485300540924072, "learning_rate": 0.0001579303084261558, "loss": 1.4367, "step": 16200 }, { "epoch": 0.2105246059810637, "grad_norm": 0.40552788972854614, "learning_rate": 0.00015792770896424442, "loss": 1.5334, "step": 16201 }, { "epoch": 0.21053760052497958, "grad_norm": 0.3778834342956543, "learning_rate": 0.00015792510950233304, "loss": 1.3598, "step": 16202 }, { "epoch": 0.21055059506889545, "grad_norm": 0.33078300952911377, "learning_rate": 0.00015792251004042164, "loss": 1.4294, "step": 16203 }, { "epoch": 0.21056358961281132, "grad_norm": 0.38528957962989807, "learning_rate": 0.00015791991057851026, "loss": 1.524, "step": 16204 }, { "epoch": 0.2105765841567272, "grad_norm": 0.4119674563407898, "learning_rate": 0.00015791731111659886, "loss": 1.4431, "step": 16205 }, { "epoch": 0.21058957870064307, "grad_norm": 0.31666994094848633, "learning_rate": 0.00015791471165468748, "loss": 1.4804, "step": 16206 }, { "epoch": 0.21060257324455894, "grad_norm": 0.38198497891426086, "learning_rate": 0.0001579121121927761, "loss": 1.2563, "step": 16207 }, { "epoch": 0.21061556778847482, "grad_norm": 0.46017175912857056, "learning_rate": 0.0001579095127308647, "loss": 1.4845, "step": 16208 }, { "epoch": 0.2106285623323907, "grad_norm": 0.41139546036720276, "learning_rate": 0.00015790691326895333, "loss": 1.5126, "step": 16209 }, { "epoch": 0.21064155687630656, "grad_norm": 0.3713287115097046, "learning_rate": 0.00015790431380704195, "loss": 1.388, "step": 16210 }, { "epoch": 0.21065455142022244, "grad_norm": 0.47881636023521423, "learning_rate": 0.00015790171434513058, "loss": 1.387, "step": 16211 }, { "epoch": 0.2106675459641383, "grad_norm": 0.37192192673683167, "learning_rate": 0.00015789911488321918, "loss": 1.3125, "step": 16212 }, { "epoch": 0.21068054050805418, "grad_norm": 0.43556323647499084, "learning_rate": 0.0001578965154213078, "loss": 1.5504, "step": 16213 }, { "epoch": 0.21069353505197005, "grad_norm": 0.3167757987976074, "learning_rate": 0.00015789391595939643, "loss": 1.3135, "step": 16214 }, { "epoch": 0.21070652959588593, "grad_norm": 0.4473654627799988, "learning_rate": 0.00015789131649748502, "loss": 1.574, "step": 16215 }, { "epoch": 0.2107195241398018, "grad_norm": 0.4516356885433197, "learning_rate": 0.00015788871703557365, "loss": 1.4953, "step": 16216 }, { "epoch": 0.21073251868371767, "grad_norm": 0.4738386869430542, "learning_rate": 0.00015788611757366224, "loss": 1.4981, "step": 16217 }, { "epoch": 0.21074551322763355, "grad_norm": 0.4137638807296753, "learning_rate": 0.0001578835181117509, "loss": 1.4266, "step": 16218 }, { "epoch": 0.21075850777154942, "grad_norm": 0.3552573621273041, "learning_rate": 0.0001578809186498395, "loss": 1.2781, "step": 16219 }, { "epoch": 0.2107715023154653, "grad_norm": 0.4965246617794037, "learning_rate": 0.0001578783191879281, "loss": 1.2732, "step": 16220 }, { "epoch": 0.21078449685938117, "grad_norm": 0.4246561527252197, "learning_rate": 0.00015787571972601672, "loss": 1.4232, "step": 16221 }, { "epoch": 0.21079749140329704, "grad_norm": 0.3535086214542389, "learning_rate": 0.00015787312026410534, "loss": 1.5869, "step": 16222 }, { "epoch": 0.2108104859472129, "grad_norm": 0.3383576273918152, "learning_rate": 0.00015787052080219396, "loss": 1.5025, "step": 16223 }, { "epoch": 0.21082348049112878, "grad_norm": 0.35446271300315857, "learning_rate": 0.00015786792134028256, "loss": 1.4016, "step": 16224 }, { "epoch": 0.21083647503504466, "grad_norm": 0.5007792711257935, "learning_rate": 0.00015786532187837119, "loss": 1.7624, "step": 16225 }, { "epoch": 0.21084946957896053, "grad_norm": 0.41847583651542664, "learning_rate": 0.0001578627224164598, "loss": 1.4083, "step": 16226 }, { "epoch": 0.2108624641228764, "grad_norm": 0.40579652786254883, "learning_rate": 0.0001578601229545484, "loss": 1.4386, "step": 16227 }, { "epoch": 0.21087545866679228, "grad_norm": 0.45208218693733215, "learning_rate": 0.00015785752349263703, "loss": 1.3425, "step": 16228 }, { "epoch": 0.21088845321070815, "grad_norm": 0.4796958267688751, "learning_rate": 0.00015785492403072563, "loss": 1.4284, "step": 16229 }, { "epoch": 0.21090144775462402, "grad_norm": 0.4104662239551544, "learning_rate": 0.00015785232456881428, "loss": 1.2648, "step": 16230 }, { "epoch": 0.2109144422985399, "grad_norm": 0.454857736825943, "learning_rate": 0.00015784972510690288, "loss": 1.534, "step": 16231 }, { "epoch": 0.21092743684245577, "grad_norm": 0.31274157762527466, "learning_rate": 0.00015784712564499148, "loss": 1.2987, "step": 16232 }, { "epoch": 0.21094043138637164, "grad_norm": 0.4466742277145386, "learning_rate": 0.0001578445261830801, "loss": 1.3356, "step": 16233 }, { "epoch": 0.2109534259302875, "grad_norm": 0.47671762108802795, "learning_rate": 0.00015784192672116873, "loss": 1.4144, "step": 16234 }, { "epoch": 0.2109664204742034, "grad_norm": 0.4282459616661072, "learning_rate": 0.00015783932725925735, "loss": 1.2414, "step": 16235 }, { "epoch": 0.21097941501811926, "grad_norm": 0.4464629590511322, "learning_rate": 0.00015783672779734595, "loss": 1.3075, "step": 16236 }, { "epoch": 0.21099240956203513, "grad_norm": 0.40190696716308594, "learning_rate": 0.00015783412833543457, "loss": 1.227, "step": 16237 }, { "epoch": 0.211005404105951, "grad_norm": 0.36484453082084656, "learning_rate": 0.0001578315288735232, "loss": 1.3304, "step": 16238 }, { "epoch": 0.21101839864986688, "grad_norm": 0.2900242209434509, "learning_rate": 0.0001578289294116118, "loss": 1.27, "step": 16239 }, { "epoch": 0.21103139319378275, "grad_norm": 0.4291497766971588, "learning_rate": 0.00015782632994970042, "loss": 1.5278, "step": 16240 }, { "epoch": 0.21104438773769862, "grad_norm": 0.43599385023117065, "learning_rate": 0.00015782373048778904, "loss": 1.4757, "step": 16241 }, { "epoch": 0.2110573822816145, "grad_norm": 0.43510109186172485, "learning_rate": 0.00015782113102587767, "loss": 1.4869, "step": 16242 }, { "epoch": 0.21107037682553037, "grad_norm": 0.3670237064361572, "learning_rate": 0.00015781853156396626, "loss": 1.4084, "step": 16243 }, { "epoch": 0.21108337136944624, "grad_norm": 0.41854774951934814, "learning_rate": 0.0001578159321020549, "loss": 1.4313, "step": 16244 }, { "epoch": 0.21109636591336212, "grad_norm": 0.37367674708366394, "learning_rate": 0.0001578133326401435, "loss": 1.5754, "step": 16245 }, { "epoch": 0.211109360457278, "grad_norm": 0.35526806116104126, "learning_rate": 0.0001578107331782321, "loss": 1.347, "step": 16246 }, { "epoch": 0.21112235500119386, "grad_norm": 0.46190324425697327, "learning_rate": 0.00015780813371632074, "loss": 1.3326, "step": 16247 }, { "epoch": 0.21113534954510974, "grad_norm": 0.3207860589027405, "learning_rate": 0.00015780553425440933, "loss": 1.5087, "step": 16248 }, { "epoch": 0.2111483440890256, "grad_norm": 0.4525337219238281, "learning_rate": 0.00015780293479249796, "loss": 1.4193, "step": 16249 }, { "epoch": 0.21116133863294148, "grad_norm": 0.40851640701293945, "learning_rate": 0.00015780033533058658, "loss": 1.2945, "step": 16250 }, { "epoch": 0.21117433317685735, "grad_norm": 0.35073214769363403, "learning_rate": 0.00015779773586867518, "loss": 1.4833, "step": 16251 }, { "epoch": 0.21118732772077323, "grad_norm": 0.2889728844165802, "learning_rate": 0.0001577951364067638, "loss": 1.2882, "step": 16252 }, { "epoch": 0.2112003222646891, "grad_norm": 0.39264413714408875, "learning_rate": 0.00015779253694485243, "loss": 1.4889, "step": 16253 }, { "epoch": 0.211213316808605, "grad_norm": 0.40936630964279175, "learning_rate": 0.00015778993748294105, "loss": 1.3504, "step": 16254 }, { "epoch": 0.21122631135252087, "grad_norm": 0.4741725027561188, "learning_rate": 0.00015778733802102965, "loss": 1.5754, "step": 16255 }, { "epoch": 0.21123930589643675, "grad_norm": 0.358914315700531, "learning_rate": 0.00015778473855911827, "loss": 1.398, "step": 16256 }, { "epoch": 0.21125230044035262, "grad_norm": 0.35838326811790466, "learning_rate": 0.0001577821390972069, "loss": 1.6176, "step": 16257 }, { "epoch": 0.2112652949842685, "grad_norm": 0.4114440679550171, "learning_rate": 0.0001577795396352955, "loss": 1.4288, "step": 16258 }, { "epoch": 0.21127828952818437, "grad_norm": 0.3763172924518585, "learning_rate": 0.00015777694017338412, "loss": 1.3329, "step": 16259 }, { "epoch": 0.21129128407210024, "grad_norm": 0.422807902097702, "learning_rate": 0.00015777434071147272, "loss": 1.2846, "step": 16260 }, { "epoch": 0.2113042786160161, "grad_norm": 0.39940619468688965, "learning_rate": 0.00015777174124956134, "loss": 1.4688, "step": 16261 }, { "epoch": 0.21131727315993198, "grad_norm": 0.43360424041748047, "learning_rate": 0.00015776914178764997, "loss": 1.5483, "step": 16262 }, { "epoch": 0.21133026770384786, "grad_norm": 0.4620785117149353, "learning_rate": 0.00015776654232573856, "loss": 1.5158, "step": 16263 }, { "epoch": 0.21134326224776373, "grad_norm": 0.4285954535007477, "learning_rate": 0.0001577639428638272, "loss": 1.5753, "step": 16264 }, { "epoch": 0.2113562567916796, "grad_norm": 0.5011884570121765, "learning_rate": 0.0001577613434019158, "loss": 1.3573, "step": 16265 }, { "epoch": 0.21136925133559548, "grad_norm": 0.442808598279953, "learning_rate": 0.00015775874394000444, "loss": 1.478, "step": 16266 }, { "epoch": 0.21138224587951135, "grad_norm": 0.4511898159980774, "learning_rate": 0.00015775614447809304, "loss": 1.4197, "step": 16267 }, { "epoch": 0.21139524042342722, "grad_norm": 0.34370219707489014, "learning_rate": 0.00015775354501618166, "loss": 1.5622, "step": 16268 }, { "epoch": 0.2114082349673431, "grad_norm": 0.44472193717956543, "learning_rate": 0.00015775094555427028, "loss": 1.5086, "step": 16269 }, { "epoch": 0.21142122951125897, "grad_norm": 0.3992282748222351, "learning_rate": 0.00015774834609235888, "loss": 1.3097, "step": 16270 }, { "epoch": 0.21143422405517484, "grad_norm": 0.4432145059108734, "learning_rate": 0.0001577457466304475, "loss": 1.4042, "step": 16271 }, { "epoch": 0.2114472185990907, "grad_norm": 0.40112441778182983, "learning_rate": 0.0001577431471685361, "loss": 1.2503, "step": 16272 }, { "epoch": 0.2114602131430066, "grad_norm": 0.30481457710266113, "learning_rate": 0.00015774054770662475, "loss": 1.3486, "step": 16273 }, { "epoch": 0.21147320768692246, "grad_norm": 0.3991979658603668, "learning_rate": 0.00015773794824471335, "loss": 1.4356, "step": 16274 }, { "epoch": 0.21148620223083833, "grad_norm": 0.3961305618286133, "learning_rate": 0.00015773534878280195, "loss": 1.5381, "step": 16275 }, { "epoch": 0.2114991967747542, "grad_norm": 0.44020363688468933, "learning_rate": 0.0001577327493208906, "loss": 1.488, "step": 16276 }, { "epoch": 0.21151219131867008, "grad_norm": 0.4920528829097748, "learning_rate": 0.0001577301498589792, "loss": 1.5657, "step": 16277 }, { "epoch": 0.21152518586258595, "grad_norm": 0.5440728664398193, "learning_rate": 0.00015772755039706782, "loss": 1.5565, "step": 16278 }, { "epoch": 0.21153818040650182, "grad_norm": 0.2542400062084198, "learning_rate": 0.00015772495093515642, "loss": 1.3051, "step": 16279 }, { "epoch": 0.2115511749504177, "grad_norm": 0.3778766989707947, "learning_rate": 0.00015772235147324504, "loss": 1.4693, "step": 16280 }, { "epoch": 0.21156416949433357, "grad_norm": 0.49587738513946533, "learning_rate": 0.00015771975201133367, "loss": 1.2817, "step": 16281 }, { "epoch": 0.21157716403824944, "grad_norm": 0.36572280526161194, "learning_rate": 0.00015771715254942227, "loss": 1.3194, "step": 16282 }, { "epoch": 0.21159015858216532, "grad_norm": 0.491885244846344, "learning_rate": 0.0001577145530875109, "loss": 1.529, "step": 16283 }, { "epoch": 0.2116031531260812, "grad_norm": 0.40221449732780457, "learning_rate": 0.00015771195362559952, "loss": 1.312, "step": 16284 }, { "epoch": 0.21161614766999706, "grad_norm": 0.4051157534122467, "learning_rate": 0.00015770935416368814, "loss": 1.3307, "step": 16285 }, { "epoch": 0.21162914221391294, "grad_norm": 0.3877584636211395, "learning_rate": 0.00015770675470177674, "loss": 1.3674, "step": 16286 }, { "epoch": 0.2116421367578288, "grad_norm": 0.39504584670066833, "learning_rate": 0.00015770415523986534, "loss": 1.5278, "step": 16287 }, { "epoch": 0.21165513130174468, "grad_norm": 0.2848084568977356, "learning_rate": 0.000157701555777954, "loss": 1.2895, "step": 16288 }, { "epoch": 0.21166812584566055, "grad_norm": 0.4326888620853424, "learning_rate": 0.00015769895631604258, "loss": 1.541, "step": 16289 }, { "epoch": 0.21168112038957643, "grad_norm": 0.3197568356990814, "learning_rate": 0.0001576963568541312, "loss": 1.3245, "step": 16290 }, { "epoch": 0.2116941149334923, "grad_norm": 0.33372920751571655, "learning_rate": 0.0001576937573922198, "loss": 1.3271, "step": 16291 }, { "epoch": 0.21170710947740817, "grad_norm": 0.3435698449611664, "learning_rate": 0.00015769115793030843, "loss": 1.4158, "step": 16292 }, { "epoch": 0.21172010402132405, "grad_norm": 0.369258850812912, "learning_rate": 0.00015768855846839705, "loss": 1.5082, "step": 16293 }, { "epoch": 0.21173309856523992, "grad_norm": 0.35419028997421265, "learning_rate": 0.00015768595900648565, "loss": 1.3562, "step": 16294 }, { "epoch": 0.2117460931091558, "grad_norm": 0.44479209184646606, "learning_rate": 0.00015768335954457428, "loss": 1.3886, "step": 16295 }, { "epoch": 0.21175908765307166, "grad_norm": 0.4464130103588104, "learning_rate": 0.0001576807600826629, "loss": 1.5582, "step": 16296 }, { "epoch": 0.21177208219698754, "grad_norm": 0.2587941586971283, "learning_rate": 0.00015767816062075153, "loss": 1.235, "step": 16297 }, { "epoch": 0.2117850767409034, "grad_norm": 0.39664021134376526, "learning_rate": 0.00015767556115884012, "loss": 1.3493, "step": 16298 }, { "epoch": 0.21179807128481928, "grad_norm": 0.3935302495956421, "learning_rate": 0.00015767296169692872, "loss": 1.4107, "step": 16299 }, { "epoch": 0.21181106582873516, "grad_norm": 0.42637020349502563, "learning_rate": 0.00015767036223501737, "loss": 1.3466, "step": 16300 }, { "epoch": 0.21182406037265103, "grad_norm": 0.36727002263069153, "learning_rate": 0.00015766776277310597, "loss": 1.3438, "step": 16301 }, { "epoch": 0.2118370549165669, "grad_norm": 0.3233186602592468, "learning_rate": 0.0001576651633111946, "loss": 1.3481, "step": 16302 }, { "epoch": 0.21185004946048278, "grad_norm": 0.4604240953922272, "learning_rate": 0.0001576625638492832, "loss": 1.3995, "step": 16303 }, { "epoch": 0.21186304400439865, "grad_norm": 0.4501984715461731, "learning_rate": 0.00015765996438737182, "loss": 1.5679, "step": 16304 }, { "epoch": 0.21187603854831452, "grad_norm": 0.47554242610931396, "learning_rate": 0.00015765736492546044, "loss": 1.3234, "step": 16305 }, { "epoch": 0.2118890330922304, "grad_norm": 0.3562142550945282, "learning_rate": 0.00015765476546354904, "loss": 1.362, "step": 16306 }, { "epoch": 0.21190202763614627, "grad_norm": 0.37659701704978943, "learning_rate": 0.00015765216600163766, "loss": 1.3895, "step": 16307 }, { "epoch": 0.21191502218006214, "grad_norm": 0.3696676194667816, "learning_rate": 0.0001576495665397263, "loss": 1.4501, "step": 16308 }, { "epoch": 0.211928016723978, "grad_norm": 0.3817282021045685, "learning_rate": 0.0001576469670778149, "loss": 1.3747, "step": 16309 }, { "epoch": 0.2119410112678939, "grad_norm": 0.26398220658302307, "learning_rate": 0.0001576443676159035, "loss": 1.3465, "step": 16310 }, { "epoch": 0.21195400581180976, "grad_norm": 0.5268006324768066, "learning_rate": 0.00015764176815399213, "loss": 1.6416, "step": 16311 }, { "epoch": 0.21196700035572563, "grad_norm": 0.41523152589797974, "learning_rate": 0.00015763916869208076, "loss": 1.6528, "step": 16312 }, { "epoch": 0.2119799948996415, "grad_norm": 0.511795699596405, "learning_rate": 0.00015763656923016935, "loss": 1.4691, "step": 16313 }, { "epoch": 0.21199298944355738, "grad_norm": 0.38476619124412537, "learning_rate": 0.00015763396976825798, "loss": 1.4436, "step": 16314 }, { "epoch": 0.21200598398747325, "grad_norm": 0.3326032757759094, "learning_rate": 0.0001576313703063466, "loss": 1.5026, "step": 16315 }, { "epoch": 0.21201897853138912, "grad_norm": 0.2643774151802063, "learning_rate": 0.0001576287708444352, "loss": 1.177, "step": 16316 }, { "epoch": 0.212031973075305, "grad_norm": 0.3378181755542755, "learning_rate": 0.00015762617138252383, "loss": 1.4634, "step": 16317 }, { "epoch": 0.21204496761922087, "grad_norm": 0.328880250453949, "learning_rate": 0.00015762357192061242, "loss": 1.5544, "step": 16318 }, { "epoch": 0.21205796216313674, "grad_norm": 0.4815271198749542, "learning_rate": 0.00015762097245870107, "loss": 1.6177, "step": 16319 }, { "epoch": 0.21207095670705262, "grad_norm": 0.4236106872558594, "learning_rate": 0.00015761837299678967, "loss": 1.6295, "step": 16320 }, { "epoch": 0.2120839512509685, "grad_norm": 0.2801547050476074, "learning_rate": 0.0001576157735348783, "loss": 1.4897, "step": 16321 }, { "epoch": 0.21209694579488436, "grad_norm": 0.5137124061584473, "learning_rate": 0.0001576131740729669, "loss": 1.4729, "step": 16322 }, { "epoch": 0.21210994033880023, "grad_norm": 0.368581086397171, "learning_rate": 0.00015761057461105552, "loss": 1.2429, "step": 16323 }, { "epoch": 0.2121229348827161, "grad_norm": 0.3731137216091156, "learning_rate": 0.00015760797514914414, "loss": 1.472, "step": 16324 }, { "epoch": 0.21213592942663198, "grad_norm": 0.34975066781044006, "learning_rate": 0.00015760537568723274, "loss": 1.4161, "step": 16325 }, { "epoch": 0.21214892397054785, "grad_norm": 0.3392781913280487, "learning_rate": 0.00015760277622532136, "loss": 1.4149, "step": 16326 }, { "epoch": 0.21216191851446373, "grad_norm": 0.4145644009113312, "learning_rate": 0.00015760017676341, "loss": 1.3132, "step": 16327 }, { "epoch": 0.2121749130583796, "grad_norm": 0.4203912615776062, "learning_rate": 0.00015759757730149861, "loss": 1.5654, "step": 16328 }, { "epoch": 0.21218790760229547, "grad_norm": 0.3994449973106384, "learning_rate": 0.0001575949778395872, "loss": 1.4233, "step": 16329 }, { "epoch": 0.21220090214621137, "grad_norm": 0.36465057730674744, "learning_rate": 0.0001575923783776758, "loss": 1.35, "step": 16330 }, { "epoch": 0.21221389669012725, "grad_norm": 0.37644141912460327, "learning_rate": 0.00015758977891576446, "loss": 1.5822, "step": 16331 }, { "epoch": 0.21222689123404312, "grad_norm": 0.3889392614364624, "learning_rate": 0.00015758717945385306, "loss": 1.528, "step": 16332 }, { "epoch": 0.212239885777959, "grad_norm": 0.30650168657302856, "learning_rate": 0.00015758457999194168, "loss": 1.3817, "step": 16333 }, { "epoch": 0.21225288032187486, "grad_norm": 0.33140769600868225, "learning_rate": 0.00015758198053003028, "loss": 1.3821, "step": 16334 }, { "epoch": 0.21226587486579074, "grad_norm": 0.3856028616428375, "learning_rate": 0.0001575793810681189, "loss": 1.2606, "step": 16335 }, { "epoch": 0.2122788694097066, "grad_norm": 0.4393945634365082, "learning_rate": 0.00015757678160620753, "loss": 1.5902, "step": 16336 }, { "epoch": 0.21229186395362248, "grad_norm": 0.3755260109901428, "learning_rate": 0.00015757418214429613, "loss": 1.3909, "step": 16337 }, { "epoch": 0.21230485849753836, "grad_norm": 0.4153316617012024, "learning_rate": 0.00015757158268238475, "loss": 1.554, "step": 16338 }, { "epoch": 0.21231785304145423, "grad_norm": 0.4598907232284546, "learning_rate": 0.00015756898322047337, "loss": 1.51, "step": 16339 }, { "epoch": 0.2123308475853701, "grad_norm": 0.3993397057056427, "learning_rate": 0.000157566383758562, "loss": 1.3916, "step": 16340 }, { "epoch": 0.21234384212928598, "grad_norm": 0.4127173125743866, "learning_rate": 0.0001575637842966506, "loss": 1.2945, "step": 16341 }, { "epoch": 0.21235683667320185, "grad_norm": 0.4294263422489166, "learning_rate": 0.0001575611848347392, "loss": 1.596, "step": 16342 }, { "epoch": 0.21236983121711772, "grad_norm": 0.365667462348938, "learning_rate": 0.00015755858537282785, "loss": 1.4254, "step": 16343 }, { "epoch": 0.2123828257610336, "grad_norm": 0.362148642539978, "learning_rate": 0.00015755598591091644, "loss": 1.6175, "step": 16344 }, { "epoch": 0.21239582030494947, "grad_norm": 0.30672407150268555, "learning_rate": 0.00015755338644900507, "loss": 1.3325, "step": 16345 }, { "epoch": 0.21240881484886534, "grad_norm": 0.30809929966926575, "learning_rate": 0.00015755078698709366, "loss": 1.2251, "step": 16346 }, { "epoch": 0.2124218093927812, "grad_norm": 0.444663405418396, "learning_rate": 0.0001575481875251823, "loss": 1.4597, "step": 16347 }, { "epoch": 0.2124348039366971, "grad_norm": 0.3519798815250397, "learning_rate": 0.0001575455880632709, "loss": 1.4149, "step": 16348 }, { "epoch": 0.21244779848061296, "grad_norm": 0.33298400044441223, "learning_rate": 0.0001575429886013595, "loss": 1.3919, "step": 16349 }, { "epoch": 0.21246079302452883, "grad_norm": 0.45460131764411926, "learning_rate": 0.00015754038913944816, "loss": 1.3684, "step": 16350 }, { "epoch": 0.2124737875684447, "grad_norm": 0.39912545680999756, "learning_rate": 0.00015753778967753676, "loss": 1.3542, "step": 16351 }, { "epoch": 0.21248678211236058, "grad_norm": 0.38210734724998474, "learning_rate": 0.00015753519021562538, "loss": 1.6695, "step": 16352 }, { "epoch": 0.21249977665627645, "grad_norm": 0.38878941535949707, "learning_rate": 0.00015753259075371398, "loss": 1.4372, "step": 16353 }, { "epoch": 0.21251277120019232, "grad_norm": 0.42028963565826416, "learning_rate": 0.0001575299912918026, "loss": 1.3376, "step": 16354 }, { "epoch": 0.2125257657441082, "grad_norm": 0.4192335307598114, "learning_rate": 0.00015752739182989123, "loss": 1.5857, "step": 16355 }, { "epoch": 0.21253876028802407, "grad_norm": 0.34347543120384216, "learning_rate": 0.00015752479236797983, "loss": 1.5225, "step": 16356 }, { "epoch": 0.21255175483193994, "grad_norm": 0.37485194206237793, "learning_rate": 0.00015752219290606845, "loss": 1.5033, "step": 16357 }, { "epoch": 0.21256474937585582, "grad_norm": 0.4230476915836334, "learning_rate": 0.00015751959344415708, "loss": 1.2886, "step": 16358 }, { "epoch": 0.2125777439197717, "grad_norm": 0.3253383934497833, "learning_rate": 0.00015751699398224567, "loss": 1.2778, "step": 16359 }, { "epoch": 0.21259073846368756, "grad_norm": 0.4557027518749237, "learning_rate": 0.0001575143945203343, "loss": 1.4234, "step": 16360 }, { "epoch": 0.21260373300760343, "grad_norm": 0.44025859236717224, "learning_rate": 0.0001575117950584229, "loss": 1.5407, "step": 16361 }, { "epoch": 0.2126167275515193, "grad_norm": 0.4361938238143921, "learning_rate": 0.00015750919559651155, "loss": 1.4776, "step": 16362 }, { "epoch": 0.21262972209543518, "grad_norm": 0.4802328050136566, "learning_rate": 0.00015750659613460015, "loss": 1.4684, "step": 16363 }, { "epoch": 0.21264271663935105, "grad_norm": 0.25335893034935, "learning_rate": 0.00015750399667268877, "loss": 1.2355, "step": 16364 }, { "epoch": 0.21265571118326693, "grad_norm": 0.4080866873264313, "learning_rate": 0.00015750139721077737, "loss": 1.2535, "step": 16365 }, { "epoch": 0.2126687057271828, "grad_norm": 0.4209708273410797, "learning_rate": 0.000157498797748866, "loss": 1.5189, "step": 16366 }, { "epoch": 0.21268170027109867, "grad_norm": 0.3630412817001343, "learning_rate": 0.00015749619828695462, "loss": 1.4, "step": 16367 }, { "epoch": 0.21269469481501455, "grad_norm": 0.3301694095134735, "learning_rate": 0.0001574935988250432, "loss": 1.1731, "step": 16368 }, { "epoch": 0.21270768935893042, "grad_norm": 0.3677677512168884, "learning_rate": 0.00015749099936313184, "loss": 1.2785, "step": 16369 }, { "epoch": 0.2127206839028463, "grad_norm": 0.3904007375240326, "learning_rate": 0.00015748839990122046, "loss": 1.3091, "step": 16370 }, { "epoch": 0.21273367844676216, "grad_norm": 0.4355577826499939, "learning_rate": 0.00015748580043930906, "loss": 1.6459, "step": 16371 }, { "epoch": 0.21274667299067804, "grad_norm": 0.29074719548225403, "learning_rate": 0.00015748320097739768, "loss": 1.1778, "step": 16372 }, { "epoch": 0.2127596675345939, "grad_norm": 0.36832883954048157, "learning_rate": 0.00015748060151548628, "loss": 1.3407, "step": 16373 }, { "epoch": 0.21277266207850978, "grad_norm": 0.41720035672187805, "learning_rate": 0.00015747800205357493, "loss": 1.3455, "step": 16374 }, { "epoch": 0.21278565662242566, "grad_norm": 0.38714170455932617, "learning_rate": 0.00015747540259166353, "loss": 1.3966, "step": 16375 }, { "epoch": 0.21279865116634153, "grad_norm": 0.418650358915329, "learning_rate": 0.00015747280312975216, "loss": 1.536, "step": 16376 }, { "epoch": 0.2128116457102574, "grad_norm": 0.4572198987007141, "learning_rate": 0.00015747020366784075, "loss": 1.3927, "step": 16377 }, { "epoch": 0.21282464025417328, "grad_norm": 0.3800646960735321, "learning_rate": 0.00015746760420592938, "loss": 1.4264, "step": 16378 }, { "epoch": 0.21283763479808915, "grad_norm": 0.40469101071357727, "learning_rate": 0.000157465004744018, "loss": 1.5828, "step": 16379 }, { "epoch": 0.21285062934200502, "grad_norm": 0.43588578701019287, "learning_rate": 0.0001574624052821066, "loss": 1.3067, "step": 16380 }, { "epoch": 0.2128636238859209, "grad_norm": 0.38335245847702026, "learning_rate": 0.00015745980582019522, "loss": 1.3734, "step": 16381 }, { "epoch": 0.21287661842983677, "grad_norm": 0.41501665115356445, "learning_rate": 0.00015745720635828385, "loss": 1.4202, "step": 16382 }, { "epoch": 0.21288961297375264, "grad_norm": 0.3989786207675934, "learning_rate": 0.00015745460689637245, "loss": 1.5109, "step": 16383 }, { "epoch": 0.2129026075176685, "grad_norm": 0.4099924862384796, "learning_rate": 0.00015745200743446107, "loss": 1.4466, "step": 16384 }, { "epoch": 0.21291560206158439, "grad_norm": 0.41755086183547974, "learning_rate": 0.0001574494079725497, "loss": 1.4014, "step": 16385 }, { "epoch": 0.21292859660550026, "grad_norm": 0.3694026470184326, "learning_rate": 0.00015744680851063832, "loss": 1.2747, "step": 16386 }, { "epoch": 0.21294159114941613, "grad_norm": 0.4344460368156433, "learning_rate": 0.00015744420904872692, "loss": 1.4092, "step": 16387 }, { "epoch": 0.212954585693332, "grad_norm": 0.30986517667770386, "learning_rate": 0.00015744160958681554, "loss": 1.3623, "step": 16388 }, { "epoch": 0.21296758023724788, "grad_norm": 0.42683228850364685, "learning_rate": 0.00015743901012490417, "loss": 1.4106, "step": 16389 }, { "epoch": 0.21298057478116375, "grad_norm": 0.3688706159591675, "learning_rate": 0.00015743641066299276, "loss": 1.3762, "step": 16390 }, { "epoch": 0.21299356932507962, "grad_norm": 0.4859285354614258, "learning_rate": 0.0001574338112010814, "loss": 1.4464, "step": 16391 }, { "epoch": 0.2130065638689955, "grad_norm": 0.4936436414718628, "learning_rate": 0.00015743121173916998, "loss": 1.4556, "step": 16392 }, { "epoch": 0.21301955841291137, "grad_norm": 0.38862279057502747, "learning_rate": 0.00015742861227725864, "loss": 1.3634, "step": 16393 }, { "epoch": 0.21303255295682724, "grad_norm": 0.45797985792160034, "learning_rate": 0.00015742601281534723, "loss": 1.425, "step": 16394 }, { "epoch": 0.21304554750074312, "grad_norm": 0.37307730317115784, "learning_rate": 0.00015742341335343586, "loss": 1.3996, "step": 16395 }, { "epoch": 0.213058542044659, "grad_norm": 0.43095463514328003, "learning_rate": 0.00015742081389152446, "loss": 1.3158, "step": 16396 }, { "epoch": 0.21307153658857486, "grad_norm": 0.47890886664390564, "learning_rate": 0.00015741821442961308, "loss": 1.355, "step": 16397 }, { "epoch": 0.21308453113249073, "grad_norm": 0.3687115013599396, "learning_rate": 0.0001574156149677017, "loss": 1.2198, "step": 16398 }, { "epoch": 0.2130975256764066, "grad_norm": 0.39474666118621826, "learning_rate": 0.0001574130155057903, "loss": 1.5039, "step": 16399 }, { "epoch": 0.21311052022032248, "grad_norm": 0.3476211726665497, "learning_rate": 0.00015741041604387893, "loss": 1.3581, "step": 16400 }, { "epoch": 0.21312351476423835, "grad_norm": 0.4627285897731781, "learning_rate": 0.00015740781658196755, "loss": 1.5028, "step": 16401 }, { "epoch": 0.21313650930815423, "grad_norm": 0.37881433963775635, "learning_rate": 0.00015740521712005615, "loss": 1.4737, "step": 16402 }, { "epoch": 0.2131495038520701, "grad_norm": 0.4564220607280731, "learning_rate": 0.00015740261765814477, "loss": 1.3839, "step": 16403 }, { "epoch": 0.21316249839598597, "grad_norm": 0.3127775192260742, "learning_rate": 0.00015740001819623337, "loss": 1.5259, "step": 16404 }, { "epoch": 0.21317549293990185, "grad_norm": 0.448333740234375, "learning_rate": 0.00015739741873432202, "loss": 1.2491, "step": 16405 }, { "epoch": 0.21318848748381775, "grad_norm": 0.4341827630996704, "learning_rate": 0.00015739481927241062, "loss": 1.5643, "step": 16406 }, { "epoch": 0.21320148202773362, "grad_norm": 0.4399312734603882, "learning_rate": 0.00015739221981049924, "loss": 1.4382, "step": 16407 }, { "epoch": 0.2132144765716495, "grad_norm": 0.35957273840904236, "learning_rate": 0.00015738962034858784, "loss": 1.5121, "step": 16408 }, { "epoch": 0.21322747111556536, "grad_norm": 0.25651615858078003, "learning_rate": 0.00015738702088667646, "loss": 1.3806, "step": 16409 }, { "epoch": 0.21324046565948124, "grad_norm": 0.3757201135158539, "learning_rate": 0.0001573844214247651, "loss": 1.4592, "step": 16410 }, { "epoch": 0.2132534602033971, "grad_norm": 0.4285160303115845, "learning_rate": 0.0001573818219628537, "loss": 1.5571, "step": 16411 }, { "epoch": 0.21326645474731298, "grad_norm": 0.37521782517433167, "learning_rate": 0.0001573792225009423, "loss": 1.3879, "step": 16412 }, { "epoch": 0.21327944929122886, "grad_norm": 0.42501676082611084, "learning_rate": 0.00015737662303903094, "loss": 1.4827, "step": 16413 }, { "epoch": 0.21329244383514473, "grad_norm": 0.3862791061401367, "learning_rate": 0.00015737402357711953, "loss": 1.2277, "step": 16414 }, { "epoch": 0.2133054383790606, "grad_norm": 0.34886133670806885, "learning_rate": 0.00015737142411520816, "loss": 1.2409, "step": 16415 }, { "epoch": 0.21331843292297648, "grad_norm": 0.4193170368671417, "learning_rate": 0.00015736882465329676, "loss": 1.4239, "step": 16416 }, { "epoch": 0.21333142746689235, "grad_norm": 0.3871462941169739, "learning_rate": 0.0001573662251913854, "loss": 1.5403, "step": 16417 }, { "epoch": 0.21334442201080822, "grad_norm": 0.451837956905365, "learning_rate": 0.000157363625729474, "loss": 1.451, "step": 16418 }, { "epoch": 0.2133574165547241, "grad_norm": 0.33466705679893494, "learning_rate": 0.00015736102626756263, "loss": 1.3357, "step": 16419 }, { "epoch": 0.21337041109863997, "grad_norm": 0.4441995322704315, "learning_rate": 0.00015735842680565123, "loss": 1.2815, "step": 16420 }, { "epoch": 0.21338340564255584, "grad_norm": 0.24887260794639587, "learning_rate": 0.00015735582734373985, "loss": 1.2635, "step": 16421 }, { "epoch": 0.2133964001864717, "grad_norm": 0.2998470664024353, "learning_rate": 0.00015735322788182847, "loss": 1.4651, "step": 16422 }, { "epoch": 0.21340939473038759, "grad_norm": 0.3927188515663147, "learning_rate": 0.00015735062841991707, "loss": 1.4348, "step": 16423 }, { "epoch": 0.21342238927430346, "grad_norm": 0.4490024745464325, "learning_rate": 0.00015734802895800572, "loss": 1.2902, "step": 16424 }, { "epoch": 0.21343538381821933, "grad_norm": 0.4487338960170746, "learning_rate": 0.00015734542949609432, "loss": 1.3611, "step": 16425 }, { "epoch": 0.2134483783621352, "grad_norm": 0.3307473659515381, "learning_rate": 0.00015734283003418292, "loss": 1.2051, "step": 16426 }, { "epoch": 0.21346137290605108, "grad_norm": 0.3719083368778229, "learning_rate": 0.00015734023057227154, "loss": 1.2721, "step": 16427 }, { "epoch": 0.21347436744996695, "grad_norm": 0.38454705476760864, "learning_rate": 0.00015733763111036017, "loss": 1.5339, "step": 16428 }, { "epoch": 0.21348736199388282, "grad_norm": 0.44238048791885376, "learning_rate": 0.0001573350316484488, "loss": 1.2716, "step": 16429 }, { "epoch": 0.2135003565377987, "grad_norm": 0.38776183128356934, "learning_rate": 0.0001573324321865374, "loss": 1.5555, "step": 16430 }, { "epoch": 0.21351335108171457, "grad_norm": 0.38936471939086914, "learning_rate": 0.00015732983272462601, "loss": 1.202, "step": 16431 }, { "epoch": 0.21352634562563044, "grad_norm": 0.4147506654262543, "learning_rate": 0.00015732723326271464, "loss": 1.4464, "step": 16432 }, { "epoch": 0.21353934016954632, "grad_norm": 0.4918941557407379, "learning_rate": 0.00015732463380080324, "loss": 1.5148, "step": 16433 }, { "epoch": 0.2135523347134622, "grad_norm": 0.35338094830513, "learning_rate": 0.00015732203433889186, "loss": 1.1859, "step": 16434 }, { "epoch": 0.21356532925737806, "grad_norm": 0.449516236782074, "learning_rate": 0.00015731943487698046, "loss": 1.3216, "step": 16435 }, { "epoch": 0.21357832380129393, "grad_norm": 0.36722397804260254, "learning_rate": 0.0001573168354150691, "loss": 1.2822, "step": 16436 }, { "epoch": 0.2135913183452098, "grad_norm": 0.3264809548854828, "learning_rate": 0.0001573142359531577, "loss": 1.3596, "step": 16437 }, { "epoch": 0.21360431288912568, "grad_norm": 0.42582595348358154, "learning_rate": 0.0001573116364912463, "loss": 1.5805, "step": 16438 }, { "epoch": 0.21361730743304155, "grad_norm": 0.4740326404571533, "learning_rate": 0.00015730903702933493, "loss": 1.4249, "step": 16439 }, { "epoch": 0.21363030197695743, "grad_norm": 0.3801330029964447, "learning_rate": 0.00015730643756742355, "loss": 1.4191, "step": 16440 }, { "epoch": 0.2136432965208733, "grad_norm": 0.3570318818092346, "learning_rate": 0.00015730383810551218, "loss": 1.5236, "step": 16441 }, { "epoch": 0.21365629106478917, "grad_norm": 0.3715328574180603, "learning_rate": 0.00015730123864360077, "loss": 1.4106, "step": 16442 }, { "epoch": 0.21366928560870505, "grad_norm": 0.46679291129112244, "learning_rate": 0.0001572986391816894, "loss": 1.4655, "step": 16443 }, { "epoch": 0.21368228015262092, "grad_norm": 0.3911263346672058, "learning_rate": 0.00015729603971977802, "loss": 1.2921, "step": 16444 }, { "epoch": 0.2136952746965368, "grad_norm": 0.45583686232566833, "learning_rate": 0.00015729344025786662, "loss": 1.3771, "step": 16445 }, { "epoch": 0.21370826924045266, "grad_norm": 0.44621261954307556, "learning_rate": 0.00015729084079595525, "loss": 1.4974, "step": 16446 }, { "epoch": 0.21372126378436854, "grad_norm": 0.24736717343330383, "learning_rate": 0.00015728824133404384, "loss": 1.3437, "step": 16447 }, { "epoch": 0.2137342583282844, "grad_norm": 0.47360867261886597, "learning_rate": 0.0001572856418721325, "loss": 1.3008, "step": 16448 }, { "epoch": 0.21374725287220028, "grad_norm": 0.4432130753993988, "learning_rate": 0.0001572830424102211, "loss": 1.292, "step": 16449 }, { "epoch": 0.21376024741611616, "grad_norm": 0.4720573425292969, "learning_rate": 0.00015728044294830972, "loss": 1.2591, "step": 16450 }, { "epoch": 0.21377324196003203, "grad_norm": 0.32347947359085083, "learning_rate": 0.00015727784348639831, "loss": 1.2989, "step": 16451 }, { "epoch": 0.2137862365039479, "grad_norm": 0.39931702613830566, "learning_rate": 0.00015727524402448694, "loss": 1.3725, "step": 16452 }, { "epoch": 0.21379923104786377, "grad_norm": 0.36974623799324036, "learning_rate": 0.00015727264456257556, "loss": 1.5771, "step": 16453 }, { "epoch": 0.21381222559177965, "grad_norm": 0.3857141137123108, "learning_rate": 0.00015727004510066416, "loss": 1.4767, "step": 16454 }, { "epoch": 0.21382522013569552, "grad_norm": 0.37301379442214966, "learning_rate": 0.00015726744563875278, "loss": 1.2362, "step": 16455 }, { "epoch": 0.2138382146796114, "grad_norm": 0.384650319814682, "learning_rate": 0.0001572648461768414, "loss": 1.3956, "step": 16456 }, { "epoch": 0.21385120922352727, "grad_norm": 0.35109105706214905, "learning_rate": 0.00015726224671493, "loss": 1.1436, "step": 16457 }, { "epoch": 0.21386420376744314, "grad_norm": 0.4444829225540161, "learning_rate": 0.00015725964725301863, "loss": 1.4092, "step": 16458 }, { "epoch": 0.213877198311359, "grad_norm": 0.38663068413734436, "learning_rate": 0.00015725704779110726, "loss": 1.3128, "step": 16459 }, { "epoch": 0.21389019285527489, "grad_norm": 0.46210744976997375, "learning_rate": 0.00015725444832919588, "loss": 1.507, "step": 16460 }, { "epoch": 0.21390318739919076, "grad_norm": 0.3999937176704407, "learning_rate": 0.00015725184886728448, "loss": 1.3442, "step": 16461 }, { "epoch": 0.21391618194310663, "grad_norm": 0.4437315762042999, "learning_rate": 0.0001572492494053731, "loss": 1.4218, "step": 16462 }, { "epoch": 0.2139291764870225, "grad_norm": 0.422163188457489, "learning_rate": 0.00015724664994346173, "loss": 1.318, "step": 16463 }, { "epoch": 0.21394217103093838, "grad_norm": 0.4266189932823181, "learning_rate": 0.00015724405048155032, "loss": 1.5237, "step": 16464 }, { "epoch": 0.21395516557485425, "grad_norm": 0.45921310782432556, "learning_rate": 0.00015724145101963895, "loss": 1.5496, "step": 16465 }, { "epoch": 0.21396816011877012, "grad_norm": 0.37793177366256714, "learning_rate": 0.00015723885155772755, "loss": 1.3886, "step": 16466 }, { "epoch": 0.213981154662686, "grad_norm": 0.37618908286094666, "learning_rate": 0.00015723625209581617, "loss": 1.4356, "step": 16467 }, { "epoch": 0.21399414920660187, "grad_norm": 0.3298528790473938, "learning_rate": 0.0001572336526339048, "loss": 1.4025, "step": 16468 }, { "epoch": 0.21400714375051774, "grad_norm": 0.3641802966594696, "learning_rate": 0.0001572310531719934, "loss": 1.3217, "step": 16469 }, { "epoch": 0.21402013829443362, "grad_norm": 0.398813933134079, "learning_rate": 0.00015722845371008202, "loss": 1.223, "step": 16470 }, { "epoch": 0.2140331328383495, "grad_norm": 0.45190438628196716, "learning_rate": 0.00015722585424817064, "loss": 1.6055, "step": 16471 }, { "epoch": 0.21404612738226536, "grad_norm": 0.38312652707099915, "learning_rate": 0.00015722325478625927, "loss": 1.3531, "step": 16472 }, { "epoch": 0.21405912192618123, "grad_norm": 0.3588237166404724, "learning_rate": 0.00015722065532434786, "loss": 1.2749, "step": 16473 }, { "epoch": 0.2140721164700971, "grad_norm": 0.3974151313304901, "learning_rate": 0.0001572180558624365, "loss": 1.4132, "step": 16474 }, { "epoch": 0.21408511101401298, "grad_norm": 0.36366257071495056, "learning_rate": 0.0001572154564005251, "loss": 1.4313, "step": 16475 }, { "epoch": 0.21409810555792885, "grad_norm": 0.4387637972831726, "learning_rate": 0.0001572128569386137, "loss": 1.4985, "step": 16476 }, { "epoch": 0.21411110010184473, "grad_norm": 0.31119364500045776, "learning_rate": 0.00015721025747670233, "loss": 1.3743, "step": 16477 }, { "epoch": 0.2141240946457606, "grad_norm": 0.21330766379833221, "learning_rate": 0.00015720765801479093, "loss": 1.4404, "step": 16478 }, { "epoch": 0.21413708918967647, "grad_norm": 0.3332308530807495, "learning_rate": 0.00015720505855287958, "loss": 1.3968, "step": 16479 }, { "epoch": 0.21415008373359234, "grad_norm": 0.3787296414375305, "learning_rate": 0.00015720245909096818, "loss": 1.3836, "step": 16480 }, { "epoch": 0.21416307827750822, "grad_norm": 0.44584155082702637, "learning_rate": 0.00015719985962905678, "loss": 1.2373, "step": 16481 }, { "epoch": 0.21417607282142412, "grad_norm": 0.31913888454437256, "learning_rate": 0.0001571972601671454, "loss": 1.4198, "step": 16482 }, { "epoch": 0.21418906736534, "grad_norm": 0.5201233625411987, "learning_rate": 0.00015719466070523403, "loss": 1.4421, "step": 16483 }, { "epoch": 0.21420206190925586, "grad_norm": 0.4701615571975708, "learning_rate": 0.00015719206124332265, "loss": 1.4973, "step": 16484 }, { "epoch": 0.21421505645317174, "grad_norm": 0.5191045999526978, "learning_rate": 0.00015718946178141125, "loss": 1.4815, "step": 16485 }, { "epoch": 0.2142280509970876, "grad_norm": 0.352174311876297, "learning_rate": 0.00015718686231949987, "loss": 1.5913, "step": 16486 }, { "epoch": 0.21424104554100348, "grad_norm": 0.45393285155296326, "learning_rate": 0.0001571842628575885, "loss": 1.4239, "step": 16487 }, { "epoch": 0.21425404008491936, "grad_norm": 0.3982965052127838, "learning_rate": 0.0001571816633956771, "loss": 1.5383, "step": 16488 }, { "epoch": 0.21426703462883523, "grad_norm": 0.37316644191741943, "learning_rate": 0.00015717906393376572, "loss": 1.4464, "step": 16489 }, { "epoch": 0.2142800291727511, "grad_norm": 0.46091148257255554, "learning_rate": 0.00015717646447185432, "loss": 1.5204, "step": 16490 }, { "epoch": 0.21429302371666697, "grad_norm": 0.462181031703949, "learning_rate": 0.00015717386500994297, "loss": 1.5215, "step": 16491 }, { "epoch": 0.21430601826058285, "grad_norm": 0.4024527966976166, "learning_rate": 0.00015717126554803157, "loss": 1.3385, "step": 16492 }, { "epoch": 0.21431901280449872, "grad_norm": 0.3024749457836151, "learning_rate": 0.00015716866608612016, "loss": 1.4666, "step": 16493 }, { "epoch": 0.2143320073484146, "grad_norm": 0.9460294246673584, "learning_rate": 0.0001571660666242088, "loss": 1.5663, "step": 16494 }, { "epoch": 0.21434500189233047, "grad_norm": 0.3830714523792267, "learning_rate": 0.0001571634671622974, "loss": 1.3336, "step": 16495 }, { "epoch": 0.21435799643624634, "grad_norm": 0.3980732858181, "learning_rate": 0.00015716086770038604, "loss": 1.4113, "step": 16496 }, { "epoch": 0.2143709909801622, "grad_norm": 0.33247390389442444, "learning_rate": 0.00015715826823847463, "loss": 1.3418, "step": 16497 }, { "epoch": 0.21438398552407809, "grad_norm": 0.3369801640510559, "learning_rate": 0.00015715566877656326, "loss": 1.2834, "step": 16498 }, { "epoch": 0.21439698006799396, "grad_norm": 0.413433313369751, "learning_rate": 0.00015715306931465188, "loss": 1.4559, "step": 16499 }, { "epoch": 0.21440997461190983, "grad_norm": 0.5788470506668091, "learning_rate": 0.00015715046985274048, "loss": 1.4304, "step": 16500 }, { "epoch": 0.2144229691558257, "grad_norm": 0.44265472888946533, "learning_rate": 0.0001571478703908291, "loss": 1.4603, "step": 16501 }, { "epoch": 0.21443596369974158, "grad_norm": 0.42819735407829285, "learning_rate": 0.00015714527092891773, "loss": 1.4628, "step": 16502 }, { "epoch": 0.21444895824365745, "grad_norm": 0.5448420643806458, "learning_rate": 0.00015714267146700635, "loss": 1.5795, "step": 16503 }, { "epoch": 0.21446195278757332, "grad_norm": 0.37777411937713623, "learning_rate": 0.00015714007200509495, "loss": 1.3947, "step": 16504 }, { "epoch": 0.2144749473314892, "grad_norm": 0.3798162341117859, "learning_rate": 0.00015713747254318355, "loss": 1.3604, "step": 16505 }, { "epoch": 0.21448794187540507, "grad_norm": 0.3861330449581146, "learning_rate": 0.0001571348730812722, "loss": 1.1878, "step": 16506 }, { "epoch": 0.21450093641932094, "grad_norm": 0.2823701500892639, "learning_rate": 0.0001571322736193608, "loss": 1.1425, "step": 16507 }, { "epoch": 0.21451393096323682, "grad_norm": 0.398682177066803, "learning_rate": 0.00015712967415744942, "loss": 1.3511, "step": 16508 }, { "epoch": 0.2145269255071527, "grad_norm": 0.39075520634651184, "learning_rate": 0.00015712707469553802, "loss": 1.2274, "step": 16509 }, { "epoch": 0.21453992005106856, "grad_norm": 0.4440814256668091, "learning_rate": 0.00015712447523362664, "loss": 1.4322, "step": 16510 }, { "epoch": 0.21455291459498443, "grad_norm": 0.4309839606285095, "learning_rate": 0.00015712187577171527, "loss": 1.6187, "step": 16511 }, { "epoch": 0.2145659091389003, "grad_norm": 0.2919430434703827, "learning_rate": 0.00015711927630980387, "loss": 1.3373, "step": 16512 }, { "epoch": 0.21457890368281618, "grad_norm": 0.351412832736969, "learning_rate": 0.0001571166768478925, "loss": 1.2871, "step": 16513 }, { "epoch": 0.21459189822673205, "grad_norm": 0.36734259128570557, "learning_rate": 0.00015711407738598111, "loss": 1.3329, "step": 16514 }, { "epoch": 0.21460489277064793, "grad_norm": 0.3979712128639221, "learning_rate": 0.00015711147792406974, "loss": 1.374, "step": 16515 }, { "epoch": 0.2146178873145638, "grad_norm": 0.4031108021736145, "learning_rate": 0.00015710887846215834, "loss": 1.4755, "step": 16516 }, { "epoch": 0.21463088185847967, "grad_norm": 0.3169833719730377, "learning_rate": 0.00015710627900024696, "loss": 1.3009, "step": 16517 }, { "epoch": 0.21464387640239554, "grad_norm": 0.33369866013526917, "learning_rate": 0.00015710367953833559, "loss": 1.177, "step": 16518 }, { "epoch": 0.21465687094631142, "grad_norm": 0.5357704162597656, "learning_rate": 0.00015710108007642418, "loss": 1.368, "step": 16519 }, { "epoch": 0.2146698654902273, "grad_norm": 0.4652195870876312, "learning_rate": 0.0001570984806145128, "loss": 1.3483, "step": 16520 }, { "epoch": 0.21468286003414316, "grad_norm": 0.4243440330028534, "learning_rate": 0.0001570958811526014, "loss": 1.4274, "step": 16521 }, { "epoch": 0.21469585457805904, "grad_norm": 0.48390108346939087, "learning_rate": 0.00015709328169069003, "loss": 1.5251, "step": 16522 }, { "epoch": 0.2147088491219749, "grad_norm": 0.391955703496933, "learning_rate": 0.00015709068222877865, "loss": 1.426, "step": 16523 }, { "epoch": 0.21472184366589078, "grad_norm": 0.38584983348846436, "learning_rate": 0.00015708808276686725, "loss": 1.4606, "step": 16524 }, { "epoch": 0.21473483820980666, "grad_norm": 0.43249809741973877, "learning_rate": 0.00015708548330495588, "loss": 1.4831, "step": 16525 }, { "epoch": 0.21474783275372253, "grad_norm": 0.3600912094116211, "learning_rate": 0.0001570828838430445, "loss": 1.4734, "step": 16526 }, { "epoch": 0.2147608272976384, "grad_norm": 0.47654473781585693, "learning_rate": 0.00015708028438113312, "loss": 1.4104, "step": 16527 }, { "epoch": 0.21477382184155427, "grad_norm": 0.420943021774292, "learning_rate": 0.00015707768491922172, "loss": 1.4508, "step": 16528 }, { "epoch": 0.21478681638547015, "grad_norm": 0.3651467263698578, "learning_rate": 0.00015707508545731035, "loss": 1.4071, "step": 16529 }, { "epoch": 0.21479981092938602, "grad_norm": 0.3509159982204437, "learning_rate": 0.00015707248599539897, "loss": 1.3449, "step": 16530 }, { "epoch": 0.2148128054733019, "grad_norm": 0.40557292103767395, "learning_rate": 0.00015706988653348757, "loss": 1.2499, "step": 16531 }, { "epoch": 0.21482580001721777, "grad_norm": 0.5104345679283142, "learning_rate": 0.0001570672870715762, "loss": 1.3207, "step": 16532 }, { "epoch": 0.21483879456113364, "grad_norm": 0.35232189297676086, "learning_rate": 0.00015706468760966482, "loss": 1.4028, "step": 16533 }, { "epoch": 0.2148517891050495, "grad_norm": 0.3544084131717682, "learning_rate": 0.00015706208814775344, "loss": 1.442, "step": 16534 }, { "epoch": 0.21486478364896539, "grad_norm": 0.3969372808933258, "learning_rate": 0.00015705948868584204, "loss": 1.3704, "step": 16535 }, { "epoch": 0.21487777819288126, "grad_norm": 0.28448039293289185, "learning_rate": 0.00015705688922393064, "loss": 1.4701, "step": 16536 }, { "epoch": 0.21489077273679713, "grad_norm": 0.4381963014602661, "learning_rate": 0.0001570542897620193, "loss": 1.4371, "step": 16537 }, { "epoch": 0.214903767280713, "grad_norm": 0.45803946256637573, "learning_rate": 0.00015705169030010789, "loss": 1.3939, "step": 16538 }, { "epoch": 0.21491676182462888, "grad_norm": 0.47935450077056885, "learning_rate": 0.0001570490908381965, "loss": 1.1933, "step": 16539 }, { "epoch": 0.21492975636854475, "grad_norm": 0.40125685930252075, "learning_rate": 0.0001570464913762851, "loss": 1.367, "step": 16540 }, { "epoch": 0.21494275091246062, "grad_norm": 0.5328443050384521, "learning_rate": 0.00015704389191437373, "loss": 1.5222, "step": 16541 }, { "epoch": 0.2149557454563765, "grad_norm": 0.46879997849464417, "learning_rate": 0.00015704129245246236, "loss": 1.4179, "step": 16542 }, { "epoch": 0.21496874000029237, "grad_norm": 0.35379940271377563, "learning_rate": 0.00015703869299055095, "loss": 1.4787, "step": 16543 }, { "epoch": 0.21498173454420824, "grad_norm": 0.4055086076259613, "learning_rate": 0.00015703609352863958, "loss": 1.2983, "step": 16544 }, { "epoch": 0.21499472908812411, "grad_norm": 0.33105772733688354, "learning_rate": 0.0001570334940667282, "loss": 1.316, "step": 16545 }, { "epoch": 0.21500772363204, "grad_norm": 0.35414037108421326, "learning_rate": 0.00015703089460481683, "loss": 1.225, "step": 16546 }, { "epoch": 0.21502071817595586, "grad_norm": 0.41440072655677795, "learning_rate": 0.00015702829514290542, "loss": 1.5852, "step": 16547 }, { "epoch": 0.21503371271987173, "grad_norm": 0.3771766722202301, "learning_rate": 0.00015702569568099402, "loss": 1.302, "step": 16548 }, { "epoch": 0.2150467072637876, "grad_norm": 0.4226526618003845, "learning_rate": 0.00015702309621908267, "loss": 1.3977, "step": 16549 }, { "epoch": 0.21505970180770348, "grad_norm": 0.4723445177078247, "learning_rate": 0.00015702049675717127, "loss": 1.4628, "step": 16550 }, { "epoch": 0.21507269635161935, "grad_norm": 0.4863370954990387, "learning_rate": 0.0001570178972952599, "loss": 1.6494, "step": 16551 }, { "epoch": 0.21508569089553523, "grad_norm": 0.3522881865501404, "learning_rate": 0.0001570152978333485, "loss": 1.4251, "step": 16552 }, { "epoch": 0.2150986854394511, "grad_norm": 0.3577641248703003, "learning_rate": 0.00015701269837143712, "loss": 1.1627, "step": 16553 }, { "epoch": 0.21511167998336697, "grad_norm": 0.3905261158943176, "learning_rate": 0.00015701009890952574, "loss": 1.2126, "step": 16554 }, { "epoch": 0.21512467452728284, "grad_norm": 0.30502554774284363, "learning_rate": 0.00015700749944761434, "loss": 1.4291, "step": 16555 }, { "epoch": 0.21513766907119872, "grad_norm": 0.3698728680610657, "learning_rate": 0.00015700489998570296, "loss": 1.328, "step": 16556 }, { "epoch": 0.2151506636151146, "grad_norm": 0.3145277202129364, "learning_rate": 0.0001570023005237916, "loss": 1.4469, "step": 16557 }, { "epoch": 0.2151636581590305, "grad_norm": 0.40194594860076904, "learning_rate": 0.0001569997010618802, "loss": 1.5475, "step": 16558 }, { "epoch": 0.21517665270294636, "grad_norm": 0.5104473233222961, "learning_rate": 0.0001569971015999688, "loss": 1.4858, "step": 16559 }, { "epoch": 0.21518964724686224, "grad_norm": 0.38238149881362915, "learning_rate": 0.0001569945021380574, "loss": 1.2252, "step": 16560 }, { "epoch": 0.2152026417907781, "grad_norm": 0.39461711049079895, "learning_rate": 0.00015699190267614606, "loss": 1.3934, "step": 16561 }, { "epoch": 0.21521563633469398, "grad_norm": 0.38339686393737793, "learning_rate": 0.00015698930321423466, "loss": 1.5358, "step": 16562 }, { "epoch": 0.21522863087860986, "grad_norm": 0.38039103150367737, "learning_rate": 0.00015698670375232328, "loss": 1.2871, "step": 16563 }, { "epoch": 0.21524162542252573, "grad_norm": 0.43147265911102295, "learning_rate": 0.00015698410429041188, "loss": 1.4518, "step": 16564 }, { "epoch": 0.2152546199664416, "grad_norm": 0.3983471095561981, "learning_rate": 0.0001569815048285005, "loss": 1.4717, "step": 16565 }, { "epoch": 0.21526761451035747, "grad_norm": 0.3225042223930359, "learning_rate": 0.00015697890536658913, "loss": 1.3561, "step": 16566 }, { "epoch": 0.21528060905427335, "grad_norm": 0.3922005593776703, "learning_rate": 0.00015697630590467772, "loss": 1.4307, "step": 16567 }, { "epoch": 0.21529360359818922, "grad_norm": 0.3118862807750702, "learning_rate": 0.00015697370644276635, "loss": 1.3035, "step": 16568 }, { "epoch": 0.2153065981421051, "grad_norm": 0.39475566148757935, "learning_rate": 0.00015697110698085497, "loss": 1.2448, "step": 16569 }, { "epoch": 0.21531959268602097, "grad_norm": 0.3987126350402832, "learning_rate": 0.0001569685075189436, "loss": 1.3782, "step": 16570 }, { "epoch": 0.21533258722993684, "grad_norm": 0.47500649094581604, "learning_rate": 0.0001569659080570322, "loss": 1.4427, "step": 16571 }, { "epoch": 0.2153455817738527, "grad_norm": 0.33055704832077026, "learning_rate": 0.00015696330859512082, "loss": 1.187, "step": 16572 }, { "epoch": 0.21535857631776859, "grad_norm": 0.4292328953742981, "learning_rate": 0.00015696070913320944, "loss": 1.3624, "step": 16573 }, { "epoch": 0.21537157086168446, "grad_norm": 0.30911752581596375, "learning_rate": 0.00015695810967129804, "loss": 1.2905, "step": 16574 }, { "epoch": 0.21538456540560033, "grad_norm": 0.4092995226383209, "learning_rate": 0.00015695551020938667, "loss": 1.4696, "step": 16575 }, { "epoch": 0.2153975599495162, "grad_norm": 0.45963016152381897, "learning_rate": 0.0001569529107474753, "loss": 1.4939, "step": 16576 }, { "epoch": 0.21541055449343208, "grad_norm": 0.42844027280807495, "learning_rate": 0.0001569503112855639, "loss": 1.4469, "step": 16577 }, { "epoch": 0.21542354903734795, "grad_norm": 0.4033646583557129, "learning_rate": 0.0001569477118236525, "loss": 1.2084, "step": 16578 }, { "epoch": 0.21543654358126382, "grad_norm": 0.3368788957595825, "learning_rate": 0.0001569451123617411, "loss": 1.2441, "step": 16579 }, { "epoch": 0.2154495381251797, "grad_norm": 0.3674704432487488, "learning_rate": 0.00015694251289982976, "loss": 1.558, "step": 16580 }, { "epoch": 0.21546253266909557, "grad_norm": 0.45151060819625854, "learning_rate": 0.00015693991343791836, "loss": 1.4704, "step": 16581 }, { "epoch": 0.21547552721301144, "grad_norm": 0.471706360578537, "learning_rate": 0.00015693731397600698, "loss": 1.4053, "step": 16582 }, { "epoch": 0.21548852175692731, "grad_norm": 0.37198522686958313, "learning_rate": 0.00015693471451409558, "loss": 1.5183, "step": 16583 }, { "epoch": 0.2155015163008432, "grad_norm": 0.4410859942436218, "learning_rate": 0.0001569321150521842, "loss": 1.4267, "step": 16584 }, { "epoch": 0.21551451084475906, "grad_norm": 0.4573918879032135, "learning_rate": 0.00015692951559027283, "loss": 1.5274, "step": 16585 }, { "epoch": 0.21552750538867493, "grad_norm": 0.29926973581314087, "learning_rate": 0.00015692691612836143, "loss": 1.2994, "step": 16586 }, { "epoch": 0.2155404999325908, "grad_norm": 0.31536000967025757, "learning_rate": 0.00015692431666645005, "loss": 1.4826, "step": 16587 }, { "epoch": 0.21555349447650668, "grad_norm": 0.39215388894081116, "learning_rate": 0.00015692171720453868, "loss": 1.5273, "step": 16588 }, { "epoch": 0.21556648902042255, "grad_norm": 0.3824387192726135, "learning_rate": 0.00015691911774262727, "loss": 1.2518, "step": 16589 }, { "epoch": 0.21557948356433843, "grad_norm": 0.4292064309120178, "learning_rate": 0.0001569165182807159, "loss": 1.2965, "step": 16590 }, { "epoch": 0.2155924781082543, "grad_norm": 0.4297655522823334, "learning_rate": 0.0001569139188188045, "loss": 1.3606, "step": 16591 }, { "epoch": 0.21560547265217017, "grad_norm": 0.44488075375556946, "learning_rate": 0.00015691131935689315, "loss": 1.4546, "step": 16592 }, { "epoch": 0.21561846719608604, "grad_norm": 0.351406455039978, "learning_rate": 0.00015690871989498174, "loss": 1.4916, "step": 16593 }, { "epoch": 0.21563146174000192, "grad_norm": 0.34770748019218445, "learning_rate": 0.00015690612043307037, "loss": 1.291, "step": 16594 }, { "epoch": 0.2156444562839178, "grad_norm": 0.47346386313438416, "learning_rate": 0.00015690352097115897, "loss": 1.3954, "step": 16595 }, { "epoch": 0.21565745082783366, "grad_norm": 0.48725366592407227, "learning_rate": 0.0001569009215092476, "loss": 1.3502, "step": 16596 }, { "epoch": 0.21567044537174954, "grad_norm": 0.29200097918510437, "learning_rate": 0.00015689832204733621, "loss": 1.2541, "step": 16597 }, { "epoch": 0.2156834399156654, "grad_norm": 0.437290757894516, "learning_rate": 0.0001568957225854248, "loss": 1.3627, "step": 16598 }, { "epoch": 0.21569643445958128, "grad_norm": 0.42533838748931885, "learning_rate": 0.00015689312312351344, "loss": 1.5972, "step": 16599 }, { "epoch": 0.21570942900349716, "grad_norm": 0.36109253764152527, "learning_rate": 0.00015689052366160206, "loss": 1.4141, "step": 16600 }, { "epoch": 0.21572242354741303, "grad_norm": 0.37121185660362244, "learning_rate": 0.00015688792419969069, "loss": 1.523, "step": 16601 }, { "epoch": 0.2157354180913289, "grad_norm": 0.40480029582977295, "learning_rate": 0.00015688532473777928, "loss": 1.4019, "step": 16602 }, { "epoch": 0.21574841263524477, "grad_norm": 0.5045374631881714, "learning_rate": 0.00015688272527586788, "loss": 1.4244, "step": 16603 }, { "epoch": 0.21576140717916065, "grad_norm": 0.3900858163833618, "learning_rate": 0.00015688012581395653, "loss": 1.5065, "step": 16604 }, { "epoch": 0.21577440172307652, "grad_norm": 0.3240197002887726, "learning_rate": 0.00015687752635204513, "loss": 1.2758, "step": 16605 }, { "epoch": 0.2157873962669924, "grad_norm": 0.38154393434524536, "learning_rate": 0.00015687492689013375, "loss": 1.4389, "step": 16606 }, { "epoch": 0.21580039081090827, "grad_norm": 0.33986303210258484, "learning_rate": 0.00015687232742822238, "loss": 1.4756, "step": 16607 }, { "epoch": 0.21581338535482414, "grad_norm": 0.8153408765792847, "learning_rate": 0.00015686972796631098, "loss": 1.5496, "step": 16608 }, { "epoch": 0.21582637989874, "grad_norm": 0.39027640223503113, "learning_rate": 0.0001568671285043996, "loss": 1.7707, "step": 16609 }, { "epoch": 0.21583937444265588, "grad_norm": 0.3908083736896515, "learning_rate": 0.0001568645290424882, "loss": 1.5284, "step": 16610 }, { "epoch": 0.21585236898657176, "grad_norm": 0.3799417316913605, "learning_rate": 0.00015686192958057685, "loss": 1.582, "step": 16611 }, { "epoch": 0.21586536353048763, "grad_norm": 0.3183356523513794, "learning_rate": 0.00015685933011866545, "loss": 1.6138, "step": 16612 }, { "epoch": 0.2158783580744035, "grad_norm": 0.4347495138645172, "learning_rate": 0.00015685673065675407, "loss": 1.5235, "step": 16613 }, { "epoch": 0.21589135261831938, "grad_norm": 0.308858722448349, "learning_rate": 0.00015685413119484267, "loss": 1.3521, "step": 16614 }, { "epoch": 0.21590434716223525, "grad_norm": 0.5220159292221069, "learning_rate": 0.0001568515317329313, "loss": 1.4335, "step": 16615 }, { "epoch": 0.21591734170615112, "grad_norm": 0.39267468452453613, "learning_rate": 0.00015684893227101992, "loss": 1.2497, "step": 16616 }, { "epoch": 0.215930336250067, "grad_norm": 0.4098016321659088, "learning_rate": 0.00015684633280910851, "loss": 1.5089, "step": 16617 }, { "epoch": 0.21594333079398287, "grad_norm": 0.40980812907218933, "learning_rate": 0.00015684373334719714, "loss": 1.3314, "step": 16618 }, { "epoch": 0.21595632533789874, "grad_norm": 0.39346665143966675, "learning_rate": 0.00015684113388528576, "loss": 1.4366, "step": 16619 }, { "epoch": 0.21596931988181461, "grad_norm": 0.3957061469554901, "learning_rate": 0.00015683853442337436, "loss": 1.411, "step": 16620 }, { "epoch": 0.2159823144257305, "grad_norm": 0.44264093041419983, "learning_rate": 0.00015683593496146299, "loss": 1.261, "step": 16621 }, { "epoch": 0.21599530896964636, "grad_norm": 0.3062681257724762, "learning_rate": 0.00015683333549955158, "loss": 1.2393, "step": 16622 }, { "epoch": 0.21600830351356223, "grad_norm": 0.44686415791511536, "learning_rate": 0.00015683073603764023, "loss": 1.4037, "step": 16623 }, { "epoch": 0.2160212980574781, "grad_norm": 0.40485095977783203, "learning_rate": 0.00015682813657572883, "loss": 1.3506, "step": 16624 }, { "epoch": 0.21603429260139398, "grad_norm": 0.4370565414428711, "learning_rate": 0.00015682553711381746, "loss": 1.5838, "step": 16625 }, { "epoch": 0.21604728714530985, "grad_norm": 0.4163168668746948, "learning_rate": 0.00015682293765190605, "loss": 1.4353, "step": 16626 }, { "epoch": 0.21606028168922572, "grad_norm": 0.37149685621261597, "learning_rate": 0.00015682033818999468, "loss": 1.454, "step": 16627 }, { "epoch": 0.2160732762331416, "grad_norm": 0.4331968426704407, "learning_rate": 0.0001568177387280833, "loss": 1.442, "step": 16628 }, { "epoch": 0.21608627077705747, "grad_norm": 0.3904690444469452, "learning_rate": 0.0001568151392661719, "loss": 1.4272, "step": 16629 }, { "epoch": 0.21609926532097334, "grad_norm": 0.2630819082260132, "learning_rate": 0.00015681253980426052, "loss": 1.2998, "step": 16630 }, { "epoch": 0.21611225986488922, "grad_norm": 0.41717612743377686, "learning_rate": 0.00015680994034234915, "loss": 1.4902, "step": 16631 }, { "epoch": 0.2161252544088051, "grad_norm": 0.3159341514110565, "learning_rate": 0.00015680734088043775, "loss": 1.4452, "step": 16632 }, { "epoch": 0.21613824895272096, "grad_norm": 0.39778199791908264, "learning_rate": 0.00015680474141852637, "loss": 1.4816, "step": 16633 }, { "epoch": 0.21615124349663686, "grad_norm": 0.4765211343765259, "learning_rate": 0.00015680214195661497, "loss": 1.4979, "step": 16634 }, { "epoch": 0.21616423804055274, "grad_norm": 0.39823561906814575, "learning_rate": 0.00015679954249470362, "loss": 1.3544, "step": 16635 }, { "epoch": 0.2161772325844686, "grad_norm": 0.4006238579750061, "learning_rate": 0.00015679694303279222, "loss": 1.3446, "step": 16636 }, { "epoch": 0.21619022712838448, "grad_norm": 0.5970996618270874, "learning_rate": 0.00015679434357088084, "loss": 1.347, "step": 16637 }, { "epoch": 0.21620322167230036, "grad_norm": 0.39650824666023254, "learning_rate": 0.00015679174410896944, "loss": 1.4437, "step": 16638 }, { "epoch": 0.21621621621621623, "grad_norm": 0.3815503716468811, "learning_rate": 0.00015678914464705806, "loss": 1.3184, "step": 16639 }, { "epoch": 0.2162292107601321, "grad_norm": 0.37390029430389404, "learning_rate": 0.0001567865451851467, "loss": 1.4119, "step": 16640 }, { "epoch": 0.21624220530404797, "grad_norm": 0.42822691798210144, "learning_rate": 0.00015678394572323529, "loss": 1.5361, "step": 16641 }, { "epoch": 0.21625519984796385, "grad_norm": 0.39275333285331726, "learning_rate": 0.0001567813462613239, "loss": 1.3879, "step": 16642 }, { "epoch": 0.21626819439187972, "grad_norm": 0.5643912553787231, "learning_rate": 0.00015677874679941253, "loss": 1.5656, "step": 16643 }, { "epoch": 0.2162811889357956, "grad_norm": 0.4708293378353119, "learning_rate": 0.00015677614733750113, "loss": 1.4699, "step": 16644 }, { "epoch": 0.21629418347971147, "grad_norm": 0.3414262533187866, "learning_rate": 0.00015677354787558976, "loss": 1.4886, "step": 16645 }, { "epoch": 0.21630717802362734, "grad_norm": 0.4258613586425781, "learning_rate": 0.00015677094841367838, "loss": 1.4512, "step": 16646 }, { "epoch": 0.2163201725675432, "grad_norm": 0.3839971125125885, "learning_rate": 0.000156768348951767, "loss": 1.4354, "step": 16647 }, { "epoch": 0.21633316711145908, "grad_norm": 0.46988213062286377, "learning_rate": 0.0001567657494898556, "loss": 1.4923, "step": 16648 }, { "epoch": 0.21634616165537496, "grad_norm": 0.4564168155193329, "learning_rate": 0.00015676315002794423, "loss": 1.3735, "step": 16649 }, { "epoch": 0.21635915619929083, "grad_norm": 0.4323045611381531, "learning_rate": 0.00015676055056603285, "loss": 1.4812, "step": 16650 }, { "epoch": 0.2163721507432067, "grad_norm": 0.3471057116985321, "learning_rate": 0.00015675795110412145, "loss": 1.3863, "step": 16651 }, { "epoch": 0.21638514528712258, "grad_norm": 0.39946672320365906, "learning_rate": 0.00015675535164221007, "loss": 1.2958, "step": 16652 }, { "epoch": 0.21639813983103845, "grad_norm": 0.4242045283317566, "learning_rate": 0.00015675275218029867, "loss": 1.521, "step": 16653 }, { "epoch": 0.21641113437495432, "grad_norm": 0.4094468653202057, "learning_rate": 0.00015675015271838732, "loss": 1.3779, "step": 16654 }, { "epoch": 0.2164241289188702, "grad_norm": 0.34646421670913696, "learning_rate": 0.00015674755325647592, "loss": 1.5316, "step": 16655 }, { "epoch": 0.21643712346278607, "grad_norm": 0.42703479528427124, "learning_rate": 0.00015674495379456454, "loss": 1.5985, "step": 16656 }, { "epoch": 0.21645011800670194, "grad_norm": 0.4245736598968506, "learning_rate": 0.00015674235433265314, "loss": 1.3731, "step": 16657 }, { "epoch": 0.21646311255061781, "grad_norm": 0.3305426836013794, "learning_rate": 0.00015673975487074177, "loss": 1.4723, "step": 16658 }, { "epoch": 0.2164761070945337, "grad_norm": 0.5194588303565979, "learning_rate": 0.0001567371554088304, "loss": 1.5208, "step": 16659 }, { "epoch": 0.21648910163844956, "grad_norm": 0.35528671741485596, "learning_rate": 0.000156734555946919, "loss": 1.3596, "step": 16660 }, { "epoch": 0.21650209618236543, "grad_norm": 0.4904539883136749, "learning_rate": 0.0001567319564850076, "loss": 1.4714, "step": 16661 }, { "epoch": 0.2165150907262813, "grad_norm": 0.31905707716941833, "learning_rate": 0.00015672935702309624, "loss": 1.522, "step": 16662 }, { "epoch": 0.21652808527019718, "grad_norm": 0.42712539434432983, "learning_rate": 0.00015672675756118483, "loss": 1.4163, "step": 16663 }, { "epoch": 0.21654107981411305, "grad_norm": 0.4735722839832306, "learning_rate": 0.00015672415809927346, "loss": 1.5672, "step": 16664 }, { "epoch": 0.21655407435802893, "grad_norm": 0.35667502880096436, "learning_rate": 0.00015672155863736206, "loss": 1.2409, "step": 16665 }, { "epoch": 0.2165670689019448, "grad_norm": 0.3629409968852997, "learning_rate": 0.0001567189591754507, "loss": 1.4795, "step": 16666 }, { "epoch": 0.21658006344586067, "grad_norm": 0.3938175439834595, "learning_rate": 0.0001567163597135393, "loss": 1.5103, "step": 16667 }, { "epoch": 0.21659305798977654, "grad_norm": 0.424789696931839, "learning_rate": 0.00015671376025162793, "loss": 1.3422, "step": 16668 }, { "epoch": 0.21660605253369242, "grad_norm": 0.340334951877594, "learning_rate": 0.00015671116078971653, "loss": 1.4515, "step": 16669 }, { "epoch": 0.2166190470776083, "grad_norm": 0.4597562849521637, "learning_rate": 0.00015670856132780515, "loss": 1.4839, "step": 16670 }, { "epoch": 0.21663204162152416, "grad_norm": 0.4084775745868683, "learning_rate": 0.00015670596186589378, "loss": 1.3904, "step": 16671 }, { "epoch": 0.21664503616544004, "grad_norm": 0.3597360849380493, "learning_rate": 0.00015670336240398237, "loss": 1.4012, "step": 16672 }, { "epoch": 0.2166580307093559, "grad_norm": 0.44616204500198364, "learning_rate": 0.000156700762942071, "loss": 1.5331, "step": 16673 }, { "epoch": 0.21667102525327178, "grad_norm": 0.3745647072792053, "learning_rate": 0.00015669816348015962, "loss": 1.438, "step": 16674 }, { "epoch": 0.21668401979718765, "grad_norm": 0.3706674575805664, "learning_rate": 0.00015669556401824822, "loss": 1.3303, "step": 16675 }, { "epoch": 0.21669701434110353, "grad_norm": 0.4070736765861511, "learning_rate": 0.00015669296455633684, "loss": 1.3363, "step": 16676 }, { "epoch": 0.2167100088850194, "grad_norm": 0.3106875419616699, "learning_rate": 0.00015669036509442544, "loss": 1.3661, "step": 16677 }, { "epoch": 0.21672300342893527, "grad_norm": 0.3898864686489105, "learning_rate": 0.0001566877656325141, "loss": 1.4775, "step": 16678 }, { "epoch": 0.21673599797285115, "grad_norm": 0.37862181663513184, "learning_rate": 0.0001566851661706027, "loss": 1.2082, "step": 16679 }, { "epoch": 0.21674899251676702, "grad_norm": 0.39564165472984314, "learning_rate": 0.00015668256670869131, "loss": 1.4668, "step": 16680 }, { "epoch": 0.2167619870606829, "grad_norm": 0.36793726682662964, "learning_rate": 0.00015667996724677994, "loss": 1.5403, "step": 16681 }, { "epoch": 0.21677498160459877, "grad_norm": 0.3965376317501068, "learning_rate": 0.00015667736778486854, "loss": 1.2782, "step": 16682 }, { "epoch": 0.21678797614851464, "grad_norm": 0.366913765668869, "learning_rate": 0.00015667476832295716, "loss": 1.5441, "step": 16683 }, { "epoch": 0.2168009706924305, "grad_norm": 0.425129234790802, "learning_rate": 0.00015667216886104576, "loss": 1.3375, "step": 16684 }, { "epoch": 0.21681396523634638, "grad_norm": 0.38893622159957886, "learning_rate": 0.0001566695693991344, "loss": 1.4341, "step": 16685 }, { "epoch": 0.21682695978026226, "grad_norm": 0.4136925935745239, "learning_rate": 0.000156666969937223, "loss": 1.4315, "step": 16686 }, { "epoch": 0.21683995432417813, "grad_norm": 0.47807273268699646, "learning_rate": 0.0001566643704753116, "loss": 1.5499, "step": 16687 }, { "epoch": 0.216852948868094, "grad_norm": 0.2983015179634094, "learning_rate": 0.00015666177101340023, "loss": 1.6045, "step": 16688 }, { "epoch": 0.21686594341200988, "grad_norm": 0.39483004808425903, "learning_rate": 0.00015665917155148885, "loss": 1.3651, "step": 16689 }, { "epoch": 0.21687893795592575, "grad_norm": 0.3885948359966278, "learning_rate": 0.00015665657208957748, "loss": 1.4324, "step": 16690 }, { "epoch": 0.21689193249984162, "grad_norm": 0.3961101472377777, "learning_rate": 0.00015665397262766608, "loss": 1.3949, "step": 16691 }, { "epoch": 0.2169049270437575, "grad_norm": 0.34981784224510193, "learning_rate": 0.0001566513731657547, "loss": 1.4664, "step": 16692 }, { "epoch": 0.21691792158767337, "grad_norm": 0.3440290689468384, "learning_rate": 0.00015664877370384332, "loss": 1.357, "step": 16693 }, { "epoch": 0.21693091613158924, "grad_norm": 0.35698163509368896, "learning_rate": 0.00015664617424193192, "loss": 1.5693, "step": 16694 }, { "epoch": 0.2169439106755051, "grad_norm": 0.3388995826244354, "learning_rate": 0.00015664357478002055, "loss": 1.5293, "step": 16695 }, { "epoch": 0.216956905219421, "grad_norm": 0.3523191511631012, "learning_rate": 0.00015664097531810914, "loss": 1.457, "step": 16696 }, { "epoch": 0.21696989976333686, "grad_norm": 0.5185662508010864, "learning_rate": 0.0001566383758561978, "loss": 1.3138, "step": 16697 }, { "epoch": 0.21698289430725273, "grad_norm": 0.38048869371414185, "learning_rate": 0.0001566357763942864, "loss": 1.2495, "step": 16698 }, { "epoch": 0.2169958888511686, "grad_norm": 0.4592948257923126, "learning_rate": 0.000156633176932375, "loss": 1.3806, "step": 16699 }, { "epoch": 0.21700888339508448, "grad_norm": 0.46434304118156433, "learning_rate": 0.00015663057747046361, "loss": 1.5363, "step": 16700 }, { "epoch": 0.21702187793900035, "grad_norm": 0.39502277970314026, "learning_rate": 0.00015662797800855224, "loss": 1.2855, "step": 16701 }, { "epoch": 0.21703487248291622, "grad_norm": 0.29518890380859375, "learning_rate": 0.00015662537854664086, "loss": 1.2189, "step": 16702 }, { "epoch": 0.2170478670268321, "grad_norm": 0.4711300730705261, "learning_rate": 0.00015662277908472946, "loss": 1.4416, "step": 16703 }, { "epoch": 0.21706086157074797, "grad_norm": 0.41619038581848145, "learning_rate": 0.00015662017962281809, "loss": 1.4412, "step": 16704 }, { "epoch": 0.21707385611466384, "grad_norm": 0.36433371901512146, "learning_rate": 0.0001566175801609067, "loss": 1.2662, "step": 16705 }, { "epoch": 0.21708685065857972, "grad_norm": 0.45653778314590454, "learning_rate": 0.0001566149806989953, "loss": 1.4103, "step": 16706 }, { "epoch": 0.2170998452024956, "grad_norm": 0.4312211573123932, "learning_rate": 0.00015661238123708393, "loss": 1.534, "step": 16707 }, { "epoch": 0.21711283974641146, "grad_norm": 0.4086803197860718, "learning_rate": 0.00015660978177517253, "loss": 1.3577, "step": 16708 }, { "epoch": 0.21712583429032734, "grad_norm": 0.39179056882858276, "learning_rate": 0.00015660718231326118, "loss": 1.485, "step": 16709 }, { "epoch": 0.21713882883424324, "grad_norm": 0.2953057587146759, "learning_rate": 0.00015660458285134978, "loss": 1.3184, "step": 16710 }, { "epoch": 0.2171518233781591, "grad_norm": 0.3751826584339142, "learning_rate": 0.00015660198338943838, "loss": 1.4064, "step": 16711 }, { "epoch": 0.21716481792207498, "grad_norm": 0.38460439443588257, "learning_rate": 0.000156599383927527, "loss": 1.2642, "step": 16712 }, { "epoch": 0.21717781246599085, "grad_norm": 0.39809390902519226, "learning_rate": 0.00015659678446561562, "loss": 1.469, "step": 16713 }, { "epoch": 0.21719080700990673, "grad_norm": 0.47651973366737366, "learning_rate": 0.00015659418500370425, "loss": 1.5223, "step": 16714 }, { "epoch": 0.2172038015538226, "grad_norm": 0.40227261185646057, "learning_rate": 0.00015659158554179285, "loss": 1.4262, "step": 16715 }, { "epoch": 0.21721679609773847, "grad_norm": 0.4465072453022003, "learning_rate": 0.00015658898607988147, "loss": 1.4563, "step": 16716 }, { "epoch": 0.21722979064165435, "grad_norm": 0.44313499331474304, "learning_rate": 0.0001565863866179701, "loss": 1.5551, "step": 16717 }, { "epoch": 0.21724278518557022, "grad_norm": 0.34312674403190613, "learning_rate": 0.0001565837871560587, "loss": 1.336, "step": 16718 }, { "epoch": 0.2172557797294861, "grad_norm": 0.33247441053390503, "learning_rate": 0.00015658118769414732, "loss": 1.4549, "step": 16719 }, { "epoch": 0.21726877427340197, "grad_norm": 0.4092087745666504, "learning_rate": 0.00015657858823223594, "loss": 1.5188, "step": 16720 }, { "epoch": 0.21728176881731784, "grad_norm": 0.43918511271476746, "learning_rate": 0.00015657598877032457, "loss": 1.593, "step": 16721 }, { "epoch": 0.2172947633612337, "grad_norm": 0.47822877764701843, "learning_rate": 0.00015657338930841316, "loss": 1.4232, "step": 16722 }, { "epoch": 0.21730775790514958, "grad_norm": 0.3324108421802521, "learning_rate": 0.0001565707898465018, "loss": 1.592, "step": 16723 }, { "epoch": 0.21732075244906546, "grad_norm": 0.37148159742355347, "learning_rate": 0.0001565681903845904, "loss": 1.7572, "step": 16724 }, { "epoch": 0.21733374699298133, "grad_norm": 0.36410802602767944, "learning_rate": 0.000156565590922679, "loss": 1.2273, "step": 16725 }, { "epoch": 0.2173467415368972, "grad_norm": 0.42501553893089294, "learning_rate": 0.00015656299146076763, "loss": 1.443, "step": 16726 }, { "epoch": 0.21735973608081308, "grad_norm": 0.3321506977081299, "learning_rate": 0.00015656039199885623, "loss": 1.2448, "step": 16727 }, { "epoch": 0.21737273062472895, "grad_norm": 0.4748922288417816, "learning_rate": 0.00015655779253694486, "loss": 1.5254, "step": 16728 }, { "epoch": 0.21738572516864482, "grad_norm": 0.3702036440372467, "learning_rate": 0.00015655519307503348, "loss": 1.1718, "step": 16729 }, { "epoch": 0.2173987197125607, "grad_norm": 0.34009918570518494, "learning_rate": 0.00015655259361312208, "loss": 1.5975, "step": 16730 }, { "epoch": 0.21741171425647657, "grad_norm": 0.46154966950416565, "learning_rate": 0.0001565499941512107, "loss": 1.4618, "step": 16731 }, { "epoch": 0.21742470880039244, "grad_norm": 0.3811264932155609, "learning_rate": 0.00015654739468929933, "loss": 1.3974, "step": 16732 }, { "epoch": 0.21743770334430831, "grad_norm": 0.2730337381362915, "learning_rate": 0.00015654479522738795, "loss": 1.3609, "step": 16733 }, { "epoch": 0.2174506978882242, "grad_norm": 0.3844816982746124, "learning_rate": 0.00015654219576547655, "loss": 1.3632, "step": 16734 }, { "epoch": 0.21746369243214006, "grad_norm": 0.37815961241722107, "learning_rate": 0.00015653959630356517, "loss": 1.4671, "step": 16735 }, { "epoch": 0.21747668697605593, "grad_norm": 0.34678205847740173, "learning_rate": 0.0001565369968416538, "loss": 1.3536, "step": 16736 }, { "epoch": 0.2174896815199718, "grad_norm": 0.40051963925361633, "learning_rate": 0.0001565343973797424, "loss": 1.5007, "step": 16737 }, { "epoch": 0.21750267606388768, "grad_norm": 0.3613760769367218, "learning_rate": 0.00015653179791783102, "loss": 1.4845, "step": 16738 }, { "epoch": 0.21751567060780355, "grad_norm": 0.43381285667419434, "learning_rate": 0.00015652919845591962, "loss": 1.3215, "step": 16739 }, { "epoch": 0.21752866515171942, "grad_norm": 0.34630247950553894, "learning_rate": 0.00015652659899400827, "loss": 1.5543, "step": 16740 }, { "epoch": 0.2175416596956353, "grad_norm": 0.42135071754455566, "learning_rate": 0.00015652399953209687, "loss": 1.5052, "step": 16741 }, { "epoch": 0.21755465423955117, "grad_norm": 0.33038827776908875, "learning_rate": 0.00015652140007018546, "loss": 1.1484, "step": 16742 }, { "epoch": 0.21756764878346704, "grad_norm": 0.4731179475784302, "learning_rate": 0.0001565188006082741, "loss": 1.5578, "step": 16743 }, { "epoch": 0.21758064332738292, "grad_norm": 0.3731505870819092, "learning_rate": 0.0001565162011463627, "loss": 1.5184, "step": 16744 }, { "epoch": 0.2175936378712988, "grad_norm": 0.449459046125412, "learning_rate": 0.00015651360168445134, "loss": 1.4476, "step": 16745 }, { "epoch": 0.21760663241521466, "grad_norm": 0.37653404474258423, "learning_rate": 0.00015651100222253993, "loss": 1.308, "step": 16746 }, { "epoch": 0.21761962695913054, "grad_norm": 0.40483370423316956, "learning_rate": 0.00015650840276062856, "loss": 1.515, "step": 16747 }, { "epoch": 0.2176326215030464, "grad_norm": 0.4058394432067871, "learning_rate": 0.00015650580329871718, "loss": 1.4304, "step": 16748 }, { "epoch": 0.21764561604696228, "grad_norm": 0.3681657612323761, "learning_rate": 0.00015650320383680578, "loss": 1.3604, "step": 16749 }, { "epoch": 0.21765861059087815, "grad_norm": 0.4467221200466156, "learning_rate": 0.0001565006043748944, "loss": 1.6015, "step": 16750 }, { "epoch": 0.21767160513479403, "grad_norm": 0.4380754232406616, "learning_rate": 0.000156498004912983, "loss": 1.371, "step": 16751 }, { "epoch": 0.2176845996787099, "grad_norm": 0.3701735734939575, "learning_rate": 0.00015649540545107165, "loss": 1.3654, "step": 16752 }, { "epoch": 0.21769759422262577, "grad_norm": 0.37976616621017456, "learning_rate": 0.00015649280598916025, "loss": 1.6451, "step": 16753 }, { "epoch": 0.21771058876654165, "grad_norm": 0.38921859860420227, "learning_rate": 0.00015649020652724885, "loss": 1.2862, "step": 16754 }, { "epoch": 0.21772358331045752, "grad_norm": 0.3717900514602661, "learning_rate": 0.0001564876070653375, "loss": 1.4568, "step": 16755 }, { "epoch": 0.2177365778543734, "grad_norm": 0.35280272364616394, "learning_rate": 0.0001564850076034261, "loss": 1.3572, "step": 16756 }, { "epoch": 0.21774957239828927, "grad_norm": 0.5717735886573792, "learning_rate": 0.00015648240814151472, "loss": 1.2634, "step": 16757 }, { "epoch": 0.21776256694220514, "grad_norm": 0.4082580506801605, "learning_rate": 0.00015647980867960332, "loss": 1.3163, "step": 16758 }, { "epoch": 0.217775561486121, "grad_norm": 0.5412265658378601, "learning_rate": 0.00015647720921769194, "loss": 1.4772, "step": 16759 }, { "epoch": 0.21778855603003688, "grad_norm": 0.41908109188079834, "learning_rate": 0.00015647460975578057, "loss": 1.2015, "step": 16760 }, { "epoch": 0.21780155057395276, "grad_norm": 0.6466304063796997, "learning_rate": 0.00015647201029386917, "loss": 1.4759, "step": 16761 }, { "epoch": 0.21781454511786863, "grad_norm": 0.2947784662246704, "learning_rate": 0.0001564694108319578, "loss": 1.4606, "step": 16762 }, { "epoch": 0.2178275396617845, "grad_norm": 0.38844993710517883, "learning_rate": 0.00015646681137004642, "loss": 1.3304, "step": 16763 }, { "epoch": 0.21784053420570038, "grad_norm": 0.31608712673187256, "learning_rate": 0.00015646421190813504, "loss": 1.352, "step": 16764 }, { "epoch": 0.21785352874961625, "grad_norm": 0.33786827325820923, "learning_rate": 0.00015646161244622364, "loss": 1.5429, "step": 16765 }, { "epoch": 0.21786652329353212, "grad_norm": 0.41640591621398926, "learning_rate": 0.00015645901298431223, "loss": 1.4957, "step": 16766 }, { "epoch": 0.217879517837448, "grad_norm": 0.25733786821365356, "learning_rate": 0.00015645641352240089, "loss": 1.2774, "step": 16767 }, { "epoch": 0.21789251238136387, "grad_norm": 0.384190171957016, "learning_rate": 0.00015645381406048948, "loss": 1.2511, "step": 16768 }, { "epoch": 0.21790550692527974, "grad_norm": 0.4897196590900421, "learning_rate": 0.0001564512145985781, "loss": 1.5158, "step": 16769 }, { "epoch": 0.2179185014691956, "grad_norm": 0.38684624433517456, "learning_rate": 0.0001564486151366667, "loss": 1.3112, "step": 16770 }, { "epoch": 0.2179314960131115, "grad_norm": 0.3542638123035431, "learning_rate": 0.00015644601567475533, "loss": 1.4792, "step": 16771 }, { "epoch": 0.21794449055702736, "grad_norm": 0.366250604391098, "learning_rate": 0.00015644341621284395, "loss": 1.3279, "step": 16772 }, { "epoch": 0.21795748510094323, "grad_norm": 0.4124433398246765, "learning_rate": 0.00015644081675093255, "loss": 1.5081, "step": 16773 }, { "epoch": 0.2179704796448591, "grad_norm": 0.3338433802127838, "learning_rate": 0.00015643821728902118, "loss": 1.3079, "step": 16774 }, { "epoch": 0.21798347418877498, "grad_norm": 0.4465111196041107, "learning_rate": 0.0001564356178271098, "loss": 1.5815, "step": 16775 }, { "epoch": 0.21799646873269085, "grad_norm": 0.48583635687828064, "learning_rate": 0.00015643301836519843, "loss": 1.4563, "step": 16776 }, { "epoch": 0.21800946327660672, "grad_norm": 0.3784427046775818, "learning_rate": 0.00015643041890328702, "loss": 1.4843, "step": 16777 }, { "epoch": 0.2180224578205226, "grad_norm": 0.4067921042442322, "learning_rate": 0.00015642781944137565, "loss": 1.3139, "step": 16778 }, { "epoch": 0.21803545236443847, "grad_norm": 0.4644142687320709, "learning_rate": 0.00015642521997946427, "loss": 1.6929, "step": 16779 }, { "epoch": 0.21804844690835434, "grad_norm": 0.3555678427219391, "learning_rate": 0.00015642262051755287, "loss": 1.2989, "step": 16780 }, { "epoch": 0.21806144145227022, "grad_norm": 0.355695903301239, "learning_rate": 0.0001564200210556415, "loss": 1.3526, "step": 16781 }, { "epoch": 0.2180744359961861, "grad_norm": 0.35750722885131836, "learning_rate": 0.0001564174215937301, "loss": 1.4054, "step": 16782 }, { "epoch": 0.21808743054010196, "grad_norm": 0.33844780921936035, "learning_rate": 0.00015641482213181872, "loss": 1.3747, "step": 16783 }, { "epoch": 0.21810042508401783, "grad_norm": 0.5258835554122925, "learning_rate": 0.00015641222266990734, "loss": 1.4915, "step": 16784 }, { "epoch": 0.2181134196279337, "grad_norm": 0.43022042512893677, "learning_rate": 0.00015640962320799594, "loss": 1.5072, "step": 16785 }, { "epoch": 0.21812641417184958, "grad_norm": 0.41696757078170776, "learning_rate": 0.00015640702374608456, "loss": 1.4393, "step": 16786 }, { "epoch": 0.21813940871576548, "grad_norm": 0.37719014286994934, "learning_rate": 0.00015640442428417319, "loss": 1.6216, "step": 16787 }, { "epoch": 0.21815240325968135, "grad_norm": 0.24868805706501007, "learning_rate": 0.0001564018248222618, "loss": 1.2143, "step": 16788 }, { "epoch": 0.21816539780359723, "grad_norm": 0.3956727683544159, "learning_rate": 0.0001563992253603504, "loss": 1.5182, "step": 16789 }, { "epoch": 0.2181783923475131, "grad_norm": 0.4267377555370331, "learning_rate": 0.00015639662589843903, "loss": 1.3347, "step": 16790 }, { "epoch": 0.21819138689142897, "grad_norm": 0.4038475453853607, "learning_rate": 0.00015639402643652766, "loss": 1.3509, "step": 16791 }, { "epoch": 0.21820438143534485, "grad_norm": 0.3086733818054199, "learning_rate": 0.00015639142697461625, "loss": 1.3589, "step": 16792 }, { "epoch": 0.21821737597926072, "grad_norm": 0.4115789532661438, "learning_rate": 0.00015638882751270488, "loss": 1.3748, "step": 16793 }, { "epoch": 0.2182303705231766, "grad_norm": 0.3457920253276825, "learning_rate": 0.0001563862280507935, "loss": 1.2035, "step": 16794 }, { "epoch": 0.21824336506709247, "grad_norm": 0.3764108717441559, "learning_rate": 0.0001563836285888821, "loss": 1.3565, "step": 16795 }, { "epoch": 0.21825635961100834, "grad_norm": 0.4859064221382141, "learning_rate": 0.00015638102912697073, "loss": 1.4211, "step": 16796 }, { "epoch": 0.2182693541549242, "grad_norm": 0.33105313777923584, "learning_rate": 0.00015637842966505932, "loss": 1.3378, "step": 16797 }, { "epoch": 0.21828234869884008, "grad_norm": 0.37387049198150635, "learning_rate": 0.00015637583020314797, "loss": 1.3626, "step": 16798 }, { "epoch": 0.21829534324275596, "grad_norm": 0.38293424248695374, "learning_rate": 0.00015637323074123657, "loss": 1.4374, "step": 16799 }, { "epoch": 0.21830833778667183, "grad_norm": 0.47173646092414856, "learning_rate": 0.0001563706312793252, "loss": 1.3875, "step": 16800 }, { "epoch": 0.2183213323305877, "grad_norm": 0.41163915395736694, "learning_rate": 0.0001563680318174138, "loss": 1.4015, "step": 16801 }, { "epoch": 0.21833432687450358, "grad_norm": 0.4057028293609619, "learning_rate": 0.00015636543235550242, "loss": 1.5235, "step": 16802 }, { "epoch": 0.21834732141841945, "grad_norm": 0.3744058609008789, "learning_rate": 0.00015636283289359104, "loss": 1.343, "step": 16803 }, { "epoch": 0.21836031596233532, "grad_norm": 0.3776036500930786, "learning_rate": 0.00015636023343167964, "loss": 1.4716, "step": 16804 }, { "epoch": 0.2183733105062512, "grad_norm": 0.3524835407733917, "learning_rate": 0.00015635763396976826, "loss": 1.4402, "step": 16805 }, { "epoch": 0.21838630505016707, "grad_norm": 0.3152594566345215, "learning_rate": 0.0001563550345078569, "loss": 1.4213, "step": 16806 }, { "epoch": 0.21839929959408294, "grad_norm": 0.3834557831287384, "learning_rate": 0.0001563524350459455, "loss": 1.4371, "step": 16807 }, { "epoch": 0.2184122941379988, "grad_norm": 0.38085585832595825, "learning_rate": 0.0001563498355840341, "loss": 1.1847, "step": 16808 }, { "epoch": 0.2184252886819147, "grad_norm": 0.32922738790512085, "learning_rate": 0.0001563472361221227, "loss": 1.4526, "step": 16809 }, { "epoch": 0.21843828322583056, "grad_norm": 0.4722871780395508, "learning_rate": 0.00015634463666021136, "loss": 1.4853, "step": 16810 }, { "epoch": 0.21845127776974643, "grad_norm": 0.43322816491127014, "learning_rate": 0.00015634203719829996, "loss": 1.3518, "step": 16811 }, { "epoch": 0.2184642723136623, "grad_norm": 0.4004061818122864, "learning_rate": 0.00015633943773638858, "loss": 1.3632, "step": 16812 }, { "epoch": 0.21847726685757818, "grad_norm": 0.425752192735672, "learning_rate": 0.00015633683827447718, "loss": 1.4789, "step": 16813 }, { "epoch": 0.21849026140149405, "grad_norm": 0.4251552224159241, "learning_rate": 0.0001563342388125658, "loss": 1.4618, "step": 16814 }, { "epoch": 0.21850325594540992, "grad_norm": 0.4280528128147125, "learning_rate": 0.00015633163935065443, "loss": 1.2739, "step": 16815 }, { "epoch": 0.2185162504893258, "grad_norm": 0.33704516291618347, "learning_rate": 0.00015632903988874303, "loss": 1.3042, "step": 16816 }, { "epoch": 0.21852924503324167, "grad_norm": 0.40909144282341003, "learning_rate": 0.00015632644042683165, "loss": 1.6533, "step": 16817 }, { "epoch": 0.21854223957715754, "grad_norm": 0.3977036774158478, "learning_rate": 0.00015632384096492027, "loss": 1.5252, "step": 16818 }, { "epoch": 0.21855523412107342, "grad_norm": 0.3146308660507202, "learning_rate": 0.0001563212415030089, "loss": 1.1663, "step": 16819 }, { "epoch": 0.2185682286649893, "grad_norm": 0.4449825882911682, "learning_rate": 0.0001563186420410975, "loss": 1.4451, "step": 16820 }, { "epoch": 0.21858122320890516, "grad_norm": 0.5218036770820618, "learning_rate": 0.0001563160425791861, "loss": 1.6039, "step": 16821 }, { "epoch": 0.21859421775282104, "grad_norm": 0.3879724144935608, "learning_rate": 0.00015631344311727474, "loss": 1.5384, "step": 16822 }, { "epoch": 0.2186072122967369, "grad_norm": 0.2828150689601898, "learning_rate": 0.00015631084365536334, "loss": 1.5118, "step": 16823 }, { "epoch": 0.21862020684065278, "grad_norm": 0.39576396346092224, "learning_rate": 0.00015630824419345197, "loss": 1.3704, "step": 16824 }, { "epoch": 0.21863320138456865, "grad_norm": 0.39629244804382324, "learning_rate": 0.00015630564473154056, "loss": 1.4324, "step": 16825 }, { "epoch": 0.21864619592848453, "grad_norm": 0.4551648795604706, "learning_rate": 0.0001563030452696292, "loss": 1.4113, "step": 16826 }, { "epoch": 0.2186591904724004, "grad_norm": 0.37779414653778076, "learning_rate": 0.0001563004458077178, "loss": 1.3153, "step": 16827 }, { "epoch": 0.21867218501631627, "grad_norm": 0.40455910563468933, "learning_rate": 0.0001562978463458064, "loss": 1.4747, "step": 16828 }, { "epoch": 0.21868517956023215, "grad_norm": 0.29166582226753235, "learning_rate": 0.00015629524688389503, "loss": 1.5594, "step": 16829 }, { "epoch": 0.21869817410414802, "grad_norm": 0.3363259434700012, "learning_rate": 0.00015629264742198366, "loss": 1.2258, "step": 16830 }, { "epoch": 0.2187111686480639, "grad_norm": 0.4475618302822113, "learning_rate": 0.00015629004796007228, "loss": 1.4639, "step": 16831 }, { "epoch": 0.21872416319197976, "grad_norm": 0.40197041630744934, "learning_rate": 0.00015628744849816088, "loss": 1.3159, "step": 16832 }, { "epoch": 0.21873715773589564, "grad_norm": 0.43685510754585266, "learning_rate": 0.0001562848490362495, "loss": 1.6726, "step": 16833 }, { "epoch": 0.2187501522798115, "grad_norm": 0.3603578209877014, "learning_rate": 0.00015628224957433813, "loss": 1.5215, "step": 16834 }, { "epoch": 0.21876314682372738, "grad_norm": 0.36410731077194214, "learning_rate": 0.00015627965011242673, "loss": 1.2982, "step": 16835 }, { "epoch": 0.21877614136764326, "grad_norm": 0.4339752197265625, "learning_rate": 0.00015627705065051535, "loss": 1.4808, "step": 16836 }, { "epoch": 0.21878913591155913, "grad_norm": 0.44504326581954956, "learning_rate": 0.00015627445118860398, "loss": 1.6463, "step": 16837 }, { "epoch": 0.218802130455475, "grad_norm": 0.44338786602020264, "learning_rate": 0.00015627185172669257, "loss": 1.399, "step": 16838 }, { "epoch": 0.21881512499939088, "grad_norm": 0.35459303855895996, "learning_rate": 0.0001562692522647812, "loss": 1.6303, "step": 16839 }, { "epoch": 0.21882811954330675, "grad_norm": 0.42481687664985657, "learning_rate": 0.0001562666528028698, "loss": 1.391, "step": 16840 }, { "epoch": 0.21884111408722262, "grad_norm": 0.339913934469223, "learning_rate": 0.00015626405334095845, "loss": 1.1546, "step": 16841 }, { "epoch": 0.2188541086311385, "grad_norm": 0.4006161689758301, "learning_rate": 0.00015626145387904704, "loss": 1.4251, "step": 16842 }, { "epoch": 0.21886710317505437, "grad_norm": 0.41801369190216064, "learning_rate": 0.00015625885441713567, "loss": 1.5567, "step": 16843 }, { "epoch": 0.21888009771897024, "grad_norm": 0.37432003021240234, "learning_rate": 0.00015625625495522427, "loss": 1.4939, "step": 16844 }, { "epoch": 0.2188930922628861, "grad_norm": 0.42471933364868164, "learning_rate": 0.0001562536554933129, "loss": 1.4387, "step": 16845 }, { "epoch": 0.21890608680680199, "grad_norm": 0.4127656817436218, "learning_rate": 0.00015625105603140152, "loss": 1.4283, "step": 16846 }, { "epoch": 0.21891908135071786, "grad_norm": 0.4068490266799927, "learning_rate": 0.0001562484565694901, "loss": 1.4211, "step": 16847 }, { "epoch": 0.21893207589463373, "grad_norm": 0.4155423045158386, "learning_rate": 0.00015624585710757874, "loss": 1.4509, "step": 16848 }, { "epoch": 0.2189450704385496, "grad_norm": 0.39099904894828796, "learning_rate": 0.00015624325764566736, "loss": 1.5133, "step": 16849 }, { "epoch": 0.21895806498246548, "grad_norm": 0.43010252714157104, "learning_rate": 0.00015624065818375596, "loss": 1.481, "step": 16850 }, { "epoch": 0.21897105952638135, "grad_norm": 0.36012136936187744, "learning_rate": 0.00015623805872184458, "loss": 1.5329, "step": 16851 }, { "epoch": 0.21898405407029722, "grad_norm": 0.28695186972618103, "learning_rate": 0.00015623545925993318, "loss": 1.3573, "step": 16852 }, { "epoch": 0.2189970486142131, "grad_norm": 0.32815098762512207, "learning_rate": 0.00015623285979802183, "loss": 1.5567, "step": 16853 }, { "epoch": 0.21901004315812897, "grad_norm": 0.44502657651901245, "learning_rate": 0.00015623026033611043, "loss": 1.6575, "step": 16854 }, { "epoch": 0.21902303770204484, "grad_norm": 0.3865372836589813, "learning_rate": 0.00015622766087419905, "loss": 1.3475, "step": 16855 }, { "epoch": 0.21903603224596072, "grad_norm": 0.31753936409950256, "learning_rate": 0.00015622506141228765, "loss": 1.3279, "step": 16856 }, { "epoch": 0.2190490267898766, "grad_norm": 0.7306997776031494, "learning_rate": 0.00015622246195037628, "loss": 1.4626, "step": 16857 }, { "epoch": 0.21906202133379246, "grad_norm": 0.4163583517074585, "learning_rate": 0.0001562198624884649, "loss": 1.3209, "step": 16858 }, { "epoch": 0.21907501587770833, "grad_norm": 0.41101527214050293, "learning_rate": 0.0001562172630265535, "loss": 1.436, "step": 16859 }, { "epoch": 0.2190880104216242, "grad_norm": 0.38744163513183594, "learning_rate": 0.00015621466356464212, "loss": 1.4152, "step": 16860 }, { "epoch": 0.21910100496554008, "grad_norm": 0.47433170676231384, "learning_rate": 0.00015621206410273075, "loss": 1.486, "step": 16861 }, { "epoch": 0.21911399950945595, "grad_norm": 0.39724916219711304, "learning_rate": 0.00015620946464081937, "loss": 1.4638, "step": 16862 }, { "epoch": 0.21912699405337185, "grad_norm": 0.3638724386692047, "learning_rate": 0.00015620686517890797, "loss": 1.457, "step": 16863 }, { "epoch": 0.21913998859728773, "grad_norm": 0.4112611413002014, "learning_rate": 0.00015620426571699657, "loss": 1.4775, "step": 16864 }, { "epoch": 0.2191529831412036, "grad_norm": 0.4751654863357544, "learning_rate": 0.00015620166625508522, "loss": 1.4363, "step": 16865 }, { "epoch": 0.21916597768511947, "grad_norm": 0.38216903805732727, "learning_rate": 0.00015619906679317382, "loss": 1.5605, "step": 16866 }, { "epoch": 0.21917897222903535, "grad_norm": 0.3676697909832001, "learning_rate": 0.00015619646733126244, "loss": 1.3506, "step": 16867 }, { "epoch": 0.21919196677295122, "grad_norm": 0.4396227300167084, "learning_rate": 0.00015619386786935106, "loss": 1.4454, "step": 16868 }, { "epoch": 0.2192049613168671, "grad_norm": 0.384050577878952, "learning_rate": 0.00015619126840743966, "loss": 1.4436, "step": 16869 }, { "epoch": 0.21921795586078296, "grad_norm": 0.2954740524291992, "learning_rate": 0.00015618866894552829, "loss": 1.3362, "step": 16870 }, { "epoch": 0.21923095040469884, "grad_norm": 0.4695274233818054, "learning_rate": 0.00015618606948361688, "loss": 1.5226, "step": 16871 }, { "epoch": 0.2192439449486147, "grad_norm": 0.3787025511264801, "learning_rate": 0.00015618347002170554, "loss": 1.3989, "step": 16872 }, { "epoch": 0.21925693949253058, "grad_norm": 0.319148451089859, "learning_rate": 0.00015618087055979413, "loss": 1.6091, "step": 16873 }, { "epoch": 0.21926993403644646, "grad_norm": 0.4071063697338104, "learning_rate": 0.00015617827109788276, "loss": 1.2532, "step": 16874 }, { "epoch": 0.21928292858036233, "grad_norm": 0.3327900171279907, "learning_rate": 0.00015617567163597135, "loss": 1.2584, "step": 16875 }, { "epoch": 0.2192959231242782, "grad_norm": 0.38058164715766907, "learning_rate": 0.00015617307217405998, "loss": 1.6633, "step": 16876 }, { "epoch": 0.21930891766819408, "grad_norm": 0.32354289293289185, "learning_rate": 0.0001561704727121486, "loss": 1.1628, "step": 16877 }, { "epoch": 0.21932191221210995, "grad_norm": 0.34749579429626465, "learning_rate": 0.0001561678732502372, "loss": 1.4499, "step": 16878 }, { "epoch": 0.21933490675602582, "grad_norm": 0.3439461290836334, "learning_rate": 0.00015616527378832583, "loss": 1.3724, "step": 16879 }, { "epoch": 0.2193479012999417, "grad_norm": 0.4042541980743408, "learning_rate": 0.00015616267432641445, "loss": 1.5251, "step": 16880 }, { "epoch": 0.21936089584385757, "grad_norm": 0.3595815896987915, "learning_rate": 0.00015616007486450305, "loss": 1.3439, "step": 16881 }, { "epoch": 0.21937389038777344, "grad_norm": 0.41376960277557373, "learning_rate": 0.00015615747540259167, "loss": 1.3891, "step": 16882 }, { "epoch": 0.2193868849316893, "grad_norm": 0.2848331928253174, "learning_rate": 0.00015615487594068027, "loss": 1.2759, "step": 16883 }, { "epoch": 0.2193998794756052, "grad_norm": 0.40645015239715576, "learning_rate": 0.00015615227647876892, "loss": 1.3187, "step": 16884 }, { "epoch": 0.21941287401952106, "grad_norm": 0.4796353280544281, "learning_rate": 0.00015614967701685752, "loss": 1.6828, "step": 16885 }, { "epoch": 0.21942586856343693, "grad_norm": 0.37315917015075684, "learning_rate": 0.00015614707755494614, "loss": 1.3014, "step": 16886 }, { "epoch": 0.2194388631073528, "grad_norm": 0.3138767182826996, "learning_rate": 0.00015614447809303474, "loss": 1.2505, "step": 16887 }, { "epoch": 0.21945185765126868, "grad_norm": 0.4015048146247864, "learning_rate": 0.00015614187863112336, "loss": 1.4245, "step": 16888 }, { "epoch": 0.21946485219518455, "grad_norm": 0.36808067560195923, "learning_rate": 0.000156139279169212, "loss": 1.2575, "step": 16889 }, { "epoch": 0.21947784673910042, "grad_norm": 0.4535841643810272, "learning_rate": 0.00015613667970730059, "loss": 1.3205, "step": 16890 }, { "epoch": 0.2194908412830163, "grad_norm": 0.32976144552230835, "learning_rate": 0.0001561340802453892, "loss": 1.1836, "step": 16891 }, { "epoch": 0.21950383582693217, "grad_norm": 0.41894757747650146, "learning_rate": 0.00015613148078347784, "loss": 1.5443, "step": 16892 }, { "epoch": 0.21951683037084804, "grad_norm": 0.35990431904792786, "learning_rate": 0.00015612888132156643, "loss": 1.1722, "step": 16893 }, { "epoch": 0.21952982491476392, "grad_norm": 0.40163126587867737, "learning_rate": 0.00015612628185965506, "loss": 1.2236, "step": 16894 }, { "epoch": 0.2195428194586798, "grad_norm": 0.34681564569473267, "learning_rate": 0.00015612368239774365, "loss": 1.3468, "step": 16895 }, { "epoch": 0.21955581400259566, "grad_norm": 0.36382806301116943, "learning_rate": 0.0001561210829358323, "loss": 1.3494, "step": 16896 }, { "epoch": 0.21956880854651153, "grad_norm": 0.513202965259552, "learning_rate": 0.0001561184834739209, "loss": 1.4313, "step": 16897 }, { "epoch": 0.2195818030904274, "grad_norm": 0.32461661100387573, "learning_rate": 0.00015611588401200953, "loss": 1.4219, "step": 16898 }, { "epoch": 0.21959479763434328, "grad_norm": 0.44427090883255005, "learning_rate": 0.00015611328455009813, "loss": 1.6281, "step": 16899 }, { "epoch": 0.21960779217825915, "grad_norm": 0.40824612975120544, "learning_rate": 0.00015611068508818675, "loss": 1.4497, "step": 16900 }, { "epoch": 0.21962078672217503, "grad_norm": 0.4134128987789154, "learning_rate": 0.00015610808562627537, "loss": 1.5011, "step": 16901 }, { "epoch": 0.2196337812660909, "grad_norm": 0.4088248312473297, "learning_rate": 0.00015610548616436397, "loss": 1.4041, "step": 16902 }, { "epoch": 0.21964677581000677, "grad_norm": 0.3561285734176636, "learning_rate": 0.0001561028867024526, "loss": 1.4716, "step": 16903 }, { "epoch": 0.21965977035392265, "grad_norm": 0.4280087649822235, "learning_rate": 0.00015610028724054122, "loss": 1.3912, "step": 16904 }, { "epoch": 0.21967276489783852, "grad_norm": 0.37301552295684814, "learning_rate": 0.00015609768777862982, "loss": 1.4228, "step": 16905 }, { "epoch": 0.2196857594417544, "grad_norm": 0.3958079516887665, "learning_rate": 0.00015609508831671844, "loss": 1.5259, "step": 16906 }, { "epoch": 0.21969875398567026, "grad_norm": 0.421053946018219, "learning_rate": 0.00015609248885480707, "loss": 1.3295, "step": 16907 }, { "epoch": 0.21971174852958614, "grad_norm": 0.3922063112258911, "learning_rate": 0.0001560898893928957, "loss": 1.4187, "step": 16908 }, { "epoch": 0.219724743073502, "grad_norm": 0.4066793918609619, "learning_rate": 0.0001560872899309843, "loss": 1.3811, "step": 16909 }, { "epoch": 0.21973773761741788, "grad_norm": 0.41032347083091736, "learning_rate": 0.0001560846904690729, "loss": 1.5123, "step": 16910 }, { "epoch": 0.21975073216133376, "grad_norm": 0.335581511259079, "learning_rate": 0.00015608209100716154, "loss": 1.2008, "step": 16911 }, { "epoch": 0.21976372670524963, "grad_norm": 0.3605802059173584, "learning_rate": 0.00015607949154525014, "loss": 1.3167, "step": 16912 }, { "epoch": 0.2197767212491655, "grad_norm": 0.43856149911880493, "learning_rate": 0.00015607689208333876, "loss": 1.5308, "step": 16913 }, { "epoch": 0.21978971579308137, "grad_norm": 0.3836953341960907, "learning_rate": 0.00015607429262142736, "loss": 1.496, "step": 16914 }, { "epoch": 0.21980271033699725, "grad_norm": 0.5015119314193726, "learning_rate": 0.000156071693159516, "loss": 1.4847, "step": 16915 }, { "epoch": 0.21981570488091312, "grad_norm": 0.5078940391540527, "learning_rate": 0.0001560690936976046, "loss": 1.4667, "step": 16916 }, { "epoch": 0.219828699424829, "grad_norm": 0.42061948776245117, "learning_rate": 0.0001560664942356932, "loss": 1.4439, "step": 16917 }, { "epoch": 0.21984169396874487, "grad_norm": 0.32008206844329834, "learning_rate": 0.00015606389477378183, "loss": 1.5181, "step": 16918 }, { "epoch": 0.21985468851266074, "grad_norm": 0.48307475447654724, "learning_rate": 0.00015606129531187045, "loss": 1.2098, "step": 16919 }, { "epoch": 0.2198676830565766, "grad_norm": 0.3718625009059906, "learning_rate": 0.00015605869584995908, "loss": 1.3122, "step": 16920 }, { "epoch": 0.21988067760049249, "grad_norm": 0.45591822266578674, "learning_rate": 0.00015605609638804767, "loss": 1.3838, "step": 16921 }, { "epoch": 0.21989367214440836, "grad_norm": 0.3006881773471832, "learning_rate": 0.0001560534969261363, "loss": 1.5228, "step": 16922 }, { "epoch": 0.21990666668832423, "grad_norm": 0.38272807002067566, "learning_rate": 0.00015605089746422492, "loss": 1.4197, "step": 16923 }, { "epoch": 0.2199196612322401, "grad_norm": 0.36027419567108154, "learning_rate": 0.00015604829800231352, "loss": 1.3656, "step": 16924 }, { "epoch": 0.21993265577615598, "grad_norm": 0.3465765118598938, "learning_rate": 0.00015604569854040215, "loss": 1.4092, "step": 16925 }, { "epoch": 0.21994565032007185, "grad_norm": 0.40953442454338074, "learning_rate": 0.00015604309907849074, "loss": 1.4936, "step": 16926 }, { "epoch": 0.21995864486398772, "grad_norm": 0.5084898471832275, "learning_rate": 0.0001560404996165794, "loss": 1.4619, "step": 16927 }, { "epoch": 0.2199716394079036, "grad_norm": 0.4237470030784607, "learning_rate": 0.000156037900154668, "loss": 1.3977, "step": 16928 }, { "epoch": 0.21998463395181947, "grad_norm": 0.4161651134490967, "learning_rate": 0.00015603530069275662, "loss": 1.2603, "step": 16929 }, { "epoch": 0.21999762849573534, "grad_norm": 0.38115230202674866, "learning_rate": 0.0001560327012308452, "loss": 1.2718, "step": 16930 }, { "epoch": 0.22001062303965122, "grad_norm": 0.4662272334098816, "learning_rate": 0.00015603010176893384, "loss": 1.5266, "step": 16931 }, { "epoch": 0.2200236175835671, "grad_norm": 0.2777803838253021, "learning_rate": 0.00015602750230702246, "loss": 1.1361, "step": 16932 }, { "epoch": 0.22003661212748296, "grad_norm": 0.4139927625656128, "learning_rate": 0.00015602490284511106, "loss": 1.4081, "step": 16933 }, { "epoch": 0.22004960667139883, "grad_norm": 0.3474874794483185, "learning_rate": 0.00015602230338319968, "loss": 1.3327, "step": 16934 }, { "epoch": 0.2200626012153147, "grad_norm": 0.5078178644180298, "learning_rate": 0.0001560197039212883, "loss": 1.5693, "step": 16935 }, { "epoch": 0.22007559575923058, "grad_norm": 0.42416632175445557, "learning_rate": 0.0001560171044593769, "loss": 1.4047, "step": 16936 }, { "epoch": 0.22008859030314645, "grad_norm": 0.35124078392982483, "learning_rate": 0.00015601450499746553, "loss": 1.499, "step": 16937 }, { "epoch": 0.22010158484706233, "grad_norm": 0.44358327984809875, "learning_rate": 0.00015601190553555413, "loss": 1.4055, "step": 16938 }, { "epoch": 0.22011457939097823, "grad_norm": 0.38890859484672546, "learning_rate": 0.00015600930607364278, "loss": 1.4529, "step": 16939 }, { "epoch": 0.2201275739348941, "grad_norm": 0.3731576204299927, "learning_rate": 0.00015600670661173138, "loss": 1.4017, "step": 16940 }, { "epoch": 0.22014056847880997, "grad_norm": 0.431218683719635, "learning_rate": 0.00015600410714982, "loss": 1.3973, "step": 16941 }, { "epoch": 0.22015356302272585, "grad_norm": 0.3969866931438446, "learning_rate": 0.00015600150768790863, "loss": 1.1836, "step": 16942 }, { "epoch": 0.22016655756664172, "grad_norm": 0.2890317142009735, "learning_rate": 0.00015599890822599722, "loss": 1.3694, "step": 16943 }, { "epoch": 0.2201795521105576, "grad_norm": 0.5005160570144653, "learning_rate": 0.00015599630876408585, "loss": 1.4754, "step": 16944 }, { "epoch": 0.22019254665447346, "grad_norm": 0.47472092509269714, "learning_rate": 0.00015599370930217445, "loss": 1.3355, "step": 16945 }, { "epoch": 0.22020554119838934, "grad_norm": 0.39735203981399536, "learning_rate": 0.0001559911098402631, "loss": 1.5688, "step": 16946 }, { "epoch": 0.2202185357423052, "grad_norm": 0.4081897735595703, "learning_rate": 0.0001559885103783517, "loss": 1.5895, "step": 16947 }, { "epoch": 0.22023153028622108, "grad_norm": 0.412993848323822, "learning_rate": 0.0001559859109164403, "loss": 1.3628, "step": 16948 }, { "epoch": 0.22024452483013696, "grad_norm": 0.3694773316383362, "learning_rate": 0.00015598331145452892, "loss": 1.5862, "step": 16949 }, { "epoch": 0.22025751937405283, "grad_norm": 0.36286282539367676, "learning_rate": 0.00015598071199261754, "loss": 1.4797, "step": 16950 }, { "epoch": 0.2202705139179687, "grad_norm": 0.36835727095603943, "learning_rate": 0.00015597811253070616, "loss": 1.4781, "step": 16951 }, { "epoch": 0.22028350846188458, "grad_norm": 0.44325903058052063, "learning_rate": 0.00015597551306879476, "loss": 1.4121, "step": 16952 }, { "epoch": 0.22029650300580045, "grad_norm": 0.4392807185649872, "learning_rate": 0.0001559729136068834, "loss": 1.5093, "step": 16953 }, { "epoch": 0.22030949754971632, "grad_norm": 0.3409249782562256, "learning_rate": 0.000155970314144972, "loss": 1.5036, "step": 16954 }, { "epoch": 0.2203224920936322, "grad_norm": 0.4693164527416229, "learning_rate": 0.0001559677146830606, "loss": 1.3713, "step": 16955 }, { "epoch": 0.22033548663754807, "grad_norm": 0.3211345970630646, "learning_rate": 0.00015596511522114923, "loss": 1.3309, "step": 16956 }, { "epoch": 0.22034848118146394, "grad_norm": 0.3904562294483185, "learning_rate": 0.00015596251575923783, "loss": 1.4153, "step": 16957 }, { "epoch": 0.2203614757253798, "grad_norm": 0.42940059304237366, "learning_rate": 0.00015595991629732648, "loss": 1.3425, "step": 16958 }, { "epoch": 0.22037447026929569, "grad_norm": 0.46401265263557434, "learning_rate": 0.00015595731683541508, "loss": 1.2938, "step": 16959 }, { "epoch": 0.22038746481321156, "grad_norm": 0.35324928164482117, "learning_rate": 0.00015595471737350368, "loss": 1.5334, "step": 16960 }, { "epoch": 0.22040045935712743, "grad_norm": 0.28420963883399963, "learning_rate": 0.0001559521179115923, "loss": 1.1194, "step": 16961 }, { "epoch": 0.2204134539010433, "grad_norm": 0.3774755895137787, "learning_rate": 0.00015594951844968093, "loss": 1.3872, "step": 16962 }, { "epoch": 0.22042644844495918, "grad_norm": 0.304919958114624, "learning_rate": 0.00015594691898776955, "loss": 1.2284, "step": 16963 }, { "epoch": 0.22043944298887505, "grad_norm": 0.5344669222831726, "learning_rate": 0.00015594431952585815, "loss": 1.4851, "step": 16964 }, { "epoch": 0.22045243753279092, "grad_norm": 0.33331140875816345, "learning_rate": 0.00015594172006394677, "loss": 1.4512, "step": 16965 }, { "epoch": 0.2204654320767068, "grad_norm": 0.38237372040748596, "learning_rate": 0.0001559391206020354, "loss": 1.1646, "step": 16966 }, { "epoch": 0.22047842662062267, "grad_norm": 0.378888338804245, "learning_rate": 0.000155936521140124, "loss": 1.3772, "step": 16967 }, { "epoch": 0.22049142116453854, "grad_norm": 0.40520039200782776, "learning_rate": 0.00015593392167821262, "loss": 1.63, "step": 16968 }, { "epoch": 0.22050441570845442, "grad_norm": 0.3504319190979004, "learning_rate": 0.00015593132221630122, "loss": 1.2721, "step": 16969 }, { "epoch": 0.2205174102523703, "grad_norm": 0.36314040422439575, "learning_rate": 0.00015592872275438987, "loss": 1.5577, "step": 16970 }, { "epoch": 0.22053040479628616, "grad_norm": 0.37017959356307983, "learning_rate": 0.00015592612329247846, "loss": 1.511, "step": 16971 }, { "epoch": 0.22054339934020203, "grad_norm": 0.5501101016998291, "learning_rate": 0.00015592352383056706, "loss": 1.5643, "step": 16972 }, { "epoch": 0.2205563938841179, "grad_norm": 0.45284131169319153, "learning_rate": 0.0001559209243686557, "loss": 1.4666, "step": 16973 }, { "epoch": 0.22056938842803378, "grad_norm": 0.4814234972000122, "learning_rate": 0.0001559183249067443, "loss": 1.4575, "step": 16974 }, { "epoch": 0.22058238297194965, "grad_norm": 0.3367791175842285, "learning_rate": 0.00015591572544483294, "loss": 1.3896, "step": 16975 }, { "epoch": 0.22059537751586553, "grad_norm": 0.31046754121780396, "learning_rate": 0.00015591312598292153, "loss": 1.4478, "step": 16976 }, { "epoch": 0.2206083720597814, "grad_norm": 0.5310745239257812, "learning_rate": 0.00015591052652101016, "loss": 1.3571, "step": 16977 }, { "epoch": 0.22062136660369727, "grad_norm": 0.23618222773075104, "learning_rate": 0.00015590792705909878, "loss": 1.5222, "step": 16978 }, { "epoch": 0.22063436114761314, "grad_norm": 0.442163348197937, "learning_rate": 0.00015590532759718738, "loss": 1.4654, "step": 16979 }, { "epoch": 0.22064735569152902, "grad_norm": 0.4986705482006073, "learning_rate": 0.000155902728135276, "loss": 1.363, "step": 16980 }, { "epoch": 0.2206603502354449, "grad_norm": 0.44632959365844727, "learning_rate": 0.00015590012867336463, "loss": 1.5249, "step": 16981 }, { "epoch": 0.22067334477936076, "grad_norm": 0.44567590951919556, "learning_rate": 0.00015589752921145325, "loss": 1.3507, "step": 16982 }, { "epoch": 0.22068633932327664, "grad_norm": 0.32191595435142517, "learning_rate": 0.00015589492974954185, "loss": 1.4514, "step": 16983 }, { "epoch": 0.2206993338671925, "grad_norm": 0.4375464618206024, "learning_rate": 0.00015589233028763047, "loss": 1.7349, "step": 16984 }, { "epoch": 0.22071232841110838, "grad_norm": 0.3427964448928833, "learning_rate": 0.0001558897308257191, "loss": 1.1675, "step": 16985 }, { "epoch": 0.22072532295502426, "grad_norm": 0.4100309908390045, "learning_rate": 0.0001558871313638077, "loss": 1.3829, "step": 16986 }, { "epoch": 0.22073831749894013, "grad_norm": 0.35462266206741333, "learning_rate": 0.00015588453190189632, "loss": 1.3021, "step": 16987 }, { "epoch": 0.220751312042856, "grad_norm": 0.47487521171569824, "learning_rate": 0.00015588193243998492, "loss": 1.4399, "step": 16988 }, { "epoch": 0.22076430658677187, "grad_norm": 0.4301503896713257, "learning_rate": 0.00015587933297807354, "loss": 1.4897, "step": 16989 }, { "epoch": 0.22077730113068775, "grad_norm": 0.4237578511238098, "learning_rate": 0.00015587673351616217, "loss": 1.52, "step": 16990 }, { "epoch": 0.22079029567460362, "grad_norm": 0.4684312641620636, "learning_rate": 0.00015587413405425076, "loss": 1.4168, "step": 16991 }, { "epoch": 0.2208032902185195, "grad_norm": 0.4203507602214813, "learning_rate": 0.0001558715345923394, "loss": 1.5013, "step": 16992 }, { "epoch": 0.22081628476243537, "grad_norm": 0.4595431685447693, "learning_rate": 0.00015586893513042801, "loss": 1.3043, "step": 16993 }, { "epoch": 0.22082927930635124, "grad_norm": 0.3742528557777405, "learning_rate": 0.00015586633566851664, "loss": 1.2729, "step": 16994 }, { "epoch": 0.2208422738502671, "grad_norm": 0.35850024223327637, "learning_rate": 0.00015586373620660524, "loss": 1.4276, "step": 16995 }, { "epoch": 0.22085526839418299, "grad_norm": 0.41013646125793457, "learning_rate": 0.00015586113674469386, "loss": 1.5027, "step": 16996 }, { "epoch": 0.22086826293809886, "grad_norm": 0.4253622591495514, "learning_rate": 0.00015585853728278248, "loss": 1.5784, "step": 16997 }, { "epoch": 0.22088125748201473, "grad_norm": 0.5466195344924927, "learning_rate": 0.00015585593782087108, "loss": 1.3935, "step": 16998 }, { "epoch": 0.2208942520259306, "grad_norm": 0.2679307162761688, "learning_rate": 0.0001558533383589597, "loss": 1.3094, "step": 16999 }, { "epoch": 0.22090724656984648, "grad_norm": 0.35882630944252014, "learning_rate": 0.0001558507388970483, "loss": 1.2618, "step": 17000 }, { "epoch": 0.22092024111376235, "grad_norm": 0.49659794569015503, "learning_rate": 0.00015584813943513693, "loss": 1.3546, "step": 17001 }, { "epoch": 0.22093323565767822, "grad_norm": 0.48639124631881714, "learning_rate": 0.00015584553997322555, "loss": 1.4672, "step": 17002 }, { "epoch": 0.2209462302015941, "grad_norm": 0.4292655289173126, "learning_rate": 0.00015584294051131415, "loss": 1.2426, "step": 17003 }, { "epoch": 0.22095922474550997, "grad_norm": 0.40612438321113586, "learning_rate": 0.00015584034104940277, "loss": 1.6765, "step": 17004 }, { "epoch": 0.22097221928942584, "grad_norm": 0.3818831741809845, "learning_rate": 0.0001558377415874914, "loss": 1.4683, "step": 17005 }, { "epoch": 0.22098521383334171, "grad_norm": 0.36524754762649536, "learning_rate": 0.00015583514212558002, "loss": 1.4492, "step": 17006 }, { "epoch": 0.2209982083772576, "grad_norm": 0.4679274559020996, "learning_rate": 0.00015583254266366862, "loss": 1.4814, "step": 17007 }, { "epoch": 0.22101120292117346, "grad_norm": 0.43185847997665405, "learning_rate": 0.00015582994320175725, "loss": 1.29, "step": 17008 }, { "epoch": 0.22102419746508933, "grad_norm": 0.45009997487068176, "learning_rate": 0.00015582734373984587, "loss": 1.7194, "step": 17009 }, { "epoch": 0.2210371920090052, "grad_norm": 0.44202664494514465, "learning_rate": 0.00015582474427793447, "loss": 1.4675, "step": 17010 }, { "epoch": 0.22105018655292108, "grad_norm": 0.4849221706390381, "learning_rate": 0.0001558221448160231, "loss": 1.416, "step": 17011 }, { "epoch": 0.22106318109683695, "grad_norm": 0.3867471516132355, "learning_rate": 0.0001558195453541117, "loss": 1.3669, "step": 17012 }, { "epoch": 0.22107617564075283, "grad_norm": 0.368221640586853, "learning_rate": 0.00015581694589220034, "loss": 1.3793, "step": 17013 }, { "epoch": 0.2210891701846687, "grad_norm": 0.36737099289894104, "learning_rate": 0.00015581434643028894, "loss": 1.4401, "step": 17014 }, { "epoch": 0.2211021647285846, "grad_norm": 0.38588178157806396, "learning_rate": 0.00015581174696837754, "loss": 1.3863, "step": 17015 }, { "epoch": 0.22111515927250047, "grad_norm": 0.43531864881515503, "learning_rate": 0.0001558091475064662, "loss": 1.46, "step": 17016 }, { "epoch": 0.22112815381641635, "grad_norm": 0.4761612117290497, "learning_rate": 0.00015580654804455478, "loss": 1.4948, "step": 17017 }, { "epoch": 0.22114114836033222, "grad_norm": 0.40776824951171875, "learning_rate": 0.0001558039485826434, "loss": 1.6796, "step": 17018 }, { "epoch": 0.2211541429042481, "grad_norm": 0.45228666067123413, "learning_rate": 0.000155801349120732, "loss": 1.4522, "step": 17019 }, { "epoch": 0.22116713744816396, "grad_norm": 0.45015138387680054, "learning_rate": 0.00015579874965882063, "loss": 1.5762, "step": 17020 }, { "epoch": 0.22118013199207984, "grad_norm": 0.4756341874599457, "learning_rate": 0.00015579615019690926, "loss": 1.4287, "step": 17021 }, { "epoch": 0.2211931265359957, "grad_norm": 0.347330778837204, "learning_rate": 0.00015579355073499785, "loss": 1.305, "step": 17022 }, { "epoch": 0.22120612107991158, "grad_norm": 0.41575974225997925, "learning_rate": 0.00015579095127308648, "loss": 1.4143, "step": 17023 }, { "epoch": 0.22121911562382746, "grad_norm": 0.35551080107688904, "learning_rate": 0.0001557883518111751, "loss": 1.36, "step": 17024 }, { "epoch": 0.22123211016774333, "grad_norm": 0.43945980072021484, "learning_rate": 0.00015578575234926373, "loss": 1.3988, "step": 17025 }, { "epoch": 0.2212451047116592, "grad_norm": 0.45011472702026367, "learning_rate": 0.00015578315288735232, "loss": 1.6539, "step": 17026 }, { "epoch": 0.22125809925557507, "grad_norm": 0.419046550989151, "learning_rate": 0.00015578055342544092, "loss": 1.5442, "step": 17027 }, { "epoch": 0.22127109379949095, "grad_norm": 0.3983384668827057, "learning_rate": 0.00015577795396352957, "loss": 1.38, "step": 17028 }, { "epoch": 0.22128408834340682, "grad_norm": 0.3543729782104492, "learning_rate": 0.00015577535450161817, "loss": 1.3682, "step": 17029 }, { "epoch": 0.2212970828873227, "grad_norm": 0.510632336139679, "learning_rate": 0.0001557727550397068, "loss": 1.7461, "step": 17030 }, { "epoch": 0.22131007743123857, "grad_norm": 0.440660685300827, "learning_rate": 0.0001557701555777954, "loss": 1.4275, "step": 17031 }, { "epoch": 0.22132307197515444, "grad_norm": 0.34685972332954407, "learning_rate": 0.00015576755611588402, "loss": 1.2296, "step": 17032 }, { "epoch": 0.2213360665190703, "grad_norm": 0.2996695339679718, "learning_rate": 0.00015576495665397264, "loss": 1.1563, "step": 17033 }, { "epoch": 0.22134906106298619, "grad_norm": 0.3733215928077698, "learning_rate": 0.00015576235719206124, "loss": 1.4296, "step": 17034 }, { "epoch": 0.22136205560690206, "grad_norm": 0.38955050706863403, "learning_rate": 0.00015575975773014986, "loss": 1.3116, "step": 17035 }, { "epoch": 0.22137505015081793, "grad_norm": 0.4073977470397949, "learning_rate": 0.0001557571582682385, "loss": 1.3835, "step": 17036 }, { "epoch": 0.2213880446947338, "grad_norm": 0.41414129734039307, "learning_rate": 0.0001557545588063271, "loss": 1.4798, "step": 17037 }, { "epoch": 0.22140103923864968, "grad_norm": 0.46962955594062805, "learning_rate": 0.0001557519593444157, "loss": 1.3848, "step": 17038 }, { "epoch": 0.22141403378256555, "grad_norm": 0.48201245069503784, "learning_rate": 0.0001557493598825043, "loss": 1.6492, "step": 17039 }, { "epoch": 0.22142702832648142, "grad_norm": 0.38728567957878113, "learning_rate": 0.00015574676042059296, "loss": 1.418, "step": 17040 }, { "epoch": 0.2214400228703973, "grad_norm": 0.40893441438674927, "learning_rate": 0.00015574416095868156, "loss": 1.5834, "step": 17041 }, { "epoch": 0.22145301741431317, "grad_norm": 0.49246731400489807, "learning_rate": 0.00015574156149677018, "loss": 1.6009, "step": 17042 }, { "epoch": 0.22146601195822904, "grad_norm": 0.397651344537735, "learning_rate": 0.00015573896203485878, "loss": 1.4665, "step": 17043 }, { "epoch": 0.22147900650214492, "grad_norm": 0.39051553606987, "learning_rate": 0.0001557363625729474, "loss": 1.4885, "step": 17044 }, { "epoch": 0.2214920010460608, "grad_norm": 0.45621123909950256, "learning_rate": 0.00015573376311103603, "loss": 1.4777, "step": 17045 }, { "epoch": 0.22150499558997666, "grad_norm": 0.4330211579799652, "learning_rate": 0.00015573116364912462, "loss": 1.1337, "step": 17046 }, { "epoch": 0.22151799013389253, "grad_norm": 0.3897799849510193, "learning_rate": 0.00015572856418721325, "loss": 1.2749, "step": 17047 }, { "epoch": 0.2215309846778084, "grad_norm": 0.3862776458263397, "learning_rate": 0.00015572596472530187, "loss": 1.4044, "step": 17048 }, { "epoch": 0.22154397922172428, "grad_norm": 0.3762578070163727, "learning_rate": 0.0001557233652633905, "loss": 1.4842, "step": 17049 }, { "epoch": 0.22155697376564015, "grad_norm": 0.3673527240753174, "learning_rate": 0.0001557207658014791, "loss": 1.2782, "step": 17050 }, { "epoch": 0.22156996830955603, "grad_norm": 0.3858078122138977, "learning_rate": 0.00015571816633956772, "loss": 1.5894, "step": 17051 }, { "epoch": 0.2215829628534719, "grad_norm": 0.40626755356788635, "learning_rate": 0.00015571556687765634, "loss": 1.4181, "step": 17052 }, { "epoch": 0.22159595739738777, "grad_norm": 0.3597128093242645, "learning_rate": 0.00015571296741574494, "loss": 1.4549, "step": 17053 }, { "epoch": 0.22160895194130364, "grad_norm": 0.4017217755317688, "learning_rate": 0.00015571036795383357, "loss": 1.3754, "step": 17054 }, { "epoch": 0.22162194648521952, "grad_norm": 0.3090975284576416, "learning_rate": 0.0001557077684919222, "loss": 1.3076, "step": 17055 }, { "epoch": 0.2216349410291354, "grad_norm": 0.3845195174217224, "learning_rate": 0.0001557051690300108, "loss": 1.3728, "step": 17056 }, { "epoch": 0.22164793557305126, "grad_norm": 0.4403516948223114, "learning_rate": 0.0001557025695680994, "loss": 1.491, "step": 17057 }, { "epoch": 0.22166093011696714, "grad_norm": 0.3647840917110443, "learning_rate": 0.000155699970106188, "loss": 1.4119, "step": 17058 }, { "epoch": 0.221673924660883, "grad_norm": 0.40061089396476746, "learning_rate": 0.00015569737064427666, "loss": 1.4018, "step": 17059 }, { "epoch": 0.22168691920479888, "grad_norm": 0.33236902952194214, "learning_rate": 0.00015569477118236526, "loss": 1.2903, "step": 17060 }, { "epoch": 0.22169991374871476, "grad_norm": 0.44524461030960083, "learning_rate": 0.00015569217172045388, "loss": 1.4229, "step": 17061 }, { "epoch": 0.22171290829263063, "grad_norm": 0.5494827032089233, "learning_rate": 0.00015568957225854248, "loss": 1.5232, "step": 17062 }, { "epoch": 0.2217259028365465, "grad_norm": 0.4711943566799164, "learning_rate": 0.0001556869727966311, "loss": 1.4898, "step": 17063 }, { "epoch": 0.22173889738046237, "grad_norm": 0.308763712644577, "learning_rate": 0.00015568437333471973, "loss": 1.1964, "step": 17064 }, { "epoch": 0.22175189192437825, "grad_norm": 0.4602581560611725, "learning_rate": 0.00015568177387280833, "loss": 1.3666, "step": 17065 }, { "epoch": 0.22176488646829412, "grad_norm": 0.43294858932495117, "learning_rate": 0.00015567917441089695, "loss": 1.3974, "step": 17066 }, { "epoch": 0.22177788101221, "grad_norm": 0.3961070477962494, "learning_rate": 0.00015567657494898558, "loss": 1.3042, "step": 17067 }, { "epoch": 0.22179087555612587, "grad_norm": 0.4131130278110504, "learning_rate": 0.0001556739754870742, "loss": 1.4559, "step": 17068 }, { "epoch": 0.22180387010004174, "grad_norm": 0.3964472711086273, "learning_rate": 0.0001556713760251628, "loss": 1.1308, "step": 17069 }, { "epoch": 0.2218168646439576, "grad_norm": 0.3841591775417328, "learning_rate": 0.0001556687765632514, "loss": 1.4378, "step": 17070 }, { "epoch": 0.22182985918787348, "grad_norm": 0.3324319124221802, "learning_rate": 0.00015566617710134005, "loss": 1.5845, "step": 17071 }, { "epoch": 0.22184285373178936, "grad_norm": 0.2964618504047394, "learning_rate": 0.00015566357763942864, "loss": 1.291, "step": 17072 }, { "epoch": 0.22185584827570523, "grad_norm": 0.42336753010749817, "learning_rate": 0.00015566097817751727, "loss": 1.4161, "step": 17073 }, { "epoch": 0.2218688428196211, "grad_norm": 0.4712112247943878, "learning_rate": 0.00015565837871560587, "loss": 1.2435, "step": 17074 }, { "epoch": 0.22188183736353698, "grad_norm": 0.46201184391975403, "learning_rate": 0.0001556557792536945, "loss": 1.305, "step": 17075 }, { "epoch": 0.22189483190745285, "grad_norm": 0.34623244404792786, "learning_rate": 0.00015565317979178311, "loss": 1.2764, "step": 17076 }, { "epoch": 0.22190782645136872, "grad_norm": 0.3202507495880127, "learning_rate": 0.0001556505803298717, "loss": 1.2393, "step": 17077 }, { "epoch": 0.2219208209952846, "grad_norm": 0.3208293318748474, "learning_rate": 0.00015564798086796034, "loss": 1.634, "step": 17078 }, { "epoch": 0.22193381553920047, "grad_norm": 0.44903144240379333, "learning_rate": 0.00015564538140604896, "loss": 1.4049, "step": 17079 }, { "epoch": 0.22194681008311634, "grad_norm": 0.41508638858795166, "learning_rate": 0.00015564278194413758, "loss": 1.4669, "step": 17080 }, { "epoch": 0.22195980462703221, "grad_norm": 0.3519446551799774, "learning_rate": 0.00015564018248222618, "loss": 1.3563, "step": 17081 }, { "epoch": 0.2219727991709481, "grad_norm": 0.46139299869537354, "learning_rate": 0.00015563758302031478, "loss": 1.3372, "step": 17082 }, { "epoch": 0.22198579371486396, "grad_norm": 0.3968223035335541, "learning_rate": 0.00015563498355840343, "loss": 1.3699, "step": 17083 }, { "epoch": 0.22199878825877983, "grad_norm": 0.4089559018611908, "learning_rate": 0.00015563238409649203, "loss": 1.2311, "step": 17084 }, { "epoch": 0.2220117828026957, "grad_norm": 0.4803493320941925, "learning_rate": 0.00015562978463458065, "loss": 1.1683, "step": 17085 }, { "epoch": 0.22202477734661158, "grad_norm": 0.45740577578544617, "learning_rate": 0.00015562718517266925, "loss": 1.6397, "step": 17086 }, { "epoch": 0.22203777189052745, "grad_norm": 0.3872956335544586, "learning_rate": 0.00015562458571075788, "loss": 1.5075, "step": 17087 }, { "epoch": 0.22205076643444333, "grad_norm": 0.40057888627052307, "learning_rate": 0.0001556219862488465, "loss": 1.359, "step": 17088 }, { "epoch": 0.2220637609783592, "grad_norm": 0.4077836275100708, "learning_rate": 0.0001556193867869351, "loss": 1.3742, "step": 17089 }, { "epoch": 0.22207675552227507, "grad_norm": 0.34613364934921265, "learning_rate": 0.00015561678732502375, "loss": 1.4589, "step": 17090 }, { "epoch": 0.22208975006619097, "grad_norm": 0.42743000388145447, "learning_rate": 0.00015561418786311235, "loss": 1.2644, "step": 17091 }, { "epoch": 0.22210274461010684, "grad_norm": 0.6018355488777161, "learning_rate": 0.00015561158840120097, "loss": 1.4335, "step": 17092 }, { "epoch": 0.22211573915402272, "grad_norm": 0.36277633905410767, "learning_rate": 0.00015560898893928957, "loss": 1.6177, "step": 17093 }, { "epoch": 0.2221287336979386, "grad_norm": 0.4203292727470398, "learning_rate": 0.0001556063894773782, "loss": 1.3813, "step": 17094 }, { "epoch": 0.22214172824185446, "grad_norm": 0.3625844120979309, "learning_rate": 0.00015560379001546682, "loss": 1.4356, "step": 17095 }, { "epoch": 0.22215472278577034, "grad_norm": 0.5396364331245422, "learning_rate": 0.00015560119055355541, "loss": 1.4676, "step": 17096 }, { "epoch": 0.2221677173296862, "grad_norm": 0.3877023160457611, "learning_rate": 0.00015559859109164404, "loss": 1.5454, "step": 17097 }, { "epoch": 0.22218071187360208, "grad_norm": 0.514180064201355, "learning_rate": 0.00015559599162973266, "loss": 1.5093, "step": 17098 }, { "epoch": 0.22219370641751796, "grad_norm": 0.39768078923225403, "learning_rate": 0.00015559339216782126, "loss": 1.3124, "step": 17099 }, { "epoch": 0.22220670096143383, "grad_norm": 0.3751824200153351, "learning_rate": 0.00015559079270590988, "loss": 1.2822, "step": 17100 }, { "epoch": 0.2222196955053497, "grad_norm": 0.5395187735557556, "learning_rate": 0.00015558819324399848, "loss": 1.3567, "step": 17101 }, { "epoch": 0.22223269004926557, "grad_norm": 0.24038265645503998, "learning_rate": 0.00015558559378208713, "loss": 1.4351, "step": 17102 }, { "epoch": 0.22224568459318145, "grad_norm": 0.43682458996772766, "learning_rate": 0.00015558299432017573, "loss": 1.5713, "step": 17103 }, { "epoch": 0.22225867913709732, "grad_norm": 0.3363043963909149, "learning_rate": 0.00015558039485826436, "loss": 1.4052, "step": 17104 }, { "epoch": 0.2222716736810132, "grad_norm": 0.471466064453125, "learning_rate": 0.00015557779539635295, "loss": 1.1848, "step": 17105 }, { "epoch": 0.22228466822492907, "grad_norm": 0.41675013303756714, "learning_rate": 0.00015557519593444158, "loss": 1.3606, "step": 17106 }, { "epoch": 0.22229766276884494, "grad_norm": 0.49515584111213684, "learning_rate": 0.0001555725964725302, "loss": 1.5995, "step": 17107 }, { "epoch": 0.2223106573127608, "grad_norm": 0.2888803482055664, "learning_rate": 0.0001555699970106188, "loss": 1.4539, "step": 17108 }, { "epoch": 0.22232365185667669, "grad_norm": 0.41267451643943787, "learning_rate": 0.00015556739754870742, "loss": 1.5359, "step": 17109 }, { "epoch": 0.22233664640059256, "grad_norm": 0.46718311309814453, "learning_rate": 0.00015556479808679605, "loss": 1.3763, "step": 17110 }, { "epoch": 0.22234964094450843, "grad_norm": 0.3803378641605377, "learning_rate": 0.00015556219862488465, "loss": 1.4768, "step": 17111 }, { "epoch": 0.2223626354884243, "grad_norm": 0.42829012870788574, "learning_rate": 0.00015555959916297327, "loss": 1.3366, "step": 17112 }, { "epoch": 0.22237563003234018, "grad_norm": 0.3077649772167206, "learning_rate": 0.00015555699970106187, "loss": 1.4243, "step": 17113 }, { "epoch": 0.22238862457625605, "grad_norm": 0.3625745177268982, "learning_rate": 0.00015555440023915052, "loss": 1.3656, "step": 17114 }, { "epoch": 0.22240161912017192, "grad_norm": 0.3440830409526825, "learning_rate": 0.00015555180077723912, "loss": 1.3793, "step": 17115 }, { "epoch": 0.2224146136640878, "grad_norm": 0.36143049597740173, "learning_rate": 0.00015554920131532774, "loss": 1.0977, "step": 17116 }, { "epoch": 0.22242760820800367, "grad_norm": 0.3851677179336548, "learning_rate": 0.00015554660185341634, "loss": 1.2104, "step": 17117 }, { "epoch": 0.22244060275191954, "grad_norm": 0.4465150535106659, "learning_rate": 0.00015554400239150496, "loss": 1.3397, "step": 17118 }, { "epoch": 0.22245359729583541, "grad_norm": 0.35547319054603577, "learning_rate": 0.0001555414029295936, "loss": 1.447, "step": 17119 }, { "epoch": 0.2224665918397513, "grad_norm": 0.39653661847114563, "learning_rate": 0.00015553880346768218, "loss": 1.5618, "step": 17120 }, { "epoch": 0.22247958638366716, "grad_norm": 0.338181734085083, "learning_rate": 0.0001555362040057708, "loss": 1.4234, "step": 17121 }, { "epoch": 0.22249258092758303, "grad_norm": 0.4291459321975708, "learning_rate": 0.00015553360454385943, "loss": 1.4441, "step": 17122 }, { "epoch": 0.2225055754714989, "grad_norm": 0.3681119978427887, "learning_rate": 0.00015553100508194803, "loss": 1.6754, "step": 17123 }, { "epoch": 0.22251857001541478, "grad_norm": 0.44944924116134644, "learning_rate": 0.00015552840562003666, "loss": 1.4699, "step": 17124 }, { "epoch": 0.22253156455933065, "grad_norm": 0.35042116045951843, "learning_rate": 0.00015552580615812525, "loss": 1.1873, "step": 17125 }, { "epoch": 0.22254455910324653, "grad_norm": 0.31520354747772217, "learning_rate": 0.0001555232066962139, "loss": 1.2255, "step": 17126 }, { "epoch": 0.2225575536471624, "grad_norm": 0.3630147874355316, "learning_rate": 0.0001555206072343025, "loss": 1.4813, "step": 17127 }, { "epoch": 0.22257054819107827, "grad_norm": 0.42200225591659546, "learning_rate": 0.00015551800777239113, "loss": 1.4492, "step": 17128 }, { "epoch": 0.22258354273499414, "grad_norm": 0.42526718974113464, "learning_rate": 0.00015551540831047975, "loss": 1.4772, "step": 17129 }, { "epoch": 0.22259653727891002, "grad_norm": 0.6211418509483337, "learning_rate": 0.00015551280884856835, "loss": 1.6208, "step": 17130 }, { "epoch": 0.2226095318228259, "grad_norm": 0.3544312119483948, "learning_rate": 0.00015551020938665697, "loss": 1.3873, "step": 17131 }, { "epoch": 0.22262252636674176, "grad_norm": 0.450359582901001, "learning_rate": 0.00015550760992474557, "loss": 1.3735, "step": 17132 }, { "epoch": 0.22263552091065764, "grad_norm": 0.4444940388202667, "learning_rate": 0.00015550501046283422, "loss": 1.4011, "step": 17133 }, { "epoch": 0.2226485154545735, "grad_norm": 0.4288652837276459, "learning_rate": 0.00015550241100092282, "loss": 1.5391, "step": 17134 }, { "epoch": 0.22266150999848938, "grad_norm": 0.4947999119758606, "learning_rate": 0.00015549981153901144, "loss": 1.4797, "step": 17135 }, { "epoch": 0.22267450454240525, "grad_norm": 0.35433104634284973, "learning_rate": 0.00015549721207710004, "loss": 1.2502, "step": 17136 }, { "epoch": 0.22268749908632113, "grad_norm": 0.4129192531108856, "learning_rate": 0.00015549461261518867, "loss": 1.4185, "step": 17137 }, { "epoch": 0.222700493630237, "grad_norm": 0.4013659358024597, "learning_rate": 0.0001554920131532773, "loss": 1.296, "step": 17138 }, { "epoch": 0.22271348817415287, "grad_norm": 0.37711530923843384, "learning_rate": 0.0001554894136913659, "loss": 1.3832, "step": 17139 }, { "epoch": 0.22272648271806875, "grad_norm": 0.403781920671463, "learning_rate": 0.0001554868142294545, "loss": 1.4053, "step": 17140 }, { "epoch": 0.22273947726198462, "grad_norm": 0.38190335035324097, "learning_rate": 0.00015548421476754314, "loss": 1.3746, "step": 17141 }, { "epoch": 0.2227524718059005, "grad_norm": 0.3156324625015259, "learning_rate": 0.00015548161530563173, "loss": 1.1838, "step": 17142 }, { "epoch": 0.22276546634981637, "grad_norm": 0.45251744985580444, "learning_rate": 0.00015547901584372036, "loss": 1.2926, "step": 17143 }, { "epoch": 0.22277846089373224, "grad_norm": 0.41896748542785645, "learning_rate": 0.00015547641638180896, "loss": 1.4728, "step": 17144 }, { "epoch": 0.2227914554376481, "grad_norm": 0.31802403926849365, "learning_rate": 0.0001554738169198976, "loss": 1.2417, "step": 17145 }, { "epoch": 0.22280444998156398, "grad_norm": 0.3450278639793396, "learning_rate": 0.0001554712174579862, "loss": 1.4251, "step": 17146 }, { "epoch": 0.22281744452547986, "grad_norm": 0.4255521595478058, "learning_rate": 0.00015546861799607483, "loss": 1.4164, "step": 17147 }, { "epoch": 0.22283043906939573, "grad_norm": 0.4435980021953583, "learning_rate": 0.00015546601853416343, "loss": 1.4434, "step": 17148 }, { "epoch": 0.2228434336133116, "grad_norm": 0.4812254011631012, "learning_rate": 0.00015546341907225205, "loss": 1.52, "step": 17149 }, { "epoch": 0.22285642815722748, "grad_norm": 0.33715853095054626, "learning_rate": 0.00015546081961034068, "loss": 1.1849, "step": 17150 }, { "epoch": 0.22286942270114335, "grad_norm": 0.40500110387802124, "learning_rate": 0.00015545822014842927, "loss": 1.3702, "step": 17151 }, { "epoch": 0.22288241724505922, "grad_norm": 0.4157017469406128, "learning_rate": 0.0001554556206865179, "loss": 1.3938, "step": 17152 }, { "epoch": 0.2228954117889751, "grad_norm": 0.2947644889354706, "learning_rate": 0.00015545302122460652, "loss": 1.3843, "step": 17153 }, { "epoch": 0.22290840633289097, "grad_norm": 0.3716994822025299, "learning_rate": 0.00015545042176269512, "loss": 1.3801, "step": 17154 }, { "epoch": 0.22292140087680684, "grad_norm": 0.45993781089782715, "learning_rate": 0.00015544782230078374, "loss": 1.6304, "step": 17155 }, { "epoch": 0.22293439542072271, "grad_norm": 0.3995344340801239, "learning_rate": 0.00015544522283887234, "loss": 1.4776, "step": 17156 }, { "epoch": 0.2229473899646386, "grad_norm": 0.42422494292259216, "learning_rate": 0.000155442623376961, "loss": 1.3458, "step": 17157 }, { "epoch": 0.22296038450855446, "grad_norm": 0.3867335915565491, "learning_rate": 0.0001554400239150496, "loss": 1.3409, "step": 17158 }, { "epoch": 0.22297337905247033, "grad_norm": 0.4358266294002533, "learning_rate": 0.00015543742445313821, "loss": 1.6161, "step": 17159 }, { "epoch": 0.2229863735963862, "grad_norm": 0.40526410937309265, "learning_rate": 0.0001554348249912268, "loss": 1.3947, "step": 17160 }, { "epoch": 0.22299936814030208, "grad_norm": 0.423902690410614, "learning_rate": 0.00015543222552931544, "loss": 1.4279, "step": 17161 }, { "epoch": 0.22301236268421795, "grad_norm": 0.34187567234039307, "learning_rate": 0.00015542962606740406, "loss": 1.4244, "step": 17162 }, { "epoch": 0.22302535722813382, "grad_norm": 0.39992156624794006, "learning_rate": 0.00015542702660549266, "loss": 1.3955, "step": 17163 }, { "epoch": 0.2230383517720497, "grad_norm": 0.4114689230918884, "learning_rate": 0.0001554244271435813, "loss": 1.3662, "step": 17164 }, { "epoch": 0.22305134631596557, "grad_norm": 0.43203005194664, "learning_rate": 0.0001554218276816699, "loss": 1.3727, "step": 17165 }, { "epoch": 0.22306434085988144, "grad_norm": 0.3677416443824768, "learning_rate": 0.0001554192282197585, "loss": 1.3425, "step": 17166 }, { "epoch": 0.22307733540379734, "grad_norm": 0.4099891781806946, "learning_rate": 0.00015541662875784713, "loss": 1.5158, "step": 17167 }, { "epoch": 0.22309032994771322, "grad_norm": 0.39120379090309143, "learning_rate": 0.00015541402929593575, "loss": 1.3764, "step": 17168 }, { "epoch": 0.2231033244916291, "grad_norm": 0.47283923625946045, "learning_rate": 0.00015541142983402438, "loss": 1.3689, "step": 17169 }, { "epoch": 0.22311631903554496, "grad_norm": 0.47374963760375977, "learning_rate": 0.00015540883037211298, "loss": 1.5228, "step": 17170 }, { "epoch": 0.22312931357946084, "grad_norm": 0.41419610381126404, "learning_rate": 0.0001554062309102016, "loss": 1.324, "step": 17171 }, { "epoch": 0.2231423081233767, "grad_norm": 0.34329530596733093, "learning_rate": 0.00015540363144829022, "loss": 1.4898, "step": 17172 }, { "epoch": 0.22315530266729258, "grad_norm": 0.4290868639945984, "learning_rate": 0.00015540103198637882, "loss": 1.3483, "step": 17173 }, { "epoch": 0.22316829721120846, "grad_norm": 0.4576382339000702, "learning_rate": 0.00015539843252446745, "loss": 1.3437, "step": 17174 }, { "epoch": 0.22318129175512433, "grad_norm": 0.3294677138328552, "learning_rate": 0.00015539583306255604, "loss": 1.1422, "step": 17175 }, { "epoch": 0.2231942862990402, "grad_norm": 0.4289793372154236, "learning_rate": 0.0001553932336006447, "loss": 1.3833, "step": 17176 }, { "epoch": 0.22320728084295607, "grad_norm": 0.3473701775074005, "learning_rate": 0.0001553906341387333, "loss": 1.4183, "step": 17177 }, { "epoch": 0.22322027538687195, "grad_norm": 0.26277607679367065, "learning_rate": 0.0001553880346768219, "loss": 1.1681, "step": 17178 }, { "epoch": 0.22323326993078782, "grad_norm": 0.3259830176830292, "learning_rate": 0.00015538543521491051, "loss": 1.3871, "step": 17179 }, { "epoch": 0.2232462644747037, "grad_norm": 0.3959862291812897, "learning_rate": 0.00015538283575299914, "loss": 1.2493, "step": 17180 }, { "epoch": 0.22325925901861957, "grad_norm": 0.32133084535598755, "learning_rate": 0.00015538023629108776, "loss": 1.4416, "step": 17181 }, { "epoch": 0.22327225356253544, "grad_norm": 0.4233432710170746, "learning_rate": 0.00015537763682917636, "loss": 1.3732, "step": 17182 }, { "epoch": 0.2232852481064513, "grad_norm": 0.42476895451545715, "learning_rate": 0.00015537503736726499, "loss": 1.4966, "step": 17183 }, { "epoch": 0.22329824265036718, "grad_norm": 0.39380794763565063, "learning_rate": 0.0001553724379053536, "loss": 1.3662, "step": 17184 }, { "epoch": 0.22331123719428306, "grad_norm": 0.48878365755081177, "learning_rate": 0.0001553698384434422, "loss": 1.5692, "step": 17185 }, { "epoch": 0.22332423173819893, "grad_norm": 0.35221439599990845, "learning_rate": 0.00015536723898153083, "loss": 1.4879, "step": 17186 }, { "epoch": 0.2233372262821148, "grad_norm": 0.46363532543182373, "learning_rate": 0.00015536463951961943, "loss": 1.6407, "step": 17187 }, { "epoch": 0.22335022082603068, "grad_norm": 0.39026781916618347, "learning_rate": 0.00015536204005770808, "loss": 1.5055, "step": 17188 }, { "epoch": 0.22336321536994655, "grad_norm": 0.352279931306839, "learning_rate": 0.00015535944059579668, "loss": 1.4515, "step": 17189 }, { "epoch": 0.22337620991386242, "grad_norm": 0.3973168134689331, "learning_rate": 0.0001553568411338853, "loss": 1.5286, "step": 17190 }, { "epoch": 0.2233892044577783, "grad_norm": 0.3869595527648926, "learning_rate": 0.0001553542416719739, "loss": 1.4942, "step": 17191 }, { "epoch": 0.22340219900169417, "grad_norm": 0.3624345660209656, "learning_rate": 0.00015535164221006252, "loss": 1.4501, "step": 17192 }, { "epoch": 0.22341519354561004, "grad_norm": 0.2924130856990814, "learning_rate": 0.00015534904274815115, "loss": 1.1925, "step": 17193 }, { "epoch": 0.22342818808952591, "grad_norm": 0.3422030806541443, "learning_rate": 0.00015534644328623975, "loss": 1.4121, "step": 17194 }, { "epoch": 0.2234411826334418, "grad_norm": 0.4656819701194763, "learning_rate": 0.00015534384382432837, "loss": 1.3132, "step": 17195 }, { "epoch": 0.22345417717735766, "grad_norm": 0.41130486130714417, "learning_rate": 0.000155341244362417, "loss": 1.4668, "step": 17196 }, { "epoch": 0.22346717172127353, "grad_norm": 0.3827744722366333, "learning_rate": 0.0001553386449005056, "loss": 1.2714, "step": 17197 }, { "epoch": 0.2234801662651894, "grad_norm": 0.3936939239501953, "learning_rate": 0.00015533604543859422, "loss": 1.3407, "step": 17198 }, { "epoch": 0.22349316080910528, "grad_norm": 0.31146296858787537, "learning_rate": 0.00015533344597668281, "loss": 1.5786, "step": 17199 }, { "epoch": 0.22350615535302115, "grad_norm": 0.32435494661331177, "learning_rate": 0.00015533084651477147, "loss": 1.3857, "step": 17200 }, { "epoch": 0.22351914989693702, "grad_norm": 0.40252891182899475, "learning_rate": 0.00015532824705286006, "loss": 1.4384, "step": 17201 }, { "epoch": 0.2235321444408529, "grad_norm": 0.4690648317337036, "learning_rate": 0.0001553256475909487, "loss": 1.5365, "step": 17202 }, { "epoch": 0.22354513898476877, "grad_norm": 0.3450070023536682, "learning_rate": 0.0001553230481290373, "loss": 1.478, "step": 17203 }, { "epoch": 0.22355813352868464, "grad_norm": 0.47981390357017517, "learning_rate": 0.0001553204486671259, "loss": 1.4841, "step": 17204 }, { "epoch": 0.22357112807260052, "grad_norm": 0.3812151551246643, "learning_rate": 0.00015531784920521453, "loss": 1.4281, "step": 17205 }, { "epoch": 0.2235841226165164, "grad_norm": 0.43522951006889343, "learning_rate": 0.00015531524974330313, "loss": 1.4512, "step": 17206 }, { "epoch": 0.22359711716043226, "grad_norm": 0.4600542485713959, "learning_rate": 0.00015531265028139176, "loss": 1.3144, "step": 17207 }, { "epoch": 0.22361011170434814, "grad_norm": 0.36629295349121094, "learning_rate": 0.00015531005081948038, "loss": 1.4347, "step": 17208 }, { "epoch": 0.223623106248264, "grad_norm": 0.36778852343559265, "learning_rate": 0.00015530745135756898, "loss": 1.4107, "step": 17209 }, { "epoch": 0.22363610079217988, "grad_norm": 0.40101122856140137, "learning_rate": 0.0001553048518956576, "loss": 1.2283, "step": 17210 }, { "epoch": 0.22364909533609575, "grad_norm": 0.4092688262462616, "learning_rate": 0.00015530225243374623, "loss": 1.4168, "step": 17211 }, { "epoch": 0.22366208988001163, "grad_norm": 0.2870953679084778, "learning_rate": 0.00015529965297183485, "loss": 1.2667, "step": 17212 }, { "epoch": 0.2236750844239275, "grad_norm": 0.3662099540233612, "learning_rate": 0.00015529705350992345, "loss": 1.4113, "step": 17213 }, { "epoch": 0.22368807896784337, "grad_norm": 0.416275292634964, "learning_rate": 0.00015529445404801207, "loss": 1.4527, "step": 17214 }, { "epoch": 0.22370107351175925, "grad_norm": 0.37154337763786316, "learning_rate": 0.0001552918545861007, "loss": 1.3959, "step": 17215 }, { "epoch": 0.22371406805567512, "grad_norm": 0.35895952582359314, "learning_rate": 0.0001552892551241893, "loss": 1.3032, "step": 17216 }, { "epoch": 0.223727062599591, "grad_norm": 0.3686949908733368, "learning_rate": 0.00015528665566227792, "loss": 1.2793, "step": 17217 }, { "epoch": 0.22374005714350687, "grad_norm": 0.4785311222076416, "learning_rate": 0.00015528405620036652, "loss": 1.4317, "step": 17218 }, { "epoch": 0.22375305168742274, "grad_norm": 0.3777119815349579, "learning_rate": 0.00015528145673845517, "loss": 1.4399, "step": 17219 }, { "epoch": 0.2237660462313386, "grad_norm": 0.3776816427707672, "learning_rate": 0.00015527885727654377, "loss": 1.5438, "step": 17220 }, { "epoch": 0.22377904077525448, "grad_norm": 0.43836694955825806, "learning_rate": 0.00015527625781463236, "loss": 1.5241, "step": 17221 }, { "epoch": 0.22379203531917036, "grad_norm": 0.47422000765800476, "learning_rate": 0.000155273658352721, "loss": 1.5175, "step": 17222 }, { "epoch": 0.22380502986308623, "grad_norm": 0.4537494480609894, "learning_rate": 0.0001552710588908096, "loss": 1.5111, "step": 17223 }, { "epoch": 0.2238180244070021, "grad_norm": 0.37764039635658264, "learning_rate": 0.00015526845942889824, "loss": 1.3176, "step": 17224 }, { "epoch": 0.22383101895091798, "grad_norm": 0.4033384919166565, "learning_rate": 0.00015526585996698683, "loss": 1.3036, "step": 17225 }, { "epoch": 0.22384401349483385, "grad_norm": 0.4137011766433716, "learning_rate": 0.00015526326050507546, "loss": 1.4036, "step": 17226 }, { "epoch": 0.22385700803874972, "grad_norm": 0.36519598960876465, "learning_rate": 0.00015526066104316408, "loss": 1.0906, "step": 17227 }, { "epoch": 0.2238700025826656, "grad_norm": 0.34954115748405457, "learning_rate": 0.00015525806158125268, "loss": 1.3285, "step": 17228 }, { "epoch": 0.22388299712658147, "grad_norm": 0.392419695854187, "learning_rate": 0.0001552554621193413, "loss": 1.475, "step": 17229 }, { "epoch": 0.22389599167049734, "grad_norm": 0.4142645001411438, "learning_rate": 0.0001552528626574299, "loss": 1.3739, "step": 17230 }, { "epoch": 0.2239089862144132, "grad_norm": 0.32194235920906067, "learning_rate": 0.00015525026319551855, "loss": 1.4845, "step": 17231 }, { "epoch": 0.2239219807583291, "grad_norm": 0.3668786585330963, "learning_rate": 0.00015524766373360715, "loss": 1.3907, "step": 17232 }, { "epoch": 0.22393497530224496, "grad_norm": 0.40856194496154785, "learning_rate": 0.00015524506427169575, "loss": 1.3633, "step": 17233 }, { "epoch": 0.22394796984616083, "grad_norm": 0.38559821248054504, "learning_rate": 0.00015524246480978437, "loss": 1.3355, "step": 17234 }, { "epoch": 0.2239609643900767, "grad_norm": 0.44653359055519104, "learning_rate": 0.000155239865347873, "loss": 1.4567, "step": 17235 }, { "epoch": 0.22397395893399258, "grad_norm": 0.3987303376197815, "learning_rate": 0.00015523726588596162, "loss": 1.4833, "step": 17236 }, { "epoch": 0.22398695347790845, "grad_norm": 0.43988803029060364, "learning_rate": 0.00015523466642405022, "loss": 1.4035, "step": 17237 }, { "epoch": 0.22399994802182432, "grad_norm": 0.40534988045692444, "learning_rate": 0.00015523206696213884, "loss": 1.3272, "step": 17238 }, { "epoch": 0.2240129425657402, "grad_norm": 0.3910040855407715, "learning_rate": 0.00015522946750022747, "loss": 1.4318, "step": 17239 }, { "epoch": 0.22402593710965607, "grad_norm": 0.3395826816558838, "learning_rate": 0.00015522686803831607, "loss": 1.5333, "step": 17240 }, { "epoch": 0.22403893165357194, "grad_norm": 0.3173440396785736, "learning_rate": 0.0001552242685764047, "loss": 1.4444, "step": 17241 }, { "epoch": 0.22405192619748782, "grad_norm": 0.3964594304561615, "learning_rate": 0.00015522166911449331, "loss": 1.2549, "step": 17242 }, { "epoch": 0.22406492074140372, "grad_norm": 0.3530697822570801, "learning_rate": 0.00015521906965258194, "loss": 1.4242, "step": 17243 }, { "epoch": 0.2240779152853196, "grad_norm": 0.4321229159832001, "learning_rate": 0.00015521647019067054, "loss": 1.604, "step": 17244 }, { "epoch": 0.22409090982923546, "grad_norm": 0.4241045415401459, "learning_rate": 0.00015521387072875913, "loss": 1.3933, "step": 17245 }, { "epoch": 0.22410390437315134, "grad_norm": 0.4098696708679199, "learning_rate": 0.00015521127126684779, "loss": 1.6097, "step": 17246 }, { "epoch": 0.2241168989170672, "grad_norm": 0.33041030168533325, "learning_rate": 0.00015520867180493638, "loss": 1.4669, "step": 17247 }, { "epoch": 0.22412989346098308, "grad_norm": 0.4452991187572479, "learning_rate": 0.000155206072343025, "loss": 1.3393, "step": 17248 }, { "epoch": 0.22414288800489895, "grad_norm": 0.3844544291496277, "learning_rate": 0.0001552034728811136, "loss": 1.3795, "step": 17249 }, { "epoch": 0.22415588254881483, "grad_norm": 0.43733423948287964, "learning_rate": 0.00015520087341920223, "loss": 1.4092, "step": 17250 }, { "epoch": 0.2241688770927307, "grad_norm": 0.41477397084236145, "learning_rate": 0.00015519827395729085, "loss": 1.376, "step": 17251 }, { "epoch": 0.22418187163664657, "grad_norm": 0.4083769619464874, "learning_rate": 0.00015519567449537945, "loss": 1.4334, "step": 17252 }, { "epoch": 0.22419486618056245, "grad_norm": 0.4906991720199585, "learning_rate": 0.00015519307503346808, "loss": 1.2715, "step": 17253 }, { "epoch": 0.22420786072447832, "grad_norm": 0.40970197319984436, "learning_rate": 0.0001551904755715567, "loss": 1.4685, "step": 17254 }, { "epoch": 0.2242208552683942, "grad_norm": 0.3219669461250305, "learning_rate": 0.00015518787610964532, "loss": 1.2984, "step": 17255 }, { "epoch": 0.22423384981231007, "grad_norm": 0.4801519513130188, "learning_rate": 0.00015518527664773392, "loss": 1.4003, "step": 17256 }, { "epoch": 0.22424684435622594, "grad_norm": 0.42577916383743286, "learning_rate": 0.00015518267718582255, "loss": 1.4536, "step": 17257 }, { "epoch": 0.2242598389001418, "grad_norm": 0.34381628036499023, "learning_rate": 0.00015518007772391117, "loss": 1.4981, "step": 17258 }, { "epoch": 0.22427283344405768, "grad_norm": 0.4634552001953125, "learning_rate": 0.00015517747826199977, "loss": 1.3198, "step": 17259 }, { "epoch": 0.22428582798797356, "grad_norm": 0.3238271176815033, "learning_rate": 0.0001551748788000884, "loss": 1.3033, "step": 17260 }, { "epoch": 0.22429882253188943, "grad_norm": 0.4381254017353058, "learning_rate": 0.000155172279338177, "loss": 1.3442, "step": 17261 }, { "epoch": 0.2243118170758053, "grad_norm": 0.3971615433692932, "learning_rate": 0.00015516967987626561, "loss": 1.453, "step": 17262 }, { "epoch": 0.22432481161972118, "grad_norm": 0.4603961110115051, "learning_rate": 0.00015516708041435424, "loss": 1.5408, "step": 17263 }, { "epoch": 0.22433780616363705, "grad_norm": 0.3853374123573303, "learning_rate": 0.00015516448095244284, "loss": 1.4496, "step": 17264 }, { "epoch": 0.22435080070755292, "grad_norm": 0.4377438724040985, "learning_rate": 0.00015516188149053146, "loss": 1.3491, "step": 17265 }, { "epoch": 0.2243637952514688, "grad_norm": 0.3321067690849304, "learning_rate": 0.00015515928202862009, "loss": 1.3057, "step": 17266 }, { "epoch": 0.22437678979538467, "grad_norm": 0.4278276860713959, "learning_rate": 0.0001551566825667087, "loss": 1.3219, "step": 17267 }, { "epoch": 0.22438978433930054, "grad_norm": 0.42417049407958984, "learning_rate": 0.0001551540831047973, "loss": 1.5954, "step": 17268 }, { "epoch": 0.2244027788832164, "grad_norm": 0.30943602323532104, "learning_rate": 0.00015515148364288593, "loss": 1.2925, "step": 17269 }, { "epoch": 0.2244157734271323, "grad_norm": 0.42065128684043884, "learning_rate": 0.00015514888418097456, "loss": 1.1031, "step": 17270 }, { "epoch": 0.22442876797104816, "grad_norm": 0.27368807792663574, "learning_rate": 0.00015514628471906315, "loss": 1.0893, "step": 17271 }, { "epoch": 0.22444176251496403, "grad_norm": 0.39994674921035767, "learning_rate": 0.00015514368525715178, "loss": 1.2647, "step": 17272 }, { "epoch": 0.2244547570588799, "grad_norm": 0.39597436785697937, "learning_rate": 0.00015514108579524038, "loss": 1.4117, "step": 17273 }, { "epoch": 0.22446775160279578, "grad_norm": 0.34959545731544495, "learning_rate": 0.00015513848633332903, "loss": 1.5133, "step": 17274 }, { "epoch": 0.22448074614671165, "grad_norm": 0.40860414505004883, "learning_rate": 0.00015513588687141762, "loss": 1.4046, "step": 17275 }, { "epoch": 0.22449374069062752, "grad_norm": 0.5460940599441528, "learning_rate": 0.00015513328740950622, "loss": 1.5928, "step": 17276 }, { "epoch": 0.2245067352345434, "grad_norm": 0.4546952545642853, "learning_rate": 0.00015513068794759487, "loss": 1.5112, "step": 17277 }, { "epoch": 0.22451972977845927, "grad_norm": 0.4209045171737671, "learning_rate": 0.00015512808848568347, "loss": 1.3803, "step": 17278 }, { "epoch": 0.22453272432237514, "grad_norm": 0.36467427015304565, "learning_rate": 0.0001551254890237721, "loss": 1.3589, "step": 17279 }, { "epoch": 0.22454571886629102, "grad_norm": 0.28918224573135376, "learning_rate": 0.0001551228895618607, "loss": 1.1951, "step": 17280 }, { "epoch": 0.2245587134102069, "grad_norm": 0.2822990417480469, "learning_rate": 0.00015512029009994932, "loss": 1.2312, "step": 17281 }, { "epoch": 0.22457170795412276, "grad_norm": 0.38115745782852173, "learning_rate": 0.00015511769063803794, "loss": 1.4074, "step": 17282 }, { "epoch": 0.22458470249803864, "grad_norm": 0.31620487570762634, "learning_rate": 0.00015511509117612654, "loss": 1.4553, "step": 17283 }, { "epoch": 0.2245976970419545, "grad_norm": 0.5735006332397461, "learning_rate": 0.00015511249171421516, "loss": 1.4612, "step": 17284 }, { "epoch": 0.22461069158587038, "grad_norm": 0.41778677701950073, "learning_rate": 0.0001551098922523038, "loss": 1.48, "step": 17285 }, { "epoch": 0.22462368612978625, "grad_norm": 0.39154380559921265, "learning_rate": 0.0001551072927903924, "loss": 1.1666, "step": 17286 }, { "epoch": 0.22463668067370213, "grad_norm": 0.5108563303947449, "learning_rate": 0.000155104693328481, "loss": 1.4689, "step": 17287 }, { "epoch": 0.224649675217618, "grad_norm": 0.34346339106559753, "learning_rate": 0.0001551020938665696, "loss": 1.2787, "step": 17288 }, { "epoch": 0.22466266976153387, "grad_norm": 0.406199187040329, "learning_rate": 0.00015509949440465826, "loss": 1.4693, "step": 17289 }, { "epoch": 0.22467566430544975, "grad_norm": 0.46047765016555786, "learning_rate": 0.00015509689494274686, "loss": 1.6481, "step": 17290 }, { "epoch": 0.22468865884936562, "grad_norm": 0.44907137751579285, "learning_rate": 0.00015509429548083548, "loss": 1.3559, "step": 17291 }, { "epoch": 0.2247016533932815, "grad_norm": 0.45858585834503174, "learning_rate": 0.00015509169601892408, "loss": 1.4427, "step": 17292 }, { "epoch": 0.22471464793719736, "grad_norm": 0.3390357792377472, "learning_rate": 0.0001550890965570127, "loss": 1.3068, "step": 17293 }, { "epoch": 0.22472764248111324, "grad_norm": 0.39388394355773926, "learning_rate": 0.00015508649709510133, "loss": 1.5405, "step": 17294 }, { "epoch": 0.2247406370250291, "grad_norm": 0.3702054023742676, "learning_rate": 0.00015508389763318992, "loss": 1.2752, "step": 17295 }, { "epoch": 0.22475363156894498, "grad_norm": 0.383247435092926, "learning_rate": 0.00015508129817127855, "loss": 1.4986, "step": 17296 }, { "epoch": 0.22476662611286086, "grad_norm": 0.42690354585647583, "learning_rate": 0.00015507869870936717, "loss": 1.4135, "step": 17297 }, { "epoch": 0.22477962065677673, "grad_norm": 0.39397284388542175, "learning_rate": 0.0001550760992474558, "loss": 1.3382, "step": 17298 }, { "epoch": 0.2247926152006926, "grad_norm": 0.460771381855011, "learning_rate": 0.0001550734997855444, "loss": 1.3601, "step": 17299 }, { "epoch": 0.22480560974460848, "grad_norm": 0.3621922433376312, "learning_rate": 0.000155070900323633, "loss": 1.2357, "step": 17300 }, { "epoch": 0.22481860428852435, "grad_norm": 0.43352898955345154, "learning_rate": 0.00015506830086172164, "loss": 1.4183, "step": 17301 }, { "epoch": 0.22483159883244022, "grad_norm": 0.45269906520843506, "learning_rate": 0.00015506570139981024, "loss": 1.3568, "step": 17302 }, { "epoch": 0.2248445933763561, "grad_norm": 0.508004903793335, "learning_rate": 0.00015506310193789887, "loss": 1.3575, "step": 17303 }, { "epoch": 0.22485758792027197, "grad_norm": 0.42237740755081177, "learning_rate": 0.00015506050247598746, "loss": 1.4219, "step": 17304 }, { "epoch": 0.22487058246418784, "grad_norm": 0.3603668510913849, "learning_rate": 0.0001550579030140761, "loss": 1.4793, "step": 17305 }, { "epoch": 0.2248835770081037, "grad_norm": 0.43718016147613525, "learning_rate": 0.0001550553035521647, "loss": 1.4276, "step": 17306 }, { "epoch": 0.2248965715520196, "grad_norm": 0.36309847235679626, "learning_rate": 0.0001550527040902533, "loss": 1.4366, "step": 17307 }, { "epoch": 0.22490956609593546, "grad_norm": 0.37834012508392334, "learning_rate": 0.00015505010462834193, "loss": 1.393, "step": 17308 }, { "epoch": 0.22492256063985133, "grad_norm": 0.3192480802536011, "learning_rate": 0.00015504750516643056, "loss": 1.2919, "step": 17309 }, { "epoch": 0.2249355551837672, "grad_norm": 0.4102786183357239, "learning_rate": 0.00015504490570451918, "loss": 1.5319, "step": 17310 }, { "epoch": 0.22494854972768308, "grad_norm": 0.42493653297424316, "learning_rate": 0.00015504230624260778, "loss": 1.2968, "step": 17311 }, { "epoch": 0.22496154427159895, "grad_norm": 0.41623684763908386, "learning_rate": 0.0001550397067806964, "loss": 1.5455, "step": 17312 }, { "epoch": 0.22497453881551482, "grad_norm": 0.295663058757782, "learning_rate": 0.00015503710731878503, "loss": 1.2794, "step": 17313 }, { "epoch": 0.2249875333594307, "grad_norm": 0.4800005257129669, "learning_rate": 0.00015503450785687363, "loss": 1.4411, "step": 17314 }, { "epoch": 0.22500052790334657, "grad_norm": 0.4075014293193817, "learning_rate": 0.00015503190839496225, "loss": 1.1826, "step": 17315 }, { "epoch": 0.22501352244726244, "grad_norm": 0.40996208786964417, "learning_rate": 0.00015502930893305088, "loss": 1.3666, "step": 17316 }, { "epoch": 0.22502651699117832, "grad_norm": 0.385638028383255, "learning_rate": 0.00015502670947113947, "loss": 1.4855, "step": 17317 }, { "epoch": 0.2250395115350942, "grad_norm": 0.35620033740997314, "learning_rate": 0.0001550241100092281, "loss": 1.4412, "step": 17318 }, { "epoch": 0.22505250607901006, "grad_norm": 0.40918657183647156, "learning_rate": 0.0001550215105473167, "loss": 1.5307, "step": 17319 }, { "epoch": 0.22506550062292596, "grad_norm": 0.36366400122642517, "learning_rate": 0.00015501891108540535, "loss": 1.4665, "step": 17320 }, { "epoch": 0.22507849516684184, "grad_norm": 0.3885035812854767, "learning_rate": 0.00015501631162349394, "loss": 1.3861, "step": 17321 }, { "epoch": 0.2250914897107577, "grad_norm": 0.4904419183731079, "learning_rate": 0.00015501371216158257, "loss": 1.4437, "step": 17322 }, { "epoch": 0.22510448425467358, "grad_norm": 0.34057196974754333, "learning_rate": 0.00015501111269967117, "loss": 1.2526, "step": 17323 }, { "epoch": 0.22511747879858945, "grad_norm": 0.4373798072338104, "learning_rate": 0.0001550085132377598, "loss": 1.395, "step": 17324 }, { "epoch": 0.22513047334250533, "grad_norm": 0.45585834980010986, "learning_rate": 0.00015500591377584842, "loss": 1.5725, "step": 17325 }, { "epoch": 0.2251434678864212, "grad_norm": 0.3861900866031647, "learning_rate": 0.000155003314313937, "loss": 1.4385, "step": 17326 }, { "epoch": 0.22515646243033707, "grad_norm": 0.47984573245048523, "learning_rate": 0.00015500071485202564, "loss": 1.5295, "step": 17327 }, { "epoch": 0.22516945697425295, "grad_norm": 0.43926528096199036, "learning_rate": 0.00015499811539011426, "loss": 1.6313, "step": 17328 }, { "epoch": 0.22518245151816882, "grad_norm": 0.44269147515296936, "learning_rate": 0.00015499551592820286, "loss": 1.6328, "step": 17329 }, { "epoch": 0.2251954460620847, "grad_norm": 0.410132497549057, "learning_rate": 0.00015499291646629148, "loss": 1.428, "step": 17330 }, { "epoch": 0.22520844060600057, "grad_norm": 0.38601186871528625, "learning_rate": 0.00015499031700438008, "loss": 1.3893, "step": 17331 }, { "epoch": 0.22522143514991644, "grad_norm": 0.32521939277648926, "learning_rate": 0.00015498771754246873, "loss": 1.2513, "step": 17332 }, { "epoch": 0.2252344296938323, "grad_norm": 0.5738088488578796, "learning_rate": 0.00015498511808055733, "loss": 1.5438, "step": 17333 }, { "epoch": 0.22524742423774818, "grad_norm": 0.3105775713920593, "learning_rate": 0.00015498251861864595, "loss": 1.234, "step": 17334 }, { "epoch": 0.22526041878166406, "grad_norm": 0.41933226585388184, "learning_rate": 0.00015497991915673455, "loss": 1.6983, "step": 17335 }, { "epoch": 0.22527341332557993, "grad_norm": 0.37573865056037903, "learning_rate": 0.00015497731969482318, "loss": 1.2763, "step": 17336 }, { "epoch": 0.2252864078694958, "grad_norm": 0.4303555488586426, "learning_rate": 0.0001549747202329118, "loss": 1.429, "step": 17337 }, { "epoch": 0.22529940241341168, "grad_norm": 0.43803417682647705, "learning_rate": 0.0001549721207710004, "loss": 1.4104, "step": 17338 }, { "epoch": 0.22531239695732755, "grad_norm": 0.33352023363113403, "learning_rate": 0.00015496952130908902, "loss": 1.406, "step": 17339 }, { "epoch": 0.22532539150124342, "grad_norm": 0.4034109115600586, "learning_rate": 0.00015496692184717765, "loss": 1.5581, "step": 17340 }, { "epoch": 0.2253383860451593, "grad_norm": 0.2970642149448395, "learning_rate": 0.00015496432238526627, "loss": 1.3252, "step": 17341 }, { "epoch": 0.22535138058907517, "grad_norm": 0.3742406964302063, "learning_rate": 0.00015496172292335487, "loss": 1.4519, "step": 17342 }, { "epoch": 0.22536437513299104, "grad_norm": 0.38583990931510925, "learning_rate": 0.00015495912346144347, "loss": 1.2261, "step": 17343 }, { "epoch": 0.2253773696769069, "grad_norm": 0.3115047812461853, "learning_rate": 0.00015495652399953212, "loss": 1.3512, "step": 17344 }, { "epoch": 0.2253903642208228, "grad_norm": 0.3753218352794647, "learning_rate": 0.00015495392453762072, "loss": 1.4042, "step": 17345 }, { "epoch": 0.22540335876473866, "grad_norm": 0.4485861361026764, "learning_rate": 0.00015495132507570934, "loss": 1.4968, "step": 17346 }, { "epoch": 0.22541635330865453, "grad_norm": 0.35710158944129944, "learning_rate": 0.00015494872561379794, "loss": 1.2975, "step": 17347 }, { "epoch": 0.2254293478525704, "grad_norm": 0.3344789743423462, "learning_rate": 0.00015494612615188656, "loss": 1.4141, "step": 17348 }, { "epoch": 0.22544234239648628, "grad_norm": 0.44867363572120667, "learning_rate": 0.00015494352668997519, "loss": 1.5023, "step": 17349 }, { "epoch": 0.22545533694040215, "grad_norm": 0.4031669795513153, "learning_rate": 0.00015494092722806378, "loss": 1.2803, "step": 17350 }, { "epoch": 0.22546833148431802, "grad_norm": 0.4371286928653717, "learning_rate": 0.00015493832776615243, "loss": 1.3404, "step": 17351 }, { "epoch": 0.2254813260282339, "grad_norm": 0.4148988723754883, "learning_rate": 0.00015493572830424103, "loss": 1.5122, "step": 17352 }, { "epoch": 0.22549432057214977, "grad_norm": 0.3802322447299957, "learning_rate": 0.00015493312884232966, "loss": 1.4035, "step": 17353 }, { "epoch": 0.22550731511606564, "grad_norm": 0.4195025563240051, "learning_rate": 0.00015493052938041825, "loss": 1.4206, "step": 17354 }, { "epoch": 0.22552030965998152, "grad_norm": 0.40878820419311523, "learning_rate": 0.00015492792991850688, "loss": 1.4791, "step": 17355 }, { "epoch": 0.2255333042038974, "grad_norm": 0.41589826345443726, "learning_rate": 0.0001549253304565955, "loss": 1.3459, "step": 17356 }, { "epoch": 0.22554629874781326, "grad_norm": 0.44983240962028503, "learning_rate": 0.0001549227309946841, "loss": 1.5836, "step": 17357 }, { "epoch": 0.22555929329172913, "grad_norm": 0.43190452456474304, "learning_rate": 0.00015492013153277273, "loss": 1.4565, "step": 17358 }, { "epoch": 0.225572287835645, "grad_norm": 0.46866104006767273, "learning_rate": 0.00015491753207086135, "loss": 1.4345, "step": 17359 }, { "epoch": 0.22558528237956088, "grad_norm": 0.38252729177474976, "learning_rate": 0.00015491493260894995, "loss": 1.4523, "step": 17360 }, { "epoch": 0.22559827692347675, "grad_norm": 0.4475078880786896, "learning_rate": 0.00015491233314703857, "loss": 1.5079, "step": 17361 }, { "epoch": 0.22561127146739263, "grad_norm": 0.3892700672149658, "learning_rate": 0.00015490973368512717, "loss": 1.4031, "step": 17362 }, { "epoch": 0.2256242660113085, "grad_norm": 0.34709739685058594, "learning_rate": 0.00015490713422321582, "loss": 1.3772, "step": 17363 }, { "epoch": 0.22563726055522437, "grad_norm": 0.47149357199668884, "learning_rate": 0.00015490453476130442, "loss": 1.5625, "step": 17364 }, { "epoch": 0.22565025509914025, "grad_norm": 0.2849879860877991, "learning_rate": 0.00015490193529939304, "loss": 1.4355, "step": 17365 }, { "epoch": 0.22566324964305612, "grad_norm": 0.3187899887561798, "learning_rate": 0.00015489933583748164, "loss": 1.2053, "step": 17366 }, { "epoch": 0.225676244186972, "grad_norm": 0.43930551409721375, "learning_rate": 0.00015489673637557026, "loss": 1.4115, "step": 17367 }, { "epoch": 0.22568923873088786, "grad_norm": 0.2609654664993286, "learning_rate": 0.0001548941369136589, "loss": 1.2889, "step": 17368 }, { "epoch": 0.22570223327480374, "grad_norm": 0.29785192012786865, "learning_rate": 0.00015489153745174749, "loss": 1.2689, "step": 17369 }, { "epoch": 0.2257152278187196, "grad_norm": 0.424932062625885, "learning_rate": 0.0001548889379898361, "loss": 1.3087, "step": 17370 }, { "epoch": 0.22572822236263548, "grad_norm": 0.4638703763484955, "learning_rate": 0.00015488633852792473, "loss": 1.5136, "step": 17371 }, { "epoch": 0.22574121690655136, "grad_norm": 0.4499339461326599, "learning_rate": 0.00015488373906601333, "loss": 1.4435, "step": 17372 }, { "epoch": 0.22575421145046723, "grad_norm": 0.3429933190345764, "learning_rate": 0.00015488113960410196, "loss": 1.4743, "step": 17373 }, { "epoch": 0.2257672059943831, "grad_norm": 0.41518691182136536, "learning_rate": 0.00015487854014219055, "loss": 1.5183, "step": 17374 }, { "epoch": 0.22578020053829898, "grad_norm": 0.39178723096847534, "learning_rate": 0.0001548759406802792, "loss": 1.4559, "step": 17375 }, { "epoch": 0.22579319508221485, "grad_norm": 0.47018399834632874, "learning_rate": 0.0001548733412183678, "loss": 1.512, "step": 17376 }, { "epoch": 0.22580618962613072, "grad_norm": 0.33574777841567993, "learning_rate": 0.00015487074175645643, "loss": 1.4443, "step": 17377 }, { "epoch": 0.2258191841700466, "grad_norm": 0.32084521651268005, "learning_rate": 0.00015486814229454502, "loss": 1.2799, "step": 17378 }, { "epoch": 0.22583217871396247, "grad_norm": 0.38468995690345764, "learning_rate": 0.00015486554283263365, "loss": 1.3267, "step": 17379 }, { "epoch": 0.22584517325787834, "grad_norm": 0.43420401215553284, "learning_rate": 0.00015486294337072227, "loss": 1.3912, "step": 17380 }, { "epoch": 0.2258581678017942, "grad_norm": 0.4336622953414917, "learning_rate": 0.00015486034390881087, "loss": 1.3389, "step": 17381 }, { "epoch": 0.22587116234571009, "grad_norm": 0.4239758253097534, "learning_rate": 0.0001548577444468995, "loss": 1.3689, "step": 17382 }, { "epoch": 0.22588415688962596, "grad_norm": 0.31912335753440857, "learning_rate": 0.00015485514498498812, "loss": 1.1682, "step": 17383 }, { "epoch": 0.22589715143354183, "grad_norm": 0.342845618724823, "learning_rate": 0.00015485254552307672, "loss": 1.4299, "step": 17384 }, { "epoch": 0.2259101459774577, "grad_norm": 0.2734147608280182, "learning_rate": 0.00015484994606116534, "loss": 1.1489, "step": 17385 }, { "epoch": 0.22592314052137358, "grad_norm": 0.4070376753807068, "learning_rate": 0.00015484734659925397, "loss": 1.491, "step": 17386 }, { "epoch": 0.22593613506528945, "grad_norm": 0.4500395953655243, "learning_rate": 0.0001548447471373426, "loss": 1.2911, "step": 17387 }, { "epoch": 0.22594912960920532, "grad_norm": 0.3489089906215668, "learning_rate": 0.0001548421476754312, "loss": 1.4734, "step": 17388 }, { "epoch": 0.2259621241531212, "grad_norm": 0.3487343192100525, "learning_rate": 0.0001548395482135198, "loss": 1.5562, "step": 17389 }, { "epoch": 0.22597511869703707, "grad_norm": 0.4250304698944092, "learning_rate": 0.00015483694875160844, "loss": 1.2573, "step": 17390 }, { "epoch": 0.22598811324095294, "grad_norm": 0.43247562646865845, "learning_rate": 0.00015483434928969703, "loss": 1.4344, "step": 17391 }, { "epoch": 0.22600110778486882, "grad_norm": 0.47394558787345886, "learning_rate": 0.00015483174982778566, "loss": 1.3823, "step": 17392 }, { "epoch": 0.2260141023287847, "grad_norm": 0.3615734875202179, "learning_rate": 0.00015482915036587426, "loss": 1.4962, "step": 17393 }, { "epoch": 0.22602709687270056, "grad_norm": 0.37008240818977356, "learning_rate": 0.0001548265509039629, "loss": 1.3689, "step": 17394 }, { "epoch": 0.22604009141661643, "grad_norm": 0.47572454810142517, "learning_rate": 0.0001548239514420515, "loss": 1.4971, "step": 17395 }, { "epoch": 0.22605308596053234, "grad_norm": 0.47684207558631897, "learning_rate": 0.00015482135198014013, "loss": 1.4241, "step": 17396 }, { "epoch": 0.2260660805044482, "grad_norm": 0.42600762844085693, "learning_rate": 0.00015481875251822873, "loss": 1.4819, "step": 17397 }, { "epoch": 0.22607907504836408, "grad_norm": 0.44199568033218384, "learning_rate": 0.00015481615305631735, "loss": 1.359, "step": 17398 }, { "epoch": 0.22609206959227995, "grad_norm": 0.4367537200450897, "learning_rate": 0.00015481355359440598, "loss": 1.4672, "step": 17399 }, { "epoch": 0.22610506413619583, "grad_norm": 0.38594746589660645, "learning_rate": 0.00015481095413249457, "loss": 1.3857, "step": 17400 }, { "epoch": 0.2261180586801117, "grad_norm": 0.47841691970825195, "learning_rate": 0.0001548083546705832, "loss": 1.5825, "step": 17401 }, { "epoch": 0.22613105322402757, "grad_norm": 0.42805933952331543, "learning_rate": 0.00015480575520867182, "loss": 1.565, "step": 17402 }, { "epoch": 0.22614404776794345, "grad_norm": 0.4200296700000763, "learning_rate": 0.00015480315574676042, "loss": 1.4444, "step": 17403 }, { "epoch": 0.22615704231185932, "grad_norm": 0.39058685302734375, "learning_rate": 0.00015480055628484904, "loss": 1.4195, "step": 17404 }, { "epoch": 0.2261700368557752, "grad_norm": 0.40262678265571594, "learning_rate": 0.00015479795682293764, "loss": 1.3986, "step": 17405 }, { "epoch": 0.22618303139969106, "grad_norm": 0.3661644756793976, "learning_rate": 0.0001547953573610263, "loss": 1.4036, "step": 17406 }, { "epoch": 0.22619602594360694, "grad_norm": 0.34374576807022095, "learning_rate": 0.0001547927578991149, "loss": 1.3049, "step": 17407 }, { "epoch": 0.2262090204875228, "grad_norm": 0.48040637373924255, "learning_rate": 0.00015479015843720352, "loss": 1.406, "step": 17408 }, { "epoch": 0.22622201503143868, "grad_norm": 0.3728158175945282, "learning_rate": 0.0001547875589752921, "loss": 1.535, "step": 17409 }, { "epoch": 0.22623500957535456, "grad_norm": 0.3528313636779785, "learning_rate": 0.00015478495951338074, "loss": 1.2917, "step": 17410 }, { "epoch": 0.22624800411927043, "grad_norm": 0.39543795585632324, "learning_rate": 0.00015478236005146936, "loss": 1.4908, "step": 17411 }, { "epoch": 0.2262609986631863, "grad_norm": 0.37769341468811035, "learning_rate": 0.00015477976058955796, "loss": 1.3956, "step": 17412 }, { "epoch": 0.22627399320710218, "grad_norm": 0.441013365983963, "learning_rate": 0.00015477716112764658, "loss": 1.3879, "step": 17413 }, { "epoch": 0.22628698775101805, "grad_norm": 0.5277686715126038, "learning_rate": 0.0001547745616657352, "loss": 1.4989, "step": 17414 }, { "epoch": 0.22629998229493392, "grad_norm": 0.42173245549201965, "learning_rate": 0.0001547719622038238, "loss": 1.3407, "step": 17415 }, { "epoch": 0.2263129768388498, "grad_norm": 0.33099690079689026, "learning_rate": 0.00015476936274191243, "loss": 1.1165, "step": 17416 }, { "epoch": 0.22632597138276567, "grad_norm": 0.47431480884552, "learning_rate": 0.00015476676328000103, "loss": 1.4413, "step": 17417 }, { "epoch": 0.22633896592668154, "grad_norm": 0.41168826818466187, "learning_rate": 0.00015476416381808968, "loss": 1.4841, "step": 17418 }, { "epoch": 0.2263519604705974, "grad_norm": 0.2497444450855255, "learning_rate": 0.00015476156435617828, "loss": 1.1803, "step": 17419 }, { "epoch": 0.22636495501451329, "grad_norm": 0.38342034816741943, "learning_rate": 0.0001547589648942669, "loss": 1.5575, "step": 17420 }, { "epoch": 0.22637794955842916, "grad_norm": 0.45156100392341614, "learning_rate": 0.0001547563654323555, "loss": 1.3754, "step": 17421 }, { "epoch": 0.22639094410234503, "grad_norm": 0.418491005897522, "learning_rate": 0.00015475376597044412, "loss": 1.3967, "step": 17422 }, { "epoch": 0.2264039386462609, "grad_norm": 0.34283629059791565, "learning_rate": 0.00015475116650853275, "loss": 1.6174, "step": 17423 }, { "epoch": 0.22641693319017678, "grad_norm": 0.38933736085891724, "learning_rate": 0.00015474856704662134, "loss": 1.3978, "step": 17424 }, { "epoch": 0.22642992773409265, "grad_norm": 0.46384942531585693, "learning_rate": 0.00015474596758471, "loss": 1.3459, "step": 17425 }, { "epoch": 0.22644292227800852, "grad_norm": 0.3376656472682953, "learning_rate": 0.0001547433681227986, "loss": 1.3114, "step": 17426 }, { "epoch": 0.2264559168219244, "grad_norm": 0.3847888112068176, "learning_rate": 0.0001547407686608872, "loss": 1.3687, "step": 17427 }, { "epoch": 0.22646891136584027, "grad_norm": 0.34971368312835693, "learning_rate": 0.00015473816919897582, "loss": 1.6343, "step": 17428 }, { "epoch": 0.22648190590975614, "grad_norm": 0.4429413378238678, "learning_rate": 0.00015473556973706444, "loss": 1.5323, "step": 17429 }, { "epoch": 0.22649490045367202, "grad_norm": 0.38012903928756714, "learning_rate": 0.00015473297027515306, "loss": 1.3206, "step": 17430 }, { "epoch": 0.2265078949975879, "grad_norm": 0.44928067922592163, "learning_rate": 0.00015473037081324166, "loss": 1.6097, "step": 17431 }, { "epoch": 0.22652088954150376, "grad_norm": 0.34846264123916626, "learning_rate": 0.00015472777135133029, "loss": 1.3545, "step": 17432 }, { "epoch": 0.22653388408541963, "grad_norm": 0.4594947099685669, "learning_rate": 0.0001547251718894189, "loss": 1.5832, "step": 17433 }, { "epoch": 0.2265468786293355, "grad_norm": 0.36072802543640137, "learning_rate": 0.0001547225724275075, "loss": 1.5538, "step": 17434 }, { "epoch": 0.22655987317325138, "grad_norm": 0.40906375646591187, "learning_rate": 0.00015471997296559613, "loss": 1.5471, "step": 17435 }, { "epoch": 0.22657286771716725, "grad_norm": 0.3710382878780365, "learning_rate": 0.00015471737350368473, "loss": 1.4535, "step": 17436 }, { "epoch": 0.22658586226108313, "grad_norm": 0.3967141807079315, "learning_rate": 0.00015471477404177338, "loss": 1.3244, "step": 17437 }, { "epoch": 0.226598856804999, "grad_norm": 0.45705223083496094, "learning_rate": 0.00015471217457986198, "loss": 1.5352, "step": 17438 }, { "epoch": 0.22661185134891487, "grad_norm": 0.5221951603889465, "learning_rate": 0.00015470957511795058, "loss": 1.4655, "step": 17439 }, { "epoch": 0.22662484589283075, "grad_norm": 0.38480231165885925, "learning_rate": 0.0001547069756560392, "loss": 1.3329, "step": 17440 }, { "epoch": 0.22663784043674662, "grad_norm": 0.45903274416923523, "learning_rate": 0.00015470437619412783, "loss": 1.3116, "step": 17441 }, { "epoch": 0.2266508349806625, "grad_norm": 0.40764319896698, "learning_rate": 0.00015470177673221645, "loss": 1.3762, "step": 17442 }, { "epoch": 0.22666382952457836, "grad_norm": 0.3567750155925751, "learning_rate": 0.00015469917727030505, "loss": 1.2855, "step": 17443 }, { "epoch": 0.22667682406849424, "grad_norm": 0.36940595507621765, "learning_rate": 0.00015469657780839367, "loss": 1.2901, "step": 17444 }, { "epoch": 0.2266898186124101, "grad_norm": 0.31514015793800354, "learning_rate": 0.0001546939783464823, "loss": 1.3736, "step": 17445 }, { "epoch": 0.22670281315632598, "grad_norm": 0.40365323424339294, "learning_rate": 0.0001546913788845709, "loss": 1.5605, "step": 17446 }, { "epoch": 0.22671580770024186, "grad_norm": 0.3547961115837097, "learning_rate": 0.00015468877942265952, "loss": 1.2786, "step": 17447 }, { "epoch": 0.22672880224415773, "grad_norm": 0.31444740295410156, "learning_rate": 0.00015468617996074812, "loss": 1.3187, "step": 17448 }, { "epoch": 0.2267417967880736, "grad_norm": 0.31617364287376404, "learning_rate": 0.00015468358049883677, "loss": 1.1888, "step": 17449 }, { "epoch": 0.22675479133198947, "grad_norm": 0.44486579298973083, "learning_rate": 0.00015468098103692536, "loss": 1.5871, "step": 17450 }, { "epoch": 0.22676778587590535, "grad_norm": 0.4003638029098511, "learning_rate": 0.00015467838157501396, "loss": 1.2373, "step": 17451 }, { "epoch": 0.22678078041982122, "grad_norm": 0.36715012788772583, "learning_rate": 0.00015467578211310259, "loss": 1.4776, "step": 17452 }, { "epoch": 0.2267937749637371, "grad_norm": 0.3751664161682129, "learning_rate": 0.0001546731826511912, "loss": 1.4395, "step": 17453 }, { "epoch": 0.22680676950765297, "grad_norm": 0.3138881027698517, "learning_rate": 0.00015467058318927984, "loss": 1.3384, "step": 17454 }, { "epoch": 0.22681976405156884, "grad_norm": 0.45714783668518066, "learning_rate": 0.00015466798372736843, "loss": 1.2495, "step": 17455 }, { "epoch": 0.2268327585954847, "grad_norm": 0.39608290791511536, "learning_rate": 0.00015466538426545706, "loss": 1.5277, "step": 17456 }, { "epoch": 0.22684575313940059, "grad_norm": 0.26731643080711365, "learning_rate": 0.00015466278480354568, "loss": 1.2756, "step": 17457 }, { "epoch": 0.22685874768331646, "grad_norm": 0.4738873839378357, "learning_rate": 0.00015466018534163428, "loss": 1.4794, "step": 17458 }, { "epoch": 0.22687174222723233, "grad_norm": 0.4773971736431122, "learning_rate": 0.0001546575858797229, "loss": 1.6204, "step": 17459 }, { "epoch": 0.2268847367711482, "grad_norm": 0.4836757779121399, "learning_rate": 0.00015465498641781153, "loss": 1.6049, "step": 17460 }, { "epoch": 0.22689773131506408, "grad_norm": 0.30799826979637146, "learning_rate": 0.00015465238695590015, "loss": 1.3359, "step": 17461 }, { "epoch": 0.22691072585897995, "grad_norm": 0.3993365168571472, "learning_rate": 0.00015464978749398875, "loss": 1.344, "step": 17462 }, { "epoch": 0.22692372040289582, "grad_norm": 0.38170790672302246, "learning_rate": 0.00015464718803207737, "loss": 1.5201, "step": 17463 }, { "epoch": 0.2269367149468117, "grad_norm": 0.36720049381256104, "learning_rate": 0.000154644588570166, "loss": 1.4012, "step": 17464 }, { "epoch": 0.22694970949072757, "grad_norm": 0.355109304189682, "learning_rate": 0.0001546419891082546, "loss": 1.4716, "step": 17465 }, { "epoch": 0.22696270403464344, "grad_norm": 0.38913825154304504, "learning_rate": 0.00015463938964634322, "loss": 1.4926, "step": 17466 }, { "epoch": 0.22697569857855932, "grad_norm": 0.32351431250572205, "learning_rate": 0.00015463679018443182, "loss": 1.4396, "step": 17467 }, { "epoch": 0.2269886931224752, "grad_norm": 0.33774125576019287, "learning_rate": 0.00015463419072252044, "loss": 1.3058, "step": 17468 }, { "epoch": 0.22700168766639106, "grad_norm": 0.35611337423324585, "learning_rate": 0.00015463159126060907, "loss": 1.3914, "step": 17469 }, { "epoch": 0.22701468221030693, "grad_norm": 0.32777202129364014, "learning_rate": 0.00015462899179869766, "loss": 1.489, "step": 17470 }, { "epoch": 0.2270276767542228, "grad_norm": 0.39417633414268494, "learning_rate": 0.0001546263923367863, "loss": 1.5456, "step": 17471 }, { "epoch": 0.2270406712981387, "grad_norm": 0.38823428750038147, "learning_rate": 0.0001546237928748749, "loss": 1.38, "step": 17472 }, { "epoch": 0.22705366584205458, "grad_norm": 0.41720306873321533, "learning_rate": 0.00015462119341296354, "loss": 1.4998, "step": 17473 }, { "epoch": 0.22706666038597045, "grad_norm": 0.260313481092453, "learning_rate": 0.00015461859395105214, "loss": 1.2702, "step": 17474 }, { "epoch": 0.22707965492988633, "grad_norm": 0.36505457758903503, "learning_rate": 0.00015461599448914076, "loss": 1.5749, "step": 17475 }, { "epoch": 0.2270926494738022, "grad_norm": 0.35256657004356384, "learning_rate": 0.00015461339502722938, "loss": 1.3065, "step": 17476 }, { "epoch": 0.22710564401771807, "grad_norm": 0.30015385150909424, "learning_rate": 0.00015461079556531798, "loss": 1.394, "step": 17477 }, { "epoch": 0.22711863856163395, "grad_norm": 0.4065621495246887, "learning_rate": 0.0001546081961034066, "loss": 1.3026, "step": 17478 }, { "epoch": 0.22713163310554982, "grad_norm": 0.3028584122657776, "learning_rate": 0.0001546055966414952, "loss": 1.2323, "step": 17479 }, { "epoch": 0.2271446276494657, "grad_norm": 0.37900277972221375, "learning_rate": 0.00015460299717958385, "loss": 1.1991, "step": 17480 }, { "epoch": 0.22715762219338156, "grad_norm": 0.3935201168060303, "learning_rate": 0.00015460039771767245, "loss": 1.3666, "step": 17481 }, { "epoch": 0.22717061673729744, "grad_norm": 0.450606107711792, "learning_rate": 0.00015459779825576105, "loss": 1.627, "step": 17482 }, { "epoch": 0.2271836112812133, "grad_norm": 0.4243600368499756, "learning_rate": 0.00015459519879384967, "loss": 1.4304, "step": 17483 }, { "epoch": 0.22719660582512918, "grad_norm": 0.40043914318084717, "learning_rate": 0.0001545925993319383, "loss": 1.5418, "step": 17484 }, { "epoch": 0.22720960036904506, "grad_norm": 0.4547288119792938, "learning_rate": 0.00015458999987002692, "loss": 1.4344, "step": 17485 }, { "epoch": 0.22722259491296093, "grad_norm": 0.3056755065917969, "learning_rate": 0.00015458740040811552, "loss": 1.2878, "step": 17486 }, { "epoch": 0.2272355894568768, "grad_norm": 0.4275365173816681, "learning_rate": 0.00015458480094620415, "loss": 1.3026, "step": 17487 }, { "epoch": 0.22724858400079267, "grad_norm": 0.4318055212497711, "learning_rate": 0.00015458220148429277, "loss": 1.3762, "step": 17488 }, { "epoch": 0.22726157854470855, "grad_norm": 0.36770716309547424, "learning_rate": 0.00015457960202238137, "loss": 1.4377, "step": 17489 }, { "epoch": 0.22727457308862442, "grad_norm": 0.4921204447746277, "learning_rate": 0.00015457700256047, "loss": 1.658, "step": 17490 }, { "epoch": 0.2272875676325403, "grad_norm": 0.36330509185791016, "learning_rate": 0.0001545744030985586, "loss": 1.3917, "step": 17491 }, { "epoch": 0.22730056217645617, "grad_norm": 0.48243260383605957, "learning_rate": 0.00015457180363664724, "loss": 1.5888, "step": 17492 }, { "epoch": 0.22731355672037204, "grad_norm": 0.4554900825023651, "learning_rate": 0.00015456920417473584, "loss": 1.4751, "step": 17493 }, { "epoch": 0.2273265512642879, "grad_norm": 0.3958800137042999, "learning_rate": 0.00015456660471282444, "loss": 1.4439, "step": 17494 }, { "epoch": 0.22733954580820379, "grad_norm": 0.3475806415081024, "learning_rate": 0.00015456400525091306, "loss": 1.3836, "step": 17495 }, { "epoch": 0.22735254035211966, "grad_norm": 0.3271598219871521, "learning_rate": 0.00015456140578900168, "loss": 1.3591, "step": 17496 }, { "epoch": 0.22736553489603553, "grad_norm": 0.4348144233226776, "learning_rate": 0.0001545588063270903, "loss": 1.4427, "step": 17497 }, { "epoch": 0.2273785294399514, "grad_norm": 0.4207206666469574, "learning_rate": 0.0001545562068651789, "loss": 1.3819, "step": 17498 }, { "epoch": 0.22739152398386728, "grad_norm": 0.4827212393283844, "learning_rate": 0.00015455360740326753, "loss": 1.5058, "step": 17499 }, { "epoch": 0.22740451852778315, "grad_norm": 0.4095722436904907, "learning_rate": 0.00015455100794135615, "loss": 1.3503, "step": 17500 }, { "epoch": 0.22741751307169902, "grad_norm": 0.4699098765850067, "learning_rate": 0.00015454840847944475, "loss": 1.3316, "step": 17501 }, { "epoch": 0.2274305076156149, "grad_norm": 0.3005622923374176, "learning_rate": 0.00015454580901753338, "loss": 1.5259, "step": 17502 }, { "epoch": 0.22744350215953077, "grad_norm": 0.33248934149742126, "learning_rate": 0.000154543209555622, "loss": 1.3527, "step": 17503 }, { "epoch": 0.22745649670344664, "grad_norm": 0.4927341341972351, "learning_rate": 0.00015454061009371063, "loss": 1.5616, "step": 17504 }, { "epoch": 0.22746949124736252, "grad_norm": 0.29577523469924927, "learning_rate": 0.00015453801063179922, "loss": 1.4328, "step": 17505 }, { "epoch": 0.2274824857912784, "grad_norm": 0.48556411266326904, "learning_rate": 0.00015453541116988782, "loss": 1.5196, "step": 17506 }, { "epoch": 0.22749548033519426, "grad_norm": 0.4814305603504181, "learning_rate": 0.00015453281170797647, "loss": 1.5006, "step": 17507 }, { "epoch": 0.22750847487911013, "grad_norm": 0.37607795000076294, "learning_rate": 0.00015453021224606507, "loss": 1.2757, "step": 17508 }, { "epoch": 0.227521469423026, "grad_norm": 0.448186993598938, "learning_rate": 0.0001545276127841537, "loss": 1.2001, "step": 17509 }, { "epoch": 0.22753446396694188, "grad_norm": 0.33712536096572876, "learning_rate": 0.0001545250133222423, "loss": 1.6128, "step": 17510 }, { "epoch": 0.22754745851085775, "grad_norm": 0.3859885632991791, "learning_rate": 0.00015452241386033092, "loss": 1.5175, "step": 17511 }, { "epoch": 0.22756045305477363, "grad_norm": 0.440922349691391, "learning_rate": 0.00015451981439841954, "loss": 1.5203, "step": 17512 }, { "epoch": 0.2275734475986895, "grad_norm": 0.4625594913959503, "learning_rate": 0.00015451721493650814, "loss": 1.651, "step": 17513 }, { "epoch": 0.22758644214260537, "grad_norm": 0.3439292311668396, "learning_rate": 0.00015451461547459676, "loss": 1.3172, "step": 17514 }, { "epoch": 0.22759943668652124, "grad_norm": 0.47682440280914307, "learning_rate": 0.0001545120160126854, "loss": 1.4806, "step": 17515 }, { "epoch": 0.22761243123043712, "grad_norm": 0.30367887020111084, "learning_rate": 0.000154509416550774, "loss": 1.4158, "step": 17516 }, { "epoch": 0.227625425774353, "grad_norm": 0.491248220205307, "learning_rate": 0.0001545068170888626, "loss": 1.3398, "step": 17517 }, { "epoch": 0.22763842031826886, "grad_norm": 0.3286205530166626, "learning_rate": 0.00015450421762695123, "loss": 1.2543, "step": 17518 }, { "epoch": 0.22765141486218474, "grad_norm": 0.4323371648788452, "learning_rate": 0.00015450161816503986, "loss": 1.6399, "step": 17519 }, { "epoch": 0.2276644094061006, "grad_norm": 0.35500413179397583, "learning_rate": 0.00015449901870312845, "loss": 1.3036, "step": 17520 }, { "epoch": 0.22767740395001648, "grad_norm": 0.2753719091415405, "learning_rate": 0.00015449641924121708, "loss": 1.2633, "step": 17521 }, { "epoch": 0.22769039849393236, "grad_norm": 0.41145145893096924, "learning_rate": 0.00015449381977930568, "loss": 1.4671, "step": 17522 }, { "epoch": 0.22770339303784823, "grad_norm": 0.35795196890830994, "learning_rate": 0.0001544912203173943, "loss": 1.2313, "step": 17523 }, { "epoch": 0.2277163875817641, "grad_norm": 0.35109421610832214, "learning_rate": 0.00015448862085548293, "loss": 1.4308, "step": 17524 }, { "epoch": 0.22772938212567997, "grad_norm": 0.3335050344467163, "learning_rate": 0.00015448602139357152, "loss": 1.2711, "step": 17525 }, { "epoch": 0.22774237666959585, "grad_norm": 0.304598867893219, "learning_rate": 0.00015448342193166015, "loss": 1.3404, "step": 17526 }, { "epoch": 0.22775537121351172, "grad_norm": 0.355780690908432, "learning_rate": 0.00015448082246974877, "loss": 1.2636, "step": 17527 }, { "epoch": 0.2277683657574276, "grad_norm": 0.45066171884536743, "learning_rate": 0.0001544782230078374, "loss": 1.2542, "step": 17528 }, { "epoch": 0.22778136030134347, "grad_norm": 0.3187810778617859, "learning_rate": 0.000154475623545926, "loss": 1.2667, "step": 17529 }, { "epoch": 0.22779435484525934, "grad_norm": 0.4394470453262329, "learning_rate": 0.00015447302408401462, "loss": 1.5081, "step": 17530 }, { "epoch": 0.2278073493891752, "grad_norm": 0.3630068004131317, "learning_rate": 0.00015447042462210324, "loss": 1.3614, "step": 17531 }, { "epoch": 0.22782034393309109, "grad_norm": 0.3895515203475952, "learning_rate": 0.00015446782516019184, "loss": 1.2763, "step": 17532 }, { "epoch": 0.22783333847700696, "grad_norm": 0.34488222002983093, "learning_rate": 0.00015446522569828046, "loss": 1.58, "step": 17533 }, { "epoch": 0.22784633302092283, "grad_norm": 0.5027956962585449, "learning_rate": 0.00015446262623636906, "loss": 1.6167, "step": 17534 }, { "epoch": 0.2278593275648387, "grad_norm": 0.4847308099269867, "learning_rate": 0.0001544600267744577, "loss": 1.7772, "step": 17535 }, { "epoch": 0.22787232210875458, "grad_norm": 0.3336997628211975, "learning_rate": 0.0001544574273125463, "loss": 1.5796, "step": 17536 }, { "epoch": 0.22788531665267045, "grad_norm": 0.4108007848262787, "learning_rate": 0.0001544548278506349, "loss": 1.3802, "step": 17537 }, { "epoch": 0.22789831119658632, "grad_norm": 0.3541216254234314, "learning_rate": 0.00015445222838872356, "loss": 1.3754, "step": 17538 }, { "epoch": 0.2279113057405022, "grad_norm": 0.3413882553577423, "learning_rate": 0.00015444962892681216, "loss": 1.278, "step": 17539 }, { "epoch": 0.22792430028441807, "grad_norm": 0.26236289739608765, "learning_rate": 0.00015444702946490078, "loss": 1.3884, "step": 17540 }, { "epoch": 0.22793729482833394, "grad_norm": 0.3783803880214691, "learning_rate": 0.00015444443000298938, "loss": 1.3518, "step": 17541 }, { "epoch": 0.22795028937224981, "grad_norm": 0.3493862748146057, "learning_rate": 0.000154441830541078, "loss": 1.4121, "step": 17542 }, { "epoch": 0.2279632839161657, "grad_norm": 0.30744510889053345, "learning_rate": 0.00015443923107916663, "loss": 1.3294, "step": 17543 }, { "epoch": 0.22797627846008156, "grad_norm": 0.37657248973846436, "learning_rate": 0.00015443663161725523, "loss": 1.4234, "step": 17544 }, { "epoch": 0.22798927300399743, "grad_norm": 0.33738717436790466, "learning_rate": 0.00015443403215534385, "loss": 1.4368, "step": 17545 }, { "epoch": 0.2280022675479133, "grad_norm": 0.3121657073497772, "learning_rate": 0.00015443143269343247, "loss": 1.1387, "step": 17546 }, { "epoch": 0.22801526209182918, "grad_norm": 0.3690843880176544, "learning_rate": 0.0001544288332315211, "loss": 1.3377, "step": 17547 }, { "epoch": 0.22802825663574508, "grad_norm": 0.617122232913971, "learning_rate": 0.0001544262337696097, "loss": 1.4112, "step": 17548 }, { "epoch": 0.22804125117966095, "grad_norm": 0.39011451601982117, "learning_rate": 0.0001544236343076983, "loss": 1.4522, "step": 17549 }, { "epoch": 0.22805424572357683, "grad_norm": 0.3467435836791992, "learning_rate": 0.00015442103484578695, "loss": 1.4362, "step": 17550 }, { "epoch": 0.2280672402674927, "grad_norm": 0.38670575618743896, "learning_rate": 0.00015441843538387554, "loss": 1.3359, "step": 17551 }, { "epoch": 0.22808023481140857, "grad_norm": 0.36283016204833984, "learning_rate": 0.00015441583592196417, "loss": 1.4344, "step": 17552 }, { "epoch": 0.22809322935532444, "grad_norm": 0.441263884305954, "learning_rate": 0.00015441323646005276, "loss": 1.4465, "step": 17553 }, { "epoch": 0.22810622389924032, "grad_norm": 0.5017417073249817, "learning_rate": 0.0001544106369981414, "loss": 1.3819, "step": 17554 }, { "epoch": 0.2281192184431562, "grad_norm": 0.43855154514312744, "learning_rate": 0.00015440803753623001, "loss": 1.5213, "step": 17555 }, { "epoch": 0.22813221298707206, "grad_norm": 0.4814172685146332, "learning_rate": 0.0001544054380743186, "loss": 1.5137, "step": 17556 }, { "epoch": 0.22814520753098794, "grad_norm": 0.3849272131919861, "learning_rate": 0.00015440283861240724, "loss": 1.3589, "step": 17557 }, { "epoch": 0.2281582020749038, "grad_norm": 0.3414883017539978, "learning_rate": 0.00015440023915049586, "loss": 1.2675, "step": 17558 }, { "epoch": 0.22817119661881968, "grad_norm": 0.27242282032966614, "learning_rate": 0.00015439763968858448, "loss": 1.337, "step": 17559 }, { "epoch": 0.22818419116273556, "grad_norm": 0.4883895814418793, "learning_rate": 0.00015439504022667308, "loss": 1.518, "step": 17560 }, { "epoch": 0.22819718570665143, "grad_norm": 0.3641911745071411, "learning_rate": 0.00015439244076476168, "loss": 1.4129, "step": 17561 }, { "epoch": 0.2282101802505673, "grad_norm": 0.30756595730781555, "learning_rate": 0.00015438984130285033, "loss": 1.3691, "step": 17562 }, { "epoch": 0.22822317479448317, "grad_norm": 0.4031060039997101, "learning_rate": 0.00015438724184093893, "loss": 1.4588, "step": 17563 }, { "epoch": 0.22823616933839905, "grad_norm": 0.4151732325553894, "learning_rate": 0.00015438464237902755, "loss": 1.496, "step": 17564 }, { "epoch": 0.22824916388231492, "grad_norm": 0.346788614988327, "learning_rate": 0.00015438204291711615, "loss": 1.2248, "step": 17565 }, { "epoch": 0.2282621584262308, "grad_norm": 0.45701804757118225, "learning_rate": 0.00015437944345520477, "loss": 1.5695, "step": 17566 }, { "epoch": 0.22827515297014667, "grad_norm": 0.3944202959537506, "learning_rate": 0.0001543768439932934, "loss": 1.2604, "step": 17567 }, { "epoch": 0.22828814751406254, "grad_norm": 0.400157630443573, "learning_rate": 0.000154374244531382, "loss": 1.5668, "step": 17568 }, { "epoch": 0.2283011420579784, "grad_norm": 0.4012301564216614, "learning_rate": 0.00015437164506947062, "loss": 1.4282, "step": 17569 }, { "epoch": 0.22831413660189429, "grad_norm": 0.3055853843688965, "learning_rate": 0.00015436904560755925, "loss": 1.2053, "step": 17570 }, { "epoch": 0.22832713114581016, "grad_norm": 0.47218552231788635, "learning_rate": 0.00015436644614564787, "loss": 1.6631, "step": 17571 }, { "epoch": 0.22834012568972603, "grad_norm": 0.4413902759552002, "learning_rate": 0.00015436384668373647, "loss": 1.4876, "step": 17572 }, { "epoch": 0.2283531202336419, "grad_norm": 0.3570714592933655, "learning_rate": 0.0001543612472218251, "loss": 1.3518, "step": 17573 }, { "epoch": 0.22836611477755778, "grad_norm": 0.4896111488342285, "learning_rate": 0.00015435864775991372, "loss": 1.4229, "step": 17574 }, { "epoch": 0.22837910932147365, "grad_norm": 0.3856905400753021, "learning_rate": 0.00015435604829800231, "loss": 1.5016, "step": 17575 }, { "epoch": 0.22839210386538952, "grad_norm": 0.4265894293785095, "learning_rate": 0.00015435344883609094, "loss": 1.6509, "step": 17576 }, { "epoch": 0.2284050984093054, "grad_norm": 0.3925989866256714, "learning_rate": 0.00015435084937417956, "loss": 1.5953, "step": 17577 }, { "epoch": 0.22841809295322127, "grad_norm": 0.31342899799346924, "learning_rate": 0.00015434824991226816, "loss": 1.2474, "step": 17578 }, { "epoch": 0.22843108749713714, "grad_norm": 0.5138340592384338, "learning_rate": 0.00015434565045035678, "loss": 1.4155, "step": 17579 }, { "epoch": 0.22844408204105301, "grad_norm": 0.4364220201969147, "learning_rate": 0.00015434305098844538, "loss": 1.5345, "step": 17580 }, { "epoch": 0.2284570765849689, "grad_norm": 0.4405708909034729, "learning_rate": 0.00015434045152653403, "loss": 1.4461, "step": 17581 }, { "epoch": 0.22847007112888476, "grad_norm": 0.4068009555339813, "learning_rate": 0.00015433785206462263, "loss": 1.4358, "step": 17582 }, { "epoch": 0.22848306567280063, "grad_norm": 0.4266466200351715, "learning_rate": 0.00015433525260271126, "loss": 1.2945, "step": 17583 }, { "epoch": 0.2284960602167165, "grad_norm": 0.4239807426929474, "learning_rate": 0.00015433265314079985, "loss": 1.2412, "step": 17584 }, { "epoch": 0.22850905476063238, "grad_norm": 0.3937571346759796, "learning_rate": 0.00015433005367888848, "loss": 1.2955, "step": 17585 }, { "epoch": 0.22852204930454825, "grad_norm": 0.3565618097782135, "learning_rate": 0.0001543274542169771, "loss": 1.2166, "step": 17586 }, { "epoch": 0.22853504384846413, "grad_norm": 0.3238370418548584, "learning_rate": 0.0001543248547550657, "loss": 1.3791, "step": 17587 }, { "epoch": 0.22854803839238, "grad_norm": 0.3457615375518799, "learning_rate": 0.00015432225529315432, "loss": 1.3926, "step": 17588 }, { "epoch": 0.22856103293629587, "grad_norm": 0.2828628420829773, "learning_rate": 0.00015431965583124295, "loss": 1.2077, "step": 17589 }, { "epoch": 0.22857402748021174, "grad_norm": 0.4756614863872528, "learning_rate": 0.00015431705636933155, "loss": 1.6235, "step": 17590 }, { "epoch": 0.22858702202412762, "grad_norm": 0.3619145154953003, "learning_rate": 0.00015431445690742017, "loss": 1.4554, "step": 17591 }, { "epoch": 0.2286000165680435, "grad_norm": 0.4134174883365631, "learning_rate": 0.00015431185744550877, "loss": 1.5222, "step": 17592 }, { "epoch": 0.22861301111195936, "grad_norm": 0.4153233468532562, "learning_rate": 0.00015430925798359742, "loss": 1.433, "step": 17593 }, { "epoch": 0.22862600565587524, "grad_norm": 0.3607367277145386, "learning_rate": 0.00015430665852168602, "loss": 1.3922, "step": 17594 }, { "epoch": 0.2286390001997911, "grad_norm": 0.4759838581085205, "learning_rate": 0.00015430405905977464, "loss": 1.4376, "step": 17595 }, { "epoch": 0.22865199474370698, "grad_norm": 0.33668971061706543, "learning_rate": 0.00015430145959786324, "loss": 1.2891, "step": 17596 }, { "epoch": 0.22866498928762286, "grad_norm": 0.3650651276111603, "learning_rate": 0.00015429886013595186, "loss": 1.4798, "step": 17597 }, { "epoch": 0.22867798383153873, "grad_norm": 0.3216116428375244, "learning_rate": 0.0001542962606740405, "loss": 1.4721, "step": 17598 }, { "epoch": 0.2286909783754546, "grad_norm": 0.4008892774581909, "learning_rate": 0.00015429366121212908, "loss": 1.3132, "step": 17599 }, { "epoch": 0.22870397291937047, "grad_norm": 0.3910900354385376, "learning_rate": 0.0001542910617502177, "loss": 1.4189, "step": 17600 }, { "epoch": 0.22871696746328635, "grad_norm": 0.31416893005371094, "learning_rate": 0.00015428846228830633, "loss": 1.5268, "step": 17601 }, { "epoch": 0.22872996200720222, "grad_norm": 0.27825412154197693, "learning_rate": 0.00015428586282639496, "loss": 1.463, "step": 17602 }, { "epoch": 0.2287429565511181, "grad_norm": 0.5107660889625549, "learning_rate": 0.00015428326336448356, "loss": 1.6413, "step": 17603 }, { "epoch": 0.22875595109503397, "grad_norm": 0.45120808482170105, "learning_rate": 0.00015428066390257215, "loss": 1.4809, "step": 17604 }, { "epoch": 0.22876894563894984, "grad_norm": 0.4583079218864441, "learning_rate": 0.0001542780644406608, "loss": 1.5901, "step": 17605 }, { "epoch": 0.2287819401828657, "grad_norm": 0.5126442909240723, "learning_rate": 0.0001542754649787494, "loss": 1.2798, "step": 17606 }, { "epoch": 0.22879493472678158, "grad_norm": 0.32243844866752625, "learning_rate": 0.00015427286551683803, "loss": 1.2415, "step": 17607 }, { "epoch": 0.22880792927069746, "grad_norm": 0.2602277100086212, "learning_rate": 0.00015427026605492662, "loss": 1.3129, "step": 17608 }, { "epoch": 0.22882092381461333, "grad_norm": 0.28018492460250854, "learning_rate": 0.00015426766659301525, "loss": 1.287, "step": 17609 }, { "epoch": 0.2288339183585292, "grad_norm": 0.3126949965953827, "learning_rate": 0.00015426506713110387, "loss": 1.4174, "step": 17610 }, { "epoch": 0.22884691290244508, "grad_norm": 0.462068647146225, "learning_rate": 0.00015426246766919247, "loss": 1.5323, "step": 17611 }, { "epoch": 0.22885990744636095, "grad_norm": 0.3884279131889343, "learning_rate": 0.00015425986820728112, "loss": 1.3909, "step": 17612 }, { "epoch": 0.22887290199027682, "grad_norm": 0.42448994517326355, "learning_rate": 0.00015425726874536972, "loss": 1.4556, "step": 17613 }, { "epoch": 0.2288858965341927, "grad_norm": 0.4207201302051544, "learning_rate": 0.00015425466928345834, "loss": 1.4939, "step": 17614 }, { "epoch": 0.22889889107810857, "grad_norm": 0.40967270731925964, "learning_rate": 0.00015425206982154694, "loss": 1.471, "step": 17615 }, { "epoch": 0.22891188562202444, "grad_norm": 0.3311474323272705, "learning_rate": 0.00015424947035963557, "loss": 1.2696, "step": 17616 }, { "epoch": 0.22892488016594031, "grad_norm": 0.3830753564834595, "learning_rate": 0.0001542468708977242, "loss": 1.3144, "step": 17617 }, { "epoch": 0.2289378747098562, "grad_norm": 0.4001374840736389, "learning_rate": 0.0001542442714358128, "loss": 1.3508, "step": 17618 }, { "epoch": 0.22895086925377206, "grad_norm": 0.42457136511802673, "learning_rate": 0.0001542416719739014, "loss": 1.6802, "step": 17619 }, { "epoch": 0.22896386379768793, "grad_norm": 0.35581204295158386, "learning_rate": 0.00015423907251199004, "loss": 1.4076, "step": 17620 }, { "epoch": 0.2289768583416038, "grad_norm": 0.40405532717704773, "learning_rate": 0.00015423647305007863, "loss": 1.5242, "step": 17621 }, { "epoch": 0.22898985288551968, "grad_norm": 0.36132997274398804, "learning_rate": 0.00015423387358816726, "loss": 1.3457, "step": 17622 }, { "epoch": 0.22900284742943555, "grad_norm": 0.37850359082221985, "learning_rate": 0.00015423127412625586, "loss": 1.3197, "step": 17623 }, { "epoch": 0.22901584197335145, "grad_norm": 0.3285813629627228, "learning_rate": 0.0001542286746643445, "loss": 1.2982, "step": 17624 }, { "epoch": 0.22902883651726733, "grad_norm": 0.3952697813510895, "learning_rate": 0.0001542260752024331, "loss": 1.4559, "step": 17625 }, { "epoch": 0.2290418310611832, "grad_norm": 0.3677605390548706, "learning_rate": 0.00015422347574052173, "loss": 1.4088, "step": 17626 }, { "epoch": 0.22905482560509907, "grad_norm": 0.43694770336151123, "learning_rate": 0.00015422087627861033, "loss": 1.4517, "step": 17627 }, { "epoch": 0.22906782014901494, "grad_norm": 0.3547777235507965, "learning_rate": 0.00015421827681669895, "loss": 1.3605, "step": 17628 }, { "epoch": 0.22908081469293082, "grad_norm": 0.4288869798183441, "learning_rate": 0.00015421567735478757, "loss": 1.4047, "step": 17629 }, { "epoch": 0.2290938092368467, "grad_norm": 0.37506112456321716, "learning_rate": 0.00015421307789287617, "loss": 1.438, "step": 17630 }, { "epoch": 0.22910680378076256, "grad_norm": 0.39819619059562683, "learning_rate": 0.0001542104784309648, "loss": 1.4956, "step": 17631 }, { "epoch": 0.22911979832467844, "grad_norm": 0.44526052474975586, "learning_rate": 0.00015420787896905342, "loss": 1.5042, "step": 17632 }, { "epoch": 0.2291327928685943, "grad_norm": 0.39161333441734314, "learning_rate": 0.00015420527950714202, "loss": 1.3279, "step": 17633 }, { "epoch": 0.22914578741251018, "grad_norm": 0.42912355065345764, "learning_rate": 0.00015420268004523064, "loss": 1.4148, "step": 17634 }, { "epoch": 0.22915878195642606, "grad_norm": 0.38548797369003296, "learning_rate": 0.00015420008058331924, "loss": 1.5169, "step": 17635 }, { "epoch": 0.22917177650034193, "grad_norm": 0.41569754481315613, "learning_rate": 0.0001541974811214079, "loss": 1.3007, "step": 17636 }, { "epoch": 0.2291847710442578, "grad_norm": 0.3122117817401886, "learning_rate": 0.0001541948816594965, "loss": 1.2777, "step": 17637 }, { "epoch": 0.22919776558817367, "grad_norm": 0.344384104013443, "learning_rate": 0.00015419228219758511, "loss": 1.3958, "step": 17638 }, { "epoch": 0.22921076013208955, "grad_norm": 0.3293127715587616, "learning_rate": 0.0001541896827356737, "loss": 1.4674, "step": 17639 }, { "epoch": 0.22922375467600542, "grad_norm": 0.29614976048469543, "learning_rate": 0.00015418708327376234, "loss": 1.4696, "step": 17640 }, { "epoch": 0.2292367492199213, "grad_norm": 0.3996814787387848, "learning_rate": 0.00015418448381185096, "loss": 1.5301, "step": 17641 }, { "epoch": 0.22924974376383717, "grad_norm": 0.47054117918014526, "learning_rate": 0.00015418188434993956, "loss": 1.4324, "step": 17642 }, { "epoch": 0.22926273830775304, "grad_norm": 0.4537562429904938, "learning_rate": 0.00015417928488802818, "loss": 1.4053, "step": 17643 }, { "epoch": 0.2292757328516689, "grad_norm": 0.4789636433124542, "learning_rate": 0.0001541766854261168, "loss": 1.4235, "step": 17644 }, { "epoch": 0.22928872739558478, "grad_norm": 0.382784903049469, "learning_rate": 0.0001541740859642054, "loss": 1.5688, "step": 17645 }, { "epoch": 0.22930172193950066, "grad_norm": 0.363221138715744, "learning_rate": 0.00015417148650229403, "loss": 1.2391, "step": 17646 }, { "epoch": 0.22931471648341653, "grad_norm": 0.4165670871734619, "learning_rate": 0.00015416888704038265, "loss": 1.3637, "step": 17647 }, { "epoch": 0.2293277110273324, "grad_norm": 0.3411755859851837, "learning_rate": 0.00015416628757847128, "loss": 1.4937, "step": 17648 }, { "epoch": 0.22934070557124828, "grad_norm": 0.38665154576301575, "learning_rate": 0.00015416368811655987, "loss": 1.3832, "step": 17649 }, { "epoch": 0.22935370011516415, "grad_norm": 0.44277629256248474, "learning_rate": 0.0001541610886546485, "loss": 1.4414, "step": 17650 }, { "epoch": 0.22936669465908002, "grad_norm": 0.36975422501564026, "learning_rate": 0.00015415848919273712, "loss": 1.4096, "step": 17651 }, { "epoch": 0.2293796892029959, "grad_norm": 0.3800652325153351, "learning_rate": 0.00015415588973082572, "loss": 1.3322, "step": 17652 }, { "epoch": 0.22939268374691177, "grad_norm": 0.4465552866458893, "learning_rate": 0.00015415329026891435, "loss": 1.4889, "step": 17653 }, { "epoch": 0.22940567829082764, "grad_norm": 0.41041141748428345, "learning_rate": 0.00015415069080700294, "loss": 1.4372, "step": 17654 }, { "epoch": 0.22941867283474351, "grad_norm": 0.39620378613471985, "learning_rate": 0.0001541480913450916, "loss": 1.4789, "step": 17655 }, { "epoch": 0.2294316673786594, "grad_norm": 0.446790486574173, "learning_rate": 0.0001541454918831802, "loss": 1.2575, "step": 17656 }, { "epoch": 0.22944466192257526, "grad_norm": 0.46105313301086426, "learning_rate": 0.0001541428924212688, "loss": 1.5288, "step": 17657 }, { "epoch": 0.22945765646649113, "grad_norm": 0.34282946586608887, "learning_rate": 0.00015414029295935741, "loss": 1.4353, "step": 17658 }, { "epoch": 0.229470651010407, "grad_norm": 0.4200456440448761, "learning_rate": 0.00015413769349744604, "loss": 1.3926, "step": 17659 }, { "epoch": 0.22948364555432288, "grad_norm": 0.45076262950897217, "learning_rate": 0.00015413509403553466, "loss": 1.4815, "step": 17660 }, { "epoch": 0.22949664009823875, "grad_norm": 0.4466899633407593, "learning_rate": 0.00015413249457362326, "loss": 1.4514, "step": 17661 }, { "epoch": 0.22950963464215463, "grad_norm": 0.35786283016204834, "learning_rate": 0.00015412989511171188, "loss": 1.4671, "step": 17662 }, { "epoch": 0.2295226291860705, "grad_norm": 0.38174551725387573, "learning_rate": 0.0001541272956498005, "loss": 1.5623, "step": 17663 }, { "epoch": 0.22953562372998637, "grad_norm": 0.37272652983665466, "learning_rate": 0.0001541246961878891, "loss": 1.4073, "step": 17664 }, { "epoch": 0.22954861827390224, "grad_norm": 0.47229018807411194, "learning_rate": 0.00015412209672597773, "loss": 1.5061, "step": 17665 }, { "epoch": 0.22956161281781812, "grad_norm": 0.40937644243240356, "learning_rate": 0.00015411949726406633, "loss": 1.4206, "step": 17666 }, { "epoch": 0.229574607361734, "grad_norm": 0.4650891125202179, "learning_rate": 0.00015411689780215498, "loss": 1.4561, "step": 17667 }, { "epoch": 0.22958760190564986, "grad_norm": 0.4773378372192383, "learning_rate": 0.00015411429834024358, "loss": 1.4002, "step": 17668 }, { "epoch": 0.22960059644956574, "grad_norm": 0.4365747272968292, "learning_rate": 0.0001541116988783322, "loss": 1.2941, "step": 17669 }, { "epoch": 0.2296135909934816, "grad_norm": 0.4797625243663788, "learning_rate": 0.0001541090994164208, "loss": 1.549, "step": 17670 }, { "epoch": 0.22962658553739748, "grad_norm": 0.4469527006149292, "learning_rate": 0.00015410649995450942, "loss": 1.5086, "step": 17671 }, { "epoch": 0.22963958008131335, "grad_norm": 0.374441921710968, "learning_rate": 0.00015410390049259805, "loss": 1.4955, "step": 17672 }, { "epoch": 0.22965257462522923, "grad_norm": 0.3637180030345917, "learning_rate": 0.00015410130103068665, "loss": 1.448, "step": 17673 }, { "epoch": 0.2296655691691451, "grad_norm": 0.3801933228969574, "learning_rate": 0.00015409870156877527, "loss": 1.3988, "step": 17674 }, { "epoch": 0.22967856371306097, "grad_norm": 0.3234044909477234, "learning_rate": 0.0001540961021068639, "loss": 1.444, "step": 17675 }, { "epoch": 0.22969155825697685, "grad_norm": 0.33238109946250916, "learning_rate": 0.0001540935026449525, "loss": 1.4295, "step": 17676 }, { "epoch": 0.22970455280089272, "grad_norm": 0.369138240814209, "learning_rate": 0.00015409090318304112, "loss": 1.5526, "step": 17677 }, { "epoch": 0.2297175473448086, "grad_norm": 0.32458943128585815, "learning_rate": 0.00015408830372112971, "loss": 1.2806, "step": 17678 }, { "epoch": 0.22973054188872447, "grad_norm": 0.3393263816833496, "learning_rate": 0.00015408570425921837, "loss": 1.5087, "step": 17679 }, { "epoch": 0.22974353643264034, "grad_norm": 0.40407025814056396, "learning_rate": 0.00015408310479730696, "loss": 1.4372, "step": 17680 }, { "epoch": 0.2297565309765562, "grad_norm": 0.37247687578201294, "learning_rate": 0.0001540805053353956, "loss": 1.4607, "step": 17681 }, { "epoch": 0.22976952552047208, "grad_norm": 0.5529502034187317, "learning_rate": 0.00015407790587348418, "loss": 1.3671, "step": 17682 }, { "epoch": 0.22978252006438796, "grad_norm": 0.4205712378025055, "learning_rate": 0.0001540753064115728, "loss": 1.4083, "step": 17683 }, { "epoch": 0.22979551460830383, "grad_norm": 0.3608722686767578, "learning_rate": 0.00015407270694966143, "loss": 1.2233, "step": 17684 }, { "epoch": 0.2298085091522197, "grad_norm": 0.47460004687309265, "learning_rate": 0.00015407010748775003, "loss": 1.2779, "step": 17685 }, { "epoch": 0.22982150369613558, "grad_norm": 0.3191768229007721, "learning_rate": 0.00015406750802583868, "loss": 1.2741, "step": 17686 }, { "epoch": 0.22983449824005145, "grad_norm": 0.42855381965637207, "learning_rate": 0.00015406490856392728, "loss": 1.378, "step": 17687 }, { "epoch": 0.22984749278396732, "grad_norm": 0.354781836271286, "learning_rate": 0.00015406230910201588, "loss": 1.5386, "step": 17688 }, { "epoch": 0.2298604873278832, "grad_norm": 0.39844000339508057, "learning_rate": 0.0001540597096401045, "loss": 1.3719, "step": 17689 }, { "epoch": 0.22987348187179907, "grad_norm": 0.42703622579574585, "learning_rate": 0.00015405711017819313, "loss": 1.4384, "step": 17690 }, { "epoch": 0.22988647641571494, "grad_norm": 0.38441357016563416, "learning_rate": 0.00015405451071628175, "loss": 1.2483, "step": 17691 }, { "epoch": 0.22989947095963081, "grad_norm": 0.31359657645225525, "learning_rate": 0.00015405191125437035, "loss": 1.4259, "step": 17692 }, { "epoch": 0.2299124655035467, "grad_norm": 0.3714214861392975, "learning_rate": 0.00015404931179245897, "loss": 1.3477, "step": 17693 }, { "epoch": 0.22992546004746256, "grad_norm": 0.477580726146698, "learning_rate": 0.0001540467123305476, "loss": 1.4587, "step": 17694 }, { "epoch": 0.22993845459137843, "grad_norm": 0.5337256789207458, "learning_rate": 0.0001540441128686362, "loss": 1.5064, "step": 17695 }, { "epoch": 0.2299514491352943, "grad_norm": 0.41329288482666016, "learning_rate": 0.00015404151340672482, "loss": 1.2234, "step": 17696 }, { "epoch": 0.22996444367921018, "grad_norm": 0.4516674280166626, "learning_rate": 0.00015403891394481342, "loss": 1.2709, "step": 17697 }, { "epoch": 0.22997743822312605, "grad_norm": 0.4006953537464142, "learning_rate": 0.00015403631448290207, "loss": 1.4849, "step": 17698 }, { "epoch": 0.22999043276704192, "grad_norm": 0.4751393496990204, "learning_rate": 0.00015403371502099067, "loss": 1.3091, "step": 17699 }, { "epoch": 0.23000342731095783, "grad_norm": 0.3570221960544586, "learning_rate": 0.00015403111555907926, "loss": 1.4614, "step": 17700 }, { "epoch": 0.2300164218548737, "grad_norm": 0.5387532114982605, "learning_rate": 0.0001540285160971679, "loss": 1.5955, "step": 17701 }, { "epoch": 0.23002941639878957, "grad_norm": 0.32554253935813904, "learning_rate": 0.0001540259166352565, "loss": 1.3071, "step": 17702 }, { "epoch": 0.23004241094270544, "grad_norm": 0.42064937949180603, "learning_rate": 0.00015402331717334514, "loss": 1.5228, "step": 17703 }, { "epoch": 0.23005540548662132, "grad_norm": 0.3914891481399536, "learning_rate": 0.00015402071771143373, "loss": 1.4626, "step": 17704 }, { "epoch": 0.2300684000305372, "grad_norm": 0.4852510094642639, "learning_rate": 0.00015401811824952236, "loss": 1.5935, "step": 17705 }, { "epoch": 0.23008139457445306, "grad_norm": 0.4209222197532654, "learning_rate": 0.00015401551878761098, "loss": 1.3421, "step": 17706 }, { "epoch": 0.23009438911836894, "grad_norm": 0.2824123501777649, "learning_rate": 0.00015401291932569958, "loss": 1.2957, "step": 17707 }, { "epoch": 0.2301073836622848, "grad_norm": 0.30800673365592957, "learning_rate": 0.0001540103198637882, "loss": 1.0889, "step": 17708 }, { "epoch": 0.23012037820620068, "grad_norm": 0.551593542098999, "learning_rate": 0.0001540077204018768, "loss": 1.4372, "step": 17709 }, { "epoch": 0.23013337275011655, "grad_norm": 0.47093117237091064, "learning_rate": 0.00015400512093996545, "loss": 1.4836, "step": 17710 }, { "epoch": 0.23014636729403243, "grad_norm": 0.34184256196022034, "learning_rate": 0.00015400252147805405, "loss": 1.389, "step": 17711 }, { "epoch": 0.2301593618379483, "grad_norm": 0.41638702154159546, "learning_rate": 0.00015399992201614265, "loss": 1.3359, "step": 17712 }, { "epoch": 0.23017235638186417, "grad_norm": 0.35902711749076843, "learning_rate": 0.00015399732255423127, "loss": 1.6393, "step": 17713 }, { "epoch": 0.23018535092578005, "grad_norm": 0.4319157898426056, "learning_rate": 0.0001539947230923199, "loss": 1.4951, "step": 17714 }, { "epoch": 0.23019834546969592, "grad_norm": 0.3503279387950897, "learning_rate": 0.00015399212363040852, "loss": 1.554, "step": 17715 }, { "epoch": 0.2302113400136118, "grad_norm": 0.47127795219421387, "learning_rate": 0.00015398952416849712, "loss": 1.5118, "step": 17716 }, { "epoch": 0.23022433455752767, "grad_norm": 0.4257420599460602, "learning_rate": 0.00015398692470658574, "loss": 1.3695, "step": 17717 }, { "epoch": 0.23023732910144354, "grad_norm": 0.4545004963874817, "learning_rate": 0.00015398432524467437, "loss": 1.459, "step": 17718 }, { "epoch": 0.2302503236453594, "grad_norm": 0.34622564911842346, "learning_rate": 0.00015398172578276297, "loss": 1.2835, "step": 17719 }, { "epoch": 0.23026331818927528, "grad_norm": 0.3949187695980072, "learning_rate": 0.0001539791263208516, "loss": 1.3423, "step": 17720 }, { "epoch": 0.23027631273319116, "grad_norm": 0.4023889899253845, "learning_rate": 0.00015397652685894021, "loss": 1.399, "step": 17721 }, { "epoch": 0.23028930727710703, "grad_norm": 0.34029367566108704, "learning_rate": 0.00015397392739702884, "loss": 1.2726, "step": 17722 }, { "epoch": 0.2303023018210229, "grad_norm": 0.386578232049942, "learning_rate": 0.00015397132793511744, "loss": 1.3364, "step": 17723 }, { "epoch": 0.23031529636493878, "grad_norm": 0.34251466393470764, "learning_rate": 0.00015396872847320606, "loss": 1.2247, "step": 17724 }, { "epoch": 0.23032829090885465, "grad_norm": 0.4175904393196106, "learning_rate": 0.00015396612901129469, "loss": 1.567, "step": 17725 }, { "epoch": 0.23034128545277052, "grad_norm": 0.448779433965683, "learning_rate": 0.00015396352954938328, "loss": 1.4457, "step": 17726 }, { "epoch": 0.2303542799966864, "grad_norm": 0.40709659457206726, "learning_rate": 0.0001539609300874719, "loss": 1.4649, "step": 17727 }, { "epoch": 0.23036727454060227, "grad_norm": 0.3895239531993866, "learning_rate": 0.0001539583306255605, "loss": 1.3916, "step": 17728 }, { "epoch": 0.23038026908451814, "grad_norm": 0.41733086109161377, "learning_rate": 0.00015395573116364913, "loss": 1.5186, "step": 17729 }, { "epoch": 0.23039326362843401, "grad_norm": 0.5264485478401184, "learning_rate": 0.00015395313170173775, "loss": 1.5917, "step": 17730 }, { "epoch": 0.2304062581723499, "grad_norm": 0.3452605605125427, "learning_rate": 0.00015395053223982635, "loss": 1.4038, "step": 17731 }, { "epoch": 0.23041925271626576, "grad_norm": 0.48457321524620056, "learning_rate": 0.00015394793277791498, "loss": 1.4106, "step": 17732 }, { "epoch": 0.23043224726018163, "grad_norm": 0.4452970027923584, "learning_rate": 0.0001539453333160036, "loss": 1.5444, "step": 17733 }, { "epoch": 0.2304452418040975, "grad_norm": 0.33489036560058594, "learning_rate": 0.00015394273385409222, "loss": 1.4237, "step": 17734 }, { "epoch": 0.23045823634801338, "grad_norm": 0.4045279324054718, "learning_rate": 0.00015394013439218082, "loss": 1.4664, "step": 17735 }, { "epoch": 0.23047123089192925, "grad_norm": 0.48704978823661804, "learning_rate": 0.00015393753493026945, "loss": 1.5603, "step": 17736 }, { "epoch": 0.23048422543584512, "grad_norm": 0.4264164865016937, "learning_rate": 0.00015393493546835807, "loss": 1.4007, "step": 17737 }, { "epoch": 0.230497219979761, "grad_norm": 0.40201908349990845, "learning_rate": 0.00015393233600644667, "loss": 1.4843, "step": 17738 }, { "epoch": 0.23051021452367687, "grad_norm": 0.4726935923099518, "learning_rate": 0.0001539297365445353, "loss": 1.6548, "step": 17739 }, { "epoch": 0.23052320906759274, "grad_norm": 0.3787649869918823, "learning_rate": 0.0001539271370826239, "loss": 1.2578, "step": 17740 }, { "epoch": 0.23053620361150862, "grad_norm": 0.5269920825958252, "learning_rate": 0.00015392453762071251, "loss": 1.3792, "step": 17741 }, { "epoch": 0.2305491981554245, "grad_norm": 0.2908004820346832, "learning_rate": 0.00015392193815880114, "loss": 1.1771, "step": 17742 }, { "epoch": 0.23056219269934036, "grad_norm": 0.45666834712028503, "learning_rate": 0.00015391933869688974, "loss": 1.3636, "step": 17743 }, { "epoch": 0.23057518724325624, "grad_norm": 0.32404395937919617, "learning_rate": 0.00015391673923497836, "loss": 1.3183, "step": 17744 }, { "epoch": 0.2305881817871721, "grad_norm": 0.3777819871902466, "learning_rate": 0.00015391413977306699, "loss": 1.6317, "step": 17745 }, { "epoch": 0.23060117633108798, "grad_norm": 0.39990320801734924, "learning_rate": 0.0001539115403111556, "loss": 1.5315, "step": 17746 }, { "epoch": 0.23061417087500385, "grad_norm": 0.4379133880138397, "learning_rate": 0.0001539089408492442, "loss": 1.329, "step": 17747 }, { "epoch": 0.23062716541891973, "grad_norm": 0.2936893105506897, "learning_rate": 0.00015390634138733283, "loss": 1.2154, "step": 17748 }, { "epoch": 0.2306401599628356, "grad_norm": 0.29721030592918396, "learning_rate": 0.00015390374192542146, "loss": 1.3616, "step": 17749 }, { "epoch": 0.23065315450675147, "grad_norm": 0.43537285923957825, "learning_rate": 0.00015390114246351005, "loss": 1.407, "step": 17750 }, { "epoch": 0.23066614905066735, "grad_norm": 0.3409595489501953, "learning_rate": 0.00015389854300159868, "loss": 1.3873, "step": 17751 }, { "epoch": 0.23067914359458322, "grad_norm": 0.5316119194030762, "learning_rate": 0.00015389594353968728, "loss": 1.6667, "step": 17752 }, { "epoch": 0.2306921381384991, "grad_norm": 0.3834903836250305, "learning_rate": 0.00015389334407777593, "loss": 1.5414, "step": 17753 }, { "epoch": 0.23070513268241497, "grad_norm": 0.41867783665657043, "learning_rate": 0.00015389074461586452, "loss": 1.4372, "step": 17754 }, { "epoch": 0.23071812722633084, "grad_norm": 0.3815166652202606, "learning_rate": 0.00015388814515395312, "loss": 1.2776, "step": 17755 }, { "epoch": 0.2307311217702467, "grad_norm": 0.38519179821014404, "learning_rate": 0.00015388554569204175, "loss": 1.4389, "step": 17756 }, { "epoch": 0.23074411631416258, "grad_norm": 0.3920106887817383, "learning_rate": 0.00015388294623013037, "loss": 1.5267, "step": 17757 }, { "epoch": 0.23075711085807846, "grad_norm": 0.36666139960289, "learning_rate": 0.000153880346768219, "loss": 1.5599, "step": 17758 }, { "epoch": 0.23077010540199433, "grad_norm": 0.38227421045303345, "learning_rate": 0.0001538777473063076, "loss": 1.4345, "step": 17759 }, { "epoch": 0.2307830999459102, "grad_norm": 0.44606131315231323, "learning_rate": 0.00015387514784439622, "loss": 1.4597, "step": 17760 }, { "epoch": 0.23079609448982608, "grad_norm": 0.39926958084106445, "learning_rate": 0.00015387254838248484, "loss": 1.3754, "step": 17761 }, { "epoch": 0.23080908903374195, "grad_norm": 0.39020079374313354, "learning_rate": 0.00015386994892057344, "loss": 1.4708, "step": 17762 }, { "epoch": 0.23082208357765782, "grad_norm": 0.23276223242282867, "learning_rate": 0.00015386734945866206, "loss": 1.377, "step": 17763 }, { "epoch": 0.2308350781215737, "grad_norm": 0.44530966877937317, "learning_rate": 0.0001538647499967507, "loss": 1.4262, "step": 17764 }, { "epoch": 0.23084807266548957, "grad_norm": 0.3683391213417053, "learning_rate": 0.0001538621505348393, "loss": 1.5455, "step": 17765 }, { "epoch": 0.23086106720940544, "grad_norm": 0.42391595244407654, "learning_rate": 0.0001538595510729279, "loss": 1.3392, "step": 17766 }, { "epoch": 0.2308740617533213, "grad_norm": 0.48458898067474365, "learning_rate": 0.0001538569516110165, "loss": 1.3953, "step": 17767 }, { "epoch": 0.2308870562972372, "grad_norm": 0.31297212839126587, "learning_rate": 0.00015385435214910516, "loss": 1.397, "step": 17768 }, { "epoch": 0.23090005084115306, "grad_norm": 0.3093230128288269, "learning_rate": 0.00015385175268719376, "loss": 1.3708, "step": 17769 }, { "epoch": 0.23091304538506893, "grad_norm": 0.5038252472877502, "learning_rate": 0.00015384915322528238, "loss": 1.4602, "step": 17770 }, { "epoch": 0.2309260399289848, "grad_norm": 0.3663513958454132, "learning_rate": 0.00015384655376337098, "loss": 1.2451, "step": 17771 }, { "epoch": 0.23093903447290068, "grad_norm": 0.3613506555557251, "learning_rate": 0.0001538439543014596, "loss": 1.4721, "step": 17772 }, { "epoch": 0.23095202901681655, "grad_norm": 0.4875245690345764, "learning_rate": 0.00015384135483954823, "loss": 1.4681, "step": 17773 }, { "epoch": 0.23096502356073242, "grad_norm": 0.3016203045845032, "learning_rate": 0.00015383875537763682, "loss": 1.3883, "step": 17774 }, { "epoch": 0.2309780181046483, "grad_norm": 0.39854696393013, "learning_rate": 0.00015383615591572545, "loss": 1.379, "step": 17775 }, { "epoch": 0.2309910126485642, "grad_norm": 0.43419399857521057, "learning_rate": 0.00015383355645381407, "loss": 1.3976, "step": 17776 }, { "epoch": 0.23100400719248007, "grad_norm": 0.31253641843795776, "learning_rate": 0.0001538309569919027, "loss": 1.2721, "step": 17777 }, { "epoch": 0.23101700173639594, "grad_norm": 0.3926675319671631, "learning_rate": 0.0001538283575299913, "loss": 1.2644, "step": 17778 }, { "epoch": 0.23102999628031182, "grad_norm": 0.3172212839126587, "learning_rate": 0.00015382575806807992, "loss": 1.385, "step": 17779 }, { "epoch": 0.2310429908242277, "grad_norm": 0.5464864373207092, "learning_rate": 0.00015382315860616854, "loss": 1.4979, "step": 17780 }, { "epoch": 0.23105598536814356, "grad_norm": 0.39636465907096863, "learning_rate": 0.00015382055914425714, "loss": 1.3413, "step": 17781 }, { "epoch": 0.23106897991205944, "grad_norm": 0.31318336725234985, "learning_rate": 0.00015381795968234577, "loss": 1.3443, "step": 17782 }, { "epoch": 0.2310819744559753, "grad_norm": 0.4878489375114441, "learning_rate": 0.00015381536022043436, "loss": 1.4212, "step": 17783 }, { "epoch": 0.23109496899989118, "grad_norm": 0.43249329924583435, "learning_rate": 0.000153812760758523, "loss": 1.2095, "step": 17784 }, { "epoch": 0.23110796354380705, "grad_norm": 0.3974393904209137, "learning_rate": 0.0001538101612966116, "loss": 1.4415, "step": 17785 }, { "epoch": 0.23112095808772293, "grad_norm": 0.41968485713005066, "learning_rate": 0.0001538075618347002, "loss": 1.3609, "step": 17786 }, { "epoch": 0.2311339526316388, "grad_norm": 0.3287057876586914, "learning_rate": 0.00015380496237278883, "loss": 1.3519, "step": 17787 }, { "epoch": 0.23114694717555467, "grad_norm": 0.35635054111480713, "learning_rate": 0.00015380236291087746, "loss": 1.4472, "step": 17788 }, { "epoch": 0.23115994171947055, "grad_norm": 0.4138796031475067, "learning_rate": 0.00015379976344896608, "loss": 1.3219, "step": 17789 }, { "epoch": 0.23117293626338642, "grad_norm": 0.7689157128334045, "learning_rate": 0.00015379716398705468, "loss": 1.4017, "step": 17790 }, { "epoch": 0.2311859308073023, "grad_norm": 0.5234813094139099, "learning_rate": 0.0001537945645251433, "loss": 1.5614, "step": 17791 }, { "epoch": 0.23119892535121817, "grad_norm": 0.3792448937892914, "learning_rate": 0.00015379196506323193, "loss": 1.3483, "step": 17792 }, { "epoch": 0.23121191989513404, "grad_norm": 0.39990630745887756, "learning_rate": 0.00015378936560132053, "loss": 1.3536, "step": 17793 }, { "epoch": 0.2312249144390499, "grad_norm": 0.3721769452095032, "learning_rate": 0.00015378676613940915, "loss": 1.2774, "step": 17794 }, { "epoch": 0.23123790898296578, "grad_norm": 0.33774635195732117, "learning_rate": 0.00015378416667749778, "loss": 1.3486, "step": 17795 }, { "epoch": 0.23125090352688166, "grad_norm": 0.3447568416595459, "learning_rate": 0.00015378156721558637, "loss": 1.5181, "step": 17796 }, { "epoch": 0.23126389807079753, "grad_norm": 0.34187808632850647, "learning_rate": 0.000153778967753675, "loss": 1.2728, "step": 17797 }, { "epoch": 0.2312768926147134, "grad_norm": 0.37070977687835693, "learning_rate": 0.0001537763682917636, "loss": 1.5407, "step": 17798 }, { "epoch": 0.23128988715862928, "grad_norm": 0.3658391237258911, "learning_rate": 0.00015377376882985225, "loss": 1.339, "step": 17799 }, { "epoch": 0.23130288170254515, "grad_norm": 0.37719231843948364, "learning_rate": 0.00015377116936794084, "loss": 1.349, "step": 17800 }, { "epoch": 0.23131587624646102, "grad_norm": 0.45412755012512207, "learning_rate": 0.00015376856990602947, "loss": 1.4136, "step": 17801 }, { "epoch": 0.2313288707903769, "grad_norm": 0.31934070587158203, "learning_rate": 0.00015376597044411807, "loss": 1.2899, "step": 17802 }, { "epoch": 0.23134186533429277, "grad_norm": 0.35606926679611206, "learning_rate": 0.0001537633709822067, "loss": 1.4105, "step": 17803 }, { "epoch": 0.23135485987820864, "grad_norm": 0.5676253437995911, "learning_rate": 0.00015376077152029531, "loss": 1.5117, "step": 17804 }, { "epoch": 0.2313678544221245, "grad_norm": 0.3563275635242462, "learning_rate": 0.0001537581720583839, "loss": 1.6511, "step": 17805 }, { "epoch": 0.2313808489660404, "grad_norm": 0.3710525929927826, "learning_rate": 0.00015375557259647254, "loss": 1.4105, "step": 17806 }, { "epoch": 0.23139384350995626, "grad_norm": 0.43689870834350586, "learning_rate": 0.00015375297313456116, "loss": 1.4626, "step": 17807 }, { "epoch": 0.23140683805387213, "grad_norm": 0.42645755410194397, "learning_rate": 0.00015375037367264979, "loss": 1.4595, "step": 17808 }, { "epoch": 0.231419832597788, "grad_norm": 0.41030409932136536, "learning_rate": 0.00015374777421073838, "loss": 1.3959, "step": 17809 }, { "epoch": 0.23143282714170388, "grad_norm": 0.39593061804771423, "learning_rate": 0.00015374517474882698, "loss": 1.4249, "step": 17810 }, { "epoch": 0.23144582168561975, "grad_norm": 0.3445868492126465, "learning_rate": 0.00015374257528691563, "loss": 1.211, "step": 17811 }, { "epoch": 0.23145881622953562, "grad_norm": 0.386158287525177, "learning_rate": 0.00015373997582500423, "loss": 1.4471, "step": 17812 }, { "epoch": 0.2314718107734515, "grad_norm": 0.3330208361148834, "learning_rate": 0.00015373737636309285, "loss": 1.1957, "step": 17813 }, { "epoch": 0.23148480531736737, "grad_norm": 0.4697929918766022, "learning_rate": 0.00015373477690118145, "loss": 1.3807, "step": 17814 }, { "epoch": 0.23149779986128324, "grad_norm": 0.4216265082359314, "learning_rate": 0.00015373217743927008, "loss": 1.4217, "step": 17815 }, { "epoch": 0.23151079440519912, "grad_norm": 0.4688165485858917, "learning_rate": 0.0001537295779773587, "loss": 1.2106, "step": 17816 }, { "epoch": 0.231523788949115, "grad_norm": 0.4179801344871521, "learning_rate": 0.0001537269785154473, "loss": 1.4094, "step": 17817 }, { "epoch": 0.23153678349303086, "grad_norm": 0.3999471664428711, "learning_rate": 0.00015372437905353592, "loss": 1.3621, "step": 17818 }, { "epoch": 0.23154977803694674, "grad_norm": 0.34358906745910645, "learning_rate": 0.00015372177959162455, "loss": 1.5047, "step": 17819 }, { "epoch": 0.2315627725808626, "grad_norm": 0.3317813277244568, "learning_rate": 0.00015371918012971317, "loss": 1.2093, "step": 17820 }, { "epoch": 0.23157576712477848, "grad_norm": 0.49148693680763245, "learning_rate": 0.00015371658066780177, "loss": 1.4874, "step": 17821 }, { "epoch": 0.23158876166869435, "grad_norm": 0.40034812688827515, "learning_rate": 0.00015371398120589037, "loss": 1.6476, "step": 17822 }, { "epoch": 0.23160175621261023, "grad_norm": 0.2973600924015045, "learning_rate": 0.00015371138174397902, "loss": 1.102, "step": 17823 }, { "epoch": 0.2316147507565261, "grad_norm": 0.46445170044898987, "learning_rate": 0.00015370878228206761, "loss": 1.4162, "step": 17824 }, { "epoch": 0.23162774530044197, "grad_norm": 0.5100994110107422, "learning_rate": 0.00015370618282015624, "loss": 1.6362, "step": 17825 }, { "epoch": 0.23164073984435785, "grad_norm": 0.36702030897140503, "learning_rate": 0.00015370358335824484, "loss": 1.3632, "step": 17826 }, { "epoch": 0.23165373438827372, "grad_norm": 0.3670720160007477, "learning_rate": 0.00015370098389633346, "loss": 1.3551, "step": 17827 }, { "epoch": 0.2316667289321896, "grad_norm": 0.37104499340057373, "learning_rate": 0.00015369838443442209, "loss": 1.3752, "step": 17828 }, { "epoch": 0.23167972347610546, "grad_norm": 0.443123459815979, "learning_rate": 0.00015369578497251068, "loss": 1.4786, "step": 17829 }, { "epoch": 0.23169271802002134, "grad_norm": 0.337756872177124, "learning_rate": 0.0001536931855105993, "loss": 1.3264, "step": 17830 }, { "epoch": 0.2317057125639372, "grad_norm": 0.3932908773422241, "learning_rate": 0.00015369058604868793, "loss": 1.2971, "step": 17831 }, { "epoch": 0.23171870710785308, "grad_norm": 0.425310879945755, "learning_rate": 0.00015368798658677656, "loss": 1.2991, "step": 17832 }, { "epoch": 0.23173170165176896, "grad_norm": 0.3532455861568451, "learning_rate": 0.00015368538712486515, "loss": 1.4057, "step": 17833 }, { "epoch": 0.23174469619568483, "grad_norm": 0.3757370114326477, "learning_rate": 0.00015368278766295378, "loss": 1.2325, "step": 17834 }, { "epoch": 0.2317576907396007, "grad_norm": 0.4334326982498169, "learning_rate": 0.0001536801882010424, "loss": 1.5564, "step": 17835 }, { "epoch": 0.23177068528351658, "grad_norm": 0.34158578515052795, "learning_rate": 0.000153677588739131, "loss": 1.3843, "step": 17836 }, { "epoch": 0.23178367982743245, "grad_norm": 0.35654598474502563, "learning_rate": 0.00015367498927721962, "loss": 1.3741, "step": 17837 }, { "epoch": 0.23179667437134832, "grad_norm": 0.453436017036438, "learning_rate": 0.00015367238981530825, "loss": 1.2056, "step": 17838 }, { "epoch": 0.2318096689152642, "grad_norm": 0.4400738775730133, "learning_rate": 0.00015366979035339685, "loss": 1.5811, "step": 17839 }, { "epoch": 0.23182266345918007, "grad_norm": 0.4141646921634674, "learning_rate": 0.00015366719089148547, "loss": 1.4859, "step": 17840 }, { "epoch": 0.23183565800309594, "grad_norm": 0.39764395356178284, "learning_rate": 0.00015366459142957407, "loss": 1.4994, "step": 17841 }, { "epoch": 0.2318486525470118, "grad_norm": 0.45021700859069824, "learning_rate": 0.00015366199196766272, "loss": 1.7234, "step": 17842 }, { "epoch": 0.23186164709092769, "grad_norm": 0.40634238719940186, "learning_rate": 0.00015365939250575132, "loss": 1.2821, "step": 17843 }, { "epoch": 0.23187464163484356, "grad_norm": 0.40197062492370605, "learning_rate": 0.00015365679304383994, "loss": 1.3954, "step": 17844 }, { "epoch": 0.23188763617875943, "grad_norm": 0.4001365303993225, "learning_rate": 0.00015365419358192854, "loss": 1.2962, "step": 17845 }, { "epoch": 0.2319006307226753, "grad_norm": 0.3134896457195282, "learning_rate": 0.00015365159412001716, "loss": 1.4198, "step": 17846 }, { "epoch": 0.23191362526659118, "grad_norm": 0.6061667203903198, "learning_rate": 0.0001536489946581058, "loss": 1.5099, "step": 17847 }, { "epoch": 0.23192661981050705, "grad_norm": 0.3054920732975006, "learning_rate": 0.00015364639519619439, "loss": 1.4205, "step": 17848 }, { "epoch": 0.23193961435442292, "grad_norm": 0.3513713777065277, "learning_rate": 0.000153643795734283, "loss": 1.5263, "step": 17849 }, { "epoch": 0.2319526088983388, "grad_norm": 0.42762911319732666, "learning_rate": 0.00015364119627237163, "loss": 1.3243, "step": 17850 }, { "epoch": 0.23196560344225467, "grad_norm": 0.3481937348842621, "learning_rate": 0.00015363859681046023, "loss": 1.2704, "step": 17851 }, { "epoch": 0.23197859798617057, "grad_norm": 0.3869515359401703, "learning_rate": 0.00015363599734854886, "loss": 1.6046, "step": 17852 }, { "epoch": 0.23199159253008644, "grad_norm": 0.4191778004169464, "learning_rate": 0.00015363339788663745, "loss": 1.517, "step": 17853 }, { "epoch": 0.23200458707400232, "grad_norm": 0.3250775635242462, "learning_rate": 0.0001536307984247261, "loss": 1.3247, "step": 17854 }, { "epoch": 0.2320175816179182, "grad_norm": 0.41848769783973694, "learning_rate": 0.0001536281989628147, "loss": 1.2927, "step": 17855 }, { "epoch": 0.23203057616183406, "grad_norm": 0.3696177899837494, "learning_rate": 0.00015362559950090333, "loss": 1.4659, "step": 17856 }, { "epoch": 0.23204357070574994, "grad_norm": 0.33975252509117126, "learning_rate": 0.00015362300003899192, "loss": 1.2339, "step": 17857 }, { "epoch": 0.2320565652496658, "grad_norm": 0.4539058804512024, "learning_rate": 0.00015362040057708055, "loss": 1.5023, "step": 17858 }, { "epoch": 0.23206955979358168, "grad_norm": 0.44186097383499146, "learning_rate": 0.00015361780111516917, "loss": 1.4831, "step": 17859 }, { "epoch": 0.23208255433749755, "grad_norm": 0.38855695724487305, "learning_rate": 0.00015361520165325777, "loss": 1.4965, "step": 17860 }, { "epoch": 0.23209554888141343, "grad_norm": 0.4631621539592743, "learning_rate": 0.0001536126021913464, "loss": 1.4102, "step": 17861 }, { "epoch": 0.2321085434253293, "grad_norm": 0.39363187551498413, "learning_rate": 0.00015361000272943502, "loss": 1.4716, "step": 17862 }, { "epoch": 0.23212153796924517, "grad_norm": 0.24282239377498627, "learning_rate": 0.00015360740326752364, "loss": 1.2962, "step": 17863 }, { "epoch": 0.23213453251316105, "grad_norm": 0.3153238594532013, "learning_rate": 0.00015360480380561224, "loss": 1.4539, "step": 17864 }, { "epoch": 0.23214752705707692, "grad_norm": 0.3699541687965393, "learning_rate": 0.00015360220434370084, "loss": 1.4895, "step": 17865 }, { "epoch": 0.2321605216009928, "grad_norm": 0.3706502914428711, "learning_rate": 0.0001535996048817895, "loss": 1.371, "step": 17866 }, { "epoch": 0.23217351614490866, "grad_norm": 0.3946920335292816, "learning_rate": 0.0001535970054198781, "loss": 1.4146, "step": 17867 }, { "epoch": 0.23218651068882454, "grad_norm": 0.2880970537662506, "learning_rate": 0.0001535944059579667, "loss": 1.1588, "step": 17868 }, { "epoch": 0.2321995052327404, "grad_norm": 0.3923853635787964, "learning_rate": 0.00015359180649605534, "loss": 1.197, "step": 17869 }, { "epoch": 0.23221249977665628, "grad_norm": 0.4441952705383301, "learning_rate": 0.00015358920703414393, "loss": 1.5486, "step": 17870 }, { "epoch": 0.23222549432057216, "grad_norm": 0.38266226649284363, "learning_rate": 0.00015358660757223256, "loss": 1.4285, "step": 17871 }, { "epoch": 0.23223848886448803, "grad_norm": 0.38642239570617676, "learning_rate": 0.00015358400811032116, "loss": 1.39, "step": 17872 }, { "epoch": 0.2322514834084039, "grad_norm": 0.3625488579273224, "learning_rate": 0.0001535814086484098, "loss": 1.2688, "step": 17873 }, { "epoch": 0.23226447795231978, "grad_norm": 0.39898431301116943, "learning_rate": 0.0001535788091864984, "loss": 1.6059, "step": 17874 }, { "epoch": 0.23227747249623565, "grad_norm": 0.4230211675167084, "learning_rate": 0.00015357620972458703, "loss": 1.3784, "step": 17875 }, { "epoch": 0.23229046704015152, "grad_norm": 0.4505596458911896, "learning_rate": 0.00015357361026267563, "loss": 1.3411, "step": 17876 }, { "epoch": 0.2323034615840674, "grad_norm": 0.3540389835834503, "learning_rate": 0.00015357101080076425, "loss": 1.5181, "step": 17877 }, { "epoch": 0.23231645612798327, "grad_norm": 0.43678078055381775, "learning_rate": 0.00015356841133885288, "loss": 1.5223, "step": 17878 }, { "epoch": 0.23232945067189914, "grad_norm": 0.47371378540992737, "learning_rate": 0.00015356581187694147, "loss": 1.5503, "step": 17879 }, { "epoch": 0.232342445215815, "grad_norm": 0.4325070381164551, "learning_rate": 0.0001535632124150301, "loss": 1.2801, "step": 17880 }, { "epoch": 0.2323554397597309, "grad_norm": 0.4451591968536377, "learning_rate": 0.00015356061295311872, "loss": 1.4762, "step": 17881 }, { "epoch": 0.23236843430364676, "grad_norm": 0.4021493196487427, "learning_rate": 0.00015355801349120732, "loss": 1.4245, "step": 17882 }, { "epoch": 0.23238142884756263, "grad_norm": 0.3905608057975769, "learning_rate": 0.00015355541402929594, "loss": 1.4166, "step": 17883 }, { "epoch": 0.2323944233914785, "grad_norm": 0.42811280488967896, "learning_rate": 0.00015355281456738454, "loss": 1.4641, "step": 17884 }, { "epoch": 0.23240741793539438, "grad_norm": 0.4082183539867401, "learning_rate": 0.0001535502151054732, "loss": 1.4691, "step": 17885 }, { "epoch": 0.23242041247931025, "grad_norm": 0.303067684173584, "learning_rate": 0.0001535476156435618, "loss": 1.3537, "step": 17886 }, { "epoch": 0.23243340702322612, "grad_norm": 0.38688594102859497, "learning_rate": 0.00015354501618165042, "loss": 1.48, "step": 17887 }, { "epoch": 0.232446401567142, "grad_norm": 0.341637521982193, "learning_rate": 0.000153542416719739, "loss": 1.3024, "step": 17888 }, { "epoch": 0.23245939611105787, "grad_norm": 0.31752997636795044, "learning_rate": 0.00015353981725782764, "loss": 1.2497, "step": 17889 }, { "epoch": 0.23247239065497374, "grad_norm": 0.471500426530838, "learning_rate": 0.00015353721779591626, "loss": 1.5345, "step": 17890 }, { "epoch": 0.23248538519888962, "grad_norm": 0.28118324279785156, "learning_rate": 0.00015353461833400486, "loss": 1.46, "step": 17891 }, { "epoch": 0.2324983797428055, "grad_norm": 0.39630448818206787, "learning_rate": 0.00015353201887209348, "loss": 1.5243, "step": 17892 }, { "epoch": 0.23251137428672136, "grad_norm": 0.4049227833747864, "learning_rate": 0.0001535294194101821, "loss": 1.4443, "step": 17893 }, { "epoch": 0.23252436883063723, "grad_norm": 0.42329803109169006, "learning_rate": 0.0001535268199482707, "loss": 1.5795, "step": 17894 }, { "epoch": 0.2325373633745531, "grad_norm": 0.5804991722106934, "learning_rate": 0.00015352422048635933, "loss": 1.3983, "step": 17895 }, { "epoch": 0.23255035791846898, "grad_norm": 0.436396062374115, "learning_rate": 0.00015352162102444793, "loss": 1.4881, "step": 17896 }, { "epoch": 0.23256335246238485, "grad_norm": 0.4165444076061249, "learning_rate": 0.00015351902156253658, "loss": 1.2902, "step": 17897 }, { "epoch": 0.23257634700630073, "grad_norm": 0.36505982279777527, "learning_rate": 0.00015351642210062518, "loss": 1.43, "step": 17898 }, { "epoch": 0.2325893415502166, "grad_norm": 0.4033183157444, "learning_rate": 0.0001535138226387138, "loss": 1.267, "step": 17899 }, { "epoch": 0.23260233609413247, "grad_norm": 0.37419673800468445, "learning_rate": 0.0001535112231768024, "loss": 1.3773, "step": 17900 }, { "epoch": 0.23261533063804835, "grad_norm": 0.39067843556404114, "learning_rate": 0.00015350862371489102, "loss": 1.3313, "step": 17901 }, { "epoch": 0.23262832518196422, "grad_norm": 0.3143513798713684, "learning_rate": 0.00015350602425297965, "loss": 1.2085, "step": 17902 }, { "epoch": 0.2326413197258801, "grad_norm": 0.30038923025131226, "learning_rate": 0.00015350342479106824, "loss": 1.4399, "step": 17903 }, { "epoch": 0.23265431426979596, "grad_norm": 0.42759689688682556, "learning_rate": 0.00015350082532915687, "loss": 1.4828, "step": 17904 }, { "epoch": 0.23266730881371184, "grad_norm": 0.3189608156681061, "learning_rate": 0.0001534982258672455, "loss": 1.3064, "step": 17905 }, { "epoch": 0.2326803033576277, "grad_norm": 0.43306416273117065, "learning_rate": 0.0001534956264053341, "loss": 1.4608, "step": 17906 }, { "epoch": 0.23269329790154358, "grad_norm": 0.3596252202987671, "learning_rate": 0.00015349302694342272, "loss": 1.5733, "step": 17907 }, { "epoch": 0.23270629244545946, "grad_norm": 0.39456039667129517, "learning_rate": 0.00015349042748151134, "loss": 1.5159, "step": 17908 }, { "epoch": 0.23271928698937533, "grad_norm": 0.4954872131347656, "learning_rate": 0.00015348782801959996, "loss": 1.3347, "step": 17909 }, { "epoch": 0.2327322815332912, "grad_norm": 0.36924368143081665, "learning_rate": 0.00015348522855768856, "loss": 1.6782, "step": 17910 }, { "epoch": 0.23274527607720708, "grad_norm": 0.4221276044845581, "learning_rate": 0.00015348262909577719, "loss": 1.4704, "step": 17911 }, { "epoch": 0.23275827062112295, "grad_norm": 0.3313046395778656, "learning_rate": 0.0001534800296338658, "loss": 1.1531, "step": 17912 }, { "epoch": 0.23277126516503882, "grad_norm": 0.3749324381351471, "learning_rate": 0.0001534774301719544, "loss": 1.3079, "step": 17913 }, { "epoch": 0.2327842597089547, "grad_norm": 0.3325458765029907, "learning_rate": 0.00015347483071004303, "loss": 1.3079, "step": 17914 }, { "epoch": 0.23279725425287057, "grad_norm": 0.3753267228603363, "learning_rate": 0.00015347223124813163, "loss": 1.5559, "step": 17915 }, { "epoch": 0.23281024879678644, "grad_norm": 0.3746005892753601, "learning_rate": 0.00015346963178622028, "loss": 1.4584, "step": 17916 }, { "epoch": 0.2328232433407023, "grad_norm": 0.4311268627643585, "learning_rate": 0.00015346703232430888, "loss": 1.5771, "step": 17917 }, { "epoch": 0.23283623788461819, "grad_norm": 0.40249913930892944, "learning_rate": 0.00015346443286239748, "loss": 1.5888, "step": 17918 }, { "epoch": 0.23284923242853406, "grad_norm": 0.4716746509075165, "learning_rate": 0.0001534618334004861, "loss": 1.4667, "step": 17919 }, { "epoch": 0.23286222697244993, "grad_norm": 0.375625342130661, "learning_rate": 0.00015345923393857472, "loss": 1.2394, "step": 17920 }, { "epoch": 0.2328752215163658, "grad_norm": 0.37376120686531067, "learning_rate": 0.00015345663447666335, "loss": 1.5129, "step": 17921 }, { "epoch": 0.23288821606028168, "grad_norm": 0.37111878395080566, "learning_rate": 0.00015345403501475195, "loss": 1.3276, "step": 17922 }, { "epoch": 0.23290121060419755, "grad_norm": 0.35338693857192993, "learning_rate": 0.00015345143555284057, "loss": 1.4121, "step": 17923 }, { "epoch": 0.23291420514811342, "grad_norm": 0.4400428831577301, "learning_rate": 0.0001534488360909292, "loss": 1.4532, "step": 17924 }, { "epoch": 0.2329271996920293, "grad_norm": 0.37598493695259094, "learning_rate": 0.0001534462366290178, "loss": 1.3349, "step": 17925 }, { "epoch": 0.23294019423594517, "grad_norm": 0.3390032947063446, "learning_rate": 0.00015344363716710642, "loss": 1.6361, "step": 17926 }, { "epoch": 0.23295318877986104, "grad_norm": 0.44953683018684387, "learning_rate": 0.00015344103770519501, "loss": 1.5049, "step": 17927 }, { "epoch": 0.23296618332377692, "grad_norm": 0.5058104991912842, "learning_rate": 0.00015343843824328367, "loss": 1.3093, "step": 17928 }, { "epoch": 0.23297917786769282, "grad_norm": 0.38602083921432495, "learning_rate": 0.00015343583878137226, "loss": 1.3341, "step": 17929 }, { "epoch": 0.2329921724116087, "grad_norm": 0.38048407435417175, "learning_rate": 0.0001534332393194609, "loss": 1.3511, "step": 17930 }, { "epoch": 0.23300516695552456, "grad_norm": 0.34763115644454956, "learning_rate": 0.00015343063985754949, "loss": 1.4245, "step": 17931 }, { "epoch": 0.23301816149944043, "grad_norm": 0.41458916664123535, "learning_rate": 0.0001534280403956381, "loss": 1.4254, "step": 17932 }, { "epoch": 0.2330311560433563, "grad_norm": 0.39238467812538147, "learning_rate": 0.00015342544093372673, "loss": 1.4751, "step": 17933 }, { "epoch": 0.23304415058727218, "grad_norm": 0.4726831912994385, "learning_rate": 0.00015342284147181533, "loss": 1.5654, "step": 17934 }, { "epoch": 0.23305714513118805, "grad_norm": 0.44743531942367554, "learning_rate": 0.00015342024200990396, "loss": 1.2952, "step": 17935 }, { "epoch": 0.23307013967510393, "grad_norm": 0.49141213297843933, "learning_rate": 0.00015341764254799258, "loss": 1.5189, "step": 17936 }, { "epoch": 0.2330831342190198, "grad_norm": 0.31094464659690857, "learning_rate": 0.00015341504308608118, "loss": 1.2981, "step": 17937 }, { "epoch": 0.23309612876293567, "grad_norm": 0.3795747756958008, "learning_rate": 0.0001534124436241698, "loss": 1.2674, "step": 17938 }, { "epoch": 0.23310912330685155, "grad_norm": 0.39798861742019653, "learning_rate": 0.0001534098441622584, "loss": 1.333, "step": 17939 }, { "epoch": 0.23312211785076742, "grad_norm": 0.35945409536361694, "learning_rate": 0.00015340724470034705, "loss": 1.421, "step": 17940 }, { "epoch": 0.2331351123946833, "grad_norm": 0.471318781375885, "learning_rate": 0.00015340464523843565, "loss": 1.5116, "step": 17941 }, { "epoch": 0.23314810693859916, "grad_norm": 0.32836174964904785, "learning_rate": 0.00015340204577652427, "loss": 1.4417, "step": 17942 }, { "epoch": 0.23316110148251504, "grad_norm": 0.34531545639038086, "learning_rate": 0.0001533994463146129, "loss": 1.3299, "step": 17943 }, { "epoch": 0.2331740960264309, "grad_norm": 0.3809370994567871, "learning_rate": 0.0001533968468527015, "loss": 1.2998, "step": 17944 }, { "epoch": 0.23318709057034678, "grad_norm": 0.46564140915870667, "learning_rate": 0.00015339424739079012, "loss": 1.6028, "step": 17945 }, { "epoch": 0.23320008511426266, "grad_norm": 0.2764657735824585, "learning_rate": 0.00015339164792887872, "loss": 1.2578, "step": 17946 }, { "epoch": 0.23321307965817853, "grad_norm": 0.4108177721500397, "learning_rate": 0.00015338904846696734, "loss": 1.4521, "step": 17947 }, { "epoch": 0.2332260742020944, "grad_norm": 0.44228336215019226, "learning_rate": 0.00015338644900505597, "loss": 1.4632, "step": 17948 }, { "epoch": 0.23323906874601028, "grad_norm": 0.40309804677963257, "learning_rate": 0.00015338384954314456, "loss": 1.514, "step": 17949 }, { "epoch": 0.23325206328992615, "grad_norm": 0.3982924818992615, "learning_rate": 0.0001533812500812332, "loss": 1.4437, "step": 17950 }, { "epoch": 0.23326505783384202, "grad_norm": 0.5114452838897705, "learning_rate": 0.0001533786506193218, "loss": 1.585, "step": 17951 }, { "epoch": 0.2332780523777579, "grad_norm": 0.42855343222618103, "learning_rate": 0.00015337605115741044, "loss": 1.4228, "step": 17952 }, { "epoch": 0.23329104692167377, "grad_norm": 0.2871570289134979, "learning_rate": 0.00015337345169549903, "loss": 1.2664, "step": 17953 }, { "epoch": 0.23330404146558964, "grad_norm": 0.3602854311466217, "learning_rate": 0.00015337085223358766, "loss": 1.4117, "step": 17954 }, { "epoch": 0.2333170360095055, "grad_norm": 0.3426908850669861, "learning_rate": 0.00015336825277167628, "loss": 1.4184, "step": 17955 }, { "epoch": 0.23333003055342139, "grad_norm": 0.38484346866607666, "learning_rate": 0.00015336565330976488, "loss": 1.4802, "step": 17956 }, { "epoch": 0.23334302509733726, "grad_norm": 0.3409084379673004, "learning_rate": 0.0001533630538478535, "loss": 1.4181, "step": 17957 }, { "epoch": 0.23335601964125313, "grad_norm": 0.3370325565338135, "learning_rate": 0.0001533604543859421, "loss": 1.3947, "step": 17958 }, { "epoch": 0.233369014185169, "grad_norm": 0.4113130271434784, "learning_rate": 0.00015335785492403075, "loss": 1.4426, "step": 17959 }, { "epoch": 0.23338200872908488, "grad_norm": 0.4372863471508026, "learning_rate": 0.00015335525546211935, "loss": 1.5363, "step": 17960 }, { "epoch": 0.23339500327300075, "grad_norm": 0.39516639709472656, "learning_rate": 0.00015335265600020795, "loss": 1.4497, "step": 17961 }, { "epoch": 0.23340799781691662, "grad_norm": 0.5111863613128662, "learning_rate": 0.00015335005653829657, "loss": 1.4849, "step": 17962 }, { "epoch": 0.2334209923608325, "grad_norm": 0.4778505861759186, "learning_rate": 0.0001533474570763852, "loss": 1.3552, "step": 17963 }, { "epoch": 0.23343398690474837, "grad_norm": 0.44522005319595337, "learning_rate": 0.00015334485761447382, "loss": 1.309, "step": 17964 }, { "epoch": 0.23344698144866424, "grad_norm": 0.37539568543434143, "learning_rate": 0.00015334225815256242, "loss": 1.4853, "step": 17965 }, { "epoch": 0.23345997599258012, "grad_norm": 0.35811930894851685, "learning_rate": 0.00015333965869065104, "loss": 1.5613, "step": 17966 }, { "epoch": 0.233472970536496, "grad_norm": 0.40717077255249023, "learning_rate": 0.00015333705922873967, "loss": 1.617, "step": 17967 }, { "epoch": 0.23348596508041186, "grad_norm": 0.423179030418396, "learning_rate": 0.00015333445976682827, "loss": 1.4066, "step": 17968 }, { "epoch": 0.23349895962432773, "grad_norm": 0.3956672251224518, "learning_rate": 0.0001533318603049169, "loss": 1.2867, "step": 17969 }, { "epoch": 0.2335119541682436, "grad_norm": 0.4276334345340729, "learning_rate": 0.0001533292608430055, "loss": 1.3287, "step": 17970 }, { "epoch": 0.23352494871215948, "grad_norm": 0.36300331354141235, "learning_rate": 0.00015332666138109414, "loss": 1.3658, "step": 17971 }, { "epoch": 0.23353794325607535, "grad_norm": 0.4081609547138214, "learning_rate": 0.00015332406191918274, "loss": 1.4147, "step": 17972 }, { "epoch": 0.23355093779999123, "grad_norm": 0.3212282657623291, "learning_rate": 0.00015332146245727133, "loss": 1.3882, "step": 17973 }, { "epoch": 0.2335639323439071, "grad_norm": 0.45553895831108093, "learning_rate": 0.00015331886299535996, "loss": 1.6364, "step": 17974 }, { "epoch": 0.23357692688782297, "grad_norm": 0.3248056471347809, "learning_rate": 0.00015331626353344858, "loss": 1.3687, "step": 17975 }, { "epoch": 0.23358992143173885, "grad_norm": 0.46040576696395874, "learning_rate": 0.0001533136640715372, "loss": 1.6193, "step": 17976 }, { "epoch": 0.23360291597565472, "grad_norm": 0.2937815487384796, "learning_rate": 0.0001533110646096258, "loss": 1.3843, "step": 17977 }, { "epoch": 0.2336159105195706, "grad_norm": 0.34830930829048157, "learning_rate": 0.00015330846514771443, "loss": 1.3201, "step": 17978 }, { "epoch": 0.23362890506348646, "grad_norm": 0.3946056365966797, "learning_rate": 0.00015330586568580305, "loss": 1.1903, "step": 17979 }, { "epoch": 0.23364189960740234, "grad_norm": 0.37340110540390015, "learning_rate": 0.00015330326622389165, "loss": 1.3991, "step": 17980 }, { "epoch": 0.2336548941513182, "grad_norm": 0.4689640402793884, "learning_rate": 0.00015330066676198028, "loss": 1.5045, "step": 17981 }, { "epoch": 0.23366788869523408, "grad_norm": 0.40675726532936096, "learning_rate": 0.0001532980673000689, "loss": 1.3037, "step": 17982 }, { "epoch": 0.23368088323914996, "grad_norm": 0.45732080936431885, "learning_rate": 0.00015329546783815753, "loss": 1.6636, "step": 17983 }, { "epoch": 0.23369387778306583, "grad_norm": 0.4354645013809204, "learning_rate": 0.00015329286837624612, "loss": 1.459, "step": 17984 }, { "epoch": 0.2337068723269817, "grad_norm": 0.38458874821662903, "learning_rate": 0.00015329026891433475, "loss": 1.3473, "step": 17985 }, { "epoch": 0.23371986687089757, "grad_norm": 0.4326435625553131, "learning_rate": 0.00015328766945242337, "loss": 1.5688, "step": 17986 }, { "epoch": 0.23373286141481345, "grad_norm": 0.39050623774528503, "learning_rate": 0.00015328506999051197, "loss": 1.4113, "step": 17987 }, { "epoch": 0.23374585595872932, "grad_norm": 0.3956562578678131, "learning_rate": 0.0001532824705286006, "loss": 1.471, "step": 17988 }, { "epoch": 0.2337588505026452, "grad_norm": 0.37935376167297363, "learning_rate": 0.0001532798710666892, "loss": 1.4289, "step": 17989 }, { "epoch": 0.23377184504656107, "grad_norm": 0.39070311188697815, "learning_rate": 0.00015327727160477782, "loss": 1.2734, "step": 17990 }, { "epoch": 0.23378483959047694, "grad_norm": 0.3776244819164276, "learning_rate": 0.00015327467214286644, "loss": 1.308, "step": 17991 }, { "epoch": 0.2337978341343928, "grad_norm": 0.40933501720428467, "learning_rate": 0.00015327207268095504, "loss": 1.5812, "step": 17992 }, { "epoch": 0.23381082867830869, "grad_norm": 0.3220255374908447, "learning_rate": 0.00015326947321904366, "loss": 1.3327, "step": 17993 }, { "epoch": 0.23382382322222456, "grad_norm": 0.35636258125305176, "learning_rate": 0.00015326687375713229, "loss": 1.3992, "step": 17994 }, { "epoch": 0.23383681776614043, "grad_norm": 0.3924807608127594, "learning_rate": 0.0001532642742952209, "loss": 1.4874, "step": 17995 }, { "epoch": 0.2338498123100563, "grad_norm": 0.3293445408344269, "learning_rate": 0.0001532616748333095, "loss": 1.3592, "step": 17996 }, { "epoch": 0.23386280685397218, "grad_norm": 0.40680330991744995, "learning_rate": 0.00015325907537139813, "loss": 1.5609, "step": 17997 }, { "epoch": 0.23387580139788805, "grad_norm": 0.4301876425743103, "learning_rate": 0.00015325647590948676, "loss": 1.516, "step": 17998 }, { "epoch": 0.23388879594180392, "grad_norm": 0.5076674222946167, "learning_rate": 0.00015325387644757535, "loss": 1.3165, "step": 17999 }, { "epoch": 0.2339017904857198, "grad_norm": 0.41786855459213257, "learning_rate": 0.00015325127698566398, "loss": 1.3622, "step": 18000 }, { "epoch": 0.23391478502963567, "grad_norm": 0.32593899965286255, "learning_rate": 0.00015324867752375258, "loss": 1.1442, "step": 18001 }, { "epoch": 0.23392777957355154, "grad_norm": 0.47393733263015747, "learning_rate": 0.0001532460780618412, "loss": 1.6408, "step": 18002 }, { "epoch": 0.23394077411746741, "grad_norm": 0.36780720949172974, "learning_rate": 0.00015324347859992983, "loss": 1.4241, "step": 18003 }, { "epoch": 0.2339537686613833, "grad_norm": 0.3852415978908539, "learning_rate": 0.00015324087913801842, "loss": 1.6291, "step": 18004 }, { "epoch": 0.2339667632052992, "grad_norm": 0.43215811252593994, "learning_rate": 0.00015323827967610705, "loss": 1.5221, "step": 18005 }, { "epoch": 0.23397975774921506, "grad_norm": 0.33725112676620483, "learning_rate": 0.00015323568021419567, "loss": 1.5029, "step": 18006 }, { "epoch": 0.23399275229313093, "grad_norm": 0.39836397767066956, "learning_rate": 0.0001532330807522843, "loss": 1.4686, "step": 18007 }, { "epoch": 0.2340057468370468, "grad_norm": 0.2320425808429718, "learning_rate": 0.0001532304812903729, "loss": 1.4181, "step": 18008 }, { "epoch": 0.23401874138096268, "grad_norm": 0.41020092368125916, "learning_rate": 0.00015322788182846152, "loss": 1.2584, "step": 18009 }, { "epoch": 0.23403173592487855, "grad_norm": 0.3727324306964874, "learning_rate": 0.00015322528236655014, "loss": 1.3973, "step": 18010 }, { "epoch": 0.23404473046879443, "grad_norm": 0.2463207244873047, "learning_rate": 0.00015322268290463874, "loss": 1.3373, "step": 18011 }, { "epoch": 0.2340577250127103, "grad_norm": 0.32143476605415344, "learning_rate": 0.00015322008344272736, "loss": 1.4462, "step": 18012 }, { "epoch": 0.23407071955662617, "grad_norm": 0.4097207486629486, "learning_rate": 0.00015321748398081596, "loss": 1.5051, "step": 18013 }, { "epoch": 0.23408371410054205, "grad_norm": 0.33502137660980225, "learning_rate": 0.0001532148845189046, "loss": 1.3054, "step": 18014 }, { "epoch": 0.23409670864445792, "grad_norm": 0.3422829508781433, "learning_rate": 0.0001532122850569932, "loss": 1.3622, "step": 18015 }, { "epoch": 0.2341097031883738, "grad_norm": 0.3967907726764679, "learning_rate": 0.0001532096855950818, "loss": 1.5501, "step": 18016 }, { "epoch": 0.23412269773228966, "grad_norm": 0.41888493299484253, "learning_rate": 0.00015320708613317046, "loss": 1.365, "step": 18017 }, { "epoch": 0.23413569227620554, "grad_norm": 0.5179020762443542, "learning_rate": 0.00015320448667125906, "loss": 1.5825, "step": 18018 }, { "epoch": 0.2341486868201214, "grad_norm": 0.5036913156509399, "learning_rate": 0.00015320188720934768, "loss": 1.5466, "step": 18019 }, { "epoch": 0.23416168136403728, "grad_norm": 0.30901363492012024, "learning_rate": 0.00015319928774743628, "loss": 1.3966, "step": 18020 }, { "epoch": 0.23417467590795316, "grad_norm": 0.4030865728855133, "learning_rate": 0.0001531966882855249, "loss": 1.3442, "step": 18021 }, { "epoch": 0.23418767045186903, "grad_norm": 0.360787570476532, "learning_rate": 0.00015319408882361353, "loss": 1.4201, "step": 18022 }, { "epoch": 0.2342006649957849, "grad_norm": 0.37237903475761414, "learning_rate": 0.00015319148936170213, "loss": 1.3517, "step": 18023 }, { "epoch": 0.23421365953970077, "grad_norm": 0.4541695713996887, "learning_rate": 0.00015318888989979075, "loss": 1.4075, "step": 18024 }, { "epoch": 0.23422665408361665, "grad_norm": 0.47659939527511597, "learning_rate": 0.00015318629043787937, "loss": 1.4612, "step": 18025 }, { "epoch": 0.23423964862753252, "grad_norm": 0.422150194644928, "learning_rate": 0.000153183690975968, "loss": 1.2813, "step": 18026 }, { "epoch": 0.2342526431714484, "grad_norm": 0.3242901563644409, "learning_rate": 0.0001531810915140566, "loss": 1.3177, "step": 18027 }, { "epoch": 0.23426563771536427, "grad_norm": 0.3061369061470032, "learning_rate": 0.0001531784920521452, "loss": 1.2185, "step": 18028 }, { "epoch": 0.23427863225928014, "grad_norm": 0.31887730956077576, "learning_rate": 0.00015317589259023384, "loss": 1.3763, "step": 18029 }, { "epoch": 0.234291626803196, "grad_norm": 0.39775481820106506, "learning_rate": 0.00015317329312832244, "loss": 1.4063, "step": 18030 }, { "epoch": 0.23430462134711189, "grad_norm": 0.2797357738018036, "learning_rate": 0.00015317069366641107, "loss": 1.4146, "step": 18031 }, { "epoch": 0.23431761589102776, "grad_norm": 0.43941476941108704, "learning_rate": 0.00015316809420449966, "loss": 1.2305, "step": 18032 }, { "epoch": 0.23433061043494363, "grad_norm": 0.9685315489768982, "learning_rate": 0.0001531654947425883, "loss": 1.479, "step": 18033 }, { "epoch": 0.2343436049788595, "grad_norm": 0.39319655299186707, "learning_rate": 0.0001531628952806769, "loss": 1.4624, "step": 18034 }, { "epoch": 0.23435659952277538, "grad_norm": 0.4018360674381256, "learning_rate": 0.0001531602958187655, "loss": 1.4521, "step": 18035 }, { "epoch": 0.23436959406669125, "grad_norm": 0.32737624645233154, "learning_rate": 0.00015315769635685414, "loss": 1.3044, "step": 18036 }, { "epoch": 0.23438258861060712, "grad_norm": 0.3900229334831238, "learning_rate": 0.00015315509689494276, "loss": 1.4614, "step": 18037 }, { "epoch": 0.234395583154523, "grad_norm": 0.4148141145706177, "learning_rate": 0.00015315249743303138, "loss": 1.3859, "step": 18038 }, { "epoch": 0.23440857769843887, "grad_norm": 0.40642696619033813, "learning_rate": 0.00015314989797111998, "loss": 1.411, "step": 18039 }, { "epoch": 0.23442157224235474, "grad_norm": 0.47283414006233215, "learning_rate": 0.00015314729850920858, "loss": 1.5534, "step": 18040 }, { "epoch": 0.23443456678627062, "grad_norm": 0.41187629103660583, "learning_rate": 0.00015314469904729723, "loss": 1.5191, "step": 18041 }, { "epoch": 0.2344475613301865, "grad_norm": 0.3797478973865509, "learning_rate": 0.00015314209958538583, "loss": 1.4271, "step": 18042 }, { "epoch": 0.23446055587410236, "grad_norm": 0.402601957321167, "learning_rate": 0.00015313950012347445, "loss": 1.4774, "step": 18043 }, { "epoch": 0.23447355041801823, "grad_norm": 0.4224216938018799, "learning_rate": 0.00015313690066156305, "loss": 1.5172, "step": 18044 }, { "epoch": 0.2344865449619341, "grad_norm": 0.39689821004867554, "learning_rate": 0.00015313430119965167, "loss": 1.2662, "step": 18045 }, { "epoch": 0.23449953950584998, "grad_norm": 0.5979065895080566, "learning_rate": 0.0001531317017377403, "loss": 1.4197, "step": 18046 }, { "epoch": 0.23451253404976585, "grad_norm": 0.35663479566574097, "learning_rate": 0.0001531291022758289, "loss": 1.5123, "step": 18047 }, { "epoch": 0.23452552859368173, "grad_norm": 0.33819663524627686, "learning_rate": 0.00015312650281391752, "loss": 1.4088, "step": 18048 }, { "epoch": 0.2345385231375976, "grad_norm": 0.3434363603591919, "learning_rate": 0.00015312390335200614, "loss": 1.2682, "step": 18049 }, { "epoch": 0.23455151768151347, "grad_norm": 0.3235607445240021, "learning_rate": 0.00015312130389009477, "loss": 1.4834, "step": 18050 }, { "epoch": 0.23456451222542934, "grad_norm": 0.4239378571510315, "learning_rate": 0.00015311870442818337, "loss": 1.515, "step": 18051 }, { "epoch": 0.23457750676934522, "grad_norm": 0.4866640567779541, "learning_rate": 0.000153116104966272, "loss": 1.524, "step": 18052 }, { "epoch": 0.2345905013132611, "grad_norm": 0.40287330746650696, "learning_rate": 0.00015311350550436062, "loss": 1.3636, "step": 18053 }, { "epoch": 0.23460349585717696, "grad_norm": 0.3488408029079437, "learning_rate": 0.0001531109060424492, "loss": 1.239, "step": 18054 }, { "epoch": 0.23461649040109284, "grad_norm": 0.4170578718185425, "learning_rate": 0.00015310830658053784, "loss": 1.6802, "step": 18055 }, { "epoch": 0.2346294849450087, "grad_norm": 0.3394927382469177, "learning_rate": 0.00015310570711862646, "loss": 1.4157, "step": 18056 }, { "epoch": 0.23464247948892458, "grad_norm": 0.4658181071281433, "learning_rate": 0.00015310310765671506, "loss": 1.3043, "step": 18057 }, { "epoch": 0.23465547403284046, "grad_norm": 0.35588932037353516, "learning_rate": 0.00015310050819480368, "loss": 1.4884, "step": 18058 }, { "epoch": 0.23466846857675633, "grad_norm": 0.3373064696788788, "learning_rate": 0.00015309790873289228, "loss": 1.4317, "step": 18059 }, { "epoch": 0.2346814631206722, "grad_norm": 0.40685445070266724, "learning_rate": 0.00015309530927098093, "loss": 1.3801, "step": 18060 }, { "epoch": 0.23469445766458807, "grad_norm": 0.5385668873786926, "learning_rate": 0.00015309270980906953, "loss": 1.398, "step": 18061 }, { "epoch": 0.23470745220850395, "grad_norm": 0.40858256816864014, "learning_rate": 0.00015309011034715815, "loss": 1.3456, "step": 18062 }, { "epoch": 0.23472044675241982, "grad_norm": 0.31402289867401123, "learning_rate": 0.00015308751088524675, "loss": 1.3194, "step": 18063 }, { "epoch": 0.2347334412963357, "grad_norm": 0.381795197725296, "learning_rate": 0.00015308491142333538, "loss": 1.4004, "step": 18064 }, { "epoch": 0.23474643584025157, "grad_norm": 0.4572577476501465, "learning_rate": 0.000153082311961424, "loss": 1.4242, "step": 18065 }, { "epoch": 0.23475943038416744, "grad_norm": 0.37868568301200867, "learning_rate": 0.0001530797124995126, "loss": 1.4439, "step": 18066 }, { "epoch": 0.2347724249280833, "grad_norm": 0.36918380856513977, "learning_rate": 0.00015307711303760122, "loss": 1.2055, "step": 18067 }, { "epoch": 0.23478541947199918, "grad_norm": 0.30491912364959717, "learning_rate": 0.00015307451357568985, "loss": 1.2899, "step": 18068 }, { "epoch": 0.23479841401591506, "grad_norm": 0.34524771571159363, "learning_rate": 0.00015307191411377847, "loss": 1.4236, "step": 18069 }, { "epoch": 0.23481140855983093, "grad_norm": 0.3788608908653259, "learning_rate": 0.00015306931465186707, "loss": 1.4725, "step": 18070 }, { "epoch": 0.2348244031037468, "grad_norm": 0.38002508878707886, "learning_rate": 0.00015306671518995567, "loss": 1.2142, "step": 18071 }, { "epoch": 0.23483739764766268, "grad_norm": 0.36435046792030334, "learning_rate": 0.00015306411572804432, "loss": 1.4132, "step": 18072 }, { "epoch": 0.23485039219157855, "grad_norm": 0.37841206789016724, "learning_rate": 0.00015306151626613292, "loss": 1.372, "step": 18073 }, { "epoch": 0.23486338673549442, "grad_norm": 0.3763261139392853, "learning_rate": 0.00015305891680422154, "loss": 1.3355, "step": 18074 }, { "epoch": 0.2348763812794103, "grad_norm": 0.4358110725879669, "learning_rate": 0.00015305631734231014, "loss": 1.3339, "step": 18075 }, { "epoch": 0.23488937582332617, "grad_norm": 0.49096590280532837, "learning_rate": 0.00015305371788039876, "loss": 1.5661, "step": 18076 }, { "epoch": 0.23490237036724204, "grad_norm": 0.4548513889312744, "learning_rate": 0.0001530511184184874, "loss": 1.5205, "step": 18077 }, { "epoch": 0.23491536491115791, "grad_norm": 0.45075923204421997, "learning_rate": 0.00015304851895657598, "loss": 1.3969, "step": 18078 }, { "epoch": 0.2349283594550738, "grad_norm": 0.3555245101451874, "learning_rate": 0.0001530459194946646, "loss": 1.447, "step": 18079 }, { "epoch": 0.23494135399898966, "grad_norm": 0.34753239154815674, "learning_rate": 0.00015304332003275323, "loss": 1.512, "step": 18080 }, { "epoch": 0.23495434854290556, "grad_norm": 0.37124064564704895, "learning_rate": 0.00015304072057084186, "loss": 1.3088, "step": 18081 }, { "epoch": 0.23496734308682143, "grad_norm": 0.4854525029659271, "learning_rate": 0.00015303812110893045, "loss": 1.4232, "step": 18082 }, { "epoch": 0.2349803376307373, "grad_norm": 0.5026360154151917, "learning_rate": 0.00015303552164701905, "loss": 1.5394, "step": 18083 }, { "epoch": 0.23499333217465318, "grad_norm": 0.3670816421508789, "learning_rate": 0.0001530329221851077, "loss": 1.2812, "step": 18084 }, { "epoch": 0.23500632671856905, "grad_norm": 0.39980369806289673, "learning_rate": 0.0001530303227231963, "loss": 1.5397, "step": 18085 }, { "epoch": 0.23501932126248493, "grad_norm": 0.35663512349128723, "learning_rate": 0.00015302772326128493, "loss": 1.431, "step": 18086 }, { "epoch": 0.2350323158064008, "grad_norm": 0.37008699774742126, "learning_rate": 0.00015302512379937352, "loss": 1.5248, "step": 18087 }, { "epoch": 0.23504531035031667, "grad_norm": 0.4221617877483368, "learning_rate": 0.00015302252433746215, "loss": 1.4101, "step": 18088 }, { "epoch": 0.23505830489423254, "grad_norm": 0.4563870429992676, "learning_rate": 0.00015301992487555077, "loss": 1.3585, "step": 18089 }, { "epoch": 0.23507129943814842, "grad_norm": 0.3603486716747284, "learning_rate": 0.00015301732541363937, "loss": 1.2289, "step": 18090 }, { "epoch": 0.2350842939820643, "grad_norm": 0.40538015961647034, "learning_rate": 0.00015301472595172802, "loss": 1.5078, "step": 18091 }, { "epoch": 0.23509728852598016, "grad_norm": 0.28498125076293945, "learning_rate": 0.00015301212648981662, "loss": 1.2793, "step": 18092 }, { "epoch": 0.23511028306989604, "grad_norm": 0.44566699862480164, "learning_rate": 0.00015300952702790524, "loss": 1.4666, "step": 18093 }, { "epoch": 0.2351232776138119, "grad_norm": 0.4075450897216797, "learning_rate": 0.00015300692756599384, "loss": 1.3643, "step": 18094 }, { "epoch": 0.23513627215772778, "grad_norm": 0.35127031803131104, "learning_rate": 0.00015300432810408246, "loss": 1.246, "step": 18095 }, { "epoch": 0.23514926670164366, "grad_norm": 0.4481108486652374, "learning_rate": 0.0001530017286421711, "loss": 1.3677, "step": 18096 }, { "epoch": 0.23516226124555953, "grad_norm": 0.36416977643966675, "learning_rate": 0.0001529991291802597, "loss": 1.1969, "step": 18097 }, { "epoch": 0.2351752557894754, "grad_norm": 0.4587027132511139, "learning_rate": 0.0001529965297183483, "loss": 1.4056, "step": 18098 }, { "epoch": 0.23518825033339127, "grad_norm": 0.3319268822669983, "learning_rate": 0.00015299393025643694, "loss": 1.3583, "step": 18099 }, { "epoch": 0.23520124487730715, "grad_norm": 0.36130058765411377, "learning_rate": 0.00015299133079452553, "loss": 1.2937, "step": 18100 }, { "epoch": 0.23521423942122302, "grad_norm": 0.48869195580482483, "learning_rate": 0.00015298873133261416, "loss": 1.3722, "step": 18101 }, { "epoch": 0.2352272339651389, "grad_norm": 0.29547956585884094, "learning_rate": 0.00015298613187070275, "loss": 1.2454, "step": 18102 }, { "epoch": 0.23524022850905477, "grad_norm": 0.3617570996284485, "learning_rate": 0.0001529835324087914, "loss": 1.4587, "step": 18103 }, { "epoch": 0.23525322305297064, "grad_norm": 0.3721075654029846, "learning_rate": 0.00015298093294688, "loss": 1.2906, "step": 18104 }, { "epoch": 0.2352662175968865, "grad_norm": 0.45762911438941956, "learning_rate": 0.00015297833348496863, "loss": 1.1893, "step": 18105 }, { "epoch": 0.23527921214080239, "grad_norm": 0.34379497170448303, "learning_rate": 0.00015297573402305723, "loss": 1.4006, "step": 18106 }, { "epoch": 0.23529220668471826, "grad_norm": 0.33811071515083313, "learning_rate": 0.00015297313456114585, "loss": 1.3711, "step": 18107 }, { "epoch": 0.23530520122863413, "grad_norm": 0.40029510855674744, "learning_rate": 0.00015297053509923447, "loss": 1.4282, "step": 18108 }, { "epoch": 0.23531819577255, "grad_norm": 0.41995954513549805, "learning_rate": 0.00015296793563732307, "loss": 1.3516, "step": 18109 }, { "epoch": 0.23533119031646588, "grad_norm": 0.483599990606308, "learning_rate": 0.0001529653361754117, "loss": 1.4617, "step": 18110 }, { "epoch": 0.23534418486038175, "grad_norm": 0.3936520218849182, "learning_rate": 0.00015296273671350032, "loss": 1.4211, "step": 18111 }, { "epoch": 0.23535717940429762, "grad_norm": 0.44347572326660156, "learning_rate": 0.00015296013725158892, "loss": 1.4651, "step": 18112 }, { "epoch": 0.2353701739482135, "grad_norm": 0.45416751503944397, "learning_rate": 0.00015295753778967754, "loss": 1.5017, "step": 18113 }, { "epoch": 0.23538316849212937, "grad_norm": 0.3818269670009613, "learning_rate": 0.00015295493832776614, "loss": 1.3149, "step": 18114 }, { "epoch": 0.23539616303604524, "grad_norm": 0.2547667622566223, "learning_rate": 0.0001529523388658548, "loss": 1.4288, "step": 18115 }, { "epoch": 0.23540915757996111, "grad_norm": 0.43630170822143555, "learning_rate": 0.0001529497394039434, "loss": 1.4479, "step": 18116 }, { "epoch": 0.235422152123877, "grad_norm": 0.4467422068119049, "learning_rate": 0.00015294713994203201, "loss": 1.2504, "step": 18117 }, { "epoch": 0.23543514666779286, "grad_norm": 0.4496765434741974, "learning_rate": 0.0001529445404801206, "loss": 1.5589, "step": 18118 }, { "epoch": 0.23544814121170873, "grad_norm": 0.4187292754650116, "learning_rate": 0.00015294194101820924, "loss": 1.2564, "step": 18119 }, { "epoch": 0.2354611357556246, "grad_norm": 0.4326245188713074, "learning_rate": 0.00015293934155629786, "loss": 1.3895, "step": 18120 }, { "epoch": 0.23547413029954048, "grad_norm": 0.3660198748111725, "learning_rate": 0.00015293674209438646, "loss": 1.4438, "step": 18121 }, { "epoch": 0.23548712484345635, "grad_norm": 0.34446752071380615, "learning_rate": 0.00015293414263247508, "loss": 1.3791, "step": 18122 }, { "epoch": 0.23550011938737223, "grad_norm": 0.31882914900779724, "learning_rate": 0.0001529315431705637, "loss": 1.2146, "step": 18123 }, { "epoch": 0.2355131139312881, "grad_norm": 0.3847213685512543, "learning_rate": 0.0001529289437086523, "loss": 1.3098, "step": 18124 }, { "epoch": 0.23552610847520397, "grad_norm": 0.32543617486953735, "learning_rate": 0.00015292634424674093, "loss": 1.2413, "step": 18125 }, { "epoch": 0.23553910301911984, "grad_norm": 0.3916413187980652, "learning_rate": 0.00015292374478482953, "loss": 1.5142, "step": 18126 }, { "epoch": 0.23555209756303572, "grad_norm": 0.34882864356040955, "learning_rate": 0.00015292114532291818, "loss": 1.4549, "step": 18127 }, { "epoch": 0.2355650921069516, "grad_norm": 0.4583916664123535, "learning_rate": 0.00015291854586100677, "loss": 1.4873, "step": 18128 }, { "epoch": 0.23557808665086746, "grad_norm": 0.42106905579566956, "learning_rate": 0.0001529159463990954, "loss": 1.3655, "step": 18129 }, { "epoch": 0.23559108119478334, "grad_norm": 0.3971590995788574, "learning_rate": 0.00015291334693718402, "loss": 1.7991, "step": 18130 }, { "epoch": 0.2356040757386992, "grad_norm": 0.4282810389995575, "learning_rate": 0.00015291074747527262, "loss": 1.4536, "step": 18131 }, { "epoch": 0.23561707028261508, "grad_norm": 0.3702646791934967, "learning_rate": 0.00015290814801336125, "loss": 1.3402, "step": 18132 }, { "epoch": 0.23563006482653095, "grad_norm": 0.35790005326271057, "learning_rate": 0.00015290554855144984, "loss": 1.2635, "step": 18133 }, { "epoch": 0.23564305937044683, "grad_norm": 0.4585866928100586, "learning_rate": 0.0001529029490895385, "loss": 1.5563, "step": 18134 }, { "epoch": 0.2356560539143627, "grad_norm": 0.3552546501159668, "learning_rate": 0.0001529003496276271, "loss": 1.4165, "step": 18135 }, { "epoch": 0.23566904845827857, "grad_norm": 0.479915589094162, "learning_rate": 0.00015289775016571572, "loss": 1.4247, "step": 18136 }, { "epoch": 0.23568204300219445, "grad_norm": 0.37789207696914673, "learning_rate": 0.0001528951507038043, "loss": 1.4213, "step": 18137 }, { "epoch": 0.23569503754611032, "grad_norm": 0.3848220407962799, "learning_rate": 0.00015289255124189294, "loss": 1.3665, "step": 18138 }, { "epoch": 0.2357080320900262, "grad_norm": 0.3396890461444855, "learning_rate": 0.00015288995177998156, "loss": 1.3181, "step": 18139 }, { "epoch": 0.23572102663394207, "grad_norm": 0.42892786860466003, "learning_rate": 0.00015288735231807016, "loss": 1.4398, "step": 18140 }, { "epoch": 0.23573402117785794, "grad_norm": 0.4097321927547455, "learning_rate": 0.00015288475285615878, "loss": 1.4484, "step": 18141 }, { "epoch": 0.2357470157217738, "grad_norm": 0.42936643958091736, "learning_rate": 0.0001528821533942474, "loss": 1.2688, "step": 18142 }, { "epoch": 0.23576001026568968, "grad_norm": 0.4336777925491333, "learning_rate": 0.000152879553932336, "loss": 1.3498, "step": 18143 }, { "epoch": 0.23577300480960556, "grad_norm": 0.4130055010318756, "learning_rate": 0.00015287695447042463, "loss": 1.3498, "step": 18144 }, { "epoch": 0.23578599935352143, "grad_norm": 0.34807726740837097, "learning_rate": 0.00015287435500851323, "loss": 1.4249, "step": 18145 }, { "epoch": 0.2357989938974373, "grad_norm": 0.3700559139251709, "learning_rate": 0.00015287175554660188, "loss": 1.526, "step": 18146 }, { "epoch": 0.23581198844135318, "grad_norm": 0.46488526463508606, "learning_rate": 0.00015286915608469048, "loss": 1.5499, "step": 18147 }, { "epoch": 0.23582498298526905, "grad_norm": 0.3845004737377167, "learning_rate": 0.0001528665566227791, "loss": 1.4191, "step": 18148 }, { "epoch": 0.23583797752918492, "grad_norm": 0.3803398013114929, "learning_rate": 0.0001528639571608677, "loss": 1.5062, "step": 18149 }, { "epoch": 0.2358509720731008, "grad_norm": 0.48074230551719666, "learning_rate": 0.00015286135769895632, "loss": 1.4756, "step": 18150 }, { "epoch": 0.23586396661701667, "grad_norm": 0.47667598724365234, "learning_rate": 0.00015285875823704495, "loss": 1.4859, "step": 18151 }, { "epoch": 0.23587696116093254, "grad_norm": 0.5368838906288147, "learning_rate": 0.00015285615877513355, "loss": 1.4254, "step": 18152 }, { "epoch": 0.23588995570484841, "grad_norm": 0.4175216257572174, "learning_rate": 0.00015285355931322217, "loss": 1.69, "step": 18153 }, { "epoch": 0.2359029502487643, "grad_norm": 0.47914665937423706, "learning_rate": 0.0001528509598513108, "loss": 1.8011, "step": 18154 }, { "epoch": 0.23591594479268016, "grad_norm": 0.4637472331523895, "learning_rate": 0.0001528483603893994, "loss": 1.2943, "step": 18155 }, { "epoch": 0.23592893933659603, "grad_norm": 0.39966654777526855, "learning_rate": 0.00015284576092748802, "loss": 1.5315, "step": 18156 }, { "epoch": 0.23594193388051193, "grad_norm": 0.27806514501571655, "learning_rate": 0.0001528431614655766, "loss": 1.3058, "step": 18157 }, { "epoch": 0.2359549284244278, "grad_norm": 0.4609842598438263, "learning_rate": 0.00015284056200366527, "loss": 1.421, "step": 18158 }, { "epoch": 0.23596792296834368, "grad_norm": 0.3547143042087555, "learning_rate": 0.00015283796254175386, "loss": 1.1357, "step": 18159 }, { "epoch": 0.23598091751225955, "grad_norm": 0.355663001537323, "learning_rate": 0.0001528353630798425, "loss": 1.334, "step": 18160 }, { "epoch": 0.23599391205617543, "grad_norm": 0.5052456259727478, "learning_rate": 0.00015283276361793108, "loss": 1.6809, "step": 18161 }, { "epoch": 0.2360069066000913, "grad_norm": 0.4380223751068115, "learning_rate": 0.0001528301641560197, "loss": 1.2919, "step": 18162 }, { "epoch": 0.23601990114400717, "grad_norm": 0.3678244948387146, "learning_rate": 0.00015282756469410833, "loss": 1.3556, "step": 18163 }, { "epoch": 0.23603289568792304, "grad_norm": 0.3166038691997528, "learning_rate": 0.00015282496523219693, "loss": 1.3418, "step": 18164 }, { "epoch": 0.23604589023183892, "grad_norm": 0.420303076505661, "learning_rate": 0.00015282236577028556, "loss": 1.3637, "step": 18165 }, { "epoch": 0.2360588847757548, "grad_norm": 0.35121268033981323, "learning_rate": 0.00015281976630837418, "loss": 1.4739, "step": 18166 }, { "epoch": 0.23607187931967066, "grad_norm": 0.2775443494319916, "learning_rate": 0.00015281716684646278, "loss": 1.2545, "step": 18167 }, { "epoch": 0.23608487386358654, "grad_norm": 0.45077207684516907, "learning_rate": 0.0001528145673845514, "loss": 1.3224, "step": 18168 }, { "epoch": 0.2360978684075024, "grad_norm": 0.377049058675766, "learning_rate": 0.00015281196792264003, "loss": 1.5797, "step": 18169 }, { "epoch": 0.23611086295141828, "grad_norm": 0.4229763448238373, "learning_rate": 0.00015280936846072865, "loss": 1.6762, "step": 18170 }, { "epoch": 0.23612385749533416, "grad_norm": 0.5033701062202454, "learning_rate": 0.00015280676899881725, "loss": 1.4265, "step": 18171 }, { "epoch": 0.23613685203925003, "grad_norm": 0.3585719168186188, "learning_rate": 0.00015280416953690587, "loss": 1.3259, "step": 18172 }, { "epoch": 0.2361498465831659, "grad_norm": 0.3322538733482361, "learning_rate": 0.0001528015700749945, "loss": 1.5847, "step": 18173 }, { "epoch": 0.23616284112708177, "grad_norm": 0.4248094856739044, "learning_rate": 0.0001527989706130831, "loss": 1.3984, "step": 18174 }, { "epoch": 0.23617583567099765, "grad_norm": 0.30544593930244446, "learning_rate": 0.00015279637115117172, "loss": 1.3828, "step": 18175 }, { "epoch": 0.23618883021491352, "grad_norm": 0.37814876437187195, "learning_rate": 0.00015279377168926032, "loss": 1.4739, "step": 18176 }, { "epoch": 0.2362018247588294, "grad_norm": 0.3748113512992859, "learning_rate": 0.00015279117222734897, "loss": 1.4219, "step": 18177 }, { "epoch": 0.23621481930274527, "grad_norm": 0.4318793714046478, "learning_rate": 0.00015278857276543756, "loss": 1.4285, "step": 18178 }, { "epoch": 0.23622781384666114, "grad_norm": 0.574853241443634, "learning_rate": 0.00015278597330352616, "loss": 1.3646, "step": 18179 }, { "epoch": 0.236240808390577, "grad_norm": 0.3363092839717865, "learning_rate": 0.0001527833738416148, "loss": 1.2765, "step": 18180 }, { "epoch": 0.23625380293449288, "grad_norm": 0.5701215267181396, "learning_rate": 0.0001527807743797034, "loss": 1.516, "step": 18181 }, { "epoch": 0.23626679747840876, "grad_norm": 0.41120481491088867, "learning_rate": 0.00015277817491779204, "loss": 1.348, "step": 18182 }, { "epoch": 0.23627979202232463, "grad_norm": 0.4616166353225708, "learning_rate": 0.00015277557545588063, "loss": 1.674, "step": 18183 }, { "epoch": 0.2362927865662405, "grad_norm": 0.3635489344596863, "learning_rate": 0.00015277297599396926, "loss": 1.3864, "step": 18184 }, { "epoch": 0.23630578111015638, "grad_norm": 0.37966054677963257, "learning_rate": 0.00015277037653205788, "loss": 1.2714, "step": 18185 }, { "epoch": 0.23631877565407225, "grad_norm": 0.37608158588409424, "learning_rate": 0.00015276777707014648, "loss": 1.3796, "step": 18186 }, { "epoch": 0.23633177019798812, "grad_norm": 0.37520378828048706, "learning_rate": 0.0001527651776082351, "loss": 1.3243, "step": 18187 }, { "epoch": 0.236344764741904, "grad_norm": 0.4705045819282532, "learning_rate": 0.0001527625781463237, "loss": 1.4067, "step": 18188 }, { "epoch": 0.23635775928581987, "grad_norm": 0.38363322615623474, "learning_rate": 0.00015275997868441235, "loss": 1.34, "step": 18189 }, { "epoch": 0.23637075382973574, "grad_norm": 0.415509968996048, "learning_rate": 0.00015275737922250095, "loss": 1.4282, "step": 18190 }, { "epoch": 0.23638374837365161, "grad_norm": 0.4897744953632355, "learning_rate": 0.00015275477976058957, "loss": 1.4026, "step": 18191 }, { "epoch": 0.2363967429175675, "grad_norm": 0.3456737995147705, "learning_rate": 0.00015275218029867817, "loss": 1.3776, "step": 18192 }, { "epoch": 0.23640973746148336, "grad_norm": 0.2571519911289215, "learning_rate": 0.0001527495808367668, "loss": 1.2256, "step": 18193 }, { "epoch": 0.23642273200539923, "grad_norm": 0.36385491490364075, "learning_rate": 0.00015274698137485542, "loss": 1.4353, "step": 18194 }, { "epoch": 0.2364357265493151, "grad_norm": 0.39608821272850037, "learning_rate": 0.00015274438191294402, "loss": 1.4698, "step": 18195 }, { "epoch": 0.23644872109323098, "grad_norm": 0.35390350222587585, "learning_rate": 0.00015274178245103264, "loss": 1.3155, "step": 18196 }, { "epoch": 0.23646171563714685, "grad_norm": 0.334207147359848, "learning_rate": 0.00015273918298912127, "loss": 1.6021, "step": 18197 }, { "epoch": 0.23647471018106273, "grad_norm": 0.3848886489868164, "learning_rate": 0.00015273658352720986, "loss": 1.5115, "step": 18198 }, { "epoch": 0.2364877047249786, "grad_norm": 0.38187968730926514, "learning_rate": 0.0001527339840652985, "loss": 1.2702, "step": 18199 }, { "epoch": 0.23650069926889447, "grad_norm": 0.2943970263004303, "learning_rate": 0.0001527313846033871, "loss": 1.2392, "step": 18200 }, { "epoch": 0.23651369381281034, "grad_norm": 0.5097970366477966, "learning_rate": 0.00015272878514147574, "loss": 1.5173, "step": 18201 }, { "epoch": 0.23652668835672622, "grad_norm": 0.3958309292793274, "learning_rate": 0.00015272618567956434, "loss": 1.6729, "step": 18202 }, { "epoch": 0.2365396829006421, "grad_norm": 0.39633217453956604, "learning_rate": 0.00015272358621765296, "loss": 1.4933, "step": 18203 }, { "epoch": 0.23655267744455796, "grad_norm": 0.42337802052497864, "learning_rate": 0.00015272098675574158, "loss": 1.6307, "step": 18204 }, { "epoch": 0.23656567198847384, "grad_norm": 0.37842652201652527, "learning_rate": 0.00015271838729383018, "loss": 1.5237, "step": 18205 }, { "epoch": 0.2365786665323897, "grad_norm": 0.410157173871994, "learning_rate": 0.0001527157878319188, "loss": 1.3664, "step": 18206 }, { "epoch": 0.23659166107630558, "grad_norm": 0.2758738100528717, "learning_rate": 0.0001527131883700074, "loss": 1.1969, "step": 18207 }, { "epoch": 0.23660465562022145, "grad_norm": 0.32757776975631714, "learning_rate": 0.00015271058890809603, "loss": 1.2699, "step": 18208 }, { "epoch": 0.23661765016413733, "grad_norm": 0.5655000805854797, "learning_rate": 0.00015270798944618465, "loss": 1.4511, "step": 18209 }, { "epoch": 0.2366306447080532, "grad_norm": 0.4192947745323181, "learning_rate": 0.00015270538998427325, "loss": 1.5025, "step": 18210 }, { "epoch": 0.23664363925196907, "grad_norm": 0.43690788745880127, "learning_rate": 0.00015270279052236187, "loss": 1.5444, "step": 18211 }, { "epoch": 0.23665663379588495, "grad_norm": 0.4251912534236908, "learning_rate": 0.0001527001910604505, "loss": 1.4209, "step": 18212 }, { "epoch": 0.23666962833980082, "grad_norm": 0.38708940148353577, "learning_rate": 0.00015269759159853912, "loss": 1.3019, "step": 18213 }, { "epoch": 0.2366826228837167, "grad_norm": 0.34309008717536926, "learning_rate": 0.00015269499213662772, "loss": 1.3288, "step": 18214 }, { "epoch": 0.23669561742763257, "grad_norm": 0.39441609382629395, "learning_rate": 0.00015269239267471635, "loss": 1.4494, "step": 18215 }, { "epoch": 0.23670861197154844, "grad_norm": 0.4112738072872162, "learning_rate": 0.00015268979321280497, "loss": 1.4111, "step": 18216 }, { "epoch": 0.2367216065154643, "grad_norm": 0.43523266911506653, "learning_rate": 0.00015268719375089357, "loss": 1.2338, "step": 18217 }, { "epoch": 0.23673460105938018, "grad_norm": 0.4263109266757965, "learning_rate": 0.0001526845942889822, "loss": 1.218, "step": 18218 }, { "epoch": 0.23674759560329606, "grad_norm": 0.3401554226875305, "learning_rate": 0.0001526819948270708, "loss": 1.3865, "step": 18219 }, { "epoch": 0.23676059014721193, "grad_norm": 0.42171597480773926, "learning_rate": 0.00015267939536515944, "loss": 1.3249, "step": 18220 }, { "epoch": 0.2367735846911278, "grad_norm": 0.4004462659358978, "learning_rate": 0.00015267679590324804, "loss": 1.4731, "step": 18221 }, { "epoch": 0.23678657923504368, "grad_norm": 0.3541699945926666, "learning_rate": 0.00015267419644133664, "loss": 1.3885, "step": 18222 }, { "epoch": 0.23679957377895955, "grad_norm": 0.3860013782978058, "learning_rate": 0.00015267159697942526, "loss": 1.5273, "step": 18223 }, { "epoch": 0.23681256832287542, "grad_norm": 0.4005851149559021, "learning_rate": 0.00015266899751751388, "loss": 1.5126, "step": 18224 }, { "epoch": 0.2368255628667913, "grad_norm": 0.3217337131500244, "learning_rate": 0.0001526663980556025, "loss": 1.2865, "step": 18225 }, { "epoch": 0.23683855741070717, "grad_norm": 0.2983606457710266, "learning_rate": 0.0001526637985936911, "loss": 1.1833, "step": 18226 }, { "epoch": 0.23685155195462304, "grad_norm": 0.40271472930908203, "learning_rate": 0.00015266119913177973, "loss": 1.4144, "step": 18227 }, { "epoch": 0.2368645464985389, "grad_norm": 0.455280601978302, "learning_rate": 0.00015265859966986836, "loss": 1.4037, "step": 18228 }, { "epoch": 0.2368775410424548, "grad_norm": 0.47944316267967224, "learning_rate": 0.00015265600020795695, "loss": 1.5018, "step": 18229 }, { "epoch": 0.23689053558637066, "grad_norm": 0.39556461572647095, "learning_rate": 0.00015265340074604558, "loss": 1.5045, "step": 18230 }, { "epoch": 0.23690353013028653, "grad_norm": 0.3195951282978058, "learning_rate": 0.00015265080128413417, "loss": 1.1856, "step": 18231 }, { "epoch": 0.2369165246742024, "grad_norm": 0.33018478751182556, "learning_rate": 0.00015264820182222283, "loss": 1.3831, "step": 18232 }, { "epoch": 0.2369295192181183, "grad_norm": 0.2715296149253845, "learning_rate": 0.00015264560236031142, "loss": 1.3369, "step": 18233 }, { "epoch": 0.23694251376203418, "grad_norm": 0.40880000591278076, "learning_rate": 0.00015264300289840002, "loss": 1.5024, "step": 18234 }, { "epoch": 0.23695550830595005, "grad_norm": 0.39754101634025574, "learning_rate": 0.00015264040343648865, "loss": 1.431, "step": 18235 }, { "epoch": 0.23696850284986593, "grad_norm": 0.34071943163871765, "learning_rate": 0.00015263780397457727, "loss": 1.3663, "step": 18236 }, { "epoch": 0.2369814973937818, "grad_norm": 0.4209735691547394, "learning_rate": 0.0001526352045126659, "loss": 1.4971, "step": 18237 }, { "epoch": 0.23699449193769767, "grad_norm": 0.4041473865509033, "learning_rate": 0.0001526326050507545, "loss": 1.3777, "step": 18238 }, { "epoch": 0.23700748648161354, "grad_norm": 0.3437062203884125, "learning_rate": 0.00015263000558884312, "loss": 1.5667, "step": 18239 }, { "epoch": 0.23702048102552942, "grad_norm": 0.4070585072040558, "learning_rate": 0.00015262740612693174, "loss": 1.3817, "step": 18240 }, { "epoch": 0.2370334755694453, "grad_norm": 0.3852291405200958, "learning_rate": 0.00015262480666502034, "loss": 1.3261, "step": 18241 }, { "epoch": 0.23704647011336116, "grad_norm": 0.35380423069000244, "learning_rate": 0.00015262220720310896, "loss": 1.4057, "step": 18242 }, { "epoch": 0.23705946465727704, "grad_norm": 0.4682086706161499, "learning_rate": 0.0001526196077411976, "loss": 1.5476, "step": 18243 }, { "epoch": 0.2370724592011929, "grad_norm": 0.49074384570121765, "learning_rate": 0.0001526170082792862, "loss": 1.3677, "step": 18244 }, { "epoch": 0.23708545374510878, "grad_norm": 0.37268275022506714, "learning_rate": 0.0001526144088173748, "loss": 1.1758, "step": 18245 }, { "epoch": 0.23709844828902465, "grad_norm": 0.35650113224983215, "learning_rate": 0.0001526118093554634, "loss": 1.3764, "step": 18246 }, { "epoch": 0.23711144283294053, "grad_norm": 0.35127323865890503, "learning_rate": 0.00015260920989355206, "loss": 1.2237, "step": 18247 }, { "epoch": 0.2371244373768564, "grad_norm": 0.40921294689178467, "learning_rate": 0.00015260661043164066, "loss": 1.4648, "step": 18248 }, { "epoch": 0.23713743192077227, "grad_norm": 0.3901052176952362, "learning_rate": 0.00015260401096972928, "loss": 1.3654, "step": 18249 }, { "epoch": 0.23715042646468815, "grad_norm": 0.35409045219421387, "learning_rate": 0.00015260141150781788, "loss": 1.3752, "step": 18250 }, { "epoch": 0.23716342100860402, "grad_norm": 0.438001424074173, "learning_rate": 0.0001525988120459065, "loss": 1.3889, "step": 18251 }, { "epoch": 0.2371764155525199, "grad_norm": 0.38508597016334534, "learning_rate": 0.00015259621258399513, "loss": 1.4102, "step": 18252 }, { "epoch": 0.23718941009643577, "grad_norm": 0.4647999703884125, "learning_rate": 0.00015259361312208372, "loss": 1.3686, "step": 18253 }, { "epoch": 0.23720240464035164, "grad_norm": 0.4336076080799103, "learning_rate": 0.00015259101366017235, "loss": 1.4542, "step": 18254 }, { "epoch": 0.2372153991842675, "grad_norm": 0.6248377561569214, "learning_rate": 0.00015258841419826097, "loss": 1.6398, "step": 18255 }, { "epoch": 0.23722839372818338, "grad_norm": 0.4756849408149719, "learning_rate": 0.0001525858147363496, "loss": 1.4641, "step": 18256 }, { "epoch": 0.23724138827209926, "grad_norm": 0.44121065735816956, "learning_rate": 0.0001525832152744382, "loss": 1.4335, "step": 18257 }, { "epoch": 0.23725438281601513, "grad_norm": 0.43411433696746826, "learning_rate": 0.00015258061581252682, "loss": 1.5405, "step": 18258 }, { "epoch": 0.237267377359931, "grad_norm": 0.42824992537498474, "learning_rate": 0.00015257801635061544, "loss": 1.309, "step": 18259 }, { "epoch": 0.23728037190384688, "grad_norm": 0.3974263072013855, "learning_rate": 0.00015257541688870404, "loss": 1.3186, "step": 18260 }, { "epoch": 0.23729336644776275, "grad_norm": 0.391261488199234, "learning_rate": 0.00015257281742679267, "loss": 1.3448, "step": 18261 }, { "epoch": 0.23730636099167862, "grad_norm": 0.37573957443237305, "learning_rate": 0.00015257021796488126, "loss": 1.441, "step": 18262 }, { "epoch": 0.2373193555355945, "grad_norm": 0.38714393973350525, "learning_rate": 0.0001525676185029699, "loss": 1.3944, "step": 18263 }, { "epoch": 0.23733235007951037, "grad_norm": 0.3637382686138153, "learning_rate": 0.0001525650190410585, "loss": 1.2501, "step": 18264 }, { "epoch": 0.23734534462342624, "grad_norm": 0.43220192193984985, "learning_rate": 0.0001525624195791471, "loss": 1.372, "step": 18265 }, { "epoch": 0.23735833916734211, "grad_norm": 0.38607120513916016, "learning_rate": 0.00015255982011723573, "loss": 1.3484, "step": 18266 }, { "epoch": 0.237371333711258, "grad_norm": 0.43534281849861145, "learning_rate": 0.00015255722065532436, "loss": 1.5282, "step": 18267 }, { "epoch": 0.23738432825517386, "grad_norm": 0.4245777726173401, "learning_rate": 0.00015255462119341298, "loss": 1.4235, "step": 18268 }, { "epoch": 0.23739732279908973, "grad_norm": 0.4120676517486572, "learning_rate": 0.00015255202173150158, "loss": 1.4388, "step": 18269 }, { "epoch": 0.2374103173430056, "grad_norm": 0.275453120470047, "learning_rate": 0.0001525494222695902, "loss": 1.3444, "step": 18270 }, { "epoch": 0.23742331188692148, "grad_norm": 0.34658634662628174, "learning_rate": 0.00015254682280767883, "loss": 1.2568, "step": 18271 }, { "epoch": 0.23743630643083735, "grad_norm": 0.5128961801528931, "learning_rate": 0.00015254422334576743, "loss": 1.4373, "step": 18272 }, { "epoch": 0.23744930097475322, "grad_norm": 0.3994189202785492, "learning_rate": 0.00015254162388385605, "loss": 1.2695, "step": 18273 }, { "epoch": 0.2374622955186691, "grad_norm": 0.3061051666736603, "learning_rate": 0.00015253902442194465, "loss": 1.4212, "step": 18274 }, { "epoch": 0.23747529006258497, "grad_norm": 0.3993454873561859, "learning_rate": 0.0001525364249600333, "loss": 1.516, "step": 18275 }, { "epoch": 0.23748828460650084, "grad_norm": 0.4022742211818695, "learning_rate": 0.0001525338254981219, "loss": 1.3363, "step": 18276 }, { "epoch": 0.23750127915041672, "grad_norm": 0.42900004982948303, "learning_rate": 0.0001525312260362105, "loss": 1.3001, "step": 18277 }, { "epoch": 0.2375142736943326, "grad_norm": 0.40140804648399353, "learning_rate": 0.00015252862657429915, "loss": 1.4237, "step": 18278 }, { "epoch": 0.23752726823824846, "grad_norm": 0.36197608709335327, "learning_rate": 0.00015252602711238774, "loss": 1.49, "step": 18279 }, { "epoch": 0.23754026278216434, "grad_norm": 0.4287504553794861, "learning_rate": 0.00015252342765047637, "loss": 1.5939, "step": 18280 }, { "epoch": 0.2375532573260802, "grad_norm": 0.4833097457885742, "learning_rate": 0.00015252082818856497, "loss": 1.3941, "step": 18281 }, { "epoch": 0.23756625186999608, "grad_norm": 0.41275930404663086, "learning_rate": 0.0001525182287266536, "loss": 1.5896, "step": 18282 }, { "epoch": 0.23757924641391195, "grad_norm": 0.4740753769874573, "learning_rate": 0.00015251562926474221, "loss": 1.4735, "step": 18283 }, { "epoch": 0.23759224095782783, "grad_norm": 0.36063116788864136, "learning_rate": 0.0001525130298028308, "loss": 1.475, "step": 18284 }, { "epoch": 0.2376052355017437, "grad_norm": 0.34388068318367004, "learning_rate": 0.00015251043034091944, "loss": 1.3601, "step": 18285 }, { "epoch": 0.23761823004565957, "grad_norm": 0.37087368965148926, "learning_rate": 0.00015250783087900806, "loss": 1.387, "step": 18286 }, { "epoch": 0.23763122458957545, "grad_norm": 0.37153133749961853, "learning_rate": 0.00015250523141709669, "loss": 1.3265, "step": 18287 }, { "epoch": 0.23764421913349132, "grad_norm": 0.37546223402023315, "learning_rate": 0.00015250263195518528, "loss": 1.4727, "step": 18288 }, { "epoch": 0.2376572136774072, "grad_norm": 0.41121768951416016, "learning_rate": 0.00015250003249327388, "loss": 1.5391, "step": 18289 }, { "epoch": 0.23767020822132306, "grad_norm": 0.35365819931030273, "learning_rate": 0.00015249743303136253, "loss": 1.4179, "step": 18290 }, { "epoch": 0.23768320276523894, "grad_norm": 0.4539684057235718, "learning_rate": 0.00015249483356945113, "loss": 1.4743, "step": 18291 }, { "epoch": 0.2376961973091548, "grad_norm": 0.361674964427948, "learning_rate": 0.00015249223410753975, "loss": 1.4407, "step": 18292 }, { "epoch": 0.23770919185307068, "grad_norm": 0.269663006067276, "learning_rate": 0.00015248963464562835, "loss": 1.2026, "step": 18293 }, { "epoch": 0.23772218639698656, "grad_norm": 0.4076891541481018, "learning_rate": 0.00015248703518371698, "loss": 1.3339, "step": 18294 }, { "epoch": 0.23773518094090243, "grad_norm": 0.3721511662006378, "learning_rate": 0.0001524844357218056, "loss": 1.311, "step": 18295 }, { "epoch": 0.2377481754848183, "grad_norm": 0.3775753974914551, "learning_rate": 0.0001524818362598942, "loss": 1.4413, "step": 18296 }, { "epoch": 0.23776117002873418, "grad_norm": 0.3262495696544647, "learning_rate": 0.00015247923679798282, "loss": 1.2478, "step": 18297 }, { "epoch": 0.23777416457265005, "grad_norm": 0.4048340916633606, "learning_rate": 0.00015247663733607145, "loss": 1.3213, "step": 18298 }, { "epoch": 0.23778715911656592, "grad_norm": 0.5252066850662231, "learning_rate": 0.00015247403787416007, "loss": 1.3677, "step": 18299 }, { "epoch": 0.2378001536604818, "grad_norm": 0.41798362135887146, "learning_rate": 0.00015247143841224867, "loss": 1.4824, "step": 18300 }, { "epoch": 0.23781314820439767, "grad_norm": 0.34314557909965515, "learning_rate": 0.00015246883895033727, "loss": 1.3602, "step": 18301 }, { "epoch": 0.23782614274831354, "grad_norm": 0.3864387571811676, "learning_rate": 0.00015246623948842592, "loss": 1.3383, "step": 18302 }, { "epoch": 0.2378391372922294, "grad_norm": 0.40235865116119385, "learning_rate": 0.00015246364002651451, "loss": 1.3264, "step": 18303 }, { "epoch": 0.2378521318361453, "grad_norm": 0.4304735064506531, "learning_rate": 0.00015246104056460314, "loss": 1.5047, "step": 18304 }, { "epoch": 0.23786512638006116, "grad_norm": 0.31614983081817627, "learning_rate": 0.00015245844110269174, "loss": 1.3543, "step": 18305 }, { "epoch": 0.23787812092397703, "grad_norm": 0.27633342146873474, "learning_rate": 0.00015245584164078036, "loss": 1.3811, "step": 18306 }, { "epoch": 0.2378911154678929, "grad_norm": 0.3414054214954376, "learning_rate": 0.00015245324217886899, "loss": 1.2454, "step": 18307 }, { "epoch": 0.23790411001180878, "grad_norm": 0.36108309030532837, "learning_rate": 0.00015245064271695758, "loss": 1.4144, "step": 18308 }, { "epoch": 0.23791710455572468, "grad_norm": 0.43713364005088806, "learning_rate": 0.0001524480432550462, "loss": 1.503, "step": 18309 }, { "epoch": 0.23793009909964055, "grad_norm": 0.46419891715049744, "learning_rate": 0.00015244544379313483, "loss": 1.4029, "step": 18310 }, { "epoch": 0.23794309364355642, "grad_norm": 0.39231863617897034, "learning_rate": 0.00015244284433122346, "loss": 1.4094, "step": 18311 }, { "epoch": 0.2379560881874723, "grad_norm": 0.3756033778190613, "learning_rate": 0.00015244024486931205, "loss": 1.3828, "step": 18312 }, { "epoch": 0.23796908273138817, "grad_norm": 0.5546630620956421, "learning_rate": 0.00015243764540740068, "loss": 1.5738, "step": 18313 }, { "epoch": 0.23798207727530404, "grad_norm": 0.4147547483444214, "learning_rate": 0.0001524350459454893, "loss": 1.2955, "step": 18314 }, { "epoch": 0.23799507181921992, "grad_norm": 0.43519827723503113, "learning_rate": 0.0001524324464835779, "loss": 1.6257, "step": 18315 }, { "epoch": 0.2380080663631358, "grad_norm": 0.3643302917480469, "learning_rate": 0.00015242984702166652, "loss": 1.0968, "step": 18316 }, { "epoch": 0.23802106090705166, "grad_norm": 0.3854471743106842, "learning_rate": 0.00015242724755975515, "loss": 1.5254, "step": 18317 }, { "epoch": 0.23803405545096754, "grad_norm": 0.48158252239227295, "learning_rate": 0.00015242464809784375, "loss": 1.5382, "step": 18318 }, { "epoch": 0.2380470499948834, "grad_norm": 0.356721967458725, "learning_rate": 0.00015242204863593237, "loss": 1.5433, "step": 18319 }, { "epoch": 0.23806004453879928, "grad_norm": 0.3497735261917114, "learning_rate": 0.00015241944917402097, "loss": 1.4411, "step": 18320 }, { "epoch": 0.23807303908271515, "grad_norm": 0.4345513582229614, "learning_rate": 0.00015241684971210962, "loss": 1.4692, "step": 18321 }, { "epoch": 0.23808603362663103, "grad_norm": 0.49815332889556885, "learning_rate": 0.00015241425025019822, "loss": 1.3836, "step": 18322 }, { "epoch": 0.2380990281705469, "grad_norm": 0.3473820090293884, "learning_rate": 0.00015241165078828684, "loss": 1.361, "step": 18323 }, { "epoch": 0.23811202271446277, "grad_norm": 0.3871738612651825, "learning_rate": 0.00015240905132637544, "loss": 1.4123, "step": 18324 }, { "epoch": 0.23812501725837865, "grad_norm": 0.3766672611236572, "learning_rate": 0.00015240645186446406, "loss": 1.5922, "step": 18325 }, { "epoch": 0.23813801180229452, "grad_norm": 0.33464813232421875, "learning_rate": 0.0001524038524025527, "loss": 1.4179, "step": 18326 }, { "epoch": 0.2381510063462104, "grad_norm": 0.5216385126113892, "learning_rate": 0.00015240125294064128, "loss": 1.4162, "step": 18327 }, { "epoch": 0.23816400089012627, "grad_norm": 0.43903517723083496, "learning_rate": 0.0001523986534787299, "loss": 1.5467, "step": 18328 }, { "epoch": 0.23817699543404214, "grad_norm": 0.40008556842803955, "learning_rate": 0.00015239605401681853, "loss": 1.4518, "step": 18329 }, { "epoch": 0.238189989977958, "grad_norm": 0.36536145210266113, "learning_rate": 0.00015239345455490713, "loss": 1.3205, "step": 18330 }, { "epoch": 0.23820298452187388, "grad_norm": 0.38722220063209534, "learning_rate": 0.00015239085509299576, "loss": 1.4093, "step": 18331 }, { "epoch": 0.23821597906578976, "grad_norm": 0.4167955219745636, "learning_rate": 0.00015238825563108435, "loss": 1.5592, "step": 18332 }, { "epoch": 0.23822897360970563, "grad_norm": 0.4013184607028961, "learning_rate": 0.000152385656169173, "loss": 1.4779, "step": 18333 }, { "epoch": 0.2382419681536215, "grad_norm": 0.293133020401001, "learning_rate": 0.0001523830567072616, "loss": 1.3147, "step": 18334 }, { "epoch": 0.23825496269753738, "grad_norm": 0.448988676071167, "learning_rate": 0.00015238045724535023, "loss": 1.6063, "step": 18335 }, { "epoch": 0.23826795724145325, "grad_norm": 0.38091573119163513, "learning_rate": 0.00015237785778343882, "loss": 1.3969, "step": 18336 }, { "epoch": 0.23828095178536912, "grad_norm": 0.38666629791259766, "learning_rate": 0.00015237525832152745, "loss": 1.4154, "step": 18337 }, { "epoch": 0.238293946329285, "grad_norm": 0.3075655698776245, "learning_rate": 0.00015237265885961607, "loss": 1.4303, "step": 18338 }, { "epoch": 0.23830694087320087, "grad_norm": 0.3782234191894531, "learning_rate": 0.00015237005939770467, "loss": 1.3338, "step": 18339 }, { "epoch": 0.23831993541711674, "grad_norm": 0.4326476752758026, "learning_rate": 0.0001523674599357933, "loss": 1.5497, "step": 18340 }, { "epoch": 0.2383329299610326, "grad_norm": 0.44837895035743713, "learning_rate": 0.00015236486047388192, "loss": 1.6385, "step": 18341 }, { "epoch": 0.2383459245049485, "grad_norm": 0.41195419430732727, "learning_rate": 0.00015236226101197054, "loss": 1.5162, "step": 18342 }, { "epoch": 0.23835891904886436, "grad_norm": 0.4462265074253082, "learning_rate": 0.00015235966155005914, "loss": 1.3663, "step": 18343 }, { "epoch": 0.23837191359278023, "grad_norm": 0.4363690912723541, "learning_rate": 0.00015235706208814774, "loss": 1.4924, "step": 18344 }, { "epoch": 0.2383849081366961, "grad_norm": 0.3536207675933838, "learning_rate": 0.0001523544626262364, "loss": 1.3365, "step": 18345 }, { "epoch": 0.23839790268061198, "grad_norm": 0.37310531735420227, "learning_rate": 0.000152351863164325, "loss": 1.4971, "step": 18346 }, { "epoch": 0.23841089722452785, "grad_norm": 0.39613497257232666, "learning_rate": 0.0001523492637024136, "loss": 1.3801, "step": 18347 }, { "epoch": 0.23842389176844372, "grad_norm": 0.38797375559806824, "learning_rate": 0.0001523466642405022, "loss": 1.2277, "step": 18348 }, { "epoch": 0.2384368863123596, "grad_norm": 0.401252418756485, "learning_rate": 0.00015234406477859083, "loss": 1.3874, "step": 18349 }, { "epoch": 0.23844988085627547, "grad_norm": 0.3404322862625122, "learning_rate": 0.00015234146531667946, "loss": 1.335, "step": 18350 }, { "epoch": 0.23846287540019134, "grad_norm": 0.4653375446796417, "learning_rate": 0.00015233886585476806, "loss": 1.4304, "step": 18351 }, { "epoch": 0.23847586994410722, "grad_norm": 0.42086586356163025, "learning_rate": 0.0001523362663928567, "loss": 1.4604, "step": 18352 }, { "epoch": 0.2384888644880231, "grad_norm": 0.4242717921733856, "learning_rate": 0.0001523336669309453, "loss": 1.5315, "step": 18353 }, { "epoch": 0.23850185903193896, "grad_norm": 0.3968888223171234, "learning_rate": 0.00015233106746903393, "loss": 1.2455, "step": 18354 }, { "epoch": 0.23851485357585483, "grad_norm": 0.44738245010375977, "learning_rate": 0.00015232846800712253, "loss": 1.504, "step": 18355 }, { "epoch": 0.2385278481197707, "grad_norm": 0.4098779857158661, "learning_rate": 0.00015232586854521115, "loss": 1.652, "step": 18356 }, { "epoch": 0.23854084266368658, "grad_norm": 0.42097532749176025, "learning_rate": 0.00015232326908329978, "loss": 1.3497, "step": 18357 }, { "epoch": 0.23855383720760245, "grad_norm": 0.4069109857082367, "learning_rate": 0.00015232066962138837, "loss": 1.6071, "step": 18358 }, { "epoch": 0.23856683175151833, "grad_norm": 0.31164857745170593, "learning_rate": 0.000152318070159477, "loss": 1.4385, "step": 18359 }, { "epoch": 0.2385798262954342, "grad_norm": 0.37497246265411377, "learning_rate": 0.00015231547069756562, "loss": 1.2898, "step": 18360 }, { "epoch": 0.23859282083935007, "grad_norm": 0.4040505588054657, "learning_rate": 0.00015231287123565422, "loss": 1.4444, "step": 18361 }, { "epoch": 0.23860581538326595, "grad_norm": 0.34165674448013306, "learning_rate": 0.00015231027177374284, "loss": 1.3848, "step": 18362 }, { "epoch": 0.23861880992718182, "grad_norm": 0.4454728662967682, "learning_rate": 0.00015230767231183144, "loss": 1.4778, "step": 18363 }, { "epoch": 0.2386318044710977, "grad_norm": 0.4510919451713562, "learning_rate": 0.0001523050728499201, "loss": 1.563, "step": 18364 }, { "epoch": 0.23864479901501356, "grad_norm": 0.3385016620159149, "learning_rate": 0.0001523024733880087, "loss": 1.5803, "step": 18365 }, { "epoch": 0.23865779355892944, "grad_norm": 0.3359794020652771, "learning_rate": 0.00015229987392609731, "loss": 1.4566, "step": 18366 }, { "epoch": 0.2386707881028453, "grad_norm": 0.3456694483757019, "learning_rate": 0.0001522972744641859, "loss": 1.3548, "step": 18367 }, { "epoch": 0.23868378264676118, "grad_norm": 0.3981991112232208, "learning_rate": 0.00015229467500227454, "loss": 1.3751, "step": 18368 }, { "epoch": 0.23869677719067706, "grad_norm": 0.4316973090171814, "learning_rate": 0.00015229207554036316, "loss": 1.4836, "step": 18369 }, { "epoch": 0.23870977173459293, "grad_norm": 0.3886548578739166, "learning_rate": 0.00015228947607845176, "loss": 1.2173, "step": 18370 }, { "epoch": 0.2387227662785088, "grad_norm": 0.4843144118785858, "learning_rate": 0.00015228687661654038, "loss": 1.4104, "step": 18371 }, { "epoch": 0.23873576082242468, "grad_norm": 0.4144282341003418, "learning_rate": 0.000152284277154629, "loss": 1.5156, "step": 18372 }, { "epoch": 0.23874875536634055, "grad_norm": 0.33613187074661255, "learning_rate": 0.0001522816776927176, "loss": 1.2932, "step": 18373 }, { "epoch": 0.23876174991025642, "grad_norm": 0.41678911447525024, "learning_rate": 0.00015227907823080623, "loss": 1.365, "step": 18374 }, { "epoch": 0.2387747444541723, "grad_norm": 0.41785669326782227, "learning_rate": 0.00015227647876889483, "loss": 1.5861, "step": 18375 }, { "epoch": 0.23878773899808817, "grad_norm": 0.44068747758865356, "learning_rate": 0.00015227387930698348, "loss": 1.3881, "step": 18376 }, { "epoch": 0.23880073354200404, "grad_norm": 0.39700567722320557, "learning_rate": 0.00015227127984507208, "loss": 1.2586, "step": 18377 }, { "epoch": 0.2388137280859199, "grad_norm": 0.3955710530281067, "learning_rate": 0.0001522686803831607, "loss": 1.3585, "step": 18378 }, { "epoch": 0.23882672262983579, "grad_norm": 0.32789579033851624, "learning_rate": 0.0001522660809212493, "loss": 1.3655, "step": 18379 }, { "epoch": 0.23883971717375166, "grad_norm": 0.3593163788318634, "learning_rate": 0.00015226348145933792, "loss": 1.3141, "step": 18380 }, { "epoch": 0.23885271171766753, "grad_norm": 0.425920307636261, "learning_rate": 0.00015226088199742655, "loss": 1.3062, "step": 18381 }, { "epoch": 0.2388657062615834, "grad_norm": 0.41736099123954773, "learning_rate": 0.00015225828253551514, "loss": 1.338, "step": 18382 }, { "epoch": 0.23887870080549928, "grad_norm": 0.4712754786014557, "learning_rate": 0.00015225568307360377, "loss": 1.4469, "step": 18383 }, { "epoch": 0.23889169534941515, "grad_norm": 0.44545063376426697, "learning_rate": 0.0001522530836116924, "loss": 1.5173, "step": 18384 }, { "epoch": 0.23890468989333105, "grad_norm": 0.417752742767334, "learning_rate": 0.000152250484149781, "loss": 1.3929, "step": 18385 }, { "epoch": 0.23891768443724692, "grad_norm": 0.4690520763397217, "learning_rate": 0.00015224788468786961, "loss": 1.4693, "step": 18386 }, { "epoch": 0.2389306789811628, "grad_norm": 0.45876073837280273, "learning_rate": 0.0001522452852259582, "loss": 1.2926, "step": 18387 }, { "epoch": 0.23894367352507867, "grad_norm": 0.3711584806442261, "learning_rate": 0.00015224268576404686, "loss": 1.4161, "step": 18388 }, { "epoch": 0.23895666806899454, "grad_norm": 0.4448450207710266, "learning_rate": 0.00015224008630213546, "loss": 1.337, "step": 18389 }, { "epoch": 0.23896966261291042, "grad_norm": 0.4528861939907074, "learning_rate": 0.00015223748684022409, "loss": 1.4324, "step": 18390 }, { "epoch": 0.2389826571568263, "grad_norm": 0.4680466055870056, "learning_rate": 0.0001522348873783127, "loss": 1.4565, "step": 18391 }, { "epoch": 0.23899565170074216, "grad_norm": 0.24746529757976532, "learning_rate": 0.0001522322879164013, "loss": 1.3496, "step": 18392 }, { "epoch": 0.23900864624465804, "grad_norm": 0.35205063223838806, "learning_rate": 0.00015222968845448993, "loss": 1.4021, "step": 18393 }, { "epoch": 0.2390216407885739, "grad_norm": 0.42321908473968506, "learning_rate": 0.00015222708899257853, "loss": 1.4395, "step": 18394 }, { "epoch": 0.23903463533248978, "grad_norm": 0.37585991621017456, "learning_rate": 0.00015222448953066718, "loss": 1.3448, "step": 18395 }, { "epoch": 0.23904762987640565, "grad_norm": 0.3511659502983093, "learning_rate": 0.00015222189006875578, "loss": 1.3364, "step": 18396 }, { "epoch": 0.23906062442032153, "grad_norm": 0.365261435508728, "learning_rate": 0.0001522192906068444, "loss": 1.4228, "step": 18397 }, { "epoch": 0.2390736189642374, "grad_norm": 0.375203937292099, "learning_rate": 0.000152216691144933, "loss": 1.4221, "step": 18398 }, { "epoch": 0.23908661350815327, "grad_norm": 0.4070294201374054, "learning_rate": 0.00015221409168302162, "loss": 1.3619, "step": 18399 }, { "epoch": 0.23909960805206915, "grad_norm": 0.43807366490364075, "learning_rate": 0.00015221149222111025, "loss": 1.465, "step": 18400 }, { "epoch": 0.23911260259598502, "grad_norm": 0.47241339087486267, "learning_rate": 0.00015220889275919885, "loss": 1.4921, "step": 18401 }, { "epoch": 0.2391255971399009, "grad_norm": 0.44848671555519104, "learning_rate": 0.00015220629329728747, "loss": 1.5368, "step": 18402 }, { "epoch": 0.23913859168381676, "grad_norm": 0.39667055010795593, "learning_rate": 0.0001522036938353761, "loss": 1.318, "step": 18403 }, { "epoch": 0.23915158622773264, "grad_norm": 0.37080222368240356, "learning_rate": 0.0001522010943734647, "loss": 1.3532, "step": 18404 }, { "epoch": 0.2391645807716485, "grad_norm": 0.38848569989204407, "learning_rate": 0.00015219849491155332, "loss": 1.4087, "step": 18405 }, { "epoch": 0.23917757531556438, "grad_norm": 0.3809947073459625, "learning_rate": 0.00015219589544964191, "loss": 1.3293, "step": 18406 }, { "epoch": 0.23919056985948026, "grad_norm": 0.4530285596847534, "learning_rate": 0.00015219329598773057, "loss": 1.4698, "step": 18407 }, { "epoch": 0.23920356440339613, "grad_norm": 0.3524971902370453, "learning_rate": 0.00015219069652581916, "loss": 1.7743, "step": 18408 }, { "epoch": 0.239216558947312, "grad_norm": 0.438679963350296, "learning_rate": 0.0001521880970639078, "loss": 1.4769, "step": 18409 }, { "epoch": 0.23922955349122788, "grad_norm": 0.38857272267341614, "learning_rate": 0.00015218549760199639, "loss": 1.3954, "step": 18410 }, { "epoch": 0.23924254803514375, "grad_norm": 0.41575130820274353, "learning_rate": 0.000152182898140085, "loss": 1.4375, "step": 18411 }, { "epoch": 0.23925554257905962, "grad_norm": 0.4694860577583313, "learning_rate": 0.00015218029867817363, "loss": 1.3106, "step": 18412 }, { "epoch": 0.2392685371229755, "grad_norm": 0.4033541679382324, "learning_rate": 0.00015217769921626223, "loss": 1.5521, "step": 18413 }, { "epoch": 0.23928153166689137, "grad_norm": 0.4256977140903473, "learning_rate": 0.00015217509975435086, "loss": 1.3459, "step": 18414 }, { "epoch": 0.23929452621080724, "grad_norm": 0.39320868253707886, "learning_rate": 0.00015217250029243948, "loss": 1.4408, "step": 18415 }, { "epoch": 0.2393075207547231, "grad_norm": 0.4771225154399872, "learning_rate": 0.00015216990083052808, "loss": 1.472, "step": 18416 }, { "epoch": 0.23932051529863899, "grad_norm": 0.37927496433258057, "learning_rate": 0.0001521673013686167, "loss": 1.4159, "step": 18417 }, { "epoch": 0.23933350984255486, "grad_norm": 0.5706114172935486, "learning_rate": 0.0001521647019067053, "loss": 1.4365, "step": 18418 }, { "epoch": 0.23934650438647073, "grad_norm": 0.4115237295627594, "learning_rate": 0.00015216210244479395, "loss": 1.3363, "step": 18419 }, { "epoch": 0.2393594989303866, "grad_norm": 0.37234610319137573, "learning_rate": 0.00015215950298288255, "loss": 1.4372, "step": 18420 }, { "epoch": 0.23937249347430248, "grad_norm": 0.37164726853370667, "learning_rate": 0.00015215690352097117, "loss": 1.382, "step": 18421 }, { "epoch": 0.23938548801821835, "grad_norm": 0.444560706615448, "learning_rate": 0.00015215430405905977, "loss": 1.402, "step": 18422 }, { "epoch": 0.23939848256213422, "grad_norm": 0.48399990797042847, "learning_rate": 0.0001521517045971484, "loss": 1.3937, "step": 18423 }, { "epoch": 0.2394114771060501, "grad_norm": 0.4545885920524597, "learning_rate": 0.00015214910513523702, "loss": 1.4436, "step": 18424 }, { "epoch": 0.23942447164996597, "grad_norm": 0.4635612964630127, "learning_rate": 0.00015214650567332562, "loss": 1.3423, "step": 18425 }, { "epoch": 0.23943746619388184, "grad_norm": 0.46208158135414124, "learning_rate": 0.00015214390621141427, "loss": 1.3664, "step": 18426 }, { "epoch": 0.23945046073779772, "grad_norm": 0.4331016540527344, "learning_rate": 0.00015214130674950287, "loss": 1.3717, "step": 18427 }, { "epoch": 0.2394634552817136, "grad_norm": 0.4727119207382202, "learning_rate": 0.00015213870728759146, "loss": 1.5932, "step": 18428 }, { "epoch": 0.23947644982562946, "grad_norm": 0.3666777014732361, "learning_rate": 0.0001521361078256801, "loss": 1.427, "step": 18429 }, { "epoch": 0.23948944436954533, "grad_norm": 0.3947739899158478, "learning_rate": 0.0001521335083637687, "loss": 1.5911, "step": 18430 }, { "epoch": 0.2395024389134612, "grad_norm": 0.47298958897590637, "learning_rate": 0.00015213090890185734, "loss": 1.2793, "step": 18431 }, { "epoch": 0.23951543345737708, "grad_norm": 0.4279867112636566, "learning_rate": 0.00015212830943994593, "loss": 1.299, "step": 18432 }, { "epoch": 0.23952842800129295, "grad_norm": 0.4041167199611664, "learning_rate": 0.00015212570997803456, "loss": 1.3863, "step": 18433 }, { "epoch": 0.23954142254520883, "grad_norm": 0.42256590723991394, "learning_rate": 0.00015212311051612318, "loss": 1.4091, "step": 18434 }, { "epoch": 0.2395544170891247, "grad_norm": 0.4084274172782898, "learning_rate": 0.00015212051105421178, "loss": 1.2583, "step": 18435 }, { "epoch": 0.23956741163304057, "grad_norm": 0.4347551167011261, "learning_rate": 0.0001521179115923004, "loss": 1.5064, "step": 18436 }, { "epoch": 0.23958040617695645, "grad_norm": 0.40250375866889954, "learning_rate": 0.000152115312130389, "loss": 1.3499, "step": 18437 }, { "epoch": 0.23959340072087232, "grad_norm": 0.36157944798469543, "learning_rate": 0.00015211271266847765, "loss": 1.365, "step": 18438 }, { "epoch": 0.2396063952647882, "grad_norm": 0.46978047490119934, "learning_rate": 0.00015211011320656625, "loss": 1.515, "step": 18439 }, { "epoch": 0.23961938980870406, "grad_norm": 0.41418614983558655, "learning_rate": 0.00015210751374465485, "loss": 1.5187, "step": 18440 }, { "epoch": 0.23963238435261994, "grad_norm": 0.33228474855422974, "learning_rate": 0.00015210491428274347, "loss": 1.4437, "step": 18441 }, { "epoch": 0.2396453788965358, "grad_norm": 0.36606141924858093, "learning_rate": 0.0001521023148208321, "loss": 1.3836, "step": 18442 }, { "epoch": 0.23965837344045168, "grad_norm": 0.3457067310810089, "learning_rate": 0.00015209971535892072, "loss": 1.4185, "step": 18443 }, { "epoch": 0.23967136798436756, "grad_norm": 0.37555575370788574, "learning_rate": 0.00015209711589700932, "loss": 1.2671, "step": 18444 }, { "epoch": 0.23968436252828343, "grad_norm": 0.34376436471939087, "learning_rate": 0.00015209451643509794, "loss": 1.2509, "step": 18445 }, { "epoch": 0.2396973570721993, "grad_norm": 0.3246956169605255, "learning_rate": 0.00015209191697318657, "loss": 1.344, "step": 18446 }, { "epoch": 0.23971035161611517, "grad_norm": 0.43418511748313904, "learning_rate": 0.00015208931751127517, "loss": 1.465, "step": 18447 }, { "epoch": 0.23972334616003105, "grad_norm": 0.35498565435409546, "learning_rate": 0.0001520867180493638, "loss": 1.4512, "step": 18448 }, { "epoch": 0.23973634070394692, "grad_norm": 0.45842888951301575, "learning_rate": 0.0001520841185874524, "loss": 1.5731, "step": 18449 }, { "epoch": 0.2397493352478628, "grad_norm": 0.38945823907852173, "learning_rate": 0.00015208151912554104, "loss": 1.4053, "step": 18450 }, { "epoch": 0.23976232979177867, "grad_norm": 0.3220931887626648, "learning_rate": 0.00015207891966362964, "loss": 1.3988, "step": 18451 }, { "epoch": 0.23977532433569454, "grad_norm": 0.3733195960521698, "learning_rate": 0.00015207632020171823, "loss": 1.4427, "step": 18452 }, { "epoch": 0.2397883188796104, "grad_norm": 0.41567760705947876, "learning_rate": 0.00015207372073980686, "loss": 1.4139, "step": 18453 }, { "epoch": 0.23980131342352629, "grad_norm": 0.4601441025733948, "learning_rate": 0.00015207112127789548, "loss": 1.3478, "step": 18454 }, { "epoch": 0.23981430796744216, "grad_norm": 0.41283464431762695, "learning_rate": 0.0001520685218159841, "loss": 1.4726, "step": 18455 }, { "epoch": 0.23982730251135803, "grad_norm": 0.3775501847267151, "learning_rate": 0.0001520659223540727, "loss": 1.6285, "step": 18456 }, { "epoch": 0.2398402970552739, "grad_norm": 0.3454986810684204, "learning_rate": 0.00015206332289216133, "loss": 1.2349, "step": 18457 }, { "epoch": 0.23985329159918978, "grad_norm": 0.33893612027168274, "learning_rate": 0.00015206072343024995, "loss": 1.1989, "step": 18458 }, { "epoch": 0.23986628614310565, "grad_norm": 0.3938547372817993, "learning_rate": 0.00015205812396833855, "loss": 1.3242, "step": 18459 }, { "epoch": 0.23987928068702152, "grad_norm": 0.39689940214157104, "learning_rate": 0.00015205552450642718, "loss": 1.4818, "step": 18460 }, { "epoch": 0.23989227523093742, "grad_norm": 0.39818379282951355, "learning_rate": 0.00015205292504451577, "loss": 1.2801, "step": 18461 }, { "epoch": 0.2399052697748533, "grad_norm": 0.48824170231819153, "learning_rate": 0.00015205032558260442, "loss": 1.4916, "step": 18462 }, { "epoch": 0.23991826431876917, "grad_norm": 0.3895243704319, "learning_rate": 0.00015204772612069302, "loss": 1.3978, "step": 18463 }, { "epoch": 0.23993125886268504, "grad_norm": 0.40258723497390747, "learning_rate": 0.00015204512665878165, "loss": 1.4048, "step": 18464 }, { "epoch": 0.23994425340660092, "grad_norm": 0.42507022619247437, "learning_rate": 0.00015204252719687027, "loss": 1.2852, "step": 18465 }, { "epoch": 0.2399572479505168, "grad_norm": 0.34923067688941956, "learning_rate": 0.00015203992773495887, "loss": 1.3582, "step": 18466 }, { "epoch": 0.23997024249443266, "grad_norm": 0.49082961678504944, "learning_rate": 0.0001520373282730475, "loss": 1.4326, "step": 18467 }, { "epoch": 0.23998323703834853, "grad_norm": 0.5216532349586487, "learning_rate": 0.0001520347288111361, "loss": 1.4784, "step": 18468 }, { "epoch": 0.2399962315822644, "grad_norm": 0.42487505078315735, "learning_rate": 0.00015203212934922471, "loss": 1.379, "step": 18469 }, { "epoch": 0.24000922612618028, "grad_norm": 0.4226248264312744, "learning_rate": 0.00015202952988731334, "loss": 1.2864, "step": 18470 }, { "epoch": 0.24002222067009615, "grad_norm": 0.48685169219970703, "learning_rate": 0.00015202693042540194, "loss": 1.3841, "step": 18471 }, { "epoch": 0.24003521521401203, "grad_norm": 0.47948235273361206, "learning_rate": 0.00015202433096349056, "loss": 1.4886, "step": 18472 }, { "epoch": 0.2400482097579279, "grad_norm": 0.4333220422267914, "learning_rate": 0.00015202173150157919, "loss": 1.406, "step": 18473 }, { "epoch": 0.24006120430184377, "grad_norm": 0.4021318852901459, "learning_rate": 0.0001520191320396678, "loss": 1.4615, "step": 18474 }, { "epoch": 0.24007419884575965, "grad_norm": 0.433322012424469, "learning_rate": 0.0001520165325777564, "loss": 1.3888, "step": 18475 }, { "epoch": 0.24008719338967552, "grad_norm": 0.4169984757900238, "learning_rate": 0.00015201393311584503, "loss": 1.4407, "step": 18476 }, { "epoch": 0.2401001879335914, "grad_norm": 0.39146688580513, "learning_rate": 0.00015201133365393366, "loss": 1.0661, "step": 18477 }, { "epoch": 0.24011318247750726, "grad_norm": 0.46941983699798584, "learning_rate": 0.00015200873419202225, "loss": 1.5075, "step": 18478 }, { "epoch": 0.24012617702142314, "grad_norm": 0.3752908408641815, "learning_rate": 0.00015200613473011088, "loss": 1.5302, "step": 18479 }, { "epoch": 0.240139171565339, "grad_norm": 0.3781212866306305, "learning_rate": 0.00015200353526819948, "loss": 1.3203, "step": 18480 }, { "epoch": 0.24015216610925488, "grad_norm": 0.3345184922218323, "learning_rate": 0.00015200093580628813, "loss": 1.4135, "step": 18481 }, { "epoch": 0.24016516065317076, "grad_norm": 0.41585254669189453, "learning_rate": 0.00015199833634437672, "loss": 1.5058, "step": 18482 }, { "epoch": 0.24017815519708663, "grad_norm": 0.3971256613731384, "learning_rate": 0.00015199573688246532, "loss": 1.3248, "step": 18483 }, { "epoch": 0.2401911497410025, "grad_norm": 0.375776082277298, "learning_rate": 0.00015199313742055395, "loss": 1.406, "step": 18484 }, { "epoch": 0.24020414428491837, "grad_norm": 0.49672961235046387, "learning_rate": 0.00015199053795864257, "loss": 1.3617, "step": 18485 }, { "epoch": 0.24021713882883425, "grad_norm": 0.427846759557724, "learning_rate": 0.0001519879384967312, "loss": 1.3318, "step": 18486 }, { "epoch": 0.24023013337275012, "grad_norm": 0.4171332120895386, "learning_rate": 0.0001519853390348198, "loss": 1.4284, "step": 18487 }, { "epoch": 0.240243127916666, "grad_norm": 0.520584225654602, "learning_rate": 0.00015198273957290842, "loss": 1.2623, "step": 18488 }, { "epoch": 0.24025612246058187, "grad_norm": 0.38603925704956055, "learning_rate": 0.00015198014011099704, "loss": 1.6043, "step": 18489 }, { "epoch": 0.24026911700449774, "grad_norm": 0.36628198623657227, "learning_rate": 0.00015197754064908564, "loss": 1.4751, "step": 18490 }, { "epoch": 0.2402821115484136, "grad_norm": 0.38636234402656555, "learning_rate": 0.00015197494118717426, "loss": 1.3396, "step": 18491 }, { "epoch": 0.24029510609232949, "grad_norm": 0.37485790252685547, "learning_rate": 0.00015197234172526286, "loss": 1.3657, "step": 18492 }, { "epoch": 0.24030810063624536, "grad_norm": 0.40534693002700806, "learning_rate": 0.0001519697422633515, "loss": 1.6734, "step": 18493 }, { "epoch": 0.24032109518016123, "grad_norm": 0.38207098841667175, "learning_rate": 0.0001519671428014401, "loss": 1.3262, "step": 18494 }, { "epoch": 0.2403340897240771, "grad_norm": 0.39534109830856323, "learning_rate": 0.0001519645433395287, "loss": 1.2607, "step": 18495 }, { "epoch": 0.24034708426799298, "grad_norm": 0.4871385395526886, "learning_rate": 0.00015196194387761733, "loss": 1.4781, "step": 18496 }, { "epoch": 0.24036007881190885, "grad_norm": 0.4130176901817322, "learning_rate": 0.00015195934441570596, "loss": 1.4118, "step": 18497 }, { "epoch": 0.24037307335582472, "grad_norm": 0.4164632260799408, "learning_rate": 0.00015195674495379458, "loss": 1.6275, "step": 18498 }, { "epoch": 0.2403860678997406, "grad_norm": 0.4478120505809784, "learning_rate": 0.00015195414549188318, "loss": 1.3451, "step": 18499 }, { "epoch": 0.24039906244365647, "grad_norm": 0.5194246768951416, "learning_rate": 0.0001519515460299718, "loss": 1.502, "step": 18500 }, { "epoch": 0.24041205698757234, "grad_norm": 0.2550109028816223, "learning_rate": 0.00015194894656806043, "loss": 1.3911, "step": 18501 }, { "epoch": 0.24042505153148822, "grad_norm": 0.43518000841140747, "learning_rate": 0.00015194634710614902, "loss": 1.3353, "step": 18502 }, { "epoch": 0.2404380460754041, "grad_norm": 0.32888466119766235, "learning_rate": 0.00015194374764423765, "loss": 1.4003, "step": 18503 }, { "epoch": 0.24045104061931996, "grad_norm": 0.5493488311767578, "learning_rate": 0.00015194114818232627, "loss": 1.5907, "step": 18504 }, { "epoch": 0.24046403516323583, "grad_norm": 0.371028333902359, "learning_rate": 0.0001519385487204149, "loss": 1.229, "step": 18505 }, { "epoch": 0.2404770297071517, "grad_norm": 0.30533456802368164, "learning_rate": 0.0001519359492585035, "loss": 1.4314, "step": 18506 }, { "epoch": 0.24049002425106758, "grad_norm": 0.4756530225276947, "learning_rate": 0.0001519333497965921, "loss": 1.3445, "step": 18507 }, { "epoch": 0.24050301879498345, "grad_norm": 0.3461696207523346, "learning_rate": 0.00015193075033468074, "loss": 1.4566, "step": 18508 }, { "epoch": 0.24051601333889933, "grad_norm": 0.3828386664390564, "learning_rate": 0.00015192815087276934, "loss": 1.3277, "step": 18509 }, { "epoch": 0.2405290078828152, "grad_norm": 0.46872884035110474, "learning_rate": 0.00015192555141085797, "loss": 1.4433, "step": 18510 }, { "epoch": 0.24054200242673107, "grad_norm": 0.3163316547870636, "learning_rate": 0.00015192295194894656, "loss": 1.4528, "step": 18511 }, { "epoch": 0.24055499697064694, "grad_norm": 0.41251397132873535, "learning_rate": 0.0001519203524870352, "loss": 1.4331, "step": 18512 }, { "epoch": 0.24056799151456282, "grad_norm": 0.40884074568748474, "learning_rate": 0.0001519177530251238, "loss": 1.477, "step": 18513 }, { "epoch": 0.2405809860584787, "grad_norm": 0.5166527032852173, "learning_rate": 0.0001519151535632124, "loss": 1.3682, "step": 18514 }, { "epoch": 0.24059398060239456, "grad_norm": 0.37735888361930847, "learning_rate": 0.00015191255410130103, "loss": 1.5552, "step": 18515 }, { "epoch": 0.24060697514631044, "grad_norm": 0.3381499946117401, "learning_rate": 0.00015190995463938966, "loss": 1.2124, "step": 18516 }, { "epoch": 0.2406199696902263, "grad_norm": 0.3511542081832886, "learning_rate": 0.00015190735517747828, "loss": 1.23, "step": 18517 }, { "epoch": 0.24063296423414218, "grad_norm": 0.32661309838294983, "learning_rate": 0.00015190475571556688, "loss": 1.3646, "step": 18518 }, { "epoch": 0.24064595877805806, "grad_norm": 0.43127313256263733, "learning_rate": 0.0001519021562536555, "loss": 1.3926, "step": 18519 }, { "epoch": 0.24065895332197393, "grad_norm": 0.41396626830101013, "learning_rate": 0.00015189955679174413, "loss": 1.4587, "step": 18520 }, { "epoch": 0.2406719478658898, "grad_norm": 0.5347922444343567, "learning_rate": 0.00015189695732983273, "loss": 1.2497, "step": 18521 }, { "epoch": 0.24068494240980567, "grad_norm": 0.5608534812927246, "learning_rate": 0.00015189435786792135, "loss": 1.4887, "step": 18522 }, { "epoch": 0.24069793695372155, "grad_norm": 0.3457473814487457, "learning_rate": 0.00015189175840600995, "loss": 1.3865, "step": 18523 }, { "epoch": 0.24071093149763742, "grad_norm": 0.5131239295005798, "learning_rate": 0.00015188915894409857, "loss": 1.4279, "step": 18524 }, { "epoch": 0.2407239260415533, "grad_norm": 0.42662137746810913, "learning_rate": 0.0001518865594821872, "loss": 1.3431, "step": 18525 }, { "epoch": 0.24073692058546917, "grad_norm": 0.4022914171218872, "learning_rate": 0.0001518839600202758, "loss": 1.4766, "step": 18526 }, { "epoch": 0.24074991512938504, "grad_norm": 0.39197391271591187, "learning_rate": 0.00015188136055836442, "loss": 1.4444, "step": 18527 }, { "epoch": 0.2407629096733009, "grad_norm": 0.3474847674369812, "learning_rate": 0.00015187876109645304, "loss": 1.5234, "step": 18528 }, { "epoch": 0.24077590421721679, "grad_norm": 0.349973201751709, "learning_rate": 0.00015187616163454167, "loss": 1.2971, "step": 18529 }, { "epoch": 0.24078889876113266, "grad_norm": 0.4139890968799591, "learning_rate": 0.00015187356217263027, "loss": 1.4601, "step": 18530 }, { "epoch": 0.24080189330504853, "grad_norm": 0.43503978848457336, "learning_rate": 0.0001518709627107189, "loss": 1.3643, "step": 18531 }, { "epoch": 0.2408148878489644, "grad_norm": 0.3425271213054657, "learning_rate": 0.00015186836324880752, "loss": 1.398, "step": 18532 }, { "epoch": 0.24082788239288028, "grad_norm": 0.41756126284599304, "learning_rate": 0.0001518657637868961, "loss": 1.346, "step": 18533 }, { "epoch": 0.24084087693679615, "grad_norm": 0.4261583089828491, "learning_rate": 0.00015186316432498474, "loss": 1.444, "step": 18534 }, { "epoch": 0.24085387148071202, "grad_norm": 0.4392475187778473, "learning_rate": 0.00015186056486307333, "loss": 1.4935, "step": 18535 }, { "epoch": 0.2408668660246279, "grad_norm": 0.3863852322101593, "learning_rate": 0.00015185796540116196, "loss": 1.3656, "step": 18536 }, { "epoch": 0.24087986056854377, "grad_norm": 0.3822007179260254, "learning_rate": 0.00015185536593925058, "loss": 1.3856, "step": 18537 }, { "epoch": 0.24089285511245967, "grad_norm": 0.4949081242084503, "learning_rate": 0.00015185276647733918, "loss": 1.4339, "step": 18538 }, { "epoch": 0.24090584965637554, "grad_norm": 0.3679749071598053, "learning_rate": 0.00015185016701542783, "loss": 1.4916, "step": 18539 }, { "epoch": 0.24091884420029142, "grad_norm": 0.3821481168270111, "learning_rate": 0.00015184756755351643, "loss": 1.4219, "step": 18540 }, { "epoch": 0.2409318387442073, "grad_norm": 0.30985528230667114, "learning_rate": 0.00015184496809160505, "loss": 1.2672, "step": 18541 }, { "epoch": 0.24094483328812316, "grad_norm": 0.45522674918174744, "learning_rate": 0.00015184236862969365, "loss": 1.4787, "step": 18542 }, { "epoch": 0.24095782783203903, "grad_norm": 0.3590070307254791, "learning_rate": 0.00015183976916778228, "loss": 1.246, "step": 18543 }, { "epoch": 0.2409708223759549, "grad_norm": 0.26879581809043884, "learning_rate": 0.0001518371697058709, "loss": 1.3358, "step": 18544 }, { "epoch": 0.24098381691987078, "grad_norm": 0.37274497747421265, "learning_rate": 0.0001518345702439595, "loss": 1.4597, "step": 18545 }, { "epoch": 0.24099681146378665, "grad_norm": 0.293200820684433, "learning_rate": 0.00015183197078204812, "loss": 1.3228, "step": 18546 }, { "epoch": 0.24100980600770253, "grad_norm": 0.3799766004085541, "learning_rate": 0.00015182937132013675, "loss": 1.3871, "step": 18547 }, { "epoch": 0.2410228005516184, "grad_norm": 0.4362042546272278, "learning_rate": 0.00015182677185822537, "loss": 1.4107, "step": 18548 }, { "epoch": 0.24103579509553427, "grad_norm": 0.44697362184524536, "learning_rate": 0.00015182417239631397, "loss": 1.4029, "step": 18549 }, { "epoch": 0.24104878963945015, "grad_norm": 0.2885587215423584, "learning_rate": 0.00015182157293440257, "loss": 1.4777, "step": 18550 }, { "epoch": 0.24106178418336602, "grad_norm": 0.43840134143829346, "learning_rate": 0.00015181897347249122, "loss": 1.3983, "step": 18551 }, { "epoch": 0.2410747787272819, "grad_norm": 0.37484657764434814, "learning_rate": 0.00015181637401057982, "loss": 1.4156, "step": 18552 }, { "epoch": 0.24108777327119776, "grad_norm": 0.3867681324481964, "learning_rate": 0.00015181377454866844, "loss": 1.4694, "step": 18553 }, { "epoch": 0.24110076781511364, "grad_norm": 0.488303005695343, "learning_rate": 0.00015181117508675704, "loss": 1.5646, "step": 18554 }, { "epoch": 0.2411137623590295, "grad_norm": 0.3741527199745178, "learning_rate": 0.00015180857562484566, "loss": 1.324, "step": 18555 }, { "epoch": 0.24112675690294538, "grad_norm": 0.34053561091423035, "learning_rate": 0.00015180597616293429, "loss": 1.5557, "step": 18556 }, { "epoch": 0.24113975144686126, "grad_norm": 0.31561756134033203, "learning_rate": 0.00015180337670102288, "loss": 1.3786, "step": 18557 }, { "epoch": 0.24115274599077713, "grad_norm": 0.45263171195983887, "learning_rate": 0.0001518007772391115, "loss": 1.3521, "step": 18558 }, { "epoch": 0.241165740534693, "grad_norm": 0.39372923970222473, "learning_rate": 0.00015179817777720013, "loss": 1.4225, "step": 18559 }, { "epoch": 0.24117873507860887, "grad_norm": 0.33100995421409607, "learning_rate": 0.00015179557831528876, "loss": 1.2954, "step": 18560 }, { "epoch": 0.24119172962252475, "grad_norm": 0.3820338845252991, "learning_rate": 0.00015179297885337735, "loss": 1.4251, "step": 18561 }, { "epoch": 0.24120472416644062, "grad_norm": 0.39095795154571533, "learning_rate": 0.00015179037939146595, "loss": 1.5057, "step": 18562 }, { "epoch": 0.2412177187103565, "grad_norm": 0.36307084560394287, "learning_rate": 0.0001517877799295546, "loss": 1.4401, "step": 18563 }, { "epoch": 0.24123071325427237, "grad_norm": 0.3987591564655304, "learning_rate": 0.0001517851804676432, "loss": 1.3163, "step": 18564 }, { "epoch": 0.24124370779818824, "grad_norm": 0.4528557062149048, "learning_rate": 0.00015178258100573183, "loss": 1.4546, "step": 18565 }, { "epoch": 0.2412567023421041, "grad_norm": 0.41332393884658813, "learning_rate": 0.00015177998154382042, "loss": 1.3587, "step": 18566 }, { "epoch": 0.24126969688601999, "grad_norm": 0.36671555042266846, "learning_rate": 0.00015177738208190905, "loss": 1.4171, "step": 18567 }, { "epoch": 0.24128269142993586, "grad_norm": 0.3533616364002228, "learning_rate": 0.00015177478261999767, "loss": 1.5284, "step": 18568 }, { "epoch": 0.24129568597385173, "grad_norm": 0.3955092132091522, "learning_rate": 0.00015177218315808627, "loss": 1.3413, "step": 18569 }, { "epoch": 0.2413086805177676, "grad_norm": 0.5402181148529053, "learning_rate": 0.0001517695836961749, "loss": 1.5172, "step": 18570 }, { "epoch": 0.24132167506168348, "grad_norm": 0.42114314436912537, "learning_rate": 0.00015176698423426352, "loss": 1.1807, "step": 18571 }, { "epoch": 0.24133466960559935, "grad_norm": 0.4067343473434448, "learning_rate": 0.00015176438477235214, "loss": 1.4942, "step": 18572 }, { "epoch": 0.24134766414951522, "grad_norm": 0.3690411448478699, "learning_rate": 0.00015176178531044074, "loss": 1.447, "step": 18573 }, { "epoch": 0.2413606586934311, "grad_norm": 0.44165557622909546, "learning_rate": 0.00015175918584852936, "loss": 1.3786, "step": 18574 }, { "epoch": 0.24137365323734697, "grad_norm": 0.474723219871521, "learning_rate": 0.000151756586386618, "loss": 1.4934, "step": 18575 }, { "epoch": 0.24138664778126284, "grad_norm": 0.44561460614204407, "learning_rate": 0.00015175398692470659, "loss": 1.4458, "step": 18576 }, { "epoch": 0.24139964232517871, "grad_norm": 0.45253416895866394, "learning_rate": 0.0001517513874627952, "loss": 1.4211, "step": 18577 }, { "epoch": 0.2414126368690946, "grad_norm": 0.36146828532218933, "learning_rate": 0.00015174878800088384, "loss": 1.3264, "step": 18578 }, { "epoch": 0.24142563141301046, "grad_norm": 0.3475085496902466, "learning_rate": 0.00015174618853897243, "loss": 1.5759, "step": 18579 }, { "epoch": 0.24143862595692633, "grad_norm": 0.38433751463890076, "learning_rate": 0.00015174358907706106, "loss": 1.3702, "step": 18580 }, { "epoch": 0.2414516205008422, "grad_norm": 0.36033758521080017, "learning_rate": 0.00015174098961514965, "loss": 1.5495, "step": 18581 }, { "epoch": 0.24146461504475808, "grad_norm": 0.3818190395832062, "learning_rate": 0.0001517383901532383, "loss": 1.3455, "step": 18582 }, { "epoch": 0.24147760958867395, "grad_norm": 0.5108311176300049, "learning_rate": 0.0001517357906913269, "loss": 1.3746, "step": 18583 }, { "epoch": 0.24149060413258983, "grad_norm": 0.3417815864086151, "learning_rate": 0.00015173319122941553, "loss": 1.2899, "step": 18584 }, { "epoch": 0.2415035986765057, "grad_norm": 0.40832841396331787, "learning_rate": 0.00015173059176750413, "loss": 1.5907, "step": 18585 }, { "epoch": 0.24151659322042157, "grad_norm": 0.32985880970954895, "learning_rate": 0.00015172799230559275, "loss": 1.3568, "step": 18586 }, { "epoch": 0.24152958776433744, "grad_norm": 0.3042459785938263, "learning_rate": 0.00015172539284368137, "loss": 1.3126, "step": 18587 }, { "epoch": 0.24154258230825332, "grad_norm": 0.39141055941581726, "learning_rate": 0.00015172279338176997, "loss": 1.3054, "step": 18588 }, { "epoch": 0.2415555768521692, "grad_norm": 0.4066309332847595, "learning_rate": 0.0001517201939198586, "loss": 1.4629, "step": 18589 }, { "epoch": 0.24156857139608506, "grad_norm": 0.3061867356300354, "learning_rate": 0.00015171759445794722, "loss": 1.383, "step": 18590 }, { "epoch": 0.24158156594000094, "grad_norm": 0.3496739864349365, "learning_rate": 0.00015171499499603582, "loss": 1.446, "step": 18591 }, { "epoch": 0.2415945604839168, "grad_norm": 0.4225510060787201, "learning_rate": 0.00015171239553412444, "loss": 1.4763, "step": 18592 }, { "epoch": 0.24160755502783268, "grad_norm": 0.39365333318710327, "learning_rate": 0.00015170979607221304, "loss": 1.0949, "step": 18593 }, { "epoch": 0.24162054957174856, "grad_norm": 0.4351522624492645, "learning_rate": 0.0001517071966103017, "loss": 1.3895, "step": 18594 }, { "epoch": 0.24163354411566443, "grad_norm": 0.26343002915382385, "learning_rate": 0.0001517045971483903, "loss": 1.14, "step": 18595 }, { "epoch": 0.2416465386595803, "grad_norm": 0.2849823534488678, "learning_rate": 0.0001517019976864789, "loss": 1.2416, "step": 18596 }, { "epoch": 0.24165953320349617, "grad_norm": 0.3246159255504608, "learning_rate": 0.0001516993982245675, "loss": 1.2887, "step": 18597 }, { "epoch": 0.24167252774741205, "grad_norm": 0.4150823652744293, "learning_rate": 0.00015169679876265613, "loss": 1.703, "step": 18598 }, { "epoch": 0.24168552229132792, "grad_norm": 0.3307783007621765, "learning_rate": 0.00015169419930074476, "loss": 1.5364, "step": 18599 }, { "epoch": 0.2416985168352438, "grad_norm": 0.38430479168891907, "learning_rate": 0.00015169159983883336, "loss": 1.3438, "step": 18600 }, { "epoch": 0.24171151137915967, "grad_norm": 0.5762456655502319, "learning_rate": 0.00015168900037692198, "loss": 1.5096, "step": 18601 }, { "epoch": 0.24172450592307554, "grad_norm": 0.4061860144138336, "learning_rate": 0.0001516864009150106, "loss": 1.6329, "step": 18602 }, { "epoch": 0.2417375004669914, "grad_norm": 0.4096372425556183, "learning_rate": 0.00015168380145309923, "loss": 1.3402, "step": 18603 }, { "epoch": 0.24175049501090728, "grad_norm": 0.4047014117240906, "learning_rate": 0.00015168120199118783, "loss": 1.3931, "step": 18604 }, { "epoch": 0.24176348955482316, "grad_norm": 0.38806089758872986, "learning_rate": 0.00015167860252927643, "loss": 1.5532, "step": 18605 }, { "epoch": 0.24177648409873903, "grad_norm": 0.4528484046459198, "learning_rate": 0.00015167600306736508, "loss": 1.4809, "step": 18606 }, { "epoch": 0.2417894786426549, "grad_norm": 0.3606020510196686, "learning_rate": 0.00015167340360545367, "loss": 1.4749, "step": 18607 }, { "epoch": 0.24180247318657078, "grad_norm": 0.4066159725189209, "learning_rate": 0.0001516708041435423, "loss": 1.3681, "step": 18608 }, { "epoch": 0.24181546773048665, "grad_norm": 0.4755224883556366, "learning_rate": 0.0001516682046816309, "loss": 1.3789, "step": 18609 }, { "epoch": 0.24182846227440252, "grad_norm": 0.43885236978530884, "learning_rate": 0.00015166560521971952, "loss": 1.238, "step": 18610 }, { "epoch": 0.2418414568183184, "grad_norm": 0.366558700799942, "learning_rate": 0.00015166300575780814, "loss": 1.2719, "step": 18611 }, { "epoch": 0.24185445136223427, "grad_norm": 0.35997137427330017, "learning_rate": 0.00015166040629589674, "loss": 1.2549, "step": 18612 }, { "epoch": 0.24186744590615014, "grad_norm": 0.4199138581752777, "learning_rate": 0.0001516578068339854, "loss": 1.4904, "step": 18613 }, { "epoch": 0.24188044045006604, "grad_norm": 0.3938407599925995, "learning_rate": 0.000151655207372074, "loss": 1.3414, "step": 18614 }, { "epoch": 0.24189343499398192, "grad_norm": 0.3760218918323517, "learning_rate": 0.00015165260791016262, "loss": 1.3576, "step": 18615 }, { "epoch": 0.2419064295378978, "grad_norm": 0.3322869539260864, "learning_rate": 0.0001516500084482512, "loss": 1.4801, "step": 18616 }, { "epoch": 0.24191942408181366, "grad_norm": 0.33314236998558044, "learning_rate": 0.00015164740898633984, "loss": 1.2436, "step": 18617 }, { "epoch": 0.24193241862572953, "grad_norm": 0.39032691717147827, "learning_rate": 0.00015164480952442846, "loss": 1.3134, "step": 18618 }, { "epoch": 0.2419454131696454, "grad_norm": 0.4915000796318054, "learning_rate": 0.00015164221006251706, "loss": 1.5002, "step": 18619 }, { "epoch": 0.24195840771356128, "grad_norm": 0.2642613351345062, "learning_rate": 0.00015163961060060568, "loss": 1.3446, "step": 18620 }, { "epoch": 0.24197140225747715, "grad_norm": 0.4540685713291168, "learning_rate": 0.0001516370111386943, "loss": 1.419, "step": 18621 }, { "epoch": 0.24198439680139303, "grad_norm": 0.3748992681503296, "learning_rate": 0.0001516344116767829, "loss": 1.4248, "step": 18622 }, { "epoch": 0.2419973913453089, "grad_norm": 0.3597988784313202, "learning_rate": 0.00015163181221487153, "loss": 1.2639, "step": 18623 }, { "epoch": 0.24201038588922477, "grad_norm": 0.4828532040119171, "learning_rate": 0.00015162921275296013, "loss": 1.4896, "step": 18624 }, { "epoch": 0.24202338043314064, "grad_norm": 0.489960640668869, "learning_rate": 0.00015162661329104878, "loss": 1.3722, "step": 18625 }, { "epoch": 0.24203637497705652, "grad_norm": 0.4617827534675598, "learning_rate": 0.00015162401382913738, "loss": 1.3188, "step": 18626 }, { "epoch": 0.2420493695209724, "grad_norm": 0.46666109561920166, "learning_rate": 0.000151621414367226, "loss": 1.5559, "step": 18627 }, { "epoch": 0.24206236406488826, "grad_norm": 0.4043882191181183, "learning_rate": 0.0001516188149053146, "loss": 1.3671, "step": 18628 }, { "epoch": 0.24207535860880414, "grad_norm": 0.341335266828537, "learning_rate": 0.00015161621544340322, "loss": 1.1951, "step": 18629 }, { "epoch": 0.24208835315272, "grad_norm": 0.3979286551475525, "learning_rate": 0.00015161361598149185, "loss": 1.4869, "step": 18630 }, { "epoch": 0.24210134769663588, "grad_norm": 0.5013980269432068, "learning_rate": 0.00015161101651958044, "loss": 1.3594, "step": 18631 }, { "epoch": 0.24211434224055176, "grad_norm": 0.47453683614730835, "learning_rate": 0.00015160841705766907, "loss": 1.5178, "step": 18632 }, { "epoch": 0.24212733678446763, "grad_norm": 0.37517401576042175, "learning_rate": 0.0001516058175957577, "loss": 1.4119, "step": 18633 }, { "epoch": 0.2421403313283835, "grad_norm": 0.5154349207878113, "learning_rate": 0.0001516032181338463, "loss": 1.3683, "step": 18634 }, { "epoch": 0.24215332587229937, "grad_norm": 0.40972840785980225, "learning_rate": 0.00015160061867193492, "loss": 1.5584, "step": 18635 }, { "epoch": 0.24216632041621525, "grad_norm": 0.4575447142124176, "learning_rate": 0.0001515980192100235, "loss": 1.3647, "step": 18636 }, { "epoch": 0.24217931496013112, "grad_norm": 0.5445562601089478, "learning_rate": 0.00015159541974811216, "loss": 1.4829, "step": 18637 }, { "epoch": 0.242192309504047, "grad_norm": 0.4747721254825592, "learning_rate": 0.00015159282028620076, "loss": 1.5206, "step": 18638 }, { "epoch": 0.24220530404796287, "grad_norm": 0.4366362392902374, "learning_rate": 0.00015159022082428939, "loss": 1.3796, "step": 18639 }, { "epoch": 0.24221829859187874, "grad_norm": 0.40907254815101624, "learning_rate": 0.00015158762136237798, "loss": 1.5788, "step": 18640 }, { "epoch": 0.2422312931357946, "grad_norm": 0.42435750365257263, "learning_rate": 0.0001515850219004666, "loss": 1.4762, "step": 18641 }, { "epoch": 0.24224428767971048, "grad_norm": 0.3240799307823181, "learning_rate": 0.00015158242243855523, "loss": 1.4178, "step": 18642 }, { "epoch": 0.24225728222362636, "grad_norm": 0.41263800859451294, "learning_rate": 0.00015157982297664383, "loss": 1.2983, "step": 18643 }, { "epoch": 0.24227027676754223, "grad_norm": 0.3566671907901764, "learning_rate": 0.00015157722351473245, "loss": 1.4416, "step": 18644 }, { "epoch": 0.2422832713114581, "grad_norm": 0.47105324268341064, "learning_rate": 0.00015157462405282108, "loss": 1.4999, "step": 18645 }, { "epoch": 0.24229626585537398, "grad_norm": 0.32150542736053467, "learning_rate": 0.00015157202459090968, "loss": 1.6419, "step": 18646 }, { "epoch": 0.24230926039928985, "grad_norm": 0.530704915523529, "learning_rate": 0.0001515694251289983, "loss": 1.464, "step": 18647 }, { "epoch": 0.24232225494320572, "grad_norm": 0.3804032504558563, "learning_rate": 0.00015156682566708693, "loss": 1.749, "step": 18648 }, { "epoch": 0.2423352494871216, "grad_norm": 0.2966215908527374, "learning_rate": 0.00015156422620517555, "loss": 1.2693, "step": 18649 }, { "epoch": 0.24234824403103747, "grad_norm": 0.36431822180747986, "learning_rate": 0.00015156162674326415, "loss": 1.2924, "step": 18650 }, { "epoch": 0.24236123857495334, "grad_norm": 0.43764594197273254, "learning_rate": 0.00015155902728135277, "loss": 1.352, "step": 18651 }, { "epoch": 0.24237423311886921, "grad_norm": 0.3338315486907959, "learning_rate": 0.0001515564278194414, "loss": 1.4392, "step": 18652 }, { "epoch": 0.2423872276627851, "grad_norm": 0.6378545165061951, "learning_rate": 0.00015155382835753, "loss": 1.4756, "step": 18653 }, { "epoch": 0.24240022220670096, "grad_norm": 0.3244880139827728, "learning_rate": 0.00015155122889561862, "loss": 1.3912, "step": 18654 }, { "epoch": 0.24241321675061683, "grad_norm": 0.4499629735946655, "learning_rate": 0.00015154862943370722, "loss": 1.5699, "step": 18655 }, { "epoch": 0.2424262112945327, "grad_norm": 0.5940762758255005, "learning_rate": 0.00015154602997179587, "loss": 1.454, "step": 18656 }, { "epoch": 0.24243920583844858, "grad_norm": 0.5180310010910034, "learning_rate": 0.00015154343050988446, "loss": 1.4349, "step": 18657 }, { "epoch": 0.24245220038236445, "grad_norm": 0.5157365798950195, "learning_rate": 0.00015154083104797306, "loss": 1.4465, "step": 18658 }, { "epoch": 0.24246519492628033, "grad_norm": 0.29018861055374146, "learning_rate": 0.00015153823158606169, "loss": 1.2229, "step": 18659 }, { "epoch": 0.2424781894701962, "grad_norm": 0.4880530536174774, "learning_rate": 0.0001515356321241503, "loss": 1.6192, "step": 18660 }, { "epoch": 0.24249118401411207, "grad_norm": 0.4546888470649719, "learning_rate": 0.00015153303266223894, "loss": 1.2466, "step": 18661 }, { "epoch": 0.24250417855802794, "grad_norm": 0.41736820340156555, "learning_rate": 0.00015153043320032753, "loss": 1.3336, "step": 18662 }, { "epoch": 0.24251717310194382, "grad_norm": 0.34993085265159607, "learning_rate": 0.00015152783373841616, "loss": 1.4632, "step": 18663 }, { "epoch": 0.2425301676458597, "grad_norm": 0.4183323383331299, "learning_rate": 0.00015152523427650478, "loss": 1.4106, "step": 18664 }, { "epoch": 0.24254316218977556, "grad_norm": 0.34649717807769775, "learning_rate": 0.00015152263481459338, "loss": 1.2787, "step": 18665 }, { "epoch": 0.24255615673369144, "grad_norm": 0.34201788902282715, "learning_rate": 0.000151520035352682, "loss": 1.2444, "step": 18666 }, { "epoch": 0.2425691512776073, "grad_norm": 0.4168111979961395, "learning_rate": 0.0001515174358907706, "loss": 1.4562, "step": 18667 }, { "epoch": 0.24258214582152318, "grad_norm": 0.24262452125549316, "learning_rate": 0.00015151483642885925, "loss": 1.2438, "step": 18668 }, { "epoch": 0.24259514036543905, "grad_norm": 0.35859549045562744, "learning_rate": 0.00015151223696694785, "loss": 1.2294, "step": 18669 }, { "epoch": 0.24260813490935493, "grad_norm": 0.42102178931236267, "learning_rate": 0.00015150963750503647, "loss": 1.3983, "step": 18670 }, { "epoch": 0.2426211294532708, "grad_norm": 0.4524250626564026, "learning_rate": 0.00015150703804312507, "loss": 1.4813, "step": 18671 }, { "epoch": 0.24263412399718667, "grad_norm": 0.4526432454586029, "learning_rate": 0.0001515044385812137, "loss": 1.3536, "step": 18672 }, { "epoch": 0.24264711854110255, "grad_norm": 0.42873483896255493, "learning_rate": 0.00015150183911930232, "loss": 1.3566, "step": 18673 }, { "epoch": 0.24266011308501842, "grad_norm": 0.31473997235298157, "learning_rate": 0.00015149923965739092, "loss": 1.3312, "step": 18674 }, { "epoch": 0.2426731076289343, "grad_norm": 0.4092557430267334, "learning_rate": 0.00015149664019547954, "loss": 1.4292, "step": 18675 }, { "epoch": 0.24268610217285017, "grad_norm": 0.4255172312259674, "learning_rate": 0.00015149404073356817, "loss": 1.4521, "step": 18676 }, { "epoch": 0.24269909671676604, "grad_norm": 0.42825016379356384, "learning_rate": 0.00015149144127165676, "loss": 1.4087, "step": 18677 }, { "epoch": 0.2427120912606819, "grad_norm": 0.35758161544799805, "learning_rate": 0.0001514888418097454, "loss": 1.2621, "step": 18678 }, { "epoch": 0.24272508580459778, "grad_norm": 0.29613304138183594, "learning_rate": 0.00015148624234783399, "loss": 1.244, "step": 18679 }, { "epoch": 0.24273808034851366, "grad_norm": 0.27445435523986816, "learning_rate": 0.00015148364288592264, "loss": 1.2144, "step": 18680 }, { "epoch": 0.24275107489242953, "grad_norm": 0.4573116898536682, "learning_rate": 0.00015148104342401124, "loss": 1.4982, "step": 18681 }, { "epoch": 0.2427640694363454, "grad_norm": 0.37999045848846436, "learning_rate": 0.00015147844396209986, "loss": 1.4073, "step": 18682 }, { "epoch": 0.24277706398026128, "grad_norm": 0.42922714352607727, "learning_rate": 0.00015147584450018846, "loss": 1.551, "step": 18683 }, { "epoch": 0.24279005852417715, "grad_norm": 0.47659096121788025, "learning_rate": 0.00015147324503827708, "loss": 1.5652, "step": 18684 }, { "epoch": 0.24280305306809302, "grad_norm": 0.4912133514881134, "learning_rate": 0.0001514706455763657, "loss": 1.5049, "step": 18685 }, { "epoch": 0.2428160476120089, "grad_norm": 0.33328741788864136, "learning_rate": 0.0001514680461144543, "loss": 1.3937, "step": 18686 }, { "epoch": 0.24282904215592477, "grad_norm": 0.3814634680747986, "learning_rate": 0.00015146544665254296, "loss": 1.5056, "step": 18687 }, { "epoch": 0.24284203669984064, "grad_norm": 0.32378554344177246, "learning_rate": 0.00015146284719063155, "loss": 1.5988, "step": 18688 }, { "epoch": 0.24285503124375651, "grad_norm": 0.3705339729785919, "learning_rate": 0.00015146024772872015, "loss": 1.2346, "step": 18689 }, { "epoch": 0.24286802578767241, "grad_norm": 0.43569323420524597, "learning_rate": 0.00015145764826680877, "loss": 1.429, "step": 18690 }, { "epoch": 0.2428810203315883, "grad_norm": 0.3635070025920868, "learning_rate": 0.0001514550488048974, "loss": 1.4371, "step": 18691 }, { "epoch": 0.24289401487550416, "grad_norm": 0.45927491784095764, "learning_rate": 0.00015145244934298602, "loss": 1.5511, "step": 18692 }, { "epoch": 0.24290700941942003, "grad_norm": 0.3554874360561371, "learning_rate": 0.00015144984988107462, "loss": 1.431, "step": 18693 }, { "epoch": 0.2429200039633359, "grad_norm": 0.4174509644508362, "learning_rate": 0.00015144725041916325, "loss": 1.4388, "step": 18694 }, { "epoch": 0.24293299850725178, "grad_norm": 0.4513232111930847, "learning_rate": 0.00015144465095725187, "loss": 1.3627, "step": 18695 }, { "epoch": 0.24294599305116765, "grad_norm": 0.2647462487220764, "learning_rate": 0.00015144205149534047, "loss": 1.1804, "step": 18696 }, { "epoch": 0.24295898759508353, "grad_norm": 0.4600968062877655, "learning_rate": 0.0001514394520334291, "loss": 1.3239, "step": 18697 }, { "epoch": 0.2429719821389994, "grad_norm": 0.37200671434402466, "learning_rate": 0.0001514368525715177, "loss": 1.2162, "step": 18698 }, { "epoch": 0.24298497668291527, "grad_norm": 0.3960192799568176, "learning_rate": 0.00015143425310960634, "loss": 1.3289, "step": 18699 }, { "epoch": 0.24299797122683114, "grad_norm": 0.34126052260398865, "learning_rate": 0.00015143165364769494, "loss": 1.3954, "step": 18700 }, { "epoch": 0.24301096577074702, "grad_norm": 0.5253952741622925, "learning_rate": 0.00015142905418578354, "loss": 1.4714, "step": 18701 }, { "epoch": 0.2430239603146629, "grad_norm": 0.4674523174762726, "learning_rate": 0.00015142645472387216, "loss": 1.3456, "step": 18702 }, { "epoch": 0.24303695485857876, "grad_norm": 0.30023127794265747, "learning_rate": 0.00015142385526196078, "loss": 1.5672, "step": 18703 }, { "epoch": 0.24304994940249464, "grad_norm": 0.39188772439956665, "learning_rate": 0.0001514212558000494, "loss": 1.4785, "step": 18704 }, { "epoch": 0.2430629439464105, "grad_norm": 0.4098399877548218, "learning_rate": 0.000151418656338138, "loss": 1.3529, "step": 18705 }, { "epoch": 0.24307593849032638, "grad_norm": 0.40387290716171265, "learning_rate": 0.00015141605687622663, "loss": 1.3914, "step": 18706 }, { "epoch": 0.24308893303424225, "grad_norm": 0.35220301151275635, "learning_rate": 0.00015141345741431526, "loss": 1.3964, "step": 18707 }, { "epoch": 0.24310192757815813, "grad_norm": 0.36789292097091675, "learning_rate": 0.00015141085795240385, "loss": 1.4885, "step": 18708 }, { "epoch": 0.243114922122074, "grad_norm": 0.379399836063385, "learning_rate": 0.00015140825849049248, "loss": 1.4816, "step": 18709 }, { "epoch": 0.24312791666598987, "grad_norm": 0.32574591040611267, "learning_rate": 0.00015140565902858107, "loss": 1.4377, "step": 18710 }, { "epoch": 0.24314091120990575, "grad_norm": 0.4179539084434509, "learning_rate": 0.00015140305956666973, "loss": 1.5756, "step": 18711 }, { "epoch": 0.24315390575382162, "grad_norm": 0.391205370426178, "learning_rate": 0.00015140046010475832, "loss": 1.5877, "step": 18712 }, { "epoch": 0.2431669002977375, "grad_norm": 0.3907163739204407, "learning_rate": 0.00015139786064284692, "loss": 1.4361, "step": 18713 }, { "epoch": 0.24317989484165337, "grad_norm": 0.33866459131240845, "learning_rate": 0.00015139526118093555, "loss": 1.0723, "step": 18714 }, { "epoch": 0.24319288938556924, "grad_norm": 0.34207606315612793, "learning_rate": 0.00015139266171902417, "loss": 1.4648, "step": 18715 }, { "epoch": 0.2432058839294851, "grad_norm": 0.3380109667778015, "learning_rate": 0.0001513900622571128, "loss": 1.3762, "step": 18716 }, { "epoch": 0.24321887847340098, "grad_norm": 0.47035568952560425, "learning_rate": 0.0001513874627952014, "loss": 1.5755, "step": 18717 }, { "epoch": 0.24323187301731686, "grad_norm": 0.4450298845767975, "learning_rate": 0.00015138486333329002, "loss": 1.2981, "step": 18718 }, { "epoch": 0.24324486756123273, "grad_norm": 0.5092129111289978, "learning_rate": 0.00015138226387137864, "loss": 1.3505, "step": 18719 }, { "epoch": 0.2432578621051486, "grad_norm": 0.3212827444076538, "learning_rate": 0.00015137966440946724, "loss": 1.4542, "step": 18720 }, { "epoch": 0.24327085664906448, "grad_norm": 0.44399669766426086, "learning_rate": 0.00015137706494755586, "loss": 1.5591, "step": 18721 }, { "epoch": 0.24328385119298035, "grad_norm": 0.33565202355384827, "learning_rate": 0.0001513744654856445, "loss": 1.4235, "step": 18722 }, { "epoch": 0.24329684573689622, "grad_norm": 0.4154299199581146, "learning_rate": 0.0001513718660237331, "loss": 1.3861, "step": 18723 }, { "epoch": 0.2433098402808121, "grad_norm": 0.4201943278312683, "learning_rate": 0.0001513692665618217, "loss": 1.4103, "step": 18724 }, { "epoch": 0.24332283482472797, "grad_norm": 0.4369250237941742, "learning_rate": 0.00015136666709991033, "loss": 1.3311, "step": 18725 }, { "epoch": 0.24333582936864384, "grad_norm": 0.33135008811950684, "learning_rate": 0.00015136406763799896, "loss": 1.4669, "step": 18726 }, { "epoch": 0.24334882391255971, "grad_norm": 0.4350792467594147, "learning_rate": 0.00015136146817608756, "loss": 1.3068, "step": 18727 }, { "epoch": 0.2433618184564756, "grad_norm": 0.37491574883461, "learning_rate": 0.00015135886871417618, "loss": 1.3763, "step": 18728 }, { "epoch": 0.24337481300039146, "grad_norm": 0.3830513060092926, "learning_rate": 0.00015135626925226478, "loss": 1.5058, "step": 18729 }, { "epoch": 0.24338780754430733, "grad_norm": 0.43277010321617126, "learning_rate": 0.0001513536697903534, "loss": 1.4395, "step": 18730 }, { "epoch": 0.2434008020882232, "grad_norm": 0.28896117210388184, "learning_rate": 0.00015135107032844203, "loss": 1.3533, "step": 18731 }, { "epoch": 0.24341379663213908, "grad_norm": 0.3370968997478485, "learning_rate": 0.00015134847086653062, "loss": 1.5178, "step": 18732 }, { "epoch": 0.24342679117605495, "grad_norm": 0.31554126739501953, "learning_rate": 0.00015134587140461925, "loss": 1.2208, "step": 18733 }, { "epoch": 0.24343978571997082, "grad_norm": 0.3740726411342621, "learning_rate": 0.00015134327194270787, "loss": 1.3376, "step": 18734 }, { "epoch": 0.2434527802638867, "grad_norm": 0.37324458360671997, "learning_rate": 0.0001513406724807965, "loss": 1.2837, "step": 18735 }, { "epoch": 0.24346577480780257, "grad_norm": 0.37935417890548706, "learning_rate": 0.0001513380730188851, "loss": 1.3733, "step": 18736 }, { "epoch": 0.24347876935171844, "grad_norm": 0.3903962969779968, "learning_rate": 0.00015133547355697372, "loss": 1.4875, "step": 18737 }, { "epoch": 0.24349176389563432, "grad_norm": 0.4248451590538025, "learning_rate": 0.00015133287409506234, "loss": 1.4921, "step": 18738 }, { "epoch": 0.2435047584395502, "grad_norm": 0.4164523482322693, "learning_rate": 0.00015133027463315094, "loss": 1.3684, "step": 18739 }, { "epoch": 0.24351775298346606, "grad_norm": 0.4053780436515808, "learning_rate": 0.00015132767517123956, "loss": 1.4135, "step": 18740 }, { "epoch": 0.24353074752738194, "grad_norm": 0.44323110580444336, "learning_rate": 0.00015132507570932816, "loss": 1.5026, "step": 18741 }, { "epoch": 0.2435437420712978, "grad_norm": 0.4161290228366852, "learning_rate": 0.0001513224762474168, "loss": 1.4473, "step": 18742 }, { "epoch": 0.24355673661521368, "grad_norm": 0.37046870589256287, "learning_rate": 0.0001513198767855054, "loss": 1.5759, "step": 18743 }, { "epoch": 0.24356973115912955, "grad_norm": 0.3534325659275055, "learning_rate": 0.000151317277323594, "loss": 1.415, "step": 18744 }, { "epoch": 0.24358272570304543, "grad_norm": 0.3760634958744049, "learning_rate": 0.00015131467786168263, "loss": 1.4554, "step": 18745 }, { "epoch": 0.2435957202469613, "grad_norm": 0.38557153940200806, "learning_rate": 0.00015131207839977126, "loss": 1.408, "step": 18746 }, { "epoch": 0.24360871479087717, "grad_norm": 0.29940006136894226, "learning_rate": 0.00015130947893785988, "loss": 1.2208, "step": 18747 }, { "epoch": 0.24362170933479305, "grad_norm": 0.385562926530838, "learning_rate": 0.00015130687947594848, "loss": 1.2387, "step": 18748 }, { "epoch": 0.24363470387870892, "grad_norm": 0.3814559280872345, "learning_rate": 0.0001513042800140371, "loss": 1.4198, "step": 18749 }, { "epoch": 0.2436476984226248, "grad_norm": 0.36405354738235474, "learning_rate": 0.00015130168055212573, "loss": 1.5686, "step": 18750 }, { "epoch": 0.24366069296654067, "grad_norm": 0.3762778639793396, "learning_rate": 0.00015129908109021433, "loss": 1.3829, "step": 18751 }, { "epoch": 0.24367368751045654, "grad_norm": 0.43630701303482056, "learning_rate": 0.00015129648162830295, "loss": 1.4168, "step": 18752 }, { "epoch": 0.2436866820543724, "grad_norm": 0.4406484365463257, "learning_rate": 0.00015129388216639155, "loss": 1.4702, "step": 18753 }, { "epoch": 0.24369967659828828, "grad_norm": 0.379935085773468, "learning_rate": 0.0001512912827044802, "loss": 1.4309, "step": 18754 }, { "epoch": 0.24371267114220416, "grad_norm": 0.412319153547287, "learning_rate": 0.0001512886832425688, "loss": 1.4179, "step": 18755 }, { "epoch": 0.24372566568612003, "grad_norm": 0.42225927114486694, "learning_rate": 0.0001512860837806574, "loss": 1.345, "step": 18756 }, { "epoch": 0.2437386602300359, "grad_norm": 0.43439263105392456, "learning_rate": 0.00015128348431874602, "loss": 1.4821, "step": 18757 }, { "epoch": 0.24375165477395178, "grad_norm": 0.48218613862991333, "learning_rate": 0.00015128088485683464, "loss": 1.4024, "step": 18758 }, { "epoch": 0.24376464931786765, "grad_norm": 0.3680278956890106, "learning_rate": 0.00015127828539492327, "loss": 1.5368, "step": 18759 }, { "epoch": 0.24377764386178352, "grad_norm": 0.39215633273124695, "learning_rate": 0.00015127568593301186, "loss": 1.2514, "step": 18760 }, { "epoch": 0.2437906384056994, "grad_norm": 0.32422924041748047, "learning_rate": 0.0001512730864711005, "loss": 1.3348, "step": 18761 }, { "epoch": 0.24380363294961527, "grad_norm": 0.4098733365535736, "learning_rate": 0.00015127048700918911, "loss": 1.4497, "step": 18762 }, { "epoch": 0.24381662749353114, "grad_norm": 0.405290812253952, "learning_rate": 0.0001512678875472777, "loss": 1.4056, "step": 18763 }, { "epoch": 0.243829622037447, "grad_norm": 0.3286243975162506, "learning_rate": 0.00015126528808536634, "loss": 1.4373, "step": 18764 }, { "epoch": 0.2438426165813629, "grad_norm": 0.4578157663345337, "learning_rate": 0.00015126268862345496, "loss": 1.4788, "step": 18765 }, { "epoch": 0.2438556111252788, "grad_norm": 0.29505306482315063, "learning_rate": 0.00015126008916154358, "loss": 1.3035, "step": 18766 }, { "epoch": 0.24386860566919466, "grad_norm": 0.36847490072250366, "learning_rate": 0.00015125748969963218, "loss": 1.5574, "step": 18767 }, { "epoch": 0.24388160021311053, "grad_norm": 0.4174138903617859, "learning_rate": 0.00015125489023772078, "loss": 1.6381, "step": 18768 }, { "epoch": 0.2438945947570264, "grad_norm": 0.3846185803413391, "learning_rate": 0.00015125229077580943, "loss": 1.4377, "step": 18769 }, { "epoch": 0.24390758930094228, "grad_norm": 0.4426399767398834, "learning_rate": 0.00015124969131389803, "loss": 1.18, "step": 18770 }, { "epoch": 0.24392058384485815, "grad_norm": 0.35817059874534607, "learning_rate": 0.00015124709185198665, "loss": 1.3283, "step": 18771 }, { "epoch": 0.24393357838877402, "grad_norm": 0.3810522258281708, "learning_rate": 0.00015124449239007525, "loss": 1.2969, "step": 18772 }, { "epoch": 0.2439465729326899, "grad_norm": 0.43992850184440613, "learning_rate": 0.00015124189292816387, "loss": 1.2024, "step": 18773 }, { "epoch": 0.24395956747660577, "grad_norm": 0.4206596612930298, "learning_rate": 0.0001512392934662525, "loss": 1.4586, "step": 18774 }, { "epoch": 0.24397256202052164, "grad_norm": 0.36326298117637634, "learning_rate": 0.0001512366940043411, "loss": 1.2922, "step": 18775 }, { "epoch": 0.24398555656443752, "grad_norm": 0.32933279871940613, "learning_rate": 0.00015123409454242972, "loss": 1.2991, "step": 18776 }, { "epoch": 0.2439985511083534, "grad_norm": 0.3619011640548706, "learning_rate": 0.00015123149508051835, "loss": 1.3923, "step": 18777 }, { "epoch": 0.24401154565226926, "grad_norm": 0.359198659658432, "learning_rate": 0.00015122889561860697, "loss": 1.3943, "step": 18778 }, { "epoch": 0.24402454019618514, "grad_norm": 0.30627089738845825, "learning_rate": 0.00015122629615669557, "loss": 1.4395, "step": 18779 }, { "epoch": 0.244037534740101, "grad_norm": 0.31865280866622925, "learning_rate": 0.00015122369669478416, "loss": 1.3081, "step": 18780 }, { "epoch": 0.24405052928401688, "grad_norm": 0.41274237632751465, "learning_rate": 0.00015122109723287282, "loss": 1.5199, "step": 18781 }, { "epoch": 0.24406352382793275, "grad_norm": 0.34779736399650574, "learning_rate": 0.00015121849777096141, "loss": 1.4976, "step": 18782 }, { "epoch": 0.24407651837184863, "grad_norm": 0.28953367471694946, "learning_rate": 0.00015121589830905004, "loss": 1.3315, "step": 18783 }, { "epoch": 0.2440895129157645, "grad_norm": 0.3361044228076935, "learning_rate": 0.00015121329884713864, "loss": 1.1861, "step": 18784 }, { "epoch": 0.24410250745968037, "grad_norm": 0.49731504917144775, "learning_rate": 0.00015121069938522726, "loss": 1.4633, "step": 18785 }, { "epoch": 0.24411550200359625, "grad_norm": 0.38585013151168823, "learning_rate": 0.00015120809992331588, "loss": 1.4163, "step": 18786 }, { "epoch": 0.24412849654751212, "grad_norm": 0.5276221036911011, "learning_rate": 0.00015120550046140448, "loss": 1.594, "step": 18787 }, { "epoch": 0.244141491091428, "grad_norm": 0.4159316122531891, "learning_rate": 0.0001512029009994931, "loss": 1.3507, "step": 18788 }, { "epoch": 0.24415448563534387, "grad_norm": 0.5151793956756592, "learning_rate": 0.00015120030153758173, "loss": 1.445, "step": 18789 }, { "epoch": 0.24416748017925974, "grad_norm": 0.4093502461910248, "learning_rate": 0.00015119770207567036, "loss": 1.4257, "step": 18790 }, { "epoch": 0.2441804747231756, "grad_norm": 0.30513325333595276, "learning_rate": 0.00015119510261375895, "loss": 1.5628, "step": 18791 }, { "epoch": 0.24419346926709148, "grad_norm": 0.3895033895969391, "learning_rate": 0.00015119250315184758, "loss": 1.2502, "step": 18792 }, { "epoch": 0.24420646381100736, "grad_norm": 0.29489392042160034, "learning_rate": 0.0001511899036899362, "loss": 1.2116, "step": 18793 }, { "epoch": 0.24421945835492323, "grad_norm": 0.4238291382789612, "learning_rate": 0.0001511873042280248, "loss": 1.5503, "step": 18794 }, { "epoch": 0.2442324528988391, "grad_norm": 0.45867016911506653, "learning_rate": 0.00015118470476611342, "loss": 1.4501, "step": 18795 }, { "epoch": 0.24424544744275498, "grad_norm": 0.33472180366516113, "learning_rate": 0.00015118210530420202, "loss": 1.4123, "step": 18796 }, { "epoch": 0.24425844198667085, "grad_norm": 0.49916476011276245, "learning_rate": 0.00015117950584229065, "loss": 1.4447, "step": 18797 }, { "epoch": 0.24427143653058672, "grad_norm": 0.4661047160625458, "learning_rate": 0.00015117690638037927, "loss": 1.5426, "step": 18798 }, { "epoch": 0.2442844310745026, "grad_norm": 0.30168893933296204, "learning_rate": 0.00015117430691846787, "loss": 1.518, "step": 18799 }, { "epoch": 0.24429742561841847, "grad_norm": 0.4043814241886139, "learning_rate": 0.00015117170745655652, "loss": 1.2824, "step": 18800 }, { "epoch": 0.24431042016233434, "grad_norm": 0.40717563033103943, "learning_rate": 0.00015116910799464512, "loss": 1.55, "step": 18801 }, { "epoch": 0.2443234147062502, "grad_norm": 0.3813866376876831, "learning_rate": 0.00015116650853273374, "loss": 1.3725, "step": 18802 }, { "epoch": 0.2443364092501661, "grad_norm": 0.37645336985588074, "learning_rate": 0.00015116390907082234, "loss": 1.3089, "step": 18803 }, { "epoch": 0.24434940379408196, "grad_norm": 0.32562968134880066, "learning_rate": 0.00015116130960891096, "loss": 1.1854, "step": 18804 }, { "epoch": 0.24436239833799783, "grad_norm": 0.3957166075706482, "learning_rate": 0.0001511587101469996, "loss": 1.2336, "step": 18805 }, { "epoch": 0.2443753928819137, "grad_norm": 0.5393717885017395, "learning_rate": 0.00015115611068508818, "loss": 1.3798, "step": 18806 }, { "epoch": 0.24438838742582958, "grad_norm": 0.40587881207466125, "learning_rate": 0.0001511535112231768, "loss": 1.398, "step": 18807 }, { "epoch": 0.24440138196974545, "grad_norm": 0.2878035008907318, "learning_rate": 0.00015115091176126543, "loss": 1.2865, "step": 18808 }, { "epoch": 0.24441437651366132, "grad_norm": 0.40324002504348755, "learning_rate": 0.00015114831229935406, "loss": 1.5572, "step": 18809 }, { "epoch": 0.2444273710575772, "grad_norm": 0.3589307963848114, "learning_rate": 0.00015114571283744266, "loss": 1.3056, "step": 18810 }, { "epoch": 0.24444036560149307, "grad_norm": 0.49471569061279297, "learning_rate": 0.00015114311337553125, "loss": 1.541, "step": 18811 }, { "epoch": 0.24445336014540894, "grad_norm": 0.3284972906112671, "learning_rate": 0.0001511405139136199, "loss": 1.3574, "step": 18812 }, { "epoch": 0.24446635468932482, "grad_norm": 0.37585967779159546, "learning_rate": 0.0001511379144517085, "loss": 1.4213, "step": 18813 }, { "epoch": 0.2444793492332407, "grad_norm": 0.3559940457344055, "learning_rate": 0.00015113531498979713, "loss": 1.3872, "step": 18814 }, { "epoch": 0.24449234377715656, "grad_norm": 0.42702940106391907, "learning_rate": 0.00015113271552788572, "loss": 1.5979, "step": 18815 }, { "epoch": 0.24450533832107244, "grad_norm": 0.4022574722766876, "learning_rate": 0.00015113011606597435, "loss": 1.3873, "step": 18816 }, { "epoch": 0.2445183328649883, "grad_norm": 0.3252454996109009, "learning_rate": 0.00015112751660406297, "loss": 1.2717, "step": 18817 }, { "epoch": 0.24453132740890418, "grad_norm": 0.3761330544948578, "learning_rate": 0.00015112491714215157, "loss": 1.2447, "step": 18818 }, { "epoch": 0.24454432195282005, "grad_norm": 0.3562001585960388, "learning_rate": 0.0001511223176802402, "loss": 1.3029, "step": 18819 }, { "epoch": 0.24455731649673593, "grad_norm": 0.3484468162059784, "learning_rate": 0.00015111971821832882, "loss": 1.3335, "step": 18820 }, { "epoch": 0.2445703110406518, "grad_norm": 0.4047453701496124, "learning_rate": 0.00015111711875641744, "loss": 1.3216, "step": 18821 }, { "epoch": 0.24458330558456767, "grad_norm": 0.44757184386253357, "learning_rate": 0.00015111451929450604, "loss": 1.4453, "step": 18822 }, { "epoch": 0.24459630012848355, "grad_norm": 0.3730284571647644, "learning_rate": 0.00015111191983259464, "loss": 1.2855, "step": 18823 }, { "epoch": 0.24460929467239942, "grad_norm": 0.4215877056121826, "learning_rate": 0.0001511093203706833, "loss": 1.6168, "step": 18824 }, { "epoch": 0.2446222892163153, "grad_norm": 0.4406856596469879, "learning_rate": 0.0001511067209087719, "loss": 1.5112, "step": 18825 }, { "epoch": 0.24463528376023116, "grad_norm": 0.39661383628845215, "learning_rate": 0.0001511041214468605, "loss": 1.6071, "step": 18826 }, { "epoch": 0.24464827830414704, "grad_norm": 0.4332396388053894, "learning_rate": 0.0001511015219849491, "loss": 1.4226, "step": 18827 }, { "epoch": 0.2446612728480629, "grad_norm": 0.42816656827926636, "learning_rate": 0.00015109892252303773, "loss": 1.2745, "step": 18828 }, { "epoch": 0.24467426739197878, "grad_norm": 0.35928821563720703, "learning_rate": 0.00015109632306112636, "loss": 1.3723, "step": 18829 }, { "epoch": 0.24468726193589466, "grad_norm": 0.3581092357635498, "learning_rate": 0.00015109372359921496, "loss": 1.3478, "step": 18830 }, { "epoch": 0.24470025647981053, "grad_norm": 0.6039936542510986, "learning_rate": 0.00015109112413730358, "loss": 1.4855, "step": 18831 }, { "epoch": 0.2447132510237264, "grad_norm": 0.41694405674934387, "learning_rate": 0.0001510885246753922, "loss": 1.2934, "step": 18832 }, { "epoch": 0.24472624556764228, "grad_norm": 0.38265755772590637, "learning_rate": 0.00015108592521348083, "loss": 1.4342, "step": 18833 }, { "epoch": 0.24473924011155815, "grad_norm": 0.3158224821090698, "learning_rate": 0.00015108332575156943, "loss": 1.2717, "step": 18834 }, { "epoch": 0.24475223465547402, "grad_norm": 0.34161150455474854, "learning_rate": 0.00015108072628965805, "loss": 1.5496, "step": 18835 }, { "epoch": 0.2447652291993899, "grad_norm": 0.3635046184062958, "learning_rate": 0.00015107812682774668, "loss": 1.1539, "step": 18836 }, { "epoch": 0.24477822374330577, "grad_norm": 0.3595418632030487, "learning_rate": 0.00015107552736583527, "loss": 1.5846, "step": 18837 }, { "epoch": 0.24479121828722164, "grad_norm": 0.36239418387413025, "learning_rate": 0.0001510729279039239, "loss": 1.504, "step": 18838 }, { "epoch": 0.2448042128311375, "grad_norm": 0.35877153277397156, "learning_rate": 0.00015107032844201252, "loss": 1.2728, "step": 18839 }, { "epoch": 0.2448172073750534, "grad_norm": 0.4540707767009735, "learning_rate": 0.00015106772898010112, "loss": 1.4395, "step": 18840 }, { "epoch": 0.24483020191896926, "grad_norm": 0.4120355248451233, "learning_rate": 0.00015106512951818974, "loss": 1.3899, "step": 18841 }, { "epoch": 0.24484319646288516, "grad_norm": 0.37712806463241577, "learning_rate": 0.00015106253005627834, "loss": 1.5772, "step": 18842 }, { "epoch": 0.24485619100680103, "grad_norm": 0.3947502374649048, "learning_rate": 0.000151059930594367, "loss": 1.2986, "step": 18843 }, { "epoch": 0.2448691855507169, "grad_norm": 0.36569494009017944, "learning_rate": 0.0001510573311324556, "loss": 1.4456, "step": 18844 }, { "epoch": 0.24488218009463278, "grad_norm": 0.3824523985385895, "learning_rate": 0.00015105473167054421, "loss": 1.5606, "step": 18845 }, { "epoch": 0.24489517463854865, "grad_norm": 0.4289792478084564, "learning_rate": 0.0001510521322086328, "loss": 1.3368, "step": 18846 }, { "epoch": 0.24490816918246452, "grad_norm": 0.44024258852005005, "learning_rate": 0.00015104953274672144, "loss": 1.3706, "step": 18847 }, { "epoch": 0.2449211637263804, "grad_norm": 0.352137953042984, "learning_rate": 0.00015104693328481006, "loss": 1.4208, "step": 18848 }, { "epoch": 0.24493415827029627, "grad_norm": 0.44328558444976807, "learning_rate": 0.00015104433382289866, "loss": 1.4415, "step": 18849 }, { "epoch": 0.24494715281421214, "grad_norm": 0.43280497193336487, "learning_rate": 0.00015104173436098728, "loss": 1.3949, "step": 18850 }, { "epoch": 0.24496014735812802, "grad_norm": 0.4635848104953766, "learning_rate": 0.0001510391348990759, "loss": 1.4373, "step": 18851 }, { "epoch": 0.2449731419020439, "grad_norm": 0.37055668234825134, "learning_rate": 0.0001510365354371645, "loss": 1.4796, "step": 18852 }, { "epoch": 0.24498613644595976, "grad_norm": 0.5201985239982605, "learning_rate": 0.00015103393597525313, "loss": 1.404, "step": 18853 }, { "epoch": 0.24499913098987564, "grad_norm": 0.3533934950828552, "learning_rate": 0.00015103133651334173, "loss": 1.4053, "step": 18854 }, { "epoch": 0.2450121255337915, "grad_norm": 0.39543235301971436, "learning_rate": 0.00015102873705143038, "loss": 1.4608, "step": 18855 }, { "epoch": 0.24502512007770738, "grad_norm": 0.39458879828453064, "learning_rate": 0.00015102613758951898, "loss": 1.4994, "step": 18856 }, { "epoch": 0.24503811462162325, "grad_norm": 0.4454036056995392, "learning_rate": 0.0001510235381276076, "loss": 1.4487, "step": 18857 }, { "epoch": 0.24505110916553913, "grad_norm": 0.3871466815471649, "learning_rate": 0.0001510209386656962, "loss": 1.4809, "step": 18858 }, { "epoch": 0.245064103709455, "grad_norm": 0.3446694314479828, "learning_rate": 0.00015101833920378482, "loss": 1.5561, "step": 18859 }, { "epoch": 0.24507709825337087, "grad_norm": 0.41022297739982605, "learning_rate": 0.00015101573974187345, "loss": 1.3301, "step": 18860 }, { "epoch": 0.24509009279728675, "grad_norm": 0.4372495412826538, "learning_rate": 0.00015101314027996204, "loss": 1.3225, "step": 18861 }, { "epoch": 0.24510308734120262, "grad_norm": 0.3996409773826599, "learning_rate": 0.00015101054081805067, "loss": 1.5621, "step": 18862 }, { "epoch": 0.2451160818851185, "grad_norm": 0.3698645532131195, "learning_rate": 0.0001510079413561393, "loss": 1.396, "step": 18863 }, { "epoch": 0.24512907642903436, "grad_norm": 0.3147052526473999, "learning_rate": 0.0001510053418942279, "loss": 1.3551, "step": 18864 }, { "epoch": 0.24514207097295024, "grad_norm": 0.4044569134712219, "learning_rate": 0.00015100274243231651, "loss": 1.1914, "step": 18865 }, { "epoch": 0.2451550655168661, "grad_norm": 0.42564070224761963, "learning_rate": 0.0001510001429704051, "loss": 1.6862, "step": 18866 }, { "epoch": 0.24516806006078198, "grad_norm": 0.5052086710929871, "learning_rate": 0.00015099754350849376, "loss": 1.5415, "step": 18867 }, { "epoch": 0.24518105460469786, "grad_norm": 0.42564037442207336, "learning_rate": 0.00015099494404658236, "loss": 1.4262, "step": 18868 }, { "epoch": 0.24519404914861373, "grad_norm": 0.4483758807182312, "learning_rate": 0.00015099234458467098, "loss": 1.5262, "step": 18869 }, { "epoch": 0.2452070436925296, "grad_norm": 0.3586370646953583, "learning_rate": 0.00015098974512275958, "loss": 1.3809, "step": 18870 }, { "epoch": 0.24522003823644548, "grad_norm": 0.3960714340209961, "learning_rate": 0.0001509871456608482, "loss": 1.4154, "step": 18871 }, { "epoch": 0.24523303278036135, "grad_norm": 0.4796430766582489, "learning_rate": 0.00015098454619893683, "loss": 1.6041, "step": 18872 }, { "epoch": 0.24524602732427722, "grad_norm": 0.29470136761665344, "learning_rate": 0.00015098194673702543, "loss": 1.1743, "step": 18873 }, { "epoch": 0.2452590218681931, "grad_norm": 0.44004929065704346, "learning_rate": 0.00015097934727511408, "loss": 1.4328, "step": 18874 }, { "epoch": 0.24527201641210897, "grad_norm": 0.3609558641910553, "learning_rate": 0.00015097674781320268, "loss": 1.3158, "step": 18875 }, { "epoch": 0.24528501095602484, "grad_norm": 0.4033472537994385, "learning_rate": 0.0001509741483512913, "loss": 1.4027, "step": 18876 }, { "epoch": 0.2452980054999407, "grad_norm": 0.400058776140213, "learning_rate": 0.0001509715488893799, "loss": 1.5013, "step": 18877 }, { "epoch": 0.2453110000438566, "grad_norm": 0.5335497260093689, "learning_rate": 0.00015096894942746852, "loss": 1.3279, "step": 18878 }, { "epoch": 0.24532399458777246, "grad_norm": 0.326107382774353, "learning_rate": 0.00015096634996555715, "loss": 1.5635, "step": 18879 }, { "epoch": 0.24533698913168833, "grad_norm": 0.34297630190849304, "learning_rate": 0.00015096375050364575, "loss": 1.5086, "step": 18880 }, { "epoch": 0.2453499836756042, "grad_norm": 0.3952433168888092, "learning_rate": 0.00015096115104173437, "loss": 1.5876, "step": 18881 }, { "epoch": 0.24536297821952008, "grad_norm": 0.4266887903213501, "learning_rate": 0.000150958551579823, "loss": 1.4145, "step": 18882 }, { "epoch": 0.24537597276343595, "grad_norm": 0.31486156582832336, "learning_rate": 0.0001509559521179116, "loss": 1.3145, "step": 18883 }, { "epoch": 0.24538896730735182, "grad_norm": 0.2696535587310791, "learning_rate": 0.00015095335265600022, "loss": 1.4498, "step": 18884 }, { "epoch": 0.2454019618512677, "grad_norm": 0.4172575771808624, "learning_rate": 0.00015095075319408881, "loss": 1.5064, "step": 18885 }, { "epoch": 0.24541495639518357, "grad_norm": 0.34061190485954285, "learning_rate": 0.00015094815373217747, "loss": 1.6777, "step": 18886 }, { "epoch": 0.24542795093909944, "grad_norm": 0.43162450194358826, "learning_rate": 0.00015094555427026606, "loss": 1.3103, "step": 18887 }, { "epoch": 0.24544094548301532, "grad_norm": 0.7491441965103149, "learning_rate": 0.0001509429548083547, "loss": 1.3838, "step": 18888 }, { "epoch": 0.2454539400269312, "grad_norm": 0.4656504690647125, "learning_rate": 0.00015094035534644328, "loss": 1.3809, "step": 18889 }, { "epoch": 0.24546693457084706, "grad_norm": 0.29248175024986267, "learning_rate": 0.0001509377558845319, "loss": 1.5842, "step": 18890 }, { "epoch": 0.24547992911476293, "grad_norm": 0.48640045523643494, "learning_rate": 0.00015093515642262053, "loss": 1.5551, "step": 18891 }, { "epoch": 0.2454929236586788, "grad_norm": 0.3092939555644989, "learning_rate": 0.00015093255696070913, "loss": 1.3025, "step": 18892 }, { "epoch": 0.24550591820259468, "grad_norm": 0.41665032505989075, "learning_rate": 0.00015092995749879776, "loss": 1.3641, "step": 18893 }, { "epoch": 0.24551891274651055, "grad_norm": 0.3051072657108307, "learning_rate": 0.00015092735803688638, "loss": 1.3634, "step": 18894 }, { "epoch": 0.24553190729042643, "grad_norm": 0.4392319917678833, "learning_rate": 0.00015092475857497498, "loss": 1.4081, "step": 18895 }, { "epoch": 0.2455449018343423, "grad_norm": 0.3552829921245575, "learning_rate": 0.0001509221591130636, "loss": 1.3307, "step": 18896 }, { "epoch": 0.24555789637825817, "grad_norm": 0.4525161385536194, "learning_rate": 0.0001509195596511522, "loss": 1.5111, "step": 18897 }, { "epoch": 0.24557089092217405, "grad_norm": 0.30844396352767944, "learning_rate": 0.00015091696018924085, "loss": 1.2365, "step": 18898 }, { "epoch": 0.24558388546608992, "grad_norm": 0.41089165210723877, "learning_rate": 0.00015091436072732945, "loss": 1.4627, "step": 18899 }, { "epoch": 0.2455968800100058, "grad_norm": 0.2998374104499817, "learning_rate": 0.00015091176126541807, "loss": 1.4468, "step": 18900 }, { "epoch": 0.24560987455392166, "grad_norm": 0.3028707504272461, "learning_rate": 0.00015090916180350667, "loss": 1.3342, "step": 18901 }, { "epoch": 0.24562286909783754, "grad_norm": 0.4073849618434906, "learning_rate": 0.0001509065623415953, "loss": 1.4842, "step": 18902 }, { "epoch": 0.2456358636417534, "grad_norm": 0.43446093797683716, "learning_rate": 0.00015090396287968392, "loss": 1.445, "step": 18903 }, { "epoch": 0.24564885818566928, "grad_norm": 0.44141966104507446, "learning_rate": 0.00015090136341777252, "loss": 1.5915, "step": 18904 }, { "epoch": 0.24566185272958516, "grad_norm": 0.3364699184894562, "learning_rate": 0.00015089876395586114, "loss": 1.4876, "step": 18905 }, { "epoch": 0.24567484727350103, "grad_norm": 0.2789861559867859, "learning_rate": 0.00015089616449394977, "loss": 1.3709, "step": 18906 }, { "epoch": 0.2456878418174169, "grad_norm": 0.40969333052635193, "learning_rate": 0.00015089356503203836, "loss": 1.3745, "step": 18907 }, { "epoch": 0.24570083636133278, "grad_norm": 0.2771616280078888, "learning_rate": 0.000150890965570127, "loss": 1.3688, "step": 18908 }, { "epoch": 0.24571383090524865, "grad_norm": 0.3067249357700348, "learning_rate": 0.0001508883661082156, "loss": 1.3455, "step": 18909 }, { "epoch": 0.24572682544916452, "grad_norm": 0.46570900082588196, "learning_rate": 0.00015088576664630424, "loss": 1.4555, "step": 18910 }, { "epoch": 0.2457398199930804, "grad_norm": 0.4184342324733734, "learning_rate": 0.00015088316718439283, "loss": 1.3756, "step": 18911 }, { "epoch": 0.24575281453699627, "grad_norm": 0.378604531288147, "learning_rate": 0.00015088056772248146, "loss": 1.3979, "step": 18912 }, { "epoch": 0.24576580908091214, "grad_norm": 0.4666525721549988, "learning_rate": 0.00015087796826057008, "loss": 1.3965, "step": 18913 }, { "epoch": 0.245778803624828, "grad_norm": 0.39189279079437256, "learning_rate": 0.00015087536879865868, "loss": 1.4006, "step": 18914 }, { "epoch": 0.24579179816874389, "grad_norm": 0.514519989490509, "learning_rate": 0.0001508727693367473, "loss": 1.5277, "step": 18915 }, { "epoch": 0.24580479271265976, "grad_norm": 0.3929111063480377, "learning_rate": 0.0001508701698748359, "loss": 1.3883, "step": 18916 }, { "epoch": 0.24581778725657563, "grad_norm": 0.42645853757858276, "learning_rate": 0.00015086757041292455, "loss": 1.4955, "step": 18917 }, { "epoch": 0.24583078180049153, "grad_norm": 0.38905468583106995, "learning_rate": 0.00015086497095101315, "loss": 1.2094, "step": 18918 }, { "epoch": 0.2458437763444074, "grad_norm": 0.3995038866996765, "learning_rate": 0.00015086237148910175, "loss": 1.5347, "step": 18919 }, { "epoch": 0.24585677088832328, "grad_norm": 0.4171901047229767, "learning_rate": 0.00015085977202719037, "loss": 1.4877, "step": 18920 }, { "epoch": 0.24586976543223915, "grad_norm": 0.38386470079421997, "learning_rate": 0.000150857172565279, "loss": 1.2076, "step": 18921 }, { "epoch": 0.24588275997615502, "grad_norm": 0.30258697271347046, "learning_rate": 0.00015085457310336762, "loss": 1.3197, "step": 18922 }, { "epoch": 0.2458957545200709, "grad_norm": 0.40699124336242676, "learning_rate": 0.00015085197364145622, "loss": 1.5121, "step": 18923 }, { "epoch": 0.24590874906398677, "grad_norm": 0.3099406659603119, "learning_rate": 0.00015084937417954484, "loss": 1.2779, "step": 18924 }, { "epoch": 0.24592174360790264, "grad_norm": 0.34832271933555603, "learning_rate": 0.00015084677471763347, "loss": 1.1913, "step": 18925 }, { "epoch": 0.24593473815181852, "grad_norm": 0.45675963163375854, "learning_rate": 0.00015084417525572207, "loss": 1.3428, "step": 18926 }, { "epoch": 0.2459477326957344, "grad_norm": 0.3640705645084381, "learning_rate": 0.0001508415757938107, "loss": 1.4172, "step": 18927 }, { "epoch": 0.24596072723965026, "grad_norm": 0.49615612626075745, "learning_rate": 0.0001508389763318993, "loss": 1.4138, "step": 18928 }, { "epoch": 0.24597372178356613, "grad_norm": 0.4574418067932129, "learning_rate": 0.00015083637686998794, "loss": 1.4386, "step": 18929 }, { "epoch": 0.245986716327482, "grad_norm": 0.4878838360309601, "learning_rate": 0.00015083377740807654, "loss": 1.4701, "step": 18930 }, { "epoch": 0.24599971087139788, "grad_norm": 0.4412200450897217, "learning_rate": 0.00015083117794616516, "loss": 1.5322, "step": 18931 }, { "epoch": 0.24601270541531375, "grad_norm": 0.3475750684738159, "learning_rate": 0.00015082857848425376, "loss": 1.3837, "step": 18932 }, { "epoch": 0.24602569995922963, "grad_norm": 0.3626914322376251, "learning_rate": 0.00015082597902234238, "loss": 1.2061, "step": 18933 }, { "epoch": 0.2460386945031455, "grad_norm": 0.40176922082901, "learning_rate": 0.000150823379560431, "loss": 1.3461, "step": 18934 }, { "epoch": 0.24605168904706137, "grad_norm": 0.4041104316711426, "learning_rate": 0.0001508207800985196, "loss": 1.3661, "step": 18935 }, { "epoch": 0.24606468359097725, "grad_norm": 0.3509327471256256, "learning_rate": 0.00015081818063660823, "loss": 1.272, "step": 18936 }, { "epoch": 0.24607767813489312, "grad_norm": 0.3932877480983734, "learning_rate": 0.00015081558117469685, "loss": 1.5167, "step": 18937 }, { "epoch": 0.246090672678809, "grad_norm": 0.40667298436164856, "learning_rate": 0.00015081298171278545, "loss": 1.6011, "step": 18938 }, { "epoch": 0.24610366722272486, "grad_norm": 0.49350541830062866, "learning_rate": 0.00015081038225087408, "loss": 1.4247, "step": 18939 }, { "epoch": 0.24611666176664074, "grad_norm": 0.36479461193084717, "learning_rate": 0.00015080778278896267, "loss": 1.3265, "step": 18940 }, { "epoch": 0.2461296563105566, "grad_norm": 0.45613476634025574, "learning_rate": 0.00015080518332705132, "loss": 1.5385, "step": 18941 }, { "epoch": 0.24614265085447248, "grad_norm": 0.4249322712421417, "learning_rate": 0.00015080258386513992, "loss": 1.3963, "step": 18942 }, { "epoch": 0.24615564539838836, "grad_norm": 0.3252331018447876, "learning_rate": 0.00015079998440322855, "loss": 1.2948, "step": 18943 }, { "epoch": 0.24616863994230423, "grad_norm": 0.3997461497783661, "learning_rate": 0.00015079738494131714, "loss": 1.4025, "step": 18944 }, { "epoch": 0.2461816344862201, "grad_norm": 0.5006688237190247, "learning_rate": 0.00015079478547940577, "loss": 1.3893, "step": 18945 }, { "epoch": 0.24619462903013598, "grad_norm": 0.46695682406425476, "learning_rate": 0.0001507921860174944, "loss": 1.4693, "step": 18946 }, { "epoch": 0.24620762357405185, "grad_norm": 0.33112865686416626, "learning_rate": 0.000150789586555583, "loss": 1.3808, "step": 18947 }, { "epoch": 0.24622061811796772, "grad_norm": 0.5466291308403015, "learning_rate": 0.00015078698709367161, "loss": 1.4927, "step": 18948 }, { "epoch": 0.2462336126618836, "grad_norm": 0.38930803537368774, "learning_rate": 0.00015078438763176024, "loss": 1.4061, "step": 18949 }, { "epoch": 0.24624660720579947, "grad_norm": 0.4526674449443817, "learning_rate": 0.00015078178816984884, "loss": 1.396, "step": 18950 }, { "epoch": 0.24625960174971534, "grad_norm": 0.31736335158348083, "learning_rate": 0.00015077918870793746, "loss": 1.284, "step": 18951 }, { "epoch": 0.2462725962936312, "grad_norm": 0.46952641010284424, "learning_rate": 0.00015077658924602609, "loss": 1.4592, "step": 18952 }, { "epoch": 0.24628559083754709, "grad_norm": 0.4860699772834778, "learning_rate": 0.0001507739897841147, "loss": 1.4297, "step": 18953 }, { "epoch": 0.24629858538146296, "grad_norm": 0.43200260400772095, "learning_rate": 0.0001507713903222033, "loss": 1.4825, "step": 18954 }, { "epoch": 0.24631157992537883, "grad_norm": 0.4215549826622009, "learning_rate": 0.00015076879086029193, "loss": 1.342, "step": 18955 }, { "epoch": 0.2463245744692947, "grad_norm": 0.36756816506385803, "learning_rate": 0.00015076619139838056, "loss": 1.2427, "step": 18956 }, { "epoch": 0.24633756901321058, "grad_norm": 0.4120791256427765, "learning_rate": 0.00015076359193646915, "loss": 1.3275, "step": 18957 }, { "epoch": 0.24635056355712645, "grad_norm": 0.3671320676803589, "learning_rate": 0.00015076099247455778, "loss": 1.4187, "step": 18958 }, { "epoch": 0.24636355810104232, "grad_norm": 0.4896242320537567, "learning_rate": 0.00015075839301264638, "loss": 1.4208, "step": 18959 }, { "epoch": 0.2463765526449582, "grad_norm": 0.3865586519241333, "learning_rate": 0.00015075579355073503, "loss": 1.4248, "step": 18960 }, { "epoch": 0.24638954718887407, "grad_norm": 0.3354617953300476, "learning_rate": 0.00015075319408882362, "loss": 1.32, "step": 18961 }, { "epoch": 0.24640254173278994, "grad_norm": 0.419919490814209, "learning_rate": 0.00015075059462691222, "loss": 1.4812, "step": 18962 }, { "epoch": 0.24641553627670582, "grad_norm": 0.4438920021057129, "learning_rate": 0.00015074799516500085, "loss": 1.4412, "step": 18963 }, { "epoch": 0.2464285308206217, "grad_norm": 0.39383605122566223, "learning_rate": 0.00015074539570308947, "loss": 1.4579, "step": 18964 }, { "epoch": 0.24644152536453756, "grad_norm": 0.44826945662498474, "learning_rate": 0.0001507427962411781, "loss": 1.3422, "step": 18965 }, { "epoch": 0.24645451990845343, "grad_norm": 0.3373607397079468, "learning_rate": 0.0001507401967792667, "loss": 1.2381, "step": 18966 }, { "epoch": 0.2464675144523693, "grad_norm": 0.4834904968738556, "learning_rate": 0.00015073759731735532, "loss": 1.5965, "step": 18967 }, { "epoch": 0.24648050899628518, "grad_norm": 0.4047437310218811, "learning_rate": 0.00015073499785544394, "loss": 1.4364, "step": 18968 }, { "epoch": 0.24649350354020105, "grad_norm": 0.3365999162197113, "learning_rate": 0.00015073239839353254, "loss": 1.3354, "step": 18969 }, { "epoch": 0.24650649808411693, "grad_norm": 0.31903040409088135, "learning_rate": 0.00015072979893162116, "loss": 1.0632, "step": 18970 }, { "epoch": 0.2465194926280328, "grad_norm": 0.43094542622566223, "learning_rate": 0.00015072719946970976, "loss": 1.4729, "step": 18971 }, { "epoch": 0.24653248717194867, "grad_norm": 0.3633486032485962, "learning_rate": 0.0001507246000077984, "loss": 1.4649, "step": 18972 }, { "epoch": 0.24654548171586455, "grad_norm": 0.5617942810058594, "learning_rate": 0.000150722000545887, "loss": 1.495, "step": 18973 }, { "epoch": 0.24655847625978042, "grad_norm": 0.37191879749298096, "learning_rate": 0.0001507194010839756, "loss": 1.4604, "step": 18974 }, { "epoch": 0.2465714708036963, "grad_norm": 0.4197375774383545, "learning_rate": 0.00015071680162206423, "loss": 1.253, "step": 18975 }, { "epoch": 0.24658446534761216, "grad_norm": 0.395475834608078, "learning_rate": 0.00015071420216015286, "loss": 1.5585, "step": 18976 }, { "epoch": 0.24659745989152804, "grad_norm": 0.41954919695854187, "learning_rate": 0.00015071160269824148, "loss": 1.4427, "step": 18977 }, { "epoch": 0.2466104544354439, "grad_norm": 0.42125093936920166, "learning_rate": 0.00015070900323633008, "loss": 1.6147, "step": 18978 }, { "epoch": 0.24662344897935978, "grad_norm": 0.5504149198532104, "learning_rate": 0.0001507064037744187, "loss": 1.4808, "step": 18979 }, { "epoch": 0.24663644352327566, "grad_norm": 0.42679038643836975, "learning_rate": 0.00015070380431250733, "loss": 1.4419, "step": 18980 }, { "epoch": 0.24664943806719153, "grad_norm": 0.47874870896339417, "learning_rate": 0.00015070120485059592, "loss": 1.4607, "step": 18981 }, { "epoch": 0.2466624326111074, "grad_norm": 0.42051711678504944, "learning_rate": 0.00015069860538868455, "loss": 1.4576, "step": 18982 }, { "epoch": 0.24667542715502327, "grad_norm": 0.42147690057754517, "learning_rate": 0.00015069600592677317, "loss": 1.3982, "step": 18983 }, { "epoch": 0.24668842169893915, "grad_norm": 0.36267000436782837, "learning_rate": 0.0001506934064648618, "loss": 1.2403, "step": 18984 }, { "epoch": 0.24670141624285502, "grad_norm": 0.38945502042770386, "learning_rate": 0.0001506908070029504, "loss": 1.3817, "step": 18985 }, { "epoch": 0.2467144107867709, "grad_norm": 0.42240071296691895, "learning_rate": 0.000150688207541039, "loss": 1.4019, "step": 18986 }, { "epoch": 0.24672740533068677, "grad_norm": 0.3628593981266022, "learning_rate": 0.00015068560807912764, "loss": 1.602, "step": 18987 }, { "epoch": 0.24674039987460264, "grad_norm": 0.43468666076660156, "learning_rate": 0.00015068300861721624, "loss": 1.3571, "step": 18988 }, { "epoch": 0.2467533944185185, "grad_norm": 0.42950963973999023, "learning_rate": 0.00015068040915530487, "loss": 1.2265, "step": 18989 }, { "epoch": 0.24676638896243439, "grad_norm": 0.3333166539669037, "learning_rate": 0.00015067780969339346, "loss": 1.4119, "step": 18990 }, { "epoch": 0.24677938350635026, "grad_norm": 0.2706016004085541, "learning_rate": 0.0001506752102314821, "loss": 1.1275, "step": 18991 }, { "epoch": 0.24679237805026613, "grad_norm": 0.3455129563808441, "learning_rate": 0.0001506726107695707, "loss": 1.403, "step": 18992 }, { "epoch": 0.246805372594182, "grad_norm": 0.48403802514076233, "learning_rate": 0.0001506700113076593, "loss": 1.3928, "step": 18993 }, { "epoch": 0.2468183671380979, "grad_norm": 0.5159617066383362, "learning_rate": 0.00015066741184574793, "loss": 1.4067, "step": 18994 }, { "epoch": 0.24683136168201378, "grad_norm": 0.34165164828300476, "learning_rate": 0.00015066481238383656, "loss": 1.3847, "step": 18995 }, { "epoch": 0.24684435622592965, "grad_norm": 0.44306039810180664, "learning_rate": 0.00015066221292192518, "loss": 1.5585, "step": 18996 }, { "epoch": 0.24685735076984552, "grad_norm": 0.4142409861087799, "learning_rate": 0.00015065961346001378, "loss": 1.4677, "step": 18997 }, { "epoch": 0.2468703453137614, "grad_norm": 0.5357551574707031, "learning_rate": 0.0001506570139981024, "loss": 1.504, "step": 18998 }, { "epoch": 0.24688333985767727, "grad_norm": 0.42358142137527466, "learning_rate": 0.00015065441453619103, "loss": 1.5028, "step": 18999 }, { "epoch": 0.24689633440159314, "grad_norm": 0.34169360995292664, "learning_rate": 0.00015065181507427963, "loss": 1.1883, "step": 19000 }, { "epoch": 0.24690932894550902, "grad_norm": 0.38855984807014465, "learning_rate": 0.00015064921561236825, "loss": 1.592, "step": 19001 }, { "epoch": 0.2469223234894249, "grad_norm": 0.4562557339668274, "learning_rate": 0.00015064661615045685, "loss": 1.4843, "step": 19002 }, { "epoch": 0.24693531803334076, "grad_norm": 0.4345974028110504, "learning_rate": 0.00015064401668854547, "loss": 1.3692, "step": 19003 }, { "epoch": 0.24694831257725663, "grad_norm": 0.43085306882858276, "learning_rate": 0.0001506414172266341, "loss": 1.4547, "step": 19004 }, { "epoch": 0.2469613071211725, "grad_norm": 0.4232499897480011, "learning_rate": 0.0001506388177647227, "loss": 1.632, "step": 19005 }, { "epoch": 0.24697430166508838, "grad_norm": 0.45389315485954285, "learning_rate": 0.00015063621830281132, "loss": 1.5862, "step": 19006 }, { "epoch": 0.24698729620900425, "grad_norm": 0.34181898832321167, "learning_rate": 0.00015063361884089994, "loss": 1.2575, "step": 19007 }, { "epoch": 0.24700029075292013, "grad_norm": 0.3823287785053253, "learning_rate": 0.00015063101937898857, "loss": 1.3946, "step": 19008 }, { "epoch": 0.247013285296836, "grad_norm": 0.31054508686065674, "learning_rate": 0.00015062841991707717, "loss": 1.2438, "step": 19009 }, { "epoch": 0.24702627984075187, "grad_norm": 0.43709537386894226, "learning_rate": 0.0001506258204551658, "loss": 1.6329, "step": 19010 }, { "epoch": 0.24703927438466775, "grad_norm": 0.3412104547023773, "learning_rate": 0.00015062322099325441, "loss": 1.2695, "step": 19011 }, { "epoch": 0.24705226892858362, "grad_norm": 0.3026045858860016, "learning_rate": 0.000150620621531343, "loss": 1.2772, "step": 19012 }, { "epoch": 0.2470652634724995, "grad_norm": 0.42059242725372314, "learning_rate": 0.00015061802206943164, "loss": 1.4135, "step": 19013 }, { "epoch": 0.24707825801641536, "grad_norm": 0.3934084177017212, "learning_rate": 0.00015061542260752023, "loss": 1.3873, "step": 19014 }, { "epoch": 0.24709125256033124, "grad_norm": 0.3865605592727661, "learning_rate": 0.00015061282314560889, "loss": 1.3047, "step": 19015 }, { "epoch": 0.2471042471042471, "grad_norm": 0.4346744418144226, "learning_rate": 0.00015061022368369748, "loss": 1.5871, "step": 19016 }, { "epoch": 0.24711724164816298, "grad_norm": 0.3997902572154999, "learning_rate": 0.00015060762422178608, "loss": 1.5877, "step": 19017 }, { "epoch": 0.24713023619207886, "grad_norm": 0.3850584924221039, "learning_rate": 0.0001506050247598747, "loss": 1.3246, "step": 19018 }, { "epoch": 0.24714323073599473, "grad_norm": 0.3420499861240387, "learning_rate": 0.00015060242529796333, "loss": 1.3257, "step": 19019 }, { "epoch": 0.2471562252799106, "grad_norm": 0.40653449296951294, "learning_rate": 0.00015059982583605195, "loss": 1.4343, "step": 19020 }, { "epoch": 0.24716921982382647, "grad_norm": 0.36838942766189575, "learning_rate": 0.00015059722637414055, "loss": 1.3241, "step": 19021 }, { "epoch": 0.24718221436774235, "grad_norm": 0.37009817361831665, "learning_rate": 0.00015059462691222918, "loss": 1.3256, "step": 19022 }, { "epoch": 0.24719520891165822, "grad_norm": 0.4786401093006134, "learning_rate": 0.0001505920274503178, "loss": 1.3751, "step": 19023 }, { "epoch": 0.2472082034555741, "grad_norm": 0.48386844992637634, "learning_rate": 0.0001505894279884064, "loss": 1.5617, "step": 19024 }, { "epoch": 0.24722119799948997, "grad_norm": 0.40708616375923157, "learning_rate": 0.00015058682852649502, "loss": 1.3024, "step": 19025 }, { "epoch": 0.24723419254340584, "grad_norm": 0.4326227903366089, "learning_rate": 0.00015058422906458365, "loss": 1.4966, "step": 19026 }, { "epoch": 0.2472471870873217, "grad_norm": 0.4003925919532776, "learning_rate": 0.00015058162960267227, "loss": 1.4613, "step": 19027 }, { "epoch": 0.24726018163123759, "grad_norm": 0.3473844826221466, "learning_rate": 0.00015057903014076087, "loss": 1.1871, "step": 19028 }, { "epoch": 0.24727317617515346, "grad_norm": 0.36359885334968567, "learning_rate": 0.00015057643067884947, "loss": 1.6373, "step": 19029 }, { "epoch": 0.24728617071906933, "grad_norm": 0.41476327180862427, "learning_rate": 0.00015057383121693812, "loss": 1.4524, "step": 19030 }, { "epoch": 0.2472991652629852, "grad_norm": 0.4065360724925995, "learning_rate": 0.00015057123175502671, "loss": 1.4698, "step": 19031 }, { "epoch": 0.24731215980690108, "grad_norm": 0.47410744428634644, "learning_rate": 0.00015056863229311534, "loss": 1.5398, "step": 19032 }, { "epoch": 0.24732515435081695, "grad_norm": 0.4218134880065918, "learning_rate": 0.00015056603283120394, "loss": 1.45, "step": 19033 }, { "epoch": 0.24733814889473282, "grad_norm": 0.535853385925293, "learning_rate": 0.00015056343336929256, "loss": 1.3941, "step": 19034 }, { "epoch": 0.2473511434386487, "grad_norm": 0.37583762407302856, "learning_rate": 0.00015056083390738119, "loss": 1.5249, "step": 19035 }, { "epoch": 0.24736413798256457, "grad_norm": 0.4235329329967499, "learning_rate": 0.00015055823444546978, "loss": 1.4013, "step": 19036 }, { "epoch": 0.24737713252648044, "grad_norm": 0.37479016184806824, "learning_rate": 0.0001505556349835584, "loss": 1.2676, "step": 19037 }, { "epoch": 0.24739012707039632, "grad_norm": 0.329023152589798, "learning_rate": 0.00015055303552164703, "loss": 1.1844, "step": 19038 }, { "epoch": 0.2474031216143122, "grad_norm": 0.2642345428466797, "learning_rate": 0.00015055043605973566, "loss": 1.2542, "step": 19039 }, { "epoch": 0.24741611615822806, "grad_norm": 0.45293453335762024, "learning_rate": 0.00015054783659782425, "loss": 1.4203, "step": 19040 }, { "epoch": 0.24742911070214393, "grad_norm": 0.39274486899375916, "learning_rate": 0.00015054523713591285, "loss": 1.3899, "step": 19041 }, { "epoch": 0.2474421052460598, "grad_norm": 0.4734686315059662, "learning_rate": 0.0001505426376740015, "loss": 1.4268, "step": 19042 }, { "epoch": 0.24745509978997568, "grad_norm": 0.4422254264354706, "learning_rate": 0.0001505400382120901, "loss": 1.4182, "step": 19043 }, { "epoch": 0.24746809433389155, "grad_norm": 0.32406046986579895, "learning_rate": 0.00015053743875017872, "loss": 1.5838, "step": 19044 }, { "epoch": 0.24748108887780743, "grad_norm": 0.5539398789405823, "learning_rate": 0.00015053483928826732, "loss": 1.4353, "step": 19045 }, { "epoch": 0.2474940834217233, "grad_norm": 0.41394415497779846, "learning_rate": 0.00015053223982635595, "loss": 1.552, "step": 19046 }, { "epoch": 0.24750707796563917, "grad_norm": 0.40400639176368713, "learning_rate": 0.00015052964036444457, "loss": 1.4701, "step": 19047 }, { "epoch": 0.24752007250955504, "grad_norm": 0.4305315315723419, "learning_rate": 0.00015052704090253317, "loss": 1.5165, "step": 19048 }, { "epoch": 0.24753306705347092, "grad_norm": 0.36676716804504395, "learning_rate": 0.0001505244414406218, "loss": 1.6704, "step": 19049 }, { "epoch": 0.2475460615973868, "grad_norm": 0.6173962950706482, "learning_rate": 0.00015052184197871042, "loss": 1.619, "step": 19050 }, { "epoch": 0.24755905614130266, "grad_norm": 0.45166561007499695, "learning_rate": 0.00015051924251679904, "loss": 1.5228, "step": 19051 }, { "epoch": 0.24757205068521854, "grad_norm": 0.41376087069511414, "learning_rate": 0.00015051664305488764, "loss": 1.5808, "step": 19052 }, { "epoch": 0.2475850452291344, "grad_norm": 0.42920196056365967, "learning_rate": 0.00015051404359297626, "loss": 1.6603, "step": 19053 }, { "epoch": 0.24759803977305028, "grad_norm": 0.5915201902389526, "learning_rate": 0.0001505114441310649, "loss": 1.484, "step": 19054 }, { "epoch": 0.24761103431696616, "grad_norm": 0.41033339500427246, "learning_rate": 0.00015050884466915349, "loss": 1.354, "step": 19055 }, { "epoch": 0.24762402886088203, "grad_norm": 0.35668492317199707, "learning_rate": 0.0001505062452072421, "loss": 1.2864, "step": 19056 }, { "epoch": 0.2476370234047979, "grad_norm": 0.36982670426368713, "learning_rate": 0.00015050364574533073, "loss": 1.4106, "step": 19057 }, { "epoch": 0.24765001794871377, "grad_norm": 0.3040981888771057, "learning_rate": 0.00015050104628341933, "loss": 1.3076, "step": 19058 }, { "epoch": 0.24766301249262965, "grad_norm": 0.3890395164489746, "learning_rate": 0.00015049844682150796, "loss": 1.5605, "step": 19059 }, { "epoch": 0.24767600703654552, "grad_norm": 0.4530028700828552, "learning_rate": 0.00015049584735959655, "loss": 1.3765, "step": 19060 }, { "epoch": 0.2476890015804614, "grad_norm": 0.405753493309021, "learning_rate": 0.0001504932478976852, "loss": 1.2287, "step": 19061 }, { "epoch": 0.24770199612437727, "grad_norm": 0.38713157176971436, "learning_rate": 0.0001504906484357738, "loss": 1.2443, "step": 19062 }, { "epoch": 0.24771499066829314, "grad_norm": 0.3774830102920532, "learning_rate": 0.00015048804897386243, "loss": 1.2687, "step": 19063 }, { "epoch": 0.247727985212209, "grad_norm": 0.39333006739616394, "learning_rate": 0.00015048544951195102, "loss": 1.4222, "step": 19064 }, { "epoch": 0.24774097975612488, "grad_norm": 0.4434764087200165, "learning_rate": 0.00015048285005003965, "loss": 1.3346, "step": 19065 }, { "epoch": 0.24775397430004076, "grad_norm": 0.3143687844276428, "learning_rate": 0.00015048025058812827, "loss": 1.2926, "step": 19066 }, { "epoch": 0.24776696884395663, "grad_norm": 0.5469130277633667, "learning_rate": 0.00015047765112621687, "loss": 1.4938, "step": 19067 }, { "epoch": 0.2477799633878725, "grad_norm": 0.29207664728164673, "learning_rate": 0.0001504750516643055, "loss": 1.4543, "step": 19068 }, { "epoch": 0.24779295793178838, "grad_norm": 0.45945000648498535, "learning_rate": 0.00015047245220239412, "loss": 1.517, "step": 19069 }, { "epoch": 0.24780595247570428, "grad_norm": 0.4479677975177765, "learning_rate": 0.00015046985274048272, "loss": 1.4524, "step": 19070 }, { "epoch": 0.24781894701962015, "grad_norm": 0.4423813819885254, "learning_rate": 0.00015046725327857134, "loss": 1.4043, "step": 19071 }, { "epoch": 0.24783194156353602, "grad_norm": 0.37931036949157715, "learning_rate": 0.00015046465381665994, "loss": 1.2993, "step": 19072 }, { "epoch": 0.2478449361074519, "grad_norm": 0.36628457903862, "learning_rate": 0.0001504620543547486, "loss": 1.4395, "step": 19073 }, { "epoch": 0.24785793065136777, "grad_norm": 0.41657090187072754, "learning_rate": 0.0001504594548928372, "loss": 1.3384, "step": 19074 }, { "epoch": 0.24787092519528364, "grad_norm": 0.3791036903858185, "learning_rate": 0.0001504568554309258, "loss": 1.4581, "step": 19075 }, { "epoch": 0.24788391973919952, "grad_norm": 0.4603444039821625, "learning_rate": 0.0001504542559690144, "loss": 1.4532, "step": 19076 }, { "epoch": 0.2478969142831154, "grad_norm": 0.4019109904766083, "learning_rate": 0.00015045165650710303, "loss": 1.4926, "step": 19077 }, { "epoch": 0.24790990882703126, "grad_norm": 0.4186308979988098, "learning_rate": 0.00015044905704519166, "loss": 1.4957, "step": 19078 }, { "epoch": 0.24792290337094713, "grad_norm": 0.42851924896240234, "learning_rate": 0.00015044645758328026, "loss": 1.4571, "step": 19079 }, { "epoch": 0.247935897914863, "grad_norm": 0.4043085277080536, "learning_rate": 0.00015044385812136888, "loss": 1.3662, "step": 19080 }, { "epoch": 0.24794889245877888, "grad_norm": 0.3865445554256439, "learning_rate": 0.0001504412586594575, "loss": 1.3245, "step": 19081 }, { "epoch": 0.24796188700269475, "grad_norm": 0.4346553683280945, "learning_rate": 0.00015043865919754613, "loss": 1.2947, "step": 19082 }, { "epoch": 0.24797488154661063, "grad_norm": 0.4635961353778839, "learning_rate": 0.00015043605973563473, "loss": 1.4776, "step": 19083 }, { "epoch": 0.2479878760905265, "grad_norm": 0.33484140038490295, "learning_rate": 0.00015043346027372332, "loss": 1.1439, "step": 19084 }, { "epoch": 0.24800087063444237, "grad_norm": 0.34960395097732544, "learning_rate": 0.00015043086081181198, "loss": 1.3343, "step": 19085 }, { "epoch": 0.24801386517835824, "grad_norm": 0.3153984844684601, "learning_rate": 0.00015042826134990057, "loss": 1.4118, "step": 19086 }, { "epoch": 0.24802685972227412, "grad_norm": 0.36336901783943176, "learning_rate": 0.0001504256618879892, "loss": 1.5549, "step": 19087 }, { "epoch": 0.24803985426619, "grad_norm": 0.3608517348766327, "learning_rate": 0.0001504230624260778, "loss": 1.3839, "step": 19088 }, { "epoch": 0.24805284881010586, "grad_norm": 0.3816474676132202, "learning_rate": 0.00015042046296416642, "loss": 1.182, "step": 19089 }, { "epoch": 0.24806584335402174, "grad_norm": 0.5169097185134888, "learning_rate": 0.00015041786350225504, "loss": 1.3752, "step": 19090 }, { "epoch": 0.2480788378979376, "grad_norm": 0.5496385097503662, "learning_rate": 0.00015041526404034364, "loss": 1.4404, "step": 19091 }, { "epoch": 0.24809183244185348, "grad_norm": 0.4175279140472412, "learning_rate": 0.00015041266457843227, "loss": 1.3126, "step": 19092 }, { "epoch": 0.24810482698576936, "grad_norm": 0.4991360604763031, "learning_rate": 0.0001504100651165209, "loss": 1.5526, "step": 19093 }, { "epoch": 0.24811782152968523, "grad_norm": 0.3966275751590729, "learning_rate": 0.00015040746565460952, "loss": 1.3804, "step": 19094 }, { "epoch": 0.2481308160736011, "grad_norm": 0.36171290278434753, "learning_rate": 0.0001504048661926981, "loss": 1.398, "step": 19095 }, { "epoch": 0.24814381061751697, "grad_norm": 1.1638695001602173, "learning_rate": 0.00015040226673078674, "loss": 1.4331, "step": 19096 }, { "epoch": 0.24815680516143285, "grad_norm": 0.4792670011520386, "learning_rate": 0.00015039966726887536, "loss": 1.376, "step": 19097 }, { "epoch": 0.24816979970534872, "grad_norm": 0.44714289903640747, "learning_rate": 0.00015039706780696396, "loss": 1.2529, "step": 19098 }, { "epoch": 0.2481827942492646, "grad_norm": 0.4257495701313019, "learning_rate": 0.00015039446834505258, "loss": 1.3908, "step": 19099 }, { "epoch": 0.24819578879318047, "grad_norm": 0.4333556592464447, "learning_rate": 0.0001503918688831412, "loss": 1.1902, "step": 19100 }, { "epoch": 0.24820878333709634, "grad_norm": 0.29756325483322144, "learning_rate": 0.0001503892694212298, "loss": 1.3437, "step": 19101 }, { "epoch": 0.2482217778810122, "grad_norm": 0.491445392370224, "learning_rate": 0.00015038666995931843, "loss": 1.4148, "step": 19102 }, { "epoch": 0.24823477242492809, "grad_norm": 0.47462043166160583, "learning_rate": 0.00015038407049740703, "loss": 1.5003, "step": 19103 }, { "epoch": 0.24824776696884396, "grad_norm": 0.42689380049705505, "learning_rate": 0.00015038147103549568, "loss": 1.5778, "step": 19104 }, { "epoch": 0.24826076151275983, "grad_norm": 0.28072085976600647, "learning_rate": 0.00015037887157358428, "loss": 1.2322, "step": 19105 }, { "epoch": 0.2482737560566757, "grad_norm": 0.46227651834487915, "learning_rate": 0.0001503762721116729, "loss": 1.4735, "step": 19106 }, { "epoch": 0.24828675060059158, "grad_norm": 0.42137861251831055, "learning_rate": 0.0001503736726497615, "loss": 1.5118, "step": 19107 }, { "epoch": 0.24829974514450745, "grad_norm": 0.4753071963787079, "learning_rate": 0.00015037107318785012, "loss": 1.5744, "step": 19108 }, { "epoch": 0.24831273968842332, "grad_norm": 0.38856756687164307, "learning_rate": 0.00015036847372593875, "loss": 1.6, "step": 19109 }, { "epoch": 0.2483257342323392, "grad_norm": 0.4565708041191101, "learning_rate": 0.00015036587426402734, "loss": 1.3489, "step": 19110 }, { "epoch": 0.24833872877625507, "grad_norm": 0.421584814786911, "learning_rate": 0.00015036327480211597, "loss": 1.3473, "step": 19111 }, { "epoch": 0.24835172332017094, "grad_norm": 0.38016435503959656, "learning_rate": 0.0001503606753402046, "loss": 1.4225, "step": 19112 }, { "epoch": 0.24836471786408681, "grad_norm": 0.4585200250148773, "learning_rate": 0.0001503580758782932, "loss": 1.3235, "step": 19113 }, { "epoch": 0.2483777124080027, "grad_norm": 0.41468092799186707, "learning_rate": 0.00015035547641638182, "loss": 1.498, "step": 19114 }, { "epoch": 0.24839070695191856, "grad_norm": 0.2867843210697174, "learning_rate": 0.0001503528769544704, "loss": 1.2306, "step": 19115 }, { "epoch": 0.24840370149583443, "grad_norm": 0.4398726224899292, "learning_rate": 0.00015035027749255906, "loss": 1.5214, "step": 19116 }, { "epoch": 0.2484166960397503, "grad_norm": 0.4244706928730011, "learning_rate": 0.00015034767803064766, "loss": 1.3045, "step": 19117 }, { "epoch": 0.24842969058366618, "grad_norm": 0.3680809736251831, "learning_rate": 0.00015034507856873629, "loss": 1.4953, "step": 19118 }, { "epoch": 0.24844268512758205, "grad_norm": 0.3799309730529785, "learning_rate": 0.00015034247910682488, "loss": 1.3721, "step": 19119 }, { "epoch": 0.24845567967149793, "grad_norm": 0.319286972284317, "learning_rate": 0.0001503398796449135, "loss": 1.324, "step": 19120 }, { "epoch": 0.2484686742154138, "grad_norm": 0.3913177251815796, "learning_rate": 0.00015033728018300213, "loss": 1.3181, "step": 19121 }, { "epoch": 0.24848166875932967, "grad_norm": 0.4648880958557129, "learning_rate": 0.00015033468072109073, "loss": 1.544, "step": 19122 }, { "epoch": 0.24849466330324554, "grad_norm": 0.35948699712753296, "learning_rate": 0.00015033208125917935, "loss": 1.3323, "step": 19123 }, { "epoch": 0.24850765784716142, "grad_norm": 0.43552032113075256, "learning_rate": 0.00015032948179726798, "loss": 1.319, "step": 19124 }, { "epoch": 0.2485206523910773, "grad_norm": 0.41005679965019226, "learning_rate": 0.00015032688233535658, "loss": 1.3827, "step": 19125 }, { "epoch": 0.24853364693499316, "grad_norm": 0.3432054817676544, "learning_rate": 0.0001503242828734452, "loss": 1.5231, "step": 19126 }, { "epoch": 0.24854664147890904, "grad_norm": 0.3863566219806671, "learning_rate": 0.0001503216834115338, "loss": 1.3088, "step": 19127 }, { "epoch": 0.2485596360228249, "grad_norm": 0.49043983221054077, "learning_rate": 0.00015031908394962245, "loss": 1.6035, "step": 19128 }, { "epoch": 0.24857263056674078, "grad_norm": 0.42359763383865356, "learning_rate": 0.00015031648448771105, "loss": 1.4198, "step": 19129 }, { "epoch": 0.24858562511065666, "grad_norm": 0.429117888212204, "learning_rate": 0.00015031388502579967, "loss": 1.3661, "step": 19130 }, { "epoch": 0.24859861965457253, "grad_norm": 0.3501416742801666, "learning_rate": 0.0001503112855638883, "loss": 1.4501, "step": 19131 }, { "epoch": 0.2486116141984884, "grad_norm": 0.3616982698440552, "learning_rate": 0.0001503086861019769, "loss": 1.3478, "step": 19132 }, { "epoch": 0.24862460874240427, "grad_norm": 0.3698638677597046, "learning_rate": 0.00015030608664006552, "loss": 1.4481, "step": 19133 }, { "epoch": 0.24863760328632015, "grad_norm": 0.34999844431877136, "learning_rate": 0.00015030348717815412, "loss": 1.4285, "step": 19134 }, { "epoch": 0.24865059783023602, "grad_norm": 0.4181182086467743, "learning_rate": 0.00015030088771624277, "loss": 1.4721, "step": 19135 }, { "epoch": 0.2486635923741519, "grad_norm": 0.38231828808784485, "learning_rate": 0.00015029828825433136, "loss": 1.4484, "step": 19136 }, { "epoch": 0.24867658691806777, "grad_norm": 0.3951537609100342, "learning_rate": 0.00015029568879242, "loss": 1.4103, "step": 19137 }, { "epoch": 0.24868958146198364, "grad_norm": 0.4068044424057007, "learning_rate": 0.00015029308933050859, "loss": 1.3895, "step": 19138 }, { "epoch": 0.2487025760058995, "grad_norm": 0.3836880326271057, "learning_rate": 0.0001502904898685972, "loss": 1.3739, "step": 19139 }, { "epoch": 0.24871557054981538, "grad_norm": 0.3770371079444885, "learning_rate": 0.00015028789040668583, "loss": 1.4968, "step": 19140 }, { "epoch": 0.24872856509373126, "grad_norm": 0.442480206489563, "learning_rate": 0.00015028529094477443, "loss": 1.4166, "step": 19141 }, { "epoch": 0.24874155963764713, "grad_norm": 0.38389837741851807, "learning_rate": 0.00015028269148286306, "loss": 1.3675, "step": 19142 }, { "epoch": 0.248754554181563, "grad_norm": 0.3987143039703369, "learning_rate": 0.00015028009202095168, "loss": 1.3711, "step": 19143 }, { "epoch": 0.24876754872547888, "grad_norm": 0.4099433124065399, "learning_rate": 0.00015027749255904028, "loss": 1.3545, "step": 19144 }, { "epoch": 0.24878054326939475, "grad_norm": 0.49564334750175476, "learning_rate": 0.0001502748930971289, "loss": 1.4243, "step": 19145 }, { "epoch": 0.24879353781331062, "grad_norm": 0.45382511615753174, "learning_rate": 0.0001502722936352175, "loss": 1.3203, "step": 19146 }, { "epoch": 0.24880653235722652, "grad_norm": 0.4894731044769287, "learning_rate": 0.00015026969417330615, "loss": 1.6907, "step": 19147 }, { "epoch": 0.2488195269011424, "grad_norm": 0.40606409311294556, "learning_rate": 0.00015026709471139475, "loss": 1.4863, "step": 19148 }, { "epoch": 0.24883252144505827, "grad_norm": 0.3097348213195801, "learning_rate": 0.00015026449524948337, "loss": 1.2026, "step": 19149 }, { "epoch": 0.24884551598897414, "grad_norm": 0.30959758162498474, "learning_rate": 0.00015026189578757197, "loss": 1.3094, "step": 19150 }, { "epoch": 0.24885851053289001, "grad_norm": 0.4124270975589752, "learning_rate": 0.0001502592963256606, "loss": 1.2226, "step": 19151 }, { "epoch": 0.2488715050768059, "grad_norm": 0.29642346501350403, "learning_rate": 0.00015025669686374922, "loss": 1.4754, "step": 19152 }, { "epoch": 0.24888449962072176, "grad_norm": 0.3061631917953491, "learning_rate": 0.00015025409740183782, "loss": 1.4331, "step": 19153 }, { "epoch": 0.24889749416463763, "grad_norm": 0.3950229585170746, "learning_rate": 0.00015025149793992644, "loss": 1.3908, "step": 19154 }, { "epoch": 0.2489104887085535, "grad_norm": 0.3768841326236725, "learning_rate": 0.00015024889847801507, "loss": 1.7169, "step": 19155 }, { "epoch": 0.24892348325246938, "grad_norm": 0.33753305673599243, "learning_rate": 0.00015024629901610366, "loss": 1.4618, "step": 19156 }, { "epoch": 0.24893647779638525, "grad_norm": 0.3248215615749359, "learning_rate": 0.0001502436995541923, "loss": 1.4749, "step": 19157 }, { "epoch": 0.24894947234030113, "grad_norm": 0.4011741280555725, "learning_rate": 0.00015024110009228089, "loss": 1.5527, "step": 19158 }, { "epoch": 0.248962466884217, "grad_norm": 0.38974371552467346, "learning_rate": 0.00015023850063036954, "loss": 1.2757, "step": 19159 }, { "epoch": 0.24897546142813287, "grad_norm": 0.4189406931400299, "learning_rate": 0.00015023590116845813, "loss": 1.2964, "step": 19160 }, { "epoch": 0.24898845597204874, "grad_norm": 0.47264423966407776, "learning_rate": 0.00015023330170654676, "loss": 1.3981, "step": 19161 }, { "epoch": 0.24900145051596462, "grad_norm": 0.41537484526634216, "learning_rate": 0.00015023070224463536, "loss": 1.4059, "step": 19162 }, { "epoch": 0.2490144450598805, "grad_norm": 0.40705251693725586, "learning_rate": 0.00015022810278272398, "loss": 1.4244, "step": 19163 }, { "epoch": 0.24902743960379636, "grad_norm": 0.36592376232147217, "learning_rate": 0.0001502255033208126, "loss": 1.6813, "step": 19164 }, { "epoch": 0.24904043414771224, "grad_norm": 0.3731706142425537, "learning_rate": 0.0001502229038589012, "loss": 1.2138, "step": 19165 }, { "epoch": 0.2490534286916281, "grad_norm": 0.3618335723876953, "learning_rate": 0.00015022030439698983, "loss": 1.3899, "step": 19166 }, { "epoch": 0.24906642323554398, "grad_norm": 0.3592146039009094, "learning_rate": 0.00015021770493507845, "loss": 1.196, "step": 19167 }, { "epoch": 0.24907941777945986, "grad_norm": 0.44842615723609924, "learning_rate": 0.00015021510547316705, "loss": 1.4077, "step": 19168 }, { "epoch": 0.24909241232337573, "grad_norm": 0.3890256881713867, "learning_rate": 0.00015021250601125567, "loss": 1.4916, "step": 19169 }, { "epoch": 0.2491054068672916, "grad_norm": 0.3488661050796509, "learning_rate": 0.0001502099065493443, "loss": 1.4662, "step": 19170 }, { "epoch": 0.24911840141120747, "grad_norm": 0.4618266224861145, "learning_rate": 0.00015020730708743292, "loss": 1.2772, "step": 19171 }, { "epoch": 0.24913139595512335, "grad_norm": 0.3948304057121277, "learning_rate": 0.00015020470762552152, "loss": 1.4734, "step": 19172 }, { "epoch": 0.24914439049903922, "grad_norm": 0.5780940651893616, "learning_rate": 0.00015020210816361014, "loss": 1.5365, "step": 19173 }, { "epoch": 0.2491573850429551, "grad_norm": 0.40671610832214355, "learning_rate": 0.00015019950870169877, "loss": 1.4395, "step": 19174 }, { "epoch": 0.24917037958687097, "grad_norm": 0.4146711528301239, "learning_rate": 0.00015019690923978737, "loss": 1.3896, "step": 19175 }, { "epoch": 0.24918337413078684, "grad_norm": 0.3773079812526703, "learning_rate": 0.000150194309777876, "loss": 1.3601, "step": 19176 }, { "epoch": 0.2491963686747027, "grad_norm": 0.2879582941532135, "learning_rate": 0.0001501917103159646, "loss": 1.275, "step": 19177 }, { "epoch": 0.24920936321861858, "grad_norm": 0.38658273220062256, "learning_rate": 0.00015018911085405324, "loss": 1.4351, "step": 19178 }, { "epoch": 0.24922235776253446, "grad_norm": 0.38487473130226135, "learning_rate": 0.00015018651139214184, "loss": 1.405, "step": 19179 }, { "epoch": 0.24923535230645033, "grad_norm": 0.38571348786354065, "learning_rate": 0.00015018391193023043, "loss": 1.3919, "step": 19180 }, { "epoch": 0.2492483468503662, "grad_norm": 0.33694788813591003, "learning_rate": 0.00015018131246831906, "loss": 1.2644, "step": 19181 }, { "epoch": 0.24926134139428208, "grad_norm": 0.3174707889556885, "learning_rate": 0.00015017871300640768, "loss": 1.4046, "step": 19182 }, { "epoch": 0.24927433593819795, "grad_norm": 0.39126893877983093, "learning_rate": 0.0001501761135444963, "loss": 1.5128, "step": 19183 }, { "epoch": 0.24928733048211382, "grad_norm": 0.36360475420951843, "learning_rate": 0.0001501735140825849, "loss": 1.3412, "step": 19184 }, { "epoch": 0.2493003250260297, "grad_norm": 0.35371512174606323, "learning_rate": 0.00015017091462067353, "loss": 1.1731, "step": 19185 }, { "epoch": 0.24931331956994557, "grad_norm": 0.429453581571579, "learning_rate": 0.00015016831515876215, "loss": 1.4355, "step": 19186 }, { "epoch": 0.24932631411386144, "grad_norm": 0.33113598823547363, "learning_rate": 0.00015016571569685075, "loss": 1.3623, "step": 19187 }, { "epoch": 0.24933930865777731, "grad_norm": 0.6194899082183838, "learning_rate": 0.00015016311623493938, "loss": 1.4792, "step": 19188 }, { "epoch": 0.2493523032016932, "grad_norm": 0.3941209614276886, "learning_rate": 0.00015016051677302797, "loss": 1.279, "step": 19189 }, { "epoch": 0.24936529774560906, "grad_norm": 0.38978275656700134, "learning_rate": 0.00015015791731111663, "loss": 1.4462, "step": 19190 }, { "epoch": 0.24937829228952493, "grad_norm": 0.420627623796463, "learning_rate": 0.00015015531784920522, "loss": 1.3809, "step": 19191 }, { "epoch": 0.2493912868334408, "grad_norm": 0.32875826954841614, "learning_rate": 0.00015015271838729382, "loss": 1.6137, "step": 19192 }, { "epoch": 0.24940428137735668, "grad_norm": 0.3822869658470154, "learning_rate": 0.00015015011892538244, "loss": 1.4486, "step": 19193 }, { "epoch": 0.24941727592127255, "grad_norm": 0.36073216795921326, "learning_rate": 0.00015014751946347107, "loss": 1.3713, "step": 19194 }, { "epoch": 0.24943027046518843, "grad_norm": 0.4094196856021881, "learning_rate": 0.0001501449200015597, "loss": 1.4062, "step": 19195 }, { "epoch": 0.2494432650091043, "grad_norm": 0.398723840713501, "learning_rate": 0.0001501423205396483, "loss": 1.5017, "step": 19196 }, { "epoch": 0.24945625955302017, "grad_norm": 0.4532181918621063, "learning_rate": 0.00015013972107773692, "loss": 1.2433, "step": 19197 }, { "epoch": 0.24946925409693604, "grad_norm": 0.458676815032959, "learning_rate": 0.00015013712161582554, "loss": 1.4285, "step": 19198 }, { "epoch": 0.24948224864085192, "grad_norm": 0.4670829772949219, "learning_rate": 0.00015013452215391414, "loss": 1.4901, "step": 19199 }, { "epoch": 0.2494952431847678, "grad_norm": 0.4188287854194641, "learning_rate": 0.00015013192269200276, "loss": 1.4784, "step": 19200 }, { "epoch": 0.24950823772868366, "grad_norm": 0.38152992725372314, "learning_rate": 0.00015012932323009136, "loss": 1.4712, "step": 19201 }, { "epoch": 0.24952123227259954, "grad_norm": 0.42134958505630493, "learning_rate": 0.00015012672376818, "loss": 1.4233, "step": 19202 }, { "epoch": 0.2495342268165154, "grad_norm": 0.4274691045284271, "learning_rate": 0.0001501241243062686, "loss": 1.4307, "step": 19203 }, { "epoch": 0.24954722136043128, "grad_norm": 0.39370617270469666, "learning_rate": 0.00015012152484435723, "loss": 1.2499, "step": 19204 }, { "epoch": 0.24956021590434715, "grad_norm": 0.48284730315208435, "learning_rate": 0.00015011892538244586, "loss": 1.3994, "step": 19205 }, { "epoch": 0.24957321044826303, "grad_norm": 0.45212820172309875, "learning_rate": 0.00015011632592053445, "loss": 1.2625, "step": 19206 }, { "epoch": 0.2495862049921789, "grad_norm": 0.4460216462612152, "learning_rate": 0.00015011372645862308, "loss": 1.1583, "step": 19207 }, { "epoch": 0.24959919953609477, "grad_norm": 0.29853329062461853, "learning_rate": 0.00015011112699671168, "loss": 1.5585, "step": 19208 }, { "epoch": 0.24961219408001065, "grad_norm": 0.32156065106391907, "learning_rate": 0.0001501085275348003, "loss": 1.345, "step": 19209 }, { "epoch": 0.24962518862392652, "grad_norm": 0.40135571360588074, "learning_rate": 0.00015010592807288893, "loss": 1.396, "step": 19210 }, { "epoch": 0.2496381831678424, "grad_norm": 0.4073614478111267, "learning_rate": 0.00015010332861097752, "loss": 1.4523, "step": 19211 }, { "epoch": 0.24965117771175827, "grad_norm": 0.40700840950012207, "learning_rate": 0.00015010072914906615, "loss": 1.4825, "step": 19212 }, { "epoch": 0.24966417225567414, "grad_norm": 0.34999483823776245, "learning_rate": 0.00015009812968715477, "loss": 1.3906, "step": 19213 }, { "epoch": 0.24967716679959, "grad_norm": 0.31180256605148315, "learning_rate": 0.0001500955302252434, "loss": 1.2635, "step": 19214 }, { "epoch": 0.24969016134350588, "grad_norm": 0.48124930262565613, "learning_rate": 0.000150092930763332, "loss": 1.5987, "step": 19215 }, { "epoch": 0.24970315588742176, "grad_norm": 0.2858378291130066, "learning_rate": 0.00015009033130142062, "loss": 1.0638, "step": 19216 }, { "epoch": 0.24971615043133763, "grad_norm": 0.43331724405288696, "learning_rate": 0.00015008773183950924, "loss": 1.4054, "step": 19217 }, { "epoch": 0.2497291449752535, "grad_norm": 0.40219780802726746, "learning_rate": 0.00015008513237759784, "loss": 1.3669, "step": 19218 }, { "epoch": 0.24974213951916938, "grad_norm": 0.35651901364326477, "learning_rate": 0.00015008253291568646, "loss": 1.4312, "step": 19219 }, { "epoch": 0.24975513406308525, "grad_norm": 0.4978581964969635, "learning_rate": 0.00015007993345377506, "loss": 1.4445, "step": 19220 }, { "epoch": 0.24976812860700112, "grad_norm": 0.31322818994522095, "learning_rate": 0.0001500773339918637, "loss": 1.2865, "step": 19221 }, { "epoch": 0.249781123150917, "grad_norm": 0.30895453691482544, "learning_rate": 0.0001500747345299523, "loss": 1.3561, "step": 19222 }, { "epoch": 0.2497941176948329, "grad_norm": 0.49428462982177734, "learning_rate": 0.0001500721350680409, "loss": 1.3864, "step": 19223 }, { "epoch": 0.24980711223874877, "grad_norm": 0.3733403980731964, "learning_rate": 0.00015006953560612953, "loss": 1.483, "step": 19224 }, { "epoch": 0.24982010678266464, "grad_norm": 0.39760130643844604, "learning_rate": 0.00015006693614421816, "loss": 1.2236, "step": 19225 }, { "epoch": 0.24983310132658051, "grad_norm": 0.4495384991168976, "learning_rate": 0.00015006433668230678, "loss": 1.666, "step": 19226 }, { "epoch": 0.2498460958704964, "grad_norm": 0.3407870829105377, "learning_rate": 0.00015006173722039538, "loss": 1.3406, "step": 19227 }, { "epoch": 0.24985909041441226, "grad_norm": 0.33544498682022095, "learning_rate": 0.000150059137758484, "loss": 1.458, "step": 19228 }, { "epoch": 0.24987208495832813, "grad_norm": 0.40889278054237366, "learning_rate": 0.00015005653829657263, "loss": 1.4447, "step": 19229 }, { "epoch": 0.249885079502244, "grad_norm": 0.3434186577796936, "learning_rate": 0.00015005393883466123, "loss": 1.4921, "step": 19230 }, { "epoch": 0.24989807404615988, "grad_norm": 0.43655088543891907, "learning_rate": 0.00015005133937274985, "loss": 1.4809, "step": 19231 }, { "epoch": 0.24991106859007575, "grad_norm": 0.38057807087898254, "learning_rate": 0.00015004873991083845, "loss": 1.5081, "step": 19232 }, { "epoch": 0.24992406313399163, "grad_norm": 0.4958529472351074, "learning_rate": 0.0001500461404489271, "loss": 1.502, "step": 19233 }, { "epoch": 0.2499370576779075, "grad_norm": 0.40056657791137695, "learning_rate": 0.0001500435409870157, "loss": 1.1288, "step": 19234 }, { "epoch": 0.24995005222182337, "grad_norm": 0.40495529770851135, "learning_rate": 0.0001500409415251043, "loss": 1.3488, "step": 19235 }, { "epoch": 0.24996304676573924, "grad_norm": 0.3337843716144562, "learning_rate": 0.00015003834206319292, "loss": 1.4262, "step": 19236 }, { "epoch": 0.24997604130965512, "grad_norm": 0.3919743597507477, "learning_rate": 0.00015003574260128154, "loss": 1.4275, "step": 19237 }, { "epoch": 0.249989035853571, "grad_norm": 0.42955249547958374, "learning_rate": 0.00015003314313937017, "loss": 1.3145, "step": 19238 }, { "epoch": 0.25000203039748686, "grad_norm": 0.41069361567497253, "learning_rate": 0.00015003054367745876, "loss": 1.5642, "step": 19239 }, { "epoch": 0.2500150249414027, "grad_norm": 0.4328746199607849, "learning_rate": 0.0001500279442155474, "loss": 1.4593, "step": 19240 }, { "epoch": 0.2500280194853186, "grad_norm": 0.3221152126789093, "learning_rate": 0.000150025344753636, "loss": 1.2975, "step": 19241 }, { "epoch": 0.25004101402923445, "grad_norm": 0.4952162504196167, "learning_rate": 0.0001500227452917246, "loss": 1.4493, "step": 19242 }, { "epoch": 0.25005400857315035, "grad_norm": 0.27219510078430176, "learning_rate": 0.00015002014582981324, "loss": 1.1862, "step": 19243 }, { "epoch": 0.2500670031170662, "grad_norm": 0.40036454796791077, "learning_rate": 0.00015001754636790186, "loss": 1.2394, "step": 19244 }, { "epoch": 0.2500799976609821, "grad_norm": 0.3157905340194702, "learning_rate": 0.00015001494690599048, "loss": 1.3851, "step": 19245 }, { "epoch": 0.25009299220489795, "grad_norm": 0.3738485276699066, "learning_rate": 0.00015001234744407908, "loss": 1.7611, "step": 19246 }, { "epoch": 0.25010598674881385, "grad_norm": 0.4408351182937622, "learning_rate": 0.00015000974798216768, "loss": 1.229, "step": 19247 }, { "epoch": 0.2501189812927297, "grad_norm": 0.3717687726020813, "learning_rate": 0.00015000714852025633, "loss": 1.5592, "step": 19248 }, { "epoch": 0.2501319758366456, "grad_norm": 0.32244768738746643, "learning_rate": 0.00015000454905834493, "loss": 1.4232, "step": 19249 }, { "epoch": 0.25014497038056144, "grad_norm": 0.39478233456611633, "learning_rate": 0.00015000194959643355, "loss": 1.2541, "step": 19250 }, { "epoch": 0.25015796492447734, "grad_norm": 0.3973926901817322, "learning_rate": 0.00014999935013452215, "loss": 1.4973, "step": 19251 }, { "epoch": 0.2501709594683932, "grad_norm": 0.41790857911109924, "learning_rate": 0.00014999675067261077, "loss": 1.5505, "step": 19252 }, { "epoch": 0.2501839540123091, "grad_norm": 0.3618704676628113, "learning_rate": 0.0001499941512106994, "loss": 1.3576, "step": 19253 }, { "epoch": 0.25019694855622493, "grad_norm": 0.38586872816085815, "learning_rate": 0.000149991551748788, "loss": 1.3541, "step": 19254 }, { "epoch": 0.25020994310014083, "grad_norm": 0.4389822781085968, "learning_rate": 0.00014998895228687662, "loss": 1.4139, "step": 19255 }, { "epoch": 0.2502229376440567, "grad_norm": 0.43892979621887207, "learning_rate": 0.00014998635282496525, "loss": 1.3491, "step": 19256 }, { "epoch": 0.2502359321879726, "grad_norm": 0.45594754815101624, "learning_rate": 0.00014998375336305387, "loss": 1.4041, "step": 19257 }, { "epoch": 0.2502489267318884, "grad_norm": 0.37851089239120483, "learning_rate": 0.00014998115390114247, "loss": 1.3846, "step": 19258 }, { "epoch": 0.2502619212758043, "grad_norm": 0.3150915503501892, "learning_rate": 0.0001499785544392311, "loss": 1.2744, "step": 19259 }, { "epoch": 0.25027491581972017, "grad_norm": 0.43895670771598816, "learning_rate": 0.00014997595497731972, "loss": 1.3074, "step": 19260 }, { "epoch": 0.25028791036363607, "grad_norm": 0.32327836751937866, "learning_rate": 0.0001499733555154083, "loss": 1.5824, "step": 19261 }, { "epoch": 0.25030090490755197, "grad_norm": 0.38560882210731506, "learning_rate": 0.00014997075605349694, "loss": 1.3508, "step": 19262 }, { "epoch": 0.2503138994514678, "grad_norm": 0.3774488866329193, "learning_rate": 0.00014996815659158554, "loss": 1.2967, "step": 19263 }, { "epoch": 0.2503268939953837, "grad_norm": 0.3617823123931885, "learning_rate": 0.00014996555712967416, "loss": 1.4806, "step": 19264 }, { "epoch": 0.25033988853929956, "grad_norm": 0.3435172438621521, "learning_rate": 0.00014996295766776278, "loss": 1.3939, "step": 19265 }, { "epoch": 0.25035288308321546, "grad_norm": 0.4564226567745209, "learning_rate": 0.00014996035820585138, "loss": 1.4135, "step": 19266 }, { "epoch": 0.2503658776271313, "grad_norm": 0.3977218568325043, "learning_rate": 0.00014995775874394, "loss": 1.4134, "step": 19267 }, { "epoch": 0.2503788721710472, "grad_norm": 0.3221655786037445, "learning_rate": 0.00014995515928202863, "loss": 1.3027, "step": 19268 }, { "epoch": 0.25039186671496305, "grad_norm": 0.41427087783813477, "learning_rate": 0.00014995255982011725, "loss": 1.3775, "step": 19269 }, { "epoch": 0.25040486125887895, "grad_norm": 0.31644406914711, "learning_rate": 0.00014994996035820585, "loss": 1.2903, "step": 19270 }, { "epoch": 0.2504178558027948, "grad_norm": 0.4224737286567688, "learning_rate": 0.00014994736089629448, "loss": 1.3279, "step": 19271 }, { "epoch": 0.2504308503467107, "grad_norm": 0.4087494909763336, "learning_rate": 0.0001499447614343831, "loss": 1.484, "step": 19272 }, { "epoch": 0.25044384489062654, "grad_norm": 0.48848119378089905, "learning_rate": 0.0001499421619724717, "loss": 1.299, "step": 19273 }, { "epoch": 0.25045683943454244, "grad_norm": 0.5020224452018738, "learning_rate": 0.00014993956251056032, "loss": 1.481, "step": 19274 }, { "epoch": 0.2504698339784583, "grad_norm": 0.48772576451301575, "learning_rate": 0.00014993696304864892, "loss": 1.3352, "step": 19275 }, { "epoch": 0.2504828285223742, "grad_norm": 0.3205771744251251, "learning_rate": 0.00014993436358673755, "loss": 1.2712, "step": 19276 }, { "epoch": 0.25049582306629004, "grad_norm": 0.43504101037979126, "learning_rate": 0.00014993176412482617, "loss": 1.4005, "step": 19277 }, { "epoch": 0.25050881761020594, "grad_norm": 0.41573214530944824, "learning_rate": 0.00014992916466291477, "loss": 1.5298, "step": 19278 }, { "epoch": 0.2505218121541218, "grad_norm": 0.38092154264450073, "learning_rate": 0.00014992656520100342, "loss": 1.3994, "step": 19279 }, { "epoch": 0.2505348066980377, "grad_norm": 0.2937425374984741, "learning_rate": 0.00014992396573909202, "loss": 1.4855, "step": 19280 }, { "epoch": 0.2505478012419535, "grad_norm": 0.3984566032886505, "learning_rate": 0.00014992136627718064, "loss": 1.3065, "step": 19281 }, { "epoch": 0.25056079578586943, "grad_norm": 0.4287436306476593, "learning_rate": 0.00014991876681526924, "loss": 1.3654, "step": 19282 }, { "epoch": 0.2505737903297853, "grad_norm": 0.41433101892471313, "learning_rate": 0.00014991616735335786, "loss": 1.3354, "step": 19283 }, { "epoch": 0.2505867848737012, "grad_norm": 0.343309223651886, "learning_rate": 0.0001499135678914465, "loss": 1.4141, "step": 19284 }, { "epoch": 0.250599779417617, "grad_norm": 0.36281776428222656, "learning_rate": 0.00014991096842953508, "loss": 1.2881, "step": 19285 }, { "epoch": 0.2506127739615329, "grad_norm": 0.4217841327190399, "learning_rate": 0.0001499083689676237, "loss": 1.5818, "step": 19286 }, { "epoch": 0.25062576850544876, "grad_norm": 0.42087310552597046, "learning_rate": 0.00014990576950571233, "loss": 1.5077, "step": 19287 }, { "epoch": 0.25063876304936467, "grad_norm": 0.4089794158935547, "learning_rate": 0.00014990317004380096, "loss": 1.4815, "step": 19288 }, { "epoch": 0.2506517575932805, "grad_norm": 0.34926196932792664, "learning_rate": 0.00014990057058188955, "loss": 1.3468, "step": 19289 }, { "epoch": 0.2506647521371964, "grad_norm": 0.40168845653533936, "learning_rate": 0.00014989797111997815, "loss": 1.575, "step": 19290 }, { "epoch": 0.25067774668111226, "grad_norm": 0.4461529850959778, "learning_rate": 0.0001498953716580668, "loss": 1.599, "step": 19291 }, { "epoch": 0.25069074122502816, "grad_norm": 0.5150933861732483, "learning_rate": 0.0001498927721961554, "loss": 1.6999, "step": 19292 }, { "epoch": 0.250703735768944, "grad_norm": 0.4014335572719574, "learning_rate": 0.00014989017273424403, "loss": 1.3744, "step": 19293 }, { "epoch": 0.2507167303128599, "grad_norm": 0.44097307324409485, "learning_rate": 0.00014988757327233262, "loss": 1.3409, "step": 19294 }, { "epoch": 0.25072972485677575, "grad_norm": 0.4230285584926605, "learning_rate": 0.00014988497381042125, "loss": 1.5175, "step": 19295 }, { "epoch": 0.25074271940069165, "grad_norm": 0.35233306884765625, "learning_rate": 0.00014988237434850987, "loss": 1.5311, "step": 19296 }, { "epoch": 0.2507557139446075, "grad_norm": 0.2650070786476135, "learning_rate": 0.00014987977488659847, "loss": 1.2741, "step": 19297 }, { "epoch": 0.2507687084885234, "grad_norm": 0.39668363332748413, "learning_rate": 0.0001498771754246871, "loss": 1.4199, "step": 19298 }, { "epoch": 0.25078170303243924, "grad_norm": 0.3839765787124634, "learning_rate": 0.00014987457596277572, "loss": 1.5785, "step": 19299 }, { "epoch": 0.25079469757635514, "grad_norm": 0.4144365191459656, "learning_rate": 0.00014987197650086434, "loss": 1.3323, "step": 19300 }, { "epoch": 0.250807692120271, "grad_norm": 0.4161668121814728, "learning_rate": 0.00014986937703895294, "loss": 1.4305, "step": 19301 }, { "epoch": 0.2508206866641869, "grad_norm": 0.4364469349384308, "learning_rate": 0.00014986677757704154, "loss": 1.5838, "step": 19302 }, { "epoch": 0.25083368120810273, "grad_norm": 0.4104681611061096, "learning_rate": 0.0001498641781151302, "loss": 1.4805, "step": 19303 }, { "epoch": 0.25084667575201863, "grad_norm": 0.3963468372821808, "learning_rate": 0.0001498615786532188, "loss": 1.45, "step": 19304 }, { "epoch": 0.2508596702959345, "grad_norm": 0.3825284242630005, "learning_rate": 0.0001498589791913074, "loss": 1.418, "step": 19305 }, { "epoch": 0.2508726648398504, "grad_norm": 0.3770049512386322, "learning_rate": 0.000149856379729396, "loss": 1.3113, "step": 19306 }, { "epoch": 0.2508856593837662, "grad_norm": 0.41633182764053345, "learning_rate": 0.00014985378026748463, "loss": 1.364, "step": 19307 }, { "epoch": 0.2508986539276821, "grad_norm": 0.44034066796302795, "learning_rate": 0.00014985118080557326, "loss": 1.2433, "step": 19308 }, { "epoch": 0.25091164847159797, "grad_norm": 0.2963191568851471, "learning_rate": 0.00014984858134366185, "loss": 1.4263, "step": 19309 }, { "epoch": 0.25092464301551387, "grad_norm": 0.3092297315597534, "learning_rate": 0.00014984598188175048, "loss": 1.0592, "step": 19310 }, { "epoch": 0.2509376375594297, "grad_norm": 0.4465179443359375, "learning_rate": 0.0001498433824198391, "loss": 1.6107, "step": 19311 }, { "epoch": 0.2509506321033456, "grad_norm": 0.4339119493961334, "learning_rate": 0.00014984078295792773, "loss": 1.4658, "step": 19312 }, { "epoch": 0.25096362664726146, "grad_norm": 0.46140626072883606, "learning_rate": 0.00014983818349601633, "loss": 1.4549, "step": 19313 }, { "epoch": 0.25097662119117736, "grad_norm": 0.4929795265197754, "learning_rate": 0.00014983558403410495, "loss": 1.4189, "step": 19314 }, { "epoch": 0.2509896157350932, "grad_norm": 0.45960184931755066, "learning_rate": 0.00014983298457219357, "loss": 1.3732, "step": 19315 }, { "epoch": 0.2510026102790091, "grad_norm": 0.42808791995048523, "learning_rate": 0.00014983038511028217, "loss": 1.4864, "step": 19316 }, { "epoch": 0.25101560482292495, "grad_norm": 0.30764973163604736, "learning_rate": 0.0001498277856483708, "loss": 1.5246, "step": 19317 }, { "epoch": 0.25102859936684085, "grad_norm": 0.3849032521247864, "learning_rate": 0.00014982518618645942, "loss": 1.4392, "step": 19318 }, { "epoch": 0.2510415939107567, "grad_norm": 0.3228871524333954, "learning_rate": 0.00014982258672454802, "loss": 1.2357, "step": 19319 }, { "epoch": 0.2510545884546726, "grad_norm": 0.42633679509162903, "learning_rate": 0.00014981998726263664, "loss": 1.4902, "step": 19320 }, { "epoch": 0.25106758299858845, "grad_norm": 0.38729336857795715, "learning_rate": 0.00014981738780072524, "loss": 1.3342, "step": 19321 }, { "epoch": 0.25108057754250435, "grad_norm": 0.318507581949234, "learning_rate": 0.0001498147883388139, "loss": 1.3358, "step": 19322 }, { "epoch": 0.2510935720864202, "grad_norm": 0.47785621881484985, "learning_rate": 0.0001498121888769025, "loss": 1.4492, "step": 19323 }, { "epoch": 0.2511065666303361, "grad_norm": 0.3477509617805481, "learning_rate": 0.00014980958941499111, "loss": 1.4552, "step": 19324 }, { "epoch": 0.25111956117425194, "grad_norm": 0.5051265954971313, "learning_rate": 0.0001498069899530797, "loss": 1.5248, "step": 19325 }, { "epoch": 0.25113255571816784, "grad_norm": 0.422441691160202, "learning_rate": 0.00014980439049116834, "loss": 1.3613, "step": 19326 }, { "epoch": 0.2511455502620837, "grad_norm": 0.4308181703090668, "learning_rate": 0.00014980179102925696, "loss": 1.5678, "step": 19327 }, { "epoch": 0.2511585448059996, "grad_norm": 0.4285312294960022, "learning_rate": 0.00014979919156734556, "loss": 1.4648, "step": 19328 }, { "epoch": 0.25117153934991543, "grad_norm": 0.3926316797733307, "learning_rate": 0.00014979659210543418, "loss": 1.3372, "step": 19329 }, { "epoch": 0.25118453389383133, "grad_norm": 0.26758047938346863, "learning_rate": 0.0001497939926435228, "loss": 1.4504, "step": 19330 }, { "epoch": 0.2511975284377472, "grad_norm": 0.4403420388698578, "learning_rate": 0.0001497913931816114, "loss": 1.4039, "step": 19331 }, { "epoch": 0.2512105229816631, "grad_norm": 0.35894283652305603, "learning_rate": 0.00014978879371970003, "loss": 1.3014, "step": 19332 }, { "epoch": 0.2512235175255789, "grad_norm": 0.4046807289123535, "learning_rate": 0.00014978619425778863, "loss": 1.4148, "step": 19333 }, { "epoch": 0.2512365120694948, "grad_norm": 0.35515445470809937, "learning_rate": 0.00014978359479587728, "loss": 1.2439, "step": 19334 }, { "epoch": 0.25124950661341067, "grad_norm": 0.36843305826187134, "learning_rate": 0.00014978099533396587, "loss": 1.5725, "step": 19335 }, { "epoch": 0.25126250115732657, "grad_norm": 0.3644530177116394, "learning_rate": 0.0001497783958720545, "loss": 1.3158, "step": 19336 }, { "epoch": 0.25127549570124247, "grad_norm": 0.3628610372543335, "learning_rate": 0.0001497757964101431, "loss": 1.3984, "step": 19337 }, { "epoch": 0.2512884902451583, "grad_norm": 0.4411577582359314, "learning_rate": 0.00014977319694823172, "loss": 1.3599, "step": 19338 }, { "epoch": 0.2513014847890742, "grad_norm": 0.4517787992954254, "learning_rate": 0.00014977059748632035, "loss": 1.4633, "step": 19339 }, { "epoch": 0.25131447933299006, "grad_norm": 0.4036463499069214, "learning_rate": 0.00014976799802440894, "loss": 1.4428, "step": 19340 }, { "epoch": 0.25132747387690596, "grad_norm": 0.3981083333492279, "learning_rate": 0.00014976539856249757, "loss": 1.3533, "step": 19341 }, { "epoch": 0.2513404684208218, "grad_norm": 0.4366310238838196, "learning_rate": 0.0001497627991005862, "loss": 1.3081, "step": 19342 }, { "epoch": 0.2513534629647377, "grad_norm": 0.4536551237106323, "learning_rate": 0.00014976019963867482, "loss": 1.3329, "step": 19343 }, { "epoch": 0.25136645750865355, "grad_norm": 0.4184654951095581, "learning_rate": 0.00014975760017676341, "loss": 1.3681, "step": 19344 }, { "epoch": 0.25137945205256945, "grad_norm": 0.46820053458213806, "learning_rate": 0.000149755000714852, "loss": 1.5395, "step": 19345 }, { "epoch": 0.2513924465964853, "grad_norm": 0.4538785517215729, "learning_rate": 0.00014975240125294066, "loss": 1.4008, "step": 19346 }, { "epoch": 0.2514054411404012, "grad_norm": 0.3344264626502991, "learning_rate": 0.00014974980179102926, "loss": 1.5737, "step": 19347 }, { "epoch": 0.25141843568431704, "grad_norm": 0.38885772228240967, "learning_rate": 0.00014974720232911788, "loss": 1.3549, "step": 19348 }, { "epoch": 0.25143143022823294, "grad_norm": 0.3296425938606262, "learning_rate": 0.00014974460286720648, "loss": 1.337, "step": 19349 }, { "epoch": 0.2514444247721488, "grad_norm": 0.5911374092102051, "learning_rate": 0.0001497420034052951, "loss": 1.4441, "step": 19350 }, { "epoch": 0.2514574193160647, "grad_norm": 0.4292764663696289, "learning_rate": 0.00014973940394338373, "loss": 1.3529, "step": 19351 }, { "epoch": 0.25147041385998053, "grad_norm": 0.3773444890975952, "learning_rate": 0.00014973680448147233, "loss": 1.3774, "step": 19352 }, { "epoch": 0.25148340840389644, "grad_norm": 0.4887596368789673, "learning_rate": 0.00014973420501956098, "loss": 1.5905, "step": 19353 }, { "epoch": 0.2514964029478123, "grad_norm": 0.4610773026943207, "learning_rate": 0.00014973160555764958, "loss": 1.5496, "step": 19354 }, { "epoch": 0.2515093974917282, "grad_norm": 0.3434821367263794, "learning_rate": 0.0001497290060957382, "loss": 1.5048, "step": 19355 }, { "epoch": 0.251522392035644, "grad_norm": 0.4377850890159607, "learning_rate": 0.0001497264066338268, "loss": 1.5321, "step": 19356 }, { "epoch": 0.2515353865795599, "grad_norm": 0.4377135634422302, "learning_rate": 0.00014972380717191542, "loss": 1.4132, "step": 19357 }, { "epoch": 0.2515483811234758, "grad_norm": 0.42370685935020447, "learning_rate": 0.00014972120771000405, "loss": 1.5159, "step": 19358 }, { "epoch": 0.2515613756673917, "grad_norm": 0.36708515882492065, "learning_rate": 0.00014971860824809265, "loss": 1.3687, "step": 19359 }, { "epoch": 0.2515743702113075, "grad_norm": 0.3527795076370239, "learning_rate": 0.00014971600878618127, "loss": 1.4833, "step": 19360 }, { "epoch": 0.2515873647552234, "grad_norm": 0.5197107791900635, "learning_rate": 0.0001497134093242699, "loss": 1.3474, "step": 19361 }, { "epoch": 0.25160035929913926, "grad_norm": 0.2973027229309082, "learning_rate": 0.0001497108098623585, "loss": 1.3054, "step": 19362 }, { "epoch": 0.25161335384305517, "grad_norm": 0.31174319982528687, "learning_rate": 0.00014970821040044712, "loss": 1.1624, "step": 19363 }, { "epoch": 0.251626348386971, "grad_norm": 0.33600637316703796, "learning_rate": 0.00014970561093853571, "loss": 1.3334, "step": 19364 }, { "epoch": 0.2516393429308869, "grad_norm": 0.45112213492393494, "learning_rate": 0.00014970301147662437, "loss": 1.4194, "step": 19365 }, { "epoch": 0.25165233747480276, "grad_norm": 0.3276121914386749, "learning_rate": 0.00014970041201471296, "loss": 1.2727, "step": 19366 }, { "epoch": 0.25166533201871866, "grad_norm": 0.32909461855888367, "learning_rate": 0.0001496978125528016, "loss": 1.227, "step": 19367 }, { "epoch": 0.2516783265626345, "grad_norm": 0.38036054372787476, "learning_rate": 0.00014969521309089018, "loss": 1.4674, "step": 19368 }, { "epoch": 0.2516913211065504, "grad_norm": 0.425784707069397, "learning_rate": 0.0001496926136289788, "loss": 1.5073, "step": 19369 }, { "epoch": 0.25170431565046625, "grad_norm": 0.3376021981239319, "learning_rate": 0.00014969001416706743, "loss": 1.1589, "step": 19370 }, { "epoch": 0.25171731019438215, "grad_norm": 0.43341004848480225, "learning_rate": 0.00014968741470515603, "loss": 1.5024, "step": 19371 }, { "epoch": 0.251730304738298, "grad_norm": 0.35913532972335815, "learning_rate": 0.00014968481524324466, "loss": 1.3625, "step": 19372 }, { "epoch": 0.2517432992822139, "grad_norm": 0.3747897446155548, "learning_rate": 0.00014968221578133328, "loss": 1.2266, "step": 19373 }, { "epoch": 0.25175629382612974, "grad_norm": 0.47092750668525696, "learning_rate": 0.00014967961631942188, "loss": 1.4456, "step": 19374 }, { "epoch": 0.25176928837004564, "grad_norm": 0.3811807632446289, "learning_rate": 0.0001496770168575105, "loss": 1.4112, "step": 19375 }, { "epoch": 0.2517822829139615, "grad_norm": 0.3384043574333191, "learning_rate": 0.0001496744173955991, "loss": 1.5773, "step": 19376 }, { "epoch": 0.2517952774578774, "grad_norm": 0.3639392554759979, "learning_rate": 0.00014967181793368775, "loss": 1.3745, "step": 19377 }, { "epoch": 0.25180827200179323, "grad_norm": 0.3538079857826233, "learning_rate": 0.00014966921847177635, "loss": 1.3261, "step": 19378 }, { "epoch": 0.25182126654570913, "grad_norm": 0.27463003993034363, "learning_rate": 0.00014966661900986497, "loss": 1.3753, "step": 19379 }, { "epoch": 0.251834261089625, "grad_norm": 0.33416783809661865, "learning_rate": 0.00014966401954795357, "loss": 1.3011, "step": 19380 }, { "epoch": 0.2518472556335409, "grad_norm": 0.42259055376052856, "learning_rate": 0.0001496614200860422, "loss": 1.5886, "step": 19381 }, { "epoch": 0.2518602501774567, "grad_norm": 0.43376848101615906, "learning_rate": 0.00014965882062413082, "loss": 1.5122, "step": 19382 }, { "epoch": 0.2518732447213726, "grad_norm": 0.4062389135360718, "learning_rate": 0.00014965622116221942, "loss": 1.4191, "step": 19383 }, { "epoch": 0.25188623926528847, "grad_norm": 0.2730928361415863, "learning_rate": 0.00014965362170030804, "loss": 1.2733, "step": 19384 }, { "epoch": 0.25189923380920437, "grad_norm": 0.4791623055934906, "learning_rate": 0.00014965102223839667, "loss": 1.39, "step": 19385 }, { "epoch": 0.2519122283531202, "grad_norm": 0.3642703592777252, "learning_rate": 0.00014964842277648526, "loss": 1.5112, "step": 19386 }, { "epoch": 0.2519252228970361, "grad_norm": 0.45902013778686523, "learning_rate": 0.0001496458233145739, "loss": 1.4592, "step": 19387 }, { "epoch": 0.25193821744095196, "grad_norm": 0.3657056391239166, "learning_rate": 0.00014964322385266248, "loss": 1.3107, "step": 19388 }, { "epoch": 0.25195121198486786, "grad_norm": 0.3789004385471344, "learning_rate": 0.00014964062439075114, "loss": 1.6029, "step": 19389 }, { "epoch": 0.2519642065287837, "grad_norm": 0.38772717118263245, "learning_rate": 0.00014963802492883973, "loss": 1.5438, "step": 19390 }, { "epoch": 0.2519772010726996, "grad_norm": 0.45567506551742554, "learning_rate": 0.00014963542546692836, "loss": 1.5508, "step": 19391 }, { "epoch": 0.25199019561661545, "grad_norm": 0.3882790207862854, "learning_rate": 0.00014963282600501698, "loss": 1.5101, "step": 19392 }, { "epoch": 0.25200319016053135, "grad_norm": 0.446857213973999, "learning_rate": 0.00014963022654310558, "loss": 1.4696, "step": 19393 }, { "epoch": 0.2520161847044472, "grad_norm": 0.3953308165073395, "learning_rate": 0.0001496276270811942, "loss": 1.4145, "step": 19394 }, { "epoch": 0.2520291792483631, "grad_norm": 0.29922378063201904, "learning_rate": 0.0001496250276192828, "loss": 1.3245, "step": 19395 }, { "epoch": 0.25204217379227895, "grad_norm": 0.3518284857273102, "learning_rate": 0.00014962242815737145, "loss": 1.2183, "step": 19396 }, { "epoch": 0.25205516833619485, "grad_norm": 0.4177667796611786, "learning_rate": 0.00014961982869546005, "loss": 1.412, "step": 19397 }, { "epoch": 0.2520681628801107, "grad_norm": 0.3514609932899475, "learning_rate": 0.00014961722923354865, "loss": 1.4467, "step": 19398 }, { "epoch": 0.2520811574240266, "grad_norm": 0.3810456097126007, "learning_rate": 0.00014961462977163727, "loss": 1.3019, "step": 19399 }, { "epoch": 0.25209415196794244, "grad_norm": 0.3124588429927826, "learning_rate": 0.0001496120303097259, "loss": 1.4037, "step": 19400 }, { "epoch": 0.25210714651185834, "grad_norm": 0.5496308207511902, "learning_rate": 0.00014960943084781452, "loss": 1.4659, "step": 19401 }, { "epoch": 0.2521201410557742, "grad_norm": 0.27893659472465515, "learning_rate": 0.00014960683138590312, "loss": 1.4998, "step": 19402 }, { "epoch": 0.2521331355996901, "grad_norm": 0.4640393555164337, "learning_rate": 0.00014960423192399174, "loss": 1.5136, "step": 19403 }, { "epoch": 0.25214613014360593, "grad_norm": 0.37795260548591614, "learning_rate": 0.00014960163246208037, "loss": 1.4603, "step": 19404 }, { "epoch": 0.25215912468752183, "grad_norm": 0.3591223955154419, "learning_rate": 0.00014959903300016897, "loss": 1.4772, "step": 19405 }, { "epoch": 0.2521721192314377, "grad_norm": 0.35912078619003296, "learning_rate": 0.0001495964335382576, "loss": 1.5752, "step": 19406 }, { "epoch": 0.2521851137753536, "grad_norm": 0.3744770288467407, "learning_rate": 0.0001495938340763462, "loss": 1.2821, "step": 19407 }, { "epoch": 0.2521981083192694, "grad_norm": 0.41908347606658936, "learning_rate": 0.00014959123461443484, "loss": 1.372, "step": 19408 }, { "epoch": 0.2522111028631853, "grad_norm": 0.37979528307914734, "learning_rate": 0.00014958863515252344, "loss": 1.1564, "step": 19409 }, { "epoch": 0.25222409740710117, "grad_norm": 0.38841861486434937, "learning_rate": 0.00014958603569061206, "loss": 1.3559, "step": 19410 }, { "epoch": 0.25223709195101707, "grad_norm": 0.4274185597896576, "learning_rate": 0.00014958343622870066, "loss": 1.2185, "step": 19411 }, { "epoch": 0.2522500864949329, "grad_norm": 0.39575764536857605, "learning_rate": 0.00014958083676678928, "loss": 1.5424, "step": 19412 }, { "epoch": 0.2522630810388488, "grad_norm": 0.46195104718208313, "learning_rate": 0.0001495782373048779, "loss": 1.4648, "step": 19413 }, { "epoch": 0.2522760755827647, "grad_norm": 0.36915773153305054, "learning_rate": 0.0001495756378429665, "loss": 1.3907, "step": 19414 }, { "epoch": 0.25228907012668056, "grad_norm": 0.45009738206863403, "learning_rate": 0.00014957303838105513, "loss": 1.5096, "step": 19415 }, { "epoch": 0.25230206467059646, "grad_norm": 0.31375381350517273, "learning_rate": 0.00014957043891914375, "loss": 1.3629, "step": 19416 }, { "epoch": 0.2523150592145123, "grad_norm": 0.4297015368938446, "learning_rate": 0.00014956783945723235, "loss": 1.388, "step": 19417 }, { "epoch": 0.2523280537584282, "grad_norm": 0.4091581702232361, "learning_rate": 0.00014956523999532097, "loss": 1.419, "step": 19418 }, { "epoch": 0.25234104830234405, "grad_norm": 0.4369363486766815, "learning_rate": 0.00014956264053340957, "loss": 1.5665, "step": 19419 }, { "epoch": 0.25235404284625995, "grad_norm": 0.32671064138412476, "learning_rate": 0.00014956004107149822, "loss": 1.4437, "step": 19420 }, { "epoch": 0.2523670373901758, "grad_norm": 0.37904953956604004, "learning_rate": 0.00014955744160958682, "loss": 1.6114, "step": 19421 }, { "epoch": 0.2523800319340917, "grad_norm": 0.4265599250793457, "learning_rate": 0.00014955484214767545, "loss": 1.5065, "step": 19422 }, { "epoch": 0.25239302647800754, "grad_norm": 0.43036991357803345, "learning_rate": 0.00014955224268576404, "loss": 1.3427, "step": 19423 }, { "epoch": 0.25240602102192344, "grad_norm": 0.44632551074028015, "learning_rate": 0.00014954964322385267, "loss": 1.3998, "step": 19424 }, { "epoch": 0.2524190155658393, "grad_norm": 0.46608179807662964, "learning_rate": 0.0001495470437619413, "loss": 1.3282, "step": 19425 }, { "epoch": 0.2524320101097552, "grad_norm": 0.4105066657066345, "learning_rate": 0.0001495444443000299, "loss": 1.4524, "step": 19426 }, { "epoch": 0.25244500465367103, "grad_norm": 0.472917377948761, "learning_rate": 0.00014954184483811851, "loss": 1.3913, "step": 19427 }, { "epoch": 0.25245799919758694, "grad_norm": 0.40210890769958496, "learning_rate": 0.00014953924537620714, "loss": 1.4328, "step": 19428 }, { "epoch": 0.2524709937415028, "grad_norm": 0.3711254298686981, "learning_rate": 0.00014953664591429574, "loss": 1.3584, "step": 19429 }, { "epoch": 0.2524839882854187, "grad_norm": 0.45770329236984253, "learning_rate": 0.00014953404645238436, "loss": 1.4096, "step": 19430 }, { "epoch": 0.2524969828293345, "grad_norm": 0.4337862432003021, "learning_rate": 0.00014953144699047298, "loss": 1.3453, "step": 19431 }, { "epoch": 0.2525099773732504, "grad_norm": 0.33598023653030396, "learning_rate": 0.0001495288475285616, "loss": 1.527, "step": 19432 }, { "epoch": 0.2525229719171663, "grad_norm": 0.3237660229206085, "learning_rate": 0.0001495262480666502, "loss": 1.3763, "step": 19433 }, { "epoch": 0.2525359664610822, "grad_norm": 0.43121376633644104, "learning_rate": 0.00014952364860473883, "loss": 1.5971, "step": 19434 }, { "epoch": 0.252548961004998, "grad_norm": 0.40737298130989075, "learning_rate": 0.00014952104914282746, "loss": 1.5253, "step": 19435 }, { "epoch": 0.2525619555489139, "grad_norm": 0.4197191298007965, "learning_rate": 0.00014951844968091605, "loss": 1.4081, "step": 19436 }, { "epoch": 0.25257495009282976, "grad_norm": 0.4256860911846161, "learning_rate": 0.00014951585021900468, "loss": 1.8356, "step": 19437 }, { "epoch": 0.25258794463674566, "grad_norm": 0.4347538948059082, "learning_rate": 0.00014951325075709327, "loss": 1.4042, "step": 19438 }, { "epoch": 0.2526009391806615, "grad_norm": 0.4465582072734833, "learning_rate": 0.00014951065129518193, "loss": 1.4165, "step": 19439 }, { "epoch": 0.2526139337245774, "grad_norm": 0.37985777854919434, "learning_rate": 0.00014950805183327052, "loss": 1.2847, "step": 19440 }, { "epoch": 0.25262692826849326, "grad_norm": 0.3992941081523895, "learning_rate": 0.00014950545237135912, "loss": 1.2284, "step": 19441 }, { "epoch": 0.25263992281240916, "grad_norm": 0.4348006844520569, "learning_rate": 0.00014950285290944775, "loss": 1.4642, "step": 19442 }, { "epoch": 0.252652917356325, "grad_norm": 0.3953380882740021, "learning_rate": 0.00014950025344753637, "loss": 1.4533, "step": 19443 }, { "epoch": 0.2526659119002409, "grad_norm": 0.3284112811088562, "learning_rate": 0.000149497653985625, "loss": 1.2262, "step": 19444 }, { "epoch": 0.25267890644415675, "grad_norm": 0.42996883392333984, "learning_rate": 0.0001494950545237136, "loss": 1.4904, "step": 19445 }, { "epoch": 0.25269190098807265, "grad_norm": 0.3848508298397064, "learning_rate": 0.00014949245506180222, "loss": 1.5273, "step": 19446 }, { "epoch": 0.2527048955319885, "grad_norm": 0.3397538363933563, "learning_rate": 0.00014948985559989084, "loss": 1.4848, "step": 19447 }, { "epoch": 0.2527178900759044, "grad_norm": 0.4893542528152466, "learning_rate": 0.00014948725613797944, "loss": 1.4846, "step": 19448 }, { "epoch": 0.25273088461982024, "grad_norm": 0.47302690148353577, "learning_rate": 0.00014948465667606806, "loss": 1.5966, "step": 19449 }, { "epoch": 0.25274387916373614, "grad_norm": 0.4177716374397278, "learning_rate": 0.00014948205721415666, "loss": 1.5485, "step": 19450 }, { "epoch": 0.252756873707652, "grad_norm": 0.36448538303375244, "learning_rate": 0.0001494794577522453, "loss": 1.4388, "step": 19451 }, { "epoch": 0.2527698682515679, "grad_norm": 0.44344890117645264, "learning_rate": 0.0001494768582903339, "loss": 1.3458, "step": 19452 }, { "epoch": 0.25278286279548373, "grad_norm": 0.34140545129776, "learning_rate": 0.0001494742588284225, "loss": 1.418, "step": 19453 }, { "epoch": 0.25279585733939963, "grad_norm": 0.5025710463523865, "learning_rate": 0.00014947165936651113, "loss": 1.4794, "step": 19454 }, { "epoch": 0.2528088518833155, "grad_norm": 0.34338274598121643, "learning_rate": 0.00014946905990459976, "loss": 1.41, "step": 19455 }, { "epoch": 0.2528218464272314, "grad_norm": 0.4069278836250305, "learning_rate": 0.00014946646044268838, "loss": 1.291, "step": 19456 }, { "epoch": 0.2528348409711472, "grad_norm": 0.3977620601654053, "learning_rate": 0.00014946386098077698, "loss": 1.4239, "step": 19457 }, { "epoch": 0.2528478355150631, "grad_norm": 0.36042532324790955, "learning_rate": 0.0001494612615188656, "loss": 1.3277, "step": 19458 }, { "epoch": 0.25286083005897897, "grad_norm": 0.3523425757884979, "learning_rate": 0.00014945866205695423, "loss": 1.3304, "step": 19459 }, { "epoch": 0.25287382460289487, "grad_norm": 0.4886690676212311, "learning_rate": 0.00014945606259504282, "loss": 1.2939, "step": 19460 }, { "epoch": 0.2528868191468107, "grad_norm": 0.40894386172294617, "learning_rate": 0.00014945346313313145, "loss": 1.4308, "step": 19461 }, { "epoch": 0.2528998136907266, "grad_norm": 0.3787551820278168, "learning_rate": 0.00014945086367122005, "loss": 1.563, "step": 19462 }, { "epoch": 0.25291280823464246, "grad_norm": 0.4399498999118805, "learning_rate": 0.0001494482642093087, "loss": 1.4975, "step": 19463 }, { "epoch": 0.25292580277855836, "grad_norm": 0.35064423084259033, "learning_rate": 0.0001494456647473973, "loss": 1.2873, "step": 19464 }, { "epoch": 0.2529387973224742, "grad_norm": 0.3743061125278473, "learning_rate": 0.00014944306528548592, "loss": 1.4569, "step": 19465 }, { "epoch": 0.2529517918663901, "grad_norm": 0.4102039635181427, "learning_rate": 0.00014944046582357454, "loss": 1.5506, "step": 19466 }, { "epoch": 0.25296478641030595, "grad_norm": 0.34494784474372864, "learning_rate": 0.00014943786636166314, "loss": 1.452, "step": 19467 }, { "epoch": 0.25297778095422185, "grad_norm": 0.41487744450569153, "learning_rate": 0.00014943526689975177, "loss": 1.5787, "step": 19468 }, { "epoch": 0.2529907754981377, "grad_norm": 0.3826618194580078, "learning_rate": 0.00014943266743784036, "loss": 1.5631, "step": 19469 }, { "epoch": 0.2530037700420536, "grad_norm": 0.4021928012371063, "learning_rate": 0.000149430067975929, "loss": 1.4804, "step": 19470 }, { "epoch": 0.25301676458596944, "grad_norm": 0.3860153257846832, "learning_rate": 0.0001494274685140176, "loss": 1.2641, "step": 19471 }, { "epoch": 0.25302975912988535, "grad_norm": 0.3456415832042694, "learning_rate": 0.0001494248690521062, "loss": 1.3048, "step": 19472 }, { "epoch": 0.2530427536738012, "grad_norm": 0.27741387486457825, "learning_rate": 0.00014942226959019483, "loss": 1.3909, "step": 19473 }, { "epoch": 0.2530557482177171, "grad_norm": 0.36109939217567444, "learning_rate": 0.00014941967012828346, "loss": 1.493, "step": 19474 }, { "epoch": 0.25306874276163294, "grad_norm": 0.4939716160297394, "learning_rate": 0.00014941707066637208, "loss": 1.3413, "step": 19475 }, { "epoch": 0.25308173730554884, "grad_norm": 0.34035468101501465, "learning_rate": 0.00014941447120446068, "loss": 1.6573, "step": 19476 }, { "epoch": 0.2530947318494647, "grad_norm": 0.3671989440917969, "learning_rate": 0.0001494118717425493, "loss": 1.1807, "step": 19477 }, { "epoch": 0.2531077263933806, "grad_norm": 0.4366193413734436, "learning_rate": 0.00014940927228063793, "loss": 1.5688, "step": 19478 }, { "epoch": 0.25312072093729643, "grad_norm": 0.38654372096061707, "learning_rate": 0.00014940667281872653, "loss": 1.4302, "step": 19479 }, { "epoch": 0.25313371548121233, "grad_norm": 0.37955477833747864, "learning_rate": 0.00014940407335681515, "loss": 1.523, "step": 19480 }, { "epoch": 0.2531467100251282, "grad_norm": 0.3090011179447174, "learning_rate": 0.00014940147389490375, "loss": 1.3223, "step": 19481 }, { "epoch": 0.2531597045690441, "grad_norm": 0.3520176112651825, "learning_rate": 0.00014939887443299237, "loss": 1.4801, "step": 19482 }, { "epoch": 0.2531726991129599, "grad_norm": 0.358134001493454, "learning_rate": 0.000149396274971081, "loss": 1.2845, "step": 19483 }, { "epoch": 0.2531856936568758, "grad_norm": 0.4331236779689789, "learning_rate": 0.0001493936755091696, "loss": 1.4309, "step": 19484 }, { "epoch": 0.25319868820079167, "grad_norm": 0.426949679851532, "learning_rate": 0.00014939107604725822, "loss": 1.4705, "step": 19485 }, { "epoch": 0.25321168274470757, "grad_norm": 0.41182342171669006, "learning_rate": 0.00014938847658534684, "loss": 1.4218, "step": 19486 }, { "epoch": 0.2532246772886234, "grad_norm": 0.36608171463012695, "learning_rate": 0.00014938587712343547, "loss": 1.4997, "step": 19487 }, { "epoch": 0.2532376718325393, "grad_norm": 0.40239009261131287, "learning_rate": 0.00014938327766152407, "loss": 1.4691, "step": 19488 }, { "epoch": 0.2532506663764552, "grad_norm": 0.4432760179042816, "learning_rate": 0.0001493806781996127, "loss": 1.3926, "step": 19489 }, { "epoch": 0.25326366092037106, "grad_norm": 0.3851640820503235, "learning_rate": 0.00014937807873770131, "loss": 1.4094, "step": 19490 }, { "epoch": 0.25327665546428696, "grad_norm": 0.4153343141078949, "learning_rate": 0.0001493754792757899, "loss": 1.4461, "step": 19491 }, { "epoch": 0.2532896500082028, "grad_norm": 0.3610401153564453, "learning_rate": 0.00014937287981387854, "loss": 1.2303, "step": 19492 }, { "epoch": 0.2533026445521187, "grad_norm": 0.40506166219711304, "learning_rate": 0.00014937028035196713, "loss": 1.6006, "step": 19493 }, { "epoch": 0.25331563909603455, "grad_norm": 0.4519224762916565, "learning_rate": 0.00014936768089005579, "loss": 1.3561, "step": 19494 }, { "epoch": 0.25332863363995045, "grad_norm": 0.41228964924812317, "learning_rate": 0.00014936508142814438, "loss": 1.3722, "step": 19495 }, { "epoch": 0.2533416281838663, "grad_norm": 0.6391938924789429, "learning_rate": 0.00014936248196623298, "loss": 1.331, "step": 19496 }, { "epoch": 0.2533546227277822, "grad_norm": 0.41272252798080444, "learning_rate": 0.0001493598825043216, "loss": 1.43, "step": 19497 }, { "epoch": 0.25336761727169804, "grad_norm": 0.3163849115371704, "learning_rate": 0.00014935728304241023, "loss": 1.2974, "step": 19498 }, { "epoch": 0.25338061181561394, "grad_norm": 0.3924661874771118, "learning_rate": 0.00014935468358049885, "loss": 1.3742, "step": 19499 }, { "epoch": 0.2533936063595298, "grad_norm": 0.3626254200935364, "learning_rate": 0.00014935208411858745, "loss": 1.3065, "step": 19500 }, { "epoch": 0.2534066009034457, "grad_norm": 0.4591801166534424, "learning_rate": 0.00014934948465667608, "loss": 1.4867, "step": 19501 }, { "epoch": 0.25341959544736153, "grad_norm": 0.3122835159301758, "learning_rate": 0.0001493468851947647, "loss": 1.2819, "step": 19502 }, { "epoch": 0.25343258999127743, "grad_norm": 0.4542468190193176, "learning_rate": 0.0001493442857328533, "loss": 1.5839, "step": 19503 }, { "epoch": 0.2534455845351933, "grad_norm": 0.4456194043159485, "learning_rate": 0.00014934168627094192, "loss": 1.5638, "step": 19504 }, { "epoch": 0.2534585790791092, "grad_norm": 0.22348594665527344, "learning_rate": 0.00014933908680903055, "loss": 1.111, "step": 19505 }, { "epoch": 0.253471573623025, "grad_norm": 0.38837847113609314, "learning_rate": 0.00014933648734711917, "loss": 1.6684, "step": 19506 }, { "epoch": 0.2534845681669409, "grad_norm": 0.3763684630393982, "learning_rate": 0.00014933388788520777, "loss": 1.336, "step": 19507 }, { "epoch": 0.25349756271085677, "grad_norm": 0.3438556492328644, "learning_rate": 0.00014933128842329637, "loss": 1.1228, "step": 19508 }, { "epoch": 0.2535105572547727, "grad_norm": 0.38098448514938354, "learning_rate": 0.00014932868896138502, "loss": 1.5339, "step": 19509 }, { "epoch": 0.2535235517986885, "grad_norm": 0.44352248311042786, "learning_rate": 0.00014932608949947361, "loss": 1.3948, "step": 19510 }, { "epoch": 0.2535365463426044, "grad_norm": 0.47841498255729675, "learning_rate": 0.00014932349003756224, "loss": 1.4295, "step": 19511 }, { "epoch": 0.25354954088652026, "grad_norm": 0.43203219771385193, "learning_rate": 0.00014932089057565084, "loss": 1.6745, "step": 19512 }, { "epoch": 0.25356253543043616, "grad_norm": 0.5679474472999573, "learning_rate": 0.00014931829111373946, "loss": 1.5579, "step": 19513 }, { "epoch": 0.253575529974352, "grad_norm": 0.4489889442920685, "learning_rate": 0.00014931569165182809, "loss": 1.3245, "step": 19514 }, { "epoch": 0.2535885245182679, "grad_norm": 0.5117473006248474, "learning_rate": 0.00014931309218991668, "loss": 1.5214, "step": 19515 }, { "epoch": 0.25360151906218376, "grad_norm": 0.371930867433548, "learning_rate": 0.0001493104927280053, "loss": 1.452, "step": 19516 }, { "epoch": 0.25361451360609966, "grad_norm": 0.47861620783805847, "learning_rate": 0.00014930789326609393, "loss": 1.6071, "step": 19517 }, { "epoch": 0.2536275081500155, "grad_norm": 0.3440735638141632, "learning_rate": 0.00014930529380418256, "loss": 1.4441, "step": 19518 }, { "epoch": 0.2536405026939314, "grad_norm": 0.34542837738990784, "learning_rate": 0.00014930269434227115, "loss": 1.4439, "step": 19519 }, { "epoch": 0.25365349723784725, "grad_norm": 0.413703978061676, "learning_rate": 0.00014930009488035978, "loss": 1.4168, "step": 19520 }, { "epoch": 0.25366649178176315, "grad_norm": 0.38638782501220703, "learning_rate": 0.0001492974954184484, "loss": 1.3942, "step": 19521 }, { "epoch": 0.253679486325679, "grad_norm": 0.42799457907676697, "learning_rate": 0.000149294895956537, "loss": 1.3651, "step": 19522 }, { "epoch": 0.2536924808695949, "grad_norm": 0.3785470128059387, "learning_rate": 0.00014929229649462562, "loss": 1.411, "step": 19523 }, { "epoch": 0.25370547541351074, "grad_norm": 0.4188239276409149, "learning_rate": 0.00014928969703271422, "loss": 1.477, "step": 19524 }, { "epoch": 0.25371846995742664, "grad_norm": 0.5385071039199829, "learning_rate": 0.00014928709757080285, "loss": 1.5267, "step": 19525 }, { "epoch": 0.2537314645013425, "grad_norm": 0.40900567173957825, "learning_rate": 0.00014928449810889147, "loss": 1.3916, "step": 19526 }, { "epoch": 0.2537444590452584, "grad_norm": 0.4177548885345459, "learning_rate": 0.00014928189864698007, "loss": 1.4331, "step": 19527 }, { "epoch": 0.25375745358917423, "grad_norm": 0.4325701594352722, "learning_rate": 0.0001492792991850687, "loss": 1.6716, "step": 19528 }, { "epoch": 0.25377044813309013, "grad_norm": 0.326905220746994, "learning_rate": 0.00014927669972315732, "loss": 1.2966, "step": 19529 }, { "epoch": 0.253783442677006, "grad_norm": 0.4979146122932434, "learning_rate": 0.00014927410026124594, "loss": 1.5173, "step": 19530 }, { "epoch": 0.2537964372209219, "grad_norm": 0.35215646028518677, "learning_rate": 0.00014927150079933454, "loss": 1.3729, "step": 19531 }, { "epoch": 0.2538094317648377, "grad_norm": 0.47046953439712524, "learning_rate": 0.00014926890133742316, "loss": 1.4639, "step": 19532 }, { "epoch": 0.2538224263087536, "grad_norm": 0.44784340262413025, "learning_rate": 0.0001492663018755118, "loss": 1.353, "step": 19533 }, { "epoch": 0.25383542085266947, "grad_norm": 0.4741322994232178, "learning_rate": 0.00014926370241360039, "loss": 1.4941, "step": 19534 }, { "epoch": 0.25384841539658537, "grad_norm": 0.2875347435474396, "learning_rate": 0.000149261102951689, "loss": 1.2758, "step": 19535 }, { "epoch": 0.2538614099405012, "grad_norm": 0.3891442120075226, "learning_rate": 0.0001492585034897776, "loss": 1.4887, "step": 19536 }, { "epoch": 0.2538744044844171, "grad_norm": 0.41554713249206543, "learning_rate": 0.00014925590402786623, "loss": 1.4239, "step": 19537 }, { "epoch": 0.25388739902833296, "grad_norm": 0.35057684779167175, "learning_rate": 0.00014925330456595486, "loss": 1.3987, "step": 19538 }, { "epoch": 0.25390039357224886, "grad_norm": 0.4619067311286926, "learning_rate": 0.00014925070510404345, "loss": 1.4433, "step": 19539 }, { "epoch": 0.2539133881161647, "grad_norm": 0.3216840624809265, "learning_rate": 0.0001492481056421321, "loss": 1.5148, "step": 19540 }, { "epoch": 0.2539263826600806, "grad_norm": 0.4319811165332794, "learning_rate": 0.0001492455061802207, "loss": 1.4827, "step": 19541 }, { "epoch": 0.25393937720399645, "grad_norm": 0.46609023213386536, "learning_rate": 0.00014924290671830933, "loss": 1.4998, "step": 19542 }, { "epoch": 0.25395237174791235, "grad_norm": 0.3596515357494354, "learning_rate": 0.00014924030725639792, "loss": 1.5007, "step": 19543 }, { "epoch": 0.2539653662918282, "grad_norm": 0.4820636510848999, "learning_rate": 0.00014923770779448655, "loss": 1.4179, "step": 19544 }, { "epoch": 0.2539783608357441, "grad_norm": 0.3542421758174896, "learning_rate": 0.00014923510833257517, "loss": 1.5935, "step": 19545 }, { "epoch": 0.25399135537965994, "grad_norm": 0.35289159417152405, "learning_rate": 0.00014923250887066377, "loss": 1.5804, "step": 19546 }, { "epoch": 0.25400434992357585, "grad_norm": 0.33503708243370056, "learning_rate": 0.0001492299094087524, "loss": 1.2877, "step": 19547 }, { "epoch": 0.2540173444674917, "grad_norm": 0.328752726316452, "learning_rate": 0.00014922730994684102, "loss": 1.5439, "step": 19548 }, { "epoch": 0.2540303390114076, "grad_norm": 0.44852712750434875, "learning_rate": 0.00014922471048492964, "loss": 1.4289, "step": 19549 }, { "epoch": 0.25404333355532344, "grad_norm": 0.39202719926834106, "learning_rate": 0.00014922211102301824, "loss": 1.4395, "step": 19550 }, { "epoch": 0.25405632809923934, "grad_norm": 0.42687827348709106, "learning_rate": 0.00014921951156110684, "loss": 1.213, "step": 19551 }, { "epoch": 0.2540693226431552, "grad_norm": 0.4032503664493561, "learning_rate": 0.0001492169120991955, "loss": 1.3344, "step": 19552 }, { "epoch": 0.2540823171870711, "grad_norm": 0.28723934292793274, "learning_rate": 0.0001492143126372841, "loss": 1.2125, "step": 19553 }, { "epoch": 0.25409531173098693, "grad_norm": 0.3656887114048004, "learning_rate": 0.0001492117131753727, "loss": 1.2898, "step": 19554 }, { "epoch": 0.25410830627490283, "grad_norm": 0.35861334204673767, "learning_rate": 0.0001492091137134613, "loss": 1.2993, "step": 19555 }, { "epoch": 0.2541213008188187, "grad_norm": 0.41531193256378174, "learning_rate": 0.00014920651425154993, "loss": 1.5702, "step": 19556 }, { "epoch": 0.2541342953627346, "grad_norm": 0.46724745631217957, "learning_rate": 0.00014920391478963856, "loss": 1.5282, "step": 19557 }, { "epoch": 0.2541472899066504, "grad_norm": 0.40336310863494873, "learning_rate": 0.00014920131532772716, "loss": 1.4613, "step": 19558 }, { "epoch": 0.2541602844505663, "grad_norm": 0.4190009832382202, "learning_rate": 0.00014919871586581578, "loss": 1.4086, "step": 19559 }, { "epoch": 0.25417327899448217, "grad_norm": 0.2918795943260193, "learning_rate": 0.0001491961164039044, "loss": 1.606, "step": 19560 }, { "epoch": 0.25418627353839807, "grad_norm": 0.36437034606933594, "learning_rate": 0.00014919351694199303, "loss": 1.3699, "step": 19561 }, { "epoch": 0.2541992680823139, "grad_norm": 0.33752548694610596, "learning_rate": 0.00014919091748008163, "loss": 1.1943, "step": 19562 }, { "epoch": 0.2542122626262298, "grad_norm": 0.5191263556480408, "learning_rate": 0.00014918831801817022, "loss": 1.5589, "step": 19563 }, { "epoch": 0.25422525717014566, "grad_norm": 0.3960602581501007, "learning_rate": 0.00014918571855625888, "loss": 1.4288, "step": 19564 }, { "epoch": 0.25423825171406156, "grad_norm": 0.4055216908454895, "learning_rate": 0.00014918311909434747, "loss": 1.3877, "step": 19565 }, { "epoch": 0.25425124625797746, "grad_norm": 0.4590810239315033, "learning_rate": 0.0001491805196324361, "loss": 1.459, "step": 19566 }, { "epoch": 0.2542642408018933, "grad_norm": 0.4493219554424286, "learning_rate": 0.0001491779201705247, "loss": 1.4646, "step": 19567 }, { "epoch": 0.2542772353458092, "grad_norm": 0.42967626452445984, "learning_rate": 0.00014917532070861332, "loss": 1.3442, "step": 19568 }, { "epoch": 0.25429022988972505, "grad_norm": 0.40593600273132324, "learning_rate": 0.00014917272124670194, "loss": 1.4179, "step": 19569 }, { "epoch": 0.25430322443364095, "grad_norm": 0.3814839720726013, "learning_rate": 0.00014917012178479054, "loss": 1.4409, "step": 19570 }, { "epoch": 0.2543162189775568, "grad_norm": 0.333474338054657, "learning_rate": 0.00014916752232287917, "loss": 1.4659, "step": 19571 }, { "epoch": 0.2543292135214727, "grad_norm": 0.3968014121055603, "learning_rate": 0.0001491649228609678, "loss": 1.5165, "step": 19572 }, { "epoch": 0.25434220806538854, "grad_norm": 0.5033664107322693, "learning_rate": 0.00014916232339905641, "loss": 1.4188, "step": 19573 }, { "epoch": 0.25435520260930444, "grad_norm": 0.43290650844573975, "learning_rate": 0.000149159723937145, "loss": 1.3538, "step": 19574 }, { "epoch": 0.2543681971532203, "grad_norm": 0.42177537083625793, "learning_rate": 0.0001491571244752336, "loss": 1.3532, "step": 19575 }, { "epoch": 0.2543811916971362, "grad_norm": 0.41929543018341064, "learning_rate": 0.00014915452501332226, "loss": 1.6026, "step": 19576 }, { "epoch": 0.25439418624105203, "grad_norm": 0.33637744188308716, "learning_rate": 0.00014915192555141086, "loss": 1.4604, "step": 19577 }, { "epoch": 0.25440718078496793, "grad_norm": 0.406148225069046, "learning_rate": 0.00014914932608949948, "loss": 1.325, "step": 19578 }, { "epoch": 0.2544201753288838, "grad_norm": 0.41331106424331665, "learning_rate": 0.0001491467266275881, "loss": 1.4351, "step": 19579 }, { "epoch": 0.2544331698727997, "grad_norm": 0.3987766206264496, "learning_rate": 0.0001491441271656767, "loss": 1.4019, "step": 19580 }, { "epoch": 0.2544461644167155, "grad_norm": 0.5104241967201233, "learning_rate": 0.00014914152770376533, "loss": 1.5371, "step": 19581 }, { "epoch": 0.2544591589606314, "grad_norm": 0.34166020154953003, "learning_rate": 0.00014913892824185393, "loss": 1.4705, "step": 19582 }, { "epoch": 0.25447215350454727, "grad_norm": 0.3657446503639221, "learning_rate": 0.00014913632877994258, "loss": 1.4575, "step": 19583 }, { "epoch": 0.2544851480484632, "grad_norm": 0.45171964168548584, "learning_rate": 0.00014913372931803118, "loss": 1.3922, "step": 19584 }, { "epoch": 0.254498142592379, "grad_norm": 0.30195876955986023, "learning_rate": 0.0001491311298561198, "loss": 1.2429, "step": 19585 }, { "epoch": 0.2545111371362949, "grad_norm": 0.44035932421684265, "learning_rate": 0.0001491285303942084, "loss": 1.4496, "step": 19586 }, { "epoch": 0.25452413168021076, "grad_norm": 0.4288991689682007, "learning_rate": 0.00014912593093229702, "loss": 1.5179, "step": 19587 }, { "epoch": 0.25453712622412666, "grad_norm": 0.4843510389328003, "learning_rate": 0.00014912333147038565, "loss": 1.4315, "step": 19588 }, { "epoch": 0.2545501207680425, "grad_norm": 0.4102731943130493, "learning_rate": 0.00014912073200847424, "loss": 1.424, "step": 19589 }, { "epoch": 0.2545631153119584, "grad_norm": 0.33283668756484985, "learning_rate": 0.00014911813254656287, "loss": 1.2421, "step": 19590 }, { "epoch": 0.25457610985587426, "grad_norm": 0.4266267716884613, "learning_rate": 0.0001491155330846515, "loss": 1.3228, "step": 19591 }, { "epoch": 0.25458910439979016, "grad_norm": 0.4488687813282013, "learning_rate": 0.0001491129336227401, "loss": 1.4948, "step": 19592 }, { "epoch": 0.254602098943706, "grad_norm": 0.3215477764606476, "learning_rate": 0.00014911033416082871, "loss": 1.3819, "step": 19593 }, { "epoch": 0.2546150934876219, "grad_norm": 0.2730790972709656, "learning_rate": 0.0001491077346989173, "loss": 1.3785, "step": 19594 }, { "epoch": 0.25462808803153775, "grad_norm": 0.4222654402256012, "learning_rate": 0.00014910513523700596, "loss": 1.4154, "step": 19595 }, { "epoch": 0.25464108257545365, "grad_norm": 0.4084135591983795, "learning_rate": 0.00014910253577509456, "loss": 1.3022, "step": 19596 }, { "epoch": 0.2546540771193695, "grad_norm": 0.31401485204696655, "learning_rate": 0.00014909993631318319, "loss": 1.4603, "step": 19597 }, { "epoch": 0.2546670716632854, "grad_norm": 0.47276976704597473, "learning_rate": 0.00014909733685127178, "loss": 1.494, "step": 19598 }, { "epoch": 0.25468006620720124, "grad_norm": 0.5564035773277283, "learning_rate": 0.0001490947373893604, "loss": 1.4751, "step": 19599 }, { "epoch": 0.25469306075111714, "grad_norm": 0.48834386467933655, "learning_rate": 0.00014909213792744903, "loss": 1.3476, "step": 19600 }, { "epoch": 0.254706055295033, "grad_norm": 0.3741549551486969, "learning_rate": 0.00014908953846553763, "loss": 1.3873, "step": 19601 }, { "epoch": 0.2547190498389489, "grad_norm": 0.4064483940601349, "learning_rate": 0.00014908693900362625, "loss": 1.4014, "step": 19602 }, { "epoch": 0.25473204438286473, "grad_norm": 0.3980434238910675, "learning_rate": 0.00014908433954171488, "loss": 1.3172, "step": 19603 }, { "epoch": 0.25474503892678063, "grad_norm": 0.36086952686309814, "learning_rate": 0.00014908174007980348, "loss": 1.2456, "step": 19604 }, { "epoch": 0.2547580334706965, "grad_norm": 0.4407871961593628, "learning_rate": 0.0001490791406178921, "loss": 1.494, "step": 19605 }, { "epoch": 0.2547710280146124, "grad_norm": 0.36557450890541077, "learning_rate": 0.0001490765411559807, "loss": 1.3946, "step": 19606 }, { "epoch": 0.2547840225585282, "grad_norm": 0.38791608810424805, "learning_rate": 0.00014907394169406935, "loss": 1.2061, "step": 19607 }, { "epoch": 0.2547970171024441, "grad_norm": 0.41294312477111816, "learning_rate": 0.00014907134223215795, "loss": 1.3249, "step": 19608 }, { "epoch": 0.25481001164635997, "grad_norm": 0.38132742047309875, "learning_rate": 0.00014906874277024657, "loss": 1.369, "step": 19609 }, { "epoch": 0.25482300619027587, "grad_norm": 0.4066297113895416, "learning_rate": 0.00014906614330833517, "loss": 1.4157, "step": 19610 }, { "epoch": 0.2548360007341917, "grad_norm": 0.4042745530605316, "learning_rate": 0.0001490635438464238, "loss": 1.4776, "step": 19611 }, { "epoch": 0.2548489952781076, "grad_norm": 0.4441920518875122, "learning_rate": 0.00014906094438451242, "loss": 1.4029, "step": 19612 }, { "epoch": 0.25486198982202346, "grad_norm": 0.4884093105792999, "learning_rate": 0.00014905834492260101, "loss": 1.4921, "step": 19613 }, { "epoch": 0.25487498436593936, "grad_norm": 0.40551596879959106, "learning_rate": 0.00014905574546068967, "loss": 1.4383, "step": 19614 }, { "epoch": 0.2548879789098552, "grad_norm": 0.40478554368019104, "learning_rate": 0.00014905314599877826, "loss": 1.4793, "step": 19615 }, { "epoch": 0.2549009734537711, "grad_norm": 0.4637174606323242, "learning_rate": 0.0001490505465368669, "loss": 1.5266, "step": 19616 }, { "epoch": 0.25491396799768695, "grad_norm": 0.42517709732055664, "learning_rate": 0.00014904794707495549, "loss": 1.4772, "step": 19617 }, { "epoch": 0.25492696254160285, "grad_norm": 0.35457098484039307, "learning_rate": 0.0001490453476130441, "loss": 1.4463, "step": 19618 }, { "epoch": 0.2549399570855187, "grad_norm": 0.39123281836509705, "learning_rate": 0.00014904274815113273, "loss": 1.4359, "step": 19619 }, { "epoch": 0.2549529516294346, "grad_norm": 0.5061010718345642, "learning_rate": 0.00014904014868922133, "loss": 1.642, "step": 19620 }, { "epoch": 0.25496594617335044, "grad_norm": 0.4411238431930542, "learning_rate": 0.00014903754922730996, "loss": 1.3891, "step": 19621 }, { "epoch": 0.25497894071726634, "grad_norm": 0.39414554834365845, "learning_rate": 0.00014903494976539858, "loss": 1.4517, "step": 19622 }, { "epoch": 0.2549919352611822, "grad_norm": 0.43490347266197205, "learning_rate": 0.00014903235030348718, "loss": 1.4189, "step": 19623 }, { "epoch": 0.2550049298050981, "grad_norm": 0.4816271662712097, "learning_rate": 0.0001490297508415758, "loss": 1.2582, "step": 19624 }, { "epoch": 0.25501792434901394, "grad_norm": 0.46682465076446533, "learning_rate": 0.0001490271513796644, "loss": 1.5558, "step": 19625 }, { "epoch": 0.25503091889292984, "grad_norm": 0.35740694403648376, "learning_rate": 0.00014902455191775305, "loss": 1.3755, "step": 19626 }, { "epoch": 0.2550439134368457, "grad_norm": 0.2937428951263428, "learning_rate": 0.00014902195245584165, "loss": 1.4038, "step": 19627 }, { "epoch": 0.2550569079807616, "grad_norm": 0.41690313816070557, "learning_rate": 0.00014901935299393027, "loss": 1.4042, "step": 19628 }, { "epoch": 0.2550699025246774, "grad_norm": 0.4506669044494629, "learning_rate": 0.00014901675353201887, "loss": 1.3506, "step": 19629 }, { "epoch": 0.25508289706859333, "grad_norm": 0.42641496658325195, "learning_rate": 0.0001490141540701075, "loss": 1.4267, "step": 19630 }, { "epoch": 0.2550958916125092, "grad_norm": 0.46458303928375244, "learning_rate": 0.00014901155460819612, "loss": 1.4515, "step": 19631 }, { "epoch": 0.2551088861564251, "grad_norm": 0.5402584671974182, "learning_rate": 0.00014900895514628472, "loss": 1.5567, "step": 19632 }, { "epoch": 0.2551218807003409, "grad_norm": 0.48536399006843567, "learning_rate": 0.00014900635568437334, "loss": 1.5556, "step": 19633 }, { "epoch": 0.2551348752442568, "grad_norm": 0.3085572421550751, "learning_rate": 0.00014900375622246197, "loss": 1.5257, "step": 19634 }, { "epoch": 0.25514786978817267, "grad_norm": 0.3858374059200287, "learning_rate": 0.00014900115676055056, "loss": 1.5151, "step": 19635 }, { "epoch": 0.25516086433208857, "grad_norm": 0.32978740334510803, "learning_rate": 0.0001489985572986392, "loss": 1.3794, "step": 19636 }, { "epoch": 0.2551738588760044, "grad_norm": 0.5962116718292236, "learning_rate": 0.00014899595783672779, "loss": 1.5962, "step": 19637 }, { "epoch": 0.2551868534199203, "grad_norm": 0.48759955167770386, "learning_rate": 0.00014899335837481644, "loss": 1.4439, "step": 19638 }, { "epoch": 0.25519984796383616, "grad_norm": 0.45516636967658997, "learning_rate": 0.00014899075891290503, "loss": 1.4525, "step": 19639 }, { "epoch": 0.25521284250775206, "grad_norm": 0.488765150308609, "learning_rate": 0.00014898815945099366, "loss": 1.4983, "step": 19640 }, { "epoch": 0.25522583705166796, "grad_norm": 0.4281080961227417, "learning_rate": 0.00014898555998908226, "loss": 1.2567, "step": 19641 }, { "epoch": 0.2552388315955838, "grad_norm": 0.4397064745426178, "learning_rate": 0.00014898296052717088, "loss": 1.4347, "step": 19642 }, { "epoch": 0.2552518261394997, "grad_norm": 0.4339059591293335, "learning_rate": 0.0001489803610652595, "loss": 1.2534, "step": 19643 }, { "epoch": 0.25526482068341555, "grad_norm": 0.4328799843788147, "learning_rate": 0.0001489777616033481, "loss": 1.6832, "step": 19644 }, { "epoch": 0.25527781522733145, "grad_norm": 0.36828485131263733, "learning_rate": 0.00014897516214143673, "loss": 1.4683, "step": 19645 }, { "epoch": 0.2552908097712473, "grad_norm": 0.4733307957649231, "learning_rate": 0.00014897256267952535, "loss": 1.3166, "step": 19646 }, { "epoch": 0.2553038043151632, "grad_norm": 0.4817982614040375, "learning_rate": 0.00014896996321761395, "loss": 1.459, "step": 19647 }, { "epoch": 0.25531679885907904, "grad_norm": 0.39325281977653503, "learning_rate": 0.00014896736375570257, "loss": 1.3309, "step": 19648 }, { "epoch": 0.25532979340299494, "grad_norm": 0.37920570373535156, "learning_rate": 0.00014896476429379117, "loss": 1.3799, "step": 19649 }, { "epoch": 0.2553427879469108, "grad_norm": 0.4038579761981964, "learning_rate": 0.00014896216483187982, "loss": 1.5325, "step": 19650 }, { "epoch": 0.2553557824908267, "grad_norm": 0.5017217993736267, "learning_rate": 0.00014895956536996842, "loss": 1.4294, "step": 19651 }, { "epoch": 0.25536877703474253, "grad_norm": 0.40085774660110474, "learning_rate": 0.00014895696590805704, "loss": 1.36, "step": 19652 }, { "epoch": 0.25538177157865843, "grad_norm": 0.42601174116134644, "learning_rate": 0.00014895436644614567, "loss": 1.3577, "step": 19653 }, { "epoch": 0.2553947661225743, "grad_norm": 0.4202224016189575, "learning_rate": 0.00014895176698423427, "loss": 1.4851, "step": 19654 }, { "epoch": 0.2554077606664902, "grad_norm": 0.44097375869750977, "learning_rate": 0.0001489491675223229, "loss": 1.4616, "step": 19655 }, { "epoch": 0.255420755210406, "grad_norm": 0.3401409089565277, "learning_rate": 0.0001489465680604115, "loss": 1.5165, "step": 19656 }, { "epoch": 0.2554337497543219, "grad_norm": 0.563421368598938, "learning_rate": 0.00014894396859850014, "loss": 1.5301, "step": 19657 }, { "epoch": 0.25544674429823777, "grad_norm": 0.5105387568473816, "learning_rate": 0.00014894136913658874, "loss": 1.4498, "step": 19658 }, { "epoch": 0.25545973884215367, "grad_norm": 0.39737823605537415, "learning_rate": 0.00014893876967467733, "loss": 1.4409, "step": 19659 }, { "epoch": 0.2554727333860695, "grad_norm": 0.38277554512023926, "learning_rate": 0.00014893617021276596, "loss": 1.5231, "step": 19660 }, { "epoch": 0.2554857279299854, "grad_norm": 0.40716618299484253, "learning_rate": 0.00014893357075085458, "loss": 1.3287, "step": 19661 }, { "epoch": 0.25549872247390126, "grad_norm": 0.3670077919960022, "learning_rate": 0.0001489309712889432, "loss": 1.3276, "step": 19662 }, { "epoch": 0.25551171701781716, "grad_norm": 0.43973225355148315, "learning_rate": 0.0001489283718270318, "loss": 1.3053, "step": 19663 }, { "epoch": 0.255524711561733, "grad_norm": 0.38072311878204346, "learning_rate": 0.00014892577236512043, "loss": 1.5744, "step": 19664 }, { "epoch": 0.2555377061056489, "grad_norm": 0.4879327118396759, "learning_rate": 0.00014892317290320905, "loss": 1.4884, "step": 19665 }, { "epoch": 0.25555070064956475, "grad_norm": 0.33255159854888916, "learning_rate": 0.00014892057344129765, "loss": 1.303, "step": 19666 }, { "epoch": 0.25556369519348066, "grad_norm": 0.3579411208629608, "learning_rate": 0.00014891797397938628, "loss": 1.2984, "step": 19667 }, { "epoch": 0.2555766897373965, "grad_norm": 0.4280683398246765, "learning_rate": 0.00014891537451747487, "loss": 1.486, "step": 19668 }, { "epoch": 0.2555896842813124, "grad_norm": 0.33373525738716125, "learning_rate": 0.00014891277505556352, "loss": 1.5716, "step": 19669 }, { "epoch": 0.25560267882522825, "grad_norm": 0.3290935456752777, "learning_rate": 0.00014891017559365212, "loss": 1.2735, "step": 19670 }, { "epoch": 0.25561567336914415, "grad_norm": 0.39359936118125916, "learning_rate": 0.00014890757613174075, "loss": 1.3702, "step": 19671 }, { "epoch": 0.25562866791306, "grad_norm": 0.3568744659423828, "learning_rate": 0.00014890497666982934, "loss": 1.2655, "step": 19672 }, { "epoch": 0.2556416624569759, "grad_norm": 0.42902329564094543, "learning_rate": 0.00014890237720791797, "loss": 1.4409, "step": 19673 }, { "epoch": 0.25565465700089174, "grad_norm": 0.47060903906822205, "learning_rate": 0.0001488997777460066, "loss": 1.4231, "step": 19674 }, { "epoch": 0.25566765154480764, "grad_norm": 0.383518785238266, "learning_rate": 0.0001488971782840952, "loss": 1.4637, "step": 19675 }, { "epoch": 0.2556806460887235, "grad_norm": 0.35775506496429443, "learning_rate": 0.00014889457882218382, "loss": 1.4128, "step": 19676 }, { "epoch": 0.2556936406326394, "grad_norm": 0.34001848101615906, "learning_rate": 0.00014889197936027244, "loss": 1.2971, "step": 19677 }, { "epoch": 0.25570663517655523, "grad_norm": 0.37495020031929016, "learning_rate": 0.00014888937989836104, "loss": 1.3279, "step": 19678 }, { "epoch": 0.25571962972047113, "grad_norm": 0.43208250403404236, "learning_rate": 0.00014888678043644966, "loss": 1.5267, "step": 19679 }, { "epoch": 0.255732624264387, "grad_norm": 0.4606996476650238, "learning_rate": 0.00014888418097453826, "loss": 1.4772, "step": 19680 }, { "epoch": 0.2557456188083029, "grad_norm": 0.32310324907302856, "learning_rate": 0.0001488815815126269, "loss": 1.3419, "step": 19681 }, { "epoch": 0.2557586133522187, "grad_norm": 0.46317756175994873, "learning_rate": 0.0001488789820507155, "loss": 1.3172, "step": 19682 }, { "epoch": 0.2557716078961346, "grad_norm": 0.4242875277996063, "learning_rate": 0.00014887638258880413, "loss": 1.3944, "step": 19683 }, { "epoch": 0.25578460244005047, "grad_norm": 0.4088890254497528, "learning_rate": 0.00014887378312689273, "loss": 1.462, "step": 19684 }, { "epoch": 0.25579759698396637, "grad_norm": 0.474467009305954, "learning_rate": 0.00014887118366498135, "loss": 1.5759, "step": 19685 }, { "epoch": 0.2558105915278822, "grad_norm": 0.4188452363014221, "learning_rate": 0.00014886858420306998, "loss": 1.39, "step": 19686 }, { "epoch": 0.2558235860717981, "grad_norm": 0.3549194633960724, "learning_rate": 0.00014886598474115858, "loss": 1.4429, "step": 19687 }, { "epoch": 0.25583658061571396, "grad_norm": 0.3320971727371216, "learning_rate": 0.0001488633852792472, "loss": 1.2474, "step": 19688 }, { "epoch": 0.25584957515962986, "grad_norm": 0.38851800560951233, "learning_rate": 0.00014886078581733582, "loss": 1.2246, "step": 19689 }, { "epoch": 0.2558625697035457, "grad_norm": 0.3691411018371582, "learning_rate": 0.00014885818635542442, "loss": 1.3217, "step": 19690 }, { "epoch": 0.2558755642474616, "grad_norm": 0.420337051153183, "learning_rate": 0.00014885558689351305, "loss": 1.4315, "step": 19691 }, { "epoch": 0.25588855879137745, "grad_norm": 0.4311297833919525, "learning_rate": 0.00014885298743160167, "loss": 1.4508, "step": 19692 }, { "epoch": 0.25590155333529335, "grad_norm": 0.3846625089645386, "learning_rate": 0.0001488503879696903, "loss": 1.275, "step": 19693 }, { "epoch": 0.2559145478792092, "grad_norm": 0.2988477647304535, "learning_rate": 0.0001488477885077789, "loss": 1.4861, "step": 19694 }, { "epoch": 0.2559275424231251, "grad_norm": 0.45092031359672546, "learning_rate": 0.00014884518904586752, "loss": 1.4046, "step": 19695 }, { "epoch": 0.25594053696704094, "grad_norm": 0.2920062839984894, "learning_rate": 0.00014884258958395614, "loss": 1.444, "step": 19696 }, { "epoch": 0.25595353151095684, "grad_norm": 0.40450745820999146, "learning_rate": 0.00014883999012204474, "loss": 1.3732, "step": 19697 }, { "epoch": 0.2559665260548727, "grad_norm": 0.353371262550354, "learning_rate": 0.00014883739066013336, "loss": 1.4188, "step": 19698 }, { "epoch": 0.2559795205987886, "grad_norm": 0.3861452639102936, "learning_rate": 0.00014883479119822196, "loss": 1.3263, "step": 19699 }, { "epoch": 0.25599251514270444, "grad_norm": 0.4692375063896179, "learning_rate": 0.0001488321917363106, "loss": 1.2456, "step": 19700 }, { "epoch": 0.25600550968662034, "grad_norm": 0.4575304687023163, "learning_rate": 0.0001488295922743992, "loss": 1.3955, "step": 19701 }, { "epoch": 0.2560185042305362, "grad_norm": 0.412777304649353, "learning_rate": 0.0001488269928124878, "loss": 1.4081, "step": 19702 }, { "epoch": 0.2560314987744521, "grad_norm": 0.45569878816604614, "learning_rate": 0.00014882439335057643, "loss": 1.5099, "step": 19703 }, { "epoch": 0.2560444933183679, "grad_norm": 0.4113287031650543, "learning_rate": 0.00014882179388866506, "loss": 1.4169, "step": 19704 }, { "epoch": 0.25605748786228383, "grad_norm": 0.4119247496128082, "learning_rate": 0.00014881919442675368, "loss": 1.3847, "step": 19705 }, { "epoch": 0.2560704824061997, "grad_norm": 0.4179016649723053, "learning_rate": 0.00014881659496484228, "loss": 1.4449, "step": 19706 }, { "epoch": 0.2560834769501156, "grad_norm": 0.5122266411781311, "learning_rate": 0.0001488139955029309, "loss": 1.453, "step": 19707 }, { "epoch": 0.2560964714940314, "grad_norm": 0.3612705171108246, "learning_rate": 0.00014881139604101953, "loss": 1.4874, "step": 19708 }, { "epoch": 0.2561094660379473, "grad_norm": 0.32217028737068176, "learning_rate": 0.00014880879657910812, "loss": 1.3092, "step": 19709 }, { "epoch": 0.25612246058186316, "grad_norm": 0.5130923390388489, "learning_rate": 0.00014880619711719675, "loss": 1.564, "step": 19710 }, { "epoch": 0.25613545512577907, "grad_norm": 0.4182952344417572, "learning_rate": 0.00014880359765528535, "loss": 1.2655, "step": 19711 }, { "epoch": 0.2561484496696949, "grad_norm": 0.35896793007850647, "learning_rate": 0.000148800998193374, "loss": 1.5231, "step": 19712 }, { "epoch": 0.2561614442136108, "grad_norm": 0.4089457094669342, "learning_rate": 0.0001487983987314626, "loss": 1.6013, "step": 19713 }, { "epoch": 0.25617443875752666, "grad_norm": 0.5242648124694824, "learning_rate": 0.0001487957992695512, "loss": 1.4914, "step": 19714 }, { "epoch": 0.25618743330144256, "grad_norm": 0.42909014225006104, "learning_rate": 0.00014879319980763982, "loss": 1.5397, "step": 19715 }, { "epoch": 0.2562004278453584, "grad_norm": 0.4052775800228119, "learning_rate": 0.00014879060034572844, "loss": 1.7156, "step": 19716 }, { "epoch": 0.2562134223892743, "grad_norm": 0.39925897121429443, "learning_rate": 0.00014878800088381707, "loss": 1.5665, "step": 19717 }, { "epoch": 0.2562264169331902, "grad_norm": 0.4118512272834778, "learning_rate": 0.00014878540142190566, "loss": 1.3955, "step": 19718 }, { "epoch": 0.25623941147710605, "grad_norm": 0.40068572759628296, "learning_rate": 0.0001487828019599943, "loss": 1.4006, "step": 19719 }, { "epoch": 0.25625240602102195, "grad_norm": 0.3560904562473297, "learning_rate": 0.0001487802024980829, "loss": 1.2326, "step": 19720 }, { "epoch": 0.2562654005649378, "grad_norm": 0.35685473680496216, "learning_rate": 0.0001487776030361715, "loss": 1.3766, "step": 19721 }, { "epoch": 0.2562783951088537, "grad_norm": 0.3998667895793915, "learning_rate": 0.00014877500357426013, "loss": 1.511, "step": 19722 }, { "epoch": 0.25629138965276954, "grad_norm": 0.33287513256073, "learning_rate": 0.00014877240411234873, "loss": 1.3826, "step": 19723 }, { "epoch": 0.25630438419668544, "grad_norm": 0.5380807518959045, "learning_rate": 0.00014876980465043738, "loss": 1.283, "step": 19724 }, { "epoch": 0.2563173787406013, "grad_norm": 0.3904716372489929, "learning_rate": 0.00014876720518852598, "loss": 1.2334, "step": 19725 }, { "epoch": 0.2563303732845172, "grad_norm": 0.36384886503219604, "learning_rate": 0.0001487646057266146, "loss": 1.4242, "step": 19726 }, { "epoch": 0.25634336782843303, "grad_norm": 0.40154126286506653, "learning_rate": 0.00014876200626470323, "loss": 1.4757, "step": 19727 }, { "epoch": 0.25635636237234893, "grad_norm": 0.3741087317466736, "learning_rate": 0.00014875940680279183, "loss": 1.4073, "step": 19728 }, { "epoch": 0.2563693569162648, "grad_norm": 0.3878123164176941, "learning_rate": 0.00014875680734088045, "loss": 1.4382, "step": 19729 }, { "epoch": 0.2563823514601807, "grad_norm": 0.5249066948890686, "learning_rate": 0.00014875420787896905, "loss": 1.4512, "step": 19730 }, { "epoch": 0.2563953460040965, "grad_norm": 0.26608163118362427, "learning_rate": 0.00014875160841705767, "loss": 1.3231, "step": 19731 }, { "epoch": 0.2564083405480124, "grad_norm": 0.2851769030094147, "learning_rate": 0.0001487490089551463, "loss": 1.3595, "step": 19732 }, { "epoch": 0.25642133509192827, "grad_norm": 0.4020291268825531, "learning_rate": 0.0001487464094932349, "loss": 1.4306, "step": 19733 }, { "epoch": 0.25643432963584417, "grad_norm": 0.31380996108055115, "learning_rate": 0.00014874381003132352, "loss": 1.3461, "step": 19734 }, { "epoch": 0.25644732417976, "grad_norm": 0.361942321062088, "learning_rate": 0.00014874121056941214, "loss": 1.5764, "step": 19735 }, { "epoch": 0.2564603187236759, "grad_norm": 0.3764529228210449, "learning_rate": 0.00014873861110750077, "loss": 1.4292, "step": 19736 }, { "epoch": 0.25647331326759176, "grad_norm": 0.36036935448646545, "learning_rate": 0.00014873601164558937, "loss": 1.4322, "step": 19737 }, { "epoch": 0.25648630781150766, "grad_norm": 0.4236028790473938, "learning_rate": 0.000148733412183678, "loss": 1.3712, "step": 19738 }, { "epoch": 0.2564993023554235, "grad_norm": 0.3441009223461151, "learning_rate": 0.00014873081272176662, "loss": 1.2628, "step": 19739 }, { "epoch": 0.2565122968993394, "grad_norm": 0.3482554852962494, "learning_rate": 0.0001487282132598552, "loss": 1.4261, "step": 19740 }, { "epoch": 0.25652529144325525, "grad_norm": 0.32627928256988525, "learning_rate": 0.00014872561379794384, "loss": 1.546, "step": 19741 }, { "epoch": 0.25653828598717116, "grad_norm": 0.45293572545051575, "learning_rate": 0.00014872301433603243, "loss": 1.2624, "step": 19742 }, { "epoch": 0.256551280531087, "grad_norm": 0.3377649784088135, "learning_rate": 0.00014872041487412106, "loss": 1.3445, "step": 19743 }, { "epoch": 0.2565642750750029, "grad_norm": 0.3097911477088928, "learning_rate": 0.00014871781541220968, "loss": 1.399, "step": 19744 }, { "epoch": 0.25657726961891875, "grad_norm": 0.5099747180938721, "learning_rate": 0.00014871521595029828, "loss": 1.4224, "step": 19745 }, { "epoch": 0.25659026416283465, "grad_norm": 0.42007213830947876, "learning_rate": 0.0001487126164883869, "loss": 1.4047, "step": 19746 }, { "epoch": 0.2566032587067505, "grad_norm": 0.4278099536895752, "learning_rate": 0.00014871001702647553, "loss": 1.3473, "step": 19747 }, { "epoch": 0.2566162532506664, "grad_norm": 0.3885868191719055, "learning_rate": 0.00014870741756456415, "loss": 1.3933, "step": 19748 }, { "epoch": 0.25662924779458224, "grad_norm": 0.3276718258857727, "learning_rate": 0.00014870481810265275, "loss": 1.4491, "step": 19749 }, { "epoch": 0.25664224233849814, "grad_norm": 0.5262018442153931, "learning_rate": 0.00014870221864074138, "loss": 1.4612, "step": 19750 }, { "epoch": 0.256655236882414, "grad_norm": 0.413718044757843, "learning_rate": 0.00014869961917883, "loss": 1.189, "step": 19751 }, { "epoch": 0.2566682314263299, "grad_norm": 0.405471533536911, "learning_rate": 0.0001486970197169186, "loss": 1.5588, "step": 19752 }, { "epoch": 0.25668122597024573, "grad_norm": 0.4070466458797455, "learning_rate": 0.00014869442025500722, "loss": 1.5992, "step": 19753 }, { "epoch": 0.25669422051416163, "grad_norm": 0.4018714427947998, "learning_rate": 0.00014869182079309582, "loss": 1.4072, "step": 19754 }, { "epoch": 0.2567072150580775, "grad_norm": 0.45151740312576294, "learning_rate": 0.00014868922133118447, "loss": 1.3729, "step": 19755 }, { "epoch": 0.2567202096019934, "grad_norm": 0.4852622449398041, "learning_rate": 0.00014868662186927307, "loss": 1.4663, "step": 19756 }, { "epoch": 0.2567332041459092, "grad_norm": 0.3701883554458618, "learning_rate": 0.00014868402240736167, "loss": 1.4152, "step": 19757 }, { "epoch": 0.2567461986898251, "grad_norm": 0.31140780448913574, "learning_rate": 0.0001486814229454503, "loss": 1.3034, "step": 19758 }, { "epoch": 0.25675919323374097, "grad_norm": 0.3135972023010254, "learning_rate": 0.00014867882348353892, "loss": 1.3016, "step": 19759 }, { "epoch": 0.25677218777765687, "grad_norm": 0.29702073335647583, "learning_rate": 0.00014867622402162754, "loss": 1.3853, "step": 19760 }, { "epoch": 0.2567851823215727, "grad_norm": 0.31660234928131104, "learning_rate": 0.00014867362455971614, "loss": 1.4071, "step": 19761 }, { "epoch": 0.2567981768654886, "grad_norm": 0.4049093723297119, "learning_rate": 0.00014867102509780476, "loss": 1.1623, "step": 19762 }, { "epoch": 0.25681117140940446, "grad_norm": 0.4344634711742401, "learning_rate": 0.00014866842563589339, "loss": 1.5088, "step": 19763 }, { "epoch": 0.25682416595332036, "grad_norm": 0.36794304847717285, "learning_rate": 0.00014866582617398198, "loss": 1.2857, "step": 19764 }, { "epoch": 0.2568371604972362, "grad_norm": 0.39373883605003357, "learning_rate": 0.0001486632267120706, "loss": 1.36, "step": 19765 }, { "epoch": 0.2568501550411521, "grad_norm": 0.34790873527526855, "learning_rate": 0.00014866062725015923, "loss": 1.1421, "step": 19766 }, { "epoch": 0.25686314958506795, "grad_norm": 0.4097789227962494, "learning_rate": 0.00014865802778824786, "loss": 1.4323, "step": 19767 }, { "epoch": 0.25687614412898385, "grad_norm": 0.2604452073574066, "learning_rate": 0.00014865542832633645, "loss": 1.448, "step": 19768 }, { "epoch": 0.2568891386728997, "grad_norm": 0.346098393201828, "learning_rate": 0.00014865282886442505, "loss": 1.3863, "step": 19769 }, { "epoch": 0.2569021332168156, "grad_norm": 0.40293800830841064, "learning_rate": 0.0001486502294025137, "loss": 1.4521, "step": 19770 }, { "epoch": 0.25691512776073144, "grad_norm": 0.4309069812297821, "learning_rate": 0.0001486476299406023, "loss": 1.3924, "step": 19771 }, { "epoch": 0.25692812230464734, "grad_norm": 0.33858928084373474, "learning_rate": 0.00014864503047869093, "loss": 1.4213, "step": 19772 }, { "epoch": 0.2569411168485632, "grad_norm": 0.407690167427063, "learning_rate": 0.00014864243101677952, "loss": 1.4275, "step": 19773 }, { "epoch": 0.2569541113924791, "grad_norm": 0.3626769483089447, "learning_rate": 0.00014863983155486815, "loss": 1.3991, "step": 19774 }, { "epoch": 0.25696710593639494, "grad_norm": 0.4655991196632385, "learning_rate": 0.00014863723209295677, "loss": 1.4511, "step": 19775 }, { "epoch": 0.25698010048031084, "grad_norm": 0.39907339215278625, "learning_rate": 0.00014863463263104537, "loss": 1.6319, "step": 19776 }, { "epoch": 0.2569930950242267, "grad_norm": 0.40345531702041626, "learning_rate": 0.000148632033169134, "loss": 1.5083, "step": 19777 }, { "epoch": 0.2570060895681426, "grad_norm": 0.4085056781768799, "learning_rate": 0.00014862943370722262, "loss": 1.6034, "step": 19778 }, { "epoch": 0.2570190841120584, "grad_norm": 0.3583461046218872, "learning_rate": 0.00014862683424531124, "loss": 1.2385, "step": 19779 }, { "epoch": 0.2570320786559743, "grad_norm": 0.39973992109298706, "learning_rate": 0.00014862423478339984, "loss": 1.318, "step": 19780 }, { "epoch": 0.2570450731998902, "grad_norm": 0.41369712352752686, "learning_rate": 0.00014862163532148844, "loss": 1.5932, "step": 19781 }, { "epoch": 0.2570580677438061, "grad_norm": 0.4177628755569458, "learning_rate": 0.0001486190358595771, "loss": 1.5605, "step": 19782 }, { "epoch": 0.2570710622877219, "grad_norm": 0.40126335620880127, "learning_rate": 0.00014861643639766569, "loss": 1.5701, "step": 19783 }, { "epoch": 0.2570840568316378, "grad_norm": 0.4328796863555908, "learning_rate": 0.0001486138369357543, "loss": 1.4917, "step": 19784 }, { "epoch": 0.25709705137555366, "grad_norm": 0.40425291657447815, "learning_rate": 0.0001486112374738429, "loss": 1.3878, "step": 19785 }, { "epoch": 0.25711004591946957, "grad_norm": 0.3638814687728882, "learning_rate": 0.00014860863801193153, "loss": 1.2323, "step": 19786 }, { "epoch": 0.2571230404633854, "grad_norm": 0.42932456731796265, "learning_rate": 0.00014860603855002016, "loss": 1.3454, "step": 19787 }, { "epoch": 0.2571360350073013, "grad_norm": 0.36770328879356384, "learning_rate": 0.00014860343908810875, "loss": 1.252, "step": 19788 }, { "epoch": 0.25714902955121716, "grad_norm": 0.2767905592918396, "learning_rate": 0.00014860083962619738, "loss": 1.4381, "step": 19789 }, { "epoch": 0.25716202409513306, "grad_norm": 0.4000886082649231, "learning_rate": 0.000148598240164286, "loss": 1.592, "step": 19790 }, { "epoch": 0.2571750186390489, "grad_norm": 0.47979533672332764, "learning_rate": 0.00014859564070237463, "loss": 1.474, "step": 19791 }, { "epoch": 0.2571880131829648, "grad_norm": 0.3197050988674164, "learning_rate": 0.00014859304124046323, "loss": 1.326, "step": 19792 }, { "epoch": 0.25720100772688065, "grad_norm": 0.3906276226043701, "learning_rate": 0.00014859044177855185, "loss": 1.3318, "step": 19793 }, { "epoch": 0.25721400227079655, "grad_norm": 0.4051985740661621, "learning_rate": 0.00014858784231664047, "loss": 1.4334, "step": 19794 }, { "epoch": 0.25722699681471245, "grad_norm": 0.4129682779312134, "learning_rate": 0.00014858524285472907, "loss": 1.3189, "step": 19795 }, { "epoch": 0.2572399913586283, "grad_norm": 0.3920605480670929, "learning_rate": 0.0001485826433928177, "loss": 1.2422, "step": 19796 }, { "epoch": 0.2572529859025442, "grad_norm": 0.26474153995513916, "learning_rate": 0.0001485800439309063, "loss": 1.3911, "step": 19797 }, { "epoch": 0.25726598044646004, "grad_norm": 0.46777454018592834, "learning_rate": 0.00014857744446899492, "loss": 1.381, "step": 19798 }, { "epoch": 0.25727897499037594, "grad_norm": 0.36751025915145874, "learning_rate": 0.00014857484500708354, "loss": 1.4543, "step": 19799 }, { "epoch": 0.2572919695342918, "grad_norm": 0.3731157183647156, "learning_rate": 0.00014857224554517214, "loss": 1.4548, "step": 19800 }, { "epoch": 0.2573049640782077, "grad_norm": 0.3724712133407593, "learning_rate": 0.0001485696460832608, "loss": 1.2211, "step": 19801 }, { "epoch": 0.25731795862212353, "grad_norm": 0.43481382727622986, "learning_rate": 0.0001485670466213494, "loss": 1.4179, "step": 19802 }, { "epoch": 0.25733095316603943, "grad_norm": 0.4335424602031708, "learning_rate": 0.000148564447159438, "loss": 1.3901, "step": 19803 }, { "epoch": 0.2573439477099553, "grad_norm": 0.4344167709350586, "learning_rate": 0.0001485618476975266, "loss": 1.5662, "step": 19804 }, { "epoch": 0.2573569422538712, "grad_norm": 0.36935222148895264, "learning_rate": 0.00014855924823561524, "loss": 1.3493, "step": 19805 }, { "epoch": 0.257369936797787, "grad_norm": 0.2943502962589264, "learning_rate": 0.00014855664877370386, "loss": 1.2453, "step": 19806 }, { "epoch": 0.2573829313417029, "grad_norm": 0.4090955853462219, "learning_rate": 0.00014855404931179246, "loss": 1.2359, "step": 19807 }, { "epoch": 0.25739592588561877, "grad_norm": 0.36399662494659424, "learning_rate": 0.00014855144984988108, "loss": 1.4555, "step": 19808 }, { "epoch": 0.25740892042953467, "grad_norm": 0.4545719027519226, "learning_rate": 0.0001485488503879697, "loss": 1.4715, "step": 19809 }, { "epoch": 0.2574219149734505, "grad_norm": 0.37993332743644714, "learning_rate": 0.0001485462509260583, "loss": 1.5364, "step": 19810 }, { "epoch": 0.2574349095173664, "grad_norm": 0.37425246834754944, "learning_rate": 0.00014854365146414693, "loss": 1.2401, "step": 19811 }, { "epoch": 0.25744790406128226, "grad_norm": 0.4478759467601776, "learning_rate": 0.00014854105200223553, "loss": 1.5776, "step": 19812 }, { "epoch": 0.25746089860519816, "grad_norm": 0.37398451566696167, "learning_rate": 0.00014853845254032418, "loss": 1.3112, "step": 19813 }, { "epoch": 0.257473893149114, "grad_norm": 0.4248572587966919, "learning_rate": 0.00014853585307841277, "loss": 1.3554, "step": 19814 }, { "epoch": 0.2574868876930299, "grad_norm": 0.41222965717315674, "learning_rate": 0.0001485332536165014, "loss": 1.3743, "step": 19815 }, { "epoch": 0.25749988223694575, "grad_norm": 0.29611456394195557, "learning_rate": 0.00014853065415459, "loss": 1.4479, "step": 19816 }, { "epoch": 0.25751287678086165, "grad_norm": 0.38925328850746155, "learning_rate": 0.00014852805469267862, "loss": 1.1967, "step": 19817 }, { "epoch": 0.2575258713247775, "grad_norm": 0.3814679980278015, "learning_rate": 0.00014852545523076724, "loss": 1.652, "step": 19818 }, { "epoch": 0.2575388658686934, "grad_norm": 0.45444709062576294, "learning_rate": 0.00014852285576885584, "loss": 1.4584, "step": 19819 }, { "epoch": 0.25755186041260925, "grad_norm": 0.44860517978668213, "learning_rate": 0.00014852025630694447, "loss": 1.5225, "step": 19820 }, { "epoch": 0.25756485495652515, "grad_norm": 0.3990727961063385, "learning_rate": 0.0001485176568450331, "loss": 1.3546, "step": 19821 }, { "epoch": 0.257577849500441, "grad_norm": 0.4320986866950989, "learning_rate": 0.00014851505738312172, "loss": 1.4597, "step": 19822 }, { "epoch": 0.2575908440443569, "grad_norm": 0.46973755955696106, "learning_rate": 0.0001485124579212103, "loss": 1.3869, "step": 19823 }, { "epoch": 0.25760383858827274, "grad_norm": 0.34555894136428833, "learning_rate": 0.0001485098584592989, "loss": 1.2505, "step": 19824 }, { "epoch": 0.25761683313218864, "grad_norm": 0.43511444330215454, "learning_rate": 0.00014850725899738756, "loss": 1.4422, "step": 19825 }, { "epoch": 0.2576298276761045, "grad_norm": 0.35832250118255615, "learning_rate": 0.00014850465953547616, "loss": 1.2538, "step": 19826 }, { "epoch": 0.2576428222200204, "grad_norm": 0.3831486403942108, "learning_rate": 0.00014850206007356478, "loss": 1.3276, "step": 19827 }, { "epoch": 0.25765581676393623, "grad_norm": 0.38330164551734924, "learning_rate": 0.00014849946061165338, "loss": 1.3, "step": 19828 }, { "epoch": 0.25766881130785213, "grad_norm": 0.36269333958625793, "learning_rate": 0.000148496861149742, "loss": 1.5511, "step": 19829 }, { "epoch": 0.257681805851768, "grad_norm": 0.47035592794418335, "learning_rate": 0.00014849426168783063, "loss": 1.6231, "step": 19830 }, { "epoch": 0.2576948003956839, "grad_norm": 0.32775580883026123, "learning_rate": 0.00014849166222591923, "loss": 1.4256, "step": 19831 }, { "epoch": 0.2577077949395997, "grad_norm": 0.3980858325958252, "learning_rate": 0.00014848906276400785, "loss": 1.3851, "step": 19832 }, { "epoch": 0.2577207894835156, "grad_norm": 0.31603139638900757, "learning_rate": 0.00014848646330209648, "loss": 1.3909, "step": 19833 }, { "epoch": 0.25773378402743147, "grad_norm": 0.5892931222915649, "learning_rate": 0.0001484838638401851, "loss": 1.462, "step": 19834 }, { "epoch": 0.25774677857134737, "grad_norm": 0.4033832848072052, "learning_rate": 0.0001484812643782737, "loss": 1.603, "step": 19835 }, { "epoch": 0.2577597731152632, "grad_norm": 0.4719257950782776, "learning_rate": 0.00014847866491636232, "loss": 1.3801, "step": 19836 }, { "epoch": 0.2577727676591791, "grad_norm": 0.2986607253551483, "learning_rate": 0.00014847606545445095, "loss": 1.3932, "step": 19837 }, { "epoch": 0.25778576220309496, "grad_norm": 0.38879287242889404, "learning_rate": 0.00014847346599253954, "loss": 1.4757, "step": 19838 }, { "epoch": 0.25779875674701086, "grad_norm": 0.36992597579956055, "learning_rate": 0.00014847086653062817, "loss": 1.6562, "step": 19839 }, { "epoch": 0.2578117512909267, "grad_norm": 0.31835880875587463, "learning_rate": 0.0001484682670687168, "loss": 1.4312, "step": 19840 }, { "epoch": 0.2578247458348426, "grad_norm": 0.3240465521812439, "learning_rate": 0.0001484656676068054, "loss": 1.3491, "step": 19841 }, { "epoch": 0.25783774037875845, "grad_norm": 0.5762081146240234, "learning_rate": 0.00014846306814489402, "loss": 1.2901, "step": 19842 }, { "epoch": 0.25785073492267435, "grad_norm": 0.4779595136642456, "learning_rate": 0.0001484604686829826, "loss": 1.3934, "step": 19843 }, { "epoch": 0.2578637294665902, "grad_norm": 0.3311115503311157, "learning_rate": 0.00014845786922107126, "loss": 1.4032, "step": 19844 }, { "epoch": 0.2578767240105061, "grad_norm": 0.3485088646411896, "learning_rate": 0.00014845526975915986, "loss": 1.6217, "step": 19845 }, { "epoch": 0.25788971855442194, "grad_norm": 0.43883562088012695, "learning_rate": 0.0001484526702972485, "loss": 1.5096, "step": 19846 }, { "epoch": 0.25790271309833784, "grad_norm": 0.41075778007507324, "learning_rate": 0.00014845007083533708, "loss": 1.4138, "step": 19847 }, { "epoch": 0.2579157076422537, "grad_norm": 0.39212271571159363, "learning_rate": 0.0001484474713734257, "loss": 1.4253, "step": 19848 }, { "epoch": 0.2579287021861696, "grad_norm": 0.45918703079223633, "learning_rate": 0.00014844487191151433, "loss": 1.4387, "step": 19849 }, { "epoch": 0.25794169673008543, "grad_norm": 0.36436140537261963, "learning_rate": 0.00014844227244960293, "loss": 1.4624, "step": 19850 }, { "epoch": 0.25795469127400134, "grad_norm": 0.36109301447868347, "learning_rate": 0.00014843967298769155, "loss": 1.2941, "step": 19851 }, { "epoch": 0.2579676858179172, "grad_norm": 0.26176226139068604, "learning_rate": 0.00014843707352578018, "loss": 1.3627, "step": 19852 }, { "epoch": 0.2579806803618331, "grad_norm": 0.33842501044273376, "learning_rate": 0.00014843447406386878, "loss": 1.192, "step": 19853 }, { "epoch": 0.2579936749057489, "grad_norm": 0.4383045732975006, "learning_rate": 0.0001484318746019574, "loss": 1.4028, "step": 19854 }, { "epoch": 0.2580066694496648, "grad_norm": 0.36733677983283997, "learning_rate": 0.000148429275140046, "loss": 1.4541, "step": 19855 }, { "epoch": 0.2580196639935807, "grad_norm": 0.2952321767807007, "learning_rate": 0.00014842667567813465, "loss": 1.35, "step": 19856 }, { "epoch": 0.2580326585374966, "grad_norm": 0.4473154842853546, "learning_rate": 0.00014842407621622325, "loss": 1.4635, "step": 19857 }, { "epoch": 0.2580456530814124, "grad_norm": 0.7557210326194763, "learning_rate": 0.00014842147675431187, "loss": 1.3226, "step": 19858 }, { "epoch": 0.2580586476253283, "grad_norm": 0.34873929619789124, "learning_rate": 0.00014841887729240047, "loss": 1.4924, "step": 19859 }, { "epoch": 0.25807164216924416, "grad_norm": 0.3867649734020233, "learning_rate": 0.0001484162778304891, "loss": 1.4229, "step": 19860 }, { "epoch": 0.25808463671316006, "grad_norm": 0.3477613031864166, "learning_rate": 0.00014841367836857772, "loss": 1.3157, "step": 19861 }, { "epoch": 0.2580976312570759, "grad_norm": 0.3585951328277588, "learning_rate": 0.00014841107890666632, "loss": 1.3972, "step": 19862 }, { "epoch": 0.2581106258009918, "grad_norm": 0.40776291489601135, "learning_rate": 0.00014840847944475494, "loss": 1.3709, "step": 19863 }, { "epoch": 0.25812362034490766, "grad_norm": 0.3480892777442932, "learning_rate": 0.00014840587998284356, "loss": 1.1016, "step": 19864 }, { "epoch": 0.25813661488882356, "grad_norm": 0.3900054097175598, "learning_rate": 0.00014840328052093216, "loss": 1.5474, "step": 19865 }, { "epoch": 0.2581496094327394, "grad_norm": 0.42429476976394653, "learning_rate": 0.0001484006810590208, "loss": 1.504, "step": 19866 }, { "epoch": 0.2581626039766553, "grad_norm": 0.30723729729652405, "learning_rate": 0.00014839808159710938, "loss": 1.3978, "step": 19867 }, { "epoch": 0.25817559852057115, "grad_norm": 0.4251982569694519, "learning_rate": 0.00014839548213519804, "loss": 1.5004, "step": 19868 }, { "epoch": 0.25818859306448705, "grad_norm": 0.3205110728740692, "learning_rate": 0.00014839288267328663, "loss": 1.6192, "step": 19869 }, { "epoch": 0.25820158760840295, "grad_norm": 0.34095466136932373, "learning_rate": 0.00014839028321137526, "loss": 1.387, "step": 19870 }, { "epoch": 0.2582145821523188, "grad_norm": 0.22871388494968414, "learning_rate": 0.00014838768374946385, "loss": 1.2076, "step": 19871 }, { "epoch": 0.2582275766962347, "grad_norm": 0.4135155975818634, "learning_rate": 0.00014838508428755248, "loss": 1.31, "step": 19872 }, { "epoch": 0.25824057124015054, "grad_norm": 0.40966808795928955, "learning_rate": 0.0001483824848256411, "loss": 1.3069, "step": 19873 }, { "epoch": 0.25825356578406644, "grad_norm": 0.4008606970310211, "learning_rate": 0.0001483798853637297, "loss": 1.5713, "step": 19874 }, { "epoch": 0.2582665603279823, "grad_norm": 0.3630692958831787, "learning_rate": 0.00014837728590181835, "loss": 1.4669, "step": 19875 }, { "epoch": 0.2582795548718982, "grad_norm": 0.4633021056652069, "learning_rate": 0.00014837468643990695, "loss": 1.297, "step": 19876 }, { "epoch": 0.25829254941581403, "grad_norm": 0.36767396330833435, "learning_rate": 0.00014837208697799557, "loss": 1.2831, "step": 19877 }, { "epoch": 0.25830554395972993, "grad_norm": 0.49484503269195557, "learning_rate": 0.00014836948751608417, "loss": 1.3405, "step": 19878 }, { "epoch": 0.2583185385036458, "grad_norm": 0.38587021827697754, "learning_rate": 0.0001483668880541728, "loss": 1.536, "step": 19879 }, { "epoch": 0.2583315330475617, "grad_norm": 0.5180341005325317, "learning_rate": 0.00014836428859226142, "loss": 1.4147, "step": 19880 }, { "epoch": 0.2583445275914775, "grad_norm": 0.47908255457878113, "learning_rate": 0.00014836168913035002, "loss": 1.5092, "step": 19881 }, { "epoch": 0.2583575221353934, "grad_norm": 0.4228743314743042, "learning_rate": 0.00014835908966843864, "loss": 1.3845, "step": 19882 }, { "epoch": 0.25837051667930927, "grad_norm": 0.36705100536346436, "learning_rate": 0.00014835649020652727, "loss": 1.6856, "step": 19883 }, { "epoch": 0.25838351122322517, "grad_norm": 0.4564898610115051, "learning_rate": 0.00014835389074461586, "loss": 1.5801, "step": 19884 }, { "epoch": 0.258396505767141, "grad_norm": 0.3571053743362427, "learning_rate": 0.0001483512912827045, "loss": 1.4008, "step": 19885 }, { "epoch": 0.2584095003110569, "grad_norm": 0.270420104265213, "learning_rate": 0.0001483486918207931, "loss": 1.2654, "step": 19886 }, { "epoch": 0.25842249485497276, "grad_norm": 0.3955066204071045, "learning_rate": 0.00014834609235888174, "loss": 1.5701, "step": 19887 }, { "epoch": 0.25843548939888866, "grad_norm": 0.4124511182308197, "learning_rate": 0.00014834349289697034, "loss": 1.3096, "step": 19888 }, { "epoch": 0.2584484839428045, "grad_norm": 0.3961043655872345, "learning_rate": 0.00014834089343505896, "loss": 1.3106, "step": 19889 }, { "epoch": 0.2584614784867204, "grad_norm": 0.3272870182991028, "learning_rate": 0.00014833829397314756, "loss": 1.2618, "step": 19890 }, { "epoch": 0.25847447303063625, "grad_norm": 0.31888192892074585, "learning_rate": 0.00014833569451123618, "loss": 1.3127, "step": 19891 }, { "epoch": 0.25848746757455215, "grad_norm": 0.43939170241355896, "learning_rate": 0.0001483330950493248, "loss": 1.5248, "step": 19892 }, { "epoch": 0.258500462118468, "grad_norm": 0.4425123631954193, "learning_rate": 0.0001483304955874134, "loss": 1.3877, "step": 19893 }, { "epoch": 0.2585134566623839, "grad_norm": 0.363163024187088, "learning_rate": 0.00014832789612550203, "loss": 1.4747, "step": 19894 }, { "epoch": 0.25852645120629975, "grad_norm": 0.347953736782074, "learning_rate": 0.00014832529666359065, "loss": 1.3415, "step": 19895 }, { "epoch": 0.25853944575021565, "grad_norm": 0.47498077154159546, "learning_rate": 0.00014832269720167925, "loss": 1.5627, "step": 19896 }, { "epoch": 0.2585524402941315, "grad_norm": 0.392143577337265, "learning_rate": 0.00014832009773976787, "loss": 1.2656, "step": 19897 }, { "epoch": 0.2585654348380474, "grad_norm": 0.5318813323974609, "learning_rate": 0.00014831749827785647, "loss": 1.5933, "step": 19898 }, { "epoch": 0.25857842938196324, "grad_norm": 0.4562159776687622, "learning_rate": 0.00014831489881594512, "loss": 1.3945, "step": 19899 }, { "epoch": 0.25859142392587914, "grad_norm": 0.3080541491508484, "learning_rate": 0.00014831229935403372, "loss": 1.2794, "step": 19900 }, { "epoch": 0.258604418469795, "grad_norm": 0.38244175910949707, "learning_rate": 0.00014830969989212235, "loss": 1.3745, "step": 19901 }, { "epoch": 0.2586174130137109, "grad_norm": 0.36817625164985657, "learning_rate": 0.00014830710043021094, "loss": 1.3873, "step": 19902 }, { "epoch": 0.25863040755762673, "grad_norm": 0.3452102243900299, "learning_rate": 0.00014830450096829957, "loss": 1.4399, "step": 19903 }, { "epoch": 0.25864340210154263, "grad_norm": 0.41950735449790955, "learning_rate": 0.0001483019015063882, "loss": 1.4612, "step": 19904 }, { "epoch": 0.2586563966454585, "grad_norm": 0.3153369426727295, "learning_rate": 0.0001482993020444768, "loss": 1.2716, "step": 19905 }, { "epoch": 0.2586693911893744, "grad_norm": 0.4001500606536865, "learning_rate": 0.0001482967025825654, "loss": 1.3932, "step": 19906 }, { "epoch": 0.2586823857332902, "grad_norm": 0.4432854652404785, "learning_rate": 0.00014829410312065404, "loss": 1.473, "step": 19907 }, { "epoch": 0.2586953802772061, "grad_norm": 0.3478623330593109, "learning_rate": 0.00014829150365874264, "loss": 1.2928, "step": 19908 }, { "epoch": 0.25870837482112197, "grad_norm": 0.4289468824863434, "learning_rate": 0.00014828890419683126, "loss": 1.3406, "step": 19909 }, { "epoch": 0.25872136936503787, "grad_norm": 0.41697534918785095, "learning_rate": 0.00014828630473491988, "loss": 1.3225, "step": 19910 }, { "epoch": 0.2587343639089537, "grad_norm": 0.40126150846481323, "learning_rate": 0.0001482837052730085, "loss": 1.4203, "step": 19911 }, { "epoch": 0.2587473584528696, "grad_norm": 0.4429757595062256, "learning_rate": 0.0001482811058110971, "loss": 1.2334, "step": 19912 }, { "epoch": 0.25876035299678546, "grad_norm": 0.3996281921863556, "learning_rate": 0.00014827850634918573, "loss": 1.3844, "step": 19913 }, { "epoch": 0.25877334754070136, "grad_norm": 0.46142658591270447, "learning_rate": 0.00014827590688727436, "loss": 1.5765, "step": 19914 }, { "epoch": 0.2587863420846172, "grad_norm": 0.43726444244384766, "learning_rate": 0.00014827330742536295, "loss": 1.4275, "step": 19915 }, { "epoch": 0.2587993366285331, "grad_norm": 0.3690805435180664, "learning_rate": 0.00014827070796345158, "loss": 1.4337, "step": 19916 }, { "epoch": 0.25881233117244895, "grad_norm": 0.35200124979019165, "learning_rate": 0.00014826810850154017, "loss": 1.2507, "step": 19917 }, { "epoch": 0.25882532571636485, "grad_norm": 0.48526984453201294, "learning_rate": 0.00014826550903962883, "loss": 1.4562, "step": 19918 }, { "epoch": 0.2588383202602807, "grad_norm": 0.28235483169555664, "learning_rate": 0.00014826290957771742, "loss": 1.1081, "step": 19919 }, { "epoch": 0.2588513148041966, "grad_norm": 0.46671703457832336, "learning_rate": 0.00014826031011580602, "loss": 1.3082, "step": 19920 }, { "epoch": 0.25886430934811244, "grad_norm": 0.42913997173309326, "learning_rate": 0.00014825771065389465, "loss": 1.3711, "step": 19921 }, { "epoch": 0.25887730389202834, "grad_norm": 0.43390780687332153, "learning_rate": 0.00014825511119198327, "loss": 1.136, "step": 19922 }, { "epoch": 0.2588902984359442, "grad_norm": 0.4601288437843323, "learning_rate": 0.0001482525117300719, "loss": 1.4765, "step": 19923 }, { "epoch": 0.2589032929798601, "grad_norm": 0.3857990801334381, "learning_rate": 0.0001482499122681605, "loss": 1.1873, "step": 19924 }, { "epoch": 0.25891628752377593, "grad_norm": 0.4280492663383484, "learning_rate": 0.00014824731280624912, "loss": 1.3045, "step": 19925 }, { "epoch": 0.25892928206769183, "grad_norm": 0.36360520124435425, "learning_rate": 0.00014824471334433774, "loss": 1.2402, "step": 19926 }, { "epoch": 0.2589422766116077, "grad_norm": 0.32240432500839233, "learning_rate": 0.00014824211388242634, "loss": 1.2962, "step": 19927 }, { "epoch": 0.2589552711555236, "grad_norm": 0.3881559371948242, "learning_rate": 0.00014823951442051496, "loss": 1.4118, "step": 19928 }, { "epoch": 0.2589682656994394, "grad_norm": 0.40604355931282043, "learning_rate": 0.00014823691495860356, "loss": 1.5554, "step": 19929 }, { "epoch": 0.2589812602433553, "grad_norm": 0.3437977135181427, "learning_rate": 0.0001482343154966922, "loss": 1.2844, "step": 19930 }, { "epoch": 0.25899425478727117, "grad_norm": 0.3995177149772644, "learning_rate": 0.0001482317160347808, "loss": 1.6499, "step": 19931 }, { "epoch": 0.2590072493311871, "grad_norm": 0.4572320878505707, "learning_rate": 0.00014822911657286943, "loss": 1.3128, "step": 19932 }, { "epoch": 0.2590202438751029, "grad_norm": 0.37138083577156067, "learning_rate": 0.00014822651711095803, "loss": 1.323, "step": 19933 }, { "epoch": 0.2590332384190188, "grad_norm": 0.543493390083313, "learning_rate": 0.00014822391764904666, "loss": 1.4226, "step": 19934 }, { "epoch": 0.25904623296293466, "grad_norm": 0.42756617069244385, "learning_rate": 0.00014822131818713528, "loss": 1.5851, "step": 19935 }, { "epoch": 0.25905922750685056, "grad_norm": 0.34705233573913574, "learning_rate": 0.00014821871872522388, "loss": 1.4637, "step": 19936 }, { "epoch": 0.2590722220507664, "grad_norm": 0.4670591652393341, "learning_rate": 0.0001482161192633125, "loss": 1.4106, "step": 19937 }, { "epoch": 0.2590852165946823, "grad_norm": 0.3798062205314636, "learning_rate": 0.00014821351980140113, "loss": 1.6454, "step": 19938 }, { "epoch": 0.25909821113859816, "grad_norm": 0.40061119198799133, "learning_rate": 0.00014821092033948972, "loss": 1.4775, "step": 19939 }, { "epoch": 0.25911120568251406, "grad_norm": 0.4153488576412201, "learning_rate": 0.00014820832087757835, "loss": 1.3671, "step": 19940 }, { "epoch": 0.2591242002264299, "grad_norm": 0.3565073311328888, "learning_rate": 0.00014820572141566695, "loss": 1.6061, "step": 19941 }, { "epoch": 0.2591371947703458, "grad_norm": 0.4369582533836365, "learning_rate": 0.0001482031219537556, "loss": 1.3608, "step": 19942 }, { "epoch": 0.25915018931426165, "grad_norm": 0.5014320611953735, "learning_rate": 0.0001482005224918442, "loss": 1.3902, "step": 19943 }, { "epoch": 0.25916318385817755, "grad_norm": 0.48925432562828064, "learning_rate": 0.00014819792302993282, "loss": 1.4684, "step": 19944 }, { "epoch": 0.2591761784020934, "grad_norm": 0.33795610070228577, "learning_rate": 0.00014819532356802142, "loss": 1.323, "step": 19945 }, { "epoch": 0.2591891729460093, "grad_norm": 0.331469863653183, "learning_rate": 0.00014819272410611004, "loss": 1.3025, "step": 19946 }, { "epoch": 0.2592021674899252, "grad_norm": 0.35915523767471313, "learning_rate": 0.00014819012464419867, "loss": 1.3517, "step": 19947 }, { "epoch": 0.25921516203384104, "grad_norm": 0.49079039692878723, "learning_rate": 0.00014818752518228726, "loss": 1.5311, "step": 19948 }, { "epoch": 0.25922815657775694, "grad_norm": 0.48849114775657654, "learning_rate": 0.0001481849257203759, "loss": 1.568, "step": 19949 }, { "epoch": 0.2592411511216728, "grad_norm": 0.3905053436756134, "learning_rate": 0.0001481823262584645, "loss": 1.3059, "step": 19950 }, { "epoch": 0.2592541456655887, "grad_norm": 0.43965134024620056, "learning_rate": 0.0001481797267965531, "loss": 1.4275, "step": 19951 }, { "epoch": 0.25926714020950453, "grad_norm": 0.3594221770763397, "learning_rate": 0.00014817712733464173, "loss": 1.5361, "step": 19952 }, { "epoch": 0.25928013475342043, "grad_norm": 0.386545866727829, "learning_rate": 0.00014817452787273036, "loss": 1.3875, "step": 19953 }, { "epoch": 0.2592931292973363, "grad_norm": 0.33274510502815247, "learning_rate": 0.00014817192841081898, "loss": 1.2103, "step": 19954 }, { "epoch": 0.2593061238412522, "grad_norm": 0.47073882818222046, "learning_rate": 0.00014816932894890758, "loss": 1.4757, "step": 19955 }, { "epoch": 0.259319118385168, "grad_norm": 0.394117146730423, "learning_rate": 0.0001481667294869962, "loss": 1.4047, "step": 19956 }, { "epoch": 0.2593321129290839, "grad_norm": 0.3804217576980591, "learning_rate": 0.00014816413002508483, "loss": 1.6032, "step": 19957 }, { "epoch": 0.25934510747299977, "grad_norm": 0.3410324454307556, "learning_rate": 0.00014816153056317343, "loss": 1.4341, "step": 19958 }, { "epoch": 0.25935810201691567, "grad_norm": 0.4197107255458832, "learning_rate": 0.00014815893110126205, "loss": 1.3531, "step": 19959 }, { "epoch": 0.2593710965608315, "grad_norm": 0.4598172605037689, "learning_rate": 0.00014815633163935065, "loss": 1.5031, "step": 19960 }, { "epoch": 0.2593840911047474, "grad_norm": 0.33939510583877563, "learning_rate": 0.0001481537321774393, "loss": 1.4511, "step": 19961 }, { "epoch": 0.25939708564866326, "grad_norm": 0.44218817353248596, "learning_rate": 0.0001481511327155279, "loss": 1.4836, "step": 19962 }, { "epoch": 0.25941008019257916, "grad_norm": 0.3660185933113098, "learning_rate": 0.0001481485332536165, "loss": 1.4618, "step": 19963 }, { "epoch": 0.259423074736495, "grad_norm": 0.38963234424591064, "learning_rate": 0.00014814593379170512, "loss": 1.5467, "step": 19964 }, { "epoch": 0.2594360692804109, "grad_norm": 0.31272071599960327, "learning_rate": 0.00014814333432979374, "loss": 1.3584, "step": 19965 }, { "epoch": 0.25944906382432675, "grad_norm": 0.41172417998313904, "learning_rate": 0.00014814073486788237, "loss": 1.2183, "step": 19966 }, { "epoch": 0.25946205836824265, "grad_norm": 0.3378946781158447, "learning_rate": 0.00014813813540597096, "loss": 1.2264, "step": 19967 }, { "epoch": 0.2594750529121585, "grad_norm": 0.4257126748561859, "learning_rate": 0.0001481355359440596, "loss": 1.5005, "step": 19968 }, { "epoch": 0.2594880474560744, "grad_norm": 0.3144511580467224, "learning_rate": 0.00014813293648214821, "loss": 1.4333, "step": 19969 }, { "epoch": 0.25950104199999025, "grad_norm": 0.30821138620376587, "learning_rate": 0.0001481303370202368, "loss": 1.2791, "step": 19970 }, { "epoch": 0.25951403654390615, "grad_norm": 0.3085781931877136, "learning_rate": 0.00014812773755832544, "loss": 1.3864, "step": 19971 }, { "epoch": 0.259527031087822, "grad_norm": 0.412639856338501, "learning_rate": 0.00014812513809641403, "loss": 1.3098, "step": 19972 }, { "epoch": 0.2595400256317379, "grad_norm": 0.33945998549461365, "learning_rate": 0.00014812253863450268, "loss": 1.4027, "step": 19973 }, { "epoch": 0.25955302017565374, "grad_norm": 0.38900020718574524, "learning_rate": 0.00014811993917259128, "loss": 1.323, "step": 19974 }, { "epoch": 0.25956601471956964, "grad_norm": 0.3130693733692169, "learning_rate": 0.00014811733971067988, "loss": 1.5209, "step": 19975 }, { "epoch": 0.2595790092634855, "grad_norm": 0.5098480582237244, "learning_rate": 0.0001481147402487685, "loss": 1.3622, "step": 19976 }, { "epoch": 0.2595920038074014, "grad_norm": 0.3957809507846832, "learning_rate": 0.00014811214078685713, "loss": 1.1881, "step": 19977 }, { "epoch": 0.25960499835131723, "grad_norm": 0.36697855591773987, "learning_rate": 0.00014810954132494575, "loss": 1.2473, "step": 19978 }, { "epoch": 0.25961799289523313, "grad_norm": 0.5590858459472656, "learning_rate": 0.00014810694186303435, "loss": 1.3385, "step": 19979 }, { "epoch": 0.259630987439149, "grad_norm": 0.4463638365268707, "learning_rate": 0.00014810434240112297, "loss": 1.4433, "step": 19980 }, { "epoch": 0.2596439819830649, "grad_norm": 0.35455814003944397, "learning_rate": 0.0001481017429392116, "loss": 1.3965, "step": 19981 }, { "epoch": 0.2596569765269807, "grad_norm": 0.3963063657283783, "learning_rate": 0.0001480991434773002, "loss": 1.5104, "step": 19982 }, { "epoch": 0.2596699710708966, "grad_norm": 0.33794882893562317, "learning_rate": 0.00014809654401538882, "loss": 1.3446, "step": 19983 }, { "epoch": 0.25968296561481247, "grad_norm": 0.3056904375553131, "learning_rate": 0.00014809394455347745, "loss": 1.1081, "step": 19984 }, { "epoch": 0.25969596015872837, "grad_norm": 0.3665129840373993, "learning_rate": 0.00014809134509156607, "loss": 1.5664, "step": 19985 }, { "epoch": 0.2597089547026442, "grad_norm": 0.6793163418769836, "learning_rate": 0.00014808874562965467, "loss": 1.4859, "step": 19986 }, { "epoch": 0.2597219492465601, "grad_norm": 0.3136841952800751, "learning_rate": 0.00014808614616774326, "loss": 1.2335, "step": 19987 }, { "epoch": 0.25973494379047596, "grad_norm": 0.3115212619304657, "learning_rate": 0.00014808354670583192, "loss": 1.3646, "step": 19988 }, { "epoch": 0.25974793833439186, "grad_norm": 0.3132984936237335, "learning_rate": 0.00014808094724392051, "loss": 1.4162, "step": 19989 }, { "epoch": 0.2597609328783077, "grad_norm": 0.5420405864715576, "learning_rate": 0.00014807834778200914, "loss": 1.3965, "step": 19990 }, { "epoch": 0.2597739274222236, "grad_norm": 0.3387608528137207, "learning_rate": 0.00014807574832009774, "loss": 1.471, "step": 19991 }, { "epoch": 0.25978692196613945, "grad_norm": 0.45799100399017334, "learning_rate": 0.00014807314885818636, "loss": 1.4963, "step": 19992 }, { "epoch": 0.25979991651005535, "grad_norm": 0.37677329778671265, "learning_rate": 0.00014807054939627498, "loss": 1.396, "step": 19993 }, { "epoch": 0.2598129110539712, "grad_norm": 0.436212956905365, "learning_rate": 0.00014806794993436358, "loss": 1.5221, "step": 19994 }, { "epoch": 0.2598259055978871, "grad_norm": 0.3971352279186249, "learning_rate": 0.0001480653504724522, "loss": 1.3441, "step": 19995 }, { "epoch": 0.25983890014180294, "grad_norm": 0.4042627513408661, "learning_rate": 0.00014806275101054083, "loss": 1.3809, "step": 19996 }, { "epoch": 0.25985189468571884, "grad_norm": 0.37473365664482117, "learning_rate": 0.00014806015154862946, "loss": 1.4999, "step": 19997 }, { "epoch": 0.2598648892296347, "grad_norm": 0.42937174439430237, "learning_rate": 0.00014805755208671805, "loss": 1.5074, "step": 19998 }, { "epoch": 0.2598778837735506, "grad_norm": 0.4152386486530304, "learning_rate": 0.00014805495262480668, "loss": 1.3692, "step": 19999 }, { "epoch": 0.25989087831746643, "grad_norm": 0.3838444948196411, "learning_rate": 0.0001480523531628953, "loss": 1.2714, "step": 20000 }, { "epoch": 0.25990387286138233, "grad_norm": 0.3570510447025299, "learning_rate": 0.0001480497537009839, "loss": 1.2544, "step": 20001 }, { "epoch": 0.2599168674052982, "grad_norm": 0.3934921324253082, "learning_rate": 0.00014804715423907252, "loss": 1.4345, "step": 20002 }, { "epoch": 0.2599298619492141, "grad_norm": 0.3592262268066406, "learning_rate": 0.00014804455477716112, "loss": 1.3459, "step": 20003 }, { "epoch": 0.2599428564931299, "grad_norm": 0.37760719656944275, "learning_rate": 0.00014804195531524975, "loss": 1.4581, "step": 20004 }, { "epoch": 0.2599558510370458, "grad_norm": 0.431679368019104, "learning_rate": 0.00014803935585333837, "loss": 1.4971, "step": 20005 }, { "epoch": 0.25996884558096167, "grad_norm": 0.35606569051742554, "learning_rate": 0.00014803675639142697, "loss": 1.3987, "step": 20006 }, { "epoch": 0.2599818401248776, "grad_norm": 0.39354535937309265, "learning_rate": 0.0001480341569295156, "loss": 1.4373, "step": 20007 }, { "epoch": 0.2599948346687934, "grad_norm": 0.41023895144462585, "learning_rate": 0.00014803155746760422, "loss": 1.6971, "step": 20008 }, { "epoch": 0.2600078292127093, "grad_norm": 0.4600581228733063, "learning_rate": 0.00014802895800569284, "loss": 1.6419, "step": 20009 }, { "epoch": 0.26002082375662516, "grad_norm": 0.4285810589790344, "learning_rate": 0.00014802635854378144, "loss": 1.5576, "step": 20010 }, { "epoch": 0.26003381830054106, "grad_norm": 0.4394996464252472, "learning_rate": 0.00014802375908187006, "loss": 1.5056, "step": 20011 }, { "epoch": 0.2600468128444569, "grad_norm": 0.3718104660511017, "learning_rate": 0.0001480211596199587, "loss": 1.4, "step": 20012 }, { "epoch": 0.2600598073883728, "grad_norm": 0.4083999991416931, "learning_rate": 0.00014801856015804728, "loss": 1.3433, "step": 20013 }, { "epoch": 0.26007280193228866, "grad_norm": 0.6538832783699036, "learning_rate": 0.0001480159606961359, "loss": 1.4879, "step": 20014 }, { "epoch": 0.26008579647620456, "grad_norm": 0.5382021069526672, "learning_rate": 0.0001480133612342245, "loss": 1.4073, "step": 20015 }, { "epoch": 0.2600987910201204, "grad_norm": 0.41146913170814514, "learning_rate": 0.00014801076177231316, "loss": 1.3624, "step": 20016 }, { "epoch": 0.2601117855640363, "grad_norm": 0.3455566167831421, "learning_rate": 0.00014800816231040176, "loss": 1.4985, "step": 20017 }, { "epoch": 0.26012478010795215, "grad_norm": 0.4496353566646576, "learning_rate": 0.00014800556284849035, "loss": 1.4535, "step": 20018 }, { "epoch": 0.26013777465186805, "grad_norm": 0.4191818833351135, "learning_rate": 0.00014800296338657898, "loss": 1.5009, "step": 20019 }, { "epoch": 0.2601507691957839, "grad_norm": 0.35791489481925964, "learning_rate": 0.0001480003639246676, "loss": 1.4575, "step": 20020 }, { "epoch": 0.2601637637396998, "grad_norm": 0.43893197178840637, "learning_rate": 0.00014799776446275623, "loss": 1.309, "step": 20021 }, { "epoch": 0.2601767582836157, "grad_norm": 0.49037161469459534, "learning_rate": 0.00014799516500084482, "loss": 1.3291, "step": 20022 }, { "epoch": 0.26018975282753154, "grad_norm": 0.38623711466789246, "learning_rate": 0.00014799256553893345, "loss": 1.2752, "step": 20023 }, { "epoch": 0.26020274737144744, "grad_norm": 0.42128702998161316, "learning_rate": 0.00014798996607702207, "loss": 1.3228, "step": 20024 }, { "epoch": 0.2602157419153633, "grad_norm": 0.36215609312057495, "learning_rate": 0.00014798736661511067, "loss": 1.5077, "step": 20025 }, { "epoch": 0.2602287364592792, "grad_norm": 0.5817545652389526, "learning_rate": 0.0001479847671531993, "loss": 1.5628, "step": 20026 }, { "epoch": 0.26024173100319503, "grad_norm": 0.41502395272254944, "learning_rate": 0.00014798216769128792, "loss": 1.465, "step": 20027 }, { "epoch": 0.26025472554711093, "grad_norm": 0.43040773272514343, "learning_rate": 0.00014797956822937654, "loss": 1.4436, "step": 20028 }, { "epoch": 0.2602677200910268, "grad_norm": 0.3085106313228607, "learning_rate": 0.00014797696876746514, "loss": 1.3371, "step": 20029 }, { "epoch": 0.2602807146349427, "grad_norm": 0.3936407268047333, "learning_rate": 0.00014797436930555374, "loss": 1.3838, "step": 20030 }, { "epoch": 0.2602937091788585, "grad_norm": 0.35208559036254883, "learning_rate": 0.0001479717698436424, "loss": 1.4223, "step": 20031 }, { "epoch": 0.2603067037227744, "grad_norm": 0.465328574180603, "learning_rate": 0.000147969170381731, "loss": 1.287, "step": 20032 }, { "epoch": 0.26031969826669027, "grad_norm": 0.37656769156455994, "learning_rate": 0.0001479665709198196, "loss": 1.295, "step": 20033 }, { "epoch": 0.26033269281060617, "grad_norm": 0.34409090876579285, "learning_rate": 0.0001479639714579082, "loss": 1.2714, "step": 20034 }, { "epoch": 0.260345687354522, "grad_norm": 0.3560160994529724, "learning_rate": 0.00014796137199599683, "loss": 1.439, "step": 20035 }, { "epoch": 0.2603586818984379, "grad_norm": 0.3348861634731293, "learning_rate": 0.00014795877253408546, "loss": 1.3228, "step": 20036 }, { "epoch": 0.26037167644235376, "grad_norm": 0.37119099497795105, "learning_rate": 0.00014795617307217406, "loss": 1.1904, "step": 20037 }, { "epoch": 0.26038467098626966, "grad_norm": 0.4423973858356476, "learning_rate": 0.00014795357361026268, "loss": 1.5895, "step": 20038 }, { "epoch": 0.2603976655301855, "grad_norm": 0.44404786825180054, "learning_rate": 0.0001479509741483513, "loss": 1.4094, "step": 20039 }, { "epoch": 0.2604106600741014, "grad_norm": 0.4142516553401947, "learning_rate": 0.00014794837468643993, "loss": 1.3849, "step": 20040 }, { "epoch": 0.26042365461801725, "grad_norm": 0.4309588372707367, "learning_rate": 0.00014794577522452853, "loss": 1.5008, "step": 20041 }, { "epoch": 0.26043664916193315, "grad_norm": 0.45189225673675537, "learning_rate": 0.00014794317576261712, "loss": 1.3865, "step": 20042 }, { "epoch": 0.260449643705849, "grad_norm": 0.41932713985443115, "learning_rate": 0.00014794057630070578, "loss": 1.4993, "step": 20043 }, { "epoch": 0.2604626382497649, "grad_norm": 0.7539716958999634, "learning_rate": 0.00014793797683879437, "loss": 1.4877, "step": 20044 }, { "epoch": 0.26047563279368074, "grad_norm": 0.4239988327026367, "learning_rate": 0.000147935377376883, "loss": 1.5216, "step": 20045 }, { "epoch": 0.26048862733759665, "grad_norm": 0.4229147136211395, "learning_rate": 0.0001479327779149716, "loss": 1.3387, "step": 20046 }, { "epoch": 0.2605016218815125, "grad_norm": 0.3592168390750885, "learning_rate": 0.00014793017845306022, "loss": 1.2966, "step": 20047 }, { "epoch": 0.2605146164254284, "grad_norm": 0.3860403299331665, "learning_rate": 0.00014792757899114884, "loss": 1.6332, "step": 20048 }, { "epoch": 0.26052761096934424, "grad_norm": 0.4260939359664917, "learning_rate": 0.00014792497952923744, "loss": 1.579, "step": 20049 }, { "epoch": 0.26054060551326014, "grad_norm": 0.41257718205451965, "learning_rate": 0.00014792238006732607, "loss": 1.3625, "step": 20050 }, { "epoch": 0.260553600057176, "grad_norm": 0.3556547462940216, "learning_rate": 0.0001479197806054147, "loss": 1.3959, "step": 20051 }, { "epoch": 0.2605665946010919, "grad_norm": 0.3657892346382141, "learning_rate": 0.00014791718114350331, "loss": 1.5483, "step": 20052 }, { "epoch": 0.26057958914500773, "grad_norm": 0.3173982501029968, "learning_rate": 0.0001479145816815919, "loss": 1.5487, "step": 20053 }, { "epoch": 0.26059258368892363, "grad_norm": 0.4258553385734558, "learning_rate": 0.00014791198221968054, "loss": 1.6506, "step": 20054 }, { "epoch": 0.2606055782328395, "grad_norm": 0.46200472116470337, "learning_rate": 0.00014790938275776916, "loss": 1.2226, "step": 20055 }, { "epoch": 0.2606185727767554, "grad_norm": 0.4142913818359375, "learning_rate": 0.00014790678329585776, "loss": 1.37, "step": 20056 }, { "epoch": 0.2606315673206712, "grad_norm": 0.4041662812232971, "learning_rate": 0.00014790418383394638, "loss": 1.5098, "step": 20057 }, { "epoch": 0.2606445618645871, "grad_norm": 0.29881346225738525, "learning_rate": 0.000147901584372035, "loss": 1.2866, "step": 20058 }, { "epoch": 0.26065755640850297, "grad_norm": 0.37747687101364136, "learning_rate": 0.0001478989849101236, "loss": 1.4203, "step": 20059 }, { "epoch": 0.26067055095241887, "grad_norm": 0.4450613558292389, "learning_rate": 0.00014789638544821223, "loss": 1.6244, "step": 20060 }, { "epoch": 0.2606835454963347, "grad_norm": 0.5059861540794373, "learning_rate": 0.00014789378598630083, "loss": 1.5656, "step": 20061 }, { "epoch": 0.2606965400402506, "grad_norm": 0.3857816755771637, "learning_rate": 0.00014789118652438948, "loss": 1.5341, "step": 20062 }, { "epoch": 0.26070953458416646, "grad_norm": 0.505658745765686, "learning_rate": 0.00014788858706247808, "loss": 1.4786, "step": 20063 }, { "epoch": 0.26072252912808236, "grad_norm": 0.3507513701915741, "learning_rate": 0.0001478859876005667, "loss": 1.3486, "step": 20064 }, { "epoch": 0.2607355236719982, "grad_norm": 0.3892856240272522, "learning_rate": 0.0001478833881386553, "loss": 1.3781, "step": 20065 }, { "epoch": 0.2607485182159141, "grad_norm": 0.44830596446990967, "learning_rate": 0.00014788078867674392, "loss": 1.3686, "step": 20066 }, { "epoch": 0.26076151275982995, "grad_norm": 0.3782263994216919, "learning_rate": 0.00014787818921483255, "loss": 1.4312, "step": 20067 }, { "epoch": 0.26077450730374585, "grad_norm": 0.5403381586074829, "learning_rate": 0.00014787558975292114, "loss": 1.3026, "step": 20068 }, { "epoch": 0.2607875018476617, "grad_norm": 0.6702861189842224, "learning_rate": 0.00014787299029100977, "loss": 1.5058, "step": 20069 }, { "epoch": 0.2608004963915776, "grad_norm": 0.34010711312294006, "learning_rate": 0.0001478703908290984, "loss": 1.5275, "step": 20070 }, { "epoch": 0.26081349093549344, "grad_norm": 0.4390709400177002, "learning_rate": 0.000147867791367187, "loss": 1.3433, "step": 20071 }, { "epoch": 0.26082648547940934, "grad_norm": 0.4154433012008667, "learning_rate": 0.00014786519190527561, "loss": 1.5328, "step": 20072 }, { "epoch": 0.2608394800233252, "grad_norm": 0.543128252029419, "learning_rate": 0.0001478625924433642, "loss": 1.5487, "step": 20073 }, { "epoch": 0.2608524745672411, "grad_norm": 0.43565601110458374, "learning_rate": 0.00014785999298145286, "loss": 1.5526, "step": 20074 }, { "epoch": 0.26086546911115693, "grad_norm": 0.4094981849193573, "learning_rate": 0.00014785739351954146, "loss": 1.2936, "step": 20075 }, { "epoch": 0.26087846365507283, "grad_norm": 0.415495365858078, "learning_rate": 0.00014785479405763009, "loss": 1.3533, "step": 20076 }, { "epoch": 0.2608914581989887, "grad_norm": 0.38112905621528625, "learning_rate": 0.00014785219459571868, "loss": 1.4268, "step": 20077 }, { "epoch": 0.2609044527429046, "grad_norm": 0.34138205647468567, "learning_rate": 0.0001478495951338073, "loss": 1.2208, "step": 20078 }, { "epoch": 0.2609174472868204, "grad_norm": 0.43928828835487366, "learning_rate": 0.00014784699567189593, "loss": 1.4356, "step": 20079 }, { "epoch": 0.2609304418307363, "grad_norm": 0.33731186389923096, "learning_rate": 0.00014784439620998453, "loss": 1.2892, "step": 20080 }, { "epoch": 0.26094343637465217, "grad_norm": 0.44879165291786194, "learning_rate": 0.00014784179674807315, "loss": 1.5041, "step": 20081 }, { "epoch": 0.26095643091856807, "grad_norm": 0.4550076723098755, "learning_rate": 0.00014783919728616178, "loss": 1.4732, "step": 20082 }, { "epoch": 0.2609694254624839, "grad_norm": 0.4142514765262604, "learning_rate": 0.0001478365978242504, "loss": 1.5805, "step": 20083 }, { "epoch": 0.2609824200063998, "grad_norm": 0.5202316045761108, "learning_rate": 0.000147833998362339, "loss": 1.2799, "step": 20084 }, { "epoch": 0.26099541455031566, "grad_norm": 0.36545681953430176, "learning_rate": 0.0001478313989004276, "loss": 1.4073, "step": 20085 }, { "epoch": 0.26100840909423156, "grad_norm": 0.42346805334091187, "learning_rate": 0.00014782879943851625, "loss": 1.5625, "step": 20086 }, { "epoch": 0.2610214036381474, "grad_norm": 0.41787809133529663, "learning_rate": 0.00014782619997660485, "loss": 1.3216, "step": 20087 }, { "epoch": 0.2610343981820633, "grad_norm": 0.3913102447986603, "learning_rate": 0.00014782360051469347, "loss": 1.3677, "step": 20088 }, { "epoch": 0.26104739272597915, "grad_norm": 0.46991586685180664, "learning_rate": 0.00014782100105278207, "loss": 1.4767, "step": 20089 }, { "epoch": 0.26106038726989506, "grad_norm": 0.4853183925151825, "learning_rate": 0.0001478184015908707, "loss": 1.5355, "step": 20090 }, { "epoch": 0.2610733818138109, "grad_norm": 0.36936235427856445, "learning_rate": 0.00014781580212895932, "loss": 1.5055, "step": 20091 }, { "epoch": 0.2610863763577268, "grad_norm": 0.38284045457839966, "learning_rate": 0.00014781320266704791, "loss": 1.4527, "step": 20092 }, { "epoch": 0.26109937090164265, "grad_norm": 0.42419642210006714, "learning_rate": 0.00014781060320513654, "loss": 1.4127, "step": 20093 }, { "epoch": 0.26111236544555855, "grad_norm": 0.42717838287353516, "learning_rate": 0.00014780800374322516, "loss": 1.3308, "step": 20094 }, { "epoch": 0.2611253599894744, "grad_norm": 0.463407427072525, "learning_rate": 0.0001478054042813138, "loss": 1.4209, "step": 20095 }, { "epoch": 0.2611383545333903, "grad_norm": 0.401685506105423, "learning_rate": 0.00014780280481940239, "loss": 1.5169, "step": 20096 }, { "epoch": 0.26115134907730614, "grad_norm": 0.37016400694847107, "learning_rate": 0.000147800205357491, "loss": 1.5556, "step": 20097 }, { "epoch": 0.26116434362122204, "grad_norm": 0.37862780690193176, "learning_rate": 0.00014779760589557963, "loss": 1.3738, "step": 20098 }, { "epoch": 0.26117733816513794, "grad_norm": 0.48829784989356995, "learning_rate": 0.00014779500643366823, "loss": 1.602, "step": 20099 }, { "epoch": 0.2611903327090538, "grad_norm": 0.3443527817726135, "learning_rate": 0.00014779240697175686, "loss": 1.414, "step": 20100 }, { "epoch": 0.2612033272529697, "grad_norm": 0.36224499344825745, "learning_rate": 0.00014778980750984548, "loss": 1.4077, "step": 20101 }, { "epoch": 0.26121632179688553, "grad_norm": 0.37000930309295654, "learning_rate": 0.00014778720804793408, "loss": 1.488, "step": 20102 }, { "epoch": 0.26122931634080143, "grad_norm": 0.45107078552246094, "learning_rate": 0.0001477846085860227, "loss": 1.5106, "step": 20103 }, { "epoch": 0.2612423108847173, "grad_norm": 0.31218990683555603, "learning_rate": 0.0001477820091241113, "loss": 1.4386, "step": 20104 }, { "epoch": 0.2612553054286332, "grad_norm": 0.41558870673179626, "learning_rate": 0.00014777940966219995, "loss": 1.3468, "step": 20105 }, { "epoch": 0.261268299972549, "grad_norm": 0.4044245779514313, "learning_rate": 0.00014777681020028855, "loss": 1.4517, "step": 20106 }, { "epoch": 0.2612812945164649, "grad_norm": 0.3682188093662262, "learning_rate": 0.00014777421073837717, "loss": 1.4658, "step": 20107 }, { "epoch": 0.26129428906038077, "grad_norm": 0.3466509282588959, "learning_rate": 0.00014777161127646577, "loss": 1.2332, "step": 20108 }, { "epoch": 0.26130728360429667, "grad_norm": 0.4850592613220215, "learning_rate": 0.0001477690118145544, "loss": 1.4182, "step": 20109 }, { "epoch": 0.2613202781482125, "grad_norm": 0.3551687002182007, "learning_rate": 0.00014776641235264302, "loss": 1.3957, "step": 20110 }, { "epoch": 0.2613332726921284, "grad_norm": 0.4125869572162628, "learning_rate": 0.00014776381289073162, "loss": 1.3972, "step": 20111 }, { "epoch": 0.26134626723604426, "grad_norm": 0.3912777602672577, "learning_rate": 0.00014776121342882024, "loss": 1.4743, "step": 20112 }, { "epoch": 0.26135926177996016, "grad_norm": 0.26863548159599304, "learning_rate": 0.00014775861396690887, "loss": 1.4372, "step": 20113 }, { "epoch": 0.261372256323876, "grad_norm": 0.4019676446914673, "learning_rate": 0.00014775601450499746, "loss": 1.2715, "step": 20114 }, { "epoch": 0.2613852508677919, "grad_norm": 0.4471684396266937, "learning_rate": 0.0001477534150430861, "loss": 1.46, "step": 20115 }, { "epoch": 0.26139824541170775, "grad_norm": 0.4223073720932007, "learning_rate": 0.00014775081558117468, "loss": 1.3325, "step": 20116 }, { "epoch": 0.26141123995562365, "grad_norm": 0.36810413002967834, "learning_rate": 0.00014774821611926334, "loss": 1.3726, "step": 20117 }, { "epoch": 0.2614242344995395, "grad_norm": 0.5159755945205688, "learning_rate": 0.00014774561665735193, "loss": 1.546, "step": 20118 }, { "epoch": 0.2614372290434554, "grad_norm": 0.4350285530090332, "learning_rate": 0.00014774301719544056, "loss": 1.5419, "step": 20119 }, { "epoch": 0.26145022358737124, "grad_norm": 0.3918091058731079, "learning_rate": 0.00014774041773352916, "loss": 1.332, "step": 20120 }, { "epoch": 0.26146321813128715, "grad_norm": 0.42419981956481934, "learning_rate": 0.00014773781827161778, "loss": 1.5422, "step": 20121 }, { "epoch": 0.261476212675203, "grad_norm": 0.5650913119316101, "learning_rate": 0.0001477352188097064, "loss": 1.435, "step": 20122 }, { "epoch": 0.2614892072191189, "grad_norm": 0.3097410798072815, "learning_rate": 0.000147732619347795, "loss": 1.2515, "step": 20123 }, { "epoch": 0.26150220176303474, "grad_norm": 0.42348599433898926, "learning_rate": 0.00014773001988588363, "loss": 1.3592, "step": 20124 }, { "epoch": 0.26151519630695064, "grad_norm": 0.3523194193840027, "learning_rate": 0.00014772742042397225, "loss": 1.5306, "step": 20125 }, { "epoch": 0.2615281908508665, "grad_norm": 0.4342210292816162, "learning_rate": 0.00014772482096206085, "loss": 1.435, "step": 20126 }, { "epoch": 0.2615411853947824, "grad_norm": 0.3992953598499298, "learning_rate": 0.00014772222150014947, "loss": 1.515, "step": 20127 }, { "epoch": 0.26155417993869823, "grad_norm": 0.4150521457195282, "learning_rate": 0.00014771962203823807, "loss": 1.4629, "step": 20128 }, { "epoch": 0.26156717448261413, "grad_norm": 0.37494733929634094, "learning_rate": 0.00014771702257632672, "loss": 1.5302, "step": 20129 }, { "epoch": 0.26158016902653, "grad_norm": 0.33371207118034363, "learning_rate": 0.00014771442311441532, "loss": 1.4883, "step": 20130 }, { "epoch": 0.2615931635704459, "grad_norm": 0.416979044675827, "learning_rate": 0.00014771182365250394, "loss": 1.2736, "step": 20131 }, { "epoch": 0.2616061581143617, "grad_norm": 0.4367213845252991, "learning_rate": 0.00014770922419059254, "loss": 1.4821, "step": 20132 }, { "epoch": 0.2616191526582776, "grad_norm": 0.4128398597240448, "learning_rate": 0.00014770662472868117, "loss": 1.6699, "step": 20133 }, { "epoch": 0.26163214720219347, "grad_norm": 0.3529261648654938, "learning_rate": 0.0001477040252667698, "loss": 1.3901, "step": 20134 }, { "epoch": 0.26164514174610937, "grad_norm": 0.4329010844230652, "learning_rate": 0.0001477014258048584, "loss": 1.4838, "step": 20135 }, { "epoch": 0.2616581362900252, "grad_norm": 0.46771135926246643, "learning_rate": 0.00014769882634294704, "loss": 1.4736, "step": 20136 }, { "epoch": 0.2616711308339411, "grad_norm": 0.30786871910095215, "learning_rate": 0.00014769622688103564, "loss": 1.2045, "step": 20137 }, { "epoch": 0.26168412537785696, "grad_norm": 0.3756197988986969, "learning_rate": 0.00014769362741912426, "loss": 1.3288, "step": 20138 }, { "epoch": 0.26169711992177286, "grad_norm": 0.4581568241119385, "learning_rate": 0.00014769102795721286, "loss": 1.4416, "step": 20139 }, { "epoch": 0.2617101144656887, "grad_norm": 0.37867748737335205, "learning_rate": 0.00014768842849530148, "loss": 1.4914, "step": 20140 }, { "epoch": 0.2617231090096046, "grad_norm": 0.36453431844711304, "learning_rate": 0.0001476858290333901, "loss": 1.3458, "step": 20141 }, { "epoch": 0.26173610355352045, "grad_norm": 0.4035639762878418, "learning_rate": 0.0001476832295714787, "loss": 1.4142, "step": 20142 }, { "epoch": 0.26174909809743635, "grad_norm": 0.34680575132369995, "learning_rate": 0.00014768063010956733, "loss": 1.4168, "step": 20143 }, { "epoch": 0.2617620926413522, "grad_norm": 0.28573501110076904, "learning_rate": 0.00014767803064765595, "loss": 1.2719, "step": 20144 }, { "epoch": 0.2617750871852681, "grad_norm": 0.42015549540519714, "learning_rate": 0.00014767543118574455, "loss": 1.5674, "step": 20145 }, { "epoch": 0.26178808172918394, "grad_norm": 0.42318591475486755, "learning_rate": 0.00014767283172383318, "loss": 1.3624, "step": 20146 }, { "epoch": 0.26180107627309984, "grad_norm": 0.40999001264572144, "learning_rate": 0.00014767023226192177, "loss": 1.3244, "step": 20147 }, { "epoch": 0.2618140708170157, "grad_norm": 0.42387962341308594, "learning_rate": 0.00014766763280001042, "loss": 1.3569, "step": 20148 }, { "epoch": 0.2618270653609316, "grad_norm": 0.4213663339614868, "learning_rate": 0.00014766503333809902, "loss": 1.2088, "step": 20149 }, { "epoch": 0.26184005990484743, "grad_norm": 0.36051782965660095, "learning_rate": 0.00014766243387618765, "loss": 1.3062, "step": 20150 }, { "epoch": 0.26185305444876333, "grad_norm": 0.37084564566612244, "learning_rate": 0.00014765983441427624, "loss": 1.3655, "step": 20151 }, { "epoch": 0.2618660489926792, "grad_norm": 0.47213733196258545, "learning_rate": 0.00014765723495236487, "loss": 1.6196, "step": 20152 }, { "epoch": 0.2618790435365951, "grad_norm": 0.4904009699821472, "learning_rate": 0.0001476546354904535, "loss": 1.5828, "step": 20153 }, { "epoch": 0.2618920380805109, "grad_norm": 0.3814501464366913, "learning_rate": 0.0001476520360285421, "loss": 1.3349, "step": 20154 }, { "epoch": 0.2619050326244268, "grad_norm": 0.5415825843811035, "learning_rate": 0.00014764943656663071, "loss": 1.4677, "step": 20155 }, { "epoch": 0.26191802716834267, "grad_norm": 0.4704650342464447, "learning_rate": 0.00014764683710471934, "loss": 1.5759, "step": 20156 }, { "epoch": 0.26193102171225857, "grad_norm": 0.3297520875930786, "learning_rate": 0.00014764423764280794, "loss": 1.3964, "step": 20157 }, { "epoch": 0.2619440162561744, "grad_norm": 0.4667484164237976, "learning_rate": 0.00014764163818089656, "loss": 1.4234, "step": 20158 }, { "epoch": 0.2619570108000903, "grad_norm": 0.3856322467327118, "learning_rate": 0.00014763903871898516, "loss": 1.6446, "step": 20159 }, { "epoch": 0.26197000534400616, "grad_norm": 0.3428819179534912, "learning_rate": 0.0001476364392570738, "loss": 1.2616, "step": 20160 }, { "epoch": 0.26198299988792206, "grad_norm": 0.2953803539276123, "learning_rate": 0.0001476338397951624, "loss": 1.0612, "step": 20161 }, { "epoch": 0.2619959944318379, "grad_norm": 0.36012712121009827, "learning_rate": 0.00014763124033325103, "loss": 1.2913, "step": 20162 }, { "epoch": 0.2620089889757538, "grad_norm": 0.3688061833381653, "learning_rate": 0.00014762864087133963, "loss": 1.3258, "step": 20163 }, { "epoch": 0.26202198351966965, "grad_norm": 0.4010842740535736, "learning_rate": 0.00014762604140942825, "loss": 1.4237, "step": 20164 }, { "epoch": 0.26203497806358556, "grad_norm": 0.33622780442237854, "learning_rate": 0.00014762344194751688, "loss": 1.3618, "step": 20165 }, { "epoch": 0.2620479726075014, "grad_norm": 0.43539294600486755, "learning_rate": 0.00014762084248560548, "loss": 1.4673, "step": 20166 }, { "epoch": 0.2620609671514173, "grad_norm": 0.366283118724823, "learning_rate": 0.0001476182430236941, "loss": 1.2359, "step": 20167 }, { "epoch": 0.26207396169533315, "grad_norm": 0.4514642655849457, "learning_rate": 0.00014761564356178272, "loss": 1.4804, "step": 20168 }, { "epoch": 0.26208695623924905, "grad_norm": 0.4827955663204193, "learning_rate": 0.00014761304409987132, "loss": 1.5538, "step": 20169 }, { "epoch": 0.2620999507831649, "grad_norm": 0.4313634932041168, "learning_rate": 0.00014761044463795995, "loss": 1.4015, "step": 20170 }, { "epoch": 0.2621129453270808, "grad_norm": 0.36991316080093384, "learning_rate": 0.00014760784517604857, "loss": 1.2878, "step": 20171 }, { "epoch": 0.26212593987099664, "grad_norm": 0.3427586555480957, "learning_rate": 0.0001476052457141372, "loss": 1.3078, "step": 20172 }, { "epoch": 0.26213893441491254, "grad_norm": 0.33271706104278564, "learning_rate": 0.0001476026462522258, "loss": 1.26, "step": 20173 }, { "epoch": 0.26215192895882844, "grad_norm": 0.4862552583217621, "learning_rate": 0.00014760004679031442, "loss": 1.4289, "step": 20174 }, { "epoch": 0.2621649235027443, "grad_norm": 0.35112544894218445, "learning_rate": 0.00014759744732840304, "loss": 1.2879, "step": 20175 }, { "epoch": 0.2621779180466602, "grad_norm": 0.41724705696105957, "learning_rate": 0.00014759484786649164, "loss": 1.4041, "step": 20176 }, { "epoch": 0.26219091259057603, "grad_norm": 0.43246138095855713, "learning_rate": 0.00014759224840458026, "loss": 1.3086, "step": 20177 }, { "epoch": 0.26220390713449193, "grad_norm": 0.3994060456752777, "learning_rate": 0.00014758964894266886, "loss": 1.5876, "step": 20178 }, { "epoch": 0.2622169016784078, "grad_norm": 0.37874072790145874, "learning_rate": 0.0001475870494807575, "loss": 1.2841, "step": 20179 }, { "epoch": 0.2622298962223237, "grad_norm": 0.41393381357192993, "learning_rate": 0.0001475844500188461, "loss": 1.4817, "step": 20180 }, { "epoch": 0.2622428907662395, "grad_norm": 0.4610188603401184, "learning_rate": 0.0001475818505569347, "loss": 1.5493, "step": 20181 }, { "epoch": 0.2622558853101554, "grad_norm": 0.45889797806739807, "learning_rate": 0.00014757925109502333, "loss": 1.5041, "step": 20182 }, { "epoch": 0.26226887985407127, "grad_norm": 0.32614219188690186, "learning_rate": 0.00014757665163311196, "loss": 1.2483, "step": 20183 }, { "epoch": 0.26228187439798717, "grad_norm": 0.3387735188007355, "learning_rate": 0.00014757405217120058, "loss": 1.4668, "step": 20184 }, { "epoch": 0.262294868941903, "grad_norm": 0.3283379375934601, "learning_rate": 0.00014757145270928918, "loss": 1.2671, "step": 20185 }, { "epoch": 0.2623078634858189, "grad_norm": 0.3363078236579895, "learning_rate": 0.0001475688532473778, "loss": 1.4534, "step": 20186 }, { "epoch": 0.26232085802973476, "grad_norm": 0.4706505835056305, "learning_rate": 0.00014756625378546643, "loss": 1.4774, "step": 20187 }, { "epoch": 0.26233385257365066, "grad_norm": 0.3641832172870636, "learning_rate": 0.00014756365432355502, "loss": 1.3404, "step": 20188 }, { "epoch": 0.2623468471175665, "grad_norm": 0.38552185893058777, "learning_rate": 0.00014756105486164365, "loss": 1.4825, "step": 20189 }, { "epoch": 0.2623598416614824, "grad_norm": 0.4629080295562744, "learning_rate": 0.00014755845539973225, "loss": 1.3297, "step": 20190 }, { "epoch": 0.26237283620539825, "grad_norm": 0.449511855840683, "learning_rate": 0.0001475558559378209, "loss": 1.388, "step": 20191 }, { "epoch": 0.26238583074931415, "grad_norm": 0.4649542272090912, "learning_rate": 0.0001475532564759095, "loss": 1.3407, "step": 20192 }, { "epoch": 0.26239882529323, "grad_norm": 0.4479967951774597, "learning_rate": 0.0001475506570139981, "loss": 1.3285, "step": 20193 }, { "epoch": 0.2624118198371459, "grad_norm": 0.4342278242111206, "learning_rate": 0.00014754805755208672, "loss": 1.2499, "step": 20194 }, { "epoch": 0.26242481438106174, "grad_norm": 0.4888266921043396, "learning_rate": 0.00014754545809017534, "loss": 1.4193, "step": 20195 }, { "epoch": 0.26243780892497764, "grad_norm": 0.44978824257850647, "learning_rate": 0.00014754285862826397, "loss": 1.2825, "step": 20196 }, { "epoch": 0.2624508034688935, "grad_norm": 0.4346938729286194, "learning_rate": 0.00014754025916635256, "loss": 1.5181, "step": 20197 }, { "epoch": 0.2624637980128094, "grad_norm": 0.390828013420105, "learning_rate": 0.0001475376597044412, "loss": 1.4133, "step": 20198 }, { "epoch": 0.26247679255672524, "grad_norm": 0.3145589828491211, "learning_rate": 0.0001475350602425298, "loss": 1.3667, "step": 20199 }, { "epoch": 0.26248978710064114, "grad_norm": 0.48016080260276794, "learning_rate": 0.0001475324607806184, "loss": 1.5029, "step": 20200 }, { "epoch": 0.262502781644557, "grad_norm": 0.3888293504714966, "learning_rate": 0.00014752986131870703, "loss": 1.5375, "step": 20201 }, { "epoch": 0.2625157761884729, "grad_norm": 0.3966054618358612, "learning_rate": 0.00014752726185679563, "loss": 1.3739, "step": 20202 }, { "epoch": 0.2625287707323887, "grad_norm": 0.44394451379776, "learning_rate": 0.00014752466239488428, "loss": 1.3229, "step": 20203 }, { "epoch": 0.26254176527630463, "grad_norm": 0.45547330379486084, "learning_rate": 0.00014752206293297288, "loss": 1.5428, "step": 20204 }, { "epoch": 0.2625547598202205, "grad_norm": 0.38327598571777344, "learning_rate": 0.0001475194634710615, "loss": 1.3516, "step": 20205 }, { "epoch": 0.2625677543641364, "grad_norm": 0.4890802204608917, "learning_rate": 0.0001475168640091501, "loss": 1.5009, "step": 20206 }, { "epoch": 0.2625807489080522, "grad_norm": 0.388841837644577, "learning_rate": 0.00014751426454723873, "loss": 1.5056, "step": 20207 }, { "epoch": 0.2625937434519681, "grad_norm": 0.3530994951725006, "learning_rate": 0.00014751166508532735, "loss": 1.4598, "step": 20208 }, { "epoch": 0.26260673799588397, "grad_norm": 0.39420780539512634, "learning_rate": 0.00014750906562341595, "loss": 1.3406, "step": 20209 }, { "epoch": 0.26261973253979987, "grad_norm": 0.4644569158554077, "learning_rate": 0.00014750646616150457, "loss": 1.6051, "step": 20210 }, { "epoch": 0.2626327270837157, "grad_norm": 0.38759228587150574, "learning_rate": 0.0001475038666995932, "loss": 1.3615, "step": 20211 }, { "epoch": 0.2626457216276316, "grad_norm": 0.4613949656486511, "learning_rate": 0.0001475012672376818, "loss": 1.4428, "step": 20212 }, { "epoch": 0.26265871617154746, "grad_norm": 0.35100141167640686, "learning_rate": 0.00014749866777577042, "loss": 1.3847, "step": 20213 }, { "epoch": 0.26267171071546336, "grad_norm": 0.3811013698577881, "learning_rate": 0.00014749606831385904, "loss": 1.3349, "step": 20214 }, { "epoch": 0.2626847052593792, "grad_norm": 0.45175135135650635, "learning_rate": 0.00014749346885194767, "loss": 1.5312, "step": 20215 }, { "epoch": 0.2626976998032951, "grad_norm": 0.41111159324645996, "learning_rate": 0.00014749086939003627, "loss": 1.4716, "step": 20216 }, { "epoch": 0.26271069434721095, "grad_norm": 0.37609606981277466, "learning_rate": 0.0001474882699281249, "loss": 1.3412, "step": 20217 }, { "epoch": 0.26272368889112685, "grad_norm": 0.46760720014572144, "learning_rate": 0.00014748567046621351, "loss": 1.4216, "step": 20218 }, { "epoch": 0.2627366834350427, "grad_norm": 0.42550939321517944, "learning_rate": 0.0001474830710043021, "loss": 1.2957, "step": 20219 }, { "epoch": 0.2627496779789586, "grad_norm": 0.4149263799190521, "learning_rate": 0.00014748047154239074, "loss": 1.5043, "step": 20220 }, { "epoch": 0.26276267252287444, "grad_norm": 0.36996227502822876, "learning_rate": 0.00014747787208047933, "loss": 1.398, "step": 20221 }, { "epoch": 0.26277566706679034, "grad_norm": 0.46956267952919006, "learning_rate": 0.00014747527261856799, "loss": 1.4203, "step": 20222 }, { "epoch": 0.2627886616107062, "grad_norm": 0.45764175057411194, "learning_rate": 0.00014747267315665658, "loss": 1.4969, "step": 20223 }, { "epoch": 0.2628016561546221, "grad_norm": 0.4426584541797638, "learning_rate": 0.00014747007369474518, "loss": 1.4054, "step": 20224 }, { "epoch": 0.26281465069853793, "grad_norm": 0.32432153820991516, "learning_rate": 0.0001474674742328338, "loss": 1.395, "step": 20225 }, { "epoch": 0.26282764524245383, "grad_norm": 0.3240564465522766, "learning_rate": 0.00014746487477092243, "loss": 1.3175, "step": 20226 }, { "epoch": 0.2628406397863697, "grad_norm": 0.4357198476791382, "learning_rate": 0.00014746227530901105, "loss": 1.4547, "step": 20227 }, { "epoch": 0.2628536343302856, "grad_norm": 0.33011049032211304, "learning_rate": 0.00014745967584709965, "loss": 1.5637, "step": 20228 }, { "epoch": 0.2628666288742014, "grad_norm": 0.5391556024551392, "learning_rate": 0.00014745707638518828, "loss": 1.7016, "step": 20229 }, { "epoch": 0.2628796234181173, "grad_norm": 0.49176377058029175, "learning_rate": 0.0001474544769232769, "loss": 1.5119, "step": 20230 }, { "epoch": 0.26289261796203317, "grad_norm": 0.35366764664649963, "learning_rate": 0.0001474518774613655, "loss": 1.343, "step": 20231 }, { "epoch": 0.26290561250594907, "grad_norm": 0.34848880767822266, "learning_rate": 0.00014744927799945412, "loss": 1.2315, "step": 20232 }, { "epoch": 0.2629186070498649, "grad_norm": 0.30918940901756287, "learning_rate": 0.00014744667853754272, "loss": 1.2837, "step": 20233 }, { "epoch": 0.2629316015937808, "grad_norm": 0.3803653419017792, "learning_rate": 0.00014744407907563137, "loss": 1.401, "step": 20234 }, { "epoch": 0.26294459613769666, "grad_norm": 0.39797085523605347, "learning_rate": 0.00014744147961371997, "loss": 1.3792, "step": 20235 }, { "epoch": 0.26295759068161256, "grad_norm": 0.48326244950294495, "learning_rate": 0.00014743888015180857, "loss": 1.4719, "step": 20236 }, { "epoch": 0.2629705852255284, "grad_norm": 0.38805291056632996, "learning_rate": 0.0001474362806898972, "loss": 1.4317, "step": 20237 }, { "epoch": 0.2629835797694443, "grad_norm": 0.40799880027770996, "learning_rate": 0.00014743368122798581, "loss": 1.5228, "step": 20238 }, { "epoch": 0.26299657431336015, "grad_norm": 0.44866764545440674, "learning_rate": 0.00014743108176607444, "loss": 1.554, "step": 20239 }, { "epoch": 0.26300956885727605, "grad_norm": 0.4495713710784912, "learning_rate": 0.00014742848230416304, "loss": 1.4282, "step": 20240 }, { "epoch": 0.2630225634011919, "grad_norm": 0.42010048031806946, "learning_rate": 0.00014742588284225166, "loss": 1.5962, "step": 20241 }, { "epoch": 0.2630355579451078, "grad_norm": 0.5566220283508301, "learning_rate": 0.00014742328338034029, "loss": 1.5394, "step": 20242 }, { "epoch": 0.26304855248902365, "grad_norm": 0.4417506754398346, "learning_rate": 0.00014742068391842888, "loss": 1.6033, "step": 20243 }, { "epoch": 0.26306154703293955, "grad_norm": 0.481902152299881, "learning_rate": 0.0001474180844565175, "loss": 1.4638, "step": 20244 }, { "epoch": 0.2630745415768554, "grad_norm": 0.48447972536087036, "learning_rate": 0.00014741548499460613, "loss": 1.51, "step": 20245 }, { "epoch": 0.2630875361207713, "grad_norm": 0.4679826498031616, "learning_rate": 0.00014741288553269476, "loss": 1.4538, "step": 20246 }, { "epoch": 0.26310053066468714, "grad_norm": 0.4882189631462097, "learning_rate": 0.00014741028607078335, "loss": 1.4387, "step": 20247 }, { "epoch": 0.26311352520860304, "grad_norm": 0.43295976519584656, "learning_rate": 0.00014740768660887195, "loss": 1.3978, "step": 20248 }, { "epoch": 0.2631265197525189, "grad_norm": 0.34668004512786865, "learning_rate": 0.0001474050871469606, "loss": 1.3928, "step": 20249 }, { "epoch": 0.2631395142964348, "grad_norm": 0.410810649394989, "learning_rate": 0.0001474024876850492, "loss": 1.4836, "step": 20250 }, { "epoch": 0.2631525088403507, "grad_norm": 0.4092983305454254, "learning_rate": 0.00014739988822313782, "loss": 1.421, "step": 20251 }, { "epoch": 0.26316550338426653, "grad_norm": 0.4536350667476654, "learning_rate": 0.00014739728876122642, "loss": 1.3137, "step": 20252 }, { "epoch": 0.26317849792818243, "grad_norm": 0.4732278883457184, "learning_rate": 0.00014739468929931505, "loss": 1.3703, "step": 20253 }, { "epoch": 0.2631914924720983, "grad_norm": 0.36400094628334045, "learning_rate": 0.00014739208983740367, "loss": 1.4922, "step": 20254 }, { "epoch": 0.2632044870160142, "grad_norm": 0.4370715320110321, "learning_rate": 0.00014738949037549227, "loss": 1.4546, "step": 20255 }, { "epoch": 0.26321748155993, "grad_norm": 0.406096488237381, "learning_rate": 0.0001473868909135809, "loss": 1.4973, "step": 20256 }, { "epoch": 0.2632304761038459, "grad_norm": 0.33333998918533325, "learning_rate": 0.00014738429145166952, "loss": 1.5353, "step": 20257 }, { "epoch": 0.26324347064776177, "grad_norm": 0.39879703521728516, "learning_rate": 0.00014738169198975814, "loss": 1.5031, "step": 20258 }, { "epoch": 0.26325646519167767, "grad_norm": 0.41050270199775696, "learning_rate": 0.00014737909252784674, "loss": 1.3392, "step": 20259 }, { "epoch": 0.2632694597355935, "grad_norm": 0.4123345613479614, "learning_rate": 0.00014737649306593536, "loss": 1.3392, "step": 20260 }, { "epoch": 0.2632824542795094, "grad_norm": 0.3925626873970032, "learning_rate": 0.000147373893604024, "loss": 1.5845, "step": 20261 }, { "epoch": 0.26329544882342526, "grad_norm": 0.40224093198776245, "learning_rate": 0.00014737129414211259, "loss": 1.4397, "step": 20262 }, { "epoch": 0.26330844336734116, "grad_norm": 0.5149607062339783, "learning_rate": 0.0001473686946802012, "loss": 1.2604, "step": 20263 }, { "epoch": 0.263321437911257, "grad_norm": 0.47094354033470154, "learning_rate": 0.0001473660952182898, "loss": 1.5489, "step": 20264 }, { "epoch": 0.2633344324551729, "grad_norm": 0.434526264667511, "learning_rate": 0.00014736349575637843, "loss": 1.383, "step": 20265 }, { "epoch": 0.26334742699908875, "grad_norm": 0.35797902941703796, "learning_rate": 0.00014736089629446706, "loss": 1.2575, "step": 20266 }, { "epoch": 0.26336042154300465, "grad_norm": 0.4485943913459778, "learning_rate": 0.00014735829683255565, "loss": 1.3066, "step": 20267 }, { "epoch": 0.2633734160869205, "grad_norm": 0.3717271387577057, "learning_rate": 0.00014735569737064428, "loss": 1.4803, "step": 20268 }, { "epoch": 0.2633864106308364, "grad_norm": 0.46604233980178833, "learning_rate": 0.0001473530979087329, "loss": 1.4642, "step": 20269 }, { "epoch": 0.26339940517475224, "grad_norm": 0.430742472410202, "learning_rate": 0.00014735049844682153, "loss": 1.4613, "step": 20270 }, { "epoch": 0.26341239971866814, "grad_norm": 0.4645368456840515, "learning_rate": 0.00014734789898491012, "loss": 1.4686, "step": 20271 }, { "epoch": 0.263425394262584, "grad_norm": 0.3445025086402893, "learning_rate": 0.00014734529952299875, "loss": 1.2804, "step": 20272 }, { "epoch": 0.2634383888064999, "grad_norm": 0.3552330434322357, "learning_rate": 0.00014734270006108737, "loss": 1.3502, "step": 20273 }, { "epoch": 0.26345138335041574, "grad_norm": 0.4548403322696686, "learning_rate": 0.00014734010059917597, "loss": 1.5107, "step": 20274 }, { "epoch": 0.26346437789433164, "grad_norm": 0.4379022717475891, "learning_rate": 0.0001473375011372646, "loss": 1.5556, "step": 20275 }, { "epoch": 0.2634773724382475, "grad_norm": 0.38250961899757385, "learning_rate": 0.0001473349016753532, "loss": 1.5204, "step": 20276 }, { "epoch": 0.2634903669821634, "grad_norm": 0.38495001196861267, "learning_rate": 0.00014733230221344182, "loss": 1.404, "step": 20277 }, { "epoch": 0.2635033615260792, "grad_norm": 0.41897985339164734, "learning_rate": 0.00014732970275153044, "loss": 1.5264, "step": 20278 }, { "epoch": 0.26351635606999513, "grad_norm": 0.3831501305103302, "learning_rate": 0.00014732710328961904, "loss": 1.4505, "step": 20279 }, { "epoch": 0.263529350613911, "grad_norm": 0.33679720759391785, "learning_rate": 0.00014732450382770766, "loss": 1.415, "step": 20280 }, { "epoch": 0.2635423451578269, "grad_norm": 0.4497471749782562, "learning_rate": 0.0001473219043657963, "loss": 1.4677, "step": 20281 }, { "epoch": 0.2635553397017427, "grad_norm": 0.45883193612098694, "learning_rate": 0.0001473193049038849, "loss": 1.4142, "step": 20282 }, { "epoch": 0.2635683342456586, "grad_norm": 0.35476186871528625, "learning_rate": 0.0001473167054419735, "loss": 1.2586, "step": 20283 }, { "epoch": 0.26358132878957446, "grad_norm": 0.47246623039245605, "learning_rate": 0.00014731410598006213, "loss": 1.2574, "step": 20284 }, { "epoch": 0.26359432333349037, "grad_norm": 0.3894566297531128, "learning_rate": 0.00014731150651815076, "loss": 1.452, "step": 20285 }, { "epoch": 0.2636073178774062, "grad_norm": 0.4148367643356323, "learning_rate": 0.00014730890705623936, "loss": 1.4957, "step": 20286 }, { "epoch": 0.2636203124213221, "grad_norm": 0.42245736718177795, "learning_rate": 0.00014730630759432798, "loss": 1.5777, "step": 20287 }, { "epoch": 0.26363330696523796, "grad_norm": 0.4743061363697052, "learning_rate": 0.0001473037081324166, "loss": 1.4748, "step": 20288 }, { "epoch": 0.26364630150915386, "grad_norm": 0.39008021354675293, "learning_rate": 0.00014730110867050523, "loss": 1.5946, "step": 20289 }, { "epoch": 0.2636592960530697, "grad_norm": 0.4633859395980835, "learning_rate": 0.00014729850920859383, "loss": 1.4356, "step": 20290 }, { "epoch": 0.2636722905969856, "grad_norm": 0.4043808579444885, "learning_rate": 0.00014729590974668242, "loss": 1.3425, "step": 20291 }, { "epoch": 0.26368528514090145, "grad_norm": 0.4194539785385132, "learning_rate": 0.00014729331028477108, "loss": 1.2348, "step": 20292 }, { "epoch": 0.26369827968481735, "grad_norm": 0.3785662353038788, "learning_rate": 0.00014729071082285967, "loss": 1.402, "step": 20293 }, { "epoch": 0.2637112742287332, "grad_norm": 0.45317962765693665, "learning_rate": 0.0001472881113609483, "loss": 1.4497, "step": 20294 }, { "epoch": 0.2637242687726491, "grad_norm": 0.40188026428222656, "learning_rate": 0.0001472855118990369, "loss": 1.3854, "step": 20295 }, { "epoch": 0.26373726331656494, "grad_norm": 0.39926695823669434, "learning_rate": 0.00014728291243712552, "loss": 1.4611, "step": 20296 }, { "epoch": 0.26375025786048084, "grad_norm": 0.4087277054786682, "learning_rate": 0.00014728031297521414, "loss": 1.5526, "step": 20297 }, { "epoch": 0.2637632524043967, "grad_norm": 0.3697447180747986, "learning_rate": 0.00014727771351330274, "loss": 1.3993, "step": 20298 }, { "epoch": 0.2637762469483126, "grad_norm": 0.5567469000816345, "learning_rate": 0.00014727511405139137, "loss": 1.5175, "step": 20299 }, { "epoch": 0.26378924149222843, "grad_norm": 0.397339403629303, "learning_rate": 0.00014727251458948, "loss": 1.4167, "step": 20300 }, { "epoch": 0.26380223603614433, "grad_norm": 0.4239199161529541, "learning_rate": 0.00014726991512756862, "loss": 1.611, "step": 20301 }, { "epoch": 0.2638152305800602, "grad_norm": 0.3901256322860718, "learning_rate": 0.0001472673156656572, "loss": 1.4279, "step": 20302 }, { "epoch": 0.2638282251239761, "grad_norm": 0.4490596055984497, "learning_rate": 0.0001472647162037458, "loss": 1.4353, "step": 20303 }, { "epoch": 0.2638412196678919, "grad_norm": 0.4311522841453552, "learning_rate": 0.00014726211674183446, "loss": 1.4894, "step": 20304 }, { "epoch": 0.2638542142118078, "grad_norm": 0.376436710357666, "learning_rate": 0.00014725951727992306, "loss": 1.4257, "step": 20305 }, { "epoch": 0.26386720875572367, "grad_norm": 0.35798490047454834, "learning_rate": 0.00014725691781801168, "loss": 1.5572, "step": 20306 }, { "epoch": 0.26388020329963957, "grad_norm": 0.4611857533454895, "learning_rate": 0.00014725431835610028, "loss": 1.5312, "step": 20307 }, { "epoch": 0.2638931978435554, "grad_norm": 0.3669566512107849, "learning_rate": 0.0001472517188941889, "loss": 1.2343, "step": 20308 }, { "epoch": 0.2639061923874713, "grad_norm": 0.45506301522254944, "learning_rate": 0.00014724911943227753, "loss": 1.5609, "step": 20309 }, { "epoch": 0.26391918693138716, "grad_norm": 0.4446612596511841, "learning_rate": 0.00014724651997036613, "loss": 1.455, "step": 20310 }, { "epoch": 0.26393218147530306, "grad_norm": 0.3697642683982849, "learning_rate": 0.00014724392050845475, "loss": 1.2827, "step": 20311 }, { "epoch": 0.2639451760192189, "grad_norm": 0.3904421329498291, "learning_rate": 0.00014724132104654338, "loss": 1.5397, "step": 20312 }, { "epoch": 0.2639581705631348, "grad_norm": 0.4419945478439331, "learning_rate": 0.000147238721584632, "loss": 1.5727, "step": 20313 }, { "epoch": 0.26397116510705065, "grad_norm": 0.35768091678619385, "learning_rate": 0.0001472361221227206, "loss": 1.4451, "step": 20314 }, { "epoch": 0.26398415965096655, "grad_norm": 0.34864622354507446, "learning_rate": 0.0001472335226608092, "loss": 1.4107, "step": 20315 }, { "epoch": 0.2639971541948824, "grad_norm": 0.36060312390327454, "learning_rate": 0.00014723092319889785, "loss": 1.3151, "step": 20316 }, { "epoch": 0.2640101487387983, "grad_norm": 0.39931216835975647, "learning_rate": 0.00014722832373698644, "loss": 1.4274, "step": 20317 }, { "epoch": 0.26402314328271415, "grad_norm": 0.31003302335739136, "learning_rate": 0.00014722572427507507, "loss": 1.2604, "step": 20318 }, { "epoch": 0.26403613782663005, "grad_norm": 0.3718045651912689, "learning_rate": 0.0001472231248131637, "loss": 1.6029, "step": 20319 }, { "epoch": 0.2640491323705459, "grad_norm": 0.41842931509017944, "learning_rate": 0.0001472205253512523, "loss": 1.3846, "step": 20320 }, { "epoch": 0.2640621269144618, "grad_norm": 0.33567166328430176, "learning_rate": 0.00014721792588934092, "loss": 1.2919, "step": 20321 }, { "epoch": 0.26407512145837764, "grad_norm": 0.3347376883029938, "learning_rate": 0.0001472153264274295, "loss": 1.3849, "step": 20322 }, { "epoch": 0.26408811600229354, "grad_norm": 0.516158938407898, "learning_rate": 0.00014721272696551816, "loss": 1.4817, "step": 20323 }, { "epoch": 0.2641011105462094, "grad_norm": 0.4297718405723572, "learning_rate": 0.00014721012750360676, "loss": 1.5191, "step": 20324 }, { "epoch": 0.2641141050901253, "grad_norm": 0.4083881676197052, "learning_rate": 0.00014720752804169539, "loss": 1.4355, "step": 20325 }, { "epoch": 0.26412709963404113, "grad_norm": 0.4268642067909241, "learning_rate": 0.00014720492857978398, "loss": 1.4211, "step": 20326 }, { "epoch": 0.26414009417795703, "grad_norm": 0.4757986068725586, "learning_rate": 0.0001472023291178726, "loss": 1.4262, "step": 20327 }, { "epoch": 0.26415308872187293, "grad_norm": 0.3787767291069031, "learning_rate": 0.00014719972965596123, "loss": 1.5379, "step": 20328 }, { "epoch": 0.2641660832657888, "grad_norm": 0.37189981341362, "learning_rate": 0.00014719713019404983, "loss": 1.3065, "step": 20329 }, { "epoch": 0.2641790778097047, "grad_norm": 0.32237106561660767, "learning_rate": 0.00014719453073213845, "loss": 1.1731, "step": 20330 }, { "epoch": 0.2641920723536205, "grad_norm": 0.5207783579826355, "learning_rate": 0.00014719193127022708, "loss": 1.2849, "step": 20331 }, { "epoch": 0.2642050668975364, "grad_norm": 0.41006970405578613, "learning_rate": 0.00014718933180831568, "loss": 1.4631, "step": 20332 }, { "epoch": 0.26421806144145227, "grad_norm": 0.44240331649780273, "learning_rate": 0.0001471867323464043, "loss": 1.2657, "step": 20333 }, { "epoch": 0.26423105598536817, "grad_norm": 0.41540876030921936, "learning_rate": 0.0001471841328844929, "loss": 1.5526, "step": 20334 }, { "epoch": 0.264244050529284, "grad_norm": 0.37062546610832214, "learning_rate": 0.00014718153342258155, "loss": 1.3855, "step": 20335 }, { "epoch": 0.2642570450731999, "grad_norm": 0.3392045497894287, "learning_rate": 0.00014717893396067015, "loss": 1.5928, "step": 20336 }, { "epoch": 0.26427003961711576, "grad_norm": 0.3722420036792755, "learning_rate": 0.00014717633449875877, "loss": 1.4956, "step": 20337 }, { "epoch": 0.26428303416103166, "grad_norm": 0.3512881398200989, "learning_rate": 0.00014717373503684737, "loss": 1.3604, "step": 20338 }, { "epoch": 0.2642960287049475, "grad_norm": 0.38898351788520813, "learning_rate": 0.000147171135574936, "loss": 1.3339, "step": 20339 }, { "epoch": 0.2643090232488634, "grad_norm": 0.4500400424003601, "learning_rate": 0.00014716853611302462, "loss": 1.5171, "step": 20340 }, { "epoch": 0.26432201779277925, "grad_norm": 0.4502471387386322, "learning_rate": 0.00014716593665111322, "loss": 1.452, "step": 20341 }, { "epoch": 0.26433501233669515, "grad_norm": 0.30990028381347656, "learning_rate": 0.00014716333718920184, "loss": 1.39, "step": 20342 }, { "epoch": 0.264348006880611, "grad_norm": 0.40244022011756897, "learning_rate": 0.00014716073772729046, "loss": 1.5825, "step": 20343 }, { "epoch": 0.2643610014245269, "grad_norm": 0.45240139961242676, "learning_rate": 0.0001471581382653791, "loss": 1.4573, "step": 20344 }, { "epoch": 0.26437399596844274, "grad_norm": 0.4707461893558502, "learning_rate": 0.00014715553880346769, "loss": 1.4836, "step": 20345 }, { "epoch": 0.26438699051235864, "grad_norm": 0.4370970129966736, "learning_rate": 0.00014715293934155628, "loss": 1.4546, "step": 20346 }, { "epoch": 0.2643999850562745, "grad_norm": 0.4711625874042511, "learning_rate": 0.00014715033987964494, "loss": 1.4243, "step": 20347 }, { "epoch": 0.2644129796001904, "grad_norm": 0.4341016411781311, "learning_rate": 0.00014714774041773353, "loss": 1.5527, "step": 20348 }, { "epoch": 0.26442597414410624, "grad_norm": 0.3351496160030365, "learning_rate": 0.00014714514095582216, "loss": 1.1485, "step": 20349 }, { "epoch": 0.26443896868802214, "grad_norm": 0.4886619746685028, "learning_rate": 0.00014714254149391075, "loss": 1.5007, "step": 20350 }, { "epoch": 0.264451963231938, "grad_norm": 0.47134706377983093, "learning_rate": 0.00014713994203199938, "loss": 1.4468, "step": 20351 }, { "epoch": 0.2644649577758539, "grad_norm": 0.5228691101074219, "learning_rate": 0.000147137342570088, "loss": 1.4265, "step": 20352 }, { "epoch": 0.2644779523197697, "grad_norm": 0.46689245104789734, "learning_rate": 0.0001471347431081766, "loss": 1.312, "step": 20353 }, { "epoch": 0.2644909468636856, "grad_norm": 0.41836151480674744, "learning_rate": 0.00014713214364626523, "loss": 1.4118, "step": 20354 }, { "epoch": 0.2645039414076015, "grad_norm": 0.49114224314689636, "learning_rate": 0.00014712954418435385, "loss": 1.4517, "step": 20355 }, { "epoch": 0.2645169359515174, "grad_norm": 0.49041232466697693, "learning_rate": 0.00014712694472244247, "loss": 1.4667, "step": 20356 }, { "epoch": 0.2645299304954332, "grad_norm": 0.39503321051597595, "learning_rate": 0.00014712434526053107, "loss": 1.4059, "step": 20357 }, { "epoch": 0.2645429250393491, "grad_norm": 0.4683149755001068, "learning_rate": 0.0001471217457986197, "loss": 1.4779, "step": 20358 }, { "epoch": 0.26455591958326496, "grad_norm": 0.402805358171463, "learning_rate": 0.00014711914633670832, "loss": 1.4736, "step": 20359 }, { "epoch": 0.26456891412718087, "grad_norm": 0.369107723236084, "learning_rate": 0.00014711654687479692, "loss": 1.4433, "step": 20360 }, { "epoch": 0.2645819086710967, "grad_norm": 0.4391769766807556, "learning_rate": 0.00014711394741288554, "loss": 1.4178, "step": 20361 }, { "epoch": 0.2645949032150126, "grad_norm": 0.30711233615875244, "learning_rate": 0.00014711134795097417, "loss": 1.4596, "step": 20362 }, { "epoch": 0.26460789775892846, "grad_norm": 0.44645780324935913, "learning_rate": 0.00014710874848906276, "loss": 1.4392, "step": 20363 }, { "epoch": 0.26462089230284436, "grad_norm": 0.38735485076904297, "learning_rate": 0.0001471061490271514, "loss": 1.6671, "step": 20364 }, { "epoch": 0.2646338868467602, "grad_norm": 0.41639265418052673, "learning_rate": 0.00014710354956523999, "loss": 1.3628, "step": 20365 }, { "epoch": 0.2646468813906761, "grad_norm": 0.3761052191257477, "learning_rate": 0.00014710095010332864, "loss": 1.4702, "step": 20366 }, { "epoch": 0.26465987593459195, "grad_norm": 0.388702929019928, "learning_rate": 0.00014709835064141723, "loss": 1.4651, "step": 20367 }, { "epoch": 0.26467287047850785, "grad_norm": 0.3984396457672119, "learning_rate": 0.00014709575117950586, "loss": 1.3737, "step": 20368 }, { "epoch": 0.2646858650224237, "grad_norm": 0.3564522862434387, "learning_rate": 0.00014709315171759446, "loss": 1.4, "step": 20369 }, { "epoch": 0.2646988595663396, "grad_norm": 0.394505113363266, "learning_rate": 0.00014709055225568308, "loss": 1.2504, "step": 20370 }, { "epoch": 0.26471185411025544, "grad_norm": 0.43178120255470276, "learning_rate": 0.0001470879527937717, "loss": 1.3292, "step": 20371 }, { "epoch": 0.26472484865417134, "grad_norm": 0.40451210737228394, "learning_rate": 0.0001470853533318603, "loss": 1.3094, "step": 20372 }, { "epoch": 0.2647378431980872, "grad_norm": 0.5249249339103699, "learning_rate": 0.00014708275386994893, "loss": 1.3884, "step": 20373 }, { "epoch": 0.2647508377420031, "grad_norm": 0.3677501976490021, "learning_rate": 0.00014708015440803755, "loss": 1.5359, "step": 20374 }, { "epoch": 0.26476383228591893, "grad_norm": 0.412564218044281, "learning_rate": 0.00014707755494612615, "loss": 1.5126, "step": 20375 }, { "epoch": 0.26477682682983483, "grad_norm": 0.36126774549484253, "learning_rate": 0.00014707495548421477, "loss": 1.4725, "step": 20376 }, { "epoch": 0.2647898213737507, "grad_norm": 0.3468768298625946, "learning_rate": 0.00014707235602230337, "loss": 1.428, "step": 20377 }, { "epoch": 0.2648028159176666, "grad_norm": 0.4005556106567383, "learning_rate": 0.00014706975656039202, "loss": 1.4203, "step": 20378 }, { "epoch": 0.2648158104615824, "grad_norm": 0.41145312786102295, "learning_rate": 0.00014706715709848062, "loss": 1.3495, "step": 20379 }, { "epoch": 0.2648288050054983, "grad_norm": 0.40121734142303467, "learning_rate": 0.00014706455763656924, "loss": 1.4096, "step": 20380 }, { "epoch": 0.26484179954941417, "grad_norm": 0.3555337190628052, "learning_rate": 0.00014706195817465784, "loss": 1.519, "step": 20381 }, { "epoch": 0.26485479409333007, "grad_norm": 0.42210060358047485, "learning_rate": 0.00014705935871274647, "loss": 1.6939, "step": 20382 }, { "epoch": 0.2648677886372459, "grad_norm": 0.40848425030708313, "learning_rate": 0.0001470567592508351, "loss": 1.5978, "step": 20383 }, { "epoch": 0.2648807831811618, "grad_norm": 0.4455949068069458, "learning_rate": 0.0001470541597889237, "loss": 1.4914, "step": 20384 }, { "epoch": 0.26489377772507766, "grad_norm": 0.3725610077381134, "learning_rate": 0.0001470515603270123, "loss": 1.4107, "step": 20385 }, { "epoch": 0.26490677226899356, "grad_norm": 0.4815816581249237, "learning_rate": 0.00014704896086510094, "loss": 1.555, "step": 20386 }, { "epoch": 0.2649197668129094, "grad_norm": 0.3977145850658417, "learning_rate": 0.00014704636140318953, "loss": 1.53, "step": 20387 }, { "epoch": 0.2649327613568253, "grad_norm": 0.4359447956085205, "learning_rate": 0.00014704376194127816, "loss": 1.4012, "step": 20388 }, { "epoch": 0.26494575590074115, "grad_norm": 0.40985336899757385, "learning_rate": 0.00014704116247936676, "loss": 1.4377, "step": 20389 }, { "epoch": 0.26495875044465705, "grad_norm": 0.4714990556240082, "learning_rate": 0.0001470385630174554, "loss": 1.5874, "step": 20390 }, { "epoch": 0.2649717449885729, "grad_norm": 0.4139081537723541, "learning_rate": 0.000147035963555544, "loss": 1.4625, "step": 20391 }, { "epoch": 0.2649847395324888, "grad_norm": 0.37376633286476135, "learning_rate": 0.00014703336409363263, "loss": 1.3662, "step": 20392 }, { "epoch": 0.26499773407640465, "grad_norm": 0.4207388758659363, "learning_rate": 0.00014703076463172125, "loss": 1.506, "step": 20393 }, { "epoch": 0.26501072862032055, "grad_norm": 0.49392813444137573, "learning_rate": 0.00014702816516980985, "loss": 1.3898, "step": 20394 }, { "epoch": 0.2650237231642364, "grad_norm": 0.36611130833625793, "learning_rate": 0.00014702556570789848, "loss": 1.35, "step": 20395 }, { "epoch": 0.2650367177081523, "grad_norm": 0.42950478196144104, "learning_rate": 0.00014702296624598707, "loss": 1.2796, "step": 20396 }, { "epoch": 0.26504971225206814, "grad_norm": 0.21927288174629211, "learning_rate": 0.00014702036678407573, "loss": 1.1613, "step": 20397 }, { "epoch": 0.26506270679598404, "grad_norm": 0.35888615250587463, "learning_rate": 0.00014701776732216432, "loss": 1.7731, "step": 20398 }, { "epoch": 0.2650757013398999, "grad_norm": 0.3721853196620941, "learning_rate": 0.00014701516786025292, "loss": 1.321, "step": 20399 }, { "epoch": 0.2650886958838158, "grad_norm": 0.38958534598350525, "learning_rate": 0.00014701256839834154, "loss": 1.5856, "step": 20400 }, { "epoch": 0.26510169042773163, "grad_norm": 0.36468446254730225, "learning_rate": 0.00014700996893643017, "loss": 1.3031, "step": 20401 }, { "epoch": 0.26511468497164753, "grad_norm": 0.437661349773407, "learning_rate": 0.0001470073694745188, "loss": 1.4715, "step": 20402 }, { "epoch": 0.26512767951556343, "grad_norm": 0.31876251101493835, "learning_rate": 0.0001470047700126074, "loss": 1.1394, "step": 20403 }, { "epoch": 0.2651406740594793, "grad_norm": 0.38678017258644104, "learning_rate": 0.00014700217055069602, "loss": 1.4323, "step": 20404 }, { "epoch": 0.2651536686033952, "grad_norm": 0.3743257522583008, "learning_rate": 0.00014699957108878464, "loss": 1.7135, "step": 20405 }, { "epoch": 0.265166663147311, "grad_norm": 0.433645635843277, "learning_rate": 0.00014699697162687324, "loss": 1.5107, "step": 20406 }, { "epoch": 0.2651796576912269, "grad_norm": 0.4228347837924957, "learning_rate": 0.00014699437216496186, "loss": 1.609, "step": 20407 }, { "epoch": 0.26519265223514277, "grad_norm": 0.3998931646347046, "learning_rate": 0.00014699177270305046, "loss": 1.4465, "step": 20408 }, { "epoch": 0.26520564677905867, "grad_norm": 0.4201618731021881, "learning_rate": 0.0001469891732411391, "loss": 1.4981, "step": 20409 }, { "epoch": 0.2652186413229745, "grad_norm": 0.36882930994033813, "learning_rate": 0.0001469865737792277, "loss": 1.1966, "step": 20410 }, { "epoch": 0.2652316358668904, "grad_norm": 0.23093043267726898, "learning_rate": 0.00014698397431731633, "loss": 1.2175, "step": 20411 }, { "epoch": 0.26524463041080626, "grad_norm": 0.4628934860229492, "learning_rate": 0.00014698137485540493, "loss": 1.4904, "step": 20412 }, { "epoch": 0.26525762495472216, "grad_norm": 0.4709840714931488, "learning_rate": 0.00014697877539349355, "loss": 1.5158, "step": 20413 }, { "epoch": 0.265270619498638, "grad_norm": 0.4536694884300232, "learning_rate": 0.00014697617593158218, "loss": 1.3396, "step": 20414 }, { "epoch": 0.2652836140425539, "grad_norm": 0.4625324308872223, "learning_rate": 0.00014697357646967078, "loss": 1.4676, "step": 20415 }, { "epoch": 0.26529660858646975, "grad_norm": 0.3581303358078003, "learning_rate": 0.0001469709770077594, "loss": 1.4109, "step": 20416 }, { "epoch": 0.26530960313038565, "grad_norm": 0.4025501012802124, "learning_rate": 0.00014696837754584803, "loss": 1.3674, "step": 20417 }, { "epoch": 0.2653225976743015, "grad_norm": 0.513404369354248, "learning_rate": 0.00014696577808393662, "loss": 1.4814, "step": 20418 }, { "epoch": 0.2653355922182174, "grad_norm": 0.4174301028251648, "learning_rate": 0.00014696317862202525, "loss": 1.4742, "step": 20419 }, { "epoch": 0.26534858676213324, "grad_norm": 0.5026924014091492, "learning_rate": 0.00014696057916011384, "loss": 1.4351, "step": 20420 }, { "epoch": 0.26536158130604914, "grad_norm": 0.4656890034675598, "learning_rate": 0.0001469579796982025, "loss": 1.4062, "step": 20421 }, { "epoch": 0.265374575849965, "grad_norm": 0.37233859300613403, "learning_rate": 0.0001469553802362911, "loss": 1.3051, "step": 20422 }, { "epoch": 0.2653875703938809, "grad_norm": 0.380974143743515, "learning_rate": 0.00014695278077437972, "loss": 1.4349, "step": 20423 }, { "epoch": 0.26540056493779673, "grad_norm": 0.4362393617630005, "learning_rate": 0.00014695018131246832, "loss": 1.3495, "step": 20424 }, { "epoch": 0.26541355948171264, "grad_norm": 0.3156451880931854, "learning_rate": 0.00014694758185055694, "loss": 1.3011, "step": 20425 }, { "epoch": 0.2654265540256285, "grad_norm": 0.4349451959133148, "learning_rate": 0.00014694498238864556, "loss": 1.4841, "step": 20426 }, { "epoch": 0.2654395485695444, "grad_norm": 0.3691495954990387, "learning_rate": 0.00014694238292673416, "loss": 1.351, "step": 20427 }, { "epoch": 0.2654525431134602, "grad_norm": 0.35394132137298584, "learning_rate": 0.00014693978346482279, "loss": 1.4216, "step": 20428 }, { "epoch": 0.2654655376573761, "grad_norm": 0.4816385507583618, "learning_rate": 0.0001469371840029114, "loss": 1.3449, "step": 20429 }, { "epoch": 0.265478532201292, "grad_norm": 0.38406801223754883, "learning_rate": 0.000146934584541, "loss": 1.4388, "step": 20430 }, { "epoch": 0.2654915267452079, "grad_norm": 0.37115398049354553, "learning_rate": 0.00014693198507908863, "loss": 1.2303, "step": 20431 }, { "epoch": 0.2655045212891237, "grad_norm": 0.4292278587818146, "learning_rate": 0.00014692938561717726, "loss": 1.3919, "step": 20432 }, { "epoch": 0.2655175158330396, "grad_norm": 0.4069332778453827, "learning_rate": 0.00014692678615526588, "loss": 1.5804, "step": 20433 }, { "epoch": 0.26553051037695546, "grad_norm": 0.3018932342529297, "learning_rate": 0.00014692418669335448, "loss": 1.3315, "step": 20434 }, { "epoch": 0.26554350492087136, "grad_norm": 0.3571458160877228, "learning_rate": 0.0001469215872314431, "loss": 1.3644, "step": 20435 }, { "epoch": 0.2655564994647872, "grad_norm": 0.4125556945800781, "learning_rate": 0.00014691898776953173, "loss": 1.5809, "step": 20436 }, { "epoch": 0.2655694940087031, "grad_norm": 0.4639059901237488, "learning_rate": 0.00014691638830762033, "loss": 1.4488, "step": 20437 }, { "epoch": 0.26558248855261896, "grad_norm": 0.5177050232887268, "learning_rate": 0.00014691378884570895, "loss": 1.5209, "step": 20438 }, { "epoch": 0.26559548309653486, "grad_norm": 0.3875853419303894, "learning_rate": 0.00014691118938379755, "loss": 1.2416, "step": 20439 }, { "epoch": 0.2656084776404507, "grad_norm": 0.2878570556640625, "learning_rate": 0.0001469085899218862, "loss": 1.3895, "step": 20440 }, { "epoch": 0.2656214721843666, "grad_norm": 0.40805357694625854, "learning_rate": 0.0001469059904599748, "loss": 1.3319, "step": 20441 }, { "epoch": 0.26563446672828245, "grad_norm": 0.28271886706352234, "learning_rate": 0.0001469033909980634, "loss": 1.366, "step": 20442 }, { "epoch": 0.26564746127219835, "grad_norm": 0.3878540098667145, "learning_rate": 0.00014690079153615202, "loss": 1.4347, "step": 20443 }, { "epoch": 0.2656604558161142, "grad_norm": 0.3423001170158386, "learning_rate": 0.00014689819207424064, "loss": 1.5316, "step": 20444 }, { "epoch": 0.2656734503600301, "grad_norm": 0.3997270166873932, "learning_rate": 0.00014689559261232927, "loss": 1.391, "step": 20445 }, { "epoch": 0.26568644490394594, "grad_norm": 0.4809684455394745, "learning_rate": 0.00014689299315041786, "loss": 1.4126, "step": 20446 }, { "epoch": 0.26569943944786184, "grad_norm": 0.4122914969921112, "learning_rate": 0.0001468903936885065, "loss": 1.3593, "step": 20447 }, { "epoch": 0.2657124339917777, "grad_norm": 0.4307091236114502, "learning_rate": 0.0001468877942265951, "loss": 1.4189, "step": 20448 }, { "epoch": 0.2657254285356936, "grad_norm": 0.39407795667648315, "learning_rate": 0.0001468851947646837, "loss": 1.3267, "step": 20449 }, { "epoch": 0.26573842307960943, "grad_norm": 0.3779304623603821, "learning_rate": 0.00014688259530277234, "loss": 1.2678, "step": 20450 }, { "epoch": 0.26575141762352533, "grad_norm": 0.47362807393074036, "learning_rate": 0.00014687999584086093, "loss": 1.4144, "step": 20451 }, { "epoch": 0.2657644121674412, "grad_norm": 0.36937618255615234, "learning_rate": 0.00014687739637894958, "loss": 1.4565, "step": 20452 }, { "epoch": 0.2657774067113571, "grad_norm": 0.42991894483566284, "learning_rate": 0.00014687479691703818, "loss": 1.403, "step": 20453 }, { "epoch": 0.2657904012552729, "grad_norm": 0.5301804542541504, "learning_rate": 0.00014687219745512678, "loss": 1.3587, "step": 20454 }, { "epoch": 0.2658033957991888, "grad_norm": 0.306618332862854, "learning_rate": 0.0001468695979932154, "loss": 1.3203, "step": 20455 }, { "epoch": 0.26581639034310467, "grad_norm": 0.41566163301467896, "learning_rate": 0.00014686699853130403, "loss": 1.2821, "step": 20456 }, { "epoch": 0.26582938488702057, "grad_norm": 0.36690419912338257, "learning_rate": 0.00014686439906939265, "loss": 1.4334, "step": 20457 }, { "epoch": 0.2658423794309364, "grad_norm": 0.49747782945632935, "learning_rate": 0.00014686179960748125, "loss": 1.4742, "step": 20458 }, { "epoch": 0.2658553739748523, "grad_norm": 0.40697649121284485, "learning_rate": 0.00014685920014556987, "loss": 1.281, "step": 20459 }, { "epoch": 0.26586836851876816, "grad_norm": 0.36211541295051575, "learning_rate": 0.0001468566006836585, "loss": 1.4503, "step": 20460 }, { "epoch": 0.26588136306268406, "grad_norm": 0.3339312672615051, "learning_rate": 0.0001468540012217471, "loss": 1.2378, "step": 20461 }, { "epoch": 0.2658943576065999, "grad_norm": 0.31429940462112427, "learning_rate": 0.00014685140175983572, "loss": 1.4091, "step": 20462 }, { "epoch": 0.2659073521505158, "grad_norm": 0.4537610411643982, "learning_rate": 0.00014684880229792432, "loss": 1.5026, "step": 20463 }, { "epoch": 0.26592034669443165, "grad_norm": 0.47489133477211, "learning_rate": 0.00014684620283601297, "loss": 1.3688, "step": 20464 }, { "epoch": 0.26593334123834755, "grad_norm": 0.4213125705718994, "learning_rate": 0.00014684360337410157, "loss": 1.3503, "step": 20465 }, { "epoch": 0.2659463357822634, "grad_norm": 0.46165016293525696, "learning_rate": 0.0001468410039121902, "loss": 1.418, "step": 20466 }, { "epoch": 0.2659593303261793, "grad_norm": 0.3794310688972473, "learning_rate": 0.00014683840445027882, "loss": 1.4213, "step": 20467 }, { "epoch": 0.26597232487009514, "grad_norm": 0.3580165505409241, "learning_rate": 0.0001468358049883674, "loss": 1.4313, "step": 20468 }, { "epoch": 0.26598531941401105, "grad_norm": 0.46021631360054016, "learning_rate": 0.00014683320552645604, "loss": 1.5658, "step": 20469 }, { "epoch": 0.2659983139579269, "grad_norm": 0.40520593523979187, "learning_rate": 0.00014683060606454464, "loss": 1.3848, "step": 20470 }, { "epoch": 0.2660113085018428, "grad_norm": 0.3961764872074127, "learning_rate": 0.00014682800660263326, "loss": 1.5738, "step": 20471 }, { "epoch": 0.26602430304575864, "grad_norm": 0.4557397961616516, "learning_rate": 0.00014682540714072188, "loss": 1.3206, "step": 20472 }, { "epoch": 0.26603729758967454, "grad_norm": 0.4219590425491333, "learning_rate": 0.00014682280767881048, "loss": 1.501, "step": 20473 }, { "epoch": 0.2660502921335904, "grad_norm": 0.42460018396377563, "learning_rate": 0.0001468202082168991, "loss": 1.296, "step": 20474 }, { "epoch": 0.2660632866775063, "grad_norm": 0.35993239283561707, "learning_rate": 0.00014681760875498773, "loss": 1.3469, "step": 20475 }, { "epoch": 0.26607628122142213, "grad_norm": 0.3286518454551697, "learning_rate": 0.00014681500929307636, "loss": 1.3153, "step": 20476 }, { "epoch": 0.26608927576533803, "grad_norm": 0.30503278970718384, "learning_rate": 0.00014681240983116495, "loss": 1.4186, "step": 20477 }, { "epoch": 0.2661022703092539, "grad_norm": 0.4529259204864502, "learning_rate": 0.00014680981036925358, "loss": 1.3746, "step": 20478 }, { "epoch": 0.2661152648531698, "grad_norm": 0.38732701539993286, "learning_rate": 0.0001468072109073422, "loss": 1.3698, "step": 20479 }, { "epoch": 0.2661282593970857, "grad_norm": 0.40228399634361267, "learning_rate": 0.0001468046114454308, "loss": 1.1749, "step": 20480 }, { "epoch": 0.2661412539410015, "grad_norm": 0.4193732440471649, "learning_rate": 0.00014680201198351942, "loss": 1.3749, "step": 20481 }, { "epoch": 0.2661542484849174, "grad_norm": 0.46410825848579407, "learning_rate": 0.00014679941252160802, "loss": 1.3536, "step": 20482 }, { "epoch": 0.26616724302883327, "grad_norm": 0.3774057626724243, "learning_rate": 0.00014679681305969665, "loss": 1.3258, "step": 20483 }, { "epoch": 0.26618023757274917, "grad_norm": 0.3716343343257904, "learning_rate": 0.00014679421359778527, "loss": 1.4639, "step": 20484 }, { "epoch": 0.266193232116665, "grad_norm": 0.3497524857521057, "learning_rate": 0.00014679161413587387, "loss": 1.3988, "step": 20485 }, { "epoch": 0.2662062266605809, "grad_norm": 0.37190037965774536, "learning_rate": 0.0001467890146739625, "loss": 1.415, "step": 20486 }, { "epoch": 0.26621922120449676, "grad_norm": 0.42416632175445557, "learning_rate": 0.00014678641521205112, "loss": 1.4715, "step": 20487 }, { "epoch": 0.26623221574841266, "grad_norm": 0.43514132499694824, "learning_rate": 0.00014678381575013974, "loss": 1.3162, "step": 20488 }, { "epoch": 0.2662452102923285, "grad_norm": 0.43945401906967163, "learning_rate": 0.00014678121628822834, "loss": 1.4666, "step": 20489 }, { "epoch": 0.2662582048362444, "grad_norm": 0.4091470241546631, "learning_rate": 0.00014677861682631696, "loss": 1.5216, "step": 20490 }, { "epoch": 0.26627119938016025, "grad_norm": 0.3419691324234009, "learning_rate": 0.0001467760173644056, "loss": 1.3405, "step": 20491 }, { "epoch": 0.26628419392407615, "grad_norm": 0.378904789686203, "learning_rate": 0.00014677341790249418, "loss": 1.4309, "step": 20492 }, { "epoch": 0.266297188467992, "grad_norm": 0.39966049790382385, "learning_rate": 0.0001467708184405828, "loss": 1.5508, "step": 20493 }, { "epoch": 0.2663101830119079, "grad_norm": 0.3456076979637146, "learning_rate": 0.0001467682189786714, "loss": 1.2997, "step": 20494 }, { "epoch": 0.26632317755582374, "grad_norm": 0.5551380515098572, "learning_rate": 0.00014676561951676006, "loss": 1.6379, "step": 20495 }, { "epoch": 0.26633617209973964, "grad_norm": 0.4086301922798157, "learning_rate": 0.00014676302005484866, "loss": 1.3887, "step": 20496 }, { "epoch": 0.2663491666436555, "grad_norm": 0.5633037686347961, "learning_rate": 0.00014676042059293725, "loss": 1.482, "step": 20497 }, { "epoch": 0.2663621611875714, "grad_norm": 0.4641495645046234, "learning_rate": 0.00014675782113102588, "loss": 1.4203, "step": 20498 }, { "epoch": 0.26637515573148723, "grad_norm": 0.367080956697464, "learning_rate": 0.0001467552216691145, "loss": 1.5495, "step": 20499 }, { "epoch": 0.26638815027540313, "grad_norm": 0.4504353404045105, "learning_rate": 0.00014675262220720313, "loss": 1.3292, "step": 20500 }, { "epoch": 0.266401144819319, "grad_norm": 0.4139452576637268, "learning_rate": 0.00014675002274529172, "loss": 1.4385, "step": 20501 }, { "epoch": 0.2664141393632349, "grad_norm": 0.3960649073123932, "learning_rate": 0.00014674742328338035, "loss": 1.4685, "step": 20502 }, { "epoch": 0.2664271339071507, "grad_norm": 0.4675760269165039, "learning_rate": 0.00014674482382146897, "loss": 1.4818, "step": 20503 }, { "epoch": 0.2664401284510666, "grad_norm": 0.46072453260421753, "learning_rate": 0.00014674222435955757, "loss": 1.3335, "step": 20504 }, { "epoch": 0.26645312299498247, "grad_norm": 0.39284059405326843, "learning_rate": 0.0001467396248976462, "loss": 1.5761, "step": 20505 }, { "epoch": 0.2664661175388984, "grad_norm": 0.2924778163433075, "learning_rate": 0.00014673702543573482, "loss": 1.5178, "step": 20506 }, { "epoch": 0.2664791120828142, "grad_norm": 0.40896594524383545, "learning_rate": 0.00014673442597382344, "loss": 1.4503, "step": 20507 }, { "epoch": 0.2664921066267301, "grad_norm": 0.3824611008167267, "learning_rate": 0.00014673182651191204, "loss": 1.4965, "step": 20508 }, { "epoch": 0.26650510117064596, "grad_norm": 0.4336054027080536, "learning_rate": 0.00014672922705000064, "loss": 1.4405, "step": 20509 }, { "epoch": 0.26651809571456186, "grad_norm": 0.3573705852031708, "learning_rate": 0.0001467266275880893, "loss": 1.3256, "step": 20510 }, { "epoch": 0.2665310902584777, "grad_norm": 0.2696384787559509, "learning_rate": 0.0001467240281261779, "loss": 1.1086, "step": 20511 }, { "epoch": 0.2665440848023936, "grad_norm": 0.4157159924507141, "learning_rate": 0.0001467214286642665, "loss": 1.4728, "step": 20512 }, { "epoch": 0.26655707934630946, "grad_norm": 0.3976568579673767, "learning_rate": 0.0001467188292023551, "loss": 1.3135, "step": 20513 }, { "epoch": 0.26657007389022536, "grad_norm": 0.42020806670188904, "learning_rate": 0.00014671622974044373, "loss": 1.4906, "step": 20514 }, { "epoch": 0.2665830684341412, "grad_norm": 0.48886722326278687, "learning_rate": 0.00014671363027853236, "loss": 1.5235, "step": 20515 }, { "epoch": 0.2665960629780571, "grad_norm": 0.3221413791179657, "learning_rate": 0.00014671103081662095, "loss": 1.2387, "step": 20516 }, { "epoch": 0.26660905752197295, "grad_norm": 0.4440333843231201, "learning_rate": 0.00014670843135470958, "loss": 1.3829, "step": 20517 }, { "epoch": 0.26662205206588885, "grad_norm": 0.2754184901714325, "learning_rate": 0.0001467058318927982, "loss": 1.4253, "step": 20518 }, { "epoch": 0.2666350466098047, "grad_norm": 0.3203902840614319, "learning_rate": 0.00014670323243088683, "loss": 1.2764, "step": 20519 }, { "epoch": 0.2666480411537206, "grad_norm": 0.3556414246559143, "learning_rate": 0.00014670063296897543, "loss": 1.4171, "step": 20520 }, { "epoch": 0.26666103569763644, "grad_norm": 0.4025947153568268, "learning_rate": 0.00014669803350706402, "loss": 1.3654, "step": 20521 }, { "epoch": 0.26667403024155234, "grad_norm": 0.3941269516944885, "learning_rate": 0.00014669543404515267, "loss": 1.5005, "step": 20522 }, { "epoch": 0.2666870247854682, "grad_norm": 0.45404061675071716, "learning_rate": 0.00014669283458324127, "loss": 1.4261, "step": 20523 }, { "epoch": 0.2667000193293841, "grad_norm": 0.3397692143917084, "learning_rate": 0.0001466902351213299, "loss": 1.3216, "step": 20524 }, { "epoch": 0.26671301387329993, "grad_norm": 0.43843889236450195, "learning_rate": 0.0001466876356594185, "loss": 1.4268, "step": 20525 }, { "epoch": 0.26672600841721583, "grad_norm": 0.48928913474082947, "learning_rate": 0.00014668503619750712, "loss": 1.5124, "step": 20526 }, { "epoch": 0.2667390029611317, "grad_norm": 0.3399357199668884, "learning_rate": 0.00014668243673559574, "loss": 1.3637, "step": 20527 }, { "epoch": 0.2667519975050476, "grad_norm": 0.421852171421051, "learning_rate": 0.00014667983727368434, "loss": 1.4828, "step": 20528 }, { "epoch": 0.2667649920489634, "grad_norm": 0.4316619634628296, "learning_rate": 0.00014667723781177296, "loss": 1.3984, "step": 20529 }, { "epoch": 0.2667779865928793, "grad_norm": 0.4099975526332855, "learning_rate": 0.0001466746383498616, "loss": 1.5689, "step": 20530 }, { "epoch": 0.26679098113679517, "grad_norm": 0.337446004152298, "learning_rate": 0.00014667203888795021, "loss": 1.2349, "step": 20531 }, { "epoch": 0.26680397568071107, "grad_norm": 0.4579349458217621, "learning_rate": 0.0001466694394260388, "loss": 1.3794, "step": 20532 }, { "epoch": 0.2668169702246269, "grad_norm": 0.5047162175178528, "learning_rate": 0.00014666683996412744, "loss": 1.5072, "step": 20533 }, { "epoch": 0.2668299647685428, "grad_norm": 0.3554355800151825, "learning_rate": 0.00014666424050221606, "loss": 1.5544, "step": 20534 }, { "epoch": 0.26684295931245866, "grad_norm": 0.3911932110786438, "learning_rate": 0.00014666164104030466, "loss": 1.4133, "step": 20535 }, { "epoch": 0.26685595385637456, "grad_norm": 0.43749839067459106, "learning_rate": 0.00014665904157839328, "loss": 1.2183, "step": 20536 }, { "epoch": 0.2668689484002904, "grad_norm": 0.40340352058410645, "learning_rate": 0.00014665644211648188, "loss": 1.3961, "step": 20537 }, { "epoch": 0.2668819429442063, "grad_norm": 0.44622188806533813, "learning_rate": 0.0001466538426545705, "loss": 1.5031, "step": 20538 }, { "epoch": 0.26689493748812215, "grad_norm": 0.3573788106441498, "learning_rate": 0.00014665124319265913, "loss": 1.4739, "step": 20539 }, { "epoch": 0.26690793203203805, "grad_norm": 0.3120102286338806, "learning_rate": 0.00014664864373074773, "loss": 1.5174, "step": 20540 }, { "epoch": 0.2669209265759539, "grad_norm": 0.4040786325931549, "learning_rate": 0.00014664604426883638, "loss": 1.2888, "step": 20541 }, { "epoch": 0.2669339211198698, "grad_norm": 0.3949944078922272, "learning_rate": 0.00014664344480692497, "loss": 1.3514, "step": 20542 }, { "epoch": 0.26694691566378564, "grad_norm": 0.4573860466480255, "learning_rate": 0.0001466408453450136, "loss": 1.4011, "step": 20543 }, { "epoch": 0.26695991020770155, "grad_norm": 0.4439416527748108, "learning_rate": 0.0001466382458831022, "loss": 1.4129, "step": 20544 }, { "epoch": 0.2669729047516174, "grad_norm": 0.34848085045814514, "learning_rate": 0.00014663564642119082, "loss": 1.515, "step": 20545 }, { "epoch": 0.2669858992955333, "grad_norm": 0.39392855763435364, "learning_rate": 0.00014663304695927945, "loss": 1.2974, "step": 20546 }, { "epoch": 0.26699889383944914, "grad_norm": 0.3618329167366028, "learning_rate": 0.00014663044749736804, "loss": 1.5627, "step": 20547 }, { "epoch": 0.26701188838336504, "grad_norm": 0.37951064109802246, "learning_rate": 0.00014662784803545667, "loss": 1.4563, "step": 20548 }, { "epoch": 0.2670248829272809, "grad_norm": 0.3968099057674408, "learning_rate": 0.0001466252485735453, "loss": 1.5636, "step": 20549 }, { "epoch": 0.2670378774711968, "grad_norm": 0.47305813431739807, "learning_rate": 0.00014662264911163392, "loss": 1.3466, "step": 20550 }, { "epoch": 0.26705087201511263, "grad_norm": 0.3422969877719879, "learning_rate": 0.00014662004964972251, "loss": 1.2357, "step": 20551 }, { "epoch": 0.26706386655902853, "grad_norm": 0.26564934849739075, "learning_rate": 0.0001466174501878111, "loss": 1.3822, "step": 20552 }, { "epoch": 0.2670768611029444, "grad_norm": 0.38466688990592957, "learning_rate": 0.00014661485072589976, "loss": 1.6854, "step": 20553 }, { "epoch": 0.2670898556468603, "grad_norm": 0.42451873421669006, "learning_rate": 0.00014661225126398836, "loss": 1.3781, "step": 20554 }, { "epoch": 0.2671028501907762, "grad_norm": 0.3984309136867523, "learning_rate": 0.00014660965180207698, "loss": 1.5276, "step": 20555 }, { "epoch": 0.267115844734692, "grad_norm": 0.32337599992752075, "learning_rate": 0.00014660705234016558, "loss": 1.5079, "step": 20556 }, { "epoch": 0.2671288392786079, "grad_norm": 0.34577375650405884, "learning_rate": 0.0001466044528782542, "loss": 1.3852, "step": 20557 }, { "epoch": 0.26714183382252377, "grad_norm": 0.324277400970459, "learning_rate": 0.00014660185341634283, "loss": 1.5501, "step": 20558 }, { "epoch": 0.26715482836643967, "grad_norm": 0.3926406502723694, "learning_rate": 0.00014659925395443143, "loss": 1.4244, "step": 20559 }, { "epoch": 0.2671678229103555, "grad_norm": 0.3978142738342285, "learning_rate": 0.00014659665449252005, "loss": 1.3282, "step": 20560 }, { "epoch": 0.2671808174542714, "grad_norm": 0.48755988478660583, "learning_rate": 0.00014659405503060868, "loss": 1.4851, "step": 20561 }, { "epoch": 0.26719381199818726, "grad_norm": 0.39426976442337036, "learning_rate": 0.0001465914555686973, "loss": 1.4414, "step": 20562 }, { "epoch": 0.26720680654210316, "grad_norm": 0.5472205281257629, "learning_rate": 0.0001465888561067859, "loss": 1.4487, "step": 20563 }, { "epoch": 0.267219801086019, "grad_norm": 0.3972024619579315, "learning_rate": 0.0001465862566448745, "loss": 1.5969, "step": 20564 }, { "epoch": 0.2672327956299349, "grad_norm": 0.4485118091106415, "learning_rate": 0.00014658365718296315, "loss": 1.3231, "step": 20565 }, { "epoch": 0.26724579017385075, "grad_norm": 0.38982245326042175, "learning_rate": 0.00014658105772105175, "loss": 1.3931, "step": 20566 }, { "epoch": 0.26725878471776665, "grad_norm": 0.397883802652359, "learning_rate": 0.00014657845825914037, "loss": 1.5746, "step": 20567 }, { "epoch": 0.2672717792616825, "grad_norm": 0.38800114393234253, "learning_rate": 0.00014657585879722897, "loss": 1.3589, "step": 20568 }, { "epoch": 0.2672847738055984, "grad_norm": 0.39322930574417114, "learning_rate": 0.0001465732593353176, "loss": 1.5264, "step": 20569 }, { "epoch": 0.26729776834951424, "grad_norm": 0.4672949016094208, "learning_rate": 0.00014657065987340622, "loss": 1.5386, "step": 20570 }, { "epoch": 0.26731076289343014, "grad_norm": 0.36268308758735657, "learning_rate": 0.00014656806041149481, "loss": 1.2384, "step": 20571 }, { "epoch": 0.267323757437346, "grad_norm": 0.3658696413040161, "learning_rate": 0.00014656546094958344, "loss": 1.3069, "step": 20572 }, { "epoch": 0.2673367519812619, "grad_norm": 0.36909234523773193, "learning_rate": 0.00014656286148767206, "loss": 1.3643, "step": 20573 }, { "epoch": 0.26734974652517773, "grad_norm": 0.3710574209690094, "learning_rate": 0.0001465602620257607, "loss": 1.448, "step": 20574 }, { "epoch": 0.26736274106909363, "grad_norm": 0.4195187985897064, "learning_rate": 0.00014655766256384928, "loss": 1.1507, "step": 20575 }, { "epoch": 0.2673757356130095, "grad_norm": 0.30392158031463623, "learning_rate": 0.00014655506310193788, "loss": 1.2132, "step": 20576 }, { "epoch": 0.2673887301569254, "grad_norm": 0.37641626596450806, "learning_rate": 0.00014655246364002653, "loss": 1.425, "step": 20577 }, { "epoch": 0.2674017247008412, "grad_norm": 0.39774182438850403, "learning_rate": 0.00014654986417811513, "loss": 1.3475, "step": 20578 }, { "epoch": 0.2674147192447571, "grad_norm": 0.45070627331733704, "learning_rate": 0.00014654726471620376, "loss": 1.3981, "step": 20579 }, { "epoch": 0.26742771378867297, "grad_norm": 0.429548978805542, "learning_rate": 0.00014654466525429238, "loss": 1.4826, "step": 20580 }, { "epoch": 0.2674407083325889, "grad_norm": 0.4077950119972229, "learning_rate": 0.00014654206579238098, "loss": 1.5791, "step": 20581 }, { "epoch": 0.2674537028765047, "grad_norm": 0.4084833562374115, "learning_rate": 0.0001465394663304696, "loss": 1.3112, "step": 20582 }, { "epoch": 0.2674666974204206, "grad_norm": 0.43050774931907654, "learning_rate": 0.0001465368668685582, "loss": 1.4577, "step": 20583 }, { "epoch": 0.26747969196433646, "grad_norm": 0.408719539642334, "learning_rate": 0.00014653426740664685, "loss": 1.4389, "step": 20584 }, { "epoch": 0.26749268650825236, "grad_norm": 0.3926236629486084, "learning_rate": 0.00014653166794473545, "loss": 1.3572, "step": 20585 }, { "epoch": 0.2675056810521682, "grad_norm": 0.3172488808631897, "learning_rate": 0.00014652906848282407, "loss": 1.3145, "step": 20586 }, { "epoch": 0.2675186755960841, "grad_norm": 0.3986409604549408, "learning_rate": 0.00014652646902091267, "loss": 1.43, "step": 20587 }, { "epoch": 0.26753167013999996, "grad_norm": 0.3207913339138031, "learning_rate": 0.0001465238695590013, "loss": 1.5232, "step": 20588 }, { "epoch": 0.26754466468391586, "grad_norm": 0.3617097735404968, "learning_rate": 0.00014652127009708992, "loss": 1.5102, "step": 20589 }, { "epoch": 0.2675576592278317, "grad_norm": 0.4643590748310089, "learning_rate": 0.00014651867063517852, "loss": 1.5157, "step": 20590 }, { "epoch": 0.2675706537717476, "grad_norm": 0.43895310163497925, "learning_rate": 0.00014651607117326714, "loss": 1.5303, "step": 20591 }, { "epoch": 0.26758364831566345, "grad_norm": 0.3952551782131195, "learning_rate": 0.00014651347171135577, "loss": 1.393, "step": 20592 }, { "epoch": 0.26759664285957935, "grad_norm": 0.37808409333229065, "learning_rate": 0.00014651087224944436, "loss": 1.287, "step": 20593 }, { "epoch": 0.2676096374034952, "grad_norm": 0.4574804902076721, "learning_rate": 0.000146508272787533, "loss": 1.3884, "step": 20594 }, { "epoch": 0.2676226319474111, "grad_norm": 0.4842263162136078, "learning_rate": 0.00014650567332562158, "loss": 1.4964, "step": 20595 }, { "epoch": 0.26763562649132694, "grad_norm": 0.3509844243526459, "learning_rate": 0.00014650307386371024, "loss": 1.6198, "step": 20596 }, { "epoch": 0.26764862103524284, "grad_norm": 0.42597100138664246, "learning_rate": 0.00014650047440179883, "loss": 1.4549, "step": 20597 }, { "epoch": 0.2676616155791587, "grad_norm": 0.45165514945983887, "learning_rate": 0.00014649787493988746, "loss": 1.425, "step": 20598 }, { "epoch": 0.2676746101230746, "grad_norm": 0.38156551122665405, "learning_rate": 0.00014649527547797606, "loss": 1.6365, "step": 20599 }, { "epoch": 0.26768760466699043, "grad_norm": 0.3806611895561218, "learning_rate": 0.00014649267601606468, "loss": 1.4636, "step": 20600 }, { "epoch": 0.26770059921090633, "grad_norm": 0.3957795798778534, "learning_rate": 0.0001464900765541533, "loss": 1.4103, "step": 20601 }, { "epoch": 0.2677135937548222, "grad_norm": 0.29333290457725525, "learning_rate": 0.0001464874770922419, "loss": 1.2337, "step": 20602 }, { "epoch": 0.2677265882987381, "grad_norm": 0.3904321491718292, "learning_rate": 0.00014648487763033053, "loss": 1.5009, "step": 20603 }, { "epoch": 0.2677395828426539, "grad_norm": 0.40922266244888306, "learning_rate": 0.00014648227816841915, "loss": 1.7033, "step": 20604 }, { "epoch": 0.2677525773865698, "grad_norm": 0.35216403007507324, "learning_rate": 0.00014647967870650775, "loss": 1.3186, "step": 20605 }, { "epoch": 0.26776557193048567, "grad_norm": 0.35933560132980347, "learning_rate": 0.00014647707924459637, "loss": 1.3302, "step": 20606 }, { "epoch": 0.26777856647440157, "grad_norm": 0.28483909368515015, "learning_rate": 0.00014647447978268497, "loss": 1.3749, "step": 20607 }, { "epoch": 0.2677915610183174, "grad_norm": 0.423801451921463, "learning_rate": 0.00014647188032077362, "loss": 1.4734, "step": 20608 }, { "epoch": 0.2678045555622333, "grad_norm": 0.27995800971984863, "learning_rate": 0.00014646928085886222, "loss": 1.3285, "step": 20609 }, { "epoch": 0.26781755010614916, "grad_norm": 0.5021472573280334, "learning_rate": 0.00014646668139695084, "loss": 1.4605, "step": 20610 }, { "epoch": 0.26783054465006506, "grad_norm": 0.39849868416786194, "learning_rate": 0.00014646408193503944, "loss": 1.3853, "step": 20611 }, { "epoch": 0.2678435391939809, "grad_norm": 0.30901312828063965, "learning_rate": 0.00014646148247312807, "loss": 1.1987, "step": 20612 }, { "epoch": 0.2678565337378968, "grad_norm": 0.3241737186908722, "learning_rate": 0.0001464588830112167, "loss": 1.4156, "step": 20613 }, { "epoch": 0.26786952828181265, "grad_norm": 0.43412768840789795, "learning_rate": 0.0001464562835493053, "loss": 1.465, "step": 20614 }, { "epoch": 0.26788252282572855, "grad_norm": 0.3654631972312927, "learning_rate": 0.00014645368408739394, "loss": 1.2729, "step": 20615 }, { "epoch": 0.2678955173696444, "grad_norm": 0.3572291135787964, "learning_rate": 0.00014645108462548254, "loss": 1.5252, "step": 20616 }, { "epoch": 0.2679085119135603, "grad_norm": 0.4149107336997986, "learning_rate": 0.00014644848516357116, "loss": 1.4198, "step": 20617 }, { "epoch": 0.26792150645747614, "grad_norm": 0.286456435918808, "learning_rate": 0.00014644588570165976, "loss": 1.2797, "step": 20618 }, { "epoch": 0.26793450100139204, "grad_norm": 0.40724557638168335, "learning_rate": 0.00014644328623974838, "loss": 1.4218, "step": 20619 }, { "epoch": 0.2679474955453079, "grad_norm": 0.3967374265193939, "learning_rate": 0.000146440686777837, "loss": 1.5093, "step": 20620 }, { "epoch": 0.2679604900892238, "grad_norm": 0.4456663131713867, "learning_rate": 0.0001464380873159256, "loss": 1.6117, "step": 20621 }, { "epoch": 0.26797348463313964, "grad_norm": 0.35312244296073914, "learning_rate": 0.00014643548785401423, "loss": 1.3902, "step": 20622 }, { "epoch": 0.26798647917705554, "grad_norm": 0.3175770938396454, "learning_rate": 0.00014643288839210285, "loss": 1.3318, "step": 20623 }, { "epoch": 0.2679994737209714, "grad_norm": 0.384489506483078, "learning_rate": 0.00014643028893019145, "loss": 1.4892, "step": 20624 }, { "epoch": 0.2680124682648873, "grad_norm": 0.3831591010093689, "learning_rate": 0.00014642768946828008, "loss": 1.3784, "step": 20625 }, { "epoch": 0.2680254628088031, "grad_norm": 0.38771161437034607, "learning_rate": 0.00014642509000636867, "loss": 1.4784, "step": 20626 }, { "epoch": 0.26803845735271903, "grad_norm": 0.4399231970310211, "learning_rate": 0.00014642249054445732, "loss": 1.398, "step": 20627 }, { "epoch": 0.2680514518966349, "grad_norm": 0.4146258533000946, "learning_rate": 0.00014641989108254592, "loss": 1.4837, "step": 20628 }, { "epoch": 0.2680644464405508, "grad_norm": 0.4017559587955475, "learning_rate": 0.00014641729162063455, "loss": 1.3852, "step": 20629 }, { "epoch": 0.2680774409844666, "grad_norm": 0.43311452865600586, "learning_rate": 0.00014641469215872314, "loss": 1.5727, "step": 20630 }, { "epoch": 0.2680904355283825, "grad_norm": 0.461364209651947, "learning_rate": 0.00014641209269681177, "loss": 1.2905, "step": 20631 }, { "epoch": 0.2681034300722984, "grad_norm": 0.37731269001960754, "learning_rate": 0.0001464094932349004, "loss": 1.2865, "step": 20632 }, { "epoch": 0.26811642461621427, "grad_norm": 0.4365038275718689, "learning_rate": 0.000146406893772989, "loss": 1.3654, "step": 20633 }, { "epoch": 0.26812941916013017, "grad_norm": 0.4046669602394104, "learning_rate": 0.00014640429431107761, "loss": 1.5534, "step": 20634 }, { "epoch": 0.268142413704046, "grad_norm": 0.42849433422088623, "learning_rate": 0.00014640169484916624, "loss": 1.4641, "step": 20635 }, { "epoch": 0.2681554082479619, "grad_norm": 0.49273115396499634, "learning_rate": 0.00014639909538725484, "loss": 1.4751, "step": 20636 }, { "epoch": 0.26816840279187776, "grad_norm": 0.3816077411174774, "learning_rate": 0.00014639649592534346, "loss": 1.3567, "step": 20637 }, { "epoch": 0.26818139733579366, "grad_norm": 0.3444799780845642, "learning_rate": 0.00014639389646343206, "loss": 1.3324, "step": 20638 }, { "epoch": 0.2681943918797095, "grad_norm": 0.31481218338012695, "learning_rate": 0.0001463912970015207, "loss": 1.3754, "step": 20639 }, { "epoch": 0.2682073864236254, "grad_norm": 0.43880695104599, "learning_rate": 0.0001463886975396093, "loss": 1.3357, "step": 20640 }, { "epoch": 0.26822038096754125, "grad_norm": 0.4160803556442261, "learning_rate": 0.00014638609807769793, "loss": 1.3076, "step": 20641 }, { "epoch": 0.26823337551145715, "grad_norm": 0.3695676624774933, "learning_rate": 0.00014638349861578653, "loss": 1.5626, "step": 20642 }, { "epoch": 0.268246370055373, "grad_norm": 0.39395737648010254, "learning_rate": 0.00014638089915387515, "loss": 1.3592, "step": 20643 }, { "epoch": 0.2682593645992889, "grad_norm": 0.4336263835430145, "learning_rate": 0.00014637829969196378, "loss": 1.3741, "step": 20644 }, { "epoch": 0.26827235914320474, "grad_norm": 0.44991445541381836, "learning_rate": 0.00014637570023005238, "loss": 1.4544, "step": 20645 }, { "epoch": 0.26828535368712064, "grad_norm": 0.4383201003074646, "learning_rate": 0.000146373100768141, "loss": 1.6389, "step": 20646 }, { "epoch": 0.2682983482310365, "grad_norm": 0.4155541658401489, "learning_rate": 0.00014637050130622962, "loss": 1.5005, "step": 20647 }, { "epoch": 0.2683113427749524, "grad_norm": 0.36898618936538696, "learning_rate": 0.00014636790184431822, "loss": 1.383, "step": 20648 }, { "epoch": 0.26832433731886823, "grad_norm": 0.3449082374572754, "learning_rate": 0.00014636530238240685, "loss": 1.5954, "step": 20649 }, { "epoch": 0.26833733186278413, "grad_norm": 0.4273110628128052, "learning_rate": 0.00014636270292049544, "loss": 1.3348, "step": 20650 }, { "epoch": 0.2683503264067, "grad_norm": 0.3989933133125305, "learning_rate": 0.0001463601034585841, "loss": 1.3298, "step": 20651 }, { "epoch": 0.2683633209506159, "grad_norm": 0.33908236026763916, "learning_rate": 0.0001463575039966727, "loss": 1.4126, "step": 20652 }, { "epoch": 0.2683763154945317, "grad_norm": 0.3597542941570282, "learning_rate": 0.00014635490453476132, "loss": 1.4677, "step": 20653 }, { "epoch": 0.2683893100384476, "grad_norm": 0.4397994875907898, "learning_rate": 0.00014635230507284994, "loss": 1.3313, "step": 20654 }, { "epoch": 0.26840230458236347, "grad_norm": 0.3488227427005768, "learning_rate": 0.00014634970561093854, "loss": 1.4348, "step": 20655 }, { "epoch": 0.26841529912627937, "grad_norm": 0.3622024655342102, "learning_rate": 0.00014634710614902716, "loss": 1.5753, "step": 20656 }, { "epoch": 0.2684282936701952, "grad_norm": 0.3150954842567444, "learning_rate": 0.00014634450668711576, "loss": 1.3288, "step": 20657 }, { "epoch": 0.2684412882141111, "grad_norm": 0.43790921568870544, "learning_rate": 0.0001463419072252044, "loss": 1.5527, "step": 20658 }, { "epoch": 0.26845428275802696, "grad_norm": 0.3803635239601135, "learning_rate": 0.000146339307763293, "loss": 1.3104, "step": 20659 }, { "epoch": 0.26846727730194286, "grad_norm": 0.5523461103439331, "learning_rate": 0.0001463367083013816, "loss": 1.4069, "step": 20660 }, { "epoch": 0.2684802718458587, "grad_norm": 0.3028532862663269, "learning_rate": 0.00014633410883947023, "loss": 1.4229, "step": 20661 }, { "epoch": 0.2684932663897746, "grad_norm": 0.4048062264919281, "learning_rate": 0.00014633150937755886, "loss": 1.395, "step": 20662 }, { "epoch": 0.26850626093369045, "grad_norm": 0.39473825693130493, "learning_rate": 0.00014632890991564748, "loss": 1.4592, "step": 20663 }, { "epoch": 0.26851925547760636, "grad_norm": 0.3994807004928589, "learning_rate": 0.00014632631045373608, "loss": 1.2424, "step": 20664 }, { "epoch": 0.2685322500215222, "grad_norm": 0.43029409646987915, "learning_rate": 0.0001463237109918247, "loss": 1.3107, "step": 20665 }, { "epoch": 0.2685452445654381, "grad_norm": 0.2833326458930969, "learning_rate": 0.00014632111152991333, "loss": 1.3468, "step": 20666 }, { "epoch": 0.26855823910935395, "grad_norm": 0.32771480083465576, "learning_rate": 0.00014631851206800192, "loss": 1.3958, "step": 20667 }, { "epoch": 0.26857123365326985, "grad_norm": 0.41782641410827637, "learning_rate": 0.00014631591260609055, "loss": 1.455, "step": 20668 }, { "epoch": 0.2685842281971857, "grad_norm": 0.4168553352355957, "learning_rate": 0.00014631331314417915, "loss": 1.2892, "step": 20669 }, { "epoch": 0.2685972227411016, "grad_norm": 0.3225835859775543, "learning_rate": 0.0001463107136822678, "loss": 1.3053, "step": 20670 }, { "epoch": 0.26861021728501744, "grad_norm": 0.4053954780101776, "learning_rate": 0.0001463081142203564, "loss": 1.3354, "step": 20671 }, { "epoch": 0.26862321182893334, "grad_norm": 0.3914523720741272, "learning_rate": 0.00014630551475844502, "loss": 1.3413, "step": 20672 }, { "epoch": 0.2686362063728492, "grad_norm": 0.3967929780483246, "learning_rate": 0.00014630291529653362, "loss": 1.5612, "step": 20673 }, { "epoch": 0.2686492009167651, "grad_norm": 0.2634059488773346, "learning_rate": 0.00014630031583462224, "loss": 1.0879, "step": 20674 }, { "epoch": 0.26866219546068093, "grad_norm": 0.38141852617263794, "learning_rate": 0.00014629771637271087, "loss": 1.3308, "step": 20675 }, { "epoch": 0.26867519000459683, "grad_norm": 0.2449585646390915, "learning_rate": 0.00014629511691079946, "loss": 1.4104, "step": 20676 }, { "epoch": 0.2686881845485127, "grad_norm": 0.4229148328304291, "learning_rate": 0.0001462925174488881, "loss": 1.3328, "step": 20677 }, { "epoch": 0.2687011790924286, "grad_norm": 0.4751756191253662, "learning_rate": 0.0001462899179869767, "loss": 1.2427, "step": 20678 }, { "epoch": 0.2687141736363444, "grad_norm": 0.3833039402961731, "learning_rate": 0.0001462873185250653, "loss": 1.3409, "step": 20679 }, { "epoch": 0.2687271681802603, "grad_norm": 0.3475169837474823, "learning_rate": 0.00014628471906315393, "loss": 1.3617, "step": 20680 }, { "epoch": 0.26874016272417617, "grad_norm": 0.2804672122001648, "learning_rate": 0.00014628211960124253, "loss": 1.196, "step": 20681 }, { "epoch": 0.26875315726809207, "grad_norm": 0.41921156644821167, "learning_rate": 0.00014627952013933118, "loss": 1.3957, "step": 20682 }, { "epoch": 0.2687661518120079, "grad_norm": 0.4306753873825073, "learning_rate": 0.00014627692067741978, "loss": 1.407, "step": 20683 }, { "epoch": 0.2687791463559238, "grad_norm": 0.33495616912841797, "learning_rate": 0.0001462743212155084, "loss": 1.3731, "step": 20684 }, { "epoch": 0.26879214089983966, "grad_norm": 0.47762545943260193, "learning_rate": 0.000146271721753597, "loss": 1.4385, "step": 20685 }, { "epoch": 0.26880513544375556, "grad_norm": 0.44264090061187744, "learning_rate": 0.00014626912229168563, "loss": 1.421, "step": 20686 }, { "epoch": 0.2688181299876714, "grad_norm": 0.43737825751304626, "learning_rate": 0.00014626652282977425, "loss": 1.3618, "step": 20687 }, { "epoch": 0.2688311245315873, "grad_norm": 0.4088273346424103, "learning_rate": 0.00014626392336786285, "loss": 1.5765, "step": 20688 }, { "epoch": 0.26884411907550315, "grad_norm": 0.40672099590301514, "learning_rate": 0.00014626132390595147, "loss": 1.1195, "step": 20689 }, { "epoch": 0.26885711361941905, "grad_norm": 0.48126015067100525, "learning_rate": 0.0001462587244440401, "loss": 1.6144, "step": 20690 }, { "epoch": 0.2688701081633349, "grad_norm": 0.36170291900634766, "learning_rate": 0.0001462561249821287, "loss": 1.2703, "step": 20691 }, { "epoch": 0.2688831027072508, "grad_norm": 0.5130376815795898, "learning_rate": 0.00014625352552021732, "loss": 1.4901, "step": 20692 }, { "epoch": 0.26889609725116664, "grad_norm": 0.35150396823883057, "learning_rate": 0.00014625092605830594, "loss": 1.1439, "step": 20693 }, { "epoch": 0.26890909179508254, "grad_norm": 0.4448801279067993, "learning_rate": 0.00014624832659639457, "loss": 1.4869, "step": 20694 }, { "epoch": 0.2689220863389984, "grad_norm": 0.380319207906723, "learning_rate": 0.00014624572713448317, "loss": 1.3173, "step": 20695 }, { "epoch": 0.2689350808829143, "grad_norm": 0.3700210452079773, "learning_rate": 0.0001462431276725718, "loss": 1.3, "step": 20696 }, { "epoch": 0.26894807542683014, "grad_norm": 0.3236282765865326, "learning_rate": 0.00014624052821066041, "loss": 1.1146, "step": 20697 }, { "epoch": 0.26896106997074604, "grad_norm": 0.3484293520450592, "learning_rate": 0.000146237928748749, "loss": 1.3232, "step": 20698 }, { "epoch": 0.2689740645146619, "grad_norm": 0.2931276559829712, "learning_rate": 0.00014623532928683764, "loss": 1.4476, "step": 20699 }, { "epoch": 0.2689870590585778, "grad_norm": 0.43924808502197266, "learning_rate": 0.00014623272982492623, "loss": 1.2916, "step": 20700 }, { "epoch": 0.2690000536024936, "grad_norm": 0.3774886131286621, "learning_rate": 0.00014623013036301489, "loss": 1.386, "step": 20701 }, { "epoch": 0.26901304814640953, "grad_norm": 0.4164135158061981, "learning_rate": 0.00014622753090110348, "loss": 1.3177, "step": 20702 }, { "epoch": 0.2690260426903254, "grad_norm": 0.5559182167053223, "learning_rate": 0.00014622493143919208, "loss": 1.4402, "step": 20703 }, { "epoch": 0.2690390372342413, "grad_norm": 0.45743390917778015, "learning_rate": 0.0001462223319772807, "loss": 1.4531, "step": 20704 }, { "epoch": 0.2690520317781571, "grad_norm": 0.40325310826301575, "learning_rate": 0.00014621973251536933, "loss": 1.3158, "step": 20705 }, { "epoch": 0.269065026322073, "grad_norm": 0.46727636456489563, "learning_rate": 0.00014621713305345795, "loss": 1.4722, "step": 20706 }, { "epoch": 0.2690780208659889, "grad_norm": 0.30945438146591187, "learning_rate": 0.00014621453359154655, "loss": 1.5292, "step": 20707 }, { "epoch": 0.26909101540990477, "grad_norm": 0.5098206996917725, "learning_rate": 0.00014621193412963518, "loss": 1.4808, "step": 20708 }, { "epoch": 0.26910400995382067, "grad_norm": 0.45369401574134827, "learning_rate": 0.0001462093346677238, "loss": 1.6057, "step": 20709 }, { "epoch": 0.2691170044977365, "grad_norm": 0.36789557337760925, "learning_rate": 0.0001462067352058124, "loss": 1.1677, "step": 20710 }, { "epoch": 0.2691299990416524, "grad_norm": 0.3800722360610962, "learning_rate": 0.00014620413574390102, "loss": 1.2215, "step": 20711 }, { "epoch": 0.26914299358556826, "grad_norm": 0.42654749751091003, "learning_rate": 0.00014620153628198962, "loss": 1.3985, "step": 20712 }, { "epoch": 0.26915598812948416, "grad_norm": 0.5000320672988892, "learning_rate": 0.00014619893682007827, "loss": 1.5014, "step": 20713 }, { "epoch": 0.2691689826734, "grad_norm": 0.37402960658073425, "learning_rate": 0.00014619633735816687, "loss": 1.3482, "step": 20714 }, { "epoch": 0.2691819772173159, "grad_norm": 0.3382261097431183, "learning_rate": 0.00014619373789625547, "loss": 1.3532, "step": 20715 }, { "epoch": 0.26919497176123175, "grad_norm": 0.3761296272277832, "learning_rate": 0.0001461911384343441, "loss": 1.4991, "step": 20716 }, { "epoch": 0.26920796630514765, "grad_norm": 0.3428308367729187, "learning_rate": 0.00014618853897243271, "loss": 1.6513, "step": 20717 }, { "epoch": 0.2692209608490635, "grad_norm": 0.3999347686767578, "learning_rate": 0.00014618593951052134, "loss": 1.3187, "step": 20718 }, { "epoch": 0.2692339553929794, "grad_norm": 0.3850482702255249, "learning_rate": 0.00014618334004860994, "loss": 1.5427, "step": 20719 }, { "epoch": 0.26924694993689524, "grad_norm": 0.5011225938796997, "learning_rate": 0.00014618074058669856, "loss": 1.3105, "step": 20720 }, { "epoch": 0.26925994448081114, "grad_norm": 0.4166141152381897, "learning_rate": 0.00014617814112478719, "loss": 1.3921, "step": 20721 }, { "epoch": 0.269272939024727, "grad_norm": 0.32082441449165344, "learning_rate": 0.00014617554166287578, "loss": 1.5407, "step": 20722 }, { "epoch": 0.2692859335686429, "grad_norm": 0.41183531284332275, "learning_rate": 0.0001461729422009644, "loss": 1.3955, "step": 20723 }, { "epoch": 0.26929892811255873, "grad_norm": 0.2709164321422577, "learning_rate": 0.000146170342739053, "loss": 1.4238, "step": 20724 }, { "epoch": 0.26931192265647463, "grad_norm": 0.373794287443161, "learning_rate": 0.00014616774327714166, "loss": 1.3195, "step": 20725 }, { "epoch": 0.2693249172003905, "grad_norm": 0.4608331322669983, "learning_rate": 0.00014616514381523025, "loss": 1.371, "step": 20726 }, { "epoch": 0.2693379117443064, "grad_norm": 0.4786517322063446, "learning_rate": 0.00014616254435331885, "loss": 1.4855, "step": 20727 }, { "epoch": 0.2693509062882222, "grad_norm": 0.3594572842121124, "learning_rate": 0.0001461599448914075, "loss": 1.4858, "step": 20728 }, { "epoch": 0.2693639008321381, "grad_norm": 0.36895909905433655, "learning_rate": 0.0001461573454294961, "loss": 1.5196, "step": 20729 }, { "epoch": 0.26937689537605397, "grad_norm": 0.38295185565948486, "learning_rate": 0.00014615474596758472, "loss": 1.5151, "step": 20730 }, { "epoch": 0.26938988991996987, "grad_norm": 0.4619479477405548, "learning_rate": 0.00014615214650567332, "loss": 1.4108, "step": 20731 }, { "epoch": 0.2694028844638857, "grad_norm": 0.3786890506744385, "learning_rate": 0.00014614954704376195, "loss": 1.4663, "step": 20732 }, { "epoch": 0.2694158790078016, "grad_norm": 0.34674233198165894, "learning_rate": 0.00014614694758185057, "loss": 1.3268, "step": 20733 }, { "epoch": 0.26942887355171746, "grad_norm": 0.35739365220069885, "learning_rate": 0.00014614434811993917, "loss": 1.4983, "step": 20734 }, { "epoch": 0.26944186809563336, "grad_norm": 0.3962860107421875, "learning_rate": 0.0001461417486580278, "loss": 1.4861, "step": 20735 }, { "epoch": 0.2694548626395492, "grad_norm": 0.4904305040836334, "learning_rate": 0.00014613914919611642, "loss": 1.3847, "step": 20736 }, { "epoch": 0.2694678571834651, "grad_norm": 0.4515432119369507, "learning_rate": 0.00014613654973420504, "loss": 1.4656, "step": 20737 }, { "epoch": 0.26948085172738095, "grad_norm": 0.36145856976509094, "learning_rate": 0.00014613395027229364, "loss": 1.2893, "step": 20738 }, { "epoch": 0.26949384627129686, "grad_norm": 0.4274309277534485, "learning_rate": 0.00014613135081038226, "loss": 1.4493, "step": 20739 }, { "epoch": 0.2695068408152127, "grad_norm": 0.5018572807312012, "learning_rate": 0.0001461287513484709, "loss": 1.3617, "step": 20740 }, { "epoch": 0.2695198353591286, "grad_norm": 0.44044139981269836, "learning_rate": 0.00014612615188655949, "loss": 1.442, "step": 20741 }, { "epoch": 0.26953282990304445, "grad_norm": 0.449899286031723, "learning_rate": 0.0001461235524246481, "loss": 1.4371, "step": 20742 }, { "epoch": 0.26954582444696035, "grad_norm": 0.4401423931121826, "learning_rate": 0.0001461209529627367, "loss": 1.1401, "step": 20743 }, { "epoch": 0.2695588189908762, "grad_norm": 0.41809260845184326, "learning_rate": 0.00014611835350082533, "loss": 1.3633, "step": 20744 }, { "epoch": 0.2695718135347921, "grad_norm": 0.4464724361896515, "learning_rate": 0.00014611575403891396, "loss": 1.5006, "step": 20745 }, { "epoch": 0.26958480807870794, "grad_norm": 0.4274747371673584, "learning_rate": 0.00014611315457700255, "loss": 1.2148, "step": 20746 }, { "epoch": 0.26959780262262384, "grad_norm": 0.39786624908447266, "learning_rate": 0.00014611055511509118, "loss": 1.4833, "step": 20747 }, { "epoch": 0.2696107971665397, "grad_norm": 0.4112197458744049, "learning_rate": 0.0001461079556531798, "loss": 1.4131, "step": 20748 }, { "epoch": 0.2696237917104556, "grad_norm": 0.4289264380931854, "learning_rate": 0.00014610535619126843, "loss": 1.4593, "step": 20749 }, { "epoch": 0.26963678625437143, "grad_norm": 0.3172912895679474, "learning_rate": 0.00014610275672935702, "loss": 1.4076, "step": 20750 }, { "epoch": 0.26964978079828733, "grad_norm": 0.3598501980304718, "learning_rate": 0.00014610015726744565, "loss": 1.4618, "step": 20751 }, { "epoch": 0.2696627753422032, "grad_norm": 0.42353907227516174, "learning_rate": 0.00014609755780553427, "loss": 1.4181, "step": 20752 }, { "epoch": 0.2696757698861191, "grad_norm": 0.37242230772972107, "learning_rate": 0.00014609495834362287, "loss": 1.5244, "step": 20753 }, { "epoch": 0.2696887644300349, "grad_norm": 0.4530049264431, "learning_rate": 0.0001460923588817115, "loss": 1.4631, "step": 20754 }, { "epoch": 0.2697017589739508, "grad_norm": 0.41440507769584656, "learning_rate": 0.0001460897594198001, "loss": 1.3935, "step": 20755 }, { "epoch": 0.26971475351786667, "grad_norm": 0.3290250301361084, "learning_rate": 0.00014608715995788874, "loss": 1.3011, "step": 20756 }, { "epoch": 0.26972774806178257, "grad_norm": 0.5125744342803955, "learning_rate": 0.00014608456049597734, "loss": 1.5043, "step": 20757 }, { "epoch": 0.2697407426056984, "grad_norm": 0.35893887281417847, "learning_rate": 0.00014608196103406594, "loss": 1.2907, "step": 20758 }, { "epoch": 0.2697537371496143, "grad_norm": 0.28977641463279724, "learning_rate": 0.00014607936157215456, "loss": 1.2827, "step": 20759 }, { "epoch": 0.26976673169353016, "grad_norm": 0.41102495789527893, "learning_rate": 0.0001460767621102432, "loss": 1.4831, "step": 20760 }, { "epoch": 0.26977972623744606, "grad_norm": 0.40334898233413696, "learning_rate": 0.0001460741626483318, "loss": 1.4153, "step": 20761 }, { "epoch": 0.2697927207813619, "grad_norm": 0.3420429527759552, "learning_rate": 0.0001460715631864204, "loss": 1.498, "step": 20762 }, { "epoch": 0.2698057153252778, "grad_norm": 0.4257211983203888, "learning_rate": 0.00014606896372450903, "loss": 1.3442, "step": 20763 }, { "epoch": 0.26981870986919365, "grad_norm": 0.4188475012779236, "learning_rate": 0.00014606636426259766, "loss": 1.3155, "step": 20764 }, { "epoch": 0.26983170441310955, "grad_norm": 0.3321782946586609, "learning_rate": 0.00014606376480068626, "loss": 1.4222, "step": 20765 }, { "epoch": 0.2698446989570254, "grad_norm": 0.43151503801345825, "learning_rate": 0.00014606116533877488, "loss": 1.5216, "step": 20766 }, { "epoch": 0.2698576935009413, "grad_norm": 0.3855516016483307, "learning_rate": 0.0001460585658768635, "loss": 1.4338, "step": 20767 }, { "epoch": 0.26987068804485714, "grad_norm": 0.4201805591583252, "learning_rate": 0.00014605596641495213, "loss": 1.3732, "step": 20768 }, { "epoch": 0.26988368258877304, "grad_norm": 0.43783777952194214, "learning_rate": 0.00014605336695304073, "loss": 1.253, "step": 20769 }, { "epoch": 0.2698966771326889, "grad_norm": 0.42294326424598694, "learning_rate": 0.00014605076749112932, "loss": 1.463, "step": 20770 }, { "epoch": 0.2699096716766048, "grad_norm": 0.40143537521362305, "learning_rate": 0.00014604816802921798, "loss": 1.352, "step": 20771 }, { "epoch": 0.26992266622052064, "grad_norm": 0.4824879467487335, "learning_rate": 0.00014604556856730657, "loss": 1.4916, "step": 20772 }, { "epoch": 0.26993566076443654, "grad_norm": 0.41266053915023804, "learning_rate": 0.0001460429691053952, "loss": 1.4579, "step": 20773 }, { "epoch": 0.2699486553083524, "grad_norm": 0.48308441042900085, "learning_rate": 0.0001460403696434838, "loss": 1.6107, "step": 20774 }, { "epoch": 0.2699616498522683, "grad_norm": 0.39897236227989197, "learning_rate": 0.00014603777018157242, "loss": 1.4627, "step": 20775 }, { "epoch": 0.2699746443961841, "grad_norm": 0.3590661585330963, "learning_rate": 0.00014603517071966104, "loss": 1.4829, "step": 20776 }, { "epoch": 0.2699876389401, "grad_norm": 0.38587871193885803, "learning_rate": 0.00014603257125774964, "loss": 1.3482, "step": 20777 }, { "epoch": 0.2700006334840159, "grad_norm": 0.3748542070388794, "learning_rate": 0.00014602997179583827, "loss": 1.3458, "step": 20778 }, { "epoch": 0.2700136280279318, "grad_norm": 0.4357249438762665, "learning_rate": 0.0001460273723339269, "loss": 1.3145, "step": 20779 }, { "epoch": 0.2700266225718476, "grad_norm": 0.3993145227432251, "learning_rate": 0.00014602477287201551, "loss": 1.4894, "step": 20780 }, { "epoch": 0.2700396171157635, "grad_norm": 0.49461695551872253, "learning_rate": 0.0001460221734101041, "loss": 1.518, "step": 20781 }, { "epoch": 0.27005261165967936, "grad_norm": 0.42455703020095825, "learning_rate": 0.0001460195739481927, "loss": 1.5025, "step": 20782 }, { "epoch": 0.27006560620359527, "grad_norm": 0.306119441986084, "learning_rate": 0.00014601697448628136, "loss": 1.322, "step": 20783 }, { "epoch": 0.27007860074751117, "grad_norm": 0.39377444982528687, "learning_rate": 0.00014601437502436996, "loss": 1.4059, "step": 20784 }, { "epoch": 0.270091595291427, "grad_norm": 0.36525020003318787, "learning_rate": 0.00014601177556245858, "loss": 1.2923, "step": 20785 }, { "epoch": 0.2701045898353429, "grad_norm": 0.3976100981235504, "learning_rate": 0.00014600917610054718, "loss": 1.3119, "step": 20786 }, { "epoch": 0.27011758437925876, "grad_norm": 0.4202565550804138, "learning_rate": 0.0001460065766386358, "loss": 1.2831, "step": 20787 }, { "epoch": 0.27013057892317466, "grad_norm": 0.33152076601982117, "learning_rate": 0.00014600397717672443, "loss": 1.4323, "step": 20788 }, { "epoch": 0.2701435734670905, "grad_norm": 0.39816585183143616, "learning_rate": 0.00014600137771481303, "loss": 1.5205, "step": 20789 }, { "epoch": 0.2701565680110064, "grad_norm": 0.4054388999938965, "learning_rate": 0.00014599877825290165, "loss": 1.2961, "step": 20790 }, { "epoch": 0.27016956255492225, "grad_norm": 0.424545556306839, "learning_rate": 0.00014599617879099028, "loss": 1.2815, "step": 20791 }, { "epoch": 0.27018255709883815, "grad_norm": 0.35893192887306213, "learning_rate": 0.0001459935793290789, "loss": 1.5249, "step": 20792 }, { "epoch": 0.270195551642754, "grad_norm": 0.3958604037761688, "learning_rate": 0.0001459909798671675, "loss": 1.326, "step": 20793 }, { "epoch": 0.2702085461866699, "grad_norm": 0.40877991914749146, "learning_rate": 0.00014598838040525612, "loss": 1.4322, "step": 20794 }, { "epoch": 0.27022154073058574, "grad_norm": 0.3238425850868225, "learning_rate": 0.00014598578094334475, "loss": 1.2572, "step": 20795 }, { "epoch": 0.27023453527450164, "grad_norm": 0.4755823016166687, "learning_rate": 0.00014598318148143334, "loss": 1.4959, "step": 20796 }, { "epoch": 0.2702475298184175, "grad_norm": 0.3692767918109894, "learning_rate": 0.00014598058201952197, "loss": 1.4663, "step": 20797 }, { "epoch": 0.2702605243623334, "grad_norm": 0.33734601736068726, "learning_rate": 0.00014597798255761057, "loss": 1.4525, "step": 20798 }, { "epoch": 0.27027351890624923, "grad_norm": 0.4396667182445526, "learning_rate": 0.0001459753830956992, "loss": 1.4602, "step": 20799 }, { "epoch": 0.27028651345016513, "grad_norm": 0.4326832890510559, "learning_rate": 0.00014597278363378781, "loss": 1.5171, "step": 20800 }, { "epoch": 0.270299507994081, "grad_norm": 0.45728784799575806, "learning_rate": 0.0001459701841718764, "loss": 1.4585, "step": 20801 }, { "epoch": 0.2703125025379969, "grad_norm": 0.3867819607257843, "learning_rate": 0.00014596758470996506, "loss": 1.5228, "step": 20802 }, { "epoch": 0.2703254970819127, "grad_norm": 0.4609912931919098, "learning_rate": 0.00014596498524805366, "loss": 1.3221, "step": 20803 }, { "epoch": 0.2703384916258286, "grad_norm": 0.44288545846939087, "learning_rate": 0.00014596238578614229, "loss": 1.6226, "step": 20804 }, { "epoch": 0.27035148616974447, "grad_norm": 0.3676133155822754, "learning_rate": 0.00014595978632423088, "loss": 1.3888, "step": 20805 }, { "epoch": 0.27036448071366037, "grad_norm": 0.3704010248184204, "learning_rate": 0.0001459571868623195, "loss": 1.3269, "step": 20806 }, { "epoch": 0.2703774752575762, "grad_norm": 0.4500162899494171, "learning_rate": 0.00014595458740040813, "loss": 1.4127, "step": 20807 }, { "epoch": 0.2703904698014921, "grad_norm": 0.3112788498401642, "learning_rate": 0.00014595198793849673, "loss": 1.3928, "step": 20808 }, { "epoch": 0.27040346434540796, "grad_norm": 0.4380784034729004, "learning_rate": 0.00014594938847658535, "loss": 1.4733, "step": 20809 }, { "epoch": 0.27041645888932386, "grad_norm": 0.40678146481513977, "learning_rate": 0.00014594678901467398, "loss": 1.4062, "step": 20810 }, { "epoch": 0.2704294534332397, "grad_norm": 0.3612669110298157, "learning_rate": 0.00014594418955276258, "loss": 1.4338, "step": 20811 }, { "epoch": 0.2704424479771556, "grad_norm": 0.4091089367866516, "learning_rate": 0.0001459415900908512, "loss": 1.6577, "step": 20812 }, { "epoch": 0.27045544252107145, "grad_norm": 0.3786531686782837, "learning_rate": 0.0001459389906289398, "loss": 1.3989, "step": 20813 }, { "epoch": 0.27046843706498735, "grad_norm": 0.46411582827568054, "learning_rate": 0.00014593639116702845, "loss": 1.3317, "step": 20814 }, { "epoch": 0.2704814316089032, "grad_norm": 0.4212292730808258, "learning_rate": 0.00014593379170511705, "loss": 1.3794, "step": 20815 }, { "epoch": 0.2704944261528191, "grad_norm": 0.30869749188423157, "learning_rate": 0.00014593119224320567, "loss": 1.49, "step": 20816 }, { "epoch": 0.27050742069673495, "grad_norm": 0.4733096659183502, "learning_rate": 0.00014592859278129427, "loss": 1.4353, "step": 20817 }, { "epoch": 0.27052041524065085, "grad_norm": 0.39669325947761536, "learning_rate": 0.0001459259933193829, "loss": 1.4102, "step": 20818 }, { "epoch": 0.2705334097845667, "grad_norm": 0.4561370313167572, "learning_rate": 0.00014592339385747152, "loss": 1.402, "step": 20819 }, { "epoch": 0.2705464043284826, "grad_norm": 0.44948163628578186, "learning_rate": 0.00014592079439556011, "loss": 1.5506, "step": 20820 }, { "epoch": 0.27055939887239844, "grad_norm": 0.366617351770401, "learning_rate": 0.00014591819493364874, "loss": 1.5229, "step": 20821 }, { "epoch": 0.27057239341631434, "grad_norm": 0.37989145517349243, "learning_rate": 0.00014591559547173736, "loss": 1.4713, "step": 20822 }, { "epoch": 0.2705853879602302, "grad_norm": 0.3669915497303009, "learning_rate": 0.000145912996009826, "loss": 1.4177, "step": 20823 }, { "epoch": 0.2705983825041461, "grad_norm": 0.40401995182037354, "learning_rate": 0.00014591039654791459, "loss": 1.4135, "step": 20824 }, { "epoch": 0.27061137704806193, "grad_norm": 0.3447856307029724, "learning_rate": 0.00014590779708600318, "loss": 1.2905, "step": 20825 }, { "epoch": 0.27062437159197783, "grad_norm": 0.3797377347946167, "learning_rate": 0.00014590519762409183, "loss": 1.207, "step": 20826 }, { "epoch": 0.2706373661358937, "grad_norm": 0.37477850914001465, "learning_rate": 0.00014590259816218043, "loss": 1.4285, "step": 20827 }, { "epoch": 0.2706503606798096, "grad_norm": 0.38122615218162537, "learning_rate": 0.00014589999870026906, "loss": 1.2644, "step": 20828 }, { "epoch": 0.2706633552237254, "grad_norm": 0.40307557582855225, "learning_rate": 0.00014589739923835765, "loss": 1.4889, "step": 20829 }, { "epoch": 0.2706763497676413, "grad_norm": 0.34643661975860596, "learning_rate": 0.00014589479977644628, "loss": 1.4476, "step": 20830 }, { "epoch": 0.27068934431155717, "grad_norm": 0.36796635389328003, "learning_rate": 0.0001458922003145349, "loss": 1.3461, "step": 20831 }, { "epoch": 0.27070233885547307, "grad_norm": 0.402668833732605, "learning_rate": 0.0001458896008526235, "loss": 1.5257, "step": 20832 }, { "epoch": 0.2707153333993889, "grad_norm": 0.4068823456764221, "learning_rate": 0.00014588700139071212, "loss": 1.3416, "step": 20833 }, { "epoch": 0.2707283279433048, "grad_norm": 0.4461059868335724, "learning_rate": 0.00014588440192880075, "loss": 1.4412, "step": 20834 }, { "epoch": 0.27074132248722066, "grad_norm": 0.5113810300827026, "learning_rate": 0.00014588180246688937, "loss": 1.5175, "step": 20835 }, { "epoch": 0.27075431703113656, "grad_norm": 0.34720370173454285, "learning_rate": 0.00014587920300497797, "loss": 1.3346, "step": 20836 }, { "epoch": 0.2707673115750524, "grad_norm": 0.429147332906723, "learning_rate": 0.00014587660354306657, "loss": 1.4084, "step": 20837 }, { "epoch": 0.2707803061189683, "grad_norm": 0.3017660975456238, "learning_rate": 0.00014587400408115522, "loss": 1.0667, "step": 20838 }, { "epoch": 0.27079330066288415, "grad_norm": 0.4261918067932129, "learning_rate": 0.00014587140461924382, "loss": 1.4971, "step": 20839 }, { "epoch": 0.27080629520680005, "grad_norm": 0.3704416751861572, "learning_rate": 0.00014586880515733244, "loss": 1.3831, "step": 20840 }, { "epoch": 0.2708192897507159, "grad_norm": 0.4343677759170532, "learning_rate": 0.00014586620569542107, "loss": 1.6067, "step": 20841 }, { "epoch": 0.2708322842946318, "grad_norm": 0.42772969603538513, "learning_rate": 0.00014586360623350966, "loss": 1.4594, "step": 20842 }, { "epoch": 0.27084527883854764, "grad_norm": 0.4471254050731659, "learning_rate": 0.0001458610067715983, "loss": 1.4875, "step": 20843 }, { "epoch": 0.27085827338246354, "grad_norm": 0.4613000154495239, "learning_rate": 0.00014585840730968689, "loss": 1.4553, "step": 20844 }, { "epoch": 0.2708712679263794, "grad_norm": 0.3984544277191162, "learning_rate": 0.00014585580784777554, "loss": 1.3063, "step": 20845 }, { "epoch": 0.2708842624702953, "grad_norm": 0.4328620135784149, "learning_rate": 0.00014585320838586413, "loss": 1.4295, "step": 20846 }, { "epoch": 0.27089725701421113, "grad_norm": 0.38093486428260803, "learning_rate": 0.00014585060892395276, "loss": 1.4901, "step": 20847 }, { "epoch": 0.27091025155812704, "grad_norm": 0.3480699360370636, "learning_rate": 0.00014584800946204136, "loss": 1.642, "step": 20848 }, { "epoch": 0.2709232461020429, "grad_norm": 0.3781997859477997, "learning_rate": 0.00014584541000012998, "loss": 1.2357, "step": 20849 }, { "epoch": 0.2709362406459588, "grad_norm": 0.4760379195213318, "learning_rate": 0.0001458428105382186, "loss": 1.5356, "step": 20850 }, { "epoch": 0.2709492351898746, "grad_norm": 0.32137757539749146, "learning_rate": 0.0001458402110763072, "loss": 1.4307, "step": 20851 }, { "epoch": 0.2709622297337905, "grad_norm": 0.4688911736011505, "learning_rate": 0.00014583761161439583, "loss": 1.6149, "step": 20852 }, { "epoch": 0.2709752242777064, "grad_norm": 0.3692459762096405, "learning_rate": 0.00014583501215248445, "loss": 1.3311, "step": 20853 }, { "epoch": 0.2709882188216223, "grad_norm": 0.37827131152153015, "learning_rate": 0.00014583241269057305, "loss": 1.3687, "step": 20854 }, { "epoch": 0.2710012133655381, "grad_norm": 0.45231932401657104, "learning_rate": 0.00014582981322866167, "loss": 1.4479, "step": 20855 }, { "epoch": 0.271014207909454, "grad_norm": 0.3784984052181244, "learning_rate": 0.00014582721376675027, "loss": 1.4308, "step": 20856 }, { "epoch": 0.27102720245336986, "grad_norm": 0.4157150685787201, "learning_rate": 0.00014582461430483892, "loss": 1.5173, "step": 20857 }, { "epoch": 0.27104019699728576, "grad_norm": 0.4382265508174896, "learning_rate": 0.00014582201484292752, "loss": 1.6408, "step": 20858 }, { "epoch": 0.27105319154120167, "grad_norm": 0.43809014558792114, "learning_rate": 0.00014581941538101614, "loss": 1.5878, "step": 20859 }, { "epoch": 0.2710661860851175, "grad_norm": 0.4447806477546692, "learning_rate": 0.00014581681591910474, "loss": 1.3213, "step": 20860 }, { "epoch": 0.2710791806290334, "grad_norm": 0.3964853286743164, "learning_rate": 0.00014581421645719337, "loss": 1.4591, "step": 20861 }, { "epoch": 0.27109217517294926, "grad_norm": 0.34540191292762756, "learning_rate": 0.000145811616995282, "loss": 1.2487, "step": 20862 }, { "epoch": 0.27110516971686516, "grad_norm": 0.4400683343410492, "learning_rate": 0.0001458090175333706, "loss": 1.4948, "step": 20863 }, { "epoch": 0.271118164260781, "grad_norm": 0.41917356848716736, "learning_rate": 0.0001458064180714592, "loss": 1.4493, "step": 20864 }, { "epoch": 0.2711311588046969, "grad_norm": 0.3956057131290436, "learning_rate": 0.00014580381860954784, "loss": 1.3558, "step": 20865 }, { "epoch": 0.27114415334861275, "grad_norm": 0.4873267710208893, "learning_rate": 0.00014580121914763643, "loss": 1.4294, "step": 20866 }, { "epoch": 0.27115714789252865, "grad_norm": 0.4087563157081604, "learning_rate": 0.00014579861968572506, "loss": 1.3979, "step": 20867 }, { "epoch": 0.2711701424364445, "grad_norm": 0.32891717553138733, "learning_rate": 0.00014579602022381366, "loss": 1.4041, "step": 20868 }, { "epoch": 0.2711831369803604, "grad_norm": 0.4600984752178192, "learning_rate": 0.0001457934207619023, "loss": 1.3648, "step": 20869 }, { "epoch": 0.27119613152427624, "grad_norm": 0.4293263852596283, "learning_rate": 0.0001457908212999909, "loss": 1.3921, "step": 20870 }, { "epoch": 0.27120912606819214, "grad_norm": 0.40912577509880066, "learning_rate": 0.00014578822183807953, "loss": 1.4292, "step": 20871 }, { "epoch": 0.271222120612108, "grad_norm": 0.34529200196266174, "learning_rate": 0.00014578562237616813, "loss": 1.4514, "step": 20872 }, { "epoch": 0.2712351151560239, "grad_norm": 0.2624732255935669, "learning_rate": 0.00014578302291425675, "loss": 1.1451, "step": 20873 }, { "epoch": 0.27124810969993973, "grad_norm": 0.36293190717697144, "learning_rate": 0.00014578042345234538, "loss": 1.4315, "step": 20874 }, { "epoch": 0.27126110424385563, "grad_norm": 0.4560997188091278, "learning_rate": 0.00014577782399043397, "loss": 1.418, "step": 20875 }, { "epoch": 0.2712740987877715, "grad_norm": 0.4754064679145813, "learning_rate": 0.00014577522452852263, "loss": 1.5889, "step": 20876 }, { "epoch": 0.2712870933316874, "grad_norm": 0.3319530487060547, "learning_rate": 0.00014577262506661122, "loss": 1.416, "step": 20877 }, { "epoch": 0.2713000878756032, "grad_norm": 0.4696401357650757, "learning_rate": 0.00014577002560469985, "loss": 1.4437, "step": 20878 }, { "epoch": 0.2713130824195191, "grad_norm": 0.34043440222740173, "learning_rate": 0.00014576742614278844, "loss": 1.2634, "step": 20879 }, { "epoch": 0.27132607696343497, "grad_norm": 0.38766780495643616, "learning_rate": 0.00014576482668087707, "loss": 1.4191, "step": 20880 }, { "epoch": 0.27133907150735087, "grad_norm": 0.4224969446659088, "learning_rate": 0.0001457622272189657, "loss": 1.3619, "step": 20881 }, { "epoch": 0.2713520660512667, "grad_norm": 0.5193988084793091, "learning_rate": 0.0001457596277570543, "loss": 1.3226, "step": 20882 }, { "epoch": 0.2713650605951826, "grad_norm": 0.3767814040184021, "learning_rate": 0.00014575702829514292, "loss": 1.5669, "step": 20883 }, { "epoch": 0.27137805513909846, "grad_norm": 0.33606457710266113, "learning_rate": 0.00014575442883323154, "loss": 1.2803, "step": 20884 }, { "epoch": 0.27139104968301436, "grad_norm": 0.46754106879234314, "learning_rate": 0.00014575182937132014, "loss": 1.4925, "step": 20885 }, { "epoch": 0.2714040442269302, "grad_norm": 0.36631861329078674, "learning_rate": 0.00014574922990940876, "loss": 1.3568, "step": 20886 }, { "epoch": 0.2714170387708461, "grad_norm": 0.5096385478973389, "learning_rate": 0.00014574663044749736, "loss": 1.3471, "step": 20887 }, { "epoch": 0.27143003331476195, "grad_norm": 0.40501829981803894, "learning_rate": 0.000145744030985586, "loss": 1.4683, "step": 20888 }, { "epoch": 0.27144302785867785, "grad_norm": 0.4708895981311798, "learning_rate": 0.0001457414315236746, "loss": 1.4784, "step": 20889 }, { "epoch": 0.2714560224025937, "grad_norm": 0.4071749746799469, "learning_rate": 0.00014573883206176323, "loss": 1.4794, "step": 20890 }, { "epoch": 0.2714690169465096, "grad_norm": 0.3962570130825043, "learning_rate": 0.00014573623259985183, "loss": 1.3485, "step": 20891 }, { "epoch": 0.27148201149042545, "grad_norm": 0.3588683307170868, "learning_rate": 0.00014573363313794045, "loss": 1.3494, "step": 20892 }, { "epoch": 0.27149500603434135, "grad_norm": 0.4205160439014435, "learning_rate": 0.00014573103367602908, "loss": 1.3711, "step": 20893 }, { "epoch": 0.2715080005782572, "grad_norm": 0.40023550391197205, "learning_rate": 0.00014572843421411768, "loss": 1.237, "step": 20894 }, { "epoch": 0.2715209951221731, "grad_norm": 0.40140536427497864, "learning_rate": 0.0001457258347522063, "loss": 1.5044, "step": 20895 }, { "epoch": 0.27153398966608894, "grad_norm": 0.31563496589660645, "learning_rate": 0.00014572323529029493, "loss": 1.2847, "step": 20896 }, { "epoch": 0.27154698421000484, "grad_norm": 0.479702889919281, "learning_rate": 0.00014572063582838352, "loss": 1.4794, "step": 20897 }, { "epoch": 0.2715599787539207, "grad_norm": 0.2976384162902832, "learning_rate": 0.00014571803636647215, "loss": 1.1839, "step": 20898 }, { "epoch": 0.2715729732978366, "grad_norm": 0.3080192804336548, "learning_rate": 0.00014571543690456074, "loss": 1.1892, "step": 20899 }, { "epoch": 0.27158596784175243, "grad_norm": 0.4232228994369507, "learning_rate": 0.0001457128374426494, "loss": 1.5328, "step": 20900 }, { "epoch": 0.27159896238566833, "grad_norm": 0.4344131648540497, "learning_rate": 0.000145710237980738, "loss": 1.5033, "step": 20901 }, { "epoch": 0.2716119569295842, "grad_norm": 0.5643211007118225, "learning_rate": 0.00014570763851882662, "loss": 1.3747, "step": 20902 }, { "epoch": 0.2716249514735001, "grad_norm": 0.3988206088542938, "learning_rate": 0.00014570503905691522, "loss": 1.5398, "step": 20903 }, { "epoch": 0.2716379460174159, "grad_norm": 0.4586490988731384, "learning_rate": 0.00014570243959500384, "loss": 1.5673, "step": 20904 }, { "epoch": 0.2716509405613318, "grad_norm": 0.39344796538352966, "learning_rate": 0.00014569984013309246, "loss": 1.393, "step": 20905 }, { "epoch": 0.27166393510524767, "grad_norm": 0.41531339287757874, "learning_rate": 0.00014569724067118106, "loss": 1.2759, "step": 20906 }, { "epoch": 0.27167692964916357, "grad_norm": 0.4514845013618469, "learning_rate": 0.00014569464120926969, "loss": 1.4866, "step": 20907 }, { "epoch": 0.2716899241930794, "grad_norm": 0.4289674162864685, "learning_rate": 0.0001456920417473583, "loss": 1.3813, "step": 20908 }, { "epoch": 0.2717029187369953, "grad_norm": 0.4469279646873474, "learning_rate": 0.0001456894422854469, "loss": 1.445, "step": 20909 }, { "epoch": 0.27171591328091116, "grad_norm": 0.3672669231891632, "learning_rate": 0.00014568684282353553, "loss": 1.6184, "step": 20910 }, { "epoch": 0.27172890782482706, "grad_norm": 0.3048780858516693, "learning_rate": 0.00014568424336162413, "loss": 1.2093, "step": 20911 }, { "epoch": 0.2717419023687429, "grad_norm": 0.3432950973510742, "learning_rate": 0.00014568164389971278, "loss": 1.4253, "step": 20912 }, { "epoch": 0.2717548969126588, "grad_norm": 0.44160789251327515, "learning_rate": 0.00014567904443780138, "loss": 1.6193, "step": 20913 }, { "epoch": 0.27176789145657465, "grad_norm": 0.43504777550697327, "learning_rate": 0.00014567644497589, "loss": 1.5853, "step": 20914 }, { "epoch": 0.27178088600049055, "grad_norm": 0.42595210671424866, "learning_rate": 0.00014567384551397863, "loss": 1.3747, "step": 20915 }, { "epoch": 0.2717938805444064, "grad_norm": 0.3934386074542999, "learning_rate": 0.00014567124605206722, "loss": 1.2053, "step": 20916 }, { "epoch": 0.2718068750883223, "grad_norm": 0.3724689781665802, "learning_rate": 0.00014566864659015585, "loss": 1.3004, "step": 20917 }, { "epoch": 0.27181986963223814, "grad_norm": 0.5848191976547241, "learning_rate": 0.00014566604712824445, "loss": 1.5933, "step": 20918 }, { "epoch": 0.27183286417615404, "grad_norm": 0.4291781187057495, "learning_rate": 0.0001456634476663331, "loss": 1.3255, "step": 20919 }, { "epoch": 0.2718458587200699, "grad_norm": 0.5061736106872559, "learning_rate": 0.0001456608482044217, "loss": 1.5369, "step": 20920 }, { "epoch": 0.2718588532639858, "grad_norm": 0.445131778717041, "learning_rate": 0.0001456582487425103, "loss": 1.4927, "step": 20921 }, { "epoch": 0.27187184780790163, "grad_norm": 0.3543862998485565, "learning_rate": 0.00014565564928059892, "loss": 1.447, "step": 20922 }, { "epoch": 0.27188484235181754, "grad_norm": 0.4228907525539398, "learning_rate": 0.00014565304981868754, "loss": 1.3768, "step": 20923 }, { "epoch": 0.2718978368957334, "grad_norm": 0.4196886420249939, "learning_rate": 0.00014565045035677617, "loss": 1.4874, "step": 20924 }, { "epoch": 0.2719108314396493, "grad_norm": 0.4059169888496399, "learning_rate": 0.00014564785089486476, "loss": 1.411, "step": 20925 }, { "epoch": 0.2719238259835651, "grad_norm": 0.34867146611213684, "learning_rate": 0.0001456452514329534, "loss": 1.5161, "step": 20926 }, { "epoch": 0.271936820527481, "grad_norm": 0.4182260036468506, "learning_rate": 0.000145642651971042, "loss": 1.4437, "step": 20927 }, { "epoch": 0.27194981507139687, "grad_norm": 0.2763451039791107, "learning_rate": 0.0001456400525091306, "loss": 1.39, "step": 20928 }, { "epoch": 0.2719628096153128, "grad_norm": 0.2923557162284851, "learning_rate": 0.00014563745304721923, "loss": 1.3705, "step": 20929 }, { "epoch": 0.2719758041592286, "grad_norm": 0.4142714738845825, "learning_rate": 0.00014563485358530783, "loss": 1.4015, "step": 20930 }, { "epoch": 0.2719887987031445, "grad_norm": 0.3314935564994812, "learning_rate": 0.00014563225412339648, "loss": 1.2912, "step": 20931 }, { "epoch": 0.27200179324706036, "grad_norm": 0.3004406690597534, "learning_rate": 0.00014562965466148508, "loss": 1.1659, "step": 20932 }, { "epoch": 0.27201478779097626, "grad_norm": 0.4019714891910553, "learning_rate": 0.00014562705519957368, "loss": 1.4722, "step": 20933 }, { "epoch": 0.2720277823348921, "grad_norm": 0.39341098070144653, "learning_rate": 0.0001456244557376623, "loss": 1.307, "step": 20934 }, { "epoch": 0.272040776878808, "grad_norm": 0.37396857142448425, "learning_rate": 0.00014562185627575093, "loss": 1.3251, "step": 20935 }, { "epoch": 0.2720537714227239, "grad_norm": 0.4872579276561737, "learning_rate": 0.00014561925681383955, "loss": 1.3513, "step": 20936 }, { "epoch": 0.27206676596663976, "grad_norm": 0.3539903461933136, "learning_rate": 0.00014561665735192815, "loss": 1.4946, "step": 20937 }, { "epoch": 0.27207976051055566, "grad_norm": 0.3839171528816223, "learning_rate": 0.00014561405789001677, "loss": 1.258, "step": 20938 }, { "epoch": 0.2720927550544715, "grad_norm": 0.3958101272583008, "learning_rate": 0.0001456114584281054, "loss": 1.4121, "step": 20939 }, { "epoch": 0.2721057495983874, "grad_norm": 0.46334654092788696, "learning_rate": 0.000145608858966194, "loss": 1.4719, "step": 20940 }, { "epoch": 0.27211874414230325, "grad_norm": 0.43549561500549316, "learning_rate": 0.00014560625950428262, "loss": 1.5402, "step": 20941 }, { "epoch": 0.27213173868621915, "grad_norm": 0.4384608566761017, "learning_rate": 0.00014560366004237122, "loss": 1.4569, "step": 20942 }, { "epoch": 0.272144733230135, "grad_norm": 0.43945741653442383, "learning_rate": 0.00014560106058045987, "loss": 1.2072, "step": 20943 }, { "epoch": 0.2721577277740509, "grad_norm": 0.3684191405773163, "learning_rate": 0.00014559846111854847, "loss": 1.5432, "step": 20944 }, { "epoch": 0.27217072231796674, "grad_norm": 0.2938414514064789, "learning_rate": 0.0001455958616566371, "loss": 1.3299, "step": 20945 }, { "epoch": 0.27218371686188264, "grad_norm": 0.3631507456302643, "learning_rate": 0.0001455932621947257, "loss": 1.2465, "step": 20946 }, { "epoch": 0.2721967114057985, "grad_norm": 0.3917112946510315, "learning_rate": 0.0001455906627328143, "loss": 1.4785, "step": 20947 }, { "epoch": 0.2722097059497144, "grad_norm": 0.32186800241470337, "learning_rate": 0.00014558806327090294, "loss": 1.3332, "step": 20948 }, { "epoch": 0.27222270049363023, "grad_norm": 0.396060973405838, "learning_rate": 0.00014558546380899153, "loss": 1.4922, "step": 20949 }, { "epoch": 0.27223569503754613, "grad_norm": 0.3950470983982086, "learning_rate": 0.00014558286434708016, "loss": 1.4796, "step": 20950 }, { "epoch": 0.272248689581462, "grad_norm": 0.4561919569969177, "learning_rate": 0.00014558026488516878, "loss": 1.4229, "step": 20951 }, { "epoch": 0.2722616841253779, "grad_norm": 0.3467792868614197, "learning_rate": 0.00014557766542325738, "loss": 1.3409, "step": 20952 }, { "epoch": 0.2722746786692937, "grad_norm": 0.4270062744617462, "learning_rate": 0.000145575065961346, "loss": 1.4152, "step": 20953 }, { "epoch": 0.2722876732132096, "grad_norm": 0.35324808955192566, "learning_rate": 0.00014557246649943463, "loss": 1.5561, "step": 20954 }, { "epoch": 0.27230066775712547, "grad_norm": 0.43271857500076294, "learning_rate": 0.00014556986703752325, "loss": 1.4855, "step": 20955 }, { "epoch": 0.27231366230104137, "grad_norm": 0.3370274603366852, "learning_rate": 0.00014556726757561185, "loss": 1.291, "step": 20956 }, { "epoch": 0.2723266568449572, "grad_norm": 0.36171454191207886, "learning_rate": 0.00014556466811370048, "loss": 1.399, "step": 20957 }, { "epoch": 0.2723396513888731, "grad_norm": 0.411704421043396, "learning_rate": 0.0001455620686517891, "loss": 1.3516, "step": 20958 }, { "epoch": 0.27235264593278896, "grad_norm": 0.4227503538131714, "learning_rate": 0.0001455594691898777, "loss": 1.4469, "step": 20959 }, { "epoch": 0.27236564047670486, "grad_norm": 0.3900274634361267, "learning_rate": 0.00014555686972796632, "loss": 1.3782, "step": 20960 }, { "epoch": 0.2723786350206207, "grad_norm": 0.4124211370944977, "learning_rate": 0.00014555427026605492, "loss": 1.514, "step": 20961 }, { "epoch": 0.2723916295645366, "grad_norm": 0.3952930271625519, "learning_rate": 0.00014555167080414357, "loss": 1.3357, "step": 20962 }, { "epoch": 0.27240462410845245, "grad_norm": 0.4210646450519562, "learning_rate": 0.00014554907134223217, "loss": 1.6704, "step": 20963 }, { "epoch": 0.27241761865236835, "grad_norm": 0.4260576665401459, "learning_rate": 0.00014554647188032077, "loss": 1.4169, "step": 20964 }, { "epoch": 0.2724306131962842, "grad_norm": 0.39298146963119507, "learning_rate": 0.0001455438724184094, "loss": 1.3177, "step": 20965 }, { "epoch": 0.2724436077402001, "grad_norm": 0.4447885751724243, "learning_rate": 0.00014554127295649802, "loss": 1.5267, "step": 20966 }, { "epoch": 0.27245660228411595, "grad_norm": 0.33108648657798767, "learning_rate": 0.00014553867349458664, "loss": 1.3462, "step": 20967 }, { "epoch": 0.27246959682803185, "grad_norm": 0.3959689140319824, "learning_rate": 0.00014553607403267524, "loss": 1.1595, "step": 20968 }, { "epoch": 0.2724825913719477, "grad_norm": 0.4592994749546051, "learning_rate": 0.00014553347457076386, "loss": 1.4471, "step": 20969 }, { "epoch": 0.2724955859158636, "grad_norm": 0.3343043923377991, "learning_rate": 0.00014553087510885249, "loss": 1.3166, "step": 20970 }, { "epoch": 0.27250858045977944, "grad_norm": 0.45657259225845337, "learning_rate": 0.00014552827564694108, "loss": 1.4627, "step": 20971 }, { "epoch": 0.27252157500369534, "grad_norm": 0.48273783922195435, "learning_rate": 0.0001455256761850297, "loss": 1.3553, "step": 20972 }, { "epoch": 0.2725345695476112, "grad_norm": 0.3224756717681885, "learning_rate": 0.0001455230767231183, "loss": 1.3283, "step": 20973 }, { "epoch": 0.2725475640915271, "grad_norm": 0.5678260922431946, "learning_rate": 0.00014552047726120696, "loss": 1.3435, "step": 20974 }, { "epoch": 0.27256055863544293, "grad_norm": 0.3586970865726471, "learning_rate": 0.00014551787779929555, "loss": 1.2358, "step": 20975 }, { "epoch": 0.27257355317935883, "grad_norm": 0.4826592803001404, "learning_rate": 0.00014551527833738415, "loss": 1.4185, "step": 20976 }, { "epoch": 0.2725865477232747, "grad_norm": 0.4475829005241394, "learning_rate": 0.00014551267887547278, "loss": 1.6022, "step": 20977 }, { "epoch": 0.2725995422671906, "grad_norm": 0.3895430266857147, "learning_rate": 0.0001455100794135614, "loss": 1.4663, "step": 20978 }, { "epoch": 0.2726125368111064, "grad_norm": 0.3294030427932739, "learning_rate": 0.00014550747995165003, "loss": 1.263, "step": 20979 }, { "epoch": 0.2726255313550223, "grad_norm": 0.41625484824180603, "learning_rate": 0.00014550488048973862, "loss": 1.48, "step": 20980 }, { "epoch": 0.27263852589893817, "grad_norm": 0.39338308572769165, "learning_rate": 0.00014550228102782725, "loss": 1.557, "step": 20981 }, { "epoch": 0.27265152044285407, "grad_norm": 0.34008949995040894, "learning_rate": 0.00014549968156591587, "loss": 1.5955, "step": 20982 }, { "epoch": 0.2726645149867699, "grad_norm": 0.4699447751045227, "learning_rate": 0.00014549708210400447, "loss": 1.6548, "step": 20983 }, { "epoch": 0.2726775095306858, "grad_norm": 0.44465935230255127, "learning_rate": 0.0001454944826420931, "loss": 1.3683, "step": 20984 }, { "epoch": 0.27269050407460166, "grad_norm": 0.3938220739364624, "learning_rate": 0.0001454918831801817, "loss": 1.5514, "step": 20985 }, { "epoch": 0.27270349861851756, "grad_norm": 0.3734889328479767, "learning_rate": 0.00014548928371827034, "loss": 1.4019, "step": 20986 }, { "epoch": 0.2727164931624334, "grad_norm": 0.36078962683677673, "learning_rate": 0.00014548668425635894, "loss": 1.524, "step": 20987 }, { "epoch": 0.2727294877063493, "grad_norm": 1.1044508218765259, "learning_rate": 0.00014548408479444754, "loss": 1.4458, "step": 20988 }, { "epoch": 0.27274248225026515, "grad_norm": 0.38395217061042786, "learning_rate": 0.0001454814853325362, "loss": 1.4384, "step": 20989 }, { "epoch": 0.27275547679418105, "grad_norm": 0.3708850145339966, "learning_rate": 0.00014547888587062479, "loss": 1.2456, "step": 20990 }, { "epoch": 0.2727684713380969, "grad_norm": 0.4532736539840698, "learning_rate": 0.0001454762864087134, "loss": 1.3178, "step": 20991 }, { "epoch": 0.2727814658820128, "grad_norm": 0.47734859585762024, "learning_rate": 0.000145473686946802, "loss": 1.4773, "step": 20992 }, { "epoch": 0.27279446042592864, "grad_norm": 0.4454466998577118, "learning_rate": 0.00014547108748489063, "loss": 1.3575, "step": 20993 }, { "epoch": 0.27280745496984454, "grad_norm": 0.42839354276657104, "learning_rate": 0.00014546848802297926, "loss": 1.3424, "step": 20994 }, { "epoch": 0.2728204495137604, "grad_norm": 0.37258681654930115, "learning_rate": 0.00014546588856106785, "loss": 1.4759, "step": 20995 }, { "epoch": 0.2728334440576763, "grad_norm": 0.35203108191490173, "learning_rate": 0.00014546328909915648, "loss": 1.4133, "step": 20996 }, { "epoch": 0.27284643860159213, "grad_norm": 0.3980732858181, "learning_rate": 0.0001454606896372451, "loss": 1.4573, "step": 20997 }, { "epoch": 0.27285943314550803, "grad_norm": 0.2713392972946167, "learning_rate": 0.00014545809017533373, "loss": 1.3958, "step": 20998 }, { "epoch": 0.2728724276894239, "grad_norm": 0.49511057138442993, "learning_rate": 0.00014545549071342233, "loss": 1.5556, "step": 20999 }, { "epoch": 0.2728854222333398, "grad_norm": 0.3600739538669586, "learning_rate": 0.00014545289125151095, "loss": 1.4051, "step": 21000 }, { "epoch": 0.2728984167772556, "grad_norm": 0.45966795086860657, "learning_rate": 0.00014545029178959957, "loss": 1.499, "step": 21001 }, { "epoch": 0.2729114113211715, "grad_norm": 0.36374330520629883, "learning_rate": 0.00014544769232768817, "loss": 1.4556, "step": 21002 }, { "epoch": 0.27292440586508737, "grad_norm": 0.3642531931400299, "learning_rate": 0.0001454450928657768, "loss": 1.4275, "step": 21003 }, { "epoch": 0.2729374004090033, "grad_norm": 0.3998773396015167, "learning_rate": 0.0001454424934038654, "loss": 1.4732, "step": 21004 }, { "epoch": 0.2729503949529191, "grad_norm": 0.34008800983428955, "learning_rate": 0.00014543989394195402, "loss": 1.4137, "step": 21005 }, { "epoch": 0.272963389496835, "grad_norm": 0.4003192186355591, "learning_rate": 0.00014543729448004264, "loss": 1.3219, "step": 21006 }, { "epoch": 0.27297638404075086, "grad_norm": 0.4651870131492615, "learning_rate": 0.00014543469501813124, "loss": 1.3768, "step": 21007 }, { "epoch": 0.27298937858466676, "grad_norm": 0.431985080242157, "learning_rate": 0.00014543209555621986, "loss": 1.4117, "step": 21008 }, { "epoch": 0.2730023731285826, "grad_norm": 0.32566460967063904, "learning_rate": 0.0001454294960943085, "loss": 1.3819, "step": 21009 }, { "epoch": 0.2730153676724985, "grad_norm": 0.41906920075416565, "learning_rate": 0.0001454268966323971, "loss": 1.3556, "step": 21010 }, { "epoch": 0.27302836221641436, "grad_norm": 0.337770015001297, "learning_rate": 0.0001454242971704857, "loss": 1.4031, "step": 21011 }, { "epoch": 0.27304135676033026, "grad_norm": 0.4519765079021454, "learning_rate": 0.00014542169770857434, "loss": 1.5323, "step": 21012 }, { "epoch": 0.27305435130424616, "grad_norm": 0.3738921284675598, "learning_rate": 0.00014541909824666296, "loss": 1.5589, "step": 21013 }, { "epoch": 0.273067345848162, "grad_norm": 0.4027923047542572, "learning_rate": 0.00014541649878475156, "loss": 1.3714, "step": 21014 }, { "epoch": 0.2730803403920779, "grad_norm": 0.39273229241371155, "learning_rate": 0.00014541389932284018, "loss": 1.3112, "step": 21015 }, { "epoch": 0.27309333493599375, "grad_norm": 0.36209815740585327, "learning_rate": 0.00014541129986092878, "loss": 1.3559, "step": 21016 }, { "epoch": 0.27310632947990965, "grad_norm": 0.4092465341091156, "learning_rate": 0.0001454087003990174, "loss": 1.4444, "step": 21017 }, { "epoch": 0.2731193240238255, "grad_norm": 0.3870810866355896, "learning_rate": 0.00014540610093710603, "loss": 1.39, "step": 21018 }, { "epoch": 0.2731323185677414, "grad_norm": 0.42070648074150085, "learning_rate": 0.00014540350147519463, "loss": 1.3565, "step": 21019 }, { "epoch": 0.27314531311165724, "grad_norm": 0.41532325744628906, "learning_rate": 0.00014540090201328325, "loss": 1.3182, "step": 21020 }, { "epoch": 0.27315830765557314, "grad_norm": 0.4815254509449005, "learning_rate": 0.00014539830255137187, "loss": 1.3652, "step": 21021 }, { "epoch": 0.273171302199489, "grad_norm": 0.33928966522216797, "learning_rate": 0.0001453957030894605, "loss": 1.2993, "step": 21022 }, { "epoch": 0.2731842967434049, "grad_norm": 0.4513469338417053, "learning_rate": 0.0001453931036275491, "loss": 1.5187, "step": 21023 }, { "epoch": 0.27319729128732073, "grad_norm": 0.36666297912597656, "learning_rate": 0.00014539050416563772, "loss": 1.3535, "step": 21024 }, { "epoch": 0.27321028583123663, "grad_norm": 0.42291414737701416, "learning_rate": 0.00014538790470372635, "loss": 1.3596, "step": 21025 }, { "epoch": 0.2732232803751525, "grad_norm": 0.2663572132587433, "learning_rate": 0.00014538530524181494, "loss": 1.387, "step": 21026 }, { "epoch": 0.2732362749190684, "grad_norm": 0.36366698145866394, "learning_rate": 0.00014538270577990357, "loss": 1.4335, "step": 21027 }, { "epoch": 0.2732492694629842, "grad_norm": 0.4077562391757965, "learning_rate": 0.0001453801063179922, "loss": 1.4028, "step": 21028 }, { "epoch": 0.2732622640069001, "grad_norm": 0.2685930132865906, "learning_rate": 0.00014537750685608082, "loss": 1.0368, "step": 21029 }, { "epoch": 0.27327525855081597, "grad_norm": 0.45041173696517944, "learning_rate": 0.0001453749073941694, "loss": 1.5317, "step": 21030 }, { "epoch": 0.27328825309473187, "grad_norm": 0.35876336693763733, "learning_rate": 0.000145372307932258, "loss": 1.3686, "step": 21031 }, { "epoch": 0.2733012476386477, "grad_norm": 0.4320288896560669, "learning_rate": 0.00014536970847034666, "loss": 1.4189, "step": 21032 }, { "epoch": 0.2733142421825636, "grad_norm": 0.4032602608203888, "learning_rate": 0.00014536710900843526, "loss": 1.3094, "step": 21033 }, { "epoch": 0.27332723672647946, "grad_norm": 0.4131283164024353, "learning_rate": 0.00014536450954652388, "loss": 1.3437, "step": 21034 }, { "epoch": 0.27334023127039536, "grad_norm": 0.4300435185432434, "learning_rate": 0.00014536191008461248, "loss": 1.4145, "step": 21035 }, { "epoch": 0.2733532258143112, "grad_norm": 0.3635316491127014, "learning_rate": 0.0001453593106227011, "loss": 1.3144, "step": 21036 }, { "epoch": 0.2733662203582271, "grad_norm": 0.3523062765598297, "learning_rate": 0.00014535671116078973, "loss": 1.3391, "step": 21037 }, { "epoch": 0.27337921490214295, "grad_norm": 0.3737955689430237, "learning_rate": 0.00014535411169887833, "loss": 1.6278, "step": 21038 }, { "epoch": 0.27339220944605885, "grad_norm": 0.39869099855422974, "learning_rate": 0.00014535151223696695, "loss": 1.3535, "step": 21039 }, { "epoch": 0.2734052039899747, "grad_norm": 0.46052759885787964, "learning_rate": 0.00014534891277505558, "loss": 1.4842, "step": 21040 }, { "epoch": 0.2734181985338906, "grad_norm": 0.5159851908683777, "learning_rate": 0.0001453463133131442, "loss": 1.631, "step": 21041 }, { "epoch": 0.27343119307780644, "grad_norm": 0.37330055236816406, "learning_rate": 0.0001453437138512328, "loss": 1.3921, "step": 21042 }, { "epoch": 0.27344418762172235, "grad_norm": 0.348766565322876, "learning_rate": 0.0001453411143893214, "loss": 1.2829, "step": 21043 }, { "epoch": 0.2734571821656382, "grad_norm": 0.33741191029548645, "learning_rate": 0.00014533851492741005, "loss": 1.518, "step": 21044 }, { "epoch": 0.2734701767095541, "grad_norm": 0.3587682843208313, "learning_rate": 0.00014533591546549865, "loss": 1.4188, "step": 21045 }, { "epoch": 0.27348317125346994, "grad_norm": 0.34680241346359253, "learning_rate": 0.00014533331600358727, "loss": 1.3415, "step": 21046 }, { "epoch": 0.27349616579738584, "grad_norm": 0.41146257519721985, "learning_rate": 0.00014533071654167587, "loss": 1.4233, "step": 21047 }, { "epoch": 0.2735091603413017, "grad_norm": 0.44580915570259094, "learning_rate": 0.0001453281170797645, "loss": 1.4175, "step": 21048 }, { "epoch": 0.2735221548852176, "grad_norm": 0.3958224952220917, "learning_rate": 0.00014532551761785312, "loss": 1.525, "step": 21049 }, { "epoch": 0.27353514942913343, "grad_norm": 0.45374515652656555, "learning_rate": 0.0001453229181559417, "loss": 1.4871, "step": 21050 }, { "epoch": 0.27354814397304933, "grad_norm": 0.37961292266845703, "learning_rate": 0.00014532031869403034, "loss": 1.6329, "step": 21051 }, { "epoch": 0.2735611385169652, "grad_norm": 0.30875957012176514, "learning_rate": 0.00014531771923211896, "loss": 1.4655, "step": 21052 }, { "epoch": 0.2735741330608811, "grad_norm": 0.3240174651145935, "learning_rate": 0.0001453151197702076, "loss": 1.1792, "step": 21053 }, { "epoch": 0.2735871276047969, "grad_norm": 0.476652055978775, "learning_rate": 0.00014531252030829618, "loss": 1.4837, "step": 21054 }, { "epoch": 0.2736001221487128, "grad_norm": 0.4962819814682007, "learning_rate": 0.00014530992084638478, "loss": 1.2407, "step": 21055 }, { "epoch": 0.27361311669262867, "grad_norm": 0.7336745262145996, "learning_rate": 0.00014530732138447343, "loss": 1.4192, "step": 21056 }, { "epoch": 0.27362611123654457, "grad_norm": 0.41748496890068054, "learning_rate": 0.00014530472192256203, "loss": 1.493, "step": 21057 }, { "epoch": 0.2736391057804604, "grad_norm": 0.4320474863052368, "learning_rate": 0.00014530212246065065, "loss": 1.4288, "step": 21058 }, { "epoch": 0.2736521003243763, "grad_norm": 0.4384480118751526, "learning_rate": 0.00014529952299873925, "loss": 1.4565, "step": 21059 }, { "epoch": 0.27366509486829216, "grad_norm": 0.3575893044471741, "learning_rate": 0.00014529692353682788, "loss": 1.585, "step": 21060 }, { "epoch": 0.27367808941220806, "grad_norm": 0.37584006786346436, "learning_rate": 0.0001452943240749165, "loss": 1.4706, "step": 21061 }, { "epoch": 0.2736910839561239, "grad_norm": 0.4650433659553528, "learning_rate": 0.0001452917246130051, "loss": 1.471, "step": 21062 }, { "epoch": 0.2737040785000398, "grad_norm": 0.4942532181739807, "learning_rate": 0.00014528912515109375, "loss": 1.5046, "step": 21063 }, { "epoch": 0.27371707304395565, "grad_norm": 0.3835715353488922, "learning_rate": 0.00014528652568918235, "loss": 1.4204, "step": 21064 }, { "epoch": 0.27373006758787155, "grad_norm": 0.4147658944129944, "learning_rate": 0.00014528392622727097, "loss": 1.674, "step": 21065 }, { "epoch": 0.2737430621317874, "grad_norm": 0.4385238587856293, "learning_rate": 0.00014528132676535957, "loss": 1.4338, "step": 21066 }, { "epoch": 0.2737560566757033, "grad_norm": 0.45123305916786194, "learning_rate": 0.0001452787273034482, "loss": 1.4056, "step": 21067 }, { "epoch": 0.27376905121961914, "grad_norm": 0.37931713461875916, "learning_rate": 0.00014527612784153682, "loss": 1.5346, "step": 21068 }, { "epoch": 0.27378204576353504, "grad_norm": 0.3663512170314789, "learning_rate": 0.00014527352837962542, "loss": 1.3788, "step": 21069 }, { "epoch": 0.2737950403074509, "grad_norm": 0.4405010938644409, "learning_rate": 0.00014527092891771404, "loss": 1.298, "step": 21070 }, { "epoch": 0.2738080348513668, "grad_norm": 0.3729875087738037, "learning_rate": 0.00014526832945580266, "loss": 1.5995, "step": 21071 }, { "epoch": 0.27382102939528263, "grad_norm": 0.42570021748542786, "learning_rate": 0.00014526572999389126, "loss": 1.3873, "step": 21072 }, { "epoch": 0.27383402393919853, "grad_norm": 0.3910354673862457, "learning_rate": 0.0001452631305319799, "loss": 1.4487, "step": 21073 }, { "epoch": 0.2738470184831144, "grad_norm": 0.2987804412841797, "learning_rate": 0.00014526053107006848, "loss": 1.1991, "step": 21074 }, { "epoch": 0.2738600130270303, "grad_norm": 0.3597513437271118, "learning_rate": 0.00014525793160815714, "loss": 1.4366, "step": 21075 }, { "epoch": 0.2738730075709461, "grad_norm": 0.48137202858924866, "learning_rate": 0.00014525533214624573, "loss": 1.42, "step": 21076 }, { "epoch": 0.273886002114862, "grad_norm": 0.42970511317253113, "learning_rate": 0.00014525273268433436, "loss": 1.3713, "step": 21077 }, { "epoch": 0.27389899665877787, "grad_norm": 0.3753858506679535, "learning_rate": 0.00014525013322242295, "loss": 1.2101, "step": 21078 }, { "epoch": 0.27391199120269377, "grad_norm": 0.4589577317237854, "learning_rate": 0.00014524753376051158, "loss": 1.4407, "step": 21079 }, { "epoch": 0.2739249857466096, "grad_norm": 0.3946545422077179, "learning_rate": 0.0001452449342986002, "loss": 1.3384, "step": 21080 }, { "epoch": 0.2739379802905255, "grad_norm": 0.415936142206192, "learning_rate": 0.0001452423348366888, "loss": 1.4975, "step": 21081 }, { "epoch": 0.27395097483444136, "grad_norm": 0.48644179105758667, "learning_rate": 0.00014523973537477743, "loss": 1.4348, "step": 21082 }, { "epoch": 0.27396396937835726, "grad_norm": 0.38244932889938354, "learning_rate": 0.00014523713591286605, "loss": 1.4551, "step": 21083 }, { "epoch": 0.2739769639222731, "grad_norm": 0.35513171553611755, "learning_rate": 0.00014523453645095467, "loss": 1.3898, "step": 21084 }, { "epoch": 0.273989958466189, "grad_norm": 0.5417866110801697, "learning_rate": 0.00014523193698904327, "loss": 1.4588, "step": 21085 }, { "epoch": 0.27400295301010485, "grad_norm": 0.41893693804740906, "learning_rate": 0.00014522933752713187, "loss": 1.4227, "step": 21086 }, { "epoch": 0.27401594755402076, "grad_norm": 0.3841964602470398, "learning_rate": 0.00014522673806522052, "loss": 1.6915, "step": 21087 }, { "epoch": 0.27402894209793666, "grad_norm": 0.33003556728363037, "learning_rate": 0.00014522413860330912, "loss": 1.395, "step": 21088 }, { "epoch": 0.2740419366418525, "grad_norm": 0.28712520003318787, "learning_rate": 0.00014522153914139774, "loss": 1.4047, "step": 21089 }, { "epoch": 0.2740549311857684, "grad_norm": 0.40768882632255554, "learning_rate": 0.00014521893967948634, "loss": 1.4435, "step": 21090 }, { "epoch": 0.27406792572968425, "grad_norm": 0.509784460067749, "learning_rate": 0.00014521634021757496, "loss": 1.4047, "step": 21091 }, { "epoch": 0.27408092027360015, "grad_norm": 0.35930219292640686, "learning_rate": 0.0001452137407556636, "loss": 1.4614, "step": 21092 }, { "epoch": 0.274093914817516, "grad_norm": 0.4207635521888733, "learning_rate": 0.0001452111412937522, "loss": 1.1942, "step": 21093 }, { "epoch": 0.2741069093614319, "grad_norm": 0.30240803956985474, "learning_rate": 0.0001452085418318408, "loss": 1.3059, "step": 21094 }, { "epoch": 0.27411990390534774, "grad_norm": 0.5255926847457886, "learning_rate": 0.00014520594236992944, "loss": 1.3044, "step": 21095 }, { "epoch": 0.27413289844926364, "grad_norm": 0.3513743281364441, "learning_rate": 0.00014520334290801806, "loss": 1.3226, "step": 21096 }, { "epoch": 0.2741458929931795, "grad_norm": 0.4706592857837677, "learning_rate": 0.00014520074344610666, "loss": 1.4042, "step": 21097 }, { "epoch": 0.2741588875370954, "grad_norm": 0.4073595404624939, "learning_rate": 0.00014519814398419528, "loss": 1.3979, "step": 21098 }, { "epoch": 0.27417188208101123, "grad_norm": 0.4529314339160919, "learning_rate": 0.0001451955445222839, "loss": 1.2585, "step": 21099 }, { "epoch": 0.27418487662492713, "grad_norm": 0.4774497151374817, "learning_rate": 0.0001451929450603725, "loss": 1.3144, "step": 21100 }, { "epoch": 0.274197871168843, "grad_norm": 0.39242956042289734, "learning_rate": 0.00014519034559846113, "loss": 1.4923, "step": 21101 }, { "epoch": 0.2742108657127589, "grad_norm": 0.3895746171474457, "learning_rate": 0.00014518774613654975, "loss": 1.2079, "step": 21102 }, { "epoch": 0.2742238602566747, "grad_norm": 0.4333137571811676, "learning_rate": 0.00014518514667463835, "loss": 1.2861, "step": 21103 }, { "epoch": 0.2742368548005906, "grad_norm": 0.26529887318611145, "learning_rate": 0.00014518254721272697, "loss": 1.1751, "step": 21104 }, { "epoch": 0.27424984934450647, "grad_norm": 0.475535124540329, "learning_rate": 0.00014517994775081557, "loss": 1.5904, "step": 21105 }, { "epoch": 0.27426284388842237, "grad_norm": 0.40255001187324524, "learning_rate": 0.00014517734828890422, "loss": 1.63, "step": 21106 }, { "epoch": 0.2742758384323382, "grad_norm": 0.3675733506679535, "learning_rate": 0.00014517474882699282, "loss": 1.2531, "step": 21107 }, { "epoch": 0.2742888329762541, "grad_norm": 0.4259125888347626, "learning_rate": 0.00014517214936508145, "loss": 1.4274, "step": 21108 }, { "epoch": 0.27430182752016996, "grad_norm": 0.516318142414093, "learning_rate": 0.00014516954990317004, "loss": 1.5148, "step": 21109 }, { "epoch": 0.27431482206408586, "grad_norm": 0.46355438232421875, "learning_rate": 0.00014516695044125867, "loss": 1.256, "step": 21110 }, { "epoch": 0.2743278166080017, "grad_norm": 0.3909146189689636, "learning_rate": 0.0001451643509793473, "loss": 1.5112, "step": 21111 }, { "epoch": 0.2743408111519176, "grad_norm": 0.40489462018013, "learning_rate": 0.0001451617515174359, "loss": 1.4785, "step": 21112 }, { "epoch": 0.27435380569583345, "grad_norm": 0.4154320955276489, "learning_rate": 0.00014515915205552451, "loss": 1.4824, "step": 21113 }, { "epoch": 0.27436680023974935, "grad_norm": 0.33446720242500305, "learning_rate": 0.00014515655259361314, "loss": 1.165, "step": 21114 }, { "epoch": 0.2743797947836652, "grad_norm": 0.31943079829216003, "learning_rate": 0.00014515395313170174, "loss": 1.237, "step": 21115 }, { "epoch": 0.2743927893275811, "grad_norm": 0.4016754925251007, "learning_rate": 0.00014515135366979036, "loss": 1.3974, "step": 21116 }, { "epoch": 0.27440578387149694, "grad_norm": 0.4799249768257141, "learning_rate": 0.00014514875420787896, "loss": 1.4643, "step": 21117 }, { "epoch": 0.27441877841541285, "grad_norm": 0.4338774085044861, "learning_rate": 0.0001451461547459676, "loss": 1.1566, "step": 21118 }, { "epoch": 0.2744317729593287, "grad_norm": 0.37024784088134766, "learning_rate": 0.0001451435552840562, "loss": 1.5777, "step": 21119 }, { "epoch": 0.2744447675032446, "grad_norm": 0.42051899433135986, "learning_rate": 0.00014514095582214483, "loss": 1.4166, "step": 21120 }, { "epoch": 0.27445776204716044, "grad_norm": 0.40367645025253296, "learning_rate": 0.00014513835636023343, "loss": 1.2711, "step": 21121 }, { "epoch": 0.27447075659107634, "grad_norm": 0.3834116458892822, "learning_rate": 0.00014513575689832205, "loss": 1.5209, "step": 21122 }, { "epoch": 0.2744837511349922, "grad_norm": 0.45230185985565186, "learning_rate": 0.00014513315743641068, "loss": 1.4885, "step": 21123 }, { "epoch": 0.2744967456789081, "grad_norm": 0.44072362780570984, "learning_rate": 0.00014513055797449927, "loss": 1.4712, "step": 21124 }, { "epoch": 0.27450974022282393, "grad_norm": 0.5290454626083374, "learning_rate": 0.0001451279585125879, "loss": 1.6872, "step": 21125 }, { "epoch": 0.27452273476673983, "grad_norm": 0.4498007893562317, "learning_rate": 0.00014512535905067652, "loss": 1.4448, "step": 21126 }, { "epoch": 0.2745357293106557, "grad_norm": 0.48479050397872925, "learning_rate": 0.00014512275958876512, "loss": 1.4481, "step": 21127 }, { "epoch": 0.2745487238545716, "grad_norm": 0.46207278966903687, "learning_rate": 0.00014512016012685375, "loss": 1.594, "step": 21128 }, { "epoch": 0.2745617183984874, "grad_norm": 0.4562664330005646, "learning_rate": 0.00014511756066494234, "loss": 1.4636, "step": 21129 }, { "epoch": 0.2745747129424033, "grad_norm": 0.4015084207057953, "learning_rate": 0.000145114961203031, "loss": 1.4764, "step": 21130 }, { "epoch": 0.27458770748631917, "grad_norm": 0.4773150682449341, "learning_rate": 0.0001451123617411196, "loss": 1.612, "step": 21131 }, { "epoch": 0.27460070203023507, "grad_norm": 0.47321027517318726, "learning_rate": 0.00014510976227920822, "loss": 1.4557, "step": 21132 }, { "epoch": 0.2746136965741509, "grad_norm": 0.34213805198669434, "learning_rate": 0.00014510716281729681, "loss": 1.3234, "step": 21133 }, { "epoch": 0.2746266911180668, "grad_norm": 0.491956502199173, "learning_rate": 0.00014510456335538544, "loss": 1.6393, "step": 21134 }, { "epoch": 0.27463968566198266, "grad_norm": 0.24043916165828705, "learning_rate": 0.00014510196389347406, "loss": 1.4292, "step": 21135 }, { "epoch": 0.27465268020589856, "grad_norm": 0.49715086817741394, "learning_rate": 0.00014509936443156266, "loss": 1.3918, "step": 21136 }, { "epoch": 0.2746656747498144, "grad_norm": 0.4062346816062927, "learning_rate": 0.0001450967649696513, "loss": 1.3848, "step": 21137 }, { "epoch": 0.2746786692937303, "grad_norm": 0.4312981367111206, "learning_rate": 0.0001450941655077399, "loss": 1.3298, "step": 21138 }, { "epoch": 0.27469166383764615, "grad_norm": 0.3923072814941406, "learning_rate": 0.0001450915660458285, "loss": 1.3491, "step": 21139 }, { "epoch": 0.27470465838156205, "grad_norm": 0.4729924499988556, "learning_rate": 0.00014508896658391713, "loss": 1.425, "step": 21140 }, { "epoch": 0.2747176529254779, "grad_norm": 0.48524609208106995, "learning_rate": 0.00014508636712200576, "loss": 1.4278, "step": 21141 }, { "epoch": 0.2747306474693938, "grad_norm": 0.3876422643661499, "learning_rate": 0.00014508376766009438, "loss": 1.4143, "step": 21142 }, { "epoch": 0.27474364201330964, "grad_norm": 0.3328193426132202, "learning_rate": 0.00014508116819818298, "loss": 1.3708, "step": 21143 }, { "epoch": 0.27475663655722554, "grad_norm": 0.3669082224369049, "learning_rate": 0.0001450785687362716, "loss": 1.404, "step": 21144 }, { "epoch": 0.2747696311011414, "grad_norm": 0.6811167001724243, "learning_rate": 0.00014507596927436023, "loss": 1.56, "step": 21145 }, { "epoch": 0.2747826256450573, "grad_norm": 0.3828492760658264, "learning_rate": 0.00014507336981244882, "loss": 1.1858, "step": 21146 }, { "epoch": 0.27479562018897313, "grad_norm": 0.31917595863342285, "learning_rate": 0.00014507077035053745, "loss": 1.3122, "step": 21147 }, { "epoch": 0.27480861473288903, "grad_norm": 0.4403409957885742, "learning_rate": 0.00014506817088862605, "loss": 1.4218, "step": 21148 }, { "epoch": 0.2748216092768049, "grad_norm": 0.32866010069847107, "learning_rate": 0.0001450655714267147, "loss": 1.2003, "step": 21149 }, { "epoch": 0.2748346038207208, "grad_norm": 0.3999956548213959, "learning_rate": 0.0001450629719648033, "loss": 1.4845, "step": 21150 }, { "epoch": 0.2748475983646366, "grad_norm": 0.4942968785762787, "learning_rate": 0.00014506037250289192, "loss": 1.4445, "step": 21151 }, { "epoch": 0.2748605929085525, "grad_norm": 0.3724045753479004, "learning_rate": 0.00014505777304098052, "loss": 1.3934, "step": 21152 }, { "epoch": 0.27487358745246837, "grad_norm": 0.47794023156166077, "learning_rate": 0.00014505517357906914, "loss": 1.3485, "step": 21153 }, { "epoch": 0.27488658199638427, "grad_norm": 0.37596553564071655, "learning_rate": 0.00014505257411715777, "loss": 1.4601, "step": 21154 }, { "epoch": 0.2748995765403001, "grad_norm": 0.41127005219459534, "learning_rate": 0.00014504997465524636, "loss": 1.3331, "step": 21155 }, { "epoch": 0.274912571084216, "grad_norm": 0.3894975483417511, "learning_rate": 0.000145047375193335, "loss": 1.1967, "step": 21156 }, { "epoch": 0.27492556562813186, "grad_norm": 0.4334724247455597, "learning_rate": 0.0001450447757314236, "loss": 1.5161, "step": 21157 }, { "epoch": 0.27493856017204776, "grad_norm": 0.6829808950424194, "learning_rate": 0.0001450421762695122, "loss": 1.4384, "step": 21158 }, { "epoch": 0.2749515547159636, "grad_norm": 0.43273308873176575, "learning_rate": 0.00014503957680760083, "loss": 1.417, "step": 21159 }, { "epoch": 0.2749645492598795, "grad_norm": 0.36936119198799133, "learning_rate": 0.00014503697734568943, "loss": 1.4714, "step": 21160 }, { "epoch": 0.27497754380379535, "grad_norm": 0.4327675998210907, "learning_rate": 0.00014503437788377808, "loss": 1.6612, "step": 21161 }, { "epoch": 0.27499053834771126, "grad_norm": 0.4079569876194, "learning_rate": 0.00014503177842186668, "loss": 1.4983, "step": 21162 }, { "epoch": 0.2750035328916271, "grad_norm": 0.36818698048591614, "learning_rate": 0.0001450291789599553, "loss": 1.2719, "step": 21163 }, { "epoch": 0.275016527435543, "grad_norm": 0.33294445276260376, "learning_rate": 0.0001450265794980439, "loss": 1.4431, "step": 21164 }, { "epoch": 0.2750295219794589, "grad_norm": 0.43079033493995667, "learning_rate": 0.00014502398003613253, "loss": 1.4679, "step": 21165 }, { "epoch": 0.27504251652337475, "grad_norm": 0.3781503736972809, "learning_rate": 0.00014502138057422115, "loss": 1.7069, "step": 21166 }, { "epoch": 0.27505551106729065, "grad_norm": 0.31240200996398926, "learning_rate": 0.00014501878111230975, "loss": 1.3101, "step": 21167 }, { "epoch": 0.2750685056112065, "grad_norm": 0.4006272256374359, "learning_rate": 0.00014501618165039837, "loss": 1.3933, "step": 21168 }, { "epoch": 0.2750815001551224, "grad_norm": 0.38519835472106934, "learning_rate": 0.000145013582188487, "loss": 1.487, "step": 21169 }, { "epoch": 0.27509449469903824, "grad_norm": 0.3168710768222809, "learning_rate": 0.0001450109827265756, "loss": 1.1667, "step": 21170 }, { "epoch": 0.27510748924295414, "grad_norm": 0.444307804107666, "learning_rate": 0.00014500838326466422, "loss": 1.5404, "step": 21171 }, { "epoch": 0.27512048378687, "grad_norm": 0.40774983167648315, "learning_rate": 0.00014500578380275284, "loss": 1.4113, "step": 21172 }, { "epoch": 0.2751334783307859, "grad_norm": 0.33713987469673157, "learning_rate": 0.00014500318434084147, "loss": 1.4219, "step": 21173 }, { "epoch": 0.27514647287470173, "grad_norm": 0.43668732047080994, "learning_rate": 0.00014500058487893007, "loss": 1.2493, "step": 21174 }, { "epoch": 0.27515946741861763, "grad_norm": 0.508034348487854, "learning_rate": 0.0001449979854170187, "loss": 1.4009, "step": 21175 }, { "epoch": 0.2751724619625335, "grad_norm": 0.44830527901649475, "learning_rate": 0.00014499538595510731, "loss": 1.281, "step": 21176 }, { "epoch": 0.2751854565064494, "grad_norm": 0.4372822940349579, "learning_rate": 0.0001449927864931959, "loss": 1.334, "step": 21177 }, { "epoch": 0.2751984510503652, "grad_norm": 0.3612796664237976, "learning_rate": 0.00014499018703128454, "loss": 1.546, "step": 21178 }, { "epoch": 0.2752114455942811, "grad_norm": 0.40134289860725403, "learning_rate": 0.00014498758756937313, "loss": 1.3757, "step": 21179 }, { "epoch": 0.27522444013819697, "grad_norm": 0.38240689039230347, "learning_rate": 0.00014498498810746178, "loss": 1.6334, "step": 21180 }, { "epoch": 0.27523743468211287, "grad_norm": 0.4360727071762085, "learning_rate": 0.00014498238864555038, "loss": 1.3282, "step": 21181 }, { "epoch": 0.2752504292260287, "grad_norm": 0.3220953643321991, "learning_rate": 0.00014497978918363898, "loss": 1.145, "step": 21182 }, { "epoch": 0.2752634237699446, "grad_norm": 0.39528268575668335, "learning_rate": 0.0001449771897217276, "loss": 1.4409, "step": 21183 }, { "epoch": 0.27527641831386046, "grad_norm": 0.4668637812137604, "learning_rate": 0.00014497459025981623, "loss": 1.3587, "step": 21184 }, { "epoch": 0.27528941285777636, "grad_norm": 0.3947980999946594, "learning_rate": 0.00014497199079790485, "loss": 1.7426, "step": 21185 }, { "epoch": 0.2753024074016922, "grad_norm": 0.4025053381919861, "learning_rate": 0.00014496939133599345, "loss": 1.4422, "step": 21186 }, { "epoch": 0.2753154019456081, "grad_norm": 0.4357895851135254, "learning_rate": 0.00014496679187408207, "loss": 1.288, "step": 21187 }, { "epoch": 0.27532839648952395, "grad_norm": 0.36953502893447876, "learning_rate": 0.0001449641924121707, "loss": 1.4374, "step": 21188 }, { "epoch": 0.27534139103343985, "grad_norm": 0.38954612612724304, "learning_rate": 0.0001449615929502593, "loss": 1.3362, "step": 21189 }, { "epoch": 0.2753543855773557, "grad_norm": 0.3733152747154236, "learning_rate": 0.00014495899348834792, "loss": 1.5403, "step": 21190 }, { "epoch": 0.2753673801212716, "grad_norm": 0.36010414361953735, "learning_rate": 0.00014495639402643652, "loss": 1.3824, "step": 21191 }, { "epoch": 0.27538037466518744, "grad_norm": 0.3495846092700958, "learning_rate": 0.00014495379456452517, "loss": 1.3414, "step": 21192 }, { "epoch": 0.27539336920910334, "grad_norm": 0.4866452217102051, "learning_rate": 0.00014495119510261377, "loss": 1.4483, "step": 21193 }, { "epoch": 0.2754063637530192, "grad_norm": 0.38002511858940125, "learning_rate": 0.00014494859564070237, "loss": 1.5761, "step": 21194 }, { "epoch": 0.2754193582969351, "grad_norm": 0.4484215974807739, "learning_rate": 0.000144945996178791, "loss": 1.492, "step": 21195 }, { "epoch": 0.27543235284085094, "grad_norm": 0.36568936705589294, "learning_rate": 0.00014494339671687961, "loss": 1.6437, "step": 21196 }, { "epoch": 0.27544534738476684, "grad_norm": 0.4071952700614929, "learning_rate": 0.00014494079725496824, "loss": 1.6131, "step": 21197 }, { "epoch": 0.2754583419286827, "grad_norm": 0.3388287425041199, "learning_rate": 0.00014493819779305684, "loss": 1.3583, "step": 21198 }, { "epoch": 0.2754713364725986, "grad_norm": 0.44066786766052246, "learning_rate": 0.00014493559833114546, "loss": 1.4328, "step": 21199 }, { "epoch": 0.2754843310165144, "grad_norm": 0.4633805751800537, "learning_rate": 0.00014493299886923408, "loss": 1.4578, "step": 21200 }, { "epoch": 0.27549732556043033, "grad_norm": 0.5341418981552124, "learning_rate": 0.00014493039940732268, "loss": 1.4543, "step": 21201 }, { "epoch": 0.2755103201043462, "grad_norm": 0.3690308630466461, "learning_rate": 0.0001449277999454113, "loss": 1.3158, "step": 21202 }, { "epoch": 0.2755233146482621, "grad_norm": 0.4203195571899414, "learning_rate": 0.0001449252004834999, "loss": 1.368, "step": 21203 }, { "epoch": 0.2755363091921779, "grad_norm": 0.3265753388404846, "learning_rate": 0.00014492260102158856, "loss": 1.1581, "step": 21204 }, { "epoch": 0.2755493037360938, "grad_norm": 0.419819176197052, "learning_rate": 0.00014492000155967715, "loss": 1.4314, "step": 21205 }, { "epoch": 0.27556229828000967, "grad_norm": 0.4059402346611023, "learning_rate": 0.00014491740209776578, "loss": 1.4519, "step": 21206 }, { "epoch": 0.27557529282392557, "grad_norm": 0.3671705424785614, "learning_rate": 0.00014491480263585437, "loss": 1.4712, "step": 21207 }, { "epoch": 0.2755882873678414, "grad_norm": 0.4350607991218567, "learning_rate": 0.000144912203173943, "loss": 1.4063, "step": 21208 }, { "epoch": 0.2756012819117573, "grad_norm": 0.40174663066864014, "learning_rate": 0.00014490960371203162, "loss": 1.5422, "step": 21209 }, { "epoch": 0.27561427645567316, "grad_norm": 0.5050309896469116, "learning_rate": 0.00014490700425012022, "loss": 1.3792, "step": 21210 }, { "epoch": 0.27562727099958906, "grad_norm": 0.4664018154144287, "learning_rate": 0.00014490440478820885, "loss": 1.3682, "step": 21211 }, { "epoch": 0.2756402655435049, "grad_norm": 0.3265899121761322, "learning_rate": 0.00014490180532629747, "loss": 1.5398, "step": 21212 }, { "epoch": 0.2756532600874208, "grad_norm": 0.31491950154304504, "learning_rate": 0.00014489920586438607, "loss": 1.2475, "step": 21213 }, { "epoch": 0.27566625463133665, "grad_norm": 0.42566415667533875, "learning_rate": 0.0001448966064024747, "loss": 1.3364, "step": 21214 }, { "epoch": 0.27567924917525255, "grad_norm": 0.49124494194984436, "learning_rate": 0.00014489400694056332, "loss": 1.4401, "step": 21215 }, { "epoch": 0.2756922437191684, "grad_norm": 0.3966924250125885, "learning_rate": 0.00014489140747865194, "loss": 1.4349, "step": 21216 }, { "epoch": 0.2757052382630843, "grad_norm": 0.41968196630477905, "learning_rate": 0.00014488880801674054, "loss": 1.5184, "step": 21217 }, { "epoch": 0.27571823280700014, "grad_norm": 0.3788725435733795, "learning_rate": 0.00014488620855482916, "loss": 1.4666, "step": 21218 }, { "epoch": 0.27573122735091604, "grad_norm": 0.39863821864128113, "learning_rate": 0.0001448836090929178, "loss": 1.3248, "step": 21219 }, { "epoch": 0.2757442218948319, "grad_norm": 0.3196360766887665, "learning_rate": 0.00014488100963100638, "loss": 1.3715, "step": 21220 }, { "epoch": 0.2757572164387478, "grad_norm": 0.41795575618743896, "learning_rate": 0.000144878410169095, "loss": 1.4114, "step": 21221 }, { "epoch": 0.27577021098266363, "grad_norm": 0.25024887919425964, "learning_rate": 0.0001448758107071836, "loss": 1.2667, "step": 21222 }, { "epoch": 0.27578320552657953, "grad_norm": 0.5350170731544495, "learning_rate": 0.00014487321124527223, "loss": 1.6328, "step": 21223 }, { "epoch": 0.2757962000704954, "grad_norm": 0.35319074988365173, "learning_rate": 0.00014487061178336086, "loss": 1.3237, "step": 21224 }, { "epoch": 0.2758091946144113, "grad_norm": 0.38834911584854126, "learning_rate": 0.00014486801232144945, "loss": 1.4453, "step": 21225 }, { "epoch": 0.2758221891583271, "grad_norm": 0.4736301898956299, "learning_rate": 0.00014486541285953808, "loss": 1.4106, "step": 21226 }, { "epoch": 0.275835183702243, "grad_norm": 0.383249431848526, "learning_rate": 0.0001448628133976267, "loss": 1.3068, "step": 21227 }, { "epoch": 0.27584817824615887, "grad_norm": 0.3536500334739685, "learning_rate": 0.00014486021393571533, "loss": 1.3202, "step": 21228 }, { "epoch": 0.27586117279007477, "grad_norm": 0.42011725902557373, "learning_rate": 0.00014485761447380392, "loss": 1.3821, "step": 21229 }, { "epoch": 0.2758741673339906, "grad_norm": 0.36506393551826477, "learning_rate": 0.00014485501501189255, "loss": 1.2793, "step": 21230 }, { "epoch": 0.2758871618779065, "grad_norm": 0.34045228362083435, "learning_rate": 0.00014485241554998117, "loss": 1.4418, "step": 21231 }, { "epoch": 0.27590015642182236, "grad_norm": 0.45484286546707153, "learning_rate": 0.00014484981608806977, "loss": 1.5592, "step": 21232 }, { "epoch": 0.27591315096573826, "grad_norm": 0.3172948956489563, "learning_rate": 0.0001448472166261584, "loss": 1.3045, "step": 21233 }, { "epoch": 0.2759261455096541, "grad_norm": 0.4191264808177948, "learning_rate": 0.000144844617164247, "loss": 1.3347, "step": 21234 }, { "epoch": 0.27593914005357, "grad_norm": 0.41809552907943726, "learning_rate": 0.00014484201770233564, "loss": 1.3487, "step": 21235 }, { "epoch": 0.27595213459748585, "grad_norm": 0.4197249114513397, "learning_rate": 0.00014483941824042424, "loss": 1.5669, "step": 21236 }, { "epoch": 0.27596512914140175, "grad_norm": 0.41611534357070923, "learning_rate": 0.00014483681877851284, "loss": 1.3036, "step": 21237 }, { "epoch": 0.2759781236853176, "grad_norm": 0.3917938470840454, "learning_rate": 0.00014483421931660146, "loss": 1.3676, "step": 21238 }, { "epoch": 0.2759911182292335, "grad_norm": 0.49736669659614563, "learning_rate": 0.0001448316198546901, "loss": 1.5584, "step": 21239 }, { "epoch": 0.2760041127731494, "grad_norm": 0.3698282837867737, "learning_rate": 0.0001448290203927787, "loss": 1.3504, "step": 21240 }, { "epoch": 0.27601710731706525, "grad_norm": 0.4146203398704529, "learning_rate": 0.0001448264209308673, "loss": 1.4932, "step": 21241 }, { "epoch": 0.27603010186098115, "grad_norm": 0.4756197929382324, "learning_rate": 0.00014482382146895593, "loss": 1.5046, "step": 21242 }, { "epoch": 0.276043096404897, "grad_norm": 0.3861199617385864, "learning_rate": 0.00014482122200704456, "loss": 1.5186, "step": 21243 }, { "epoch": 0.2760560909488129, "grad_norm": 0.39208948612213135, "learning_rate": 0.00014481862254513316, "loss": 1.3228, "step": 21244 }, { "epoch": 0.27606908549272874, "grad_norm": 0.3501081168651581, "learning_rate": 0.00014481602308322178, "loss": 1.3014, "step": 21245 }, { "epoch": 0.27608208003664464, "grad_norm": 0.5069080591201782, "learning_rate": 0.0001448134236213104, "loss": 1.5984, "step": 21246 }, { "epoch": 0.2760950745805605, "grad_norm": 0.40057137608528137, "learning_rate": 0.00014481082415939903, "loss": 1.3721, "step": 21247 }, { "epoch": 0.2761080691244764, "grad_norm": 0.29378098249435425, "learning_rate": 0.00014480822469748763, "loss": 1.377, "step": 21248 }, { "epoch": 0.27612106366839223, "grad_norm": 0.37494799494743347, "learning_rate": 0.00014480562523557622, "loss": 1.4259, "step": 21249 }, { "epoch": 0.27613405821230813, "grad_norm": 0.4854723811149597, "learning_rate": 0.00014480302577366488, "loss": 1.6243, "step": 21250 }, { "epoch": 0.276147052756224, "grad_norm": 0.3956761658191681, "learning_rate": 0.00014480042631175347, "loss": 1.5744, "step": 21251 }, { "epoch": 0.2761600473001399, "grad_norm": 0.4590616226196289, "learning_rate": 0.0001447978268498421, "loss": 1.3578, "step": 21252 }, { "epoch": 0.2761730418440557, "grad_norm": 0.40178245306015015, "learning_rate": 0.0001447952273879307, "loss": 1.4293, "step": 21253 }, { "epoch": 0.2761860363879716, "grad_norm": 0.4865666627883911, "learning_rate": 0.00014479262792601932, "loss": 1.5323, "step": 21254 }, { "epoch": 0.27619903093188747, "grad_norm": 0.4642694890499115, "learning_rate": 0.00014479002846410794, "loss": 1.446, "step": 21255 }, { "epoch": 0.27621202547580337, "grad_norm": 0.4002801775932312, "learning_rate": 0.00014478742900219654, "loss": 1.5125, "step": 21256 }, { "epoch": 0.2762250200197192, "grad_norm": 0.3465811014175415, "learning_rate": 0.00014478482954028517, "loss": 1.3498, "step": 21257 }, { "epoch": 0.2762380145636351, "grad_norm": 0.4012953042984009, "learning_rate": 0.0001447822300783738, "loss": 1.4418, "step": 21258 }, { "epoch": 0.27625100910755096, "grad_norm": 0.4169375002384186, "learning_rate": 0.00014477963061646241, "loss": 1.343, "step": 21259 }, { "epoch": 0.27626400365146686, "grad_norm": 0.4746510088443756, "learning_rate": 0.000144777031154551, "loss": 1.6866, "step": 21260 }, { "epoch": 0.2762769981953827, "grad_norm": 0.44590604305267334, "learning_rate": 0.0001447744316926396, "loss": 1.5745, "step": 21261 }, { "epoch": 0.2762899927392986, "grad_norm": 0.323533296585083, "learning_rate": 0.00014477183223072826, "loss": 1.5348, "step": 21262 }, { "epoch": 0.27630298728321445, "grad_norm": 0.4145243465900421, "learning_rate": 0.00014476923276881686, "loss": 1.5405, "step": 21263 }, { "epoch": 0.27631598182713035, "grad_norm": 0.38724735379219055, "learning_rate": 0.00014476663330690548, "loss": 1.5302, "step": 21264 }, { "epoch": 0.2763289763710462, "grad_norm": 0.4373025596141815, "learning_rate": 0.00014476403384499408, "loss": 1.4764, "step": 21265 }, { "epoch": 0.2763419709149621, "grad_norm": 0.37312254309654236, "learning_rate": 0.0001447614343830827, "loss": 1.3138, "step": 21266 }, { "epoch": 0.27635496545887794, "grad_norm": 0.4806848168373108, "learning_rate": 0.00014475883492117133, "loss": 1.6074, "step": 21267 }, { "epoch": 0.27636796000279384, "grad_norm": 0.41567668318748474, "learning_rate": 0.00014475623545925993, "loss": 1.2749, "step": 21268 }, { "epoch": 0.2763809545467097, "grad_norm": 0.46890705823898315, "learning_rate": 0.00014475363599734855, "loss": 1.325, "step": 21269 }, { "epoch": 0.2763939490906256, "grad_norm": 0.35188305377960205, "learning_rate": 0.00014475103653543718, "loss": 1.5179, "step": 21270 }, { "epoch": 0.27640694363454144, "grad_norm": 0.42655396461486816, "learning_rate": 0.0001447484370735258, "loss": 1.4931, "step": 21271 }, { "epoch": 0.27641993817845734, "grad_norm": 0.4551280736923218, "learning_rate": 0.0001447458376116144, "loss": 1.3336, "step": 21272 }, { "epoch": 0.2764329327223732, "grad_norm": 0.4306657612323761, "learning_rate": 0.00014474323814970302, "loss": 1.4428, "step": 21273 }, { "epoch": 0.2764459272662891, "grad_norm": 0.4020560085773468, "learning_rate": 0.00014474063868779165, "loss": 1.4625, "step": 21274 }, { "epoch": 0.2764589218102049, "grad_norm": 0.3207226097583771, "learning_rate": 0.00014473803922588024, "loss": 1.3209, "step": 21275 }, { "epoch": 0.27647191635412083, "grad_norm": 0.39836278557777405, "learning_rate": 0.00014473543976396887, "loss": 1.3315, "step": 21276 }, { "epoch": 0.2764849108980367, "grad_norm": 0.4348875880241394, "learning_rate": 0.00014473284030205747, "loss": 1.4045, "step": 21277 }, { "epoch": 0.2764979054419526, "grad_norm": 0.382883757352829, "learning_rate": 0.0001447302408401461, "loss": 1.2114, "step": 21278 }, { "epoch": 0.2765108999858684, "grad_norm": 0.4889993965625763, "learning_rate": 0.00014472764137823471, "loss": 1.4111, "step": 21279 }, { "epoch": 0.2765238945297843, "grad_norm": 0.36627429723739624, "learning_rate": 0.0001447250419163233, "loss": 1.2897, "step": 21280 }, { "epoch": 0.27653688907370017, "grad_norm": 0.41897332668304443, "learning_rate": 0.00014472244245441194, "loss": 1.5972, "step": 21281 }, { "epoch": 0.27654988361761607, "grad_norm": 0.3551464080810547, "learning_rate": 0.00014471984299250056, "loss": 1.3026, "step": 21282 }, { "epoch": 0.2765628781615319, "grad_norm": 0.4172768294811249, "learning_rate": 0.00014471724353058919, "loss": 1.4142, "step": 21283 }, { "epoch": 0.2765758727054478, "grad_norm": 0.3352348804473877, "learning_rate": 0.00014471464406867778, "loss": 1.3868, "step": 21284 }, { "epoch": 0.27658886724936366, "grad_norm": 0.36794567108154297, "learning_rate": 0.0001447120446067664, "loss": 1.3668, "step": 21285 }, { "epoch": 0.27660186179327956, "grad_norm": 0.42755863070487976, "learning_rate": 0.00014470944514485503, "loss": 1.5327, "step": 21286 }, { "epoch": 0.2766148563371954, "grad_norm": 0.39130860567092896, "learning_rate": 0.00014470684568294363, "loss": 1.2986, "step": 21287 }, { "epoch": 0.2766278508811113, "grad_norm": 0.3780863583087921, "learning_rate": 0.00014470424622103225, "loss": 1.3391, "step": 21288 }, { "epoch": 0.27664084542502715, "grad_norm": 0.3552754521369934, "learning_rate": 0.00014470164675912088, "loss": 1.1574, "step": 21289 }, { "epoch": 0.27665383996894305, "grad_norm": 0.41215065121650696, "learning_rate": 0.0001446990472972095, "loss": 1.5628, "step": 21290 }, { "epoch": 0.2766668345128589, "grad_norm": 0.30129313468933105, "learning_rate": 0.0001446964478352981, "loss": 1.3785, "step": 21291 }, { "epoch": 0.2766798290567748, "grad_norm": 0.43935802578926086, "learning_rate": 0.0001446938483733867, "loss": 1.4064, "step": 21292 }, { "epoch": 0.27669282360069064, "grad_norm": 0.5928434729576111, "learning_rate": 0.00014469124891147535, "loss": 1.4197, "step": 21293 }, { "epoch": 0.27670581814460654, "grad_norm": 0.3758867084980011, "learning_rate": 0.00014468864944956395, "loss": 1.3138, "step": 21294 }, { "epoch": 0.2767188126885224, "grad_norm": 0.5056210160255432, "learning_rate": 0.00014468604998765257, "loss": 1.4802, "step": 21295 }, { "epoch": 0.2767318072324383, "grad_norm": 0.3933154344558716, "learning_rate": 0.00014468345052574117, "loss": 1.3173, "step": 21296 }, { "epoch": 0.27674480177635413, "grad_norm": 0.4187923073768616, "learning_rate": 0.0001446808510638298, "loss": 1.271, "step": 21297 }, { "epoch": 0.27675779632027003, "grad_norm": 0.3093286156654358, "learning_rate": 0.00014467825160191842, "loss": 1.3094, "step": 21298 }, { "epoch": 0.2767707908641859, "grad_norm": 0.41404083371162415, "learning_rate": 0.00014467565214000701, "loss": 1.5085, "step": 21299 }, { "epoch": 0.2767837854081018, "grad_norm": 0.49591660499572754, "learning_rate": 0.00014467305267809564, "loss": 1.2991, "step": 21300 }, { "epoch": 0.2767967799520176, "grad_norm": 0.4666811227798462, "learning_rate": 0.00014467045321618426, "loss": 1.5373, "step": 21301 }, { "epoch": 0.2768097744959335, "grad_norm": 0.32571935653686523, "learning_rate": 0.0001446678537542729, "loss": 1.356, "step": 21302 }, { "epoch": 0.27682276903984937, "grad_norm": 0.4207615256309509, "learning_rate": 0.00014466525429236149, "loss": 1.3703, "step": 21303 }, { "epoch": 0.27683576358376527, "grad_norm": 0.36844712495803833, "learning_rate": 0.00014466265483045008, "loss": 1.1713, "step": 21304 }, { "epoch": 0.2768487581276811, "grad_norm": 0.41885948181152344, "learning_rate": 0.00014466005536853873, "loss": 1.2881, "step": 21305 }, { "epoch": 0.276861752671597, "grad_norm": 0.3904375433921814, "learning_rate": 0.00014465745590662733, "loss": 1.4986, "step": 21306 }, { "epoch": 0.27687474721551286, "grad_norm": 0.45266616344451904, "learning_rate": 0.00014465485644471596, "loss": 1.3446, "step": 21307 }, { "epoch": 0.27688774175942876, "grad_norm": 0.32533466815948486, "learning_rate": 0.00014465225698280455, "loss": 1.5466, "step": 21308 }, { "epoch": 0.2769007363033446, "grad_norm": 0.3485800325870514, "learning_rate": 0.00014464965752089318, "loss": 1.3886, "step": 21309 }, { "epoch": 0.2769137308472605, "grad_norm": 0.46611183881759644, "learning_rate": 0.0001446470580589818, "loss": 1.5632, "step": 21310 }, { "epoch": 0.27692672539117635, "grad_norm": 0.42077791690826416, "learning_rate": 0.0001446444585970704, "loss": 1.4926, "step": 21311 }, { "epoch": 0.27693971993509225, "grad_norm": 0.33902305364608765, "learning_rate": 0.00014464185913515902, "loss": 1.2663, "step": 21312 }, { "epoch": 0.2769527144790081, "grad_norm": 0.3745257556438446, "learning_rate": 0.00014463925967324765, "loss": 1.4413, "step": 21313 }, { "epoch": 0.276965709022924, "grad_norm": 0.44815024733543396, "learning_rate": 0.00014463666021133627, "loss": 1.4831, "step": 21314 }, { "epoch": 0.27697870356683985, "grad_norm": 0.3737947940826416, "learning_rate": 0.00014463406074942487, "loss": 1.4351, "step": 21315 }, { "epoch": 0.27699169811075575, "grad_norm": 0.38703247904777527, "learning_rate": 0.00014463146128751347, "loss": 1.3067, "step": 21316 }, { "epoch": 0.27700469265467165, "grad_norm": 0.4382397532463074, "learning_rate": 0.00014462886182560212, "loss": 1.4665, "step": 21317 }, { "epoch": 0.2770176871985875, "grad_norm": 0.381588876247406, "learning_rate": 0.00014462626236369072, "loss": 1.5344, "step": 21318 }, { "epoch": 0.2770306817425034, "grad_norm": 0.4803544282913208, "learning_rate": 0.00014462366290177934, "loss": 1.4596, "step": 21319 }, { "epoch": 0.27704367628641924, "grad_norm": 0.3316510021686554, "learning_rate": 0.00014462106343986797, "loss": 1.348, "step": 21320 }, { "epoch": 0.27705667083033514, "grad_norm": 0.3640795648097992, "learning_rate": 0.00014461846397795656, "loss": 1.5092, "step": 21321 }, { "epoch": 0.277069665374251, "grad_norm": 0.40990835428237915, "learning_rate": 0.0001446158645160452, "loss": 1.5255, "step": 21322 }, { "epoch": 0.2770826599181669, "grad_norm": 0.33405521512031555, "learning_rate": 0.00014461326505413379, "loss": 1.4087, "step": 21323 }, { "epoch": 0.27709565446208273, "grad_norm": 0.4905710220336914, "learning_rate": 0.00014461066559222244, "loss": 1.4012, "step": 21324 }, { "epoch": 0.27710864900599863, "grad_norm": 0.5047950744628906, "learning_rate": 0.00014460806613031103, "loss": 1.4779, "step": 21325 }, { "epoch": 0.2771216435499145, "grad_norm": 0.3084236979484558, "learning_rate": 0.00014460546666839966, "loss": 1.3, "step": 21326 }, { "epoch": 0.2771346380938304, "grad_norm": 0.4295514225959778, "learning_rate": 0.00014460286720648826, "loss": 1.4081, "step": 21327 }, { "epoch": 0.2771476326377462, "grad_norm": 0.3383934497833252, "learning_rate": 0.00014460026774457688, "loss": 1.4153, "step": 21328 }, { "epoch": 0.2771606271816621, "grad_norm": 0.297966867685318, "learning_rate": 0.0001445976682826655, "loss": 1.402, "step": 21329 }, { "epoch": 0.27717362172557797, "grad_norm": 0.4542658030986786, "learning_rate": 0.0001445950688207541, "loss": 1.5341, "step": 21330 }, { "epoch": 0.27718661626949387, "grad_norm": 0.361622154712677, "learning_rate": 0.00014459246935884273, "loss": 1.2048, "step": 21331 }, { "epoch": 0.2771996108134097, "grad_norm": 0.39191052317619324, "learning_rate": 0.00014458986989693135, "loss": 1.3645, "step": 21332 }, { "epoch": 0.2772126053573256, "grad_norm": 0.34734442830085754, "learning_rate": 0.00014458727043501995, "loss": 1.3434, "step": 21333 }, { "epoch": 0.27722559990124146, "grad_norm": 0.4504390358924866, "learning_rate": 0.00014458467097310857, "loss": 1.3666, "step": 21334 }, { "epoch": 0.27723859444515736, "grad_norm": 0.3920741379261017, "learning_rate": 0.00014458207151119717, "loss": 1.5056, "step": 21335 }, { "epoch": 0.2772515889890732, "grad_norm": 0.3147026598453522, "learning_rate": 0.00014457947204928582, "loss": 1.3457, "step": 21336 }, { "epoch": 0.2772645835329891, "grad_norm": 0.38812771439552307, "learning_rate": 0.00014457687258737442, "loss": 1.3301, "step": 21337 }, { "epoch": 0.27727757807690495, "grad_norm": 0.34487760066986084, "learning_rate": 0.00014457427312546304, "loss": 1.5975, "step": 21338 }, { "epoch": 0.27729057262082085, "grad_norm": 0.4248666763305664, "learning_rate": 0.00014457167366355164, "loss": 1.3343, "step": 21339 }, { "epoch": 0.2773035671647367, "grad_norm": 0.37901660799980164, "learning_rate": 0.00014456907420164027, "loss": 1.5059, "step": 21340 }, { "epoch": 0.2773165617086526, "grad_norm": 0.41012606024742126, "learning_rate": 0.0001445664747397289, "loss": 1.4179, "step": 21341 }, { "epoch": 0.27732955625256844, "grad_norm": 0.3304790258407593, "learning_rate": 0.0001445638752778175, "loss": 1.4316, "step": 21342 }, { "epoch": 0.27734255079648434, "grad_norm": 0.3572097718715668, "learning_rate": 0.0001445612758159061, "loss": 1.3586, "step": 21343 }, { "epoch": 0.2773555453404002, "grad_norm": 0.33713775873184204, "learning_rate": 0.00014455867635399474, "loss": 1.4721, "step": 21344 }, { "epoch": 0.2773685398843161, "grad_norm": 0.40653857588768005, "learning_rate": 0.00014455607689208333, "loss": 1.3655, "step": 21345 }, { "epoch": 0.27738153442823194, "grad_norm": 0.41890978813171387, "learning_rate": 0.00014455347743017196, "loss": 1.3493, "step": 21346 }, { "epoch": 0.27739452897214784, "grad_norm": 0.3786761462688446, "learning_rate": 0.00014455087796826056, "loss": 1.1891, "step": 21347 }, { "epoch": 0.2774075235160637, "grad_norm": 0.4871302545070648, "learning_rate": 0.0001445482785063492, "loss": 1.4943, "step": 21348 }, { "epoch": 0.2774205180599796, "grad_norm": 0.40408241748809814, "learning_rate": 0.0001445456790444378, "loss": 1.5567, "step": 21349 }, { "epoch": 0.2774335126038954, "grad_norm": 0.4766120910644531, "learning_rate": 0.00014454307958252643, "loss": 1.853, "step": 21350 }, { "epoch": 0.2774465071478113, "grad_norm": 0.3546866476535797, "learning_rate": 0.00014454048012061503, "loss": 1.4671, "step": 21351 }, { "epoch": 0.2774595016917272, "grad_norm": 0.3619031012058258, "learning_rate": 0.00014453788065870365, "loss": 1.3083, "step": 21352 }, { "epoch": 0.2774724962356431, "grad_norm": 0.4772902727127075, "learning_rate": 0.00014453528119679228, "loss": 1.5611, "step": 21353 }, { "epoch": 0.2774854907795589, "grad_norm": 0.3812859058380127, "learning_rate": 0.00014453268173488087, "loss": 1.3227, "step": 21354 }, { "epoch": 0.2774984853234748, "grad_norm": 0.46992361545562744, "learning_rate": 0.0001445300822729695, "loss": 1.6204, "step": 21355 }, { "epoch": 0.27751147986739066, "grad_norm": 0.4284652769565582, "learning_rate": 0.00014452748281105812, "loss": 1.3658, "step": 21356 }, { "epoch": 0.27752447441130657, "grad_norm": 0.41886651515960693, "learning_rate": 0.00014452488334914675, "loss": 1.4634, "step": 21357 }, { "epoch": 0.2775374689552224, "grad_norm": 0.41843563318252563, "learning_rate": 0.00014452228388723534, "loss": 1.4561, "step": 21358 }, { "epoch": 0.2775504634991383, "grad_norm": 0.38557982444763184, "learning_rate": 0.00014451968442532397, "loss": 1.5675, "step": 21359 }, { "epoch": 0.27756345804305416, "grad_norm": 0.42279666662216187, "learning_rate": 0.0001445170849634126, "loss": 1.456, "step": 21360 }, { "epoch": 0.27757645258697006, "grad_norm": 0.2636811137199402, "learning_rate": 0.0001445144855015012, "loss": 1.4615, "step": 21361 }, { "epoch": 0.2775894471308859, "grad_norm": 0.32072141766548157, "learning_rate": 0.00014451188603958981, "loss": 1.2438, "step": 21362 }, { "epoch": 0.2776024416748018, "grad_norm": 0.43834319710731506, "learning_rate": 0.00014450928657767844, "loss": 1.395, "step": 21363 }, { "epoch": 0.27761543621871765, "grad_norm": 0.40107449889183044, "learning_rate": 0.00014450668711576704, "loss": 1.4272, "step": 21364 }, { "epoch": 0.27762843076263355, "grad_norm": 0.5505580306053162, "learning_rate": 0.00014450408765385566, "loss": 1.4463, "step": 21365 }, { "epoch": 0.2776414253065494, "grad_norm": 0.416530966758728, "learning_rate": 0.00014450148819194426, "loss": 1.4453, "step": 21366 }, { "epoch": 0.2776544198504653, "grad_norm": 0.4096967875957489, "learning_rate": 0.0001444988887300329, "loss": 1.6375, "step": 21367 }, { "epoch": 0.27766741439438114, "grad_norm": 0.4949805438518524, "learning_rate": 0.0001444962892681215, "loss": 1.4885, "step": 21368 }, { "epoch": 0.27768040893829704, "grad_norm": 0.4033469557762146, "learning_rate": 0.00014449368980621013, "loss": 1.3952, "step": 21369 }, { "epoch": 0.2776934034822129, "grad_norm": 0.48273131251335144, "learning_rate": 0.00014449109034429873, "loss": 1.6422, "step": 21370 }, { "epoch": 0.2777063980261288, "grad_norm": 0.4689628481864929, "learning_rate": 0.00014448849088238735, "loss": 1.5062, "step": 21371 }, { "epoch": 0.27771939257004463, "grad_norm": 0.5330589413642883, "learning_rate": 0.00014448589142047598, "loss": 1.4142, "step": 21372 }, { "epoch": 0.27773238711396053, "grad_norm": 0.41915202140808105, "learning_rate": 0.00014448329195856458, "loss": 1.6137, "step": 21373 }, { "epoch": 0.2777453816578764, "grad_norm": 0.3730369806289673, "learning_rate": 0.0001444806924966532, "loss": 1.1628, "step": 21374 }, { "epoch": 0.2777583762017923, "grad_norm": 0.3279553949832916, "learning_rate": 0.00014447809303474182, "loss": 1.2664, "step": 21375 }, { "epoch": 0.2777713707457081, "grad_norm": 0.45137903094291687, "learning_rate": 0.00014447549357283042, "loss": 1.4193, "step": 21376 }, { "epoch": 0.277784365289624, "grad_norm": 0.3433552384376526, "learning_rate": 0.00014447289411091905, "loss": 1.4443, "step": 21377 }, { "epoch": 0.27779735983353987, "grad_norm": 0.3838362395763397, "learning_rate": 0.00014447029464900764, "loss": 1.3011, "step": 21378 }, { "epoch": 0.27781035437745577, "grad_norm": 0.3855244517326355, "learning_rate": 0.0001444676951870963, "loss": 1.413, "step": 21379 }, { "epoch": 0.2778233489213716, "grad_norm": 0.40140780806541443, "learning_rate": 0.0001444650957251849, "loss": 1.4696, "step": 21380 }, { "epoch": 0.2778363434652875, "grad_norm": 0.4046088755130768, "learning_rate": 0.00014446249626327352, "loss": 1.5458, "step": 21381 }, { "epoch": 0.27784933800920336, "grad_norm": 0.37515345215797424, "learning_rate": 0.00014445989680136211, "loss": 1.2169, "step": 21382 }, { "epoch": 0.27786233255311926, "grad_norm": 0.43619510531425476, "learning_rate": 0.00014445729733945074, "loss": 1.3219, "step": 21383 }, { "epoch": 0.2778753270970351, "grad_norm": 0.3891080915927887, "learning_rate": 0.00014445469787753936, "loss": 1.4813, "step": 21384 }, { "epoch": 0.277888321640951, "grad_norm": 0.2551535665988922, "learning_rate": 0.00014445209841562796, "loss": 1.3341, "step": 21385 }, { "epoch": 0.27790131618486685, "grad_norm": 0.507038414478302, "learning_rate": 0.00014444949895371659, "loss": 1.3524, "step": 21386 }, { "epoch": 0.27791431072878275, "grad_norm": 0.3655197024345398, "learning_rate": 0.0001444468994918052, "loss": 1.388, "step": 21387 }, { "epoch": 0.2779273052726986, "grad_norm": 0.43492814898490906, "learning_rate": 0.0001444443000298938, "loss": 1.273, "step": 21388 }, { "epoch": 0.2779402998166145, "grad_norm": 0.4352020025253296, "learning_rate": 0.00014444170056798243, "loss": 1.33, "step": 21389 }, { "epoch": 0.27795329436053035, "grad_norm": 0.494403600692749, "learning_rate": 0.00014443910110607103, "loss": 1.4167, "step": 21390 }, { "epoch": 0.27796628890444625, "grad_norm": 0.32938453555107117, "learning_rate": 0.00014443650164415968, "loss": 1.4163, "step": 21391 }, { "epoch": 0.27797928344836215, "grad_norm": 0.4741702377796173, "learning_rate": 0.00014443390218224828, "loss": 1.4508, "step": 21392 }, { "epoch": 0.277992277992278, "grad_norm": 0.46993911266326904, "learning_rate": 0.0001444313027203369, "loss": 1.5122, "step": 21393 }, { "epoch": 0.2780052725361939, "grad_norm": 0.3995822072029114, "learning_rate": 0.0001444287032584255, "loss": 1.5962, "step": 21394 }, { "epoch": 0.27801826708010974, "grad_norm": 0.38425421714782715, "learning_rate": 0.00014442610379651412, "loss": 1.3499, "step": 21395 }, { "epoch": 0.27803126162402564, "grad_norm": 0.3769197165966034, "learning_rate": 0.00014442350433460275, "loss": 1.3154, "step": 21396 }, { "epoch": 0.2780442561679415, "grad_norm": 0.48159024119377136, "learning_rate": 0.00014442090487269135, "loss": 1.5015, "step": 21397 }, { "epoch": 0.2780572507118574, "grad_norm": 0.3942597210407257, "learning_rate": 0.00014441830541078, "loss": 1.247, "step": 21398 }, { "epoch": 0.27807024525577323, "grad_norm": 0.4537292718887329, "learning_rate": 0.0001444157059488686, "loss": 1.4541, "step": 21399 }, { "epoch": 0.27808323979968913, "grad_norm": 0.4368032217025757, "learning_rate": 0.0001444131064869572, "loss": 1.3595, "step": 21400 }, { "epoch": 0.278096234343605, "grad_norm": 0.37032121419906616, "learning_rate": 0.00014441050702504582, "loss": 1.3364, "step": 21401 }, { "epoch": 0.2781092288875209, "grad_norm": 0.4966641068458557, "learning_rate": 0.00014440790756313444, "loss": 1.4888, "step": 21402 }, { "epoch": 0.2781222234314367, "grad_norm": 0.3264123499393463, "learning_rate": 0.00014440530810122307, "loss": 1.4118, "step": 21403 }, { "epoch": 0.2781352179753526, "grad_norm": 0.33183181285858154, "learning_rate": 0.00014440270863931166, "loss": 1.4755, "step": 21404 }, { "epoch": 0.27814821251926847, "grad_norm": 0.4152382016181946, "learning_rate": 0.0001444001091774003, "loss": 1.4971, "step": 21405 }, { "epoch": 0.27816120706318437, "grad_norm": 0.33708927035331726, "learning_rate": 0.0001443975097154889, "loss": 1.3765, "step": 21406 }, { "epoch": 0.2781742016071002, "grad_norm": 0.43458428978919983, "learning_rate": 0.0001443949102535775, "loss": 1.3403, "step": 21407 }, { "epoch": 0.2781871961510161, "grad_norm": 0.38059690594673157, "learning_rate": 0.00014439231079166613, "loss": 1.4151, "step": 21408 }, { "epoch": 0.27820019069493196, "grad_norm": 0.3961121737957001, "learning_rate": 0.00014438971132975473, "loss": 1.3189, "step": 21409 }, { "epoch": 0.27821318523884786, "grad_norm": 0.3854229152202606, "learning_rate": 0.00014438711186784338, "loss": 1.2997, "step": 21410 }, { "epoch": 0.2782261797827637, "grad_norm": 0.3332661986351013, "learning_rate": 0.00014438451240593198, "loss": 1.221, "step": 21411 }, { "epoch": 0.2782391743266796, "grad_norm": 0.5238799452781677, "learning_rate": 0.0001443819129440206, "loss": 1.4279, "step": 21412 }, { "epoch": 0.27825216887059545, "grad_norm": 0.4394322633743286, "learning_rate": 0.0001443793134821092, "loss": 1.3998, "step": 21413 }, { "epoch": 0.27826516341451135, "grad_norm": 0.44008713960647583, "learning_rate": 0.00014437671402019783, "loss": 1.6128, "step": 21414 }, { "epoch": 0.2782781579584272, "grad_norm": 0.39046064019203186, "learning_rate": 0.00014437411455828645, "loss": 1.5418, "step": 21415 }, { "epoch": 0.2782911525023431, "grad_norm": 0.4449542760848999, "learning_rate": 0.00014437151509637505, "loss": 1.5318, "step": 21416 }, { "epoch": 0.27830414704625894, "grad_norm": 0.5111715197563171, "learning_rate": 0.00014436891563446367, "loss": 1.5035, "step": 21417 }, { "epoch": 0.27831714159017484, "grad_norm": 0.4284152388572693, "learning_rate": 0.0001443663161725523, "loss": 1.3336, "step": 21418 }, { "epoch": 0.2783301361340907, "grad_norm": 0.384360134601593, "learning_rate": 0.0001443637167106409, "loss": 1.2077, "step": 21419 }, { "epoch": 0.2783431306780066, "grad_norm": 0.3995853662490845, "learning_rate": 0.00014436111724872952, "loss": 1.452, "step": 21420 }, { "epoch": 0.27835612522192243, "grad_norm": 0.4239235520362854, "learning_rate": 0.00014435851778681812, "loss": 1.5758, "step": 21421 }, { "epoch": 0.27836911976583834, "grad_norm": 0.3954913914203644, "learning_rate": 0.00014435591832490677, "loss": 1.6059, "step": 21422 }, { "epoch": 0.2783821143097542, "grad_norm": 0.42043933272361755, "learning_rate": 0.00014435331886299537, "loss": 1.5903, "step": 21423 }, { "epoch": 0.2783951088536701, "grad_norm": 0.4186195433139801, "learning_rate": 0.000144350719401084, "loss": 1.3663, "step": 21424 }, { "epoch": 0.2784081033975859, "grad_norm": 0.469321608543396, "learning_rate": 0.0001443481199391726, "loss": 1.4055, "step": 21425 }, { "epoch": 0.2784210979415018, "grad_norm": 0.4266015887260437, "learning_rate": 0.0001443455204772612, "loss": 1.4899, "step": 21426 }, { "epoch": 0.2784340924854177, "grad_norm": 0.48333337903022766, "learning_rate": 0.00014434292101534984, "loss": 1.4089, "step": 21427 }, { "epoch": 0.2784470870293336, "grad_norm": 0.3953308165073395, "learning_rate": 0.00014434032155343843, "loss": 1.5657, "step": 21428 }, { "epoch": 0.2784600815732494, "grad_norm": 0.33619922399520874, "learning_rate": 0.00014433772209152706, "loss": 1.2991, "step": 21429 }, { "epoch": 0.2784730761171653, "grad_norm": 0.36046817898750305, "learning_rate": 0.00014433512262961568, "loss": 1.192, "step": 21430 }, { "epoch": 0.27848607066108116, "grad_norm": 0.456960529088974, "learning_rate": 0.00014433252316770428, "loss": 1.3118, "step": 21431 }, { "epoch": 0.27849906520499706, "grad_norm": 0.49748027324676514, "learning_rate": 0.0001443299237057929, "loss": 1.4176, "step": 21432 }, { "epoch": 0.2785120597489129, "grad_norm": 0.43698936700820923, "learning_rate": 0.00014432732424388153, "loss": 1.2421, "step": 21433 }, { "epoch": 0.2785250542928288, "grad_norm": 0.4454978406429291, "learning_rate": 0.00014432472478197015, "loss": 1.3019, "step": 21434 }, { "epoch": 0.27853804883674466, "grad_norm": 0.44687652587890625, "learning_rate": 0.00014432212532005875, "loss": 1.5089, "step": 21435 }, { "epoch": 0.27855104338066056, "grad_norm": 0.5089172124862671, "learning_rate": 0.00014431952585814738, "loss": 1.2937, "step": 21436 }, { "epoch": 0.2785640379245764, "grad_norm": 0.4406977891921997, "learning_rate": 0.000144316926396236, "loss": 1.3908, "step": 21437 }, { "epoch": 0.2785770324684923, "grad_norm": 0.48522382974624634, "learning_rate": 0.0001443143269343246, "loss": 1.4225, "step": 21438 }, { "epoch": 0.27859002701240815, "grad_norm": 0.33807873725891113, "learning_rate": 0.00014431172747241322, "loss": 1.406, "step": 21439 }, { "epoch": 0.27860302155632405, "grad_norm": 0.5246123671531677, "learning_rate": 0.00014430912801050182, "loss": 1.5528, "step": 21440 }, { "epoch": 0.2786160161002399, "grad_norm": 0.590869128704071, "learning_rate": 0.00014430652854859047, "loss": 1.5029, "step": 21441 }, { "epoch": 0.2786290106441558, "grad_norm": 0.3561863899230957, "learning_rate": 0.00014430392908667907, "loss": 1.4338, "step": 21442 }, { "epoch": 0.27864200518807164, "grad_norm": 0.4269315302371979, "learning_rate": 0.00014430132962476767, "loss": 1.3754, "step": 21443 }, { "epoch": 0.27865499973198754, "grad_norm": 0.4255005717277527, "learning_rate": 0.0001442987301628563, "loss": 1.2201, "step": 21444 }, { "epoch": 0.2786679942759034, "grad_norm": 0.4089432954788208, "learning_rate": 0.00014429613070094492, "loss": 1.4634, "step": 21445 }, { "epoch": 0.2786809888198193, "grad_norm": 0.3521561920642853, "learning_rate": 0.00014429353123903354, "loss": 1.1944, "step": 21446 }, { "epoch": 0.27869398336373513, "grad_norm": 0.33145517110824585, "learning_rate": 0.00014429093177712214, "loss": 1.2196, "step": 21447 }, { "epoch": 0.27870697790765103, "grad_norm": 0.3241947293281555, "learning_rate": 0.00014428833231521076, "loss": 1.3463, "step": 21448 }, { "epoch": 0.2787199724515669, "grad_norm": 0.46330612897872925, "learning_rate": 0.00014428573285329939, "loss": 1.3855, "step": 21449 }, { "epoch": 0.2787329669954828, "grad_norm": 0.43466782569885254, "learning_rate": 0.00014428313339138798, "loss": 1.6249, "step": 21450 }, { "epoch": 0.2787459615393986, "grad_norm": 0.41936221718788147, "learning_rate": 0.0001442805339294766, "loss": 1.3844, "step": 21451 }, { "epoch": 0.2787589560833145, "grad_norm": 0.5308575630187988, "learning_rate": 0.0001442779344675652, "loss": 1.4612, "step": 21452 }, { "epoch": 0.27877195062723037, "grad_norm": 0.3371976315975189, "learning_rate": 0.00014427533500565386, "loss": 1.5368, "step": 21453 }, { "epoch": 0.27878494517114627, "grad_norm": 0.4401633143424988, "learning_rate": 0.00014427273554374245, "loss": 1.5234, "step": 21454 }, { "epoch": 0.2787979397150621, "grad_norm": 0.356117844581604, "learning_rate": 0.00014427013608183105, "loss": 1.5649, "step": 21455 }, { "epoch": 0.278810934258978, "grad_norm": 0.40017110109329224, "learning_rate": 0.00014426753661991968, "loss": 1.3235, "step": 21456 }, { "epoch": 0.27882392880289386, "grad_norm": 0.4322781562805176, "learning_rate": 0.0001442649371580083, "loss": 1.3192, "step": 21457 }, { "epoch": 0.27883692334680976, "grad_norm": 0.3859092593193054, "learning_rate": 0.00014426233769609692, "loss": 1.3756, "step": 21458 }, { "epoch": 0.2788499178907256, "grad_norm": 0.39356598258018494, "learning_rate": 0.00014425973823418552, "loss": 1.3102, "step": 21459 }, { "epoch": 0.2788629124346415, "grad_norm": 0.48195910453796387, "learning_rate": 0.00014425713877227415, "loss": 1.4914, "step": 21460 }, { "epoch": 0.27887590697855735, "grad_norm": 0.37488916516304016, "learning_rate": 0.00014425453931036277, "loss": 1.4812, "step": 21461 }, { "epoch": 0.27888890152247325, "grad_norm": 0.2964920103549957, "learning_rate": 0.00014425193984845137, "loss": 1.4049, "step": 21462 }, { "epoch": 0.2789018960663891, "grad_norm": 0.34389278292655945, "learning_rate": 0.00014424934038654, "loss": 1.3698, "step": 21463 }, { "epoch": 0.278914890610305, "grad_norm": 0.4370003640651703, "learning_rate": 0.0001442467409246286, "loss": 1.4366, "step": 21464 }, { "epoch": 0.27892788515422084, "grad_norm": 0.4175202250480652, "learning_rate": 0.00014424414146271724, "loss": 1.3644, "step": 21465 }, { "epoch": 0.27894087969813675, "grad_norm": 0.3859788477420807, "learning_rate": 0.00014424154200080584, "loss": 1.5084, "step": 21466 }, { "epoch": 0.2789538742420526, "grad_norm": 0.2370905876159668, "learning_rate": 0.00014423894253889444, "loss": 1.4443, "step": 21467 }, { "epoch": 0.2789668687859685, "grad_norm": 0.32241150736808777, "learning_rate": 0.00014423634307698306, "loss": 1.4162, "step": 21468 }, { "epoch": 0.2789798633298844, "grad_norm": 0.35621413588523865, "learning_rate": 0.00014423374361507169, "loss": 1.1357, "step": 21469 }, { "epoch": 0.27899285787380024, "grad_norm": 0.40914133191108704, "learning_rate": 0.0001442311441531603, "loss": 1.3368, "step": 21470 }, { "epoch": 0.27900585241771614, "grad_norm": 0.45546963810920715, "learning_rate": 0.0001442285446912489, "loss": 1.4786, "step": 21471 }, { "epoch": 0.279018846961632, "grad_norm": 0.4365185499191284, "learning_rate": 0.00014422594522933753, "loss": 1.5744, "step": 21472 }, { "epoch": 0.2790318415055479, "grad_norm": 0.43023788928985596, "learning_rate": 0.00014422334576742616, "loss": 1.5829, "step": 21473 }, { "epoch": 0.27904483604946373, "grad_norm": 0.45840564370155334, "learning_rate": 0.00014422074630551475, "loss": 1.3198, "step": 21474 }, { "epoch": 0.27905783059337963, "grad_norm": 0.5318276882171631, "learning_rate": 0.00014421814684360338, "loss": 1.4954, "step": 21475 }, { "epoch": 0.2790708251372955, "grad_norm": 0.37725311517715454, "learning_rate": 0.000144215547381692, "loss": 1.4431, "step": 21476 }, { "epoch": 0.2790838196812114, "grad_norm": 0.35171905159950256, "learning_rate": 0.00014421294791978063, "loss": 1.2492, "step": 21477 }, { "epoch": 0.2790968142251272, "grad_norm": 0.5016863346099854, "learning_rate": 0.00014421034845786922, "loss": 1.3518, "step": 21478 }, { "epoch": 0.2791098087690431, "grad_norm": 0.3473861515522003, "learning_rate": 0.00014420774899595785, "loss": 1.5334, "step": 21479 }, { "epoch": 0.27912280331295897, "grad_norm": 0.39675071835517883, "learning_rate": 0.00014420514953404647, "loss": 1.423, "step": 21480 }, { "epoch": 0.27913579785687487, "grad_norm": 0.3445226550102234, "learning_rate": 0.00014420255007213507, "loss": 1.5077, "step": 21481 }, { "epoch": 0.2791487924007907, "grad_norm": 0.38240480422973633, "learning_rate": 0.0001441999506102237, "loss": 1.3547, "step": 21482 }, { "epoch": 0.2791617869447066, "grad_norm": 0.42996129393577576, "learning_rate": 0.0001441973511483123, "loss": 1.6199, "step": 21483 }, { "epoch": 0.27917478148862246, "grad_norm": 0.3516026437282562, "learning_rate": 0.00014419475168640092, "loss": 1.3657, "step": 21484 }, { "epoch": 0.27918777603253836, "grad_norm": 0.39256930351257324, "learning_rate": 0.00014419215222448954, "loss": 1.2887, "step": 21485 }, { "epoch": 0.2792007705764542, "grad_norm": 0.3857741951942444, "learning_rate": 0.00014418955276257814, "loss": 1.3836, "step": 21486 }, { "epoch": 0.2792137651203701, "grad_norm": 0.4462112784385681, "learning_rate": 0.00014418695330066676, "loss": 1.3532, "step": 21487 }, { "epoch": 0.27922675966428595, "grad_norm": 0.42959532141685486, "learning_rate": 0.0001441843538387554, "loss": 1.4041, "step": 21488 }, { "epoch": 0.27923975420820185, "grad_norm": 0.36009982228279114, "learning_rate": 0.000144181754376844, "loss": 1.4038, "step": 21489 }, { "epoch": 0.2792527487521177, "grad_norm": 0.35454922914505005, "learning_rate": 0.0001441791549149326, "loss": 1.3783, "step": 21490 }, { "epoch": 0.2792657432960336, "grad_norm": 0.4293123781681061, "learning_rate": 0.00014417655545302123, "loss": 1.464, "step": 21491 }, { "epoch": 0.27927873783994944, "grad_norm": 0.44025054574012756, "learning_rate": 0.00014417395599110986, "loss": 1.32, "step": 21492 }, { "epoch": 0.27929173238386534, "grad_norm": 0.3445517122745514, "learning_rate": 0.00014417135652919846, "loss": 1.4833, "step": 21493 }, { "epoch": 0.2793047269277812, "grad_norm": 0.43521058559417725, "learning_rate": 0.00014416875706728708, "loss": 1.3956, "step": 21494 }, { "epoch": 0.2793177214716971, "grad_norm": 0.45968952775001526, "learning_rate": 0.00014416615760537568, "loss": 1.3773, "step": 21495 }, { "epoch": 0.27933071601561293, "grad_norm": 0.3523813486099243, "learning_rate": 0.00014416355814346433, "loss": 1.3612, "step": 21496 }, { "epoch": 0.27934371055952884, "grad_norm": 0.3348783254623413, "learning_rate": 0.00014416095868155293, "loss": 1.3667, "step": 21497 }, { "epoch": 0.2793567051034447, "grad_norm": 0.4334190785884857, "learning_rate": 0.00014415835921964152, "loss": 1.4579, "step": 21498 }, { "epoch": 0.2793696996473606, "grad_norm": 0.34385475516319275, "learning_rate": 0.00014415575975773015, "loss": 1.2887, "step": 21499 }, { "epoch": 0.2793826941912764, "grad_norm": 0.2839030921459198, "learning_rate": 0.00014415316029581877, "loss": 1.1337, "step": 21500 }, { "epoch": 0.2793956887351923, "grad_norm": 0.383198082447052, "learning_rate": 0.0001441505608339074, "loss": 1.2243, "step": 21501 }, { "epoch": 0.27940868327910817, "grad_norm": 0.39565548300743103, "learning_rate": 0.000144147961371996, "loss": 1.194, "step": 21502 }, { "epoch": 0.2794216778230241, "grad_norm": 0.4241720139980316, "learning_rate": 0.00014414536191008462, "loss": 1.2956, "step": 21503 }, { "epoch": 0.2794346723669399, "grad_norm": 0.35848429799079895, "learning_rate": 0.00014414276244817324, "loss": 1.5074, "step": 21504 }, { "epoch": 0.2794476669108558, "grad_norm": 0.5230528712272644, "learning_rate": 0.00014414016298626184, "loss": 1.5408, "step": 21505 }, { "epoch": 0.27946066145477166, "grad_norm": 0.34090229868888855, "learning_rate": 0.00014413756352435047, "loss": 1.4492, "step": 21506 }, { "epoch": 0.27947365599868756, "grad_norm": 0.4034985899925232, "learning_rate": 0.0001441349640624391, "loss": 1.427, "step": 21507 }, { "epoch": 0.2794866505426034, "grad_norm": 0.35721829533576965, "learning_rate": 0.00014413236460052772, "loss": 1.3985, "step": 21508 }, { "epoch": 0.2794996450865193, "grad_norm": 0.44309142231941223, "learning_rate": 0.0001441297651386163, "loss": 1.2786, "step": 21509 }, { "epoch": 0.27951263963043516, "grad_norm": 0.44928547739982605, "learning_rate": 0.0001441271656767049, "loss": 1.5405, "step": 21510 }, { "epoch": 0.27952563417435106, "grad_norm": 0.38292816281318665, "learning_rate": 0.00014412456621479356, "loss": 1.3209, "step": 21511 }, { "epoch": 0.2795386287182669, "grad_norm": 0.28443366289138794, "learning_rate": 0.00014412196675288216, "loss": 1.3041, "step": 21512 }, { "epoch": 0.2795516232621828, "grad_norm": 0.4442163109779358, "learning_rate": 0.00014411936729097078, "loss": 1.4933, "step": 21513 }, { "epoch": 0.27956461780609865, "grad_norm": 0.4082050025463104, "learning_rate": 0.00014411676782905938, "loss": 1.4507, "step": 21514 }, { "epoch": 0.27957761235001455, "grad_norm": 0.3807099163532257, "learning_rate": 0.000144114168367148, "loss": 1.4412, "step": 21515 }, { "epoch": 0.2795906068939304, "grad_norm": 0.3784346878528595, "learning_rate": 0.00014411156890523663, "loss": 1.3442, "step": 21516 }, { "epoch": 0.2796036014378463, "grad_norm": 0.43854638934135437, "learning_rate": 0.00014410896944332523, "loss": 1.468, "step": 21517 }, { "epoch": 0.27961659598176214, "grad_norm": 0.5125160813331604, "learning_rate": 0.00014410636998141385, "loss": 1.2961, "step": 21518 }, { "epoch": 0.27962959052567804, "grad_norm": 0.3407018184661865, "learning_rate": 0.00014410377051950248, "loss": 1.463, "step": 21519 }, { "epoch": 0.2796425850695939, "grad_norm": 0.46935951709747314, "learning_rate": 0.0001441011710575911, "loss": 1.4124, "step": 21520 }, { "epoch": 0.2796555796135098, "grad_norm": 0.41228967905044556, "learning_rate": 0.0001440985715956797, "loss": 1.2428, "step": 21521 }, { "epoch": 0.27966857415742563, "grad_norm": 0.3404655158519745, "learning_rate": 0.0001440959721337683, "loss": 1.4957, "step": 21522 }, { "epoch": 0.27968156870134153, "grad_norm": 0.4039502739906311, "learning_rate": 0.00014409337267185695, "loss": 1.408, "step": 21523 }, { "epoch": 0.2796945632452574, "grad_norm": 0.4064100980758667, "learning_rate": 0.00014409077320994554, "loss": 1.5325, "step": 21524 }, { "epoch": 0.2797075577891733, "grad_norm": 0.41681787371635437, "learning_rate": 0.00014408817374803417, "loss": 1.4673, "step": 21525 }, { "epoch": 0.2797205523330891, "grad_norm": 0.4816579520702362, "learning_rate": 0.00014408557428612277, "loss": 1.4488, "step": 21526 }, { "epoch": 0.279733546877005, "grad_norm": 0.5031188130378723, "learning_rate": 0.0001440829748242114, "loss": 1.5229, "step": 21527 }, { "epoch": 0.27974654142092087, "grad_norm": 0.36926984786987305, "learning_rate": 0.00014408037536230002, "loss": 1.5312, "step": 21528 }, { "epoch": 0.27975953596483677, "grad_norm": 0.43403682112693787, "learning_rate": 0.0001440777759003886, "loss": 1.4893, "step": 21529 }, { "epoch": 0.2797725305087526, "grad_norm": 0.4647897779941559, "learning_rate": 0.00014407517643847724, "loss": 1.3366, "step": 21530 }, { "epoch": 0.2797855250526685, "grad_norm": 0.38596251606941223, "learning_rate": 0.00014407257697656586, "loss": 1.1498, "step": 21531 }, { "epoch": 0.27979851959658436, "grad_norm": 0.2629759609699249, "learning_rate": 0.00014406997751465449, "loss": 1.3319, "step": 21532 }, { "epoch": 0.27981151414050026, "grad_norm": 0.39126014709472656, "learning_rate": 0.00014406737805274308, "loss": 1.2858, "step": 21533 }, { "epoch": 0.2798245086844161, "grad_norm": 0.3864976167678833, "learning_rate": 0.0001440647785908317, "loss": 1.349, "step": 21534 }, { "epoch": 0.279837503228332, "grad_norm": 0.42169812321662903, "learning_rate": 0.00014406217912892033, "loss": 1.4357, "step": 21535 }, { "epoch": 0.27985049777224785, "grad_norm": 0.41296932101249695, "learning_rate": 0.00014405957966700893, "loss": 1.536, "step": 21536 }, { "epoch": 0.27986349231616375, "grad_norm": 0.40680354833602905, "learning_rate": 0.00014405698020509755, "loss": 1.2904, "step": 21537 }, { "epoch": 0.2798764868600796, "grad_norm": 0.5047711730003357, "learning_rate": 0.00014405438074318615, "loss": 1.49, "step": 21538 }, { "epoch": 0.2798894814039955, "grad_norm": 0.46680948138237, "learning_rate": 0.00014405178128127478, "loss": 1.4756, "step": 21539 }, { "epoch": 0.27990247594791134, "grad_norm": 0.3409785032272339, "learning_rate": 0.0001440491818193634, "loss": 1.4024, "step": 21540 }, { "epoch": 0.27991547049182725, "grad_norm": 0.42886868119239807, "learning_rate": 0.000144046582357452, "loss": 1.5185, "step": 21541 }, { "epoch": 0.2799284650357431, "grad_norm": 0.4366057217121124, "learning_rate": 0.00014404398289554062, "loss": 1.4912, "step": 21542 }, { "epoch": 0.279941459579659, "grad_norm": 0.3587425947189331, "learning_rate": 0.00014404138343362925, "loss": 1.4523, "step": 21543 }, { "epoch": 0.27995445412357484, "grad_norm": 0.3492141664028168, "learning_rate": 0.00014403878397171787, "loss": 1.2518, "step": 21544 }, { "epoch": 0.27996744866749074, "grad_norm": 0.298484206199646, "learning_rate": 0.00014403618450980647, "loss": 1.3221, "step": 21545 }, { "epoch": 0.27998044321140664, "grad_norm": 0.3970220386981964, "learning_rate": 0.0001440335850478951, "loss": 1.4704, "step": 21546 }, { "epoch": 0.2799934377553225, "grad_norm": 0.40950071811676025, "learning_rate": 0.00014403098558598372, "loss": 1.5809, "step": 21547 }, { "epoch": 0.2800064322992384, "grad_norm": 0.46853384375572205, "learning_rate": 0.00014402838612407232, "loss": 1.4397, "step": 21548 }, { "epoch": 0.28001942684315423, "grad_norm": 0.4517922103404999, "learning_rate": 0.00014402578666216094, "loss": 1.3388, "step": 21549 }, { "epoch": 0.28003242138707013, "grad_norm": 0.36487752199172974, "learning_rate": 0.00014402318720024956, "loss": 1.3091, "step": 21550 }, { "epoch": 0.280045415930986, "grad_norm": 0.4536622166633606, "learning_rate": 0.00014402058773833816, "loss": 1.4841, "step": 21551 }, { "epoch": 0.2800584104749019, "grad_norm": 0.4552665650844574, "learning_rate": 0.00014401798827642679, "loss": 1.4781, "step": 21552 }, { "epoch": 0.2800714050188177, "grad_norm": 0.4001098871231079, "learning_rate": 0.00014401538881451538, "loss": 1.3086, "step": 21553 }, { "epoch": 0.2800843995627336, "grad_norm": 0.4407338500022888, "learning_rate": 0.00014401278935260404, "loss": 1.4137, "step": 21554 }, { "epoch": 0.28009739410664947, "grad_norm": 0.4241715967655182, "learning_rate": 0.00014401018989069263, "loss": 1.5933, "step": 21555 }, { "epoch": 0.28011038865056537, "grad_norm": 0.45190882682800293, "learning_rate": 0.00014400759042878126, "loss": 1.3828, "step": 21556 }, { "epoch": 0.2801233831944812, "grad_norm": 0.42788925766944885, "learning_rate": 0.00014400499096686985, "loss": 1.3792, "step": 21557 }, { "epoch": 0.2801363777383971, "grad_norm": 0.347611665725708, "learning_rate": 0.00014400239150495848, "loss": 1.614, "step": 21558 }, { "epoch": 0.28014937228231296, "grad_norm": 0.5511483550071716, "learning_rate": 0.0001439997920430471, "loss": 1.3305, "step": 21559 }, { "epoch": 0.28016236682622886, "grad_norm": 0.4634404480457306, "learning_rate": 0.0001439971925811357, "loss": 1.4258, "step": 21560 }, { "epoch": 0.2801753613701447, "grad_norm": 0.42640987038612366, "learning_rate": 0.00014399459311922433, "loss": 1.3595, "step": 21561 }, { "epoch": 0.2801883559140606, "grad_norm": 0.35330328345298767, "learning_rate": 0.00014399199365731295, "loss": 1.2516, "step": 21562 }, { "epoch": 0.28020135045797645, "grad_norm": 0.4085612893104553, "learning_rate": 0.00014398939419540157, "loss": 1.4904, "step": 21563 }, { "epoch": 0.28021434500189235, "grad_norm": 0.40327513217926025, "learning_rate": 0.00014398679473349017, "loss": 1.3826, "step": 21564 }, { "epoch": 0.2802273395458082, "grad_norm": 0.4792408049106598, "learning_rate": 0.00014398419527157877, "loss": 1.5326, "step": 21565 }, { "epoch": 0.2802403340897241, "grad_norm": 0.3872530162334442, "learning_rate": 0.00014398159580966742, "loss": 1.2945, "step": 21566 }, { "epoch": 0.28025332863363994, "grad_norm": 0.3294652998447418, "learning_rate": 0.00014397899634775602, "loss": 1.3299, "step": 21567 }, { "epoch": 0.28026632317755584, "grad_norm": 0.3749648630619049, "learning_rate": 0.00014397639688584464, "loss": 1.3798, "step": 21568 }, { "epoch": 0.2802793177214717, "grad_norm": 0.39000073075294495, "learning_rate": 0.00014397379742393324, "loss": 1.5696, "step": 21569 }, { "epoch": 0.2802923122653876, "grad_norm": 0.31261932849884033, "learning_rate": 0.00014397119796202186, "loss": 1.3209, "step": 21570 }, { "epoch": 0.28030530680930343, "grad_norm": 0.3025438189506531, "learning_rate": 0.0001439685985001105, "loss": 1.2002, "step": 21571 }, { "epoch": 0.28031830135321933, "grad_norm": 0.3718826472759247, "learning_rate": 0.00014396599903819909, "loss": 1.4044, "step": 21572 }, { "epoch": 0.2803312958971352, "grad_norm": 0.3508923649787903, "learning_rate": 0.0001439633995762877, "loss": 1.341, "step": 21573 }, { "epoch": 0.2803442904410511, "grad_norm": 0.3179311454296112, "learning_rate": 0.00014396080011437634, "loss": 1.369, "step": 21574 }, { "epoch": 0.2803572849849669, "grad_norm": 0.3752168118953705, "learning_rate": 0.00014395820065246496, "loss": 1.3316, "step": 21575 }, { "epoch": 0.2803702795288828, "grad_norm": 0.4096318781375885, "learning_rate": 0.00014395560119055356, "loss": 1.3735, "step": 21576 }, { "epoch": 0.28038327407279867, "grad_norm": 0.3741842210292816, "learning_rate": 0.00014395300172864215, "loss": 1.4975, "step": 21577 }, { "epoch": 0.2803962686167146, "grad_norm": 0.3936016857624054, "learning_rate": 0.0001439504022667308, "loss": 1.378, "step": 21578 }, { "epoch": 0.2804092631606304, "grad_norm": 0.48750174045562744, "learning_rate": 0.0001439478028048194, "loss": 1.6248, "step": 21579 }, { "epoch": 0.2804222577045463, "grad_norm": 0.37635859847068787, "learning_rate": 0.00014394520334290803, "loss": 1.1349, "step": 21580 }, { "epoch": 0.28043525224846216, "grad_norm": 0.33232641220092773, "learning_rate": 0.00014394260388099665, "loss": 1.3849, "step": 21581 }, { "epoch": 0.28044824679237806, "grad_norm": 0.39684557914733887, "learning_rate": 0.00014394000441908525, "loss": 1.3981, "step": 21582 }, { "epoch": 0.2804612413362939, "grad_norm": 0.4057500958442688, "learning_rate": 0.00014393740495717387, "loss": 1.5069, "step": 21583 }, { "epoch": 0.2804742358802098, "grad_norm": 0.41019001603126526, "learning_rate": 0.00014393480549526247, "loss": 1.5021, "step": 21584 }, { "epoch": 0.28048723042412566, "grad_norm": 0.48081421852111816, "learning_rate": 0.00014393220603335112, "loss": 1.5573, "step": 21585 }, { "epoch": 0.28050022496804156, "grad_norm": 0.38263487815856934, "learning_rate": 0.00014392960657143972, "loss": 1.3049, "step": 21586 }, { "epoch": 0.2805132195119574, "grad_norm": 0.46623101830482483, "learning_rate": 0.00014392700710952834, "loss": 1.3493, "step": 21587 }, { "epoch": 0.2805262140558733, "grad_norm": 0.4086287021636963, "learning_rate": 0.00014392440764761694, "loss": 1.3976, "step": 21588 }, { "epoch": 0.28053920859978915, "grad_norm": 0.3839130401611328, "learning_rate": 0.00014392180818570557, "loss": 1.2677, "step": 21589 }, { "epoch": 0.28055220314370505, "grad_norm": 0.5059996843338013, "learning_rate": 0.0001439192087237942, "loss": 1.5403, "step": 21590 }, { "epoch": 0.2805651976876209, "grad_norm": 0.35892558097839355, "learning_rate": 0.0001439166092618828, "loss": 1.4145, "step": 21591 }, { "epoch": 0.2805781922315368, "grad_norm": 0.4133143723011017, "learning_rate": 0.0001439140097999714, "loss": 1.5327, "step": 21592 }, { "epoch": 0.28059118677545264, "grad_norm": 0.3344416916370392, "learning_rate": 0.00014391141033806004, "loss": 1.4399, "step": 21593 }, { "epoch": 0.28060418131936854, "grad_norm": 0.2513716518878937, "learning_rate": 0.00014390881087614864, "loss": 1.3628, "step": 21594 }, { "epoch": 0.2806171758632844, "grad_norm": 0.4003777801990509, "learning_rate": 0.00014390621141423726, "loss": 1.2635, "step": 21595 }, { "epoch": 0.2806301704072003, "grad_norm": 0.3850751221179962, "learning_rate": 0.00014390361195232586, "loss": 1.3411, "step": 21596 }, { "epoch": 0.28064316495111613, "grad_norm": 0.35250312089920044, "learning_rate": 0.0001439010124904145, "loss": 1.4228, "step": 21597 }, { "epoch": 0.28065615949503203, "grad_norm": 0.39210233092308044, "learning_rate": 0.0001438984130285031, "loss": 1.4644, "step": 21598 }, { "epoch": 0.2806691540389479, "grad_norm": 0.39643925428390503, "learning_rate": 0.00014389581356659173, "loss": 1.2163, "step": 21599 }, { "epoch": 0.2806821485828638, "grad_norm": 0.3537564277648926, "learning_rate": 0.00014389321410468033, "loss": 1.1641, "step": 21600 }, { "epoch": 0.2806951431267796, "grad_norm": 0.5541505217552185, "learning_rate": 0.00014389061464276895, "loss": 1.5356, "step": 21601 }, { "epoch": 0.2807081376706955, "grad_norm": 0.3661820590496063, "learning_rate": 0.00014388801518085758, "loss": 1.3512, "step": 21602 }, { "epoch": 0.28072113221461137, "grad_norm": 0.4304031729698181, "learning_rate": 0.00014388541571894617, "loss": 1.6132, "step": 21603 }, { "epoch": 0.28073412675852727, "grad_norm": 0.44018834829330444, "learning_rate": 0.0001438828162570348, "loss": 1.3435, "step": 21604 }, { "epoch": 0.2807471213024431, "grad_norm": 0.42459535598754883, "learning_rate": 0.00014388021679512342, "loss": 1.4118, "step": 21605 }, { "epoch": 0.280760115846359, "grad_norm": 0.5122298002243042, "learning_rate": 0.00014387761733321202, "loss": 1.5491, "step": 21606 }, { "epoch": 0.28077311039027486, "grad_norm": 0.3131127655506134, "learning_rate": 0.00014387501787130064, "loss": 1.2474, "step": 21607 }, { "epoch": 0.28078610493419076, "grad_norm": 0.4709749221801758, "learning_rate": 0.00014387241840938924, "loss": 1.5598, "step": 21608 }, { "epoch": 0.2807990994781066, "grad_norm": 0.4325936734676361, "learning_rate": 0.0001438698189474779, "loss": 1.4556, "step": 21609 }, { "epoch": 0.2808120940220225, "grad_norm": 0.42554768919944763, "learning_rate": 0.0001438672194855665, "loss": 1.4737, "step": 21610 }, { "epoch": 0.28082508856593835, "grad_norm": 0.4179743528366089, "learning_rate": 0.00014386462002365512, "loss": 1.4633, "step": 21611 }, { "epoch": 0.28083808310985425, "grad_norm": 0.37180235981941223, "learning_rate": 0.0001438620205617437, "loss": 1.2854, "step": 21612 }, { "epoch": 0.2808510776537701, "grad_norm": 0.4042123556137085, "learning_rate": 0.00014385942109983234, "loss": 1.6696, "step": 21613 }, { "epoch": 0.280864072197686, "grad_norm": 0.3201903700828552, "learning_rate": 0.00014385682163792096, "loss": 1.1982, "step": 21614 }, { "epoch": 0.28087706674160184, "grad_norm": 0.4126211702823639, "learning_rate": 0.00014385422217600956, "loss": 1.3844, "step": 21615 }, { "epoch": 0.28089006128551774, "grad_norm": 0.3589378297328949, "learning_rate": 0.00014385162271409818, "loss": 1.4956, "step": 21616 }, { "epoch": 0.2809030558294336, "grad_norm": 0.3412325382232666, "learning_rate": 0.0001438490232521868, "loss": 1.2256, "step": 21617 }, { "epoch": 0.2809160503733495, "grad_norm": 0.4087387025356293, "learning_rate": 0.00014384642379027543, "loss": 1.2813, "step": 21618 }, { "epoch": 0.28092904491726534, "grad_norm": 0.39880043268203735, "learning_rate": 0.00014384382432836403, "loss": 1.3026, "step": 21619 }, { "epoch": 0.28094203946118124, "grad_norm": 0.4509781301021576, "learning_rate": 0.00014384122486645265, "loss": 1.5823, "step": 21620 }, { "epoch": 0.28095503400509714, "grad_norm": 0.4069022536277771, "learning_rate": 0.00014383862540454128, "loss": 1.3807, "step": 21621 }, { "epoch": 0.280968028549013, "grad_norm": 0.39547228813171387, "learning_rate": 0.00014383602594262988, "loss": 1.1127, "step": 21622 }, { "epoch": 0.2809810230929289, "grad_norm": 0.38375696539878845, "learning_rate": 0.0001438334264807185, "loss": 1.2796, "step": 21623 }, { "epoch": 0.28099401763684473, "grad_norm": 0.36782675981521606, "learning_rate": 0.00014383082701880713, "loss": 1.2502, "step": 21624 }, { "epoch": 0.28100701218076063, "grad_norm": 0.43092501163482666, "learning_rate": 0.00014382822755689572, "loss": 1.4962, "step": 21625 }, { "epoch": 0.2810200067246765, "grad_norm": 0.41716548800468445, "learning_rate": 0.00014382562809498435, "loss": 1.5437, "step": 21626 }, { "epoch": 0.2810330012685924, "grad_norm": 0.4281606376171112, "learning_rate": 0.00014382302863307294, "loss": 1.4638, "step": 21627 }, { "epoch": 0.2810459958125082, "grad_norm": 0.42682525515556335, "learning_rate": 0.0001438204291711616, "loss": 1.365, "step": 21628 }, { "epoch": 0.2810589903564241, "grad_norm": 0.44276565313339233, "learning_rate": 0.0001438178297092502, "loss": 1.4691, "step": 21629 }, { "epoch": 0.28107198490033997, "grad_norm": 0.38427865505218506, "learning_rate": 0.00014381523024733882, "loss": 1.331, "step": 21630 }, { "epoch": 0.28108497944425587, "grad_norm": 0.46011996269226074, "learning_rate": 0.00014381263078542742, "loss": 1.6645, "step": 21631 }, { "epoch": 0.2810979739881717, "grad_norm": 0.4322268068790436, "learning_rate": 0.00014381003132351604, "loss": 1.3499, "step": 21632 }, { "epoch": 0.2811109685320876, "grad_norm": 0.38161829113960266, "learning_rate": 0.00014380743186160466, "loss": 1.2661, "step": 21633 }, { "epoch": 0.28112396307600346, "grad_norm": 0.4218149185180664, "learning_rate": 0.00014380483239969326, "loss": 1.4326, "step": 21634 }, { "epoch": 0.28113695761991936, "grad_norm": 0.3905273377895355, "learning_rate": 0.0001438022329377819, "loss": 1.4713, "step": 21635 }, { "epoch": 0.2811499521638352, "grad_norm": 0.4071963429450989, "learning_rate": 0.0001437996334758705, "loss": 1.4191, "step": 21636 }, { "epoch": 0.2811629467077511, "grad_norm": 0.5114985704421997, "learning_rate": 0.0001437970340139591, "loss": 1.6028, "step": 21637 }, { "epoch": 0.28117594125166695, "grad_norm": 0.5102190971374512, "learning_rate": 0.00014379443455204773, "loss": 1.3921, "step": 21638 }, { "epoch": 0.28118893579558285, "grad_norm": 0.4546314477920532, "learning_rate": 0.00014379183509013633, "loss": 1.5311, "step": 21639 }, { "epoch": 0.2812019303394987, "grad_norm": 0.43757355213165283, "learning_rate": 0.00014378923562822498, "loss": 1.4271, "step": 21640 }, { "epoch": 0.2812149248834146, "grad_norm": 0.4324467182159424, "learning_rate": 0.00014378663616631358, "loss": 1.4025, "step": 21641 }, { "epoch": 0.28122791942733044, "grad_norm": 0.4437076449394226, "learning_rate": 0.0001437840367044022, "loss": 1.508, "step": 21642 }, { "epoch": 0.28124091397124634, "grad_norm": 0.42971280217170715, "learning_rate": 0.0001437814372424908, "loss": 1.477, "step": 21643 }, { "epoch": 0.2812539085151622, "grad_norm": 0.4508056342601776, "learning_rate": 0.00014377883778057943, "loss": 1.5793, "step": 21644 }, { "epoch": 0.2812669030590781, "grad_norm": 0.3884173631668091, "learning_rate": 0.00014377623831866805, "loss": 1.5439, "step": 21645 }, { "epoch": 0.28127989760299393, "grad_norm": 0.4531201422214508, "learning_rate": 0.00014377363885675665, "loss": 1.4845, "step": 21646 }, { "epoch": 0.28129289214690983, "grad_norm": 0.42577460408210754, "learning_rate": 0.00014377103939484527, "loss": 1.3472, "step": 21647 }, { "epoch": 0.2813058866908257, "grad_norm": 0.4146845042705536, "learning_rate": 0.0001437684399329339, "loss": 1.4833, "step": 21648 }, { "epoch": 0.2813188812347416, "grad_norm": 0.37905383110046387, "learning_rate": 0.0001437658404710225, "loss": 1.4207, "step": 21649 }, { "epoch": 0.2813318757786574, "grad_norm": 0.46708089113235474, "learning_rate": 0.00014376324100911112, "loss": 1.3243, "step": 21650 }, { "epoch": 0.2813448703225733, "grad_norm": 0.5826940536499023, "learning_rate": 0.00014376064154719972, "loss": 1.4155, "step": 21651 }, { "epoch": 0.28135786486648917, "grad_norm": 0.4636281430721283, "learning_rate": 0.00014375804208528837, "loss": 1.4661, "step": 21652 }, { "epoch": 0.28137085941040507, "grad_norm": 0.38280513882637024, "learning_rate": 0.00014375544262337696, "loss": 1.4645, "step": 21653 }, { "epoch": 0.2813838539543209, "grad_norm": 0.41344279050827026, "learning_rate": 0.0001437528431614656, "loss": 1.4461, "step": 21654 }, { "epoch": 0.2813968484982368, "grad_norm": 0.46122950315475464, "learning_rate": 0.00014375024369955421, "loss": 1.4312, "step": 21655 }, { "epoch": 0.28140984304215266, "grad_norm": 0.39554929733276367, "learning_rate": 0.0001437476442376428, "loss": 1.4736, "step": 21656 }, { "epoch": 0.28142283758606856, "grad_norm": 0.4467918872833252, "learning_rate": 0.00014374504477573144, "loss": 1.5203, "step": 21657 }, { "epoch": 0.2814358321299844, "grad_norm": 0.32374107837677, "learning_rate": 0.00014374244531382003, "loss": 1.3642, "step": 21658 }, { "epoch": 0.2814488266739003, "grad_norm": 0.4517781734466553, "learning_rate": 0.00014373984585190868, "loss": 1.413, "step": 21659 }, { "epoch": 0.28146182121781615, "grad_norm": 0.5237303376197815, "learning_rate": 0.00014373724638999728, "loss": 1.4423, "step": 21660 }, { "epoch": 0.28147481576173206, "grad_norm": 0.3754831850528717, "learning_rate": 0.00014373464692808588, "loss": 1.3623, "step": 21661 }, { "epoch": 0.2814878103056479, "grad_norm": 0.42003101110458374, "learning_rate": 0.0001437320474661745, "loss": 1.4949, "step": 21662 }, { "epoch": 0.2815008048495638, "grad_norm": 0.4061402976512909, "learning_rate": 0.00014372944800426313, "loss": 1.5131, "step": 21663 }, { "epoch": 0.28151379939347965, "grad_norm": 0.4422401487827301, "learning_rate": 0.00014372684854235175, "loss": 1.4298, "step": 21664 }, { "epoch": 0.28152679393739555, "grad_norm": 0.45768260955810547, "learning_rate": 0.00014372424908044035, "loss": 1.3497, "step": 21665 }, { "epoch": 0.2815397884813114, "grad_norm": 0.40046998858451843, "learning_rate": 0.00014372164961852897, "loss": 1.2383, "step": 21666 }, { "epoch": 0.2815527830252273, "grad_norm": 0.4369175434112549, "learning_rate": 0.0001437190501566176, "loss": 1.5959, "step": 21667 }, { "epoch": 0.28156577756914314, "grad_norm": 0.3852870762348175, "learning_rate": 0.0001437164506947062, "loss": 1.3339, "step": 21668 }, { "epoch": 0.28157877211305904, "grad_norm": 0.3945727050304413, "learning_rate": 0.00014371385123279482, "loss": 1.219, "step": 21669 }, { "epoch": 0.2815917666569749, "grad_norm": 0.4296533763408661, "learning_rate": 0.00014371125177088342, "loss": 1.4618, "step": 21670 }, { "epoch": 0.2816047612008908, "grad_norm": 0.35974952578544617, "learning_rate": 0.00014370865230897207, "loss": 1.4644, "step": 21671 }, { "epoch": 0.28161775574480663, "grad_norm": 0.46538421511650085, "learning_rate": 0.00014370605284706067, "loss": 1.3791, "step": 21672 }, { "epoch": 0.28163075028872253, "grad_norm": 0.33413568139076233, "learning_rate": 0.0001437034533851493, "loss": 1.4626, "step": 21673 }, { "epoch": 0.2816437448326384, "grad_norm": 0.4075777232646942, "learning_rate": 0.0001437008539232379, "loss": 1.3576, "step": 21674 }, { "epoch": 0.2816567393765543, "grad_norm": 0.35055723786354065, "learning_rate": 0.0001436982544613265, "loss": 1.3586, "step": 21675 }, { "epoch": 0.2816697339204701, "grad_norm": 0.5690978169441223, "learning_rate": 0.00014369565499941514, "loss": 1.3709, "step": 21676 }, { "epoch": 0.281682728464386, "grad_norm": 0.3441561460494995, "learning_rate": 0.00014369305553750374, "loss": 1.2507, "step": 21677 }, { "epoch": 0.28169572300830187, "grad_norm": 0.36188071966171265, "learning_rate": 0.00014369045607559236, "loss": 1.4651, "step": 21678 }, { "epoch": 0.28170871755221777, "grad_norm": 0.47652989625930786, "learning_rate": 0.00014368785661368098, "loss": 1.4811, "step": 21679 }, { "epoch": 0.2817217120961336, "grad_norm": 0.48744603991508484, "learning_rate": 0.00014368525715176958, "loss": 1.4591, "step": 21680 }, { "epoch": 0.2817347066400495, "grad_norm": 0.4956151247024536, "learning_rate": 0.0001436826576898582, "loss": 1.4191, "step": 21681 }, { "epoch": 0.28174770118396536, "grad_norm": 0.41328027844429016, "learning_rate": 0.0001436800582279468, "loss": 1.3678, "step": 21682 }, { "epoch": 0.28176069572788126, "grad_norm": 0.34563249349594116, "learning_rate": 0.00014367745876603546, "loss": 1.3641, "step": 21683 }, { "epoch": 0.2817736902717971, "grad_norm": 0.3507574200630188, "learning_rate": 0.00014367485930412405, "loss": 1.2525, "step": 21684 }, { "epoch": 0.281786684815713, "grad_norm": 0.4258630573749542, "learning_rate": 0.00014367225984221268, "loss": 1.4797, "step": 21685 }, { "epoch": 0.28179967935962885, "grad_norm": 0.44550222158432007, "learning_rate": 0.00014366966038030127, "loss": 1.4483, "step": 21686 }, { "epoch": 0.28181267390354475, "grad_norm": 0.38555362820625305, "learning_rate": 0.0001436670609183899, "loss": 1.3341, "step": 21687 }, { "epoch": 0.2818256684474606, "grad_norm": 0.3425605297088623, "learning_rate": 0.00014366446145647852, "loss": 1.3425, "step": 21688 }, { "epoch": 0.2818386629913765, "grad_norm": 0.4604874551296234, "learning_rate": 0.00014366186199456712, "loss": 1.4784, "step": 21689 }, { "epoch": 0.28185165753529234, "grad_norm": 0.3135378062725067, "learning_rate": 0.00014365926253265575, "loss": 1.2873, "step": 21690 }, { "epoch": 0.28186465207920824, "grad_norm": 0.46984222531318665, "learning_rate": 0.00014365666307074437, "loss": 1.3769, "step": 21691 }, { "epoch": 0.2818776466231241, "grad_norm": 0.32434436678886414, "learning_rate": 0.00014365406360883297, "loss": 1.3276, "step": 21692 }, { "epoch": 0.28189064116704, "grad_norm": 0.30501803755760193, "learning_rate": 0.0001436514641469216, "loss": 1.4448, "step": 21693 }, { "epoch": 0.28190363571095584, "grad_norm": 0.3443998098373413, "learning_rate": 0.00014364886468501022, "loss": 1.5251, "step": 21694 }, { "epoch": 0.28191663025487174, "grad_norm": 0.4193110466003418, "learning_rate": 0.00014364626522309884, "loss": 1.5764, "step": 21695 }, { "epoch": 0.2819296247987876, "grad_norm": 0.38531726598739624, "learning_rate": 0.00014364366576118744, "loss": 1.2305, "step": 21696 }, { "epoch": 0.2819426193427035, "grad_norm": 0.4568133056163788, "learning_rate": 0.00014364106629927606, "loss": 1.368, "step": 21697 }, { "epoch": 0.2819556138866194, "grad_norm": 0.447349488735199, "learning_rate": 0.0001436384668373647, "loss": 1.3013, "step": 21698 }, { "epoch": 0.28196860843053523, "grad_norm": 0.4292466342449188, "learning_rate": 0.00014363586737545328, "loss": 1.527, "step": 21699 }, { "epoch": 0.28198160297445113, "grad_norm": 0.29827407002449036, "learning_rate": 0.0001436332679135419, "loss": 1.0936, "step": 21700 }, { "epoch": 0.281994597518367, "grad_norm": 0.5215676426887512, "learning_rate": 0.0001436306684516305, "loss": 1.463, "step": 21701 }, { "epoch": 0.2820075920622829, "grad_norm": 0.4000895321369171, "learning_rate": 0.00014362806898971916, "loss": 1.4644, "step": 21702 }, { "epoch": 0.2820205866061987, "grad_norm": 0.48442643880844116, "learning_rate": 0.00014362546952780776, "loss": 1.5668, "step": 21703 }, { "epoch": 0.2820335811501146, "grad_norm": 0.3367200791835785, "learning_rate": 0.00014362287006589635, "loss": 1.405, "step": 21704 }, { "epoch": 0.28204657569403047, "grad_norm": 0.30173593759536743, "learning_rate": 0.00014362027060398498, "loss": 1.3217, "step": 21705 }, { "epoch": 0.28205957023794637, "grad_norm": 0.4231071174144745, "learning_rate": 0.0001436176711420736, "loss": 1.3678, "step": 21706 }, { "epoch": 0.2820725647818622, "grad_norm": 0.40426525473594666, "learning_rate": 0.00014361507168016223, "loss": 1.4909, "step": 21707 }, { "epoch": 0.2820855593257781, "grad_norm": 0.3810115158557892, "learning_rate": 0.00014361247221825082, "loss": 1.2473, "step": 21708 }, { "epoch": 0.28209855386969396, "grad_norm": 0.42240890860557556, "learning_rate": 0.00014360987275633945, "loss": 1.3171, "step": 21709 }, { "epoch": 0.28211154841360986, "grad_norm": 0.31563258171081543, "learning_rate": 0.00014360727329442807, "loss": 1.3224, "step": 21710 }, { "epoch": 0.2821245429575257, "grad_norm": 0.4698585867881775, "learning_rate": 0.00014360467383251667, "loss": 1.4441, "step": 21711 }, { "epoch": 0.2821375375014416, "grad_norm": 0.3946951627731323, "learning_rate": 0.0001436020743706053, "loss": 1.4574, "step": 21712 }, { "epoch": 0.28215053204535745, "grad_norm": 0.3217959702014923, "learning_rate": 0.0001435994749086939, "loss": 1.5407, "step": 21713 }, { "epoch": 0.28216352658927335, "grad_norm": 0.45142993330955505, "learning_rate": 0.00014359687544678254, "loss": 1.3828, "step": 21714 }, { "epoch": 0.2821765211331892, "grad_norm": 0.4140826165676117, "learning_rate": 0.00014359427598487114, "loss": 1.3128, "step": 21715 }, { "epoch": 0.2821895156771051, "grad_norm": 0.23171290755271912, "learning_rate": 0.00014359167652295974, "loss": 1.299, "step": 21716 }, { "epoch": 0.28220251022102094, "grad_norm": 0.46328428387641907, "learning_rate": 0.00014358907706104836, "loss": 1.544, "step": 21717 }, { "epoch": 0.28221550476493684, "grad_norm": 0.3866714537143707, "learning_rate": 0.000143586477599137, "loss": 1.3213, "step": 21718 }, { "epoch": 0.2822284993088527, "grad_norm": 0.5351392030715942, "learning_rate": 0.0001435838781372256, "loss": 1.5189, "step": 21719 }, { "epoch": 0.2822414938527686, "grad_norm": 0.39193418622016907, "learning_rate": 0.0001435812786753142, "loss": 1.4766, "step": 21720 }, { "epoch": 0.28225448839668443, "grad_norm": 0.4878564178943634, "learning_rate": 0.00014357867921340283, "loss": 1.4514, "step": 21721 }, { "epoch": 0.28226748294060033, "grad_norm": 0.5170662999153137, "learning_rate": 0.00014357607975149146, "loss": 1.5403, "step": 21722 }, { "epoch": 0.2822804774845162, "grad_norm": 0.6031299829483032, "learning_rate": 0.00014357348028958006, "loss": 1.3689, "step": 21723 }, { "epoch": 0.2822934720284321, "grad_norm": 0.32543259859085083, "learning_rate": 0.00014357088082766868, "loss": 1.4631, "step": 21724 }, { "epoch": 0.2823064665723479, "grad_norm": 0.4092324674129486, "learning_rate": 0.00014356828136575728, "loss": 1.5122, "step": 21725 }, { "epoch": 0.2823194611162638, "grad_norm": 0.47104087471961975, "learning_rate": 0.00014356568190384593, "loss": 1.4428, "step": 21726 }, { "epoch": 0.28233245566017967, "grad_norm": 0.431558221578598, "learning_rate": 0.00014356308244193453, "loss": 1.5317, "step": 21727 }, { "epoch": 0.28234545020409557, "grad_norm": 0.45123809576034546, "learning_rate": 0.00014356048298002312, "loss": 1.4795, "step": 21728 }, { "epoch": 0.2823584447480114, "grad_norm": 0.3939613997936249, "learning_rate": 0.00014355788351811177, "loss": 1.3566, "step": 21729 }, { "epoch": 0.2823714392919273, "grad_norm": 0.3815322816371918, "learning_rate": 0.00014355528405620037, "loss": 1.7065, "step": 21730 }, { "epoch": 0.28238443383584316, "grad_norm": 0.44508302211761475, "learning_rate": 0.000143552684594289, "loss": 1.4391, "step": 21731 }, { "epoch": 0.28239742837975906, "grad_norm": 0.369547039270401, "learning_rate": 0.0001435500851323776, "loss": 1.6936, "step": 21732 }, { "epoch": 0.2824104229236749, "grad_norm": 0.4604247212409973, "learning_rate": 0.00014354748567046622, "loss": 1.4554, "step": 21733 }, { "epoch": 0.2824234174675908, "grad_norm": 0.41318246722221375, "learning_rate": 0.00014354488620855484, "loss": 1.4091, "step": 21734 }, { "epoch": 0.28243641201150665, "grad_norm": 0.4227082133293152, "learning_rate": 0.00014354228674664344, "loss": 1.4725, "step": 21735 }, { "epoch": 0.28244940655542256, "grad_norm": 0.4632466435432434, "learning_rate": 0.00014353968728473206, "loss": 1.3923, "step": 21736 }, { "epoch": 0.2824624010993384, "grad_norm": 0.33906590938568115, "learning_rate": 0.0001435370878228207, "loss": 1.375, "step": 21737 }, { "epoch": 0.2824753956432543, "grad_norm": 0.3812054395675659, "learning_rate": 0.00014353448836090931, "loss": 1.5133, "step": 21738 }, { "epoch": 0.28248839018717015, "grad_norm": 0.4794810116291046, "learning_rate": 0.0001435318888989979, "loss": 1.3677, "step": 21739 }, { "epoch": 0.28250138473108605, "grad_norm": 0.369107186794281, "learning_rate": 0.00014352928943708654, "loss": 1.332, "step": 21740 }, { "epoch": 0.2825143792750019, "grad_norm": 0.5090543031692505, "learning_rate": 0.00014352668997517516, "loss": 1.3189, "step": 21741 }, { "epoch": 0.2825273738189178, "grad_norm": 0.3605302572250366, "learning_rate": 0.00014352409051326376, "loss": 1.4531, "step": 21742 }, { "epoch": 0.28254036836283364, "grad_norm": 0.48940834403038025, "learning_rate": 0.00014352149105135238, "loss": 1.4939, "step": 21743 }, { "epoch": 0.28255336290674954, "grad_norm": 0.3710671067237854, "learning_rate": 0.00014351889158944098, "loss": 1.5996, "step": 21744 }, { "epoch": 0.2825663574506654, "grad_norm": 0.3644084334373474, "learning_rate": 0.0001435162921275296, "loss": 1.4049, "step": 21745 }, { "epoch": 0.2825793519945813, "grad_norm": 0.38626033067703247, "learning_rate": 0.00014351369266561823, "loss": 1.2626, "step": 21746 }, { "epoch": 0.28259234653849713, "grad_norm": 0.42499861121177673, "learning_rate": 0.00014351109320370683, "loss": 1.4287, "step": 21747 }, { "epoch": 0.28260534108241303, "grad_norm": 0.4534740149974823, "learning_rate": 0.00014350849374179545, "loss": 1.3844, "step": 21748 }, { "epoch": 0.2826183356263289, "grad_norm": 0.46116897463798523, "learning_rate": 0.00014350589427988407, "loss": 1.5236, "step": 21749 }, { "epoch": 0.2826313301702448, "grad_norm": 0.4565775990486145, "learning_rate": 0.0001435032948179727, "loss": 1.4928, "step": 21750 }, { "epoch": 0.2826443247141606, "grad_norm": 0.42477887868881226, "learning_rate": 0.0001435006953560613, "loss": 1.548, "step": 21751 }, { "epoch": 0.2826573192580765, "grad_norm": 0.4219886362552643, "learning_rate": 0.00014349809589414992, "loss": 1.4348, "step": 21752 }, { "epoch": 0.28267031380199237, "grad_norm": 0.39015692472457886, "learning_rate": 0.00014349549643223855, "loss": 1.4617, "step": 21753 }, { "epoch": 0.28268330834590827, "grad_norm": 0.3318279981613159, "learning_rate": 0.00014349289697032714, "loss": 1.2249, "step": 21754 }, { "epoch": 0.2826963028898241, "grad_norm": 0.32213252782821655, "learning_rate": 0.00014349029750841577, "loss": 1.4706, "step": 21755 }, { "epoch": 0.28270929743374, "grad_norm": 0.467014878988266, "learning_rate": 0.00014348769804650436, "loss": 1.4635, "step": 21756 }, { "epoch": 0.28272229197765586, "grad_norm": 0.4662379324436188, "learning_rate": 0.000143485098584593, "loss": 1.3668, "step": 21757 }, { "epoch": 0.28273528652157176, "grad_norm": 0.4565616250038147, "learning_rate": 0.00014348249912268161, "loss": 1.5082, "step": 21758 }, { "epoch": 0.2827482810654876, "grad_norm": 0.3721349537372589, "learning_rate": 0.0001434798996607702, "loss": 1.4445, "step": 21759 }, { "epoch": 0.2827612756094035, "grad_norm": 0.42279231548309326, "learning_rate": 0.00014347730019885884, "loss": 1.5313, "step": 21760 }, { "epoch": 0.28277427015331935, "grad_norm": 0.37611299753189087, "learning_rate": 0.00014347470073694746, "loss": 1.3632, "step": 21761 }, { "epoch": 0.28278726469723525, "grad_norm": 0.40103912353515625, "learning_rate": 0.00014347210127503608, "loss": 1.33, "step": 21762 }, { "epoch": 0.2828002592411511, "grad_norm": 0.4093247056007385, "learning_rate": 0.00014346950181312468, "loss": 1.4338, "step": 21763 }, { "epoch": 0.282813253785067, "grad_norm": 0.5236377120018005, "learning_rate": 0.0001434669023512133, "loss": 1.2763, "step": 21764 }, { "epoch": 0.28282624832898284, "grad_norm": 0.3952511250972748, "learning_rate": 0.00014346430288930193, "loss": 1.4751, "step": 21765 }, { "epoch": 0.28283924287289874, "grad_norm": 0.43484461307525635, "learning_rate": 0.00014346170342739053, "loss": 1.3466, "step": 21766 }, { "epoch": 0.2828522374168146, "grad_norm": 0.4340057969093323, "learning_rate": 0.00014345910396547915, "loss": 1.5659, "step": 21767 }, { "epoch": 0.2828652319607305, "grad_norm": 0.39850088953971863, "learning_rate": 0.00014345650450356778, "loss": 1.4022, "step": 21768 }, { "epoch": 0.28287822650464634, "grad_norm": 0.41238662600517273, "learning_rate": 0.0001434539050416564, "loss": 1.416, "step": 21769 }, { "epoch": 0.28289122104856224, "grad_norm": 0.37408211827278137, "learning_rate": 0.000143451305579745, "loss": 1.1999, "step": 21770 }, { "epoch": 0.2829042155924781, "grad_norm": 0.45001137256622314, "learning_rate": 0.0001434487061178336, "loss": 1.4894, "step": 21771 }, { "epoch": 0.282917210136394, "grad_norm": 0.342305988073349, "learning_rate": 0.00014344610665592225, "loss": 1.4074, "step": 21772 }, { "epoch": 0.2829302046803099, "grad_norm": 0.4604296088218689, "learning_rate": 0.00014344350719401085, "loss": 1.4135, "step": 21773 }, { "epoch": 0.2829431992242257, "grad_norm": 0.30350837111473083, "learning_rate": 0.00014344090773209947, "loss": 1.3574, "step": 21774 }, { "epoch": 0.28295619376814163, "grad_norm": 0.37793588638305664, "learning_rate": 0.00014343830827018807, "loss": 1.4875, "step": 21775 }, { "epoch": 0.2829691883120575, "grad_norm": 0.3413355350494385, "learning_rate": 0.0001434357088082767, "loss": 1.3764, "step": 21776 }, { "epoch": 0.2829821828559734, "grad_norm": 0.4076234698295593, "learning_rate": 0.00014343310934636532, "loss": 1.36, "step": 21777 }, { "epoch": 0.2829951773998892, "grad_norm": 0.41776394844055176, "learning_rate": 0.00014343050988445391, "loss": 1.3279, "step": 21778 }, { "epoch": 0.2830081719438051, "grad_norm": 0.37759140133857727, "learning_rate": 0.00014342791042254254, "loss": 1.5197, "step": 21779 }, { "epoch": 0.28302116648772097, "grad_norm": 0.5261659026145935, "learning_rate": 0.00014342531096063116, "loss": 1.4205, "step": 21780 }, { "epoch": 0.28303416103163687, "grad_norm": 0.38115450739860535, "learning_rate": 0.0001434227114987198, "loss": 1.5498, "step": 21781 }, { "epoch": 0.2830471555755527, "grad_norm": 0.42801326513290405, "learning_rate": 0.00014342011203680838, "loss": 1.3893, "step": 21782 }, { "epoch": 0.2830601501194686, "grad_norm": 0.35237276554107666, "learning_rate": 0.00014341751257489698, "loss": 1.424, "step": 21783 }, { "epoch": 0.28307314466338446, "grad_norm": 0.4044218063354492, "learning_rate": 0.00014341491311298563, "loss": 1.6333, "step": 21784 }, { "epoch": 0.28308613920730036, "grad_norm": 0.3681308925151825, "learning_rate": 0.00014341231365107423, "loss": 1.3307, "step": 21785 }, { "epoch": 0.2830991337512162, "grad_norm": 0.37480831146240234, "learning_rate": 0.00014340971418916286, "loss": 1.3504, "step": 21786 }, { "epoch": 0.2831121282951321, "grad_norm": 0.37955859303474426, "learning_rate": 0.00014340711472725145, "loss": 1.4032, "step": 21787 }, { "epoch": 0.28312512283904795, "grad_norm": 0.445197731256485, "learning_rate": 0.00014340451526534008, "loss": 1.4659, "step": 21788 }, { "epoch": 0.28313811738296385, "grad_norm": 0.43626245856285095, "learning_rate": 0.0001434019158034287, "loss": 1.3216, "step": 21789 }, { "epoch": 0.2831511119268797, "grad_norm": 0.4422862231731415, "learning_rate": 0.0001433993163415173, "loss": 1.3629, "step": 21790 }, { "epoch": 0.2831641064707956, "grad_norm": 0.3592076897621155, "learning_rate": 0.00014339671687960592, "loss": 1.2363, "step": 21791 }, { "epoch": 0.28317710101471144, "grad_norm": 0.37475383281707764, "learning_rate": 0.00014339411741769455, "loss": 1.2707, "step": 21792 }, { "epoch": 0.28319009555862734, "grad_norm": 0.35880836844444275, "learning_rate": 0.00014339151795578317, "loss": 1.2994, "step": 21793 }, { "epoch": 0.2832030901025432, "grad_norm": 0.3758575916290283, "learning_rate": 0.00014338891849387177, "loss": 1.502, "step": 21794 }, { "epoch": 0.2832160846464591, "grad_norm": 0.39884600043296814, "learning_rate": 0.0001433863190319604, "loss": 1.5207, "step": 21795 }, { "epoch": 0.28322907919037493, "grad_norm": 0.35125336050987244, "learning_rate": 0.00014338371957004902, "loss": 1.1604, "step": 21796 }, { "epoch": 0.28324207373429083, "grad_norm": 0.4417129158973694, "learning_rate": 0.00014338112010813762, "loss": 1.4518, "step": 21797 }, { "epoch": 0.2832550682782067, "grad_norm": 0.36367693543434143, "learning_rate": 0.00014337852064622624, "loss": 1.5242, "step": 21798 }, { "epoch": 0.2832680628221226, "grad_norm": 0.39889103174209595, "learning_rate": 0.00014337592118431484, "loss": 1.4116, "step": 21799 }, { "epoch": 0.2832810573660384, "grad_norm": 0.3439438045024872, "learning_rate": 0.00014337332172240346, "loss": 1.3042, "step": 21800 }, { "epoch": 0.2832940519099543, "grad_norm": 0.31915032863616943, "learning_rate": 0.0001433707222604921, "loss": 1.5826, "step": 21801 }, { "epoch": 0.28330704645387017, "grad_norm": 0.42528292536735535, "learning_rate": 0.00014336812279858068, "loss": 1.4076, "step": 21802 }, { "epoch": 0.28332004099778607, "grad_norm": 0.3702611029148102, "learning_rate": 0.00014336552333666934, "loss": 1.2547, "step": 21803 }, { "epoch": 0.2833330355417019, "grad_norm": 0.40039560198783875, "learning_rate": 0.00014336292387475793, "loss": 1.4127, "step": 21804 }, { "epoch": 0.2833460300856178, "grad_norm": 0.4360812306404114, "learning_rate": 0.00014336032441284656, "loss": 1.494, "step": 21805 }, { "epoch": 0.28335902462953366, "grad_norm": 0.3242342174053192, "learning_rate": 0.00014335772495093516, "loss": 1.2982, "step": 21806 }, { "epoch": 0.28337201917344956, "grad_norm": 0.4012332260608673, "learning_rate": 0.00014335512548902378, "loss": 1.4413, "step": 21807 }, { "epoch": 0.2833850137173654, "grad_norm": 0.4329386055469513, "learning_rate": 0.0001433525260271124, "loss": 1.5064, "step": 21808 }, { "epoch": 0.2833980082612813, "grad_norm": 0.3515535593032837, "learning_rate": 0.000143349926565201, "loss": 1.5046, "step": 21809 }, { "epoch": 0.28341100280519715, "grad_norm": 0.3048829436302185, "learning_rate": 0.00014334732710328963, "loss": 1.2416, "step": 21810 }, { "epoch": 0.28342399734911305, "grad_norm": 0.4261169135570526, "learning_rate": 0.00014334472764137825, "loss": 1.6335, "step": 21811 }, { "epoch": 0.2834369918930289, "grad_norm": 0.3847494125366211, "learning_rate": 0.00014334212817946685, "loss": 1.3573, "step": 21812 }, { "epoch": 0.2834499864369448, "grad_norm": 0.34324175119400024, "learning_rate": 0.00014333952871755547, "loss": 1.2693, "step": 21813 }, { "epoch": 0.28346298098086065, "grad_norm": 0.32652267813682556, "learning_rate": 0.00014333692925564407, "loss": 1.686, "step": 21814 }, { "epoch": 0.28347597552477655, "grad_norm": 0.4678068161010742, "learning_rate": 0.00014333432979373272, "loss": 1.328, "step": 21815 }, { "epoch": 0.2834889700686924, "grad_norm": 0.38122639060020447, "learning_rate": 0.00014333173033182132, "loss": 1.2032, "step": 21816 }, { "epoch": 0.2835019646126083, "grad_norm": 0.3848731517791748, "learning_rate": 0.00014332913086990994, "loss": 1.2498, "step": 21817 }, { "epoch": 0.28351495915652414, "grad_norm": 0.3523218035697937, "learning_rate": 0.00014332653140799854, "loss": 1.2221, "step": 21818 }, { "epoch": 0.28352795370044004, "grad_norm": 0.44534292817115784, "learning_rate": 0.00014332393194608717, "loss": 1.2493, "step": 21819 }, { "epoch": 0.2835409482443559, "grad_norm": 0.4481651484966278, "learning_rate": 0.0001433213324841758, "loss": 1.441, "step": 21820 }, { "epoch": 0.2835539427882718, "grad_norm": 0.39966532588005066, "learning_rate": 0.0001433187330222644, "loss": 1.3248, "step": 21821 }, { "epoch": 0.28356693733218763, "grad_norm": 0.43956258893013, "learning_rate": 0.000143316133560353, "loss": 1.4088, "step": 21822 }, { "epoch": 0.28357993187610353, "grad_norm": 0.406645804643631, "learning_rate": 0.00014331353409844164, "loss": 1.7489, "step": 21823 }, { "epoch": 0.2835929264200194, "grad_norm": 0.41724923253059387, "learning_rate": 0.00014331093463653026, "loss": 1.3259, "step": 21824 }, { "epoch": 0.2836059209639353, "grad_norm": 0.40026265382766724, "learning_rate": 0.00014330833517461886, "loss": 1.4538, "step": 21825 }, { "epoch": 0.2836189155078511, "grad_norm": 0.523308277130127, "learning_rate": 0.00014330573571270746, "loss": 1.4485, "step": 21826 }, { "epoch": 0.283631910051767, "grad_norm": 0.38762956857681274, "learning_rate": 0.0001433031362507961, "loss": 1.4196, "step": 21827 }, { "epoch": 0.28364490459568287, "grad_norm": 0.35311782360076904, "learning_rate": 0.0001433005367888847, "loss": 1.4274, "step": 21828 }, { "epoch": 0.28365789913959877, "grad_norm": 0.45122388005256653, "learning_rate": 0.00014329793732697333, "loss": 1.4438, "step": 21829 }, { "epoch": 0.2836708936835146, "grad_norm": 0.333778440952301, "learning_rate": 0.00014329533786506193, "loss": 1.2265, "step": 21830 }, { "epoch": 0.2836838882274305, "grad_norm": 0.435007244348526, "learning_rate": 0.00014329273840315055, "loss": 1.4427, "step": 21831 }, { "epoch": 0.28369688277134636, "grad_norm": 0.5377725958824158, "learning_rate": 0.00014329013894123918, "loss": 1.3354, "step": 21832 }, { "epoch": 0.28370987731526226, "grad_norm": 0.3487373888492584, "learning_rate": 0.00014328753947932777, "loss": 1.3189, "step": 21833 }, { "epoch": 0.2837228718591781, "grad_norm": 0.4342208206653595, "learning_rate": 0.0001432849400174164, "loss": 1.2371, "step": 21834 }, { "epoch": 0.283735866403094, "grad_norm": 0.395367294549942, "learning_rate": 0.00014328234055550502, "loss": 1.4169, "step": 21835 }, { "epoch": 0.28374886094700985, "grad_norm": 0.3446871042251587, "learning_rate": 0.00014327974109359365, "loss": 1.5585, "step": 21836 }, { "epoch": 0.28376185549092575, "grad_norm": 0.36191636323928833, "learning_rate": 0.00014327714163168224, "loss": 1.2578, "step": 21837 }, { "epoch": 0.2837748500348416, "grad_norm": 0.5307068824768066, "learning_rate": 0.00014327454216977084, "loss": 1.416, "step": 21838 }, { "epoch": 0.2837878445787575, "grad_norm": 0.36990657448768616, "learning_rate": 0.0001432719427078595, "loss": 1.5035, "step": 21839 }, { "epoch": 0.28380083912267334, "grad_norm": 0.3635912835597992, "learning_rate": 0.0001432693432459481, "loss": 1.476, "step": 21840 }, { "epoch": 0.28381383366658924, "grad_norm": 0.49250268936157227, "learning_rate": 0.00014326674378403671, "loss": 1.6581, "step": 21841 }, { "epoch": 0.2838268282105051, "grad_norm": 0.39382851123809814, "learning_rate": 0.00014326414432212534, "loss": 1.3253, "step": 21842 }, { "epoch": 0.283839822754421, "grad_norm": 0.5141470432281494, "learning_rate": 0.00014326154486021394, "loss": 1.2545, "step": 21843 }, { "epoch": 0.28385281729833683, "grad_norm": 0.3891243636608124, "learning_rate": 0.00014325894539830256, "loss": 1.351, "step": 21844 }, { "epoch": 0.28386581184225274, "grad_norm": 0.40513208508491516, "learning_rate": 0.00014325634593639116, "loss": 1.622, "step": 21845 }, { "epoch": 0.2838788063861686, "grad_norm": 0.4180625081062317, "learning_rate": 0.0001432537464744798, "loss": 1.6297, "step": 21846 }, { "epoch": 0.2838918009300845, "grad_norm": 0.40011677145957947, "learning_rate": 0.0001432511470125684, "loss": 1.3979, "step": 21847 }, { "epoch": 0.2839047954740003, "grad_norm": 0.40850383043289185, "learning_rate": 0.00014324854755065703, "loss": 1.5533, "step": 21848 }, { "epoch": 0.2839177900179162, "grad_norm": 0.3992455005645752, "learning_rate": 0.00014324594808874563, "loss": 1.3892, "step": 21849 }, { "epoch": 0.28393078456183213, "grad_norm": 0.33947646617889404, "learning_rate": 0.00014324334862683425, "loss": 1.2538, "step": 21850 }, { "epoch": 0.283943779105748, "grad_norm": 0.43455737829208374, "learning_rate": 0.00014324074916492288, "loss": 1.5294, "step": 21851 }, { "epoch": 0.2839567736496639, "grad_norm": 0.34516170620918274, "learning_rate": 0.00014323814970301148, "loss": 1.3957, "step": 21852 }, { "epoch": 0.2839697681935797, "grad_norm": 0.3848903477191925, "learning_rate": 0.0001432355502411001, "loss": 1.1959, "step": 21853 }, { "epoch": 0.2839827627374956, "grad_norm": 0.36252716183662415, "learning_rate": 0.00014323295077918872, "loss": 1.4499, "step": 21854 }, { "epoch": 0.28399575728141147, "grad_norm": 0.49076372385025024, "learning_rate": 0.00014323035131727732, "loss": 1.7547, "step": 21855 }, { "epoch": 0.28400875182532737, "grad_norm": 0.4048703610897064, "learning_rate": 0.00014322775185536595, "loss": 1.4122, "step": 21856 }, { "epoch": 0.2840217463692432, "grad_norm": 0.4614471197128296, "learning_rate": 0.00014322515239345454, "loss": 1.4806, "step": 21857 }, { "epoch": 0.2840347409131591, "grad_norm": 0.5605059862136841, "learning_rate": 0.0001432225529315432, "loss": 1.4019, "step": 21858 }, { "epoch": 0.28404773545707496, "grad_norm": 0.5168873071670532, "learning_rate": 0.0001432199534696318, "loss": 1.265, "step": 21859 }, { "epoch": 0.28406073000099086, "grad_norm": 0.494658499956131, "learning_rate": 0.00014321735400772042, "loss": 1.5678, "step": 21860 }, { "epoch": 0.2840737245449067, "grad_norm": 0.39822033047676086, "learning_rate": 0.00014321475454580901, "loss": 1.2623, "step": 21861 }, { "epoch": 0.2840867190888226, "grad_norm": 0.4155806601047516, "learning_rate": 0.00014321215508389764, "loss": 1.6313, "step": 21862 }, { "epoch": 0.28409971363273845, "grad_norm": 0.40179985761642456, "learning_rate": 0.00014320955562198626, "loss": 1.1571, "step": 21863 }, { "epoch": 0.28411270817665435, "grad_norm": 0.3448730707168579, "learning_rate": 0.00014320695616007486, "loss": 1.1556, "step": 21864 }, { "epoch": 0.2841257027205702, "grad_norm": 0.3519286811351776, "learning_rate": 0.00014320435669816349, "loss": 1.1557, "step": 21865 }, { "epoch": 0.2841386972644861, "grad_norm": 0.41627949476242065, "learning_rate": 0.0001432017572362521, "loss": 1.4976, "step": 21866 }, { "epoch": 0.28415169180840194, "grad_norm": 0.28488898277282715, "learning_rate": 0.0001431991577743407, "loss": 1.4623, "step": 21867 }, { "epoch": 0.28416468635231784, "grad_norm": 0.2909516394138336, "learning_rate": 0.00014319655831242933, "loss": 1.4012, "step": 21868 }, { "epoch": 0.2841776808962337, "grad_norm": 0.43699222803115845, "learning_rate": 0.00014319395885051793, "loss": 1.6935, "step": 21869 }, { "epoch": 0.2841906754401496, "grad_norm": 0.47480183839797974, "learning_rate": 0.00014319135938860658, "loss": 1.2782, "step": 21870 }, { "epoch": 0.28420366998406543, "grad_norm": 0.4402593672275543, "learning_rate": 0.00014318875992669518, "loss": 1.5391, "step": 21871 }, { "epoch": 0.28421666452798133, "grad_norm": 0.31280797719955444, "learning_rate": 0.0001431861604647838, "loss": 1.3815, "step": 21872 }, { "epoch": 0.2842296590718972, "grad_norm": 0.2559697926044464, "learning_rate": 0.0001431835610028724, "loss": 1.3309, "step": 21873 }, { "epoch": 0.2842426536158131, "grad_norm": 0.3198792040348053, "learning_rate": 0.00014318096154096102, "loss": 1.4866, "step": 21874 }, { "epoch": 0.2842556481597289, "grad_norm": 0.4694131314754486, "learning_rate": 0.00014317836207904965, "loss": 1.5446, "step": 21875 }, { "epoch": 0.2842686427036448, "grad_norm": 0.343665212392807, "learning_rate": 0.00014317576261713825, "loss": 1.5644, "step": 21876 }, { "epoch": 0.28428163724756067, "grad_norm": 0.3482874631881714, "learning_rate": 0.0001431731631552269, "loss": 1.4043, "step": 21877 }, { "epoch": 0.28429463179147657, "grad_norm": 0.25677087903022766, "learning_rate": 0.0001431705636933155, "loss": 1.4226, "step": 21878 }, { "epoch": 0.2843076263353924, "grad_norm": 0.40079620480537415, "learning_rate": 0.00014316796423140412, "loss": 1.384, "step": 21879 }, { "epoch": 0.2843206208793083, "grad_norm": 0.3208344578742981, "learning_rate": 0.00014316536476949272, "loss": 1.3486, "step": 21880 }, { "epoch": 0.28433361542322416, "grad_norm": 0.4193015694618225, "learning_rate": 0.00014316276530758134, "loss": 1.3767, "step": 21881 }, { "epoch": 0.28434660996714006, "grad_norm": 0.33976733684539795, "learning_rate": 0.00014316016584566997, "loss": 1.4011, "step": 21882 }, { "epoch": 0.2843596045110559, "grad_norm": 0.41534972190856934, "learning_rate": 0.00014315756638375856, "loss": 1.3568, "step": 21883 }, { "epoch": 0.2843725990549718, "grad_norm": 0.4091799557209015, "learning_rate": 0.0001431549669218472, "loss": 1.6021, "step": 21884 }, { "epoch": 0.28438559359888765, "grad_norm": 0.28058111667633057, "learning_rate": 0.0001431523674599358, "loss": 1.3497, "step": 21885 }, { "epoch": 0.28439858814280355, "grad_norm": 0.3813077211380005, "learning_rate": 0.0001431497679980244, "loss": 1.435, "step": 21886 }, { "epoch": 0.2844115826867194, "grad_norm": 0.40862488746643066, "learning_rate": 0.00014314716853611303, "loss": 1.6265, "step": 21887 }, { "epoch": 0.2844245772306353, "grad_norm": 0.39700421690940857, "learning_rate": 0.00014314456907420163, "loss": 1.3329, "step": 21888 }, { "epoch": 0.28443757177455115, "grad_norm": 0.39813247323036194, "learning_rate": 0.00014314196961229028, "loss": 1.41, "step": 21889 }, { "epoch": 0.28445056631846705, "grad_norm": 0.3587433099746704, "learning_rate": 0.00014313937015037888, "loss": 1.3962, "step": 21890 }, { "epoch": 0.2844635608623829, "grad_norm": 0.39923006296157837, "learning_rate": 0.0001431367706884675, "loss": 1.328, "step": 21891 }, { "epoch": 0.2844765554062988, "grad_norm": 0.3837178349494934, "learning_rate": 0.0001431341712265561, "loss": 1.322, "step": 21892 }, { "epoch": 0.28448954995021464, "grad_norm": 0.25855177640914917, "learning_rate": 0.00014313157176464473, "loss": 1.2647, "step": 21893 }, { "epoch": 0.28450254449413054, "grad_norm": 0.35650014877319336, "learning_rate": 0.00014312897230273335, "loss": 1.402, "step": 21894 }, { "epoch": 0.2845155390380464, "grad_norm": 0.4028565585613251, "learning_rate": 0.00014312637284082195, "loss": 1.4435, "step": 21895 }, { "epoch": 0.2845285335819623, "grad_norm": 0.3979090452194214, "learning_rate": 0.00014312377337891057, "loss": 1.566, "step": 21896 }, { "epoch": 0.28454152812587813, "grad_norm": 0.24705497920513153, "learning_rate": 0.0001431211739169992, "loss": 1.4079, "step": 21897 }, { "epoch": 0.28455452266979403, "grad_norm": 0.4081833064556122, "learning_rate": 0.0001431185744550878, "loss": 1.3421, "step": 21898 }, { "epoch": 0.2845675172137099, "grad_norm": 0.46858513355255127, "learning_rate": 0.00014311597499317642, "loss": 1.4073, "step": 21899 }, { "epoch": 0.2845805117576258, "grad_norm": 0.34121426939964294, "learning_rate": 0.00014311337553126502, "loss": 1.2371, "step": 21900 }, { "epoch": 0.2845935063015416, "grad_norm": 0.43017086386680603, "learning_rate": 0.00014311077606935367, "loss": 1.5585, "step": 21901 }, { "epoch": 0.2846065008454575, "grad_norm": 0.3084973692893982, "learning_rate": 0.00014310817660744227, "loss": 1.4033, "step": 21902 }, { "epoch": 0.28461949538937337, "grad_norm": 0.4116935133934021, "learning_rate": 0.0001431055771455309, "loss": 1.4335, "step": 21903 }, { "epoch": 0.28463248993328927, "grad_norm": 0.35372108221054077, "learning_rate": 0.0001431029776836195, "loss": 1.3184, "step": 21904 }, { "epoch": 0.2846454844772051, "grad_norm": 0.38982659578323364, "learning_rate": 0.0001431003782217081, "loss": 1.4081, "step": 21905 }, { "epoch": 0.284658479021121, "grad_norm": 0.4333013892173767, "learning_rate": 0.00014309777875979674, "loss": 1.4882, "step": 21906 }, { "epoch": 0.28467147356503686, "grad_norm": 0.4317880868911743, "learning_rate": 0.00014309517929788533, "loss": 1.4465, "step": 21907 }, { "epoch": 0.28468446810895276, "grad_norm": 0.3072398602962494, "learning_rate": 0.00014309257983597396, "loss": 1.4629, "step": 21908 }, { "epoch": 0.2846974626528686, "grad_norm": 0.3618871867656708, "learning_rate": 0.00014308998037406258, "loss": 1.4559, "step": 21909 }, { "epoch": 0.2847104571967845, "grad_norm": 0.4164143204689026, "learning_rate": 0.00014308738091215118, "loss": 1.3443, "step": 21910 }, { "epoch": 0.28472345174070035, "grad_norm": 0.45673471689224243, "learning_rate": 0.0001430847814502398, "loss": 1.4438, "step": 21911 }, { "epoch": 0.28473644628461625, "grad_norm": 0.33497557044029236, "learning_rate": 0.0001430821819883284, "loss": 1.3079, "step": 21912 }, { "epoch": 0.2847494408285321, "grad_norm": 0.42376720905303955, "learning_rate": 0.00014307958252641705, "loss": 1.4289, "step": 21913 }, { "epoch": 0.284762435372448, "grad_norm": 0.3171910345554352, "learning_rate": 0.00014307698306450565, "loss": 1.2059, "step": 21914 }, { "epoch": 0.28477542991636384, "grad_norm": 0.4601474106311798, "learning_rate": 0.00014307438360259428, "loss": 1.455, "step": 21915 }, { "epoch": 0.28478842446027974, "grad_norm": 0.3594374358654022, "learning_rate": 0.0001430717841406829, "loss": 1.2365, "step": 21916 }, { "epoch": 0.2848014190041956, "grad_norm": 0.4302177131175995, "learning_rate": 0.0001430691846787715, "loss": 1.4431, "step": 21917 }, { "epoch": 0.2848144135481115, "grad_norm": 0.3090600073337555, "learning_rate": 0.00014306658521686012, "loss": 1.2196, "step": 21918 }, { "epoch": 0.28482740809202733, "grad_norm": 0.36141103506088257, "learning_rate": 0.00014306398575494872, "loss": 1.4712, "step": 21919 }, { "epoch": 0.28484040263594324, "grad_norm": 0.5438603162765503, "learning_rate": 0.00014306138629303737, "loss": 1.4388, "step": 21920 }, { "epoch": 0.2848533971798591, "grad_norm": 0.3602411448955536, "learning_rate": 0.00014305878683112597, "loss": 1.5405, "step": 21921 }, { "epoch": 0.284866391723775, "grad_norm": 0.34778353571891785, "learning_rate": 0.00014305618736921457, "loss": 1.487, "step": 21922 }, { "epoch": 0.2848793862676908, "grad_norm": 0.492312490940094, "learning_rate": 0.0001430535879073032, "loss": 1.3588, "step": 21923 }, { "epoch": 0.2848923808116067, "grad_norm": 0.3657194674015045, "learning_rate": 0.00014305098844539181, "loss": 1.256, "step": 21924 }, { "epoch": 0.2849053753555226, "grad_norm": 0.4633488357067108, "learning_rate": 0.00014304838898348044, "loss": 1.4007, "step": 21925 }, { "epoch": 0.2849183698994385, "grad_norm": 0.4802319407463074, "learning_rate": 0.00014304578952156904, "loss": 1.4698, "step": 21926 }, { "epoch": 0.2849313644433544, "grad_norm": 0.36779287457466125, "learning_rate": 0.00014304319005965766, "loss": 1.2843, "step": 21927 }, { "epoch": 0.2849443589872702, "grad_norm": 0.38392651081085205, "learning_rate": 0.00014304059059774629, "loss": 1.2688, "step": 21928 }, { "epoch": 0.2849573535311861, "grad_norm": 0.2964380979537964, "learning_rate": 0.00014303799113583488, "loss": 1.1265, "step": 21929 }, { "epoch": 0.28497034807510196, "grad_norm": 0.28442779183387756, "learning_rate": 0.0001430353916739235, "loss": 1.3179, "step": 21930 }, { "epoch": 0.28498334261901787, "grad_norm": 0.3989967405796051, "learning_rate": 0.0001430327922120121, "loss": 1.2743, "step": 21931 }, { "epoch": 0.2849963371629337, "grad_norm": 0.46536925435066223, "learning_rate": 0.00014303019275010076, "loss": 1.8473, "step": 21932 }, { "epoch": 0.2850093317068496, "grad_norm": 0.43414443731307983, "learning_rate": 0.00014302759328818935, "loss": 1.4503, "step": 21933 }, { "epoch": 0.28502232625076546, "grad_norm": 0.35513585805892944, "learning_rate": 0.00014302499382627795, "loss": 1.2259, "step": 21934 }, { "epoch": 0.28503532079468136, "grad_norm": 0.4347069561481476, "learning_rate": 0.00014302239436436658, "loss": 1.4936, "step": 21935 }, { "epoch": 0.2850483153385972, "grad_norm": 0.4843854010105133, "learning_rate": 0.0001430197949024552, "loss": 1.3628, "step": 21936 }, { "epoch": 0.2850613098825131, "grad_norm": 0.4745556712150574, "learning_rate": 0.00014301719544054382, "loss": 1.5012, "step": 21937 }, { "epoch": 0.28507430442642895, "grad_norm": 0.4343568682670593, "learning_rate": 0.00014301459597863242, "loss": 1.4119, "step": 21938 }, { "epoch": 0.28508729897034485, "grad_norm": 0.42833149433135986, "learning_rate": 0.00014301199651672105, "loss": 1.473, "step": 21939 }, { "epoch": 0.2851002935142607, "grad_norm": 0.41668805480003357, "learning_rate": 0.00014300939705480967, "loss": 1.4469, "step": 21940 }, { "epoch": 0.2851132880581766, "grad_norm": 0.395904004573822, "learning_rate": 0.00014300679759289827, "loss": 1.4087, "step": 21941 }, { "epoch": 0.28512628260209244, "grad_norm": 0.3692234456539154, "learning_rate": 0.0001430041981309869, "loss": 1.2834, "step": 21942 }, { "epoch": 0.28513927714600834, "grad_norm": 0.4075605571269989, "learning_rate": 0.0001430015986690755, "loss": 1.5301, "step": 21943 }, { "epoch": 0.2851522716899242, "grad_norm": 0.3128395974636078, "learning_rate": 0.00014299899920716414, "loss": 1.339, "step": 21944 }, { "epoch": 0.2851652662338401, "grad_norm": 0.4022481441497803, "learning_rate": 0.00014299639974525274, "loss": 1.1985, "step": 21945 }, { "epoch": 0.28517826077775593, "grad_norm": 0.4021647274494171, "learning_rate": 0.00014299380028334136, "loss": 1.4608, "step": 21946 }, { "epoch": 0.28519125532167183, "grad_norm": 0.3396584689617157, "learning_rate": 0.00014299120082142996, "loss": 1.3398, "step": 21947 }, { "epoch": 0.2852042498655877, "grad_norm": 0.45777085423469543, "learning_rate": 0.00014298860135951859, "loss": 1.3457, "step": 21948 }, { "epoch": 0.2852172444095036, "grad_norm": 0.44055110216140747, "learning_rate": 0.0001429860018976072, "loss": 1.492, "step": 21949 }, { "epoch": 0.2852302389534194, "grad_norm": 0.4318700432777405, "learning_rate": 0.0001429834024356958, "loss": 1.3895, "step": 21950 }, { "epoch": 0.2852432334973353, "grad_norm": 0.3629011809825897, "learning_rate": 0.00014298080297378443, "loss": 1.4125, "step": 21951 }, { "epoch": 0.28525622804125117, "grad_norm": 0.31618040800094604, "learning_rate": 0.00014297820351187306, "loss": 1.4318, "step": 21952 }, { "epoch": 0.28526922258516707, "grad_norm": 0.41475939750671387, "learning_rate": 0.00014297560404996165, "loss": 1.5863, "step": 21953 }, { "epoch": 0.2852822171290829, "grad_norm": 0.44185543060302734, "learning_rate": 0.00014297300458805028, "loss": 1.2739, "step": 21954 }, { "epoch": 0.2852952116729988, "grad_norm": 0.4192999303340912, "learning_rate": 0.0001429704051261389, "loss": 1.4073, "step": 21955 }, { "epoch": 0.28530820621691466, "grad_norm": 0.3721044659614563, "learning_rate": 0.00014296780566422753, "loss": 1.3611, "step": 21956 }, { "epoch": 0.28532120076083056, "grad_norm": 0.38076090812683105, "learning_rate": 0.00014296520620231612, "loss": 1.3161, "step": 21957 }, { "epoch": 0.2853341953047464, "grad_norm": 0.3386537432670593, "learning_rate": 0.00014296260674040475, "loss": 1.341, "step": 21958 }, { "epoch": 0.2853471898486623, "grad_norm": 0.34403541684150696, "learning_rate": 0.00014296000727849337, "loss": 1.3572, "step": 21959 }, { "epoch": 0.28536018439257815, "grad_norm": 0.41266053915023804, "learning_rate": 0.00014295740781658197, "loss": 1.5182, "step": 21960 }, { "epoch": 0.28537317893649405, "grad_norm": 0.336268812417984, "learning_rate": 0.0001429548083546706, "loss": 1.3286, "step": 21961 }, { "epoch": 0.2853861734804099, "grad_norm": 0.4718099534511566, "learning_rate": 0.0001429522088927592, "loss": 1.3731, "step": 21962 }, { "epoch": 0.2853991680243258, "grad_norm": 0.31338202953338623, "learning_rate": 0.00014294960943084782, "loss": 1.3117, "step": 21963 }, { "epoch": 0.28541216256824165, "grad_norm": 0.34216970205307007, "learning_rate": 0.00014294700996893644, "loss": 1.2764, "step": 21964 }, { "epoch": 0.28542515711215755, "grad_norm": 0.4323378801345825, "learning_rate": 0.00014294441050702504, "loss": 1.4339, "step": 21965 }, { "epoch": 0.2854381516560734, "grad_norm": 0.3430556356906891, "learning_rate": 0.00014294181104511366, "loss": 1.369, "step": 21966 }, { "epoch": 0.2854511461999893, "grad_norm": 0.35220974683761597, "learning_rate": 0.0001429392115832023, "loss": 1.3689, "step": 21967 }, { "epoch": 0.28546414074390514, "grad_norm": 0.31723353266716003, "learning_rate": 0.0001429366121212909, "loss": 1.26, "step": 21968 }, { "epoch": 0.28547713528782104, "grad_norm": 0.41802477836608887, "learning_rate": 0.0001429340126593795, "loss": 1.6105, "step": 21969 }, { "epoch": 0.2854901298317369, "grad_norm": 0.39274874329566956, "learning_rate": 0.00014293141319746813, "loss": 1.5149, "step": 21970 }, { "epoch": 0.2855031243756528, "grad_norm": 0.34680861234664917, "learning_rate": 0.00014292881373555676, "loss": 1.2788, "step": 21971 }, { "epoch": 0.28551611891956863, "grad_norm": 0.33555078506469727, "learning_rate": 0.00014292621427364536, "loss": 1.2289, "step": 21972 }, { "epoch": 0.28552911346348453, "grad_norm": 0.4141026735305786, "learning_rate": 0.00014292361481173398, "loss": 1.1982, "step": 21973 }, { "epoch": 0.2855421080074004, "grad_norm": 0.3478696942329407, "learning_rate": 0.00014292101534982258, "loss": 1.6218, "step": 21974 }, { "epoch": 0.2855551025513163, "grad_norm": 0.40909144282341003, "learning_rate": 0.00014291841588791123, "loss": 1.4228, "step": 21975 }, { "epoch": 0.2855680970952321, "grad_norm": 0.404229998588562, "learning_rate": 0.00014291581642599983, "loss": 1.4067, "step": 21976 }, { "epoch": 0.285581091639148, "grad_norm": 0.37593090534210205, "learning_rate": 0.00014291321696408842, "loss": 1.4202, "step": 21977 }, { "epoch": 0.28559408618306387, "grad_norm": 0.3663129210472107, "learning_rate": 0.00014291061750217705, "loss": 1.6132, "step": 21978 }, { "epoch": 0.28560708072697977, "grad_norm": 0.30304816365242004, "learning_rate": 0.00014290801804026567, "loss": 1.2752, "step": 21979 }, { "epoch": 0.2856200752708956, "grad_norm": 0.34673941135406494, "learning_rate": 0.0001429054185783543, "loss": 1.3966, "step": 21980 }, { "epoch": 0.2856330698148115, "grad_norm": 0.3668404519557953, "learning_rate": 0.0001429028191164429, "loss": 1.4455, "step": 21981 }, { "epoch": 0.28564606435872736, "grad_norm": 0.44638675451278687, "learning_rate": 0.00014290021965453152, "loss": 1.2834, "step": 21982 }, { "epoch": 0.28565905890264326, "grad_norm": 0.3655369281768799, "learning_rate": 0.00014289762019262014, "loss": 1.238, "step": 21983 }, { "epoch": 0.2856720534465591, "grad_norm": 0.4036513566970825, "learning_rate": 0.00014289502073070874, "loss": 1.3708, "step": 21984 }, { "epoch": 0.285685047990475, "grad_norm": 0.408968061208725, "learning_rate": 0.00014289242126879737, "loss": 1.4938, "step": 21985 }, { "epoch": 0.28569804253439085, "grad_norm": 0.4376639127731323, "learning_rate": 0.00014288982180688596, "loss": 1.3572, "step": 21986 }, { "epoch": 0.28571103707830675, "grad_norm": 0.3937339186668396, "learning_rate": 0.00014288722234497461, "loss": 1.3805, "step": 21987 }, { "epoch": 0.2857240316222226, "grad_norm": 0.3632708191871643, "learning_rate": 0.0001428846228830632, "loss": 1.3606, "step": 21988 }, { "epoch": 0.2857370261661385, "grad_norm": 0.4313856363296509, "learning_rate": 0.0001428820234211518, "loss": 1.3387, "step": 21989 }, { "epoch": 0.28575002071005434, "grad_norm": 0.33378085494041443, "learning_rate": 0.00014287942395924046, "loss": 1.467, "step": 21990 }, { "epoch": 0.28576301525397024, "grad_norm": 0.41261959075927734, "learning_rate": 0.00014287682449732906, "loss": 1.4259, "step": 21991 }, { "epoch": 0.2857760097978861, "grad_norm": 0.4721905589103699, "learning_rate": 0.00014287422503541768, "loss": 1.3853, "step": 21992 }, { "epoch": 0.285789004341802, "grad_norm": 0.35229530930519104, "learning_rate": 0.00014287162557350628, "loss": 1.2444, "step": 21993 }, { "epoch": 0.28580199888571783, "grad_norm": 0.5010068416595459, "learning_rate": 0.0001428690261115949, "loss": 1.4539, "step": 21994 }, { "epoch": 0.28581499342963373, "grad_norm": 0.42573896050453186, "learning_rate": 0.00014286642664968353, "loss": 1.4227, "step": 21995 }, { "epoch": 0.2858279879735496, "grad_norm": 0.4087473452091217, "learning_rate": 0.00014286382718777213, "loss": 1.425, "step": 21996 }, { "epoch": 0.2858409825174655, "grad_norm": 0.3637833297252655, "learning_rate": 0.00014286122772586075, "loss": 1.2943, "step": 21997 }, { "epoch": 0.2858539770613813, "grad_norm": 0.489261656999588, "learning_rate": 0.00014285862826394938, "loss": 1.5085, "step": 21998 }, { "epoch": 0.2858669716052972, "grad_norm": 0.38434356451034546, "learning_rate": 0.000142856028802038, "loss": 1.4885, "step": 21999 }, { "epoch": 0.28587996614921307, "grad_norm": 0.4948313534259796, "learning_rate": 0.0001428534293401266, "loss": 1.6242, "step": 22000 }, { "epoch": 0.285892960693129, "grad_norm": 0.512351930141449, "learning_rate": 0.00014285082987821522, "loss": 1.5307, "step": 22001 }, { "epoch": 0.2859059552370449, "grad_norm": 0.3727751672267914, "learning_rate": 0.00014284823041630385, "loss": 1.5014, "step": 22002 }, { "epoch": 0.2859189497809607, "grad_norm": 0.3941037058830261, "learning_rate": 0.00014284563095439244, "loss": 1.4953, "step": 22003 }, { "epoch": 0.2859319443248766, "grad_norm": 0.4126706123352051, "learning_rate": 0.00014284303149248107, "loss": 1.3524, "step": 22004 }, { "epoch": 0.28594493886879246, "grad_norm": 0.38840505480766296, "learning_rate": 0.00014284043203056967, "loss": 1.3747, "step": 22005 }, { "epoch": 0.28595793341270836, "grad_norm": 0.4557211995124817, "learning_rate": 0.0001428378325686583, "loss": 1.3452, "step": 22006 }, { "epoch": 0.2859709279566242, "grad_norm": 0.41547250747680664, "learning_rate": 0.00014283523310674691, "loss": 1.3542, "step": 22007 }, { "epoch": 0.2859839225005401, "grad_norm": 0.37983959913253784, "learning_rate": 0.0001428326336448355, "loss": 1.2848, "step": 22008 }, { "epoch": 0.28599691704445596, "grad_norm": 0.43346232175827026, "learning_rate": 0.00014283003418292414, "loss": 1.3939, "step": 22009 }, { "epoch": 0.28600991158837186, "grad_norm": 0.5540909171104431, "learning_rate": 0.00014282743472101276, "loss": 1.4492, "step": 22010 }, { "epoch": 0.2860229061322877, "grad_norm": 0.41637828946113586, "learning_rate": 0.00014282483525910139, "loss": 1.6654, "step": 22011 }, { "epoch": 0.2860359006762036, "grad_norm": 0.4486018419265747, "learning_rate": 0.00014282223579718998, "loss": 1.5164, "step": 22012 }, { "epoch": 0.28604889522011945, "grad_norm": 0.35889574885368347, "learning_rate": 0.0001428196363352786, "loss": 1.3653, "step": 22013 }, { "epoch": 0.28606188976403535, "grad_norm": 0.31671467423439026, "learning_rate": 0.00014281703687336723, "loss": 1.3542, "step": 22014 }, { "epoch": 0.2860748843079512, "grad_norm": 0.4389597773551941, "learning_rate": 0.00014281443741145583, "loss": 1.5172, "step": 22015 }, { "epoch": 0.2860878788518671, "grad_norm": 0.3850475251674652, "learning_rate": 0.00014281183794954445, "loss": 1.6949, "step": 22016 }, { "epoch": 0.28610087339578294, "grad_norm": 0.3959624767303467, "learning_rate": 0.00014280923848763305, "loss": 1.5044, "step": 22017 }, { "epoch": 0.28611386793969884, "grad_norm": 0.4096791446208954, "learning_rate": 0.00014280663902572168, "loss": 1.4464, "step": 22018 }, { "epoch": 0.2861268624836147, "grad_norm": 0.42850667238235474, "learning_rate": 0.0001428040395638103, "loss": 1.3657, "step": 22019 }, { "epoch": 0.2861398570275306, "grad_norm": 0.46171823143959045, "learning_rate": 0.0001428014401018989, "loss": 1.3529, "step": 22020 }, { "epoch": 0.28615285157144643, "grad_norm": 0.26572272181510925, "learning_rate": 0.00014279884063998752, "loss": 1.1545, "step": 22021 }, { "epoch": 0.28616584611536233, "grad_norm": 0.36273351311683655, "learning_rate": 0.00014279624117807615, "loss": 1.3032, "step": 22022 }, { "epoch": 0.2861788406592782, "grad_norm": 0.4640752077102661, "learning_rate": 0.00014279364171616477, "loss": 1.4083, "step": 22023 }, { "epoch": 0.2861918352031941, "grad_norm": 0.4503646790981293, "learning_rate": 0.00014279104225425337, "loss": 1.5933, "step": 22024 }, { "epoch": 0.2862048297471099, "grad_norm": 0.3152863681316376, "learning_rate": 0.000142788442792342, "loss": 1.4376, "step": 22025 }, { "epoch": 0.2862178242910258, "grad_norm": 0.356189489364624, "learning_rate": 0.00014278584333043062, "loss": 1.5175, "step": 22026 }, { "epoch": 0.28623081883494167, "grad_norm": 0.4671732783317566, "learning_rate": 0.00014278324386851921, "loss": 1.5777, "step": 22027 }, { "epoch": 0.28624381337885757, "grad_norm": 0.36938387155532837, "learning_rate": 0.00014278064440660784, "loss": 1.3272, "step": 22028 }, { "epoch": 0.2862568079227734, "grad_norm": 0.44200819730758667, "learning_rate": 0.00014277804494469646, "loss": 1.4551, "step": 22029 }, { "epoch": 0.2862698024666893, "grad_norm": 0.4520149528980255, "learning_rate": 0.0001427754454827851, "loss": 1.4775, "step": 22030 }, { "epoch": 0.28628279701060516, "grad_norm": 0.3840465843677521, "learning_rate": 0.00014277284602087369, "loss": 1.3288, "step": 22031 }, { "epoch": 0.28629579155452106, "grad_norm": 0.4465623199939728, "learning_rate": 0.00014277024655896228, "loss": 1.4295, "step": 22032 }, { "epoch": 0.2863087860984369, "grad_norm": 0.33145877718925476, "learning_rate": 0.00014276764709705093, "loss": 1.5137, "step": 22033 }, { "epoch": 0.2863217806423528, "grad_norm": 0.38937899470329285, "learning_rate": 0.00014276504763513953, "loss": 1.4834, "step": 22034 }, { "epoch": 0.28633477518626865, "grad_norm": 0.328317254781723, "learning_rate": 0.00014276244817322816, "loss": 1.6545, "step": 22035 }, { "epoch": 0.28634776973018455, "grad_norm": 0.44835636019706726, "learning_rate": 0.00014275984871131675, "loss": 1.2589, "step": 22036 }, { "epoch": 0.2863607642741004, "grad_norm": 0.3727401793003082, "learning_rate": 0.00014275724924940538, "loss": 1.4606, "step": 22037 }, { "epoch": 0.2863737588180163, "grad_norm": 0.43079492449760437, "learning_rate": 0.000142754649787494, "loss": 1.6143, "step": 22038 }, { "epoch": 0.28638675336193214, "grad_norm": 0.35338684916496277, "learning_rate": 0.0001427520503255826, "loss": 1.2741, "step": 22039 }, { "epoch": 0.28639974790584805, "grad_norm": 0.33504289388656616, "learning_rate": 0.00014274945086367122, "loss": 1.1859, "step": 22040 }, { "epoch": 0.2864127424497639, "grad_norm": 0.41287708282470703, "learning_rate": 0.00014274685140175985, "loss": 1.3995, "step": 22041 }, { "epoch": 0.2864257369936798, "grad_norm": 0.3278936743736267, "learning_rate": 0.00014274425193984847, "loss": 1.365, "step": 22042 }, { "epoch": 0.28643873153759564, "grad_norm": 0.4115864634513855, "learning_rate": 0.00014274165247793707, "loss": 1.4876, "step": 22043 }, { "epoch": 0.28645172608151154, "grad_norm": 0.3723506033420563, "learning_rate": 0.00014273905301602567, "loss": 1.5142, "step": 22044 }, { "epoch": 0.2864647206254274, "grad_norm": 0.5202172994613647, "learning_rate": 0.00014273645355411432, "loss": 1.5319, "step": 22045 }, { "epoch": 0.2864777151693433, "grad_norm": 0.49196693301200867, "learning_rate": 0.00014273385409220292, "loss": 1.5103, "step": 22046 }, { "epoch": 0.28649070971325913, "grad_norm": 0.4838505983352661, "learning_rate": 0.00014273125463029154, "loss": 1.4901, "step": 22047 }, { "epoch": 0.28650370425717503, "grad_norm": 0.3634736239910126, "learning_rate": 0.00014272865516838014, "loss": 1.4783, "step": 22048 }, { "epoch": 0.2865166988010909, "grad_norm": 0.42378202080726624, "learning_rate": 0.00014272605570646876, "loss": 1.4223, "step": 22049 }, { "epoch": 0.2865296933450068, "grad_norm": 0.4459422528743744, "learning_rate": 0.0001427234562445574, "loss": 1.4177, "step": 22050 }, { "epoch": 0.2865426878889226, "grad_norm": 0.44379082322120667, "learning_rate": 0.00014272085678264599, "loss": 1.4822, "step": 22051 }, { "epoch": 0.2865556824328385, "grad_norm": 0.3417443037033081, "learning_rate": 0.0001427182573207346, "loss": 1.1639, "step": 22052 }, { "epoch": 0.28656867697675437, "grad_norm": 0.35039427876472473, "learning_rate": 0.00014271565785882323, "loss": 1.3411, "step": 22053 }, { "epoch": 0.28658167152067027, "grad_norm": 0.3391748368740082, "learning_rate": 0.00014271305839691186, "loss": 1.2356, "step": 22054 }, { "epoch": 0.2865946660645861, "grad_norm": 0.3190998435020447, "learning_rate": 0.00014271045893500046, "loss": 1.4417, "step": 22055 }, { "epoch": 0.286607660608502, "grad_norm": 0.40838944911956787, "learning_rate": 0.00014270785947308905, "loss": 1.2985, "step": 22056 }, { "epoch": 0.28662065515241786, "grad_norm": 0.3903173506259918, "learning_rate": 0.0001427052600111777, "loss": 1.1493, "step": 22057 }, { "epoch": 0.28663364969633376, "grad_norm": 0.3411411941051483, "learning_rate": 0.0001427026605492663, "loss": 1.2035, "step": 22058 }, { "epoch": 0.2866466442402496, "grad_norm": 0.40367719531059265, "learning_rate": 0.00014270006108735493, "loss": 1.3588, "step": 22059 }, { "epoch": 0.2866596387841655, "grad_norm": 0.4369624853134155, "learning_rate": 0.00014269746162544352, "loss": 1.4349, "step": 22060 }, { "epoch": 0.28667263332808135, "grad_norm": 0.40241745114326477, "learning_rate": 0.00014269486216353215, "loss": 1.3375, "step": 22061 }, { "epoch": 0.28668562787199725, "grad_norm": 0.3780624568462372, "learning_rate": 0.00014269226270162077, "loss": 1.4254, "step": 22062 }, { "epoch": 0.2866986224159131, "grad_norm": 0.4062449336051941, "learning_rate": 0.00014268966323970937, "loss": 1.4223, "step": 22063 }, { "epoch": 0.286711616959829, "grad_norm": 0.48995670676231384, "learning_rate": 0.00014268706377779802, "loss": 1.4169, "step": 22064 }, { "epoch": 0.28672461150374484, "grad_norm": 0.38684117794036865, "learning_rate": 0.00014268446431588662, "loss": 1.4493, "step": 22065 }, { "epoch": 0.28673760604766074, "grad_norm": 0.43404072523117065, "learning_rate": 0.00014268186485397524, "loss": 1.3545, "step": 22066 }, { "epoch": 0.2867506005915766, "grad_norm": 0.46732303500175476, "learning_rate": 0.00014267926539206384, "loss": 1.5281, "step": 22067 }, { "epoch": 0.2867635951354925, "grad_norm": 0.32384055852890015, "learning_rate": 0.00014267666593015247, "loss": 1.4781, "step": 22068 }, { "epoch": 0.28677658967940833, "grad_norm": 0.43550577759742737, "learning_rate": 0.0001426740664682411, "loss": 1.3708, "step": 22069 }, { "epoch": 0.28678958422332423, "grad_norm": 0.4591449201107025, "learning_rate": 0.0001426714670063297, "loss": 1.514, "step": 22070 }, { "epoch": 0.2868025787672401, "grad_norm": 0.43799638748168945, "learning_rate": 0.0001426688675444183, "loss": 1.4926, "step": 22071 }, { "epoch": 0.286815573311156, "grad_norm": 0.48175352811813354, "learning_rate": 0.00014266626808250694, "loss": 1.4686, "step": 22072 }, { "epoch": 0.2868285678550718, "grad_norm": 0.43021318316459656, "learning_rate": 0.00014266366862059553, "loss": 1.5257, "step": 22073 }, { "epoch": 0.2868415623989877, "grad_norm": 0.39312949776649475, "learning_rate": 0.00014266106915868416, "loss": 1.5201, "step": 22074 }, { "epoch": 0.28685455694290357, "grad_norm": 0.43819543719291687, "learning_rate": 0.00014265846969677276, "loss": 1.5464, "step": 22075 }, { "epoch": 0.28686755148681947, "grad_norm": 0.43548449873924255, "learning_rate": 0.0001426558702348614, "loss": 1.2925, "step": 22076 }, { "epoch": 0.2868805460307354, "grad_norm": 0.46215489506721497, "learning_rate": 0.00014265327077295, "loss": 1.5256, "step": 22077 }, { "epoch": 0.2868935405746512, "grad_norm": 0.35817450284957886, "learning_rate": 0.00014265067131103863, "loss": 1.5043, "step": 22078 }, { "epoch": 0.2869065351185671, "grad_norm": 0.23793365061283112, "learning_rate": 0.00014264807184912723, "loss": 1.4752, "step": 22079 }, { "epoch": 0.28691952966248296, "grad_norm": 0.42317914962768555, "learning_rate": 0.00014264547238721585, "loss": 1.4091, "step": 22080 }, { "epoch": 0.28693252420639886, "grad_norm": 0.45541349053382874, "learning_rate": 0.00014264287292530448, "loss": 1.516, "step": 22081 }, { "epoch": 0.2869455187503147, "grad_norm": 0.40810176730155945, "learning_rate": 0.00014264027346339307, "loss": 1.4239, "step": 22082 }, { "epoch": 0.2869585132942306, "grad_norm": 0.37985411286354065, "learning_rate": 0.0001426376740014817, "loss": 1.5568, "step": 22083 }, { "epoch": 0.28697150783814646, "grad_norm": 0.37615418434143066, "learning_rate": 0.00014263507453957032, "loss": 1.508, "step": 22084 }, { "epoch": 0.28698450238206236, "grad_norm": 0.4067232012748718, "learning_rate": 0.00014263247507765895, "loss": 1.4641, "step": 22085 }, { "epoch": 0.2869974969259782, "grad_norm": 0.3505003750324249, "learning_rate": 0.00014262987561574754, "loss": 1.3532, "step": 22086 }, { "epoch": 0.2870104914698941, "grad_norm": 0.41911816596984863, "learning_rate": 0.00014262727615383614, "loss": 1.5465, "step": 22087 }, { "epoch": 0.28702348601380995, "grad_norm": 0.4251181185245514, "learning_rate": 0.0001426246766919248, "loss": 1.4784, "step": 22088 }, { "epoch": 0.28703648055772585, "grad_norm": 0.3102303147315979, "learning_rate": 0.0001426220772300134, "loss": 1.3195, "step": 22089 }, { "epoch": 0.2870494751016417, "grad_norm": 0.5132701992988586, "learning_rate": 0.00014261947776810202, "loss": 1.6155, "step": 22090 }, { "epoch": 0.2870624696455576, "grad_norm": 0.45485636591911316, "learning_rate": 0.0001426168783061906, "loss": 1.2436, "step": 22091 }, { "epoch": 0.28707546418947344, "grad_norm": 0.3383045494556427, "learning_rate": 0.00014261427884427924, "loss": 1.3814, "step": 22092 }, { "epoch": 0.28708845873338934, "grad_norm": 0.4377923011779785, "learning_rate": 0.00014261167938236786, "loss": 1.5571, "step": 22093 }, { "epoch": 0.2871014532773052, "grad_norm": 0.30352792143821716, "learning_rate": 0.00014260907992045646, "loss": 1.2497, "step": 22094 }, { "epoch": 0.2871144478212211, "grad_norm": 0.3492357134819031, "learning_rate": 0.00014260648045854508, "loss": 1.3974, "step": 22095 }, { "epoch": 0.28712744236513693, "grad_norm": 0.29284554719924927, "learning_rate": 0.0001426038809966337, "loss": 1.3344, "step": 22096 }, { "epoch": 0.28714043690905283, "grad_norm": 0.5212596654891968, "learning_rate": 0.00014260128153472233, "loss": 1.295, "step": 22097 }, { "epoch": 0.2871534314529687, "grad_norm": 0.37402665615081787, "learning_rate": 0.00014259868207281093, "loss": 1.4609, "step": 22098 }, { "epoch": 0.2871664259968846, "grad_norm": 0.40469640493392944, "learning_rate": 0.00014259608261089953, "loss": 1.6954, "step": 22099 }, { "epoch": 0.2871794205408004, "grad_norm": 0.49761828780174255, "learning_rate": 0.00014259348314898818, "loss": 1.3617, "step": 22100 }, { "epoch": 0.2871924150847163, "grad_norm": 0.5624523758888245, "learning_rate": 0.00014259088368707678, "loss": 1.417, "step": 22101 }, { "epoch": 0.28720540962863217, "grad_norm": 0.37021806836128235, "learning_rate": 0.0001425882842251654, "loss": 1.4123, "step": 22102 }, { "epoch": 0.28721840417254807, "grad_norm": 0.45372435450553894, "learning_rate": 0.00014258568476325403, "loss": 1.4109, "step": 22103 }, { "epoch": 0.2872313987164639, "grad_norm": 0.47401976585388184, "learning_rate": 0.00014258308530134262, "loss": 1.5865, "step": 22104 }, { "epoch": 0.2872443932603798, "grad_norm": 0.45055750012397766, "learning_rate": 0.00014258048583943125, "loss": 1.4061, "step": 22105 }, { "epoch": 0.28725738780429566, "grad_norm": 0.4332391917705536, "learning_rate": 0.00014257788637751984, "loss": 1.4039, "step": 22106 }, { "epoch": 0.28727038234821156, "grad_norm": 0.36800718307495117, "learning_rate": 0.0001425752869156085, "loss": 1.5256, "step": 22107 }, { "epoch": 0.2872833768921274, "grad_norm": 0.43751880526542664, "learning_rate": 0.0001425726874536971, "loss": 1.6016, "step": 22108 }, { "epoch": 0.2872963714360433, "grad_norm": 0.3916257619857788, "learning_rate": 0.00014257008799178572, "loss": 1.3052, "step": 22109 }, { "epoch": 0.28730936597995915, "grad_norm": 0.4078250229358673, "learning_rate": 0.00014256748852987432, "loss": 1.2841, "step": 22110 }, { "epoch": 0.28732236052387505, "grad_norm": 0.4308535158634186, "learning_rate": 0.00014256488906796294, "loss": 1.403, "step": 22111 }, { "epoch": 0.2873353550677909, "grad_norm": 0.4177975058555603, "learning_rate": 0.00014256228960605156, "loss": 1.3683, "step": 22112 }, { "epoch": 0.2873483496117068, "grad_norm": 0.4185788333415985, "learning_rate": 0.00014255969014414016, "loss": 1.274, "step": 22113 }, { "epoch": 0.28736134415562264, "grad_norm": 0.43693187832832336, "learning_rate": 0.00014255709068222879, "loss": 1.4506, "step": 22114 }, { "epoch": 0.28737433869953855, "grad_norm": 0.33323588967323303, "learning_rate": 0.0001425544912203174, "loss": 1.517, "step": 22115 }, { "epoch": 0.2873873332434544, "grad_norm": 0.4163329005241394, "learning_rate": 0.000142551891758406, "loss": 1.2719, "step": 22116 }, { "epoch": 0.2874003277873703, "grad_norm": 0.4504295885562897, "learning_rate": 0.00014254929229649463, "loss": 1.4478, "step": 22117 }, { "epoch": 0.28741332233128614, "grad_norm": 0.38723647594451904, "learning_rate": 0.00014254669283458323, "loss": 1.2207, "step": 22118 }, { "epoch": 0.28742631687520204, "grad_norm": 0.4415624439716339, "learning_rate": 0.00014254409337267188, "loss": 1.496, "step": 22119 }, { "epoch": 0.2874393114191179, "grad_norm": 0.26319268345832825, "learning_rate": 0.00014254149391076048, "loss": 1.5161, "step": 22120 }, { "epoch": 0.2874523059630338, "grad_norm": 0.4632225036621094, "learning_rate": 0.0001425388944488491, "loss": 1.6013, "step": 22121 }, { "epoch": 0.28746530050694963, "grad_norm": 0.36476877331733704, "learning_rate": 0.0001425362949869377, "loss": 1.28, "step": 22122 }, { "epoch": 0.28747829505086553, "grad_norm": 0.3627232313156128, "learning_rate": 0.00014253369552502633, "loss": 1.4236, "step": 22123 }, { "epoch": 0.2874912895947814, "grad_norm": 0.40497633814811707, "learning_rate": 0.00014253109606311495, "loss": 1.4835, "step": 22124 }, { "epoch": 0.2875042841386973, "grad_norm": 0.38339635729789734, "learning_rate": 0.00014252849660120355, "loss": 1.4145, "step": 22125 }, { "epoch": 0.2875172786826131, "grad_norm": 0.3669421970844269, "learning_rate": 0.00014252589713929217, "loss": 1.4088, "step": 22126 }, { "epoch": 0.287530273226529, "grad_norm": 0.35283127427101135, "learning_rate": 0.0001425232976773808, "loss": 1.3145, "step": 22127 }, { "epoch": 0.28754326777044487, "grad_norm": 0.35127025842666626, "learning_rate": 0.0001425206982154694, "loss": 1.4328, "step": 22128 }, { "epoch": 0.28755626231436077, "grad_norm": 0.4407002329826355, "learning_rate": 0.00014251809875355802, "loss": 1.4376, "step": 22129 }, { "epoch": 0.2875692568582766, "grad_norm": 0.3614174723625183, "learning_rate": 0.00014251549929164662, "loss": 1.1961, "step": 22130 }, { "epoch": 0.2875822514021925, "grad_norm": 0.3617641031742096, "learning_rate": 0.00014251289982973527, "loss": 1.3747, "step": 22131 }, { "epoch": 0.28759524594610836, "grad_norm": 0.37700214982032776, "learning_rate": 0.00014251030036782386, "loss": 1.3639, "step": 22132 }, { "epoch": 0.28760824049002426, "grad_norm": 0.37006232142448425, "learning_rate": 0.0001425077009059125, "loss": 1.347, "step": 22133 }, { "epoch": 0.2876212350339401, "grad_norm": 0.43554365634918213, "learning_rate": 0.00014250510144400109, "loss": 1.4454, "step": 22134 }, { "epoch": 0.287634229577856, "grad_norm": 0.3919074237346649, "learning_rate": 0.0001425025019820897, "loss": 1.4605, "step": 22135 }, { "epoch": 0.28764722412177185, "grad_norm": 0.45605579018592834, "learning_rate": 0.00014249990252017833, "loss": 1.3907, "step": 22136 }, { "epoch": 0.28766021866568775, "grad_norm": 0.3348766565322876, "learning_rate": 0.00014249730305826693, "loss": 1.4485, "step": 22137 }, { "epoch": 0.2876732132096036, "grad_norm": 0.3504857122898102, "learning_rate": 0.00014249470359635558, "loss": 1.5131, "step": 22138 }, { "epoch": 0.2876862077535195, "grad_norm": 0.5046715140342712, "learning_rate": 0.00014249210413444418, "loss": 1.5235, "step": 22139 }, { "epoch": 0.28769920229743534, "grad_norm": 0.3486250340938568, "learning_rate": 0.00014248950467253278, "loss": 1.3206, "step": 22140 }, { "epoch": 0.28771219684135124, "grad_norm": 0.35158732533454895, "learning_rate": 0.0001424869052106214, "loss": 1.4862, "step": 22141 }, { "epoch": 0.2877251913852671, "grad_norm": 0.43166446685791016, "learning_rate": 0.00014248430574871003, "loss": 1.3788, "step": 22142 }, { "epoch": 0.287738185929183, "grad_norm": 0.3212583363056183, "learning_rate": 0.00014248170628679865, "loss": 1.4891, "step": 22143 }, { "epoch": 0.28775118047309883, "grad_norm": 0.4297787547111511, "learning_rate": 0.00014247910682488725, "loss": 1.4005, "step": 22144 }, { "epoch": 0.28776417501701473, "grad_norm": 0.4180818498134613, "learning_rate": 0.00014247650736297587, "loss": 1.6154, "step": 22145 }, { "epoch": 0.2877771695609306, "grad_norm": 0.473725825548172, "learning_rate": 0.0001424739079010645, "loss": 1.419, "step": 22146 }, { "epoch": 0.2877901641048465, "grad_norm": 0.42257606983184814, "learning_rate": 0.0001424713084391531, "loss": 1.4625, "step": 22147 }, { "epoch": 0.2878031586487623, "grad_norm": 0.3577720820903778, "learning_rate": 0.00014246870897724172, "loss": 1.5566, "step": 22148 }, { "epoch": 0.2878161531926782, "grad_norm": 0.3756045997142792, "learning_rate": 0.00014246610951533032, "loss": 1.4821, "step": 22149 }, { "epoch": 0.28782914773659407, "grad_norm": 0.42378586530685425, "learning_rate": 0.00014246351005341897, "loss": 1.5888, "step": 22150 }, { "epoch": 0.28784214228050997, "grad_norm": 0.39652448892593384, "learning_rate": 0.00014246091059150757, "loss": 1.5133, "step": 22151 }, { "epoch": 0.2878551368244258, "grad_norm": 0.41539499163627625, "learning_rate": 0.0001424583111295962, "loss": 1.2951, "step": 22152 }, { "epoch": 0.2878681313683417, "grad_norm": 0.43573832511901855, "learning_rate": 0.0001424557116676848, "loss": 1.4112, "step": 22153 }, { "epoch": 0.2878811259122576, "grad_norm": 0.46801653504371643, "learning_rate": 0.0001424531122057734, "loss": 1.446, "step": 22154 }, { "epoch": 0.28789412045617346, "grad_norm": 0.4033289849758148, "learning_rate": 0.00014245051274386204, "loss": 1.3774, "step": 22155 }, { "epoch": 0.28790711500008936, "grad_norm": 0.3874218165874481, "learning_rate": 0.00014244791328195063, "loss": 1.4161, "step": 22156 }, { "epoch": 0.2879201095440052, "grad_norm": 0.42272356152534485, "learning_rate": 0.00014244531382003926, "loss": 1.4871, "step": 22157 }, { "epoch": 0.2879331040879211, "grad_norm": 0.3884390592575073, "learning_rate": 0.00014244271435812788, "loss": 1.5558, "step": 22158 }, { "epoch": 0.28794609863183696, "grad_norm": 0.29191964864730835, "learning_rate": 0.00014244011489621648, "loss": 1.5111, "step": 22159 }, { "epoch": 0.28795909317575286, "grad_norm": 0.39024919271469116, "learning_rate": 0.0001424375154343051, "loss": 1.2363, "step": 22160 }, { "epoch": 0.2879720877196687, "grad_norm": 0.3433208465576172, "learning_rate": 0.0001424349159723937, "loss": 1.4404, "step": 22161 }, { "epoch": 0.2879850822635846, "grad_norm": 0.4786560535430908, "learning_rate": 0.00014243231651048235, "loss": 1.4269, "step": 22162 }, { "epoch": 0.28799807680750045, "grad_norm": 0.41161152720451355, "learning_rate": 0.00014242971704857095, "loss": 1.3291, "step": 22163 }, { "epoch": 0.28801107135141635, "grad_norm": 0.40924957394599915, "learning_rate": 0.00014242711758665958, "loss": 1.3783, "step": 22164 }, { "epoch": 0.2880240658953322, "grad_norm": 0.44676119089126587, "learning_rate": 0.00014242451812474817, "loss": 1.4976, "step": 22165 }, { "epoch": 0.2880370604392481, "grad_norm": 0.3842381536960602, "learning_rate": 0.0001424219186628368, "loss": 1.3076, "step": 22166 }, { "epoch": 0.28805005498316394, "grad_norm": 0.397760272026062, "learning_rate": 0.00014241931920092542, "loss": 1.3117, "step": 22167 }, { "epoch": 0.28806304952707984, "grad_norm": 0.46874600648880005, "learning_rate": 0.00014241671973901402, "loss": 1.5023, "step": 22168 }, { "epoch": 0.2880760440709957, "grad_norm": 0.3981539011001587, "learning_rate": 0.00014241412027710264, "loss": 1.4564, "step": 22169 }, { "epoch": 0.2880890386149116, "grad_norm": 0.3509082496166229, "learning_rate": 0.00014241152081519127, "loss": 1.4025, "step": 22170 }, { "epoch": 0.28810203315882743, "grad_norm": 0.3865593373775482, "learning_rate": 0.00014240892135327987, "loss": 1.3551, "step": 22171 }, { "epoch": 0.28811502770274333, "grad_norm": 0.7735856771469116, "learning_rate": 0.0001424063218913685, "loss": 1.337, "step": 22172 }, { "epoch": 0.2881280222466592, "grad_norm": 0.32929879426956177, "learning_rate": 0.0001424037224294571, "loss": 1.3845, "step": 22173 }, { "epoch": 0.2881410167905751, "grad_norm": 0.3991185128688812, "learning_rate": 0.00014240112296754574, "loss": 1.2685, "step": 22174 }, { "epoch": 0.2881540113344909, "grad_norm": 0.4119705557823181, "learning_rate": 0.00014239852350563434, "loss": 1.272, "step": 22175 }, { "epoch": 0.2881670058784068, "grad_norm": 0.2189141809940338, "learning_rate": 0.00014239592404372296, "loss": 1.2547, "step": 22176 }, { "epoch": 0.28818000042232267, "grad_norm": 0.38488346338272095, "learning_rate": 0.0001423933245818116, "loss": 1.3921, "step": 22177 }, { "epoch": 0.28819299496623857, "grad_norm": 0.41958320140838623, "learning_rate": 0.00014239072511990018, "loss": 1.5372, "step": 22178 }, { "epoch": 0.2882059895101544, "grad_norm": 0.4101869761943817, "learning_rate": 0.0001423881256579888, "loss": 1.36, "step": 22179 }, { "epoch": 0.2882189840540703, "grad_norm": 0.45164361596107483, "learning_rate": 0.0001423855261960774, "loss": 1.4036, "step": 22180 }, { "epoch": 0.28823197859798616, "grad_norm": 0.4409153461456299, "learning_rate": 0.00014238292673416606, "loss": 1.5524, "step": 22181 }, { "epoch": 0.28824497314190206, "grad_norm": 0.3638860881328583, "learning_rate": 0.00014238032727225465, "loss": 1.5387, "step": 22182 }, { "epoch": 0.2882579676858179, "grad_norm": 0.5224159359931946, "learning_rate": 0.00014237772781034325, "loss": 1.4571, "step": 22183 }, { "epoch": 0.2882709622297338, "grad_norm": 0.437107115983963, "learning_rate": 0.00014237512834843188, "loss": 1.5554, "step": 22184 }, { "epoch": 0.28828395677364965, "grad_norm": 0.4087488055229187, "learning_rate": 0.0001423725288865205, "loss": 1.447, "step": 22185 }, { "epoch": 0.28829695131756555, "grad_norm": 0.39433395862579346, "learning_rate": 0.00014236992942460913, "loss": 1.5002, "step": 22186 }, { "epoch": 0.2883099458614814, "grad_norm": 0.26936182379722595, "learning_rate": 0.00014236732996269772, "loss": 1.6188, "step": 22187 }, { "epoch": 0.2883229404053973, "grad_norm": 0.3768483102321625, "learning_rate": 0.00014236473050078635, "loss": 1.418, "step": 22188 }, { "epoch": 0.28833593494931314, "grad_norm": 0.4075142443180084, "learning_rate": 0.00014236213103887497, "loss": 1.3955, "step": 22189 }, { "epoch": 0.28834892949322904, "grad_norm": 0.3793126046657562, "learning_rate": 0.00014235953157696357, "loss": 1.1325, "step": 22190 }, { "epoch": 0.2883619240371449, "grad_norm": 0.3578402101993561, "learning_rate": 0.0001423569321150522, "loss": 1.2844, "step": 22191 }, { "epoch": 0.2883749185810608, "grad_norm": 0.3542401194572449, "learning_rate": 0.0001423543326531408, "loss": 1.4327, "step": 22192 }, { "epoch": 0.28838791312497664, "grad_norm": 0.3764530420303345, "learning_rate": 0.00014235173319122944, "loss": 1.2426, "step": 22193 }, { "epoch": 0.28840090766889254, "grad_norm": 0.37350791692733765, "learning_rate": 0.00014234913372931804, "loss": 1.2829, "step": 22194 }, { "epoch": 0.2884139022128084, "grad_norm": 0.38806912302970886, "learning_rate": 0.00014234653426740664, "loss": 1.4265, "step": 22195 }, { "epoch": 0.2884268967567243, "grad_norm": 0.39289748668670654, "learning_rate": 0.00014234393480549526, "loss": 1.4502, "step": 22196 }, { "epoch": 0.2884398913006401, "grad_norm": 0.47973915934562683, "learning_rate": 0.00014234133534358389, "loss": 1.3023, "step": 22197 }, { "epoch": 0.28845288584455603, "grad_norm": 0.3785458505153656, "learning_rate": 0.0001423387358816725, "loss": 1.378, "step": 22198 }, { "epoch": 0.2884658803884719, "grad_norm": 0.4501010775566101, "learning_rate": 0.0001423361364197611, "loss": 1.5021, "step": 22199 }, { "epoch": 0.2884788749323878, "grad_norm": 0.5293017029762268, "learning_rate": 0.00014233353695784973, "loss": 1.4096, "step": 22200 }, { "epoch": 0.2884918694763036, "grad_norm": 0.4222677946090698, "learning_rate": 0.00014233093749593836, "loss": 1.4493, "step": 22201 }, { "epoch": 0.2885048640202195, "grad_norm": 0.4977155923843384, "learning_rate": 0.00014232833803402695, "loss": 1.4248, "step": 22202 }, { "epoch": 0.28851785856413537, "grad_norm": 0.344005823135376, "learning_rate": 0.00014232573857211558, "loss": 1.2453, "step": 22203 }, { "epoch": 0.28853085310805127, "grad_norm": 0.4305126965045929, "learning_rate": 0.00014232313911020418, "loss": 1.5422, "step": 22204 }, { "epoch": 0.2885438476519671, "grad_norm": 0.45520490407943726, "learning_rate": 0.00014232053964829283, "loss": 1.4611, "step": 22205 }, { "epoch": 0.288556842195883, "grad_norm": 0.43232953548431396, "learning_rate": 0.00014231794018638143, "loss": 1.5281, "step": 22206 }, { "epoch": 0.28856983673979886, "grad_norm": 0.36836835741996765, "learning_rate": 0.00014231534072447005, "loss": 1.4601, "step": 22207 }, { "epoch": 0.28858283128371476, "grad_norm": 0.46151643991470337, "learning_rate": 0.00014231274126255865, "loss": 1.3012, "step": 22208 }, { "epoch": 0.2885958258276306, "grad_norm": 0.3296111524105072, "learning_rate": 0.00014231014180064727, "loss": 1.1617, "step": 22209 }, { "epoch": 0.2886088203715465, "grad_norm": 0.4365094304084778, "learning_rate": 0.0001423075423387359, "loss": 1.4654, "step": 22210 }, { "epoch": 0.28862181491546235, "grad_norm": 0.4050542116165161, "learning_rate": 0.0001423049428768245, "loss": 1.4388, "step": 22211 }, { "epoch": 0.28863480945937825, "grad_norm": 0.37126821279525757, "learning_rate": 0.00014230234341491312, "loss": 1.3354, "step": 22212 }, { "epoch": 0.2886478040032941, "grad_norm": 0.29640763998031616, "learning_rate": 0.00014229974395300174, "loss": 1.344, "step": 22213 }, { "epoch": 0.28866079854721, "grad_norm": 0.3536563813686371, "learning_rate": 0.00014229714449109034, "loss": 1.4147, "step": 22214 }, { "epoch": 0.28867379309112584, "grad_norm": 0.3737310469150543, "learning_rate": 0.00014229454502917896, "loss": 1.476, "step": 22215 }, { "epoch": 0.28868678763504174, "grad_norm": 0.515973687171936, "learning_rate": 0.0001422919455672676, "loss": 1.4826, "step": 22216 }, { "epoch": 0.2886997821789576, "grad_norm": 0.3244648277759552, "learning_rate": 0.0001422893461053562, "loss": 1.3116, "step": 22217 }, { "epoch": 0.2887127767228735, "grad_norm": 0.3121008276939392, "learning_rate": 0.0001422867466434448, "loss": 1.3423, "step": 22218 }, { "epoch": 0.28872577126678933, "grad_norm": 0.3961296081542969, "learning_rate": 0.00014228414718153344, "loss": 1.4464, "step": 22219 }, { "epoch": 0.28873876581070523, "grad_norm": 0.41946539282798767, "learning_rate": 0.00014228154771962206, "loss": 1.3208, "step": 22220 }, { "epoch": 0.2887517603546211, "grad_norm": 0.3853977918624878, "learning_rate": 0.00014227894825771066, "loss": 1.2638, "step": 22221 }, { "epoch": 0.288764754898537, "grad_norm": 0.4243699014186859, "learning_rate": 0.00014227634879579928, "loss": 1.2983, "step": 22222 }, { "epoch": 0.2887777494424528, "grad_norm": 0.32959455251693726, "learning_rate": 0.00014227374933388788, "loss": 1.2961, "step": 22223 }, { "epoch": 0.2887907439863687, "grad_norm": 0.44616636633872986, "learning_rate": 0.0001422711498719765, "loss": 1.3889, "step": 22224 }, { "epoch": 0.28880373853028457, "grad_norm": 0.38505083322525024, "learning_rate": 0.00014226855041006513, "loss": 1.4369, "step": 22225 }, { "epoch": 0.28881673307420047, "grad_norm": 0.44189339876174927, "learning_rate": 0.00014226595094815373, "loss": 1.4329, "step": 22226 }, { "epoch": 0.2888297276181163, "grad_norm": 0.515619695186615, "learning_rate": 0.00014226335148624235, "loss": 1.5656, "step": 22227 }, { "epoch": 0.2888427221620322, "grad_norm": 0.48045679926872253, "learning_rate": 0.00014226075202433097, "loss": 1.4018, "step": 22228 }, { "epoch": 0.28885571670594806, "grad_norm": 0.5401654243469238, "learning_rate": 0.0001422581525624196, "loss": 1.6124, "step": 22229 }, { "epoch": 0.28886871124986396, "grad_norm": 0.29047438502311707, "learning_rate": 0.0001422555531005082, "loss": 1.5682, "step": 22230 }, { "epoch": 0.28888170579377986, "grad_norm": 0.42067721486091614, "learning_rate": 0.00014225295363859682, "loss": 1.3397, "step": 22231 }, { "epoch": 0.2888947003376957, "grad_norm": 0.43651214241981506, "learning_rate": 0.00014225035417668545, "loss": 1.5441, "step": 22232 }, { "epoch": 0.2889076948816116, "grad_norm": 0.45472288131713867, "learning_rate": 0.00014224775471477404, "loss": 1.3925, "step": 22233 }, { "epoch": 0.28892068942552745, "grad_norm": 0.5030890703201294, "learning_rate": 0.00014224515525286267, "loss": 1.6147, "step": 22234 }, { "epoch": 0.28893368396944336, "grad_norm": 0.32014161348342896, "learning_rate": 0.00014224255579095126, "loss": 1.2553, "step": 22235 }, { "epoch": 0.2889466785133592, "grad_norm": 0.40538862347602844, "learning_rate": 0.00014223995632903992, "loss": 1.4536, "step": 22236 }, { "epoch": 0.2889596730572751, "grad_norm": 0.4062918424606323, "learning_rate": 0.0001422373568671285, "loss": 1.5067, "step": 22237 }, { "epoch": 0.28897266760119095, "grad_norm": 0.4442928731441498, "learning_rate": 0.0001422347574052171, "loss": 1.5302, "step": 22238 }, { "epoch": 0.28898566214510685, "grad_norm": 0.5418941378593445, "learning_rate": 0.00014223215794330574, "loss": 1.3773, "step": 22239 }, { "epoch": 0.2889986566890227, "grad_norm": 0.4543876051902771, "learning_rate": 0.00014222955848139436, "loss": 1.4275, "step": 22240 }, { "epoch": 0.2890116512329386, "grad_norm": 0.3564375042915344, "learning_rate": 0.00014222695901948298, "loss": 1.3042, "step": 22241 }, { "epoch": 0.28902464577685444, "grad_norm": 0.3918377161026001, "learning_rate": 0.00014222435955757158, "loss": 1.4558, "step": 22242 }, { "epoch": 0.28903764032077034, "grad_norm": 0.419392466545105, "learning_rate": 0.0001422217600956602, "loss": 1.4747, "step": 22243 }, { "epoch": 0.2890506348646862, "grad_norm": 0.4483223855495453, "learning_rate": 0.00014221916063374883, "loss": 1.6658, "step": 22244 }, { "epoch": 0.2890636294086021, "grad_norm": 0.3335586488246918, "learning_rate": 0.00014221656117183743, "loss": 1.4099, "step": 22245 }, { "epoch": 0.28907662395251793, "grad_norm": 0.3058409094810486, "learning_rate": 0.00014221396170992605, "loss": 1.1076, "step": 22246 }, { "epoch": 0.28908961849643383, "grad_norm": 0.4099843204021454, "learning_rate": 0.00014221136224801465, "loss": 1.3358, "step": 22247 }, { "epoch": 0.2891026130403497, "grad_norm": 0.4615851938724518, "learning_rate": 0.0001422087627861033, "loss": 1.4019, "step": 22248 }, { "epoch": 0.2891156075842656, "grad_norm": 0.4688842296600342, "learning_rate": 0.0001422061633241919, "loss": 1.4556, "step": 22249 }, { "epoch": 0.2891286021281814, "grad_norm": 0.4001471698284149, "learning_rate": 0.0001422035638622805, "loss": 1.394, "step": 22250 }, { "epoch": 0.2891415966720973, "grad_norm": 0.4646671712398529, "learning_rate": 0.00014220096440036915, "loss": 1.3567, "step": 22251 }, { "epoch": 0.28915459121601317, "grad_norm": 0.37723442912101746, "learning_rate": 0.00014219836493845775, "loss": 1.2708, "step": 22252 }, { "epoch": 0.28916758575992907, "grad_norm": 0.35815441608428955, "learning_rate": 0.00014219576547654637, "loss": 1.4626, "step": 22253 }, { "epoch": 0.2891805803038449, "grad_norm": 0.3989640176296234, "learning_rate": 0.00014219316601463497, "loss": 1.3824, "step": 22254 }, { "epoch": 0.2891935748477608, "grad_norm": 0.4239923655986786, "learning_rate": 0.0001421905665527236, "loss": 1.403, "step": 22255 }, { "epoch": 0.28920656939167666, "grad_norm": 0.42054277658462524, "learning_rate": 0.00014218796709081222, "loss": 1.6153, "step": 22256 }, { "epoch": 0.28921956393559256, "grad_norm": 0.32436731457710266, "learning_rate": 0.0001421853676289008, "loss": 1.387, "step": 22257 }, { "epoch": 0.2892325584795084, "grad_norm": 0.446170836687088, "learning_rate": 0.00014218276816698944, "loss": 1.3896, "step": 22258 }, { "epoch": 0.2892455530234243, "grad_norm": 0.4039406478404999, "learning_rate": 0.00014218016870507806, "loss": 1.4743, "step": 22259 }, { "epoch": 0.28925854756734015, "grad_norm": 0.42505866289138794, "learning_rate": 0.0001421775692431667, "loss": 1.5496, "step": 22260 }, { "epoch": 0.28927154211125605, "grad_norm": 0.32116004824638367, "learning_rate": 0.00014217496978125528, "loss": 1.6292, "step": 22261 }, { "epoch": 0.2892845366551719, "grad_norm": 0.44610050320625305, "learning_rate": 0.00014217237031934388, "loss": 1.3935, "step": 22262 }, { "epoch": 0.2892975311990878, "grad_norm": 0.4346407651901245, "learning_rate": 0.00014216977085743253, "loss": 1.3005, "step": 22263 }, { "epoch": 0.28931052574300364, "grad_norm": 0.4863552749156952, "learning_rate": 0.00014216717139552113, "loss": 1.342, "step": 22264 }, { "epoch": 0.28932352028691954, "grad_norm": 0.36966603994369507, "learning_rate": 0.00014216457193360976, "loss": 1.3606, "step": 22265 }, { "epoch": 0.2893365148308354, "grad_norm": 0.4484783709049225, "learning_rate": 0.00014216197247169835, "loss": 1.6543, "step": 22266 }, { "epoch": 0.2893495093747513, "grad_norm": 0.4564906060695648, "learning_rate": 0.00014215937300978698, "loss": 1.3202, "step": 22267 }, { "epoch": 0.28936250391866714, "grad_norm": 0.4410177767276764, "learning_rate": 0.0001421567735478756, "loss": 1.5001, "step": 22268 }, { "epoch": 0.28937549846258304, "grad_norm": 0.3964931070804596, "learning_rate": 0.0001421541740859642, "loss": 1.3534, "step": 22269 }, { "epoch": 0.2893884930064989, "grad_norm": 0.32138121128082275, "learning_rate": 0.00014215157462405282, "loss": 1.2751, "step": 22270 }, { "epoch": 0.2894014875504148, "grad_norm": 0.312162846326828, "learning_rate": 0.00014214897516214145, "loss": 1.3733, "step": 22271 }, { "epoch": 0.2894144820943306, "grad_norm": 0.3795371949672699, "learning_rate": 0.00014214637570023007, "loss": 1.5391, "step": 22272 }, { "epoch": 0.28942747663824653, "grad_norm": 0.4658038020133972, "learning_rate": 0.00014214377623831867, "loss": 1.5096, "step": 22273 }, { "epoch": 0.2894404711821624, "grad_norm": 0.4609101414680481, "learning_rate": 0.0001421411767764073, "loss": 1.4755, "step": 22274 }, { "epoch": 0.2894534657260783, "grad_norm": 0.39089611172676086, "learning_rate": 0.00014213857731449592, "loss": 1.4187, "step": 22275 }, { "epoch": 0.2894664602699941, "grad_norm": 0.3116099238395691, "learning_rate": 0.00014213597785258452, "loss": 1.5299, "step": 22276 }, { "epoch": 0.28947945481391, "grad_norm": 0.34981757402420044, "learning_rate": 0.00014213337839067314, "loss": 1.461, "step": 22277 }, { "epoch": 0.28949244935782587, "grad_norm": 0.30512478947639465, "learning_rate": 0.00014213077892876174, "loss": 1.4557, "step": 22278 }, { "epoch": 0.28950544390174177, "grad_norm": 0.3608631193637848, "learning_rate": 0.00014212817946685036, "loss": 1.2629, "step": 22279 }, { "epoch": 0.2895184384456576, "grad_norm": 0.37413182854652405, "learning_rate": 0.000142125580004939, "loss": 1.5088, "step": 22280 }, { "epoch": 0.2895314329895735, "grad_norm": 0.4897593855857849, "learning_rate": 0.00014212298054302758, "loss": 1.3233, "step": 22281 }, { "epoch": 0.28954442753348936, "grad_norm": 0.4357750117778778, "learning_rate": 0.0001421203810811162, "loss": 1.4592, "step": 22282 }, { "epoch": 0.28955742207740526, "grad_norm": 0.4132704734802246, "learning_rate": 0.00014211778161920483, "loss": 1.5849, "step": 22283 }, { "epoch": 0.2895704166213211, "grad_norm": 0.44093960523605347, "learning_rate": 0.00014211518215729346, "loss": 1.3972, "step": 22284 }, { "epoch": 0.289583411165237, "grad_norm": 0.42616814374923706, "learning_rate": 0.00014211258269538205, "loss": 1.4716, "step": 22285 }, { "epoch": 0.28959640570915285, "grad_norm": 0.25706642866134644, "learning_rate": 0.00014210998323347068, "loss": 1.3038, "step": 22286 }, { "epoch": 0.28960940025306875, "grad_norm": 0.38505569100379944, "learning_rate": 0.0001421073837715593, "loss": 1.7213, "step": 22287 }, { "epoch": 0.2896223947969846, "grad_norm": 0.36318185925483704, "learning_rate": 0.0001421047843096479, "loss": 1.4037, "step": 22288 }, { "epoch": 0.2896353893409005, "grad_norm": 0.4311409294605255, "learning_rate": 0.00014210218484773653, "loss": 1.4954, "step": 22289 }, { "epoch": 0.28964838388481634, "grad_norm": 0.47099241614341736, "learning_rate": 0.00014209958538582515, "loss": 1.419, "step": 22290 }, { "epoch": 0.28966137842873224, "grad_norm": 0.3859619200229645, "learning_rate": 0.00014209698592391377, "loss": 1.4748, "step": 22291 }, { "epoch": 0.2896743729726481, "grad_norm": 0.3615507483482361, "learning_rate": 0.00014209438646200237, "loss": 1.4257, "step": 22292 }, { "epoch": 0.289687367516564, "grad_norm": 0.3239363729953766, "learning_rate": 0.00014209178700009097, "loss": 1.3568, "step": 22293 }, { "epoch": 0.28970036206047983, "grad_norm": 0.43754154443740845, "learning_rate": 0.00014208918753817962, "loss": 1.4336, "step": 22294 }, { "epoch": 0.28971335660439573, "grad_norm": 0.41938117146492004, "learning_rate": 0.00014208658807626822, "loss": 1.4998, "step": 22295 }, { "epoch": 0.2897263511483116, "grad_norm": 0.3349857032299042, "learning_rate": 0.00014208398861435684, "loss": 1.284, "step": 22296 }, { "epoch": 0.2897393456922275, "grad_norm": 0.42732977867126465, "learning_rate": 0.00014208138915244544, "loss": 1.4753, "step": 22297 }, { "epoch": 0.2897523402361433, "grad_norm": 0.3861484229564667, "learning_rate": 0.00014207878969053406, "loss": 1.5407, "step": 22298 }, { "epoch": 0.2897653347800592, "grad_norm": 0.33476418256759644, "learning_rate": 0.0001420761902286227, "loss": 1.3591, "step": 22299 }, { "epoch": 0.28977832932397507, "grad_norm": 0.7007179260253906, "learning_rate": 0.0001420735907667113, "loss": 1.6933, "step": 22300 }, { "epoch": 0.28979132386789097, "grad_norm": 0.5477127432823181, "learning_rate": 0.0001420709913047999, "loss": 1.6322, "step": 22301 }, { "epoch": 0.2898043184118068, "grad_norm": 0.38956218957901, "learning_rate": 0.00014206839184288854, "loss": 1.3232, "step": 22302 }, { "epoch": 0.2898173129557227, "grad_norm": 0.42501935362815857, "learning_rate": 0.00014206579238097716, "loss": 1.6154, "step": 22303 }, { "epoch": 0.28983030749963856, "grad_norm": 0.35152241587638855, "learning_rate": 0.00014206319291906576, "loss": 1.4486, "step": 22304 }, { "epoch": 0.28984330204355446, "grad_norm": 0.33880022168159485, "learning_rate": 0.00014206059345715435, "loss": 1.2453, "step": 22305 }, { "epoch": 0.28985629658747036, "grad_norm": 0.5051475763320923, "learning_rate": 0.000142057993995243, "loss": 1.2604, "step": 22306 }, { "epoch": 0.2898692911313862, "grad_norm": 0.4556049108505249, "learning_rate": 0.0001420553945333316, "loss": 1.4191, "step": 22307 }, { "epoch": 0.2898822856753021, "grad_norm": 0.3710220158100128, "learning_rate": 0.00014205279507142023, "loss": 1.5994, "step": 22308 }, { "epoch": 0.28989528021921795, "grad_norm": 0.39599597454071045, "learning_rate": 0.00014205019560950883, "loss": 1.6606, "step": 22309 }, { "epoch": 0.28990827476313386, "grad_norm": 0.3968614637851715, "learning_rate": 0.00014204759614759745, "loss": 1.2073, "step": 22310 }, { "epoch": 0.2899212693070497, "grad_norm": 0.4041449725627899, "learning_rate": 0.00014204499668568607, "loss": 1.5553, "step": 22311 }, { "epoch": 0.2899342638509656, "grad_norm": 0.3937256634235382, "learning_rate": 0.00014204239722377467, "loss": 1.37, "step": 22312 }, { "epoch": 0.28994725839488145, "grad_norm": 0.40615394711494446, "learning_rate": 0.0001420397977618633, "loss": 1.5182, "step": 22313 }, { "epoch": 0.28996025293879735, "grad_norm": 0.3599732220172882, "learning_rate": 0.00014203719829995192, "loss": 1.4869, "step": 22314 }, { "epoch": 0.2899732474827132, "grad_norm": 0.3950166702270508, "learning_rate": 0.00014203459883804055, "loss": 1.23, "step": 22315 }, { "epoch": 0.2899862420266291, "grad_norm": 0.4353402554988861, "learning_rate": 0.00014203199937612914, "loss": 1.5926, "step": 22316 }, { "epoch": 0.28999923657054494, "grad_norm": 0.37088438868522644, "learning_rate": 0.00014202939991421774, "loss": 1.3402, "step": 22317 }, { "epoch": 0.29001223111446084, "grad_norm": 0.47764694690704346, "learning_rate": 0.0001420268004523064, "loss": 1.3889, "step": 22318 }, { "epoch": 0.2900252256583767, "grad_norm": 0.3792191743850708, "learning_rate": 0.000142024200990395, "loss": 1.3261, "step": 22319 }, { "epoch": 0.2900382202022926, "grad_norm": 0.41932064294815063, "learning_rate": 0.00014202160152848361, "loss": 1.4122, "step": 22320 }, { "epoch": 0.29005121474620843, "grad_norm": 0.43686172366142273, "learning_rate": 0.0001420190020665722, "loss": 1.5433, "step": 22321 }, { "epoch": 0.29006420929012433, "grad_norm": 0.2098013162612915, "learning_rate": 0.00014201640260466084, "loss": 0.9717, "step": 22322 }, { "epoch": 0.2900772038340402, "grad_norm": 0.3696129024028778, "learning_rate": 0.00014201380314274946, "loss": 1.3771, "step": 22323 }, { "epoch": 0.2900901983779561, "grad_norm": 0.43324437737464905, "learning_rate": 0.00014201120368083806, "loss": 1.4719, "step": 22324 }, { "epoch": 0.2901031929218719, "grad_norm": 0.4683477282524109, "learning_rate": 0.0001420086042189267, "loss": 1.5837, "step": 22325 }, { "epoch": 0.2901161874657878, "grad_norm": 0.4767380654811859, "learning_rate": 0.0001420060047570153, "loss": 1.4855, "step": 22326 }, { "epoch": 0.29012918200970367, "grad_norm": 0.3886423408985138, "learning_rate": 0.00014200340529510393, "loss": 1.2985, "step": 22327 }, { "epoch": 0.29014217655361957, "grad_norm": 0.3977794051170349, "learning_rate": 0.00014200080583319253, "loss": 1.4754, "step": 22328 }, { "epoch": 0.2901551710975354, "grad_norm": 0.25363174080848694, "learning_rate": 0.00014199820637128115, "loss": 1.2019, "step": 22329 }, { "epoch": 0.2901681656414513, "grad_norm": 0.3842352330684662, "learning_rate": 0.00014199560690936978, "loss": 1.545, "step": 22330 }, { "epoch": 0.29018116018536716, "grad_norm": 0.3418513834476471, "learning_rate": 0.00014199300744745837, "loss": 1.4383, "step": 22331 }, { "epoch": 0.29019415472928306, "grad_norm": 0.33965203166007996, "learning_rate": 0.000141990407985547, "loss": 1.4286, "step": 22332 }, { "epoch": 0.2902071492731989, "grad_norm": 0.43528953194618225, "learning_rate": 0.00014198780852363562, "loss": 1.3904, "step": 22333 }, { "epoch": 0.2902201438171148, "grad_norm": 0.46454524993896484, "learning_rate": 0.00014198520906172422, "loss": 1.2898, "step": 22334 }, { "epoch": 0.29023313836103065, "grad_norm": 0.4105285704135895, "learning_rate": 0.00014198260959981285, "loss": 1.4581, "step": 22335 }, { "epoch": 0.29024613290494655, "grad_norm": 0.3978787660598755, "learning_rate": 0.00014198001013790144, "loss": 1.4891, "step": 22336 }, { "epoch": 0.2902591274488624, "grad_norm": 0.35916993021965027, "learning_rate": 0.0001419774106759901, "loss": 1.2862, "step": 22337 }, { "epoch": 0.2902721219927783, "grad_norm": 0.39996638894081116, "learning_rate": 0.0001419748112140787, "loss": 1.3422, "step": 22338 }, { "epoch": 0.29028511653669414, "grad_norm": 0.3090335726737976, "learning_rate": 0.00014197221175216732, "loss": 1.14, "step": 22339 }, { "epoch": 0.29029811108061004, "grad_norm": 0.506252646446228, "learning_rate": 0.00014196961229025591, "loss": 1.506, "step": 22340 }, { "epoch": 0.2903111056245259, "grad_norm": 0.42328906059265137, "learning_rate": 0.00014196701282834454, "loss": 1.4337, "step": 22341 }, { "epoch": 0.2903241001684418, "grad_norm": 0.4637744128704071, "learning_rate": 0.00014196441336643316, "loss": 1.3951, "step": 22342 }, { "epoch": 0.29033709471235764, "grad_norm": 0.3766438663005829, "learning_rate": 0.00014196181390452176, "loss": 1.5259, "step": 22343 }, { "epoch": 0.29035008925627354, "grad_norm": 0.3739299476146698, "learning_rate": 0.00014195921444261038, "loss": 1.4014, "step": 22344 }, { "epoch": 0.2903630838001894, "grad_norm": 0.34221434593200684, "learning_rate": 0.000141956614980699, "loss": 1.3113, "step": 22345 }, { "epoch": 0.2903760783441053, "grad_norm": 0.4266754388809204, "learning_rate": 0.0001419540155187876, "loss": 1.3435, "step": 22346 }, { "epoch": 0.2903890728880211, "grad_norm": 0.4251234233379364, "learning_rate": 0.00014195141605687623, "loss": 1.3403, "step": 22347 }, { "epoch": 0.290402067431937, "grad_norm": 0.3070501387119293, "learning_rate": 0.00014194881659496483, "loss": 1.2095, "step": 22348 }, { "epoch": 0.2904150619758529, "grad_norm": 0.3991011381149292, "learning_rate": 0.00014194621713305348, "loss": 1.5122, "step": 22349 }, { "epoch": 0.2904280565197688, "grad_norm": 0.4335017204284668, "learning_rate": 0.00014194361767114208, "loss": 1.3991, "step": 22350 }, { "epoch": 0.2904410510636846, "grad_norm": 0.28476572036743164, "learning_rate": 0.0001419410182092307, "loss": 1.4286, "step": 22351 }, { "epoch": 0.2904540456076005, "grad_norm": 0.3671368658542633, "learning_rate": 0.0001419384187473193, "loss": 1.5065, "step": 22352 }, { "epoch": 0.29046704015151636, "grad_norm": 0.3427566885948181, "learning_rate": 0.00014193581928540792, "loss": 1.3048, "step": 22353 }, { "epoch": 0.29048003469543227, "grad_norm": 0.34179338812828064, "learning_rate": 0.00014193321982349655, "loss": 1.1822, "step": 22354 }, { "epoch": 0.2904930292393481, "grad_norm": 0.3781779408454895, "learning_rate": 0.00014193062036158515, "loss": 1.3153, "step": 22355 }, { "epoch": 0.290506023783264, "grad_norm": 0.39216864109039307, "learning_rate": 0.00014192802089967377, "loss": 1.2711, "step": 22356 }, { "epoch": 0.29051901832717986, "grad_norm": 0.4567340612411499, "learning_rate": 0.0001419254214377624, "loss": 1.6723, "step": 22357 }, { "epoch": 0.29053201287109576, "grad_norm": 0.4034908711910248, "learning_rate": 0.00014192282197585102, "loss": 1.5544, "step": 22358 }, { "epoch": 0.2905450074150116, "grad_norm": 0.4637525677680969, "learning_rate": 0.00014192022251393962, "loss": 1.2819, "step": 22359 }, { "epoch": 0.2905580019589275, "grad_norm": 0.4224262833595276, "learning_rate": 0.00014191762305202824, "loss": 1.3303, "step": 22360 }, { "epoch": 0.29057099650284335, "grad_norm": 0.4207250773906708, "learning_rate": 0.00014191502359011687, "loss": 1.5319, "step": 22361 }, { "epoch": 0.29058399104675925, "grad_norm": 0.37056565284729004, "learning_rate": 0.00014191242412820546, "loss": 1.4878, "step": 22362 }, { "epoch": 0.2905969855906751, "grad_norm": 0.37557870149612427, "learning_rate": 0.0001419098246662941, "loss": 1.3815, "step": 22363 }, { "epoch": 0.290609980134591, "grad_norm": 0.4046560227870941, "learning_rate": 0.0001419072252043827, "loss": 1.3142, "step": 22364 }, { "epoch": 0.29062297467850684, "grad_norm": 0.3729954957962036, "learning_rate": 0.0001419046257424713, "loss": 1.265, "step": 22365 }, { "epoch": 0.29063596922242274, "grad_norm": 0.310863196849823, "learning_rate": 0.00014190202628055993, "loss": 1.342, "step": 22366 }, { "epoch": 0.2906489637663386, "grad_norm": 0.4439898431301117, "learning_rate": 0.00014189942681864853, "loss": 1.4744, "step": 22367 }, { "epoch": 0.2906619583102545, "grad_norm": 0.5251782536506653, "learning_rate": 0.00014189682735673718, "loss": 1.4744, "step": 22368 }, { "epoch": 0.29067495285417033, "grad_norm": 0.381529837846756, "learning_rate": 0.00014189422789482578, "loss": 1.29, "step": 22369 }, { "epoch": 0.29068794739808623, "grad_norm": 0.4463101327419281, "learning_rate": 0.0001418916284329144, "loss": 1.4288, "step": 22370 }, { "epoch": 0.2907009419420021, "grad_norm": 0.43048325181007385, "learning_rate": 0.000141889028971003, "loss": 1.3991, "step": 22371 }, { "epoch": 0.290713936485918, "grad_norm": 0.32835185527801514, "learning_rate": 0.00014188642950909163, "loss": 1.2291, "step": 22372 }, { "epoch": 0.2907269310298338, "grad_norm": 0.3897741734981537, "learning_rate": 0.00014188383004718025, "loss": 1.6042, "step": 22373 }, { "epoch": 0.2907399255737497, "grad_norm": 0.46307021379470825, "learning_rate": 0.00014188123058526885, "loss": 1.5311, "step": 22374 }, { "epoch": 0.29075292011766557, "grad_norm": 0.43575552105903625, "learning_rate": 0.00014187863112335747, "loss": 1.1857, "step": 22375 }, { "epoch": 0.29076591466158147, "grad_norm": 0.5436030030250549, "learning_rate": 0.0001418760316614461, "loss": 1.3523, "step": 22376 }, { "epoch": 0.2907789092054973, "grad_norm": 0.35704275965690613, "learning_rate": 0.0001418734321995347, "loss": 1.438, "step": 22377 }, { "epoch": 0.2907919037494132, "grad_norm": 0.5289881229400635, "learning_rate": 0.00014187083273762332, "loss": 1.5381, "step": 22378 }, { "epoch": 0.29080489829332906, "grad_norm": 0.41413334012031555, "learning_rate": 0.00014186823327571192, "loss": 1.3201, "step": 22379 }, { "epoch": 0.29081789283724496, "grad_norm": 0.38084661960601807, "learning_rate": 0.00014186563381380057, "loss": 1.4525, "step": 22380 }, { "epoch": 0.2908308873811608, "grad_norm": 0.4375143349170685, "learning_rate": 0.00014186303435188917, "loss": 1.4496, "step": 22381 }, { "epoch": 0.2908438819250767, "grad_norm": 0.37477463483810425, "learning_rate": 0.0001418604348899778, "loss": 1.437, "step": 22382 }, { "epoch": 0.2908568764689926, "grad_norm": 0.38776907324790955, "learning_rate": 0.0001418578354280664, "loss": 1.4108, "step": 22383 }, { "epoch": 0.29086987101290845, "grad_norm": 0.3997366726398468, "learning_rate": 0.000141855235966155, "loss": 1.496, "step": 22384 }, { "epoch": 0.29088286555682435, "grad_norm": 0.410274863243103, "learning_rate": 0.00014185263650424364, "loss": 1.3605, "step": 22385 }, { "epoch": 0.2908958601007402, "grad_norm": 0.451054185628891, "learning_rate": 0.00014185003704233223, "loss": 1.4451, "step": 22386 }, { "epoch": 0.2909088546446561, "grad_norm": 0.4546957314014435, "learning_rate": 0.00014184743758042086, "loss": 1.4672, "step": 22387 }, { "epoch": 0.29092184918857195, "grad_norm": 0.38290470838546753, "learning_rate": 0.00014184483811850948, "loss": 1.3172, "step": 22388 }, { "epoch": 0.29093484373248785, "grad_norm": 0.2988804280757904, "learning_rate": 0.00014184223865659808, "loss": 1.3318, "step": 22389 }, { "epoch": 0.2909478382764037, "grad_norm": 0.32097315788269043, "learning_rate": 0.0001418396391946867, "loss": 1.4046, "step": 22390 }, { "epoch": 0.2909608328203196, "grad_norm": 0.32626211643218994, "learning_rate": 0.0001418370397327753, "loss": 1.355, "step": 22391 }, { "epoch": 0.29097382736423544, "grad_norm": 0.3629591166973114, "learning_rate": 0.00014183444027086395, "loss": 1.2518, "step": 22392 }, { "epoch": 0.29098682190815134, "grad_norm": 0.3561261296272278, "learning_rate": 0.00014183184080895255, "loss": 1.3262, "step": 22393 }, { "epoch": 0.2909998164520672, "grad_norm": 0.44344791769981384, "learning_rate": 0.00014182924134704118, "loss": 1.36, "step": 22394 }, { "epoch": 0.2910128109959831, "grad_norm": 0.3925451636314392, "learning_rate": 0.00014182664188512977, "loss": 1.4019, "step": 22395 }, { "epoch": 0.29102580553989893, "grad_norm": 0.41915786266326904, "learning_rate": 0.0001418240424232184, "loss": 1.3088, "step": 22396 }, { "epoch": 0.29103880008381483, "grad_norm": 0.3616812527179718, "learning_rate": 0.00014182144296130702, "loss": 1.3475, "step": 22397 }, { "epoch": 0.2910517946277307, "grad_norm": 0.34788963198661804, "learning_rate": 0.00014181884349939562, "loss": 1.2185, "step": 22398 }, { "epoch": 0.2910647891716466, "grad_norm": 0.4292566478252411, "learning_rate": 0.00014181624403748427, "loss": 1.3199, "step": 22399 }, { "epoch": 0.2910777837155624, "grad_norm": 0.4887067973613739, "learning_rate": 0.00014181364457557287, "loss": 1.5501, "step": 22400 }, { "epoch": 0.2910907782594783, "grad_norm": 0.39569053053855896, "learning_rate": 0.00014181104511366147, "loss": 1.4242, "step": 22401 }, { "epoch": 0.29110377280339417, "grad_norm": 0.34402787685394287, "learning_rate": 0.0001418084456517501, "loss": 1.2173, "step": 22402 }, { "epoch": 0.29111676734731007, "grad_norm": 0.45720356702804565, "learning_rate": 0.00014180584618983871, "loss": 1.4169, "step": 22403 }, { "epoch": 0.2911297618912259, "grad_norm": 0.37784141302108765, "learning_rate": 0.00014180324672792734, "loss": 1.3462, "step": 22404 }, { "epoch": 0.2911427564351418, "grad_norm": 0.41201251745224, "learning_rate": 0.00014180064726601594, "loss": 1.5398, "step": 22405 }, { "epoch": 0.29115575097905766, "grad_norm": 0.4607985317707062, "learning_rate": 0.00014179804780410456, "loss": 1.4393, "step": 22406 }, { "epoch": 0.29116874552297356, "grad_norm": 0.39322248101234436, "learning_rate": 0.00014179544834219318, "loss": 1.1679, "step": 22407 }, { "epoch": 0.2911817400668894, "grad_norm": 0.4623670279979706, "learning_rate": 0.00014179284888028178, "loss": 1.5951, "step": 22408 }, { "epoch": 0.2911947346108053, "grad_norm": 0.37860068678855896, "learning_rate": 0.0001417902494183704, "loss": 1.4761, "step": 22409 }, { "epoch": 0.29120772915472115, "grad_norm": 0.34126970171928406, "learning_rate": 0.000141787649956459, "loss": 1.4322, "step": 22410 }, { "epoch": 0.29122072369863705, "grad_norm": 0.258602499961853, "learning_rate": 0.00014178505049454766, "loss": 1.3408, "step": 22411 }, { "epoch": 0.2912337182425529, "grad_norm": 0.41152387857437134, "learning_rate": 0.00014178245103263625, "loss": 1.273, "step": 22412 }, { "epoch": 0.2912467127864688, "grad_norm": 0.441094309091568, "learning_rate": 0.00014177985157072488, "loss": 1.5128, "step": 22413 }, { "epoch": 0.29125970733038464, "grad_norm": 0.40566110610961914, "learning_rate": 0.00014177725210881348, "loss": 1.3256, "step": 22414 }, { "epoch": 0.29127270187430054, "grad_norm": 0.4134718179702759, "learning_rate": 0.0001417746526469021, "loss": 1.3458, "step": 22415 }, { "epoch": 0.2912856964182164, "grad_norm": 0.46284419298171997, "learning_rate": 0.00014177205318499072, "loss": 1.4527, "step": 22416 }, { "epoch": 0.2912986909621323, "grad_norm": 0.4089805781841278, "learning_rate": 0.00014176945372307932, "loss": 1.4001, "step": 22417 }, { "epoch": 0.29131168550604813, "grad_norm": 0.5223478674888611, "learning_rate": 0.00014176685426116795, "loss": 1.4858, "step": 22418 }, { "epoch": 0.29132468004996404, "grad_norm": 0.41372770071029663, "learning_rate": 0.00014176425479925657, "loss": 1.3085, "step": 22419 }, { "epoch": 0.2913376745938799, "grad_norm": 0.3995048701763153, "learning_rate": 0.00014176165533734517, "loss": 1.4552, "step": 22420 }, { "epoch": 0.2913506691377958, "grad_norm": 0.40415453910827637, "learning_rate": 0.0001417590558754338, "loss": 1.3393, "step": 22421 }, { "epoch": 0.2913636636817116, "grad_norm": 0.3481190800666809, "learning_rate": 0.0001417564564135224, "loss": 1.2948, "step": 22422 }, { "epoch": 0.2913766582256275, "grad_norm": 0.39264971017837524, "learning_rate": 0.00014175385695161104, "loss": 1.307, "step": 22423 }, { "epoch": 0.2913896527695434, "grad_norm": 0.36539313197135925, "learning_rate": 0.00014175125748969964, "loss": 1.4567, "step": 22424 }, { "epoch": 0.2914026473134593, "grad_norm": 0.4535652697086334, "learning_rate": 0.00014174865802778826, "loss": 1.3782, "step": 22425 }, { "epoch": 0.2914156418573751, "grad_norm": 0.37952205538749695, "learning_rate": 0.00014174605856587686, "loss": 1.3139, "step": 22426 }, { "epoch": 0.291428636401291, "grad_norm": 0.38149702548980713, "learning_rate": 0.00014174345910396548, "loss": 1.4591, "step": 22427 }, { "epoch": 0.29144163094520686, "grad_norm": 0.45030805468559265, "learning_rate": 0.0001417408596420541, "loss": 1.5303, "step": 22428 }, { "epoch": 0.29145462548912277, "grad_norm": 0.281517893075943, "learning_rate": 0.0001417382601801427, "loss": 1.3519, "step": 22429 }, { "epoch": 0.2914676200330386, "grad_norm": 0.45179814100265503, "learning_rate": 0.00014173566071823133, "loss": 1.307, "step": 22430 }, { "epoch": 0.2914806145769545, "grad_norm": 0.356875479221344, "learning_rate": 0.00014173306125631996, "loss": 1.317, "step": 22431 }, { "epoch": 0.29149360912087036, "grad_norm": 0.36342817544937134, "learning_rate": 0.00014173046179440855, "loss": 1.3144, "step": 22432 }, { "epoch": 0.29150660366478626, "grad_norm": 0.3269090950489044, "learning_rate": 0.00014172786233249718, "loss": 1.4073, "step": 22433 }, { "epoch": 0.2915195982087021, "grad_norm": 0.3639696538448334, "learning_rate": 0.0001417252628705858, "loss": 1.3686, "step": 22434 }, { "epoch": 0.291532592752618, "grad_norm": 0.46873170137405396, "learning_rate": 0.00014172266340867443, "loss": 1.3213, "step": 22435 }, { "epoch": 0.29154558729653385, "grad_norm": 0.30299144983291626, "learning_rate": 0.00014172006394676302, "loss": 1.2294, "step": 22436 }, { "epoch": 0.29155858184044975, "grad_norm": 0.46435976028442383, "learning_rate": 0.00014171746448485165, "loss": 1.4906, "step": 22437 }, { "epoch": 0.2915715763843656, "grad_norm": 0.4323849380016327, "learning_rate": 0.00014171486502294027, "loss": 1.4484, "step": 22438 }, { "epoch": 0.2915845709282815, "grad_norm": 0.43398523330688477, "learning_rate": 0.00014171226556102887, "loss": 1.5972, "step": 22439 }, { "epoch": 0.29159756547219734, "grad_norm": 0.3140985667705536, "learning_rate": 0.0001417096660991175, "loss": 1.179, "step": 22440 }, { "epoch": 0.29161056001611324, "grad_norm": 0.4335821568965912, "learning_rate": 0.0001417070666372061, "loss": 1.4986, "step": 22441 }, { "epoch": 0.2916235545600291, "grad_norm": 0.40765589475631714, "learning_rate": 0.00014170446717529474, "loss": 1.2942, "step": 22442 }, { "epoch": 0.291636549103945, "grad_norm": 0.37462612986564636, "learning_rate": 0.00014170186771338334, "loss": 1.5981, "step": 22443 }, { "epoch": 0.29164954364786083, "grad_norm": 0.4370235800743103, "learning_rate": 0.00014169926825147194, "loss": 1.5524, "step": 22444 }, { "epoch": 0.29166253819177673, "grad_norm": 0.34201183915138245, "learning_rate": 0.00014169666878956056, "loss": 1.2176, "step": 22445 }, { "epoch": 0.2916755327356926, "grad_norm": 0.3491097092628479, "learning_rate": 0.0001416940693276492, "loss": 1.4063, "step": 22446 }, { "epoch": 0.2916885272796085, "grad_norm": 0.4681399166584015, "learning_rate": 0.0001416914698657378, "loss": 1.4079, "step": 22447 }, { "epoch": 0.2917015218235243, "grad_norm": 0.3690841794013977, "learning_rate": 0.0001416888704038264, "loss": 1.2544, "step": 22448 }, { "epoch": 0.2917145163674402, "grad_norm": 0.5175653100013733, "learning_rate": 0.00014168627094191503, "loss": 1.3572, "step": 22449 }, { "epoch": 0.29172751091135607, "grad_norm": 0.36344748735427856, "learning_rate": 0.00014168367148000366, "loss": 1.3683, "step": 22450 }, { "epoch": 0.29174050545527197, "grad_norm": 0.31031015515327454, "learning_rate": 0.00014168107201809226, "loss": 1.2095, "step": 22451 }, { "epoch": 0.2917534999991878, "grad_norm": 0.34097951650619507, "learning_rate": 0.00014167847255618088, "loss": 1.3518, "step": 22452 }, { "epoch": 0.2917664945431037, "grad_norm": 0.4360902011394501, "learning_rate": 0.00014167587309426948, "loss": 1.4026, "step": 22453 }, { "epoch": 0.29177948908701956, "grad_norm": 0.36822423338890076, "learning_rate": 0.00014167327363235813, "loss": 1.2999, "step": 22454 }, { "epoch": 0.29179248363093546, "grad_norm": 0.40952008962631226, "learning_rate": 0.00014167067417044673, "loss": 1.372, "step": 22455 }, { "epoch": 0.2918054781748513, "grad_norm": 0.4163241386413574, "learning_rate": 0.00014166807470853532, "loss": 1.4147, "step": 22456 }, { "epoch": 0.2918184727187672, "grad_norm": 0.40895166993141174, "learning_rate": 0.00014166547524662395, "loss": 1.4159, "step": 22457 }, { "epoch": 0.2918314672626831, "grad_norm": 0.35260632634162903, "learning_rate": 0.00014166287578471257, "loss": 1.235, "step": 22458 }, { "epoch": 0.29184446180659895, "grad_norm": 0.34396541118621826, "learning_rate": 0.0001416602763228012, "loss": 1.4638, "step": 22459 }, { "epoch": 0.29185745635051485, "grad_norm": 0.39983272552490234, "learning_rate": 0.0001416576768608898, "loss": 1.2562, "step": 22460 }, { "epoch": 0.2918704508944307, "grad_norm": 0.33869585394859314, "learning_rate": 0.00014165507739897842, "loss": 1.3032, "step": 22461 }, { "epoch": 0.2918834454383466, "grad_norm": 0.39762187004089355, "learning_rate": 0.00014165247793706704, "loss": 1.3093, "step": 22462 }, { "epoch": 0.29189643998226245, "grad_norm": 0.38377419114112854, "learning_rate": 0.00014164987847515564, "loss": 1.5378, "step": 22463 }, { "epoch": 0.29190943452617835, "grad_norm": 0.43484777212142944, "learning_rate": 0.00014164727901324427, "loss": 1.4626, "step": 22464 }, { "epoch": 0.2919224290700942, "grad_norm": 0.28662002086639404, "learning_rate": 0.00014164467955133286, "loss": 1.3209, "step": 22465 }, { "epoch": 0.2919354236140101, "grad_norm": 0.4069984555244446, "learning_rate": 0.00014164208008942151, "loss": 1.4037, "step": 22466 }, { "epoch": 0.29194841815792594, "grad_norm": 0.36249905824661255, "learning_rate": 0.0001416394806275101, "loss": 1.2412, "step": 22467 }, { "epoch": 0.29196141270184184, "grad_norm": 0.42795199155807495, "learning_rate": 0.0001416368811655987, "loss": 1.4898, "step": 22468 }, { "epoch": 0.2919744072457577, "grad_norm": 0.40475648641586304, "learning_rate": 0.00014163428170368733, "loss": 1.5042, "step": 22469 }, { "epoch": 0.2919874017896736, "grad_norm": 0.573387622833252, "learning_rate": 0.00014163168224177596, "loss": 1.4308, "step": 22470 }, { "epoch": 0.29200039633358943, "grad_norm": 0.4268193244934082, "learning_rate": 0.00014162908277986458, "loss": 1.2659, "step": 22471 }, { "epoch": 0.29201339087750533, "grad_norm": 0.4079134464263916, "learning_rate": 0.00014162648331795318, "loss": 1.4543, "step": 22472 }, { "epoch": 0.2920263854214212, "grad_norm": 0.34781429171562195, "learning_rate": 0.0001416238838560418, "loss": 1.3429, "step": 22473 }, { "epoch": 0.2920393799653371, "grad_norm": 0.3656001091003418, "learning_rate": 0.00014162128439413043, "loss": 1.2805, "step": 22474 }, { "epoch": 0.2920523745092529, "grad_norm": 0.4839644432067871, "learning_rate": 0.00014161868493221903, "loss": 1.555, "step": 22475 }, { "epoch": 0.2920653690531688, "grad_norm": 0.43905937671661377, "learning_rate": 0.00014161608547030765, "loss": 1.5976, "step": 22476 }, { "epoch": 0.29207836359708467, "grad_norm": 0.4452652037143707, "learning_rate": 0.00014161348600839628, "loss": 1.2814, "step": 22477 }, { "epoch": 0.29209135814100057, "grad_norm": 0.41431668400764465, "learning_rate": 0.0001416108865464849, "loss": 1.4741, "step": 22478 }, { "epoch": 0.2921043526849164, "grad_norm": 0.3766244649887085, "learning_rate": 0.0001416082870845735, "loss": 1.2444, "step": 22479 }, { "epoch": 0.2921173472288323, "grad_norm": 0.49738484621047974, "learning_rate": 0.00014160568762266212, "loss": 1.5616, "step": 22480 }, { "epoch": 0.29213034177274816, "grad_norm": 0.4044188857078552, "learning_rate": 0.00014160308816075075, "loss": 1.5336, "step": 22481 }, { "epoch": 0.29214333631666406, "grad_norm": 0.41952499747276306, "learning_rate": 0.00014160048869883934, "loss": 1.6241, "step": 22482 }, { "epoch": 0.2921563308605799, "grad_norm": 0.3903580605983734, "learning_rate": 0.00014159788923692797, "loss": 1.3418, "step": 22483 }, { "epoch": 0.2921693254044958, "grad_norm": 0.4092293083667755, "learning_rate": 0.00014159528977501657, "loss": 1.2682, "step": 22484 }, { "epoch": 0.29218231994841165, "grad_norm": 0.34794676303863525, "learning_rate": 0.0001415926903131052, "loss": 1.5098, "step": 22485 }, { "epoch": 0.29219531449232755, "grad_norm": 0.41119280457496643, "learning_rate": 0.00014159009085119381, "loss": 1.544, "step": 22486 }, { "epoch": 0.2922083090362434, "grad_norm": 0.42848271131515503, "learning_rate": 0.0001415874913892824, "loss": 1.3591, "step": 22487 }, { "epoch": 0.2922213035801593, "grad_norm": 0.4547380805015564, "learning_rate": 0.00014158489192737104, "loss": 1.3756, "step": 22488 }, { "epoch": 0.29223429812407514, "grad_norm": 0.40840330719947815, "learning_rate": 0.00014158229246545966, "loss": 1.4207, "step": 22489 }, { "epoch": 0.29224729266799104, "grad_norm": 0.37380197644233704, "learning_rate": 0.00014157969300354829, "loss": 1.4824, "step": 22490 }, { "epoch": 0.2922602872119069, "grad_norm": 0.4293728172779083, "learning_rate": 0.00014157709354163688, "loss": 1.5876, "step": 22491 }, { "epoch": 0.2922732817558228, "grad_norm": 0.4066116213798523, "learning_rate": 0.0001415744940797255, "loss": 1.3308, "step": 22492 }, { "epoch": 0.29228627629973863, "grad_norm": 0.40848809480667114, "learning_rate": 0.00014157189461781413, "loss": 1.4137, "step": 22493 }, { "epoch": 0.29229927084365454, "grad_norm": 0.32751092314720154, "learning_rate": 0.00014156929515590273, "loss": 1.3348, "step": 22494 }, { "epoch": 0.2923122653875704, "grad_norm": 0.4927367866039276, "learning_rate": 0.00014156669569399135, "loss": 1.4771, "step": 22495 }, { "epoch": 0.2923252599314863, "grad_norm": 0.5153440237045288, "learning_rate": 0.00014156409623207995, "loss": 1.5009, "step": 22496 }, { "epoch": 0.2923382544754021, "grad_norm": 0.3810153603553772, "learning_rate": 0.0001415614967701686, "loss": 1.4025, "step": 22497 }, { "epoch": 0.292351249019318, "grad_norm": 0.3840183913707733, "learning_rate": 0.0001415588973082572, "loss": 1.2577, "step": 22498 }, { "epoch": 0.29236424356323387, "grad_norm": 0.31107601523399353, "learning_rate": 0.0001415562978463458, "loss": 1.292, "step": 22499 }, { "epoch": 0.2923772381071498, "grad_norm": 0.5184291005134583, "learning_rate": 0.00014155369838443442, "loss": 1.5384, "step": 22500 }, { "epoch": 0.2923902326510656, "grad_norm": 0.4737551808357239, "learning_rate": 0.00014155109892252305, "loss": 1.5834, "step": 22501 }, { "epoch": 0.2924032271949815, "grad_norm": 0.47723016142845154, "learning_rate": 0.00014154849946061167, "loss": 1.4968, "step": 22502 }, { "epoch": 0.29241622173889736, "grad_norm": 0.4479424059391022, "learning_rate": 0.00014154589999870027, "loss": 1.3638, "step": 22503 }, { "epoch": 0.29242921628281326, "grad_norm": 0.4283653497695923, "learning_rate": 0.0001415433005367889, "loss": 1.3799, "step": 22504 }, { "epoch": 0.2924422108267291, "grad_norm": 0.4166092574596405, "learning_rate": 0.00014154070107487752, "loss": 1.4467, "step": 22505 }, { "epoch": 0.292455205370645, "grad_norm": 0.5127121210098267, "learning_rate": 0.00014153810161296611, "loss": 1.39, "step": 22506 }, { "epoch": 0.29246819991456086, "grad_norm": 0.4101192355155945, "learning_rate": 0.00014153550215105474, "loss": 1.2877, "step": 22507 }, { "epoch": 0.29248119445847676, "grad_norm": 0.431466668844223, "learning_rate": 0.00014153290268914336, "loss": 1.6306, "step": 22508 }, { "epoch": 0.2924941890023926, "grad_norm": 0.4160490930080414, "learning_rate": 0.000141530303227232, "loss": 1.5468, "step": 22509 }, { "epoch": 0.2925071835463085, "grad_norm": 0.3985010087490082, "learning_rate": 0.00014152770376532059, "loss": 1.2803, "step": 22510 }, { "epoch": 0.29252017809022435, "grad_norm": 0.4404599070549011, "learning_rate": 0.00014152510430340918, "loss": 1.5743, "step": 22511 }, { "epoch": 0.29253317263414025, "grad_norm": 0.37941256165504456, "learning_rate": 0.00014152250484149783, "loss": 1.5251, "step": 22512 }, { "epoch": 0.2925461671780561, "grad_norm": 0.4095732271671295, "learning_rate": 0.00014151990537958643, "loss": 1.4543, "step": 22513 }, { "epoch": 0.292559161721972, "grad_norm": 0.39623120427131653, "learning_rate": 0.00014151730591767506, "loss": 1.5119, "step": 22514 }, { "epoch": 0.29257215626588784, "grad_norm": 0.45352184772491455, "learning_rate": 0.00014151470645576365, "loss": 1.5995, "step": 22515 }, { "epoch": 0.29258515080980374, "grad_norm": 0.34750935435295105, "learning_rate": 0.00014151210699385228, "loss": 1.1497, "step": 22516 }, { "epoch": 0.2925981453537196, "grad_norm": 0.4564761519432068, "learning_rate": 0.0001415095075319409, "loss": 1.4583, "step": 22517 }, { "epoch": 0.2926111398976355, "grad_norm": 0.40920811891555786, "learning_rate": 0.0001415069080700295, "loss": 1.3728, "step": 22518 }, { "epoch": 0.29262413444155133, "grad_norm": 0.42259570956230164, "learning_rate": 0.00014150430860811812, "loss": 1.3961, "step": 22519 }, { "epoch": 0.29263712898546723, "grad_norm": 0.33282750844955444, "learning_rate": 0.00014150170914620675, "loss": 1.3534, "step": 22520 }, { "epoch": 0.2926501235293831, "grad_norm": 0.4240438938140869, "learning_rate": 0.00014149910968429537, "loss": 1.3855, "step": 22521 }, { "epoch": 0.292663118073299, "grad_norm": 0.3609941601753235, "learning_rate": 0.00014149651022238397, "loss": 1.451, "step": 22522 }, { "epoch": 0.2926761126172148, "grad_norm": 0.29714062809944153, "learning_rate": 0.00014149391076047257, "loss": 1.2573, "step": 22523 }, { "epoch": 0.2926891071611307, "grad_norm": 0.3870581388473511, "learning_rate": 0.00014149131129856122, "loss": 1.4361, "step": 22524 }, { "epoch": 0.29270210170504657, "grad_norm": 0.37668898701667786, "learning_rate": 0.00014148871183664982, "loss": 1.3807, "step": 22525 }, { "epoch": 0.29271509624896247, "grad_norm": 0.43750301003456116, "learning_rate": 0.00014148611237473844, "loss": 1.5885, "step": 22526 }, { "epoch": 0.2927280907928783, "grad_norm": 0.36418819427490234, "learning_rate": 0.00014148351291282704, "loss": 1.3241, "step": 22527 }, { "epoch": 0.2927410853367942, "grad_norm": 0.5421130061149597, "learning_rate": 0.00014148091345091566, "loss": 1.5953, "step": 22528 }, { "epoch": 0.29275407988071006, "grad_norm": 0.4437992572784424, "learning_rate": 0.0001414783139890043, "loss": 1.4474, "step": 22529 }, { "epoch": 0.29276707442462596, "grad_norm": 0.48086389899253845, "learning_rate": 0.00014147571452709289, "loss": 1.3507, "step": 22530 }, { "epoch": 0.2927800689685418, "grad_norm": 0.5275854468345642, "learning_rate": 0.0001414731150651815, "loss": 1.4328, "step": 22531 }, { "epoch": 0.2927930635124577, "grad_norm": 0.35602810978889465, "learning_rate": 0.00014147051560327013, "loss": 1.3794, "step": 22532 }, { "epoch": 0.29280605805637355, "grad_norm": 0.3511948585510254, "learning_rate": 0.00014146791614135876, "loss": 1.325, "step": 22533 }, { "epoch": 0.29281905260028945, "grad_norm": 0.35132676362991333, "learning_rate": 0.00014146531667944736, "loss": 1.6158, "step": 22534 }, { "epoch": 0.29283204714420535, "grad_norm": 0.24422088265419006, "learning_rate": 0.00014146271721753598, "loss": 1.1416, "step": 22535 }, { "epoch": 0.2928450416881212, "grad_norm": 0.4303850829601288, "learning_rate": 0.0001414601177556246, "loss": 1.4665, "step": 22536 }, { "epoch": 0.2928580362320371, "grad_norm": 0.44975119829177856, "learning_rate": 0.0001414575182937132, "loss": 1.5136, "step": 22537 }, { "epoch": 0.29287103077595295, "grad_norm": 0.3508610725402832, "learning_rate": 0.00014145491883180183, "loss": 1.3161, "step": 22538 }, { "epoch": 0.29288402531986885, "grad_norm": 0.46991166472435, "learning_rate": 0.00014145231936989042, "loss": 1.4926, "step": 22539 }, { "epoch": 0.2928970198637847, "grad_norm": 0.5001912117004395, "learning_rate": 0.00014144971990797905, "loss": 1.3944, "step": 22540 }, { "epoch": 0.2929100144077006, "grad_norm": 0.4130523204803467, "learning_rate": 0.00014144712044606767, "loss": 1.5294, "step": 22541 }, { "epoch": 0.29292300895161644, "grad_norm": 0.4101564288139343, "learning_rate": 0.00014144452098415627, "loss": 1.5661, "step": 22542 }, { "epoch": 0.29293600349553234, "grad_norm": 0.2994188070297241, "learning_rate": 0.0001414419215222449, "loss": 1.3108, "step": 22543 }, { "epoch": 0.2929489980394482, "grad_norm": 0.3450506031513214, "learning_rate": 0.00014143932206033352, "loss": 1.2291, "step": 22544 }, { "epoch": 0.2929619925833641, "grad_norm": 0.43679311871528625, "learning_rate": 0.00014143672259842214, "loss": 1.3722, "step": 22545 }, { "epoch": 0.29297498712727993, "grad_norm": 0.3442310690879822, "learning_rate": 0.00014143412313651074, "loss": 1.2909, "step": 22546 }, { "epoch": 0.29298798167119583, "grad_norm": 0.3955119550228119, "learning_rate": 0.00014143152367459937, "loss": 1.5496, "step": 22547 }, { "epoch": 0.2930009762151117, "grad_norm": 0.28952857851982117, "learning_rate": 0.000141428924212688, "loss": 1.4617, "step": 22548 }, { "epoch": 0.2930139707590276, "grad_norm": 0.44652268290519714, "learning_rate": 0.0001414263247507766, "loss": 1.4031, "step": 22549 }, { "epoch": 0.2930269653029434, "grad_norm": 0.38711878657341003, "learning_rate": 0.0001414237252888652, "loss": 1.4184, "step": 22550 }, { "epoch": 0.2930399598468593, "grad_norm": 0.43617695569992065, "learning_rate": 0.00014142112582695384, "loss": 1.4482, "step": 22551 }, { "epoch": 0.29305295439077517, "grad_norm": 0.32677868008613586, "learning_rate": 0.00014141852636504243, "loss": 1.568, "step": 22552 }, { "epoch": 0.29306594893469107, "grad_norm": 0.44942548871040344, "learning_rate": 0.00014141592690313106, "loss": 1.3152, "step": 22553 }, { "epoch": 0.2930789434786069, "grad_norm": 0.4771665036678314, "learning_rate": 0.00014141332744121966, "loss": 1.5968, "step": 22554 }, { "epoch": 0.2930919380225228, "grad_norm": 0.4449734687805176, "learning_rate": 0.0001414107279793083, "loss": 1.351, "step": 22555 }, { "epoch": 0.29310493256643866, "grad_norm": 0.37848758697509766, "learning_rate": 0.0001414081285173969, "loss": 1.6488, "step": 22556 }, { "epoch": 0.29311792711035456, "grad_norm": 0.39109450578689575, "learning_rate": 0.00014140552905548553, "loss": 1.2372, "step": 22557 }, { "epoch": 0.2931309216542704, "grad_norm": 0.317754864692688, "learning_rate": 0.00014140292959357413, "loss": 1.2596, "step": 22558 }, { "epoch": 0.2931439161981863, "grad_norm": 0.4028441905975342, "learning_rate": 0.00014140033013166275, "loss": 1.2158, "step": 22559 }, { "epoch": 0.29315691074210215, "grad_norm": 0.36479660868644714, "learning_rate": 0.00014139773066975138, "loss": 1.3898, "step": 22560 }, { "epoch": 0.29316990528601805, "grad_norm": 0.39816340804100037, "learning_rate": 0.00014139513120783997, "loss": 1.3612, "step": 22561 }, { "epoch": 0.2931828998299339, "grad_norm": 0.393535852432251, "learning_rate": 0.0001413925317459286, "loss": 1.5222, "step": 22562 }, { "epoch": 0.2931958943738498, "grad_norm": 0.3182189166545868, "learning_rate": 0.00014138993228401722, "loss": 1.431, "step": 22563 }, { "epoch": 0.29320888891776564, "grad_norm": 0.37469854950904846, "learning_rate": 0.00014138733282210585, "loss": 1.3974, "step": 22564 }, { "epoch": 0.29322188346168154, "grad_norm": 0.3932574689388275, "learning_rate": 0.00014138473336019444, "loss": 1.3816, "step": 22565 }, { "epoch": 0.2932348780055974, "grad_norm": 0.4263599216938019, "learning_rate": 0.00014138213389828304, "loss": 1.6606, "step": 22566 }, { "epoch": 0.2932478725495133, "grad_norm": 0.4822588860988617, "learning_rate": 0.0001413795344363717, "loss": 1.4894, "step": 22567 }, { "epoch": 0.29326086709342913, "grad_norm": 0.3361617624759674, "learning_rate": 0.0001413769349744603, "loss": 1.3536, "step": 22568 }, { "epoch": 0.29327386163734503, "grad_norm": 0.39120563864707947, "learning_rate": 0.00014137433551254891, "loss": 1.299, "step": 22569 }, { "epoch": 0.2932868561812609, "grad_norm": 0.4127618670463562, "learning_rate": 0.0001413717360506375, "loss": 1.6053, "step": 22570 }, { "epoch": 0.2932998507251768, "grad_norm": 0.3370152711868286, "learning_rate": 0.00014136913658872614, "loss": 1.4671, "step": 22571 }, { "epoch": 0.2933128452690926, "grad_norm": 0.49673357605934143, "learning_rate": 0.00014136653712681476, "loss": 1.425, "step": 22572 }, { "epoch": 0.2933258398130085, "grad_norm": 0.3558320701122284, "learning_rate": 0.00014136393766490336, "loss": 1.2506, "step": 22573 }, { "epoch": 0.29333883435692437, "grad_norm": 0.44406816363334656, "learning_rate": 0.00014136133820299198, "loss": 1.322, "step": 22574 }, { "epoch": 0.2933518289008403, "grad_norm": 0.407133549451828, "learning_rate": 0.0001413587387410806, "loss": 1.5989, "step": 22575 }, { "epoch": 0.2933648234447561, "grad_norm": 0.4633191227912903, "learning_rate": 0.00014135613927916923, "loss": 1.4413, "step": 22576 }, { "epoch": 0.293377817988672, "grad_norm": 0.4019451439380646, "learning_rate": 0.00014135353981725783, "loss": 1.4451, "step": 22577 }, { "epoch": 0.29339081253258786, "grad_norm": 0.3871854245662689, "learning_rate": 0.00014135094035534643, "loss": 1.5382, "step": 22578 }, { "epoch": 0.29340380707650376, "grad_norm": 0.42469462752342224, "learning_rate": 0.00014134834089343508, "loss": 1.359, "step": 22579 }, { "epoch": 0.2934168016204196, "grad_norm": 0.4095105826854706, "learning_rate": 0.00014134574143152368, "loss": 1.1768, "step": 22580 }, { "epoch": 0.2934297961643355, "grad_norm": 0.2695428729057312, "learning_rate": 0.0001413431419696123, "loss": 1.5073, "step": 22581 }, { "epoch": 0.29344279070825136, "grad_norm": 0.41797783970832825, "learning_rate": 0.00014134054250770092, "loss": 1.4181, "step": 22582 }, { "epoch": 0.29345578525216726, "grad_norm": 0.4157312214374542, "learning_rate": 0.00014133794304578952, "loss": 1.4828, "step": 22583 }, { "epoch": 0.2934687797960831, "grad_norm": 0.3593350350856781, "learning_rate": 0.00014133534358387815, "loss": 1.2943, "step": 22584 }, { "epoch": 0.293481774339999, "grad_norm": 0.39659184217453003, "learning_rate": 0.00014133274412196674, "loss": 1.51, "step": 22585 }, { "epoch": 0.29349476888391485, "grad_norm": 0.4719579219818115, "learning_rate": 0.0001413301446600554, "loss": 1.393, "step": 22586 }, { "epoch": 0.29350776342783075, "grad_norm": 0.3489084541797638, "learning_rate": 0.000141327545198144, "loss": 1.2905, "step": 22587 }, { "epoch": 0.2935207579717466, "grad_norm": 0.3658064305782318, "learning_rate": 0.00014132494573623262, "loss": 1.4529, "step": 22588 }, { "epoch": 0.2935337525156625, "grad_norm": 0.41651803255081177, "learning_rate": 0.00014132234627432121, "loss": 1.4316, "step": 22589 }, { "epoch": 0.29354674705957834, "grad_norm": 0.33188119530677795, "learning_rate": 0.00014131974681240984, "loss": 1.4201, "step": 22590 }, { "epoch": 0.29355974160349424, "grad_norm": 0.431319922208786, "learning_rate": 0.00014131714735049846, "loss": 1.3938, "step": 22591 }, { "epoch": 0.2935727361474101, "grad_norm": 0.3691128194332123, "learning_rate": 0.00014131454788858706, "loss": 1.3736, "step": 22592 }, { "epoch": 0.293585730691326, "grad_norm": 0.3895038962364197, "learning_rate": 0.00014131194842667569, "loss": 1.3666, "step": 22593 }, { "epoch": 0.29359872523524183, "grad_norm": 0.3395620882511139, "learning_rate": 0.0001413093489647643, "loss": 1.371, "step": 22594 }, { "epoch": 0.29361171977915773, "grad_norm": 0.45897865295410156, "learning_rate": 0.0001413067495028529, "loss": 1.4082, "step": 22595 }, { "epoch": 0.2936247143230736, "grad_norm": 0.4305770695209503, "learning_rate": 0.00014130415004094153, "loss": 1.3365, "step": 22596 }, { "epoch": 0.2936377088669895, "grad_norm": 0.45783206820487976, "learning_rate": 0.00014130155057903013, "loss": 1.2981, "step": 22597 }, { "epoch": 0.2936507034109053, "grad_norm": 0.4359472990036011, "learning_rate": 0.00014129895111711878, "loss": 1.5193, "step": 22598 }, { "epoch": 0.2936636979548212, "grad_norm": 0.4860736131668091, "learning_rate": 0.00014129635165520738, "loss": 1.4043, "step": 22599 }, { "epoch": 0.29367669249873707, "grad_norm": 0.40545913577079773, "learning_rate": 0.000141293752193296, "loss": 1.4422, "step": 22600 }, { "epoch": 0.29368968704265297, "grad_norm": 0.452754944562912, "learning_rate": 0.0001412911527313846, "loss": 1.3803, "step": 22601 }, { "epoch": 0.2937026815865688, "grad_norm": 0.40198904275894165, "learning_rate": 0.00014128855326947322, "loss": 1.453, "step": 22602 }, { "epoch": 0.2937156761304847, "grad_norm": 0.38908687233924866, "learning_rate": 0.00014128595380756185, "loss": 1.2037, "step": 22603 }, { "epoch": 0.29372867067440056, "grad_norm": 0.3717786371707916, "learning_rate": 0.00014128335434565045, "loss": 1.4296, "step": 22604 }, { "epoch": 0.29374166521831646, "grad_norm": 0.31460779905319214, "learning_rate": 0.00014128075488373907, "loss": 1.3849, "step": 22605 }, { "epoch": 0.2937546597622323, "grad_norm": 0.4288352429866791, "learning_rate": 0.0001412781554218277, "loss": 1.2045, "step": 22606 }, { "epoch": 0.2937676543061482, "grad_norm": 0.343243807554245, "learning_rate": 0.0001412755559599163, "loss": 1.4274, "step": 22607 }, { "epoch": 0.29378064885006405, "grad_norm": 0.4744374752044678, "learning_rate": 0.00014127295649800492, "loss": 1.3088, "step": 22608 }, { "epoch": 0.29379364339397995, "grad_norm": 0.3874797523021698, "learning_rate": 0.00014127035703609351, "loss": 1.5031, "step": 22609 }, { "epoch": 0.29380663793789585, "grad_norm": 0.4258202612400055, "learning_rate": 0.00014126775757418217, "loss": 1.3359, "step": 22610 }, { "epoch": 0.2938196324818117, "grad_norm": 0.33291882276535034, "learning_rate": 0.00014126515811227076, "loss": 1.392, "step": 22611 }, { "epoch": 0.2938326270257276, "grad_norm": 0.38168609142303467, "learning_rate": 0.0001412625586503594, "loss": 1.2818, "step": 22612 }, { "epoch": 0.29384562156964344, "grad_norm": 0.38539960980415344, "learning_rate": 0.00014125995918844799, "loss": 1.2209, "step": 22613 }, { "epoch": 0.29385861611355935, "grad_norm": 0.46443766355514526, "learning_rate": 0.0001412573597265366, "loss": 1.3587, "step": 22614 }, { "epoch": 0.2938716106574752, "grad_norm": 0.47241437435150146, "learning_rate": 0.00014125476026462523, "loss": 1.4568, "step": 22615 }, { "epoch": 0.2938846052013911, "grad_norm": 0.43388858437538147, "learning_rate": 0.00014125216080271383, "loss": 1.3528, "step": 22616 }, { "epoch": 0.29389759974530694, "grad_norm": 0.42175722122192383, "learning_rate": 0.00014124956134080246, "loss": 1.367, "step": 22617 }, { "epoch": 0.29391059428922284, "grad_norm": 0.3789048492908478, "learning_rate": 0.00014124696187889108, "loss": 1.5094, "step": 22618 }, { "epoch": 0.2939235888331387, "grad_norm": 0.44789692759513855, "learning_rate": 0.0001412443624169797, "loss": 1.5487, "step": 22619 }, { "epoch": 0.2939365833770546, "grad_norm": 0.4851679801940918, "learning_rate": 0.0001412417629550683, "loss": 1.3995, "step": 22620 }, { "epoch": 0.29394957792097043, "grad_norm": 0.4190042316913605, "learning_rate": 0.00014123916349315693, "loss": 1.3758, "step": 22621 }, { "epoch": 0.29396257246488633, "grad_norm": 0.4972772002220154, "learning_rate": 0.00014123656403124555, "loss": 1.3953, "step": 22622 }, { "epoch": 0.2939755670088022, "grad_norm": 0.37061765789985657, "learning_rate": 0.00014123396456933415, "loss": 1.4499, "step": 22623 }, { "epoch": 0.2939885615527181, "grad_norm": 0.45653384923934937, "learning_rate": 0.00014123136510742277, "loss": 1.4304, "step": 22624 }, { "epoch": 0.2940015560966339, "grad_norm": 0.685883641242981, "learning_rate": 0.0001412287656455114, "loss": 1.2556, "step": 22625 }, { "epoch": 0.2940145506405498, "grad_norm": 0.3788043260574341, "learning_rate": 0.0001412261661836, "loss": 1.5572, "step": 22626 }, { "epoch": 0.29402754518446567, "grad_norm": 0.33320727944374084, "learning_rate": 0.00014122356672168862, "loss": 1.4471, "step": 22627 }, { "epoch": 0.29404053972838157, "grad_norm": 0.41008460521698, "learning_rate": 0.00014122096725977722, "loss": 1.5881, "step": 22628 }, { "epoch": 0.2940535342722974, "grad_norm": 0.28735849261283875, "learning_rate": 0.00014121836779786587, "loss": 1.4514, "step": 22629 }, { "epoch": 0.2940665288162133, "grad_norm": 0.3744684159755707, "learning_rate": 0.00014121576833595447, "loss": 1.4418, "step": 22630 }, { "epoch": 0.29407952336012916, "grad_norm": 0.3476904332637787, "learning_rate": 0.0001412131688740431, "loss": 1.5407, "step": 22631 }, { "epoch": 0.29409251790404506, "grad_norm": 0.5504875183105469, "learning_rate": 0.0001412105694121317, "loss": 1.4748, "step": 22632 }, { "epoch": 0.2941055124479609, "grad_norm": 0.5731304287910461, "learning_rate": 0.0001412079699502203, "loss": 1.3007, "step": 22633 }, { "epoch": 0.2941185069918768, "grad_norm": 0.4084780514240265, "learning_rate": 0.00014120537048830894, "loss": 1.3574, "step": 22634 }, { "epoch": 0.29413150153579265, "grad_norm": 0.4878765344619751, "learning_rate": 0.00014120277102639753, "loss": 1.3662, "step": 22635 }, { "epoch": 0.29414449607970855, "grad_norm": 0.38960886001586914, "learning_rate": 0.00014120017156448616, "loss": 1.5241, "step": 22636 }, { "epoch": 0.2941574906236244, "grad_norm": 0.4260435402393341, "learning_rate": 0.00014119757210257478, "loss": 1.3908, "step": 22637 }, { "epoch": 0.2941704851675403, "grad_norm": 0.41243478655815125, "learning_rate": 0.00014119497264066338, "loss": 1.47, "step": 22638 }, { "epoch": 0.29418347971145614, "grad_norm": 0.4653548002243042, "learning_rate": 0.000141192373178752, "loss": 1.5074, "step": 22639 }, { "epoch": 0.29419647425537204, "grad_norm": 0.48945680260658264, "learning_rate": 0.0001411897737168406, "loss": 1.3663, "step": 22640 }, { "epoch": 0.2942094687992879, "grad_norm": 0.40850454568862915, "learning_rate": 0.00014118717425492925, "loss": 1.5302, "step": 22641 }, { "epoch": 0.2942224633432038, "grad_norm": 0.48850518465042114, "learning_rate": 0.00014118457479301785, "loss": 1.406, "step": 22642 }, { "epoch": 0.29423545788711963, "grad_norm": 0.4024600684642792, "learning_rate": 0.00014118197533110648, "loss": 1.3428, "step": 22643 }, { "epoch": 0.29424845243103553, "grad_norm": 0.47545182704925537, "learning_rate": 0.00014117937586919507, "loss": 1.4902, "step": 22644 }, { "epoch": 0.2942614469749514, "grad_norm": 0.44334152340888977, "learning_rate": 0.0001411767764072837, "loss": 1.3876, "step": 22645 }, { "epoch": 0.2942744415188673, "grad_norm": 0.2862689197063446, "learning_rate": 0.00014117417694537232, "loss": 1.4468, "step": 22646 }, { "epoch": 0.2942874360627831, "grad_norm": 0.42577341198921204, "learning_rate": 0.00014117157748346092, "loss": 1.3768, "step": 22647 }, { "epoch": 0.294300430606699, "grad_norm": 0.41813918948173523, "learning_rate": 0.00014116897802154954, "loss": 1.5569, "step": 22648 }, { "epoch": 0.29431342515061487, "grad_norm": 0.4774334132671356, "learning_rate": 0.00014116637855963817, "loss": 1.3415, "step": 22649 }, { "epoch": 0.29432641969453077, "grad_norm": 0.42769134044647217, "learning_rate": 0.00014116377909772677, "loss": 1.2718, "step": 22650 }, { "epoch": 0.2943394142384466, "grad_norm": 0.45138004422187805, "learning_rate": 0.0001411611796358154, "loss": 1.4508, "step": 22651 }, { "epoch": 0.2943524087823625, "grad_norm": 0.4319975972175598, "learning_rate": 0.000141158580173904, "loss": 1.3672, "step": 22652 }, { "epoch": 0.29436540332627836, "grad_norm": 0.38144925236701965, "learning_rate": 0.00014115598071199264, "loss": 1.3791, "step": 22653 }, { "epoch": 0.29437839787019426, "grad_norm": 0.41073185205459595, "learning_rate": 0.00014115338125008124, "loss": 1.5162, "step": 22654 }, { "epoch": 0.2943913924141101, "grad_norm": 0.35939306020736694, "learning_rate": 0.00014115078178816986, "loss": 1.5065, "step": 22655 }, { "epoch": 0.294404386958026, "grad_norm": 0.407172828912735, "learning_rate": 0.00014114818232625846, "loss": 1.3363, "step": 22656 }, { "epoch": 0.29441738150194185, "grad_norm": 0.3219147324562073, "learning_rate": 0.00014114558286434708, "loss": 1.2946, "step": 22657 }, { "epoch": 0.29443037604585776, "grad_norm": 0.3669741451740265, "learning_rate": 0.0001411429834024357, "loss": 1.409, "step": 22658 }, { "epoch": 0.2944433705897736, "grad_norm": 0.4027775228023529, "learning_rate": 0.0001411403839405243, "loss": 1.2179, "step": 22659 }, { "epoch": 0.2944563651336895, "grad_norm": 0.31190550327301025, "learning_rate": 0.00014113778447861296, "loss": 1.2808, "step": 22660 }, { "epoch": 0.29446935967760535, "grad_norm": 0.28889214992523193, "learning_rate": 0.00014113518501670155, "loss": 1.4286, "step": 22661 }, { "epoch": 0.29448235422152125, "grad_norm": 0.38850679993629456, "learning_rate": 0.00014113258555479015, "loss": 1.2951, "step": 22662 }, { "epoch": 0.2944953487654371, "grad_norm": 0.41878315806388855, "learning_rate": 0.00014112998609287878, "loss": 1.469, "step": 22663 }, { "epoch": 0.294508343309353, "grad_norm": 0.4294496476650238, "learning_rate": 0.0001411273866309674, "loss": 1.4834, "step": 22664 }, { "epoch": 0.29452133785326884, "grad_norm": 0.42085373401641846, "learning_rate": 0.00014112478716905603, "loss": 1.416, "step": 22665 }, { "epoch": 0.29453433239718474, "grad_norm": 0.46573328971862793, "learning_rate": 0.00014112218770714462, "loss": 1.4659, "step": 22666 }, { "epoch": 0.2945473269411006, "grad_norm": 0.3660159409046173, "learning_rate": 0.00014111958824523325, "loss": 1.318, "step": 22667 }, { "epoch": 0.2945603214850165, "grad_norm": 0.37564074993133545, "learning_rate": 0.00014111698878332187, "loss": 1.3054, "step": 22668 }, { "epoch": 0.29457331602893233, "grad_norm": 0.46375614404678345, "learning_rate": 0.00014111438932141047, "loss": 1.6232, "step": 22669 }, { "epoch": 0.29458631057284823, "grad_norm": 0.3933001160621643, "learning_rate": 0.0001411117898594991, "loss": 1.4715, "step": 22670 }, { "epoch": 0.2945993051167641, "grad_norm": 0.4217098653316498, "learning_rate": 0.0001411091903975877, "loss": 1.6454, "step": 22671 }, { "epoch": 0.29461229966068, "grad_norm": 0.34935858845710754, "learning_rate": 0.00014110659093567634, "loss": 1.402, "step": 22672 }, { "epoch": 0.2946252942045958, "grad_norm": 0.4610530734062195, "learning_rate": 0.00014110399147376494, "loss": 1.4101, "step": 22673 }, { "epoch": 0.2946382887485117, "grad_norm": 0.3640984296798706, "learning_rate": 0.00014110139201185354, "loss": 1.3667, "step": 22674 }, { "epoch": 0.29465128329242757, "grad_norm": 0.4347236156463623, "learning_rate": 0.00014109879254994216, "loss": 1.4123, "step": 22675 }, { "epoch": 0.29466427783634347, "grad_norm": 0.3800946772098541, "learning_rate": 0.00014109619308803079, "loss": 1.2625, "step": 22676 }, { "epoch": 0.2946772723802593, "grad_norm": 0.3644084930419922, "learning_rate": 0.0001410935936261194, "loss": 1.3465, "step": 22677 }, { "epoch": 0.2946902669241752, "grad_norm": 0.4131089448928833, "learning_rate": 0.000141090994164208, "loss": 1.4395, "step": 22678 }, { "epoch": 0.29470326146809106, "grad_norm": 0.4133332371711731, "learning_rate": 0.00014108839470229663, "loss": 1.2441, "step": 22679 }, { "epoch": 0.29471625601200696, "grad_norm": 0.38967373967170715, "learning_rate": 0.00014108579524038526, "loss": 1.5048, "step": 22680 }, { "epoch": 0.2947292505559228, "grad_norm": 0.38563454151153564, "learning_rate": 0.00014108319577847385, "loss": 1.271, "step": 22681 }, { "epoch": 0.2947422450998387, "grad_norm": 0.4786517918109894, "learning_rate": 0.00014108059631656248, "loss": 1.3529, "step": 22682 }, { "epoch": 0.29475523964375455, "grad_norm": 0.3604390025138855, "learning_rate": 0.00014107799685465108, "loss": 1.5334, "step": 22683 }, { "epoch": 0.29476823418767045, "grad_norm": 0.4230327904224396, "learning_rate": 0.00014107539739273973, "loss": 1.3628, "step": 22684 }, { "epoch": 0.2947812287315863, "grad_norm": 0.4077970087528229, "learning_rate": 0.00014107279793082833, "loss": 1.2496, "step": 22685 }, { "epoch": 0.2947942232755022, "grad_norm": 0.48415517807006836, "learning_rate": 0.00014107019846891695, "loss": 1.5605, "step": 22686 }, { "epoch": 0.2948072178194181, "grad_norm": 0.40656521916389465, "learning_rate": 0.00014106759900700555, "loss": 1.3913, "step": 22687 }, { "epoch": 0.29482021236333394, "grad_norm": 0.47370409965515137, "learning_rate": 0.00014106499954509417, "loss": 1.39, "step": 22688 }, { "epoch": 0.29483320690724985, "grad_norm": 0.38415780663490295, "learning_rate": 0.0001410624000831828, "loss": 1.4926, "step": 22689 }, { "epoch": 0.2948462014511657, "grad_norm": 0.3729836940765381, "learning_rate": 0.0001410598006212714, "loss": 1.4634, "step": 22690 }, { "epoch": 0.2948591959950816, "grad_norm": 0.5336193442344666, "learning_rate": 0.00014105720115936002, "loss": 1.5676, "step": 22691 }, { "epoch": 0.29487219053899744, "grad_norm": 0.3772420585155487, "learning_rate": 0.00014105460169744864, "loss": 1.5012, "step": 22692 }, { "epoch": 0.29488518508291334, "grad_norm": 0.4394102394580841, "learning_rate": 0.00014105200223553724, "loss": 1.3906, "step": 22693 }, { "epoch": 0.2948981796268292, "grad_norm": 0.4405618906021118, "learning_rate": 0.00014104940277362586, "loss": 1.3566, "step": 22694 }, { "epoch": 0.2949111741707451, "grad_norm": 0.4179721474647522, "learning_rate": 0.0001410468033117145, "loss": 1.3191, "step": 22695 }, { "epoch": 0.29492416871466093, "grad_norm": 0.3888811767101288, "learning_rate": 0.0001410442038498031, "loss": 1.6286, "step": 22696 }, { "epoch": 0.29493716325857683, "grad_norm": 0.41354015469551086, "learning_rate": 0.0001410416043878917, "loss": 1.2846, "step": 22697 }, { "epoch": 0.2949501578024927, "grad_norm": 0.3298114538192749, "learning_rate": 0.00014103900492598033, "loss": 1.246, "step": 22698 }, { "epoch": 0.2949631523464086, "grad_norm": 0.4141797423362732, "learning_rate": 0.00014103640546406896, "loss": 1.1815, "step": 22699 }, { "epoch": 0.2949761468903244, "grad_norm": 0.41696545481681824, "learning_rate": 0.00014103380600215756, "loss": 1.4957, "step": 22700 }, { "epoch": 0.2949891414342403, "grad_norm": 0.3861754536628723, "learning_rate": 0.00014103120654024618, "loss": 1.3783, "step": 22701 }, { "epoch": 0.29500213597815617, "grad_norm": 0.4896663427352905, "learning_rate": 0.00014102860707833478, "loss": 1.3522, "step": 22702 }, { "epoch": 0.29501513052207207, "grad_norm": 0.3898656368255615, "learning_rate": 0.00014102600761642343, "loss": 1.3368, "step": 22703 }, { "epoch": 0.2950281250659879, "grad_norm": 0.34179216623306274, "learning_rate": 0.00014102340815451203, "loss": 1.3682, "step": 22704 }, { "epoch": 0.2950411196099038, "grad_norm": 0.44247201085090637, "learning_rate": 0.00014102080869260062, "loss": 1.4201, "step": 22705 }, { "epoch": 0.29505411415381966, "grad_norm": 0.43878957629203796, "learning_rate": 0.00014101820923068925, "loss": 1.3786, "step": 22706 }, { "epoch": 0.29506710869773556, "grad_norm": 0.4125325381755829, "learning_rate": 0.00014101560976877787, "loss": 1.4007, "step": 22707 }, { "epoch": 0.2950801032416514, "grad_norm": 0.43359920382499695, "learning_rate": 0.0001410130103068665, "loss": 1.5341, "step": 22708 }, { "epoch": 0.2950930977855673, "grad_norm": 0.45451676845550537, "learning_rate": 0.0001410104108449551, "loss": 1.4745, "step": 22709 }, { "epoch": 0.29510609232948315, "grad_norm": 0.37634754180908203, "learning_rate": 0.00014100781138304372, "loss": 1.4869, "step": 22710 }, { "epoch": 0.29511908687339905, "grad_norm": 0.5383051037788391, "learning_rate": 0.00014100521192113234, "loss": 1.4963, "step": 22711 }, { "epoch": 0.2951320814173149, "grad_norm": 0.3083975613117218, "learning_rate": 0.00014100261245922094, "loss": 1.08, "step": 22712 }, { "epoch": 0.2951450759612308, "grad_norm": 0.3205197751522064, "learning_rate": 0.00014100001299730957, "loss": 1.2795, "step": 22713 }, { "epoch": 0.29515807050514664, "grad_norm": 0.3008273243904114, "learning_rate": 0.00014099741353539816, "loss": 1.5117, "step": 22714 }, { "epoch": 0.29517106504906254, "grad_norm": 0.3480885922908783, "learning_rate": 0.00014099481407348682, "loss": 1.3758, "step": 22715 }, { "epoch": 0.2951840595929784, "grad_norm": 0.3936046361923218, "learning_rate": 0.0001409922146115754, "loss": 1.3568, "step": 22716 }, { "epoch": 0.2951970541368943, "grad_norm": 0.45173177123069763, "learning_rate": 0.000140989615149664, "loss": 1.5368, "step": 22717 }, { "epoch": 0.29521004868081013, "grad_norm": 0.47739657759666443, "learning_rate": 0.00014098701568775263, "loss": 1.4721, "step": 22718 }, { "epoch": 0.29522304322472603, "grad_norm": 0.3324936628341675, "learning_rate": 0.00014098441622584126, "loss": 1.5117, "step": 22719 }, { "epoch": 0.2952360377686419, "grad_norm": 0.36412155628204346, "learning_rate": 0.00014098181676392988, "loss": 1.4161, "step": 22720 }, { "epoch": 0.2952490323125578, "grad_norm": 0.4264192581176758, "learning_rate": 0.00014097921730201848, "loss": 1.3919, "step": 22721 }, { "epoch": 0.2952620268564736, "grad_norm": 0.37785351276397705, "learning_rate": 0.0001409766178401071, "loss": 1.4503, "step": 22722 }, { "epoch": 0.2952750214003895, "grad_norm": 0.445453941822052, "learning_rate": 0.00014097401837819573, "loss": 1.5297, "step": 22723 }, { "epoch": 0.29528801594430537, "grad_norm": 0.4093923270702362, "learning_rate": 0.00014097141891628433, "loss": 1.3703, "step": 22724 }, { "epoch": 0.29530101048822127, "grad_norm": 0.4471838176250458, "learning_rate": 0.00014096881945437295, "loss": 1.4905, "step": 22725 }, { "epoch": 0.2953140050321371, "grad_norm": 0.3949466049671173, "learning_rate": 0.00014096621999246155, "loss": 1.5135, "step": 22726 }, { "epoch": 0.295326999576053, "grad_norm": 0.4689790606498718, "learning_rate": 0.0001409636205305502, "loss": 1.5711, "step": 22727 }, { "epoch": 0.29533999411996886, "grad_norm": 0.433599054813385, "learning_rate": 0.0001409610210686388, "loss": 1.6089, "step": 22728 }, { "epoch": 0.29535298866388476, "grad_norm": 0.3340052664279938, "learning_rate": 0.0001409584216067274, "loss": 1.4621, "step": 22729 }, { "epoch": 0.2953659832078006, "grad_norm": 0.3817252516746521, "learning_rate": 0.00014095582214481602, "loss": 1.3275, "step": 22730 }, { "epoch": 0.2953789777517165, "grad_norm": 0.3421393930912018, "learning_rate": 0.00014095322268290464, "loss": 1.4728, "step": 22731 }, { "epoch": 0.29539197229563235, "grad_norm": 0.30604031682014465, "learning_rate": 0.00014095062322099327, "loss": 1.0493, "step": 22732 }, { "epoch": 0.29540496683954826, "grad_norm": 0.3763476610183716, "learning_rate": 0.00014094802375908187, "loss": 1.5927, "step": 22733 }, { "epoch": 0.2954179613834641, "grad_norm": 0.3829968571662903, "learning_rate": 0.0001409454242971705, "loss": 1.5236, "step": 22734 }, { "epoch": 0.29543095592738, "grad_norm": 0.36088457703590393, "learning_rate": 0.00014094282483525912, "loss": 1.3462, "step": 22735 }, { "epoch": 0.29544395047129585, "grad_norm": 0.3757702112197876, "learning_rate": 0.0001409402253733477, "loss": 1.3773, "step": 22736 }, { "epoch": 0.29545694501521175, "grad_norm": 0.4474758207798004, "learning_rate": 0.00014093762591143634, "loss": 1.5072, "step": 22737 }, { "epoch": 0.2954699395591276, "grad_norm": 0.3275909423828125, "learning_rate": 0.00014093502644952496, "loss": 1.3505, "step": 22738 }, { "epoch": 0.2954829341030435, "grad_norm": 0.4505625367164612, "learning_rate": 0.00014093242698761359, "loss": 1.4056, "step": 22739 }, { "epoch": 0.29549592864695934, "grad_norm": 0.4183221161365509, "learning_rate": 0.00014092982752570218, "loss": 1.4081, "step": 22740 }, { "epoch": 0.29550892319087524, "grad_norm": 0.35911083221435547, "learning_rate": 0.0001409272280637908, "loss": 1.4545, "step": 22741 }, { "epoch": 0.2955219177347911, "grad_norm": 0.4382403790950775, "learning_rate": 0.00014092462860187943, "loss": 1.4295, "step": 22742 }, { "epoch": 0.295534912278707, "grad_norm": 0.4173792898654938, "learning_rate": 0.00014092202913996803, "loss": 1.2766, "step": 22743 }, { "epoch": 0.29554790682262283, "grad_norm": 0.39180731773376465, "learning_rate": 0.00014091942967805665, "loss": 1.5351, "step": 22744 }, { "epoch": 0.29556090136653873, "grad_norm": 0.5312597751617432, "learning_rate": 0.00014091683021614525, "loss": 1.4906, "step": 22745 }, { "epoch": 0.2955738959104546, "grad_norm": 0.4281651973724365, "learning_rate": 0.00014091423075423388, "loss": 1.4218, "step": 22746 }, { "epoch": 0.2955868904543705, "grad_norm": 0.3991905152797699, "learning_rate": 0.0001409116312923225, "loss": 1.46, "step": 22747 }, { "epoch": 0.2955998849982863, "grad_norm": 0.5015237331390381, "learning_rate": 0.0001409090318304111, "loss": 1.392, "step": 22748 }, { "epoch": 0.2956128795422022, "grad_norm": 0.45229706168174744, "learning_rate": 0.00014090643236849972, "loss": 1.3191, "step": 22749 }, { "epoch": 0.29562587408611807, "grad_norm": 0.4183344542980194, "learning_rate": 0.00014090383290658835, "loss": 1.538, "step": 22750 }, { "epoch": 0.29563886863003397, "grad_norm": 0.4537038505077362, "learning_rate": 0.00014090123344467697, "loss": 1.3393, "step": 22751 }, { "epoch": 0.2956518631739498, "grad_norm": 0.47749584913253784, "learning_rate": 0.00014089863398276557, "loss": 1.3506, "step": 22752 }, { "epoch": 0.2956648577178657, "grad_norm": 0.4244959354400635, "learning_rate": 0.0001408960345208542, "loss": 1.7011, "step": 22753 }, { "epoch": 0.29567785226178156, "grad_norm": 0.35170838236808777, "learning_rate": 0.00014089343505894282, "loss": 1.6055, "step": 22754 }, { "epoch": 0.29569084680569746, "grad_norm": 0.42715394496917725, "learning_rate": 0.00014089083559703142, "loss": 1.3151, "step": 22755 }, { "epoch": 0.2957038413496133, "grad_norm": 0.3868791460990906, "learning_rate": 0.00014088823613512004, "loss": 1.4112, "step": 22756 }, { "epoch": 0.2957168358935292, "grad_norm": 0.37718990445137024, "learning_rate": 0.00014088563667320864, "loss": 1.3493, "step": 22757 }, { "epoch": 0.29572983043744505, "grad_norm": 0.34347158670425415, "learning_rate": 0.00014088303721129726, "loss": 1.4015, "step": 22758 }, { "epoch": 0.29574282498136095, "grad_norm": 0.43162864446640015, "learning_rate": 0.00014088043774938589, "loss": 1.6092, "step": 22759 }, { "epoch": 0.2957558195252768, "grad_norm": 0.49372825026512146, "learning_rate": 0.00014087783828747448, "loss": 1.3606, "step": 22760 }, { "epoch": 0.2957688140691927, "grad_norm": 0.3937578797340393, "learning_rate": 0.0001408752388255631, "loss": 1.1894, "step": 22761 }, { "epoch": 0.29578180861310854, "grad_norm": 0.5224334597587585, "learning_rate": 0.00014087263936365173, "loss": 1.328, "step": 22762 }, { "epoch": 0.29579480315702444, "grad_norm": 0.3274579346179962, "learning_rate": 0.00014087003990174036, "loss": 1.3335, "step": 22763 }, { "epoch": 0.29580779770094034, "grad_norm": 0.37509703636169434, "learning_rate": 0.00014086744043982895, "loss": 1.3058, "step": 22764 }, { "epoch": 0.2958207922448562, "grad_norm": 0.4386065900325775, "learning_rate": 0.00014086484097791758, "loss": 1.4496, "step": 22765 }, { "epoch": 0.2958337867887721, "grad_norm": 0.4717961847782135, "learning_rate": 0.0001408622415160062, "loss": 1.5254, "step": 22766 }, { "epoch": 0.29584678133268794, "grad_norm": 0.36254382133483887, "learning_rate": 0.0001408596420540948, "loss": 1.2415, "step": 22767 }, { "epoch": 0.29585977587660384, "grad_norm": 0.36956173181533813, "learning_rate": 0.00014085704259218343, "loss": 1.2223, "step": 22768 }, { "epoch": 0.2958727704205197, "grad_norm": 0.4694001376628876, "learning_rate": 0.00014085444313027205, "loss": 1.7242, "step": 22769 }, { "epoch": 0.2958857649644356, "grad_norm": 0.4347313642501831, "learning_rate": 0.00014085184366836067, "loss": 1.5408, "step": 22770 }, { "epoch": 0.2958987595083514, "grad_norm": 0.3834713399410248, "learning_rate": 0.00014084924420644927, "loss": 1.3332, "step": 22771 }, { "epoch": 0.29591175405226733, "grad_norm": 0.38068875670433044, "learning_rate": 0.00014084664474453787, "loss": 1.407, "step": 22772 }, { "epoch": 0.2959247485961832, "grad_norm": 0.3845745623111725, "learning_rate": 0.00014084404528262652, "loss": 1.4291, "step": 22773 }, { "epoch": 0.2959377431400991, "grad_norm": 0.40543684363365173, "learning_rate": 0.00014084144582071512, "loss": 1.2186, "step": 22774 }, { "epoch": 0.2959507376840149, "grad_norm": 0.3033972680568695, "learning_rate": 0.00014083884635880374, "loss": 1.2849, "step": 22775 }, { "epoch": 0.2959637322279308, "grad_norm": 0.32294130325317383, "learning_rate": 0.00014083624689689234, "loss": 1.4622, "step": 22776 }, { "epoch": 0.29597672677184667, "grad_norm": 0.44553059339523315, "learning_rate": 0.00014083364743498096, "loss": 1.4221, "step": 22777 }, { "epoch": 0.29598972131576257, "grad_norm": 0.3012782037258148, "learning_rate": 0.0001408310479730696, "loss": 1.3275, "step": 22778 }, { "epoch": 0.2960027158596784, "grad_norm": 0.4275689125061035, "learning_rate": 0.00014082844851115819, "loss": 1.6086, "step": 22779 }, { "epoch": 0.2960157104035943, "grad_norm": 0.33903008699417114, "learning_rate": 0.0001408258490492468, "loss": 1.177, "step": 22780 }, { "epoch": 0.29602870494751016, "grad_norm": 0.5214288234710693, "learning_rate": 0.00014082324958733544, "loss": 1.3999, "step": 22781 }, { "epoch": 0.29604169949142606, "grad_norm": 0.4583309590816498, "learning_rate": 0.00014082065012542406, "loss": 1.5295, "step": 22782 }, { "epoch": 0.2960546940353419, "grad_norm": 0.39608946442604065, "learning_rate": 0.00014081805066351266, "loss": 1.2594, "step": 22783 }, { "epoch": 0.2960676885792578, "grad_norm": 0.3155381679534912, "learning_rate": 0.00014081545120160125, "loss": 1.4035, "step": 22784 }, { "epoch": 0.29608068312317365, "grad_norm": 0.4135020673274994, "learning_rate": 0.0001408128517396899, "loss": 1.372, "step": 22785 }, { "epoch": 0.29609367766708955, "grad_norm": 0.4492252469062805, "learning_rate": 0.0001408102522777785, "loss": 1.3946, "step": 22786 }, { "epoch": 0.2961066722110054, "grad_norm": 0.3661917448043823, "learning_rate": 0.00014080765281586713, "loss": 1.6238, "step": 22787 }, { "epoch": 0.2961196667549213, "grad_norm": 0.35723355412483215, "learning_rate": 0.00014080505335395573, "loss": 1.2971, "step": 22788 }, { "epoch": 0.29613266129883714, "grad_norm": 0.4470587968826294, "learning_rate": 0.00014080245389204435, "loss": 1.4378, "step": 22789 }, { "epoch": 0.29614565584275304, "grad_norm": 0.4286514222621918, "learning_rate": 0.00014079985443013297, "loss": 1.4807, "step": 22790 }, { "epoch": 0.2961586503866689, "grad_norm": 0.37609192728996277, "learning_rate": 0.00014079725496822157, "loss": 1.2514, "step": 22791 }, { "epoch": 0.2961716449305848, "grad_norm": 0.2713671624660492, "learning_rate": 0.0001407946555063102, "loss": 1.4892, "step": 22792 }, { "epoch": 0.29618463947450063, "grad_norm": 0.38295048475265503, "learning_rate": 0.00014079205604439882, "loss": 1.328, "step": 22793 }, { "epoch": 0.29619763401841653, "grad_norm": 0.4896673858165741, "learning_rate": 0.00014078945658248745, "loss": 1.4202, "step": 22794 }, { "epoch": 0.2962106285623324, "grad_norm": 0.47017353773117065, "learning_rate": 0.00014078685712057604, "loss": 1.4478, "step": 22795 }, { "epoch": 0.2962236231062483, "grad_norm": 0.3654111623764038, "learning_rate": 0.00014078425765866464, "loss": 1.1031, "step": 22796 }, { "epoch": 0.2962366176501641, "grad_norm": 0.30157145857810974, "learning_rate": 0.0001407816581967533, "loss": 1.3447, "step": 22797 }, { "epoch": 0.29624961219408, "grad_norm": 0.45422545075416565, "learning_rate": 0.0001407790587348419, "loss": 1.4342, "step": 22798 }, { "epoch": 0.29626260673799587, "grad_norm": 0.27733734250068665, "learning_rate": 0.0001407764592729305, "loss": 1.3901, "step": 22799 }, { "epoch": 0.29627560128191177, "grad_norm": 0.47230327129364014, "learning_rate": 0.0001407738598110191, "loss": 1.4753, "step": 22800 }, { "epoch": 0.2962885958258276, "grad_norm": 0.42203742265701294, "learning_rate": 0.00014077126034910774, "loss": 1.4846, "step": 22801 }, { "epoch": 0.2963015903697435, "grad_norm": 0.40571749210357666, "learning_rate": 0.00014076866088719636, "loss": 1.347, "step": 22802 }, { "epoch": 0.29631458491365936, "grad_norm": 0.5059195160865784, "learning_rate": 0.00014076606142528496, "loss": 1.5918, "step": 22803 }, { "epoch": 0.29632757945757526, "grad_norm": 0.3949039578437805, "learning_rate": 0.00014076346196337358, "loss": 1.396, "step": 22804 }, { "epoch": 0.2963405740014911, "grad_norm": 0.3804425299167633, "learning_rate": 0.0001407608625014622, "loss": 1.2227, "step": 22805 }, { "epoch": 0.296353568545407, "grad_norm": 0.39050114154815674, "learning_rate": 0.00014075826303955083, "loss": 1.43, "step": 22806 }, { "epoch": 0.29636656308932285, "grad_norm": 0.3925984799861908, "learning_rate": 0.00014075566357763943, "loss": 1.34, "step": 22807 }, { "epoch": 0.29637955763323875, "grad_norm": 0.4106157422065735, "learning_rate": 0.00014075306411572805, "loss": 1.1953, "step": 22808 }, { "epoch": 0.2963925521771546, "grad_norm": 0.30718663334846497, "learning_rate": 0.00014075046465381668, "loss": 1.4182, "step": 22809 }, { "epoch": 0.2964055467210705, "grad_norm": 0.31157761812210083, "learning_rate": 0.00014074786519190527, "loss": 1.3287, "step": 22810 }, { "epoch": 0.29641854126498635, "grad_norm": 0.3276211619377136, "learning_rate": 0.0001407452657299939, "loss": 1.3914, "step": 22811 }, { "epoch": 0.29643153580890225, "grad_norm": 0.3580071032047272, "learning_rate": 0.00014074266626808252, "loss": 1.4117, "step": 22812 }, { "epoch": 0.2964445303528181, "grad_norm": 0.3786758482456207, "learning_rate": 0.00014074006680617112, "loss": 1.4506, "step": 22813 }, { "epoch": 0.296457524896734, "grad_norm": 0.3502340614795685, "learning_rate": 0.00014073746734425975, "loss": 1.4399, "step": 22814 }, { "epoch": 0.29647051944064984, "grad_norm": 0.42407429218292236, "learning_rate": 0.00014073486788234834, "loss": 1.4414, "step": 22815 }, { "epoch": 0.29648351398456574, "grad_norm": 0.34258678555488586, "learning_rate": 0.000140732268420437, "loss": 1.4689, "step": 22816 }, { "epoch": 0.2964965085284816, "grad_norm": 0.4263593852519989, "learning_rate": 0.0001407296689585256, "loss": 1.4483, "step": 22817 }, { "epoch": 0.2965095030723975, "grad_norm": 0.5015770792961121, "learning_rate": 0.00014072706949661422, "loss": 1.6009, "step": 22818 }, { "epoch": 0.29652249761631333, "grad_norm": 0.3282269835472107, "learning_rate": 0.0001407244700347028, "loss": 1.3456, "step": 22819 }, { "epoch": 0.29653549216022923, "grad_norm": 0.3061131238937378, "learning_rate": 0.00014072187057279144, "loss": 1.6779, "step": 22820 }, { "epoch": 0.2965484867041451, "grad_norm": 0.3473403751850128, "learning_rate": 0.00014071927111088006, "loss": 1.4375, "step": 22821 }, { "epoch": 0.296561481248061, "grad_norm": 0.41655465960502625, "learning_rate": 0.00014071667164896866, "loss": 1.5354, "step": 22822 }, { "epoch": 0.2965744757919768, "grad_norm": 0.40060603618621826, "learning_rate": 0.00014071407218705728, "loss": 1.4237, "step": 22823 }, { "epoch": 0.2965874703358927, "grad_norm": 0.3970382809638977, "learning_rate": 0.0001407114727251459, "loss": 1.2728, "step": 22824 }, { "epoch": 0.29660046487980857, "grad_norm": 0.5272574424743652, "learning_rate": 0.00014070887326323453, "loss": 1.5391, "step": 22825 }, { "epoch": 0.29661345942372447, "grad_norm": 0.40819504857063293, "learning_rate": 0.00014070627380132313, "loss": 1.5664, "step": 22826 }, { "epoch": 0.2966264539676403, "grad_norm": 0.4603120982646942, "learning_rate": 0.00014070367433941173, "loss": 1.4675, "step": 22827 }, { "epoch": 0.2966394485115562, "grad_norm": 0.3939252197742462, "learning_rate": 0.00014070107487750038, "loss": 1.4344, "step": 22828 }, { "epoch": 0.29665244305547206, "grad_norm": 0.4045909345149994, "learning_rate": 0.00014069847541558898, "loss": 1.4307, "step": 22829 }, { "epoch": 0.29666543759938796, "grad_norm": 0.32135406136512756, "learning_rate": 0.0001406958759536776, "loss": 1.3987, "step": 22830 }, { "epoch": 0.2966784321433038, "grad_norm": 0.3731941282749176, "learning_rate": 0.0001406932764917662, "loss": 1.4561, "step": 22831 }, { "epoch": 0.2966914266872197, "grad_norm": 0.443343847990036, "learning_rate": 0.00014069067702985482, "loss": 1.5198, "step": 22832 }, { "epoch": 0.29670442123113555, "grad_norm": 0.3785175085067749, "learning_rate": 0.00014068807756794345, "loss": 1.4399, "step": 22833 }, { "epoch": 0.29671741577505145, "grad_norm": 0.4535701274871826, "learning_rate": 0.00014068547810603205, "loss": 1.4742, "step": 22834 }, { "epoch": 0.2967304103189673, "grad_norm": 0.3857325315475464, "learning_rate": 0.00014068287864412067, "loss": 1.443, "step": 22835 }, { "epoch": 0.2967434048628832, "grad_norm": 0.3766133189201355, "learning_rate": 0.0001406802791822093, "loss": 1.3728, "step": 22836 }, { "epoch": 0.29675639940679904, "grad_norm": 0.3807442784309387, "learning_rate": 0.00014067767972029792, "loss": 1.4144, "step": 22837 }, { "epoch": 0.29676939395071494, "grad_norm": 0.3636348843574524, "learning_rate": 0.00014067508025838652, "loss": 1.3078, "step": 22838 }, { "epoch": 0.29678238849463084, "grad_norm": 0.44451650977134705, "learning_rate": 0.0001406724807964751, "loss": 1.6234, "step": 22839 }, { "epoch": 0.2967953830385467, "grad_norm": 0.4159308969974518, "learning_rate": 0.00014066988133456376, "loss": 1.3185, "step": 22840 }, { "epoch": 0.2968083775824626, "grad_norm": 0.37671971321105957, "learning_rate": 0.00014066728187265236, "loss": 1.543, "step": 22841 }, { "epoch": 0.29682137212637844, "grad_norm": 0.4537704885005951, "learning_rate": 0.000140664682410741, "loss": 1.5102, "step": 22842 }, { "epoch": 0.29683436667029434, "grad_norm": 0.34417566657066345, "learning_rate": 0.0001406620829488296, "loss": 1.4457, "step": 22843 }, { "epoch": 0.2968473612142102, "grad_norm": 0.3451550006866455, "learning_rate": 0.0001406594834869182, "loss": 1.5407, "step": 22844 }, { "epoch": 0.2968603557581261, "grad_norm": 0.4700402319431305, "learning_rate": 0.00014065688402500683, "loss": 1.5849, "step": 22845 }, { "epoch": 0.2968733503020419, "grad_norm": 0.41609492897987366, "learning_rate": 0.00014065428456309543, "loss": 1.6633, "step": 22846 }, { "epoch": 0.29688634484595783, "grad_norm": 0.48423802852630615, "learning_rate": 0.00014065168510118408, "loss": 1.5358, "step": 22847 }, { "epoch": 0.2968993393898737, "grad_norm": 0.360832542181015, "learning_rate": 0.00014064908563927268, "loss": 1.5546, "step": 22848 }, { "epoch": 0.2969123339337896, "grad_norm": 0.3818899095058441, "learning_rate": 0.0001406464861773613, "loss": 1.3849, "step": 22849 }, { "epoch": 0.2969253284777054, "grad_norm": 0.3651469647884369, "learning_rate": 0.0001406438867154499, "loss": 1.4863, "step": 22850 }, { "epoch": 0.2969383230216213, "grad_norm": 0.4357500970363617, "learning_rate": 0.00014064128725353853, "loss": 1.4161, "step": 22851 }, { "epoch": 0.29695131756553717, "grad_norm": 0.4675513803958893, "learning_rate": 0.00014063868779162715, "loss": 1.4354, "step": 22852 }, { "epoch": 0.29696431210945307, "grad_norm": 0.36629101634025574, "learning_rate": 0.00014063608832971575, "loss": 1.3113, "step": 22853 }, { "epoch": 0.2969773066533689, "grad_norm": 0.4100399315357208, "learning_rate": 0.00014063348886780437, "loss": 1.3803, "step": 22854 }, { "epoch": 0.2969903011972848, "grad_norm": 0.3664558529853821, "learning_rate": 0.000140630889405893, "loss": 1.3159, "step": 22855 }, { "epoch": 0.29700329574120066, "grad_norm": 0.2958900034427643, "learning_rate": 0.0001406282899439816, "loss": 1.1542, "step": 22856 }, { "epoch": 0.29701629028511656, "grad_norm": 0.32148393988609314, "learning_rate": 0.00014062569048207022, "loss": 1.5216, "step": 22857 }, { "epoch": 0.2970292848290324, "grad_norm": 0.37673118710517883, "learning_rate": 0.00014062309102015882, "loss": 1.288, "step": 22858 }, { "epoch": 0.2970422793729483, "grad_norm": 0.36361804604530334, "learning_rate": 0.00014062049155824747, "loss": 1.5888, "step": 22859 }, { "epoch": 0.29705527391686415, "grad_norm": 0.3854396641254425, "learning_rate": 0.00014061789209633606, "loss": 1.3321, "step": 22860 }, { "epoch": 0.29706826846078005, "grad_norm": 0.3788318634033203, "learning_rate": 0.0001406152926344247, "loss": 1.3712, "step": 22861 }, { "epoch": 0.2970812630046959, "grad_norm": 0.3288755416870117, "learning_rate": 0.0001406126931725133, "loss": 1.4977, "step": 22862 }, { "epoch": 0.2970942575486118, "grad_norm": 0.35468214750289917, "learning_rate": 0.0001406100937106019, "loss": 1.3864, "step": 22863 }, { "epoch": 0.29710725209252764, "grad_norm": 0.39672982692718506, "learning_rate": 0.00014060749424869054, "loss": 1.1931, "step": 22864 }, { "epoch": 0.29712024663644354, "grad_norm": 0.3686577379703522, "learning_rate": 0.00014060489478677913, "loss": 1.4555, "step": 22865 }, { "epoch": 0.2971332411803594, "grad_norm": 0.4682312607765198, "learning_rate": 0.00014060229532486776, "loss": 1.4446, "step": 22866 }, { "epoch": 0.2971462357242753, "grad_norm": 0.3407752811908722, "learning_rate": 0.00014059969586295638, "loss": 1.3824, "step": 22867 }, { "epoch": 0.29715923026819113, "grad_norm": 0.4182986617088318, "learning_rate": 0.00014059709640104498, "loss": 1.4133, "step": 22868 }, { "epoch": 0.29717222481210703, "grad_norm": 0.4380069375038147, "learning_rate": 0.0001405944969391336, "loss": 1.4291, "step": 22869 }, { "epoch": 0.2971852193560229, "grad_norm": 0.47763022780418396, "learning_rate": 0.0001405918974772222, "loss": 1.402, "step": 22870 }, { "epoch": 0.2971982138999388, "grad_norm": 0.30499228835105896, "learning_rate": 0.00014058929801531085, "loss": 1.2681, "step": 22871 }, { "epoch": 0.2972112084438546, "grad_norm": 0.4225768446922302, "learning_rate": 0.00014058669855339945, "loss": 1.5723, "step": 22872 }, { "epoch": 0.2972242029877705, "grad_norm": 0.3985782265663147, "learning_rate": 0.00014058409909148807, "loss": 1.2473, "step": 22873 }, { "epoch": 0.29723719753168637, "grad_norm": 0.3871857225894928, "learning_rate": 0.00014058149962957667, "loss": 1.4637, "step": 22874 }, { "epoch": 0.29725019207560227, "grad_norm": 0.28158804774284363, "learning_rate": 0.0001405789001676653, "loss": 1.5074, "step": 22875 }, { "epoch": 0.2972631866195181, "grad_norm": 0.40317776799201965, "learning_rate": 0.00014057630070575392, "loss": 1.4289, "step": 22876 }, { "epoch": 0.297276181163434, "grad_norm": 0.29599377512931824, "learning_rate": 0.00014057370124384252, "loss": 1.3555, "step": 22877 }, { "epoch": 0.29728917570734986, "grad_norm": 0.3496891260147095, "learning_rate": 0.00014057110178193114, "loss": 1.6558, "step": 22878 }, { "epoch": 0.29730217025126576, "grad_norm": 0.38670259714126587, "learning_rate": 0.00014056850232001977, "loss": 1.2619, "step": 22879 }, { "epoch": 0.2973151647951816, "grad_norm": 0.3137998580932617, "learning_rate": 0.00014056590285810836, "loss": 1.2972, "step": 22880 }, { "epoch": 0.2973281593390975, "grad_norm": 0.47068697214126587, "learning_rate": 0.000140563303396197, "loss": 1.5808, "step": 22881 }, { "epoch": 0.29734115388301335, "grad_norm": 0.5047212839126587, "learning_rate": 0.00014056070393428561, "loss": 1.4334, "step": 22882 }, { "epoch": 0.29735414842692925, "grad_norm": 0.4658738076686859, "learning_rate": 0.00014055810447237424, "loss": 1.6403, "step": 22883 }, { "epoch": 0.2973671429708451, "grad_norm": 0.411141574382782, "learning_rate": 0.00014055550501046284, "loss": 1.3849, "step": 22884 }, { "epoch": 0.297380137514761, "grad_norm": 0.38476452231407166, "learning_rate": 0.00014055290554855146, "loss": 1.432, "step": 22885 }, { "epoch": 0.29739313205867685, "grad_norm": 0.41409575939178467, "learning_rate": 0.00014055030608664008, "loss": 1.6512, "step": 22886 }, { "epoch": 0.29740612660259275, "grad_norm": 0.3472932279109955, "learning_rate": 0.00014054770662472868, "loss": 1.3431, "step": 22887 }, { "epoch": 0.2974191211465086, "grad_norm": 0.33591771125793457, "learning_rate": 0.0001405451071628173, "loss": 1.2956, "step": 22888 }, { "epoch": 0.2974321156904245, "grad_norm": 0.24137292802333832, "learning_rate": 0.0001405425077009059, "loss": 1.2802, "step": 22889 }, { "epoch": 0.29744511023434034, "grad_norm": 0.3404938280582428, "learning_rate": 0.00014053990823899456, "loss": 1.2568, "step": 22890 }, { "epoch": 0.29745810477825624, "grad_norm": 0.33813583850860596, "learning_rate": 0.00014053730877708315, "loss": 1.2234, "step": 22891 }, { "epoch": 0.2974710993221721, "grad_norm": 0.32802852988243103, "learning_rate": 0.00014053470931517178, "loss": 1.1815, "step": 22892 }, { "epoch": 0.297484093866088, "grad_norm": 0.4178912937641144, "learning_rate": 0.00014053210985326037, "loss": 1.3813, "step": 22893 }, { "epoch": 0.29749708841000383, "grad_norm": 0.32503899931907654, "learning_rate": 0.000140529510391349, "loss": 1.5414, "step": 22894 }, { "epoch": 0.29751008295391973, "grad_norm": 0.4295741319656372, "learning_rate": 0.00014052691092943762, "loss": 1.5876, "step": 22895 }, { "epoch": 0.2975230774978356, "grad_norm": 0.4701733887195587, "learning_rate": 0.00014052431146752622, "loss": 1.3618, "step": 22896 }, { "epoch": 0.2975360720417515, "grad_norm": 0.33705857396125793, "learning_rate": 0.00014052171200561485, "loss": 1.345, "step": 22897 }, { "epoch": 0.2975490665856673, "grad_norm": 0.4426041841506958, "learning_rate": 0.00014051911254370347, "loss": 1.5497, "step": 22898 }, { "epoch": 0.2975620611295832, "grad_norm": 0.41073349118232727, "learning_rate": 0.00014051651308179207, "loss": 1.3098, "step": 22899 }, { "epoch": 0.29757505567349907, "grad_norm": 0.34824374318122864, "learning_rate": 0.0001405139136198807, "loss": 1.4465, "step": 22900 }, { "epoch": 0.29758805021741497, "grad_norm": 0.38049209117889404, "learning_rate": 0.0001405113141579693, "loss": 1.5442, "step": 22901 }, { "epoch": 0.2976010447613308, "grad_norm": 0.3623986542224884, "learning_rate": 0.00014050871469605794, "loss": 1.407, "step": 22902 }, { "epoch": 0.2976140393052467, "grad_norm": 0.4316590130329132, "learning_rate": 0.00014050611523414654, "loss": 1.3392, "step": 22903 }, { "epoch": 0.29762703384916256, "grad_norm": 0.3892042338848114, "learning_rate": 0.00014050351577223516, "loss": 1.5788, "step": 22904 }, { "epoch": 0.29764002839307846, "grad_norm": 0.3722061514854431, "learning_rate": 0.00014050091631032376, "loss": 1.3286, "step": 22905 }, { "epoch": 0.2976530229369943, "grad_norm": 0.40507733821868896, "learning_rate": 0.00014049831684841238, "loss": 1.5016, "step": 22906 }, { "epoch": 0.2976660174809102, "grad_norm": 0.3747061789035797, "learning_rate": 0.000140495717386501, "loss": 1.2771, "step": 22907 }, { "epoch": 0.29767901202482605, "grad_norm": 0.4782727062702179, "learning_rate": 0.0001404931179245896, "loss": 1.4754, "step": 22908 }, { "epoch": 0.29769200656874195, "grad_norm": 0.38695231080055237, "learning_rate": 0.00014049051846267823, "loss": 1.4502, "step": 22909 }, { "epoch": 0.2977050011126578, "grad_norm": 0.3897361755371094, "learning_rate": 0.00014048791900076686, "loss": 1.4328, "step": 22910 }, { "epoch": 0.2977179956565737, "grad_norm": 0.3365316092967987, "learning_rate": 0.00014048531953885545, "loss": 1.3066, "step": 22911 }, { "epoch": 0.29773099020048954, "grad_norm": 0.3724667429924011, "learning_rate": 0.00014048272007694408, "loss": 1.2428, "step": 22912 }, { "epoch": 0.29774398474440544, "grad_norm": 0.37072405219078064, "learning_rate": 0.00014048012061503267, "loss": 1.4466, "step": 22913 }, { "epoch": 0.2977569792883213, "grad_norm": 0.3356662094593048, "learning_rate": 0.00014047752115312133, "loss": 1.477, "step": 22914 }, { "epoch": 0.2977699738322372, "grad_norm": 0.3485557734966278, "learning_rate": 0.00014047492169120992, "loss": 1.6869, "step": 22915 }, { "epoch": 0.2977829683761531, "grad_norm": 0.4210974872112274, "learning_rate": 0.00014047232222929855, "loss": 1.3762, "step": 22916 }, { "epoch": 0.29779596292006894, "grad_norm": 0.3594282865524292, "learning_rate": 0.00014046972276738717, "loss": 1.3609, "step": 22917 }, { "epoch": 0.29780895746398484, "grad_norm": 0.3362147808074951, "learning_rate": 0.00014046712330547577, "loss": 1.3309, "step": 22918 }, { "epoch": 0.2978219520079007, "grad_norm": 0.4683127999305725, "learning_rate": 0.0001404645238435644, "loss": 1.4555, "step": 22919 }, { "epoch": 0.2978349465518166, "grad_norm": 0.32860347628593445, "learning_rate": 0.000140461924381653, "loss": 1.2936, "step": 22920 }, { "epoch": 0.2978479410957324, "grad_norm": 0.49081334471702576, "learning_rate": 0.00014045932491974164, "loss": 1.4337, "step": 22921 }, { "epoch": 0.2978609356396483, "grad_norm": 0.42803627252578735, "learning_rate": 0.00014045672545783024, "loss": 1.4939, "step": 22922 }, { "epoch": 0.2978739301835642, "grad_norm": 0.4179244041442871, "learning_rate": 0.00014045412599591884, "loss": 1.3672, "step": 22923 }, { "epoch": 0.2978869247274801, "grad_norm": 0.4239777624607086, "learning_rate": 0.00014045152653400746, "loss": 1.3249, "step": 22924 }, { "epoch": 0.2978999192713959, "grad_norm": 0.37392351031303406, "learning_rate": 0.0001404489270720961, "loss": 1.2115, "step": 22925 }, { "epoch": 0.2979129138153118, "grad_norm": 0.39103397727012634, "learning_rate": 0.0001404463276101847, "loss": 1.382, "step": 22926 }, { "epoch": 0.29792590835922766, "grad_norm": 0.3098256587982178, "learning_rate": 0.0001404437281482733, "loss": 1.3627, "step": 22927 }, { "epoch": 0.29793890290314357, "grad_norm": 0.40651196241378784, "learning_rate": 0.00014044112868636193, "loss": 1.4748, "step": 22928 }, { "epoch": 0.2979518974470594, "grad_norm": 0.4172825515270233, "learning_rate": 0.00014043852922445056, "loss": 1.5185, "step": 22929 }, { "epoch": 0.2979648919909753, "grad_norm": 0.42139530181884766, "learning_rate": 0.00014043592976253916, "loss": 1.4345, "step": 22930 }, { "epoch": 0.29797788653489116, "grad_norm": 0.4424646496772766, "learning_rate": 0.00014043333030062778, "loss": 1.4346, "step": 22931 }, { "epoch": 0.29799088107880706, "grad_norm": 0.38615378737449646, "learning_rate": 0.00014043073083871638, "loss": 1.1767, "step": 22932 }, { "epoch": 0.2980038756227229, "grad_norm": 0.44978195428848267, "learning_rate": 0.00014042813137680503, "loss": 1.4309, "step": 22933 }, { "epoch": 0.2980168701666388, "grad_norm": 0.3788641393184662, "learning_rate": 0.00014042553191489363, "loss": 1.2985, "step": 22934 }, { "epoch": 0.29802986471055465, "grad_norm": 0.4054301381111145, "learning_rate": 0.00014042293245298222, "loss": 1.3065, "step": 22935 }, { "epoch": 0.29804285925447055, "grad_norm": 0.42570415139198303, "learning_rate": 0.00014042033299107085, "loss": 1.3586, "step": 22936 }, { "epoch": 0.2980558537983864, "grad_norm": 0.312277227640152, "learning_rate": 0.00014041773352915947, "loss": 1.5036, "step": 22937 }, { "epoch": 0.2980688483423023, "grad_norm": 0.43636733293533325, "learning_rate": 0.0001404151340672481, "loss": 1.4062, "step": 22938 }, { "epoch": 0.29808184288621814, "grad_norm": 0.4913323223590851, "learning_rate": 0.0001404125346053367, "loss": 1.3741, "step": 22939 }, { "epoch": 0.29809483743013404, "grad_norm": 0.4800909161567688, "learning_rate": 0.00014040993514342532, "loss": 1.4679, "step": 22940 }, { "epoch": 0.2981078319740499, "grad_norm": 0.39575737714767456, "learning_rate": 0.00014040733568151394, "loss": 1.3649, "step": 22941 }, { "epoch": 0.2981208265179658, "grad_norm": 0.41685548424720764, "learning_rate": 0.00014040473621960254, "loss": 1.2472, "step": 22942 }, { "epoch": 0.29813382106188163, "grad_norm": 0.2750242054462433, "learning_rate": 0.00014040213675769117, "loss": 1.2321, "step": 22943 }, { "epoch": 0.29814681560579753, "grad_norm": 0.42563796043395996, "learning_rate": 0.00014039953729577976, "loss": 1.4788, "step": 22944 }, { "epoch": 0.2981598101497134, "grad_norm": 0.452678382396698, "learning_rate": 0.00014039693783386841, "loss": 1.7496, "step": 22945 }, { "epoch": 0.2981728046936293, "grad_norm": 0.4093649089336395, "learning_rate": 0.000140394338371957, "loss": 1.4877, "step": 22946 }, { "epoch": 0.2981857992375451, "grad_norm": 0.37021756172180176, "learning_rate": 0.00014039173891004564, "loss": 1.4622, "step": 22947 }, { "epoch": 0.298198793781461, "grad_norm": 0.3859061300754547, "learning_rate": 0.00014038913944813423, "loss": 1.4197, "step": 22948 }, { "epoch": 0.29821178832537687, "grad_norm": 0.3743303418159485, "learning_rate": 0.00014038653998622286, "loss": 1.4015, "step": 22949 }, { "epoch": 0.29822478286929277, "grad_norm": 0.5168335437774658, "learning_rate": 0.00014038394052431148, "loss": 1.4413, "step": 22950 }, { "epoch": 0.2982377774132086, "grad_norm": 0.44695064425468445, "learning_rate": 0.00014038134106240008, "loss": 1.3717, "step": 22951 }, { "epoch": 0.2982507719571245, "grad_norm": 0.3422711491584778, "learning_rate": 0.0001403787416004887, "loss": 1.4909, "step": 22952 }, { "epoch": 0.29826376650104036, "grad_norm": 0.49021583795547485, "learning_rate": 0.00014037614213857733, "loss": 1.472, "step": 22953 }, { "epoch": 0.29827676104495626, "grad_norm": 0.40482184290885925, "learning_rate": 0.00014037354267666593, "loss": 1.1841, "step": 22954 }, { "epoch": 0.2982897555888721, "grad_norm": 0.4326929450035095, "learning_rate": 0.00014037094321475455, "loss": 1.2391, "step": 22955 }, { "epoch": 0.298302750132788, "grad_norm": 0.4419623613357544, "learning_rate": 0.00014036834375284317, "loss": 1.2396, "step": 22956 }, { "epoch": 0.29831574467670385, "grad_norm": 0.4637749493122101, "learning_rate": 0.0001403657442909318, "loss": 1.4447, "step": 22957 }, { "epoch": 0.29832873922061975, "grad_norm": 0.2633437216281891, "learning_rate": 0.0001403631448290204, "loss": 1.377, "step": 22958 }, { "epoch": 0.2983417337645356, "grad_norm": 0.3231419026851654, "learning_rate": 0.00014036054536710902, "loss": 1.281, "step": 22959 }, { "epoch": 0.2983547283084515, "grad_norm": 0.40608328580856323, "learning_rate": 0.00014035794590519765, "loss": 1.3984, "step": 22960 }, { "epoch": 0.29836772285236735, "grad_norm": 0.4633846879005432, "learning_rate": 0.00014035534644328624, "loss": 1.7658, "step": 22961 }, { "epoch": 0.29838071739628325, "grad_norm": 0.3306026756763458, "learning_rate": 0.00014035274698137487, "loss": 1.4273, "step": 22962 }, { "epoch": 0.2983937119401991, "grad_norm": 0.4051123857498169, "learning_rate": 0.00014035014751946347, "loss": 1.3625, "step": 22963 }, { "epoch": 0.298406706484115, "grad_norm": 0.4277397096157074, "learning_rate": 0.0001403475480575521, "loss": 1.4658, "step": 22964 }, { "epoch": 0.29841970102803084, "grad_norm": 0.29705703258514404, "learning_rate": 0.00014034494859564071, "loss": 1.3797, "step": 22965 }, { "epoch": 0.29843269557194674, "grad_norm": 0.3473593592643738, "learning_rate": 0.0001403423491337293, "loss": 1.2868, "step": 22966 }, { "epoch": 0.2984456901158626, "grad_norm": 0.4127417802810669, "learning_rate": 0.00014033974967181794, "loss": 1.5055, "step": 22967 }, { "epoch": 0.2984586846597785, "grad_norm": 0.2935155928134918, "learning_rate": 0.00014033715020990656, "loss": 1.3117, "step": 22968 }, { "epoch": 0.29847167920369433, "grad_norm": 0.39981845021247864, "learning_rate": 0.00014033455074799518, "loss": 1.2983, "step": 22969 }, { "epoch": 0.29848467374761023, "grad_norm": 0.49690040946006775, "learning_rate": 0.00014033195128608378, "loss": 1.3331, "step": 22970 }, { "epoch": 0.2984976682915261, "grad_norm": 0.4131467044353485, "learning_rate": 0.0001403293518241724, "loss": 1.4258, "step": 22971 }, { "epoch": 0.298510662835442, "grad_norm": 0.5347421765327454, "learning_rate": 0.00014032675236226103, "loss": 1.4407, "step": 22972 }, { "epoch": 0.2985236573793578, "grad_norm": 0.4536976218223572, "learning_rate": 0.00014032415290034963, "loss": 1.3972, "step": 22973 }, { "epoch": 0.2985366519232737, "grad_norm": 0.4558336138725281, "learning_rate": 0.00014032155343843825, "loss": 1.6056, "step": 22974 }, { "epoch": 0.29854964646718957, "grad_norm": 0.4119022786617279, "learning_rate": 0.00014031895397652685, "loss": 1.4241, "step": 22975 }, { "epoch": 0.29856264101110547, "grad_norm": 0.4163656532764435, "learning_rate": 0.0001403163545146155, "loss": 1.5093, "step": 22976 }, { "epoch": 0.2985756355550213, "grad_norm": 0.2803994119167328, "learning_rate": 0.0001403137550527041, "loss": 1.2156, "step": 22977 }, { "epoch": 0.2985886300989372, "grad_norm": 0.3968595564365387, "learning_rate": 0.0001403111555907927, "loss": 1.4486, "step": 22978 }, { "epoch": 0.29860162464285306, "grad_norm": 0.3674934506416321, "learning_rate": 0.00014030855612888132, "loss": 1.5421, "step": 22979 }, { "epoch": 0.29861461918676896, "grad_norm": 0.38411805033683777, "learning_rate": 0.00014030595666696995, "loss": 1.3499, "step": 22980 }, { "epoch": 0.2986276137306848, "grad_norm": 0.5203903317451477, "learning_rate": 0.00014030335720505857, "loss": 1.5487, "step": 22981 }, { "epoch": 0.2986406082746007, "grad_norm": 0.414045125246048, "learning_rate": 0.00014030075774314717, "loss": 1.5586, "step": 22982 }, { "epoch": 0.29865360281851655, "grad_norm": 0.30370041728019714, "learning_rate": 0.0001402981582812358, "loss": 1.2743, "step": 22983 }, { "epoch": 0.29866659736243245, "grad_norm": 0.3711134195327759, "learning_rate": 0.00014029555881932442, "loss": 1.4256, "step": 22984 }, { "epoch": 0.2986795919063483, "grad_norm": 0.43841317296028137, "learning_rate": 0.00014029295935741301, "loss": 1.446, "step": 22985 }, { "epoch": 0.2986925864502642, "grad_norm": 0.4599684774875641, "learning_rate": 0.00014029035989550164, "loss": 1.3719, "step": 22986 }, { "epoch": 0.29870558099418004, "grad_norm": 0.4624248445034027, "learning_rate": 0.00014028776043359024, "loss": 1.4111, "step": 22987 }, { "epoch": 0.29871857553809594, "grad_norm": 0.41547250747680664, "learning_rate": 0.0001402851609716789, "loss": 1.587, "step": 22988 }, { "epoch": 0.2987315700820118, "grad_norm": 0.44423219561576843, "learning_rate": 0.00014028256150976748, "loss": 1.4156, "step": 22989 }, { "epoch": 0.2987445646259277, "grad_norm": 0.33778834342956543, "learning_rate": 0.00014027996204785608, "loss": 1.1865, "step": 22990 }, { "epoch": 0.2987575591698436, "grad_norm": 0.4605976939201355, "learning_rate": 0.00014027736258594473, "loss": 1.4909, "step": 22991 }, { "epoch": 0.29877055371375943, "grad_norm": 0.43227675557136536, "learning_rate": 0.00014027476312403333, "loss": 1.6118, "step": 22992 }, { "epoch": 0.29878354825767534, "grad_norm": 0.43669548630714417, "learning_rate": 0.00014027216366212196, "loss": 1.5071, "step": 22993 }, { "epoch": 0.2987965428015912, "grad_norm": 0.3404190242290497, "learning_rate": 0.00014026956420021055, "loss": 1.4458, "step": 22994 }, { "epoch": 0.2988095373455071, "grad_norm": 0.40627357363700867, "learning_rate": 0.00014026696473829918, "loss": 1.4604, "step": 22995 }, { "epoch": 0.2988225318894229, "grad_norm": 0.5124145150184631, "learning_rate": 0.0001402643652763878, "loss": 1.2954, "step": 22996 }, { "epoch": 0.2988355264333388, "grad_norm": 0.4437195956707001, "learning_rate": 0.0001402617658144764, "loss": 1.3254, "step": 22997 }, { "epoch": 0.2988485209772547, "grad_norm": 0.33872994780540466, "learning_rate": 0.00014025916635256502, "loss": 1.242, "step": 22998 }, { "epoch": 0.2988615155211706, "grad_norm": 0.4779316186904907, "learning_rate": 0.00014025656689065365, "loss": 1.4364, "step": 22999 }, { "epoch": 0.2988745100650864, "grad_norm": 0.46553751826286316, "learning_rate": 0.00014025396742874227, "loss": 1.4367, "step": 23000 }, { "epoch": 0.2988875046090023, "grad_norm": 0.5074887871742249, "learning_rate": 0.00014025136796683087, "loss": 1.3992, "step": 23001 }, { "epoch": 0.29890049915291816, "grad_norm": 0.3086678087711334, "learning_rate": 0.00014024876850491947, "loss": 1.3144, "step": 23002 }, { "epoch": 0.29891349369683407, "grad_norm": 0.45222100615501404, "learning_rate": 0.00014024616904300812, "loss": 1.4294, "step": 23003 }, { "epoch": 0.2989264882407499, "grad_norm": 0.42579686641693115, "learning_rate": 0.00014024356958109672, "loss": 1.4623, "step": 23004 }, { "epoch": 0.2989394827846658, "grad_norm": 0.23428599536418915, "learning_rate": 0.00014024097011918534, "loss": 1.1466, "step": 23005 }, { "epoch": 0.29895247732858166, "grad_norm": 0.4408715069293976, "learning_rate": 0.00014023837065727394, "loss": 1.4951, "step": 23006 }, { "epoch": 0.29896547187249756, "grad_norm": 0.34068456292152405, "learning_rate": 0.00014023577119536256, "loss": 1.3378, "step": 23007 }, { "epoch": 0.2989784664164134, "grad_norm": 0.44888728857040405, "learning_rate": 0.0001402331717334512, "loss": 1.3897, "step": 23008 }, { "epoch": 0.2989914609603293, "grad_norm": 0.35491135716438293, "learning_rate": 0.00014023057227153978, "loss": 1.3693, "step": 23009 }, { "epoch": 0.29900445550424515, "grad_norm": 0.38651758432388306, "learning_rate": 0.0001402279728096284, "loss": 1.3369, "step": 23010 }, { "epoch": 0.29901745004816105, "grad_norm": 0.3042561709880829, "learning_rate": 0.00014022537334771703, "loss": 1.3415, "step": 23011 }, { "epoch": 0.2990304445920769, "grad_norm": 0.42663854360580444, "learning_rate": 0.00014022277388580566, "loss": 1.5087, "step": 23012 }, { "epoch": 0.2990434391359928, "grad_norm": 0.5376585721969604, "learning_rate": 0.00014022017442389426, "loss": 1.5156, "step": 23013 }, { "epoch": 0.29905643367990864, "grad_norm": 0.4362395405769348, "learning_rate": 0.00014021757496198288, "loss": 1.3487, "step": 23014 }, { "epoch": 0.29906942822382454, "grad_norm": 0.38434898853302, "learning_rate": 0.0001402149755000715, "loss": 1.3713, "step": 23015 }, { "epoch": 0.2990824227677404, "grad_norm": 0.396319717168808, "learning_rate": 0.0001402123760381601, "loss": 1.3007, "step": 23016 }, { "epoch": 0.2990954173116563, "grad_norm": 0.4017016589641571, "learning_rate": 0.00014020977657624873, "loss": 1.3074, "step": 23017 }, { "epoch": 0.29910841185557213, "grad_norm": 0.3336453437805176, "learning_rate": 0.00014020717711433732, "loss": 1.342, "step": 23018 }, { "epoch": 0.29912140639948803, "grad_norm": 0.32417479157447815, "learning_rate": 0.00014020457765242595, "loss": 1.3644, "step": 23019 }, { "epoch": 0.2991344009434039, "grad_norm": 0.3886622488498688, "learning_rate": 0.00014020197819051457, "loss": 1.2076, "step": 23020 }, { "epoch": 0.2991473954873198, "grad_norm": 0.44363850355148315, "learning_rate": 0.00014019937872860317, "loss": 1.3943, "step": 23021 }, { "epoch": 0.2991603900312356, "grad_norm": 0.4339125454425812, "learning_rate": 0.0001401967792666918, "loss": 1.5611, "step": 23022 }, { "epoch": 0.2991733845751515, "grad_norm": 0.5525286197662354, "learning_rate": 0.00014019417980478042, "loss": 1.3549, "step": 23023 }, { "epoch": 0.29918637911906737, "grad_norm": 0.3635425269603729, "learning_rate": 0.00014019158034286904, "loss": 1.2835, "step": 23024 }, { "epoch": 0.29919937366298327, "grad_norm": 0.7514035701751709, "learning_rate": 0.00014018898088095764, "loss": 1.3743, "step": 23025 }, { "epoch": 0.2992123682068991, "grad_norm": 0.3714289367198944, "learning_rate": 0.00014018638141904627, "loss": 1.3128, "step": 23026 }, { "epoch": 0.299225362750815, "grad_norm": 0.4191341698169708, "learning_rate": 0.0001401837819571349, "loss": 1.4467, "step": 23027 }, { "epoch": 0.29923835729473086, "grad_norm": 0.37737709283828735, "learning_rate": 0.0001401811824952235, "loss": 1.2746, "step": 23028 }, { "epoch": 0.29925135183864676, "grad_norm": 0.35028916597366333, "learning_rate": 0.0001401785830333121, "loss": 1.3699, "step": 23029 }, { "epoch": 0.2992643463825626, "grad_norm": 0.3594965636730194, "learning_rate": 0.00014017598357140074, "loss": 1.4369, "step": 23030 }, { "epoch": 0.2992773409264785, "grad_norm": 0.3582841157913208, "learning_rate": 0.00014017338410948936, "loss": 1.4876, "step": 23031 }, { "epoch": 0.29929033547039435, "grad_norm": 0.3327685594558716, "learning_rate": 0.00014017078464757796, "loss": 1.6182, "step": 23032 }, { "epoch": 0.29930333001431025, "grad_norm": 0.37973544001579285, "learning_rate": 0.00014016818518566656, "loss": 1.3351, "step": 23033 }, { "epoch": 0.2993163245582261, "grad_norm": 0.3545832633972168, "learning_rate": 0.0001401655857237552, "loss": 1.3836, "step": 23034 }, { "epoch": 0.299329319102142, "grad_norm": 0.39142656326293945, "learning_rate": 0.0001401629862618438, "loss": 1.4798, "step": 23035 }, { "epoch": 0.29934231364605784, "grad_norm": 0.3702673316001892, "learning_rate": 0.00014016038679993243, "loss": 1.2912, "step": 23036 }, { "epoch": 0.29935530818997375, "grad_norm": 0.4710434675216675, "learning_rate": 0.00014015778733802103, "loss": 1.5469, "step": 23037 }, { "epoch": 0.2993683027338896, "grad_norm": 0.4846549928188324, "learning_rate": 0.00014015518787610965, "loss": 1.5023, "step": 23038 }, { "epoch": 0.2993812972778055, "grad_norm": 0.2982243299484253, "learning_rate": 0.00014015258841419828, "loss": 1.3477, "step": 23039 }, { "epoch": 0.29939429182172134, "grad_norm": 0.42551302909851074, "learning_rate": 0.00014014998895228687, "loss": 1.4403, "step": 23040 }, { "epoch": 0.29940728636563724, "grad_norm": 0.34329622983932495, "learning_rate": 0.0001401473894903755, "loss": 1.2153, "step": 23041 }, { "epoch": 0.2994202809095531, "grad_norm": 0.308676540851593, "learning_rate": 0.00014014479002846412, "loss": 1.4109, "step": 23042 }, { "epoch": 0.299433275453469, "grad_norm": 0.33030661940574646, "learning_rate": 0.00014014219056655275, "loss": 1.4284, "step": 23043 }, { "epoch": 0.29944626999738483, "grad_norm": 0.36275357007980347, "learning_rate": 0.00014013959110464134, "loss": 1.2559, "step": 23044 }, { "epoch": 0.29945926454130073, "grad_norm": 0.3830799460411072, "learning_rate": 0.00014013699164272994, "loss": 1.3998, "step": 23045 }, { "epoch": 0.2994722590852166, "grad_norm": 0.35228869318962097, "learning_rate": 0.0001401343921808186, "loss": 1.3099, "step": 23046 }, { "epoch": 0.2994852536291325, "grad_norm": 0.32076987624168396, "learning_rate": 0.0001401317927189072, "loss": 1.4854, "step": 23047 }, { "epoch": 0.2994982481730483, "grad_norm": 0.4578842520713806, "learning_rate": 0.00014012919325699581, "loss": 1.4898, "step": 23048 }, { "epoch": 0.2995112427169642, "grad_norm": 0.43700602650642395, "learning_rate": 0.0001401265937950844, "loss": 1.5548, "step": 23049 }, { "epoch": 0.29952423726088007, "grad_norm": 0.3971840739250183, "learning_rate": 0.00014012399433317304, "loss": 1.3444, "step": 23050 }, { "epoch": 0.29953723180479597, "grad_norm": 0.44781315326690674, "learning_rate": 0.00014012139487126166, "loss": 1.5399, "step": 23051 }, { "epoch": 0.2995502263487118, "grad_norm": 0.42125004529953003, "learning_rate": 0.00014011879540935026, "loss": 1.2929, "step": 23052 }, { "epoch": 0.2995632208926277, "grad_norm": 0.4396473467350006, "learning_rate": 0.00014011619594743888, "loss": 1.5312, "step": 23053 }, { "epoch": 0.29957621543654356, "grad_norm": 0.3253374993801117, "learning_rate": 0.0001401135964855275, "loss": 1.5133, "step": 23054 }, { "epoch": 0.29958920998045946, "grad_norm": 0.5467861294746399, "learning_rate": 0.00014011099702361613, "loss": 1.4122, "step": 23055 }, { "epoch": 0.2996022045243753, "grad_norm": 0.32921624183654785, "learning_rate": 0.00014010839756170473, "loss": 1.4513, "step": 23056 }, { "epoch": 0.2996151990682912, "grad_norm": 0.36699020862579346, "learning_rate": 0.00014010579809979333, "loss": 1.2875, "step": 23057 }, { "epoch": 0.29962819361220705, "grad_norm": 0.41242703795433044, "learning_rate": 0.00014010319863788198, "loss": 1.3874, "step": 23058 }, { "epoch": 0.29964118815612295, "grad_norm": 0.39426305890083313, "learning_rate": 0.00014010059917597058, "loss": 1.5611, "step": 23059 }, { "epoch": 0.2996541827000388, "grad_norm": 0.4162886142730713, "learning_rate": 0.0001400979997140592, "loss": 1.3413, "step": 23060 }, { "epoch": 0.2996671772439547, "grad_norm": 0.32781875133514404, "learning_rate": 0.0001400954002521478, "loss": 1.3974, "step": 23061 }, { "epoch": 0.29968017178787054, "grad_norm": 0.43102505803108215, "learning_rate": 0.00014009280079023642, "loss": 1.3725, "step": 23062 }, { "epoch": 0.29969316633178644, "grad_norm": 0.37109994888305664, "learning_rate": 0.00014009020132832505, "loss": 1.537, "step": 23063 }, { "epoch": 0.2997061608757023, "grad_norm": 0.4329696595668793, "learning_rate": 0.00014008760186641364, "loss": 1.4747, "step": 23064 }, { "epoch": 0.2997191554196182, "grad_norm": 0.3689322769641876, "learning_rate": 0.0001400850024045023, "loss": 1.4872, "step": 23065 }, { "epoch": 0.29973214996353403, "grad_norm": 0.2741352617740631, "learning_rate": 0.0001400824029425909, "loss": 1.1206, "step": 23066 }, { "epoch": 0.29974514450744993, "grad_norm": 0.45320233702659607, "learning_rate": 0.00014007980348067952, "loss": 1.5149, "step": 23067 }, { "epoch": 0.29975813905136584, "grad_norm": 0.5117446184158325, "learning_rate": 0.00014007720401876811, "loss": 1.5151, "step": 23068 }, { "epoch": 0.2997711335952817, "grad_norm": 0.3062855005264282, "learning_rate": 0.00014007460455685674, "loss": 1.4095, "step": 23069 }, { "epoch": 0.2997841281391976, "grad_norm": 0.3812221586704254, "learning_rate": 0.00014007200509494536, "loss": 1.3836, "step": 23070 }, { "epoch": 0.2997971226831134, "grad_norm": 0.4222165048122406, "learning_rate": 0.00014006940563303396, "loss": 1.4239, "step": 23071 }, { "epoch": 0.2998101172270293, "grad_norm": 0.4736957550048828, "learning_rate": 0.00014006680617112259, "loss": 1.5331, "step": 23072 }, { "epoch": 0.29982311177094517, "grad_norm": 0.4582005739212036, "learning_rate": 0.0001400642067092112, "loss": 1.434, "step": 23073 }, { "epoch": 0.2998361063148611, "grad_norm": 0.41837435960769653, "learning_rate": 0.0001400616072472998, "loss": 1.2434, "step": 23074 }, { "epoch": 0.2998491008587769, "grad_norm": 0.44448328018188477, "learning_rate": 0.00014005900778538843, "loss": 1.63, "step": 23075 }, { "epoch": 0.2998620954026928, "grad_norm": 0.3510551154613495, "learning_rate": 0.00014005640832347703, "loss": 1.3541, "step": 23076 }, { "epoch": 0.29987508994660866, "grad_norm": 0.3945156931877136, "learning_rate": 0.00014005380886156568, "loss": 1.3171, "step": 23077 }, { "epoch": 0.29988808449052456, "grad_norm": 0.2724815011024475, "learning_rate": 0.00014005120939965428, "loss": 1.161, "step": 23078 }, { "epoch": 0.2999010790344404, "grad_norm": 0.4988335371017456, "learning_rate": 0.0001400486099377429, "loss": 1.3636, "step": 23079 }, { "epoch": 0.2999140735783563, "grad_norm": 0.4144153594970703, "learning_rate": 0.0001400460104758315, "loss": 1.4459, "step": 23080 }, { "epoch": 0.29992706812227216, "grad_norm": 0.35689717531204224, "learning_rate": 0.00014004341101392012, "loss": 1.335, "step": 23081 }, { "epoch": 0.29994006266618806, "grad_norm": 0.444684237241745, "learning_rate": 0.00014004081155200875, "loss": 1.4402, "step": 23082 }, { "epoch": 0.2999530572101039, "grad_norm": 0.3817139267921448, "learning_rate": 0.00014003821209009735, "loss": 1.4088, "step": 23083 }, { "epoch": 0.2999660517540198, "grad_norm": 0.4249608814716339, "learning_rate": 0.00014003561262818597, "loss": 1.5001, "step": 23084 }, { "epoch": 0.29997904629793565, "grad_norm": 0.3391485810279846, "learning_rate": 0.0001400330131662746, "loss": 1.6685, "step": 23085 }, { "epoch": 0.29999204084185155, "grad_norm": 0.47837451100349426, "learning_rate": 0.0001400304137043632, "loss": 1.5881, "step": 23086 }, { "epoch": 0.3000050353857674, "grad_norm": 0.33048415184020996, "learning_rate": 0.00014002781424245182, "loss": 1.2404, "step": 23087 }, { "epoch": 0.3000180299296833, "grad_norm": 0.45419323444366455, "learning_rate": 0.00014002521478054041, "loss": 1.4115, "step": 23088 }, { "epoch": 0.30003102447359914, "grad_norm": 0.4446699917316437, "learning_rate": 0.00014002261531862907, "loss": 1.4144, "step": 23089 }, { "epoch": 0.30004401901751504, "grad_norm": 0.32597842812538147, "learning_rate": 0.00014002001585671766, "loss": 1.4544, "step": 23090 }, { "epoch": 0.3000570135614309, "grad_norm": 0.35293325781822205, "learning_rate": 0.0001400174163948063, "loss": 1.3647, "step": 23091 }, { "epoch": 0.3000700081053468, "grad_norm": 0.3385941684246063, "learning_rate": 0.00014001481693289489, "loss": 1.1412, "step": 23092 }, { "epoch": 0.30008300264926263, "grad_norm": 0.4868931770324707, "learning_rate": 0.0001400122174709835, "loss": 1.311, "step": 23093 }, { "epoch": 0.30009599719317853, "grad_norm": 0.3418442904949188, "learning_rate": 0.00014000961800907213, "loss": 1.3247, "step": 23094 }, { "epoch": 0.3001089917370944, "grad_norm": 0.41976794600486755, "learning_rate": 0.00014000701854716073, "loss": 1.4714, "step": 23095 }, { "epoch": 0.3001219862810103, "grad_norm": 0.42028120160102844, "learning_rate": 0.00014000441908524936, "loss": 1.5775, "step": 23096 }, { "epoch": 0.3001349808249261, "grad_norm": 0.46938571333885193, "learning_rate": 0.00014000181962333798, "loss": 1.3944, "step": 23097 }, { "epoch": 0.300147975368842, "grad_norm": 0.45536401867866516, "learning_rate": 0.0001399992201614266, "loss": 1.526, "step": 23098 }, { "epoch": 0.30016096991275787, "grad_norm": 0.34066665172576904, "learning_rate": 0.0001399966206995152, "loss": 1.311, "step": 23099 }, { "epoch": 0.30017396445667377, "grad_norm": 0.35779157280921936, "learning_rate": 0.0001399940212376038, "loss": 1.3758, "step": 23100 }, { "epoch": 0.3001869590005896, "grad_norm": 0.4279794692993164, "learning_rate": 0.00013999142177569245, "loss": 1.4, "step": 23101 }, { "epoch": 0.3001999535445055, "grad_norm": 0.33099332451820374, "learning_rate": 0.00013998882231378105, "loss": 1.3823, "step": 23102 }, { "epoch": 0.30021294808842136, "grad_norm": 0.42596107721328735, "learning_rate": 0.00013998622285186967, "loss": 1.4782, "step": 23103 }, { "epoch": 0.30022594263233726, "grad_norm": 0.4145480990409851, "learning_rate": 0.0001399836233899583, "loss": 1.3681, "step": 23104 }, { "epoch": 0.3002389371762531, "grad_norm": 0.2508307099342346, "learning_rate": 0.0001399810239280469, "loss": 1.3107, "step": 23105 }, { "epoch": 0.300251931720169, "grad_norm": 0.45985832810401917, "learning_rate": 0.00013997842446613552, "loss": 1.3947, "step": 23106 }, { "epoch": 0.30026492626408485, "grad_norm": 0.40395018458366394, "learning_rate": 0.00013997582500422412, "loss": 1.4372, "step": 23107 }, { "epoch": 0.30027792080800075, "grad_norm": 0.41711878776550293, "learning_rate": 0.00013997322554231277, "loss": 1.5646, "step": 23108 }, { "epoch": 0.3002909153519166, "grad_norm": 0.39429208636283875, "learning_rate": 0.00013997062608040137, "loss": 1.3827, "step": 23109 }, { "epoch": 0.3003039098958325, "grad_norm": 0.4032944142818451, "learning_rate": 0.00013996802661849, "loss": 1.5617, "step": 23110 }, { "epoch": 0.30031690443974834, "grad_norm": 0.31945449113845825, "learning_rate": 0.0001399654271565786, "loss": 1.2784, "step": 23111 }, { "epoch": 0.30032989898366425, "grad_norm": 0.309740275144577, "learning_rate": 0.0001399628276946672, "loss": 1.2569, "step": 23112 }, { "epoch": 0.3003428935275801, "grad_norm": 0.338676393032074, "learning_rate": 0.00013996022823275584, "loss": 1.4824, "step": 23113 }, { "epoch": 0.300355888071496, "grad_norm": 0.4370415508747101, "learning_rate": 0.00013995762877084443, "loss": 1.722, "step": 23114 }, { "epoch": 0.30036888261541184, "grad_norm": 0.4167619049549103, "learning_rate": 0.00013995502930893306, "loss": 1.3218, "step": 23115 }, { "epoch": 0.30038187715932774, "grad_norm": 0.3589772880077362, "learning_rate": 0.00013995242984702168, "loss": 1.3254, "step": 23116 }, { "epoch": 0.3003948717032436, "grad_norm": 0.395468145608902, "learning_rate": 0.00013994983038511028, "loss": 1.3911, "step": 23117 }, { "epoch": 0.3004078662471595, "grad_norm": 0.5283803939819336, "learning_rate": 0.0001399472309231989, "loss": 1.5771, "step": 23118 }, { "epoch": 0.30042086079107533, "grad_norm": 0.34886303544044495, "learning_rate": 0.0001399446314612875, "loss": 1.5127, "step": 23119 }, { "epoch": 0.30043385533499123, "grad_norm": 0.4214344322681427, "learning_rate": 0.00013994203199937615, "loss": 1.4522, "step": 23120 }, { "epoch": 0.3004468498789071, "grad_norm": 0.3599209189414978, "learning_rate": 0.00013993943253746475, "loss": 1.4764, "step": 23121 }, { "epoch": 0.300459844422823, "grad_norm": 0.3773268163204193, "learning_rate": 0.00013993683307555338, "loss": 1.3414, "step": 23122 }, { "epoch": 0.3004728389667388, "grad_norm": 0.3108392357826233, "learning_rate": 0.00013993423361364197, "loss": 1.2555, "step": 23123 }, { "epoch": 0.3004858335106547, "grad_norm": 0.4072191119194031, "learning_rate": 0.0001399316341517306, "loss": 1.3073, "step": 23124 }, { "epoch": 0.30049882805457057, "grad_norm": 0.4205571711063385, "learning_rate": 0.00013992903468981922, "loss": 1.4057, "step": 23125 }, { "epoch": 0.30051182259848647, "grad_norm": 0.481813907623291, "learning_rate": 0.00013992643522790782, "loss": 1.4237, "step": 23126 }, { "epoch": 0.3005248171424023, "grad_norm": 0.46717846393585205, "learning_rate": 0.00013992383576599644, "loss": 1.321, "step": 23127 }, { "epoch": 0.3005378116863182, "grad_norm": 0.3609062731266022, "learning_rate": 0.00013992123630408507, "loss": 1.4154, "step": 23128 }, { "epoch": 0.30055080623023406, "grad_norm": 0.37674105167388916, "learning_rate": 0.00013991863684217367, "loss": 1.2722, "step": 23129 }, { "epoch": 0.30056380077414996, "grad_norm": 0.5592606067657471, "learning_rate": 0.0001399160373802623, "loss": 1.268, "step": 23130 }, { "epoch": 0.3005767953180658, "grad_norm": 0.32691535353660583, "learning_rate": 0.0001399134379183509, "loss": 1.3656, "step": 23131 }, { "epoch": 0.3005897898619817, "grad_norm": 0.36100149154663086, "learning_rate": 0.00013991083845643954, "loss": 1.3526, "step": 23132 }, { "epoch": 0.30060278440589755, "grad_norm": 0.4244671165943146, "learning_rate": 0.00013990823899452814, "loss": 1.543, "step": 23133 }, { "epoch": 0.30061577894981345, "grad_norm": 0.40261876583099365, "learning_rate": 0.00013990563953261676, "loss": 1.445, "step": 23134 }, { "epoch": 0.3006287734937293, "grad_norm": 0.35691580176353455, "learning_rate": 0.00013990304007070536, "loss": 1.4398, "step": 23135 }, { "epoch": 0.3006417680376452, "grad_norm": 0.4153222441673279, "learning_rate": 0.00013990044060879398, "loss": 1.289, "step": 23136 }, { "epoch": 0.30065476258156104, "grad_norm": 0.5712962746620178, "learning_rate": 0.0001398978411468826, "loss": 1.5766, "step": 23137 }, { "epoch": 0.30066775712547694, "grad_norm": 0.3895614743232727, "learning_rate": 0.0001398952416849712, "loss": 1.3107, "step": 23138 }, { "epoch": 0.3006807516693928, "grad_norm": 0.4627132713794708, "learning_rate": 0.00013989264222305986, "loss": 1.3964, "step": 23139 }, { "epoch": 0.3006937462133087, "grad_norm": 0.42954251170158386, "learning_rate": 0.00013989004276114845, "loss": 1.2456, "step": 23140 }, { "epoch": 0.30070674075722453, "grad_norm": 0.36836904287338257, "learning_rate": 0.00013988744329923705, "loss": 1.4635, "step": 23141 }, { "epoch": 0.30071973530114043, "grad_norm": 0.3514178693294525, "learning_rate": 0.00013988484383732568, "loss": 1.3925, "step": 23142 }, { "epoch": 0.30073272984505633, "grad_norm": 0.44862452149391174, "learning_rate": 0.0001398822443754143, "loss": 1.438, "step": 23143 }, { "epoch": 0.3007457243889722, "grad_norm": 0.5299797654151917, "learning_rate": 0.00013987964491350292, "loss": 1.4577, "step": 23144 }, { "epoch": 0.3007587189328881, "grad_norm": 0.4457918703556061, "learning_rate": 0.00013987704545159152, "loss": 1.6489, "step": 23145 }, { "epoch": 0.3007717134768039, "grad_norm": 0.4561452865600586, "learning_rate": 0.00013987444598968015, "loss": 1.3688, "step": 23146 }, { "epoch": 0.3007847080207198, "grad_norm": 0.44524261355400085, "learning_rate": 0.00013987184652776877, "loss": 1.4955, "step": 23147 }, { "epoch": 0.30079770256463567, "grad_norm": 0.41511401534080505, "learning_rate": 0.00013986924706585737, "loss": 1.1362, "step": 23148 }, { "epoch": 0.3008106971085516, "grad_norm": 0.42589128017425537, "learning_rate": 0.000139866647603946, "loss": 1.4206, "step": 23149 }, { "epoch": 0.3008236916524674, "grad_norm": 0.38631191849708557, "learning_rate": 0.0001398640481420346, "loss": 1.3594, "step": 23150 }, { "epoch": 0.3008366861963833, "grad_norm": 0.3594703674316406, "learning_rate": 0.00013986144868012324, "loss": 1.5019, "step": 23151 }, { "epoch": 0.30084968074029916, "grad_norm": 0.37220489978790283, "learning_rate": 0.00013985884921821184, "loss": 1.4041, "step": 23152 }, { "epoch": 0.30086267528421506, "grad_norm": 0.3390521705150604, "learning_rate": 0.00013985624975630046, "loss": 1.5505, "step": 23153 }, { "epoch": 0.3008756698281309, "grad_norm": 0.3974217474460602, "learning_rate": 0.00013985365029438906, "loss": 1.4717, "step": 23154 }, { "epoch": 0.3008886643720468, "grad_norm": 0.38737231492996216, "learning_rate": 0.00013985105083247769, "loss": 1.2632, "step": 23155 }, { "epoch": 0.30090165891596266, "grad_norm": 0.38725540041923523, "learning_rate": 0.0001398484513705663, "loss": 1.4045, "step": 23156 }, { "epoch": 0.30091465345987856, "grad_norm": 0.46442216634750366, "learning_rate": 0.0001398458519086549, "loss": 1.3241, "step": 23157 }, { "epoch": 0.3009276480037944, "grad_norm": 0.4750121235847473, "learning_rate": 0.00013984325244674353, "loss": 1.5128, "step": 23158 }, { "epoch": 0.3009406425477103, "grad_norm": 0.37743815779685974, "learning_rate": 0.00013984065298483216, "loss": 1.5687, "step": 23159 }, { "epoch": 0.30095363709162615, "grad_norm": 0.43370598554611206, "learning_rate": 0.00013983805352292075, "loss": 1.2567, "step": 23160 }, { "epoch": 0.30096663163554205, "grad_norm": 0.3631453812122345, "learning_rate": 0.00013983545406100938, "loss": 1.365, "step": 23161 }, { "epoch": 0.3009796261794579, "grad_norm": 0.2913861870765686, "learning_rate": 0.00013983285459909798, "loss": 1.4156, "step": 23162 }, { "epoch": 0.3009926207233738, "grad_norm": 0.4645940363407135, "learning_rate": 0.00013983025513718663, "loss": 1.4296, "step": 23163 }, { "epoch": 0.30100561526728964, "grad_norm": 0.3260025978088379, "learning_rate": 0.00013982765567527522, "loss": 1.2212, "step": 23164 }, { "epoch": 0.30101860981120554, "grad_norm": 0.37264716625213623, "learning_rate": 0.00013982505621336385, "loss": 1.6016, "step": 23165 }, { "epoch": 0.3010316043551214, "grad_norm": 0.3139488101005554, "learning_rate": 0.00013982245675145245, "loss": 1.3311, "step": 23166 }, { "epoch": 0.3010445988990373, "grad_norm": 0.39901939034461975, "learning_rate": 0.00013981985728954107, "loss": 1.5765, "step": 23167 }, { "epoch": 0.30105759344295313, "grad_norm": 0.42495572566986084, "learning_rate": 0.0001398172578276297, "loss": 1.611, "step": 23168 }, { "epoch": 0.30107058798686903, "grad_norm": 0.37210941314697266, "learning_rate": 0.0001398146583657183, "loss": 1.5092, "step": 23169 }, { "epoch": 0.3010835825307849, "grad_norm": 0.4773200452327728, "learning_rate": 0.00013981205890380692, "loss": 1.4687, "step": 23170 }, { "epoch": 0.3010965770747008, "grad_norm": 0.3680039644241333, "learning_rate": 0.00013980945944189554, "loss": 1.4688, "step": 23171 }, { "epoch": 0.3011095716186166, "grad_norm": 0.40396901965141296, "learning_rate": 0.00013980685997998414, "loss": 1.4098, "step": 23172 }, { "epoch": 0.3011225661625325, "grad_norm": 0.29876863956451416, "learning_rate": 0.00013980426051807276, "loss": 1.2946, "step": 23173 }, { "epoch": 0.30113556070644837, "grad_norm": 0.337939590215683, "learning_rate": 0.00013980166105616136, "loss": 1.3637, "step": 23174 }, { "epoch": 0.30114855525036427, "grad_norm": 0.3283732831478119, "learning_rate": 0.00013979906159425, "loss": 1.3861, "step": 23175 }, { "epoch": 0.3011615497942801, "grad_norm": 0.35722818970680237, "learning_rate": 0.0001397964621323386, "loss": 1.3949, "step": 23176 }, { "epoch": 0.301174544338196, "grad_norm": 0.5842116475105286, "learning_rate": 0.00013979386267042723, "loss": 1.4274, "step": 23177 }, { "epoch": 0.30118753888211186, "grad_norm": 0.37793973088264465, "learning_rate": 0.00013979126320851586, "loss": 1.375, "step": 23178 }, { "epoch": 0.30120053342602776, "grad_norm": 0.40954336524009705, "learning_rate": 0.00013978866374660446, "loss": 1.5104, "step": 23179 }, { "epoch": 0.3012135279699436, "grad_norm": 0.38299664855003357, "learning_rate": 0.00013978606428469308, "loss": 1.3905, "step": 23180 }, { "epoch": 0.3012265225138595, "grad_norm": 0.3575705587863922, "learning_rate": 0.00013978346482278168, "loss": 1.3747, "step": 23181 }, { "epoch": 0.30123951705777535, "grad_norm": 0.34922829270362854, "learning_rate": 0.00013978086536087033, "loss": 1.3434, "step": 23182 }, { "epoch": 0.30125251160169125, "grad_norm": 0.3702467381954193, "learning_rate": 0.00013977826589895893, "loss": 1.4672, "step": 23183 }, { "epoch": 0.3012655061456071, "grad_norm": 0.35502973198890686, "learning_rate": 0.00013977566643704752, "loss": 1.3925, "step": 23184 }, { "epoch": 0.301278500689523, "grad_norm": 0.4230843484401703, "learning_rate": 0.00013977306697513615, "loss": 1.4039, "step": 23185 }, { "epoch": 0.30129149523343884, "grad_norm": 0.4708244204521179, "learning_rate": 0.00013977046751322477, "loss": 1.435, "step": 23186 }, { "epoch": 0.30130448977735474, "grad_norm": 0.314054012298584, "learning_rate": 0.0001397678680513134, "loss": 1.4189, "step": 23187 }, { "epoch": 0.3013174843212706, "grad_norm": 0.45249736309051514, "learning_rate": 0.000139765268589402, "loss": 1.3491, "step": 23188 }, { "epoch": 0.3013304788651865, "grad_norm": 0.45952489972114563, "learning_rate": 0.00013976266912749062, "loss": 1.5901, "step": 23189 }, { "epoch": 0.30134347340910234, "grad_norm": 0.3819359540939331, "learning_rate": 0.00013976006966557924, "loss": 1.2112, "step": 23190 }, { "epoch": 0.30135646795301824, "grad_norm": 0.3522294759750366, "learning_rate": 0.00013975747020366784, "loss": 1.2512, "step": 23191 }, { "epoch": 0.3013694624969341, "grad_norm": 0.36187493801116943, "learning_rate": 0.00013975487074175647, "loss": 1.3138, "step": 23192 }, { "epoch": 0.30138245704085, "grad_norm": 0.4160166084766388, "learning_rate": 0.00013975227127984506, "loss": 1.3973, "step": 23193 }, { "epoch": 0.30139545158476583, "grad_norm": 0.36178678274154663, "learning_rate": 0.00013974967181793372, "loss": 1.355, "step": 23194 }, { "epoch": 0.30140844612868173, "grad_norm": 0.5714994668960571, "learning_rate": 0.0001397470723560223, "loss": 1.5371, "step": 23195 }, { "epoch": 0.3014214406725976, "grad_norm": 0.40891629457473755, "learning_rate": 0.0001397444728941109, "loss": 1.6034, "step": 23196 }, { "epoch": 0.3014344352165135, "grad_norm": 0.4667307138442993, "learning_rate": 0.00013974187343219953, "loss": 1.4555, "step": 23197 }, { "epoch": 0.3014474297604293, "grad_norm": 0.39944300055503845, "learning_rate": 0.00013973927397028816, "loss": 1.4359, "step": 23198 }, { "epoch": 0.3014604243043452, "grad_norm": 0.36141452193260193, "learning_rate": 0.00013973667450837678, "loss": 1.2626, "step": 23199 }, { "epoch": 0.30147341884826107, "grad_norm": 0.4503568708896637, "learning_rate": 0.00013973407504646538, "loss": 1.2701, "step": 23200 }, { "epoch": 0.30148641339217697, "grad_norm": 0.3694996237754822, "learning_rate": 0.000139731475584554, "loss": 1.1636, "step": 23201 }, { "epoch": 0.3014994079360928, "grad_norm": 0.3991595506668091, "learning_rate": 0.00013972887612264263, "loss": 1.513, "step": 23202 }, { "epoch": 0.3015124024800087, "grad_norm": 0.4340282082557678, "learning_rate": 0.00013972627666073123, "loss": 1.5162, "step": 23203 }, { "epoch": 0.30152539702392456, "grad_norm": 0.37580135464668274, "learning_rate": 0.00013972367719881985, "loss": 1.3635, "step": 23204 }, { "epoch": 0.30153839156784046, "grad_norm": 0.457426518201828, "learning_rate": 0.00013972107773690845, "loss": 1.3668, "step": 23205 }, { "epoch": 0.3015513861117563, "grad_norm": 0.4442581236362457, "learning_rate": 0.0001397184782749971, "loss": 1.463, "step": 23206 }, { "epoch": 0.3015643806556722, "grad_norm": 0.26839351654052734, "learning_rate": 0.0001397158788130857, "loss": 1.3775, "step": 23207 }, { "epoch": 0.30157737519958805, "grad_norm": 0.31884241104125977, "learning_rate": 0.0001397132793511743, "loss": 1.3226, "step": 23208 }, { "epoch": 0.30159036974350395, "grad_norm": 0.305759459733963, "learning_rate": 0.00013971067988926292, "loss": 1.344, "step": 23209 }, { "epoch": 0.3016033642874198, "grad_norm": 0.35471776127815247, "learning_rate": 0.00013970808042735154, "loss": 1.3643, "step": 23210 }, { "epoch": 0.3016163588313357, "grad_norm": 0.4749649465084076, "learning_rate": 0.00013970548096544017, "loss": 1.2789, "step": 23211 }, { "epoch": 0.30162935337525154, "grad_norm": 0.3606630861759186, "learning_rate": 0.00013970288150352877, "loss": 1.2798, "step": 23212 }, { "epoch": 0.30164234791916744, "grad_norm": 0.4138084948062897, "learning_rate": 0.0001397002820416174, "loss": 1.3784, "step": 23213 }, { "epoch": 0.3016553424630833, "grad_norm": 0.4225277602672577, "learning_rate": 0.00013969768257970602, "loss": 1.5316, "step": 23214 }, { "epoch": 0.3016683370069992, "grad_norm": 0.5402829647064209, "learning_rate": 0.0001396950831177946, "loss": 1.3919, "step": 23215 }, { "epoch": 0.30168133155091503, "grad_norm": 0.37081778049468994, "learning_rate": 0.00013969248365588324, "loss": 1.2534, "step": 23216 }, { "epoch": 0.30169432609483093, "grad_norm": 0.4349399507045746, "learning_rate": 0.00013968988419397186, "loss": 1.3948, "step": 23217 }, { "epoch": 0.3017073206387468, "grad_norm": 0.4180797338485718, "learning_rate": 0.00013968728473206049, "loss": 1.4315, "step": 23218 }, { "epoch": 0.3017203151826627, "grad_norm": 0.34367337822914124, "learning_rate": 0.00013968468527014908, "loss": 1.3839, "step": 23219 }, { "epoch": 0.3017333097265786, "grad_norm": 0.4184907376766205, "learning_rate": 0.0001396820858082377, "loss": 1.3688, "step": 23220 }, { "epoch": 0.3017463042704944, "grad_norm": 0.26103711128234863, "learning_rate": 0.00013967948634632633, "loss": 1.4243, "step": 23221 }, { "epoch": 0.3017592988144103, "grad_norm": 0.5270993113517761, "learning_rate": 0.00013967688688441493, "loss": 1.3227, "step": 23222 }, { "epoch": 0.30177229335832617, "grad_norm": 0.3441081643104553, "learning_rate": 0.00013967428742250355, "loss": 1.4017, "step": 23223 }, { "epoch": 0.30178528790224207, "grad_norm": 0.3834455907344818, "learning_rate": 0.00013967168796059215, "loss": 1.2201, "step": 23224 }, { "epoch": 0.3017982824461579, "grad_norm": 0.5141701102256775, "learning_rate": 0.00013966908849868078, "loss": 1.4855, "step": 23225 }, { "epoch": 0.3018112769900738, "grad_norm": 0.30219364166259766, "learning_rate": 0.0001396664890367694, "loss": 1.2627, "step": 23226 }, { "epoch": 0.30182427153398966, "grad_norm": 0.35873734951019287, "learning_rate": 0.000139663889574858, "loss": 1.3197, "step": 23227 }, { "epoch": 0.30183726607790556, "grad_norm": 0.4135015606880188, "learning_rate": 0.00013966129011294662, "loss": 1.6383, "step": 23228 }, { "epoch": 0.3018502606218214, "grad_norm": 0.432526558637619, "learning_rate": 0.00013965869065103525, "loss": 1.5395, "step": 23229 }, { "epoch": 0.3018632551657373, "grad_norm": 0.3854345381259918, "learning_rate": 0.00013965609118912387, "loss": 1.5485, "step": 23230 }, { "epoch": 0.30187624970965315, "grad_norm": 0.4011388421058655, "learning_rate": 0.00013965349172721247, "loss": 1.248, "step": 23231 }, { "epoch": 0.30188924425356906, "grad_norm": 0.3831024169921875, "learning_rate": 0.0001396508922653011, "loss": 1.3951, "step": 23232 }, { "epoch": 0.3019022387974849, "grad_norm": 0.41663724184036255, "learning_rate": 0.00013964829280338972, "loss": 1.705, "step": 23233 }, { "epoch": 0.3019152333414008, "grad_norm": 0.450585275888443, "learning_rate": 0.00013964569334147832, "loss": 1.4459, "step": 23234 }, { "epoch": 0.30192822788531665, "grad_norm": 0.40704357624053955, "learning_rate": 0.00013964309387956694, "loss": 1.2648, "step": 23235 }, { "epoch": 0.30194122242923255, "grad_norm": 0.34594273567199707, "learning_rate": 0.00013964049441765554, "loss": 1.5103, "step": 23236 }, { "epoch": 0.3019542169731484, "grad_norm": 0.32361677289009094, "learning_rate": 0.0001396378949557442, "loss": 1.3969, "step": 23237 }, { "epoch": 0.3019672115170643, "grad_norm": 0.5021398663520813, "learning_rate": 0.00013963529549383279, "loss": 1.5684, "step": 23238 }, { "epoch": 0.30198020606098014, "grad_norm": 0.41178274154663086, "learning_rate": 0.00013963269603192138, "loss": 1.2743, "step": 23239 }, { "epoch": 0.30199320060489604, "grad_norm": 0.4706447124481201, "learning_rate": 0.00013963009657001, "loss": 1.3471, "step": 23240 }, { "epoch": 0.3020061951488119, "grad_norm": 0.3766452670097351, "learning_rate": 0.00013962749710809863, "loss": 1.4541, "step": 23241 }, { "epoch": 0.3020191896927278, "grad_norm": 0.30931195616722107, "learning_rate": 0.00013962489764618726, "loss": 1.232, "step": 23242 }, { "epoch": 0.30203218423664363, "grad_norm": 0.43955984711647034, "learning_rate": 0.00013962229818427585, "loss": 1.2698, "step": 23243 }, { "epoch": 0.30204517878055953, "grad_norm": 0.31557130813598633, "learning_rate": 0.00013961969872236448, "loss": 1.4353, "step": 23244 }, { "epoch": 0.3020581733244754, "grad_norm": 0.374362975358963, "learning_rate": 0.0001396170992604531, "loss": 1.3089, "step": 23245 }, { "epoch": 0.3020711678683913, "grad_norm": 0.28767290711402893, "learning_rate": 0.0001396144997985417, "loss": 1.287, "step": 23246 }, { "epoch": 0.3020841624123071, "grad_norm": 0.39166656136512756, "learning_rate": 0.00013961190033663032, "loss": 1.4792, "step": 23247 }, { "epoch": 0.302097156956223, "grad_norm": 0.33174094557762146, "learning_rate": 0.00013960930087471892, "loss": 1.2213, "step": 23248 }, { "epoch": 0.30211015150013887, "grad_norm": 0.42901986837387085, "learning_rate": 0.00013960670141280757, "loss": 1.3961, "step": 23249 }, { "epoch": 0.30212314604405477, "grad_norm": 0.4432438910007477, "learning_rate": 0.00013960410195089617, "loss": 1.2548, "step": 23250 }, { "epoch": 0.3021361405879706, "grad_norm": 0.3181210458278656, "learning_rate": 0.00013960150248898477, "loss": 1.3349, "step": 23251 }, { "epoch": 0.3021491351318865, "grad_norm": 0.3390786349773407, "learning_rate": 0.00013959890302707342, "loss": 1.4645, "step": 23252 }, { "epoch": 0.30216212967580236, "grad_norm": 0.3404589295387268, "learning_rate": 0.00013959630356516202, "loss": 1.3128, "step": 23253 }, { "epoch": 0.30217512421971826, "grad_norm": 0.30252063274383545, "learning_rate": 0.00013959370410325064, "loss": 1.1207, "step": 23254 }, { "epoch": 0.3021881187636341, "grad_norm": 0.2729305028915405, "learning_rate": 0.00013959110464133924, "loss": 1.5024, "step": 23255 }, { "epoch": 0.30220111330755, "grad_norm": 0.30618563294410706, "learning_rate": 0.00013958850517942786, "loss": 1.1178, "step": 23256 }, { "epoch": 0.30221410785146585, "grad_norm": 0.3727412223815918, "learning_rate": 0.0001395859057175165, "loss": 1.4083, "step": 23257 }, { "epoch": 0.30222710239538175, "grad_norm": 0.3510522246360779, "learning_rate": 0.00013958330625560509, "loss": 1.5114, "step": 23258 }, { "epoch": 0.3022400969392976, "grad_norm": 0.3374330699443817, "learning_rate": 0.0001395807067936937, "loss": 1.107, "step": 23259 }, { "epoch": 0.3022530914832135, "grad_norm": 0.4756379723548889, "learning_rate": 0.00013957810733178233, "loss": 1.5095, "step": 23260 }, { "epoch": 0.30226608602712934, "grad_norm": 0.3482968807220459, "learning_rate": 0.00013957550786987096, "loss": 1.171, "step": 23261 }, { "epoch": 0.30227908057104524, "grad_norm": 0.31565549969673157, "learning_rate": 0.00013957290840795956, "loss": 1.4808, "step": 23262 }, { "epoch": 0.3022920751149611, "grad_norm": 0.4882533848285675, "learning_rate": 0.00013957030894604815, "loss": 1.3616, "step": 23263 }, { "epoch": 0.302305069658877, "grad_norm": 0.25556454062461853, "learning_rate": 0.0001395677094841368, "loss": 1.2692, "step": 23264 }, { "epoch": 0.30231806420279284, "grad_norm": 0.3218751847743988, "learning_rate": 0.0001395651100222254, "loss": 1.4677, "step": 23265 }, { "epoch": 0.30233105874670874, "grad_norm": 0.3643142580986023, "learning_rate": 0.00013956251056031403, "loss": 1.5306, "step": 23266 }, { "epoch": 0.3023440532906246, "grad_norm": 0.33794793486595154, "learning_rate": 0.00013955991109840262, "loss": 1.5031, "step": 23267 }, { "epoch": 0.3023570478345405, "grad_norm": 0.35743266344070435, "learning_rate": 0.00013955731163649125, "loss": 1.2518, "step": 23268 }, { "epoch": 0.3023700423784563, "grad_norm": 0.41200628876686096, "learning_rate": 0.00013955471217457987, "loss": 1.3647, "step": 23269 }, { "epoch": 0.30238303692237223, "grad_norm": 0.4245513379573822, "learning_rate": 0.00013955211271266847, "loss": 1.2949, "step": 23270 }, { "epoch": 0.3023960314662881, "grad_norm": 0.40892940759658813, "learning_rate": 0.0001395495132507571, "loss": 1.3158, "step": 23271 }, { "epoch": 0.302409026010204, "grad_norm": 0.2947690784931183, "learning_rate": 0.00013954691378884572, "loss": 1.4545, "step": 23272 }, { "epoch": 0.3024220205541198, "grad_norm": 0.4097446799278259, "learning_rate": 0.00013954431432693434, "loss": 1.3724, "step": 23273 }, { "epoch": 0.3024350150980357, "grad_norm": 0.39230504631996155, "learning_rate": 0.00013954171486502294, "loss": 1.2693, "step": 23274 }, { "epoch": 0.30244800964195157, "grad_norm": 0.3813019394874573, "learning_rate": 0.00013953911540311157, "loss": 1.538, "step": 23275 }, { "epoch": 0.30246100418586747, "grad_norm": 0.38401761651039124, "learning_rate": 0.0001395365159412002, "loss": 1.3451, "step": 23276 }, { "epoch": 0.3024739987297833, "grad_norm": 0.4438844621181488, "learning_rate": 0.0001395339164792888, "loss": 1.4136, "step": 23277 }, { "epoch": 0.3024869932736992, "grad_norm": 0.49788713455200195, "learning_rate": 0.0001395313170173774, "loss": 1.5324, "step": 23278 }, { "epoch": 0.30249998781761506, "grad_norm": 0.4323347508907318, "learning_rate": 0.000139528717555466, "loss": 1.5753, "step": 23279 }, { "epoch": 0.30251298236153096, "grad_norm": 0.432938814163208, "learning_rate": 0.00013952611809355463, "loss": 1.598, "step": 23280 }, { "epoch": 0.3025259769054468, "grad_norm": 0.47529903054237366, "learning_rate": 0.00013952351863164326, "loss": 1.433, "step": 23281 }, { "epoch": 0.3025389714493627, "grad_norm": 0.2561517059803009, "learning_rate": 0.00013952091916973186, "loss": 1.4588, "step": 23282 }, { "epoch": 0.30255196599327855, "grad_norm": 0.37453752756118774, "learning_rate": 0.00013951831970782048, "loss": 1.3911, "step": 23283 }, { "epoch": 0.30256496053719445, "grad_norm": 0.3329564034938812, "learning_rate": 0.0001395157202459091, "loss": 1.4052, "step": 23284 }, { "epoch": 0.3025779550811103, "grad_norm": 0.45538899302482605, "learning_rate": 0.00013951312078399773, "loss": 1.5904, "step": 23285 }, { "epoch": 0.3025909496250262, "grad_norm": 0.4049103260040283, "learning_rate": 0.00013951052132208633, "loss": 1.4333, "step": 23286 }, { "epoch": 0.30260394416894204, "grad_norm": 0.41849347949028015, "learning_rate": 0.00013950792186017495, "loss": 1.4867, "step": 23287 }, { "epoch": 0.30261693871285794, "grad_norm": 0.3378688395023346, "learning_rate": 0.00013950532239826358, "loss": 1.2821, "step": 23288 }, { "epoch": 0.3026299332567738, "grad_norm": 0.317399799823761, "learning_rate": 0.00013950272293635217, "loss": 1.2492, "step": 23289 }, { "epoch": 0.3026429278006897, "grad_norm": 0.32929331064224243, "learning_rate": 0.0001395001234744408, "loss": 1.4858, "step": 23290 }, { "epoch": 0.30265592234460553, "grad_norm": 0.41066640615463257, "learning_rate": 0.00013949752401252942, "loss": 1.4886, "step": 23291 }, { "epoch": 0.30266891688852143, "grad_norm": 0.44785404205322266, "learning_rate": 0.00013949492455061802, "loss": 1.403, "step": 23292 }, { "epoch": 0.3026819114324373, "grad_norm": 0.3656848967075348, "learning_rate": 0.00013949232508870664, "loss": 1.3891, "step": 23293 }, { "epoch": 0.3026949059763532, "grad_norm": 0.47436001896858215, "learning_rate": 0.00013948972562679524, "loss": 1.4735, "step": 23294 }, { "epoch": 0.302707900520269, "grad_norm": 0.3732442557811737, "learning_rate": 0.0001394871261648839, "loss": 1.3321, "step": 23295 }, { "epoch": 0.3027208950641849, "grad_norm": 0.5020295977592468, "learning_rate": 0.0001394845267029725, "loss": 1.4719, "step": 23296 }, { "epoch": 0.3027338896081008, "grad_norm": 0.45214781165122986, "learning_rate": 0.00013948192724106112, "loss": 1.4315, "step": 23297 }, { "epoch": 0.30274688415201667, "grad_norm": 0.3320401608943939, "learning_rate": 0.0001394793277791497, "loss": 1.4257, "step": 23298 }, { "epoch": 0.30275987869593257, "grad_norm": 0.357349693775177, "learning_rate": 0.00013947672831723834, "loss": 1.4468, "step": 23299 }, { "epoch": 0.3027728732398484, "grad_norm": 0.4401451051235199, "learning_rate": 0.00013947412885532696, "loss": 1.5007, "step": 23300 }, { "epoch": 0.3027858677837643, "grad_norm": 0.4379909932613373, "learning_rate": 0.00013947152939341556, "loss": 1.4731, "step": 23301 }, { "epoch": 0.30279886232768016, "grad_norm": 0.44491416215896606, "learning_rate": 0.00013946892993150418, "loss": 1.4754, "step": 23302 }, { "epoch": 0.30281185687159606, "grad_norm": 0.5017706751823425, "learning_rate": 0.0001394663304695928, "loss": 1.4334, "step": 23303 }, { "epoch": 0.3028248514155119, "grad_norm": 0.4530334770679474, "learning_rate": 0.00013946373100768143, "loss": 1.475, "step": 23304 }, { "epoch": 0.3028378459594278, "grad_norm": 0.4294649660587311, "learning_rate": 0.00013946113154577003, "loss": 1.3431, "step": 23305 }, { "epoch": 0.30285084050334365, "grad_norm": 0.3677988648414612, "learning_rate": 0.00013945853208385863, "loss": 1.482, "step": 23306 }, { "epoch": 0.30286383504725956, "grad_norm": 0.3039516806602478, "learning_rate": 0.00013945593262194728, "loss": 1.1731, "step": 23307 }, { "epoch": 0.3028768295911754, "grad_norm": 0.3287495970726013, "learning_rate": 0.00013945333316003588, "loss": 1.3002, "step": 23308 }, { "epoch": 0.3028898241350913, "grad_norm": 0.3238582909107208, "learning_rate": 0.0001394507336981245, "loss": 1.3247, "step": 23309 }, { "epoch": 0.30290281867900715, "grad_norm": 0.46046897768974304, "learning_rate": 0.0001394481342362131, "loss": 1.5692, "step": 23310 }, { "epoch": 0.30291581322292305, "grad_norm": 0.4231153428554535, "learning_rate": 0.00013944553477430172, "loss": 1.5392, "step": 23311 }, { "epoch": 0.3029288077668389, "grad_norm": 0.46809911727905273, "learning_rate": 0.00013944293531239035, "loss": 1.4985, "step": 23312 }, { "epoch": 0.3029418023107548, "grad_norm": 0.3634107708930969, "learning_rate": 0.00013944033585047894, "loss": 1.3924, "step": 23313 }, { "epoch": 0.30295479685467064, "grad_norm": 0.41893237829208374, "learning_rate": 0.00013943773638856757, "loss": 1.4398, "step": 23314 }, { "epoch": 0.30296779139858654, "grad_norm": 0.4360145628452301, "learning_rate": 0.0001394351369266562, "loss": 1.3139, "step": 23315 }, { "epoch": 0.3029807859425024, "grad_norm": 0.3686993718147278, "learning_rate": 0.00013943253746474482, "loss": 1.467, "step": 23316 }, { "epoch": 0.3029937804864183, "grad_norm": 0.27927976846694946, "learning_rate": 0.00013942993800283342, "loss": 1.3602, "step": 23317 }, { "epoch": 0.30300677503033413, "grad_norm": 0.378081738948822, "learning_rate": 0.000139427338540922, "loss": 1.3723, "step": 23318 }, { "epoch": 0.30301976957425003, "grad_norm": 0.40298354625701904, "learning_rate": 0.00013942473907901066, "loss": 1.4525, "step": 23319 }, { "epoch": 0.3030327641181659, "grad_norm": 0.35920295119285583, "learning_rate": 0.00013942213961709926, "loss": 1.4268, "step": 23320 }, { "epoch": 0.3030457586620818, "grad_norm": 0.3600679636001587, "learning_rate": 0.00013941954015518789, "loss": 1.6027, "step": 23321 }, { "epoch": 0.3030587532059976, "grad_norm": 0.4024488031864166, "learning_rate": 0.00013941694069327648, "loss": 1.3513, "step": 23322 }, { "epoch": 0.3030717477499135, "grad_norm": 0.3834989070892334, "learning_rate": 0.0001394143412313651, "loss": 1.304, "step": 23323 }, { "epoch": 0.30308474229382937, "grad_norm": 0.3793964385986328, "learning_rate": 0.00013941174176945373, "loss": 1.389, "step": 23324 }, { "epoch": 0.30309773683774527, "grad_norm": 0.4368106722831726, "learning_rate": 0.00013940914230754233, "loss": 1.4158, "step": 23325 }, { "epoch": 0.3031107313816611, "grad_norm": 0.43449971079826355, "learning_rate": 0.00013940654284563098, "loss": 1.4015, "step": 23326 }, { "epoch": 0.303123725925577, "grad_norm": 0.48215702176094055, "learning_rate": 0.00013940394338371958, "loss": 1.41, "step": 23327 }, { "epoch": 0.30313672046949286, "grad_norm": 0.4209710359573364, "learning_rate": 0.0001394013439218082, "loss": 1.4505, "step": 23328 }, { "epoch": 0.30314971501340876, "grad_norm": 0.36241504549980164, "learning_rate": 0.0001393987444598968, "loss": 1.4124, "step": 23329 }, { "epoch": 0.3031627095573246, "grad_norm": 0.29592418670654297, "learning_rate": 0.00013939614499798543, "loss": 1.392, "step": 23330 }, { "epoch": 0.3031757041012405, "grad_norm": 0.3353019654750824, "learning_rate": 0.00013939354553607405, "loss": 1.3459, "step": 23331 }, { "epoch": 0.30318869864515635, "grad_norm": 0.3616652190685272, "learning_rate": 0.00013939094607416265, "loss": 1.4107, "step": 23332 }, { "epoch": 0.30320169318907225, "grad_norm": 0.24782046675682068, "learning_rate": 0.00013938834661225127, "loss": 1.2818, "step": 23333 }, { "epoch": 0.3032146877329881, "grad_norm": 0.36093559861183167, "learning_rate": 0.0001393857471503399, "loss": 1.2839, "step": 23334 }, { "epoch": 0.303227682276904, "grad_norm": 0.2977079749107361, "learning_rate": 0.0001393831476884285, "loss": 1.3725, "step": 23335 }, { "epoch": 0.30324067682081984, "grad_norm": 0.39603355526924133, "learning_rate": 0.00013938054822651712, "loss": 1.5214, "step": 23336 }, { "epoch": 0.30325367136473574, "grad_norm": 0.284260094165802, "learning_rate": 0.00013937794876460572, "loss": 1.3551, "step": 23337 }, { "epoch": 0.3032666659086516, "grad_norm": 0.35100439190864563, "learning_rate": 0.00013937534930269437, "loss": 1.457, "step": 23338 }, { "epoch": 0.3032796604525675, "grad_norm": 0.43551748991012573, "learning_rate": 0.00013937274984078296, "loss": 1.3806, "step": 23339 }, { "epoch": 0.30329265499648334, "grad_norm": 0.48077815771102905, "learning_rate": 0.0001393701503788716, "loss": 1.5811, "step": 23340 }, { "epoch": 0.30330564954039924, "grad_norm": 0.3833942413330078, "learning_rate": 0.00013936755091696019, "loss": 1.497, "step": 23341 }, { "epoch": 0.3033186440843151, "grad_norm": 0.4411120116710663, "learning_rate": 0.0001393649514550488, "loss": 1.5894, "step": 23342 }, { "epoch": 0.303331638628231, "grad_norm": 0.3648945689201355, "learning_rate": 0.00013936235199313744, "loss": 1.302, "step": 23343 }, { "epoch": 0.3033446331721468, "grad_norm": 0.4178422689437866, "learning_rate": 0.00013935975253122603, "loss": 1.4245, "step": 23344 }, { "epoch": 0.3033576277160627, "grad_norm": 0.3966846764087677, "learning_rate": 0.00013935715306931466, "loss": 1.3355, "step": 23345 }, { "epoch": 0.3033706222599786, "grad_norm": 0.2796681225299835, "learning_rate": 0.00013935455360740328, "loss": 1.0936, "step": 23346 }, { "epoch": 0.3033836168038945, "grad_norm": 0.41754207015037537, "learning_rate": 0.00013935195414549188, "loss": 1.3595, "step": 23347 }, { "epoch": 0.3033966113478103, "grad_norm": 0.37742170691490173, "learning_rate": 0.0001393493546835805, "loss": 1.4647, "step": 23348 }, { "epoch": 0.3034096058917262, "grad_norm": 0.38867121934890747, "learning_rate": 0.0001393467552216691, "loss": 1.5306, "step": 23349 }, { "epoch": 0.30342260043564206, "grad_norm": 0.5812363624572754, "learning_rate": 0.00013934415575975775, "loss": 1.5353, "step": 23350 }, { "epoch": 0.30343559497955797, "grad_norm": 0.40888872742652893, "learning_rate": 0.00013934155629784635, "loss": 1.2623, "step": 23351 }, { "epoch": 0.3034485895234738, "grad_norm": 0.35665449500083923, "learning_rate": 0.00013933895683593497, "loss": 1.3862, "step": 23352 }, { "epoch": 0.3034615840673897, "grad_norm": 0.3410831391811371, "learning_rate": 0.00013933635737402357, "loss": 1.3896, "step": 23353 }, { "epoch": 0.30347457861130556, "grad_norm": 0.46788543462753296, "learning_rate": 0.0001393337579121122, "loss": 1.3214, "step": 23354 }, { "epoch": 0.30348757315522146, "grad_norm": 0.30422621965408325, "learning_rate": 0.00013933115845020082, "loss": 1.2889, "step": 23355 }, { "epoch": 0.3035005676991373, "grad_norm": 0.3669605851173401, "learning_rate": 0.00013932855898828942, "loss": 1.2636, "step": 23356 }, { "epoch": 0.3035135622430532, "grad_norm": 0.35256868600845337, "learning_rate": 0.00013932595952637804, "loss": 1.4778, "step": 23357 }, { "epoch": 0.30352655678696905, "grad_norm": 0.3997613787651062, "learning_rate": 0.00013932336006446667, "loss": 1.4352, "step": 23358 }, { "epoch": 0.30353955133088495, "grad_norm": 0.3804091215133667, "learning_rate": 0.0001393207606025553, "loss": 1.2913, "step": 23359 }, { "epoch": 0.3035525458748008, "grad_norm": 0.3649604618549347, "learning_rate": 0.0001393181611406439, "loss": 1.3199, "step": 23360 }, { "epoch": 0.3035655404187167, "grad_norm": 0.3914465606212616, "learning_rate": 0.00013931556167873249, "loss": 1.3312, "step": 23361 }, { "epoch": 0.30357853496263254, "grad_norm": 0.4296543598175049, "learning_rate": 0.00013931296221682114, "loss": 1.3644, "step": 23362 }, { "epoch": 0.30359152950654844, "grad_norm": 0.4134215712547302, "learning_rate": 0.00013931036275490974, "loss": 1.3889, "step": 23363 }, { "epoch": 0.3036045240504643, "grad_norm": 0.46308422088623047, "learning_rate": 0.00013930776329299836, "loss": 1.4569, "step": 23364 }, { "epoch": 0.3036175185943802, "grad_norm": 0.3566986918449402, "learning_rate": 0.00013930516383108698, "loss": 1.1827, "step": 23365 }, { "epoch": 0.30363051313829603, "grad_norm": 0.43237459659576416, "learning_rate": 0.00013930256436917558, "loss": 1.3108, "step": 23366 }, { "epoch": 0.30364350768221193, "grad_norm": 0.3919980227947235, "learning_rate": 0.0001392999649072642, "loss": 1.5722, "step": 23367 }, { "epoch": 0.3036565022261278, "grad_norm": 0.38708871603012085, "learning_rate": 0.0001392973654453528, "loss": 1.4127, "step": 23368 }, { "epoch": 0.3036694967700437, "grad_norm": 0.36836186051368713, "learning_rate": 0.00013929476598344145, "loss": 1.174, "step": 23369 }, { "epoch": 0.3036824913139595, "grad_norm": 0.31136471033096313, "learning_rate": 0.00013929216652153005, "loss": 1.2656, "step": 23370 }, { "epoch": 0.3036954858578754, "grad_norm": 0.3176463842391968, "learning_rate": 0.00013928956705961868, "loss": 1.2335, "step": 23371 }, { "epoch": 0.3037084804017913, "grad_norm": 0.40809836983680725, "learning_rate": 0.00013928696759770727, "loss": 1.349, "step": 23372 }, { "epoch": 0.30372147494570717, "grad_norm": 0.40170395374298096, "learning_rate": 0.0001392843681357959, "loss": 1.474, "step": 23373 }, { "epoch": 0.30373446948962307, "grad_norm": 0.4301799237728119, "learning_rate": 0.00013928176867388452, "loss": 1.4337, "step": 23374 }, { "epoch": 0.3037474640335389, "grad_norm": 0.45454856753349304, "learning_rate": 0.00013927916921197312, "loss": 1.4051, "step": 23375 }, { "epoch": 0.3037604585774548, "grad_norm": 0.52679842710495, "learning_rate": 0.00013927656975006174, "loss": 1.4775, "step": 23376 }, { "epoch": 0.30377345312137066, "grad_norm": 0.3931540846824646, "learning_rate": 0.00013927397028815037, "loss": 1.4335, "step": 23377 }, { "epoch": 0.30378644766528656, "grad_norm": 0.45587897300720215, "learning_rate": 0.00013927137082623897, "loss": 1.3549, "step": 23378 }, { "epoch": 0.3037994422092024, "grad_norm": 0.37841564416885376, "learning_rate": 0.0001392687713643276, "loss": 1.4803, "step": 23379 }, { "epoch": 0.3038124367531183, "grad_norm": 0.4343169927597046, "learning_rate": 0.0001392661719024162, "loss": 1.3295, "step": 23380 }, { "epoch": 0.30382543129703415, "grad_norm": 0.43603068590164185, "learning_rate": 0.00013926357244050484, "loss": 1.3117, "step": 23381 }, { "epoch": 0.30383842584095005, "grad_norm": 0.35854434967041016, "learning_rate": 0.00013926097297859344, "loss": 1.4942, "step": 23382 }, { "epoch": 0.3038514203848659, "grad_norm": 0.5235525369644165, "learning_rate": 0.00013925837351668206, "loss": 1.3438, "step": 23383 }, { "epoch": 0.3038644149287818, "grad_norm": 0.3831540644168854, "learning_rate": 0.00013925577405477066, "loss": 1.5348, "step": 23384 }, { "epoch": 0.30387740947269765, "grad_norm": 0.45061415433883667, "learning_rate": 0.00013925317459285928, "loss": 1.5049, "step": 23385 }, { "epoch": 0.30389040401661355, "grad_norm": 0.3730837106704712, "learning_rate": 0.0001392505751309479, "loss": 1.2029, "step": 23386 }, { "epoch": 0.3039033985605294, "grad_norm": 0.47303473949432373, "learning_rate": 0.0001392479756690365, "loss": 1.5152, "step": 23387 }, { "epoch": 0.3039163931044453, "grad_norm": 0.2890664041042328, "learning_rate": 0.00013924537620712513, "loss": 1.3942, "step": 23388 }, { "epoch": 0.30392938764836114, "grad_norm": 0.45294487476348877, "learning_rate": 0.00013924277674521375, "loss": 1.5301, "step": 23389 }, { "epoch": 0.30394238219227704, "grad_norm": 0.43785783648490906, "learning_rate": 0.00013924017728330235, "loss": 1.2817, "step": 23390 }, { "epoch": 0.3039553767361929, "grad_norm": 0.4318059980869293, "learning_rate": 0.00013923757782139098, "loss": 1.3844, "step": 23391 }, { "epoch": 0.3039683712801088, "grad_norm": 0.500150203704834, "learning_rate": 0.00013923497835947957, "loss": 1.3624, "step": 23392 }, { "epoch": 0.30398136582402463, "grad_norm": 0.4585595428943634, "learning_rate": 0.00013923237889756823, "loss": 1.5629, "step": 23393 }, { "epoch": 0.30399436036794053, "grad_norm": 0.36613747477531433, "learning_rate": 0.00013922977943565682, "loss": 1.3208, "step": 23394 }, { "epoch": 0.3040073549118564, "grad_norm": 0.38989126682281494, "learning_rate": 0.00013922717997374545, "loss": 1.4224, "step": 23395 }, { "epoch": 0.3040203494557723, "grad_norm": 0.34212422370910645, "learning_rate": 0.00013922458051183404, "loss": 1.2932, "step": 23396 }, { "epoch": 0.3040333439996881, "grad_norm": 0.5155880451202393, "learning_rate": 0.00013922198104992267, "loss": 1.3404, "step": 23397 }, { "epoch": 0.304046338543604, "grad_norm": 0.4678805470466614, "learning_rate": 0.0001392193815880113, "loss": 1.5571, "step": 23398 }, { "epoch": 0.30405933308751987, "grad_norm": 0.39795011281967163, "learning_rate": 0.0001392167821260999, "loss": 1.3241, "step": 23399 }, { "epoch": 0.30407232763143577, "grad_norm": 0.40905988216400146, "learning_rate": 0.00013921418266418854, "loss": 1.3328, "step": 23400 }, { "epoch": 0.3040853221753516, "grad_norm": 0.44862300157546997, "learning_rate": 0.00013921158320227714, "loss": 1.3618, "step": 23401 }, { "epoch": 0.3040983167192675, "grad_norm": 0.35704198479652405, "learning_rate": 0.00013920898374036574, "loss": 1.4144, "step": 23402 }, { "epoch": 0.30411131126318336, "grad_norm": 0.44293662905693054, "learning_rate": 0.00013920638427845436, "loss": 1.3012, "step": 23403 }, { "epoch": 0.30412430580709926, "grad_norm": 0.41678890585899353, "learning_rate": 0.000139203784816543, "loss": 1.5451, "step": 23404 }, { "epoch": 0.3041373003510151, "grad_norm": 0.41258934140205383, "learning_rate": 0.0001392011853546316, "loss": 1.3727, "step": 23405 }, { "epoch": 0.304150294894931, "grad_norm": 1.7785333395004272, "learning_rate": 0.0001391985858927202, "loss": 1.3302, "step": 23406 }, { "epoch": 0.30416328943884685, "grad_norm": 0.4313473403453827, "learning_rate": 0.00013919598643080883, "loss": 1.3551, "step": 23407 }, { "epoch": 0.30417628398276275, "grad_norm": 0.44515150785446167, "learning_rate": 0.00013919338696889746, "loss": 1.3341, "step": 23408 }, { "epoch": 0.3041892785266786, "grad_norm": 0.36140763759613037, "learning_rate": 0.00013919078750698605, "loss": 1.3879, "step": 23409 }, { "epoch": 0.3042022730705945, "grad_norm": 0.45209184288978577, "learning_rate": 0.00013918818804507468, "loss": 1.3747, "step": 23410 }, { "epoch": 0.30421526761451034, "grad_norm": 0.4458887279033661, "learning_rate": 0.00013918558858316328, "loss": 1.4881, "step": 23411 }, { "epoch": 0.30422826215842624, "grad_norm": 0.46184462308883667, "learning_rate": 0.00013918298912125193, "loss": 1.5488, "step": 23412 }, { "epoch": 0.3042412567023421, "grad_norm": 0.4529193043708801, "learning_rate": 0.00013918038965934053, "loss": 1.3648, "step": 23413 }, { "epoch": 0.304254251246258, "grad_norm": 0.33885693550109863, "learning_rate": 0.00013917779019742912, "loss": 1.3015, "step": 23414 }, { "epoch": 0.30426724579017383, "grad_norm": 0.4012843370437622, "learning_rate": 0.00013917519073551775, "loss": 1.4004, "step": 23415 }, { "epoch": 0.30428024033408974, "grad_norm": 0.4831693172454834, "learning_rate": 0.00013917259127360637, "loss": 1.4555, "step": 23416 }, { "epoch": 0.3042932348780056, "grad_norm": 0.44907328486442566, "learning_rate": 0.000139169991811695, "loss": 1.3931, "step": 23417 }, { "epoch": 0.3043062294219215, "grad_norm": 0.43334949016571045, "learning_rate": 0.0001391673923497836, "loss": 1.3305, "step": 23418 }, { "epoch": 0.3043192239658373, "grad_norm": 0.3851233124732971, "learning_rate": 0.00013916479288787222, "loss": 1.3957, "step": 23419 }, { "epoch": 0.3043322185097532, "grad_norm": 0.45768246054649353, "learning_rate": 0.00013916219342596084, "loss": 1.4605, "step": 23420 }, { "epoch": 0.3043452130536691, "grad_norm": 0.3982701003551483, "learning_rate": 0.00013915959396404944, "loss": 1.3328, "step": 23421 }, { "epoch": 0.304358207597585, "grad_norm": 0.29739758372306824, "learning_rate": 0.00013915699450213806, "loss": 1.5081, "step": 23422 }, { "epoch": 0.3043712021415008, "grad_norm": 0.4620401859283447, "learning_rate": 0.00013915439504022666, "loss": 1.4313, "step": 23423 }, { "epoch": 0.3043841966854167, "grad_norm": 0.3972269594669342, "learning_rate": 0.00013915179557831531, "loss": 1.4865, "step": 23424 }, { "epoch": 0.30439719122933256, "grad_norm": 0.5003836154937744, "learning_rate": 0.0001391491961164039, "loss": 1.3011, "step": 23425 }, { "epoch": 0.30441018577324847, "grad_norm": 0.3578912019729614, "learning_rate": 0.00013914659665449254, "loss": 1.4613, "step": 23426 }, { "epoch": 0.3044231803171643, "grad_norm": 0.3965323269367218, "learning_rate": 0.00013914399719258113, "loss": 1.3485, "step": 23427 }, { "epoch": 0.3044361748610802, "grad_norm": 0.5065497756004333, "learning_rate": 0.00013914139773066976, "loss": 1.5553, "step": 23428 }, { "epoch": 0.30444916940499606, "grad_norm": 0.3976811468601227, "learning_rate": 0.00013913879826875838, "loss": 1.4789, "step": 23429 }, { "epoch": 0.30446216394891196, "grad_norm": 0.3483099341392517, "learning_rate": 0.00013913619880684698, "loss": 1.2753, "step": 23430 }, { "epoch": 0.3044751584928278, "grad_norm": 0.41991087794303894, "learning_rate": 0.0001391335993449356, "loss": 1.428, "step": 23431 }, { "epoch": 0.3044881530367437, "grad_norm": 0.40444377064704895, "learning_rate": 0.00013913099988302423, "loss": 1.3348, "step": 23432 }, { "epoch": 0.30450114758065955, "grad_norm": 0.3690468370914459, "learning_rate": 0.00013912840042111283, "loss": 1.317, "step": 23433 }, { "epoch": 0.30451414212457545, "grad_norm": 0.37328040599823, "learning_rate": 0.00013912580095920145, "loss": 1.24, "step": 23434 }, { "epoch": 0.3045271366684913, "grad_norm": 0.3880142271518707, "learning_rate": 0.00013912320149729005, "loss": 1.4818, "step": 23435 }, { "epoch": 0.3045401312124072, "grad_norm": 0.392406702041626, "learning_rate": 0.0001391206020353787, "loss": 1.2328, "step": 23436 }, { "epoch": 0.30455312575632304, "grad_norm": 0.395727276802063, "learning_rate": 0.0001391180025734673, "loss": 1.3575, "step": 23437 }, { "epoch": 0.30456612030023894, "grad_norm": 0.41765278577804565, "learning_rate": 0.00013911540311155592, "loss": 1.4517, "step": 23438 }, { "epoch": 0.3045791148441548, "grad_norm": 0.3868548274040222, "learning_rate": 0.00013911280364964455, "loss": 1.3772, "step": 23439 }, { "epoch": 0.3045921093880707, "grad_norm": 0.3593936562538147, "learning_rate": 0.00013911020418773314, "loss": 1.3226, "step": 23440 }, { "epoch": 0.30460510393198653, "grad_norm": 0.46014881134033203, "learning_rate": 0.00013910760472582177, "loss": 1.487, "step": 23441 }, { "epoch": 0.30461809847590243, "grad_norm": 0.4650191068649292, "learning_rate": 0.00013910500526391036, "loss": 1.5201, "step": 23442 }, { "epoch": 0.3046310930198183, "grad_norm": 0.3960723578929901, "learning_rate": 0.00013910240580199902, "loss": 1.4578, "step": 23443 }, { "epoch": 0.3046440875637342, "grad_norm": 0.41256600618362427, "learning_rate": 0.00013909980634008761, "loss": 1.5645, "step": 23444 }, { "epoch": 0.30465708210765, "grad_norm": 0.3604339063167572, "learning_rate": 0.0001390972068781762, "loss": 1.5961, "step": 23445 }, { "epoch": 0.3046700766515659, "grad_norm": 0.4878902733325958, "learning_rate": 0.00013909460741626484, "loss": 1.5096, "step": 23446 }, { "epoch": 0.30468307119548177, "grad_norm": 0.42814451456069946, "learning_rate": 0.00013909200795435346, "loss": 1.5028, "step": 23447 }, { "epoch": 0.30469606573939767, "grad_norm": 0.4190797507762909, "learning_rate": 0.00013908940849244208, "loss": 1.4652, "step": 23448 }, { "epoch": 0.30470906028331357, "grad_norm": 0.34647899866104126, "learning_rate": 0.00013908680903053068, "loss": 1.4682, "step": 23449 }, { "epoch": 0.3047220548272294, "grad_norm": 0.4343963861465454, "learning_rate": 0.0001390842095686193, "loss": 1.3618, "step": 23450 }, { "epoch": 0.3047350493711453, "grad_norm": 0.3554687798023224, "learning_rate": 0.00013908161010670793, "loss": 1.4868, "step": 23451 }, { "epoch": 0.30474804391506116, "grad_norm": 0.3702371418476105, "learning_rate": 0.00013907901064479653, "loss": 1.3101, "step": 23452 }, { "epoch": 0.30476103845897706, "grad_norm": 0.45455601811408997, "learning_rate": 0.00013907641118288515, "loss": 1.6089, "step": 23453 }, { "epoch": 0.3047740330028929, "grad_norm": 0.4820840656757355, "learning_rate": 0.00013907381172097375, "loss": 1.4987, "step": 23454 }, { "epoch": 0.3047870275468088, "grad_norm": 0.38765203952789307, "learning_rate": 0.0001390712122590624, "loss": 1.538, "step": 23455 }, { "epoch": 0.30480002209072465, "grad_norm": 0.4099428355693817, "learning_rate": 0.000139068612797151, "loss": 1.4147, "step": 23456 }, { "epoch": 0.30481301663464055, "grad_norm": 0.4511684775352478, "learning_rate": 0.0001390660133352396, "loss": 1.5418, "step": 23457 }, { "epoch": 0.3048260111785564, "grad_norm": 0.4140436351299286, "learning_rate": 0.00013906341387332822, "loss": 1.3171, "step": 23458 }, { "epoch": 0.3048390057224723, "grad_norm": 0.4105619192123413, "learning_rate": 0.00013906081441141685, "loss": 1.3859, "step": 23459 }, { "epoch": 0.30485200026638815, "grad_norm": 0.39106282591819763, "learning_rate": 0.00013905821494950547, "loss": 1.3089, "step": 23460 }, { "epoch": 0.30486499481030405, "grad_norm": 0.48305389285087585, "learning_rate": 0.00013905561548759407, "loss": 1.4178, "step": 23461 }, { "epoch": 0.3048779893542199, "grad_norm": 0.3905850946903229, "learning_rate": 0.0001390530160256827, "loss": 1.3447, "step": 23462 }, { "epoch": 0.3048909838981358, "grad_norm": 0.37989890575408936, "learning_rate": 0.00013905041656377132, "loss": 1.414, "step": 23463 }, { "epoch": 0.30490397844205164, "grad_norm": 0.4079782962799072, "learning_rate": 0.0001390478171018599, "loss": 1.3805, "step": 23464 }, { "epoch": 0.30491697298596754, "grad_norm": 0.4466724991798401, "learning_rate": 0.00013904521763994854, "loss": 1.4437, "step": 23465 }, { "epoch": 0.3049299675298834, "grad_norm": 0.40522077679634094, "learning_rate": 0.00013904261817803714, "loss": 1.4954, "step": 23466 }, { "epoch": 0.3049429620737993, "grad_norm": 0.3816321790218353, "learning_rate": 0.0001390400187161258, "loss": 1.4651, "step": 23467 }, { "epoch": 0.30495595661771513, "grad_norm": 0.4906289577484131, "learning_rate": 0.00013903741925421438, "loss": 1.6576, "step": 23468 }, { "epoch": 0.30496895116163103, "grad_norm": 0.37748995423316956, "learning_rate": 0.00013903481979230298, "loss": 1.5941, "step": 23469 }, { "epoch": 0.3049819457055469, "grad_norm": 0.2999838888645172, "learning_rate": 0.0001390322203303916, "loss": 1.4557, "step": 23470 }, { "epoch": 0.3049949402494628, "grad_norm": 0.3470779061317444, "learning_rate": 0.00013902962086848023, "loss": 1.234, "step": 23471 }, { "epoch": 0.3050079347933786, "grad_norm": 0.3533174395561218, "learning_rate": 0.00013902702140656886, "loss": 1.2147, "step": 23472 }, { "epoch": 0.3050209293372945, "grad_norm": 0.3998168110847473, "learning_rate": 0.00013902442194465745, "loss": 1.327, "step": 23473 }, { "epoch": 0.30503392388121037, "grad_norm": 0.31588008999824524, "learning_rate": 0.00013902182248274608, "loss": 1.3918, "step": 23474 }, { "epoch": 0.30504691842512627, "grad_norm": 0.34923332929611206, "learning_rate": 0.0001390192230208347, "loss": 1.2754, "step": 23475 }, { "epoch": 0.3050599129690421, "grad_norm": 0.34490811824798584, "learning_rate": 0.0001390166235589233, "loss": 1.288, "step": 23476 }, { "epoch": 0.305072907512958, "grad_norm": 0.4352579116821289, "learning_rate": 0.00013901402409701192, "loss": 1.4201, "step": 23477 }, { "epoch": 0.30508590205687386, "grad_norm": 0.457725465297699, "learning_rate": 0.00013901142463510055, "loss": 1.4294, "step": 23478 }, { "epoch": 0.30509889660078976, "grad_norm": 0.4381331503391266, "learning_rate": 0.00013900882517318917, "loss": 1.3799, "step": 23479 }, { "epoch": 0.3051118911447056, "grad_norm": 0.4067361652851105, "learning_rate": 0.00013900622571127777, "loss": 1.3466, "step": 23480 }, { "epoch": 0.3051248856886215, "grad_norm": 0.41730648279190063, "learning_rate": 0.0001390036262493664, "loss": 1.6048, "step": 23481 }, { "epoch": 0.30513788023253735, "grad_norm": 0.38951125741004944, "learning_rate": 0.00013900102678745502, "loss": 1.2188, "step": 23482 }, { "epoch": 0.30515087477645325, "grad_norm": 0.4512186050415039, "learning_rate": 0.00013899842732554362, "loss": 1.3749, "step": 23483 }, { "epoch": 0.3051638693203691, "grad_norm": 0.3760858178138733, "learning_rate": 0.00013899582786363224, "loss": 1.3459, "step": 23484 }, { "epoch": 0.305176863864285, "grad_norm": 0.3248966634273529, "learning_rate": 0.00013899322840172084, "loss": 1.3515, "step": 23485 }, { "epoch": 0.30518985840820084, "grad_norm": 0.295704185962677, "learning_rate": 0.00013899062893980946, "loss": 1.2406, "step": 23486 }, { "epoch": 0.30520285295211674, "grad_norm": 0.4719599485397339, "learning_rate": 0.0001389880294778981, "loss": 1.5597, "step": 23487 }, { "epoch": 0.3052158474960326, "grad_norm": 0.4941157400608063, "learning_rate": 0.00013898543001598668, "loss": 1.5769, "step": 23488 }, { "epoch": 0.3052288420399485, "grad_norm": 0.32661327719688416, "learning_rate": 0.0001389828305540753, "loss": 1.3615, "step": 23489 }, { "epoch": 0.30524183658386433, "grad_norm": 0.5071525573730469, "learning_rate": 0.00013898023109216393, "loss": 1.3819, "step": 23490 }, { "epoch": 0.30525483112778024, "grad_norm": 0.43125683069229126, "learning_rate": 0.00013897763163025256, "loss": 1.4411, "step": 23491 }, { "epoch": 0.3052678256716961, "grad_norm": 0.3250223696231842, "learning_rate": 0.00013897503216834116, "loss": 1.3657, "step": 23492 }, { "epoch": 0.305280820215612, "grad_norm": 0.3734956979751587, "learning_rate": 0.00013897243270642978, "loss": 1.362, "step": 23493 }, { "epoch": 0.3052938147595278, "grad_norm": 0.38495510816574097, "learning_rate": 0.0001389698332445184, "loss": 1.4573, "step": 23494 }, { "epoch": 0.3053068093034437, "grad_norm": 0.30793297290802, "learning_rate": 0.000138967233782607, "loss": 1.3172, "step": 23495 }, { "epoch": 0.30531980384735957, "grad_norm": 0.49670201539993286, "learning_rate": 0.00013896463432069563, "loss": 1.3595, "step": 23496 }, { "epoch": 0.3053327983912755, "grad_norm": 0.21125417947769165, "learning_rate": 0.00013896203485878422, "loss": 1.254, "step": 23497 }, { "epoch": 0.3053457929351913, "grad_norm": 0.38317611813545227, "learning_rate": 0.00013895943539687285, "loss": 1.3221, "step": 23498 }, { "epoch": 0.3053587874791072, "grad_norm": 0.4652711749076843, "learning_rate": 0.00013895683593496147, "loss": 1.3433, "step": 23499 }, { "epoch": 0.30537178202302306, "grad_norm": 0.4497043490409851, "learning_rate": 0.00013895423647305007, "loss": 1.546, "step": 23500 }, { "epoch": 0.30538477656693896, "grad_norm": 0.5556791424751282, "learning_rate": 0.0001389516370111387, "loss": 1.2698, "step": 23501 }, { "epoch": 0.3053977711108548, "grad_norm": 0.3903411626815796, "learning_rate": 0.00013894903754922732, "loss": 1.3535, "step": 23502 }, { "epoch": 0.3054107656547707, "grad_norm": 0.4146912097930908, "learning_rate": 0.00013894643808731594, "loss": 1.48, "step": 23503 }, { "epoch": 0.30542376019868656, "grad_norm": 0.34024927020072937, "learning_rate": 0.00013894383862540454, "loss": 1.0548, "step": 23504 }, { "epoch": 0.30543675474260246, "grad_norm": 0.33760544657707214, "learning_rate": 0.00013894123916349316, "loss": 1.2827, "step": 23505 }, { "epoch": 0.3054497492865183, "grad_norm": 0.4486847519874573, "learning_rate": 0.0001389386397015818, "loss": 1.2843, "step": 23506 }, { "epoch": 0.3054627438304342, "grad_norm": 0.4389313757419586, "learning_rate": 0.0001389360402396704, "loss": 1.3684, "step": 23507 }, { "epoch": 0.30547573837435005, "grad_norm": 0.42779406905174255, "learning_rate": 0.000138933440777759, "loss": 1.3537, "step": 23508 }, { "epoch": 0.30548873291826595, "grad_norm": 0.28546014428138733, "learning_rate": 0.0001389308413158476, "loss": 1.506, "step": 23509 }, { "epoch": 0.3055017274621818, "grad_norm": 0.3954133987426758, "learning_rate": 0.00013892824185393626, "loss": 1.404, "step": 23510 }, { "epoch": 0.3055147220060977, "grad_norm": 0.36492669582366943, "learning_rate": 0.00013892564239202486, "loss": 1.2677, "step": 23511 }, { "epoch": 0.30552771655001354, "grad_norm": 0.3887994587421417, "learning_rate": 0.00013892304293011346, "loss": 1.2982, "step": 23512 }, { "epoch": 0.30554071109392944, "grad_norm": 0.39080923795700073, "learning_rate": 0.0001389204434682021, "loss": 1.4723, "step": 23513 }, { "epoch": 0.3055537056378453, "grad_norm": 0.430855393409729, "learning_rate": 0.0001389178440062907, "loss": 1.5383, "step": 23514 }, { "epoch": 0.3055667001817612, "grad_norm": 0.39280420541763306, "learning_rate": 0.00013891524454437933, "loss": 1.4976, "step": 23515 }, { "epoch": 0.30557969472567703, "grad_norm": 0.40288299322128296, "learning_rate": 0.00013891264508246793, "loss": 1.3175, "step": 23516 }, { "epoch": 0.30559268926959293, "grad_norm": 0.46204444766044617, "learning_rate": 0.00013891004562055655, "loss": 1.4789, "step": 23517 }, { "epoch": 0.3056056838135088, "grad_norm": 0.38068604469299316, "learning_rate": 0.00013890744615864517, "loss": 1.5661, "step": 23518 }, { "epoch": 0.3056186783574247, "grad_norm": 0.4827874004840851, "learning_rate": 0.00013890484669673377, "loss": 1.4093, "step": 23519 }, { "epoch": 0.3056316729013405, "grad_norm": 0.31266921758651733, "learning_rate": 0.0001389022472348224, "loss": 1.3273, "step": 23520 }, { "epoch": 0.3056446674452564, "grad_norm": 0.29505765438079834, "learning_rate": 0.00013889964777291102, "loss": 1.3602, "step": 23521 }, { "epoch": 0.30565766198917227, "grad_norm": 0.2704898416996002, "learning_rate": 0.00013889704831099965, "loss": 1.3186, "step": 23522 }, { "epoch": 0.30567065653308817, "grad_norm": 0.4260876178741455, "learning_rate": 0.00013889444884908824, "loss": 1.4234, "step": 23523 }, { "epoch": 0.30568365107700407, "grad_norm": 0.39754945039749146, "learning_rate": 0.00013889184938717684, "loss": 1.5558, "step": 23524 }, { "epoch": 0.3056966456209199, "grad_norm": 0.3910837173461914, "learning_rate": 0.0001388892499252655, "loss": 1.3731, "step": 23525 }, { "epoch": 0.3057096401648358, "grad_norm": 0.4193885326385498, "learning_rate": 0.0001388866504633541, "loss": 1.4238, "step": 23526 }, { "epoch": 0.30572263470875166, "grad_norm": 0.3782515227794647, "learning_rate": 0.00013888405100144271, "loss": 1.4786, "step": 23527 }, { "epoch": 0.30573562925266756, "grad_norm": 0.3675345778465271, "learning_rate": 0.0001388814515395313, "loss": 1.3544, "step": 23528 }, { "epoch": 0.3057486237965834, "grad_norm": 0.34074875712394714, "learning_rate": 0.00013887885207761994, "loss": 1.4319, "step": 23529 }, { "epoch": 0.3057616183404993, "grad_norm": 0.38775891065597534, "learning_rate": 0.00013887625261570856, "loss": 1.3225, "step": 23530 }, { "epoch": 0.30577461288441515, "grad_norm": 0.41392791271209717, "learning_rate": 0.00013887365315379716, "loss": 1.2939, "step": 23531 }, { "epoch": 0.30578760742833105, "grad_norm": 0.34924671053886414, "learning_rate": 0.00013887105369188578, "loss": 1.3975, "step": 23532 }, { "epoch": 0.3058006019722469, "grad_norm": 0.47477591037750244, "learning_rate": 0.0001388684542299744, "loss": 1.5919, "step": 23533 }, { "epoch": 0.3058135965161628, "grad_norm": 0.3081744313240051, "learning_rate": 0.00013886585476806303, "loss": 1.2359, "step": 23534 }, { "epoch": 0.30582659106007865, "grad_norm": 0.3801209628582001, "learning_rate": 0.00013886325530615163, "loss": 1.5442, "step": 23535 }, { "epoch": 0.30583958560399455, "grad_norm": 0.40903663635253906, "learning_rate": 0.00013886065584424025, "loss": 1.4123, "step": 23536 }, { "epoch": 0.3058525801479104, "grad_norm": 0.4087826609611511, "learning_rate": 0.00013885805638232888, "loss": 1.5494, "step": 23537 }, { "epoch": 0.3058655746918263, "grad_norm": 0.368113249540329, "learning_rate": 0.00013885545692041747, "loss": 1.3457, "step": 23538 }, { "epoch": 0.30587856923574214, "grad_norm": 0.39751601219177246, "learning_rate": 0.0001388528574585061, "loss": 1.1442, "step": 23539 }, { "epoch": 0.30589156377965804, "grad_norm": 0.5110365748405457, "learning_rate": 0.0001388502579965947, "loss": 1.5872, "step": 23540 }, { "epoch": 0.3059045583235739, "grad_norm": 0.29193973541259766, "learning_rate": 0.00013884765853468332, "loss": 1.4078, "step": 23541 }, { "epoch": 0.3059175528674898, "grad_norm": 0.3715648353099823, "learning_rate": 0.00013884505907277195, "loss": 1.5586, "step": 23542 }, { "epoch": 0.30593054741140563, "grad_norm": 0.3424482047557831, "learning_rate": 0.00013884245961086054, "loss": 1.3866, "step": 23543 }, { "epoch": 0.30594354195532153, "grad_norm": 0.3644959330558777, "learning_rate": 0.00013883986014894917, "loss": 1.3772, "step": 23544 }, { "epoch": 0.3059565364992374, "grad_norm": 0.5370931625366211, "learning_rate": 0.0001388372606870378, "loss": 1.4216, "step": 23545 }, { "epoch": 0.3059695310431533, "grad_norm": 0.33715203404426575, "learning_rate": 0.00013883466122512642, "loss": 1.3976, "step": 23546 }, { "epoch": 0.3059825255870691, "grad_norm": 0.5049725770950317, "learning_rate": 0.00013883206176321501, "loss": 1.4355, "step": 23547 }, { "epoch": 0.305995520130985, "grad_norm": 0.41145607829093933, "learning_rate": 0.00013882946230130364, "loss": 1.4619, "step": 23548 }, { "epoch": 0.30600851467490087, "grad_norm": 0.4065941572189331, "learning_rate": 0.00013882686283939226, "loss": 1.5665, "step": 23549 }, { "epoch": 0.30602150921881677, "grad_norm": 0.4424576461315155, "learning_rate": 0.00013882426337748086, "loss": 1.5069, "step": 23550 }, { "epoch": 0.3060345037627326, "grad_norm": 0.473283976316452, "learning_rate": 0.00013882166391556948, "loss": 1.4298, "step": 23551 }, { "epoch": 0.3060474983066485, "grad_norm": 0.41806769371032715, "learning_rate": 0.0001388190644536581, "loss": 1.4097, "step": 23552 }, { "epoch": 0.30606049285056436, "grad_norm": 0.3266026973724365, "learning_rate": 0.0001388164649917467, "loss": 1.1847, "step": 23553 }, { "epoch": 0.30607348739448026, "grad_norm": 0.3820030689239502, "learning_rate": 0.00013881386552983533, "loss": 1.3584, "step": 23554 }, { "epoch": 0.3060864819383961, "grad_norm": 0.3995163142681122, "learning_rate": 0.00013881126606792393, "loss": 1.535, "step": 23555 }, { "epoch": 0.306099476482312, "grad_norm": 0.4605192244052887, "learning_rate": 0.00013880866660601258, "loss": 1.5605, "step": 23556 }, { "epoch": 0.30611247102622785, "grad_norm": 0.3773915469646454, "learning_rate": 0.00013880606714410118, "loss": 1.3675, "step": 23557 }, { "epoch": 0.30612546557014375, "grad_norm": 0.41772910952568054, "learning_rate": 0.0001388034676821898, "loss": 1.3495, "step": 23558 }, { "epoch": 0.3061384601140596, "grad_norm": 0.3977172076702118, "learning_rate": 0.0001388008682202784, "loss": 1.5292, "step": 23559 }, { "epoch": 0.3061514546579755, "grad_norm": 0.35015615820884705, "learning_rate": 0.00013879826875836702, "loss": 1.3182, "step": 23560 }, { "epoch": 0.30616444920189134, "grad_norm": 0.3995659053325653, "learning_rate": 0.00013879566929645565, "loss": 1.5231, "step": 23561 }, { "epoch": 0.30617744374580724, "grad_norm": 0.5172699689865112, "learning_rate": 0.00013879306983454425, "loss": 1.5096, "step": 23562 }, { "epoch": 0.3061904382897231, "grad_norm": 0.32431915402412415, "learning_rate": 0.00013879047037263287, "loss": 1.421, "step": 23563 }, { "epoch": 0.306203432833639, "grad_norm": 0.3113725483417511, "learning_rate": 0.0001387878709107215, "loss": 1.3885, "step": 23564 }, { "epoch": 0.30621642737755483, "grad_norm": 0.37816914916038513, "learning_rate": 0.00013878527144881012, "loss": 1.2267, "step": 23565 }, { "epoch": 0.30622942192147073, "grad_norm": 0.4050650894641876, "learning_rate": 0.00013878267198689872, "loss": 1.424, "step": 23566 }, { "epoch": 0.3062424164653866, "grad_norm": 0.4346819519996643, "learning_rate": 0.00013878007252498731, "loss": 1.3197, "step": 23567 }, { "epoch": 0.3062554110093025, "grad_norm": 0.3116249144077301, "learning_rate": 0.00013877747306307597, "loss": 1.4365, "step": 23568 }, { "epoch": 0.3062684055532183, "grad_norm": 0.4409507215023041, "learning_rate": 0.00013877487360116456, "loss": 1.3679, "step": 23569 }, { "epoch": 0.3062814000971342, "grad_norm": 0.3509639501571655, "learning_rate": 0.0001387722741392532, "loss": 1.2883, "step": 23570 }, { "epoch": 0.30629439464105007, "grad_norm": 0.3960501253604889, "learning_rate": 0.00013876967467734178, "loss": 1.5578, "step": 23571 }, { "epoch": 0.306307389184966, "grad_norm": 0.4010438919067383, "learning_rate": 0.0001387670752154304, "loss": 1.3401, "step": 23572 }, { "epoch": 0.3063203837288818, "grad_norm": 0.3445011079311371, "learning_rate": 0.00013876447575351903, "loss": 1.4625, "step": 23573 }, { "epoch": 0.3063333782727977, "grad_norm": 0.4155280292034149, "learning_rate": 0.00013876187629160763, "loss": 1.4966, "step": 23574 }, { "epoch": 0.30634637281671356, "grad_norm": 0.31351906061172485, "learning_rate": 0.00013875927682969626, "loss": 1.3953, "step": 23575 }, { "epoch": 0.30635936736062946, "grad_norm": 0.3552655577659607, "learning_rate": 0.00013875667736778488, "loss": 1.2937, "step": 23576 }, { "epoch": 0.3063723619045453, "grad_norm": 0.4302465617656708, "learning_rate": 0.0001387540779058735, "loss": 1.34, "step": 23577 }, { "epoch": 0.3063853564484612, "grad_norm": 0.27516257762908936, "learning_rate": 0.0001387514784439621, "loss": 1.3098, "step": 23578 }, { "epoch": 0.30639835099237706, "grad_norm": 0.4331068992614746, "learning_rate": 0.0001387488789820507, "loss": 1.4499, "step": 23579 }, { "epoch": 0.30641134553629296, "grad_norm": 0.4370339810848236, "learning_rate": 0.00013874627952013935, "loss": 1.3013, "step": 23580 }, { "epoch": 0.3064243400802088, "grad_norm": 0.3664869964122772, "learning_rate": 0.00013874368005822795, "loss": 1.3151, "step": 23581 }, { "epoch": 0.3064373346241247, "grad_norm": 0.44402244687080383, "learning_rate": 0.00013874108059631657, "loss": 1.4454, "step": 23582 }, { "epoch": 0.30645032916804055, "grad_norm": 0.4439980089664459, "learning_rate": 0.00013873848113440517, "loss": 1.2174, "step": 23583 }, { "epoch": 0.30646332371195645, "grad_norm": 0.37649279832839966, "learning_rate": 0.0001387358816724938, "loss": 1.5126, "step": 23584 }, { "epoch": 0.3064763182558723, "grad_norm": 0.4243823289871216, "learning_rate": 0.00013873328221058242, "loss": 1.4926, "step": 23585 }, { "epoch": 0.3064893127997882, "grad_norm": 0.35294297337532043, "learning_rate": 0.00013873068274867102, "loss": 1.2073, "step": 23586 }, { "epoch": 0.30650230734370404, "grad_norm": 0.3987620770931244, "learning_rate": 0.00013872808328675967, "loss": 1.4063, "step": 23587 }, { "epoch": 0.30651530188761994, "grad_norm": 0.4728260636329651, "learning_rate": 0.00013872548382484827, "loss": 1.5492, "step": 23588 }, { "epoch": 0.3065282964315358, "grad_norm": 0.4484679102897644, "learning_rate": 0.0001387228843629369, "loss": 1.4972, "step": 23589 }, { "epoch": 0.3065412909754517, "grad_norm": 0.3717074990272522, "learning_rate": 0.0001387202849010255, "loss": 1.3849, "step": 23590 }, { "epoch": 0.30655428551936753, "grad_norm": 0.40513846278190613, "learning_rate": 0.0001387176854391141, "loss": 1.3796, "step": 23591 }, { "epoch": 0.30656728006328343, "grad_norm": 0.42069143056869507, "learning_rate": 0.00013871508597720274, "loss": 1.2732, "step": 23592 }, { "epoch": 0.3065802746071993, "grad_norm": 0.45256495475769043, "learning_rate": 0.00013871248651529133, "loss": 1.4469, "step": 23593 }, { "epoch": 0.3065932691511152, "grad_norm": 0.31257379055023193, "learning_rate": 0.00013870988705337996, "loss": 1.6586, "step": 23594 }, { "epoch": 0.306606263695031, "grad_norm": 0.39899003505706787, "learning_rate": 0.00013870728759146858, "loss": 1.3932, "step": 23595 }, { "epoch": 0.3066192582389469, "grad_norm": 0.3584810197353363, "learning_rate": 0.00013870468812955718, "loss": 1.334, "step": 23596 }, { "epoch": 0.30663225278286277, "grad_norm": 0.510430634021759, "learning_rate": 0.0001387020886676458, "loss": 1.6248, "step": 23597 }, { "epoch": 0.30664524732677867, "grad_norm": 0.46201857924461365, "learning_rate": 0.0001386994892057344, "loss": 1.4542, "step": 23598 }, { "epoch": 0.3066582418706945, "grad_norm": 0.4728403389453888, "learning_rate": 0.00013869688974382305, "loss": 1.5001, "step": 23599 }, { "epoch": 0.3066712364146104, "grad_norm": 0.32483214139938354, "learning_rate": 0.00013869429028191165, "loss": 1.3561, "step": 23600 }, { "epoch": 0.3066842309585263, "grad_norm": 0.35493841767311096, "learning_rate": 0.00013869169082000028, "loss": 1.1853, "step": 23601 }, { "epoch": 0.30669722550244216, "grad_norm": 0.410774827003479, "learning_rate": 0.00013868909135808887, "loss": 1.5055, "step": 23602 }, { "epoch": 0.30671022004635806, "grad_norm": 0.30181410908699036, "learning_rate": 0.0001386864918961775, "loss": 1.2662, "step": 23603 }, { "epoch": 0.3067232145902739, "grad_norm": 0.4347074627876282, "learning_rate": 0.00013868389243426612, "loss": 1.4768, "step": 23604 }, { "epoch": 0.3067362091341898, "grad_norm": 0.41375666856765747, "learning_rate": 0.00013868129297235472, "loss": 1.3018, "step": 23605 }, { "epoch": 0.30674920367810565, "grad_norm": 0.41536417603492737, "learning_rate": 0.00013867869351044334, "loss": 1.4455, "step": 23606 }, { "epoch": 0.30676219822202155, "grad_norm": 0.45644810795783997, "learning_rate": 0.00013867609404853197, "loss": 1.3005, "step": 23607 }, { "epoch": 0.3067751927659374, "grad_norm": 0.4884204566478729, "learning_rate": 0.00013867349458662057, "loss": 1.4056, "step": 23608 }, { "epoch": 0.3067881873098533, "grad_norm": 0.33736222982406616, "learning_rate": 0.0001386708951247092, "loss": 1.3616, "step": 23609 }, { "epoch": 0.30680118185376914, "grad_norm": 0.43201127648353577, "learning_rate": 0.0001386682956627978, "loss": 1.5119, "step": 23610 }, { "epoch": 0.30681417639768505, "grad_norm": 0.5068404674530029, "learning_rate": 0.00013866569620088644, "loss": 1.3493, "step": 23611 }, { "epoch": 0.3068271709416009, "grad_norm": 0.3931178152561188, "learning_rate": 0.00013866309673897504, "loss": 1.3852, "step": 23612 }, { "epoch": 0.3068401654855168, "grad_norm": 0.4263322353363037, "learning_rate": 0.00013866049727706366, "loss": 1.3801, "step": 23613 }, { "epoch": 0.30685316002943264, "grad_norm": 0.2817239463329315, "learning_rate": 0.00013865789781515226, "loss": 1.2746, "step": 23614 }, { "epoch": 0.30686615457334854, "grad_norm": 0.40504273772239685, "learning_rate": 0.00013865529835324088, "loss": 1.5184, "step": 23615 }, { "epoch": 0.3068791491172644, "grad_norm": 0.4184659421443939, "learning_rate": 0.0001386526988913295, "loss": 1.5096, "step": 23616 }, { "epoch": 0.3068921436611803, "grad_norm": 0.4181513786315918, "learning_rate": 0.0001386500994294181, "loss": 1.3144, "step": 23617 }, { "epoch": 0.30690513820509613, "grad_norm": 0.2820641100406647, "learning_rate": 0.00013864749996750673, "loss": 1.3355, "step": 23618 }, { "epoch": 0.30691813274901203, "grad_norm": 0.5276575088500977, "learning_rate": 0.00013864490050559535, "loss": 1.524, "step": 23619 }, { "epoch": 0.3069311272929279, "grad_norm": 0.4829508364200592, "learning_rate": 0.00013864230104368395, "loss": 1.5497, "step": 23620 }, { "epoch": 0.3069441218368438, "grad_norm": 0.3474687933921814, "learning_rate": 0.00013863970158177258, "loss": 1.4189, "step": 23621 }, { "epoch": 0.3069571163807596, "grad_norm": 0.40267297625541687, "learning_rate": 0.0001386371021198612, "loss": 1.2774, "step": 23622 }, { "epoch": 0.3069701109246755, "grad_norm": 0.4492112696170807, "learning_rate": 0.00013863450265794982, "loss": 1.5127, "step": 23623 }, { "epoch": 0.30698310546859137, "grad_norm": 0.4088647663593292, "learning_rate": 0.00013863190319603842, "loss": 1.3064, "step": 23624 }, { "epoch": 0.30699610001250727, "grad_norm": 0.4043883681297302, "learning_rate": 0.00013862930373412705, "loss": 1.4856, "step": 23625 }, { "epoch": 0.3070090945564231, "grad_norm": 0.5127471685409546, "learning_rate": 0.00013862670427221567, "loss": 1.5484, "step": 23626 }, { "epoch": 0.307022089100339, "grad_norm": 0.48295533657073975, "learning_rate": 0.00013862410481030427, "loss": 1.4358, "step": 23627 }, { "epoch": 0.30703508364425486, "grad_norm": 0.3892424404621124, "learning_rate": 0.0001386215053483929, "loss": 1.3772, "step": 23628 }, { "epoch": 0.30704807818817076, "grad_norm": 0.3554915487766266, "learning_rate": 0.0001386189058864815, "loss": 1.4281, "step": 23629 }, { "epoch": 0.3070610727320866, "grad_norm": 0.3611208200454712, "learning_rate": 0.00013861630642457014, "loss": 1.5503, "step": 23630 }, { "epoch": 0.3070740672760025, "grad_norm": 0.3918308913707733, "learning_rate": 0.00013861370696265874, "loss": 1.4471, "step": 23631 }, { "epoch": 0.30708706181991835, "grad_norm": 0.33947885036468506, "learning_rate": 0.00013861110750074736, "loss": 1.3327, "step": 23632 }, { "epoch": 0.30710005636383425, "grad_norm": 0.40992066264152527, "learning_rate": 0.00013860850803883596, "loss": 1.3111, "step": 23633 }, { "epoch": 0.3071130509077501, "grad_norm": 0.4609469771385193, "learning_rate": 0.00013860590857692459, "loss": 1.7449, "step": 23634 }, { "epoch": 0.307126045451666, "grad_norm": 0.3659321665763855, "learning_rate": 0.0001386033091150132, "loss": 1.2248, "step": 23635 }, { "epoch": 0.30713903999558184, "grad_norm": 0.25856778025627136, "learning_rate": 0.0001386007096531018, "loss": 1.1068, "step": 23636 }, { "epoch": 0.30715203453949774, "grad_norm": 0.49618345499038696, "learning_rate": 0.00013859811019119043, "loss": 1.5069, "step": 23637 }, { "epoch": 0.3071650290834136, "grad_norm": 0.4758076071739197, "learning_rate": 0.00013859551072927906, "loss": 1.5385, "step": 23638 }, { "epoch": 0.3071780236273295, "grad_norm": 0.3862117826938629, "learning_rate": 0.00013859291126736765, "loss": 1.3563, "step": 23639 }, { "epoch": 0.30719101817124533, "grad_norm": 0.40093135833740234, "learning_rate": 0.00013859031180545628, "loss": 1.3412, "step": 23640 }, { "epoch": 0.30720401271516123, "grad_norm": 0.4450172185897827, "learning_rate": 0.00013858771234354488, "loss": 1.47, "step": 23641 }, { "epoch": 0.3072170072590771, "grad_norm": 0.4352661371231079, "learning_rate": 0.00013858511288163353, "loss": 1.4592, "step": 23642 }, { "epoch": 0.307230001802993, "grad_norm": 0.3355281352996826, "learning_rate": 0.00013858251341972212, "loss": 1.3442, "step": 23643 }, { "epoch": 0.3072429963469088, "grad_norm": 0.36961600184440613, "learning_rate": 0.00013857991395781075, "loss": 1.3783, "step": 23644 }, { "epoch": 0.3072559908908247, "grad_norm": 0.43317127227783203, "learning_rate": 0.00013857731449589935, "loss": 1.4284, "step": 23645 }, { "epoch": 0.30726898543474057, "grad_norm": 0.5049710273742676, "learning_rate": 0.00013857471503398797, "loss": 1.4564, "step": 23646 }, { "epoch": 0.30728197997865647, "grad_norm": 0.4409331977367401, "learning_rate": 0.0001385721155720766, "loss": 1.4723, "step": 23647 }, { "epoch": 0.3072949745225723, "grad_norm": 0.33348605036735535, "learning_rate": 0.0001385695161101652, "loss": 1.3064, "step": 23648 }, { "epoch": 0.3073079690664882, "grad_norm": 0.4932759702205658, "learning_rate": 0.00013856691664825382, "loss": 1.5257, "step": 23649 }, { "epoch": 0.30732096361040406, "grad_norm": 0.5131253600120544, "learning_rate": 0.00013856431718634244, "loss": 1.5473, "step": 23650 }, { "epoch": 0.30733395815431996, "grad_norm": 0.3839573264122009, "learning_rate": 0.00013856171772443104, "loss": 1.2628, "step": 23651 }, { "epoch": 0.3073469526982358, "grad_norm": 0.36517488956451416, "learning_rate": 0.00013855911826251966, "loss": 1.5401, "step": 23652 }, { "epoch": 0.3073599472421517, "grad_norm": 0.45546606183052063, "learning_rate": 0.00013855651880060826, "loss": 1.3099, "step": 23653 }, { "epoch": 0.30737294178606756, "grad_norm": 0.48339593410491943, "learning_rate": 0.0001385539193386969, "loss": 1.3436, "step": 23654 }, { "epoch": 0.30738593632998346, "grad_norm": 0.27648887038230896, "learning_rate": 0.0001385513198767855, "loss": 1.0945, "step": 23655 }, { "epoch": 0.3073989308738993, "grad_norm": 0.42309150099754333, "learning_rate": 0.00013854872041487413, "loss": 1.2807, "step": 23656 }, { "epoch": 0.3074119254178152, "grad_norm": 0.3902518153190613, "learning_rate": 0.00013854612095296273, "loss": 1.3453, "step": 23657 }, { "epoch": 0.30742491996173105, "grad_norm": 0.3287469446659088, "learning_rate": 0.00013854352149105136, "loss": 1.2096, "step": 23658 }, { "epoch": 0.30743791450564695, "grad_norm": 0.4263773560523987, "learning_rate": 0.00013854092202913998, "loss": 1.4426, "step": 23659 }, { "epoch": 0.3074509090495628, "grad_norm": 0.4225476086139679, "learning_rate": 0.00013853832256722858, "loss": 1.5974, "step": 23660 }, { "epoch": 0.3074639035934787, "grad_norm": 0.4083250164985657, "learning_rate": 0.00013853572310531723, "loss": 1.3769, "step": 23661 }, { "epoch": 0.30747689813739454, "grad_norm": 0.427945077419281, "learning_rate": 0.00013853312364340583, "loss": 1.6552, "step": 23662 }, { "epoch": 0.30748989268131044, "grad_norm": 0.461101233959198, "learning_rate": 0.00013853052418149442, "loss": 1.2396, "step": 23663 }, { "epoch": 0.3075028872252263, "grad_norm": 0.32514646649360657, "learning_rate": 0.00013852792471958305, "loss": 1.2542, "step": 23664 }, { "epoch": 0.3075158817691422, "grad_norm": 0.46653270721435547, "learning_rate": 0.00013852532525767167, "loss": 1.5008, "step": 23665 }, { "epoch": 0.30752887631305803, "grad_norm": 0.4038919508457184, "learning_rate": 0.0001385227257957603, "loss": 1.405, "step": 23666 }, { "epoch": 0.30754187085697393, "grad_norm": 0.2607548236846924, "learning_rate": 0.0001385201263338489, "loss": 1.2664, "step": 23667 }, { "epoch": 0.3075548654008898, "grad_norm": 0.3488636612892151, "learning_rate": 0.00013851752687193752, "loss": 1.4976, "step": 23668 }, { "epoch": 0.3075678599448057, "grad_norm": 0.4039755165576935, "learning_rate": 0.00013851492741002614, "loss": 1.4058, "step": 23669 }, { "epoch": 0.3075808544887215, "grad_norm": 0.4989168047904968, "learning_rate": 0.00013851232794811474, "loss": 1.4851, "step": 23670 }, { "epoch": 0.3075938490326374, "grad_norm": 0.39854735136032104, "learning_rate": 0.00013850972848620337, "loss": 1.4742, "step": 23671 }, { "epoch": 0.30760684357655327, "grad_norm": 0.41402918100357056, "learning_rate": 0.00013850712902429196, "loss": 1.3461, "step": 23672 }, { "epoch": 0.30761983812046917, "grad_norm": 0.3732379078865051, "learning_rate": 0.00013850452956238061, "loss": 1.3689, "step": 23673 }, { "epoch": 0.307632832664385, "grad_norm": 0.35589849948883057, "learning_rate": 0.0001385019301004692, "loss": 1.4185, "step": 23674 }, { "epoch": 0.3076458272083009, "grad_norm": 0.37869277596473694, "learning_rate": 0.0001384993306385578, "loss": 1.5981, "step": 23675 }, { "epoch": 0.3076588217522168, "grad_norm": 0.486847847700119, "learning_rate": 0.00013849673117664643, "loss": 1.5504, "step": 23676 }, { "epoch": 0.30767181629613266, "grad_norm": 0.43533119559288025, "learning_rate": 0.00013849413171473506, "loss": 1.2873, "step": 23677 }, { "epoch": 0.30768481084004856, "grad_norm": 0.4138009250164032, "learning_rate": 0.00013849153225282368, "loss": 1.4943, "step": 23678 }, { "epoch": 0.3076978053839644, "grad_norm": 0.42973774671554565, "learning_rate": 0.00013848893279091228, "loss": 1.4223, "step": 23679 }, { "epoch": 0.3077107999278803, "grad_norm": 0.4351615607738495, "learning_rate": 0.0001384863333290009, "loss": 1.307, "step": 23680 }, { "epoch": 0.30772379447179615, "grad_norm": 0.3348578214645386, "learning_rate": 0.00013848373386708953, "loss": 1.4563, "step": 23681 }, { "epoch": 0.30773678901571205, "grad_norm": 0.3403237462043762, "learning_rate": 0.00013848113440517813, "loss": 1.4801, "step": 23682 }, { "epoch": 0.3077497835596279, "grad_norm": 0.33509063720703125, "learning_rate": 0.00013847853494326675, "loss": 1.3741, "step": 23683 }, { "epoch": 0.3077627781035438, "grad_norm": 0.39930030703544617, "learning_rate": 0.00013847593548135535, "loss": 1.2608, "step": 23684 }, { "epoch": 0.30777577264745964, "grad_norm": 0.43121635913848877, "learning_rate": 0.000138473336019444, "loss": 1.4514, "step": 23685 }, { "epoch": 0.30778876719137555, "grad_norm": 0.4374832212924957, "learning_rate": 0.0001384707365575326, "loss": 1.4659, "step": 23686 }, { "epoch": 0.3078017617352914, "grad_norm": 0.4245457649230957, "learning_rate": 0.00013846813709562122, "loss": 1.378, "step": 23687 }, { "epoch": 0.3078147562792073, "grad_norm": 0.4714190363883972, "learning_rate": 0.00013846553763370982, "loss": 1.5325, "step": 23688 }, { "epoch": 0.30782775082312314, "grad_norm": 0.46281975507736206, "learning_rate": 0.00013846293817179844, "loss": 1.5707, "step": 23689 }, { "epoch": 0.30784074536703904, "grad_norm": 0.3224961459636688, "learning_rate": 0.00013846033870988707, "loss": 1.1255, "step": 23690 }, { "epoch": 0.3078537399109549, "grad_norm": 0.42874646186828613, "learning_rate": 0.00013845773924797567, "loss": 1.5171, "step": 23691 }, { "epoch": 0.3078667344548708, "grad_norm": 0.29915958642959595, "learning_rate": 0.0001384551397860643, "loss": 1.4222, "step": 23692 }, { "epoch": 0.30787972899878663, "grad_norm": 0.37287771701812744, "learning_rate": 0.00013845254032415291, "loss": 1.2152, "step": 23693 }, { "epoch": 0.30789272354270253, "grad_norm": 0.3369549810886383, "learning_rate": 0.0001384499408622415, "loss": 1.5369, "step": 23694 }, { "epoch": 0.3079057180866184, "grad_norm": 0.3251512050628662, "learning_rate": 0.00013844734140033014, "loss": 1.3568, "step": 23695 }, { "epoch": 0.3079187126305343, "grad_norm": 0.38703179359436035, "learning_rate": 0.00013844474193841876, "loss": 1.2748, "step": 23696 }, { "epoch": 0.3079317071744501, "grad_norm": 0.3561120927333832, "learning_rate": 0.00013844214247650739, "loss": 1.3144, "step": 23697 }, { "epoch": 0.307944701718366, "grad_norm": 0.338692307472229, "learning_rate": 0.00013843954301459598, "loss": 1.1319, "step": 23698 }, { "epoch": 0.30795769626228187, "grad_norm": 0.42351263761520386, "learning_rate": 0.0001384369435526846, "loss": 1.4552, "step": 23699 }, { "epoch": 0.30797069080619777, "grad_norm": 0.3722483217716217, "learning_rate": 0.00013843434409077323, "loss": 1.3144, "step": 23700 }, { "epoch": 0.3079836853501136, "grad_norm": 0.3416104316711426, "learning_rate": 0.00013843174462886183, "loss": 1.3443, "step": 23701 }, { "epoch": 0.3079966798940295, "grad_norm": 0.4270249605178833, "learning_rate": 0.00013842914516695045, "loss": 1.3215, "step": 23702 }, { "epoch": 0.30800967443794536, "grad_norm": 0.33300483226776123, "learning_rate": 0.00013842654570503905, "loss": 1.4587, "step": 23703 }, { "epoch": 0.30802266898186126, "grad_norm": 0.27737119793891907, "learning_rate": 0.00013842394624312768, "loss": 1.2077, "step": 23704 }, { "epoch": 0.3080356635257771, "grad_norm": 0.37972211837768555, "learning_rate": 0.0001384213467812163, "loss": 1.4058, "step": 23705 }, { "epoch": 0.308048658069693, "grad_norm": 0.394265741109848, "learning_rate": 0.0001384187473193049, "loss": 1.5386, "step": 23706 }, { "epoch": 0.30806165261360885, "grad_norm": 0.3523576855659485, "learning_rate": 0.00013841614785739352, "loss": 1.393, "step": 23707 }, { "epoch": 0.30807464715752475, "grad_norm": 0.5150784254074097, "learning_rate": 0.00013841354839548215, "loss": 1.6029, "step": 23708 }, { "epoch": 0.3080876417014406, "grad_norm": 0.5116024017333984, "learning_rate": 0.00013841094893357077, "loss": 1.4506, "step": 23709 }, { "epoch": 0.3081006362453565, "grad_norm": 0.31057223677635193, "learning_rate": 0.00013840834947165937, "loss": 1.2613, "step": 23710 }, { "epoch": 0.30811363078927234, "grad_norm": 0.4687395393848419, "learning_rate": 0.000138405750009748, "loss": 1.5061, "step": 23711 }, { "epoch": 0.30812662533318824, "grad_norm": 0.3337137997150421, "learning_rate": 0.00013840315054783662, "loss": 1.3149, "step": 23712 }, { "epoch": 0.3081396198771041, "grad_norm": 0.351261705160141, "learning_rate": 0.00013840055108592521, "loss": 1.5241, "step": 23713 }, { "epoch": 0.30815261442102, "grad_norm": 0.361054390668869, "learning_rate": 0.00013839795162401384, "loss": 1.3152, "step": 23714 }, { "epoch": 0.30816560896493583, "grad_norm": 0.34404996037483215, "learning_rate": 0.00013839535216210244, "loss": 1.4769, "step": 23715 }, { "epoch": 0.30817860350885173, "grad_norm": 0.3886276185512543, "learning_rate": 0.0001383927527001911, "loss": 1.3482, "step": 23716 }, { "epoch": 0.3081915980527676, "grad_norm": 0.3152378797531128, "learning_rate": 0.00013839015323827969, "loss": 1.374, "step": 23717 }, { "epoch": 0.3082045925966835, "grad_norm": 0.42092639207839966, "learning_rate": 0.00013838755377636828, "loss": 1.5369, "step": 23718 }, { "epoch": 0.3082175871405993, "grad_norm": 0.36865299940109253, "learning_rate": 0.0001383849543144569, "loss": 1.3525, "step": 23719 }, { "epoch": 0.3082305816845152, "grad_norm": 0.4288714826107025, "learning_rate": 0.00013838235485254553, "loss": 1.4217, "step": 23720 }, { "epoch": 0.30824357622843107, "grad_norm": 0.37806907296180725, "learning_rate": 0.00013837975539063416, "loss": 1.3303, "step": 23721 }, { "epoch": 0.30825657077234697, "grad_norm": 0.380581796169281, "learning_rate": 0.00013837715592872275, "loss": 1.2867, "step": 23722 }, { "epoch": 0.3082695653162628, "grad_norm": 0.342325896024704, "learning_rate": 0.00013837455646681138, "loss": 1.351, "step": 23723 }, { "epoch": 0.3082825598601787, "grad_norm": 0.36525553464889526, "learning_rate": 0.0001383719570049, "loss": 1.3957, "step": 23724 }, { "epoch": 0.30829555440409456, "grad_norm": 0.3850937783718109, "learning_rate": 0.0001383693575429886, "loss": 1.3506, "step": 23725 }, { "epoch": 0.30830854894801046, "grad_norm": 0.3460325300693512, "learning_rate": 0.00013836675808107722, "loss": 1.2405, "step": 23726 }, { "epoch": 0.3083215434919263, "grad_norm": 0.36706075072288513, "learning_rate": 0.00013836415861916582, "loss": 1.5763, "step": 23727 }, { "epoch": 0.3083345380358422, "grad_norm": 0.40368518233299255, "learning_rate": 0.00013836155915725447, "loss": 1.2892, "step": 23728 }, { "epoch": 0.30834753257975805, "grad_norm": 0.3619697093963623, "learning_rate": 0.00013835895969534307, "loss": 1.4129, "step": 23729 }, { "epoch": 0.30836052712367396, "grad_norm": 0.4654667377471924, "learning_rate": 0.00013835636023343167, "loss": 1.4071, "step": 23730 }, { "epoch": 0.3083735216675898, "grad_norm": 0.4690954387187958, "learning_rate": 0.0001383537607715203, "loss": 1.6251, "step": 23731 }, { "epoch": 0.3083865162115057, "grad_norm": 0.43373632431030273, "learning_rate": 0.00013835116130960892, "loss": 1.4548, "step": 23732 }, { "epoch": 0.30839951075542155, "grad_norm": 0.3968580961227417, "learning_rate": 0.00013834856184769754, "loss": 1.269, "step": 23733 }, { "epoch": 0.30841250529933745, "grad_norm": 0.404070645570755, "learning_rate": 0.00013834596238578614, "loss": 1.407, "step": 23734 }, { "epoch": 0.3084254998432533, "grad_norm": 0.3268396854400635, "learning_rate": 0.00013834336292387476, "loss": 1.4472, "step": 23735 }, { "epoch": 0.3084384943871692, "grad_norm": 0.46975499391555786, "learning_rate": 0.0001383407634619634, "loss": 1.3193, "step": 23736 }, { "epoch": 0.30845148893108504, "grad_norm": 0.37644270062446594, "learning_rate": 0.00013833816400005199, "loss": 1.5597, "step": 23737 }, { "epoch": 0.30846448347500094, "grad_norm": 0.37298479676246643, "learning_rate": 0.0001383355645381406, "loss": 1.5519, "step": 23738 }, { "epoch": 0.3084774780189168, "grad_norm": 0.40612509846687317, "learning_rate": 0.00013833296507622923, "loss": 1.504, "step": 23739 }, { "epoch": 0.3084904725628327, "grad_norm": 0.43061867356300354, "learning_rate": 0.00013833036561431786, "loss": 1.3877, "step": 23740 }, { "epoch": 0.30850346710674853, "grad_norm": 0.39031246304512024, "learning_rate": 0.00013832776615240646, "loss": 1.2832, "step": 23741 }, { "epoch": 0.30851646165066443, "grad_norm": 0.40483370423316956, "learning_rate": 0.00013832516669049508, "loss": 1.412, "step": 23742 }, { "epoch": 0.3085294561945803, "grad_norm": 0.5106333494186401, "learning_rate": 0.0001383225672285837, "loss": 1.5594, "step": 23743 }, { "epoch": 0.3085424507384962, "grad_norm": 0.39754360914230347, "learning_rate": 0.0001383199677666723, "loss": 1.4502, "step": 23744 }, { "epoch": 0.308555445282412, "grad_norm": 0.3896426558494568, "learning_rate": 0.00013831736830476093, "loss": 1.4657, "step": 23745 }, { "epoch": 0.3085684398263279, "grad_norm": 0.39586618542671204, "learning_rate": 0.00013831476884284952, "loss": 1.4429, "step": 23746 }, { "epoch": 0.30858143437024377, "grad_norm": 0.42053499817848206, "learning_rate": 0.00013831216938093815, "loss": 1.4572, "step": 23747 }, { "epoch": 0.30859442891415967, "grad_norm": 0.4519786834716797, "learning_rate": 0.00013830956991902677, "loss": 1.4165, "step": 23748 }, { "epoch": 0.3086074234580755, "grad_norm": 0.35225239396095276, "learning_rate": 0.00013830697045711537, "loss": 1.2124, "step": 23749 }, { "epoch": 0.3086204180019914, "grad_norm": 0.4474756419658661, "learning_rate": 0.000138304370995204, "loss": 1.4339, "step": 23750 }, { "epoch": 0.30863341254590726, "grad_norm": 0.4044516086578369, "learning_rate": 0.00013830177153329262, "loss": 1.3497, "step": 23751 }, { "epoch": 0.30864640708982316, "grad_norm": 0.42009204626083374, "learning_rate": 0.00013829917207138124, "loss": 1.5014, "step": 23752 }, { "epoch": 0.30865940163373906, "grad_norm": 0.3916882276535034, "learning_rate": 0.00013829657260946984, "loss": 1.3825, "step": 23753 }, { "epoch": 0.3086723961776549, "grad_norm": 0.48238059878349304, "learning_rate": 0.00013829397314755847, "loss": 1.5252, "step": 23754 }, { "epoch": 0.3086853907215708, "grad_norm": 0.4167851507663727, "learning_rate": 0.0001382913736856471, "loss": 1.3571, "step": 23755 }, { "epoch": 0.30869838526548665, "grad_norm": 0.3993912637233734, "learning_rate": 0.0001382887742237357, "loss": 1.4099, "step": 23756 }, { "epoch": 0.30871137980940255, "grad_norm": 0.3138660490512848, "learning_rate": 0.0001382861747618243, "loss": 1.3357, "step": 23757 }, { "epoch": 0.3087243743533184, "grad_norm": 0.40589895844459534, "learning_rate": 0.0001382835752999129, "loss": 1.6265, "step": 23758 }, { "epoch": 0.3087373688972343, "grad_norm": 0.4522473216056824, "learning_rate": 0.00013828097583800153, "loss": 1.3554, "step": 23759 }, { "epoch": 0.30875036344115014, "grad_norm": 0.4732970893383026, "learning_rate": 0.00013827837637609016, "loss": 1.4338, "step": 23760 }, { "epoch": 0.30876335798506604, "grad_norm": 0.42332056164741516, "learning_rate": 0.00013827577691417876, "loss": 1.3054, "step": 23761 }, { "epoch": 0.3087763525289819, "grad_norm": 0.39770859479904175, "learning_rate": 0.00013827317745226738, "loss": 1.5026, "step": 23762 }, { "epoch": 0.3087893470728978, "grad_norm": 0.3936413526535034, "learning_rate": 0.000138270577990356, "loss": 1.4661, "step": 23763 }, { "epoch": 0.30880234161681364, "grad_norm": 0.42304402589797974, "learning_rate": 0.00013826797852844463, "loss": 1.4781, "step": 23764 }, { "epoch": 0.30881533616072954, "grad_norm": 0.3309645652770996, "learning_rate": 0.00013826537906653323, "loss": 1.4208, "step": 23765 }, { "epoch": 0.3088283307046454, "grad_norm": 0.358285516500473, "learning_rate": 0.00013826277960462185, "loss": 1.2081, "step": 23766 }, { "epoch": 0.3088413252485613, "grad_norm": 0.4188380539417267, "learning_rate": 0.00013826018014271048, "loss": 1.3715, "step": 23767 }, { "epoch": 0.3088543197924771, "grad_norm": 0.31712132692337036, "learning_rate": 0.00013825758068079907, "loss": 1.507, "step": 23768 }, { "epoch": 0.30886731433639303, "grad_norm": 0.4045106768608093, "learning_rate": 0.0001382549812188877, "loss": 1.3644, "step": 23769 }, { "epoch": 0.3088803088803089, "grad_norm": 0.41257748007774353, "learning_rate": 0.00013825238175697632, "loss": 1.3753, "step": 23770 }, { "epoch": 0.3088933034242248, "grad_norm": 0.4561169743537903, "learning_rate": 0.00013824978229506495, "loss": 1.4705, "step": 23771 }, { "epoch": 0.3089062979681406, "grad_norm": 0.3939547836780548, "learning_rate": 0.00013824718283315354, "loss": 1.5, "step": 23772 }, { "epoch": 0.3089192925120565, "grad_norm": 0.36217886209487915, "learning_rate": 0.00013824458337124214, "loss": 1.2748, "step": 23773 }, { "epoch": 0.30893228705597237, "grad_norm": 0.41370636224746704, "learning_rate": 0.0001382419839093308, "loss": 1.3303, "step": 23774 }, { "epoch": 0.30894528159988827, "grad_norm": 0.42174336314201355, "learning_rate": 0.0001382393844474194, "loss": 1.4376, "step": 23775 }, { "epoch": 0.3089582761438041, "grad_norm": 0.40029215812683105, "learning_rate": 0.00013823678498550801, "loss": 1.419, "step": 23776 }, { "epoch": 0.30897127068772, "grad_norm": 0.38913801312446594, "learning_rate": 0.0001382341855235966, "loss": 1.3723, "step": 23777 }, { "epoch": 0.30898426523163586, "grad_norm": 0.41501766443252563, "learning_rate": 0.00013823158606168524, "loss": 1.3912, "step": 23778 }, { "epoch": 0.30899725977555176, "grad_norm": 0.38698235154151917, "learning_rate": 0.00013822898659977386, "loss": 1.1963, "step": 23779 }, { "epoch": 0.3090102543194676, "grad_norm": 0.35782167315483093, "learning_rate": 0.00013822638713786246, "loss": 1.1406, "step": 23780 }, { "epoch": 0.3090232488633835, "grad_norm": 0.3086980879306793, "learning_rate": 0.00013822378767595108, "loss": 1.4323, "step": 23781 }, { "epoch": 0.30903624340729935, "grad_norm": 0.370387464761734, "learning_rate": 0.0001382211882140397, "loss": 1.2246, "step": 23782 }, { "epoch": 0.30904923795121525, "grad_norm": 0.42195382714271545, "learning_rate": 0.00013821858875212833, "loss": 1.2092, "step": 23783 }, { "epoch": 0.3090622324951311, "grad_norm": 0.37231874465942383, "learning_rate": 0.00013821598929021693, "loss": 1.4821, "step": 23784 }, { "epoch": 0.309075227039047, "grad_norm": 0.36413538455963135, "learning_rate": 0.00013821338982830553, "loss": 1.4356, "step": 23785 }, { "epoch": 0.30908822158296284, "grad_norm": 0.4143666923046112, "learning_rate": 0.00013821079036639418, "loss": 1.439, "step": 23786 }, { "epoch": 0.30910121612687874, "grad_norm": 0.41055288910865784, "learning_rate": 0.00013820819090448278, "loss": 1.2347, "step": 23787 }, { "epoch": 0.3091142106707946, "grad_norm": 0.5114784836769104, "learning_rate": 0.0001382055914425714, "loss": 1.4228, "step": 23788 }, { "epoch": 0.3091272052147105, "grad_norm": 0.4596235454082489, "learning_rate": 0.00013820299198066, "loss": 1.4927, "step": 23789 }, { "epoch": 0.30914019975862633, "grad_norm": 0.3260226845741272, "learning_rate": 0.00013820039251874862, "loss": 1.1972, "step": 23790 }, { "epoch": 0.30915319430254223, "grad_norm": 0.51402348279953, "learning_rate": 0.00013819779305683725, "loss": 1.3992, "step": 23791 }, { "epoch": 0.3091661888464581, "grad_norm": 0.4389089047908783, "learning_rate": 0.00013819519359492584, "loss": 1.463, "step": 23792 }, { "epoch": 0.309179183390374, "grad_norm": 0.4586727023124695, "learning_rate": 0.00013819259413301447, "loss": 1.3317, "step": 23793 }, { "epoch": 0.3091921779342898, "grad_norm": 0.38384613394737244, "learning_rate": 0.0001381899946711031, "loss": 1.3695, "step": 23794 }, { "epoch": 0.3092051724782057, "grad_norm": 0.4415891170501709, "learning_rate": 0.00013818739520919172, "loss": 1.3687, "step": 23795 }, { "epoch": 0.30921816702212157, "grad_norm": 0.45097705721855164, "learning_rate": 0.00013818479574728031, "loss": 1.4676, "step": 23796 }, { "epoch": 0.30923116156603747, "grad_norm": 0.444897323846817, "learning_rate": 0.0001381821962853689, "loss": 1.4107, "step": 23797 }, { "epoch": 0.3092441561099533, "grad_norm": 0.4139679968357086, "learning_rate": 0.00013817959682345756, "loss": 1.415, "step": 23798 }, { "epoch": 0.3092571506538692, "grad_norm": 0.28131258487701416, "learning_rate": 0.00013817699736154616, "loss": 1.2654, "step": 23799 }, { "epoch": 0.30927014519778506, "grad_norm": 0.5153700709342957, "learning_rate": 0.00013817439789963479, "loss": 1.5621, "step": 23800 }, { "epoch": 0.30928313974170096, "grad_norm": 0.39196959137916565, "learning_rate": 0.00013817179843772338, "loss": 1.5736, "step": 23801 }, { "epoch": 0.3092961342856168, "grad_norm": 0.43468302488327026, "learning_rate": 0.000138169198975812, "loss": 1.4264, "step": 23802 }, { "epoch": 0.3093091288295327, "grad_norm": 0.328887939453125, "learning_rate": 0.00013816659951390063, "loss": 1.2911, "step": 23803 }, { "epoch": 0.30932212337344855, "grad_norm": 0.3720610737800598, "learning_rate": 0.00013816400005198923, "loss": 1.4383, "step": 23804 }, { "epoch": 0.30933511791736445, "grad_norm": 0.4355788826942444, "learning_rate": 0.00013816140059007785, "loss": 1.4154, "step": 23805 }, { "epoch": 0.3093481124612803, "grad_norm": 0.41605398058891296, "learning_rate": 0.00013815880112816648, "loss": 1.4601, "step": 23806 }, { "epoch": 0.3093611070051962, "grad_norm": 0.23450107872486115, "learning_rate": 0.0001381562016662551, "loss": 1.2207, "step": 23807 }, { "epoch": 0.30937410154911205, "grad_norm": 0.45504528284072876, "learning_rate": 0.0001381536022043437, "loss": 1.4695, "step": 23808 }, { "epoch": 0.30938709609302795, "grad_norm": 0.34779179096221924, "learning_rate": 0.00013815100274243232, "loss": 1.1928, "step": 23809 }, { "epoch": 0.3094000906369438, "grad_norm": 0.4406035840511322, "learning_rate": 0.00013814840328052095, "loss": 1.496, "step": 23810 }, { "epoch": 0.3094130851808597, "grad_norm": 0.3680429756641388, "learning_rate": 0.00013814580381860955, "loss": 1.2381, "step": 23811 }, { "epoch": 0.30942607972477554, "grad_norm": 0.3156750202178955, "learning_rate": 0.00013814320435669817, "loss": 1.3683, "step": 23812 }, { "epoch": 0.30943907426869144, "grad_norm": 0.4043225944042206, "learning_rate": 0.0001381406048947868, "loss": 1.5869, "step": 23813 }, { "epoch": 0.3094520688126073, "grad_norm": 0.45116084814071655, "learning_rate": 0.0001381380054328754, "loss": 1.4057, "step": 23814 }, { "epoch": 0.3094650633565232, "grad_norm": 0.3871017396450043, "learning_rate": 0.00013813540597096402, "loss": 1.3121, "step": 23815 }, { "epoch": 0.30947805790043903, "grad_norm": 0.4697607457637787, "learning_rate": 0.00013813280650905261, "loss": 1.5304, "step": 23816 }, { "epoch": 0.30949105244435493, "grad_norm": 0.39670342206954956, "learning_rate": 0.00013813020704714127, "loss": 1.5512, "step": 23817 }, { "epoch": 0.3095040469882708, "grad_norm": 0.48963800072669983, "learning_rate": 0.00013812760758522986, "loss": 1.309, "step": 23818 }, { "epoch": 0.3095170415321867, "grad_norm": 0.3558309078216553, "learning_rate": 0.0001381250081233185, "loss": 1.3864, "step": 23819 }, { "epoch": 0.3095300360761025, "grad_norm": 0.3990516662597656, "learning_rate": 0.00013812240866140709, "loss": 1.472, "step": 23820 }, { "epoch": 0.3095430306200184, "grad_norm": 0.38804891705513, "learning_rate": 0.0001381198091994957, "loss": 1.5044, "step": 23821 }, { "epoch": 0.30955602516393427, "grad_norm": 0.37251004576683044, "learning_rate": 0.00013811720973758433, "loss": 1.4186, "step": 23822 }, { "epoch": 0.30956901970785017, "grad_norm": 0.46192091703414917, "learning_rate": 0.00013811461027567293, "loss": 1.3742, "step": 23823 }, { "epoch": 0.309582014251766, "grad_norm": 0.40239229798316956, "learning_rate": 0.00013811201081376156, "loss": 1.448, "step": 23824 }, { "epoch": 0.3095950087956819, "grad_norm": 0.35058051347732544, "learning_rate": 0.00013810941135185018, "loss": 1.4288, "step": 23825 }, { "epoch": 0.30960800333959776, "grad_norm": 0.43243512511253357, "learning_rate": 0.0001381068118899388, "loss": 1.3979, "step": 23826 }, { "epoch": 0.30962099788351366, "grad_norm": 0.39954861998558044, "learning_rate": 0.0001381042124280274, "loss": 1.6014, "step": 23827 }, { "epoch": 0.30963399242742956, "grad_norm": 0.34985974431037903, "learning_rate": 0.000138101612966116, "loss": 1.3475, "step": 23828 }, { "epoch": 0.3096469869713454, "grad_norm": 0.3275580406188965, "learning_rate": 0.00013809901350420465, "loss": 1.4011, "step": 23829 }, { "epoch": 0.3096599815152613, "grad_norm": 0.49311327934265137, "learning_rate": 0.00013809641404229325, "loss": 1.3243, "step": 23830 }, { "epoch": 0.30967297605917715, "grad_norm": 0.43973541259765625, "learning_rate": 0.00013809381458038187, "loss": 1.334, "step": 23831 }, { "epoch": 0.30968597060309305, "grad_norm": 0.42753127217292786, "learning_rate": 0.00013809121511847047, "loss": 1.4136, "step": 23832 }, { "epoch": 0.3096989651470089, "grad_norm": 0.43281760811805725, "learning_rate": 0.0001380886156565591, "loss": 1.3877, "step": 23833 }, { "epoch": 0.3097119596909248, "grad_norm": 0.49253880977630615, "learning_rate": 0.00013808601619464772, "loss": 1.4942, "step": 23834 }, { "epoch": 0.30972495423484064, "grad_norm": 0.4486534595489502, "learning_rate": 0.00013808341673273632, "loss": 1.4632, "step": 23835 }, { "epoch": 0.30973794877875654, "grad_norm": 0.41377606987953186, "learning_rate": 0.00013808081727082494, "loss": 1.4024, "step": 23836 }, { "epoch": 0.3097509433226724, "grad_norm": 0.4178917109966278, "learning_rate": 0.00013807821780891357, "loss": 1.5282, "step": 23837 }, { "epoch": 0.3097639378665883, "grad_norm": 0.44078192114830017, "learning_rate": 0.0001380756183470022, "loss": 1.2529, "step": 23838 }, { "epoch": 0.30977693241050414, "grad_norm": 0.3333128094673157, "learning_rate": 0.0001380730188850908, "loss": 1.2448, "step": 23839 }, { "epoch": 0.30978992695442004, "grad_norm": 0.4325866401195526, "learning_rate": 0.00013807041942317939, "loss": 1.3517, "step": 23840 }, { "epoch": 0.3098029214983359, "grad_norm": 0.40073856711387634, "learning_rate": 0.00013806781996126804, "loss": 1.285, "step": 23841 }, { "epoch": 0.3098159160422518, "grad_norm": 0.3905218243598938, "learning_rate": 0.00013806522049935663, "loss": 1.5221, "step": 23842 }, { "epoch": 0.3098289105861676, "grad_norm": 0.4415857791900635, "learning_rate": 0.00013806262103744526, "loss": 1.4371, "step": 23843 }, { "epoch": 0.30984190513008353, "grad_norm": 0.37541815638542175, "learning_rate": 0.00013806002157553388, "loss": 1.362, "step": 23844 }, { "epoch": 0.3098548996739994, "grad_norm": 0.4728158116340637, "learning_rate": 0.00013805742211362248, "loss": 1.5003, "step": 23845 }, { "epoch": 0.3098678942179153, "grad_norm": 0.3400465250015259, "learning_rate": 0.0001380548226517111, "loss": 1.4446, "step": 23846 }, { "epoch": 0.3098808887618311, "grad_norm": 0.4620821475982666, "learning_rate": 0.0001380522231897997, "loss": 1.55, "step": 23847 }, { "epoch": 0.309893883305747, "grad_norm": 0.4416901767253876, "learning_rate": 0.00013804962372788835, "loss": 1.514, "step": 23848 }, { "epoch": 0.30990687784966287, "grad_norm": 0.37687355279922485, "learning_rate": 0.00013804702426597695, "loss": 1.249, "step": 23849 }, { "epoch": 0.30991987239357877, "grad_norm": 0.4145062565803528, "learning_rate": 0.00013804442480406558, "loss": 1.3067, "step": 23850 }, { "epoch": 0.3099328669374946, "grad_norm": 0.39866968989372253, "learning_rate": 0.00013804182534215417, "loss": 1.6535, "step": 23851 }, { "epoch": 0.3099458614814105, "grad_norm": 0.42530226707458496, "learning_rate": 0.0001380392258802428, "loss": 1.3922, "step": 23852 }, { "epoch": 0.30995885602532636, "grad_norm": 0.45034313201904297, "learning_rate": 0.00013803662641833142, "loss": 1.5332, "step": 23853 }, { "epoch": 0.30997185056924226, "grad_norm": 0.43751785159111023, "learning_rate": 0.00013803402695642002, "loss": 1.749, "step": 23854 }, { "epoch": 0.3099848451131581, "grad_norm": 0.43934813141822815, "learning_rate": 0.00013803142749450864, "loss": 1.4842, "step": 23855 }, { "epoch": 0.309997839657074, "grad_norm": 0.36137276887893677, "learning_rate": 0.00013802882803259727, "loss": 1.3301, "step": 23856 }, { "epoch": 0.31001083420098985, "grad_norm": 0.39494848251342773, "learning_rate": 0.00013802622857068587, "loss": 1.3921, "step": 23857 }, { "epoch": 0.31002382874490575, "grad_norm": 0.3148389756679535, "learning_rate": 0.0001380236291087745, "loss": 1.3547, "step": 23858 }, { "epoch": 0.3100368232888216, "grad_norm": 0.3343336582183838, "learning_rate": 0.0001380210296468631, "loss": 1.5378, "step": 23859 }, { "epoch": 0.3100498178327375, "grad_norm": 0.496134489774704, "learning_rate": 0.00013801843018495174, "loss": 1.5298, "step": 23860 }, { "epoch": 0.31006281237665334, "grad_norm": 0.46657252311706543, "learning_rate": 0.00013801583072304034, "loss": 1.3416, "step": 23861 }, { "epoch": 0.31007580692056924, "grad_norm": 0.45345184206962585, "learning_rate": 0.00013801323126112896, "loss": 1.2855, "step": 23862 }, { "epoch": 0.3100888014644851, "grad_norm": 0.42172691226005554, "learning_rate": 0.00013801063179921756, "loss": 1.4361, "step": 23863 }, { "epoch": 0.310101796008401, "grad_norm": 0.35896340012550354, "learning_rate": 0.00013800803233730618, "loss": 1.3581, "step": 23864 }, { "epoch": 0.31011479055231683, "grad_norm": 0.41128477454185486, "learning_rate": 0.0001380054328753948, "loss": 1.3925, "step": 23865 }, { "epoch": 0.31012778509623273, "grad_norm": 0.2846456468105316, "learning_rate": 0.0001380028334134834, "loss": 1.2609, "step": 23866 }, { "epoch": 0.3101407796401486, "grad_norm": 0.3456208109855652, "learning_rate": 0.00013800023395157203, "loss": 1.2951, "step": 23867 }, { "epoch": 0.3101537741840645, "grad_norm": 0.40712153911590576, "learning_rate": 0.00013799763448966065, "loss": 1.4023, "step": 23868 }, { "epoch": 0.3101667687279803, "grad_norm": 0.4775449335575104, "learning_rate": 0.00013799503502774925, "loss": 1.1947, "step": 23869 }, { "epoch": 0.3101797632718962, "grad_norm": 0.39188286662101746, "learning_rate": 0.00013799243556583788, "loss": 1.2654, "step": 23870 }, { "epoch": 0.31019275781581207, "grad_norm": 0.4361734986305237, "learning_rate": 0.00013798983610392647, "loss": 1.3095, "step": 23871 }, { "epoch": 0.31020575235972797, "grad_norm": 0.48571932315826416, "learning_rate": 0.00013798723664201513, "loss": 1.5007, "step": 23872 }, { "epoch": 0.3102187469036438, "grad_norm": 0.38242021203041077, "learning_rate": 0.00013798463718010372, "loss": 1.3414, "step": 23873 }, { "epoch": 0.3102317414475597, "grad_norm": 0.429584801197052, "learning_rate": 0.00013798203771819235, "loss": 1.3548, "step": 23874 }, { "epoch": 0.31024473599147556, "grad_norm": 0.4487929046154022, "learning_rate": 0.00013797943825628094, "loss": 1.4083, "step": 23875 }, { "epoch": 0.31025773053539146, "grad_norm": 0.44359031319618225, "learning_rate": 0.00013797683879436957, "loss": 1.297, "step": 23876 }, { "epoch": 0.3102707250793073, "grad_norm": 0.3933943510055542, "learning_rate": 0.0001379742393324582, "loss": 1.1522, "step": 23877 }, { "epoch": 0.3102837196232232, "grad_norm": 0.419813334941864, "learning_rate": 0.0001379716398705468, "loss": 1.3617, "step": 23878 }, { "epoch": 0.31029671416713905, "grad_norm": 0.37526410818099976, "learning_rate": 0.00013796904040863542, "loss": 1.222, "step": 23879 }, { "epoch": 0.31030970871105495, "grad_norm": 0.4736362397670746, "learning_rate": 0.00013796644094672404, "loss": 1.4928, "step": 23880 }, { "epoch": 0.3103227032549708, "grad_norm": 0.2675705850124359, "learning_rate": 0.00013796384148481264, "loss": 1.3189, "step": 23881 }, { "epoch": 0.3103356977988867, "grad_norm": 0.4965255558490753, "learning_rate": 0.00013796124202290126, "loss": 1.3363, "step": 23882 }, { "epoch": 0.31034869234280255, "grad_norm": 0.4060961604118347, "learning_rate": 0.00013795864256098989, "loss": 1.2321, "step": 23883 }, { "epoch": 0.31036168688671845, "grad_norm": 0.349907785654068, "learning_rate": 0.0001379560430990785, "loss": 1.59, "step": 23884 }, { "epoch": 0.3103746814306343, "grad_norm": 0.39603179693222046, "learning_rate": 0.0001379534436371671, "loss": 1.4503, "step": 23885 }, { "epoch": 0.3103876759745502, "grad_norm": 0.5262779593467712, "learning_rate": 0.00013795084417525573, "loss": 1.4417, "step": 23886 }, { "epoch": 0.31040067051846604, "grad_norm": 0.46533364057540894, "learning_rate": 0.00013794824471334436, "loss": 1.6625, "step": 23887 }, { "epoch": 0.31041366506238194, "grad_norm": 0.3112325668334961, "learning_rate": 0.00013794564525143295, "loss": 1.1391, "step": 23888 }, { "epoch": 0.3104266596062978, "grad_norm": 0.5040755867958069, "learning_rate": 0.00013794304578952158, "loss": 1.4223, "step": 23889 }, { "epoch": 0.3104396541502137, "grad_norm": 0.3455553948879242, "learning_rate": 0.00013794044632761018, "loss": 1.3353, "step": 23890 }, { "epoch": 0.31045264869412953, "grad_norm": 0.40547439455986023, "learning_rate": 0.00013793784686569883, "loss": 1.3239, "step": 23891 }, { "epoch": 0.31046564323804543, "grad_norm": 0.4873808026313782, "learning_rate": 0.00013793524740378743, "loss": 1.4291, "step": 23892 }, { "epoch": 0.3104786377819613, "grad_norm": 0.3819611966609955, "learning_rate": 0.00013793264794187605, "loss": 1.236, "step": 23893 }, { "epoch": 0.3104916323258772, "grad_norm": 0.4641636610031128, "learning_rate": 0.00013793004847996465, "loss": 1.4637, "step": 23894 }, { "epoch": 0.310504626869793, "grad_norm": 0.3523745536804199, "learning_rate": 0.00013792744901805327, "loss": 1.1905, "step": 23895 }, { "epoch": 0.3105176214137089, "grad_norm": 0.44878754019737244, "learning_rate": 0.0001379248495561419, "loss": 1.603, "step": 23896 }, { "epoch": 0.31053061595762477, "grad_norm": 0.3051418662071228, "learning_rate": 0.0001379222500942305, "loss": 1.5226, "step": 23897 }, { "epoch": 0.31054361050154067, "grad_norm": 0.39380788803100586, "learning_rate": 0.00013791965063231912, "loss": 1.36, "step": 23898 }, { "epoch": 0.3105566050454565, "grad_norm": 0.4346606731414795, "learning_rate": 0.00013791705117040774, "loss": 1.5733, "step": 23899 }, { "epoch": 0.3105695995893724, "grad_norm": 0.39551079273223877, "learning_rate": 0.00013791445170849634, "loss": 1.5093, "step": 23900 }, { "epoch": 0.31058259413328826, "grad_norm": 0.4321194291114807, "learning_rate": 0.00013791185224658496, "loss": 1.3279, "step": 23901 }, { "epoch": 0.31059558867720416, "grad_norm": 0.33355411887168884, "learning_rate": 0.00013790925278467356, "loss": 1.3862, "step": 23902 }, { "epoch": 0.31060858322112, "grad_norm": 0.4714154601097107, "learning_rate": 0.0001379066533227622, "loss": 1.5895, "step": 23903 }, { "epoch": 0.3106215777650359, "grad_norm": 0.3886106014251709, "learning_rate": 0.0001379040538608508, "loss": 1.2789, "step": 23904 }, { "epoch": 0.3106345723089518, "grad_norm": 0.37490972876548767, "learning_rate": 0.00013790145439893944, "loss": 1.3529, "step": 23905 }, { "epoch": 0.31064756685286765, "grad_norm": 0.30774742364883423, "learning_rate": 0.00013789885493702803, "loss": 1.5286, "step": 23906 }, { "epoch": 0.31066056139678355, "grad_norm": 0.3692784607410431, "learning_rate": 0.00013789625547511666, "loss": 1.3646, "step": 23907 }, { "epoch": 0.3106735559406994, "grad_norm": 0.41448262333869934, "learning_rate": 0.00013789365601320528, "loss": 1.2563, "step": 23908 }, { "epoch": 0.3106865504846153, "grad_norm": 0.3954908847808838, "learning_rate": 0.00013789105655129388, "loss": 1.2521, "step": 23909 }, { "epoch": 0.31069954502853114, "grad_norm": 0.39265531301498413, "learning_rate": 0.0001378884570893825, "loss": 1.5022, "step": 23910 }, { "epoch": 0.31071253957244704, "grad_norm": 0.3893373906612396, "learning_rate": 0.00013788585762747113, "loss": 1.4652, "step": 23911 }, { "epoch": 0.3107255341163629, "grad_norm": 0.5740199685096741, "learning_rate": 0.00013788325816555973, "loss": 1.1323, "step": 23912 }, { "epoch": 0.3107385286602788, "grad_norm": 0.39213043451309204, "learning_rate": 0.00013788065870364835, "loss": 1.5641, "step": 23913 }, { "epoch": 0.31075152320419464, "grad_norm": 0.4795304834842682, "learning_rate": 0.00013787805924173695, "loss": 1.3815, "step": 23914 }, { "epoch": 0.31076451774811054, "grad_norm": 0.6655697226524353, "learning_rate": 0.0001378754597798256, "loss": 1.4232, "step": 23915 }, { "epoch": 0.3107775122920264, "grad_norm": 0.35082104802131653, "learning_rate": 0.0001378728603179142, "loss": 1.1995, "step": 23916 }, { "epoch": 0.3107905068359423, "grad_norm": 0.418209433555603, "learning_rate": 0.00013787026085600282, "loss": 1.4208, "step": 23917 }, { "epoch": 0.3108035013798581, "grad_norm": 0.31623315811157227, "learning_rate": 0.00013786766139409144, "loss": 1.2735, "step": 23918 }, { "epoch": 0.310816495923774, "grad_norm": 0.4177623987197876, "learning_rate": 0.00013786506193218004, "loss": 1.5594, "step": 23919 }, { "epoch": 0.3108294904676899, "grad_norm": 0.3654765486717224, "learning_rate": 0.00013786246247026867, "loss": 1.4608, "step": 23920 }, { "epoch": 0.3108424850116058, "grad_norm": 0.42051026225090027, "learning_rate": 0.00013785986300835726, "loss": 1.5841, "step": 23921 }, { "epoch": 0.3108554795555216, "grad_norm": 0.3291291892528534, "learning_rate": 0.00013785726354644592, "loss": 1.4767, "step": 23922 }, { "epoch": 0.3108684740994375, "grad_norm": 0.39693668484687805, "learning_rate": 0.0001378546640845345, "loss": 1.6129, "step": 23923 }, { "epoch": 0.31088146864335336, "grad_norm": 0.4205494523048401, "learning_rate": 0.0001378520646226231, "loss": 1.319, "step": 23924 }, { "epoch": 0.31089446318726927, "grad_norm": 0.4231456518173218, "learning_rate": 0.00013784946516071173, "loss": 1.5289, "step": 23925 }, { "epoch": 0.3109074577311851, "grad_norm": 0.386369526386261, "learning_rate": 0.00013784686569880036, "loss": 1.415, "step": 23926 }, { "epoch": 0.310920452275101, "grad_norm": 0.43499818444252014, "learning_rate": 0.00013784426623688898, "loss": 1.5224, "step": 23927 }, { "epoch": 0.31093344681901686, "grad_norm": 0.3281128406524658, "learning_rate": 0.00013784166677497758, "loss": 1.1598, "step": 23928 }, { "epoch": 0.31094644136293276, "grad_norm": 0.4508039653301239, "learning_rate": 0.0001378390673130662, "loss": 1.3316, "step": 23929 }, { "epoch": 0.3109594359068486, "grad_norm": 0.43688860535621643, "learning_rate": 0.00013783646785115483, "loss": 1.5154, "step": 23930 }, { "epoch": 0.3109724304507645, "grad_norm": 0.48640280961990356, "learning_rate": 0.00013783386838924343, "loss": 1.4405, "step": 23931 }, { "epoch": 0.31098542499468035, "grad_norm": 0.40494704246520996, "learning_rate": 0.00013783126892733205, "loss": 1.4462, "step": 23932 }, { "epoch": 0.31099841953859625, "grad_norm": 0.4335281252861023, "learning_rate": 0.00013782866946542065, "loss": 1.3523, "step": 23933 }, { "epoch": 0.3110114140825121, "grad_norm": 0.30513033270835876, "learning_rate": 0.0001378260700035093, "loss": 1.4079, "step": 23934 }, { "epoch": 0.311024408626428, "grad_norm": 0.3715493381023407, "learning_rate": 0.0001378234705415979, "loss": 1.402, "step": 23935 }, { "epoch": 0.31103740317034384, "grad_norm": 0.46856236457824707, "learning_rate": 0.0001378208710796865, "loss": 1.3601, "step": 23936 }, { "epoch": 0.31105039771425974, "grad_norm": 0.4111510217189789, "learning_rate": 0.00013781827161777512, "loss": 1.3899, "step": 23937 }, { "epoch": 0.3110633922581756, "grad_norm": 0.30440622568130493, "learning_rate": 0.00013781567215586374, "loss": 1.5925, "step": 23938 }, { "epoch": 0.3110763868020915, "grad_norm": 0.38103538751602173, "learning_rate": 0.00013781307269395237, "loss": 1.4483, "step": 23939 }, { "epoch": 0.31108938134600733, "grad_norm": 0.4235471487045288, "learning_rate": 0.00013781047323204097, "loss": 1.394, "step": 23940 }, { "epoch": 0.31110237588992323, "grad_norm": 0.42663297057151794, "learning_rate": 0.0001378078737701296, "loss": 1.2704, "step": 23941 }, { "epoch": 0.3111153704338391, "grad_norm": 0.3289090394973755, "learning_rate": 0.00013780527430821822, "loss": 1.4361, "step": 23942 }, { "epoch": 0.311128364977755, "grad_norm": 0.40683725476264954, "learning_rate": 0.0001378026748463068, "loss": 1.2902, "step": 23943 }, { "epoch": 0.3111413595216708, "grad_norm": 0.39345088601112366, "learning_rate": 0.00013780007538439544, "loss": 1.4135, "step": 23944 }, { "epoch": 0.3111543540655867, "grad_norm": 0.48659366369247437, "learning_rate": 0.00013779747592248403, "loss": 1.3726, "step": 23945 }, { "epoch": 0.31116734860950257, "grad_norm": 0.37349313497543335, "learning_rate": 0.0001377948764605727, "loss": 1.3369, "step": 23946 }, { "epoch": 0.31118034315341847, "grad_norm": 0.3508731424808502, "learning_rate": 0.00013779227699866128, "loss": 1.4158, "step": 23947 }, { "epoch": 0.3111933376973343, "grad_norm": 0.4582858979701996, "learning_rate": 0.0001377896775367499, "loss": 1.4077, "step": 23948 }, { "epoch": 0.3112063322412502, "grad_norm": 0.37235763669013977, "learning_rate": 0.0001377870780748385, "loss": 1.3243, "step": 23949 }, { "epoch": 0.31121932678516606, "grad_norm": 0.45014306902885437, "learning_rate": 0.00013778447861292713, "loss": 1.4114, "step": 23950 }, { "epoch": 0.31123232132908196, "grad_norm": 0.42739301919937134, "learning_rate": 0.00013778187915101575, "loss": 1.5206, "step": 23951 }, { "epoch": 0.3112453158729978, "grad_norm": 0.3125927448272705, "learning_rate": 0.00013777927968910435, "loss": 1.2116, "step": 23952 }, { "epoch": 0.3112583104169137, "grad_norm": 0.4059589207172394, "learning_rate": 0.00013777668022719298, "loss": 1.2822, "step": 23953 }, { "epoch": 0.31127130496082955, "grad_norm": 0.37575000524520874, "learning_rate": 0.0001377740807652816, "loss": 1.4164, "step": 23954 }, { "epoch": 0.31128429950474545, "grad_norm": 0.32792118191719055, "learning_rate": 0.0001377714813033702, "loss": 1.2217, "step": 23955 }, { "epoch": 0.3112972940486613, "grad_norm": 0.41451743245124817, "learning_rate": 0.00013776888184145882, "loss": 1.4349, "step": 23956 }, { "epoch": 0.3113102885925772, "grad_norm": 0.3205423951148987, "learning_rate": 0.00013776628237954745, "loss": 1.3984, "step": 23957 }, { "epoch": 0.31132328313649305, "grad_norm": 0.44887575507164, "learning_rate": 0.00013776368291763607, "loss": 1.4088, "step": 23958 }, { "epoch": 0.31133627768040895, "grad_norm": 0.39050063490867615, "learning_rate": 0.00013776108345572467, "loss": 1.5116, "step": 23959 }, { "epoch": 0.3113492722243248, "grad_norm": 0.38548868894577026, "learning_rate": 0.0001377584839938133, "loss": 1.609, "step": 23960 }, { "epoch": 0.3113622667682407, "grad_norm": 0.4732484221458435, "learning_rate": 0.00013775588453190192, "loss": 1.5134, "step": 23961 }, { "epoch": 0.31137526131215654, "grad_norm": 0.3074033856391907, "learning_rate": 0.00013775328506999052, "loss": 1.1652, "step": 23962 }, { "epoch": 0.31138825585607244, "grad_norm": 0.3868245780467987, "learning_rate": 0.00013775068560807914, "loss": 1.3733, "step": 23963 }, { "epoch": 0.3114012503999883, "grad_norm": 0.4524303674697876, "learning_rate": 0.00013774808614616774, "loss": 1.3938, "step": 23964 }, { "epoch": 0.3114142449439042, "grad_norm": 0.3855283260345459, "learning_rate": 0.00013774548668425636, "loss": 1.3352, "step": 23965 }, { "epoch": 0.31142723948782003, "grad_norm": 0.32370954751968384, "learning_rate": 0.00013774288722234499, "loss": 1.3453, "step": 23966 }, { "epoch": 0.31144023403173593, "grad_norm": 0.4870944619178772, "learning_rate": 0.00013774028776043358, "loss": 1.3989, "step": 23967 }, { "epoch": 0.3114532285756518, "grad_norm": 0.405438631772995, "learning_rate": 0.0001377376882985222, "loss": 1.4075, "step": 23968 }, { "epoch": 0.3114662231195677, "grad_norm": 0.34983202815055847, "learning_rate": 0.00013773508883661083, "loss": 1.5481, "step": 23969 }, { "epoch": 0.3114792176634835, "grad_norm": 0.2988024055957794, "learning_rate": 0.00013773248937469946, "loss": 1.333, "step": 23970 }, { "epoch": 0.3114922122073994, "grad_norm": 0.3165874779224396, "learning_rate": 0.00013772988991278805, "loss": 1.2441, "step": 23971 }, { "epoch": 0.31150520675131527, "grad_norm": 0.3601977527141571, "learning_rate": 0.00013772729045087668, "loss": 1.3238, "step": 23972 }, { "epoch": 0.31151820129523117, "grad_norm": 0.4172667860984802, "learning_rate": 0.0001377246909889653, "loss": 1.3174, "step": 23973 }, { "epoch": 0.311531195839147, "grad_norm": 0.4173881709575653, "learning_rate": 0.0001377220915270539, "loss": 1.5729, "step": 23974 }, { "epoch": 0.3115441903830629, "grad_norm": 0.3168468773365021, "learning_rate": 0.00013771949206514253, "loss": 1.2685, "step": 23975 }, { "epoch": 0.31155718492697876, "grad_norm": 0.4073374569416046, "learning_rate": 0.00013771689260323112, "loss": 1.3985, "step": 23976 }, { "epoch": 0.31157017947089466, "grad_norm": 0.3657204210758209, "learning_rate": 0.00013771429314131977, "loss": 1.3847, "step": 23977 }, { "epoch": 0.3115831740148105, "grad_norm": 0.32920265197753906, "learning_rate": 0.00013771169367940837, "loss": 1.3454, "step": 23978 }, { "epoch": 0.3115961685587264, "grad_norm": 0.4257546365261078, "learning_rate": 0.00013770909421749697, "loss": 1.3956, "step": 23979 }, { "epoch": 0.31160916310264225, "grad_norm": 0.38961026072502136, "learning_rate": 0.0001377064947555856, "loss": 1.4517, "step": 23980 }, { "epoch": 0.31162215764655815, "grad_norm": 0.4626120328903198, "learning_rate": 0.00013770389529367422, "loss": 1.4823, "step": 23981 }, { "epoch": 0.31163515219047405, "grad_norm": 0.4362609386444092, "learning_rate": 0.00013770129583176284, "loss": 1.376, "step": 23982 }, { "epoch": 0.3116481467343899, "grad_norm": 0.47997599840164185, "learning_rate": 0.00013769869636985144, "loss": 1.403, "step": 23983 }, { "epoch": 0.3116611412783058, "grad_norm": 0.33067432045936584, "learning_rate": 0.00013769609690794006, "loss": 1.2723, "step": 23984 }, { "epoch": 0.31167413582222164, "grad_norm": 0.46800974011421204, "learning_rate": 0.0001376934974460287, "loss": 1.5817, "step": 23985 }, { "epoch": 0.31168713036613754, "grad_norm": 0.4393022954463959, "learning_rate": 0.00013769089798411729, "loss": 1.2901, "step": 23986 }, { "epoch": 0.3117001249100534, "grad_norm": 0.3599379360675812, "learning_rate": 0.0001376882985222059, "loss": 1.4042, "step": 23987 }, { "epoch": 0.3117131194539693, "grad_norm": 0.4527144730091095, "learning_rate": 0.0001376856990602945, "loss": 1.4807, "step": 23988 }, { "epoch": 0.31172611399788513, "grad_norm": 0.388300359249115, "learning_rate": 0.00013768309959838316, "loss": 1.4414, "step": 23989 }, { "epoch": 0.31173910854180104, "grad_norm": 0.37205156683921814, "learning_rate": 0.00013768050013647176, "loss": 1.3013, "step": 23990 }, { "epoch": 0.3117521030857169, "grad_norm": 0.3441292345523834, "learning_rate": 0.00013767790067456035, "loss": 1.5675, "step": 23991 }, { "epoch": 0.3117650976296328, "grad_norm": 0.3798999488353729, "learning_rate": 0.00013767530121264898, "loss": 1.2948, "step": 23992 }, { "epoch": 0.3117780921735486, "grad_norm": 0.4052700102329254, "learning_rate": 0.0001376727017507376, "loss": 1.2227, "step": 23993 }, { "epoch": 0.3117910867174645, "grad_norm": 0.43474170565605164, "learning_rate": 0.00013767010228882623, "loss": 1.3954, "step": 23994 }, { "epoch": 0.3118040812613804, "grad_norm": 0.3933478891849518, "learning_rate": 0.00013766750282691483, "loss": 1.4149, "step": 23995 }, { "epoch": 0.3118170758052963, "grad_norm": 0.45425668358802795, "learning_rate": 0.00013766490336500345, "loss": 1.3104, "step": 23996 }, { "epoch": 0.3118300703492121, "grad_norm": 0.40313950181007385, "learning_rate": 0.00013766230390309207, "loss": 1.4334, "step": 23997 }, { "epoch": 0.311843064893128, "grad_norm": 0.33744296431541443, "learning_rate": 0.00013765970444118067, "loss": 1.375, "step": 23998 }, { "epoch": 0.31185605943704386, "grad_norm": 0.43625134229660034, "learning_rate": 0.0001376571049792693, "loss": 1.5292, "step": 23999 }, { "epoch": 0.31186905398095977, "grad_norm": 0.488025426864624, "learning_rate": 0.00013765450551735792, "loss": 1.49, "step": 24000 }, { "epoch": 0.3118820485248756, "grad_norm": 0.3705483675003052, "learning_rate": 0.00013765190605544655, "loss": 1.4062, "step": 24001 }, { "epoch": 0.3118950430687915, "grad_norm": 0.3765580952167511, "learning_rate": 0.00013764930659353514, "loss": 1.4171, "step": 24002 }, { "epoch": 0.31190803761270736, "grad_norm": 0.3116188645362854, "learning_rate": 0.00013764670713162374, "loss": 1.4177, "step": 24003 }, { "epoch": 0.31192103215662326, "grad_norm": 0.510449230670929, "learning_rate": 0.0001376441076697124, "loss": 1.4859, "step": 24004 }, { "epoch": 0.3119340267005391, "grad_norm": 0.41841232776641846, "learning_rate": 0.000137641508207801, "loss": 1.5218, "step": 24005 }, { "epoch": 0.311947021244455, "grad_norm": 0.36413973569869995, "learning_rate": 0.0001376389087458896, "loss": 1.4506, "step": 24006 }, { "epoch": 0.31196001578837085, "grad_norm": 0.40747568011283875, "learning_rate": 0.0001376363092839782, "loss": 1.3849, "step": 24007 }, { "epoch": 0.31197301033228675, "grad_norm": 0.40432819724082947, "learning_rate": 0.00013763370982206684, "loss": 1.4365, "step": 24008 }, { "epoch": 0.3119860048762026, "grad_norm": 0.3930312693119049, "learning_rate": 0.00013763111036015546, "loss": 1.4956, "step": 24009 }, { "epoch": 0.3119989994201185, "grad_norm": 0.2995540499687195, "learning_rate": 0.00013762851089824406, "loss": 1.462, "step": 24010 }, { "epoch": 0.31201199396403434, "grad_norm": 0.449510782957077, "learning_rate": 0.00013762591143633268, "loss": 1.4356, "step": 24011 }, { "epoch": 0.31202498850795024, "grad_norm": 0.3419528305530548, "learning_rate": 0.0001376233119744213, "loss": 1.3369, "step": 24012 }, { "epoch": 0.3120379830518661, "grad_norm": 0.44447213411331177, "learning_rate": 0.00013762071251250993, "loss": 1.372, "step": 24013 }, { "epoch": 0.312050977595782, "grad_norm": 0.43342724442481995, "learning_rate": 0.00013761811305059853, "loss": 1.5641, "step": 24014 }, { "epoch": 0.31206397213969783, "grad_norm": 0.4521684944629669, "learning_rate": 0.00013761551358868715, "loss": 1.5426, "step": 24015 }, { "epoch": 0.31207696668361373, "grad_norm": 0.5262646675109863, "learning_rate": 0.00013761291412677578, "loss": 1.303, "step": 24016 }, { "epoch": 0.3120899612275296, "grad_norm": 0.36772117018699646, "learning_rate": 0.00013761031466486437, "loss": 1.2938, "step": 24017 }, { "epoch": 0.3121029557714455, "grad_norm": 0.3977145850658417, "learning_rate": 0.000137607715202953, "loss": 1.3204, "step": 24018 }, { "epoch": 0.3121159503153613, "grad_norm": 0.3843863010406494, "learning_rate": 0.0001376051157410416, "loss": 1.5278, "step": 24019 }, { "epoch": 0.3121289448592772, "grad_norm": 0.3801257908344269, "learning_rate": 0.00013760251627913022, "loss": 1.3724, "step": 24020 }, { "epoch": 0.31214193940319307, "grad_norm": 0.33764639496803284, "learning_rate": 0.00013759991681721885, "loss": 1.2946, "step": 24021 }, { "epoch": 0.31215493394710897, "grad_norm": 0.3801344335079193, "learning_rate": 0.00013759731735530744, "loss": 1.2576, "step": 24022 }, { "epoch": 0.3121679284910248, "grad_norm": 0.3762162923812866, "learning_rate": 0.00013759471789339607, "loss": 1.3091, "step": 24023 }, { "epoch": 0.3121809230349407, "grad_norm": 0.49149996042251587, "learning_rate": 0.0001375921184314847, "loss": 1.45, "step": 24024 }, { "epoch": 0.31219391757885656, "grad_norm": 0.4664984345436096, "learning_rate": 0.00013758951896957332, "loss": 1.2782, "step": 24025 }, { "epoch": 0.31220691212277246, "grad_norm": 0.3481917679309845, "learning_rate": 0.0001375869195076619, "loss": 1.4094, "step": 24026 }, { "epoch": 0.3122199066666883, "grad_norm": 0.39677417278289795, "learning_rate": 0.00013758432004575054, "loss": 1.5093, "step": 24027 }, { "epoch": 0.3122329012106042, "grad_norm": 0.4046335816383362, "learning_rate": 0.00013758172058383916, "loss": 1.4802, "step": 24028 }, { "epoch": 0.31224589575452005, "grad_norm": 0.4369329512119293, "learning_rate": 0.00013757912112192776, "loss": 1.3582, "step": 24029 }, { "epoch": 0.31225889029843595, "grad_norm": 0.36028096079826355, "learning_rate": 0.00013757652166001638, "loss": 1.2879, "step": 24030 }, { "epoch": 0.3122718848423518, "grad_norm": 0.5566724538803101, "learning_rate": 0.000137573922198105, "loss": 1.331, "step": 24031 }, { "epoch": 0.3122848793862677, "grad_norm": 0.34386762976646423, "learning_rate": 0.00013757132273619363, "loss": 1.1165, "step": 24032 }, { "epoch": 0.31229787393018354, "grad_norm": 0.4286525547504425, "learning_rate": 0.00013756872327428223, "loss": 1.4363, "step": 24033 }, { "epoch": 0.31231086847409945, "grad_norm": 0.4375098645687103, "learning_rate": 0.00013756612381237083, "loss": 1.2707, "step": 24034 }, { "epoch": 0.3123238630180153, "grad_norm": 0.3450906276702881, "learning_rate": 0.00013756352435045948, "loss": 1.3853, "step": 24035 }, { "epoch": 0.3123368575619312, "grad_norm": 0.4314018487930298, "learning_rate": 0.00013756092488854808, "loss": 1.4795, "step": 24036 }, { "epoch": 0.31234985210584704, "grad_norm": 0.415788859128952, "learning_rate": 0.0001375583254266367, "loss": 1.3929, "step": 24037 }, { "epoch": 0.31236284664976294, "grad_norm": 0.4273725748062134, "learning_rate": 0.0001375557259647253, "loss": 1.4401, "step": 24038 }, { "epoch": 0.3123758411936788, "grad_norm": 0.38512519001960754, "learning_rate": 0.00013755312650281392, "loss": 1.4073, "step": 24039 }, { "epoch": 0.3123888357375947, "grad_norm": 0.34402963519096375, "learning_rate": 0.00013755052704090255, "loss": 1.5044, "step": 24040 }, { "epoch": 0.31240183028151053, "grad_norm": 0.3577123284339905, "learning_rate": 0.00013754792757899115, "loss": 1.2278, "step": 24041 }, { "epoch": 0.31241482482542643, "grad_norm": 0.40234658122062683, "learning_rate": 0.00013754532811707977, "loss": 1.3492, "step": 24042 }, { "epoch": 0.3124278193693423, "grad_norm": 0.435512512922287, "learning_rate": 0.0001375427286551684, "loss": 1.5132, "step": 24043 }, { "epoch": 0.3124408139132582, "grad_norm": 0.2758682370185852, "learning_rate": 0.00013754012919325702, "loss": 1.4252, "step": 24044 }, { "epoch": 0.312453808457174, "grad_norm": 0.4181942641735077, "learning_rate": 0.00013753752973134562, "loss": 1.4579, "step": 24045 }, { "epoch": 0.3124668030010899, "grad_norm": 0.4430083632469177, "learning_rate": 0.0001375349302694342, "loss": 1.2538, "step": 24046 }, { "epoch": 0.31247979754500577, "grad_norm": 0.46867311000823975, "learning_rate": 0.00013753233080752286, "loss": 1.4697, "step": 24047 }, { "epoch": 0.31249279208892167, "grad_norm": 0.38926905393600464, "learning_rate": 0.00013752973134561146, "loss": 1.6609, "step": 24048 }, { "epoch": 0.3125057866328375, "grad_norm": 0.35909736156463623, "learning_rate": 0.0001375271318837001, "loss": 1.5465, "step": 24049 }, { "epoch": 0.3125187811767534, "grad_norm": 0.32579079270362854, "learning_rate": 0.00013752453242178868, "loss": 1.1768, "step": 24050 }, { "epoch": 0.31253177572066926, "grad_norm": 0.39720970392227173, "learning_rate": 0.0001375219329598773, "loss": 1.5772, "step": 24051 }, { "epoch": 0.31254477026458516, "grad_norm": 0.40443727374076843, "learning_rate": 0.00013751933349796593, "loss": 1.409, "step": 24052 }, { "epoch": 0.312557764808501, "grad_norm": 0.303507536649704, "learning_rate": 0.00013751673403605453, "loss": 1.2178, "step": 24053 }, { "epoch": 0.3125707593524169, "grad_norm": 0.4055541455745697, "learning_rate": 0.00013751413457414316, "loss": 1.4201, "step": 24054 }, { "epoch": 0.31258375389633275, "grad_norm": 0.3177415132522583, "learning_rate": 0.00013751153511223178, "loss": 1.5134, "step": 24055 }, { "epoch": 0.31259674844024865, "grad_norm": 0.30238714814186096, "learning_rate": 0.0001375089356503204, "loss": 1.1993, "step": 24056 }, { "epoch": 0.31260974298416455, "grad_norm": 0.44723403453826904, "learning_rate": 0.000137506336188409, "loss": 1.4595, "step": 24057 }, { "epoch": 0.3126227375280804, "grad_norm": 0.31841811537742615, "learning_rate": 0.0001375037367264976, "loss": 1.2656, "step": 24058 }, { "epoch": 0.3126357320719963, "grad_norm": 0.4292653203010559, "learning_rate": 0.00013750113726458625, "loss": 1.5004, "step": 24059 }, { "epoch": 0.31264872661591214, "grad_norm": 0.41497403383255005, "learning_rate": 0.00013749853780267485, "loss": 1.4755, "step": 24060 }, { "epoch": 0.31266172115982804, "grad_norm": 0.41533756256103516, "learning_rate": 0.00013749593834076347, "loss": 1.4158, "step": 24061 }, { "epoch": 0.3126747157037439, "grad_norm": 0.36261487007141113, "learning_rate": 0.00013749333887885207, "loss": 1.5097, "step": 24062 }, { "epoch": 0.3126877102476598, "grad_norm": 0.4451095759868622, "learning_rate": 0.0001374907394169407, "loss": 1.384, "step": 24063 }, { "epoch": 0.31270070479157563, "grad_norm": 0.49637743830680847, "learning_rate": 0.00013748813995502932, "loss": 1.3524, "step": 24064 }, { "epoch": 0.31271369933549154, "grad_norm": 0.38129115104675293, "learning_rate": 0.00013748554049311792, "loss": 1.3337, "step": 24065 }, { "epoch": 0.3127266938794074, "grad_norm": 0.4188744127750397, "learning_rate": 0.00013748294103120654, "loss": 1.2504, "step": 24066 }, { "epoch": 0.3127396884233233, "grad_norm": 0.37698012590408325, "learning_rate": 0.00013748034156929516, "loss": 1.3171, "step": 24067 }, { "epoch": 0.3127526829672391, "grad_norm": 0.4148073196411133, "learning_rate": 0.0001374777421073838, "loss": 1.374, "step": 24068 }, { "epoch": 0.312765677511155, "grad_norm": 0.38230788707733154, "learning_rate": 0.0001374751426454724, "loss": 1.3433, "step": 24069 }, { "epoch": 0.31277867205507087, "grad_norm": 0.4472162425518036, "learning_rate": 0.000137472543183561, "loss": 1.2701, "step": 24070 }, { "epoch": 0.3127916665989868, "grad_norm": 0.41548895835876465, "learning_rate": 0.00013746994372164964, "loss": 1.4901, "step": 24071 }, { "epoch": 0.3128046611429026, "grad_norm": 0.3675248920917511, "learning_rate": 0.00013746734425973823, "loss": 1.3729, "step": 24072 }, { "epoch": 0.3128176556868185, "grad_norm": 0.41979625821113586, "learning_rate": 0.00013746474479782686, "loss": 1.3662, "step": 24073 }, { "epoch": 0.31283065023073436, "grad_norm": 0.41736137866973877, "learning_rate": 0.00013746214533591548, "loss": 1.6323, "step": 24074 }, { "epoch": 0.31284364477465026, "grad_norm": 0.41358327865600586, "learning_rate": 0.00013745954587400408, "loss": 1.416, "step": 24075 }, { "epoch": 0.3128566393185661, "grad_norm": 0.3807070255279541, "learning_rate": 0.0001374569464120927, "loss": 1.5321, "step": 24076 }, { "epoch": 0.312869633862482, "grad_norm": 0.41228362917900085, "learning_rate": 0.0001374543469501813, "loss": 1.7842, "step": 24077 }, { "epoch": 0.31288262840639786, "grad_norm": 0.36300885677337646, "learning_rate": 0.00013745174748826995, "loss": 1.4059, "step": 24078 }, { "epoch": 0.31289562295031376, "grad_norm": 0.4169485569000244, "learning_rate": 0.00013744914802635855, "loss": 1.3575, "step": 24079 }, { "epoch": 0.3129086174942296, "grad_norm": 0.45995306968688965, "learning_rate": 0.00013744654856444717, "loss": 1.4774, "step": 24080 }, { "epoch": 0.3129216120381455, "grad_norm": 0.4167274832725525, "learning_rate": 0.00013744394910253577, "loss": 1.4083, "step": 24081 }, { "epoch": 0.31293460658206135, "grad_norm": 0.33171501755714417, "learning_rate": 0.0001374413496406244, "loss": 1.5144, "step": 24082 }, { "epoch": 0.31294760112597725, "grad_norm": 0.38084664940834045, "learning_rate": 0.00013743875017871302, "loss": 1.4078, "step": 24083 }, { "epoch": 0.3129605956698931, "grad_norm": 0.36482226848602295, "learning_rate": 0.00013743615071680162, "loss": 1.4199, "step": 24084 }, { "epoch": 0.312973590213809, "grad_norm": 0.35472533106803894, "learning_rate": 0.00013743355125489024, "loss": 1.5307, "step": 24085 }, { "epoch": 0.31298658475772484, "grad_norm": 0.4131404459476471, "learning_rate": 0.00013743095179297887, "loss": 1.3654, "step": 24086 }, { "epoch": 0.31299957930164074, "grad_norm": 0.4545859694480896, "learning_rate": 0.00013742835233106746, "loss": 1.4369, "step": 24087 }, { "epoch": 0.3130125738455566, "grad_norm": 0.4544179141521454, "learning_rate": 0.0001374257528691561, "loss": 1.4936, "step": 24088 }, { "epoch": 0.3130255683894725, "grad_norm": 0.38573402166366577, "learning_rate": 0.0001374231534072447, "loss": 1.52, "step": 24089 }, { "epoch": 0.31303856293338833, "grad_norm": 0.3297925293445587, "learning_rate": 0.00013742055394533334, "loss": 1.3784, "step": 24090 }, { "epoch": 0.31305155747730423, "grad_norm": 0.38560348749160767, "learning_rate": 0.00013741795448342194, "loss": 1.4111, "step": 24091 }, { "epoch": 0.3130645520212201, "grad_norm": 0.4217059314250946, "learning_rate": 0.00013741535502151056, "loss": 1.5637, "step": 24092 }, { "epoch": 0.313077546565136, "grad_norm": 0.4886864125728607, "learning_rate": 0.00013741275555959916, "loss": 1.5556, "step": 24093 }, { "epoch": 0.3130905411090518, "grad_norm": 0.36298999190330505, "learning_rate": 0.00013741015609768778, "loss": 1.2336, "step": 24094 }, { "epoch": 0.3131035356529677, "grad_norm": 0.28728771209716797, "learning_rate": 0.0001374075566357764, "loss": 1.534, "step": 24095 }, { "epoch": 0.31311653019688357, "grad_norm": 0.4165332317352295, "learning_rate": 0.000137404957173865, "loss": 1.4454, "step": 24096 }, { "epoch": 0.31312952474079947, "grad_norm": 0.33897554874420166, "learning_rate": 0.00013740235771195363, "loss": 1.3528, "step": 24097 }, { "epoch": 0.3131425192847153, "grad_norm": 0.4142508804798126, "learning_rate": 0.00013739975825004225, "loss": 1.3996, "step": 24098 }, { "epoch": 0.3131555138286312, "grad_norm": 0.47479376196861267, "learning_rate": 0.00013739715878813088, "loss": 1.2737, "step": 24099 }, { "epoch": 0.31316850837254706, "grad_norm": 0.42113324999809265, "learning_rate": 0.00013739455932621947, "loss": 1.4923, "step": 24100 }, { "epoch": 0.31318150291646296, "grad_norm": 0.40646979212760925, "learning_rate": 0.00013739195986430807, "loss": 1.3435, "step": 24101 }, { "epoch": 0.3131944974603788, "grad_norm": 0.4228103756904602, "learning_rate": 0.00013738936040239672, "loss": 1.3386, "step": 24102 }, { "epoch": 0.3132074920042947, "grad_norm": 0.487327516078949, "learning_rate": 0.00013738676094048532, "loss": 1.3879, "step": 24103 }, { "epoch": 0.31322048654821055, "grad_norm": 0.33931151032447815, "learning_rate": 0.00013738416147857395, "loss": 1.4628, "step": 24104 }, { "epoch": 0.31323348109212645, "grad_norm": 0.4432222843170166, "learning_rate": 0.00013738156201666257, "loss": 1.5147, "step": 24105 }, { "epoch": 0.3132464756360423, "grad_norm": 0.41895508766174316, "learning_rate": 0.00013737896255475117, "loss": 1.5267, "step": 24106 }, { "epoch": 0.3132594701799582, "grad_norm": 0.3876771032810211, "learning_rate": 0.0001373763630928398, "loss": 1.4458, "step": 24107 }, { "epoch": 0.31327246472387404, "grad_norm": 0.33563360571861267, "learning_rate": 0.0001373737636309284, "loss": 1.3171, "step": 24108 }, { "epoch": 0.31328545926778995, "grad_norm": 0.5040119886398315, "learning_rate": 0.00013737116416901704, "loss": 1.468, "step": 24109 }, { "epoch": 0.3132984538117058, "grad_norm": 0.38198140263557434, "learning_rate": 0.00013736856470710564, "loss": 1.2265, "step": 24110 }, { "epoch": 0.3133114483556217, "grad_norm": 0.4412464499473572, "learning_rate": 0.00013736596524519426, "loss": 1.4326, "step": 24111 }, { "epoch": 0.31332444289953754, "grad_norm": 0.3524186909198761, "learning_rate": 0.00013736336578328286, "loss": 1.3943, "step": 24112 }, { "epoch": 0.31333743744345344, "grad_norm": 0.36579394340515137, "learning_rate": 0.00013736076632137148, "loss": 1.27, "step": 24113 }, { "epoch": 0.3133504319873693, "grad_norm": 0.46102070808410645, "learning_rate": 0.0001373581668594601, "loss": 1.3613, "step": 24114 }, { "epoch": 0.3133634265312852, "grad_norm": 0.3797166645526886, "learning_rate": 0.0001373555673975487, "loss": 1.4383, "step": 24115 }, { "epoch": 0.31337642107520103, "grad_norm": 0.3789106011390686, "learning_rate": 0.00013735296793563733, "loss": 1.2324, "step": 24116 }, { "epoch": 0.31338941561911693, "grad_norm": 0.3841444253921509, "learning_rate": 0.00013735036847372596, "loss": 1.4528, "step": 24117 }, { "epoch": 0.3134024101630328, "grad_norm": 0.38438114523887634, "learning_rate": 0.00013734776901181455, "loss": 1.316, "step": 24118 }, { "epoch": 0.3134154047069487, "grad_norm": 0.40486881136894226, "learning_rate": 0.00013734516954990318, "loss": 1.4839, "step": 24119 }, { "epoch": 0.3134283992508645, "grad_norm": 0.3532828986644745, "learning_rate": 0.00013734257008799177, "loss": 1.5072, "step": 24120 }, { "epoch": 0.3134413937947804, "grad_norm": 0.4030096232891083, "learning_rate": 0.00013733997062608043, "loss": 1.4009, "step": 24121 }, { "epoch": 0.31345438833869627, "grad_norm": 0.4326718747615814, "learning_rate": 0.00013733737116416902, "loss": 1.3561, "step": 24122 }, { "epoch": 0.31346738288261217, "grad_norm": 0.42302659153938293, "learning_rate": 0.00013733477170225765, "loss": 1.3948, "step": 24123 }, { "epoch": 0.313480377426528, "grad_norm": 0.3909808397293091, "learning_rate": 0.00013733217224034625, "loss": 1.1477, "step": 24124 }, { "epoch": 0.3134933719704439, "grad_norm": 0.3916614353656769, "learning_rate": 0.00013732957277843487, "loss": 1.3368, "step": 24125 }, { "epoch": 0.31350636651435976, "grad_norm": 0.47921115159988403, "learning_rate": 0.0001373269733165235, "loss": 1.6167, "step": 24126 }, { "epoch": 0.31351936105827566, "grad_norm": 0.4598756432533264, "learning_rate": 0.0001373243738546121, "loss": 1.4836, "step": 24127 }, { "epoch": 0.3135323556021915, "grad_norm": 0.29184019565582275, "learning_rate": 0.00013732177439270072, "loss": 1.4215, "step": 24128 }, { "epoch": 0.3135453501461074, "grad_norm": 0.3766686022281647, "learning_rate": 0.00013731917493078934, "loss": 1.4199, "step": 24129 }, { "epoch": 0.31355834469002325, "grad_norm": 0.4528316259384155, "learning_rate": 0.00013731657546887794, "loss": 1.3668, "step": 24130 }, { "epoch": 0.31357133923393915, "grad_norm": 0.3740106225013733, "learning_rate": 0.00013731397600696656, "loss": 1.2701, "step": 24131 }, { "epoch": 0.313584333777855, "grad_norm": 0.4456227123737335, "learning_rate": 0.00013731137654505516, "loss": 1.2809, "step": 24132 }, { "epoch": 0.3135973283217709, "grad_norm": 0.3641541302204132, "learning_rate": 0.0001373087770831438, "loss": 1.5589, "step": 24133 }, { "epoch": 0.3136103228656868, "grad_norm": 0.4384995102882385, "learning_rate": 0.0001373061776212324, "loss": 1.5314, "step": 24134 }, { "epoch": 0.31362331740960264, "grad_norm": 0.3754170536994934, "learning_rate": 0.00013730357815932103, "loss": 1.4484, "step": 24135 }, { "epoch": 0.31363631195351854, "grad_norm": 0.4135434329509735, "learning_rate": 0.00013730097869740963, "loss": 1.3965, "step": 24136 }, { "epoch": 0.3136493064974344, "grad_norm": 0.42548951506614685, "learning_rate": 0.00013729837923549826, "loss": 1.4212, "step": 24137 }, { "epoch": 0.3136623010413503, "grad_norm": 0.3537392020225525, "learning_rate": 0.00013729577977358688, "loss": 1.2549, "step": 24138 }, { "epoch": 0.31367529558526613, "grad_norm": 0.4996125102043152, "learning_rate": 0.00013729318031167548, "loss": 1.3863, "step": 24139 }, { "epoch": 0.31368829012918203, "grad_norm": 0.34995129704475403, "learning_rate": 0.0001372905808497641, "loss": 1.265, "step": 24140 }, { "epoch": 0.3137012846730979, "grad_norm": 0.31927287578582764, "learning_rate": 0.00013728798138785273, "loss": 1.4223, "step": 24141 }, { "epoch": 0.3137142792170138, "grad_norm": 0.3679982125759125, "learning_rate": 0.00013728538192594132, "loss": 1.4303, "step": 24142 }, { "epoch": 0.3137272737609296, "grad_norm": 0.3511558473110199, "learning_rate": 0.00013728278246402995, "loss": 1.4301, "step": 24143 }, { "epoch": 0.3137402683048455, "grad_norm": 0.2987215518951416, "learning_rate": 0.00013728018300211857, "loss": 1.4566, "step": 24144 }, { "epoch": 0.31375326284876137, "grad_norm": 0.42606785893440247, "learning_rate": 0.0001372775835402072, "loss": 1.5058, "step": 24145 }, { "epoch": 0.3137662573926773, "grad_norm": 0.3913020193576813, "learning_rate": 0.0001372749840782958, "loss": 1.5466, "step": 24146 }, { "epoch": 0.3137792519365931, "grad_norm": 0.38378703594207764, "learning_rate": 0.00013727238461638442, "loss": 1.3251, "step": 24147 }, { "epoch": 0.313792246480509, "grad_norm": 0.45694097876548767, "learning_rate": 0.00013726978515447304, "loss": 1.4177, "step": 24148 }, { "epoch": 0.31380524102442486, "grad_norm": 0.3502528667449951, "learning_rate": 0.00013726718569256164, "loss": 1.3085, "step": 24149 }, { "epoch": 0.31381823556834076, "grad_norm": 0.41781380772590637, "learning_rate": 0.00013726458623065027, "loss": 1.4455, "step": 24150 }, { "epoch": 0.3138312301122566, "grad_norm": 0.2427089363336563, "learning_rate": 0.00013726198676873886, "loss": 1.1942, "step": 24151 }, { "epoch": 0.3138442246561725, "grad_norm": 0.3682045638561249, "learning_rate": 0.00013725938730682751, "loss": 1.4915, "step": 24152 }, { "epoch": 0.31385721920008836, "grad_norm": 0.3806772232055664, "learning_rate": 0.0001372567878449161, "loss": 1.4897, "step": 24153 }, { "epoch": 0.31387021374400426, "grad_norm": 0.4181349277496338, "learning_rate": 0.00013725418838300474, "loss": 1.51, "step": 24154 }, { "epoch": 0.3138832082879201, "grad_norm": 0.40033575892448425, "learning_rate": 0.00013725158892109333, "loss": 1.5686, "step": 24155 }, { "epoch": 0.313896202831836, "grad_norm": 0.382345587015152, "learning_rate": 0.00013724898945918196, "loss": 1.3563, "step": 24156 }, { "epoch": 0.31390919737575185, "grad_norm": 0.36429542303085327, "learning_rate": 0.00013724638999727058, "loss": 1.5089, "step": 24157 }, { "epoch": 0.31392219191966775, "grad_norm": 0.35979387164115906, "learning_rate": 0.00013724379053535918, "loss": 1.3182, "step": 24158 }, { "epoch": 0.3139351864635836, "grad_norm": 0.39158037304878235, "learning_rate": 0.0001372411910734478, "loss": 1.4904, "step": 24159 }, { "epoch": 0.3139481810074995, "grad_norm": 0.421166330575943, "learning_rate": 0.00013723859161153643, "loss": 1.5244, "step": 24160 }, { "epoch": 0.31396117555141534, "grad_norm": 0.35423725843429565, "learning_rate": 0.00013723599214962503, "loss": 1.4312, "step": 24161 }, { "epoch": 0.31397417009533124, "grad_norm": 0.404064416885376, "learning_rate": 0.00013723339268771365, "loss": 1.4624, "step": 24162 }, { "epoch": 0.3139871646392471, "grad_norm": 0.5315631031990051, "learning_rate": 0.00013723079322580225, "loss": 1.5876, "step": 24163 }, { "epoch": 0.314000159183163, "grad_norm": 0.37470829486846924, "learning_rate": 0.0001372281937638909, "loss": 1.3917, "step": 24164 }, { "epoch": 0.31401315372707883, "grad_norm": 0.3997611403465271, "learning_rate": 0.0001372255943019795, "loss": 1.3947, "step": 24165 }, { "epoch": 0.31402614827099473, "grad_norm": 0.4182421863079071, "learning_rate": 0.00013722299484006812, "loss": 1.4538, "step": 24166 }, { "epoch": 0.3140391428149106, "grad_norm": 0.37487128376960754, "learning_rate": 0.00013722039537815672, "loss": 1.3848, "step": 24167 }, { "epoch": 0.3140521373588265, "grad_norm": 0.39135023951530457, "learning_rate": 0.00013721779591624534, "loss": 1.332, "step": 24168 }, { "epoch": 0.3140651319027423, "grad_norm": 0.3737618029117584, "learning_rate": 0.00013721519645433397, "loss": 1.2217, "step": 24169 }, { "epoch": 0.3140781264466582, "grad_norm": 0.30085426568984985, "learning_rate": 0.00013721259699242257, "loss": 1.5156, "step": 24170 }, { "epoch": 0.31409112099057407, "grad_norm": 0.349826842546463, "learning_rate": 0.0001372099975305112, "loss": 1.5919, "step": 24171 }, { "epoch": 0.31410411553448997, "grad_norm": 0.4013342559337616, "learning_rate": 0.00013720739806859981, "loss": 1.5131, "step": 24172 }, { "epoch": 0.3141171100784058, "grad_norm": 0.45974043011665344, "learning_rate": 0.0001372047986066884, "loss": 1.3831, "step": 24173 }, { "epoch": 0.3141301046223217, "grad_norm": 0.4515654742717743, "learning_rate": 0.00013720219914477704, "loss": 1.4738, "step": 24174 }, { "epoch": 0.31414309916623756, "grad_norm": 0.3971514403820038, "learning_rate": 0.00013719959968286563, "loss": 1.331, "step": 24175 }, { "epoch": 0.31415609371015346, "grad_norm": 0.45709437131881714, "learning_rate": 0.00013719700022095428, "loss": 1.4114, "step": 24176 }, { "epoch": 0.3141690882540693, "grad_norm": 0.36304983496665955, "learning_rate": 0.00013719440075904288, "loss": 1.29, "step": 24177 }, { "epoch": 0.3141820827979852, "grad_norm": 0.3682185411453247, "learning_rate": 0.0001371918012971315, "loss": 1.3481, "step": 24178 }, { "epoch": 0.31419507734190105, "grad_norm": 0.47754621505737305, "learning_rate": 0.00013718920183522013, "loss": 1.2722, "step": 24179 }, { "epoch": 0.31420807188581695, "grad_norm": 0.4243612587451935, "learning_rate": 0.00013718660237330873, "loss": 1.3513, "step": 24180 }, { "epoch": 0.3142210664297328, "grad_norm": 0.381763219833374, "learning_rate": 0.00013718400291139735, "loss": 1.4275, "step": 24181 }, { "epoch": 0.3142340609736487, "grad_norm": 0.42432597279548645, "learning_rate": 0.00013718140344948595, "loss": 1.4977, "step": 24182 }, { "epoch": 0.31424705551756454, "grad_norm": 0.4389258921146393, "learning_rate": 0.0001371788039875746, "loss": 1.5341, "step": 24183 }, { "epoch": 0.31426005006148044, "grad_norm": 0.33113405108451843, "learning_rate": 0.0001371762045256632, "loss": 1.4842, "step": 24184 }, { "epoch": 0.3142730446053963, "grad_norm": 0.34596988558769226, "learning_rate": 0.0001371736050637518, "loss": 1.3565, "step": 24185 }, { "epoch": 0.3142860391493122, "grad_norm": 0.39609968662261963, "learning_rate": 0.00013717100560184042, "loss": 1.4609, "step": 24186 }, { "epoch": 0.31429903369322804, "grad_norm": 0.3026363253593445, "learning_rate": 0.00013716840613992905, "loss": 1.1944, "step": 24187 }, { "epoch": 0.31431202823714394, "grad_norm": 0.41935089230537415, "learning_rate": 0.00013716580667801767, "loss": 1.4014, "step": 24188 }, { "epoch": 0.3143250227810598, "grad_norm": 0.40155860781669617, "learning_rate": 0.00013716320721610627, "loss": 1.4568, "step": 24189 }, { "epoch": 0.3143380173249757, "grad_norm": 0.4086877703666687, "learning_rate": 0.0001371606077541949, "loss": 1.4024, "step": 24190 }, { "epoch": 0.31435101186889153, "grad_norm": 0.4069059193134308, "learning_rate": 0.00013715800829228352, "loss": 1.4331, "step": 24191 }, { "epoch": 0.31436400641280743, "grad_norm": 0.37308403849601746, "learning_rate": 0.00013715540883037211, "loss": 1.4, "step": 24192 }, { "epoch": 0.3143770009567233, "grad_norm": 0.38528764247894287, "learning_rate": 0.00013715280936846074, "loss": 1.2503, "step": 24193 }, { "epoch": 0.3143899955006392, "grad_norm": 0.4985155165195465, "learning_rate": 0.00013715020990654934, "loss": 1.4655, "step": 24194 }, { "epoch": 0.314402990044555, "grad_norm": 0.34910356998443604, "learning_rate": 0.000137147610444638, "loss": 1.2278, "step": 24195 }, { "epoch": 0.3144159845884709, "grad_norm": 0.3683018982410431, "learning_rate": 0.00013714501098272658, "loss": 1.4835, "step": 24196 }, { "epoch": 0.31442897913238677, "grad_norm": 0.43597593903541565, "learning_rate": 0.00013714241152081518, "loss": 1.3774, "step": 24197 }, { "epoch": 0.31444197367630267, "grad_norm": 0.3188539743423462, "learning_rate": 0.0001371398120589038, "loss": 1.4925, "step": 24198 }, { "epoch": 0.3144549682202185, "grad_norm": 0.4241398274898529, "learning_rate": 0.00013713721259699243, "loss": 1.246, "step": 24199 }, { "epoch": 0.3144679627641344, "grad_norm": 0.5855793356895447, "learning_rate": 0.00013713461313508106, "loss": 1.5874, "step": 24200 }, { "epoch": 0.31448095730805026, "grad_norm": 0.2817264795303345, "learning_rate": 0.00013713201367316965, "loss": 1.2687, "step": 24201 }, { "epoch": 0.31449395185196616, "grad_norm": 0.41730374097824097, "learning_rate": 0.00013712941421125828, "loss": 1.562, "step": 24202 }, { "epoch": 0.314506946395882, "grad_norm": 0.4404489994049072, "learning_rate": 0.0001371268147493469, "loss": 1.527, "step": 24203 }, { "epoch": 0.3145199409397979, "grad_norm": 0.35299018025398254, "learning_rate": 0.0001371242152874355, "loss": 1.4261, "step": 24204 }, { "epoch": 0.31453293548371375, "grad_norm": 0.37917932868003845, "learning_rate": 0.00013712161582552412, "loss": 1.4166, "step": 24205 }, { "epoch": 0.31454593002762965, "grad_norm": 0.42596685886383057, "learning_rate": 0.00013711901636361272, "loss": 1.4279, "step": 24206 }, { "epoch": 0.3145589245715455, "grad_norm": 0.3967914879322052, "learning_rate": 0.00013711641690170137, "loss": 1.3899, "step": 24207 }, { "epoch": 0.3145719191154614, "grad_norm": 0.4533579349517822, "learning_rate": 0.00013711381743978997, "loss": 1.5057, "step": 24208 }, { "epoch": 0.3145849136593773, "grad_norm": 0.36241182684898376, "learning_rate": 0.00013711121797787857, "loss": 1.1778, "step": 24209 }, { "epoch": 0.31459790820329314, "grad_norm": 0.3608736991882324, "learning_rate": 0.0001371086185159672, "loss": 1.3887, "step": 24210 }, { "epoch": 0.31461090274720904, "grad_norm": 0.29960396885871887, "learning_rate": 0.00013710601905405582, "loss": 1.1806, "step": 24211 }, { "epoch": 0.3146238972911249, "grad_norm": 0.438826322555542, "learning_rate": 0.00013710341959214444, "loss": 1.4569, "step": 24212 }, { "epoch": 0.3146368918350408, "grad_norm": 0.35067325830459595, "learning_rate": 0.00013710082013023304, "loss": 1.4293, "step": 24213 }, { "epoch": 0.31464988637895663, "grad_norm": 0.495954304933548, "learning_rate": 0.00013709822066832166, "loss": 1.6212, "step": 24214 }, { "epoch": 0.31466288092287253, "grad_norm": 0.3596116900444031, "learning_rate": 0.0001370956212064103, "loss": 1.4736, "step": 24215 }, { "epoch": 0.3146758754667884, "grad_norm": 0.3464513421058655, "learning_rate": 0.00013709302174449888, "loss": 1.5765, "step": 24216 }, { "epoch": 0.3146888700107043, "grad_norm": 0.36833566427230835, "learning_rate": 0.0001370904222825875, "loss": 1.5671, "step": 24217 }, { "epoch": 0.3147018645546201, "grad_norm": 0.497585654258728, "learning_rate": 0.00013708782282067613, "loss": 1.3884, "step": 24218 }, { "epoch": 0.314714859098536, "grad_norm": 0.4037121832370758, "learning_rate": 0.00013708522335876476, "loss": 1.5154, "step": 24219 }, { "epoch": 0.31472785364245187, "grad_norm": 0.5084320902824402, "learning_rate": 0.00013708262389685336, "loss": 1.5147, "step": 24220 }, { "epoch": 0.31474084818636777, "grad_norm": 0.35329383611679077, "learning_rate": 0.00013708002443494198, "loss": 1.3351, "step": 24221 }, { "epoch": 0.3147538427302836, "grad_norm": 0.4006134867668152, "learning_rate": 0.0001370774249730306, "loss": 1.309, "step": 24222 }, { "epoch": 0.3147668372741995, "grad_norm": 0.45675128698349, "learning_rate": 0.0001370748255111192, "loss": 1.445, "step": 24223 }, { "epoch": 0.31477983181811536, "grad_norm": 0.4189942181110382, "learning_rate": 0.00013707222604920783, "loss": 1.4819, "step": 24224 }, { "epoch": 0.31479282636203126, "grad_norm": 0.444923460483551, "learning_rate": 0.00013706962658729642, "loss": 1.4941, "step": 24225 }, { "epoch": 0.3148058209059471, "grad_norm": 0.398495078086853, "learning_rate": 0.00013706702712538505, "loss": 1.3008, "step": 24226 }, { "epoch": 0.314818815449863, "grad_norm": 0.34429240226745605, "learning_rate": 0.00013706442766347367, "loss": 1.3207, "step": 24227 }, { "epoch": 0.31483180999377886, "grad_norm": 0.4360469579696655, "learning_rate": 0.00013706182820156227, "loss": 1.4586, "step": 24228 }, { "epoch": 0.31484480453769476, "grad_norm": 0.3913939297199249, "learning_rate": 0.0001370592287396509, "loss": 1.3652, "step": 24229 }, { "epoch": 0.3148577990816106, "grad_norm": 0.4482981264591217, "learning_rate": 0.00013705662927773952, "loss": 1.5234, "step": 24230 }, { "epoch": 0.3148707936255265, "grad_norm": 0.3716791868209839, "learning_rate": 0.00013705402981582814, "loss": 1.4302, "step": 24231 }, { "epoch": 0.31488378816944235, "grad_norm": 0.43635624647140503, "learning_rate": 0.00013705143035391674, "loss": 1.3219, "step": 24232 }, { "epoch": 0.31489678271335825, "grad_norm": 0.3972419202327728, "learning_rate": 0.00013704883089200537, "loss": 1.4784, "step": 24233 }, { "epoch": 0.3149097772572741, "grad_norm": 0.45062461495399475, "learning_rate": 0.000137046231430094, "loss": 1.4234, "step": 24234 }, { "epoch": 0.31492277180119, "grad_norm": 0.45785340666770935, "learning_rate": 0.0001370436319681826, "loss": 1.3391, "step": 24235 }, { "epoch": 0.31493576634510584, "grad_norm": 0.42377781867980957, "learning_rate": 0.0001370410325062712, "loss": 1.4262, "step": 24236 }, { "epoch": 0.31494876088902174, "grad_norm": 0.5246242880821228, "learning_rate": 0.0001370384330443598, "loss": 1.5391, "step": 24237 }, { "epoch": 0.3149617554329376, "grad_norm": 0.4232649505138397, "learning_rate": 0.00013703583358244846, "loss": 1.41, "step": 24238 }, { "epoch": 0.3149747499768535, "grad_norm": 0.3410038352012634, "learning_rate": 0.00013703323412053706, "loss": 1.0745, "step": 24239 }, { "epoch": 0.31498774452076933, "grad_norm": 0.4006120264530182, "learning_rate": 0.00013703063465862566, "loss": 1.4459, "step": 24240 }, { "epoch": 0.31500073906468523, "grad_norm": 0.45589473843574524, "learning_rate": 0.00013702803519671428, "loss": 1.3432, "step": 24241 }, { "epoch": 0.3150137336086011, "grad_norm": 0.34500938653945923, "learning_rate": 0.0001370254357348029, "loss": 1.3072, "step": 24242 }, { "epoch": 0.315026728152517, "grad_norm": 0.3783598840236664, "learning_rate": 0.00013702283627289153, "loss": 1.2806, "step": 24243 }, { "epoch": 0.3150397226964328, "grad_norm": 0.4570356011390686, "learning_rate": 0.00013702023681098013, "loss": 1.5026, "step": 24244 }, { "epoch": 0.3150527172403487, "grad_norm": 0.42782655358314514, "learning_rate": 0.00013701763734906875, "loss": 1.4194, "step": 24245 }, { "epoch": 0.31506571178426457, "grad_norm": 0.3588072657585144, "learning_rate": 0.00013701503788715738, "loss": 1.4905, "step": 24246 }, { "epoch": 0.31507870632818047, "grad_norm": 0.37493863701820374, "learning_rate": 0.00013701243842524597, "loss": 1.3526, "step": 24247 }, { "epoch": 0.3150917008720963, "grad_norm": 0.3687364161014557, "learning_rate": 0.0001370098389633346, "loss": 1.1768, "step": 24248 }, { "epoch": 0.3151046954160122, "grad_norm": 0.3847581148147583, "learning_rate": 0.0001370072395014232, "loss": 1.3121, "step": 24249 }, { "epoch": 0.31511768995992806, "grad_norm": 0.4969902038574219, "learning_rate": 0.00013700464003951185, "loss": 1.5232, "step": 24250 }, { "epoch": 0.31513068450384396, "grad_norm": 0.3823295533657074, "learning_rate": 0.00013700204057760044, "loss": 1.3681, "step": 24251 }, { "epoch": 0.3151436790477598, "grad_norm": 0.5384612679481506, "learning_rate": 0.00013699944111568904, "loss": 1.5687, "step": 24252 }, { "epoch": 0.3151566735916757, "grad_norm": 0.45925915241241455, "learning_rate": 0.0001369968416537777, "loss": 1.4197, "step": 24253 }, { "epoch": 0.31516966813559155, "grad_norm": 0.33666902780532837, "learning_rate": 0.0001369942421918663, "loss": 1.5279, "step": 24254 }, { "epoch": 0.31518266267950745, "grad_norm": 0.3572155833244324, "learning_rate": 0.00013699164272995491, "loss": 1.4185, "step": 24255 }, { "epoch": 0.3151956572234233, "grad_norm": 0.36339375376701355, "learning_rate": 0.0001369890432680435, "loss": 1.345, "step": 24256 }, { "epoch": 0.3152086517673392, "grad_norm": 0.4763093888759613, "learning_rate": 0.00013698644380613214, "loss": 1.495, "step": 24257 }, { "epoch": 0.31522164631125504, "grad_norm": 0.46020177006721497, "learning_rate": 0.00013698384434422076, "loss": 1.5206, "step": 24258 }, { "epoch": 0.31523464085517094, "grad_norm": 0.36117589473724365, "learning_rate": 0.00013698124488230936, "loss": 1.2139, "step": 24259 }, { "epoch": 0.3152476353990868, "grad_norm": 0.3548513352870941, "learning_rate": 0.00013697864542039798, "loss": 1.3241, "step": 24260 }, { "epoch": 0.3152606299430027, "grad_norm": 0.36290788650512695, "learning_rate": 0.0001369760459584866, "loss": 1.3601, "step": 24261 }, { "epoch": 0.31527362448691854, "grad_norm": 0.31978094577789307, "learning_rate": 0.00013697344649657523, "loss": 1.3904, "step": 24262 }, { "epoch": 0.31528661903083444, "grad_norm": 0.4334924519062042, "learning_rate": 0.00013697084703466383, "loss": 1.4669, "step": 24263 }, { "epoch": 0.3152996135747503, "grad_norm": 0.3809143900871277, "learning_rate": 0.00013696824757275243, "loss": 1.5479, "step": 24264 }, { "epoch": 0.3153126081186662, "grad_norm": 0.39382120966911316, "learning_rate": 0.00013696564811084108, "loss": 1.4626, "step": 24265 }, { "epoch": 0.315325602662582, "grad_norm": 0.347513347864151, "learning_rate": 0.00013696304864892968, "loss": 1.3503, "step": 24266 }, { "epoch": 0.31533859720649793, "grad_norm": 0.3870210647583008, "learning_rate": 0.0001369604491870183, "loss": 1.6351, "step": 24267 }, { "epoch": 0.3153515917504138, "grad_norm": 0.4059775769710541, "learning_rate": 0.0001369578497251069, "loss": 1.271, "step": 24268 }, { "epoch": 0.3153645862943297, "grad_norm": 0.4513508677482605, "learning_rate": 0.00013695525026319552, "loss": 1.4766, "step": 24269 }, { "epoch": 0.3153775808382455, "grad_norm": 0.45753729343414307, "learning_rate": 0.00013695265080128415, "loss": 1.4137, "step": 24270 }, { "epoch": 0.3153905753821614, "grad_norm": 0.39034897089004517, "learning_rate": 0.00013695005133937274, "loss": 1.3052, "step": 24271 }, { "epoch": 0.31540356992607727, "grad_norm": 0.38706445693969727, "learning_rate": 0.00013694745187746137, "loss": 1.5688, "step": 24272 }, { "epoch": 0.31541656446999317, "grad_norm": 0.42655348777770996, "learning_rate": 0.00013694485241555, "loss": 1.3109, "step": 24273 }, { "epoch": 0.315429559013909, "grad_norm": 0.3179061710834503, "learning_rate": 0.00013694225295363862, "loss": 1.3148, "step": 24274 }, { "epoch": 0.3154425535578249, "grad_norm": 0.37439557909965515, "learning_rate": 0.00013693965349172721, "loss": 1.4902, "step": 24275 }, { "epoch": 0.31545554810174076, "grad_norm": 0.3858317732810974, "learning_rate": 0.00013693705402981584, "loss": 1.3114, "step": 24276 }, { "epoch": 0.31546854264565666, "grad_norm": 0.3992762565612793, "learning_rate": 0.00013693445456790446, "loss": 1.4296, "step": 24277 }, { "epoch": 0.3154815371895725, "grad_norm": 0.2564488649368286, "learning_rate": 0.00013693185510599306, "loss": 1.1431, "step": 24278 }, { "epoch": 0.3154945317334884, "grad_norm": 0.36163845658302307, "learning_rate": 0.00013692925564408169, "loss": 1.3885, "step": 24279 }, { "epoch": 0.31550752627740425, "grad_norm": 0.3678258955478668, "learning_rate": 0.00013692665618217028, "loss": 1.443, "step": 24280 }, { "epoch": 0.31552052082132015, "grad_norm": 0.40115487575531006, "learning_rate": 0.0001369240567202589, "loss": 1.502, "step": 24281 }, { "epoch": 0.315533515365236, "grad_norm": 0.3350605070590973, "learning_rate": 0.00013692145725834753, "loss": 1.2839, "step": 24282 }, { "epoch": 0.3155465099091519, "grad_norm": 0.31742534041404724, "learning_rate": 0.00013691885779643613, "loss": 1.3904, "step": 24283 }, { "epoch": 0.31555950445306774, "grad_norm": 0.44456052780151367, "learning_rate": 0.00013691625833452475, "loss": 1.5256, "step": 24284 }, { "epoch": 0.31557249899698364, "grad_norm": 0.36216914653778076, "learning_rate": 0.00013691365887261338, "loss": 1.4848, "step": 24285 }, { "epoch": 0.31558549354089954, "grad_norm": 0.4361882209777832, "learning_rate": 0.000136911059410702, "loss": 1.3248, "step": 24286 }, { "epoch": 0.3155984880848154, "grad_norm": 0.47620388865470886, "learning_rate": 0.0001369084599487906, "loss": 1.5696, "step": 24287 }, { "epoch": 0.3156114826287313, "grad_norm": 0.42277124524116516, "learning_rate": 0.00013690586048687922, "loss": 1.3461, "step": 24288 }, { "epoch": 0.31562447717264713, "grad_norm": 0.3535692095756531, "learning_rate": 0.00013690326102496785, "loss": 1.3456, "step": 24289 }, { "epoch": 0.31563747171656303, "grad_norm": 0.4585002660751343, "learning_rate": 0.00013690066156305645, "loss": 1.5086, "step": 24290 }, { "epoch": 0.3156504662604789, "grad_norm": 0.2959149181842804, "learning_rate": 0.00013689806210114507, "loss": 1.4206, "step": 24291 }, { "epoch": 0.3156634608043948, "grad_norm": 0.44144290685653687, "learning_rate": 0.0001368954626392337, "loss": 1.3529, "step": 24292 }, { "epoch": 0.3156764553483106, "grad_norm": 0.43028753995895386, "learning_rate": 0.0001368928631773223, "loss": 1.495, "step": 24293 }, { "epoch": 0.3156894498922265, "grad_norm": 0.3719899356365204, "learning_rate": 0.00013689026371541092, "loss": 1.5469, "step": 24294 }, { "epoch": 0.31570244443614237, "grad_norm": 0.34897610545158386, "learning_rate": 0.00013688766425349951, "loss": 1.2544, "step": 24295 }, { "epoch": 0.31571543898005827, "grad_norm": 0.39846864342689514, "learning_rate": 0.00013688506479158817, "loss": 1.4804, "step": 24296 }, { "epoch": 0.3157284335239741, "grad_norm": 0.4127960801124573, "learning_rate": 0.00013688246532967676, "loss": 1.4743, "step": 24297 }, { "epoch": 0.31574142806789, "grad_norm": 0.37189939618110657, "learning_rate": 0.0001368798658677654, "loss": 1.426, "step": 24298 }, { "epoch": 0.31575442261180586, "grad_norm": 0.39628031849861145, "learning_rate": 0.00013687726640585399, "loss": 1.4488, "step": 24299 }, { "epoch": 0.31576741715572176, "grad_norm": 0.4661518335342407, "learning_rate": 0.0001368746669439426, "loss": 1.5333, "step": 24300 }, { "epoch": 0.3157804116996376, "grad_norm": 0.37219560146331787, "learning_rate": 0.00013687206748203123, "loss": 1.4406, "step": 24301 }, { "epoch": 0.3157934062435535, "grad_norm": 0.3905365765094757, "learning_rate": 0.00013686946802011983, "loss": 1.3698, "step": 24302 }, { "epoch": 0.31580640078746935, "grad_norm": 0.5504800081253052, "learning_rate": 0.00013686686855820846, "loss": 1.4798, "step": 24303 }, { "epoch": 0.31581939533138526, "grad_norm": 0.4288892447948456, "learning_rate": 0.00013686426909629708, "loss": 1.4789, "step": 24304 }, { "epoch": 0.3158323898753011, "grad_norm": 0.4315957725048065, "learning_rate": 0.0001368616696343857, "loss": 1.6025, "step": 24305 }, { "epoch": 0.315845384419217, "grad_norm": 0.48499569296836853, "learning_rate": 0.0001368590701724743, "loss": 1.4618, "step": 24306 }, { "epoch": 0.31585837896313285, "grad_norm": 0.39639511704444885, "learning_rate": 0.0001368564707105629, "loss": 1.7133, "step": 24307 }, { "epoch": 0.31587137350704875, "grad_norm": 0.41407209634780884, "learning_rate": 0.00013685387124865155, "loss": 1.5126, "step": 24308 }, { "epoch": 0.3158843680509646, "grad_norm": 0.32924962043762207, "learning_rate": 0.00013685127178674015, "loss": 1.3487, "step": 24309 }, { "epoch": 0.3158973625948805, "grad_norm": 0.3358137905597687, "learning_rate": 0.00013684867232482877, "loss": 1.5968, "step": 24310 }, { "epoch": 0.31591035713879634, "grad_norm": 0.5075027942657471, "learning_rate": 0.00013684607286291737, "loss": 1.6422, "step": 24311 }, { "epoch": 0.31592335168271224, "grad_norm": 0.5027745366096497, "learning_rate": 0.000136843473401006, "loss": 1.3337, "step": 24312 }, { "epoch": 0.3159363462266281, "grad_norm": 0.42853492498397827, "learning_rate": 0.00013684087393909462, "loss": 1.2563, "step": 24313 }, { "epoch": 0.315949340770544, "grad_norm": 0.3679285943508148, "learning_rate": 0.00013683827447718322, "loss": 1.3319, "step": 24314 }, { "epoch": 0.31596233531445983, "grad_norm": 0.30248886346817017, "learning_rate": 0.00013683567501527184, "loss": 1.4425, "step": 24315 }, { "epoch": 0.31597532985837573, "grad_norm": 0.3021405339241028, "learning_rate": 0.00013683307555336047, "loss": 1.1713, "step": 24316 }, { "epoch": 0.3159883244022916, "grad_norm": 0.4111071527004242, "learning_rate": 0.0001368304760914491, "loss": 1.3056, "step": 24317 }, { "epoch": 0.3160013189462075, "grad_norm": 0.40387460589408875, "learning_rate": 0.0001368278766295377, "loss": 1.3966, "step": 24318 }, { "epoch": 0.3160143134901233, "grad_norm": 0.42876961827278137, "learning_rate": 0.00013682527716762629, "loss": 1.4523, "step": 24319 }, { "epoch": 0.3160273080340392, "grad_norm": 0.4366833567619324, "learning_rate": 0.00013682267770571494, "loss": 1.3512, "step": 24320 }, { "epoch": 0.31604030257795507, "grad_norm": 0.2831141948699951, "learning_rate": 0.00013682007824380353, "loss": 1.4583, "step": 24321 }, { "epoch": 0.31605329712187097, "grad_norm": 0.5409504175186157, "learning_rate": 0.00013681747878189216, "loss": 1.5404, "step": 24322 }, { "epoch": 0.3160662916657868, "grad_norm": 0.30610910058021545, "learning_rate": 0.00013681487931998076, "loss": 1.3331, "step": 24323 }, { "epoch": 0.3160792862097027, "grad_norm": 0.40455469489097595, "learning_rate": 0.00013681227985806938, "loss": 1.6068, "step": 24324 }, { "epoch": 0.31609228075361856, "grad_norm": 0.3958268463611603, "learning_rate": 0.000136809680396158, "loss": 1.3528, "step": 24325 }, { "epoch": 0.31610527529753446, "grad_norm": 0.4272357225418091, "learning_rate": 0.0001368070809342466, "loss": 1.2241, "step": 24326 }, { "epoch": 0.3161182698414503, "grad_norm": 0.40770307183265686, "learning_rate": 0.00013680448147233525, "loss": 1.3467, "step": 24327 }, { "epoch": 0.3161312643853662, "grad_norm": 0.35952791571617126, "learning_rate": 0.00013680188201042385, "loss": 1.0945, "step": 24328 }, { "epoch": 0.31614425892928205, "grad_norm": 0.4818912148475647, "learning_rate": 0.00013679928254851248, "loss": 1.3933, "step": 24329 }, { "epoch": 0.31615725347319795, "grad_norm": 0.34593772888183594, "learning_rate": 0.00013679668308660107, "loss": 1.2958, "step": 24330 }, { "epoch": 0.3161702480171138, "grad_norm": 0.37886014580726624, "learning_rate": 0.0001367940836246897, "loss": 1.5966, "step": 24331 }, { "epoch": 0.3161832425610297, "grad_norm": 0.42048585414886475, "learning_rate": 0.00013679148416277832, "loss": 1.4293, "step": 24332 }, { "epoch": 0.31619623710494554, "grad_norm": 0.4240248501300812, "learning_rate": 0.00013678888470086692, "loss": 1.45, "step": 24333 }, { "epoch": 0.31620923164886144, "grad_norm": 0.3015948534011841, "learning_rate": 0.00013678628523895554, "loss": 1.1447, "step": 24334 }, { "epoch": 0.3162222261927773, "grad_norm": 0.3940720558166504, "learning_rate": 0.00013678368577704417, "loss": 1.4425, "step": 24335 }, { "epoch": 0.3162352207366932, "grad_norm": 0.43135395646095276, "learning_rate": 0.00013678108631513277, "loss": 1.3101, "step": 24336 }, { "epoch": 0.31624821528060904, "grad_norm": 0.3022572100162506, "learning_rate": 0.0001367784868532214, "loss": 1.4673, "step": 24337 }, { "epoch": 0.31626120982452494, "grad_norm": 0.3595219552516937, "learning_rate": 0.00013677588739131, "loss": 1.4218, "step": 24338 }, { "epoch": 0.3162742043684408, "grad_norm": 0.3659757673740387, "learning_rate": 0.00013677328792939864, "loss": 1.2944, "step": 24339 }, { "epoch": 0.3162871989123567, "grad_norm": 0.4848015308380127, "learning_rate": 0.00013677068846748724, "loss": 1.4138, "step": 24340 }, { "epoch": 0.3163001934562725, "grad_norm": 0.38484156131744385, "learning_rate": 0.00013676808900557586, "loss": 1.605, "step": 24341 }, { "epoch": 0.3163131880001884, "grad_norm": 0.40679213404655457, "learning_rate": 0.00013676548954366446, "loss": 1.4427, "step": 24342 }, { "epoch": 0.3163261825441043, "grad_norm": 0.3918675184249878, "learning_rate": 0.00013676289008175308, "loss": 1.3539, "step": 24343 }, { "epoch": 0.3163391770880202, "grad_norm": 0.40854018926620483, "learning_rate": 0.0001367602906198417, "loss": 1.4043, "step": 24344 }, { "epoch": 0.316352171631936, "grad_norm": 0.5385652184486389, "learning_rate": 0.0001367576911579303, "loss": 1.3421, "step": 24345 }, { "epoch": 0.3163651661758519, "grad_norm": 0.41769295930862427, "learning_rate": 0.00013675509169601893, "loss": 1.4397, "step": 24346 }, { "epoch": 0.31637816071976776, "grad_norm": 0.46131348609924316, "learning_rate": 0.00013675249223410755, "loss": 1.2472, "step": 24347 }, { "epoch": 0.31639115526368367, "grad_norm": 0.3219773471355438, "learning_rate": 0.00013674989277219615, "loss": 1.246, "step": 24348 }, { "epoch": 0.3164041498075995, "grad_norm": 0.4026868939399719, "learning_rate": 0.00013674729331028478, "loss": 1.4756, "step": 24349 }, { "epoch": 0.3164171443515154, "grad_norm": 0.4557186961174011, "learning_rate": 0.00013674469384837337, "loss": 1.6064, "step": 24350 }, { "epoch": 0.31643013889543126, "grad_norm": 0.4154195487499237, "learning_rate": 0.00013674209438646202, "loss": 1.5842, "step": 24351 }, { "epoch": 0.31644313343934716, "grad_norm": 0.44961991906166077, "learning_rate": 0.00013673949492455062, "loss": 1.3923, "step": 24352 }, { "epoch": 0.316456127983263, "grad_norm": 0.4193685054779053, "learning_rate": 0.00013673689546263925, "loss": 1.4905, "step": 24353 }, { "epoch": 0.3164691225271789, "grad_norm": 0.4472082555294037, "learning_rate": 0.00013673429600072784, "loss": 1.2917, "step": 24354 }, { "epoch": 0.31648211707109475, "grad_norm": 0.3476710617542267, "learning_rate": 0.00013673169653881647, "loss": 1.1783, "step": 24355 }, { "epoch": 0.31649511161501065, "grad_norm": 0.4199211895465851, "learning_rate": 0.0001367290970769051, "loss": 1.2929, "step": 24356 }, { "epoch": 0.3165081061589265, "grad_norm": 0.3336026072502136, "learning_rate": 0.0001367264976149937, "loss": 1.4057, "step": 24357 }, { "epoch": 0.3165211007028424, "grad_norm": 0.4056035280227661, "learning_rate": 0.00013672389815308231, "loss": 1.3765, "step": 24358 }, { "epoch": 0.31653409524675824, "grad_norm": 0.32808977365493774, "learning_rate": 0.00013672129869117094, "loss": 1.3913, "step": 24359 }, { "epoch": 0.31654708979067414, "grad_norm": 0.4635668992996216, "learning_rate": 0.00013671869922925956, "loss": 1.3776, "step": 24360 }, { "epoch": 0.31656008433459004, "grad_norm": 0.42094820737838745, "learning_rate": 0.00013671609976734816, "loss": 1.4913, "step": 24361 }, { "epoch": 0.3165730788785059, "grad_norm": 0.508424699306488, "learning_rate": 0.00013671350030543676, "loss": 1.316, "step": 24362 }, { "epoch": 0.3165860734224218, "grad_norm": 0.42761772871017456, "learning_rate": 0.0001367109008435254, "loss": 1.3915, "step": 24363 }, { "epoch": 0.31659906796633763, "grad_norm": 0.3329997658729553, "learning_rate": 0.000136708301381614, "loss": 1.5453, "step": 24364 }, { "epoch": 0.31661206251025353, "grad_norm": 0.47200146317481995, "learning_rate": 0.00013670570191970263, "loss": 1.4411, "step": 24365 }, { "epoch": 0.3166250570541694, "grad_norm": 0.3911210298538208, "learning_rate": 0.00013670310245779126, "loss": 1.4591, "step": 24366 }, { "epoch": 0.3166380515980853, "grad_norm": 0.36426842212677, "learning_rate": 0.00013670050299587985, "loss": 1.2939, "step": 24367 }, { "epoch": 0.3166510461420011, "grad_norm": 0.48546144366264343, "learning_rate": 0.00013669790353396848, "loss": 1.4095, "step": 24368 }, { "epoch": 0.316664040685917, "grad_norm": 0.43470498919487, "learning_rate": 0.00013669530407205708, "loss": 1.5054, "step": 24369 }, { "epoch": 0.31667703522983287, "grad_norm": 0.29904910922050476, "learning_rate": 0.00013669270461014573, "loss": 1.3634, "step": 24370 }, { "epoch": 0.31669002977374877, "grad_norm": 0.4916445314884186, "learning_rate": 0.00013669010514823432, "loss": 1.4675, "step": 24371 }, { "epoch": 0.3167030243176646, "grad_norm": 0.42997390031814575, "learning_rate": 0.00013668750568632295, "loss": 1.4468, "step": 24372 }, { "epoch": 0.3167160188615805, "grad_norm": 0.4192761182785034, "learning_rate": 0.00013668490622441155, "loss": 1.3557, "step": 24373 }, { "epoch": 0.31672901340549636, "grad_norm": 0.46975022554397583, "learning_rate": 0.00013668230676250017, "loss": 1.5046, "step": 24374 }, { "epoch": 0.31674200794941226, "grad_norm": 0.3747991621494293, "learning_rate": 0.0001366797073005888, "loss": 1.4379, "step": 24375 }, { "epoch": 0.3167550024933281, "grad_norm": 0.3558545708656311, "learning_rate": 0.0001366771078386774, "loss": 1.2838, "step": 24376 }, { "epoch": 0.316767997037244, "grad_norm": 0.45769351720809937, "learning_rate": 0.00013667450837676602, "loss": 1.5688, "step": 24377 }, { "epoch": 0.31678099158115985, "grad_norm": 0.367517352104187, "learning_rate": 0.00013667190891485464, "loss": 1.2833, "step": 24378 }, { "epoch": 0.31679398612507575, "grad_norm": 0.36312514543533325, "learning_rate": 0.00013666930945294324, "loss": 1.3183, "step": 24379 }, { "epoch": 0.3168069806689916, "grad_norm": 0.40454697608947754, "learning_rate": 0.00013666670999103186, "loss": 1.2847, "step": 24380 }, { "epoch": 0.3168199752129075, "grad_norm": 0.4117373526096344, "learning_rate": 0.00013666411052912046, "loss": 1.2778, "step": 24381 }, { "epoch": 0.31683296975682335, "grad_norm": 0.42060205340385437, "learning_rate": 0.0001366615110672091, "loss": 1.4025, "step": 24382 }, { "epoch": 0.31684596430073925, "grad_norm": 0.36232438683509827, "learning_rate": 0.0001366589116052977, "loss": 1.4401, "step": 24383 }, { "epoch": 0.3168589588446551, "grad_norm": 0.38088318705558777, "learning_rate": 0.00013665631214338633, "loss": 1.3573, "step": 24384 }, { "epoch": 0.316871953388571, "grad_norm": 0.37629103660583496, "learning_rate": 0.00013665371268147493, "loss": 1.4336, "step": 24385 }, { "epoch": 0.31688494793248684, "grad_norm": 0.29912474751472473, "learning_rate": 0.00013665111321956356, "loss": 1.3813, "step": 24386 }, { "epoch": 0.31689794247640274, "grad_norm": 0.3750167787075043, "learning_rate": 0.00013664851375765218, "loss": 1.4466, "step": 24387 }, { "epoch": 0.3169109370203186, "grad_norm": 0.395393431186676, "learning_rate": 0.00013664591429574078, "loss": 1.3905, "step": 24388 }, { "epoch": 0.3169239315642345, "grad_norm": 0.40814340114593506, "learning_rate": 0.0001366433148338294, "loss": 1.5117, "step": 24389 }, { "epoch": 0.31693692610815033, "grad_norm": 0.31867504119873047, "learning_rate": 0.00013664071537191803, "loss": 1.2665, "step": 24390 }, { "epoch": 0.31694992065206623, "grad_norm": 0.3972594738006592, "learning_rate": 0.00013663811591000662, "loss": 1.527, "step": 24391 }, { "epoch": 0.3169629151959821, "grad_norm": 0.3781193196773529, "learning_rate": 0.00013663551644809525, "loss": 1.5671, "step": 24392 }, { "epoch": 0.316975909739898, "grad_norm": 0.3944792151451111, "learning_rate": 0.00013663291698618385, "loss": 1.414, "step": 24393 }, { "epoch": 0.3169889042838138, "grad_norm": 0.40063533186912537, "learning_rate": 0.0001366303175242725, "loss": 1.4716, "step": 24394 }, { "epoch": 0.3170018988277297, "grad_norm": 0.43710336089134216, "learning_rate": 0.0001366277180623611, "loss": 1.3416, "step": 24395 }, { "epoch": 0.31701489337164557, "grad_norm": 0.3572905659675598, "learning_rate": 0.00013662511860044972, "loss": 1.3681, "step": 24396 }, { "epoch": 0.31702788791556147, "grad_norm": 0.37871429324150085, "learning_rate": 0.00013662251913853832, "loss": 1.3879, "step": 24397 }, { "epoch": 0.3170408824594773, "grad_norm": 0.3703164756298065, "learning_rate": 0.00013661991967662694, "loss": 1.4376, "step": 24398 }, { "epoch": 0.3170538770033932, "grad_norm": 0.38005539774894714, "learning_rate": 0.00013661732021471557, "loss": 1.4788, "step": 24399 }, { "epoch": 0.31706687154730906, "grad_norm": 0.3313455283641815, "learning_rate": 0.00013661472075280416, "loss": 1.2636, "step": 24400 }, { "epoch": 0.31707986609122496, "grad_norm": 0.43130943179130554, "learning_rate": 0.00013661212129089282, "loss": 1.5056, "step": 24401 }, { "epoch": 0.3170928606351408, "grad_norm": 0.39680278301239014, "learning_rate": 0.0001366095218289814, "loss": 1.5009, "step": 24402 }, { "epoch": 0.3171058551790567, "grad_norm": 0.40444695949554443, "learning_rate": 0.00013660692236707, "loss": 1.4351, "step": 24403 }, { "epoch": 0.31711884972297255, "grad_norm": 0.4056309163570404, "learning_rate": 0.00013660432290515863, "loss": 1.4454, "step": 24404 }, { "epoch": 0.31713184426688845, "grad_norm": 0.39997339248657227, "learning_rate": 0.00013660172344324726, "loss": 1.3238, "step": 24405 }, { "epoch": 0.3171448388108043, "grad_norm": 0.3984402120113373, "learning_rate": 0.00013659912398133588, "loss": 1.3976, "step": 24406 }, { "epoch": 0.3171578333547202, "grad_norm": 0.3693009614944458, "learning_rate": 0.00013659652451942448, "loss": 1.4603, "step": 24407 }, { "epoch": 0.31717082789863604, "grad_norm": 0.28103724122047424, "learning_rate": 0.0001365939250575131, "loss": 1.3755, "step": 24408 }, { "epoch": 0.31718382244255194, "grad_norm": 0.40090128779411316, "learning_rate": 0.00013659132559560173, "loss": 1.4489, "step": 24409 }, { "epoch": 0.3171968169864678, "grad_norm": 0.44954749941825867, "learning_rate": 0.00013658872613369033, "loss": 1.4129, "step": 24410 }, { "epoch": 0.3172098115303837, "grad_norm": 0.3736685812473297, "learning_rate": 0.00013658612667177895, "loss": 1.4027, "step": 24411 }, { "epoch": 0.31722280607429953, "grad_norm": 0.41179800033569336, "learning_rate": 0.00013658352720986755, "loss": 1.5489, "step": 24412 }, { "epoch": 0.31723580061821544, "grad_norm": 0.32932525873184204, "learning_rate": 0.0001365809277479562, "loss": 1.3133, "step": 24413 }, { "epoch": 0.3172487951621313, "grad_norm": 0.3847118318080902, "learning_rate": 0.0001365783282860448, "loss": 1.5251, "step": 24414 }, { "epoch": 0.3172617897060472, "grad_norm": 0.3626595735549927, "learning_rate": 0.0001365757288241334, "loss": 1.5451, "step": 24415 }, { "epoch": 0.317274784249963, "grad_norm": 0.37977007031440735, "learning_rate": 0.00013657312936222202, "loss": 1.3042, "step": 24416 }, { "epoch": 0.3172877787938789, "grad_norm": 0.34163352847099304, "learning_rate": 0.00013657052990031064, "loss": 1.4929, "step": 24417 }, { "epoch": 0.3173007733377948, "grad_norm": 0.5282631516456604, "learning_rate": 0.00013656793043839927, "loss": 1.5479, "step": 24418 }, { "epoch": 0.3173137678817107, "grad_norm": 0.3378818929195404, "learning_rate": 0.00013656533097648787, "loss": 1.4773, "step": 24419 }, { "epoch": 0.3173267624256265, "grad_norm": 0.3953704237937927, "learning_rate": 0.0001365627315145765, "loss": 1.4961, "step": 24420 }, { "epoch": 0.3173397569695424, "grad_norm": 0.16165396571159363, "learning_rate": 0.00013656013205266512, "loss": 1.0871, "step": 24421 }, { "epoch": 0.31735275151345826, "grad_norm": 0.35105374455451965, "learning_rate": 0.0001365575325907537, "loss": 1.426, "step": 24422 }, { "epoch": 0.31736574605737417, "grad_norm": 0.4470350742340088, "learning_rate": 0.00013655493312884234, "loss": 1.4488, "step": 24423 }, { "epoch": 0.31737874060129, "grad_norm": 0.43733686208724976, "learning_rate": 0.00013655233366693093, "loss": 1.411, "step": 24424 }, { "epoch": 0.3173917351452059, "grad_norm": 0.41593149304389954, "learning_rate": 0.00013654973420501959, "loss": 1.3897, "step": 24425 }, { "epoch": 0.31740472968912176, "grad_norm": 0.36359885334968567, "learning_rate": 0.00013654713474310818, "loss": 1.3737, "step": 24426 }, { "epoch": 0.31741772423303766, "grad_norm": 0.38714805245399475, "learning_rate": 0.0001365445352811968, "loss": 1.423, "step": 24427 }, { "epoch": 0.3174307187769535, "grad_norm": 0.3751436173915863, "learning_rate": 0.0001365419358192854, "loss": 1.4568, "step": 24428 }, { "epoch": 0.3174437133208694, "grad_norm": 0.4020783007144928, "learning_rate": 0.00013653933635737403, "loss": 1.3229, "step": 24429 }, { "epoch": 0.31745670786478525, "grad_norm": 0.4770466685295105, "learning_rate": 0.00013653673689546265, "loss": 1.5079, "step": 24430 }, { "epoch": 0.31746970240870115, "grad_norm": 0.33619800209999084, "learning_rate": 0.00013653413743355125, "loss": 1.0921, "step": 24431 }, { "epoch": 0.317482696952617, "grad_norm": 0.4646088480949402, "learning_rate": 0.00013653153797163988, "loss": 1.4453, "step": 24432 }, { "epoch": 0.3174956914965329, "grad_norm": 0.4613949656486511, "learning_rate": 0.0001365289385097285, "loss": 1.4958, "step": 24433 }, { "epoch": 0.31750868604044874, "grad_norm": 0.45388227701187134, "learning_rate": 0.0001365263390478171, "loss": 1.2837, "step": 24434 }, { "epoch": 0.31752168058436464, "grad_norm": 0.4936314523220062, "learning_rate": 0.00013652373958590572, "loss": 1.453, "step": 24435 }, { "epoch": 0.3175346751282805, "grad_norm": 0.39332345128059387, "learning_rate": 0.00013652114012399432, "loss": 1.4375, "step": 24436 }, { "epoch": 0.3175476696721964, "grad_norm": 0.4769171476364136, "learning_rate": 0.00013651854066208297, "loss": 1.3027, "step": 24437 }, { "epoch": 0.3175606642161123, "grad_norm": 0.3637208342552185, "learning_rate": 0.00013651594120017157, "loss": 1.3096, "step": 24438 }, { "epoch": 0.31757365876002813, "grad_norm": 0.3802751302719116, "learning_rate": 0.0001365133417382602, "loss": 1.3362, "step": 24439 }, { "epoch": 0.31758665330394403, "grad_norm": 0.4256417155265808, "learning_rate": 0.00013651074227634882, "loss": 1.4033, "step": 24440 }, { "epoch": 0.3175996478478599, "grad_norm": 0.38301560282707214, "learning_rate": 0.00013650814281443742, "loss": 1.3392, "step": 24441 }, { "epoch": 0.3176126423917758, "grad_norm": 0.3901337683200836, "learning_rate": 0.00013650554335252604, "loss": 1.2899, "step": 24442 }, { "epoch": 0.3176256369356916, "grad_norm": 0.4726414084434509, "learning_rate": 0.00013650294389061464, "loss": 1.4639, "step": 24443 }, { "epoch": 0.3176386314796075, "grad_norm": 0.4089820086956024, "learning_rate": 0.0001365003444287033, "loss": 1.5096, "step": 24444 }, { "epoch": 0.31765162602352337, "grad_norm": 0.43032199144363403, "learning_rate": 0.00013649774496679189, "loss": 1.364, "step": 24445 }, { "epoch": 0.31766462056743927, "grad_norm": 0.47647249698638916, "learning_rate": 0.00013649514550488048, "loss": 1.5068, "step": 24446 }, { "epoch": 0.3176776151113551, "grad_norm": 0.4064078629016876, "learning_rate": 0.0001364925460429691, "loss": 1.294, "step": 24447 }, { "epoch": 0.317690609655271, "grad_norm": 0.4681776463985443, "learning_rate": 0.00013648994658105773, "loss": 1.3685, "step": 24448 }, { "epoch": 0.31770360419918686, "grad_norm": 0.4651210606098175, "learning_rate": 0.00013648734711914636, "loss": 1.4488, "step": 24449 }, { "epoch": 0.31771659874310276, "grad_norm": 0.4250487983226776, "learning_rate": 0.00013648474765723495, "loss": 1.4641, "step": 24450 }, { "epoch": 0.3177295932870186, "grad_norm": 0.4269929528236389, "learning_rate": 0.00013648214819532358, "loss": 1.4989, "step": 24451 }, { "epoch": 0.3177425878309345, "grad_norm": 0.38231131434440613, "learning_rate": 0.0001364795487334122, "loss": 1.4425, "step": 24452 }, { "epoch": 0.31775558237485035, "grad_norm": 0.4581094980239868, "learning_rate": 0.0001364769492715008, "loss": 1.4599, "step": 24453 }, { "epoch": 0.31776857691876625, "grad_norm": 0.42847728729248047, "learning_rate": 0.00013647434980958943, "loss": 1.3118, "step": 24454 }, { "epoch": 0.3177815714626821, "grad_norm": 0.3296990692615509, "learning_rate": 0.00013647175034767802, "loss": 1.302, "step": 24455 }, { "epoch": 0.317794566006598, "grad_norm": 0.3605743646621704, "learning_rate": 0.00013646915088576667, "loss": 1.4442, "step": 24456 }, { "epoch": 0.31780756055051385, "grad_norm": 0.3527849018573761, "learning_rate": 0.00013646655142385527, "loss": 1.4337, "step": 24457 }, { "epoch": 0.31782055509442975, "grad_norm": 0.2750887870788574, "learning_rate": 0.00013646395196194387, "loss": 1.4747, "step": 24458 }, { "epoch": 0.3178335496383456, "grad_norm": 0.33957940340042114, "learning_rate": 0.0001364613525000325, "loss": 1.4128, "step": 24459 }, { "epoch": 0.3178465441822615, "grad_norm": 0.34004250168800354, "learning_rate": 0.00013645875303812112, "loss": 1.1761, "step": 24460 }, { "epoch": 0.31785953872617734, "grad_norm": 0.38412028551101685, "learning_rate": 0.00013645615357620974, "loss": 1.4794, "step": 24461 }, { "epoch": 0.31787253327009324, "grad_norm": 0.48290520906448364, "learning_rate": 0.00013645355411429834, "loss": 1.4543, "step": 24462 }, { "epoch": 0.3178855278140091, "grad_norm": 0.45453929901123047, "learning_rate": 0.00013645095465238696, "loss": 1.3192, "step": 24463 }, { "epoch": 0.317898522357925, "grad_norm": 0.47529467940330505, "learning_rate": 0.0001364483551904756, "loss": 1.4917, "step": 24464 }, { "epoch": 0.31791151690184083, "grad_norm": 0.35940366983413696, "learning_rate": 0.00013644575572856419, "loss": 1.3336, "step": 24465 }, { "epoch": 0.31792451144575673, "grad_norm": 0.4172157347202301, "learning_rate": 0.0001364431562666528, "loss": 1.433, "step": 24466 }, { "epoch": 0.3179375059896726, "grad_norm": 0.4016782343387604, "learning_rate": 0.0001364405568047414, "loss": 1.3937, "step": 24467 }, { "epoch": 0.3179505005335885, "grad_norm": 0.40001797676086426, "learning_rate": 0.00013643795734283006, "loss": 1.2662, "step": 24468 }, { "epoch": 0.3179634950775043, "grad_norm": 0.34742429852485657, "learning_rate": 0.00013643535788091866, "loss": 1.2846, "step": 24469 }, { "epoch": 0.3179764896214202, "grad_norm": 0.34739571809768677, "learning_rate": 0.00013643275841900725, "loss": 1.2309, "step": 24470 }, { "epoch": 0.31798948416533607, "grad_norm": 0.3443008065223694, "learning_rate": 0.00013643015895709588, "loss": 1.4515, "step": 24471 }, { "epoch": 0.31800247870925197, "grad_norm": 0.3984827995300293, "learning_rate": 0.0001364275594951845, "loss": 1.4079, "step": 24472 }, { "epoch": 0.3180154732531678, "grad_norm": 0.38483062386512756, "learning_rate": 0.00013642496003327313, "loss": 1.4874, "step": 24473 }, { "epoch": 0.3180284677970837, "grad_norm": 0.34384283423423767, "learning_rate": 0.00013642236057136172, "loss": 1.4637, "step": 24474 }, { "epoch": 0.31804146234099956, "grad_norm": 0.25040942430496216, "learning_rate": 0.00013641976110945035, "loss": 1.3285, "step": 24475 }, { "epoch": 0.31805445688491546, "grad_norm": 0.4131932258605957, "learning_rate": 0.00013641716164753897, "loss": 1.5199, "step": 24476 }, { "epoch": 0.3180674514288313, "grad_norm": 0.4325697124004364, "learning_rate": 0.00013641456218562757, "loss": 1.4554, "step": 24477 }, { "epoch": 0.3180804459727472, "grad_norm": 0.3533201813697815, "learning_rate": 0.0001364119627237162, "loss": 1.5592, "step": 24478 }, { "epoch": 0.31809344051666305, "grad_norm": 0.42637214064598083, "learning_rate": 0.00013640936326180482, "loss": 1.5279, "step": 24479 }, { "epoch": 0.31810643506057895, "grad_norm": 0.34816643595695496, "learning_rate": 0.00013640676379989344, "loss": 1.4878, "step": 24480 }, { "epoch": 0.3181194296044948, "grad_norm": 0.3948005437850952, "learning_rate": 0.00013640416433798204, "loss": 1.3542, "step": 24481 }, { "epoch": 0.3181324241484107, "grad_norm": 0.343655526638031, "learning_rate": 0.00013640156487607067, "loss": 1.2842, "step": 24482 }, { "epoch": 0.31814541869232654, "grad_norm": 0.40343061089515686, "learning_rate": 0.0001363989654141593, "loss": 1.3605, "step": 24483 }, { "epoch": 0.31815841323624244, "grad_norm": 0.4316779375076294, "learning_rate": 0.0001363963659522479, "loss": 1.4025, "step": 24484 }, { "epoch": 0.3181714077801583, "grad_norm": 0.41991502046585083, "learning_rate": 0.0001363937664903365, "loss": 1.3039, "step": 24485 }, { "epoch": 0.3181844023240742, "grad_norm": 0.46094104647636414, "learning_rate": 0.0001363911670284251, "loss": 1.2499, "step": 24486 }, { "epoch": 0.31819739686799003, "grad_norm": 0.39675092697143555, "learning_rate": 0.00013638856756651373, "loss": 1.4136, "step": 24487 }, { "epoch": 0.31821039141190594, "grad_norm": 0.40125471353530884, "learning_rate": 0.00013638596810460236, "loss": 1.3759, "step": 24488 }, { "epoch": 0.3182233859558218, "grad_norm": 0.4595598578453064, "learning_rate": 0.00013638336864269096, "loss": 1.4896, "step": 24489 }, { "epoch": 0.3182363804997377, "grad_norm": 0.33391091227531433, "learning_rate": 0.00013638076918077958, "loss": 1.3795, "step": 24490 }, { "epoch": 0.3182493750436535, "grad_norm": 0.32041823863983154, "learning_rate": 0.0001363781697188682, "loss": 1.431, "step": 24491 }, { "epoch": 0.3182623695875694, "grad_norm": 0.3728831112384796, "learning_rate": 0.00013637557025695683, "loss": 1.4762, "step": 24492 }, { "epoch": 0.31827536413148527, "grad_norm": 0.44518592953681946, "learning_rate": 0.00013637297079504543, "loss": 1.3638, "step": 24493 }, { "epoch": 0.3182883586754012, "grad_norm": 0.4794606864452362, "learning_rate": 0.00013637037133313405, "loss": 1.5528, "step": 24494 }, { "epoch": 0.318301353219317, "grad_norm": 0.305264949798584, "learning_rate": 0.00013636777187122268, "loss": 1.3475, "step": 24495 }, { "epoch": 0.3183143477632329, "grad_norm": 0.3816368281841278, "learning_rate": 0.00013636517240931127, "loss": 1.3319, "step": 24496 }, { "epoch": 0.31832734230714876, "grad_norm": 0.30250993371009827, "learning_rate": 0.0001363625729473999, "loss": 1.3405, "step": 24497 }, { "epoch": 0.31834033685106466, "grad_norm": 0.4202573299407959, "learning_rate": 0.0001363599734854885, "loss": 1.3743, "step": 24498 }, { "epoch": 0.3183533313949805, "grad_norm": 0.36171600222587585, "learning_rate": 0.00013635737402357712, "loss": 1.5809, "step": 24499 }, { "epoch": 0.3183663259388964, "grad_norm": 0.460409015417099, "learning_rate": 0.00013635477456166574, "loss": 1.5428, "step": 24500 }, { "epoch": 0.31837932048281226, "grad_norm": 0.4011229872703552, "learning_rate": 0.00013635217509975434, "loss": 1.3342, "step": 24501 }, { "epoch": 0.31839231502672816, "grad_norm": 0.33957865834236145, "learning_rate": 0.00013634957563784297, "loss": 1.4648, "step": 24502 }, { "epoch": 0.318405309570644, "grad_norm": 0.42703142762184143, "learning_rate": 0.0001363469761759316, "loss": 1.4846, "step": 24503 }, { "epoch": 0.3184183041145599, "grad_norm": 0.3616746962070465, "learning_rate": 0.00013634437671402022, "loss": 1.2663, "step": 24504 }, { "epoch": 0.31843129865847575, "grad_norm": 0.45498013496398926, "learning_rate": 0.0001363417772521088, "loss": 1.3494, "step": 24505 }, { "epoch": 0.31844429320239165, "grad_norm": 0.4362391233444214, "learning_rate": 0.00013633917779019744, "loss": 1.203, "step": 24506 }, { "epoch": 0.3184572877463075, "grad_norm": 0.42321792244911194, "learning_rate": 0.00013633657832828606, "loss": 1.2931, "step": 24507 }, { "epoch": 0.3184702822902234, "grad_norm": 0.42419230937957764, "learning_rate": 0.00013633397886637466, "loss": 1.3792, "step": 24508 }, { "epoch": 0.31848327683413924, "grad_norm": 0.46707555651664734, "learning_rate": 0.00013633137940446328, "loss": 1.3083, "step": 24509 }, { "epoch": 0.31849627137805514, "grad_norm": 0.3309074342250824, "learning_rate": 0.00013632877994255188, "loss": 1.4309, "step": 24510 }, { "epoch": 0.318509265921971, "grad_norm": 0.4789426624774933, "learning_rate": 0.00013632618048064053, "loss": 1.4981, "step": 24511 }, { "epoch": 0.3185222604658869, "grad_norm": 0.4254739582538605, "learning_rate": 0.00013632358101872913, "loss": 1.3034, "step": 24512 }, { "epoch": 0.31853525500980273, "grad_norm": 0.4464763104915619, "learning_rate": 0.00013632098155681773, "loss": 1.424, "step": 24513 }, { "epoch": 0.31854824955371863, "grad_norm": 0.44271036982536316, "learning_rate": 0.00013631838209490638, "loss": 1.4236, "step": 24514 }, { "epoch": 0.31856124409763453, "grad_norm": 0.4108887016773224, "learning_rate": 0.00013631578263299498, "loss": 1.311, "step": 24515 }, { "epoch": 0.3185742386415504, "grad_norm": 0.42524009943008423, "learning_rate": 0.0001363131831710836, "loss": 1.3427, "step": 24516 }, { "epoch": 0.3185872331854663, "grad_norm": 0.3634762465953827, "learning_rate": 0.0001363105837091722, "loss": 1.4374, "step": 24517 }, { "epoch": 0.3186002277293821, "grad_norm": 0.34638872742652893, "learning_rate": 0.00013630798424726082, "loss": 1.2739, "step": 24518 }, { "epoch": 0.318613222273298, "grad_norm": 0.5592221617698669, "learning_rate": 0.00013630538478534945, "loss": 1.3902, "step": 24519 }, { "epoch": 0.31862621681721387, "grad_norm": 0.4069071114063263, "learning_rate": 0.00013630278532343804, "loss": 1.432, "step": 24520 }, { "epoch": 0.31863921136112977, "grad_norm": 0.44740116596221924, "learning_rate": 0.00013630018586152667, "loss": 1.4657, "step": 24521 }, { "epoch": 0.3186522059050456, "grad_norm": 0.5179588794708252, "learning_rate": 0.0001362975863996153, "loss": 1.3662, "step": 24522 }, { "epoch": 0.3186652004489615, "grad_norm": 0.46699976921081543, "learning_rate": 0.00013629498693770392, "loss": 1.2919, "step": 24523 }, { "epoch": 0.31867819499287736, "grad_norm": 0.3441481590270996, "learning_rate": 0.00013629238747579252, "loss": 1.4648, "step": 24524 }, { "epoch": 0.31869118953679326, "grad_norm": 0.38569527864456177, "learning_rate": 0.0001362897880138811, "loss": 1.4444, "step": 24525 }, { "epoch": 0.3187041840807091, "grad_norm": 0.3232535123825073, "learning_rate": 0.00013628718855196976, "loss": 1.2475, "step": 24526 }, { "epoch": 0.318717178624625, "grad_norm": 0.36247608065605164, "learning_rate": 0.00013628458909005836, "loss": 1.2857, "step": 24527 }, { "epoch": 0.31873017316854085, "grad_norm": 0.448169082403183, "learning_rate": 0.00013628198962814699, "loss": 1.4624, "step": 24528 }, { "epoch": 0.31874316771245675, "grad_norm": 0.37179121375083923, "learning_rate": 0.00013627939016623558, "loss": 1.3567, "step": 24529 }, { "epoch": 0.3187561622563726, "grad_norm": 0.4183000922203064, "learning_rate": 0.0001362767907043242, "loss": 1.1763, "step": 24530 }, { "epoch": 0.3187691568002885, "grad_norm": 0.4072781205177307, "learning_rate": 0.00013627419124241283, "loss": 1.1975, "step": 24531 }, { "epoch": 0.31878215134420435, "grad_norm": 0.48555609583854675, "learning_rate": 0.00013627159178050143, "loss": 1.5033, "step": 24532 }, { "epoch": 0.31879514588812025, "grad_norm": 0.44378501176834106, "learning_rate": 0.00013626899231859005, "loss": 1.5301, "step": 24533 }, { "epoch": 0.3188081404320361, "grad_norm": 0.4073767066001892, "learning_rate": 0.00013626639285667868, "loss": 1.2428, "step": 24534 }, { "epoch": 0.318821134975952, "grad_norm": 0.3824664056301117, "learning_rate": 0.0001362637933947673, "loss": 1.3102, "step": 24535 }, { "epoch": 0.31883412951986784, "grad_norm": 0.4208299517631531, "learning_rate": 0.0001362611939328559, "loss": 1.3669, "step": 24536 }, { "epoch": 0.31884712406378374, "grad_norm": 0.37531328201293945, "learning_rate": 0.0001362585944709445, "loss": 1.5238, "step": 24537 }, { "epoch": 0.3188601186076996, "grad_norm": 0.391003280878067, "learning_rate": 0.00013625599500903315, "loss": 1.3356, "step": 24538 }, { "epoch": 0.3188731131516155, "grad_norm": 0.3075683116912842, "learning_rate": 0.00013625339554712175, "loss": 1.3381, "step": 24539 }, { "epoch": 0.31888610769553133, "grad_norm": 0.49173039197921753, "learning_rate": 0.00013625079608521037, "loss": 1.3635, "step": 24540 }, { "epoch": 0.31889910223944723, "grad_norm": 0.44790974259376526, "learning_rate": 0.00013624819662329897, "loss": 1.2755, "step": 24541 }, { "epoch": 0.3189120967833631, "grad_norm": 0.4942026734352112, "learning_rate": 0.0001362455971613876, "loss": 1.5959, "step": 24542 }, { "epoch": 0.318925091327279, "grad_norm": 0.4439903795719147, "learning_rate": 0.00013624299769947622, "loss": 1.2486, "step": 24543 }, { "epoch": 0.3189380858711948, "grad_norm": 0.41642075777053833, "learning_rate": 0.00013624039823756482, "loss": 1.3676, "step": 24544 }, { "epoch": 0.3189510804151107, "grad_norm": 0.41230639815330505, "learning_rate": 0.00013623779877565344, "loss": 1.4296, "step": 24545 }, { "epoch": 0.31896407495902657, "grad_norm": 0.31965962052345276, "learning_rate": 0.00013623519931374206, "loss": 1.44, "step": 24546 }, { "epoch": 0.31897706950294247, "grad_norm": 0.3615824580192566, "learning_rate": 0.0001362325998518307, "loss": 1.3134, "step": 24547 }, { "epoch": 0.3189900640468583, "grad_norm": 0.37412142753601074, "learning_rate": 0.00013623000038991929, "loss": 1.373, "step": 24548 }, { "epoch": 0.3190030585907742, "grad_norm": 0.40013277530670166, "learning_rate": 0.0001362274009280079, "loss": 1.4433, "step": 24549 }, { "epoch": 0.31901605313469006, "grad_norm": 0.4531174302101135, "learning_rate": 0.00013622480146609654, "loss": 1.5444, "step": 24550 }, { "epoch": 0.31902904767860596, "grad_norm": 0.4477156102657318, "learning_rate": 0.00013622220200418513, "loss": 1.2742, "step": 24551 }, { "epoch": 0.3190420422225218, "grad_norm": 0.4236210882663727, "learning_rate": 0.00013621960254227376, "loss": 1.4847, "step": 24552 }, { "epoch": 0.3190550367664377, "grad_norm": 0.36249276995658875, "learning_rate": 0.00013621700308036238, "loss": 1.1774, "step": 24553 }, { "epoch": 0.31906803131035355, "grad_norm": 0.3766421377658844, "learning_rate": 0.00013621440361845098, "loss": 1.4264, "step": 24554 }, { "epoch": 0.31908102585426945, "grad_norm": 0.4403527081012726, "learning_rate": 0.0001362118041565396, "loss": 1.4028, "step": 24555 }, { "epoch": 0.3190940203981853, "grad_norm": 0.4380132555961609, "learning_rate": 0.0001362092046946282, "loss": 1.5295, "step": 24556 }, { "epoch": 0.3191070149421012, "grad_norm": 0.25155001878738403, "learning_rate": 0.00013620660523271685, "loss": 1.2453, "step": 24557 }, { "epoch": 0.31912000948601704, "grad_norm": 0.46866559982299805, "learning_rate": 0.00013620400577080545, "loss": 1.5336, "step": 24558 }, { "epoch": 0.31913300402993294, "grad_norm": 0.325844943523407, "learning_rate": 0.00013620140630889407, "loss": 1.3731, "step": 24559 }, { "epoch": 0.3191459985738488, "grad_norm": 0.26560500264167786, "learning_rate": 0.00013619880684698267, "loss": 1.4453, "step": 24560 }, { "epoch": 0.3191589931177647, "grad_norm": 0.3765057325363159, "learning_rate": 0.0001361962073850713, "loss": 1.3048, "step": 24561 }, { "epoch": 0.31917198766168053, "grad_norm": 0.31004953384399414, "learning_rate": 0.00013619360792315992, "loss": 1.3569, "step": 24562 }, { "epoch": 0.31918498220559643, "grad_norm": 0.40252864360809326, "learning_rate": 0.00013619100846124852, "loss": 1.4125, "step": 24563 }, { "epoch": 0.3191979767495123, "grad_norm": 0.25679269433021545, "learning_rate": 0.00013618840899933714, "loss": 1.2879, "step": 24564 }, { "epoch": 0.3192109712934282, "grad_norm": 0.378481924533844, "learning_rate": 0.00013618580953742577, "loss": 1.4235, "step": 24565 }, { "epoch": 0.319223965837344, "grad_norm": 0.4207339882850647, "learning_rate": 0.0001361832100755144, "loss": 1.501, "step": 24566 }, { "epoch": 0.3192369603812599, "grad_norm": 0.4911903440952301, "learning_rate": 0.000136180610613603, "loss": 1.6087, "step": 24567 }, { "epoch": 0.31924995492517577, "grad_norm": 0.32365554571151733, "learning_rate": 0.00013617801115169159, "loss": 1.3274, "step": 24568 }, { "epoch": 0.3192629494690917, "grad_norm": 0.42182356119155884, "learning_rate": 0.00013617541168978024, "loss": 1.552, "step": 24569 }, { "epoch": 0.3192759440130075, "grad_norm": 0.34021878242492676, "learning_rate": 0.00013617281222786884, "loss": 1.2254, "step": 24570 }, { "epoch": 0.3192889385569234, "grad_norm": 0.3618484139442444, "learning_rate": 0.00013617021276595746, "loss": 1.2343, "step": 24571 }, { "epoch": 0.31930193310083926, "grad_norm": 0.40864869952201843, "learning_rate": 0.00013616761330404606, "loss": 1.6207, "step": 24572 }, { "epoch": 0.31931492764475516, "grad_norm": 0.3376300632953644, "learning_rate": 0.00013616501384213468, "loss": 1.2565, "step": 24573 }, { "epoch": 0.319327922188671, "grad_norm": 0.3087749481201172, "learning_rate": 0.0001361624143802233, "loss": 1.5313, "step": 24574 }, { "epoch": 0.3193409167325869, "grad_norm": 0.4960716664791107, "learning_rate": 0.0001361598149183119, "loss": 1.4389, "step": 24575 }, { "epoch": 0.31935391127650276, "grad_norm": 0.4120165705680847, "learning_rate": 0.00013615721545640053, "loss": 1.58, "step": 24576 }, { "epoch": 0.31936690582041866, "grad_norm": 0.45616215467453003, "learning_rate": 0.00013615461599448915, "loss": 1.5636, "step": 24577 }, { "epoch": 0.3193799003643345, "grad_norm": 0.3470858931541443, "learning_rate": 0.00013615201653257778, "loss": 1.4587, "step": 24578 }, { "epoch": 0.3193928949082504, "grad_norm": 0.41595685482025146, "learning_rate": 0.00013614941707066637, "loss": 1.3261, "step": 24579 }, { "epoch": 0.31940588945216625, "grad_norm": 0.40102699398994446, "learning_rate": 0.00013614681760875497, "loss": 1.2579, "step": 24580 }, { "epoch": 0.31941888399608215, "grad_norm": 0.3551299571990967, "learning_rate": 0.00013614421814684362, "loss": 1.0856, "step": 24581 }, { "epoch": 0.319431878539998, "grad_norm": 0.41390490531921387, "learning_rate": 0.00013614161868493222, "loss": 1.327, "step": 24582 }, { "epoch": 0.3194448730839139, "grad_norm": 0.37795621156692505, "learning_rate": 0.00013613901922302085, "loss": 1.4272, "step": 24583 }, { "epoch": 0.31945786762782974, "grad_norm": 0.4226207435131073, "learning_rate": 0.00013613641976110944, "loss": 1.4477, "step": 24584 }, { "epoch": 0.31947086217174564, "grad_norm": 0.3721774220466614, "learning_rate": 0.00013613382029919807, "loss": 1.685, "step": 24585 }, { "epoch": 0.3194838567156615, "grad_norm": 0.43431785702705383, "learning_rate": 0.0001361312208372867, "loss": 1.3605, "step": 24586 }, { "epoch": 0.3194968512595774, "grad_norm": 0.5108338594436646, "learning_rate": 0.0001361286213753753, "loss": 1.3863, "step": 24587 }, { "epoch": 0.31950984580349323, "grad_norm": 0.39368823170661926, "learning_rate": 0.00013612602191346394, "loss": 1.4914, "step": 24588 }, { "epoch": 0.31952284034740913, "grad_norm": 0.33489078283309937, "learning_rate": 0.00013612342245155254, "loss": 1.4493, "step": 24589 }, { "epoch": 0.31953583489132503, "grad_norm": 0.4886419177055359, "learning_rate": 0.00013612082298964116, "loss": 1.6374, "step": 24590 }, { "epoch": 0.3195488294352409, "grad_norm": 0.36556005477905273, "learning_rate": 0.00013611822352772976, "loss": 1.2086, "step": 24591 }, { "epoch": 0.3195618239791568, "grad_norm": 0.3822033703327179, "learning_rate": 0.00013611562406581838, "loss": 1.4478, "step": 24592 }, { "epoch": 0.3195748185230726, "grad_norm": 0.3966180980205536, "learning_rate": 0.000136113024603907, "loss": 1.2841, "step": 24593 }, { "epoch": 0.3195878130669885, "grad_norm": 0.4178012013435364, "learning_rate": 0.0001361104251419956, "loss": 1.3748, "step": 24594 }, { "epoch": 0.31960080761090437, "grad_norm": 0.4416796863079071, "learning_rate": 0.00013610782568008423, "loss": 1.5387, "step": 24595 }, { "epoch": 0.31961380215482027, "grad_norm": 0.447081595659256, "learning_rate": 0.00013610522621817285, "loss": 1.3171, "step": 24596 }, { "epoch": 0.3196267966987361, "grad_norm": 0.33643415570259094, "learning_rate": 0.00013610262675626145, "loss": 1.0847, "step": 24597 }, { "epoch": 0.319639791242652, "grad_norm": 0.38043490052223206, "learning_rate": 0.00013610002729435008, "loss": 1.2841, "step": 24598 }, { "epoch": 0.31965278578656786, "grad_norm": 0.34751492738723755, "learning_rate": 0.00013609742783243867, "loss": 1.3364, "step": 24599 }, { "epoch": 0.31966578033048376, "grad_norm": 0.640848696231842, "learning_rate": 0.00013609482837052733, "loss": 1.611, "step": 24600 }, { "epoch": 0.3196787748743996, "grad_norm": 0.4591352939605713, "learning_rate": 0.00013609222890861592, "loss": 1.5975, "step": 24601 }, { "epoch": 0.3196917694183155, "grad_norm": 0.41186749935150146, "learning_rate": 0.00013608962944670455, "loss": 1.4004, "step": 24602 }, { "epoch": 0.31970476396223135, "grad_norm": 0.39533838629722595, "learning_rate": 0.00013608702998479315, "loss": 1.5247, "step": 24603 }, { "epoch": 0.31971775850614725, "grad_norm": 0.4489741027355194, "learning_rate": 0.00013608443052288177, "loss": 1.4234, "step": 24604 }, { "epoch": 0.3197307530500631, "grad_norm": 0.48175880312919617, "learning_rate": 0.0001360818310609704, "loss": 1.5163, "step": 24605 }, { "epoch": 0.319743747593979, "grad_norm": 0.3358345031738281, "learning_rate": 0.000136079231599059, "loss": 1.5759, "step": 24606 }, { "epoch": 0.31975674213789484, "grad_norm": 0.46662598848342896, "learning_rate": 0.00013607663213714762, "loss": 1.3458, "step": 24607 }, { "epoch": 0.31976973668181075, "grad_norm": 0.3371489346027374, "learning_rate": 0.00013607403267523624, "loss": 1.502, "step": 24608 }, { "epoch": 0.3197827312257266, "grad_norm": 0.3279533088207245, "learning_rate": 0.00013607143321332484, "loss": 1.3046, "step": 24609 }, { "epoch": 0.3197957257696425, "grad_norm": 0.351125568151474, "learning_rate": 0.00013606883375141346, "loss": 1.4309, "step": 24610 }, { "epoch": 0.31980872031355834, "grad_norm": 0.3855311870574951, "learning_rate": 0.00013606623428950206, "loss": 1.3136, "step": 24611 }, { "epoch": 0.31982171485747424, "grad_norm": 0.42153966426849365, "learning_rate": 0.0001360636348275907, "loss": 1.2424, "step": 24612 }, { "epoch": 0.3198347094013901, "grad_norm": 0.45653510093688965, "learning_rate": 0.0001360610353656793, "loss": 1.3847, "step": 24613 }, { "epoch": 0.319847703945306, "grad_norm": 0.27487313747406006, "learning_rate": 0.00013605843590376793, "loss": 1.1458, "step": 24614 }, { "epoch": 0.31986069848922183, "grad_norm": 0.4824073314666748, "learning_rate": 0.00013605583644185653, "loss": 1.3576, "step": 24615 }, { "epoch": 0.31987369303313773, "grad_norm": 0.31202632188796997, "learning_rate": 0.00013605323697994515, "loss": 1.276, "step": 24616 }, { "epoch": 0.3198866875770536, "grad_norm": 0.5924356579780579, "learning_rate": 0.00013605063751803378, "loss": 1.5418, "step": 24617 }, { "epoch": 0.3198996821209695, "grad_norm": 0.3384953737258911, "learning_rate": 0.00013604803805612238, "loss": 1.2339, "step": 24618 }, { "epoch": 0.3199126766648853, "grad_norm": 0.5104926824569702, "learning_rate": 0.000136045438594211, "loss": 1.2862, "step": 24619 }, { "epoch": 0.3199256712088012, "grad_norm": 0.39461395144462585, "learning_rate": 0.00013604283913229963, "loss": 1.4092, "step": 24620 }, { "epoch": 0.31993866575271707, "grad_norm": 0.38680028915405273, "learning_rate": 0.00013604023967038822, "loss": 1.356, "step": 24621 }, { "epoch": 0.31995166029663297, "grad_norm": 0.4401646554470062, "learning_rate": 0.00013603764020847685, "loss": 1.3714, "step": 24622 }, { "epoch": 0.3199646548405488, "grad_norm": 0.3675658404827118, "learning_rate": 0.00013603504074656544, "loss": 1.3751, "step": 24623 }, { "epoch": 0.3199776493844647, "grad_norm": 0.41579878330230713, "learning_rate": 0.0001360324412846541, "loss": 1.4316, "step": 24624 }, { "epoch": 0.31999064392838056, "grad_norm": 0.3984793424606323, "learning_rate": 0.0001360298418227427, "loss": 1.2428, "step": 24625 }, { "epoch": 0.32000363847229646, "grad_norm": 0.3353992998600006, "learning_rate": 0.00013602724236083132, "loss": 1.2812, "step": 24626 }, { "epoch": 0.3200166330162123, "grad_norm": 0.35817191004753113, "learning_rate": 0.00013602464289891994, "loss": 1.3114, "step": 24627 }, { "epoch": 0.3200296275601282, "grad_norm": 0.35655084252357483, "learning_rate": 0.00013602204343700854, "loss": 1.2821, "step": 24628 }, { "epoch": 0.32004262210404405, "grad_norm": 0.336092472076416, "learning_rate": 0.00013601944397509716, "loss": 1.3803, "step": 24629 }, { "epoch": 0.32005561664795995, "grad_norm": 0.34984907507896423, "learning_rate": 0.00013601684451318576, "loss": 1.5084, "step": 24630 }, { "epoch": 0.3200686111918758, "grad_norm": 0.4446631669998169, "learning_rate": 0.00013601424505127441, "loss": 1.4192, "step": 24631 }, { "epoch": 0.3200816057357917, "grad_norm": 0.44216668605804443, "learning_rate": 0.000136011645589363, "loss": 1.3771, "step": 24632 }, { "epoch": 0.32009460027970754, "grad_norm": 0.3548837900161743, "learning_rate": 0.00013600904612745164, "loss": 1.3578, "step": 24633 }, { "epoch": 0.32010759482362344, "grad_norm": 0.3141326904296875, "learning_rate": 0.00013600644666554023, "loss": 1.3227, "step": 24634 }, { "epoch": 0.3201205893675393, "grad_norm": 0.4169851541519165, "learning_rate": 0.00013600384720362886, "loss": 1.3578, "step": 24635 }, { "epoch": 0.3201335839114552, "grad_norm": 0.42173251509666443, "learning_rate": 0.00013600124774171748, "loss": 1.3471, "step": 24636 }, { "epoch": 0.32014657845537103, "grad_norm": 0.41726696491241455, "learning_rate": 0.00013599864827980608, "loss": 1.4472, "step": 24637 }, { "epoch": 0.32015957299928693, "grad_norm": 0.3060067594051361, "learning_rate": 0.0001359960488178947, "loss": 1.366, "step": 24638 }, { "epoch": 0.3201725675432028, "grad_norm": 0.33108460903167725, "learning_rate": 0.00013599344935598333, "loss": 1.4009, "step": 24639 }, { "epoch": 0.3201855620871187, "grad_norm": 0.5050896406173706, "learning_rate": 0.00013599084989407193, "loss": 1.49, "step": 24640 }, { "epoch": 0.3201985566310345, "grad_norm": 0.5148933529853821, "learning_rate": 0.00013598825043216055, "loss": 1.4794, "step": 24641 }, { "epoch": 0.3202115511749504, "grad_norm": 0.39288076758384705, "learning_rate": 0.00013598565097024915, "loss": 1.3284, "step": 24642 }, { "epoch": 0.32022454571886627, "grad_norm": 0.42341890931129456, "learning_rate": 0.0001359830515083378, "loss": 1.519, "step": 24643 }, { "epoch": 0.32023754026278217, "grad_norm": 0.3518048822879791, "learning_rate": 0.0001359804520464264, "loss": 1.4584, "step": 24644 }, { "epoch": 0.320250534806698, "grad_norm": 0.3701116740703583, "learning_rate": 0.00013597785258451502, "loss": 1.26, "step": 24645 }, { "epoch": 0.3202635293506139, "grad_norm": 0.43790045380592346, "learning_rate": 0.00013597525312260362, "loss": 1.2672, "step": 24646 }, { "epoch": 0.32027652389452976, "grad_norm": 0.47366979718208313, "learning_rate": 0.00013597265366069224, "loss": 1.3905, "step": 24647 }, { "epoch": 0.32028951843844566, "grad_norm": 0.4065723419189453, "learning_rate": 0.00013597005419878087, "loss": 1.5294, "step": 24648 }, { "epoch": 0.3203025129823615, "grad_norm": 0.4497450590133667, "learning_rate": 0.00013596745473686946, "loss": 1.4367, "step": 24649 }, { "epoch": 0.3203155075262774, "grad_norm": 0.4001935124397278, "learning_rate": 0.0001359648552749581, "loss": 1.326, "step": 24650 }, { "epoch": 0.32032850207019326, "grad_norm": 0.409372478723526, "learning_rate": 0.00013596225581304671, "loss": 1.2982, "step": 24651 }, { "epoch": 0.32034149661410916, "grad_norm": 0.26022660732269287, "learning_rate": 0.0001359596563511353, "loss": 1.4038, "step": 24652 }, { "epoch": 0.320354491158025, "grad_norm": 0.4460715055465698, "learning_rate": 0.00013595705688922394, "loss": 1.3915, "step": 24653 }, { "epoch": 0.3203674857019409, "grad_norm": 0.3659096956253052, "learning_rate": 0.00013595445742731253, "loss": 1.4536, "step": 24654 }, { "epoch": 0.32038048024585675, "grad_norm": 0.4425286650657654, "learning_rate": 0.00013595185796540118, "loss": 1.4586, "step": 24655 }, { "epoch": 0.32039347478977265, "grad_norm": 0.34227854013442993, "learning_rate": 0.00013594925850348978, "loss": 1.3676, "step": 24656 }, { "epoch": 0.3204064693336885, "grad_norm": 0.3165017068386078, "learning_rate": 0.0001359466590415784, "loss": 1.3435, "step": 24657 }, { "epoch": 0.3204194638776044, "grad_norm": 0.338697612285614, "learning_rate": 0.000135944059579667, "loss": 1.3449, "step": 24658 }, { "epoch": 0.32043245842152024, "grad_norm": 0.4276241660118103, "learning_rate": 0.00013594146011775563, "loss": 1.4797, "step": 24659 }, { "epoch": 0.32044545296543614, "grad_norm": 0.5089761018753052, "learning_rate": 0.00013593886065584425, "loss": 1.3462, "step": 24660 }, { "epoch": 0.320458447509352, "grad_norm": 0.37063679099082947, "learning_rate": 0.00013593626119393285, "loss": 1.3394, "step": 24661 }, { "epoch": 0.3204714420532679, "grad_norm": 0.47729796171188354, "learning_rate": 0.0001359336617320215, "loss": 1.5818, "step": 24662 }, { "epoch": 0.32048443659718373, "grad_norm": 0.3525777757167816, "learning_rate": 0.0001359310622701101, "loss": 1.2506, "step": 24663 }, { "epoch": 0.32049743114109963, "grad_norm": 0.3640398383140564, "learning_rate": 0.0001359284628081987, "loss": 1.3161, "step": 24664 }, { "epoch": 0.3205104256850155, "grad_norm": 0.368281751871109, "learning_rate": 0.00013592586334628732, "loss": 1.4794, "step": 24665 }, { "epoch": 0.3205234202289314, "grad_norm": 0.4180395007133484, "learning_rate": 0.00013592326388437595, "loss": 1.3675, "step": 24666 }, { "epoch": 0.3205364147728473, "grad_norm": 0.4629068672657013, "learning_rate": 0.00013592066442246457, "loss": 1.5641, "step": 24667 }, { "epoch": 0.3205494093167631, "grad_norm": 0.4522596299648285, "learning_rate": 0.00013591806496055317, "loss": 1.4391, "step": 24668 }, { "epoch": 0.320562403860679, "grad_norm": 0.4398055374622345, "learning_rate": 0.0001359154654986418, "loss": 1.517, "step": 24669 }, { "epoch": 0.32057539840459487, "grad_norm": 0.38920411467552185, "learning_rate": 0.00013591286603673042, "loss": 1.4407, "step": 24670 }, { "epoch": 0.32058839294851077, "grad_norm": 0.4751942753791809, "learning_rate": 0.00013591026657481901, "loss": 1.5235, "step": 24671 }, { "epoch": 0.3206013874924266, "grad_norm": 0.3321654200553894, "learning_rate": 0.00013590766711290764, "loss": 1.2633, "step": 24672 }, { "epoch": 0.3206143820363425, "grad_norm": 0.44496938586235046, "learning_rate": 0.00013590506765099624, "loss": 1.5561, "step": 24673 }, { "epoch": 0.32062737658025836, "grad_norm": 0.42505893111228943, "learning_rate": 0.0001359024681890849, "loss": 1.3232, "step": 24674 }, { "epoch": 0.32064037112417426, "grad_norm": 0.46920567750930786, "learning_rate": 0.00013589986872717348, "loss": 1.4445, "step": 24675 }, { "epoch": 0.3206533656680901, "grad_norm": 0.39228391647338867, "learning_rate": 0.00013589726926526208, "loss": 1.3508, "step": 24676 }, { "epoch": 0.320666360212006, "grad_norm": 0.42103394865989685, "learning_rate": 0.0001358946698033507, "loss": 1.4086, "step": 24677 }, { "epoch": 0.32067935475592185, "grad_norm": 0.3482295870780945, "learning_rate": 0.00013589207034143933, "loss": 1.2377, "step": 24678 }, { "epoch": 0.32069234929983775, "grad_norm": 0.33322474360466003, "learning_rate": 0.00013588947087952796, "loss": 1.3471, "step": 24679 }, { "epoch": 0.3207053438437536, "grad_norm": 0.3872907757759094, "learning_rate": 0.00013588687141761655, "loss": 1.3884, "step": 24680 }, { "epoch": 0.3207183383876695, "grad_norm": 0.35781705379486084, "learning_rate": 0.00013588427195570518, "loss": 1.5207, "step": 24681 }, { "epoch": 0.32073133293158534, "grad_norm": 0.34672942757606506, "learning_rate": 0.0001358816724937938, "loss": 1.3324, "step": 24682 }, { "epoch": 0.32074432747550125, "grad_norm": 0.4029582738876343, "learning_rate": 0.0001358790730318824, "loss": 1.3019, "step": 24683 }, { "epoch": 0.3207573220194171, "grad_norm": 0.39951464533805847, "learning_rate": 0.00013587647356997102, "loss": 1.6366, "step": 24684 }, { "epoch": 0.320770316563333, "grad_norm": 0.37376925349235535, "learning_rate": 0.00013587387410805962, "loss": 1.2487, "step": 24685 }, { "epoch": 0.32078331110724884, "grad_norm": 0.3113340139389038, "learning_rate": 0.00013587127464614827, "loss": 1.2078, "step": 24686 }, { "epoch": 0.32079630565116474, "grad_norm": 0.3956928253173828, "learning_rate": 0.00013586867518423687, "loss": 1.4368, "step": 24687 }, { "epoch": 0.3208093001950806, "grad_norm": 0.4499419033527374, "learning_rate": 0.0001358660757223255, "loss": 1.3592, "step": 24688 }, { "epoch": 0.3208222947389965, "grad_norm": 0.3688843250274658, "learning_rate": 0.0001358634762604141, "loss": 1.1919, "step": 24689 }, { "epoch": 0.32083528928291233, "grad_norm": 0.35973894596099854, "learning_rate": 0.00013586087679850272, "loss": 1.2937, "step": 24690 }, { "epoch": 0.32084828382682823, "grad_norm": 0.43575525283813477, "learning_rate": 0.00013585827733659134, "loss": 1.4523, "step": 24691 }, { "epoch": 0.3208612783707441, "grad_norm": 0.4094131290912628, "learning_rate": 0.00013585567787467994, "loss": 1.4535, "step": 24692 }, { "epoch": 0.32087427291466, "grad_norm": 0.3185214698314667, "learning_rate": 0.00013585307841276856, "loss": 1.3424, "step": 24693 }, { "epoch": 0.3208872674585758, "grad_norm": 0.3318707346916199, "learning_rate": 0.0001358504789508572, "loss": 1.2323, "step": 24694 }, { "epoch": 0.3209002620024917, "grad_norm": 0.34532883763313293, "learning_rate": 0.00013584787948894578, "loss": 1.4138, "step": 24695 }, { "epoch": 0.32091325654640757, "grad_norm": 0.3023165762424469, "learning_rate": 0.0001358452800270344, "loss": 1.1863, "step": 24696 }, { "epoch": 0.32092625109032347, "grad_norm": 0.3690437376499176, "learning_rate": 0.000135842680565123, "loss": 1.3486, "step": 24697 }, { "epoch": 0.3209392456342393, "grad_norm": 0.5054942965507507, "learning_rate": 0.00013584008110321166, "loss": 1.5228, "step": 24698 }, { "epoch": 0.3209522401781552, "grad_norm": 0.49830055236816406, "learning_rate": 0.00013583748164130026, "loss": 1.4806, "step": 24699 }, { "epoch": 0.32096523472207106, "grad_norm": 0.43226420879364014, "learning_rate": 0.00013583488217938888, "loss": 1.3502, "step": 24700 }, { "epoch": 0.32097822926598696, "grad_norm": 0.35177141427993774, "learning_rate": 0.0001358322827174775, "loss": 1.4824, "step": 24701 }, { "epoch": 0.3209912238099028, "grad_norm": 0.4701346457004547, "learning_rate": 0.0001358296832555661, "loss": 1.3295, "step": 24702 }, { "epoch": 0.3210042183538187, "grad_norm": 0.5246003866195679, "learning_rate": 0.00013582708379365473, "loss": 1.4603, "step": 24703 }, { "epoch": 0.32101721289773455, "grad_norm": 0.34467628598213196, "learning_rate": 0.00013582448433174332, "loss": 1.2419, "step": 24704 }, { "epoch": 0.32103020744165045, "grad_norm": 0.4125974178314209, "learning_rate": 0.00013582188486983195, "loss": 1.5919, "step": 24705 }, { "epoch": 0.3210432019855663, "grad_norm": 0.407257616519928, "learning_rate": 0.00013581928540792057, "loss": 1.4451, "step": 24706 }, { "epoch": 0.3210561965294822, "grad_norm": 0.4813106060028076, "learning_rate": 0.00013581668594600917, "loss": 1.5407, "step": 24707 }, { "epoch": 0.32106919107339804, "grad_norm": 0.4208795428276062, "learning_rate": 0.0001358140864840978, "loss": 1.3404, "step": 24708 }, { "epoch": 0.32108218561731394, "grad_norm": 0.38644152879714966, "learning_rate": 0.00013581148702218642, "loss": 1.3186, "step": 24709 }, { "epoch": 0.3210951801612298, "grad_norm": 0.4278043508529663, "learning_rate": 0.00013580888756027504, "loss": 1.3186, "step": 24710 }, { "epoch": 0.3211081747051457, "grad_norm": 0.43379321694374084, "learning_rate": 0.00013580628809836364, "loss": 1.2023, "step": 24711 }, { "epoch": 0.32112116924906153, "grad_norm": 0.2837704122066498, "learning_rate": 0.00013580368863645227, "loss": 1.2742, "step": 24712 }, { "epoch": 0.32113416379297743, "grad_norm": 0.3324788510799408, "learning_rate": 0.0001358010891745409, "loss": 1.2826, "step": 24713 }, { "epoch": 0.3211471583368933, "grad_norm": 0.40308767557144165, "learning_rate": 0.0001357984897126295, "loss": 1.4953, "step": 24714 }, { "epoch": 0.3211601528808092, "grad_norm": 0.3645550012588501, "learning_rate": 0.0001357958902507181, "loss": 1.3198, "step": 24715 }, { "epoch": 0.321173147424725, "grad_norm": 0.37720733880996704, "learning_rate": 0.0001357932907888067, "loss": 1.5122, "step": 24716 }, { "epoch": 0.3211861419686409, "grad_norm": 0.476252019405365, "learning_rate": 0.00013579069132689536, "loss": 1.4569, "step": 24717 }, { "epoch": 0.32119913651255677, "grad_norm": 0.31597527861595154, "learning_rate": 0.00013578809186498396, "loss": 1.4193, "step": 24718 }, { "epoch": 0.32121213105647267, "grad_norm": 0.4516609013080597, "learning_rate": 0.00013578549240307256, "loss": 1.561, "step": 24719 }, { "epoch": 0.3212251256003885, "grad_norm": 0.33386701345443726, "learning_rate": 0.00013578289294116118, "loss": 1.27, "step": 24720 }, { "epoch": 0.3212381201443044, "grad_norm": 0.31866100430488586, "learning_rate": 0.0001357802934792498, "loss": 1.2678, "step": 24721 }, { "epoch": 0.32125111468822026, "grad_norm": 0.46269989013671875, "learning_rate": 0.00013577769401733843, "loss": 1.4505, "step": 24722 }, { "epoch": 0.32126410923213616, "grad_norm": 0.4592706561088562, "learning_rate": 0.00013577509455542703, "loss": 1.559, "step": 24723 }, { "epoch": 0.321277103776052, "grad_norm": 0.41620030999183655, "learning_rate": 0.00013577249509351565, "loss": 1.4544, "step": 24724 }, { "epoch": 0.3212900983199679, "grad_norm": 0.38035884499549866, "learning_rate": 0.00013576989563160427, "loss": 1.2912, "step": 24725 }, { "epoch": 0.32130309286388375, "grad_norm": 0.45668527483940125, "learning_rate": 0.00013576729616969287, "loss": 1.4422, "step": 24726 }, { "epoch": 0.32131608740779966, "grad_norm": 0.3690143823623657, "learning_rate": 0.0001357646967077815, "loss": 1.3352, "step": 24727 }, { "epoch": 0.3213290819517155, "grad_norm": 0.3793088495731354, "learning_rate": 0.0001357620972458701, "loss": 1.3989, "step": 24728 }, { "epoch": 0.3213420764956314, "grad_norm": 0.416709303855896, "learning_rate": 0.00013575949778395875, "loss": 1.3578, "step": 24729 }, { "epoch": 0.32135507103954725, "grad_norm": 0.37327855825424194, "learning_rate": 0.00013575689832204734, "loss": 1.426, "step": 24730 }, { "epoch": 0.32136806558346315, "grad_norm": 0.3240900933742523, "learning_rate": 0.00013575429886013594, "loss": 1.4102, "step": 24731 }, { "epoch": 0.321381060127379, "grad_norm": 0.36944425106048584, "learning_rate": 0.00013575169939822457, "loss": 1.2829, "step": 24732 }, { "epoch": 0.3213940546712949, "grad_norm": 0.47413942217826843, "learning_rate": 0.0001357490999363132, "loss": 1.3273, "step": 24733 }, { "epoch": 0.32140704921521074, "grad_norm": 0.3661559522151947, "learning_rate": 0.00013574650047440181, "loss": 1.2789, "step": 24734 }, { "epoch": 0.32142004375912664, "grad_norm": 0.47811728715896606, "learning_rate": 0.0001357439010124904, "loss": 1.4873, "step": 24735 }, { "epoch": 0.3214330383030425, "grad_norm": 0.42382270097732544, "learning_rate": 0.00013574130155057904, "loss": 1.4457, "step": 24736 }, { "epoch": 0.3214460328469584, "grad_norm": 0.43184298276901245, "learning_rate": 0.00013573870208866766, "loss": 1.3813, "step": 24737 }, { "epoch": 0.32145902739087423, "grad_norm": 0.36754274368286133, "learning_rate": 0.00013573610262675626, "loss": 1.1517, "step": 24738 }, { "epoch": 0.32147202193479013, "grad_norm": 0.3488259017467499, "learning_rate": 0.00013573350316484488, "loss": 1.2776, "step": 24739 }, { "epoch": 0.321485016478706, "grad_norm": 0.3927743434906006, "learning_rate": 0.0001357309037029335, "loss": 1.4579, "step": 24740 }, { "epoch": 0.3214980110226219, "grad_norm": 0.3752271831035614, "learning_rate": 0.00013572830424102213, "loss": 1.3483, "step": 24741 }, { "epoch": 0.3215110055665378, "grad_norm": 0.3438195586204529, "learning_rate": 0.00013572570477911073, "loss": 1.4341, "step": 24742 }, { "epoch": 0.3215240001104536, "grad_norm": 0.3992052674293518, "learning_rate": 0.00013572310531719933, "loss": 1.4993, "step": 24743 }, { "epoch": 0.3215369946543695, "grad_norm": 0.48372069001197815, "learning_rate": 0.00013572050585528798, "loss": 1.4975, "step": 24744 }, { "epoch": 0.32154998919828537, "grad_norm": 0.43863922357559204, "learning_rate": 0.00013571790639337657, "loss": 1.4671, "step": 24745 }, { "epoch": 0.32156298374220127, "grad_norm": 0.3579051196575165, "learning_rate": 0.0001357153069314652, "loss": 1.2875, "step": 24746 }, { "epoch": 0.3215759782861171, "grad_norm": 0.45592498779296875, "learning_rate": 0.0001357127074695538, "loss": 1.4144, "step": 24747 }, { "epoch": 0.321588972830033, "grad_norm": 0.43024441599845886, "learning_rate": 0.00013571010800764242, "loss": 1.5898, "step": 24748 }, { "epoch": 0.32160196737394886, "grad_norm": 0.4178403913974762, "learning_rate": 0.00013570750854573105, "loss": 1.413, "step": 24749 }, { "epoch": 0.32161496191786476, "grad_norm": 0.4219180643558502, "learning_rate": 0.00013570490908381964, "loss": 1.3591, "step": 24750 }, { "epoch": 0.3216279564617806, "grad_norm": 0.42791593074798584, "learning_rate": 0.00013570230962190827, "loss": 1.3157, "step": 24751 }, { "epoch": 0.3216409510056965, "grad_norm": 0.39800095558166504, "learning_rate": 0.0001356997101599969, "loss": 1.3912, "step": 24752 }, { "epoch": 0.32165394554961235, "grad_norm": 0.4012994170188904, "learning_rate": 0.00013569711069808552, "loss": 1.4016, "step": 24753 }, { "epoch": 0.32166694009352825, "grad_norm": 0.37264829874038696, "learning_rate": 0.00013569451123617411, "loss": 1.3934, "step": 24754 }, { "epoch": 0.3216799346374441, "grad_norm": 0.3317003846168518, "learning_rate": 0.00013569191177426274, "loss": 1.4107, "step": 24755 }, { "epoch": 0.32169292918136, "grad_norm": 0.4236525297164917, "learning_rate": 0.00013568931231235136, "loss": 1.5237, "step": 24756 }, { "epoch": 0.32170592372527584, "grad_norm": 0.3545015752315521, "learning_rate": 0.00013568671285043996, "loss": 1.326, "step": 24757 }, { "epoch": 0.32171891826919174, "grad_norm": 0.4817945063114166, "learning_rate": 0.00013568411338852858, "loss": 1.466, "step": 24758 }, { "epoch": 0.3217319128131076, "grad_norm": 0.4174249768257141, "learning_rate": 0.00013568151392661718, "loss": 1.3172, "step": 24759 }, { "epoch": 0.3217449073570235, "grad_norm": 0.3138663172721863, "learning_rate": 0.0001356789144647058, "loss": 1.5928, "step": 24760 }, { "epoch": 0.32175790190093934, "grad_norm": 0.4899853765964508, "learning_rate": 0.00013567631500279443, "loss": 1.4632, "step": 24761 }, { "epoch": 0.32177089644485524, "grad_norm": 0.40293407440185547, "learning_rate": 0.00013567371554088303, "loss": 1.3502, "step": 24762 }, { "epoch": 0.3217838909887711, "grad_norm": 0.3393685519695282, "learning_rate": 0.00013567111607897165, "loss": 1.4755, "step": 24763 }, { "epoch": 0.321796885532687, "grad_norm": 0.38959047198295593, "learning_rate": 0.00013566851661706028, "loss": 1.3434, "step": 24764 }, { "epoch": 0.32180988007660283, "grad_norm": 0.3684130609035492, "learning_rate": 0.0001356659171551489, "loss": 1.4785, "step": 24765 }, { "epoch": 0.32182287462051873, "grad_norm": 0.35236477851867676, "learning_rate": 0.0001356633176932375, "loss": 1.4781, "step": 24766 }, { "epoch": 0.3218358691644346, "grad_norm": 0.3297717869281769, "learning_rate": 0.00013566071823132612, "loss": 1.2998, "step": 24767 }, { "epoch": 0.3218488637083505, "grad_norm": 0.4193660616874695, "learning_rate": 0.00013565811876941475, "loss": 1.402, "step": 24768 }, { "epoch": 0.3218618582522663, "grad_norm": 0.4387204349040985, "learning_rate": 0.00013565551930750335, "loss": 1.4731, "step": 24769 }, { "epoch": 0.3218748527961822, "grad_norm": 0.39443477988243103, "learning_rate": 0.00013565291984559197, "loss": 1.3148, "step": 24770 }, { "epoch": 0.32188784734009807, "grad_norm": 0.37812450528144836, "learning_rate": 0.00013565032038368057, "loss": 1.4579, "step": 24771 }, { "epoch": 0.32190084188401397, "grad_norm": 0.4122190475463867, "learning_rate": 0.00013564772092176922, "loss": 1.2654, "step": 24772 }, { "epoch": 0.3219138364279298, "grad_norm": 0.3226117491722107, "learning_rate": 0.00013564512145985782, "loss": 1.3342, "step": 24773 }, { "epoch": 0.3219268309718457, "grad_norm": 0.4152553975582123, "learning_rate": 0.00013564252199794641, "loss": 1.3738, "step": 24774 }, { "epoch": 0.32193982551576156, "grad_norm": 0.43611374497413635, "learning_rate": 0.00013563992253603507, "loss": 1.3646, "step": 24775 }, { "epoch": 0.32195282005967746, "grad_norm": 0.49749991297721863, "learning_rate": 0.00013563732307412366, "loss": 1.528, "step": 24776 }, { "epoch": 0.3219658146035933, "grad_norm": 0.38084685802459717, "learning_rate": 0.0001356347236122123, "loss": 1.3476, "step": 24777 }, { "epoch": 0.3219788091475092, "grad_norm": 0.4132595956325531, "learning_rate": 0.00013563212415030088, "loss": 1.4489, "step": 24778 }, { "epoch": 0.32199180369142505, "grad_norm": 0.668992817401886, "learning_rate": 0.0001356295246883895, "loss": 1.4169, "step": 24779 }, { "epoch": 0.32200479823534095, "grad_norm": 0.4263128340244293, "learning_rate": 0.00013562692522647813, "loss": 1.2513, "step": 24780 }, { "epoch": 0.3220177927792568, "grad_norm": 0.4289945960044861, "learning_rate": 0.00013562432576456673, "loss": 1.4288, "step": 24781 }, { "epoch": 0.3220307873231727, "grad_norm": 0.476245254278183, "learning_rate": 0.00013562172630265536, "loss": 1.3965, "step": 24782 }, { "epoch": 0.32204378186708854, "grad_norm": 0.4400055408477783, "learning_rate": 0.00013561912684074398, "loss": 1.4549, "step": 24783 }, { "epoch": 0.32205677641100444, "grad_norm": 0.374044269323349, "learning_rate": 0.0001356165273788326, "loss": 1.4691, "step": 24784 }, { "epoch": 0.3220697709549203, "grad_norm": 0.3557385802268982, "learning_rate": 0.0001356139279169212, "loss": 1.2804, "step": 24785 }, { "epoch": 0.3220827654988362, "grad_norm": 0.333656370639801, "learning_rate": 0.0001356113284550098, "loss": 1.4894, "step": 24786 }, { "epoch": 0.32209576004275203, "grad_norm": 0.440645694732666, "learning_rate": 0.00013560872899309845, "loss": 1.4679, "step": 24787 }, { "epoch": 0.32210875458666793, "grad_norm": 0.34813442826271057, "learning_rate": 0.00013560612953118705, "loss": 1.4026, "step": 24788 }, { "epoch": 0.3221217491305838, "grad_norm": 0.40143880248069763, "learning_rate": 0.00013560353006927567, "loss": 1.6164, "step": 24789 }, { "epoch": 0.3221347436744997, "grad_norm": 0.4037768840789795, "learning_rate": 0.00013560093060736427, "loss": 1.4597, "step": 24790 }, { "epoch": 0.3221477382184155, "grad_norm": 0.42063459753990173, "learning_rate": 0.0001355983311454529, "loss": 1.388, "step": 24791 }, { "epoch": 0.3221607327623314, "grad_norm": 0.450285941362381, "learning_rate": 0.00013559573168354152, "loss": 1.6518, "step": 24792 }, { "epoch": 0.32217372730624727, "grad_norm": 0.3764720857143402, "learning_rate": 0.00013559313222163012, "loss": 1.5588, "step": 24793 }, { "epoch": 0.32218672185016317, "grad_norm": 0.4656607508659363, "learning_rate": 0.00013559053275971874, "loss": 1.3533, "step": 24794 }, { "epoch": 0.322199716394079, "grad_norm": 0.3740445375442505, "learning_rate": 0.00013558793329780737, "loss": 1.2915, "step": 24795 }, { "epoch": 0.3222127109379949, "grad_norm": 0.37352094054222107, "learning_rate": 0.000135585333835896, "loss": 1.5986, "step": 24796 }, { "epoch": 0.32222570548191076, "grad_norm": 0.4056890308856964, "learning_rate": 0.0001355827343739846, "loss": 1.3736, "step": 24797 }, { "epoch": 0.32223870002582666, "grad_norm": 0.5165325403213501, "learning_rate": 0.00013558013491207318, "loss": 1.5007, "step": 24798 }, { "epoch": 0.3222516945697425, "grad_norm": 0.3987845778465271, "learning_rate": 0.00013557753545016184, "loss": 1.4611, "step": 24799 }, { "epoch": 0.3222646891136584, "grad_norm": 0.3437821567058563, "learning_rate": 0.00013557493598825043, "loss": 1.2405, "step": 24800 }, { "epoch": 0.32227768365757425, "grad_norm": 0.4448677599430084, "learning_rate": 0.00013557233652633906, "loss": 1.4778, "step": 24801 }, { "epoch": 0.32229067820149015, "grad_norm": 0.3452218174934387, "learning_rate": 0.00013556973706442766, "loss": 1.3146, "step": 24802 }, { "epoch": 0.322303672745406, "grad_norm": 0.3361167907714844, "learning_rate": 0.00013556713760251628, "loss": 1.6413, "step": 24803 }, { "epoch": 0.3223166672893219, "grad_norm": 0.42919233441352844, "learning_rate": 0.0001355645381406049, "loss": 1.3712, "step": 24804 }, { "epoch": 0.32232966183323775, "grad_norm": 0.4290870726108551, "learning_rate": 0.0001355619386786935, "loss": 1.5637, "step": 24805 }, { "epoch": 0.32234265637715365, "grad_norm": 0.37546175718307495, "learning_rate": 0.00013555933921678213, "loss": 1.1305, "step": 24806 }, { "epoch": 0.3223556509210695, "grad_norm": 0.45757678151130676, "learning_rate": 0.00013555673975487075, "loss": 1.5652, "step": 24807 }, { "epoch": 0.3223686454649854, "grad_norm": 0.4538206458091736, "learning_rate": 0.00013555414029295938, "loss": 1.4418, "step": 24808 }, { "epoch": 0.32238164000890124, "grad_norm": 0.4278055429458618, "learning_rate": 0.00013555154083104797, "loss": 1.3544, "step": 24809 }, { "epoch": 0.32239463455281714, "grad_norm": 0.34231773018836975, "learning_rate": 0.0001355489413691366, "loss": 1.5204, "step": 24810 }, { "epoch": 0.322407629096733, "grad_norm": 0.4222254455089569, "learning_rate": 0.00013554634190722522, "loss": 1.2565, "step": 24811 }, { "epoch": 0.3224206236406489, "grad_norm": 0.46273741126060486, "learning_rate": 0.00013554374244531382, "loss": 1.3795, "step": 24812 }, { "epoch": 0.32243361818456473, "grad_norm": 0.4589740037918091, "learning_rate": 0.00013554114298340244, "loss": 1.3784, "step": 24813 }, { "epoch": 0.32244661272848063, "grad_norm": 0.5123797059059143, "learning_rate": 0.00013553854352149107, "loss": 1.4594, "step": 24814 }, { "epoch": 0.3224596072723965, "grad_norm": 0.2949802875518799, "learning_rate": 0.00013553594405957967, "loss": 1.4215, "step": 24815 }, { "epoch": 0.3224726018163124, "grad_norm": 0.370859831571579, "learning_rate": 0.0001355333445976683, "loss": 1.2735, "step": 24816 }, { "epoch": 0.3224855963602282, "grad_norm": 0.38516542315483093, "learning_rate": 0.0001355307451357569, "loss": 1.5608, "step": 24817 }, { "epoch": 0.3224985909041441, "grad_norm": 0.3918180465698242, "learning_rate": 0.00013552814567384554, "loss": 1.3215, "step": 24818 }, { "epoch": 0.32251158544806, "grad_norm": 0.36429646611213684, "learning_rate": 0.00013552554621193414, "loss": 1.2987, "step": 24819 }, { "epoch": 0.32252457999197587, "grad_norm": 0.3534800112247467, "learning_rate": 0.00013552294675002276, "loss": 1.4025, "step": 24820 }, { "epoch": 0.32253757453589177, "grad_norm": 0.4373858869075775, "learning_rate": 0.00013552034728811136, "loss": 1.3019, "step": 24821 }, { "epoch": 0.3225505690798076, "grad_norm": 0.4387717545032501, "learning_rate": 0.00013551774782619998, "loss": 1.4948, "step": 24822 }, { "epoch": 0.3225635636237235, "grad_norm": 0.4137800335884094, "learning_rate": 0.0001355151483642886, "loss": 1.5045, "step": 24823 }, { "epoch": 0.32257655816763936, "grad_norm": 0.3808870315551758, "learning_rate": 0.0001355125489023772, "loss": 1.3537, "step": 24824 }, { "epoch": 0.32258955271155526, "grad_norm": 0.5064056515693665, "learning_rate": 0.00013550994944046583, "loss": 1.3483, "step": 24825 }, { "epoch": 0.3226025472554711, "grad_norm": 0.26330268383026123, "learning_rate": 0.00013550734997855445, "loss": 1.2074, "step": 24826 }, { "epoch": 0.322615541799387, "grad_norm": 0.39733344316482544, "learning_rate": 0.00013550475051664305, "loss": 1.4038, "step": 24827 }, { "epoch": 0.32262853634330285, "grad_norm": 0.37604841589927673, "learning_rate": 0.00013550215105473168, "loss": 1.5098, "step": 24828 }, { "epoch": 0.32264153088721875, "grad_norm": 0.41053780913352966, "learning_rate": 0.00013549955159282027, "loss": 1.4856, "step": 24829 }, { "epoch": 0.3226545254311346, "grad_norm": 0.33901020884513855, "learning_rate": 0.00013549695213090892, "loss": 1.5203, "step": 24830 }, { "epoch": 0.3226675199750505, "grad_norm": 0.33161652088165283, "learning_rate": 0.00013549435266899752, "loss": 1.476, "step": 24831 }, { "epoch": 0.32268051451896634, "grad_norm": 0.43718859553337097, "learning_rate": 0.00013549175320708615, "loss": 1.4997, "step": 24832 }, { "epoch": 0.32269350906288224, "grad_norm": 0.39829009771347046, "learning_rate": 0.00013548915374517474, "loss": 1.5696, "step": 24833 }, { "epoch": 0.3227065036067981, "grad_norm": 0.4563552141189575, "learning_rate": 0.00013548655428326337, "loss": 1.5666, "step": 24834 }, { "epoch": 0.322719498150714, "grad_norm": 0.3627989590167999, "learning_rate": 0.000135483954821352, "loss": 1.322, "step": 24835 }, { "epoch": 0.32273249269462984, "grad_norm": 0.47464463114738464, "learning_rate": 0.0001354813553594406, "loss": 1.3886, "step": 24836 }, { "epoch": 0.32274548723854574, "grad_norm": 0.4163193106651306, "learning_rate": 0.00013547875589752921, "loss": 1.4001, "step": 24837 }, { "epoch": 0.3227584817824616, "grad_norm": 0.3628351390361786, "learning_rate": 0.00013547615643561784, "loss": 1.4763, "step": 24838 }, { "epoch": 0.3227714763263775, "grad_norm": 0.4316408932209015, "learning_rate": 0.00013547355697370646, "loss": 1.4874, "step": 24839 }, { "epoch": 0.3227844708702933, "grad_norm": 0.36311185359954834, "learning_rate": 0.00013547095751179506, "loss": 1.3914, "step": 24840 }, { "epoch": 0.32279746541420923, "grad_norm": 0.4402531385421753, "learning_rate": 0.00013546835804988366, "loss": 1.2641, "step": 24841 }, { "epoch": 0.3228104599581251, "grad_norm": 0.48618343472480774, "learning_rate": 0.0001354657585879723, "loss": 1.4057, "step": 24842 }, { "epoch": 0.322823454502041, "grad_norm": 0.4292903244495392, "learning_rate": 0.0001354631591260609, "loss": 1.2999, "step": 24843 }, { "epoch": 0.3228364490459568, "grad_norm": 0.4178447127342224, "learning_rate": 0.00013546055966414953, "loss": 1.5188, "step": 24844 }, { "epoch": 0.3228494435898727, "grad_norm": 0.37929201126098633, "learning_rate": 0.00013545796020223813, "loss": 1.4396, "step": 24845 }, { "epoch": 0.32286243813378857, "grad_norm": 0.4620269536972046, "learning_rate": 0.00013545536074032675, "loss": 1.3595, "step": 24846 }, { "epoch": 0.32287543267770447, "grad_norm": 0.39598965644836426, "learning_rate": 0.00013545276127841538, "loss": 1.2685, "step": 24847 }, { "epoch": 0.3228884272216203, "grad_norm": 0.30083295702934265, "learning_rate": 0.00013545016181650398, "loss": 1.2249, "step": 24848 }, { "epoch": 0.3229014217655362, "grad_norm": 0.404985249042511, "learning_rate": 0.00013544756235459263, "loss": 1.3546, "step": 24849 }, { "epoch": 0.32291441630945206, "grad_norm": 0.4552009403705597, "learning_rate": 0.00013544496289268122, "loss": 1.4632, "step": 24850 }, { "epoch": 0.32292741085336796, "grad_norm": 0.4433949589729309, "learning_rate": 0.00013544236343076985, "loss": 1.263, "step": 24851 }, { "epoch": 0.3229404053972838, "grad_norm": 0.3626491129398346, "learning_rate": 0.00013543976396885845, "loss": 1.3176, "step": 24852 }, { "epoch": 0.3229533999411997, "grad_norm": 0.4268733859062195, "learning_rate": 0.00013543716450694707, "loss": 1.4841, "step": 24853 }, { "epoch": 0.32296639448511555, "grad_norm": 0.39087337255477905, "learning_rate": 0.0001354345650450357, "loss": 1.2964, "step": 24854 }, { "epoch": 0.32297938902903145, "grad_norm": 0.40472298860549927, "learning_rate": 0.0001354319655831243, "loss": 1.4587, "step": 24855 }, { "epoch": 0.3229923835729473, "grad_norm": 0.538482666015625, "learning_rate": 0.00013542936612121292, "loss": 1.4917, "step": 24856 }, { "epoch": 0.3230053781168632, "grad_norm": 0.2860257923603058, "learning_rate": 0.00013542676665930154, "loss": 1.6233, "step": 24857 }, { "epoch": 0.32301837266077904, "grad_norm": 0.37848132848739624, "learning_rate": 0.00013542416719739014, "loss": 1.5776, "step": 24858 }, { "epoch": 0.32303136720469494, "grad_norm": 0.3342726528644562, "learning_rate": 0.00013542156773547876, "loss": 1.2313, "step": 24859 }, { "epoch": 0.3230443617486108, "grad_norm": 0.3620634973049164, "learning_rate": 0.00013541896827356736, "loss": 1.3532, "step": 24860 }, { "epoch": 0.3230573562925267, "grad_norm": 0.27235373854637146, "learning_rate": 0.000135416368811656, "loss": 1.4234, "step": 24861 }, { "epoch": 0.32307035083644253, "grad_norm": 0.3243304491043091, "learning_rate": 0.0001354137693497446, "loss": 1.2027, "step": 24862 }, { "epoch": 0.32308334538035843, "grad_norm": 0.28318798542022705, "learning_rate": 0.00013541116988783323, "loss": 1.2209, "step": 24863 }, { "epoch": 0.3230963399242743, "grad_norm": 0.47671380639076233, "learning_rate": 0.00013540857042592183, "loss": 1.5632, "step": 24864 }, { "epoch": 0.3231093344681902, "grad_norm": 0.4260321855545044, "learning_rate": 0.00013540597096401046, "loss": 1.4437, "step": 24865 }, { "epoch": 0.323122329012106, "grad_norm": 0.41248229146003723, "learning_rate": 0.00013540337150209908, "loss": 1.5356, "step": 24866 }, { "epoch": 0.3231353235560219, "grad_norm": 0.3432072699069977, "learning_rate": 0.00013540077204018768, "loss": 1.3537, "step": 24867 }, { "epoch": 0.32314831809993777, "grad_norm": 0.36243635416030884, "learning_rate": 0.0001353981725782763, "loss": 1.4697, "step": 24868 }, { "epoch": 0.32316131264385367, "grad_norm": 0.4090852737426758, "learning_rate": 0.00013539557311636493, "loss": 1.1962, "step": 24869 }, { "epoch": 0.3231743071877695, "grad_norm": 0.3374473452568054, "learning_rate": 0.00013539297365445352, "loss": 1.3431, "step": 24870 }, { "epoch": 0.3231873017316854, "grad_norm": 0.32808125019073486, "learning_rate": 0.00013539037419254215, "loss": 1.1792, "step": 24871 }, { "epoch": 0.32320029627560126, "grad_norm": 0.3953491747379303, "learning_rate": 0.00013538777473063075, "loss": 1.4313, "step": 24872 }, { "epoch": 0.32321329081951716, "grad_norm": 0.30446162819862366, "learning_rate": 0.0001353851752687194, "loss": 1.344, "step": 24873 }, { "epoch": 0.323226285363433, "grad_norm": 0.43865498900413513, "learning_rate": 0.000135382575806808, "loss": 1.4877, "step": 24874 }, { "epoch": 0.3232392799073489, "grad_norm": 0.4455358386039734, "learning_rate": 0.00013537997634489662, "loss": 1.43, "step": 24875 }, { "epoch": 0.32325227445126475, "grad_norm": 0.35313087701797485, "learning_rate": 0.00013537737688298522, "loss": 1.5881, "step": 24876 }, { "epoch": 0.32326526899518065, "grad_norm": 0.37257739901542664, "learning_rate": 0.00013537477742107384, "loss": 1.3303, "step": 24877 }, { "epoch": 0.3232782635390965, "grad_norm": 0.5111738443374634, "learning_rate": 0.00013537217795916247, "loss": 1.4266, "step": 24878 }, { "epoch": 0.3232912580830124, "grad_norm": 0.38831818103790283, "learning_rate": 0.00013536957849725106, "loss": 1.4175, "step": 24879 }, { "epoch": 0.32330425262692825, "grad_norm": 0.37848910689353943, "learning_rate": 0.0001353669790353397, "loss": 1.345, "step": 24880 }, { "epoch": 0.32331724717084415, "grad_norm": 0.319864958524704, "learning_rate": 0.0001353643795734283, "loss": 1.3104, "step": 24881 }, { "epoch": 0.32333024171476, "grad_norm": 0.4297686517238617, "learning_rate": 0.0001353617801115169, "loss": 1.3595, "step": 24882 }, { "epoch": 0.3233432362586759, "grad_norm": 0.4045651853084564, "learning_rate": 0.00013535918064960553, "loss": 1.4907, "step": 24883 }, { "epoch": 0.32335623080259174, "grad_norm": 0.4724644422531128, "learning_rate": 0.00013535658118769416, "loss": 1.5476, "step": 24884 }, { "epoch": 0.32336922534650764, "grad_norm": 0.3693622350692749, "learning_rate": 0.00013535398172578278, "loss": 1.2824, "step": 24885 }, { "epoch": 0.3233822198904235, "grad_norm": 0.46172967553138733, "learning_rate": 0.00013535138226387138, "loss": 1.529, "step": 24886 }, { "epoch": 0.3233952144343394, "grad_norm": 0.47828933596611023, "learning_rate": 0.00013534878280196, "loss": 1.3226, "step": 24887 }, { "epoch": 0.32340820897825523, "grad_norm": 0.33672595024108887, "learning_rate": 0.00013534618334004863, "loss": 1.4788, "step": 24888 }, { "epoch": 0.32342120352217113, "grad_norm": 0.4923304617404938, "learning_rate": 0.00013534358387813723, "loss": 1.4222, "step": 24889 }, { "epoch": 0.323434198066087, "grad_norm": 0.39976876974105835, "learning_rate": 0.00013534098441622585, "loss": 1.3924, "step": 24890 }, { "epoch": 0.3234471926100029, "grad_norm": 0.3337671458721161, "learning_rate": 0.00013533838495431445, "loss": 1.2557, "step": 24891 }, { "epoch": 0.3234601871539187, "grad_norm": 0.41363513469696045, "learning_rate": 0.0001353357854924031, "loss": 1.4137, "step": 24892 }, { "epoch": 0.3234731816978346, "grad_norm": 0.5088506937026978, "learning_rate": 0.0001353331860304917, "loss": 1.4961, "step": 24893 }, { "epoch": 0.3234861762417505, "grad_norm": 0.3360183537006378, "learning_rate": 0.00013533058656858032, "loss": 1.3568, "step": 24894 }, { "epoch": 0.32349917078566637, "grad_norm": 0.3352796733379364, "learning_rate": 0.00013532798710666892, "loss": 1.2787, "step": 24895 }, { "epoch": 0.32351216532958227, "grad_norm": 0.35485097765922546, "learning_rate": 0.00013532538764475754, "loss": 1.4271, "step": 24896 }, { "epoch": 0.3235251598734981, "grad_norm": 0.4580633044242859, "learning_rate": 0.00013532278818284617, "loss": 1.2401, "step": 24897 }, { "epoch": 0.323538154417414, "grad_norm": 0.517603874206543, "learning_rate": 0.00013532018872093477, "loss": 1.4979, "step": 24898 }, { "epoch": 0.32355114896132986, "grad_norm": 0.2718959450721741, "learning_rate": 0.0001353175892590234, "loss": 1.3858, "step": 24899 }, { "epoch": 0.32356414350524576, "grad_norm": 0.39144110679626465, "learning_rate": 0.00013531498979711201, "loss": 1.3775, "step": 24900 }, { "epoch": 0.3235771380491616, "grad_norm": 0.429180771112442, "learning_rate": 0.0001353123903352006, "loss": 1.499, "step": 24901 }, { "epoch": 0.3235901325930775, "grad_norm": 0.4488694667816162, "learning_rate": 0.00013530979087328924, "loss": 1.5914, "step": 24902 }, { "epoch": 0.32360312713699335, "grad_norm": 0.3418270945549011, "learning_rate": 0.00013530719141137783, "loss": 1.1116, "step": 24903 }, { "epoch": 0.32361612168090925, "grad_norm": 0.4319004714488983, "learning_rate": 0.00013530459194946649, "loss": 1.2198, "step": 24904 }, { "epoch": 0.3236291162248251, "grad_norm": 0.3784480690956116, "learning_rate": 0.00013530199248755508, "loss": 1.2532, "step": 24905 }, { "epoch": 0.323642110768741, "grad_norm": 0.36385276913642883, "learning_rate": 0.0001352993930256437, "loss": 1.2948, "step": 24906 }, { "epoch": 0.32365510531265684, "grad_norm": 0.45260900259017944, "learning_rate": 0.0001352967935637323, "loss": 1.3254, "step": 24907 }, { "epoch": 0.32366809985657274, "grad_norm": 0.4192999601364136, "learning_rate": 0.00013529419410182093, "loss": 1.3874, "step": 24908 }, { "epoch": 0.3236810944004886, "grad_norm": 0.4241378903388977, "learning_rate": 0.00013529159463990955, "loss": 1.3429, "step": 24909 }, { "epoch": 0.3236940889444045, "grad_norm": 0.4041059911251068, "learning_rate": 0.00013528899517799815, "loss": 1.3765, "step": 24910 }, { "epoch": 0.32370708348832034, "grad_norm": 0.38063371181488037, "learning_rate": 0.00013528639571608678, "loss": 1.3264, "step": 24911 }, { "epoch": 0.32372007803223624, "grad_norm": 0.43497347831726074, "learning_rate": 0.0001352837962541754, "loss": 1.4182, "step": 24912 }, { "epoch": 0.3237330725761521, "grad_norm": 0.38060855865478516, "learning_rate": 0.000135281196792264, "loss": 1.5217, "step": 24913 }, { "epoch": 0.323746067120068, "grad_norm": 0.4803584814071655, "learning_rate": 0.00013527859733035262, "loss": 1.4066, "step": 24914 }, { "epoch": 0.3237590616639838, "grad_norm": 0.5258299112319946, "learning_rate": 0.00013527599786844122, "loss": 1.4874, "step": 24915 }, { "epoch": 0.3237720562078997, "grad_norm": 0.386028915643692, "learning_rate": 0.00013527339840652987, "loss": 1.2463, "step": 24916 }, { "epoch": 0.3237850507518156, "grad_norm": 0.43555545806884766, "learning_rate": 0.00013527079894461847, "loss": 1.6694, "step": 24917 }, { "epoch": 0.3237980452957315, "grad_norm": 0.5101915597915649, "learning_rate": 0.0001352681994827071, "loss": 1.366, "step": 24918 }, { "epoch": 0.3238110398396473, "grad_norm": 0.3759496212005615, "learning_rate": 0.0001352656000207957, "loss": 1.4064, "step": 24919 }, { "epoch": 0.3238240343835632, "grad_norm": 0.32454878091812134, "learning_rate": 0.00013526300055888431, "loss": 1.4186, "step": 24920 }, { "epoch": 0.32383702892747906, "grad_norm": 0.39061838388442993, "learning_rate": 0.00013526040109697294, "loss": 1.3787, "step": 24921 }, { "epoch": 0.32385002347139497, "grad_norm": 0.47894737124443054, "learning_rate": 0.00013525780163506154, "loss": 1.3398, "step": 24922 }, { "epoch": 0.3238630180153108, "grad_norm": 0.31642550230026245, "learning_rate": 0.0001352552021731502, "loss": 1.3317, "step": 24923 }, { "epoch": 0.3238760125592267, "grad_norm": 0.4408561587333679, "learning_rate": 0.00013525260271123879, "loss": 1.5135, "step": 24924 }, { "epoch": 0.32388900710314256, "grad_norm": 0.38072502613067627, "learning_rate": 0.00013525000324932738, "loss": 1.4164, "step": 24925 }, { "epoch": 0.32390200164705846, "grad_norm": 0.5071101784706116, "learning_rate": 0.000135247403787416, "loss": 1.441, "step": 24926 }, { "epoch": 0.3239149961909743, "grad_norm": 0.4267844259738922, "learning_rate": 0.00013524480432550463, "loss": 1.557, "step": 24927 }, { "epoch": 0.3239279907348902, "grad_norm": 0.3761705160140991, "learning_rate": 0.00013524220486359326, "loss": 1.5055, "step": 24928 }, { "epoch": 0.32394098527880605, "grad_norm": 0.4288162291049957, "learning_rate": 0.00013523960540168185, "loss": 1.4598, "step": 24929 }, { "epoch": 0.32395397982272195, "grad_norm": 0.4203217029571533, "learning_rate": 0.00013523700593977048, "loss": 1.3949, "step": 24930 }, { "epoch": 0.3239669743666378, "grad_norm": 0.4510311186313629, "learning_rate": 0.0001352344064778591, "loss": 1.3672, "step": 24931 }, { "epoch": 0.3239799689105537, "grad_norm": 0.42196300625801086, "learning_rate": 0.0001352318070159477, "loss": 1.3068, "step": 24932 }, { "epoch": 0.32399296345446954, "grad_norm": 0.35303065180778503, "learning_rate": 0.00013522920755403632, "loss": 1.4527, "step": 24933 }, { "epoch": 0.32400595799838544, "grad_norm": 0.389010488986969, "learning_rate": 0.00013522660809212492, "loss": 1.398, "step": 24934 }, { "epoch": 0.3240189525423013, "grad_norm": 0.35926100611686707, "learning_rate": 0.00013522400863021357, "loss": 1.4196, "step": 24935 }, { "epoch": 0.3240319470862172, "grad_norm": 0.42396387457847595, "learning_rate": 0.00013522140916830217, "loss": 1.4498, "step": 24936 }, { "epoch": 0.32404494163013303, "grad_norm": 0.4404793083667755, "learning_rate": 0.00013521880970639077, "loss": 1.5801, "step": 24937 }, { "epoch": 0.32405793617404893, "grad_norm": 0.36292192339897156, "learning_rate": 0.0001352162102444794, "loss": 1.3481, "step": 24938 }, { "epoch": 0.3240709307179648, "grad_norm": 0.4069438576698303, "learning_rate": 0.00013521361078256802, "loss": 1.3579, "step": 24939 }, { "epoch": 0.3240839252618807, "grad_norm": 0.32507357001304626, "learning_rate": 0.00013521101132065664, "loss": 1.3694, "step": 24940 }, { "epoch": 0.3240969198057965, "grad_norm": 0.4535185992717743, "learning_rate": 0.00013520841185874524, "loss": 1.5158, "step": 24941 }, { "epoch": 0.3241099143497124, "grad_norm": 0.3327348232269287, "learning_rate": 0.00013520581239683386, "loss": 1.4626, "step": 24942 }, { "epoch": 0.32412290889362827, "grad_norm": 0.4001970589160919, "learning_rate": 0.0001352032129349225, "loss": 1.3684, "step": 24943 }, { "epoch": 0.32413590343754417, "grad_norm": 0.36679449677467346, "learning_rate": 0.00013520061347301109, "loss": 1.2966, "step": 24944 }, { "epoch": 0.32414889798146, "grad_norm": 0.3613438904285431, "learning_rate": 0.0001351980140110997, "loss": 1.63, "step": 24945 }, { "epoch": 0.3241618925253759, "grad_norm": 0.39503341913223267, "learning_rate": 0.0001351954145491883, "loss": 1.3538, "step": 24946 }, { "epoch": 0.32417488706929176, "grad_norm": 0.39064785838127136, "learning_rate": 0.00013519281508727696, "loss": 1.5296, "step": 24947 }, { "epoch": 0.32418788161320766, "grad_norm": 0.4340202510356903, "learning_rate": 0.00013519021562536556, "loss": 1.3433, "step": 24948 }, { "epoch": 0.3242008761571235, "grad_norm": 0.33337584137916565, "learning_rate": 0.00013518761616345415, "loss": 1.4876, "step": 24949 }, { "epoch": 0.3242138707010394, "grad_norm": 0.4356640577316284, "learning_rate": 0.00013518501670154278, "loss": 1.3719, "step": 24950 }, { "epoch": 0.32422686524495525, "grad_norm": 0.4716472923755646, "learning_rate": 0.0001351824172396314, "loss": 1.4316, "step": 24951 }, { "epoch": 0.32423985978887115, "grad_norm": 0.49827995896339417, "learning_rate": 0.00013517981777772003, "loss": 1.3297, "step": 24952 }, { "epoch": 0.324252854332787, "grad_norm": 0.4101789891719818, "learning_rate": 0.00013517721831580862, "loss": 1.3277, "step": 24953 }, { "epoch": 0.3242658488767029, "grad_norm": 0.492003858089447, "learning_rate": 0.00013517461885389725, "loss": 1.5181, "step": 24954 }, { "epoch": 0.32427884342061875, "grad_norm": 0.35504239797592163, "learning_rate": 0.00013517201939198587, "loss": 1.2464, "step": 24955 }, { "epoch": 0.32429183796453465, "grad_norm": 0.49131059646606445, "learning_rate": 0.00013516941993007447, "loss": 1.3402, "step": 24956 }, { "epoch": 0.3243048325084505, "grad_norm": 0.34662574529647827, "learning_rate": 0.0001351668204681631, "loss": 1.1408, "step": 24957 }, { "epoch": 0.3243178270523664, "grad_norm": 0.4073435366153717, "learning_rate": 0.00013516422100625172, "loss": 1.2208, "step": 24958 }, { "epoch": 0.32433082159628224, "grad_norm": 0.40237903594970703, "learning_rate": 0.00013516162154434034, "loss": 1.3924, "step": 24959 }, { "epoch": 0.32434381614019814, "grad_norm": 0.4088524878025055, "learning_rate": 0.00013515902208242894, "loss": 1.3625, "step": 24960 }, { "epoch": 0.324356810684114, "grad_norm": 0.3493786156177521, "learning_rate": 0.00013515642262051757, "loss": 1.2137, "step": 24961 }, { "epoch": 0.3243698052280299, "grad_norm": 0.3620012700557709, "learning_rate": 0.0001351538231586062, "loss": 1.5046, "step": 24962 }, { "epoch": 0.32438279977194573, "grad_norm": 0.43123066425323486, "learning_rate": 0.0001351512236966948, "loss": 1.3069, "step": 24963 }, { "epoch": 0.32439579431586163, "grad_norm": 0.33102044463157654, "learning_rate": 0.0001351486242347834, "loss": 1.5371, "step": 24964 }, { "epoch": 0.3244087888597775, "grad_norm": 0.2837887704372406, "learning_rate": 0.000135146024772872, "loss": 1.5202, "step": 24965 }, { "epoch": 0.3244217834036934, "grad_norm": 0.43771886825561523, "learning_rate": 0.00013514342531096063, "loss": 1.3736, "step": 24966 }, { "epoch": 0.3244347779476092, "grad_norm": 0.4169369339942932, "learning_rate": 0.00013514082584904926, "loss": 1.3102, "step": 24967 }, { "epoch": 0.3244477724915251, "grad_norm": 0.4754272997379303, "learning_rate": 0.00013513822638713786, "loss": 1.4647, "step": 24968 }, { "epoch": 0.32446076703544097, "grad_norm": 0.35552677512168884, "learning_rate": 0.00013513562692522648, "loss": 1.3328, "step": 24969 }, { "epoch": 0.32447376157935687, "grad_norm": 0.3923947513103485, "learning_rate": 0.0001351330274633151, "loss": 1.5282, "step": 24970 }, { "epoch": 0.32448675612327277, "grad_norm": 0.3813493847846985, "learning_rate": 0.00013513042800140373, "loss": 1.3916, "step": 24971 }, { "epoch": 0.3244997506671886, "grad_norm": 0.40924695134162903, "learning_rate": 0.00013512782853949233, "loss": 1.4469, "step": 24972 }, { "epoch": 0.3245127452111045, "grad_norm": 0.5052087903022766, "learning_rate": 0.00013512522907758095, "loss": 1.5353, "step": 24973 }, { "epoch": 0.32452573975502036, "grad_norm": 0.36621546745300293, "learning_rate": 0.00013512262961566958, "loss": 1.3115, "step": 24974 }, { "epoch": 0.32453873429893626, "grad_norm": 0.42935553193092346, "learning_rate": 0.00013512003015375817, "loss": 1.4772, "step": 24975 }, { "epoch": 0.3245517288428521, "grad_norm": 0.45067891478538513, "learning_rate": 0.0001351174306918468, "loss": 1.3963, "step": 24976 }, { "epoch": 0.324564723386768, "grad_norm": 0.334783136844635, "learning_rate": 0.0001351148312299354, "loss": 1.2486, "step": 24977 }, { "epoch": 0.32457771793068385, "grad_norm": 0.3122904300689697, "learning_rate": 0.00013511223176802405, "loss": 1.5302, "step": 24978 }, { "epoch": 0.32459071247459975, "grad_norm": 0.4262402057647705, "learning_rate": 0.00013510963230611264, "loss": 1.2689, "step": 24979 }, { "epoch": 0.3246037070185156, "grad_norm": 0.34411951899528503, "learning_rate": 0.00013510703284420124, "loss": 1.4389, "step": 24980 }, { "epoch": 0.3246167015624315, "grad_norm": 0.3515605330467224, "learning_rate": 0.00013510443338228987, "loss": 1.3707, "step": 24981 }, { "epoch": 0.32462969610634734, "grad_norm": 0.40537312626838684, "learning_rate": 0.0001351018339203785, "loss": 1.3657, "step": 24982 }, { "epoch": 0.32464269065026324, "grad_norm": 0.3802444040775299, "learning_rate": 0.00013509923445846712, "loss": 1.5547, "step": 24983 }, { "epoch": 0.3246556851941791, "grad_norm": 0.38722100853919983, "learning_rate": 0.0001350966349965557, "loss": 1.408, "step": 24984 }, { "epoch": 0.324668679738095, "grad_norm": 0.39462536573410034, "learning_rate": 0.00013509403553464434, "loss": 1.4789, "step": 24985 }, { "epoch": 0.32468167428201083, "grad_norm": 0.420680969953537, "learning_rate": 0.00013509143607273296, "loss": 1.3267, "step": 24986 }, { "epoch": 0.32469466882592674, "grad_norm": 0.33159300684928894, "learning_rate": 0.00013508883661082156, "loss": 1.5063, "step": 24987 }, { "epoch": 0.3247076633698426, "grad_norm": 0.3995024263858795, "learning_rate": 0.00013508623714891018, "loss": 1.6027, "step": 24988 }, { "epoch": 0.3247206579137585, "grad_norm": 0.36618131399154663, "learning_rate": 0.00013508363768699878, "loss": 1.4363, "step": 24989 }, { "epoch": 0.3247336524576743, "grad_norm": 0.38703015446662903, "learning_rate": 0.00013508103822508743, "loss": 1.5189, "step": 24990 }, { "epoch": 0.3247466470015902, "grad_norm": 0.32923591136932373, "learning_rate": 0.00013507843876317603, "loss": 1.6484, "step": 24991 }, { "epoch": 0.3247596415455061, "grad_norm": 0.4181039035320282, "learning_rate": 0.00013507583930126463, "loss": 1.4062, "step": 24992 }, { "epoch": 0.324772636089422, "grad_norm": 0.41602247953414917, "learning_rate": 0.00013507323983935325, "loss": 1.5106, "step": 24993 }, { "epoch": 0.3247856306333378, "grad_norm": 0.27113261818885803, "learning_rate": 0.00013507064037744188, "loss": 1.3445, "step": 24994 }, { "epoch": 0.3247986251772537, "grad_norm": 0.4463062584400177, "learning_rate": 0.0001350680409155305, "loss": 1.4254, "step": 24995 }, { "epoch": 0.32481161972116956, "grad_norm": 0.30592623353004456, "learning_rate": 0.0001350654414536191, "loss": 1.2206, "step": 24996 }, { "epoch": 0.32482461426508547, "grad_norm": 0.3911055028438568, "learning_rate": 0.00013506284199170772, "loss": 1.3311, "step": 24997 }, { "epoch": 0.3248376088090013, "grad_norm": 0.45315492153167725, "learning_rate": 0.00013506024252979635, "loss": 1.1246, "step": 24998 }, { "epoch": 0.3248506033529172, "grad_norm": 0.4048317074775696, "learning_rate": 0.00013505764306788494, "loss": 1.4223, "step": 24999 }, { "epoch": 0.32486359789683306, "grad_norm": 0.35791638493537903, "learning_rate": 0.00013505504360597357, "loss": 1.5504, "step": 25000 }, { "epoch": 0.32487659244074896, "grad_norm": 0.46022653579711914, "learning_rate": 0.0001350524441440622, "loss": 1.4974, "step": 25001 }, { "epoch": 0.3248895869846648, "grad_norm": 0.417204886674881, "learning_rate": 0.00013504984468215082, "loss": 1.6056, "step": 25002 }, { "epoch": 0.3249025815285807, "grad_norm": 0.38330891728401184, "learning_rate": 0.00013504724522023942, "loss": 1.4404, "step": 25003 }, { "epoch": 0.32491557607249655, "grad_norm": 0.5061829686164856, "learning_rate": 0.000135044645758328, "loss": 1.424, "step": 25004 }, { "epoch": 0.32492857061641245, "grad_norm": 0.4341750144958496, "learning_rate": 0.00013504204629641666, "loss": 1.379, "step": 25005 }, { "epoch": 0.3249415651603283, "grad_norm": 0.3365899622440338, "learning_rate": 0.00013503944683450526, "loss": 1.281, "step": 25006 }, { "epoch": 0.3249545597042442, "grad_norm": 0.5352455973625183, "learning_rate": 0.00013503684737259389, "loss": 1.567, "step": 25007 }, { "epoch": 0.32496755424816004, "grad_norm": 0.4097532331943512, "learning_rate": 0.00013503424791068248, "loss": 1.6571, "step": 25008 }, { "epoch": 0.32498054879207594, "grad_norm": 0.3723010718822479, "learning_rate": 0.0001350316484487711, "loss": 1.4807, "step": 25009 }, { "epoch": 0.3249935433359918, "grad_norm": 0.3988194167613983, "learning_rate": 0.00013502904898685973, "loss": 1.4546, "step": 25010 }, { "epoch": 0.3250065378799077, "grad_norm": 0.4118337631225586, "learning_rate": 0.00013502644952494833, "loss": 1.2544, "step": 25011 }, { "epoch": 0.32501953242382353, "grad_norm": 0.4609181582927704, "learning_rate": 0.00013502385006303695, "loss": 1.5052, "step": 25012 }, { "epoch": 0.32503252696773943, "grad_norm": 0.4377482533454895, "learning_rate": 0.00013502125060112558, "loss": 1.518, "step": 25013 }, { "epoch": 0.3250455215116553, "grad_norm": 0.3532160222530365, "learning_rate": 0.0001350186511392142, "loss": 1.3827, "step": 25014 }, { "epoch": 0.3250585160555712, "grad_norm": 0.449031263589859, "learning_rate": 0.0001350160516773028, "loss": 1.4279, "step": 25015 }, { "epoch": 0.325071510599487, "grad_norm": 0.3605179488658905, "learning_rate": 0.00013501345221539142, "loss": 1.2892, "step": 25016 }, { "epoch": 0.3250845051434029, "grad_norm": 0.4708857238292694, "learning_rate": 0.00013501085275348005, "loss": 1.665, "step": 25017 }, { "epoch": 0.32509749968731877, "grad_norm": 0.35279399156570435, "learning_rate": 0.00013500825329156865, "loss": 1.2813, "step": 25018 }, { "epoch": 0.32511049423123467, "grad_norm": 0.352827787399292, "learning_rate": 0.00013500565382965727, "loss": 1.1623, "step": 25019 }, { "epoch": 0.3251234887751505, "grad_norm": 0.34444841742515564, "learning_rate": 0.00013500305436774587, "loss": 1.4464, "step": 25020 }, { "epoch": 0.3251364833190664, "grad_norm": 0.4622649550437927, "learning_rate": 0.0001350004549058345, "loss": 1.4626, "step": 25021 }, { "epoch": 0.32514947786298226, "grad_norm": 0.32305943965911865, "learning_rate": 0.00013499785544392312, "loss": 1.301, "step": 25022 }, { "epoch": 0.32516247240689816, "grad_norm": 0.32979416847229004, "learning_rate": 0.00013499525598201171, "loss": 1.4031, "step": 25023 }, { "epoch": 0.325175466950814, "grad_norm": 0.5701051354408264, "learning_rate": 0.00013499265652010034, "loss": 1.6597, "step": 25024 }, { "epoch": 0.3251884614947299, "grad_norm": 0.3824736177921295, "learning_rate": 0.00013499005705818896, "loss": 1.3873, "step": 25025 }, { "epoch": 0.32520145603864575, "grad_norm": 0.4354667365550995, "learning_rate": 0.0001349874575962776, "loss": 1.3016, "step": 25026 }, { "epoch": 0.32521445058256165, "grad_norm": 0.3852863311767578, "learning_rate": 0.00013498485813436619, "loss": 1.5023, "step": 25027 }, { "epoch": 0.3252274451264775, "grad_norm": 0.4297827482223511, "learning_rate": 0.0001349822586724548, "loss": 1.3444, "step": 25028 }, { "epoch": 0.3252404396703934, "grad_norm": 0.4374180734157562, "learning_rate": 0.00013497965921054343, "loss": 1.5081, "step": 25029 }, { "epoch": 0.32525343421430924, "grad_norm": 0.3657471537590027, "learning_rate": 0.00013497705974863203, "loss": 1.3981, "step": 25030 }, { "epoch": 0.32526642875822515, "grad_norm": 0.2984415590763092, "learning_rate": 0.00013497446028672066, "loss": 1.3992, "step": 25031 }, { "epoch": 0.325279423302141, "grad_norm": 0.47639337182044983, "learning_rate": 0.00013497186082480928, "loss": 1.4649, "step": 25032 }, { "epoch": 0.3252924178460569, "grad_norm": 0.4582609534263611, "learning_rate": 0.00013496926136289788, "loss": 1.5148, "step": 25033 }, { "epoch": 0.32530541238997274, "grad_norm": 0.505190908908844, "learning_rate": 0.0001349666619009865, "loss": 1.4259, "step": 25034 }, { "epoch": 0.32531840693388864, "grad_norm": 0.4094711244106293, "learning_rate": 0.0001349640624390751, "loss": 1.4837, "step": 25035 }, { "epoch": 0.3253314014778045, "grad_norm": 0.3223525583744049, "learning_rate": 0.00013496146297716375, "loss": 1.4357, "step": 25036 }, { "epoch": 0.3253443960217204, "grad_norm": 0.42297056317329407, "learning_rate": 0.00013495886351525235, "loss": 1.4376, "step": 25037 }, { "epoch": 0.32535739056563623, "grad_norm": 0.32708024978637695, "learning_rate": 0.00013495626405334097, "loss": 1.114, "step": 25038 }, { "epoch": 0.32537038510955213, "grad_norm": 0.48127609491348267, "learning_rate": 0.00013495366459142957, "loss": 1.3963, "step": 25039 }, { "epoch": 0.325383379653468, "grad_norm": 0.39153796434402466, "learning_rate": 0.0001349510651295182, "loss": 1.4529, "step": 25040 }, { "epoch": 0.3253963741973839, "grad_norm": 0.3022606074810028, "learning_rate": 0.00013494846566760682, "loss": 1.5539, "step": 25041 }, { "epoch": 0.3254093687412997, "grad_norm": 0.5268205404281616, "learning_rate": 0.00013494586620569542, "loss": 1.3033, "step": 25042 }, { "epoch": 0.3254223632852156, "grad_norm": 0.35959258675575256, "learning_rate": 0.00013494326674378404, "loss": 1.4074, "step": 25043 }, { "epoch": 0.32543535782913147, "grad_norm": 0.4490048885345459, "learning_rate": 0.00013494066728187267, "loss": 1.36, "step": 25044 }, { "epoch": 0.32544835237304737, "grad_norm": 0.3637920916080475, "learning_rate": 0.0001349380678199613, "loss": 1.4068, "step": 25045 }, { "epoch": 0.32546134691696327, "grad_norm": 0.36366933584213257, "learning_rate": 0.0001349354683580499, "loss": 1.4479, "step": 25046 }, { "epoch": 0.3254743414608791, "grad_norm": 0.45708683133125305, "learning_rate": 0.00013493286889613849, "loss": 1.3328, "step": 25047 }, { "epoch": 0.325487336004795, "grad_norm": 0.3925706446170807, "learning_rate": 0.00013493026943422714, "loss": 1.3605, "step": 25048 }, { "epoch": 0.32550033054871086, "grad_norm": 0.33771613240242004, "learning_rate": 0.00013492766997231573, "loss": 1.4516, "step": 25049 }, { "epoch": 0.32551332509262676, "grad_norm": 0.4395461678504944, "learning_rate": 0.00013492507051040436, "loss": 1.6518, "step": 25050 }, { "epoch": 0.3255263196365426, "grad_norm": 0.4612828195095062, "learning_rate": 0.00013492247104849296, "loss": 1.4781, "step": 25051 }, { "epoch": 0.3255393141804585, "grad_norm": 0.34174174070358276, "learning_rate": 0.00013491987158658158, "loss": 1.2694, "step": 25052 }, { "epoch": 0.32555230872437435, "grad_norm": 0.36931562423706055, "learning_rate": 0.0001349172721246702, "loss": 1.4181, "step": 25053 }, { "epoch": 0.32556530326829025, "grad_norm": 0.3899327218532562, "learning_rate": 0.0001349146726627588, "loss": 1.3871, "step": 25054 }, { "epoch": 0.3255782978122061, "grad_norm": 0.42077282071113586, "learning_rate": 0.00013491207320084743, "loss": 1.4662, "step": 25055 }, { "epoch": 0.325591292356122, "grad_norm": 0.4258863925933838, "learning_rate": 0.00013490947373893605, "loss": 1.4813, "step": 25056 }, { "epoch": 0.32560428690003784, "grad_norm": 0.5056393146514893, "learning_rate": 0.00013490687427702468, "loss": 1.5901, "step": 25057 }, { "epoch": 0.32561728144395374, "grad_norm": 0.4341283142566681, "learning_rate": 0.00013490427481511327, "loss": 1.4062, "step": 25058 }, { "epoch": 0.3256302759878696, "grad_norm": 0.38846975564956665, "learning_rate": 0.00013490167535320187, "loss": 1.5391, "step": 25059 }, { "epoch": 0.3256432705317855, "grad_norm": 0.39466699957847595, "learning_rate": 0.00013489907589129052, "loss": 1.1182, "step": 25060 }, { "epoch": 0.32565626507570133, "grad_norm": 0.32375431060791016, "learning_rate": 0.00013489647642937912, "loss": 1.2492, "step": 25061 }, { "epoch": 0.32566925961961724, "grad_norm": 0.36306890845298767, "learning_rate": 0.00013489387696746774, "loss": 1.4695, "step": 25062 }, { "epoch": 0.3256822541635331, "grad_norm": 0.2906920313835144, "learning_rate": 0.00013489127750555634, "loss": 1.1623, "step": 25063 }, { "epoch": 0.325695248707449, "grad_norm": 0.41639089584350586, "learning_rate": 0.00013488867804364497, "loss": 1.5278, "step": 25064 }, { "epoch": 0.3257082432513648, "grad_norm": 0.40844419598579407, "learning_rate": 0.0001348860785817336, "loss": 1.29, "step": 25065 }, { "epoch": 0.3257212377952807, "grad_norm": 0.40919041633605957, "learning_rate": 0.0001348834791198222, "loss": 1.4418, "step": 25066 }, { "epoch": 0.32573423233919657, "grad_norm": 0.4001988470554352, "learning_rate": 0.0001348808796579108, "loss": 1.4395, "step": 25067 }, { "epoch": 0.3257472268831125, "grad_norm": 0.38842952251434326, "learning_rate": 0.00013487828019599944, "loss": 1.336, "step": 25068 }, { "epoch": 0.3257602214270283, "grad_norm": 0.4620618224143982, "learning_rate": 0.00013487568073408806, "loss": 1.559, "step": 25069 }, { "epoch": 0.3257732159709442, "grad_norm": 0.4246152639389038, "learning_rate": 0.00013487308127217666, "loss": 1.4155, "step": 25070 }, { "epoch": 0.32578621051486006, "grad_norm": 0.35777661204338074, "learning_rate": 0.00013487048181026528, "loss": 1.4633, "step": 25071 }, { "epoch": 0.32579920505877596, "grad_norm": 0.3936436176300049, "learning_rate": 0.0001348678823483539, "loss": 1.4218, "step": 25072 }, { "epoch": 0.3258121996026918, "grad_norm": 0.4077737331390381, "learning_rate": 0.0001348652828864425, "loss": 1.3473, "step": 25073 }, { "epoch": 0.3258251941466077, "grad_norm": 0.5033107995986938, "learning_rate": 0.00013486268342453113, "loss": 1.3831, "step": 25074 }, { "epoch": 0.32583818869052356, "grad_norm": 0.34461936354637146, "learning_rate": 0.00013486008396261975, "loss": 1.3294, "step": 25075 }, { "epoch": 0.32585118323443946, "grad_norm": 0.34761327505111694, "learning_rate": 0.00013485748450070835, "loss": 1.3322, "step": 25076 }, { "epoch": 0.3258641777783553, "grad_norm": 0.4112671911716461, "learning_rate": 0.00013485488503879698, "loss": 1.4145, "step": 25077 }, { "epoch": 0.3258771723222712, "grad_norm": 0.34792712330818176, "learning_rate": 0.00013485228557688557, "loss": 1.4282, "step": 25078 }, { "epoch": 0.32589016686618705, "grad_norm": 0.49308788776397705, "learning_rate": 0.00013484968611497423, "loss": 1.4411, "step": 25079 }, { "epoch": 0.32590316141010295, "grad_norm": 0.3949072062969208, "learning_rate": 0.00013484708665306282, "loss": 1.3714, "step": 25080 }, { "epoch": 0.3259161559540188, "grad_norm": 0.41147202253341675, "learning_rate": 0.00013484448719115145, "loss": 1.4397, "step": 25081 }, { "epoch": 0.3259291504979347, "grad_norm": 0.3794117867946625, "learning_rate": 0.00013484188772924004, "loss": 1.5291, "step": 25082 }, { "epoch": 0.32594214504185054, "grad_norm": 0.4321388304233551, "learning_rate": 0.00013483928826732867, "loss": 1.4051, "step": 25083 }, { "epoch": 0.32595513958576644, "grad_norm": 0.44398024678230286, "learning_rate": 0.0001348366888054173, "loss": 1.412, "step": 25084 }, { "epoch": 0.3259681341296823, "grad_norm": 0.4830962121486664, "learning_rate": 0.0001348340893435059, "loss": 1.4736, "step": 25085 }, { "epoch": 0.3259811286735982, "grad_norm": 0.3724367320537567, "learning_rate": 0.00013483148988159452, "loss": 1.2677, "step": 25086 }, { "epoch": 0.32599412321751403, "grad_norm": 0.36601653695106506, "learning_rate": 0.00013482889041968314, "loss": 1.4373, "step": 25087 }, { "epoch": 0.32600711776142993, "grad_norm": 0.36193615198135376, "learning_rate": 0.00013482629095777174, "loss": 1.3491, "step": 25088 }, { "epoch": 0.3260201123053458, "grad_norm": 0.425514817237854, "learning_rate": 0.00013482369149586036, "loss": 1.4519, "step": 25089 }, { "epoch": 0.3260331068492617, "grad_norm": 0.4317486882209778, "learning_rate": 0.00013482109203394896, "loss": 1.3881, "step": 25090 }, { "epoch": 0.3260461013931775, "grad_norm": 0.5020955801010132, "learning_rate": 0.0001348184925720376, "loss": 1.5875, "step": 25091 }, { "epoch": 0.3260590959370934, "grad_norm": 0.41325709223747253, "learning_rate": 0.0001348158931101262, "loss": 1.436, "step": 25092 }, { "epoch": 0.32607209048100927, "grad_norm": 0.36766985058784485, "learning_rate": 0.00013481329364821483, "loss": 1.3766, "step": 25093 }, { "epoch": 0.32608508502492517, "grad_norm": 0.39926642179489136, "learning_rate": 0.00013481069418630343, "loss": 1.4934, "step": 25094 }, { "epoch": 0.326098079568841, "grad_norm": 0.2830159664154053, "learning_rate": 0.00013480809472439205, "loss": 1.1838, "step": 25095 }, { "epoch": 0.3261110741127569, "grad_norm": 0.34247270226478577, "learning_rate": 0.00013480549526248068, "loss": 1.373, "step": 25096 }, { "epoch": 0.32612406865667276, "grad_norm": 0.38097715377807617, "learning_rate": 0.00013480289580056928, "loss": 1.2018, "step": 25097 }, { "epoch": 0.32613706320058866, "grad_norm": 0.4696395993232727, "learning_rate": 0.0001348002963386579, "loss": 1.3039, "step": 25098 }, { "epoch": 0.3261500577445045, "grad_norm": 0.300123393535614, "learning_rate": 0.00013479769687674653, "loss": 1.4659, "step": 25099 }, { "epoch": 0.3261630522884204, "grad_norm": 0.3671986162662506, "learning_rate": 0.00013479509741483515, "loss": 1.2592, "step": 25100 }, { "epoch": 0.32617604683233625, "grad_norm": 0.45503172278404236, "learning_rate": 0.00013479249795292375, "loss": 1.5242, "step": 25101 }, { "epoch": 0.32618904137625215, "grad_norm": 0.4303828775882721, "learning_rate": 0.00013478989849101234, "loss": 1.4291, "step": 25102 }, { "epoch": 0.326202035920168, "grad_norm": 0.4110209047794342, "learning_rate": 0.000134787299029101, "loss": 1.3396, "step": 25103 }, { "epoch": 0.3262150304640839, "grad_norm": 0.41984519362449646, "learning_rate": 0.0001347846995671896, "loss": 1.3952, "step": 25104 }, { "epoch": 0.32622802500799974, "grad_norm": 0.3950515687465668, "learning_rate": 0.00013478210010527822, "loss": 1.1887, "step": 25105 }, { "epoch": 0.32624101955191565, "grad_norm": 0.2811693847179413, "learning_rate": 0.00013477950064336684, "loss": 1.3754, "step": 25106 }, { "epoch": 0.3262540140958315, "grad_norm": 0.34692201018333435, "learning_rate": 0.00013477690118145544, "loss": 1.5367, "step": 25107 }, { "epoch": 0.3262670086397474, "grad_norm": 0.3976317346096039, "learning_rate": 0.00013477430171954406, "loss": 1.3272, "step": 25108 }, { "epoch": 0.32628000318366324, "grad_norm": 0.3629496693611145, "learning_rate": 0.00013477170225763266, "loss": 1.2415, "step": 25109 }, { "epoch": 0.32629299772757914, "grad_norm": 0.37405526638031006, "learning_rate": 0.0001347691027957213, "loss": 1.4472, "step": 25110 }, { "epoch": 0.326305992271495, "grad_norm": 0.37751832604408264, "learning_rate": 0.0001347665033338099, "loss": 1.1198, "step": 25111 }, { "epoch": 0.3263189868154109, "grad_norm": 0.4611285328865051, "learning_rate": 0.00013476390387189854, "loss": 1.3738, "step": 25112 }, { "epoch": 0.32633198135932673, "grad_norm": 0.37156814336776733, "learning_rate": 0.00013476130440998713, "loss": 1.2268, "step": 25113 }, { "epoch": 0.32634497590324263, "grad_norm": 0.4042040705680847, "learning_rate": 0.00013475870494807576, "loss": 1.4387, "step": 25114 }, { "epoch": 0.3263579704471585, "grad_norm": 0.3807907700538635, "learning_rate": 0.00013475610548616438, "loss": 1.2069, "step": 25115 }, { "epoch": 0.3263709649910744, "grad_norm": 0.3175273537635803, "learning_rate": 0.00013475350602425298, "loss": 1.4251, "step": 25116 }, { "epoch": 0.3263839595349902, "grad_norm": 0.46717217564582825, "learning_rate": 0.0001347509065623416, "loss": 1.335, "step": 25117 }, { "epoch": 0.3263969540789061, "grad_norm": 0.38880303502082825, "learning_rate": 0.00013474830710043023, "loss": 1.4468, "step": 25118 }, { "epoch": 0.32640994862282197, "grad_norm": 0.35459861159324646, "learning_rate": 0.00013474570763851883, "loss": 1.1196, "step": 25119 }, { "epoch": 0.32642294316673787, "grad_norm": 0.42634567618370056, "learning_rate": 0.00013474310817660745, "loss": 1.2734, "step": 25120 }, { "epoch": 0.3264359377106537, "grad_norm": 0.4068199098110199, "learning_rate": 0.00013474050871469605, "loss": 1.4219, "step": 25121 }, { "epoch": 0.3264489322545696, "grad_norm": 0.4204493463039398, "learning_rate": 0.0001347379092527847, "loss": 1.2902, "step": 25122 }, { "epoch": 0.3264619267984855, "grad_norm": 0.4918639361858368, "learning_rate": 0.0001347353097908733, "loss": 1.4356, "step": 25123 }, { "epoch": 0.32647492134240136, "grad_norm": 0.3572603166103363, "learning_rate": 0.00013473271032896192, "loss": 1.3307, "step": 25124 }, { "epoch": 0.32648791588631726, "grad_norm": 0.42816242575645447, "learning_rate": 0.00013473011086705052, "loss": 1.4716, "step": 25125 }, { "epoch": 0.3265009104302331, "grad_norm": 0.3943621516227722, "learning_rate": 0.00013472751140513914, "loss": 1.4461, "step": 25126 }, { "epoch": 0.326513904974149, "grad_norm": 0.48014211654663086, "learning_rate": 0.00013472491194322777, "loss": 1.5333, "step": 25127 }, { "epoch": 0.32652689951806485, "grad_norm": 0.36372148990631104, "learning_rate": 0.00013472231248131636, "loss": 1.3142, "step": 25128 }, { "epoch": 0.32653989406198075, "grad_norm": 0.4038459062576294, "learning_rate": 0.000134719713019405, "loss": 1.4598, "step": 25129 }, { "epoch": 0.3265528886058966, "grad_norm": 0.4052509069442749, "learning_rate": 0.0001347171135574936, "loss": 1.3734, "step": 25130 }, { "epoch": 0.3265658831498125, "grad_norm": 0.32374992966651917, "learning_rate": 0.0001347145140955822, "loss": 1.5525, "step": 25131 }, { "epoch": 0.32657887769372834, "grad_norm": 0.22822009027004242, "learning_rate": 0.00013471191463367084, "loss": 1.4732, "step": 25132 }, { "epoch": 0.32659187223764424, "grad_norm": 0.4433041214942932, "learning_rate": 0.00013470931517175943, "loss": 1.4581, "step": 25133 }, { "epoch": 0.3266048667815601, "grad_norm": 0.392037957906723, "learning_rate": 0.00013470671570984808, "loss": 1.6374, "step": 25134 }, { "epoch": 0.326617861325476, "grad_norm": 0.47803208231925964, "learning_rate": 0.00013470411624793668, "loss": 1.3338, "step": 25135 }, { "epoch": 0.32663085586939183, "grad_norm": 0.4125545620918274, "learning_rate": 0.0001347015167860253, "loss": 1.4345, "step": 25136 }, { "epoch": 0.32664385041330773, "grad_norm": 0.390426367521286, "learning_rate": 0.0001346989173241139, "loss": 1.2749, "step": 25137 }, { "epoch": 0.3266568449572236, "grad_norm": 0.37080010771751404, "learning_rate": 0.00013469631786220253, "loss": 1.4645, "step": 25138 }, { "epoch": 0.3266698395011395, "grad_norm": 0.34431877732276917, "learning_rate": 0.00013469371840029115, "loss": 1.3404, "step": 25139 }, { "epoch": 0.3266828340450553, "grad_norm": 0.5365375876426697, "learning_rate": 0.00013469111893837975, "loss": 1.4166, "step": 25140 }, { "epoch": 0.3266958285889712, "grad_norm": 0.506577730178833, "learning_rate": 0.00013468851947646837, "loss": 1.5908, "step": 25141 }, { "epoch": 0.32670882313288707, "grad_norm": 0.43247413635253906, "learning_rate": 0.000134685920014557, "loss": 1.3231, "step": 25142 }, { "epoch": 0.326721817676803, "grad_norm": 0.3522419333457947, "learning_rate": 0.0001346833205526456, "loss": 1.318, "step": 25143 }, { "epoch": 0.3267348122207188, "grad_norm": 0.4904137849807739, "learning_rate": 0.00013468072109073422, "loss": 1.5135, "step": 25144 }, { "epoch": 0.3267478067646347, "grad_norm": 0.32583436369895935, "learning_rate": 0.00013467812162882284, "loss": 1.3889, "step": 25145 }, { "epoch": 0.32676080130855056, "grad_norm": 0.4270898401737213, "learning_rate": 0.00013467552216691147, "loss": 1.4651, "step": 25146 }, { "epoch": 0.32677379585246646, "grad_norm": 0.4327910840511322, "learning_rate": 0.00013467292270500007, "loss": 1.3234, "step": 25147 }, { "epoch": 0.3267867903963823, "grad_norm": 0.3867788314819336, "learning_rate": 0.0001346703232430887, "loss": 1.3614, "step": 25148 }, { "epoch": 0.3267997849402982, "grad_norm": 0.416170209646225, "learning_rate": 0.00013466772378117732, "loss": 1.3736, "step": 25149 }, { "epoch": 0.32681277948421406, "grad_norm": 0.4406786859035492, "learning_rate": 0.0001346651243192659, "loss": 1.4754, "step": 25150 }, { "epoch": 0.32682577402812996, "grad_norm": 0.41690707206726074, "learning_rate": 0.00013466252485735454, "loss": 1.4068, "step": 25151 }, { "epoch": 0.3268387685720458, "grad_norm": 0.3712075352668762, "learning_rate": 0.00013465992539544314, "loss": 1.3816, "step": 25152 }, { "epoch": 0.3268517631159617, "grad_norm": 0.4014545977115631, "learning_rate": 0.0001346573259335318, "loss": 1.5239, "step": 25153 }, { "epoch": 0.32686475765987755, "grad_norm": 0.22958482801914215, "learning_rate": 0.00013465472647162038, "loss": 0.9796, "step": 25154 }, { "epoch": 0.32687775220379345, "grad_norm": 0.3111101984977722, "learning_rate": 0.00013465212700970898, "loss": 1.3745, "step": 25155 }, { "epoch": 0.3268907467477093, "grad_norm": 0.49364593625068665, "learning_rate": 0.0001346495275477976, "loss": 1.5498, "step": 25156 }, { "epoch": 0.3269037412916252, "grad_norm": 0.3933982849121094, "learning_rate": 0.00013464692808588623, "loss": 1.2648, "step": 25157 }, { "epoch": 0.32691673583554104, "grad_norm": 0.3878893554210663, "learning_rate": 0.00013464432862397485, "loss": 1.3151, "step": 25158 }, { "epoch": 0.32692973037945694, "grad_norm": 0.37900814414024353, "learning_rate": 0.00013464172916206345, "loss": 1.4053, "step": 25159 }, { "epoch": 0.3269427249233728, "grad_norm": 0.4502893388271332, "learning_rate": 0.00013463912970015208, "loss": 1.5794, "step": 25160 }, { "epoch": 0.3269557194672887, "grad_norm": 0.3121495544910431, "learning_rate": 0.0001346365302382407, "loss": 1.399, "step": 25161 }, { "epoch": 0.32696871401120453, "grad_norm": 0.4192117750644684, "learning_rate": 0.0001346339307763293, "loss": 1.416, "step": 25162 }, { "epoch": 0.32698170855512043, "grad_norm": 0.43233877420425415, "learning_rate": 0.00013463133131441792, "loss": 1.4482, "step": 25163 }, { "epoch": 0.3269947030990363, "grad_norm": 0.4069402813911438, "learning_rate": 0.00013462873185250652, "loss": 1.4392, "step": 25164 }, { "epoch": 0.3270076976429522, "grad_norm": 0.41604119539260864, "learning_rate": 0.00013462613239059517, "loss": 1.4955, "step": 25165 }, { "epoch": 0.327020692186868, "grad_norm": 0.35321110486984253, "learning_rate": 0.00013462353292868377, "loss": 1.4087, "step": 25166 }, { "epoch": 0.3270336867307839, "grad_norm": 0.4783344566822052, "learning_rate": 0.0001346209334667724, "loss": 1.3354, "step": 25167 }, { "epoch": 0.32704668127469977, "grad_norm": 0.3716732859611511, "learning_rate": 0.000134618334004861, "loss": 1.6129, "step": 25168 }, { "epoch": 0.32705967581861567, "grad_norm": 0.3891408145427704, "learning_rate": 0.00013461573454294962, "loss": 1.3723, "step": 25169 }, { "epoch": 0.3270726703625315, "grad_norm": 0.44201067090034485, "learning_rate": 0.00013461313508103824, "loss": 1.431, "step": 25170 }, { "epoch": 0.3270856649064474, "grad_norm": 0.3926926553249359, "learning_rate": 0.00013461053561912684, "loss": 1.1106, "step": 25171 }, { "epoch": 0.32709865945036326, "grad_norm": 0.3201066255569458, "learning_rate": 0.00013460793615721546, "loss": 1.2104, "step": 25172 }, { "epoch": 0.32711165399427916, "grad_norm": 0.4122949242591858, "learning_rate": 0.0001346053366953041, "loss": 1.3383, "step": 25173 }, { "epoch": 0.327124648538195, "grad_norm": 0.5389511585235596, "learning_rate": 0.00013460273723339268, "loss": 1.4564, "step": 25174 }, { "epoch": 0.3271376430821109, "grad_norm": 0.4164218008518219, "learning_rate": 0.0001346001377714813, "loss": 1.6989, "step": 25175 }, { "epoch": 0.32715063762602675, "grad_norm": 0.40415647625923157, "learning_rate": 0.0001345975383095699, "loss": 1.3596, "step": 25176 }, { "epoch": 0.32716363216994265, "grad_norm": 0.4228530824184418, "learning_rate": 0.00013459493884765856, "loss": 1.4409, "step": 25177 }, { "epoch": 0.3271766267138585, "grad_norm": 0.4279729723930359, "learning_rate": 0.00013459233938574715, "loss": 1.3715, "step": 25178 }, { "epoch": 0.3271896212577744, "grad_norm": 0.4416216015815735, "learning_rate": 0.00013458973992383578, "loss": 1.3926, "step": 25179 }, { "epoch": 0.32720261580169024, "grad_norm": 0.4428456425666809, "learning_rate": 0.0001345871404619244, "loss": 1.4309, "step": 25180 }, { "epoch": 0.32721561034560614, "grad_norm": 0.485161691904068, "learning_rate": 0.000134584541000013, "loss": 1.4378, "step": 25181 }, { "epoch": 0.327228604889522, "grad_norm": 0.42604485154151917, "learning_rate": 0.00013458194153810163, "loss": 1.2462, "step": 25182 }, { "epoch": 0.3272415994334379, "grad_norm": 0.510232150554657, "learning_rate": 0.00013457934207619022, "loss": 1.5616, "step": 25183 }, { "epoch": 0.32725459397735374, "grad_norm": 0.43469035625457764, "learning_rate": 0.00013457674261427887, "loss": 1.583, "step": 25184 }, { "epoch": 0.32726758852126964, "grad_norm": 0.4172048270702362, "learning_rate": 0.00013457414315236747, "loss": 1.3413, "step": 25185 }, { "epoch": 0.3272805830651855, "grad_norm": 0.39685049653053284, "learning_rate": 0.00013457154369045607, "loss": 1.4196, "step": 25186 }, { "epoch": 0.3272935776091014, "grad_norm": 0.43034377694129944, "learning_rate": 0.0001345689442285447, "loss": 1.4554, "step": 25187 }, { "epoch": 0.32730657215301723, "grad_norm": 0.409824013710022, "learning_rate": 0.00013456634476663332, "loss": 1.3962, "step": 25188 }, { "epoch": 0.32731956669693313, "grad_norm": 0.45401108264923096, "learning_rate": 0.00013456374530472194, "loss": 1.4953, "step": 25189 }, { "epoch": 0.327332561240849, "grad_norm": 0.35680845379829407, "learning_rate": 0.00013456114584281054, "loss": 1.2949, "step": 25190 }, { "epoch": 0.3273455557847649, "grad_norm": 0.313738077878952, "learning_rate": 0.00013455854638089916, "loss": 1.3872, "step": 25191 }, { "epoch": 0.3273585503286807, "grad_norm": 0.4033963084220886, "learning_rate": 0.0001345559469189878, "loss": 1.3798, "step": 25192 }, { "epoch": 0.3273715448725966, "grad_norm": 0.37819743156433105, "learning_rate": 0.0001345533474570764, "loss": 1.3276, "step": 25193 }, { "epoch": 0.32738453941651247, "grad_norm": 0.4011194407939911, "learning_rate": 0.000134550747995165, "loss": 1.5889, "step": 25194 }, { "epoch": 0.32739753396042837, "grad_norm": 0.37969887256622314, "learning_rate": 0.0001345481485332536, "loss": 1.3793, "step": 25195 }, { "epoch": 0.3274105285043442, "grad_norm": 0.412293016910553, "learning_rate": 0.00013454554907134226, "loss": 1.3066, "step": 25196 }, { "epoch": 0.3274235230482601, "grad_norm": 0.39382970333099365, "learning_rate": 0.00013454294960943086, "loss": 1.587, "step": 25197 }, { "epoch": 0.32743651759217596, "grad_norm": 0.37101104855537415, "learning_rate": 0.00013454035014751945, "loss": 1.205, "step": 25198 }, { "epoch": 0.32744951213609186, "grad_norm": 0.38875311613082886, "learning_rate": 0.00013453775068560808, "loss": 1.5434, "step": 25199 }, { "epoch": 0.32746250668000776, "grad_norm": 0.4111554026603699, "learning_rate": 0.0001345351512236967, "loss": 1.4005, "step": 25200 }, { "epoch": 0.3274755012239236, "grad_norm": 0.38296395540237427, "learning_rate": 0.00013453255176178533, "loss": 1.4733, "step": 25201 }, { "epoch": 0.3274884957678395, "grad_norm": 0.4104140102863312, "learning_rate": 0.00013452995229987393, "loss": 1.2903, "step": 25202 }, { "epoch": 0.32750149031175535, "grad_norm": 0.4143497347831726, "learning_rate": 0.00013452735283796255, "loss": 1.3873, "step": 25203 }, { "epoch": 0.32751448485567125, "grad_norm": 0.40942829847335815, "learning_rate": 0.00013452475337605117, "loss": 1.2982, "step": 25204 }, { "epoch": 0.3275274793995871, "grad_norm": 0.4146082401275635, "learning_rate": 0.00013452215391413977, "loss": 1.2994, "step": 25205 }, { "epoch": 0.327540473943503, "grad_norm": 0.39570704102516174, "learning_rate": 0.0001345195544522284, "loss": 1.4247, "step": 25206 }, { "epoch": 0.32755346848741884, "grad_norm": 0.27458903193473816, "learning_rate": 0.000134516954990317, "loss": 1.4095, "step": 25207 }, { "epoch": 0.32756646303133474, "grad_norm": 0.28139635920524597, "learning_rate": 0.00013451435552840565, "loss": 1.3069, "step": 25208 }, { "epoch": 0.3275794575752506, "grad_norm": 0.37578094005584717, "learning_rate": 0.00013451175606649424, "loss": 1.3259, "step": 25209 }, { "epoch": 0.3275924521191665, "grad_norm": 0.5164992213249207, "learning_rate": 0.00013450915660458284, "loss": 1.683, "step": 25210 }, { "epoch": 0.32760544666308233, "grad_norm": 0.4637851417064667, "learning_rate": 0.00013450655714267146, "loss": 1.4682, "step": 25211 }, { "epoch": 0.32761844120699823, "grad_norm": 0.527933657169342, "learning_rate": 0.0001345039576807601, "loss": 1.3064, "step": 25212 }, { "epoch": 0.3276314357509141, "grad_norm": 0.34882813692092896, "learning_rate": 0.00013450135821884871, "loss": 1.4003, "step": 25213 }, { "epoch": 0.32764443029483, "grad_norm": 0.34134653210639954, "learning_rate": 0.0001344987587569373, "loss": 1.2132, "step": 25214 }, { "epoch": 0.3276574248387458, "grad_norm": 0.37958288192749023, "learning_rate": 0.00013449615929502594, "loss": 1.4585, "step": 25215 }, { "epoch": 0.3276704193826617, "grad_norm": 0.45946335792541504, "learning_rate": 0.00013449355983311456, "loss": 1.5578, "step": 25216 }, { "epoch": 0.32768341392657757, "grad_norm": 0.47827282547950745, "learning_rate": 0.00013449096037120316, "loss": 1.3504, "step": 25217 }, { "epoch": 0.32769640847049347, "grad_norm": 0.3432607054710388, "learning_rate": 0.00013448836090929178, "loss": 1.5206, "step": 25218 }, { "epoch": 0.3277094030144093, "grad_norm": 0.48154816031455994, "learning_rate": 0.0001344857614473804, "loss": 1.5157, "step": 25219 }, { "epoch": 0.3277223975583252, "grad_norm": 0.4297764003276825, "learning_rate": 0.00013448316198546903, "loss": 1.3819, "step": 25220 }, { "epoch": 0.32773539210224106, "grad_norm": 0.34937822818756104, "learning_rate": 0.00013448056252355763, "loss": 1.3598, "step": 25221 }, { "epoch": 0.32774838664615696, "grad_norm": 0.36012890934944153, "learning_rate": 0.00013447796306164625, "loss": 1.3775, "step": 25222 }, { "epoch": 0.3277613811900728, "grad_norm": 0.3779444992542267, "learning_rate": 0.00013447536359973488, "loss": 1.3051, "step": 25223 }, { "epoch": 0.3277743757339887, "grad_norm": 0.33696267008781433, "learning_rate": 0.00013447276413782347, "loss": 1.4542, "step": 25224 }, { "epoch": 0.32778737027790456, "grad_norm": 0.3947320878505707, "learning_rate": 0.0001344701646759121, "loss": 1.4633, "step": 25225 }, { "epoch": 0.32780036482182046, "grad_norm": 0.36321836709976196, "learning_rate": 0.0001344675652140007, "loss": 1.5153, "step": 25226 }, { "epoch": 0.3278133593657363, "grad_norm": 0.49338874220848083, "learning_rate": 0.00013446496575208932, "loss": 1.4833, "step": 25227 }, { "epoch": 0.3278263539096522, "grad_norm": 0.37824904918670654, "learning_rate": 0.00013446236629017795, "loss": 1.4234, "step": 25228 }, { "epoch": 0.32783934845356805, "grad_norm": 0.36573806405067444, "learning_rate": 0.00013445976682826654, "loss": 1.3192, "step": 25229 }, { "epoch": 0.32785234299748395, "grad_norm": 0.40590664744377136, "learning_rate": 0.00013445716736635517, "loss": 1.4116, "step": 25230 }, { "epoch": 0.3278653375413998, "grad_norm": 0.35290586948394775, "learning_rate": 0.0001344545679044438, "loss": 1.2989, "step": 25231 }, { "epoch": 0.3278783320853157, "grad_norm": 0.35414546728134155, "learning_rate": 0.00013445196844253242, "loss": 1.5103, "step": 25232 }, { "epoch": 0.32789132662923154, "grad_norm": 0.41480574011802673, "learning_rate": 0.000134449368980621, "loss": 1.5032, "step": 25233 }, { "epoch": 0.32790432117314744, "grad_norm": 0.40544453263282776, "learning_rate": 0.00013444676951870964, "loss": 1.368, "step": 25234 }, { "epoch": 0.3279173157170633, "grad_norm": 0.38026750087738037, "learning_rate": 0.00013444417005679826, "loss": 1.6145, "step": 25235 }, { "epoch": 0.3279303102609792, "grad_norm": 0.46215152740478516, "learning_rate": 0.00013444157059488686, "loss": 1.1759, "step": 25236 }, { "epoch": 0.32794330480489503, "grad_norm": 0.3808053135871887, "learning_rate": 0.00013443897113297548, "loss": 1.4905, "step": 25237 }, { "epoch": 0.32795629934881093, "grad_norm": 0.40001416206359863, "learning_rate": 0.00013443637167106408, "loss": 1.3214, "step": 25238 }, { "epoch": 0.3279692938927268, "grad_norm": 0.39595237374305725, "learning_rate": 0.0001344337722091527, "loss": 1.5032, "step": 25239 }, { "epoch": 0.3279822884366427, "grad_norm": 0.40611952543258667, "learning_rate": 0.00013443117274724133, "loss": 1.6659, "step": 25240 }, { "epoch": 0.3279952829805585, "grad_norm": 0.3672429919242859, "learning_rate": 0.00013442857328532993, "loss": 1.1421, "step": 25241 }, { "epoch": 0.3280082775244744, "grad_norm": 0.44474703073501587, "learning_rate": 0.00013442597382341855, "loss": 1.5541, "step": 25242 }, { "epoch": 0.32802127206839027, "grad_norm": 0.4027102291584015, "learning_rate": 0.00013442337436150718, "loss": 1.5021, "step": 25243 }, { "epoch": 0.32803426661230617, "grad_norm": 0.3847944438457489, "learning_rate": 0.0001344207748995958, "loss": 1.5262, "step": 25244 }, { "epoch": 0.328047261156222, "grad_norm": 0.42310675978660583, "learning_rate": 0.0001344181754376844, "loss": 1.3288, "step": 25245 }, { "epoch": 0.3280602557001379, "grad_norm": 0.41820088028907776, "learning_rate": 0.00013441557597577302, "loss": 1.4223, "step": 25246 }, { "epoch": 0.32807325024405376, "grad_norm": 0.3190530836582184, "learning_rate": 0.00013441297651386165, "loss": 1.3857, "step": 25247 }, { "epoch": 0.32808624478796966, "grad_norm": 0.41750451922416687, "learning_rate": 0.00013441037705195025, "loss": 1.3981, "step": 25248 }, { "epoch": 0.3280992393318855, "grad_norm": 0.35738861560821533, "learning_rate": 0.00013440777759003887, "loss": 1.3984, "step": 25249 }, { "epoch": 0.3281122338758014, "grad_norm": 0.43311652541160583, "learning_rate": 0.00013440517812812747, "loss": 1.3313, "step": 25250 }, { "epoch": 0.32812522841971725, "grad_norm": 0.35025453567504883, "learning_rate": 0.00013440257866621612, "loss": 1.1331, "step": 25251 }, { "epoch": 0.32813822296363315, "grad_norm": 0.58076411485672, "learning_rate": 0.00013439997920430472, "loss": 1.4556, "step": 25252 }, { "epoch": 0.328151217507549, "grad_norm": 0.38680019974708557, "learning_rate": 0.0001343973797423933, "loss": 1.0974, "step": 25253 }, { "epoch": 0.3281642120514649, "grad_norm": 0.44846659898757935, "learning_rate": 0.00013439478028048194, "loss": 1.3886, "step": 25254 }, { "epoch": 0.32817720659538074, "grad_norm": 0.3297249674797058, "learning_rate": 0.00013439218081857056, "loss": 1.2803, "step": 25255 }, { "epoch": 0.32819020113929664, "grad_norm": 0.3909626007080078, "learning_rate": 0.0001343895813566592, "loss": 1.3719, "step": 25256 }, { "epoch": 0.3282031956832125, "grad_norm": 0.3747754693031311, "learning_rate": 0.00013438698189474778, "loss": 1.577, "step": 25257 }, { "epoch": 0.3282161902271284, "grad_norm": 0.4181773364543915, "learning_rate": 0.0001343843824328364, "loss": 1.3302, "step": 25258 }, { "epoch": 0.32822918477104424, "grad_norm": 0.40097978711128235, "learning_rate": 0.00013438178297092503, "loss": 1.3113, "step": 25259 }, { "epoch": 0.32824217931496014, "grad_norm": 0.388907790184021, "learning_rate": 0.00013437918350901363, "loss": 1.4804, "step": 25260 }, { "epoch": 0.328255173858876, "grad_norm": 0.39584091305732727, "learning_rate": 0.00013437658404710226, "loss": 1.2277, "step": 25261 }, { "epoch": 0.3282681684027919, "grad_norm": 0.44478240609169006, "learning_rate": 0.00013437398458519088, "loss": 1.6657, "step": 25262 }, { "epoch": 0.3282811629467077, "grad_norm": 0.35414913296699524, "learning_rate": 0.0001343713851232795, "loss": 1.3972, "step": 25263 }, { "epoch": 0.32829415749062363, "grad_norm": 0.3457530736923218, "learning_rate": 0.0001343687856613681, "loss": 1.229, "step": 25264 }, { "epoch": 0.3283071520345395, "grad_norm": 0.3642204701900482, "learning_rate": 0.0001343661861994567, "loss": 1.3387, "step": 25265 }, { "epoch": 0.3283201465784554, "grad_norm": 0.46774348616600037, "learning_rate": 0.00013436358673754535, "loss": 1.4054, "step": 25266 }, { "epoch": 0.3283331411223712, "grad_norm": 0.46813157200813293, "learning_rate": 0.00013436098727563395, "loss": 1.5388, "step": 25267 }, { "epoch": 0.3283461356662871, "grad_norm": 0.4620802104473114, "learning_rate": 0.00013435838781372257, "loss": 1.4207, "step": 25268 }, { "epoch": 0.32835913021020297, "grad_norm": 0.37374627590179443, "learning_rate": 0.00013435578835181117, "loss": 1.5331, "step": 25269 }, { "epoch": 0.32837212475411887, "grad_norm": 0.2873345911502838, "learning_rate": 0.0001343531888898998, "loss": 1.296, "step": 25270 }, { "epoch": 0.3283851192980347, "grad_norm": 0.35456565022468567, "learning_rate": 0.00013435058942798842, "loss": 1.5246, "step": 25271 }, { "epoch": 0.3283981138419506, "grad_norm": 0.41262704133987427, "learning_rate": 0.00013434798996607702, "loss": 1.4908, "step": 25272 }, { "epoch": 0.32841110838586646, "grad_norm": 0.45627617835998535, "learning_rate": 0.00013434539050416564, "loss": 1.3803, "step": 25273 }, { "epoch": 0.32842410292978236, "grad_norm": 0.40540611743927, "learning_rate": 0.00013434279104225427, "loss": 1.4691, "step": 25274 }, { "epoch": 0.32843709747369826, "grad_norm": 0.3801732063293457, "learning_rate": 0.0001343401915803429, "loss": 1.303, "step": 25275 }, { "epoch": 0.3284500920176141, "grad_norm": 0.3658905327320099, "learning_rate": 0.0001343375921184315, "loss": 1.5613, "step": 25276 }, { "epoch": 0.32846308656153, "grad_norm": 0.48025932908058167, "learning_rate": 0.00013433499265652008, "loss": 1.3701, "step": 25277 }, { "epoch": 0.32847608110544585, "grad_norm": 0.4760112464427948, "learning_rate": 0.00013433239319460874, "loss": 1.4226, "step": 25278 }, { "epoch": 0.32848907564936175, "grad_norm": 0.4144611656665802, "learning_rate": 0.00013432979373269733, "loss": 1.3887, "step": 25279 }, { "epoch": 0.3285020701932776, "grad_norm": 0.46555212140083313, "learning_rate": 0.00013432719427078596, "loss": 1.3211, "step": 25280 }, { "epoch": 0.3285150647371935, "grad_norm": 0.46026021242141724, "learning_rate": 0.00013432459480887456, "loss": 1.5042, "step": 25281 }, { "epoch": 0.32852805928110934, "grad_norm": 0.40246525406837463, "learning_rate": 0.00013432199534696318, "loss": 1.4642, "step": 25282 }, { "epoch": 0.32854105382502524, "grad_norm": 0.4118730127811432, "learning_rate": 0.0001343193958850518, "loss": 1.4044, "step": 25283 }, { "epoch": 0.3285540483689411, "grad_norm": 0.3817659914493561, "learning_rate": 0.0001343167964231404, "loss": 1.4898, "step": 25284 }, { "epoch": 0.328567042912857, "grad_norm": 0.3684300482273102, "learning_rate": 0.00013431419696122903, "loss": 1.3707, "step": 25285 }, { "epoch": 0.32858003745677283, "grad_norm": 0.4501408636569977, "learning_rate": 0.00013431159749931765, "loss": 1.5859, "step": 25286 }, { "epoch": 0.32859303200068873, "grad_norm": 0.40602347254753113, "learning_rate": 0.00013430899803740627, "loss": 1.2278, "step": 25287 }, { "epoch": 0.3286060265446046, "grad_norm": 0.3946186900138855, "learning_rate": 0.00013430639857549487, "loss": 1.3844, "step": 25288 }, { "epoch": 0.3286190210885205, "grad_norm": 0.4072436988353729, "learning_rate": 0.0001343037991135835, "loss": 1.2894, "step": 25289 }, { "epoch": 0.3286320156324363, "grad_norm": 0.33170178532600403, "learning_rate": 0.00013430119965167212, "loss": 1.4158, "step": 25290 }, { "epoch": 0.3286450101763522, "grad_norm": 0.27600911259651184, "learning_rate": 0.00013429860018976072, "loss": 1.3802, "step": 25291 }, { "epoch": 0.32865800472026807, "grad_norm": 0.3940262496471405, "learning_rate": 0.00013429600072784934, "loss": 1.436, "step": 25292 }, { "epoch": 0.32867099926418397, "grad_norm": 0.30938389897346497, "learning_rate": 0.00013429340126593797, "loss": 1.2563, "step": 25293 }, { "epoch": 0.3286839938080998, "grad_norm": 0.3945947587490082, "learning_rate": 0.00013429080180402656, "loss": 1.5157, "step": 25294 }, { "epoch": 0.3286969883520157, "grad_norm": 0.45850762724876404, "learning_rate": 0.0001342882023421152, "loss": 1.6917, "step": 25295 }, { "epoch": 0.32870998289593156, "grad_norm": 0.33542680740356445, "learning_rate": 0.0001342856028802038, "loss": 1.5708, "step": 25296 }, { "epoch": 0.32872297743984746, "grad_norm": 0.4499053955078125, "learning_rate": 0.00013428300341829244, "loss": 1.5506, "step": 25297 }, { "epoch": 0.3287359719837633, "grad_norm": 0.3834080100059509, "learning_rate": 0.00013428040395638104, "loss": 1.5082, "step": 25298 }, { "epoch": 0.3287489665276792, "grad_norm": 0.4806734323501587, "learning_rate": 0.00013427780449446966, "loss": 1.5844, "step": 25299 }, { "epoch": 0.32876196107159505, "grad_norm": 0.3630460202693939, "learning_rate": 0.00013427520503255826, "loss": 1.3928, "step": 25300 }, { "epoch": 0.32877495561551096, "grad_norm": 0.35897013545036316, "learning_rate": 0.00013427260557064688, "loss": 1.5129, "step": 25301 }, { "epoch": 0.3287879501594268, "grad_norm": 0.5187689661979675, "learning_rate": 0.0001342700061087355, "loss": 1.53, "step": 25302 }, { "epoch": 0.3288009447033427, "grad_norm": 0.4041348099708557, "learning_rate": 0.0001342674066468241, "loss": 1.5057, "step": 25303 }, { "epoch": 0.32881393924725855, "grad_norm": 0.36623117327690125, "learning_rate": 0.00013426480718491273, "loss": 1.1344, "step": 25304 }, { "epoch": 0.32882693379117445, "grad_norm": 0.4020625352859497, "learning_rate": 0.00013426220772300135, "loss": 1.4503, "step": 25305 }, { "epoch": 0.3288399283350903, "grad_norm": 0.4368743896484375, "learning_rate": 0.00013425960826108998, "loss": 1.28, "step": 25306 }, { "epoch": 0.3288529228790062, "grad_norm": 0.37784093618392944, "learning_rate": 0.00013425700879917857, "loss": 1.3131, "step": 25307 }, { "epoch": 0.32886591742292204, "grad_norm": 0.42796891927719116, "learning_rate": 0.00013425440933726717, "loss": 1.4572, "step": 25308 }, { "epoch": 0.32887891196683794, "grad_norm": 0.3729535937309265, "learning_rate": 0.00013425180987535582, "loss": 1.2365, "step": 25309 }, { "epoch": 0.3288919065107538, "grad_norm": 0.3360167145729065, "learning_rate": 0.00013424921041344442, "loss": 1.4642, "step": 25310 }, { "epoch": 0.3289049010546697, "grad_norm": 0.3128669559955597, "learning_rate": 0.00013424661095153305, "loss": 1.4907, "step": 25311 }, { "epoch": 0.32891789559858553, "grad_norm": 0.40844419598579407, "learning_rate": 0.00013424401148962164, "loss": 1.3444, "step": 25312 }, { "epoch": 0.32893089014250143, "grad_norm": 0.3773769736289978, "learning_rate": 0.00013424141202771027, "loss": 1.4485, "step": 25313 }, { "epoch": 0.3289438846864173, "grad_norm": 0.47407764196395874, "learning_rate": 0.0001342388125657989, "loss": 1.5178, "step": 25314 }, { "epoch": 0.3289568792303332, "grad_norm": 0.4541483521461487, "learning_rate": 0.0001342362131038875, "loss": 1.3417, "step": 25315 }, { "epoch": 0.328969873774249, "grad_norm": 0.5842644572257996, "learning_rate": 0.00013423361364197611, "loss": 1.504, "step": 25316 }, { "epoch": 0.3289828683181649, "grad_norm": 0.40054672956466675, "learning_rate": 0.00013423101418006474, "loss": 1.466, "step": 25317 }, { "epoch": 0.32899586286208077, "grad_norm": 0.250788152217865, "learning_rate": 0.00013422841471815336, "loss": 1.358, "step": 25318 }, { "epoch": 0.32900885740599667, "grad_norm": 0.39067402482032776, "learning_rate": 0.00013422581525624196, "loss": 1.4238, "step": 25319 }, { "epoch": 0.3290218519499125, "grad_norm": 0.4361144006252289, "learning_rate": 0.00013422321579433056, "loss": 1.3413, "step": 25320 }, { "epoch": 0.3290348464938284, "grad_norm": 0.3501947224140167, "learning_rate": 0.0001342206163324192, "loss": 1.5347, "step": 25321 }, { "epoch": 0.32904784103774426, "grad_norm": 0.5104490518569946, "learning_rate": 0.0001342180168705078, "loss": 1.395, "step": 25322 }, { "epoch": 0.32906083558166016, "grad_norm": 0.4331497251987457, "learning_rate": 0.00013421541740859643, "loss": 1.7013, "step": 25323 }, { "epoch": 0.329073830125576, "grad_norm": 0.42801639437675476, "learning_rate": 0.00013421281794668503, "loss": 1.5066, "step": 25324 }, { "epoch": 0.3290868246694919, "grad_norm": 0.389602929353714, "learning_rate": 0.00013421021848477365, "loss": 1.4475, "step": 25325 }, { "epoch": 0.32909981921340775, "grad_norm": 0.3439443111419678, "learning_rate": 0.00013420761902286228, "loss": 1.3567, "step": 25326 }, { "epoch": 0.32911281375732365, "grad_norm": 0.28177815675735474, "learning_rate": 0.00013420501956095087, "loss": 1.3039, "step": 25327 }, { "epoch": 0.3291258083012395, "grad_norm": 0.4260914921760559, "learning_rate": 0.0001342024200990395, "loss": 1.4301, "step": 25328 }, { "epoch": 0.3291388028451554, "grad_norm": 0.4295695126056671, "learning_rate": 0.00013419982063712812, "loss": 1.5908, "step": 25329 }, { "epoch": 0.32915179738907124, "grad_norm": 0.45482027530670166, "learning_rate": 0.00013419722117521675, "loss": 1.331, "step": 25330 }, { "epoch": 0.32916479193298714, "grad_norm": 0.3673102855682373, "learning_rate": 0.00013419462171330535, "loss": 1.4821, "step": 25331 }, { "epoch": 0.329177786476903, "grad_norm": 0.3738667666912079, "learning_rate": 0.00013419202225139397, "loss": 1.447, "step": 25332 }, { "epoch": 0.3291907810208189, "grad_norm": 0.36126378178596497, "learning_rate": 0.0001341894227894826, "loss": 1.3497, "step": 25333 }, { "epoch": 0.32920377556473474, "grad_norm": 0.3273408114910126, "learning_rate": 0.0001341868233275712, "loss": 1.3492, "step": 25334 }, { "epoch": 0.32921677010865064, "grad_norm": 0.49449607729911804, "learning_rate": 0.00013418422386565982, "loss": 1.327, "step": 25335 }, { "epoch": 0.3292297646525665, "grad_norm": 0.3891625702381134, "learning_rate": 0.00013418162440374844, "loss": 1.3715, "step": 25336 }, { "epoch": 0.3292427591964824, "grad_norm": 0.36737412214279175, "learning_rate": 0.00013417902494183704, "loss": 1.5767, "step": 25337 }, { "epoch": 0.3292557537403982, "grad_norm": 0.41383060812950134, "learning_rate": 0.00013417642547992566, "loss": 1.5812, "step": 25338 }, { "epoch": 0.32926874828431413, "grad_norm": 0.44203418493270874, "learning_rate": 0.00013417382601801426, "loss": 1.4539, "step": 25339 }, { "epoch": 0.32928174282823, "grad_norm": 0.36490094661712646, "learning_rate": 0.0001341712265561029, "loss": 1.5176, "step": 25340 }, { "epoch": 0.3292947373721459, "grad_norm": 0.3501736521720886, "learning_rate": 0.0001341686270941915, "loss": 1.2783, "step": 25341 }, { "epoch": 0.3293077319160617, "grad_norm": 0.3369479477405548, "learning_rate": 0.00013416602763228013, "loss": 1.4937, "step": 25342 }, { "epoch": 0.3293207264599776, "grad_norm": 0.4171413779258728, "learning_rate": 0.00013416342817036873, "loss": 1.417, "step": 25343 }, { "epoch": 0.32933372100389346, "grad_norm": 0.33711177110671997, "learning_rate": 0.00013416082870845736, "loss": 1.2241, "step": 25344 }, { "epoch": 0.32934671554780937, "grad_norm": 0.42718514800071716, "learning_rate": 0.00013415822924654598, "loss": 1.6906, "step": 25345 }, { "epoch": 0.3293597100917252, "grad_norm": 0.3979288339614868, "learning_rate": 0.00013415562978463458, "loss": 1.5149, "step": 25346 }, { "epoch": 0.3293727046356411, "grad_norm": 0.3372088074684143, "learning_rate": 0.0001341530303227232, "loss": 1.3083, "step": 25347 }, { "epoch": 0.32938569917955696, "grad_norm": 0.21296252310276031, "learning_rate": 0.00013415043086081183, "loss": 1.2374, "step": 25348 }, { "epoch": 0.32939869372347286, "grad_norm": 0.3407288193702698, "learning_rate": 0.00013414783139890042, "loss": 1.4008, "step": 25349 }, { "epoch": 0.3294116882673887, "grad_norm": 0.347440630197525, "learning_rate": 0.00013414523193698905, "loss": 1.5054, "step": 25350 }, { "epoch": 0.3294246828113046, "grad_norm": 0.4117650091648102, "learning_rate": 0.00013414263247507765, "loss": 1.5126, "step": 25351 }, { "epoch": 0.3294376773552205, "grad_norm": 0.39046260714530945, "learning_rate": 0.0001341400330131663, "loss": 1.3533, "step": 25352 }, { "epoch": 0.32945067189913635, "grad_norm": 0.44210928678512573, "learning_rate": 0.0001341374335512549, "loss": 1.5414, "step": 25353 }, { "epoch": 0.32946366644305225, "grad_norm": 0.41369375586509705, "learning_rate": 0.00013413483408934352, "loss": 1.0772, "step": 25354 }, { "epoch": 0.3294766609869681, "grad_norm": 0.4005845785140991, "learning_rate": 0.00013413223462743212, "loss": 1.3835, "step": 25355 }, { "epoch": 0.329489655530884, "grad_norm": 0.4949833154678345, "learning_rate": 0.00013412963516552074, "loss": 1.3986, "step": 25356 }, { "epoch": 0.32950265007479984, "grad_norm": 0.34249964356422424, "learning_rate": 0.00013412703570360937, "loss": 1.5586, "step": 25357 }, { "epoch": 0.32951564461871574, "grad_norm": 0.41654446721076965, "learning_rate": 0.00013412443624169796, "loss": 1.3539, "step": 25358 }, { "epoch": 0.3295286391626316, "grad_norm": 0.3785189688205719, "learning_rate": 0.0001341218367797866, "loss": 1.4091, "step": 25359 }, { "epoch": 0.3295416337065475, "grad_norm": 0.47925683856010437, "learning_rate": 0.0001341192373178752, "loss": 1.4224, "step": 25360 }, { "epoch": 0.32955462825046333, "grad_norm": 0.4160226881504059, "learning_rate": 0.0001341166378559638, "loss": 1.4358, "step": 25361 }, { "epoch": 0.32956762279437923, "grad_norm": 0.46545690298080444, "learning_rate": 0.00013411403839405243, "loss": 1.2741, "step": 25362 }, { "epoch": 0.3295806173382951, "grad_norm": 0.47618338465690613, "learning_rate": 0.00013411143893214103, "loss": 1.3474, "step": 25363 }, { "epoch": 0.329593611882211, "grad_norm": 0.3372313380241394, "learning_rate": 0.00013410883947022968, "loss": 1.5282, "step": 25364 }, { "epoch": 0.3296066064261268, "grad_norm": 0.45731109380722046, "learning_rate": 0.00013410624000831828, "loss": 1.3606, "step": 25365 }, { "epoch": 0.3296196009700427, "grad_norm": 0.4226211607456207, "learning_rate": 0.0001341036405464069, "loss": 1.4463, "step": 25366 }, { "epoch": 0.32963259551395857, "grad_norm": 0.38409683108329773, "learning_rate": 0.00013410104108449553, "loss": 1.4347, "step": 25367 }, { "epoch": 0.32964559005787447, "grad_norm": 0.4026913344860077, "learning_rate": 0.00013409844162258413, "loss": 1.3023, "step": 25368 }, { "epoch": 0.3296585846017903, "grad_norm": 0.4126409888267517, "learning_rate": 0.00013409584216067275, "loss": 1.3066, "step": 25369 }, { "epoch": 0.3296715791457062, "grad_norm": 0.3737824261188507, "learning_rate": 0.00013409324269876135, "loss": 1.5713, "step": 25370 }, { "epoch": 0.32968457368962206, "grad_norm": 0.3570338487625122, "learning_rate": 0.00013409064323685, "loss": 1.3271, "step": 25371 }, { "epoch": 0.32969756823353796, "grad_norm": 0.399305522441864, "learning_rate": 0.0001340880437749386, "loss": 1.2789, "step": 25372 }, { "epoch": 0.3297105627774538, "grad_norm": 0.3623657524585724, "learning_rate": 0.00013408544431302722, "loss": 1.3151, "step": 25373 }, { "epoch": 0.3297235573213697, "grad_norm": 0.4736778736114502, "learning_rate": 0.00013408284485111582, "loss": 1.4529, "step": 25374 }, { "epoch": 0.32973655186528555, "grad_norm": 0.4068353474140167, "learning_rate": 0.00013408024538920444, "loss": 1.4499, "step": 25375 }, { "epoch": 0.32974954640920145, "grad_norm": 0.4361303746700287, "learning_rate": 0.00013407764592729307, "loss": 1.478, "step": 25376 }, { "epoch": 0.3297625409531173, "grad_norm": 0.4379952847957611, "learning_rate": 0.00013407504646538167, "loss": 1.3812, "step": 25377 }, { "epoch": 0.3297755354970332, "grad_norm": 0.38622012734413147, "learning_rate": 0.0001340724470034703, "loss": 1.4063, "step": 25378 }, { "epoch": 0.32978853004094905, "grad_norm": 0.5305108428001404, "learning_rate": 0.00013406984754155891, "loss": 1.3855, "step": 25379 }, { "epoch": 0.32980152458486495, "grad_norm": 0.44919219613075256, "learning_rate": 0.0001340672480796475, "loss": 1.3975, "step": 25380 }, { "epoch": 0.3298145191287808, "grad_norm": 0.2653268873691559, "learning_rate": 0.00013406464861773614, "loss": 1.4539, "step": 25381 }, { "epoch": 0.3298275136726967, "grad_norm": 0.28848984837532043, "learning_rate": 0.00013406204915582473, "loss": 1.2992, "step": 25382 }, { "epoch": 0.32984050821661254, "grad_norm": 0.4742273986339569, "learning_rate": 0.00013405944969391339, "loss": 1.4019, "step": 25383 }, { "epoch": 0.32985350276052844, "grad_norm": 0.3242991268634796, "learning_rate": 0.00013405685023200198, "loss": 1.3752, "step": 25384 }, { "epoch": 0.3298664973044443, "grad_norm": 0.39433974027633667, "learning_rate": 0.0001340542507700906, "loss": 1.4665, "step": 25385 }, { "epoch": 0.3298794918483602, "grad_norm": 0.2532108426094055, "learning_rate": 0.0001340516513081792, "loss": 1.0141, "step": 25386 }, { "epoch": 0.32989248639227603, "grad_norm": 0.35759055614471436, "learning_rate": 0.00013404905184626783, "loss": 1.4838, "step": 25387 }, { "epoch": 0.32990548093619193, "grad_norm": 0.42066437005996704, "learning_rate": 0.00013404645238435645, "loss": 1.2467, "step": 25388 }, { "epoch": 0.3299184754801078, "grad_norm": 0.3852258324623108, "learning_rate": 0.00013404385292244505, "loss": 1.3166, "step": 25389 }, { "epoch": 0.3299314700240237, "grad_norm": 0.3838479816913605, "learning_rate": 0.00013404125346053368, "loss": 1.2427, "step": 25390 }, { "epoch": 0.3299444645679395, "grad_norm": 0.4362042248249054, "learning_rate": 0.0001340386539986223, "loss": 1.3048, "step": 25391 }, { "epoch": 0.3299574591118554, "grad_norm": 0.41609805822372437, "learning_rate": 0.0001340360545367109, "loss": 1.3693, "step": 25392 }, { "epoch": 0.32997045365577127, "grad_norm": 0.45666131377220154, "learning_rate": 0.00013403345507479952, "loss": 1.291, "step": 25393 }, { "epoch": 0.32998344819968717, "grad_norm": 0.3742736279964447, "learning_rate": 0.00013403085561288812, "loss": 1.4613, "step": 25394 }, { "epoch": 0.329996442743603, "grad_norm": 0.48365676403045654, "learning_rate": 0.00013402825615097677, "loss": 1.3548, "step": 25395 }, { "epoch": 0.3300094372875189, "grad_norm": 0.402761846780777, "learning_rate": 0.00013402565668906537, "loss": 1.4136, "step": 25396 }, { "epoch": 0.33002243183143476, "grad_norm": 0.37098076939582825, "learning_rate": 0.000134023057227154, "loss": 1.2083, "step": 25397 }, { "epoch": 0.33003542637535066, "grad_norm": 0.4020889401435852, "learning_rate": 0.0001340204577652426, "loss": 1.4296, "step": 25398 }, { "epoch": 0.3300484209192665, "grad_norm": 0.4559450149536133, "learning_rate": 0.00013401785830333121, "loss": 1.4123, "step": 25399 }, { "epoch": 0.3300614154631824, "grad_norm": 0.45479264855384827, "learning_rate": 0.00013401525884141984, "loss": 1.5204, "step": 25400 }, { "epoch": 0.33007441000709825, "grad_norm": 0.47942304611206055, "learning_rate": 0.00013401265937950844, "loss": 1.5651, "step": 25401 }, { "epoch": 0.33008740455101415, "grad_norm": 0.37381014227867126, "learning_rate": 0.00013401005991759706, "loss": 1.2103, "step": 25402 }, { "epoch": 0.33010039909493, "grad_norm": 0.27371224761009216, "learning_rate": 0.00013400746045568569, "loss": 1.0815, "step": 25403 }, { "epoch": 0.3301133936388459, "grad_norm": 0.35925397276878357, "learning_rate": 0.00013400486099377428, "loss": 1.4117, "step": 25404 }, { "epoch": 0.33012638818276174, "grad_norm": 0.4015498757362366, "learning_rate": 0.0001340022615318629, "loss": 1.6293, "step": 25405 }, { "epoch": 0.33013938272667764, "grad_norm": 0.43442967534065247, "learning_rate": 0.00013399966206995153, "loss": 1.5014, "step": 25406 }, { "epoch": 0.3301523772705935, "grad_norm": 0.38886693120002747, "learning_rate": 0.00013399706260804016, "loss": 1.2442, "step": 25407 }, { "epoch": 0.3301653718145094, "grad_norm": 0.3544869124889374, "learning_rate": 0.00013399446314612875, "loss": 1.3819, "step": 25408 }, { "epoch": 0.33017836635842523, "grad_norm": 0.5248733758926392, "learning_rate": 0.00013399186368421738, "loss": 1.4933, "step": 25409 }, { "epoch": 0.33019136090234114, "grad_norm": 0.49900826811790466, "learning_rate": 0.000133989264222306, "loss": 1.5402, "step": 25410 }, { "epoch": 0.330204355446257, "grad_norm": 0.3594284951686859, "learning_rate": 0.0001339866647603946, "loss": 1.3585, "step": 25411 }, { "epoch": 0.3302173499901729, "grad_norm": 0.3940020799636841, "learning_rate": 0.00013398406529848322, "loss": 1.2415, "step": 25412 }, { "epoch": 0.3302303445340887, "grad_norm": 0.2958138585090637, "learning_rate": 0.00013398146583657182, "loss": 1.5371, "step": 25413 }, { "epoch": 0.3302433390780046, "grad_norm": 0.2538348436355591, "learning_rate": 0.00013397886637466047, "loss": 1.331, "step": 25414 }, { "epoch": 0.3302563336219205, "grad_norm": 0.39588114619255066, "learning_rate": 0.00013397626691274907, "loss": 1.3503, "step": 25415 }, { "epoch": 0.3302693281658364, "grad_norm": 0.46107110381126404, "learning_rate": 0.00013397366745083767, "loss": 1.6041, "step": 25416 }, { "epoch": 0.3302823227097522, "grad_norm": 0.3055363595485687, "learning_rate": 0.0001339710679889263, "loss": 1.3291, "step": 25417 }, { "epoch": 0.3302953172536681, "grad_norm": 0.34722602367401123, "learning_rate": 0.00013396846852701492, "loss": 1.2665, "step": 25418 }, { "epoch": 0.33030831179758396, "grad_norm": 0.43605929613113403, "learning_rate": 0.00013396586906510354, "loss": 1.5043, "step": 25419 }, { "epoch": 0.33032130634149987, "grad_norm": 0.40294522047042847, "learning_rate": 0.00013396326960319214, "loss": 1.2429, "step": 25420 }, { "epoch": 0.3303343008854157, "grad_norm": 0.3467344641685486, "learning_rate": 0.00013396067014128076, "loss": 1.1676, "step": 25421 }, { "epoch": 0.3303472954293316, "grad_norm": 0.5104806423187256, "learning_rate": 0.0001339580706793694, "loss": 1.4379, "step": 25422 }, { "epoch": 0.33036028997324746, "grad_norm": 0.30016499757766724, "learning_rate": 0.00013395547121745799, "loss": 1.3433, "step": 25423 }, { "epoch": 0.33037328451716336, "grad_norm": 0.5396904349327087, "learning_rate": 0.0001339528717555466, "loss": 1.4127, "step": 25424 }, { "epoch": 0.3303862790610792, "grad_norm": 0.41141584515571594, "learning_rate": 0.0001339502722936352, "loss": 1.3464, "step": 25425 }, { "epoch": 0.3303992736049951, "grad_norm": 0.33225777745246887, "learning_rate": 0.00013394767283172386, "loss": 1.4562, "step": 25426 }, { "epoch": 0.330412268148911, "grad_norm": 0.4551813006401062, "learning_rate": 0.00013394507336981246, "loss": 1.6444, "step": 25427 }, { "epoch": 0.33042526269282685, "grad_norm": 0.4935167729854584, "learning_rate": 0.00013394247390790108, "loss": 1.3621, "step": 25428 }, { "epoch": 0.33043825723674275, "grad_norm": 0.3895745575428009, "learning_rate": 0.00013393987444598968, "loss": 1.1186, "step": 25429 }, { "epoch": 0.3304512517806586, "grad_norm": 0.3657846450805664, "learning_rate": 0.0001339372749840783, "loss": 1.5083, "step": 25430 }, { "epoch": 0.3304642463245745, "grad_norm": 0.44281864166259766, "learning_rate": 0.00013393467552216693, "loss": 1.327, "step": 25431 }, { "epoch": 0.33047724086849034, "grad_norm": 0.36560437083244324, "learning_rate": 0.00013393207606025552, "loss": 1.5263, "step": 25432 }, { "epoch": 0.33049023541240624, "grad_norm": 0.49306923151016235, "learning_rate": 0.00013392947659834415, "loss": 1.5285, "step": 25433 }, { "epoch": 0.3305032299563221, "grad_norm": 0.43165305256843567, "learning_rate": 0.00013392687713643277, "loss": 1.3839, "step": 25434 }, { "epoch": 0.330516224500238, "grad_norm": 0.39934834837913513, "learning_rate": 0.00013392427767452137, "loss": 1.3574, "step": 25435 }, { "epoch": 0.33052921904415383, "grad_norm": 0.3733825385570526, "learning_rate": 0.00013392167821261, "loss": 1.1742, "step": 25436 }, { "epoch": 0.33054221358806973, "grad_norm": 0.33662697672843933, "learning_rate": 0.0001339190787506986, "loss": 1.2535, "step": 25437 }, { "epoch": 0.3305552081319856, "grad_norm": 0.3938794434070587, "learning_rate": 0.00013391647928878724, "loss": 1.3558, "step": 25438 }, { "epoch": 0.3305682026759015, "grad_norm": 0.33154577016830444, "learning_rate": 0.00013391387982687584, "loss": 1.3742, "step": 25439 }, { "epoch": 0.3305811972198173, "grad_norm": 0.5127232074737549, "learning_rate": 0.00013391128036496447, "loss": 1.5486, "step": 25440 }, { "epoch": 0.3305941917637332, "grad_norm": 0.39770156145095825, "learning_rate": 0.0001339086809030531, "loss": 1.4735, "step": 25441 }, { "epoch": 0.33060718630764907, "grad_norm": 0.40657123923301697, "learning_rate": 0.0001339060814411417, "loss": 1.2754, "step": 25442 }, { "epoch": 0.33062018085156497, "grad_norm": 0.36890271306037903, "learning_rate": 0.0001339034819792303, "loss": 1.2363, "step": 25443 }, { "epoch": 0.3306331753954808, "grad_norm": 0.41669103503227234, "learning_rate": 0.0001339008825173189, "loss": 1.4988, "step": 25444 }, { "epoch": 0.3306461699393967, "grad_norm": 0.45236214995384216, "learning_rate": 0.00013389828305540753, "loss": 1.2997, "step": 25445 }, { "epoch": 0.33065916448331256, "grad_norm": 0.37558236718177795, "learning_rate": 0.00013389568359349616, "loss": 1.4876, "step": 25446 }, { "epoch": 0.33067215902722846, "grad_norm": 0.3748414218425751, "learning_rate": 0.00013389308413158476, "loss": 1.4397, "step": 25447 }, { "epoch": 0.3306851535711443, "grad_norm": 0.2971377968788147, "learning_rate": 0.00013389048466967338, "loss": 1.5025, "step": 25448 }, { "epoch": 0.3306981481150602, "grad_norm": 0.41289663314819336, "learning_rate": 0.000133887885207762, "loss": 1.4438, "step": 25449 }, { "epoch": 0.33071114265897605, "grad_norm": 0.4127791225910187, "learning_rate": 0.00013388528574585063, "loss": 1.427, "step": 25450 }, { "epoch": 0.33072413720289195, "grad_norm": 0.43375536799430847, "learning_rate": 0.00013388268628393923, "loss": 1.4366, "step": 25451 }, { "epoch": 0.3307371317468078, "grad_norm": 0.4230966866016388, "learning_rate": 0.00013388008682202785, "loss": 1.3474, "step": 25452 }, { "epoch": 0.3307501262907237, "grad_norm": 0.41233646869659424, "learning_rate": 0.00013387748736011648, "loss": 1.3783, "step": 25453 }, { "epoch": 0.33076312083463955, "grad_norm": 0.453925222158432, "learning_rate": 0.00013387488789820507, "loss": 1.5539, "step": 25454 }, { "epoch": 0.33077611537855545, "grad_norm": 0.45712530612945557, "learning_rate": 0.0001338722884362937, "loss": 1.4423, "step": 25455 }, { "epoch": 0.3307891099224713, "grad_norm": 0.31733739376068115, "learning_rate": 0.0001338696889743823, "loss": 1.3004, "step": 25456 }, { "epoch": 0.3308021044663872, "grad_norm": 0.33850136399269104, "learning_rate": 0.00013386708951247095, "loss": 1.4268, "step": 25457 }, { "epoch": 0.33081509901030304, "grad_norm": 0.34473028779029846, "learning_rate": 0.00013386449005055954, "loss": 1.5118, "step": 25458 }, { "epoch": 0.33082809355421894, "grad_norm": 0.35425084829330444, "learning_rate": 0.00013386189058864814, "loss": 1.4399, "step": 25459 }, { "epoch": 0.3308410880981348, "grad_norm": 0.43701741099357605, "learning_rate": 0.00013385929112673677, "loss": 1.3944, "step": 25460 }, { "epoch": 0.3308540826420507, "grad_norm": 0.376965194940567, "learning_rate": 0.0001338566916648254, "loss": 1.3592, "step": 25461 }, { "epoch": 0.33086707718596653, "grad_norm": 0.29334285855293274, "learning_rate": 0.00013385409220291401, "loss": 1.5212, "step": 25462 }, { "epoch": 0.33088007172988243, "grad_norm": 0.39277926087379456, "learning_rate": 0.0001338514927410026, "loss": 1.4898, "step": 25463 }, { "epoch": 0.3308930662737983, "grad_norm": 0.4381749927997589, "learning_rate": 0.00013384889327909124, "loss": 1.2541, "step": 25464 }, { "epoch": 0.3309060608177142, "grad_norm": 0.330308198928833, "learning_rate": 0.00013384629381717986, "loss": 1.5591, "step": 25465 }, { "epoch": 0.33091905536163, "grad_norm": 0.44727402925491333, "learning_rate": 0.00013384369435526846, "loss": 1.4599, "step": 25466 }, { "epoch": 0.3309320499055459, "grad_norm": 0.28293436765670776, "learning_rate": 0.00013384109489335708, "loss": 1.5182, "step": 25467 }, { "epoch": 0.33094504444946177, "grad_norm": 0.4533950984477997, "learning_rate": 0.00013383849543144568, "loss": 1.4373, "step": 25468 }, { "epoch": 0.33095803899337767, "grad_norm": 0.40868476033210754, "learning_rate": 0.00013383589596953433, "loss": 1.3619, "step": 25469 }, { "epoch": 0.3309710335372935, "grad_norm": 0.4499734342098236, "learning_rate": 0.00013383329650762293, "loss": 1.5575, "step": 25470 }, { "epoch": 0.3309840280812094, "grad_norm": 0.45362964272499084, "learning_rate": 0.00013383069704571153, "loss": 1.5309, "step": 25471 }, { "epoch": 0.33099702262512526, "grad_norm": 0.5177431106567383, "learning_rate": 0.00013382809758380015, "loss": 1.4244, "step": 25472 }, { "epoch": 0.33101001716904116, "grad_norm": 0.4701518416404724, "learning_rate": 0.00013382549812188878, "loss": 1.4907, "step": 25473 }, { "epoch": 0.331023011712957, "grad_norm": 0.38079217076301575, "learning_rate": 0.0001338228986599774, "loss": 1.2489, "step": 25474 }, { "epoch": 0.3310360062568729, "grad_norm": 0.383716881275177, "learning_rate": 0.000133820299198066, "loss": 1.6244, "step": 25475 }, { "epoch": 0.33104900080078875, "grad_norm": 0.33251166343688965, "learning_rate": 0.00013381769973615462, "loss": 1.4712, "step": 25476 }, { "epoch": 0.33106199534470465, "grad_norm": 0.4782688617706299, "learning_rate": 0.00013381510027424325, "loss": 1.3404, "step": 25477 }, { "epoch": 0.3310749898886205, "grad_norm": 0.34370654821395874, "learning_rate": 0.00013381250081233184, "loss": 1.3602, "step": 25478 }, { "epoch": 0.3310879844325364, "grad_norm": 0.3607679009437561, "learning_rate": 0.00013380990135042047, "loss": 1.2124, "step": 25479 }, { "epoch": 0.33110097897645224, "grad_norm": 0.41087839007377625, "learning_rate": 0.0001338073018885091, "loss": 1.242, "step": 25480 }, { "epoch": 0.33111397352036814, "grad_norm": 0.35053449869155884, "learning_rate": 0.00013380470242659772, "loss": 1.3611, "step": 25481 }, { "epoch": 0.331126968064284, "grad_norm": 0.28869110345840454, "learning_rate": 0.00013380210296468631, "loss": 1.321, "step": 25482 }, { "epoch": 0.3311399626081999, "grad_norm": 0.4529825448989868, "learning_rate": 0.00013379950350277494, "loss": 1.3559, "step": 25483 }, { "epoch": 0.33115295715211573, "grad_norm": 0.3635726571083069, "learning_rate": 0.00013379690404086356, "loss": 1.2835, "step": 25484 }, { "epoch": 0.33116595169603164, "grad_norm": 0.29228654503822327, "learning_rate": 0.00013379430457895216, "loss": 1.4655, "step": 25485 }, { "epoch": 0.3311789462399475, "grad_norm": 0.45637771487236023, "learning_rate": 0.00013379170511704079, "loss": 1.4376, "step": 25486 }, { "epoch": 0.3311919407838634, "grad_norm": 0.41637957096099854, "learning_rate": 0.00013378910565512938, "loss": 1.5206, "step": 25487 }, { "epoch": 0.3312049353277792, "grad_norm": 0.5057008862495422, "learning_rate": 0.000133786506193218, "loss": 1.5834, "step": 25488 }, { "epoch": 0.3312179298716951, "grad_norm": 0.3937355577945709, "learning_rate": 0.00013378390673130663, "loss": 1.5438, "step": 25489 }, { "epoch": 0.33123092441561097, "grad_norm": 0.3645849823951721, "learning_rate": 0.00013378130726939523, "loss": 1.4476, "step": 25490 }, { "epoch": 0.3312439189595269, "grad_norm": 0.5379676818847656, "learning_rate": 0.00013377870780748385, "loss": 1.5305, "step": 25491 }, { "epoch": 0.3312569135034427, "grad_norm": 0.45664548873901367, "learning_rate": 0.00013377610834557248, "loss": 1.4633, "step": 25492 }, { "epoch": 0.3312699080473586, "grad_norm": 0.40925315022468567, "learning_rate": 0.0001337735088836611, "loss": 1.3933, "step": 25493 }, { "epoch": 0.33128290259127446, "grad_norm": 0.34462788701057434, "learning_rate": 0.0001337709094217497, "loss": 1.2266, "step": 25494 }, { "epoch": 0.33129589713519036, "grad_norm": 0.48976314067840576, "learning_rate": 0.00013376830995983832, "loss": 1.4696, "step": 25495 }, { "epoch": 0.3313088916791062, "grad_norm": 0.3538649380207062, "learning_rate": 0.00013376571049792695, "loss": 1.3508, "step": 25496 }, { "epoch": 0.3313218862230221, "grad_norm": 0.4076395332813263, "learning_rate": 0.00013376311103601555, "loss": 1.328, "step": 25497 }, { "epoch": 0.33133488076693796, "grad_norm": 0.3823518753051758, "learning_rate": 0.00013376051157410417, "loss": 1.2838, "step": 25498 }, { "epoch": 0.33134787531085386, "grad_norm": 0.4611894190311432, "learning_rate": 0.00013375791211219277, "loss": 1.5482, "step": 25499 }, { "epoch": 0.3313608698547697, "grad_norm": 0.3460150957107544, "learning_rate": 0.0001337553126502814, "loss": 1.2385, "step": 25500 }, { "epoch": 0.3313738643986856, "grad_norm": 0.3888540267944336, "learning_rate": 0.00013375271318837002, "loss": 1.4351, "step": 25501 }, { "epoch": 0.33138685894260145, "grad_norm": 0.3436017632484436, "learning_rate": 0.00013375011372645861, "loss": 1.3135, "step": 25502 }, { "epoch": 0.33139985348651735, "grad_norm": 0.4628414511680603, "learning_rate": 0.00013374751426454724, "loss": 1.342, "step": 25503 }, { "epoch": 0.33141284803043325, "grad_norm": 0.3945187032222748, "learning_rate": 0.00013374491480263586, "loss": 1.4171, "step": 25504 }, { "epoch": 0.3314258425743491, "grad_norm": 0.44210487604141235, "learning_rate": 0.0001337423153407245, "loss": 1.4375, "step": 25505 }, { "epoch": 0.331438837118265, "grad_norm": 0.42043545842170715, "learning_rate": 0.00013373971587881309, "loss": 1.4242, "step": 25506 }, { "epoch": 0.33145183166218084, "grad_norm": 0.33141228556632996, "learning_rate": 0.0001337371164169017, "loss": 1.29, "step": 25507 }, { "epoch": 0.33146482620609674, "grad_norm": 0.28985899686813354, "learning_rate": 0.00013373451695499033, "loss": 1.3966, "step": 25508 }, { "epoch": 0.3314778207500126, "grad_norm": 0.45740869641304016, "learning_rate": 0.00013373191749307893, "loss": 1.3164, "step": 25509 }, { "epoch": 0.3314908152939285, "grad_norm": 0.3912624716758728, "learning_rate": 0.00013372931803116756, "loss": 1.7083, "step": 25510 }, { "epoch": 0.33150380983784433, "grad_norm": 0.4109328091144562, "learning_rate": 0.00013372671856925615, "loss": 1.5296, "step": 25511 }, { "epoch": 0.33151680438176023, "grad_norm": 0.35951530933380127, "learning_rate": 0.0001337241191073448, "loss": 1.3281, "step": 25512 }, { "epoch": 0.3315297989256761, "grad_norm": 0.3745634853839874, "learning_rate": 0.0001337215196454334, "loss": 1.4327, "step": 25513 }, { "epoch": 0.331542793469592, "grad_norm": 0.3250645697116852, "learning_rate": 0.000133718920183522, "loss": 1.3297, "step": 25514 }, { "epoch": 0.3315557880135078, "grad_norm": 0.39504584670066833, "learning_rate": 0.00013371632072161065, "loss": 1.455, "step": 25515 }, { "epoch": 0.3315687825574237, "grad_norm": 0.33048221468925476, "learning_rate": 0.00013371372125969925, "loss": 1.2809, "step": 25516 }, { "epoch": 0.33158177710133957, "grad_norm": 0.3931441903114319, "learning_rate": 0.00013371112179778787, "loss": 1.6605, "step": 25517 }, { "epoch": 0.33159477164525547, "grad_norm": 0.42793184518814087, "learning_rate": 0.00013370852233587647, "loss": 1.3966, "step": 25518 }, { "epoch": 0.3316077661891713, "grad_norm": 0.3611810803413391, "learning_rate": 0.0001337059228739651, "loss": 1.423, "step": 25519 }, { "epoch": 0.3316207607330872, "grad_norm": 0.3704386353492737, "learning_rate": 0.00013370332341205372, "loss": 1.401, "step": 25520 }, { "epoch": 0.33163375527700306, "grad_norm": 0.5026872158050537, "learning_rate": 0.00013370072395014232, "loss": 1.2996, "step": 25521 }, { "epoch": 0.33164674982091896, "grad_norm": 0.31302785873413086, "learning_rate": 0.00013369812448823094, "loss": 1.2171, "step": 25522 }, { "epoch": 0.3316597443648348, "grad_norm": 0.34155482053756714, "learning_rate": 0.00013369552502631957, "loss": 1.3272, "step": 25523 }, { "epoch": 0.3316727389087507, "grad_norm": 0.392689973115921, "learning_rate": 0.0001336929255644082, "loss": 1.3675, "step": 25524 }, { "epoch": 0.33168573345266655, "grad_norm": 0.42233341932296753, "learning_rate": 0.0001336903261024968, "loss": 1.554, "step": 25525 }, { "epoch": 0.33169872799658245, "grad_norm": 0.588141679763794, "learning_rate": 0.00013368772664058539, "loss": 1.2468, "step": 25526 }, { "epoch": 0.3317117225404983, "grad_norm": 0.35416871309280396, "learning_rate": 0.00013368512717867404, "loss": 1.4324, "step": 25527 }, { "epoch": 0.3317247170844142, "grad_norm": 0.454398512840271, "learning_rate": 0.00013368252771676263, "loss": 1.5324, "step": 25528 }, { "epoch": 0.33173771162833005, "grad_norm": 0.3870798945426941, "learning_rate": 0.00013367992825485126, "loss": 1.4309, "step": 25529 }, { "epoch": 0.33175070617224595, "grad_norm": 0.430955708026886, "learning_rate": 0.00013367732879293986, "loss": 1.5778, "step": 25530 }, { "epoch": 0.3317637007161618, "grad_norm": 0.4626177251338959, "learning_rate": 0.00013367472933102848, "loss": 1.6093, "step": 25531 }, { "epoch": 0.3317766952600777, "grad_norm": 0.5052372217178345, "learning_rate": 0.0001336721298691171, "loss": 1.3736, "step": 25532 }, { "epoch": 0.33178968980399354, "grad_norm": 0.3977068066596985, "learning_rate": 0.0001336695304072057, "loss": 1.5113, "step": 25533 }, { "epoch": 0.33180268434790944, "grad_norm": 0.2562795579433441, "learning_rate": 0.00013366693094529433, "loss": 1.2995, "step": 25534 }, { "epoch": 0.3318156788918253, "grad_norm": 0.4043462872505188, "learning_rate": 0.00013366433148338295, "loss": 1.3254, "step": 25535 }, { "epoch": 0.3318286734357412, "grad_norm": 0.5225029587745667, "learning_rate": 0.00013366173202147158, "loss": 1.4949, "step": 25536 }, { "epoch": 0.33184166797965703, "grad_norm": 0.5557762980461121, "learning_rate": 0.00013365913255956017, "loss": 1.4127, "step": 25537 }, { "epoch": 0.33185466252357293, "grad_norm": 0.38120874762535095, "learning_rate": 0.00013365653309764877, "loss": 1.35, "step": 25538 }, { "epoch": 0.3318676570674888, "grad_norm": 0.4264238774776459, "learning_rate": 0.00013365393363573742, "loss": 1.4785, "step": 25539 }, { "epoch": 0.3318806516114047, "grad_norm": 0.3222150504589081, "learning_rate": 0.00013365133417382602, "loss": 1.2357, "step": 25540 }, { "epoch": 0.3318936461553205, "grad_norm": 0.46247807145118713, "learning_rate": 0.00013364873471191464, "loss": 1.5866, "step": 25541 }, { "epoch": 0.3319066406992364, "grad_norm": 0.34905382990837097, "learning_rate": 0.00013364613525000324, "loss": 1.3636, "step": 25542 }, { "epoch": 0.33191963524315227, "grad_norm": 0.4535878300666809, "learning_rate": 0.00013364353578809187, "loss": 1.4386, "step": 25543 }, { "epoch": 0.33193262978706817, "grad_norm": 0.3320789635181427, "learning_rate": 0.0001336409363261805, "loss": 1.42, "step": 25544 }, { "epoch": 0.331945624330984, "grad_norm": 0.31830722093582153, "learning_rate": 0.0001336383368642691, "loss": 1.2966, "step": 25545 }, { "epoch": 0.3319586188748999, "grad_norm": 0.3413156569004059, "learning_rate": 0.0001336357374023577, "loss": 1.4182, "step": 25546 }, { "epoch": 0.33197161341881576, "grad_norm": 0.3917073607444763, "learning_rate": 0.00013363313794044634, "loss": 1.552, "step": 25547 }, { "epoch": 0.33198460796273166, "grad_norm": 0.4104979336261749, "learning_rate": 0.00013363053847853496, "loss": 1.4159, "step": 25548 }, { "epoch": 0.3319976025066475, "grad_norm": 0.40208619832992554, "learning_rate": 0.00013362793901662356, "loss": 1.2627, "step": 25549 }, { "epoch": 0.3320105970505634, "grad_norm": 0.3399253785610199, "learning_rate": 0.00013362533955471218, "loss": 1.2989, "step": 25550 }, { "epoch": 0.33202359159447925, "grad_norm": 0.36069270968437195, "learning_rate": 0.0001336227400928008, "loss": 1.427, "step": 25551 }, { "epoch": 0.33203658613839515, "grad_norm": 0.3751765787601471, "learning_rate": 0.0001336201406308894, "loss": 1.2451, "step": 25552 }, { "epoch": 0.332049580682311, "grad_norm": 0.39233654737472534, "learning_rate": 0.00013361754116897803, "loss": 1.363, "step": 25553 }, { "epoch": 0.3320625752262269, "grad_norm": 0.3892533481121063, "learning_rate": 0.00013361494170706665, "loss": 1.5235, "step": 25554 }, { "epoch": 0.33207556977014274, "grad_norm": 0.3848094940185547, "learning_rate": 0.00013361234224515525, "loss": 1.4284, "step": 25555 }, { "epoch": 0.33208856431405864, "grad_norm": 0.49205902218818665, "learning_rate": 0.00013360974278324388, "loss": 1.5989, "step": 25556 }, { "epoch": 0.3321015588579745, "grad_norm": 0.36181342601776123, "learning_rate": 0.00013360714332133247, "loss": 1.4242, "step": 25557 }, { "epoch": 0.3321145534018904, "grad_norm": 0.42041993141174316, "learning_rate": 0.00013360454385942112, "loss": 1.3902, "step": 25558 }, { "epoch": 0.33212754794580623, "grad_norm": 0.33958718180656433, "learning_rate": 0.00013360194439750972, "loss": 1.3398, "step": 25559 }, { "epoch": 0.33214054248972213, "grad_norm": 0.3924887478351593, "learning_rate": 0.00013359934493559835, "loss": 1.3547, "step": 25560 }, { "epoch": 0.332153537033638, "grad_norm": 0.3785068392753601, "learning_rate": 0.00013359674547368694, "loss": 1.3232, "step": 25561 }, { "epoch": 0.3321665315775539, "grad_norm": 0.37241414189338684, "learning_rate": 0.00013359414601177557, "loss": 1.2776, "step": 25562 }, { "epoch": 0.3321795261214697, "grad_norm": 0.45610663294792175, "learning_rate": 0.0001335915465498642, "loss": 1.4505, "step": 25563 }, { "epoch": 0.3321925206653856, "grad_norm": 0.3319467008113861, "learning_rate": 0.0001335889470879528, "loss": 1.2296, "step": 25564 }, { "epoch": 0.33220551520930147, "grad_norm": 0.5282312631607056, "learning_rate": 0.00013358634762604141, "loss": 1.3536, "step": 25565 }, { "epoch": 0.3322185097532174, "grad_norm": 0.39996835589408875, "learning_rate": 0.00013358374816413004, "loss": 1.5491, "step": 25566 }, { "epoch": 0.3322315042971332, "grad_norm": 0.39466020464897156, "learning_rate": 0.00013358114870221864, "loss": 1.4308, "step": 25567 }, { "epoch": 0.3322444988410491, "grad_norm": 0.381485253572464, "learning_rate": 0.00013357854924030726, "loss": 1.3573, "step": 25568 }, { "epoch": 0.33225749338496496, "grad_norm": 0.34763917326927185, "learning_rate": 0.00013357594977839586, "loss": 1.3254, "step": 25569 }, { "epoch": 0.33227048792888086, "grad_norm": 0.3791951537132263, "learning_rate": 0.0001335733503164845, "loss": 1.3656, "step": 25570 }, { "epoch": 0.3322834824727967, "grad_norm": 0.4989057779312134, "learning_rate": 0.0001335707508545731, "loss": 1.5201, "step": 25571 }, { "epoch": 0.3322964770167126, "grad_norm": 0.37039926648139954, "learning_rate": 0.00013356815139266173, "loss": 1.5187, "step": 25572 }, { "epoch": 0.33230947156062846, "grad_norm": 0.34916022419929504, "learning_rate": 0.00013356555193075033, "loss": 1.4985, "step": 25573 }, { "epoch": 0.33232246610454436, "grad_norm": 0.5183528065681458, "learning_rate": 0.00013356295246883895, "loss": 1.4452, "step": 25574 }, { "epoch": 0.3323354606484602, "grad_norm": 0.3225412964820862, "learning_rate": 0.00013356035300692758, "loss": 1.3835, "step": 25575 }, { "epoch": 0.3323484551923761, "grad_norm": 0.502003014087677, "learning_rate": 0.00013355775354501618, "loss": 1.5655, "step": 25576 }, { "epoch": 0.33236144973629195, "grad_norm": 0.3813602030277252, "learning_rate": 0.0001335551540831048, "loss": 1.3902, "step": 25577 }, { "epoch": 0.33237444428020785, "grad_norm": 0.3464597165584564, "learning_rate": 0.00013355255462119342, "loss": 1.3222, "step": 25578 }, { "epoch": 0.33238743882412375, "grad_norm": 0.3733803927898407, "learning_rate": 0.00013354995515928205, "loss": 1.4504, "step": 25579 }, { "epoch": 0.3324004333680396, "grad_norm": 0.5423606038093567, "learning_rate": 0.00013354735569737065, "loss": 1.4372, "step": 25580 }, { "epoch": 0.3324134279119555, "grad_norm": 0.32530727982521057, "learning_rate": 0.00013354475623545924, "loss": 1.2882, "step": 25581 }, { "epoch": 0.33242642245587134, "grad_norm": 0.3465639352798462, "learning_rate": 0.0001335421567735479, "loss": 1.3681, "step": 25582 }, { "epoch": 0.33243941699978724, "grad_norm": 0.31461164355278015, "learning_rate": 0.0001335395573116365, "loss": 1.3331, "step": 25583 }, { "epoch": 0.3324524115437031, "grad_norm": 0.4370171129703522, "learning_rate": 0.00013353695784972512, "loss": 1.3451, "step": 25584 }, { "epoch": 0.332465406087619, "grad_norm": 0.3450091779232025, "learning_rate": 0.00013353435838781371, "loss": 1.5385, "step": 25585 }, { "epoch": 0.33247840063153483, "grad_norm": 0.404261976480484, "learning_rate": 0.00013353175892590234, "loss": 1.2719, "step": 25586 }, { "epoch": 0.33249139517545073, "grad_norm": 0.39896342158317566, "learning_rate": 0.00013352915946399096, "loss": 1.4844, "step": 25587 }, { "epoch": 0.3325043897193666, "grad_norm": 0.4159028232097626, "learning_rate": 0.00013352656000207956, "loss": 1.3059, "step": 25588 }, { "epoch": 0.3325173842632825, "grad_norm": 0.42139920592308044, "learning_rate": 0.0001335239605401682, "loss": 1.3424, "step": 25589 }, { "epoch": 0.3325303788071983, "grad_norm": 0.4097001850605011, "learning_rate": 0.0001335213610782568, "loss": 1.7296, "step": 25590 }, { "epoch": 0.3325433733511142, "grad_norm": 0.4299386143684387, "learning_rate": 0.00013351876161634543, "loss": 1.3901, "step": 25591 }, { "epoch": 0.33255636789503007, "grad_norm": 0.2708926796913147, "learning_rate": 0.00013351616215443403, "loss": 1.204, "step": 25592 }, { "epoch": 0.33256936243894597, "grad_norm": 0.4295893907546997, "learning_rate": 0.00013351356269252266, "loss": 1.455, "step": 25593 }, { "epoch": 0.3325823569828618, "grad_norm": 0.4148605763912201, "learning_rate": 0.00013351096323061128, "loss": 1.3552, "step": 25594 }, { "epoch": 0.3325953515267777, "grad_norm": 0.36075738072395325, "learning_rate": 0.00013350836376869988, "loss": 1.3259, "step": 25595 }, { "epoch": 0.33260834607069356, "grad_norm": 0.4220554828643799, "learning_rate": 0.0001335057643067885, "loss": 1.3586, "step": 25596 }, { "epoch": 0.33262134061460946, "grad_norm": 0.4005264937877655, "learning_rate": 0.00013350316484487713, "loss": 1.2632, "step": 25597 }, { "epoch": 0.3326343351585253, "grad_norm": 0.38507142663002014, "learning_rate": 0.00013350056538296572, "loss": 1.2469, "step": 25598 }, { "epoch": 0.3326473297024412, "grad_norm": 0.4175419509410858, "learning_rate": 0.00013349796592105435, "loss": 1.4666, "step": 25599 }, { "epoch": 0.33266032424635705, "grad_norm": 0.35873863101005554, "learning_rate": 0.00013349536645914295, "loss": 1.2894, "step": 25600 }, { "epoch": 0.33267331879027295, "grad_norm": 0.45133766531944275, "learning_rate": 0.0001334927669972316, "loss": 1.3752, "step": 25601 }, { "epoch": 0.3326863133341888, "grad_norm": 0.6153174042701721, "learning_rate": 0.0001334901675353202, "loss": 1.3217, "step": 25602 }, { "epoch": 0.3326993078781047, "grad_norm": 0.4296231269836426, "learning_rate": 0.00013348756807340882, "loss": 1.3398, "step": 25603 }, { "epoch": 0.33271230242202054, "grad_norm": 0.4200472831726074, "learning_rate": 0.00013348496861149742, "loss": 1.296, "step": 25604 }, { "epoch": 0.33272529696593645, "grad_norm": 0.38560792803764343, "learning_rate": 0.00013348236914958604, "loss": 1.495, "step": 25605 }, { "epoch": 0.3327382915098523, "grad_norm": 0.393822580575943, "learning_rate": 0.00013347976968767467, "loss": 1.4542, "step": 25606 }, { "epoch": 0.3327512860537682, "grad_norm": 0.4270383417606354, "learning_rate": 0.00013347717022576326, "loss": 1.3654, "step": 25607 }, { "epoch": 0.33276428059768404, "grad_norm": 0.3826548457145691, "learning_rate": 0.0001334745707638519, "loss": 1.3082, "step": 25608 }, { "epoch": 0.33277727514159994, "grad_norm": 0.33963364362716675, "learning_rate": 0.0001334719713019405, "loss": 1.192, "step": 25609 }, { "epoch": 0.3327902696855158, "grad_norm": 0.27436015009880066, "learning_rate": 0.0001334693718400291, "loss": 1.4287, "step": 25610 }, { "epoch": 0.3328032642294317, "grad_norm": 0.42664679884910583, "learning_rate": 0.00013346677237811773, "loss": 1.4623, "step": 25611 }, { "epoch": 0.33281625877334753, "grad_norm": 0.4803803563117981, "learning_rate": 0.00013346417291620633, "loss": 1.4997, "step": 25612 }, { "epoch": 0.33282925331726343, "grad_norm": 0.3724839389324188, "learning_rate": 0.00013346157345429498, "loss": 1.4123, "step": 25613 }, { "epoch": 0.3328422478611793, "grad_norm": 0.25665339827537537, "learning_rate": 0.00013345897399238358, "loss": 1.409, "step": 25614 }, { "epoch": 0.3328552424050952, "grad_norm": 0.32765892148017883, "learning_rate": 0.0001334563745304722, "loss": 1.2473, "step": 25615 }, { "epoch": 0.332868236949011, "grad_norm": 0.3515726923942566, "learning_rate": 0.0001334537750685608, "loss": 1.5841, "step": 25616 }, { "epoch": 0.3328812314929269, "grad_norm": 0.3732127249240875, "learning_rate": 0.00013345117560664943, "loss": 1.5871, "step": 25617 }, { "epoch": 0.33289422603684277, "grad_norm": 0.4103303849697113, "learning_rate": 0.00013344857614473805, "loss": 1.2642, "step": 25618 }, { "epoch": 0.33290722058075867, "grad_norm": 0.4249461889266968, "learning_rate": 0.00013344597668282665, "loss": 1.5431, "step": 25619 }, { "epoch": 0.3329202151246745, "grad_norm": 0.2997204065322876, "learning_rate": 0.00013344337722091527, "loss": 1.1373, "step": 25620 }, { "epoch": 0.3329332096685904, "grad_norm": 0.38188666105270386, "learning_rate": 0.0001334407777590039, "loss": 1.3343, "step": 25621 }, { "epoch": 0.33294620421250626, "grad_norm": 0.4125663936138153, "learning_rate": 0.0001334381782970925, "loss": 1.4628, "step": 25622 }, { "epoch": 0.33295919875642216, "grad_norm": 0.3723592758178711, "learning_rate": 0.00013343557883518112, "loss": 1.2897, "step": 25623 }, { "epoch": 0.332972193300338, "grad_norm": 0.4307374656200409, "learning_rate": 0.00013343297937326972, "loss": 1.4864, "step": 25624 }, { "epoch": 0.3329851878442539, "grad_norm": 0.4564815163612366, "learning_rate": 0.00013343037991135837, "loss": 1.4334, "step": 25625 }, { "epoch": 0.33299818238816975, "grad_norm": 0.3599027693271637, "learning_rate": 0.00013342778044944697, "loss": 1.4046, "step": 25626 }, { "epoch": 0.33301117693208565, "grad_norm": 0.4610879421234131, "learning_rate": 0.0001334251809875356, "loss": 1.5288, "step": 25627 }, { "epoch": 0.3330241714760015, "grad_norm": 0.47012439370155334, "learning_rate": 0.00013342258152562422, "loss": 1.4034, "step": 25628 }, { "epoch": 0.3330371660199174, "grad_norm": 0.3730287253856659, "learning_rate": 0.0001334199820637128, "loss": 1.3497, "step": 25629 }, { "epoch": 0.33305016056383324, "grad_norm": 0.36285486817359924, "learning_rate": 0.00013341738260180144, "loss": 1.4784, "step": 25630 }, { "epoch": 0.33306315510774914, "grad_norm": 0.3468230962753296, "learning_rate": 0.00013341478313989003, "loss": 1.5671, "step": 25631 }, { "epoch": 0.333076149651665, "grad_norm": 0.3691968023777008, "learning_rate": 0.00013341218367797869, "loss": 1.4311, "step": 25632 }, { "epoch": 0.3330891441955809, "grad_norm": 0.38946670293807983, "learning_rate": 0.00013340958421606728, "loss": 1.3988, "step": 25633 }, { "epoch": 0.33310213873949673, "grad_norm": 0.5536205768585205, "learning_rate": 0.0001334069847541559, "loss": 1.4267, "step": 25634 }, { "epoch": 0.33311513328341263, "grad_norm": 0.40093138813972473, "learning_rate": 0.0001334043852922445, "loss": 1.3898, "step": 25635 }, { "epoch": 0.3331281278273285, "grad_norm": 0.27834880352020264, "learning_rate": 0.00013340178583033313, "loss": 1.3848, "step": 25636 }, { "epoch": 0.3331411223712444, "grad_norm": 0.369975209236145, "learning_rate": 0.00013339918636842175, "loss": 1.4382, "step": 25637 }, { "epoch": 0.3331541169151602, "grad_norm": 0.34010785818099976, "learning_rate": 0.00013339658690651035, "loss": 1.5149, "step": 25638 }, { "epoch": 0.3331671114590761, "grad_norm": 0.4089600741863251, "learning_rate": 0.00013339398744459898, "loss": 1.4833, "step": 25639 }, { "epoch": 0.33318010600299197, "grad_norm": 0.43873879313468933, "learning_rate": 0.0001333913879826876, "loss": 1.4863, "step": 25640 }, { "epoch": 0.33319310054690787, "grad_norm": 0.33970677852630615, "learning_rate": 0.0001333887885207762, "loss": 1.4104, "step": 25641 }, { "epoch": 0.3332060950908237, "grad_norm": 0.3152744174003601, "learning_rate": 0.00013338618905886482, "loss": 1.4255, "step": 25642 }, { "epoch": 0.3332190896347396, "grad_norm": 0.3619931638240814, "learning_rate": 0.00013338358959695342, "loss": 1.0646, "step": 25643 }, { "epoch": 0.33323208417865546, "grad_norm": 0.39924100041389465, "learning_rate": 0.00013338099013504207, "loss": 1.3965, "step": 25644 }, { "epoch": 0.33324507872257136, "grad_norm": 0.3600206971168518, "learning_rate": 0.00013337839067313067, "loss": 1.3047, "step": 25645 }, { "epoch": 0.3332580732664872, "grad_norm": 0.3806486129760742, "learning_rate": 0.0001333757912112193, "loss": 1.4779, "step": 25646 }, { "epoch": 0.3332710678104031, "grad_norm": 0.4065740406513214, "learning_rate": 0.0001333731917493079, "loss": 1.6361, "step": 25647 }, { "epoch": 0.33328406235431896, "grad_norm": 0.362339586019516, "learning_rate": 0.00013337059228739652, "loss": 1.445, "step": 25648 }, { "epoch": 0.33329705689823486, "grad_norm": 0.3874898850917816, "learning_rate": 0.00013336799282548514, "loss": 1.4442, "step": 25649 }, { "epoch": 0.3333100514421507, "grad_norm": 0.4575158655643463, "learning_rate": 0.00013336539336357374, "loss": 1.3938, "step": 25650 }, { "epoch": 0.3333230459860666, "grad_norm": 0.3464610278606415, "learning_rate": 0.00013336279390166236, "loss": 1.4896, "step": 25651 }, { "epoch": 0.33333604052998245, "grad_norm": 0.4411802589893341, "learning_rate": 0.00013336019443975099, "loss": 1.3937, "step": 25652 }, { "epoch": 0.33334903507389835, "grad_norm": 0.41082653403282166, "learning_rate": 0.00013335759497783958, "loss": 1.3852, "step": 25653 }, { "epoch": 0.3333620296178142, "grad_norm": 0.3968498408794403, "learning_rate": 0.0001333549955159282, "loss": 1.484, "step": 25654 }, { "epoch": 0.3333750241617301, "grad_norm": 0.25850117206573486, "learning_rate": 0.0001333523960540168, "loss": 1.2697, "step": 25655 }, { "epoch": 0.333388018705646, "grad_norm": 0.435116171836853, "learning_rate": 0.00013334979659210546, "loss": 1.384, "step": 25656 }, { "epoch": 0.33340101324956184, "grad_norm": 0.2973543405532837, "learning_rate": 0.00013334719713019405, "loss": 1.205, "step": 25657 }, { "epoch": 0.33341400779347774, "grad_norm": 0.3141220211982727, "learning_rate": 0.00013334459766828268, "loss": 1.3016, "step": 25658 }, { "epoch": 0.3334270023373936, "grad_norm": 0.4753108322620392, "learning_rate": 0.00013334199820637128, "loss": 1.351, "step": 25659 }, { "epoch": 0.3334399968813095, "grad_norm": 0.4370015859603882, "learning_rate": 0.0001333393987444599, "loss": 1.377, "step": 25660 }, { "epoch": 0.33345299142522533, "grad_norm": 0.48066309094429016, "learning_rate": 0.00013333679928254853, "loss": 1.5265, "step": 25661 }, { "epoch": 0.33346598596914123, "grad_norm": 0.41677922010421753, "learning_rate": 0.00013333419982063712, "loss": 1.4218, "step": 25662 }, { "epoch": 0.3334789805130571, "grad_norm": 0.29214680194854736, "learning_rate": 0.00013333160035872577, "loss": 1.2593, "step": 25663 }, { "epoch": 0.333491975056973, "grad_norm": 0.3423587679862976, "learning_rate": 0.00013332900089681437, "loss": 1.622, "step": 25664 }, { "epoch": 0.3335049696008888, "grad_norm": 0.3217088580131531, "learning_rate": 0.00013332640143490297, "loss": 1.5343, "step": 25665 }, { "epoch": 0.3335179641448047, "grad_norm": 0.42120659351348877, "learning_rate": 0.0001333238019729916, "loss": 1.5382, "step": 25666 }, { "epoch": 0.33353095868872057, "grad_norm": 0.3697757124900818, "learning_rate": 0.00013332120251108022, "loss": 1.4421, "step": 25667 }, { "epoch": 0.33354395323263647, "grad_norm": 0.4235227406024933, "learning_rate": 0.00013331860304916884, "loss": 1.409, "step": 25668 }, { "epoch": 0.3335569477765523, "grad_norm": 0.4443199634552002, "learning_rate": 0.00013331600358725744, "loss": 1.2413, "step": 25669 }, { "epoch": 0.3335699423204682, "grad_norm": 0.5446480512619019, "learning_rate": 0.00013331340412534606, "loss": 1.2817, "step": 25670 }, { "epoch": 0.33358293686438406, "grad_norm": 0.3863662779331207, "learning_rate": 0.0001333108046634347, "loss": 1.4404, "step": 25671 }, { "epoch": 0.33359593140829996, "grad_norm": 0.3658975064754486, "learning_rate": 0.00013330820520152329, "loss": 1.4263, "step": 25672 }, { "epoch": 0.3336089259522158, "grad_norm": 0.3328890800476074, "learning_rate": 0.0001333056057396119, "loss": 1.2018, "step": 25673 }, { "epoch": 0.3336219204961317, "grad_norm": 0.41930091381073, "learning_rate": 0.0001333030062777005, "loss": 1.4204, "step": 25674 }, { "epoch": 0.33363491504004755, "grad_norm": 0.42810705304145813, "learning_rate": 0.00013330040681578916, "loss": 1.3207, "step": 25675 }, { "epoch": 0.33364790958396345, "grad_norm": 0.41794756054878235, "learning_rate": 0.00013329780735387776, "loss": 1.3049, "step": 25676 }, { "epoch": 0.3336609041278793, "grad_norm": 0.3177722096443176, "learning_rate": 0.00013329520789196635, "loss": 1.3336, "step": 25677 }, { "epoch": 0.3336738986717952, "grad_norm": 0.4186551868915558, "learning_rate": 0.00013329260843005498, "loss": 1.3983, "step": 25678 }, { "epoch": 0.33368689321571104, "grad_norm": 0.3689252436161041, "learning_rate": 0.0001332900089681436, "loss": 1.437, "step": 25679 }, { "epoch": 0.33369988775962695, "grad_norm": 0.5300559997558594, "learning_rate": 0.00013328740950623223, "loss": 1.4918, "step": 25680 }, { "epoch": 0.3337128823035428, "grad_norm": 0.30538153648376465, "learning_rate": 0.00013328481004432083, "loss": 1.3849, "step": 25681 }, { "epoch": 0.3337258768474587, "grad_norm": 0.3892616033554077, "learning_rate": 0.00013328221058240945, "loss": 1.361, "step": 25682 }, { "epoch": 0.33373887139137454, "grad_norm": 0.41723451018333435, "learning_rate": 0.00013327961112049807, "loss": 1.3645, "step": 25683 }, { "epoch": 0.33375186593529044, "grad_norm": 0.4507998824119568, "learning_rate": 0.00013327701165858667, "loss": 1.4467, "step": 25684 }, { "epoch": 0.3337648604792063, "grad_norm": 0.4456848204135895, "learning_rate": 0.0001332744121966753, "loss": 1.5981, "step": 25685 }, { "epoch": 0.3337778550231222, "grad_norm": 0.42757314443588257, "learning_rate": 0.0001332718127347639, "loss": 1.2731, "step": 25686 }, { "epoch": 0.33379084956703803, "grad_norm": 0.4181702136993408, "learning_rate": 0.00013326921327285254, "loss": 1.519, "step": 25687 }, { "epoch": 0.33380384411095393, "grad_norm": 0.36280614137649536, "learning_rate": 0.00013326661381094114, "loss": 1.5972, "step": 25688 }, { "epoch": 0.3338168386548698, "grad_norm": 0.41598019003868103, "learning_rate": 0.00013326401434902977, "loss": 1.2564, "step": 25689 }, { "epoch": 0.3338298331987857, "grad_norm": 0.33791980147361755, "learning_rate": 0.00013326141488711836, "loss": 1.1705, "step": 25690 }, { "epoch": 0.3338428277427015, "grad_norm": 0.4271943271160126, "learning_rate": 0.000133258815425207, "loss": 1.2781, "step": 25691 }, { "epoch": 0.3338558222866174, "grad_norm": 0.4520389139652252, "learning_rate": 0.0001332562159632956, "loss": 1.4017, "step": 25692 }, { "epoch": 0.33386881683053327, "grad_norm": 0.3014369308948517, "learning_rate": 0.0001332536165013842, "loss": 1.3252, "step": 25693 }, { "epoch": 0.33388181137444917, "grad_norm": 0.3556816875934601, "learning_rate": 0.00013325101703947283, "loss": 1.3722, "step": 25694 }, { "epoch": 0.333894805918365, "grad_norm": 0.3957395553588867, "learning_rate": 0.00013324841757756146, "loss": 1.313, "step": 25695 }, { "epoch": 0.3339078004622809, "grad_norm": 0.4657697379589081, "learning_rate": 0.00013324581811565006, "loss": 1.5053, "step": 25696 }, { "epoch": 0.33392079500619676, "grad_norm": 0.38660985231399536, "learning_rate": 0.00013324321865373868, "loss": 1.4131, "step": 25697 }, { "epoch": 0.33393378955011266, "grad_norm": 0.4783041179180145, "learning_rate": 0.00013324061919182728, "loss": 1.487, "step": 25698 }, { "epoch": 0.3339467840940285, "grad_norm": 0.3154374361038208, "learning_rate": 0.00013323801972991593, "loss": 1.362, "step": 25699 }, { "epoch": 0.3339597786379444, "grad_norm": 0.3468500077724457, "learning_rate": 0.00013323542026800453, "loss": 1.1607, "step": 25700 }, { "epoch": 0.33397277318186025, "grad_norm": 0.3841954469680786, "learning_rate": 0.00013323282080609315, "loss": 1.3338, "step": 25701 }, { "epoch": 0.33398576772577615, "grad_norm": 0.34123364090919495, "learning_rate": 0.00013323022134418178, "loss": 1.4662, "step": 25702 }, { "epoch": 0.333998762269692, "grad_norm": 0.3330534100532532, "learning_rate": 0.00013322762188227037, "loss": 1.3348, "step": 25703 }, { "epoch": 0.3340117568136079, "grad_norm": 0.4104132652282715, "learning_rate": 0.000133225022420359, "loss": 1.5437, "step": 25704 }, { "epoch": 0.33402475135752374, "grad_norm": 0.41392773389816284, "learning_rate": 0.0001332224229584476, "loss": 1.4591, "step": 25705 }, { "epoch": 0.33403774590143964, "grad_norm": 0.3826049864292145, "learning_rate": 0.00013321982349653622, "loss": 1.3916, "step": 25706 }, { "epoch": 0.3340507404453555, "grad_norm": 0.5106659531593323, "learning_rate": 0.00013321722403462484, "loss": 1.3409, "step": 25707 }, { "epoch": 0.3340637349892714, "grad_norm": 0.3896544873714447, "learning_rate": 0.00013321462457271344, "loss": 1.4043, "step": 25708 }, { "epoch": 0.33407672953318723, "grad_norm": 0.3524024486541748, "learning_rate": 0.00013321202511080207, "loss": 1.3384, "step": 25709 }, { "epoch": 0.33408972407710313, "grad_norm": 0.381040096282959, "learning_rate": 0.0001332094256488907, "loss": 1.1831, "step": 25710 }, { "epoch": 0.334102718621019, "grad_norm": 0.35315778851509094, "learning_rate": 0.00013320682618697932, "loss": 1.5032, "step": 25711 }, { "epoch": 0.3341157131649349, "grad_norm": 0.37174782156944275, "learning_rate": 0.0001332042267250679, "loss": 1.5872, "step": 25712 }, { "epoch": 0.3341287077088507, "grad_norm": 0.387458473443985, "learning_rate": 0.00013320162726315654, "loss": 1.2877, "step": 25713 }, { "epoch": 0.3341417022527666, "grad_norm": 0.42157042026519775, "learning_rate": 0.00013319902780124516, "loss": 1.4704, "step": 25714 }, { "epoch": 0.33415469679668247, "grad_norm": 0.47148948907852173, "learning_rate": 0.00013319642833933376, "loss": 1.3166, "step": 25715 }, { "epoch": 0.33416769134059837, "grad_norm": 0.43402060866355896, "learning_rate": 0.00013319382887742238, "loss": 1.2751, "step": 25716 }, { "epoch": 0.3341806858845142, "grad_norm": 0.33133432269096375, "learning_rate": 0.00013319122941551098, "loss": 1.2877, "step": 25717 }, { "epoch": 0.3341936804284301, "grad_norm": 0.4010087847709656, "learning_rate": 0.00013318862995359963, "loss": 1.4587, "step": 25718 }, { "epoch": 0.33420667497234596, "grad_norm": 0.4143335521221161, "learning_rate": 0.00013318603049168823, "loss": 1.4603, "step": 25719 }, { "epoch": 0.33421966951626186, "grad_norm": 0.46601802110671997, "learning_rate": 0.00013318343102977683, "loss": 1.5658, "step": 25720 }, { "epoch": 0.3342326640601777, "grad_norm": 0.45379146933555603, "learning_rate": 0.00013318083156786545, "loss": 1.3591, "step": 25721 }, { "epoch": 0.3342456586040936, "grad_norm": 0.39463135600090027, "learning_rate": 0.00013317823210595408, "loss": 1.3518, "step": 25722 }, { "epoch": 0.33425865314800945, "grad_norm": 0.41754457354545593, "learning_rate": 0.0001331756326440427, "loss": 1.4463, "step": 25723 }, { "epoch": 0.33427164769192536, "grad_norm": 0.3736172914505005, "learning_rate": 0.0001331730331821313, "loss": 1.4334, "step": 25724 }, { "epoch": 0.3342846422358412, "grad_norm": 0.47630414366722107, "learning_rate": 0.00013317043372021992, "loss": 1.6443, "step": 25725 }, { "epoch": 0.3342976367797571, "grad_norm": 0.3538120985031128, "learning_rate": 0.00013316783425830855, "loss": 1.4333, "step": 25726 }, { "epoch": 0.33431063132367295, "grad_norm": 0.39376553893089294, "learning_rate": 0.00013316523479639714, "loss": 1.4702, "step": 25727 }, { "epoch": 0.33432362586758885, "grad_norm": 0.39656057953834534, "learning_rate": 0.00013316263533448577, "loss": 1.5044, "step": 25728 }, { "epoch": 0.3343366204115047, "grad_norm": 0.3424912095069885, "learning_rate": 0.00013316003587257437, "loss": 1.4403, "step": 25729 }, { "epoch": 0.3343496149554206, "grad_norm": 0.40744975209236145, "learning_rate": 0.00013315743641066302, "loss": 1.4297, "step": 25730 }, { "epoch": 0.33436260949933644, "grad_norm": 0.38761138916015625, "learning_rate": 0.00013315483694875162, "loss": 1.265, "step": 25731 }, { "epoch": 0.33437560404325234, "grad_norm": 0.39098161458969116, "learning_rate": 0.0001331522374868402, "loss": 1.3046, "step": 25732 }, { "epoch": 0.33438859858716824, "grad_norm": 0.30262884497642517, "learning_rate": 0.00013314963802492884, "loss": 1.358, "step": 25733 }, { "epoch": 0.3344015931310841, "grad_norm": 0.3829196095466614, "learning_rate": 0.00013314703856301746, "loss": 1.3496, "step": 25734 }, { "epoch": 0.334414587675, "grad_norm": 0.3335346281528473, "learning_rate": 0.0001331444391011061, "loss": 1.4643, "step": 25735 }, { "epoch": 0.33442758221891583, "grad_norm": 0.3577483892440796, "learning_rate": 0.00013314183963919468, "loss": 1.2778, "step": 25736 }, { "epoch": 0.33444057676283173, "grad_norm": 0.3655077815055847, "learning_rate": 0.0001331392401772833, "loss": 1.3219, "step": 25737 }, { "epoch": 0.3344535713067476, "grad_norm": 0.43895676732063293, "learning_rate": 0.00013313664071537193, "loss": 1.3642, "step": 25738 }, { "epoch": 0.3344665658506635, "grad_norm": 0.30158793926239014, "learning_rate": 0.00013313404125346053, "loss": 1.2761, "step": 25739 }, { "epoch": 0.3344795603945793, "grad_norm": 0.3677535951137543, "learning_rate": 0.00013313144179154915, "loss": 1.4953, "step": 25740 }, { "epoch": 0.3344925549384952, "grad_norm": 0.3854217231273651, "learning_rate": 0.00013312884232963778, "loss": 1.5315, "step": 25741 }, { "epoch": 0.33450554948241107, "grad_norm": 0.5100762844085693, "learning_rate": 0.0001331262428677264, "loss": 1.4422, "step": 25742 }, { "epoch": 0.33451854402632697, "grad_norm": 0.3941008150577545, "learning_rate": 0.000133123643405815, "loss": 1.3095, "step": 25743 }, { "epoch": 0.3345315385702428, "grad_norm": 0.5803819298744202, "learning_rate": 0.0001331210439439036, "loss": 1.355, "step": 25744 }, { "epoch": 0.3345445331141587, "grad_norm": 0.4111936390399933, "learning_rate": 0.00013311844448199225, "loss": 1.3819, "step": 25745 }, { "epoch": 0.33455752765807456, "grad_norm": 0.28085440397262573, "learning_rate": 0.00013311584502008085, "loss": 1.3052, "step": 25746 }, { "epoch": 0.33457052220199046, "grad_norm": 0.4010433852672577, "learning_rate": 0.00013311324555816947, "loss": 1.4377, "step": 25747 }, { "epoch": 0.3345835167459063, "grad_norm": 0.41501107811927795, "learning_rate": 0.00013311064609625807, "loss": 1.4122, "step": 25748 }, { "epoch": 0.3345965112898222, "grad_norm": 0.48217329382896423, "learning_rate": 0.0001331080466343467, "loss": 1.3691, "step": 25749 }, { "epoch": 0.33460950583373805, "grad_norm": 0.3848549723625183, "learning_rate": 0.00013310544717243532, "loss": 1.4608, "step": 25750 }, { "epoch": 0.33462250037765395, "grad_norm": 0.6189988255500793, "learning_rate": 0.00013310284771052392, "loss": 1.5218, "step": 25751 }, { "epoch": 0.3346354949215698, "grad_norm": 0.44294923543930054, "learning_rate": 0.00013310024824861254, "loss": 1.3063, "step": 25752 }, { "epoch": 0.3346484894654857, "grad_norm": 0.3766375780105591, "learning_rate": 0.00013309764878670116, "loss": 1.3734, "step": 25753 }, { "epoch": 0.33466148400940154, "grad_norm": 0.4893748164176941, "learning_rate": 0.0001330950493247898, "loss": 1.4105, "step": 25754 }, { "epoch": 0.33467447855331744, "grad_norm": 0.3725035488605499, "learning_rate": 0.00013309244986287839, "loss": 1.2359, "step": 25755 }, { "epoch": 0.3346874730972333, "grad_norm": 0.3913702666759491, "learning_rate": 0.000133089850400967, "loss": 1.5945, "step": 25756 }, { "epoch": 0.3347004676411492, "grad_norm": 0.46373018622398376, "learning_rate": 0.00013308725093905564, "loss": 1.487, "step": 25757 }, { "epoch": 0.33471346218506504, "grad_norm": 0.43601059913635254, "learning_rate": 0.00013308465147714423, "loss": 1.6358, "step": 25758 }, { "epoch": 0.33472645672898094, "grad_norm": 0.3991638720035553, "learning_rate": 0.00013308205201523286, "loss": 1.5986, "step": 25759 }, { "epoch": 0.3347394512728968, "grad_norm": 0.4584387540817261, "learning_rate": 0.00013307945255332145, "loss": 1.4354, "step": 25760 }, { "epoch": 0.3347524458168127, "grad_norm": 0.3756411671638489, "learning_rate": 0.00013307685309141008, "loss": 1.371, "step": 25761 }, { "epoch": 0.33476544036072853, "grad_norm": 0.38803601264953613, "learning_rate": 0.0001330742536294987, "loss": 1.2757, "step": 25762 }, { "epoch": 0.33477843490464443, "grad_norm": 0.5287535190582275, "learning_rate": 0.0001330716541675873, "loss": 1.6751, "step": 25763 }, { "epoch": 0.3347914294485603, "grad_norm": 0.42449700832366943, "learning_rate": 0.00013306905470567593, "loss": 1.3604, "step": 25764 }, { "epoch": 0.3348044239924762, "grad_norm": 0.37302276492118835, "learning_rate": 0.00013306645524376455, "loss": 1.2632, "step": 25765 }, { "epoch": 0.334817418536392, "grad_norm": 0.3807085156440735, "learning_rate": 0.00013306385578185317, "loss": 1.3199, "step": 25766 }, { "epoch": 0.3348304130803079, "grad_norm": 0.3239312171936035, "learning_rate": 0.00013306125631994177, "loss": 1.2471, "step": 25767 }, { "epoch": 0.33484340762422377, "grad_norm": 0.39483028650283813, "learning_rate": 0.0001330586568580304, "loss": 1.5344, "step": 25768 }, { "epoch": 0.33485640216813967, "grad_norm": 0.4829374849796295, "learning_rate": 0.00013305605739611902, "loss": 1.4222, "step": 25769 }, { "epoch": 0.3348693967120555, "grad_norm": 0.3475134074687958, "learning_rate": 0.00013305345793420762, "loss": 1.3184, "step": 25770 }, { "epoch": 0.3348823912559714, "grad_norm": 0.39459505677223206, "learning_rate": 0.00013305085847229624, "loss": 1.3622, "step": 25771 }, { "epoch": 0.33489538579988726, "grad_norm": 0.408558189868927, "learning_rate": 0.00013304825901038484, "loss": 1.4059, "step": 25772 }, { "epoch": 0.33490838034380316, "grad_norm": 0.49281829595565796, "learning_rate": 0.00013304565954847346, "loss": 1.5471, "step": 25773 }, { "epoch": 0.334921374887719, "grad_norm": 0.3547395169734955, "learning_rate": 0.0001330430600865621, "loss": 1.3622, "step": 25774 }, { "epoch": 0.3349343694316349, "grad_norm": 0.38016456365585327, "learning_rate": 0.00013304046062465069, "loss": 1.384, "step": 25775 }, { "epoch": 0.33494736397555075, "grad_norm": 0.3344155251979828, "learning_rate": 0.00013303786116273934, "loss": 1.208, "step": 25776 }, { "epoch": 0.33496035851946665, "grad_norm": 0.43175214529037476, "learning_rate": 0.00013303526170082794, "loss": 1.486, "step": 25777 }, { "epoch": 0.3349733530633825, "grad_norm": 0.35000523924827576, "learning_rate": 0.00013303266223891656, "loss": 1.3439, "step": 25778 }, { "epoch": 0.3349863476072984, "grad_norm": 0.4333122670650482, "learning_rate": 0.00013303006277700516, "loss": 1.4226, "step": 25779 }, { "epoch": 0.33499934215121424, "grad_norm": 0.46730920672416687, "learning_rate": 0.00013302746331509378, "loss": 1.3632, "step": 25780 }, { "epoch": 0.33501233669513014, "grad_norm": 0.5109378695487976, "learning_rate": 0.0001330248638531824, "loss": 1.4994, "step": 25781 }, { "epoch": 0.335025331239046, "grad_norm": 0.38987481594085693, "learning_rate": 0.000133022264391271, "loss": 1.4897, "step": 25782 }, { "epoch": 0.3350383257829619, "grad_norm": 0.3619634211063385, "learning_rate": 0.00013301966492935963, "loss": 1.4555, "step": 25783 }, { "epoch": 0.33505132032687773, "grad_norm": 0.42930787801742554, "learning_rate": 0.00013301706546744825, "loss": 1.4001, "step": 25784 }, { "epoch": 0.33506431487079363, "grad_norm": 0.2995814383029938, "learning_rate": 0.00013301446600553688, "loss": 1.4406, "step": 25785 }, { "epoch": 0.3350773094147095, "grad_norm": 0.42854374647140503, "learning_rate": 0.00013301186654362547, "loss": 1.3489, "step": 25786 }, { "epoch": 0.3350903039586254, "grad_norm": 0.36997437477111816, "learning_rate": 0.00013300926708171407, "loss": 1.374, "step": 25787 }, { "epoch": 0.3351032985025412, "grad_norm": 0.3460169732570648, "learning_rate": 0.00013300666761980272, "loss": 1.1957, "step": 25788 }, { "epoch": 0.3351162930464571, "grad_norm": 0.40232178568840027, "learning_rate": 0.00013300406815789132, "loss": 1.3714, "step": 25789 }, { "epoch": 0.33512928759037297, "grad_norm": 0.3390718102455139, "learning_rate": 0.00013300146869597995, "loss": 1.5198, "step": 25790 }, { "epoch": 0.33514228213428887, "grad_norm": 0.4513596296310425, "learning_rate": 0.00013299886923406854, "loss": 1.3926, "step": 25791 }, { "epoch": 0.3351552766782047, "grad_norm": 0.3837360739707947, "learning_rate": 0.00013299626977215717, "loss": 1.5218, "step": 25792 }, { "epoch": 0.3351682712221206, "grad_norm": 0.35585376620292664, "learning_rate": 0.0001329936703102458, "loss": 1.4913, "step": 25793 }, { "epoch": 0.33518126576603646, "grad_norm": 0.40672287344932556, "learning_rate": 0.0001329910708483344, "loss": 1.425, "step": 25794 }, { "epoch": 0.33519426030995236, "grad_norm": 0.4959190785884857, "learning_rate": 0.000132988471386423, "loss": 1.545, "step": 25795 }, { "epoch": 0.3352072548538682, "grad_norm": 0.39881736040115356, "learning_rate": 0.00013298587192451164, "loss": 1.4014, "step": 25796 }, { "epoch": 0.3352202493977841, "grad_norm": 0.25943171977996826, "learning_rate": 0.00013298327246260026, "loss": 1.4929, "step": 25797 }, { "epoch": 0.33523324394169995, "grad_norm": 0.4061547815799713, "learning_rate": 0.00013298067300068886, "loss": 1.5397, "step": 25798 }, { "epoch": 0.33524623848561586, "grad_norm": 0.38471323251724243, "learning_rate": 0.00013297807353877746, "loss": 1.3281, "step": 25799 }, { "epoch": 0.3352592330295317, "grad_norm": 0.38609546422958374, "learning_rate": 0.0001329754740768661, "loss": 1.5517, "step": 25800 }, { "epoch": 0.3352722275734476, "grad_norm": 0.35709348320961, "learning_rate": 0.0001329728746149547, "loss": 1.3181, "step": 25801 }, { "epoch": 0.33528522211736345, "grad_norm": 0.4715689420700073, "learning_rate": 0.00013297027515304333, "loss": 1.4992, "step": 25802 }, { "epoch": 0.33529821666127935, "grad_norm": 0.3763044774532318, "learning_rate": 0.00013296767569113193, "loss": 1.2592, "step": 25803 }, { "epoch": 0.3353112112051952, "grad_norm": 0.33089184761047363, "learning_rate": 0.00013296507622922055, "loss": 1.268, "step": 25804 }, { "epoch": 0.3353242057491111, "grad_norm": 0.45010942220687866, "learning_rate": 0.00013296247676730918, "loss": 1.3536, "step": 25805 }, { "epoch": 0.33533720029302694, "grad_norm": 0.42581161856651306, "learning_rate": 0.00013295987730539777, "loss": 1.3623, "step": 25806 }, { "epoch": 0.33535019483694284, "grad_norm": 0.3786059617996216, "learning_rate": 0.0001329572778434864, "loss": 1.1144, "step": 25807 }, { "epoch": 0.33536318938085874, "grad_norm": 0.39766258001327515, "learning_rate": 0.00013295467838157502, "loss": 1.5018, "step": 25808 }, { "epoch": 0.3353761839247746, "grad_norm": 0.4654091000556946, "learning_rate": 0.00013295207891966365, "loss": 1.3901, "step": 25809 }, { "epoch": 0.3353891784686905, "grad_norm": 0.32602956891059875, "learning_rate": 0.00013294947945775225, "loss": 1.3941, "step": 25810 }, { "epoch": 0.33540217301260633, "grad_norm": 0.33514222502708435, "learning_rate": 0.00013294687999584087, "loss": 1.4649, "step": 25811 }, { "epoch": 0.33541516755652223, "grad_norm": 0.29577383399009705, "learning_rate": 0.0001329442805339295, "loss": 1.3222, "step": 25812 }, { "epoch": 0.3354281621004381, "grad_norm": 0.3306135833263397, "learning_rate": 0.0001329416810720181, "loss": 1.394, "step": 25813 }, { "epoch": 0.335441156644354, "grad_norm": 0.36510902643203735, "learning_rate": 0.00013293908161010672, "loss": 1.2757, "step": 25814 }, { "epoch": 0.3354541511882698, "grad_norm": 0.39862626791000366, "learning_rate": 0.00013293648214819534, "loss": 1.4258, "step": 25815 }, { "epoch": 0.3354671457321857, "grad_norm": 0.4043697118759155, "learning_rate": 0.00013293388268628394, "loss": 1.4016, "step": 25816 }, { "epoch": 0.33548014027610157, "grad_norm": 0.34013503789901733, "learning_rate": 0.00013293128322437256, "loss": 1.3396, "step": 25817 }, { "epoch": 0.33549313482001747, "grad_norm": 0.4256230294704437, "learning_rate": 0.00013292868376246116, "loss": 1.4932, "step": 25818 }, { "epoch": 0.3355061293639333, "grad_norm": 0.38826268911361694, "learning_rate": 0.0001329260843005498, "loss": 1.4574, "step": 25819 }, { "epoch": 0.3355191239078492, "grad_norm": 0.33866193890571594, "learning_rate": 0.0001329234848386384, "loss": 1.5768, "step": 25820 }, { "epoch": 0.33553211845176506, "grad_norm": 0.3599224090576172, "learning_rate": 0.00013292088537672703, "loss": 1.3958, "step": 25821 }, { "epoch": 0.33554511299568096, "grad_norm": 0.3819292485713959, "learning_rate": 0.00013291828591481563, "loss": 1.1807, "step": 25822 }, { "epoch": 0.3355581075395968, "grad_norm": 0.43045055866241455, "learning_rate": 0.00013291568645290426, "loss": 1.4105, "step": 25823 }, { "epoch": 0.3355711020835127, "grad_norm": 0.2808796167373657, "learning_rate": 0.00013291308699099288, "loss": 1.2963, "step": 25824 }, { "epoch": 0.33558409662742855, "grad_norm": 0.39081984758377075, "learning_rate": 0.00013291048752908148, "loss": 1.3504, "step": 25825 }, { "epoch": 0.33559709117134445, "grad_norm": 0.36974290013313293, "learning_rate": 0.0001329078880671701, "loss": 1.2827, "step": 25826 }, { "epoch": 0.3356100857152603, "grad_norm": 0.3717120885848999, "learning_rate": 0.00013290528860525873, "loss": 1.5719, "step": 25827 }, { "epoch": 0.3356230802591762, "grad_norm": 0.36996975541114807, "learning_rate": 0.00013290268914334732, "loss": 1.4233, "step": 25828 }, { "epoch": 0.33563607480309204, "grad_norm": 0.40690329670906067, "learning_rate": 0.00013290008968143595, "loss": 1.5165, "step": 25829 }, { "epoch": 0.33564906934700794, "grad_norm": 0.37998270988464355, "learning_rate": 0.00013289749021952455, "loss": 1.4288, "step": 25830 }, { "epoch": 0.3356620638909238, "grad_norm": 0.3923504650592804, "learning_rate": 0.0001328948907576132, "loss": 1.3807, "step": 25831 }, { "epoch": 0.3356750584348397, "grad_norm": 0.394951730966568, "learning_rate": 0.0001328922912957018, "loss": 1.4369, "step": 25832 }, { "epoch": 0.33568805297875554, "grad_norm": 0.4979211390018463, "learning_rate": 0.00013288969183379042, "loss": 1.3597, "step": 25833 }, { "epoch": 0.33570104752267144, "grad_norm": 0.5070407390594482, "learning_rate": 0.00013288709237187902, "loss": 1.4285, "step": 25834 }, { "epoch": 0.3357140420665873, "grad_norm": 0.316627562046051, "learning_rate": 0.00013288449290996764, "loss": 1.3436, "step": 25835 }, { "epoch": 0.3357270366105032, "grad_norm": 0.472935289144516, "learning_rate": 0.00013288189344805626, "loss": 1.3824, "step": 25836 }, { "epoch": 0.335740031154419, "grad_norm": 0.3751969635486603, "learning_rate": 0.00013287929398614486, "loss": 1.3853, "step": 25837 }, { "epoch": 0.33575302569833493, "grad_norm": 0.37136751413345337, "learning_rate": 0.0001328766945242335, "loss": 1.3163, "step": 25838 }, { "epoch": 0.3357660202422508, "grad_norm": 0.48989471793174744, "learning_rate": 0.0001328740950623221, "loss": 1.3307, "step": 25839 }, { "epoch": 0.3357790147861667, "grad_norm": 0.2639951705932617, "learning_rate": 0.00013287149560041074, "loss": 1.2027, "step": 25840 }, { "epoch": 0.3357920093300825, "grad_norm": 0.39515629410743713, "learning_rate": 0.00013286889613849933, "loss": 1.433, "step": 25841 }, { "epoch": 0.3358050038739984, "grad_norm": 0.3363722860813141, "learning_rate": 0.00013286629667658793, "loss": 1.2829, "step": 25842 }, { "epoch": 0.33581799841791427, "grad_norm": 0.38438382744789124, "learning_rate": 0.00013286369721467658, "loss": 1.4802, "step": 25843 }, { "epoch": 0.33583099296183017, "grad_norm": 0.4884340167045593, "learning_rate": 0.00013286109775276518, "loss": 1.4087, "step": 25844 }, { "epoch": 0.335843987505746, "grad_norm": 0.43961596488952637, "learning_rate": 0.0001328584982908538, "loss": 1.5294, "step": 25845 }, { "epoch": 0.3358569820496619, "grad_norm": 0.33718201518058777, "learning_rate": 0.0001328558988289424, "loss": 1.2932, "step": 25846 }, { "epoch": 0.33586997659357776, "grad_norm": 0.270742267370224, "learning_rate": 0.00013285329936703103, "loss": 1.2658, "step": 25847 }, { "epoch": 0.33588297113749366, "grad_norm": 0.4900417923927307, "learning_rate": 0.00013285069990511965, "loss": 1.4858, "step": 25848 }, { "epoch": 0.3358959656814095, "grad_norm": 0.534781813621521, "learning_rate": 0.00013284810044320825, "loss": 1.389, "step": 25849 }, { "epoch": 0.3359089602253254, "grad_norm": 0.4320332705974579, "learning_rate": 0.0001328455009812969, "loss": 1.5189, "step": 25850 }, { "epoch": 0.33592195476924125, "grad_norm": 0.42957374453544617, "learning_rate": 0.0001328429015193855, "loss": 1.5125, "step": 25851 }, { "epoch": 0.33593494931315715, "grad_norm": 0.47541356086730957, "learning_rate": 0.00013284030205747412, "loss": 1.4657, "step": 25852 }, { "epoch": 0.335947943857073, "grad_norm": 0.3425551950931549, "learning_rate": 0.00013283770259556272, "loss": 1.4741, "step": 25853 }, { "epoch": 0.3359609384009889, "grad_norm": 0.41446205973625183, "learning_rate": 0.00013283510313365134, "loss": 1.3261, "step": 25854 }, { "epoch": 0.33597393294490474, "grad_norm": 0.3287166357040405, "learning_rate": 0.00013283250367173997, "loss": 1.3456, "step": 25855 }, { "epoch": 0.33598692748882064, "grad_norm": 0.39911869168281555, "learning_rate": 0.00013282990420982856, "loss": 1.3534, "step": 25856 }, { "epoch": 0.3359999220327365, "grad_norm": 0.3255561888217926, "learning_rate": 0.0001328273047479172, "loss": 1.3262, "step": 25857 }, { "epoch": 0.3360129165766524, "grad_norm": 0.3266236484050751, "learning_rate": 0.00013282470528600581, "loss": 1.303, "step": 25858 }, { "epoch": 0.33602591112056823, "grad_norm": 0.48608916997909546, "learning_rate": 0.0001328221058240944, "loss": 1.3884, "step": 25859 }, { "epoch": 0.33603890566448413, "grad_norm": 0.4095337986946106, "learning_rate": 0.00013281950636218304, "loss": 1.6156, "step": 25860 }, { "epoch": 0.3360519002084, "grad_norm": 0.46016305685043335, "learning_rate": 0.00013281690690027163, "loss": 1.6192, "step": 25861 }, { "epoch": 0.3360648947523159, "grad_norm": 0.4501890540122986, "learning_rate": 0.00013281430743836028, "loss": 1.4899, "step": 25862 }, { "epoch": 0.3360778892962317, "grad_norm": 0.37623700499534607, "learning_rate": 0.00013281170797644888, "loss": 1.4898, "step": 25863 }, { "epoch": 0.3360908838401476, "grad_norm": 0.41207200288772583, "learning_rate": 0.0001328091085145375, "loss": 1.4318, "step": 25864 }, { "epoch": 0.33610387838406347, "grad_norm": 0.3514039218425751, "learning_rate": 0.0001328065090526261, "loss": 1.2774, "step": 25865 }, { "epoch": 0.33611687292797937, "grad_norm": 0.4079730212688446, "learning_rate": 0.00013280390959071473, "loss": 1.5517, "step": 25866 }, { "epoch": 0.3361298674718952, "grad_norm": 0.3291737735271454, "learning_rate": 0.00013280131012880335, "loss": 1.1904, "step": 25867 }, { "epoch": 0.3361428620158111, "grad_norm": 0.4333759546279907, "learning_rate": 0.00013279871066689195, "loss": 1.482, "step": 25868 }, { "epoch": 0.33615585655972696, "grad_norm": 0.4017113149166107, "learning_rate": 0.00013279611120498057, "loss": 1.4207, "step": 25869 }, { "epoch": 0.33616885110364286, "grad_norm": 0.33533087372779846, "learning_rate": 0.0001327935117430692, "loss": 1.3787, "step": 25870 }, { "epoch": 0.3361818456475587, "grad_norm": 0.39227885007858276, "learning_rate": 0.0001327909122811578, "loss": 1.5333, "step": 25871 }, { "epoch": 0.3361948401914746, "grad_norm": 0.36286699771881104, "learning_rate": 0.00013278831281924642, "loss": 1.4355, "step": 25872 }, { "epoch": 0.33620783473539045, "grad_norm": 0.34125709533691406, "learning_rate": 0.00013278571335733502, "loss": 1.3908, "step": 25873 }, { "epoch": 0.33622082927930635, "grad_norm": 0.24990318715572357, "learning_rate": 0.00013278311389542367, "loss": 1.4261, "step": 25874 }, { "epoch": 0.3362338238232222, "grad_norm": 0.4212367832660675, "learning_rate": 0.00013278051443351227, "loss": 1.4594, "step": 25875 }, { "epoch": 0.3362468183671381, "grad_norm": 0.5457077026367188, "learning_rate": 0.0001327779149716009, "loss": 1.3095, "step": 25876 }, { "epoch": 0.33625981291105395, "grad_norm": 0.36358198523521423, "learning_rate": 0.0001327753155096895, "loss": 1.3502, "step": 25877 }, { "epoch": 0.33627280745496985, "grad_norm": 0.39090877771377563, "learning_rate": 0.00013277271604777811, "loss": 1.3575, "step": 25878 }, { "epoch": 0.3362858019988857, "grad_norm": 0.4438571035861969, "learning_rate": 0.00013277011658586674, "loss": 1.4258, "step": 25879 }, { "epoch": 0.3362987965428016, "grad_norm": 0.4956974387168884, "learning_rate": 0.00013276751712395534, "loss": 1.4071, "step": 25880 }, { "epoch": 0.33631179108671744, "grad_norm": 0.33568087220191956, "learning_rate": 0.00013276491766204396, "loss": 1.3551, "step": 25881 }, { "epoch": 0.33632478563063334, "grad_norm": 0.41659367084503174, "learning_rate": 0.00013276231820013258, "loss": 1.4955, "step": 25882 }, { "epoch": 0.3363377801745492, "grad_norm": 0.47589755058288574, "learning_rate": 0.00013275971873822118, "loss": 1.4073, "step": 25883 }, { "epoch": 0.3363507747184651, "grad_norm": 0.3754766881465912, "learning_rate": 0.0001327571192763098, "loss": 1.3217, "step": 25884 }, { "epoch": 0.336363769262381, "grad_norm": 0.37256643176078796, "learning_rate": 0.0001327545198143984, "loss": 1.2539, "step": 25885 }, { "epoch": 0.33637676380629683, "grad_norm": 0.4344462752342224, "learning_rate": 0.00013275192035248706, "loss": 1.4253, "step": 25886 }, { "epoch": 0.33638975835021273, "grad_norm": 0.4055934250354767, "learning_rate": 0.00013274932089057565, "loss": 1.4551, "step": 25887 }, { "epoch": 0.3364027528941286, "grad_norm": 0.46688514947891235, "learning_rate": 0.00013274672142866428, "loss": 1.4554, "step": 25888 }, { "epoch": 0.3364157474380445, "grad_norm": 0.3879428803920746, "learning_rate": 0.0001327441219667529, "loss": 1.5035, "step": 25889 }, { "epoch": 0.3364287419819603, "grad_norm": 0.3789123594760895, "learning_rate": 0.0001327415225048415, "loss": 1.5252, "step": 25890 }, { "epoch": 0.3364417365258762, "grad_norm": 0.35874906182289124, "learning_rate": 0.00013273892304293012, "loss": 1.2911, "step": 25891 }, { "epoch": 0.33645473106979207, "grad_norm": 0.32427549362182617, "learning_rate": 0.00013273632358101872, "loss": 1.3824, "step": 25892 }, { "epoch": 0.33646772561370797, "grad_norm": 0.36753812432289124, "learning_rate": 0.00013273372411910737, "loss": 1.4153, "step": 25893 }, { "epoch": 0.3364807201576238, "grad_norm": 0.296047180891037, "learning_rate": 0.00013273112465719597, "loss": 1.3456, "step": 25894 }, { "epoch": 0.3364937147015397, "grad_norm": 0.3588009178638458, "learning_rate": 0.0001327285251952846, "loss": 1.3898, "step": 25895 }, { "epoch": 0.33650670924545556, "grad_norm": 0.4116191267967224, "learning_rate": 0.0001327259257333732, "loss": 1.3677, "step": 25896 }, { "epoch": 0.33651970378937146, "grad_norm": 0.3759332001209259, "learning_rate": 0.00013272332627146182, "loss": 1.3373, "step": 25897 }, { "epoch": 0.3365326983332873, "grad_norm": 0.4630047380924225, "learning_rate": 0.00013272072680955044, "loss": 1.4346, "step": 25898 }, { "epoch": 0.3365456928772032, "grad_norm": 0.2555157244205475, "learning_rate": 0.00013271812734763904, "loss": 1.3256, "step": 25899 }, { "epoch": 0.33655868742111905, "grad_norm": 0.40940627455711365, "learning_rate": 0.00013271552788572766, "loss": 1.4412, "step": 25900 }, { "epoch": 0.33657168196503495, "grad_norm": 0.3870319724082947, "learning_rate": 0.0001327129284238163, "loss": 1.3399, "step": 25901 }, { "epoch": 0.3365846765089508, "grad_norm": 0.4527110457420349, "learning_rate": 0.00013271032896190488, "loss": 1.4017, "step": 25902 }, { "epoch": 0.3365976710528667, "grad_norm": 0.46222782135009766, "learning_rate": 0.0001327077294999935, "loss": 1.4352, "step": 25903 }, { "epoch": 0.33661066559678254, "grad_norm": 0.4205856919288635, "learning_rate": 0.0001327051300380821, "loss": 1.1065, "step": 25904 }, { "epoch": 0.33662366014069844, "grad_norm": 0.39422059059143066, "learning_rate": 0.00013270253057617076, "loss": 1.4335, "step": 25905 }, { "epoch": 0.3366366546846143, "grad_norm": 0.41192176938056946, "learning_rate": 0.00013269993111425936, "loss": 1.4046, "step": 25906 }, { "epoch": 0.3366496492285302, "grad_norm": 0.33498743176460266, "learning_rate": 0.00013269733165234798, "loss": 1.5912, "step": 25907 }, { "epoch": 0.33666264377244604, "grad_norm": 0.3808746635913849, "learning_rate": 0.00013269473219043658, "loss": 1.4101, "step": 25908 }, { "epoch": 0.33667563831636194, "grad_norm": 0.4875222444534302, "learning_rate": 0.0001326921327285252, "loss": 1.4948, "step": 25909 }, { "epoch": 0.3366886328602778, "grad_norm": 0.38794437050819397, "learning_rate": 0.00013268953326661383, "loss": 1.5841, "step": 25910 }, { "epoch": 0.3367016274041937, "grad_norm": 0.4314601421356201, "learning_rate": 0.00013268693380470242, "loss": 1.4745, "step": 25911 }, { "epoch": 0.3367146219481095, "grad_norm": 0.37771472334861755, "learning_rate": 0.00013268433434279105, "loss": 1.4342, "step": 25912 }, { "epoch": 0.33672761649202543, "grad_norm": 0.4475351572036743, "learning_rate": 0.00013268173488087967, "loss": 1.4324, "step": 25913 }, { "epoch": 0.3367406110359413, "grad_norm": 0.4147765040397644, "learning_rate": 0.00013267913541896827, "loss": 1.4885, "step": 25914 }, { "epoch": 0.3367536055798572, "grad_norm": 0.4396967887878418, "learning_rate": 0.0001326765359570569, "loss": 1.3747, "step": 25915 }, { "epoch": 0.336766600123773, "grad_norm": 0.3707548975944519, "learning_rate": 0.0001326739364951455, "loss": 1.3314, "step": 25916 }, { "epoch": 0.3367795946676889, "grad_norm": 0.4012846350669861, "learning_rate": 0.00013267133703323414, "loss": 1.3984, "step": 25917 }, { "epoch": 0.33679258921160476, "grad_norm": 0.43612775206565857, "learning_rate": 0.00013266873757132274, "loss": 1.5301, "step": 25918 }, { "epoch": 0.33680558375552067, "grad_norm": 0.41023901104927063, "learning_rate": 0.00013266613810941137, "loss": 1.519, "step": 25919 }, { "epoch": 0.3368185782994365, "grad_norm": 0.3649330139160156, "learning_rate": 0.00013266353864749996, "loss": 1.2271, "step": 25920 }, { "epoch": 0.3368315728433524, "grad_norm": 0.4352780282497406, "learning_rate": 0.0001326609391855886, "loss": 1.5554, "step": 25921 }, { "epoch": 0.33684456738726826, "grad_norm": 0.4940381944179535, "learning_rate": 0.0001326583397236772, "loss": 1.4621, "step": 25922 }, { "epoch": 0.33685756193118416, "grad_norm": 0.42648664116859436, "learning_rate": 0.0001326557402617658, "loss": 1.5415, "step": 25923 }, { "epoch": 0.3368705564751, "grad_norm": 0.37579816579818726, "learning_rate": 0.00013265314079985446, "loss": 1.4407, "step": 25924 }, { "epoch": 0.3368835510190159, "grad_norm": 0.36903703212738037, "learning_rate": 0.00013265054133794306, "loss": 1.3951, "step": 25925 }, { "epoch": 0.33689654556293175, "grad_norm": 0.3801431357860565, "learning_rate": 0.00013264794187603166, "loss": 1.3851, "step": 25926 }, { "epoch": 0.33690954010684765, "grad_norm": 0.48554742336273193, "learning_rate": 0.00013264534241412028, "loss": 1.4229, "step": 25927 }, { "epoch": 0.3369225346507635, "grad_norm": 0.38802024722099304, "learning_rate": 0.0001326427429522089, "loss": 1.4832, "step": 25928 }, { "epoch": 0.3369355291946794, "grad_norm": 0.49022141098976135, "learning_rate": 0.00013264014349029753, "loss": 1.4171, "step": 25929 }, { "epoch": 0.33694852373859524, "grad_norm": 0.4355834126472473, "learning_rate": 0.00013263754402838613, "loss": 1.4349, "step": 25930 }, { "epoch": 0.33696151828251114, "grad_norm": 0.34544193744659424, "learning_rate": 0.00013263494456647475, "loss": 1.1949, "step": 25931 }, { "epoch": 0.336974512826427, "grad_norm": 0.4932114779949188, "learning_rate": 0.00013263234510456338, "loss": 1.5736, "step": 25932 }, { "epoch": 0.3369875073703429, "grad_norm": 0.4376586675643921, "learning_rate": 0.00013262974564265197, "loss": 1.4379, "step": 25933 }, { "epoch": 0.33700050191425873, "grad_norm": 0.4078996181488037, "learning_rate": 0.0001326271461807406, "loss": 1.4992, "step": 25934 }, { "epoch": 0.33701349645817463, "grad_norm": 0.4634312689304352, "learning_rate": 0.0001326245467188292, "loss": 1.4847, "step": 25935 }, { "epoch": 0.3370264910020905, "grad_norm": 0.34474724531173706, "learning_rate": 0.00013262194725691785, "loss": 1.1529, "step": 25936 }, { "epoch": 0.3370394855460064, "grad_norm": 0.39053812623023987, "learning_rate": 0.00013261934779500644, "loss": 1.5433, "step": 25937 }, { "epoch": 0.3370524800899222, "grad_norm": 0.38804084062576294, "learning_rate": 0.00013261674833309504, "loss": 1.3174, "step": 25938 }, { "epoch": 0.3370654746338381, "grad_norm": 0.45724987983703613, "learning_rate": 0.00013261414887118367, "loss": 1.4434, "step": 25939 }, { "epoch": 0.33707846917775397, "grad_norm": 0.34311631321907043, "learning_rate": 0.0001326115494092723, "loss": 1.1802, "step": 25940 }, { "epoch": 0.33709146372166987, "grad_norm": 0.400392085313797, "learning_rate": 0.00013260894994736091, "loss": 1.3581, "step": 25941 }, { "epoch": 0.3371044582655857, "grad_norm": 0.4552929103374481, "learning_rate": 0.0001326063504854495, "loss": 1.5644, "step": 25942 }, { "epoch": 0.3371174528095016, "grad_norm": 0.40228772163391113, "learning_rate": 0.00013260375102353814, "loss": 1.3824, "step": 25943 }, { "epoch": 0.33713044735341746, "grad_norm": 0.39098143577575684, "learning_rate": 0.00013260115156162676, "loss": 1.3173, "step": 25944 }, { "epoch": 0.33714344189733336, "grad_norm": 0.3960305154323578, "learning_rate": 0.00013259855209971536, "loss": 1.3335, "step": 25945 }, { "epoch": 0.3371564364412492, "grad_norm": 0.3813297152519226, "learning_rate": 0.00013259595263780398, "loss": 1.2998, "step": 25946 }, { "epoch": 0.3371694309851651, "grad_norm": 0.46862417459487915, "learning_rate": 0.00013259335317589258, "loss": 1.4435, "step": 25947 }, { "epoch": 0.33718242552908095, "grad_norm": 0.4491961598396301, "learning_rate": 0.00013259075371398123, "loss": 1.5086, "step": 25948 }, { "epoch": 0.33719542007299685, "grad_norm": 0.4344651401042938, "learning_rate": 0.00013258815425206983, "loss": 1.347, "step": 25949 }, { "epoch": 0.3372084146169127, "grad_norm": 0.5068522095680237, "learning_rate": 0.00013258555479015843, "loss": 1.5285, "step": 25950 }, { "epoch": 0.3372214091608286, "grad_norm": 0.4222278594970703, "learning_rate": 0.00013258295532824705, "loss": 1.525, "step": 25951 }, { "epoch": 0.33723440370474445, "grad_norm": 0.39252403378486633, "learning_rate": 0.00013258035586633568, "loss": 1.3428, "step": 25952 }, { "epoch": 0.33724739824866035, "grad_norm": 0.46583855152130127, "learning_rate": 0.0001325777564044243, "loss": 1.6201, "step": 25953 }, { "epoch": 0.3372603927925762, "grad_norm": 0.42092910408973694, "learning_rate": 0.0001325751569425129, "loss": 1.4626, "step": 25954 }, { "epoch": 0.3372733873364921, "grad_norm": 0.3947404623031616, "learning_rate": 0.00013257255748060152, "loss": 1.5335, "step": 25955 }, { "epoch": 0.33728638188040794, "grad_norm": 0.33180686831474304, "learning_rate": 0.00013256995801869015, "loss": 1.3432, "step": 25956 }, { "epoch": 0.33729937642432384, "grad_norm": 0.43609657883644104, "learning_rate": 0.00013256735855677874, "loss": 1.5709, "step": 25957 }, { "epoch": 0.3373123709682397, "grad_norm": 0.4034457206726074, "learning_rate": 0.00013256475909486737, "loss": 1.3492, "step": 25958 }, { "epoch": 0.3373253655121556, "grad_norm": 0.41907432675361633, "learning_rate": 0.00013256215963295597, "loss": 1.4434, "step": 25959 }, { "epoch": 0.3373383600560715, "grad_norm": 0.2986696660518646, "learning_rate": 0.00013255956017104462, "loss": 1.5661, "step": 25960 }, { "epoch": 0.33735135459998733, "grad_norm": 0.39098668098449707, "learning_rate": 0.00013255696070913321, "loss": 1.4509, "step": 25961 }, { "epoch": 0.33736434914390323, "grad_norm": 0.39820170402526855, "learning_rate": 0.00013255436124722184, "loss": 1.3919, "step": 25962 }, { "epoch": 0.3373773436878191, "grad_norm": 0.49654802680015564, "learning_rate": 0.00013255176178531046, "loss": 1.3231, "step": 25963 }, { "epoch": 0.337390338231735, "grad_norm": 0.3687657117843628, "learning_rate": 0.00013254916232339906, "loss": 1.5856, "step": 25964 }, { "epoch": 0.3374033327756508, "grad_norm": 0.3904503583908081, "learning_rate": 0.00013254656286148768, "loss": 1.4621, "step": 25965 }, { "epoch": 0.3374163273195667, "grad_norm": 0.4072244465351105, "learning_rate": 0.00013254396339957628, "loss": 1.351, "step": 25966 }, { "epoch": 0.33742932186348257, "grad_norm": 0.40208739042282104, "learning_rate": 0.0001325413639376649, "loss": 1.3602, "step": 25967 }, { "epoch": 0.33744231640739847, "grad_norm": 0.4271987974643707, "learning_rate": 0.00013253876447575353, "loss": 1.2959, "step": 25968 }, { "epoch": 0.3374553109513143, "grad_norm": 0.555769145488739, "learning_rate": 0.00013253616501384213, "loss": 1.5875, "step": 25969 }, { "epoch": 0.3374683054952302, "grad_norm": 0.39692923426628113, "learning_rate": 0.00013253356555193075, "loss": 1.3605, "step": 25970 }, { "epoch": 0.33748130003914606, "grad_norm": 0.38616666197776794, "learning_rate": 0.00013253096609001938, "loss": 1.3348, "step": 25971 }, { "epoch": 0.33749429458306196, "grad_norm": 0.3021883964538574, "learning_rate": 0.000132528366628108, "loss": 1.2672, "step": 25972 }, { "epoch": 0.3375072891269778, "grad_norm": 0.40427741408348083, "learning_rate": 0.0001325257671661966, "loss": 1.2641, "step": 25973 }, { "epoch": 0.3375202836708937, "grad_norm": 0.3402095437049866, "learning_rate": 0.00013252316770428522, "loss": 1.4119, "step": 25974 }, { "epoch": 0.33753327821480955, "grad_norm": 0.43896281719207764, "learning_rate": 0.00013252056824237385, "loss": 1.3589, "step": 25975 }, { "epoch": 0.33754627275872545, "grad_norm": 0.36701881885528564, "learning_rate": 0.00013251796878046245, "loss": 1.447, "step": 25976 }, { "epoch": 0.3375592673026413, "grad_norm": 0.3691614270210266, "learning_rate": 0.00013251536931855107, "loss": 1.3894, "step": 25977 }, { "epoch": 0.3375722618465572, "grad_norm": 0.44661641120910645, "learning_rate": 0.00013251276985663967, "loss": 1.2686, "step": 25978 }, { "epoch": 0.33758525639047304, "grad_norm": 0.3732391893863678, "learning_rate": 0.0001325101703947283, "loss": 1.2704, "step": 25979 }, { "epoch": 0.33759825093438894, "grad_norm": 0.2902577519416809, "learning_rate": 0.00013250757093281692, "loss": 1.34, "step": 25980 }, { "epoch": 0.3376112454783048, "grad_norm": 0.36671948432922363, "learning_rate": 0.00013250497147090551, "loss": 1.3817, "step": 25981 }, { "epoch": 0.3376242400222207, "grad_norm": 0.4579131305217743, "learning_rate": 0.00013250237200899414, "loss": 1.4356, "step": 25982 }, { "epoch": 0.33763723456613653, "grad_norm": 0.3641127943992615, "learning_rate": 0.00013249977254708276, "loss": 1.4254, "step": 25983 }, { "epoch": 0.33765022911005244, "grad_norm": 0.4204072654247284, "learning_rate": 0.0001324971730851714, "loss": 1.4322, "step": 25984 }, { "epoch": 0.3376632236539683, "grad_norm": 0.5142878293991089, "learning_rate": 0.00013249457362325998, "loss": 1.4629, "step": 25985 }, { "epoch": 0.3376762181978842, "grad_norm": 0.43719133734703064, "learning_rate": 0.0001324919741613486, "loss": 1.4805, "step": 25986 }, { "epoch": 0.3376892127418, "grad_norm": 0.2883050739765167, "learning_rate": 0.00013248937469943723, "loss": 1.2557, "step": 25987 }, { "epoch": 0.3377022072857159, "grad_norm": 0.3705081641674042, "learning_rate": 0.00013248677523752583, "loss": 1.5661, "step": 25988 }, { "epoch": 0.3377152018296318, "grad_norm": 0.5233997106552124, "learning_rate": 0.00013248417577561446, "loss": 1.3378, "step": 25989 }, { "epoch": 0.3377281963735477, "grad_norm": 0.4095056354999542, "learning_rate": 0.00013248157631370305, "loss": 1.3971, "step": 25990 }, { "epoch": 0.3377411909174635, "grad_norm": 0.4464869797229767, "learning_rate": 0.0001324789768517917, "loss": 1.2914, "step": 25991 }, { "epoch": 0.3377541854613794, "grad_norm": 0.3482622504234314, "learning_rate": 0.0001324763773898803, "loss": 1.2218, "step": 25992 }, { "epoch": 0.33776718000529526, "grad_norm": 0.455708771944046, "learning_rate": 0.0001324737779279689, "loss": 1.3833, "step": 25993 }, { "epoch": 0.33778017454921117, "grad_norm": 0.37493157386779785, "learning_rate": 0.00013247117846605752, "loss": 1.3375, "step": 25994 }, { "epoch": 0.337793169093127, "grad_norm": 0.4280388355255127, "learning_rate": 0.00013246857900414615, "loss": 1.3368, "step": 25995 }, { "epoch": 0.3378061636370429, "grad_norm": 0.3672119379043579, "learning_rate": 0.00013246597954223477, "loss": 1.2575, "step": 25996 }, { "epoch": 0.33781915818095876, "grad_norm": 0.30698785185813904, "learning_rate": 0.00013246338008032337, "loss": 1.2533, "step": 25997 }, { "epoch": 0.33783215272487466, "grad_norm": 0.363212525844574, "learning_rate": 0.000132460780618412, "loss": 1.2954, "step": 25998 }, { "epoch": 0.3378451472687905, "grad_norm": 0.3986384868621826, "learning_rate": 0.00013245818115650062, "loss": 1.4081, "step": 25999 }, { "epoch": 0.3378581418127064, "grad_norm": 0.3540073037147522, "learning_rate": 0.00013245558169458922, "loss": 1.4012, "step": 26000 }, { "epoch": 0.33787113635662225, "grad_norm": 0.3556385338306427, "learning_rate": 0.00013245298223267784, "loss": 1.3937, "step": 26001 }, { "epoch": 0.33788413090053815, "grad_norm": 0.4650766849517822, "learning_rate": 0.00013245038277076647, "loss": 1.5194, "step": 26002 }, { "epoch": 0.337897125444454, "grad_norm": 0.41923680901527405, "learning_rate": 0.0001324477833088551, "loss": 1.4581, "step": 26003 }, { "epoch": 0.3379101199883699, "grad_norm": 0.369552880525589, "learning_rate": 0.0001324451838469437, "loss": 1.0854, "step": 26004 }, { "epoch": 0.33792311453228574, "grad_norm": 0.37159621715545654, "learning_rate": 0.00013244258438503228, "loss": 1.5795, "step": 26005 }, { "epoch": 0.33793610907620164, "grad_norm": 0.40106499195098877, "learning_rate": 0.00013243998492312094, "loss": 1.3117, "step": 26006 }, { "epoch": 0.3379491036201175, "grad_norm": 0.43580904603004456, "learning_rate": 0.00013243738546120953, "loss": 1.5166, "step": 26007 }, { "epoch": 0.3379620981640334, "grad_norm": 0.35146501660346985, "learning_rate": 0.00013243478599929816, "loss": 1.5207, "step": 26008 }, { "epoch": 0.33797509270794923, "grad_norm": 0.47249868512153625, "learning_rate": 0.00013243218653738676, "loss": 1.4799, "step": 26009 }, { "epoch": 0.33798808725186513, "grad_norm": 0.44152122735977173, "learning_rate": 0.00013242958707547538, "loss": 1.3723, "step": 26010 }, { "epoch": 0.338001081795781, "grad_norm": 0.4457405209541321, "learning_rate": 0.000132426987613564, "loss": 1.3819, "step": 26011 }, { "epoch": 0.3380140763396969, "grad_norm": 0.4266115128993988, "learning_rate": 0.0001324243881516526, "loss": 1.4162, "step": 26012 }, { "epoch": 0.3380270708836127, "grad_norm": 0.34012266993522644, "learning_rate": 0.00013242178868974123, "loss": 1.2692, "step": 26013 }, { "epoch": 0.3380400654275286, "grad_norm": 0.47643885016441345, "learning_rate": 0.00013241918922782985, "loss": 1.5577, "step": 26014 }, { "epoch": 0.33805305997144447, "grad_norm": 0.42859864234924316, "learning_rate": 0.00013241658976591848, "loss": 1.3983, "step": 26015 }, { "epoch": 0.33806605451536037, "grad_norm": 0.32130736112594604, "learning_rate": 0.00013241399030400707, "loss": 1.2395, "step": 26016 }, { "epoch": 0.3380790490592762, "grad_norm": 0.37041670083999634, "learning_rate": 0.0001324113908420957, "loss": 1.3293, "step": 26017 }, { "epoch": 0.3380920436031921, "grad_norm": 0.3703097701072693, "learning_rate": 0.00013240879138018432, "loss": 1.1806, "step": 26018 }, { "epoch": 0.33810503814710796, "grad_norm": 0.4163018763065338, "learning_rate": 0.00013240619191827292, "loss": 1.5549, "step": 26019 }, { "epoch": 0.33811803269102386, "grad_norm": 0.3582121729850769, "learning_rate": 0.00013240359245636154, "loss": 1.3888, "step": 26020 }, { "epoch": 0.3381310272349397, "grad_norm": 0.31785061955451965, "learning_rate": 0.00013240099299445014, "loss": 1.2873, "step": 26021 }, { "epoch": 0.3381440217788556, "grad_norm": 0.3992772400379181, "learning_rate": 0.00013239839353253877, "loss": 1.2759, "step": 26022 }, { "epoch": 0.33815701632277145, "grad_norm": 0.29709020256996155, "learning_rate": 0.0001323957940706274, "loss": 1.2396, "step": 26023 }, { "epoch": 0.33817001086668735, "grad_norm": 0.3581277132034302, "learning_rate": 0.000132393194608716, "loss": 1.3635, "step": 26024 }, { "epoch": 0.3381830054106032, "grad_norm": 0.43390825390815735, "learning_rate": 0.0001323905951468046, "loss": 1.3634, "step": 26025 }, { "epoch": 0.3381959999545191, "grad_norm": 0.33940964937210083, "learning_rate": 0.00013238799568489324, "loss": 1.6098, "step": 26026 }, { "epoch": 0.33820899449843494, "grad_norm": 0.3332218527793884, "learning_rate": 0.00013238539622298186, "loss": 1.388, "step": 26027 }, { "epoch": 0.33822198904235085, "grad_norm": 0.3670242130756378, "learning_rate": 0.00013238279676107046, "loss": 1.3298, "step": 26028 }, { "epoch": 0.3382349835862667, "grad_norm": 0.37164583802223206, "learning_rate": 0.00013238019729915908, "loss": 1.3288, "step": 26029 }, { "epoch": 0.3382479781301826, "grad_norm": 0.44527381658554077, "learning_rate": 0.0001323775978372477, "loss": 1.3691, "step": 26030 }, { "epoch": 0.33826097267409844, "grad_norm": 0.408902645111084, "learning_rate": 0.0001323749983753363, "loss": 1.3029, "step": 26031 }, { "epoch": 0.33827396721801434, "grad_norm": 0.3251926004886627, "learning_rate": 0.00013237239891342493, "loss": 1.3576, "step": 26032 }, { "epoch": 0.3382869617619302, "grad_norm": 0.4064948558807373, "learning_rate": 0.00013236979945151353, "loss": 1.5685, "step": 26033 }, { "epoch": 0.3382999563058461, "grad_norm": 0.39309945702552795, "learning_rate": 0.00013236719998960215, "loss": 1.3101, "step": 26034 }, { "epoch": 0.33831295084976193, "grad_norm": 0.3306905925273895, "learning_rate": 0.00013236460052769078, "loss": 1.5342, "step": 26035 }, { "epoch": 0.33832594539367783, "grad_norm": 0.26769521832466125, "learning_rate": 0.00013236200106577937, "loss": 1.2381, "step": 26036 }, { "epoch": 0.33833893993759373, "grad_norm": 0.38197287917137146, "learning_rate": 0.00013235940160386802, "loss": 1.3308, "step": 26037 }, { "epoch": 0.3383519344815096, "grad_norm": 0.320083886384964, "learning_rate": 0.00013235680214195662, "loss": 1.4075, "step": 26038 }, { "epoch": 0.3383649290254255, "grad_norm": 0.4613439738750458, "learning_rate": 0.00013235420268004525, "loss": 1.2781, "step": 26039 }, { "epoch": 0.3383779235693413, "grad_norm": 0.47412562370300293, "learning_rate": 0.00013235160321813384, "loss": 1.4591, "step": 26040 }, { "epoch": 0.3383909181132572, "grad_norm": 0.44963017106056213, "learning_rate": 0.00013234900375622247, "loss": 1.3288, "step": 26041 }, { "epoch": 0.33840391265717307, "grad_norm": 0.40758216381073, "learning_rate": 0.0001323464042943111, "loss": 1.3454, "step": 26042 }, { "epoch": 0.33841690720108897, "grad_norm": 0.3422310948371887, "learning_rate": 0.0001323438048323997, "loss": 1.421, "step": 26043 }, { "epoch": 0.3384299017450048, "grad_norm": 0.45112258195877075, "learning_rate": 0.00013234120537048831, "loss": 1.4576, "step": 26044 }, { "epoch": 0.3384428962889207, "grad_norm": 0.3183678090572357, "learning_rate": 0.00013233860590857694, "loss": 1.2271, "step": 26045 }, { "epoch": 0.33845589083283656, "grad_norm": 0.34023067355155945, "learning_rate": 0.00013233600644666556, "loss": 1.5048, "step": 26046 }, { "epoch": 0.33846888537675246, "grad_norm": 0.3973219394683838, "learning_rate": 0.00013233340698475416, "loss": 1.4479, "step": 26047 }, { "epoch": 0.3384818799206683, "grad_norm": 0.37197497487068176, "learning_rate": 0.00013233080752284276, "loss": 1.6001, "step": 26048 }, { "epoch": 0.3384948744645842, "grad_norm": 0.38511350750923157, "learning_rate": 0.0001323282080609314, "loss": 1.133, "step": 26049 }, { "epoch": 0.33850786900850005, "grad_norm": 0.46855947375297546, "learning_rate": 0.00013232560859902, "loss": 1.548, "step": 26050 }, { "epoch": 0.33852086355241595, "grad_norm": 0.344058632850647, "learning_rate": 0.00013232300913710863, "loss": 1.2832, "step": 26051 }, { "epoch": 0.3385338580963318, "grad_norm": 0.48236799240112305, "learning_rate": 0.00013232040967519723, "loss": 1.477, "step": 26052 }, { "epoch": 0.3385468526402477, "grad_norm": 0.3212074339389801, "learning_rate": 0.00013231781021328585, "loss": 1.1441, "step": 26053 }, { "epoch": 0.33855984718416354, "grad_norm": 0.31936028599739075, "learning_rate": 0.00013231521075137448, "loss": 1.5827, "step": 26054 }, { "epoch": 0.33857284172807944, "grad_norm": 0.4629895091056824, "learning_rate": 0.00013231261128946308, "loss": 1.352, "step": 26055 }, { "epoch": 0.3385858362719953, "grad_norm": 0.43505293130874634, "learning_rate": 0.0001323100118275517, "loss": 1.4927, "step": 26056 }, { "epoch": 0.3385988308159112, "grad_norm": 0.32325783371925354, "learning_rate": 0.00013230741236564032, "loss": 1.3873, "step": 26057 }, { "epoch": 0.33861182535982703, "grad_norm": 0.3562498986721039, "learning_rate": 0.00013230481290372895, "loss": 1.3323, "step": 26058 }, { "epoch": 0.33862481990374294, "grad_norm": 0.3656165897846222, "learning_rate": 0.00013230221344181755, "loss": 1.718, "step": 26059 }, { "epoch": 0.3386378144476588, "grad_norm": 0.43518126010894775, "learning_rate": 0.00013229961397990614, "loss": 1.3138, "step": 26060 }, { "epoch": 0.3386508089915747, "grad_norm": 0.3630739748477936, "learning_rate": 0.0001322970145179948, "loss": 1.2582, "step": 26061 }, { "epoch": 0.3386638035354905, "grad_norm": 0.35151228308677673, "learning_rate": 0.0001322944150560834, "loss": 1.4146, "step": 26062 }, { "epoch": 0.3386767980794064, "grad_norm": 0.47331181168556213, "learning_rate": 0.00013229181559417202, "loss": 1.2425, "step": 26063 }, { "epoch": 0.33868979262332227, "grad_norm": 0.47240084409713745, "learning_rate": 0.00013228921613226061, "loss": 1.3923, "step": 26064 }, { "epoch": 0.3387027871672382, "grad_norm": 0.35691216588020325, "learning_rate": 0.00013228661667034924, "loss": 1.402, "step": 26065 }, { "epoch": 0.338715781711154, "grad_norm": 0.4560912251472473, "learning_rate": 0.00013228401720843786, "loss": 1.3788, "step": 26066 }, { "epoch": 0.3387287762550699, "grad_norm": 0.46608152985572815, "learning_rate": 0.00013228141774652646, "loss": 1.6213, "step": 26067 }, { "epoch": 0.33874177079898576, "grad_norm": 0.40156424045562744, "learning_rate": 0.00013227881828461509, "loss": 1.3351, "step": 26068 }, { "epoch": 0.33875476534290166, "grad_norm": 0.42579886317253113, "learning_rate": 0.0001322762188227037, "loss": 1.3216, "step": 26069 }, { "epoch": 0.3387677598868175, "grad_norm": 0.42317482829093933, "learning_rate": 0.00013227361936079233, "loss": 1.3659, "step": 26070 }, { "epoch": 0.3387807544307334, "grad_norm": 0.4158824682235718, "learning_rate": 0.00013227101989888093, "loss": 1.5008, "step": 26071 }, { "epoch": 0.33879374897464926, "grad_norm": 0.4330485463142395, "learning_rate": 0.00013226842043696956, "loss": 1.4683, "step": 26072 }, { "epoch": 0.33880674351856516, "grad_norm": 0.47141727805137634, "learning_rate": 0.00013226582097505818, "loss": 1.4663, "step": 26073 }, { "epoch": 0.338819738062481, "grad_norm": 0.3531055450439453, "learning_rate": 0.00013226322151314678, "loss": 1.3336, "step": 26074 }, { "epoch": 0.3388327326063969, "grad_norm": 0.39789867401123047, "learning_rate": 0.0001322606220512354, "loss": 1.3242, "step": 26075 }, { "epoch": 0.33884572715031275, "grad_norm": 0.4301973581314087, "learning_rate": 0.00013225802258932403, "loss": 1.2429, "step": 26076 }, { "epoch": 0.33885872169422865, "grad_norm": 0.34061765670776367, "learning_rate": 0.00013225542312741262, "loss": 1.3111, "step": 26077 }, { "epoch": 0.3388717162381445, "grad_norm": 0.3037078380584717, "learning_rate": 0.00013225282366550125, "loss": 1.2519, "step": 26078 }, { "epoch": 0.3388847107820604, "grad_norm": 0.38923218846321106, "learning_rate": 0.00013225022420358985, "loss": 1.3227, "step": 26079 }, { "epoch": 0.33889770532597624, "grad_norm": 0.33996447920799255, "learning_rate": 0.0001322476247416785, "loss": 1.2638, "step": 26080 }, { "epoch": 0.33891069986989214, "grad_norm": 0.35176604986190796, "learning_rate": 0.0001322450252797671, "loss": 1.4445, "step": 26081 }, { "epoch": 0.338923694413808, "grad_norm": 0.3652324974536896, "learning_rate": 0.00013224242581785572, "loss": 1.4231, "step": 26082 }, { "epoch": 0.3389366889577239, "grad_norm": 0.4295262396335602, "learning_rate": 0.00013223982635594432, "loss": 1.4937, "step": 26083 }, { "epoch": 0.33894968350163973, "grad_norm": 0.5046804547309875, "learning_rate": 0.00013223722689403294, "loss": 1.4346, "step": 26084 }, { "epoch": 0.33896267804555563, "grad_norm": 0.5072299242019653, "learning_rate": 0.00013223462743212157, "loss": 1.4525, "step": 26085 }, { "epoch": 0.3389756725894715, "grad_norm": 1.0560909509658813, "learning_rate": 0.00013223202797021016, "loss": 1.522, "step": 26086 }, { "epoch": 0.3389886671333874, "grad_norm": 0.445686936378479, "learning_rate": 0.0001322294285082988, "loss": 1.3364, "step": 26087 }, { "epoch": 0.3390016616773032, "grad_norm": 0.40685030817985535, "learning_rate": 0.0001322268290463874, "loss": 1.3276, "step": 26088 }, { "epoch": 0.3390146562212191, "grad_norm": 0.366913378238678, "learning_rate": 0.000132224229584476, "loss": 1.1726, "step": 26089 }, { "epoch": 0.33902765076513497, "grad_norm": 0.40471911430358887, "learning_rate": 0.00013222163012256463, "loss": 1.4212, "step": 26090 }, { "epoch": 0.33904064530905087, "grad_norm": 0.4582507312297821, "learning_rate": 0.00013221903066065323, "loss": 1.4037, "step": 26091 }, { "epoch": 0.3390536398529667, "grad_norm": 0.3212548792362213, "learning_rate": 0.00013221643119874188, "loss": 1.339, "step": 26092 }, { "epoch": 0.3390666343968826, "grad_norm": 0.3519456088542938, "learning_rate": 0.00013221383173683048, "loss": 1.4781, "step": 26093 }, { "epoch": 0.33907962894079846, "grad_norm": 0.31467771530151367, "learning_rate": 0.0001322112322749191, "loss": 1.1384, "step": 26094 }, { "epoch": 0.33909262348471436, "grad_norm": 0.5373119711875916, "learning_rate": 0.0001322086328130077, "loss": 1.3816, "step": 26095 }, { "epoch": 0.3391056180286302, "grad_norm": 0.3905687928199768, "learning_rate": 0.00013220603335109633, "loss": 1.3385, "step": 26096 }, { "epoch": 0.3391186125725461, "grad_norm": 0.43328219652175903, "learning_rate": 0.00013220343388918495, "loss": 1.3244, "step": 26097 }, { "epoch": 0.33913160711646195, "grad_norm": 0.5360528826713562, "learning_rate": 0.00013220083442727355, "loss": 1.4831, "step": 26098 }, { "epoch": 0.33914460166037785, "grad_norm": 0.390375018119812, "learning_rate": 0.00013219823496536217, "loss": 1.3543, "step": 26099 }, { "epoch": 0.3391575962042937, "grad_norm": 0.5416364669799805, "learning_rate": 0.0001321956355034508, "loss": 1.4785, "step": 26100 }, { "epoch": 0.3391705907482096, "grad_norm": 0.45381808280944824, "learning_rate": 0.00013219303604153942, "loss": 1.6316, "step": 26101 }, { "epoch": 0.33918358529212544, "grad_norm": 0.24971823394298553, "learning_rate": 0.00013219043657962802, "loss": 1.2042, "step": 26102 }, { "epoch": 0.33919657983604135, "grad_norm": 0.35749295353889465, "learning_rate": 0.00013218783711771662, "loss": 1.3901, "step": 26103 }, { "epoch": 0.3392095743799572, "grad_norm": 0.4308362901210785, "learning_rate": 0.00013218523765580527, "loss": 1.5455, "step": 26104 }, { "epoch": 0.3392225689238731, "grad_norm": 0.48581263422966003, "learning_rate": 0.00013218263819389387, "loss": 1.4545, "step": 26105 }, { "epoch": 0.33923556346778894, "grad_norm": 0.4493102431297302, "learning_rate": 0.0001321800387319825, "loss": 1.3151, "step": 26106 }, { "epoch": 0.33924855801170484, "grad_norm": 0.4331951439380646, "learning_rate": 0.0001321774392700711, "loss": 1.6084, "step": 26107 }, { "epoch": 0.3392615525556207, "grad_norm": 0.3062371015548706, "learning_rate": 0.0001321748398081597, "loss": 1.4859, "step": 26108 }, { "epoch": 0.3392745470995366, "grad_norm": 0.4729411005973816, "learning_rate": 0.00013217224034624834, "loss": 1.5745, "step": 26109 }, { "epoch": 0.33928754164345243, "grad_norm": 0.4998309910297394, "learning_rate": 0.00013216964088433693, "loss": 1.6605, "step": 26110 }, { "epoch": 0.33930053618736833, "grad_norm": 0.5050880312919617, "learning_rate": 0.00013216704142242559, "loss": 1.4067, "step": 26111 }, { "epoch": 0.33931353073128423, "grad_norm": 0.4295085668563843, "learning_rate": 0.00013216444196051418, "loss": 1.4078, "step": 26112 }, { "epoch": 0.3393265252752001, "grad_norm": 0.3796772360801697, "learning_rate": 0.0001321618424986028, "loss": 1.2763, "step": 26113 }, { "epoch": 0.339339519819116, "grad_norm": 0.4142545461654663, "learning_rate": 0.0001321592430366914, "loss": 1.2981, "step": 26114 }, { "epoch": 0.3393525143630318, "grad_norm": 0.3426409363746643, "learning_rate": 0.00013215664357478003, "loss": 1.3433, "step": 26115 }, { "epoch": 0.3393655089069477, "grad_norm": 0.3506077229976654, "learning_rate": 0.00013215404411286865, "loss": 1.4286, "step": 26116 }, { "epoch": 0.33937850345086357, "grad_norm": 0.3806890547275543, "learning_rate": 0.00013215144465095725, "loss": 1.4377, "step": 26117 }, { "epoch": 0.33939149799477947, "grad_norm": 0.3685814142227173, "learning_rate": 0.00013214884518904588, "loss": 1.3635, "step": 26118 }, { "epoch": 0.3394044925386953, "grad_norm": 0.4511130750179291, "learning_rate": 0.0001321462457271345, "loss": 1.5876, "step": 26119 }, { "epoch": 0.3394174870826112, "grad_norm": 0.49052268266677856, "learning_rate": 0.0001321436462652231, "loss": 1.599, "step": 26120 }, { "epoch": 0.33943048162652706, "grad_norm": 0.37976324558258057, "learning_rate": 0.00013214104680331172, "loss": 1.3536, "step": 26121 }, { "epoch": 0.33944347617044296, "grad_norm": 0.3351363241672516, "learning_rate": 0.00013213844734140032, "loss": 1.3554, "step": 26122 }, { "epoch": 0.3394564707143588, "grad_norm": 0.38419032096862793, "learning_rate": 0.00013213584787948897, "loss": 1.3478, "step": 26123 }, { "epoch": 0.3394694652582747, "grad_norm": 0.3367789387702942, "learning_rate": 0.00013213324841757757, "loss": 1.2381, "step": 26124 }, { "epoch": 0.33948245980219055, "grad_norm": 0.4104432165622711, "learning_rate": 0.0001321306489556662, "loss": 1.366, "step": 26125 }, { "epoch": 0.33949545434610645, "grad_norm": 0.3378024101257324, "learning_rate": 0.0001321280494937548, "loss": 1.2678, "step": 26126 }, { "epoch": 0.3395084488900223, "grad_norm": 0.3339763581752777, "learning_rate": 0.00013212545003184341, "loss": 1.4026, "step": 26127 }, { "epoch": 0.3395214434339382, "grad_norm": 0.43284308910369873, "learning_rate": 0.00013212285056993204, "loss": 1.4484, "step": 26128 }, { "epoch": 0.33953443797785404, "grad_norm": 0.3656160235404968, "learning_rate": 0.00013212025110802064, "loss": 1.4621, "step": 26129 }, { "epoch": 0.33954743252176994, "grad_norm": 0.5169016122817993, "learning_rate": 0.00013211765164610926, "loss": 1.2912, "step": 26130 }, { "epoch": 0.3395604270656858, "grad_norm": 0.385970801115036, "learning_rate": 0.00013211505218419789, "loss": 1.3042, "step": 26131 }, { "epoch": 0.3395734216096017, "grad_norm": 0.43052437901496887, "learning_rate": 0.00013211245272228648, "loss": 1.5935, "step": 26132 }, { "epoch": 0.33958641615351753, "grad_norm": 0.4337923526763916, "learning_rate": 0.0001321098532603751, "loss": 1.2833, "step": 26133 }, { "epoch": 0.33959941069743343, "grad_norm": 0.3738989531993866, "learning_rate": 0.0001321072537984637, "loss": 1.4643, "step": 26134 }, { "epoch": 0.3396124052413493, "grad_norm": 0.4272087514400482, "learning_rate": 0.00013210465433655236, "loss": 1.569, "step": 26135 }, { "epoch": 0.3396253997852652, "grad_norm": 0.40140500664711, "learning_rate": 0.00013210205487464095, "loss": 1.2109, "step": 26136 }, { "epoch": 0.339638394329181, "grad_norm": 0.6054965257644653, "learning_rate": 0.00013209945541272958, "loss": 1.4751, "step": 26137 }, { "epoch": 0.3396513888730969, "grad_norm": 0.29412147402763367, "learning_rate": 0.00013209685595081818, "loss": 1.33, "step": 26138 }, { "epoch": 0.33966438341701277, "grad_norm": 0.3866554796695709, "learning_rate": 0.0001320942564889068, "loss": 1.5938, "step": 26139 }, { "epoch": 0.3396773779609287, "grad_norm": 0.35024407505989075, "learning_rate": 0.00013209165702699542, "loss": 1.2907, "step": 26140 }, { "epoch": 0.3396903725048445, "grad_norm": 0.48450350761413574, "learning_rate": 0.00013208905756508402, "loss": 1.5128, "step": 26141 }, { "epoch": 0.3397033670487604, "grad_norm": 0.3828144073486328, "learning_rate": 0.00013208645810317265, "loss": 1.343, "step": 26142 }, { "epoch": 0.33971636159267626, "grad_norm": 0.4006251096725464, "learning_rate": 0.00013208385864126127, "loss": 1.3495, "step": 26143 }, { "epoch": 0.33972935613659216, "grad_norm": 0.3299696445465088, "learning_rate": 0.00013208125917934987, "loss": 1.3069, "step": 26144 }, { "epoch": 0.339742350680508, "grad_norm": 0.5259748697280884, "learning_rate": 0.0001320786597174385, "loss": 1.3785, "step": 26145 }, { "epoch": 0.3397553452244239, "grad_norm": 0.32864344120025635, "learning_rate": 0.00013207606025552712, "loss": 1.2414, "step": 26146 }, { "epoch": 0.33976833976833976, "grad_norm": 0.24723125994205475, "learning_rate": 0.00013207346079361574, "loss": 1.2345, "step": 26147 }, { "epoch": 0.33978133431225566, "grad_norm": 0.38176044821739197, "learning_rate": 0.00013207086133170434, "loss": 1.4247, "step": 26148 }, { "epoch": 0.3397943288561715, "grad_norm": 0.22277477383613586, "learning_rate": 0.00013206826186979296, "loss": 1.3702, "step": 26149 }, { "epoch": 0.3398073234000874, "grad_norm": 0.45568305253982544, "learning_rate": 0.0001320656624078816, "loss": 1.3779, "step": 26150 }, { "epoch": 0.33982031794400325, "grad_norm": 0.37732306122779846, "learning_rate": 0.00013206306294597019, "loss": 1.3485, "step": 26151 }, { "epoch": 0.33983331248791915, "grad_norm": 0.48806843161582947, "learning_rate": 0.0001320604634840588, "loss": 1.4271, "step": 26152 }, { "epoch": 0.339846307031835, "grad_norm": 0.3434215486049652, "learning_rate": 0.0001320578640221474, "loss": 1.3304, "step": 26153 }, { "epoch": 0.3398593015757509, "grad_norm": 0.3597976267337799, "learning_rate": 0.00013205526456023606, "loss": 1.4033, "step": 26154 }, { "epoch": 0.33987229611966674, "grad_norm": 0.3724515736103058, "learning_rate": 0.00013205266509832466, "loss": 1.3631, "step": 26155 }, { "epoch": 0.33988529066358264, "grad_norm": 0.3474212884902954, "learning_rate": 0.00013205006563641325, "loss": 1.3655, "step": 26156 }, { "epoch": 0.3398982852074985, "grad_norm": 0.3434038460254669, "learning_rate": 0.00013204746617450188, "loss": 1.4879, "step": 26157 }, { "epoch": 0.3399112797514144, "grad_norm": 0.362981379032135, "learning_rate": 0.0001320448667125905, "loss": 1.2338, "step": 26158 }, { "epoch": 0.33992427429533023, "grad_norm": 0.37427300214767456, "learning_rate": 0.00013204226725067913, "loss": 1.4734, "step": 26159 }, { "epoch": 0.33993726883924613, "grad_norm": 0.43690022826194763, "learning_rate": 0.00013203966778876772, "loss": 1.3909, "step": 26160 }, { "epoch": 0.339950263383162, "grad_norm": 0.3793913424015045, "learning_rate": 0.00013203706832685635, "loss": 1.4261, "step": 26161 }, { "epoch": 0.3399632579270779, "grad_norm": 0.39530012011528015, "learning_rate": 0.00013203446886494497, "loss": 1.3707, "step": 26162 }, { "epoch": 0.3399762524709937, "grad_norm": 0.45010656118392944, "learning_rate": 0.00013203186940303357, "loss": 1.4455, "step": 26163 }, { "epoch": 0.3399892470149096, "grad_norm": 0.3283880650997162, "learning_rate": 0.0001320292699411222, "loss": 1.5056, "step": 26164 }, { "epoch": 0.34000224155882547, "grad_norm": 0.3500770032405853, "learning_rate": 0.0001320266704792108, "loss": 1.2825, "step": 26165 }, { "epoch": 0.34001523610274137, "grad_norm": 0.4843919575214386, "learning_rate": 0.00013202407101729944, "loss": 1.3489, "step": 26166 }, { "epoch": 0.3400282306466572, "grad_norm": 0.39160025119781494, "learning_rate": 0.00013202147155538804, "loss": 1.3675, "step": 26167 }, { "epoch": 0.3400412251905731, "grad_norm": 0.4595074951648712, "learning_rate": 0.00013201887209347667, "loss": 1.4157, "step": 26168 }, { "epoch": 0.34005421973448896, "grad_norm": 0.3389313220977783, "learning_rate": 0.00013201627263156526, "loss": 1.4829, "step": 26169 }, { "epoch": 0.34006721427840486, "grad_norm": 0.4173663258552551, "learning_rate": 0.0001320136731696539, "loss": 1.4554, "step": 26170 }, { "epoch": 0.3400802088223207, "grad_norm": 0.43081143498420715, "learning_rate": 0.0001320110737077425, "loss": 1.25, "step": 26171 }, { "epoch": 0.3400932033662366, "grad_norm": 0.42154356837272644, "learning_rate": 0.0001320084742458311, "loss": 1.3325, "step": 26172 }, { "epoch": 0.34010619791015245, "grad_norm": 0.4978295564651489, "learning_rate": 0.00013200587478391973, "loss": 1.4311, "step": 26173 }, { "epoch": 0.34011919245406835, "grad_norm": 0.3951527178287506, "learning_rate": 0.00013200327532200836, "loss": 1.317, "step": 26174 }, { "epoch": 0.3401321869979842, "grad_norm": 0.35416314005851746, "learning_rate": 0.00013200067586009696, "loss": 1.4091, "step": 26175 }, { "epoch": 0.3401451815419001, "grad_norm": 0.4264552891254425, "learning_rate": 0.00013199807639818558, "loss": 1.3756, "step": 26176 }, { "epoch": 0.34015817608581594, "grad_norm": 0.4598437547683716, "learning_rate": 0.00013199547693627418, "loss": 1.2823, "step": 26177 }, { "epoch": 0.34017117062973184, "grad_norm": 0.45161259174346924, "learning_rate": 0.00013199287747436283, "loss": 1.6098, "step": 26178 }, { "epoch": 0.3401841651736477, "grad_norm": 0.3187314569950104, "learning_rate": 0.00013199027801245143, "loss": 1.4541, "step": 26179 }, { "epoch": 0.3401971597175636, "grad_norm": 0.3738307058811188, "learning_rate": 0.00013198767855054005, "loss": 1.2449, "step": 26180 }, { "epoch": 0.34021015426147944, "grad_norm": 0.37954947352409363, "learning_rate": 0.00013198507908862865, "loss": 1.4872, "step": 26181 }, { "epoch": 0.34022314880539534, "grad_norm": 0.3951531648635864, "learning_rate": 0.00013198247962671727, "loss": 1.4865, "step": 26182 }, { "epoch": 0.3402361433493112, "grad_norm": 0.38394370675086975, "learning_rate": 0.0001319798801648059, "loss": 1.4791, "step": 26183 }, { "epoch": 0.3402491378932271, "grad_norm": 0.366731733083725, "learning_rate": 0.0001319772807028945, "loss": 1.4386, "step": 26184 }, { "epoch": 0.34026213243714293, "grad_norm": 0.3587181568145752, "learning_rate": 0.00013197468124098315, "loss": 1.4507, "step": 26185 }, { "epoch": 0.34027512698105883, "grad_norm": 0.41163700819015503, "learning_rate": 0.00013197208177907174, "loss": 1.3388, "step": 26186 }, { "epoch": 0.3402881215249747, "grad_norm": 0.4417600929737091, "learning_rate": 0.00013196948231716034, "loss": 1.3494, "step": 26187 }, { "epoch": 0.3403011160688906, "grad_norm": 0.4449005126953125, "learning_rate": 0.00013196688285524897, "loss": 1.5053, "step": 26188 }, { "epoch": 0.3403141106128065, "grad_norm": 0.34276872873306274, "learning_rate": 0.0001319642833933376, "loss": 1.3984, "step": 26189 }, { "epoch": 0.3403271051567223, "grad_norm": 0.3789070248603821, "learning_rate": 0.00013196168393142622, "loss": 1.2547, "step": 26190 }, { "epoch": 0.3403400997006382, "grad_norm": 0.4343651533126831, "learning_rate": 0.0001319590844695148, "loss": 1.4138, "step": 26191 }, { "epoch": 0.34035309424455407, "grad_norm": 0.3517058491706848, "learning_rate": 0.00013195648500760344, "loss": 1.6342, "step": 26192 }, { "epoch": 0.34036608878846997, "grad_norm": 0.3006521761417389, "learning_rate": 0.00013195388554569206, "loss": 1.4442, "step": 26193 }, { "epoch": 0.3403790833323858, "grad_norm": 0.430831640958786, "learning_rate": 0.00013195128608378066, "loss": 1.4491, "step": 26194 }, { "epoch": 0.3403920778763017, "grad_norm": 0.3935178816318512, "learning_rate": 0.00013194868662186928, "loss": 1.4217, "step": 26195 }, { "epoch": 0.34040507242021756, "grad_norm": 0.3533515930175781, "learning_rate": 0.00013194608715995788, "loss": 1.41, "step": 26196 }, { "epoch": 0.34041806696413346, "grad_norm": 0.43116259574890137, "learning_rate": 0.00013194348769804653, "loss": 1.4256, "step": 26197 }, { "epoch": 0.3404310615080493, "grad_norm": 0.37773051857948303, "learning_rate": 0.00013194088823613513, "loss": 1.3644, "step": 26198 }, { "epoch": 0.3404440560519652, "grad_norm": 0.4361073672771454, "learning_rate": 0.00013193828877422373, "loss": 1.5483, "step": 26199 }, { "epoch": 0.34045705059588105, "grad_norm": 0.365222305059433, "learning_rate": 0.00013193568931231235, "loss": 1.5489, "step": 26200 }, { "epoch": 0.34047004513979695, "grad_norm": 0.3768000602722168, "learning_rate": 0.00013193308985040098, "loss": 1.3091, "step": 26201 }, { "epoch": 0.3404830396837128, "grad_norm": 0.3791691064834595, "learning_rate": 0.0001319304903884896, "loss": 1.4619, "step": 26202 }, { "epoch": 0.3404960342276287, "grad_norm": 0.40866950154304504, "learning_rate": 0.0001319278909265782, "loss": 1.2275, "step": 26203 }, { "epoch": 0.34050902877154454, "grad_norm": 0.31759610772132874, "learning_rate": 0.00013192529146466682, "loss": 1.5058, "step": 26204 }, { "epoch": 0.34052202331546044, "grad_norm": 0.3818267583847046, "learning_rate": 0.00013192269200275545, "loss": 1.3961, "step": 26205 }, { "epoch": 0.3405350178593763, "grad_norm": 0.37706467509269714, "learning_rate": 0.00013192009254084404, "loss": 1.5099, "step": 26206 }, { "epoch": 0.3405480124032922, "grad_norm": 0.2985096573829651, "learning_rate": 0.00013191749307893267, "loss": 1.6341, "step": 26207 }, { "epoch": 0.34056100694720803, "grad_norm": 0.2959730923175812, "learning_rate": 0.00013191489361702127, "loss": 1.4541, "step": 26208 }, { "epoch": 0.34057400149112393, "grad_norm": 0.4692249000072479, "learning_rate": 0.00013191229415510992, "loss": 1.4288, "step": 26209 }, { "epoch": 0.3405869960350398, "grad_norm": 0.41624554991722107, "learning_rate": 0.00013190969469319852, "loss": 1.4315, "step": 26210 }, { "epoch": 0.3405999905789557, "grad_norm": 0.499685674905777, "learning_rate": 0.0001319070952312871, "loss": 1.5436, "step": 26211 }, { "epoch": 0.3406129851228715, "grad_norm": 0.3813563585281372, "learning_rate": 0.00013190449576937574, "loss": 1.4574, "step": 26212 }, { "epoch": 0.3406259796667874, "grad_norm": 0.29567989706993103, "learning_rate": 0.00013190189630746436, "loss": 1.371, "step": 26213 }, { "epoch": 0.34063897421070327, "grad_norm": 0.4973951280117035, "learning_rate": 0.00013189929684555299, "loss": 1.3602, "step": 26214 }, { "epoch": 0.34065196875461917, "grad_norm": 0.4051089584827423, "learning_rate": 0.00013189669738364158, "loss": 1.4089, "step": 26215 }, { "epoch": 0.340664963298535, "grad_norm": 0.3603508770465851, "learning_rate": 0.0001318940979217302, "loss": 1.2967, "step": 26216 }, { "epoch": 0.3406779578424509, "grad_norm": 0.44503557682037354, "learning_rate": 0.00013189149845981883, "loss": 1.3451, "step": 26217 }, { "epoch": 0.34069095238636676, "grad_norm": 0.348886638879776, "learning_rate": 0.00013188889899790743, "loss": 1.272, "step": 26218 }, { "epoch": 0.34070394693028266, "grad_norm": 0.3516038656234741, "learning_rate": 0.00013188629953599605, "loss": 1.4474, "step": 26219 }, { "epoch": 0.3407169414741985, "grad_norm": 0.4363178312778473, "learning_rate": 0.00013188370007408468, "loss": 1.479, "step": 26220 }, { "epoch": 0.3407299360181144, "grad_norm": 0.3630240261554718, "learning_rate": 0.0001318811006121733, "loss": 1.566, "step": 26221 }, { "epoch": 0.34074293056203026, "grad_norm": 0.4306725263595581, "learning_rate": 0.0001318785011502619, "loss": 1.4945, "step": 26222 }, { "epoch": 0.34075592510594616, "grad_norm": 0.1859276294708252, "learning_rate": 0.00013187590168835053, "loss": 1.2217, "step": 26223 }, { "epoch": 0.340768919649862, "grad_norm": 0.4466590881347656, "learning_rate": 0.00013187330222643915, "loss": 1.4849, "step": 26224 }, { "epoch": 0.3407819141937779, "grad_norm": 0.3336814343929291, "learning_rate": 0.00013187070276452775, "loss": 1.4181, "step": 26225 }, { "epoch": 0.34079490873769375, "grad_norm": 0.33973827958106995, "learning_rate": 0.00013186810330261637, "loss": 1.3845, "step": 26226 }, { "epoch": 0.34080790328160965, "grad_norm": 0.39488059282302856, "learning_rate": 0.00013186550384070497, "loss": 1.4804, "step": 26227 }, { "epoch": 0.3408208978255255, "grad_norm": 0.43439504504203796, "learning_rate": 0.0001318629043787936, "loss": 1.4096, "step": 26228 }, { "epoch": 0.3408338923694414, "grad_norm": 0.4110659956932068, "learning_rate": 0.00013186030491688222, "loss": 1.4024, "step": 26229 }, { "epoch": 0.34084688691335724, "grad_norm": 0.4611261487007141, "learning_rate": 0.00013185770545497082, "loss": 1.2091, "step": 26230 }, { "epoch": 0.34085988145727314, "grad_norm": 0.4604453444480896, "learning_rate": 0.00013185510599305944, "loss": 1.3202, "step": 26231 }, { "epoch": 0.340872876001189, "grad_norm": 0.3813309371471405, "learning_rate": 0.00013185250653114806, "loss": 1.2697, "step": 26232 }, { "epoch": 0.3408858705451049, "grad_norm": 0.36649268865585327, "learning_rate": 0.0001318499070692367, "loss": 1.1675, "step": 26233 }, { "epoch": 0.34089886508902073, "grad_norm": 0.3964530825614929, "learning_rate": 0.00013184730760732529, "loss": 1.5673, "step": 26234 }, { "epoch": 0.34091185963293663, "grad_norm": 0.3706746995449066, "learning_rate": 0.0001318447081454139, "loss": 1.5572, "step": 26235 }, { "epoch": 0.3409248541768525, "grad_norm": 0.540446400642395, "learning_rate": 0.00013184210868350253, "loss": 1.4675, "step": 26236 }, { "epoch": 0.3409378487207684, "grad_norm": 0.4490174651145935, "learning_rate": 0.00013183950922159113, "loss": 1.4082, "step": 26237 }, { "epoch": 0.3409508432646842, "grad_norm": 0.4045741856098175, "learning_rate": 0.00013183690975967976, "loss": 1.2907, "step": 26238 }, { "epoch": 0.3409638378086001, "grad_norm": 0.355634868144989, "learning_rate": 0.00013183431029776835, "loss": 1.3647, "step": 26239 }, { "epoch": 0.34097683235251597, "grad_norm": 0.43347349762916565, "learning_rate": 0.00013183171083585698, "loss": 1.4101, "step": 26240 }, { "epoch": 0.34098982689643187, "grad_norm": 0.4328981637954712, "learning_rate": 0.0001318291113739456, "loss": 1.4145, "step": 26241 }, { "epoch": 0.3410028214403477, "grad_norm": 0.3859333097934723, "learning_rate": 0.0001318265119120342, "loss": 1.2177, "step": 26242 }, { "epoch": 0.3410158159842636, "grad_norm": 0.38912269473075867, "learning_rate": 0.00013182391245012282, "loss": 1.4744, "step": 26243 }, { "epoch": 0.34102881052817946, "grad_norm": 0.4100562334060669, "learning_rate": 0.00013182131298821145, "loss": 1.5057, "step": 26244 }, { "epoch": 0.34104180507209536, "grad_norm": 0.2997613251209259, "learning_rate": 0.00013181871352630007, "loss": 1.221, "step": 26245 }, { "epoch": 0.3410547996160112, "grad_norm": 0.32728850841522217, "learning_rate": 0.00013181611406438867, "loss": 1.3384, "step": 26246 }, { "epoch": 0.3410677941599271, "grad_norm": 0.27415427565574646, "learning_rate": 0.0001318135146024773, "loss": 1.3474, "step": 26247 }, { "epoch": 0.34108078870384295, "grad_norm": 0.478397011756897, "learning_rate": 0.00013181091514056592, "loss": 1.5275, "step": 26248 }, { "epoch": 0.34109378324775885, "grad_norm": 0.40981540083885193, "learning_rate": 0.00013180831567865452, "loss": 1.4127, "step": 26249 }, { "epoch": 0.3411067777916747, "grad_norm": 0.42245998978614807, "learning_rate": 0.00013180571621674314, "loss": 1.4214, "step": 26250 }, { "epoch": 0.3411197723355906, "grad_norm": 0.332137793302536, "learning_rate": 0.00013180311675483174, "loss": 1.2254, "step": 26251 }, { "epoch": 0.34113276687950644, "grad_norm": 0.392443984746933, "learning_rate": 0.0001318005172929204, "loss": 1.5185, "step": 26252 }, { "epoch": 0.34114576142342234, "grad_norm": 0.30165979266166687, "learning_rate": 0.000131797917831009, "loss": 1.3045, "step": 26253 }, { "epoch": 0.3411587559673382, "grad_norm": 0.4552200436592102, "learning_rate": 0.00013179531836909759, "loss": 1.3728, "step": 26254 }, { "epoch": 0.3411717505112541, "grad_norm": 0.3729575574398041, "learning_rate": 0.0001317927189071862, "loss": 1.3288, "step": 26255 }, { "epoch": 0.34118474505516994, "grad_norm": 0.4388963282108307, "learning_rate": 0.00013179011944527483, "loss": 1.3155, "step": 26256 }, { "epoch": 0.34119773959908584, "grad_norm": 0.40844663977622986, "learning_rate": 0.00013178751998336346, "loss": 1.3226, "step": 26257 }, { "epoch": 0.3412107341430017, "grad_norm": 0.4488868713378906, "learning_rate": 0.00013178492052145206, "loss": 1.3919, "step": 26258 }, { "epoch": 0.3412237286869176, "grad_norm": 0.4920104742050171, "learning_rate": 0.00013178232105954068, "loss": 1.5657, "step": 26259 }, { "epoch": 0.3412367232308334, "grad_norm": 0.4138815999031067, "learning_rate": 0.0001317797215976293, "loss": 1.2676, "step": 26260 }, { "epoch": 0.34124971777474933, "grad_norm": 0.37703418731689453, "learning_rate": 0.0001317771221357179, "loss": 1.362, "step": 26261 }, { "epoch": 0.3412627123186652, "grad_norm": 0.4716152250766754, "learning_rate": 0.00013177452267380653, "loss": 1.561, "step": 26262 }, { "epoch": 0.3412757068625811, "grad_norm": 0.35938727855682373, "learning_rate": 0.00013177192321189515, "loss": 1.3984, "step": 26263 }, { "epoch": 0.341288701406497, "grad_norm": 0.39183974266052246, "learning_rate": 0.00013176932374998378, "loss": 1.4437, "step": 26264 }, { "epoch": 0.3413016959504128, "grad_norm": 0.48845890164375305, "learning_rate": 0.00013176672428807237, "loss": 1.5872, "step": 26265 }, { "epoch": 0.3413146904943287, "grad_norm": 0.3081307113170624, "learning_rate": 0.00013176412482616097, "loss": 1.473, "step": 26266 }, { "epoch": 0.34132768503824457, "grad_norm": 0.45425915718078613, "learning_rate": 0.00013176152536424962, "loss": 1.5067, "step": 26267 }, { "epoch": 0.34134067958216047, "grad_norm": 0.26841649413108826, "learning_rate": 0.00013175892590233822, "loss": 1.3336, "step": 26268 }, { "epoch": 0.3413536741260763, "grad_norm": 0.43170174956321716, "learning_rate": 0.00013175632644042684, "loss": 1.5657, "step": 26269 }, { "epoch": 0.3413666686699922, "grad_norm": 0.40054938197135925, "learning_rate": 0.00013175372697851544, "loss": 1.3851, "step": 26270 }, { "epoch": 0.34137966321390806, "grad_norm": 0.42931756377220154, "learning_rate": 0.00013175112751660407, "loss": 1.3425, "step": 26271 }, { "epoch": 0.34139265775782396, "grad_norm": 0.3564052879810333, "learning_rate": 0.0001317485280546927, "loss": 1.4348, "step": 26272 }, { "epoch": 0.3414056523017398, "grad_norm": 0.3920668065547943, "learning_rate": 0.0001317459285927813, "loss": 1.5408, "step": 26273 }, { "epoch": 0.3414186468456557, "grad_norm": 0.3827129900455475, "learning_rate": 0.0001317433291308699, "loss": 1.5328, "step": 26274 }, { "epoch": 0.34143164138957155, "grad_norm": 0.34597861766815186, "learning_rate": 0.00013174072966895854, "loss": 1.5048, "step": 26275 }, { "epoch": 0.34144463593348745, "grad_norm": 0.4332326054573059, "learning_rate": 0.00013173813020704716, "loss": 1.4551, "step": 26276 }, { "epoch": 0.3414576304774033, "grad_norm": 0.40205588936805725, "learning_rate": 0.00013173553074513576, "loss": 1.4662, "step": 26277 }, { "epoch": 0.3414706250213192, "grad_norm": 0.38862383365631104, "learning_rate": 0.00013173293128322436, "loss": 1.2794, "step": 26278 }, { "epoch": 0.34148361956523504, "grad_norm": 0.3297480046749115, "learning_rate": 0.000131730331821313, "loss": 1.4107, "step": 26279 }, { "epoch": 0.34149661410915094, "grad_norm": 0.3797451853752136, "learning_rate": 0.0001317277323594016, "loss": 1.3059, "step": 26280 }, { "epoch": 0.3415096086530668, "grad_norm": 0.354782372713089, "learning_rate": 0.00013172513289749023, "loss": 1.286, "step": 26281 }, { "epoch": 0.3415226031969827, "grad_norm": 0.47224223613739014, "learning_rate": 0.00013172253343557883, "loss": 1.4397, "step": 26282 }, { "epoch": 0.34153559774089853, "grad_norm": 0.43819892406463623, "learning_rate": 0.00013171993397366745, "loss": 1.4415, "step": 26283 }, { "epoch": 0.34154859228481443, "grad_norm": 0.43576580286026, "learning_rate": 0.00013171733451175608, "loss": 1.4579, "step": 26284 }, { "epoch": 0.3415615868287303, "grad_norm": 0.5775370001792908, "learning_rate": 0.00013171473504984467, "loss": 1.4571, "step": 26285 }, { "epoch": 0.3415745813726462, "grad_norm": 0.3763059377670288, "learning_rate": 0.0001317121355879333, "loss": 1.4663, "step": 26286 }, { "epoch": 0.341587575916562, "grad_norm": 0.4656793475151062, "learning_rate": 0.00013170953612602192, "loss": 1.4845, "step": 26287 }, { "epoch": 0.3416005704604779, "grad_norm": 0.5006188750267029, "learning_rate": 0.00013170693666411055, "loss": 1.4441, "step": 26288 }, { "epoch": 0.34161356500439377, "grad_norm": 0.3583909571170807, "learning_rate": 0.00013170433720219914, "loss": 1.5034, "step": 26289 }, { "epoch": 0.34162655954830967, "grad_norm": 0.4445924460887909, "learning_rate": 0.00013170173774028777, "loss": 1.4116, "step": 26290 }, { "epoch": 0.3416395540922255, "grad_norm": 0.47544562816619873, "learning_rate": 0.0001316991382783764, "loss": 1.3125, "step": 26291 }, { "epoch": 0.3416525486361414, "grad_norm": 0.3971347510814667, "learning_rate": 0.000131696538816465, "loss": 1.3046, "step": 26292 }, { "epoch": 0.34166554318005726, "grad_norm": 0.3860016465187073, "learning_rate": 0.00013169393935455362, "loss": 1.2685, "step": 26293 }, { "epoch": 0.34167853772397316, "grad_norm": 0.4071313738822937, "learning_rate": 0.00013169133989264224, "loss": 1.4901, "step": 26294 }, { "epoch": 0.341691532267889, "grad_norm": 0.3571883738040924, "learning_rate": 0.00013168874043073084, "loss": 1.4147, "step": 26295 }, { "epoch": 0.3417045268118049, "grad_norm": 0.45985203981399536, "learning_rate": 0.00013168614096881946, "loss": 1.512, "step": 26296 }, { "epoch": 0.34171752135572075, "grad_norm": 0.33661729097366333, "learning_rate": 0.00013168354150690806, "loss": 1.3382, "step": 26297 }, { "epoch": 0.34173051589963666, "grad_norm": 0.3625209331512451, "learning_rate": 0.0001316809420449967, "loss": 1.3341, "step": 26298 }, { "epoch": 0.3417435104435525, "grad_norm": 0.44608303904533386, "learning_rate": 0.0001316783425830853, "loss": 1.3986, "step": 26299 }, { "epoch": 0.3417565049874684, "grad_norm": 0.41623011231422424, "learning_rate": 0.00013167574312117393, "loss": 1.4593, "step": 26300 }, { "epoch": 0.34176949953138425, "grad_norm": 0.42953404784202576, "learning_rate": 0.00013167314365926253, "loss": 1.6288, "step": 26301 }, { "epoch": 0.34178249407530015, "grad_norm": 0.45586636662483215, "learning_rate": 0.00013167054419735115, "loss": 1.3778, "step": 26302 }, { "epoch": 0.341795488619216, "grad_norm": 0.47565069794654846, "learning_rate": 0.00013166794473543978, "loss": 1.4072, "step": 26303 }, { "epoch": 0.3418084831631319, "grad_norm": 0.3788107633590698, "learning_rate": 0.00013166534527352838, "loss": 1.2263, "step": 26304 }, { "epoch": 0.34182147770704774, "grad_norm": 0.5201157927513123, "learning_rate": 0.000131662745811617, "loss": 1.6562, "step": 26305 }, { "epoch": 0.34183447225096364, "grad_norm": 0.5794119238853455, "learning_rate": 0.00013166014634970563, "loss": 1.4535, "step": 26306 }, { "epoch": 0.3418474667948795, "grad_norm": 0.3820491135120392, "learning_rate": 0.00013165754688779425, "loss": 1.5125, "step": 26307 }, { "epoch": 0.3418604613387954, "grad_norm": 0.498670369386673, "learning_rate": 0.00013165494742588285, "loss": 1.4182, "step": 26308 }, { "epoch": 0.34187345588271123, "grad_norm": 0.3038325607776642, "learning_rate": 0.00013165234796397144, "loss": 1.3584, "step": 26309 }, { "epoch": 0.34188645042662713, "grad_norm": 0.41189032793045044, "learning_rate": 0.0001316497485020601, "loss": 1.3637, "step": 26310 }, { "epoch": 0.341899444970543, "grad_norm": 0.39935967326164246, "learning_rate": 0.0001316471490401487, "loss": 1.5526, "step": 26311 }, { "epoch": 0.3419124395144589, "grad_norm": 0.4516189396381378, "learning_rate": 0.00013164454957823732, "loss": 1.2475, "step": 26312 }, { "epoch": 0.3419254340583747, "grad_norm": 0.35357481241226196, "learning_rate": 0.00013164195011632592, "loss": 1.3867, "step": 26313 }, { "epoch": 0.3419384286022906, "grad_norm": 0.3095816969871521, "learning_rate": 0.00013163935065441454, "loss": 1.2348, "step": 26314 }, { "epoch": 0.34195142314620647, "grad_norm": 0.32389524579048157, "learning_rate": 0.00013163675119250316, "loss": 1.4023, "step": 26315 }, { "epoch": 0.34196441769012237, "grad_norm": 0.35221102833747864, "learning_rate": 0.00013163415173059176, "loss": 1.1919, "step": 26316 }, { "epoch": 0.3419774122340382, "grad_norm": 0.3853890001773834, "learning_rate": 0.00013163155226868039, "loss": 1.3998, "step": 26317 }, { "epoch": 0.3419904067779541, "grad_norm": 0.3337884843349457, "learning_rate": 0.000131628952806769, "loss": 1.3211, "step": 26318 }, { "epoch": 0.34200340132186996, "grad_norm": 0.4588981568813324, "learning_rate": 0.00013162635334485764, "loss": 1.2218, "step": 26319 }, { "epoch": 0.34201639586578586, "grad_norm": 0.752056896686554, "learning_rate": 0.00013162375388294623, "loss": 1.414, "step": 26320 }, { "epoch": 0.3420293904097017, "grad_norm": 0.4785199463367462, "learning_rate": 0.00013162115442103483, "loss": 1.6106, "step": 26321 }, { "epoch": 0.3420423849536176, "grad_norm": 0.35274800658226013, "learning_rate": 0.00013161855495912348, "loss": 1.1984, "step": 26322 }, { "epoch": 0.34205537949753345, "grad_norm": 0.4226943850517273, "learning_rate": 0.00013161595549721208, "loss": 1.3982, "step": 26323 }, { "epoch": 0.34206837404144935, "grad_norm": 0.4360480010509491, "learning_rate": 0.0001316133560353007, "loss": 1.3852, "step": 26324 }, { "epoch": 0.3420813685853652, "grad_norm": 0.3864213526248932, "learning_rate": 0.0001316107565733893, "loss": 1.3523, "step": 26325 }, { "epoch": 0.3420943631292811, "grad_norm": 0.42303675413131714, "learning_rate": 0.00013160815711147793, "loss": 1.4889, "step": 26326 }, { "epoch": 0.34210735767319694, "grad_norm": 0.3875943422317505, "learning_rate": 0.00013160555764956655, "loss": 1.4875, "step": 26327 }, { "epoch": 0.34212035221711284, "grad_norm": 0.4419989287853241, "learning_rate": 0.00013160295818765515, "loss": 1.3568, "step": 26328 }, { "epoch": 0.3421333467610287, "grad_norm": 0.3648955821990967, "learning_rate": 0.00013160035872574377, "loss": 1.3217, "step": 26329 }, { "epoch": 0.3421463413049446, "grad_norm": 0.4119963049888611, "learning_rate": 0.0001315977592638324, "loss": 1.4249, "step": 26330 }, { "epoch": 0.34215933584886044, "grad_norm": 0.3409191370010376, "learning_rate": 0.00013159515980192102, "loss": 1.298, "step": 26331 }, { "epoch": 0.34217233039277634, "grad_norm": 0.40300676226615906, "learning_rate": 0.00013159256034000962, "loss": 1.4704, "step": 26332 }, { "epoch": 0.3421853249366922, "grad_norm": 0.4108428359031677, "learning_rate": 0.00013158996087809824, "loss": 1.4081, "step": 26333 }, { "epoch": 0.3421983194806081, "grad_norm": 0.47484689950942993, "learning_rate": 0.00013158736141618687, "loss": 1.4511, "step": 26334 }, { "epoch": 0.3422113140245239, "grad_norm": 0.3842059373855591, "learning_rate": 0.00013158476195427546, "loss": 1.5228, "step": 26335 }, { "epoch": 0.34222430856843983, "grad_norm": 0.42994076013565063, "learning_rate": 0.0001315821624923641, "loss": 1.3756, "step": 26336 }, { "epoch": 0.3422373031123557, "grad_norm": 0.511379599571228, "learning_rate": 0.0001315795630304527, "loss": 1.4124, "step": 26337 }, { "epoch": 0.3422502976562716, "grad_norm": 0.50417160987854, "learning_rate": 0.0001315769635685413, "loss": 1.4752, "step": 26338 }, { "epoch": 0.3422632922001874, "grad_norm": 0.39912721514701843, "learning_rate": 0.00013157436410662994, "loss": 1.5553, "step": 26339 }, { "epoch": 0.3422762867441033, "grad_norm": 0.3984702229499817, "learning_rate": 0.00013157176464471853, "loss": 1.3134, "step": 26340 }, { "epoch": 0.3422892812880192, "grad_norm": 0.4423327147960663, "learning_rate": 0.00013156916518280718, "loss": 1.3988, "step": 26341 }, { "epoch": 0.34230227583193507, "grad_norm": 0.36527183651924133, "learning_rate": 0.00013156656572089578, "loss": 1.4895, "step": 26342 }, { "epoch": 0.34231527037585097, "grad_norm": 0.37877795100212097, "learning_rate": 0.0001315639662589844, "loss": 1.3614, "step": 26343 }, { "epoch": 0.3423282649197668, "grad_norm": 0.37745794653892517, "learning_rate": 0.000131561366797073, "loss": 1.4064, "step": 26344 }, { "epoch": 0.3423412594636827, "grad_norm": 0.49252164363861084, "learning_rate": 0.00013155876733516163, "loss": 1.4495, "step": 26345 }, { "epoch": 0.34235425400759856, "grad_norm": 0.411770224571228, "learning_rate": 0.00013155616787325025, "loss": 1.3277, "step": 26346 }, { "epoch": 0.34236724855151446, "grad_norm": 0.4192935526371002, "learning_rate": 0.00013155356841133885, "loss": 1.5386, "step": 26347 }, { "epoch": 0.3423802430954303, "grad_norm": 0.32775774598121643, "learning_rate": 0.00013155096894942747, "loss": 1.3214, "step": 26348 }, { "epoch": 0.3423932376393462, "grad_norm": 0.4045034646987915, "learning_rate": 0.0001315483694875161, "loss": 1.3583, "step": 26349 }, { "epoch": 0.34240623218326205, "grad_norm": 0.39173516631126404, "learning_rate": 0.0001315457700256047, "loss": 1.5276, "step": 26350 }, { "epoch": 0.34241922672717795, "grad_norm": 0.34003376960754395, "learning_rate": 0.00013154317056369332, "loss": 1.4005, "step": 26351 }, { "epoch": 0.3424322212710938, "grad_norm": 0.37397250533103943, "learning_rate": 0.00013154057110178192, "loss": 1.1944, "step": 26352 }, { "epoch": 0.3424452158150097, "grad_norm": 0.42243069410324097, "learning_rate": 0.00013153797163987057, "loss": 1.4837, "step": 26353 }, { "epoch": 0.34245821035892554, "grad_norm": 0.35920339822769165, "learning_rate": 0.00013153537217795917, "loss": 1.1708, "step": 26354 }, { "epoch": 0.34247120490284144, "grad_norm": 0.4497944116592407, "learning_rate": 0.0001315327727160478, "loss": 1.3322, "step": 26355 }, { "epoch": 0.3424841994467573, "grad_norm": 0.41501423716545105, "learning_rate": 0.0001315301732541364, "loss": 1.4838, "step": 26356 }, { "epoch": 0.3424971939906732, "grad_norm": 0.4426480531692505, "learning_rate": 0.000131527573792225, "loss": 1.4626, "step": 26357 }, { "epoch": 0.34251018853458903, "grad_norm": 0.5210657715797424, "learning_rate": 0.00013152497433031364, "loss": 1.4778, "step": 26358 }, { "epoch": 0.34252318307850493, "grad_norm": 0.35102003812789917, "learning_rate": 0.00013152237486840224, "loss": 1.3906, "step": 26359 }, { "epoch": 0.3425361776224208, "grad_norm": 0.35952049493789673, "learning_rate": 0.00013151977540649086, "loss": 1.3698, "step": 26360 }, { "epoch": 0.3425491721663367, "grad_norm": 0.3831641972064972, "learning_rate": 0.00013151717594457948, "loss": 1.4593, "step": 26361 }, { "epoch": 0.3425621667102525, "grad_norm": 0.36221179366111755, "learning_rate": 0.00013151457648266808, "loss": 1.4441, "step": 26362 }, { "epoch": 0.3425751612541684, "grad_norm": 0.44126009941101074, "learning_rate": 0.0001315119770207567, "loss": 1.4505, "step": 26363 }, { "epoch": 0.34258815579808427, "grad_norm": 0.34182173013687134, "learning_rate": 0.0001315093775588453, "loss": 1.2765, "step": 26364 }, { "epoch": 0.34260115034200017, "grad_norm": 0.3579317331314087, "learning_rate": 0.00013150677809693395, "loss": 1.2909, "step": 26365 }, { "epoch": 0.342614144885916, "grad_norm": 0.46075743436813354, "learning_rate": 0.00013150417863502255, "loss": 1.4719, "step": 26366 }, { "epoch": 0.3426271394298319, "grad_norm": 0.4803047776222229, "learning_rate": 0.00013150157917311118, "loss": 1.3745, "step": 26367 }, { "epoch": 0.34264013397374776, "grad_norm": 0.4405366778373718, "learning_rate": 0.0001314989797111998, "loss": 1.4704, "step": 26368 }, { "epoch": 0.34265312851766366, "grad_norm": 0.40220585465431213, "learning_rate": 0.0001314963802492884, "loss": 1.1502, "step": 26369 }, { "epoch": 0.3426661230615795, "grad_norm": 0.42921125888824463, "learning_rate": 0.00013149378078737702, "loss": 1.3817, "step": 26370 }, { "epoch": 0.3426791176054954, "grad_norm": 0.4432045519351959, "learning_rate": 0.00013149118132546562, "loss": 1.4615, "step": 26371 }, { "epoch": 0.34269211214941125, "grad_norm": 0.3832964599132538, "learning_rate": 0.00013148858186355427, "loss": 1.2793, "step": 26372 }, { "epoch": 0.34270510669332716, "grad_norm": 0.4057629406452179, "learning_rate": 0.00013148598240164287, "loss": 1.3216, "step": 26373 }, { "epoch": 0.342718101237243, "grad_norm": 0.4439396262168884, "learning_rate": 0.0001314833829397315, "loss": 1.4825, "step": 26374 }, { "epoch": 0.3427310957811589, "grad_norm": 0.45235151052474976, "learning_rate": 0.0001314807834778201, "loss": 1.4786, "step": 26375 }, { "epoch": 0.34274409032507475, "grad_norm": 0.5342997312545776, "learning_rate": 0.00013147818401590872, "loss": 1.4621, "step": 26376 }, { "epoch": 0.34275708486899065, "grad_norm": 0.3664233386516571, "learning_rate": 0.00013147558455399734, "loss": 1.4069, "step": 26377 }, { "epoch": 0.3427700794129065, "grad_norm": 0.5223405361175537, "learning_rate": 0.00013147298509208594, "loss": 1.4488, "step": 26378 }, { "epoch": 0.3427830739568224, "grad_norm": 0.46037352085113525, "learning_rate": 0.00013147038563017456, "loss": 1.5085, "step": 26379 }, { "epoch": 0.34279606850073824, "grad_norm": 0.36788487434387207, "learning_rate": 0.0001314677861682632, "loss": 1.2701, "step": 26380 }, { "epoch": 0.34280906304465414, "grad_norm": 0.5282098054885864, "learning_rate": 0.00013146518670635178, "loss": 1.4128, "step": 26381 }, { "epoch": 0.34282205758857, "grad_norm": 0.47046396136283875, "learning_rate": 0.0001314625872444404, "loss": 1.4163, "step": 26382 }, { "epoch": 0.3428350521324859, "grad_norm": 0.40179768204689026, "learning_rate": 0.000131459987782529, "loss": 1.3198, "step": 26383 }, { "epoch": 0.34284804667640173, "grad_norm": 0.4061926305294037, "learning_rate": 0.00013145738832061766, "loss": 1.3725, "step": 26384 }, { "epoch": 0.34286104122031763, "grad_norm": 0.27053365111351013, "learning_rate": 0.00013145478885870625, "loss": 1.1807, "step": 26385 }, { "epoch": 0.3428740357642335, "grad_norm": 0.4072888493537903, "learning_rate": 0.00013145218939679488, "loss": 1.3772, "step": 26386 }, { "epoch": 0.3428870303081494, "grad_norm": 0.4764579236507416, "learning_rate": 0.00013144958993488348, "loss": 1.4268, "step": 26387 }, { "epoch": 0.3429000248520652, "grad_norm": 0.38692694902420044, "learning_rate": 0.0001314469904729721, "loss": 1.2772, "step": 26388 }, { "epoch": 0.3429130193959811, "grad_norm": 0.49210596084594727, "learning_rate": 0.00013144439101106073, "loss": 1.3939, "step": 26389 }, { "epoch": 0.34292601393989697, "grad_norm": 0.4415774345397949, "learning_rate": 0.00013144179154914932, "loss": 1.4295, "step": 26390 }, { "epoch": 0.34293900848381287, "grad_norm": 0.3748113214969635, "learning_rate": 0.00013143919208723795, "loss": 1.4526, "step": 26391 }, { "epoch": 0.3429520030277287, "grad_norm": 0.36553555727005005, "learning_rate": 0.00013143659262532657, "loss": 1.4133, "step": 26392 }, { "epoch": 0.3429649975716446, "grad_norm": 0.35615047812461853, "learning_rate": 0.00013143399316341517, "loss": 1.4006, "step": 26393 }, { "epoch": 0.34297799211556046, "grad_norm": 0.32209745049476624, "learning_rate": 0.0001314313937015038, "loss": 1.4184, "step": 26394 }, { "epoch": 0.34299098665947636, "grad_norm": 0.35506588220596313, "learning_rate": 0.0001314287942395924, "loss": 1.4175, "step": 26395 }, { "epoch": 0.3430039812033922, "grad_norm": 0.3618049621582031, "learning_rate": 0.00013142619477768104, "loss": 1.342, "step": 26396 }, { "epoch": 0.3430169757473081, "grad_norm": 0.46010950207710266, "learning_rate": 0.00013142359531576964, "loss": 1.3659, "step": 26397 }, { "epoch": 0.34302997029122395, "grad_norm": 0.33406463265419006, "learning_rate": 0.00013142099585385826, "loss": 1.3513, "step": 26398 }, { "epoch": 0.34304296483513985, "grad_norm": 0.43464308977127075, "learning_rate": 0.00013141839639194686, "loss": 1.4307, "step": 26399 }, { "epoch": 0.3430559593790557, "grad_norm": 0.29055631160736084, "learning_rate": 0.0001314157969300355, "loss": 1.4803, "step": 26400 }, { "epoch": 0.3430689539229716, "grad_norm": 0.4285714030265808, "learning_rate": 0.0001314131974681241, "loss": 1.3632, "step": 26401 }, { "epoch": 0.34308194846688744, "grad_norm": 0.4142897129058838, "learning_rate": 0.0001314105980062127, "loss": 1.3268, "step": 26402 }, { "epoch": 0.34309494301080334, "grad_norm": 0.4252566695213318, "learning_rate": 0.00013140799854430133, "loss": 1.4349, "step": 26403 }, { "epoch": 0.3431079375547192, "grad_norm": 0.3657315671443939, "learning_rate": 0.00013140539908238996, "loss": 1.3544, "step": 26404 }, { "epoch": 0.3431209320986351, "grad_norm": 0.4156716465950012, "learning_rate": 0.00013140279962047855, "loss": 1.3758, "step": 26405 }, { "epoch": 0.34313392664255093, "grad_norm": 0.49092602729797363, "learning_rate": 0.00013140020015856718, "loss": 1.4939, "step": 26406 }, { "epoch": 0.34314692118646684, "grad_norm": 0.3937337100505829, "learning_rate": 0.0001313976006966558, "loss": 1.4222, "step": 26407 }, { "epoch": 0.3431599157303827, "grad_norm": 0.39711228013038635, "learning_rate": 0.00013139500123474443, "loss": 1.4624, "step": 26408 }, { "epoch": 0.3431729102742986, "grad_norm": 0.464324027299881, "learning_rate": 0.00013139240177283303, "loss": 1.5072, "step": 26409 }, { "epoch": 0.3431859048182144, "grad_norm": 0.43275925517082214, "learning_rate": 0.00013138980231092165, "loss": 1.2666, "step": 26410 }, { "epoch": 0.3431988993621303, "grad_norm": 0.43466436862945557, "learning_rate": 0.00013138720284901027, "loss": 1.4789, "step": 26411 }, { "epoch": 0.3432118939060462, "grad_norm": 0.3580063283443451, "learning_rate": 0.00013138460338709887, "loss": 1.6326, "step": 26412 }, { "epoch": 0.3432248884499621, "grad_norm": 0.2598384916782379, "learning_rate": 0.0001313820039251875, "loss": 1.1258, "step": 26413 }, { "epoch": 0.3432378829938779, "grad_norm": 0.38475027680397034, "learning_rate": 0.0001313794044632761, "loss": 1.4263, "step": 26414 }, { "epoch": 0.3432508775377938, "grad_norm": 0.32091692090034485, "learning_rate": 0.00013137680500136475, "loss": 1.2125, "step": 26415 }, { "epoch": 0.34326387208170966, "grad_norm": 0.3703199028968811, "learning_rate": 0.00013137420553945334, "loss": 1.2698, "step": 26416 }, { "epoch": 0.34327686662562557, "grad_norm": 0.27267199754714966, "learning_rate": 0.00013137160607754194, "loss": 1.2436, "step": 26417 }, { "epoch": 0.34328986116954147, "grad_norm": 0.5563708543777466, "learning_rate": 0.00013136900661563056, "loss": 1.6145, "step": 26418 }, { "epoch": 0.3433028557134573, "grad_norm": 0.3313523530960083, "learning_rate": 0.0001313664071537192, "loss": 1.6008, "step": 26419 }, { "epoch": 0.3433158502573732, "grad_norm": 0.44421207904815674, "learning_rate": 0.00013136380769180781, "loss": 1.303, "step": 26420 }, { "epoch": 0.34332884480128906, "grad_norm": 0.33951789140701294, "learning_rate": 0.0001313612082298964, "loss": 1.3742, "step": 26421 }, { "epoch": 0.34334183934520496, "grad_norm": 0.3753160238265991, "learning_rate": 0.00013135860876798504, "loss": 1.2552, "step": 26422 }, { "epoch": 0.3433548338891208, "grad_norm": 0.40816816687583923, "learning_rate": 0.00013135600930607366, "loss": 1.5655, "step": 26423 }, { "epoch": 0.3433678284330367, "grad_norm": 0.3919028043746948, "learning_rate": 0.00013135340984416226, "loss": 1.4693, "step": 26424 }, { "epoch": 0.34338082297695255, "grad_norm": 0.42948704957962036, "learning_rate": 0.00013135081038225088, "loss": 1.4758, "step": 26425 }, { "epoch": 0.34339381752086845, "grad_norm": 0.40445271134376526, "learning_rate": 0.00013134821092033948, "loss": 1.4842, "step": 26426 }, { "epoch": 0.3434068120647843, "grad_norm": 0.3974856734275818, "learning_rate": 0.00013134561145842813, "loss": 1.4983, "step": 26427 }, { "epoch": 0.3434198066087002, "grad_norm": 0.3403288424015045, "learning_rate": 0.00013134301199651673, "loss": 1.6849, "step": 26428 }, { "epoch": 0.34343280115261604, "grad_norm": 0.26605531573295593, "learning_rate": 0.00013134041253460535, "loss": 1.3151, "step": 26429 }, { "epoch": 0.34344579569653194, "grad_norm": 0.5248103737831116, "learning_rate": 0.00013133781307269395, "loss": 1.3908, "step": 26430 }, { "epoch": 0.3434587902404478, "grad_norm": 0.3893047571182251, "learning_rate": 0.00013133521361078257, "loss": 1.2712, "step": 26431 }, { "epoch": 0.3434717847843637, "grad_norm": 0.43012064695358276, "learning_rate": 0.0001313326141488712, "loss": 1.2313, "step": 26432 }, { "epoch": 0.34348477932827953, "grad_norm": 0.4553283154964447, "learning_rate": 0.0001313300146869598, "loss": 1.465, "step": 26433 }, { "epoch": 0.34349777387219543, "grad_norm": 0.5589954257011414, "learning_rate": 0.00013132741522504842, "loss": 1.4338, "step": 26434 }, { "epoch": 0.3435107684161113, "grad_norm": 0.31945690512657166, "learning_rate": 0.00013132481576313705, "loss": 1.2953, "step": 26435 }, { "epoch": 0.3435237629600272, "grad_norm": 0.4094160497188568, "learning_rate": 0.00013132221630122564, "loss": 1.4035, "step": 26436 }, { "epoch": 0.343536757503943, "grad_norm": 0.4629078209400177, "learning_rate": 0.00013131961683931427, "loss": 1.6031, "step": 26437 }, { "epoch": 0.3435497520478589, "grad_norm": 0.5530632138252258, "learning_rate": 0.00013131701737740286, "loss": 1.5559, "step": 26438 }, { "epoch": 0.34356274659177477, "grad_norm": 0.46392112970352173, "learning_rate": 0.00013131441791549152, "loss": 1.4361, "step": 26439 }, { "epoch": 0.34357574113569067, "grad_norm": 0.4042486548423767, "learning_rate": 0.00013131181845358011, "loss": 1.4919, "step": 26440 }, { "epoch": 0.3435887356796065, "grad_norm": 0.3291502892971039, "learning_rate": 0.00013130921899166874, "loss": 1.2254, "step": 26441 }, { "epoch": 0.3436017302235224, "grad_norm": 0.39213046431541443, "learning_rate": 0.00013130661952975736, "loss": 1.4108, "step": 26442 }, { "epoch": 0.34361472476743826, "grad_norm": 0.41376858949661255, "learning_rate": 0.00013130402006784596, "loss": 1.4525, "step": 26443 }, { "epoch": 0.34362771931135416, "grad_norm": 0.5738300681114197, "learning_rate": 0.00013130142060593458, "loss": 1.5174, "step": 26444 }, { "epoch": 0.34364071385527, "grad_norm": 0.3326481580734253, "learning_rate": 0.00013129882114402318, "loss": 1.6406, "step": 26445 }, { "epoch": 0.3436537083991859, "grad_norm": 0.49983862042427063, "learning_rate": 0.0001312962216821118, "loss": 1.6276, "step": 26446 }, { "epoch": 0.34366670294310175, "grad_norm": 0.4768376648426056, "learning_rate": 0.00013129362222020043, "loss": 1.5746, "step": 26447 }, { "epoch": 0.34367969748701765, "grad_norm": 0.5384788513183594, "learning_rate": 0.00013129102275828903, "loss": 1.4349, "step": 26448 }, { "epoch": 0.3436926920309335, "grad_norm": 0.42418918013572693, "learning_rate": 0.00013128842329637765, "loss": 1.4907, "step": 26449 }, { "epoch": 0.3437056865748494, "grad_norm": 0.3909682631492615, "learning_rate": 0.00013128582383446628, "loss": 1.4411, "step": 26450 }, { "epoch": 0.34371868111876525, "grad_norm": 0.3991309106349945, "learning_rate": 0.0001312832243725549, "loss": 1.3129, "step": 26451 }, { "epoch": 0.34373167566268115, "grad_norm": 0.38891950249671936, "learning_rate": 0.0001312806249106435, "loss": 1.2726, "step": 26452 }, { "epoch": 0.343744670206597, "grad_norm": 0.3750346601009369, "learning_rate": 0.00013127802544873212, "loss": 1.3529, "step": 26453 }, { "epoch": 0.3437576647505129, "grad_norm": 0.3789776861667633, "learning_rate": 0.00013127542598682075, "loss": 1.3423, "step": 26454 }, { "epoch": 0.34377065929442874, "grad_norm": 0.35564276576042175, "learning_rate": 0.00013127282652490935, "loss": 1.2671, "step": 26455 }, { "epoch": 0.34378365383834464, "grad_norm": 0.5056749582290649, "learning_rate": 0.00013127022706299797, "loss": 1.5405, "step": 26456 }, { "epoch": 0.3437966483822605, "grad_norm": 0.33762025833129883, "learning_rate": 0.00013126762760108657, "loss": 1.4346, "step": 26457 }, { "epoch": 0.3438096429261764, "grad_norm": 0.336229532957077, "learning_rate": 0.00013126502813917522, "loss": 1.2583, "step": 26458 }, { "epoch": 0.34382263747009223, "grad_norm": 0.40589988231658936, "learning_rate": 0.00013126242867726382, "loss": 1.309, "step": 26459 }, { "epoch": 0.34383563201400813, "grad_norm": 0.4543602466583252, "learning_rate": 0.00013125982921535241, "loss": 1.3681, "step": 26460 }, { "epoch": 0.343848626557924, "grad_norm": 0.3799445629119873, "learning_rate": 0.00013125722975344104, "loss": 1.1839, "step": 26461 }, { "epoch": 0.3438616211018399, "grad_norm": 0.37944915890693665, "learning_rate": 0.00013125463029152966, "loss": 1.3555, "step": 26462 }, { "epoch": 0.3438746156457557, "grad_norm": 0.436146080493927, "learning_rate": 0.0001312520308296183, "loss": 1.4196, "step": 26463 }, { "epoch": 0.3438876101896716, "grad_norm": 0.4754045307636261, "learning_rate": 0.00013124943136770688, "loss": 1.532, "step": 26464 }, { "epoch": 0.34390060473358747, "grad_norm": 0.29849299788475037, "learning_rate": 0.0001312468319057955, "loss": 1.4282, "step": 26465 }, { "epoch": 0.34391359927750337, "grad_norm": 0.37399810552597046, "learning_rate": 0.00013124423244388413, "loss": 1.5605, "step": 26466 }, { "epoch": 0.3439265938214192, "grad_norm": 0.4111667573451996, "learning_rate": 0.00013124163298197273, "loss": 1.2352, "step": 26467 }, { "epoch": 0.3439395883653351, "grad_norm": 0.36064958572387695, "learning_rate": 0.00013123903352006136, "loss": 1.3691, "step": 26468 }, { "epoch": 0.34395258290925096, "grad_norm": 0.3570909798145294, "learning_rate": 0.00013123643405814995, "loss": 1.6023, "step": 26469 }, { "epoch": 0.34396557745316686, "grad_norm": 0.44390809535980225, "learning_rate": 0.0001312338345962386, "loss": 1.5732, "step": 26470 }, { "epoch": 0.3439785719970827, "grad_norm": 0.3735039234161377, "learning_rate": 0.0001312312351343272, "loss": 1.4107, "step": 26471 }, { "epoch": 0.3439915665409986, "grad_norm": 0.44730323553085327, "learning_rate": 0.0001312286356724158, "loss": 1.261, "step": 26472 }, { "epoch": 0.34400456108491445, "grad_norm": 0.3879878520965576, "learning_rate": 0.00013122603621050442, "loss": 1.5137, "step": 26473 }, { "epoch": 0.34401755562883035, "grad_norm": 0.41281330585479736, "learning_rate": 0.00013122343674859305, "loss": 1.3499, "step": 26474 }, { "epoch": 0.3440305501727462, "grad_norm": 0.35124334692955017, "learning_rate": 0.00013122083728668167, "loss": 1.3447, "step": 26475 }, { "epoch": 0.3440435447166621, "grad_norm": 0.39903172850608826, "learning_rate": 0.00013121823782477027, "loss": 1.5345, "step": 26476 }, { "epoch": 0.34405653926057794, "grad_norm": 0.43390360474586487, "learning_rate": 0.0001312156383628589, "loss": 1.5127, "step": 26477 }, { "epoch": 0.34406953380449384, "grad_norm": 0.39824920892715454, "learning_rate": 0.00013121303890094752, "loss": 1.4264, "step": 26478 }, { "epoch": 0.3440825283484097, "grad_norm": 0.41766518354415894, "learning_rate": 0.00013121043943903612, "loss": 1.4143, "step": 26479 }, { "epoch": 0.3440955228923256, "grad_norm": 0.3217308819293976, "learning_rate": 0.00013120783997712474, "loss": 1.3848, "step": 26480 }, { "epoch": 0.34410851743624143, "grad_norm": 0.3809047043323517, "learning_rate": 0.00013120524051521337, "loss": 1.3955, "step": 26481 }, { "epoch": 0.34412151198015734, "grad_norm": 0.48214903473854065, "learning_rate": 0.000131202641053302, "loss": 1.3745, "step": 26482 }, { "epoch": 0.3441345065240732, "grad_norm": 0.4011686146259308, "learning_rate": 0.0001312000415913906, "loss": 1.5607, "step": 26483 }, { "epoch": 0.3441475010679891, "grad_norm": 0.3684973120689392, "learning_rate": 0.00013119744212947918, "loss": 1.248, "step": 26484 }, { "epoch": 0.3441604956119049, "grad_norm": 0.46288734674453735, "learning_rate": 0.00013119484266756784, "loss": 1.4456, "step": 26485 }, { "epoch": 0.3441734901558208, "grad_norm": 0.38262632489204407, "learning_rate": 0.00013119224320565643, "loss": 1.5189, "step": 26486 }, { "epoch": 0.3441864846997367, "grad_norm": 0.48087427020072937, "learning_rate": 0.00013118964374374506, "loss": 1.7132, "step": 26487 }, { "epoch": 0.3441994792436526, "grad_norm": 0.3711980879306793, "learning_rate": 0.00013118704428183366, "loss": 1.401, "step": 26488 }, { "epoch": 0.3442124737875684, "grad_norm": 0.40589219331741333, "learning_rate": 0.00013118444481992228, "loss": 1.2023, "step": 26489 }, { "epoch": 0.3442254683314843, "grad_norm": 0.4111505448818207, "learning_rate": 0.0001311818453580109, "loss": 1.3354, "step": 26490 }, { "epoch": 0.34423846287540016, "grad_norm": 0.40154072642326355, "learning_rate": 0.0001311792458960995, "loss": 1.547, "step": 26491 }, { "epoch": 0.34425145741931606, "grad_norm": 0.3786192536354065, "learning_rate": 0.00013117664643418813, "loss": 1.3041, "step": 26492 }, { "epoch": 0.34426445196323197, "grad_norm": 0.33170366287231445, "learning_rate": 0.00013117404697227675, "loss": 1.3759, "step": 26493 }, { "epoch": 0.3442774465071478, "grad_norm": 0.2915847599506378, "learning_rate": 0.00013117144751036538, "loss": 1.2715, "step": 26494 }, { "epoch": 0.3442904410510637, "grad_norm": 0.4430015981197357, "learning_rate": 0.00013116884804845397, "loss": 1.2897, "step": 26495 }, { "epoch": 0.34430343559497956, "grad_norm": 0.480791300535202, "learning_rate": 0.0001311662485865426, "loss": 1.4735, "step": 26496 }, { "epoch": 0.34431643013889546, "grad_norm": 0.4185304045677185, "learning_rate": 0.00013116364912463122, "loss": 1.2849, "step": 26497 }, { "epoch": 0.3443294246828113, "grad_norm": 0.3496531546115875, "learning_rate": 0.00013116104966271982, "loss": 1.3791, "step": 26498 }, { "epoch": 0.3443424192267272, "grad_norm": 0.34698158502578735, "learning_rate": 0.00013115845020080844, "loss": 1.3564, "step": 26499 }, { "epoch": 0.34435541377064305, "grad_norm": 0.5261573195457458, "learning_rate": 0.00013115585073889704, "loss": 1.4895, "step": 26500 }, { "epoch": 0.34436840831455895, "grad_norm": 0.4059036076068878, "learning_rate": 0.00013115325127698567, "loss": 1.2928, "step": 26501 }, { "epoch": 0.3443814028584748, "grad_norm": 0.3983599543571472, "learning_rate": 0.0001311506518150743, "loss": 1.5055, "step": 26502 }, { "epoch": 0.3443943974023907, "grad_norm": 0.3382245600223541, "learning_rate": 0.0001311480523531629, "loss": 1.4103, "step": 26503 }, { "epoch": 0.34440739194630654, "grad_norm": 0.3494957983493805, "learning_rate": 0.0001311454528912515, "loss": 1.4274, "step": 26504 }, { "epoch": 0.34442038649022244, "grad_norm": 0.45912179350852966, "learning_rate": 0.00013114285342934014, "loss": 1.6341, "step": 26505 }, { "epoch": 0.3444333810341383, "grad_norm": 0.38130703568458557, "learning_rate": 0.00013114025396742876, "loss": 1.45, "step": 26506 }, { "epoch": 0.3444463755780542, "grad_norm": 0.37535321712493896, "learning_rate": 0.00013113765450551736, "loss": 1.2743, "step": 26507 }, { "epoch": 0.34445937012197003, "grad_norm": 0.4131513237953186, "learning_rate": 0.00013113505504360598, "loss": 1.3767, "step": 26508 }, { "epoch": 0.34447236466588593, "grad_norm": 0.45072776079177856, "learning_rate": 0.0001311324555816946, "loss": 1.5252, "step": 26509 }, { "epoch": 0.3444853592098018, "grad_norm": 0.36597904562950134, "learning_rate": 0.0001311298561197832, "loss": 1.2754, "step": 26510 }, { "epoch": 0.3444983537537177, "grad_norm": 0.3923594653606415, "learning_rate": 0.00013112725665787183, "loss": 1.3506, "step": 26511 }, { "epoch": 0.3445113482976335, "grad_norm": 0.38187533617019653, "learning_rate": 0.00013112465719596043, "loss": 1.4836, "step": 26512 }, { "epoch": 0.3445243428415494, "grad_norm": 0.3593902587890625, "learning_rate": 0.00013112205773404908, "loss": 1.2763, "step": 26513 }, { "epoch": 0.34453733738546527, "grad_norm": 0.3674440085887909, "learning_rate": 0.00013111945827213767, "loss": 1.3149, "step": 26514 }, { "epoch": 0.34455033192938117, "grad_norm": 0.42395225167274475, "learning_rate": 0.00013111685881022627, "loss": 1.5541, "step": 26515 }, { "epoch": 0.344563326473297, "grad_norm": 0.3809218108654022, "learning_rate": 0.00013111425934831492, "loss": 1.4042, "step": 26516 }, { "epoch": 0.3445763210172129, "grad_norm": 0.44831323623657227, "learning_rate": 0.00013111165988640352, "loss": 1.3498, "step": 26517 }, { "epoch": 0.34458931556112876, "grad_norm": 0.4570062756538391, "learning_rate": 0.00013110906042449215, "loss": 1.5425, "step": 26518 }, { "epoch": 0.34460231010504466, "grad_norm": 0.38537171483039856, "learning_rate": 0.00013110646096258074, "loss": 1.4267, "step": 26519 }, { "epoch": 0.3446153046489605, "grad_norm": 0.32493555545806885, "learning_rate": 0.00013110386150066937, "loss": 1.3463, "step": 26520 }, { "epoch": 0.3446282991928764, "grad_norm": 0.4687088131904602, "learning_rate": 0.000131101262038758, "loss": 1.4415, "step": 26521 }, { "epoch": 0.34464129373679225, "grad_norm": 0.486675500869751, "learning_rate": 0.0001310986625768466, "loss": 1.4634, "step": 26522 }, { "epoch": 0.34465428828070815, "grad_norm": 0.3574809730052948, "learning_rate": 0.00013109606311493521, "loss": 1.4279, "step": 26523 }, { "epoch": 0.344667282824624, "grad_norm": 0.2816152572631836, "learning_rate": 0.00013109346365302384, "loss": 1.383, "step": 26524 }, { "epoch": 0.3446802773685399, "grad_norm": 0.3294565677642822, "learning_rate": 0.00013109086419111246, "loss": 1.5036, "step": 26525 }, { "epoch": 0.34469327191245575, "grad_norm": 0.34105944633483887, "learning_rate": 0.00013108826472920106, "loss": 1.4198, "step": 26526 }, { "epoch": 0.34470626645637165, "grad_norm": 0.37580713629722595, "learning_rate": 0.00013108566526728966, "loss": 1.1172, "step": 26527 }, { "epoch": 0.3447192610002875, "grad_norm": 0.47000688314437866, "learning_rate": 0.0001310830658053783, "loss": 1.4908, "step": 26528 }, { "epoch": 0.3447322555442034, "grad_norm": 0.5564127564430237, "learning_rate": 0.0001310804663434669, "loss": 1.5379, "step": 26529 }, { "epoch": 0.34474525008811924, "grad_norm": 0.5190191268920898, "learning_rate": 0.00013107786688155553, "loss": 1.4677, "step": 26530 }, { "epoch": 0.34475824463203514, "grad_norm": 0.4767417311668396, "learning_rate": 0.00013107526741964413, "loss": 1.4555, "step": 26531 }, { "epoch": 0.344771239175951, "grad_norm": 0.29780545830726624, "learning_rate": 0.00013107266795773275, "loss": 1.5077, "step": 26532 }, { "epoch": 0.3447842337198669, "grad_norm": 0.3425752520561218, "learning_rate": 0.00013107006849582138, "loss": 1.3988, "step": 26533 }, { "epoch": 0.34479722826378273, "grad_norm": 0.5032296776771545, "learning_rate": 0.00013106746903390997, "loss": 1.5185, "step": 26534 }, { "epoch": 0.34481022280769863, "grad_norm": 0.40545040369033813, "learning_rate": 0.0001310648695719986, "loss": 1.3336, "step": 26535 }, { "epoch": 0.3448232173516145, "grad_norm": 0.4259105324745178, "learning_rate": 0.00013106227011008722, "loss": 1.3649, "step": 26536 }, { "epoch": 0.3448362118955304, "grad_norm": 0.3445252478122711, "learning_rate": 0.00013105967064817585, "loss": 1.1858, "step": 26537 }, { "epoch": 0.3448492064394462, "grad_norm": 0.39967963099479675, "learning_rate": 0.00013105707118626445, "loss": 1.2473, "step": 26538 }, { "epoch": 0.3448622009833621, "grad_norm": 0.31189975142478943, "learning_rate": 0.00013105447172435304, "loss": 0.9183, "step": 26539 }, { "epoch": 0.34487519552727797, "grad_norm": 0.4261758327484131, "learning_rate": 0.0001310518722624417, "loss": 1.3791, "step": 26540 }, { "epoch": 0.34488819007119387, "grad_norm": 0.37235456705093384, "learning_rate": 0.0001310492728005303, "loss": 1.3171, "step": 26541 }, { "epoch": 0.3449011846151097, "grad_norm": 0.5004569292068481, "learning_rate": 0.00013104667333861892, "loss": 1.4813, "step": 26542 }, { "epoch": 0.3449141791590256, "grad_norm": 0.7393776178359985, "learning_rate": 0.00013104407387670751, "loss": 1.4146, "step": 26543 }, { "epoch": 0.34492717370294146, "grad_norm": 0.36590343713760376, "learning_rate": 0.00013104147441479614, "loss": 1.3753, "step": 26544 }, { "epoch": 0.34494016824685736, "grad_norm": 0.3392195403575897, "learning_rate": 0.00013103887495288476, "loss": 1.2776, "step": 26545 }, { "epoch": 0.3449531627907732, "grad_norm": 0.44265690445899963, "learning_rate": 0.00013103627549097336, "loss": 1.1844, "step": 26546 }, { "epoch": 0.3449661573346891, "grad_norm": 0.3379330635070801, "learning_rate": 0.00013103367602906198, "loss": 1.3021, "step": 26547 }, { "epoch": 0.34497915187860495, "grad_norm": 0.41267129778862, "learning_rate": 0.0001310310765671506, "loss": 1.5288, "step": 26548 }, { "epoch": 0.34499214642252085, "grad_norm": 0.46174588799476624, "learning_rate": 0.00013102847710523923, "loss": 1.3397, "step": 26549 }, { "epoch": 0.3450051409664367, "grad_norm": 0.5087870955467224, "learning_rate": 0.00013102587764332783, "loss": 1.3289, "step": 26550 }, { "epoch": 0.3450181355103526, "grad_norm": 0.47850853204727173, "learning_rate": 0.00013102327818141646, "loss": 1.5565, "step": 26551 }, { "epoch": 0.34503113005426844, "grad_norm": 0.4383699893951416, "learning_rate": 0.00013102067871950508, "loss": 1.3686, "step": 26552 }, { "epoch": 0.34504412459818434, "grad_norm": 0.3639032542705536, "learning_rate": 0.00013101807925759368, "loss": 1.4646, "step": 26553 }, { "epoch": 0.3450571191421002, "grad_norm": 0.3041555881500244, "learning_rate": 0.0001310154797956823, "loss": 1.3459, "step": 26554 }, { "epoch": 0.3450701136860161, "grad_norm": 0.44253215193748474, "learning_rate": 0.00013101288033377093, "loss": 1.4899, "step": 26555 }, { "epoch": 0.34508310822993193, "grad_norm": 0.36604005098342896, "learning_rate": 0.00013101028087185952, "loss": 1.5035, "step": 26556 }, { "epoch": 0.34509610277384783, "grad_norm": 0.45684728026390076, "learning_rate": 0.00013100768140994815, "loss": 1.637, "step": 26557 }, { "epoch": 0.3451090973177637, "grad_norm": 0.31577402353286743, "learning_rate": 0.00013100508194803675, "loss": 1.3657, "step": 26558 }, { "epoch": 0.3451220918616796, "grad_norm": 0.3353492021560669, "learning_rate": 0.0001310024824861254, "loss": 1.3518, "step": 26559 }, { "epoch": 0.3451350864055954, "grad_norm": 0.43974122405052185, "learning_rate": 0.000130999883024214, "loss": 1.5166, "step": 26560 }, { "epoch": 0.3451480809495113, "grad_norm": 0.36859339475631714, "learning_rate": 0.00013099728356230262, "loss": 1.2987, "step": 26561 }, { "epoch": 0.34516107549342717, "grad_norm": 0.33191508054733276, "learning_rate": 0.00013099468410039122, "loss": 1.3629, "step": 26562 }, { "epoch": 0.3451740700373431, "grad_norm": 0.3837544918060303, "learning_rate": 0.00013099208463847984, "loss": 1.3121, "step": 26563 }, { "epoch": 0.3451870645812589, "grad_norm": 0.4141685366630554, "learning_rate": 0.00013098948517656847, "loss": 1.3303, "step": 26564 }, { "epoch": 0.3452000591251748, "grad_norm": 0.35764312744140625, "learning_rate": 0.00013098688571465706, "loss": 1.2393, "step": 26565 }, { "epoch": 0.34521305366909066, "grad_norm": 0.43173328042030334, "learning_rate": 0.0001309842862527457, "loss": 1.3957, "step": 26566 }, { "epoch": 0.34522604821300656, "grad_norm": 0.403044193983078, "learning_rate": 0.0001309816867908343, "loss": 1.3548, "step": 26567 }, { "epoch": 0.3452390427569224, "grad_norm": 0.35216888785362244, "learning_rate": 0.0001309790873289229, "loss": 1.3546, "step": 26568 }, { "epoch": 0.3452520373008383, "grad_norm": 0.38506150245666504, "learning_rate": 0.00013097648786701153, "loss": 1.4264, "step": 26569 }, { "epoch": 0.3452650318447542, "grad_norm": 0.4423760175704956, "learning_rate": 0.00013097388840510013, "loss": 1.1305, "step": 26570 }, { "epoch": 0.34527802638867006, "grad_norm": 0.3367474377155304, "learning_rate": 0.00013097128894318878, "loss": 1.4004, "step": 26571 }, { "epoch": 0.34529102093258596, "grad_norm": 0.4223332703113556, "learning_rate": 0.00013096868948127738, "loss": 1.3798, "step": 26572 }, { "epoch": 0.3453040154765018, "grad_norm": 0.44328758120536804, "learning_rate": 0.000130966090019366, "loss": 1.3648, "step": 26573 }, { "epoch": 0.3453170100204177, "grad_norm": 0.3861069977283478, "learning_rate": 0.0001309634905574546, "loss": 1.4447, "step": 26574 }, { "epoch": 0.34533000456433355, "grad_norm": 0.36092132329940796, "learning_rate": 0.00013096089109554323, "loss": 1.5799, "step": 26575 }, { "epoch": 0.34534299910824945, "grad_norm": 0.38764095306396484, "learning_rate": 0.00013095829163363185, "loss": 1.2236, "step": 26576 }, { "epoch": 0.3453559936521653, "grad_norm": 0.42050817608833313, "learning_rate": 0.00013095569217172045, "loss": 1.5395, "step": 26577 }, { "epoch": 0.3453689881960812, "grad_norm": 0.40338295698165894, "learning_rate": 0.00013095309270980907, "loss": 1.4459, "step": 26578 }, { "epoch": 0.34538198273999704, "grad_norm": 0.37771138548851013, "learning_rate": 0.0001309504932478977, "loss": 1.4327, "step": 26579 }, { "epoch": 0.34539497728391294, "grad_norm": 0.34933656454086304, "learning_rate": 0.00013094789378598632, "loss": 1.2706, "step": 26580 }, { "epoch": 0.3454079718278288, "grad_norm": 0.4201446771621704, "learning_rate": 0.00013094529432407492, "loss": 1.4194, "step": 26581 }, { "epoch": 0.3454209663717447, "grad_norm": 0.46379080414772034, "learning_rate": 0.00013094269486216352, "loss": 1.4112, "step": 26582 }, { "epoch": 0.34543396091566053, "grad_norm": 0.4353771507740021, "learning_rate": 0.00013094009540025217, "loss": 1.2551, "step": 26583 }, { "epoch": 0.34544695545957643, "grad_norm": 0.3479970395565033, "learning_rate": 0.00013093749593834077, "loss": 1.3093, "step": 26584 }, { "epoch": 0.3454599500034923, "grad_norm": 0.34489932656288147, "learning_rate": 0.0001309348964764294, "loss": 1.4885, "step": 26585 }, { "epoch": 0.3454729445474082, "grad_norm": 0.3877830505371094, "learning_rate": 0.000130932297014518, "loss": 1.3846, "step": 26586 }, { "epoch": 0.345485939091324, "grad_norm": 0.5589036345481873, "learning_rate": 0.0001309296975526066, "loss": 1.5886, "step": 26587 }, { "epoch": 0.3454989336352399, "grad_norm": 0.4398100972175598, "learning_rate": 0.00013092709809069524, "loss": 1.4444, "step": 26588 }, { "epoch": 0.34551192817915577, "grad_norm": 0.44797220826148987, "learning_rate": 0.00013092449862878383, "loss": 1.3693, "step": 26589 }, { "epoch": 0.34552492272307167, "grad_norm": 0.40396520495414734, "learning_rate": 0.00013092189916687246, "loss": 1.1889, "step": 26590 }, { "epoch": 0.3455379172669875, "grad_norm": 0.4713892936706543, "learning_rate": 0.00013091929970496108, "loss": 1.4271, "step": 26591 }, { "epoch": 0.3455509118109034, "grad_norm": 0.4433099925518036, "learning_rate": 0.0001309167002430497, "loss": 1.3485, "step": 26592 }, { "epoch": 0.34556390635481926, "grad_norm": 0.36831218004226685, "learning_rate": 0.0001309141007811383, "loss": 1.2387, "step": 26593 }, { "epoch": 0.34557690089873516, "grad_norm": 0.4953293204307556, "learning_rate": 0.00013091150131922693, "loss": 1.4972, "step": 26594 }, { "epoch": 0.345589895442651, "grad_norm": 0.3527107834815979, "learning_rate": 0.00013090890185731555, "loss": 1.4307, "step": 26595 }, { "epoch": 0.3456028899865669, "grad_norm": 0.35049378871917725, "learning_rate": 0.00013090630239540415, "loss": 1.4342, "step": 26596 }, { "epoch": 0.34561588453048275, "grad_norm": 0.4256710410118103, "learning_rate": 0.00013090370293349278, "loss": 1.3892, "step": 26597 }, { "epoch": 0.34562887907439865, "grad_norm": 0.44184941053390503, "learning_rate": 0.0001309011034715814, "loss": 1.4305, "step": 26598 }, { "epoch": 0.3456418736183145, "grad_norm": 0.41900575160980225, "learning_rate": 0.00013089850400967, "loss": 1.4564, "step": 26599 }, { "epoch": 0.3456548681622304, "grad_norm": 0.26965197920799255, "learning_rate": 0.00013089590454775862, "loss": 1.2301, "step": 26600 }, { "epoch": 0.34566786270614624, "grad_norm": 0.3843028247356415, "learning_rate": 0.00013089330508584722, "loss": 1.3771, "step": 26601 }, { "epoch": 0.34568085725006215, "grad_norm": 0.3360713720321655, "learning_rate": 0.00013089070562393587, "loss": 1.2677, "step": 26602 }, { "epoch": 0.345693851793978, "grad_norm": 0.3312324285507202, "learning_rate": 0.00013088810616202447, "loss": 1.3179, "step": 26603 }, { "epoch": 0.3457068463378939, "grad_norm": 0.4654258191585541, "learning_rate": 0.0001308855067001131, "loss": 1.3397, "step": 26604 }, { "epoch": 0.34571984088180974, "grad_norm": 0.414943665266037, "learning_rate": 0.0001308829072382017, "loss": 1.3137, "step": 26605 }, { "epoch": 0.34573283542572564, "grad_norm": 0.42995309829711914, "learning_rate": 0.00013088030777629031, "loss": 1.3915, "step": 26606 }, { "epoch": 0.3457458299696415, "grad_norm": 0.33729854226112366, "learning_rate": 0.00013087770831437894, "loss": 1.2833, "step": 26607 }, { "epoch": 0.3457588245135574, "grad_norm": 0.424701064825058, "learning_rate": 0.00013087510885246754, "loss": 1.3046, "step": 26608 }, { "epoch": 0.34577181905747323, "grad_norm": 0.4337643086910248, "learning_rate": 0.00013087250939055616, "loss": 1.4366, "step": 26609 }, { "epoch": 0.34578481360138913, "grad_norm": 0.4414233863353729, "learning_rate": 0.00013086990992864479, "loss": 1.5143, "step": 26610 }, { "epoch": 0.345797808145305, "grad_norm": 0.3761984407901764, "learning_rate": 0.00013086731046673338, "loss": 1.447, "step": 26611 }, { "epoch": 0.3458108026892209, "grad_norm": 0.33630993962287903, "learning_rate": 0.000130864711004822, "loss": 1.3806, "step": 26612 }, { "epoch": 0.3458237972331367, "grad_norm": 0.40293318033218384, "learning_rate": 0.0001308621115429106, "loss": 1.3526, "step": 26613 }, { "epoch": 0.3458367917770526, "grad_norm": 0.3696369230747223, "learning_rate": 0.00013085951208099926, "loss": 1.4276, "step": 26614 }, { "epoch": 0.34584978632096847, "grad_norm": 0.3684592843055725, "learning_rate": 0.00013085691261908785, "loss": 1.3795, "step": 26615 }, { "epoch": 0.34586278086488437, "grad_norm": 0.3810883164405823, "learning_rate": 0.00013085431315717648, "loss": 1.2979, "step": 26616 }, { "epoch": 0.3458757754088002, "grad_norm": 0.3024573028087616, "learning_rate": 0.00013085171369526508, "loss": 1.2637, "step": 26617 }, { "epoch": 0.3458887699527161, "grad_norm": 0.3940378725528717, "learning_rate": 0.0001308491142333537, "loss": 1.3488, "step": 26618 }, { "epoch": 0.34590176449663196, "grad_norm": 0.5149592161178589, "learning_rate": 0.00013084651477144232, "loss": 1.5238, "step": 26619 }, { "epoch": 0.34591475904054786, "grad_norm": 0.4384099543094635, "learning_rate": 0.00013084391530953092, "loss": 1.5454, "step": 26620 }, { "epoch": 0.3459277535844637, "grad_norm": 0.25954023003578186, "learning_rate": 0.00013084131584761955, "loss": 1.4853, "step": 26621 }, { "epoch": 0.3459407481283796, "grad_norm": 0.3572298288345337, "learning_rate": 0.00013083871638570817, "loss": 1.3969, "step": 26622 }, { "epoch": 0.34595374267229545, "grad_norm": 0.45168083906173706, "learning_rate": 0.00013083611692379677, "loss": 1.3728, "step": 26623 }, { "epoch": 0.34596673721621135, "grad_norm": 0.27782830595970154, "learning_rate": 0.0001308335174618854, "loss": 1.1622, "step": 26624 }, { "epoch": 0.3459797317601272, "grad_norm": 0.3413933217525482, "learning_rate": 0.000130830917999974, "loss": 1.2639, "step": 26625 }, { "epoch": 0.3459927263040431, "grad_norm": 0.4766233563423157, "learning_rate": 0.00013082831853806264, "loss": 1.4792, "step": 26626 }, { "epoch": 0.34600572084795894, "grad_norm": 0.35271376371383667, "learning_rate": 0.00013082571907615124, "loss": 1.2007, "step": 26627 }, { "epoch": 0.34601871539187484, "grad_norm": 0.31171727180480957, "learning_rate": 0.00013082311961423986, "loss": 1.4985, "step": 26628 }, { "epoch": 0.3460317099357907, "grad_norm": 0.4962668716907501, "learning_rate": 0.0001308205201523285, "loss": 1.5039, "step": 26629 }, { "epoch": 0.3460447044797066, "grad_norm": 0.30937057733535767, "learning_rate": 0.00013081792069041709, "loss": 1.1504, "step": 26630 }, { "epoch": 0.34605769902362243, "grad_norm": 0.4515775144100189, "learning_rate": 0.0001308153212285057, "loss": 1.4177, "step": 26631 }, { "epoch": 0.34607069356753833, "grad_norm": 0.375863641500473, "learning_rate": 0.0001308127217665943, "loss": 1.3634, "step": 26632 }, { "epoch": 0.3460836881114542, "grad_norm": 0.4325713813304901, "learning_rate": 0.00013081012230468296, "loss": 1.351, "step": 26633 }, { "epoch": 0.3460966826553701, "grad_norm": 0.4273795187473297, "learning_rate": 0.00013080752284277156, "loss": 1.5213, "step": 26634 }, { "epoch": 0.3461096771992859, "grad_norm": 0.4006188213825226, "learning_rate": 0.00013080492338086018, "loss": 1.4764, "step": 26635 }, { "epoch": 0.3461226717432018, "grad_norm": 0.3644360601902008, "learning_rate": 0.00013080232391894878, "loss": 1.5916, "step": 26636 }, { "epoch": 0.34613566628711767, "grad_norm": 0.4127972424030304, "learning_rate": 0.0001307997244570374, "loss": 1.4561, "step": 26637 }, { "epoch": 0.34614866083103357, "grad_norm": 0.3162440359592438, "learning_rate": 0.00013079712499512603, "loss": 1.3836, "step": 26638 }, { "epoch": 0.3461616553749494, "grad_norm": 0.39263442158699036, "learning_rate": 0.00013079452553321462, "loss": 1.3344, "step": 26639 }, { "epoch": 0.3461746499188653, "grad_norm": 0.3766639828681946, "learning_rate": 0.00013079192607130325, "loss": 1.1546, "step": 26640 }, { "epoch": 0.34618764446278116, "grad_norm": 0.3880411982536316, "learning_rate": 0.00013078932660939187, "loss": 1.4707, "step": 26641 }, { "epoch": 0.34620063900669706, "grad_norm": 0.3839316964149475, "learning_rate": 0.00013078672714748047, "loss": 1.4076, "step": 26642 }, { "epoch": 0.3462136335506129, "grad_norm": 0.40619587898254395, "learning_rate": 0.0001307841276855691, "loss": 1.3638, "step": 26643 }, { "epoch": 0.3462266280945288, "grad_norm": 0.4590248167514801, "learning_rate": 0.0001307815282236577, "loss": 1.2623, "step": 26644 }, { "epoch": 0.3462396226384447, "grad_norm": 0.4797360897064209, "learning_rate": 0.00013077892876174634, "loss": 1.5481, "step": 26645 }, { "epoch": 0.34625261718236056, "grad_norm": 0.28102216124534607, "learning_rate": 0.00013077632929983494, "loss": 1.3325, "step": 26646 }, { "epoch": 0.34626561172627646, "grad_norm": 0.34471723437309265, "learning_rate": 0.00013077372983792357, "loss": 1.2331, "step": 26647 }, { "epoch": 0.3462786062701923, "grad_norm": 0.43607595562934875, "learning_rate": 0.00013077113037601216, "loss": 1.3849, "step": 26648 }, { "epoch": 0.3462916008141082, "grad_norm": 0.362563818693161, "learning_rate": 0.0001307685309141008, "loss": 1.2369, "step": 26649 }, { "epoch": 0.34630459535802405, "grad_norm": 0.3580750823020935, "learning_rate": 0.0001307659314521894, "loss": 1.6221, "step": 26650 }, { "epoch": 0.34631758990193995, "grad_norm": 0.43542084097862244, "learning_rate": 0.000130763331990278, "loss": 1.3917, "step": 26651 }, { "epoch": 0.3463305844458558, "grad_norm": 0.33879831433296204, "learning_rate": 0.00013076073252836663, "loss": 1.344, "step": 26652 }, { "epoch": 0.3463435789897717, "grad_norm": 0.4282108545303345, "learning_rate": 0.00013075813306645526, "loss": 1.3396, "step": 26653 }, { "epoch": 0.34635657353368754, "grad_norm": 0.3643760681152344, "learning_rate": 0.00013075553360454386, "loss": 1.2406, "step": 26654 }, { "epoch": 0.34636956807760344, "grad_norm": 0.34198275208473206, "learning_rate": 0.00013075293414263248, "loss": 1.2846, "step": 26655 }, { "epoch": 0.3463825626215193, "grad_norm": 0.41564545035362244, "learning_rate": 0.00013075033468072108, "loss": 1.3382, "step": 26656 }, { "epoch": 0.3463955571654352, "grad_norm": 0.34525421261787415, "learning_rate": 0.00013074773521880973, "loss": 1.4865, "step": 26657 }, { "epoch": 0.34640855170935103, "grad_norm": 0.4296342730522156, "learning_rate": 0.00013074513575689833, "loss": 1.4088, "step": 26658 }, { "epoch": 0.34642154625326693, "grad_norm": 0.4237171411514282, "learning_rate": 0.00013074253629498695, "loss": 1.5191, "step": 26659 }, { "epoch": 0.3464345407971828, "grad_norm": 0.3386423885822296, "learning_rate": 0.00013073993683307555, "loss": 1.5748, "step": 26660 }, { "epoch": 0.3464475353410987, "grad_norm": 0.4119246006011963, "learning_rate": 0.00013073733737116417, "loss": 1.4767, "step": 26661 }, { "epoch": 0.3464605298850145, "grad_norm": 0.3888741433620453, "learning_rate": 0.0001307347379092528, "loss": 1.1758, "step": 26662 }, { "epoch": 0.3464735244289304, "grad_norm": 0.37108558416366577, "learning_rate": 0.0001307321384473414, "loss": 1.3987, "step": 26663 }, { "epoch": 0.34648651897284627, "grad_norm": 0.356509268283844, "learning_rate": 0.00013072953898543002, "loss": 1.4093, "step": 26664 }, { "epoch": 0.34649951351676217, "grad_norm": 0.39120975136756897, "learning_rate": 0.00013072693952351864, "loss": 1.4408, "step": 26665 }, { "epoch": 0.346512508060678, "grad_norm": 0.45946523547172546, "learning_rate": 0.00013072434006160724, "loss": 1.3367, "step": 26666 }, { "epoch": 0.3465255026045939, "grad_norm": 0.4458352327346802, "learning_rate": 0.00013072174059969587, "loss": 1.2939, "step": 26667 }, { "epoch": 0.34653849714850976, "grad_norm": 0.3228728175163269, "learning_rate": 0.0001307191411377845, "loss": 1.396, "step": 26668 }, { "epoch": 0.34655149169242566, "grad_norm": 0.38279810547828674, "learning_rate": 0.00013071654167587311, "loss": 1.5347, "step": 26669 }, { "epoch": 0.3465644862363415, "grad_norm": 0.5361925363540649, "learning_rate": 0.0001307139422139617, "loss": 1.3269, "step": 26670 }, { "epoch": 0.3465774807802574, "grad_norm": 0.3816174566745758, "learning_rate": 0.00013071134275205034, "loss": 1.4083, "step": 26671 }, { "epoch": 0.34659047532417325, "grad_norm": 0.3572666645050049, "learning_rate": 0.00013070874329013896, "loss": 1.319, "step": 26672 }, { "epoch": 0.34660346986808915, "grad_norm": 0.3896945118904114, "learning_rate": 0.00013070614382822756, "loss": 1.3536, "step": 26673 }, { "epoch": 0.346616464412005, "grad_norm": 0.4612908363342285, "learning_rate": 0.00013070354436631618, "loss": 1.4627, "step": 26674 }, { "epoch": 0.3466294589559209, "grad_norm": 0.43143564462661743, "learning_rate": 0.00013070094490440478, "loss": 1.3865, "step": 26675 }, { "epoch": 0.34664245349983674, "grad_norm": 0.3683503568172455, "learning_rate": 0.00013069834544249343, "loss": 1.451, "step": 26676 }, { "epoch": 0.34665544804375265, "grad_norm": 0.3972083628177643, "learning_rate": 0.00013069574598058203, "loss": 1.4988, "step": 26677 }, { "epoch": 0.3466684425876685, "grad_norm": 0.43939390778541565, "learning_rate": 0.00013069314651867063, "loss": 1.3289, "step": 26678 }, { "epoch": 0.3466814371315844, "grad_norm": 0.4743555188179016, "learning_rate": 0.00013069054705675925, "loss": 1.523, "step": 26679 }, { "epoch": 0.34669443167550024, "grad_norm": 0.457882285118103, "learning_rate": 0.00013068794759484788, "loss": 1.4088, "step": 26680 }, { "epoch": 0.34670742621941614, "grad_norm": 0.3829633593559265, "learning_rate": 0.0001306853481329365, "loss": 1.3891, "step": 26681 }, { "epoch": 0.346720420763332, "grad_norm": 0.4332887530326843, "learning_rate": 0.0001306827486710251, "loss": 1.6148, "step": 26682 }, { "epoch": 0.3467334153072479, "grad_norm": 0.505901575088501, "learning_rate": 0.00013068014920911372, "loss": 1.3889, "step": 26683 }, { "epoch": 0.34674640985116373, "grad_norm": 0.3263007402420044, "learning_rate": 0.00013067754974720235, "loss": 1.2105, "step": 26684 }, { "epoch": 0.34675940439507963, "grad_norm": 0.429423987865448, "learning_rate": 0.00013067495028529094, "loss": 1.4806, "step": 26685 }, { "epoch": 0.3467723989389955, "grad_norm": 0.3776237964630127, "learning_rate": 0.00013067235082337957, "loss": 1.3164, "step": 26686 }, { "epoch": 0.3467853934829114, "grad_norm": 0.3904382586479187, "learning_rate": 0.00013066975136146817, "loss": 1.4389, "step": 26687 }, { "epoch": 0.3467983880268272, "grad_norm": 0.38412243127822876, "learning_rate": 0.00013066715189955682, "loss": 1.5245, "step": 26688 }, { "epoch": 0.3468113825707431, "grad_norm": 0.452934592962265, "learning_rate": 0.00013066455243764541, "loss": 1.4766, "step": 26689 }, { "epoch": 0.34682437711465897, "grad_norm": 0.4092235267162323, "learning_rate": 0.000130661952975734, "loss": 1.4664, "step": 26690 }, { "epoch": 0.34683737165857487, "grad_norm": 0.39480167627334595, "learning_rate": 0.00013065935351382264, "loss": 1.3915, "step": 26691 }, { "epoch": 0.3468503662024907, "grad_norm": 0.2849251627922058, "learning_rate": 0.00013065675405191126, "loss": 1.5287, "step": 26692 }, { "epoch": 0.3468633607464066, "grad_norm": 0.36102426052093506, "learning_rate": 0.00013065415458999989, "loss": 1.1372, "step": 26693 }, { "epoch": 0.34687635529032246, "grad_norm": 0.3119557797908783, "learning_rate": 0.00013065155512808848, "loss": 1.4428, "step": 26694 }, { "epoch": 0.34688934983423836, "grad_norm": 0.5146140456199646, "learning_rate": 0.0001306489556661771, "loss": 1.6175, "step": 26695 }, { "epoch": 0.3469023443781542, "grad_norm": 0.4329960346221924, "learning_rate": 0.00013064635620426573, "loss": 1.3941, "step": 26696 }, { "epoch": 0.3469153389220701, "grad_norm": 0.48260965943336487, "learning_rate": 0.00013064375674235433, "loss": 1.2661, "step": 26697 }, { "epoch": 0.34692833346598595, "grad_norm": 0.4369378089904785, "learning_rate": 0.00013064115728044295, "loss": 1.4921, "step": 26698 }, { "epoch": 0.34694132800990185, "grad_norm": 0.40378421545028687, "learning_rate": 0.00013063855781853155, "loss": 1.3804, "step": 26699 }, { "epoch": 0.3469543225538177, "grad_norm": 0.2600562572479248, "learning_rate": 0.0001306359583566202, "loss": 1.228, "step": 26700 }, { "epoch": 0.3469673170977336, "grad_norm": 0.4316212236881256, "learning_rate": 0.0001306333588947088, "loss": 1.3007, "step": 26701 }, { "epoch": 0.34698031164164944, "grad_norm": 0.3639039099216461, "learning_rate": 0.00013063075943279742, "loss": 1.3644, "step": 26702 }, { "epoch": 0.34699330618556534, "grad_norm": 0.3018897771835327, "learning_rate": 0.00013062815997088605, "loss": 1.3853, "step": 26703 }, { "epoch": 0.3470063007294812, "grad_norm": 0.3685210049152374, "learning_rate": 0.00013062556050897465, "loss": 1.3648, "step": 26704 }, { "epoch": 0.3470192952733971, "grad_norm": 0.3241848051548004, "learning_rate": 0.00013062296104706327, "loss": 1.4097, "step": 26705 }, { "epoch": 0.34703228981731293, "grad_norm": 0.4015924334526062, "learning_rate": 0.00013062036158515187, "loss": 1.3189, "step": 26706 }, { "epoch": 0.34704528436122883, "grad_norm": 0.4488102197647095, "learning_rate": 0.0001306177621232405, "loss": 1.5717, "step": 26707 }, { "epoch": 0.3470582789051447, "grad_norm": 0.3320499658584595, "learning_rate": 0.00013061516266132912, "loss": 1.3487, "step": 26708 }, { "epoch": 0.3470712734490606, "grad_norm": 0.32914769649505615, "learning_rate": 0.00013061256319941771, "loss": 1.3243, "step": 26709 }, { "epoch": 0.3470842679929764, "grad_norm": 0.37523412704467773, "learning_rate": 0.00013060996373750634, "loss": 1.3217, "step": 26710 }, { "epoch": 0.3470972625368923, "grad_norm": 0.4538055956363678, "learning_rate": 0.00013060736427559496, "loss": 1.2689, "step": 26711 }, { "epoch": 0.34711025708080817, "grad_norm": 0.3820892572402954, "learning_rate": 0.0001306047648136836, "loss": 1.333, "step": 26712 }, { "epoch": 0.34712325162472407, "grad_norm": 0.4614258408546448, "learning_rate": 0.00013060216535177219, "loss": 1.5112, "step": 26713 }, { "epoch": 0.3471362461686399, "grad_norm": 0.37766528129577637, "learning_rate": 0.0001305995658898608, "loss": 1.5765, "step": 26714 }, { "epoch": 0.3471492407125558, "grad_norm": 0.4048602879047394, "learning_rate": 0.00013059696642794943, "loss": 1.4194, "step": 26715 }, { "epoch": 0.34716223525647166, "grad_norm": 0.35259419679641724, "learning_rate": 0.00013059436696603803, "loss": 1.3734, "step": 26716 }, { "epoch": 0.34717522980038756, "grad_norm": 0.3524955213069916, "learning_rate": 0.00013059176750412666, "loss": 1.2932, "step": 26717 }, { "epoch": 0.3471882243443034, "grad_norm": 0.42704930901527405, "learning_rate": 0.00013058916804221525, "loss": 1.368, "step": 26718 }, { "epoch": 0.3472012188882193, "grad_norm": 0.42683956027030945, "learning_rate": 0.0001305865685803039, "loss": 1.2146, "step": 26719 }, { "epoch": 0.34721421343213515, "grad_norm": 0.48381561040878296, "learning_rate": 0.0001305839691183925, "loss": 1.5479, "step": 26720 }, { "epoch": 0.34722720797605106, "grad_norm": 0.4491442143917084, "learning_rate": 0.0001305813696564811, "loss": 1.4738, "step": 26721 }, { "epoch": 0.34724020251996696, "grad_norm": 0.4334605634212494, "learning_rate": 0.00013057877019456972, "loss": 1.2744, "step": 26722 }, { "epoch": 0.3472531970638828, "grad_norm": 0.3497220277786255, "learning_rate": 0.00013057617073265835, "loss": 1.3667, "step": 26723 }, { "epoch": 0.3472661916077987, "grad_norm": 0.3376922607421875, "learning_rate": 0.00013057357127074697, "loss": 1.2066, "step": 26724 }, { "epoch": 0.34727918615171455, "grad_norm": 0.37966978549957275, "learning_rate": 0.00013057097180883557, "loss": 1.2996, "step": 26725 }, { "epoch": 0.34729218069563045, "grad_norm": 0.3898547887802124, "learning_rate": 0.0001305683723469242, "loss": 1.3304, "step": 26726 }, { "epoch": 0.3473051752395463, "grad_norm": 0.3639774024486542, "learning_rate": 0.00013056577288501282, "loss": 1.2869, "step": 26727 }, { "epoch": 0.3473181697834622, "grad_norm": 0.3686040937900543, "learning_rate": 0.00013056317342310142, "loss": 1.1486, "step": 26728 }, { "epoch": 0.34733116432737804, "grad_norm": 0.38157588243484497, "learning_rate": 0.00013056057396119004, "loss": 1.3202, "step": 26729 }, { "epoch": 0.34734415887129394, "grad_norm": 0.47151950001716614, "learning_rate": 0.00013055797449927864, "loss": 1.5539, "step": 26730 }, { "epoch": 0.3473571534152098, "grad_norm": 0.4067578911781311, "learning_rate": 0.0001305553750373673, "loss": 1.298, "step": 26731 }, { "epoch": 0.3473701479591257, "grad_norm": 0.49372127652168274, "learning_rate": 0.0001305527755754559, "loss": 1.5364, "step": 26732 }, { "epoch": 0.34738314250304153, "grad_norm": 0.4783504605293274, "learning_rate": 0.00013055017611354449, "loss": 1.4912, "step": 26733 }, { "epoch": 0.34739613704695743, "grad_norm": 0.33491459488868713, "learning_rate": 0.0001305475766516331, "loss": 1.422, "step": 26734 }, { "epoch": 0.3474091315908733, "grad_norm": 0.35459378361701965, "learning_rate": 0.00013054497718972173, "loss": 1.5814, "step": 26735 }, { "epoch": 0.3474221261347892, "grad_norm": 0.46049872040748596, "learning_rate": 0.00013054237772781036, "loss": 1.4832, "step": 26736 }, { "epoch": 0.347435120678705, "grad_norm": 0.26960837841033936, "learning_rate": 0.00013053977826589896, "loss": 1.4086, "step": 26737 }, { "epoch": 0.3474481152226209, "grad_norm": 0.4439995288848877, "learning_rate": 0.00013053717880398758, "loss": 1.5378, "step": 26738 }, { "epoch": 0.34746110976653677, "grad_norm": 0.3669470548629761, "learning_rate": 0.0001305345793420762, "loss": 1.4257, "step": 26739 }, { "epoch": 0.34747410431045267, "grad_norm": 0.42249906063079834, "learning_rate": 0.0001305319798801648, "loss": 1.4669, "step": 26740 }, { "epoch": 0.3474870988543685, "grad_norm": 0.3158291280269623, "learning_rate": 0.00013052938041825343, "loss": 1.2623, "step": 26741 }, { "epoch": 0.3475000933982844, "grad_norm": 0.358104407787323, "learning_rate": 0.00013052678095634205, "loss": 1.2407, "step": 26742 }, { "epoch": 0.34751308794220026, "grad_norm": 0.39465901255607605, "learning_rate": 0.00013052418149443068, "loss": 1.4655, "step": 26743 }, { "epoch": 0.34752608248611616, "grad_norm": 0.31541532278060913, "learning_rate": 0.00013052158203251927, "loss": 1.3835, "step": 26744 }, { "epoch": 0.347539077030032, "grad_norm": 0.4479454457759857, "learning_rate": 0.00013051898257060787, "loss": 1.4042, "step": 26745 }, { "epoch": 0.3475520715739479, "grad_norm": 0.28242114186286926, "learning_rate": 0.00013051638310869652, "loss": 1.375, "step": 26746 }, { "epoch": 0.34756506611786375, "grad_norm": 0.41462913155555725, "learning_rate": 0.00013051378364678512, "loss": 1.5129, "step": 26747 }, { "epoch": 0.34757806066177965, "grad_norm": 0.3445412218570709, "learning_rate": 0.00013051118418487374, "loss": 1.5114, "step": 26748 }, { "epoch": 0.3475910552056955, "grad_norm": 0.44674918055534363, "learning_rate": 0.00013050858472296234, "loss": 1.3324, "step": 26749 }, { "epoch": 0.3476040497496114, "grad_norm": 0.46157950162887573, "learning_rate": 0.00013050598526105097, "loss": 1.4279, "step": 26750 }, { "epoch": 0.34761704429352724, "grad_norm": 0.36281871795654297, "learning_rate": 0.0001305033857991396, "loss": 1.332, "step": 26751 }, { "epoch": 0.34763003883744314, "grad_norm": 0.3747524619102478, "learning_rate": 0.0001305007863372282, "loss": 1.4512, "step": 26752 }, { "epoch": 0.347643033381359, "grad_norm": 0.37799760699272156, "learning_rate": 0.0001304981868753168, "loss": 1.4614, "step": 26753 }, { "epoch": 0.3476560279252749, "grad_norm": 0.426281601190567, "learning_rate": 0.00013049558741340544, "loss": 1.4905, "step": 26754 }, { "epoch": 0.34766902246919074, "grad_norm": 0.4806407392024994, "learning_rate": 0.00013049298795149406, "loss": 1.4491, "step": 26755 }, { "epoch": 0.34768201701310664, "grad_norm": 0.3714763820171356, "learning_rate": 0.00013049038848958266, "loss": 1.3374, "step": 26756 }, { "epoch": 0.3476950115570225, "grad_norm": 0.38324642181396484, "learning_rate": 0.00013048778902767128, "loss": 1.359, "step": 26757 }, { "epoch": 0.3477080061009384, "grad_norm": 0.376619815826416, "learning_rate": 0.0001304851895657599, "loss": 1.2543, "step": 26758 }, { "epoch": 0.34772100064485423, "grad_norm": 0.3975328505039215, "learning_rate": 0.0001304825901038485, "loss": 1.2356, "step": 26759 }, { "epoch": 0.34773399518877013, "grad_norm": 0.40708211064338684, "learning_rate": 0.00013047999064193713, "loss": 1.3125, "step": 26760 }, { "epoch": 0.347746989732686, "grad_norm": 0.39774417877197266, "learning_rate": 0.00013047739118002573, "loss": 1.3985, "step": 26761 }, { "epoch": 0.3477599842766019, "grad_norm": 0.3474942743778229, "learning_rate": 0.00013047479171811435, "loss": 1.4585, "step": 26762 }, { "epoch": 0.3477729788205177, "grad_norm": 0.4041062295436859, "learning_rate": 0.00013047219225620298, "loss": 1.2488, "step": 26763 }, { "epoch": 0.3477859733644336, "grad_norm": 0.43606680631637573, "learning_rate": 0.00013046959279429157, "loss": 1.4448, "step": 26764 }, { "epoch": 0.34779896790834947, "grad_norm": 0.4023717939853668, "learning_rate": 0.0001304669933323802, "loss": 1.4276, "step": 26765 }, { "epoch": 0.34781196245226537, "grad_norm": 0.4473191201686859, "learning_rate": 0.00013046439387046882, "loss": 1.5601, "step": 26766 }, { "epoch": 0.3478249569961812, "grad_norm": 0.39667677879333496, "learning_rate": 0.00013046179440855745, "loss": 1.2787, "step": 26767 }, { "epoch": 0.3478379515400971, "grad_norm": 0.505514919757843, "learning_rate": 0.00013045919494664604, "loss": 1.6057, "step": 26768 }, { "epoch": 0.34785094608401296, "grad_norm": 0.33252087235450745, "learning_rate": 0.00013045659548473467, "loss": 1.3894, "step": 26769 }, { "epoch": 0.34786394062792886, "grad_norm": 0.36331695318222046, "learning_rate": 0.0001304539960228233, "loss": 1.1501, "step": 26770 }, { "epoch": 0.3478769351718447, "grad_norm": 0.33615437150001526, "learning_rate": 0.0001304513965609119, "loss": 1.2659, "step": 26771 }, { "epoch": 0.3478899297157606, "grad_norm": 0.4483484923839569, "learning_rate": 0.00013044879709900052, "loss": 1.4512, "step": 26772 }, { "epoch": 0.34790292425967645, "grad_norm": 0.36265531182289124, "learning_rate": 0.0001304461976370891, "loss": 1.531, "step": 26773 }, { "epoch": 0.34791591880359235, "grad_norm": 0.500744640827179, "learning_rate": 0.00013044359817517774, "loss": 1.4346, "step": 26774 }, { "epoch": 0.3479289133475082, "grad_norm": 0.4104214310646057, "learning_rate": 0.00013044099871326636, "loss": 1.5788, "step": 26775 }, { "epoch": 0.3479419078914241, "grad_norm": 0.3251439332962036, "learning_rate": 0.00013043839925135496, "loss": 1.2545, "step": 26776 }, { "epoch": 0.34795490243533994, "grad_norm": 0.4101673364639282, "learning_rate": 0.0001304357997894436, "loss": 1.3646, "step": 26777 }, { "epoch": 0.34796789697925584, "grad_norm": 0.31956610083580017, "learning_rate": 0.0001304332003275322, "loss": 1.4088, "step": 26778 }, { "epoch": 0.3479808915231717, "grad_norm": 0.396186500787735, "learning_rate": 0.00013043060086562083, "loss": 1.6465, "step": 26779 }, { "epoch": 0.3479938860670876, "grad_norm": 0.3561484217643738, "learning_rate": 0.00013042800140370943, "loss": 1.3215, "step": 26780 }, { "epoch": 0.34800688061100343, "grad_norm": 0.40662887692451477, "learning_rate": 0.00013042540194179805, "loss": 1.5822, "step": 26781 }, { "epoch": 0.34801987515491933, "grad_norm": 0.4449978172779083, "learning_rate": 0.00013042280247988668, "loss": 1.2456, "step": 26782 }, { "epoch": 0.3480328696988352, "grad_norm": 0.5672455430030823, "learning_rate": 0.00013042020301797528, "loss": 1.4257, "step": 26783 }, { "epoch": 0.3480458642427511, "grad_norm": 0.46624547243118286, "learning_rate": 0.0001304176035560639, "loss": 1.6072, "step": 26784 }, { "epoch": 0.3480588587866669, "grad_norm": 0.4050205945968628, "learning_rate": 0.00013041500409415252, "loss": 1.2362, "step": 26785 }, { "epoch": 0.3480718533305828, "grad_norm": 0.3998050093650818, "learning_rate": 0.00013041240463224115, "loss": 1.5535, "step": 26786 }, { "epoch": 0.34808484787449867, "grad_norm": 0.3982885777950287, "learning_rate": 0.00013040980517032975, "loss": 1.443, "step": 26787 }, { "epoch": 0.34809784241841457, "grad_norm": 0.38016390800476074, "learning_rate": 0.00013040720570841834, "loss": 1.1967, "step": 26788 }, { "epoch": 0.3481108369623304, "grad_norm": 0.4149615168571472, "learning_rate": 0.000130404606246507, "loss": 1.3022, "step": 26789 }, { "epoch": 0.3481238315062463, "grad_norm": 0.46945932507514954, "learning_rate": 0.0001304020067845956, "loss": 1.3634, "step": 26790 }, { "epoch": 0.34813682605016216, "grad_norm": 0.45238369703292847, "learning_rate": 0.00013039940732268422, "loss": 1.3256, "step": 26791 }, { "epoch": 0.34814982059407806, "grad_norm": 0.258750855922699, "learning_rate": 0.00013039680786077281, "loss": 1.3508, "step": 26792 }, { "epoch": 0.3481628151379939, "grad_norm": 0.542230486869812, "learning_rate": 0.00013039420839886144, "loss": 1.477, "step": 26793 }, { "epoch": 0.3481758096819098, "grad_norm": 0.3329165279865265, "learning_rate": 0.00013039160893695006, "loss": 1.5693, "step": 26794 }, { "epoch": 0.34818880422582565, "grad_norm": 0.3818724751472473, "learning_rate": 0.00013038900947503866, "loss": 1.3297, "step": 26795 }, { "epoch": 0.34820179876974156, "grad_norm": 0.4799937903881073, "learning_rate": 0.00013038641001312729, "loss": 1.4337, "step": 26796 }, { "epoch": 0.34821479331365746, "grad_norm": 0.424570769071579, "learning_rate": 0.0001303838105512159, "loss": 1.4419, "step": 26797 }, { "epoch": 0.3482277878575733, "grad_norm": 0.42631858587265015, "learning_rate": 0.00013038121108930453, "loss": 1.4089, "step": 26798 }, { "epoch": 0.3482407824014892, "grad_norm": 0.31591910123825073, "learning_rate": 0.00013037861162739313, "loss": 1.3228, "step": 26799 }, { "epoch": 0.34825377694540505, "grad_norm": 0.3717425763607025, "learning_rate": 0.00013037601216548173, "loss": 1.2834, "step": 26800 }, { "epoch": 0.34826677148932095, "grad_norm": 0.4752770960330963, "learning_rate": 0.00013037341270357038, "loss": 1.482, "step": 26801 }, { "epoch": 0.3482797660332368, "grad_norm": 0.5003453493118286, "learning_rate": 0.00013037081324165898, "loss": 1.6048, "step": 26802 }, { "epoch": 0.3482927605771527, "grad_norm": 0.46734803915023804, "learning_rate": 0.0001303682137797476, "loss": 1.2822, "step": 26803 }, { "epoch": 0.34830575512106854, "grad_norm": 0.30715063214302063, "learning_rate": 0.0001303656143178362, "loss": 1.3288, "step": 26804 }, { "epoch": 0.34831874966498444, "grad_norm": 0.4108499586582184, "learning_rate": 0.00013036301485592482, "loss": 1.3815, "step": 26805 }, { "epoch": 0.3483317442089003, "grad_norm": 0.36533400416374207, "learning_rate": 0.00013036041539401345, "loss": 1.1965, "step": 26806 }, { "epoch": 0.3483447387528162, "grad_norm": 0.5020785927772522, "learning_rate": 0.00013035781593210205, "loss": 1.5708, "step": 26807 }, { "epoch": 0.34835773329673203, "grad_norm": 0.48276105523109436, "learning_rate": 0.00013035521647019067, "loss": 1.2978, "step": 26808 }, { "epoch": 0.34837072784064793, "grad_norm": 0.5764045715332031, "learning_rate": 0.0001303526170082793, "loss": 1.4011, "step": 26809 }, { "epoch": 0.3483837223845638, "grad_norm": 0.5054870843887329, "learning_rate": 0.00013035001754636792, "loss": 1.5591, "step": 26810 }, { "epoch": 0.3483967169284797, "grad_norm": 0.3600458800792694, "learning_rate": 0.00013034741808445652, "loss": 1.5582, "step": 26811 }, { "epoch": 0.3484097114723955, "grad_norm": 0.3598695397377014, "learning_rate": 0.00013034481862254511, "loss": 1.1718, "step": 26812 }, { "epoch": 0.3484227060163114, "grad_norm": 0.35998156666755676, "learning_rate": 0.00013034221916063377, "loss": 1.4594, "step": 26813 }, { "epoch": 0.34843570056022727, "grad_norm": 0.3428550362586975, "learning_rate": 0.00013033961969872236, "loss": 1.4121, "step": 26814 }, { "epoch": 0.34844869510414317, "grad_norm": 0.48719048500061035, "learning_rate": 0.000130337020236811, "loss": 1.2761, "step": 26815 }, { "epoch": 0.348461689648059, "grad_norm": 0.43609923124313354, "learning_rate": 0.0001303344207748996, "loss": 1.3259, "step": 26816 }, { "epoch": 0.3484746841919749, "grad_norm": 0.3070835471153259, "learning_rate": 0.0001303318213129882, "loss": 1.3896, "step": 26817 }, { "epoch": 0.34848767873589076, "grad_norm": 0.4117427468299866, "learning_rate": 0.00013032922185107683, "loss": 1.1216, "step": 26818 }, { "epoch": 0.34850067327980666, "grad_norm": 0.34535476565361023, "learning_rate": 0.00013032662238916543, "loss": 1.3092, "step": 26819 }, { "epoch": 0.3485136678237225, "grad_norm": 0.368813693523407, "learning_rate": 0.00013032402292725408, "loss": 1.3062, "step": 26820 }, { "epoch": 0.3485266623676384, "grad_norm": 0.3915729820728302, "learning_rate": 0.00013032142346534268, "loss": 1.3569, "step": 26821 }, { "epoch": 0.34853965691155425, "grad_norm": 0.3854377567768097, "learning_rate": 0.0001303188240034313, "loss": 1.466, "step": 26822 }, { "epoch": 0.34855265145547015, "grad_norm": 0.45055168867111206, "learning_rate": 0.0001303162245415199, "loss": 1.487, "step": 26823 }, { "epoch": 0.348565645999386, "grad_norm": 0.4203272759914398, "learning_rate": 0.00013031362507960853, "loss": 1.4011, "step": 26824 }, { "epoch": 0.3485786405433019, "grad_norm": 0.3517386019229889, "learning_rate": 0.00013031102561769715, "loss": 1.3355, "step": 26825 }, { "epoch": 0.34859163508721774, "grad_norm": 0.35518479347229004, "learning_rate": 0.00013030842615578575, "loss": 1.3006, "step": 26826 }, { "epoch": 0.34860462963113364, "grad_norm": 0.42156311869621277, "learning_rate": 0.00013030582669387437, "loss": 1.6495, "step": 26827 }, { "epoch": 0.3486176241750495, "grad_norm": 0.3892824649810791, "learning_rate": 0.000130303227231963, "loss": 1.3528, "step": 26828 }, { "epoch": 0.3486306187189654, "grad_norm": 0.3573375642299652, "learning_rate": 0.0001303006277700516, "loss": 1.456, "step": 26829 }, { "epoch": 0.34864361326288124, "grad_norm": 0.377510130405426, "learning_rate": 0.00013029802830814022, "loss": 1.5098, "step": 26830 }, { "epoch": 0.34865660780679714, "grad_norm": 0.4737508296966553, "learning_rate": 0.00013029542884622882, "loss": 1.3942, "step": 26831 }, { "epoch": 0.348669602350713, "grad_norm": 0.5155438780784607, "learning_rate": 0.00013029282938431747, "loss": 1.6184, "step": 26832 }, { "epoch": 0.3486825968946289, "grad_norm": 0.41911548376083374, "learning_rate": 0.00013029022992240607, "loss": 1.3075, "step": 26833 }, { "epoch": 0.3486955914385447, "grad_norm": 0.393661230802536, "learning_rate": 0.0001302876304604947, "loss": 1.4884, "step": 26834 }, { "epoch": 0.34870858598246063, "grad_norm": 0.41192901134490967, "learning_rate": 0.0001302850309985833, "loss": 1.3713, "step": 26835 }, { "epoch": 0.3487215805263765, "grad_norm": 0.40410315990448, "learning_rate": 0.0001302824315366719, "loss": 1.3532, "step": 26836 }, { "epoch": 0.3487345750702924, "grad_norm": 0.37459951639175415, "learning_rate": 0.00013027983207476054, "loss": 1.626, "step": 26837 }, { "epoch": 0.3487475696142082, "grad_norm": 0.5089786052703857, "learning_rate": 0.00013027723261284913, "loss": 1.3874, "step": 26838 }, { "epoch": 0.3487605641581241, "grad_norm": 0.42086324095726013, "learning_rate": 0.00013027463315093776, "loss": 1.4244, "step": 26839 }, { "epoch": 0.34877355870203997, "grad_norm": 0.4996035695075989, "learning_rate": 0.00013027203368902638, "loss": 1.5482, "step": 26840 }, { "epoch": 0.34878655324595587, "grad_norm": 0.3828352987766266, "learning_rate": 0.000130269434227115, "loss": 1.4537, "step": 26841 }, { "epoch": 0.3487995477898717, "grad_norm": 0.38782718777656555, "learning_rate": 0.0001302668347652036, "loss": 1.5231, "step": 26842 }, { "epoch": 0.3488125423337876, "grad_norm": 0.3696490526199341, "learning_rate": 0.0001302642353032922, "loss": 1.4333, "step": 26843 }, { "epoch": 0.34882553687770346, "grad_norm": 0.457909494638443, "learning_rate": 0.00013026163584138085, "loss": 1.5323, "step": 26844 }, { "epoch": 0.34883853142161936, "grad_norm": 0.45369207859039307, "learning_rate": 0.00013025903637946945, "loss": 1.5404, "step": 26845 }, { "epoch": 0.3488515259655352, "grad_norm": 0.39002755284309387, "learning_rate": 0.00013025643691755808, "loss": 1.3482, "step": 26846 }, { "epoch": 0.3488645205094511, "grad_norm": 0.4447665214538574, "learning_rate": 0.00013025383745564667, "loss": 1.3912, "step": 26847 }, { "epoch": 0.34887751505336695, "grad_norm": 0.516338050365448, "learning_rate": 0.0001302512379937353, "loss": 1.5053, "step": 26848 }, { "epoch": 0.34889050959728285, "grad_norm": 0.3584909439086914, "learning_rate": 0.00013024863853182392, "loss": 1.7162, "step": 26849 }, { "epoch": 0.3489035041411987, "grad_norm": 0.45109349489212036, "learning_rate": 0.00013024603906991252, "loss": 1.4726, "step": 26850 }, { "epoch": 0.3489164986851146, "grad_norm": 0.48669570684432983, "learning_rate": 0.00013024343960800117, "loss": 1.277, "step": 26851 }, { "epoch": 0.34892949322903044, "grad_norm": 0.3479592204093933, "learning_rate": 0.00013024084014608977, "loss": 1.3113, "step": 26852 }, { "epoch": 0.34894248777294634, "grad_norm": 0.43328437209129333, "learning_rate": 0.0001302382406841784, "loss": 1.6825, "step": 26853 }, { "epoch": 0.3489554823168622, "grad_norm": 0.3987380862236023, "learning_rate": 0.000130235641222267, "loss": 1.2621, "step": 26854 }, { "epoch": 0.3489684768607781, "grad_norm": 0.49009841680526733, "learning_rate": 0.00013023304176035562, "loss": 1.5059, "step": 26855 }, { "epoch": 0.34898147140469393, "grad_norm": 0.4359487295150757, "learning_rate": 0.00013023044229844424, "loss": 1.3974, "step": 26856 }, { "epoch": 0.34899446594860983, "grad_norm": 0.3093532919883728, "learning_rate": 0.00013022784283653284, "loss": 1.4209, "step": 26857 }, { "epoch": 0.3490074604925257, "grad_norm": 0.33768537640571594, "learning_rate": 0.00013022524337462146, "loss": 1.3923, "step": 26858 }, { "epoch": 0.3490204550364416, "grad_norm": 0.36312350630760193, "learning_rate": 0.00013022264391271009, "loss": 1.1169, "step": 26859 }, { "epoch": 0.3490334495803574, "grad_norm": 0.460262268781662, "learning_rate": 0.00013022004445079868, "loss": 1.372, "step": 26860 }, { "epoch": 0.3490464441242733, "grad_norm": 0.3945809006690979, "learning_rate": 0.0001302174449888873, "loss": 1.4191, "step": 26861 }, { "epoch": 0.34905943866818917, "grad_norm": 0.3828829228878021, "learning_rate": 0.0001302148455269759, "loss": 1.41, "step": 26862 }, { "epoch": 0.34907243321210507, "grad_norm": 0.4079018533229828, "learning_rate": 0.00013021224606506456, "loss": 1.4159, "step": 26863 }, { "epoch": 0.3490854277560209, "grad_norm": 0.37046951055526733, "learning_rate": 0.00013020964660315315, "loss": 1.3911, "step": 26864 }, { "epoch": 0.3490984222999368, "grad_norm": 0.37089216709136963, "learning_rate": 0.00013020704714124178, "loss": 1.3024, "step": 26865 }, { "epoch": 0.34911141684385266, "grad_norm": 0.33971282839775085, "learning_rate": 0.00013020444767933038, "loss": 1.4925, "step": 26866 }, { "epoch": 0.34912441138776856, "grad_norm": 0.3835345208644867, "learning_rate": 0.000130201848217419, "loss": 1.476, "step": 26867 }, { "epoch": 0.3491374059316844, "grad_norm": 0.3725162148475647, "learning_rate": 0.00013019924875550763, "loss": 1.1449, "step": 26868 }, { "epoch": 0.3491504004756003, "grad_norm": 0.494057834148407, "learning_rate": 0.00013019664929359622, "loss": 1.4632, "step": 26869 }, { "epoch": 0.34916339501951615, "grad_norm": 0.3196750283241272, "learning_rate": 0.00013019404983168485, "loss": 1.3638, "step": 26870 }, { "epoch": 0.34917638956343205, "grad_norm": 0.37443065643310547, "learning_rate": 0.00013019145036977347, "loss": 1.4362, "step": 26871 }, { "epoch": 0.3491893841073479, "grad_norm": 0.4043956696987152, "learning_rate": 0.00013018885090786207, "loss": 1.2854, "step": 26872 }, { "epoch": 0.3492023786512638, "grad_norm": 0.45044976472854614, "learning_rate": 0.0001301862514459507, "loss": 1.4914, "step": 26873 }, { "epoch": 0.3492153731951797, "grad_norm": 0.4577068090438843, "learning_rate": 0.0001301836519840393, "loss": 1.392, "step": 26874 }, { "epoch": 0.34922836773909555, "grad_norm": 0.4043601453304291, "learning_rate": 0.00013018105252212794, "loss": 1.5481, "step": 26875 }, { "epoch": 0.34924136228301145, "grad_norm": 0.39608028531074524, "learning_rate": 0.00013017845306021654, "loss": 1.3272, "step": 26876 }, { "epoch": 0.3492543568269273, "grad_norm": 0.3926863372325897, "learning_rate": 0.00013017585359830516, "loss": 1.3866, "step": 26877 }, { "epoch": 0.3492673513708432, "grad_norm": 0.32485154271125793, "learning_rate": 0.00013017325413639376, "loss": 1.0555, "step": 26878 }, { "epoch": 0.34928034591475904, "grad_norm": 0.44558027386665344, "learning_rate": 0.00013017065467448239, "loss": 1.5047, "step": 26879 }, { "epoch": 0.34929334045867494, "grad_norm": 0.45488807559013367, "learning_rate": 0.000130168055212571, "loss": 1.2739, "step": 26880 }, { "epoch": 0.3493063350025908, "grad_norm": 0.40049830079078674, "learning_rate": 0.0001301654557506596, "loss": 1.3355, "step": 26881 }, { "epoch": 0.3493193295465067, "grad_norm": 0.4068833589553833, "learning_rate": 0.00013016285628874823, "loss": 1.3403, "step": 26882 }, { "epoch": 0.34933232409042253, "grad_norm": 0.29012948274612427, "learning_rate": 0.00013016025682683686, "loss": 1.4189, "step": 26883 }, { "epoch": 0.34934531863433843, "grad_norm": 0.43037736415863037, "learning_rate": 0.00013015765736492545, "loss": 1.3888, "step": 26884 }, { "epoch": 0.3493583131782543, "grad_norm": 0.4870368540287018, "learning_rate": 0.00013015505790301408, "loss": 1.6691, "step": 26885 }, { "epoch": 0.3493713077221702, "grad_norm": 0.3602190613746643, "learning_rate": 0.00013015245844110268, "loss": 1.4264, "step": 26886 }, { "epoch": 0.349384302266086, "grad_norm": 0.35174962878227234, "learning_rate": 0.00013014985897919133, "loss": 1.4528, "step": 26887 }, { "epoch": 0.3493972968100019, "grad_norm": 0.37076127529144287, "learning_rate": 0.00013014725951727993, "loss": 1.4095, "step": 26888 }, { "epoch": 0.34941029135391777, "grad_norm": 0.45997482538223267, "learning_rate": 0.00013014466005536855, "loss": 1.5328, "step": 26889 }, { "epoch": 0.34942328589783367, "grad_norm": 0.34510675072669983, "learning_rate": 0.00013014206059345717, "loss": 1.4021, "step": 26890 }, { "epoch": 0.3494362804417495, "grad_norm": 0.3288014829158783, "learning_rate": 0.00013013946113154577, "loss": 1.2502, "step": 26891 }, { "epoch": 0.3494492749856654, "grad_norm": 0.436376690864563, "learning_rate": 0.0001301368616696344, "loss": 1.3732, "step": 26892 }, { "epoch": 0.34946226952958126, "grad_norm": 0.4917808771133423, "learning_rate": 0.000130134262207723, "loss": 1.411, "step": 26893 }, { "epoch": 0.34947526407349716, "grad_norm": 0.33655521273612976, "learning_rate": 0.00013013166274581165, "loss": 1.3825, "step": 26894 }, { "epoch": 0.349488258617413, "grad_norm": 0.40204986929893494, "learning_rate": 0.00013012906328390024, "loss": 1.3366, "step": 26895 }, { "epoch": 0.3495012531613289, "grad_norm": 0.3732942044734955, "learning_rate": 0.00013012646382198884, "loss": 1.4675, "step": 26896 }, { "epoch": 0.34951424770524475, "grad_norm": 0.3512749671936035, "learning_rate": 0.00013012386436007746, "loss": 1.3461, "step": 26897 }, { "epoch": 0.34952724224916065, "grad_norm": 0.5294820666313171, "learning_rate": 0.0001301212648981661, "loss": 1.48, "step": 26898 }, { "epoch": 0.3495402367930765, "grad_norm": 0.4390273094177246, "learning_rate": 0.0001301186654362547, "loss": 1.4389, "step": 26899 }, { "epoch": 0.3495532313369924, "grad_norm": 0.44069093465805054, "learning_rate": 0.0001301160659743433, "loss": 1.4742, "step": 26900 }, { "epoch": 0.34956622588090824, "grad_norm": 0.3821607232093811, "learning_rate": 0.00013011346651243194, "loss": 1.4528, "step": 26901 }, { "epoch": 0.34957922042482414, "grad_norm": 0.4463363587856293, "learning_rate": 0.00013011086705052056, "loss": 1.3085, "step": 26902 }, { "epoch": 0.34959221496874, "grad_norm": 0.4405565857887268, "learning_rate": 0.00013010826758860916, "loss": 1.4502, "step": 26903 }, { "epoch": 0.3496052095126559, "grad_norm": 0.457396924495697, "learning_rate": 0.00013010566812669778, "loss": 1.3796, "step": 26904 }, { "epoch": 0.34961820405657174, "grad_norm": 0.4090796709060669, "learning_rate": 0.00013010306866478638, "loss": 1.2769, "step": 26905 }, { "epoch": 0.34963119860048764, "grad_norm": 0.5057259202003479, "learning_rate": 0.00013010046920287503, "loss": 1.3692, "step": 26906 }, { "epoch": 0.3496441931444035, "grad_norm": 0.3464180529117584, "learning_rate": 0.00013009786974096363, "loss": 1.4632, "step": 26907 }, { "epoch": 0.3496571876883194, "grad_norm": 0.3629237115383148, "learning_rate": 0.00013009527027905225, "loss": 1.3575, "step": 26908 }, { "epoch": 0.3496701822322352, "grad_norm": 0.3495189845561981, "learning_rate": 0.00013009267081714085, "loss": 1.64, "step": 26909 }, { "epoch": 0.34968317677615113, "grad_norm": 0.513372540473938, "learning_rate": 0.00013009007135522947, "loss": 1.3079, "step": 26910 }, { "epoch": 0.349696171320067, "grad_norm": 0.37216681241989136, "learning_rate": 0.0001300874718933181, "loss": 1.3351, "step": 26911 }, { "epoch": 0.3497091658639829, "grad_norm": 0.38222554326057434, "learning_rate": 0.0001300848724314067, "loss": 1.3744, "step": 26912 }, { "epoch": 0.3497221604078987, "grad_norm": 0.4410099983215332, "learning_rate": 0.00013008227296949532, "loss": 1.53, "step": 26913 }, { "epoch": 0.3497351549518146, "grad_norm": 0.41673049330711365, "learning_rate": 0.00013007967350758394, "loss": 1.3899, "step": 26914 }, { "epoch": 0.34974814949573046, "grad_norm": 0.3506511449813843, "learning_rate": 0.00013007707404567254, "loss": 1.5135, "step": 26915 }, { "epoch": 0.34976114403964637, "grad_norm": 0.4729503095149994, "learning_rate": 0.00013007447458376117, "loss": 1.4096, "step": 26916 }, { "epoch": 0.3497741385835622, "grad_norm": 0.36889171600341797, "learning_rate": 0.00013007187512184976, "loss": 1.5223, "step": 26917 }, { "epoch": 0.3497871331274781, "grad_norm": 0.35369524359703064, "learning_rate": 0.00013006927565993842, "loss": 1.4416, "step": 26918 }, { "epoch": 0.34980012767139396, "grad_norm": 0.42414289712905884, "learning_rate": 0.000130066676198027, "loss": 1.4806, "step": 26919 }, { "epoch": 0.34981312221530986, "grad_norm": 0.35942816734313965, "learning_rate": 0.00013006407673611564, "loss": 1.202, "step": 26920 }, { "epoch": 0.3498261167592257, "grad_norm": 0.3879469931125641, "learning_rate": 0.00013006147727420424, "loss": 1.4859, "step": 26921 }, { "epoch": 0.3498391113031416, "grad_norm": 0.43745309114456177, "learning_rate": 0.00013005887781229286, "loss": 1.5778, "step": 26922 }, { "epoch": 0.34985210584705745, "grad_norm": 0.4024139940738678, "learning_rate": 0.00013005627835038148, "loss": 1.3276, "step": 26923 }, { "epoch": 0.34986510039097335, "grad_norm": 0.5062936544418335, "learning_rate": 0.00013005367888847008, "loss": 1.4731, "step": 26924 }, { "epoch": 0.3498780949348892, "grad_norm": 0.5520234107971191, "learning_rate": 0.00013005107942655873, "loss": 1.4388, "step": 26925 }, { "epoch": 0.3498910894788051, "grad_norm": 0.3350699543952942, "learning_rate": 0.00013004847996464733, "loss": 1.5799, "step": 26926 }, { "epoch": 0.34990408402272094, "grad_norm": 0.42720645666122437, "learning_rate": 0.00013004588050273593, "loss": 1.4483, "step": 26927 }, { "epoch": 0.34991707856663684, "grad_norm": 0.4357052445411682, "learning_rate": 0.00013004328104082455, "loss": 1.6186, "step": 26928 }, { "epoch": 0.3499300731105527, "grad_norm": 0.39714014530181885, "learning_rate": 0.00013004068157891318, "loss": 1.5038, "step": 26929 }, { "epoch": 0.3499430676544686, "grad_norm": 0.3437274396419525, "learning_rate": 0.0001300380821170018, "loss": 1.2741, "step": 26930 }, { "epoch": 0.34995606219838443, "grad_norm": 0.4881211817264557, "learning_rate": 0.0001300354826550904, "loss": 1.3654, "step": 26931 }, { "epoch": 0.34996905674230033, "grad_norm": 0.4385480284690857, "learning_rate": 0.00013003288319317902, "loss": 1.602, "step": 26932 }, { "epoch": 0.3499820512862162, "grad_norm": 0.3212842047214508, "learning_rate": 0.00013003028373126765, "loss": 1.2611, "step": 26933 }, { "epoch": 0.3499950458301321, "grad_norm": 0.3364401161670685, "learning_rate": 0.00013002768426935624, "loss": 1.3029, "step": 26934 }, { "epoch": 0.3500080403740479, "grad_norm": 0.33741095662117004, "learning_rate": 0.00013002508480744487, "loss": 1.39, "step": 26935 }, { "epoch": 0.3500210349179638, "grad_norm": 0.43944478034973145, "learning_rate": 0.00013002248534553347, "loss": 1.3808, "step": 26936 }, { "epoch": 0.35003402946187967, "grad_norm": 0.4914529025554657, "learning_rate": 0.00013001988588362212, "loss": 1.4968, "step": 26937 }, { "epoch": 0.35004702400579557, "grad_norm": 0.38273531198501587, "learning_rate": 0.00013001728642171072, "loss": 1.3837, "step": 26938 }, { "epoch": 0.3500600185497114, "grad_norm": 0.35967475175857544, "learning_rate": 0.0001300146869597993, "loss": 1.3749, "step": 26939 }, { "epoch": 0.3500730130936273, "grad_norm": 0.4515627324581146, "learning_rate": 0.00013001208749788794, "loss": 1.4449, "step": 26940 }, { "epoch": 0.35008600763754316, "grad_norm": 0.35798168182373047, "learning_rate": 0.00013000948803597656, "loss": 1.2316, "step": 26941 }, { "epoch": 0.35009900218145906, "grad_norm": 0.45054376125335693, "learning_rate": 0.0001300068885740652, "loss": 1.5569, "step": 26942 }, { "epoch": 0.3501119967253749, "grad_norm": 0.413839727640152, "learning_rate": 0.00013000428911215378, "loss": 1.3274, "step": 26943 }, { "epoch": 0.3501249912692908, "grad_norm": 0.3641895055770874, "learning_rate": 0.0001300016896502424, "loss": 1.4187, "step": 26944 }, { "epoch": 0.35013798581320665, "grad_norm": 0.4793921709060669, "learning_rate": 0.00012999909018833103, "loss": 1.4823, "step": 26945 }, { "epoch": 0.35015098035712255, "grad_norm": 0.4696202874183655, "learning_rate": 0.00012999649072641963, "loss": 1.3019, "step": 26946 }, { "epoch": 0.3501639749010384, "grad_norm": 0.38293203711509705, "learning_rate": 0.00012999389126450825, "loss": 1.3395, "step": 26947 }, { "epoch": 0.3501769694449543, "grad_norm": 0.384132444858551, "learning_rate": 0.00012999129180259685, "loss": 1.3047, "step": 26948 }, { "epoch": 0.35018996398887015, "grad_norm": 0.3273977041244507, "learning_rate": 0.0001299886923406855, "loss": 1.357, "step": 26949 }, { "epoch": 0.35020295853278605, "grad_norm": 0.4120301306247711, "learning_rate": 0.0001299860928787741, "loss": 1.5079, "step": 26950 }, { "epoch": 0.35021595307670195, "grad_norm": 0.3835808038711548, "learning_rate": 0.0001299834934168627, "loss": 1.4467, "step": 26951 }, { "epoch": 0.3502289476206178, "grad_norm": 0.38245677947998047, "learning_rate": 0.00012998089395495132, "loss": 1.5248, "step": 26952 }, { "epoch": 0.3502419421645337, "grad_norm": 0.3783007860183716, "learning_rate": 0.00012997829449303995, "loss": 1.5285, "step": 26953 }, { "epoch": 0.35025493670844954, "grad_norm": 0.39009571075439453, "learning_rate": 0.00012997569503112857, "loss": 1.309, "step": 26954 }, { "epoch": 0.35026793125236544, "grad_norm": 0.3074268698692322, "learning_rate": 0.00012997309556921717, "loss": 1.2015, "step": 26955 }, { "epoch": 0.3502809257962813, "grad_norm": 0.3684777319431305, "learning_rate": 0.0001299704961073058, "loss": 1.3467, "step": 26956 }, { "epoch": 0.3502939203401972, "grad_norm": 0.377359002828598, "learning_rate": 0.00012996789664539442, "loss": 1.2908, "step": 26957 }, { "epoch": 0.35030691488411303, "grad_norm": 0.4214313328266144, "learning_rate": 0.00012996529718348302, "loss": 1.3175, "step": 26958 }, { "epoch": 0.35031990942802893, "grad_norm": 0.37894168496131897, "learning_rate": 0.00012996269772157164, "loss": 1.4382, "step": 26959 }, { "epoch": 0.3503329039719448, "grad_norm": 0.40779784321784973, "learning_rate": 0.00012996009825966024, "loss": 1.3777, "step": 26960 }, { "epoch": 0.3503458985158607, "grad_norm": 0.3485451340675354, "learning_rate": 0.0001299574987977489, "loss": 1.2494, "step": 26961 }, { "epoch": 0.3503588930597765, "grad_norm": 0.38289153575897217, "learning_rate": 0.0001299548993358375, "loss": 1.4358, "step": 26962 }, { "epoch": 0.3503718876036924, "grad_norm": 0.3391575217247009, "learning_rate": 0.0001299522998739261, "loss": 1.3516, "step": 26963 }, { "epoch": 0.35038488214760827, "grad_norm": 0.3853176534175873, "learning_rate": 0.00012994970041201474, "loss": 1.1945, "step": 26964 }, { "epoch": 0.35039787669152417, "grad_norm": 0.4643430709838867, "learning_rate": 0.00012994710095010333, "loss": 1.487, "step": 26965 }, { "epoch": 0.35041087123544, "grad_norm": 0.41897791624069214, "learning_rate": 0.00012994450148819196, "loss": 1.4653, "step": 26966 }, { "epoch": 0.3504238657793559, "grad_norm": 0.40874582529067993, "learning_rate": 0.00012994190202628055, "loss": 1.5163, "step": 26967 }, { "epoch": 0.35043686032327176, "grad_norm": 0.5114034414291382, "learning_rate": 0.00012993930256436918, "loss": 1.433, "step": 26968 }, { "epoch": 0.35044985486718766, "grad_norm": 0.40875813364982605, "learning_rate": 0.0001299367031024578, "loss": 1.3619, "step": 26969 }, { "epoch": 0.3504628494111035, "grad_norm": 0.35730940103530884, "learning_rate": 0.0001299341036405464, "loss": 1.4359, "step": 26970 }, { "epoch": 0.3504758439550194, "grad_norm": 0.4098184108734131, "learning_rate": 0.00012993150417863503, "loss": 1.3103, "step": 26971 }, { "epoch": 0.35048883849893525, "grad_norm": 0.45835843682289124, "learning_rate": 0.00012992890471672365, "loss": 1.4403, "step": 26972 }, { "epoch": 0.35050183304285115, "grad_norm": 0.4666747748851776, "learning_rate": 0.00012992630525481227, "loss": 1.52, "step": 26973 }, { "epoch": 0.350514827586767, "grad_norm": 0.3246106803417206, "learning_rate": 0.00012992370579290087, "loss": 1.3549, "step": 26974 }, { "epoch": 0.3505278221306829, "grad_norm": 0.3853197395801544, "learning_rate": 0.0001299211063309895, "loss": 1.3704, "step": 26975 }, { "epoch": 0.35054081667459874, "grad_norm": 0.4811348021030426, "learning_rate": 0.00012991850686907812, "loss": 1.5558, "step": 26976 }, { "epoch": 0.35055381121851464, "grad_norm": 0.39772531390190125, "learning_rate": 0.00012991590740716672, "loss": 1.1971, "step": 26977 }, { "epoch": 0.3505668057624305, "grad_norm": 0.3588211238384247, "learning_rate": 0.00012991330794525534, "loss": 1.246, "step": 26978 }, { "epoch": 0.3505798003063464, "grad_norm": 0.39411279559135437, "learning_rate": 0.00012991070848334394, "loss": 1.2463, "step": 26979 }, { "epoch": 0.35059279485026223, "grad_norm": 0.34600964188575745, "learning_rate": 0.00012990810902143256, "loss": 1.336, "step": 26980 }, { "epoch": 0.35060578939417814, "grad_norm": 0.30417564511299133, "learning_rate": 0.0001299055095595212, "loss": 1.113, "step": 26981 }, { "epoch": 0.350618783938094, "grad_norm": 0.49325844645500183, "learning_rate": 0.0001299029100976098, "loss": 1.4323, "step": 26982 }, { "epoch": 0.3506317784820099, "grad_norm": 0.40701404213905334, "learning_rate": 0.0001299003106356984, "loss": 1.4937, "step": 26983 }, { "epoch": 0.3506447730259257, "grad_norm": 0.44545090198516846, "learning_rate": 0.00012989771117378704, "loss": 1.2507, "step": 26984 }, { "epoch": 0.3506577675698416, "grad_norm": 0.32665714621543884, "learning_rate": 0.00012989511171187566, "loss": 1.3411, "step": 26985 }, { "epoch": 0.3506707621137575, "grad_norm": 0.36822062730789185, "learning_rate": 0.00012989251224996426, "loss": 1.2569, "step": 26986 }, { "epoch": 0.3506837566576734, "grad_norm": 0.416346937417984, "learning_rate": 0.00012988991278805288, "loss": 1.4851, "step": 26987 }, { "epoch": 0.3506967512015892, "grad_norm": 0.4807894229888916, "learning_rate": 0.0001298873133261415, "loss": 1.3956, "step": 26988 }, { "epoch": 0.3507097457455051, "grad_norm": 0.46177607774734497, "learning_rate": 0.0001298847138642301, "loss": 1.2707, "step": 26989 }, { "epoch": 0.35072274028942096, "grad_norm": 0.39404296875, "learning_rate": 0.00012988211440231873, "loss": 1.5894, "step": 26990 }, { "epoch": 0.35073573483333687, "grad_norm": 0.38684603571891785, "learning_rate": 0.00012987951494040733, "loss": 1.2964, "step": 26991 }, { "epoch": 0.3507487293772527, "grad_norm": 0.4723605513572693, "learning_rate": 0.00012987691547849598, "loss": 1.479, "step": 26992 }, { "epoch": 0.3507617239211686, "grad_norm": 0.37446537613868713, "learning_rate": 0.00012987431601658457, "loss": 1.5069, "step": 26993 }, { "epoch": 0.35077471846508446, "grad_norm": 0.4684825837612152, "learning_rate": 0.00012987171655467317, "loss": 1.3526, "step": 26994 }, { "epoch": 0.35078771300900036, "grad_norm": 0.4224631190299988, "learning_rate": 0.0001298691170927618, "loss": 1.3635, "step": 26995 }, { "epoch": 0.3508007075529162, "grad_norm": 0.40388378500938416, "learning_rate": 0.00012986651763085042, "loss": 1.4961, "step": 26996 }, { "epoch": 0.3508137020968321, "grad_norm": 0.3414419889450073, "learning_rate": 0.00012986391816893905, "loss": 1.2616, "step": 26997 }, { "epoch": 0.35082669664074795, "grad_norm": 0.33642011880874634, "learning_rate": 0.00012986131870702764, "loss": 1.4362, "step": 26998 }, { "epoch": 0.35083969118466385, "grad_norm": 0.2976331114768982, "learning_rate": 0.00012985871924511627, "loss": 1.2113, "step": 26999 }, { "epoch": 0.3508526857285797, "grad_norm": 0.33675840497016907, "learning_rate": 0.0001298561197832049, "loss": 1.218, "step": 27000 }, { "epoch": 0.3508656802724956, "grad_norm": 0.37695372104644775, "learning_rate": 0.0001298535203212935, "loss": 1.4911, "step": 27001 }, { "epoch": 0.35087867481641144, "grad_norm": 0.43729162216186523, "learning_rate": 0.0001298509208593821, "loss": 1.5009, "step": 27002 }, { "epoch": 0.35089166936032734, "grad_norm": 0.3892037272453308, "learning_rate": 0.00012984832139747074, "loss": 1.2716, "step": 27003 }, { "epoch": 0.3509046639042432, "grad_norm": 0.32991844415664673, "learning_rate": 0.00012984572193555936, "loss": 1.1744, "step": 27004 }, { "epoch": 0.3509176584481591, "grad_norm": 0.3075510561466217, "learning_rate": 0.00012984312247364796, "loss": 1.3327, "step": 27005 }, { "epoch": 0.35093065299207493, "grad_norm": 0.4822843670845032, "learning_rate": 0.00012984052301173656, "loss": 1.411, "step": 27006 }, { "epoch": 0.35094364753599083, "grad_norm": 0.4144551753997803, "learning_rate": 0.0001298379235498252, "loss": 1.2215, "step": 27007 }, { "epoch": 0.3509566420799067, "grad_norm": 0.3838679790496826, "learning_rate": 0.0001298353240879138, "loss": 1.4537, "step": 27008 }, { "epoch": 0.3509696366238226, "grad_norm": 0.3840310275554657, "learning_rate": 0.00012983272462600243, "loss": 1.4244, "step": 27009 }, { "epoch": 0.3509826311677384, "grad_norm": 0.33375999331474304, "learning_rate": 0.00012983012516409103, "loss": 1.1103, "step": 27010 }, { "epoch": 0.3509956257116543, "grad_norm": 0.4148995280265808, "learning_rate": 0.00012982752570217965, "loss": 1.5907, "step": 27011 }, { "epoch": 0.35100862025557017, "grad_norm": 0.4446263611316681, "learning_rate": 0.00012982492624026828, "loss": 1.3562, "step": 27012 }, { "epoch": 0.35102161479948607, "grad_norm": 0.4836627244949341, "learning_rate": 0.00012982232677835687, "loss": 1.475, "step": 27013 }, { "epoch": 0.3510346093434019, "grad_norm": 0.37680524587631226, "learning_rate": 0.0001298197273164455, "loss": 1.3046, "step": 27014 }, { "epoch": 0.3510476038873178, "grad_norm": 0.29187628626823425, "learning_rate": 0.00012981712785453412, "loss": 1.1886, "step": 27015 }, { "epoch": 0.35106059843123366, "grad_norm": 0.3478575646877289, "learning_rate": 0.00012981452839262275, "loss": 1.4898, "step": 27016 }, { "epoch": 0.35107359297514956, "grad_norm": 0.4063595235347748, "learning_rate": 0.00012981192893071135, "loss": 1.4405, "step": 27017 }, { "epoch": 0.3510865875190654, "grad_norm": 0.3334345817565918, "learning_rate": 0.00012980932946879994, "loss": 1.2063, "step": 27018 }, { "epoch": 0.3510995820629813, "grad_norm": 0.3710629940032959, "learning_rate": 0.0001298067300068886, "loss": 1.1747, "step": 27019 }, { "epoch": 0.35111257660689715, "grad_norm": 0.4263536036014557, "learning_rate": 0.0001298041305449772, "loss": 1.5293, "step": 27020 }, { "epoch": 0.35112557115081305, "grad_norm": 0.4655008614063263, "learning_rate": 0.00012980153108306582, "loss": 1.4345, "step": 27021 }, { "epoch": 0.3511385656947289, "grad_norm": 0.3801930844783783, "learning_rate": 0.0001297989316211544, "loss": 1.3823, "step": 27022 }, { "epoch": 0.3511515602386448, "grad_norm": 0.3835110366344452, "learning_rate": 0.00012979633215924304, "loss": 1.7593, "step": 27023 }, { "epoch": 0.35116455478256065, "grad_norm": 0.2913677990436554, "learning_rate": 0.00012979373269733166, "loss": 1.3644, "step": 27024 }, { "epoch": 0.35117754932647655, "grad_norm": 0.5896442532539368, "learning_rate": 0.00012979113323542026, "loss": 1.5097, "step": 27025 }, { "epoch": 0.35119054387039245, "grad_norm": 0.44798359274864197, "learning_rate": 0.00012978853377350888, "loss": 1.5235, "step": 27026 }, { "epoch": 0.3512035384143083, "grad_norm": 0.2839646637439728, "learning_rate": 0.0001297859343115975, "loss": 1.214, "step": 27027 }, { "epoch": 0.3512165329582242, "grad_norm": 0.2983718812465668, "learning_rate": 0.00012978333484968613, "loss": 1.3195, "step": 27028 }, { "epoch": 0.35122952750214004, "grad_norm": 0.44161394238471985, "learning_rate": 0.00012978073538777473, "loss": 1.4013, "step": 27029 }, { "epoch": 0.35124252204605594, "grad_norm": 0.43229126930236816, "learning_rate": 0.00012977813592586336, "loss": 1.3821, "step": 27030 }, { "epoch": 0.3512555165899718, "grad_norm": 0.4676183760166168, "learning_rate": 0.00012977553646395198, "loss": 1.2401, "step": 27031 }, { "epoch": 0.3512685111338877, "grad_norm": 0.42624327540397644, "learning_rate": 0.00012977293700204058, "loss": 1.4408, "step": 27032 }, { "epoch": 0.35128150567780353, "grad_norm": 0.47141867876052856, "learning_rate": 0.0001297703375401292, "loss": 1.4197, "step": 27033 }, { "epoch": 0.35129450022171943, "grad_norm": 0.47578155994415283, "learning_rate": 0.0001297677380782178, "loss": 1.4183, "step": 27034 }, { "epoch": 0.3513074947656353, "grad_norm": 0.3964194357395172, "learning_rate": 0.00012976513861630642, "loss": 1.641, "step": 27035 }, { "epoch": 0.3513204893095512, "grad_norm": 0.47579044103622437, "learning_rate": 0.00012976253915439505, "loss": 1.4875, "step": 27036 }, { "epoch": 0.351333483853467, "grad_norm": 0.4011628329753876, "learning_rate": 0.00012975993969248365, "loss": 1.298, "step": 27037 }, { "epoch": 0.3513464783973829, "grad_norm": 0.3676300346851349, "learning_rate": 0.0001297573402305723, "loss": 1.4357, "step": 27038 }, { "epoch": 0.35135947294129877, "grad_norm": 0.3938177525997162, "learning_rate": 0.0001297547407686609, "loss": 1.2096, "step": 27039 }, { "epoch": 0.35137246748521467, "grad_norm": 0.3831042945384979, "learning_rate": 0.00012975214130674952, "loss": 1.4742, "step": 27040 }, { "epoch": 0.3513854620291305, "grad_norm": 0.3779260814189911, "learning_rate": 0.00012974954184483812, "loss": 1.4277, "step": 27041 }, { "epoch": 0.3513984565730464, "grad_norm": 0.3410851061344147, "learning_rate": 0.00012974694238292674, "loss": 1.1344, "step": 27042 }, { "epoch": 0.35141145111696226, "grad_norm": 0.509138286113739, "learning_rate": 0.00012974434292101537, "loss": 1.3833, "step": 27043 }, { "epoch": 0.35142444566087816, "grad_norm": 0.43648701906204224, "learning_rate": 0.00012974174345910396, "loss": 1.3019, "step": 27044 }, { "epoch": 0.351437440204794, "grad_norm": 0.469257652759552, "learning_rate": 0.0001297391439971926, "loss": 1.3618, "step": 27045 }, { "epoch": 0.3514504347487099, "grad_norm": 0.5228627920150757, "learning_rate": 0.0001297365445352812, "loss": 1.6148, "step": 27046 }, { "epoch": 0.35146342929262575, "grad_norm": 0.4003678858280182, "learning_rate": 0.00012973394507336984, "loss": 1.3875, "step": 27047 }, { "epoch": 0.35147642383654165, "grad_norm": 0.45567476749420166, "learning_rate": 0.00012973134561145843, "loss": 1.3138, "step": 27048 }, { "epoch": 0.3514894183804575, "grad_norm": 0.48438018560409546, "learning_rate": 0.00012972874614954703, "loss": 1.4265, "step": 27049 }, { "epoch": 0.3515024129243734, "grad_norm": 0.400463730096817, "learning_rate": 0.00012972614668763568, "loss": 1.4113, "step": 27050 }, { "epoch": 0.35151540746828924, "grad_norm": 0.36620020866394043, "learning_rate": 0.00012972354722572428, "loss": 1.2006, "step": 27051 }, { "epoch": 0.35152840201220514, "grad_norm": 0.42971378564834595, "learning_rate": 0.0001297209477638129, "loss": 1.4268, "step": 27052 }, { "epoch": 0.351541396556121, "grad_norm": 0.4140094220638275, "learning_rate": 0.0001297183483019015, "loss": 1.471, "step": 27053 }, { "epoch": 0.3515543911000369, "grad_norm": 0.34700700640678406, "learning_rate": 0.00012971574883999013, "loss": 1.4169, "step": 27054 }, { "epoch": 0.35156738564395273, "grad_norm": 0.3546464145183563, "learning_rate": 0.00012971314937807875, "loss": 1.281, "step": 27055 }, { "epoch": 0.35158038018786864, "grad_norm": 0.3365534842014313, "learning_rate": 0.00012971054991616735, "loss": 1.3327, "step": 27056 }, { "epoch": 0.3515933747317845, "grad_norm": 0.4163077473640442, "learning_rate": 0.00012970795045425597, "loss": 1.542, "step": 27057 }, { "epoch": 0.3516063692757004, "grad_norm": 0.4851019084453583, "learning_rate": 0.0001297053509923446, "loss": 1.19, "step": 27058 }, { "epoch": 0.3516193638196162, "grad_norm": 0.35443252325057983, "learning_rate": 0.00012970275153043322, "loss": 1.4416, "step": 27059 }, { "epoch": 0.3516323583635321, "grad_norm": 0.4046309292316437, "learning_rate": 0.00012970015206852182, "loss": 1.3784, "step": 27060 }, { "epoch": 0.351645352907448, "grad_norm": 0.4896560311317444, "learning_rate": 0.00012969755260661042, "loss": 1.3577, "step": 27061 }, { "epoch": 0.3516583474513639, "grad_norm": 0.3035734295845032, "learning_rate": 0.00012969495314469907, "loss": 1.4203, "step": 27062 }, { "epoch": 0.3516713419952797, "grad_norm": 0.3710680902004242, "learning_rate": 0.00012969235368278766, "loss": 1.633, "step": 27063 }, { "epoch": 0.3516843365391956, "grad_norm": 0.41660815477371216, "learning_rate": 0.0001296897542208763, "loss": 1.3859, "step": 27064 }, { "epoch": 0.35169733108311146, "grad_norm": 0.5244542956352234, "learning_rate": 0.0001296871547589649, "loss": 1.4916, "step": 27065 }, { "epoch": 0.35171032562702736, "grad_norm": 0.3543093204498291, "learning_rate": 0.0001296845552970535, "loss": 1.4181, "step": 27066 }, { "epoch": 0.3517233201709432, "grad_norm": 0.3983149230480194, "learning_rate": 0.00012968195583514214, "loss": 1.3706, "step": 27067 }, { "epoch": 0.3517363147148591, "grad_norm": 0.41664624214172363, "learning_rate": 0.00012967935637323073, "loss": 1.5179, "step": 27068 }, { "epoch": 0.35174930925877496, "grad_norm": 0.3637050986289978, "learning_rate": 0.00012967675691131936, "loss": 1.4566, "step": 27069 }, { "epoch": 0.35176230380269086, "grad_norm": 0.45833367109298706, "learning_rate": 0.00012967415744940798, "loss": 1.3251, "step": 27070 }, { "epoch": 0.3517752983466067, "grad_norm": 0.35691601037979126, "learning_rate": 0.0001296715579874966, "loss": 1.3208, "step": 27071 }, { "epoch": 0.3517882928905226, "grad_norm": 0.39897042512893677, "learning_rate": 0.0001296689585255852, "loss": 1.4172, "step": 27072 }, { "epoch": 0.35180128743443845, "grad_norm": 0.4390867054462433, "learning_rate": 0.00012966635906367383, "loss": 1.4651, "step": 27073 }, { "epoch": 0.35181428197835435, "grad_norm": 0.3452596366405487, "learning_rate": 0.00012966375960176245, "loss": 1.3759, "step": 27074 }, { "epoch": 0.3518272765222702, "grad_norm": 0.37790384888648987, "learning_rate": 0.00012966116013985105, "loss": 1.2155, "step": 27075 }, { "epoch": 0.3518402710661861, "grad_norm": 0.42543140053749084, "learning_rate": 0.00012965856067793967, "loss": 1.5491, "step": 27076 }, { "epoch": 0.35185326561010194, "grad_norm": 0.46987539529800415, "learning_rate": 0.0001296559612160283, "loss": 1.4277, "step": 27077 }, { "epoch": 0.35186626015401784, "grad_norm": 0.28291940689086914, "learning_rate": 0.0001296533617541169, "loss": 1.3385, "step": 27078 }, { "epoch": 0.3518792546979337, "grad_norm": 0.42285341024398804, "learning_rate": 0.00012965076229220552, "loss": 1.3215, "step": 27079 }, { "epoch": 0.3518922492418496, "grad_norm": 0.4595276117324829, "learning_rate": 0.00012964816283029412, "loss": 1.3864, "step": 27080 }, { "epoch": 0.35190524378576543, "grad_norm": 0.3763505816459656, "learning_rate": 0.00012964556336838277, "loss": 1.4072, "step": 27081 }, { "epoch": 0.35191823832968133, "grad_norm": 0.36641186475753784, "learning_rate": 0.00012964296390647137, "loss": 1.3056, "step": 27082 }, { "epoch": 0.3519312328735972, "grad_norm": 0.49726569652557373, "learning_rate": 0.00012964036444456, "loss": 1.5565, "step": 27083 }, { "epoch": 0.3519442274175131, "grad_norm": 0.4177541732788086, "learning_rate": 0.0001296377649826486, "loss": 1.2578, "step": 27084 }, { "epoch": 0.3519572219614289, "grad_norm": 0.48122891783714294, "learning_rate": 0.00012963516552073721, "loss": 1.2529, "step": 27085 }, { "epoch": 0.3519702165053448, "grad_norm": 0.3749167323112488, "learning_rate": 0.00012963256605882584, "loss": 1.2844, "step": 27086 }, { "epoch": 0.35198321104926067, "grad_norm": 0.4928489923477173, "learning_rate": 0.00012962996659691444, "loss": 1.3423, "step": 27087 }, { "epoch": 0.35199620559317657, "grad_norm": 0.3367745876312256, "learning_rate": 0.00012962736713500306, "loss": 1.4079, "step": 27088 }, { "epoch": 0.3520092001370924, "grad_norm": 0.4701220691204071, "learning_rate": 0.00012962476767309168, "loss": 1.5198, "step": 27089 }, { "epoch": 0.3520221946810083, "grad_norm": 0.2655181288719177, "learning_rate": 0.00012962216821118028, "loss": 1.3074, "step": 27090 }, { "epoch": 0.35203518922492416, "grad_norm": 0.4165365993976593, "learning_rate": 0.0001296195687492689, "loss": 1.3321, "step": 27091 }, { "epoch": 0.35204818376884006, "grad_norm": 0.45527905225753784, "learning_rate": 0.0001296169692873575, "loss": 1.3616, "step": 27092 }, { "epoch": 0.3520611783127559, "grad_norm": 0.41198909282684326, "learning_rate": 0.00012961436982544616, "loss": 1.3157, "step": 27093 }, { "epoch": 0.3520741728566718, "grad_norm": 0.3758623003959656, "learning_rate": 0.00012961177036353475, "loss": 1.1563, "step": 27094 }, { "epoch": 0.35208716740058765, "grad_norm": 0.366791695356369, "learning_rate": 0.00012960917090162338, "loss": 1.3808, "step": 27095 }, { "epoch": 0.35210016194450355, "grad_norm": 0.3308677673339844, "learning_rate": 0.00012960657143971197, "loss": 1.166, "step": 27096 }, { "epoch": 0.3521131564884194, "grad_norm": 0.3171396255493164, "learning_rate": 0.0001296039719778006, "loss": 1.282, "step": 27097 }, { "epoch": 0.3521261510323353, "grad_norm": 0.41879427433013916, "learning_rate": 0.00012960137251588922, "loss": 1.3349, "step": 27098 }, { "epoch": 0.35213914557625114, "grad_norm": 0.3509661853313446, "learning_rate": 0.00012959877305397782, "loss": 1.5242, "step": 27099 }, { "epoch": 0.35215214012016705, "grad_norm": 0.45570144057273865, "learning_rate": 0.00012959617359206645, "loss": 1.2994, "step": 27100 }, { "epoch": 0.3521651346640829, "grad_norm": 0.2961921691894531, "learning_rate": 0.00012959357413015507, "loss": 1.145, "step": 27101 }, { "epoch": 0.3521781292079988, "grad_norm": 0.4828557074069977, "learning_rate": 0.00012959097466824367, "loss": 1.4225, "step": 27102 }, { "epoch": 0.3521911237519147, "grad_norm": 0.3583434820175171, "learning_rate": 0.0001295883752063323, "loss": 1.2372, "step": 27103 }, { "epoch": 0.35220411829583054, "grad_norm": 0.41975805163383484, "learning_rate": 0.0001295857757444209, "loss": 1.3186, "step": 27104 }, { "epoch": 0.35221711283974644, "grad_norm": 0.39681947231292725, "learning_rate": 0.00012958317628250954, "loss": 1.501, "step": 27105 }, { "epoch": 0.3522301073836623, "grad_norm": 0.3591127097606659, "learning_rate": 0.00012958057682059814, "loss": 1.4966, "step": 27106 }, { "epoch": 0.3522431019275782, "grad_norm": 0.446712464094162, "learning_rate": 0.00012957797735868676, "loss": 1.2911, "step": 27107 }, { "epoch": 0.35225609647149403, "grad_norm": 0.40670046210289, "learning_rate": 0.00012957537789677536, "loss": 1.5025, "step": 27108 }, { "epoch": 0.35226909101540993, "grad_norm": 0.3264428675174713, "learning_rate": 0.00012957277843486398, "loss": 1.2477, "step": 27109 }, { "epoch": 0.3522820855593258, "grad_norm": 0.41159600019454956, "learning_rate": 0.0001295701789729526, "loss": 1.4344, "step": 27110 }, { "epoch": 0.3522950801032417, "grad_norm": 0.3865565359592438, "learning_rate": 0.0001295675795110412, "loss": 1.3583, "step": 27111 }, { "epoch": 0.3523080746471575, "grad_norm": 0.47535240650177, "learning_rate": 0.00012956498004912986, "loss": 1.5369, "step": 27112 }, { "epoch": 0.3523210691910734, "grad_norm": 0.4841874837875366, "learning_rate": 0.00012956238058721846, "loss": 1.3149, "step": 27113 }, { "epoch": 0.35233406373498927, "grad_norm": 0.36783015727996826, "learning_rate": 0.00012955978112530708, "loss": 1.38, "step": 27114 }, { "epoch": 0.35234705827890517, "grad_norm": 0.3593432307243347, "learning_rate": 0.00012955718166339568, "loss": 1.3898, "step": 27115 }, { "epoch": 0.352360052822821, "grad_norm": 0.41562482714653015, "learning_rate": 0.0001295545822014843, "loss": 1.2801, "step": 27116 }, { "epoch": 0.3523730473667369, "grad_norm": 0.4107935428619385, "learning_rate": 0.00012955198273957293, "loss": 1.3241, "step": 27117 }, { "epoch": 0.35238604191065276, "grad_norm": 0.4419505298137665, "learning_rate": 0.00012954938327766152, "loss": 1.4997, "step": 27118 }, { "epoch": 0.35239903645456866, "grad_norm": 0.5432904362678528, "learning_rate": 0.00012954678381575015, "loss": 1.5411, "step": 27119 }, { "epoch": 0.3524120309984845, "grad_norm": 0.39166587591171265, "learning_rate": 0.00012954418435383877, "loss": 1.4298, "step": 27120 }, { "epoch": 0.3524250255424004, "grad_norm": 0.4792831242084503, "learning_rate": 0.00012954158489192737, "loss": 1.3857, "step": 27121 }, { "epoch": 0.35243802008631625, "grad_norm": 0.3664616346359253, "learning_rate": 0.000129538985430016, "loss": 1.4441, "step": 27122 }, { "epoch": 0.35245101463023215, "grad_norm": 0.3138395845890045, "learning_rate": 0.0001295363859681046, "loss": 1.2474, "step": 27123 }, { "epoch": 0.352464009174148, "grad_norm": 0.46682190895080566, "learning_rate": 0.00012953378650619324, "loss": 1.3826, "step": 27124 }, { "epoch": 0.3524770037180639, "grad_norm": 0.40159592032432556, "learning_rate": 0.00012953118704428184, "loss": 1.3264, "step": 27125 }, { "epoch": 0.35248999826197974, "grad_norm": 0.36960569024086, "learning_rate": 0.00012952858758237047, "loss": 1.554, "step": 27126 }, { "epoch": 0.35250299280589564, "grad_norm": 0.47910141944885254, "learning_rate": 0.00012952598812045906, "loss": 1.5541, "step": 27127 }, { "epoch": 0.3525159873498115, "grad_norm": 0.3956829905509949, "learning_rate": 0.0001295233886585477, "loss": 1.3818, "step": 27128 }, { "epoch": 0.3525289818937274, "grad_norm": 0.4648318290710449, "learning_rate": 0.0001295207891966363, "loss": 1.4809, "step": 27129 }, { "epoch": 0.35254197643764323, "grad_norm": 0.37654101848602295, "learning_rate": 0.0001295181897347249, "loss": 1.4072, "step": 27130 }, { "epoch": 0.35255497098155913, "grad_norm": 0.4834626019001007, "learning_rate": 0.00012951559027281353, "loss": 1.3761, "step": 27131 }, { "epoch": 0.352567965525475, "grad_norm": 0.31139543652534485, "learning_rate": 0.00012951299081090216, "loss": 1.2309, "step": 27132 }, { "epoch": 0.3525809600693909, "grad_norm": 0.3681308329105377, "learning_rate": 0.00012951039134899076, "loss": 1.5518, "step": 27133 }, { "epoch": 0.3525939546133067, "grad_norm": 0.4737289845943451, "learning_rate": 0.00012950779188707938, "loss": 1.4512, "step": 27134 }, { "epoch": 0.3526069491572226, "grad_norm": 0.4096894860267639, "learning_rate": 0.00012950519242516798, "loss": 1.3149, "step": 27135 }, { "epoch": 0.35261994370113847, "grad_norm": 0.4310626685619354, "learning_rate": 0.00012950259296325663, "loss": 1.4938, "step": 27136 }, { "epoch": 0.3526329382450544, "grad_norm": 0.4049728810787201, "learning_rate": 0.00012949999350134523, "loss": 1.3232, "step": 27137 }, { "epoch": 0.3526459327889702, "grad_norm": 0.49418267607688904, "learning_rate": 0.00012949739403943385, "loss": 1.4927, "step": 27138 }, { "epoch": 0.3526589273328861, "grad_norm": 0.37416353821754456, "learning_rate": 0.00012949479457752245, "loss": 1.2577, "step": 27139 }, { "epoch": 0.35267192187680196, "grad_norm": 0.40460726618766785, "learning_rate": 0.00012949219511561107, "loss": 1.4623, "step": 27140 }, { "epoch": 0.35268491642071786, "grad_norm": 0.4240929186344147, "learning_rate": 0.0001294895956536997, "loss": 1.3855, "step": 27141 }, { "epoch": 0.3526979109646337, "grad_norm": 0.468641996383667, "learning_rate": 0.0001294869961917883, "loss": 1.4968, "step": 27142 }, { "epoch": 0.3527109055085496, "grad_norm": 0.3761010766029358, "learning_rate": 0.00012948439672987692, "loss": 1.3565, "step": 27143 }, { "epoch": 0.35272390005246546, "grad_norm": 0.36480721831321716, "learning_rate": 0.00012948179726796554, "loss": 1.6263, "step": 27144 }, { "epoch": 0.35273689459638136, "grad_norm": 0.4512459337711334, "learning_rate": 0.00012947919780605414, "loss": 1.4567, "step": 27145 }, { "epoch": 0.3527498891402972, "grad_norm": 0.4153778553009033, "learning_rate": 0.00012947659834414277, "loss": 1.4001, "step": 27146 }, { "epoch": 0.3527628836842131, "grad_norm": 0.4784587323665619, "learning_rate": 0.0001294739988822314, "loss": 1.6973, "step": 27147 }, { "epoch": 0.35277587822812895, "grad_norm": 0.46332332491874695, "learning_rate": 0.00012947139942032001, "loss": 1.3256, "step": 27148 }, { "epoch": 0.35278887277204485, "grad_norm": 0.4082441031932831, "learning_rate": 0.0001294687999584086, "loss": 1.4366, "step": 27149 }, { "epoch": 0.3528018673159607, "grad_norm": 0.32346153259277344, "learning_rate": 0.00012946620049649724, "loss": 1.4199, "step": 27150 }, { "epoch": 0.3528148618598766, "grad_norm": 0.43141740560531616, "learning_rate": 0.00012946360103458586, "loss": 1.5345, "step": 27151 }, { "epoch": 0.35282785640379244, "grad_norm": 0.3736341893672943, "learning_rate": 0.00012946100157267446, "loss": 1.2965, "step": 27152 }, { "epoch": 0.35284085094770834, "grad_norm": 0.37873899936676025, "learning_rate": 0.00012945840211076308, "loss": 1.4427, "step": 27153 }, { "epoch": 0.3528538454916242, "grad_norm": 0.2640284299850464, "learning_rate": 0.00012945580264885168, "loss": 1.4798, "step": 27154 }, { "epoch": 0.3528668400355401, "grad_norm": 0.356067419052124, "learning_rate": 0.00012945320318694033, "loss": 1.4675, "step": 27155 }, { "epoch": 0.35287983457945593, "grad_norm": 0.4387631118297577, "learning_rate": 0.00012945060372502893, "loss": 1.3971, "step": 27156 }, { "epoch": 0.35289282912337183, "grad_norm": 0.3957112729549408, "learning_rate": 0.00012944800426311753, "loss": 1.4403, "step": 27157 }, { "epoch": 0.3529058236672877, "grad_norm": 0.4088270962238312, "learning_rate": 0.00012944540480120615, "loss": 1.5837, "step": 27158 }, { "epoch": 0.3529188182112036, "grad_norm": 0.4400434195995331, "learning_rate": 0.00012944280533929478, "loss": 1.5252, "step": 27159 }, { "epoch": 0.3529318127551194, "grad_norm": 0.34892958402633667, "learning_rate": 0.0001294402058773834, "loss": 1.5356, "step": 27160 }, { "epoch": 0.3529448072990353, "grad_norm": 0.4146108031272888, "learning_rate": 0.000129437606415472, "loss": 1.4409, "step": 27161 }, { "epoch": 0.35295780184295117, "grad_norm": 0.33908921480178833, "learning_rate": 0.00012943500695356062, "loss": 1.3888, "step": 27162 }, { "epoch": 0.35297079638686707, "grad_norm": 0.3601709306240082, "learning_rate": 0.00012943240749164925, "loss": 1.1899, "step": 27163 }, { "epoch": 0.3529837909307829, "grad_norm": 0.36996033787727356, "learning_rate": 0.00012942980802973784, "loss": 1.2757, "step": 27164 }, { "epoch": 0.3529967854746988, "grad_norm": 0.3836931884288788, "learning_rate": 0.00012942720856782647, "loss": 1.3537, "step": 27165 }, { "epoch": 0.35300978001861466, "grad_norm": 0.3262748122215271, "learning_rate": 0.00012942460910591507, "loss": 1.3677, "step": 27166 }, { "epoch": 0.35302277456253056, "grad_norm": 0.48026683926582336, "learning_rate": 0.00012942200964400372, "loss": 1.4142, "step": 27167 }, { "epoch": 0.3530357691064464, "grad_norm": 0.34279999136924744, "learning_rate": 0.00012941941018209231, "loss": 1.3388, "step": 27168 }, { "epoch": 0.3530487636503623, "grad_norm": 0.29648464918136597, "learning_rate": 0.00012941681072018094, "loss": 1.4247, "step": 27169 }, { "epoch": 0.35306175819427815, "grad_norm": 0.4288727343082428, "learning_rate": 0.00012941421125826954, "loss": 1.3239, "step": 27170 }, { "epoch": 0.35307475273819405, "grad_norm": 0.43024468421936035, "learning_rate": 0.00012941161179635816, "loss": 1.2727, "step": 27171 }, { "epoch": 0.3530877472821099, "grad_norm": 0.38322314620018005, "learning_rate": 0.00012940901233444679, "loss": 1.4501, "step": 27172 }, { "epoch": 0.3531007418260258, "grad_norm": 0.3667447865009308, "learning_rate": 0.00012940641287253538, "loss": 1.2903, "step": 27173 }, { "epoch": 0.35311373636994164, "grad_norm": 0.17939794063568115, "learning_rate": 0.000129403813410624, "loss": 1.2339, "step": 27174 }, { "epoch": 0.35312673091385754, "grad_norm": 0.4549000561237335, "learning_rate": 0.00012940121394871263, "loss": 1.349, "step": 27175 }, { "epoch": 0.3531397254577734, "grad_norm": 0.5015128254890442, "learning_rate": 0.00012939861448680123, "loss": 1.564, "step": 27176 }, { "epoch": 0.3531527200016893, "grad_norm": 0.41706499457359314, "learning_rate": 0.00012939601502488985, "loss": 1.3077, "step": 27177 }, { "epoch": 0.3531657145456052, "grad_norm": 0.3436248004436493, "learning_rate": 0.00012939341556297845, "loss": 1.1769, "step": 27178 }, { "epoch": 0.35317870908952104, "grad_norm": 0.3895522952079773, "learning_rate": 0.0001293908161010671, "loss": 1.5675, "step": 27179 }, { "epoch": 0.35319170363343694, "grad_norm": 0.3534156084060669, "learning_rate": 0.0001293882166391557, "loss": 1.5711, "step": 27180 }, { "epoch": 0.3532046981773528, "grad_norm": 0.3871992826461792, "learning_rate": 0.00012938561717724432, "loss": 1.3985, "step": 27181 }, { "epoch": 0.3532176927212687, "grad_norm": 0.475013792514801, "learning_rate": 0.00012938301771533292, "loss": 1.6038, "step": 27182 }, { "epoch": 0.35323068726518453, "grad_norm": 0.4193267822265625, "learning_rate": 0.00012938041825342155, "loss": 1.337, "step": 27183 }, { "epoch": 0.35324368180910043, "grad_norm": 0.34763145446777344, "learning_rate": 0.00012937781879151017, "loss": 1.3848, "step": 27184 }, { "epoch": 0.3532566763530163, "grad_norm": 0.3938160538673401, "learning_rate": 0.00012937521932959877, "loss": 1.4728, "step": 27185 }, { "epoch": 0.3532696708969322, "grad_norm": 0.34862634539604187, "learning_rate": 0.0001293726198676874, "loss": 1.4367, "step": 27186 }, { "epoch": 0.353282665440848, "grad_norm": 0.4235605299472809, "learning_rate": 0.00012937002040577602, "loss": 1.3693, "step": 27187 }, { "epoch": 0.3532956599847639, "grad_norm": 0.4253822863101959, "learning_rate": 0.00012936742094386461, "loss": 1.386, "step": 27188 }, { "epoch": 0.35330865452867977, "grad_norm": 0.4068506956100464, "learning_rate": 0.00012936482148195324, "loss": 1.287, "step": 27189 }, { "epoch": 0.35332164907259567, "grad_norm": 0.38031384348869324, "learning_rate": 0.00012936222202004186, "loss": 1.2728, "step": 27190 }, { "epoch": 0.3533346436165115, "grad_norm": 0.48991456627845764, "learning_rate": 0.0001293596225581305, "loss": 1.3372, "step": 27191 }, { "epoch": 0.3533476381604274, "grad_norm": 0.45399340987205505, "learning_rate": 0.00012935702309621909, "loss": 1.3801, "step": 27192 }, { "epoch": 0.35336063270434326, "grad_norm": 0.3930378258228302, "learning_rate": 0.0001293544236343077, "loss": 1.4351, "step": 27193 }, { "epoch": 0.35337362724825916, "grad_norm": 0.3967180848121643, "learning_rate": 0.00012935182417239633, "loss": 1.4975, "step": 27194 }, { "epoch": 0.353386621792175, "grad_norm": 0.5270243287086487, "learning_rate": 0.00012934922471048493, "loss": 1.4861, "step": 27195 }, { "epoch": 0.3533996163360909, "grad_norm": 0.3890226185321808, "learning_rate": 0.00012934662524857356, "loss": 1.1952, "step": 27196 }, { "epoch": 0.35341261088000675, "grad_norm": 0.4488624632358551, "learning_rate": 0.00012934402578666215, "loss": 1.1772, "step": 27197 }, { "epoch": 0.35342560542392265, "grad_norm": 0.4957972764968872, "learning_rate": 0.0001293414263247508, "loss": 1.4716, "step": 27198 }, { "epoch": 0.3534385999678385, "grad_norm": 0.3510872423648834, "learning_rate": 0.0001293388268628394, "loss": 1.2411, "step": 27199 }, { "epoch": 0.3534515945117544, "grad_norm": 0.42558473348617554, "learning_rate": 0.000129336227400928, "loss": 1.4113, "step": 27200 }, { "epoch": 0.35346458905567024, "grad_norm": 0.3287118673324585, "learning_rate": 0.00012933362793901662, "loss": 1.2199, "step": 27201 }, { "epoch": 0.35347758359958614, "grad_norm": 0.4286823570728302, "learning_rate": 0.00012933102847710525, "loss": 1.614, "step": 27202 }, { "epoch": 0.353490578143502, "grad_norm": 0.48619240522384644, "learning_rate": 0.00012932842901519387, "loss": 1.445, "step": 27203 }, { "epoch": 0.3535035726874179, "grad_norm": 0.3716752827167511, "learning_rate": 0.00012932582955328247, "loss": 1.4941, "step": 27204 }, { "epoch": 0.35351656723133373, "grad_norm": 0.35998740792274475, "learning_rate": 0.0001293232300913711, "loss": 1.3316, "step": 27205 }, { "epoch": 0.35352956177524963, "grad_norm": 0.34886422753334045, "learning_rate": 0.00012932063062945972, "loss": 1.3542, "step": 27206 }, { "epoch": 0.3535425563191655, "grad_norm": 0.42458242177963257, "learning_rate": 0.00012931803116754832, "loss": 1.4558, "step": 27207 }, { "epoch": 0.3535555508630814, "grad_norm": 0.3253622055053711, "learning_rate": 0.00012931543170563694, "loss": 1.1567, "step": 27208 }, { "epoch": 0.3535685454069972, "grad_norm": 0.4473830759525299, "learning_rate": 0.00012931283224372554, "loss": 1.3769, "step": 27209 }, { "epoch": 0.3535815399509131, "grad_norm": 0.37994784116744995, "learning_rate": 0.0001293102327818142, "loss": 1.3377, "step": 27210 }, { "epoch": 0.35359453449482897, "grad_norm": 0.29859626293182373, "learning_rate": 0.0001293076333199028, "loss": 1.2177, "step": 27211 }, { "epoch": 0.35360752903874487, "grad_norm": 0.49773484468460083, "learning_rate": 0.00012930503385799138, "loss": 1.5254, "step": 27212 }, { "epoch": 0.3536205235826607, "grad_norm": 0.4549420475959778, "learning_rate": 0.00012930243439608, "loss": 1.4952, "step": 27213 }, { "epoch": 0.3536335181265766, "grad_norm": 0.3952312469482422, "learning_rate": 0.00012929983493416863, "loss": 1.3511, "step": 27214 }, { "epoch": 0.35364651267049246, "grad_norm": 0.47965988516807556, "learning_rate": 0.00012929723547225726, "loss": 1.4842, "step": 27215 }, { "epoch": 0.35365950721440836, "grad_norm": 0.37479913234710693, "learning_rate": 0.00012929463601034586, "loss": 1.3256, "step": 27216 }, { "epoch": 0.3536725017583242, "grad_norm": 0.3680041432380676, "learning_rate": 0.00012929203654843448, "loss": 1.3475, "step": 27217 }, { "epoch": 0.3536854963022401, "grad_norm": 0.38537779450416565, "learning_rate": 0.0001292894370865231, "loss": 1.4512, "step": 27218 }, { "epoch": 0.35369849084615596, "grad_norm": 0.3340739905834198, "learning_rate": 0.0001292868376246117, "loss": 1.4271, "step": 27219 }, { "epoch": 0.35371148539007186, "grad_norm": 0.3916126787662506, "learning_rate": 0.00012928423816270033, "loss": 1.3101, "step": 27220 }, { "epoch": 0.3537244799339877, "grad_norm": 0.41248396039009094, "learning_rate": 0.00012928163870078892, "loss": 1.3531, "step": 27221 }, { "epoch": 0.3537374744779036, "grad_norm": 0.4454796016216278, "learning_rate": 0.00012927903923887758, "loss": 1.3815, "step": 27222 }, { "epoch": 0.35375046902181945, "grad_norm": 1.0947011709213257, "learning_rate": 0.00012927643977696617, "loss": 1.4745, "step": 27223 }, { "epoch": 0.35376346356573535, "grad_norm": 0.4864475131034851, "learning_rate": 0.00012927384031505477, "loss": 1.3942, "step": 27224 }, { "epoch": 0.3537764581096512, "grad_norm": 0.39578163623809814, "learning_rate": 0.00012927124085314342, "loss": 1.3082, "step": 27225 }, { "epoch": 0.3537894526535671, "grad_norm": 0.38561388850212097, "learning_rate": 0.00012926864139123202, "loss": 1.3415, "step": 27226 }, { "epoch": 0.35380244719748294, "grad_norm": 0.5119971632957458, "learning_rate": 0.00012926604192932064, "loss": 1.4067, "step": 27227 }, { "epoch": 0.35381544174139884, "grad_norm": 0.4380015432834625, "learning_rate": 0.00012926344246740924, "loss": 1.3924, "step": 27228 }, { "epoch": 0.3538284362853147, "grad_norm": 0.38569170236587524, "learning_rate": 0.00012926084300549787, "loss": 1.4107, "step": 27229 }, { "epoch": 0.3538414308292306, "grad_norm": 0.37366804480552673, "learning_rate": 0.0001292582435435865, "loss": 1.3113, "step": 27230 }, { "epoch": 0.35385442537314643, "grad_norm": 0.3786996603012085, "learning_rate": 0.0001292556440816751, "loss": 1.3082, "step": 27231 }, { "epoch": 0.35386741991706233, "grad_norm": 0.4101344645023346, "learning_rate": 0.0001292530446197637, "loss": 1.2962, "step": 27232 }, { "epoch": 0.3538804144609782, "grad_norm": 0.3904150724411011, "learning_rate": 0.00012925044515785234, "loss": 1.3084, "step": 27233 }, { "epoch": 0.3538934090048941, "grad_norm": 0.49593988060951233, "learning_rate": 0.00012924784569594096, "loss": 1.588, "step": 27234 }, { "epoch": 0.3539064035488099, "grad_norm": 0.47312068939208984, "learning_rate": 0.00012924524623402956, "loss": 1.5722, "step": 27235 }, { "epoch": 0.3539193980927258, "grad_norm": 0.43164587020874023, "learning_rate": 0.00012924264677211818, "loss": 1.4255, "step": 27236 }, { "epoch": 0.35393239263664167, "grad_norm": 0.43883249163627625, "learning_rate": 0.0001292400473102068, "loss": 1.4523, "step": 27237 }, { "epoch": 0.35394538718055757, "grad_norm": 0.4415944218635559, "learning_rate": 0.0001292374478482954, "loss": 1.3986, "step": 27238 }, { "epoch": 0.3539583817244734, "grad_norm": 0.4156629145145416, "learning_rate": 0.00012923484838638403, "loss": 1.3301, "step": 27239 }, { "epoch": 0.3539713762683893, "grad_norm": 0.37376484274864197, "learning_rate": 0.00012923224892447263, "loss": 1.405, "step": 27240 }, { "epoch": 0.35398437081230516, "grad_norm": 0.2952210307121277, "learning_rate": 0.00012922964946256125, "loss": 1.3493, "step": 27241 }, { "epoch": 0.35399736535622106, "grad_norm": 0.36737576127052307, "learning_rate": 0.00012922705000064988, "loss": 1.4874, "step": 27242 }, { "epoch": 0.3540103599001369, "grad_norm": 0.4822121262550354, "learning_rate": 0.00012922445053873847, "loss": 1.3707, "step": 27243 }, { "epoch": 0.3540233544440528, "grad_norm": 0.4565078318119049, "learning_rate": 0.0001292218510768271, "loss": 1.4621, "step": 27244 }, { "epoch": 0.35403634898796865, "grad_norm": 0.429116815328598, "learning_rate": 0.00012921925161491572, "loss": 1.4263, "step": 27245 }, { "epoch": 0.35404934353188455, "grad_norm": 0.38773947954177856, "learning_rate": 0.00012921665215300435, "loss": 1.4314, "step": 27246 }, { "epoch": 0.3540623380758004, "grad_norm": 0.37396326661109924, "learning_rate": 0.00012921405269109294, "loss": 1.3845, "step": 27247 }, { "epoch": 0.3540753326197163, "grad_norm": 0.42212367057800293, "learning_rate": 0.00012921145322918157, "loss": 1.3265, "step": 27248 }, { "epoch": 0.35408832716363214, "grad_norm": 0.4560120403766632, "learning_rate": 0.0001292088537672702, "loss": 1.4377, "step": 27249 }, { "epoch": 0.35410132170754804, "grad_norm": 0.40525567531585693, "learning_rate": 0.0001292062543053588, "loss": 1.3804, "step": 27250 }, { "epoch": 0.3541143162514639, "grad_norm": 0.43293580412864685, "learning_rate": 0.00012920365484344741, "loss": 1.5648, "step": 27251 }, { "epoch": 0.3541273107953798, "grad_norm": 0.44749411940574646, "learning_rate": 0.000129201055381536, "loss": 1.4421, "step": 27252 }, { "epoch": 0.35414030533929564, "grad_norm": 0.3926527202129364, "learning_rate": 0.00012919845591962466, "loss": 1.5333, "step": 27253 }, { "epoch": 0.35415329988321154, "grad_norm": 0.4460231363773346, "learning_rate": 0.00012919585645771326, "loss": 1.2362, "step": 27254 }, { "epoch": 0.35416629442712744, "grad_norm": 0.4694913625717163, "learning_rate": 0.00012919325699580186, "loss": 1.5574, "step": 27255 }, { "epoch": 0.3541792889710433, "grad_norm": 0.40756139159202576, "learning_rate": 0.00012919065753389048, "loss": 1.3267, "step": 27256 }, { "epoch": 0.3541922835149592, "grad_norm": 0.4112973213195801, "learning_rate": 0.0001291880580719791, "loss": 1.2599, "step": 27257 }, { "epoch": 0.35420527805887503, "grad_norm": 0.3808411657810211, "learning_rate": 0.00012918545861006773, "loss": 1.2661, "step": 27258 }, { "epoch": 0.35421827260279093, "grad_norm": 0.4318644106388092, "learning_rate": 0.00012918285914815633, "loss": 1.5312, "step": 27259 }, { "epoch": 0.3542312671467068, "grad_norm": 0.4153105318546295, "learning_rate": 0.00012918025968624495, "loss": 1.2959, "step": 27260 }, { "epoch": 0.3542442616906227, "grad_norm": 0.479297935962677, "learning_rate": 0.00012917766022433358, "loss": 1.3265, "step": 27261 }, { "epoch": 0.3542572562345385, "grad_norm": 0.40943631529808044, "learning_rate": 0.00012917506076242218, "loss": 1.311, "step": 27262 }, { "epoch": 0.3542702507784544, "grad_norm": 0.4646322429180145, "learning_rate": 0.0001291724613005108, "loss": 1.4378, "step": 27263 }, { "epoch": 0.35428324532237027, "grad_norm": 0.44986772537231445, "learning_rate": 0.00012916986183859942, "loss": 1.4146, "step": 27264 }, { "epoch": 0.35429623986628617, "grad_norm": 0.3947587311267853, "learning_rate": 0.00012916726237668805, "loss": 1.6124, "step": 27265 }, { "epoch": 0.354309234410202, "grad_norm": 0.3918352723121643, "learning_rate": 0.00012916466291477665, "loss": 1.5189, "step": 27266 }, { "epoch": 0.3543222289541179, "grad_norm": 0.3979182243347168, "learning_rate": 0.00012916206345286524, "loss": 1.2607, "step": 27267 }, { "epoch": 0.35433522349803376, "grad_norm": 0.3903525769710541, "learning_rate": 0.0001291594639909539, "loss": 1.4899, "step": 27268 }, { "epoch": 0.35434821804194966, "grad_norm": 0.5187159776687622, "learning_rate": 0.0001291568645290425, "loss": 1.3973, "step": 27269 }, { "epoch": 0.3543612125858655, "grad_norm": 0.37096890807151794, "learning_rate": 0.00012915426506713112, "loss": 1.2167, "step": 27270 }, { "epoch": 0.3543742071297814, "grad_norm": 0.40710756182670593, "learning_rate": 0.00012915166560521971, "loss": 1.2715, "step": 27271 }, { "epoch": 0.35438720167369725, "grad_norm": 0.42568182945251465, "learning_rate": 0.00012914906614330834, "loss": 1.3001, "step": 27272 }, { "epoch": 0.35440019621761315, "grad_norm": 0.36408406496047974, "learning_rate": 0.00012914646668139696, "loss": 1.5063, "step": 27273 }, { "epoch": 0.354413190761529, "grad_norm": 0.4338377118110657, "learning_rate": 0.00012914386721948556, "loss": 1.2803, "step": 27274 }, { "epoch": 0.3544261853054449, "grad_norm": 0.3782581090927124, "learning_rate": 0.00012914126775757419, "loss": 1.3557, "step": 27275 }, { "epoch": 0.35443917984936074, "grad_norm": 0.44673991203308105, "learning_rate": 0.0001291386682956628, "loss": 1.2482, "step": 27276 }, { "epoch": 0.35445217439327664, "grad_norm": 0.4182107150554657, "learning_rate": 0.00012913606883375143, "loss": 1.3941, "step": 27277 }, { "epoch": 0.3544651689371925, "grad_norm": 0.4898601174354553, "learning_rate": 0.00012913346937184003, "loss": 1.4784, "step": 27278 }, { "epoch": 0.3544781634811084, "grad_norm": 0.47608789801597595, "learning_rate": 0.00012913086990992863, "loss": 1.5314, "step": 27279 }, { "epoch": 0.35449115802502423, "grad_norm": 0.3632807433605194, "learning_rate": 0.00012912827044801728, "loss": 1.4266, "step": 27280 }, { "epoch": 0.35450415256894013, "grad_norm": 0.34378114342689514, "learning_rate": 0.00012912567098610588, "loss": 1.1938, "step": 27281 }, { "epoch": 0.354517147112856, "grad_norm": 0.38386866450309753, "learning_rate": 0.0001291230715241945, "loss": 1.5958, "step": 27282 }, { "epoch": 0.3545301416567719, "grad_norm": 0.4135749042034149, "learning_rate": 0.0001291204720622831, "loss": 1.2388, "step": 27283 }, { "epoch": 0.3545431362006877, "grad_norm": 0.5608319044113159, "learning_rate": 0.00012911787260037172, "loss": 1.5145, "step": 27284 }, { "epoch": 0.3545561307446036, "grad_norm": 0.35127127170562744, "learning_rate": 0.00012911527313846035, "loss": 1.3196, "step": 27285 }, { "epoch": 0.35456912528851947, "grad_norm": 0.5179656744003296, "learning_rate": 0.00012911267367654895, "loss": 1.4301, "step": 27286 }, { "epoch": 0.35458211983243537, "grad_norm": 0.40099936723709106, "learning_rate": 0.00012911007421463757, "loss": 1.3277, "step": 27287 }, { "epoch": 0.3545951143763512, "grad_norm": 0.3803557753562927, "learning_rate": 0.0001291074747527262, "loss": 1.5526, "step": 27288 }, { "epoch": 0.3546081089202671, "grad_norm": 0.45398080348968506, "learning_rate": 0.00012910487529081482, "loss": 1.4372, "step": 27289 }, { "epoch": 0.35462110346418296, "grad_norm": 0.405096173286438, "learning_rate": 0.00012910227582890342, "loss": 1.527, "step": 27290 }, { "epoch": 0.35463409800809886, "grad_norm": 0.4905530512332916, "learning_rate": 0.00012909967636699204, "loss": 1.4064, "step": 27291 }, { "epoch": 0.3546470925520147, "grad_norm": 0.3833281397819519, "learning_rate": 0.00012909707690508067, "loss": 1.3354, "step": 27292 }, { "epoch": 0.3546600870959306, "grad_norm": 0.3851037919521332, "learning_rate": 0.00012909447744316926, "loss": 1.3762, "step": 27293 }, { "epoch": 0.35467308163984645, "grad_norm": 0.45993658900260925, "learning_rate": 0.0001290918779812579, "loss": 1.5244, "step": 27294 }, { "epoch": 0.35468607618376236, "grad_norm": 0.5152266621589661, "learning_rate": 0.00012908927851934649, "loss": 1.4363, "step": 27295 }, { "epoch": 0.3546990707276782, "grad_norm": 0.542110025882721, "learning_rate": 0.0001290866790574351, "loss": 1.2823, "step": 27296 }, { "epoch": 0.3547120652715941, "grad_norm": 0.35849252343177795, "learning_rate": 0.00012908407959552373, "loss": 1.3021, "step": 27297 }, { "epoch": 0.35472505981550995, "grad_norm": 0.4606815278530121, "learning_rate": 0.00012908148013361233, "loss": 1.3716, "step": 27298 }, { "epoch": 0.35473805435942585, "grad_norm": 0.45573890209198, "learning_rate": 0.00012907888067170098, "loss": 1.5586, "step": 27299 }, { "epoch": 0.3547510489033417, "grad_norm": 0.5061871409416199, "learning_rate": 0.00012907628120978958, "loss": 1.5335, "step": 27300 }, { "epoch": 0.3547640434472576, "grad_norm": 0.45350974798202515, "learning_rate": 0.0001290736817478782, "loss": 1.483, "step": 27301 }, { "epoch": 0.35477703799117344, "grad_norm": 0.3578108847141266, "learning_rate": 0.0001290710822859668, "loss": 1.349, "step": 27302 }, { "epoch": 0.35479003253508934, "grad_norm": 0.5508697032928467, "learning_rate": 0.00012906848282405543, "loss": 1.4382, "step": 27303 }, { "epoch": 0.3548030270790052, "grad_norm": 0.382597416639328, "learning_rate": 0.00012906588336214405, "loss": 1.4373, "step": 27304 }, { "epoch": 0.3548160216229211, "grad_norm": 0.39887455105781555, "learning_rate": 0.00012906328390023265, "loss": 1.2122, "step": 27305 }, { "epoch": 0.35482901616683693, "grad_norm": 0.24349313974380493, "learning_rate": 0.00012906068443832127, "loss": 1.108, "step": 27306 }, { "epoch": 0.35484201071075283, "grad_norm": 0.2975713312625885, "learning_rate": 0.0001290580849764099, "loss": 1.4866, "step": 27307 }, { "epoch": 0.3548550052546687, "grad_norm": 0.388852596282959, "learning_rate": 0.0001290554855144985, "loss": 1.3193, "step": 27308 }, { "epoch": 0.3548679997985846, "grad_norm": 0.2948521375656128, "learning_rate": 0.00012905288605258712, "loss": 1.1614, "step": 27309 }, { "epoch": 0.3548809943425004, "grad_norm": 0.27335628867149353, "learning_rate": 0.00012905028659067572, "loss": 1.2761, "step": 27310 }, { "epoch": 0.3548939888864163, "grad_norm": 0.40031322836875916, "learning_rate": 0.00012904768712876437, "loss": 1.5413, "step": 27311 }, { "epoch": 0.35490698343033217, "grad_norm": 0.4333665668964386, "learning_rate": 0.00012904508766685297, "loss": 1.2508, "step": 27312 }, { "epoch": 0.35491997797424807, "grad_norm": 0.368606835603714, "learning_rate": 0.0001290424882049416, "loss": 1.3583, "step": 27313 }, { "epoch": 0.3549329725181639, "grad_norm": 0.39081260561943054, "learning_rate": 0.0001290398887430302, "loss": 1.3101, "step": 27314 }, { "epoch": 0.3549459670620798, "grad_norm": 0.509438693523407, "learning_rate": 0.0001290372892811188, "loss": 1.3052, "step": 27315 }, { "epoch": 0.35495896160599566, "grad_norm": 0.31404542922973633, "learning_rate": 0.00012903468981920744, "loss": 1.441, "step": 27316 }, { "epoch": 0.35497195614991156, "grad_norm": 0.3912501037120819, "learning_rate": 0.00012903209035729603, "loss": 1.3667, "step": 27317 }, { "epoch": 0.3549849506938274, "grad_norm": 0.40713170170783997, "learning_rate": 0.00012902949089538466, "loss": 1.4105, "step": 27318 }, { "epoch": 0.3549979452377433, "grad_norm": 0.37688878178596497, "learning_rate": 0.00012902689143347328, "loss": 1.2671, "step": 27319 }, { "epoch": 0.35501093978165915, "grad_norm": 0.4456031620502472, "learning_rate": 0.0001290242919715619, "loss": 1.4529, "step": 27320 }, { "epoch": 0.35502393432557505, "grad_norm": 0.4662650525569916, "learning_rate": 0.0001290216925096505, "loss": 1.3751, "step": 27321 }, { "epoch": 0.3550369288694909, "grad_norm": 0.39828863739967346, "learning_rate": 0.0001290190930477391, "loss": 1.4193, "step": 27322 }, { "epoch": 0.3550499234134068, "grad_norm": 0.4406076967716217, "learning_rate": 0.00012901649358582775, "loss": 1.4869, "step": 27323 }, { "epoch": 0.35506291795732264, "grad_norm": 0.3906269669532776, "learning_rate": 0.00012901389412391635, "loss": 1.4086, "step": 27324 }, { "epoch": 0.35507591250123854, "grad_norm": 0.4025513231754303, "learning_rate": 0.00012901129466200498, "loss": 1.2439, "step": 27325 }, { "epoch": 0.3550889070451544, "grad_norm": 0.35108527541160583, "learning_rate": 0.00012900869520009357, "loss": 1.3176, "step": 27326 }, { "epoch": 0.3551019015890703, "grad_norm": 0.32911616563796997, "learning_rate": 0.0001290060957381822, "loss": 1.3763, "step": 27327 }, { "epoch": 0.35511489613298614, "grad_norm": 0.47493571043014526, "learning_rate": 0.00012900349627627082, "loss": 1.4917, "step": 27328 }, { "epoch": 0.35512789067690204, "grad_norm": 0.2588498294353485, "learning_rate": 0.00012900089681435942, "loss": 1.4285, "step": 27329 }, { "epoch": 0.35514088522081794, "grad_norm": 0.38209205865859985, "learning_rate": 0.00012899829735244804, "loss": 1.4594, "step": 27330 }, { "epoch": 0.3551538797647338, "grad_norm": 0.36052706837654114, "learning_rate": 0.00012899569789053667, "loss": 1.3562, "step": 27331 }, { "epoch": 0.3551668743086497, "grad_norm": 0.44969382882118225, "learning_rate": 0.0001289930984286253, "loss": 1.666, "step": 27332 }, { "epoch": 0.35517986885256553, "grad_norm": 0.3375230133533478, "learning_rate": 0.0001289904989667139, "loss": 1.2547, "step": 27333 }, { "epoch": 0.35519286339648143, "grad_norm": 0.44575703144073486, "learning_rate": 0.00012898789950480251, "loss": 1.5851, "step": 27334 }, { "epoch": 0.3552058579403973, "grad_norm": 0.31467291712760925, "learning_rate": 0.00012898530004289114, "loss": 1.41, "step": 27335 }, { "epoch": 0.3552188524843132, "grad_norm": 0.305544376373291, "learning_rate": 0.00012898270058097974, "loss": 1.2327, "step": 27336 }, { "epoch": 0.355231847028229, "grad_norm": 0.3942440152168274, "learning_rate": 0.00012898010111906836, "loss": 1.3341, "step": 27337 }, { "epoch": 0.3552448415721449, "grad_norm": 0.3977477252483368, "learning_rate": 0.00012897750165715699, "loss": 1.2542, "step": 27338 }, { "epoch": 0.35525783611606077, "grad_norm": 0.38865166902542114, "learning_rate": 0.00012897490219524558, "loss": 1.4307, "step": 27339 }, { "epoch": 0.35527083065997667, "grad_norm": 0.33131730556488037, "learning_rate": 0.0001289723027333342, "loss": 1.343, "step": 27340 }, { "epoch": 0.3552838252038925, "grad_norm": 0.3502008616924286, "learning_rate": 0.0001289697032714228, "loss": 1.2184, "step": 27341 }, { "epoch": 0.3552968197478084, "grad_norm": 0.43779516220092773, "learning_rate": 0.00012896710380951146, "loss": 1.4779, "step": 27342 }, { "epoch": 0.35530981429172426, "grad_norm": 0.4649774134159088, "learning_rate": 0.00012896450434760005, "loss": 1.5927, "step": 27343 }, { "epoch": 0.35532280883564016, "grad_norm": 0.40542006492614746, "learning_rate": 0.00012896190488568868, "loss": 1.5022, "step": 27344 }, { "epoch": 0.355335803379556, "grad_norm": 0.43315234780311584, "learning_rate": 0.00012895930542377728, "loss": 1.4163, "step": 27345 }, { "epoch": 0.3553487979234719, "grad_norm": 0.3740081489086151, "learning_rate": 0.0001289567059618659, "loss": 1.3333, "step": 27346 }, { "epoch": 0.35536179246738775, "grad_norm": 0.364037424325943, "learning_rate": 0.00012895410649995452, "loss": 1.5248, "step": 27347 }, { "epoch": 0.35537478701130365, "grad_norm": 0.3614572584629059, "learning_rate": 0.00012895150703804312, "loss": 1.2211, "step": 27348 }, { "epoch": 0.3553877815552195, "grad_norm": 0.34527918696403503, "learning_rate": 0.00012894890757613175, "loss": 1.4175, "step": 27349 }, { "epoch": 0.3554007760991354, "grad_norm": 0.4455159306526184, "learning_rate": 0.00012894630811422037, "loss": 1.2227, "step": 27350 }, { "epoch": 0.35541377064305124, "grad_norm": 0.3693142235279083, "learning_rate": 0.00012894370865230897, "loss": 1.4173, "step": 27351 }, { "epoch": 0.35542676518696714, "grad_norm": 0.3905201256275177, "learning_rate": 0.0001289411091903976, "loss": 1.4463, "step": 27352 }, { "epoch": 0.355439759730883, "grad_norm": 0.2974683344364166, "learning_rate": 0.0001289385097284862, "loss": 1.2861, "step": 27353 }, { "epoch": 0.3554527542747989, "grad_norm": 0.4927957057952881, "learning_rate": 0.00012893591026657484, "loss": 1.3052, "step": 27354 }, { "epoch": 0.35546574881871473, "grad_norm": 0.3714558482170105, "learning_rate": 0.00012893331080466344, "loss": 1.5349, "step": 27355 }, { "epoch": 0.35547874336263063, "grad_norm": 0.39975398778915405, "learning_rate": 0.00012893071134275206, "loss": 1.5843, "step": 27356 }, { "epoch": 0.3554917379065465, "grad_norm": 0.37283632159233093, "learning_rate": 0.00012892811188084066, "loss": 1.4262, "step": 27357 }, { "epoch": 0.3555047324504624, "grad_norm": 0.3456113636493683, "learning_rate": 0.00012892551241892929, "loss": 1.385, "step": 27358 }, { "epoch": 0.3555177269943782, "grad_norm": 0.343330442905426, "learning_rate": 0.0001289229129570179, "loss": 1.3461, "step": 27359 }, { "epoch": 0.3555307215382941, "grad_norm": 0.3786865472793579, "learning_rate": 0.0001289203134951065, "loss": 1.4356, "step": 27360 }, { "epoch": 0.35554371608220997, "grad_norm": 0.38240817189216614, "learning_rate": 0.00012891771403319513, "loss": 1.419, "step": 27361 }, { "epoch": 0.35555671062612587, "grad_norm": 0.39295902848243713, "learning_rate": 0.00012891511457128376, "loss": 1.2585, "step": 27362 }, { "epoch": 0.3555697051700417, "grad_norm": 0.40160566568374634, "learning_rate": 0.00012891251510937235, "loss": 1.4868, "step": 27363 }, { "epoch": 0.3555826997139576, "grad_norm": 0.4206991493701935, "learning_rate": 0.00012890991564746098, "loss": 1.3647, "step": 27364 }, { "epoch": 0.35559569425787346, "grad_norm": 0.4079906642436981, "learning_rate": 0.00012890731618554958, "loss": 1.5184, "step": 27365 }, { "epoch": 0.35560868880178936, "grad_norm": 0.47296544909477234, "learning_rate": 0.00012890471672363823, "loss": 1.4384, "step": 27366 }, { "epoch": 0.3556216833457052, "grad_norm": 0.37764808535575867, "learning_rate": 0.00012890211726172682, "loss": 1.3668, "step": 27367 }, { "epoch": 0.3556346778896211, "grad_norm": 0.36749958992004395, "learning_rate": 0.00012889951779981545, "loss": 1.6478, "step": 27368 }, { "epoch": 0.35564767243353695, "grad_norm": 0.4778698682785034, "learning_rate": 0.00012889691833790405, "loss": 1.5483, "step": 27369 }, { "epoch": 0.35566066697745286, "grad_norm": 0.3563973903656006, "learning_rate": 0.00012889431887599267, "loss": 1.6435, "step": 27370 }, { "epoch": 0.3556736615213687, "grad_norm": 0.448848694562912, "learning_rate": 0.0001288917194140813, "loss": 1.3224, "step": 27371 }, { "epoch": 0.3556866560652846, "grad_norm": 0.34736618399620056, "learning_rate": 0.0001288891199521699, "loss": 1.3078, "step": 27372 }, { "epoch": 0.35569965060920045, "grad_norm": 0.3348294794559479, "learning_rate": 0.00012888652049025854, "loss": 1.242, "step": 27373 }, { "epoch": 0.35571264515311635, "grad_norm": 0.573210597038269, "learning_rate": 0.00012888392102834714, "loss": 1.4365, "step": 27374 }, { "epoch": 0.3557256396970322, "grad_norm": 0.37353646755218506, "learning_rate": 0.00012888132156643577, "loss": 1.2811, "step": 27375 }, { "epoch": 0.3557386342409481, "grad_norm": 0.26454856991767883, "learning_rate": 0.00012887872210452436, "loss": 1.2599, "step": 27376 }, { "epoch": 0.35575162878486394, "grad_norm": 0.3324933350086212, "learning_rate": 0.000128876122642613, "loss": 1.1641, "step": 27377 }, { "epoch": 0.35576462332877984, "grad_norm": 0.4507709741592407, "learning_rate": 0.0001288735231807016, "loss": 1.5577, "step": 27378 }, { "epoch": 0.3557776178726957, "grad_norm": 0.37117379903793335, "learning_rate": 0.0001288709237187902, "loss": 1.4714, "step": 27379 }, { "epoch": 0.3557906124166116, "grad_norm": 0.2662966251373291, "learning_rate": 0.00012886832425687883, "loss": 1.1298, "step": 27380 }, { "epoch": 0.35580360696052743, "grad_norm": 0.43862196803092957, "learning_rate": 0.00012886572479496746, "loss": 1.4317, "step": 27381 }, { "epoch": 0.35581660150444333, "grad_norm": 0.35771024227142334, "learning_rate": 0.00012886312533305606, "loss": 1.3211, "step": 27382 }, { "epoch": 0.3558295960483592, "grad_norm": 0.4146856665611267, "learning_rate": 0.00012886052587114468, "loss": 1.4294, "step": 27383 }, { "epoch": 0.3558425905922751, "grad_norm": 0.35615599155426025, "learning_rate": 0.00012885792640923328, "loss": 1.4626, "step": 27384 }, { "epoch": 0.3558555851361909, "grad_norm": 0.4137577712535858, "learning_rate": 0.00012885532694732193, "loss": 1.5483, "step": 27385 }, { "epoch": 0.3558685796801068, "grad_norm": 0.3834209740161896, "learning_rate": 0.00012885272748541053, "loss": 1.5189, "step": 27386 }, { "epoch": 0.35588157422402267, "grad_norm": 0.38186535239219666, "learning_rate": 0.00012885012802349915, "loss": 1.4562, "step": 27387 }, { "epoch": 0.35589456876793857, "grad_norm": 0.30644282698631287, "learning_rate": 0.00012884752856158775, "loss": 1.3494, "step": 27388 }, { "epoch": 0.3559075633118544, "grad_norm": 0.3626924753189087, "learning_rate": 0.00012884492909967637, "loss": 1.269, "step": 27389 }, { "epoch": 0.3559205578557703, "grad_norm": 0.3227306604385376, "learning_rate": 0.000128842329637765, "loss": 1.3591, "step": 27390 }, { "epoch": 0.35593355239968616, "grad_norm": 0.44583258032798767, "learning_rate": 0.0001288397301758536, "loss": 1.4902, "step": 27391 }, { "epoch": 0.35594654694360206, "grad_norm": 0.5193873047828674, "learning_rate": 0.00012883713071394222, "loss": 1.3556, "step": 27392 }, { "epoch": 0.3559595414875179, "grad_norm": 0.39028802514076233, "learning_rate": 0.00012883453125203084, "loss": 1.3033, "step": 27393 }, { "epoch": 0.3559725360314338, "grad_norm": 0.2958873212337494, "learning_rate": 0.00012883193179011944, "loss": 1.4745, "step": 27394 }, { "epoch": 0.35598553057534965, "grad_norm": 0.4339183270931244, "learning_rate": 0.00012882933232820807, "loss": 1.6533, "step": 27395 }, { "epoch": 0.35599852511926555, "grad_norm": 0.34015390276908875, "learning_rate": 0.00012882673286629666, "loss": 1.3683, "step": 27396 }, { "epoch": 0.3560115196631814, "grad_norm": 0.3121531903743744, "learning_rate": 0.00012882413340438532, "loss": 1.3909, "step": 27397 }, { "epoch": 0.3560245142070973, "grad_norm": 0.3852488398551941, "learning_rate": 0.0001288215339424739, "loss": 1.5461, "step": 27398 }, { "epoch": 0.35603750875101314, "grad_norm": 0.4048745036125183, "learning_rate": 0.00012881893448056254, "loss": 1.4399, "step": 27399 }, { "epoch": 0.35605050329492904, "grad_norm": 0.2905421257019043, "learning_rate": 0.00012881633501865113, "loss": 1.3293, "step": 27400 }, { "epoch": 0.3560634978388449, "grad_norm": 0.46253320574760437, "learning_rate": 0.00012881373555673976, "loss": 1.4594, "step": 27401 }, { "epoch": 0.3560764923827608, "grad_norm": 0.40848439931869507, "learning_rate": 0.00012881113609482838, "loss": 1.4353, "step": 27402 }, { "epoch": 0.35608948692667663, "grad_norm": 0.44682836532592773, "learning_rate": 0.00012880853663291698, "loss": 1.4041, "step": 27403 }, { "epoch": 0.35610248147059254, "grad_norm": 0.42151501774787903, "learning_rate": 0.0001288059371710056, "loss": 1.4256, "step": 27404 }, { "epoch": 0.3561154760145084, "grad_norm": 0.3288726210594177, "learning_rate": 0.00012880333770909423, "loss": 1.2968, "step": 27405 }, { "epoch": 0.3561284705584243, "grad_norm": 0.42383715510368347, "learning_rate": 0.00012880073824718283, "loss": 1.4676, "step": 27406 }, { "epoch": 0.3561414651023402, "grad_norm": 0.35092878341674805, "learning_rate": 0.00012879813878527145, "loss": 1.3394, "step": 27407 }, { "epoch": 0.356154459646256, "grad_norm": 0.34372085332870483, "learning_rate": 0.00012879553932336008, "loss": 1.2541, "step": 27408 }, { "epoch": 0.35616745419017193, "grad_norm": 0.35861751437187195, "learning_rate": 0.0001287929398614487, "loss": 1.2553, "step": 27409 }, { "epoch": 0.3561804487340878, "grad_norm": 0.42946261167526245, "learning_rate": 0.0001287903403995373, "loss": 1.4311, "step": 27410 }, { "epoch": 0.3561934432780037, "grad_norm": 0.43926334381103516, "learning_rate": 0.00012878774093762592, "loss": 1.4788, "step": 27411 }, { "epoch": 0.3562064378219195, "grad_norm": 0.3741862177848816, "learning_rate": 0.00012878514147571455, "loss": 1.412, "step": 27412 }, { "epoch": 0.3562194323658354, "grad_norm": 0.3580864369869232, "learning_rate": 0.00012878254201380314, "loss": 1.3169, "step": 27413 }, { "epoch": 0.35623242690975127, "grad_norm": 0.38417330384254456, "learning_rate": 0.00012877994255189177, "loss": 1.4038, "step": 27414 }, { "epoch": 0.35624542145366717, "grad_norm": 0.3287806510925293, "learning_rate": 0.00012877734308998037, "loss": 1.2851, "step": 27415 }, { "epoch": 0.356258415997583, "grad_norm": 0.32369324564933777, "learning_rate": 0.00012877474362806902, "loss": 1.2773, "step": 27416 }, { "epoch": 0.3562714105414989, "grad_norm": 0.3283182382583618, "learning_rate": 0.00012877214416615762, "loss": 1.3611, "step": 27417 }, { "epoch": 0.35628440508541476, "grad_norm": 0.49619248509407043, "learning_rate": 0.0001287695447042462, "loss": 1.4695, "step": 27418 }, { "epoch": 0.35629739962933066, "grad_norm": 0.35933759808540344, "learning_rate": 0.00012876694524233484, "loss": 1.3998, "step": 27419 }, { "epoch": 0.3563103941732465, "grad_norm": 0.3288777768611908, "learning_rate": 0.00012876434578042346, "loss": 1.2439, "step": 27420 }, { "epoch": 0.3563233887171624, "grad_norm": 0.37923625111579895, "learning_rate": 0.00012876174631851209, "loss": 1.5645, "step": 27421 }, { "epoch": 0.35633638326107825, "grad_norm": 0.47454309463500977, "learning_rate": 0.00012875914685660068, "loss": 1.4808, "step": 27422 }, { "epoch": 0.35634937780499415, "grad_norm": 0.4253118932247162, "learning_rate": 0.0001287565473946893, "loss": 1.3443, "step": 27423 }, { "epoch": 0.35636237234891, "grad_norm": 0.34108883142471313, "learning_rate": 0.00012875394793277793, "loss": 1.2262, "step": 27424 }, { "epoch": 0.3563753668928259, "grad_norm": 0.41169416904449463, "learning_rate": 0.00012875134847086653, "loss": 1.3079, "step": 27425 }, { "epoch": 0.35638836143674174, "grad_norm": 0.43785667419433594, "learning_rate": 0.00012874874900895515, "loss": 1.4222, "step": 27426 }, { "epoch": 0.35640135598065764, "grad_norm": 0.41733235120773315, "learning_rate": 0.00012874614954704375, "loss": 1.5373, "step": 27427 }, { "epoch": 0.3564143505245735, "grad_norm": 0.4157243072986603, "learning_rate": 0.0001287435500851324, "loss": 1.5147, "step": 27428 }, { "epoch": 0.3564273450684894, "grad_norm": 0.4268386960029602, "learning_rate": 0.000128740950623221, "loss": 1.4156, "step": 27429 }, { "epoch": 0.35644033961240523, "grad_norm": 0.43282243609428406, "learning_rate": 0.0001287383511613096, "loss": 1.5022, "step": 27430 }, { "epoch": 0.35645333415632113, "grad_norm": 0.4985935389995575, "learning_rate": 0.00012873575169939822, "loss": 1.3959, "step": 27431 }, { "epoch": 0.356466328700237, "grad_norm": 0.3616192936897278, "learning_rate": 0.00012873315223748685, "loss": 1.3045, "step": 27432 }, { "epoch": 0.3564793232441529, "grad_norm": 0.44508063793182373, "learning_rate": 0.00012873055277557547, "loss": 1.4425, "step": 27433 }, { "epoch": 0.3564923177880687, "grad_norm": 0.4529131054878235, "learning_rate": 0.00012872795331366407, "loss": 1.5997, "step": 27434 }, { "epoch": 0.3565053123319846, "grad_norm": 0.4284322261810303, "learning_rate": 0.0001287253538517527, "loss": 1.6232, "step": 27435 }, { "epoch": 0.35651830687590047, "grad_norm": 0.40824034810066223, "learning_rate": 0.00012872275438984132, "loss": 1.535, "step": 27436 }, { "epoch": 0.35653130141981637, "grad_norm": 0.4123683273792267, "learning_rate": 0.00012872015492792992, "loss": 1.4224, "step": 27437 }, { "epoch": 0.3565442959637322, "grad_norm": 0.3949636220932007, "learning_rate": 0.00012871755546601854, "loss": 1.4208, "step": 27438 }, { "epoch": 0.3565572905076481, "grad_norm": 0.36995771527290344, "learning_rate": 0.00012871495600410714, "loss": 1.4248, "step": 27439 }, { "epoch": 0.35657028505156396, "grad_norm": 0.38794466853141785, "learning_rate": 0.0001287123565421958, "loss": 1.4921, "step": 27440 }, { "epoch": 0.35658327959547986, "grad_norm": 0.4458300471305847, "learning_rate": 0.00012870975708028439, "loss": 1.5408, "step": 27441 }, { "epoch": 0.3565962741393957, "grad_norm": 0.45325544476509094, "learning_rate": 0.000128707157618373, "loss": 1.2174, "step": 27442 }, { "epoch": 0.3566092686833116, "grad_norm": 0.37380313873291016, "learning_rate": 0.0001287045581564616, "loss": 1.3825, "step": 27443 }, { "epoch": 0.35662226322722745, "grad_norm": 0.43868035078048706, "learning_rate": 0.00012870195869455023, "loss": 1.5105, "step": 27444 }, { "epoch": 0.35663525777114335, "grad_norm": 0.4251108467578888, "learning_rate": 0.00012869935923263886, "loss": 1.3589, "step": 27445 }, { "epoch": 0.3566482523150592, "grad_norm": 0.4346287250518799, "learning_rate": 0.00012869675977072745, "loss": 1.7435, "step": 27446 }, { "epoch": 0.3566612468589751, "grad_norm": 0.3544836640357971, "learning_rate": 0.00012869416030881608, "loss": 1.2252, "step": 27447 }, { "epoch": 0.35667424140289095, "grad_norm": 0.27312132716178894, "learning_rate": 0.0001286915608469047, "loss": 1.3102, "step": 27448 }, { "epoch": 0.35668723594680685, "grad_norm": 0.33369505405426025, "learning_rate": 0.0001286889613849933, "loss": 1.1703, "step": 27449 }, { "epoch": 0.3567002304907227, "grad_norm": 0.4068581163883209, "learning_rate": 0.00012868636192308193, "loss": 1.4872, "step": 27450 }, { "epoch": 0.3567132250346386, "grad_norm": 0.3219605088233948, "learning_rate": 0.00012868376246117055, "loss": 1.6284, "step": 27451 }, { "epoch": 0.35672621957855444, "grad_norm": 0.4218311011791229, "learning_rate": 0.00012868116299925917, "loss": 1.1814, "step": 27452 }, { "epoch": 0.35673921412247034, "grad_norm": 0.37565284967422485, "learning_rate": 0.00012867856353734777, "loss": 1.3931, "step": 27453 }, { "epoch": 0.3567522086663862, "grad_norm": 0.40845251083374023, "learning_rate": 0.0001286759640754364, "loss": 1.2751, "step": 27454 }, { "epoch": 0.3567652032103021, "grad_norm": 0.20542186498641968, "learning_rate": 0.00012867336461352502, "loss": 1.2302, "step": 27455 }, { "epoch": 0.35677819775421793, "grad_norm": 0.4377051293849945, "learning_rate": 0.00012867076515161362, "loss": 1.3382, "step": 27456 }, { "epoch": 0.35679119229813383, "grad_norm": 0.3814966082572937, "learning_rate": 0.00012866816568970224, "loss": 1.3556, "step": 27457 }, { "epoch": 0.3568041868420497, "grad_norm": 0.26771900057792664, "learning_rate": 0.00012866556622779084, "loss": 1.1319, "step": 27458 }, { "epoch": 0.3568171813859656, "grad_norm": 0.3916628658771515, "learning_rate": 0.0001286629667658795, "loss": 1.2944, "step": 27459 }, { "epoch": 0.3568301759298814, "grad_norm": 0.43458664417266846, "learning_rate": 0.0001286603673039681, "loss": 1.5212, "step": 27460 }, { "epoch": 0.3568431704737973, "grad_norm": 0.4136747419834137, "learning_rate": 0.00012865776784205669, "loss": 1.4247, "step": 27461 }, { "epoch": 0.35685616501771317, "grad_norm": 0.41235896944999695, "learning_rate": 0.0001286551683801453, "loss": 1.3885, "step": 27462 }, { "epoch": 0.35686915956162907, "grad_norm": 0.4405613839626312, "learning_rate": 0.00012865256891823393, "loss": 1.5091, "step": 27463 }, { "epoch": 0.3568821541055449, "grad_norm": 0.30737170577049255, "learning_rate": 0.00012864996945632256, "loss": 1.2896, "step": 27464 }, { "epoch": 0.3568951486494608, "grad_norm": 0.4509899914264679, "learning_rate": 0.00012864736999441116, "loss": 1.5135, "step": 27465 }, { "epoch": 0.35690814319337666, "grad_norm": 0.4336063861846924, "learning_rate": 0.00012864477053249978, "loss": 1.2786, "step": 27466 }, { "epoch": 0.35692113773729256, "grad_norm": 0.4078902304172516, "learning_rate": 0.0001286421710705884, "loss": 1.543, "step": 27467 }, { "epoch": 0.3569341322812084, "grad_norm": 0.455644816160202, "learning_rate": 0.000128639571608677, "loss": 1.4249, "step": 27468 }, { "epoch": 0.3569471268251243, "grad_norm": 0.329316109418869, "learning_rate": 0.00012863697214676563, "loss": 1.2674, "step": 27469 }, { "epoch": 0.35696012136904015, "grad_norm": 0.465891033411026, "learning_rate": 0.00012863437268485423, "loss": 1.5171, "step": 27470 }, { "epoch": 0.35697311591295605, "grad_norm": 0.44211751222610474, "learning_rate": 0.00012863177322294288, "loss": 1.3645, "step": 27471 }, { "epoch": 0.3569861104568719, "grad_norm": 0.4569490849971771, "learning_rate": 0.00012862917376103147, "loss": 1.344, "step": 27472 }, { "epoch": 0.3569991050007878, "grad_norm": 0.4001865088939667, "learning_rate": 0.00012862657429912007, "loss": 1.4994, "step": 27473 }, { "epoch": 0.35701209954470364, "grad_norm": 0.34411925077438354, "learning_rate": 0.0001286239748372087, "loss": 1.2782, "step": 27474 }, { "epoch": 0.35702509408861954, "grad_norm": 0.4111136496067047, "learning_rate": 0.00012862137537529732, "loss": 1.3819, "step": 27475 }, { "epoch": 0.3570380886325354, "grad_norm": 0.42017242312431335, "learning_rate": 0.00012861877591338594, "loss": 1.6094, "step": 27476 }, { "epoch": 0.3570510831764513, "grad_norm": 0.5055058002471924, "learning_rate": 0.00012861617645147454, "loss": 1.2962, "step": 27477 }, { "epoch": 0.35706407772036713, "grad_norm": 0.31187164783477783, "learning_rate": 0.00012861357698956317, "loss": 1.206, "step": 27478 }, { "epoch": 0.35707707226428304, "grad_norm": 0.3526204526424408, "learning_rate": 0.0001286109775276518, "loss": 1.3635, "step": 27479 }, { "epoch": 0.3570900668081989, "grad_norm": 0.3856695294380188, "learning_rate": 0.0001286083780657404, "loss": 1.3929, "step": 27480 }, { "epoch": 0.3571030613521148, "grad_norm": 0.38815370202064514, "learning_rate": 0.000128605778603829, "loss": 1.4646, "step": 27481 }, { "epoch": 0.3571160558960306, "grad_norm": 0.3988179862499237, "learning_rate": 0.00012860317914191764, "loss": 1.2555, "step": 27482 }, { "epoch": 0.3571290504399465, "grad_norm": 0.5218237042427063, "learning_rate": 0.00012860057968000626, "loss": 1.5987, "step": 27483 }, { "epoch": 0.35714204498386243, "grad_norm": 0.3820565342903137, "learning_rate": 0.00012859798021809486, "loss": 1.4103, "step": 27484 }, { "epoch": 0.3571550395277783, "grad_norm": 0.4730757474899292, "learning_rate": 0.00012859538075618346, "loss": 1.3554, "step": 27485 }, { "epoch": 0.3571680340716942, "grad_norm": 0.4169571101665497, "learning_rate": 0.0001285927812942721, "loss": 1.3402, "step": 27486 }, { "epoch": 0.35718102861561, "grad_norm": 0.3566894829273224, "learning_rate": 0.0001285901818323607, "loss": 1.281, "step": 27487 }, { "epoch": 0.3571940231595259, "grad_norm": 0.4983883798122406, "learning_rate": 0.00012858758237044933, "loss": 1.4113, "step": 27488 }, { "epoch": 0.35720701770344176, "grad_norm": 0.33130237460136414, "learning_rate": 0.00012858498290853793, "loss": 1.483, "step": 27489 }, { "epoch": 0.35722001224735767, "grad_norm": 0.575483500957489, "learning_rate": 0.00012858238344662655, "loss": 1.6155, "step": 27490 }, { "epoch": 0.3572330067912735, "grad_norm": 0.3217366635799408, "learning_rate": 0.00012857978398471518, "loss": 1.2439, "step": 27491 }, { "epoch": 0.3572460013351894, "grad_norm": 0.405439555644989, "learning_rate": 0.00012857718452280377, "loss": 1.4758, "step": 27492 }, { "epoch": 0.35725899587910526, "grad_norm": 0.422473281621933, "learning_rate": 0.0001285745850608924, "loss": 1.4858, "step": 27493 }, { "epoch": 0.35727199042302116, "grad_norm": 0.49928635358810425, "learning_rate": 0.00012857198559898102, "loss": 1.3273, "step": 27494 }, { "epoch": 0.357284984966937, "grad_norm": 0.3200952708721161, "learning_rate": 0.00012856938613706965, "loss": 1.4406, "step": 27495 }, { "epoch": 0.3572979795108529, "grad_norm": 0.49757829308509827, "learning_rate": 0.00012856678667515824, "loss": 1.4077, "step": 27496 }, { "epoch": 0.35731097405476875, "grad_norm": 0.43589434027671814, "learning_rate": 0.00012856418721324687, "loss": 1.2956, "step": 27497 }, { "epoch": 0.35732396859868465, "grad_norm": 0.3679733872413635, "learning_rate": 0.0001285615877513355, "loss": 1.1184, "step": 27498 }, { "epoch": 0.3573369631426005, "grad_norm": 0.4152955114841461, "learning_rate": 0.0001285589882894241, "loss": 1.3858, "step": 27499 }, { "epoch": 0.3573499576865164, "grad_norm": 0.36925140023231506, "learning_rate": 0.00012855638882751272, "loss": 1.3428, "step": 27500 }, { "epoch": 0.35736295223043224, "grad_norm": 0.3072669208049774, "learning_rate": 0.0001285537893656013, "loss": 1.2925, "step": 27501 }, { "epoch": 0.35737594677434814, "grad_norm": 0.44781291484832764, "learning_rate": 0.00012855118990368994, "loss": 1.4501, "step": 27502 }, { "epoch": 0.357388941318264, "grad_norm": 0.3549068570137024, "learning_rate": 0.00012854859044177856, "loss": 1.2465, "step": 27503 }, { "epoch": 0.3574019358621799, "grad_norm": 0.35514843463897705, "learning_rate": 0.00012854599097986716, "loss": 1.4616, "step": 27504 }, { "epoch": 0.35741493040609573, "grad_norm": 0.4505949020385742, "learning_rate": 0.00012854339151795578, "loss": 1.3495, "step": 27505 }, { "epoch": 0.35742792495001163, "grad_norm": 0.39385169744491577, "learning_rate": 0.0001285407920560444, "loss": 1.4904, "step": 27506 }, { "epoch": 0.3574409194939275, "grad_norm": 0.42098578810691833, "learning_rate": 0.00012853819259413303, "loss": 1.2555, "step": 27507 }, { "epoch": 0.3574539140378434, "grad_norm": 0.3905838429927826, "learning_rate": 0.00012853559313222163, "loss": 1.5281, "step": 27508 }, { "epoch": 0.3574669085817592, "grad_norm": 0.45986589789390564, "learning_rate": 0.00012853299367031025, "loss": 1.3821, "step": 27509 }, { "epoch": 0.3574799031256751, "grad_norm": 0.423350065946579, "learning_rate": 0.00012853039420839888, "loss": 1.3925, "step": 27510 }, { "epoch": 0.35749289766959097, "grad_norm": 0.36655890941619873, "learning_rate": 0.00012852779474648748, "loss": 1.2027, "step": 27511 }, { "epoch": 0.35750589221350687, "grad_norm": 0.47736120223999023, "learning_rate": 0.0001285251952845761, "loss": 1.4323, "step": 27512 }, { "epoch": 0.3575188867574227, "grad_norm": 0.41521215438842773, "learning_rate": 0.0001285225958226647, "loss": 1.2866, "step": 27513 }, { "epoch": 0.3575318813013386, "grad_norm": 0.4382805824279785, "learning_rate": 0.00012851999636075332, "loss": 1.3405, "step": 27514 }, { "epoch": 0.35754487584525446, "grad_norm": 0.4032374322414398, "learning_rate": 0.00012851739689884195, "loss": 1.489, "step": 27515 }, { "epoch": 0.35755787038917036, "grad_norm": 0.26001858711242676, "learning_rate": 0.00012851479743693054, "loss": 1.105, "step": 27516 }, { "epoch": 0.3575708649330862, "grad_norm": 0.4461202025413513, "learning_rate": 0.00012851219797501917, "loss": 1.3607, "step": 27517 }, { "epoch": 0.3575838594770021, "grad_norm": 0.44025787711143494, "learning_rate": 0.0001285095985131078, "loss": 1.5405, "step": 27518 }, { "epoch": 0.35759685402091795, "grad_norm": 0.33944806456565857, "learning_rate": 0.00012850699905119642, "loss": 1.1997, "step": 27519 }, { "epoch": 0.35760984856483385, "grad_norm": 0.40193989872932434, "learning_rate": 0.00012850439958928502, "loss": 1.4727, "step": 27520 }, { "epoch": 0.3576228431087497, "grad_norm": 0.33706724643707275, "learning_rate": 0.00012850180012737364, "loss": 1.0974, "step": 27521 }, { "epoch": 0.3576358376526656, "grad_norm": 0.32669657468795776, "learning_rate": 0.00012849920066546226, "loss": 1.48, "step": 27522 }, { "epoch": 0.35764883219658145, "grad_norm": 0.3747687339782715, "learning_rate": 0.00012849660120355086, "loss": 1.5573, "step": 27523 }, { "epoch": 0.35766182674049735, "grad_norm": 0.32767003774642944, "learning_rate": 0.00012849400174163949, "loss": 1.4037, "step": 27524 }, { "epoch": 0.3576748212844132, "grad_norm": 0.35751771926879883, "learning_rate": 0.0001284914022797281, "loss": 1.2994, "step": 27525 }, { "epoch": 0.3576878158283291, "grad_norm": 0.45140567421913147, "learning_rate": 0.00012848880281781674, "loss": 1.2774, "step": 27526 }, { "epoch": 0.35770081037224494, "grad_norm": 0.4241960048675537, "learning_rate": 0.00012848620335590533, "loss": 1.4395, "step": 27527 }, { "epoch": 0.35771380491616084, "grad_norm": 0.4169099032878876, "learning_rate": 0.00012848360389399393, "loss": 1.4031, "step": 27528 }, { "epoch": 0.3577267994600767, "grad_norm": 0.4897788166999817, "learning_rate": 0.00012848100443208258, "loss": 1.4363, "step": 27529 }, { "epoch": 0.3577397940039926, "grad_norm": 0.37553176283836365, "learning_rate": 0.00012847840497017118, "loss": 1.4147, "step": 27530 }, { "epoch": 0.35775278854790843, "grad_norm": 0.3327403664588928, "learning_rate": 0.0001284758055082598, "loss": 1.387, "step": 27531 }, { "epoch": 0.35776578309182433, "grad_norm": 0.363903671503067, "learning_rate": 0.0001284732060463484, "loss": 1.3835, "step": 27532 }, { "epoch": 0.3577787776357402, "grad_norm": 0.3305162787437439, "learning_rate": 0.00012847060658443703, "loss": 1.3024, "step": 27533 }, { "epoch": 0.3577917721796561, "grad_norm": 0.36481979489326477, "learning_rate": 0.00012846800712252565, "loss": 1.2966, "step": 27534 }, { "epoch": 0.3578047667235719, "grad_norm": 0.5287383794784546, "learning_rate": 0.00012846540766061425, "loss": 1.5463, "step": 27535 }, { "epoch": 0.3578177612674878, "grad_norm": 0.2881213128566742, "learning_rate": 0.00012846280819870287, "loss": 1.3323, "step": 27536 }, { "epoch": 0.35783075581140367, "grad_norm": 0.41407474875450134, "learning_rate": 0.0001284602087367915, "loss": 1.3141, "step": 27537 }, { "epoch": 0.35784375035531957, "grad_norm": 0.3082997798919678, "learning_rate": 0.00012845760927488012, "loss": 1.3548, "step": 27538 }, { "epoch": 0.3578567448992354, "grad_norm": 0.410317987203598, "learning_rate": 0.00012845500981296872, "loss": 1.4282, "step": 27539 }, { "epoch": 0.3578697394431513, "grad_norm": 0.39177659153938293, "learning_rate": 0.00012845241035105732, "loss": 1.4706, "step": 27540 }, { "epoch": 0.35788273398706716, "grad_norm": 0.4329338073730469, "learning_rate": 0.00012844981088914597, "loss": 1.4902, "step": 27541 }, { "epoch": 0.35789572853098306, "grad_norm": 0.41480037569999695, "learning_rate": 0.00012844721142723456, "loss": 1.512, "step": 27542 }, { "epoch": 0.3579087230748989, "grad_norm": 0.3825821578502655, "learning_rate": 0.0001284446119653232, "loss": 1.6259, "step": 27543 }, { "epoch": 0.3579217176188148, "grad_norm": 0.4038439393043518, "learning_rate": 0.00012844201250341179, "loss": 1.6682, "step": 27544 }, { "epoch": 0.35793471216273065, "grad_norm": 0.4784427881240845, "learning_rate": 0.0001284394130415004, "loss": 1.32, "step": 27545 }, { "epoch": 0.35794770670664655, "grad_norm": 0.41692790389060974, "learning_rate": 0.00012843681357958904, "loss": 1.374, "step": 27546 }, { "epoch": 0.3579607012505624, "grad_norm": 0.5039559602737427, "learning_rate": 0.00012843421411767763, "loss": 1.5656, "step": 27547 }, { "epoch": 0.3579736957944783, "grad_norm": 0.31632688641548157, "learning_rate": 0.00012843161465576626, "loss": 1.3362, "step": 27548 }, { "epoch": 0.35798669033839414, "grad_norm": 0.45519399642944336, "learning_rate": 0.00012842901519385488, "loss": 1.3214, "step": 27549 }, { "epoch": 0.35799968488231004, "grad_norm": 0.4742424488067627, "learning_rate": 0.0001284264157319435, "loss": 1.5111, "step": 27550 }, { "epoch": 0.3580126794262259, "grad_norm": 0.5005501508712769, "learning_rate": 0.0001284238162700321, "loss": 1.6841, "step": 27551 }, { "epoch": 0.3580256739701418, "grad_norm": 0.3781135678291321, "learning_rate": 0.00012842121680812073, "loss": 1.5331, "step": 27552 }, { "epoch": 0.35803866851405763, "grad_norm": 0.4068751931190491, "learning_rate": 0.00012841861734620935, "loss": 1.4153, "step": 27553 }, { "epoch": 0.35805166305797353, "grad_norm": 0.3550473153591156, "learning_rate": 0.00012841601788429795, "loss": 1.4758, "step": 27554 }, { "epoch": 0.3580646576018894, "grad_norm": 0.43151313066482544, "learning_rate": 0.00012841341842238657, "loss": 1.402, "step": 27555 }, { "epoch": 0.3580776521458053, "grad_norm": 0.44430312514305115, "learning_rate": 0.0001284108189604752, "loss": 1.4594, "step": 27556 }, { "epoch": 0.3580906466897211, "grad_norm": 0.36316725611686707, "learning_rate": 0.0001284082194985638, "loss": 1.5865, "step": 27557 }, { "epoch": 0.358103641233637, "grad_norm": 0.4119855463504791, "learning_rate": 0.00012840562003665242, "loss": 1.5897, "step": 27558 }, { "epoch": 0.3581166357775529, "grad_norm": 0.2742438316345215, "learning_rate": 0.00012840302057474102, "loss": 1.1691, "step": 27559 }, { "epoch": 0.3581296303214688, "grad_norm": 0.4849892258644104, "learning_rate": 0.00012840042111282967, "loss": 1.4513, "step": 27560 }, { "epoch": 0.3581426248653847, "grad_norm": 0.44913890957832336, "learning_rate": 0.00012839782165091827, "loss": 1.3446, "step": 27561 }, { "epoch": 0.3581556194093005, "grad_norm": 0.3804006278514862, "learning_rate": 0.0001283952221890069, "loss": 1.1593, "step": 27562 }, { "epoch": 0.3581686139532164, "grad_norm": 0.654653787612915, "learning_rate": 0.0001283926227270955, "loss": 1.4065, "step": 27563 }, { "epoch": 0.35818160849713226, "grad_norm": 0.3345881998538971, "learning_rate": 0.0001283900232651841, "loss": 1.5086, "step": 27564 }, { "epoch": 0.35819460304104817, "grad_norm": 0.4422236382961273, "learning_rate": 0.00012838742380327274, "loss": 1.3207, "step": 27565 }, { "epoch": 0.358207597584964, "grad_norm": 0.40724867582321167, "learning_rate": 0.00012838482434136134, "loss": 1.4222, "step": 27566 }, { "epoch": 0.3582205921288799, "grad_norm": 0.3789246082305908, "learning_rate": 0.00012838222487944996, "loss": 1.2805, "step": 27567 }, { "epoch": 0.35823358667279576, "grad_norm": 0.39470186829566956, "learning_rate": 0.00012837962541753858, "loss": 1.426, "step": 27568 }, { "epoch": 0.35824658121671166, "grad_norm": 0.36517176032066345, "learning_rate": 0.00012837702595562718, "loss": 1.4738, "step": 27569 }, { "epoch": 0.3582595757606275, "grad_norm": 0.42506855726242065, "learning_rate": 0.0001283744264937158, "loss": 1.4405, "step": 27570 }, { "epoch": 0.3582725703045434, "grad_norm": 0.5019168853759766, "learning_rate": 0.0001283718270318044, "loss": 1.5072, "step": 27571 }, { "epoch": 0.35828556484845925, "grad_norm": 0.3862207233905792, "learning_rate": 0.00012836922756989306, "loss": 1.2913, "step": 27572 }, { "epoch": 0.35829855939237515, "grad_norm": 0.42447784543037415, "learning_rate": 0.00012836662810798165, "loss": 1.5047, "step": 27573 }, { "epoch": 0.358311553936291, "grad_norm": 0.2949811518192291, "learning_rate": 0.00012836402864607028, "loss": 1.218, "step": 27574 }, { "epoch": 0.3583245484802069, "grad_norm": 0.43490588665008545, "learning_rate": 0.00012836142918415887, "loss": 1.5563, "step": 27575 }, { "epoch": 0.35833754302412274, "grad_norm": 0.4238477051258087, "learning_rate": 0.0001283588297222475, "loss": 1.4812, "step": 27576 }, { "epoch": 0.35835053756803864, "grad_norm": 0.5049501061439514, "learning_rate": 0.00012835623026033612, "loss": 1.5118, "step": 27577 }, { "epoch": 0.3583635321119545, "grad_norm": 0.40976300835609436, "learning_rate": 0.00012835363079842472, "loss": 1.3149, "step": 27578 }, { "epoch": 0.3583765266558704, "grad_norm": 0.3435368239879608, "learning_rate": 0.00012835103133651335, "loss": 1.2494, "step": 27579 }, { "epoch": 0.35838952119978623, "grad_norm": 0.4117524325847626, "learning_rate": 0.00012834843187460197, "loss": 1.3695, "step": 27580 }, { "epoch": 0.35840251574370213, "grad_norm": 0.40709027647972107, "learning_rate": 0.0001283458324126906, "loss": 1.4684, "step": 27581 }, { "epoch": 0.358415510287618, "grad_norm": 0.4361148476600647, "learning_rate": 0.0001283432329507792, "loss": 1.4955, "step": 27582 }, { "epoch": 0.3584285048315339, "grad_norm": 0.36271005868911743, "learning_rate": 0.0001283406334888678, "loss": 1.4855, "step": 27583 }, { "epoch": 0.3584414993754497, "grad_norm": 0.43793946504592896, "learning_rate": 0.00012833803402695644, "loss": 1.514, "step": 27584 }, { "epoch": 0.3584544939193656, "grad_norm": 0.4169389307498932, "learning_rate": 0.00012833543456504504, "loss": 1.2333, "step": 27585 }, { "epoch": 0.35846748846328147, "grad_norm": 0.42226526141166687, "learning_rate": 0.00012833283510313366, "loss": 1.2977, "step": 27586 }, { "epoch": 0.35848048300719737, "grad_norm": 0.3446049094200134, "learning_rate": 0.00012833023564122226, "loss": 1.2861, "step": 27587 }, { "epoch": 0.3584934775511132, "grad_norm": 0.4019283354282379, "learning_rate": 0.00012832763617931088, "loss": 1.2001, "step": 27588 }, { "epoch": 0.3585064720950291, "grad_norm": 0.4221995174884796, "learning_rate": 0.0001283250367173995, "loss": 1.2721, "step": 27589 }, { "epoch": 0.35851946663894496, "grad_norm": 0.46465909481048584, "learning_rate": 0.0001283224372554881, "loss": 1.4662, "step": 27590 }, { "epoch": 0.35853246118286086, "grad_norm": 0.336833655834198, "learning_rate": 0.00012831983779357673, "loss": 1.4819, "step": 27591 }, { "epoch": 0.3585454557267767, "grad_norm": 0.42345377802848816, "learning_rate": 0.00012831723833166536, "loss": 1.3168, "step": 27592 }, { "epoch": 0.3585584502706926, "grad_norm": 0.4284893870353699, "learning_rate": 0.00012831463886975398, "loss": 1.7065, "step": 27593 }, { "epoch": 0.35857144481460845, "grad_norm": 0.36274608969688416, "learning_rate": 0.00012831203940784258, "loss": 1.5371, "step": 27594 }, { "epoch": 0.35858443935852435, "grad_norm": 0.3589741587638855, "learning_rate": 0.0001283094399459312, "loss": 1.3476, "step": 27595 }, { "epoch": 0.3585974339024402, "grad_norm": 0.360270619392395, "learning_rate": 0.00012830684048401983, "loss": 1.6086, "step": 27596 }, { "epoch": 0.3586104284463561, "grad_norm": 0.41673773527145386, "learning_rate": 0.00012830424102210842, "loss": 1.319, "step": 27597 }, { "epoch": 0.35862342299027195, "grad_norm": 0.45880311727523804, "learning_rate": 0.00012830164156019705, "loss": 1.3824, "step": 27598 }, { "epoch": 0.35863641753418785, "grad_norm": 0.46175432205200195, "learning_rate": 0.00012829904209828567, "loss": 1.2331, "step": 27599 }, { "epoch": 0.3586494120781037, "grad_norm": 0.5502286553382874, "learning_rate": 0.00012829644263637427, "loss": 1.335, "step": 27600 }, { "epoch": 0.3586624066220196, "grad_norm": 0.39677104353904724, "learning_rate": 0.0001282938431744629, "loss": 1.2464, "step": 27601 }, { "epoch": 0.35867540116593544, "grad_norm": 0.39820152521133423, "learning_rate": 0.0001282912437125515, "loss": 1.2381, "step": 27602 }, { "epoch": 0.35868839570985134, "grad_norm": 0.40472689270973206, "learning_rate": 0.00012828864425064014, "loss": 1.3148, "step": 27603 }, { "epoch": 0.3587013902537672, "grad_norm": 0.3810681104660034, "learning_rate": 0.00012828604478872874, "loss": 1.4141, "step": 27604 }, { "epoch": 0.3587143847976831, "grad_norm": 0.31121259927749634, "learning_rate": 0.00012828344532681736, "loss": 1.3646, "step": 27605 }, { "epoch": 0.35872737934159893, "grad_norm": 0.3821033239364624, "learning_rate": 0.00012828084586490596, "loss": 1.3919, "step": 27606 }, { "epoch": 0.35874037388551483, "grad_norm": 0.4427330791950226, "learning_rate": 0.0001282782464029946, "loss": 1.6134, "step": 27607 }, { "epoch": 0.3587533684294307, "grad_norm": 0.41570109128952026, "learning_rate": 0.0001282756469410832, "loss": 1.3399, "step": 27608 }, { "epoch": 0.3587663629733466, "grad_norm": 0.42868563532829285, "learning_rate": 0.0001282730474791718, "loss": 1.3288, "step": 27609 }, { "epoch": 0.3587793575172624, "grad_norm": 0.3560831844806671, "learning_rate": 0.00012827044801726043, "loss": 1.379, "step": 27610 }, { "epoch": 0.3587923520611783, "grad_norm": 0.427074670791626, "learning_rate": 0.00012826784855534906, "loss": 1.3527, "step": 27611 }, { "epoch": 0.35880534660509417, "grad_norm": 0.39446666836738586, "learning_rate": 0.00012826524909343765, "loss": 1.5074, "step": 27612 }, { "epoch": 0.35881834114901007, "grad_norm": 0.31512296199798584, "learning_rate": 0.00012826264963152628, "loss": 1.1866, "step": 27613 }, { "epoch": 0.3588313356929259, "grad_norm": 0.3750813603401184, "learning_rate": 0.00012826005016961488, "loss": 1.4543, "step": 27614 }, { "epoch": 0.3588443302368418, "grad_norm": 0.3376021981239319, "learning_rate": 0.00012825745070770353, "loss": 1.3941, "step": 27615 }, { "epoch": 0.35885732478075766, "grad_norm": 0.42299073934555054, "learning_rate": 0.00012825485124579213, "loss": 1.4769, "step": 27616 }, { "epoch": 0.35887031932467356, "grad_norm": 0.44412702322006226, "learning_rate": 0.00012825225178388075, "loss": 1.3452, "step": 27617 }, { "epoch": 0.3588833138685894, "grad_norm": 0.3990938067436218, "learning_rate": 0.00012824965232196935, "loss": 1.5445, "step": 27618 }, { "epoch": 0.3588963084125053, "grad_norm": 0.3706763982772827, "learning_rate": 0.00012824705286005797, "loss": 1.2381, "step": 27619 }, { "epoch": 0.35890930295642115, "grad_norm": 0.3917693495750427, "learning_rate": 0.0001282444533981466, "loss": 1.2236, "step": 27620 }, { "epoch": 0.35892229750033705, "grad_norm": 0.5068303346633911, "learning_rate": 0.0001282418539362352, "loss": 1.4606, "step": 27621 }, { "epoch": 0.3589352920442529, "grad_norm": 0.4087268114089966, "learning_rate": 0.00012823925447432382, "loss": 1.5043, "step": 27622 }, { "epoch": 0.3589482865881688, "grad_norm": 0.3701595366001129, "learning_rate": 0.00012823665501241244, "loss": 1.4456, "step": 27623 }, { "epoch": 0.35896128113208464, "grad_norm": 0.36677101254463196, "learning_rate": 0.00012823405555050104, "loss": 1.3954, "step": 27624 }, { "epoch": 0.35897427567600054, "grad_norm": 0.5198606252670288, "learning_rate": 0.00012823145608858966, "loss": 1.5948, "step": 27625 }, { "epoch": 0.3589872702199164, "grad_norm": 0.37631678581237793, "learning_rate": 0.00012822885662667826, "loss": 1.2855, "step": 27626 }, { "epoch": 0.3590002647638323, "grad_norm": 0.47325000166893005, "learning_rate": 0.00012822625716476691, "loss": 1.5006, "step": 27627 }, { "epoch": 0.35901325930774813, "grad_norm": 0.3766353726387024, "learning_rate": 0.0001282236577028555, "loss": 1.4653, "step": 27628 }, { "epoch": 0.35902625385166403, "grad_norm": 0.44791173934936523, "learning_rate": 0.00012822105824094414, "loss": 1.334, "step": 27629 }, { "epoch": 0.3590392483955799, "grad_norm": 0.39020416140556335, "learning_rate": 0.00012821845877903276, "loss": 1.2794, "step": 27630 }, { "epoch": 0.3590522429394958, "grad_norm": 0.4643324613571167, "learning_rate": 0.00012821585931712136, "loss": 1.5083, "step": 27631 }, { "epoch": 0.3590652374834116, "grad_norm": 0.40096136927604675, "learning_rate": 0.00012821325985520998, "loss": 1.3139, "step": 27632 }, { "epoch": 0.3590782320273275, "grad_norm": 0.35726621747016907, "learning_rate": 0.00012821066039329858, "loss": 1.3973, "step": 27633 }, { "epoch": 0.35909122657124337, "grad_norm": 0.36392661929130554, "learning_rate": 0.00012820806093138723, "loss": 1.2806, "step": 27634 }, { "epoch": 0.35910422111515927, "grad_norm": 0.40534746646881104, "learning_rate": 0.00012820546146947583, "loss": 1.5025, "step": 27635 }, { "epoch": 0.3591172156590752, "grad_norm": 0.33008432388305664, "learning_rate": 0.00012820286200756445, "loss": 1.3684, "step": 27636 }, { "epoch": 0.359130210202991, "grad_norm": 0.3452047109603882, "learning_rate": 0.00012820026254565305, "loss": 1.3064, "step": 27637 }, { "epoch": 0.3591432047469069, "grad_norm": 0.46656790375709534, "learning_rate": 0.00012819766308374167, "loss": 1.4379, "step": 27638 }, { "epoch": 0.35915619929082276, "grad_norm": 0.42533358931541443, "learning_rate": 0.0001281950636218303, "loss": 1.419, "step": 27639 }, { "epoch": 0.35916919383473866, "grad_norm": 0.47659459710121155, "learning_rate": 0.0001281924641599189, "loss": 1.3854, "step": 27640 }, { "epoch": 0.3591821883786545, "grad_norm": 0.32988694310188293, "learning_rate": 0.00012818986469800752, "loss": 1.2151, "step": 27641 }, { "epoch": 0.3591951829225704, "grad_norm": 0.31307247281074524, "learning_rate": 0.00012818726523609615, "loss": 1.1736, "step": 27642 }, { "epoch": 0.35920817746648626, "grad_norm": 0.39512899518013, "learning_rate": 0.00012818466577418474, "loss": 1.1759, "step": 27643 }, { "epoch": 0.35922117201040216, "grad_norm": 0.4873647391796112, "learning_rate": 0.00012818206631227337, "loss": 1.6339, "step": 27644 }, { "epoch": 0.359234166554318, "grad_norm": 0.32334479689598083, "learning_rate": 0.00012817946685036196, "loss": 1.3449, "step": 27645 }, { "epoch": 0.3592471610982339, "grad_norm": 0.4204961955547333, "learning_rate": 0.00012817686738845062, "loss": 1.4573, "step": 27646 }, { "epoch": 0.35926015564214975, "grad_norm": 0.5079982280731201, "learning_rate": 0.00012817426792653921, "loss": 1.3406, "step": 27647 }, { "epoch": 0.35927315018606565, "grad_norm": 0.34709957242012024, "learning_rate": 0.00012817166846462784, "loss": 1.4508, "step": 27648 }, { "epoch": 0.3592861447299815, "grad_norm": 0.48476696014404297, "learning_rate": 0.00012816906900271644, "loss": 1.4477, "step": 27649 }, { "epoch": 0.3592991392738974, "grad_norm": 0.4945536255836487, "learning_rate": 0.00012816646954080506, "loss": 1.4686, "step": 27650 }, { "epoch": 0.35931213381781324, "grad_norm": 0.3563969135284424, "learning_rate": 0.00012816387007889368, "loss": 1.257, "step": 27651 }, { "epoch": 0.35932512836172914, "grad_norm": 0.4547992944717407, "learning_rate": 0.00012816127061698228, "loss": 1.4987, "step": 27652 }, { "epoch": 0.359338122905645, "grad_norm": 0.38634777069091797, "learning_rate": 0.0001281586711550709, "loss": 1.3582, "step": 27653 }, { "epoch": 0.3593511174495609, "grad_norm": 0.4946206510066986, "learning_rate": 0.00012815607169315953, "loss": 1.5002, "step": 27654 }, { "epoch": 0.35936411199347673, "grad_norm": 0.4899272322654724, "learning_rate": 0.00012815347223124813, "loss": 1.4997, "step": 27655 }, { "epoch": 0.35937710653739263, "grad_norm": 0.42135342955589294, "learning_rate": 0.00012815087276933675, "loss": 1.298, "step": 27656 }, { "epoch": 0.3593901010813085, "grad_norm": 0.4033015966415405, "learning_rate": 0.00012814827330742535, "loss": 1.3741, "step": 27657 }, { "epoch": 0.3594030956252244, "grad_norm": 0.44045010209083557, "learning_rate": 0.000128145673845514, "loss": 1.4239, "step": 27658 }, { "epoch": 0.3594160901691402, "grad_norm": 0.44414782524108887, "learning_rate": 0.0001281430743836026, "loss": 1.4081, "step": 27659 }, { "epoch": 0.3594290847130561, "grad_norm": 0.34166398644447327, "learning_rate": 0.00012814047492169122, "loss": 1.1138, "step": 27660 }, { "epoch": 0.35944207925697197, "grad_norm": 0.4519447088241577, "learning_rate": 0.00012813787545977982, "loss": 1.2966, "step": 27661 }, { "epoch": 0.35945507380088787, "grad_norm": 0.41849958896636963, "learning_rate": 0.00012813527599786845, "loss": 1.4028, "step": 27662 }, { "epoch": 0.3594680683448037, "grad_norm": 0.4420962631702423, "learning_rate": 0.00012813267653595707, "loss": 1.3693, "step": 27663 }, { "epoch": 0.3594810628887196, "grad_norm": 0.41066598892211914, "learning_rate": 0.00012813007707404567, "loss": 1.5534, "step": 27664 }, { "epoch": 0.35949405743263546, "grad_norm": 0.3112351596355438, "learning_rate": 0.0001281274776121343, "loss": 1.3731, "step": 27665 }, { "epoch": 0.35950705197655136, "grad_norm": 0.38092565536499023, "learning_rate": 0.00012812487815022292, "loss": 1.36, "step": 27666 }, { "epoch": 0.3595200465204672, "grad_norm": 0.3766894042491913, "learning_rate": 0.00012812227868831151, "loss": 1.3587, "step": 27667 }, { "epoch": 0.3595330410643831, "grad_norm": 0.3930797576904297, "learning_rate": 0.00012811967922640014, "loss": 1.2604, "step": 27668 }, { "epoch": 0.35954603560829895, "grad_norm": 0.46917724609375, "learning_rate": 0.00012811707976448876, "loss": 1.1305, "step": 27669 }, { "epoch": 0.35955903015221485, "grad_norm": 0.5573945641517639, "learning_rate": 0.0001281144803025774, "loss": 1.3345, "step": 27670 }, { "epoch": 0.3595720246961307, "grad_norm": 0.3489684760570526, "learning_rate": 0.00012811188084066598, "loss": 1.426, "step": 27671 }, { "epoch": 0.3595850192400466, "grad_norm": 0.3526850640773773, "learning_rate": 0.0001281092813787546, "loss": 1.561, "step": 27672 }, { "epoch": 0.35959801378396244, "grad_norm": 0.2495710402727127, "learning_rate": 0.00012810668191684323, "loss": 1.4678, "step": 27673 }, { "epoch": 0.35961100832787835, "grad_norm": 0.3864297568798065, "learning_rate": 0.00012810408245493183, "loss": 1.5309, "step": 27674 }, { "epoch": 0.3596240028717942, "grad_norm": 0.5073360800743103, "learning_rate": 0.00012810148299302046, "loss": 1.3306, "step": 27675 }, { "epoch": 0.3596369974157101, "grad_norm": 0.4561956226825714, "learning_rate": 0.00012809888353110905, "loss": 1.5017, "step": 27676 }, { "epoch": 0.35964999195962594, "grad_norm": 0.4788547456264496, "learning_rate": 0.0001280962840691977, "loss": 1.3734, "step": 27677 }, { "epoch": 0.35966298650354184, "grad_norm": 0.4014264643192291, "learning_rate": 0.0001280936846072863, "loss": 1.4943, "step": 27678 }, { "epoch": 0.3596759810474577, "grad_norm": 0.3657355308532715, "learning_rate": 0.0001280910851453749, "loss": 1.292, "step": 27679 }, { "epoch": 0.3596889755913736, "grad_norm": 0.5509775876998901, "learning_rate": 0.00012808848568346352, "loss": 1.4267, "step": 27680 }, { "epoch": 0.35970197013528943, "grad_norm": 0.4427236020565033, "learning_rate": 0.00012808588622155215, "loss": 1.3906, "step": 27681 }, { "epoch": 0.35971496467920533, "grad_norm": 0.44188711047172546, "learning_rate": 0.00012808328675964077, "loss": 1.7388, "step": 27682 }, { "epoch": 0.3597279592231212, "grad_norm": 0.3850189447402954, "learning_rate": 0.00012808068729772937, "loss": 1.5433, "step": 27683 }, { "epoch": 0.3597409537670371, "grad_norm": 0.39822354912757874, "learning_rate": 0.000128078087835818, "loss": 1.2922, "step": 27684 }, { "epoch": 0.3597539483109529, "grad_norm": 0.3820672929286957, "learning_rate": 0.00012807548837390662, "loss": 1.6341, "step": 27685 }, { "epoch": 0.3597669428548688, "grad_norm": 0.37761199474334717, "learning_rate": 0.00012807288891199522, "loss": 1.3627, "step": 27686 }, { "epoch": 0.35977993739878467, "grad_norm": 0.4008931517601013, "learning_rate": 0.00012807028945008384, "loss": 1.3191, "step": 27687 }, { "epoch": 0.35979293194270057, "grad_norm": 0.40517768263816833, "learning_rate": 0.00012806768998817244, "loss": 1.3468, "step": 27688 }, { "epoch": 0.3598059264866164, "grad_norm": 0.4657978117465973, "learning_rate": 0.0001280650905262611, "loss": 1.4435, "step": 27689 }, { "epoch": 0.3598189210305323, "grad_norm": 0.4199911952018738, "learning_rate": 0.0001280624910643497, "loss": 1.5789, "step": 27690 }, { "epoch": 0.35983191557444816, "grad_norm": 0.4553356468677521, "learning_rate": 0.00012805989160243828, "loss": 1.2788, "step": 27691 }, { "epoch": 0.35984491011836406, "grad_norm": 0.36766916513442993, "learning_rate": 0.0001280572921405269, "loss": 1.2288, "step": 27692 }, { "epoch": 0.3598579046622799, "grad_norm": 0.4244515299797058, "learning_rate": 0.00012805469267861553, "loss": 1.3975, "step": 27693 }, { "epoch": 0.3598708992061958, "grad_norm": 0.44811877608299255, "learning_rate": 0.00012805209321670416, "loss": 1.2439, "step": 27694 }, { "epoch": 0.35988389375011165, "grad_norm": 0.3755172789096832, "learning_rate": 0.00012804949375479276, "loss": 1.5773, "step": 27695 }, { "epoch": 0.35989688829402755, "grad_norm": 0.2625598907470703, "learning_rate": 0.00012804689429288138, "loss": 1.4265, "step": 27696 }, { "epoch": 0.3599098828379434, "grad_norm": 0.3401976227760315, "learning_rate": 0.00012804429483097, "loss": 1.2307, "step": 27697 }, { "epoch": 0.3599228773818593, "grad_norm": 0.41952764987945557, "learning_rate": 0.0001280416953690586, "loss": 1.335, "step": 27698 }, { "epoch": 0.35993587192577514, "grad_norm": 0.3857037425041199, "learning_rate": 0.00012803909590714723, "loss": 1.3957, "step": 27699 }, { "epoch": 0.35994886646969104, "grad_norm": 0.45489734411239624, "learning_rate": 0.00012803649644523582, "loss": 1.3912, "step": 27700 }, { "epoch": 0.3599618610136069, "grad_norm": 0.39681991934776306, "learning_rate": 0.00012803389698332448, "loss": 1.4129, "step": 27701 }, { "epoch": 0.3599748555575228, "grad_norm": 0.46642985939979553, "learning_rate": 0.00012803129752141307, "loss": 1.2736, "step": 27702 }, { "epoch": 0.35998785010143863, "grad_norm": 0.4191685616970062, "learning_rate": 0.0001280286980595017, "loss": 1.3242, "step": 27703 }, { "epoch": 0.36000084464535453, "grad_norm": 0.5292420387268066, "learning_rate": 0.00012802609859759032, "loss": 1.4502, "step": 27704 }, { "epoch": 0.3600138391892704, "grad_norm": 0.4451882243156433, "learning_rate": 0.00012802349913567892, "loss": 1.1633, "step": 27705 }, { "epoch": 0.3600268337331863, "grad_norm": 0.4311913847923279, "learning_rate": 0.00012802089967376754, "loss": 1.3569, "step": 27706 }, { "epoch": 0.3600398282771021, "grad_norm": 0.4199172854423523, "learning_rate": 0.00012801830021185614, "loss": 1.381, "step": 27707 }, { "epoch": 0.360052822821018, "grad_norm": 0.41160690784454346, "learning_rate": 0.00012801570074994477, "loss": 1.4665, "step": 27708 }, { "epoch": 0.36006581736493387, "grad_norm": 0.3560848832130432, "learning_rate": 0.0001280131012880334, "loss": 1.2832, "step": 27709 }, { "epoch": 0.36007881190884977, "grad_norm": 0.3419175148010254, "learning_rate": 0.000128010501826122, "loss": 1.6159, "step": 27710 }, { "epoch": 0.3600918064527657, "grad_norm": 0.3952697813510895, "learning_rate": 0.0001280079023642106, "loss": 1.3438, "step": 27711 }, { "epoch": 0.3601048009966815, "grad_norm": 0.38377606868743896, "learning_rate": 0.00012800530290229924, "loss": 1.4806, "step": 27712 }, { "epoch": 0.3601177955405974, "grad_norm": 0.4361295998096466, "learning_rate": 0.00012800270344038786, "loss": 1.3901, "step": 27713 }, { "epoch": 0.36013079008451326, "grad_norm": 0.401187002658844, "learning_rate": 0.00012800010397847646, "loss": 1.5078, "step": 27714 }, { "epoch": 0.36014378462842916, "grad_norm": 0.43393754959106445, "learning_rate": 0.00012799750451656508, "loss": 1.3964, "step": 27715 }, { "epoch": 0.360156779172345, "grad_norm": 0.3406221568584442, "learning_rate": 0.0001279949050546537, "loss": 1.3653, "step": 27716 }, { "epoch": 0.3601697737162609, "grad_norm": 0.37753725051879883, "learning_rate": 0.0001279923055927423, "loss": 1.4628, "step": 27717 }, { "epoch": 0.36018276826017676, "grad_norm": 0.42783549427986145, "learning_rate": 0.00012798970613083093, "loss": 1.4597, "step": 27718 }, { "epoch": 0.36019576280409266, "grad_norm": 0.36001017689704895, "learning_rate": 0.00012798710666891953, "loss": 1.4056, "step": 27719 }, { "epoch": 0.3602087573480085, "grad_norm": 0.4802101254463196, "learning_rate": 0.00012798450720700815, "loss": 1.5855, "step": 27720 }, { "epoch": 0.3602217518919244, "grad_norm": 0.4641880393028259, "learning_rate": 0.00012798190774509678, "loss": 1.339, "step": 27721 }, { "epoch": 0.36023474643584025, "grad_norm": 0.49150583148002625, "learning_rate": 0.00012797930828318537, "loss": 1.3896, "step": 27722 }, { "epoch": 0.36024774097975615, "grad_norm": 0.2644767463207245, "learning_rate": 0.000127976708821274, "loss": 1.2511, "step": 27723 }, { "epoch": 0.360260735523672, "grad_norm": 0.37700560688972473, "learning_rate": 0.00012797410935936262, "loss": 1.3508, "step": 27724 }, { "epoch": 0.3602737300675879, "grad_norm": 0.48504844307899475, "learning_rate": 0.00012797150989745125, "loss": 1.4253, "step": 27725 }, { "epoch": 0.36028672461150374, "grad_norm": 0.3321053087711334, "learning_rate": 0.00012796891043553984, "loss": 1.4786, "step": 27726 }, { "epoch": 0.36029971915541964, "grad_norm": 0.43959081172943115, "learning_rate": 0.00012796631097362847, "loss": 1.5355, "step": 27727 }, { "epoch": 0.3603127136993355, "grad_norm": 0.3881930112838745, "learning_rate": 0.0001279637115117171, "loss": 1.4316, "step": 27728 }, { "epoch": 0.3603257082432514, "grad_norm": 0.3957481384277344, "learning_rate": 0.0001279611120498057, "loss": 1.4775, "step": 27729 }, { "epoch": 0.36033870278716723, "grad_norm": 0.32693642377853394, "learning_rate": 0.00012795851258789431, "loss": 1.1513, "step": 27730 }, { "epoch": 0.36035169733108313, "grad_norm": 0.44906678795814514, "learning_rate": 0.0001279559131259829, "loss": 1.3994, "step": 27731 }, { "epoch": 0.360364691874999, "grad_norm": 0.4644584357738495, "learning_rate": 0.00012795331366407156, "loss": 1.4257, "step": 27732 }, { "epoch": 0.3603776864189149, "grad_norm": 0.38901931047439575, "learning_rate": 0.00012795071420216016, "loss": 1.16, "step": 27733 }, { "epoch": 0.3603906809628307, "grad_norm": 0.39654111862182617, "learning_rate": 0.00012794811474024876, "loss": 1.4416, "step": 27734 }, { "epoch": 0.3604036755067466, "grad_norm": 0.4276122450828552, "learning_rate": 0.00012794551527833738, "loss": 1.2246, "step": 27735 }, { "epoch": 0.36041667005066247, "grad_norm": 0.4020725190639496, "learning_rate": 0.000127942915816426, "loss": 1.2876, "step": 27736 }, { "epoch": 0.36042966459457837, "grad_norm": 0.274831622838974, "learning_rate": 0.00012794031635451463, "loss": 1.2217, "step": 27737 }, { "epoch": 0.3604426591384942, "grad_norm": 0.2875143885612488, "learning_rate": 0.00012793771689260323, "loss": 1.4213, "step": 27738 }, { "epoch": 0.3604556536824101, "grad_norm": 0.3644232153892517, "learning_rate": 0.00012793511743069185, "loss": 1.2063, "step": 27739 }, { "epoch": 0.36046864822632596, "grad_norm": 0.5233795046806335, "learning_rate": 0.00012793251796878048, "loss": 1.3546, "step": 27740 }, { "epoch": 0.36048164277024186, "grad_norm": 0.43412065505981445, "learning_rate": 0.00012792991850686908, "loss": 1.465, "step": 27741 }, { "epoch": 0.3604946373141577, "grad_norm": 0.29403597116470337, "learning_rate": 0.0001279273190449577, "loss": 1.3138, "step": 27742 }, { "epoch": 0.3605076318580736, "grad_norm": 0.35137149691581726, "learning_rate": 0.00012792471958304632, "loss": 1.3716, "step": 27743 }, { "epoch": 0.36052062640198945, "grad_norm": 0.494189977645874, "learning_rate": 0.00012792212012113495, "loss": 1.5509, "step": 27744 }, { "epoch": 0.36053362094590535, "grad_norm": 0.4517155587673187, "learning_rate": 0.00012791952065922355, "loss": 1.1368, "step": 27745 }, { "epoch": 0.3605466154898212, "grad_norm": 0.5183231830596924, "learning_rate": 0.00012791692119731214, "loss": 1.3575, "step": 27746 }, { "epoch": 0.3605596100337371, "grad_norm": 0.37608230113983154, "learning_rate": 0.0001279143217354008, "loss": 1.4324, "step": 27747 }, { "epoch": 0.36057260457765294, "grad_norm": 0.3984592854976654, "learning_rate": 0.0001279117222734894, "loss": 1.5375, "step": 27748 }, { "epoch": 0.36058559912156884, "grad_norm": 0.4559206962585449, "learning_rate": 0.00012790912281157802, "loss": 1.3844, "step": 27749 }, { "epoch": 0.3605985936654847, "grad_norm": 0.44641441106796265, "learning_rate": 0.00012790652334966661, "loss": 1.4414, "step": 27750 }, { "epoch": 0.3606115882094006, "grad_norm": 0.4737507998943329, "learning_rate": 0.00012790392388775524, "loss": 1.2061, "step": 27751 }, { "epoch": 0.36062458275331644, "grad_norm": 0.468979150056839, "learning_rate": 0.00012790132442584386, "loss": 1.4307, "step": 27752 }, { "epoch": 0.36063757729723234, "grad_norm": 0.44866955280303955, "learning_rate": 0.00012789872496393246, "loss": 1.6161, "step": 27753 }, { "epoch": 0.3606505718411482, "grad_norm": 0.4902353286743164, "learning_rate": 0.00012789612550202108, "loss": 1.5617, "step": 27754 }, { "epoch": 0.3606635663850641, "grad_norm": 0.4259644150733948, "learning_rate": 0.0001278935260401097, "loss": 1.3698, "step": 27755 }, { "epoch": 0.36067656092897993, "grad_norm": 0.4154307544231415, "learning_rate": 0.00012789092657819833, "loss": 1.3558, "step": 27756 }, { "epoch": 0.36068955547289583, "grad_norm": 0.3634119927883148, "learning_rate": 0.00012788832711628693, "loss": 1.445, "step": 27757 }, { "epoch": 0.3607025500168117, "grad_norm": 0.4950021207332611, "learning_rate": 0.00012788572765437556, "loss": 1.2763, "step": 27758 }, { "epoch": 0.3607155445607276, "grad_norm": 0.3662574291229248, "learning_rate": 0.00012788312819246418, "loss": 1.4004, "step": 27759 }, { "epoch": 0.3607285391046434, "grad_norm": 0.3208286166191101, "learning_rate": 0.00012788052873055278, "loss": 1.3769, "step": 27760 }, { "epoch": 0.3607415336485593, "grad_norm": 0.26576370000839233, "learning_rate": 0.0001278779292686414, "loss": 1.1657, "step": 27761 }, { "epoch": 0.36075452819247517, "grad_norm": 0.4860704243183136, "learning_rate": 0.00012787532980673, "loss": 1.3634, "step": 27762 }, { "epoch": 0.36076752273639107, "grad_norm": 0.3989601135253906, "learning_rate": 0.00012787273034481862, "loss": 1.3924, "step": 27763 }, { "epoch": 0.3607805172803069, "grad_norm": 0.42672085762023926, "learning_rate": 0.00012787013088290725, "loss": 1.3004, "step": 27764 }, { "epoch": 0.3607935118242228, "grad_norm": 0.29372063279151917, "learning_rate": 0.00012786753142099585, "loss": 1.0006, "step": 27765 }, { "epoch": 0.36080650636813866, "grad_norm": 0.4319475591182709, "learning_rate": 0.00012786493195908447, "loss": 1.4514, "step": 27766 }, { "epoch": 0.36081950091205456, "grad_norm": 0.4387374222278595, "learning_rate": 0.0001278623324971731, "loss": 1.421, "step": 27767 }, { "epoch": 0.3608324954559704, "grad_norm": 0.39316609501838684, "learning_rate": 0.00012785973303526172, "loss": 1.419, "step": 27768 }, { "epoch": 0.3608454899998863, "grad_norm": 0.4666074216365814, "learning_rate": 0.00012785713357335032, "loss": 1.4141, "step": 27769 }, { "epoch": 0.36085848454380215, "grad_norm": 0.4391013979911804, "learning_rate": 0.00012785453411143894, "loss": 1.5427, "step": 27770 }, { "epoch": 0.36087147908771805, "grad_norm": 0.4647830128669739, "learning_rate": 0.00012785193464952757, "loss": 1.4831, "step": 27771 }, { "epoch": 0.3608844736316339, "grad_norm": 0.3286523222923279, "learning_rate": 0.00012784933518761616, "loss": 1.2075, "step": 27772 }, { "epoch": 0.3608974681755498, "grad_norm": 0.4326229989528656, "learning_rate": 0.0001278467357257048, "loss": 1.2158, "step": 27773 }, { "epoch": 0.36091046271946564, "grad_norm": 0.34523260593414307, "learning_rate": 0.00012784413626379338, "loss": 1.3759, "step": 27774 }, { "epoch": 0.36092345726338154, "grad_norm": 0.4005174934864044, "learning_rate": 0.000127841536801882, "loss": 1.3219, "step": 27775 }, { "epoch": 0.3609364518072974, "grad_norm": 0.38597485423088074, "learning_rate": 0.00012783893733997063, "loss": 1.6224, "step": 27776 }, { "epoch": 0.3609494463512133, "grad_norm": 0.3368265628814697, "learning_rate": 0.00012783633787805923, "loss": 1.3623, "step": 27777 }, { "epoch": 0.36096244089512913, "grad_norm": 0.3311925530433655, "learning_rate": 0.00012783373841614788, "loss": 1.4814, "step": 27778 }, { "epoch": 0.36097543543904503, "grad_norm": 0.42169272899627686, "learning_rate": 0.00012783113895423648, "loss": 1.5797, "step": 27779 }, { "epoch": 0.3609884299829609, "grad_norm": 0.4065757095813751, "learning_rate": 0.0001278285394923251, "loss": 1.3247, "step": 27780 }, { "epoch": 0.3610014245268768, "grad_norm": 0.5299268960952759, "learning_rate": 0.0001278259400304137, "loss": 1.4152, "step": 27781 }, { "epoch": 0.3610144190707926, "grad_norm": 0.4666915237903595, "learning_rate": 0.00012782334056850233, "loss": 1.6351, "step": 27782 }, { "epoch": 0.3610274136147085, "grad_norm": 0.44816410541534424, "learning_rate": 0.00012782074110659095, "loss": 1.4535, "step": 27783 }, { "epoch": 0.36104040815862437, "grad_norm": 0.37693458795547485, "learning_rate": 0.00012781814164467955, "loss": 1.2685, "step": 27784 }, { "epoch": 0.36105340270254027, "grad_norm": 0.4154645800590515, "learning_rate": 0.00012781554218276817, "loss": 1.3166, "step": 27785 }, { "epoch": 0.3610663972464561, "grad_norm": 0.3993780314922333, "learning_rate": 0.0001278129427208568, "loss": 1.4926, "step": 27786 }, { "epoch": 0.361079391790372, "grad_norm": 0.40013110637664795, "learning_rate": 0.00012781034325894542, "loss": 1.4259, "step": 27787 }, { "epoch": 0.3610923863342879, "grad_norm": 0.43420761823654175, "learning_rate": 0.00012780774379703402, "loss": 1.5246, "step": 27788 }, { "epoch": 0.36110538087820376, "grad_norm": 0.3763276934623718, "learning_rate": 0.00012780514433512262, "loss": 1.4771, "step": 27789 }, { "epoch": 0.36111837542211966, "grad_norm": 0.4337875247001648, "learning_rate": 0.00012780254487321127, "loss": 1.5364, "step": 27790 }, { "epoch": 0.3611313699660355, "grad_norm": 0.397617906332016, "learning_rate": 0.00012779994541129987, "loss": 1.528, "step": 27791 }, { "epoch": 0.3611443645099514, "grad_norm": 0.3005382716655731, "learning_rate": 0.0001277973459493885, "loss": 1.1786, "step": 27792 }, { "epoch": 0.36115735905386726, "grad_norm": 0.378738135099411, "learning_rate": 0.0001277947464874771, "loss": 1.481, "step": 27793 }, { "epoch": 0.36117035359778316, "grad_norm": 0.3815094232559204, "learning_rate": 0.0001277921470255657, "loss": 1.1579, "step": 27794 }, { "epoch": 0.361183348141699, "grad_norm": 0.45481762290000916, "learning_rate": 0.00012778954756365434, "loss": 1.3996, "step": 27795 }, { "epoch": 0.3611963426856149, "grad_norm": 0.38075512647628784, "learning_rate": 0.00012778694810174293, "loss": 1.4065, "step": 27796 }, { "epoch": 0.36120933722953075, "grad_norm": 0.4106857180595398, "learning_rate": 0.00012778434863983156, "loss": 1.4362, "step": 27797 }, { "epoch": 0.36122233177344665, "grad_norm": 0.4493132531642914, "learning_rate": 0.00012778174917792018, "loss": 1.5314, "step": 27798 }, { "epoch": 0.3612353263173625, "grad_norm": 0.3695198893547058, "learning_rate": 0.0001277791497160088, "loss": 1.2503, "step": 27799 }, { "epoch": 0.3612483208612784, "grad_norm": 0.3015180826187134, "learning_rate": 0.0001277765502540974, "loss": 1.2105, "step": 27800 }, { "epoch": 0.36126131540519424, "grad_norm": 0.369755357503891, "learning_rate": 0.000127773950792186, "loss": 1.5278, "step": 27801 }, { "epoch": 0.36127430994911014, "grad_norm": 0.37347927689552307, "learning_rate": 0.00012777135133027465, "loss": 1.522, "step": 27802 }, { "epoch": 0.361287304493026, "grad_norm": 0.4142630398273468, "learning_rate": 0.00012776875186836325, "loss": 1.5668, "step": 27803 }, { "epoch": 0.3613002990369419, "grad_norm": 0.4206830859184265, "learning_rate": 0.00012776615240645188, "loss": 1.3385, "step": 27804 }, { "epoch": 0.36131329358085773, "grad_norm": 0.4634554982185364, "learning_rate": 0.00012776355294454047, "loss": 1.5281, "step": 27805 }, { "epoch": 0.36132628812477363, "grad_norm": 0.44164320826530457, "learning_rate": 0.0001277609534826291, "loss": 1.4743, "step": 27806 }, { "epoch": 0.3613392826686895, "grad_norm": 0.3963533937931061, "learning_rate": 0.00012775835402071772, "loss": 1.4595, "step": 27807 }, { "epoch": 0.3613522772126054, "grad_norm": 0.3321821987628937, "learning_rate": 0.00012775575455880632, "loss": 1.4171, "step": 27808 }, { "epoch": 0.3613652717565212, "grad_norm": 0.472731351852417, "learning_rate": 0.00012775315509689494, "loss": 1.4773, "step": 27809 }, { "epoch": 0.3613782663004371, "grad_norm": 0.34022805094718933, "learning_rate": 0.00012775055563498357, "loss": 1.1777, "step": 27810 }, { "epoch": 0.36139126084435297, "grad_norm": 0.4276570677757263, "learning_rate": 0.0001277479561730722, "loss": 1.5319, "step": 27811 }, { "epoch": 0.36140425538826887, "grad_norm": 0.3602466285228729, "learning_rate": 0.0001277453567111608, "loss": 1.3432, "step": 27812 }, { "epoch": 0.3614172499321847, "grad_norm": 0.35369378328323364, "learning_rate": 0.0001277427572492494, "loss": 1.2578, "step": 27813 }, { "epoch": 0.3614302444761006, "grad_norm": 0.31022390723228455, "learning_rate": 0.00012774015778733804, "loss": 1.3267, "step": 27814 }, { "epoch": 0.36144323902001646, "grad_norm": 0.3798324763774872, "learning_rate": 0.00012773755832542664, "loss": 1.3003, "step": 27815 }, { "epoch": 0.36145623356393236, "grad_norm": 0.4148436188697815, "learning_rate": 0.00012773495886351526, "loss": 1.4778, "step": 27816 }, { "epoch": 0.3614692281078482, "grad_norm": 0.44393473863601685, "learning_rate": 0.00012773235940160389, "loss": 1.5811, "step": 27817 }, { "epoch": 0.3614822226517641, "grad_norm": 0.47508734464645386, "learning_rate": 0.00012772975993969248, "loss": 1.562, "step": 27818 }, { "epoch": 0.36149521719567995, "grad_norm": 0.3382413387298584, "learning_rate": 0.0001277271604777811, "loss": 1.4004, "step": 27819 }, { "epoch": 0.36150821173959585, "grad_norm": 0.3241707980632782, "learning_rate": 0.0001277245610158697, "loss": 1.094, "step": 27820 }, { "epoch": 0.3615212062835117, "grad_norm": 0.2444005161523819, "learning_rate": 0.00012772196155395836, "loss": 1.3469, "step": 27821 }, { "epoch": 0.3615342008274276, "grad_norm": 0.4090452790260315, "learning_rate": 0.00012771936209204695, "loss": 1.3498, "step": 27822 }, { "epoch": 0.36154719537134344, "grad_norm": 0.48385652899742126, "learning_rate": 0.00012771676263013558, "loss": 1.4867, "step": 27823 }, { "epoch": 0.36156018991525934, "grad_norm": 0.47417891025543213, "learning_rate": 0.00012771416316822418, "loss": 1.3614, "step": 27824 }, { "epoch": 0.3615731844591752, "grad_norm": 0.4144386649131775, "learning_rate": 0.0001277115637063128, "loss": 1.439, "step": 27825 }, { "epoch": 0.3615861790030911, "grad_norm": 0.38045093417167664, "learning_rate": 0.00012770896424440142, "loss": 1.4307, "step": 27826 }, { "epoch": 0.36159917354700694, "grad_norm": 0.6519449949264526, "learning_rate": 0.00012770636478249002, "loss": 1.3359, "step": 27827 }, { "epoch": 0.36161216809092284, "grad_norm": 0.4009718894958496, "learning_rate": 0.00012770376532057865, "loss": 1.3478, "step": 27828 }, { "epoch": 0.3616251626348387, "grad_norm": 0.30755242705345154, "learning_rate": 0.00012770116585866727, "loss": 1.2132, "step": 27829 }, { "epoch": 0.3616381571787546, "grad_norm": 0.478539377450943, "learning_rate": 0.00012769856639675587, "loss": 1.4943, "step": 27830 }, { "epoch": 0.3616511517226704, "grad_norm": 0.32504621148109436, "learning_rate": 0.0001276959669348445, "loss": 1.4372, "step": 27831 }, { "epoch": 0.36166414626658633, "grad_norm": 0.4168856143951416, "learning_rate": 0.0001276933674729331, "loss": 1.4452, "step": 27832 }, { "epoch": 0.3616771408105022, "grad_norm": 0.42507296800613403, "learning_rate": 0.00012769076801102174, "loss": 1.5417, "step": 27833 }, { "epoch": 0.3616901353544181, "grad_norm": 0.5024882555007935, "learning_rate": 0.00012768816854911034, "loss": 1.5667, "step": 27834 }, { "epoch": 0.3617031298983339, "grad_norm": 0.40814459323883057, "learning_rate": 0.00012768556908719896, "loss": 1.3775, "step": 27835 }, { "epoch": 0.3617161244422498, "grad_norm": 0.44818344712257385, "learning_rate": 0.00012768296962528756, "loss": 1.4195, "step": 27836 }, { "epoch": 0.36172911898616567, "grad_norm": 0.5035160779953003, "learning_rate": 0.00012768037016337619, "loss": 1.3621, "step": 27837 }, { "epoch": 0.36174211353008157, "grad_norm": 0.2922307848930359, "learning_rate": 0.0001276777707014648, "loss": 1.493, "step": 27838 }, { "epoch": 0.3617551080739974, "grad_norm": 0.40868890285491943, "learning_rate": 0.0001276751712395534, "loss": 1.2985, "step": 27839 }, { "epoch": 0.3617681026179133, "grad_norm": 0.35993722081184387, "learning_rate": 0.00012767257177764203, "loss": 1.3174, "step": 27840 }, { "epoch": 0.36178109716182916, "grad_norm": 0.4102262556552887, "learning_rate": 0.00012766997231573066, "loss": 1.4433, "step": 27841 }, { "epoch": 0.36179409170574506, "grad_norm": 0.3526569604873657, "learning_rate": 0.00012766737285381928, "loss": 1.5094, "step": 27842 }, { "epoch": 0.3618070862496609, "grad_norm": 0.3262081444263458, "learning_rate": 0.00012766477339190788, "loss": 1.3418, "step": 27843 }, { "epoch": 0.3618200807935768, "grad_norm": 0.5079373717308044, "learning_rate": 0.00012766217392999648, "loss": 1.4933, "step": 27844 }, { "epoch": 0.36183307533749265, "grad_norm": 0.4362841546535492, "learning_rate": 0.00012765957446808513, "loss": 1.5118, "step": 27845 }, { "epoch": 0.36184606988140855, "grad_norm": 0.3379189074039459, "learning_rate": 0.00012765697500617372, "loss": 1.4802, "step": 27846 }, { "epoch": 0.3618590644253244, "grad_norm": 0.3922182321548462, "learning_rate": 0.00012765437554426235, "loss": 1.3502, "step": 27847 }, { "epoch": 0.3618720589692403, "grad_norm": 0.4121309518814087, "learning_rate": 0.00012765177608235095, "loss": 1.4688, "step": 27848 }, { "epoch": 0.36188505351315614, "grad_norm": 0.42894285917282104, "learning_rate": 0.00012764917662043957, "loss": 1.3544, "step": 27849 }, { "epoch": 0.36189804805707204, "grad_norm": 0.43683096766471863, "learning_rate": 0.0001276465771585282, "loss": 1.3238, "step": 27850 }, { "epoch": 0.3619110426009879, "grad_norm": 0.39007100462913513, "learning_rate": 0.0001276439776966168, "loss": 1.3881, "step": 27851 }, { "epoch": 0.3619240371449038, "grad_norm": 0.3597339987754822, "learning_rate": 0.00012764137823470542, "loss": 1.3517, "step": 27852 }, { "epoch": 0.36193703168881963, "grad_norm": 0.2994987368583679, "learning_rate": 0.00012763877877279404, "loss": 1.187, "step": 27853 }, { "epoch": 0.36195002623273553, "grad_norm": 0.39084556698799133, "learning_rate": 0.00012763617931088267, "loss": 1.3499, "step": 27854 }, { "epoch": 0.3619630207766514, "grad_norm": 0.4339793920516968, "learning_rate": 0.00012763357984897126, "loss": 1.2556, "step": 27855 }, { "epoch": 0.3619760153205673, "grad_norm": 0.31620359420776367, "learning_rate": 0.0001276309803870599, "loss": 1.2077, "step": 27856 }, { "epoch": 0.3619890098644831, "grad_norm": 0.5104015469551086, "learning_rate": 0.0001276283809251485, "loss": 1.4807, "step": 27857 }, { "epoch": 0.362002004408399, "grad_norm": 0.5380813479423523, "learning_rate": 0.0001276257814632371, "loss": 1.4119, "step": 27858 }, { "epoch": 0.36201499895231487, "grad_norm": 0.4350837767124176, "learning_rate": 0.00012762318200132573, "loss": 1.619, "step": 27859 }, { "epoch": 0.36202799349623077, "grad_norm": 0.3735024034976959, "learning_rate": 0.00012762058253941436, "loss": 1.4184, "step": 27860 }, { "epoch": 0.3620409880401466, "grad_norm": 0.39258772134780884, "learning_rate": 0.00012761798307750296, "loss": 1.382, "step": 27861 }, { "epoch": 0.3620539825840625, "grad_norm": 0.35262545943260193, "learning_rate": 0.00012761538361559158, "loss": 1.2955, "step": 27862 }, { "epoch": 0.3620669771279784, "grad_norm": 0.4838973879814148, "learning_rate": 0.00012761278415368018, "loss": 1.4091, "step": 27863 }, { "epoch": 0.36207997167189426, "grad_norm": 0.4565093517303467, "learning_rate": 0.00012761018469176883, "loss": 1.3676, "step": 27864 }, { "epoch": 0.36209296621581016, "grad_norm": 0.43859195709228516, "learning_rate": 0.00012760758522985743, "loss": 1.4905, "step": 27865 }, { "epoch": 0.362105960759726, "grad_norm": 0.4944971203804016, "learning_rate": 0.00012760498576794605, "loss": 1.5892, "step": 27866 }, { "epoch": 0.3621189553036419, "grad_norm": 0.39383307099342346, "learning_rate": 0.00012760238630603465, "loss": 1.4989, "step": 27867 }, { "epoch": 0.36213194984755775, "grad_norm": 0.519035816192627, "learning_rate": 0.00012759978684412327, "loss": 1.4191, "step": 27868 }, { "epoch": 0.36214494439147366, "grad_norm": 0.4462484121322632, "learning_rate": 0.0001275971873822119, "loss": 1.3063, "step": 27869 }, { "epoch": 0.3621579389353895, "grad_norm": 0.35490527749061584, "learning_rate": 0.0001275945879203005, "loss": 1.3366, "step": 27870 }, { "epoch": 0.3621709334793054, "grad_norm": 0.45181557536125183, "learning_rate": 0.00012759198845838912, "loss": 1.2444, "step": 27871 }, { "epoch": 0.36218392802322125, "grad_norm": 0.4016639292240143, "learning_rate": 0.00012758938899647774, "loss": 1.4877, "step": 27872 }, { "epoch": 0.36219692256713715, "grad_norm": 0.340221643447876, "learning_rate": 0.00012758678953456634, "loss": 1.3932, "step": 27873 }, { "epoch": 0.362209917111053, "grad_norm": 0.47415557503700256, "learning_rate": 0.00012758419007265497, "loss": 1.3907, "step": 27874 }, { "epoch": 0.3622229116549689, "grad_norm": 0.3619421720504761, "learning_rate": 0.00012758159061074356, "loss": 1.2047, "step": 27875 }, { "epoch": 0.36223590619888474, "grad_norm": 0.3746826946735382, "learning_rate": 0.00012757899114883221, "loss": 1.5186, "step": 27876 }, { "epoch": 0.36224890074280064, "grad_norm": 0.4093152582645416, "learning_rate": 0.0001275763916869208, "loss": 1.3872, "step": 27877 }, { "epoch": 0.3622618952867165, "grad_norm": 0.4057905673980713, "learning_rate": 0.00012757379222500944, "loss": 1.473, "step": 27878 }, { "epoch": 0.3622748898306324, "grad_norm": 0.5744732618331909, "learning_rate": 0.00012757119276309803, "loss": 1.4239, "step": 27879 }, { "epoch": 0.36228788437454823, "grad_norm": 0.39384716749191284, "learning_rate": 0.00012756859330118666, "loss": 1.3799, "step": 27880 }, { "epoch": 0.36230087891846413, "grad_norm": 0.35178133845329285, "learning_rate": 0.00012756599383927528, "loss": 1.321, "step": 27881 }, { "epoch": 0.36231387346238, "grad_norm": 0.4607468843460083, "learning_rate": 0.00012756339437736388, "loss": 1.5119, "step": 27882 }, { "epoch": 0.3623268680062959, "grad_norm": 0.3953493535518646, "learning_rate": 0.0001275607949154525, "loss": 1.3561, "step": 27883 }, { "epoch": 0.3623398625502117, "grad_norm": 0.4084276258945465, "learning_rate": 0.00012755819545354113, "loss": 1.5949, "step": 27884 }, { "epoch": 0.3623528570941276, "grad_norm": 0.4487241804599762, "learning_rate": 0.00012755559599162973, "loss": 1.483, "step": 27885 }, { "epoch": 0.36236585163804347, "grad_norm": 0.3903324604034424, "learning_rate": 0.00012755299652971835, "loss": 1.3356, "step": 27886 }, { "epoch": 0.36237884618195937, "grad_norm": 0.39339789748191833, "learning_rate": 0.00012755039706780695, "loss": 1.3676, "step": 27887 }, { "epoch": 0.3623918407258752, "grad_norm": 0.36171844601631165, "learning_rate": 0.0001275477976058956, "loss": 1.3902, "step": 27888 }, { "epoch": 0.3624048352697911, "grad_norm": 0.4454975724220276, "learning_rate": 0.0001275451981439842, "loss": 1.3918, "step": 27889 }, { "epoch": 0.36241782981370696, "grad_norm": 0.43110841512680054, "learning_rate": 0.00012754259868207282, "loss": 1.5098, "step": 27890 }, { "epoch": 0.36243082435762286, "grad_norm": 0.38996773958206177, "learning_rate": 0.00012753999922016145, "loss": 1.5115, "step": 27891 }, { "epoch": 0.3624438189015387, "grad_norm": 0.31067830324172974, "learning_rate": 0.00012753739975825004, "loss": 1.4401, "step": 27892 }, { "epoch": 0.3624568134454546, "grad_norm": 0.43410617113113403, "learning_rate": 0.00012753480029633867, "loss": 1.1939, "step": 27893 }, { "epoch": 0.36246980798937045, "grad_norm": 0.3915244936943054, "learning_rate": 0.00012753220083442727, "loss": 1.5003, "step": 27894 }, { "epoch": 0.36248280253328635, "grad_norm": 0.2988142967224121, "learning_rate": 0.00012752960137251592, "loss": 1.3511, "step": 27895 }, { "epoch": 0.3624957970772022, "grad_norm": 0.3417602479457855, "learning_rate": 0.00012752700191060451, "loss": 1.1653, "step": 27896 }, { "epoch": 0.3625087916211181, "grad_norm": 0.4114759862422943, "learning_rate": 0.0001275244024486931, "loss": 1.4774, "step": 27897 }, { "epoch": 0.36252178616503394, "grad_norm": 0.3979776203632355, "learning_rate": 0.00012752180298678174, "loss": 1.4565, "step": 27898 }, { "epoch": 0.36253478070894984, "grad_norm": 0.5564953088760376, "learning_rate": 0.00012751920352487036, "loss": 1.498, "step": 27899 }, { "epoch": 0.3625477752528657, "grad_norm": 0.3537488877773285, "learning_rate": 0.00012751660406295899, "loss": 1.444, "step": 27900 }, { "epoch": 0.3625607697967816, "grad_norm": 0.46747323870658875, "learning_rate": 0.00012751400460104758, "loss": 1.2902, "step": 27901 }, { "epoch": 0.36257376434069744, "grad_norm": 0.37444618344306946, "learning_rate": 0.0001275114051391362, "loss": 1.5085, "step": 27902 }, { "epoch": 0.36258675888461334, "grad_norm": 0.4861817955970764, "learning_rate": 0.00012750880567722483, "loss": 1.4716, "step": 27903 }, { "epoch": 0.3625997534285292, "grad_norm": 0.41579338908195496, "learning_rate": 0.00012750620621531343, "loss": 1.3574, "step": 27904 }, { "epoch": 0.3626127479724451, "grad_norm": 0.47768378257751465, "learning_rate": 0.00012750360675340205, "loss": 1.3915, "step": 27905 }, { "epoch": 0.3626257425163609, "grad_norm": 0.24530890583992004, "learning_rate": 0.00012750100729149065, "loss": 1.3806, "step": 27906 }, { "epoch": 0.36263873706027683, "grad_norm": 0.3343988358974457, "learning_rate": 0.0001274984078295793, "loss": 1.2221, "step": 27907 }, { "epoch": 0.3626517316041927, "grad_norm": 0.3423810601234436, "learning_rate": 0.0001274958083676679, "loss": 1.1695, "step": 27908 }, { "epoch": 0.3626647261481086, "grad_norm": 0.4304436147212982, "learning_rate": 0.00012749320890575652, "loss": 1.2429, "step": 27909 }, { "epoch": 0.3626777206920244, "grad_norm": 0.34206873178482056, "learning_rate": 0.00012749060944384512, "loss": 1.2436, "step": 27910 }, { "epoch": 0.3626907152359403, "grad_norm": 0.468522310256958, "learning_rate": 0.00012748800998193375, "loss": 1.3922, "step": 27911 }, { "epoch": 0.36270370977985616, "grad_norm": 0.4226739704608917, "learning_rate": 0.00012748541052002237, "loss": 1.4326, "step": 27912 }, { "epoch": 0.36271670432377207, "grad_norm": 0.34291574358940125, "learning_rate": 0.00012748281105811097, "loss": 1.5153, "step": 27913 }, { "epoch": 0.3627296988676879, "grad_norm": 0.44214433431625366, "learning_rate": 0.0001274802115961996, "loss": 1.3395, "step": 27914 }, { "epoch": 0.3627426934116038, "grad_norm": 0.4218233823776245, "learning_rate": 0.00012747761213428822, "loss": 1.5031, "step": 27915 }, { "epoch": 0.36275568795551966, "grad_norm": 0.4764331579208374, "learning_rate": 0.00012747501267237681, "loss": 1.414, "step": 27916 }, { "epoch": 0.36276868249943556, "grad_norm": 0.40904611349105835, "learning_rate": 0.00012747241321046544, "loss": 1.3522, "step": 27917 }, { "epoch": 0.3627816770433514, "grad_norm": 0.46175211668014526, "learning_rate": 0.00012746981374855404, "loss": 1.4644, "step": 27918 }, { "epoch": 0.3627946715872673, "grad_norm": 0.4082449674606323, "learning_rate": 0.0001274672142866427, "loss": 1.4438, "step": 27919 }, { "epoch": 0.36280766613118315, "grad_norm": 0.7580820918083191, "learning_rate": 0.00012746461482473129, "loss": 1.3861, "step": 27920 }, { "epoch": 0.36282066067509905, "grad_norm": 0.43384575843811035, "learning_rate": 0.0001274620153628199, "loss": 1.467, "step": 27921 }, { "epoch": 0.3628336552190149, "grad_norm": 0.47677040100097656, "learning_rate": 0.0001274594159009085, "loss": 1.4714, "step": 27922 }, { "epoch": 0.3628466497629308, "grad_norm": 0.3647285997867584, "learning_rate": 0.00012745681643899713, "loss": 1.3462, "step": 27923 }, { "epoch": 0.36285964430684664, "grad_norm": 0.3311726152896881, "learning_rate": 0.00012745421697708576, "loss": 1.2766, "step": 27924 }, { "epoch": 0.36287263885076254, "grad_norm": 0.43895748257637024, "learning_rate": 0.00012745161751517435, "loss": 1.2864, "step": 27925 }, { "epoch": 0.3628856333946784, "grad_norm": 0.3090461492538452, "learning_rate": 0.00012744901805326298, "loss": 1.2642, "step": 27926 }, { "epoch": 0.3628986279385943, "grad_norm": 0.47124722599983215, "learning_rate": 0.0001274464185913516, "loss": 1.1927, "step": 27927 }, { "epoch": 0.36291162248251013, "grad_norm": 0.3850697875022888, "learning_rate": 0.0001274438191294402, "loss": 1.6199, "step": 27928 }, { "epoch": 0.36292461702642603, "grad_norm": 0.4305574297904968, "learning_rate": 0.00012744121966752882, "loss": 1.6524, "step": 27929 }, { "epoch": 0.3629376115703419, "grad_norm": 0.3600034713745117, "learning_rate": 0.00012743862020561745, "loss": 1.2012, "step": 27930 }, { "epoch": 0.3629506061142578, "grad_norm": 0.3226478397846222, "learning_rate": 0.00012743602074370607, "loss": 1.4266, "step": 27931 }, { "epoch": 0.3629636006581736, "grad_norm": 0.44012942910194397, "learning_rate": 0.00012743342128179467, "loss": 1.3038, "step": 27932 }, { "epoch": 0.3629765952020895, "grad_norm": 0.4136284291744232, "learning_rate": 0.0001274308218198833, "loss": 1.3312, "step": 27933 }, { "epoch": 0.36298958974600537, "grad_norm": 0.4837016463279724, "learning_rate": 0.00012742822235797192, "loss": 1.385, "step": 27934 }, { "epoch": 0.36300258428992127, "grad_norm": 0.2655482590198517, "learning_rate": 0.00012742562289606052, "loss": 1.2812, "step": 27935 }, { "epoch": 0.3630155788338371, "grad_norm": 0.4441456198692322, "learning_rate": 0.00012742302343414914, "loss": 1.3846, "step": 27936 }, { "epoch": 0.363028573377753, "grad_norm": 0.4971577823162079, "learning_rate": 0.00012742042397223774, "loss": 1.4031, "step": 27937 }, { "epoch": 0.36304156792166886, "grad_norm": 0.3880029320716858, "learning_rate": 0.0001274178245103264, "loss": 1.2369, "step": 27938 }, { "epoch": 0.36305456246558476, "grad_norm": 0.4071570932865143, "learning_rate": 0.000127415225048415, "loss": 1.3813, "step": 27939 }, { "epoch": 0.36306755700950066, "grad_norm": 0.3111426830291748, "learning_rate": 0.00012741262558650359, "loss": 1.1114, "step": 27940 }, { "epoch": 0.3630805515534165, "grad_norm": 0.2971980571746826, "learning_rate": 0.0001274100261245922, "loss": 1.3144, "step": 27941 }, { "epoch": 0.3630935460973324, "grad_norm": 0.4115547239780426, "learning_rate": 0.00012740742666268083, "loss": 1.1417, "step": 27942 }, { "epoch": 0.36310654064124825, "grad_norm": 0.3459772765636444, "learning_rate": 0.00012740482720076946, "loss": 1.502, "step": 27943 }, { "epoch": 0.36311953518516416, "grad_norm": 0.3610880374908447, "learning_rate": 0.00012740222773885806, "loss": 1.2536, "step": 27944 }, { "epoch": 0.36313252972908, "grad_norm": 0.3762784004211426, "learning_rate": 0.00012739962827694668, "loss": 1.2442, "step": 27945 }, { "epoch": 0.3631455242729959, "grad_norm": 0.34537798166275024, "learning_rate": 0.0001273970288150353, "loss": 1.3429, "step": 27946 }, { "epoch": 0.36315851881691175, "grad_norm": 0.4199179708957672, "learning_rate": 0.0001273944293531239, "loss": 1.2794, "step": 27947 }, { "epoch": 0.36317151336082765, "grad_norm": 0.4479343593120575, "learning_rate": 0.00012739182989121253, "loss": 1.3417, "step": 27948 }, { "epoch": 0.3631845079047435, "grad_norm": 0.49493348598480225, "learning_rate": 0.00012738923042930112, "loss": 1.3672, "step": 27949 }, { "epoch": 0.3631975024486594, "grad_norm": 0.46684977412223816, "learning_rate": 0.00012738663096738978, "loss": 1.5311, "step": 27950 }, { "epoch": 0.36321049699257524, "grad_norm": 0.39218395948410034, "learning_rate": 0.00012738403150547837, "loss": 1.4725, "step": 27951 }, { "epoch": 0.36322349153649114, "grad_norm": 0.4167647659778595, "learning_rate": 0.00012738143204356697, "loss": 1.4079, "step": 27952 }, { "epoch": 0.363236486080407, "grad_norm": 0.5562061667442322, "learning_rate": 0.0001273788325816556, "loss": 1.4342, "step": 27953 }, { "epoch": 0.3632494806243229, "grad_norm": 0.42765992879867554, "learning_rate": 0.00012737623311974422, "loss": 1.444, "step": 27954 }, { "epoch": 0.36326247516823873, "grad_norm": 0.3909742832183838, "learning_rate": 0.00012737363365783284, "loss": 1.4311, "step": 27955 }, { "epoch": 0.36327546971215463, "grad_norm": 0.4262784421443939, "learning_rate": 0.00012737103419592144, "loss": 1.4109, "step": 27956 }, { "epoch": 0.3632884642560705, "grad_norm": 0.42134782671928406, "learning_rate": 0.00012736843473401007, "loss": 1.4371, "step": 27957 }, { "epoch": 0.3633014587999864, "grad_norm": 0.45517075061798096, "learning_rate": 0.0001273658352720987, "loss": 1.501, "step": 27958 }, { "epoch": 0.3633144533439022, "grad_norm": 0.4511476457118988, "learning_rate": 0.0001273632358101873, "loss": 1.5152, "step": 27959 }, { "epoch": 0.3633274478878181, "grad_norm": 0.4538951516151428, "learning_rate": 0.0001273606363482759, "loss": 1.3827, "step": 27960 }, { "epoch": 0.36334044243173397, "grad_norm": 0.3794161379337311, "learning_rate": 0.0001273580368863645, "loss": 1.4722, "step": 27961 }, { "epoch": 0.36335343697564987, "grad_norm": 0.44694384932518005, "learning_rate": 0.00012735543742445316, "loss": 1.3154, "step": 27962 }, { "epoch": 0.3633664315195657, "grad_norm": 0.30698803067207336, "learning_rate": 0.00012735283796254176, "loss": 1.2275, "step": 27963 }, { "epoch": 0.3633794260634816, "grad_norm": 0.3932294547557831, "learning_rate": 0.00012735023850063038, "loss": 1.3938, "step": 27964 }, { "epoch": 0.36339242060739746, "grad_norm": 0.48068439960479736, "learning_rate": 0.000127347639038719, "loss": 1.5854, "step": 27965 }, { "epoch": 0.36340541515131336, "grad_norm": 0.5042515397071838, "learning_rate": 0.0001273450395768076, "loss": 1.4511, "step": 27966 }, { "epoch": 0.3634184096952292, "grad_norm": 0.4765413999557495, "learning_rate": 0.00012734244011489623, "loss": 1.3311, "step": 27967 }, { "epoch": 0.3634314042391451, "grad_norm": 0.3270588219165802, "learning_rate": 0.00012733984065298483, "loss": 1.4466, "step": 27968 }, { "epoch": 0.36344439878306095, "grad_norm": 0.3830268979072571, "learning_rate": 0.00012733724119107345, "loss": 1.3375, "step": 27969 }, { "epoch": 0.36345739332697685, "grad_norm": 0.31781959533691406, "learning_rate": 0.00012733464172916208, "loss": 1.2993, "step": 27970 }, { "epoch": 0.3634703878708927, "grad_norm": 0.4149746894836426, "learning_rate": 0.00012733204226725067, "loss": 1.3424, "step": 27971 }, { "epoch": 0.3634833824148086, "grad_norm": 0.3852953612804413, "learning_rate": 0.0001273294428053393, "loss": 1.4706, "step": 27972 }, { "epoch": 0.36349637695872444, "grad_norm": 0.3324621021747589, "learning_rate": 0.00012732684334342792, "loss": 1.4881, "step": 27973 }, { "epoch": 0.36350937150264034, "grad_norm": 0.41167035698890686, "learning_rate": 0.00012732424388151655, "loss": 1.3677, "step": 27974 }, { "epoch": 0.3635223660465562, "grad_norm": 0.412930428981781, "learning_rate": 0.00012732164441960514, "loss": 1.3112, "step": 27975 }, { "epoch": 0.3635353605904721, "grad_norm": 0.3955928385257721, "learning_rate": 0.00012731904495769377, "loss": 1.6119, "step": 27976 }, { "epoch": 0.36354835513438793, "grad_norm": 0.47529542446136475, "learning_rate": 0.0001273164454957824, "loss": 1.2549, "step": 27977 }, { "epoch": 0.36356134967830384, "grad_norm": 0.5175417065620422, "learning_rate": 0.000127313846033871, "loss": 1.579, "step": 27978 }, { "epoch": 0.3635743442222197, "grad_norm": 0.3789021670818329, "learning_rate": 0.00012731124657195962, "loss": 1.4066, "step": 27979 }, { "epoch": 0.3635873387661356, "grad_norm": 0.442864328622818, "learning_rate": 0.0001273086471100482, "loss": 1.3544, "step": 27980 }, { "epoch": 0.3636003333100514, "grad_norm": 0.3379141092300415, "learning_rate": 0.00012730604764813684, "loss": 1.2663, "step": 27981 }, { "epoch": 0.3636133278539673, "grad_norm": 0.3805772662162781, "learning_rate": 0.00012730344818622546, "loss": 1.6819, "step": 27982 }, { "epoch": 0.3636263223978832, "grad_norm": 0.3813496232032776, "learning_rate": 0.00012730084872431406, "loss": 1.4624, "step": 27983 }, { "epoch": 0.3636393169417991, "grad_norm": 0.42601850628852844, "learning_rate": 0.00012729824926240268, "loss": 1.548, "step": 27984 }, { "epoch": 0.3636523114857149, "grad_norm": 0.3615221083164215, "learning_rate": 0.0001272956498004913, "loss": 1.3247, "step": 27985 }, { "epoch": 0.3636653060296308, "grad_norm": 0.3807869851589203, "learning_rate": 0.00012729305033857993, "loss": 1.4613, "step": 27986 }, { "epoch": 0.36367830057354666, "grad_norm": 0.38697120547294617, "learning_rate": 0.00012729045087666853, "loss": 1.4758, "step": 27987 }, { "epoch": 0.36369129511746257, "grad_norm": 0.4261217415332794, "learning_rate": 0.00012728785141475715, "loss": 1.4357, "step": 27988 }, { "epoch": 0.3637042896613784, "grad_norm": 0.35582613945007324, "learning_rate": 0.00012728525195284578, "loss": 1.3145, "step": 27989 }, { "epoch": 0.3637172842052943, "grad_norm": 0.39728814363479614, "learning_rate": 0.00012728265249093438, "loss": 1.562, "step": 27990 }, { "epoch": 0.36373027874921016, "grad_norm": 0.40408873558044434, "learning_rate": 0.000127280053029023, "loss": 1.4113, "step": 27991 }, { "epoch": 0.36374327329312606, "grad_norm": 0.5270810723304749, "learning_rate": 0.0001272774535671116, "loss": 1.3387, "step": 27992 }, { "epoch": 0.3637562678370419, "grad_norm": 0.3873375356197357, "learning_rate": 0.00012727485410520025, "loss": 1.5123, "step": 27993 }, { "epoch": 0.3637692623809578, "grad_norm": 0.41070127487182617, "learning_rate": 0.00012727225464328885, "loss": 1.2837, "step": 27994 }, { "epoch": 0.36378225692487365, "grad_norm": 0.3857309818267822, "learning_rate": 0.00012726965518137744, "loss": 1.2547, "step": 27995 }, { "epoch": 0.36379525146878955, "grad_norm": 0.420284628868103, "learning_rate": 0.00012726705571946607, "loss": 1.2891, "step": 27996 }, { "epoch": 0.3638082460127054, "grad_norm": 0.4320487380027771, "learning_rate": 0.0001272644562575547, "loss": 1.4931, "step": 27997 }, { "epoch": 0.3638212405566213, "grad_norm": 0.309845894575119, "learning_rate": 0.00012726185679564332, "loss": 1.1223, "step": 27998 }, { "epoch": 0.36383423510053714, "grad_norm": 0.4259468615055084, "learning_rate": 0.00012725925733373192, "loss": 1.491, "step": 27999 }, { "epoch": 0.36384722964445304, "grad_norm": 0.48759549856185913, "learning_rate": 0.00012725665787182054, "loss": 1.5763, "step": 28000 }, { "epoch": 0.3638602241883689, "grad_norm": 0.4181251525878906, "learning_rate": 0.00012725405840990916, "loss": 1.2331, "step": 28001 }, { "epoch": 0.3638732187322848, "grad_norm": 0.396205335855484, "learning_rate": 0.00012725145894799776, "loss": 1.2252, "step": 28002 }, { "epoch": 0.36388621327620063, "grad_norm": 0.4210398495197296, "learning_rate": 0.00012724885948608639, "loss": 1.3981, "step": 28003 }, { "epoch": 0.36389920782011653, "grad_norm": 0.40787240862846375, "learning_rate": 0.000127246260024175, "loss": 1.3534, "step": 28004 }, { "epoch": 0.3639122023640324, "grad_norm": 0.39276304841041565, "learning_rate": 0.00012724366056226363, "loss": 1.3414, "step": 28005 }, { "epoch": 0.3639251969079483, "grad_norm": 0.45182257890701294, "learning_rate": 0.00012724106110035223, "loss": 1.5151, "step": 28006 }, { "epoch": 0.3639381914518641, "grad_norm": 0.4759543836116791, "learning_rate": 0.00012723846163844083, "loss": 1.3491, "step": 28007 }, { "epoch": 0.36395118599578, "grad_norm": 0.48742738366127014, "learning_rate": 0.00012723586217652948, "loss": 1.4055, "step": 28008 }, { "epoch": 0.36396418053969587, "grad_norm": 0.3423019349575043, "learning_rate": 0.00012723326271461808, "loss": 1.3521, "step": 28009 }, { "epoch": 0.36397717508361177, "grad_norm": 0.37417492270469666, "learning_rate": 0.0001272306632527067, "loss": 1.2074, "step": 28010 }, { "epoch": 0.3639901696275276, "grad_norm": 0.4327795207500458, "learning_rate": 0.0001272280637907953, "loss": 1.3384, "step": 28011 }, { "epoch": 0.3640031641714435, "grad_norm": 0.3741166591644287, "learning_rate": 0.00012722546432888392, "loss": 1.3832, "step": 28012 }, { "epoch": 0.36401615871535936, "grad_norm": 0.32205912470817566, "learning_rate": 0.00012722286486697255, "loss": 1.3758, "step": 28013 }, { "epoch": 0.36402915325927526, "grad_norm": 0.43938589096069336, "learning_rate": 0.00012722026540506115, "loss": 1.3601, "step": 28014 }, { "epoch": 0.36404214780319116, "grad_norm": 0.3736664056777954, "learning_rate": 0.00012721766594314977, "loss": 1.3785, "step": 28015 }, { "epoch": 0.364055142347107, "grad_norm": 0.4308818280696869, "learning_rate": 0.0001272150664812384, "loss": 1.3639, "step": 28016 }, { "epoch": 0.3640681368910229, "grad_norm": 0.36459285020828247, "learning_rate": 0.00012721246701932702, "loss": 1.2955, "step": 28017 }, { "epoch": 0.36408113143493875, "grad_norm": 0.3885711133480072, "learning_rate": 0.00012720986755741562, "loss": 1.2944, "step": 28018 }, { "epoch": 0.36409412597885465, "grad_norm": 0.37572920322418213, "learning_rate": 0.00012720726809550422, "loss": 1.4485, "step": 28019 }, { "epoch": 0.3641071205227705, "grad_norm": 0.41908925771713257, "learning_rate": 0.00012720466863359287, "loss": 1.2875, "step": 28020 }, { "epoch": 0.3641201150666864, "grad_norm": 0.41559898853302, "learning_rate": 0.00012720206917168146, "loss": 1.5178, "step": 28021 }, { "epoch": 0.36413310961060225, "grad_norm": 0.5386263132095337, "learning_rate": 0.0001271994697097701, "loss": 1.363, "step": 28022 }, { "epoch": 0.36414610415451815, "grad_norm": 0.35852131247520447, "learning_rate": 0.00012719687024785869, "loss": 1.4462, "step": 28023 }, { "epoch": 0.364159098698434, "grad_norm": 0.3842683732509613, "learning_rate": 0.0001271942707859473, "loss": 1.368, "step": 28024 }, { "epoch": 0.3641720932423499, "grad_norm": 0.3549666106700897, "learning_rate": 0.00012719167132403593, "loss": 1.4171, "step": 28025 }, { "epoch": 0.36418508778626574, "grad_norm": 0.5157406330108643, "learning_rate": 0.00012718907186212453, "loss": 1.561, "step": 28026 }, { "epoch": 0.36419808233018164, "grad_norm": 0.31362292170524597, "learning_rate": 0.00012718647240021316, "loss": 1.2652, "step": 28027 }, { "epoch": 0.3642110768740975, "grad_norm": 0.3563797175884247, "learning_rate": 0.00012718387293830178, "loss": 1.4917, "step": 28028 }, { "epoch": 0.3642240714180134, "grad_norm": 0.3759686052799225, "learning_rate": 0.0001271812734763904, "loss": 1.5185, "step": 28029 }, { "epoch": 0.36423706596192923, "grad_norm": 0.3824428617954254, "learning_rate": 0.000127178674014479, "loss": 1.6421, "step": 28030 }, { "epoch": 0.36425006050584513, "grad_norm": 0.48471498489379883, "learning_rate": 0.00012717607455256763, "loss": 1.1653, "step": 28031 }, { "epoch": 0.364263055049761, "grad_norm": 0.4868031144142151, "learning_rate": 0.00012717347509065625, "loss": 1.4659, "step": 28032 }, { "epoch": 0.3642760495936769, "grad_norm": 0.24223241209983826, "learning_rate": 0.00012717087562874485, "loss": 1.2754, "step": 28033 }, { "epoch": 0.3642890441375927, "grad_norm": 0.4007316529750824, "learning_rate": 0.00012716827616683347, "loss": 1.4847, "step": 28034 }, { "epoch": 0.3643020386815086, "grad_norm": 0.42385217547416687, "learning_rate": 0.00012716567670492207, "loss": 1.5243, "step": 28035 }, { "epoch": 0.36431503322542447, "grad_norm": 0.37853702902793884, "learning_rate": 0.0001271630772430107, "loss": 1.332, "step": 28036 }, { "epoch": 0.36432802776934037, "grad_norm": 0.44703051447868347, "learning_rate": 0.00012716047778109932, "loss": 1.5835, "step": 28037 }, { "epoch": 0.3643410223132562, "grad_norm": 0.38588809967041016, "learning_rate": 0.00012715787831918792, "loss": 1.3347, "step": 28038 }, { "epoch": 0.3643540168571721, "grad_norm": 0.4064985513687134, "learning_rate": 0.00012715527885727657, "loss": 1.4309, "step": 28039 }, { "epoch": 0.36436701140108796, "grad_norm": 0.5217446088790894, "learning_rate": 0.00012715267939536517, "loss": 1.4052, "step": 28040 }, { "epoch": 0.36438000594500386, "grad_norm": 0.40893876552581787, "learning_rate": 0.0001271500799334538, "loss": 1.5905, "step": 28041 }, { "epoch": 0.3643930004889197, "grad_norm": 0.3365531861782074, "learning_rate": 0.0001271474804715424, "loss": 1.1859, "step": 28042 }, { "epoch": 0.3644059950328356, "grad_norm": 0.37935712933540344, "learning_rate": 0.000127144881009631, "loss": 1.4425, "step": 28043 }, { "epoch": 0.36441898957675145, "grad_norm": 0.31487390398979187, "learning_rate": 0.00012714228154771964, "loss": 1.3752, "step": 28044 }, { "epoch": 0.36443198412066735, "grad_norm": 0.44216734170913696, "learning_rate": 0.00012713968208580823, "loss": 1.3929, "step": 28045 }, { "epoch": 0.3644449786645832, "grad_norm": 0.36743104457855225, "learning_rate": 0.00012713708262389686, "loss": 1.3501, "step": 28046 }, { "epoch": 0.3644579732084991, "grad_norm": 0.37534141540527344, "learning_rate": 0.00012713448316198548, "loss": 1.5126, "step": 28047 }, { "epoch": 0.36447096775241494, "grad_norm": 0.418866902589798, "learning_rate": 0.0001271318837000741, "loss": 1.3158, "step": 28048 }, { "epoch": 0.36448396229633084, "grad_norm": 0.37248414754867554, "learning_rate": 0.0001271292842381627, "loss": 1.3707, "step": 28049 }, { "epoch": 0.3644969568402467, "grad_norm": 0.3232172727584839, "learning_rate": 0.0001271266847762513, "loss": 1.2997, "step": 28050 }, { "epoch": 0.3645099513841626, "grad_norm": 0.4679691195487976, "learning_rate": 0.00012712408531433995, "loss": 1.4489, "step": 28051 }, { "epoch": 0.36452294592807843, "grad_norm": 0.4077663719654083, "learning_rate": 0.00012712148585242855, "loss": 1.3671, "step": 28052 }, { "epoch": 0.36453594047199434, "grad_norm": 0.4671263098716736, "learning_rate": 0.00012711888639051718, "loss": 1.3064, "step": 28053 }, { "epoch": 0.3645489350159102, "grad_norm": 0.37598350644111633, "learning_rate": 0.00012711628692860577, "loss": 1.505, "step": 28054 }, { "epoch": 0.3645619295598261, "grad_norm": 0.3588161766529083, "learning_rate": 0.0001271136874666944, "loss": 1.2779, "step": 28055 }, { "epoch": 0.3645749241037419, "grad_norm": 0.3823160231113434, "learning_rate": 0.00012711108800478302, "loss": 1.387, "step": 28056 }, { "epoch": 0.3645879186476578, "grad_norm": 0.4202607572078705, "learning_rate": 0.00012710848854287162, "loss": 1.3979, "step": 28057 }, { "epoch": 0.3646009131915737, "grad_norm": 0.5225715637207031, "learning_rate": 0.00012710588908096024, "loss": 1.4796, "step": 28058 }, { "epoch": 0.3646139077354896, "grad_norm": 0.30882301926612854, "learning_rate": 0.00012710328961904887, "loss": 1.4556, "step": 28059 }, { "epoch": 0.3646269022794054, "grad_norm": 0.3137337863445282, "learning_rate": 0.0001271006901571375, "loss": 1.3561, "step": 28060 }, { "epoch": 0.3646398968233213, "grad_norm": 0.4012397825717926, "learning_rate": 0.0001270980906952261, "loss": 1.377, "step": 28061 }, { "epoch": 0.36465289136723716, "grad_norm": 0.38306254148483276, "learning_rate": 0.0001270954912333147, "loss": 1.3023, "step": 28062 }, { "epoch": 0.36466588591115306, "grad_norm": 0.46783527731895447, "learning_rate": 0.00012709289177140334, "loss": 1.5391, "step": 28063 }, { "epoch": 0.3646788804550689, "grad_norm": 0.4088996648788452, "learning_rate": 0.00012709029230949194, "loss": 1.4238, "step": 28064 }, { "epoch": 0.3646918749989848, "grad_norm": 0.40283480286598206, "learning_rate": 0.00012708769284758056, "loss": 1.4434, "step": 28065 }, { "epoch": 0.36470486954290066, "grad_norm": 0.356254518032074, "learning_rate": 0.00012708509338566916, "loss": 1.4431, "step": 28066 }, { "epoch": 0.36471786408681656, "grad_norm": 0.4371912479400635, "learning_rate": 0.00012708249392375778, "loss": 1.3425, "step": 28067 }, { "epoch": 0.3647308586307324, "grad_norm": 0.3706740438938141, "learning_rate": 0.0001270798944618464, "loss": 1.384, "step": 28068 }, { "epoch": 0.3647438531746483, "grad_norm": 0.5011845827102661, "learning_rate": 0.000127077294999935, "loss": 1.4167, "step": 28069 }, { "epoch": 0.36475684771856415, "grad_norm": 0.3658675253391266, "learning_rate": 0.00012707469553802363, "loss": 1.313, "step": 28070 }, { "epoch": 0.36476984226248005, "grad_norm": 0.5029263496398926, "learning_rate": 0.00012707209607611225, "loss": 1.5346, "step": 28071 }, { "epoch": 0.3647828368063959, "grad_norm": 0.3849468231201172, "learning_rate": 0.00012706949661420088, "loss": 1.5605, "step": 28072 }, { "epoch": 0.3647958313503118, "grad_norm": 0.4321690797805786, "learning_rate": 0.00012706689715228948, "loss": 1.3432, "step": 28073 }, { "epoch": 0.36480882589422764, "grad_norm": 0.28547438979148865, "learning_rate": 0.00012706429769037807, "loss": 1.3526, "step": 28074 }, { "epoch": 0.36482182043814354, "grad_norm": 0.42576876282691956, "learning_rate": 0.00012706169822846673, "loss": 1.4022, "step": 28075 }, { "epoch": 0.3648348149820594, "grad_norm": 0.2830517888069153, "learning_rate": 0.00012705909876655532, "loss": 1.0827, "step": 28076 }, { "epoch": 0.3648478095259753, "grad_norm": 0.3930256962776184, "learning_rate": 0.00012705649930464395, "loss": 1.504, "step": 28077 }, { "epoch": 0.36486080406989113, "grad_norm": 0.3815311789512634, "learning_rate": 0.00012705389984273257, "loss": 1.4094, "step": 28078 }, { "epoch": 0.36487379861380703, "grad_norm": 0.31158214807510376, "learning_rate": 0.00012705130038082117, "loss": 1.2232, "step": 28079 }, { "epoch": 0.3648867931577229, "grad_norm": 0.36985287070274353, "learning_rate": 0.0001270487009189098, "loss": 1.4014, "step": 28080 }, { "epoch": 0.3648997877016388, "grad_norm": 0.36557716131210327, "learning_rate": 0.0001270461014569984, "loss": 1.3851, "step": 28081 }, { "epoch": 0.3649127822455546, "grad_norm": 0.4449576735496521, "learning_rate": 0.00012704350199508704, "loss": 1.4077, "step": 28082 }, { "epoch": 0.3649257767894705, "grad_norm": 0.3640066087245941, "learning_rate": 0.00012704090253317564, "loss": 1.2831, "step": 28083 }, { "epoch": 0.36493877133338637, "grad_norm": 0.36832019686698914, "learning_rate": 0.00012703830307126426, "loss": 1.449, "step": 28084 }, { "epoch": 0.36495176587730227, "grad_norm": 0.4077436923980713, "learning_rate": 0.00012703570360935286, "loss": 1.3886, "step": 28085 }, { "epoch": 0.3649647604212181, "grad_norm": 0.3969815969467163, "learning_rate": 0.00012703310414744149, "loss": 1.3984, "step": 28086 }, { "epoch": 0.364977754965134, "grad_norm": 0.43063801527023315, "learning_rate": 0.0001270305046855301, "loss": 1.3701, "step": 28087 }, { "epoch": 0.36499074950904986, "grad_norm": 0.42025133967399597, "learning_rate": 0.0001270279052236187, "loss": 1.5501, "step": 28088 }, { "epoch": 0.36500374405296576, "grad_norm": 0.4629644453525543, "learning_rate": 0.00012702530576170733, "loss": 1.4808, "step": 28089 }, { "epoch": 0.3650167385968816, "grad_norm": 0.3486928343772888, "learning_rate": 0.00012702270629979596, "loss": 1.4206, "step": 28090 }, { "epoch": 0.3650297331407975, "grad_norm": 0.3603616952896118, "learning_rate": 0.00012702010683788455, "loss": 1.5814, "step": 28091 }, { "epoch": 0.3650427276847134, "grad_norm": 0.37927836179733276, "learning_rate": 0.00012701750737597318, "loss": 1.2954, "step": 28092 }, { "epoch": 0.36505572222862925, "grad_norm": 0.44568052887916565, "learning_rate": 0.00012701490791406178, "loss": 1.3147, "step": 28093 }, { "epoch": 0.36506871677254515, "grad_norm": 0.4193950891494751, "learning_rate": 0.00012701230845215043, "loss": 1.3607, "step": 28094 }, { "epoch": 0.365081711316461, "grad_norm": 0.3798714876174927, "learning_rate": 0.00012700970899023903, "loss": 1.4552, "step": 28095 }, { "epoch": 0.3650947058603769, "grad_norm": 0.3370906710624695, "learning_rate": 0.00012700710952832765, "loss": 1.4651, "step": 28096 }, { "epoch": 0.36510770040429275, "grad_norm": 0.43046578764915466, "learning_rate": 0.00012700451006641625, "loss": 1.4425, "step": 28097 }, { "epoch": 0.36512069494820865, "grad_norm": 0.34880244731903076, "learning_rate": 0.00012700191060450487, "loss": 1.2982, "step": 28098 }, { "epoch": 0.3651336894921245, "grad_norm": 0.3743686079978943, "learning_rate": 0.0001269993111425935, "loss": 1.2331, "step": 28099 }, { "epoch": 0.3651466840360404, "grad_norm": 0.38713330030441284, "learning_rate": 0.0001269967116806821, "loss": 1.3827, "step": 28100 }, { "epoch": 0.36515967857995624, "grad_norm": 0.39366090297698975, "learning_rate": 0.00012699411221877072, "loss": 1.4139, "step": 28101 }, { "epoch": 0.36517267312387214, "grad_norm": 0.30771100521087646, "learning_rate": 0.00012699151275685934, "loss": 1.4529, "step": 28102 }, { "epoch": 0.365185667667788, "grad_norm": 0.33973583579063416, "learning_rate": 0.00012698891329494794, "loss": 1.4037, "step": 28103 }, { "epoch": 0.3651986622117039, "grad_norm": 0.46407073736190796, "learning_rate": 0.00012698631383303656, "loss": 1.4893, "step": 28104 }, { "epoch": 0.36521165675561973, "grad_norm": 0.37019267678260803, "learning_rate": 0.00012698371437112516, "loss": 1.4, "step": 28105 }, { "epoch": 0.36522465129953563, "grad_norm": 0.5243204236030579, "learning_rate": 0.0001269811149092138, "loss": 1.3632, "step": 28106 }, { "epoch": 0.3652376458434515, "grad_norm": 0.37223151326179504, "learning_rate": 0.0001269785154473024, "loss": 1.1576, "step": 28107 }, { "epoch": 0.3652506403873674, "grad_norm": 0.328691691160202, "learning_rate": 0.00012697591598539104, "loss": 1.6465, "step": 28108 }, { "epoch": 0.3652636349312832, "grad_norm": 0.39232245087623596, "learning_rate": 0.00012697331652347963, "loss": 1.4403, "step": 28109 }, { "epoch": 0.3652766294751991, "grad_norm": 0.4726991057395935, "learning_rate": 0.00012697071706156826, "loss": 1.3703, "step": 28110 }, { "epoch": 0.36528962401911497, "grad_norm": 0.4225909113883972, "learning_rate": 0.00012696811759965688, "loss": 1.3159, "step": 28111 }, { "epoch": 0.36530261856303087, "grad_norm": 0.49309974908828735, "learning_rate": 0.00012696551813774548, "loss": 1.4466, "step": 28112 }, { "epoch": 0.3653156131069467, "grad_norm": 0.39265117049217224, "learning_rate": 0.00012696291867583413, "loss": 1.3459, "step": 28113 }, { "epoch": 0.3653286076508626, "grad_norm": 0.32008376717567444, "learning_rate": 0.00012696031921392273, "loss": 1.3289, "step": 28114 }, { "epoch": 0.36534160219477846, "grad_norm": 0.378717839717865, "learning_rate": 0.00012695771975201135, "loss": 1.3664, "step": 28115 }, { "epoch": 0.36535459673869436, "grad_norm": 0.42208704352378845, "learning_rate": 0.00012695512029009995, "loss": 1.2614, "step": 28116 }, { "epoch": 0.3653675912826102, "grad_norm": 0.384769469499588, "learning_rate": 0.00012695252082818857, "loss": 1.29, "step": 28117 }, { "epoch": 0.3653805858265261, "grad_norm": 0.3501450717449188, "learning_rate": 0.0001269499213662772, "loss": 1.1789, "step": 28118 }, { "epoch": 0.36539358037044195, "grad_norm": 0.3689560890197754, "learning_rate": 0.0001269473219043658, "loss": 1.4202, "step": 28119 }, { "epoch": 0.36540657491435785, "grad_norm": 0.409323513507843, "learning_rate": 0.00012694472244245442, "loss": 1.4544, "step": 28120 }, { "epoch": 0.3654195694582737, "grad_norm": 0.3456791937351227, "learning_rate": 0.00012694212298054305, "loss": 1.3124, "step": 28121 }, { "epoch": 0.3654325640021896, "grad_norm": 0.4129564166069031, "learning_rate": 0.00012693952351863164, "loss": 1.2841, "step": 28122 }, { "epoch": 0.36544555854610544, "grad_norm": 0.36690765619277954, "learning_rate": 0.00012693692405672027, "loss": 1.2794, "step": 28123 }, { "epoch": 0.36545855309002134, "grad_norm": 0.4636576473712921, "learning_rate": 0.00012693432459480886, "loss": 1.3251, "step": 28124 }, { "epoch": 0.3654715476339372, "grad_norm": 0.3891012966632843, "learning_rate": 0.00012693172513289752, "loss": 1.4565, "step": 28125 }, { "epoch": 0.3654845421778531, "grad_norm": 0.32809528708457947, "learning_rate": 0.0001269291256709861, "loss": 1.3799, "step": 28126 }, { "epoch": 0.36549753672176893, "grad_norm": 0.42696109414100647, "learning_rate": 0.00012692652620907474, "loss": 1.3017, "step": 28127 }, { "epoch": 0.36551053126568483, "grad_norm": 0.2179551124572754, "learning_rate": 0.00012692392674716334, "loss": 1.1535, "step": 28128 }, { "epoch": 0.3655235258096007, "grad_norm": 0.4546618163585663, "learning_rate": 0.00012692132728525196, "loss": 1.2079, "step": 28129 }, { "epoch": 0.3655365203535166, "grad_norm": 0.44335225224494934, "learning_rate": 0.00012691872782334058, "loss": 1.6024, "step": 28130 }, { "epoch": 0.3655495148974324, "grad_norm": 0.6570788621902466, "learning_rate": 0.00012691612836142918, "loss": 1.4937, "step": 28131 }, { "epoch": 0.3655625094413483, "grad_norm": 0.44252750277519226, "learning_rate": 0.0001269135288995178, "loss": 1.3296, "step": 28132 }, { "epoch": 0.36557550398526417, "grad_norm": 0.43089139461517334, "learning_rate": 0.00012691092943760643, "loss": 1.5463, "step": 28133 }, { "epoch": 0.3655884985291801, "grad_norm": 0.33625930547714233, "learning_rate": 0.00012690832997569503, "loss": 1.2925, "step": 28134 }, { "epoch": 0.3656014930730959, "grad_norm": 0.3515777289867401, "learning_rate": 0.00012690573051378365, "loss": 1.582, "step": 28135 }, { "epoch": 0.3656144876170118, "grad_norm": 0.3245621919631958, "learning_rate": 0.00012690313105187225, "loss": 1.3586, "step": 28136 }, { "epoch": 0.36562748216092766, "grad_norm": 0.46025052666664124, "learning_rate": 0.0001269005315899609, "loss": 1.3535, "step": 28137 }, { "epoch": 0.36564047670484356, "grad_norm": 0.4299743175506592, "learning_rate": 0.0001268979321280495, "loss": 1.2106, "step": 28138 }, { "epoch": 0.3656534712487594, "grad_norm": 0.34638112783432007, "learning_rate": 0.00012689533266613812, "loss": 1.1963, "step": 28139 }, { "epoch": 0.3656664657926753, "grad_norm": 0.38650864362716675, "learning_rate": 0.00012689273320422672, "loss": 1.4405, "step": 28140 }, { "epoch": 0.36567946033659116, "grad_norm": 0.5232280492782593, "learning_rate": 0.00012689013374231535, "loss": 1.4255, "step": 28141 }, { "epoch": 0.36569245488050706, "grad_norm": 0.3474600613117218, "learning_rate": 0.00012688753428040397, "loss": 1.256, "step": 28142 }, { "epoch": 0.3657054494244229, "grad_norm": 0.4987822473049164, "learning_rate": 0.00012688493481849257, "loss": 1.392, "step": 28143 }, { "epoch": 0.3657184439683388, "grad_norm": 0.42552152276039124, "learning_rate": 0.0001268823353565812, "loss": 1.5524, "step": 28144 }, { "epoch": 0.36573143851225465, "grad_norm": 0.3580000400543213, "learning_rate": 0.00012687973589466982, "loss": 1.4081, "step": 28145 }, { "epoch": 0.36574443305617055, "grad_norm": 0.38259294629096985, "learning_rate": 0.0001268771364327584, "loss": 1.6565, "step": 28146 }, { "epoch": 0.3657574276000864, "grad_norm": 0.44206780195236206, "learning_rate": 0.00012687453697084704, "loss": 1.5425, "step": 28147 }, { "epoch": 0.3657704221440023, "grad_norm": 0.44376686215400696, "learning_rate": 0.00012687193750893564, "loss": 1.3856, "step": 28148 }, { "epoch": 0.36578341668791814, "grad_norm": 0.34192654490470886, "learning_rate": 0.0001268693380470243, "loss": 1.483, "step": 28149 }, { "epoch": 0.36579641123183404, "grad_norm": 0.3827860355377197, "learning_rate": 0.00012686673858511288, "loss": 1.3772, "step": 28150 }, { "epoch": 0.3658094057757499, "grad_norm": 0.42208024859428406, "learning_rate": 0.0001268641391232015, "loss": 1.3912, "step": 28151 }, { "epoch": 0.3658224003196658, "grad_norm": 0.430095374584198, "learning_rate": 0.00012686153966129013, "loss": 1.4129, "step": 28152 }, { "epoch": 0.36583539486358163, "grad_norm": 0.2927623391151428, "learning_rate": 0.00012685894019937873, "loss": 1.3213, "step": 28153 }, { "epoch": 0.36584838940749753, "grad_norm": 0.35218799114227295, "learning_rate": 0.00012685634073746735, "loss": 1.3798, "step": 28154 }, { "epoch": 0.3658613839514134, "grad_norm": 0.35474035143852234, "learning_rate": 0.00012685374127555595, "loss": 1.5253, "step": 28155 }, { "epoch": 0.3658743784953293, "grad_norm": 0.5100380778312683, "learning_rate": 0.0001268511418136446, "loss": 1.4388, "step": 28156 }, { "epoch": 0.3658873730392451, "grad_norm": 0.37747254967689514, "learning_rate": 0.0001268485423517332, "loss": 1.5742, "step": 28157 }, { "epoch": 0.365900367583161, "grad_norm": 0.3083898723125458, "learning_rate": 0.0001268459428898218, "loss": 1.3759, "step": 28158 }, { "epoch": 0.36591336212707687, "grad_norm": 0.373930960893631, "learning_rate": 0.00012684334342791042, "loss": 1.4642, "step": 28159 }, { "epoch": 0.36592635667099277, "grad_norm": 0.35953137278556824, "learning_rate": 0.00012684074396599905, "loss": 1.4968, "step": 28160 }, { "epoch": 0.3659393512149086, "grad_norm": 0.4357090890407562, "learning_rate": 0.00012683814450408767, "loss": 1.3088, "step": 28161 }, { "epoch": 0.3659523457588245, "grad_norm": 0.41514185070991516, "learning_rate": 0.00012683554504217627, "loss": 1.2275, "step": 28162 }, { "epoch": 0.36596534030274036, "grad_norm": 0.3018457293510437, "learning_rate": 0.0001268329455802649, "loss": 1.4676, "step": 28163 }, { "epoch": 0.36597833484665626, "grad_norm": 0.29435214400291443, "learning_rate": 0.00012683034611835352, "loss": 1.3255, "step": 28164 }, { "epoch": 0.3659913293905721, "grad_norm": 0.4148188531398773, "learning_rate": 0.00012682774665644212, "loss": 1.4706, "step": 28165 }, { "epoch": 0.366004323934488, "grad_norm": 0.4417075216770172, "learning_rate": 0.00012682514719453074, "loss": 1.3882, "step": 28166 }, { "epoch": 0.36601731847840385, "grad_norm": 0.2957364022731781, "learning_rate": 0.00012682254773261934, "loss": 1.2805, "step": 28167 }, { "epoch": 0.36603031302231975, "grad_norm": 0.39461666345596313, "learning_rate": 0.000126819948270708, "loss": 1.3663, "step": 28168 }, { "epoch": 0.36604330756623565, "grad_norm": 0.41828155517578125, "learning_rate": 0.0001268173488087966, "loss": 1.3407, "step": 28169 }, { "epoch": 0.3660563021101515, "grad_norm": 0.42200860381126404, "learning_rate": 0.0001268147493468852, "loss": 1.4452, "step": 28170 }, { "epoch": 0.3660692966540674, "grad_norm": 0.4721120595932007, "learning_rate": 0.0001268121498849738, "loss": 1.5777, "step": 28171 }, { "epoch": 0.36608229119798325, "grad_norm": 0.38554278016090393, "learning_rate": 0.00012680955042306243, "loss": 1.3583, "step": 28172 }, { "epoch": 0.36609528574189915, "grad_norm": 0.4189189076423645, "learning_rate": 0.00012680695096115106, "loss": 1.2832, "step": 28173 }, { "epoch": 0.366108280285815, "grad_norm": 0.38415658473968506, "learning_rate": 0.00012680435149923965, "loss": 1.4226, "step": 28174 }, { "epoch": 0.3661212748297309, "grad_norm": 0.48040997982025146, "learning_rate": 0.00012680175203732828, "loss": 1.5027, "step": 28175 }, { "epoch": 0.36613426937364674, "grad_norm": 0.43938133120536804, "learning_rate": 0.0001267991525754169, "loss": 1.4759, "step": 28176 }, { "epoch": 0.36614726391756264, "grad_norm": 0.4448304772377014, "learning_rate": 0.0001267965531135055, "loss": 1.437, "step": 28177 }, { "epoch": 0.3661602584614785, "grad_norm": 0.4525499939918518, "learning_rate": 0.00012679395365159413, "loss": 1.3965, "step": 28178 }, { "epoch": 0.3661732530053944, "grad_norm": 0.44312888383865356, "learning_rate": 0.00012679135418968272, "loss": 1.2919, "step": 28179 }, { "epoch": 0.36618624754931023, "grad_norm": 0.3774242699146271, "learning_rate": 0.00012678875472777137, "loss": 1.4792, "step": 28180 }, { "epoch": 0.36619924209322613, "grad_norm": 0.4145831763744354, "learning_rate": 0.00012678615526585997, "loss": 1.4226, "step": 28181 }, { "epoch": 0.366212236637142, "grad_norm": 0.4463750720024109, "learning_rate": 0.0001267835558039486, "loss": 1.4233, "step": 28182 }, { "epoch": 0.3662252311810579, "grad_norm": 0.4231424033641815, "learning_rate": 0.0001267809563420372, "loss": 1.4051, "step": 28183 }, { "epoch": 0.3662382257249737, "grad_norm": 0.40167200565338135, "learning_rate": 0.00012677835688012582, "loss": 1.5238, "step": 28184 }, { "epoch": 0.3662512202688896, "grad_norm": 0.5366488695144653, "learning_rate": 0.00012677575741821444, "loss": 1.3343, "step": 28185 }, { "epoch": 0.36626421481280547, "grad_norm": 0.4084893763065338, "learning_rate": 0.00012677315795630304, "loss": 1.3832, "step": 28186 }, { "epoch": 0.36627720935672137, "grad_norm": 0.35248279571533203, "learning_rate": 0.00012677055849439166, "loss": 1.488, "step": 28187 }, { "epoch": 0.3662902039006372, "grad_norm": 0.4183511435985565, "learning_rate": 0.0001267679590324803, "loss": 1.3968, "step": 28188 }, { "epoch": 0.3663031984445531, "grad_norm": 0.33337122201919556, "learning_rate": 0.0001267653595705689, "loss": 1.1879, "step": 28189 }, { "epoch": 0.36631619298846896, "grad_norm": 0.3395618200302124, "learning_rate": 0.0001267627601086575, "loss": 1.2035, "step": 28190 }, { "epoch": 0.36632918753238486, "grad_norm": 0.4477768540382385, "learning_rate": 0.00012676016064674614, "loss": 1.5642, "step": 28191 }, { "epoch": 0.3663421820763007, "grad_norm": 0.3394140899181366, "learning_rate": 0.00012675756118483476, "loss": 1.3381, "step": 28192 }, { "epoch": 0.3663551766202166, "grad_norm": 0.4839639663696289, "learning_rate": 0.00012675496172292336, "loss": 1.4964, "step": 28193 }, { "epoch": 0.36636817116413245, "grad_norm": 0.37405848503112793, "learning_rate": 0.00012675236226101198, "loss": 1.3034, "step": 28194 }, { "epoch": 0.36638116570804835, "grad_norm": 0.5386796593666077, "learning_rate": 0.0001267497627991006, "loss": 1.4253, "step": 28195 }, { "epoch": 0.3663941602519642, "grad_norm": 0.4870552718639374, "learning_rate": 0.0001267471633371892, "loss": 1.3083, "step": 28196 }, { "epoch": 0.3664071547958801, "grad_norm": 0.4764971137046814, "learning_rate": 0.00012674456387527783, "loss": 1.3278, "step": 28197 }, { "epoch": 0.36642014933979594, "grad_norm": 0.3245146572589874, "learning_rate": 0.00012674196441336643, "loss": 1.3507, "step": 28198 }, { "epoch": 0.36643314388371184, "grad_norm": 0.44167420268058777, "learning_rate": 0.00012673936495145508, "loss": 1.3914, "step": 28199 }, { "epoch": 0.3664461384276277, "grad_norm": 0.4022807776927948, "learning_rate": 0.00012673676548954367, "loss": 1.316, "step": 28200 }, { "epoch": 0.3664591329715436, "grad_norm": 0.3878355026245117, "learning_rate": 0.00012673416602763227, "loss": 1.3399, "step": 28201 }, { "epoch": 0.36647212751545943, "grad_norm": 0.3598688244819641, "learning_rate": 0.0001267315665657209, "loss": 1.383, "step": 28202 }, { "epoch": 0.36648512205937533, "grad_norm": 0.400646448135376, "learning_rate": 0.00012672896710380952, "loss": 1.4203, "step": 28203 }, { "epoch": 0.3664981166032912, "grad_norm": 0.430014431476593, "learning_rate": 0.00012672636764189815, "loss": 1.4413, "step": 28204 }, { "epoch": 0.3665111111472071, "grad_norm": 0.3218499720096588, "learning_rate": 0.00012672376817998674, "loss": 1.2877, "step": 28205 }, { "epoch": 0.3665241056911229, "grad_norm": 0.40727245807647705, "learning_rate": 0.00012672116871807537, "loss": 1.2765, "step": 28206 }, { "epoch": 0.3665371002350388, "grad_norm": 0.3618388772010803, "learning_rate": 0.000126718569256164, "loss": 1.4602, "step": 28207 }, { "epoch": 0.36655009477895467, "grad_norm": 0.41891559958457947, "learning_rate": 0.0001267159697942526, "loss": 1.5062, "step": 28208 }, { "epoch": 0.36656308932287057, "grad_norm": 0.39407289028167725, "learning_rate": 0.00012671337033234121, "loss": 1.5623, "step": 28209 }, { "epoch": 0.3665760838667864, "grad_norm": 0.3864799439907074, "learning_rate": 0.0001267107708704298, "loss": 1.4045, "step": 28210 }, { "epoch": 0.3665890784107023, "grad_norm": 0.3171851336956024, "learning_rate": 0.00012670817140851846, "loss": 1.2591, "step": 28211 }, { "epoch": 0.36660207295461816, "grad_norm": 0.4939090609550476, "learning_rate": 0.00012670557194660706, "loss": 1.4209, "step": 28212 }, { "epoch": 0.36661506749853406, "grad_norm": 0.383779913187027, "learning_rate": 0.00012670297248469566, "loss": 1.2718, "step": 28213 }, { "epoch": 0.3666280620424499, "grad_norm": 0.4203174114227295, "learning_rate": 0.00012670037302278428, "loss": 1.43, "step": 28214 }, { "epoch": 0.3666410565863658, "grad_norm": 0.4474756121635437, "learning_rate": 0.0001266977735608729, "loss": 1.4156, "step": 28215 }, { "epoch": 0.36665405113028166, "grad_norm": 0.3777061402797699, "learning_rate": 0.00012669517409896153, "loss": 1.45, "step": 28216 }, { "epoch": 0.36666704567419756, "grad_norm": 0.3842798173427582, "learning_rate": 0.00012669257463705013, "loss": 1.3492, "step": 28217 }, { "epoch": 0.3666800402181134, "grad_norm": 0.34121498465538025, "learning_rate": 0.00012668997517513875, "loss": 1.3476, "step": 28218 }, { "epoch": 0.3666930347620293, "grad_norm": 0.4007270634174347, "learning_rate": 0.00012668737571322738, "loss": 1.3771, "step": 28219 }, { "epoch": 0.36670602930594515, "grad_norm": 0.4401649534702301, "learning_rate": 0.00012668477625131597, "loss": 1.4885, "step": 28220 }, { "epoch": 0.36671902384986105, "grad_norm": 0.4479008913040161, "learning_rate": 0.0001266821767894046, "loss": 1.3543, "step": 28221 }, { "epoch": 0.3667320183937769, "grad_norm": 0.3135763108730316, "learning_rate": 0.0001266795773274932, "loss": 1.3169, "step": 28222 }, { "epoch": 0.3667450129376928, "grad_norm": 0.3499266803264618, "learning_rate": 0.00012667697786558185, "loss": 1.3856, "step": 28223 }, { "epoch": 0.36675800748160864, "grad_norm": 0.3431118428707123, "learning_rate": 0.00012667437840367045, "loss": 1.4253, "step": 28224 }, { "epoch": 0.36677100202552454, "grad_norm": 0.3204564154148102, "learning_rate": 0.00012667177894175904, "loss": 1.2607, "step": 28225 }, { "epoch": 0.3667839965694404, "grad_norm": 0.31765884160995483, "learning_rate": 0.0001266691794798477, "loss": 1.2611, "step": 28226 }, { "epoch": 0.3667969911133563, "grad_norm": 0.38403427600860596, "learning_rate": 0.0001266665800179363, "loss": 1.3393, "step": 28227 }, { "epoch": 0.36680998565727213, "grad_norm": 0.3300393521785736, "learning_rate": 0.00012666398055602492, "loss": 1.1441, "step": 28228 }, { "epoch": 0.36682298020118803, "grad_norm": 0.45865610241889954, "learning_rate": 0.00012666138109411351, "loss": 1.536, "step": 28229 }, { "epoch": 0.3668359747451039, "grad_norm": 0.3928692042827606, "learning_rate": 0.00012665878163220214, "loss": 1.5818, "step": 28230 }, { "epoch": 0.3668489692890198, "grad_norm": 0.371881902217865, "learning_rate": 0.00012665618217029076, "loss": 1.3133, "step": 28231 }, { "epoch": 0.3668619638329356, "grad_norm": 0.3203613758087158, "learning_rate": 0.00012665358270837936, "loss": 1.4208, "step": 28232 }, { "epoch": 0.3668749583768515, "grad_norm": 0.3743828237056732, "learning_rate": 0.00012665098324646798, "loss": 1.3392, "step": 28233 }, { "epoch": 0.36688795292076737, "grad_norm": 0.400187224149704, "learning_rate": 0.0001266483837845566, "loss": 1.4545, "step": 28234 }, { "epoch": 0.36690094746468327, "grad_norm": 0.37582534551620483, "learning_rate": 0.00012664578432264523, "loss": 1.455, "step": 28235 }, { "epoch": 0.3669139420085991, "grad_norm": 0.3804473876953125, "learning_rate": 0.00012664318486073383, "loss": 1.4632, "step": 28236 }, { "epoch": 0.366926936552515, "grad_norm": 0.37541574239730835, "learning_rate": 0.00012664058539882246, "loss": 1.3594, "step": 28237 }, { "epoch": 0.36693993109643086, "grad_norm": 0.38975194096565247, "learning_rate": 0.00012663798593691108, "loss": 1.5815, "step": 28238 }, { "epoch": 0.36695292564034676, "grad_norm": 0.34498849511146545, "learning_rate": 0.00012663538647499968, "loss": 1.3103, "step": 28239 }, { "epoch": 0.3669659201842626, "grad_norm": 0.33500340580940247, "learning_rate": 0.0001266327870130883, "loss": 1.2509, "step": 28240 }, { "epoch": 0.3669789147281785, "grad_norm": 0.3520366847515106, "learning_rate": 0.0001266301875511769, "loss": 1.4546, "step": 28241 }, { "epoch": 0.36699190927209435, "grad_norm": 0.4794207811355591, "learning_rate": 0.00012662758808926552, "loss": 1.4834, "step": 28242 }, { "epoch": 0.36700490381601025, "grad_norm": 0.4109174311161041, "learning_rate": 0.00012662498862735415, "loss": 1.3443, "step": 28243 }, { "epoch": 0.36701789835992615, "grad_norm": 0.4426645338535309, "learning_rate": 0.00012662238916544275, "loss": 1.4635, "step": 28244 }, { "epoch": 0.367030892903842, "grad_norm": 0.42775824666023254, "learning_rate": 0.00012661978970353137, "loss": 1.2225, "step": 28245 }, { "epoch": 0.3670438874477579, "grad_norm": 0.31784045696258545, "learning_rate": 0.00012661719024162, "loss": 1.1861, "step": 28246 }, { "epoch": 0.36705688199167374, "grad_norm": 0.38443872332572937, "learning_rate": 0.00012661459077970862, "loss": 1.2705, "step": 28247 }, { "epoch": 0.36706987653558965, "grad_norm": 0.3345561921596527, "learning_rate": 0.00012661199131779722, "loss": 1.1758, "step": 28248 }, { "epoch": 0.3670828710795055, "grad_norm": 0.3287811279296875, "learning_rate": 0.00012660939185588584, "loss": 1.3391, "step": 28249 }, { "epoch": 0.3670958656234214, "grad_norm": 0.3764609098434448, "learning_rate": 0.00012660679239397447, "loss": 1.3094, "step": 28250 }, { "epoch": 0.36710886016733724, "grad_norm": 0.5600670576095581, "learning_rate": 0.00012660419293206306, "loss": 1.443, "step": 28251 }, { "epoch": 0.36712185471125314, "grad_norm": 0.3201703131198883, "learning_rate": 0.0001266015934701517, "loss": 1.5131, "step": 28252 }, { "epoch": 0.367134849255169, "grad_norm": 0.514254093170166, "learning_rate": 0.00012659899400824028, "loss": 1.4315, "step": 28253 }, { "epoch": 0.3671478437990849, "grad_norm": 0.5076304078102112, "learning_rate": 0.00012659639454632894, "loss": 1.4243, "step": 28254 }, { "epoch": 0.36716083834300073, "grad_norm": 0.38884350657463074, "learning_rate": 0.00012659379508441753, "loss": 1.375, "step": 28255 }, { "epoch": 0.36717383288691663, "grad_norm": 0.46446534991264343, "learning_rate": 0.00012659119562250613, "loss": 1.5978, "step": 28256 }, { "epoch": 0.3671868274308325, "grad_norm": 0.39796850085258484, "learning_rate": 0.00012658859616059476, "loss": 1.355, "step": 28257 }, { "epoch": 0.3671998219747484, "grad_norm": 0.42515382170677185, "learning_rate": 0.00012658599669868338, "loss": 1.4486, "step": 28258 }, { "epoch": 0.3672128165186642, "grad_norm": 0.46917420625686646, "learning_rate": 0.000126583397236772, "loss": 1.6279, "step": 28259 }, { "epoch": 0.3672258110625801, "grad_norm": 0.4049796164035797, "learning_rate": 0.0001265807977748606, "loss": 1.1622, "step": 28260 }, { "epoch": 0.36723880560649597, "grad_norm": 0.5218256115913391, "learning_rate": 0.00012657819831294923, "loss": 1.41, "step": 28261 }, { "epoch": 0.36725180015041187, "grad_norm": 0.40468713641166687, "learning_rate": 0.00012657559885103785, "loss": 1.2532, "step": 28262 }, { "epoch": 0.3672647946943277, "grad_norm": 0.34589505195617676, "learning_rate": 0.00012657299938912645, "loss": 1.2171, "step": 28263 }, { "epoch": 0.3672777892382436, "grad_norm": 0.3997263014316559, "learning_rate": 0.00012657039992721507, "loss": 1.4543, "step": 28264 }, { "epoch": 0.36729078378215946, "grad_norm": 0.4658871591091156, "learning_rate": 0.0001265678004653037, "loss": 1.3436, "step": 28265 }, { "epoch": 0.36730377832607536, "grad_norm": 0.38897818326950073, "learning_rate": 0.00012656520100339232, "loss": 1.3865, "step": 28266 }, { "epoch": 0.3673167728699912, "grad_norm": 0.3992270827293396, "learning_rate": 0.00012656260154148092, "loss": 1.4514, "step": 28267 }, { "epoch": 0.3673297674139071, "grad_norm": 0.3155049979686737, "learning_rate": 0.00012656000207956952, "loss": 1.349, "step": 28268 }, { "epoch": 0.36734276195782295, "grad_norm": 0.3793800473213196, "learning_rate": 0.00012655740261765817, "loss": 1.1923, "step": 28269 }, { "epoch": 0.36735575650173885, "grad_norm": 0.3289342522621155, "learning_rate": 0.00012655480315574677, "loss": 1.4689, "step": 28270 }, { "epoch": 0.3673687510456547, "grad_norm": 0.4585122764110565, "learning_rate": 0.0001265522036938354, "loss": 1.4682, "step": 28271 }, { "epoch": 0.3673817455895706, "grad_norm": 0.5174217820167542, "learning_rate": 0.000126549604231924, "loss": 1.3983, "step": 28272 }, { "epoch": 0.36739474013348644, "grad_norm": 0.38904857635498047, "learning_rate": 0.0001265470047700126, "loss": 1.5605, "step": 28273 }, { "epoch": 0.36740773467740234, "grad_norm": 0.4413037896156311, "learning_rate": 0.00012654440530810124, "loss": 1.4244, "step": 28274 }, { "epoch": 0.3674207292213182, "grad_norm": 0.3603969216346741, "learning_rate": 0.00012654180584618983, "loss": 1.4262, "step": 28275 }, { "epoch": 0.3674337237652341, "grad_norm": 0.4461103677749634, "learning_rate": 0.00012653920638427846, "loss": 1.4628, "step": 28276 }, { "epoch": 0.36744671830914993, "grad_norm": 0.3080480694770813, "learning_rate": 0.00012653660692236708, "loss": 1.49, "step": 28277 }, { "epoch": 0.36745971285306583, "grad_norm": 0.3298843204975128, "learning_rate": 0.0001265340074604557, "loss": 1.3883, "step": 28278 }, { "epoch": 0.3674727073969817, "grad_norm": 0.35455086827278137, "learning_rate": 0.0001265314079985443, "loss": 1.4008, "step": 28279 }, { "epoch": 0.3674857019408976, "grad_norm": 0.4058314263820648, "learning_rate": 0.0001265288085366329, "loss": 1.4439, "step": 28280 }, { "epoch": 0.3674986964848134, "grad_norm": 0.5180437564849854, "learning_rate": 0.00012652620907472155, "loss": 1.4467, "step": 28281 }, { "epoch": 0.3675116910287293, "grad_norm": 0.25658056139945984, "learning_rate": 0.00012652360961281015, "loss": 1.4205, "step": 28282 }, { "epoch": 0.36752468557264517, "grad_norm": 0.402537077665329, "learning_rate": 0.00012652101015089877, "loss": 1.3648, "step": 28283 }, { "epoch": 0.36753768011656107, "grad_norm": 0.4145286977291107, "learning_rate": 0.00012651841068898737, "loss": 1.3977, "step": 28284 }, { "epoch": 0.3675506746604769, "grad_norm": 0.31924372911453247, "learning_rate": 0.000126515811227076, "loss": 1.5362, "step": 28285 }, { "epoch": 0.3675636692043928, "grad_norm": 0.38005587458610535, "learning_rate": 0.00012651321176516462, "loss": 1.3518, "step": 28286 }, { "epoch": 0.36757666374830866, "grad_norm": 0.5064309239387512, "learning_rate": 0.00012651061230325322, "loss": 1.3775, "step": 28287 }, { "epoch": 0.36758965829222456, "grad_norm": 0.43269461393356323, "learning_rate": 0.00012650801284134184, "loss": 1.4696, "step": 28288 }, { "epoch": 0.3676026528361404, "grad_norm": 0.38758280873298645, "learning_rate": 0.00012650541337943047, "loss": 1.2844, "step": 28289 }, { "epoch": 0.3676156473800563, "grad_norm": 0.3623393476009369, "learning_rate": 0.0001265028139175191, "loss": 1.4764, "step": 28290 }, { "epoch": 0.36762864192397215, "grad_norm": 0.4177338778972626, "learning_rate": 0.0001265002144556077, "loss": 1.5634, "step": 28291 }, { "epoch": 0.36764163646788806, "grad_norm": 0.4605614244937897, "learning_rate": 0.00012649761499369631, "loss": 1.3638, "step": 28292 }, { "epoch": 0.3676546310118039, "grad_norm": 0.3411669135093689, "learning_rate": 0.00012649501553178494, "loss": 1.3509, "step": 28293 }, { "epoch": 0.3676676255557198, "grad_norm": 0.43192151188850403, "learning_rate": 0.00012649241606987354, "loss": 1.4854, "step": 28294 }, { "epoch": 0.36768062009963565, "grad_norm": 0.49286994338035583, "learning_rate": 0.00012648981660796216, "loss": 1.5278, "step": 28295 }, { "epoch": 0.36769361464355155, "grad_norm": 0.46842652559280396, "learning_rate": 0.00012648721714605076, "loss": 1.5172, "step": 28296 }, { "epoch": 0.3677066091874674, "grad_norm": 0.3526531755924225, "learning_rate": 0.00012648461768413938, "loss": 1.3694, "step": 28297 }, { "epoch": 0.3677196037313833, "grad_norm": 0.46419310569763184, "learning_rate": 0.000126482018222228, "loss": 1.3507, "step": 28298 }, { "epoch": 0.36773259827529914, "grad_norm": 0.38507261872291565, "learning_rate": 0.0001264794187603166, "loss": 1.4583, "step": 28299 }, { "epoch": 0.36774559281921504, "grad_norm": 0.5182105302810669, "learning_rate": 0.00012647681929840526, "loss": 1.4289, "step": 28300 }, { "epoch": 0.3677585873631309, "grad_norm": 0.3965875804424286, "learning_rate": 0.00012647421983649385, "loss": 1.3298, "step": 28301 }, { "epoch": 0.3677715819070468, "grad_norm": 0.31373509764671326, "learning_rate": 0.00012647162037458248, "loss": 1.2522, "step": 28302 }, { "epoch": 0.36778457645096263, "grad_norm": 0.4045376777648926, "learning_rate": 0.00012646902091267107, "loss": 1.4169, "step": 28303 }, { "epoch": 0.36779757099487853, "grad_norm": 0.4659777581691742, "learning_rate": 0.0001264664214507597, "loss": 1.5066, "step": 28304 }, { "epoch": 0.3678105655387944, "grad_norm": 0.4321700930595398, "learning_rate": 0.00012646382198884832, "loss": 1.3231, "step": 28305 }, { "epoch": 0.3678235600827103, "grad_norm": 0.30213743448257446, "learning_rate": 0.00012646122252693692, "loss": 1.4256, "step": 28306 }, { "epoch": 0.3678365546266261, "grad_norm": 0.4156777560710907, "learning_rate": 0.00012645862306502555, "loss": 1.4331, "step": 28307 }, { "epoch": 0.367849549170542, "grad_norm": 0.6103479862213135, "learning_rate": 0.00012645602360311417, "loss": 1.6721, "step": 28308 }, { "epoch": 0.36786254371445787, "grad_norm": 0.4632102847099304, "learning_rate": 0.00012645342414120277, "loss": 1.4686, "step": 28309 }, { "epoch": 0.36787553825837377, "grad_norm": 0.3962036073207855, "learning_rate": 0.0001264508246792914, "loss": 1.488, "step": 28310 }, { "epoch": 0.3678885328022896, "grad_norm": 0.39585331082344055, "learning_rate": 0.00012644822521738, "loss": 1.3485, "step": 28311 }, { "epoch": 0.3679015273462055, "grad_norm": 0.33448514342308044, "learning_rate": 0.00012644562575546864, "loss": 1.2157, "step": 28312 }, { "epoch": 0.36791452189012136, "grad_norm": 0.46917641162872314, "learning_rate": 0.00012644302629355724, "loss": 1.2645, "step": 28313 }, { "epoch": 0.36792751643403726, "grad_norm": 0.30787110328674316, "learning_rate": 0.00012644042683164586, "loss": 1.2682, "step": 28314 }, { "epoch": 0.3679405109779531, "grad_norm": 0.47201284766197205, "learning_rate": 0.00012643782736973446, "loss": 1.3786, "step": 28315 }, { "epoch": 0.367953505521869, "grad_norm": 0.4249972999095917, "learning_rate": 0.00012643522790782308, "loss": 1.3464, "step": 28316 }, { "epoch": 0.36796650006578485, "grad_norm": 0.29537349939346313, "learning_rate": 0.0001264326284459117, "loss": 1.4019, "step": 28317 }, { "epoch": 0.36797949460970075, "grad_norm": 0.3659409284591675, "learning_rate": 0.0001264300289840003, "loss": 1.3854, "step": 28318 }, { "epoch": 0.3679924891536166, "grad_norm": 0.4648875892162323, "learning_rate": 0.00012642742952208893, "loss": 1.385, "step": 28319 }, { "epoch": 0.3680054836975325, "grad_norm": 0.48431596159935, "learning_rate": 0.00012642483006017756, "loss": 1.6043, "step": 28320 }, { "epoch": 0.3680184782414484, "grad_norm": 0.47812896966934204, "learning_rate": 0.00012642223059826618, "loss": 1.5167, "step": 28321 }, { "epoch": 0.36803147278536424, "grad_norm": 0.4868755340576172, "learning_rate": 0.00012641963113635478, "loss": 1.5123, "step": 28322 }, { "epoch": 0.36804446732928014, "grad_norm": 0.398409366607666, "learning_rate": 0.00012641703167444337, "loss": 1.4557, "step": 28323 }, { "epoch": 0.368057461873196, "grad_norm": 0.4370778203010559, "learning_rate": 0.00012641443221253203, "loss": 1.5169, "step": 28324 }, { "epoch": 0.3680704564171119, "grad_norm": 0.3462732434272766, "learning_rate": 0.00012641183275062062, "loss": 1.6015, "step": 28325 }, { "epoch": 0.36808345096102774, "grad_norm": 0.41952386498451233, "learning_rate": 0.00012640923328870925, "loss": 1.3269, "step": 28326 }, { "epoch": 0.36809644550494364, "grad_norm": 0.4484606385231018, "learning_rate": 0.00012640663382679785, "loss": 1.5717, "step": 28327 }, { "epoch": 0.3681094400488595, "grad_norm": 0.3657273054122925, "learning_rate": 0.00012640403436488647, "loss": 1.4062, "step": 28328 }, { "epoch": 0.3681224345927754, "grad_norm": 0.5000094771385193, "learning_rate": 0.0001264014349029751, "loss": 1.3152, "step": 28329 }, { "epoch": 0.36813542913669123, "grad_norm": 0.4084291458129883, "learning_rate": 0.0001263988354410637, "loss": 1.5985, "step": 28330 }, { "epoch": 0.36814842368060713, "grad_norm": 0.437229722738266, "learning_rate": 0.00012639623597915232, "loss": 1.3473, "step": 28331 }, { "epoch": 0.368161418224523, "grad_norm": 0.3651241958141327, "learning_rate": 0.00012639363651724094, "loss": 1.2269, "step": 28332 }, { "epoch": 0.3681744127684389, "grad_norm": 0.42561301589012146, "learning_rate": 0.00012639103705532957, "loss": 1.3654, "step": 28333 }, { "epoch": 0.3681874073123547, "grad_norm": 0.394216924905777, "learning_rate": 0.00012638843759341816, "loss": 1.3629, "step": 28334 }, { "epoch": 0.3682004018562706, "grad_norm": 0.40950807929039, "learning_rate": 0.0001263858381315068, "loss": 1.5047, "step": 28335 }, { "epoch": 0.36821339640018647, "grad_norm": 0.27787330746650696, "learning_rate": 0.0001263832386695954, "loss": 1.4821, "step": 28336 }, { "epoch": 0.36822639094410237, "grad_norm": 0.3892754018306732, "learning_rate": 0.000126380639207684, "loss": 1.3792, "step": 28337 }, { "epoch": 0.3682393854880182, "grad_norm": 0.38460496068000793, "learning_rate": 0.00012637803974577263, "loss": 1.3473, "step": 28338 }, { "epoch": 0.3682523800319341, "grad_norm": 0.44265082478523254, "learning_rate": 0.00012637544028386126, "loss": 1.361, "step": 28339 }, { "epoch": 0.36826537457584996, "grad_norm": 0.4281325936317444, "learning_rate": 0.00012637284082194986, "loss": 1.4208, "step": 28340 }, { "epoch": 0.36827836911976586, "grad_norm": 0.4138733446598053, "learning_rate": 0.00012637024136003848, "loss": 1.4761, "step": 28341 }, { "epoch": 0.3682913636636817, "grad_norm": 0.405500590801239, "learning_rate": 0.00012636764189812708, "loss": 1.4006, "step": 28342 }, { "epoch": 0.3683043582075976, "grad_norm": 0.39190343022346497, "learning_rate": 0.00012636504243621573, "loss": 1.2992, "step": 28343 }, { "epoch": 0.36831735275151345, "grad_norm": 0.47981125116348267, "learning_rate": 0.00012636244297430433, "loss": 1.49, "step": 28344 }, { "epoch": 0.36833034729542935, "grad_norm": 0.40345466136932373, "learning_rate": 0.00012635984351239295, "loss": 1.3537, "step": 28345 }, { "epoch": 0.3683433418393452, "grad_norm": 0.3976694941520691, "learning_rate": 0.00012635724405048155, "loss": 1.5583, "step": 28346 }, { "epoch": 0.3683563363832611, "grad_norm": 0.31501248478889465, "learning_rate": 0.00012635464458857017, "loss": 1.3989, "step": 28347 }, { "epoch": 0.36836933092717694, "grad_norm": 0.45425599813461304, "learning_rate": 0.0001263520451266588, "loss": 1.4561, "step": 28348 }, { "epoch": 0.36838232547109284, "grad_norm": 0.3244284987449646, "learning_rate": 0.0001263494456647474, "loss": 1.2902, "step": 28349 }, { "epoch": 0.3683953200150087, "grad_norm": 0.5544261336326599, "learning_rate": 0.00012634684620283602, "loss": 1.4324, "step": 28350 }, { "epoch": 0.3684083145589246, "grad_norm": 0.37385526299476624, "learning_rate": 0.00012634424674092464, "loss": 1.4953, "step": 28351 }, { "epoch": 0.36842130910284043, "grad_norm": 0.3878785967826843, "learning_rate": 0.00012634164727901324, "loss": 1.2415, "step": 28352 }, { "epoch": 0.36843430364675633, "grad_norm": 0.32700225710868835, "learning_rate": 0.00012633904781710187, "loss": 1.2772, "step": 28353 }, { "epoch": 0.3684472981906722, "grad_norm": 0.43131348490715027, "learning_rate": 0.00012633644835519046, "loss": 1.3612, "step": 28354 }, { "epoch": 0.3684602927345881, "grad_norm": 0.36507701873779297, "learning_rate": 0.00012633384889327911, "loss": 1.4487, "step": 28355 }, { "epoch": 0.3684732872785039, "grad_norm": 0.4436410367488861, "learning_rate": 0.0001263312494313677, "loss": 1.5529, "step": 28356 }, { "epoch": 0.3684862818224198, "grad_norm": 0.5941357612609863, "learning_rate": 0.00012632864996945634, "loss": 1.5078, "step": 28357 }, { "epoch": 0.36849927636633567, "grad_norm": 0.5877930521965027, "learning_rate": 0.00012632605050754493, "loss": 1.3498, "step": 28358 }, { "epoch": 0.36851227091025157, "grad_norm": 0.4041191339492798, "learning_rate": 0.00012632345104563356, "loss": 1.4086, "step": 28359 }, { "epoch": 0.3685252654541674, "grad_norm": 0.3810862600803375, "learning_rate": 0.00012632085158372218, "loss": 1.4334, "step": 28360 }, { "epoch": 0.3685382599980833, "grad_norm": 0.4194347560405731, "learning_rate": 0.00012631825212181078, "loss": 1.3721, "step": 28361 }, { "epoch": 0.36855125454199916, "grad_norm": 0.30258432030677795, "learning_rate": 0.0001263156526598994, "loss": 1.2923, "step": 28362 }, { "epoch": 0.36856424908591506, "grad_norm": 0.44581466913223267, "learning_rate": 0.00012631305319798803, "loss": 1.3805, "step": 28363 }, { "epoch": 0.3685772436298309, "grad_norm": 0.4554748237133026, "learning_rate": 0.00012631045373607663, "loss": 1.4398, "step": 28364 }, { "epoch": 0.3685902381737468, "grad_norm": 0.4317418038845062, "learning_rate": 0.00012630785427416525, "loss": 1.4953, "step": 28365 }, { "epoch": 0.36860323271766265, "grad_norm": 0.3941706418991089, "learning_rate": 0.00012630525481225385, "loss": 1.2766, "step": 28366 }, { "epoch": 0.36861622726157856, "grad_norm": 0.48049888014793396, "learning_rate": 0.0001263026553503425, "loss": 1.4382, "step": 28367 }, { "epoch": 0.3686292218054944, "grad_norm": 0.4745429754257202, "learning_rate": 0.0001263000558884311, "loss": 1.3276, "step": 28368 }, { "epoch": 0.3686422163494103, "grad_norm": 0.5584399104118347, "learning_rate": 0.00012629745642651972, "loss": 1.4464, "step": 28369 }, { "epoch": 0.36865521089332615, "grad_norm": 0.2976830303668976, "learning_rate": 0.00012629485696460832, "loss": 1.2346, "step": 28370 }, { "epoch": 0.36866820543724205, "grad_norm": 0.44537925720214844, "learning_rate": 0.00012629225750269694, "loss": 1.3879, "step": 28371 }, { "epoch": 0.3686811999811579, "grad_norm": 0.4536598324775696, "learning_rate": 0.00012628965804078557, "loss": 1.4492, "step": 28372 }, { "epoch": 0.3686941945250738, "grad_norm": 0.41047587990760803, "learning_rate": 0.00012628705857887417, "loss": 1.5961, "step": 28373 }, { "epoch": 0.36870718906898964, "grad_norm": 0.4140320420265198, "learning_rate": 0.00012628445911696282, "loss": 1.2937, "step": 28374 }, { "epoch": 0.36872018361290554, "grad_norm": 0.4037458598613739, "learning_rate": 0.00012628185965505141, "loss": 1.5198, "step": 28375 }, { "epoch": 0.3687331781568214, "grad_norm": 0.3343091905117035, "learning_rate": 0.00012627926019314004, "loss": 1.253, "step": 28376 }, { "epoch": 0.3687461727007373, "grad_norm": 0.34510770440101624, "learning_rate": 0.00012627666073122864, "loss": 1.5007, "step": 28377 }, { "epoch": 0.36875916724465313, "grad_norm": 0.47452855110168457, "learning_rate": 0.00012627406126931726, "loss": 1.436, "step": 28378 }, { "epoch": 0.36877216178856903, "grad_norm": 0.3934592306613922, "learning_rate": 0.00012627146180740589, "loss": 1.2946, "step": 28379 }, { "epoch": 0.3687851563324849, "grad_norm": 0.44692307710647583, "learning_rate": 0.00012626886234549448, "loss": 1.3827, "step": 28380 }, { "epoch": 0.3687981508764008, "grad_norm": 0.277104914188385, "learning_rate": 0.0001262662628835831, "loss": 1.3933, "step": 28381 }, { "epoch": 0.3688111454203166, "grad_norm": 0.31293246150016785, "learning_rate": 0.00012626366342167173, "loss": 1.3516, "step": 28382 }, { "epoch": 0.3688241399642325, "grad_norm": 0.38781681656837463, "learning_rate": 0.00012626106395976033, "loss": 1.1197, "step": 28383 }, { "epoch": 0.36883713450814837, "grad_norm": 0.3892761766910553, "learning_rate": 0.00012625846449784895, "loss": 1.376, "step": 28384 }, { "epoch": 0.36885012905206427, "grad_norm": 0.4109431207180023, "learning_rate": 0.00012625586503593755, "loss": 1.3412, "step": 28385 }, { "epoch": 0.3688631235959801, "grad_norm": 0.4051346182823181, "learning_rate": 0.0001262532655740262, "loss": 1.4347, "step": 28386 }, { "epoch": 0.368876118139896, "grad_norm": 0.43324047327041626, "learning_rate": 0.0001262506661121148, "loss": 1.4616, "step": 28387 }, { "epoch": 0.36888911268381186, "grad_norm": 0.44072192907333374, "learning_rate": 0.00012624806665020342, "loss": 1.2729, "step": 28388 }, { "epoch": 0.36890210722772776, "grad_norm": 0.5031135678291321, "learning_rate": 0.00012624546718829202, "loss": 1.4062, "step": 28389 }, { "epoch": 0.3689151017716436, "grad_norm": 0.31822875142097473, "learning_rate": 0.00012624286772638065, "loss": 1.3635, "step": 28390 }, { "epoch": 0.3689280963155595, "grad_norm": 0.37226349115371704, "learning_rate": 0.00012624026826446927, "loss": 1.2548, "step": 28391 }, { "epoch": 0.36894109085947535, "grad_norm": 0.426704466342926, "learning_rate": 0.00012623766880255787, "loss": 1.2026, "step": 28392 }, { "epoch": 0.36895408540339125, "grad_norm": 0.36013510823249817, "learning_rate": 0.0001262350693406465, "loss": 1.3736, "step": 28393 }, { "epoch": 0.3689670799473071, "grad_norm": 0.4186386466026306, "learning_rate": 0.00012623246987873512, "loss": 1.5031, "step": 28394 }, { "epoch": 0.368980074491223, "grad_norm": 0.38532984256744385, "learning_rate": 0.00012622987041682371, "loss": 1.3806, "step": 28395 }, { "epoch": 0.3689930690351389, "grad_norm": 0.40690773725509644, "learning_rate": 0.00012622727095491234, "loss": 1.5597, "step": 28396 }, { "epoch": 0.36900606357905474, "grad_norm": 0.42174431681632996, "learning_rate": 0.00012622467149300094, "loss": 1.2916, "step": 28397 }, { "epoch": 0.36901905812297064, "grad_norm": 0.286306232213974, "learning_rate": 0.0001262220720310896, "loss": 1.3654, "step": 28398 }, { "epoch": 0.3690320526668865, "grad_norm": 0.39403626322746277, "learning_rate": 0.00012621947256917819, "loss": 1.3287, "step": 28399 }, { "epoch": 0.3690450472108024, "grad_norm": 0.3883787989616394, "learning_rate": 0.0001262168731072668, "loss": 1.5887, "step": 28400 }, { "epoch": 0.36905804175471824, "grad_norm": 0.46099182963371277, "learning_rate": 0.0001262142736453554, "loss": 1.2755, "step": 28401 }, { "epoch": 0.36907103629863414, "grad_norm": 0.48636871576309204, "learning_rate": 0.00012621167418344403, "loss": 1.3673, "step": 28402 }, { "epoch": 0.36908403084255, "grad_norm": 0.3928277790546417, "learning_rate": 0.00012620907472153266, "loss": 1.3767, "step": 28403 }, { "epoch": 0.3690970253864659, "grad_norm": 0.37164103984832764, "learning_rate": 0.00012620647525962125, "loss": 1.2692, "step": 28404 }, { "epoch": 0.3691100199303817, "grad_norm": 0.435621052980423, "learning_rate": 0.00012620387579770988, "loss": 1.4863, "step": 28405 }, { "epoch": 0.36912301447429763, "grad_norm": 0.3375067412853241, "learning_rate": 0.0001262012763357985, "loss": 1.4317, "step": 28406 }, { "epoch": 0.3691360090182135, "grad_norm": 0.38092947006225586, "learning_rate": 0.0001261986768738871, "loss": 1.1144, "step": 28407 }, { "epoch": 0.3691490035621294, "grad_norm": 0.37608110904693604, "learning_rate": 0.00012619607741197572, "loss": 1.2479, "step": 28408 }, { "epoch": 0.3691619981060452, "grad_norm": 0.4085616171360016, "learning_rate": 0.00012619347795006435, "loss": 1.2836, "step": 28409 }, { "epoch": 0.3691749926499611, "grad_norm": 0.3654405176639557, "learning_rate": 0.00012619087848815297, "loss": 1.4252, "step": 28410 }, { "epoch": 0.36918798719387697, "grad_norm": 0.3107021152973175, "learning_rate": 0.00012618827902624157, "loss": 1.5306, "step": 28411 }, { "epoch": 0.36920098173779287, "grad_norm": 0.20088303089141846, "learning_rate": 0.0001261856795643302, "loss": 1.1483, "step": 28412 }, { "epoch": 0.3692139762817087, "grad_norm": 0.37039002776145935, "learning_rate": 0.00012618308010241882, "loss": 1.4032, "step": 28413 }, { "epoch": 0.3692269708256246, "grad_norm": 0.383558988571167, "learning_rate": 0.00012618048064050742, "loss": 1.2915, "step": 28414 }, { "epoch": 0.36923996536954046, "grad_norm": 0.3268488347530365, "learning_rate": 0.00012617788117859604, "loss": 1.2759, "step": 28415 }, { "epoch": 0.36925295991345636, "grad_norm": 0.2907080054283142, "learning_rate": 0.00012617528171668464, "loss": 1.221, "step": 28416 }, { "epoch": 0.3692659544573722, "grad_norm": 0.3817647695541382, "learning_rate": 0.0001261726822547733, "loss": 1.6906, "step": 28417 }, { "epoch": 0.3692789490012881, "grad_norm": 0.3616327941417694, "learning_rate": 0.0001261700827928619, "loss": 1.3307, "step": 28418 }, { "epoch": 0.36929194354520395, "grad_norm": 0.35940033197402954, "learning_rate": 0.00012616748333095049, "loss": 1.3133, "step": 28419 }, { "epoch": 0.36930493808911985, "grad_norm": 0.4270550012588501, "learning_rate": 0.0001261648838690391, "loss": 1.4502, "step": 28420 }, { "epoch": 0.3693179326330357, "grad_norm": 0.32805201411247253, "learning_rate": 0.00012616228440712773, "loss": 1.257, "step": 28421 }, { "epoch": 0.3693309271769516, "grad_norm": 0.3111568093299866, "learning_rate": 0.00012615968494521636, "loss": 1.2839, "step": 28422 }, { "epoch": 0.36934392172086744, "grad_norm": 0.29012373089790344, "learning_rate": 0.00012615708548330496, "loss": 1.3184, "step": 28423 }, { "epoch": 0.36935691626478334, "grad_norm": 0.3929380476474762, "learning_rate": 0.00012615448602139358, "loss": 1.4195, "step": 28424 }, { "epoch": 0.3693699108086992, "grad_norm": 0.39281922578811646, "learning_rate": 0.0001261518865594822, "loss": 1.5138, "step": 28425 }, { "epoch": 0.3693829053526151, "grad_norm": 0.46634259819984436, "learning_rate": 0.0001261492870975708, "loss": 1.4582, "step": 28426 }, { "epoch": 0.36939589989653093, "grad_norm": 0.38453537225723267, "learning_rate": 0.00012614668763565943, "loss": 1.3435, "step": 28427 }, { "epoch": 0.36940889444044683, "grad_norm": 0.36820077896118164, "learning_rate": 0.00012614408817374802, "loss": 1.4042, "step": 28428 }, { "epoch": 0.3694218889843627, "grad_norm": 0.5294620990753174, "learning_rate": 0.00012614148871183668, "loss": 1.415, "step": 28429 }, { "epoch": 0.3694348835282786, "grad_norm": 0.3890458941459656, "learning_rate": 0.00012613888924992527, "loss": 1.5321, "step": 28430 }, { "epoch": 0.3694478780721944, "grad_norm": 0.4221755862236023, "learning_rate": 0.00012613628978801387, "loss": 1.3836, "step": 28431 }, { "epoch": 0.3694608726161103, "grad_norm": 0.3718966841697693, "learning_rate": 0.0001261336903261025, "loss": 1.3411, "step": 28432 }, { "epoch": 0.36947386716002617, "grad_norm": 0.5034308433532715, "learning_rate": 0.00012613109086419112, "loss": 1.5535, "step": 28433 }, { "epoch": 0.36948686170394207, "grad_norm": 0.36583542823791504, "learning_rate": 0.00012612849140227974, "loss": 1.3992, "step": 28434 }, { "epoch": 0.3694998562478579, "grad_norm": 0.7009222507476807, "learning_rate": 0.00012612589194036834, "loss": 1.3751, "step": 28435 }, { "epoch": 0.3695128507917738, "grad_norm": 0.43068015575408936, "learning_rate": 0.00012612329247845697, "loss": 1.5226, "step": 28436 }, { "epoch": 0.36952584533568966, "grad_norm": 0.4313845932483673, "learning_rate": 0.0001261206930165456, "loss": 1.3913, "step": 28437 }, { "epoch": 0.36953883987960556, "grad_norm": 0.37349241971969604, "learning_rate": 0.0001261180935546342, "loss": 1.3403, "step": 28438 }, { "epoch": 0.3695518344235214, "grad_norm": 0.45809611678123474, "learning_rate": 0.0001261154940927228, "loss": 1.4647, "step": 28439 }, { "epoch": 0.3695648289674373, "grad_norm": 0.2655130922794342, "learning_rate": 0.0001261128946308114, "loss": 1.1656, "step": 28440 }, { "epoch": 0.36957782351135315, "grad_norm": 0.41603967547416687, "learning_rate": 0.00012611029516890006, "loss": 1.5333, "step": 28441 }, { "epoch": 0.36959081805526905, "grad_norm": 0.39628106355667114, "learning_rate": 0.00012610769570698866, "loss": 1.5212, "step": 28442 }, { "epoch": 0.3696038125991849, "grad_norm": 0.44422391057014465, "learning_rate": 0.00012610509624507728, "loss": 1.4105, "step": 28443 }, { "epoch": 0.3696168071431008, "grad_norm": 0.48169004917144775, "learning_rate": 0.00012610249678316588, "loss": 1.4759, "step": 28444 }, { "epoch": 0.36962980168701665, "grad_norm": 0.4032215178012848, "learning_rate": 0.0001260998973212545, "loss": 1.3084, "step": 28445 }, { "epoch": 0.36964279623093255, "grad_norm": 0.4138106405735016, "learning_rate": 0.00012609729785934313, "loss": 1.433, "step": 28446 }, { "epoch": 0.3696557907748484, "grad_norm": 0.27430957555770874, "learning_rate": 0.00012609469839743173, "loss": 1.3192, "step": 28447 }, { "epoch": 0.3696687853187643, "grad_norm": 0.4157394766807556, "learning_rate": 0.00012609209893552035, "loss": 1.4377, "step": 28448 }, { "epoch": 0.36968177986268014, "grad_norm": 0.4084011912345886, "learning_rate": 0.00012608949947360898, "loss": 1.3974, "step": 28449 }, { "epoch": 0.36969477440659604, "grad_norm": 0.2879941761493683, "learning_rate": 0.00012608690001169757, "loss": 1.3122, "step": 28450 }, { "epoch": 0.3697077689505119, "grad_norm": 0.443834125995636, "learning_rate": 0.0001260843005497862, "loss": 1.335, "step": 28451 }, { "epoch": 0.3697207634944278, "grad_norm": 0.5865363478660583, "learning_rate": 0.00012608170108787482, "loss": 1.4374, "step": 28452 }, { "epoch": 0.36973375803834363, "grad_norm": 0.4429281949996948, "learning_rate": 0.00012607910162596345, "loss": 1.4711, "step": 28453 }, { "epoch": 0.36974675258225953, "grad_norm": 0.5329416990280151, "learning_rate": 0.00012607650216405204, "loss": 1.4233, "step": 28454 }, { "epoch": 0.3697597471261754, "grad_norm": 0.39693841338157654, "learning_rate": 0.00012607390270214067, "loss": 1.2098, "step": 28455 }, { "epoch": 0.3697727416700913, "grad_norm": 0.4363026022911072, "learning_rate": 0.0001260713032402293, "loss": 1.3654, "step": 28456 }, { "epoch": 0.3697857362140071, "grad_norm": 0.46276989579200745, "learning_rate": 0.0001260687037783179, "loss": 1.5165, "step": 28457 }, { "epoch": 0.369798730757923, "grad_norm": 0.4555894732475281, "learning_rate": 0.00012606610431640651, "loss": 1.4129, "step": 28458 }, { "epoch": 0.36981172530183887, "grad_norm": 0.3975951075553894, "learning_rate": 0.0001260635048544951, "loss": 1.2047, "step": 28459 }, { "epoch": 0.36982471984575477, "grad_norm": 0.31107133626937866, "learning_rate": 0.00012606090539258376, "loss": 1.459, "step": 28460 }, { "epoch": 0.3698377143896706, "grad_norm": 0.45250436663627625, "learning_rate": 0.00012605830593067236, "loss": 1.5376, "step": 28461 }, { "epoch": 0.3698507089335865, "grad_norm": 0.37765440344810486, "learning_rate": 0.00012605570646876096, "loss": 1.4927, "step": 28462 }, { "epoch": 0.36986370347750236, "grad_norm": 0.4555840492248535, "learning_rate": 0.00012605310700684958, "loss": 1.462, "step": 28463 }, { "epoch": 0.36987669802141826, "grad_norm": 0.4437340199947357, "learning_rate": 0.0001260505075449382, "loss": 1.22, "step": 28464 }, { "epoch": 0.3698896925653341, "grad_norm": 0.385195255279541, "learning_rate": 0.00012604790808302683, "loss": 1.3634, "step": 28465 }, { "epoch": 0.36990268710925, "grad_norm": 0.37381511926651, "learning_rate": 0.00012604530862111543, "loss": 1.4284, "step": 28466 }, { "epoch": 0.36991568165316585, "grad_norm": 0.3051937520503998, "learning_rate": 0.00012604270915920405, "loss": 1.2756, "step": 28467 }, { "epoch": 0.36992867619708175, "grad_norm": 0.4351508319377899, "learning_rate": 0.00012604010969729268, "loss": 1.3764, "step": 28468 }, { "epoch": 0.3699416707409976, "grad_norm": 0.41596466302871704, "learning_rate": 0.00012603751023538128, "loss": 1.3805, "step": 28469 }, { "epoch": 0.3699546652849135, "grad_norm": 0.5010490417480469, "learning_rate": 0.0001260349107734699, "loss": 1.485, "step": 28470 }, { "epoch": 0.36996765982882934, "grad_norm": 0.3573402464389801, "learning_rate": 0.0001260323113115585, "loss": 1.2764, "step": 28471 }, { "epoch": 0.36998065437274524, "grad_norm": 0.34636861085891724, "learning_rate": 0.00012602971184964715, "loss": 1.4044, "step": 28472 }, { "epoch": 0.36999364891666114, "grad_norm": 0.34264034032821655, "learning_rate": 0.00012602711238773575, "loss": 1.2301, "step": 28473 }, { "epoch": 0.370006643460577, "grad_norm": 0.35069140791893005, "learning_rate": 0.00012602451292582434, "loss": 1.4195, "step": 28474 }, { "epoch": 0.3700196380044929, "grad_norm": 0.4894803464412689, "learning_rate": 0.00012602191346391297, "loss": 1.3296, "step": 28475 }, { "epoch": 0.37003263254840874, "grad_norm": 0.37858906388282776, "learning_rate": 0.0001260193140020016, "loss": 1.2516, "step": 28476 }, { "epoch": 0.37004562709232464, "grad_norm": 0.33961084485054016, "learning_rate": 0.00012601671454009022, "loss": 1.4527, "step": 28477 }, { "epoch": 0.3700586216362405, "grad_norm": 0.3975807726383209, "learning_rate": 0.00012601411507817881, "loss": 1.4288, "step": 28478 }, { "epoch": 0.3700716161801564, "grad_norm": 0.3572922348976135, "learning_rate": 0.00012601151561626744, "loss": 1.4308, "step": 28479 }, { "epoch": 0.3700846107240722, "grad_norm": 0.4830910265445709, "learning_rate": 0.00012600891615435606, "loss": 1.5647, "step": 28480 }, { "epoch": 0.37009760526798813, "grad_norm": 0.30390429496765137, "learning_rate": 0.00012600631669244466, "loss": 1.3876, "step": 28481 }, { "epoch": 0.370110599811904, "grad_norm": 0.4210784137248993, "learning_rate": 0.00012600371723053329, "loss": 1.3366, "step": 28482 }, { "epoch": 0.3701235943558199, "grad_norm": 0.28564774990081787, "learning_rate": 0.00012600111776862188, "loss": 1.3688, "step": 28483 }, { "epoch": 0.3701365888997357, "grad_norm": 0.4326128363609314, "learning_rate": 0.00012599851830671053, "loss": 1.4626, "step": 28484 }, { "epoch": 0.3701495834436516, "grad_norm": 0.3149370551109314, "learning_rate": 0.00012599591884479913, "loss": 1.2964, "step": 28485 }, { "epoch": 0.37016257798756746, "grad_norm": 0.3751177191734314, "learning_rate": 0.00012599331938288773, "loss": 1.2786, "step": 28486 }, { "epoch": 0.37017557253148337, "grad_norm": 0.3783768117427826, "learning_rate": 0.00012599071992097638, "loss": 1.3866, "step": 28487 }, { "epoch": 0.3701885670753992, "grad_norm": 0.3886982500553131, "learning_rate": 0.00012598812045906498, "loss": 1.4466, "step": 28488 }, { "epoch": 0.3702015616193151, "grad_norm": 0.4140985310077667, "learning_rate": 0.0001259855209971536, "loss": 1.4811, "step": 28489 }, { "epoch": 0.37021455616323096, "grad_norm": 0.40521103143692017, "learning_rate": 0.0001259829215352422, "loss": 1.4413, "step": 28490 }, { "epoch": 0.37022755070714686, "grad_norm": 0.44567880034446716, "learning_rate": 0.00012598032207333082, "loss": 1.4824, "step": 28491 }, { "epoch": 0.3702405452510627, "grad_norm": 0.3174348771572113, "learning_rate": 0.00012597772261141945, "loss": 1.4906, "step": 28492 }, { "epoch": 0.3702535397949786, "grad_norm": 0.4010981321334839, "learning_rate": 0.00012597512314950805, "loss": 1.6142, "step": 28493 }, { "epoch": 0.37026653433889445, "grad_norm": 0.33209729194641113, "learning_rate": 0.00012597252368759667, "loss": 1.267, "step": 28494 }, { "epoch": 0.37027952888281035, "grad_norm": 0.4276234805583954, "learning_rate": 0.0001259699242256853, "loss": 1.3047, "step": 28495 }, { "epoch": 0.3702925234267262, "grad_norm": 0.42527666687965393, "learning_rate": 0.00012596732476377392, "loss": 1.3985, "step": 28496 }, { "epoch": 0.3703055179706421, "grad_norm": 0.40639492869377136, "learning_rate": 0.00012596472530186252, "loss": 1.5361, "step": 28497 }, { "epoch": 0.37031851251455794, "grad_norm": 0.43494170904159546, "learning_rate": 0.00012596212583995114, "loss": 1.3271, "step": 28498 }, { "epoch": 0.37033150705847384, "grad_norm": 0.3743082582950592, "learning_rate": 0.00012595952637803977, "loss": 1.6363, "step": 28499 }, { "epoch": 0.3703445016023897, "grad_norm": 0.4028823673725128, "learning_rate": 0.00012595692691612836, "loss": 1.3723, "step": 28500 }, { "epoch": 0.3703574961463056, "grad_norm": 0.48029401898384094, "learning_rate": 0.000125954327454217, "loss": 1.4518, "step": 28501 }, { "epoch": 0.37037049069022143, "grad_norm": 0.4387197196483612, "learning_rate": 0.00012595172799230559, "loss": 1.624, "step": 28502 }, { "epoch": 0.37038348523413733, "grad_norm": 0.3294973373413086, "learning_rate": 0.0001259491285303942, "loss": 1.4965, "step": 28503 }, { "epoch": 0.3703964797780532, "grad_norm": 0.44095978140830994, "learning_rate": 0.00012594652906848283, "loss": 1.3095, "step": 28504 }, { "epoch": 0.3704094743219691, "grad_norm": 0.4823935925960541, "learning_rate": 0.00012594392960657143, "loss": 1.5212, "step": 28505 }, { "epoch": 0.3704224688658849, "grad_norm": 0.5020119547843933, "learning_rate": 0.00012594133014466006, "loss": 1.4705, "step": 28506 }, { "epoch": 0.3704354634098008, "grad_norm": 0.39172613620758057, "learning_rate": 0.00012593873068274868, "loss": 1.4425, "step": 28507 }, { "epoch": 0.37044845795371667, "grad_norm": 0.4444243907928467, "learning_rate": 0.0001259361312208373, "loss": 1.4462, "step": 28508 }, { "epoch": 0.37046145249763257, "grad_norm": 0.34541818499565125, "learning_rate": 0.0001259335317589259, "loss": 1.223, "step": 28509 }, { "epoch": 0.3704744470415484, "grad_norm": 0.35177627205848694, "learning_rate": 0.00012593093229701453, "loss": 1.6182, "step": 28510 }, { "epoch": 0.3704874415854643, "grad_norm": 0.3601895570755005, "learning_rate": 0.00012592833283510315, "loss": 1.3744, "step": 28511 }, { "epoch": 0.37050043612938016, "grad_norm": 0.42994552850723267, "learning_rate": 0.00012592573337319175, "loss": 1.2998, "step": 28512 }, { "epoch": 0.37051343067329606, "grad_norm": 0.42121946811676025, "learning_rate": 0.00012592313391128037, "loss": 1.3006, "step": 28513 }, { "epoch": 0.3705264252172119, "grad_norm": 0.5001784563064575, "learning_rate": 0.00012592053444936897, "loss": 1.4156, "step": 28514 }, { "epoch": 0.3705394197611278, "grad_norm": 0.5242595672607422, "learning_rate": 0.0001259179349874576, "loss": 1.3819, "step": 28515 }, { "epoch": 0.37055241430504365, "grad_norm": 0.43644097447395325, "learning_rate": 0.00012591533552554622, "loss": 1.5083, "step": 28516 }, { "epoch": 0.37056540884895955, "grad_norm": 0.3800097703933716, "learning_rate": 0.00012591273606363482, "loss": 1.5342, "step": 28517 }, { "epoch": 0.3705784033928754, "grad_norm": 0.41643908619880676, "learning_rate": 0.00012591013660172344, "loss": 1.2589, "step": 28518 }, { "epoch": 0.3705913979367913, "grad_norm": 0.39021429419517517, "learning_rate": 0.00012590753713981207, "loss": 1.3798, "step": 28519 }, { "epoch": 0.37060439248070715, "grad_norm": 0.49075019359588623, "learning_rate": 0.0001259049376779007, "loss": 1.4627, "step": 28520 }, { "epoch": 0.37061738702462305, "grad_norm": 0.33392399549484253, "learning_rate": 0.0001259023382159893, "loss": 1.5124, "step": 28521 }, { "epoch": 0.3706303815685389, "grad_norm": 0.4660463035106659, "learning_rate": 0.0001258997387540779, "loss": 1.3184, "step": 28522 }, { "epoch": 0.3706433761124548, "grad_norm": 0.48202791810035706, "learning_rate": 0.00012589713929216654, "loss": 1.5474, "step": 28523 }, { "epoch": 0.37065637065637064, "grad_norm": 0.43193021416664124, "learning_rate": 0.00012589453983025513, "loss": 1.3771, "step": 28524 }, { "epoch": 0.37066936520028654, "grad_norm": 0.6539021134376526, "learning_rate": 0.00012589194036834376, "loss": 1.4231, "step": 28525 }, { "epoch": 0.3706823597442024, "grad_norm": 0.3315410017967224, "learning_rate": 0.00012588934090643238, "loss": 1.3109, "step": 28526 }, { "epoch": 0.3706953542881183, "grad_norm": 0.4619930386543274, "learning_rate": 0.000125886741444521, "loss": 1.5056, "step": 28527 }, { "epoch": 0.37070834883203413, "grad_norm": 0.42916443943977356, "learning_rate": 0.0001258841419826096, "loss": 1.3638, "step": 28528 }, { "epoch": 0.37072134337595003, "grad_norm": 0.3815104067325592, "learning_rate": 0.0001258815425206982, "loss": 1.4779, "step": 28529 }, { "epoch": 0.3707343379198659, "grad_norm": 0.3461533188819885, "learning_rate": 0.00012587894305878685, "loss": 1.4815, "step": 28530 }, { "epoch": 0.3707473324637818, "grad_norm": 0.3713798522949219, "learning_rate": 0.00012587634359687545, "loss": 1.3427, "step": 28531 }, { "epoch": 0.3707603270076976, "grad_norm": 0.3443409204483032, "learning_rate": 0.00012587374413496408, "loss": 1.3924, "step": 28532 }, { "epoch": 0.3707733215516135, "grad_norm": 0.3767041265964508, "learning_rate": 0.00012587114467305267, "loss": 1.4005, "step": 28533 }, { "epoch": 0.37078631609552937, "grad_norm": 0.31080323457717896, "learning_rate": 0.0001258685452111413, "loss": 1.4707, "step": 28534 }, { "epoch": 0.37079931063944527, "grad_norm": 0.3808943033218384, "learning_rate": 0.00012586594574922992, "loss": 1.4928, "step": 28535 }, { "epoch": 0.3708123051833611, "grad_norm": 0.43265923857688904, "learning_rate": 0.00012586334628731852, "loss": 1.4685, "step": 28536 }, { "epoch": 0.370825299727277, "grad_norm": 0.2630341351032257, "learning_rate": 0.00012586074682540714, "loss": 1.3499, "step": 28537 }, { "epoch": 0.37083829427119286, "grad_norm": 0.34474700689315796, "learning_rate": 0.00012585814736349577, "loss": 1.3261, "step": 28538 }, { "epoch": 0.37085128881510876, "grad_norm": 0.4624866843223572, "learning_rate": 0.0001258555479015844, "loss": 1.4161, "step": 28539 }, { "epoch": 0.3708642833590246, "grad_norm": 0.3981010913848877, "learning_rate": 0.000125852948439673, "loss": 1.3475, "step": 28540 }, { "epoch": 0.3708772779029405, "grad_norm": 0.42675501108169556, "learning_rate": 0.0001258503489777616, "loss": 1.5631, "step": 28541 }, { "epoch": 0.37089027244685635, "grad_norm": 0.38703203201293945, "learning_rate": 0.00012584774951585024, "loss": 1.3689, "step": 28542 }, { "epoch": 0.37090326699077225, "grad_norm": 0.370449423789978, "learning_rate": 0.00012584515005393884, "loss": 1.3381, "step": 28543 }, { "epoch": 0.3709162615346881, "grad_norm": 0.3972005546092987, "learning_rate": 0.00012584255059202746, "loss": 1.458, "step": 28544 }, { "epoch": 0.370929256078604, "grad_norm": 0.47045713663101196, "learning_rate": 0.00012583995113011606, "loss": 1.2855, "step": 28545 }, { "epoch": 0.37094225062251984, "grad_norm": 0.42684119939804077, "learning_rate": 0.00012583735166820468, "loss": 1.4915, "step": 28546 }, { "epoch": 0.37095524516643574, "grad_norm": 0.39620745182037354, "learning_rate": 0.0001258347522062933, "loss": 1.424, "step": 28547 }, { "epoch": 0.37096823971035164, "grad_norm": 0.4511460065841675, "learning_rate": 0.0001258321527443819, "loss": 1.4513, "step": 28548 }, { "epoch": 0.3709812342542675, "grad_norm": 0.3990863263607025, "learning_rate": 0.00012582955328247053, "loss": 1.572, "step": 28549 }, { "epoch": 0.3709942287981834, "grad_norm": 0.37590500712394714, "learning_rate": 0.00012582695382055915, "loss": 1.306, "step": 28550 }, { "epoch": 0.37100722334209923, "grad_norm": 0.4584839940071106, "learning_rate": 0.00012582435435864778, "loss": 1.3541, "step": 28551 }, { "epoch": 0.37102021788601514, "grad_norm": 0.42680874466896057, "learning_rate": 0.00012582175489673638, "loss": 1.2581, "step": 28552 }, { "epoch": 0.371033212429931, "grad_norm": 0.46659156680107117, "learning_rate": 0.00012581915543482497, "loss": 1.3899, "step": 28553 }, { "epoch": 0.3710462069738469, "grad_norm": 0.29457229375839233, "learning_rate": 0.00012581655597291362, "loss": 1.3952, "step": 28554 }, { "epoch": 0.3710592015177627, "grad_norm": 0.38635507225990295, "learning_rate": 0.00012581395651100222, "loss": 1.5377, "step": 28555 }, { "epoch": 0.3710721960616786, "grad_norm": 0.4559784531593323, "learning_rate": 0.00012581135704909085, "loss": 1.4913, "step": 28556 }, { "epoch": 0.3710851906055945, "grad_norm": 0.4491625726222992, "learning_rate": 0.00012580875758717944, "loss": 1.5112, "step": 28557 }, { "epoch": 0.3710981851495104, "grad_norm": 0.48063036799430847, "learning_rate": 0.00012580615812526807, "loss": 1.4863, "step": 28558 }, { "epoch": 0.3711111796934262, "grad_norm": 0.4372749626636505, "learning_rate": 0.0001258035586633567, "loss": 1.4965, "step": 28559 }, { "epoch": 0.3711241742373421, "grad_norm": 0.35763171315193176, "learning_rate": 0.0001258009592014453, "loss": 1.4775, "step": 28560 }, { "epoch": 0.37113716878125796, "grad_norm": 0.3866100311279297, "learning_rate": 0.00012579835973953394, "loss": 1.4834, "step": 28561 }, { "epoch": 0.37115016332517387, "grad_norm": 0.4046694338321686, "learning_rate": 0.00012579576027762254, "loss": 1.3932, "step": 28562 }, { "epoch": 0.3711631578690897, "grad_norm": 0.31172484159469604, "learning_rate": 0.00012579316081571116, "loss": 1.459, "step": 28563 }, { "epoch": 0.3711761524130056, "grad_norm": 0.3125612437725067, "learning_rate": 0.00012579056135379976, "loss": 1.0811, "step": 28564 }, { "epoch": 0.37118914695692146, "grad_norm": 0.3897337019443512, "learning_rate": 0.00012578796189188839, "loss": 1.5187, "step": 28565 }, { "epoch": 0.37120214150083736, "grad_norm": 0.3549739718437195, "learning_rate": 0.000125785362429977, "loss": 1.2253, "step": 28566 }, { "epoch": 0.3712151360447532, "grad_norm": 0.564928412437439, "learning_rate": 0.0001257827629680656, "loss": 1.4104, "step": 28567 }, { "epoch": 0.3712281305886691, "grad_norm": 0.4044153094291687, "learning_rate": 0.00012578016350615423, "loss": 1.3929, "step": 28568 }, { "epoch": 0.37124112513258495, "grad_norm": 0.3604017198085785, "learning_rate": 0.00012577756404424286, "loss": 1.201, "step": 28569 }, { "epoch": 0.37125411967650085, "grad_norm": 0.48543795943260193, "learning_rate": 0.00012577496458233145, "loss": 1.4587, "step": 28570 }, { "epoch": 0.3712671142204167, "grad_norm": 0.4029945135116577, "learning_rate": 0.00012577236512042008, "loss": 1.5741, "step": 28571 }, { "epoch": 0.3712801087643326, "grad_norm": 0.3804973065853119, "learning_rate": 0.00012576976565850868, "loss": 1.374, "step": 28572 }, { "epoch": 0.37129310330824844, "grad_norm": 0.3861284554004669, "learning_rate": 0.00012576716619659733, "loss": 1.3911, "step": 28573 }, { "epoch": 0.37130609785216434, "grad_norm": 0.36409443616867065, "learning_rate": 0.00012576456673468592, "loss": 1.4648, "step": 28574 }, { "epoch": 0.3713190923960802, "grad_norm": 0.3111497163772583, "learning_rate": 0.00012576196727277455, "loss": 1.413, "step": 28575 }, { "epoch": 0.3713320869399961, "grad_norm": 0.3570258915424347, "learning_rate": 0.00012575936781086315, "loss": 1.7669, "step": 28576 }, { "epoch": 0.37134508148391193, "grad_norm": 0.3728431165218353, "learning_rate": 0.00012575676834895177, "loss": 1.1999, "step": 28577 }, { "epoch": 0.37135807602782783, "grad_norm": 0.47614988684654236, "learning_rate": 0.0001257541688870404, "loss": 1.5474, "step": 28578 }, { "epoch": 0.3713710705717437, "grad_norm": 0.37860941886901855, "learning_rate": 0.000125751569425129, "loss": 1.413, "step": 28579 }, { "epoch": 0.3713840651156596, "grad_norm": 0.4034661054611206, "learning_rate": 0.00012574896996321762, "loss": 1.4269, "step": 28580 }, { "epoch": 0.3713970596595754, "grad_norm": 0.46161821484565735, "learning_rate": 0.00012574637050130624, "loss": 1.3991, "step": 28581 }, { "epoch": 0.3714100542034913, "grad_norm": 0.3956683278083801, "learning_rate": 0.00012574377103939487, "loss": 1.5259, "step": 28582 }, { "epoch": 0.37142304874740717, "grad_norm": 0.42626920342445374, "learning_rate": 0.00012574117157748346, "loss": 1.4586, "step": 28583 }, { "epoch": 0.37143604329132307, "grad_norm": 0.3239297866821289, "learning_rate": 0.00012573857211557206, "loss": 1.258, "step": 28584 }, { "epoch": 0.3714490378352389, "grad_norm": 0.45306044816970825, "learning_rate": 0.0001257359726536607, "loss": 1.4138, "step": 28585 }, { "epoch": 0.3714620323791548, "grad_norm": 0.4213177263736725, "learning_rate": 0.0001257333731917493, "loss": 1.329, "step": 28586 }, { "epoch": 0.37147502692307066, "grad_norm": 0.47822171449661255, "learning_rate": 0.00012573077372983793, "loss": 1.4275, "step": 28587 }, { "epoch": 0.37148802146698656, "grad_norm": 0.34636977314949036, "learning_rate": 0.00012572817426792653, "loss": 1.4334, "step": 28588 }, { "epoch": 0.3715010160109024, "grad_norm": 0.30887115001678467, "learning_rate": 0.00012572557480601516, "loss": 1.5388, "step": 28589 }, { "epoch": 0.3715140105548183, "grad_norm": 0.39939185976982117, "learning_rate": 0.00012572297534410378, "loss": 1.4764, "step": 28590 }, { "epoch": 0.37152700509873415, "grad_norm": 0.3110973834991455, "learning_rate": 0.00012572037588219238, "loss": 1.3193, "step": 28591 }, { "epoch": 0.37153999964265005, "grad_norm": 0.4323619306087494, "learning_rate": 0.000125717776420281, "loss": 1.3408, "step": 28592 }, { "epoch": 0.3715529941865659, "grad_norm": 0.30838853120803833, "learning_rate": 0.00012571517695836963, "loss": 1.3376, "step": 28593 }, { "epoch": 0.3715659887304818, "grad_norm": 0.47952741384506226, "learning_rate": 0.00012571257749645825, "loss": 1.4203, "step": 28594 }, { "epoch": 0.37157898327439765, "grad_norm": 0.43382659554481506, "learning_rate": 0.00012570997803454685, "loss": 1.3298, "step": 28595 }, { "epoch": 0.37159197781831355, "grad_norm": 0.27219194173812866, "learning_rate": 0.00012570737857263547, "loss": 1.491, "step": 28596 }, { "epoch": 0.3716049723622294, "grad_norm": 0.43320417404174805, "learning_rate": 0.0001257047791107241, "loss": 1.4877, "step": 28597 }, { "epoch": 0.3716179669061453, "grad_norm": 0.3958994746208191, "learning_rate": 0.0001257021796488127, "loss": 1.2832, "step": 28598 }, { "epoch": 0.37163096145006114, "grad_norm": 0.37890514731407166, "learning_rate": 0.00012569958018690132, "loss": 1.3926, "step": 28599 }, { "epoch": 0.37164395599397704, "grad_norm": 0.5272101163864136, "learning_rate": 0.00012569698072498994, "loss": 1.673, "step": 28600 }, { "epoch": 0.3716569505378929, "grad_norm": 0.3599000871181488, "learning_rate": 0.00012569438126307854, "loss": 1.5321, "step": 28601 }, { "epoch": 0.3716699450818088, "grad_norm": 0.4670974612236023, "learning_rate": 0.00012569178180116717, "loss": 1.3906, "step": 28602 }, { "epoch": 0.37168293962572463, "grad_norm": 0.32362043857574463, "learning_rate": 0.00012568918233925576, "loss": 1.2695, "step": 28603 }, { "epoch": 0.37169593416964053, "grad_norm": 0.27641040086746216, "learning_rate": 0.00012568658287734442, "loss": 1.2319, "step": 28604 }, { "epoch": 0.3717089287135564, "grad_norm": 0.4326845407485962, "learning_rate": 0.000125683983415433, "loss": 1.5359, "step": 28605 }, { "epoch": 0.3717219232574723, "grad_norm": 0.4204167425632477, "learning_rate": 0.00012568138395352164, "loss": 1.4979, "step": 28606 }, { "epoch": 0.3717349178013881, "grad_norm": 0.41838812828063965, "learning_rate": 0.00012567878449161023, "loss": 1.4636, "step": 28607 }, { "epoch": 0.371747912345304, "grad_norm": 0.4017530381679535, "learning_rate": 0.00012567618502969886, "loss": 1.4571, "step": 28608 }, { "epoch": 0.37176090688921987, "grad_norm": 0.33240532875061035, "learning_rate": 0.00012567358556778748, "loss": 1.2888, "step": 28609 }, { "epoch": 0.37177390143313577, "grad_norm": 0.4004509449005127, "learning_rate": 0.00012567098610587608, "loss": 1.43, "step": 28610 }, { "epoch": 0.3717868959770516, "grad_norm": 0.3449625074863434, "learning_rate": 0.0001256683866439647, "loss": 1.2762, "step": 28611 }, { "epoch": 0.3717998905209675, "grad_norm": 0.31445473432540894, "learning_rate": 0.00012566578718205333, "loss": 1.3907, "step": 28612 }, { "epoch": 0.37181288506488336, "grad_norm": 0.39500120282173157, "learning_rate": 0.00012566318772014193, "loss": 1.3571, "step": 28613 }, { "epoch": 0.37182587960879926, "grad_norm": 0.3693854808807373, "learning_rate": 0.00012566058825823055, "loss": 1.4698, "step": 28614 }, { "epoch": 0.3718388741527151, "grad_norm": 0.44018974900245667, "learning_rate": 0.00012565798879631915, "loss": 1.4344, "step": 28615 }, { "epoch": 0.371851868696631, "grad_norm": 0.3605126440525055, "learning_rate": 0.0001256553893344078, "loss": 1.4717, "step": 28616 }, { "epoch": 0.37186486324054685, "grad_norm": 0.42302262783050537, "learning_rate": 0.0001256527898724964, "loss": 1.3964, "step": 28617 }, { "epoch": 0.37187785778446275, "grad_norm": 0.24802769720554352, "learning_rate": 0.00012565019041058502, "loss": 1.2781, "step": 28618 }, { "epoch": 0.3718908523283786, "grad_norm": 0.42664703726768494, "learning_rate": 0.00012564759094867362, "loss": 1.6889, "step": 28619 }, { "epoch": 0.3719038468722945, "grad_norm": 0.4162429869174957, "learning_rate": 0.00012564499148676224, "loss": 1.3692, "step": 28620 }, { "epoch": 0.37191684141621034, "grad_norm": 0.4092422425746918, "learning_rate": 0.00012564239202485087, "loss": 1.5516, "step": 28621 }, { "epoch": 0.37192983596012624, "grad_norm": 0.3229100704193115, "learning_rate": 0.00012563979256293947, "loss": 1.4682, "step": 28622 }, { "epoch": 0.3719428305040421, "grad_norm": 0.3950127363204956, "learning_rate": 0.0001256371931010281, "loss": 1.3339, "step": 28623 }, { "epoch": 0.371955825047958, "grad_norm": 0.3432612419128418, "learning_rate": 0.00012563459363911672, "loss": 1.0709, "step": 28624 }, { "epoch": 0.3719688195918739, "grad_norm": 0.321338027715683, "learning_rate": 0.0001256319941772053, "loss": 1.3111, "step": 28625 }, { "epoch": 0.37198181413578973, "grad_norm": 0.48300492763519287, "learning_rate": 0.00012562939471529394, "loss": 1.3849, "step": 28626 }, { "epoch": 0.37199480867970564, "grad_norm": 0.34484028816223145, "learning_rate": 0.00012562679525338253, "loss": 1.3808, "step": 28627 }, { "epoch": 0.3720078032236215, "grad_norm": 0.3866361975669861, "learning_rate": 0.00012562419579147119, "loss": 1.3935, "step": 28628 }, { "epoch": 0.3720207977675374, "grad_norm": 0.41459280252456665, "learning_rate": 0.00012562159632955978, "loss": 1.5649, "step": 28629 }, { "epoch": 0.3720337923114532, "grad_norm": 0.3238179683685303, "learning_rate": 0.0001256189968676484, "loss": 1.49, "step": 28630 }, { "epoch": 0.3720467868553691, "grad_norm": 0.27497413754463196, "learning_rate": 0.000125616397405737, "loss": 1.4515, "step": 28631 }, { "epoch": 0.372059781399285, "grad_norm": 0.42133787274360657, "learning_rate": 0.00012561379794382563, "loss": 1.4946, "step": 28632 }, { "epoch": 0.3720727759432009, "grad_norm": 0.4290090501308441, "learning_rate": 0.00012561119848191425, "loss": 1.5536, "step": 28633 }, { "epoch": 0.3720857704871167, "grad_norm": 0.4357638359069824, "learning_rate": 0.00012560859902000285, "loss": 1.3266, "step": 28634 }, { "epoch": 0.3720987650310326, "grad_norm": 0.35319873690605164, "learning_rate": 0.0001256059995580915, "loss": 1.2838, "step": 28635 }, { "epoch": 0.37211175957494846, "grad_norm": 0.2876637578010559, "learning_rate": 0.0001256034000961801, "loss": 1.2703, "step": 28636 }, { "epoch": 0.37212475411886436, "grad_norm": 0.40520432591438293, "learning_rate": 0.0001256008006342687, "loss": 1.3238, "step": 28637 }, { "epoch": 0.3721377486627802, "grad_norm": 0.4260050654411316, "learning_rate": 0.00012559820117235732, "loss": 1.4977, "step": 28638 }, { "epoch": 0.3721507432066961, "grad_norm": 0.3838316798210144, "learning_rate": 0.00012559560171044595, "loss": 1.592, "step": 28639 }, { "epoch": 0.37216373775061196, "grad_norm": 0.33360186219215393, "learning_rate": 0.00012559300224853457, "loss": 1.5221, "step": 28640 }, { "epoch": 0.37217673229452786, "grad_norm": 0.3735339641571045, "learning_rate": 0.00012559040278662317, "loss": 1.3799, "step": 28641 }, { "epoch": 0.3721897268384437, "grad_norm": 0.41494402289390564, "learning_rate": 0.0001255878033247118, "loss": 1.5151, "step": 28642 }, { "epoch": 0.3722027213823596, "grad_norm": 0.3460751175880432, "learning_rate": 0.00012558520386280042, "loss": 1.4712, "step": 28643 }, { "epoch": 0.37221571592627545, "grad_norm": 0.38725244998931885, "learning_rate": 0.00012558260440088902, "loss": 1.356, "step": 28644 }, { "epoch": 0.37222871047019135, "grad_norm": 0.45364412665367126, "learning_rate": 0.00012558000493897764, "loss": 1.2966, "step": 28645 }, { "epoch": 0.3722417050141072, "grad_norm": 0.35522225499153137, "learning_rate": 0.00012557740547706624, "loss": 1.347, "step": 28646 }, { "epoch": 0.3722546995580231, "grad_norm": 0.3954715430736542, "learning_rate": 0.0001255748060151549, "loss": 1.3806, "step": 28647 }, { "epoch": 0.37226769410193894, "grad_norm": 0.2840288281440735, "learning_rate": 0.00012557220655324349, "loss": 1.3557, "step": 28648 }, { "epoch": 0.37228068864585484, "grad_norm": 0.4001220166683197, "learning_rate": 0.0001255696070913321, "loss": 1.4431, "step": 28649 }, { "epoch": 0.3722936831897707, "grad_norm": 0.4043634831905365, "learning_rate": 0.0001255670076294207, "loss": 1.3139, "step": 28650 }, { "epoch": 0.3723066777336866, "grad_norm": 0.3944031298160553, "learning_rate": 0.00012556440816750933, "loss": 1.3241, "step": 28651 }, { "epoch": 0.37231967227760243, "grad_norm": 0.3356615900993347, "learning_rate": 0.00012556180870559796, "loss": 1.0619, "step": 28652 }, { "epoch": 0.37233266682151833, "grad_norm": 0.568498432636261, "learning_rate": 0.00012555920924368655, "loss": 1.4497, "step": 28653 }, { "epoch": 0.3723456613654342, "grad_norm": 0.5096614956855774, "learning_rate": 0.00012555660978177518, "loss": 1.3966, "step": 28654 }, { "epoch": 0.3723586559093501, "grad_norm": 0.45051145553588867, "learning_rate": 0.0001255540103198638, "loss": 1.156, "step": 28655 }, { "epoch": 0.3723716504532659, "grad_norm": 0.3604600429534912, "learning_rate": 0.0001255514108579524, "loss": 1.4989, "step": 28656 }, { "epoch": 0.3723846449971818, "grad_norm": 0.3429659307003021, "learning_rate": 0.00012554881139604103, "loss": 1.2856, "step": 28657 }, { "epoch": 0.37239763954109767, "grad_norm": 0.38033246994018555, "learning_rate": 0.00012554621193412962, "loss": 1.4322, "step": 28658 }, { "epoch": 0.37241063408501357, "grad_norm": 0.4652290940284729, "learning_rate": 0.00012554361247221827, "loss": 1.3399, "step": 28659 }, { "epoch": 0.3724236286289294, "grad_norm": 0.4360791742801666, "learning_rate": 0.00012554101301030687, "loss": 1.406, "step": 28660 }, { "epoch": 0.3724366231728453, "grad_norm": 0.3296932280063629, "learning_rate": 0.0001255384135483955, "loss": 1.3443, "step": 28661 }, { "epoch": 0.37244961771676116, "grad_norm": 0.39879682660102844, "learning_rate": 0.0001255358140864841, "loss": 1.4026, "step": 28662 }, { "epoch": 0.37246261226067706, "grad_norm": 0.2461155205965042, "learning_rate": 0.00012553321462457272, "loss": 1.168, "step": 28663 }, { "epoch": 0.3724756068045929, "grad_norm": 0.3588959872722626, "learning_rate": 0.00012553061516266134, "loss": 1.2884, "step": 28664 }, { "epoch": 0.3724886013485088, "grad_norm": 0.5581744313240051, "learning_rate": 0.00012552801570074994, "loss": 1.3158, "step": 28665 }, { "epoch": 0.37250159589242465, "grad_norm": 0.4687407910823822, "learning_rate": 0.00012552541623883856, "loss": 1.5517, "step": 28666 }, { "epoch": 0.37251459043634055, "grad_norm": 0.27124786376953125, "learning_rate": 0.0001255228167769272, "loss": 1.3155, "step": 28667 }, { "epoch": 0.3725275849802564, "grad_norm": 0.3334617614746094, "learning_rate": 0.00012552021731501579, "loss": 1.4242, "step": 28668 }, { "epoch": 0.3725405795241723, "grad_norm": 0.4493698477745056, "learning_rate": 0.0001255176178531044, "loss": 1.372, "step": 28669 }, { "epoch": 0.37255357406808814, "grad_norm": 0.4473382234573364, "learning_rate": 0.00012551501839119304, "loss": 1.4243, "step": 28670 }, { "epoch": 0.37256656861200405, "grad_norm": 0.3975907564163208, "learning_rate": 0.00012551241892928166, "loss": 1.5818, "step": 28671 }, { "epoch": 0.3725795631559199, "grad_norm": 0.31713929772377014, "learning_rate": 0.00012550981946737026, "loss": 1.2746, "step": 28672 }, { "epoch": 0.3725925576998358, "grad_norm": 0.4670765995979309, "learning_rate": 0.00012550722000545888, "loss": 1.4332, "step": 28673 }, { "epoch": 0.37260555224375164, "grad_norm": 0.42125341296195984, "learning_rate": 0.0001255046205435475, "loss": 1.491, "step": 28674 }, { "epoch": 0.37261854678766754, "grad_norm": 0.36299535632133484, "learning_rate": 0.0001255020210816361, "loss": 1.3692, "step": 28675 }, { "epoch": 0.3726315413315834, "grad_norm": 0.43252214789390564, "learning_rate": 0.00012549942161972473, "loss": 1.2532, "step": 28676 }, { "epoch": 0.3726445358754993, "grad_norm": 0.4870414435863495, "learning_rate": 0.00012549682215781333, "loss": 1.3925, "step": 28677 }, { "epoch": 0.37265753041941513, "grad_norm": 0.32571282982826233, "learning_rate": 0.00012549422269590198, "loss": 1.2693, "step": 28678 }, { "epoch": 0.37267052496333103, "grad_norm": 0.39593273401260376, "learning_rate": 0.00012549162323399057, "loss": 1.2947, "step": 28679 }, { "epoch": 0.3726835195072469, "grad_norm": 0.37479060888290405, "learning_rate": 0.00012548902377207917, "loss": 1.1472, "step": 28680 }, { "epoch": 0.3726965140511628, "grad_norm": 0.41043952107429504, "learning_rate": 0.0001254864243101678, "loss": 1.3782, "step": 28681 }, { "epoch": 0.3727095085950786, "grad_norm": 0.39029228687286377, "learning_rate": 0.00012548382484825642, "loss": 1.2535, "step": 28682 }, { "epoch": 0.3727225031389945, "grad_norm": 0.4228711426258087, "learning_rate": 0.00012548122538634504, "loss": 1.2723, "step": 28683 }, { "epoch": 0.37273549768291037, "grad_norm": 0.3423106372356415, "learning_rate": 0.00012547862592443364, "loss": 1.4927, "step": 28684 }, { "epoch": 0.37274849222682627, "grad_norm": 0.36802372336387634, "learning_rate": 0.00012547602646252227, "loss": 1.4354, "step": 28685 }, { "epoch": 0.3727614867707421, "grad_norm": 0.42316174507141113, "learning_rate": 0.0001254734270006109, "loss": 1.5389, "step": 28686 }, { "epoch": 0.372774481314658, "grad_norm": 0.39688268303871155, "learning_rate": 0.0001254708275386995, "loss": 1.4057, "step": 28687 }, { "epoch": 0.37278747585857386, "grad_norm": 0.37469565868377686, "learning_rate": 0.0001254682280767881, "loss": 1.5899, "step": 28688 }, { "epoch": 0.37280047040248976, "grad_norm": 0.3846202492713928, "learning_rate": 0.0001254656286148767, "loss": 1.3641, "step": 28689 }, { "epoch": 0.3728134649464056, "grad_norm": 0.3596822917461395, "learning_rate": 0.00012546302915296536, "loss": 1.4972, "step": 28690 }, { "epoch": 0.3728264594903215, "grad_norm": 0.473092257976532, "learning_rate": 0.00012546042969105396, "loss": 1.3877, "step": 28691 }, { "epoch": 0.37283945403423735, "grad_norm": 0.41913488507270813, "learning_rate": 0.00012545783022914256, "loss": 1.3067, "step": 28692 }, { "epoch": 0.37285244857815325, "grad_norm": 0.42669397592544556, "learning_rate": 0.00012545523076723118, "loss": 1.3986, "step": 28693 }, { "epoch": 0.3728654431220691, "grad_norm": 0.31775355339050293, "learning_rate": 0.0001254526313053198, "loss": 1.5008, "step": 28694 }, { "epoch": 0.372878437665985, "grad_norm": 0.43441590666770935, "learning_rate": 0.00012545003184340843, "loss": 1.3251, "step": 28695 }, { "epoch": 0.37289143220990084, "grad_norm": 0.34615200757980347, "learning_rate": 0.00012544743238149703, "loss": 1.4758, "step": 28696 }, { "epoch": 0.37290442675381674, "grad_norm": 0.37057366967201233, "learning_rate": 0.00012544483291958565, "loss": 1.4136, "step": 28697 }, { "epoch": 0.3729174212977326, "grad_norm": 0.3675193786621094, "learning_rate": 0.00012544223345767428, "loss": 1.3833, "step": 28698 }, { "epoch": 0.3729304158416485, "grad_norm": 0.40878060460090637, "learning_rate": 0.00012543963399576287, "loss": 1.3567, "step": 28699 }, { "epoch": 0.37294341038556433, "grad_norm": 0.36005470156669617, "learning_rate": 0.0001254370345338515, "loss": 1.4398, "step": 28700 }, { "epoch": 0.37295640492948023, "grad_norm": 0.3990764617919922, "learning_rate": 0.0001254344350719401, "loss": 1.4644, "step": 28701 }, { "epoch": 0.37296939947339613, "grad_norm": 0.42862221598625183, "learning_rate": 0.00012543183561002875, "loss": 1.4198, "step": 28702 }, { "epoch": 0.372982394017312, "grad_norm": 0.4164692759513855, "learning_rate": 0.00012542923614811734, "loss": 1.2998, "step": 28703 }, { "epoch": 0.3729953885612279, "grad_norm": 0.43596258759498596, "learning_rate": 0.00012542663668620597, "loss": 1.299, "step": 28704 }, { "epoch": 0.3730083831051437, "grad_norm": 0.5348424315452576, "learning_rate": 0.00012542403722429457, "loss": 1.4191, "step": 28705 }, { "epoch": 0.3730213776490596, "grad_norm": 0.39547592401504517, "learning_rate": 0.0001254214377623832, "loss": 1.5583, "step": 28706 }, { "epoch": 0.37303437219297547, "grad_norm": 0.4528743624687195, "learning_rate": 0.00012541883830047182, "loss": 1.3959, "step": 28707 }, { "epoch": 0.3730473667368914, "grad_norm": 0.3225213587284088, "learning_rate": 0.0001254162388385604, "loss": 1.1863, "step": 28708 }, { "epoch": 0.3730603612808072, "grad_norm": 0.33787041902542114, "learning_rate": 0.00012541363937664904, "loss": 1.5928, "step": 28709 }, { "epoch": 0.3730733558247231, "grad_norm": 0.4740648567676544, "learning_rate": 0.00012541103991473766, "loss": 1.6671, "step": 28710 }, { "epoch": 0.37308635036863896, "grad_norm": 0.3897722363471985, "learning_rate": 0.00012540844045282626, "loss": 1.4861, "step": 28711 }, { "epoch": 0.37309934491255486, "grad_norm": 0.4286596477031708, "learning_rate": 0.00012540584099091488, "loss": 1.2548, "step": 28712 }, { "epoch": 0.3731123394564707, "grad_norm": 0.41074612736701965, "learning_rate": 0.0001254032415290035, "loss": 1.565, "step": 28713 }, { "epoch": 0.3731253340003866, "grad_norm": 0.33220306038856506, "learning_rate": 0.00012540064206709213, "loss": 1.3268, "step": 28714 }, { "epoch": 0.37313832854430246, "grad_norm": 0.4868376553058624, "learning_rate": 0.00012539804260518073, "loss": 1.3465, "step": 28715 }, { "epoch": 0.37315132308821836, "grad_norm": 0.31483379006385803, "learning_rate": 0.00012539544314326935, "loss": 1.4502, "step": 28716 }, { "epoch": 0.3731643176321342, "grad_norm": 0.37703099846839905, "learning_rate": 0.00012539284368135798, "loss": 1.3666, "step": 28717 }, { "epoch": 0.3731773121760501, "grad_norm": 0.3846674859523773, "learning_rate": 0.00012539024421944658, "loss": 1.4064, "step": 28718 }, { "epoch": 0.37319030671996595, "grad_norm": 0.3678659200668335, "learning_rate": 0.0001253876447575352, "loss": 1.323, "step": 28719 }, { "epoch": 0.37320330126388185, "grad_norm": 0.3345188498497009, "learning_rate": 0.0001253850452956238, "loss": 1.3551, "step": 28720 }, { "epoch": 0.3732162958077977, "grad_norm": 0.48107364773750305, "learning_rate": 0.00012538244583371242, "loss": 1.3925, "step": 28721 }, { "epoch": 0.3732292903517136, "grad_norm": 0.4644206166267395, "learning_rate": 0.00012537984637180105, "loss": 1.4438, "step": 28722 }, { "epoch": 0.37324228489562944, "grad_norm": 0.30679380893707275, "learning_rate": 0.00012537724690988964, "loss": 1.3272, "step": 28723 }, { "epoch": 0.37325527943954534, "grad_norm": 0.40435296297073364, "learning_rate": 0.00012537464744797827, "loss": 1.3818, "step": 28724 }, { "epoch": 0.3732682739834612, "grad_norm": 0.4277142286300659, "learning_rate": 0.0001253720479860669, "loss": 1.3127, "step": 28725 }, { "epoch": 0.3732812685273771, "grad_norm": 0.4050789475440979, "learning_rate": 0.00012536944852415552, "loss": 1.4671, "step": 28726 }, { "epoch": 0.37329426307129293, "grad_norm": 0.2995666265487671, "learning_rate": 0.00012536684906224412, "loss": 1.2414, "step": 28727 }, { "epoch": 0.37330725761520883, "grad_norm": 0.3702373802661896, "learning_rate": 0.00012536424960033274, "loss": 1.4577, "step": 28728 }, { "epoch": 0.3733202521591247, "grad_norm": 0.4661139249801636, "learning_rate": 0.00012536165013842136, "loss": 1.4977, "step": 28729 }, { "epoch": 0.3733332467030406, "grad_norm": 0.3852730989456177, "learning_rate": 0.00012535905067650996, "loss": 1.4309, "step": 28730 }, { "epoch": 0.3733462412469564, "grad_norm": 0.35303887724876404, "learning_rate": 0.0001253564512145986, "loss": 1.3882, "step": 28731 }, { "epoch": 0.3733592357908723, "grad_norm": 0.41061705350875854, "learning_rate": 0.00012535385175268718, "loss": 1.4584, "step": 28732 }, { "epoch": 0.37337223033478817, "grad_norm": 0.33713802695274353, "learning_rate": 0.00012535125229077584, "loss": 1.2744, "step": 28733 }, { "epoch": 0.37338522487870407, "grad_norm": 0.4064413905143738, "learning_rate": 0.00012534865282886443, "loss": 1.4572, "step": 28734 }, { "epoch": 0.3733982194226199, "grad_norm": 0.33813905715942383, "learning_rate": 0.00012534605336695303, "loss": 1.3166, "step": 28735 }, { "epoch": 0.3734112139665358, "grad_norm": 0.41231778264045715, "learning_rate": 0.00012534345390504165, "loss": 1.3165, "step": 28736 }, { "epoch": 0.37342420851045166, "grad_norm": 0.4169091284275055, "learning_rate": 0.00012534085444313028, "loss": 1.3911, "step": 28737 }, { "epoch": 0.37343720305436756, "grad_norm": 0.38153883814811707, "learning_rate": 0.0001253382549812189, "loss": 1.2459, "step": 28738 }, { "epoch": 0.3734501975982834, "grad_norm": 0.24761615693569183, "learning_rate": 0.0001253356555193075, "loss": 1.3432, "step": 28739 }, { "epoch": 0.3734631921421993, "grad_norm": 0.4171951115131378, "learning_rate": 0.00012533305605739613, "loss": 1.2287, "step": 28740 }, { "epoch": 0.37347618668611515, "grad_norm": 0.36548227071762085, "learning_rate": 0.00012533045659548475, "loss": 1.6066, "step": 28741 }, { "epoch": 0.37348918123003105, "grad_norm": 0.4641980230808258, "learning_rate": 0.00012532785713357335, "loss": 1.5063, "step": 28742 }, { "epoch": 0.3735021757739469, "grad_norm": 0.47401878237724304, "learning_rate": 0.00012532525767166197, "loss": 1.4163, "step": 28743 }, { "epoch": 0.3735151703178628, "grad_norm": 0.3747689127922058, "learning_rate": 0.0001253226582097506, "loss": 1.4072, "step": 28744 }, { "epoch": 0.37352816486177864, "grad_norm": 0.5015178918838501, "learning_rate": 0.00012532005874783922, "loss": 1.4538, "step": 28745 }, { "epoch": 0.37354115940569455, "grad_norm": 0.4278414845466614, "learning_rate": 0.00012531745928592782, "loss": 1.2608, "step": 28746 }, { "epoch": 0.3735541539496104, "grad_norm": 0.46396157145500183, "learning_rate": 0.00012531485982401642, "loss": 1.4209, "step": 28747 }, { "epoch": 0.3735671484935263, "grad_norm": 0.32740846276283264, "learning_rate": 0.00012531226036210507, "loss": 1.3874, "step": 28748 }, { "epoch": 0.37358014303744214, "grad_norm": 0.3755617141723633, "learning_rate": 0.00012530966090019366, "loss": 1.3691, "step": 28749 }, { "epoch": 0.37359313758135804, "grad_norm": 0.3294253945350647, "learning_rate": 0.0001253070614382823, "loss": 1.2616, "step": 28750 }, { "epoch": 0.3736061321252739, "grad_norm": 0.32209479808807373, "learning_rate": 0.0001253044619763709, "loss": 1.352, "step": 28751 }, { "epoch": 0.3736191266691898, "grad_norm": 0.34468215703964233, "learning_rate": 0.0001253018625144595, "loss": 1.2463, "step": 28752 }, { "epoch": 0.37363212121310563, "grad_norm": 0.4348553717136383, "learning_rate": 0.00012529926305254814, "loss": 1.3622, "step": 28753 }, { "epoch": 0.37364511575702153, "grad_norm": 0.4141204059123993, "learning_rate": 0.00012529666359063673, "loss": 1.3976, "step": 28754 }, { "epoch": 0.3736581103009374, "grad_norm": 0.4958474636077881, "learning_rate": 0.00012529406412872536, "loss": 1.4994, "step": 28755 }, { "epoch": 0.3736711048448533, "grad_norm": 0.404459148645401, "learning_rate": 0.00012529146466681398, "loss": 1.5054, "step": 28756 }, { "epoch": 0.3736840993887691, "grad_norm": 0.4204579293727875, "learning_rate": 0.0001252888652049026, "loss": 1.2679, "step": 28757 }, { "epoch": 0.373697093932685, "grad_norm": 0.33855143189430237, "learning_rate": 0.0001252862657429912, "loss": 1.0715, "step": 28758 }, { "epoch": 0.37371008847660087, "grad_norm": 0.3690027892589569, "learning_rate": 0.0001252836662810798, "loss": 1.2481, "step": 28759 }, { "epoch": 0.37372308302051677, "grad_norm": 0.36489149928092957, "learning_rate": 0.00012528106681916845, "loss": 1.3135, "step": 28760 }, { "epoch": 0.3737360775644326, "grad_norm": 0.4587075114250183, "learning_rate": 0.00012527846735725705, "loss": 1.4474, "step": 28761 }, { "epoch": 0.3737490721083485, "grad_norm": 0.4402141571044922, "learning_rate": 0.00012527586789534567, "loss": 1.5649, "step": 28762 }, { "epoch": 0.37376206665226436, "grad_norm": 0.4330047369003296, "learning_rate": 0.00012527326843343427, "loss": 1.3132, "step": 28763 }, { "epoch": 0.37377506119618026, "grad_norm": 0.3839380443096161, "learning_rate": 0.0001252706689715229, "loss": 1.3463, "step": 28764 }, { "epoch": 0.3737880557400961, "grad_norm": 0.3916539251804352, "learning_rate": 0.00012526806950961152, "loss": 1.4293, "step": 28765 }, { "epoch": 0.373801050284012, "grad_norm": 0.2990649938583374, "learning_rate": 0.00012526547004770012, "loss": 1.3049, "step": 28766 }, { "epoch": 0.37381404482792785, "grad_norm": 0.37046363949775696, "learning_rate": 0.00012526287058578874, "loss": 1.3364, "step": 28767 }, { "epoch": 0.37382703937184375, "grad_norm": 0.4471253454685211, "learning_rate": 0.00012526027112387737, "loss": 1.4361, "step": 28768 }, { "epoch": 0.3738400339157596, "grad_norm": 0.41291218996047974, "learning_rate": 0.000125257671661966, "loss": 1.3924, "step": 28769 }, { "epoch": 0.3738530284596755, "grad_norm": 0.3176535665988922, "learning_rate": 0.0001252550722000546, "loss": 1.267, "step": 28770 }, { "epoch": 0.37386602300359134, "grad_norm": 0.5047698020935059, "learning_rate": 0.0001252524727381432, "loss": 1.5015, "step": 28771 }, { "epoch": 0.37387901754750724, "grad_norm": 0.46696439385414124, "learning_rate": 0.00012524987327623184, "loss": 1.4474, "step": 28772 }, { "epoch": 0.3738920120914231, "grad_norm": 0.4528016149997711, "learning_rate": 0.00012524727381432044, "loss": 1.5101, "step": 28773 }, { "epoch": 0.373905006635339, "grad_norm": 0.41030579805374146, "learning_rate": 0.00012524467435240906, "loss": 1.2877, "step": 28774 }, { "epoch": 0.37391800117925483, "grad_norm": 0.4559067189693451, "learning_rate": 0.00012524207489049766, "loss": 1.4248, "step": 28775 }, { "epoch": 0.37393099572317073, "grad_norm": 0.4302278459072113, "learning_rate": 0.00012523947542858628, "loss": 1.4347, "step": 28776 }, { "epoch": 0.37394399026708663, "grad_norm": 0.41090983152389526, "learning_rate": 0.0001252368759666749, "loss": 1.3745, "step": 28777 }, { "epoch": 0.3739569848110025, "grad_norm": 0.35330748558044434, "learning_rate": 0.0001252342765047635, "loss": 1.3543, "step": 28778 }, { "epoch": 0.3739699793549184, "grad_norm": 0.6061524748802185, "learning_rate": 0.00012523167704285213, "loss": 1.4235, "step": 28779 }, { "epoch": 0.3739829738988342, "grad_norm": 0.3606860637664795, "learning_rate": 0.00012522907758094075, "loss": 1.3685, "step": 28780 }, { "epoch": 0.3739959684427501, "grad_norm": 0.4220470190048218, "learning_rate": 0.00012522647811902938, "loss": 1.2141, "step": 28781 }, { "epoch": 0.37400896298666597, "grad_norm": 0.44551536440849304, "learning_rate": 0.00012522387865711797, "loss": 1.4827, "step": 28782 }, { "epoch": 0.37402195753058187, "grad_norm": 0.36242103576660156, "learning_rate": 0.0001252212791952066, "loss": 1.3, "step": 28783 }, { "epoch": 0.3740349520744977, "grad_norm": 0.3974902033805847, "learning_rate": 0.00012521867973329522, "loss": 1.4835, "step": 28784 }, { "epoch": 0.3740479466184136, "grad_norm": 0.4516390562057495, "learning_rate": 0.00012521608027138382, "loss": 1.3384, "step": 28785 }, { "epoch": 0.37406094116232946, "grad_norm": 0.31487998366355896, "learning_rate": 0.00012521348080947245, "loss": 1.2772, "step": 28786 }, { "epoch": 0.37407393570624536, "grad_norm": 0.3444060981273651, "learning_rate": 0.00012521088134756107, "loss": 1.3413, "step": 28787 }, { "epoch": 0.3740869302501612, "grad_norm": 0.34494084119796753, "learning_rate": 0.0001252082818856497, "loss": 1.5097, "step": 28788 }, { "epoch": 0.3740999247940771, "grad_norm": 0.3277198374271393, "learning_rate": 0.0001252056824237383, "loss": 1.3112, "step": 28789 }, { "epoch": 0.37411291933799296, "grad_norm": 0.4672078788280487, "learning_rate": 0.0001252030829618269, "loss": 1.3987, "step": 28790 }, { "epoch": 0.37412591388190886, "grad_norm": 0.44889727234840393, "learning_rate": 0.00012520048349991554, "loss": 1.3671, "step": 28791 }, { "epoch": 0.3741389084258247, "grad_norm": 0.29564836621284485, "learning_rate": 0.00012519788403800414, "loss": 1.3785, "step": 28792 }, { "epoch": 0.3741519029697406, "grad_norm": 0.5507976412773132, "learning_rate": 0.00012519528457609276, "loss": 1.2517, "step": 28793 }, { "epoch": 0.37416489751365645, "grad_norm": 0.412579745054245, "learning_rate": 0.00012519268511418136, "loss": 1.3978, "step": 28794 }, { "epoch": 0.37417789205757235, "grad_norm": 0.3830762803554535, "learning_rate": 0.00012519008565226998, "loss": 1.2006, "step": 28795 }, { "epoch": 0.3741908866014882, "grad_norm": 0.28367650508880615, "learning_rate": 0.0001251874861903586, "loss": 1.1969, "step": 28796 }, { "epoch": 0.3742038811454041, "grad_norm": 0.4791542887687683, "learning_rate": 0.0001251848867284472, "loss": 1.4949, "step": 28797 }, { "epoch": 0.37421687568931994, "grad_norm": 0.3939990699291229, "learning_rate": 0.00012518228726653583, "loss": 1.6424, "step": 28798 }, { "epoch": 0.37422987023323584, "grad_norm": 0.34524670243263245, "learning_rate": 0.00012517968780462446, "loss": 1.341, "step": 28799 }, { "epoch": 0.3742428647771517, "grad_norm": 0.37180066108703613, "learning_rate": 0.00012517708834271308, "loss": 1.3833, "step": 28800 }, { "epoch": 0.3742558593210676, "grad_norm": 0.4311344623565674, "learning_rate": 0.00012517448888080168, "loss": 1.4499, "step": 28801 }, { "epoch": 0.37426885386498343, "grad_norm": 0.32370734214782715, "learning_rate": 0.00012517188941889027, "loss": 1.3205, "step": 28802 }, { "epoch": 0.37428184840889933, "grad_norm": 0.3162273168563843, "learning_rate": 0.00012516928995697893, "loss": 1.3224, "step": 28803 }, { "epoch": 0.3742948429528152, "grad_norm": 0.3170686960220337, "learning_rate": 0.00012516669049506752, "loss": 1.2121, "step": 28804 }, { "epoch": 0.3743078374967311, "grad_norm": 0.37588006258010864, "learning_rate": 0.00012516409103315615, "loss": 1.401, "step": 28805 }, { "epoch": 0.3743208320406469, "grad_norm": 0.5735766291618347, "learning_rate": 0.00012516149157124475, "loss": 1.4528, "step": 28806 }, { "epoch": 0.3743338265845628, "grad_norm": 0.40847253799438477, "learning_rate": 0.00012515889210933337, "loss": 1.2933, "step": 28807 }, { "epoch": 0.37434682112847867, "grad_norm": 0.4204402267932892, "learning_rate": 0.000125156292647422, "loss": 1.4996, "step": 28808 }, { "epoch": 0.37435981567239457, "grad_norm": 0.3809019923210144, "learning_rate": 0.0001251536931855106, "loss": 1.449, "step": 28809 }, { "epoch": 0.3743728102163104, "grad_norm": 0.40501073002815247, "learning_rate": 0.00012515109372359922, "loss": 1.3807, "step": 28810 }, { "epoch": 0.3743858047602263, "grad_norm": 0.45273464918136597, "learning_rate": 0.00012514849426168784, "loss": 1.3613, "step": 28811 }, { "epoch": 0.37439879930414216, "grad_norm": 0.4391747713088989, "learning_rate": 0.00012514589479977647, "loss": 1.4603, "step": 28812 }, { "epoch": 0.37441179384805806, "grad_norm": 0.3503970503807068, "learning_rate": 0.00012514329533786506, "loss": 1.2576, "step": 28813 }, { "epoch": 0.3744247883919739, "grad_norm": 0.25196388363838196, "learning_rate": 0.00012514069587595366, "loss": 1.4823, "step": 28814 }, { "epoch": 0.3744377829358898, "grad_norm": 0.5326829552650452, "learning_rate": 0.0001251380964140423, "loss": 1.4918, "step": 28815 }, { "epoch": 0.37445077747980565, "grad_norm": 0.47540566325187683, "learning_rate": 0.0001251354969521309, "loss": 1.422, "step": 28816 }, { "epoch": 0.37446377202372155, "grad_norm": 0.4210006594657898, "learning_rate": 0.00012513289749021953, "loss": 1.5774, "step": 28817 }, { "epoch": 0.3744767665676374, "grad_norm": 0.42402294278144836, "learning_rate": 0.00012513029802830816, "loss": 1.416, "step": 28818 }, { "epoch": 0.3744897611115533, "grad_norm": 0.3172576129436493, "learning_rate": 0.00012512769856639676, "loss": 1.5137, "step": 28819 }, { "epoch": 0.37450275565546914, "grad_norm": 0.4440973401069641, "learning_rate": 0.00012512509910448538, "loss": 1.3478, "step": 28820 }, { "epoch": 0.37451575019938504, "grad_norm": 0.4081081748008728, "learning_rate": 0.00012512249964257398, "loss": 1.3932, "step": 28821 }, { "epoch": 0.3745287447433009, "grad_norm": 0.4473981261253357, "learning_rate": 0.00012511990018066263, "loss": 1.3888, "step": 28822 }, { "epoch": 0.3745417392872168, "grad_norm": 0.38845670223236084, "learning_rate": 0.00012511730071875123, "loss": 1.2379, "step": 28823 }, { "epoch": 0.37455473383113264, "grad_norm": 0.45279181003570557, "learning_rate": 0.00012511470125683985, "loss": 1.4901, "step": 28824 }, { "epoch": 0.37456772837504854, "grad_norm": 0.30662423372268677, "learning_rate": 0.00012511210179492845, "loss": 1.1286, "step": 28825 }, { "epoch": 0.3745807229189644, "grad_norm": 0.4232569634914398, "learning_rate": 0.00012510950233301707, "loss": 1.2701, "step": 28826 }, { "epoch": 0.3745937174628803, "grad_norm": 0.3922737240791321, "learning_rate": 0.0001251069028711057, "loss": 1.3488, "step": 28827 }, { "epoch": 0.3746067120067961, "grad_norm": 0.34042033553123474, "learning_rate": 0.0001251043034091943, "loss": 1.4408, "step": 28828 }, { "epoch": 0.37461970655071203, "grad_norm": 0.321860671043396, "learning_rate": 0.00012510170394728292, "loss": 1.332, "step": 28829 }, { "epoch": 0.3746327010946279, "grad_norm": 0.38447946310043335, "learning_rate": 0.00012509910448537154, "loss": 1.4279, "step": 28830 }, { "epoch": 0.3746456956385438, "grad_norm": 0.36960867047309875, "learning_rate": 0.00012509650502346014, "loss": 1.3425, "step": 28831 }, { "epoch": 0.3746586901824596, "grad_norm": 0.4848098158836365, "learning_rate": 0.00012509390556154876, "loss": 1.3095, "step": 28832 }, { "epoch": 0.3746716847263755, "grad_norm": 0.45199307799339294, "learning_rate": 0.00012509130609963736, "loss": 1.6055, "step": 28833 }, { "epoch": 0.37468467927029137, "grad_norm": 0.3760932385921478, "learning_rate": 0.00012508870663772601, "loss": 1.3039, "step": 28834 }, { "epoch": 0.37469767381420727, "grad_norm": 0.4585811495780945, "learning_rate": 0.0001250861071758146, "loss": 1.5426, "step": 28835 }, { "epoch": 0.3747106683581231, "grad_norm": 0.39848774671554565, "learning_rate": 0.00012508350771390324, "loss": 1.3304, "step": 28836 }, { "epoch": 0.374723662902039, "grad_norm": 0.47406378388404846, "learning_rate": 0.00012508090825199183, "loss": 1.4261, "step": 28837 }, { "epoch": 0.37473665744595486, "grad_norm": 0.24699153006076813, "learning_rate": 0.00012507830879008046, "loss": 1.1634, "step": 28838 }, { "epoch": 0.37474965198987076, "grad_norm": 0.3858836889266968, "learning_rate": 0.00012507570932816908, "loss": 1.4059, "step": 28839 }, { "epoch": 0.3747626465337866, "grad_norm": 0.4618825614452362, "learning_rate": 0.00012507310986625768, "loss": 1.3787, "step": 28840 }, { "epoch": 0.3747756410777025, "grad_norm": 0.38702648878097534, "learning_rate": 0.0001250705104043463, "loss": 1.4838, "step": 28841 }, { "epoch": 0.37478863562161835, "grad_norm": 0.422344446182251, "learning_rate": 0.00012506791094243493, "loss": 1.5091, "step": 28842 }, { "epoch": 0.37480163016553425, "grad_norm": 0.529836893081665, "learning_rate": 0.00012506531148052353, "loss": 1.3878, "step": 28843 }, { "epoch": 0.3748146247094501, "grad_norm": 0.31421998143196106, "learning_rate": 0.00012506271201861215, "loss": 1.4457, "step": 28844 }, { "epoch": 0.374827619253366, "grad_norm": 0.4187467694282532, "learning_rate": 0.00012506011255670075, "loss": 1.5248, "step": 28845 }, { "epoch": 0.37484061379728184, "grad_norm": 0.4240720868110657, "learning_rate": 0.0001250575130947894, "loss": 1.381, "step": 28846 }, { "epoch": 0.37485360834119774, "grad_norm": 0.46227872371673584, "learning_rate": 0.000125054913632878, "loss": 1.5045, "step": 28847 }, { "epoch": 0.3748666028851136, "grad_norm": 0.44956791400909424, "learning_rate": 0.00012505231417096662, "loss": 1.3974, "step": 28848 }, { "epoch": 0.3748795974290295, "grad_norm": 0.3727710247039795, "learning_rate": 0.00012504971470905522, "loss": 1.3807, "step": 28849 }, { "epoch": 0.37489259197294533, "grad_norm": 0.3538811504840851, "learning_rate": 0.00012504711524714384, "loss": 1.3123, "step": 28850 }, { "epoch": 0.37490558651686123, "grad_norm": 0.3979772925376892, "learning_rate": 0.00012504451578523247, "loss": 1.4317, "step": 28851 }, { "epoch": 0.3749185810607771, "grad_norm": 0.34732866287231445, "learning_rate": 0.00012504191632332106, "loss": 1.5715, "step": 28852 }, { "epoch": 0.374931575604693, "grad_norm": 0.4288010895252228, "learning_rate": 0.0001250393168614097, "loss": 1.4755, "step": 28853 }, { "epoch": 0.3749445701486089, "grad_norm": 0.3757539391517639, "learning_rate": 0.00012503671739949831, "loss": 1.466, "step": 28854 }, { "epoch": 0.3749575646925247, "grad_norm": 0.46227288246154785, "learning_rate": 0.00012503411793758694, "loss": 1.3452, "step": 28855 }, { "epoch": 0.3749705592364406, "grad_norm": 0.25591906905174255, "learning_rate": 0.00012503151847567554, "loss": 1.1903, "step": 28856 }, { "epoch": 0.37498355378035647, "grad_norm": 0.4343232214450836, "learning_rate": 0.00012502891901376416, "loss": 1.4139, "step": 28857 }, { "epoch": 0.37499654832427237, "grad_norm": 0.43891939520835876, "learning_rate": 0.00012502631955185278, "loss": 1.3417, "step": 28858 }, { "epoch": 0.3750095428681882, "grad_norm": 0.37385502457618713, "learning_rate": 0.00012502372008994138, "loss": 1.3168, "step": 28859 }, { "epoch": 0.3750225374121041, "grad_norm": 0.3902006447315216, "learning_rate": 0.00012502112062803, "loss": 1.5619, "step": 28860 }, { "epoch": 0.37503553195601996, "grad_norm": 0.38616517186164856, "learning_rate": 0.00012501852116611863, "loss": 1.5015, "step": 28861 }, { "epoch": 0.37504852649993586, "grad_norm": 0.24514099955558777, "learning_rate": 0.00012501592170420723, "loss": 1.3169, "step": 28862 }, { "epoch": 0.3750615210438517, "grad_norm": 0.40264204144477844, "learning_rate": 0.00012501332224229585, "loss": 1.4906, "step": 28863 }, { "epoch": 0.3750745155877676, "grad_norm": 0.36614781618118286, "learning_rate": 0.00012501072278038445, "loss": 1.3317, "step": 28864 }, { "epoch": 0.37508751013168345, "grad_norm": 0.39818108081817627, "learning_rate": 0.0001250081233184731, "loss": 1.4702, "step": 28865 }, { "epoch": 0.37510050467559936, "grad_norm": 0.405103862285614, "learning_rate": 0.0001250055238565617, "loss": 1.4284, "step": 28866 }, { "epoch": 0.3751134992195152, "grad_norm": 0.46252959966659546, "learning_rate": 0.00012500292439465032, "loss": 1.43, "step": 28867 }, { "epoch": 0.3751264937634311, "grad_norm": 0.41500088572502136, "learning_rate": 0.00012500032493273892, "loss": 1.4114, "step": 28868 }, { "epoch": 0.37513948830734695, "grad_norm": 0.4190671443939209, "learning_rate": 0.00012499772547082755, "loss": 1.4935, "step": 28869 }, { "epoch": 0.37515248285126285, "grad_norm": 0.36047592759132385, "learning_rate": 0.00012499512600891617, "loss": 1.2584, "step": 28870 }, { "epoch": 0.3751654773951787, "grad_norm": 0.33831408619880676, "learning_rate": 0.00012499252654700477, "loss": 1.4996, "step": 28871 }, { "epoch": 0.3751784719390946, "grad_norm": 0.41569969058036804, "learning_rate": 0.0001249899270850934, "loss": 1.4035, "step": 28872 }, { "epoch": 0.37519146648301044, "grad_norm": 0.38575318455696106, "learning_rate": 0.00012498732762318202, "loss": 1.5862, "step": 28873 }, { "epoch": 0.37520446102692634, "grad_norm": 0.38970300555229187, "learning_rate": 0.00012498472816127061, "loss": 1.3781, "step": 28874 }, { "epoch": 0.3752174555708422, "grad_norm": 0.4353445768356323, "learning_rate": 0.00012498212869935924, "loss": 1.3112, "step": 28875 }, { "epoch": 0.3752304501147581, "grad_norm": 0.7563292384147644, "learning_rate": 0.00012497952923744784, "loss": 1.301, "step": 28876 }, { "epoch": 0.37524344465867393, "grad_norm": 0.39830726385116577, "learning_rate": 0.0001249769297755365, "loss": 1.4271, "step": 28877 }, { "epoch": 0.37525643920258983, "grad_norm": 0.3310627341270447, "learning_rate": 0.00012497433031362508, "loss": 1.3393, "step": 28878 }, { "epoch": 0.3752694337465057, "grad_norm": 0.4384199380874634, "learning_rate": 0.0001249717308517137, "loss": 1.5294, "step": 28879 }, { "epoch": 0.3752824282904216, "grad_norm": 0.45933350920677185, "learning_rate": 0.0001249691313898023, "loss": 1.3951, "step": 28880 }, { "epoch": 0.3752954228343374, "grad_norm": 0.44464603066444397, "learning_rate": 0.00012496653192789093, "loss": 1.4112, "step": 28881 }, { "epoch": 0.3753084173782533, "grad_norm": 0.2777060568332672, "learning_rate": 0.00012496393246597956, "loss": 1.3047, "step": 28882 }, { "epoch": 0.37532141192216917, "grad_norm": 0.36590129137039185, "learning_rate": 0.00012496133300406815, "loss": 1.245, "step": 28883 }, { "epoch": 0.37533440646608507, "grad_norm": 0.44382137060165405, "learning_rate": 0.00012495873354215678, "loss": 1.367, "step": 28884 }, { "epoch": 0.3753474010100009, "grad_norm": 0.48704805970191956, "learning_rate": 0.0001249561340802454, "loss": 1.5727, "step": 28885 }, { "epoch": 0.3753603955539168, "grad_norm": 0.33053481578826904, "learning_rate": 0.000124953534618334, "loss": 1.4756, "step": 28886 }, { "epoch": 0.37537339009783266, "grad_norm": 0.4279806315898895, "learning_rate": 0.00012495093515642262, "loss": 1.573, "step": 28887 }, { "epoch": 0.37538638464174856, "grad_norm": 0.429724782705307, "learning_rate": 0.00012494833569451122, "loss": 1.5761, "step": 28888 }, { "epoch": 0.3753993791856644, "grad_norm": 0.43826866149902344, "learning_rate": 0.00012494573623259987, "loss": 1.3706, "step": 28889 }, { "epoch": 0.3754123737295803, "grad_norm": 0.4755299687385559, "learning_rate": 0.00012494313677068847, "loss": 1.4164, "step": 28890 }, { "epoch": 0.37542536827349615, "grad_norm": 0.40580055117607117, "learning_rate": 0.0001249405373087771, "loss": 1.3211, "step": 28891 }, { "epoch": 0.37543836281741205, "grad_norm": 0.38015103340148926, "learning_rate": 0.00012493793784686572, "loss": 1.4253, "step": 28892 }, { "epoch": 0.3754513573613279, "grad_norm": 0.4138756990432739, "learning_rate": 0.00012493533838495432, "loss": 1.1728, "step": 28893 }, { "epoch": 0.3754643519052438, "grad_norm": 0.41246840357780457, "learning_rate": 0.00012493273892304294, "loss": 1.5317, "step": 28894 }, { "epoch": 0.37547734644915964, "grad_norm": 0.34073248505592346, "learning_rate": 0.00012493013946113154, "loss": 1.5484, "step": 28895 }, { "epoch": 0.37549034099307554, "grad_norm": 0.4287576377391815, "learning_rate": 0.0001249275399992202, "loss": 1.3092, "step": 28896 }, { "epoch": 0.3755033355369914, "grad_norm": 0.38764679431915283, "learning_rate": 0.0001249249405373088, "loss": 1.2772, "step": 28897 }, { "epoch": 0.3755163300809073, "grad_norm": 0.4042341709136963, "learning_rate": 0.00012492234107539738, "loss": 1.3907, "step": 28898 }, { "epoch": 0.37552932462482314, "grad_norm": 0.4195457398891449, "learning_rate": 0.000124919741613486, "loss": 1.3287, "step": 28899 }, { "epoch": 0.37554231916873904, "grad_norm": 0.31598949432373047, "learning_rate": 0.00012491714215157463, "loss": 1.3299, "step": 28900 }, { "epoch": 0.3755553137126549, "grad_norm": 0.348940908908844, "learning_rate": 0.00012491454268966326, "loss": 1.6232, "step": 28901 }, { "epoch": 0.3755683082565708, "grad_norm": 0.35178977251052856, "learning_rate": 0.00012491194322775186, "loss": 1.3992, "step": 28902 }, { "epoch": 0.3755813028004866, "grad_norm": 0.3172970712184906, "learning_rate": 0.00012490934376584048, "loss": 1.2761, "step": 28903 }, { "epoch": 0.37559429734440253, "grad_norm": 0.36124634742736816, "learning_rate": 0.0001249067443039291, "loss": 1.1986, "step": 28904 }, { "epoch": 0.3756072918883184, "grad_norm": 0.4207603335380554, "learning_rate": 0.0001249041448420177, "loss": 1.5873, "step": 28905 }, { "epoch": 0.3756202864322343, "grad_norm": 0.36357760429382324, "learning_rate": 0.00012490154538010633, "loss": 1.3592, "step": 28906 }, { "epoch": 0.3756332809761501, "grad_norm": 0.4150446355342865, "learning_rate": 0.00012489894591819492, "loss": 1.4162, "step": 28907 }, { "epoch": 0.375646275520066, "grad_norm": 0.40322908759117126, "learning_rate": 0.00012489634645628358, "loss": 1.5258, "step": 28908 }, { "epoch": 0.37565927006398186, "grad_norm": 0.4171421229839325, "learning_rate": 0.00012489374699437217, "loss": 1.5131, "step": 28909 }, { "epoch": 0.37567226460789777, "grad_norm": 0.3973075747489929, "learning_rate": 0.0001248911475324608, "loss": 1.3498, "step": 28910 }, { "epoch": 0.3756852591518136, "grad_norm": 0.3319651782512665, "learning_rate": 0.0001248885480705494, "loss": 1.3227, "step": 28911 }, { "epoch": 0.3756982536957295, "grad_norm": 0.4336509108543396, "learning_rate": 0.00012488594860863802, "loss": 1.2831, "step": 28912 }, { "epoch": 0.37571124823964536, "grad_norm": 0.3868233561515808, "learning_rate": 0.00012488334914672664, "loss": 1.3754, "step": 28913 }, { "epoch": 0.37572424278356126, "grad_norm": 0.40609779953956604, "learning_rate": 0.00012488074968481524, "loss": 1.2653, "step": 28914 }, { "epoch": 0.3757372373274771, "grad_norm": 0.4330745339393616, "learning_rate": 0.00012487815022290387, "loss": 1.4787, "step": 28915 }, { "epoch": 0.375750231871393, "grad_norm": 0.4194702208042145, "learning_rate": 0.0001248755507609925, "loss": 1.4082, "step": 28916 }, { "epoch": 0.37576322641530885, "grad_norm": 0.4286632239818573, "learning_rate": 0.0001248729512990811, "loss": 1.5517, "step": 28917 }, { "epoch": 0.37577622095922475, "grad_norm": 0.3197442591190338, "learning_rate": 0.0001248703518371697, "loss": 1.4543, "step": 28918 }, { "epoch": 0.3757892155031406, "grad_norm": 0.4218446612358093, "learning_rate": 0.0001248677523752583, "loss": 1.2784, "step": 28919 }, { "epoch": 0.3758022100470565, "grad_norm": 0.3768215477466583, "learning_rate": 0.00012486515291334696, "loss": 1.4439, "step": 28920 }, { "epoch": 0.37581520459097234, "grad_norm": 0.2715786397457123, "learning_rate": 0.00012486255345143556, "loss": 1.59, "step": 28921 }, { "epoch": 0.37582819913488824, "grad_norm": 0.3572438955307007, "learning_rate": 0.00012485995398952418, "loss": 1.3555, "step": 28922 }, { "epoch": 0.3758411936788041, "grad_norm": 0.4257218539714813, "learning_rate": 0.00012485735452761278, "loss": 1.5358, "step": 28923 }, { "epoch": 0.37585418822272, "grad_norm": 0.5142207145690918, "learning_rate": 0.0001248547550657014, "loss": 1.3167, "step": 28924 }, { "epoch": 0.37586718276663583, "grad_norm": 0.3964592218399048, "learning_rate": 0.00012485215560379003, "loss": 1.3761, "step": 28925 }, { "epoch": 0.37588017731055173, "grad_norm": 0.3714204728603363, "learning_rate": 0.00012484955614187863, "loss": 1.509, "step": 28926 }, { "epoch": 0.3758931718544676, "grad_norm": 0.40894579887390137, "learning_rate": 0.00012484695667996725, "loss": 1.4759, "step": 28927 }, { "epoch": 0.3759061663983835, "grad_norm": 0.5266218781471252, "learning_rate": 0.00012484435721805588, "loss": 1.4613, "step": 28928 }, { "epoch": 0.3759191609422994, "grad_norm": 0.41685837507247925, "learning_rate": 0.00012484175775614447, "loss": 1.363, "step": 28929 }, { "epoch": 0.3759321554862152, "grad_norm": 0.41698765754699707, "learning_rate": 0.0001248391582942331, "loss": 1.5562, "step": 28930 }, { "epoch": 0.3759451500301311, "grad_norm": 0.3285476565361023, "learning_rate": 0.00012483655883232172, "loss": 1.4279, "step": 28931 }, { "epoch": 0.37595814457404697, "grad_norm": 0.40252307057380676, "learning_rate": 0.00012483395937041035, "loss": 1.2857, "step": 28932 }, { "epoch": 0.37597113911796287, "grad_norm": 0.4399270713329315, "learning_rate": 0.00012483135990849894, "loss": 1.2879, "step": 28933 }, { "epoch": 0.3759841336618787, "grad_norm": 0.4111799895763397, "learning_rate": 0.00012482876044658757, "loss": 1.4794, "step": 28934 }, { "epoch": 0.3759971282057946, "grad_norm": 0.4470709562301636, "learning_rate": 0.0001248261609846762, "loss": 1.5317, "step": 28935 }, { "epoch": 0.37601012274971046, "grad_norm": 0.5309771299362183, "learning_rate": 0.0001248235615227648, "loss": 1.4524, "step": 28936 }, { "epoch": 0.37602311729362636, "grad_norm": 0.33288291096687317, "learning_rate": 0.00012482096206085341, "loss": 1.2423, "step": 28937 }, { "epoch": 0.3760361118375422, "grad_norm": 0.36665570735931396, "learning_rate": 0.000124818362598942, "loss": 1.4693, "step": 28938 }, { "epoch": 0.3760491063814581, "grad_norm": 0.4501255750656128, "learning_rate": 0.00012481576313703066, "loss": 1.267, "step": 28939 }, { "epoch": 0.37606210092537395, "grad_norm": 0.3907860517501831, "learning_rate": 0.00012481316367511926, "loss": 1.3128, "step": 28940 }, { "epoch": 0.37607509546928986, "grad_norm": 0.35737356543540955, "learning_rate": 0.00012481056421320786, "loss": 1.3525, "step": 28941 }, { "epoch": 0.3760880900132057, "grad_norm": 0.48332130908966064, "learning_rate": 0.00012480796475129648, "loss": 1.5365, "step": 28942 }, { "epoch": 0.3761010845571216, "grad_norm": 0.3659692704677582, "learning_rate": 0.0001248053652893851, "loss": 1.2856, "step": 28943 }, { "epoch": 0.37611407910103745, "grad_norm": 0.4015612304210663, "learning_rate": 0.00012480276582747373, "loss": 1.2625, "step": 28944 }, { "epoch": 0.37612707364495335, "grad_norm": 0.40080517530441284, "learning_rate": 0.00012480016636556233, "loss": 1.2334, "step": 28945 }, { "epoch": 0.3761400681888692, "grad_norm": 0.4071347713470459, "learning_rate": 0.00012479756690365095, "loss": 1.3845, "step": 28946 }, { "epoch": 0.3761530627327851, "grad_norm": 0.42958763241767883, "learning_rate": 0.00012479496744173958, "loss": 1.4909, "step": 28947 }, { "epoch": 0.37616605727670094, "grad_norm": 0.39240241050720215, "learning_rate": 0.00012479236797982818, "loss": 1.2925, "step": 28948 }, { "epoch": 0.37617905182061684, "grad_norm": 0.3925537168979645, "learning_rate": 0.0001247897685179168, "loss": 1.413, "step": 28949 }, { "epoch": 0.3761920463645327, "grad_norm": 0.4498641788959503, "learning_rate": 0.0001247871690560054, "loss": 1.248, "step": 28950 }, { "epoch": 0.3762050409084486, "grad_norm": 0.4823785424232483, "learning_rate": 0.00012478456959409405, "loss": 1.5332, "step": 28951 }, { "epoch": 0.37621803545236443, "grad_norm": 0.41888949275016785, "learning_rate": 0.00012478197013218265, "loss": 1.4827, "step": 28952 }, { "epoch": 0.37623102999628033, "grad_norm": 0.43660542368888855, "learning_rate": 0.00012477937067027124, "loss": 1.3882, "step": 28953 }, { "epoch": 0.3762440245401962, "grad_norm": 0.33248305320739746, "learning_rate": 0.00012477677120835987, "loss": 1.3288, "step": 28954 }, { "epoch": 0.3762570190841121, "grad_norm": 0.3973116874694824, "learning_rate": 0.0001247741717464485, "loss": 1.3128, "step": 28955 }, { "epoch": 0.3762700136280279, "grad_norm": 0.42672908306121826, "learning_rate": 0.00012477157228453712, "loss": 1.5794, "step": 28956 }, { "epoch": 0.3762830081719438, "grad_norm": 0.32531246542930603, "learning_rate": 0.00012476897282262571, "loss": 1.0845, "step": 28957 }, { "epoch": 0.37629600271585967, "grad_norm": 0.26367253065109253, "learning_rate": 0.00012476637336071434, "loss": 1.2971, "step": 28958 }, { "epoch": 0.37630899725977557, "grad_norm": 0.5099393725395203, "learning_rate": 0.00012476377389880296, "loss": 1.408, "step": 28959 }, { "epoch": 0.3763219918036914, "grad_norm": 0.3670794665813446, "learning_rate": 0.00012476117443689156, "loss": 1.4446, "step": 28960 }, { "epoch": 0.3763349863476073, "grad_norm": 0.47548606991767883, "learning_rate": 0.00012475857497498019, "loss": 1.4179, "step": 28961 }, { "epoch": 0.37634798089152316, "grad_norm": 0.3915907144546509, "learning_rate": 0.00012475597551306878, "loss": 1.4244, "step": 28962 }, { "epoch": 0.37636097543543906, "grad_norm": 0.3869493007659912, "learning_rate": 0.00012475337605115743, "loss": 1.468, "step": 28963 }, { "epoch": 0.3763739699793549, "grad_norm": 0.43332675099372864, "learning_rate": 0.00012475077658924603, "loss": 1.4047, "step": 28964 }, { "epoch": 0.3763869645232708, "grad_norm": 0.4835144281387329, "learning_rate": 0.00012474817712733463, "loss": 1.4065, "step": 28965 }, { "epoch": 0.37639995906718665, "grad_norm": 0.26890265941619873, "learning_rate": 0.00012474557766542328, "loss": 1.2324, "step": 28966 }, { "epoch": 0.37641295361110255, "grad_norm": 0.3907824754714966, "learning_rate": 0.00012474297820351188, "loss": 1.4259, "step": 28967 }, { "epoch": 0.3764259481550184, "grad_norm": 0.4691123962402344, "learning_rate": 0.0001247403787416005, "loss": 1.2552, "step": 28968 }, { "epoch": 0.3764389426989343, "grad_norm": 0.43298959732055664, "learning_rate": 0.0001247377792796891, "loss": 1.4952, "step": 28969 }, { "epoch": 0.37645193724285014, "grad_norm": 0.46592846512794495, "learning_rate": 0.00012473517981777772, "loss": 1.5468, "step": 28970 }, { "epoch": 0.37646493178676604, "grad_norm": 0.4093274474143982, "learning_rate": 0.00012473258035586635, "loss": 1.4561, "step": 28971 }, { "epoch": 0.3764779263306819, "grad_norm": 0.43703800439834595, "learning_rate": 0.00012472998089395495, "loss": 1.4916, "step": 28972 }, { "epoch": 0.3764909208745978, "grad_norm": 0.30699682235717773, "learning_rate": 0.00012472738143204357, "loss": 1.2114, "step": 28973 }, { "epoch": 0.37650391541851363, "grad_norm": 0.3838783800601959, "learning_rate": 0.0001247247819701322, "loss": 1.4807, "step": 28974 }, { "epoch": 0.37651690996242954, "grad_norm": 0.31833335757255554, "learning_rate": 0.00012472218250822082, "loss": 1.2898, "step": 28975 }, { "epoch": 0.3765299045063454, "grad_norm": 0.4873744249343872, "learning_rate": 0.00012471958304630942, "loss": 1.3817, "step": 28976 }, { "epoch": 0.3765428990502613, "grad_norm": 0.3133584260940552, "learning_rate": 0.00012471698358439804, "loss": 1.3875, "step": 28977 }, { "epoch": 0.3765558935941771, "grad_norm": 0.37226349115371704, "learning_rate": 0.00012471438412248667, "loss": 1.362, "step": 28978 }, { "epoch": 0.376568888138093, "grad_norm": 0.4297705888748169, "learning_rate": 0.00012471178466057526, "loss": 1.3149, "step": 28979 }, { "epoch": 0.3765818826820089, "grad_norm": 0.4352920949459076, "learning_rate": 0.0001247091851986639, "loss": 1.3512, "step": 28980 }, { "epoch": 0.3765948772259248, "grad_norm": 0.4327623248100281, "learning_rate": 0.00012470658573675248, "loss": 1.468, "step": 28981 }, { "epoch": 0.3766078717698406, "grad_norm": 0.32561036944389343, "learning_rate": 0.0001247039862748411, "loss": 1.2933, "step": 28982 }, { "epoch": 0.3766208663137565, "grad_norm": 0.44417065382003784, "learning_rate": 0.00012470138681292973, "loss": 1.2152, "step": 28983 }, { "epoch": 0.37663386085767236, "grad_norm": 0.4013122618198395, "learning_rate": 0.00012469878735101833, "loss": 1.5456, "step": 28984 }, { "epoch": 0.37664685540158827, "grad_norm": 0.4067772924900055, "learning_rate": 0.00012469618788910696, "loss": 1.3099, "step": 28985 }, { "epoch": 0.3766598499455041, "grad_norm": 0.5173984169960022, "learning_rate": 0.00012469358842719558, "loss": 1.5832, "step": 28986 }, { "epoch": 0.37667284448942, "grad_norm": 0.41349759697914124, "learning_rate": 0.0001246909889652842, "loss": 1.5248, "step": 28987 }, { "epoch": 0.37668583903333586, "grad_norm": 0.416258841753006, "learning_rate": 0.0001246883895033728, "loss": 1.5054, "step": 28988 }, { "epoch": 0.37669883357725176, "grad_norm": 0.42920196056365967, "learning_rate": 0.00012468579004146143, "loss": 1.4458, "step": 28989 }, { "epoch": 0.3767118281211676, "grad_norm": 0.3957315683364868, "learning_rate": 0.00012468319057955005, "loss": 1.2049, "step": 28990 }, { "epoch": 0.3767248226650835, "grad_norm": 0.4205652177333832, "learning_rate": 0.00012468059111763865, "loss": 1.2859, "step": 28991 }, { "epoch": 0.37673781720899935, "grad_norm": 0.3964245021343231, "learning_rate": 0.00012467799165572727, "loss": 1.5463, "step": 28992 }, { "epoch": 0.37675081175291525, "grad_norm": 0.3582054376602173, "learning_rate": 0.00012467539219381587, "loss": 1.2587, "step": 28993 }, { "epoch": 0.3767638062968311, "grad_norm": 0.4287501871585846, "learning_rate": 0.00012467279273190452, "loss": 1.5084, "step": 28994 }, { "epoch": 0.376776800840747, "grad_norm": 0.39114248752593994, "learning_rate": 0.00012467019326999312, "loss": 1.4415, "step": 28995 }, { "epoch": 0.37678979538466284, "grad_norm": 0.493540495634079, "learning_rate": 0.00012466759380808172, "loss": 1.4702, "step": 28996 }, { "epoch": 0.37680278992857874, "grad_norm": 0.5261231660842896, "learning_rate": 0.00012466499434617034, "loss": 1.3912, "step": 28997 }, { "epoch": 0.3768157844724946, "grad_norm": 0.32692450284957886, "learning_rate": 0.00012466239488425897, "loss": 1.311, "step": 28998 }, { "epoch": 0.3768287790164105, "grad_norm": 0.8313952088356018, "learning_rate": 0.0001246597954223476, "loss": 1.715, "step": 28999 }, { "epoch": 0.37684177356032633, "grad_norm": 0.25373613834381104, "learning_rate": 0.0001246571959604362, "loss": 1.1988, "step": 29000 }, { "epoch": 0.37685476810424223, "grad_norm": 0.3408701717853546, "learning_rate": 0.0001246545964985248, "loss": 1.3072, "step": 29001 }, { "epoch": 0.3768677626481581, "grad_norm": 0.4798407554626465, "learning_rate": 0.00012465199703661344, "loss": 1.4693, "step": 29002 }, { "epoch": 0.376880757192074, "grad_norm": 0.4050613343715668, "learning_rate": 0.00012464939757470203, "loss": 1.45, "step": 29003 }, { "epoch": 0.3768937517359898, "grad_norm": 0.4492764174938202, "learning_rate": 0.00012464679811279066, "loss": 1.4175, "step": 29004 }, { "epoch": 0.3769067462799057, "grad_norm": 0.43367472290992737, "learning_rate": 0.00012464419865087928, "loss": 1.2815, "step": 29005 }, { "epoch": 0.3769197408238216, "grad_norm": 0.35858961939811707, "learning_rate": 0.0001246415991889679, "loss": 1.1285, "step": 29006 }, { "epoch": 0.37693273536773747, "grad_norm": 0.301493376493454, "learning_rate": 0.0001246389997270565, "loss": 1.4326, "step": 29007 }, { "epoch": 0.37694572991165337, "grad_norm": 0.4287569224834442, "learning_rate": 0.0001246364002651451, "loss": 1.1246, "step": 29008 }, { "epoch": 0.3769587244555692, "grad_norm": 0.4359901547431946, "learning_rate": 0.00012463380080323375, "loss": 1.5789, "step": 29009 }, { "epoch": 0.3769717189994851, "grad_norm": 0.46248188614845276, "learning_rate": 0.00012463120134132235, "loss": 1.4456, "step": 29010 }, { "epoch": 0.37698471354340096, "grad_norm": 0.4319164752960205, "learning_rate": 0.00012462860187941098, "loss": 1.5375, "step": 29011 }, { "epoch": 0.37699770808731686, "grad_norm": 0.2823288142681122, "learning_rate": 0.00012462600241749957, "loss": 1.2573, "step": 29012 }, { "epoch": 0.3770107026312327, "grad_norm": 0.5519055724143982, "learning_rate": 0.0001246234029555882, "loss": 1.6317, "step": 29013 }, { "epoch": 0.3770236971751486, "grad_norm": 0.3213790953159332, "learning_rate": 0.00012462080349367682, "loss": 1.3412, "step": 29014 }, { "epoch": 0.37703669171906445, "grad_norm": 0.35477882623672485, "learning_rate": 0.00012461820403176542, "loss": 1.2711, "step": 29015 }, { "epoch": 0.37704968626298035, "grad_norm": 0.36729753017425537, "learning_rate": 0.00012461560456985404, "loss": 1.3556, "step": 29016 }, { "epoch": 0.3770626808068962, "grad_norm": 0.5463625192642212, "learning_rate": 0.00012461300510794267, "loss": 1.3132, "step": 29017 }, { "epoch": 0.3770756753508121, "grad_norm": 0.4464842975139618, "learning_rate": 0.0001246104056460313, "loss": 1.637, "step": 29018 }, { "epoch": 0.37708866989472795, "grad_norm": 0.3476654589176178, "learning_rate": 0.0001246078061841199, "loss": 1.1511, "step": 29019 }, { "epoch": 0.37710166443864385, "grad_norm": 0.323486864566803, "learning_rate": 0.0001246052067222085, "loss": 1.3813, "step": 29020 }, { "epoch": 0.3771146589825597, "grad_norm": 0.395973801612854, "learning_rate": 0.00012460260726029714, "loss": 1.4752, "step": 29021 }, { "epoch": 0.3771276535264756, "grad_norm": 0.4222963750362396, "learning_rate": 0.00012460000779838574, "loss": 1.5462, "step": 29022 }, { "epoch": 0.37714064807039144, "grad_norm": 0.3361075222492218, "learning_rate": 0.00012459740833647436, "loss": 1.3312, "step": 29023 }, { "epoch": 0.37715364261430734, "grad_norm": 0.47997570037841797, "learning_rate": 0.00012459480887456296, "loss": 1.4521, "step": 29024 }, { "epoch": 0.3771666371582232, "grad_norm": 0.4050688147544861, "learning_rate": 0.00012459220941265158, "loss": 1.2776, "step": 29025 }, { "epoch": 0.3771796317021391, "grad_norm": 0.4550187885761261, "learning_rate": 0.0001245896099507402, "loss": 1.277, "step": 29026 }, { "epoch": 0.37719262624605493, "grad_norm": 0.3913280665874481, "learning_rate": 0.0001245870104888288, "loss": 1.4726, "step": 29027 }, { "epoch": 0.37720562078997083, "grad_norm": 0.4334585964679718, "learning_rate": 0.00012458441102691743, "loss": 1.2845, "step": 29028 }, { "epoch": 0.3772186153338867, "grad_norm": 0.4494137465953827, "learning_rate": 0.00012458181156500605, "loss": 1.4394, "step": 29029 }, { "epoch": 0.3772316098778026, "grad_norm": 0.3425474464893341, "learning_rate": 0.00012457921210309468, "loss": 1.4459, "step": 29030 }, { "epoch": 0.3772446044217184, "grad_norm": 0.4476107358932495, "learning_rate": 0.00012457661264118328, "loss": 1.252, "step": 29031 }, { "epoch": 0.3772575989656343, "grad_norm": 0.31464043259620667, "learning_rate": 0.0001245740131792719, "loss": 1.1194, "step": 29032 }, { "epoch": 0.37727059350955017, "grad_norm": 0.4742145836353302, "learning_rate": 0.00012457141371736052, "loss": 1.4444, "step": 29033 }, { "epoch": 0.37728358805346607, "grad_norm": 0.5120161175727844, "learning_rate": 0.00012456881425544912, "loss": 1.3086, "step": 29034 }, { "epoch": 0.3772965825973819, "grad_norm": 0.354155033826828, "learning_rate": 0.00012456621479353775, "loss": 1.4172, "step": 29035 }, { "epoch": 0.3773095771412978, "grad_norm": 0.44087985157966614, "learning_rate": 0.00012456361533162634, "loss": 1.4429, "step": 29036 }, { "epoch": 0.37732257168521366, "grad_norm": 0.4338029623031616, "learning_rate": 0.00012456101586971497, "loss": 1.3781, "step": 29037 }, { "epoch": 0.37733556622912956, "grad_norm": 0.4211170971393585, "learning_rate": 0.0001245584164078036, "loss": 1.413, "step": 29038 }, { "epoch": 0.3773485607730454, "grad_norm": 0.43683192133903503, "learning_rate": 0.0001245558169458922, "loss": 1.3641, "step": 29039 }, { "epoch": 0.3773615553169613, "grad_norm": 0.42215219140052795, "learning_rate": 0.00012455321748398084, "loss": 1.5607, "step": 29040 }, { "epoch": 0.37737454986087715, "grad_norm": 0.4348534941673279, "learning_rate": 0.00012455061802206944, "loss": 1.4015, "step": 29041 }, { "epoch": 0.37738754440479305, "grad_norm": 0.2570900619029999, "learning_rate": 0.00012454801856015806, "loss": 1.2447, "step": 29042 }, { "epoch": 0.3774005389487089, "grad_norm": 0.38586023449897766, "learning_rate": 0.00012454541909824666, "loss": 1.2944, "step": 29043 }, { "epoch": 0.3774135334926248, "grad_norm": 0.3790202736854553, "learning_rate": 0.00012454281963633529, "loss": 1.5477, "step": 29044 }, { "epoch": 0.37742652803654064, "grad_norm": 0.4255603551864624, "learning_rate": 0.0001245402201744239, "loss": 1.3821, "step": 29045 }, { "epoch": 0.37743952258045654, "grad_norm": 0.47181031107902527, "learning_rate": 0.0001245376207125125, "loss": 1.467, "step": 29046 }, { "epoch": 0.3774525171243724, "grad_norm": 0.400684118270874, "learning_rate": 0.00012453502125060113, "loss": 1.3834, "step": 29047 }, { "epoch": 0.3774655116682883, "grad_norm": 0.316967248916626, "learning_rate": 0.00012453242178868976, "loss": 1.21, "step": 29048 }, { "epoch": 0.37747850621220413, "grad_norm": 0.3553643524646759, "learning_rate": 0.00012452982232677835, "loss": 1.3608, "step": 29049 }, { "epoch": 0.37749150075612004, "grad_norm": 0.47424566745758057, "learning_rate": 0.00012452722286486698, "loss": 1.503, "step": 29050 }, { "epoch": 0.3775044953000359, "grad_norm": 0.4041392207145691, "learning_rate": 0.00012452462340295558, "loss": 1.4028, "step": 29051 }, { "epoch": 0.3775174898439518, "grad_norm": 0.3005121946334839, "learning_rate": 0.00012452202394104423, "loss": 1.1557, "step": 29052 }, { "epoch": 0.3775304843878676, "grad_norm": 0.3968088626861572, "learning_rate": 0.00012451942447913282, "loss": 1.4114, "step": 29053 }, { "epoch": 0.3775434789317835, "grad_norm": 0.3767034709453583, "learning_rate": 0.00012451682501722145, "loss": 1.4948, "step": 29054 }, { "epoch": 0.3775564734756994, "grad_norm": 0.41209688782691956, "learning_rate": 0.00012451422555531005, "loss": 1.4503, "step": 29055 }, { "epoch": 0.3775694680196153, "grad_norm": 0.42090338468551636, "learning_rate": 0.00012451162609339867, "loss": 1.4419, "step": 29056 }, { "epoch": 0.3775824625635311, "grad_norm": 0.4267866313457489, "learning_rate": 0.0001245090266314873, "loss": 1.3762, "step": 29057 }, { "epoch": 0.377595457107447, "grad_norm": 0.38681766390800476, "learning_rate": 0.0001245064271695759, "loss": 1.1559, "step": 29058 }, { "epoch": 0.37760845165136286, "grad_norm": 0.2588450014591217, "learning_rate": 0.00012450382770766452, "loss": 1.3235, "step": 29059 }, { "epoch": 0.37762144619527876, "grad_norm": 0.3225371539592743, "learning_rate": 0.00012450122824575314, "loss": 1.2559, "step": 29060 }, { "epoch": 0.3776344407391946, "grad_norm": 0.2933025658130646, "learning_rate": 0.00012449862878384177, "loss": 1.3796, "step": 29061 }, { "epoch": 0.3776474352831105, "grad_norm": 0.4021296203136444, "learning_rate": 0.00012449602932193036, "loss": 1.2806, "step": 29062 }, { "epoch": 0.37766042982702636, "grad_norm": 0.3588813543319702, "learning_rate": 0.00012449342986001896, "loss": 1.5836, "step": 29063 }, { "epoch": 0.37767342437094226, "grad_norm": 0.5445573329925537, "learning_rate": 0.0001244908303981076, "loss": 1.5183, "step": 29064 }, { "epoch": 0.3776864189148581, "grad_norm": 0.40056294202804565, "learning_rate": 0.0001244882309361962, "loss": 1.351, "step": 29065 }, { "epoch": 0.377699413458774, "grad_norm": 0.3994879722595215, "learning_rate": 0.00012448563147428483, "loss": 1.5433, "step": 29066 }, { "epoch": 0.37771240800268985, "grad_norm": 0.45303842425346375, "learning_rate": 0.00012448303201237343, "loss": 1.4605, "step": 29067 }, { "epoch": 0.37772540254660575, "grad_norm": 0.510071337223053, "learning_rate": 0.00012448043255046206, "loss": 1.4775, "step": 29068 }, { "epoch": 0.3777383970905216, "grad_norm": 0.3420342206954956, "learning_rate": 0.00012447783308855068, "loss": 1.3244, "step": 29069 }, { "epoch": 0.3777513916344375, "grad_norm": 0.4512944221496582, "learning_rate": 0.00012447523362663928, "loss": 1.6217, "step": 29070 }, { "epoch": 0.37776438617835334, "grad_norm": 0.39227572083473206, "learning_rate": 0.0001244726341647279, "loss": 1.3262, "step": 29071 }, { "epoch": 0.37777738072226924, "grad_norm": 0.3984995484352112, "learning_rate": 0.00012447003470281653, "loss": 1.5671, "step": 29072 }, { "epoch": 0.3777903752661851, "grad_norm": 0.3556367754936218, "learning_rate": 0.00012446743524090515, "loss": 1.3485, "step": 29073 }, { "epoch": 0.377803369810101, "grad_norm": 0.33799123764038086, "learning_rate": 0.00012446483577899375, "loss": 1.2686, "step": 29074 }, { "epoch": 0.37781636435401683, "grad_norm": 0.28384819626808167, "learning_rate": 0.00012446223631708235, "loss": 1.2731, "step": 29075 }, { "epoch": 0.37782935889793273, "grad_norm": 0.42464518547058105, "learning_rate": 0.000124459636855171, "loss": 1.3001, "step": 29076 }, { "epoch": 0.3778423534418486, "grad_norm": 0.48076972365379333, "learning_rate": 0.0001244570373932596, "loss": 1.442, "step": 29077 }, { "epoch": 0.3778553479857645, "grad_norm": 0.33230268955230713, "learning_rate": 0.00012445443793134822, "loss": 1.506, "step": 29078 }, { "epoch": 0.3778683425296803, "grad_norm": 0.34044328331947327, "learning_rate": 0.00012445183846943684, "loss": 1.137, "step": 29079 }, { "epoch": 0.3778813370735962, "grad_norm": 0.3845383822917938, "learning_rate": 0.00012444923900752544, "loss": 1.3085, "step": 29080 }, { "epoch": 0.3778943316175121, "grad_norm": 0.3190215826034546, "learning_rate": 0.00012444663954561407, "loss": 1.2845, "step": 29081 }, { "epoch": 0.37790732616142797, "grad_norm": 0.3442033529281616, "learning_rate": 0.00012444404008370266, "loss": 1.3278, "step": 29082 }, { "epoch": 0.37792032070534387, "grad_norm": 0.4151146411895752, "learning_rate": 0.00012444144062179132, "loss": 1.3828, "step": 29083 }, { "epoch": 0.3779333152492597, "grad_norm": 0.38955193758010864, "learning_rate": 0.0001244388411598799, "loss": 1.3126, "step": 29084 }, { "epoch": 0.3779463097931756, "grad_norm": 0.38300663232803345, "learning_rate": 0.00012443624169796854, "loss": 1.3438, "step": 29085 }, { "epoch": 0.37795930433709146, "grad_norm": 0.4433871805667877, "learning_rate": 0.00012443364223605713, "loss": 1.4953, "step": 29086 }, { "epoch": 0.37797229888100736, "grad_norm": 0.5659882426261902, "learning_rate": 0.00012443104277414576, "loss": 1.4436, "step": 29087 }, { "epoch": 0.3779852934249232, "grad_norm": 0.35576027631759644, "learning_rate": 0.00012442844331223438, "loss": 1.3506, "step": 29088 }, { "epoch": 0.3779982879688391, "grad_norm": 0.38196861743927, "learning_rate": 0.00012442584385032298, "loss": 1.4136, "step": 29089 }, { "epoch": 0.37801128251275495, "grad_norm": 0.39096394181251526, "learning_rate": 0.0001244232443884116, "loss": 1.2255, "step": 29090 }, { "epoch": 0.37802427705667085, "grad_norm": 0.4709497094154358, "learning_rate": 0.00012442064492650023, "loss": 1.5984, "step": 29091 }, { "epoch": 0.3780372716005867, "grad_norm": 0.49661585688591003, "learning_rate": 0.00012441804546458883, "loss": 1.2049, "step": 29092 }, { "epoch": 0.3780502661445026, "grad_norm": 0.4364995062351227, "learning_rate": 0.00012441544600267745, "loss": 1.4643, "step": 29093 }, { "epoch": 0.37806326068841845, "grad_norm": 0.40390875935554504, "learning_rate": 0.00012441284654076605, "loss": 1.4835, "step": 29094 }, { "epoch": 0.37807625523233435, "grad_norm": 0.25178074836730957, "learning_rate": 0.0001244102470788547, "loss": 1.4639, "step": 29095 }, { "epoch": 0.3780892497762502, "grad_norm": 0.3871273100376129, "learning_rate": 0.0001244076476169433, "loss": 1.4521, "step": 29096 }, { "epoch": 0.3781022443201661, "grad_norm": 0.447313129901886, "learning_rate": 0.00012440504815503192, "loss": 1.3362, "step": 29097 }, { "epoch": 0.37811523886408194, "grad_norm": 0.3746159076690674, "learning_rate": 0.00012440244869312052, "loss": 1.5628, "step": 29098 }, { "epoch": 0.37812823340799784, "grad_norm": 0.3722781538963318, "learning_rate": 0.00012439984923120914, "loss": 1.3143, "step": 29099 }, { "epoch": 0.3781412279519137, "grad_norm": 0.4642501175403595, "learning_rate": 0.00012439724976929777, "loss": 1.4844, "step": 29100 }, { "epoch": 0.3781542224958296, "grad_norm": 0.37560561299324036, "learning_rate": 0.00012439465030738637, "loss": 1.3506, "step": 29101 }, { "epoch": 0.37816721703974543, "grad_norm": 0.38260748982429504, "learning_rate": 0.000124392050845475, "loss": 1.4681, "step": 29102 }, { "epoch": 0.37818021158366133, "grad_norm": 0.39832690358161926, "learning_rate": 0.00012438945138356361, "loss": 1.4453, "step": 29103 }, { "epoch": 0.3781932061275772, "grad_norm": 0.3914296329021454, "learning_rate": 0.0001243868519216522, "loss": 1.5234, "step": 29104 }, { "epoch": 0.3782062006714931, "grad_norm": 0.532298743724823, "learning_rate": 0.00012438425245974084, "loss": 1.5036, "step": 29105 }, { "epoch": 0.3782191952154089, "grad_norm": 0.45808789134025574, "learning_rate": 0.00012438165299782943, "loss": 1.2574, "step": 29106 }, { "epoch": 0.3782321897593248, "grad_norm": 0.47658196091651917, "learning_rate": 0.00012437905353591809, "loss": 1.4875, "step": 29107 }, { "epoch": 0.37824518430324067, "grad_norm": 0.37270355224609375, "learning_rate": 0.00012437645407400668, "loss": 1.4023, "step": 29108 }, { "epoch": 0.37825817884715657, "grad_norm": 0.39826536178588867, "learning_rate": 0.0001243738546120953, "loss": 1.5969, "step": 29109 }, { "epoch": 0.3782711733910724, "grad_norm": 0.37886396050453186, "learning_rate": 0.0001243712551501839, "loss": 1.1517, "step": 29110 }, { "epoch": 0.3782841679349883, "grad_norm": 0.4825325310230255, "learning_rate": 0.00012436865568827253, "loss": 1.4136, "step": 29111 }, { "epoch": 0.37829716247890416, "grad_norm": 0.3799412250518799, "learning_rate": 0.00012436605622636115, "loss": 1.3989, "step": 29112 }, { "epoch": 0.37831015702282006, "grad_norm": 0.3932362496852875, "learning_rate": 0.00012436345676444975, "loss": 1.4309, "step": 29113 }, { "epoch": 0.3783231515667359, "grad_norm": 0.3965602517127991, "learning_rate": 0.00012436085730253838, "loss": 1.3298, "step": 29114 }, { "epoch": 0.3783361461106518, "grad_norm": 0.4701803922653198, "learning_rate": 0.000124358257840627, "loss": 1.7342, "step": 29115 }, { "epoch": 0.37834914065456765, "grad_norm": 0.3027585446834564, "learning_rate": 0.00012435565837871562, "loss": 1.4163, "step": 29116 }, { "epoch": 0.37836213519848355, "grad_norm": 0.42055410146713257, "learning_rate": 0.00012435305891680422, "loss": 1.3831, "step": 29117 }, { "epoch": 0.3783751297423994, "grad_norm": 0.4090738296508789, "learning_rate": 0.00012435045945489285, "loss": 1.376, "step": 29118 }, { "epoch": 0.3783881242863153, "grad_norm": 0.32407262921333313, "learning_rate": 0.00012434785999298147, "loss": 1.4236, "step": 29119 }, { "epoch": 0.37840111883023114, "grad_norm": 0.4219169318675995, "learning_rate": 0.00012434526053107007, "loss": 1.4333, "step": 29120 }, { "epoch": 0.37841411337414704, "grad_norm": 0.3304325342178345, "learning_rate": 0.0001243426610691587, "loss": 1.2584, "step": 29121 }, { "epoch": 0.3784271079180629, "grad_norm": 0.41518527269363403, "learning_rate": 0.00012434006160724732, "loss": 1.3859, "step": 29122 }, { "epoch": 0.3784401024619788, "grad_norm": 0.35314589738845825, "learning_rate": 0.00012433746214533591, "loss": 1.4197, "step": 29123 }, { "epoch": 0.37845309700589463, "grad_norm": 0.39648914337158203, "learning_rate": 0.00012433486268342454, "loss": 1.3606, "step": 29124 }, { "epoch": 0.37846609154981053, "grad_norm": 0.3424660265445709, "learning_rate": 0.00012433226322151314, "loss": 1.3252, "step": 29125 }, { "epoch": 0.3784790860937264, "grad_norm": 0.4410635828971863, "learning_rate": 0.0001243296637596018, "loss": 1.4207, "step": 29126 }, { "epoch": 0.3784920806376423, "grad_norm": 0.4133591055870056, "learning_rate": 0.00012432706429769039, "loss": 1.584, "step": 29127 }, { "epoch": 0.3785050751815581, "grad_norm": 0.43514785170555115, "learning_rate": 0.000124324464835779, "loss": 1.3868, "step": 29128 }, { "epoch": 0.378518069725474, "grad_norm": 0.38888445496559143, "learning_rate": 0.0001243218653738676, "loss": 1.2944, "step": 29129 }, { "epoch": 0.37853106426938987, "grad_norm": 0.381853312253952, "learning_rate": 0.00012431926591195623, "loss": 1.4695, "step": 29130 }, { "epoch": 0.3785440588133058, "grad_norm": 0.3694745898246765, "learning_rate": 0.00012431666645004486, "loss": 1.6475, "step": 29131 }, { "epoch": 0.3785570533572216, "grad_norm": 0.45428037643432617, "learning_rate": 0.00012431406698813345, "loss": 1.4406, "step": 29132 }, { "epoch": 0.3785700479011375, "grad_norm": 0.316353440284729, "learning_rate": 0.00012431146752622208, "loss": 1.2983, "step": 29133 }, { "epoch": 0.37858304244505336, "grad_norm": 0.3684375584125519, "learning_rate": 0.0001243088680643107, "loss": 1.1729, "step": 29134 }, { "epoch": 0.37859603698896926, "grad_norm": 0.41225841641426086, "learning_rate": 0.0001243062686023993, "loss": 1.4182, "step": 29135 }, { "epoch": 0.3786090315328851, "grad_norm": 0.4168204367160797, "learning_rate": 0.00012430366914048792, "loss": 1.4061, "step": 29136 }, { "epoch": 0.378622026076801, "grad_norm": 0.39882737398147583, "learning_rate": 0.00012430106967857652, "loss": 1.2722, "step": 29137 }, { "epoch": 0.37863502062071686, "grad_norm": 0.38166508078575134, "learning_rate": 0.00012429847021666517, "loss": 1.4264, "step": 29138 }, { "epoch": 0.37864801516463276, "grad_norm": 0.35266152024269104, "learning_rate": 0.00012429587075475377, "loss": 1.3299, "step": 29139 }, { "epoch": 0.3786610097085486, "grad_norm": 0.4482724368572235, "learning_rate": 0.0001242932712928424, "loss": 1.408, "step": 29140 }, { "epoch": 0.3786740042524645, "grad_norm": 0.37660476565361023, "learning_rate": 0.000124290671830931, "loss": 1.2338, "step": 29141 }, { "epoch": 0.37868699879638035, "grad_norm": 0.33763495087623596, "learning_rate": 0.00012428807236901962, "loss": 1.4744, "step": 29142 }, { "epoch": 0.37869999334029625, "grad_norm": 0.38571202754974365, "learning_rate": 0.00012428547290710824, "loss": 1.4643, "step": 29143 }, { "epoch": 0.3787129878842121, "grad_norm": 0.4321689009666443, "learning_rate": 0.00012428287344519684, "loss": 1.468, "step": 29144 }, { "epoch": 0.378725982428128, "grad_norm": 0.43656328320503235, "learning_rate": 0.00012428027398328546, "loss": 1.4904, "step": 29145 }, { "epoch": 0.37873897697204384, "grad_norm": 0.44920194149017334, "learning_rate": 0.0001242776745213741, "loss": 1.4925, "step": 29146 }, { "epoch": 0.37875197151595974, "grad_norm": 0.4356619119644165, "learning_rate": 0.00012427507505946269, "loss": 1.451, "step": 29147 }, { "epoch": 0.3787649660598756, "grad_norm": 0.3751243054866791, "learning_rate": 0.0001242724755975513, "loss": 1.534, "step": 29148 }, { "epoch": 0.3787779606037915, "grad_norm": 0.460116446018219, "learning_rate": 0.0001242698761356399, "loss": 1.4667, "step": 29149 }, { "epoch": 0.37879095514770733, "grad_norm": 0.4050978720188141, "learning_rate": 0.00012426727667372856, "loss": 1.3061, "step": 29150 }, { "epoch": 0.37880394969162323, "grad_norm": 0.4400278627872467, "learning_rate": 0.00012426467721181716, "loss": 1.2966, "step": 29151 }, { "epoch": 0.3788169442355391, "grad_norm": 0.44580578804016113, "learning_rate": 0.00012426207774990578, "loss": 1.5289, "step": 29152 }, { "epoch": 0.378829938779455, "grad_norm": 0.42245128750801086, "learning_rate": 0.0001242594782879944, "loss": 1.3893, "step": 29153 }, { "epoch": 0.3788429333233708, "grad_norm": 0.3842412531375885, "learning_rate": 0.000124256878826083, "loss": 1.3115, "step": 29154 }, { "epoch": 0.3788559278672867, "grad_norm": 0.5081456899642944, "learning_rate": 0.00012425427936417163, "loss": 1.5385, "step": 29155 }, { "epoch": 0.37886892241120257, "grad_norm": 0.3887665271759033, "learning_rate": 0.00012425167990226022, "loss": 1.3776, "step": 29156 }, { "epoch": 0.37888191695511847, "grad_norm": 0.30462446808815, "learning_rate": 0.00012424908044034888, "loss": 1.3108, "step": 29157 }, { "epoch": 0.37889491149903437, "grad_norm": 0.34881502389907837, "learning_rate": 0.00012424648097843747, "loss": 1.3075, "step": 29158 }, { "epoch": 0.3789079060429502, "grad_norm": 0.3741593062877655, "learning_rate": 0.00012424388151652607, "loss": 1.3194, "step": 29159 }, { "epoch": 0.3789209005868661, "grad_norm": 0.4332476854324341, "learning_rate": 0.0001242412820546147, "loss": 1.3211, "step": 29160 }, { "epoch": 0.37893389513078196, "grad_norm": 0.46578484773635864, "learning_rate": 0.00012423868259270332, "loss": 1.3973, "step": 29161 }, { "epoch": 0.37894688967469786, "grad_norm": 0.42272937297821045, "learning_rate": 0.00012423608313079194, "loss": 1.4261, "step": 29162 }, { "epoch": 0.3789598842186137, "grad_norm": 0.39170020818710327, "learning_rate": 0.00012423348366888054, "loss": 1.3626, "step": 29163 }, { "epoch": 0.3789728787625296, "grad_norm": 0.49651801586151123, "learning_rate": 0.00012423088420696917, "loss": 1.4106, "step": 29164 }, { "epoch": 0.37898587330644545, "grad_norm": 0.5419514775276184, "learning_rate": 0.0001242282847450578, "loss": 1.5202, "step": 29165 }, { "epoch": 0.37899886785036135, "grad_norm": 0.4186174273490906, "learning_rate": 0.0001242256852831464, "loss": 1.352, "step": 29166 }, { "epoch": 0.3790118623942772, "grad_norm": 0.469951331615448, "learning_rate": 0.000124223085821235, "loss": 1.393, "step": 29167 }, { "epoch": 0.3790248569381931, "grad_norm": 0.4513460695743561, "learning_rate": 0.0001242204863593236, "loss": 1.4655, "step": 29168 }, { "epoch": 0.37903785148210895, "grad_norm": 0.4261421859264374, "learning_rate": 0.00012421788689741226, "loss": 1.5796, "step": 29169 }, { "epoch": 0.37905084602602485, "grad_norm": 0.3736957013607025, "learning_rate": 0.00012421528743550086, "loss": 1.3083, "step": 29170 }, { "epoch": 0.3790638405699407, "grad_norm": 0.3792990744113922, "learning_rate": 0.00012421268797358946, "loss": 1.32, "step": 29171 }, { "epoch": 0.3790768351138566, "grad_norm": 0.4717152714729309, "learning_rate": 0.00012421008851167808, "loss": 1.4518, "step": 29172 }, { "epoch": 0.37908982965777244, "grad_norm": 0.3989870846271515, "learning_rate": 0.0001242074890497667, "loss": 1.3954, "step": 29173 }, { "epoch": 0.37910282420168834, "grad_norm": 0.4924822151660919, "learning_rate": 0.00012420488958785533, "loss": 1.3549, "step": 29174 }, { "epoch": 0.3791158187456042, "grad_norm": 0.44121262431144714, "learning_rate": 0.00012420229012594393, "loss": 1.3776, "step": 29175 }, { "epoch": 0.3791288132895201, "grad_norm": 0.4238477647304535, "learning_rate": 0.00012419969066403255, "loss": 1.2493, "step": 29176 }, { "epoch": 0.37914180783343593, "grad_norm": 0.42497536540031433, "learning_rate": 0.00012419709120212118, "loss": 1.4564, "step": 29177 }, { "epoch": 0.37915480237735183, "grad_norm": 0.3816337287425995, "learning_rate": 0.00012419449174020977, "loss": 1.2512, "step": 29178 }, { "epoch": 0.3791677969212677, "grad_norm": 0.35534584522247314, "learning_rate": 0.0001241918922782984, "loss": 1.3603, "step": 29179 }, { "epoch": 0.3791807914651836, "grad_norm": 0.5033047795295715, "learning_rate": 0.000124189292816387, "loss": 1.3541, "step": 29180 }, { "epoch": 0.3791937860090994, "grad_norm": 0.4293133616447449, "learning_rate": 0.00012418669335447565, "loss": 1.4724, "step": 29181 }, { "epoch": 0.3792067805530153, "grad_norm": 0.358553409576416, "learning_rate": 0.00012418409389256424, "loss": 1.4491, "step": 29182 }, { "epoch": 0.37921977509693117, "grad_norm": 0.40651682019233704, "learning_rate": 0.00012418149443065287, "loss": 1.5088, "step": 29183 }, { "epoch": 0.37923276964084707, "grad_norm": 0.3773633539676666, "learning_rate": 0.00012417889496874147, "loss": 1.2438, "step": 29184 }, { "epoch": 0.3792457641847629, "grad_norm": 0.35284101963043213, "learning_rate": 0.0001241762955068301, "loss": 1.402, "step": 29185 }, { "epoch": 0.3792587587286788, "grad_norm": 0.34899717569351196, "learning_rate": 0.00012417369604491872, "loss": 1.3044, "step": 29186 }, { "epoch": 0.37927175327259466, "grad_norm": 0.4575243294239044, "learning_rate": 0.0001241710965830073, "loss": 1.5636, "step": 29187 }, { "epoch": 0.37928474781651056, "grad_norm": 0.4361872971057892, "learning_rate": 0.00012416849712109594, "loss": 1.3333, "step": 29188 }, { "epoch": 0.3792977423604264, "grad_norm": 0.35873571038246155, "learning_rate": 0.00012416589765918456, "loss": 1.312, "step": 29189 }, { "epoch": 0.3793107369043423, "grad_norm": 0.46585625410079956, "learning_rate": 0.00012416329819727316, "loss": 1.4023, "step": 29190 }, { "epoch": 0.37932373144825815, "grad_norm": 0.37591755390167236, "learning_rate": 0.00012416069873536178, "loss": 1.3332, "step": 29191 }, { "epoch": 0.37933672599217405, "grad_norm": 0.47125518321990967, "learning_rate": 0.0001241580992734504, "loss": 1.46, "step": 29192 }, { "epoch": 0.3793497205360899, "grad_norm": 0.4839010238647461, "learning_rate": 0.00012415549981153903, "loss": 1.4585, "step": 29193 }, { "epoch": 0.3793627150800058, "grad_norm": 0.46539804339408875, "learning_rate": 0.00012415290034962763, "loss": 1.1312, "step": 29194 }, { "epoch": 0.37937570962392164, "grad_norm": 0.5114302039146423, "learning_rate": 0.00012415030088771625, "loss": 1.3347, "step": 29195 }, { "epoch": 0.37938870416783754, "grad_norm": 0.5146238803863525, "learning_rate": 0.00012414770142580488, "loss": 1.4793, "step": 29196 }, { "epoch": 0.3794016987117534, "grad_norm": 0.5047728419303894, "learning_rate": 0.00012414510196389348, "loss": 1.2871, "step": 29197 }, { "epoch": 0.3794146932556693, "grad_norm": 0.4356023967266083, "learning_rate": 0.0001241425025019821, "loss": 1.4527, "step": 29198 }, { "epoch": 0.37942768779958513, "grad_norm": 0.3981245160102844, "learning_rate": 0.0001241399030400707, "loss": 1.3795, "step": 29199 }, { "epoch": 0.37944068234350103, "grad_norm": 0.3890610933303833, "learning_rate": 0.00012413730357815935, "loss": 1.4034, "step": 29200 }, { "epoch": 0.3794536768874169, "grad_norm": 0.3172937035560608, "learning_rate": 0.00012413470411624795, "loss": 1.2939, "step": 29201 }, { "epoch": 0.3794666714313328, "grad_norm": 0.4470987617969513, "learning_rate": 0.00012413210465433654, "loss": 1.3325, "step": 29202 }, { "epoch": 0.3794796659752486, "grad_norm": 0.39845916628837585, "learning_rate": 0.00012412950519242517, "loss": 1.4547, "step": 29203 }, { "epoch": 0.3794926605191645, "grad_norm": 0.3770211637020111, "learning_rate": 0.0001241269057305138, "loss": 1.3263, "step": 29204 }, { "epoch": 0.37950565506308037, "grad_norm": 0.5465637445449829, "learning_rate": 0.00012412430626860242, "loss": 1.2462, "step": 29205 }, { "epoch": 0.3795186496069963, "grad_norm": 0.3737373948097229, "learning_rate": 0.00012412170680669102, "loss": 1.4105, "step": 29206 }, { "epoch": 0.3795316441509121, "grad_norm": 0.3600003123283386, "learning_rate": 0.00012411910734477964, "loss": 1.3738, "step": 29207 }, { "epoch": 0.379544638694828, "grad_norm": 0.4306640326976776, "learning_rate": 0.00012411650788286826, "loss": 1.1999, "step": 29208 }, { "epoch": 0.37955763323874386, "grad_norm": 0.39429956674575806, "learning_rate": 0.00012411390842095686, "loss": 1.467, "step": 29209 }, { "epoch": 0.37957062778265976, "grad_norm": 0.37216323614120483, "learning_rate": 0.00012411130895904549, "loss": 1.4452, "step": 29210 }, { "epoch": 0.3795836223265756, "grad_norm": 0.4850517809391022, "learning_rate": 0.00012410870949713408, "loss": 1.386, "step": 29211 }, { "epoch": 0.3795966168704915, "grad_norm": 0.3833988606929779, "learning_rate": 0.00012410611003522274, "loss": 1.3681, "step": 29212 }, { "epoch": 0.37960961141440736, "grad_norm": 0.4129987061023712, "learning_rate": 0.00012410351057331133, "loss": 1.1961, "step": 29213 }, { "epoch": 0.37962260595832326, "grad_norm": 0.38009902834892273, "learning_rate": 0.00012410091111139993, "loss": 1.232, "step": 29214 }, { "epoch": 0.3796356005022391, "grad_norm": 0.4127764403820038, "learning_rate": 0.00012409831164948855, "loss": 1.4375, "step": 29215 }, { "epoch": 0.379648595046155, "grad_norm": 0.3812805712223053, "learning_rate": 0.00012409571218757718, "loss": 1.2951, "step": 29216 }, { "epoch": 0.37966158959007085, "grad_norm": 0.36285439133644104, "learning_rate": 0.0001240931127256658, "loss": 1.4076, "step": 29217 }, { "epoch": 0.37967458413398675, "grad_norm": 0.44615450501441956, "learning_rate": 0.0001240905132637544, "loss": 1.4534, "step": 29218 }, { "epoch": 0.3796875786779026, "grad_norm": 0.33002763986587524, "learning_rate": 0.00012408791380184303, "loss": 1.2594, "step": 29219 }, { "epoch": 0.3797005732218185, "grad_norm": 0.4580899775028229, "learning_rate": 0.00012408531433993165, "loss": 1.4329, "step": 29220 }, { "epoch": 0.37971356776573434, "grad_norm": 0.3683582544326782, "learning_rate": 0.00012408271487802025, "loss": 1.4915, "step": 29221 }, { "epoch": 0.37972656230965024, "grad_norm": 0.4178430438041687, "learning_rate": 0.00012408011541610887, "loss": 1.395, "step": 29222 }, { "epoch": 0.3797395568535661, "grad_norm": 0.39848238229751587, "learning_rate": 0.00012407751595419747, "loss": 1.3463, "step": 29223 }, { "epoch": 0.379752551397482, "grad_norm": 0.4193390905857086, "learning_rate": 0.00012407491649228612, "loss": 1.3417, "step": 29224 }, { "epoch": 0.37976554594139783, "grad_norm": 0.4338294565677643, "learning_rate": 0.00012407231703037472, "loss": 1.4001, "step": 29225 }, { "epoch": 0.37977854048531373, "grad_norm": 0.374187171459198, "learning_rate": 0.00012406971756846332, "loss": 1.3563, "step": 29226 }, { "epoch": 0.3797915350292296, "grad_norm": 0.37506723403930664, "learning_rate": 0.00012406711810655197, "loss": 1.4996, "step": 29227 }, { "epoch": 0.3798045295731455, "grad_norm": 0.3967728018760681, "learning_rate": 0.00012406451864464056, "loss": 1.2992, "step": 29228 }, { "epoch": 0.3798175241170613, "grad_norm": 0.4145708680152893, "learning_rate": 0.0001240619191827292, "loss": 1.5146, "step": 29229 }, { "epoch": 0.3798305186609772, "grad_norm": 0.3018800914287567, "learning_rate": 0.00012405931972081779, "loss": 1.4163, "step": 29230 }, { "epoch": 0.37984351320489307, "grad_norm": 0.3748125731945038, "learning_rate": 0.0001240567202589064, "loss": 1.2653, "step": 29231 }, { "epoch": 0.37985650774880897, "grad_norm": 0.3817799985408783, "learning_rate": 0.00012405412079699504, "loss": 1.3769, "step": 29232 }, { "epoch": 0.37986950229272487, "grad_norm": 0.32482659816741943, "learning_rate": 0.00012405152133508363, "loss": 1.489, "step": 29233 }, { "epoch": 0.3798824968366407, "grad_norm": 0.40616363286972046, "learning_rate": 0.00012404892187317226, "loss": 1.2141, "step": 29234 }, { "epoch": 0.3798954913805566, "grad_norm": 0.42527300119400024, "learning_rate": 0.00012404632241126088, "loss": 1.6836, "step": 29235 }, { "epoch": 0.37990848592447246, "grad_norm": 0.3806702494621277, "learning_rate": 0.0001240437229493495, "loss": 1.3101, "step": 29236 }, { "epoch": 0.37992148046838836, "grad_norm": 0.4548983871936798, "learning_rate": 0.0001240411234874381, "loss": 1.4323, "step": 29237 }, { "epoch": 0.3799344750123042, "grad_norm": 0.44386056065559387, "learning_rate": 0.00012403852402552673, "loss": 1.3975, "step": 29238 }, { "epoch": 0.3799474695562201, "grad_norm": 0.3747439384460449, "learning_rate": 0.00012403592456361535, "loss": 1.4673, "step": 29239 }, { "epoch": 0.37996046410013595, "grad_norm": 0.4109708368778229, "learning_rate": 0.00012403332510170395, "loss": 1.617, "step": 29240 }, { "epoch": 0.37997345864405185, "grad_norm": 0.41999250650405884, "learning_rate": 0.00012403072563979257, "loss": 1.5366, "step": 29241 }, { "epoch": 0.3799864531879677, "grad_norm": 0.4750569760799408, "learning_rate": 0.00012402812617788117, "loss": 1.3749, "step": 29242 }, { "epoch": 0.3799994477318836, "grad_norm": 0.36913996934890747, "learning_rate": 0.0001240255267159698, "loss": 1.3594, "step": 29243 }, { "epoch": 0.38001244227579944, "grad_norm": 0.33061403036117554, "learning_rate": 0.00012402292725405842, "loss": 1.504, "step": 29244 }, { "epoch": 0.38002543681971535, "grad_norm": 0.3785783648490906, "learning_rate": 0.00012402032779214702, "loss": 1.2643, "step": 29245 }, { "epoch": 0.3800384313636312, "grad_norm": 0.4089740812778473, "learning_rate": 0.00012401772833023564, "loss": 1.4331, "step": 29246 }, { "epoch": 0.3800514259075471, "grad_norm": 0.4011761546134949, "learning_rate": 0.00012401512886832427, "loss": 1.4471, "step": 29247 }, { "epoch": 0.38006442045146294, "grad_norm": 0.4180662930011749, "learning_rate": 0.0001240125294064129, "loss": 1.3963, "step": 29248 }, { "epoch": 0.38007741499537884, "grad_norm": 0.3726418614387512, "learning_rate": 0.0001240099299445015, "loss": 1.7263, "step": 29249 }, { "epoch": 0.3800904095392947, "grad_norm": 0.43294036388397217, "learning_rate": 0.0001240073304825901, "loss": 1.4756, "step": 29250 }, { "epoch": 0.3801034040832106, "grad_norm": 0.41812747716903687, "learning_rate": 0.00012400473102067874, "loss": 1.5949, "step": 29251 }, { "epoch": 0.38011639862712643, "grad_norm": 0.47401514649391174, "learning_rate": 0.00012400213155876733, "loss": 1.4265, "step": 29252 }, { "epoch": 0.38012939317104233, "grad_norm": 0.3371295928955078, "learning_rate": 0.00012399953209685596, "loss": 1.4049, "step": 29253 }, { "epoch": 0.3801423877149582, "grad_norm": 0.39377865195274353, "learning_rate": 0.00012399693263494456, "loss": 1.5149, "step": 29254 }, { "epoch": 0.3801553822588741, "grad_norm": 0.39270544052124023, "learning_rate": 0.00012399433317303318, "loss": 1.4356, "step": 29255 }, { "epoch": 0.3801683768027899, "grad_norm": 0.38318485021591187, "learning_rate": 0.0001239917337111218, "loss": 1.512, "step": 29256 }, { "epoch": 0.3801813713467058, "grad_norm": 0.38301828503608704, "learning_rate": 0.0001239891342492104, "loss": 1.1555, "step": 29257 }, { "epoch": 0.38019436589062167, "grad_norm": 0.45692166686058044, "learning_rate": 0.00012398653478729903, "loss": 1.2978, "step": 29258 }, { "epoch": 0.38020736043453757, "grad_norm": 0.46812117099761963, "learning_rate": 0.00012398393532538765, "loss": 1.4326, "step": 29259 }, { "epoch": 0.3802203549784534, "grad_norm": 0.3846263587474823, "learning_rate": 0.00012398133586347628, "loss": 1.4002, "step": 29260 }, { "epoch": 0.3802333495223693, "grad_norm": 0.3237459659576416, "learning_rate": 0.00012397873640156487, "loss": 1.3219, "step": 29261 }, { "epoch": 0.38024634406628516, "grad_norm": 0.3671210706233978, "learning_rate": 0.0001239761369396535, "loss": 1.3859, "step": 29262 }, { "epoch": 0.38025933861020106, "grad_norm": 0.35202863812446594, "learning_rate": 0.00012397353747774212, "loss": 1.1257, "step": 29263 }, { "epoch": 0.3802723331541169, "grad_norm": 0.3972894251346588, "learning_rate": 0.00012397093801583072, "loss": 1.1971, "step": 29264 }, { "epoch": 0.3802853276980328, "grad_norm": 0.35992005467414856, "learning_rate": 0.00012396833855391934, "loss": 1.3961, "step": 29265 }, { "epoch": 0.38029832224194865, "grad_norm": 0.5016254782676697, "learning_rate": 0.00012396573909200797, "loss": 1.5307, "step": 29266 }, { "epoch": 0.38031131678586455, "grad_norm": 0.4317588210105896, "learning_rate": 0.0001239631396300966, "loss": 1.5, "step": 29267 }, { "epoch": 0.3803243113297804, "grad_norm": 0.35746684670448303, "learning_rate": 0.0001239605401681852, "loss": 1.3658, "step": 29268 }, { "epoch": 0.3803373058736963, "grad_norm": 0.47025319933891296, "learning_rate": 0.0001239579407062738, "loss": 1.3123, "step": 29269 }, { "epoch": 0.38035030041761214, "grad_norm": 0.4329600930213928, "learning_rate": 0.00012395534124436244, "loss": 1.3053, "step": 29270 }, { "epoch": 0.38036329496152804, "grad_norm": 0.34861868619918823, "learning_rate": 0.00012395274178245104, "loss": 1.2645, "step": 29271 }, { "epoch": 0.3803762895054439, "grad_norm": 0.521388590335846, "learning_rate": 0.00012395014232053966, "loss": 1.6508, "step": 29272 }, { "epoch": 0.3803892840493598, "grad_norm": 0.41579383611679077, "learning_rate": 0.00012394754285862826, "loss": 1.682, "step": 29273 }, { "epoch": 0.38040227859327563, "grad_norm": 0.47176480293273926, "learning_rate": 0.00012394494339671688, "loss": 1.3106, "step": 29274 }, { "epoch": 0.38041527313719153, "grad_norm": 0.30683043599128723, "learning_rate": 0.0001239423439348055, "loss": 1.6188, "step": 29275 }, { "epoch": 0.3804282676811074, "grad_norm": 0.37323975563049316, "learning_rate": 0.0001239397444728941, "loss": 1.3084, "step": 29276 }, { "epoch": 0.3804412622250233, "grad_norm": 0.4219805598258972, "learning_rate": 0.00012393714501098273, "loss": 1.4363, "step": 29277 }, { "epoch": 0.3804542567689391, "grad_norm": 0.4615146219730377, "learning_rate": 0.00012393454554907135, "loss": 1.3311, "step": 29278 }, { "epoch": 0.380467251312855, "grad_norm": 0.44219422340393066, "learning_rate": 0.00012393194608715998, "loss": 1.4973, "step": 29279 }, { "epoch": 0.38048024585677087, "grad_norm": 0.36293652653694153, "learning_rate": 0.00012392934662524858, "loss": 1.3596, "step": 29280 }, { "epoch": 0.38049324040068677, "grad_norm": 0.31426486372947693, "learning_rate": 0.00012392674716333717, "loss": 1.453, "step": 29281 }, { "epoch": 0.3805062349446026, "grad_norm": 0.3880532383918762, "learning_rate": 0.00012392414770142583, "loss": 1.351, "step": 29282 }, { "epoch": 0.3805192294885185, "grad_norm": 0.32018741965293884, "learning_rate": 0.00012392154823951442, "loss": 1.378, "step": 29283 }, { "epoch": 0.38053222403243436, "grad_norm": 0.4035203754901886, "learning_rate": 0.00012391894877760305, "loss": 1.3425, "step": 29284 }, { "epoch": 0.38054521857635026, "grad_norm": 0.42208942770957947, "learning_rate": 0.00012391634931569164, "loss": 1.4648, "step": 29285 }, { "epoch": 0.3805582131202661, "grad_norm": 0.49098819494247437, "learning_rate": 0.00012391374985378027, "loss": 1.3835, "step": 29286 }, { "epoch": 0.380571207664182, "grad_norm": 0.4460628628730774, "learning_rate": 0.0001239111503918689, "loss": 1.3597, "step": 29287 }, { "epoch": 0.38058420220809785, "grad_norm": 0.4968879222869873, "learning_rate": 0.0001239085509299575, "loss": 1.3622, "step": 29288 }, { "epoch": 0.38059719675201376, "grad_norm": 0.4973567724227905, "learning_rate": 0.00012390595146804612, "loss": 1.2452, "step": 29289 }, { "epoch": 0.3806101912959296, "grad_norm": 0.3727114498615265, "learning_rate": 0.00012390335200613474, "loss": 1.4074, "step": 29290 }, { "epoch": 0.3806231858398455, "grad_norm": 0.2838941514492035, "learning_rate": 0.00012390075254422336, "loss": 1.1543, "step": 29291 }, { "epoch": 0.38063618038376135, "grad_norm": 0.4578581154346466, "learning_rate": 0.00012389815308231196, "loss": 1.3251, "step": 29292 }, { "epoch": 0.38064917492767725, "grad_norm": 0.321943074464798, "learning_rate": 0.00012389555362040059, "loss": 1.3916, "step": 29293 }, { "epoch": 0.3806621694715931, "grad_norm": 0.4434391260147095, "learning_rate": 0.0001238929541584892, "loss": 1.4198, "step": 29294 }, { "epoch": 0.380675164015509, "grad_norm": 0.42371317744255066, "learning_rate": 0.0001238903546965778, "loss": 1.42, "step": 29295 }, { "epoch": 0.38068815855942484, "grad_norm": 0.40165191888809204, "learning_rate": 0.00012388775523466643, "loss": 1.3288, "step": 29296 }, { "epoch": 0.38070115310334074, "grad_norm": 0.390564888715744, "learning_rate": 0.00012388515577275503, "loss": 1.3019, "step": 29297 }, { "epoch": 0.3807141476472566, "grad_norm": 0.3627987205982208, "learning_rate": 0.00012388255631084365, "loss": 1.3453, "step": 29298 }, { "epoch": 0.3807271421911725, "grad_norm": 0.38638800382614136, "learning_rate": 0.00012387995684893228, "loss": 1.112, "step": 29299 }, { "epoch": 0.38074013673508833, "grad_norm": 0.4235222637653351, "learning_rate": 0.00012387735738702088, "loss": 1.2382, "step": 29300 }, { "epoch": 0.38075313127900423, "grad_norm": 0.4219317138195038, "learning_rate": 0.00012387475792510953, "loss": 1.5246, "step": 29301 }, { "epoch": 0.3807661258229201, "grad_norm": 0.36694619059562683, "learning_rate": 0.00012387215846319813, "loss": 1.3284, "step": 29302 }, { "epoch": 0.380779120366836, "grad_norm": 0.4686177372932434, "learning_rate": 0.00012386955900128675, "loss": 1.3858, "step": 29303 }, { "epoch": 0.3807921149107518, "grad_norm": 0.42843008041381836, "learning_rate": 0.00012386695953937535, "loss": 1.2314, "step": 29304 }, { "epoch": 0.3808051094546677, "grad_norm": 0.47080984711647034, "learning_rate": 0.00012386436007746397, "loss": 1.4952, "step": 29305 }, { "epoch": 0.38081810399858357, "grad_norm": 0.3955569863319397, "learning_rate": 0.0001238617606155526, "loss": 1.4162, "step": 29306 }, { "epoch": 0.38083109854249947, "grad_norm": 0.44336366653442383, "learning_rate": 0.0001238591611536412, "loss": 1.2713, "step": 29307 }, { "epoch": 0.3808440930864153, "grad_norm": 0.310170978307724, "learning_rate": 0.00012385656169172982, "loss": 1.1964, "step": 29308 }, { "epoch": 0.3808570876303312, "grad_norm": 0.4874942898750305, "learning_rate": 0.00012385396222981844, "loss": 1.5207, "step": 29309 }, { "epoch": 0.3808700821742471, "grad_norm": 0.3653622269630432, "learning_rate": 0.00012385136276790704, "loss": 1.4265, "step": 29310 }, { "epoch": 0.38088307671816296, "grad_norm": 0.44529467821121216, "learning_rate": 0.00012384876330599566, "loss": 1.3345, "step": 29311 }, { "epoch": 0.38089607126207886, "grad_norm": 0.42509302496910095, "learning_rate": 0.00012384616384408426, "loss": 1.2987, "step": 29312 }, { "epoch": 0.3809090658059947, "grad_norm": 0.47302448749542236, "learning_rate": 0.0001238435643821729, "loss": 1.3404, "step": 29313 }, { "epoch": 0.3809220603499106, "grad_norm": 0.4644275903701782, "learning_rate": 0.0001238409649202615, "loss": 1.4853, "step": 29314 }, { "epoch": 0.38093505489382645, "grad_norm": 0.3776567280292511, "learning_rate": 0.00012383836545835014, "loss": 1.4354, "step": 29315 }, { "epoch": 0.38094804943774235, "grad_norm": 0.37674030661582947, "learning_rate": 0.00012383576599643873, "loss": 1.2761, "step": 29316 }, { "epoch": 0.3809610439816582, "grad_norm": 0.45794782042503357, "learning_rate": 0.00012383316653452736, "loss": 1.4461, "step": 29317 }, { "epoch": 0.3809740385255741, "grad_norm": 0.39756396412849426, "learning_rate": 0.00012383056707261598, "loss": 1.3745, "step": 29318 }, { "epoch": 0.38098703306948994, "grad_norm": 0.4186994731426239, "learning_rate": 0.00012382796761070458, "loss": 1.508, "step": 29319 }, { "epoch": 0.38100002761340585, "grad_norm": 0.6823927760124207, "learning_rate": 0.0001238253681487932, "loss": 1.4673, "step": 29320 }, { "epoch": 0.3810130221573217, "grad_norm": 0.3511705696582794, "learning_rate": 0.00012382276868688183, "loss": 1.3597, "step": 29321 }, { "epoch": 0.3810260167012376, "grad_norm": 0.459009051322937, "learning_rate": 0.00012382016922497045, "loss": 1.5418, "step": 29322 }, { "epoch": 0.38103901124515344, "grad_norm": 0.24147070944309235, "learning_rate": 0.00012381756976305905, "loss": 1.2157, "step": 29323 }, { "epoch": 0.38105200578906934, "grad_norm": 0.27654290199279785, "learning_rate": 0.00012381497030114765, "loss": 1.2676, "step": 29324 }, { "epoch": 0.3810650003329852, "grad_norm": 0.3977309465408325, "learning_rate": 0.0001238123708392363, "loss": 1.5296, "step": 29325 }, { "epoch": 0.3810779948769011, "grad_norm": 0.4082179367542267, "learning_rate": 0.0001238097713773249, "loss": 1.2637, "step": 29326 }, { "epoch": 0.38109098942081693, "grad_norm": 0.35059911012649536, "learning_rate": 0.00012380717191541352, "loss": 1.5569, "step": 29327 }, { "epoch": 0.38110398396473283, "grad_norm": 0.365424782037735, "learning_rate": 0.00012380457245350212, "loss": 1.3473, "step": 29328 }, { "epoch": 0.3811169785086487, "grad_norm": 0.37108683586120605, "learning_rate": 0.00012380197299159074, "loss": 1.4112, "step": 29329 }, { "epoch": 0.3811299730525646, "grad_norm": 0.3869462311267853, "learning_rate": 0.00012379937352967937, "loss": 1.308, "step": 29330 }, { "epoch": 0.3811429675964804, "grad_norm": 0.4423421025276184, "learning_rate": 0.00012379677406776796, "loss": 1.5004, "step": 29331 }, { "epoch": 0.3811559621403963, "grad_norm": 0.3770103454589844, "learning_rate": 0.0001237941746058566, "loss": 1.3411, "step": 29332 }, { "epoch": 0.38116895668431217, "grad_norm": 0.3661179840564728, "learning_rate": 0.0001237915751439452, "loss": 1.3521, "step": 29333 }, { "epoch": 0.38118195122822807, "grad_norm": 0.29008179903030396, "learning_rate": 0.00012378897568203384, "loss": 1.3433, "step": 29334 }, { "epoch": 0.3811949457721439, "grad_norm": 0.3734094500541687, "learning_rate": 0.00012378637622012244, "loss": 1.4482, "step": 29335 }, { "epoch": 0.3812079403160598, "grad_norm": 0.43825024366378784, "learning_rate": 0.00012378377675821103, "loss": 1.4307, "step": 29336 }, { "epoch": 0.38122093485997566, "grad_norm": 0.3639650344848633, "learning_rate": 0.00012378117729629968, "loss": 1.515, "step": 29337 }, { "epoch": 0.38123392940389156, "grad_norm": 0.41644492745399475, "learning_rate": 0.00012377857783438828, "loss": 1.4221, "step": 29338 }, { "epoch": 0.3812469239478074, "grad_norm": 0.3838006854057312, "learning_rate": 0.0001237759783724769, "loss": 1.4273, "step": 29339 }, { "epoch": 0.3812599184917233, "grad_norm": 0.48059695959091187, "learning_rate": 0.00012377337891056553, "loss": 1.5367, "step": 29340 }, { "epoch": 0.38127291303563915, "grad_norm": 0.37707996368408203, "learning_rate": 0.00012377077944865413, "loss": 1.3081, "step": 29341 }, { "epoch": 0.38128590757955505, "grad_norm": 0.31958329677581787, "learning_rate": 0.00012376817998674275, "loss": 1.0924, "step": 29342 }, { "epoch": 0.3812989021234709, "grad_norm": 0.3857501447200775, "learning_rate": 0.00012376558052483135, "loss": 1.2971, "step": 29343 }, { "epoch": 0.3813118966673868, "grad_norm": 0.32319754362106323, "learning_rate": 0.00012376298106292, "loss": 1.2435, "step": 29344 }, { "epoch": 0.38132489121130264, "grad_norm": 0.472208172082901, "learning_rate": 0.0001237603816010086, "loss": 1.4088, "step": 29345 }, { "epoch": 0.38133788575521854, "grad_norm": 0.41490796208381653, "learning_rate": 0.00012375778213909722, "loss": 1.4036, "step": 29346 }, { "epoch": 0.3813508802991344, "grad_norm": 0.37932026386260986, "learning_rate": 0.00012375518267718582, "loss": 1.2381, "step": 29347 }, { "epoch": 0.3813638748430503, "grad_norm": 0.4356116056442261, "learning_rate": 0.00012375258321527445, "loss": 1.369, "step": 29348 }, { "epoch": 0.38137686938696613, "grad_norm": 0.43697962164878845, "learning_rate": 0.00012374998375336307, "loss": 1.4287, "step": 29349 }, { "epoch": 0.38138986393088203, "grad_norm": 0.24116088449954987, "learning_rate": 0.00012374738429145167, "loss": 1.1915, "step": 29350 }, { "epoch": 0.3814028584747979, "grad_norm": 0.45889362692832947, "learning_rate": 0.0001237447848295403, "loss": 1.2901, "step": 29351 }, { "epoch": 0.3814158530187138, "grad_norm": 0.4414624571800232, "learning_rate": 0.00012374218536762892, "loss": 1.457, "step": 29352 }, { "epoch": 0.3814288475626296, "grad_norm": 0.4113902449607849, "learning_rate": 0.0001237395859057175, "loss": 1.3579, "step": 29353 }, { "epoch": 0.3814418421065455, "grad_norm": 0.3689514994621277, "learning_rate": 0.00012373698644380614, "loss": 1.5436, "step": 29354 }, { "epoch": 0.38145483665046137, "grad_norm": 0.5008370876312256, "learning_rate": 0.00012373438698189474, "loss": 1.4787, "step": 29355 }, { "epoch": 0.38146783119437727, "grad_norm": 0.29000452160835266, "learning_rate": 0.0001237317875199834, "loss": 1.4038, "step": 29356 }, { "epoch": 0.3814808257382931, "grad_norm": 0.3488605320453644, "learning_rate": 0.00012372918805807198, "loss": 1.3101, "step": 29357 }, { "epoch": 0.381493820282209, "grad_norm": 0.4578481912612915, "learning_rate": 0.0001237265885961606, "loss": 1.4426, "step": 29358 }, { "epoch": 0.38150681482612486, "grad_norm": 0.437642902135849, "learning_rate": 0.0001237239891342492, "loss": 1.339, "step": 29359 }, { "epoch": 0.38151980937004076, "grad_norm": 0.43414634466171265, "learning_rate": 0.00012372138967233783, "loss": 1.5506, "step": 29360 }, { "epoch": 0.3815328039139566, "grad_norm": 0.3625759482383728, "learning_rate": 0.00012371879021042646, "loss": 1.3672, "step": 29361 }, { "epoch": 0.3815457984578725, "grad_norm": 0.4236309230327606, "learning_rate": 0.00012371619074851505, "loss": 1.46, "step": 29362 }, { "epoch": 0.38155879300178835, "grad_norm": 0.3547542691230774, "learning_rate": 0.00012371359128660368, "loss": 1.3847, "step": 29363 }, { "epoch": 0.38157178754570426, "grad_norm": 0.41549909114837646, "learning_rate": 0.0001237109918246923, "loss": 1.4217, "step": 29364 }, { "epoch": 0.3815847820896201, "grad_norm": 0.26602962613105774, "learning_rate": 0.0001237083923627809, "loss": 1.2376, "step": 29365 }, { "epoch": 0.381597776633536, "grad_norm": 0.49340665340423584, "learning_rate": 0.00012370579290086952, "loss": 1.3961, "step": 29366 }, { "epoch": 0.38161077117745185, "grad_norm": 0.4609818160533905, "learning_rate": 0.00012370319343895812, "loss": 1.5643, "step": 29367 }, { "epoch": 0.38162376572136775, "grad_norm": 0.3672391176223755, "learning_rate": 0.00012370059397704677, "loss": 1.3276, "step": 29368 }, { "epoch": 0.3816367602652836, "grad_norm": 0.37062928080558777, "learning_rate": 0.00012369799451513537, "loss": 1.4721, "step": 29369 }, { "epoch": 0.3816497548091995, "grad_norm": 0.40355733036994934, "learning_rate": 0.000123695395053224, "loss": 1.3654, "step": 29370 }, { "epoch": 0.38166274935311534, "grad_norm": 0.39091750979423523, "learning_rate": 0.0001236927955913126, "loss": 1.5978, "step": 29371 }, { "epoch": 0.38167574389703124, "grad_norm": 0.5253551602363586, "learning_rate": 0.00012369019612940122, "loss": 1.5154, "step": 29372 }, { "epoch": 0.3816887384409471, "grad_norm": 0.34352824091911316, "learning_rate": 0.00012368759666748984, "loss": 1.4004, "step": 29373 }, { "epoch": 0.381701732984863, "grad_norm": 0.40059515833854675, "learning_rate": 0.00012368499720557844, "loss": 1.1791, "step": 29374 }, { "epoch": 0.38171472752877883, "grad_norm": 0.4644610583782196, "learning_rate": 0.0001236823977436671, "loss": 1.3207, "step": 29375 }, { "epoch": 0.38172772207269473, "grad_norm": 0.4177878797054291, "learning_rate": 0.0001236797982817557, "loss": 1.4119, "step": 29376 }, { "epoch": 0.3817407166166106, "grad_norm": 0.31053271889686584, "learning_rate": 0.00012367719881984428, "loss": 1.2872, "step": 29377 }, { "epoch": 0.3817537111605265, "grad_norm": 0.4227723181247711, "learning_rate": 0.0001236745993579329, "loss": 1.5247, "step": 29378 }, { "epoch": 0.3817667057044423, "grad_norm": 0.4705777168273926, "learning_rate": 0.00012367199989602153, "loss": 1.334, "step": 29379 }, { "epoch": 0.3817797002483582, "grad_norm": 0.2954559028148651, "learning_rate": 0.00012366940043411016, "loss": 1.1607, "step": 29380 }, { "epoch": 0.38179269479227407, "grad_norm": 0.35180020332336426, "learning_rate": 0.00012366680097219875, "loss": 1.3692, "step": 29381 }, { "epoch": 0.38180568933618997, "grad_norm": 0.34350132942199707, "learning_rate": 0.00012366420151028738, "loss": 1.1326, "step": 29382 }, { "epoch": 0.3818186838801058, "grad_norm": 0.4459732472896576, "learning_rate": 0.000123661602048376, "loss": 1.3347, "step": 29383 }, { "epoch": 0.3818316784240217, "grad_norm": 0.3138584494590759, "learning_rate": 0.0001236590025864646, "loss": 1.4915, "step": 29384 }, { "epoch": 0.38184467296793756, "grad_norm": 0.34164419770240784, "learning_rate": 0.00012365640312455323, "loss": 1.5184, "step": 29385 }, { "epoch": 0.38185766751185346, "grad_norm": 0.4256591200828552, "learning_rate": 0.00012365380366264182, "loss": 1.2372, "step": 29386 }, { "epoch": 0.38187066205576936, "grad_norm": 0.3848235309123993, "learning_rate": 0.00012365120420073047, "loss": 1.3494, "step": 29387 }, { "epoch": 0.3818836565996852, "grad_norm": 0.4413069188594818, "learning_rate": 0.00012364860473881907, "loss": 1.5947, "step": 29388 }, { "epoch": 0.3818966511436011, "grad_norm": 0.36427780985832214, "learning_rate": 0.0001236460052769077, "loss": 1.3516, "step": 29389 }, { "epoch": 0.38190964568751695, "grad_norm": 0.4441884756088257, "learning_rate": 0.0001236434058149963, "loss": 1.3335, "step": 29390 }, { "epoch": 0.38192264023143285, "grad_norm": 0.3923276364803314, "learning_rate": 0.00012364080635308492, "loss": 1.4158, "step": 29391 }, { "epoch": 0.3819356347753487, "grad_norm": 0.33791735768318176, "learning_rate": 0.00012363820689117354, "loss": 1.2208, "step": 29392 }, { "epoch": 0.3819486293192646, "grad_norm": 0.46055054664611816, "learning_rate": 0.00012363560742926214, "loss": 1.6191, "step": 29393 }, { "epoch": 0.38196162386318044, "grad_norm": 0.5053493976593018, "learning_rate": 0.00012363300796735076, "loss": 1.3984, "step": 29394 }, { "epoch": 0.38197461840709634, "grad_norm": 0.38026535511016846, "learning_rate": 0.0001236304085054394, "loss": 1.4039, "step": 29395 }, { "epoch": 0.3819876129510122, "grad_norm": 0.322826623916626, "learning_rate": 0.000123627809043528, "loss": 1.3155, "step": 29396 }, { "epoch": 0.3820006074949281, "grad_norm": 0.4060744047164917, "learning_rate": 0.0001236252095816166, "loss": 1.4254, "step": 29397 }, { "epoch": 0.38201360203884394, "grad_norm": 0.4446217715740204, "learning_rate": 0.0001236226101197052, "loss": 1.4378, "step": 29398 }, { "epoch": 0.38202659658275984, "grad_norm": 0.4494479298591614, "learning_rate": 0.00012362001065779386, "loss": 1.2752, "step": 29399 }, { "epoch": 0.3820395911266757, "grad_norm": 0.30591675639152527, "learning_rate": 0.00012361741119588246, "loss": 1.5168, "step": 29400 }, { "epoch": 0.3820525856705916, "grad_norm": 0.3413150906562805, "learning_rate": 0.00012361481173397108, "loss": 1.6, "step": 29401 }, { "epoch": 0.3820655802145074, "grad_norm": 0.4443274140357971, "learning_rate": 0.00012361221227205968, "loss": 1.4589, "step": 29402 }, { "epoch": 0.38207857475842333, "grad_norm": 0.37355321645736694, "learning_rate": 0.0001236096128101483, "loss": 1.4099, "step": 29403 }, { "epoch": 0.3820915693023392, "grad_norm": 0.38128727674484253, "learning_rate": 0.00012360701334823693, "loss": 1.4049, "step": 29404 }, { "epoch": 0.3821045638462551, "grad_norm": 0.4323970079421997, "learning_rate": 0.00012360441388632553, "loss": 1.4708, "step": 29405 }, { "epoch": 0.3821175583901709, "grad_norm": 0.4144299626350403, "learning_rate": 0.00012360181442441415, "loss": 1.4166, "step": 29406 }, { "epoch": 0.3821305529340868, "grad_norm": 0.4436207115650177, "learning_rate": 0.00012359921496250277, "loss": 1.4823, "step": 29407 }, { "epoch": 0.38214354747800267, "grad_norm": 0.39162495732307434, "learning_rate": 0.00012359661550059137, "loss": 1.5734, "step": 29408 }, { "epoch": 0.38215654202191857, "grad_norm": 0.4349675178527832, "learning_rate": 0.00012359401603868, "loss": 1.4324, "step": 29409 }, { "epoch": 0.3821695365658344, "grad_norm": 0.3897383511066437, "learning_rate": 0.0001235914165767686, "loss": 1.5939, "step": 29410 }, { "epoch": 0.3821825311097503, "grad_norm": 0.36471816897392273, "learning_rate": 0.00012358881711485725, "loss": 1.2838, "step": 29411 }, { "epoch": 0.38219552565366616, "grad_norm": 0.4042739272117615, "learning_rate": 0.00012358621765294584, "loss": 1.3122, "step": 29412 }, { "epoch": 0.38220852019758206, "grad_norm": 0.42443063855171204, "learning_rate": 0.00012358361819103447, "loss": 1.596, "step": 29413 }, { "epoch": 0.3822215147414979, "grad_norm": 0.384961873292923, "learning_rate": 0.0001235810187291231, "loss": 1.3624, "step": 29414 }, { "epoch": 0.3822345092854138, "grad_norm": 0.37326520681381226, "learning_rate": 0.0001235784192672117, "loss": 1.2551, "step": 29415 }, { "epoch": 0.38224750382932965, "grad_norm": 0.48011258244514465, "learning_rate": 0.00012357581980530031, "loss": 1.473, "step": 29416 }, { "epoch": 0.38226049837324555, "grad_norm": 0.3795984089374542, "learning_rate": 0.0001235732203433889, "loss": 1.4254, "step": 29417 }, { "epoch": 0.3822734929171614, "grad_norm": 0.4432600140571594, "learning_rate": 0.00012357062088147756, "loss": 1.4637, "step": 29418 }, { "epoch": 0.3822864874610773, "grad_norm": 0.33412620425224304, "learning_rate": 0.00012356802141956616, "loss": 1.6703, "step": 29419 }, { "epoch": 0.38229948200499314, "grad_norm": 0.44140705466270447, "learning_rate": 0.00012356542195765476, "loss": 1.5791, "step": 29420 }, { "epoch": 0.38231247654890904, "grad_norm": 0.38218456506729126, "learning_rate": 0.00012356282249574338, "loss": 1.4292, "step": 29421 }, { "epoch": 0.3823254710928249, "grad_norm": 0.47046366333961487, "learning_rate": 0.000123560223033832, "loss": 1.4469, "step": 29422 }, { "epoch": 0.3823384656367408, "grad_norm": 0.36526861786842346, "learning_rate": 0.00012355762357192063, "loss": 1.3608, "step": 29423 }, { "epoch": 0.38235146018065663, "grad_norm": 0.3890009820461273, "learning_rate": 0.00012355502411000923, "loss": 1.4135, "step": 29424 }, { "epoch": 0.38236445472457253, "grad_norm": 0.37007662653923035, "learning_rate": 0.00012355242464809785, "loss": 1.1086, "step": 29425 }, { "epoch": 0.3823774492684884, "grad_norm": 0.39993101358413696, "learning_rate": 0.00012354982518618648, "loss": 1.3535, "step": 29426 }, { "epoch": 0.3823904438124043, "grad_norm": 0.37410008907318115, "learning_rate": 0.00012354722572427507, "loss": 1.4623, "step": 29427 }, { "epoch": 0.3824034383563201, "grad_norm": 0.3292379379272461, "learning_rate": 0.0001235446262623637, "loss": 1.3055, "step": 29428 }, { "epoch": 0.382416432900236, "grad_norm": 0.3753480613231659, "learning_rate": 0.0001235420268004523, "loss": 1.2823, "step": 29429 }, { "epoch": 0.38242942744415187, "grad_norm": 0.38356128334999084, "learning_rate": 0.00012353942733854095, "loss": 1.4063, "step": 29430 }, { "epoch": 0.38244242198806777, "grad_norm": 0.4168642461299896, "learning_rate": 0.00012353682787662955, "loss": 1.2351, "step": 29431 }, { "epoch": 0.3824554165319836, "grad_norm": 0.42736533284187317, "learning_rate": 0.00012353422841471814, "loss": 1.5735, "step": 29432 }, { "epoch": 0.3824684110758995, "grad_norm": 0.3064168095588684, "learning_rate": 0.00012353162895280677, "loss": 1.4054, "step": 29433 }, { "epoch": 0.38248140561981536, "grad_norm": 0.4678229093551636, "learning_rate": 0.0001235290294908954, "loss": 1.5785, "step": 29434 }, { "epoch": 0.38249440016373126, "grad_norm": 0.3700430989265442, "learning_rate": 0.00012352643002898402, "loss": 1.3764, "step": 29435 }, { "epoch": 0.3825073947076471, "grad_norm": 0.42482495307922363, "learning_rate": 0.00012352383056707261, "loss": 1.5867, "step": 29436 }, { "epoch": 0.382520389251563, "grad_norm": 0.42966827750205994, "learning_rate": 0.00012352123110516124, "loss": 1.5272, "step": 29437 }, { "epoch": 0.38253338379547885, "grad_norm": 0.39197731018066406, "learning_rate": 0.00012351863164324986, "loss": 1.4112, "step": 29438 }, { "epoch": 0.38254637833939475, "grad_norm": 0.4871208369731903, "learning_rate": 0.00012351603218133846, "loss": 1.3997, "step": 29439 }, { "epoch": 0.3825593728833106, "grad_norm": 0.38346433639526367, "learning_rate": 0.00012351343271942708, "loss": 1.3051, "step": 29440 }, { "epoch": 0.3825723674272265, "grad_norm": 0.28971898555755615, "learning_rate": 0.00012351083325751568, "loss": 1.3486, "step": 29441 }, { "epoch": 0.38258536197114235, "grad_norm": 0.4238617420196533, "learning_rate": 0.00012350823379560433, "loss": 1.3721, "step": 29442 }, { "epoch": 0.38259835651505825, "grad_norm": 0.4442681670188904, "learning_rate": 0.00012350563433369293, "loss": 1.4932, "step": 29443 }, { "epoch": 0.3826113510589741, "grad_norm": 0.3989102244377136, "learning_rate": 0.00012350303487178156, "loss": 1.4284, "step": 29444 }, { "epoch": 0.38262434560289, "grad_norm": 0.3324430286884308, "learning_rate": 0.00012350043540987015, "loss": 1.4139, "step": 29445 }, { "epoch": 0.38263734014680584, "grad_norm": 0.39921894669532776, "learning_rate": 0.00012349783594795878, "loss": 1.4239, "step": 29446 }, { "epoch": 0.38265033469072174, "grad_norm": 0.4210700988769531, "learning_rate": 0.0001234952364860474, "loss": 1.4715, "step": 29447 }, { "epoch": 0.3826633292346376, "grad_norm": 0.40407630801200867, "learning_rate": 0.000123492637024136, "loss": 1.3505, "step": 29448 }, { "epoch": 0.3826763237785535, "grad_norm": 0.4380398988723755, "learning_rate": 0.00012349003756222462, "loss": 1.5281, "step": 29449 }, { "epoch": 0.38268931832246933, "grad_norm": 0.3914535641670227, "learning_rate": 0.00012348743810031325, "loss": 1.5056, "step": 29450 }, { "epoch": 0.38270231286638523, "grad_norm": 0.35153651237487793, "learning_rate": 0.00012348483863840185, "loss": 1.4343, "step": 29451 }, { "epoch": 0.3827153074103011, "grad_norm": 0.44689682126045227, "learning_rate": 0.00012348223917649047, "loss": 1.3514, "step": 29452 }, { "epoch": 0.382728301954217, "grad_norm": 0.39732825756073, "learning_rate": 0.0001234796397145791, "loss": 1.4462, "step": 29453 }, { "epoch": 0.3827412964981328, "grad_norm": 0.3514556586742401, "learning_rate": 0.00012347704025266772, "loss": 1.503, "step": 29454 }, { "epoch": 0.3827542910420487, "grad_norm": 0.30261436104774475, "learning_rate": 0.00012347444079075632, "loss": 1.1625, "step": 29455 }, { "epoch": 0.38276728558596457, "grad_norm": 0.29890885949134827, "learning_rate": 0.00012347184132884494, "loss": 1.2394, "step": 29456 }, { "epoch": 0.38278028012988047, "grad_norm": 0.3462114930152893, "learning_rate": 0.00012346924186693357, "loss": 1.2561, "step": 29457 }, { "epoch": 0.3827932746737963, "grad_norm": 0.35676610469818115, "learning_rate": 0.00012346664240502216, "loss": 1.4892, "step": 29458 }, { "epoch": 0.3828062692177122, "grad_norm": 0.45956483483314514, "learning_rate": 0.0001234640429431108, "loss": 1.3537, "step": 29459 }, { "epoch": 0.38281926376162806, "grad_norm": 0.5853527784347534, "learning_rate": 0.00012346144348119938, "loss": 1.4982, "step": 29460 }, { "epoch": 0.38283225830554396, "grad_norm": 0.4398396909236908, "learning_rate": 0.000123458844019288, "loss": 1.3552, "step": 29461 }, { "epoch": 0.38284525284945986, "grad_norm": 0.3814709782600403, "learning_rate": 0.00012345624455737663, "loss": 1.3995, "step": 29462 }, { "epoch": 0.3828582473933757, "grad_norm": 0.49610692262649536, "learning_rate": 0.00012345364509546523, "loss": 1.5677, "step": 29463 }, { "epoch": 0.3828712419372916, "grad_norm": 0.4440975785255432, "learning_rate": 0.00012345104563355386, "loss": 1.4149, "step": 29464 }, { "epoch": 0.38288423648120745, "grad_norm": 0.39127984642982483, "learning_rate": 0.00012344844617164248, "loss": 1.3244, "step": 29465 }, { "epoch": 0.38289723102512335, "grad_norm": 0.4027152955532074, "learning_rate": 0.0001234458467097311, "loss": 1.294, "step": 29466 }, { "epoch": 0.3829102255690392, "grad_norm": 0.42077744007110596, "learning_rate": 0.0001234432472478197, "loss": 1.4829, "step": 29467 }, { "epoch": 0.3829232201129551, "grad_norm": 0.3501160442829132, "learning_rate": 0.00012344064778590833, "loss": 1.3497, "step": 29468 }, { "epoch": 0.38293621465687094, "grad_norm": 0.427538126707077, "learning_rate": 0.00012343804832399695, "loss": 1.387, "step": 29469 }, { "epoch": 0.38294920920078684, "grad_norm": 0.3432125449180603, "learning_rate": 0.00012343544886208555, "loss": 1.4066, "step": 29470 }, { "epoch": 0.3829622037447027, "grad_norm": 0.30831846594810486, "learning_rate": 0.00012343284940017417, "loss": 1.2973, "step": 29471 }, { "epoch": 0.3829751982886186, "grad_norm": 0.4371612071990967, "learning_rate": 0.00012343024993826277, "loss": 1.529, "step": 29472 }, { "epoch": 0.38298819283253444, "grad_norm": 0.44449731707572937, "learning_rate": 0.00012342765047635142, "loss": 1.4206, "step": 29473 }, { "epoch": 0.38300118737645034, "grad_norm": 0.3294375538825989, "learning_rate": 0.00012342505101444002, "loss": 1.2176, "step": 29474 }, { "epoch": 0.3830141819203662, "grad_norm": 0.43963056802749634, "learning_rate": 0.00012342245155252862, "loss": 1.3801, "step": 29475 }, { "epoch": 0.3830271764642821, "grad_norm": 0.3999042212963104, "learning_rate": 0.00012341985209061724, "loss": 1.366, "step": 29476 }, { "epoch": 0.3830401710081979, "grad_norm": 0.33256688714027405, "learning_rate": 0.00012341725262870587, "loss": 1.2743, "step": 29477 }, { "epoch": 0.38305316555211383, "grad_norm": 0.3898864686489105, "learning_rate": 0.0001234146531667945, "loss": 1.5186, "step": 29478 }, { "epoch": 0.3830661600960297, "grad_norm": 0.45742136240005493, "learning_rate": 0.0001234120537048831, "loss": 1.4502, "step": 29479 }, { "epoch": 0.3830791546399456, "grad_norm": 0.4187413156032562, "learning_rate": 0.0001234094542429717, "loss": 1.4334, "step": 29480 }, { "epoch": 0.3830921491838614, "grad_norm": 0.3867330253124237, "learning_rate": 0.00012340685478106034, "loss": 1.5103, "step": 29481 }, { "epoch": 0.3831051437277773, "grad_norm": 0.2904622554779053, "learning_rate": 0.00012340425531914893, "loss": 0.9805, "step": 29482 }, { "epoch": 0.38311813827169316, "grad_norm": 0.4402660131454468, "learning_rate": 0.00012340165585723756, "loss": 1.4872, "step": 29483 }, { "epoch": 0.38313113281560907, "grad_norm": 0.4186619222164154, "learning_rate": 0.00012339905639532616, "loss": 1.4515, "step": 29484 }, { "epoch": 0.3831441273595249, "grad_norm": 0.3732668459415436, "learning_rate": 0.0001233964569334148, "loss": 1.5705, "step": 29485 }, { "epoch": 0.3831571219034408, "grad_norm": 0.4282355010509491, "learning_rate": 0.0001233938574715034, "loss": 1.4904, "step": 29486 }, { "epoch": 0.38317011644735666, "grad_norm": 0.36313551664352417, "learning_rate": 0.000123391258009592, "loss": 1.4112, "step": 29487 }, { "epoch": 0.38318311099127256, "grad_norm": 0.44452571868896484, "learning_rate": 0.00012338865854768065, "loss": 1.4113, "step": 29488 }, { "epoch": 0.3831961055351884, "grad_norm": 0.4374120235443115, "learning_rate": 0.00012338605908576925, "loss": 1.4564, "step": 29489 }, { "epoch": 0.3832091000791043, "grad_norm": 0.44149017333984375, "learning_rate": 0.00012338345962385788, "loss": 1.5246, "step": 29490 }, { "epoch": 0.38322209462302015, "grad_norm": 0.27829477190971375, "learning_rate": 0.00012338086016194647, "loss": 1.22, "step": 29491 }, { "epoch": 0.38323508916693605, "grad_norm": 0.3236914575099945, "learning_rate": 0.0001233782607000351, "loss": 1.299, "step": 29492 }, { "epoch": 0.3832480837108519, "grad_norm": 0.49193939566612244, "learning_rate": 0.00012337566123812372, "loss": 1.5156, "step": 29493 }, { "epoch": 0.3832610782547678, "grad_norm": 0.37566548585891724, "learning_rate": 0.00012337306177621232, "loss": 1.4679, "step": 29494 }, { "epoch": 0.38327407279868364, "grad_norm": 0.46173906326293945, "learning_rate": 0.00012337046231430094, "loss": 1.4902, "step": 29495 }, { "epoch": 0.38328706734259954, "grad_norm": 0.44643354415893555, "learning_rate": 0.00012336786285238957, "loss": 1.3632, "step": 29496 }, { "epoch": 0.3833000618865154, "grad_norm": 0.395754337310791, "learning_rate": 0.0001233652633904782, "loss": 1.5402, "step": 29497 }, { "epoch": 0.3833130564304313, "grad_norm": 0.42827412486076355, "learning_rate": 0.0001233626639285668, "loss": 1.3144, "step": 29498 }, { "epoch": 0.38332605097434713, "grad_norm": 0.5095736980438232, "learning_rate": 0.00012336006446665541, "loss": 1.2139, "step": 29499 }, { "epoch": 0.38333904551826303, "grad_norm": 0.406919926404953, "learning_rate": 0.00012335746500474404, "loss": 1.4349, "step": 29500 }, { "epoch": 0.3833520400621789, "grad_norm": 0.31829166412353516, "learning_rate": 0.00012335486554283264, "loss": 1.3305, "step": 29501 }, { "epoch": 0.3833650346060948, "grad_norm": 0.3732173442840576, "learning_rate": 0.00012335226608092126, "loss": 1.5454, "step": 29502 }, { "epoch": 0.3833780291500106, "grad_norm": 0.513748049736023, "learning_rate": 0.00012334966661900986, "loss": 1.3684, "step": 29503 }, { "epoch": 0.3833910236939265, "grad_norm": 0.34807541966438293, "learning_rate": 0.00012334706715709848, "loss": 1.336, "step": 29504 }, { "epoch": 0.38340401823784237, "grad_norm": 0.37416428327560425, "learning_rate": 0.0001233444676951871, "loss": 1.4171, "step": 29505 }, { "epoch": 0.38341701278175827, "grad_norm": 0.4538436532020569, "learning_rate": 0.0001233418682332757, "loss": 1.3917, "step": 29506 }, { "epoch": 0.3834300073256741, "grad_norm": 0.3606007993221283, "learning_rate": 0.00012333926877136433, "loss": 1.6012, "step": 29507 }, { "epoch": 0.38344300186959, "grad_norm": 0.47051510214805603, "learning_rate": 0.00012333666930945295, "loss": 1.519, "step": 29508 }, { "epoch": 0.38345599641350586, "grad_norm": 0.3099076747894287, "learning_rate": 0.00012333406984754158, "loss": 1.3667, "step": 29509 }, { "epoch": 0.38346899095742176, "grad_norm": 0.32664576172828674, "learning_rate": 0.00012333147038563018, "loss": 1.5163, "step": 29510 }, { "epoch": 0.3834819855013376, "grad_norm": 0.446612685918808, "learning_rate": 0.0001233288709237188, "loss": 1.4837, "step": 29511 }, { "epoch": 0.3834949800452535, "grad_norm": 0.3849194645881653, "learning_rate": 0.00012332627146180742, "loss": 1.3815, "step": 29512 }, { "epoch": 0.38350797458916935, "grad_norm": 0.4123111069202423, "learning_rate": 0.00012332367199989602, "loss": 1.4184, "step": 29513 }, { "epoch": 0.38352096913308525, "grad_norm": 0.40780967473983765, "learning_rate": 0.00012332107253798465, "loss": 1.3085, "step": 29514 }, { "epoch": 0.3835339636770011, "grad_norm": 0.34047502279281616, "learning_rate": 0.00012331847307607324, "loss": 1.3118, "step": 29515 }, { "epoch": 0.383546958220917, "grad_norm": 0.4311307370662689, "learning_rate": 0.00012331587361416187, "loss": 1.2915, "step": 29516 }, { "epoch": 0.38355995276483285, "grad_norm": 0.3975888192653656, "learning_rate": 0.0001233132741522505, "loss": 1.3128, "step": 29517 }, { "epoch": 0.38357294730874875, "grad_norm": 0.4133479595184326, "learning_rate": 0.0001233106746903391, "loss": 1.2339, "step": 29518 }, { "epoch": 0.3835859418526646, "grad_norm": 0.3120031952857971, "learning_rate": 0.00012330807522842771, "loss": 1.3423, "step": 29519 }, { "epoch": 0.3835989363965805, "grad_norm": 0.4945957660675049, "learning_rate": 0.00012330547576651634, "loss": 1.4794, "step": 29520 }, { "epoch": 0.38361193094049634, "grad_norm": 0.40636706352233887, "learning_rate": 0.00012330287630460496, "loss": 1.3666, "step": 29521 }, { "epoch": 0.38362492548441224, "grad_norm": 0.3585086762905121, "learning_rate": 0.00012330027684269356, "loss": 1.4693, "step": 29522 }, { "epoch": 0.3836379200283281, "grad_norm": 0.45814022421836853, "learning_rate": 0.00012329767738078218, "loss": 1.3958, "step": 29523 }, { "epoch": 0.383650914572244, "grad_norm": 0.37405315041542053, "learning_rate": 0.0001232950779188708, "loss": 1.3487, "step": 29524 }, { "epoch": 0.38366390911615983, "grad_norm": 0.3508339822292328, "learning_rate": 0.0001232924784569594, "loss": 1.3029, "step": 29525 }, { "epoch": 0.38367690366007573, "grad_norm": 0.5569481253623962, "learning_rate": 0.00012328987899504803, "loss": 1.3542, "step": 29526 }, { "epoch": 0.3836898982039916, "grad_norm": 0.43022823333740234, "learning_rate": 0.00012328727953313666, "loss": 1.4787, "step": 29527 }, { "epoch": 0.3837028927479075, "grad_norm": 0.45691606402397156, "learning_rate": 0.00012328468007122528, "loss": 1.5234, "step": 29528 }, { "epoch": 0.3837158872918233, "grad_norm": 0.444759339094162, "learning_rate": 0.00012328208060931388, "loss": 1.3691, "step": 29529 }, { "epoch": 0.3837288818357392, "grad_norm": 0.4338513910770416, "learning_rate": 0.00012327948114740247, "loss": 1.3558, "step": 29530 }, { "epoch": 0.38374187637965507, "grad_norm": 0.44795432686805725, "learning_rate": 0.00012327688168549113, "loss": 1.5349, "step": 29531 }, { "epoch": 0.38375487092357097, "grad_norm": 0.5515525937080383, "learning_rate": 0.00012327428222357972, "loss": 1.4683, "step": 29532 }, { "epoch": 0.3837678654674868, "grad_norm": 0.3268814980983734, "learning_rate": 0.00012327168276166835, "loss": 1.5168, "step": 29533 }, { "epoch": 0.3837808600114027, "grad_norm": 0.32242539525032043, "learning_rate": 0.00012326908329975695, "loss": 1.4235, "step": 29534 }, { "epoch": 0.38379385455531856, "grad_norm": 0.35423004627227783, "learning_rate": 0.00012326648383784557, "loss": 1.4055, "step": 29535 }, { "epoch": 0.38380684909923446, "grad_norm": 0.3132795989513397, "learning_rate": 0.0001232638843759342, "loss": 1.2203, "step": 29536 }, { "epoch": 0.3838198436431503, "grad_norm": 0.2962224781513214, "learning_rate": 0.0001232612849140228, "loss": 1.3078, "step": 29537 }, { "epoch": 0.3838328381870662, "grad_norm": 0.4085319936275482, "learning_rate": 0.00012325868545211142, "loss": 1.553, "step": 29538 }, { "epoch": 0.3838458327309821, "grad_norm": 0.4380180239677429, "learning_rate": 0.00012325608599020004, "loss": 1.3859, "step": 29539 }, { "epoch": 0.38385882727489795, "grad_norm": 0.34929758310317993, "learning_rate": 0.00012325348652828867, "loss": 1.3987, "step": 29540 }, { "epoch": 0.38387182181881385, "grad_norm": 0.430632621049881, "learning_rate": 0.00012325088706637726, "loss": 1.3116, "step": 29541 }, { "epoch": 0.3838848163627297, "grad_norm": 0.41115549206733704, "learning_rate": 0.00012324828760446586, "loss": 1.475, "step": 29542 }, { "epoch": 0.3838978109066456, "grad_norm": 0.4312101900577545, "learning_rate": 0.0001232456881425545, "loss": 1.3531, "step": 29543 }, { "epoch": 0.38391080545056144, "grad_norm": 0.423052042722702, "learning_rate": 0.0001232430886806431, "loss": 1.5625, "step": 29544 }, { "epoch": 0.38392379999447734, "grad_norm": 0.39454153180122375, "learning_rate": 0.00012324048921873173, "loss": 1.4679, "step": 29545 }, { "epoch": 0.3839367945383932, "grad_norm": 0.4111841917037964, "learning_rate": 0.00012323788975682033, "loss": 1.4577, "step": 29546 }, { "epoch": 0.3839497890823091, "grad_norm": 0.3759179711341858, "learning_rate": 0.00012323529029490896, "loss": 1.4753, "step": 29547 }, { "epoch": 0.38396278362622493, "grad_norm": 0.3814232349395752, "learning_rate": 0.00012323269083299758, "loss": 1.2555, "step": 29548 }, { "epoch": 0.38397577817014084, "grad_norm": 0.41831669211387634, "learning_rate": 0.00012323009137108618, "loss": 1.6321, "step": 29549 }, { "epoch": 0.3839887727140567, "grad_norm": 0.34405896067619324, "learning_rate": 0.0001232274919091748, "loss": 1.381, "step": 29550 }, { "epoch": 0.3840017672579726, "grad_norm": 0.3482199013233185, "learning_rate": 0.00012322489244726343, "loss": 1.4558, "step": 29551 }, { "epoch": 0.3840147618018884, "grad_norm": 0.47335493564605713, "learning_rate": 0.00012322229298535205, "loss": 1.4289, "step": 29552 }, { "epoch": 0.3840277563458043, "grad_norm": 0.4393950402736664, "learning_rate": 0.00012321969352344065, "loss": 1.4487, "step": 29553 }, { "epoch": 0.3840407508897202, "grad_norm": 0.2793898284435272, "learning_rate": 0.00012321709406152925, "loss": 1.3675, "step": 29554 }, { "epoch": 0.3840537454336361, "grad_norm": 0.3861125409603119, "learning_rate": 0.0001232144945996179, "loss": 1.1869, "step": 29555 }, { "epoch": 0.3840667399775519, "grad_norm": 0.30514398217201233, "learning_rate": 0.0001232118951377065, "loss": 1.1335, "step": 29556 }, { "epoch": 0.3840797345214678, "grad_norm": 0.3920758068561554, "learning_rate": 0.00012320929567579512, "loss": 1.2021, "step": 29557 }, { "epoch": 0.38409272906538366, "grad_norm": 0.29421690106391907, "learning_rate": 0.00012320669621388372, "loss": 1.3537, "step": 29558 }, { "epoch": 0.38410572360929957, "grad_norm": 0.39906632900238037, "learning_rate": 0.00012320409675197234, "loss": 1.5168, "step": 29559 }, { "epoch": 0.3841187181532154, "grad_norm": 0.36401134729385376, "learning_rate": 0.00012320149729006097, "loss": 1.492, "step": 29560 }, { "epoch": 0.3841317126971313, "grad_norm": 0.41493383049964905, "learning_rate": 0.00012319889782814956, "loss": 1.417, "step": 29561 }, { "epoch": 0.38414470724104716, "grad_norm": 0.38168710470199585, "learning_rate": 0.00012319629836623821, "loss": 1.4164, "step": 29562 }, { "epoch": 0.38415770178496306, "grad_norm": 0.4548499584197998, "learning_rate": 0.0001231936989043268, "loss": 1.4051, "step": 29563 }, { "epoch": 0.3841706963288789, "grad_norm": 0.4543074667453766, "learning_rate": 0.00012319109944241544, "loss": 1.5098, "step": 29564 }, { "epoch": 0.3841836908727948, "grad_norm": 0.32889440655708313, "learning_rate": 0.00012318849998050403, "loss": 1.2761, "step": 29565 }, { "epoch": 0.38419668541671065, "grad_norm": 0.6412518620491028, "learning_rate": 0.00012318590051859266, "loss": 1.471, "step": 29566 }, { "epoch": 0.38420967996062655, "grad_norm": 0.4148516058921814, "learning_rate": 0.00012318330105668128, "loss": 1.6807, "step": 29567 }, { "epoch": 0.3842226745045424, "grad_norm": 0.38719743490219116, "learning_rate": 0.00012318070159476988, "loss": 1.2277, "step": 29568 }, { "epoch": 0.3842356690484583, "grad_norm": 0.4391591548919678, "learning_rate": 0.0001231781021328585, "loss": 1.6889, "step": 29569 }, { "epoch": 0.38424866359237414, "grad_norm": 0.3942606449127197, "learning_rate": 0.00012317550267094713, "loss": 1.3006, "step": 29570 }, { "epoch": 0.38426165813629004, "grad_norm": 0.3238622546195984, "learning_rate": 0.00012317290320903573, "loss": 1.3655, "step": 29571 }, { "epoch": 0.3842746526802059, "grad_norm": 0.4696352183818817, "learning_rate": 0.00012317030374712435, "loss": 1.4848, "step": 29572 }, { "epoch": 0.3842876472241218, "grad_norm": 0.4001320004463196, "learning_rate": 0.00012316770428521295, "loss": 1.4334, "step": 29573 }, { "epoch": 0.38430064176803763, "grad_norm": 0.4030158221721649, "learning_rate": 0.0001231651048233016, "loss": 1.3915, "step": 29574 }, { "epoch": 0.38431363631195353, "grad_norm": 0.3935050964355469, "learning_rate": 0.0001231625053613902, "loss": 1.5405, "step": 29575 }, { "epoch": 0.3843266308558694, "grad_norm": 0.33803674578666687, "learning_rate": 0.00012315990589947882, "loss": 1.2854, "step": 29576 }, { "epoch": 0.3843396253997853, "grad_norm": 0.538811445236206, "learning_rate": 0.00012315730643756742, "loss": 1.5299, "step": 29577 }, { "epoch": 0.3843526199437011, "grad_norm": 0.3705592453479767, "learning_rate": 0.00012315470697565604, "loss": 1.4275, "step": 29578 }, { "epoch": 0.384365614487617, "grad_norm": 0.32899147272109985, "learning_rate": 0.00012315210751374467, "loss": 1.321, "step": 29579 }, { "epoch": 0.38437860903153287, "grad_norm": 0.4022553861141205, "learning_rate": 0.00012314950805183327, "loss": 1.3398, "step": 29580 }, { "epoch": 0.38439160357544877, "grad_norm": 0.4059377610683441, "learning_rate": 0.0001231469085899219, "loss": 1.4669, "step": 29581 }, { "epoch": 0.3844045981193646, "grad_norm": 0.2949143946170807, "learning_rate": 0.00012314430912801051, "loss": 1.5091, "step": 29582 }, { "epoch": 0.3844175926632805, "grad_norm": 0.2948555648326874, "learning_rate": 0.0001231417096660991, "loss": 1.2982, "step": 29583 }, { "epoch": 0.38443058720719636, "grad_norm": 0.5847094655036926, "learning_rate": 0.00012313911020418774, "loss": 1.6183, "step": 29584 }, { "epoch": 0.38444358175111226, "grad_norm": 0.4073833227157593, "learning_rate": 0.00012313651074227633, "loss": 1.328, "step": 29585 }, { "epoch": 0.3844565762950281, "grad_norm": 0.3363688588142395, "learning_rate": 0.00012313391128036499, "loss": 1.2448, "step": 29586 }, { "epoch": 0.384469570838944, "grad_norm": 0.36529943346977234, "learning_rate": 0.00012313131181845358, "loss": 1.2179, "step": 29587 }, { "epoch": 0.38448256538285985, "grad_norm": 0.4881378412246704, "learning_rate": 0.0001231287123565422, "loss": 1.3075, "step": 29588 }, { "epoch": 0.38449555992677575, "grad_norm": 0.4256056249141693, "learning_rate": 0.0001231261128946308, "loss": 1.4528, "step": 29589 }, { "epoch": 0.3845085544706916, "grad_norm": 0.3573410212993622, "learning_rate": 0.00012312351343271943, "loss": 1.1659, "step": 29590 }, { "epoch": 0.3845215490146075, "grad_norm": 0.40770286321640015, "learning_rate": 0.00012312091397080805, "loss": 1.4056, "step": 29591 }, { "epoch": 0.38453454355852335, "grad_norm": 0.44021254777908325, "learning_rate": 0.00012311831450889665, "loss": 1.3254, "step": 29592 }, { "epoch": 0.38454753810243925, "grad_norm": 0.2869035005569458, "learning_rate": 0.00012311571504698528, "loss": 1.2718, "step": 29593 }, { "epoch": 0.3845605326463551, "grad_norm": 0.4177190363407135, "learning_rate": 0.0001231131155850739, "loss": 1.5365, "step": 29594 }, { "epoch": 0.384573527190271, "grad_norm": 0.39106976985931396, "learning_rate": 0.00012311051612316252, "loss": 1.4844, "step": 29595 }, { "epoch": 0.38458652173418684, "grad_norm": 0.3891622722148895, "learning_rate": 0.00012310791666125112, "loss": 1.3454, "step": 29596 }, { "epoch": 0.38459951627810274, "grad_norm": 0.3690756559371948, "learning_rate": 0.00012310531719933975, "loss": 1.397, "step": 29597 }, { "epoch": 0.3846125108220186, "grad_norm": 0.4759562313556671, "learning_rate": 0.00012310271773742837, "loss": 1.3455, "step": 29598 }, { "epoch": 0.3846255053659345, "grad_norm": 0.3874732553958893, "learning_rate": 0.00012310011827551697, "loss": 1.3089, "step": 29599 }, { "epoch": 0.38463849990985033, "grad_norm": 0.4730784595012665, "learning_rate": 0.0001230975188136056, "loss": 1.5655, "step": 29600 }, { "epoch": 0.38465149445376623, "grad_norm": 0.40431496500968933, "learning_rate": 0.00012309491935169422, "loss": 1.3822, "step": 29601 }, { "epoch": 0.3846644889976821, "grad_norm": 0.4325055181980133, "learning_rate": 0.00012309231988978281, "loss": 1.399, "step": 29602 }, { "epoch": 0.384677483541598, "grad_norm": 0.40868815779685974, "learning_rate": 0.00012308972042787144, "loss": 1.532, "step": 29603 }, { "epoch": 0.3846904780855138, "grad_norm": 0.3345896899700165, "learning_rate": 0.00012308712096596004, "loss": 1.4445, "step": 29604 }, { "epoch": 0.3847034726294297, "grad_norm": 0.4426919221878052, "learning_rate": 0.0001230845215040487, "loss": 1.3472, "step": 29605 }, { "epoch": 0.38471646717334557, "grad_norm": 0.43984127044677734, "learning_rate": 0.00012308192204213729, "loss": 1.6056, "step": 29606 }, { "epoch": 0.38472946171726147, "grad_norm": 0.43081796169281006, "learning_rate": 0.0001230793225802259, "loss": 1.5311, "step": 29607 }, { "epoch": 0.3847424562611773, "grad_norm": 0.3965555429458618, "learning_rate": 0.0001230767231183145, "loss": 1.4376, "step": 29608 }, { "epoch": 0.3847554508050932, "grad_norm": 0.45082512497901917, "learning_rate": 0.00012307412365640313, "loss": 1.2607, "step": 29609 }, { "epoch": 0.38476844534900906, "grad_norm": 0.4424546957015991, "learning_rate": 0.00012307152419449176, "loss": 1.3874, "step": 29610 }, { "epoch": 0.38478143989292496, "grad_norm": 0.38146525621414185, "learning_rate": 0.00012306892473258035, "loss": 1.3446, "step": 29611 }, { "epoch": 0.3847944344368408, "grad_norm": 0.4125452935695648, "learning_rate": 0.00012306632527066898, "loss": 1.585, "step": 29612 }, { "epoch": 0.3848074289807567, "grad_norm": 0.3718058466911316, "learning_rate": 0.0001230637258087576, "loss": 1.3402, "step": 29613 }, { "epoch": 0.3848204235246726, "grad_norm": 0.3712333142757416, "learning_rate": 0.0001230611263468462, "loss": 1.4449, "step": 29614 }, { "epoch": 0.38483341806858845, "grad_norm": 0.4591827392578125, "learning_rate": 0.00012305852688493482, "loss": 1.3396, "step": 29615 }, { "epoch": 0.38484641261250435, "grad_norm": 0.38640880584716797, "learning_rate": 0.00012305592742302342, "loss": 1.2972, "step": 29616 }, { "epoch": 0.3848594071564202, "grad_norm": 0.3804060220718384, "learning_rate": 0.00012305332796111207, "loss": 1.4104, "step": 29617 }, { "epoch": 0.3848724017003361, "grad_norm": 0.4230775237083435, "learning_rate": 0.00012305072849920067, "loss": 1.441, "step": 29618 }, { "epoch": 0.38488539624425194, "grad_norm": 0.3696955740451813, "learning_rate": 0.0001230481290372893, "loss": 1.5557, "step": 29619 }, { "epoch": 0.38489839078816784, "grad_norm": 0.38091495633125305, "learning_rate": 0.0001230455295753779, "loss": 1.3525, "step": 29620 }, { "epoch": 0.3849113853320837, "grad_norm": 0.5071821808815002, "learning_rate": 0.00012304293011346652, "loss": 1.5072, "step": 29621 }, { "epoch": 0.3849243798759996, "grad_norm": 0.40129274129867554, "learning_rate": 0.00012304033065155514, "loss": 1.475, "step": 29622 }, { "epoch": 0.38493737441991543, "grad_norm": 0.42611274123191833, "learning_rate": 0.00012303773118964374, "loss": 1.3045, "step": 29623 }, { "epoch": 0.38495036896383134, "grad_norm": 0.28798148036003113, "learning_rate": 0.00012303513172773236, "loss": 1.4347, "step": 29624 }, { "epoch": 0.3849633635077472, "grad_norm": 0.30114781856536865, "learning_rate": 0.000123032532265821, "loss": 1.444, "step": 29625 }, { "epoch": 0.3849763580516631, "grad_norm": 0.33857262134552, "learning_rate": 0.00012302993280390959, "loss": 1.2281, "step": 29626 }, { "epoch": 0.3849893525955789, "grad_norm": 0.34680691361427307, "learning_rate": 0.0001230273333419982, "loss": 1.4113, "step": 29627 }, { "epoch": 0.3850023471394948, "grad_norm": 0.36102545261383057, "learning_rate": 0.0001230247338800868, "loss": 1.3754, "step": 29628 }, { "epoch": 0.3850153416834107, "grad_norm": 0.42711347341537476, "learning_rate": 0.00012302213441817546, "loss": 1.3388, "step": 29629 }, { "epoch": 0.3850283362273266, "grad_norm": 0.36953380703926086, "learning_rate": 0.00012301953495626406, "loss": 1.5219, "step": 29630 }, { "epoch": 0.3850413307712424, "grad_norm": 0.3848010301589966, "learning_rate": 0.00012301693549435268, "loss": 1.216, "step": 29631 }, { "epoch": 0.3850543253151583, "grad_norm": 0.3525538742542267, "learning_rate": 0.00012301433603244128, "loss": 1.3489, "step": 29632 }, { "epoch": 0.38506731985907416, "grad_norm": 0.4170626997947693, "learning_rate": 0.0001230117365705299, "loss": 1.3912, "step": 29633 }, { "epoch": 0.38508031440299006, "grad_norm": 0.4323761463165283, "learning_rate": 0.00012300913710861853, "loss": 1.4716, "step": 29634 }, { "epoch": 0.3850933089469059, "grad_norm": 0.4812493622303009, "learning_rate": 0.00012300653764670712, "loss": 1.545, "step": 29635 }, { "epoch": 0.3851063034908218, "grad_norm": 0.3876829445362091, "learning_rate": 0.00012300393818479578, "loss": 1.562, "step": 29636 }, { "epoch": 0.38511929803473766, "grad_norm": 0.4575363099575043, "learning_rate": 0.00012300133872288437, "loss": 1.467, "step": 29637 }, { "epoch": 0.38513229257865356, "grad_norm": 0.411072701215744, "learning_rate": 0.00012299873926097297, "loss": 1.364, "step": 29638 }, { "epoch": 0.3851452871225694, "grad_norm": 0.3790915906429291, "learning_rate": 0.0001229961397990616, "loss": 1.4937, "step": 29639 }, { "epoch": 0.3851582816664853, "grad_norm": 0.3745225667953491, "learning_rate": 0.00012299354033715022, "loss": 1.4222, "step": 29640 }, { "epoch": 0.38517127621040115, "grad_norm": 0.48106110095977783, "learning_rate": 0.00012299094087523884, "loss": 1.6468, "step": 29641 }, { "epoch": 0.38518427075431705, "grad_norm": 0.39493224024772644, "learning_rate": 0.00012298834141332744, "loss": 1.5423, "step": 29642 }, { "epoch": 0.3851972652982329, "grad_norm": 0.3688465356826782, "learning_rate": 0.00012298574195141607, "loss": 1.3025, "step": 29643 }, { "epoch": 0.3852102598421488, "grad_norm": 0.38973426818847656, "learning_rate": 0.0001229831424895047, "loss": 1.4366, "step": 29644 }, { "epoch": 0.38522325438606464, "grad_norm": 0.4233192801475525, "learning_rate": 0.0001229805430275933, "loss": 1.4256, "step": 29645 }, { "epoch": 0.38523624892998054, "grad_norm": 0.34802868962287903, "learning_rate": 0.0001229779435656819, "loss": 1.3975, "step": 29646 }, { "epoch": 0.3852492434738964, "grad_norm": 0.4500443935394287, "learning_rate": 0.0001229753441037705, "loss": 1.6002, "step": 29647 }, { "epoch": 0.3852622380178123, "grad_norm": 0.3810059726238251, "learning_rate": 0.00012297274464185916, "loss": 1.2729, "step": 29648 }, { "epoch": 0.38527523256172813, "grad_norm": 0.5015361309051514, "learning_rate": 0.00012297014517994776, "loss": 1.2725, "step": 29649 }, { "epoch": 0.38528822710564403, "grad_norm": 0.38909563422203064, "learning_rate": 0.00012296754571803638, "loss": 1.3373, "step": 29650 }, { "epoch": 0.3853012216495599, "grad_norm": 0.36756131052970886, "learning_rate": 0.00012296494625612498, "loss": 1.2384, "step": 29651 }, { "epoch": 0.3853142161934758, "grad_norm": 0.34497302770614624, "learning_rate": 0.0001229623467942136, "loss": 1.2016, "step": 29652 }, { "epoch": 0.3853272107373916, "grad_norm": 0.38281774520874023, "learning_rate": 0.00012295974733230223, "loss": 1.3533, "step": 29653 }, { "epoch": 0.3853402052813075, "grad_norm": 0.41373544931411743, "learning_rate": 0.00012295714787039083, "loss": 1.3843, "step": 29654 }, { "epoch": 0.38535319982522337, "grad_norm": 0.4446506202220917, "learning_rate": 0.00012295454840847945, "loss": 1.5086, "step": 29655 }, { "epoch": 0.38536619436913927, "grad_norm": 0.43855687975883484, "learning_rate": 0.00012295194894656808, "loss": 1.448, "step": 29656 }, { "epoch": 0.3853791889130551, "grad_norm": 0.4544562101364136, "learning_rate": 0.00012294934948465667, "loss": 1.2667, "step": 29657 }, { "epoch": 0.385392183456971, "grad_norm": 0.39347976446151733, "learning_rate": 0.0001229467500227453, "loss": 1.1744, "step": 29658 }, { "epoch": 0.38540517800088686, "grad_norm": 0.36112740635871887, "learning_rate": 0.0001229441505608339, "loss": 1.1105, "step": 29659 }, { "epoch": 0.38541817254480276, "grad_norm": 0.43231871724128723, "learning_rate": 0.00012294155109892255, "loss": 1.3788, "step": 29660 }, { "epoch": 0.3854311670887186, "grad_norm": 0.4059285819530487, "learning_rate": 0.00012293895163701114, "loss": 1.2127, "step": 29661 }, { "epoch": 0.3854441616326345, "grad_norm": 0.38333094120025635, "learning_rate": 0.00012293635217509977, "loss": 1.419, "step": 29662 }, { "epoch": 0.38545715617655035, "grad_norm": 0.4240880012512207, "learning_rate": 0.00012293375271318837, "loss": 1.3824, "step": 29663 }, { "epoch": 0.38547015072046625, "grad_norm": 0.4265848994255066, "learning_rate": 0.000122931153251277, "loss": 1.29, "step": 29664 }, { "epoch": 0.3854831452643821, "grad_norm": 0.3585989475250244, "learning_rate": 0.00012292855378936561, "loss": 1.3191, "step": 29665 }, { "epoch": 0.385496139808298, "grad_norm": 0.44446685910224915, "learning_rate": 0.0001229259543274542, "loss": 1.5389, "step": 29666 }, { "epoch": 0.38550913435221384, "grad_norm": 0.43914347887039185, "learning_rate": 0.00012292335486554284, "loss": 1.3775, "step": 29667 }, { "epoch": 0.38552212889612975, "grad_norm": 0.45096486806869507, "learning_rate": 0.00012292075540363146, "loss": 1.3388, "step": 29668 }, { "epoch": 0.3855351234400456, "grad_norm": 0.45299309492111206, "learning_rate": 0.00012291815594172006, "loss": 1.2909, "step": 29669 }, { "epoch": 0.3855481179839615, "grad_norm": 0.3884403109550476, "learning_rate": 0.00012291555647980868, "loss": 1.4764, "step": 29670 }, { "epoch": 0.38556111252787734, "grad_norm": 0.37652429938316345, "learning_rate": 0.0001229129570178973, "loss": 1.3974, "step": 29671 }, { "epoch": 0.38557410707179324, "grad_norm": 0.42271530628204346, "learning_rate": 0.00012291035755598593, "loss": 1.617, "step": 29672 }, { "epoch": 0.3855871016157091, "grad_norm": 0.4457968771457672, "learning_rate": 0.00012290775809407453, "loss": 1.2743, "step": 29673 }, { "epoch": 0.385600096159625, "grad_norm": 0.4090537130832672, "learning_rate": 0.00012290515863216315, "loss": 1.2524, "step": 29674 }, { "epoch": 0.38561309070354083, "grad_norm": 0.4532301425933838, "learning_rate": 0.00012290255917025178, "loss": 1.5549, "step": 29675 }, { "epoch": 0.38562608524745673, "grad_norm": 0.39488521218299866, "learning_rate": 0.00012289995970834038, "loss": 1.1212, "step": 29676 }, { "epoch": 0.3856390797913726, "grad_norm": 0.3525855243206024, "learning_rate": 0.000122897360246429, "loss": 1.1209, "step": 29677 }, { "epoch": 0.3856520743352885, "grad_norm": 0.346203476190567, "learning_rate": 0.0001228947607845176, "loss": 1.1241, "step": 29678 }, { "epoch": 0.3856650688792043, "grad_norm": 0.43990054726600647, "learning_rate": 0.00012289216132260625, "loss": 1.5306, "step": 29679 }, { "epoch": 0.3856780634231202, "grad_norm": 0.340364933013916, "learning_rate": 0.00012288956186069485, "loss": 1.2509, "step": 29680 }, { "epoch": 0.38569105796703607, "grad_norm": 0.37132421135902405, "learning_rate": 0.00012288696239878344, "loss": 1.2538, "step": 29681 }, { "epoch": 0.38570405251095197, "grad_norm": 0.4005928337574005, "learning_rate": 0.00012288436293687207, "loss": 1.2828, "step": 29682 }, { "epoch": 0.3857170470548678, "grad_norm": 0.4884699881076813, "learning_rate": 0.0001228817634749607, "loss": 1.4636, "step": 29683 }, { "epoch": 0.3857300415987837, "grad_norm": 0.5030505061149597, "learning_rate": 0.00012287916401304932, "loss": 1.482, "step": 29684 }, { "epoch": 0.38574303614269956, "grad_norm": 0.3435817360877991, "learning_rate": 0.00012287656455113791, "loss": 1.4532, "step": 29685 }, { "epoch": 0.38575603068661546, "grad_norm": 0.2859126329421997, "learning_rate": 0.00012287396508922654, "loss": 1.2355, "step": 29686 }, { "epoch": 0.3857690252305313, "grad_norm": 0.32572945952415466, "learning_rate": 0.00012287136562731516, "loss": 1.301, "step": 29687 }, { "epoch": 0.3857820197744472, "grad_norm": 0.33779045939445496, "learning_rate": 0.00012286876616540376, "loss": 1.2462, "step": 29688 }, { "epoch": 0.38579501431836305, "grad_norm": 0.3701564371585846, "learning_rate": 0.00012286616670349239, "loss": 1.2648, "step": 29689 }, { "epoch": 0.38580800886227895, "grad_norm": 0.4164462983608246, "learning_rate": 0.00012286356724158098, "loss": 1.3668, "step": 29690 }, { "epoch": 0.38582100340619485, "grad_norm": 0.3467954695224762, "learning_rate": 0.00012286096777966963, "loss": 1.5213, "step": 29691 }, { "epoch": 0.3858339979501107, "grad_norm": 0.40671205520629883, "learning_rate": 0.00012285836831775823, "loss": 1.691, "step": 29692 }, { "epoch": 0.3858469924940266, "grad_norm": 0.47208172082901, "learning_rate": 0.00012285576885584683, "loss": 1.4094, "step": 29693 }, { "epoch": 0.38585998703794244, "grad_norm": 0.34101757407188416, "learning_rate": 0.00012285316939393545, "loss": 1.3154, "step": 29694 }, { "epoch": 0.38587298158185834, "grad_norm": 0.3383236229419708, "learning_rate": 0.00012285056993202408, "loss": 1.3567, "step": 29695 }, { "epoch": 0.3858859761257742, "grad_norm": 0.4301341474056244, "learning_rate": 0.0001228479704701127, "loss": 1.4164, "step": 29696 }, { "epoch": 0.3858989706696901, "grad_norm": 0.3285837173461914, "learning_rate": 0.0001228453710082013, "loss": 1.4719, "step": 29697 }, { "epoch": 0.38591196521360593, "grad_norm": 0.38588473200798035, "learning_rate": 0.00012284277154628992, "loss": 1.3768, "step": 29698 }, { "epoch": 0.38592495975752183, "grad_norm": 0.4318685233592987, "learning_rate": 0.00012284017208437855, "loss": 1.2705, "step": 29699 }, { "epoch": 0.3859379543014377, "grad_norm": 0.42043429613113403, "learning_rate": 0.00012283757262246715, "loss": 1.4581, "step": 29700 }, { "epoch": 0.3859509488453536, "grad_norm": 0.4800224304199219, "learning_rate": 0.00012283497316055577, "loss": 1.4179, "step": 29701 }, { "epoch": 0.3859639433892694, "grad_norm": 0.30927640199661255, "learning_rate": 0.00012283237369864437, "loss": 1.2575, "step": 29702 }, { "epoch": 0.3859769379331853, "grad_norm": 0.48445573449134827, "learning_rate": 0.00012282977423673302, "loss": 1.463, "step": 29703 }, { "epoch": 0.38598993247710117, "grad_norm": 0.418739378452301, "learning_rate": 0.00012282717477482162, "loss": 1.4311, "step": 29704 }, { "epoch": 0.3860029270210171, "grad_norm": 0.4125359356403351, "learning_rate": 0.00012282457531291024, "loss": 1.2874, "step": 29705 }, { "epoch": 0.3860159215649329, "grad_norm": 0.37390682101249695, "learning_rate": 0.00012282197585099884, "loss": 1.3034, "step": 29706 }, { "epoch": 0.3860289161088488, "grad_norm": 0.3878862261772156, "learning_rate": 0.00012281937638908746, "loss": 1.3842, "step": 29707 }, { "epoch": 0.38604191065276466, "grad_norm": 0.38967305421829224, "learning_rate": 0.0001228167769271761, "loss": 1.3761, "step": 29708 }, { "epoch": 0.38605490519668056, "grad_norm": 0.3685172200202942, "learning_rate": 0.00012281417746526469, "loss": 1.3453, "step": 29709 }, { "epoch": 0.3860678997405964, "grad_norm": 0.49002605676651, "learning_rate": 0.0001228115780033533, "loss": 1.4268, "step": 29710 }, { "epoch": 0.3860808942845123, "grad_norm": 0.4744281470775604, "learning_rate": 0.00012280897854144193, "loss": 1.4328, "step": 29711 }, { "epoch": 0.38609388882842816, "grad_norm": 0.42512616515159607, "learning_rate": 0.00012280637907953053, "loss": 1.4905, "step": 29712 }, { "epoch": 0.38610688337234406, "grad_norm": 0.4794875383377075, "learning_rate": 0.00012280377961761916, "loss": 1.413, "step": 29713 }, { "epoch": 0.3861198779162599, "grad_norm": 0.4263433516025543, "learning_rate": 0.00012280118015570778, "loss": 1.4963, "step": 29714 }, { "epoch": 0.3861328724601758, "grad_norm": 0.40015482902526855, "learning_rate": 0.0001227985806937964, "loss": 1.4116, "step": 29715 }, { "epoch": 0.38614586700409165, "grad_norm": 0.43053826689720154, "learning_rate": 0.000122795981231885, "loss": 1.3597, "step": 29716 }, { "epoch": 0.38615886154800755, "grad_norm": 0.3474050760269165, "learning_rate": 0.00012279338176997363, "loss": 1.4843, "step": 29717 }, { "epoch": 0.3861718560919234, "grad_norm": 0.26055964827537537, "learning_rate": 0.00012279078230806225, "loss": 1.4287, "step": 29718 }, { "epoch": 0.3861848506358393, "grad_norm": 0.45391812920570374, "learning_rate": 0.00012278818284615085, "loss": 1.4813, "step": 29719 }, { "epoch": 0.38619784517975514, "grad_norm": 0.41253888607025146, "learning_rate": 0.00012278558338423947, "loss": 1.4172, "step": 29720 }, { "epoch": 0.38621083972367104, "grad_norm": 0.41941237449645996, "learning_rate": 0.00012278298392232807, "loss": 1.211, "step": 29721 }, { "epoch": 0.3862238342675869, "grad_norm": 0.4305826425552368, "learning_rate": 0.0001227803844604167, "loss": 1.5506, "step": 29722 }, { "epoch": 0.3862368288115028, "grad_norm": 0.4402470886707306, "learning_rate": 0.00012277778499850532, "loss": 1.6148, "step": 29723 }, { "epoch": 0.38624982335541863, "grad_norm": 0.39274129271507263, "learning_rate": 0.00012277518553659392, "loss": 1.2169, "step": 29724 }, { "epoch": 0.38626281789933453, "grad_norm": 0.33297446370124817, "learning_rate": 0.00012277258607468254, "loss": 1.3153, "step": 29725 }, { "epoch": 0.3862758124432504, "grad_norm": 0.39429450035095215, "learning_rate": 0.00012276998661277117, "loss": 1.3565, "step": 29726 }, { "epoch": 0.3862888069871663, "grad_norm": 0.34069812297821045, "learning_rate": 0.0001227673871508598, "loss": 1.2784, "step": 29727 }, { "epoch": 0.3863018015310821, "grad_norm": 0.4167749583721161, "learning_rate": 0.0001227647876889484, "loss": 1.578, "step": 29728 }, { "epoch": 0.386314796074998, "grad_norm": 0.3590206801891327, "learning_rate": 0.000122762188227037, "loss": 1.3422, "step": 29729 }, { "epoch": 0.38632779061891387, "grad_norm": 0.39976903796195984, "learning_rate": 0.00012275958876512564, "loss": 1.3873, "step": 29730 }, { "epoch": 0.38634078516282977, "grad_norm": 0.48879480361938477, "learning_rate": 0.00012275698930321423, "loss": 1.3372, "step": 29731 }, { "epoch": 0.3863537797067456, "grad_norm": 0.37933698296546936, "learning_rate": 0.00012275438984130286, "loss": 1.6007, "step": 29732 }, { "epoch": 0.3863667742506615, "grad_norm": 0.39360150694847107, "learning_rate": 0.00012275179037939146, "loss": 1.3832, "step": 29733 }, { "epoch": 0.38637976879457736, "grad_norm": 0.3616618514060974, "learning_rate": 0.0001227491909174801, "loss": 1.328, "step": 29734 }, { "epoch": 0.38639276333849326, "grad_norm": 0.47436678409576416, "learning_rate": 0.0001227465914555687, "loss": 1.5347, "step": 29735 }, { "epoch": 0.3864057578824091, "grad_norm": 0.38394227623939514, "learning_rate": 0.0001227439919936573, "loss": 1.5284, "step": 29736 }, { "epoch": 0.386418752426325, "grad_norm": 0.44200798869132996, "learning_rate": 0.00012274139253174593, "loss": 1.3914, "step": 29737 }, { "epoch": 0.38643174697024085, "grad_norm": 0.3390139937400818, "learning_rate": 0.00012273879306983455, "loss": 1.1884, "step": 29738 }, { "epoch": 0.38644474151415675, "grad_norm": 0.37402334809303284, "learning_rate": 0.00012273619360792318, "loss": 1.4467, "step": 29739 }, { "epoch": 0.3864577360580726, "grad_norm": 0.3869030475616455, "learning_rate": 0.00012273359414601177, "loss": 1.5286, "step": 29740 }, { "epoch": 0.3864707306019885, "grad_norm": 0.33115485310554504, "learning_rate": 0.0001227309946841004, "loss": 1.3785, "step": 29741 }, { "epoch": 0.38648372514590434, "grad_norm": 0.42454832792282104, "learning_rate": 0.00012272839522218902, "loss": 1.5248, "step": 29742 }, { "epoch": 0.38649671968982025, "grad_norm": 0.38917481899261475, "learning_rate": 0.00012272579576027762, "loss": 1.5065, "step": 29743 }, { "epoch": 0.3865097142337361, "grad_norm": 0.37830406427383423, "learning_rate": 0.00012272319629836624, "loss": 1.4404, "step": 29744 }, { "epoch": 0.386522708777652, "grad_norm": 0.39022427797317505, "learning_rate": 0.00012272059683645487, "loss": 1.4768, "step": 29745 }, { "epoch": 0.38653570332156784, "grad_norm": 0.5081313848495483, "learning_rate": 0.0001227179973745435, "loss": 1.4071, "step": 29746 }, { "epoch": 0.38654869786548374, "grad_norm": 0.37116414308547974, "learning_rate": 0.0001227153979126321, "loss": 1.4866, "step": 29747 }, { "epoch": 0.3865616924093996, "grad_norm": 0.3568953573703766, "learning_rate": 0.0001227127984507207, "loss": 1.3873, "step": 29748 }, { "epoch": 0.3865746869533155, "grad_norm": 0.39687058329582214, "learning_rate": 0.00012271019898880934, "loss": 1.4239, "step": 29749 }, { "epoch": 0.38658768149723133, "grad_norm": 0.4455108344554901, "learning_rate": 0.00012270759952689794, "loss": 1.3989, "step": 29750 }, { "epoch": 0.38660067604114723, "grad_norm": 0.39055877923965454, "learning_rate": 0.00012270500006498656, "loss": 1.3154, "step": 29751 }, { "epoch": 0.3866136705850631, "grad_norm": 0.3702305853366852, "learning_rate": 0.00012270240060307516, "loss": 1.095, "step": 29752 }, { "epoch": 0.386626665128979, "grad_norm": 0.5071846842765808, "learning_rate": 0.00012269980114116378, "loss": 1.5827, "step": 29753 }, { "epoch": 0.3866396596728948, "grad_norm": 0.3344666659832001, "learning_rate": 0.0001226972016792524, "loss": 1.3228, "step": 29754 }, { "epoch": 0.3866526542168107, "grad_norm": 0.3974967896938324, "learning_rate": 0.000122694602217341, "loss": 1.6777, "step": 29755 }, { "epoch": 0.38666564876072657, "grad_norm": 0.4079102873802185, "learning_rate": 0.00012269200275542963, "loss": 1.3265, "step": 29756 }, { "epoch": 0.38667864330464247, "grad_norm": 0.3736034333705902, "learning_rate": 0.00012268940329351825, "loss": 1.337, "step": 29757 }, { "epoch": 0.3866916378485583, "grad_norm": 0.4684407114982605, "learning_rate": 0.00012268680383160688, "loss": 1.4321, "step": 29758 }, { "epoch": 0.3867046323924742, "grad_norm": 0.3875333368778229, "learning_rate": 0.00012268420436969548, "loss": 1.3352, "step": 29759 }, { "epoch": 0.38671762693639006, "grad_norm": 0.4370006322860718, "learning_rate": 0.00012268160490778407, "loss": 1.4266, "step": 29760 }, { "epoch": 0.38673062148030596, "grad_norm": 0.4806390404701233, "learning_rate": 0.00012267900544587273, "loss": 1.5746, "step": 29761 }, { "epoch": 0.3867436160242218, "grad_norm": 0.41084182262420654, "learning_rate": 0.00012267640598396132, "loss": 1.468, "step": 29762 }, { "epoch": 0.3867566105681377, "grad_norm": 0.39975109696388245, "learning_rate": 0.00012267380652204995, "loss": 1.5812, "step": 29763 }, { "epoch": 0.38676960511205355, "grad_norm": 0.3874363601207733, "learning_rate": 0.00012267120706013854, "loss": 1.4981, "step": 29764 }, { "epoch": 0.38678259965596945, "grad_norm": 0.38484007120132446, "learning_rate": 0.00012266860759822717, "loss": 1.4525, "step": 29765 }, { "epoch": 0.38679559419988535, "grad_norm": 0.33379194140434265, "learning_rate": 0.0001226660081363158, "loss": 1.2499, "step": 29766 }, { "epoch": 0.3868085887438012, "grad_norm": 0.3669845163822174, "learning_rate": 0.0001226634086744044, "loss": 1.4502, "step": 29767 }, { "epoch": 0.3868215832877171, "grad_norm": 0.482738733291626, "learning_rate": 0.00012266080921249302, "loss": 1.2857, "step": 29768 }, { "epoch": 0.38683457783163294, "grad_norm": 0.3862116038799286, "learning_rate": 0.00012265820975058164, "loss": 1.3294, "step": 29769 }, { "epoch": 0.38684757237554884, "grad_norm": 0.4594738185405731, "learning_rate": 0.00012265561028867026, "loss": 1.346, "step": 29770 }, { "epoch": 0.3868605669194647, "grad_norm": 0.4819294214248657, "learning_rate": 0.00012265301082675886, "loss": 1.2718, "step": 29771 }, { "epoch": 0.3868735614633806, "grad_norm": 0.3536228835582733, "learning_rate": 0.00012265041136484749, "loss": 1.5036, "step": 29772 }, { "epoch": 0.38688655600729643, "grad_norm": 0.35345199704170227, "learning_rate": 0.0001226478119029361, "loss": 1.3253, "step": 29773 }, { "epoch": 0.38689955055121233, "grad_norm": 0.3304901123046875, "learning_rate": 0.0001226452124410247, "loss": 1.2689, "step": 29774 }, { "epoch": 0.3869125450951282, "grad_norm": 0.4230820834636688, "learning_rate": 0.00012264261297911333, "loss": 1.3563, "step": 29775 }, { "epoch": 0.3869255396390441, "grad_norm": 0.3944951295852661, "learning_rate": 0.00012264001351720193, "loss": 1.3652, "step": 29776 }, { "epoch": 0.3869385341829599, "grad_norm": 0.3445724546909332, "learning_rate": 0.00012263741405529055, "loss": 1.293, "step": 29777 }, { "epoch": 0.3869515287268758, "grad_norm": 0.45801207423210144, "learning_rate": 0.00012263481459337918, "loss": 1.4637, "step": 29778 }, { "epoch": 0.38696452327079167, "grad_norm": 0.3891269862651825, "learning_rate": 0.00012263221513146778, "loss": 1.19, "step": 29779 }, { "epoch": 0.3869775178147076, "grad_norm": 0.44965341687202454, "learning_rate": 0.0001226296156695564, "loss": 1.5173, "step": 29780 }, { "epoch": 0.3869905123586234, "grad_norm": 0.4066510498523712, "learning_rate": 0.00012262701620764503, "loss": 1.3143, "step": 29781 }, { "epoch": 0.3870035069025393, "grad_norm": 0.3551245331764221, "learning_rate": 0.00012262441674573365, "loss": 1.3002, "step": 29782 }, { "epoch": 0.38701650144645516, "grad_norm": 0.34226372838020325, "learning_rate": 0.00012262181728382225, "loss": 1.3357, "step": 29783 }, { "epoch": 0.38702949599037106, "grad_norm": 0.37835416197776794, "learning_rate": 0.00012261921782191087, "loss": 1.2966, "step": 29784 }, { "epoch": 0.3870424905342869, "grad_norm": 0.42439281940460205, "learning_rate": 0.0001226166183599995, "loss": 1.4746, "step": 29785 }, { "epoch": 0.3870554850782028, "grad_norm": 0.39346417784690857, "learning_rate": 0.0001226140188980881, "loss": 1.3842, "step": 29786 }, { "epoch": 0.38706847962211866, "grad_norm": 0.4251452386379242, "learning_rate": 0.00012261141943617672, "loss": 1.4836, "step": 29787 }, { "epoch": 0.38708147416603456, "grad_norm": 0.4480631947517395, "learning_rate": 0.00012260881997426534, "loss": 1.3541, "step": 29788 }, { "epoch": 0.3870944687099504, "grad_norm": 0.3870769441127777, "learning_rate": 0.00012260622051235394, "loss": 1.4393, "step": 29789 }, { "epoch": 0.3871074632538663, "grad_norm": 0.3966757357120514, "learning_rate": 0.00012260362105044256, "loss": 1.4477, "step": 29790 }, { "epoch": 0.38712045779778215, "grad_norm": 0.4037858843803406, "learning_rate": 0.00012260102158853116, "loss": 1.3016, "step": 29791 }, { "epoch": 0.38713345234169805, "grad_norm": 0.432411253452301, "learning_rate": 0.0001225984221266198, "loss": 1.3313, "step": 29792 }, { "epoch": 0.3871464468856139, "grad_norm": 0.42230790853500366, "learning_rate": 0.0001225958226647084, "loss": 1.2872, "step": 29793 }, { "epoch": 0.3871594414295298, "grad_norm": 0.4460132122039795, "learning_rate": 0.00012259322320279703, "loss": 1.4183, "step": 29794 }, { "epoch": 0.38717243597344564, "grad_norm": 0.388439416885376, "learning_rate": 0.00012259062374088563, "loss": 1.3582, "step": 29795 }, { "epoch": 0.38718543051736154, "grad_norm": 0.370025634765625, "learning_rate": 0.00012258802427897426, "loss": 1.3086, "step": 29796 }, { "epoch": 0.3871984250612774, "grad_norm": 0.36072489619255066, "learning_rate": 0.00012258542481706288, "loss": 1.2024, "step": 29797 }, { "epoch": 0.3872114196051933, "grad_norm": 0.3985675573348999, "learning_rate": 0.00012258282535515148, "loss": 1.4394, "step": 29798 }, { "epoch": 0.38722441414910913, "grad_norm": 0.38333627581596375, "learning_rate": 0.0001225802258932401, "loss": 1.3597, "step": 29799 }, { "epoch": 0.38723740869302503, "grad_norm": 0.47742190957069397, "learning_rate": 0.00012257762643132873, "loss": 1.6916, "step": 29800 }, { "epoch": 0.3872504032369409, "grad_norm": 0.4569127559661865, "learning_rate": 0.00012257502696941735, "loss": 1.7027, "step": 29801 }, { "epoch": 0.3872633977808568, "grad_norm": 0.2834679186344147, "learning_rate": 0.00012257242750750595, "loss": 1.2926, "step": 29802 }, { "epoch": 0.3872763923247726, "grad_norm": 0.33267778158187866, "learning_rate": 0.00012256982804559455, "loss": 1.3553, "step": 29803 }, { "epoch": 0.3872893868686885, "grad_norm": 0.35983896255493164, "learning_rate": 0.0001225672285836832, "loss": 1.3815, "step": 29804 }, { "epoch": 0.38730238141260437, "grad_norm": 0.3674542009830475, "learning_rate": 0.0001225646291217718, "loss": 1.2445, "step": 29805 }, { "epoch": 0.38731537595652027, "grad_norm": 0.44520899653434753, "learning_rate": 0.00012256202965986042, "loss": 1.3601, "step": 29806 }, { "epoch": 0.3873283705004361, "grad_norm": 0.3955947160720825, "learning_rate": 0.00012255943019794902, "loss": 1.3747, "step": 29807 }, { "epoch": 0.387341365044352, "grad_norm": 0.46005773544311523, "learning_rate": 0.00012255683073603764, "loss": 1.5152, "step": 29808 }, { "epoch": 0.38735435958826786, "grad_norm": 0.4513423442840576, "learning_rate": 0.00012255423127412627, "loss": 1.6036, "step": 29809 }, { "epoch": 0.38736735413218376, "grad_norm": 0.25207453966140747, "learning_rate": 0.00012255163181221486, "loss": 1.2006, "step": 29810 }, { "epoch": 0.3873803486760996, "grad_norm": 0.4496898353099823, "learning_rate": 0.0001225490323503035, "loss": 1.4391, "step": 29811 }, { "epoch": 0.3873933432200155, "grad_norm": 0.4895389974117279, "learning_rate": 0.0001225464328883921, "loss": 1.3897, "step": 29812 }, { "epoch": 0.38740633776393135, "grad_norm": 0.7299759984016418, "learning_rate": 0.00012254383342648074, "loss": 1.2374, "step": 29813 }, { "epoch": 0.38741933230784725, "grad_norm": 0.4900203049182892, "learning_rate": 0.00012254123396456933, "loss": 1.3894, "step": 29814 }, { "epoch": 0.3874323268517631, "grad_norm": 0.34242701530456543, "learning_rate": 0.00012253863450265793, "loss": 1.2608, "step": 29815 }, { "epoch": 0.387445321395679, "grad_norm": 0.5025469660758972, "learning_rate": 0.00012253603504074658, "loss": 1.5498, "step": 29816 }, { "epoch": 0.38745831593959484, "grad_norm": 0.48988255858421326, "learning_rate": 0.00012253343557883518, "loss": 1.3759, "step": 29817 }, { "epoch": 0.38747131048351074, "grad_norm": 0.3595893085002899, "learning_rate": 0.0001225308361169238, "loss": 1.3703, "step": 29818 }, { "epoch": 0.3874843050274266, "grad_norm": 0.3543982207775116, "learning_rate": 0.0001225282366550124, "loss": 1.2888, "step": 29819 }, { "epoch": 0.3874972995713425, "grad_norm": 0.4523742198944092, "learning_rate": 0.00012252563719310103, "loss": 1.3428, "step": 29820 }, { "epoch": 0.38751029411525834, "grad_norm": 0.39910241961479187, "learning_rate": 0.00012252303773118965, "loss": 1.4277, "step": 29821 }, { "epoch": 0.38752328865917424, "grad_norm": 0.4761836528778076, "learning_rate": 0.00012252043826927825, "loss": 1.3918, "step": 29822 }, { "epoch": 0.3875362832030901, "grad_norm": 0.3799963593482971, "learning_rate": 0.0001225178388073669, "loss": 1.2415, "step": 29823 }, { "epoch": 0.387549277747006, "grad_norm": 0.4324726462364197, "learning_rate": 0.0001225152393454555, "loss": 1.3634, "step": 29824 }, { "epoch": 0.3875622722909218, "grad_norm": 0.4833022952079773, "learning_rate": 0.00012251263988354412, "loss": 1.5643, "step": 29825 }, { "epoch": 0.38757526683483773, "grad_norm": 0.4042578637599945, "learning_rate": 0.00012251004042163272, "loss": 1.3777, "step": 29826 }, { "epoch": 0.3875882613787536, "grad_norm": 0.3437054455280304, "learning_rate": 0.00012250744095972134, "loss": 1.3368, "step": 29827 }, { "epoch": 0.3876012559226695, "grad_norm": 0.31347450613975525, "learning_rate": 0.00012250484149780997, "loss": 1.3307, "step": 29828 }, { "epoch": 0.3876142504665853, "grad_norm": 0.38781213760375977, "learning_rate": 0.00012250224203589857, "loss": 1.4691, "step": 29829 }, { "epoch": 0.3876272450105012, "grad_norm": 0.37810152769088745, "learning_rate": 0.0001224996425739872, "loss": 1.5532, "step": 29830 }, { "epoch": 0.38764023955441707, "grad_norm": 0.3242591619491577, "learning_rate": 0.00012249704311207582, "loss": 1.3749, "step": 29831 }, { "epoch": 0.38765323409833297, "grad_norm": 0.2856314182281494, "learning_rate": 0.0001224944436501644, "loss": 1.2521, "step": 29832 }, { "epoch": 0.3876662286422488, "grad_norm": 0.39256522059440613, "learning_rate": 0.00012249184418825304, "loss": 1.4652, "step": 29833 }, { "epoch": 0.3876792231861647, "grad_norm": 0.3368414044380188, "learning_rate": 0.00012248924472634163, "loss": 1.3384, "step": 29834 }, { "epoch": 0.38769221773008056, "grad_norm": 0.4324895143508911, "learning_rate": 0.00012248664526443029, "loss": 1.5427, "step": 29835 }, { "epoch": 0.38770521227399646, "grad_norm": 0.32702910900115967, "learning_rate": 0.00012248404580251888, "loss": 1.2738, "step": 29836 }, { "epoch": 0.3877182068179123, "grad_norm": 0.4195633828639984, "learning_rate": 0.0001224814463406075, "loss": 1.4554, "step": 29837 }, { "epoch": 0.3877312013618282, "grad_norm": 0.46183133125305176, "learning_rate": 0.0001224788468786961, "loss": 1.3694, "step": 29838 }, { "epoch": 0.38774419590574405, "grad_norm": 0.39727213978767395, "learning_rate": 0.00012247624741678473, "loss": 1.2311, "step": 29839 }, { "epoch": 0.38775719044965995, "grad_norm": 0.4146970808506012, "learning_rate": 0.00012247364795487335, "loss": 1.2301, "step": 29840 }, { "epoch": 0.3877701849935758, "grad_norm": 0.33627963066101074, "learning_rate": 0.00012247104849296195, "loss": 1.1917, "step": 29841 }, { "epoch": 0.3877831795374917, "grad_norm": 0.39488857984542847, "learning_rate": 0.00012246844903105058, "loss": 1.4307, "step": 29842 }, { "epoch": 0.3877961740814076, "grad_norm": 0.4596952199935913, "learning_rate": 0.0001224658495691392, "loss": 1.3376, "step": 29843 }, { "epoch": 0.38780916862532344, "grad_norm": 0.36482855677604675, "learning_rate": 0.0001224632501072278, "loss": 1.2941, "step": 29844 }, { "epoch": 0.38782216316923934, "grad_norm": 0.37410250306129456, "learning_rate": 0.00012246065064531642, "loss": 1.4677, "step": 29845 }, { "epoch": 0.3878351577131552, "grad_norm": 0.3789508640766144, "learning_rate": 0.00012245805118340502, "loss": 1.445, "step": 29846 }, { "epoch": 0.3878481522570711, "grad_norm": 0.34893906116485596, "learning_rate": 0.00012245545172149367, "loss": 1.3977, "step": 29847 }, { "epoch": 0.38786114680098693, "grad_norm": 0.35791364312171936, "learning_rate": 0.00012245285225958227, "loss": 1.489, "step": 29848 }, { "epoch": 0.38787414134490283, "grad_norm": 0.32428452372550964, "learning_rate": 0.0001224502527976709, "loss": 1.3289, "step": 29849 }, { "epoch": 0.3878871358888187, "grad_norm": 0.4137621223926544, "learning_rate": 0.0001224476533357595, "loss": 1.3861, "step": 29850 }, { "epoch": 0.3879001304327346, "grad_norm": 0.3547743558883667, "learning_rate": 0.00012244505387384812, "loss": 1.5293, "step": 29851 }, { "epoch": 0.3879131249766504, "grad_norm": 0.3912314176559448, "learning_rate": 0.00012244245441193674, "loss": 1.4246, "step": 29852 }, { "epoch": 0.3879261195205663, "grad_norm": 0.40899860858917236, "learning_rate": 0.00012243985495002534, "loss": 1.3946, "step": 29853 }, { "epoch": 0.38793911406448217, "grad_norm": 0.3769967555999756, "learning_rate": 0.00012243725548811396, "loss": 1.2737, "step": 29854 }, { "epoch": 0.38795210860839807, "grad_norm": 0.4838295876979828, "learning_rate": 0.00012243465602620259, "loss": 1.5054, "step": 29855 }, { "epoch": 0.3879651031523139, "grad_norm": 0.4463987946510315, "learning_rate": 0.0001224320565642912, "loss": 1.4829, "step": 29856 }, { "epoch": 0.3879780976962298, "grad_norm": 0.24521949887275696, "learning_rate": 0.0001224294571023798, "loss": 1.3377, "step": 29857 }, { "epoch": 0.38799109224014566, "grad_norm": 0.45139047503471375, "learning_rate": 0.00012242685764046843, "loss": 1.4292, "step": 29858 }, { "epoch": 0.38800408678406156, "grad_norm": 0.4290568232536316, "learning_rate": 0.00012242425817855706, "loss": 1.557, "step": 29859 }, { "epoch": 0.3880170813279774, "grad_norm": 0.3773767054080963, "learning_rate": 0.00012242165871664565, "loss": 1.334, "step": 29860 }, { "epoch": 0.3880300758718933, "grad_norm": 0.27543938159942627, "learning_rate": 0.00012241905925473428, "loss": 1.2525, "step": 29861 }, { "epoch": 0.38804307041580915, "grad_norm": 0.34243497252464294, "learning_rate": 0.0001224164597928229, "loss": 1.1238, "step": 29862 }, { "epoch": 0.38805606495972506, "grad_norm": 0.4504084587097168, "learning_rate": 0.0001224138603309115, "loss": 1.2286, "step": 29863 }, { "epoch": 0.3880690595036409, "grad_norm": 0.4480748474597931, "learning_rate": 0.00012241126086900013, "loss": 1.4154, "step": 29864 }, { "epoch": 0.3880820540475568, "grad_norm": 0.5171236395835876, "learning_rate": 0.00012240866140708872, "loss": 1.2709, "step": 29865 }, { "epoch": 0.38809504859147265, "grad_norm": 0.38499966263771057, "learning_rate": 0.00012240606194517737, "loss": 1.3089, "step": 29866 }, { "epoch": 0.38810804313538855, "grad_norm": 0.35608506202697754, "learning_rate": 0.00012240346248326597, "loss": 1.5467, "step": 29867 }, { "epoch": 0.3881210376793044, "grad_norm": 0.4202132821083069, "learning_rate": 0.0001224008630213546, "loss": 1.5517, "step": 29868 }, { "epoch": 0.3881340322232203, "grad_norm": 0.331220418214798, "learning_rate": 0.0001223982635594432, "loss": 1.1938, "step": 29869 }, { "epoch": 0.38814702676713614, "grad_norm": 0.35433098673820496, "learning_rate": 0.00012239566409753182, "loss": 1.3799, "step": 29870 }, { "epoch": 0.38816002131105204, "grad_norm": 0.5086492896080017, "learning_rate": 0.00012239306463562044, "loss": 1.4711, "step": 29871 }, { "epoch": 0.3881730158549679, "grad_norm": 0.4203968346118927, "learning_rate": 0.00012239046517370904, "loss": 1.4212, "step": 29872 }, { "epoch": 0.3881860103988838, "grad_norm": 0.5037051439285278, "learning_rate": 0.00012238786571179766, "loss": 1.5708, "step": 29873 }, { "epoch": 0.38819900494279963, "grad_norm": 0.3497743308544159, "learning_rate": 0.0001223852662498863, "loss": 1.2971, "step": 29874 }, { "epoch": 0.38821199948671553, "grad_norm": 0.35463201999664307, "learning_rate": 0.00012238266678797489, "loss": 1.4071, "step": 29875 }, { "epoch": 0.3882249940306314, "grad_norm": 0.4891999661922455, "learning_rate": 0.0001223800673260635, "loss": 1.5178, "step": 29876 }, { "epoch": 0.3882379885745473, "grad_norm": 0.39860543608665466, "learning_rate": 0.0001223774678641521, "loss": 1.2939, "step": 29877 }, { "epoch": 0.3882509831184631, "grad_norm": 0.3721354603767395, "learning_rate": 0.00012237486840224076, "loss": 1.3603, "step": 29878 }, { "epoch": 0.388263977662379, "grad_norm": 0.43463608622550964, "learning_rate": 0.00012237226894032936, "loss": 1.4447, "step": 29879 }, { "epoch": 0.38827697220629487, "grad_norm": 0.3414178490638733, "learning_rate": 0.00012236966947841798, "loss": 1.2873, "step": 29880 }, { "epoch": 0.38828996675021077, "grad_norm": 0.3297525644302368, "learning_rate": 0.00012236707001650658, "loss": 1.4836, "step": 29881 }, { "epoch": 0.3883029612941266, "grad_norm": 0.4560333490371704, "learning_rate": 0.0001223644705545952, "loss": 1.3534, "step": 29882 }, { "epoch": 0.3883159558380425, "grad_norm": 0.4555419385433197, "learning_rate": 0.00012236187109268383, "loss": 1.4457, "step": 29883 }, { "epoch": 0.38832895038195836, "grad_norm": 0.30804577469825745, "learning_rate": 0.00012235927163077243, "loss": 1.0228, "step": 29884 }, { "epoch": 0.38834194492587426, "grad_norm": 0.3303452432155609, "learning_rate": 0.00012235667216886105, "loss": 1.3975, "step": 29885 }, { "epoch": 0.3883549394697901, "grad_norm": 0.2807534337043762, "learning_rate": 0.00012235407270694967, "loss": 1.1644, "step": 29886 }, { "epoch": 0.388367934013706, "grad_norm": 0.4142604470252991, "learning_rate": 0.00012235147324503827, "loss": 1.3199, "step": 29887 }, { "epoch": 0.38838092855762185, "grad_norm": 0.40739983320236206, "learning_rate": 0.0001223488737831269, "loss": 1.5283, "step": 29888 }, { "epoch": 0.38839392310153775, "grad_norm": 0.46091997623443604, "learning_rate": 0.0001223462743212155, "loss": 1.3917, "step": 29889 }, { "epoch": 0.3884069176454536, "grad_norm": 0.4044102430343628, "learning_rate": 0.00012234367485930415, "loss": 1.4336, "step": 29890 }, { "epoch": 0.3884199121893695, "grad_norm": 0.38766008615493774, "learning_rate": 0.00012234107539739274, "loss": 1.194, "step": 29891 }, { "epoch": 0.38843290673328534, "grad_norm": 0.38603052496910095, "learning_rate": 0.00012233847593548137, "loss": 1.2864, "step": 29892 }, { "epoch": 0.38844590127720124, "grad_norm": 0.4562664330005646, "learning_rate": 0.00012233587647356996, "loss": 1.5865, "step": 29893 }, { "epoch": 0.3884588958211171, "grad_norm": 0.4481065273284912, "learning_rate": 0.0001223332770116586, "loss": 1.2825, "step": 29894 }, { "epoch": 0.388471890365033, "grad_norm": 0.435531884431839, "learning_rate": 0.0001223306775497472, "loss": 1.2549, "step": 29895 }, { "epoch": 0.38848488490894884, "grad_norm": 0.38524964451789856, "learning_rate": 0.0001223280780878358, "loss": 1.4728, "step": 29896 }, { "epoch": 0.38849787945286474, "grad_norm": 0.4691812992095947, "learning_rate": 0.00012232547862592446, "loss": 1.5518, "step": 29897 }, { "epoch": 0.3885108739967806, "grad_norm": 0.525292694568634, "learning_rate": 0.00012232287916401306, "loss": 1.5359, "step": 29898 }, { "epoch": 0.3885238685406965, "grad_norm": 0.5330997109413147, "learning_rate": 0.00012232027970210166, "loss": 1.2279, "step": 29899 }, { "epoch": 0.3885368630846123, "grad_norm": 0.41754022240638733, "learning_rate": 0.00012231768024019028, "loss": 1.5521, "step": 29900 }, { "epoch": 0.38854985762852823, "grad_norm": 0.44540759921073914, "learning_rate": 0.0001223150807782789, "loss": 1.4199, "step": 29901 }, { "epoch": 0.3885628521724441, "grad_norm": 0.39884260296821594, "learning_rate": 0.00012231248131636753, "loss": 1.5216, "step": 29902 }, { "epoch": 0.38857584671636, "grad_norm": 0.44731393456459045, "learning_rate": 0.00012230988185445613, "loss": 1.3533, "step": 29903 }, { "epoch": 0.3885888412602758, "grad_norm": 0.36042359471321106, "learning_rate": 0.00012230728239254475, "loss": 1.2104, "step": 29904 }, { "epoch": 0.3886018358041917, "grad_norm": 0.3310069739818573, "learning_rate": 0.00012230468293063338, "loss": 1.3148, "step": 29905 }, { "epoch": 0.38861483034810756, "grad_norm": 0.3812437355518341, "learning_rate": 0.00012230208346872197, "loss": 1.5159, "step": 29906 }, { "epoch": 0.38862782489202347, "grad_norm": 0.3621780276298523, "learning_rate": 0.0001222994840068106, "loss": 1.2924, "step": 29907 }, { "epoch": 0.3886408194359393, "grad_norm": 0.47713184356689453, "learning_rate": 0.0001222968845448992, "loss": 1.6367, "step": 29908 }, { "epoch": 0.3886538139798552, "grad_norm": 0.3496624529361725, "learning_rate": 0.00012229428508298785, "loss": 1.3997, "step": 29909 }, { "epoch": 0.38866680852377106, "grad_norm": 0.30547887086868286, "learning_rate": 0.00012229168562107645, "loss": 1.4512, "step": 29910 }, { "epoch": 0.38867980306768696, "grad_norm": 0.4067460596561432, "learning_rate": 0.00012228908615916507, "loss": 1.3028, "step": 29911 }, { "epoch": 0.3886927976116028, "grad_norm": 0.36536532640457153, "learning_rate": 0.00012228648669725367, "loss": 1.4354, "step": 29912 }, { "epoch": 0.3887057921555187, "grad_norm": 0.4010399878025055, "learning_rate": 0.0001222838872353423, "loss": 1.2371, "step": 29913 }, { "epoch": 0.38871878669943455, "grad_norm": 0.48741745948791504, "learning_rate": 0.00012228128777343092, "loss": 1.4684, "step": 29914 }, { "epoch": 0.38873178124335045, "grad_norm": 0.502138614654541, "learning_rate": 0.0001222786883115195, "loss": 1.43, "step": 29915 }, { "epoch": 0.3887447757872663, "grad_norm": 0.3628387153148651, "learning_rate": 0.00012227608884960814, "loss": 1.3516, "step": 29916 }, { "epoch": 0.3887577703311822, "grad_norm": 0.42328667640686035, "learning_rate": 0.00012227348938769676, "loss": 1.4117, "step": 29917 }, { "epoch": 0.38877076487509804, "grad_norm": 0.36301475763320923, "learning_rate": 0.00012227088992578536, "loss": 1.2453, "step": 29918 }, { "epoch": 0.38878375941901394, "grad_norm": 0.4574221074581146, "learning_rate": 0.00012226829046387398, "loss": 1.3164, "step": 29919 }, { "epoch": 0.38879675396292984, "grad_norm": 0.5163411498069763, "learning_rate": 0.00012226569100196258, "loss": 1.4778, "step": 29920 }, { "epoch": 0.3888097485068457, "grad_norm": 0.4122473895549774, "learning_rate": 0.00012226309154005123, "loss": 1.3811, "step": 29921 }, { "epoch": 0.3888227430507616, "grad_norm": 0.42602020502090454, "learning_rate": 0.00012226049207813983, "loss": 1.5016, "step": 29922 }, { "epoch": 0.38883573759467743, "grad_norm": 0.44233569502830505, "learning_rate": 0.00012225789261622845, "loss": 1.3322, "step": 29923 }, { "epoch": 0.38884873213859333, "grad_norm": 0.35932064056396484, "learning_rate": 0.00012225529315431705, "loss": 1.2632, "step": 29924 }, { "epoch": 0.3888617266825092, "grad_norm": 0.3449583351612091, "learning_rate": 0.00012225269369240568, "loss": 1.1625, "step": 29925 }, { "epoch": 0.3888747212264251, "grad_norm": 0.43039581179618835, "learning_rate": 0.0001222500942304943, "loss": 1.3463, "step": 29926 }, { "epoch": 0.3888877157703409, "grad_norm": 0.41915011405944824, "learning_rate": 0.0001222474947685829, "loss": 1.2752, "step": 29927 }, { "epoch": 0.3889007103142568, "grad_norm": 0.49146372079849243, "learning_rate": 0.00012224489530667152, "loss": 1.5694, "step": 29928 }, { "epoch": 0.38891370485817267, "grad_norm": 0.37312987446784973, "learning_rate": 0.00012224229584476015, "loss": 1.2486, "step": 29929 }, { "epoch": 0.38892669940208857, "grad_norm": 0.4269731938838959, "learning_rate": 0.00012223969638284875, "loss": 1.3001, "step": 29930 }, { "epoch": 0.3889396939460044, "grad_norm": 0.38001537322998047, "learning_rate": 0.00012223709692093737, "loss": 1.307, "step": 29931 }, { "epoch": 0.3889526884899203, "grad_norm": 0.4600802958011627, "learning_rate": 0.000122234497459026, "loss": 1.4522, "step": 29932 }, { "epoch": 0.38896568303383616, "grad_norm": 0.4012828767299652, "learning_rate": 0.00012223189799711462, "loss": 1.5026, "step": 29933 }, { "epoch": 0.38897867757775206, "grad_norm": 0.39937078952789307, "learning_rate": 0.00012222929853520322, "loss": 1.3766, "step": 29934 }, { "epoch": 0.3889916721216679, "grad_norm": 0.4260209798812866, "learning_rate": 0.00012222669907329184, "loss": 1.5353, "step": 29935 }, { "epoch": 0.3890046666655838, "grad_norm": 0.4753043055534363, "learning_rate": 0.00012222409961138046, "loss": 1.2501, "step": 29936 }, { "epoch": 0.38901766120949965, "grad_norm": 0.3294399082660675, "learning_rate": 0.00012222150014946906, "loss": 1.5208, "step": 29937 }, { "epoch": 0.38903065575341556, "grad_norm": 0.4828750193119049, "learning_rate": 0.0001222189006875577, "loss": 1.3379, "step": 29938 }, { "epoch": 0.3890436502973314, "grad_norm": 0.48309463262557983, "learning_rate": 0.00012221630122564628, "loss": 1.5085, "step": 29939 }, { "epoch": 0.3890566448412473, "grad_norm": 0.4016285836696625, "learning_rate": 0.00012221370176373494, "loss": 1.5111, "step": 29940 }, { "epoch": 0.38906963938516315, "grad_norm": 0.4267188310623169, "learning_rate": 0.00012221110230182353, "loss": 1.4579, "step": 29941 }, { "epoch": 0.38908263392907905, "grad_norm": 0.3516971468925476, "learning_rate": 0.00012220850283991213, "loss": 1.4335, "step": 29942 }, { "epoch": 0.3890956284729949, "grad_norm": 0.37167680263519287, "learning_rate": 0.00012220590337800075, "loss": 1.4175, "step": 29943 }, { "epoch": 0.3891086230169108, "grad_norm": 0.39827388525009155, "learning_rate": 0.00012220330391608938, "loss": 1.3063, "step": 29944 }, { "epoch": 0.38912161756082664, "grad_norm": 0.34414955973625183, "learning_rate": 0.000122200704454178, "loss": 1.3821, "step": 29945 }, { "epoch": 0.38913461210474254, "grad_norm": 0.3396826982498169, "learning_rate": 0.0001221981049922666, "loss": 1.4192, "step": 29946 }, { "epoch": 0.3891476066486584, "grad_norm": 0.2613384425640106, "learning_rate": 0.00012219550553035523, "loss": 1.3163, "step": 29947 }, { "epoch": 0.3891606011925743, "grad_norm": 0.46040889620780945, "learning_rate": 0.00012219290606844385, "loss": 1.3776, "step": 29948 }, { "epoch": 0.38917359573649013, "grad_norm": 0.3295203447341919, "learning_rate": 0.00012219030660653245, "loss": 1.3544, "step": 29949 }, { "epoch": 0.38918659028040603, "grad_norm": 0.3597117066383362, "learning_rate": 0.00012218770714462107, "loss": 1.3296, "step": 29950 }, { "epoch": 0.3891995848243219, "grad_norm": 0.40903425216674805, "learning_rate": 0.00012218510768270967, "loss": 1.477, "step": 29951 }, { "epoch": 0.3892125793682378, "grad_norm": 0.39330413937568665, "learning_rate": 0.00012218250822079832, "loss": 1.4333, "step": 29952 }, { "epoch": 0.3892255739121536, "grad_norm": 0.3866601586341858, "learning_rate": 0.00012217990875888692, "loss": 1.4585, "step": 29953 }, { "epoch": 0.3892385684560695, "grad_norm": 0.4674544334411621, "learning_rate": 0.00012217730929697552, "loss": 1.5484, "step": 29954 }, { "epoch": 0.38925156299998537, "grad_norm": 0.4730983376502991, "learning_rate": 0.00012217470983506414, "loss": 1.4724, "step": 29955 }, { "epoch": 0.38926455754390127, "grad_norm": 0.5048171281814575, "learning_rate": 0.00012217211037315276, "loss": 1.4768, "step": 29956 }, { "epoch": 0.3892775520878171, "grad_norm": 0.4015820026397705, "learning_rate": 0.0001221695109112414, "loss": 1.3504, "step": 29957 }, { "epoch": 0.389290546631733, "grad_norm": 0.3920265734195709, "learning_rate": 0.00012216691144933, "loss": 1.3823, "step": 29958 }, { "epoch": 0.38930354117564886, "grad_norm": 0.4584847092628479, "learning_rate": 0.0001221643119874186, "loss": 1.5047, "step": 29959 }, { "epoch": 0.38931653571956476, "grad_norm": 0.41017472743988037, "learning_rate": 0.00012216171252550724, "loss": 1.306, "step": 29960 }, { "epoch": 0.3893295302634806, "grad_norm": 0.3940582573413849, "learning_rate": 0.00012215911306359583, "loss": 1.3614, "step": 29961 }, { "epoch": 0.3893425248073965, "grad_norm": 0.40765923261642456, "learning_rate": 0.00012215651360168446, "loss": 1.4318, "step": 29962 }, { "epoch": 0.38935551935131235, "grad_norm": 0.2614087164402008, "learning_rate": 0.00012215391413977305, "loss": 1.4034, "step": 29963 }, { "epoch": 0.38936851389522825, "grad_norm": 0.4263293445110321, "learning_rate": 0.0001221513146778617, "loss": 1.5482, "step": 29964 }, { "epoch": 0.3893815084391441, "grad_norm": 0.3633301258087158, "learning_rate": 0.0001221487152159503, "loss": 1.3543, "step": 29965 }, { "epoch": 0.38939450298306, "grad_norm": 0.33783143758773804, "learning_rate": 0.0001221461157540389, "loss": 1.4277, "step": 29966 }, { "epoch": 0.38940749752697584, "grad_norm": 0.33738669753074646, "learning_rate": 0.00012214351629212753, "loss": 1.3975, "step": 29967 }, { "epoch": 0.38942049207089174, "grad_norm": 0.4516810178756714, "learning_rate": 0.00012214091683021615, "loss": 1.2123, "step": 29968 }, { "epoch": 0.3894334866148076, "grad_norm": 0.41029927134513855, "learning_rate": 0.00012213831736830477, "loss": 1.2133, "step": 29969 }, { "epoch": 0.3894464811587235, "grad_norm": 0.4828219711780548, "learning_rate": 0.00012213571790639337, "loss": 1.4444, "step": 29970 }, { "epoch": 0.38945947570263934, "grad_norm": 0.32723382115364075, "learning_rate": 0.000122133118444482, "loss": 1.2271, "step": 29971 }, { "epoch": 0.38947247024655524, "grad_norm": 0.479070782661438, "learning_rate": 0.00012213051898257062, "loss": 1.4988, "step": 29972 }, { "epoch": 0.3894854647904711, "grad_norm": 0.2727302014827728, "learning_rate": 0.00012212791952065922, "loss": 1.4481, "step": 29973 }, { "epoch": 0.389498459334387, "grad_norm": 0.3500700294971466, "learning_rate": 0.00012212532005874784, "loss": 1.3542, "step": 29974 }, { "epoch": 0.3895114538783028, "grad_norm": 0.46399515867233276, "learning_rate": 0.00012212272059683647, "loss": 1.5085, "step": 29975 }, { "epoch": 0.3895244484222187, "grad_norm": 0.4055573046207428, "learning_rate": 0.0001221201211349251, "loss": 1.3124, "step": 29976 }, { "epoch": 0.3895374429661346, "grad_norm": 0.448212206363678, "learning_rate": 0.0001221175216730137, "loss": 1.3962, "step": 29977 }, { "epoch": 0.3895504375100505, "grad_norm": 0.448320209980011, "learning_rate": 0.00012211492221110231, "loss": 1.4562, "step": 29978 }, { "epoch": 0.3895634320539663, "grad_norm": 0.5457174181938171, "learning_rate": 0.00012211232274919094, "loss": 1.474, "step": 29979 }, { "epoch": 0.3895764265978822, "grad_norm": 0.35137537121772766, "learning_rate": 0.00012210972328727954, "loss": 1.2626, "step": 29980 }, { "epoch": 0.38958942114179806, "grad_norm": 0.37876641750335693, "learning_rate": 0.00012210712382536816, "loss": 1.2808, "step": 29981 }, { "epoch": 0.38960241568571397, "grad_norm": 0.38676291704177856, "learning_rate": 0.00012210452436345676, "loss": 1.3523, "step": 29982 }, { "epoch": 0.3896154102296298, "grad_norm": 0.43328115344047546, "learning_rate": 0.00012210192490154538, "loss": 1.2519, "step": 29983 }, { "epoch": 0.3896284047735457, "grad_norm": 0.4008753299713135, "learning_rate": 0.000122099325439634, "loss": 1.4583, "step": 29984 }, { "epoch": 0.38964139931746156, "grad_norm": 0.476602166891098, "learning_rate": 0.0001220967259777226, "loss": 1.4223, "step": 29985 }, { "epoch": 0.38965439386137746, "grad_norm": 0.37312886118888855, "learning_rate": 0.00012209412651581123, "loss": 1.3438, "step": 29986 }, { "epoch": 0.3896673884052933, "grad_norm": 0.4831525385379791, "learning_rate": 0.00012209152705389985, "loss": 1.4136, "step": 29987 }, { "epoch": 0.3896803829492092, "grad_norm": 0.4432586431503296, "learning_rate": 0.00012208892759198848, "loss": 1.3598, "step": 29988 }, { "epoch": 0.38969337749312505, "grad_norm": 0.37609317898750305, "learning_rate": 0.00012208632813007707, "loss": 1.3896, "step": 29989 }, { "epoch": 0.38970637203704095, "grad_norm": 0.334155410528183, "learning_rate": 0.0001220837286681657, "loss": 1.4285, "step": 29990 }, { "epoch": 0.3897193665809568, "grad_norm": 0.42372921109199524, "learning_rate": 0.00012208112920625432, "loss": 1.3467, "step": 29991 }, { "epoch": 0.3897323611248727, "grad_norm": 0.40522122383117676, "learning_rate": 0.00012207852974434292, "loss": 1.5003, "step": 29992 }, { "epoch": 0.38974535566878854, "grad_norm": 0.4145407974720001, "learning_rate": 0.00012207593028243155, "loss": 1.6396, "step": 29993 }, { "epoch": 0.38975835021270444, "grad_norm": 0.394796758890152, "learning_rate": 0.00012207333082052014, "loss": 1.383, "step": 29994 }, { "epoch": 0.38977134475662034, "grad_norm": 0.42275163531303406, "learning_rate": 0.0001220707313586088, "loss": 1.2702, "step": 29995 }, { "epoch": 0.3897843393005362, "grad_norm": 0.3760804235935211, "learning_rate": 0.00012206813189669739, "loss": 1.3983, "step": 29996 }, { "epoch": 0.3897973338444521, "grad_norm": 0.48112455010414124, "learning_rate": 0.000122065532434786, "loss": 1.4734, "step": 29997 }, { "epoch": 0.38981032838836793, "grad_norm": 0.47838762402534485, "learning_rate": 0.00012206293297287461, "loss": 1.3017, "step": 29998 }, { "epoch": 0.38982332293228383, "grad_norm": 0.40505892038345337, "learning_rate": 0.00012206033351096324, "loss": 1.291, "step": 29999 }, { "epoch": 0.3898363174761997, "grad_norm": 0.3999340832233429, "learning_rate": 0.00012205773404905185, "loss": 1.6041, "step": 30000 }, { "epoch": 0.3898493120201156, "grad_norm": 0.4098939299583435, "learning_rate": 0.00012205513458714046, "loss": 1.5688, "step": 30001 }, { "epoch": 0.3898623065640314, "grad_norm": 0.38183093070983887, "learning_rate": 0.00012205253512522907, "loss": 1.3582, "step": 30002 }, { "epoch": 0.3898753011079473, "grad_norm": 0.4575520157814026, "learning_rate": 0.00012204993566331771, "loss": 1.1648, "step": 30003 }, { "epoch": 0.38988829565186317, "grad_norm": 0.40873438119888306, "learning_rate": 0.00012204733620140632, "loss": 1.2611, "step": 30004 }, { "epoch": 0.38990129019577907, "grad_norm": 0.4148501455783844, "learning_rate": 0.00012204473673949493, "loss": 1.3929, "step": 30005 }, { "epoch": 0.3899142847396949, "grad_norm": 0.36465275287628174, "learning_rate": 0.00012204213727758356, "loss": 1.4328, "step": 30006 }, { "epoch": 0.3899272792836108, "grad_norm": 0.3923112452030182, "learning_rate": 0.00012203953781567217, "loss": 1.4388, "step": 30007 }, { "epoch": 0.38994027382752666, "grad_norm": 0.35317128896713257, "learning_rate": 0.00012203693835376078, "loss": 1.3366, "step": 30008 }, { "epoch": 0.38995326837144256, "grad_norm": 0.36921223998069763, "learning_rate": 0.00012203433889184939, "loss": 1.2926, "step": 30009 }, { "epoch": 0.3899662629153584, "grad_norm": 0.34526345133781433, "learning_rate": 0.00012203173942993801, "loss": 1.334, "step": 30010 }, { "epoch": 0.3899792574592743, "grad_norm": 0.44029760360717773, "learning_rate": 0.00012202913996802662, "loss": 1.2934, "step": 30011 }, { "epoch": 0.38999225200319015, "grad_norm": 0.4285171627998352, "learning_rate": 0.00012202654050611523, "loss": 1.4166, "step": 30012 }, { "epoch": 0.39000524654710605, "grad_norm": 0.39532583951950073, "learning_rate": 0.00012202394104420385, "loss": 1.61, "step": 30013 }, { "epoch": 0.3900182410910219, "grad_norm": 0.3894072473049164, "learning_rate": 0.00012202134158229248, "loss": 1.467, "step": 30014 }, { "epoch": 0.3900312356349378, "grad_norm": 0.3797360360622406, "learning_rate": 0.0001220187421203811, "loss": 1.443, "step": 30015 }, { "epoch": 0.39004423017885365, "grad_norm": 0.5540939569473267, "learning_rate": 0.0001220161426584697, "loss": 1.3629, "step": 30016 }, { "epoch": 0.39005722472276955, "grad_norm": 0.34394213557243347, "learning_rate": 0.00012201354319655832, "loss": 1.4709, "step": 30017 }, { "epoch": 0.3900702192666854, "grad_norm": 0.40100544691085815, "learning_rate": 0.00012201094373464694, "loss": 1.3265, "step": 30018 }, { "epoch": 0.3900832138106013, "grad_norm": 0.5185689330101013, "learning_rate": 0.00012200834427273555, "loss": 1.3884, "step": 30019 }, { "epoch": 0.39009620835451714, "grad_norm": 0.3766475021839142, "learning_rate": 0.00012200574481082416, "loss": 1.3758, "step": 30020 }, { "epoch": 0.39010920289843304, "grad_norm": 0.4197940528392792, "learning_rate": 0.00012200314534891277, "loss": 1.4424, "step": 30021 }, { "epoch": 0.3901221974423489, "grad_norm": 0.29997891187667847, "learning_rate": 0.00012200054588700141, "loss": 1.2417, "step": 30022 }, { "epoch": 0.3901351919862648, "grad_norm": 0.4618741273880005, "learning_rate": 0.00012199794642509001, "loss": 1.3843, "step": 30023 }, { "epoch": 0.39014818653018063, "grad_norm": 0.545307993888855, "learning_rate": 0.00012199534696317862, "loss": 1.4275, "step": 30024 }, { "epoch": 0.39016118107409653, "grad_norm": 0.43186327815055847, "learning_rate": 0.00012199274750126723, "loss": 1.444, "step": 30025 }, { "epoch": 0.3901741756180124, "grad_norm": 0.34508249163627625, "learning_rate": 0.00012199014803935587, "loss": 1.4424, "step": 30026 }, { "epoch": 0.3901871701619283, "grad_norm": 0.3395686745643616, "learning_rate": 0.00012198754857744448, "loss": 1.4116, "step": 30027 }, { "epoch": 0.3902001647058441, "grad_norm": 0.3685622215270996, "learning_rate": 0.00012198494911553309, "loss": 1.3384, "step": 30028 }, { "epoch": 0.39021315924976, "grad_norm": 0.39594343304634094, "learning_rate": 0.0001219823496536217, "loss": 1.4782, "step": 30029 }, { "epoch": 0.39022615379367587, "grad_norm": 0.4663515090942383, "learning_rate": 0.00012197975019171033, "loss": 1.3149, "step": 30030 }, { "epoch": 0.39023914833759177, "grad_norm": 0.40722721815109253, "learning_rate": 0.00012197715072979894, "loss": 1.3377, "step": 30031 }, { "epoch": 0.3902521428815076, "grad_norm": 0.4376753866672516, "learning_rate": 0.00012197455126788755, "loss": 1.4647, "step": 30032 }, { "epoch": 0.3902651374254235, "grad_norm": 0.4399189054965973, "learning_rate": 0.00012197195180597616, "loss": 1.2996, "step": 30033 }, { "epoch": 0.39027813196933936, "grad_norm": 0.4426305592060089, "learning_rate": 0.0001219693523440648, "loss": 1.2137, "step": 30034 }, { "epoch": 0.39029112651325526, "grad_norm": 0.4404280185699463, "learning_rate": 0.00012196675288215341, "loss": 1.4289, "step": 30035 }, { "epoch": 0.3903041210571711, "grad_norm": 0.3486882150173187, "learning_rate": 0.000121964153420242, "loss": 1.3242, "step": 30036 }, { "epoch": 0.390317115601087, "grad_norm": 0.40671306848526, "learning_rate": 0.00012196155395833062, "loss": 1.4706, "step": 30037 }, { "epoch": 0.39033011014500285, "grad_norm": 0.3922707140445709, "learning_rate": 0.00012195895449641925, "loss": 1.4094, "step": 30038 }, { "epoch": 0.39034310468891875, "grad_norm": 0.38598543405532837, "learning_rate": 0.00012195635503450787, "loss": 1.266, "step": 30039 }, { "epoch": 0.3903560992328346, "grad_norm": 0.43411827087402344, "learning_rate": 0.00012195375557259648, "loss": 1.2387, "step": 30040 }, { "epoch": 0.3903690937767505, "grad_norm": 0.4489670395851135, "learning_rate": 0.00012195115611068509, "loss": 1.5064, "step": 30041 }, { "epoch": 0.39038208832066634, "grad_norm": 0.4152943193912506, "learning_rate": 0.00012194855664877371, "loss": 1.4929, "step": 30042 }, { "epoch": 0.39039508286458224, "grad_norm": 0.4033881723880768, "learning_rate": 0.00012194595718686232, "loss": 1.495, "step": 30043 }, { "epoch": 0.3904080774084981, "grad_norm": 0.3771362006664276, "learning_rate": 0.00012194335772495093, "loss": 1.4386, "step": 30044 }, { "epoch": 0.390421071952414, "grad_norm": 0.40339937806129456, "learning_rate": 0.00012194075826303957, "loss": 1.3571, "step": 30045 }, { "epoch": 0.39043406649632983, "grad_norm": 0.43747755885124207, "learning_rate": 0.00012193815880112818, "loss": 1.32, "step": 30046 }, { "epoch": 0.39044706104024574, "grad_norm": 0.34134072065353394, "learning_rate": 0.00012193555933921679, "loss": 1.3762, "step": 30047 }, { "epoch": 0.3904600555841616, "grad_norm": 0.42860689759254456, "learning_rate": 0.00012193295987730539, "loss": 1.3962, "step": 30048 }, { "epoch": 0.3904730501280775, "grad_norm": 0.3270234763622284, "learning_rate": 0.00012193036041539403, "loss": 1.5259, "step": 30049 }, { "epoch": 0.3904860446719933, "grad_norm": 0.38852789998054504, "learning_rate": 0.00012192776095348264, "loss": 1.4467, "step": 30050 }, { "epoch": 0.3904990392159092, "grad_norm": 0.32282713055610657, "learning_rate": 0.00012192516149157125, "loss": 1.3952, "step": 30051 }, { "epoch": 0.3905120337598251, "grad_norm": 0.3749118149280548, "learning_rate": 0.00012192256202965986, "loss": 1.4419, "step": 30052 }, { "epoch": 0.390525028303741, "grad_norm": 0.37093400955200195, "learning_rate": 0.00012191996256774849, "loss": 1.3535, "step": 30053 }, { "epoch": 0.3905380228476568, "grad_norm": 0.42257416248321533, "learning_rate": 0.0001219173631058371, "loss": 1.3667, "step": 30054 }, { "epoch": 0.3905510173915727, "grad_norm": 0.45308542251586914, "learning_rate": 0.00012191476364392571, "loss": 1.6331, "step": 30055 }, { "epoch": 0.39056401193548856, "grad_norm": 0.40969449281692505, "learning_rate": 0.00012191216418201432, "loss": 1.5026, "step": 30056 }, { "epoch": 0.39057700647940446, "grad_norm": 0.5387527942657471, "learning_rate": 0.00012190956472010296, "loss": 1.577, "step": 30057 }, { "epoch": 0.3905900010233203, "grad_norm": 0.4781966209411621, "learning_rate": 0.00012190696525819157, "loss": 1.4391, "step": 30058 }, { "epoch": 0.3906029955672362, "grad_norm": 0.47998109459877014, "learning_rate": 0.00012190436579628018, "loss": 1.408, "step": 30059 }, { "epoch": 0.39061599011115206, "grad_norm": 0.30010759830474854, "learning_rate": 0.00012190176633436879, "loss": 1.2555, "step": 30060 }, { "epoch": 0.39062898465506796, "grad_norm": 0.4064951539039612, "learning_rate": 0.00012189916687245741, "loss": 1.5438, "step": 30061 }, { "epoch": 0.3906419791989838, "grad_norm": 0.3994278609752655, "learning_rate": 0.00012189656741054602, "loss": 1.3502, "step": 30062 }, { "epoch": 0.3906549737428997, "grad_norm": 0.5379024744033813, "learning_rate": 0.00012189396794863464, "loss": 1.3312, "step": 30063 }, { "epoch": 0.39066796828681555, "grad_norm": 0.3372834324836731, "learning_rate": 0.00012189136848672325, "loss": 1.5487, "step": 30064 }, { "epoch": 0.39068096283073145, "grad_norm": 0.41192808747291565, "learning_rate": 0.00012188876902481187, "loss": 1.3943, "step": 30065 }, { "epoch": 0.3906939573746473, "grad_norm": 0.4473399519920349, "learning_rate": 0.00012188616956290048, "loss": 1.1819, "step": 30066 }, { "epoch": 0.3907069519185632, "grad_norm": 0.40857449173927307, "learning_rate": 0.00012188357010098909, "loss": 1.5914, "step": 30067 }, { "epoch": 0.39071994646247904, "grad_norm": 0.35110679268836975, "learning_rate": 0.0001218809706390777, "loss": 1.2856, "step": 30068 }, { "epoch": 0.39073294100639494, "grad_norm": 0.33694663643836975, "learning_rate": 0.00012187837117716634, "loss": 1.5575, "step": 30069 }, { "epoch": 0.3907459355503108, "grad_norm": 0.4459562301635742, "learning_rate": 0.00012187577171525495, "loss": 1.2977, "step": 30070 }, { "epoch": 0.3907589300942267, "grad_norm": 0.49202728271484375, "learning_rate": 0.00012187317225334356, "loss": 1.574, "step": 30071 }, { "epoch": 0.3907719246381426, "grad_norm": 0.4667641818523407, "learning_rate": 0.00012187057279143217, "loss": 1.5255, "step": 30072 }, { "epoch": 0.39078491918205843, "grad_norm": 0.3807390630245209, "learning_rate": 0.0001218679733295208, "loss": 1.5656, "step": 30073 }, { "epoch": 0.39079791372597433, "grad_norm": 0.44292476773262024, "learning_rate": 0.00012186537386760941, "loss": 1.3653, "step": 30074 }, { "epoch": 0.3908109082698902, "grad_norm": 0.5953220725059509, "learning_rate": 0.00012186277440569802, "loss": 1.4531, "step": 30075 }, { "epoch": 0.3908239028138061, "grad_norm": 0.5154035687446594, "learning_rate": 0.00012186017494378663, "loss": 1.512, "step": 30076 }, { "epoch": 0.3908368973577219, "grad_norm": 0.40332356095314026, "learning_rate": 0.00012185757548187527, "loss": 1.2878, "step": 30077 }, { "epoch": 0.3908498919016378, "grad_norm": 0.36124491691589355, "learning_rate": 0.00012185497601996387, "loss": 1.4294, "step": 30078 }, { "epoch": 0.39086288644555367, "grad_norm": 0.27464374899864197, "learning_rate": 0.00012185237655805248, "loss": 1.3954, "step": 30079 }, { "epoch": 0.39087588098946957, "grad_norm": 0.35874730348587036, "learning_rate": 0.00012184977709614112, "loss": 1.5174, "step": 30080 }, { "epoch": 0.3908888755333854, "grad_norm": 0.3673916161060333, "learning_rate": 0.00012184717763422973, "loss": 1.3723, "step": 30081 }, { "epoch": 0.3909018700773013, "grad_norm": 0.40562140941619873, "learning_rate": 0.00012184457817231834, "loss": 1.3638, "step": 30082 }, { "epoch": 0.39091486462121716, "grad_norm": 0.3026283085346222, "learning_rate": 0.00012184197871040695, "loss": 1.3411, "step": 30083 }, { "epoch": 0.39092785916513306, "grad_norm": 0.44750916957855225, "learning_rate": 0.00012183937924849557, "loss": 1.379, "step": 30084 }, { "epoch": 0.3909408537090489, "grad_norm": 0.41736695170402527, "learning_rate": 0.00012183677978658418, "loss": 1.3509, "step": 30085 }, { "epoch": 0.3909538482529648, "grad_norm": 0.32945263385772705, "learning_rate": 0.0001218341803246728, "loss": 1.5389, "step": 30086 }, { "epoch": 0.39096684279688065, "grad_norm": 0.3785216212272644, "learning_rate": 0.0001218315808627614, "loss": 1.3913, "step": 30087 }, { "epoch": 0.39097983734079655, "grad_norm": 0.3218432366847992, "learning_rate": 0.00012182898140085004, "loss": 1.3599, "step": 30088 }, { "epoch": 0.3909928318847124, "grad_norm": 0.3952227532863617, "learning_rate": 0.00012182638193893866, "loss": 1.1908, "step": 30089 }, { "epoch": 0.3910058264286283, "grad_norm": 0.4091629981994629, "learning_rate": 0.00012182378247702725, "loss": 1.3822, "step": 30090 }, { "epoch": 0.39101882097254415, "grad_norm": 0.3612927794456482, "learning_rate": 0.00012182118301511586, "loss": 1.5119, "step": 30091 }, { "epoch": 0.39103181551646005, "grad_norm": 0.3626982271671295, "learning_rate": 0.0001218185835532045, "loss": 1.5039, "step": 30092 }, { "epoch": 0.3910448100603759, "grad_norm": 0.40796971321105957, "learning_rate": 0.00012181598409129311, "loss": 1.4785, "step": 30093 }, { "epoch": 0.3910578046042918, "grad_norm": 0.3584350347518921, "learning_rate": 0.00012181338462938172, "loss": 1.3826, "step": 30094 }, { "epoch": 0.39107079914820764, "grad_norm": 0.46865078806877136, "learning_rate": 0.00012181078516747033, "loss": 1.4512, "step": 30095 }, { "epoch": 0.39108379369212354, "grad_norm": 0.35887107253074646, "learning_rate": 0.00012180818570555896, "loss": 1.4173, "step": 30096 }, { "epoch": 0.3910967882360394, "grad_norm": 0.42081189155578613, "learning_rate": 0.00012180558624364757, "loss": 1.2792, "step": 30097 }, { "epoch": 0.3911097827799553, "grad_norm": 0.4254751205444336, "learning_rate": 0.00012180298678173618, "loss": 1.2319, "step": 30098 }, { "epoch": 0.39112277732387113, "grad_norm": 0.4039698541164398, "learning_rate": 0.00012180038731982479, "loss": 1.286, "step": 30099 }, { "epoch": 0.39113577186778703, "grad_norm": 0.4513438642024994, "learning_rate": 0.00012179778785791343, "loss": 1.2674, "step": 30100 }, { "epoch": 0.3911487664117029, "grad_norm": 0.37958231568336487, "learning_rate": 0.00012179518839600204, "loss": 1.4493, "step": 30101 }, { "epoch": 0.3911617609556188, "grad_norm": 0.3481837511062622, "learning_rate": 0.00012179258893409065, "loss": 1.4223, "step": 30102 }, { "epoch": 0.3911747554995346, "grad_norm": 0.395068883895874, "learning_rate": 0.00012178998947217925, "loss": 1.2806, "step": 30103 }, { "epoch": 0.3911877500434505, "grad_norm": 0.35109856724739075, "learning_rate": 0.00012178739001026789, "loss": 1.3602, "step": 30104 }, { "epoch": 0.39120074458736637, "grad_norm": 0.33429616689682007, "learning_rate": 0.0001217847905483565, "loss": 1.1632, "step": 30105 }, { "epoch": 0.39121373913128227, "grad_norm": 0.3121984004974365, "learning_rate": 0.00012178219108644511, "loss": 1.4174, "step": 30106 }, { "epoch": 0.3912267336751981, "grad_norm": 0.4757743179798126, "learning_rate": 0.00012177959162453372, "loss": 1.5094, "step": 30107 }, { "epoch": 0.391239728219114, "grad_norm": 0.3582538366317749, "learning_rate": 0.00012177699216262234, "loss": 1.3933, "step": 30108 }, { "epoch": 0.39125272276302986, "grad_norm": 0.3541100323200226, "learning_rate": 0.00012177439270071096, "loss": 1.2232, "step": 30109 }, { "epoch": 0.39126571730694576, "grad_norm": 0.35205531120300293, "learning_rate": 0.00012177179323879957, "loss": 1.4804, "step": 30110 }, { "epoch": 0.3912787118508616, "grad_norm": 0.4998394548892975, "learning_rate": 0.00012176919377688818, "loss": 1.6934, "step": 30111 }, { "epoch": 0.3912917063947775, "grad_norm": 0.36380964517593384, "learning_rate": 0.00012176659431497682, "loss": 1.349, "step": 30112 }, { "epoch": 0.39130470093869335, "grad_norm": 0.4160205125808716, "learning_rate": 0.00012176399485306543, "loss": 1.381, "step": 30113 }, { "epoch": 0.39131769548260925, "grad_norm": 0.3899326026439667, "learning_rate": 0.00012176139539115404, "loss": 1.2929, "step": 30114 }, { "epoch": 0.3913306900265251, "grad_norm": 0.3528885841369629, "learning_rate": 0.00012175879592924265, "loss": 1.3306, "step": 30115 }, { "epoch": 0.391343684570441, "grad_norm": 0.5143755078315735, "learning_rate": 0.00012175619646733127, "loss": 1.3214, "step": 30116 }, { "epoch": 0.39135667911435684, "grad_norm": 0.3883429169654846, "learning_rate": 0.00012175359700541988, "loss": 1.5306, "step": 30117 }, { "epoch": 0.39136967365827274, "grad_norm": 0.4060009717941284, "learning_rate": 0.0001217509975435085, "loss": 1.3712, "step": 30118 }, { "epoch": 0.3913826682021886, "grad_norm": 0.44311872124671936, "learning_rate": 0.00012174839808159713, "loss": 1.1517, "step": 30119 }, { "epoch": 0.3913956627461045, "grad_norm": 0.3149387538433075, "learning_rate": 0.00012174579861968573, "loss": 1.5049, "step": 30120 }, { "epoch": 0.39140865729002033, "grad_norm": 0.41133204102516174, "learning_rate": 0.00012174319915777434, "loss": 1.3962, "step": 30121 }, { "epoch": 0.39142165183393623, "grad_norm": 0.3749205470085144, "learning_rate": 0.00012174059969586295, "loss": 1.3365, "step": 30122 }, { "epoch": 0.3914346463778521, "grad_norm": 0.31378787755966187, "learning_rate": 0.00012173800023395159, "loss": 1.3222, "step": 30123 }, { "epoch": 0.391447640921768, "grad_norm": 0.4185314178466797, "learning_rate": 0.0001217354007720402, "loss": 1.4204, "step": 30124 }, { "epoch": 0.3914606354656838, "grad_norm": 0.4197222888469696, "learning_rate": 0.00012173280131012881, "loss": 1.3296, "step": 30125 }, { "epoch": 0.3914736300095997, "grad_norm": 0.46947818994522095, "learning_rate": 0.00012173020184821742, "loss": 1.3735, "step": 30126 }, { "epoch": 0.39148662455351557, "grad_norm": 0.3419879972934723, "learning_rate": 0.00012172760238630605, "loss": 1.4667, "step": 30127 }, { "epoch": 0.3914996190974315, "grad_norm": 0.40291690826416016, "learning_rate": 0.00012172500292439466, "loss": 1.626, "step": 30128 }, { "epoch": 0.3915126136413473, "grad_norm": 0.3707294166088104, "learning_rate": 0.00012172240346248327, "loss": 1.4204, "step": 30129 }, { "epoch": 0.3915256081852632, "grad_norm": 0.3771126866340637, "learning_rate": 0.00012171980400057188, "loss": 1.4433, "step": 30130 }, { "epoch": 0.39153860272917906, "grad_norm": 0.46512410044670105, "learning_rate": 0.00012171720453866052, "loss": 1.4042, "step": 30131 }, { "epoch": 0.39155159727309496, "grad_norm": 0.3892116844654083, "learning_rate": 0.00012171460507674912, "loss": 1.3139, "step": 30132 }, { "epoch": 0.3915645918170108, "grad_norm": 0.3910089433193207, "learning_rate": 0.00012171200561483773, "loss": 1.1926, "step": 30133 }, { "epoch": 0.3915775863609267, "grad_norm": 0.39351779222488403, "learning_rate": 0.00012170940615292634, "loss": 1.453, "step": 30134 }, { "epoch": 0.39159058090484256, "grad_norm": 0.40423285961151123, "learning_rate": 0.00012170680669101498, "loss": 1.2075, "step": 30135 }, { "epoch": 0.39160357544875846, "grad_norm": 0.30895480513572693, "learning_rate": 0.00012170420722910359, "loss": 1.3786, "step": 30136 }, { "epoch": 0.3916165699926743, "grad_norm": 0.29699960350990295, "learning_rate": 0.0001217016077671922, "loss": 1.3196, "step": 30137 }, { "epoch": 0.3916295645365902, "grad_norm": 0.37178802490234375, "learning_rate": 0.00012169900830528081, "loss": 1.5311, "step": 30138 }, { "epoch": 0.39164255908050605, "grad_norm": 0.4345197081565857, "learning_rate": 0.00012169640884336943, "loss": 1.2716, "step": 30139 }, { "epoch": 0.39165555362442195, "grad_norm": 0.44118285179138184, "learning_rate": 0.00012169380938145804, "loss": 1.2205, "step": 30140 }, { "epoch": 0.3916685481683378, "grad_norm": 0.3031332790851593, "learning_rate": 0.00012169120991954665, "loss": 1.2755, "step": 30141 }, { "epoch": 0.3916815427122537, "grad_norm": 0.42799246311187744, "learning_rate": 0.00012168861045763527, "loss": 1.6241, "step": 30142 }, { "epoch": 0.39169453725616954, "grad_norm": 0.42290955781936646, "learning_rate": 0.0001216860109957239, "loss": 1.4982, "step": 30143 }, { "epoch": 0.39170753180008544, "grad_norm": 0.3088582456111908, "learning_rate": 0.00012168341153381251, "loss": 1.4289, "step": 30144 }, { "epoch": 0.3917205263440013, "grad_norm": 0.27989429235458374, "learning_rate": 0.00012168081207190111, "loss": 1.242, "step": 30145 }, { "epoch": 0.3917335208879172, "grad_norm": 0.39245644211769104, "learning_rate": 0.00012167821260998972, "loss": 1.3646, "step": 30146 }, { "epoch": 0.3917465154318331, "grad_norm": 0.3112770617008209, "learning_rate": 0.00012167561314807836, "loss": 1.1425, "step": 30147 }, { "epoch": 0.39175950997574893, "grad_norm": 0.348631888628006, "learning_rate": 0.00012167301368616697, "loss": 1.4115, "step": 30148 }, { "epoch": 0.39177250451966483, "grad_norm": 0.5282865762710571, "learning_rate": 0.00012167041422425558, "loss": 1.3226, "step": 30149 }, { "epoch": 0.3917854990635807, "grad_norm": 0.36630749702453613, "learning_rate": 0.0001216678147623442, "loss": 1.2992, "step": 30150 }, { "epoch": 0.3917984936074966, "grad_norm": 0.4147343039512634, "learning_rate": 0.00012166521530043282, "loss": 1.5258, "step": 30151 }, { "epoch": 0.3918114881514124, "grad_norm": 0.5318707823753357, "learning_rate": 0.00012166261583852143, "loss": 1.4118, "step": 30152 }, { "epoch": 0.3918244826953283, "grad_norm": 0.4136781692504883, "learning_rate": 0.00012166001637661004, "loss": 1.4336, "step": 30153 }, { "epoch": 0.39183747723924417, "grad_norm": 0.36295342445373535, "learning_rate": 0.00012165741691469868, "loss": 1.2866, "step": 30154 }, { "epoch": 0.39185047178316007, "grad_norm": 0.36061781644821167, "learning_rate": 0.00012165481745278729, "loss": 1.5548, "step": 30155 }, { "epoch": 0.3918634663270759, "grad_norm": 0.40959620475769043, "learning_rate": 0.0001216522179908759, "loss": 1.2291, "step": 30156 }, { "epoch": 0.3918764608709918, "grad_norm": 0.27087658643722534, "learning_rate": 0.00012164961852896451, "loss": 1.3663, "step": 30157 }, { "epoch": 0.39188945541490766, "grad_norm": 0.31021416187286377, "learning_rate": 0.00012164701906705314, "loss": 1.2343, "step": 30158 }, { "epoch": 0.39190244995882356, "grad_norm": 0.3856663405895233, "learning_rate": 0.00012164441960514175, "loss": 1.5296, "step": 30159 }, { "epoch": 0.3919154445027394, "grad_norm": 0.4690866768360138, "learning_rate": 0.00012164182014323036, "loss": 1.4471, "step": 30160 }, { "epoch": 0.3919284390466553, "grad_norm": 0.46747124195098877, "learning_rate": 0.00012163922068131897, "loss": 1.3471, "step": 30161 }, { "epoch": 0.39194143359057115, "grad_norm": 0.24842660129070282, "learning_rate": 0.00012163662121940759, "loss": 1.3677, "step": 30162 }, { "epoch": 0.39195442813448705, "grad_norm": 0.3894916772842407, "learning_rate": 0.0001216340217574962, "loss": 1.3395, "step": 30163 }, { "epoch": 0.3919674226784029, "grad_norm": 0.4911866784095764, "learning_rate": 0.00012163142229558481, "loss": 1.4341, "step": 30164 }, { "epoch": 0.3919804172223188, "grad_norm": 0.43905109167099, "learning_rate": 0.00012162882283367343, "loss": 1.596, "step": 30165 }, { "epoch": 0.39199341176623465, "grad_norm": 0.375514954328537, "learning_rate": 0.00012162622337176206, "loss": 1.676, "step": 30166 }, { "epoch": 0.39200640631015055, "grad_norm": 0.352326363325119, "learning_rate": 0.00012162362390985067, "loss": 1.3772, "step": 30167 }, { "epoch": 0.3920194008540664, "grad_norm": 0.37222519516944885, "learning_rate": 0.00012162102444793929, "loss": 1.3941, "step": 30168 }, { "epoch": 0.3920323953979823, "grad_norm": 0.41673657298088074, "learning_rate": 0.0001216184249860279, "loss": 1.4676, "step": 30169 }, { "epoch": 0.39204538994189814, "grad_norm": 0.4229004383087158, "learning_rate": 0.00012161582552411652, "loss": 1.4797, "step": 30170 }, { "epoch": 0.39205838448581404, "grad_norm": 0.3822682499885559, "learning_rate": 0.00012161322606220513, "loss": 1.177, "step": 30171 }, { "epoch": 0.3920713790297299, "grad_norm": 0.3789348006248474, "learning_rate": 0.00012161062660029374, "loss": 1.3862, "step": 30172 }, { "epoch": 0.3920843735736458, "grad_norm": 0.42446979880332947, "learning_rate": 0.00012160802713838235, "loss": 1.285, "step": 30173 }, { "epoch": 0.39209736811756163, "grad_norm": 0.4142511785030365, "learning_rate": 0.00012160542767647098, "loss": 1.4233, "step": 30174 }, { "epoch": 0.39211036266147753, "grad_norm": 0.24715737998485565, "learning_rate": 0.00012160282821455959, "loss": 1.378, "step": 30175 }, { "epoch": 0.3921233572053934, "grad_norm": 0.399018794298172, "learning_rate": 0.0001216002287526482, "loss": 1.3117, "step": 30176 }, { "epoch": 0.3921363517493093, "grad_norm": 0.31237298250198364, "learning_rate": 0.00012159762929073681, "loss": 1.5218, "step": 30177 }, { "epoch": 0.3921493462932251, "grad_norm": 0.404621422290802, "learning_rate": 0.00012159502982882545, "loss": 1.369, "step": 30178 }, { "epoch": 0.392162340837141, "grad_norm": 0.4243197739124298, "learning_rate": 0.00012159243036691406, "loss": 1.4715, "step": 30179 }, { "epoch": 0.39217533538105687, "grad_norm": 0.41246849298477173, "learning_rate": 0.00012158983090500267, "loss": 1.4675, "step": 30180 }, { "epoch": 0.39218832992497277, "grad_norm": 0.38864678144454956, "learning_rate": 0.00012158723144309128, "loss": 1.6011, "step": 30181 }, { "epoch": 0.3922013244688886, "grad_norm": 0.31902214884757996, "learning_rate": 0.0001215846319811799, "loss": 1.4662, "step": 30182 }, { "epoch": 0.3922143190128045, "grad_norm": 0.4103827476501465, "learning_rate": 0.00012158203251926852, "loss": 1.2086, "step": 30183 }, { "epoch": 0.39222731355672036, "grad_norm": 0.440344899892807, "learning_rate": 0.00012157943305735713, "loss": 1.5084, "step": 30184 }, { "epoch": 0.39224030810063626, "grad_norm": 0.5932542681694031, "learning_rate": 0.00012157683359544574, "loss": 1.4913, "step": 30185 }, { "epoch": 0.3922533026445521, "grad_norm": 0.34640777111053467, "learning_rate": 0.00012157423413353438, "loss": 1.4807, "step": 30186 }, { "epoch": 0.392266297188468, "grad_norm": 0.4098737835884094, "learning_rate": 0.00012157163467162297, "loss": 1.1511, "step": 30187 }, { "epoch": 0.39227929173238385, "grad_norm": 0.4336796700954437, "learning_rate": 0.00012156903520971159, "loss": 1.2743, "step": 30188 }, { "epoch": 0.39229228627629975, "grad_norm": 0.4209417998790741, "learning_rate": 0.0001215664357478002, "loss": 1.5153, "step": 30189 }, { "epoch": 0.3923052808202156, "grad_norm": 0.40461570024490356, "learning_rate": 0.00012156383628588883, "loss": 1.168, "step": 30190 }, { "epoch": 0.3923182753641315, "grad_norm": 0.38222548365592957, "learning_rate": 0.00012156123682397745, "loss": 1.4155, "step": 30191 }, { "epoch": 0.39233126990804734, "grad_norm": 0.46849504113197327, "learning_rate": 0.00012155863736206606, "loss": 1.3355, "step": 30192 }, { "epoch": 0.39234426445196324, "grad_norm": 0.2952819764614105, "learning_rate": 0.00012155603790015468, "loss": 1.3886, "step": 30193 }, { "epoch": 0.3923572589958791, "grad_norm": 0.2982991337776184, "learning_rate": 0.00012155343843824329, "loss": 1.2543, "step": 30194 }, { "epoch": 0.392370253539795, "grad_norm": 0.4075080454349518, "learning_rate": 0.0001215508389763319, "loss": 1.4977, "step": 30195 }, { "epoch": 0.39238324808371083, "grad_norm": 0.4195147156715393, "learning_rate": 0.00012154823951442051, "loss": 1.3202, "step": 30196 }, { "epoch": 0.39239624262762673, "grad_norm": 0.4231061041355133, "learning_rate": 0.00012154564005250915, "loss": 1.5016, "step": 30197 }, { "epoch": 0.3924092371715426, "grad_norm": 0.4115855395793915, "learning_rate": 0.00012154304059059776, "loss": 1.3714, "step": 30198 }, { "epoch": 0.3924222317154585, "grad_norm": 0.31283944845199585, "learning_rate": 0.00012154044112868637, "loss": 1.4056, "step": 30199 }, { "epoch": 0.3924352262593743, "grad_norm": 0.4133973717689514, "learning_rate": 0.00012153784166677497, "loss": 1.2775, "step": 30200 }, { "epoch": 0.3924482208032902, "grad_norm": 0.3579365015029907, "learning_rate": 0.00012153524220486361, "loss": 1.4062, "step": 30201 }, { "epoch": 0.39246121534720607, "grad_norm": 0.2966579496860504, "learning_rate": 0.00012153264274295222, "loss": 1.1802, "step": 30202 }, { "epoch": 0.392474209891122, "grad_norm": 0.18775449693202972, "learning_rate": 0.00012153004328104083, "loss": 1.4207, "step": 30203 }, { "epoch": 0.3924872044350378, "grad_norm": 0.3445811867713928, "learning_rate": 0.00012152744381912944, "loss": 1.5866, "step": 30204 }, { "epoch": 0.3925001989789537, "grad_norm": 0.32643625140190125, "learning_rate": 0.00012152484435721807, "loss": 1.3237, "step": 30205 }, { "epoch": 0.39251319352286956, "grad_norm": 0.3694562315940857, "learning_rate": 0.00012152224489530668, "loss": 1.3713, "step": 30206 }, { "epoch": 0.39252618806678546, "grad_norm": 0.4584919214248657, "learning_rate": 0.00012151964543339529, "loss": 1.5977, "step": 30207 }, { "epoch": 0.3925391826107013, "grad_norm": 0.4383241832256317, "learning_rate": 0.0001215170459714839, "loss": 1.4826, "step": 30208 }, { "epoch": 0.3925521771546172, "grad_norm": 0.42453089356422424, "learning_rate": 0.00012151444650957254, "loss": 1.3531, "step": 30209 }, { "epoch": 0.39256517169853306, "grad_norm": 0.3423071801662445, "learning_rate": 0.00012151184704766115, "loss": 1.3022, "step": 30210 }, { "epoch": 0.39257816624244896, "grad_norm": 0.34484803676605225, "learning_rate": 0.00012150924758574976, "loss": 1.146, "step": 30211 }, { "epoch": 0.3925911607863648, "grad_norm": 0.3715389668941498, "learning_rate": 0.00012150664812383836, "loss": 1.5697, "step": 30212 }, { "epoch": 0.3926041553302807, "grad_norm": 0.383456289768219, "learning_rate": 0.000121504048661927, "loss": 1.3851, "step": 30213 }, { "epoch": 0.39261714987419655, "grad_norm": 0.4641530513763428, "learning_rate": 0.0001215014492000156, "loss": 1.2807, "step": 30214 }, { "epoch": 0.39263014441811245, "grad_norm": 0.3006752133369446, "learning_rate": 0.00012149884973810422, "loss": 1.2938, "step": 30215 }, { "epoch": 0.3926431389620283, "grad_norm": 0.40168288350105286, "learning_rate": 0.00012149625027619283, "loss": 1.3246, "step": 30216 }, { "epoch": 0.3926561335059442, "grad_norm": 0.3213330805301666, "learning_rate": 0.00012149365081428145, "loss": 1.3463, "step": 30217 }, { "epoch": 0.39266912804986004, "grad_norm": 0.40640586614608765, "learning_rate": 0.00012149105135237006, "loss": 1.345, "step": 30218 }, { "epoch": 0.39268212259377594, "grad_norm": 0.5321385860443115, "learning_rate": 0.00012148845189045867, "loss": 1.2842, "step": 30219 }, { "epoch": 0.3926951171376918, "grad_norm": 0.363692969083786, "learning_rate": 0.00012148585242854728, "loss": 1.4173, "step": 30220 }, { "epoch": 0.3927081116816077, "grad_norm": 0.4474540650844574, "learning_rate": 0.00012148325296663592, "loss": 1.2595, "step": 30221 }, { "epoch": 0.39272110622552353, "grad_norm": 0.39879482984542847, "learning_rate": 0.00012148065350472453, "loss": 1.3373, "step": 30222 }, { "epoch": 0.39273410076943943, "grad_norm": 0.49099016189575195, "learning_rate": 0.00012147805404281314, "loss": 1.4743, "step": 30223 }, { "epoch": 0.39274709531335533, "grad_norm": 0.5243951082229614, "learning_rate": 0.00012147545458090175, "loss": 1.4535, "step": 30224 }, { "epoch": 0.3927600898572712, "grad_norm": 0.37081024050712585, "learning_rate": 0.00012147285511899038, "loss": 1.3589, "step": 30225 }, { "epoch": 0.3927730844011871, "grad_norm": 0.38306891918182373, "learning_rate": 0.00012147025565707899, "loss": 1.4424, "step": 30226 }, { "epoch": 0.3927860789451029, "grad_norm": 0.4476652145385742, "learning_rate": 0.0001214676561951676, "loss": 1.3947, "step": 30227 }, { "epoch": 0.3927990734890188, "grad_norm": 0.40072041749954224, "learning_rate": 0.00012146505673325624, "loss": 1.3507, "step": 30228 }, { "epoch": 0.39281206803293467, "grad_norm": 0.4932629466056824, "learning_rate": 0.00012146245727134484, "loss": 1.4886, "step": 30229 }, { "epoch": 0.39282506257685057, "grad_norm": 0.31090280413627625, "learning_rate": 0.00012145985780943345, "loss": 1.3406, "step": 30230 }, { "epoch": 0.3928380571207664, "grad_norm": 0.3673417866230011, "learning_rate": 0.00012145725834752206, "loss": 1.427, "step": 30231 }, { "epoch": 0.3928510516646823, "grad_norm": 0.5585647821426392, "learning_rate": 0.0001214546588856107, "loss": 1.3765, "step": 30232 }, { "epoch": 0.39286404620859816, "grad_norm": 0.3452046513557434, "learning_rate": 0.00012145205942369931, "loss": 1.3163, "step": 30233 }, { "epoch": 0.39287704075251406, "grad_norm": 0.33295050263404846, "learning_rate": 0.00012144945996178792, "loss": 1.1413, "step": 30234 }, { "epoch": 0.3928900352964299, "grad_norm": 0.48066234588623047, "learning_rate": 0.00012144686049987653, "loss": 1.3916, "step": 30235 }, { "epoch": 0.3929030298403458, "grad_norm": 0.3673097491264343, "learning_rate": 0.00012144426103796515, "loss": 1.3864, "step": 30236 }, { "epoch": 0.39291602438426165, "grad_norm": 0.45697200298309326, "learning_rate": 0.00012144166157605376, "loss": 1.4431, "step": 30237 }, { "epoch": 0.39292901892817755, "grad_norm": 0.4453720152378082, "learning_rate": 0.00012143906211414238, "loss": 1.5751, "step": 30238 }, { "epoch": 0.3929420134720934, "grad_norm": 0.4138123393058777, "learning_rate": 0.00012143646265223099, "loss": 1.296, "step": 30239 }, { "epoch": 0.3929550080160093, "grad_norm": 0.3418317139148712, "learning_rate": 0.00012143386319031962, "loss": 1.222, "step": 30240 }, { "epoch": 0.39296800255992514, "grad_norm": 0.3883409798145294, "learning_rate": 0.00012143126372840824, "loss": 1.4565, "step": 30241 }, { "epoch": 0.39298099710384105, "grad_norm": 0.46394437551498413, "learning_rate": 0.00012142866426649683, "loss": 1.4492, "step": 30242 }, { "epoch": 0.3929939916477569, "grad_norm": 0.42542657256126404, "learning_rate": 0.00012142606480458544, "loss": 1.4919, "step": 30243 }, { "epoch": 0.3930069861916728, "grad_norm": 0.3854852616786957, "learning_rate": 0.00012142346534267408, "loss": 1.413, "step": 30244 }, { "epoch": 0.39301998073558864, "grad_norm": 0.3956025242805481, "learning_rate": 0.00012142086588076269, "loss": 1.269, "step": 30245 }, { "epoch": 0.39303297527950454, "grad_norm": 0.4396824836730957, "learning_rate": 0.0001214182664188513, "loss": 1.5778, "step": 30246 }, { "epoch": 0.3930459698234204, "grad_norm": 0.3475947678089142, "learning_rate": 0.00012141566695693991, "loss": 1.4049, "step": 30247 }, { "epoch": 0.3930589643673363, "grad_norm": 0.5264297127723694, "learning_rate": 0.00012141306749502854, "loss": 1.5911, "step": 30248 }, { "epoch": 0.39307195891125213, "grad_norm": 0.44453394412994385, "learning_rate": 0.00012141046803311715, "loss": 1.5334, "step": 30249 }, { "epoch": 0.39308495345516803, "grad_norm": 0.3368965685367584, "learning_rate": 0.00012140786857120576, "loss": 1.47, "step": 30250 }, { "epoch": 0.3930979479990839, "grad_norm": 0.3358776569366455, "learning_rate": 0.00012140526910929437, "loss": 1.3875, "step": 30251 }, { "epoch": 0.3931109425429998, "grad_norm": 0.4137432873249054, "learning_rate": 0.00012140266964738301, "loss": 1.5743, "step": 30252 }, { "epoch": 0.3931239370869156, "grad_norm": 0.43048447370529175, "learning_rate": 0.00012140007018547162, "loss": 1.2444, "step": 30253 }, { "epoch": 0.3931369316308315, "grad_norm": 0.4377857744693756, "learning_rate": 0.00012139747072356022, "loss": 1.4032, "step": 30254 }, { "epoch": 0.39314992617474737, "grad_norm": 0.43612468242645264, "learning_rate": 0.00012139487126164883, "loss": 1.674, "step": 30255 }, { "epoch": 0.39316292071866327, "grad_norm": 0.39175236225128174, "learning_rate": 0.00012139227179973747, "loss": 1.3721, "step": 30256 }, { "epoch": 0.3931759152625791, "grad_norm": 0.3942610025405884, "learning_rate": 0.00012138967233782608, "loss": 1.3836, "step": 30257 }, { "epoch": 0.393188909806495, "grad_norm": 0.4507838785648346, "learning_rate": 0.00012138707287591469, "loss": 1.2908, "step": 30258 }, { "epoch": 0.39320190435041086, "grad_norm": 0.42537006735801697, "learning_rate": 0.0001213844734140033, "loss": 1.5651, "step": 30259 }, { "epoch": 0.39321489889432676, "grad_norm": 0.46596047282218933, "learning_rate": 0.00012138187395209192, "loss": 1.5946, "step": 30260 }, { "epoch": 0.3932278934382426, "grad_norm": 0.38619983196258545, "learning_rate": 0.00012137927449018054, "loss": 1.3514, "step": 30261 }, { "epoch": 0.3932408879821585, "grad_norm": 0.3787907361984253, "learning_rate": 0.00012137667502826915, "loss": 1.4278, "step": 30262 }, { "epoch": 0.39325388252607435, "grad_norm": 0.4231860041618347, "learning_rate": 0.00012137407556635776, "loss": 1.5253, "step": 30263 }, { "epoch": 0.39326687706999025, "grad_norm": 0.45576971769332886, "learning_rate": 0.0001213714761044464, "loss": 1.4211, "step": 30264 }, { "epoch": 0.3932798716139061, "grad_norm": 0.46972087025642395, "learning_rate": 0.000121368876642535, "loss": 1.3754, "step": 30265 }, { "epoch": 0.393292866157822, "grad_norm": 0.3763978183269501, "learning_rate": 0.00012136627718062362, "loss": 1.5172, "step": 30266 }, { "epoch": 0.39330586070173784, "grad_norm": 0.3517427444458008, "learning_rate": 0.00012136367771871224, "loss": 1.4023, "step": 30267 }, { "epoch": 0.39331885524565374, "grad_norm": 0.5221895575523376, "learning_rate": 0.00012136107825680085, "loss": 1.554, "step": 30268 }, { "epoch": 0.3933318497895696, "grad_norm": 0.34739628434181213, "learning_rate": 0.00012135847879488946, "loss": 1.3073, "step": 30269 }, { "epoch": 0.3933448443334855, "grad_norm": 0.313321977853775, "learning_rate": 0.00012135587933297807, "loss": 1.1969, "step": 30270 }, { "epoch": 0.39335783887740133, "grad_norm": 0.416465699672699, "learning_rate": 0.0001213532798710667, "loss": 1.4437, "step": 30271 }, { "epoch": 0.39337083342131723, "grad_norm": 0.452899694442749, "learning_rate": 0.00012135068040915531, "loss": 1.3348, "step": 30272 }, { "epoch": 0.3933838279652331, "grad_norm": 0.35301804542541504, "learning_rate": 0.00012134808094724392, "loss": 1.3065, "step": 30273 }, { "epoch": 0.393396822509149, "grad_norm": 0.41131705045700073, "learning_rate": 0.00012134548148533253, "loss": 1.4868, "step": 30274 }, { "epoch": 0.3934098170530648, "grad_norm": 0.4610689580440521, "learning_rate": 0.00012134288202342117, "loss": 1.444, "step": 30275 }, { "epoch": 0.3934228115969807, "grad_norm": 0.4829813539981842, "learning_rate": 0.00012134028256150978, "loss": 1.5999, "step": 30276 }, { "epoch": 0.39343580614089657, "grad_norm": 0.4535762369632721, "learning_rate": 0.00012133768309959839, "loss": 1.5563, "step": 30277 }, { "epoch": 0.39344880068481247, "grad_norm": 0.4518049955368042, "learning_rate": 0.000121335083637687, "loss": 1.5499, "step": 30278 }, { "epoch": 0.3934617952287283, "grad_norm": 0.38682714104652405, "learning_rate": 0.00012133248417577563, "loss": 1.5489, "step": 30279 }, { "epoch": 0.3934747897726442, "grad_norm": 0.454598069190979, "learning_rate": 0.00012132988471386424, "loss": 1.5383, "step": 30280 }, { "epoch": 0.39348778431656006, "grad_norm": 0.3806298077106476, "learning_rate": 0.00012132728525195285, "loss": 1.4287, "step": 30281 }, { "epoch": 0.39350077886047596, "grad_norm": 0.4003894031047821, "learning_rate": 0.00012132468579004146, "loss": 1.3787, "step": 30282 }, { "epoch": 0.3935137734043918, "grad_norm": 0.41036278009414673, "learning_rate": 0.0001213220863281301, "loss": 1.3188, "step": 30283 }, { "epoch": 0.3935267679483077, "grad_norm": 0.40194404125213623, "learning_rate": 0.0001213194868662187, "loss": 1.4512, "step": 30284 }, { "epoch": 0.39353976249222355, "grad_norm": 0.4183902442455292, "learning_rate": 0.0001213168874043073, "loss": 1.3122, "step": 30285 }, { "epoch": 0.39355275703613946, "grad_norm": 0.3271028399467468, "learning_rate": 0.00012131428794239592, "loss": 1.512, "step": 30286 }, { "epoch": 0.3935657515800553, "grad_norm": 0.40299180150032043, "learning_rate": 0.00012131168848048456, "loss": 1.4801, "step": 30287 }, { "epoch": 0.3935787461239712, "grad_norm": 0.4294005334377289, "learning_rate": 0.00012130908901857317, "loss": 1.4308, "step": 30288 }, { "epoch": 0.39359174066788705, "grad_norm": 0.451560914516449, "learning_rate": 0.00012130648955666178, "loss": 1.3914, "step": 30289 }, { "epoch": 0.39360473521180295, "grad_norm": 0.30970409512519836, "learning_rate": 0.00012130389009475039, "loss": 1.2857, "step": 30290 }, { "epoch": 0.3936177297557188, "grad_norm": 0.45484453439712524, "learning_rate": 0.00012130129063283901, "loss": 1.4404, "step": 30291 }, { "epoch": 0.3936307242996347, "grad_norm": 0.24319183826446533, "learning_rate": 0.00012129869117092762, "loss": 1.1451, "step": 30292 }, { "epoch": 0.39364371884355054, "grad_norm": 0.3762994706630707, "learning_rate": 0.00012129609170901623, "loss": 1.4822, "step": 30293 }, { "epoch": 0.39365671338746644, "grad_norm": 0.4294280409812927, "learning_rate": 0.00012129349224710485, "loss": 1.3901, "step": 30294 }, { "epoch": 0.3936697079313823, "grad_norm": 0.4294148087501526, "learning_rate": 0.00012129089278519348, "loss": 1.6459, "step": 30295 }, { "epoch": 0.3936827024752982, "grad_norm": 0.33589833974838257, "learning_rate": 0.00012128829332328208, "loss": 1.3232, "step": 30296 }, { "epoch": 0.39369569701921403, "grad_norm": 0.436295747756958, "learning_rate": 0.00012128569386137069, "loss": 1.3937, "step": 30297 }, { "epoch": 0.39370869156312993, "grad_norm": 0.44970396161079407, "learning_rate": 0.0001212830943994593, "loss": 1.5046, "step": 30298 }, { "epoch": 0.39372168610704583, "grad_norm": 0.4379139542579651, "learning_rate": 0.00012128049493754794, "loss": 1.4892, "step": 30299 }, { "epoch": 0.3937346806509617, "grad_norm": 0.43271517753601074, "learning_rate": 0.00012127789547563655, "loss": 1.5246, "step": 30300 }, { "epoch": 0.3937476751948776, "grad_norm": 0.40532201528549194, "learning_rate": 0.00012127529601372516, "loss": 1.375, "step": 30301 }, { "epoch": 0.3937606697387934, "grad_norm": 0.46064314246177673, "learning_rate": 0.00012127269655181379, "loss": 1.3886, "step": 30302 }, { "epoch": 0.3937736642827093, "grad_norm": 0.3226160407066345, "learning_rate": 0.0001212700970899024, "loss": 1.348, "step": 30303 }, { "epoch": 0.39378665882662517, "grad_norm": 0.4500960111618042, "learning_rate": 0.00012126749762799101, "loss": 1.473, "step": 30304 }, { "epoch": 0.39379965337054107, "grad_norm": 0.29087477922439575, "learning_rate": 0.00012126489816607962, "loss": 1.197, "step": 30305 }, { "epoch": 0.3938126479144569, "grad_norm": 0.5905337333679199, "learning_rate": 0.00012126229870416826, "loss": 1.2978, "step": 30306 }, { "epoch": 0.3938256424583728, "grad_norm": 0.4344117045402527, "learning_rate": 0.00012125969924225687, "loss": 1.4983, "step": 30307 }, { "epoch": 0.39383863700228866, "grad_norm": 0.4091038405895233, "learning_rate": 0.00012125709978034548, "loss": 1.3895, "step": 30308 }, { "epoch": 0.39385163154620456, "grad_norm": 0.49192380905151367, "learning_rate": 0.00012125450031843408, "loss": 1.4325, "step": 30309 }, { "epoch": 0.3938646260901204, "grad_norm": 0.3093331754207611, "learning_rate": 0.00012125190085652272, "loss": 1.4219, "step": 30310 }, { "epoch": 0.3938776206340363, "grad_norm": 0.4658687114715576, "learning_rate": 0.00012124930139461133, "loss": 1.3826, "step": 30311 }, { "epoch": 0.39389061517795215, "grad_norm": 0.39100682735443115, "learning_rate": 0.00012124670193269994, "loss": 1.3979, "step": 30312 }, { "epoch": 0.39390360972186805, "grad_norm": 0.4419659674167633, "learning_rate": 0.00012124410247078855, "loss": 1.4854, "step": 30313 }, { "epoch": 0.3939166042657839, "grad_norm": 0.3633359372615814, "learning_rate": 0.00012124150300887717, "loss": 1.6146, "step": 30314 }, { "epoch": 0.3939295988096998, "grad_norm": 0.35677504539489746, "learning_rate": 0.00012123890354696578, "loss": 1.2367, "step": 30315 }, { "epoch": 0.39394259335361564, "grad_norm": 0.34865784645080566, "learning_rate": 0.0001212363040850544, "loss": 1.2232, "step": 30316 }, { "epoch": 0.39395558789753155, "grad_norm": 0.4777868390083313, "learning_rate": 0.000121233704623143, "loss": 1.5137, "step": 30317 }, { "epoch": 0.3939685824414474, "grad_norm": 0.38286593556404114, "learning_rate": 0.00012123110516123164, "loss": 1.3217, "step": 30318 }, { "epoch": 0.3939815769853633, "grad_norm": 0.38891956210136414, "learning_rate": 0.00012122850569932025, "loss": 1.3912, "step": 30319 }, { "epoch": 0.39399457152927914, "grad_norm": 0.43460381031036377, "learning_rate": 0.00012122590623740887, "loss": 1.184, "step": 30320 }, { "epoch": 0.39400756607319504, "grad_norm": 0.36464667320251465, "learning_rate": 0.00012122330677549748, "loss": 1.1896, "step": 30321 }, { "epoch": 0.3940205606171109, "grad_norm": 0.43258702754974365, "learning_rate": 0.0001212207073135861, "loss": 1.5437, "step": 30322 }, { "epoch": 0.3940335551610268, "grad_norm": 0.4469812214374542, "learning_rate": 0.00012121810785167471, "loss": 1.4213, "step": 30323 }, { "epoch": 0.39404654970494263, "grad_norm": 0.3775706887245178, "learning_rate": 0.00012121550838976332, "loss": 1.4928, "step": 30324 }, { "epoch": 0.39405954424885853, "grad_norm": 0.4082251191139221, "learning_rate": 0.00012121290892785193, "loss": 1.5051, "step": 30325 }, { "epoch": 0.3940725387927744, "grad_norm": 0.4016304612159729, "learning_rate": 0.00012121030946594056, "loss": 1.4906, "step": 30326 }, { "epoch": 0.3940855333366903, "grad_norm": 0.4051564931869507, "learning_rate": 0.00012120771000402917, "loss": 1.3146, "step": 30327 }, { "epoch": 0.3940985278806061, "grad_norm": 0.32665255665779114, "learning_rate": 0.00012120511054211778, "loss": 1.1814, "step": 30328 }, { "epoch": 0.394111522424522, "grad_norm": 0.4152357876300812, "learning_rate": 0.00012120251108020639, "loss": 1.3874, "step": 30329 }, { "epoch": 0.39412451696843787, "grad_norm": 0.48771196603775024, "learning_rate": 0.00012119991161829503, "loss": 1.5541, "step": 30330 }, { "epoch": 0.39413751151235377, "grad_norm": 0.4088663160800934, "learning_rate": 0.00012119731215638364, "loss": 1.3029, "step": 30331 }, { "epoch": 0.3941505060562696, "grad_norm": 0.4661770462989807, "learning_rate": 0.00012119471269447225, "loss": 1.3627, "step": 30332 }, { "epoch": 0.3941635006001855, "grad_norm": 0.3803216218948364, "learning_rate": 0.00012119211323256086, "loss": 1.3756, "step": 30333 }, { "epoch": 0.39417649514410136, "grad_norm": 0.3827464282512665, "learning_rate": 0.00012118951377064949, "loss": 1.4559, "step": 30334 }, { "epoch": 0.39418948968801726, "grad_norm": 0.43791788816452026, "learning_rate": 0.0001211869143087381, "loss": 1.3957, "step": 30335 }, { "epoch": 0.3942024842319331, "grad_norm": 0.45772799849510193, "learning_rate": 0.00012118431484682671, "loss": 1.5785, "step": 30336 }, { "epoch": 0.394215478775849, "grad_norm": 0.3095083236694336, "learning_rate": 0.00012118171538491532, "loss": 1.6462, "step": 30337 }, { "epoch": 0.39422847331976485, "grad_norm": 0.6071044206619263, "learning_rate": 0.00012117911592300394, "loss": 1.4753, "step": 30338 }, { "epoch": 0.39424146786368075, "grad_norm": 0.3454511761665344, "learning_rate": 0.00012117651646109255, "loss": 1.2334, "step": 30339 }, { "epoch": 0.3942544624075966, "grad_norm": 0.44996631145477295, "learning_rate": 0.00012117391699918117, "loss": 1.2994, "step": 30340 }, { "epoch": 0.3942674569515125, "grad_norm": 0.40120968222618103, "learning_rate": 0.0001211713175372698, "loss": 1.4215, "step": 30341 }, { "epoch": 0.39428045149542834, "grad_norm": 0.3478165864944458, "learning_rate": 0.00012116871807535841, "loss": 1.1979, "step": 30342 }, { "epoch": 0.39429344603934424, "grad_norm": 0.3893889784812927, "learning_rate": 0.00012116611861344702, "loss": 1.3458, "step": 30343 }, { "epoch": 0.3943064405832601, "grad_norm": 0.46901631355285645, "learning_rate": 0.00012116351915153564, "loss": 1.3871, "step": 30344 }, { "epoch": 0.394319435127176, "grad_norm": 0.4929788410663605, "learning_rate": 0.00012116091968962426, "loss": 1.4148, "step": 30345 }, { "epoch": 0.39433242967109183, "grad_norm": 0.47531694173812866, "learning_rate": 0.00012115832022771287, "loss": 1.2549, "step": 30346 }, { "epoch": 0.39434542421500773, "grad_norm": 0.3352787494659424, "learning_rate": 0.00012115572076580148, "loss": 1.4831, "step": 30347 }, { "epoch": 0.3943584187589236, "grad_norm": 0.3755118250846863, "learning_rate": 0.00012115312130389009, "loss": 1.4359, "step": 30348 }, { "epoch": 0.3943714133028395, "grad_norm": 0.37394869327545166, "learning_rate": 0.00012115052184197873, "loss": 1.5113, "step": 30349 }, { "epoch": 0.3943844078467553, "grad_norm": 0.33011576533317566, "learning_rate": 0.00012114792238006734, "loss": 1.2471, "step": 30350 }, { "epoch": 0.3943974023906712, "grad_norm": 0.47930094599723816, "learning_rate": 0.00012114532291815594, "loss": 1.3596, "step": 30351 }, { "epoch": 0.39441039693458707, "grad_norm": 0.44782111048698425, "learning_rate": 0.00012114272345624455, "loss": 1.4292, "step": 30352 }, { "epoch": 0.39442339147850297, "grad_norm": 0.32904288172721863, "learning_rate": 0.00012114012399433319, "loss": 1.3898, "step": 30353 }, { "epoch": 0.3944363860224188, "grad_norm": 0.45402792096138, "learning_rate": 0.0001211375245324218, "loss": 1.4728, "step": 30354 }, { "epoch": 0.3944493805663347, "grad_norm": 0.32263854146003723, "learning_rate": 0.00012113492507051041, "loss": 1.2434, "step": 30355 }, { "epoch": 0.39446237511025056, "grad_norm": 0.33296889066696167, "learning_rate": 0.00012113232560859902, "loss": 1.2789, "step": 30356 }, { "epoch": 0.39447536965416646, "grad_norm": 0.4589162766933441, "learning_rate": 0.00012112972614668765, "loss": 1.4358, "step": 30357 }, { "epoch": 0.3944883641980823, "grad_norm": 0.42167553305625916, "learning_rate": 0.00012112712668477626, "loss": 1.4254, "step": 30358 }, { "epoch": 0.3945013587419982, "grad_norm": 0.3578254282474518, "learning_rate": 0.00012112452722286487, "loss": 1.2167, "step": 30359 }, { "epoch": 0.39451435328591405, "grad_norm": 0.40141355991363525, "learning_rate": 0.00012112192776095348, "loss": 1.4833, "step": 30360 }, { "epoch": 0.39452734782982996, "grad_norm": 0.43685653805732727, "learning_rate": 0.00012111932829904212, "loss": 1.3553, "step": 30361 }, { "epoch": 0.3945403423737458, "grad_norm": 0.3432730734348297, "learning_rate": 0.00012111672883713073, "loss": 1.4015, "step": 30362 }, { "epoch": 0.3945533369176617, "grad_norm": 0.4469623267650604, "learning_rate": 0.00012111412937521934, "loss": 1.3108, "step": 30363 }, { "epoch": 0.39456633146157755, "grad_norm": 0.39759576320648193, "learning_rate": 0.00012111152991330794, "loss": 1.244, "step": 30364 }, { "epoch": 0.39457932600549345, "grad_norm": 0.41511356830596924, "learning_rate": 0.00012110893045139657, "loss": 1.5218, "step": 30365 }, { "epoch": 0.3945923205494093, "grad_norm": 0.4499237835407257, "learning_rate": 0.00012110633098948518, "loss": 1.4689, "step": 30366 }, { "epoch": 0.3946053150933252, "grad_norm": 0.3941147029399872, "learning_rate": 0.0001211037315275738, "loss": 1.4664, "step": 30367 }, { "epoch": 0.39461830963724104, "grad_norm": 0.4820142686367035, "learning_rate": 0.0001211011320656624, "loss": 1.6, "step": 30368 }, { "epoch": 0.39463130418115694, "grad_norm": 0.41343823075294495, "learning_rate": 0.00012109853260375103, "loss": 1.5006, "step": 30369 }, { "epoch": 0.3946442987250728, "grad_norm": 0.37576255202293396, "learning_rate": 0.00012109593314183964, "loss": 1.2638, "step": 30370 }, { "epoch": 0.3946572932689887, "grad_norm": 0.42644011974334717, "learning_rate": 0.00012109333367992825, "loss": 1.4276, "step": 30371 }, { "epoch": 0.39467028781290453, "grad_norm": 0.43201544880867004, "learning_rate": 0.00012109073421801686, "loss": 1.3923, "step": 30372 }, { "epoch": 0.39468328235682043, "grad_norm": 0.39694398641586304, "learning_rate": 0.0001210881347561055, "loss": 1.3865, "step": 30373 }, { "epoch": 0.3946962769007363, "grad_norm": 0.40186285972595215, "learning_rate": 0.00012108553529419411, "loss": 1.2719, "step": 30374 }, { "epoch": 0.3947092714446522, "grad_norm": 0.4058942198753357, "learning_rate": 0.00012108293583228272, "loss": 1.4401, "step": 30375 }, { "epoch": 0.3947222659885681, "grad_norm": 0.429519921541214, "learning_rate": 0.00012108033637037135, "loss": 1.6299, "step": 30376 }, { "epoch": 0.3947352605324839, "grad_norm": 0.35235705971717834, "learning_rate": 0.00012107773690845996, "loss": 1.4687, "step": 30377 }, { "epoch": 0.3947482550763998, "grad_norm": 0.3400331735610962, "learning_rate": 0.00012107513744654857, "loss": 1.4229, "step": 30378 }, { "epoch": 0.39476124962031567, "grad_norm": 0.4269712269306183, "learning_rate": 0.00012107253798463718, "loss": 1.5344, "step": 30379 }, { "epoch": 0.39477424416423157, "grad_norm": 0.4160550534725189, "learning_rate": 0.0001210699385227258, "loss": 1.3546, "step": 30380 }, { "epoch": 0.3947872387081474, "grad_norm": 0.33458447456359863, "learning_rate": 0.00012106733906081442, "loss": 1.4475, "step": 30381 }, { "epoch": 0.3948002332520633, "grad_norm": 0.3680357336997986, "learning_rate": 0.00012106473959890303, "loss": 1.3641, "step": 30382 }, { "epoch": 0.39481322779597916, "grad_norm": 0.4701084494590759, "learning_rate": 0.00012106214013699164, "loss": 1.5225, "step": 30383 }, { "epoch": 0.39482622233989506, "grad_norm": 0.33250707387924194, "learning_rate": 0.00012105954067508028, "loss": 1.294, "step": 30384 }, { "epoch": 0.3948392168838109, "grad_norm": 0.4820285141468048, "learning_rate": 0.00012105694121316889, "loss": 1.4186, "step": 30385 }, { "epoch": 0.3948522114277268, "grad_norm": 0.3237875699996948, "learning_rate": 0.0001210543417512575, "loss": 1.5293, "step": 30386 }, { "epoch": 0.39486520597164265, "grad_norm": 0.4344033896923065, "learning_rate": 0.00012105174228934611, "loss": 1.4003, "step": 30387 }, { "epoch": 0.39487820051555855, "grad_norm": 0.29124560952186584, "learning_rate": 0.00012104914282743473, "loss": 1.266, "step": 30388 }, { "epoch": 0.3948911950594744, "grad_norm": 0.3328922986984253, "learning_rate": 0.00012104654336552334, "loss": 1.4182, "step": 30389 }, { "epoch": 0.3949041896033903, "grad_norm": 0.3238808214664459, "learning_rate": 0.00012104394390361196, "loss": 1.4589, "step": 30390 }, { "epoch": 0.39491718414730614, "grad_norm": 0.3149753212928772, "learning_rate": 0.00012104134444170057, "loss": 1.352, "step": 30391 }, { "epoch": 0.39493017869122204, "grad_norm": 0.4066602289676666, "learning_rate": 0.0001210387449797892, "loss": 1.5107, "step": 30392 }, { "epoch": 0.3949431732351379, "grad_norm": 0.3294428586959839, "learning_rate": 0.0001210361455178778, "loss": 1.1583, "step": 30393 }, { "epoch": 0.3949561677790538, "grad_norm": 0.3261302411556244, "learning_rate": 0.00012103354605596641, "loss": 1.2195, "step": 30394 }, { "epoch": 0.39496916232296964, "grad_norm": 0.41166698932647705, "learning_rate": 0.00012103094659405502, "loss": 1.3815, "step": 30395 }, { "epoch": 0.39498215686688554, "grad_norm": 0.5023846626281738, "learning_rate": 0.00012102834713214366, "loss": 1.2746, "step": 30396 }, { "epoch": 0.3949951514108014, "grad_norm": 0.42920002341270447, "learning_rate": 0.00012102574767023227, "loss": 1.4611, "step": 30397 }, { "epoch": 0.3950081459547173, "grad_norm": 0.3440791666507721, "learning_rate": 0.00012102314820832088, "loss": 1.4547, "step": 30398 }, { "epoch": 0.3950211404986331, "grad_norm": 0.3438780605792999, "learning_rate": 0.0001210205487464095, "loss": 1.2532, "step": 30399 }, { "epoch": 0.39503413504254903, "grad_norm": 0.37750938534736633, "learning_rate": 0.00012101794928449812, "loss": 1.4519, "step": 30400 }, { "epoch": 0.3950471295864649, "grad_norm": 0.3575557470321655, "learning_rate": 0.00012101534982258673, "loss": 1.4045, "step": 30401 }, { "epoch": 0.3950601241303808, "grad_norm": 0.30885812640190125, "learning_rate": 0.00012101275036067534, "loss": 1.4515, "step": 30402 }, { "epoch": 0.3950731186742966, "grad_norm": 0.3775915503501892, "learning_rate": 0.00012101015089876395, "loss": 1.4948, "step": 30403 }, { "epoch": 0.3950861132182125, "grad_norm": 0.36845511198043823, "learning_rate": 0.00012100755143685259, "loss": 1.2433, "step": 30404 }, { "epoch": 0.39509910776212837, "grad_norm": 0.32144248485565186, "learning_rate": 0.0001210049519749412, "loss": 1.545, "step": 30405 }, { "epoch": 0.39511210230604427, "grad_norm": 0.32726970314979553, "learning_rate": 0.0001210023525130298, "loss": 1.5395, "step": 30406 }, { "epoch": 0.3951250968499601, "grad_norm": 0.2911970615386963, "learning_rate": 0.00012099975305111841, "loss": 1.1765, "step": 30407 }, { "epoch": 0.395138091393876, "grad_norm": 0.46382129192352295, "learning_rate": 0.00012099715358920705, "loss": 1.4141, "step": 30408 }, { "epoch": 0.39515108593779186, "grad_norm": 0.36765822768211365, "learning_rate": 0.00012099455412729566, "loss": 1.3772, "step": 30409 }, { "epoch": 0.39516408048170776, "grad_norm": 0.38965848088264465, "learning_rate": 0.00012099195466538427, "loss": 1.5916, "step": 30410 }, { "epoch": 0.3951770750256236, "grad_norm": 0.3931178152561188, "learning_rate": 0.00012098935520347288, "loss": 1.3754, "step": 30411 }, { "epoch": 0.3951900695695395, "grad_norm": 0.41154351830482483, "learning_rate": 0.0001209867557415615, "loss": 1.4641, "step": 30412 }, { "epoch": 0.39520306411345535, "grad_norm": 0.3723491132259369, "learning_rate": 0.00012098415627965012, "loss": 1.4292, "step": 30413 }, { "epoch": 0.39521605865737125, "grad_norm": 0.3092310428619385, "learning_rate": 0.00012098155681773873, "loss": 1.2588, "step": 30414 }, { "epoch": 0.3952290532012871, "grad_norm": 0.4849766492843628, "learning_rate": 0.00012097895735582736, "loss": 1.4532, "step": 30415 }, { "epoch": 0.395242047745203, "grad_norm": 0.45692840218544006, "learning_rate": 0.00012097635789391598, "loss": 1.4348, "step": 30416 }, { "epoch": 0.39525504228911884, "grad_norm": 0.35874131321907043, "learning_rate": 0.00012097375843200459, "loss": 1.2967, "step": 30417 }, { "epoch": 0.39526803683303474, "grad_norm": 0.4234062731266022, "learning_rate": 0.00012097115897009318, "loss": 1.3856, "step": 30418 }, { "epoch": 0.3952810313769506, "grad_norm": 0.5291977524757385, "learning_rate": 0.00012096855950818182, "loss": 1.5585, "step": 30419 }, { "epoch": 0.3952940259208665, "grad_norm": 0.43282121419906616, "learning_rate": 0.00012096596004627043, "loss": 1.309, "step": 30420 }, { "epoch": 0.39530702046478233, "grad_norm": 0.42477747797966003, "learning_rate": 0.00012096336058435904, "loss": 1.2817, "step": 30421 }, { "epoch": 0.39532001500869823, "grad_norm": 0.43694546818733215, "learning_rate": 0.00012096076112244765, "loss": 1.4716, "step": 30422 }, { "epoch": 0.3953330095526141, "grad_norm": 0.46578672528266907, "learning_rate": 0.00012095816166053628, "loss": 1.5188, "step": 30423 }, { "epoch": 0.39534600409653, "grad_norm": 0.40985047817230225, "learning_rate": 0.00012095556219862489, "loss": 1.3575, "step": 30424 }, { "epoch": 0.3953589986404458, "grad_norm": 0.41353029012680054, "learning_rate": 0.0001209529627367135, "loss": 1.2939, "step": 30425 }, { "epoch": 0.3953719931843617, "grad_norm": 0.4211789071559906, "learning_rate": 0.00012095036327480211, "loss": 1.544, "step": 30426 }, { "epoch": 0.39538498772827757, "grad_norm": 0.49660423398017883, "learning_rate": 0.00012094776381289075, "loss": 1.2689, "step": 30427 }, { "epoch": 0.39539798227219347, "grad_norm": 0.37554702162742615, "learning_rate": 0.00012094516435097936, "loss": 1.3869, "step": 30428 }, { "epoch": 0.3954109768161093, "grad_norm": 0.3733389973640442, "learning_rate": 0.00012094256488906797, "loss": 1.4184, "step": 30429 }, { "epoch": 0.3954239713600252, "grad_norm": 0.4084286689758301, "learning_rate": 0.00012093996542715658, "loss": 1.3995, "step": 30430 }, { "epoch": 0.39543696590394106, "grad_norm": 0.37877383828163147, "learning_rate": 0.00012093736596524521, "loss": 1.5102, "step": 30431 }, { "epoch": 0.39544996044785696, "grad_norm": 0.3838941156864166, "learning_rate": 0.00012093476650333382, "loss": 1.465, "step": 30432 }, { "epoch": 0.3954629549917728, "grad_norm": 0.5083638429641724, "learning_rate": 0.00012093216704142243, "loss": 1.5578, "step": 30433 }, { "epoch": 0.3954759495356887, "grad_norm": 0.4485577642917633, "learning_rate": 0.00012092956757951104, "loss": 1.2474, "step": 30434 }, { "epoch": 0.39548894407960455, "grad_norm": 0.4122265577316284, "learning_rate": 0.00012092696811759966, "loss": 1.6011, "step": 30435 }, { "epoch": 0.39550193862352045, "grad_norm": 0.4267435669898987, "learning_rate": 0.00012092436865568828, "loss": 1.4997, "step": 30436 }, { "epoch": 0.3955149331674363, "grad_norm": 0.4243057370185852, "learning_rate": 0.00012092176919377689, "loss": 1.4076, "step": 30437 }, { "epoch": 0.3955279277113522, "grad_norm": 0.4130112826824188, "learning_rate": 0.0001209191697318655, "loss": 1.3049, "step": 30438 }, { "epoch": 0.39554092225526805, "grad_norm": 0.3776995837688446, "learning_rate": 0.00012091657026995414, "loss": 1.437, "step": 30439 }, { "epoch": 0.39555391679918395, "grad_norm": 0.4199751317501068, "learning_rate": 0.00012091397080804275, "loss": 1.3364, "step": 30440 }, { "epoch": 0.3955669113430998, "grad_norm": 0.3213954567909241, "learning_rate": 0.00012091137134613136, "loss": 1.4629, "step": 30441 }, { "epoch": 0.3955799058870157, "grad_norm": 0.3428385853767395, "learning_rate": 0.00012090877188421997, "loss": 1.315, "step": 30442 }, { "epoch": 0.39559290043093154, "grad_norm": 0.517498254776001, "learning_rate": 0.00012090617242230859, "loss": 1.5401, "step": 30443 }, { "epoch": 0.39560589497484744, "grad_norm": 0.3723388612270355, "learning_rate": 0.0001209035729603972, "loss": 1.2064, "step": 30444 }, { "epoch": 0.3956188895187633, "grad_norm": 0.4449765682220459, "learning_rate": 0.00012090097349848581, "loss": 1.4529, "step": 30445 }, { "epoch": 0.3956318840626792, "grad_norm": 0.4224238097667694, "learning_rate": 0.00012089837403657443, "loss": 1.2925, "step": 30446 }, { "epoch": 0.39564487860659503, "grad_norm": 0.36337023973464966, "learning_rate": 0.00012089577457466306, "loss": 1.354, "step": 30447 }, { "epoch": 0.39565787315051093, "grad_norm": 0.33868369460105896, "learning_rate": 0.00012089317511275166, "loss": 1.5934, "step": 30448 }, { "epoch": 0.3956708676944268, "grad_norm": 0.3751220107078552, "learning_rate": 0.00012089057565084027, "loss": 1.5654, "step": 30449 }, { "epoch": 0.3956838622383427, "grad_norm": 0.5109931826591492, "learning_rate": 0.00012088797618892888, "loss": 1.3383, "step": 30450 }, { "epoch": 0.3956968567822586, "grad_norm": 0.5026741623878479, "learning_rate": 0.00012088537672701752, "loss": 1.4177, "step": 30451 }, { "epoch": 0.3957098513261744, "grad_norm": 0.36857569217681885, "learning_rate": 0.00012088277726510613, "loss": 1.3923, "step": 30452 }, { "epoch": 0.3957228458700903, "grad_norm": 0.34272661805152893, "learning_rate": 0.00012088017780319474, "loss": 1.1314, "step": 30453 }, { "epoch": 0.39573584041400617, "grad_norm": 0.32468923926353455, "learning_rate": 0.00012087757834128337, "loss": 1.2391, "step": 30454 }, { "epoch": 0.39574883495792207, "grad_norm": 0.38658881187438965, "learning_rate": 0.00012087497887937198, "loss": 1.2582, "step": 30455 }, { "epoch": 0.3957618295018379, "grad_norm": 0.38872388005256653, "learning_rate": 0.00012087237941746059, "loss": 1.2183, "step": 30456 }, { "epoch": 0.3957748240457538, "grad_norm": 0.389353483915329, "learning_rate": 0.0001208697799555492, "loss": 1.3856, "step": 30457 }, { "epoch": 0.39578781858966966, "grad_norm": 0.41509899497032166, "learning_rate": 0.00012086718049363784, "loss": 1.3443, "step": 30458 }, { "epoch": 0.39580081313358556, "grad_norm": 0.3822564482688904, "learning_rate": 0.00012086458103172645, "loss": 1.2186, "step": 30459 }, { "epoch": 0.3958138076775014, "grad_norm": 0.4116131067276001, "learning_rate": 0.00012086198156981505, "loss": 1.3594, "step": 30460 }, { "epoch": 0.3958268022214173, "grad_norm": 0.45288777351379395, "learning_rate": 0.00012085938210790366, "loss": 1.4753, "step": 30461 }, { "epoch": 0.39583979676533315, "grad_norm": 0.44897639751434326, "learning_rate": 0.0001208567826459923, "loss": 1.446, "step": 30462 }, { "epoch": 0.39585279130924905, "grad_norm": 0.2966291606426239, "learning_rate": 0.0001208541831840809, "loss": 1.4837, "step": 30463 }, { "epoch": 0.3958657858531649, "grad_norm": 0.40922674536705017, "learning_rate": 0.00012085158372216952, "loss": 1.1878, "step": 30464 }, { "epoch": 0.3958787803970808, "grad_norm": 0.40658944845199585, "learning_rate": 0.00012084898426025813, "loss": 1.3913, "step": 30465 }, { "epoch": 0.39589177494099664, "grad_norm": 0.3612760007381439, "learning_rate": 0.00012084638479834675, "loss": 1.4402, "step": 30466 }, { "epoch": 0.39590476948491254, "grad_norm": 0.4137723445892334, "learning_rate": 0.00012084378533643536, "loss": 1.3328, "step": 30467 }, { "epoch": 0.3959177640288284, "grad_norm": 0.42410802841186523, "learning_rate": 0.00012084118587452397, "loss": 1.4157, "step": 30468 }, { "epoch": 0.3959307585727443, "grad_norm": 0.48106735944747925, "learning_rate": 0.00012083858641261259, "loss": 1.5204, "step": 30469 }, { "epoch": 0.39594375311666014, "grad_norm": 0.41061797738075256, "learning_rate": 0.00012083598695070122, "loss": 1.3481, "step": 30470 }, { "epoch": 0.39595674766057604, "grad_norm": 0.390939861536026, "learning_rate": 0.00012083338748878983, "loss": 1.4766, "step": 30471 }, { "epoch": 0.3959697422044919, "grad_norm": 0.4585546851158142, "learning_rate": 0.00012083078802687844, "loss": 1.4192, "step": 30472 }, { "epoch": 0.3959827367484078, "grad_norm": 0.3996250629425049, "learning_rate": 0.00012082818856496704, "loss": 1.4573, "step": 30473 }, { "epoch": 0.3959957312923236, "grad_norm": 0.36808812618255615, "learning_rate": 0.00012082558910305568, "loss": 1.2857, "step": 30474 }, { "epoch": 0.39600872583623953, "grad_norm": 0.49980711936950684, "learning_rate": 0.00012082298964114429, "loss": 1.2549, "step": 30475 }, { "epoch": 0.3960217203801554, "grad_norm": 0.40274959802627563, "learning_rate": 0.0001208203901792329, "loss": 1.2752, "step": 30476 }, { "epoch": 0.3960347149240713, "grad_norm": 0.40584805607795715, "learning_rate": 0.00012081779071732151, "loss": 1.302, "step": 30477 }, { "epoch": 0.3960477094679871, "grad_norm": 0.3568449914455414, "learning_rate": 0.00012081519125541014, "loss": 1.3545, "step": 30478 }, { "epoch": 0.396060704011903, "grad_norm": 0.3956317603588104, "learning_rate": 0.00012081259179349875, "loss": 1.2355, "step": 30479 }, { "epoch": 0.39607369855581886, "grad_norm": 0.39014559984207153, "learning_rate": 0.00012080999233158736, "loss": 1.4363, "step": 30480 }, { "epoch": 0.39608669309973477, "grad_norm": 0.31834426522254944, "learning_rate": 0.00012080739286967597, "loss": 1.4256, "step": 30481 }, { "epoch": 0.3960996876436506, "grad_norm": 0.4114624559879303, "learning_rate": 0.00012080479340776461, "loss": 1.4327, "step": 30482 }, { "epoch": 0.3961126821875665, "grad_norm": 0.2922571301460266, "learning_rate": 0.00012080219394585322, "loss": 1.0886, "step": 30483 }, { "epoch": 0.39612567673148236, "grad_norm": 0.5182799696922302, "learning_rate": 0.00012079959448394183, "loss": 1.396, "step": 30484 }, { "epoch": 0.39613867127539826, "grad_norm": 0.35662055015563965, "learning_rate": 0.00012079699502203044, "loss": 1.5107, "step": 30485 }, { "epoch": 0.3961516658193141, "grad_norm": 0.41533610224723816, "learning_rate": 0.00012079439556011907, "loss": 1.4378, "step": 30486 }, { "epoch": 0.39616466036323, "grad_norm": 0.38804373145103455, "learning_rate": 0.00012079179609820768, "loss": 1.3224, "step": 30487 }, { "epoch": 0.39617765490714585, "grad_norm": 0.42738789319992065, "learning_rate": 0.00012078919663629629, "loss": 1.3672, "step": 30488 }, { "epoch": 0.39619064945106175, "grad_norm": 0.4870091378688812, "learning_rate": 0.00012078659717438493, "loss": 1.5115, "step": 30489 }, { "epoch": 0.3962036439949776, "grad_norm": 0.33631303906440735, "learning_rate": 0.00012078399771247352, "loss": 1.3926, "step": 30490 }, { "epoch": 0.3962166385388935, "grad_norm": 0.2987799644470215, "learning_rate": 0.00012078139825056213, "loss": 1.246, "step": 30491 }, { "epoch": 0.39622963308280934, "grad_norm": 0.3712847828865051, "learning_rate": 0.00012077879878865074, "loss": 1.3163, "step": 30492 }, { "epoch": 0.39624262762672524, "grad_norm": 0.366850882768631, "learning_rate": 0.00012077619932673938, "loss": 1.3545, "step": 30493 }, { "epoch": 0.3962556221706411, "grad_norm": 0.38335683941841125, "learning_rate": 0.000120773599864828, "loss": 1.1562, "step": 30494 }, { "epoch": 0.396268616714557, "grad_norm": 0.49375998973846436, "learning_rate": 0.0001207710004029166, "loss": 1.3706, "step": 30495 }, { "epoch": 0.39628161125847283, "grad_norm": 0.321294903755188, "learning_rate": 0.00012076840094100522, "loss": 1.3135, "step": 30496 }, { "epoch": 0.39629460580238873, "grad_norm": 0.370956152677536, "learning_rate": 0.00012076580147909384, "loss": 1.4765, "step": 30497 }, { "epoch": 0.3963076003463046, "grad_norm": 0.39633017778396606, "learning_rate": 0.00012076320201718245, "loss": 1.4525, "step": 30498 }, { "epoch": 0.3963205948902205, "grad_norm": 0.4501740336418152, "learning_rate": 0.00012076060255527106, "loss": 1.5941, "step": 30499 }, { "epoch": 0.3963335894341363, "grad_norm": 0.35627079010009766, "learning_rate": 0.00012075800309335967, "loss": 1.3806, "step": 30500 }, { "epoch": 0.3963465839780522, "grad_norm": 0.3913305699825287, "learning_rate": 0.00012075540363144831, "loss": 1.2485, "step": 30501 }, { "epoch": 0.39635957852196807, "grad_norm": 0.39382895827293396, "learning_rate": 0.00012075280416953691, "loss": 1.3889, "step": 30502 }, { "epoch": 0.39637257306588397, "grad_norm": 0.4183182120323181, "learning_rate": 0.00012075020470762552, "loss": 1.5115, "step": 30503 }, { "epoch": 0.3963855676097998, "grad_norm": 0.3839147686958313, "learning_rate": 0.00012074760524571413, "loss": 1.4127, "step": 30504 }, { "epoch": 0.3963985621537157, "grad_norm": 0.43862468004226685, "learning_rate": 0.00012074500578380277, "loss": 1.3861, "step": 30505 }, { "epoch": 0.39641155669763156, "grad_norm": 0.34355974197387695, "learning_rate": 0.00012074240632189138, "loss": 1.3486, "step": 30506 }, { "epoch": 0.39642455124154746, "grad_norm": 0.38287320733070374, "learning_rate": 0.00012073980685997999, "loss": 1.3295, "step": 30507 }, { "epoch": 0.3964375457854633, "grad_norm": 0.2911398410797119, "learning_rate": 0.0001207372073980686, "loss": 1.2995, "step": 30508 }, { "epoch": 0.3964505403293792, "grad_norm": 0.34748217463493347, "learning_rate": 0.00012073460793615723, "loss": 1.2762, "step": 30509 }, { "epoch": 0.39646353487329505, "grad_norm": 0.4181280732154846, "learning_rate": 0.00012073200847424584, "loss": 1.5515, "step": 30510 }, { "epoch": 0.39647652941721095, "grad_norm": 0.3230595588684082, "learning_rate": 0.00012072940901233445, "loss": 1.3266, "step": 30511 }, { "epoch": 0.3964895239611268, "grad_norm": 0.3540262281894684, "learning_rate": 0.00012072680955042306, "loss": 1.2559, "step": 30512 }, { "epoch": 0.3965025185050427, "grad_norm": 0.5047813057899475, "learning_rate": 0.0001207242100885117, "loss": 1.4668, "step": 30513 }, { "epoch": 0.39651551304895855, "grad_norm": 0.3461460471153259, "learning_rate": 0.00012072161062660031, "loss": 1.4371, "step": 30514 }, { "epoch": 0.39652850759287445, "grad_norm": 0.3908366858959198, "learning_rate": 0.0001207190111646889, "loss": 1.3088, "step": 30515 }, { "epoch": 0.3965415021367903, "grad_norm": 0.43267524242401123, "learning_rate": 0.00012071641170277752, "loss": 1.419, "step": 30516 }, { "epoch": 0.3965544966807062, "grad_norm": 0.31342220306396484, "learning_rate": 0.00012071381224086615, "loss": 1.3394, "step": 30517 }, { "epoch": 0.39656749122462204, "grad_norm": 0.4734180271625519, "learning_rate": 0.00012071121277895476, "loss": 1.4767, "step": 30518 }, { "epoch": 0.39658048576853794, "grad_norm": 0.4645110070705414, "learning_rate": 0.00012070861331704338, "loss": 1.6725, "step": 30519 }, { "epoch": 0.3965934803124538, "grad_norm": 0.324693888425827, "learning_rate": 0.00012070601385513199, "loss": 1.3567, "step": 30520 }, { "epoch": 0.3966064748563697, "grad_norm": 0.3301478326320648, "learning_rate": 0.00012070341439322061, "loss": 1.1773, "step": 30521 }, { "epoch": 0.39661946940028553, "grad_norm": 0.43589335680007935, "learning_rate": 0.00012070081493130922, "loss": 1.3763, "step": 30522 }, { "epoch": 0.39663246394420143, "grad_norm": 0.43325603008270264, "learning_rate": 0.00012069821546939783, "loss": 1.3717, "step": 30523 }, { "epoch": 0.3966454584881173, "grad_norm": 0.37894830107688904, "learning_rate": 0.00012069561600748644, "loss": 1.3014, "step": 30524 }, { "epoch": 0.3966584530320332, "grad_norm": 0.38113725185394287, "learning_rate": 0.00012069301654557508, "loss": 1.3669, "step": 30525 }, { "epoch": 0.396671447575949, "grad_norm": 0.5112550854682922, "learning_rate": 0.00012069041708366369, "loss": 1.4037, "step": 30526 }, { "epoch": 0.3966844421198649, "grad_norm": 0.39900729060173035, "learning_rate": 0.0001206878176217523, "loss": 1.2266, "step": 30527 }, { "epoch": 0.3966974366637808, "grad_norm": 0.490354061126709, "learning_rate": 0.00012068521815984093, "loss": 1.3599, "step": 30528 }, { "epoch": 0.39671043120769667, "grad_norm": 0.5201057195663452, "learning_rate": 0.00012068261869792954, "loss": 1.5922, "step": 30529 }, { "epoch": 0.39672342575161257, "grad_norm": 0.36522430181503296, "learning_rate": 0.00012068001923601815, "loss": 1.457, "step": 30530 }, { "epoch": 0.3967364202955284, "grad_norm": 0.43237805366516113, "learning_rate": 0.00012067741977410676, "loss": 1.422, "step": 30531 }, { "epoch": 0.3967494148394443, "grad_norm": 0.4094823896884918, "learning_rate": 0.00012067482031219539, "loss": 1.4329, "step": 30532 }, { "epoch": 0.39676240938336016, "grad_norm": 0.2513434588909149, "learning_rate": 0.000120672220850284, "loss": 1.0847, "step": 30533 }, { "epoch": 0.39677540392727606, "grad_norm": 0.42192405462265015, "learning_rate": 0.00012066962138837261, "loss": 1.5105, "step": 30534 }, { "epoch": 0.3967883984711919, "grad_norm": 0.45719724893569946, "learning_rate": 0.00012066702192646122, "loss": 1.375, "step": 30535 }, { "epoch": 0.3968013930151078, "grad_norm": 0.44061875343322754, "learning_rate": 0.00012066442246454986, "loss": 1.3445, "step": 30536 }, { "epoch": 0.39681438755902365, "grad_norm": 0.27138903737068176, "learning_rate": 0.00012066182300263847, "loss": 1.3426, "step": 30537 }, { "epoch": 0.39682738210293955, "grad_norm": 0.3566970229148865, "learning_rate": 0.00012065922354072708, "loss": 1.3506, "step": 30538 }, { "epoch": 0.3968403766468554, "grad_norm": 0.47373026609420776, "learning_rate": 0.00012065662407881569, "loss": 1.4535, "step": 30539 }, { "epoch": 0.3968533711907713, "grad_norm": 0.3562994599342346, "learning_rate": 0.00012065402461690431, "loss": 1.4934, "step": 30540 }, { "epoch": 0.39686636573468714, "grad_norm": 0.3392718434333801, "learning_rate": 0.00012065142515499292, "loss": 1.3738, "step": 30541 }, { "epoch": 0.39687936027860304, "grad_norm": 0.4067341685295105, "learning_rate": 0.00012064882569308154, "loss": 1.2405, "step": 30542 }, { "epoch": 0.3968923548225189, "grad_norm": 0.43516093492507935, "learning_rate": 0.00012064622623117015, "loss": 1.4706, "step": 30543 }, { "epoch": 0.3969053493664348, "grad_norm": 0.5096096396446228, "learning_rate": 0.00012064362676925877, "loss": 1.5871, "step": 30544 }, { "epoch": 0.39691834391035064, "grad_norm": 0.43477702140808105, "learning_rate": 0.00012064102730734738, "loss": 1.5679, "step": 30545 }, { "epoch": 0.39693133845426654, "grad_norm": 0.4271295666694641, "learning_rate": 0.00012063842784543599, "loss": 1.4513, "step": 30546 }, { "epoch": 0.3969443329981824, "grad_norm": 0.32982924580574036, "learning_rate": 0.0001206358283835246, "loss": 1.4264, "step": 30547 }, { "epoch": 0.3969573275420983, "grad_norm": 0.3597833216190338, "learning_rate": 0.00012063322892161324, "loss": 1.4237, "step": 30548 }, { "epoch": 0.3969703220860141, "grad_norm": 0.4055320620536804, "learning_rate": 0.00012063062945970185, "loss": 1.5282, "step": 30549 }, { "epoch": 0.39698331662993, "grad_norm": 0.39031103253364563, "learning_rate": 0.00012062802999779046, "loss": 1.4969, "step": 30550 }, { "epoch": 0.3969963111738459, "grad_norm": 0.45249009132385254, "learning_rate": 0.00012062543053587907, "loss": 1.4753, "step": 30551 }, { "epoch": 0.3970093057177618, "grad_norm": 0.3768482506275177, "learning_rate": 0.0001206228310739677, "loss": 1.2406, "step": 30552 }, { "epoch": 0.3970223002616776, "grad_norm": 0.3541805148124695, "learning_rate": 0.00012062023161205631, "loss": 1.3852, "step": 30553 }, { "epoch": 0.3970352948055935, "grad_norm": 0.3437105119228363, "learning_rate": 0.00012061763215014492, "loss": 1.4595, "step": 30554 }, { "epoch": 0.39704828934950936, "grad_norm": 0.3743917644023895, "learning_rate": 0.00012061503268823353, "loss": 1.5073, "step": 30555 }, { "epoch": 0.39706128389342527, "grad_norm": 0.4299183785915375, "learning_rate": 0.00012061243322632217, "loss": 1.2275, "step": 30556 }, { "epoch": 0.3970742784373411, "grad_norm": 0.4112553894519806, "learning_rate": 0.00012060983376441077, "loss": 1.4164, "step": 30557 }, { "epoch": 0.397087272981257, "grad_norm": 0.3559432923793793, "learning_rate": 0.00012060723430249938, "loss": 1.3988, "step": 30558 }, { "epoch": 0.39710026752517286, "grad_norm": 0.40167734026908875, "learning_rate": 0.00012060463484058799, "loss": 1.1957, "step": 30559 }, { "epoch": 0.39711326206908876, "grad_norm": 0.4780190885066986, "learning_rate": 0.00012060203537867663, "loss": 1.5468, "step": 30560 }, { "epoch": 0.3971262566130046, "grad_norm": 0.32918885350227356, "learning_rate": 0.00012059943591676524, "loss": 1.4744, "step": 30561 }, { "epoch": 0.3971392511569205, "grad_norm": 0.4281514585018158, "learning_rate": 0.00012059683645485385, "loss": 1.3534, "step": 30562 }, { "epoch": 0.39715224570083635, "grad_norm": 0.37919455766677856, "learning_rate": 0.00012059423699294247, "loss": 1.2382, "step": 30563 }, { "epoch": 0.39716524024475225, "grad_norm": 0.34532901644706726, "learning_rate": 0.00012059163753103108, "loss": 1.5231, "step": 30564 }, { "epoch": 0.3971782347886681, "grad_norm": 0.4213751554489136, "learning_rate": 0.0001205890380691197, "loss": 1.4574, "step": 30565 }, { "epoch": 0.397191229332584, "grad_norm": 0.3961006999015808, "learning_rate": 0.0001205864386072083, "loss": 1.3479, "step": 30566 }, { "epoch": 0.39720422387649984, "grad_norm": 0.4103583097457886, "learning_rate": 0.00012058383914529694, "loss": 1.315, "step": 30567 }, { "epoch": 0.39721721842041574, "grad_norm": 0.3253219723701477, "learning_rate": 0.00012058123968338556, "loss": 1.1293, "step": 30568 }, { "epoch": 0.3972302129643316, "grad_norm": 0.3826769292354584, "learning_rate": 0.00012057864022147417, "loss": 1.3111, "step": 30569 }, { "epoch": 0.3972432075082475, "grad_norm": 0.44675466418266296, "learning_rate": 0.00012057604075956276, "loss": 1.5, "step": 30570 }, { "epoch": 0.39725620205216333, "grad_norm": 0.36370083689689636, "learning_rate": 0.0001205734412976514, "loss": 1.2416, "step": 30571 }, { "epoch": 0.39726919659607923, "grad_norm": 0.5032706260681152, "learning_rate": 0.00012057084183574001, "loss": 1.4933, "step": 30572 }, { "epoch": 0.3972821911399951, "grad_norm": 0.2902430295944214, "learning_rate": 0.00012056824237382862, "loss": 1.4969, "step": 30573 }, { "epoch": 0.397295185683911, "grad_norm": 0.47435638308525085, "learning_rate": 0.00012056564291191723, "loss": 1.3721, "step": 30574 }, { "epoch": 0.3973081802278268, "grad_norm": 0.3958859145641327, "learning_rate": 0.00012056304345000586, "loss": 1.4035, "step": 30575 }, { "epoch": 0.3973211747717427, "grad_norm": 0.3804663419723511, "learning_rate": 0.00012056044398809447, "loss": 1.4726, "step": 30576 }, { "epoch": 0.39733416931565857, "grad_norm": 0.39885711669921875, "learning_rate": 0.00012055784452618308, "loss": 1.5113, "step": 30577 }, { "epoch": 0.39734716385957447, "grad_norm": 0.4091666638851166, "learning_rate": 0.00012055524506427169, "loss": 1.5481, "step": 30578 }, { "epoch": 0.3973601584034903, "grad_norm": 0.484757661819458, "learning_rate": 0.00012055264560236033, "loss": 1.4901, "step": 30579 }, { "epoch": 0.3973731529474062, "grad_norm": 0.37343254685401917, "learning_rate": 0.00012055004614044894, "loss": 1.2495, "step": 30580 }, { "epoch": 0.39738614749132206, "grad_norm": 0.41060778498649597, "learning_rate": 0.00012054744667853755, "loss": 1.2152, "step": 30581 }, { "epoch": 0.39739914203523796, "grad_norm": 0.4466177225112915, "learning_rate": 0.00012054484721662615, "loss": 1.6325, "step": 30582 }, { "epoch": 0.3974121365791538, "grad_norm": 0.40157777070999146, "learning_rate": 0.00012054224775471479, "loss": 1.6397, "step": 30583 }, { "epoch": 0.3974251311230697, "grad_norm": 0.36034226417541504, "learning_rate": 0.0001205396482928034, "loss": 1.1805, "step": 30584 }, { "epoch": 0.39743812566698555, "grad_norm": 0.3764556348323822, "learning_rate": 0.00012053704883089201, "loss": 1.3388, "step": 30585 }, { "epoch": 0.39745112021090145, "grad_norm": 0.39236927032470703, "learning_rate": 0.00012053444936898062, "loss": 1.506, "step": 30586 }, { "epoch": 0.3974641147548173, "grad_norm": 0.36235329508781433, "learning_rate": 0.00012053184990706924, "loss": 1.2361, "step": 30587 }, { "epoch": 0.3974771092987332, "grad_norm": 0.2898573577404022, "learning_rate": 0.00012052925044515786, "loss": 1.1999, "step": 30588 }, { "epoch": 0.39749010384264905, "grad_norm": 0.4543534219264984, "learning_rate": 0.00012052665098324647, "loss": 1.5407, "step": 30589 }, { "epoch": 0.39750309838656495, "grad_norm": 0.3502747118473053, "learning_rate": 0.00012052405152133508, "loss": 1.3537, "step": 30590 }, { "epoch": 0.3975160929304808, "grad_norm": 0.4579504728317261, "learning_rate": 0.00012052145205942372, "loss": 1.7329, "step": 30591 }, { "epoch": 0.3975290874743967, "grad_norm": 0.4381742775440216, "learning_rate": 0.00012051885259751233, "loss": 1.4102, "step": 30592 }, { "epoch": 0.39754208201831254, "grad_norm": 0.34841784834861755, "learning_rate": 0.00012051625313560094, "loss": 1.3533, "step": 30593 }, { "epoch": 0.39755507656222844, "grad_norm": 0.45948076248168945, "learning_rate": 0.00012051365367368955, "loss": 1.4975, "step": 30594 }, { "epoch": 0.3975680711061443, "grad_norm": 0.3173246383666992, "learning_rate": 0.00012051105421177817, "loss": 1.3703, "step": 30595 }, { "epoch": 0.3975810656500602, "grad_norm": 0.4007013738155365, "learning_rate": 0.00012050845474986678, "loss": 1.3819, "step": 30596 }, { "epoch": 0.39759406019397603, "grad_norm": 0.3364884853363037, "learning_rate": 0.0001205058552879554, "loss": 1.1384, "step": 30597 }, { "epoch": 0.39760705473789193, "grad_norm": 0.3834405541419983, "learning_rate": 0.000120503255826044, "loss": 1.3962, "step": 30598 }, { "epoch": 0.3976200492818078, "grad_norm": 0.4373567998409271, "learning_rate": 0.00012050065636413263, "loss": 1.4803, "step": 30599 }, { "epoch": 0.3976330438257237, "grad_norm": 0.3747972548007965, "learning_rate": 0.00012049805690222124, "loss": 1.3698, "step": 30600 }, { "epoch": 0.3976460383696395, "grad_norm": 0.39132165908813477, "learning_rate": 0.00012049545744030985, "loss": 1.4599, "step": 30601 }, { "epoch": 0.3976590329135554, "grad_norm": 0.46721139550209045, "learning_rate": 0.00012049285797839849, "loss": 1.257, "step": 30602 }, { "epoch": 0.39767202745747127, "grad_norm": 0.3414823114871979, "learning_rate": 0.0001204902585164871, "loss": 1.628, "step": 30603 }, { "epoch": 0.39768502200138717, "grad_norm": 0.3354717195034027, "learning_rate": 0.00012048765905457571, "loss": 1.419, "step": 30604 }, { "epoch": 0.39769801654530307, "grad_norm": 0.47253406047821045, "learning_rate": 0.00012048505959266432, "loss": 1.3272, "step": 30605 }, { "epoch": 0.3977110110892189, "grad_norm": 0.39107823371887207, "learning_rate": 0.00012048246013075295, "loss": 1.4431, "step": 30606 }, { "epoch": 0.3977240056331348, "grad_norm": 0.4482027292251587, "learning_rate": 0.00012047986066884156, "loss": 1.6257, "step": 30607 }, { "epoch": 0.39773700017705066, "grad_norm": 0.42433831095695496, "learning_rate": 0.00012047726120693017, "loss": 1.3668, "step": 30608 }, { "epoch": 0.39774999472096656, "grad_norm": 0.42059847712516785, "learning_rate": 0.00012047466174501878, "loss": 1.4938, "step": 30609 }, { "epoch": 0.3977629892648824, "grad_norm": 0.4015505015850067, "learning_rate": 0.00012047206228310742, "loss": 1.4419, "step": 30610 }, { "epoch": 0.3977759838087983, "grad_norm": 0.35424479842185974, "learning_rate": 0.00012046946282119603, "loss": 1.5279, "step": 30611 }, { "epoch": 0.39778897835271415, "grad_norm": 0.4345249533653259, "learning_rate": 0.00012046686335928463, "loss": 1.5128, "step": 30612 }, { "epoch": 0.39780197289663005, "grad_norm": 0.40703484416007996, "learning_rate": 0.00012046426389737324, "loss": 1.5333, "step": 30613 }, { "epoch": 0.3978149674405459, "grad_norm": 0.3266160190105438, "learning_rate": 0.00012046166443546187, "loss": 1.3241, "step": 30614 }, { "epoch": 0.3978279619844618, "grad_norm": 0.346336305141449, "learning_rate": 0.00012045906497355049, "loss": 1.3503, "step": 30615 }, { "epoch": 0.39784095652837764, "grad_norm": 0.4651446044445038, "learning_rate": 0.0001204564655116391, "loss": 1.5313, "step": 30616 }, { "epoch": 0.39785395107229354, "grad_norm": 0.4894111156463623, "learning_rate": 0.00012045386604972771, "loss": 1.3909, "step": 30617 }, { "epoch": 0.3978669456162094, "grad_norm": 0.43553051352500916, "learning_rate": 0.00012045126658781633, "loss": 1.2227, "step": 30618 }, { "epoch": 0.3978799401601253, "grad_norm": 0.42442312836647034, "learning_rate": 0.00012044866712590494, "loss": 1.3431, "step": 30619 }, { "epoch": 0.39789293470404113, "grad_norm": 0.39639878273010254, "learning_rate": 0.00012044606766399355, "loss": 1.2855, "step": 30620 }, { "epoch": 0.39790592924795704, "grad_norm": 0.42936867475509644, "learning_rate": 0.00012044346820208216, "loss": 1.3673, "step": 30621 }, { "epoch": 0.3979189237918729, "grad_norm": 0.4305630028247833, "learning_rate": 0.0001204408687401708, "loss": 1.2348, "step": 30622 }, { "epoch": 0.3979319183357888, "grad_norm": 0.39179790019989014, "learning_rate": 0.00012043826927825941, "loss": 1.2855, "step": 30623 }, { "epoch": 0.3979449128797046, "grad_norm": 0.4149450659751892, "learning_rate": 0.00012043566981634801, "loss": 1.3681, "step": 30624 }, { "epoch": 0.3979579074236205, "grad_norm": 0.4168071150779724, "learning_rate": 0.00012043307035443662, "loss": 1.3684, "step": 30625 }, { "epoch": 0.3979709019675364, "grad_norm": 0.2427930235862732, "learning_rate": 0.00012043047089252526, "loss": 1.3372, "step": 30626 }, { "epoch": 0.3979838965114523, "grad_norm": 0.3487960398197174, "learning_rate": 0.00012042787143061387, "loss": 1.3398, "step": 30627 }, { "epoch": 0.3979968910553681, "grad_norm": 0.4447000324726105, "learning_rate": 0.00012042527196870248, "loss": 1.48, "step": 30628 }, { "epoch": 0.398009885599284, "grad_norm": 0.40953731536865234, "learning_rate": 0.00012042267250679109, "loss": 1.4033, "step": 30629 }, { "epoch": 0.39802288014319986, "grad_norm": 0.417463093996048, "learning_rate": 0.00012042007304487972, "loss": 1.3576, "step": 30630 }, { "epoch": 0.39803587468711576, "grad_norm": 0.39330989122390747, "learning_rate": 0.00012041747358296833, "loss": 1.2288, "step": 30631 }, { "epoch": 0.3980488692310316, "grad_norm": 0.43738213181495667, "learning_rate": 0.00012041487412105694, "loss": 1.3999, "step": 30632 }, { "epoch": 0.3980618637749475, "grad_norm": 0.40446197986602783, "learning_rate": 0.00012041227465914555, "loss": 1.4824, "step": 30633 }, { "epoch": 0.39807485831886336, "grad_norm": 0.4569695293903351, "learning_rate": 0.00012040967519723419, "loss": 1.4332, "step": 30634 }, { "epoch": 0.39808785286277926, "grad_norm": 0.4315387010574341, "learning_rate": 0.0001204070757353228, "loss": 1.5125, "step": 30635 }, { "epoch": 0.3981008474066951, "grad_norm": 0.41952329874038696, "learning_rate": 0.00012040447627341141, "loss": 1.3004, "step": 30636 }, { "epoch": 0.398113841950611, "grad_norm": 0.46102291345596313, "learning_rate": 0.00012040187681150003, "loss": 1.2783, "step": 30637 }, { "epoch": 0.39812683649452685, "grad_norm": 0.3600445091724396, "learning_rate": 0.00012039927734958865, "loss": 1.2823, "step": 30638 }, { "epoch": 0.39813983103844275, "grad_norm": 0.39268195629119873, "learning_rate": 0.00012039667788767726, "loss": 1.5793, "step": 30639 }, { "epoch": 0.3981528255823586, "grad_norm": 0.42229700088500977, "learning_rate": 0.00012039407842576587, "loss": 1.635, "step": 30640 }, { "epoch": 0.3981658201262745, "grad_norm": 0.3998759686946869, "learning_rate": 0.00012039147896385449, "loss": 1.3335, "step": 30641 }, { "epoch": 0.39817881467019034, "grad_norm": 0.4558330476284027, "learning_rate": 0.0001203888795019431, "loss": 1.4519, "step": 30642 }, { "epoch": 0.39819180921410624, "grad_norm": 0.39772799611091614, "learning_rate": 0.00012038628004003171, "loss": 1.3465, "step": 30643 }, { "epoch": 0.3982048037580221, "grad_norm": 0.3977593779563904, "learning_rate": 0.00012038368057812032, "loss": 1.3635, "step": 30644 }, { "epoch": 0.398217798301938, "grad_norm": 0.4713815748691559, "learning_rate": 0.00012038108111620896, "loss": 1.3811, "step": 30645 }, { "epoch": 0.39823079284585383, "grad_norm": 0.32866600155830383, "learning_rate": 0.00012037848165429757, "loss": 1.2731, "step": 30646 }, { "epoch": 0.39824378738976973, "grad_norm": 0.38345086574554443, "learning_rate": 0.00012037588219238618, "loss": 1.3226, "step": 30647 }, { "epoch": 0.3982567819336856, "grad_norm": 0.46418246626853943, "learning_rate": 0.0001203732827304748, "loss": 1.4317, "step": 30648 }, { "epoch": 0.3982697764776015, "grad_norm": 0.3906811475753784, "learning_rate": 0.00012037068326856342, "loss": 1.3708, "step": 30649 }, { "epoch": 0.3982827710215173, "grad_norm": 0.3054935038089752, "learning_rate": 0.00012036808380665203, "loss": 1.2675, "step": 30650 }, { "epoch": 0.3982957655654332, "grad_norm": 0.44424697756767273, "learning_rate": 0.00012036548434474064, "loss": 1.5847, "step": 30651 }, { "epoch": 0.39830876010934907, "grad_norm": 0.4200749099254608, "learning_rate": 0.00012036288488282925, "loss": 1.4534, "step": 30652 }, { "epoch": 0.39832175465326497, "grad_norm": 0.40228980779647827, "learning_rate": 0.00012036028542091789, "loss": 1.5031, "step": 30653 }, { "epoch": 0.3983347491971808, "grad_norm": 0.5039932727813721, "learning_rate": 0.00012035768595900649, "loss": 1.5185, "step": 30654 }, { "epoch": 0.3983477437410967, "grad_norm": 0.4084838926792145, "learning_rate": 0.0001203550864970951, "loss": 1.3826, "step": 30655 }, { "epoch": 0.39836073828501256, "grad_norm": 0.4750950336456299, "learning_rate": 0.00012035248703518371, "loss": 1.4573, "step": 30656 }, { "epoch": 0.39837373282892846, "grad_norm": 0.4051925539970398, "learning_rate": 0.00012034988757327235, "loss": 1.5065, "step": 30657 }, { "epoch": 0.3983867273728443, "grad_norm": 0.4245031476020813, "learning_rate": 0.00012034728811136096, "loss": 1.4599, "step": 30658 }, { "epoch": 0.3983997219167602, "grad_norm": 0.3715025782585144, "learning_rate": 0.00012034468864944957, "loss": 1.4545, "step": 30659 }, { "epoch": 0.39841271646067605, "grad_norm": 0.49048787355422974, "learning_rate": 0.00012034208918753818, "loss": 1.4418, "step": 30660 }, { "epoch": 0.39842571100459195, "grad_norm": 0.3705085217952728, "learning_rate": 0.0001203394897256268, "loss": 1.3518, "step": 30661 }, { "epoch": 0.3984387055485078, "grad_norm": 0.35156843066215515, "learning_rate": 0.00012033689026371542, "loss": 1.4056, "step": 30662 }, { "epoch": 0.3984517000924237, "grad_norm": 0.38524767756462097, "learning_rate": 0.00012033429080180403, "loss": 1.2149, "step": 30663 }, { "epoch": 0.39846469463633954, "grad_norm": 0.4420812129974365, "learning_rate": 0.00012033169133989264, "loss": 1.5197, "step": 30664 }, { "epoch": 0.39847768918025545, "grad_norm": 0.41359129548072815, "learning_rate": 0.00012032909187798128, "loss": 1.385, "step": 30665 }, { "epoch": 0.3984906837241713, "grad_norm": 0.4534244239330292, "learning_rate": 0.00012032649241606987, "loss": 1.4598, "step": 30666 }, { "epoch": 0.3985036782680872, "grad_norm": 0.43217048048973083, "learning_rate": 0.00012032389295415848, "loss": 1.2485, "step": 30667 }, { "epoch": 0.39851667281200304, "grad_norm": 0.3915845453739166, "learning_rate": 0.0001203212934922471, "loss": 1.2805, "step": 30668 }, { "epoch": 0.39852966735591894, "grad_norm": 0.43231967091560364, "learning_rate": 0.00012031869403033573, "loss": 1.1903, "step": 30669 }, { "epoch": 0.3985426618998348, "grad_norm": 0.40213385224342346, "learning_rate": 0.00012031609456842434, "loss": 1.5774, "step": 30670 }, { "epoch": 0.3985556564437507, "grad_norm": 0.3723752498626709, "learning_rate": 0.00012031349510651296, "loss": 1.3481, "step": 30671 }, { "epoch": 0.39856865098766653, "grad_norm": 0.32263416051864624, "learning_rate": 0.00012031089564460157, "loss": 1.3231, "step": 30672 }, { "epoch": 0.39858164553158243, "grad_norm": 0.37805575132369995, "learning_rate": 0.00012030829618269019, "loss": 1.4731, "step": 30673 }, { "epoch": 0.3985946400754983, "grad_norm": 0.43951764702796936, "learning_rate": 0.0001203056967207788, "loss": 1.4792, "step": 30674 }, { "epoch": 0.3986076346194142, "grad_norm": 0.3575046956539154, "learning_rate": 0.00012030309725886741, "loss": 1.4373, "step": 30675 }, { "epoch": 0.39862062916333, "grad_norm": 0.3849867284297943, "learning_rate": 0.00012030049779695605, "loss": 1.2911, "step": 30676 }, { "epoch": 0.3986336237072459, "grad_norm": 0.44810420274734497, "learning_rate": 0.00012029789833504466, "loss": 1.4358, "step": 30677 }, { "epoch": 0.39864661825116177, "grad_norm": 0.3901337683200836, "learning_rate": 0.00012029529887313327, "loss": 1.3077, "step": 30678 }, { "epoch": 0.39865961279507767, "grad_norm": 0.3956059217453003, "learning_rate": 0.00012029269941122187, "loss": 1.3711, "step": 30679 }, { "epoch": 0.39867260733899357, "grad_norm": 0.45140060782432556, "learning_rate": 0.00012029009994931051, "loss": 1.5245, "step": 30680 }, { "epoch": 0.3986856018829094, "grad_norm": 0.4881375730037689, "learning_rate": 0.00012028750048739912, "loss": 1.5835, "step": 30681 }, { "epoch": 0.3986985964268253, "grad_norm": 0.339712530374527, "learning_rate": 0.00012028490102548773, "loss": 1.2691, "step": 30682 }, { "epoch": 0.39871159097074116, "grad_norm": 0.4990187883377075, "learning_rate": 0.00012028230156357634, "loss": 1.4265, "step": 30683 }, { "epoch": 0.39872458551465706, "grad_norm": 0.41261714696884155, "learning_rate": 0.00012027970210166497, "loss": 1.4037, "step": 30684 }, { "epoch": 0.3987375800585729, "grad_norm": 0.409064918756485, "learning_rate": 0.00012027710263975358, "loss": 1.4489, "step": 30685 }, { "epoch": 0.3987505746024888, "grad_norm": 0.46332839131355286, "learning_rate": 0.00012027450317784219, "loss": 1.5264, "step": 30686 }, { "epoch": 0.39876356914640465, "grad_norm": 0.4505152702331543, "learning_rate": 0.0001202719037159308, "loss": 1.4464, "step": 30687 }, { "epoch": 0.39877656369032055, "grad_norm": 0.4598102569580078, "learning_rate": 0.00012026930425401944, "loss": 1.5445, "step": 30688 }, { "epoch": 0.3987895582342364, "grad_norm": 0.4157380759716034, "learning_rate": 0.00012026670479210805, "loss": 1.6055, "step": 30689 }, { "epoch": 0.3988025527781523, "grad_norm": 0.3972194194793701, "learning_rate": 0.00012026410533019666, "loss": 1.5474, "step": 30690 }, { "epoch": 0.39881554732206814, "grad_norm": 0.3970952033996582, "learning_rate": 0.00012026150586828527, "loss": 1.3554, "step": 30691 }, { "epoch": 0.39882854186598404, "grad_norm": 0.30305469036102295, "learning_rate": 0.0001202589064063739, "loss": 1.1761, "step": 30692 }, { "epoch": 0.3988415364098999, "grad_norm": 0.34345945715904236, "learning_rate": 0.0001202563069444625, "loss": 1.3842, "step": 30693 }, { "epoch": 0.3988545309538158, "grad_norm": 0.4360566735267639, "learning_rate": 0.00012025370748255112, "loss": 1.4139, "step": 30694 }, { "epoch": 0.39886752549773163, "grad_norm": 0.4208584427833557, "learning_rate": 0.00012025110802063973, "loss": 1.3481, "step": 30695 }, { "epoch": 0.39888052004164753, "grad_norm": 0.3233270049095154, "learning_rate": 0.00012024850855872835, "loss": 1.2849, "step": 30696 }, { "epoch": 0.3988935145855634, "grad_norm": 0.36880871653556824, "learning_rate": 0.00012024590909681696, "loss": 1.1013, "step": 30697 }, { "epoch": 0.3989065091294793, "grad_norm": 0.3352777063846588, "learning_rate": 0.00012024330963490557, "loss": 1.33, "step": 30698 }, { "epoch": 0.3989195036733951, "grad_norm": 0.3982882797718048, "learning_rate": 0.00012024071017299418, "loss": 1.3385, "step": 30699 }, { "epoch": 0.398932498217311, "grad_norm": 0.2909446656703949, "learning_rate": 0.00012023811071108282, "loss": 1.2409, "step": 30700 }, { "epoch": 0.39894549276122687, "grad_norm": 0.4888071119785309, "learning_rate": 0.00012023551124917143, "loss": 1.5196, "step": 30701 }, { "epoch": 0.3989584873051428, "grad_norm": 0.4029799997806549, "learning_rate": 0.00012023291178726004, "loss": 1.4665, "step": 30702 }, { "epoch": 0.3989714818490586, "grad_norm": 0.448395311832428, "learning_rate": 0.00012023031232534865, "loss": 1.3786, "step": 30703 }, { "epoch": 0.3989844763929745, "grad_norm": 0.4449112117290497, "learning_rate": 0.00012022771286343728, "loss": 1.2909, "step": 30704 }, { "epoch": 0.39899747093689036, "grad_norm": 0.42984306812286377, "learning_rate": 0.00012022511340152589, "loss": 1.4295, "step": 30705 }, { "epoch": 0.39901046548080626, "grad_norm": 0.29719048738479614, "learning_rate": 0.0001202225139396145, "loss": 1.2586, "step": 30706 }, { "epoch": 0.3990234600247221, "grad_norm": 0.36108139157295227, "learning_rate": 0.00012021991447770311, "loss": 1.3847, "step": 30707 }, { "epoch": 0.399036454568638, "grad_norm": 0.5173310041427612, "learning_rate": 0.00012021731501579174, "loss": 1.4372, "step": 30708 }, { "epoch": 0.39904944911255386, "grad_norm": 0.5210025906562805, "learning_rate": 0.00012021471555388035, "loss": 1.4764, "step": 30709 }, { "epoch": 0.39906244365646976, "grad_norm": 0.4139043092727661, "learning_rate": 0.00012021211609196896, "loss": 1.3859, "step": 30710 }, { "epoch": 0.3990754382003856, "grad_norm": 0.27381631731987, "learning_rate": 0.0001202095166300576, "loss": 1.5237, "step": 30711 }, { "epoch": 0.3990884327443015, "grad_norm": 0.3617205321788788, "learning_rate": 0.00012020691716814621, "loss": 1.421, "step": 30712 }, { "epoch": 0.39910142728821735, "grad_norm": 0.3965858221054077, "learning_rate": 0.00012020431770623482, "loss": 1.465, "step": 30713 }, { "epoch": 0.39911442183213325, "grad_norm": 0.4448576867580414, "learning_rate": 0.00012020171824432343, "loss": 1.4211, "step": 30714 }, { "epoch": 0.3991274163760491, "grad_norm": 0.46282872557640076, "learning_rate": 0.00012019911878241205, "loss": 1.5665, "step": 30715 }, { "epoch": 0.399140410919965, "grad_norm": 0.4164327383041382, "learning_rate": 0.00012019651932050066, "loss": 1.2611, "step": 30716 }, { "epoch": 0.39915340546388084, "grad_norm": 0.40648898482322693, "learning_rate": 0.00012019391985858928, "loss": 1.2993, "step": 30717 }, { "epoch": 0.39916640000779674, "grad_norm": 0.38534462451934814, "learning_rate": 0.00012019132039667789, "loss": 1.3671, "step": 30718 }, { "epoch": 0.3991793945517126, "grad_norm": 0.3989992141723633, "learning_rate": 0.00012018872093476652, "loss": 1.6946, "step": 30719 }, { "epoch": 0.3991923890956285, "grad_norm": 0.28701189160346985, "learning_rate": 0.00012018612147285514, "loss": 1.0815, "step": 30720 }, { "epoch": 0.39920538363954433, "grad_norm": 0.3366556167602539, "learning_rate": 0.00012018352201094373, "loss": 1.3896, "step": 30721 }, { "epoch": 0.39921837818346023, "grad_norm": 0.3729995787143707, "learning_rate": 0.00012018092254903234, "loss": 1.301, "step": 30722 }, { "epoch": 0.3992313727273761, "grad_norm": 0.37458181381225586, "learning_rate": 0.00012017832308712098, "loss": 1.4842, "step": 30723 }, { "epoch": 0.399244367271292, "grad_norm": 0.3517019748687744, "learning_rate": 0.00012017572362520959, "loss": 1.3655, "step": 30724 }, { "epoch": 0.3992573618152078, "grad_norm": 0.33635079860687256, "learning_rate": 0.0001201731241632982, "loss": 1.2718, "step": 30725 }, { "epoch": 0.3992703563591237, "grad_norm": 0.4275568425655365, "learning_rate": 0.00012017052470138681, "loss": 1.4739, "step": 30726 }, { "epoch": 0.39928335090303957, "grad_norm": 0.39404749870300293, "learning_rate": 0.00012016792523947544, "loss": 1.2442, "step": 30727 }, { "epoch": 0.39929634544695547, "grad_norm": 0.36775586009025574, "learning_rate": 0.00012016532577756405, "loss": 1.4552, "step": 30728 }, { "epoch": 0.3993093399908713, "grad_norm": 0.3561258912086487, "learning_rate": 0.00012016272631565266, "loss": 1.4317, "step": 30729 }, { "epoch": 0.3993223345347872, "grad_norm": 0.395398885011673, "learning_rate": 0.00012016012685374127, "loss": 1.5401, "step": 30730 }, { "epoch": 0.39933532907870306, "grad_norm": 0.4343360960483551, "learning_rate": 0.00012015752739182991, "loss": 1.3702, "step": 30731 }, { "epoch": 0.39934832362261896, "grad_norm": 0.4516986608505249, "learning_rate": 0.00012015492792991852, "loss": 1.5651, "step": 30732 }, { "epoch": 0.3993613181665348, "grad_norm": 0.4154672622680664, "learning_rate": 0.00012015232846800713, "loss": 1.5392, "step": 30733 }, { "epoch": 0.3993743127104507, "grad_norm": 0.39604562520980835, "learning_rate": 0.00012014972900609573, "loss": 1.2259, "step": 30734 }, { "epoch": 0.39938730725436655, "grad_norm": 0.4680011570453644, "learning_rate": 0.00012014712954418437, "loss": 1.4026, "step": 30735 }, { "epoch": 0.39940030179828245, "grad_norm": 0.4952586889266968, "learning_rate": 0.00012014453008227298, "loss": 1.4814, "step": 30736 }, { "epoch": 0.3994132963421983, "grad_norm": 0.39622732996940613, "learning_rate": 0.00012014193062036159, "loss": 1.2221, "step": 30737 }, { "epoch": 0.3994262908861142, "grad_norm": 0.4304730296134949, "learning_rate": 0.0001201393311584502, "loss": 1.2051, "step": 30738 }, { "epoch": 0.39943928543003004, "grad_norm": 0.41334205865859985, "learning_rate": 0.00012013673169653882, "loss": 1.4043, "step": 30739 }, { "epoch": 0.39945227997394595, "grad_norm": 0.509895920753479, "learning_rate": 0.00012013413223462744, "loss": 1.5151, "step": 30740 }, { "epoch": 0.3994652745178618, "grad_norm": 0.5046713352203369, "learning_rate": 0.00012013153277271605, "loss": 1.4272, "step": 30741 }, { "epoch": 0.3994782690617777, "grad_norm": 0.5115154981613159, "learning_rate": 0.00012012893331080466, "loss": 1.5151, "step": 30742 }, { "epoch": 0.39949126360569354, "grad_norm": 0.4074965715408325, "learning_rate": 0.0001201263338488933, "loss": 1.3666, "step": 30743 }, { "epoch": 0.39950425814960944, "grad_norm": 0.45780643820762634, "learning_rate": 0.0001201237343869819, "loss": 1.3173, "step": 30744 }, { "epoch": 0.3995172526935253, "grad_norm": 0.4388483464717865, "learning_rate": 0.00012012113492507052, "loss": 1.425, "step": 30745 }, { "epoch": 0.3995302472374412, "grad_norm": 0.3814353942871094, "learning_rate": 0.00012011853546315911, "loss": 1.5571, "step": 30746 }, { "epoch": 0.39954324178135703, "grad_norm": 0.4243190884590149, "learning_rate": 0.00012011593600124775, "loss": 1.5229, "step": 30747 }, { "epoch": 0.39955623632527293, "grad_norm": 0.39877456426620483, "learning_rate": 0.00012011333653933636, "loss": 1.3513, "step": 30748 }, { "epoch": 0.3995692308691888, "grad_norm": 0.3427993357181549, "learning_rate": 0.00012011073707742497, "loss": 1.4455, "step": 30749 }, { "epoch": 0.3995822254131047, "grad_norm": 0.4350269138813019, "learning_rate": 0.0001201081376155136, "loss": 1.3482, "step": 30750 }, { "epoch": 0.3995952199570205, "grad_norm": 0.4132777750492096, "learning_rate": 0.00012010553815360221, "loss": 1.3185, "step": 30751 }, { "epoch": 0.3996082145009364, "grad_norm": 0.38242480158805847, "learning_rate": 0.00012010293869169082, "loss": 1.3265, "step": 30752 }, { "epoch": 0.39962120904485227, "grad_norm": 0.4838862121105194, "learning_rate": 0.00012010033922977943, "loss": 1.6599, "step": 30753 }, { "epoch": 0.39963420358876817, "grad_norm": 0.3450539708137512, "learning_rate": 0.00012009773976786807, "loss": 1.344, "step": 30754 }, { "epoch": 0.399647198132684, "grad_norm": 0.48688775300979614, "learning_rate": 0.00012009514030595668, "loss": 1.4564, "step": 30755 }, { "epoch": 0.3996601926765999, "grad_norm": 0.39791902899742126, "learning_rate": 0.00012009254084404529, "loss": 1.4225, "step": 30756 }, { "epoch": 0.3996731872205158, "grad_norm": 0.35362982749938965, "learning_rate": 0.0001200899413821339, "loss": 1.3233, "step": 30757 }, { "epoch": 0.39968618176443166, "grad_norm": 0.36608344316482544, "learning_rate": 0.00012008734192022253, "loss": 1.355, "step": 30758 }, { "epoch": 0.39969917630834756, "grad_norm": 0.44153866171836853, "learning_rate": 0.00012008474245831114, "loss": 1.4646, "step": 30759 }, { "epoch": 0.3997121708522634, "grad_norm": 0.4325862526893616, "learning_rate": 0.00012008214299639975, "loss": 1.4314, "step": 30760 }, { "epoch": 0.3997251653961793, "grad_norm": 0.38937950134277344, "learning_rate": 0.00012007954353448836, "loss": 1.3571, "step": 30761 }, { "epoch": 0.39973815994009515, "grad_norm": 0.45320913195610046, "learning_rate": 0.000120076944072577, "loss": 1.703, "step": 30762 }, { "epoch": 0.39975115448401105, "grad_norm": 0.45183277130126953, "learning_rate": 0.0001200743446106656, "loss": 1.4915, "step": 30763 }, { "epoch": 0.3997641490279269, "grad_norm": 0.3765687942504883, "learning_rate": 0.0001200717451487542, "loss": 1.1622, "step": 30764 }, { "epoch": 0.3997771435718428, "grad_norm": 0.3917236328125, "learning_rate": 0.00012006914568684282, "loss": 1.3739, "step": 30765 }, { "epoch": 0.39979013811575864, "grad_norm": 0.36005303263664246, "learning_rate": 0.00012006654622493145, "loss": 1.2984, "step": 30766 }, { "epoch": 0.39980313265967454, "grad_norm": 0.330409973859787, "learning_rate": 0.00012006394676302007, "loss": 1.212, "step": 30767 }, { "epoch": 0.3998161272035904, "grad_norm": 0.44632336497306824, "learning_rate": 0.00012006134730110868, "loss": 1.4048, "step": 30768 }, { "epoch": 0.3998291217475063, "grad_norm": 0.3802608251571655, "learning_rate": 0.00012005874783919729, "loss": 1.3177, "step": 30769 }, { "epoch": 0.39984211629142213, "grad_norm": 0.31766894459724426, "learning_rate": 0.00012005614837728591, "loss": 1.3982, "step": 30770 }, { "epoch": 0.39985511083533803, "grad_norm": 0.3211827576160431, "learning_rate": 0.00012005354891537452, "loss": 1.3492, "step": 30771 }, { "epoch": 0.3998681053792539, "grad_norm": 0.4244026243686676, "learning_rate": 0.00012005094945346313, "loss": 1.3055, "step": 30772 }, { "epoch": 0.3998810999231698, "grad_norm": 0.40082234144210815, "learning_rate": 0.00012004834999155174, "loss": 1.3247, "step": 30773 }, { "epoch": 0.3998940944670856, "grad_norm": 0.46327075362205505, "learning_rate": 0.00012004575052964038, "loss": 1.2136, "step": 30774 }, { "epoch": 0.3999070890110015, "grad_norm": 0.3778066039085388, "learning_rate": 0.000120043151067729, "loss": 1.615, "step": 30775 }, { "epoch": 0.39992008355491737, "grad_norm": 0.43111875653266907, "learning_rate": 0.00012004055160581759, "loss": 1.5014, "step": 30776 }, { "epoch": 0.3999330780988333, "grad_norm": 0.3785284757614136, "learning_rate": 0.0001200379521439062, "loss": 1.2903, "step": 30777 }, { "epoch": 0.3999460726427491, "grad_norm": 0.5251731276512146, "learning_rate": 0.00012003535268199484, "loss": 1.5504, "step": 30778 }, { "epoch": 0.399959067186665, "grad_norm": 0.4645079970359802, "learning_rate": 0.00012003275322008345, "loss": 1.3668, "step": 30779 }, { "epoch": 0.39997206173058086, "grad_norm": 0.34047141671180725, "learning_rate": 0.00012003015375817206, "loss": 1.34, "step": 30780 }, { "epoch": 0.39998505627449676, "grad_norm": 0.36831164360046387, "learning_rate": 0.00012002755429626067, "loss": 1.3671, "step": 30781 }, { "epoch": 0.3999980508184126, "grad_norm": 0.2827679514884949, "learning_rate": 0.0001200249548343493, "loss": 1.1858, "step": 30782 }, { "epoch": 0.4000110453623285, "grad_norm": 0.29537034034729004, "learning_rate": 0.00012002235537243791, "loss": 1.2754, "step": 30783 }, { "epoch": 0.40002403990624436, "grad_norm": 0.3191387355327606, "learning_rate": 0.00012001975591052652, "loss": 1.3112, "step": 30784 }, { "epoch": 0.40003703445016026, "grad_norm": 0.38101980090141296, "learning_rate": 0.00012001715644861516, "loss": 1.5365, "step": 30785 }, { "epoch": 0.4000500289940761, "grad_norm": 0.3570547103881836, "learning_rate": 0.00012001455698670377, "loss": 1.3776, "step": 30786 }, { "epoch": 0.400063023537992, "grad_norm": 0.4522722065448761, "learning_rate": 0.00012001195752479238, "loss": 1.2787, "step": 30787 }, { "epoch": 0.40007601808190785, "grad_norm": 0.44312748312950134, "learning_rate": 0.00012000935806288098, "loss": 1.378, "step": 30788 }, { "epoch": 0.40008901262582375, "grad_norm": 0.4156210720539093, "learning_rate": 0.00012000675860096961, "loss": 1.5378, "step": 30789 }, { "epoch": 0.4001020071697396, "grad_norm": 0.43703708052635193, "learning_rate": 0.00012000415913905823, "loss": 1.3802, "step": 30790 }, { "epoch": 0.4001150017136555, "grad_norm": 0.3735348880290985, "learning_rate": 0.00012000155967714684, "loss": 1.4785, "step": 30791 }, { "epoch": 0.40012799625757134, "grad_norm": 0.41813093423843384, "learning_rate": 0.00011999896021523545, "loss": 1.4512, "step": 30792 }, { "epoch": 0.40014099080148724, "grad_norm": 0.506083071231842, "learning_rate": 0.00011999636075332407, "loss": 1.4751, "step": 30793 }, { "epoch": 0.4001539853454031, "grad_norm": 0.3321952223777771, "learning_rate": 0.00011999376129141268, "loss": 1.4753, "step": 30794 }, { "epoch": 0.400166979889319, "grad_norm": 0.42100703716278076, "learning_rate": 0.0001199911618295013, "loss": 1.3513, "step": 30795 }, { "epoch": 0.40017997443323483, "grad_norm": 0.3634747266769409, "learning_rate": 0.0001199885623675899, "loss": 1.3472, "step": 30796 }, { "epoch": 0.40019296897715073, "grad_norm": 0.4350329637527466, "learning_rate": 0.00011998596290567854, "loss": 1.306, "step": 30797 }, { "epoch": 0.4002059635210666, "grad_norm": 0.4530585706233978, "learning_rate": 0.00011998336344376715, "loss": 1.6067, "step": 30798 }, { "epoch": 0.4002189580649825, "grad_norm": 0.39739182591438293, "learning_rate": 0.00011998076398185576, "loss": 1.4186, "step": 30799 }, { "epoch": 0.4002319526088983, "grad_norm": 0.43387311697006226, "learning_rate": 0.00011997816451994438, "loss": 1.5991, "step": 30800 }, { "epoch": 0.4002449471528142, "grad_norm": 0.35849398374557495, "learning_rate": 0.000119975565058033, "loss": 1.6729, "step": 30801 }, { "epoch": 0.40025794169673007, "grad_norm": 0.36983904242515564, "learning_rate": 0.00011997296559612161, "loss": 1.2913, "step": 30802 }, { "epoch": 0.40027093624064597, "grad_norm": 0.40881189703941345, "learning_rate": 0.00011997036613421022, "loss": 1.3664, "step": 30803 }, { "epoch": 0.4002839307845618, "grad_norm": 0.37786686420440674, "learning_rate": 0.00011996776667229883, "loss": 1.3601, "step": 30804 }, { "epoch": 0.4002969253284777, "grad_norm": 0.3708288371562958, "learning_rate": 0.00011996516721038746, "loss": 1.35, "step": 30805 }, { "epoch": 0.40030991987239356, "grad_norm": 0.38175007700920105, "learning_rate": 0.00011996256774847607, "loss": 1.3863, "step": 30806 }, { "epoch": 0.40032291441630946, "grad_norm": 0.43318769335746765, "learning_rate": 0.00011995996828656468, "loss": 1.508, "step": 30807 }, { "epoch": 0.4003359089602253, "grad_norm": 0.41580840945243835, "learning_rate": 0.00011995736882465329, "loss": 1.1919, "step": 30808 }, { "epoch": 0.4003489035041412, "grad_norm": 0.4472562372684479, "learning_rate": 0.00011995476936274193, "loss": 1.2593, "step": 30809 }, { "epoch": 0.40036189804805705, "grad_norm": 0.3981015384197235, "learning_rate": 0.00011995216990083054, "loss": 1.6482, "step": 30810 }, { "epoch": 0.40037489259197295, "grad_norm": 0.29677554965019226, "learning_rate": 0.00011994957043891915, "loss": 1.1991, "step": 30811 }, { "epoch": 0.4003878871358888, "grad_norm": 0.45702308416366577, "learning_rate": 0.00011994697097700776, "loss": 1.3663, "step": 30812 }, { "epoch": 0.4004008816798047, "grad_norm": 0.40553992986679077, "learning_rate": 0.00011994437151509639, "loss": 1.5633, "step": 30813 }, { "epoch": 0.40041387622372054, "grad_norm": 0.3707307279109955, "learning_rate": 0.000119941772053185, "loss": 1.345, "step": 30814 }, { "epoch": 0.40042687076763644, "grad_norm": 0.4028571546077728, "learning_rate": 0.00011993917259127361, "loss": 1.5261, "step": 30815 }, { "epoch": 0.4004398653115523, "grad_norm": 0.38304877281188965, "learning_rate": 0.00011993657312936222, "loss": 1.3486, "step": 30816 }, { "epoch": 0.4004528598554682, "grad_norm": 0.38557168841362, "learning_rate": 0.00011993397366745086, "loss": 1.342, "step": 30817 }, { "epoch": 0.40046585439938404, "grad_norm": 0.5038419961929321, "learning_rate": 0.00011993137420553945, "loss": 1.6818, "step": 30818 }, { "epoch": 0.40047884894329994, "grad_norm": 0.3253868818283081, "learning_rate": 0.00011992877474362806, "loss": 1.3631, "step": 30819 }, { "epoch": 0.4004918434872158, "grad_norm": 0.42589250206947327, "learning_rate": 0.00011992617528171668, "loss": 1.3477, "step": 30820 }, { "epoch": 0.4005048380311317, "grad_norm": 0.45339396595954895, "learning_rate": 0.00011992357581980531, "loss": 1.5092, "step": 30821 }, { "epoch": 0.4005178325750475, "grad_norm": 0.35119137167930603, "learning_rate": 0.00011992097635789392, "loss": 1.2571, "step": 30822 }, { "epoch": 0.40053082711896343, "grad_norm": 0.42659008502960205, "learning_rate": 0.00011991837689598254, "loss": 1.4706, "step": 30823 }, { "epoch": 0.4005438216628793, "grad_norm": 0.5652179718017578, "learning_rate": 0.00011991577743407116, "loss": 1.4996, "step": 30824 }, { "epoch": 0.4005568162067952, "grad_norm": 0.4107537269592285, "learning_rate": 0.00011991317797215977, "loss": 1.3935, "step": 30825 }, { "epoch": 0.400569810750711, "grad_norm": 0.42623329162597656, "learning_rate": 0.00011991057851024838, "loss": 1.55, "step": 30826 }, { "epoch": 0.4005828052946269, "grad_norm": 0.5213897824287415, "learning_rate": 0.00011990797904833699, "loss": 1.5624, "step": 30827 }, { "epoch": 0.40059579983854277, "grad_norm": 0.37603679299354553, "learning_rate": 0.00011990537958642563, "loss": 1.1814, "step": 30828 }, { "epoch": 0.40060879438245867, "grad_norm": 0.36184534430503845, "learning_rate": 0.00011990278012451424, "loss": 1.1948, "step": 30829 }, { "epoch": 0.4006217889263745, "grad_norm": 0.41600966453552246, "learning_rate": 0.00011990018066260284, "loss": 1.4123, "step": 30830 }, { "epoch": 0.4006347834702904, "grad_norm": 0.4303798973560333, "learning_rate": 0.00011989758120069145, "loss": 1.5242, "step": 30831 }, { "epoch": 0.4006477780142063, "grad_norm": 0.5037251114845276, "learning_rate": 0.00011989498173878009, "loss": 1.5592, "step": 30832 }, { "epoch": 0.40066077255812216, "grad_norm": 0.43412378430366516, "learning_rate": 0.0001198923822768687, "loss": 1.5731, "step": 30833 }, { "epoch": 0.40067376710203806, "grad_norm": 0.38229018449783325, "learning_rate": 0.00011988978281495731, "loss": 1.371, "step": 30834 }, { "epoch": 0.4006867616459539, "grad_norm": 0.370576947927475, "learning_rate": 0.00011988718335304592, "loss": 1.2974, "step": 30835 }, { "epoch": 0.4006997561898698, "grad_norm": 0.3848870098590851, "learning_rate": 0.00011988458389113455, "loss": 1.1385, "step": 30836 }, { "epoch": 0.40071275073378565, "grad_norm": 0.2757657766342163, "learning_rate": 0.00011988198442922316, "loss": 1.2153, "step": 30837 }, { "epoch": 0.40072574527770155, "grad_norm": 0.5632579922676086, "learning_rate": 0.00011987938496731177, "loss": 1.4664, "step": 30838 }, { "epoch": 0.4007387398216174, "grad_norm": 0.303602010011673, "learning_rate": 0.00011987678550540038, "loss": 1.462, "step": 30839 }, { "epoch": 0.4007517343655333, "grad_norm": 0.35670235753059387, "learning_rate": 0.00011987418604348902, "loss": 1.4834, "step": 30840 }, { "epoch": 0.40076472890944914, "grad_norm": 0.41477686166763306, "learning_rate": 0.00011987158658157763, "loss": 1.3831, "step": 30841 }, { "epoch": 0.40077772345336504, "grad_norm": 0.34395071864128113, "learning_rate": 0.00011986898711966624, "loss": 1.268, "step": 30842 }, { "epoch": 0.4007907179972809, "grad_norm": 0.3681591749191284, "learning_rate": 0.00011986638765775484, "loss": 1.2065, "step": 30843 }, { "epoch": 0.4008037125411968, "grad_norm": 0.43257269263267517, "learning_rate": 0.00011986378819584347, "loss": 1.2028, "step": 30844 }, { "epoch": 0.40081670708511263, "grad_norm": 0.41485995054244995, "learning_rate": 0.00011986118873393208, "loss": 1.2752, "step": 30845 }, { "epoch": 0.40082970162902853, "grad_norm": 0.4048994183540344, "learning_rate": 0.0001198585892720207, "loss": 1.5178, "step": 30846 }, { "epoch": 0.4008426961729444, "grad_norm": 0.3581618368625641, "learning_rate": 0.0001198559898101093, "loss": 1.482, "step": 30847 }, { "epoch": 0.4008556907168603, "grad_norm": 0.4165598750114441, "learning_rate": 0.00011985339034819793, "loss": 1.476, "step": 30848 }, { "epoch": 0.4008686852607761, "grad_norm": 0.3122071921825409, "learning_rate": 0.00011985079088628654, "loss": 1.2657, "step": 30849 }, { "epoch": 0.400881679804692, "grad_norm": 0.4559158980846405, "learning_rate": 0.00011984819142437515, "loss": 1.5182, "step": 30850 }, { "epoch": 0.40089467434860787, "grad_norm": 0.42092403769493103, "learning_rate": 0.00011984559196246376, "loss": 1.4914, "step": 30851 }, { "epoch": 0.40090766889252377, "grad_norm": 0.35695913434028625, "learning_rate": 0.0001198429925005524, "loss": 1.5658, "step": 30852 }, { "epoch": 0.4009206634364396, "grad_norm": 0.4209710955619812, "learning_rate": 0.00011984039303864101, "loss": 1.4249, "step": 30853 }, { "epoch": 0.4009336579803555, "grad_norm": 0.3818749487400055, "learning_rate": 0.00011983779357672962, "loss": 1.3164, "step": 30854 }, { "epoch": 0.40094665252427136, "grad_norm": 0.5126658082008362, "learning_rate": 0.00011983519411481823, "loss": 1.2886, "step": 30855 }, { "epoch": 0.40095964706818726, "grad_norm": 0.5513366460800171, "learning_rate": 0.00011983259465290686, "loss": 1.3874, "step": 30856 }, { "epoch": 0.4009726416121031, "grad_norm": 0.32973718643188477, "learning_rate": 0.00011982999519099547, "loss": 1.237, "step": 30857 }, { "epoch": 0.400985636156019, "grad_norm": 0.38109949231147766, "learning_rate": 0.00011982739572908408, "loss": 1.1659, "step": 30858 }, { "epoch": 0.40099863069993485, "grad_norm": 0.37945684790611267, "learning_rate": 0.00011982479626717272, "loss": 1.3481, "step": 30859 }, { "epoch": 0.40101162524385076, "grad_norm": 0.3536010682582855, "learning_rate": 0.00011982219680526132, "loss": 1.3725, "step": 30860 }, { "epoch": 0.4010246197877666, "grad_norm": 0.3550262153148651, "learning_rate": 0.00011981959734334993, "loss": 1.3076, "step": 30861 }, { "epoch": 0.4010376143316825, "grad_norm": 0.42360740900039673, "learning_rate": 0.00011981699788143854, "loss": 1.5057, "step": 30862 }, { "epoch": 0.40105060887559835, "grad_norm": 0.42317911982536316, "learning_rate": 0.00011981439841952718, "loss": 1.5379, "step": 30863 }, { "epoch": 0.40106360341951425, "grad_norm": 0.4288017749786377, "learning_rate": 0.00011981179895761579, "loss": 1.4198, "step": 30864 }, { "epoch": 0.4010765979634301, "grad_norm": 0.3426763117313385, "learning_rate": 0.0001198091994957044, "loss": 1.4674, "step": 30865 }, { "epoch": 0.401089592507346, "grad_norm": 0.3653353154659271, "learning_rate": 0.00011980660003379301, "loss": 1.5023, "step": 30866 }, { "epoch": 0.40110258705126184, "grad_norm": 0.37957748770713806, "learning_rate": 0.00011980400057188163, "loss": 1.3676, "step": 30867 }, { "epoch": 0.40111558159517774, "grad_norm": 0.38366758823394775, "learning_rate": 0.00011980140110997024, "loss": 1.2874, "step": 30868 }, { "epoch": 0.4011285761390936, "grad_norm": 0.3444020748138428, "learning_rate": 0.00011979880164805886, "loss": 1.2087, "step": 30869 }, { "epoch": 0.4011415706830095, "grad_norm": 0.3868485689163208, "learning_rate": 0.00011979620218614747, "loss": 1.3455, "step": 30870 }, { "epoch": 0.40115456522692533, "grad_norm": 0.38688522577285767, "learning_rate": 0.0001197936027242361, "loss": 1.26, "step": 30871 }, { "epoch": 0.40116755977084123, "grad_norm": 0.4591645300388336, "learning_rate": 0.0001197910032623247, "loss": 1.4208, "step": 30872 }, { "epoch": 0.4011805543147571, "grad_norm": 0.34246429800987244, "learning_rate": 0.00011978840380041331, "loss": 1.1288, "step": 30873 }, { "epoch": 0.401193548858673, "grad_norm": 0.4164416193962097, "learning_rate": 0.00011978580433850192, "loss": 1.3145, "step": 30874 }, { "epoch": 0.4012065434025888, "grad_norm": 0.43146395683288574, "learning_rate": 0.00011978320487659056, "loss": 1.3508, "step": 30875 }, { "epoch": 0.4012195379465047, "grad_norm": 0.38753631711006165, "learning_rate": 0.00011978060541467917, "loss": 1.3951, "step": 30876 }, { "epoch": 0.40123253249042057, "grad_norm": 0.43959835171699524, "learning_rate": 0.00011977800595276778, "loss": 1.3638, "step": 30877 }, { "epoch": 0.40124552703433647, "grad_norm": 0.4973922669887543, "learning_rate": 0.0001197754064908564, "loss": 1.3873, "step": 30878 }, { "epoch": 0.4012585215782523, "grad_norm": 0.41170361638069153, "learning_rate": 0.00011977280702894502, "loss": 1.4862, "step": 30879 }, { "epoch": 0.4012715161221682, "grad_norm": 0.422812819480896, "learning_rate": 0.00011977020756703363, "loss": 1.2926, "step": 30880 }, { "epoch": 0.40128451066608406, "grad_norm": 0.4882308542728424, "learning_rate": 0.00011976760810512224, "loss": 1.4544, "step": 30881 }, { "epoch": 0.40129750520999996, "grad_norm": 0.3994767665863037, "learning_rate": 0.00011976500864321085, "loss": 1.3482, "step": 30882 }, { "epoch": 0.4013104997539158, "grad_norm": 0.46767398715019226, "learning_rate": 0.00011976240918129949, "loss": 1.3036, "step": 30883 }, { "epoch": 0.4013234942978317, "grad_norm": 0.42908281087875366, "learning_rate": 0.0001197598097193881, "loss": 1.4414, "step": 30884 }, { "epoch": 0.40133648884174755, "grad_norm": 0.42466217279434204, "learning_rate": 0.0001197572102574767, "loss": 1.5046, "step": 30885 }, { "epoch": 0.40134948338566345, "grad_norm": 0.3394346535205841, "learning_rate": 0.00011975461079556531, "loss": 1.4717, "step": 30886 }, { "epoch": 0.4013624779295793, "grad_norm": 0.3789116442203522, "learning_rate": 0.00011975201133365395, "loss": 1.2896, "step": 30887 }, { "epoch": 0.4013754724734952, "grad_norm": 0.40706583857536316, "learning_rate": 0.00011974941187174256, "loss": 1.5988, "step": 30888 }, { "epoch": 0.40138846701741104, "grad_norm": 0.43128660321235657, "learning_rate": 0.00011974681240983117, "loss": 1.6382, "step": 30889 }, { "epoch": 0.40140146156132694, "grad_norm": 0.27285197377204895, "learning_rate": 0.00011974421294791978, "loss": 1.6409, "step": 30890 }, { "epoch": 0.4014144561052428, "grad_norm": 0.48049065470695496, "learning_rate": 0.0001197416134860084, "loss": 1.3976, "step": 30891 }, { "epoch": 0.4014274506491587, "grad_norm": 0.30589044094085693, "learning_rate": 0.00011973901402409701, "loss": 1.2662, "step": 30892 }, { "epoch": 0.40144044519307454, "grad_norm": 0.4326670467853546, "learning_rate": 0.00011973641456218563, "loss": 1.3781, "step": 30893 }, { "epoch": 0.40145343973699044, "grad_norm": 0.4140307307243347, "learning_rate": 0.00011973381510027424, "loss": 1.2867, "step": 30894 }, { "epoch": 0.4014664342809063, "grad_norm": 0.37885743379592896, "learning_rate": 0.00011973121563836287, "loss": 1.533, "step": 30895 }, { "epoch": 0.4014794288248222, "grad_norm": 0.40460264682769775, "learning_rate": 0.00011972861617645149, "loss": 1.3126, "step": 30896 }, { "epoch": 0.401492423368738, "grad_norm": 0.29098427295684814, "learning_rate": 0.0001197260167145401, "loss": 1.1961, "step": 30897 }, { "epoch": 0.40150541791265393, "grad_norm": 0.37326356768608093, "learning_rate": 0.00011972341725262872, "loss": 1.3055, "step": 30898 }, { "epoch": 0.4015184124565698, "grad_norm": 0.349769651889801, "learning_rate": 0.00011972081779071733, "loss": 1.4681, "step": 30899 }, { "epoch": 0.4015314070004857, "grad_norm": 0.41014358401298523, "learning_rate": 0.00011971821832880594, "loss": 1.5628, "step": 30900 }, { "epoch": 0.4015444015444015, "grad_norm": 0.5560110211372375, "learning_rate": 0.00011971561886689455, "loss": 1.3391, "step": 30901 }, { "epoch": 0.4015573960883174, "grad_norm": 0.47671815752983093, "learning_rate": 0.00011971301940498318, "loss": 1.477, "step": 30902 }, { "epoch": 0.40157039063223327, "grad_norm": 0.4210048317909241, "learning_rate": 0.00011971041994307179, "loss": 1.3543, "step": 30903 }, { "epoch": 0.40158338517614917, "grad_norm": 0.4483545422554016, "learning_rate": 0.0001197078204811604, "loss": 1.6265, "step": 30904 }, { "epoch": 0.401596379720065, "grad_norm": 0.3446093499660492, "learning_rate": 0.00011970522101924901, "loss": 1.4344, "step": 30905 }, { "epoch": 0.4016093742639809, "grad_norm": 0.27788135409355164, "learning_rate": 0.00011970262155733765, "loss": 1.4237, "step": 30906 }, { "epoch": 0.40162236880789676, "grad_norm": 0.5458555817604065, "learning_rate": 0.00011970002209542626, "loss": 1.4991, "step": 30907 }, { "epoch": 0.40163536335181266, "grad_norm": 0.39793604612350464, "learning_rate": 0.00011969742263351487, "loss": 1.4515, "step": 30908 }, { "epoch": 0.40164835789572856, "grad_norm": 0.4374416470527649, "learning_rate": 0.00011969482317160348, "loss": 1.4077, "step": 30909 }, { "epoch": 0.4016613524396444, "grad_norm": 0.3765833079814911, "learning_rate": 0.0001196922237096921, "loss": 1.4179, "step": 30910 }, { "epoch": 0.4016743469835603, "grad_norm": 0.41321155428886414, "learning_rate": 0.00011968962424778072, "loss": 1.3748, "step": 30911 }, { "epoch": 0.40168734152747615, "grad_norm": 0.33002957701683044, "learning_rate": 0.00011968702478586933, "loss": 1.2833, "step": 30912 }, { "epoch": 0.40170033607139205, "grad_norm": 0.36609795689582825, "learning_rate": 0.00011968442532395794, "loss": 1.4742, "step": 30913 }, { "epoch": 0.4017133306153079, "grad_norm": 0.5107268691062927, "learning_rate": 0.00011968182586204656, "loss": 1.4252, "step": 30914 }, { "epoch": 0.4017263251592238, "grad_norm": 0.42915090918540955, "learning_rate": 0.00011967922640013517, "loss": 1.4848, "step": 30915 }, { "epoch": 0.40173931970313964, "grad_norm": 0.34450864791870117, "learning_rate": 0.00011967662693822379, "loss": 1.4102, "step": 30916 }, { "epoch": 0.40175231424705554, "grad_norm": 0.5170263648033142, "learning_rate": 0.0001196740274763124, "loss": 1.5575, "step": 30917 }, { "epoch": 0.4017653087909714, "grad_norm": 0.39502233266830444, "learning_rate": 0.00011967142801440103, "loss": 1.3843, "step": 30918 }, { "epoch": 0.4017783033348873, "grad_norm": 0.24875958263874054, "learning_rate": 0.00011966882855248965, "loss": 1.3372, "step": 30919 }, { "epoch": 0.40179129787880313, "grad_norm": 0.321890652179718, "learning_rate": 0.00011966622909057826, "loss": 1.6299, "step": 30920 }, { "epoch": 0.40180429242271903, "grad_norm": 0.40718942880630493, "learning_rate": 0.00011966362962866687, "loss": 1.4659, "step": 30921 }, { "epoch": 0.4018172869666349, "grad_norm": 0.39859914779663086, "learning_rate": 0.00011966103016675549, "loss": 1.4074, "step": 30922 }, { "epoch": 0.4018302815105508, "grad_norm": 0.3389623463153839, "learning_rate": 0.0001196584307048441, "loss": 1.3038, "step": 30923 }, { "epoch": 0.4018432760544666, "grad_norm": 0.3802888095378876, "learning_rate": 0.00011965583124293271, "loss": 1.4235, "step": 30924 }, { "epoch": 0.4018562705983825, "grad_norm": 0.2866087853908539, "learning_rate": 0.00011965323178102132, "loss": 1.3044, "step": 30925 }, { "epoch": 0.40186926514229837, "grad_norm": 0.325508713722229, "learning_rate": 0.00011965063231910996, "loss": 1.425, "step": 30926 }, { "epoch": 0.40188225968621427, "grad_norm": 0.34628865122795105, "learning_rate": 0.00011964803285719856, "loss": 1.4094, "step": 30927 }, { "epoch": 0.4018952542301301, "grad_norm": 0.4367072582244873, "learning_rate": 0.00011964543339528717, "loss": 1.443, "step": 30928 }, { "epoch": 0.401908248774046, "grad_norm": 0.3642234206199646, "learning_rate": 0.00011964283393337578, "loss": 1.2116, "step": 30929 }, { "epoch": 0.40192124331796186, "grad_norm": 0.5267978310585022, "learning_rate": 0.00011964023447146442, "loss": 1.5136, "step": 30930 }, { "epoch": 0.40193423786187776, "grad_norm": 0.33697745203971863, "learning_rate": 0.00011963763500955303, "loss": 1.4535, "step": 30931 }, { "epoch": 0.4019472324057936, "grad_norm": 0.3666036128997803, "learning_rate": 0.00011963503554764164, "loss": 1.2289, "step": 30932 }, { "epoch": 0.4019602269497095, "grad_norm": 0.4058707356452942, "learning_rate": 0.00011963243608573027, "loss": 1.7209, "step": 30933 }, { "epoch": 0.40197322149362535, "grad_norm": 0.331142395734787, "learning_rate": 0.00011962983662381888, "loss": 1.233, "step": 30934 }, { "epoch": 0.40198621603754126, "grad_norm": 0.4929024279117584, "learning_rate": 0.00011962723716190749, "loss": 1.5457, "step": 30935 }, { "epoch": 0.4019992105814571, "grad_norm": 0.3453991115093231, "learning_rate": 0.0001196246376999961, "loss": 1.2923, "step": 30936 }, { "epoch": 0.402012205125373, "grad_norm": 0.3793419897556305, "learning_rate": 0.00011962203823808474, "loss": 1.3573, "step": 30937 }, { "epoch": 0.40202519966928885, "grad_norm": 0.37099671363830566, "learning_rate": 0.00011961943877617335, "loss": 1.4699, "step": 30938 }, { "epoch": 0.40203819421320475, "grad_norm": 0.3517524003982544, "learning_rate": 0.00011961683931426196, "loss": 1.3488, "step": 30939 }, { "epoch": 0.4020511887571206, "grad_norm": 0.4371648132801056, "learning_rate": 0.00011961423985235056, "loss": 1.339, "step": 30940 }, { "epoch": 0.4020641833010365, "grad_norm": 0.41290485858917236, "learning_rate": 0.0001196116403904392, "loss": 1.4669, "step": 30941 }, { "epoch": 0.40207717784495234, "grad_norm": 0.5190026164054871, "learning_rate": 0.0001196090409285278, "loss": 1.4955, "step": 30942 }, { "epoch": 0.40209017238886824, "grad_norm": 0.3623102009296417, "learning_rate": 0.00011960644146661642, "loss": 1.4592, "step": 30943 }, { "epoch": 0.4021031669327841, "grad_norm": 0.2695774734020233, "learning_rate": 0.00011960384200470503, "loss": 1.3351, "step": 30944 }, { "epoch": 0.4021161614767, "grad_norm": 0.40172308683395386, "learning_rate": 0.00011960124254279365, "loss": 1.477, "step": 30945 }, { "epoch": 0.40212915602061583, "grad_norm": 0.4356275498867035, "learning_rate": 0.00011959864308088226, "loss": 1.4529, "step": 30946 }, { "epoch": 0.40214215056453173, "grad_norm": 0.4433925747871399, "learning_rate": 0.00011959604361897087, "loss": 1.314, "step": 30947 }, { "epoch": 0.4021551451084476, "grad_norm": 0.4564923048019409, "learning_rate": 0.00011959344415705948, "loss": 1.3194, "step": 30948 }, { "epoch": 0.4021681396523635, "grad_norm": 0.38128161430358887, "learning_rate": 0.00011959084469514812, "loss": 1.3865, "step": 30949 }, { "epoch": 0.4021811341962793, "grad_norm": 0.48772263526916504, "learning_rate": 0.00011958824523323673, "loss": 1.474, "step": 30950 }, { "epoch": 0.4021941287401952, "grad_norm": 0.49761760234832764, "learning_rate": 0.00011958564577132534, "loss": 1.3363, "step": 30951 }, { "epoch": 0.40220712328411107, "grad_norm": 0.5154112577438354, "learning_rate": 0.00011958304630941394, "loss": 1.4275, "step": 30952 }, { "epoch": 0.40222011782802697, "grad_norm": 0.25657325983047485, "learning_rate": 0.00011958044684750258, "loss": 1.1663, "step": 30953 }, { "epoch": 0.4022331123719428, "grad_norm": 0.29696914553642273, "learning_rate": 0.00011957784738559119, "loss": 1.0954, "step": 30954 }, { "epoch": 0.4022461069158587, "grad_norm": 0.4007854759693146, "learning_rate": 0.0001195752479236798, "loss": 1.5016, "step": 30955 }, { "epoch": 0.40225910145977456, "grad_norm": 0.4521598815917969, "learning_rate": 0.00011957264846176841, "loss": 1.3747, "step": 30956 }, { "epoch": 0.40227209600369046, "grad_norm": 0.4260927438735962, "learning_rate": 0.00011957004899985704, "loss": 1.4452, "step": 30957 }, { "epoch": 0.4022850905476063, "grad_norm": 0.374565064907074, "learning_rate": 0.00011956744953794565, "loss": 1.2725, "step": 30958 }, { "epoch": 0.4022980850915222, "grad_norm": 0.3917993903160095, "learning_rate": 0.00011956485007603426, "loss": 1.5097, "step": 30959 }, { "epoch": 0.40231107963543805, "grad_norm": 0.33806294202804565, "learning_rate": 0.00011956225061412287, "loss": 1.2169, "step": 30960 }, { "epoch": 0.40232407417935395, "grad_norm": 0.42782077193260193, "learning_rate": 0.00011955965115221151, "loss": 1.4093, "step": 30961 }, { "epoch": 0.4023370687232698, "grad_norm": 0.4071703851222992, "learning_rate": 0.00011955705169030012, "loss": 1.3272, "step": 30962 }, { "epoch": 0.4023500632671857, "grad_norm": 0.3569739758968353, "learning_rate": 0.00011955445222838873, "loss": 1.2467, "step": 30963 }, { "epoch": 0.40236305781110154, "grad_norm": 0.42998987436294556, "learning_rate": 0.00011955185276647734, "loss": 1.422, "step": 30964 }, { "epoch": 0.40237605235501744, "grad_norm": 0.35882484912872314, "learning_rate": 0.00011954925330456597, "loss": 1.4329, "step": 30965 }, { "epoch": 0.4023890468989333, "grad_norm": 0.48883292078971863, "learning_rate": 0.00011954665384265458, "loss": 1.4236, "step": 30966 }, { "epoch": 0.4024020414428492, "grad_norm": 0.32677263021469116, "learning_rate": 0.00011954405438074319, "loss": 1.2707, "step": 30967 }, { "epoch": 0.40241503598676504, "grad_norm": 0.47621282935142517, "learning_rate": 0.0001195414549188318, "loss": 1.45, "step": 30968 }, { "epoch": 0.40242803053068094, "grad_norm": 0.4009459614753723, "learning_rate": 0.00011953885545692042, "loss": 1.3891, "step": 30969 }, { "epoch": 0.4024410250745968, "grad_norm": 0.3691713809967041, "learning_rate": 0.00011953625599500903, "loss": 1.4875, "step": 30970 }, { "epoch": 0.4024540196185127, "grad_norm": 0.3944145739078522, "learning_rate": 0.00011953365653309764, "loss": 1.4293, "step": 30971 }, { "epoch": 0.4024670141624285, "grad_norm": 0.36506539583206177, "learning_rate": 0.00011953105707118628, "loss": 1.4355, "step": 30972 }, { "epoch": 0.4024800087063444, "grad_norm": 0.30306610465049744, "learning_rate": 0.0001195284576092749, "loss": 1.3112, "step": 30973 }, { "epoch": 0.4024930032502603, "grad_norm": 0.4144796133041382, "learning_rate": 0.0001195258581473635, "loss": 1.403, "step": 30974 }, { "epoch": 0.4025059977941762, "grad_norm": 0.4814968407154083, "learning_rate": 0.00011952325868545212, "loss": 1.2996, "step": 30975 }, { "epoch": 0.402518992338092, "grad_norm": 0.46252134442329407, "learning_rate": 0.00011952065922354074, "loss": 1.5327, "step": 30976 }, { "epoch": 0.4025319868820079, "grad_norm": 0.3180639445781708, "learning_rate": 0.00011951805976162935, "loss": 1.3248, "step": 30977 }, { "epoch": 0.40254498142592376, "grad_norm": 0.3820574879646301, "learning_rate": 0.00011951546029971796, "loss": 1.2548, "step": 30978 }, { "epoch": 0.40255797596983967, "grad_norm": 0.43728670477867126, "learning_rate": 0.00011951286083780657, "loss": 1.3186, "step": 30979 }, { "epoch": 0.4025709705137555, "grad_norm": 0.3367365002632141, "learning_rate": 0.00011951026137589521, "loss": 1.2738, "step": 30980 }, { "epoch": 0.4025839650576714, "grad_norm": 0.4072990119457245, "learning_rate": 0.00011950766191398382, "loss": 1.3514, "step": 30981 }, { "epoch": 0.40259695960158726, "grad_norm": 0.3772618770599365, "learning_rate": 0.00011950506245207242, "loss": 1.3191, "step": 30982 }, { "epoch": 0.40260995414550316, "grad_norm": 0.41574108600616455, "learning_rate": 0.00011950246299016103, "loss": 1.3599, "step": 30983 }, { "epoch": 0.40262294868941906, "grad_norm": 0.4149223566055298, "learning_rate": 0.00011949986352824967, "loss": 1.3292, "step": 30984 }, { "epoch": 0.4026359432333349, "grad_norm": 0.28939738869667053, "learning_rate": 0.00011949726406633828, "loss": 1.0865, "step": 30985 }, { "epoch": 0.4026489377772508, "grad_norm": 0.4369574189186096, "learning_rate": 0.00011949466460442689, "loss": 1.3737, "step": 30986 }, { "epoch": 0.40266193232116665, "grad_norm": 0.43800514936447144, "learning_rate": 0.0001194920651425155, "loss": 1.7039, "step": 30987 }, { "epoch": 0.40267492686508255, "grad_norm": 0.4189130365848541, "learning_rate": 0.00011948946568060413, "loss": 1.4047, "step": 30988 }, { "epoch": 0.4026879214089984, "grad_norm": 0.3541613519191742, "learning_rate": 0.00011948686621869274, "loss": 1.3784, "step": 30989 }, { "epoch": 0.4027009159529143, "grad_norm": 0.4362553656101227, "learning_rate": 0.00011948426675678135, "loss": 1.4625, "step": 30990 }, { "epoch": 0.40271391049683014, "grad_norm": 0.3711378872394562, "learning_rate": 0.00011948166729486996, "loss": 1.3243, "step": 30991 }, { "epoch": 0.40272690504074604, "grad_norm": 0.40367165207862854, "learning_rate": 0.0001194790678329586, "loss": 1.364, "step": 30992 }, { "epoch": 0.4027398995846619, "grad_norm": 0.2890492379665375, "learning_rate": 0.00011947646837104721, "loss": 1.4631, "step": 30993 }, { "epoch": 0.4027528941285778, "grad_norm": 0.4290468692779541, "learning_rate": 0.0001194738689091358, "loss": 1.6929, "step": 30994 }, { "epoch": 0.40276588867249363, "grad_norm": 0.5350620746612549, "learning_rate": 0.00011947126944722442, "loss": 1.4708, "step": 30995 }, { "epoch": 0.40277888321640953, "grad_norm": 0.4404739439487457, "learning_rate": 0.00011946866998531305, "loss": 1.4449, "step": 30996 }, { "epoch": 0.4027918777603254, "grad_norm": 0.43121930956840515, "learning_rate": 0.00011946607052340166, "loss": 1.5191, "step": 30997 }, { "epoch": 0.4028048723042413, "grad_norm": 0.3534773886203766, "learning_rate": 0.00011946347106149028, "loss": 1.4113, "step": 30998 }, { "epoch": 0.4028178668481571, "grad_norm": 0.3682509958744049, "learning_rate": 0.00011946087159957889, "loss": 1.3675, "step": 30999 }, { "epoch": 0.402830861392073, "grad_norm": 0.49179255962371826, "learning_rate": 0.00011945827213766751, "loss": 1.3383, "step": 31000 }, { "epoch": 0.40284385593598887, "grad_norm": 0.3741339445114136, "learning_rate": 0.00011945567267575612, "loss": 1.3002, "step": 31001 }, { "epoch": 0.40285685047990477, "grad_norm": 0.40714019536972046, "learning_rate": 0.00011945307321384473, "loss": 1.4236, "step": 31002 }, { "epoch": 0.4028698450238206, "grad_norm": 0.4596451222896576, "learning_rate": 0.00011945047375193334, "loss": 1.4803, "step": 31003 }, { "epoch": 0.4028828395677365, "grad_norm": 0.37432006001472473, "learning_rate": 0.00011944787429002198, "loss": 1.2799, "step": 31004 }, { "epoch": 0.40289583411165236, "grad_norm": 0.33053556084632874, "learning_rate": 0.00011944527482811059, "loss": 1.3286, "step": 31005 }, { "epoch": 0.40290882865556826, "grad_norm": 0.43211057782173157, "learning_rate": 0.0001194426753661992, "loss": 1.5037, "step": 31006 }, { "epoch": 0.4029218231994841, "grad_norm": 0.41410520672798157, "learning_rate": 0.00011944007590428783, "loss": 1.3322, "step": 31007 }, { "epoch": 0.4029348177434, "grad_norm": 0.3912518620491028, "learning_rate": 0.00011943747644237644, "loss": 1.3655, "step": 31008 }, { "epoch": 0.40294781228731585, "grad_norm": 0.43086865544319153, "learning_rate": 0.00011943487698046505, "loss": 1.3079, "step": 31009 }, { "epoch": 0.40296080683123175, "grad_norm": 0.40380388498306274, "learning_rate": 0.00011943227751855366, "loss": 1.3576, "step": 31010 }, { "epoch": 0.4029738013751476, "grad_norm": 0.4473000466823578, "learning_rate": 0.00011942967805664228, "loss": 1.4773, "step": 31011 }, { "epoch": 0.4029867959190635, "grad_norm": 0.3912469744682312, "learning_rate": 0.0001194270785947309, "loss": 1.3301, "step": 31012 }, { "epoch": 0.40299979046297935, "grad_norm": 0.3911738097667694, "learning_rate": 0.00011942447913281951, "loss": 1.3142, "step": 31013 }, { "epoch": 0.40301278500689525, "grad_norm": 0.5078325271606445, "learning_rate": 0.00011942187967090812, "loss": 1.3698, "step": 31014 }, { "epoch": 0.4030257795508111, "grad_norm": 0.45078396797180176, "learning_rate": 0.00011941928020899676, "loss": 1.4535, "step": 31015 }, { "epoch": 0.403038774094727, "grad_norm": 0.3845955431461334, "learning_rate": 0.00011941668074708537, "loss": 1.3843, "step": 31016 }, { "epoch": 0.40305176863864284, "grad_norm": 0.42612841725349426, "learning_rate": 0.00011941408128517398, "loss": 1.4616, "step": 31017 }, { "epoch": 0.40306476318255874, "grad_norm": 0.3638926148414612, "learning_rate": 0.00011941148182326259, "loss": 1.4092, "step": 31018 }, { "epoch": 0.4030777577264746, "grad_norm": 0.39457377791404724, "learning_rate": 0.00011940888236135121, "loss": 1.4205, "step": 31019 }, { "epoch": 0.4030907522703905, "grad_norm": 0.3126545548439026, "learning_rate": 0.00011940628289943982, "loss": 1.0962, "step": 31020 }, { "epoch": 0.40310374681430633, "grad_norm": 0.35651418566703796, "learning_rate": 0.00011940368343752843, "loss": 1.383, "step": 31021 }, { "epoch": 0.40311674135822223, "grad_norm": 0.5040359497070312, "learning_rate": 0.00011940108397561705, "loss": 1.4347, "step": 31022 }, { "epoch": 0.4031297359021381, "grad_norm": 0.45616036653518677, "learning_rate": 0.00011939848451370568, "loss": 1.4728, "step": 31023 }, { "epoch": 0.403142730446054, "grad_norm": 0.3578883707523346, "learning_rate": 0.00011939588505179428, "loss": 1.2405, "step": 31024 }, { "epoch": 0.4031557249899698, "grad_norm": 0.3503490388393402, "learning_rate": 0.00011939328558988289, "loss": 1.455, "step": 31025 }, { "epoch": 0.4031687195338857, "grad_norm": 0.336767315864563, "learning_rate": 0.0001193906861279715, "loss": 1.4416, "step": 31026 }, { "epoch": 0.40318171407780157, "grad_norm": 0.446418434381485, "learning_rate": 0.00011938808666606014, "loss": 1.5306, "step": 31027 }, { "epoch": 0.40319470862171747, "grad_norm": 0.42929142713546753, "learning_rate": 0.00011938548720414875, "loss": 1.2, "step": 31028 }, { "epoch": 0.4032077031656333, "grad_norm": 0.42894983291625977, "learning_rate": 0.00011938288774223736, "loss": 1.4694, "step": 31029 }, { "epoch": 0.4032206977095492, "grad_norm": 0.31042197346687317, "learning_rate": 0.00011938028828032597, "loss": 1.4006, "step": 31030 }, { "epoch": 0.40323369225346506, "grad_norm": 0.4395008087158203, "learning_rate": 0.0001193776888184146, "loss": 1.4823, "step": 31031 }, { "epoch": 0.40324668679738096, "grad_norm": 0.46104106307029724, "learning_rate": 0.00011937508935650321, "loss": 1.5517, "step": 31032 }, { "epoch": 0.4032596813412968, "grad_norm": 0.2806963324546814, "learning_rate": 0.00011937248989459182, "loss": 1.242, "step": 31033 }, { "epoch": 0.4032726758852127, "grad_norm": 0.3751618564128876, "learning_rate": 0.00011936989043268043, "loss": 1.4648, "step": 31034 }, { "epoch": 0.40328567042912855, "grad_norm": 0.4882134795188904, "learning_rate": 0.00011936729097076907, "loss": 1.3877, "step": 31035 }, { "epoch": 0.40329866497304445, "grad_norm": 0.34530094265937805, "learning_rate": 0.00011936469150885767, "loss": 1.2336, "step": 31036 }, { "epoch": 0.4033116595169603, "grad_norm": 0.4550773501396179, "learning_rate": 0.00011936209204694628, "loss": 1.2656, "step": 31037 }, { "epoch": 0.4033246540608762, "grad_norm": 0.41220822930336, "learning_rate": 0.00011935949258503489, "loss": 1.21, "step": 31038 }, { "epoch": 0.40333764860479204, "grad_norm": 0.44122928380966187, "learning_rate": 0.00011935689312312353, "loss": 1.5191, "step": 31039 }, { "epoch": 0.40335064314870794, "grad_norm": 0.4742184281349182, "learning_rate": 0.00011935429366121214, "loss": 1.574, "step": 31040 }, { "epoch": 0.4033636376926238, "grad_norm": 0.29416942596435547, "learning_rate": 0.00011935169419930075, "loss": 1.2699, "step": 31041 }, { "epoch": 0.4033766322365397, "grad_norm": 0.5073818564414978, "learning_rate": 0.00011934909473738936, "loss": 1.5844, "step": 31042 }, { "epoch": 0.40338962678045553, "grad_norm": 0.5408205986022949, "learning_rate": 0.00011934649527547798, "loss": 1.5044, "step": 31043 }, { "epoch": 0.40340262132437144, "grad_norm": 0.41733425855636597, "learning_rate": 0.0001193438958135666, "loss": 1.6471, "step": 31044 }, { "epoch": 0.4034156158682873, "grad_norm": 0.4319666922092438, "learning_rate": 0.0001193412963516552, "loss": 1.4255, "step": 31045 }, { "epoch": 0.4034286104122032, "grad_norm": 0.4576224088668823, "learning_rate": 0.00011933869688974384, "loss": 1.349, "step": 31046 }, { "epoch": 0.403441604956119, "grad_norm": 0.44393396377563477, "learning_rate": 0.00011933609742783245, "loss": 1.1948, "step": 31047 }, { "epoch": 0.4034545995000349, "grad_norm": 0.5083323121070862, "learning_rate": 0.00011933349796592107, "loss": 1.5725, "step": 31048 }, { "epoch": 0.4034675940439508, "grad_norm": 0.4106099605560303, "learning_rate": 0.00011933089850400966, "loss": 1.3906, "step": 31049 }, { "epoch": 0.4034805885878667, "grad_norm": 0.3360599875450134, "learning_rate": 0.0001193282990420983, "loss": 1.2085, "step": 31050 }, { "epoch": 0.4034935831317825, "grad_norm": 0.37186679244041443, "learning_rate": 0.00011932569958018691, "loss": 1.4006, "step": 31051 }, { "epoch": 0.4035065776756984, "grad_norm": 0.3861527144908905, "learning_rate": 0.00011932310011827552, "loss": 1.4841, "step": 31052 }, { "epoch": 0.40351957221961426, "grad_norm": 0.30264320969581604, "learning_rate": 0.00011932050065636413, "loss": 1.1926, "step": 31053 }, { "epoch": 0.40353256676353016, "grad_norm": 0.38274046778678894, "learning_rate": 0.00011931790119445276, "loss": 1.4578, "step": 31054 }, { "epoch": 0.403545561307446, "grad_norm": 0.2625962197780609, "learning_rate": 0.00011931530173254137, "loss": 1.2895, "step": 31055 }, { "epoch": 0.4035585558513619, "grad_norm": 0.4568151533603668, "learning_rate": 0.00011931270227062998, "loss": 1.3637, "step": 31056 }, { "epoch": 0.40357155039527776, "grad_norm": 0.26751509308815, "learning_rate": 0.00011931010280871859, "loss": 1.2122, "step": 31057 }, { "epoch": 0.40358454493919366, "grad_norm": 0.406337171792984, "learning_rate": 0.00011930750334680723, "loss": 1.4235, "step": 31058 }, { "epoch": 0.4035975394831095, "grad_norm": 0.36575061082839966, "learning_rate": 0.00011930490388489584, "loss": 1.3201, "step": 31059 }, { "epoch": 0.4036105340270254, "grad_norm": 0.572536826133728, "learning_rate": 0.00011930230442298445, "loss": 1.4371, "step": 31060 }, { "epoch": 0.4036235285709413, "grad_norm": 0.29167428612709045, "learning_rate": 0.00011929970496107306, "loss": 1.3815, "step": 31061 }, { "epoch": 0.40363652311485715, "grad_norm": 0.3699348270893097, "learning_rate": 0.00011929710549916169, "loss": 1.3976, "step": 31062 }, { "epoch": 0.40364951765877305, "grad_norm": 0.36710453033447266, "learning_rate": 0.0001192945060372503, "loss": 1.6085, "step": 31063 }, { "epoch": 0.4036625122026889, "grad_norm": 0.39688268303871155, "learning_rate": 0.00011929190657533891, "loss": 1.5108, "step": 31064 }, { "epoch": 0.4036755067466048, "grad_norm": 0.43722018599510193, "learning_rate": 0.00011928930711342752, "loss": 1.6239, "step": 31065 }, { "epoch": 0.40368850129052064, "grad_norm": 0.39373287558555603, "learning_rate": 0.00011928670765151614, "loss": 1.5558, "step": 31066 }, { "epoch": 0.40370149583443654, "grad_norm": 0.3575771152973175, "learning_rate": 0.00011928410818960475, "loss": 1.3021, "step": 31067 }, { "epoch": 0.4037144903783524, "grad_norm": 0.4879028797149658, "learning_rate": 0.00011928150872769337, "loss": 1.4182, "step": 31068 }, { "epoch": 0.4037274849222683, "grad_norm": 0.3960452377796173, "learning_rate": 0.00011927890926578198, "loss": 1.6554, "step": 31069 }, { "epoch": 0.40374047946618413, "grad_norm": 0.36082056164741516, "learning_rate": 0.00011927630980387061, "loss": 1.3421, "step": 31070 }, { "epoch": 0.40375347401010003, "grad_norm": 0.44531652331352234, "learning_rate": 0.00011927371034195923, "loss": 1.3684, "step": 31071 }, { "epoch": 0.4037664685540159, "grad_norm": 0.38977527618408203, "learning_rate": 0.00011927111088004784, "loss": 1.3236, "step": 31072 }, { "epoch": 0.4037794630979318, "grad_norm": 0.4571976959705353, "learning_rate": 0.00011926851141813645, "loss": 1.4554, "step": 31073 }, { "epoch": 0.4037924576418476, "grad_norm": 0.48396363854408264, "learning_rate": 0.00011926591195622507, "loss": 1.1672, "step": 31074 }, { "epoch": 0.4038054521857635, "grad_norm": 0.4495755732059479, "learning_rate": 0.00011926331249431368, "loss": 1.4583, "step": 31075 }, { "epoch": 0.40381844672967937, "grad_norm": 0.36246901750564575, "learning_rate": 0.0001192607130324023, "loss": 1.3106, "step": 31076 }, { "epoch": 0.40383144127359527, "grad_norm": 0.31022751331329346, "learning_rate": 0.0001192581135704909, "loss": 1.4672, "step": 31077 }, { "epoch": 0.4038444358175111, "grad_norm": 0.46667397022247314, "learning_rate": 0.00011925551410857953, "loss": 1.434, "step": 31078 }, { "epoch": 0.403857430361427, "grad_norm": 0.321641206741333, "learning_rate": 0.00011925291464666814, "loss": 1.4739, "step": 31079 }, { "epoch": 0.40387042490534286, "grad_norm": 0.37200087308883667, "learning_rate": 0.00011925031518475675, "loss": 1.3111, "step": 31080 }, { "epoch": 0.40388341944925876, "grad_norm": 0.24284672737121582, "learning_rate": 0.00011924771572284536, "loss": 1.1892, "step": 31081 }, { "epoch": 0.4038964139931746, "grad_norm": 0.4175315797328949, "learning_rate": 0.000119245116260934, "loss": 1.3551, "step": 31082 }, { "epoch": 0.4039094085370905, "grad_norm": 0.3691502809524536, "learning_rate": 0.00011924251679902261, "loss": 1.585, "step": 31083 }, { "epoch": 0.40392240308100635, "grad_norm": 0.4940640330314636, "learning_rate": 0.00011923991733711122, "loss": 1.482, "step": 31084 }, { "epoch": 0.40393539762492225, "grad_norm": 0.42931312322616577, "learning_rate": 0.00011923731787519985, "loss": 1.3735, "step": 31085 }, { "epoch": 0.4039483921688381, "grad_norm": 0.44503793120384216, "learning_rate": 0.00011923471841328846, "loss": 1.4508, "step": 31086 }, { "epoch": 0.403961386712754, "grad_norm": 0.46254003047943115, "learning_rate": 0.00011923211895137707, "loss": 1.3514, "step": 31087 }, { "epoch": 0.40397438125666985, "grad_norm": 0.41324159502983093, "learning_rate": 0.00011922951948946568, "loss": 1.4286, "step": 31088 }, { "epoch": 0.40398737580058575, "grad_norm": 0.41091713309288025, "learning_rate": 0.00011922692002755432, "loss": 1.4806, "step": 31089 }, { "epoch": 0.4040003703445016, "grad_norm": 0.3074330985546112, "learning_rate": 0.00011922432056564293, "loss": 1.2096, "step": 31090 }, { "epoch": 0.4040133648884175, "grad_norm": 0.37177252769470215, "learning_rate": 0.00011922172110373153, "loss": 1.4092, "step": 31091 }, { "epoch": 0.40402635943233334, "grad_norm": 0.3361234962940216, "learning_rate": 0.00011921912164182014, "loss": 1.1664, "step": 31092 }, { "epoch": 0.40403935397624924, "grad_norm": 0.3064337372779846, "learning_rate": 0.00011921652217990877, "loss": 1.5362, "step": 31093 }, { "epoch": 0.4040523485201651, "grad_norm": 0.5648393630981445, "learning_rate": 0.00011921392271799739, "loss": 1.5108, "step": 31094 }, { "epoch": 0.404065343064081, "grad_norm": 0.39658382534980774, "learning_rate": 0.000119211323256086, "loss": 1.2835, "step": 31095 }, { "epoch": 0.40407833760799683, "grad_norm": 0.3781071603298187, "learning_rate": 0.00011920872379417461, "loss": 1.3464, "step": 31096 }, { "epoch": 0.40409133215191273, "grad_norm": 0.4796808660030365, "learning_rate": 0.00011920612433226323, "loss": 1.3243, "step": 31097 }, { "epoch": 0.4041043266958286, "grad_norm": 0.31574904918670654, "learning_rate": 0.00011920352487035184, "loss": 1.3697, "step": 31098 }, { "epoch": 0.4041173212397445, "grad_norm": 0.396132230758667, "learning_rate": 0.00011920092540844045, "loss": 1.2984, "step": 31099 }, { "epoch": 0.4041303157836603, "grad_norm": 0.3588292598724365, "learning_rate": 0.00011919832594652906, "loss": 1.3467, "step": 31100 }, { "epoch": 0.4041433103275762, "grad_norm": 0.3943256437778473, "learning_rate": 0.0001191957264846177, "loss": 1.3035, "step": 31101 }, { "epoch": 0.40415630487149207, "grad_norm": 0.4043674170970917, "learning_rate": 0.00011919312702270631, "loss": 1.2018, "step": 31102 }, { "epoch": 0.40416929941540797, "grad_norm": 0.46197620034217834, "learning_rate": 0.00011919052756079492, "loss": 1.4024, "step": 31103 }, { "epoch": 0.4041822939593238, "grad_norm": 0.2812095284461975, "learning_rate": 0.00011918792809888352, "loss": 1.3708, "step": 31104 }, { "epoch": 0.4041952885032397, "grad_norm": 0.39766836166381836, "learning_rate": 0.00011918532863697216, "loss": 1.4108, "step": 31105 }, { "epoch": 0.40420828304715556, "grad_norm": 0.4104083776473999, "learning_rate": 0.00011918272917506077, "loss": 1.3334, "step": 31106 }, { "epoch": 0.40422127759107146, "grad_norm": 0.45874708890914917, "learning_rate": 0.00011918012971314938, "loss": 1.4111, "step": 31107 }, { "epoch": 0.4042342721349873, "grad_norm": 0.36362719535827637, "learning_rate": 0.00011917753025123799, "loss": 1.4653, "step": 31108 }, { "epoch": 0.4042472666789032, "grad_norm": 0.40747252106666565, "learning_rate": 0.00011917493078932662, "loss": 1.4284, "step": 31109 }, { "epoch": 0.40426026122281905, "grad_norm": 0.34531745314598083, "learning_rate": 0.00011917233132741523, "loss": 1.257, "step": 31110 }, { "epoch": 0.40427325576673495, "grad_norm": 0.49280163645744324, "learning_rate": 0.00011916973186550384, "loss": 1.3904, "step": 31111 }, { "epoch": 0.4042862503106508, "grad_norm": 0.4759371280670166, "learning_rate": 0.00011916713240359245, "loss": 1.2108, "step": 31112 }, { "epoch": 0.4042992448545667, "grad_norm": 0.3847648799419403, "learning_rate": 0.00011916453294168109, "loss": 1.2552, "step": 31113 }, { "epoch": 0.40431223939848254, "grad_norm": 0.42719778418540955, "learning_rate": 0.0001191619334797697, "loss": 1.7538, "step": 31114 }, { "epoch": 0.40432523394239844, "grad_norm": 0.5223241448402405, "learning_rate": 0.00011915933401785831, "loss": 1.4725, "step": 31115 }, { "epoch": 0.4043382284863143, "grad_norm": 0.33119985461235046, "learning_rate": 0.00011915673455594691, "loss": 1.3077, "step": 31116 }, { "epoch": 0.4043512230302302, "grad_norm": 0.32815834879875183, "learning_rate": 0.00011915413509403555, "loss": 1.352, "step": 31117 }, { "epoch": 0.40436421757414603, "grad_norm": 0.359495609998703, "learning_rate": 0.00011915153563212416, "loss": 1.2257, "step": 31118 }, { "epoch": 0.40437721211806193, "grad_norm": 0.40769317746162415, "learning_rate": 0.00011914893617021277, "loss": 1.4266, "step": 31119 }, { "epoch": 0.4043902066619778, "grad_norm": 0.38165783882141113, "learning_rate": 0.00011914633670830139, "loss": 1.2537, "step": 31120 }, { "epoch": 0.4044032012058937, "grad_norm": 0.33748123049736023, "learning_rate": 0.00011914373724639, "loss": 1.2966, "step": 31121 }, { "epoch": 0.4044161957498095, "grad_norm": 0.5030595660209656, "learning_rate": 0.00011914113778447861, "loss": 1.4478, "step": 31122 }, { "epoch": 0.4044291902937254, "grad_norm": 0.32752615213394165, "learning_rate": 0.00011913853832256722, "loss": 1.2675, "step": 31123 }, { "epoch": 0.40444218483764127, "grad_norm": 0.400953471660614, "learning_rate": 0.00011913593886065586, "loss": 1.3851, "step": 31124 }, { "epoch": 0.4044551793815572, "grad_norm": 0.3274124562740326, "learning_rate": 0.00011913333939874447, "loss": 1.1848, "step": 31125 }, { "epoch": 0.404468173925473, "grad_norm": 0.273478627204895, "learning_rate": 0.00011913073993683308, "loss": 1.2055, "step": 31126 }, { "epoch": 0.4044811684693889, "grad_norm": 0.3876365125179291, "learning_rate": 0.0001191281404749217, "loss": 1.4481, "step": 31127 }, { "epoch": 0.40449416301330476, "grad_norm": 0.580828845500946, "learning_rate": 0.00011912554101301032, "loss": 1.3591, "step": 31128 }, { "epoch": 0.40450715755722066, "grad_norm": 0.29494908452033997, "learning_rate": 0.00011912294155109893, "loss": 1.2555, "step": 31129 }, { "epoch": 0.4045201521011365, "grad_norm": 0.4451282024383545, "learning_rate": 0.00011912034208918754, "loss": 1.4914, "step": 31130 }, { "epoch": 0.4045331466450524, "grad_norm": 0.4182499945163727, "learning_rate": 0.00011911774262727615, "loss": 1.3394, "step": 31131 }, { "epoch": 0.40454614118896826, "grad_norm": 0.36886388063430786, "learning_rate": 0.00011911514316536479, "loss": 1.2085, "step": 31132 }, { "epoch": 0.40455913573288416, "grad_norm": 0.41878238320350647, "learning_rate": 0.00011911254370345339, "loss": 1.4315, "step": 31133 }, { "epoch": 0.4045721302768, "grad_norm": 0.3612288534641266, "learning_rate": 0.000119109944241542, "loss": 1.1751, "step": 31134 }, { "epoch": 0.4045851248207159, "grad_norm": 0.39339253306388855, "learning_rate": 0.00011910734477963061, "loss": 1.4236, "step": 31135 }, { "epoch": 0.40459811936463175, "grad_norm": 0.39024654030799866, "learning_rate": 0.00011910474531771925, "loss": 1.3813, "step": 31136 }, { "epoch": 0.40461111390854765, "grad_norm": 0.39640891551971436, "learning_rate": 0.00011910214585580786, "loss": 1.3948, "step": 31137 }, { "epoch": 0.40462410845246355, "grad_norm": 0.40515291690826416, "learning_rate": 0.00011909954639389647, "loss": 1.3752, "step": 31138 }, { "epoch": 0.4046371029963794, "grad_norm": 0.519363284111023, "learning_rate": 0.00011909694693198508, "loss": 1.2778, "step": 31139 }, { "epoch": 0.4046500975402953, "grad_norm": 0.4499009847640991, "learning_rate": 0.0001190943474700737, "loss": 1.438, "step": 31140 }, { "epoch": 0.40466309208421114, "grad_norm": 0.3646060526371002, "learning_rate": 0.00011909174800816232, "loss": 1.202, "step": 31141 }, { "epoch": 0.40467608662812704, "grad_norm": 0.3819766640663147, "learning_rate": 0.00011908914854625093, "loss": 1.5017, "step": 31142 }, { "epoch": 0.4046890811720429, "grad_norm": 0.4532409608364105, "learning_rate": 0.00011908654908433954, "loss": 1.3157, "step": 31143 }, { "epoch": 0.4047020757159588, "grad_norm": 0.41658562421798706, "learning_rate": 0.00011908394962242818, "loss": 1.4714, "step": 31144 }, { "epoch": 0.40471507025987463, "grad_norm": 0.3146139681339264, "learning_rate": 0.00011908135016051679, "loss": 1.5751, "step": 31145 }, { "epoch": 0.40472806480379053, "grad_norm": 0.36841073632240295, "learning_rate": 0.00011907875069860538, "loss": 1.2612, "step": 31146 }, { "epoch": 0.4047410593477064, "grad_norm": 0.3991709351539612, "learning_rate": 0.000119076151236694, "loss": 1.4681, "step": 31147 }, { "epoch": 0.4047540538916223, "grad_norm": 0.505599856376648, "learning_rate": 0.00011907355177478263, "loss": 1.6774, "step": 31148 }, { "epoch": 0.4047670484355381, "grad_norm": 0.3352959156036377, "learning_rate": 0.00011907095231287124, "loss": 1.1709, "step": 31149 }, { "epoch": 0.404780042979454, "grad_norm": 0.43015971779823303, "learning_rate": 0.00011906835285095986, "loss": 1.4067, "step": 31150 }, { "epoch": 0.40479303752336987, "grad_norm": 0.3387715816497803, "learning_rate": 0.00011906575338904847, "loss": 1.2522, "step": 31151 }, { "epoch": 0.40480603206728577, "grad_norm": 0.3760809004306793, "learning_rate": 0.00011906315392713709, "loss": 1.3888, "step": 31152 }, { "epoch": 0.4048190266112016, "grad_norm": 0.40547674894332886, "learning_rate": 0.0001190605544652257, "loss": 1.3071, "step": 31153 }, { "epoch": 0.4048320211551175, "grad_norm": 0.30349001288414, "learning_rate": 0.00011905795500331431, "loss": 1.3224, "step": 31154 }, { "epoch": 0.40484501569903336, "grad_norm": 0.3696651756763458, "learning_rate": 0.00011905535554140292, "loss": 1.3081, "step": 31155 }, { "epoch": 0.40485801024294926, "grad_norm": 0.38968411087989807, "learning_rate": 0.00011905275607949156, "loss": 1.2858, "step": 31156 }, { "epoch": 0.4048710047868651, "grad_norm": 0.41720980405807495, "learning_rate": 0.00011905015661758017, "loss": 1.4594, "step": 31157 }, { "epoch": 0.404883999330781, "grad_norm": 0.49381259083747864, "learning_rate": 0.00011904755715566877, "loss": 1.3585, "step": 31158 }, { "epoch": 0.40489699387469685, "grad_norm": 0.31462299823760986, "learning_rate": 0.00011904495769375741, "loss": 1.3144, "step": 31159 }, { "epoch": 0.40490998841861275, "grad_norm": 0.47582030296325684, "learning_rate": 0.00011904235823184602, "loss": 1.3183, "step": 31160 }, { "epoch": 0.4049229829625286, "grad_norm": 0.4573417901992798, "learning_rate": 0.00011903975876993463, "loss": 1.4331, "step": 31161 }, { "epoch": 0.4049359775064445, "grad_norm": 0.42500001192092896, "learning_rate": 0.00011903715930802324, "loss": 1.4202, "step": 31162 }, { "epoch": 0.40494897205036035, "grad_norm": 0.48033180832862854, "learning_rate": 0.00011903455984611186, "loss": 1.4146, "step": 31163 }, { "epoch": 0.40496196659427625, "grad_norm": 0.4172731041908264, "learning_rate": 0.00011903196038420048, "loss": 1.4106, "step": 31164 }, { "epoch": 0.4049749611381921, "grad_norm": 0.4109196960926056, "learning_rate": 0.00011902936092228909, "loss": 1.0949, "step": 31165 }, { "epoch": 0.404987955682108, "grad_norm": 0.5177567601203918, "learning_rate": 0.0001190267614603777, "loss": 1.5222, "step": 31166 }, { "epoch": 0.40500095022602384, "grad_norm": 0.49179255962371826, "learning_rate": 0.00011902416199846634, "loss": 1.3764, "step": 31167 }, { "epoch": 0.40501394476993974, "grad_norm": 0.48563122749328613, "learning_rate": 0.00011902156253655495, "loss": 1.3855, "step": 31168 }, { "epoch": 0.4050269393138556, "grad_norm": 0.36977359652519226, "learning_rate": 0.00011901896307464356, "loss": 1.3776, "step": 31169 }, { "epoch": 0.4050399338577715, "grad_norm": 0.42352986335754395, "learning_rate": 0.00011901636361273217, "loss": 1.2775, "step": 31170 }, { "epoch": 0.40505292840168733, "grad_norm": 0.3878430724143982, "learning_rate": 0.00011901376415082079, "loss": 1.3114, "step": 31171 }, { "epoch": 0.40506592294560323, "grad_norm": 0.4392317235469818, "learning_rate": 0.0001190111646889094, "loss": 1.3503, "step": 31172 }, { "epoch": 0.4050789174895191, "grad_norm": 0.4080732464790344, "learning_rate": 0.00011900856522699801, "loss": 1.3054, "step": 31173 }, { "epoch": 0.405091912033435, "grad_norm": 0.4280981123447418, "learning_rate": 0.00011900596576508663, "loss": 1.4308, "step": 31174 }, { "epoch": 0.4051049065773508, "grad_norm": 0.38070032000541687, "learning_rate": 0.00011900336630317525, "loss": 1.2601, "step": 31175 }, { "epoch": 0.4051179011212667, "grad_norm": 0.4179835319519043, "learning_rate": 0.00011900076684126386, "loss": 1.4414, "step": 31176 }, { "epoch": 0.40513089566518257, "grad_norm": 0.4899497628211975, "learning_rate": 0.00011899816737935247, "loss": 1.4814, "step": 31177 }, { "epoch": 0.40514389020909847, "grad_norm": 0.38369104266166687, "learning_rate": 0.00011899556791744108, "loss": 1.2864, "step": 31178 }, { "epoch": 0.4051568847530143, "grad_norm": 0.39225828647613525, "learning_rate": 0.00011899296845552972, "loss": 1.3075, "step": 31179 }, { "epoch": 0.4051698792969302, "grad_norm": 0.3863079845905304, "learning_rate": 0.00011899036899361833, "loss": 1.5609, "step": 31180 }, { "epoch": 0.40518287384084606, "grad_norm": 0.34979888796806335, "learning_rate": 0.00011898776953170694, "loss": 1.2385, "step": 31181 }, { "epoch": 0.40519586838476196, "grad_norm": 0.40420815348625183, "learning_rate": 0.00011898517006979555, "loss": 1.3738, "step": 31182 }, { "epoch": 0.4052088629286778, "grad_norm": 0.4980183243751526, "learning_rate": 0.00011898257060788418, "loss": 1.4821, "step": 31183 }, { "epoch": 0.4052218574725937, "grad_norm": 0.26523557305336, "learning_rate": 0.00011897997114597279, "loss": 1.2932, "step": 31184 }, { "epoch": 0.40523485201650955, "grad_norm": 0.4374240040779114, "learning_rate": 0.0001189773716840614, "loss": 1.3174, "step": 31185 }, { "epoch": 0.40524784656042545, "grad_norm": 0.4220539927482605, "learning_rate": 0.00011897477222215001, "loss": 1.3367, "step": 31186 }, { "epoch": 0.4052608411043413, "grad_norm": 0.38757675886154175, "learning_rate": 0.00011897217276023865, "loss": 1.4012, "step": 31187 }, { "epoch": 0.4052738356482572, "grad_norm": 0.39984625577926636, "learning_rate": 0.00011896957329832725, "loss": 1.3633, "step": 31188 }, { "epoch": 0.40528683019217304, "grad_norm": 0.3410710394382477, "learning_rate": 0.00011896697383641586, "loss": 1.316, "step": 31189 }, { "epoch": 0.40529982473608894, "grad_norm": 0.4982589781284332, "learning_rate": 0.00011896437437450447, "loss": 1.4565, "step": 31190 }, { "epoch": 0.4053128192800048, "grad_norm": 0.495503693819046, "learning_rate": 0.0001189617749125931, "loss": 1.5095, "step": 31191 }, { "epoch": 0.4053258138239207, "grad_norm": 0.3099004030227661, "learning_rate": 0.00011895917545068172, "loss": 1.2597, "step": 31192 }, { "epoch": 0.40533880836783653, "grad_norm": 0.430729478597641, "learning_rate": 0.00011895657598877033, "loss": 1.4133, "step": 31193 }, { "epoch": 0.40535180291175243, "grad_norm": 0.4965914189815521, "learning_rate": 0.00011895397652685895, "loss": 1.4518, "step": 31194 }, { "epoch": 0.4053647974556683, "grad_norm": 0.3689592182636261, "learning_rate": 0.00011895137706494756, "loss": 1.326, "step": 31195 }, { "epoch": 0.4053777919995842, "grad_norm": 0.5080020427703857, "learning_rate": 0.00011894877760303617, "loss": 1.1317, "step": 31196 }, { "epoch": 0.4053907865435, "grad_norm": 0.37928450107574463, "learning_rate": 0.00011894617814112479, "loss": 1.4909, "step": 31197 }, { "epoch": 0.4054037810874159, "grad_norm": 0.5257855653762817, "learning_rate": 0.00011894357867921342, "loss": 1.4967, "step": 31198 }, { "epoch": 0.40541677563133177, "grad_norm": 0.3613608181476593, "learning_rate": 0.00011894097921730203, "loss": 1.473, "step": 31199 }, { "epoch": 0.4054297701752477, "grad_norm": 0.38127535581588745, "learning_rate": 0.00011893837975539063, "loss": 1.3844, "step": 31200 }, { "epoch": 0.4054427647191635, "grad_norm": 0.3791249096393585, "learning_rate": 0.00011893578029347924, "loss": 1.3042, "step": 31201 }, { "epoch": 0.4054557592630794, "grad_norm": 0.3875804841518402, "learning_rate": 0.00011893318083156788, "loss": 1.4339, "step": 31202 }, { "epoch": 0.40546875380699526, "grad_norm": 0.31077125668525696, "learning_rate": 0.00011893058136965649, "loss": 1.1927, "step": 31203 }, { "epoch": 0.40548174835091116, "grad_norm": 0.32536572217941284, "learning_rate": 0.0001189279819077451, "loss": 1.4645, "step": 31204 }, { "epoch": 0.405494742894827, "grad_norm": 0.3629818558692932, "learning_rate": 0.00011892538244583371, "loss": 1.3366, "step": 31205 }, { "epoch": 0.4055077374387429, "grad_norm": 0.37646088004112244, "learning_rate": 0.00011892278298392234, "loss": 1.2986, "step": 31206 }, { "epoch": 0.40552073198265876, "grad_norm": 0.3595403730869293, "learning_rate": 0.00011892018352201095, "loss": 1.4383, "step": 31207 }, { "epoch": 0.40553372652657466, "grad_norm": 0.32527852058410645, "learning_rate": 0.00011891758406009956, "loss": 1.4019, "step": 31208 }, { "epoch": 0.4055467210704905, "grad_norm": 0.372487336397171, "learning_rate": 0.00011891498459818817, "loss": 1.4469, "step": 31209 }, { "epoch": 0.4055597156144064, "grad_norm": 0.3933354318141937, "learning_rate": 0.00011891238513627681, "loss": 1.4328, "step": 31210 }, { "epoch": 0.40557271015832225, "grad_norm": 0.43458229303359985, "learning_rate": 0.00011890978567436542, "loss": 1.2702, "step": 31211 }, { "epoch": 0.40558570470223815, "grad_norm": 0.4470691382884979, "learning_rate": 0.00011890718621245403, "loss": 1.4581, "step": 31212 }, { "epoch": 0.40559869924615405, "grad_norm": 0.35403117537498474, "learning_rate": 0.00011890458675054263, "loss": 1.4024, "step": 31213 }, { "epoch": 0.4056116937900699, "grad_norm": 0.4237093925476074, "learning_rate": 0.00011890198728863127, "loss": 1.5175, "step": 31214 }, { "epoch": 0.4056246883339858, "grad_norm": 0.35908371210098267, "learning_rate": 0.00011889938782671988, "loss": 1.2216, "step": 31215 }, { "epoch": 0.40563768287790164, "grad_norm": 0.5276117324829102, "learning_rate": 0.00011889678836480849, "loss": 1.5198, "step": 31216 }, { "epoch": 0.40565067742181754, "grad_norm": 0.3601825535297394, "learning_rate": 0.0001188941889028971, "loss": 1.3974, "step": 31217 }, { "epoch": 0.4056636719657334, "grad_norm": 0.4296603798866272, "learning_rate": 0.00011889158944098572, "loss": 1.5108, "step": 31218 }, { "epoch": 0.4056766665096493, "grad_norm": 0.4216512143611908, "learning_rate": 0.00011888898997907433, "loss": 1.3085, "step": 31219 }, { "epoch": 0.40568966105356513, "grad_norm": 0.42654138803482056, "learning_rate": 0.00011888639051716295, "loss": 1.4722, "step": 31220 }, { "epoch": 0.40570265559748103, "grad_norm": 0.40836450457572937, "learning_rate": 0.00011888379105525156, "loss": 1.4478, "step": 31221 }, { "epoch": 0.4057156501413969, "grad_norm": 0.4115433990955353, "learning_rate": 0.0001188811915933402, "loss": 1.4614, "step": 31222 }, { "epoch": 0.4057286446853128, "grad_norm": 0.2881373167037964, "learning_rate": 0.0001188785921314288, "loss": 1.3968, "step": 31223 }, { "epoch": 0.4057416392292286, "grad_norm": 0.4314188063144684, "learning_rate": 0.00011887599266951742, "loss": 1.4829, "step": 31224 }, { "epoch": 0.4057546337731445, "grad_norm": 0.360015869140625, "learning_rate": 0.00011887339320760603, "loss": 1.4896, "step": 31225 }, { "epoch": 0.40576762831706037, "grad_norm": 0.3828166723251343, "learning_rate": 0.00011887079374569465, "loss": 1.4096, "step": 31226 }, { "epoch": 0.40578062286097627, "grad_norm": 0.501865804195404, "learning_rate": 0.00011886819428378326, "loss": 1.413, "step": 31227 }, { "epoch": 0.4057936174048921, "grad_norm": 0.414804607629776, "learning_rate": 0.00011886559482187187, "loss": 1.5255, "step": 31228 }, { "epoch": 0.405806611948808, "grad_norm": 0.3666289150714874, "learning_rate": 0.00011886299535996048, "loss": 1.3012, "step": 31229 }, { "epoch": 0.40581960649272386, "grad_norm": 0.4416143298149109, "learning_rate": 0.00011886039589804911, "loss": 1.5082, "step": 31230 }, { "epoch": 0.40583260103663976, "grad_norm": 0.38721030950546265, "learning_rate": 0.00011885779643613772, "loss": 1.4334, "step": 31231 }, { "epoch": 0.4058455955805556, "grad_norm": 0.38310903310775757, "learning_rate": 0.00011885519697422633, "loss": 1.6196, "step": 31232 }, { "epoch": 0.4058585901244715, "grad_norm": 0.510926365852356, "learning_rate": 0.00011885259751231497, "loss": 1.4067, "step": 31233 }, { "epoch": 0.40587158466838735, "grad_norm": 0.3168974220752716, "learning_rate": 0.00011884999805040358, "loss": 1.3574, "step": 31234 }, { "epoch": 0.40588457921230325, "grad_norm": 0.45922380685806274, "learning_rate": 0.00011884739858849219, "loss": 1.3532, "step": 31235 }, { "epoch": 0.4058975737562191, "grad_norm": 0.332072377204895, "learning_rate": 0.0001188447991265808, "loss": 1.5918, "step": 31236 }, { "epoch": 0.405910568300135, "grad_norm": 0.5135757327079773, "learning_rate": 0.00011884219966466943, "loss": 1.4235, "step": 31237 }, { "epoch": 0.40592356284405084, "grad_norm": 0.35514453053474426, "learning_rate": 0.00011883960020275804, "loss": 1.3901, "step": 31238 }, { "epoch": 0.40593655738796675, "grad_norm": 0.3588900566101074, "learning_rate": 0.00011883700074084665, "loss": 1.3905, "step": 31239 }, { "epoch": 0.4059495519318826, "grad_norm": 0.46052685379981995, "learning_rate": 0.00011883440127893526, "loss": 1.3979, "step": 31240 }, { "epoch": 0.4059625464757985, "grad_norm": 0.40927180647850037, "learning_rate": 0.0001188318018170239, "loss": 1.1847, "step": 31241 }, { "epoch": 0.40597554101971434, "grad_norm": 0.33551061153411865, "learning_rate": 0.0001188292023551125, "loss": 1.3197, "step": 31242 }, { "epoch": 0.40598853556363024, "grad_norm": 0.34213122725486755, "learning_rate": 0.0001188266028932011, "loss": 1.3425, "step": 31243 }, { "epoch": 0.4060015301075461, "grad_norm": 0.2508379817008972, "learning_rate": 0.00011882400343128972, "loss": 1.222, "step": 31244 }, { "epoch": 0.406014524651462, "grad_norm": 0.3861617147922516, "learning_rate": 0.00011882140396937835, "loss": 1.3168, "step": 31245 }, { "epoch": 0.40602751919537783, "grad_norm": 0.4054805338382721, "learning_rate": 0.00011881880450746697, "loss": 1.5717, "step": 31246 }, { "epoch": 0.40604051373929373, "grad_norm": 0.5045730471611023, "learning_rate": 0.00011881620504555558, "loss": 1.273, "step": 31247 }, { "epoch": 0.4060535082832096, "grad_norm": 0.4012494385242462, "learning_rate": 0.00011881360558364419, "loss": 1.3772, "step": 31248 }, { "epoch": 0.4060665028271255, "grad_norm": 0.39588335156440735, "learning_rate": 0.00011881100612173281, "loss": 1.5071, "step": 31249 }, { "epoch": 0.4060794973710413, "grad_norm": 0.33543112874031067, "learning_rate": 0.00011880840665982142, "loss": 1.4006, "step": 31250 }, { "epoch": 0.4060924919149572, "grad_norm": 0.3463461399078369, "learning_rate": 0.00011880580719791003, "loss": 1.4128, "step": 31251 }, { "epoch": 0.40610548645887307, "grad_norm": 0.42646366357803345, "learning_rate": 0.00011880320773599864, "loss": 1.3896, "step": 31252 }, { "epoch": 0.40611848100278897, "grad_norm": 0.396257221698761, "learning_rate": 0.00011880060827408728, "loss": 1.542, "step": 31253 }, { "epoch": 0.4061314755467048, "grad_norm": 0.38964149355888367, "learning_rate": 0.0001187980088121759, "loss": 1.4644, "step": 31254 }, { "epoch": 0.4061444700906207, "grad_norm": 0.3986106216907501, "learning_rate": 0.00011879540935026449, "loss": 1.3015, "step": 31255 }, { "epoch": 0.40615746463453656, "grad_norm": 0.41529545187950134, "learning_rate": 0.0001187928098883531, "loss": 1.5393, "step": 31256 }, { "epoch": 0.40617045917845246, "grad_norm": 0.40253400802612305, "learning_rate": 0.00011879021042644174, "loss": 1.4339, "step": 31257 }, { "epoch": 0.4061834537223683, "grad_norm": 0.43429264426231384, "learning_rate": 0.00011878761096453035, "loss": 1.2613, "step": 31258 }, { "epoch": 0.4061964482662842, "grad_norm": 0.2691577672958374, "learning_rate": 0.00011878501150261896, "loss": 1.4561, "step": 31259 }, { "epoch": 0.40620944281020005, "grad_norm": 0.40096017718315125, "learning_rate": 0.00011878241204070757, "loss": 1.4678, "step": 31260 }, { "epoch": 0.40622243735411595, "grad_norm": 0.4558047950267792, "learning_rate": 0.0001187798125787962, "loss": 1.3217, "step": 31261 }, { "epoch": 0.4062354318980318, "grad_norm": 0.3821234107017517, "learning_rate": 0.00011877721311688481, "loss": 1.2376, "step": 31262 }, { "epoch": 0.4062484264419477, "grad_norm": 0.40908125042915344, "learning_rate": 0.00011877461365497342, "loss": 1.635, "step": 31263 }, { "epoch": 0.40626142098586354, "grad_norm": 0.3726956844329834, "learning_rate": 0.00011877201419306203, "loss": 1.4287, "step": 31264 }, { "epoch": 0.40627441552977944, "grad_norm": 0.4645557701587677, "learning_rate": 0.00011876941473115067, "loss": 1.5008, "step": 31265 }, { "epoch": 0.4062874100736953, "grad_norm": 0.4110972583293915, "learning_rate": 0.00011876681526923928, "loss": 1.344, "step": 31266 }, { "epoch": 0.4063004046176112, "grad_norm": 0.4408775269985199, "learning_rate": 0.00011876421580732789, "loss": 1.4052, "step": 31267 }, { "epoch": 0.40631339916152703, "grad_norm": 0.384743869304657, "learning_rate": 0.00011876161634541651, "loss": 1.4074, "step": 31268 }, { "epoch": 0.40632639370544293, "grad_norm": 0.4842003285884857, "learning_rate": 0.00011875901688350513, "loss": 1.3696, "step": 31269 }, { "epoch": 0.4063393882493588, "grad_norm": 0.5091818571090698, "learning_rate": 0.00011875641742159374, "loss": 1.6031, "step": 31270 }, { "epoch": 0.4063523827932747, "grad_norm": 0.34551045298576355, "learning_rate": 0.00011875381795968235, "loss": 1.4607, "step": 31271 }, { "epoch": 0.4063653773371905, "grad_norm": 0.3323245346546173, "learning_rate": 0.00011875121849777097, "loss": 1.0766, "step": 31272 }, { "epoch": 0.4063783718811064, "grad_norm": 0.70066899061203, "learning_rate": 0.00011874861903585958, "loss": 1.2979, "step": 31273 }, { "epoch": 0.40639136642502227, "grad_norm": 0.45902806520462036, "learning_rate": 0.0001187460195739482, "loss": 1.3914, "step": 31274 }, { "epoch": 0.40640436096893817, "grad_norm": 0.447151780128479, "learning_rate": 0.0001187434201120368, "loss": 1.2701, "step": 31275 }, { "epoch": 0.406417355512854, "grad_norm": 0.2966899871826172, "learning_rate": 0.00011874082065012544, "loss": 1.212, "step": 31276 }, { "epoch": 0.4064303500567699, "grad_norm": 0.37040624022483826, "learning_rate": 0.00011873822118821405, "loss": 1.1205, "step": 31277 }, { "epoch": 0.40644334460068576, "grad_norm": 0.4391426742076874, "learning_rate": 0.00011873562172630266, "loss": 1.4174, "step": 31278 }, { "epoch": 0.40645633914460166, "grad_norm": 0.30764180421829224, "learning_rate": 0.00011873302226439128, "loss": 1.3751, "step": 31279 }, { "epoch": 0.4064693336885175, "grad_norm": 0.35065677762031555, "learning_rate": 0.0001187304228024799, "loss": 1.2258, "step": 31280 }, { "epoch": 0.4064823282324334, "grad_norm": 0.3802269399166107, "learning_rate": 0.00011872782334056851, "loss": 1.5252, "step": 31281 }, { "epoch": 0.40649532277634925, "grad_norm": 0.45753800868988037, "learning_rate": 0.00011872522387865712, "loss": 1.2664, "step": 31282 }, { "epoch": 0.40650831732026516, "grad_norm": 0.41795745491981506, "learning_rate": 0.00011872262441674573, "loss": 1.4199, "step": 31283 }, { "epoch": 0.406521311864181, "grad_norm": 0.4111131727695465, "learning_rate": 0.00011872002495483436, "loss": 1.3316, "step": 31284 }, { "epoch": 0.4065343064080969, "grad_norm": 0.3360668122768402, "learning_rate": 0.00011871742549292297, "loss": 1.2689, "step": 31285 }, { "epoch": 0.40654730095201275, "grad_norm": 0.3608076274394989, "learning_rate": 0.00011871482603101158, "loss": 1.6717, "step": 31286 }, { "epoch": 0.40656029549592865, "grad_norm": 0.3970521092414856, "learning_rate": 0.00011871222656910019, "loss": 1.4533, "step": 31287 }, { "epoch": 0.4065732900398445, "grad_norm": 0.40583324432373047, "learning_rate": 0.00011870962710718883, "loss": 1.2344, "step": 31288 }, { "epoch": 0.4065862845837604, "grad_norm": 0.2829705774784088, "learning_rate": 0.00011870702764527744, "loss": 1.2993, "step": 31289 }, { "epoch": 0.4065992791276763, "grad_norm": 0.41404134035110474, "learning_rate": 0.00011870442818336605, "loss": 1.5585, "step": 31290 }, { "epoch": 0.40661227367159214, "grad_norm": 0.392275333404541, "learning_rate": 0.00011870182872145466, "loss": 1.2174, "step": 31291 }, { "epoch": 0.40662526821550804, "grad_norm": 0.41608232259750366, "learning_rate": 0.00011869922925954328, "loss": 1.3351, "step": 31292 }, { "epoch": 0.4066382627594239, "grad_norm": 0.30293115973472595, "learning_rate": 0.0001186966297976319, "loss": 1.3793, "step": 31293 }, { "epoch": 0.4066512573033398, "grad_norm": 0.3355635106563568, "learning_rate": 0.00011869403033572051, "loss": 1.2915, "step": 31294 }, { "epoch": 0.40666425184725563, "grad_norm": 0.47416988015174866, "learning_rate": 0.00011869143087380912, "loss": 1.4625, "step": 31295 }, { "epoch": 0.40667724639117153, "grad_norm": 0.37027180194854736, "learning_rate": 0.00011868883141189776, "loss": 1.439, "step": 31296 }, { "epoch": 0.4066902409350874, "grad_norm": 0.3935788869857788, "learning_rate": 0.00011868623194998635, "loss": 1.4832, "step": 31297 }, { "epoch": 0.4067032354790033, "grad_norm": 0.3641882836818695, "learning_rate": 0.00011868363248807496, "loss": 1.4582, "step": 31298 }, { "epoch": 0.4067162300229191, "grad_norm": 0.31681060791015625, "learning_rate": 0.00011868103302616358, "loss": 1.2232, "step": 31299 }, { "epoch": 0.406729224566835, "grad_norm": 0.44886142015457153, "learning_rate": 0.00011867843356425221, "loss": 1.4125, "step": 31300 }, { "epoch": 0.40674221911075087, "grad_norm": 0.37310972809791565, "learning_rate": 0.00011867583410234082, "loss": 1.3819, "step": 31301 }, { "epoch": 0.40675521365466677, "grad_norm": 0.4611188471317291, "learning_rate": 0.00011867323464042943, "loss": 1.4512, "step": 31302 }, { "epoch": 0.4067682081985826, "grad_norm": 0.3597484827041626, "learning_rate": 0.00011867063517851805, "loss": 1.3436, "step": 31303 }, { "epoch": 0.4067812027424985, "grad_norm": 0.42691949009895325, "learning_rate": 0.00011866803571660667, "loss": 1.424, "step": 31304 }, { "epoch": 0.40679419728641436, "grad_norm": 0.5390656590461731, "learning_rate": 0.00011866543625469528, "loss": 1.5211, "step": 31305 }, { "epoch": 0.40680719183033026, "grad_norm": 0.41673198342323303, "learning_rate": 0.00011866283679278389, "loss": 1.464, "step": 31306 }, { "epoch": 0.4068201863742461, "grad_norm": 0.39755260944366455, "learning_rate": 0.00011866023733087253, "loss": 1.3604, "step": 31307 }, { "epoch": 0.406833180918162, "grad_norm": 0.3578665554523468, "learning_rate": 0.00011865763786896114, "loss": 1.3936, "step": 31308 }, { "epoch": 0.40684617546207785, "grad_norm": 0.4285860061645508, "learning_rate": 0.00011865503840704975, "loss": 1.4835, "step": 31309 }, { "epoch": 0.40685917000599375, "grad_norm": 0.43358784914016724, "learning_rate": 0.00011865243894513835, "loss": 1.4817, "step": 31310 }, { "epoch": 0.4068721645499096, "grad_norm": 0.4867531955242157, "learning_rate": 0.00011864983948322699, "loss": 1.3186, "step": 31311 }, { "epoch": 0.4068851590938255, "grad_norm": 0.310566246509552, "learning_rate": 0.0001186472400213156, "loss": 1.5392, "step": 31312 }, { "epoch": 0.40689815363774134, "grad_norm": 0.4140184223651886, "learning_rate": 0.00011864464055940421, "loss": 1.4285, "step": 31313 }, { "epoch": 0.40691114818165725, "grad_norm": 0.4563678205013275, "learning_rate": 0.00011864204109749282, "loss": 1.3861, "step": 31314 }, { "epoch": 0.4069241427255731, "grad_norm": 0.40866297483444214, "learning_rate": 0.00011863944163558144, "loss": 1.4891, "step": 31315 }, { "epoch": 0.406937137269489, "grad_norm": 0.4171508550643921, "learning_rate": 0.00011863684217367006, "loss": 1.5697, "step": 31316 }, { "epoch": 0.40695013181340484, "grad_norm": 0.4434012770652771, "learning_rate": 0.00011863424271175867, "loss": 1.4789, "step": 31317 }, { "epoch": 0.40696312635732074, "grad_norm": 0.4278556704521179, "learning_rate": 0.00011863164324984728, "loss": 1.4238, "step": 31318 }, { "epoch": 0.4069761209012366, "grad_norm": 0.3635933995246887, "learning_rate": 0.00011862904378793592, "loss": 1.4068, "step": 31319 }, { "epoch": 0.4069891154451525, "grad_norm": 0.37037113308906555, "learning_rate": 0.00011862644432602453, "loss": 1.3402, "step": 31320 }, { "epoch": 0.40700210998906833, "grad_norm": 0.3789578974246979, "learning_rate": 0.00011862384486411314, "loss": 1.36, "step": 31321 }, { "epoch": 0.40701510453298423, "grad_norm": 0.4212924540042877, "learning_rate": 0.00011862124540220175, "loss": 1.4364, "step": 31322 }, { "epoch": 0.4070280990769001, "grad_norm": 0.4389123320579529, "learning_rate": 0.00011861864594029037, "loss": 1.3789, "step": 31323 }, { "epoch": 0.407041093620816, "grad_norm": 0.372774600982666, "learning_rate": 0.00011861604647837898, "loss": 1.5103, "step": 31324 }, { "epoch": 0.4070540881647318, "grad_norm": 0.463238388299942, "learning_rate": 0.0001186134470164676, "loss": 1.4617, "step": 31325 }, { "epoch": 0.4070670827086477, "grad_norm": 0.45295020937919617, "learning_rate": 0.0001186108475545562, "loss": 1.3962, "step": 31326 }, { "epoch": 0.40708007725256357, "grad_norm": 0.3821038007736206, "learning_rate": 0.00011860824809264483, "loss": 1.2124, "step": 31327 }, { "epoch": 0.40709307179647947, "grad_norm": 0.2766205966472626, "learning_rate": 0.00011860564863073344, "loss": 1.3476, "step": 31328 }, { "epoch": 0.4071060663403953, "grad_norm": 0.39438456296920776, "learning_rate": 0.00011860304916882205, "loss": 1.4519, "step": 31329 }, { "epoch": 0.4071190608843112, "grad_norm": 0.3885607421398163, "learning_rate": 0.00011860044970691066, "loss": 1.445, "step": 31330 }, { "epoch": 0.40713205542822706, "grad_norm": 0.3620757758617401, "learning_rate": 0.0001185978502449993, "loss": 1.4061, "step": 31331 }, { "epoch": 0.40714504997214296, "grad_norm": 0.49417468905448914, "learning_rate": 0.00011859525078308791, "loss": 1.4369, "step": 31332 }, { "epoch": 0.4071580445160588, "grad_norm": 0.4415302574634552, "learning_rate": 0.00011859265132117652, "loss": 1.399, "step": 31333 }, { "epoch": 0.4071710390599747, "grad_norm": 0.3604930639266968, "learning_rate": 0.00011859005185926513, "loss": 1.6794, "step": 31334 }, { "epoch": 0.40718403360389055, "grad_norm": 0.4214653968811035, "learning_rate": 0.00011858745239735376, "loss": 1.5821, "step": 31335 }, { "epoch": 0.40719702814780645, "grad_norm": 0.4235413670539856, "learning_rate": 0.00011858485293544237, "loss": 1.4422, "step": 31336 }, { "epoch": 0.4072100226917223, "grad_norm": 0.41996026039123535, "learning_rate": 0.00011858225347353098, "loss": 1.4043, "step": 31337 }, { "epoch": 0.4072230172356382, "grad_norm": 0.38430002331733704, "learning_rate": 0.00011857965401161959, "loss": 1.3189, "step": 31338 }, { "epoch": 0.40723601177955404, "grad_norm": 0.4642822742462158, "learning_rate": 0.00011857705454970822, "loss": 1.5658, "step": 31339 }, { "epoch": 0.40724900632346994, "grad_norm": 0.4557071626186371, "learning_rate": 0.00011857445508779683, "loss": 1.5322, "step": 31340 }, { "epoch": 0.4072620008673858, "grad_norm": 0.4533839225769043, "learning_rate": 0.00011857185562588544, "loss": 1.4225, "step": 31341 }, { "epoch": 0.4072749954113017, "grad_norm": 0.36180832982063293, "learning_rate": 0.00011856925616397408, "loss": 1.178, "step": 31342 }, { "epoch": 0.40728798995521753, "grad_norm": 0.47234398126602173, "learning_rate": 0.00011856665670206269, "loss": 1.3295, "step": 31343 }, { "epoch": 0.40730098449913343, "grad_norm": 0.4748702049255371, "learning_rate": 0.0001185640572401513, "loss": 1.4596, "step": 31344 }, { "epoch": 0.4073139790430493, "grad_norm": 0.44142329692840576, "learning_rate": 0.00011856145777823991, "loss": 1.2811, "step": 31345 }, { "epoch": 0.4073269735869652, "grad_norm": 0.41287118196487427, "learning_rate": 0.00011855885831632853, "loss": 1.4395, "step": 31346 }, { "epoch": 0.407339968130881, "grad_norm": 0.36212319135665894, "learning_rate": 0.00011855625885441714, "loss": 1.2541, "step": 31347 }, { "epoch": 0.4073529626747969, "grad_norm": 0.36613714694976807, "learning_rate": 0.00011855365939250575, "loss": 1.4965, "step": 31348 }, { "epoch": 0.40736595721871277, "grad_norm": 0.4289287328720093, "learning_rate": 0.00011855105993059437, "loss": 1.5802, "step": 31349 }, { "epoch": 0.40737895176262867, "grad_norm": 0.4733085036277771, "learning_rate": 0.000118548460468683, "loss": 1.4927, "step": 31350 }, { "epoch": 0.4073919463065445, "grad_norm": 0.43434008955955505, "learning_rate": 0.00011854586100677161, "loss": 1.45, "step": 31351 }, { "epoch": 0.4074049408504604, "grad_norm": 0.44725659489631653, "learning_rate": 0.00011854326154486021, "loss": 1.5963, "step": 31352 }, { "epoch": 0.40741793539437626, "grad_norm": 0.3191600739955902, "learning_rate": 0.00011854066208294882, "loss": 1.3828, "step": 31353 }, { "epoch": 0.40743092993829216, "grad_norm": 0.3672683835029602, "learning_rate": 0.00011853806262103746, "loss": 1.3777, "step": 31354 }, { "epoch": 0.407443924482208, "grad_norm": 0.4257959723472595, "learning_rate": 0.00011853546315912607, "loss": 1.4296, "step": 31355 }, { "epoch": 0.4074569190261239, "grad_norm": 0.32201388478279114, "learning_rate": 0.00011853286369721468, "loss": 1.3911, "step": 31356 }, { "epoch": 0.40746991357003975, "grad_norm": 0.26250603795051575, "learning_rate": 0.0001185302642353033, "loss": 1.1523, "step": 31357 }, { "epoch": 0.40748290811395566, "grad_norm": 0.4814162254333496, "learning_rate": 0.00011852766477339192, "loss": 1.4619, "step": 31358 }, { "epoch": 0.4074959026578715, "grad_norm": 0.3212600350379944, "learning_rate": 0.00011852506531148053, "loss": 1.4189, "step": 31359 }, { "epoch": 0.4075088972017874, "grad_norm": 0.3166644871234894, "learning_rate": 0.00011852246584956914, "loss": 1.4032, "step": 31360 }, { "epoch": 0.40752189174570325, "grad_norm": 0.372885137796402, "learning_rate": 0.00011851986638765775, "loss": 1.3095, "step": 31361 }, { "epoch": 0.40753488628961915, "grad_norm": 0.48347485065460205, "learning_rate": 0.00011851726692574639, "loss": 1.431, "step": 31362 }, { "epoch": 0.407547880833535, "grad_norm": 0.38742896914482117, "learning_rate": 0.000118514667463835, "loss": 1.2352, "step": 31363 }, { "epoch": 0.4075608753774509, "grad_norm": 0.4425623416900635, "learning_rate": 0.0001185120680019236, "loss": 1.4051, "step": 31364 }, { "epoch": 0.4075738699213668, "grad_norm": 0.4310416281223297, "learning_rate": 0.00011850946854001221, "loss": 1.3918, "step": 31365 }, { "epoch": 0.40758686446528264, "grad_norm": 0.3456686735153198, "learning_rate": 0.00011850686907810085, "loss": 1.2985, "step": 31366 }, { "epoch": 0.40759985900919854, "grad_norm": 0.4211243987083435, "learning_rate": 0.00011850426961618946, "loss": 1.4318, "step": 31367 }, { "epoch": 0.4076128535531144, "grad_norm": 0.31994906067848206, "learning_rate": 0.00011850167015427807, "loss": 1.3502, "step": 31368 }, { "epoch": 0.4076258480970303, "grad_norm": 0.4743326008319855, "learning_rate": 0.00011849907069236668, "loss": 1.3237, "step": 31369 }, { "epoch": 0.40763884264094613, "grad_norm": 0.36632925271987915, "learning_rate": 0.0001184964712304553, "loss": 1.4197, "step": 31370 }, { "epoch": 0.40765183718486203, "grad_norm": 0.32511237263679504, "learning_rate": 0.00011849387176854391, "loss": 1.4717, "step": 31371 }, { "epoch": 0.4076648317287779, "grad_norm": 0.3998405337333679, "learning_rate": 0.00011849127230663253, "loss": 1.3779, "step": 31372 }, { "epoch": 0.4076778262726938, "grad_norm": 0.34404999017715454, "learning_rate": 0.00011848867284472114, "loss": 1.3555, "step": 31373 }, { "epoch": 0.4076908208166096, "grad_norm": 0.40047603845596313, "learning_rate": 0.00011848607338280977, "loss": 1.1931, "step": 31374 }, { "epoch": 0.4077038153605255, "grad_norm": 0.395526260137558, "learning_rate": 0.00011848347392089839, "loss": 1.3446, "step": 31375 }, { "epoch": 0.40771680990444137, "grad_norm": 0.42978525161743164, "learning_rate": 0.000118480874458987, "loss": 1.3874, "step": 31376 }, { "epoch": 0.40772980444835727, "grad_norm": 0.382686585187912, "learning_rate": 0.0001184782749970756, "loss": 1.2812, "step": 31377 }, { "epoch": 0.4077427989922731, "grad_norm": 0.39854878187179565, "learning_rate": 0.00011847567553516423, "loss": 1.5112, "step": 31378 }, { "epoch": 0.407755793536189, "grad_norm": 0.4491034746170044, "learning_rate": 0.00011847307607325284, "loss": 1.3977, "step": 31379 }, { "epoch": 0.40776878808010486, "grad_norm": 0.4140099883079529, "learning_rate": 0.00011847047661134145, "loss": 1.2671, "step": 31380 }, { "epoch": 0.40778178262402076, "grad_norm": 0.32000020146369934, "learning_rate": 0.00011846787714943008, "loss": 1.1727, "step": 31381 }, { "epoch": 0.4077947771679366, "grad_norm": 0.4219103455543518, "learning_rate": 0.00011846527768751869, "loss": 1.5198, "step": 31382 }, { "epoch": 0.4078077717118525, "grad_norm": 0.47607484459877014, "learning_rate": 0.0001184626782256073, "loss": 1.5757, "step": 31383 }, { "epoch": 0.40782076625576835, "grad_norm": 0.2943762242794037, "learning_rate": 0.00011846007876369591, "loss": 1.3111, "step": 31384 }, { "epoch": 0.40783376079968425, "grad_norm": 0.45588958263397217, "learning_rate": 0.00011845747930178455, "loss": 1.3689, "step": 31385 }, { "epoch": 0.4078467553436001, "grad_norm": 0.3965607285499573, "learning_rate": 0.00011845487983987316, "loss": 1.4404, "step": 31386 }, { "epoch": 0.407859749887516, "grad_norm": 0.3627980947494507, "learning_rate": 0.00011845228037796177, "loss": 1.4611, "step": 31387 }, { "epoch": 0.40787274443143184, "grad_norm": 0.409006804227829, "learning_rate": 0.00011844968091605038, "loss": 1.444, "step": 31388 }, { "epoch": 0.40788573897534774, "grad_norm": 0.485850989818573, "learning_rate": 0.000118447081454139, "loss": 1.5869, "step": 31389 }, { "epoch": 0.4078987335192636, "grad_norm": 0.35226863622665405, "learning_rate": 0.00011844448199222762, "loss": 1.4892, "step": 31390 }, { "epoch": 0.4079117280631795, "grad_norm": 0.38993802666664124, "learning_rate": 0.00011844188253031623, "loss": 1.4883, "step": 31391 }, { "epoch": 0.40792472260709534, "grad_norm": 0.39846158027648926, "learning_rate": 0.00011843928306840484, "loss": 1.4949, "step": 31392 }, { "epoch": 0.40793771715101124, "grad_norm": 0.4725247323513031, "learning_rate": 0.00011843668360649348, "loss": 1.4223, "step": 31393 }, { "epoch": 0.4079507116949271, "grad_norm": 0.2601841390132904, "learning_rate": 0.00011843408414458207, "loss": 1.0304, "step": 31394 }, { "epoch": 0.407963706238843, "grad_norm": 0.3902965188026428, "learning_rate": 0.00011843148468267069, "loss": 1.3575, "step": 31395 }, { "epoch": 0.4079767007827588, "grad_norm": 0.45944881439208984, "learning_rate": 0.0001184288852207593, "loss": 1.5357, "step": 31396 }, { "epoch": 0.40798969532667473, "grad_norm": 0.36571744084358215, "learning_rate": 0.00011842628575884793, "loss": 1.5448, "step": 31397 }, { "epoch": 0.4080026898705906, "grad_norm": 0.4541155695915222, "learning_rate": 0.00011842368629693655, "loss": 1.4336, "step": 31398 }, { "epoch": 0.4080156844145065, "grad_norm": 0.2900446653366089, "learning_rate": 0.00011842108683502516, "loss": 1.3786, "step": 31399 }, { "epoch": 0.4080286789584223, "grad_norm": 0.4956468641757965, "learning_rate": 0.00011841848737311377, "loss": 1.5467, "step": 31400 }, { "epoch": 0.4080416735023382, "grad_norm": 0.37299689650535583, "learning_rate": 0.00011841588791120239, "loss": 1.2458, "step": 31401 }, { "epoch": 0.40805466804625407, "grad_norm": 0.3782311677932739, "learning_rate": 0.000118413288449291, "loss": 1.3841, "step": 31402 }, { "epoch": 0.40806766259016997, "grad_norm": 0.45050573348999023, "learning_rate": 0.00011841068898737961, "loss": 1.4763, "step": 31403 }, { "epoch": 0.4080806571340858, "grad_norm": 0.4397805631160736, "learning_rate": 0.00011840808952546822, "loss": 1.3661, "step": 31404 }, { "epoch": 0.4080936516780017, "grad_norm": 0.4004155099391937, "learning_rate": 0.00011840549006355686, "loss": 1.5833, "step": 31405 }, { "epoch": 0.40810664622191756, "grad_norm": 0.4636945426464081, "learning_rate": 0.00011840289060164546, "loss": 1.4436, "step": 31406 }, { "epoch": 0.40811964076583346, "grad_norm": 0.4077390432357788, "learning_rate": 0.00011840029113973407, "loss": 1.3702, "step": 31407 }, { "epoch": 0.4081326353097493, "grad_norm": 0.32488545775413513, "learning_rate": 0.00011839769167782268, "loss": 1.34, "step": 31408 }, { "epoch": 0.4081456298536652, "grad_norm": 0.4090341627597809, "learning_rate": 0.00011839509221591132, "loss": 1.2725, "step": 31409 }, { "epoch": 0.40815862439758105, "grad_norm": 0.40529438853263855, "learning_rate": 0.00011839249275399993, "loss": 1.4517, "step": 31410 }, { "epoch": 0.40817161894149695, "grad_norm": 0.4599699079990387, "learning_rate": 0.00011838989329208854, "loss": 1.3348, "step": 31411 }, { "epoch": 0.4081846134854128, "grad_norm": 0.45246192812919617, "learning_rate": 0.00011838729383017715, "loss": 1.5333, "step": 31412 }, { "epoch": 0.4081976080293287, "grad_norm": 0.372466504573822, "learning_rate": 0.00011838469436826578, "loss": 1.3214, "step": 31413 }, { "epoch": 0.40821060257324454, "grad_norm": 0.4106236398220062, "learning_rate": 0.00011838209490635439, "loss": 1.5255, "step": 31414 }, { "epoch": 0.40822359711716044, "grad_norm": 0.4159178137779236, "learning_rate": 0.000118379495444443, "loss": 1.4433, "step": 31415 }, { "epoch": 0.4082365916610763, "grad_norm": 0.4291822016239166, "learning_rate": 0.00011837689598253164, "loss": 1.3729, "step": 31416 }, { "epoch": 0.4082495862049922, "grad_norm": 0.38729721307754517, "learning_rate": 0.00011837429652062025, "loss": 1.2555, "step": 31417 }, { "epoch": 0.40826258074890803, "grad_norm": 0.3638719618320465, "learning_rate": 0.00011837169705870886, "loss": 1.4664, "step": 31418 }, { "epoch": 0.40827557529282393, "grad_norm": 0.2866068184375763, "learning_rate": 0.00011836909759679746, "loss": 1.5283, "step": 31419 }, { "epoch": 0.4082885698367398, "grad_norm": 0.3282183110713959, "learning_rate": 0.0001183664981348861, "loss": 1.1876, "step": 31420 }, { "epoch": 0.4083015643806557, "grad_norm": 0.42717763781547546, "learning_rate": 0.0001183638986729747, "loss": 1.2725, "step": 31421 }, { "epoch": 0.4083145589245715, "grad_norm": 0.45406442880630493, "learning_rate": 0.00011836129921106332, "loss": 1.2582, "step": 31422 }, { "epoch": 0.4083275534684874, "grad_norm": 0.37367990612983704, "learning_rate": 0.00011835869974915193, "loss": 1.4234, "step": 31423 }, { "epoch": 0.40834054801240327, "grad_norm": 0.4946652352809906, "learning_rate": 0.00011835610028724055, "loss": 1.5869, "step": 31424 }, { "epoch": 0.40835354255631917, "grad_norm": 0.37659645080566406, "learning_rate": 0.00011835350082532916, "loss": 1.339, "step": 31425 }, { "epoch": 0.408366537100235, "grad_norm": 0.454621821641922, "learning_rate": 0.00011835090136341777, "loss": 1.2961, "step": 31426 }, { "epoch": 0.4083795316441509, "grad_norm": 0.5059607028961182, "learning_rate": 0.00011834830190150638, "loss": 1.4858, "step": 31427 }, { "epoch": 0.40839252618806676, "grad_norm": 0.4538361728191376, "learning_rate": 0.00011834570243959502, "loss": 1.322, "step": 31428 }, { "epoch": 0.40840552073198266, "grad_norm": 0.36083900928497314, "learning_rate": 0.00011834310297768363, "loss": 1.3338, "step": 31429 }, { "epoch": 0.4084185152758985, "grad_norm": 0.4355923533439636, "learning_rate": 0.00011834050351577224, "loss": 1.3334, "step": 31430 }, { "epoch": 0.4084315098198144, "grad_norm": 0.3410107493400574, "learning_rate": 0.00011833790405386085, "loss": 1.2189, "step": 31431 }, { "epoch": 0.40844450436373025, "grad_norm": 0.4521462321281433, "learning_rate": 0.00011833530459194948, "loss": 1.6769, "step": 31432 }, { "epoch": 0.40845749890764615, "grad_norm": 0.35141193866729736, "learning_rate": 0.00011833270513003809, "loss": 1.1651, "step": 31433 }, { "epoch": 0.408470493451562, "grad_norm": 0.40481558442115784, "learning_rate": 0.0001183301056681267, "loss": 1.4379, "step": 31434 }, { "epoch": 0.4084834879954779, "grad_norm": 0.4880070090293884, "learning_rate": 0.00011832750620621531, "loss": 1.5579, "step": 31435 }, { "epoch": 0.40849648253939375, "grad_norm": 0.41805538535118103, "learning_rate": 0.00011832490674430394, "loss": 1.4931, "step": 31436 }, { "epoch": 0.40850947708330965, "grad_norm": 0.48485302925109863, "learning_rate": 0.00011832230728239255, "loss": 1.5111, "step": 31437 }, { "epoch": 0.4085224716272255, "grad_norm": 0.39246925711631775, "learning_rate": 0.00011831970782048116, "loss": 1.4664, "step": 31438 }, { "epoch": 0.4085354661711414, "grad_norm": 0.39697298407554626, "learning_rate": 0.00011831710835856977, "loss": 1.3511, "step": 31439 }, { "epoch": 0.40854846071505724, "grad_norm": 0.3886861801147461, "learning_rate": 0.00011831450889665841, "loss": 1.538, "step": 31440 }, { "epoch": 0.40856145525897314, "grad_norm": 0.3768399953842163, "learning_rate": 0.00011831190943474702, "loss": 1.3991, "step": 31441 }, { "epoch": 0.40857444980288904, "grad_norm": 0.349935919046402, "learning_rate": 0.00011830930997283563, "loss": 1.4697, "step": 31442 }, { "epoch": 0.4085874443468049, "grad_norm": 0.4222748875617981, "learning_rate": 0.00011830671051092424, "loss": 1.494, "step": 31443 }, { "epoch": 0.4086004388907208, "grad_norm": 0.39832213521003723, "learning_rate": 0.00011830411104901286, "loss": 1.2905, "step": 31444 }, { "epoch": 0.40861343343463663, "grad_norm": 0.3936651051044464, "learning_rate": 0.00011830151158710148, "loss": 1.3466, "step": 31445 }, { "epoch": 0.40862642797855253, "grad_norm": 0.3840542733669281, "learning_rate": 0.00011829891212519009, "loss": 1.3802, "step": 31446 }, { "epoch": 0.4086394225224684, "grad_norm": 0.42872774600982666, "learning_rate": 0.0001182963126632787, "loss": 1.605, "step": 31447 }, { "epoch": 0.4086524170663843, "grad_norm": 0.39350372552871704, "learning_rate": 0.00011829371320136732, "loss": 1.4396, "step": 31448 }, { "epoch": 0.4086654116103001, "grad_norm": 0.4355899691581726, "learning_rate": 0.00011829111373945593, "loss": 1.334, "step": 31449 }, { "epoch": 0.408678406154216, "grad_norm": 0.44449347257614136, "learning_rate": 0.00011828851427754454, "loss": 1.4616, "step": 31450 }, { "epoch": 0.40869140069813187, "grad_norm": 0.3463626801967621, "learning_rate": 0.00011828591481563315, "loss": 1.4628, "step": 31451 }, { "epoch": 0.40870439524204777, "grad_norm": 0.42759230732917786, "learning_rate": 0.00011828331535372179, "loss": 1.4798, "step": 31452 }, { "epoch": 0.4087173897859636, "grad_norm": 0.613426148891449, "learning_rate": 0.0001182807158918104, "loss": 1.3774, "step": 31453 }, { "epoch": 0.4087303843298795, "grad_norm": 0.3927464783191681, "learning_rate": 0.00011827811642989901, "loss": 1.4392, "step": 31454 }, { "epoch": 0.40874337887379536, "grad_norm": 0.33047032356262207, "learning_rate": 0.00011827551696798764, "loss": 1.5495, "step": 31455 }, { "epoch": 0.40875637341771126, "grad_norm": 0.38525456190109253, "learning_rate": 0.00011827291750607625, "loss": 1.3869, "step": 31456 }, { "epoch": 0.4087693679616271, "grad_norm": 0.4371989071369171, "learning_rate": 0.00011827031804416486, "loss": 1.4229, "step": 31457 }, { "epoch": 0.408782362505543, "grad_norm": 0.4202074110507965, "learning_rate": 0.00011826771858225347, "loss": 1.448, "step": 31458 }, { "epoch": 0.40879535704945885, "grad_norm": 0.3906514346599579, "learning_rate": 0.00011826511912034211, "loss": 1.4543, "step": 31459 }, { "epoch": 0.40880835159337475, "grad_norm": 0.3407418131828308, "learning_rate": 0.00011826251965843072, "loss": 1.4444, "step": 31460 }, { "epoch": 0.4088213461372906, "grad_norm": 0.41271138191223145, "learning_rate": 0.00011825992019651932, "loss": 1.4037, "step": 31461 }, { "epoch": 0.4088343406812065, "grad_norm": 0.3754023611545563, "learning_rate": 0.00011825732073460793, "loss": 1.4579, "step": 31462 }, { "epoch": 0.40884733522512234, "grad_norm": 0.37632033228874207, "learning_rate": 0.00011825472127269657, "loss": 1.4101, "step": 31463 }, { "epoch": 0.40886032976903824, "grad_norm": 0.42989492416381836, "learning_rate": 0.00011825212181078518, "loss": 1.3728, "step": 31464 }, { "epoch": 0.4088733243129541, "grad_norm": 0.39576902985572815, "learning_rate": 0.00011824952234887379, "loss": 1.5051, "step": 31465 }, { "epoch": 0.40888631885687, "grad_norm": 0.3420815169811249, "learning_rate": 0.0001182469228869624, "loss": 1.293, "step": 31466 }, { "epoch": 0.40889931340078584, "grad_norm": 0.43998003005981445, "learning_rate": 0.00011824432342505102, "loss": 1.4895, "step": 31467 }, { "epoch": 0.40891230794470174, "grad_norm": 0.3893432915210724, "learning_rate": 0.00011824172396313964, "loss": 1.383, "step": 31468 }, { "epoch": 0.4089253024886176, "grad_norm": 0.4175719618797302, "learning_rate": 0.00011823912450122825, "loss": 1.4606, "step": 31469 }, { "epoch": 0.4089382970325335, "grad_norm": 0.402387797832489, "learning_rate": 0.00011823652503931686, "loss": 1.4206, "step": 31470 }, { "epoch": 0.4089512915764493, "grad_norm": 0.4163857102394104, "learning_rate": 0.0001182339255774055, "loss": 1.4177, "step": 31471 }, { "epoch": 0.40896428612036523, "grad_norm": 0.3899572491645813, "learning_rate": 0.0001182313261154941, "loss": 1.5447, "step": 31472 }, { "epoch": 0.4089772806642811, "grad_norm": 0.5240683555603027, "learning_rate": 0.00011822872665358272, "loss": 1.4688, "step": 31473 }, { "epoch": 0.408990275208197, "grad_norm": 0.4555453062057495, "learning_rate": 0.00011822612719167131, "loss": 1.4944, "step": 31474 }, { "epoch": 0.4090032697521128, "grad_norm": 0.39444679021835327, "learning_rate": 0.00011822352772975995, "loss": 1.5195, "step": 31475 }, { "epoch": 0.4090162642960287, "grad_norm": 0.4139522612094879, "learning_rate": 0.00011822092826784856, "loss": 1.4102, "step": 31476 }, { "epoch": 0.40902925883994457, "grad_norm": 0.47355741262435913, "learning_rate": 0.00011821832880593717, "loss": 1.2987, "step": 31477 }, { "epoch": 0.40904225338386047, "grad_norm": 0.33981433510780334, "learning_rate": 0.00011821572934402579, "loss": 1.2082, "step": 31478 }, { "epoch": 0.4090552479277763, "grad_norm": 0.3353487253189087, "learning_rate": 0.00011821312988211441, "loss": 1.1433, "step": 31479 }, { "epoch": 0.4090682424716922, "grad_norm": 0.47620120644569397, "learning_rate": 0.00011821053042020302, "loss": 1.5979, "step": 31480 }, { "epoch": 0.40908123701560806, "grad_norm": 0.41410520672798157, "learning_rate": 0.00011820793095829163, "loss": 1.4131, "step": 31481 }, { "epoch": 0.40909423155952396, "grad_norm": 0.366629034280777, "learning_rate": 0.00011820533149638024, "loss": 1.5615, "step": 31482 }, { "epoch": 0.4091072261034398, "grad_norm": 0.34971490502357483, "learning_rate": 0.00011820273203446888, "loss": 1.7237, "step": 31483 }, { "epoch": 0.4091202206473557, "grad_norm": 0.463933527469635, "learning_rate": 0.00011820013257255749, "loss": 1.3382, "step": 31484 }, { "epoch": 0.40913321519127155, "grad_norm": 0.48832187056541443, "learning_rate": 0.0001181975331106461, "loss": 1.5046, "step": 31485 }, { "epoch": 0.40914620973518745, "grad_norm": 0.41951534152030945, "learning_rate": 0.00011819493364873471, "loss": 1.4755, "step": 31486 }, { "epoch": 0.4091592042791033, "grad_norm": 0.42187410593032837, "learning_rate": 0.00011819233418682334, "loss": 1.3842, "step": 31487 }, { "epoch": 0.4091721988230192, "grad_norm": 0.42559242248535156, "learning_rate": 0.00011818973472491195, "loss": 1.2474, "step": 31488 }, { "epoch": 0.40918519336693504, "grad_norm": 0.31046196818351746, "learning_rate": 0.00011818713526300056, "loss": 1.4083, "step": 31489 }, { "epoch": 0.40919818791085094, "grad_norm": 0.5612074732780457, "learning_rate": 0.00011818453580108918, "loss": 1.4781, "step": 31490 }, { "epoch": 0.4092111824547668, "grad_norm": 0.4055081605911255, "learning_rate": 0.0001181819363391778, "loss": 1.3433, "step": 31491 }, { "epoch": 0.4092241769986827, "grad_norm": 0.4123353958129883, "learning_rate": 0.0001181793368772664, "loss": 1.3228, "step": 31492 }, { "epoch": 0.40923717154259853, "grad_norm": 0.3822682797908783, "learning_rate": 0.00011817673741535502, "loss": 1.4473, "step": 31493 }, { "epoch": 0.40925016608651443, "grad_norm": 0.40904977917671204, "learning_rate": 0.00011817413795344366, "loss": 1.475, "step": 31494 }, { "epoch": 0.4092631606304303, "grad_norm": 0.3862096965312958, "learning_rate": 0.00011817153849153227, "loss": 1.5589, "step": 31495 }, { "epoch": 0.4092761551743462, "grad_norm": 0.32827651500701904, "learning_rate": 0.00011816893902962088, "loss": 1.2963, "step": 31496 }, { "epoch": 0.409289149718262, "grad_norm": 0.41040390729904175, "learning_rate": 0.00011816633956770949, "loss": 1.28, "step": 31497 }, { "epoch": 0.4093021442621779, "grad_norm": 0.4335785210132599, "learning_rate": 0.00011816374010579811, "loss": 1.4285, "step": 31498 }, { "epoch": 0.40931513880609377, "grad_norm": 0.33759036660194397, "learning_rate": 0.00011816114064388672, "loss": 1.4329, "step": 31499 }, { "epoch": 0.40932813335000967, "grad_norm": 0.41463494300842285, "learning_rate": 0.00011815854118197533, "loss": 1.4981, "step": 31500 }, { "epoch": 0.4093411278939255, "grad_norm": 0.3643905222415924, "learning_rate": 0.00011815594172006395, "loss": 1.3193, "step": 31501 }, { "epoch": 0.4093541224378414, "grad_norm": 0.32527557015419006, "learning_rate": 0.00011815334225815258, "loss": 1.1753, "step": 31502 }, { "epoch": 0.40936711698175726, "grad_norm": 0.4599539041519165, "learning_rate": 0.00011815074279624118, "loss": 1.4875, "step": 31503 }, { "epoch": 0.40938011152567316, "grad_norm": 0.49181321263313293, "learning_rate": 0.00011814814333432979, "loss": 1.3654, "step": 31504 }, { "epoch": 0.409393106069589, "grad_norm": 0.3689109981060028, "learning_rate": 0.0001181455438724184, "loss": 1.3361, "step": 31505 }, { "epoch": 0.4094061006135049, "grad_norm": 0.3569023311138153, "learning_rate": 0.00011814294441050704, "loss": 1.4822, "step": 31506 }, { "epoch": 0.40941909515742075, "grad_norm": 0.3547045886516571, "learning_rate": 0.00011814034494859565, "loss": 1.5036, "step": 31507 }, { "epoch": 0.40943208970133665, "grad_norm": 0.3776378035545349, "learning_rate": 0.00011813774548668426, "loss": 1.4238, "step": 31508 }, { "epoch": 0.4094450842452525, "grad_norm": 0.47640812397003174, "learning_rate": 0.00011813514602477287, "loss": 1.3573, "step": 31509 }, { "epoch": 0.4094580787891684, "grad_norm": 0.32027679681777954, "learning_rate": 0.0001181325465628615, "loss": 1.3882, "step": 31510 }, { "epoch": 0.40947107333308425, "grad_norm": 0.32853272557258606, "learning_rate": 0.00011812994710095011, "loss": 1.5877, "step": 31511 }, { "epoch": 0.40948406787700015, "grad_norm": 0.43371883034706116, "learning_rate": 0.00011812734763903872, "loss": 1.4665, "step": 31512 }, { "epoch": 0.409497062420916, "grad_norm": 0.41488316655158997, "learning_rate": 0.00011812474817712733, "loss": 1.4706, "step": 31513 }, { "epoch": 0.4095100569648319, "grad_norm": 0.33448073267936707, "learning_rate": 0.00011812214871521597, "loss": 1.416, "step": 31514 }, { "epoch": 0.40952305150874774, "grad_norm": 0.3807866871356964, "learning_rate": 0.00011811954925330458, "loss": 1.5369, "step": 31515 }, { "epoch": 0.40953604605266364, "grad_norm": 0.32609882950782776, "learning_rate": 0.00011811694979139318, "loss": 1.4505, "step": 31516 }, { "epoch": 0.40954904059657954, "grad_norm": 0.36160367727279663, "learning_rate": 0.00011811435032948179, "loss": 1.3877, "step": 31517 }, { "epoch": 0.4095620351404954, "grad_norm": 0.4232065975666046, "learning_rate": 0.00011811175086757043, "loss": 1.3083, "step": 31518 }, { "epoch": 0.4095750296844113, "grad_norm": 0.41500887274742126, "learning_rate": 0.00011810915140565904, "loss": 1.5131, "step": 31519 }, { "epoch": 0.40958802422832713, "grad_norm": 0.3550424873828888, "learning_rate": 0.00011810655194374765, "loss": 1.2894, "step": 31520 }, { "epoch": 0.40960101877224303, "grad_norm": 0.44544121623039246, "learning_rate": 0.00011810395248183626, "loss": 1.5123, "step": 31521 }, { "epoch": 0.4096140133161589, "grad_norm": 0.34656384587287903, "learning_rate": 0.00011810135301992488, "loss": 1.3418, "step": 31522 }, { "epoch": 0.4096270078600748, "grad_norm": 0.4084930419921875, "learning_rate": 0.0001180987535580135, "loss": 1.2707, "step": 31523 }, { "epoch": 0.4096400024039906, "grad_norm": 0.39699652791023254, "learning_rate": 0.0001180961540961021, "loss": 1.3302, "step": 31524 }, { "epoch": 0.4096529969479065, "grad_norm": 0.4151805639266968, "learning_rate": 0.00011809355463419072, "loss": 1.3127, "step": 31525 }, { "epoch": 0.40966599149182237, "grad_norm": 0.43207848072052, "learning_rate": 0.00011809095517227935, "loss": 1.4389, "step": 31526 }, { "epoch": 0.40967898603573827, "grad_norm": 0.2966243624687195, "learning_rate": 0.00011808835571036797, "loss": 1.2099, "step": 31527 }, { "epoch": 0.4096919805796541, "grad_norm": 0.4899061620235443, "learning_rate": 0.00011808575624845658, "loss": 1.4809, "step": 31528 }, { "epoch": 0.40970497512357, "grad_norm": 0.36162999272346497, "learning_rate": 0.0001180831567865452, "loss": 1.4903, "step": 31529 }, { "epoch": 0.40971796966748586, "grad_norm": 0.3901997208595276, "learning_rate": 0.00011808055732463381, "loss": 1.3494, "step": 31530 }, { "epoch": 0.40973096421140176, "grad_norm": 0.4472411274909973, "learning_rate": 0.00011807795786272242, "loss": 1.4126, "step": 31531 }, { "epoch": 0.4097439587553176, "grad_norm": 0.40811723470687866, "learning_rate": 0.00011807535840081103, "loss": 1.3222, "step": 31532 }, { "epoch": 0.4097569532992335, "grad_norm": 0.38060200214385986, "learning_rate": 0.00011807275893889966, "loss": 1.5181, "step": 31533 }, { "epoch": 0.40976994784314935, "grad_norm": 0.33410435914993286, "learning_rate": 0.00011807015947698827, "loss": 1.344, "step": 31534 }, { "epoch": 0.40978294238706525, "grad_norm": 0.4646623730659485, "learning_rate": 0.00011806756001507688, "loss": 1.387, "step": 31535 }, { "epoch": 0.4097959369309811, "grad_norm": 0.36100417375564575, "learning_rate": 0.00011806496055316549, "loss": 1.2898, "step": 31536 }, { "epoch": 0.409808931474897, "grad_norm": 0.398863285779953, "learning_rate": 0.00011806236109125413, "loss": 1.4409, "step": 31537 }, { "epoch": 0.40982192601881284, "grad_norm": 0.388494610786438, "learning_rate": 0.00011805976162934274, "loss": 1.4975, "step": 31538 }, { "epoch": 0.40983492056272874, "grad_norm": 0.4466759264469147, "learning_rate": 0.00011805716216743135, "loss": 1.3661, "step": 31539 }, { "epoch": 0.4098479151066446, "grad_norm": 0.40365058183670044, "learning_rate": 0.00011805456270551996, "loss": 1.5221, "step": 31540 }, { "epoch": 0.4098609096505605, "grad_norm": 0.4773750901222229, "learning_rate": 0.00011805196324360859, "loss": 1.4561, "step": 31541 }, { "epoch": 0.40987390419447634, "grad_norm": 0.48428115248680115, "learning_rate": 0.0001180493637816972, "loss": 1.4226, "step": 31542 }, { "epoch": 0.40988689873839224, "grad_norm": 0.2745041251182556, "learning_rate": 0.00011804676431978581, "loss": 1.3222, "step": 31543 }, { "epoch": 0.4098998932823081, "grad_norm": 0.3983737528324127, "learning_rate": 0.00011804416485787442, "loss": 1.2943, "step": 31544 }, { "epoch": 0.409912887826224, "grad_norm": 0.4040609300136566, "learning_rate": 0.00011804156539596304, "loss": 1.4124, "step": 31545 }, { "epoch": 0.4099258823701398, "grad_norm": 0.3810339570045471, "learning_rate": 0.00011803896593405165, "loss": 1.3874, "step": 31546 }, { "epoch": 0.4099388769140557, "grad_norm": 0.39410123229026794, "learning_rate": 0.00011803636647214027, "loss": 1.3064, "step": 31547 }, { "epoch": 0.4099518714579716, "grad_norm": 0.3857850730419159, "learning_rate": 0.00011803376701022888, "loss": 1.368, "step": 31548 }, { "epoch": 0.4099648660018875, "grad_norm": 0.36780086159706116, "learning_rate": 0.00011803116754831751, "loss": 1.3956, "step": 31549 }, { "epoch": 0.4099778605458033, "grad_norm": 0.41981688141822815, "learning_rate": 0.00011802856808640613, "loss": 1.3771, "step": 31550 }, { "epoch": 0.4099908550897192, "grad_norm": 0.4461262822151184, "learning_rate": 0.00011802596862449474, "loss": 1.4245, "step": 31551 }, { "epoch": 0.41000384963363506, "grad_norm": 0.4512978196144104, "learning_rate": 0.00011802336916258335, "loss": 1.3088, "step": 31552 }, { "epoch": 0.41001684417755097, "grad_norm": 0.38236308097839355, "learning_rate": 0.00011802076970067197, "loss": 1.2624, "step": 31553 }, { "epoch": 0.4100298387214668, "grad_norm": 0.43214765191078186, "learning_rate": 0.00011801817023876058, "loss": 1.4063, "step": 31554 }, { "epoch": 0.4100428332653827, "grad_norm": 0.3449733257293701, "learning_rate": 0.0001180155707768492, "loss": 1.2786, "step": 31555 }, { "epoch": 0.41005582780929856, "grad_norm": 0.344591349363327, "learning_rate": 0.0001180129713149378, "loss": 1.4327, "step": 31556 }, { "epoch": 0.41006882235321446, "grad_norm": 0.47518298029899597, "learning_rate": 0.00011801037185302644, "loss": 1.5384, "step": 31557 }, { "epoch": 0.4100818168971303, "grad_norm": 0.4852653741836548, "learning_rate": 0.00011800777239111504, "loss": 1.494, "step": 31558 }, { "epoch": 0.4100948114410462, "grad_norm": 0.37454286217689514, "learning_rate": 0.00011800517292920365, "loss": 1.322, "step": 31559 }, { "epoch": 0.41010780598496205, "grad_norm": 0.4366607367992401, "learning_rate": 0.00011800257346729226, "loss": 1.5601, "step": 31560 }, { "epoch": 0.41012080052887795, "grad_norm": 0.453578382730484, "learning_rate": 0.0001179999740053809, "loss": 1.3266, "step": 31561 }, { "epoch": 0.4101337950727938, "grad_norm": 0.4191003739833832, "learning_rate": 0.00011799737454346951, "loss": 1.3402, "step": 31562 }, { "epoch": 0.4101467896167097, "grad_norm": 0.46088704466819763, "learning_rate": 0.00011799477508155812, "loss": 1.4136, "step": 31563 }, { "epoch": 0.41015978416062554, "grad_norm": 0.43562641739845276, "learning_rate": 0.00011799217561964675, "loss": 1.3783, "step": 31564 }, { "epoch": 0.41017277870454144, "grad_norm": 0.37579742074012756, "learning_rate": 0.00011798957615773536, "loss": 1.2726, "step": 31565 }, { "epoch": 0.4101857732484573, "grad_norm": 0.3702225983142853, "learning_rate": 0.00011798697669582397, "loss": 1.2204, "step": 31566 }, { "epoch": 0.4101987677923732, "grad_norm": 0.5580505132675171, "learning_rate": 0.00011798437723391258, "loss": 1.6742, "step": 31567 }, { "epoch": 0.41021176233628903, "grad_norm": 0.4354010820388794, "learning_rate": 0.00011798177777200122, "loss": 1.3901, "step": 31568 }, { "epoch": 0.41022475688020493, "grad_norm": 0.39802223443984985, "learning_rate": 0.00011797917831008983, "loss": 1.3698, "step": 31569 }, { "epoch": 0.4102377514241208, "grad_norm": 0.4317769706249237, "learning_rate": 0.00011797657884817844, "loss": 1.5276, "step": 31570 }, { "epoch": 0.4102507459680367, "grad_norm": 0.39785075187683105, "learning_rate": 0.00011797397938626704, "loss": 1.4359, "step": 31571 }, { "epoch": 0.4102637405119525, "grad_norm": 0.42543643712997437, "learning_rate": 0.00011797137992435567, "loss": 1.6143, "step": 31572 }, { "epoch": 0.4102767350558684, "grad_norm": 0.33673784136772156, "learning_rate": 0.00011796878046244428, "loss": 1.5353, "step": 31573 }, { "epoch": 0.41028972959978427, "grad_norm": 0.40286701917648315, "learning_rate": 0.0001179661810005329, "loss": 1.4414, "step": 31574 }, { "epoch": 0.41030272414370017, "grad_norm": 0.514765739440918, "learning_rate": 0.0001179635815386215, "loss": 1.4206, "step": 31575 }, { "epoch": 0.410315718687616, "grad_norm": 0.4378872513771057, "learning_rate": 0.00011796098207671013, "loss": 1.366, "step": 31576 }, { "epoch": 0.4103287132315319, "grad_norm": 0.4727979898452759, "learning_rate": 0.00011795838261479874, "loss": 1.473, "step": 31577 }, { "epoch": 0.41034170777544776, "grad_norm": 0.39839720726013184, "learning_rate": 0.00011795578315288735, "loss": 1.499, "step": 31578 }, { "epoch": 0.41035470231936366, "grad_norm": 0.47844377160072327, "learning_rate": 0.00011795318369097596, "loss": 1.3596, "step": 31579 }, { "epoch": 0.4103676968632795, "grad_norm": 0.35658955574035645, "learning_rate": 0.0001179505842290646, "loss": 1.3111, "step": 31580 }, { "epoch": 0.4103806914071954, "grad_norm": 0.33304885029792786, "learning_rate": 0.00011794798476715321, "loss": 1.5764, "step": 31581 }, { "epoch": 0.41039368595111125, "grad_norm": 0.4718421399593353, "learning_rate": 0.00011794538530524182, "loss": 1.3568, "step": 31582 }, { "epoch": 0.41040668049502715, "grad_norm": 0.4658700227737427, "learning_rate": 0.00011794278584333042, "loss": 1.5012, "step": 31583 }, { "epoch": 0.410419675038943, "grad_norm": 0.4477006494998932, "learning_rate": 0.00011794018638141906, "loss": 1.5038, "step": 31584 }, { "epoch": 0.4104326695828589, "grad_norm": 0.41209080815315247, "learning_rate": 0.00011793758691950767, "loss": 1.6472, "step": 31585 }, { "epoch": 0.41044566412677475, "grad_norm": 0.3747536242008209, "learning_rate": 0.00011793498745759628, "loss": 1.2997, "step": 31586 }, { "epoch": 0.41045865867069065, "grad_norm": 0.4215029776096344, "learning_rate": 0.00011793238799568489, "loss": 1.4704, "step": 31587 }, { "epoch": 0.4104716532146065, "grad_norm": 0.47362276911735535, "learning_rate": 0.00011792978853377352, "loss": 1.5515, "step": 31588 }, { "epoch": 0.4104846477585224, "grad_norm": 0.3811095654964447, "learning_rate": 0.00011792718907186213, "loss": 1.391, "step": 31589 }, { "epoch": 0.41049764230243824, "grad_norm": 0.3897624909877777, "learning_rate": 0.00011792458960995074, "loss": 1.3357, "step": 31590 }, { "epoch": 0.41051063684635414, "grad_norm": 0.43030741810798645, "learning_rate": 0.00011792199014803935, "loss": 1.5572, "step": 31591 }, { "epoch": 0.41052363139027, "grad_norm": 0.35224151611328125, "learning_rate": 0.00011791939068612799, "loss": 1.2773, "step": 31592 }, { "epoch": 0.4105366259341859, "grad_norm": 0.382892370223999, "learning_rate": 0.0001179167912242166, "loss": 1.3806, "step": 31593 }, { "epoch": 0.4105496204781018, "grad_norm": 0.31598880887031555, "learning_rate": 0.00011791419176230521, "loss": 1.2217, "step": 31594 }, { "epoch": 0.41056261502201763, "grad_norm": 0.36728498339653015, "learning_rate": 0.00011791159230039382, "loss": 1.2947, "step": 31595 }, { "epoch": 0.41057560956593353, "grad_norm": 0.4650548994541168, "learning_rate": 0.00011790899283848244, "loss": 1.3923, "step": 31596 }, { "epoch": 0.4105886041098494, "grad_norm": 0.3864407241344452, "learning_rate": 0.00011790639337657106, "loss": 1.2028, "step": 31597 }, { "epoch": 0.4106015986537653, "grad_norm": 0.39786413311958313, "learning_rate": 0.00011790379391465967, "loss": 1.3598, "step": 31598 }, { "epoch": 0.4106145931976811, "grad_norm": 0.42337566614151, "learning_rate": 0.00011790119445274828, "loss": 1.2514, "step": 31599 }, { "epoch": 0.410627587741597, "grad_norm": 0.43584102392196655, "learning_rate": 0.0001178985949908369, "loss": 1.4114, "step": 31600 }, { "epoch": 0.41064058228551287, "grad_norm": 0.36493369936943054, "learning_rate": 0.00011789599552892551, "loss": 1.2058, "step": 31601 }, { "epoch": 0.41065357682942877, "grad_norm": 0.39915576577186584, "learning_rate": 0.00011789339606701412, "loss": 1.1882, "step": 31602 }, { "epoch": 0.4106665713733446, "grad_norm": 0.43771296739578247, "learning_rate": 0.00011789079660510276, "loss": 1.2833, "step": 31603 }, { "epoch": 0.4106795659172605, "grad_norm": 0.39958369731903076, "learning_rate": 0.00011788819714319137, "loss": 1.4624, "step": 31604 }, { "epoch": 0.41069256046117636, "grad_norm": 0.45124125480651855, "learning_rate": 0.00011788559768127998, "loss": 1.3059, "step": 31605 }, { "epoch": 0.41070555500509226, "grad_norm": 0.3760499060153961, "learning_rate": 0.0001178829982193686, "loss": 1.3808, "step": 31606 }, { "epoch": 0.4107185495490081, "grad_norm": 0.3665977716445923, "learning_rate": 0.00011788039875745722, "loss": 1.3556, "step": 31607 }, { "epoch": 0.410731544092924, "grad_norm": 0.28853094577789307, "learning_rate": 0.00011787779929554583, "loss": 1.3003, "step": 31608 }, { "epoch": 0.41074453863683985, "grad_norm": 0.3686813712120056, "learning_rate": 0.00011787519983363444, "loss": 1.3116, "step": 31609 }, { "epoch": 0.41075753318075575, "grad_norm": 0.4340326189994812, "learning_rate": 0.00011787260037172305, "loss": 1.4141, "step": 31610 }, { "epoch": 0.4107705277246716, "grad_norm": 0.4526670277118683, "learning_rate": 0.00011787000090981169, "loss": 1.3634, "step": 31611 }, { "epoch": 0.4107835222685875, "grad_norm": 0.33790960907936096, "learning_rate": 0.00011786740144790029, "loss": 1.4982, "step": 31612 }, { "epoch": 0.41079651681250334, "grad_norm": 0.3485633134841919, "learning_rate": 0.0001178648019859889, "loss": 1.2363, "step": 31613 }, { "epoch": 0.41080951135641924, "grad_norm": 0.3062633275985718, "learning_rate": 0.00011786220252407751, "loss": 1.4422, "step": 31614 }, { "epoch": 0.4108225059003351, "grad_norm": 0.38441792130470276, "learning_rate": 0.00011785960306216615, "loss": 1.3213, "step": 31615 }, { "epoch": 0.410835500444251, "grad_norm": 0.46414437890052795, "learning_rate": 0.00011785700360025476, "loss": 1.4924, "step": 31616 }, { "epoch": 0.41084849498816683, "grad_norm": 0.4170149862766266, "learning_rate": 0.00011785440413834337, "loss": 1.2918, "step": 31617 }, { "epoch": 0.41086148953208274, "grad_norm": 0.44067803025245667, "learning_rate": 0.00011785180467643198, "loss": 1.3788, "step": 31618 }, { "epoch": 0.4108744840759986, "grad_norm": 0.33015716075897217, "learning_rate": 0.0001178492052145206, "loss": 1.322, "step": 31619 }, { "epoch": 0.4108874786199145, "grad_norm": 0.39627066254615784, "learning_rate": 0.00011784660575260922, "loss": 1.2984, "step": 31620 }, { "epoch": 0.4109004731638303, "grad_norm": 0.4287610352039337, "learning_rate": 0.00011784400629069783, "loss": 1.413, "step": 31621 }, { "epoch": 0.4109134677077462, "grad_norm": 0.3972589373588562, "learning_rate": 0.00011784140682878644, "loss": 1.4957, "step": 31622 }, { "epoch": 0.4109264622516621, "grad_norm": 0.4931383430957794, "learning_rate": 0.00011783880736687508, "loss": 1.3749, "step": 31623 }, { "epoch": 0.410939456795578, "grad_norm": 0.4313546121120453, "learning_rate": 0.00011783620790496369, "loss": 1.4123, "step": 31624 }, { "epoch": 0.4109524513394938, "grad_norm": 0.41138026118278503, "learning_rate": 0.00011783360844305228, "loss": 1.2964, "step": 31625 }, { "epoch": 0.4109654458834097, "grad_norm": 0.43869635462760925, "learning_rate": 0.0001178310089811409, "loss": 1.45, "step": 31626 }, { "epoch": 0.41097844042732556, "grad_norm": 0.39452725648880005, "learning_rate": 0.00011782840951922953, "loss": 1.4999, "step": 31627 }, { "epoch": 0.41099143497124146, "grad_norm": 0.44536057114601135, "learning_rate": 0.00011782581005731814, "loss": 1.4383, "step": 31628 }, { "epoch": 0.4110044295151573, "grad_norm": 0.4568173289299011, "learning_rate": 0.00011782321059540675, "loss": 1.3343, "step": 31629 }, { "epoch": 0.4110174240590732, "grad_norm": 0.37603095173835754, "learning_rate": 0.00011782061113349537, "loss": 1.4932, "step": 31630 }, { "epoch": 0.41103041860298906, "grad_norm": 0.31568434834480286, "learning_rate": 0.00011781801167158399, "loss": 1.657, "step": 31631 }, { "epoch": 0.41104341314690496, "grad_norm": 0.4077213406562805, "learning_rate": 0.0001178154122096726, "loss": 1.4164, "step": 31632 }, { "epoch": 0.4110564076908208, "grad_norm": 0.44103261828422546, "learning_rate": 0.00011781281274776121, "loss": 1.501, "step": 31633 }, { "epoch": 0.4110694022347367, "grad_norm": 0.42536574602127075, "learning_rate": 0.00011781021328584982, "loss": 1.5286, "step": 31634 }, { "epoch": 0.41108239677865255, "grad_norm": 0.32663974165916443, "learning_rate": 0.00011780761382393846, "loss": 1.4775, "step": 31635 }, { "epoch": 0.41109539132256845, "grad_norm": 0.39440450072288513, "learning_rate": 0.00011780501436202707, "loss": 1.3331, "step": 31636 }, { "epoch": 0.4111083858664843, "grad_norm": 0.4558618664741516, "learning_rate": 0.00011780241490011568, "loss": 1.452, "step": 31637 }, { "epoch": 0.4111213804104002, "grad_norm": 0.39504119753837585, "learning_rate": 0.00011779981543820431, "loss": 1.2357, "step": 31638 }, { "epoch": 0.41113437495431604, "grad_norm": 0.4278838336467743, "learning_rate": 0.00011779721597629292, "loss": 1.2892, "step": 31639 }, { "epoch": 0.41114736949823194, "grad_norm": 0.5051475763320923, "learning_rate": 0.00011779461651438153, "loss": 1.3866, "step": 31640 }, { "epoch": 0.4111603640421478, "grad_norm": 0.32163652777671814, "learning_rate": 0.00011779201705247014, "loss": 1.2396, "step": 31641 }, { "epoch": 0.4111733585860637, "grad_norm": 0.36184391379356384, "learning_rate": 0.00011778941759055876, "loss": 1.3137, "step": 31642 }, { "epoch": 0.41118635312997953, "grad_norm": 0.3968331515789032, "learning_rate": 0.00011778681812864738, "loss": 1.3664, "step": 31643 }, { "epoch": 0.41119934767389543, "grad_norm": 0.481471985578537, "learning_rate": 0.00011778421866673599, "loss": 1.4686, "step": 31644 }, { "epoch": 0.4112123422178113, "grad_norm": 0.4758431017398834, "learning_rate": 0.0001177816192048246, "loss": 1.4014, "step": 31645 }, { "epoch": 0.4112253367617272, "grad_norm": 0.2949455976486206, "learning_rate": 0.00011777901974291324, "loss": 1.2726, "step": 31646 }, { "epoch": 0.411238331305643, "grad_norm": 0.3730049431324005, "learning_rate": 0.00011777642028100185, "loss": 1.3608, "step": 31647 }, { "epoch": 0.4112513258495589, "grad_norm": 0.45002204179763794, "learning_rate": 0.00011777382081909046, "loss": 1.1865, "step": 31648 }, { "epoch": 0.41126432039347477, "grad_norm": 0.38180041313171387, "learning_rate": 0.00011777122135717907, "loss": 1.5136, "step": 31649 }, { "epoch": 0.41127731493739067, "grad_norm": 0.3299858570098877, "learning_rate": 0.00011776862189526769, "loss": 1.3172, "step": 31650 }, { "epoch": 0.4112903094813065, "grad_norm": 0.35219427943229675, "learning_rate": 0.0001177660224333563, "loss": 1.3956, "step": 31651 }, { "epoch": 0.4113033040252224, "grad_norm": 0.47893497347831726, "learning_rate": 0.00011776342297144491, "loss": 1.353, "step": 31652 }, { "epoch": 0.41131629856913826, "grad_norm": 0.3493189811706543, "learning_rate": 0.00011776082350953353, "loss": 1.4158, "step": 31653 }, { "epoch": 0.41132929311305416, "grad_norm": 0.5018308758735657, "learning_rate": 0.00011775822404762215, "loss": 1.4385, "step": 31654 }, { "epoch": 0.41134228765697, "grad_norm": 0.4680333435535431, "learning_rate": 0.00011775562458571076, "loss": 1.5763, "step": 31655 }, { "epoch": 0.4113552822008859, "grad_norm": 0.38837650418281555, "learning_rate": 0.00011775302512379937, "loss": 1.4964, "step": 31656 }, { "epoch": 0.41136827674480175, "grad_norm": 0.39791902899742126, "learning_rate": 0.00011775042566188798, "loss": 1.4055, "step": 31657 }, { "epoch": 0.41138127128871765, "grad_norm": 0.30373644828796387, "learning_rate": 0.00011774782619997662, "loss": 1.3375, "step": 31658 }, { "epoch": 0.4113942658326335, "grad_norm": 0.3376322388648987, "learning_rate": 0.00011774522673806523, "loss": 1.2151, "step": 31659 }, { "epoch": 0.4114072603765494, "grad_norm": 0.4305064380168915, "learning_rate": 0.00011774262727615384, "loss": 1.37, "step": 31660 }, { "epoch": 0.41142025492046524, "grad_norm": 0.4347439110279083, "learning_rate": 0.00011774002781424245, "loss": 1.4325, "step": 31661 }, { "epoch": 0.41143324946438115, "grad_norm": 0.4432355761528015, "learning_rate": 0.00011773742835233108, "loss": 1.2511, "step": 31662 }, { "epoch": 0.411446244008297, "grad_norm": 0.44055166840553284, "learning_rate": 0.00011773482889041969, "loss": 1.6505, "step": 31663 }, { "epoch": 0.4114592385522129, "grad_norm": 0.42715615034103394, "learning_rate": 0.0001177322294285083, "loss": 1.4526, "step": 31664 }, { "epoch": 0.41147223309612874, "grad_norm": 0.3953370749950409, "learning_rate": 0.00011772962996659691, "loss": 1.6368, "step": 31665 }, { "epoch": 0.41148522764004464, "grad_norm": 0.3512831926345825, "learning_rate": 0.00011772703050468555, "loss": 1.3893, "step": 31666 }, { "epoch": 0.4114982221839605, "grad_norm": 0.33947882056236267, "learning_rate": 0.00011772443104277415, "loss": 1.2292, "step": 31667 }, { "epoch": 0.4115112167278764, "grad_norm": 0.4190872609615326, "learning_rate": 0.00011772183158086276, "loss": 1.4618, "step": 31668 }, { "epoch": 0.41152421127179223, "grad_norm": 0.3576473295688629, "learning_rate": 0.00011771923211895137, "loss": 1.2912, "step": 31669 }, { "epoch": 0.41153720581570813, "grad_norm": 0.45064982771873474, "learning_rate": 0.00011771663265704, "loss": 1.3917, "step": 31670 }, { "epoch": 0.41155020035962403, "grad_norm": 0.3616502285003662, "learning_rate": 0.00011771403319512862, "loss": 1.3541, "step": 31671 }, { "epoch": 0.4115631949035399, "grad_norm": 0.4322145879268646, "learning_rate": 0.00011771143373321723, "loss": 1.3843, "step": 31672 }, { "epoch": 0.4115761894474558, "grad_norm": 0.4824223220348358, "learning_rate": 0.00011770883427130584, "loss": 1.1931, "step": 31673 }, { "epoch": 0.4115891839913716, "grad_norm": 0.22588540613651276, "learning_rate": 0.00011770623480939446, "loss": 1.3035, "step": 31674 }, { "epoch": 0.4116021785352875, "grad_norm": 0.41667428612709045, "learning_rate": 0.00011770363534748307, "loss": 1.4036, "step": 31675 }, { "epoch": 0.41161517307920337, "grad_norm": 0.429872989654541, "learning_rate": 0.00011770103588557169, "loss": 1.4253, "step": 31676 }, { "epoch": 0.41162816762311927, "grad_norm": 0.4447936415672302, "learning_rate": 0.00011769843642366032, "loss": 1.5114, "step": 31677 }, { "epoch": 0.4116411621670351, "grad_norm": 0.47786134481430054, "learning_rate": 0.00011769583696174893, "loss": 1.3995, "step": 31678 }, { "epoch": 0.411654156710951, "grad_norm": 0.42816466093063354, "learning_rate": 0.00011769323749983755, "loss": 1.3131, "step": 31679 }, { "epoch": 0.41166715125486686, "grad_norm": 0.38027897477149963, "learning_rate": 0.00011769063803792614, "loss": 1.4852, "step": 31680 }, { "epoch": 0.41168014579878276, "grad_norm": 0.3440971374511719, "learning_rate": 0.00011768803857601478, "loss": 1.3942, "step": 31681 }, { "epoch": 0.4116931403426986, "grad_norm": 0.3857981264591217, "learning_rate": 0.00011768543911410339, "loss": 1.3695, "step": 31682 }, { "epoch": 0.4117061348866145, "grad_norm": 0.3942941725254059, "learning_rate": 0.000117682839652192, "loss": 1.3741, "step": 31683 }, { "epoch": 0.41171912943053035, "grad_norm": 0.40192854404449463, "learning_rate": 0.00011768024019028061, "loss": 1.3453, "step": 31684 }, { "epoch": 0.41173212397444625, "grad_norm": 0.4272104799747467, "learning_rate": 0.00011767764072836924, "loss": 1.4978, "step": 31685 }, { "epoch": 0.4117451185183621, "grad_norm": 0.4035555124282837, "learning_rate": 0.00011767504126645785, "loss": 1.6438, "step": 31686 }, { "epoch": 0.411758113062278, "grad_norm": 0.4157753586769104, "learning_rate": 0.00011767244180454646, "loss": 1.3634, "step": 31687 }, { "epoch": 0.41177110760619384, "grad_norm": 0.3271634876728058, "learning_rate": 0.00011766984234263507, "loss": 1.3578, "step": 31688 }, { "epoch": 0.41178410215010974, "grad_norm": 0.48875632882118225, "learning_rate": 0.00011766724288072371, "loss": 1.5354, "step": 31689 }, { "epoch": 0.4117970966940256, "grad_norm": 0.3462316393852234, "learning_rate": 0.00011766464341881232, "loss": 1.4017, "step": 31690 }, { "epoch": 0.4118100912379415, "grad_norm": 0.3762851059436798, "learning_rate": 0.00011766204395690093, "loss": 1.4698, "step": 31691 }, { "epoch": 0.41182308578185733, "grad_norm": 0.3897680640220642, "learning_rate": 0.00011765944449498954, "loss": 1.6004, "step": 31692 }, { "epoch": 0.41183608032577323, "grad_norm": 0.41915521025657654, "learning_rate": 0.00011765684503307817, "loss": 1.443, "step": 31693 }, { "epoch": 0.4118490748696891, "grad_norm": 0.5198265910148621, "learning_rate": 0.00011765424557116678, "loss": 1.4722, "step": 31694 }, { "epoch": 0.411862069413605, "grad_norm": 0.3126645088195801, "learning_rate": 0.00011765164610925539, "loss": 1.3796, "step": 31695 }, { "epoch": 0.4118750639575208, "grad_norm": 0.47881627082824707, "learning_rate": 0.000117649046647344, "loss": 1.3092, "step": 31696 }, { "epoch": 0.4118880585014367, "grad_norm": 0.3607673645019531, "learning_rate": 0.00011764644718543262, "loss": 1.3685, "step": 31697 }, { "epoch": 0.41190105304535257, "grad_norm": 0.3296034336090088, "learning_rate": 0.00011764384772352123, "loss": 1.2878, "step": 31698 }, { "epoch": 0.4119140475892685, "grad_norm": 0.44708120822906494, "learning_rate": 0.00011764124826160985, "loss": 1.2914, "step": 31699 }, { "epoch": 0.4119270421331843, "grad_norm": 0.4136098623275757, "learning_rate": 0.00011763864879969846, "loss": 1.4351, "step": 31700 }, { "epoch": 0.4119400366771002, "grad_norm": 0.3559790253639221, "learning_rate": 0.0001176360493377871, "loss": 1.5522, "step": 31701 }, { "epoch": 0.41195303122101606, "grad_norm": 0.39897531270980835, "learning_rate": 0.0001176334498758757, "loss": 1.5008, "step": 31702 }, { "epoch": 0.41196602576493196, "grad_norm": 0.46392497420310974, "learning_rate": 0.00011763085041396432, "loss": 1.3661, "step": 31703 }, { "epoch": 0.4119790203088478, "grad_norm": 0.47510677576065063, "learning_rate": 0.00011762825095205293, "loss": 1.6113, "step": 31704 }, { "epoch": 0.4119920148527637, "grad_norm": 0.3748581111431122, "learning_rate": 0.00011762565149014155, "loss": 1.2199, "step": 31705 }, { "epoch": 0.41200500939667956, "grad_norm": 0.38417956233024597, "learning_rate": 0.00011762305202823016, "loss": 1.4346, "step": 31706 }, { "epoch": 0.41201800394059546, "grad_norm": 0.41488510370254517, "learning_rate": 0.00011762045256631877, "loss": 1.3613, "step": 31707 }, { "epoch": 0.4120309984845113, "grad_norm": 0.3389635682106018, "learning_rate": 0.00011761785310440738, "loss": 1.4276, "step": 31708 }, { "epoch": 0.4120439930284272, "grad_norm": 0.3359288275241852, "learning_rate": 0.00011761525364249601, "loss": 1.2936, "step": 31709 }, { "epoch": 0.41205698757234305, "grad_norm": 0.3816105127334595, "learning_rate": 0.00011761265418058462, "loss": 1.3175, "step": 31710 }, { "epoch": 0.41206998211625895, "grad_norm": 0.3633844256401062, "learning_rate": 0.00011761005471867323, "loss": 1.4938, "step": 31711 }, { "epoch": 0.4120829766601748, "grad_norm": 0.43474116921424866, "learning_rate": 0.00011760745525676184, "loss": 1.2917, "step": 31712 }, { "epoch": 0.4120959712040907, "grad_norm": 0.4055688679218292, "learning_rate": 0.00011760485579485048, "loss": 1.4573, "step": 31713 }, { "epoch": 0.41210896574800654, "grad_norm": 0.3859024941921234, "learning_rate": 0.00011760225633293909, "loss": 1.2373, "step": 31714 }, { "epoch": 0.41212196029192244, "grad_norm": 0.6114421486854553, "learning_rate": 0.0001175996568710277, "loss": 1.4964, "step": 31715 }, { "epoch": 0.4121349548358383, "grad_norm": 0.45155957341194153, "learning_rate": 0.00011759705740911633, "loss": 1.4001, "step": 31716 }, { "epoch": 0.4121479493797542, "grad_norm": 0.45348045229911804, "learning_rate": 0.00011759445794720494, "loss": 1.3784, "step": 31717 }, { "epoch": 0.41216094392367003, "grad_norm": 0.45521479845046997, "learning_rate": 0.00011759185848529355, "loss": 1.4011, "step": 31718 }, { "epoch": 0.41217393846758593, "grad_norm": 0.48154446482658386, "learning_rate": 0.00011758925902338216, "loss": 1.4296, "step": 31719 }, { "epoch": 0.4121869330115018, "grad_norm": 0.3529930114746094, "learning_rate": 0.0001175866595614708, "loss": 1.3059, "step": 31720 }, { "epoch": 0.4121999275554177, "grad_norm": 0.4399464428424835, "learning_rate": 0.00011758406009955941, "loss": 1.552, "step": 31721 }, { "epoch": 0.4122129220993335, "grad_norm": 0.42122894525527954, "learning_rate": 0.000117581460637648, "loss": 1.4771, "step": 31722 }, { "epoch": 0.4122259166432494, "grad_norm": 0.39818650484085083, "learning_rate": 0.00011757886117573662, "loss": 1.4132, "step": 31723 }, { "epoch": 0.41223891118716527, "grad_norm": 0.34989452362060547, "learning_rate": 0.00011757626171382525, "loss": 1.2825, "step": 31724 }, { "epoch": 0.41225190573108117, "grad_norm": 0.4541240632534027, "learning_rate": 0.00011757366225191386, "loss": 1.4453, "step": 31725 }, { "epoch": 0.412264900274997, "grad_norm": 0.3833162486553192, "learning_rate": 0.00011757106279000248, "loss": 1.3005, "step": 31726 }, { "epoch": 0.4122778948189129, "grad_norm": 0.27818942070007324, "learning_rate": 0.00011756846332809109, "loss": 1.4477, "step": 31727 }, { "epoch": 0.41229088936282876, "grad_norm": 0.43792203068733215, "learning_rate": 0.00011756586386617971, "loss": 1.5172, "step": 31728 }, { "epoch": 0.41230388390674466, "grad_norm": 0.4293571710586548, "learning_rate": 0.00011756326440426832, "loss": 1.4108, "step": 31729 }, { "epoch": 0.4123168784506605, "grad_norm": 0.41879352927207947, "learning_rate": 0.00011756066494235693, "loss": 1.4593, "step": 31730 }, { "epoch": 0.4123298729945764, "grad_norm": 0.4028339087963104, "learning_rate": 0.00011755806548044554, "loss": 1.634, "step": 31731 }, { "epoch": 0.41234286753849225, "grad_norm": 0.42921629548072815, "learning_rate": 0.00011755546601853418, "loss": 1.4531, "step": 31732 }, { "epoch": 0.41235586208240815, "grad_norm": 0.45704343914985657, "learning_rate": 0.00011755286655662279, "loss": 1.5203, "step": 31733 }, { "epoch": 0.412368856626324, "grad_norm": 0.36380091309547424, "learning_rate": 0.0001175502670947114, "loss": 1.3405, "step": 31734 }, { "epoch": 0.4123818511702399, "grad_norm": 0.4240029454231262, "learning_rate": 0.0001175476676328, "loss": 1.5142, "step": 31735 }, { "epoch": 0.41239484571415574, "grad_norm": 0.38916006684303284, "learning_rate": 0.00011754506817088864, "loss": 1.5145, "step": 31736 }, { "epoch": 0.41240784025807165, "grad_norm": 0.4423098564147949, "learning_rate": 0.00011754246870897725, "loss": 1.4158, "step": 31737 }, { "epoch": 0.4124208348019875, "grad_norm": 0.42087674140930176, "learning_rate": 0.00011753986924706586, "loss": 1.4163, "step": 31738 }, { "epoch": 0.4124338293459034, "grad_norm": 0.4316350221633911, "learning_rate": 0.00011753726978515447, "loss": 1.4245, "step": 31739 }, { "epoch": 0.41244682388981924, "grad_norm": 0.4177074730396271, "learning_rate": 0.0001175346703232431, "loss": 1.5875, "step": 31740 }, { "epoch": 0.41245981843373514, "grad_norm": 0.40894240140914917, "learning_rate": 0.00011753207086133171, "loss": 1.3955, "step": 31741 }, { "epoch": 0.412472812977651, "grad_norm": 0.4216795861721039, "learning_rate": 0.00011752947139942032, "loss": 1.4871, "step": 31742 }, { "epoch": 0.4124858075215669, "grad_norm": 0.2925211191177368, "learning_rate": 0.00011752687193750893, "loss": 1.4822, "step": 31743 }, { "epoch": 0.41249880206548273, "grad_norm": 0.41941073536872864, "learning_rate": 0.00011752427247559757, "loss": 1.4004, "step": 31744 }, { "epoch": 0.41251179660939863, "grad_norm": 0.45500990748405457, "learning_rate": 0.00011752167301368618, "loss": 1.4676, "step": 31745 }, { "epoch": 0.41252479115331453, "grad_norm": 0.4242511987686157, "learning_rate": 0.00011751907355177479, "loss": 1.1443, "step": 31746 }, { "epoch": 0.4125377856972304, "grad_norm": 0.41123929619789124, "learning_rate": 0.00011751647408986339, "loss": 1.5644, "step": 31747 }, { "epoch": 0.4125507802411463, "grad_norm": 0.4290185272693634, "learning_rate": 0.00011751387462795202, "loss": 1.4074, "step": 31748 }, { "epoch": 0.4125637747850621, "grad_norm": 0.33407479524612427, "learning_rate": 0.00011751127516604064, "loss": 1.2584, "step": 31749 }, { "epoch": 0.412576769328978, "grad_norm": 0.38675615191459656, "learning_rate": 0.00011750867570412925, "loss": 1.2887, "step": 31750 }, { "epoch": 0.41258976387289387, "grad_norm": 0.4034826159477234, "learning_rate": 0.00011750607624221787, "loss": 1.4663, "step": 31751 }, { "epoch": 0.41260275841680977, "grad_norm": 0.4832096993923187, "learning_rate": 0.00011750347678030648, "loss": 1.6129, "step": 31752 }, { "epoch": 0.4126157529607256, "grad_norm": 0.5136739611625671, "learning_rate": 0.00011750087731839509, "loss": 1.5366, "step": 31753 }, { "epoch": 0.4126287475046415, "grad_norm": 0.301689088344574, "learning_rate": 0.0001174982778564837, "loss": 1.1754, "step": 31754 }, { "epoch": 0.41264174204855736, "grad_norm": 0.3035409450531006, "learning_rate": 0.00011749567839457234, "loss": 1.2916, "step": 31755 }, { "epoch": 0.41265473659247326, "grad_norm": 0.38761475682258606, "learning_rate": 0.00011749307893266095, "loss": 1.4195, "step": 31756 }, { "epoch": 0.4126677311363891, "grad_norm": 0.3026151955127716, "learning_rate": 0.00011749047947074956, "loss": 1.4134, "step": 31757 }, { "epoch": 0.412680725680305, "grad_norm": 0.3911232650279999, "learning_rate": 0.00011748788000883817, "loss": 1.3184, "step": 31758 }, { "epoch": 0.41269372022422085, "grad_norm": 0.305425763130188, "learning_rate": 0.0001174852805469268, "loss": 1.4759, "step": 31759 }, { "epoch": 0.41270671476813675, "grad_norm": 0.3558710217475891, "learning_rate": 0.00011748268108501541, "loss": 1.5441, "step": 31760 }, { "epoch": 0.4127197093120526, "grad_norm": 0.3979869484901428, "learning_rate": 0.00011748008162310402, "loss": 1.4779, "step": 31761 }, { "epoch": 0.4127327038559685, "grad_norm": 0.4547790586948395, "learning_rate": 0.00011747748216119263, "loss": 1.4595, "step": 31762 }, { "epoch": 0.41274569839988434, "grad_norm": 0.3734130561351776, "learning_rate": 0.00011747488269928127, "loss": 1.3162, "step": 31763 }, { "epoch": 0.41275869294380024, "grad_norm": 0.34993186593055725, "learning_rate": 0.00011747228323736987, "loss": 1.1964, "step": 31764 }, { "epoch": 0.4127716874877161, "grad_norm": 0.2659223973751068, "learning_rate": 0.00011746968377545848, "loss": 1.1795, "step": 31765 }, { "epoch": 0.412784682031632, "grad_norm": 0.38865891098976135, "learning_rate": 0.00011746708431354709, "loss": 1.3731, "step": 31766 }, { "epoch": 0.41279767657554783, "grad_norm": 0.46732452511787415, "learning_rate": 0.00011746448485163573, "loss": 1.5205, "step": 31767 }, { "epoch": 0.41281067111946373, "grad_norm": 0.46368417143821716, "learning_rate": 0.00011746188538972434, "loss": 1.3195, "step": 31768 }, { "epoch": 0.4128236656633796, "grad_norm": 0.36316293478012085, "learning_rate": 0.00011745928592781295, "loss": 1.131, "step": 31769 }, { "epoch": 0.4128366602072955, "grad_norm": 0.4895988702774048, "learning_rate": 0.00011745668646590156, "loss": 1.55, "step": 31770 }, { "epoch": 0.4128496547512113, "grad_norm": 0.38821110129356384, "learning_rate": 0.00011745408700399018, "loss": 1.1851, "step": 31771 }, { "epoch": 0.4128626492951272, "grad_norm": 0.40321868658065796, "learning_rate": 0.0001174514875420788, "loss": 1.3723, "step": 31772 }, { "epoch": 0.41287564383904307, "grad_norm": 0.4872627556324005, "learning_rate": 0.0001174488880801674, "loss": 1.3602, "step": 31773 }, { "epoch": 0.412888638382959, "grad_norm": 0.3604411482810974, "learning_rate": 0.00011744628861825602, "loss": 1.6038, "step": 31774 }, { "epoch": 0.4129016329268748, "grad_norm": 0.38879451155662537, "learning_rate": 0.00011744368915634466, "loss": 1.3975, "step": 31775 }, { "epoch": 0.4129146274707907, "grad_norm": 0.3540457785129547, "learning_rate": 0.00011744108969443327, "loss": 1.3911, "step": 31776 }, { "epoch": 0.41292762201470656, "grad_norm": 0.3881840407848358, "learning_rate": 0.00011743849023252186, "loss": 1.4983, "step": 31777 }, { "epoch": 0.41294061655862246, "grad_norm": 0.39743998646736145, "learning_rate": 0.00011743589077061047, "loss": 1.5392, "step": 31778 }, { "epoch": 0.4129536111025383, "grad_norm": 0.36503171920776367, "learning_rate": 0.00011743329130869911, "loss": 1.567, "step": 31779 }, { "epoch": 0.4129666056464542, "grad_norm": 0.43378350138664246, "learning_rate": 0.00011743069184678772, "loss": 1.5279, "step": 31780 }, { "epoch": 0.41297960019037006, "grad_norm": 0.35806694626808167, "learning_rate": 0.00011742809238487633, "loss": 1.2686, "step": 31781 }, { "epoch": 0.41299259473428596, "grad_norm": 0.3607901334762573, "learning_rate": 0.00011742549292296495, "loss": 1.412, "step": 31782 }, { "epoch": 0.4130055892782018, "grad_norm": 0.4307047128677368, "learning_rate": 0.00011742289346105357, "loss": 1.3682, "step": 31783 }, { "epoch": 0.4130185838221177, "grad_norm": 0.422247976064682, "learning_rate": 0.00011742029399914218, "loss": 1.4062, "step": 31784 }, { "epoch": 0.41303157836603355, "grad_norm": 0.39503756165504456, "learning_rate": 0.00011741769453723079, "loss": 1.476, "step": 31785 }, { "epoch": 0.41304457290994945, "grad_norm": 0.4251067638397217, "learning_rate": 0.0001174150950753194, "loss": 1.3316, "step": 31786 }, { "epoch": 0.4130575674538653, "grad_norm": 0.37648358941078186, "learning_rate": 0.00011741249561340804, "loss": 1.4498, "step": 31787 }, { "epoch": 0.4130705619977812, "grad_norm": 0.43251916766166687, "learning_rate": 0.00011740989615149665, "loss": 1.4284, "step": 31788 }, { "epoch": 0.41308355654169704, "grad_norm": 0.3351757526397705, "learning_rate": 0.00011740729668958525, "loss": 1.2354, "step": 31789 }, { "epoch": 0.41309655108561294, "grad_norm": 0.4672812521457672, "learning_rate": 0.00011740469722767389, "loss": 1.4183, "step": 31790 }, { "epoch": 0.4131095456295288, "grad_norm": 0.48855289816856384, "learning_rate": 0.0001174020977657625, "loss": 1.3432, "step": 31791 }, { "epoch": 0.4131225401734447, "grad_norm": 0.43809136748313904, "learning_rate": 0.00011739949830385111, "loss": 1.3662, "step": 31792 }, { "epoch": 0.41313553471736053, "grad_norm": 0.4194406270980835, "learning_rate": 0.00011739689884193972, "loss": 1.3783, "step": 31793 }, { "epoch": 0.41314852926127643, "grad_norm": 0.43101072311401367, "learning_rate": 0.00011739429938002834, "loss": 1.3867, "step": 31794 }, { "epoch": 0.4131615238051923, "grad_norm": 0.33521032333374023, "learning_rate": 0.00011739169991811696, "loss": 1.2898, "step": 31795 }, { "epoch": 0.4131745183491082, "grad_norm": 0.4578614830970764, "learning_rate": 0.00011738910045620557, "loss": 1.4371, "step": 31796 }, { "epoch": 0.413187512893024, "grad_norm": 0.3994629681110382, "learning_rate": 0.00011738650099429418, "loss": 1.5436, "step": 31797 }, { "epoch": 0.4132005074369399, "grad_norm": 0.33394962549209595, "learning_rate": 0.00011738390153238282, "loss": 1.5098, "step": 31798 }, { "epoch": 0.41321350198085577, "grad_norm": 0.49559107422828674, "learning_rate": 0.00011738130207047143, "loss": 1.4752, "step": 31799 }, { "epoch": 0.41322649652477167, "grad_norm": 0.4040876626968384, "learning_rate": 0.00011737870260856004, "loss": 1.3613, "step": 31800 }, { "epoch": 0.4132394910686875, "grad_norm": 0.46256762742996216, "learning_rate": 0.00011737610314664865, "loss": 1.3513, "step": 31801 }, { "epoch": 0.4132524856126034, "grad_norm": 0.44079864025115967, "learning_rate": 0.00011737350368473727, "loss": 1.378, "step": 31802 }, { "epoch": 0.41326548015651926, "grad_norm": 0.389039009809494, "learning_rate": 0.00011737090422282588, "loss": 1.4559, "step": 31803 }, { "epoch": 0.41327847470043516, "grad_norm": 0.36529913544654846, "learning_rate": 0.0001173683047609145, "loss": 1.505, "step": 31804 }, { "epoch": 0.413291469244351, "grad_norm": 0.3017125427722931, "learning_rate": 0.0001173657052990031, "loss": 1.2248, "step": 31805 }, { "epoch": 0.4133044637882669, "grad_norm": 0.36836498975753784, "learning_rate": 0.00011736310583709173, "loss": 1.5097, "step": 31806 }, { "epoch": 0.41331745833218275, "grad_norm": 0.3604908883571625, "learning_rate": 0.00011736050637518034, "loss": 1.3791, "step": 31807 }, { "epoch": 0.41333045287609865, "grad_norm": 0.33806708455085754, "learning_rate": 0.00011735790691326895, "loss": 1.238, "step": 31808 }, { "epoch": 0.4133434474200145, "grad_norm": 0.4385598599910736, "learning_rate": 0.00011735530745135756, "loss": 1.6464, "step": 31809 }, { "epoch": 0.4133564419639304, "grad_norm": 0.4294182062149048, "learning_rate": 0.0001173527079894462, "loss": 1.433, "step": 31810 }, { "epoch": 0.41336943650784624, "grad_norm": 0.37577179074287415, "learning_rate": 0.00011735010852753481, "loss": 1.4455, "step": 31811 }, { "epoch": 0.41338243105176214, "grad_norm": 0.387668639421463, "learning_rate": 0.00011734750906562342, "loss": 1.637, "step": 31812 }, { "epoch": 0.413395425595678, "grad_norm": 0.48617732524871826, "learning_rate": 0.00011734490960371203, "loss": 1.4254, "step": 31813 }, { "epoch": 0.4134084201395939, "grad_norm": 0.4484746754169464, "learning_rate": 0.00011734231014180066, "loss": 1.2367, "step": 31814 }, { "epoch": 0.41342141468350974, "grad_norm": 0.41519516706466675, "learning_rate": 0.00011733971067988927, "loss": 1.5143, "step": 31815 }, { "epoch": 0.41343440922742564, "grad_norm": 0.40131473541259766, "learning_rate": 0.00011733711121797788, "loss": 1.456, "step": 31816 }, { "epoch": 0.4134474037713415, "grad_norm": 0.35350683331489563, "learning_rate": 0.00011733451175606649, "loss": 1.5, "step": 31817 }, { "epoch": 0.4134603983152574, "grad_norm": 0.3574559688568115, "learning_rate": 0.00011733191229415512, "loss": 1.2697, "step": 31818 }, { "epoch": 0.4134733928591732, "grad_norm": 0.4214079678058624, "learning_rate": 0.00011732931283224373, "loss": 1.4518, "step": 31819 }, { "epoch": 0.41348638740308913, "grad_norm": 0.432996928691864, "learning_rate": 0.00011732671337033234, "loss": 1.2822, "step": 31820 }, { "epoch": 0.413499381947005, "grad_norm": 0.38598212599754333, "learning_rate": 0.00011732411390842095, "loss": 1.4522, "step": 31821 }, { "epoch": 0.4135123764909209, "grad_norm": 0.39914241433143616, "learning_rate": 0.00011732151444650959, "loss": 1.4127, "step": 31822 }, { "epoch": 0.4135253710348368, "grad_norm": 0.3408644497394562, "learning_rate": 0.0001173189149845982, "loss": 1.2182, "step": 31823 }, { "epoch": 0.4135383655787526, "grad_norm": 0.47123709321022034, "learning_rate": 0.00011731631552268681, "loss": 1.4532, "step": 31824 }, { "epoch": 0.4135513601226685, "grad_norm": 0.3978356420993805, "learning_rate": 0.00011731371606077543, "loss": 1.3469, "step": 31825 }, { "epoch": 0.41356435466658437, "grad_norm": 0.4253008961677551, "learning_rate": 0.00011731111659886404, "loss": 1.4096, "step": 31826 }, { "epoch": 0.41357734921050027, "grad_norm": 0.4575842618942261, "learning_rate": 0.00011730851713695265, "loss": 1.4485, "step": 31827 }, { "epoch": 0.4135903437544161, "grad_norm": 0.3874261677265167, "learning_rate": 0.00011730591767504127, "loss": 1.5444, "step": 31828 }, { "epoch": 0.413603338298332, "grad_norm": 0.4312470257282257, "learning_rate": 0.0001173033182131299, "loss": 1.3847, "step": 31829 }, { "epoch": 0.41361633284224786, "grad_norm": 0.4055170714855194, "learning_rate": 0.00011730071875121851, "loss": 1.6956, "step": 31830 }, { "epoch": 0.41362932738616376, "grad_norm": 0.4001989960670471, "learning_rate": 0.00011729811928930711, "loss": 1.4912, "step": 31831 }, { "epoch": 0.4136423219300796, "grad_norm": 0.4116976261138916, "learning_rate": 0.00011729551982739572, "loss": 1.418, "step": 31832 }, { "epoch": 0.4136553164739955, "grad_norm": 0.5365820527076721, "learning_rate": 0.00011729292036548436, "loss": 1.2788, "step": 31833 }, { "epoch": 0.41366831101791135, "grad_norm": 0.3660898506641388, "learning_rate": 0.00011729032090357297, "loss": 1.2591, "step": 31834 }, { "epoch": 0.41368130556182725, "grad_norm": 0.4454982280731201, "learning_rate": 0.00011728772144166158, "loss": 1.3251, "step": 31835 }, { "epoch": 0.4136943001057431, "grad_norm": 0.4318486154079437, "learning_rate": 0.00011728512197975019, "loss": 1.3411, "step": 31836 }, { "epoch": 0.413707294649659, "grad_norm": 0.3369883894920349, "learning_rate": 0.00011728252251783882, "loss": 1.2816, "step": 31837 }, { "epoch": 0.41372028919357484, "grad_norm": 0.4071979522705078, "learning_rate": 0.00011727992305592743, "loss": 1.3644, "step": 31838 }, { "epoch": 0.41373328373749074, "grad_norm": 0.38106846809387207, "learning_rate": 0.00011727732359401604, "loss": 1.5451, "step": 31839 }, { "epoch": 0.4137462782814066, "grad_norm": 0.33907535672187805, "learning_rate": 0.00011727472413210465, "loss": 1.3698, "step": 31840 }, { "epoch": 0.4137592728253225, "grad_norm": 0.3957054018974304, "learning_rate": 0.00011727212467019329, "loss": 1.2806, "step": 31841 }, { "epoch": 0.41377226736923833, "grad_norm": 0.5480278134346008, "learning_rate": 0.0001172695252082819, "loss": 1.4715, "step": 31842 }, { "epoch": 0.41378526191315423, "grad_norm": 0.35822397470474243, "learning_rate": 0.00011726692574637051, "loss": 1.5817, "step": 31843 }, { "epoch": 0.4137982564570701, "grad_norm": 0.47707533836364746, "learning_rate": 0.00011726432628445911, "loss": 1.3573, "step": 31844 }, { "epoch": 0.413811251000986, "grad_norm": 0.3140091001987457, "learning_rate": 0.00011726172682254775, "loss": 1.5765, "step": 31845 }, { "epoch": 0.4138242455449018, "grad_norm": 0.26068300008773804, "learning_rate": 0.00011725912736063636, "loss": 1.3899, "step": 31846 }, { "epoch": 0.4138372400888177, "grad_norm": 0.3251701593399048, "learning_rate": 0.00011725652789872497, "loss": 1.3564, "step": 31847 }, { "epoch": 0.41385023463273357, "grad_norm": 0.3763917088508606, "learning_rate": 0.00011725392843681358, "loss": 1.2111, "step": 31848 }, { "epoch": 0.41386322917664947, "grad_norm": 0.2945026159286499, "learning_rate": 0.0001172513289749022, "loss": 1.3622, "step": 31849 }, { "epoch": 0.4138762237205653, "grad_norm": 0.39809390902519226, "learning_rate": 0.00011724872951299081, "loss": 1.5935, "step": 31850 }, { "epoch": 0.4138892182644812, "grad_norm": 0.3888184726238251, "learning_rate": 0.00011724613005107942, "loss": 1.2957, "step": 31851 }, { "epoch": 0.41390221280839706, "grad_norm": 0.37752488255500793, "learning_rate": 0.00011724353058916804, "loss": 1.4743, "step": 31852 }, { "epoch": 0.41391520735231296, "grad_norm": 0.31938958168029785, "learning_rate": 0.00011724093112725667, "loss": 1.3758, "step": 31853 }, { "epoch": 0.4139282018962288, "grad_norm": 0.5017914175987244, "learning_rate": 0.00011723833166534528, "loss": 1.5197, "step": 31854 }, { "epoch": 0.4139411964401447, "grad_norm": 0.4456081688404083, "learning_rate": 0.0001172357322034339, "loss": 1.5705, "step": 31855 }, { "epoch": 0.41395419098406055, "grad_norm": 0.45309075713157654, "learning_rate": 0.0001172331327415225, "loss": 1.3788, "step": 31856 }, { "epoch": 0.41396718552797646, "grad_norm": 0.35578569769859314, "learning_rate": 0.00011723053327961113, "loss": 1.4373, "step": 31857 }, { "epoch": 0.4139801800718923, "grad_norm": 0.3951316177845001, "learning_rate": 0.00011722793381769974, "loss": 1.429, "step": 31858 }, { "epoch": 0.4139931746158082, "grad_norm": 0.327208012342453, "learning_rate": 0.00011722533435578835, "loss": 1.4253, "step": 31859 }, { "epoch": 0.41400616915972405, "grad_norm": 0.39285707473754883, "learning_rate": 0.00011722273489387696, "loss": 1.5298, "step": 31860 }, { "epoch": 0.41401916370363995, "grad_norm": 0.39437335729599, "learning_rate": 0.00011722013543196559, "loss": 1.4457, "step": 31861 }, { "epoch": 0.4140321582475558, "grad_norm": 0.37862467765808105, "learning_rate": 0.0001172175359700542, "loss": 1.4461, "step": 31862 }, { "epoch": 0.4140451527914717, "grad_norm": 0.3078564703464508, "learning_rate": 0.00011721493650814281, "loss": 1.4327, "step": 31863 }, { "epoch": 0.41405814733538754, "grad_norm": 0.4065170884132385, "learning_rate": 0.00011721233704623145, "loss": 1.2792, "step": 31864 }, { "epoch": 0.41407114187930344, "grad_norm": 0.46307018399238586, "learning_rate": 0.00011720973758432006, "loss": 1.3912, "step": 31865 }, { "epoch": 0.4140841364232193, "grad_norm": 0.3540421426296234, "learning_rate": 0.00011720713812240867, "loss": 1.4259, "step": 31866 }, { "epoch": 0.4140971309671352, "grad_norm": 0.34455418586730957, "learning_rate": 0.00011720453866049728, "loss": 1.5064, "step": 31867 }, { "epoch": 0.41411012551105103, "grad_norm": 0.35030514001846313, "learning_rate": 0.0001172019391985859, "loss": 1.199, "step": 31868 }, { "epoch": 0.41412312005496693, "grad_norm": 0.3843987286090851, "learning_rate": 0.00011719933973667452, "loss": 1.3091, "step": 31869 }, { "epoch": 0.4141361145988828, "grad_norm": 0.310016006231308, "learning_rate": 0.00011719674027476313, "loss": 1.3143, "step": 31870 }, { "epoch": 0.4141491091427987, "grad_norm": 0.560285747051239, "learning_rate": 0.00011719414081285174, "loss": 1.2457, "step": 31871 }, { "epoch": 0.4141621036867145, "grad_norm": 0.43611177802085876, "learning_rate": 0.00011719154135094038, "loss": 1.3695, "step": 31872 }, { "epoch": 0.4141750982306304, "grad_norm": 0.40217819809913635, "learning_rate": 0.00011718894188902897, "loss": 1.4057, "step": 31873 }, { "epoch": 0.41418809277454627, "grad_norm": 0.35014262795448303, "learning_rate": 0.00011718634242711758, "loss": 1.3944, "step": 31874 }, { "epoch": 0.41420108731846217, "grad_norm": 0.4676024913787842, "learning_rate": 0.0001171837429652062, "loss": 1.2367, "step": 31875 }, { "epoch": 0.414214081862378, "grad_norm": 0.42215174436569214, "learning_rate": 0.00011718114350329483, "loss": 1.3562, "step": 31876 }, { "epoch": 0.4142270764062939, "grad_norm": 0.4098634719848633, "learning_rate": 0.00011717854404138344, "loss": 1.5743, "step": 31877 }, { "epoch": 0.41424007095020976, "grad_norm": 0.48227232694625854, "learning_rate": 0.00011717594457947206, "loss": 1.4586, "step": 31878 }, { "epoch": 0.41425306549412566, "grad_norm": 0.3555050194263458, "learning_rate": 0.00011717334511756067, "loss": 1.1874, "step": 31879 }, { "epoch": 0.4142660600380415, "grad_norm": 0.4402369260787964, "learning_rate": 0.00011717074565564929, "loss": 1.4736, "step": 31880 }, { "epoch": 0.4142790545819574, "grad_norm": 0.4640365540981293, "learning_rate": 0.0001171681461937379, "loss": 1.459, "step": 31881 }, { "epoch": 0.41429204912587325, "grad_norm": 0.3996478021144867, "learning_rate": 0.00011716554673182651, "loss": 1.3094, "step": 31882 }, { "epoch": 0.41430504366978915, "grad_norm": 0.3977143168449402, "learning_rate": 0.00011716294726991512, "loss": 1.3877, "step": 31883 }, { "epoch": 0.414318038213705, "grad_norm": 0.2982369363307953, "learning_rate": 0.00011716034780800376, "loss": 1.1032, "step": 31884 }, { "epoch": 0.4143310327576209, "grad_norm": 0.5057021379470825, "learning_rate": 0.00011715774834609237, "loss": 1.5814, "step": 31885 }, { "epoch": 0.41434402730153674, "grad_norm": 0.3497140407562256, "learning_rate": 0.00011715514888418097, "loss": 1.4647, "step": 31886 }, { "epoch": 0.41435702184545264, "grad_norm": 0.4693131446838379, "learning_rate": 0.00011715254942226958, "loss": 1.4025, "step": 31887 }, { "epoch": 0.4143700163893685, "grad_norm": 0.3365936577320099, "learning_rate": 0.00011714994996035822, "loss": 1.4658, "step": 31888 }, { "epoch": 0.4143830109332844, "grad_norm": 0.390459269285202, "learning_rate": 0.00011714735049844683, "loss": 1.2443, "step": 31889 }, { "epoch": 0.41439600547720024, "grad_norm": 0.3920002281665802, "learning_rate": 0.00011714475103653544, "loss": 1.3216, "step": 31890 }, { "epoch": 0.41440900002111614, "grad_norm": 0.3819471299648285, "learning_rate": 0.00011714215157462405, "loss": 1.1292, "step": 31891 }, { "epoch": 0.414421994565032, "grad_norm": 0.3853062391281128, "learning_rate": 0.00011713955211271268, "loss": 1.3044, "step": 31892 }, { "epoch": 0.4144349891089479, "grad_norm": 0.3963586986064911, "learning_rate": 0.00011713695265080129, "loss": 1.437, "step": 31893 }, { "epoch": 0.4144479836528637, "grad_norm": 0.4465895891189575, "learning_rate": 0.0001171343531888899, "loss": 1.4627, "step": 31894 }, { "epoch": 0.41446097819677963, "grad_norm": 0.4670734405517578, "learning_rate": 0.00011713175372697851, "loss": 1.357, "step": 31895 }, { "epoch": 0.4144739727406955, "grad_norm": 0.40060892701148987, "learning_rate": 0.00011712915426506715, "loss": 1.3375, "step": 31896 }, { "epoch": 0.4144869672846114, "grad_norm": 0.4602660834789276, "learning_rate": 0.00011712655480315576, "loss": 1.4134, "step": 31897 }, { "epoch": 0.4144999618285273, "grad_norm": 0.41173529624938965, "learning_rate": 0.00011712395534124437, "loss": 1.2859, "step": 31898 }, { "epoch": 0.4145129563724431, "grad_norm": 0.27473360300064087, "learning_rate": 0.000117121355879333, "loss": 1.2874, "step": 31899 }, { "epoch": 0.414525950916359, "grad_norm": 0.3998537063598633, "learning_rate": 0.0001171187564174216, "loss": 1.5878, "step": 31900 }, { "epoch": 0.41453894546027487, "grad_norm": 0.308927983045578, "learning_rate": 0.00011711615695551022, "loss": 1.2909, "step": 31901 }, { "epoch": 0.41455194000419077, "grad_norm": 0.44094279408454895, "learning_rate": 0.00011711355749359883, "loss": 1.5492, "step": 31902 }, { "epoch": 0.4145649345481066, "grad_norm": 0.3875715136528015, "learning_rate": 0.00011711095803168745, "loss": 1.5206, "step": 31903 }, { "epoch": 0.4145779290920225, "grad_norm": 0.38723552227020264, "learning_rate": 0.00011710835856977606, "loss": 1.2304, "step": 31904 }, { "epoch": 0.41459092363593836, "grad_norm": 0.4022468626499176, "learning_rate": 0.00011710575910786467, "loss": 1.4756, "step": 31905 }, { "epoch": 0.41460391817985426, "grad_norm": 0.4944455325603485, "learning_rate": 0.00011710315964595328, "loss": 1.4196, "step": 31906 }, { "epoch": 0.4146169127237701, "grad_norm": 0.3947686553001404, "learning_rate": 0.00011710056018404192, "loss": 1.4881, "step": 31907 }, { "epoch": 0.414629907267686, "grad_norm": 0.33791208267211914, "learning_rate": 0.00011709796072213053, "loss": 1.2132, "step": 31908 }, { "epoch": 0.41464290181160185, "grad_norm": 0.4213022291660309, "learning_rate": 0.00011709536126021914, "loss": 1.4172, "step": 31909 }, { "epoch": 0.41465589635551775, "grad_norm": 0.3720908761024475, "learning_rate": 0.00011709276179830775, "loss": 1.3545, "step": 31910 }, { "epoch": 0.4146688908994336, "grad_norm": 0.3788302540779114, "learning_rate": 0.00011709016233639638, "loss": 1.3457, "step": 31911 }, { "epoch": 0.4146818854433495, "grad_norm": 0.34555670619010925, "learning_rate": 0.00011708756287448499, "loss": 1.1321, "step": 31912 }, { "epoch": 0.41469487998726534, "grad_norm": 0.4369875192642212, "learning_rate": 0.0001170849634125736, "loss": 1.6589, "step": 31913 }, { "epoch": 0.41470787453118124, "grad_norm": 0.4749024212360382, "learning_rate": 0.00011708236395066221, "loss": 1.477, "step": 31914 }, { "epoch": 0.4147208690750971, "grad_norm": 0.46750178933143616, "learning_rate": 0.00011707976448875084, "loss": 1.3789, "step": 31915 }, { "epoch": 0.414733863619013, "grad_norm": 0.3610598146915436, "learning_rate": 0.00011707716502683945, "loss": 1.273, "step": 31916 }, { "epoch": 0.41474685816292883, "grad_norm": 0.4847123920917511, "learning_rate": 0.00011707456556492806, "loss": 1.4229, "step": 31917 }, { "epoch": 0.41475985270684473, "grad_norm": 0.43790897727012634, "learning_rate": 0.00011707196610301667, "loss": 1.3437, "step": 31918 }, { "epoch": 0.4147728472507606, "grad_norm": 0.4149693548679352, "learning_rate": 0.00011706936664110531, "loss": 1.4696, "step": 31919 }, { "epoch": 0.4147858417946765, "grad_norm": 0.48989924788475037, "learning_rate": 0.00011706676717919392, "loss": 1.5989, "step": 31920 }, { "epoch": 0.4147988363385923, "grad_norm": 0.4579598009586334, "learning_rate": 0.00011706416771728253, "loss": 1.5312, "step": 31921 }, { "epoch": 0.4148118308825082, "grad_norm": 0.47385668754577637, "learning_rate": 0.00011706156825537114, "loss": 1.4213, "step": 31922 }, { "epoch": 0.41482482542642407, "grad_norm": 0.42061296105384827, "learning_rate": 0.00011705896879345976, "loss": 1.5059, "step": 31923 }, { "epoch": 0.41483781997033997, "grad_norm": 0.40995970368385315, "learning_rate": 0.00011705636933154838, "loss": 1.4611, "step": 31924 }, { "epoch": 0.4148508145142558, "grad_norm": 0.38926807045936584, "learning_rate": 0.00011705376986963699, "loss": 1.5426, "step": 31925 }, { "epoch": 0.4148638090581717, "grad_norm": 0.3989414572715759, "learning_rate": 0.0001170511704077256, "loss": 1.5748, "step": 31926 }, { "epoch": 0.41487680360208756, "grad_norm": 0.3369590938091278, "learning_rate": 0.00011704857094581424, "loss": 1.3904, "step": 31927 }, { "epoch": 0.41488979814600346, "grad_norm": 0.4963065981864929, "learning_rate": 0.00011704597148390283, "loss": 1.5211, "step": 31928 }, { "epoch": 0.4149027926899193, "grad_norm": 0.5032174587249756, "learning_rate": 0.00011704337202199144, "loss": 1.6569, "step": 31929 }, { "epoch": 0.4149157872338352, "grad_norm": 0.4077114760875702, "learning_rate": 0.00011704077256008005, "loss": 1.2416, "step": 31930 }, { "epoch": 0.41492878177775105, "grad_norm": 0.4691038131713867, "learning_rate": 0.00011703817309816869, "loss": 1.4702, "step": 31931 }, { "epoch": 0.41494177632166696, "grad_norm": 0.37105709314346313, "learning_rate": 0.0001170355736362573, "loss": 1.4023, "step": 31932 }, { "epoch": 0.4149547708655828, "grad_norm": 0.4176577031612396, "learning_rate": 0.00011703297417434591, "loss": 1.3434, "step": 31933 }, { "epoch": 0.4149677654094987, "grad_norm": 0.35434553027153015, "learning_rate": 0.00011703037471243453, "loss": 1.3154, "step": 31934 }, { "epoch": 0.41498075995341455, "grad_norm": 0.44269946217536926, "learning_rate": 0.00011702777525052315, "loss": 1.5711, "step": 31935 }, { "epoch": 0.41499375449733045, "grad_norm": 0.4185575544834137, "learning_rate": 0.00011702517578861176, "loss": 1.3754, "step": 31936 }, { "epoch": 0.4150067490412463, "grad_norm": 0.4493005573749542, "learning_rate": 0.00011702257632670037, "loss": 1.4258, "step": 31937 }, { "epoch": 0.4150197435851622, "grad_norm": 0.3071945309638977, "learning_rate": 0.00011701997686478901, "loss": 1.5116, "step": 31938 }, { "epoch": 0.41503273812907804, "grad_norm": 0.43161505460739136, "learning_rate": 0.00011701737740287762, "loss": 1.4191, "step": 31939 }, { "epoch": 0.41504573267299394, "grad_norm": 0.39987698197364807, "learning_rate": 0.00011701477794096623, "loss": 1.3056, "step": 31940 }, { "epoch": 0.4150587272169098, "grad_norm": 0.3714471161365509, "learning_rate": 0.00011701217847905483, "loss": 1.4079, "step": 31941 }, { "epoch": 0.4150717217608257, "grad_norm": 0.31806376576423645, "learning_rate": 0.00011700957901714347, "loss": 1.5493, "step": 31942 }, { "epoch": 0.41508471630474153, "grad_norm": 0.3202168345451355, "learning_rate": 0.00011700697955523208, "loss": 1.4257, "step": 31943 }, { "epoch": 0.41509771084865743, "grad_norm": 0.39771831035614014, "learning_rate": 0.00011700438009332069, "loss": 1.2237, "step": 31944 }, { "epoch": 0.4151107053925733, "grad_norm": 0.29544389247894287, "learning_rate": 0.0001170017806314093, "loss": 1.3502, "step": 31945 }, { "epoch": 0.4151236999364892, "grad_norm": 0.39181992411613464, "learning_rate": 0.00011699918116949792, "loss": 1.4435, "step": 31946 }, { "epoch": 0.415136694480405, "grad_norm": 0.5179322361946106, "learning_rate": 0.00011699658170758654, "loss": 1.3349, "step": 31947 }, { "epoch": 0.4151496890243209, "grad_norm": 0.38667455315589905, "learning_rate": 0.00011699398224567515, "loss": 1.3788, "step": 31948 }, { "epoch": 0.41516268356823677, "grad_norm": 0.41124311089515686, "learning_rate": 0.00011699138278376376, "loss": 1.4866, "step": 31949 }, { "epoch": 0.41517567811215267, "grad_norm": 0.49463340640068054, "learning_rate": 0.0001169887833218524, "loss": 1.3833, "step": 31950 }, { "epoch": 0.4151886726560685, "grad_norm": 0.3388766348361969, "learning_rate": 0.000116986183859941, "loss": 1.3869, "step": 31951 }, { "epoch": 0.4152016671999844, "grad_norm": 0.426586776971817, "learning_rate": 0.00011698358439802962, "loss": 1.532, "step": 31952 }, { "epoch": 0.41521466174390026, "grad_norm": 0.3509022295475006, "learning_rate": 0.00011698098493611821, "loss": 1.3991, "step": 31953 }, { "epoch": 0.41522765628781616, "grad_norm": 0.33200231194496155, "learning_rate": 0.00011697838547420685, "loss": 1.5806, "step": 31954 }, { "epoch": 0.415240650831732, "grad_norm": 0.3706192374229431, "learning_rate": 0.00011697578601229546, "loss": 1.4681, "step": 31955 }, { "epoch": 0.4152536453756479, "grad_norm": 0.33666497468948364, "learning_rate": 0.00011697318655038407, "loss": 1.3989, "step": 31956 }, { "epoch": 0.41526663991956375, "grad_norm": 0.5073790550231934, "learning_rate": 0.00011697058708847269, "loss": 1.3996, "step": 31957 }, { "epoch": 0.41527963446347965, "grad_norm": 0.4137423634529114, "learning_rate": 0.00011696798762656131, "loss": 1.2645, "step": 31958 }, { "epoch": 0.4152926290073955, "grad_norm": 0.32358792424201965, "learning_rate": 0.00011696538816464992, "loss": 1.3724, "step": 31959 }, { "epoch": 0.4153056235513114, "grad_norm": 0.4449808597564697, "learning_rate": 0.00011696278870273853, "loss": 1.5268, "step": 31960 }, { "epoch": 0.41531861809522724, "grad_norm": 0.43996110558509827, "learning_rate": 0.00011696018924082714, "loss": 1.4571, "step": 31961 }, { "epoch": 0.41533161263914314, "grad_norm": 0.3297157883644104, "learning_rate": 0.00011695758977891578, "loss": 1.5195, "step": 31962 }, { "epoch": 0.415344607183059, "grad_norm": 0.40892839431762695, "learning_rate": 0.00011695499031700439, "loss": 1.3716, "step": 31963 }, { "epoch": 0.4153576017269749, "grad_norm": 0.3498488664627075, "learning_rate": 0.000116952390855093, "loss": 1.2525, "step": 31964 }, { "epoch": 0.41537059627089074, "grad_norm": 0.3851849436759949, "learning_rate": 0.00011694979139318161, "loss": 1.4043, "step": 31965 }, { "epoch": 0.41538359081480664, "grad_norm": 0.4749259054660797, "learning_rate": 0.00011694719193127024, "loss": 1.3713, "step": 31966 }, { "epoch": 0.4153965853587225, "grad_norm": 0.43708065152168274, "learning_rate": 0.00011694459246935885, "loss": 1.412, "step": 31967 }, { "epoch": 0.4154095799026384, "grad_norm": 0.4439527988433838, "learning_rate": 0.00011694199300744746, "loss": 1.4843, "step": 31968 }, { "epoch": 0.4154225744465542, "grad_norm": 0.31829768419265747, "learning_rate": 0.00011693939354553607, "loss": 1.5222, "step": 31969 }, { "epoch": 0.4154355689904701, "grad_norm": 0.38333284854888916, "learning_rate": 0.0001169367940836247, "loss": 1.5597, "step": 31970 }, { "epoch": 0.415448563534386, "grad_norm": 0.5027971863746643, "learning_rate": 0.0001169341946217133, "loss": 1.5013, "step": 31971 }, { "epoch": 0.4154615580783019, "grad_norm": 0.4251062572002411, "learning_rate": 0.00011693159515980192, "loss": 1.4224, "step": 31972 }, { "epoch": 0.4154745526222177, "grad_norm": 0.46308383345603943, "learning_rate": 0.00011692899569789055, "loss": 1.4266, "step": 31973 }, { "epoch": 0.4154875471661336, "grad_norm": 0.4564632177352905, "learning_rate": 0.00011692639623597917, "loss": 1.3427, "step": 31974 }, { "epoch": 0.4155005417100495, "grad_norm": 0.4805711805820465, "learning_rate": 0.00011692379677406778, "loss": 1.46, "step": 31975 }, { "epoch": 0.41551353625396537, "grad_norm": 0.4796523153781891, "learning_rate": 0.00011692119731215639, "loss": 1.4555, "step": 31976 }, { "epoch": 0.41552653079788127, "grad_norm": 0.4168841242790222, "learning_rate": 0.00011691859785024501, "loss": 1.3406, "step": 31977 }, { "epoch": 0.4155395253417971, "grad_norm": 0.3594214618206024, "learning_rate": 0.00011691599838833362, "loss": 1.2304, "step": 31978 }, { "epoch": 0.415552519885713, "grad_norm": 0.3321438431739807, "learning_rate": 0.00011691339892642223, "loss": 1.2159, "step": 31979 }, { "epoch": 0.41556551442962886, "grad_norm": 0.4258446991443634, "learning_rate": 0.00011691079946451084, "loss": 1.301, "step": 31980 }, { "epoch": 0.41557850897354476, "grad_norm": 0.3417115807533264, "learning_rate": 0.00011690820000259948, "loss": 1.4174, "step": 31981 }, { "epoch": 0.4155915035174606, "grad_norm": 0.47925522923469543, "learning_rate": 0.0001169056005406881, "loss": 1.4056, "step": 31982 }, { "epoch": 0.4156044980613765, "grad_norm": 0.38244304060935974, "learning_rate": 0.00011690300107877669, "loss": 1.3481, "step": 31983 }, { "epoch": 0.41561749260529235, "grad_norm": 0.4504685699939728, "learning_rate": 0.0001169004016168653, "loss": 1.3532, "step": 31984 }, { "epoch": 0.41563048714920825, "grad_norm": 0.39021608233451843, "learning_rate": 0.00011689780215495394, "loss": 1.41, "step": 31985 }, { "epoch": 0.4156434816931241, "grad_norm": 0.3274133503437042, "learning_rate": 0.00011689520269304255, "loss": 1.3299, "step": 31986 }, { "epoch": 0.41565647623704, "grad_norm": 0.37035664916038513, "learning_rate": 0.00011689260323113116, "loss": 1.384, "step": 31987 }, { "epoch": 0.41566947078095584, "grad_norm": 0.4943562150001526, "learning_rate": 0.00011689000376921977, "loss": 1.2374, "step": 31988 }, { "epoch": 0.41568246532487174, "grad_norm": 0.43165430426597595, "learning_rate": 0.0001168874043073084, "loss": 1.512, "step": 31989 }, { "epoch": 0.4156954598687876, "grad_norm": 0.4384680390357971, "learning_rate": 0.00011688480484539701, "loss": 1.544, "step": 31990 }, { "epoch": 0.4157084544127035, "grad_norm": 0.4445725679397583, "learning_rate": 0.00011688220538348562, "loss": 1.544, "step": 31991 }, { "epoch": 0.41572144895661933, "grad_norm": 0.3301665186882019, "learning_rate": 0.00011687960592157423, "loss": 1.4456, "step": 31992 }, { "epoch": 0.41573444350053523, "grad_norm": 0.3533686697483063, "learning_rate": 0.00011687700645966287, "loss": 1.2128, "step": 31993 }, { "epoch": 0.4157474380444511, "grad_norm": 0.4108389914035797, "learning_rate": 0.00011687440699775148, "loss": 1.5507, "step": 31994 }, { "epoch": 0.415760432588367, "grad_norm": 0.3706664443016052, "learning_rate": 0.00011687180753584008, "loss": 1.474, "step": 31995 }, { "epoch": 0.4157734271322828, "grad_norm": 0.4060254693031311, "learning_rate": 0.00011686920807392869, "loss": 1.5062, "step": 31996 }, { "epoch": 0.4157864216761987, "grad_norm": 0.43567201495170593, "learning_rate": 0.00011686660861201733, "loss": 1.3666, "step": 31997 }, { "epoch": 0.41579941622011457, "grad_norm": 0.47659802436828613, "learning_rate": 0.00011686400915010594, "loss": 1.4337, "step": 31998 }, { "epoch": 0.41581241076403047, "grad_norm": 0.4370724558830261, "learning_rate": 0.00011686140968819455, "loss": 1.1913, "step": 31999 }, { "epoch": 0.4158254053079463, "grad_norm": 0.3840669095516205, "learning_rate": 0.00011685881022628316, "loss": 1.2953, "step": 32000 }, { "epoch": 0.4158383998518622, "grad_norm": 0.42917683720588684, "learning_rate": 0.00011685621076437178, "loss": 1.463, "step": 32001 }, { "epoch": 0.41585139439577806, "grad_norm": 0.2843080759048462, "learning_rate": 0.0001168536113024604, "loss": 1.3629, "step": 32002 }, { "epoch": 0.41586438893969396, "grad_norm": 0.3443150818347931, "learning_rate": 0.000116851011840549, "loss": 1.2903, "step": 32003 }, { "epoch": 0.4158773834836098, "grad_norm": 0.32550954818725586, "learning_rate": 0.00011684841237863762, "loss": 1.3704, "step": 32004 }, { "epoch": 0.4158903780275257, "grad_norm": 0.47192445397377014, "learning_rate": 0.00011684581291672625, "loss": 1.4963, "step": 32005 }, { "epoch": 0.41590337257144155, "grad_norm": 0.3771071135997772, "learning_rate": 0.00011684321345481486, "loss": 1.4452, "step": 32006 }, { "epoch": 0.41591636711535745, "grad_norm": 0.36138543486595154, "learning_rate": 0.00011684061399290348, "loss": 1.3879, "step": 32007 }, { "epoch": 0.4159293616592733, "grad_norm": 0.40519770979881287, "learning_rate": 0.00011683801453099207, "loss": 1.3834, "step": 32008 }, { "epoch": 0.4159423562031892, "grad_norm": 0.4360518753528595, "learning_rate": 0.00011683541506908071, "loss": 1.4514, "step": 32009 }, { "epoch": 0.41595535074710505, "grad_norm": 0.30168673396110535, "learning_rate": 0.00011683281560716932, "loss": 1.4035, "step": 32010 }, { "epoch": 0.41596834529102095, "grad_norm": 0.43757927417755127, "learning_rate": 0.00011683021614525793, "loss": 1.4893, "step": 32011 }, { "epoch": 0.4159813398349368, "grad_norm": 0.3698733448982239, "learning_rate": 0.00011682761668334656, "loss": 1.2706, "step": 32012 }, { "epoch": 0.4159943343788527, "grad_norm": 0.3862118422985077, "learning_rate": 0.00011682501722143517, "loss": 1.5849, "step": 32013 }, { "epoch": 0.41600732892276854, "grad_norm": 0.48158594965934753, "learning_rate": 0.00011682241775952378, "loss": 1.7763, "step": 32014 }, { "epoch": 0.41602032346668444, "grad_norm": 0.6137977838516235, "learning_rate": 0.00011681981829761239, "loss": 1.5746, "step": 32015 }, { "epoch": 0.4160333180106003, "grad_norm": 0.37493470311164856, "learning_rate": 0.00011681721883570103, "loss": 1.4512, "step": 32016 }, { "epoch": 0.4160463125545162, "grad_norm": 0.43486329913139343, "learning_rate": 0.00011681461937378964, "loss": 1.2644, "step": 32017 }, { "epoch": 0.41605930709843203, "grad_norm": 0.3970802426338196, "learning_rate": 0.00011681201991187825, "loss": 1.2633, "step": 32018 }, { "epoch": 0.41607230164234793, "grad_norm": 0.48810404539108276, "learning_rate": 0.00011680942044996686, "loss": 1.4063, "step": 32019 }, { "epoch": 0.4160852961862638, "grad_norm": 0.3442177176475525, "learning_rate": 0.00011680682098805549, "loss": 1.3658, "step": 32020 }, { "epoch": 0.4160982907301797, "grad_norm": 0.44915226101875305, "learning_rate": 0.0001168042215261441, "loss": 1.386, "step": 32021 }, { "epoch": 0.4161112852740955, "grad_norm": 0.38550862669944763, "learning_rate": 0.00011680162206423271, "loss": 1.3968, "step": 32022 }, { "epoch": 0.4161242798180114, "grad_norm": 0.38212844729423523, "learning_rate": 0.00011679902260232132, "loss": 1.4965, "step": 32023 }, { "epoch": 0.41613727436192727, "grad_norm": 0.35502326488494873, "learning_rate": 0.00011679642314040996, "loss": 1.3074, "step": 32024 }, { "epoch": 0.41615026890584317, "grad_norm": 0.43055737018585205, "learning_rate": 0.00011679382367849855, "loss": 1.2291, "step": 32025 }, { "epoch": 0.416163263449759, "grad_norm": 0.34200364351272583, "learning_rate": 0.00011679122421658716, "loss": 1.4356, "step": 32026 }, { "epoch": 0.4161762579936749, "grad_norm": 0.42000001668930054, "learning_rate": 0.00011678862475467578, "loss": 1.389, "step": 32027 }, { "epoch": 0.41618925253759076, "grad_norm": 0.3771750032901764, "learning_rate": 0.00011678602529276441, "loss": 1.2203, "step": 32028 }, { "epoch": 0.41620224708150666, "grad_norm": 0.42215681076049805, "learning_rate": 0.00011678342583085302, "loss": 1.4547, "step": 32029 }, { "epoch": 0.4162152416254225, "grad_norm": 0.4950105845928192, "learning_rate": 0.00011678082636894164, "loss": 1.3919, "step": 32030 }, { "epoch": 0.4162282361693384, "grad_norm": 0.33268195390701294, "learning_rate": 0.00011677822690703025, "loss": 1.4816, "step": 32031 }, { "epoch": 0.41624123071325425, "grad_norm": 0.3720681369304657, "learning_rate": 0.00011677562744511887, "loss": 1.4295, "step": 32032 }, { "epoch": 0.41625422525717015, "grad_norm": 0.4514576494693756, "learning_rate": 0.00011677302798320748, "loss": 1.4904, "step": 32033 }, { "epoch": 0.416267219801086, "grad_norm": 0.3868033289909363, "learning_rate": 0.00011677042852129609, "loss": 1.3003, "step": 32034 }, { "epoch": 0.4162802143450019, "grad_norm": 0.4159703850746155, "learning_rate": 0.0001167678290593847, "loss": 1.3896, "step": 32035 }, { "epoch": 0.41629320888891774, "grad_norm": 0.4479852020740509, "learning_rate": 0.00011676522959747334, "loss": 1.5902, "step": 32036 }, { "epoch": 0.41630620343283364, "grad_norm": 0.4594501852989197, "learning_rate": 0.00011676263013556194, "loss": 1.3688, "step": 32037 }, { "epoch": 0.4163191979767495, "grad_norm": 0.371725857257843, "learning_rate": 0.00011676003067365055, "loss": 1.2638, "step": 32038 }, { "epoch": 0.4163321925206654, "grad_norm": 0.3222507834434509, "learning_rate": 0.00011675743121173916, "loss": 1.2595, "step": 32039 }, { "epoch": 0.41634518706458123, "grad_norm": 0.3404005467891693, "learning_rate": 0.0001167548317498278, "loss": 1.4802, "step": 32040 }, { "epoch": 0.41635818160849714, "grad_norm": 0.38105377554893494, "learning_rate": 0.00011675223228791641, "loss": 1.3554, "step": 32041 }, { "epoch": 0.416371176152413, "grad_norm": 0.4509718418121338, "learning_rate": 0.00011674963282600502, "loss": 1.328, "step": 32042 }, { "epoch": 0.4163841706963289, "grad_norm": 0.4476536512374878, "learning_rate": 0.00011674703336409363, "loss": 1.3025, "step": 32043 }, { "epoch": 0.4163971652402447, "grad_norm": 0.34343191981315613, "learning_rate": 0.00011674443390218226, "loss": 1.3024, "step": 32044 }, { "epoch": 0.4164101597841606, "grad_norm": 0.4132869839668274, "learning_rate": 0.00011674183444027087, "loss": 1.4322, "step": 32045 }, { "epoch": 0.4164231543280765, "grad_norm": 0.42127904295921326, "learning_rate": 0.00011673923497835948, "loss": 1.2302, "step": 32046 }, { "epoch": 0.4164361488719924, "grad_norm": 0.305693119764328, "learning_rate": 0.00011673663551644812, "loss": 1.3637, "step": 32047 }, { "epoch": 0.4164491434159082, "grad_norm": 0.4029181897640228, "learning_rate": 0.00011673403605453673, "loss": 1.4786, "step": 32048 }, { "epoch": 0.4164621379598241, "grad_norm": 0.3723565340042114, "learning_rate": 0.00011673143659262534, "loss": 1.3932, "step": 32049 }, { "epoch": 0.41647513250374, "grad_norm": 0.4205729067325592, "learning_rate": 0.00011672883713071394, "loss": 1.3722, "step": 32050 }, { "epoch": 0.41648812704765587, "grad_norm": 0.5118665099143982, "learning_rate": 0.00011672623766880257, "loss": 1.3516, "step": 32051 }, { "epoch": 0.41650112159157177, "grad_norm": 0.3601110279560089, "learning_rate": 0.00011672363820689118, "loss": 1.2533, "step": 32052 }, { "epoch": 0.4165141161354876, "grad_norm": 0.40130096673965454, "learning_rate": 0.0001167210387449798, "loss": 1.2874, "step": 32053 }, { "epoch": 0.4165271106794035, "grad_norm": 0.47753527760505676, "learning_rate": 0.0001167184392830684, "loss": 1.4642, "step": 32054 }, { "epoch": 0.41654010522331936, "grad_norm": 0.36688604950904846, "learning_rate": 0.00011671583982115703, "loss": 1.454, "step": 32055 }, { "epoch": 0.41655309976723526, "grad_norm": 0.4216481149196625, "learning_rate": 0.00011671324035924564, "loss": 1.4542, "step": 32056 }, { "epoch": 0.4165660943111511, "grad_norm": 0.38279709219932556, "learning_rate": 0.00011671064089733425, "loss": 1.3379, "step": 32057 }, { "epoch": 0.416579088855067, "grad_norm": 0.4438285231590271, "learning_rate": 0.00011670804143542286, "loss": 1.4319, "step": 32058 }, { "epoch": 0.41659208339898285, "grad_norm": 0.3428190052509308, "learning_rate": 0.0001167054419735115, "loss": 1.2587, "step": 32059 }, { "epoch": 0.41660507794289875, "grad_norm": 0.36510488390922546, "learning_rate": 0.00011670284251160011, "loss": 1.2772, "step": 32060 }, { "epoch": 0.4166180724868146, "grad_norm": 0.424780935049057, "learning_rate": 0.00011670024304968872, "loss": 1.3283, "step": 32061 }, { "epoch": 0.4166310670307305, "grad_norm": 0.46007877588272095, "learning_rate": 0.00011669764358777733, "loss": 1.3853, "step": 32062 }, { "epoch": 0.41664406157464634, "grad_norm": 0.4064677953720093, "learning_rate": 0.00011669504412586596, "loss": 1.3263, "step": 32063 }, { "epoch": 0.41665705611856224, "grad_norm": 0.40020063519477844, "learning_rate": 0.00011669244466395457, "loss": 1.5091, "step": 32064 }, { "epoch": 0.4166700506624781, "grad_norm": 0.36137667298316956, "learning_rate": 0.00011668984520204318, "loss": 1.1833, "step": 32065 }, { "epoch": 0.416683045206394, "grad_norm": 0.32115164399147034, "learning_rate": 0.00011668724574013179, "loss": 1.3577, "step": 32066 }, { "epoch": 0.41669603975030983, "grad_norm": 0.33294054865837097, "learning_rate": 0.00011668464627822042, "loss": 1.2544, "step": 32067 }, { "epoch": 0.41670903429422573, "grad_norm": 0.39383426308631897, "learning_rate": 0.00011668204681630903, "loss": 1.24, "step": 32068 }, { "epoch": 0.4167220288381416, "grad_norm": 0.40805667638778687, "learning_rate": 0.00011667944735439764, "loss": 1.5659, "step": 32069 }, { "epoch": 0.4167350233820575, "grad_norm": 0.36091023683547974, "learning_rate": 0.00011667684789248625, "loss": 1.2805, "step": 32070 }, { "epoch": 0.4167480179259733, "grad_norm": 0.34582990407943726, "learning_rate": 0.00011667424843057489, "loss": 1.4041, "step": 32071 }, { "epoch": 0.4167610124698892, "grad_norm": 0.34655246138572693, "learning_rate": 0.0001166716489686635, "loss": 1.4454, "step": 32072 }, { "epoch": 0.41677400701380507, "grad_norm": 0.45380038022994995, "learning_rate": 0.00011666904950675211, "loss": 1.4222, "step": 32073 }, { "epoch": 0.41678700155772097, "grad_norm": 0.4173750579357147, "learning_rate": 0.00011666645004484072, "loss": 1.5818, "step": 32074 }, { "epoch": 0.4167999961016368, "grad_norm": 0.3961604833602905, "learning_rate": 0.00011666385058292934, "loss": 1.3394, "step": 32075 }, { "epoch": 0.4168129906455527, "grad_norm": 0.3669795095920563, "learning_rate": 0.00011666125112101796, "loss": 1.2676, "step": 32076 }, { "epoch": 0.41682598518946856, "grad_norm": 0.41534513235092163, "learning_rate": 0.00011665865165910657, "loss": 1.2664, "step": 32077 }, { "epoch": 0.41683897973338446, "grad_norm": 0.3199285566806793, "learning_rate": 0.00011665605219719518, "loss": 1.2609, "step": 32078 }, { "epoch": 0.4168519742773003, "grad_norm": 0.5193150043487549, "learning_rate": 0.0001166534527352838, "loss": 1.3395, "step": 32079 }, { "epoch": 0.4168649688212162, "grad_norm": 0.26878538727760315, "learning_rate": 0.00011665085327337241, "loss": 1.1751, "step": 32080 }, { "epoch": 0.41687796336513205, "grad_norm": 0.3845578730106354, "learning_rate": 0.00011664825381146102, "loss": 1.4235, "step": 32081 }, { "epoch": 0.41689095790904795, "grad_norm": 0.5733974575996399, "learning_rate": 0.00011664565434954963, "loss": 1.3856, "step": 32082 }, { "epoch": 0.4169039524529638, "grad_norm": 0.4119655191898346, "learning_rate": 0.00011664305488763827, "loss": 1.5462, "step": 32083 }, { "epoch": 0.4169169469968797, "grad_norm": 0.2903675436973572, "learning_rate": 0.00011664045542572688, "loss": 1.3881, "step": 32084 }, { "epoch": 0.41692994154079555, "grad_norm": 0.46879932284355164, "learning_rate": 0.0001166378559638155, "loss": 1.492, "step": 32085 }, { "epoch": 0.41694293608471145, "grad_norm": 0.38961219787597656, "learning_rate": 0.00011663525650190412, "loss": 1.3829, "step": 32086 }, { "epoch": 0.4169559306286273, "grad_norm": 0.45521771907806396, "learning_rate": 0.00011663265703999273, "loss": 1.2809, "step": 32087 }, { "epoch": 0.4169689251725432, "grad_norm": 0.3626646399497986, "learning_rate": 0.00011663005757808134, "loss": 1.4247, "step": 32088 }, { "epoch": 0.41698191971645904, "grad_norm": 0.36435046792030334, "learning_rate": 0.00011662745811616995, "loss": 1.4133, "step": 32089 }, { "epoch": 0.41699491426037494, "grad_norm": 0.4388221502304077, "learning_rate": 0.00011662485865425859, "loss": 1.3028, "step": 32090 }, { "epoch": 0.4170079088042908, "grad_norm": 0.37433111667633057, "learning_rate": 0.0001166222591923472, "loss": 1.3355, "step": 32091 }, { "epoch": 0.4170209033482067, "grad_norm": 0.3647276759147644, "learning_rate": 0.0001166196597304358, "loss": 1.1414, "step": 32092 }, { "epoch": 0.41703389789212253, "grad_norm": 0.4272243082523346, "learning_rate": 0.00011661706026852441, "loss": 1.4518, "step": 32093 }, { "epoch": 0.41704689243603843, "grad_norm": 0.3637191355228424, "learning_rate": 0.00011661446080661305, "loss": 1.3407, "step": 32094 }, { "epoch": 0.4170598869799543, "grad_norm": 0.40804797410964966, "learning_rate": 0.00011661186134470166, "loss": 1.3154, "step": 32095 }, { "epoch": 0.4170728815238702, "grad_norm": 0.4135993421077728, "learning_rate": 0.00011660926188279027, "loss": 1.4536, "step": 32096 }, { "epoch": 0.417085876067786, "grad_norm": 0.42042699456214905, "learning_rate": 0.00011660666242087888, "loss": 1.3422, "step": 32097 }, { "epoch": 0.4170988706117019, "grad_norm": 0.40308523178100586, "learning_rate": 0.0001166040629589675, "loss": 1.5127, "step": 32098 }, { "epoch": 0.41711186515561777, "grad_norm": 0.3179549276828766, "learning_rate": 0.00011660146349705612, "loss": 1.384, "step": 32099 }, { "epoch": 0.41712485969953367, "grad_norm": 0.43993109464645386, "learning_rate": 0.00011659886403514473, "loss": 1.401, "step": 32100 }, { "epoch": 0.4171378542434495, "grad_norm": 0.3192737400531769, "learning_rate": 0.00011659626457323334, "loss": 1.3574, "step": 32101 }, { "epoch": 0.4171508487873654, "grad_norm": 0.4138120114803314, "learning_rate": 0.00011659366511132197, "loss": 1.5481, "step": 32102 }, { "epoch": 0.41716384333128126, "grad_norm": 0.22258590161800385, "learning_rate": 0.00011659106564941059, "loss": 1.3111, "step": 32103 }, { "epoch": 0.41717683787519716, "grad_norm": 0.36392152309417725, "learning_rate": 0.0001165884661874992, "loss": 1.4194, "step": 32104 }, { "epoch": 0.417189832419113, "grad_norm": 0.3645169734954834, "learning_rate": 0.0001165858667255878, "loss": 1.3049, "step": 32105 }, { "epoch": 0.4172028269630289, "grad_norm": 0.39033353328704834, "learning_rate": 0.00011658326726367643, "loss": 1.41, "step": 32106 }, { "epoch": 0.41721582150694475, "grad_norm": 0.3834075927734375, "learning_rate": 0.00011658066780176504, "loss": 1.2408, "step": 32107 }, { "epoch": 0.41722881605086065, "grad_norm": 0.34865954518318176, "learning_rate": 0.00011657806833985365, "loss": 1.2383, "step": 32108 }, { "epoch": 0.4172418105947765, "grad_norm": 0.37171727418899536, "learning_rate": 0.00011657546887794227, "loss": 1.3279, "step": 32109 }, { "epoch": 0.4172548051386924, "grad_norm": 0.434829980134964, "learning_rate": 0.00011657286941603089, "loss": 1.2527, "step": 32110 }, { "epoch": 0.41726779968260824, "grad_norm": 0.4001743495464325, "learning_rate": 0.0001165702699541195, "loss": 1.2942, "step": 32111 }, { "epoch": 0.41728079422652414, "grad_norm": 0.45036807656288147, "learning_rate": 0.00011656767049220811, "loss": 1.583, "step": 32112 }, { "epoch": 0.41729378877044, "grad_norm": 0.4461669623851776, "learning_rate": 0.00011656507103029672, "loss": 1.274, "step": 32113 }, { "epoch": 0.4173067833143559, "grad_norm": 0.38426461815834045, "learning_rate": 0.00011656247156838536, "loss": 1.3694, "step": 32114 }, { "epoch": 0.41731977785827173, "grad_norm": 0.41188353300094604, "learning_rate": 0.00011655987210647397, "loss": 1.4398, "step": 32115 }, { "epoch": 0.41733277240218764, "grad_norm": 0.4044858515262604, "learning_rate": 0.00011655727264456258, "loss": 1.4566, "step": 32116 }, { "epoch": 0.4173457669461035, "grad_norm": 0.3475191593170166, "learning_rate": 0.00011655467318265118, "loss": 1.475, "step": 32117 }, { "epoch": 0.4173587614900194, "grad_norm": 0.3644906282424927, "learning_rate": 0.00011655207372073982, "loss": 1.2814, "step": 32118 }, { "epoch": 0.4173717560339352, "grad_norm": 0.3164217472076416, "learning_rate": 0.00011654947425882843, "loss": 1.2209, "step": 32119 }, { "epoch": 0.4173847505778511, "grad_norm": 0.43380212783813477, "learning_rate": 0.00011654687479691704, "loss": 1.3547, "step": 32120 }, { "epoch": 0.41739774512176697, "grad_norm": 0.3839067220687866, "learning_rate": 0.00011654427533500566, "loss": 1.2857, "step": 32121 }, { "epoch": 0.4174107396656829, "grad_norm": 0.4403882622718811, "learning_rate": 0.00011654167587309427, "loss": 1.4929, "step": 32122 }, { "epoch": 0.4174237342095987, "grad_norm": 0.4642385244369507, "learning_rate": 0.00011653907641118289, "loss": 1.4891, "step": 32123 }, { "epoch": 0.4174367287535146, "grad_norm": 0.5472173094749451, "learning_rate": 0.0001165364769492715, "loss": 1.4842, "step": 32124 }, { "epoch": 0.41744972329743046, "grad_norm": 0.4123762547969818, "learning_rate": 0.00011653387748736013, "loss": 1.6344, "step": 32125 }, { "epoch": 0.41746271784134636, "grad_norm": 0.38550078868865967, "learning_rate": 0.00011653127802544875, "loss": 1.276, "step": 32126 }, { "epoch": 0.41747571238526227, "grad_norm": 0.34991931915283203, "learning_rate": 0.00011652867856353736, "loss": 1.3246, "step": 32127 }, { "epoch": 0.4174887069291781, "grad_norm": 0.2843521535396576, "learning_rate": 0.00011652607910162597, "loss": 1.3355, "step": 32128 }, { "epoch": 0.417501701473094, "grad_norm": 0.49471455812454224, "learning_rate": 0.00011652347963971459, "loss": 1.3609, "step": 32129 }, { "epoch": 0.41751469601700986, "grad_norm": 0.3524303734302521, "learning_rate": 0.0001165208801778032, "loss": 1.3714, "step": 32130 }, { "epoch": 0.41752769056092576, "grad_norm": 0.40377113223075867, "learning_rate": 0.00011651828071589181, "loss": 1.2913, "step": 32131 }, { "epoch": 0.4175406851048416, "grad_norm": 0.2524406313896179, "learning_rate": 0.00011651568125398042, "loss": 1.1992, "step": 32132 }, { "epoch": 0.4175536796487575, "grad_norm": 0.3914828896522522, "learning_rate": 0.00011651308179206906, "loss": 1.2464, "step": 32133 }, { "epoch": 0.41756667419267335, "grad_norm": 0.3798538148403168, "learning_rate": 0.00011651048233015766, "loss": 1.3539, "step": 32134 }, { "epoch": 0.41757966873658925, "grad_norm": 0.517610490322113, "learning_rate": 0.00011650788286824627, "loss": 1.3179, "step": 32135 }, { "epoch": 0.4175926632805051, "grad_norm": 0.4488769769668579, "learning_rate": 0.00011650528340633488, "loss": 1.4978, "step": 32136 }, { "epoch": 0.417605657824421, "grad_norm": 0.3117702305316925, "learning_rate": 0.00011650268394442352, "loss": 1.2528, "step": 32137 }, { "epoch": 0.41761865236833684, "grad_norm": 0.35263174772262573, "learning_rate": 0.00011650008448251213, "loss": 1.2663, "step": 32138 }, { "epoch": 0.41763164691225274, "grad_norm": 0.40952765941619873, "learning_rate": 0.00011649748502060074, "loss": 1.5245, "step": 32139 }, { "epoch": 0.4176446414561686, "grad_norm": 0.4192451238632202, "learning_rate": 0.00011649488555868935, "loss": 1.4788, "step": 32140 }, { "epoch": 0.4176576360000845, "grad_norm": 0.4450977146625519, "learning_rate": 0.00011649228609677798, "loss": 1.4296, "step": 32141 }, { "epoch": 0.41767063054400033, "grad_norm": 0.3687826097011566, "learning_rate": 0.00011648968663486659, "loss": 1.4947, "step": 32142 }, { "epoch": 0.41768362508791623, "grad_norm": 0.43868663907051086, "learning_rate": 0.0001164870871729552, "loss": 1.4133, "step": 32143 }, { "epoch": 0.4176966196318321, "grad_norm": 0.3895968496799469, "learning_rate": 0.00011648448771104381, "loss": 1.5238, "step": 32144 }, { "epoch": 0.417709614175748, "grad_norm": 0.39096376299858093, "learning_rate": 0.00011648188824913245, "loss": 1.4127, "step": 32145 }, { "epoch": 0.4177226087196638, "grad_norm": 0.43277764320373535, "learning_rate": 0.00011647928878722106, "loss": 1.4414, "step": 32146 }, { "epoch": 0.4177356032635797, "grad_norm": 0.347722589969635, "learning_rate": 0.00011647668932530966, "loss": 1.2694, "step": 32147 }, { "epoch": 0.41774859780749557, "grad_norm": 0.5539126396179199, "learning_rate": 0.00011647408986339827, "loss": 1.3442, "step": 32148 }, { "epoch": 0.41776159235141147, "grad_norm": 0.39943423867225647, "learning_rate": 0.0001164714904014869, "loss": 1.4288, "step": 32149 }, { "epoch": 0.4177745868953273, "grad_norm": 0.3799853026866913, "learning_rate": 0.00011646889093957552, "loss": 1.2454, "step": 32150 }, { "epoch": 0.4177875814392432, "grad_norm": 0.31893599033355713, "learning_rate": 0.00011646629147766413, "loss": 1.471, "step": 32151 }, { "epoch": 0.41780057598315906, "grad_norm": 0.3551245331764221, "learning_rate": 0.00011646369201575274, "loss": 1.2318, "step": 32152 }, { "epoch": 0.41781357052707496, "grad_norm": 0.44132158160209656, "learning_rate": 0.00011646109255384136, "loss": 1.4269, "step": 32153 }, { "epoch": 0.4178265650709908, "grad_norm": 0.36985716223716736, "learning_rate": 0.00011645849309192997, "loss": 1.5795, "step": 32154 }, { "epoch": 0.4178395596149067, "grad_norm": 0.525563657283783, "learning_rate": 0.00011645589363001858, "loss": 1.5245, "step": 32155 }, { "epoch": 0.41785255415882255, "grad_norm": 0.4491332471370697, "learning_rate": 0.0001164532941681072, "loss": 1.4541, "step": 32156 }, { "epoch": 0.41786554870273845, "grad_norm": 0.42616525292396545, "learning_rate": 0.00011645069470619583, "loss": 1.5457, "step": 32157 }, { "epoch": 0.4178785432466543, "grad_norm": 0.4395771026611328, "learning_rate": 0.00011644809524428444, "loss": 1.4163, "step": 32158 }, { "epoch": 0.4178915377905702, "grad_norm": 0.3255276083946228, "learning_rate": 0.00011644549578237304, "loss": 1.3785, "step": 32159 }, { "epoch": 0.41790453233448605, "grad_norm": 0.39209187030792236, "learning_rate": 0.00011644289632046168, "loss": 1.3643, "step": 32160 }, { "epoch": 0.41791752687840195, "grad_norm": 0.40622061491012573, "learning_rate": 0.00011644029685855029, "loss": 1.4214, "step": 32161 }, { "epoch": 0.4179305214223178, "grad_norm": 0.44107410311698914, "learning_rate": 0.0001164376973966389, "loss": 1.3739, "step": 32162 }, { "epoch": 0.4179435159662337, "grad_norm": 0.33544695377349854, "learning_rate": 0.00011643509793472751, "loss": 1.3716, "step": 32163 }, { "epoch": 0.41795651051014954, "grad_norm": 0.3974848687648773, "learning_rate": 0.00011643249847281614, "loss": 1.3629, "step": 32164 }, { "epoch": 0.41796950505406544, "grad_norm": 0.37257468700408936, "learning_rate": 0.00011642989901090475, "loss": 1.3994, "step": 32165 }, { "epoch": 0.4179824995979813, "grad_norm": 0.4503837823867798, "learning_rate": 0.00011642729954899336, "loss": 1.2988, "step": 32166 }, { "epoch": 0.4179954941418972, "grad_norm": 0.440286248922348, "learning_rate": 0.00011642470008708197, "loss": 1.3539, "step": 32167 }, { "epoch": 0.41800848868581303, "grad_norm": 0.48148369789123535, "learning_rate": 0.00011642210062517061, "loss": 1.4163, "step": 32168 }, { "epoch": 0.41802148322972893, "grad_norm": 0.3875707983970642, "learning_rate": 0.00011641950116325922, "loss": 1.2131, "step": 32169 }, { "epoch": 0.4180344777736448, "grad_norm": 0.4461034834384918, "learning_rate": 0.00011641690170134783, "loss": 1.479, "step": 32170 }, { "epoch": 0.4180474723175607, "grad_norm": 0.3959667682647705, "learning_rate": 0.00011641430223943644, "loss": 1.3602, "step": 32171 }, { "epoch": 0.4180604668614765, "grad_norm": 0.4443519711494446, "learning_rate": 0.00011641170277752507, "loss": 1.4481, "step": 32172 }, { "epoch": 0.4180734614053924, "grad_norm": 0.40153220295906067, "learning_rate": 0.00011640910331561368, "loss": 1.4109, "step": 32173 }, { "epoch": 0.41808645594930827, "grad_norm": 0.34715530276298523, "learning_rate": 0.00011640650385370229, "loss": 1.3425, "step": 32174 }, { "epoch": 0.41809945049322417, "grad_norm": 0.42567458748817444, "learning_rate": 0.0001164039043917909, "loss": 1.3745, "step": 32175 }, { "epoch": 0.41811244503714, "grad_norm": 0.33880865573883057, "learning_rate": 0.00011640130492987952, "loss": 1.356, "step": 32176 }, { "epoch": 0.4181254395810559, "grad_norm": 0.40803760290145874, "learning_rate": 0.00011639870546796813, "loss": 1.4618, "step": 32177 }, { "epoch": 0.41813843412497176, "grad_norm": 0.44775599241256714, "learning_rate": 0.00011639610600605674, "loss": 1.6087, "step": 32178 }, { "epoch": 0.41815142866888766, "grad_norm": 0.29526832699775696, "learning_rate": 0.00011639350654414536, "loss": 1.1344, "step": 32179 }, { "epoch": 0.4181644232128035, "grad_norm": 0.4349403977394104, "learning_rate": 0.000116390907082234, "loss": 1.3462, "step": 32180 }, { "epoch": 0.4181774177567194, "grad_norm": 0.33959266543388367, "learning_rate": 0.0001163883076203226, "loss": 1.2271, "step": 32181 }, { "epoch": 0.41819041230063525, "grad_norm": 0.4348263144493103, "learning_rate": 0.00011638570815841122, "loss": 1.438, "step": 32182 }, { "epoch": 0.41820340684455115, "grad_norm": 0.3986891210079193, "learning_rate": 0.00011638310869649983, "loss": 1.4484, "step": 32183 }, { "epoch": 0.418216401388467, "grad_norm": 0.4220660924911499, "learning_rate": 0.00011638050923458845, "loss": 1.456, "step": 32184 }, { "epoch": 0.4182293959323829, "grad_norm": 0.36104515194892883, "learning_rate": 0.00011637790977267706, "loss": 1.4926, "step": 32185 }, { "epoch": 0.41824239047629874, "grad_norm": 0.4250923693180084, "learning_rate": 0.00011637531031076567, "loss": 1.4356, "step": 32186 }, { "epoch": 0.41825538502021464, "grad_norm": 0.3160364329814911, "learning_rate": 0.00011637271084885428, "loss": 1.2582, "step": 32187 }, { "epoch": 0.4182683795641305, "grad_norm": 0.4519636332988739, "learning_rate": 0.00011637011138694292, "loss": 1.3902, "step": 32188 }, { "epoch": 0.4182813741080464, "grad_norm": 0.3768227994441986, "learning_rate": 0.00011636751192503152, "loss": 1.4594, "step": 32189 }, { "epoch": 0.41829436865196223, "grad_norm": 0.42983752489089966, "learning_rate": 0.00011636491246312013, "loss": 1.2179, "step": 32190 }, { "epoch": 0.41830736319587813, "grad_norm": 0.4631871283054352, "learning_rate": 0.00011636231300120874, "loss": 1.4057, "step": 32191 }, { "epoch": 0.418320357739794, "grad_norm": 0.486556738615036, "learning_rate": 0.00011635971353929738, "loss": 1.5015, "step": 32192 }, { "epoch": 0.4183333522837099, "grad_norm": 0.4325386583805084, "learning_rate": 0.00011635711407738599, "loss": 1.4477, "step": 32193 }, { "epoch": 0.4183463468276257, "grad_norm": 0.4420927166938782, "learning_rate": 0.0001163545146154746, "loss": 1.5671, "step": 32194 }, { "epoch": 0.4183593413715416, "grad_norm": 0.45967647433280945, "learning_rate": 0.00011635191515356323, "loss": 1.4263, "step": 32195 }, { "epoch": 0.41837233591545747, "grad_norm": 0.32406577467918396, "learning_rate": 0.00011634931569165184, "loss": 1.3456, "step": 32196 }, { "epoch": 0.4183853304593734, "grad_norm": 0.43036600947380066, "learning_rate": 0.00011634671622974045, "loss": 1.3495, "step": 32197 }, { "epoch": 0.4183983250032892, "grad_norm": 0.48746275901794434, "learning_rate": 0.00011634411676782906, "loss": 1.3075, "step": 32198 }, { "epoch": 0.4184113195472051, "grad_norm": 0.36737489700317383, "learning_rate": 0.0001163415173059177, "loss": 1.4042, "step": 32199 }, { "epoch": 0.41842431409112096, "grad_norm": 0.41635018587112427, "learning_rate": 0.00011633891784400631, "loss": 1.4237, "step": 32200 }, { "epoch": 0.41843730863503686, "grad_norm": 0.3519030213356018, "learning_rate": 0.0001163363183820949, "loss": 1.2519, "step": 32201 }, { "epoch": 0.41845030317895276, "grad_norm": 0.5549514889717102, "learning_rate": 0.00011633371892018352, "loss": 1.4754, "step": 32202 }, { "epoch": 0.4184632977228686, "grad_norm": 0.4481637179851532, "learning_rate": 0.00011633111945827215, "loss": 1.3793, "step": 32203 }, { "epoch": 0.4184762922667845, "grad_norm": 0.43877995014190674, "learning_rate": 0.00011632851999636076, "loss": 1.2521, "step": 32204 }, { "epoch": 0.41848928681070036, "grad_norm": 0.4198109805583954, "learning_rate": 0.00011632592053444938, "loss": 1.2728, "step": 32205 }, { "epoch": 0.41850228135461626, "grad_norm": 0.3588009178638458, "learning_rate": 0.00011632332107253799, "loss": 1.3339, "step": 32206 }, { "epoch": 0.4185152758985321, "grad_norm": 0.33439791202545166, "learning_rate": 0.00011632072161062661, "loss": 1.2998, "step": 32207 }, { "epoch": 0.418528270442448, "grad_norm": 0.40653684735298157, "learning_rate": 0.00011631812214871522, "loss": 1.4201, "step": 32208 }, { "epoch": 0.41854126498636385, "grad_norm": 0.3330845236778259, "learning_rate": 0.00011631552268680383, "loss": 1.4003, "step": 32209 }, { "epoch": 0.41855425953027975, "grad_norm": 0.3548836410045624, "learning_rate": 0.00011631292322489244, "loss": 1.3528, "step": 32210 }, { "epoch": 0.4185672540741956, "grad_norm": 0.3680754601955414, "learning_rate": 0.00011631032376298108, "loss": 1.3234, "step": 32211 }, { "epoch": 0.4185802486181115, "grad_norm": 0.3345813453197479, "learning_rate": 0.00011630772430106969, "loss": 1.386, "step": 32212 }, { "epoch": 0.41859324316202734, "grad_norm": 0.374685674905777, "learning_rate": 0.0001163051248391583, "loss": 1.2733, "step": 32213 }, { "epoch": 0.41860623770594324, "grad_norm": 0.46408379077911377, "learning_rate": 0.0001163025253772469, "loss": 1.3596, "step": 32214 }, { "epoch": 0.4186192322498591, "grad_norm": 0.3770532011985779, "learning_rate": 0.00011629992591533554, "loss": 1.339, "step": 32215 }, { "epoch": 0.418632226793775, "grad_norm": 0.38618651032447815, "learning_rate": 0.00011629732645342415, "loss": 1.4408, "step": 32216 }, { "epoch": 0.41864522133769083, "grad_norm": 0.3468989133834839, "learning_rate": 0.00011629472699151276, "loss": 1.3171, "step": 32217 }, { "epoch": 0.41865821588160673, "grad_norm": 0.37816137075424194, "learning_rate": 0.00011629212752960137, "loss": 1.5181, "step": 32218 }, { "epoch": 0.4186712104255226, "grad_norm": 0.3315829634666443, "learning_rate": 0.00011628952806769, "loss": 1.3443, "step": 32219 }, { "epoch": 0.4186842049694385, "grad_norm": 0.41978517174720764, "learning_rate": 0.00011628692860577861, "loss": 1.4834, "step": 32220 }, { "epoch": 0.4186971995133543, "grad_norm": 0.4145573377609253, "learning_rate": 0.00011628432914386722, "loss": 1.6032, "step": 32221 }, { "epoch": 0.4187101940572702, "grad_norm": 0.37847137451171875, "learning_rate": 0.00011628172968195583, "loss": 1.1296, "step": 32222 }, { "epoch": 0.41872318860118607, "grad_norm": 0.5768716335296631, "learning_rate": 0.00011627913022004447, "loss": 1.3121, "step": 32223 }, { "epoch": 0.41873618314510197, "grad_norm": 0.45342352986335754, "learning_rate": 0.00011627653075813308, "loss": 1.5283, "step": 32224 }, { "epoch": 0.4187491776890178, "grad_norm": 0.4855320155620575, "learning_rate": 0.00011627393129622169, "loss": 1.5873, "step": 32225 }, { "epoch": 0.4187621722329337, "grad_norm": 0.5236383080482483, "learning_rate": 0.0001162713318343103, "loss": 1.4403, "step": 32226 }, { "epoch": 0.41877516677684956, "grad_norm": 0.38042962551116943, "learning_rate": 0.00011626873237239892, "loss": 1.3865, "step": 32227 }, { "epoch": 0.41878816132076546, "grad_norm": 0.4319530129432678, "learning_rate": 0.00011626613291048754, "loss": 1.4187, "step": 32228 }, { "epoch": 0.4188011558646813, "grad_norm": 0.35347607731819153, "learning_rate": 0.00011626353344857615, "loss": 1.2113, "step": 32229 }, { "epoch": 0.4188141504085972, "grad_norm": 0.46501338481903076, "learning_rate": 0.00011626093398666476, "loss": 1.548, "step": 32230 }, { "epoch": 0.41882714495251305, "grad_norm": 0.45698726177215576, "learning_rate": 0.00011625833452475338, "loss": 1.363, "step": 32231 }, { "epoch": 0.41884013949642895, "grad_norm": 0.3451613187789917, "learning_rate": 0.00011625573506284199, "loss": 1.3086, "step": 32232 }, { "epoch": 0.4188531340403448, "grad_norm": 0.4871053099632263, "learning_rate": 0.0001162531356009306, "loss": 1.454, "step": 32233 }, { "epoch": 0.4188661285842607, "grad_norm": 0.2793738842010498, "learning_rate": 0.00011625053613901924, "loss": 1.183, "step": 32234 }, { "epoch": 0.41887912312817654, "grad_norm": 0.3700677156448364, "learning_rate": 0.00011624793667710785, "loss": 1.2283, "step": 32235 }, { "epoch": 0.41889211767209245, "grad_norm": 0.3881552517414093, "learning_rate": 0.00011624533721519646, "loss": 1.4366, "step": 32236 }, { "epoch": 0.4189051122160083, "grad_norm": 0.5991576910018921, "learning_rate": 0.00011624273775328507, "loss": 1.2616, "step": 32237 }, { "epoch": 0.4189181067599242, "grad_norm": 0.3525272607803345, "learning_rate": 0.0001162401382913737, "loss": 1.4306, "step": 32238 }, { "epoch": 0.41893110130384004, "grad_norm": 0.3290645480155945, "learning_rate": 0.00011623753882946231, "loss": 1.2525, "step": 32239 }, { "epoch": 0.41894409584775594, "grad_norm": 0.3410657048225403, "learning_rate": 0.00011623493936755092, "loss": 1.5184, "step": 32240 }, { "epoch": 0.4189570903916718, "grad_norm": 0.2856634557247162, "learning_rate": 0.00011623233990563953, "loss": 1.3853, "step": 32241 }, { "epoch": 0.4189700849355877, "grad_norm": 0.3366034924983978, "learning_rate": 0.00011622974044372817, "loss": 1.3233, "step": 32242 }, { "epoch": 0.41898307947950353, "grad_norm": 0.47480490803718567, "learning_rate": 0.00011622714098181677, "loss": 1.4806, "step": 32243 }, { "epoch": 0.41899607402341943, "grad_norm": 0.2718394994735718, "learning_rate": 0.00011622454151990538, "loss": 1.3126, "step": 32244 }, { "epoch": 0.4190090685673353, "grad_norm": 0.37457048892974854, "learning_rate": 0.00011622194205799399, "loss": 1.35, "step": 32245 }, { "epoch": 0.4190220631112512, "grad_norm": 0.5084694623947144, "learning_rate": 0.00011621934259608263, "loss": 1.366, "step": 32246 }, { "epoch": 0.419035057655167, "grad_norm": 0.4383896291255951, "learning_rate": 0.00011621674313417124, "loss": 1.3434, "step": 32247 }, { "epoch": 0.4190480521990829, "grad_norm": 0.423346608877182, "learning_rate": 0.00011621414367225985, "loss": 1.3151, "step": 32248 }, { "epoch": 0.41906104674299877, "grad_norm": 0.333220511674881, "learning_rate": 0.00011621154421034846, "loss": 1.3636, "step": 32249 }, { "epoch": 0.41907404128691467, "grad_norm": 0.4135946035385132, "learning_rate": 0.00011620894474843708, "loss": 1.4954, "step": 32250 }, { "epoch": 0.4190870358308305, "grad_norm": 0.31489089131355286, "learning_rate": 0.0001162063452865257, "loss": 1.5427, "step": 32251 }, { "epoch": 0.4191000303747464, "grad_norm": 0.474231094121933, "learning_rate": 0.0001162037458246143, "loss": 1.4955, "step": 32252 }, { "epoch": 0.41911302491866226, "grad_norm": 0.4646984338760376, "learning_rate": 0.00011620114636270292, "loss": 1.4317, "step": 32253 }, { "epoch": 0.41912601946257816, "grad_norm": 0.41386303305625916, "learning_rate": 0.00011619854690079155, "loss": 1.3944, "step": 32254 }, { "epoch": 0.419139014006494, "grad_norm": 0.40486401319503784, "learning_rate": 0.00011619594743888017, "loss": 1.173, "step": 32255 }, { "epoch": 0.4191520085504099, "grad_norm": 0.4298326075077057, "learning_rate": 0.00011619334797696876, "loss": 1.4026, "step": 32256 }, { "epoch": 0.41916500309432575, "grad_norm": 0.3901742994785309, "learning_rate": 0.00011619074851505737, "loss": 1.4589, "step": 32257 }, { "epoch": 0.41917799763824165, "grad_norm": 0.4658268094062805, "learning_rate": 0.00011618814905314601, "loss": 1.3743, "step": 32258 }, { "epoch": 0.4191909921821575, "grad_norm": 0.43392351269721985, "learning_rate": 0.00011618554959123462, "loss": 1.5418, "step": 32259 }, { "epoch": 0.4192039867260734, "grad_norm": 0.44961997866630554, "learning_rate": 0.00011618295012932323, "loss": 1.5159, "step": 32260 }, { "epoch": 0.41921698126998924, "grad_norm": 0.3624570369720459, "learning_rate": 0.00011618035066741184, "loss": 1.2327, "step": 32261 }, { "epoch": 0.41922997581390514, "grad_norm": 0.43030011653900146, "learning_rate": 0.00011617775120550047, "loss": 1.3285, "step": 32262 }, { "epoch": 0.419242970357821, "grad_norm": 0.34005704522132874, "learning_rate": 0.00011617515174358908, "loss": 1.5247, "step": 32263 }, { "epoch": 0.4192559649017369, "grad_norm": 0.371330589056015, "learning_rate": 0.00011617255228167769, "loss": 1.3898, "step": 32264 }, { "epoch": 0.41926895944565273, "grad_norm": 0.32238394021987915, "learning_rate": 0.0001161699528197663, "loss": 1.3084, "step": 32265 }, { "epoch": 0.41928195398956863, "grad_norm": 0.41983339190483093, "learning_rate": 0.00011616735335785494, "loss": 1.2997, "step": 32266 }, { "epoch": 0.4192949485334845, "grad_norm": 0.4474751055240631, "learning_rate": 0.00011616475389594355, "loss": 1.3101, "step": 32267 }, { "epoch": 0.4193079430774004, "grad_norm": 0.4586068093776703, "learning_rate": 0.00011616215443403216, "loss": 1.2624, "step": 32268 }, { "epoch": 0.4193209376213162, "grad_norm": 0.4348476231098175, "learning_rate": 0.00011615955497212079, "loss": 1.4687, "step": 32269 }, { "epoch": 0.4193339321652321, "grad_norm": 0.30033254623413086, "learning_rate": 0.0001161569555102094, "loss": 1.3672, "step": 32270 }, { "epoch": 0.41934692670914797, "grad_norm": 0.44349902868270874, "learning_rate": 0.00011615435604829801, "loss": 1.3642, "step": 32271 }, { "epoch": 0.41935992125306387, "grad_norm": 0.3840700685977936, "learning_rate": 0.00011615175658638662, "loss": 1.4131, "step": 32272 }, { "epoch": 0.4193729157969797, "grad_norm": 0.4895966053009033, "learning_rate": 0.00011614915712447524, "loss": 1.6234, "step": 32273 }, { "epoch": 0.4193859103408956, "grad_norm": 0.465359628200531, "learning_rate": 0.00011614655766256385, "loss": 1.5284, "step": 32274 }, { "epoch": 0.41939890488481146, "grad_norm": 0.47041377425193787, "learning_rate": 0.00011614395820065247, "loss": 1.2472, "step": 32275 }, { "epoch": 0.41941189942872736, "grad_norm": 0.46924617886543274, "learning_rate": 0.00011614135873874108, "loss": 1.537, "step": 32276 }, { "epoch": 0.4194248939726432, "grad_norm": 0.41342389583587646, "learning_rate": 0.00011613875927682971, "loss": 1.1781, "step": 32277 }, { "epoch": 0.4194378885165591, "grad_norm": 0.476694256067276, "learning_rate": 0.00011613615981491833, "loss": 1.3921, "step": 32278 }, { "epoch": 0.419450883060475, "grad_norm": 0.28122037649154663, "learning_rate": 0.00011613356035300694, "loss": 1.2522, "step": 32279 }, { "epoch": 0.41946387760439086, "grad_norm": 0.3382534980773926, "learning_rate": 0.00011613096089109555, "loss": 1.3992, "step": 32280 }, { "epoch": 0.41947687214830676, "grad_norm": 0.4447752833366394, "learning_rate": 0.00011612836142918417, "loss": 1.4606, "step": 32281 }, { "epoch": 0.4194898666922226, "grad_norm": 0.4879113435745239, "learning_rate": 0.00011612576196727278, "loss": 1.4304, "step": 32282 }, { "epoch": 0.4195028612361385, "grad_norm": 0.5712043642997742, "learning_rate": 0.0001161231625053614, "loss": 1.3906, "step": 32283 }, { "epoch": 0.41951585578005435, "grad_norm": 0.3488975167274475, "learning_rate": 0.00011612056304345, "loss": 1.3649, "step": 32284 }, { "epoch": 0.41952885032397025, "grad_norm": 0.40721893310546875, "learning_rate": 0.00011611796358153863, "loss": 1.4337, "step": 32285 }, { "epoch": 0.4195418448678861, "grad_norm": 0.38627147674560547, "learning_rate": 0.00011611536411962724, "loss": 1.4006, "step": 32286 }, { "epoch": 0.419554839411802, "grad_norm": 0.5106608867645264, "learning_rate": 0.00011611276465771585, "loss": 1.4741, "step": 32287 }, { "epoch": 0.41956783395571784, "grad_norm": 0.443295419216156, "learning_rate": 0.00011611016519580446, "loss": 1.5234, "step": 32288 }, { "epoch": 0.41958082849963374, "grad_norm": 0.3780266046524048, "learning_rate": 0.0001161075657338931, "loss": 1.3881, "step": 32289 }, { "epoch": 0.4195938230435496, "grad_norm": 0.49872884154319763, "learning_rate": 0.00011610496627198171, "loss": 1.5286, "step": 32290 }, { "epoch": 0.4196068175874655, "grad_norm": 0.3499641716480255, "learning_rate": 0.00011610236681007032, "loss": 1.3981, "step": 32291 }, { "epoch": 0.41961981213138133, "grad_norm": 0.4204707145690918, "learning_rate": 0.00011609976734815893, "loss": 1.5844, "step": 32292 }, { "epoch": 0.41963280667529723, "grad_norm": 0.41294464468955994, "learning_rate": 0.00011609716788624756, "loss": 1.3681, "step": 32293 }, { "epoch": 0.4196458012192131, "grad_norm": 0.33723780512809753, "learning_rate": 0.00011609456842433617, "loss": 1.436, "step": 32294 }, { "epoch": 0.419658795763129, "grad_norm": 0.35678631067276, "learning_rate": 0.00011609196896242478, "loss": 1.4095, "step": 32295 }, { "epoch": 0.4196717903070448, "grad_norm": 0.3804474472999573, "learning_rate": 0.00011608936950051339, "loss": 1.3847, "step": 32296 }, { "epoch": 0.4196847848509607, "grad_norm": 0.47448214888572693, "learning_rate": 0.00011608677003860203, "loss": 1.4121, "step": 32297 }, { "epoch": 0.41969777939487657, "grad_norm": 0.4430135190486908, "learning_rate": 0.00011608417057669063, "loss": 1.2286, "step": 32298 }, { "epoch": 0.41971077393879247, "grad_norm": 0.4234481155872345, "learning_rate": 0.00011608157111477924, "loss": 1.6024, "step": 32299 }, { "epoch": 0.4197237684827083, "grad_norm": 0.25687652826309204, "learning_rate": 0.00011607897165286785, "loss": 1.2493, "step": 32300 }, { "epoch": 0.4197367630266242, "grad_norm": 0.5005450248718262, "learning_rate": 0.00011607637219095649, "loss": 1.404, "step": 32301 }, { "epoch": 0.41974975757054006, "grad_norm": 0.39949968457221985, "learning_rate": 0.0001160737727290451, "loss": 1.5043, "step": 32302 }, { "epoch": 0.41976275211445596, "grad_norm": 0.4189079701900482, "learning_rate": 0.00011607117326713371, "loss": 1.4088, "step": 32303 }, { "epoch": 0.4197757466583718, "grad_norm": 0.39433741569519043, "learning_rate": 0.00011606857380522232, "loss": 1.5064, "step": 32304 }, { "epoch": 0.4197887412022877, "grad_norm": 0.36041754484176636, "learning_rate": 0.00011606597434331094, "loss": 1.2977, "step": 32305 }, { "epoch": 0.41980173574620355, "grad_norm": 0.4624941647052765, "learning_rate": 0.00011606337488139955, "loss": 1.4347, "step": 32306 }, { "epoch": 0.41981473029011945, "grad_norm": 0.4705483019351959, "learning_rate": 0.00011606077541948816, "loss": 1.4634, "step": 32307 }, { "epoch": 0.4198277248340353, "grad_norm": 0.3786124289035797, "learning_rate": 0.0001160581759575768, "loss": 1.323, "step": 32308 }, { "epoch": 0.4198407193779512, "grad_norm": 0.3233451545238495, "learning_rate": 0.00011605557649566541, "loss": 1.2716, "step": 32309 }, { "epoch": 0.41985371392186704, "grad_norm": 0.34478846192359924, "learning_rate": 0.00011605297703375402, "loss": 1.3264, "step": 32310 }, { "epoch": 0.41986670846578295, "grad_norm": 0.25865858793258667, "learning_rate": 0.00011605037757184262, "loss": 1.3168, "step": 32311 }, { "epoch": 0.4198797030096988, "grad_norm": 0.3190869987010956, "learning_rate": 0.00011604777810993126, "loss": 1.483, "step": 32312 }, { "epoch": 0.4198926975536147, "grad_norm": 0.45041441917419434, "learning_rate": 0.00011604517864801987, "loss": 1.4352, "step": 32313 }, { "epoch": 0.41990569209753054, "grad_norm": 0.3702928125858307, "learning_rate": 0.00011604257918610848, "loss": 1.3017, "step": 32314 }, { "epoch": 0.41991868664144644, "grad_norm": 0.45153993368148804, "learning_rate": 0.00011603997972419709, "loss": 1.3046, "step": 32315 }, { "epoch": 0.4199316811853623, "grad_norm": 0.4125082492828369, "learning_rate": 0.00011603738026228572, "loss": 1.2592, "step": 32316 }, { "epoch": 0.4199446757292782, "grad_norm": 0.43710389733314514, "learning_rate": 0.00011603478080037433, "loss": 1.3791, "step": 32317 }, { "epoch": 0.41995767027319403, "grad_norm": 0.31893301010131836, "learning_rate": 0.00011603218133846294, "loss": 1.4254, "step": 32318 }, { "epoch": 0.41997066481710993, "grad_norm": 0.3802414834499359, "learning_rate": 0.00011602958187655155, "loss": 1.4624, "step": 32319 }, { "epoch": 0.4199836593610258, "grad_norm": 0.4751666188240051, "learning_rate": 0.00011602698241464019, "loss": 1.4983, "step": 32320 }, { "epoch": 0.4199966539049417, "grad_norm": 0.3258700668811798, "learning_rate": 0.0001160243829527288, "loss": 1.4564, "step": 32321 }, { "epoch": 0.4200096484488575, "grad_norm": 0.31875380873680115, "learning_rate": 0.00011602178349081741, "loss": 1.4417, "step": 32322 }, { "epoch": 0.4200226429927734, "grad_norm": 0.37079137563705444, "learning_rate": 0.00011601918402890601, "loss": 1.3303, "step": 32323 }, { "epoch": 0.42003563753668927, "grad_norm": 0.3270212411880493, "learning_rate": 0.00011601658456699465, "loss": 1.0694, "step": 32324 }, { "epoch": 0.42004863208060517, "grad_norm": 0.39389216899871826, "learning_rate": 0.00011601398510508326, "loss": 1.3781, "step": 32325 }, { "epoch": 0.420061626624521, "grad_norm": 0.43165719509124756, "learning_rate": 0.00011601138564317187, "loss": 1.3574, "step": 32326 }, { "epoch": 0.4200746211684369, "grad_norm": 0.4926842749118805, "learning_rate": 0.00011600878618126048, "loss": 1.4515, "step": 32327 }, { "epoch": 0.42008761571235276, "grad_norm": 0.42954862117767334, "learning_rate": 0.0001160061867193491, "loss": 1.5662, "step": 32328 }, { "epoch": 0.42010061025626866, "grad_norm": 0.3975192606449127, "learning_rate": 0.00011600358725743771, "loss": 1.4922, "step": 32329 }, { "epoch": 0.4201136048001845, "grad_norm": 0.3848022520542145, "learning_rate": 0.00011600098779552632, "loss": 1.293, "step": 32330 }, { "epoch": 0.4201265993441004, "grad_norm": 0.36023685336112976, "learning_rate": 0.00011599838833361494, "loss": 1.3819, "step": 32331 }, { "epoch": 0.42013959388801625, "grad_norm": 0.35359495878219604, "learning_rate": 0.00011599578887170357, "loss": 1.3193, "step": 32332 }, { "epoch": 0.42015258843193215, "grad_norm": 0.4508792757987976, "learning_rate": 0.00011599318940979218, "loss": 1.4522, "step": 32333 }, { "epoch": 0.420165582975848, "grad_norm": 0.4280056655406952, "learning_rate": 0.0001159905899478808, "loss": 1.4169, "step": 32334 }, { "epoch": 0.4201785775197639, "grad_norm": 0.392936646938324, "learning_rate": 0.0001159879904859694, "loss": 1.4968, "step": 32335 }, { "epoch": 0.42019157206367974, "grad_norm": 0.3698326051235199, "learning_rate": 0.00011598539102405803, "loss": 1.5217, "step": 32336 }, { "epoch": 0.42020456660759564, "grad_norm": 0.3180531859397888, "learning_rate": 0.00011598279156214664, "loss": 1.3354, "step": 32337 }, { "epoch": 0.4202175611515115, "grad_norm": 0.32269519567489624, "learning_rate": 0.00011598019210023525, "loss": 1.1977, "step": 32338 }, { "epoch": 0.4202305556954274, "grad_norm": 0.4611388146877289, "learning_rate": 0.00011597759263832386, "loss": 1.5446, "step": 32339 }, { "epoch": 0.42024355023934323, "grad_norm": 0.36423933506011963, "learning_rate": 0.00011597499317641249, "loss": 1.3829, "step": 32340 }, { "epoch": 0.42025654478325913, "grad_norm": 0.4284282326698303, "learning_rate": 0.0001159723937145011, "loss": 1.5391, "step": 32341 }, { "epoch": 0.420269539327175, "grad_norm": 0.40483248233795166, "learning_rate": 0.00011596979425258971, "loss": 1.4558, "step": 32342 }, { "epoch": 0.4202825338710909, "grad_norm": 0.38728538155555725, "learning_rate": 0.00011596719479067835, "loss": 1.3604, "step": 32343 }, { "epoch": 0.4202955284150067, "grad_norm": 0.4580335021018982, "learning_rate": 0.00011596459532876696, "loss": 1.295, "step": 32344 }, { "epoch": 0.4203085229589226, "grad_norm": 0.30721211433410645, "learning_rate": 0.00011596199586685557, "loss": 1.2357, "step": 32345 }, { "epoch": 0.42032151750283847, "grad_norm": 0.3419477939605713, "learning_rate": 0.00011595939640494418, "loss": 1.4356, "step": 32346 }, { "epoch": 0.42033451204675437, "grad_norm": 0.3426543176174164, "learning_rate": 0.0001159567969430328, "loss": 1.379, "step": 32347 }, { "epoch": 0.4203475065906702, "grad_norm": 0.3871789872646332, "learning_rate": 0.00011595419748112142, "loss": 1.4562, "step": 32348 }, { "epoch": 0.4203605011345861, "grad_norm": 0.40265098214149475, "learning_rate": 0.00011595159801921003, "loss": 1.3805, "step": 32349 }, { "epoch": 0.42037349567850196, "grad_norm": 0.4091636538505554, "learning_rate": 0.00011594899855729864, "loss": 1.461, "step": 32350 }, { "epoch": 0.42038649022241786, "grad_norm": 0.36400511860847473, "learning_rate": 0.00011594639909538728, "loss": 1.3014, "step": 32351 }, { "epoch": 0.4203994847663337, "grad_norm": 0.4031924307346344, "learning_rate": 0.00011594379963347589, "loss": 1.356, "step": 32352 }, { "epoch": 0.4204124793102496, "grad_norm": 0.3914255201816559, "learning_rate": 0.00011594120017156448, "loss": 1.42, "step": 32353 }, { "epoch": 0.42042547385416545, "grad_norm": 0.38627889752388, "learning_rate": 0.0001159386007096531, "loss": 1.337, "step": 32354 }, { "epoch": 0.42043846839808136, "grad_norm": 0.556472659111023, "learning_rate": 0.00011593600124774173, "loss": 1.2022, "step": 32355 }, { "epoch": 0.42045146294199726, "grad_norm": 0.46469444036483765, "learning_rate": 0.00011593340178583034, "loss": 1.3123, "step": 32356 }, { "epoch": 0.4204644574859131, "grad_norm": 0.35841071605682373, "learning_rate": 0.00011593080232391896, "loss": 1.2154, "step": 32357 }, { "epoch": 0.420477452029829, "grad_norm": 0.43212684988975525, "learning_rate": 0.00011592820286200757, "loss": 1.6088, "step": 32358 }, { "epoch": 0.42049044657374485, "grad_norm": 0.46963879466056824, "learning_rate": 0.00011592560340009619, "loss": 1.4245, "step": 32359 }, { "epoch": 0.42050344111766075, "grad_norm": 0.32945647835731506, "learning_rate": 0.0001159230039381848, "loss": 1.4608, "step": 32360 }, { "epoch": 0.4205164356615766, "grad_norm": 0.48712360858917236, "learning_rate": 0.00011592040447627341, "loss": 1.5481, "step": 32361 }, { "epoch": 0.4205294302054925, "grad_norm": 0.437644898891449, "learning_rate": 0.00011591780501436202, "loss": 1.3199, "step": 32362 }, { "epoch": 0.42054242474940834, "grad_norm": 0.3633168041706085, "learning_rate": 0.00011591520555245066, "loss": 1.3424, "step": 32363 }, { "epoch": 0.42055541929332424, "grad_norm": 0.35237812995910645, "learning_rate": 0.00011591260609053927, "loss": 1.36, "step": 32364 }, { "epoch": 0.4205684138372401, "grad_norm": 0.3826889395713806, "learning_rate": 0.00011591000662862787, "loss": 1.3326, "step": 32365 }, { "epoch": 0.420581408381156, "grad_norm": 0.3915887773036957, "learning_rate": 0.00011590740716671648, "loss": 1.5658, "step": 32366 }, { "epoch": 0.42059440292507183, "grad_norm": 0.45203033089637756, "learning_rate": 0.00011590480770480512, "loss": 1.3102, "step": 32367 }, { "epoch": 0.42060739746898773, "grad_norm": 0.4447691738605499, "learning_rate": 0.00011590220824289373, "loss": 1.3894, "step": 32368 }, { "epoch": 0.4206203920129036, "grad_norm": 0.4356711804866791, "learning_rate": 0.00011589960878098234, "loss": 1.5708, "step": 32369 }, { "epoch": 0.4206333865568195, "grad_norm": 0.45708414912223816, "learning_rate": 0.00011589700931907095, "loss": 1.518, "step": 32370 }, { "epoch": 0.4206463811007353, "grad_norm": 0.40342941880226135, "learning_rate": 0.00011589440985715958, "loss": 1.4355, "step": 32371 }, { "epoch": 0.4206593756446512, "grad_norm": 0.4190525710582733, "learning_rate": 0.00011589181039524819, "loss": 1.4225, "step": 32372 }, { "epoch": 0.42067237018856707, "grad_norm": 0.39301496744155884, "learning_rate": 0.0001158892109333368, "loss": 1.2063, "step": 32373 }, { "epoch": 0.42068536473248297, "grad_norm": 0.3339405953884125, "learning_rate": 0.00011588661147142541, "loss": 1.4193, "step": 32374 }, { "epoch": 0.4206983592763988, "grad_norm": 0.37180769443511963, "learning_rate": 0.00011588401200951405, "loss": 1.4794, "step": 32375 }, { "epoch": 0.4207113538203147, "grad_norm": 0.4803014099597931, "learning_rate": 0.00011588141254760266, "loss": 1.3188, "step": 32376 }, { "epoch": 0.42072434836423056, "grad_norm": 0.37827426195144653, "learning_rate": 0.00011587881308569127, "loss": 1.4775, "step": 32377 }, { "epoch": 0.42073734290814646, "grad_norm": 0.3197339177131653, "learning_rate": 0.00011587621362377987, "loss": 1.2691, "step": 32378 }, { "epoch": 0.4207503374520623, "grad_norm": 0.45096883177757263, "learning_rate": 0.0001158736141618685, "loss": 1.6366, "step": 32379 }, { "epoch": 0.4207633319959782, "grad_norm": 0.3647748529911041, "learning_rate": 0.00011587101469995711, "loss": 1.4396, "step": 32380 }, { "epoch": 0.42077632653989405, "grad_norm": 0.4202468693256378, "learning_rate": 0.00011586841523804573, "loss": 1.3329, "step": 32381 }, { "epoch": 0.42078932108380995, "grad_norm": 0.3129234313964844, "learning_rate": 0.00011586581577613435, "loss": 1.2121, "step": 32382 }, { "epoch": 0.4208023156277258, "grad_norm": 0.461628258228302, "learning_rate": 0.00011586321631422296, "loss": 1.2551, "step": 32383 }, { "epoch": 0.4208153101716417, "grad_norm": 0.3742586672306061, "learning_rate": 0.00011586061685231157, "loss": 1.4465, "step": 32384 }, { "epoch": 0.42082830471555754, "grad_norm": 0.3721318542957306, "learning_rate": 0.00011585801739040018, "loss": 1.4255, "step": 32385 }, { "epoch": 0.42084129925947344, "grad_norm": 0.298602819442749, "learning_rate": 0.00011585541792848882, "loss": 1.5013, "step": 32386 }, { "epoch": 0.4208542938033893, "grad_norm": 0.37362757325172424, "learning_rate": 0.00011585281846657743, "loss": 1.5203, "step": 32387 }, { "epoch": 0.4208672883473052, "grad_norm": 0.4179818630218506, "learning_rate": 0.00011585021900466604, "loss": 1.3826, "step": 32388 }, { "epoch": 0.42088028289122104, "grad_norm": 0.38251161575317383, "learning_rate": 0.00011584761954275465, "loss": 1.2808, "step": 32389 }, { "epoch": 0.42089327743513694, "grad_norm": 0.37224841117858887, "learning_rate": 0.00011584502008084328, "loss": 1.4339, "step": 32390 }, { "epoch": 0.4209062719790528, "grad_norm": 0.33931422233581543, "learning_rate": 0.00011584242061893189, "loss": 1.43, "step": 32391 }, { "epoch": 0.4209192665229687, "grad_norm": 0.4628772437572479, "learning_rate": 0.0001158398211570205, "loss": 1.455, "step": 32392 }, { "epoch": 0.4209322610668845, "grad_norm": 0.3676503002643585, "learning_rate": 0.00011583722169510911, "loss": 1.5402, "step": 32393 }, { "epoch": 0.42094525561080043, "grad_norm": 0.41572582721710205, "learning_rate": 0.00011583462223319775, "loss": 1.7797, "step": 32394 }, { "epoch": 0.4209582501547163, "grad_norm": 0.2746230959892273, "learning_rate": 0.00011583202277128635, "loss": 1.4575, "step": 32395 }, { "epoch": 0.4209712446986322, "grad_norm": 0.36138656735420227, "learning_rate": 0.00011582942330937496, "loss": 1.398, "step": 32396 }, { "epoch": 0.420984239242548, "grad_norm": 0.42010214924812317, "learning_rate": 0.00011582682384746357, "loss": 1.554, "step": 32397 }, { "epoch": 0.4209972337864639, "grad_norm": 0.3860754370689392, "learning_rate": 0.0001158242243855522, "loss": 1.3548, "step": 32398 }, { "epoch": 0.42101022833037977, "grad_norm": 0.34347113966941833, "learning_rate": 0.00011582162492364082, "loss": 1.3631, "step": 32399 }, { "epoch": 0.42102322287429567, "grad_norm": 0.42275938391685486, "learning_rate": 0.00011581902546172943, "loss": 1.422, "step": 32400 }, { "epoch": 0.4210362174182115, "grad_norm": 0.3430614471435547, "learning_rate": 0.00011581642599981804, "loss": 1.2599, "step": 32401 }, { "epoch": 0.4210492119621274, "grad_norm": 0.4406384229660034, "learning_rate": 0.00011581382653790666, "loss": 1.3838, "step": 32402 }, { "epoch": 0.42106220650604326, "grad_norm": 0.3799358904361725, "learning_rate": 0.00011581122707599527, "loss": 1.317, "step": 32403 }, { "epoch": 0.42107520104995916, "grad_norm": 0.4151029586791992, "learning_rate": 0.00011580862761408389, "loss": 1.4482, "step": 32404 }, { "epoch": 0.421088195593875, "grad_norm": 0.45835432410240173, "learning_rate": 0.0001158060281521725, "loss": 1.5123, "step": 32405 }, { "epoch": 0.4211011901377909, "grad_norm": 0.47239863872528076, "learning_rate": 0.00011580342869026113, "loss": 1.3094, "step": 32406 }, { "epoch": 0.42111418468170675, "grad_norm": 0.4508815109729767, "learning_rate": 0.00011580082922834973, "loss": 1.3516, "step": 32407 }, { "epoch": 0.42112717922562265, "grad_norm": 0.39499521255493164, "learning_rate": 0.00011579822976643834, "loss": 1.3663, "step": 32408 }, { "epoch": 0.4211401737695385, "grad_norm": 0.38398101925849915, "learning_rate": 0.00011579563030452695, "loss": 1.4474, "step": 32409 }, { "epoch": 0.4211531683134544, "grad_norm": 0.42292532324790955, "learning_rate": 0.00011579303084261559, "loss": 1.4365, "step": 32410 }, { "epoch": 0.42116616285737024, "grad_norm": 0.3487517535686493, "learning_rate": 0.0001157904313807042, "loss": 1.1576, "step": 32411 }, { "epoch": 0.42117915740128614, "grad_norm": 0.4398074746131897, "learning_rate": 0.00011578783191879281, "loss": 1.2777, "step": 32412 }, { "epoch": 0.421192151945202, "grad_norm": 0.2918659448623657, "learning_rate": 0.00011578523245688142, "loss": 1.3149, "step": 32413 }, { "epoch": 0.4212051464891179, "grad_norm": 0.3867429196834564, "learning_rate": 0.00011578263299497005, "loss": 1.5261, "step": 32414 }, { "epoch": 0.42121814103303373, "grad_norm": 0.4029358923435211, "learning_rate": 0.00011578003353305866, "loss": 1.404, "step": 32415 }, { "epoch": 0.42123113557694963, "grad_norm": 0.2864662706851959, "learning_rate": 0.00011577743407114727, "loss": 1.3101, "step": 32416 }, { "epoch": 0.4212441301208655, "grad_norm": 0.40627962350845337, "learning_rate": 0.00011577483460923588, "loss": 1.4149, "step": 32417 }, { "epoch": 0.4212571246647814, "grad_norm": 0.45507243275642395, "learning_rate": 0.00011577223514732452, "loss": 1.4147, "step": 32418 }, { "epoch": 0.4212701192086972, "grad_norm": 0.5283242464065552, "learning_rate": 0.00011576963568541313, "loss": 1.5259, "step": 32419 }, { "epoch": 0.4212831137526131, "grad_norm": 0.4729507267475128, "learning_rate": 0.00011576703622350173, "loss": 1.4683, "step": 32420 }, { "epoch": 0.42129610829652897, "grad_norm": 0.3660680949687958, "learning_rate": 0.00011576443676159037, "loss": 1.4725, "step": 32421 }, { "epoch": 0.42130910284044487, "grad_norm": 0.34618934988975525, "learning_rate": 0.00011576183729967898, "loss": 1.4073, "step": 32422 }, { "epoch": 0.4213220973843607, "grad_norm": 0.4827609360218048, "learning_rate": 0.00011575923783776759, "loss": 1.4843, "step": 32423 }, { "epoch": 0.4213350919282766, "grad_norm": 0.4057334363460541, "learning_rate": 0.0001157566383758562, "loss": 1.2198, "step": 32424 }, { "epoch": 0.42134808647219246, "grad_norm": 0.4930001199245453, "learning_rate": 0.00011575403891394482, "loss": 1.4337, "step": 32425 }, { "epoch": 0.42136108101610836, "grad_norm": 0.39155885577201843, "learning_rate": 0.00011575143945203343, "loss": 1.4534, "step": 32426 }, { "epoch": 0.4213740755600242, "grad_norm": 0.38546162843704224, "learning_rate": 0.00011574883999012205, "loss": 1.5163, "step": 32427 }, { "epoch": 0.4213870701039401, "grad_norm": 0.351840615272522, "learning_rate": 0.00011574624052821066, "loss": 1.4753, "step": 32428 }, { "epoch": 0.42140006464785595, "grad_norm": 0.41888755559921265, "learning_rate": 0.0001157436410662993, "loss": 1.3816, "step": 32429 }, { "epoch": 0.42141305919177185, "grad_norm": 0.3877086341381073, "learning_rate": 0.0001157410416043879, "loss": 1.2635, "step": 32430 }, { "epoch": 0.42142605373568776, "grad_norm": 0.35030055046081543, "learning_rate": 0.00011573844214247652, "loss": 1.4179, "step": 32431 }, { "epoch": 0.4214390482796036, "grad_norm": 0.39953339099884033, "learning_rate": 0.00011573584268056513, "loss": 1.2546, "step": 32432 }, { "epoch": 0.4214520428235195, "grad_norm": 0.4239491820335388, "learning_rate": 0.00011573324321865375, "loss": 1.563, "step": 32433 }, { "epoch": 0.42146503736743535, "grad_norm": 0.2957848012447357, "learning_rate": 0.00011573064375674236, "loss": 1.2331, "step": 32434 }, { "epoch": 0.42147803191135125, "grad_norm": 0.4347279667854309, "learning_rate": 0.00011572804429483097, "loss": 1.2822, "step": 32435 }, { "epoch": 0.4214910264552671, "grad_norm": 0.2783936858177185, "learning_rate": 0.00011572544483291958, "loss": 1.3009, "step": 32436 }, { "epoch": 0.421504020999183, "grad_norm": 0.25434961915016174, "learning_rate": 0.00011572284537100821, "loss": 1.23, "step": 32437 }, { "epoch": 0.42151701554309884, "grad_norm": 0.4028712213039398, "learning_rate": 0.00011572024590909682, "loss": 1.4765, "step": 32438 }, { "epoch": 0.42153001008701474, "grad_norm": 0.2965194284915924, "learning_rate": 0.00011571764644718543, "loss": 1.3242, "step": 32439 }, { "epoch": 0.4215430046309306, "grad_norm": 0.29522132873535156, "learning_rate": 0.00011571504698527404, "loss": 1.2425, "step": 32440 }, { "epoch": 0.4215559991748465, "grad_norm": 0.3713493347167969, "learning_rate": 0.00011571244752336268, "loss": 1.3776, "step": 32441 }, { "epoch": 0.42156899371876233, "grad_norm": 0.28610777854919434, "learning_rate": 0.00011570984806145129, "loss": 1.4438, "step": 32442 }, { "epoch": 0.42158198826267823, "grad_norm": 0.3771226704120636, "learning_rate": 0.0001157072485995399, "loss": 1.3282, "step": 32443 }, { "epoch": 0.4215949828065941, "grad_norm": 0.44139981269836426, "learning_rate": 0.00011570464913762851, "loss": 1.3342, "step": 32444 }, { "epoch": 0.42160797735051, "grad_norm": 0.34314075112342834, "learning_rate": 0.00011570204967571714, "loss": 1.3384, "step": 32445 }, { "epoch": 0.4216209718944258, "grad_norm": 0.43567466735839844, "learning_rate": 0.00011569945021380575, "loss": 1.4709, "step": 32446 }, { "epoch": 0.4216339664383417, "grad_norm": 0.4547687768936157, "learning_rate": 0.00011569685075189436, "loss": 1.3817, "step": 32447 }, { "epoch": 0.42164696098225757, "grad_norm": 0.4425061345100403, "learning_rate": 0.00011569425128998297, "loss": 1.3218, "step": 32448 }, { "epoch": 0.42165995552617347, "grad_norm": 0.4090802073478699, "learning_rate": 0.0001156916518280716, "loss": 1.4825, "step": 32449 }, { "epoch": 0.4216729500700893, "grad_norm": 0.3710377812385559, "learning_rate": 0.0001156890523661602, "loss": 1.3153, "step": 32450 }, { "epoch": 0.4216859446140052, "grad_norm": 0.36442914605140686, "learning_rate": 0.00011568645290424882, "loss": 1.3846, "step": 32451 }, { "epoch": 0.42169893915792106, "grad_norm": 0.3278874456882477, "learning_rate": 0.00011568385344233743, "loss": 1.5574, "step": 32452 }, { "epoch": 0.42171193370183696, "grad_norm": 0.3954211175441742, "learning_rate": 0.00011568125398042607, "loss": 1.3831, "step": 32453 }, { "epoch": 0.4217249282457528, "grad_norm": 0.48358291387557983, "learning_rate": 0.00011567865451851468, "loss": 1.3582, "step": 32454 }, { "epoch": 0.4217379227896687, "grad_norm": 0.3833157420158386, "learning_rate": 0.00011567605505660329, "loss": 1.3909, "step": 32455 }, { "epoch": 0.42175091733358455, "grad_norm": 0.42973846197128296, "learning_rate": 0.00011567345559469191, "loss": 1.3112, "step": 32456 }, { "epoch": 0.42176391187750045, "grad_norm": 0.418171763420105, "learning_rate": 0.00011567085613278052, "loss": 1.4071, "step": 32457 }, { "epoch": 0.4217769064214163, "grad_norm": 0.34298932552337646, "learning_rate": 0.00011566825667086913, "loss": 1.3623, "step": 32458 }, { "epoch": 0.4217899009653322, "grad_norm": 0.42031776905059814, "learning_rate": 0.00011566565720895774, "loss": 1.2802, "step": 32459 }, { "epoch": 0.42180289550924804, "grad_norm": 0.32334578037261963, "learning_rate": 0.00011566305774704638, "loss": 1.2745, "step": 32460 }, { "epoch": 0.42181589005316394, "grad_norm": 0.3761952221393585, "learning_rate": 0.000115660458285135, "loss": 1.5443, "step": 32461 }, { "epoch": 0.4218288845970798, "grad_norm": 0.3793239891529083, "learning_rate": 0.00011565785882322359, "loss": 1.352, "step": 32462 }, { "epoch": 0.4218418791409957, "grad_norm": 0.41486847400665283, "learning_rate": 0.0001156552593613122, "loss": 1.2028, "step": 32463 }, { "epoch": 0.42185487368491154, "grad_norm": 0.5224927067756653, "learning_rate": 0.00011565265989940084, "loss": 1.3581, "step": 32464 }, { "epoch": 0.42186786822882744, "grad_norm": 0.4299875497817993, "learning_rate": 0.00011565006043748945, "loss": 1.2916, "step": 32465 }, { "epoch": 0.4218808627727433, "grad_norm": 0.4397324323654175, "learning_rate": 0.00011564746097557806, "loss": 1.1838, "step": 32466 }, { "epoch": 0.4218938573166592, "grad_norm": 0.4065145254135132, "learning_rate": 0.00011564486151366667, "loss": 1.5422, "step": 32467 }, { "epoch": 0.421906851860575, "grad_norm": 0.30650031566619873, "learning_rate": 0.0001156422620517553, "loss": 1.3082, "step": 32468 }, { "epoch": 0.42191984640449093, "grad_norm": 0.40959760546684265, "learning_rate": 0.00011563966258984391, "loss": 1.2993, "step": 32469 }, { "epoch": 0.4219328409484068, "grad_norm": 0.40634217858314514, "learning_rate": 0.00011563706312793252, "loss": 1.5222, "step": 32470 }, { "epoch": 0.4219458354923227, "grad_norm": 0.4975477159023285, "learning_rate": 0.00011563446366602113, "loss": 1.4965, "step": 32471 }, { "epoch": 0.4219588300362385, "grad_norm": 0.29792264103889465, "learning_rate": 0.00011563186420410977, "loss": 1.3949, "step": 32472 }, { "epoch": 0.4219718245801544, "grad_norm": 0.4249768853187561, "learning_rate": 0.00011562926474219838, "loss": 1.3595, "step": 32473 }, { "epoch": 0.42198481912407027, "grad_norm": 0.40076184272766113, "learning_rate": 0.00011562666528028699, "loss": 1.4992, "step": 32474 }, { "epoch": 0.42199781366798617, "grad_norm": 0.3255220055580139, "learning_rate": 0.00011562406581837559, "loss": 1.3101, "step": 32475 }, { "epoch": 0.422010808211902, "grad_norm": 0.27454039454460144, "learning_rate": 0.00011562146635646423, "loss": 1.3713, "step": 32476 }, { "epoch": 0.4220238027558179, "grad_norm": 0.3896216154098511, "learning_rate": 0.00011561886689455284, "loss": 1.3059, "step": 32477 }, { "epoch": 0.42203679729973376, "grad_norm": 0.33177557587623596, "learning_rate": 0.00011561626743264145, "loss": 1.2798, "step": 32478 }, { "epoch": 0.42204979184364966, "grad_norm": 0.264780193567276, "learning_rate": 0.00011561366797073006, "loss": 1.366, "step": 32479 }, { "epoch": 0.4220627863875655, "grad_norm": 0.4660983681678772, "learning_rate": 0.00011561106850881868, "loss": 1.4207, "step": 32480 }, { "epoch": 0.4220757809314814, "grad_norm": 0.3910813629627228, "learning_rate": 0.0001156084690469073, "loss": 1.3643, "step": 32481 }, { "epoch": 0.42208877547539725, "grad_norm": 0.4356350302696228, "learning_rate": 0.0001156058695849959, "loss": 1.526, "step": 32482 }, { "epoch": 0.42210177001931315, "grad_norm": 0.34705057740211487, "learning_rate": 0.00011560327012308452, "loss": 1.4244, "step": 32483 }, { "epoch": 0.422114764563229, "grad_norm": 0.3503780961036682, "learning_rate": 0.00011560067066117315, "loss": 1.3386, "step": 32484 }, { "epoch": 0.4221277591071449, "grad_norm": 0.39164572954177856, "learning_rate": 0.00011559807119926176, "loss": 1.5428, "step": 32485 }, { "epoch": 0.42214075365106074, "grad_norm": 0.36391976475715637, "learning_rate": 0.00011559547173735038, "loss": 1.3764, "step": 32486 }, { "epoch": 0.42215374819497664, "grad_norm": 0.4350518584251404, "learning_rate": 0.00011559287227543897, "loss": 1.3942, "step": 32487 }, { "epoch": 0.4221667427388925, "grad_norm": 0.444780558347702, "learning_rate": 0.00011559027281352761, "loss": 1.3879, "step": 32488 }, { "epoch": 0.4221797372828084, "grad_norm": 0.5134239196777344, "learning_rate": 0.00011558767335161622, "loss": 1.4388, "step": 32489 }, { "epoch": 0.42219273182672423, "grad_norm": 0.42801064252853394, "learning_rate": 0.00011558507388970483, "loss": 1.5003, "step": 32490 }, { "epoch": 0.42220572637064013, "grad_norm": 0.4335562288761139, "learning_rate": 0.00011558247442779344, "loss": 1.3406, "step": 32491 }, { "epoch": 0.422218720914556, "grad_norm": 0.29763683676719666, "learning_rate": 0.00011557987496588207, "loss": 1.4418, "step": 32492 }, { "epoch": 0.4222317154584719, "grad_norm": 0.4186922311782837, "learning_rate": 0.00011557727550397068, "loss": 1.4223, "step": 32493 }, { "epoch": 0.4222447100023877, "grad_norm": 0.4694053828716278, "learning_rate": 0.00011557467604205929, "loss": 1.4558, "step": 32494 }, { "epoch": 0.4222577045463036, "grad_norm": 0.4468023478984833, "learning_rate": 0.00011557207658014793, "loss": 1.466, "step": 32495 }, { "epoch": 0.42227069909021947, "grad_norm": 0.46099603176116943, "learning_rate": 0.00011556947711823654, "loss": 1.1997, "step": 32496 }, { "epoch": 0.42228369363413537, "grad_norm": 0.4393361508846283, "learning_rate": 0.00011556687765632515, "loss": 1.3667, "step": 32497 }, { "epoch": 0.4222966881780512, "grad_norm": 0.38493579626083374, "learning_rate": 0.00011556427819441376, "loss": 1.4841, "step": 32498 }, { "epoch": 0.4223096827219671, "grad_norm": 0.5098912119865417, "learning_rate": 0.00011556167873250239, "loss": 1.2183, "step": 32499 }, { "epoch": 0.42232267726588296, "grad_norm": 0.30311039090156555, "learning_rate": 0.000115559079270591, "loss": 1.3999, "step": 32500 }, { "epoch": 0.42233567180979886, "grad_norm": 0.3550584018230438, "learning_rate": 0.00011555647980867961, "loss": 1.2319, "step": 32501 }, { "epoch": 0.4223486663537147, "grad_norm": 0.31162363290786743, "learning_rate": 0.00011555388034676822, "loss": 1.2692, "step": 32502 }, { "epoch": 0.4223616608976306, "grad_norm": 0.3799562454223633, "learning_rate": 0.00011555128088485686, "loss": 1.3296, "step": 32503 }, { "epoch": 0.42237465544154645, "grad_norm": 0.3360872268676758, "learning_rate": 0.00011554868142294545, "loss": 1.3262, "step": 32504 }, { "epoch": 0.42238764998546235, "grad_norm": 0.3756132423877716, "learning_rate": 0.00011554608196103406, "loss": 1.5579, "step": 32505 }, { "epoch": 0.4224006445293782, "grad_norm": 0.43924999237060547, "learning_rate": 0.00011554348249912268, "loss": 1.292, "step": 32506 }, { "epoch": 0.4224136390732941, "grad_norm": 0.5720948576927185, "learning_rate": 0.00011554088303721131, "loss": 1.396, "step": 32507 }, { "epoch": 0.42242663361721, "grad_norm": 0.38311123847961426, "learning_rate": 0.00011553828357529992, "loss": 1.3476, "step": 32508 }, { "epoch": 0.42243962816112585, "grad_norm": 0.42853695154190063, "learning_rate": 0.00011553568411338854, "loss": 1.3229, "step": 32509 }, { "epoch": 0.42245262270504175, "grad_norm": 0.4066378176212311, "learning_rate": 0.00011553308465147715, "loss": 1.4389, "step": 32510 }, { "epoch": 0.4224656172489576, "grad_norm": 0.4813633859157562, "learning_rate": 0.00011553048518956577, "loss": 1.3278, "step": 32511 }, { "epoch": 0.4224786117928735, "grad_norm": 0.39550501108169556, "learning_rate": 0.00011552788572765438, "loss": 1.2252, "step": 32512 }, { "epoch": 0.42249160633678934, "grad_norm": 0.5144707560539246, "learning_rate": 0.00011552528626574299, "loss": 1.6007, "step": 32513 }, { "epoch": 0.42250460088070524, "grad_norm": 0.23891358077526093, "learning_rate": 0.0001155226868038316, "loss": 1.1676, "step": 32514 }, { "epoch": 0.4225175954246211, "grad_norm": 0.36541077494621277, "learning_rate": 0.00011552008734192024, "loss": 1.523, "step": 32515 }, { "epoch": 0.422530589968537, "grad_norm": 0.37614455819129944, "learning_rate": 0.00011551748788000885, "loss": 1.5062, "step": 32516 }, { "epoch": 0.42254358451245283, "grad_norm": 0.46348482370376587, "learning_rate": 0.00011551488841809745, "loss": 1.4108, "step": 32517 }, { "epoch": 0.42255657905636873, "grad_norm": 0.5071980953216553, "learning_rate": 0.00011551228895618606, "loss": 1.4145, "step": 32518 }, { "epoch": 0.4225695736002846, "grad_norm": 0.40767702460289, "learning_rate": 0.0001155096894942747, "loss": 1.4985, "step": 32519 }, { "epoch": 0.4225825681442005, "grad_norm": 0.42947205901145935, "learning_rate": 0.00011550709003236331, "loss": 1.4034, "step": 32520 }, { "epoch": 0.4225955626881163, "grad_norm": 0.5178409814834595, "learning_rate": 0.00011550449057045192, "loss": 1.5899, "step": 32521 }, { "epoch": 0.4226085572320322, "grad_norm": 0.33328777551651, "learning_rate": 0.00011550189110854053, "loss": 1.3116, "step": 32522 }, { "epoch": 0.42262155177594807, "grad_norm": 0.4110775291919708, "learning_rate": 0.00011549929164662916, "loss": 1.5896, "step": 32523 }, { "epoch": 0.42263454631986397, "grad_norm": 0.38887089490890503, "learning_rate": 0.00011549669218471777, "loss": 1.1956, "step": 32524 }, { "epoch": 0.4226475408637798, "grad_norm": 0.4104284644126892, "learning_rate": 0.00011549409272280638, "loss": 1.208, "step": 32525 }, { "epoch": 0.4226605354076957, "grad_norm": 0.37943828105926514, "learning_rate": 0.00011549149326089499, "loss": 1.3752, "step": 32526 }, { "epoch": 0.42267352995161156, "grad_norm": 0.3851604461669922, "learning_rate": 0.00011548889379898363, "loss": 1.4294, "step": 32527 }, { "epoch": 0.42268652449552746, "grad_norm": 0.31594303250312805, "learning_rate": 0.00011548629433707224, "loss": 1.3908, "step": 32528 }, { "epoch": 0.4226995190394433, "grad_norm": 0.32268786430358887, "learning_rate": 0.00011548369487516083, "loss": 1.3349, "step": 32529 }, { "epoch": 0.4227125135833592, "grad_norm": 0.412300705909729, "learning_rate": 0.00011548109541324947, "loss": 1.4975, "step": 32530 }, { "epoch": 0.42272550812727505, "grad_norm": 0.36216339468955994, "learning_rate": 0.00011547849595133808, "loss": 1.4701, "step": 32531 }, { "epoch": 0.42273850267119095, "grad_norm": 0.38794395327568054, "learning_rate": 0.0001154758964894267, "loss": 1.4327, "step": 32532 }, { "epoch": 0.4227514972151068, "grad_norm": 0.522821307182312, "learning_rate": 0.0001154732970275153, "loss": 1.4971, "step": 32533 }, { "epoch": 0.4227644917590227, "grad_norm": 0.4122038185596466, "learning_rate": 0.00011547069756560393, "loss": 1.3446, "step": 32534 }, { "epoch": 0.42277748630293854, "grad_norm": 0.3933035731315613, "learning_rate": 0.00011546809810369254, "loss": 1.4748, "step": 32535 }, { "epoch": 0.42279048084685444, "grad_norm": 0.39935502409935, "learning_rate": 0.00011546549864178115, "loss": 1.3745, "step": 32536 }, { "epoch": 0.4228034753907703, "grad_norm": 0.3199804425239563, "learning_rate": 0.00011546289917986976, "loss": 1.1982, "step": 32537 }, { "epoch": 0.4228164699346862, "grad_norm": 0.41713863611221313, "learning_rate": 0.0001154602997179584, "loss": 1.4462, "step": 32538 }, { "epoch": 0.42282946447860204, "grad_norm": 0.4183112680912018, "learning_rate": 0.00011545770025604701, "loss": 1.441, "step": 32539 }, { "epoch": 0.42284245902251794, "grad_norm": 0.4007284343242645, "learning_rate": 0.00011545510079413562, "loss": 1.4168, "step": 32540 }, { "epoch": 0.4228554535664338, "grad_norm": 0.3678448796272278, "learning_rate": 0.00011545250133222423, "loss": 1.4615, "step": 32541 }, { "epoch": 0.4228684481103497, "grad_norm": 0.43753302097320557, "learning_rate": 0.00011544990187031286, "loss": 1.2577, "step": 32542 }, { "epoch": 0.4228814426542655, "grad_norm": 0.28028228878974915, "learning_rate": 0.00011544730240840147, "loss": 1.3049, "step": 32543 }, { "epoch": 0.4228944371981814, "grad_norm": 0.43065887689590454, "learning_rate": 0.00011544470294649008, "loss": 1.4066, "step": 32544 }, { "epoch": 0.4229074317420973, "grad_norm": 0.4156615734100342, "learning_rate": 0.00011544210348457869, "loss": 1.5258, "step": 32545 }, { "epoch": 0.4229204262860132, "grad_norm": 0.3791862428188324, "learning_rate": 0.00011543950402266732, "loss": 1.5816, "step": 32546 }, { "epoch": 0.422933420829929, "grad_norm": 0.44544869661331177, "learning_rate": 0.00011543690456075593, "loss": 1.3462, "step": 32547 }, { "epoch": 0.4229464153738449, "grad_norm": 0.459179550409317, "learning_rate": 0.00011543430509884454, "loss": 1.3869, "step": 32548 }, { "epoch": 0.42295940991776076, "grad_norm": 0.3827952444553375, "learning_rate": 0.00011543170563693315, "loss": 1.4707, "step": 32549 }, { "epoch": 0.42297240446167667, "grad_norm": 0.4364517033100128, "learning_rate": 0.00011542910617502179, "loss": 1.3031, "step": 32550 }, { "epoch": 0.4229853990055925, "grad_norm": 0.3850482106208801, "learning_rate": 0.0001154265067131104, "loss": 1.2848, "step": 32551 }, { "epoch": 0.4229983935495084, "grad_norm": 0.4495599567890167, "learning_rate": 0.00011542390725119901, "loss": 1.491, "step": 32552 }, { "epoch": 0.42301138809342426, "grad_norm": 0.3892969787120819, "learning_rate": 0.00011542130778928762, "loss": 1.5318, "step": 32553 }, { "epoch": 0.42302438263734016, "grad_norm": 0.4029179811477661, "learning_rate": 0.00011541870832737624, "loss": 1.4513, "step": 32554 }, { "epoch": 0.423037377181256, "grad_norm": 0.40128234028816223, "learning_rate": 0.00011541610886546485, "loss": 1.4578, "step": 32555 }, { "epoch": 0.4230503717251719, "grad_norm": 0.4413566589355469, "learning_rate": 0.00011541350940355347, "loss": 1.3805, "step": 32556 }, { "epoch": 0.42306336626908775, "grad_norm": 0.3579275608062744, "learning_rate": 0.00011541090994164208, "loss": 1.282, "step": 32557 }, { "epoch": 0.42307636081300365, "grad_norm": 0.43506690859794617, "learning_rate": 0.00011540831047973071, "loss": 1.4102, "step": 32558 }, { "epoch": 0.4230893553569195, "grad_norm": 0.3977290987968445, "learning_rate": 0.00011540571101781931, "loss": 1.2649, "step": 32559 }, { "epoch": 0.4231023499008354, "grad_norm": 0.3319036066532135, "learning_rate": 0.00011540311155590792, "loss": 1.464, "step": 32560 }, { "epoch": 0.42311534444475124, "grad_norm": 0.32621195912361145, "learning_rate": 0.00011540051209399653, "loss": 1.2574, "step": 32561 }, { "epoch": 0.42312833898866714, "grad_norm": 0.5112295746803284, "learning_rate": 0.00011539791263208517, "loss": 1.3805, "step": 32562 }, { "epoch": 0.423141333532583, "grad_norm": 0.29058554768562317, "learning_rate": 0.00011539531317017378, "loss": 1.2061, "step": 32563 }, { "epoch": 0.4231543280764989, "grad_norm": 0.3931564688682556, "learning_rate": 0.0001153927137082624, "loss": 1.1972, "step": 32564 }, { "epoch": 0.42316732262041473, "grad_norm": 0.4237231910228729, "learning_rate": 0.000115390114246351, "loss": 1.4078, "step": 32565 }, { "epoch": 0.42318031716433063, "grad_norm": 0.45243990421295166, "learning_rate": 0.00011538751478443963, "loss": 1.3205, "step": 32566 }, { "epoch": 0.4231933117082465, "grad_norm": 0.5141027569770813, "learning_rate": 0.00011538491532252824, "loss": 1.5201, "step": 32567 }, { "epoch": 0.4232063062521624, "grad_norm": 0.3802530765533447, "learning_rate": 0.00011538231586061685, "loss": 1.4894, "step": 32568 }, { "epoch": 0.4232193007960782, "grad_norm": 0.43684080243110657, "learning_rate": 0.00011537971639870549, "loss": 1.2676, "step": 32569 }, { "epoch": 0.4232322953399941, "grad_norm": 0.34049174189567566, "learning_rate": 0.0001153771169367941, "loss": 1.2872, "step": 32570 }, { "epoch": 0.42324528988390997, "grad_norm": 0.40717872977256775, "learning_rate": 0.0001153745174748827, "loss": 1.3117, "step": 32571 }, { "epoch": 0.42325828442782587, "grad_norm": 0.4071490466594696, "learning_rate": 0.00011537191801297131, "loss": 1.4712, "step": 32572 }, { "epoch": 0.4232712789717417, "grad_norm": 0.5588322877883911, "learning_rate": 0.00011536931855105995, "loss": 1.501, "step": 32573 }, { "epoch": 0.4232842735156576, "grad_norm": 0.47346457839012146, "learning_rate": 0.00011536671908914856, "loss": 1.3461, "step": 32574 }, { "epoch": 0.42329726805957346, "grad_norm": 0.31851914525032043, "learning_rate": 0.00011536411962723717, "loss": 1.2281, "step": 32575 }, { "epoch": 0.42331026260348936, "grad_norm": 0.29420894384384155, "learning_rate": 0.00011536152016532578, "loss": 1.4417, "step": 32576 }, { "epoch": 0.4233232571474052, "grad_norm": 0.4689401686191559, "learning_rate": 0.0001153589207034144, "loss": 1.3539, "step": 32577 }, { "epoch": 0.4233362516913211, "grad_norm": 0.3141169548034668, "learning_rate": 0.00011535632124150301, "loss": 1.2189, "step": 32578 }, { "epoch": 0.42334924623523695, "grad_norm": 0.45030298829078674, "learning_rate": 0.00011535372177959163, "loss": 1.5321, "step": 32579 }, { "epoch": 0.42336224077915285, "grad_norm": 0.35827505588531494, "learning_rate": 0.00011535112231768024, "loss": 1.2821, "step": 32580 }, { "epoch": 0.4233752353230687, "grad_norm": 0.35243159532546997, "learning_rate": 0.00011534852285576887, "loss": 1.2912, "step": 32581 }, { "epoch": 0.4233882298669846, "grad_norm": 0.36464205384254456, "learning_rate": 0.00011534592339385749, "loss": 1.468, "step": 32582 }, { "epoch": 0.4234012244109005, "grad_norm": 0.3930002450942993, "learning_rate": 0.0001153433239319461, "loss": 1.4181, "step": 32583 }, { "epoch": 0.42341421895481635, "grad_norm": 0.4512020945549011, "learning_rate": 0.0001153407244700347, "loss": 1.2813, "step": 32584 }, { "epoch": 0.42342721349873225, "grad_norm": 0.2981812059879303, "learning_rate": 0.00011533812500812333, "loss": 1.3263, "step": 32585 }, { "epoch": 0.4234402080426481, "grad_norm": 0.46833929419517517, "learning_rate": 0.00011533552554621194, "loss": 1.4143, "step": 32586 }, { "epoch": 0.423453202586564, "grad_norm": 0.3698435425758362, "learning_rate": 0.00011533292608430055, "loss": 1.4559, "step": 32587 }, { "epoch": 0.42346619713047984, "grad_norm": 0.30801665782928467, "learning_rate": 0.00011533032662238916, "loss": 1.2508, "step": 32588 }, { "epoch": 0.42347919167439574, "grad_norm": 0.3188161253929138, "learning_rate": 0.00011532772716047779, "loss": 1.2101, "step": 32589 }, { "epoch": 0.4234921862183116, "grad_norm": 0.40829670429229736, "learning_rate": 0.0001153251276985664, "loss": 1.394, "step": 32590 }, { "epoch": 0.4235051807622275, "grad_norm": 0.508387565612793, "learning_rate": 0.00011532252823665501, "loss": 1.5417, "step": 32591 }, { "epoch": 0.42351817530614333, "grad_norm": 0.44753769040107727, "learning_rate": 0.00011531992877474362, "loss": 1.3346, "step": 32592 }, { "epoch": 0.42353116985005923, "grad_norm": 0.44312670826911926, "learning_rate": 0.00011531732931283226, "loss": 1.437, "step": 32593 }, { "epoch": 0.4235441643939751, "grad_norm": 0.47309330105781555, "learning_rate": 0.00011531472985092087, "loss": 1.485, "step": 32594 }, { "epoch": 0.423557158937891, "grad_norm": 0.36506420373916626, "learning_rate": 0.00011531213038900948, "loss": 1.4639, "step": 32595 }, { "epoch": 0.4235701534818068, "grad_norm": 0.3561936318874359, "learning_rate": 0.00011530953092709809, "loss": 1.3578, "step": 32596 }, { "epoch": 0.4235831480257227, "grad_norm": 0.44975969195365906, "learning_rate": 0.00011530693146518672, "loss": 1.4312, "step": 32597 }, { "epoch": 0.42359614256963857, "grad_norm": 0.4023270905017853, "learning_rate": 0.00011530433200327533, "loss": 1.3393, "step": 32598 }, { "epoch": 0.42360913711355447, "grad_norm": 0.345290869474411, "learning_rate": 0.00011530173254136394, "loss": 1.5085, "step": 32599 }, { "epoch": 0.4236221316574703, "grad_norm": 0.44778358936309814, "learning_rate": 0.00011529913307945255, "loss": 1.5911, "step": 32600 }, { "epoch": 0.4236351262013862, "grad_norm": 0.4356503188610077, "learning_rate": 0.00011529653361754117, "loss": 1.4426, "step": 32601 }, { "epoch": 0.42364812074530206, "grad_norm": 0.4331046938896179, "learning_rate": 0.00011529393415562979, "loss": 1.3076, "step": 32602 }, { "epoch": 0.42366111528921796, "grad_norm": 0.3995019793510437, "learning_rate": 0.0001152913346937184, "loss": 1.4856, "step": 32603 }, { "epoch": 0.4236741098331338, "grad_norm": 0.39607229828834534, "learning_rate": 0.00011528873523180703, "loss": 1.3929, "step": 32604 }, { "epoch": 0.4236871043770497, "grad_norm": 0.3992483913898468, "learning_rate": 0.00011528613576989565, "loss": 1.3409, "step": 32605 }, { "epoch": 0.42370009892096555, "grad_norm": 0.41545766592025757, "learning_rate": 0.00011528353630798426, "loss": 1.512, "step": 32606 }, { "epoch": 0.42371309346488145, "grad_norm": 0.43989789485931396, "learning_rate": 0.00011528093684607287, "loss": 1.4635, "step": 32607 }, { "epoch": 0.4237260880087973, "grad_norm": 0.31132909655570984, "learning_rate": 0.00011527833738416149, "loss": 1.1587, "step": 32608 }, { "epoch": 0.4237390825527132, "grad_norm": 0.3846941888332367, "learning_rate": 0.0001152757379222501, "loss": 1.4719, "step": 32609 }, { "epoch": 0.42375207709662904, "grad_norm": 0.2898976504802704, "learning_rate": 0.00011527313846033871, "loss": 1.3125, "step": 32610 }, { "epoch": 0.42376507164054494, "grad_norm": 0.38989585638046265, "learning_rate": 0.00011527053899842732, "loss": 1.3963, "step": 32611 }, { "epoch": 0.4237780661844608, "grad_norm": 0.278799444437027, "learning_rate": 0.00011526793953651596, "loss": 1.4038, "step": 32612 }, { "epoch": 0.4237910607283767, "grad_norm": 0.3773708641529083, "learning_rate": 0.00011526534007460456, "loss": 1.1559, "step": 32613 }, { "epoch": 0.42380405527229253, "grad_norm": 0.3979058265686035, "learning_rate": 0.00011526274061269317, "loss": 1.1841, "step": 32614 }, { "epoch": 0.42381704981620844, "grad_norm": 0.464068204164505, "learning_rate": 0.00011526014115078178, "loss": 1.5539, "step": 32615 }, { "epoch": 0.4238300443601243, "grad_norm": 0.43467697501182556, "learning_rate": 0.00011525754168887042, "loss": 1.456, "step": 32616 }, { "epoch": 0.4238430389040402, "grad_norm": 0.34923499822616577, "learning_rate": 0.00011525494222695903, "loss": 1.2825, "step": 32617 }, { "epoch": 0.423856033447956, "grad_norm": 0.32817941904067993, "learning_rate": 0.00011525234276504764, "loss": 1.4485, "step": 32618 }, { "epoch": 0.4238690279918719, "grad_norm": 0.480048805475235, "learning_rate": 0.00011524974330313625, "loss": 1.6184, "step": 32619 }, { "epoch": 0.4238820225357878, "grad_norm": 0.4450722336769104, "learning_rate": 0.00011524714384122488, "loss": 1.5258, "step": 32620 }, { "epoch": 0.4238950170797037, "grad_norm": 0.338461697101593, "learning_rate": 0.00011524454437931349, "loss": 1.3314, "step": 32621 }, { "epoch": 0.4239080116236195, "grad_norm": 0.4182823598384857, "learning_rate": 0.0001152419449174021, "loss": 1.2723, "step": 32622 }, { "epoch": 0.4239210061675354, "grad_norm": 0.36841315031051636, "learning_rate": 0.00011523934545549071, "loss": 1.3257, "step": 32623 }, { "epoch": 0.42393400071145126, "grad_norm": 0.45369064807891846, "learning_rate": 0.00011523674599357935, "loss": 1.3305, "step": 32624 }, { "epoch": 0.42394699525536717, "grad_norm": 0.5095656514167786, "learning_rate": 0.00011523414653166796, "loss": 1.3531, "step": 32625 }, { "epoch": 0.423959989799283, "grad_norm": 0.42047765851020813, "learning_rate": 0.00011523154706975656, "loss": 1.4227, "step": 32626 }, { "epoch": 0.4239729843431989, "grad_norm": 0.46105897426605225, "learning_rate": 0.00011522894760784517, "loss": 1.2701, "step": 32627 }, { "epoch": 0.42398597888711476, "grad_norm": 0.5402507185935974, "learning_rate": 0.0001152263481459338, "loss": 1.326, "step": 32628 }, { "epoch": 0.42399897343103066, "grad_norm": 0.4956165850162506, "learning_rate": 0.00011522374868402242, "loss": 1.4874, "step": 32629 }, { "epoch": 0.4240119679749465, "grad_norm": 0.553966224193573, "learning_rate": 0.00011522114922211103, "loss": 1.2835, "step": 32630 }, { "epoch": 0.4240249625188624, "grad_norm": 0.4198230504989624, "learning_rate": 0.00011521854976019964, "loss": 1.4184, "step": 32631 }, { "epoch": 0.42403795706277825, "grad_norm": 0.29994654655456543, "learning_rate": 0.00011521595029828826, "loss": 1.2848, "step": 32632 }, { "epoch": 0.42405095160669415, "grad_norm": 0.4693107008934021, "learning_rate": 0.00011521335083637687, "loss": 1.374, "step": 32633 }, { "epoch": 0.42406394615061, "grad_norm": 0.3967727720737457, "learning_rate": 0.00011521075137446548, "loss": 1.3083, "step": 32634 }, { "epoch": 0.4240769406945259, "grad_norm": 0.40576738119125366, "learning_rate": 0.0001152081519125541, "loss": 1.3345, "step": 32635 }, { "epoch": 0.42408993523844174, "grad_norm": 0.4012446999549866, "learning_rate": 0.00011520555245064273, "loss": 1.2862, "step": 32636 }, { "epoch": 0.42410292978235764, "grad_norm": 0.42559412121772766, "learning_rate": 0.00011520295298873134, "loss": 1.3528, "step": 32637 }, { "epoch": 0.4241159243262735, "grad_norm": 0.43126824498176575, "learning_rate": 0.00011520035352681996, "loss": 1.5463, "step": 32638 }, { "epoch": 0.4241289188701894, "grad_norm": 0.5899081826210022, "learning_rate": 0.00011519775406490855, "loss": 1.3077, "step": 32639 }, { "epoch": 0.42414191341410523, "grad_norm": 0.4572855830192566, "learning_rate": 0.00011519515460299719, "loss": 1.2637, "step": 32640 }, { "epoch": 0.42415490795802113, "grad_norm": 0.5459520220756531, "learning_rate": 0.0001151925551410858, "loss": 1.4556, "step": 32641 }, { "epoch": 0.424167902501937, "grad_norm": 0.42976656556129456, "learning_rate": 0.00011518995567917441, "loss": 1.7099, "step": 32642 }, { "epoch": 0.4241808970458529, "grad_norm": 0.4294690787792206, "learning_rate": 0.00011518735621726304, "loss": 1.5077, "step": 32643 }, { "epoch": 0.4241938915897687, "grad_norm": 0.3748714327812195, "learning_rate": 0.00011518475675535165, "loss": 1.3059, "step": 32644 }, { "epoch": 0.4242068861336846, "grad_norm": 0.4206548035144806, "learning_rate": 0.00011518215729344026, "loss": 1.5961, "step": 32645 }, { "epoch": 0.42421988067760047, "grad_norm": 0.3011281192302704, "learning_rate": 0.00011517955783152887, "loss": 1.2929, "step": 32646 }, { "epoch": 0.42423287522151637, "grad_norm": 0.378834992647171, "learning_rate": 0.00011517695836961751, "loss": 1.0831, "step": 32647 }, { "epoch": 0.4242458697654322, "grad_norm": 0.3436242938041687, "learning_rate": 0.00011517435890770612, "loss": 1.5377, "step": 32648 }, { "epoch": 0.4242588643093481, "grad_norm": 0.37045952677726746, "learning_rate": 0.00011517175944579473, "loss": 1.4286, "step": 32649 }, { "epoch": 0.42427185885326396, "grad_norm": 0.3832129240036011, "learning_rate": 0.00011516915998388334, "loss": 1.4825, "step": 32650 }, { "epoch": 0.42428485339717986, "grad_norm": 0.321992963552475, "learning_rate": 0.00011516656052197196, "loss": 1.4818, "step": 32651 }, { "epoch": 0.4242978479410957, "grad_norm": 0.46432238817214966, "learning_rate": 0.00011516396106006058, "loss": 1.3464, "step": 32652 }, { "epoch": 0.4243108424850116, "grad_norm": 0.47884055972099304, "learning_rate": 0.00011516136159814919, "loss": 1.4246, "step": 32653 }, { "epoch": 0.42432383702892745, "grad_norm": 0.3578111231327057, "learning_rate": 0.0001151587621362378, "loss": 1.3495, "step": 32654 }, { "epoch": 0.42433683157284335, "grad_norm": 0.3594319224357605, "learning_rate": 0.00011515616267432642, "loss": 1.3246, "step": 32655 }, { "epoch": 0.4243498261167592, "grad_norm": 0.41986316442489624, "learning_rate": 0.00011515356321241503, "loss": 1.3238, "step": 32656 }, { "epoch": 0.4243628206606751, "grad_norm": 0.3603053390979767, "learning_rate": 0.00011515096375050364, "loss": 1.4085, "step": 32657 }, { "epoch": 0.42437581520459094, "grad_norm": 0.391559898853302, "learning_rate": 0.00011514836428859226, "loss": 1.3955, "step": 32658 }, { "epoch": 0.42438880974850685, "grad_norm": 0.4199407994747162, "learning_rate": 0.00011514576482668089, "loss": 1.1347, "step": 32659 }, { "epoch": 0.42440180429242275, "grad_norm": 0.3639289140701294, "learning_rate": 0.0001151431653647695, "loss": 1.1818, "step": 32660 }, { "epoch": 0.4244147988363386, "grad_norm": 0.35207751393318176, "learning_rate": 0.00011514056590285811, "loss": 1.1047, "step": 32661 }, { "epoch": 0.4244277933802545, "grad_norm": 0.43022620677948, "learning_rate": 0.00011513796644094673, "loss": 1.3284, "step": 32662 }, { "epoch": 0.42444078792417034, "grad_norm": 0.3480023145675659, "learning_rate": 0.00011513536697903535, "loss": 1.3873, "step": 32663 }, { "epoch": 0.42445378246808624, "grad_norm": 0.9453961253166199, "learning_rate": 0.00011513276751712396, "loss": 1.2621, "step": 32664 }, { "epoch": 0.4244667770120021, "grad_norm": 0.38237282633781433, "learning_rate": 0.00011513016805521257, "loss": 1.3627, "step": 32665 }, { "epoch": 0.424479771555918, "grad_norm": 0.4273073077201843, "learning_rate": 0.00011512756859330118, "loss": 1.3197, "step": 32666 }, { "epoch": 0.42449276609983383, "grad_norm": 0.45577436685562134, "learning_rate": 0.00011512496913138982, "loss": 1.5395, "step": 32667 }, { "epoch": 0.42450576064374973, "grad_norm": 0.35005468130111694, "learning_rate": 0.00011512236966947842, "loss": 1.408, "step": 32668 }, { "epoch": 0.4245187551876656, "grad_norm": 0.49522069096565247, "learning_rate": 0.00011511977020756703, "loss": 1.4457, "step": 32669 }, { "epoch": 0.4245317497315815, "grad_norm": 0.3975871205329895, "learning_rate": 0.00011511717074565564, "loss": 1.4151, "step": 32670 }, { "epoch": 0.4245447442754973, "grad_norm": 0.463381826877594, "learning_rate": 0.00011511457128374428, "loss": 1.4582, "step": 32671 }, { "epoch": 0.4245577388194132, "grad_norm": 0.4626269042491913, "learning_rate": 0.00011511197182183289, "loss": 1.4629, "step": 32672 }, { "epoch": 0.42457073336332907, "grad_norm": 0.3940832018852234, "learning_rate": 0.0001151093723599215, "loss": 1.3748, "step": 32673 }, { "epoch": 0.42458372790724497, "grad_norm": 0.4912465512752533, "learning_rate": 0.00011510677289801011, "loss": 1.4908, "step": 32674 }, { "epoch": 0.4245967224511608, "grad_norm": 0.3597962260246277, "learning_rate": 0.00011510417343609874, "loss": 1.4021, "step": 32675 }, { "epoch": 0.4246097169950767, "grad_norm": 0.37891116738319397, "learning_rate": 0.00011510157397418735, "loss": 1.3063, "step": 32676 }, { "epoch": 0.42462271153899256, "grad_norm": 0.3783966302871704, "learning_rate": 0.00011509897451227596, "loss": 1.3974, "step": 32677 }, { "epoch": 0.42463570608290846, "grad_norm": 0.41415533423423767, "learning_rate": 0.0001150963750503646, "loss": 1.4527, "step": 32678 }, { "epoch": 0.4246487006268243, "grad_norm": 0.4733615815639496, "learning_rate": 0.0001150937755884532, "loss": 1.4599, "step": 32679 }, { "epoch": 0.4246616951707402, "grad_norm": 0.4656376540660858, "learning_rate": 0.00011509117612654182, "loss": 1.4003, "step": 32680 }, { "epoch": 0.42467468971465605, "grad_norm": 0.3168615698814392, "learning_rate": 0.00011508857666463041, "loss": 1.298, "step": 32681 }, { "epoch": 0.42468768425857195, "grad_norm": 0.3645986318588257, "learning_rate": 0.00011508597720271905, "loss": 1.2577, "step": 32682 }, { "epoch": 0.4247006788024878, "grad_norm": 0.2938631772994995, "learning_rate": 0.00011508337774080766, "loss": 1.2895, "step": 32683 }, { "epoch": 0.4247136733464037, "grad_norm": 0.5293096303939819, "learning_rate": 0.00011508077827889627, "loss": 1.627, "step": 32684 }, { "epoch": 0.42472666789031954, "grad_norm": 0.4071999490261078, "learning_rate": 0.00011507817881698489, "loss": 1.4628, "step": 32685 }, { "epoch": 0.42473966243423544, "grad_norm": 0.30241674184799194, "learning_rate": 0.00011507557935507351, "loss": 1.4766, "step": 32686 }, { "epoch": 0.4247526569781513, "grad_norm": 0.4153682589530945, "learning_rate": 0.00011507297989316212, "loss": 1.6435, "step": 32687 }, { "epoch": 0.4247656515220672, "grad_norm": 0.42855629324913025, "learning_rate": 0.00011507038043125073, "loss": 1.4003, "step": 32688 }, { "epoch": 0.42477864606598303, "grad_norm": 0.4692609906196594, "learning_rate": 0.00011506778096933934, "loss": 1.2338, "step": 32689 }, { "epoch": 0.42479164060989894, "grad_norm": 0.5062618255615234, "learning_rate": 0.00011506518150742798, "loss": 1.5371, "step": 32690 }, { "epoch": 0.4248046351538148, "grad_norm": 0.4251377582550049, "learning_rate": 0.00011506258204551659, "loss": 1.301, "step": 32691 }, { "epoch": 0.4248176296977307, "grad_norm": 0.4042539596557617, "learning_rate": 0.0001150599825836052, "loss": 1.5439, "step": 32692 }, { "epoch": 0.4248306242416465, "grad_norm": 0.37823086977005005, "learning_rate": 0.0001150573831216938, "loss": 1.4319, "step": 32693 }, { "epoch": 0.4248436187855624, "grad_norm": 0.33654046058654785, "learning_rate": 0.00011505478365978244, "loss": 1.3817, "step": 32694 }, { "epoch": 0.42485661332947827, "grad_norm": 0.4661678969860077, "learning_rate": 0.00011505218419787105, "loss": 1.4325, "step": 32695 }, { "epoch": 0.4248696078733942, "grad_norm": 0.3422922194004059, "learning_rate": 0.00011504958473595966, "loss": 1.5663, "step": 32696 }, { "epoch": 0.42488260241731, "grad_norm": 0.3530375063419342, "learning_rate": 0.00011504698527404827, "loss": 1.2248, "step": 32697 }, { "epoch": 0.4248955969612259, "grad_norm": 0.32024598121643066, "learning_rate": 0.0001150443858121369, "loss": 1.2639, "step": 32698 }, { "epoch": 0.42490859150514176, "grad_norm": 0.44534948468208313, "learning_rate": 0.0001150417863502255, "loss": 1.4372, "step": 32699 }, { "epoch": 0.42492158604905766, "grad_norm": 0.42533090710639954, "learning_rate": 0.00011503918688831412, "loss": 1.5056, "step": 32700 }, { "epoch": 0.4249345805929735, "grad_norm": 0.4291832149028778, "learning_rate": 0.00011503658742640273, "loss": 1.3363, "step": 32701 }, { "epoch": 0.4249475751368894, "grad_norm": 0.3901008069515228, "learning_rate": 0.00011503398796449137, "loss": 1.573, "step": 32702 }, { "epoch": 0.42496056968080526, "grad_norm": 0.4670076072216034, "learning_rate": 0.00011503138850257998, "loss": 1.4713, "step": 32703 }, { "epoch": 0.42497356422472116, "grad_norm": 0.3456535339355469, "learning_rate": 0.00011502878904066859, "loss": 1.451, "step": 32704 }, { "epoch": 0.424986558768637, "grad_norm": 0.37462544441223145, "learning_rate": 0.0001150261895787572, "loss": 1.3581, "step": 32705 }, { "epoch": 0.4249995533125529, "grad_norm": 0.43311840295791626, "learning_rate": 0.00011502359011684582, "loss": 1.4809, "step": 32706 }, { "epoch": 0.42501254785646875, "grad_norm": 0.4312298595905304, "learning_rate": 0.00011502099065493443, "loss": 1.4353, "step": 32707 }, { "epoch": 0.42502554240038465, "grad_norm": 0.2707349359989166, "learning_rate": 0.00011501839119302305, "loss": 1.4312, "step": 32708 }, { "epoch": 0.4250385369443005, "grad_norm": 0.3925691246986389, "learning_rate": 0.00011501579173111166, "loss": 1.2244, "step": 32709 }, { "epoch": 0.4250515314882164, "grad_norm": 0.4297676384449005, "learning_rate": 0.00011501319226920028, "loss": 1.4111, "step": 32710 }, { "epoch": 0.42506452603213224, "grad_norm": 0.44752100110054016, "learning_rate": 0.00011501059280728889, "loss": 1.5595, "step": 32711 }, { "epoch": 0.42507752057604814, "grad_norm": 0.45761632919311523, "learning_rate": 0.0001150079933453775, "loss": 1.2665, "step": 32712 }, { "epoch": 0.425090515119964, "grad_norm": 0.480825811624527, "learning_rate": 0.00011500539388346611, "loss": 1.4883, "step": 32713 }, { "epoch": 0.4251035096638799, "grad_norm": 0.35824570059776306, "learning_rate": 0.00011500279442155475, "loss": 1.6681, "step": 32714 }, { "epoch": 0.42511650420779573, "grad_norm": 0.4294672906398773, "learning_rate": 0.00011500019495964336, "loss": 1.4592, "step": 32715 }, { "epoch": 0.42512949875171163, "grad_norm": 0.36337533593177795, "learning_rate": 0.00011499759549773197, "loss": 1.4034, "step": 32716 }, { "epoch": 0.4251424932956275, "grad_norm": 0.2813102900981903, "learning_rate": 0.0001149949960358206, "loss": 1.3132, "step": 32717 }, { "epoch": 0.4251554878395434, "grad_norm": 0.4075600802898407, "learning_rate": 0.00011499239657390921, "loss": 1.4144, "step": 32718 }, { "epoch": 0.4251684823834592, "grad_norm": 0.5380195379257202, "learning_rate": 0.00011498979711199782, "loss": 1.2932, "step": 32719 }, { "epoch": 0.4251814769273751, "grad_norm": 0.4188152253627777, "learning_rate": 0.00011498719765008643, "loss": 1.4946, "step": 32720 }, { "epoch": 0.42519447147129097, "grad_norm": 0.4549977481365204, "learning_rate": 0.00011498459818817507, "loss": 1.5807, "step": 32721 }, { "epoch": 0.42520746601520687, "grad_norm": 0.45418059825897217, "learning_rate": 0.00011498199872626368, "loss": 1.3901, "step": 32722 }, { "epoch": 0.4252204605591227, "grad_norm": 0.4834255278110504, "learning_rate": 0.00011497939926435228, "loss": 1.489, "step": 32723 }, { "epoch": 0.4252334551030386, "grad_norm": 0.4556974172592163, "learning_rate": 0.00011497679980244089, "loss": 1.2928, "step": 32724 }, { "epoch": 0.42524644964695446, "grad_norm": 0.3241259753704071, "learning_rate": 0.00011497420034052953, "loss": 1.2466, "step": 32725 }, { "epoch": 0.42525944419087036, "grad_norm": 0.39881256222724915, "learning_rate": 0.00011497160087861814, "loss": 1.0398, "step": 32726 }, { "epoch": 0.4252724387347862, "grad_norm": 0.3281596004962921, "learning_rate": 0.00011496900141670675, "loss": 1.5064, "step": 32727 }, { "epoch": 0.4252854332787021, "grad_norm": 0.47772088646888733, "learning_rate": 0.00011496640195479536, "loss": 1.641, "step": 32728 }, { "epoch": 0.42529842782261795, "grad_norm": 0.5090879797935486, "learning_rate": 0.00011496380249288398, "loss": 1.541, "step": 32729 }, { "epoch": 0.42531142236653385, "grad_norm": 0.48298248648643494, "learning_rate": 0.0001149612030309726, "loss": 1.3389, "step": 32730 }, { "epoch": 0.4253244169104497, "grad_norm": 0.40232449769973755, "learning_rate": 0.0001149586035690612, "loss": 1.4262, "step": 32731 }, { "epoch": 0.4253374114543656, "grad_norm": 0.42198362946510315, "learning_rate": 0.00011495600410714982, "loss": 1.1582, "step": 32732 }, { "epoch": 0.42535040599828144, "grad_norm": 0.4367421567440033, "learning_rate": 0.00011495340464523845, "loss": 1.4233, "step": 32733 }, { "epoch": 0.42536340054219735, "grad_norm": 0.31430256366729736, "learning_rate": 0.00011495080518332707, "loss": 1.3616, "step": 32734 }, { "epoch": 0.42537639508611325, "grad_norm": 0.3407075107097626, "learning_rate": 0.00011494820572141566, "loss": 1.2431, "step": 32735 }, { "epoch": 0.4253893896300291, "grad_norm": 0.45105311274528503, "learning_rate": 0.00011494560625950427, "loss": 1.4104, "step": 32736 }, { "epoch": 0.425402384173945, "grad_norm": 0.46722736954689026, "learning_rate": 0.00011494300679759291, "loss": 1.3827, "step": 32737 }, { "epoch": 0.42541537871786084, "grad_norm": 0.4503607749938965, "learning_rate": 0.00011494040733568152, "loss": 1.4144, "step": 32738 }, { "epoch": 0.42542837326177674, "grad_norm": 0.369139701128006, "learning_rate": 0.00011493780787377013, "loss": 1.3201, "step": 32739 }, { "epoch": 0.4254413678056926, "grad_norm": 0.36658594012260437, "learning_rate": 0.00011493520841185874, "loss": 1.2704, "step": 32740 }, { "epoch": 0.4254543623496085, "grad_norm": 0.47784295678138733, "learning_rate": 0.00011493260894994737, "loss": 1.4383, "step": 32741 }, { "epoch": 0.42546735689352433, "grad_norm": 0.3908814787864685, "learning_rate": 0.00011493000948803598, "loss": 1.3918, "step": 32742 }, { "epoch": 0.42548035143744023, "grad_norm": 0.3372659683227539, "learning_rate": 0.00011492741002612459, "loss": 1.3738, "step": 32743 }, { "epoch": 0.4254933459813561, "grad_norm": 0.3924589455127716, "learning_rate": 0.0001149248105642132, "loss": 1.4096, "step": 32744 }, { "epoch": 0.425506340525272, "grad_norm": 0.46133190393447876, "learning_rate": 0.00011492221110230184, "loss": 1.4488, "step": 32745 }, { "epoch": 0.4255193350691878, "grad_norm": 0.539125382900238, "learning_rate": 0.00011491961164039045, "loss": 1.3589, "step": 32746 }, { "epoch": 0.4255323296131037, "grad_norm": 0.4047953486442566, "learning_rate": 0.00011491701217847906, "loss": 1.4049, "step": 32747 }, { "epoch": 0.42554532415701957, "grad_norm": 0.4258427619934082, "learning_rate": 0.00011491441271656766, "loss": 1.4506, "step": 32748 }, { "epoch": 0.42555831870093547, "grad_norm": 0.384223997592926, "learning_rate": 0.0001149118132546563, "loss": 1.414, "step": 32749 }, { "epoch": 0.4255713132448513, "grad_norm": 0.34071582555770874, "learning_rate": 0.00011490921379274491, "loss": 1.2714, "step": 32750 }, { "epoch": 0.4255843077887672, "grad_norm": 0.48124393820762634, "learning_rate": 0.00011490661433083352, "loss": 1.3624, "step": 32751 }, { "epoch": 0.42559730233268306, "grad_norm": 0.41478610038757324, "learning_rate": 0.00011490401486892214, "loss": 1.2993, "step": 32752 }, { "epoch": 0.42561029687659896, "grad_norm": 0.4133775532245636, "learning_rate": 0.00011490141540701075, "loss": 1.2451, "step": 32753 }, { "epoch": 0.4256232914205148, "grad_norm": 0.45846426486968994, "learning_rate": 0.00011489881594509937, "loss": 1.3328, "step": 32754 }, { "epoch": 0.4256362859644307, "grad_norm": 0.4006989002227783, "learning_rate": 0.00011489621648318798, "loss": 1.289, "step": 32755 }, { "epoch": 0.42564928050834655, "grad_norm": 0.44254937767982483, "learning_rate": 0.00011489361702127661, "loss": 1.604, "step": 32756 }, { "epoch": 0.42566227505226245, "grad_norm": 0.354093462228775, "learning_rate": 0.00011489101755936523, "loss": 1.4043, "step": 32757 }, { "epoch": 0.4256752695961783, "grad_norm": 0.5432113409042358, "learning_rate": 0.00011488841809745384, "loss": 1.2742, "step": 32758 }, { "epoch": 0.4256882641400942, "grad_norm": 0.486877977848053, "learning_rate": 0.00011488581863554245, "loss": 1.5019, "step": 32759 }, { "epoch": 0.42570125868401004, "grad_norm": 0.4443177282810211, "learning_rate": 0.00011488321917363107, "loss": 1.2686, "step": 32760 }, { "epoch": 0.42571425322792594, "grad_norm": 0.4437646269798279, "learning_rate": 0.00011488061971171968, "loss": 1.3695, "step": 32761 }, { "epoch": 0.4257272477718418, "grad_norm": 0.4943767189979553, "learning_rate": 0.0001148780202498083, "loss": 1.4588, "step": 32762 }, { "epoch": 0.4257402423157577, "grad_norm": 0.43306395411491394, "learning_rate": 0.0001148754207878969, "loss": 1.5101, "step": 32763 }, { "epoch": 0.42575323685967353, "grad_norm": 0.45462557673454285, "learning_rate": 0.00011487282132598554, "loss": 1.3479, "step": 32764 }, { "epoch": 0.42576623140358943, "grad_norm": 0.3237178921699524, "learning_rate": 0.00011487022186407414, "loss": 1.4843, "step": 32765 }, { "epoch": 0.4257792259475053, "grad_norm": 0.38279351592063904, "learning_rate": 0.00011486762240216275, "loss": 1.3532, "step": 32766 }, { "epoch": 0.4257922204914212, "grad_norm": 0.40862855315208435, "learning_rate": 0.00011486502294025136, "loss": 1.4185, "step": 32767 }, { "epoch": 0.425805215035337, "grad_norm": 0.3366811275482178, "learning_rate": 0.00011486242347834, "loss": 1.3415, "step": 32768 }, { "epoch": 0.4258182095792529, "grad_norm": 0.29549768567085266, "learning_rate": 0.00011485982401642861, "loss": 1.319, "step": 32769 }, { "epoch": 0.42583120412316877, "grad_norm": 0.42901721596717834, "learning_rate": 0.00011485722455451722, "loss": 1.2681, "step": 32770 }, { "epoch": 0.4258441986670847, "grad_norm": 0.41053691506385803, "learning_rate": 0.00011485462509260583, "loss": 1.4367, "step": 32771 }, { "epoch": 0.4258571932110005, "grad_norm": 0.36776745319366455, "learning_rate": 0.00011485202563069446, "loss": 1.4827, "step": 32772 }, { "epoch": 0.4258701877549164, "grad_norm": 0.42250028252601624, "learning_rate": 0.00011484942616878307, "loss": 1.3056, "step": 32773 }, { "epoch": 0.42588318229883226, "grad_norm": 0.4061184227466583, "learning_rate": 0.00011484682670687168, "loss": 1.3856, "step": 32774 }, { "epoch": 0.42589617684274816, "grad_norm": 0.3884889781475067, "learning_rate": 0.00011484422724496029, "loss": 1.4417, "step": 32775 }, { "epoch": 0.425909171386664, "grad_norm": 0.36790668964385986, "learning_rate": 0.00011484162778304893, "loss": 1.2486, "step": 32776 }, { "epoch": 0.4259221659305799, "grad_norm": 0.38973650336265564, "learning_rate": 0.00011483902832113753, "loss": 1.3887, "step": 32777 }, { "epoch": 0.42593516047449576, "grad_norm": 0.48880186676979065, "learning_rate": 0.00011483642885922614, "loss": 1.4904, "step": 32778 }, { "epoch": 0.42594815501841166, "grad_norm": 0.4057094156742096, "learning_rate": 0.00011483382939731475, "loss": 1.4893, "step": 32779 }, { "epoch": 0.4259611495623275, "grad_norm": 0.5092507004737854, "learning_rate": 0.00011483122993540339, "loss": 1.5405, "step": 32780 }, { "epoch": 0.4259741441062434, "grad_norm": 0.37149399518966675, "learning_rate": 0.000114828630473492, "loss": 1.2962, "step": 32781 }, { "epoch": 0.42598713865015925, "grad_norm": 0.4929821789264679, "learning_rate": 0.00011482603101158061, "loss": 1.394, "step": 32782 }, { "epoch": 0.42600013319407515, "grad_norm": 0.474163293838501, "learning_rate": 0.00011482343154966922, "loss": 1.4778, "step": 32783 }, { "epoch": 0.426013127737991, "grad_norm": 0.39708057045936584, "learning_rate": 0.00011482083208775784, "loss": 1.4183, "step": 32784 }, { "epoch": 0.4260261222819069, "grad_norm": 0.47365882992744446, "learning_rate": 0.00011481823262584645, "loss": 1.3563, "step": 32785 }, { "epoch": 0.42603911682582274, "grad_norm": 0.44962528347969055, "learning_rate": 0.00011481563316393506, "loss": 1.5358, "step": 32786 }, { "epoch": 0.42605211136973864, "grad_norm": 0.5008590817451477, "learning_rate": 0.00011481303370202368, "loss": 1.4158, "step": 32787 }, { "epoch": 0.4260651059136545, "grad_norm": 0.3530007004737854, "learning_rate": 0.00011481043424011231, "loss": 1.177, "step": 32788 }, { "epoch": 0.4260781004575704, "grad_norm": 0.41533127427101135, "learning_rate": 0.00011480783477820092, "loss": 1.3842, "step": 32789 }, { "epoch": 0.42609109500148623, "grad_norm": 0.36696693301200867, "learning_rate": 0.00011480523531628952, "loss": 1.2127, "step": 32790 }, { "epoch": 0.42610408954540213, "grad_norm": 0.4217143952846527, "learning_rate": 0.00011480263585437816, "loss": 1.3774, "step": 32791 }, { "epoch": 0.426117084089318, "grad_norm": 0.4862287938594818, "learning_rate": 0.00011480003639246677, "loss": 1.3672, "step": 32792 }, { "epoch": 0.4261300786332339, "grad_norm": 0.3787732422351837, "learning_rate": 0.00011479743693055538, "loss": 1.2573, "step": 32793 }, { "epoch": 0.4261430731771497, "grad_norm": 0.3426017463207245, "learning_rate": 0.00011479483746864399, "loss": 1.3609, "step": 32794 }, { "epoch": 0.4261560677210656, "grad_norm": 0.41839492321014404, "learning_rate": 0.00011479223800673262, "loss": 1.4041, "step": 32795 }, { "epoch": 0.42616906226498147, "grad_norm": 0.35034388303756714, "learning_rate": 0.00011478963854482123, "loss": 1.4561, "step": 32796 }, { "epoch": 0.42618205680889737, "grad_norm": 0.44032689929008484, "learning_rate": 0.00011478703908290984, "loss": 1.5159, "step": 32797 }, { "epoch": 0.4261950513528132, "grad_norm": 0.29131001234054565, "learning_rate": 0.00011478443962099845, "loss": 1.1066, "step": 32798 }, { "epoch": 0.4262080458967291, "grad_norm": 0.39421477913856506, "learning_rate": 0.00011478184015908709, "loss": 1.2233, "step": 32799 }, { "epoch": 0.42622104044064496, "grad_norm": 0.38550177216529846, "learning_rate": 0.0001147792406971757, "loss": 1.3127, "step": 32800 }, { "epoch": 0.42623403498456086, "grad_norm": 0.4063124656677246, "learning_rate": 0.00011477664123526431, "loss": 1.4169, "step": 32801 }, { "epoch": 0.4262470295284767, "grad_norm": 0.3618965744972229, "learning_rate": 0.00011477404177335292, "loss": 1.3311, "step": 32802 }, { "epoch": 0.4262600240723926, "grad_norm": 0.4103372395038605, "learning_rate": 0.00011477144231144154, "loss": 1.3457, "step": 32803 }, { "epoch": 0.42627301861630845, "grad_norm": 0.310663104057312, "learning_rate": 0.00011476884284953016, "loss": 1.1693, "step": 32804 }, { "epoch": 0.42628601316022435, "grad_norm": 0.39910319447517395, "learning_rate": 0.00011476624338761877, "loss": 1.4011, "step": 32805 }, { "epoch": 0.4262990077041402, "grad_norm": 0.4499560296535492, "learning_rate": 0.00011476364392570738, "loss": 1.2533, "step": 32806 }, { "epoch": 0.4263120022480561, "grad_norm": 0.33073529601097107, "learning_rate": 0.000114761044463796, "loss": 1.3588, "step": 32807 }, { "epoch": 0.42632499679197194, "grad_norm": 0.31497201323509216, "learning_rate": 0.00011475844500188461, "loss": 1.2694, "step": 32808 }, { "epoch": 0.42633799133588784, "grad_norm": 0.3413882851600647, "learning_rate": 0.00011475584553997322, "loss": 1.2689, "step": 32809 }, { "epoch": 0.4263509858798037, "grad_norm": 0.4022754430770874, "learning_rate": 0.00011475324607806183, "loss": 1.2566, "step": 32810 }, { "epoch": 0.4263639804237196, "grad_norm": 0.42208191752433777, "learning_rate": 0.00011475064661615047, "loss": 1.2734, "step": 32811 }, { "epoch": 0.4263769749676355, "grad_norm": 0.4367223381996155, "learning_rate": 0.00011474804715423908, "loss": 1.3305, "step": 32812 }, { "epoch": 0.42638996951155134, "grad_norm": 0.37246769666671753, "learning_rate": 0.0001147454476923277, "loss": 1.403, "step": 32813 }, { "epoch": 0.42640296405546724, "grad_norm": 0.6029079556465149, "learning_rate": 0.0001147428482304163, "loss": 1.6034, "step": 32814 }, { "epoch": 0.4264159585993831, "grad_norm": 0.44445306062698364, "learning_rate": 0.00011474024876850493, "loss": 1.3958, "step": 32815 }, { "epoch": 0.426428953143299, "grad_norm": 0.3457375168800354, "learning_rate": 0.00011473764930659354, "loss": 1.1933, "step": 32816 }, { "epoch": 0.42644194768721483, "grad_norm": 0.472170889377594, "learning_rate": 0.00011473504984468215, "loss": 1.6479, "step": 32817 }, { "epoch": 0.42645494223113073, "grad_norm": 0.4075019061565399, "learning_rate": 0.00011473245038277076, "loss": 1.2688, "step": 32818 }, { "epoch": 0.4264679367750466, "grad_norm": 0.3741489052772522, "learning_rate": 0.00011472985092085939, "loss": 1.4706, "step": 32819 }, { "epoch": 0.4264809313189625, "grad_norm": 0.4712775647640228, "learning_rate": 0.000114727251458948, "loss": 1.4307, "step": 32820 }, { "epoch": 0.4264939258628783, "grad_norm": 0.4487972855567932, "learning_rate": 0.00011472465199703661, "loss": 1.2472, "step": 32821 }, { "epoch": 0.4265069204067942, "grad_norm": 0.39073678851127625, "learning_rate": 0.00011472205253512522, "loss": 1.0702, "step": 32822 }, { "epoch": 0.42651991495071007, "grad_norm": 0.4891730844974518, "learning_rate": 0.00011471945307321386, "loss": 1.3841, "step": 32823 }, { "epoch": 0.42653290949462597, "grad_norm": 0.39912983775138855, "learning_rate": 0.00011471685361130247, "loss": 1.2607, "step": 32824 }, { "epoch": 0.4265459040385418, "grad_norm": 0.3668476343154907, "learning_rate": 0.00011471425414939108, "loss": 1.3151, "step": 32825 }, { "epoch": 0.4265588985824577, "grad_norm": 0.47021716833114624, "learning_rate": 0.0001147116546874797, "loss": 1.3251, "step": 32826 }, { "epoch": 0.42657189312637356, "grad_norm": 0.5033183693885803, "learning_rate": 0.00011470905522556832, "loss": 1.4136, "step": 32827 }, { "epoch": 0.42658488767028946, "grad_norm": 0.43883442878723145, "learning_rate": 0.00011470645576365693, "loss": 1.3018, "step": 32828 }, { "epoch": 0.4265978822142053, "grad_norm": 0.3371163606643677, "learning_rate": 0.00011470385630174554, "loss": 1.4162, "step": 32829 }, { "epoch": 0.4266108767581212, "grad_norm": 0.4342816472053528, "learning_rate": 0.00011470125683983418, "loss": 1.4202, "step": 32830 }, { "epoch": 0.42662387130203705, "grad_norm": 0.3728175759315491, "learning_rate": 0.00011469865737792279, "loss": 1.3711, "step": 32831 }, { "epoch": 0.42663686584595295, "grad_norm": 0.3101612627506256, "learning_rate": 0.00011469605791601138, "loss": 1.5332, "step": 32832 }, { "epoch": 0.4266498603898688, "grad_norm": 0.38016143441200256, "learning_rate": 0.0001146934584541, "loss": 1.6912, "step": 32833 }, { "epoch": 0.4266628549337847, "grad_norm": 0.348294734954834, "learning_rate": 0.00011469085899218863, "loss": 1.3105, "step": 32834 }, { "epoch": 0.42667584947770054, "grad_norm": 0.4668010175228119, "learning_rate": 0.00011468825953027724, "loss": 1.5294, "step": 32835 }, { "epoch": 0.42668884402161644, "grad_norm": 0.4506417214870453, "learning_rate": 0.00011468566006836585, "loss": 1.5579, "step": 32836 }, { "epoch": 0.4267018385655323, "grad_norm": 0.4124908149242401, "learning_rate": 0.00011468306060645447, "loss": 1.2575, "step": 32837 }, { "epoch": 0.4267148331094482, "grad_norm": 0.3964749574661255, "learning_rate": 0.00011468046114454309, "loss": 1.3125, "step": 32838 }, { "epoch": 0.42672782765336403, "grad_norm": 0.46739378571510315, "learning_rate": 0.0001146778616826317, "loss": 1.5113, "step": 32839 }, { "epoch": 0.42674082219727993, "grad_norm": 0.38561102747917175, "learning_rate": 0.00011467526222072031, "loss": 1.3133, "step": 32840 }, { "epoch": 0.4267538167411958, "grad_norm": 0.4189145863056183, "learning_rate": 0.00011467266275880892, "loss": 1.3998, "step": 32841 }, { "epoch": 0.4267668112851117, "grad_norm": 0.393160879611969, "learning_rate": 0.00011467006329689756, "loss": 1.3429, "step": 32842 }, { "epoch": 0.4267798058290275, "grad_norm": 0.4541653096675873, "learning_rate": 0.00011466746383498617, "loss": 1.5587, "step": 32843 }, { "epoch": 0.4267928003729434, "grad_norm": 0.4426872432231903, "learning_rate": 0.00011466486437307478, "loss": 1.3673, "step": 32844 }, { "epoch": 0.42680579491685927, "grad_norm": 0.4444718658924103, "learning_rate": 0.00011466226491116338, "loss": 1.5381, "step": 32845 }, { "epoch": 0.42681878946077517, "grad_norm": 0.3532416820526123, "learning_rate": 0.00011465966544925202, "loss": 1.2414, "step": 32846 }, { "epoch": 0.426831784004691, "grad_norm": 0.33352282643318176, "learning_rate": 0.00011465706598734063, "loss": 1.5366, "step": 32847 }, { "epoch": 0.4268447785486069, "grad_norm": 0.390583336353302, "learning_rate": 0.00011465446652542924, "loss": 1.2317, "step": 32848 }, { "epoch": 0.42685777309252276, "grad_norm": 0.31314635276794434, "learning_rate": 0.00011465186706351785, "loss": 1.2794, "step": 32849 }, { "epoch": 0.42687076763643866, "grad_norm": 0.3666491210460663, "learning_rate": 0.00011464926760160648, "loss": 1.3526, "step": 32850 }, { "epoch": 0.4268837621803545, "grad_norm": 0.4602649211883545, "learning_rate": 0.00011464666813969509, "loss": 1.4471, "step": 32851 }, { "epoch": 0.4268967567242704, "grad_norm": 0.339557409286499, "learning_rate": 0.0001146440686777837, "loss": 1.3946, "step": 32852 }, { "epoch": 0.42690975126818625, "grad_norm": 0.3528422713279724, "learning_rate": 0.00011464146921587231, "loss": 1.1821, "step": 32853 }, { "epoch": 0.42692274581210216, "grad_norm": 0.4109194874763489, "learning_rate": 0.00011463886975396095, "loss": 1.2582, "step": 32854 }, { "epoch": 0.426935740356018, "grad_norm": 0.4166772663593292, "learning_rate": 0.00011463627029204956, "loss": 1.3765, "step": 32855 }, { "epoch": 0.4269487348999339, "grad_norm": 0.43684664368629456, "learning_rate": 0.00011463367083013817, "loss": 1.5558, "step": 32856 }, { "epoch": 0.42696172944384975, "grad_norm": 0.4646078050136566, "learning_rate": 0.00011463107136822677, "loss": 1.4777, "step": 32857 }, { "epoch": 0.42697472398776565, "grad_norm": 0.36714062094688416, "learning_rate": 0.0001146284719063154, "loss": 1.3341, "step": 32858 }, { "epoch": 0.4269877185316815, "grad_norm": 0.4614235758781433, "learning_rate": 0.00011462587244440401, "loss": 1.3862, "step": 32859 }, { "epoch": 0.4270007130755974, "grad_norm": 0.3928585350513458, "learning_rate": 0.00011462327298249263, "loss": 1.3795, "step": 32860 }, { "epoch": 0.42701370761951324, "grad_norm": 0.28215593099594116, "learning_rate": 0.00011462067352058124, "loss": 1.3317, "step": 32861 }, { "epoch": 0.42702670216342914, "grad_norm": 0.3727930188179016, "learning_rate": 0.00011461807405866986, "loss": 1.4699, "step": 32862 }, { "epoch": 0.427039696707345, "grad_norm": 0.40743106603622437, "learning_rate": 0.00011461547459675847, "loss": 1.5454, "step": 32863 }, { "epoch": 0.4270526912512609, "grad_norm": 0.3678722679615021, "learning_rate": 0.00011461287513484708, "loss": 1.481, "step": 32864 }, { "epoch": 0.42706568579517673, "grad_norm": 0.4831506311893463, "learning_rate": 0.00011461027567293572, "loss": 1.4107, "step": 32865 }, { "epoch": 0.42707868033909263, "grad_norm": 0.5250071287155151, "learning_rate": 0.00011460767621102433, "loss": 1.5032, "step": 32866 }, { "epoch": 0.4270916748830085, "grad_norm": 0.38608986139297485, "learning_rate": 0.00011460507674911294, "loss": 1.4034, "step": 32867 }, { "epoch": 0.4271046694269244, "grad_norm": 0.41576865315437317, "learning_rate": 0.00011460247728720155, "loss": 1.358, "step": 32868 }, { "epoch": 0.4271176639708402, "grad_norm": 0.42396080493927, "learning_rate": 0.00011459987782529018, "loss": 1.3935, "step": 32869 }, { "epoch": 0.4271306585147561, "grad_norm": 0.35341450572013855, "learning_rate": 0.00011459727836337879, "loss": 1.5241, "step": 32870 }, { "epoch": 0.42714365305867197, "grad_norm": 0.2943039536476135, "learning_rate": 0.0001145946789014674, "loss": 1.4861, "step": 32871 }, { "epoch": 0.42715664760258787, "grad_norm": 0.40003466606140137, "learning_rate": 0.00011459207943955601, "loss": 1.548, "step": 32872 }, { "epoch": 0.4271696421465037, "grad_norm": 0.38376590609550476, "learning_rate": 0.00011458947997764465, "loss": 1.2353, "step": 32873 }, { "epoch": 0.4271826366904196, "grad_norm": 0.36843591928482056, "learning_rate": 0.00011458688051573325, "loss": 1.2981, "step": 32874 }, { "epoch": 0.42719563123433546, "grad_norm": 0.3761000335216522, "learning_rate": 0.00011458428105382186, "loss": 1.3807, "step": 32875 }, { "epoch": 0.42720862577825136, "grad_norm": 0.396903395652771, "learning_rate": 0.00011458168159191047, "loss": 1.3137, "step": 32876 }, { "epoch": 0.4272216203221672, "grad_norm": 0.41537466645240784, "learning_rate": 0.0001145790821299991, "loss": 1.6739, "step": 32877 }, { "epoch": 0.4272346148660831, "grad_norm": 0.48561975359916687, "learning_rate": 0.00011457648266808772, "loss": 1.5377, "step": 32878 }, { "epoch": 0.42724760940999895, "grad_norm": 0.41702041029930115, "learning_rate": 0.00011457388320617633, "loss": 1.476, "step": 32879 }, { "epoch": 0.42726060395391485, "grad_norm": 0.353404700756073, "learning_rate": 0.00011457128374426494, "loss": 1.2124, "step": 32880 }, { "epoch": 0.4272735984978307, "grad_norm": 0.43179455399513245, "learning_rate": 0.00011456868428235356, "loss": 1.5913, "step": 32881 }, { "epoch": 0.4272865930417466, "grad_norm": 0.44365885853767395, "learning_rate": 0.00011456608482044217, "loss": 1.479, "step": 32882 }, { "epoch": 0.42729958758566244, "grad_norm": 0.5209981799125671, "learning_rate": 0.00011456348535853079, "loss": 1.6482, "step": 32883 }, { "epoch": 0.42731258212957834, "grad_norm": 0.4402356743812561, "learning_rate": 0.0001145608858966194, "loss": 1.2782, "step": 32884 }, { "epoch": 0.4273255766734942, "grad_norm": 0.4412650763988495, "learning_rate": 0.00011455828643470803, "loss": 1.1813, "step": 32885 }, { "epoch": 0.4273385712174101, "grad_norm": 0.37789034843444824, "learning_rate": 0.00011455568697279665, "loss": 1.3956, "step": 32886 }, { "epoch": 0.42735156576132594, "grad_norm": 0.3888697624206543, "learning_rate": 0.00011455308751088524, "loss": 1.3653, "step": 32887 }, { "epoch": 0.42736456030524184, "grad_norm": 0.384438693523407, "learning_rate": 0.00011455048804897385, "loss": 1.2726, "step": 32888 }, { "epoch": 0.42737755484915774, "grad_norm": 0.45589837431907654, "learning_rate": 0.00011454788858706249, "loss": 1.2597, "step": 32889 }, { "epoch": 0.4273905493930736, "grad_norm": 0.3793381154537201, "learning_rate": 0.0001145452891251511, "loss": 1.5491, "step": 32890 }, { "epoch": 0.4274035439369895, "grad_norm": 0.5033237338066101, "learning_rate": 0.00011454268966323971, "loss": 1.5788, "step": 32891 }, { "epoch": 0.42741653848090533, "grad_norm": 0.40526923537254333, "learning_rate": 0.00011454009020132832, "loss": 1.368, "step": 32892 }, { "epoch": 0.42742953302482123, "grad_norm": 0.4253199100494385, "learning_rate": 0.00011453749073941695, "loss": 1.3319, "step": 32893 }, { "epoch": 0.4274425275687371, "grad_norm": 0.3368600308895111, "learning_rate": 0.00011453489127750556, "loss": 1.3149, "step": 32894 }, { "epoch": 0.427455522112653, "grad_norm": 0.46693888306617737, "learning_rate": 0.00011453229181559417, "loss": 1.3481, "step": 32895 }, { "epoch": 0.4274685166565688, "grad_norm": 0.4167735278606415, "learning_rate": 0.00011452969235368278, "loss": 1.3895, "step": 32896 }, { "epoch": 0.4274815112004847, "grad_norm": 0.4482892155647278, "learning_rate": 0.00011452709289177142, "loss": 1.4057, "step": 32897 }, { "epoch": 0.42749450574440057, "grad_norm": 0.4834119379520416, "learning_rate": 0.00011452449342986003, "loss": 1.3401, "step": 32898 }, { "epoch": 0.42750750028831647, "grad_norm": 0.326749712228775, "learning_rate": 0.00011452189396794863, "loss": 1.3199, "step": 32899 }, { "epoch": 0.4275204948322323, "grad_norm": 0.4899827539920807, "learning_rate": 0.00011451929450603727, "loss": 1.4786, "step": 32900 }, { "epoch": 0.4275334893761482, "grad_norm": 0.5009106397628784, "learning_rate": 0.00011451669504412588, "loss": 1.5277, "step": 32901 }, { "epoch": 0.42754648392006406, "grad_norm": 0.31033840775489807, "learning_rate": 0.00011451409558221449, "loss": 1.512, "step": 32902 }, { "epoch": 0.42755947846397996, "grad_norm": 0.4763345718383789, "learning_rate": 0.0001145114961203031, "loss": 1.4909, "step": 32903 }, { "epoch": 0.4275724730078958, "grad_norm": 0.3603958189487457, "learning_rate": 0.00011450889665839172, "loss": 1.413, "step": 32904 }, { "epoch": 0.4275854675518117, "grad_norm": 0.312365859746933, "learning_rate": 0.00011450629719648033, "loss": 1.2864, "step": 32905 }, { "epoch": 0.42759846209572755, "grad_norm": 0.4907492995262146, "learning_rate": 0.00011450369773456895, "loss": 1.5186, "step": 32906 }, { "epoch": 0.42761145663964345, "grad_norm": 0.34510377049446106, "learning_rate": 0.00011450109827265756, "loss": 1.2269, "step": 32907 }, { "epoch": 0.4276244511835593, "grad_norm": 0.4158482849597931, "learning_rate": 0.0001144984988107462, "loss": 1.3944, "step": 32908 }, { "epoch": 0.4276374457274752, "grad_norm": 0.48456472158432007, "learning_rate": 0.0001144958993488348, "loss": 1.5591, "step": 32909 }, { "epoch": 0.42765044027139104, "grad_norm": 0.34927916526794434, "learning_rate": 0.00011449329988692342, "loss": 1.332, "step": 32910 }, { "epoch": 0.42766343481530694, "grad_norm": 0.4181821346282959, "learning_rate": 0.00011449070042501203, "loss": 1.2098, "step": 32911 }, { "epoch": 0.4276764293592228, "grad_norm": 0.4651978611946106, "learning_rate": 0.00011448810096310065, "loss": 1.5268, "step": 32912 }, { "epoch": 0.4276894239031387, "grad_norm": 0.3932968080043793, "learning_rate": 0.00011448550150118926, "loss": 1.2987, "step": 32913 }, { "epoch": 0.42770241844705453, "grad_norm": 0.44433948397636414, "learning_rate": 0.00011448290203927787, "loss": 1.294, "step": 32914 }, { "epoch": 0.42771541299097043, "grad_norm": 0.31703805923461914, "learning_rate": 0.00011448030257736648, "loss": 1.1767, "step": 32915 }, { "epoch": 0.4277284075348863, "grad_norm": 0.32806262373924255, "learning_rate": 0.00011447770311545511, "loss": 1.3066, "step": 32916 }, { "epoch": 0.4277414020788022, "grad_norm": 0.3266885578632355, "learning_rate": 0.00011447510365354372, "loss": 1.1738, "step": 32917 }, { "epoch": 0.427754396622718, "grad_norm": 0.41356852650642395, "learning_rate": 0.00011447250419163233, "loss": 1.3799, "step": 32918 }, { "epoch": 0.4277673911666339, "grad_norm": 0.4480923116207123, "learning_rate": 0.00011446990472972094, "loss": 1.4505, "step": 32919 }, { "epoch": 0.42778038571054977, "grad_norm": 0.41598185896873474, "learning_rate": 0.00011446730526780958, "loss": 1.4908, "step": 32920 }, { "epoch": 0.42779338025446567, "grad_norm": 0.5049240589141846, "learning_rate": 0.00011446470580589819, "loss": 1.3075, "step": 32921 }, { "epoch": 0.4278063747983815, "grad_norm": 0.4384255111217499, "learning_rate": 0.0001144621063439868, "loss": 1.2741, "step": 32922 }, { "epoch": 0.4278193693422974, "grad_norm": 0.3281305730342865, "learning_rate": 0.00011445950688207541, "loss": 1.3135, "step": 32923 }, { "epoch": 0.42783236388621326, "grad_norm": 0.498891144990921, "learning_rate": 0.00011445690742016404, "loss": 1.5401, "step": 32924 }, { "epoch": 0.42784535843012916, "grad_norm": 0.4500311315059662, "learning_rate": 0.00011445430795825265, "loss": 1.3548, "step": 32925 }, { "epoch": 0.427858352974045, "grad_norm": 0.39651259779930115, "learning_rate": 0.00011445170849634126, "loss": 1.3717, "step": 32926 }, { "epoch": 0.4278713475179609, "grad_norm": 0.3072621822357178, "learning_rate": 0.00011444910903442987, "loss": 1.3399, "step": 32927 }, { "epoch": 0.42788434206187675, "grad_norm": 0.5017160177230835, "learning_rate": 0.00011444650957251851, "loss": 1.3851, "step": 32928 }, { "epoch": 0.42789733660579266, "grad_norm": 0.3628110885620117, "learning_rate": 0.0001144439101106071, "loss": 1.4355, "step": 32929 }, { "epoch": 0.4279103311497085, "grad_norm": 0.38326093554496765, "learning_rate": 0.00011444131064869572, "loss": 1.6049, "step": 32930 }, { "epoch": 0.4279233256936244, "grad_norm": 0.33080607652664185, "learning_rate": 0.00011443871118678433, "loss": 1.2293, "step": 32931 }, { "epoch": 0.42793632023754025, "grad_norm": 0.39626824855804443, "learning_rate": 0.00011443611172487296, "loss": 1.3841, "step": 32932 }, { "epoch": 0.42794931478145615, "grad_norm": 0.4182763695716858, "learning_rate": 0.00011443351226296158, "loss": 1.3319, "step": 32933 }, { "epoch": 0.427962309325372, "grad_norm": 0.39371421933174133, "learning_rate": 0.00011443091280105019, "loss": 1.279, "step": 32934 }, { "epoch": 0.4279753038692879, "grad_norm": 0.4124096632003784, "learning_rate": 0.0001144283133391388, "loss": 1.2485, "step": 32935 }, { "epoch": 0.42798829841320374, "grad_norm": 0.4605541527271271, "learning_rate": 0.00011442571387722742, "loss": 1.4292, "step": 32936 }, { "epoch": 0.42800129295711964, "grad_norm": 0.3719262182712555, "learning_rate": 0.00011442311441531603, "loss": 1.3938, "step": 32937 }, { "epoch": 0.4280142875010355, "grad_norm": 0.45827800035476685, "learning_rate": 0.00011442051495340464, "loss": 1.4153, "step": 32938 }, { "epoch": 0.4280272820449514, "grad_norm": 0.38764849305152893, "learning_rate": 0.00011441791549149328, "loss": 1.2289, "step": 32939 }, { "epoch": 0.42804027658886723, "grad_norm": 0.561897873878479, "learning_rate": 0.00011441531602958189, "loss": 1.417, "step": 32940 }, { "epoch": 0.42805327113278313, "grad_norm": 0.5016710758209229, "learning_rate": 0.00011441271656767049, "loss": 1.5871, "step": 32941 }, { "epoch": 0.428066265676699, "grad_norm": 0.3721367418766022, "learning_rate": 0.0001144101171057591, "loss": 1.5916, "step": 32942 }, { "epoch": 0.4280792602206149, "grad_norm": 0.35788917541503906, "learning_rate": 0.00011440751764384774, "loss": 1.2411, "step": 32943 }, { "epoch": 0.4280922547645307, "grad_norm": 0.3961165249347687, "learning_rate": 0.00011440491818193635, "loss": 1.3592, "step": 32944 }, { "epoch": 0.4281052493084466, "grad_norm": 0.3320601284503937, "learning_rate": 0.00011440231872002496, "loss": 1.2992, "step": 32945 }, { "epoch": 0.42811824385236247, "grad_norm": 0.43274441361427307, "learning_rate": 0.00011439971925811357, "loss": 1.455, "step": 32946 }, { "epoch": 0.42813123839627837, "grad_norm": 0.3733232021331787, "learning_rate": 0.0001143971197962022, "loss": 1.4835, "step": 32947 }, { "epoch": 0.4281442329401942, "grad_norm": 0.39285239577293396, "learning_rate": 0.00011439452033429081, "loss": 1.2906, "step": 32948 }, { "epoch": 0.4281572274841101, "grad_norm": 0.36075690388679504, "learning_rate": 0.00011439192087237942, "loss": 1.4588, "step": 32949 }, { "epoch": 0.42817022202802596, "grad_norm": 0.37205448746681213, "learning_rate": 0.00011438932141046803, "loss": 1.4358, "step": 32950 }, { "epoch": 0.42818321657194186, "grad_norm": 0.3537094295024872, "learning_rate": 0.00011438672194855667, "loss": 1.4761, "step": 32951 }, { "epoch": 0.4281962111158577, "grad_norm": 0.42746567726135254, "learning_rate": 0.00011438412248664528, "loss": 1.2044, "step": 32952 }, { "epoch": 0.4282092056597736, "grad_norm": 0.35676309466362, "learning_rate": 0.00011438152302473389, "loss": 1.3542, "step": 32953 }, { "epoch": 0.42822220020368945, "grad_norm": 0.470663845539093, "learning_rate": 0.00011437892356282249, "loss": 1.4246, "step": 32954 }, { "epoch": 0.42823519474760535, "grad_norm": 0.41334274411201477, "learning_rate": 0.00011437632410091112, "loss": 1.5076, "step": 32955 }, { "epoch": 0.4282481892915212, "grad_norm": 0.40773844718933105, "learning_rate": 0.00011437372463899974, "loss": 1.4476, "step": 32956 }, { "epoch": 0.4282611838354371, "grad_norm": 0.3873310685157776, "learning_rate": 0.00011437112517708835, "loss": 1.5608, "step": 32957 }, { "epoch": 0.42827417837935294, "grad_norm": 0.4539563059806824, "learning_rate": 0.00011436852571517696, "loss": 1.3616, "step": 32958 }, { "epoch": 0.42828717292326884, "grad_norm": 0.3360161781311035, "learning_rate": 0.00011436592625326558, "loss": 1.4219, "step": 32959 }, { "epoch": 0.4283001674671847, "grad_norm": 0.38950201869010925, "learning_rate": 0.00011436332679135419, "loss": 1.341, "step": 32960 }, { "epoch": 0.4283131620111006, "grad_norm": 0.4776470363140106, "learning_rate": 0.0001143607273294428, "loss": 1.3799, "step": 32961 }, { "epoch": 0.42832615655501644, "grad_norm": 0.35883405804634094, "learning_rate": 0.00011435812786753141, "loss": 1.2749, "step": 32962 }, { "epoch": 0.42833915109893234, "grad_norm": 0.4309590458869934, "learning_rate": 0.00011435552840562005, "loss": 1.4162, "step": 32963 }, { "epoch": 0.42835214564284824, "grad_norm": 0.37539729475975037, "learning_rate": 0.00011435292894370866, "loss": 1.3487, "step": 32964 }, { "epoch": 0.4283651401867641, "grad_norm": 0.42053520679473877, "learning_rate": 0.00011435032948179727, "loss": 1.5003, "step": 32965 }, { "epoch": 0.42837813473068, "grad_norm": 0.38253238797187805, "learning_rate": 0.00011434773001988589, "loss": 1.3482, "step": 32966 }, { "epoch": 0.4283911292745958, "grad_norm": 0.4032425582408905, "learning_rate": 0.00011434513055797451, "loss": 1.5095, "step": 32967 }, { "epoch": 0.42840412381851173, "grad_norm": 0.4528466463088989, "learning_rate": 0.00011434253109606312, "loss": 1.3803, "step": 32968 }, { "epoch": 0.4284171183624276, "grad_norm": 0.34825149178504944, "learning_rate": 0.00011433993163415173, "loss": 1.5305, "step": 32969 }, { "epoch": 0.4284301129063435, "grad_norm": 0.4225640892982483, "learning_rate": 0.00011433733217224034, "loss": 1.3553, "step": 32970 }, { "epoch": 0.4284431074502593, "grad_norm": 0.41909220814704895, "learning_rate": 0.00011433473271032897, "loss": 1.3717, "step": 32971 }, { "epoch": 0.4284561019941752, "grad_norm": 0.33388692140579224, "learning_rate": 0.00011433213324841758, "loss": 1.3239, "step": 32972 }, { "epoch": 0.42846909653809107, "grad_norm": 0.33994460105895996, "learning_rate": 0.00011432953378650619, "loss": 1.3269, "step": 32973 }, { "epoch": 0.42848209108200697, "grad_norm": 0.3454279601573944, "learning_rate": 0.00011432693432459483, "loss": 1.2758, "step": 32974 }, { "epoch": 0.4284950856259228, "grad_norm": 0.3173542618751526, "learning_rate": 0.00011432433486268344, "loss": 1.4344, "step": 32975 }, { "epoch": 0.4285080801698387, "grad_norm": 0.40768688917160034, "learning_rate": 0.00011432173540077205, "loss": 1.4672, "step": 32976 }, { "epoch": 0.42852107471375456, "grad_norm": 0.367645263671875, "learning_rate": 0.00011431913593886066, "loss": 1.2757, "step": 32977 }, { "epoch": 0.42853406925767046, "grad_norm": 0.3693621754646301, "learning_rate": 0.00011431653647694928, "loss": 1.4747, "step": 32978 }, { "epoch": 0.4285470638015863, "grad_norm": 0.5344828367233276, "learning_rate": 0.0001143139370150379, "loss": 1.3818, "step": 32979 }, { "epoch": 0.4285600583455022, "grad_norm": 0.38717135787010193, "learning_rate": 0.0001143113375531265, "loss": 1.4041, "step": 32980 }, { "epoch": 0.42857305288941805, "grad_norm": 0.29870977997779846, "learning_rate": 0.00011430873809121512, "loss": 1.2573, "step": 32981 }, { "epoch": 0.42858604743333395, "grad_norm": 0.4521723985671997, "learning_rate": 0.00011430613862930376, "loss": 1.3095, "step": 32982 }, { "epoch": 0.4285990419772498, "grad_norm": 0.3614078760147095, "learning_rate": 0.00011430353916739235, "loss": 1.2895, "step": 32983 }, { "epoch": 0.4286120365211657, "grad_norm": 0.4489821791648865, "learning_rate": 0.00011430093970548096, "loss": 1.415, "step": 32984 }, { "epoch": 0.42862503106508154, "grad_norm": 0.32144278287887573, "learning_rate": 0.00011429834024356957, "loss": 1.2743, "step": 32985 }, { "epoch": 0.42863802560899744, "grad_norm": 0.3889378607273102, "learning_rate": 0.00011429574078165821, "loss": 1.4236, "step": 32986 }, { "epoch": 0.4286510201529133, "grad_norm": 0.3499564230442047, "learning_rate": 0.00011429314131974682, "loss": 1.5122, "step": 32987 }, { "epoch": 0.4286640146968292, "grad_norm": 0.305021196603775, "learning_rate": 0.00011429054185783543, "loss": 1.2263, "step": 32988 }, { "epoch": 0.42867700924074503, "grad_norm": 0.4088214635848999, "learning_rate": 0.00011428794239592405, "loss": 1.6406, "step": 32989 }, { "epoch": 0.42869000378466093, "grad_norm": 0.39947709441185, "learning_rate": 0.00011428534293401267, "loss": 1.4852, "step": 32990 }, { "epoch": 0.4287029983285768, "grad_norm": 0.4040416181087494, "learning_rate": 0.00011428274347210128, "loss": 1.3412, "step": 32991 }, { "epoch": 0.4287159928724927, "grad_norm": 0.4759654402732849, "learning_rate": 0.00011428014401018989, "loss": 1.6076, "step": 32992 }, { "epoch": 0.4287289874164085, "grad_norm": 0.4862821102142334, "learning_rate": 0.0001142775445482785, "loss": 1.5148, "step": 32993 }, { "epoch": 0.4287419819603244, "grad_norm": 0.4117686152458191, "learning_rate": 0.00011427494508636714, "loss": 1.4097, "step": 32994 }, { "epoch": 0.42875497650424027, "grad_norm": 0.34281232953071594, "learning_rate": 0.00011427234562445575, "loss": 1.3735, "step": 32995 }, { "epoch": 0.42876797104815617, "grad_norm": 0.3933635354042053, "learning_rate": 0.00011426974616254435, "loss": 1.2935, "step": 32996 }, { "epoch": 0.428780965592072, "grad_norm": 0.436548113822937, "learning_rate": 0.00011426714670063296, "loss": 1.3785, "step": 32997 }, { "epoch": 0.4287939601359879, "grad_norm": 0.46581515669822693, "learning_rate": 0.0001142645472387216, "loss": 1.5153, "step": 32998 }, { "epoch": 0.42880695467990376, "grad_norm": 0.41836127638816833, "learning_rate": 0.00011426194777681021, "loss": 1.2761, "step": 32999 }, { "epoch": 0.42881994922381966, "grad_norm": 0.4111693203449249, "learning_rate": 0.00011425934831489882, "loss": 1.5732, "step": 33000 }, { "epoch": 0.4288329437677355, "grad_norm": 0.23935194313526154, "learning_rate": 0.00011425674885298743, "loss": 1.3852, "step": 33001 }, { "epoch": 0.4288459383116514, "grad_norm": 0.37584179639816284, "learning_rate": 0.00011425414939107606, "loss": 1.3571, "step": 33002 }, { "epoch": 0.42885893285556725, "grad_norm": 0.35828694701194763, "learning_rate": 0.00011425154992916467, "loss": 1.4326, "step": 33003 }, { "epoch": 0.42887192739948315, "grad_norm": 0.3447136878967285, "learning_rate": 0.00011424895046725328, "loss": 1.3819, "step": 33004 }, { "epoch": 0.428884921943399, "grad_norm": 0.2607092559337616, "learning_rate": 0.00011424635100534189, "loss": 1.1683, "step": 33005 }, { "epoch": 0.4288979164873149, "grad_norm": 0.38082775473594666, "learning_rate": 0.00011424375154343053, "loss": 1.531, "step": 33006 }, { "epoch": 0.42891091103123075, "grad_norm": 0.4063643217086792, "learning_rate": 0.00011424115208151914, "loss": 1.361, "step": 33007 }, { "epoch": 0.42892390557514665, "grad_norm": 0.36824119091033936, "learning_rate": 0.00011423855261960775, "loss": 1.298, "step": 33008 }, { "epoch": 0.4289369001190625, "grad_norm": 0.37015676498413086, "learning_rate": 0.00011423595315769635, "loss": 1.1818, "step": 33009 }, { "epoch": 0.4289498946629784, "grad_norm": 0.39529991149902344, "learning_rate": 0.00011423335369578498, "loss": 1.5586, "step": 33010 }, { "epoch": 0.42896288920689424, "grad_norm": 0.4550600051879883, "learning_rate": 0.0001142307542338736, "loss": 1.3405, "step": 33011 }, { "epoch": 0.42897588375081014, "grad_norm": 0.4093617796897888, "learning_rate": 0.0001142281547719622, "loss": 1.5043, "step": 33012 }, { "epoch": 0.428988878294726, "grad_norm": 0.36031603813171387, "learning_rate": 0.00011422555531005083, "loss": 1.5713, "step": 33013 }, { "epoch": 0.4290018728386419, "grad_norm": 0.48979198932647705, "learning_rate": 0.00011422295584813944, "loss": 1.3559, "step": 33014 }, { "epoch": 0.42901486738255773, "grad_norm": 0.39934849739074707, "learning_rate": 0.00011422035638622805, "loss": 1.3094, "step": 33015 }, { "epoch": 0.42902786192647363, "grad_norm": 0.36826372146606445, "learning_rate": 0.00011421775692431666, "loss": 1.1625, "step": 33016 }, { "epoch": 0.4290408564703895, "grad_norm": 0.2788737416267395, "learning_rate": 0.0001142151574624053, "loss": 1.0874, "step": 33017 }, { "epoch": 0.4290538510143054, "grad_norm": 0.40675240755081177, "learning_rate": 0.00011421255800049391, "loss": 1.5414, "step": 33018 }, { "epoch": 0.4290668455582212, "grad_norm": 0.39083871245384216, "learning_rate": 0.00011420995853858252, "loss": 1.4494, "step": 33019 }, { "epoch": 0.4290798401021371, "grad_norm": 0.34771040081977844, "learning_rate": 0.00011420735907667113, "loss": 1.4102, "step": 33020 }, { "epoch": 0.42909283464605297, "grad_norm": 0.31723377108573914, "learning_rate": 0.00011420475961475976, "loss": 1.2182, "step": 33021 }, { "epoch": 0.42910582918996887, "grad_norm": 0.40262946486473083, "learning_rate": 0.00011420216015284837, "loss": 1.5661, "step": 33022 }, { "epoch": 0.4291188237338847, "grad_norm": 0.5085108876228333, "learning_rate": 0.00011419956069093698, "loss": 1.4595, "step": 33023 }, { "epoch": 0.4291318182778006, "grad_norm": 0.44215112924575806, "learning_rate": 0.00011419696122902559, "loss": 1.4176, "step": 33024 }, { "epoch": 0.42914481282171646, "grad_norm": 0.3943015933036804, "learning_rate": 0.00011419436176711422, "loss": 1.528, "step": 33025 }, { "epoch": 0.42915780736563236, "grad_norm": 0.34902501106262207, "learning_rate": 0.00011419176230520283, "loss": 1.3306, "step": 33026 }, { "epoch": 0.4291708019095482, "grad_norm": 0.4553138315677643, "learning_rate": 0.00011418916284329144, "loss": 1.4682, "step": 33027 }, { "epoch": 0.4291837964534641, "grad_norm": 0.3910723924636841, "learning_rate": 0.00011418656338138005, "loss": 1.3087, "step": 33028 }, { "epoch": 0.42919679099737995, "grad_norm": 0.35567012429237366, "learning_rate": 0.00011418396391946869, "loss": 1.4543, "step": 33029 }, { "epoch": 0.42920978554129585, "grad_norm": 0.467305064201355, "learning_rate": 0.0001141813644575573, "loss": 1.4359, "step": 33030 }, { "epoch": 0.4292227800852117, "grad_norm": 0.48820051550865173, "learning_rate": 0.00011417876499564591, "loss": 1.4388, "step": 33031 }, { "epoch": 0.4292357746291276, "grad_norm": 0.43212008476257324, "learning_rate": 0.00011417616553373452, "loss": 1.5225, "step": 33032 }, { "epoch": 0.42924876917304344, "grad_norm": 0.3583143353462219, "learning_rate": 0.00011417356607182314, "loss": 1.3766, "step": 33033 }, { "epoch": 0.42926176371695934, "grad_norm": 0.46112048625946045, "learning_rate": 0.00011417096660991175, "loss": 1.4813, "step": 33034 }, { "epoch": 0.4292747582608752, "grad_norm": 0.40445926785469055, "learning_rate": 0.00011416836714800037, "loss": 1.3102, "step": 33035 }, { "epoch": 0.4292877528047911, "grad_norm": 0.2742837071418762, "learning_rate": 0.00011416576768608898, "loss": 1.4291, "step": 33036 }, { "epoch": 0.42930074734870693, "grad_norm": 0.3757851719856262, "learning_rate": 0.00011416316822417761, "loss": 1.3656, "step": 33037 }, { "epoch": 0.42931374189262284, "grad_norm": 0.38286471366882324, "learning_rate": 0.00011416056876226621, "loss": 1.4859, "step": 33038 }, { "epoch": 0.4293267364365387, "grad_norm": 0.39252573251724243, "learning_rate": 0.00011415796930035482, "loss": 1.4248, "step": 33039 }, { "epoch": 0.4293397309804546, "grad_norm": 0.36668860912323, "learning_rate": 0.00011415536983844343, "loss": 1.3993, "step": 33040 }, { "epoch": 0.4293527255243705, "grad_norm": 0.47206979990005493, "learning_rate": 0.00011415277037653207, "loss": 1.5113, "step": 33041 }, { "epoch": 0.4293657200682863, "grad_norm": 0.4524155557155609, "learning_rate": 0.00011415017091462068, "loss": 1.3287, "step": 33042 }, { "epoch": 0.42937871461220223, "grad_norm": 0.4893929958343506, "learning_rate": 0.0001141475714527093, "loss": 1.3532, "step": 33043 }, { "epoch": 0.4293917091561181, "grad_norm": 0.4571665823459625, "learning_rate": 0.0001141449719907979, "loss": 1.6273, "step": 33044 }, { "epoch": 0.429404703700034, "grad_norm": 0.420337975025177, "learning_rate": 0.00011414237252888653, "loss": 1.4461, "step": 33045 }, { "epoch": 0.4294176982439498, "grad_norm": 0.3624253273010254, "learning_rate": 0.00011413977306697514, "loss": 1.2497, "step": 33046 }, { "epoch": 0.4294306927878657, "grad_norm": 0.5219370722770691, "learning_rate": 0.00011413717360506375, "loss": 1.4837, "step": 33047 }, { "epoch": 0.42944368733178157, "grad_norm": 0.3202259838581085, "learning_rate": 0.00011413457414315236, "loss": 1.3773, "step": 33048 }, { "epoch": 0.42945668187569747, "grad_norm": 0.3458300232887268, "learning_rate": 0.000114131974681241, "loss": 1.3374, "step": 33049 }, { "epoch": 0.4294696764196133, "grad_norm": 0.27415987849235535, "learning_rate": 0.00011412937521932961, "loss": 1.4492, "step": 33050 }, { "epoch": 0.4294826709635292, "grad_norm": 0.48517686128616333, "learning_rate": 0.00011412677575741821, "loss": 1.3907, "step": 33051 }, { "epoch": 0.42949566550744506, "grad_norm": 0.2366357147693634, "learning_rate": 0.00011412417629550685, "loss": 1.1548, "step": 33052 }, { "epoch": 0.42950866005136096, "grad_norm": 0.464781254529953, "learning_rate": 0.00011412157683359546, "loss": 1.3002, "step": 33053 }, { "epoch": 0.4295216545952768, "grad_norm": 0.436920166015625, "learning_rate": 0.00011411897737168407, "loss": 1.6171, "step": 33054 }, { "epoch": 0.4295346491391927, "grad_norm": 0.35520246624946594, "learning_rate": 0.00011411637790977268, "loss": 1.4255, "step": 33055 }, { "epoch": 0.42954764368310855, "grad_norm": 0.37296155095100403, "learning_rate": 0.0001141137784478613, "loss": 1.3958, "step": 33056 }, { "epoch": 0.42956063822702445, "grad_norm": 0.4117509722709656, "learning_rate": 0.00011411117898594991, "loss": 1.5684, "step": 33057 }, { "epoch": 0.4295736327709403, "grad_norm": 0.43161967396736145, "learning_rate": 0.00011410857952403853, "loss": 1.2941, "step": 33058 }, { "epoch": 0.4295866273148562, "grad_norm": 0.3396981656551361, "learning_rate": 0.00011410598006212714, "loss": 1.3691, "step": 33059 }, { "epoch": 0.42959962185877204, "grad_norm": 0.3769652247428894, "learning_rate": 0.00011410338060021577, "loss": 1.3833, "step": 33060 }, { "epoch": 0.42961261640268794, "grad_norm": 0.3140166699886322, "learning_rate": 0.00011410078113830438, "loss": 1.5036, "step": 33061 }, { "epoch": 0.4296256109466038, "grad_norm": 0.45537781715393066, "learning_rate": 0.000114098181676393, "loss": 1.2985, "step": 33062 }, { "epoch": 0.4296386054905197, "grad_norm": 0.4387262463569641, "learning_rate": 0.0001140955822144816, "loss": 1.2902, "step": 33063 }, { "epoch": 0.42965160003443553, "grad_norm": 0.4398992359638214, "learning_rate": 0.00011409298275257023, "loss": 1.3651, "step": 33064 }, { "epoch": 0.42966459457835143, "grad_norm": 0.3066038489341736, "learning_rate": 0.00011409038329065884, "loss": 1.3958, "step": 33065 }, { "epoch": 0.4296775891222673, "grad_norm": 0.3835153877735138, "learning_rate": 0.00011408778382874745, "loss": 1.5516, "step": 33066 }, { "epoch": 0.4296905836661832, "grad_norm": 0.33736687898635864, "learning_rate": 0.00011408518436683606, "loss": 1.3188, "step": 33067 }, { "epoch": 0.429703578210099, "grad_norm": 0.28521960973739624, "learning_rate": 0.00011408258490492469, "loss": 1.2023, "step": 33068 }, { "epoch": 0.4297165727540149, "grad_norm": 0.36845168471336365, "learning_rate": 0.0001140799854430133, "loss": 1.3836, "step": 33069 }, { "epoch": 0.42972956729793077, "grad_norm": 0.36686450242996216, "learning_rate": 0.00011407738598110191, "loss": 1.5415, "step": 33070 }, { "epoch": 0.42974256184184667, "grad_norm": 0.3897012770175934, "learning_rate": 0.00011407478651919052, "loss": 1.4018, "step": 33071 }, { "epoch": 0.4297555563857625, "grad_norm": 0.43043002486228943, "learning_rate": 0.00011407218705727916, "loss": 1.4643, "step": 33072 }, { "epoch": 0.4297685509296784, "grad_norm": 0.44798365235328674, "learning_rate": 0.00011406958759536777, "loss": 1.5364, "step": 33073 }, { "epoch": 0.42978154547359426, "grad_norm": 0.47974103689193726, "learning_rate": 0.00011406698813345638, "loss": 1.5396, "step": 33074 }, { "epoch": 0.42979454001751016, "grad_norm": 0.40785491466522217, "learning_rate": 0.00011406438867154499, "loss": 1.575, "step": 33075 }, { "epoch": 0.429807534561426, "grad_norm": 0.47836023569107056, "learning_rate": 0.00011406178920963362, "loss": 1.4555, "step": 33076 }, { "epoch": 0.4298205291053419, "grad_norm": 0.3881927728652954, "learning_rate": 0.00011405918974772223, "loss": 1.3507, "step": 33077 }, { "epoch": 0.42983352364925775, "grad_norm": 0.428774893283844, "learning_rate": 0.00011405659028581084, "loss": 1.5527, "step": 33078 }, { "epoch": 0.42984651819317365, "grad_norm": 0.40334850549697876, "learning_rate": 0.00011405399082389945, "loss": 1.3412, "step": 33079 }, { "epoch": 0.4298595127370895, "grad_norm": 0.3947594165802002, "learning_rate": 0.00011405139136198807, "loss": 1.3321, "step": 33080 }, { "epoch": 0.4298725072810054, "grad_norm": 0.3086243271827698, "learning_rate": 0.00011404879190007668, "loss": 1.2419, "step": 33081 }, { "epoch": 0.42988550182492125, "grad_norm": 0.4754907786846161, "learning_rate": 0.0001140461924381653, "loss": 1.532, "step": 33082 }, { "epoch": 0.42989849636883715, "grad_norm": 0.36356648802757263, "learning_rate": 0.0001140435929762539, "loss": 1.4077, "step": 33083 }, { "epoch": 0.429911490912753, "grad_norm": 0.4019680917263031, "learning_rate": 0.00011404099351434254, "loss": 1.4357, "step": 33084 }, { "epoch": 0.4299244854566689, "grad_norm": 0.32742324471473694, "learning_rate": 0.00011403839405243116, "loss": 1.4432, "step": 33085 }, { "epoch": 0.42993748000058474, "grad_norm": 0.47397276759147644, "learning_rate": 0.00011403579459051977, "loss": 1.4284, "step": 33086 }, { "epoch": 0.42995047454450064, "grad_norm": 0.40912535786628723, "learning_rate": 0.00011403319512860839, "loss": 1.5713, "step": 33087 }, { "epoch": 0.4299634690884165, "grad_norm": 0.40757817029953003, "learning_rate": 0.000114030595666697, "loss": 1.5159, "step": 33088 }, { "epoch": 0.4299764636323324, "grad_norm": 0.332685649394989, "learning_rate": 0.00011402799620478561, "loss": 1.2956, "step": 33089 }, { "epoch": 0.42998945817624823, "grad_norm": 0.4536055326461792, "learning_rate": 0.00011402539674287422, "loss": 1.212, "step": 33090 }, { "epoch": 0.43000245272016413, "grad_norm": 0.38578975200653076, "learning_rate": 0.00011402279728096286, "loss": 1.2481, "step": 33091 }, { "epoch": 0.43001544726408, "grad_norm": 0.3576241135597229, "learning_rate": 0.00011402019781905147, "loss": 1.3822, "step": 33092 }, { "epoch": 0.4300284418079959, "grad_norm": 0.37304365634918213, "learning_rate": 0.00011401759835714007, "loss": 1.2057, "step": 33093 }, { "epoch": 0.4300414363519117, "grad_norm": 0.4942501485347748, "learning_rate": 0.00011401499889522868, "loss": 1.4689, "step": 33094 }, { "epoch": 0.4300544308958276, "grad_norm": 0.45764344930648804, "learning_rate": 0.00011401239943331732, "loss": 1.362, "step": 33095 }, { "epoch": 0.43006742543974347, "grad_norm": 0.42469510436058044, "learning_rate": 0.00011400979997140593, "loss": 1.3188, "step": 33096 }, { "epoch": 0.43008041998365937, "grad_norm": 0.33602777123451233, "learning_rate": 0.00011400720050949454, "loss": 1.2276, "step": 33097 }, { "epoch": 0.4300934145275752, "grad_norm": 0.4492737054824829, "learning_rate": 0.00011400460104758315, "loss": 1.5507, "step": 33098 }, { "epoch": 0.4301064090714911, "grad_norm": 0.3442053496837616, "learning_rate": 0.00011400200158567178, "loss": 1.4496, "step": 33099 }, { "epoch": 0.43011940361540696, "grad_norm": 0.41710934042930603, "learning_rate": 0.00011399940212376039, "loss": 1.2895, "step": 33100 }, { "epoch": 0.43013239815932286, "grad_norm": 0.46953511238098145, "learning_rate": 0.000113996802661849, "loss": 1.4354, "step": 33101 }, { "epoch": 0.4301453927032387, "grad_norm": 0.34530511498451233, "learning_rate": 0.00011399420319993761, "loss": 1.4933, "step": 33102 }, { "epoch": 0.4301583872471546, "grad_norm": 0.33698585629463196, "learning_rate": 0.00011399160373802625, "loss": 1.3439, "step": 33103 }, { "epoch": 0.43017138179107045, "grad_norm": 0.3705131709575653, "learning_rate": 0.00011398900427611486, "loss": 1.4833, "step": 33104 }, { "epoch": 0.43018437633498635, "grad_norm": 0.3394135534763336, "learning_rate": 0.00011398640481420346, "loss": 1.0934, "step": 33105 }, { "epoch": 0.4301973708789022, "grad_norm": 0.4138874411582947, "learning_rate": 0.00011398380535229207, "loss": 1.4256, "step": 33106 }, { "epoch": 0.4302103654228181, "grad_norm": 0.3919612467288971, "learning_rate": 0.0001139812058903807, "loss": 1.3404, "step": 33107 }, { "epoch": 0.43022335996673394, "grad_norm": 0.444354772567749, "learning_rate": 0.00011397860642846932, "loss": 1.2738, "step": 33108 }, { "epoch": 0.43023635451064984, "grad_norm": 0.44634923338890076, "learning_rate": 0.00011397600696655793, "loss": 1.5357, "step": 33109 }, { "epoch": 0.4302493490545657, "grad_norm": 0.494880348443985, "learning_rate": 0.00011397340750464654, "loss": 1.5915, "step": 33110 }, { "epoch": 0.4302623435984816, "grad_norm": 0.26488107442855835, "learning_rate": 0.00011397080804273516, "loss": 1.4708, "step": 33111 }, { "epoch": 0.43027533814239743, "grad_norm": 0.461359441280365, "learning_rate": 0.00011396820858082377, "loss": 1.3508, "step": 33112 }, { "epoch": 0.43028833268631334, "grad_norm": 0.43496787548065186, "learning_rate": 0.00011396560911891238, "loss": 1.4757, "step": 33113 }, { "epoch": 0.4303013272302292, "grad_norm": 0.39551621675491333, "learning_rate": 0.000113963009657001, "loss": 1.42, "step": 33114 }, { "epoch": 0.4303143217741451, "grad_norm": 0.3881276547908783, "learning_rate": 0.00011396041019508963, "loss": 1.3462, "step": 33115 }, { "epoch": 0.430327316318061, "grad_norm": 0.4036378264427185, "learning_rate": 0.00011395781073317824, "loss": 1.3031, "step": 33116 }, { "epoch": 0.4303403108619768, "grad_norm": 0.465343177318573, "learning_rate": 0.00011395521127126685, "loss": 1.4617, "step": 33117 }, { "epoch": 0.4303533054058927, "grad_norm": 0.4165148437023163, "learning_rate": 0.00011395261180935545, "loss": 1.3492, "step": 33118 }, { "epoch": 0.4303662999498086, "grad_norm": 0.3834114372730255, "learning_rate": 0.00011395001234744409, "loss": 1.3815, "step": 33119 }, { "epoch": 0.4303792944937245, "grad_norm": 0.4060989022254944, "learning_rate": 0.0001139474128855327, "loss": 1.2717, "step": 33120 }, { "epoch": 0.4303922890376403, "grad_norm": 0.4501494765281677, "learning_rate": 0.00011394481342362131, "loss": 1.448, "step": 33121 }, { "epoch": 0.4304052835815562, "grad_norm": 0.33736148476600647, "learning_rate": 0.00011394221396170992, "loss": 1.3055, "step": 33122 }, { "epoch": 0.43041827812547206, "grad_norm": 0.3838721811771393, "learning_rate": 0.00011393961449979855, "loss": 1.5451, "step": 33123 }, { "epoch": 0.43043127266938797, "grad_norm": 0.5082861185073853, "learning_rate": 0.00011393701503788716, "loss": 1.464, "step": 33124 }, { "epoch": 0.4304442672133038, "grad_norm": 0.3409280776977539, "learning_rate": 0.00011393441557597577, "loss": 1.156, "step": 33125 }, { "epoch": 0.4304572617572197, "grad_norm": 0.3471972644329071, "learning_rate": 0.00011393181611406441, "loss": 1.4289, "step": 33126 }, { "epoch": 0.43047025630113556, "grad_norm": 0.45658400654792786, "learning_rate": 0.00011392921665215302, "loss": 1.405, "step": 33127 }, { "epoch": 0.43048325084505146, "grad_norm": 0.31989625096321106, "learning_rate": 0.00011392661719024163, "loss": 1.4028, "step": 33128 }, { "epoch": 0.4304962453889673, "grad_norm": 0.4005883038043976, "learning_rate": 0.00011392401772833024, "loss": 1.3442, "step": 33129 }, { "epoch": 0.4305092399328832, "grad_norm": 0.4131905138492584, "learning_rate": 0.00011392141826641886, "loss": 1.3235, "step": 33130 }, { "epoch": 0.43052223447679905, "grad_norm": 0.4129081666469574, "learning_rate": 0.00011391881880450748, "loss": 1.4005, "step": 33131 }, { "epoch": 0.43053522902071495, "grad_norm": 0.3129737079143524, "learning_rate": 0.00011391621934259609, "loss": 1.4682, "step": 33132 }, { "epoch": 0.4305482235646308, "grad_norm": 0.4177081882953644, "learning_rate": 0.0001139136198806847, "loss": 1.3637, "step": 33133 }, { "epoch": 0.4305612181085467, "grad_norm": 0.4250037670135498, "learning_rate": 0.00011391102041877334, "loss": 1.3565, "step": 33134 }, { "epoch": 0.43057421265246254, "grad_norm": 0.31789255142211914, "learning_rate": 0.00011390842095686193, "loss": 1.4654, "step": 33135 }, { "epoch": 0.43058720719637844, "grad_norm": 0.4526450037956238, "learning_rate": 0.00011390582149495054, "loss": 1.4473, "step": 33136 }, { "epoch": 0.4306002017402943, "grad_norm": 0.41610458493232727, "learning_rate": 0.00011390322203303915, "loss": 1.4201, "step": 33137 }, { "epoch": 0.4306131962842102, "grad_norm": 0.502224326133728, "learning_rate": 0.00011390062257112779, "loss": 1.3476, "step": 33138 }, { "epoch": 0.43062619082812603, "grad_norm": 0.40534213185310364, "learning_rate": 0.0001138980231092164, "loss": 1.5252, "step": 33139 }, { "epoch": 0.43063918537204193, "grad_norm": 0.37727993726730347, "learning_rate": 0.00011389542364730501, "loss": 1.335, "step": 33140 }, { "epoch": 0.4306521799159578, "grad_norm": 0.32960546016693115, "learning_rate": 0.00011389282418539363, "loss": 1.255, "step": 33141 }, { "epoch": 0.4306651744598737, "grad_norm": 0.3949643075466156, "learning_rate": 0.00011389022472348225, "loss": 1.4645, "step": 33142 }, { "epoch": 0.4306781690037895, "grad_norm": 0.3354935944080353, "learning_rate": 0.00011388762526157086, "loss": 1.3703, "step": 33143 }, { "epoch": 0.4306911635477054, "grad_norm": 0.4239766299724579, "learning_rate": 0.00011388502579965947, "loss": 1.4255, "step": 33144 }, { "epoch": 0.43070415809162127, "grad_norm": 0.35579726099967957, "learning_rate": 0.00011388242633774808, "loss": 1.3935, "step": 33145 }, { "epoch": 0.43071715263553717, "grad_norm": 0.3417304456233978, "learning_rate": 0.00011387982687583672, "loss": 1.3113, "step": 33146 }, { "epoch": 0.430730147179453, "grad_norm": 0.49395790696144104, "learning_rate": 0.00011387722741392532, "loss": 1.4068, "step": 33147 }, { "epoch": 0.4307431417233689, "grad_norm": 0.35646697878837585, "learning_rate": 0.00011387462795201393, "loss": 1.4002, "step": 33148 }, { "epoch": 0.43075613626728476, "grad_norm": 0.3382672965526581, "learning_rate": 0.00011387202849010254, "loss": 1.4763, "step": 33149 }, { "epoch": 0.43076913081120066, "grad_norm": 0.3735864460468292, "learning_rate": 0.00011386942902819118, "loss": 1.3321, "step": 33150 }, { "epoch": 0.4307821253551165, "grad_norm": 0.4748136103153229, "learning_rate": 0.00011386682956627979, "loss": 1.4862, "step": 33151 }, { "epoch": 0.4307951198990324, "grad_norm": 0.3942922055721283, "learning_rate": 0.0001138642301043684, "loss": 1.5218, "step": 33152 }, { "epoch": 0.43080811444294825, "grad_norm": 0.42013445496559143, "learning_rate": 0.00011386163064245701, "loss": 1.2873, "step": 33153 }, { "epoch": 0.43082110898686415, "grad_norm": 0.4014489948749542, "learning_rate": 0.00011385903118054564, "loss": 1.1865, "step": 33154 }, { "epoch": 0.43083410353078, "grad_norm": 0.3736909329891205, "learning_rate": 0.00011385643171863425, "loss": 1.3899, "step": 33155 }, { "epoch": 0.4308470980746959, "grad_norm": 0.3160135746002197, "learning_rate": 0.00011385383225672286, "loss": 1.3598, "step": 33156 }, { "epoch": 0.43086009261861175, "grad_norm": 0.6537340879440308, "learning_rate": 0.00011385123279481147, "loss": 1.4791, "step": 33157 }, { "epoch": 0.43087308716252765, "grad_norm": 0.36841726303100586, "learning_rate": 0.0001138486333329001, "loss": 1.3764, "step": 33158 }, { "epoch": 0.4308860817064435, "grad_norm": 0.34699544310569763, "learning_rate": 0.00011384603387098872, "loss": 1.4489, "step": 33159 }, { "epoch": 0.4308990762503594, "grad_norm": 0.4063728153705597, "learning_rate": 0.00011384343440907731, "loss": 1.3759, "step": 33160 }, { "epoch": 0.43091207079427524, "grad_norm": 0.4053795635700226, "learning_rate": 0.00011384083494716595, "loss": 1.3979, "step": 33161 }, { "epoch": 0.43092506533819114, "grad_norm": 0.41829726099967957, "learning_rate": 0.00011383823548525456, "loss": 1.8126, "step": 33162 }, { "epoch": 0.430938059882107, "grad_norm": 0.39402708411216736, "learning_rate": 0.00011383563602334317, "loss": 1.4858, "step": 33163 }, { "epoch": 0.4309510544260229, "grad_norm": 0.3054930865764618, "learning_rate": 0.00011383303656143179, "loss": 1.283, "step": 33164 }, { "epoch": 0.43096404896993873, "grad_norm": 0.42324545979499817, "learning_rate": 0.00011383043709952041, "loss": 1.3945, "step": 33165 }, { "epoch": 0.43097704351385463, "grad_norm": 0.33468741178512573, "learning_rate": 0.00011382783763760902, "loss": 1.3577, "step": 33166 }, { "epoch": 0.4309900380577705, "grad_norm": 0.4499208629131317, "learning_rate": 0.00011382523817569763, "loss": 1.4147, "step": 33167 }, { "epoch": 0.4310030326016864, "grad_norm": 0.5325268507003784, "learning_rate": 0.00011382263871378624, "loss": 1.3786, "step": 33168 }, { "epoch": 0.4310160271456022, "grad_norm": 0.3340540826320648, "learning_rate": 0.00011382003925187488, "loss": 1.3203, "step": 33169 }, { "epoch": 0.4310290216895181, "grad_norm": 0.38758203387260437, "learning_rate": 0.00011381743978996349, "loss": 1.3524, "step": 33170 }, { "epoch": 0.43104201623343397, "grad_norm": 0.3518804609775543, "learning_rate": 0.0001138148403280521, "loss": 1.5326, "step": 33171 }, { "epoch": 0.43105501077734987, "grad_norm": 0.4840235114097595, "learning_rate": 0.00011381224086614071, "loss": 1.6046, "step": 33172 }, { "epoch": 0.4310680053212657, "grad_norm": 0.41478368639945984, "learning_rate": 0.00011380964140422934, "loss": 1.5209, "step": 33173 }, { "epoch": 0.4310809998651816, "grad_norm": 0.3656295835971832, "learning_rate": 0.00011380704194231795, "loss": 1.1945, "step": 33174 }, { "epoch": 0.43109399440909746, "grad_norm": 0.44753339886665344, "learning_rate": 0.00011380444248040656, "loss": 1.3426, "step": 33175 }, { "epoch": 0.43110698895301336, "grad_norm": 0.4699760377407074, "learning_rate": 0.00011380184301849517, "loss": 1.5319, "step": 33176 }, { "epoch": 0.4311199834969292, "grad_norm": 0.3339541256427765, "learning_rate": 0.0001137992435565838, "loss": 1.3831, "step": 33177 }, { "epoch": 0.4311329780408451, "grad_norm": 0.35164934396743774, "learning_rate": 0.0001137966440946724, "loss": 1.2086, "step": 33178 }, { "epoch": 0.43114597258476095, "grad_norm": 0.4146008789539337, "learning_rate": 0.00011379404463276102, "loss": 1.1877, "step": 33179 }, { "epoch": 0.43115896712867685, "grad_norm": 0.3636126220226288, "learning_rate": 0.00011379144517084963, "loss": 1.5375, "step": 33180 }, { "epoch": 0.4311719616725927, "grad_norm": 0.342949241399765, "learning_rate": 0.00011378884570893827, "loss": 1.2516, "step": 33181 }, { "epoch": 0.4311849562165086, "grad_norm": 0.44998618960380554, "learning_rate": 0.00011378624624702688, "loss": 1.3039, "step": 33182 }, { "epoch": 0.43119795076042444, "grad_norm": 0.42032068967819214, "learning_rate": 0.00011378364678511549, "loss": 1.432, "step": 33183 }, { "epoch": 0.43121094530434034, "grad_norm": 0.4338682293891907, "learning_rate": 0.0001137810473232041, "loss": 1.3524, "step": 33184 }, { "epoch": 0.4312239398482562, "grad_norm": 0.39883100986480713, "learning_rate": 0.00011377844786129272, "loss": 1.2826, "step": 33185 }, { "epoch": 0.4312369343921721, "grad_norm": 0.40389248728752136, "learning_rate": 0.00011377584839938133, "loss": 1.4106, "step": 33186 }, { "epoch": 0.43124992893608793, "grad_norm": 0.39609894156455994, "learning_rate": 0.00011377324893746995, "loss": 1.3038, "step": 33187 }, { "epoch": 0.43126292348000383, "grad_norm": 0.3595294952392578, "learning_rate": 0.00011377064947555856, "loss": 1.3779, "step": 33188 }, { "epoch": 0.4312759180239197, "grad_norm": 0.49085184931755066, "learning_rate": 0.00011376805001364718, "loss": 1.498, "step": 33189 }, { "epoch": 0.4312889125678356, "grad_norm": 0.5130632519721985, "learning_rate": 0.00011376545055173579, "loss": 1.4194, "step": 33190 }, { "epoch": 0.4313019071117514, "grad_norm": 0.4032772183418274, "learning_rate": 0.0001137628510898244, "loss": 1.3503, "step": 33191 }, { "epoch": 0.4313149016556673, "grad_norm": 0.3786592185497284, "learning_rate": 0.00011376025162791301, "loss": 1.3481, "step": 33192 }, { "epoch": 0.4313278961995832, "grad_norm": 0.43501991033554077, "learning_rate": 0.00011375765216600165, "loss": 1.4536, "step": 33193 }, { "epoch": 0.4313408907434991, "grad_norm": 0.41508761048316956, "learning_rate": 0.00011375505270409026, "loss": 1.1944, "step": 33194 }, { "epoch": 0.431353885287415, "grad_norm": 0.3628128468990326, "learning_rate": 0.00011375245324217887, "loss": 1.52, "step": 33195 }, { "epoch": 0.4313668798313308, "grad_norm": 0.42269468307495117, "learning_rate": 0.00011374985378026748, "loss": 1.1061, "step": 33196 }, { "epoch": 0.4313798743752467, "grad_norm": 0.4141942262649536, "learning_rate": 0.00011374725431835611, "loss": 1.4285, "step": 33197 }, { "epoch": 0.43139286891916256, "grad_norm": 0.4284278452396393, "learning_rate": 0.00011374465485644472, "loss": 1.384, "step": 33198 }, { "epoch": 0.43140586346307846, "grad_norm": 0.28185179829597473, "learning_rate": 0.00011374205539453333, "loss": 1.439, "step": 33199 }, { "epoch": 0.4314188580069943, "grad_norm": 0.39052891731262207, "learning_rate": 0.00011373945593262197, "loss": 1.3289, "step": 33200 }, { "epoch": 0.4314318525509102, "grad_norm": 0.3845687508583069, "learning_rate": 0.00011373685647071058, "loss": 1.2464, "step": 33201 }, { "epoch": 0.43144484709482606, "grad_norm": 0.28828588128089905, "learning_rate": 0.00011373425700879918, "loss": 1.2802, "step": 33202 }, { "epoch": 0.43145784163874196, "grad_norm": 0.4119218587875366, "learning_rate": 0.00011373165754688779, "loss": 1.3812, "step": 33203 }, { "epoch": 0.4314708361826578, "grad_norm": 0.3059438467025757, "learning_rate": 0.00011372905808497643, "loss": 1.7177, "step": 33204 }, { "epoch": 0.4314838307265737, "grad_norm": 0.4431051015853882, "learning_rate": 0.00011372645862306504, "loss": 1.5406, "step": 33205 }, { "epoch": 0.43149682527048955, "grad_norm": 0.4355873763561249, "learning_rate": 0.00011372385916115365, "loss": 1.3495, "step": 33206 }, { "epoch": 0.43150981981440545, "grad_norm": 0.3914143443107605, "learning_rate": 0.00011372125969924226, "loss": 1.3385, "step": 33207 }, { "epoch": 0.4315228143583213, "grad_norm": 0.4504346549510956, "learning_rate": 0.00011371866023733088, "loss": 1.5157, "step": 33208 }, { "epoch": 0.4315358089022372, "grad_norm": 0.32465875148773193, "learning_rate": 0.0001137160607754195, "loss": 1.2986, "step": 33209 }, { "epoch": 0.43154880344615304, "grad_norm": 0.4478144645690918, "learning_rate": 0.0001137134613135081, "loss": 1.4112, "step": 33210 }, { "epoch": 0.43156179799006894, "grad_norm": 0.4574632942676544, "learning_rate": 0.00011371086185159672, "loss": 1.2701, "step": 33211 }, { "epoch": 0.4315747925339848, "grad_norm": 0.3956300914287567, "learning_rate": 0.00011370826238968535, "loss": 1.3532, "step": 33212 }, { "epoch": 0.4315877870779007, "grad_norm": 0.29667261242866516, "learning_rate": 0.00011370566292777396, "loss": 1.2325, "step": 33213 }, { "epoch": 0.43160078162181653, "grad_norm": 0.4740503430366516, "learning_rate": 0.00011370306346586258, "loss": 1.5815, "step": 33214 }, { "epoch": 0.43161377616573243, "grad_norm": 0.4166916012763977, "learning_rate": 0.00011370046400395117, "loss": 1.3698, "step": 33215 }, { "epoch": 0.4316267707096483, "grad_norm": 0.32976484298706055, "learning_rate": 0.00011369786454203981, "loss": 1.277, "step": 33216 }, { "epoch": 0.4316397652535642, "grad_norm": 0.3904076814651489, "learning_rate": 0.00011369526508012842, "loss": 1.4539, "step": 33217 }, { "epoch": 0.43165275979748, "grad_norm": 0.42991331219673157, "learning_rate": 0.00011369266561821703, "loss": 1.5142, "step": 33218 }, { "epoch": 0.4316657543413959, "grad_norm": 0.43651294708251953, "learning_rate": 0.00011369006615630564, "loss": 1.3554, "step": 33219 }, { "epoch": 0.43167874888531177, "grad_norm": 0.3718758821487427, "learning_rate": 0.00011368746669439427, "loss": 1.4874, "step": 33220 }, { "epoch": 0.43169174342922767, "grad_norm": 0.41168835759162903, "learning_rate": 0.00011368486723248288, "loss": 1.3757, "step": 33221 }, { "epoch": 0.4317047379731435, "grad_norm": 0.39248085021972656, "learning_rate": 0.00011368226777057149, "loss": 1.4277, "step": 33222 }, { "epoch": 0.4317177325170594, "grad_norm": 0.34415584802627563, "learning_rate": 0.0001136796683086601, "loss": 1.2693, "step": 33223 }, { "epoch": 0.43173072706097526, "grad_norm": 0.3893500864505768, "learning_rate": 0.00011367706884674874, "loss": 1.3699, "step": 33224 }, { "epoch": 0.43174372160489116, "grad_norm": 0.46702802181243896, "learning_rate": 0.00011367446938483735, "loss": 1.3849, "step": 33225 }, { "epoch": 0.431756716148807, "grad_norm": 0.3870939016342163, "learning_rate": 0.00011367186992292596, "loss": 1.4418, "step": 33226 }, { "epoch": 0.4317697106927229, "grad_norm": 0.4282057583332062, "learning_rate": 0.00011366927046101457, "loss": 1.422, "step": 33227 }, { "epoch": 0.43178270523663875, "grad_norm": 0.3599450886249542, "learning_rate": 0.0001136666709991032, "loss": 1.2941, "step": 33228 }, { "epoch": 0.43179569978055465, "grad_norm": 0.39071422815322876, "learning_rate": 0.00011366407153719181, "loss": 1.2355, "step": 33229 }, { "epoch": 0.4318086943244705, "grad_norm": 0.4582749307155609, "learning_rate": 0.00011366147207528042, "loss": 1.4414, "step": 33230 }, { "epoch": 0.4318216888683864, "grad_norm": 0.45632022619247437, "learning_rate": 0.00011365887261336903, "loss": 1.432, "step": 33231 }, { "epoch": 0.43183468341230224, "grad_norm": 0.32240864634513855, "learning_rate": 0.00011365627315145765, "loss": 1.4783, "step": 33232 }, { "epoch": 0.43184767795621815, "grad_norm": 0.46361517906188965, "learning_rate": 0.00011365367368954626, "loss": 1.3672, "step": 33233 }, { "epoch": 0.431860672500134, "grad_norm": 0.4368123412132263, "learning_rate": 0.00011365107422763488, "loss": 1.2213, "step": 33234 }, { "epoch": 0.4318736670440499, "grad_norm": 0.4330582022666931, "learning_rate": 0.00011364847476572351, "loss": 1.4719, "step": 33235 }, { "epoch": 0.43188666158796574, "grad_norm": 0.35051003098487854, "learning_rate": 0.00011364587530381212, "loss": 1.3664, "step": 33236 }, { "epoch": 0.43189965613188164, "grad_norm": 0.42034879326820374, "learning_rate": 0.00011364327584190074, "loss": 1.406, "step": 33237 }, { "epoch": 0.4319126506757975, "grad_norm": 0.44087105989456177, "learning_rate": 0.00011364067637998935, "loss": 1.5003, "step": 33238 }, { "epoch": 0.4319256452197134, "grad_norm": 0.38869795203208923, "learning_rate": 0.00011363807691807797, "loss": 1.2656, "step": 33239 }, { "epoch": 0.43193863976362923, "grad_norm": 0.464563250541687, "learning_rate": 0.00011363547745616658, "loss": 1.4988, "step": 33240 }, { "epoch": 0.43195163430754513, "grad_norm": 0.38857123255729675, "learning_rate": 0.00011363287799425519, "loss": 1.3995, "step": 33241 }, { "epoch": 0.431964628851461, "grad_norm": 0.4506318271160126, "learning_rate": 0.0001136302785323438, "loss": 1.5083, "step": 33242 }, { "epoch": 0.4319776233953769, "grad_norm": 0.42720407247543335, "learning_rate": 0.00011362767907043244, "loss": 1.4089, "step": 33243 }, { "epoch": 0.4319906179392927, "grad_norm": 0.4419236183166504, "learning_rate": 0.00011362507960852104, "loss": 1.4772, "step": 33244 }, { "epoch": 0.4320036124832086, "grad_norm": 0.32073158025741577, "learning_rate": 0.00011362248014660965, "loss": 1.2382, "step": 33245 }, { "epoch": 0.43201660702712447, "grad_norm": 0.32058602571487427, "learning_rate": 0.00011361988068469826, "loss": 1.1853, "step": 33246 }, { "epoch": 0.43202960157104037, "grad_norm": 0.3077157735824585, "learning_rate": 0.0001136172812227869, "loss": 1.6345, "step": 33247 }, { "epoch": 0.4320425961149562, "grad_norm": 0.43998798727989197, "learning_rate": 0.00011361468176087551, "loss": 1.4264, "step": 33248 }, { "epoch": 0.4320555906588721, "grad_norm": 0.3312949240207672, "learning_rate": 0.00011361208229896412, "loss": 1.2769, "step": 33249 }, { "epoch": 0.43206858520278796, "grad_norm": 0.38544297218322754, "learning_rate": 0.00011360948283705273, "loss": 1.3399, "step": 33250 }, { "epoch": 0.43208157974670386, "grad_norm": 0.42414095997810364, "learning_rate": 0.00011360688337514136, "loss": 1.4023, "step": 33251 }, { "epoch": 0.4320945742906197, "grad_norm": 0.385626882314682, "learning_rate": 0.00011360428391322997, "loss": 1.1628, "step": 33252 }, { "epoch": 0.4321075688345356, "grad_norm": 0.5207545161247253, "learning_rate": 0.00011360168445131858, "loss": 1.4965, "step": 33253 }, { "epoch": 0.43212056337845145, "grad_norm": 0.27895423769950867, "learning_rate": 0.00011359908498940719, "loss": 1.0972, "step": 33254 }, { "epoch": 0.43213355792236735, "grad_norm": 0.4397144019603729, "learning_rate": 0.00011359648552749583, "loss": 1.379, "step": 33255 }, { "epoch": 0.4321465524662832, "grad_norm": 0.3078467845916748, "learning_rate": 0.00011359388606558444, "loss": 1.3298, "step": 33256 }, { "epoch": 0.4321595470101991, "grad_norm": 0.3218854069709778, "learning_rate": 0.00011359128660367304, "loss": 1.2533, "step": 33257 }, { "epoch": 0.43217254155411494, "grad_norm": 0.37579187750816345, "learning_rate": 0.00011358868714176165, "loss": 1.4169, "step": 33258 }, { "epoch": 0.43218553609803084, "grad_norm": 0.4179782271385193, "learning_rate": 0.00011358608767985028, "loss": 1.4597, "step": 33259 }, { "epoch": 0.4321985306419467, "grad_norm": 0.33286842703819275, "learning_rate": 0.0001135834882179389, "loss": 1.406, "step": 33260 }, { "epoch": 0.4322115251858626, "grad_norm": 0.41115471720695496, "learning_rate": 0.0001135808887560275, "loss": 1.5031, "step": 33261 }, { "epoch": 0.43222451972977843, "grad_norm": 0.3228372633457184, "learning_rate": 0.00011357828929411612, "loss": 1.5566, "step": 33262 }, { "epoch": 0.43223751427369433, "grad_norm": 0.37301644682884216, "learning_rate": 0.00011357568983220474, "loss": 1.4507, "step": 33263 }, { "epoch": 0.4322505088176102, "grad_norm": 0.3995104134082794, "learning_rate": 0.00011357309037029335, "loss": 1.5073, "step": 33264 }, { "epoch": 0.4322635033615261, "grad_norm": 0.45963793992996216, "learning_rate": 0.00011357049090838196, "loss": 1.6684, "step": 33265 }, { "epoch": 0.4322764979054419, "grad_norm": 0.44271236658096313, "learning_rate": 0.00011356789144647057, "loss": 1.3368, "step": 33266 }, { "epoch": 0.4322894924493578, "grad_norm": 0.4104548990726471, "learning_rate": 0.00011356529198455921, "loss": 1.3336, "step": 33267 }, { "epoch": 0.4323024869932737, "grad_norm": 0.3448936939239502, "learning_rate": 0.00011356269252264782, "loss": 1.1577, "step": 33268 }, { "epoch": 0.43231548153718957, "grad_norm": 0.4223630428314209, "learning_rate": 0.00011356009306073642, "loss": 1.5959, "step": 33269 }, { "epoch": 0.4323284760811055, "grad_norm": 0.40309464931488037, "learning_rate": 0.00011355749359882503, "loss": 1.2942, "step": 33270 }, { "epoch": 0.4323414706250213, "grad_norm": 0.3207789361476898, "learning_rate": 0.00011355489413691367, "loss": 1.3433, "step": 33271 }, { "epoch": 0.4323544651689372, "grad_norm": 0.42869865894317627, "learning_rate": 0.00011355229467500228, "loss": 1.3548, "step": 33272 }, { "epoch": 0.43236745971285306, "grad_norm": 0.4022025465965271, "learning_rate": 0.00011354969521309089, "loss": 1.5777, "step": 33273 }, { "epoch": 0.43238045425676896, "grad_norm": 0.401841938495636, "learning_rate": 0.00011354709575117952, "loss": 1.4223, "step": 33274 }, { "epoch": 0.4323934488006848, "grad_norm": 0.39523476362228394, "learning_rate": 0.00011354449628926813, "loss": 1.3229, "step": 33275 }, { "epoch": 0.4324064433446007, "grad_norm": 0.3510282337665558, "learning_rate": 0.00011354189682735674, "loss": 1.3569, "step": 33276 }, { "epoch": 0.43241943788851656, "grad_norm": 0.3840654790401459, "learning_rate": 0.00011353929736544535, "loss": 1.3249, "step": 33277 }, { "epoch": 0.43243243243243246, "grad_norm": 0.3250669538974762, "learning_rate": 0.00011353669790353399, "loss": 1.4034, "step": 33278 }, { "epoch": 0.4324454269763483, "grad_norm": 0.4078018069267273, "learning_rate": 0.0001135340984416226, "loss": 1.4428, "step": 33279 }, { "epoch": 0.4324584215202642, "grad_norm": 0.40031298995018005, "learning_rate": 0.00011353149897971121, "loss": 1.3446, "step": 33280 }, { "epoch": 0.43247141606418005, "grad_norm": 0.4504551887512207, "learning_rate": 0.00011352889951779982, "loss": 1.4756, "step": 33281 }, { "epoch": 0.43248441060809595, "grad_norm": 0.4024350345134735, "learning_rate": 0.00011352630005588844, "loss": 1.2388, "step": 33282 }, { "epoch": 0.4324974051520118, "grad_norm": 0.36275625228881836, "learning_rate": 0.00011352370059397706, "loss": 1.5586, "step": 33283 }, { "epoch": 0.4325103996959277, "grad_norm": 0.3074016273021698, "learning_rate": 0.00011352110113206567, "loss": 1.4634, "step": 33284 }, { "epoch": 0.43252339423984354, "grad_norm": 0.3277377486228943, "learning_rate": 0.00011351850167015428, "loss": 1.2472, "step": 33285 }, { "epoch": 0.43253638878375944, "grad_norm": 0.4185287654399872, "learning_rate": 0.0001135159022082429, "loss": 1.442, "step": 33286 }, { "epoch": 0.4325493833276753, "grad_norm": 0.37106043100357056, "learning_rate": 0.00011351330274633151, "loss": 1.4118, "step": 33287 }, { "epoch": 0.4325623778715912, "grad_norm": 0.40792375802993774, "learning_rate": 0.00011351070328442012, "loss": 1.5117, "step": 33288 }, { "epoch": 0.43257537241550703, "grad_norm": 0.3630428910255432, "learning_rate": 0.00011350810382250873, "loss": 1.4757, "step": 33289 }, { "epoch": 0.43258836695942293, "grad_norm": 0.3440118432044983, "learning_rate": 0.00011350550436059737, "loss": 1.2884, "step": 33290 }, { "epoch": 0.4326013615033388, "grad_norm": 0.3732414245605469, "learning_rate": 0.00011350290489868598, "loss": 1.3255, "step": 33291 }, { "epoch": 0.4326143560472547, "grad_norm": 0.4982890188694, "learning_rate": 0.0001135003054367746, "loss": 1.4479, "step": 33292 }, { "epoch": 0.4326273505911705, "grad_norm": 0.3514576554298401, "learning_rate": 0.0001134977059748632, "loss": 1.4575, "step": 33293 }, { "epoch": 0.4326403451350864, "grad_norm": 0.517388641834259, "learning_rate": 0.00011349510651295183, "loss": 1.3765, "step": 33294 }, { "epoch": 0.43265333967900227, "grad_norm": 0.47659197449684143, "learning_rate": 0.00011349250705104044, "loss": 1.4819, "step": 33295 }, { "epoch": 0.43266633422291817, "grad_norm": 0.3034079372882843, "learning_rate": 0.00011348990758912905, "loss": 1.1563, "step": 33296 }, { "epoch": 0.432679328766834, "grad_norm": 0.4922278821468353, "learning_rate": 0.00011348730812721766, "loss": 1.6657, "step": 33297 }, { "epoch": 0.4326923233107499, "grad_norm": 0.3886752426624298, "learning_rate": 0.0001134847086653063, "loss": 1.4479, "step": 33298 }, { "epoch": 0.43270531785466576, "grad_norm": 0.39772507548332214, "learning_rate": 0.0001134821092033949, "loss": 1.3902, "step": 33299 }, { "epoch": 0.43271831239858166, "grad_norm": 0.39935287833213806, "learning_rate": 0.00011347950974148351, "loss": 1.0775, "step": 33300 }, { "epoch": 0.4327313069424975, "grad_norm": 0.4971461594104767, "learning_rate": 0.00011347691027957212, "loss": 1.3956, "step": 33301 }, { "epoch": 0.4327443014864134, "grad_norm": 0.40562406182289124, "learning_rate": 0.00011347431081766076, "loss": 1.4968, "step": 33302 }, { "epoch": 0.43275729603032925, "grad_norm": 0.37555643916130066, "learning_rate": 0.00011347171135574937, "loss": 1.5903, "step": 33303 }, { "epoch": 0.43277029057424515, "grad_norm": 0.3111216425895691, "learning_rate": 0.00011346911189383798, "loss": 1.2038, "step": 33304 }, { "epoch": 0.432783285118161, "grad_norm": 0.4424316883087158, "learning_rate": 0.00011346651243192659, "loss": 1.4002, "step": 33305 }, { "epoch": 0.4327962796620769, "grad_norm": 0.3849847614765167, "learning_rate": 0.00011346391297001522, "loss": 1.3532, "step": 33306 }, { "epoch": 0.43280927420599274, "grad_norm": 0.473209947347641, "learning_rate": 0.00011346131350810383, "loss": 1.3683, "step": 33307 }, { "epoch": 0.43282226874990865, "grad_norm": 0.40562304854393005, "learning_rate": 0.00011345871404619244, "loss": 1.25, "step": 33308 }, { "epoch": 0.4328352632938245, "grad_norm": 0.44929763674736023, "learning_rate": 0.00011345611458428108, "loss": 1.3771, "step": 33309 }, { "epoch": 0.4328482578377404, "grad_norm": 0.4249218702316284, "learning_rate": 0.00011345351512236969, "loss": 1.5043, "step": 33310 }, { "epoch": 0.43286125238165624, "grad_norm": 0.47616755962371826, "learning_rate": 0.00011345091566045828, "loss": 1.4547, "step": 33311 }, { "epoch": 0.43287424692557214, "grad_norm": 0.368272989988327, "learning_rate": 0.0001134483161985469, "loss": 1.4776, "step": 33312 }, { "epoch": 0.432887241469488, "grad_norm": 0.31713753938674927, "learning_rate": 0.00011344571673663553, "loss": 1.2758, "step": 33313 }, { "epoch": 0.4329002360134039, "grad_norm": 0.3299190104007721, "learning_rate": 0.00011344311727472414, "loss": 1.4371, "step": 33314 }, { "epoch": 0.43291323055731973, "grad_norm": 0.4193507730960846, "learning_rate": 0.00011344051781281275, "loss": 1.4943, "step": 33315 }, { "epoch": 0.43292622510123563, "grad_norm": 0.36613646149635315, "learning_rate": 0.00011343791835090137, "loss": 1.3403, "step": 33316 }, { "epoch": 0.4329392196451515, "grad_norm": 0.3816149830818176, "learning_rate": 0.00011343531888898999, "loss": 1.3818, "step": 33317 }, { "epoch": 0.4329522141890674, "grad_norm": 0.31470587849617004, "learning_rate": 0.0001134327194270786, "loss": 1.3734, "step": 33318 }, { "epoch": 0.4329652087329832, "grad_norm": 0.3731381893157959, "learning_rate": 0.00011343011996516721, "loss": 1.3972, "step": 33319 }, { "epoch": 0.4329782032768991, "grad_norm": 0.4419451355934143, "learning_rate": 0.00011342752050325582, "loss": 1.5374, "step": 33320 }, { "epoch": 0.43299119782081497, "grad_norm": 0.5802931785583496, "learning_rate": 0.00011342492104134446, "loss": 1.5212, "step": 33321 }, { "epoch": 0.43300419236473087, "grad_norm": 0.45645299553871155, "learning_rate": 0.00011342232157943307, "loss": 1.4509, "step": 33322 }, { "epoch": 0.4330171869086467, "grad_norm": 0.4089113175868988, "learning_rate": 0.00011341972211752168, "loss": 1.4412, "step": 33323 }, { "epoch": 0.4330301814525626, "grad_norm": 0.4606892168521881, "learning_rate": 0.00011341712265561028, "loss": 1.5184, "step": 33324 }, { "epoch": 0.43304317599647846, "grad_norm": 0.4504558742046356, "learning_rate": 0.00011341452319369892, "loss": 1.3575, "step": 33325 }, { "epoch": 0.43305617054039436, "grad_norm": 0.29926320910453796, "learning_rate": 0.00011341192373178753, "loss": 1.4309, "step": 33326 }, { "epoch": 0.4330691650843102, "grad_norm": 0.45521196722984314, "learning_rate": 0.00011340932426987614, "loss": 1.6272, "step": 33327 }, { "epoch": 0.4330821596282261, "grad_norm": 0.4077664911746979, "learning_rate": 0.00011340672480796475, "loss": 1.2156, "step": 33328 }, { "epoch": 0.43309515417214195, "grad_norm": 0.3805752694606781, "learning_rate": 0.00011340412534605338, "loss": 1.3498, "step": 33329 }, { "epoch": 0.43310814871605785, "grad_norm": 0.4026910066604614, "learning_rate": 0.00011340152588414199, "loss": 1.4695, "step": 33330 }, { "epoch": 0.4331211432599737, "grad_norm": 0.3863276243209839, "learning_rate": 0.0001133989264222306, "loss": 1.4246, "step": 33331 }, { "epoch": 0.4331341378038896, "grad_norm": 0.43163633346557617, "learning_rate": 0.00011339632696031921, "loss": 1.3903, "step": 33332 }, { "epoch": 0.43314713234780544, "grad_norm": 0.3720497786998749, "learning_rate": 0.00011339372749840785, "loss": 1.3878, "step": 33333 }, { "epoch": 0.43316012689172134, "grad_norm": 0.45112699270248413, "learning_rate": 0.00011339112803649646, "loss": 1.3059, "step": 33334 }, { "epoch": 0.4331731214356372, "grad_norm": 0.3637961745262146, "learning_rate": 0.00011338852857458507, "loss": 1.2886, "step": 33335 }, { "epoch": 0.4331861159795531, "grad_norm": 0.4268046021461487, "learning_rate": 0.00011338592911267368, "loss": 1.3298, "step": 33336 }, { "epoch": 0.43319911052346893, "grad_norm": 0.5041301250457764, "learning_rate": 0.0001133833296507623, "loss": 1.48, "step": 33337 }, { "epoch": 0.43321210506738483, "grad_norm": 0.4450848698616028, "learning_rate": 0.00011338073018885091, "loss": 1.4316, "step": 33338 }, { "epoch": 0.4332250996113007, "grad_norm": 0.38326382637023926, "learning_rate": 0.00011337813072693952, "loss": 1.3024, "step": 33339 }, { "epoch": 0.4332380941552166, "grad_norm": 0.34570571780204773, "learning_rate": 0.00011337553126502814, "loss": 1.2976, "step": 33340 }, { "epoch": 0.4332510886991324, "grad_norm": 0.44194158911705017, "learning_rate": 0.00011337293180311676, "loss": 1.3505, "step": 33341 }, { "epoch": 0.4332640832430483, "grad_norm": 0.47158992290496826, "learning_rate": 0.00011337033234120537, "loss": 1.2014, "step": 33342 }, { "epoch": 0.43327707778696417, "grad_norm": 0.46597978472709656, "learning_rate": 0.00011336773287929398, "loss": 1.4681, "step": 33343 }, { "epoch": 0.43329007233088007, "grad_norm": 0.40148627758026123, "learning_rate": 0.00011336513341738259, "loss": 1.3262, "step": 33344 }, { "epoch": 0.433303066874796, "grad_norm": 0.37109822034835815, "learning_rate": 0.00011336253395547123, "loss": 1.4103, "step": 33345 }, { "epoch": 0.4333160614187118, "grad_norm": 0.40506136417388916, "learning_rate": 0.00011335993449355984, "loss": 1.2947, "step": 33346 }, { "epoch": 0.4333290559626277, "grad_norm": 0.48681625723838806, "learning_rate": 0.00011335733503164845, "loss": 1.4877, "step": 33347 }, { "epoch": 0.43334205050654356, "grad_norm": 0.4513232409954071, "learning_rate": 0.00011335473556973708, "loss": 1.5822, "step": 33348 }, { "epoch": 0.43335504505045946, "grad_norm": 0.34064745903015137, "learning_rate": 0.00011335213610782569, "loss": 1.3418, "step": 33349 }, { "epoch": 0.4333680395943753, "grad_norm": 0.39339929819107056, "learning_rate": 0.0001133495366459143, "loss": 1.2815, "step": 33350 }, { "epoch": 0.4333810341382912, "grad_norm": 0.4671693444252014, "learning_rate": 0.00011334693718400291, "loss": 1.5291, "step": 33351 }, { "epoch": 0.43339402868220706, "grad_norm": 0.3874957859516144, "learning_rate": 0.00011334433772209155, "loss": 1.4403, "step": 33352 }, { "epoch": 0.43340702322612296, "grad_norm": 0.39423203468322754, "learning_rate": 0.00011334173826018015, "loss": 1.2113, "step": 33353 }, { "epoch": 0.4334200177700388, "grad_norm": 0.31017324328422546, "learning_rate": 0.00011333913879826876, "loss": 1.4776, "step": 33354 }, { "epoch": 0.4334330123139547, "grad_norm": 0.4219195246696472, "learning_rate": 0.00011333653933635737, "loss": 1.4052, "step": 33355 }, { "epoch": 0.43344600685787055, "grad_norm": 0.26179036498069763, "learning_rate": 0.000113333939874446, "loss": 1.1457, "step": 33356 }, { "epoch": 0.43345900140178645, "grad_norm": 0.3898164629936218, "learning_rate": 0.00011333134041253462, "loss": 1.2837, "step": 33357 }, { "epoch": 0.4334719959457023, "grad_norm": 0.4827081263065338, "learning_rate": 0.00011332874095062323, "loss": 1.3817, "step": 33358 }, { "epoch": 0.4334849904896182, "grad_norm": 0.3659784495830536, "learning_rate": 0.00011332614148871184, "loss": 1.4893, "step": 33359 }, { "epoch": 0.43349798503353404, "grad_norm": 0.44223302602767944, "learning_rate": 0.00011332354202680046, "loss": 1.4722, "step": 33360 }, { "epoch": 0.43351097957744994, "grad_norm": 0.42899152636528015, "learning_rate": 0.00011332094256488907, "loss": 1.6069, "step": 33361 }, { "epoch": 0.4335239741213658, "grad_norm": 0.4769567847251892, "learning_rate": 0.00011331834310297768, "loss": 1.5355, "step": 33362 }, { "epoch": 0.4335369686652817, "grad_norm": 0.36739885807037354, "learning_rate": 0.0001133157436410663, "loss": 1.2098, "step": 33363 }, { "epoch": 0.43354996320919753, "grad_norm": 0.38264384865760803, "learning_rate": 0.00011331314417915493, "loss": 1.3599, "step": 33364 }, { "epoch": 0.43356295775311343, "grad_norm": 0.48409056663513184, "learning_rate": 0.00011331054471724354, "loss": 1.4006, "step": 33365 }, { "epoch": 0.4335759522970293, "grad_norm": 0.4064169228076935, "learning_rate": 0.00011330794525533214, "loss": 1.3664, "step": 33366 }, { "epoch": 0.4335889468409452, "grad_norm": 0.3536081314086914, "learning_rate": 0.00011330534579342075, "loss": 1.3164, "step": 33367 }, { "epoch": 0.433601941384861, "grad_norm": 0.3633044362068176, "learning_rate": 0.00011330274633150939, "loss": 1.3938, "step": 33368 }, { "epoch": 0.4336149359287769, "grad_norm": 0.39726391434669495, "learning_rate": 0.000113300146869598, "loss": 1.2013, "step": 33369 }, { "epoch": 0.43362793047269277, "grad_norm": 0.4329591393470764, "learning_rate": 0.00011329754740768661, "loss": 1.3728, "step": 33370 }, { "epoch": 0.43364092501660867, "grad_norm": 0.48028022050857544, "learning_rate": 0.00011329494794577522, "loss": 1.4975, "step": 33371 }, { "epoch": 0.4336539195605245, "grad_norm": 0.5066609382629395, "learning_rate": 0.00011329234848386385, "loss": 1.4266, "step": 33372 }, { "epoch": 0.4336669141044404, "grad_norm": 0.4257539212703705, "learning_rate": 0.00011328974902195246, "loss": 1.5235, "step": 33373 }, { "epoch": 0.43367990864835626, "grad_norm": 0.39514589309692383, "learning_rate": 0.00011328714956004107, "loss": 1.3934, "step": 33374 }, { "epoch": 0.43369290319227216, "grad_norm": 0.41533178091049194, "learning_rate": 0.00011328455009812968, "loss": 1.4558, "step": 33375 }, { "epoch": 0.433705897736188, "grad_norm": 0.3777025640010834, "learning_rate": 0.00011328195063621832, "loss": 1.3505, "step": 33376 }, { "epoch": 0.4337188922801039, "grad_norm": 0.48106345534324646, "learning_rate": 0.00011327935117430693, "loss": 1.5528, "step": 33377 }, { "epoch": 0.43373188682401975, "grad_norm": 0.41469866037368774, "learning_rate": 0.00011327675171239554, "loss": 1.2378, "step": 33378 }, { "epoch": 0.43374488136793565, "grad_norm": 0.37692227959632874, "learning_rate": 0.00011327415225048414, "loss": 1.4141, "step": 33379 }, { "epoch": 0.4337578759118515, "grad_norm": 0.4641399681568146, "learning_rate": 0.00011327155278857278, "loss": 1.4116, "step": 33380 }, { "epoch": 0.4337708704557674, "grad_norm": 0.22812844812870026, "learning_rate": 0.00011326895332666139, "loss": 1.2506, "step": 33381 }, { "epoch": 0.43378386499968324, "grad_norm": 0.31077444553375244, "learning_rate": 0.00011326635386475, "loss": 1.399, "step": 33382 }, { "epoch": 0.43379685954359914, "grad_norm": 0.4887525141239166, "learning_rate": 0.00011326375440283862, "loss": 1.4804, "step": 33383 }, { "epoch": 0.433809854087515, "grad_norm": 0.43618637323379517, "learning_rate": 0.00011326115494092723, "loss": 1.5407, "step": 33384 }, { "epoch": 0.4338228486314309, "grad_norm": 0.3965262472629547, "learning_rate": 0.00011325855547901584, "loss": 1.3818, "step": 33385 }, { "epoch": 0.43383584317534674, "grad_norm": 0.3204534351825714, "learning_rate": 0.00011325595601710446, "loss": 1.2097, "step": 33386 }, { "epoch": 0.43384883771926264, "grad_norm": 0.33079731464385986, "learning_rate": 0.0001132533565551931, "loss": 1.3774, "step": 33387 }, { "epoch": 0.4338618322631785, "grad_norm": 0.34844499826431274, "learning_rate": 0.0001132507570932817, "loss": 1.1496, "step": 33388 }, { "epoch": 0.4338748268070944, "grad_norm": 0.40162885189056396, "learning_rate": 0.00011324815763137032, "loss": 1.2784, "step": 33389 }, { "epoch": 0.4338878213510102, "grad_norm": 0.47484856843948364, "learning_rate": 0.00011324555816945893, "loss": 1.3759, "step": 33390 }, { "epoch": 0.43390081589492613, "grad_norm": 0.38232091069221497, "learning_rate": 0.00011324295870754755, "loss": 1.3834, "step": 33391 }, { "epoch": 0.433913810438842, "grad_norm": 0.4098871350288391, "learning_rate": 0.00011324035924563616, "loss": 1.3013, "step": 33392 }, { "epoch": 0.4339268049827579, "grad_norm": 0.44997867941856384, "learning_rate": 0.00011323775978372477, "loss": 1.4015, "step": 33393 }, { "epoch": 0.4339397995266737, "grad_norm": 0.40890321135520935, "learning_rate": 0.00011323516032181338, "loss": 1.5688, "step": 33394 }, { "epoch": 0.4339527940705896, "grad_norm": 0.4250136911869049, "learning_rate": 0.00011323256085990201, "loss": 1.407, "step": 33395 }, { "epoch": 0.43396578861450547, "grad_norm": 0.4520784020423889, "learning_rate": 0.00011322996139799062, "loss": 1.3771, "step": 33396 }, { "epoch": 0.43397878315842137, "grad_norm": 0.3191169500350952, "learning_rate": 0.00011322736193607923, "loss": 1.1592, "step": 33397 }, { "epoch": 0.4339917777023372, "grad_norm": 0.32721102237701416, "learning_rate": 0.00011322476247416784, "loss": 1.1837, "step": 33398 }, { "epoch": 0.4340047722462531, "grad_norm": 0.43327850103378296, "learning_rate": 0.00011322216301225648, "loss": 1.3023, "step": 33399 }, { "epoch": 0.43401776679016896, "grad_norm": 0.48186078667640686, "learning_rate": 0.00011321956355034509, "loss": 1.4413, "step": 33400 }, { "epoch": 0.43403076133408486, "grad_norm": 0.4613114595413208, "learning_rate": 0.0001132169640884337, "loss": 1.4906, "step": 33401 }, { "epoch": 0.4340437558780007, "grad_norm": 0.4332413375377655, "learning_rate": 0.00011321436462652231, "loss": 1.3221, "step": 33402 }, { "epoch": 0.4340567504219166, "grad_norm": 0.3787354826927185, "learning_rate": 0.00011321176516461094, "loss": 1.2435, "step": 33403 }, { "epoch": 0.43406974496583245, "grad_norm": 0.3831295073032379, "learning_rate": 0.00011320916570269955, "loss": 1.299, "step": 33404 }, { "epoch": 0.43408273950974835, "grad_norm": 0.38606923818588257, "learning_rate": 0.00011320656624078816, "loss": 1.537, "step": 33405 }, { "epoch": 0.4340957340536642, "grad_norm": 0.4439369738101959, "learning_rate": 0.00011320396677887677, "loss": 1.3845, "step": 33406 }, { "epoch": 0.4341087285975801, "grad_norm": 0.42838701605796814, "learning_rate": 0.00011320136731696541, "loss": 1.4798, "step": 33407 }, { "epoch": 0.43412172314149594, "grad_norm": 0.3909963369369507, "learning_rate": 0.000113198767855054, "loss": 1.2312, "step": 33408 }, { "epoch": 0.43413471768541184, "grad_norm": 0.407919704914093, "learning_rate": 0.00011319616839314262, "loss": 1.3801, "step": 33409 }, { "epoch": 0.4341477122293277, "grad_norm": 0.44364839792251587, "learning_rate": 0.00011319356893123123, "loss": 1.387, "step": 33410 }, { "epoch": 0.4341607067732436, "grad_norm": 0.4279322028160095, "learning_rate": 0.00011319096946931986, "loss": 1.3925, "step": 33411 }, { "epoch": 0.43417370131715943, "grad_norm": 0.4127967059612274, "learning_rate": 0.00011318837000740848, "loss": 1.3729, "step": 33412 }, { "epoch": 0.43418669586107533, "grad_norm": 0.47077178955078125, "learning_rate": 0.00011318577054549709, "loss": 1.464, "step": 33413 }, { "epoch": 0.4341996904049912, "grad_norm": 0.3866066038608551, "learning_rate": 0.0001131831710835857, "loss": 1.3356, "step": 33414 }, { "epoch": 0.4342126849489071, "grad_norm": 0.3365674614906311, "learning_rate": 0.00011318057162167432, "loss": 1.5151, "step": 33415 }, { "epoch": 0.4342256794928229, "grad_norm": 0.41441819071769714, "learning_rate": 0.00011317797215976293, "loss": 1.5264, "step": 33416 }, { "epoch": 0.4342386740367388, "grad_norm": 0.48482784628868103, "learning_rate": 0.00011317537269785154, "loss": 1.5655, "step": 33417 }, { "epoch": 0.43425166858065467, "grad_norm": 0.4661523997783661, "learning_rate": 0.00011317277323594015, "loss": 1.4614, "step": 33418 }, { "epoch": 0.43426466312457057, "grad_norm": 0.3824726343154907, "learning_rate": 0.00011317017377402879, "loss": 1.2984, "step": 33419 }, { "epoch": 0.43427765766848647, "grad_norm": 0.4088524580001831, "learning_rate": 0.0001131675743121174, "loss": 1.469, "step": 33420 }, { "epoch": 0.4342906522124023, "grad_norm": 0.4773986041545868, "learning_rate": 0.000113164974850206, "loss": 1.4669, "step": 33421 }, { "epoch": 0.4343036467563182, "grad_norm": 0.4116101861000061, "learning_rate": 0.00011316237538829464, "loss": 1.4242, "step": 33422 }, { "epoch": 0.43431664130023406, "grad_norm": 0.38664594292640686, "learning_rate": 0.00011315977592638325, "loss": 1.2727, "step": 33423 }, { "epoch": 0.43432963584414996, "grad_norm": 0.41536426544189453, "learning_rate": 0.00011315717646447186, "loss": 1.448, "step": 33424 }, { "epoch": 0.4343426303880658, "grad_norm": 0.34537506103515625, "learning_rate": 0.00011315457700256047, "loss": 1.4336, "step": 33425 }, { "epoch": 0.4343556249319817, "grad_norm": 0.41290032863616943, "learning_rate": 0.0001131519775406491, "loss": 1.6197, "step": 33426 }, { "epoch": 0.43436861947589755, "grad_norm": 0.45269858837127686, "learning_rate": 0.00011314937807873771, "loss": 1.3998, "step": 33427 }, { "epoch": 0.43438161401981346, "grad_norm": 0.435006320476532, "learning_rate": 0.00011314677861682632, "loss": 1.3104, "step": 33428 }, { "epoch": 0.4343946085637293, "grad_norm": 0.3992103040218353, "learning_rate": 0.00011314417915491493, "loss": 1.4176, "step": 33429 }, { "epoch": 0.4344076031076452, "grad_norm": 0.3852788209915161, "learning_rate": 0.00011314157969300357, "loss": 1.2839, "step": 33430 }, { "epoch": 0.43442059765156105, "grad_norm": 0.47402387857437134, "learning_rate": 0.00011313898023109218, "loss": 1.3537, "step": 33431 }, { "epoch": 0.43443359219547695, "grad_norm": 0.3946602940559387, "learning_rate": 0.00011313638076918079, "loss": 1.2797, "step": 33432 }, { "epoch": 0.4344465867393928, "grad_norm": 0.4738875925540924, "learning_rate": 0.0001131337813072694, "loss": 1.4255, "step": 33433 }, { "epoch": 0.4344595812833087, "grad_norm": 0.34690219163894653, "learning_rate": 0.00011313118184535802, "loss": 1.2765, "step": 33434 }, { "epoch": 0.43447257582722454, "grad_norm": 0.43955838680267334, "learning_rate": 0.00011312858238344664, "loss": 1.401, "step": 33435 }, { "epoch": 0.43448557037114044, "grad_norm": 0.4817427396774292, "learning_rate": 0.00011312598292153525, "loss": 1.5673, "step": 33436 }, { "epoch": 0.4344985649150563, "grad_norm": 0.3731817305088043, "learning_rate": 0.00011312338345962386, "loss": 1.3954, "step": 33437 }, { "epoch": 0.4345115594589722, "grad_norm": 0.4648199677467346, "learning_rate": 0.00011312078399771248, "loss": 1.4883, "step": 33438 }, { "epoch": 0.43452455400288803, "grad_norm": 0.40364593267440796, "learning_rate": 0.00011311818453580109, "loss": 1.2678, "step": 33439 }, { "epoch": 0.43453754854680393, "grad_norm": 0.43110036849975586, "learning_rate": 0.0001131155850738897, "loss": 1.534, "step": 33440 }, { "epoch": 0.4345505430907198, "grad_norm": 0.3076140880584717, "learning_rate": 0.00011311298561197831, "loss": 1.2552, "step": 33441 }, { "epoch": 0.4345635376346357, "grad_norm": 0.4156789779663086, "learning_rate": 0.00011311038615006695, "loss": 1.4134, "step": 33442 }, { "epoch": 0.4345765321785515, "grad_norm": 0.35401976108551025, "learning_rate": 0.00011310778668815556, "loss": 1.3956, "step": 33443 }, { "epoch": 0.4345895267224674, "grad_norm": 0.43652963638305664, "learning_rate": 0.00011310518722624417, "loss": 1.3795, "step": 33444 }, { "epoch": 0.43460252126638327, "grad_norm": 0.6564937233924866, "learning_rate": 0.00011310258776433279, "loss": 1.3569, "step": 33445 }, { "epoch": 0.43461551581029917, "grad_norm": 0.49574828147888184, "learning_rate": 0.00011309998830242141, "loss": 1.4067, "step": 33446 }, { "epoch": 0.434628510354215, "grad_norm": 0.3732493817806244, "learning_rate": 0.00011309738884051002, "loss": 1.3411, "step": 33447 }, { "epoch": 0.4346415048981309, "grad_norm": 0.41613703966140747, "learning_rate": 0.00011309478937859863, "loss": 1.4204, "step": 33448 }, { "epoch": 0.43465449944204676, "grad_norm": 0.35996541380882263, "learning_rate": 0.00011309218991668724, "loss": 1.4027, "step": 33449 }, { "epoch": 0.43466749398596266, "grad_norm": 0.3963604271411896, "learning_rate": 0.00011308959045477587, "loss": 1.3183, "step": 33450 }, { "epoch": 0.4346804885298785, "grad_norm": 0.46713197231292725, "learning_rate": 0.00011308699099286448, "loss": 1.5591, "step": 33451 }, { "epoch": 0.4346934830737944, "grad_norm": 0.4088074564933777, "learning_rate": 0.00011308439153095309, "loss": 1.3804, "step": 33452 }, { "epoch": 0.43470647761771025, "grad_norm": 0.4444734752178192, "learning_rate": 0.0001130817920690417, "loss": 1.2932, "step": 33453 }, { "epoch": 0.43471947216162615, "grad_norm": 0.4183602035045624, "learning_rate": 0.00011307919260713034, "loss": 1.3085, "step": 33454 }, { "epoch": 0.434732466705542, "grad_norm": 0.38002219796180725, "learning_rate": 0.00011307659314521895, "loss": 1.2622, "step": 33455 }, { "epoch": 0.4347454612494579, "grad_norm": 0.4457433819770813, "learning_rate": 0.00011307399368330756, "loss": 1.2834, "step": 33456 }, { "epoch": 0.43475845579337374, "grad_norm": 0.40768909454345703, "learning_rate": 0.00011307139422139618, "loss": 1.2711, "step": 33457 }, { "epoch": 0.43477145033728964, "grad_norm": 0.4220108091831207, "learning_rate": 0.0001130687947594848, "loss": 1.3668, "step": 33458 }, { "epoch": 0.4347844448812055, "grad_norm": 0.3860601484775543, "learning_rate": 0.0001130661952975734, "loss": 1.4977, "step": 33459 }, { "epoch": 0.4347974394251214, "grad_norm": 0.4006190299987793, "learning_rate": 0.00011306359583566202, "loss": 1.4083, "step": 33460 }, { "epoch": 0.43481043396903724, "grad_norm": 0.2855985164642334, "learning_rate": 0.00011306099637375065, "loss": 1.437, "step": 33461 }, { "epoch": 0.43482342851295314, "grad_norm": 0.47407209873199463, "learning_rate": 0.00011305839691183927, "loss": 1.5727, "step": 33462 }, { "epoch": 0.434836423056869, "grad_norm": 0.46544012427330017, "learning_rate": 0.00011305579744992786, "loss": 1.4005, "step": 33463 }, { "epoch": 0.4348494176007849, "grad_norm": 0.418704628944397, "learning_rate": 0.00011305319798801647, "loss": 1.3765, "step": 33464 }, { "epoch": 0.4348624121447007, "grad_norm": 0.3312271237373352, "learning_rate": 0.00011305059852610511, "loss": 1.1666, "step": 33465 }, { "epoch": 0.43487540668861663, "grad_norm": 0.34655091166496277, "learning_rate": 0.00011304799906419372, "loss": 1.1152, "step": 33466 }, { "epoch": 0.4348884012325325, "grad_norm": 0.4476008713245392, "learning_rate": 0.00011304539960228233, "loss": 1.5592, "step": 33467 }, { "epoch": 0.4349013957764484, "grad_norm": 0.4199071228504181, "learning_rate": 0.00011304280014037095, "loss": 1.2558, "step": 33468 }, { "epoch": 0.4349143903203642, "grad_norm": 0.42124468088150024, "learning_rate": 0.00011304020067845957, "loss": 1.3893, "step": 33469 }, { "epoch": 0.4349273848642801, "grad_norm": 0.42469683289527893, "learning_rate": 0.00011303760121654818, "loss": 1.2345, "step": 33470 }, { "epoch": 0.43494037940819597, "grad_norm": 0.4101039469242096, "learning_rate": 0.00011303500175463679, "loss": 1.5741, "step": 33471 }, { "epoch": 0.43495337395211187, "grad_norm": 0.45337605476379395, "learning_rate": 0.0001130324022927254, "loss": 1.5221, "step": 33472 }, { "epoch": 0.4349663684960277, "grad_norm": 0.49228453636169434, "learning_rate": 0.00011302980283081404, "loss": 1.3706, "step": 33473 }, { "epoch": 0.4349793630399436, "grad_norm": 0.3609216511249542, "learning_rate": 0.00011302720336890265, "loss": 1.4534, "step": 33474 }, { "epoch": 0.43499235758385946, "grad_norm": 0.3879944682121277, "learning_rate": 0.00011302460390699126, "loss": 1.3322, "step": 33475 }, { "epoch": 0.43500535212777536, "grad_norm": 0.4508839249610901, "learning_rate": 0.00011302200444507986, "loss": 1.2471, "step": 33476 }, { "epoch": 0.4350183466716912, "grad_norm": 0.4435296356678009, "learning_rate": 0.0001130194049831685, "loss": 1.2675, "step": 33477 }, { "epoch": 0.4350313412156071, "grad_norm": 0.4883398711681366, "learning_rate": 0.00011301680552125711, "loss": 1.4605, "step": 33478 }, { "epoch": 0.43504433575952295, "grad_norm": 0.4156615436077118, "learning_rate": 0.00011301420605934572, "loss": 1.2154, "step": 33479 }, { "epoch": 0.43505733030343885, "grad_norm": 0.45453551411628723, "learning_rate": 0.00011301160659743433, "loss": 1.2391, "step": 33480 }, { "epoch": 0.4350703248473547, "grad_norm": 0.33524975180625916, "learning_rate": 0.00011300900713552295, "loss": 1.3147, "step": 33481 }, { "epoch": 0.4350833193912706, "grad_norm": 0.3049945831298828, "learning_rate": 0.00011300640767361157, "loss": 1.4444, "step": 33482 }, { "epoch": 0.43509631393518644, "grad_norm": 0.40221959352493286, "learning_rate": 0.00011300380821170018, "loss": 1.4626, "step": 33483 }, { "epoch": 0.43510930847910234, "grad_norm": 0.3802822232246399, "learning_rate": 0.00011300120874978879, "loss": 1.3301, "step": 33484 }, { "epoch": 0.4351223030230182, "grad_norm": 0.39566537737846375, "learning_rate": 0.00011299860928787743, "loss": 1.3197, "step": 33485 }, { "epoch": 0.4351352975669341, "grad_norm": 0.33191347122192383, "learning_rate": 0.00011299600982596604, "loss": 1.3181, "step": 33486 }, { "epoch": 0.43514829211084993, "grad_norm": 0.3309651017189026, "learning_rate": 0.00011299341036405465, "loss": 1.5071, "step": 33487 }, { "epoch": 0.43516128665476583, "grad_norm": 0.33770081400871277, "learning_rate": 0.00011299081090214324, "loss": 1.3857, "step": 33488 }, { "epoch": 0.4351742811986817, "grad_norm": 0.4571974575519562, "learning_rate": 0.00011298821144023188, "loss": 1.3903, "step": 33489 }, { "epoch": 0.4351872757425976, "grad_norm": 0.406531423330307, "learning_rate": 0.0001129856119783205, "loss": 1.4535, "step": 33490 }, { "epoch": 0.4352002702865134, "grad_norm": 0.48849910497665405, "learning_rate": 0.0001129830125164091, "loss": 1.4285, "step": 33491 }, { "epoch": 0.4352132648304293, "grad_norm": 0.36898303031921387, "learning_rate": 0.00011298041305449772, "loss": 1.1151, "step": 33492 }, { "epoch": 0.43522625937434517, "grad_norm": 0.36265623569488525, "learning_rate": 0.00011297781359258634, "loss": 1.449, "step": 33493 }, { "epoch": 0.43523925391826107, "grad_norm": 0.40674179792404175, "learning_rate": 0.00011297521413067495, "loss": 1.2248, "step": 33494 }, { "epoch": 0.4352522484621769, "grad_norm": 0.46880534291267395, "learning_rate": 0.00011297261466876356, "loss": 1.4557, "step": 33495 }, { "epoch": 0.4352652430060928, "grad_norm": 0.3855893909931183, "learning_rate": 0.0001129700152068522, "loss": 1.2897, "step": 33496 }, { "epoch": 0.4352782375500087, "grad_norm": 0.3856970965862274, "learning_rate": 0.00011296741574494081, "loss": 1.3147, "step": 33497 }, { "epoch": 0.43529123209392456, "grad_norm": 0.4600497782230377, "learning_rate": 0.00011296481628302942, "loss": 1.643, "step": 33498 }, { "epoch": 0.43530422663784046, "grad_norm": 0.30839085578918457, "learning_rate": 0.00011296221682111803, "loss": 1.3637, "step": 33499 }, { "epoch": 0.4353172211817563, "grad_norm": 0.38388967514038086, "learning_rate": 0.00011295961735920666, "loss": 1.4948, "step": 33500 }, { "epoch": 0.4353302157256722, "grad_norm": 0.27634376287460327, "learning_rate": 0.00011295701789729527, "loss": 1.3606, "step": 33501 }, { "epoch": 0.43534321026958805, "grad_norm": 0.6087124943733215, "learning_rate": 0.00011295441843538388, "loss": 1.2832, "step": 33502 }, { "epoch": 0.43535620481350396, "grad_norm": 0.43419352173805237, "learning_rate": 0.00011295181897347249, "loss": 1.3046, "step": 33503 }, { "epoch": 0.4353691993574198, "grad_norm": 0.7712401747703552, "learning_rate": 0.00011294921951156113, "loss": 1.4573, "step": 33504 }, { "epoch": 0.4353821939013357, "grad_norm": 0.36036282777786255, "learning_rate": 0.00011294662004964973, "loss": 1.4464, "step": 33505 }, { "epoch": 0.43539518844525155, "grad_norm": 0.6549562811851501, "learning_rate": 0.00011294402058773834, "loss": 1.5476, "step": 33506 }, { "epoch": 0.43540818298916745, "grad_norm": 0.29075387120246887, "learning_rate": 0.00011294142112582695, "loss": 1.5202, "step": 33507 }, { "epoch": 0.4354211775330833, "grad_norm": 0.4662638306617737, "learning_rate": 0.00011293882166391559, "loss": 1.4401, "step": 33508 }, { "epoch": 0.4354341720769992, "grad_norm": 0.3522166609764099, "learning_rate": 0.0001129362222020042, "loss": 1.3283, "step": 33509 }, { "epoch": 0.43544716662091504, "grad_norm": 0.49034950137138367, "learning_rate": 0.00011293362274009281, "loss": 1.4127, "step": 33510 }, { "epoch": 0.43546016116483094, "grad_norm": 0.3325579762458801, "learning_rate": 0.00011293102327818142, "loss": 1.3642, "step": 33511 }, { "epoch": 0.4354731557087468, "grad_norm": 0.364096075296402, "learning_rate": 0.00011292842381627004, "loss": 1.4818, "step": 33512 }, { "epoch": 0.4354861502526627, "grad_norm": 0.42379266023635864, "learning_rate": 0.00011292582435435865, "loss": 1.4459, "step": 33513 }, { "epoch": 0.43549914479657853, "grad_norm": 0.4427761733531952, "learning_rate": 0.00011292322489244726, "loss": 1.5096, "step": 33514 }, { "epoch": 0.43551213934049443, "grad_norm": 0.4284376800060272, "learning_rate": 0.00011292062543053588, "loss": 1.5818, "step": 33515 }, { "epoch": 0.4355251338844103, "grad_norm": 0.4817059338092804, "learning_rate": 0.00011291802596862451, "loss": 1.3576, "step": 33516 }, { "epoch": 0.4355381284283262, "grad_norm": 0.37710994482040405, "learning_rate": 0.00011291542650671311, "loss": 1.4815, "step": 33517 }, { "epoch": 0.435551122972242, "grad_norm": 0.3414710462093353, "learning_rate": 0.00011291282704480172, "loss": 1.3065, "step": 33518 }, { "epoch": 0.4355641175161579, "grad_norm": 0.3931276798248291, "learning_rate": 0.00011291022758289033, "loss": 1.3274, "step": 33519 }, { "epoch": 0.43557711206007377, "grad_norm": 0.43264785408973694, "learning_rate": 0.00011290762812097897, "loss": 1.4062, "step": 33520 }, { "epoch": 0.43559010660398967, "grad_norm": 0.3304761052131653, "learning_rate": 0.00011290502865906758, "loss": 1.1919, "step": 33521 }, { "epoch": 0.4356031011479055, "grad_norm": 0.38635075092315674, "learning_rate": 0.00011290242919715619, "loss": 1.365, "step": 33522 }, { "epoch": 0.4356160956918214, "grad_norm": 0.3684718906879425, "learning_rate": 0.0001128998297352448, "loss": 1.594, "step": 33523 }, { "epoch": 0.43562909023573726, "grad_norm": 0.416511207818985, "learning_rate": 0.00011289723027333343, "loss": 1.442, "step": 33524 }, { "epoch": 0.43564208477965316, "grad_norm": 0.4223576784133911, "learning_rate": 0.00011289463081142204, "loss": 1.578, "step": 33525 }, { "epoch": 0.435655079323569, "grad_norm": 0.365560382604599, "learning_rate": 0.00011289203134951065, "loss": 1.2924, "step": 33526 }, { "epoch": 0.4356680738674849, "grad_norm": 0.36633387207984924, "learning_rate": 0.00011288943188759926, "loss": 1.2276, "step": 33527 }, { "epoch": 0.43568106841140075, "grad_norm": 0.4006326496601105, "learning_rate": 0.0001128868324256879, "loss": 1.34, "step": 33528 }, { "epoch": 0.43569406295531665, "grad_norm": 0.45010441541671753, "learning_rate": 0.00011288423296377651, "loss": 1.3851, "step": 33529 }, { "epoch": 0.4357070574992325, "grad_norm": 0.3831198215484619, "learning_rate": 0.00011288163350186511, "loss": 1.3443, "step": 33530 }, { "epoch": 0.4357200520431484, "grad_norm": 0.4434194564819336, "learning_rate": 0.00011287903403995375, "loss": 1.5173, "step": 33531 }, { "epoch": 0.43573304658706424, "grad_norm": 0.38336092233657837, "learning_rate": 0.00011287643457804236, "loss": 1.5005, "step": 33532 }, { "epoch": 0.43574604113098014, "grad_norm": 0.32698726654052734, "learning_rate": 0.00011287383511613097, "loss": 1.3211, "step": 33533 }, { "epoch": 0.435759035674896, "grad_norm": 0.4255834221839905, "learning_rate": 0.00011287123565421958, "loss": 1.4003, "step": 33534 }, { "epoch": 0.4357720302188119, "grad_norm": 0.42821648716926575, "learning_rate": 0.0001128686361923082, "loss": 1.3387, "step": 33535 }, { "epoch": 0.43578502476272774, "grad_norm": 0.4874619245529175, "learning_rate": 0.00011286603673039681, "loss": 1.3532, "step": 33536 }, { "epoch": 0.43579801930664364, "grad_norm": 0.4532897472381592, "learning_rate": 0.00011286343726848542, "loss": 1.3817, "step": 33537 }, { "epoch": 0.4358110138505595, "grad_norm": 0.4371196925640106, "learning_rate": 0.00011286083780657404, "loss": 1.528, "step": 33538 }, { "epoch": 0.4358240083944754, "grad_norm": 0.3872183561325073, "learning_rate": 0.00011285823834466267, "loss": 1.4688, "step": 33539 }, { "epoch": 0.4358370029383912, "grad_norm": 0.3467790186405182, "learning_rate": 0.00011285563888275128, "loss": 1.4403, "step": 33540 }, { "epoch": 0.4358499974823071, "grad_norm": 0.3333219885826111, "learning_rate": 0.0001128530394208399, "loss": 1.3376, "step": 33541 }, { "epoch": 0.435862992026223, "grad_norm": 0.3824302852153778, "learning_rate": 0.0001128504399589285, "loss": 1.3371, "step": 33542 }, { "epoch": 0.4358759865701389, "grad_norm": 0.39576634764671326, "learning_rate": 0.00011284784049701713, "loss": 1.5762, "step": 33543 }, { "epoch": 0.4358889811140547, "grad_norm": 0.3787519931793213, "learning_rate": 0.00011284524103510574, "loss": 1.3393, "step": 33544 }, { "epoch": 0.4359019756579706, "grad_norm": 0.36629825830459595, "learning_rate": 0.00011284264157319435, "loss": 1.3414, "step": 33545 }, { "epoch": 0.43591497020188646, "grad_norm": 0.4001936614513397, "learning_rate": 0.00011284004211128296, "loss": 1.5644, "step": 33546 }, { "epoch": 0.43592796474580237, "grad_norm": 0.4340262711048126, "learning_rate": 0.00011283744264937159, "loss": 1.3626, "step": 33547 }, { "epoch": 0.4359409592897182, "grad_norm": 0.4547877609729767, "learning_rate": 0.0001128348431874602, "loss": 1.3608, "step": 33548 }, { "epoch": 0.4359539538336341, "grad_norm": 0.38235533237457275, "learning_rate": 0.00011283224372554881, "loss": 1.3387, "step": 33549 }, { "epoch": 0.43596694837754996, "grad_norm": 0.2960379123687744, "learning_rate": 0.00011282964426363742, "loss": 1.1271, "step": 33550 }, { "epoch": 0.43597994292146586, "grad_norm": 0.39158883690834045, "learning_rate": 0.00011282704480172606, "loss": 1.5222, "step": 33551 }, { "epoch": 0.4359929374653817, "grad_norm": 0.3735707700252533, "learning_rate": 0.00011282444533981467, "loss": 1.4341, "step": 33552 }, { "epoch": 0.4360059320092976, "grad_norm": 0.40538403391838074, "learning_rate": 0.00011282184587790328, "loss": 1.38, "step": 33553 }, { "epoch": 0.43601892655321345, "grad_norm": 0.39453834295272827, "learning_rate": 0.00011281924641599189, "loss": 1.5268, "step": 33554 }, { "epoch": 0.43603192109712935, "grad_norm": 0.36798128485679626, "learning_rate": 0.00011281664695408052, "loss": 1.1918, "step": 33555 }, { "epoch": 0.4360449156410452, "grad_norm": 0.4932467043399811, "learning_rate": 0.00011281404749216913, "loss": 1.6268, "step": 33556 }, { "epoch": 0.4360579101849611, "grad_norm": 0.532631516456604, "learning_rate": 0.00011281144803025774, "loss": 1.6405, "step": 33557 }, { "epoch": 0.43607090472887694, "grad_norm": 0.40453746914863586, "learning_rate": 0.00011280884856834635, "loss": 1.2374, "step": 33558 }, { "epoch": 0.43608389927279284, "grad_norm": 0.5885547399520874, "learning_rate": 0.00011280624910643497, "loss": 1.5621, "step": 33559 }, { "epoch": 0.4360968938167087, "grad_norm": 0.4040106236934662, "learning_rate": 0.00011280364964452358, "loss": 1.4886, "step": 33560 }, { "epoch": 0.4361098883606246, "grad_norm": 0.4366855323314667, "learning_rate": 0.0001128010501826122, "loss": 1.4403, "step": 33561 }, { "epoch": 0.43612288290454043, "grad_norm": 0.29536354541778564, "learning_rate": 0.0001127984507207008, "loss": 1.2681, "step": 33562 }, { "epoch": 0.43613587744845633, "grad_norm": 0.42942678928375244, "learning_rate": 0.00011279585125878944, "loss": 1.5517, "step": 33563 }, { "epoch": 0.4361488719923722, "grad_norm": 0.30667543411254883, "learning_rate": 0.00011279325179687806, "loss": 1.3038, "step": 33564 }, { "epoch": 0.4361618665362881, "grad_norm": 0.4050809144973755, "learning_rate": 0.00011279065233496667, "loss": 1.3096, "step": 33565 }, { "epoch": 0.4361748610802039, "grad_norm": 0.38385143876075745, "learning_rate": 0.00011278805287305528, "loss": 1.2196, "step": 33566 }, { "epoch": 0.4361878556241198, "grad_norm": 0.4730268120765686, "learning_rate": 0.0001127854534111439, "loss": 1.1984, "step": 33567 }, { "epoch": 0.43620085016803567, "grad_norm": 0.45110154151916504, "learning_rate": 0.00011278285394923251, "loss": 1.5623, "step": 33568 }, { "epoch": 0.43621384471195157, "grad_norm": 0.2870447635650635, "learning_rate": 0.00011278025448732112, "loss": 1.0624, "step": 33569 }, { "epoch": 0.4362268392558674, "grad_norm": 0.3653133511543274, "learning_rate": 0.00011277765502540976, "loss": 1.3821, "step": 33570 }, { "epoch": 0.4362398337997833, "grad_norm": 0.37583813071250916, "learning_rate": 0.00011277505556349837, "loss": 1.3205, "step": 33571 }, { "epoch": 0.43625282834369916, "grad_norm": 0.4050169587135315, "learning_rate": 0.00011277245610158697, "loss": 1.5281, "step": 33572 }, { "epoch": 0.43626582288761506, "grad_norm": 0.4745836853981018, "learning_rate": 0.00011276985663967558, "loss": 1.4027, "step": 33573 }, { "epoch": 0.43627881743153096, "grad_norm": 0.34038689732551575, "learning_rate": 0.00011276725717776422, "loss": 1.1926, "step": 33574 }, { "epoch": 0.4362918119754468, "grad_norm": 0.36607515811920166, "learning_rate": 0.00011276465771585283, "loss": 1.3093, "step": 33575 }, { "epoch": 0.4363048065193627, "grad_norm": 0.416373610496521, "learning_rate": 0.00011276205825394144, "loss": 1.3037, "step": 33576 }, { "epoch": 0.43631780106327855, "grad_norm": 0.4066951274871826, "learning_rate": 0.00011275945879203005, "loss": 1.1177, "step": 33577 }, { "epoch": 0.43633079560719445, "grad_norm": 0.3136371076107025, "learning_rate": 0.00011275685933011868, "loss": 1.3289, "step": 33578 }, { "epoch": 0.4363437901511103, "grad_norm": 0.4498012065887451, "learning_rate": 0.00011275425986820729, "loss": 1.3866, "step": 33579 }, { "epoch": 0.4363567846950262, "grad_norm": 0.3883761763572693, "learning_rate": 0.0001127516604062959, "loss": 1.4278, "step": 33580 }, { "epoch": 0.43636977923894205, "grad_norm": 0.4388565123081207, "learning_rate": 0.00011274906094438451, "loss": 1.4257, "step": 33581 }, { "epoch": 0.43638277378285795, "grad_norm": 0.4390762150287628, "learning_rate": 0.00011274646148247315, "loss": 1.3223, "step": 33582 }, { "epoch": 0.4363957683267738, "grad_norm": 0.3192100524902344, "learning_rate": 0.00011274386202056176, "loss": 1.5773, "step": 33583 }, { "epoch": 0.4364087628706897, "grad_norm": 0.3366519510746002, "learning_rate": 0.00011274126255865037, "loss": 1.421, "step": 33584 }, { "epoch": 0.43642175741460554, "grad_norm": 0.34497714042663574, "learning_rate": 0.00011273866309673897, "loss": 1.2098, "step": 33585 }, { "epoch": 0.43643475195852144, "grad_norm": 0.42119815945625305, "learning_rate": 0.0001127360636348276, "loss": 1.2516, "step": 33586 }, { "epoch": 0.4364477465024373, "grad_norm": 0.42016011476516724, "learning_rate": 0.00011273346417291622, "loss": 1.3852, "step": 33587 }, { "epoch": 0.4364607410463532, "grad_norm": 0.373702734708786, "learning_rate": 0.00011273086471100483, "loss": 1.5105, "step": 33588 }, { "epoch": 0.43647373559026903, "grad_norm": 0.40728864073753357, "learning_rate": 0.00011272826524909344, "loss": 1.7561, "step": 33589 }, { "epoch": 0.43648673013418493, "grad_norm": 0.31583356857299805, "learning_rate": 0.00011272566578718206, "loss": 1.2995, "step": 33590 }, { "epoch": 0.4364997246781008, "grad_norm": 0.37355634570121765, "learning_rate": 0.00011272306632527067, "loss": 1.3015, "step": 33591 }, { "epoch": 0.4365127192220167, "grad_norm": 0.37475037574768066, "learning_rate": 0.00011272046686335928, "loss": 1.2336, "step": 33592 }, { "epoch": 0.4365257137659325, "grad_norm": 0.4435719847679138, "learning_rate": 0.0001127178674014479, "loss": 1.4132, "step": 33593 }, { "epoch": 0.4365387083098484, "grad_norm": 0.3804897665977478, "learning_rate": 0.00011271526793953653, "loss": 1.4095, "step": 33594 }, { "epoch": 0.43655170285376427, "grad_norm": 0.2668040990829468, "learning_rate": 0.00011271266847762514, "loss": 1.1922, "step": 33595 }, { "epoch": 0.43656469739768017, "grad_norm": 0.3474498391151428, "learning_rate": 0.00011271006901571375, "loss": 1.4769, "step": 33596 }, { "epoch": 0.436577691941596, "grad_norm": 0.34615659713745117, "learning_rate": 0.00011270746955380237, "loss": 1.2778, "step": 33597 }, { "epoch": 0.4365906864855119, "grad_norm": 0.4304315745830536, "learning_rate": 0.00011270487009189099, "loss": 1.5868, "step": 33598 }, { "epoch": 0.43660368102942776, "grad_norm": 0.3263946771621704, "learning_rate": 0.0001127022706299796, "loss": 1.2583, "step": 33599 }, { "epoch": 0.43661667557334366, "grad_norm": 0.36526963114738464, "learning_rate": 0.00011269967116806821, "loss": 1.3842, "step": 33600 }, { "epoch": 0.4366296701172595, "grad_norm": 0.44879162311553955, "learning_rate": 0.00011269707170615682, "loss": 1.3323, "step": 33601 }, { "epoch": 0.4366426646611754, "grad_norm": 0.2959417700767517, "learning_rate": 0.00011269447224424545, "loss": 1.4114, "step": 33602 }, { "epoch": 0.43665565920509125, "grad_norm": 0.4669698178768158, "learning_rate": 0.00011269187278233406, "loss": 1.5505, "step": 33603 }, { "epoch": 0.43666865374900715, "grad_norm": 0.3637484312057495, "learning_rate": 0.00011268927332042267, "loss": 1.3649, "step": 33604 }, { "epoch": 0.436681648292923, "grad_norm": 0.3010956048965454, "learning_rate": 0.0001126866738585113, "loss": 1.1147, "step": 33605 }, { "epoch": 0.4366946428368389, "grad_norm": 0.4042312502861023, "learning_rate": 0.00011268407439659992, "loss": 1.2878, "step": 33606 }, { "epoch": 0.43670763738075474, "grad_norm": 0.35928875207901, "learning_rate": 0.00011268147493468853, "loss": 1.3874, "step": 33607 }, { "epoch": 0.43672063192467064, "grad_norm": 0.45363011956214905, "learning_rate": 0.00011267887547277714, "loss": 1.4329, "step": 33608 }, { "epoch": 0.4367336264685865, "grad_norm": 0.5129709839820862, "learning_rate": 0.00011267627601086576, "loss": 1.3038, "step": 33609 }, { "epoch": 0.4367466210125024, "grad_norm": 0.5133653879165649, "learning_rate": 0.00011267367654895437, "loss": 1.4084, "step": 33610 }, { "epoch": 0.43675961555641823, "grad_norm": 0.3572450280189514, "learning_rate": 0.00011267107708704299, "loss": 1.4602, "step": 33611 }, { "epoch": 0.43677261010033414, "grad_norm": 0.43474170565605164, "learning_rate": 0.0001126684776251316, "loss": 1.5156, "step": 33612 }, { "epoch": 0.43678560464425, "grad_norm": 0.4533253312110901, "learning_rate": 0.00011266587816322023, "loss": 1.4582, "step": 33613 }, { "epoch": 0.4367985991881659, "grad_norm": 0.4947125017642975, "learning_rate": 0.00011266327870130883, "loss": 1.3736, "step": 33614 }, { "epoch": 0.4368115937320817, "grad_norm": 0.3331683874130249, "learning_rate": 0.00011266067923939744, "loss": 1.2773, "step": 33615 }, { "epoch": 0.4368245882759976, "grad_norm": 0.4979170560836792, "learning_rate": 0.00011265807977748605, "loss": 1.4752, "step": 33616 }, { "epoch": 0.4368375828199135, "grad_norm": 0.3768923878669739, "learning_rate": 0.00011265548031557469, "loss": 1.203, "step": 33617 }, { "epoch": 0.4368505773638294, "grad_norm": 0.43261897563934326, "learning_rate": 0.0001126528808536633, "loss": 1.4611, "step": 33618 }, { "epoch": 0.4368635719077452, "grad_norm": 0.3522312045097351, "learning_rate": 0.00011265028139175191, "loss": 1.2645, "step": 33619 }, { "epoch": 0.4368765664516611, "grad_norm": 0.38643181324005127, "learning_rate": 0.00011264768192984052, "loss": 1.4573, "step": 33620 }, { "epoch": 0.43688956099557696, "grad_norm": 0.4221051037311554, "learning_rate": 0.00011264508246792915, "loss": 1.5097, "step": 33621 }, { "epoch": 0.43690255553949287, "grad_norm": 0.35679060220718384, "learning_rate": 0.00011264248300601776, "loss": 1.4753, "step": 33622 }, { "epoch": 0.4369155500834087, "grad_norm": 0.3596331775188446, "learning_rate": 0.00011263988354410637, "loss": 1.4367, "step": 33623 }, { "epoch": 0.4369285446273246, "grad_norm": 0.3658272922039032, "learning_rate": 0.00011263728408219498, "loss": 1.3374, "step": 33624 }, { "epoch": 0.43694153917124046, "grad_norm": 0.3894549310207367, "learning_rate": 0.00011263468462028362, "loss": 1.4432, "step": 33625 }, { "epoch": 0.43695453371515636, "grad_norm": 0.3065608739852905, "learning_rate": 0.00011263208515837223, "loss": 1.1178, "step": 33626 }, { "epoch": 0.4369675282590722, "grad_norm": 0.48100993037223816, "learning_rate": 0.00011262948569646083, "loss": 1.7989, "step": 33627 }, { "epoch": 0.4369805228029881, "grad_norm": 0.4452144205570221, "learning_rate": 0.00011262688623454944, "loss": 1.2811, "step": 33628 }, { "epoch": 0.43699351734690395, "grad_norm": 0.6012799739837646, "learning_rate": 0.00011262428677263808, "loss": 1.6313, "step": 33629 }, { "epoch": 0.43700651189081985, "grad_norm": 0.3175499737262726, "learning_rate": 0.00011262168731072669, "loss": 1.4229, "step": 33630 }, { "epoch": 0.4370195064347357, "grad_norm": 0.3955228328704834, "learning_rate": 0.0001126190878488153, "loss": 1.5599, "step": 33631 }, { "epoch": 0.4370325009786516, "grad_norm": 0.3448098599910736, "learning_rate": 0.00011261648838690391, "loss": 1.2207, "step": 33632 }, { "epoch": 0.43704549552256744, "grad_norm": 0.30379921197891235, "learning_rate": 0.00011261388892499253, "loss": 1.6611, "step": 33633 }, { "epoch": 0.43705849006648334, "grad_norm": 0.45026421546936035, "learning_rate": 0.00011261128946308115, "loss": 1.4578, "step": 33634 }, { "epoch": 0.4370714846103992, "grad_norm": 0.38558822870254517, "learning_rate": 0.00011260869000116976, "loss": 1.418, "step": 33635 }, { "epoch": 0.4370844791543151, "grad_norm": 0.5054637789726257, "learning_rate": 0.00011260609053925837, "loss": 1.5613, "step": 33636 }, { "epoch": 0.43709747369823093, "grad_norm": 0.3358413279056549, "learning_rate": 0.000112603491077347, "loss": 1.3445, "step": 33637 }, { "epoch": 0.43711046824214683, "grad_norm": 0.386957049369812, "learning_rate": 0.00011260089161543562, "loss": 1.159, "step": 33638 }, { "epoch": 0.4371234627860627, "grad_norm": 0.33620503544807434, "learning_rate": 0.00011259829215352423, "loss": 1.4737, "step": 33639 }, { "epoch": 0.4371364573299786, "grad_norm": 0.3374916613101959, "learning_rate": 0.00011259569269161282, "loss": 1.3144, "step": 33640 }, { "epoch": 0.4371494518738944, "grad_norm": 0.3896370232105255, "learning_rate": 0.00011259309322970146, "loss": 1.2875, "step": 33641 }, { "epoch": 0.4371624464178103, "grad_norm": 0.4411768615245819, "learning_rate": 0.00011259049376779007, "loss": 1.372, "step": 33642 }, { "epoch": 0.43717544096172617, "grad_norm": 0.520294725894928, "learning_rate": 0.00011258789430587868, "loss": 1.4626, "step": 33643 }, { "epoch": 0.43718843550564207, "grad_norm": 0.3707142174243927, "learning_rate": 0.00011258529484396731, "loss": 1.3581, "step": 33644 }, { "epoch": 0.4372014300495579, "grad_norm": 0.44626373052597046, "learning_rate": 0.00011258269538205592, "loss": 1.4722, "step": 33645 }, { "epoch": 0.4372144245934738, "grad_norm": 0.3588748276233673, "learning_rate": 0.00011258009592014453, "loss": 1.5229, "step": 33646 }, { "epoch": 0.43722741913738966, "grad_norm": 0.3996641933917999, "learning_rate": 0.00011257749645823314, "loss": 1.3164, "step": 33647 }, { "epoch": 0.43724041368130556, "grad_norm": 0.356577605009079, "learning_rate": 0.00011257489699632178, "loss": 1.4843, "step": 33648 }, { "epoch": 0.43725340822522146, "grad_norm": 0.4103662669658661, "learning_rate": 0.00011257229753441039, "loss": 1.5392, "step": 33649 }, { "epoch": 0.4372664027691373, "grad_norm": 0.384285032749176, "learning_rate": 0.000112569698072499, "loss": 1.4368, "step": 33650 }, { "epoch": 0.4372793973130532, "grad_norm": 0.4841556251049042, "learning_rate": 0.00011256709861058761, "loss": 1.6611, "step": 33651 }, { "epoch": 0.43729239185696905, "grad_norm": 0.3750399053096771, "learning_rate": 0.00011256449914867624, "loss": 1.4013, "step": 33652 }, { "epoch": 0.43730538640088495, "grad_norm": 0.38729211688041687, "learning_rate": 0.00011256189968676485, "loss": 1.2715, "step": 33653 }, { "epoch": 0.4373183809448008, "grad_norm": 0.43664470314979553, "learning_rate": 0.00011255930022485346, "loss": 1.532, "step": 33654 }, { "epoch": 0.4373313754887167, "grad_norm": 0.41050025820732117, "learning_rate": 0.00011255670076294207, "loss": 1.3998, "step": 33655 }, { "epoch": 0.43734437003263255, "grad_norm": 0.4281301200389862, "learning_rate": 0.0001125541013010307, "loss": 1.279, "step": 33656 }, { "epoch": 0.43735736457654845, "grad_norm": 0.3945081830024719, "learning_rate": 0.0001125515018391193, "loss": 1.4622, "step": 33657 }, { "epoch": 0.4373703591204643, "grad_norm": 0.42187169194221497, "learning_rate": 0.00011254890237720792, "loss": 1.3252, "step": 33658 }, { "epoch": 0.4373833536643802, "grad_norm": 0.32891568541526794, "learning_rate": 0.00011254630291529653, "loss": 1.3998, "step": 33659 }, { "epoch": 0.43739634820829604, "grad_norm": 0.3880465030670166, "learning_rate": 0.00011254370345338517, "loss": 1.3065, "step": 33660 }, { "epoch": 0.43740934275221194, "grad_norm": 0.4180728495121002, "learning_rate": 0.00011254110399147378, "loss": 1.5468, "step": 33661 }, { "epoch": 0.4374223372961278, "grad_norm": 0.4271094799041748, "learning_rate": 0.00011253850452956239, "loss": 1.4774, "step": 33662 }, { "epoch": 0.4374353318400437, "grad_norm": 0.4075973331928253, "learning_rate": 0.000112535905067651, "loss": 1.4235, "step": 33663 }, { "epoch": 0.43744832638395953, "grad_norm": 0.3313482105731964, "learning_rate": 0.00011253330560573962, "loss": 1.2185, "step": 33664 }, { "epoch": 0.43746132092787543, "grad_norm": 0.3519590198993683, "learning_rate": 0.00011253070614382823, "loss": 1.4339, "step": 33665 }, { "epoch": 0.4374743154717913, "grad_norm": 0.4447045922279358, "learning_rate": 0.00011252810668191684, "loss": 1.4901, "step": 33666 }, { "epoch": 0.4374873100157072, "grad_norm": 0.4138498604297638, "learning_rate": 0.00011252550722000546, "loss": 1.3637, "step": 33667 }, { "epoch": 0.437500304559623, "grad_norm": 0.42780208587646484, "learning_rate": 0.0001125229077580941, "loss": 1.3771, "step": 33668 }, { "epoch": 0.4375132991035389, "grad_norm": 0.38943788409233093, "learning_rate": 0.00011252030829618269, "loss": 1.421, "step": 33669 }, { "epoch": 0.43752629364745477, "grad_norm": 0.42851778864860535, "learning_rate": 0.0001125177088342713, "loss": 1.3423, "step": 33670 }, { "epoch": 0.43753928819137067, "grad_norm": 0.41370466351509094, "learning_rate": 0.00011251510937235991, "loss": 1.2065, "step": 33671 }, { "epoch": 0.4375522827352865, "grad_norm": 0.5071435570716858, "learning_rate": 0.00011251250991044855, "loss": 1.5005, "step": 33672 }, { "epoch": 0.4375652772792024, "grad_norm": 0.38066232204437256, "learning_rate": 0.00011250991044853716, "loss": 1.4508, "step": 33673 }, { "epoch": 0.43757827182311826, "grad_norm": 0.40746909379959106, "learning_rate": 0.00011250731098662577, "loss": 1.3133, "step": 33674 }, { "epoch": 0.43759126636703416, "grad_norm": 0.41708096861839294, "learning_rate": 0.00011250471152471438, "loss": 1.3271, "step": 33675 }, { "epoch": 0.43760426091095, "grad_norm": 0.38377097249031067, "learning_rate": 0.00011250211206280301, "loss": 1.3973, "step": 33676 }, { "epoch": 0.4376172554548659, "grad_norm": 0.4810415804386139, "learning_rate": 0.00011249951260089162, "loss": 1.2204, "step": 33677 }, { "epoch": 0.43763024999878175, "grad_norm": 0.3533743917942047, "learning_rate": 0.00011249691313898023, "loss": 1.3246, "step": 33678 }, { "epoch": 0.43764324454269765, "grad_norm": 0.3186552822589874, "learning_rate": 0.00011249431367706884, "loss": 1.305, "step": 33679 }, { "epoch": 0.4376562390866135, "grad_norm": 0.4591079354286194, "learning_rate": 0.00011249171421515748, "loss": 1.4328, "step": 33680 }, { "epoch": 0.4376692336305294, "grad_norm": 0.3991362750530243, "learning_rate": 0.00011248911475324609, "loss": 1.3158, "step": 33681 }, { "epoch": 0.43768222817444524, "grad_norm": 0.41839659214019775, "learning_rate": 0.00011248651529133469, "loss": 1.462, "step": 33682 }, { "epoch": 0.43769522271836114, "grad_norm": 0.32797348499298096, "learning_rate": 0.00011248391582942333, "loss": 1.4652, "step": 33683 }, { "epoch": 0.437708217262277, "grad_norm": 0.30605408549308777, "learning_rate": 0.00011248131636751194, "loss": 1.3496, "step": 33684 }, { "epoch": 0.4377212118061929, "grad_norm": 0.3311157524585724, "learning_rate": 0.00011247871690560055, "loss": 1.4715, "step": 33685 }, { "epoch": 0.43773420635010873, "grad_norm": 0.359284371137619, "learning_rate": 0.00011247611744368916, "loss": 1.3078, "step": 33686 }, { "epoch": 0.43774720089402464, "grad_norm": 0.44039100408554077, "learning_rate": 0.00011247351798177778, "loss": 1.2357, "step": 33687 }, { "epoch": 0.4377601954379405, "grad_norm": 0.39011868834495544, "learning_rate": 0.0001124709185198664, "loss": 1.4811, "step": 33688 }, { "epoch": 0.4377731899818564, "grad_norm": 0.40913689136505127, "learning_rate": 0.000112468319057955, "loss": 1.4818, "step": 33689 }, { "epoch": 0.4377861845257722, "grad_norm": 0.39085620641708374, "learning_rate": 0.00011246571959604362, "loss": 1.4016, "step": 33690 }, { "epoch": 0.4377991790696881, "grad_norm": 0.4359629452228546, "learning_rate": 0.00011246312013413225, "loss": 1.4596, "step": 33691 }, { "epoch": 0.43781217361360397, "grad_norm": 0.31746187806129456, "learning_rate": 0.00011246052067222086, "loss": 1.5756, "step": 33692 }, { "epoch": 0.4378251681575199, "grad_norm": 0.3196691870689392, "learning_rate": 0.00011245792121030948, "loss": 1.2098, "step": 33693 }, { "epoch": 0.4378381627014357, "grad_norm": 0.43217554688453674, "learning_rate": 0.00011245532174839807, "loss": 1.2046, "step": 33694 }, { "epoch": 0.4378511572453516, "grad_norm": 0.3626560568809509, "learning_rate": 0.00011245272228648671, "loss": 1.5426, "step": 33695 }, { "epoch": 0.43786415178926746, "grad_norm": 0.3620617091655731, "learning_rate": 0.00011245012282457532, "loss": 1.2463, "step": 33696 }, { "epoch": 0.43787714633318336, "grad_norm": 0.357759028673172, "learning_rate": 0.00011244752336266393, "loss": 1.3739, "step": 33697 }, { "epoch": 0.4378901408770992, "grad_norm": 0.3382255434989929, "learning_rate": 0.00011244492390075254, "loss": 1.2824, "step": 33698 }, { "epoch": 0.4379031354210151, "grad_norm": 0.503534734249115, "learning_rate": 0.00011244232443884117, "loss": 1.4395, "step": 33699 }, { "epoch": 0.43791612996493096, "grad_norm": 0.3558909595012665, "learning_rate": 0.00011243972497692978, "loss": 1.1281, "step": 33700 }, { "epoch": 0.43792912450884686, "grad_norm": 0.33497726917266846, "learning_rate": 0.00011243712551501839, "loss": 1.2917, "step": 33701 }, { "epoch": 0.4379421190527627, "grad_norm": 0.3172791302204132, "learning_rate": 0.000112434526053107, "loss": 1.3284, "step": 33702 }, { "epoch": 0.4379551135966786, "grad_norm": 0.39789000153541565, "learning_rate": 0.00011243192659119564, "loss": 1.5001, "step": 33703 }, { "epoch": 0.43796810814059445, "grad_norm": 0.37026578187942505, "learning_rate": 0.00011242932712928425, "loss": 1.287, "step": 33704 }, { "epoch": 0.43798110268451035, "grad_norm": 0.36092573404312134, "learning_rate": 0.00011242672766737286, "loss": 1.2986, "step": 33705 }, { "epoch": 0.4379940972284262, "grad_norm": 0.3719044625759125, "learning_rate": 0.00011242412820546147, "loss": 1.2152, "step": 33706 }, { "epoch": 0.4380070917723421, "grad_norm": 0.4057196080684662, "learning_rate": 0.0001124215287435501, "loss": 1.4332, "step": 33707 }, { "epoch": 0.43802008631625794, "grad_norm": 0.38312944769859314, "learning_rate": 0.00011241892928163871, "loss": 1.4172, "step": 33708 }, { "epoch": 0.43803308086017384, "grad_norm": 0.4060550630092621, "learning_rate": 0.00011241632981972732, "loss": 1.4773, "step": 33709 }, { "epoch": 0.4380460754040897, "grad_norm": 0.36576610803604126, "learning_rate": 0.00011241373035781593, "loss": 1.49, "step": 33710 }, { "epoch": 0.4380590699480056, "grad_norm": 0.3998563289642334, "learning_rate": 0.00011241113089590455, "loss": 1.4628, "step": 33711 }, { "epoch": 0.43807206449192143, "grad_norm": 0.33990710973739624, "learning_rate": 0.00011240853143399316, "loss": 1.3322, "step": 33712 }, { "epoch": 0.43808505903583733, "grad_norm": 0.47588205337524414, "learning_rate": 0.00011240593197208178, "loss": 1.3308, "step": 33713 }, { "epoch": 0.4380980535797532, "grad_norm": 0.38509654998779297, "learning_rate": 0.00011240333251017039, "loss": 1.4915, "step": 33714 }, { "epoch": 0.4381110481236691, "grad_norm": 0.4186917543411255, "learning_rate": 0.00011240073304825902, "loss": 1.4181, "step": 33715 }, { "epoch": 0.4381240426675849, "grad_norm": 0.46086210012435913, "learning_rate": 0.00011239813358634764, "loss": 1.4794, "step": 33716 }, { "epoch": 0.4381370372115008, "grad_norm": 0.3558335304260254, "learning_rate": 0.00011239553412443625, "loss": 1.2663, "step": 33717 }, { "epoch": 0.43815003175541667, "grad_norm": 0.3251190781593323, "learning_rate": 0.00011239293466252487, "loss": 1.2861, "step": 33718 }, { "epoch": 0.43816302629933257, "grad_norm": 0.41585326194763184, "learning_rate": 0.00011239033520061348, "loss": 1.4611, "step": 33719 }, { "epoch": 0.4381760208432484, "grad_norm": 0.49898022413253784, "learning_rate": 0.00011238773573870209, "loss": 1.3779, "step": 33720 }, { "epoch": 0.4381890153871643, "grad_norm": 0.36906060576438904, "learning_rate": 0.0001123851362767907, "loss": 1.1995, "step": 33721 }, { "epoch": 0.43820200993108016, "grad_norm": 0.4501431882381439, "learning_rate": 0.00011238253681487934, "loss": 1.4462, "step": 33722 }, { "epoch": 0.43821500447499606, "grad_norm": 0.37703192234039307, "learning_rate": 0.00011237993735296794, "loss": 1.1854, "step": 33723 }, { "epoch": 0.4382279990189119, "grad_norm": 0.44412747025489807, "learning_rate": 0.00011237733789105655, "loss": 1.2644, "step": 33724 }, { "epoch": 0.4382409935628278, "grad_norm": 0.3478719890117645, "learning_rate": 0.00011237473842914516, "loss": 1.3366, "step": 33725 }, { "epoch": 0.4382539881067437, "grad_norm": 0.39723125100135803, "learning_rate": 0.0001123721389672338, "loss": 1.4417, "step": 33726 }, { "epoch": 0.43826698265065955, "grad_norm": 0.45014920830726624, "learning_rate": 0.00011236953950532241, "loss": 1.3125, "step": 33727 }, { "epoch": 0.43827997719457545, "grad_norm": 0.5657960772514343, "learning_rate": 0.00011236694004341102, "loss": 1.6342, "step": 33728 }, { "epoch": 0.4382929717384913, "grad_norm": 0.403454065322876, "learning_rate": 0.00011236434058149963, "loss": 1.5114, "step": 33729 }, { "epoch": 0.4383059662824072, "grad_norm": 0.33389076590538025, "learning_rate": 0.00011236174111958826, "loss": 1.2206, "step": 33730 }, { "epoch": 0.43831896082632305, "grad_norm": 0.3333199918270111, "learning_rate": 0.00011235914165767687, "loss": 1.2222, "step": 33731 }, { "epoch": 0.43833195537023895, "grad_norm": 0.4092356264591217, "learning_rate": 0.00011235654219576548, "loss": 1.2094, "step": 33732 }, { "epoch": 0.4383449499141548, "grad_norm": 0.4158893823623657, "learning_rate": 0.00011235394273385409, "loss": 1.6692, "step": 33733 }, { "epoch": 0.4383579444580707, "grad_norm": 0.3535400331020355, "learning_rate": 0.00011235134327194273, "loss": 1.2455, "step": 33734 }, { "epoch": 0.43837093900198654, "grad_norm": 0.37516915798187256, "learning_rate": 0.00011234874381003134, "loss": 1.23, "step": 33735 }, { "epoch": 0.43838393354590244, "grad_norm": 0.325690895318985, "learning_rate": 0.00011234614434811994, "loss": 1.3849, "step": 33736 }, { "epoch": 0.4383969280898183, "grad_norm": 0.37609657645225525, "learning_rate": 0.00011234354488620855, "loss": 1.2639, "step": 33737 }, { "epoch": 0.4384099226337342, "grad_norm": 0.4492342174053192, "learning_rate": 0.00011234094542429718, "loss": 1.3149, "step": 33738 }, { "epoch": 0.43842291717765003, "grad_norm": 0.4404865503311157, "learning_rate": 0.0001123383459623858, "loss": 1.4392, "step": 33739 }, { "epoch": 0.43843591172156593, "grad_norm": 0.3790774941444397, "learning_rate": 0.0001123357465004744, "loss": 1.1568, "step": 33740 }, { "epoch": 0.4384489062654818, "grad_norm": 0.4236330986022949, "learning_rate": 0.00011233314703856302, "loss": 1.5279, "step": 33741 }, { "epoch": 0.4384619008093977, "grad_norm": 0.42766568064689636, "learning_rate": 0.00011233054757665164, "loss": 1.4771, "step": 33742 }, { "epoch": 0.4384748953533135, "grad_norm": 0.3860890865325928, "learning_rate": 0.00011232794811474025, "loss": 1.4847, "step": 33743 }, { "epoch": 0.4384878898972294, "grad_norm": 0.32998207211494446, "learning_rate": 0.00011232534865282886, "loss": 1.3991, "step": 33744 }, { "epoch": 0.43850088444114527, "grad_norm": 0.4026249349117279, "learning_rate": 0.00011232274919091747, "loss": 1.7186, "step": 33745 }, { "epoch": 0.43851387898506117, "grad_norm": 0.446280300617218, "learning_rate": 0.00011232014972900611, "loss": 1.3755, "step": 33746 }, { "epoch": 0.438526873528977, "grad_norm": 0.3363453447818756, "learning_rate": 0.00011231755026709472, "loss": 1.3076, "step": 33747 }, { "epoch": 0.4385398680728929, "grad_norm": 0.45325642824172974, "learning_rate": 0.00011231495080518333, "loss": 1.3107, "step": 33748 }, { "epoch": 0.43855286261680876, "grad_norm": 0.44539177417755127, "learning_rate": 0.00011231235134327193, "loss": 1.2655, "step": 33749 }, { "epoch": 0.43856585716072466, "grad_norm": 0.356073796749115, "learning_rate": 0.00011230975188136057, "loss": 1.2275, "step": 33750 }, { "epoch": 0.4385788517046405, "grad_norm": 0.3830162584781647, "learning_rate": 0.00011230715241944918, "loss": 1.2292, "step": 33751 }, { "epoch": 0.4385918462485564, "grad_norm": 0.3281376361846924, "learning_rate": 0.00011230455295753779, "loss": 1.3824, "step": 33752 }, { "epoch": 0.43860484079247225, "grad_norm": 0.5264029502868652, "learning_rate": 0.0001123019534956264, "loss": 1.3585, "step": 33753 }, { "epoch": 0.43861783533638815, "grad_norm": 0.24634402990341187, "learning_rate": 0.00011229935403371503, "loss": 1.2681, "step": 33754 }, { "epoch": 0.438630829880304, "grad_norm": 0.46418091654777527, "learning_rate": 0.00011229675457180364, "loss": 1.5891, "step": 33755 }, { "epoch": 0.4386438244242199, "grad_norm": 0.36810949444770813, "learning_rate": 0.00011229415510989225, "loss": 1.2621, "step": 33756 }, { "epoch": 0.43865681896813574, "grad_norm": 0.4122721254825592, "learning_rate": 0.00011229155564798089, "loss": 1.2912, "step": 33757 }, { "epoch": 0.43866981351205164, "grad_norm": 0.4004136323928833, "learning_rate": 0.0001122889561860695, "loss": 1.5792, "step": 33758 }, { "epoch": 0.4386828080559675, "grad_norm": 0.4265916347503662, "learning_rate": 0.00011228635672415811, "loss": 1.4328, "step": 33759 }, { "epoch": 0.4386958025998834, "grad_norm": 0.5141733288764954, "learning_rate": 0.00011228375726224672, "loss": 1.4674, "step": 33760 }, { "epoch": 0.43870879714379923, "grad_norm": 0.5589814186096191, "learning_rate": 0.00011228115780033534, "loss": 1.3478, "step": 33761 }, { "epoch": 0.43872179168771513, "grad_norm": 0.3440369963645935, "learning_rate": 0.00011227855833842395, "loss": 1.4411, "step": 33762 }, { "epoch": 0.438734786231631, "grad_norm": 0.34824028611183167, "learning_rate": 0.00011227595887651257, "loss": 1.4798, "step": 33763 }, { "epoch": 0.4387477807755469, "grad_norm": 0.3495718836784363, "learning_rate": 0.00011227335941460118, "loss": 1.1955, "step": 33764 }, { "epoch": 0.4387607753194627, "grad_norm": 0.4167521595954895, "learning_rate": 0.0001122707599526898, "loss": 1.451, "step": 33765 }, { "epoch": 0.4387737698633786, "grad_norm": 0.4216267168521881, "learning_rate": 0.00011226816049077841, "loss": 1.5064, "step": 33766 }, { "epoch": 0.43878676440729447, "grad_norm": 0.41661110520362854, "learning_rate": 0.00011226556102886702, "loss": 1.5417, "step": 33767 }, { "epoch": 0.4387997589512104, "grad_norm": 0.4137970805168152, "learning_rate": 0.00011226296156695563, "loss": 1.3145, "step": 33768 }, { "epoch": 0.4388127534951262, "grad_norm": 0.36150655150413513, "learning_rate": 0.00011226036210504427, "loss": 1.3673, "step": 33769 }, { "epoch": 0.4388257480390421, "grad_norm": 0.3358671963214874, "learning_rate": 0.00011225776264313288, "loss": 1.248, "step": 33770 }, { "epoch": 0.43883874258295796, "grad_norm": 0.4472552239894867, "learning_rate": 0.0001122551631812215, "loss": 1.5463, "step": 33771 }, { "epoch": 0.43885173712687386, "grad_norm": 0.3578947186470032, "learning_rate": 0.0001122525637193101, "loss": 1.3043, "step": 33772 }, { "epoch": 0.4388647316707897, "grad_norm": 0.39294353127479553, "learning_rate": 0.00011224996425739873, "loss": 1.3364, "step": 33773 }, { "epoch": 0.4388777262147056, "grad_norm": 0.3782839775085449, "learning_rate": 0.00011224736479548734, "loss": 1.5311, "step": 33774 }, { "epoch": 0.43889072075862146, "grad_norm": 0.3367482125759125, "learning_rate": 0.00011224476533357595, "loss": 1.194, "step": 33775 }, { "epoch": 0.43890371530253736, "grad_norm": 0.4283084571361542, "learning_rate": 0.00011224216587166456, "loss": 1.4278, "step": 33776 }, { "epoch": 0.4389167098464532, "grad_norm": 0.499356210231781, "learning_rate": 0.0001122395664097532, "loss": 1.3436, "step": 33777 }, { "epoch": 0.4389297043903691, "grad_norm": 0.4606949985027313, "learning_rate": 0.0001122369669478418, "loss": 1.5584, "step": 33778 }, { "epoch": 0.43894269893428495, "grad_norm": 0.4238845705986023, "learning_rate": 0.00011223436748593041, "loss": 1.4395, "step": 33779 }, { "epoch": 0.43895569347820085, "grad_norm": 0.40323111414909363, "learning_rate": 0.00011223176802401902, "loss": 1.2444, "step": 33780 }, { "epoch": 0.4389686880221167, "grad_norm": 0.46773505210876465, "learning_rate": 0.00011222916856210766, "loss": 1.4941, "step": 33781 }, { "epoch": 0.4389816825660326, "grad_norm": 0.4639147222042084, "learning_rate": 0.00011222656910019627, "loss": 1.5372, "step": 33782 }, { "epoch": 0.43899467710994844, "grad_norm": 0.3454759418964386, "learning_rate": 0.00011222396963828488, "loss": 1.5796, "step": 33783 }, { "epoch": 0.43900767165386434, "grad_norm": 0.4438466429710388, "learning_rate": 0.00011222137017637349, "loss": 1.3273, "step": 33784 }, { "epoch": 0.4390206661977802, "grad_norm": 0.36770185828208923, "learning_rate": 0.00011221877071446211, "loss": 1.426, "step": 33785 }, { "epoch": 0.4390336607416961, "grad_norm": 0.4122646749019623, "learning_rate": 0.00011221617125255073, "loss": 1.6401, "step": 33786 }, { "epoch": 0.43904665528561193, "grad_norm": 0.3544377386569977, "learning_rate": 0.00011221357179063934, "loss": 1.4221, "step": 33787 }, { "epoch": 0.43905964982952783, "grad_norm": 0.3139743506908417, "learning_rate": 0.00011221097232872795, "loss": 1.3035, "step": 33788 }, { "epoch": 0.4390726443734437, "grad_norm": 0.40578898787498474, "learning_rate": 0.00011220837286681659, "loss": 1.4468, "step": 33789 }, { "epoch": 0.4390856389173596, "grad_norm": 0.29836177825927734, "learning_rate": 0.0001122057734049052, "loss": 1.3728, "step": 33790 }, { "epoch": 0.4390986334612754, "grad_norm": 0.4170806407928467, "learning_rate": 0.0001122031739429938, "loss": 1.3737, "step": 33791 }, { "epoch": 0.4391116280051913, "grad_norm": 0.5037851929664612, "learning_rate": 0.00011220057448108243, "loss": 1.5151, "step": 33792 }, { "epoch": 0.43912462254910717, "grad_norm": 0.3820703327655792, "learning_rate": 0.00011219797501917104, "loss": 1.3445, "step": 33793 }, { "epoch": 0.43913761709302307, "grad_norm": 0.37377795577049255, "learning_rate": 0.00011219537555725965, "loss": 1.2305, "step": 33794 }, { "epoch": 0.4391506116369389, "grad_norm": 0.3625583350658417, "learning_rate": 0.00011219277609534826, "loss": 1.4772, "step": 33795 }, { "epoch": 0.4391636061808548, "grad_norm": 0.3164084553718567, "learning_rate": 0.00011219017663343689, "loss": 1.2018, "step": 33796 }, { "epoch": 0.43917660072477066, "grad_norm": 0.38175728917121887, "learning_rate": 0.0001121875771715255, "loss": 1.425, "step": 33797 }, { "epoch": 0.43918959526868656, "grad_norm": 0.36510688066482544, "learning_rate": 0.00011218497770961411, "loss": 1.2334, "step": 33798 }, { "epoch": 0.4392025898126024, "grad_norm": 0.37086033821105957, "learning_rate": 0.00011218237824770272, "loss": 1.3364, "step": 33799 }, { "epoch": 0.4392155843565183, "grad_norm": 0.446889728307724, "learning_rate": 0.00011217977878579136, "loss": 1.5079, "step": 33800 }, { "epoch": 0.4392285789004342, "grad_norm": 0.3069266676902771, "learning_rate": 0.00011217717932387997, "loss": 1.2707, "step": 33801 }, { "epoch": 0.43924157344435005, "grad_norm": 0.4998326003551483, "learning_rate": 0.00011217457986196858, "loss": 1.4017, "step": 33802 }, { "epoch": 0.43925456798826595, "grad_norm": 0.4500516951084137, "learning_rate": 0.00011217198040005719, "loss": 1.4176, "step": 33803 }, { "epoch": 0.4392675625321818, "grad_norm": 0.4580056071281433, "learning_rate": 0.00011216938093814582, "loss": 1.312, "step": 33804 }, { "epoch": 0.4392805570760977, "grad_norm": 0.41984865069389343, "learning_rate": 0.00011216678147623443, "loss": 1.4081, "step": 33805 }, { "epoch": 0.43929355162001354, "grad_norm": 0.368792325258255, "learning_rate": 0.00011216418201432304, "loss": 1.3692, "step": 33806 }, { "epoch": 0.43930654616392945, "grad_norm": 0.37742581963539124, "learning_rate": 0.00011216158255241165, "loss": 1.4457, "step": 33807 }, { "epoch": 0.4393195407078453, "grad_norm": 0.3170832395553589, "learning_rate": 0.00011215898309050027, "loss": 1.269, "step": 33808 }, { "epoch": 0.4393325352517612, "grad_norm": 0.3944149613380432, "learning_rate": 0.00011215638362858889, "loss": 1.3628, "step": 33809 }, { "epoch": 0.43934552979567704, "grad_norm": 0.49544981122016907, "learning_rate": 0.0001121537841666775, "loss": 1.243, "step": 33810 }, { "epoch": 0.43935852433959294, "grad_norm": 0.39050644636154175, "learning_rate": 0.00011215118470476611, "loss": 1.3888, "step": 33811 }, { "epoch": 0.4393715188835088, "grad_norm": 0.32727938890457153, "learning_rate": 0.00011214858524285475, "loss": 1.3225, "step": 33812 }, { "epoch": 0.4393845134274247, "grad_norm": 0.49213749170303345, "learning_rate": 0.00011214598578094336, "loss": 1.4431, "step": 33813 }, { "epoch": 0.43939750797134053, "grad_norm": 0.3536868393421173, "learning_rate": 0.00011214338631903197, "loss": 1.4022, "step": 33814 }, { "epoch": 0.43941050251525643, "grad_norm": 0.49926725029945374, "learning_rate": 0.00011214078685712058, "loss": 1.315, "step": 33815 }, { "epoch": 0.4394234970591723, "grad_norm": 0.5178827047348022, "learning_rate": 0.0001121381873952092, "loss": 1.3487, "step": 33816 }, { "epoch": 0.4394364916030882, "grad_norm": 0.37049800157546997, "learning_rate": 0.00011213558793329781, "loss": 1.298, "step": 33817 }, { "epoch": 0.439449486147004, "grad_norm": 0.4458928108215332, "learning_rate": 0.00011213298847138642, "loss": 1.4293, "step": 33818 }, { "epoch": 0.4394624806909199, "grad_norm": 0.44171980023384094, "learning_rate": 0.00011213038900947504, "loss": 1.372, "step": 33819 }, { "epoch": 0.43947547523483577, "grad_norm": 0.5388789772987366, "learning_rate": 0.00011212778954756366, "loss": 1.2785, "step": 33820 }, { "epoch": 0.43948846977875167, "grad_norm": 0.33343589305877686, "learning_rate": 0.00011212519008565227, "loss": 1.5505, "step": 33821 }, { "epoch": 0.4395014643226675, "grad_norm": 0.41944271326065063, "learning_rate": 0.00011212259062374088, "loss": 1.5171, "step": 33822 }, { "epoch": 0.4395144588665834, "grad_norm": 0.37391194701194763, "learning_rate": 0.00011211999116182949, "loss": 1.2939, "step": 33823 }, { "epoch": 0.43952745341049926, "grad_norm": 0.42650604248046875, "learning_rate": 0.00011211739169991813, "loss": 1.5289, "step": 33824 }, { "epoch": 0.43954044795441516, "grad_norm": 0.2201998084783554, "learning_rate": 0.00011211479223800674, "loss": 1.1309, "step": 33825 }, { "epoch": 0.439553442498331, "grad_norm": 0.42188969254493713, "learning_rate": 0.00011211219277609535, "loss": 1.3368, "step": 33826 }, { "epoch": 0.4395664370422469, "grad_norm": 0.3888471722602844, "learning_rate": 0.00011210959331418396, "loss": 1.3842, "step": 33827 }, { "epoch": 0.43957943158616275, "grad_norm": 0.3639715611934662, "learning_rate": 0.00011210699385227259, "loss": 1.4096, "step": 33828 }, { "epoch": 0.43959242613007865, "grad_norm": 0.4172443747520447, "learning_rate": 0.0001121043943903612, "loss": 1.4051, "step": 33829 }, { "epoch": 0.4396054206739945, "grad_norm": 0.42720672488212585, "learning_rate": 0.00011210179492844981, "loss": 1.3579, "step": 33830 }, { "epoch": 0.4396184152179104, "grad_norm": 0.38850098848342896, "learning_rate": 0.00011209919546653845, "loss": 1.3702, "step": 33831 }, { "epoch": 0.43963140976182624, "grad_norm": 0.3582231104373932, "learning_rate": 0.00011209659600462706, "loss": 1.3259, "step": 33832 }, { "epoch": 0.43964440430574214, "grad_norm": 0.33361801505088806, "learning_rate": 0.00011209399654271566, "loss": 1.3253, "step": 33833 }, { "epoch": 0.439657398849658, "grad_norm": 0.3146879971027374, "learning_rate": 0.00011209139708080427, "loss": 1.4785, "step": 33834 }, { "epoch": 0.4396703933935739, "grad_norm": 0.27605631947517395, "learning_rate": 0.0001120887976188929, "loss": 1.4296, "step": 33835 }, { "epoch": 0.43968338793748973, "grad_norm": 0.4640986919403076, "learning_rate": 0.00011208619815698152, "loss": 1.6448, "step": 33836 }, { "epoch": 0.43969638248140563, "grad_norm": 0.3718549907207489, "learning_rate": 0.00011208359869507013, "loss": 1.2728, "step": 33837 }, { "epoch": 0.4397093770253215, "grad_norm": 0.4482578635215759, "learning_rate": 0.00011208099923315874, "loss": 1.2995, "step": 33838 }, { "epoch": 0.4397223715692374, "grad_norm": 0.5183087587356567, "learning_rate": 0.00011207839977124736, "loss": 1.3979, "step": 33839 }, { "epoch": 0.4397353661131532, "grad_norm": 0.4024648666381836, "learning_rate": 0.00011207580030933597, "loss": 1.4587, "step": 33840 }, { "epoch": 0.4397483606570691, "grad_norm": 0.4905690550804138, "learning_rate": 0.00011207320084742458, "loss": 1.57, "step": 33841 }, { "epoch": 0.43976135520098497, "grad_norm": 0.40656524896621704, "learning_rate": 0.0001120706013855132, "loss": 1.3712, "step": 33842 }, { "epoch": 0.43977434974490087, "grad_norm": 0.4429897964000702, "learning_rate": 0.00011206800192360183, "loss": 1.5636, "step": 33843 }, { "epoch": 0.4397873442888167, "grad_norm": 0.4273623824119568, "learning_rate": 0.00011206540246169044, "loss": 1.3503, "step": 33844 }, { "epoch": 0.4398003388327326, "grad_norm": 0.3867172300815582, "learning_rate": 0.00011206280299977906, "loss": 1.4371, "step": 33845 }, { "epoch": 0.43981333337664846, "grad_norm": 0.4311963617801666, "learning_rate": 0.00011206020353786765, "loss": 1.3528, "step": 33846 }, { "epoch": 0.43982632792056436, "grad_norm": 0.3302820026874542, "learning_rate": 0.00011205760407595629, "loss": 1.2851, "step": 33847 }, { "epoch": 0.4398393224644802, "grad_norm": 0.45143556594848633, "learning_rate": 0.0001120550046140449, "loss": 1.346, "step": 33848 }, { "epoch": 0.4398523170083961, "grad_norm": 0.3516835570335388, "learning_rate": 0.00011205240515213351, "loss": 1.2949, "step": 33849 }, { "epoch": 0.43986531155231196, "grad_norm": 0.47119244933128357, "learning_rate": 0.00011204980569022212, "loss": 1.4602, "step": 33850 }, { "epoch": 0.43987830609622786, "grad_norm": 0.4856862425804138, "learning_rate": 0.00011204720622831075, "loss": 1.5038, "step": 33851 }, { "epoch": 0.4398913006401437, "grad_norm": 0.4068794548511505, "learning_rate": 0.00011204460676639936, "loss": 1.3051, "step": 33852 }, { "epoch": 0.4399042951840596, "grad_norm": 0.5328830480575562, "learning_rate": 0.00011204200730448797, "loss": 1.4396, "step": 33853 }, { "epoch": 0.43991728972797545, "grad_norm": 0.4361875653266907, "learning_rate": 0.00011203940784257658, "loss": 1.4499, "step": 33854 }, { "epoch": 0.43993028427189135, "grad_norm": 0.37575486302375793, "learning_rate": 0.00011203680838066522, "loss": 1.4291, "step": 33855 }, { "epoch": 0.4399432788158072, "grad_norm": 0.48397478461265564, "learning_rate": 0.00011203420891875383, "loss": 1.6989, "step": 33856 }, { "epoch": 0.4399562733597231, "grad_norm": 0.35768431425094604, "learning_rate": 0.00011203160945684244, "loss": 1.0911, "step": 33857 }, { "epoch": 0.43996926790363894, "grad_norm": 0.3889373540878296, "learning_rate": 0.00011202900999493104, "loss": 1.2627, "step": 33858 }, { "epoch": 0.43998226244755484, "grad_norm": 0.4137880504131317, "learning_rate": 0.00011202641053301968, "loss": 1.2442, "step": 33859 }, { "epoch": 0.4399952569914707, "grad_norm": 0.3847433924674988, "learning_rate": 0.00011202381107110829, "loss": 1.3286, "step": 33860 }, { "epoch": 0.4400082515353866, "grad_norm": 0.4896392226219177, "learning_rate": 0.0001120212116091969, "loss": 1.4377, "step": 33861 }, { "epoch": 0.44002124607930243, "grad_norm": 0.3672564923763275, "learning_rate": 0.00011201861214728551, "loss": 1.419, "step": 33862 }, { "epoch": 0.44003424062321833, "grad_norm": 0.46419256925582886, "learning_rate": 0.00011201601268537413, "loss": 1.5734, "step": 33863 }, { "epoch": 0.4400472351671342, "grad_norm": 0.4771384596824646, "learning_rate": 0.00011201341322346274, "loss": 1.3484, "step": 33864 }, { "epoch": 0.4400602297110501, "grad_norm": 0.3740924596786499, "learning_rate": 0.00011201081376155136, "loss": 1.0841, "step": 33865 }, { "epoch": 0.4400732242549659, "grad_norm": 0.4816083312034607, "learning_rate": 0.00011200821429963999, "loss": 1.5036, "step": 33866 }, { "epoch": 0.4400862187988818, "grad_norm": 0.35915622115135193, "learning_rate": 0.0001120056148377286, "loss": 1.2459, "step": 33867 }, { "epoch": 0.44009921334279767, "grad_norm": 0.3351829946041107, "learning_rate": 0.00011200301537581722, "loss": 1.1748, "step": 33868 }, { "epoch": 0.44011220788671357, "grad_norm": 0.4634656608104706, "learning_rate": 0.00011200041591390583, "loss": 1.5248, "step": 33869 }, { "epoch": 0.4401252024306294, "grad_norm": 0.4507223963737488, "learning_rate": 0.00011199781645199445, "loss": 1.4229, "step": 33870 }, { "epoch": 0.4401381969745453, "grad_norm": 0.3536686599254608, "learning_rate": 0.00011199521699008306, "loss": 1.3137, "step": 33871 }, { "epoch": 0.44015119151846116, "grad_norm": 0.49509260058403015, "learning_rate": 0.00011199261752817167, "loss": 1.2775, "step": 33872 }, { "epoch": 0.44016418606237706, "grad_norm": 0.5351803302764893, "learning_rate": 0.00011199001806626028, "loss": 1.4439, "step": 33873 }, { "epoch": 0.4401771806062929, "grad_norm": 0.3896837830543518, "learning_rate": 0.00011198741860434892, "loss": 1.4647, "step": 33874 }, { "epoch": 0.4401901751502088, "grad_norm": 0.46184828877449036, "learning_rate": 0.00011198481914243752, "loss": 1.5146, "step": 33875 }, { "epoch": 0.44020316969412465, "grad_norm": 0.45069700479507446, "learning_rate": 0.00011198221968052613, "loss": 1.3163, "step": 33876 }, { "epoch": 0.44021616423804055, "grad_norm": 0.5432813763618469, "learning_rate": 0.00011197962021861474, "loss": 1.3329, "step": 33877 }, { "epoch": 0.44022915878195645, "grad_norm": 0.328911155462265, "learning_rate": 0.00011197702075670338, "loss": 1.3623, "step": 33878 }, { "epoch": 0.4402421533258723, "grad_norm": 0.2985921800136566, "learning_rate": 0.00011197442129479199, "loss": 1.3356, "step": 33879 }, { "epoch": 0.4402551478697882, "grad_norm": 0.31085917353630066, "learning_rate": 0.0001119718218328806, "loss": 1.1905, "step": 33880 }, { "epoch": 0.44026814241370404, "grad_norm": 0.4325365424156189, "learning_rate": 0.00011196922237096921, "loss": 1.1185, "step": 33881 }, { "epoch": 0.44028113695761995, "grad_norm": 0.41128620505332947, "learning_rate": 0.00011196662290905784, "loss": 1.6154, "step": 33882 }, { "epoch": 0.4402941315015358, "grad_norm": 0.43714362382888794, "learning_rate": 0.00011196402344714645, "loss": 1.3221, "step": 33883 }, { "epoch": 0.4403071260454517, "grad_norm": 0.317518025636673, "learning_rate": 0.00011196142398523506, "loss": 1.2304, "step": 33884 }, { "epoch": 0.44032012058936754, "grad_norm": 0.34049302339553833, "learning_rate": 0.00011195882452332367, "loss": 1.2478, "step": 33885 }, { "epoch": 0.44033311513328344, "grad_norm": 0.38715073466300964, "learning_rate": 0.0001119562250614123, "loss": 1.5095, "step": 33886 }, { "epoch": 0.4403461096771993, "grad_norm": 0.4641999304294586, "learning_rate": 0.00011195362559950092, "loss": 1.5959, "step": 33887 }, { "epoch": 0.4403591042211152, "grad_norm": 0.39159679412841797, "learning_rate": 0.00011195102613758952, "loss": 1.3798, "step": 33888 }, { "epoch": 0.44037209876503103, "grad_norm": 0.39525845646858215, "learning_rate": 0.00011194842667567813, "loss": 1.3122, "step": 33889 }, { "epoch": 0.44038509330894693, "grad_norm": 0.43412327766418457, "learning_rate": 0.00011194582721376676, "loss": 1.3667, "step": 33890 }, { "epoch": 0.4403980878528628, "grad_norm": 0.3792523741722107, "learning_rate": 0.00011194322775185537, "loss": 1.4723, "step": 33891 }, { "epoch": 0.4404110823967787, "grad_norm": 0.3380373418331146, "learning_rate": 0.00011194062828994399, "loss": 1.1903, "step": 33892 }, { "epoch": 0.4404240769406945, "grad_norm": 0.42319849133491516, "learning_rate": 0.0001119380288280326, "loss": 1.3378, "step": 33893 }, { "epoch": 0.4404370714846104, "grad_norm": 0.4520934522151947, "learning_rate": 0.00011193542936612122, "loss": 1.3675, "step": 33894 }, { "epoch": 0.44045006602852627, "grad_norm": 0.4807420074939728, "learning_rate": 0.00011193282990420983, "loss": 1.5238, "step": 33895 }, { "epoch": 0.44046306057244217, "grad_norm": 0.425485759973526, "learning_rate": 0.00011193023044229844, "loss": 1.3506, "step": 33896 }, { "epoch": 0.440476055116358, "grad_norm": 0.3834744393825531, "learning_rate": 0.00011192763098038705, "loss": 1.4788, "step": 33897 }, { "epoch": 0.4404890496602739, "grad_norm": 0.4828564524650574, "learning_rate": 0.00011192503151847569, "loss": 1.3775, "step": 33898 }, { "epoch": 0.44050204420418976, "grad_norm": 0.4058476686477661, "learning_rate": 0.0001119224320565643, "loss": 1.597, "step": 33899 }, { "epoch": 0.44051503874810566, "grad_norm": 0.37737974524497986, "learning_rate": 0.0001119198325946529, "loss": 1.4249, "step": 33900 }, { "epoch": 0.4405280332920215, "grad_norm": 0.3890879452228546, "learning_rate": 0.00011191723313274151, "loss": 1.4542, "step": 33901 }, { "epoch": 0.4405410278359374, "grad_norm": 0.4908926486968994, "learning_rate": 0.00011191463367083015, "loss": 1.3563, "step": 33902 }, { "epoch": 0.44055402237985325, "grad_norm": 0.42307162284851074, "learning_rate": 0.00011191203420891876, "loss": 1.3151, "step": 33903 }, { "epoch": 0.44056701692376915, "grad_norm": 0.3393646478652954, "learning_rate": 0.00011190943474700737, "loss": 1.1379, "step": 33904 }, { "epoch": 0.440580011467685, "grad_norm": 0.3769477903842926, "learning_rate": 0.000111906835285096, "loss": 1.1791, "step": 33905 }, { "epoch": 0.4405930060116009, "grad_norm": 0.48580682277679443, "learning_rate": 0.0001119042358231846, "loss": 1.4894, "step": 33906 }, { "epoch": 0.44060600055551674, "grad_norm": 0.3570936620235443, "learning_rate": 0.00011190163636127322, "loss": 1.3161, "step": 33907 }, { "epoch": 0.44061899509943264, "grad_norm": 0.3767772614955902, "learning_rate": 0.00011189903689936183, "loss": 1.3216, "step": 33908 }, { "epoch": 0.4406319896433485, "grad_norm": 0.4276905953884125, "learning_rate": 0.00011189643743745047, "loss": 1.4417, "step": 33909 }, { "epoch": 0.4406449841872644, "grad_norm": 0.3743925392627716, "learning_rate": 0.00011189383797553908, "loss": 1.3313, "step": 33910 }, { "epoch": 0.44065797873118023, "grad_norm": 0.4535468518733978, "learning_rate": 0.00011189123851362769, "loss": 1.4864, "step": 33911 }, { "epoch": 0.44067097327509613, "grad_norm": 0.2709183990955353, "learning_rate": 0.0001118886390517163, "loss": 1.6668, "step": 33912 }, { "epoch": 0.440683967819012, "grad_norm": 0.3509170413017273, "learning_rate": 0.00011188603958980492, "loss": 1.2957, "step": 33913 }, { "epoch": 0.4406969623629279, "grad_norm": 0.330238938331604, "learning_rate": 0.00011188344012789353, "loss": 1.3562, "step": 33914 }, { "epoch": 0.4407099569068437, "grad_norm": 0.47974923253059387, "learning_rate": 0.00011188084066598215, "loss": 1.5172, "step": 33915 }, { "epoch": 0.4407229514507596, "grad_norm": 0.4952934980392456, "learning_rate": 0.00011187824120407076, "loss": 1.3622, "step": 33916 }, { "epoch": 0.44073594599467547, "grad_norm": 0.37118789553642273, "learning_rate": 0.00011187564174215938, "loss": 1.2489, "step": 33917 }, { "epoch": 0.44074894053859137, "grad_norm": 0.23424631357192993, "learning_rate": 0.00011187304228024799, "loss": 1.2305, "step": 33918 }, { "epoch": 0.4407619350825072, "grad_norm": 0.384907603263855, "learning_rate": 0.0001118704428183366, "loss": 1.5708, "step": 33919 }, { "epoch": 0.4407749296264231, "grad_norm": 0.31708404421806335, "learning_rate": 0.00011186784335642521, "loss": 1.0588, "step": 33920 }, { "epoch": 0.44078792417033896, "grad_norm": 0.4358888864517212, "learning_rate": 0.00011186524389451385, "loss": 1.3023, "step": 33921 }, { "epoch": 0.44080091871425486, "grad_norm": 0.36452725529670715, "learning_rate": 0.00011186264443260246, "loss": 1.3971, "step": 33922 }, { "epoch": 0.4408139132581707, "grad_norm": 0.44835686683654785, "learning_rate": 0.00011186004497069107, "loss": 1.5644, "step": 33923 }, { "epoch": 0.4408269078020866, "grad_norm": 0.4204321801662445, "learning_rate": 0.00011185744550877968, "loss": 1.2754, "step": 33924 }, { "epoch": 0.44083990234600245, "grad_norm": 0.357796847820282, "learning_rate": 0.00011185484604686831, "loss": 1.5435, "step": 33925 }, { "epoch": 0.44085289688991836, "grad_norm": 0.4933660924434662, "learning_rate": 0.00011185224658495692, "loss": 1.5398, "step": 33926 }, { "epoch": 0.4408658914338342, "grad_norm": 0.4068813621997833, "learning_rate": 0.00011184964712304553, "loss": 1.3599, "step": 33927 }, { "epoch": 0.4408788859777501, "grad_norm": 0.41411903500556946, "learning_rate": 0.00011184704766113414, "loss": 1.4149, "step": 33928 }, { "epoch": 0.44089188052166595, "grad_norm": 0.4143437147140503, "learning_rate": 0.00011184444819922278, "loss": 1.4579, "step": 33929 }, { "epoch": 0.44090487506558185, "grad_norm": 0.4094889461994171, "learning_rate": 0.00011184184873731138, "loss": 1.5253, "step": 33930 }, { "epoch": 0.4409178696094977, "grad_norm": 0.388461709022522, "learning_rate": 0.00011183924927539999, "loss": 1.3487, "step": 33931 }, { "epoch": 0.4409308641534136, "grad_norm": 0.3599866032600403, "learning_rate": 0.0001118366498134886, "loss": 1.3316, "step": 33932 }, { "epoch": 0.44094385869732944, "grad_norm": 0.3491574823856354, "learning_rate": 0.00011183405035157724, "loss": 1.2722, "step": 33933 }, { "epoch": 0.44095685324124534, "grad_norm": 0.3522743284702301, "learning_rate": 0.00011183145088966585, "loss": 1.2204, "step": 33934 }, { "epoch": 0.4409698477851612, "grad_norm": 0.38592448830604553, "learning_rate": 0.00011182885142775446, "loss": 1.6383, "step": 33935 }, { "epoch": 0.4409828423290771, "grad_norm": 0.3955994248390198, "learning_rate": 0.00011182625196584307, "loss": 1.4827, "step": 33936 }, { "epoch": 0.44099583687299293, "grad_norm": 0.36901840567588806, "learning_rate": 0.0001118236525039317, "loss": 1.2574, "step": 33937 }, { "epoch": 0.44100883141690883, "grad_norm": 0.4609015882015228, "learning_rate": 0.0001118210530420203, "loss": 1.3762, "step": 33938 }, { "epoch": 0.4410218259608247, "grad_norm": 0.3924921452999115, "learning_rate": 0.00011181845358010892, "loss": 1.301, "step": 33939 }, { "epoch": 0.4410348205047406, "grad_norm": 0.42089369893074036, "learning_rate": 0.00011181585411819755, "loss": 1.5235, "step": 33940 }, { "epoch": 0.4410478150486564, "grad_norm": 0.45505690574645996, "learning_rate": 0.00011181325465628617, "loss": 1.4353, "step": 33941 }, { "epoch": 0.4410608095925723, "grad_norm": 0.388092964887619, "learning_rate": 0.00011181065519437476, "loss": 1.1597, "step": 33942 }, { "epoch": 0.44107380413648817, "grad_norm": 0.37217697501182556, "learning_rate": 0.00011180805573246337, "loss": 1.2961, "step": 33943 }, { "epoch": 0.44108679868040407, "grad_norm": 0.44210129976272583, "learning_rate": 0.00011180545627055201, "loss": 1.3265, "step": 33944 }, { "epoch": 0.4410997932243199, "grad_norm": 0.36260077357292175, "learning_rate": 0.00011180285680864062, "loss": 1.3572, "step": 33945 }, { "epoch": 0.4411127877682358, "grad_norm": 0.4829398989677429, "learning_rate": 0.00011180025734672923, "loss": 1.5183, "step": 33946 }, { "epoch": 0.44112578231215166, "grad_norm": 0.46915286779403687, "learning_rate": 0.00011179765788481784, "loss": 1.682, "step": 33947 }, { "epoch": 0.44113877685606756, "grad_norm": 0.5006355047225952, "learning_rate": 0.00011179505842290647, "loss": 1.3184, "step": 33948 }, { "epoch": 0.4411517713999834, "grad_norm": 0.4542192816734314, "learning_rate": 0.00011179245896099508, "loss": 1.3721, "step": 33949 }, { "epoch": 0.4411647659438993, "grad_norm": 0.35697054862976074, "learning_rate": 0.00011178985949908369, "loss": 1.3939, "step": 33950 }, { "epoch": 0.44117776048781515, "grad_norm": 0.4992166757583618, "learning_rate": 0.0001117872600371723, "loss": 1.3845, "step": 33951 }, { "epoch": 0.44119075503173105, "grad_norm": 0.35691800713539124, "learning_rate": 0.00011178466057526094, "loss": 1.3644, "step": 33952 }, { "epoch": 0.44120374957564695, "grad_norm": 0.5175820589065552, "learning_rate": 0.00011178206111334955, "loss": 1.4384, "step": 33953 }, { "epoch": 0.4412167441195628, "grad_norm": 0.3665894865989685, "learning_rate": 0.00011177946165143816, "loss": 1.3688, "step": 33954 }, { "epoch": 0.4412297386634787, "grad_norm": 0.39191189408302307, "learning_rate": 0.00011177686218952676, "loss": 1.4866, "step": 33955 }, { "epoch": 0.44124273320739454, "grad_norm": 0.35874414443969727, "learning_rate": 0.0001117742627276154, "loss": 1.3883, "step": 33956 }, { "epoch": 0.44125572775131044, "grad_norm": 0.40424150228500366, "learning_rate": 0.00011177166326570401, "loss": 1.4475, "step": 33957 }, { "epoch": 0.4412687222952263, "grad_norm": 0.4540855288505554, "learning_rate": 0.00011176906380379262, "loss": 1.2652, "step": 33958 }, { "epoch": 0.4412817168391422, "grad_norm": 0.37405267357826233, "learning_rate": 0.00011176646434188123, "loss": 1.4242, "step": 33959 }, { "epoch": 0.44129471138305804, "grad_norm": 0.42453327775001526, "learning_rate": 0.00011176386487996985, "loss": 1.3518, "step": 33960 }, { "epoch": 0.44130770592697394, "grad_norm": 0.3619401454925537, "learning_rate": 0.00011176126541805847, "loss": 1.3275, "step": 33961 }, { "epoch": 0.4413207004708898, "grad_norm": 0.41241100430488586, "learning_rate": 0.00011175866595614708, "loss": 1.3013, "step": 33962 }, { "epoch": 0.4413336950148057, "grad_norm": 0.3413083553314209, "learning_rate": 0.00011175606649423569, "loss": 1.3262, "step": 33963 }, { "epoch": 0.4413466895587215, "grad_norm": 0.3298988342285156, "learning_rate": 0.00011175346703232433, "loss": 1.3523, "step": 33964 }, { "epoch": 0.44135968410263743, "grad_norm": 0.4547967314720154, "learning_rate": 0.00011175086757041294, "loss": 1.3906, "step": 33965 }, { "epoch": 0.4413726786465533, "grad_norm": 0.3494797646999359, "learning_rate": 0.00011174826810850155, "loss": 1.5019, "step": 33966 }, { "epoch": 0.4413856731904692, "grad_norm": 0.4248200058937073, "learning_rate": 0.00011174566864659016, "loss": 1.5683, "step": 33967 }, { "epoch": 0.441398667734385, "grad_norm": 0.3502233326435089, "learning_rate": 0.00011174306918467878, "loss": 1.3526, "step": 33968 }, { "epoch": 0.4414116622783009, "grad_norm": 0.41335996985435486, "learning_rate": 0.0001117404697227674, "loss": 1.4993, "step": 33969 }, { "epoch": 0.44142465682221677, "grad_norm": 0.43141835927963257, "learning_rate": 0.000111737870260856, "loss": 1.2899, "step": 33970 }, { "epoch": 0.44143765136613267, "grad_norm": 0.4510093927383423, "learning_rate": 0.00011173527079894462, "loss": 1.373, "step": 33971 }, { "epoch": 0.4414506459100485, "grad_norm": 0.42366617918014526, "learning_rate": 0.00011173267133703324, "loss": 1.3472, "step": 33972 }, { "epoch": 0.4414636404539644, "grad_norm": 0.35545095801353455, "learning_rate": 0.00011173007187512185, "loss": 1.3568, "step": 33973 }, { "epoch": 0.44147663499788026, "grad_norm": 0.42061683535575867, "learning_rate": 0.00011172747241321046, "loss": 1.3641, "step": 33974 }, { "epoch": 0.44148962954179616, "grad_norm": 0.4463011920452118, "learning_rate": 0.00011172487295129907, "loss": 1.6497, "step": 33975 }, { "epoch": 0.441502624085712, "grad_norm": 0.4948456585407257, "learning_rate": 0.00011172227348938771, "loss": 1.4132, "step": 33976 }, { "epoch": 0.4415156186296279, "grad_norm": 0.46984952688217163, "learning_rate": 0.00011171967402747632, "loss": 1.6391, "step": 33977 }, { "epoch": 0.44152861317354375, "grad_norm": 0.36725401878356934, "learning_rate": 0.00011171707456556493, "loss": 1.3872, "step": 33978 }, { "epoch": 0.44154160771745965, "grad_norm": 0.28851690888404846, "learning_rate": 0.00011171447510365356, "loss": 1.2723, "step": 33979 }, { "epoch": 0.4415546022613755, "grad_norm": 0.42832598090171814, "learning_rate": 0.00011171187564174217, "loss": 1.4838, "step": 33980 }, { "epoch": 0.4415675968052914, "grad_norm": 0.4003250002861023, "learning_rate": 0.00011170927617983078, "loss": 1.422, "step": 33981 }, { "epoch": 0.44158059134920724, "grad_norm": 0.41524478793144226, "learning_rate": 0.00011170667671791939, "loss": 1.4542, "step": 33982 }, { "epoch": 0.44159358589312314, "grad_norm": 0.3334995210170746, "learning_rate": 0.00011170407725600803, "loss": 1.2805, "step": 33983 }, { "epoch": 0.441606580437039, "grad_norm": 0.3070078194141388, "learning_rate": 0.00011170147779409663, "loss": 1.4513, "step": 33984 }, { "epoch": 0.4416195749809549, "grad_norm": 0.4811932146549225, "learning_rate": 0.00011169887833218524, "loss": 1.4415, "step": 33985 }, { "epoch": 0.44163256952487073, "grad_norm": 0.4797988831996918, "learning_rate": 0.00011169627887027385, "loss": 1.4277, "step": 33986 }, { "epoch": 0.44164556406878663, "grad_norm": 0.3617285490036011, "learning_rate": 0.00011169367940836249, "loss": 1.2887, "step": 33987 }, { "epoch": 0.4416585586127025, "grad_norm": 0.5502768158912659, "learning_rate": 0.0001116910799464511, "loss": 1.4889, "step": 33988 }, { "epoch": 0.4416715531566184, "grad_norm": 0.337211549282074, "learning_rate": 0.00011168848048453971, "loss": 1.4075, "step": 33989 }, { "epoch": 0.4416845477005342, "grad_norm": 0.37986770272254944, "learning_rate": 0.00011168588102262832, "loss": 1.4778, "step": 33990 }, { "epoch": 0.4416975422444501, "grad_norm": 0.42858952283859253, "learning_rate": 0.00011168328156071694, "loss": 1.3399, "step": 33991 }, { "epoch": 0.44171053678836597, "grad_norm": 0.3652910590171814, "learning_rate": 0.00011168068209880555, "loss": 1.3614, "step": 33992 }, { "epoch": 0.44172353133228187, "grad_norm": 0.43751946091651917, "learning_rate": 0.00011167808263689416, "loss": 1.501, "step": 33993 }, { "epoch": 0.4417365258761977, "grad_norm": 0.4212469160556793, "learning_rate": 0.00011167548317498278, "loss": 1.3336, "step": 33994 }, { "epoch": 0.4417495204201136, "grad_norm": 0.336783230304718, "learning_rate": 0.00011167288371307141, "loss": 1.2714, "step": 33995 }, { "epoch": 0.44176251496402946, "grad_norm": 0.41767245531082153, "learning_rate": 0.00011167028425116002, "loss": 1.339, "step": 33996 }, { "epoch": 0.44177550950794536, "grad_norm": 0.46003207564353943, "learning_rate": 0.00011166768478924862, "loss": 1.4393, "step": 33997 }, { "epoch": 0.4417885040518612, "grad_norm": 0.411734014749527, "learning_rate": 0.00011166508532733723, "loss": 1.4603, "step": 33998 }, { "epoch": 0.4418014985957771, "grad_norm": 0.44157516956329346, "learning_rate": 0.00011166248586542587, "loss": 1.4364, "step": 33999 }, { "epoch": 0.44181449313969295, "grad_norm": 0.39477619528770447, "learning_rate": 0.00011165988640351448, "loss": 1.441, "step": 34000 }, { "epoch": 0.44182748768360885, "grad_norm": 0.4544408619403839, "learning_rate": 0.00011165728694160309, "loss": 1.4328, "step": 34001 }, { "epoch": 0.4418404822275247, "grad_norm": 0.3563541769981384, "learning_rate": 0.0001116546874796917, "loss": 1.6331, "step": 34002 }, { "epoch": 0.4418534767714406, "grad_norm": 0.40583398938179016, "learning_rate": 0.00011165208801778033, "loss": 1.6264, "step": 34003 }, { "epoch": 0.44186647131535645, "grad_norm": 0.4503598213195801, "learning_rate": 0.00011164948855586894, "loss": 1.4198, "step": 34004 }, { "epoch": 0.44187946585927235, "grad_norm": 0.352499783039093, "learning_rate": 0.00011164688909395755, "loss": 1.3936, "step": 34005 }, { "epoch": 0.4418924604031882, "grad_norm": 0.4193965792655945, "learning_rate": 0.00011164428963204616, "loss": 1.3939, "step": 34006 }, { "epoch": 0.4419054549471041, "grad_norm": 0.41913798451423645, "learning_rate": 0.0001116416901701348, "loss": 1.3813, "step": 34007 }, { "epoch": 0.44191844949101994, "grad_norm": 0.46930497884750366, "learning_rate": 0.00011163909070822341, "loss": 1.5188, "step": 34008 }, { "epoch": 0.44193144403493584, "grad_norm": 0.43586206436157227, "learning_rate": 0.00011163649124631202, "loss": 1.4712, "step": 34009 }, { "epoch": 0.4419444385788517, "grad_norm": 0.48863834142684937, "learning_rate": 0.00011163389178440062, "loss": 1.557, "step": 34010 }, { "epoch": 0.4419574331227676, "grad_norm": 0.3437492549419403, "learning_rate": 0.00011163129232248926, "loss": 1.2922, "step": 34011 }, { "epoch": 0.44197042766668343, "grad_norm": 0.36360836029052734, "learning_rate": 0.00011162869286057787, "loss": 1.3329, "step": 34012 }, { "epoch": 0.44198342221059933, "grad_norm": 0.4119648337364197, "learning_rate": 0.00011162609339866648, "loss": 1.4021, "step": 34013 }, { "epoch": 0.4419964167545152, "grad_norm": 0.4602004885673523, "learning_rate": 0.0001116234939367551, "loss": 1.3554, "step": 34014 }, { "epoch": 0.4420094112984311, "grad_norm": 0.34458449482917786, "learning_rate": 0.00011162089447484371, "loss": 1.3315, "step": 34015 }, { "epoch": 0.4420224058423469, "grad_norm": 0.448383092880249, "learning_rate": 0.00011161829501293232, "loss": 1.4859, "step": 34016 }, { "epoch": 0.4420354003862628, "grad_norm": 0.4149613380432129, "learning_rate": 0.00011161569555102094, "loss": 1.4072, "step": 34017 }, { "epoch": 0.44204839493017867, "grad_norm": 0.41026368737220764, "learning_rate": 0.00011161309608910957, "loss": 1.3627, "step": 34018 }, { "epoch": 0.44206138947409457, "grad_norm": 0.4544207751750946, "learning_rate": 0.00011161049662719818, "loss": 1.3649, "step": 34019 }, { "epoch": 0.4420743840180104, "grad_norm": 0.408314049243927, "learning_rate": 0.0001116078971652868, "loss": 1.6069, "step": 34020 }, { "epoch": 0.4420873785619263, "grad_norm": 0.5312645435333252, "learning_rate": 0.0001116052977033754, "loss": 1.4035, "step": 34021 }, { "epoch": 0.44210037310584216, "grad_norm": 0.4624485969543457, "learning_rate": 0.00011160269824146403, "loss": 1.4847, "step": 34022 }, { "epoch": 0.44211336764975806, "grad_norm": 0.5031031966209412, "learning_rate": 0.00011160009877955264, "loss": 1.5948, "step": 34023 }, { "epoch": 0.4421263621936739, "grad_norm": 0.37778013944625854, "learning_rate": 0.00011159749931764125, "loss": 1.5047, "step": 34024 }, { "epoch": 0.4421393567375898, "grad_norm": 0.3765183091163635, "learning_rate": 0.00011159489985572986, "loss": 1.3389, "step": 34025 }, { "epoch": 0.44215235128150565, "grad_norm": 0.4213264286518097, "learning_rate": 0.00011159230039381849, "loss": 1.5316, "step": 34026 }, { "epoch": 0.44216534582542155, "grad_norm": 0.4199904501438141, "learning_rate": 0.0001115897009319071, "loss": 1.4879, "step": 34027 }, { "epoch": 0.4421783403693374, "grad_norm": 0.46864089369773865, "learning_rate": 0.00011158710146999571, "loss": 1.4932, "step": 34028 }, { "epoch": 0.4421913349132533, "grad_norm": 0.4181559383869171, "learning_rate": 0.00011158450200808432, "loss": 1.6198, "step": 34029 }, { "epoch": 0.4422043294571692, "grad_norm": 0.4206635653972626, "learning_rate": 0.00011158190254617296, "loss": 1.3485, "step": 34030 }, { "epoch": 0.44221732400108504, "grad_norm": 0.4653114080429077, "learning_rate": 0.00011157930308426157, "loss": 1.4887, "step": 34031 }, { "epoch": 0.44223031854500094, "grad_norm": 0.42721882462501526, "learning_rate": 0.00011157670362235018, "loss": 1.3516, "step": 34032 }, { "epoch": 0.4422433130889168, "grad_norm": 0.32499122619628906, "learning_rate": 0.00011157410416043879, "loss": 1.3322, "step": 34033 }, { "epoch": 0.4422563076328327, "grad_norm": 0.35488706827163696, "learning_rate": 0.00011157150469852742, "loss": 1.5073, "step": 34034 }, { "epoch": 0.44226930217674854, "grad_norm": 0.34038349986076355, "learning_rate": 0.00011156890523661603, "loss": 1.4003, "step": 34035 }, { "epoch": 0.44228229672066444, "grad_norm": 0.4432404041290283, "learning_rate": 0.00011156630577470464, "loss": 1.3601, "step": 34036 }, { "epoch": 0.4422952912645803, "grad_norm": 0.48277029395103455, "learning_rate": 0.00011156370631279325, "loss": 1.5142, "step": 34037 }, { "epoch": 0.4423082858084962, "grad_norm": 0.3559846878051758, "learning_rate": 0.00011156110685088189, "loss": 1.3681, "step": 34038 }, { "epoch": 0.442321280352412, "grad_norm": 0.44590258598327637, "learning_rate": 0.00011155850738897048, "loss": 1.3953, "step": 34039 }, { "epoch": 0.44233427489632793, "grad_norm": 0.3752189576625824, "learning_rate": 0.0001115559079270591, "loss": 1.2726, "step": 34040 }, { "epoch": 0.4423472694402438, "grad_norm": 0.32315489649772644, "learning_rate": 0.0001115533084651477, "loss": 1.4106, "step": 34041 }, { "epoch": 0.4423602639841597, "grad_norm": 0.4178207218647003, "learning_rate": 0.00011155070900323634, "loss": 1.2862, "step": 34042 }, { "epoch": 0.4423732585280755, "grad_norm": 0.41131237149238586, "learning_rate": 0.00011154810954132495, "loss": 1.3821, "step": 34043 }, { "epoch": 0.4423862530719914, "grad_norm": 0.4077105224132538, "learning_rate": 0.00011154551007941357, "loss": 1.4065, "step": 34044 }, { "epoch": 0.44239924761590727, "grad_norm": 0.42594483494758606, "learning_rate": 0.00011154291061750218, "loss": 1.3171, "step": 34045 }, { "epoch": 0.44241224215982317, "grad_norm": 0.38548025488853455, "learning_rate": 0.0001115403111555908, "loss": 1.3482, "step": 34046 }, { "epoch": 0.442425236703739, "grad_norm": 0.4321030080318451, "learning_rate": 0.00011153771169367941, "loss": 1.3745, "step": 34047 }, { "epoch": 0.4424382312476549, "grad_norm": 0.39287373423576355, "learning_rate": 0.00011153511223176802, "loss": 1.1635, "step": 34048 }, { "epoch": 0.44245122579157076, "grad_norm": 0.44399556517601013, "learning_rate": 0.00011153251276985663, "loss": 1.4099, "step": 34049 }, { "epoch": 0.44246422033548666, "grad_norm": 0.3703921139240265, "learning_rate": 0.00011152991330794527, "loss": 1.3196, "step": 34050 }, { "epoch": 0.4424772148794025, "grad_norm": 0.37313687801361084, "learning_rate": 0.00011152731384603388, "loss": 1.3712, "step": 34051 }, { "epoch": 0.4424902094233184, "grad_norm": 0.3553673028945923, "learning_rate": 0.00011152471438412248, "loss": 1.2903, "step": 34052 }, { "epoch": 0.44250320396723425, "grad_norm": 0.4691884517669678, "learning_rate": 0.00011152211492221112, "loss": 1.3274, "step": 34053 }, { "epoch": 0.44251619851115015, "grad_norm": 0.36158594489097595, "learning_rate": 0.00011151951546029973, "loss": 1.1861, "step": 34054 }, { "epoch": 0.442529193055066, "grad_norm": 0.4169381856918335, "learning_rate": 0.00011151691599838834, "loss": 1.1811, "step": 34055 }, { "epoch": 0.4425421875989819, "grad_norm": 0.4420263469219208, "learning_rate": 0.00011151431653647695, "loss": 1.523, "step": 34056 }, { "epoch": 0.44255518214289774, "grad_norm": 0.48514723777770996, "learning_rate": 0.00011151171707456558, "loss": 1.3097, "step": 34057 }, { "epoch": 0.44256817668681364, "grad_norm": 0.4264439642429352, "learning_rate": 0.00011150911761265419, "loss": 1.2568, "step": 34058 }, { "epoch": 0.4425811712307295, "grad_norm": 0.34638628363609314, "learning_rate": 0.0001115065181507428, "loss": 1.26, "step": 34059 }, { "epoch": 0.4425941657746454, "grad_norm": 0.3641755282878876, "learning_rate": 0.00011150391868883141, "loss": 1.2437, "step": 34060 }, { "epoch": 0.44260716031856123, "grad_norm": 0.3337748050689697, "learning_rate": 0.00011150131922692005, "loss": 1.5411, "step": 34061 }, { "epoch": 0.44262015486247713, "grad_norm": 0.42045578360557556, "learning_rate": 0.00011149871976500866, "loss": 1.5602, "step": 34062 }, { "epoch": 0.442633149406393, "grad_norm": 0.3415861427783966, "learning_rate": 0.00011149612030309727, "loss": 1.4709, "step": 34063 }, { "epoch": 0.4426461439503089, "grad_norm": 0.4140772521495819, "learning_rate": 0.00011149352084118587, "loss": 1.5819, "step": 34064 }, { "epoch": 0.4426591384942247, "grad_norm": 0.34868860244750977, "learning_rate": 0.0001114909213792745, "loss": 1.4697, "step": 34065 }, { "epoch": 0.4426721330381406, "grad_norm": 0.4092274010181427, "learning_rate": 0.00011148832191736311, "loss": 1.4222, "step": 34066 }, { "epoch": 0.44268512758205647, "grad_norm": 0.4174451529979706, "learning_rate": 0.00011148572245545173, "loss": 1.5791, "step": 34067 }, { "epoch": 0.44269812212597237, "grad_norm": 0.4202478229999542, "learning_rate": 0.00011148312299354034, "loss": 1.3845, "step": 34068 }, { "epoch": 0.4427111166698882, "grad_norm": 0.4580093026161194, "learning_rate": 0.00011148052353162896, "loss": 1.4447, "step": 34069 }, { "epoch": 0.4427241112138041, "grad_norm": 0.4523022472858429, "learning_rate": 0.00011147792406971757, "loss": 1.3951, "step": 34070 }, { "epoch": 0.44273710575771996, "grad_norm": 0.4134688079357147, "learning_rate": 0.00011147532460780618, "loss": 1.3314, "step": 34071 }, { "epoch": 0.44275010030163586, "grad_norm": 0.42702803015708923, "learning_rate": 0.0001114727251458948, "loss": 1.402, "step": 34072 }, { "epoch": 0.4427630948455517, "grad_norm": 0.4308590590953827, "learning_rate": 0.00011147012568398343, "loss": 1.3203, "step": 34073 }, { "epoch": 0.4427760893894676, "grad_norm": 0.4127539098262787, "learning_rate": 0.00011146752622207204, "loss": 1.3437, "step": 34074 }, { "epoch": 0.44278908393338345, "grad_norm": 0.2835022807121277, "learning_rate": 0.00011146492676016065, "loss": 1.37, "step": 34075 }, { "epoch": 0.44280207847729935, "grad_norm": 0.3778228461742401, "learning_rate": 0.00011146232729824926, "loss": 1.4776, "step": 34076 }, { "epoch": 0.4428150730212152, "grad_norm": 0.44808027148246765, "learning_rate": 0.00011145972783633789, "loss": 1.4628, "step": 34077 }, { "epoch": 0.4428280675651311, "grad_norm": 0.3863624632358551, "learning_rate": 0.0001114571283744265, "loss": 1.1686, "step": 34078 }, { "epoch": 0.44284106210904695, "grad_norm": 0.3555189371109009, "learning_rate": 0.00011145452891251511, "loss": 1.6037, "step": 34079 }, { "epoch": 0.44285405665296285, "grad_norm": 0.3349853754043579, "learning_rate": 0.00011145192945060372, "loss": 1.1066, "step": 34080 }, { "epoch": 0.4428670511968787, "grad_norm": 0.3734571039676666, "learning_rate": 0.00011144932998869235, "loss": 1.4559, "step": 34081 }, { "epoch": 0.4428800457407946, "grad_norm": 0.38879725337028503, "learning_rate": 0.00011144673052678096, "loss": 1.3851, "step": 34082 }, { "epoch": 0.44289304028471044, "grad_norm": 0.43281811475753784, "learning_rate": 0.00011144413106486957, "loss": 1.3811, "step": 34083 }, { "epoch": 0.44290603482862634, "grad_norm": 0.4733874797821045, "learning_rate": 0.00011144153160295818, "loss": 1.5529, "step": 34084 }, { "epoch": 0.4429190293725422, "grad_norm": 0.40559878945350647, "learning_rate": 0.00011143893214104682, "loss": 1.5049, "step": 34085 }, { "epoch": 0.4429320239164581, "grad_norm": 0.3868595361709595, "learning_rate": 0.00011143633267913543, "loss": 1.2088, "step": 34086 }, { "epoch": 0.44294501846037393, "grad_norm": 0.3120768666267395, "learning_rate": 0.00011143373321722404, "loss": 1.4182, "step": 34087 }, { "epoch": 0.44295801300428983, "grad_norm": 0.341911643743515, "learning_rate": 0.00011143113375531266, "loss": 1.222, "step": 34088 }, { "epoch": 0.4429710075482057, "grad_norm": 0.4585247337818146, "learning_rate": 0.00011142853429340127, "loss": 1.3681, "step": 34089 }, { "epoch": 0.4429840020921216, "grad_norm": 0.4295271933078766, "learning_rate": 0.00011142593483148989, "loss": 1.3337, "step": 34090 }, { "epoch": 0.4429969966360374, "grad_norm": 0.3395138382911682, "learning_rate": 0.0001114233353695785, "loss": 1.4285, "step": 34091 }, { "epoch": 0.4430099911799533, "grad_norm": 0.4354317784309387, "learning_rate": 0.00011142073590766713, "loss": 1.2586, "step": 34092 }, { "epoch": 0.44302298572386917, "grad_norm": 0.3374302089214325, "learning_rate": 0.00011141813644575575, "loss": 1.4206, "step": 34093 }, { "epoch": 0.44303598026778507, "grad_norm": 0.43610942363739014, "learning_rate": 0.00011141553698384434, "loss": 1.4458, "step": 34094 }, { "epoch": 0.4430489748117009, "grad_norm": 0.3810350000858307, "learning_rate": 0.00011141293752193295, "loss": 1.3943, "step": 34095 }, { "epoch": 0.4430619693556168, "grad_norm": 0.4225288927555084, "learning_rate": 0.00011141033806002159, "loss": 1.4428, "step": 34096 }, { "epoch": 0.44307496389953266, "grad_norm": 0.3530668318271637, "learning_rate": 0.0001114077385981102, "loss": 1.5031, "step": 34097 }, { "epoch": 0.44308795844344856, "grad_norm": 0.2999928295612335, "learning_rate": 0.00011140513913619881, "loss": 1.1151, "step": 34098 }, { "epoch": 0.4431009529873644, "grad_norm": 0.37528547644615173, "learning_rate": 0.00011140253967428742, "loss": 1.6368, "step": 34099 }, { "epoch": 0.4431139475312803, "grad_norm": 0.3746996223926544, "learning_rate": 0.00011139994021237605, "loss": 1.331, "step": 34100 }, { "epoch": 0.44312694207519615, "grad_norm": 0.4597117006778717, "learning_rate": 0.00011139734075046466, "loss": 1.3023, "step": 34101 }, { "epoch": 0.44313993661911205, "grad_norm": 0.4087245762348175, "learning_rate": 0.00011139474128855327, "loss": 1.4536, "step": 34102 }, { "epoch": 0.4431529311630279, "grad_norm": 0.365296334028244, "learning_rate": 0.00011139214182664188, "loss": 1.3955, "step": 34103 }, { "epoch": 0.4431659257069438, "grad_norm": 0.37722986936569214, "learning_rate": 0.00011138954236473052, "loss": 1.4297, "step": 34104 }, { "epoch": 0.44317892025085964, "grad_norm": 0.3135521113872528, "learning_rate": 0.00011138694290281913, "loss": 1.2679, "step": 34105 }, { "epoch": 0.44319191479477554, "grad_norm": 0.4270419180393219, "learning_rate": 0.00011138434344090773, "loss": 1.3106, "step": 34106 }, { "epoch": 0.44320490933869144, "grad_norm": 0.42852821946144104, "learning_rate": 0.00011138174397899634, "loss": 1.5136, "step": 34107 }, { "epoch": 0.4432179038826073, "grad_norm": 0.4589688777923584, "learning_rate": 0.00011137914451708498, "loss": 1.2346, "step": 34108 }, { "epoch": 0.4432308984265232, "grad_norm": 0.30132973194122314, "learning_rate": 0.00011137654505517359, "loss": 1.2054, "step": 34109 }, { "epoch": 0.44324389297043904, "grad_norm": 0.4193493723869324, "learning_rate": 0.0001113739455932622, "loss": 1.2162, "step": 34110 }, { "epoch": 0.44325688751435494, "grad_norm": 0.49219849705696106, "learning_rate": 0.00011137134613135081, "loss": 1.5356, "step": 34111 }, { "epoch": 0.4432698820582708, "grad_norm": 0.2943589687347412, "learning_rate": 0.00011136874666943943, "loss": 1.2607, "step": 34112 }, { "epoch": 0.4432828766021867, "grad_norm": 0.4865548610687256, "learning_rate": 0.00011136614720752805, "loss": 1.4802, "step": 34113 }, { "epoch": 0.4432958711461025, "grad_norm": 0.5202329754829407, "learning_rate": 0.00011136354774561666, "loss": 1.5178, "step": 34114 }, { "epoch": 0.4433088656900184, "grad_norm": 0.40417933464050293, "learning_rate": 0.00011136094828370527, "loss": 1.3158, "step": 34115 }, { "epoch": 0.4433218602339343, "grad_norm": 0.39617106318473816, "learning_rate": 0.0001113583488217939, "loss": 1.3696, "step": 34116 }, { "epoch": 0.4433348547778502, "grad_norm": 0.46501678228378296, "learning_rate": 0.00011135574935988252, "loss": 1.3862, "step": 34117 }, { "epoch": 0.443347849321766, "grad_norm": 0.3566831946372986, "learning_rate": 0.00011135314989797113, "loss": 1.2796, "step": 34118 }, { "epoch": 0.4433608438656819, "grad_norm": 0.4189980924129486, "learning_rate": 0.00011135055043605972, "loss": 1.2754, "step": 34119 }, { "epoch": 0.44337383840959776, "grad_norm": 0.3705172538757324, "learning_rate": 0.00011134795097414836, "loss": 1.5568, "step": 34120 }, { "epoch": 0.44338683295351367, "grad_norm": 0.26607921719551086, "learning_rate": 0.00011134535151223697, "loss": 1.2465, "step": 34121 }, { "epoch": 0.4433998274974295, "grad_norm": 0.3442211449146271, "learning_rate": 0.00011134275205032558, "loss": 1.4224, "step": 34122 }, { "epoch": 0.4434128220413454, "grad_norm": 0.3846757411956787, "learning_rate": 0.0001113401525884142, "loss": 1.3092, "step": 34123 }, { "epoch": 0.44342581658526126, "grad_norm": 0.39371925592422485, "learning_rate": 0.00011133755312650282, "loss": 1.2766, "step": 34124 }, { "epoch": 0.44343881112917716, "grad_norm": 0.42275723814964294, "learning_rate": 0.00011133495366459143, "loss": 1.4954, "step": 34125 }, { "epoch": 0.443451805673093, "grad_norm": 0.33947134017944336, "learning_rate": 0.00011133235420268004, "loss": 1.3359, "step": 34126 }, { "epoch": 0.4434648002170089, "grad_norm": 0.3975488543510437, "learning_rate": 0.00011132975474076868, "loss": 1.2365, "step": 34127 }, { "epoch": 0.44347779476092475, "grad_norm": 0.3726840019226074, "learning_rate": 0.00011132715527885729, "loss": 1.5712, "step": 34128 }, { "epoch": 0.44349078930484065, "grad_norm": 0.4731629192829132, "learning_rate": 0.0001113245558169459, "loss": 1.432, "step": 34129 }, { "epoch": 0.4435037838487565, "grad_norm": 0.422348290681839, "learning_rate": 0.00011132195635503451, "loss": 1.5213, "step": 34130 }, { "epoch": 0.4435167783926724, "grad_norm": 0.5347578525543213, "learning_rate": 0.00011131935689312314, "loss": 1.2417, "step": 34131 }, { "epoch": 0.44352977293658824, "grad_norm": 0.46815255284309387, "learning_rate": 0.00011131675743121175, "loss": 1.4664, "step": 34132 }, { "epoch": 0.44354276748050414, "grad_norm": 0.3828120529651642, "learning_rate": 0.00011131415796930036, "loss": 1.372, "step": 34133 }, { "epoch": 0.44355576202442, "grad_norm": 0.4296015501022339, "learning_rate": 0.00011131155850738897, "loss": 1.496, "step": 34134 }, { "epoch": 0.4435687565683359, "grad_norm": 0.30861225724220276, "learning_rate": 0.00011130895904547761, "loss": 1.1474, "step": 34135 }, { "epoch": 0.44358175111225173, "grad_norm": 0.4997391700744629, "learning_rate": 0.0001113063595835662, "loss": 1.322, "step": 34136 }, { "epoch": 0.44359474565616763, "grad_norm": 0.43963226675987244, "learning_rate": 0.00011130376012165482, "loss": 1.3161, "step": 34137 }, { "epoch": 0.4436077402000835, "grad_norm": 0.42827117443084717, "learning_rate": 0.00011130116065974343, "loss": 1.4298, "step": 34138 }, { "epoch": 0.4436207347439994, "grad_norm": 0.389369934797287, "learning_rate": 0.00011129856119783207, "loss": 1.5127, "step": 34139 }, { "epoch": 0.4436337292879152, "grad_norm": 0.36223915219306946, "learning_rate": 0.00011129596173592068, "loss": 1.3228, "step": 34140 }, { "epoch": 0.4436467238318311, "grad_norm": 0.415510892868042, "learning_rate": 0.00011129336227400929, "loss": 1.4097, "step": 34141 }, { "epoch": 0.44365971837574697, "grad_norm": 0.44629859924316406, "learning_rate": 0.0001112907628120979, "loss": 1.3878, "step": 34142 }, { "epoch": 0.44367271291966287, "grad_norm": 0.4795943796634674, "learning_rate": 0.00011128816335018652, "loss": 1.4461, "step": 34143 }, { "epoch": 0.4436857074635787, "grad_norm": 0.4738703668117523, "learning_rate": 0.00011128556388827513, "loss": 1.4526, "step": 34144 }, { "epoch": 0.4436987020074946, "grad_norm": 0.3595874309539795, "learning_rate": 0.00011128296442636374, "loss": 1.3764, "step": 34145 }, { "epoch": 0.44371169655141046, "grad_norm": 0.32588106393814087, "learning_rate": 0.00011128036496445236, "loss": 1.3746, "step": 34146 }, { "epoch": 0.44372469109532636, "grad_norm": 0.4356873035430908, "learning_rate": 0.00011127776550254099, "loss": 1.5584, "step": 34147 }, { "epoch": 0.4437376856392422, "grad_norm": 0.35701823234558105, "learning_rate": 0.00011127516604062959, "loss": 1.1765, "step": 34148 }, { "epoch": 0.4437506801831581, "grad_norm": 0.5933940410614014, "learning_rate": 0.0001112725665787182, "loss": 1.3891, "step": 34149 }, { "epoch": 0.44376367472707395, "grad_norm": 0.31582701206207275, "learning_rate": 0.00011126996711680681, "loss": 1.3308, "step": 34150 }, { "epoch": 0.44377666927098985, "grad_norm": 0.3645959198474884, "learning_rate": 0.00011126736765489545, "loss": 1.2528, "step": 34151 }, { "epoch": 0.4437896638149057, "grad_norm": 0.39703112840652466, "learning_rate": 0.00011126476819298406, "loss": 1.3323, "step": 34152 }, { "epoch": 0.4438026583588216, "grad_norm": 0.38320615887641907, "learning_rate": 0.00011126216873107267, "loss": 1.2923, "step": 34153 }, { "epoch": 0.44381565290273745, "grad_norm": 0.3426392078399658, "learning_rate": 0.00011125956926916128, "loss": 1.2773, "step": 34154 }, { "epoch": 0.44382864744665335, "grad_norm": 0.4377420246601105, "learning_rate": 0.00011125696980724991, "loss": 1.2048, "step": 34155 }, { "epoch": 0.4438416419905692, "grad_norm": 0.4587497115135193, "learning_rate": 0.00011125437034533852, "loss": 1.3823, "step": 34156 }, { "epoch": 0.4438546365344851, "grad_norm": 0.3848799169063568, "learning_rate": 0.00011125177088342713, "loss": 1.3896, "step": 34157 }, { "epoch": 0.44386763107840094, "grad_norm": 0.28870266675949097, "learning_rate": 0.00011124917142151574, "loss": 1.1698, "step": 34158 }, { "epoch": 0.44388062562231684, "grad_norm": 0.34361258149147034, "learning_rate": 0.00011124657195960438, "loss": 1.3781, "step": 34159 }, { "epoch": 0.4438936201662327, "grad_norm": 0.4734368622303009, "learning_rate": 0.00011124397249769299, "loss": 1.2124, "step": 34160 }, { "epoch": 0.4439066147101486, "grad_norm": 0.4326978921890259, "learning_rate": 0.00011124137303578159, "loss": 1.5378, "step": 34161 }, { "epoch": 0.44391960925406443, "grad_norm": 0.40271615982055664, "learning_rate": 0.00011123877357387022, "loss": 1.3707, "step": 34162 }, { "epoch": 0.44393260379798033, "grad_norm": 0.41837170720100403, "learning_rate": 0.00011123617411195884, "loss": 1.4119, "step": 34163 }, { "epoch": 0.4439455983418962, "grad_norm": 0.37991392612457275, "learning_rate": 0.00011123357465004745, "loss": 1.3521, "step": 34164 }, { "epoch": 0.4439585928858121, "grad_norm": 0.5376604199409485, "learning_rate": 0.00011123097518813606, "loss": 1.3996, "step": 34165 }, { "epoch": 0.4439715874297279, "grad_norm": 0.3981701135635376, "learning_rate": 0.00011122837572622468, "loss": 1.4386, "step": 34166 }, { "epoch": 0.4439845819736438, "grad_norm": 0.3578491508960724, "learning_rate": 0.00011122577626431329, "loss": 1.5333, "step": 34167 }, { "epoch": 0.44399757651755967, "grad_norm": 0.42328494787216187, "learning_rate": 0.0001112231768024019, "loss": 1.5149, "step": 34168 }, { "epoch": 0.44401057106147557, "grad_norm": 0.35145577788352966, "learning_rate": 0.00011122057734049051, "loss": 1.3048, "step": 34169 }, { "epoch": 0.4440235656053914, "grad_norm": 0.5234702825546265, "learning_rate": 0.00011121797787857915, "loss": 1.4279, "step": 34170 }, { "epoch": 0.4440365601493073, "grad_norm": 0.3667518198490143, "learning_rate": 0.00011121537841666776, "loss": 1.4894, "step": 34171 }, { "epoch": 0.44404955469322316, "grad_norm": 0.31042590737342834, "learning_rate": 0.00011121277895475637, "loss": 1.4751, "step": 34172 }, { "epoch": 0.44406254923713906, "grad_norm": 0.4522731304168701, "learning_rate": 0.00011121017949284499, "loss": 1.3506, "step": 34173 }, { "epoch": 0.4440755437810549, "grad_norm": 0.41778403520584106, "learning_rate": 0.00011120758003093361, "loss": 1.3166, "step": 34174 }, { "epoch": 0.4440885383249708, "grad_norm": 0.33895090222358704, "learning_rate": 0.00011120498056902222, "loss": 1.3856, "step": 34175 }, { "epoch": 0.44410153286888665, "grad_norm": 0.5224469900131226, "learning_rate": 0.00011120238110711083, "loss": 1.537, "step": 34176 }, { "epoch": 0.44411452741280255, "grad_norm": 0.37412944436073303, "learning_rate": 0.00011119978164519944, "loss": 1.2369, "step": 34177 }, { "epoch": 0.4441275219567184, "grad_norm": 0.3774406909942627, "learning_rate": 0.00011119718218328807, "loss": 1.4191, "step": 34178 }, { "epoch": 0.4441405165006343, "grad_norm": 0.3684643805027008, "learning_rate": 0.00011119458272137668, "loss": 1.2032, "step": 34179 }, { "epoch": 0.44415351104455014, "grad_norm": 0.33266207575798035, "learning_rate": 0.00011119198325946529, "loss": 1.2905, "step": 34180 }, { "epoch": 0.44416650558846604, "grad_norm": 0.3841158151626587, "learning_rate": 0.0001111893837975539, "loss": 1.4892, "step": 34181 }, { "epoch": 0.44417950013238194, "grad_norm": 0.46613848209381104, "learning_rate": 0.00011118678433564254, "loss": 1.4964, "step": 34182 }, { "epoch": 0.4441924946762978, "grad_norm": 0.5643818974494934, "learning_rate": 0.00011118418487373115, "loss": 1.4406, "step": 34183 }, { "epoch": 0.4442054892202137, "grad_norm": 0.3386726975440979, "learning_rate": 0.00011118158541181976, "loss": 1.3488, "step": 34184 }, { "epoch": 0.44421848376412953, "grad_norm": 0.40742337703704834, "learning_rate": 0.00011117898594990837, "loss": 1.1391, "step": 34185 }, { "epoch": 0.44423147830804544, "grad_norm": 0.4673249423503876, "learning_rate": 0.000111176386487997, "loss": 1.5867, "step": 34186 }, { "epoch": 0.4442444728519613, "grad_norm": 0.45573797821998596, "learning_rate": 0.0001111737870260856, "loss": 1.3814, "step": 34187 }, { "epoch": 0.4442574673958772, "grad_norm": 0.45553094148635864, "learning_rate": 0.00011117118756417422, "loss": 1.4798, "step": 34188 }, { "epoch": 0.444270461939793, "grad_norm": 0.47225266695022583, "learning_rate": 0.00011116858810226283, "loss": 1.4148, "step": 34189 }, { "epoch": 0.4442834564837089, "grad_norm": 0.5917590856552124, "learning_rate": 0.00011116598864035145, "loss": 1.4876, "step": 34190 }, { "epoch": 0.4442964510276248, "grad_norm": 0.6290504336357117, "learning_rate": 0.00011116338917844006, "loss": 1.4817, "step": 34191 }, { "epoch": 0.4443094455715407, "grad_norm": 0.4107941687107086, "learning_rate": 0.00011116078971652867, "loss": 1.3413, "step": 34192 }, { "epoch": 0.4443224401154565, "grad_norm": 0.4469742476940155, "learning_rate": 0.00011115819025461729, "loss": 1.2886, "step": 34193 }, { "epoch": 0.4443354346593724, "grad_norm": 0.39618921279907227, "learning_rate": 0.00011115559079270592, "loss": 1.5528, "step": 34194 }, { "epoch": 0.44434842920328826, "grad_norm": 0.4320566654205322, "learning_rate": 0.00011115299133079453, "loss": 1.3278, "step": 34195 }, { "epoch": 0.44436142374720417, "grad_norm": 0.444646418094635, "learning_rate": 0.00011115039186888315, "loss": 1.4441, "step": 34196 }, { "epoch": 0.44437441829112, "grad_norm": 0.3444182872772217, "learning_rate": 0.00011114779240697176, "loss": 1.5241, "step": 34197 }, { "epoch": 0.4443874128350359, "grad_norm": 0.41096407175064087, "learning_rate": 0.00011114519294506038, "loss": 1.3688, "step": 34198 }, { "epoch": 0.44440040737895176, "grad_norm": 0.36175450682640076, "learning_rate": 0.00011114259348314899, "loss": 1.3182, "step": 34199 }, { "epoch": 0.44441340192286766, "grad_norm": 0.4703044593334198, "learning_rate": 0.0001111399940212376, "loss": 1.368, "step": 34200 }, { "epoch": 0.4444263964667835, "grad_norm": 0.40687480568885803, "learning_rate": 0.00011113739455932624, "loss": 1.4014, "step": 34201 }, { "epoch": 0.4444393910106994, "grad_norm": 0.39292845129966736, "learning_rate": 0.00011113479509741485, "loss": 1.5355, "step": 34202 }, { "epoch": 0.44445238555461525, "grad_norm": 0.3628689646720886, "learning_rate": 0.00011113219563550345, "loss": 1.2313, "step": 34203 }, { "epoch": 0.44446538009853115, "grad_norm": 0.40662142634391785, "learning_rate": 0.00011112959617359206, "loss": 1.1035, "step": 34204 }, { "epoch": 0.444478374642447, "grad_norm": 0.40217074751853943, "learning_rate": 0.0001111269967116807, "loss": 1.3639, "step": 34205 }, { "epoch": 0.4444913691863629, "grad_norm": 0.3601079285144806, "learning_rate": 0.00011112439724976931, "loss": 1.3261, "step": 34206 }, { "epoch": 0.44450436373027874, "grad_norm": 0.4247356355190277, "learning_rate": 0.00011112179778785792, "loss": 1.3593, "step": 34207 }, { "epoch": 0.44451735827419464, "grad_norm": 0.4169764220714569, "learning_rate": 0.00011111919832594653, "loss": 1.4128, "step": 34208 }, { "epoch": 0.4445303528181105, "grad_norm": 0.4534686207771301, "learning_rate": 0.00011111659886403516, "loss": 1.298, "step": 34209 }, { "epoch": 0.4445433473620264, "grad_norm": 0.4269981384277344, "learning_rate": 0.00011111399940212377, "loss": 1.3738, "step": 34210 }, { "epoch": 0.44455634190594223, "grad_norm": 0.4119773507118225, "learning_rate": 0.00011111139994021238, "loss": 1.3386, "step": 34211 }, { "epoch": 0.44456933644985813, "grad_norm": 0.39342597126960754, "learning_rate": 0.00011110880047830099, "loss": 1.463, "step": 34212 }, { "epoch": 0.444582330993774, "grad_norm": 0.36418724060058594, "learning_rate": 0.00011110620101638963, "loss": 1.4184, "step": 34213 }, { "epoch": 0.4445953255376899, "grad_norm": 0.3751234710216522, "learning_rate": 0.00011110360155447824, "loss": 1.3731, "step": 34214 }, { "epoch": 0.4446083200816057, "grad_norm": 0.3351527154445648, "learning_rate": 0.00011110100209256685, "loss": 1.2924, "step": 34215 }, { "epoch": 0.4446213146255216, "grad_norm": 0.3286838233470917, "learning_rate": 0.00011109840263065545, "loss": 1.3045, "step": 34216 }, { "epoch": 0.44463430916943747, "grad_norm": 0.34017321467399597, "learning_rate": 0.00011109580316874408, "loss": 1.326, "step": 34217 }, { "epoch": 0.44464730371335337, "grad_norm": 0.4791562557220459, "learning_rate": 0.0001110932037068327, "loss": 1.3919, "step": 34218 }, { "epoch": 0.4446602982572692, "grad_norm": 0.33656057715415955, "learning_rate": 0.0001110906042449213, "loss": 1.4916, "step": 34219 }, { "epoch": 0.4446732928011851, "grad_norm": 0.48291119933128357, "learning_rate": 0.00011108800478300992, "loss": 1.3309, "step": 34220 }, { "epoch": 0.44468628734510096, "grad_norm": 0.42001625895500183, "learning_rate": 0.00011108540532109854, "loss": 1.4117, "step": 34221 }, { "epoch": 0.44469928188901686, "grad_norm": 0.28259068727493286, "learning_rate": 0.00011108280585918715, "loss": 1.2551, "step": 34222 }, { "epoch": 0.4447122764329327, "grad_norm": 0.3199802339076996, "learning_rate": 0.00011108020639727576, "loss": 1.5147, "step": 34223 }, { "epoch": 0.4447252709768486, "grad_norm": 0.389824777841568, "learning_rate": 0.00011107760693536437, "loss": 1.3221, "step": 34224 }, { "epoch": 0.44473826552076445, "grad_norm": 0.4450884461402893, "learning_rate": 0.00011107500747345301, "loss": 1.382, "step": 34225 }, { "epoch": 0.44475126006468035, "grad_norm": 0.289813369512558, "learning_rate": 0.00011107240801154162, "loss": 1.3737, "step": 34226 }, { "epoch": 0.4447642546085962, "grad_norm": 0.46458229422569275, "learning_rate": 0.00011106980854963023, "loss": 1.49, "step": 34227 }, { "epoch": 0.4447772491525121, "grad_norm": 0.35163161158561707, "learning_rate": 0.00011106720908771883, "loss": 1.2006, "step": 34228 }, { "epoch": 0.44479024369642794, "grad_norm": 0.38532984256744385, "learning_rate": 0.00011106460962580747, "loss": 1.6946, "step": 34229 }, { "epoch": 0.44480323824034385, "grad_norm": 0.42093268036842346, "learning_rate": 0.00011106201016389608, "loss": 1.3829, "step": 34230 }, { "epoch": 0.4448162327842597, "grad_norm": 0.3260221779346466, "learning_rate": 0.00011105941070198469, "loss": 1.2602, "step": 34231 }, { "epoch": 0.4448292273281756, "grad_norm": 0.3451603651046753, "learning_rate": 0.0001110568112400733, "loss": 1.2731, "step": 34232 }, { "epoch": 0.44484222187209144, "grad_norm": 0.4718239903450012, "learning_rate": 0.00011105421177816193, "loss": 1.442, "step": 34233 }, { "epoch": 0.44485521641600734, "grad_norm": 0.3400033712387085, "learning_rate": 0.00011105161231625054, "loss": 1.2875, "step": 34234 }, { "epoch": 0.4448682109599232, "grad_norm": 0.3504631519317627, "learning_rate": 0.00011104901285433915, "loss": 1.2278, "step": 34235 }, { "epoch": 0.4448812055038391, "grad_norm": 0.44772931933403015, "learning_rate": 0.00011104641339242779, "loss": 1.2935, "step": 34236 }, { "epoch": 0.44489420004775493, "grad_norm": 0.5015880465507507, "learning_rate": 0.0001110438139305164, "loss": 1.4458, "step": 34237 }, { "epoch": 0.44490719459167083, "grad_norm": 0.406540185213089, "learning_rate": 0.00011104121446860501, "loss": 1.4993, "step": 34238 }, { "epoch": 0.4449201891355867, "grad_norm": 0.3785719573497772, "learning_rate": 0.00011103861500669362, "loss": 1.4054, "step": 34239 }, { "epoch": 0.4449331836795026, "grad_norm": 0.3008996248245239, "learning_rate": 0.00011103601554478224, "loss": 1.1654, "step": 34240 }, { "epoch": 0.4449461782234184, "grad_norm": 0.37364912033081055, "learning_rate": 0.00011103341608287085, "loss": 1.3846, "step": 34241 }, { "epoch": 0.4449591727673343, "grad_norm": 0.4528624713420868, "learning_rate": 0.00011103081662095947, "loss": 1.4743, "step": 34242 }, { "epoch": 0.44497216731125017, "grad_norm": 0.41597968339920044, "learning_rate": 0.00011102821715904808, "loss": 1.3934, "step": 34243 }, { "epoch": 0.44498516185516607, "grad_norm": 0.444993793964386, "learning_rate": 0.00011102561769713671, "loss": 1.3334, "step": 34244 }, { "epoch": 0.4449981563990819, "grad_norm": 0.4268054664134979, "learning_rate": 0.00011102301823522531, "loss": 1.3748, "step": 34245 }, { "epoch": 0.4450111509429978, "grad_norm": 0.46148446202278137, "learning_rate": 0.00011102041877331392, "loss": 1.3597, "step": 34246 }, { "epoch": 0.44502414548691366, "grad_norm": 0.38514938950538635, "learning_rate": 0.00011101781931140253, "loss": 1.2185, "step": 34247 }, { "epoch": 0.44503714003082956, "grad_norm": 0.4315408766269684, "learning_rate": 0.00011101521984949117, "loss": 1.3127, "step": 34248 }, { "epoch": 0.4450501345747454, "grad_norm": 0.3872237205505371, "learning_rate": 0.00011101262038757978, "loss": 1.3722, "step": 34249 }, { "epoch": 0.4450631291186613, "grad_norm": 0.448140949010849, "learning_rate": 0.0001110100209256684, "loss": 1.3254, "step": 34250 }, { "epoch": 0.44507612366257715, "grad_norm": 0.4356841444969177, "learning_rate": 0.000111007421463757, "loss": 1.266, "step": 34251 }, { "epoch": 0.44508911820649305, "grad_norm": 0.3740954101085663, "learning_rate": 0.00011100482200184563, "loss": 1.3384, "step": 34252 }, { "epoch": 0.4451021127504089, "grad_norm": 0.4336915910243988, "learning_rate": 0.00011100222253993424, "loss": 1.4123, "step": 34253 }, { "epoch": 0.4451151072943248, "grad_norm": 0.3967514932155609, "learning_rate": 0.00011099962307802285, "loss": 1.4324, "step": 34254 }, { "epoch": 0.44512810183824064, "grad_norm": 0.5248040556907654, "learning_rate": 0.00011099702361611146, "loss": 1.4017, "step": 34255 }, { "epoch": 0.44514109638215654, "grad_norm": 0.35255858302116394, "learning_rate": 0.0001109944241542001, "loss": 1.446, "step": 34256 }, { "epoch": 0.4451540909260724, "grad_norm": 0.48291704058647156, "learning_rate": 0.00011099182469228871, "loss": 1.4915, "step": 34257 }, { "epoch": 0.4451670854699883, "grad_norm": 0.4787138104438782, "learning_rate": 0.00011098922523037731, "loss": 1.3658, "step": 34258 }, { "epoch": 0.4451800800139042, "grad_norm": 0.3056149184703827, "learning_rate": 0.00011098662576846592, "loss": 1.3565, "step": 34259 }, { "epoch": 0.44519307455782003, "grad_norm": 0.4579535126686096, "learning_rate": 0.00011098402630655456, "loss": 1.5442, "step": 34260 }, { "epoch": 0.44520606910173594, "grad_norm": 0.44152453541755676, "learning_rate": 0.00011098142684464317, "loss": 1.4411, "step": 34261 }, { "epoch": 0.4452190636456518, "grad_norm": 0.4361974000930786, "learning_rate": 0.00011097882738273178, "loss": 1.5052, "step": 34262 }, { "epoch": 0.4452320581895677, "grad_norm": 0.5200278162956238, "learning_rate": 0.00011097622792082039, "loss": 1.4301, "step": 34263 }, { "epoch": 0.4452450527334835, "grad_norm": 0.3853200078010559, "learning_rate": 0.00011097362845890901, "loss": 1.5305, "step": 34264 }, { "epoch": 0.4452580472773994, "grad_norm": 0.37018293142318726, "learning_rate": 0.00011097102899699763, "loss": 1.395, "step": 34265 }, { "epoch": 0.44527104182131527, "grad_norm": 0.3669099807739258, "learning_rate": 0.00011096842953508624, "loss": 1.3937, "step": 34266 }, { "epoch": 0.4452840363652312, "grad_norm": 0.46139511466026306, "learning_rate": 0.00011096583007317485, "loss": 1.4425, "step": 34267 }, { "epoch": 0.445297030909147, "grad_norm": 0.3863597810268402, "learning_rate": 0.00011096323061126349, "loss": 1.5086, "step": 34268 }, { "epoch": 0.4453100254530629, "grad_norm": 0.42210930585861206, "learning_rate": 0.0001109606311493521, "loss": 1.3558, "step": 34269 }, { "epoch": 0.44532301999697876, "grad_norm": 0.3843497335910797, "learning_rate": 0.0001109580316874407, "loss": 1.2021, "step": 34270 }, { "epoch": 0.44533601454089466, "grad_norm": 0.3850443363189697, "learning_rate": 0.0001109554322255293, "loss": 1.412, "step": 34271 }, { "epoch": 0.4453490090848105, "grad_norm": 0.4331141412258148, "learning_rate": 0.00011095283276361794, "loss": 1.3311, "step": 34272 }, { "epoch": 0.4453620036287264, "grad_norm": 0.37451043725013733, "learning_rate": 0.00011095023330170655, "loss": 1.4598, "step": 34273 }, { "epoch": 0.44537499817264226, "grad_norm": 0.3784250319004059, "learning_rate": 0.00011094763383979516, "loss": 1.3684, "step": 34274 }, { "epoch": 0.44538799271655816, "grad_norm": 0.35812050104141235, "learning_rate": 0.00011094503437788379, "loss": 1.2087, "step": 34275 }, { "epoch": 0.445400987260474, "grad_norm": 0.4282328486442566, "learning_rate": 0.0001109424349159724, "loss": 1.5197, "step": 34276 }, { "epoch": 0.4454139818043899, "grad_norm": 0.45058783888816833, "learning_rate": 0.00011093983545406101, "loss": 1.3608, "step": 34277 }, { "epoch": 0.44542697634830575, "grad_norm": 0.41182488203048706, "learning_rate": 0.00011093723599214962, "loss": 1.6681, "step": 34278 }, { "epoch": 0.44543997089222165, "grad_norm": 0.3434402644634247, "learning_rate": 0.00011093463653023826, "loss": 1.2741, "step": 34279 }, { "epoch": 0.4454529654361375, "grad_norm": 0.3482365012168884, "learning_rate": 0.00011093203706832687, "loss": 1.5003, "step": 34280 }, { "epoch": 0.4454659599800534, "grad_norm": 0.36723026633262634, "learning_rate": 0.00011092943760641548, "loss": 1.5793, "step": 34281 }, { "epoch": 0.44547895452396924, "grad_norm": 0.3383881151676178, "learning_rate": 0.00011092683814450409, "loss": 1.3145, "step": 34282 }, { "epoch": 0.44549194906788514, "grad_norm": 0.44511863589286804, "learning_rate": 0.00011092423868259272, "loss": 1.3123, "step": 34283 }, { "epoch": 0.445504943611801, "grad_norm": 0.3695056736469269, "learning_rate": 0.00011092163922068133, "loss": 1.161, "step": 34284 }, { "epoch": 0.4455179381557169, "grad_norm": 0.3754224479198456, "learning_rate": 0.00011091903975876994, "loss": 1.6114, "step": 34285 }, { "epoch": 0.44553093269963273, "grad_norm": 0.34770339727401733, "learning_rate": 0.00011091644029685855, "loss": 1.3797, "step": 34286 }, { "epoch": 0.44554392724354863, "grad_norm": 0.3011084198951721, "learning_rate": 0.00011091384083494717, "loss": 1.3314, "step": 34287 }, { "epoch": 0.4455569217874645, "grad_norm": 0.48076823353767395, "learning_rate": 0.00011091124137303579, "loss": 1.4217, "step": 34288 }, { "epoch": 0.4455699163313804, "grad_norm": 0.41937559843063354, "learning_rate": 0.0001109086419111244, "loss": 1.2841, "step": 34289 }, { "epoch": 0.4455829108752962, "grad_norm": 0.35681232810020447, "learning_rate": 0.00011090604244921301, "loss": 1.3194, "step": 34290 }, { "epoch": 0.4455959054192121, "grad_norm": 0.3575684428215027, "learning_rate": 0.00011090344298730164, "loss": 1.4678, "step": 34291 }, { "epoch": 0.44560889996312797, "grad_norm": 0.376727432012558, "learning_rate": 0.00011090084352539026, "loss": 1.4629, "step": 34292 }, { "epoch": 0.44562189450704387, "grad_norm": 0.4357430338859558, "learning_rate": 0.00011089824406347887, "loss": 1.4222, "step": 34293 }, { "epoch": 0.4456348890509597, "grad_norm": 0.4371733069419861, "learning_rate": 0.00011089564460156748, "loss": 1.3941, "step": 34294 }, { "epoch": 0.4456478835948756, "grad_norm": 0.38030749559402466, "learning_rate": 0.0001108930451396561, "loss": 1.3299, "step": 34295 }, { "epoch": 0.44566087813879146, "grad_norm": 0.41900601983070374, "learning_rate": 0.00011089044567774471, "loss": 1.2317, "step": 34296 }, { "epoch": 0.44567387268270736, "grad_norm": 0.4090757369995117, "learning_rate": 0.00011088784621583332, "loss": 1.4425, "step": 34297 }, { "epoch": 0.4456868672266232, "grad_norm": 0.34591686725616455, "learning_rate": 0.00011088524675392194, "loss": 1.3357, "step": 34298 }, { "epoch": 0.4456998617705391, "grad_norm": 0.466488242149353, "learning_rate": 0.00011088264729201057, "loss": 1.5224, "step": 34299 }, { "epoch": 0.44571285631445495, "grad_norm": 0.39455318450927734, "learning_rate": 0.00011088004783009917, "loss": 1.5287, "step": 34300 }, { "epoch": 0.44572585085837085, "grad_norm": 0.4686304032802582, "learning_rate": 0.00011087744836818778, "loss": 1.4426, "step": 34301 }, { "epoch": 0.4457388454022867, "grad_norm": 0.49372440576553345, "learning_rate": 0.00011087484890627639, "loss": 1.514, "step": 34302 }, { "epoch": 0.4457518399462026, "grad_norm": 0.3624727129936218, "learning_rate": 0.00011087224944436503, "loss": 1.2991, "step": 34303 }, { "epoch": 0.44576483449011844, "grad_norm": 0.36775651574134827, "learning_rate": 0.00011086964998245364, "loss": 1.4698, "step": 34304 }, { "epoch": 0.44577782903403435, "grad_norm": 0.4583088457584381, "learning_rate": 0.00011086705052054225, "loss": 1.3263, "step": 34305 }, { "epoch": 0.4457908235779502, "grad_norm": 0.3856181204319, "learning_rate": 0.00011086445105863086, "loss": 1.345, "step": 34306 }, { "epoch": 0.4458038181218661, "grad_norm": 0.3952924609184265, "learning_rate": 0.00011086185159671949, "loss": 1.4165, "step": 34307 }, { "epoch": 0.44581681266578194, "grad_norm": 0.45340263843536377, "learning_rate": 0.0001108592521348081, "loss": 1.3613, "step": 34308 }, { "epoch": 0.44582980720969784, "grad_norm": 0.4164819121360779, "learning_rate": 0.00011085665267289671, "loss": 1.4309, "step": 34309 }, { "epoch": 0.4458428017536137, "grad_norm": 0.34728381037712097, "learning_rate": 0.00011085405321098532, "loss": 1.3772, "step": 34310 }, { "epoch": 0.4458557962975296, "grad_norm": 0.414786159992218, "learning_rate": 0.00011085145374907396, "loss": 1.2413, "step": 34311 }, { "epoch": 0.44586879084144543, "grad_norm": 0.4210526645183563, "learning_rate": 0.00011084885428716256, "loss": 1.3341, "step": 34312 }, { "epoch": 0.44588178538536133, "grad_norm": 0.3422400653362274, "learning_rate": 0.00011084625482525117, "loss": 1.3576, "step": 34313 }, { "epoch": 0.4458947799292772, "grad_norm": 0.4079273045063019, "learning_rate": 0.0001108436553633398, "loss": 1.5042, "step": 34314 }, { "epoch": 0.4459077744731931, "grad_norm": 0.36557114124298096, "learning_rate": 0.00011084105590142842, "loss": 1.2557, "step": 34315 }, { "epoch": 0.4459207690171089, "grad_norm": 0.3832559883594513, "learning_rate": 0.00011083845643951703, "loss": 1.3405, "step": 34316 }, { "epoch": 0.4459337635610248, "grad_norm": 0.4136294722557068, "learning_rate": 0.00011083585697760564, "loss": 1.4496, "step": 34317 }, { "epoch": 0.44594675810494067, "grad_norm": 0.3425063490867615, "learning_rate": 0.00011083325751569426, "loss": 1.4967, "step": 34318 }, { "epoch": 0.44595975264885657, "grad_norm": 0.24988465011119843, "learning_rate": 0.00011083065805378287, "loss": 1.3096, "step": 34319 }, { "epoch": 0.4459727471927724, "grad_norm": 0.3460886776447296, "learning_rate": 0.00011082805859187148, "loss": 1.2446, "step": 34320 }, { "epoch": 0.4459857417366883, "grad_norm": 0.4185510575771332, "learning_rate": 0.0001108254591299601, "loss": 1.5271, "step": 34321 }, { "epoch": 0.44599873628060416, "grad_norm": 0.34249839186668396, "learning_rate": 0.00011082285966804873, "loss": 1.3808, "step": 34322 }, { "epoch": 0.44601173082452006, "grad_norm": 0.4375338852405548, "learning_rate": 0.00011082026020613734, "loss": 1.5688, "step": 34323 }, { "epoch": 0.4460247253684359, "grad_norm": 0.36479687690734863, "learning_rate": 0.00011081766074422595, "loss": 1.2566, "step": 34324 }, { "epoch": 0.4460377199123518, "grad_norm": 0.3792046308517456, "learning_rate": 0.00011081506128231455, "loss": 1.4592, "step": 34325 }, { "epoch": 0.44605071445626765, "grad_norm": 0.4051467776298523, "learning_rate": 0.00011081246182040319, "loss": 1.3335, "step": 34326 }, { "epoch": 0.44606370900018355, "grad_norm": 0.49074748158454895, "learning_rate": 0.0001108098623584918, "loss": 1.443, "step": 34327 }, { "epoch": 0.4460767035440994, "grad_norm": 0.4439942538738251, "learning_rate": 0.00011080726289658041, "loss": 1.5425, "step": 34328 }, { "epoch": 0.4460896980880153, "grad_norm": 0.35491228103637695, "learning_rate": 0.00011080466343466902, "loss": 1.3911, "step": 34329 }, { "epoch": 0.44610269263193114, "grad_norm": 0.39883947372436523, "learning_rate": 0.00011080206397275765, "loss": 1.397, "step": 34330 }, { "epoch": 0.44611568717584704, "grad_norm": 0.3358071446418762, "learning_rate": 0.00011079946451084626, "loss": 1.3331, "step": 34331 }, { "epoch": 0.4461286817197629, "grad_norm": 0.4334869980812073, "learning_rate": 0.00011079686504893487, "loss": 1.4793, "step": 34332 }, { "epoch": 0.4461416762636788, "grad_norm": 0.4272914230823517, "learning_rate": 0.00011079426558702348, "loss": 1.6548, "step": 34333 }, { "epoch": 0.4461546708075947, "grad_norm": 0.34586331248283386, "learning_rate": 0.00011079166612511212, "loss": 1.2473, "step": 34334 }, { "epoch": 0.44616766535151053, "grad_norm": 0.5395427346229553, "learning_rate": 0.00011078906666320073, "loss": 1.2849, "step": 34335 }, { "epoch": 0.44618065989542643, "grad_norm": 0.3320787250995636, "learning_rate": 0.00011078646720128934, "loss": 1.4171, "step": 34336 }, { "epoch": 0.4461936544393423, "grad_norm": 0.48876485228538513, "learning_rate": 0.00011078386773937795, "loss": 1.6469, "step": 34337 }, { "epoch": 0.4462066489832582, "grad_norm": 0.36836597323417664, "learning_rate": 0.00011078126827746658, "loss": 1.3296, "step": 34338 }, { "epoch": 0.446219643527174, "grad_norm": 0.4083898365497589, "learning_rate": 0.00011077866881555519, "loss": 1.6228, "step": 34339 }, { "epoch": 0.4462326380710899, "grad_norm": 0.4508657157421112, "learning_rate": 0.0001107760693536438, "loss": 1.5912, "step": 34340 }, { "epoch": 0.44624563261500577, "grad_norm": 0.3489711582660675, "learning_rate": 0.00011077346989173241, "loss": 1.5068, "step": 34341 }, { "epoch": 0.4462586271589217, "grad_norm": 0.34017565846443176, "learning_rate": 0.00011077087042982103, "loss": 1.3839, "step": 34342 }, { "epoch": 0.4462716217028375, "grad_norm": 0.4049300253391266, "learning_rate": 0.00011076827096790964, "loss": 1.4943, "step": 34343 }, { "epoch": 0.4462846162467534, "grad_norm": 0.318954735994339, "learning_rate": 0.00011076567150599825, "loss": 1.2029, "step": 34344 }, { "epoch": 0.44629761079066926, "grad_norm": 0.3333900272846222, "learning_rate": 0.00011076307204408687, "loss": 1.2969, "step": 34345 }, { "epoch": 0.44631060533458516, "grad_norm": 0.444560170173645, "learning_rate": 0.0001107604725821755, "loss": 1.5042, "step": 34346 }, { "epoch": 0.446323599878501, "grad_norm": 0.5023630261421204, "learning_rate": 0.00011075787312026411, "loss": 1.5149, "step": 34347 }, { "epoch": 0.4463365944224169, "grad_norm": 0.3006480038166046, "learning_rate": 0.00011075527365835273, "loss": 1.3705, "step": 34348 }, { "epoch": 0.44634958896633276, "grad_norm": 0.4001215100288391, "learning_rate": 0.00011075267419644135, "loss": 1.3932, "step": 34349 }, { "epoch": 0.44636258351024866, "grad_norm": 0.34508216381073, "learning_rate": 0.00011075007473452996, "loss": 1.2686, "step": 34350 }, { "epoch": 0.4463755780541645, "grad_norm": 0.4526347517967224, "learning_rate": 0.00011074747527261857, "loss": 1.601, "step": 34351 }, { "epoch": 0.4463885725980804, "grad_norm": 0.33056217432022095, "learning_rate": 0.00011074487581070718, "loss": 1.377, "step": 34352 }, { "epoch": 0.44640156714199625, "grad_norm": 0.41972944140434265, "learning_rate": 0.00011074227634879582, "loss": 1.4054, "step": 34353 }, { "epoch": 0.44641456168591215, "grad_norm": 0.5326032042503357, "learning_rate": 0.00011073967688688442, "loss": 1.4111, "step": 34354 }, { "epoch": 0.446427556229828, "grad_norm": 0.40371203422546387, "learning_rate": 0.00011073707742497303, "loss": 1.418, "step": 34355 }, { "epoch": 0.4464405507737439, "grad_norm": 0.3251248300075531, "learning_rate": 0.00011073447796306164, "loss": 1.35, "step": 34356 }, { "epoch": 0.44645354531765974, "grad_norm": 0.3798600137233734, "learning_rate": 0.00011073187850115028, "loss": 1.2489, "step": 34357 }, { "epoch": 0.44646653986157564, "grad_norm": 0.3740684688091278, "learning_rate": 0.00011072927903923889, "loss": 1.3124, "step": 34358 }, { "epoch": 0.4464795344054915, "grad_norm": 0.39169231057167053, "learning_rate": 0.0001107266795773275, "loss": 1.5055, "step": 34359 }, { "epoch": 0.4464925289494074, "grad_norm": 0.3304338753223419, "learning_rate": 0.00011072408011541611, "loss": 1.2963, "step": 34360 }, { "epoch": 0.44650552349332323, "grad_norm": 0.33984822034835815, "learning_rate": 0.00011072148065350474, "loss": 1.5574, "step": 34361 }, { "epoch": 0.44651851803723913, "grad_norm": 0.3603609502315521, "learning_rate": 0.00011071888119159335, "loss": 1.2787, "step": 34362 }, { "epoch": 0.446531512581155, "grad_norm": 0.37465938925743103, "learning_rate": 0.00011071628172968196, "loss": 1.3149, "step": 34363 }, { "epoch": 0.4465445071250709, "grad_norm": 0.4414732754230499, "learning_rate": 0.00011071368226777057, "loss": 1.607, "step": 34364 }, { "epoch": 0.4465575016689867, "grad_norm": 0.4735172986984253, "learning_rate": 0.0001107110828058592, "loss": 1.395, "step": 34365 }, { "epoch": 0.4465704962129026, "grad_norm": 0.36253899335861206, "learning_rate": 0.00011070848334394782, "loss": 1.3045, "step": 34366 }, { "epoch": 0.44658349075681847, "grad_norm": 0.3687044382095337, "learning_rate": 0.00011070588388203641, "loss": 1.3396, "step": 34367 }, { "epoch": 0.44659648530073437, "grad_norm": 0.3425123691558838, "learning_rate": 0.00011070328442012503, "loss": 1.5265, "step": 34368 }, { "epoch": 0.4466094798446502, "grad_norm": 0.4352796673774719, "learning_rate": 0.00011070068495821366, "loss": 1.4022, "step": 34369 }, { "epoch": 0.4466224743885661, "grad_norm": 0.39405304193496704, "learning_rate": 0.00011069808549630227, "loss": 1.6418, "step": 34370 }, { "epoch": 0.44663546893248196, "grad_norm": 0.33813199400901794, "learning_rate": 0.00011069548603439089, "loss": 1.2852, "step": 34371 }, { "epoch": 0.44664846347639786, "grad_norm": 0.32915568351745605, "learning_rate": 0.0001106928865724795, "loss": 1.4585, "step": 34372 }, { "epoch": 0.4466614580203137, "grad_norm": 0.49793750047683716, "learning_rate": 0.00011069028711056812, "loss": 1.5444, "step": 34373 }, { "epoch": 0.4466744525642296, "grad_norm": 0.48389121890068054, "learning_rate": 0.00011068768764865673, "loss": 1.329, "step": 34374 }, { "epoch": 0.44668744710814545, "grad_norm": 0.4808426797389984, "learning_rate": 0.00011068508818674534, "loss": 1.3835, "step": 34375 }, { "epoch": 0.44670044165206135, "grad_norm": 0.43283843994140625, "learning_rate": 0.00011068248872483395, "loss": 1.55, "step": 34376 }, { "epoch": 0.4467134361959772, "grad_norm": 0.3492809236049652, "learning_rate": 0.00011067988926292259, "loss": 1.3043, "step": 34377 }, { "epoch": 0.4467264307398931, "grad_norm": 0.414581298828125, "learning_rate": 0.0001106772898010112, "loss": 1.3926, "step": 34378 }, { "epoch": 0.44673942528380894, "grad_norm": 0.4179449677467346, "learning_rate": 0.00011067469033909981, "loss": 1.462, "step": 34379 }, { "epoch": 0.44675241982772484, "grad_norm": 0.43760010600090027, "learning_rate": 0.00011067209087718841, "loss": 1.4484, "step": 34380 }, { "epoch": 0.4467654143716407, "grad_norm": 0.34663277864456177, "learning_rate": 0.00011066949141527705, "loss": 1.4346, "step": 34381 }, { "epoch": 0.4467784089155566, "grad_norm": 0.29440104961395264, "learning_rate": 0.00011066689195336566, "loss": 1.3672, "step": 34382 }, { "epoch": 0.44679140345947244, "grad_norm": 0.40804335474967957, "learning_rate": 0.00011066429249145427, "loss": 1.5027, "step": 34383 }, { "epoch": 0.44680439800338834, "grad_norm": 0.4632614850997925, "learning_rate": 0.00011066169302954288, "loss": 1.5145, "step": 34384 }, { "epoch": 0.4468173925473042, "grad_norm": 0.35557109117507935, "learning_rate": 0.0001106590935676315, "loss": 1.552, "step": 34385 }, { "epoch": 0.4468303870912201, "grad_norm": 0.5257606506347656, "learning_rate": 0.00011065649410572012, "loss": 1.5322, "step": 34386 }, { "epoch": 0.44684338163513593, "grad_norm": 0.4096625745296478, "learning_rate": 0.00011065389464380873, "loss": 1.6029, "step": 34387 }, { "epoch": 0.44685637617905183, "grad_norm": 0.5139939785003662, "learning_rate": 0.00011065129518189737, "loss": 1.4038, "step": 34388 }, { "epoch": 0.4468693707229677, "grad_norm": 0.38154035806655884, "learning_rate": 0.00011064869571998598, "loss": 1.3196, "step": 34389 }, { "epoch": 0.4468823652668836, "grad_norm": 0.40146270394325256, "learning_rate": 0.00011064609625807459, "loss": 1.3116, "step": 34390 }, { "epoch": 0.4468953598107994, "grad_norm": 0.4216993451118469, "learning_rate": 0.0001106434967961632, "loss": 1.2049, "step": 34391 }, { "epoch": 0.4469083543547153, "grad_norm": 0.4717008173465729, "learning_rate": 0.00011064089733425182, "loss": 1.1986, "step": 34392 }, { "epoch": 0.44692134889863117, "grad_norm": 0.3446395695209503, "learning_rate": 0.00011063829787234043, "loss": 1.3047, "step": 34393 }, { "epoch": 0.44693434344254707, "grad_norm": 0.42920851707458496, "learning_rate": 0.00011063569841042905, "loss": 1.5195, "step": 34394 }, { "epoch": 0.4469473379864629, "grad_norm": 0.34217706322669983, "learning_rate": 0.00011063309894851766, "loss": 1.499, "step": 34395 }, { "epoch": 0.4469603325303788, "grad_norm": 0.340866357088089, "learning_rate": 0.00011063049948660628, "loss": 1.6144, "step": 34396 }, { "epoch": 0.44697332707429466, "grad_norm": 0.4250718355178833, "learning_rate": 0.00011062790002469489, "loss": 1.5402, "step": 34397 }, { "epoch": 0.44698632161821056, "grad_norm": 0.38750213384628296, "learning_rate": 0.0001106253005627835, "loss": 1.3533, "step": 34398 }, { "epoch": 0.4469993161621264, "grad_norm": 0.4290032684803009, "learning_rate": 0.00011062270110087211, "loss": 1.3773, "step": 34399 }, { "epoch": 0.4470123107060423, "grad_norm": 0.4724940359592438, "learning_rate": 0.00011062010163896075, "loss": 1.3699, "step": 34400 }, { "epoch": 0.44702530524995815, "grad_norm": 0.3969428837299347, "learning_rate": 0.00011061750217704936, "loss": 1.2504, "step": 34401 }, { "epoch": 0.44703829979387405, "grad_norm": 2.914214611053467, "learning_rate": 0.00011061490271513797, "loss": 1.3565, "step": 34402 }, { "epoch": 0.4470512943377899, "grad_norm": 0.47409042716026306, "learning_rate": 0.00011061230325322658, "loss": 1.4433, "step": 34403 }, { "epoch": 0.4470642888817058, "grad_norm": 0.4451931416988373, "learning_rate": 0.00011060970379131521, "loss": 1.3567, "step": 34404 }, { "epoch": 0.44707728342562164, "grad_norm": 0.4385353624820709, "learning_rate": 0.00011060710432940382, "loss": 1.2889, "step": 34405 }, { "epoch": 0.44709027796953754, "grad_norm": 0.3574380576610565, "learning_rate": 0.00011060450486749243, "loss": 1.4282, "step": 34406 }, { "epoch": 0.4471032725134534, "grad_norm": 0.38908064365386963, "learning_rate": 0.00011060190540558104, "loss": 1.2899, "step": 34407 }, { "epoch": 0.4471162670573693, "grad_norm": 0.4133598506450653, "learning_rate": 0.00011059930594366968, "loss": 1.4081, "step": 34408 }, { "epoch": 0.44712926160128513, "grad_norm": 0.4932892322540283, "learning_rate": 0.00011059670648175828, "loss": 1.519, "step": 34409 }, { "epoch": 0.44714225614520103, "grad_norm": 0.4468093514442444, "learning_rate": 0.00011059410701984689, "loss": 1.4823, "step": 34410 }, { "epoch": 0.44715525068911693, "grad_norm": 0.38885483145713806, "learning_rate": 0.0001105915075579355, "loss": 1.3554, "step": 34411 }, { "epoch": 0.4471682452330328, "grad_norm": 0.2962060570716858, "learning_rate": 0.00011058890809602414, "loss": 1.2117, "step": 34412 }, { "epoch": 0.4471812397769487, "grad_norm": 0.36810505390167236, "learning_rate": 0.00011058630863411275, "loss": 1.2322, "step": 34413 }, { "epoch": 0.4471942343208645, "grad_norm": 0.37185022234916687, "learning_rate": 0.00011058370917220136, "loss": 1.3384, "step": 34414 }, { "epoch": 0.4472072288647804, "grad_norm": 0.3865322470664978, "learning_rate": 0.00011058110971028997, "loss": 1.3367, "step": 34415 }, { "epoch": 0.44722022340869627, "grad_norm": 0.35716453194618225, "learning_rate": 0.0001105785102483786, "loss": 1.3146, "step": 34416 }, { "epoch": 0.44723321795261217, "grad_norm": 0.3830145001411438, "learning_rate": 0.0001105759107864672, "loss": 1.2192, "step": 34417 }, { "epoch": 0.447246212496528, "grad_norm": 0.3806319832801819, "learning_rate": 0.00011057331132455582, "loss": 1.3687, "step": 34418 }, { "epoch": 0.4472592070404439, "grad_norm": 0.4754382073879242, "learning_rate": 0.00011057071186264443, "loss": 1.679, "step": 34419 }, { "epoch": 0.44727220158435976, "grad_norm": 0.479158490896225, "learning_rate": 0.00011056811240073306, "loss": 1.2415, "step": 34420 }, { "epoch": 0.44728519612827566, "grad_norm": 0.5324766635894775, "learning_rate": 0.00011056551293882168, "loss": 1.5294, "step": 34421 }, { "epoch": 0.4472981906721915, "grad_norm": 0.44869571924209595, "learning_rate": 0.00011056291347691027, "loss": 1.4757, "step": 34422 }, { "epoch": 0.4473111852161074, "grad_norm": 0.4435884356498718, "learning_rate": 0.00011056031401499891, "loss": 1.3288, "step": 34423 }, { "epoch": 0.44732417976002325, "grad_norm": 0.39496248960494995, "learning_rate": 0.00011055771455308752, "loss": 1.4739, "step": 34424 }, { "epoch": 0.44733717430393916, "grad_norm": 0.3510694205760956, "learning_rate": 0.00011055511509117613, "loss": 1.3261, "step": 34425 }, { "epoch": 0.447350168847855, "grad_norm": 0.37289169430732727, "learning_rate": 0.00011055251562926474, "loss": 1.5314, "step": 34426 }, { "epoch": 0.4473631633917709, "grad_norm": 0.46063491702079773, "learning_rate": 0.00011054991616735337, "loss": 1.4496, "step": 34427 }, { "epoch": 0.44737615793568675, "grad_norm": 0.396872341632843, "learning_rate": 0.00011054731670544198, "loss": 1.4397, "step": 34428 }, { "epoch": 0.44738915247960265, "grad_norm": 0.48116815090179443, "learning_rate": 0.00011054471724353059, "loss": 1.4658, "step": 34429 }, { "epoch": 0.4474021470235185, "grad_norm": 0.3058229386806488, "learning_rate": 0.0001105421177816192, "loss": 1.4166, "step": 34430 }, { "epoch": 0.4474151415674344, "grad_norm": 0.4168516993522644, "learning_rate": 0.00011053951831970784, "loss": 1.1931, "step": 34431 }, { "epoch": 0.44742813611135024, "grad_norm": 0.3161821961402893, "learning_rate": 0.00011053691885779645, "loss": 1.3013, "step": 34432 }, { "epoch": 0.44744113065526614, "grad_norm": 0.5002384781837463, "learning_rate": 0.00011053431939588506, "loss": 1.4364, "step": 34433 }, { "epoch": 0.447454125199182, "grad_norm": 0.4513053894042969, "learning_rate": 0.00011053171993397366, "loss": 1.2283, "step": 34434 }, { "epoch": 0.4474671197430979, "grad_norm": 0.4575236439704895, "learning_rate": 0.0001105291204720623, "loss": 1.3915, "step": 34435 }, { "epoch": 0.44748011428701373, "grad_norm": 0.46848222613334656, "learning_rate": 0.00011052652101015091, "loss": 1.4672, "step": 34436 }, { "epoch": 0.44749310883092963, "grad_norm": 0.44768261909484863, "learning_rate": 0.00011052392154823952, "loss": 1.4768, "step": 34437 }, { "epoch": 0.4475061033748455, "grad_norm": 0.3839968740940094, "learning_rate": 0.00011052132208632813, "loss": 1.3417, "step": 34438 }, { "epoch": 0.4475190979187614, "grad_norm": 0.4355773329734802, "learning_rate": 0.00011051872262441675, "loss": 1.4958, "step": 34439 }, { "epoch": 0.4475320924626772, "grad_norm": 0.32546356320381165, "learning_rate": 0.00011051612316250536, "loss": 1.4501, "step": 34440 }, { "epoch": 0.4475450870065931, "grad_norm": 0.42174601554870605, "learning_rate": 0.00011051352370059398, "loss": 1.4063, "step": 34441 }, { "epoch": 0.44755808155050897, "grad_norm": 0.3652845025062561, "learning_rate": 0.00011051092423868259, "loss": 1.2826, "step": 34442 }, { "epoch": 0.44757107609442487, "grad_norm": 0.419465035200119, "learning_rate": 0.00011050832477677122, "loss": 1.4158, "step": 34443 }, { "epoch": 0.4475840706383407, "grad_norm": 0.37714046239852905, "learning_rate": 0.00011050572531485984, "loss": 1.1297, "step": 34444 }, { "epoch": 0.4475970651822566, "grad_norm": 0.3914308547973633, "learning_rate": 0.00011050312585294845, "loss": 1.3916, "step": 34445 }, { "epoch": 0.44761005972617246, "grad_norm": 0.3930824100971222, "learning_rate": 0.00011050052639103706, "loss": 1.2877, "step": 34446 }, { "epoch": 0.44762305427008836, "grad_norm": 0.452641099691391, "learning_rate": 0.00011049792692912568, "loss": 1.3427, "step": 34447 }, { "epoch": 0.4476360488140042, "grad_norm": 0.4003320634365082, "learning_rate": 0.00011049532746721429, "loss": 1.2218, "step": 34448 }, { "epoch": 0.4476490433579201, "grad_norm": 0.4607212543487549, "learning_rate": 0.0001104927280053029, "loss": 1.4456, "step": 34449 }, { "epoch": 0.44766203790183595, "grad_norm": 0.3892253339290619, "learning_rate": 0.00011049012854339151, "loss": 1.3554, "step": 34450 }, { "epoch": 0.44767503244575185, "grad_norm": 0.3758752942085266, "learning_rate": 0.00011048752908148014, "loss": 1.3145, "step": 34451 }, { "epoch": 0.4476880269896677, "grad_norm": 0.44572144746780396, "learning_rate": 0.00011048492961956875, "loss": 1.1913, "step": 34452 }, { "epoch": 0.4477010215335836, "grad_norm": 0.33415356278419495, "learning_rate": 0.00011048233015765736, "loss": 1.2895, "step": 34453 }, { "epoch": 0.44771401607749944, "grad_norm": 0.4133395850658417, "learning_rate": 0.00011047973069574597, "loss": 1.4047, "step": 34454 }, { "epoch": 0.44772701062141534, "grad_norm": 0.3269536793231964, "learning_rate": 0.00011047713123383461, "loss": 1.3029, "step": 34455 }, { "epoch": 0.4477400051653312, "grad_norm": 0.35477206110954285, "learning_rate": 0.00011047453177192322, "loss": 1.25, "step": 34456 }, { "epoch": 0.4477529997092471, "grad_norm": 0.31776291131973267, "learning_rate": 0.00011047193231001183, "loss": 1.4291, "step": 34457 }, { "epoch": 0.44776599425316294, "grad_norm": 0.3256598114967346, "learning_rate": 0.00011046933284810044, "loss": 1.391, "step": 34458 }, { "epoch": 0.44777898879707884, "grad_norm": 0.4343120753765106, "learning_rate": 0.00011046673338618907, "loss": 1.4171, "step": 34459 }, { "epoch": 0.4477919833409947, "grad_norm": 0.5201101899147034, "learning_rate": 0.00011046413392427768, "loss": 1.5068, "step": 34460 }, { "epoch": 0.4478049778849106, "grad_norm": 0.4345944821834564, "learning_rate": 0.00011046153446236629, "loss": 1.4696, "step": 34461 }, { "epoch": 0.4478179724288264, "grad_norm": 0.2510223686695099, "learning_rate": 0.00011045893500045493, "loss": 1.169, "step": 34462 }, { "epoch": 0.44783096697274233, "grad_norm": 0.4620783030986786, "learning_rate": 0.00011045633553854354, "loss": 1.4724, "step": 34463 }, { "epoch": 0.4478439615166582, "grad_norm": 0.45253750681877136, "learning_rate": 0.00011045373607663214, "loss": 1.424, "step": 34464 }, { "epoch": 0.4478569560605741, "grad_norm": 0.41018766164779663, "learning_rate": 0.00011045113661472075, "loss": 1.517, "step": 34465 }, { "epoch": 0.4478699506044899, "grad_norm": 0.35038521885871887, "learning_rate": 0.00011044853715280938, "loss": 1.1256, "step": 34466 }, { "epoch": 0.4478829451484058, "grad_norm": 0.33538463711738586, "learning_rate": 0.000110445937690898, "loss": 1.7104, "step": 34467 }, { "epoch": 0.44789593969232167, "grad_norm": 0.4250282347202301, "learning_rate": 0.0001104433382289866, "loss": 1.4537, "step": 34468 }, { "epoch": 0.44790893423623757, "grad_norm": 0.445030152797699, "learning_rate": 0.00011044073876707522, "loss": 1.3583, "step": 34469 }, { "epoch": 0.4479219287801534, "grad_norm": 0.3021256923675537, "learning_rate": 0.00011043813930516384, "loss": 0.97, "step": 34470 }, { "epoch": 0.4479349233240693, "grad_norm": 0.43961846828460693, "learning_rate": 0.00011043553984325245, "loss": 1.4222, "step": 34471 }, { "epoch": 0.44794791786798516, "grad_norm": 0.4001900553703308, "learning_rate": 0.00011043294038134106, "loss": 1.5473, "step": 34472 }, { "epoch": 0.44796091241190106, "grad_norm": 0.40726974606513977, "learning_rate": 0.00011043034091942967, "loss": 1.294, "step": 34473 }, { "epoch": 0.4479739069558169, "grad_norm": 0.32053637504577637, "learning_rate": 0.00011042774145751831, "loss": 1.4564, "step": 34474 }, { "epoch": 0.4479869014997328, "grad_norm": 0.3984828293323517, "learning_rate": 0.00011042514199560692, "loss": 1.3669, "step": 34475 }, { "epoch": 0.44799989604364865, "grad_norm": 0.4609595239162445, "learning_rate": 0.00011042254253369552, "loss": 1.4772, "step": 34476 }, { "epoch": 0.44801289058756455, "grad_norm": 0.3621473014354706, "learning_rate": 0.00011041994307178413, "loss": 1.3998, "step": 34477 }, { "epoch": 0.4480258851314804, "grad_norm": 0.4369111955165863, "learning_rate": 0.00011041734360987277, "loss": 1.4628, "step": 34478 }, { "epoch": 0.4480388796753963, "grad_norm": 0.3987017571926117, "learning_rate": 0.00011041474414796138, "loss": 1.41, "step": 34479 }, { "epoch": 0.44805187421931214, "grad_norm": 0.3982883095741272, "learning_rate": 0.00011041214468604999, "loss": 1.2265, "step": 34480 }, { "epoch": 0.44806486876322804, "grad_norm": 0.36007747054100037, "learning_rate": 0.0001104095452241386, "loss": 1.4363, "step": 34481 }, { "epoch": 0.4480778633071439, "grad_norm": 0.3950411081314087, "learning_rate": 0.00011040694576222723, "loss": 1.3897, "step": 34482 }, { "epoch": 0.4480908578510598, "grad_norm": 0.5346966981887817, "learning_rate": 0.00011040434630031584, "loss": 1.4435, "step": 34483 }, { "epoch": 0.44810385239497563, "grad_norm": 0.32372596859931946, "learning_rate": 0.00011040174683840445, "loss": 1.3193, "step": 34484 }, { "epoch": 0.44811684693889153, "grad_norm": 0.36244630813598633, "learning_rate": 0.00011039914737649306, "loss": 1.2904, "step": 34485 }, { "epoch": 0.44812984148280743, "grad_norm": 0.3881213963031769, "learning_rate": 0.0001103965479145817, "loss": 1.477, "step": 34486 }, { "epoch": 0.4481428360267233, "grad_norm": 0.3589574992656708, "learning_rate": 0.00011039394845267031, "loss": 1.4025, "step": 34487 }, { "epoch": 0.4481558305706392, "grad_norm": 0.4735058844089508, "learning_rate": 0.00011039134899075892, "loss": 1.3803, "step": 34488 }, { "epoch": 0.448168825114555, "grad_norm": 0.398260235786438, "learning_rate": 0.00011038874952884752, "loss": 1.1483, "step": 34489 }, { "epoch": 0.4481818196584709, "grad_norm": 0.3908102810382843, "learning_rate": 0.00011038615006693616, "loss": 1.3377, "step": 34490 }, { "epoch": 0.44819481420238677, "grad_norm": 0.4203278124332428, "learning_rate": 0.00011038355060502477, "loss": 1.3267, "step": 34491 }, { "epoch": 0.44820780874630267, "grad_norm": 0.40055733919143677, "learning_rate": 0.00011038095114311338, "loss": 1.6408, "step": 34492 }, { "epoch": 0.4482208032902185, "grad_norm": 0.401552677154541, "learning_rate": 0.00011037835168120199, "loss": 1.3387, "step": 34493 }, { "epoch": 0.4482337978341344, "grad_norm": 0.3981100022792816, "learning_rate": 0.00011037575221929061, "loss": 1.3632, "step": 34494 }, { "epoch": 0.44824679237805026, "grad_norm": 0.3752848505973816, "learning_rate": 0.00011037315275737922, "loss": 1.34, "step": 34495 }, { "epoch": 0.44825978692196616, "grad_norm": 0.5562670826911926, "learning_rate": 0.00011037055329546783, "loss": 1.4506, "step": 34496 }, { "epoch": 0.448272781465882, "grad_norm": 0.3460952937602997, "learning_rate": 0.00011036795383355647, "loss": 1.1565, "step": 34497 }, { "epoch": 0.4482857760097979, "grad_norm": 0.3721766471862793, "learning_rate": 0.00011036535437164508, "loss": 1.1986, "step": 34498 }, { "epoch": 0.44829877055371375, "grad_norm": 0.4068165719509125, "learning_rate": 0.0001103627549097337, "loss": 1.314, "step": 34499 }, { "epoch": 0.44831176509762966, "grad_norm": 0.5032851696014404, "learning_rate": 0.0001103601554478223, "loss": 1.406, "step": 34500 }, { "epoch": 0.4483247596415455, "grad_norm": 0.43065133690834045, "learning_rate": 0.00011035755598591093, "loss": 1.6215, "step": 34501 }, { "epoch": 0.4483377541854614, "grad_norm": 0.35741227865219116, "learning_rate": 0.00011035495652399954, "loss": 1.4652, "step": 34502 }, { "epoch": 0.44835074872937725, "grad_norm": 0.425245076417923, "learning_rate": 0.00011035235706208815, "loss": 1.4227, "step": 34503 }, { "epoch": 0.44836374327329315, "grad_norm": 0.4913164973258972, "learning_rate": 0.00011034975760017676, "loss": 1.4429, "step": 34504 }, { "epoch": 0.448376737817209, "grad_norm": 0.4327201247215271, "learning_rate": 0.0001103471581382654, "loss": 1.3286, "step": 34505 }, { "epoch": 0.4483897323611249, "grad_norm": 0.33906733989715576, "learning_rate": 0.000110344558676354, "loss": 1.2818, "step": 34506 }, { "epoch": 0.44840272690504074, "grad_norm": 0.3966307044029236, "learning_rate": 0.00011034195921444261, "loss": 1.3729, "step": 34507 }, { "epoch": 0.44841572144895664, "grad_norm": 0.5355483293533325, "learning_rate": 0.00011033935975253122, "loss": 1.3975, "step": 34508 }, { "epoch": 0.4484287159928725, "grad_norm": 0.4202527105808258, "learning_rate": 0.00011033676029061986, "loss": 1.4654, "step": 34509 }, { "epoch": 0.4484417105367884, "grad_norm": 0.28879261016845703, "learning_rate": 0.00011033416082870847, "loss": 1.2599, "step": 34510 }, { "epoch": 0.44845470508070423, "grad_norm": 0.42864328622817993, "learning_rate": 0.00011033156136679708, "loss": 1.4591, "step": 34511 }, { "epoch": 0.44846769962462013, "grad_norm": 0.4456816613674164, "learning_rate": 0.00011032896190488569, "loss": 1.2534, "step": 34512 }, { "epoch": 0.448480694168536, "grad_norm": 0.41308531165122986, "learning_rate": 0.00011032636244297432, "loss": 1.3351, "step": 34513 }, { "epoch": 0.4484936887124519, "grad_norm": 0.47774264216423035, "learning_rate": 0.00011032376298106293, "loss": 1.4716, "step": 34514 }, { "epoch": 0.4485066832563677, "grad_norm": 0.36156168580055237, "learning_rate": 0.00011032116351915154, "loss": 1.376, "step": 34515 }, { "epoch": 0.4485196778002836, "grad_norm": 0.4695911407470703, "learning_rate": 0.00011031856405724015, "loss": 1.5211, "step": 34516 }, { "epoch": 0.44853267234419947, "grad_norm": 0.43510448932647705, "learning_rate": 0.00011031596459532879, "loss": 1.4971, "step": 34517 }, { "epoch": 0.44854566688811537, "grad_norm": 0.37574025988578796, "learning_rate": 0.00011031336513341738, "loss": 1.3829, "step": 34518 }, { "epoch": 0.4485586614320312, "grad_norm": 0.3916834890842438, "learning_rate": 0.000110310765671506, "loss": 1.3704, "step": 34519 }, { "epoch": 0.4485716559759471, "grad_norm": 0.3476482331752777, "learning_rate": 0.0001103081662095946, "loss": 1.3747, "step": 34520 }, { "epoch": 0.44858465051986296, "grad_norm": 0.30428409576416016, "learning_rate": 0.00011030556674768324, "loss": 1.6379, "step": 34521 }, { "epoch": 0.44859764506377886, "grad_norm": 0.3509369492530823, "learning_rate": 0.00011030296728577185, "loss": 1.6192, "step": 34522 }, { "epoch": 0.4486106396076947, "grad_norm": 0.41059157252311707, "learning_rate": 0.00011030036782386047, "loss": 1.4077, "step": 34523 }, { "epoch": 0.4486236341516106, "grad_norm": 0.4512932002544403, "learning_rate": 0.00011029776836194908, "loss": 1.3083, "step": 34524 }, { "epoch": 0.44863662869552645, "grad_norm": 0.40130892395973206, "learning_rate": 0.0001102951689000377, "loss": 1.4744, "step": 34525 }, { "epoch": 0.44864962323944235, "grad_norm": 0.3996976613998413, "learning_rate": 0.00011029256943812631, "loss": 1.5036, "step": 34526 }, { "epoch": 0.4486626177833582, "grad_norm": 0.3865920305252075, "learning_rate": 0.00011028996997621492, "loss": 1.2762, "step": 34527 }, { "epoch": 0.4486756123272741, "grad_norm": 0.4725949466228485, "learning_rate": 0.00011028737051430353, "loss": 1.3748, "step": 34528 }, { "epoch": 0.44868860687118994, "grad_norm": 0.4591662883758545, "learning_rate": 0.00011028477105239217, "loss": 1.5218, "step": 34529 }, { "epoch": 0.44870160141510584, "grad_norm": 0.4514273703098297, "learning_rate": 0.00011028217159048078, "loss": 1.4663, "step": 34530 }, { "epoch": 0.4487145959590217, "grad_norm": 0.5583901405334473, "learning_rate": 0.00011027957212856938, "loss": 1.5054, "step": 34531 }, { "epoch": 0.4487275905029376, "grad_norm": 0.3910730183124542, "learning_rate": 0.00011027697266665799, "loss": 1.2571, "step": 34532 }, { "epoch": 0.44874058504685344, "grad_norm": 0.4569634795188904, "learning_rate": 0.00011027437320474663, "loss": 1.5548, "step": 34533 }, { "epoch": 0.44875357959076934, "grad_norm": 0.3992309272289276, "learning_rate": 0.00011027177374283524, "loss": 1.3556, "step": 34534 }, { "epoch": 0.4487665741346852, "grad_norm": 0.3682311773300171, "learning_rate": 0.00011026917428092385, "loss": 1.5128, "step": 34535 }, { "epoch": 0.4487795686786011, "grad_norm": 0.4828411042690277, "learning_rate": 0.00011026657481901248, "loss": 1.4408, "step": 34536 }, { "epoch": 0.4487925632225169, "grad_norm": 0.3480648994445801, "learning_rate": 0.00011026397535710109, "loss": 1.3544, "step": 34537 }, { "epoch": 0.4488055577664328, "grad_norm": 0.4272501468658447, "learning_rate": 0.0001102613758951897, "loss": 1.5158, "step": 34538 }, { "epoch": 0.4488185523103487, "grad_norm": 0.4681031107902527, "learning_rate": 0.00011025877643327831, "loss": 1.3275, "step": 34539 }, { "epoch": 0.4488315468542646, "grad_norm": 0.40973857045173645, "learning_rate": 0.00011025617697136695, "loss": 1.4468, "step": 34540 }, { "epoch": 0.4488445413981804, "grad_norm": 0.3795461654663086, "learning_rate": 0.00011025357750945556, "loss": 1.3368, "step": 34541 }, { "epoch": 0.4488575359420963, "grad_norm": 0.4213436245918274, "learning_rate": 0.00011025097804754417, "loss": 1.3384, "step": 34542 }, { "epoch": 0.44887053048601216, "grad_norm": 0.41806358098983765, "learning_rate": 0.00011024837858563278, "loss": 1.4118, "step": 34543 }, { "epoch": 0.44888352502992807, "grad_norm": 0.43470683693885803, "learning_rate": 0.0001102457791237214, "loss": 1.5172, "step": 34544 }, { "epoch": 0.4488965195738439, "grad_norm": 0.33741140365600586, "learning_rate": 0.00011024317966181001, "loss": 1.2366, "step": 34545 }, { "epoch": 0.4489095141177598, "grad_norm": 0.3379230201244354, "learning_rate": 0.00011024058019989863, "loss": 1.112, "step": 34546 }, { "epoch": 0.44892250866167566, "grad_norm": 0.3742949068546295, "learning_rate": 0.00011023798073798724, "loss": 1.5657, "step": 34547 }, { "epoch": 0.44893550320559156, "grad_norm": 0.39107751846313477, "learning_rate": 0.00011023538127607586, "loss": 1.2031, "step": 34548 }, { "epoch": 0.4489484977495074, "grad_norm": 0.420554518699646, "learning_rate": 0.00011023278181416447, "loss": 1.4858, "step": 34549 }, { "epoch": 0.4489614922934233, "grad_norm": 0.45585426688194275, "learning_rate": 0.00011023018235225308, "loss": 1.3626, "step": 34550 }, { "epoch": 0.44897448683733915, "grad_norm": 0.3173621594905853, "learning_rate": 0.0001102275828903417, "loss": 1.3216, "step": 34551 }, { "epoch": 0.44898748138125505, "grad_norm": 0.41747456789016724, "learning_rate": 0.00011022498342843033, "loss": 1.4171, "step": 34552 }, { "epoch": 0.4490004759251709, "grad_norm": 0.47379279136657715, "learning_rate": 0.00011022238396651894, "loss": 1.3124, "step": 34553 }, { "epoch": 0.4490134704690868, "grad_norm": 0.4038969576358795, "learning_rate": 0.00011021978450460755, "loss": 1.3056, "step": 34554 }, { "epoch": 0.44902646501300264, "grad_norm": 0.3103243410587311, "learning_rate": 0.00011021718504269616, "loss": 1.2316, "step": 34555 }, { "epoch": 0.44903945955691854, "grad_norm": 0.37601181864738464, "learning_rate": 0.00011021458558078479, "loss": 1.4122, "step": 34556 }, { "epoch": 0.4490524541008344, "grad_norm": 0.4181826412677765, "learning_rate": 0.0001102119861188734, "loss": 1.3827, "step": 34557 }, { "epoch": 0.4490654486447503, "grad_norm": 0.34731724858283997, "learning_rate": 0.00011020938665696201, "loss": 1.4421, "step": 34558 }, { "epoch": 0.44907844318866613, "grad_norm": 0.4501745402812958, "learning_rate": 0.00011020678719505062, "loss": 1.4119, "step": 34559 }, { "epoch": 0.44909143773258203, "grad_norm": 0.4302853047847748, "learning_rate": 0.00011020418773313925, "loss": 1.4055, "step": 34560 }, { "epoch": 0.4491044322764979, "grad_norm": 0.4178692102432251, "learning_rate": 0.00011020158827122786, "loss": 1.4248, "step": 34561 }, { "epoch": 0.4491174268204138, "grad_norm": 0.37288540601730347, "learning_rate": 0.00011019898880931647, "loss": 1.4454, "step": 34562 }, { "epoch": 0.4491304213643297, "grad_norm": 0.538740336894989, "learning_rate": 0.00011019638934740508, "loss": 1.4985, "step": 34563 }, { "epoch": 0.4491434159082455, "grad_norm": 0.3887413442134857, "learning_rate": 0.00011019378988549372, "loss": 1.3569, "step": 34564 }, { "epoch": 0.4491564104521614, "grad_norm": 0.4002898633480072, "learning_rate": 0.00011019119042358233, "loss": 1.402, "step": 34565 }, { "epoch": 0.44916940499607727, "grad_norm": 0.4054885506629944, "learning_rate": 0.00011018859096167094, "loss": 1.3356, "step": 34566 }, { "epoch": 0.44918239953999317, "grad_norm": 0.40970197319984436, "learning_rate": 0.00011018599149975955, "loss": 1.3802, "step": 34567 }, { "epoch": 0.449195394083909, "grad_norm": 0.26434653997421265, "learning_rate": 0.00011018339203784817, "loss": 1.1602, "step": 34568 }, { "epoch": 0.4492083886278249, "grad_norm": 0.3916490375995636, "learning_rate": 0.00011018079257593678, "loss": 1.38, "step": 34569 }, { "epoch": 0.44922138317174076, "grad_norm": 0.5934067368507385, "learning_rate": 0.0001101781931140254, "loss": 1.562, "step": 34570 }, { "epoch": 0.44923437771565666, "grad_norm": 0.41711217164993286, "learning_rate": 0.00011017559365211403, "loss": 1.3323, "step": 34571 }, { "epoch": 0.4492473722595725, "grad_norm": 0.3497278392314911, "learning_rate": 0.00011017299419020264, "loss": 1.343, "step": 34572 }, { "epoch": 0.4492603668034884, "grad_norm": 0.3907304108142853, "learning_rate": 0.00011017039472829124, "loss": 1.6952, "step": 34573 }, { "epoch": 0.44927336134740425, "grad_norm": 0.444450318813324, "learning_rate": 0.00011016779526637985, "loss": 1.407, "step": 34574 }, { "epoch": 0.44928635589132015, "grad_norm": 0.3275794982910156, "learning_rate": 0.00011016519580446849, "loss": 1.4977, "step": 34575 }, { "epoch": 0.449299350435236, "grad_norm": 0.4418283700942993, "learning_rate": 0.0001101625963425571, "loss": 1.2848, "step": 34576 }, { "epoch": 0.4493123449791519, "grad_norm": 0.3973557949066162, "learning_rate": 0.00011015999688064571, "loss": 1.5528, "step": 34577 }, { "epoch": 0.44932533952306775, "grad_norm": 0.5718251466751099, "learning_rate": 0.00011015739741873432, "loss": 1.554, "step": 34578 }, { "epoch": 0.44933833406698365, "grad_norm": 0.4450122117996216, "learning_rate": 0.00011015479795682295, "loss": 1.4901, "step": 34579 }, { "epoch": 0.4493513286108995, "grad_norm": 0.4234688878059387, "learning_rate": 0.00011015219849491156, "loss": 1.5151, "step": 34580 }, { "epoch": 0.4493643231548154, "grad_norm": 0.2874080240726471, "learning_rate": 0.00011014959903300017, "loss": 1.2937, "step": 34581 }, { "epoch": 0.44937731769873124, "grad_norm": 0.40544554591178894, "learning_rate": 0.00011014699957108878, "loss": 1.4859, "step": 34582 }, { "epoch": 0.44939031224264714, "grad_norm": 0.439264178276062, "learning_rate": 0.00011014440010917742, "loss": 1.2281, "step": 34583 }, { "epoch": 0.449403306786563, "grad_norm": 0.3725937008857727, "learning_rate": 0.00011014180064726603, "loss": 1.5603, "step": 34584 }, { "epoch": 0.4494163013304789, "grad_norm": 0.35467728972435, "learning_rate": 0.00011013920118535464, "loss": 1.3394, "step": 34585 }, { "epoch": 0.44942929587439473, "grad_norm": 0.5001130700111389, "learning_rate": 0.00011013660172344324, "loss": 1.4047, "step": 34586 }, { "epoch": 0.44944229041831063, "grad_norm": 0.3618234395980835, "learning_rate": 0.00011013400226153188, "loss": 1.2259, "step": 34587 }, { "epoch": 0.4494552849622265, "grad_norm": 0.42493629455566406, "learning_rate": 0.00011013140279962049, "loss": 1.3431, "step": 34588 }, { "epoch": 0.4494682795061424, "grad_norm": 0.4427499771118164, "learning_rate": 0.0001101288033377091, "loss": 1.442, "step": 34589 }, { "epoch": 0.4494812740500582, "grad_norm": 0.5740325450897217, "learning_rate": 0.00011012620387579771, "loss": 1.4735, "step": 34590 }, { "epoch": 0.4494942685939741, "grad_norm": 0.3771803677082062, "learning_rate": 0.00011012360441388633, "loss": 1.2133, "step": 34591 }, { "epoch": 0.44950726313788997, "grad_norm": 0.35837626457214355, "learning_rate": 0.00011012100495197494, "loss": 1.5354, "step": 34592 }, { "epoch": 0.44952025768180587, "grad_norm": 0.3096320629119873, "learning_rate": 0.00011011840549006356, "loss": 1.2037, "step": 34593 }, { "epoch": 0.4495332522257217, "grad_norm": 0.40373578667640686, "learning_rate": 0.00011011580602815217, "loss": 1.544, "step": 34594 }, { "epoch": 0.4495462467696376, "grad_norm": 0.2228272259235382, "learning_rate": 0.0001101132065662408, "loss": 1.1012, "step": 34595 }, { "epoch": 0.44955924131355346, "grad_norm": 0.42053523659706116, "learning_rate": 0.00011011060710432942, "loss": 1.4426, "step": 34596 }, { "epoch": 0.44957223585746936, "grad_norm": 0.4178027808666229, "learning_rate": 0.00011010800764241803, "loss": 1.3974, "step": 34597 }, { "epoch": 0.4495852304013852, "grad_norm": 0.3505808711051941, "learning_rate": 0.00011010540818050662, "loss": 1.2488, "step": 34598 }, { "epoch": 0.4495982249453011, "grad_norm": 0.5439279079437256, "learning_rate": 0.00011010280871859526, "loss": 1.4412, "step": 34599 }, { "epoch": 0.44961121948921695, "grad_norm": 0.4107949435710907, "learning_rate": 0.00011010020925668387, "loss": 1.5524, "step": 34600 }, { "epoch": 0.44962421403313285, "grad_norm": 0.4186403155326843, "learning_rate": 0.00011009760979477248, "loss": 1.2991, "step": 34601 }, { "epoch": 0.4496372085770487, "grad_norm": 0.5023602843284607, "learning_rate": 0.0001100950103328611, "loss": 1.4306, "step": 34602 }, { "epoch": 0.4496502031209646, "grad_norm": 0.3692898750305176, "learning_rate": 0.00011009241087094972, "loss": 1.4973, "step": 34603 }, { "epoch": 0.44966319766488044, "grad_norm": 0.35382771492004395, "learning_rate": 0.00011008981140903833, "loss": 1.24, "step": 34604 }, { "epoch": 0.44967619220879634, "grad_norm": 0.3985137939453125, "learning_rate": 0.00011008721194712694, "loss": 1.3116, "step": 34605 }, { "epoch": 0.4496891867527122, "grad_norm": 0.46171829104423523, "learning_rate": 0.00011008461248521555, "loss": 1.1653, "step": 34606 }, { "epoch": 0.4497021812966281, "grad_norm": 0.4294068515300751, "learning_rate": 0.00011008201302330419, "loss": 1.4257, "step": 34607 }, { "epoch": 0.44971517584054393, "grad_norm": 0.43269744515419006, "learning_rate": 0.0001100794135613928, "loss": 1.5493, "step": 34608 }, { "epoch": 0.44972817038445984, "grad_norm": 0.4605969786643982, "learning_rate": 0.00011007681409948141, "loss": 1.5076, "step": 34609 }, { "epoch": 0.4497411649283757, "grad_norm": 0.4703240692615509, "learning_rate": 0.00011007421463757004, "loss": 1.309, "step": 34610 }, { "epoch": 0.4497541594722916, "grad_norm": 0.4340241253376007, "learning_rate": 0.00011007161517565865, "loss": 1.3003, "step": 34611 }, { "epoch": 0.4497671540162074, "grad_norm": 0.4455007016658783, "learning_rate": 0.00011006901571374726, "loss": 1.4387, "step": 34612 }, { "epoch": 0.4497801485601233, "grad_norm": 0.41624656319618225, "learning_rate": 0.00011006641625183587, "loss": 1.4398, "step": 34613 }, { "epoch": 0.4497931431040392, "grad_norm": 0.3853873312473297, "learning_rate": 0.00011006381678992451, "loss": 1.4882, "step": 34614 }, { "epoch": 0.4498061376479551, "grad_norm": 0.4715087115764618, "learning_rate": 0.0001100612173280131, "loss": 1.4296, "step": 34615 }, { "epoch": 0.4498191321918709, "grad_norm": 0.4221077859401703, "learning_rate": 0.00011005861786610172, "loss": 1.3067, "step": 34616 }, { "epoch": 0.4498321267357868, "grad_norm": 0.37524187564849854, "learning_rate": 0.00011005601840419033, "loss": 1.314, "step": 34617 }, { "epoch": 0.44984512127970266, "grad_norm": 0.3193286061286926, "learning_rate": 0.00011005341894227896, "loss": 1.4158, "step": 34618 }, { "epoch": 0.44985811582361857, "grad_norm": 0.2873690128326416, "learning_rate": 0.00011005081948036758, "loss": 1.249, "step": 34619 }, { "epoch": 0.4498711103675344, "grad_norm": 0.5461328625679016, "learning_rate": 0.00011004822001845619, "loss": 1.4104, "step": 34620 }, { "epoch": 0.4498841049114503, "grad_norm": 0.45275986194610596, "learning_rate": 0.0001100456205565448, "loss": 1.412, "step": 34621 }, { "epoch": 0.44989709945536616, "grad_norm": 0.39862167835235596, "learning_rate": 0.00011004302109463342, "loss": 1.4582, "step": 34622 }, { "epoch": 0.44991009399928206, "grad_norm": 0.4049936532974243, "learning_rate": 0.00011004042163272203, "loss": 1.3718, "step": 34623 }, { "epoch": 0.4499230885431979, "grad_norm": 0.4950532019138336, "learning_rate": 0.00011003782217081064, "loss": 1.2276, "step": 34624 }, { "epoch": 0.4499360830871138, "grad_norm": 0.33813920617103577, "learning_rate": 0.00011003522270889925, "loss": 1.5944, "step": 34625 }, { "epoch": 0.44994907763102965, "grad_norm": 0.5054971575737, "learning_rate": 0.00011003262324698789, "loss": 1.5062, "step": 34626 }, { "epoch": 0.44996207217494555, "grad_norm": 0.4089203178882599, "learning_rate": 0.0001100300237850765, "loss": 1.4684, "step": 34627 }, { "epoch": 0.4499750667188614, "grad_norm": 0.3653579652309418, "learning_rate": 0.0001100274243231651, "loss": 1.3285, "step": 34628 }, { "epoch": 0.4499880612627773, "grad_norm": 0.4717418849468231, "learning_rate": 0.00011002482486125371, "loss": 1.2705, "step": 34629 }, { "epoch": 0.45000105580669314, "grad_norm": 0.41295260190963745, "learning_rate": 0.00011002222539934235, "loss": 1.4125, "step": 34630 }, { "epoch": 0.45001405035060904, "grad_norm": 0.32432249188423157, "learning_rate": 0.00011001962593743096, "loss": 1.2787, "step": 34631 }, { "epoch": 0.4500270448945249, "grad_norm": 0.39494869112968445, "learning_rate": 0.00011001702647551957, "loss": 1.6407, "step": 34632 }, { "epoch": 0.4500400394384408, "grad_norm": 0.45177191495895386, "learning_rate": 0.00011001442701360818, "loss": 1.42, "step": 34633 }, { "epoch": 0.45005303398235663, "grad_norm": 0.36676496267318726, "learning_rate": 0.00011001182755169681, "loss": 1.3545, "step": 34634 }, { "epoch": 0.45006602852627253, "grad_norm": 0.39819958806037903, "learning_rate": 0.00011000922808978542, "loss": 1.487, "step": 34635 }, { "epoch": 0.4500790230701884, "grad_norm": 0.3898221254348755, "learning_rate": 0.00011000662862787403, "loss": 1.4685, "step": 34636 }, { "epoch": 0.4500920176141043, "grad_norm": 0.3104458153247833, "learning_rate": 0.00011000402916596264, "loss": 1.3507, "step": 34637 }, { "epoch": 0.4501050121580201, "grad_norm": 0.44826236367225647, "learning_rate": 0.00011000142970405128, "loss": 1.3661, "step": 34638 }, { "epoch": 0.450118006701936, "grad_norm": 0.41079476475715637, "learning_rate": 0.00010999883024213989, "loss": 1.3416, "step": 34639 }, { "epoch": 0.4501310012458519, "grad_norm": 0.3759309947490692, "learning_rate": 0.00010999623078022849, "loss": 1.528, "step": 34640 }, { "epoch": 0.45014399578976777, "grad_norm": 0.33685725927352905, "learning_rate": 0.0001099936313183171, "loss": 1.1044, "step": 34641 }, { "epoch": 0.45015699033368367, "grad_norm": 0.378028005361557, "learning_rate": 0.00010999103185640574, "loss": 1.1589, "step": 34642 }, { "epoch": 0.4501699848775995, "grad_norm": 0.452457457780838, "learning_rate": 0.00010998843239449435, "loss": 1.5207, "step": 34643 }, { "epoch": 0.4501829794215154, "grad_norm": 0.3738781213760376, "learning_rate": 0.00010998583293258296, "loss": 1.4272, "step": 34644 }, { "epoch": 0.45019597396543126, "grad_norm": 0.46590104699134827, "learning_rate": 0.00010998323347067158, "loss": 1.4186, "step": 34645 }, { "epoch": 0.45020896850934716, "grad_norm": 0.42637282609939575, "learning_rate": 0.00010998063400876019, "loss": 1.3875, "step": 34646 }, { "epoch": 0.450221963053263, "grad_norm": 0.44957152009010315, "learning_rate": 0.0001099780345468488, "loss": 1.3859, "step": 34647 }, { "epoch": 0.4502349575971789, "grad_norm": 0.4742974638938904, "learning_rate": 0.00010997543508493741, "loss": 1.4974, "step": 34648 }, { "epoch": 0.45024795214109475, "grad_norm": 0.4378099739551544, "learning_rate": 0.00010997283562302605, "loss": 1.4062, "step": 34649 }, { "epoch": 0.45026094668501065, "grad_norm": 0.4108748137950897, "learning_rate": 0.00010997023616111466, "loss": 1.3695, "step": 34650 }, { "epoch": 0.4502739412289265, "grad_norm": 0.4649498164653778, "learning_rate": 0.00010996763669920327, "loss": 1.3068, "step": 34651 }, { "epoch": 0.4502869357728424, "grad_norm": 0.42827990651130676, "learning_rate": 0.00010996503723729189, "loss": 1.4296, "step": 34652 }, { "epoch": 0.45029993031675825, "grad_norm": 0.36306333541870117, "learning_rate": 0.00010996243777538051, "loss": 1.4684, "step": 34653 }, { "epoch": 0.45031292486067415, "grad_norm": 0.4497435986995697, "learning_rate": 0.00010995983831346912, "loss": 1.4764, "step": 34654 }, { "epoch": 0.45032591940459, "grad_norm": 0.3822482228279114, "learning_rate": 0.00010995723885155773, "loss": 1.6684, "step": 34655 }, { "epoch": 0.4503389139485059, "grad_norm": 0.3489624857902527, "learning_rate": 0.00010995463938964634, "loss": 1.3735, "step": 34656 }, { "epoch": 0.45035190849242174, "grad_norm": 0.33736705780029297, "learning_rate": 0.00010995203992773497, "loss": 1.2245, "step": 34657 }, { "epoch": 0.45036490303633764, "grad_norm": 0.29852402210235596, "learning_rate": 0.00010994944046582358, "loss": 1.3669, "step": 34658 }, { "epoch": 0.4503778975802535, "grad_norm": 0.4361458718776703, "learning_rate": 0.00010994684100391219, "loss": 1.6184, "step": 34659 }, { "epoch": 0.4503908921241694, "grad_norm": 0.43799087405204773, "learning_rate": 0.0001099442415420008, "loss": 1.3209, "step": 34660 }, { "epoch": 0.45040388666808523, "grad_norm": 0.4746192693710327, "learning_rate": 0.00010994164208008944, "loss": 1.5739, "step": 34661 }, { "epoch": 0.45041688121200113, "grad_norm": 0.4274558424949646, "learning_rate": 0.00010993904261817805, "loss": 1.3528, "step": 34662 }, { "epoch": 0.450429875755917, "grad_norm": 0.35588523745536804, "learning_rate": 0.00010993644315626666, "loss": 1.3957, "step": 34663 }, { "epoch": 0.4504428702998329, "grad_norm": 0.3102421164512634, "learning_rate": 0.00010993384369435527, "loss": 1.3901, "step": 34664 }, { "epoch": 0.4504558648437487, "grad_norm": 0.39387017488479614, "learning_rate": 0.0001099312442324439, "loss": 1.5012, "step": 34665 }, { "epoch": 0.4504688593876646, "grad_norm": 0.38885587453842163, "learning_rate": 0.0001099286447705325, "loss": 1.5434, "step": 34666 }, { "epoch": 0.45048185393158047, "grad_norm": 0.3618861734867096, "learning_rate": 0.00010992604530862112, "loss": 1.3914, "step": 34667 }, { "epoch": 0.45049484847549637, "grad_norm": 0.44911736249923706, "learning_rate": 0.00010992344584670973, "loss": 1.5369, "step": 34668 }, { "epoch": 0.4505078430194122, "grad_norm": 0.3879886567592621, "learning_rate": 0.00010992084638479837, "loss": 1.1754, "step": 34669 }, { "epoch": 0.4505208375633281, "grad_norm": 0.4212639331817627, "learning_rate": 0.00010991824692288696, "loss": 1.2827, "step": 34670 }, { "epoch": 0.45053383210724396, "grad_norm": 0.41131022572517395, "learning_rate": 0.00010991564746097557, "loss": 1.3663, "step": 34671 }, { "epoch": 0.45054682665115986, "grad_norm": 0.5232630968093872, "learning_rate": 0.00010991304799906419, "loss": 1.4822, "step": 34672 }, { "epoch": 0.4505598211950757, "grad_norm": 0.42684683203697205, "learning_rate": 0.00010991044853715282, "loss": 1.3651, "step": 34673 }, { "epoch": 0.4505728157389916, "grad_norm": 0.5546239614486694, "learning_rate": 0.00010990784907524143, "loss": 1.398, "step": 34674 }, { "epoch": 0.45058581028290745, "grad_norm": 0.3987673223018646, "learning_rate": 0.00010990524961333005, "loss": 1.4861, "step": 34675 }, { "epoch": 0.45059880482682335, "grad_norm": 0.4570951461791992, "learning_rate": 0.00010990265015141866, "loss": 1.3202, "step": 34676 }, { "epoch": 0.4506117993707392, "grad_norm": 0.421896368265152, "learning_rate": 0.00010990005068950728, "loss": 1.392, "step": 34677 }, { "epoch": 0.4506247939146551, "grad_norm": 0.34930211305618286, "learning_rate": 0.00010989745122759589, "loss": 1.6332, "step": 34678 }, { "epoch": 0.45063778845857094, "grad_norm": 0.43266966938972473, "learning_rate": 0.0001098948517656845, "loss": 1.3176, "step": 34679 }, { "epoch": 0.45065078300248684, "grad_norm": 0.42050305008888245, "learning_rate": 0.00010989225230377311, "loss": 1.6115, "step": 34680 }, { "epoch": 0.4506637775464027, "grad_norm": 0.42231515049934387, "learning_rate": 0.00010988965284186175, "loss": 1.518, "step": 34681 }, { "epoch": 0.4506767720903186, "grad_norm": 0.3048645853996277, "learning_rate": 0.00010988705337995035, "loss": 1.2405, "step": 34682 }, { "epoch": 0.45068976663423443, "grad_norm": 0.3793469965457916, "learning_rate": 0.00010988445391803896, "loss": 1.4223, "step": 34683 }, { "epoch": 0.45070276117815034, "grad_norm": 0.3899625837802887, "learning_rate": 0.0001098818544561276, "loss": 1.2016, "step": 34684 }, { "epoch": 0.4507157557220662, "grad_norm": 0.33458301424980164, "learning_rate": 0.00010987925499421621, "loss": 1.1769, "step": 34685 }, { "epoch": 0.4507287502659821, "grad_norm": 0.4201454818248749, "learning_rate": 0.00010987665553230482, "loss": 1.3793, "step": 34686 }, { "epoch": 0.4507417448098979, "grad_norm": 0.2987552583217621, "learning_rate": 0.00010987405607039343, "loss": 1.4608, "step": 34687 }, { "epoch": 0.4507547393538138, "grad_norm": 0.49060991406440735, "learning_rate": 0.00010987145660848206, "loss": 1.4257, "step": 34688 }, { "epoch": 0.45076773389772967, "grad_norm": 0.4518246650695801, "learning_rate": 0.00010986885714657067, "loss": 1.4056, "step": 34689 }, { "epoch": 0.4507807284416456, "grad_norm": 0.38075777888298035, "learning_rate": 0.00010986625768465928, "loss": 1.399, "step": 34690 }, { "epoch": 0.4507937229855614, "grad_norm": 0.348307341337204, "learning_rate": 0.00010986365822274789, "loss": 1.348, "step": 34691 }, { "epoch": 0.4508067175294773, "grad_norm": 0.4711441695690155, "learning_rate": 0.00010986105876083653, "loss": 1.6102, "step": 34692 }, { "epoch": 0.45081971207339316, "grad_norm": 0.3929104804992676, "learning_rate": 0.00010985845929892514, "loss": 1.4359, "step": 34693 }, { "epoch": 0.45083270661730906, "grad_norm": 0.45121195912361145, "learning_rate": 0.00010985585983701375, "loss": 1.5505, "step": 34694 }, { "epoch": 0.4508457011612249, "grad_norm": 0.27183642983436584, "learning_rate": 0.00010985326037510235, "loss": 1.2986, "step": 34695 }, { "epoch": 0.4508586957051408, "grad_norm": 0.4760079085826874, "learning_rate": 0.00010985066091319098, "loss": 1.3248, "step": 34696 }, { "epoch": 0.45087169024905666, "grad_norm": 0.3645307421684265, "learning_rate": 0.0001098480614512796, "loss": 1.3651, "step": 34697 }, { "epoch": 0.45088468479297256, "grad_norm": 0.4608154892921448, "learning_rate": 0.0001098454619893682, "loss": 1.587, "step": 34698 }, { "epoch": 0.4508976793368884, "grad_norm": 0.2584189772605896, "learning_rate": 0.00010984286252745682, "loss": 1.3133, "step": 34699 }, { "epoch": 0.4509106738808043, "grad_norm": 0.4810093343257904, "learning_rate": 0.00010984026306554544, "loss": 1.433, "step": 34700 }, { "epoch": 0.45092366842472015, "grad_norm": 0.4034838378429413, "learning_rate": 0.00010983766360363405, "loss": 1.5186, "step": 34701 }, { "epoch": 0.45093666296863605, "grad_norm": 0.2878677248954773, "learning_rate": 0.00010983506414172266, "loss": 1.3735, "step": 34702 }, { "epoch": 0.4509496575125519, "grad_norm": 0.3981530964374542, "learning_rate": 0.00010983246467981127, "loss": 1.4509, "step": 34703 }, { "epoch": 0.4509626520564678, "grad_norm": 0.3467041552066803, "learning_rate": 0.00010982986521789991, "loss": 1.2871, "step": 34704 }, { "epoch": 0.45097564660038364, "grad_norm": 0.43963155150413513, "learning_rate": 0.00010982726575598852, "loss": 1.5168, "step": 34705 }, { "epoch": 0.45098864114429954, "grad_norm": 0.4081425070762634, "learning_rate": 0.00010982466629407713, "loss": 1.2588, "step": 34706 }, { "epoch": 0.4510016356882154, "grad_norm": 0.4085533916950226, "learning_rate": 0.00010982206683216574, "loss": 1.5025, "step": 34707 }, { "epoch": 0.4510146302321313, "grad_norm": 0.4102005064487457, "learning_rate": 0.00010981946737025437, "loss": 1.513, "step": 34708 }, { "epoch": 0.45102762477604713, "grad_norm": 0.37340712547302246, "learning_rate": 0.00010981686790834298, "loss": 1.3159, "step": 34709 }, { "epoch": 0.45104061931996303, "grad_norm": 0.3997344672679901, "learning_rate": 0.00010981426844643159, "loss": 1.4921, "step": 34710 }, { "epoch": 0.4510536138638789, "grad_norm": 0.3900180757045746, "learning_rate": 0.0001098116689845202, "loss": 1.4969, "step": 34711 }, { "epoch": 0.4510666084077948, "grad_norm": 0.38005638122558594, "learning_rate": 0.00010980906952260883, "loss": 1.3136, "step": 34712 }, { "epoch": 0.4510796029517106, "grad_norm": 0.4513770341873169, "learning_rate": 0.00010980647006069744, "loss": 1.5497, "step": 34713 }, { "epoch": 0.4510925974956265, "grad_norm": 0.3341437876224518, "learning_rate": 0.00010980387059878605, "loss": 1.1552, "step": 34714 }, { "epoch": 0.4511055920395424, "grad_norm": 0.3980044424533844, "learning_rate": 0.00010980127113687466, "loss": 1.4221, "step": 34715 }, { "epoch": 0.45111858658345827, "grad_norm": 0.3460882306098938, "learning_rate": 0.0001097986716749633, "loss": 1.3419, "step": 34716 }, { "epoch": 0.45113158112737417, "grad_norm": 0.42624014616012573, "learning_rate": 0.00010979607221305191, "loss": 1.3568, "step": 34717 }, { "epoch": 0.45114457567129, "grad_norm": 0.2534487247467041, "learning_rate": 0.00010979347275114052, "loss": 1.1958, "step": 34718 }, { "epoch": 0.4511575702152059, "grad_norm": 0.37547239661216736, "learning_rate": 0.00010979087328922914, "loss": 1.3855, "step": 34719 }, { "epoch": 0.45117056475912176, "grad_norm": 0.37876471877098083, "learning_rate": 0.00010978827382731775, "loss": 1.3258, "step": 34720 }, { "epoch": 0.45118355930303766, "grad_norm": 0.4101529121398926, "learning_rate": 0.00010978567436540636, "loss": 1.4229, "step": 34721 }, { "epoch": 0.4511965538469535, "grad_norm": 0.3159874379634857, "learning_rate": 0.00010978307490349498, "loss": 1.2605, "step": 34722 }, { "epoch": 0.4512095483908694, "grad_norm": 0.2980130612850189, "learning_rate": 0.00010978047544158361, "loss": 1.201, "step": 34723 }, { "epoch": 0.45122254293478525, "grad_norm": 0.4047219157218933, "learning_rate": 0.00010977787597967221, "loss": 1.5541, "step": 34724 }, { "epoch": 0.45123553747870115, "grad_norm": 0.4015253782272339, "learning_rate": 0.00010977527651776082, "loss": 1.4204, "step": 34725 }, { "epoch": 0.451248532022617, "grad_norm": 0.4201032817363739, "learning_rate": 0.00010977267705584943, "loss": 1.3287, "step": 34726 }, { "epoch": 0.4512615265665329, "grad_norm": 0.40899431705474854, "learning_rate": 0.00010977007759393807, "loss": 1.4549, "step": 34727 }, { "epoch": 0.45127452111044875, "grad_norm": 0.4537949562072754, "learning_rate": 0.00010976747813202668, "loss": 1.5097, "step": 34728 }, { "epoch": 0.45128751565436465, "grad_norm": 0.3765506148338318, "learning_rate": 0.00010976487867011529, "loss": 1.0697, "step": 34729 }, { "epoch": 0.4513005101982805, "grad_norm": 0.4242401719093323, "learning_rate": 0.0001097622792082039, "loss": 1.4187, "step": 34730 }, { "epoch": 0.4513135047421964, "grad_norm": 0.30027660727500916, "learning_rate": 0.00010975967974629253, "loss": 1.3223, "step": 34731 }, { "epoch": 0.45132649928611224, "grad_norm": 0.38335204124450684, "learning_rate": 0.00010975708028438114, "loss": 1.2228, "step": 34732 }, { "epoch": 0.45133949383002814, "grad_norm": 0.409382700920105, "learning_rate": 0.00010975448082246975, "loss": 1.4447, "step": 34733 }, { "epoch": 0.451352488373944, "grad_norm": 0.3845069408416748, "learning_rate": 0.00010975188136055836, "loss": 1.4398, "step": 34734 }, { "epoch": 0.4513654829178599, "grad_norm": 0.46727463603019714, "learning_rate": 0.000109749281898647, "loss": 1.4233, "step": 34735 }, { "epoch": 0.45137847746177573, "grad_norm": 0.363198846578598, "learning_rate": 0.00010974668243673561, "loss": 1.3647, "step": 34736 }, { "epoch": 0.45139147200569163, "grad_norm": 0.39461666345596313, "learning_rate": 0.00010974408297482421, "loss": 1.2735, "step": 34737 }, { "epoch": 0.4514044665496075, "grad_norm": 0.35019561648368835, "learning_rate": 0.00010974148351291282, "loss": 1.4918, "step": 34738 }, { "epoch": 0.4514174610935234, "grad_norm": 0.3905196487903595, "learning_rate": 0.00010973888405100146, "loss": 1.39, "step": 34739 }, { "epoch": 0.4514304556374392, "grad_norm": 0.45560622215270996, "learning_rate": 0.00010973628458909007, "loss": 1.4607, "step": 34740 }, { "epoch": 0.4514434501813551, "grad_norm": 0.42589449882507324, "learning_rate": 0.00010973368512717868, "loss": 1.3714, "step": 34741 }, { "epoch": 0.45145644472527097, "grad_norm": 0.5541911721229553, "learning_rate": 0.00010973108566526729, "loss": 1.2459, "step": 34742 }, { "epoch": 0.45146943926918687, "grad_norm": 0.4173346757888794, "learning_rate": 0.00010972848620335591, "loss": 1.3731, "step": 34743 }, { "epoch": 0.4514824338131027, "grad_norm": 0.37347304821014404, "learning_rate": 0.00010972588674144452, "loss": 1.5033, "step": 34744 }, { "epoch": 0.4514954283570186, "grad_norm": 0.38289675116539, "learning_rate": 0.00010972328727953314, "loss": 1.3628, "step": 34745 }, { "epoch": 0.45150842290093446, "grad_norm": 0.4913569390773773, "learning_rate": 0.00010972068781762175, "loss": 1.433, "step": 34746 }, { "epoch": 0.45152141744485036, "grad_norm": 0.4125804901123047, "learning_rate": 0.00010971808835571038, "loss": 1.3467, "step": 34747 }, { "epoch": 0.4515344119887662, "grad_norm": 0.3636202812194824, "learning_rate": 0.000109715488893799, "loss": 1.4488, "step": 34748 }, { "epoch": 0.4515474065326821, "grad_norm": 0.3246307373046875, "learning_rate": 0.0001097128894318876, "loss": 1.3717, "step": 34749 }, { "epoch": 0.45156040107659795, "grad_norm": 0.36629313230514526, "learning_rate": 0.0001097102899699762, "loss": 1.2703, "step": 34750 }, { "epoch": 0.45157339562051385, "grad_norm": 0.3734608590602875, "learning_rate": 0.00010970769050806484, "loss": 1.3177, "step": 34751 }, { "epoch": 0.4515863901644297, "grad_norm": 0.3563080430030823, "learning_rate": 0.00010970509104615345, "loss": 1.3234, "step": 34752 }, { "epoch": 0.4515993847083456, "grad_norm": 0.4973289370536804, "learning_rate": 0.00010970249158424206, "loss": 1.3033, "step": 34753 }, { "epoch": 0.45161237925226144, "grad_norm": 0.33764708042144775, "learning_rate": 0.00010969989212233067, "loss": 1.3726, "step": 34754 }, { "epoch": 0.45162537379617734, "grad_norm": 0.3473716676235199, "learning_rate": 0.0001096972926604193, "loss": 1.1291, "step": 34755 }, { "epoch": 0.4516383683400932, "grad_norm": 0.4495391249656677, "learning_rate": 0.00010969469319850791, "loss": 1.4441, "step": 34756 }, { "epoch": 0.4516513628840091, "grad_norm": 0.4151090085506439, "learning_rate": 0.00010969209373659652, "loss": 1.3128, "step": 34757 }, { "epoch": 0.45166435742792493, "grad_norm": 0.506668746471405, "learning_rate": 0.00010968949427468516, "loss": 1.5258, "step": 34758 }, { "epoch": 0.45167735197184083, "grad_norm": 0.4666251838207245, "learning_rate": 0.00010968689481277377, "loss": 1.3781, "step": 34759 }, { "epoch": 0.4516903465157567, "grad_norm": 0.3775852918624878, "learning_rate": 0.00010968429535086238, "loss": 1.4777, "step": 34760 }, { "epoch": 0.4517033410596726, "grad_norm": 0.39806291460990906, "learning_rate": 0.00010968169588895099, "loss": 1.329, "step": 34761 }, { "epoch": 0.4517163356035884, "grad_norm": 0.38272690773010254, "learning_rate": 0.00010967909642703962, "loss": 1.5868, "step": 34762 }, { "epoch": 0.4517293301475043, "grad_norm": 0.3841523826122284, "learning_rate": 0.00010967649696512823, "loss": 1.3323, "step": 34763 }, { "epoch": 0.45174232469142017, "grad_norm": 0.4096437692642212, "learning_rate": 0.00010967389750321684, "loss": 1.2445, "step": 34764 }, { "epoch": 0.4517553192353361, "grad_norm": 0.3654595911502838, "learning_rate": 0.00010967129804130545, "loss": 1.5167, "step": 34765 }, { "epoch": 0.4517683137792519, "grad_norm": 0.5160888433456421, "learning_rate": 0.00010966869857939407, "loss": 1.4523, "step": 34766 }, { "epoch": 0.4517813083231678, "grad_norm": 0.5552314519882202, "learning_rate": 0.00010966609911748268, "loss": 1.4886, "step": 34767 }, { "epoch": 0.45179430286708366, "grad_norm": 0.2955682873725891, "learning_rate": 0.0001096634996555713, "loss": 1.3102, "step": 34768 }, { "epoch": 0.45180729741099956, "grad_norm": 0.38363808393478394, "learning_rate": 0.0001096609001936599, "loss": 1.4417, "step": 34769 }, { "epoch": 0.4518202919549154, "grad_norm": 0.3800087571144104, "learning_rate": 0.00010965830073174854, "loss": 1.1341, "step": 34770 }, { "epoch": 0.4518332864988313, "grad_norm": 0.45169904828071594, "learning_rate": 0.00010965570126983716, "loss": 1.3101, "step": 34771 }, { "epoch": 0.45184628104274716, "grad_norm": 0.3953300416469574, "learning_rate": 0.00010965310180792577, "loss": 1.4107, "step": 34772 }, { "epoch": 0.45185927558666306, "grad_norm": 0.3940204381942749, "learning_rate": 0.00010965050234601438, "loss": 1.421, "step": 34773 }, { "epoch": 0.4518722701305789, "grad_norm": 0.3225698471069336, "learning_rate": 0.000109647902884103, "loss": 1.4676, "step": 34774 }, { "epoch": 0.4518852646744948, "grad_norm": 0.4055837094783783, "learning_rate": 0.00010964530342219161, "loss": 1.5282, "step": 34775 }, { "epoch": 0.45189825921841065, "grad_norm": 0.38688719272613525, "learning_rate": 0.00010964270396028022, "loss": 1.3816, "step": 34776 }, { "epoch": 0.45191125376232655, "grad_norm": 0.392902135848999, "learning_rate": 0.00010964010449836883, "loss": 1.2913, "step": 34777 }, { "epoch": 0.4519242483062424, "grad_norm": 0.43108752369880676, "learning_rate": 0.00010963750503645747, "loss": 1.4338, "step": 34778 }, { "epoch": 0.4519372428501583, "grad_norm": 0.45072755217552185, "learning_rate": 0.00010963490557454607, "loss": 1.5077, "step": 34779 }, { "epoch": 0.45195023739407414, "grad_norm": 0.30004703998565674, "learning_rate": 0.00010963230611263468, "loss": 1.3252, "step": 34780 }, { "epoch": 0.45196323193799004, "grad_norm": 0.3558892607688904, "learning_rate": 0.00010962970665072329, "loss": 1.3765, "step": 34781 }, { "epoch": 0.4519762264819059, "grad_norm": 0.38909611105918884, "learning_rate": 0.00010962710718881193, "loss": 1.3962, "step": 34782 }, { "epoch": 0.4519892210258218, "grad_norm": 0.3764425218105316, "learning_rate": 0.00010962450772690054, "loss": 1.4578, "step": 34783 }, { "epoch": 0.45200221556973763, "grad_norm": 0.3882215917110443, "learning_rate": 0.00010962190826498915, "loss": 1.4854, "step": 34784 }, { "epoch": 0.45201521011365353, "grad_norm": 0.5179300308227539, "learning_rate": 0.00010961930880307776, "loss": 1.4046, "step": 34785 }, { "epoch": 0.4520282046575694, "grad_norm": 0.32083791494369507, "learning_rate": 0.00010961670934116639, "loss": 1.2995, "step": 34786 }, { "epoch": 0.4520411992014853, "grad_norm": 0.38110214471817017, "learning_rate": 0.000109614109879255, "loss": 1.5026, "step": 34787 }, { "epoch": 0.4520541937454011, "grad_norm": 0.4439060688018799, "learning_rate": 0.00010961151041734361, "loss": 1.4338, "step": 34788 }, { "epoch": 0.452067188289317, "grad_norm": 0.3672008812427521, "learning_rate": 0.00010960891095543222, "loss": 1.4792, "step": 34789 }, { "epoch": 0.45208018283323287, "grad_norm": 0.5323629975318909, "learning_rate": 0.00010960631149352086, "loss": 1.5222, "step": 34790 }, { "epoch": 0.45209317737714877, "grad_norm": 0.3720775246620178, "learning_rate": 0.00010960371203160947, "loss": 1.4019, "step": 34791 }, { "epoch": 0.45210617192106467, "grad_norm": 0.3579128086566925, "learning_rate": 0.00010960111256969807, "loss": 1.3139, "step": 34792 }, { "epoch": 0.4521191664649805, "grad_norm": 0.3261306881904602, "learning_rate": 0.0001095985131077867, "loss": 1.1858, "step": 34793 }, { "epoch": 0.4521321610088964, "grad_norm": 0.7582377791404724, "learning_rate": 0.00010959591364587532, "loss": 1.4017, "step": 34794 }, { "epoch": 0.45214515555281226, "grad_norm": 0.33804845809936523, "learning_rate": 0.00010959331418396393, "loss": 1.473, "step": 34795 }, { "epoch": 0.45215815009672816, "grad_norm": 0.32930707931518555, "learning_rate": 0.00010959071472205254, "loss": 1.3993, "step": 34796 }, { "epoch": 0.452171144640644, "grad_norm": 0.3443145155906677, "learning_rate": 0.00010958811526014116, "loss": 1.3995, "step": 34797 }, { "epoch": 0.4521841391845599, "grad_norm": 0.3540690541267395, "learning_rate": 0.00010958551579822977, "loss": 1.1822, "step": 34798 }, { "epoch": 0.45219713372847575, "grad_norm": 0.4201768636703491, "learning_rate": 0.00010958291633631838, "loss": 1.2247, "step": 34799 }, { "epoch": 0.45221012827239165, "grad_norm": 0.3197118639945984, "learning_rate": 0.000109580316874407, "loss": 1.411, "step": 34800 }, { "epoch": 0.4522231228163075, "grad_norm": 0.40830856561660767, "learning_rate": 0.00010957771741249563, "loss": 1.3058, "step": 34801 }, { "epoch": 0.4522361173602234, "grad_norm": 0.46550801396369934, "learning_rate": 0.00010957511795058424, "loss": 1.377, "step": 34802 }, { "epoch": 0.45224911190413924, "grad_norm": 0.33782848715782166, "learning_rate": 0.00010957251848867285, "loss": 1.3992, "step": 34803 }, { "epoch": 0.45226210644805515, "grad_norm": 0.44523853063583374, "learning_rate": 0.00010956991902676145, "loss": 1.4648, "step": 34804 }, { "epoch": 0.452275100991971, "grad_norm": 0.5018695592880249, "learning_rate": 0.00010956731956485009, "loss": 1.5237, "step": 34805 }, { "epoch": 0.4522880955358869, "grad_norm": 0.388094425201416, "learning_rate": 0.0001095647201029387, "loss": 1.4756, "step": 34806 }, { "epoch": 0.45230109007980274, "grad_norm": 0.3974766135215759, "learning_rate": 0.00010956212064102731, "loss": 1.3491, "step": 34807 }, { "epoch": 0.45231408462371864, "grad_norm": 0.4385204017162323, "learning_rate": 0.00010955952117911592, "loss": 1.4852, "step": 34808 }, { "epoch": 0.4523270791676345, "grad_norm": 0.4855717718601227, "learning_rate": 0.00010955692171720455, "loss": 1.4553, "step": 34809 }, { "epoch": 0.4523400737115504, "grad_norm": 0.44082731008529663, "learning_rate": 0.00010955432225529316, "loss": 1.3377, "step": 34810 }, { "epoch": 0.45235306825546623, "grad_norm": 0.5200600028038025, "learning_rate": 0.00010955172279338177, "loss": 1.4526, "step": 34811 }, { "epoch": 0.45236606279938213, "grad_norm": 0.4211069941520691, "learning_rate": 0.00010954912333147038, "loss": 1.4951, "step": 34812 }, { "epoch": 0.452379057343298, "grad_norm": 0.3686727285385132, "learning_rate": 0.00010954652386955902, "loss": 1.4319, "step": 34813 }, { "epoch": 0.4523920518872139, "grad_norm": 0.3405270278453827, "learning_rate": 0.00010954392440764763, "loss": 1.5108, "step": 34814 }, { "epoch": 0.4524050464311297, "grad_norm": 0.4647314250469208, "learning_rate": 0.00010954132494573624, "loss": 1.6067, "step": 34815 }, { "epoch": 0.4524180409750456, "grad_norm": 0.4550323486328125, "learning_rate": 0.00010953872548382485, "loss": 1.2762, "step": 34816 }, { "epoch": 0.45243103551896147, "grad_norm": 0.33415529131889343, "learning_rate": 0.00010953612602191348, "loss": 1.4567, "step": 34817 }, { "epoch": 0.45244403006287737, "grad_norm": 0.4055238664150238, "learning_rate": 0.00010953352656000209, "loss": 1.3988, "step": 34818 }, { "epoch": 0.4524570246067932, "grad_norm": 0.37410417199134827, "learning_rate": 0.0001095309270980907, "loss": 1.3187, "step": 34819 }, { "epoch": 0.4524700191507091, "grad_norm": 0.49783769249916077, "learning_rate": 0.00010952832763617931, "loss": 1.5924, "step": 34820 }, { "epoch": 0.45248301369462496, "grad_norm": 0.3820335566997528, "learning_rate": 0.00010952572817426793, "loss": 1.3797, "step": 34821 }, { "epoch": 0.45249600823854086, "grad_norm": 0.42440611124038696, "learning_rate": 0.00010952312871235654, "loss": 1.6008, "step": 34822 }, { "epoch": 0.4525090027824567, "grad_norm": 0.4065231680870056, "learning_rate": 0.00010952052925044515, "loss": 1.4774, "step": 34823 }, { "epoch": 0.4525219973263726, "grad_norm": 0.37790054082870483, "learning_rate": 0.00010951792978853377, "loss": 1.3494, "step": 34824 }, { "epoch": 0.45253499187028845, "grad_norm": 0.3961838483810425, "learning_rate": 0.0001095153303266224, "loss": 1.3042, "step": 34825 }, { "epoch": 0.45254798641420435, "grad_norm": 0.3773347735404968, "learning_rate": 0.00010951273086471101, "loss": 1.2974, "step": 34826 }, { "epoch": 0.4525609809581202, "grad_norm": 0.3110443949699402, "learning_rate": 0.00010951013140279963, "loss": 1.5116, "step": 34827 }, { "epoch": 0.4525739755020361, "grad_norm": 0.46696510910987854, "learning_rate": 0.00010950753194088824, "loss": 1.3896, "step": 34828 }, { "epoch": 0.45258697004595194, "grad_norm": 0.42911845445632935, "learning_rate": 0.00010950493247897686, "loss": 1.2848, "step": 34829 }, { "epoch": 0.45259996458986784, "grad_norm": 0.48784470558166504, "learning_rate": 0.00010950233301706547, "loss": 1.2603, "step": 34830 }, { "epoch": 0.4526129591337837, "grad_norm": 0.33039045333862305, "learning_rate": 0.00010949973355515408, "loss": 1.4433, "step": 34831 }, { "epoch": 0.4526259536776996, "grad_norm": 0.4040879011154175, "learning_rate": 0.00010949713409324272, "loss": 1.2944, "step": 34832 }, { "epoch": 0.45263894822161543, "grad_norm": 0.4118824601173401, "learning_rate": 0.00010949453463133133, "loss": 1.1739, "step": 34833 }, { "epoch": 0.45265194276553133, "grad_norm": 0.47655031085014343, "learning_rate": 0.00010949193516941993, "loss": 1.3565, "step": 34834 }, { "epoch": 0.4526649373094472, "grad_norm": 0.4509572386741638, "learning_rate": 0.00010948933570750854, "loss": 1.4582, "step": 34835 }, { "epoch": 0.4526779318533631, "grad_norm": 0.46587246656417847, "learning_rate": 0.00010948673624559718, "loss": 1.5548, "step": 34836 }, { "epoch": 0.4526909263972789, "grad_norm": 0.443010538816452, "learning_rate": 0.00010948413678368579, "loss": 1.3653, "step": 34837 }, { "epoch": 0.4527039209411948, "grad_norm": 0.4608035087585449, "learning_rate": 0.0001094815373217744, "loss": 1.4124, "step": 34838 }, { "epoch": 0.45271691548511067, "grad_norm": 0.4232497215270996, "learning_rate": 0.00010947893785986301, "loss": 1.5286, "step": 34839 }, { "epoch": 0.45272991002902657, "grad_norm": 0.32030290365219116, "learning_rate": 0.00010947633839795163, "loss": 1.1871, "step": 34840 }, { "epoch": 0.4527429045729424, "grad_norm": 0.36549127101898193, "learning_rate": 0.00010947373893604025, "loss": 1.3377, "step": 34841 }, { "epoch": 0.4527558991168583, "grad_norm": 0.3893648684024811, "learning_rate": 0.00010947113947412886, "loss": 1.3878, "step": 34842 }, { "epoch": 0.45276889366077416, "grad_norm": 0.40730881690979004, "learning_rate": 0.00010946854001221747, "loss": 1.5255, "step": 34843 }, { "epoch": 0.45278188820469006, "grad_norm": 0.4550569951534271, "learning_rate": 0.0001094659405503061, "loss": 1.5847, "step": 34844 }, { "epoch": 0.4527948827486059, "grad_norm": 0.39973410964012146, "learning_rate": 0.00010946334108839472, "loss": 1.4134, "step": 34845 }, { "epoch": 0.4528078772925218, "grad_norm": 0.43830522894859314, "learning_rate": 0.00010946074162648331, "loss": 1.4112, "step": 34846 }, { "epoch": 0.45282087183643766, "grad_norm": 0.399680495262146, "learning_rate": 0.00010945814216457193, "loss": 1.5347, "step": 34847 }, { "epoch": 0.45283386638035356, "grad_norm": 0.32835090160369873, "learning_rate": 0.00010945554270266056, "loss": 1.4869, "step": 34848 }, { "epoch": 0.4528468609242694, "grad_norm": 0.29390570521354675, "learning_rate": 0.00010945294324074917, "loss": 1.3645, "step": 34849 }, { "epoch": 0.4528598554681853, "grad_norm": 0.40344110131263733, "learning_rate": 0.00010945034377883778, "loss": 1.293, "step": 34850 }, { "epoch": 0.45287285001210115, "grad_norm": 0.2858838737010956, "learning_rate": 0.0001094477443169264, "loss": 1.1227, "step": 34851 }, { "epoch": 0.45288584455601705, "grad_norm": 0.39529165625572205, "learning_rate": 0.00010944514485501502, "loss": 1.4179, "step": 34852 }, { "epoch": 0.4528988390999329, "grad_norm": 0.40686482191085815, "learning_rate": 0.00010944254539310363, "loss": 1.3506, "step": 34853 }, { "epoch": 0.4529118336438488, "grad_norm": 0.34736207127571106, "learning_rate": 0.00010943994593119224, "loss": 1.3002, "step": 34854 }, { "epoch": 0.45292482818776464, "grad_norm": 0.36969509720802307, "learning_rate": 0.00010943734646928085, "loss": 1.155, "step": 34855 }, { "epoch": 0.45293782273168054, "grad_norm": 0.3658212125301361, "learning_rate": 0.00010943474700736949, "loss": 1.3451, "step": 34856 }, { "epoch": 0.4529508172755964, "grad_norm": 0.3960089087486267, "learning_rate": 0.0001094321475454581, "loss": 1.2906, "step": 34857 }, { "epoch": 0.4529638118195123, "grad_norm": 0.3687858581542969, "learning_rate": 0.00010942954808354671, "loss": 1.2775, "step": 34858 }, { "epoch": 0.45297680636342813, "grad_norm": 0.39189523458480835, "learning_rate": 0.00010942694862163531, "loss": 1.3935, "step": 34859 }, { "epoch": 0.45298980090734403, "grad_norm": 0.3685706853866577, "learning_rate": 0.00010942434915972395, "loss": 1.4652, "step": 34860 }, { "epoch": 0.4530027954512599, "grad_norm": 0.48676052689552307, "learning_rate": 0.00010942174969781256, "loss": 1.4153, "step": 34861 }, { "epoch": 0.4530157899951758, "grad_norm": 0.4634558856487274, "learning_rate": 0.00010941915023590117, "loss": 1.3157, "step": 34862 }, { "epoch": 0.4530287845390916, "grad_norm": 0.4022471010684967, "learning_rate": 0.00010941655077398978, "loss": 1.4496, "step": 34863 }, { "epoch": 0.4530417790830075, "grad_norm": 0.3934744894504547, "learning_rate": 0.0001094139513120784, "loss": 1.3123, "step": 34864 }, { "epoch": 0.45305477362692337, "grad_norm": 0.4103308320045471, "learning_rate": 0.00010941135185016702, "loss": 1.468, "step": 34865 }, { "epoch": 0.45306776817083927, "grad_norm": 0.5170601606369019, "learning_rate": 0.00010940875238825563, "loss": 1.3108, "step": 34866 }, { "epoch": 0.45308076271475517, "grad_norm": 0.37900015711784363, "learning_rate": 0.00010940615292634427, "loss": 1.2779, "step": 34867 }, { "epoch": 0.453093757258671, "grad_norm": 0.37011009454727173, "learning_rate": 0.00010940355346443288, "loss": 1.4667, "step": 34868 }, { "epoch": 0.4531067518025869, "grad_norm": 0.32416194677352905, "learning_rate": 0.00010940095400252149, "loss": 1.2279, "step": 34869 }, { "epoch": 0.45311974634650276, "grad_norm": 0.3607569932937622, "learning_rate": 0.0001093983545406101, "loss": 1.3245, "step": 34870 }, { "epoch": 0.45313274089041866, "grad_norm": 0.40057358145713806, "learning_rate": 0.00010939575507869872, "loss": 1.3865, "step": 34871 }, { "epoch": 0.4531457354343345, "grad_norm": 0.430584192276001, "learning_rate": 0.00010939315561678733, "loss": 1.4241, "step": 34872 }, { "epoch": 0.4531587299782504, "grad_norm": 0.4813686013221741, "learning_rate": 0.00010939055615487594, "loss": 1.4919, "step": 34873 }, { "epoch": 0.45317172452216625, "grad_norm": 0.37951257824897766, "learning_rate": 0.00010938795669296456, "loss": 1.3455, "step": 34874 }, { "epoch": 0.45318471906608215, "grad_norm": 0.36754757165908813, "learning_rate": 0.0001093853572310532, "loss": 1.3525, "step": 34875 }, { "epoch": 0.453197713609998, "grad_norm": 0.36494866013526917, "learning_rate": 0.00010938275776914179, "loss": 1.4013, "step": 34876 }, { "epoch": 0.4532107081539139, "grad_norm": 0.3692297339439392, "learning_rate": 0.0001093801583072304, "loss": 1.4688, "step": 34877 }, { "epoch": 0.45322370269782974, "grad_norm": 0.3461282551288605, "learning_rate": 0.00010937755884531901, "loss": 1.512, "step": 34878 }, { "epoch": 0.45323669724174565, "grad_norm": 0.39226582646369934, "learning_rate": 0.00010937495938340765, "loss": 1.3067, "step": 34879 }, { "epoch": 0.4532496917856615, "grad_norm": 0.35757702589035034, "learning_rate": 0.00010937235992149626, "loss": 1.3291, "step": 34880 }, { "epoch": 0.4532626863295774, "grad_norm": 0.44034165143966675, "learning_rate": 0.00010936976045958487, "loss": 1.3574, "step": 34881 }, { "epoch": 0.45327568087349324, "grad_norm": 0.33873802423477173, "learning_rate": 0.00010936716099767348, "loss": 1.3109, "step": 34882 }, { "epoch": 0.45328867541740914, "grad_norm": 0.3512853682041168, "learning_rate": 0.00010936456153576211, "loss": 1.2203, "step": 34883 }, { "epoch": 0.453301669961325, "grad_norm": 0.2922482490539551, "learning_rate": 0.00010936196207385072, "loss": 1.1629, "step": 34884 }, { "epoch": 0.4533146645052409, "grad_norm": 0.4449670910835266, "learning_rate": 0.00010935936261193933, "loss": 1.4845, "step": 34885 }, { "epoch": 0.45332765904915673, "grad_norm": 0.46181347966194153, "learning_rate": 0.00010935676315002794, "loss": 1.3276, "step": 34886 }, { "epoch": 0.45334065359307263, "grad_norm": 0.30708736181259155, "learning_rate": 0.00010935416368811658, "loss": 1.5237, "step": 34887 }, { "epoch": 0.4533536481369885, "grad_norm": 0.38255080580711365, "learning_rate": 0.00010935156422620518, "loss": 1.3888, "step": 34888 }, { "epoch": 0.4533666426809044, "grad_norm": 0.4061680734157562, "learning_rate": 0.00010934896476429379, "loss": 1.4784, "step": 34889 }, { "epoch": 0.4533796372248202, "grad_norm": 0.3476581871509552, "learning_rate": 0.0001093463653023824, "loss": 1.2786, "step": 34890 }, { "epoch": 0.4533926317687361, "grad_norm": 0.4036935865879059, "learning_rate": 0.00010934376584047104, "loss": 1.4064, "step": 34891 }, { "epoch": 0.45340562631265197, "grad_norm": 0.48464447259902954, "learning_rate": 0.00010934116637855965, "loss": 1.38, "step": 34892 }, { "epoch": 0.45341862085656787, "grad_norm": 0.5203121304512024, "learning_rate": 0.00010933856691664826, "loss": 1.4632, "step": 34893 }, { "epoch": 0.4534316154004837, "grad_norm": 0.3555506765842438, "learning_rate": 0.00010933596745473687, "loss": 1.5015, "step": 34894 }, { "epoch": 0.4534446099443996, "grad_norm": 0.39428168535232544, "learning_rate": 0.0001093333679928255, "loss": 1.2106, "step": 34895 }, { "epoch": 0.45345760448831546, "grad_norm": 0.498831570148468, "learning_rate": 0.0001093307685309141, "loss": 1.5395, "step": 34896 }, { "epoch": 0.45347059903223136, "grad_norm": 0.4345197379589081, "learning_rate": 0.00010932816906900272, "loss": 1.3917, "step": 34897 }, { "epoch": 0.4534835935761472, "grad_norm": 0.37654387950897217, "learning_rate": 0.00010932556960709133, "loss": 1.3873, "step": 34898 }, { "epoch": 0.4534965881200631, "grad_norm": 0.3658028244972229, "learning_rate": 0.00010932297014517996, "loss": 1.2453, "step": 34899 }, { "epoch": 0.45350958266397895, "grad_norm": 0.3189201354980469, "learning_rate": 0.00010932037068326858, "loss": 1.1739, "step": 34900 }, { "epoch": 0.45352257720789485, "grad_norm": 0.4434395730495453, "learning_rate": 0.00010931777122135717, "loss": 1.4704, "step": 34901 }, { "epoch": 0.4535355717518107, "grad_norm": 0.41789525747299194, "learning_rate": 0.00010931517175944578, "loss": 1.3896, "step": 34902 }, { "epoch": 0.4535485662957266, "grad_norm": 0.35161980986595154, "learning_rate": 0.00010931257229753442, "loss": 1.4959, "step": 34903 }, { "epoch": 0.45356156083964244, "grad_norm": 0.5179147720336914, "learning_rate": 0.00010930997283562303, "loss": 1.6515, "step": 34904 }, { "epoch": 0.45357455538355834, "grad_norm": 0.4979860782623291, "learning_rate": 0.00010930737337371164, "loss": 1.3833, "step": 34905 }, { "epoch": 0.4535875499274742, "grad_norm": 0.44387370347976685, "learning_rate": 0.00010930477391180027, "loss": 1.3929, "step": 34906 }, { "epoch": 0.4536005444713901, "grad_norm": 0.4943463206291199, "learning_rate": 0.00010930217444988888, "loss": 1.4795, "step": 34907 }, { "epoch": 0.45361353901530593, "grad_norm": 0.48257526755332947, "learning_rate": 0.00010929957498797749, "loss": 1.4454, "step": 34908 }, { "epoch": 0.45362653355922183, "grad_norm": 0.4452836811542511, "learning_rate": 0.0001092969755260661, "loss": 1.351, "step": 34909 }, { "epoch": 0.4536395281031377, "grad_norm": 0.4060641825199127, "learning_rate": 0.00010929437606415474, "loss": 1.4029, "step": 34910 }, { "epoch": 0.4536525226470536, "grad_norm": 0.4017835259437561, "learning_rate": 0.00010929177660224335, "loss": 1.3236, "step": 34911 }, { "epoch": 0.4536655171909694, "grad_norm": 0.3780839145183563, "learning_rate": 0.00010928917714033196, "loss": 1.1553, "step": 34912 }, { "epoch": 0.4536785117348853, "grad_norm": 0.46542033553123474, "learning_rate": 0.00010928657767842057, "loss": 1.5239, "step": 34913 }, { "epoch": 0.45369150627880117, "grad_norm": 0.3736023008823395, "learning_rate": 0.0001092839782165092, "loss": 1.3707, "step": 34914 }, { "epoch": 0.45370450082271707, "grad_norm": 0.41032758355140686, "learning_rate": 0.00010928137875459781, "loss": 1.4228, "step": 34915 }, { "epoch": 0.4537174953666329, "grad_norm": 0.47862017154693604, "learning_rate": 0.00010927877929268642, "loss": 1.4247, "step": 34916 }, { "epoch": 0.4537304899105488, "grad_norm": 0.41875341534614563, "learning_rate": 0.00010927617983077503, "loss": 1.3976, "step": 34917 }, { "epoch": 0.45374348445446466, "grad_norm": 0.43008360266685486, "learning_rate": 0.00010927358036886365, "loss": 1.4219, "step": 34918 }, { "epoch": 0.45375647899838056, "grad_norm": 0.39129018783569336, "learning_rate": 0.00010927098090695226, "loss": 1.4176, "step": 34919 }, { "epoch": 0.4537694735422964, "grad_norm": 0.3203194737434387, "learning_rate": 0.00010926838144504088, "loss": 1.2215, "step": 34920 }, { "epoch": 0.4537824680862123, "grad_norm": 0.26123708486557007, "learning_rate": 0.00010926578198312949, "loss": 1.3103, "step": 34921 }, { "epoch": 0.45379546263012815, "grad_norm": 0.29463663697242737, "learning_rate": 0.00010926318252121812, "loss": 1.454, "step": 34922 }, { "epoch": 0.45380845717404406, "grad_norm": 0.43968164920806885, "learning_rate": 0.00010926058305930674, "loss": 1.3738, "step": 34923 }, { "epoch": 0.4538214517179599, "grad_norm": 0.3553709387779236, "learning_rate": 0.00010925798359739535, "loss": 1.2153, "step": 34924 }, { "epoch": 0.4538344462618758, "grad_norm": 0.39629679918289185, "learning_rate": 0.00010925538413548396, "loss": 1.3975, "step": 34925 }, { "epoch": 0.45384744080579165, "grad_norm": 0.48441317677497864, "learning_rate": 0.00010925278467357258, "loss": 1.5037, "step": 34926 }, { "epoch": 0.45386043534970755, "grad_norm": 0.30706319212913513, "learning_rate": 0.00010925018521166119, "loss": 1.3416, "step": 34927 }, { "epoch": 0.4538734298936234, "grad_norm": 0.384332537651062, "learning_rate": 0.0001092475857497498, "loss": 1.3945, "step": 34928 }, { "epoch": 0.4538864244375393, "grad_norm": 0.3666355013847351, "learning_rate": 0.00010924498628783841, "loss": 1.376, "step": 34929 }, { "epoch": 0.45389941898145514, "grad_norm": 0.40043532848358154, "learning_rate": 0.00010924238682592704, "loss": 1.3757, "step": 34930 }, { "epoch": 0.45391241352537104, "grad_norm": 0.4931693375110626, "learning_rate": 0.00010923978736401565, "loss": 1.4474, "step": 34931 }, { "epoch": 0.4539254080692869, "grad_norm": 0.3278084397315979, "learning_rate": 0.00010923718790210426, "loss": 1.6461, "step": 34932 }, { "epoch": 0.4539384026132028, "grad_norm": 0.3510821461677551, "learning_rate": 0.00010923458844019287, "loss": 1.4728, "step": 34933 }, { "epoch": 0.45395139715711863, "grad_norm": 0.4659602642059326, "learning_rate": 0.00010923198897828151, "loss": 1.3234, "step": 34934 }, { "epoch": 0.45396439170103453, "grad_norm": 0.35574060678482056, "learning_rate": 0.00010922938951637012, "loss": 1.1814, "step": 34935 }, { "epoch": 0.4539773862449504, "grad_norm": 0.3933261036872864, "learning_rate": 0.00010922679005445873, "loss": 1.1879, "step": 34936 }, { "epoch": 0.4539903807888663, "grad_norm": 0.3575122058391571, "learning_rate": 0.00010922419059254734, "loss": 1.4312, "step": 34937 }, { "epoch": 0.4540033753327821, "grad_norm": 0.487956166267395, "learning_rate": 0.00010922159113063597, "loss": 1.4441, "step": 34938 }, { "epoch": 0.454016369876698, "grad_norm": 0.42550304532051086, "learning_rate": 0.00010921899166872458, "loss": 1.3532, "step": 34939 }, { "epoch": 0.45402936442061387, "grad_norm": 0.3599705398082733, "learning_rate": 0.00010921639220681319, "loss": 1.5448, "step": 34940 }, { "epoch": 0.45404235896452977, "grad_norm": 0.4104795455932617, "learning_rate": 0.0001092137927449018, "loss": 1.215, "step": 34941 }, { "epoch": 0.4540553535084456, "grad_norm": 0.3183121383190155, "learning_rate": 0.00010921119328299044, "loss": 1.5554, "step": 34942 }, { "epoch": 0.4540683480523615, "grad_norm": 0.4363402724266052, "learning_rate": 0.00010920859382107904, "loss": 1.3455, "step": 34943 }, { "epoch": 0.4540813425962774, "grad_norm": 0.3751155734062195, "learning_rate": 0.00010920599435916765, "loss": 1.4028, "step": 34944 }, { "epoch": 0.45409433714019326, "grad_norm": 0.4963170289993286, "learning_rate": 0.00010920339489725628, "loss": 1.3854, "step": 34945 }, { "epoch": 0.45410733168410916, "grad_norm": 0.4422619938850403, "learning_rate": 0.0001092007954353449, "loss": 1.2962, "step": 34946 }, { "epoch": 0.454120326228025, "grad_norm": 0.39218616485595703, "learning_rate": 0.0001091981959734335, "loss": 1.4155, "step": 34947 }, { "epoch": 0.4541333207719409, "grad_norm": 0.3759183883666992, "learning_rate": 0.00010919559651152212, "loss": 1.3537, "step": 34948 }, { "epoch": 0.45414631531585675, "grad_norm": 0.3689135015010834, "learning_rate": 0.00010919299704961074, "loss": 1.3818, "step": 34949 }, { "epoch": 0.45415930985977265, "grad_norm": 0.40971893072128296, "learning_rate": 0.00010919039758769935, "loss": 1.4074, "step": 34950 }, { "epoch": 0.4541723044036885, "grad_norm": 0.33112218976020813, "learning_rate": 0.00010918779812578796, "loss": 1.204, "step": 34951 }, { "epoch": 0.4541852989476044, "grad_norm": 0.4678493142127991, "learning_rate": 0.00010918519866387657, "loss": 1.3324, "step": 34952 }, { "epoch": 0.45419829349152024, "grad_norm": 0.4082872271537781, "learning_rate": 0.00010918259920196521, "loss": 1.4707, "step": 34953 }, { "epoch": 0.45421128803543614, "grad_norm": 0.39554452896118164, "learning_rate": 0.00010917999974005382, "loss": 1.3348, "step": 34954 }, { "epoch": 0.454224282579352, "grad_norm": 0.4113595187664032, "learning_rate": 0.00010917740027814243, "loss": 1.423, "step": 34955 }, { "epoch": 0.4542372771232679, "grad_norm": 0.34707385301589966, "learning_rate": 0.00010917480081623103, "loss": 1.3233, "step": 34956 }, { "epoch": 0.45425027166718374, "grad_norm": 0.3725568652153015, "learning_rate": 0.00010917220135431967, "loss": 1.4826, "step": 34957 }, { "epoch": 0.45426326621109964, "grad_norm": 0.39915165305137634, "learning_rate": 0.00010916960189240828, "loss": 1.3383, "step": 34958 }, { "epoch": 0.4542762607550155, "grad_norm": 0.3920382559299469, "learning_rate": 0.00010916700243049689, "loss": 1.4451, "step": 34959 }, { "epoch": 0.4542892552989314, "grad_norm": 0.4853487014770508, "learning_rate": 0.0001091644029685855, "loss": 1.3799, "step": 34960 }, { "epoch": 0.45430224984284723, "grad_norm": 0.46993762254714966, "learning_rate": 0.00010916180350667413, "loss": 1.4378, "step": 34961 }, { "epoch": 0.45431524438676313, "grad_norm": 0.33537721633911133, "learning_rate": 0.00010915920404476274, "loss": 1.321, "step": 34962 }, { "epoch": 0.454328238930679, "grad_norm": 0.466763973236084, "learning_rate": 0.00010915660458285135, "loss": 1.4883, "step": 34963 }, { "epoch": 0.4543412334745949, "grad_norm": 0.47694411873817444, "learning_rate": 0.00010915400512093996, "loss": 1.2671, "step": 34964 }, { "epoch": 0.4543542280185107, "grad_norm": 0.3795941174030304, "learning_rate": 0.0001091514056590286, "loss": 1.421, "step": 34965 }, { "epoch": 0.4543672225624266, "grad_norm": 0.4208022356033325, "learning_rate": 0.00010914880619711721, "loss": 1.4141, "step": 34966 }, { "epoch": 0.45438021710634247, "grad_norm": 0.3960261940956116, "learning_rate": 0.00010914620673520582, "loss": 1.3542, "step": 34967 }, { "epoch": 0.45439321165025837, "grad_norm": 0.4825434982776642, "learning_rate": 0.00010914360727329442, "loss": 1.5779, "step": 34968 }, { "epoch": 0.4544062061941742, "grad_norm": 0.41211599111557007, "learning_rate": 0.00010914100781138305, "loss": 1.283, "step": 34969 }, { "epoch": 0.4544192007380901, "grad_norm": 0.4351232349872589, "learning_rate": 0.00010913840834947167, "loss": 1.3697, "step": 34970 }, { "epoch": 0.45443219528200596, "grad_norm": 0.40193799138069153, "learning_rate": 0.00010913580888756028, "loss": 1.1993, "step": 34971 }, { "epoch": 0.45444518982592186, "grad_norm": 0.455183744430542, "learning_rate": 0.00010913320942564889, "loss": 1.3317, "step": 34972 }, { "epoch": 0.4544581843698377, "grad_norm": 0.39767202734947205, "learning_rate": 0.00010913060996373751, "loss": 1.3024, "step": 34973 }, { "epoch": 0.4544711789137536, "grad_norm": 0.3617262840270996, "learning_rate": 0.00010912801050182612, "loss": 1.3822, "step": 34974 }, { "epoch": 0.45448417345766945, "grad_norm": 0.42992904782295227, "learning_rate": 0.00010912541103991473, "loss": 1.4194, "step": 34975 }, { "epoch": 0.45449716800158535, "grad_norm": 0.448257714509964, "learning_rate": 0.00010912281157800335, "loss": 1.5057, "step": 34976 }, { "epoch": 0.4545101625455012, "grad_norm": 0.45390164852142334, "learning_rate": 0.00010912021211609198, "loss": 1.6032, "step": 34977 }, { "epoch": 0.4545231570894171, "grad_norm": 0.4097226858139038, "learning_rate": 0.0001091176126541806, "loss": 1.5089, "step": 34978 }, { "epoch": 0.45453615163333294, "grad_norm": 0.44250762462615967, "learning_rate": 0.0001091150131922692, "loss": 1.3101, "step": 34979 }, { "epoch": 0.45454914617724884, "grad_norm": 0.2933673560619354, "learning_rate": 0.00010911241373035783, "loss": 1.1957, "step": 34980 }, { "epoch": 0.4545621407211647, "grad_norm": 0.4554346799850464, "learning_rate": 0.00010910981426844644, "loss": 1.4014, "step": 34981 }, { "epoch": 0.4545751352650806, "grad_norm": 0.25293442606925964, "learning_rate": 0.00010910721480653505, "loss": 1.2888, "step": 34982 }, { "epoch": 0.45458812980899643, "grad_norm": 0.3799055516719818, "learning_rate": 0.00010910461534462366, "loss": 1.4616, "step": 34983 }, { "epoch": 0.45460112435291233, "grad_norm": 0.3837970495223999, "learning_rate": 0.0001091020158827123, "loss": 1.3854, "step": 34984 }, { "epoch": 0.4546141188968282, "grad_norm": 0.4756086468696594, "learning_rate": 0.0001090994164208009, "loss": 1.3803, "step": 34985 }, { "epoch": 0.4546271134407441, "grad_norm": 0.3744949698448181, "learning_rate": 0.00010909681695888951, "loss": 1.5854, "step": 34986 }, { "epoch": 0.4546401079846599, "grad_norm": 0.32548439502716064, "learning_rate": 0.00010909421749697812, "loss": 1.2678, "step": 34987 }, { "epoch": 0.4546531025285758, "grad_norm": 0.3792979121208191, "learning_rate": 0.00010909161803506676, "loss": 1.3154, "step": 34988 }, { "epoch": 0.45466609707249167, "grad_norm": 0.43225038051605225, "learning_rate": 0.00010908901857315537, "loss": 1.3494, "step": 34989 }, { "epoch": 0.45467909161640757, "grad_norm": 0.4257349371910095, "learning_rate": 0.00010908641911124398, "loss": 1.4762, "step": 34990 }, { "epoch": 0.4546920861603234, "grad_norm": 0.30412521958351135, "learning_rate": 0.00010908381964933259, "loss": 1.5024, "step": 34991 }, { "epoch": 0.4547050807042393, "grad_norm": 0.4065783619880676, "learning_rate": 0.00010908122018742121, "loss": 1.2806, "step": 34992 }, { "epoch": 0.45471807524815516, "grad_norm": 0.3466070294380188, "learning_rate": 0.00010907862072550983, "loss": 1.3104, "step": 34993 }, { "epoch": 0.45473106979207106, "grad_norm": 0.3153553903102875, "learning_rate": 0.00010907602126359844, "loss": 1.3188, "step": 34994 }, { "epoch": 0.4547440643359869, "grad_norm": 0.39845412969589233, "learning_rate": 0.00010907342180168705, "loss": 1.4495, "step": 34995 }, { "epoch": 0.4547570588799028, "grad_norm": 0.3720419108867645, "learning_rate": 0.00010907082233977569, "loss": 1.3898, "step": 34996 }, { "epoch": 0.45477005342381865, "grad_norm": 0.30603674054145813, "learning_rate": 0.0001090682228778643, "loss": 1.2955, "step": 34997 }, { "epoch": 0.45478304796773455, "grad_norm": 0.4608227014541626, "learning_rate": 0.0001090656234159529, "loss": 1.37, "step": 34998 }, { "epoch": 0.4547960425116504, "grad_norm": 0.4668556749820709, "learning_rate": 0.0001090630239540415, "loss": 1.4287, "step": 34999 }, { "epoch": 0.4548090370555663, "grad_norm": 0.28385376930236816, "learning_rate": 0.00010906042449213014, "loss": 1.2802, "step": 35000 }, { "epoch": 0.45482203159948215, "grad_norm": 0.459938108921051, "learning_rate": 0.00010905782503021875, "loss": 1.4945, "step": 35001 }, { "epoch": 0.45483502614339805, "grad_norm": 0.4360826909542084, "learning_rate": 0.00010905522556830736, "loss": 1.4677, "step": 35002 }, { "epoch": 0.4548480206873139, "grad_norm": 0.4446322023868561, "learning_rate": 0.00010905262610639598, "loss": 1.4812, "step": 35003 }, { "epoch": 0.4548610152312298, "grad_norm": 0.3647370934486389, "learning_rate": 0.0001090500266444846, "loss": 1.3933, "step": 35004 }, { "epoch": 0.45487400977514564, "grad_norm": 0.33514007925987244, "learning_rate": 0.00010904742718257321, "loss": 1.3609, "step": 35005 }, { "epoch": 0.45488700431906154, "grad_norm": 0.3846753239631653, "learning_rate": 0.00010904482772066182, "loss": 1.327, "step": 35006 }, { "epoch": 0.4548999988629774, "grad_norm": 0.38477998971939087, "learning_rate": 0.00010904222825875043, "loss": 1.4785, "step": 35007 }, { "epoch": 0.4549129934068933, "grad_norm": 0.43844062089920044, "learning_rate": 0.00010903962879683907, "loss": 1.4052, "step": 35008 }, { "epoch": 0.45492598795080913, "grad_norm": 0.3931030035018921, "learning_rate": 0.00010903702933492768, "loss": 1.4713, "step": 35009 }, { "epoch": 0.45493898249472503, "grad_norm": 0.4574306011199951, "learning_rate": 0.00010903442987301628, "loss": 1.5572, "step": 35010 }, { "epoch": 0.4549519770386409, "grad_norm": 0.5211104154586792, "learning_rate": 0.00010903183041110489, "loss": 1.2276, "step": 35011 }, { "epoch": 0.4549649715825568, "grad_norm": 0.3815925717353821, "learning_rate": 0.00010902923094919353, "loss": 1.4566, "step": 35012 }, { "epoch": 0.4549779661264726, "grad_norm": 0.47390589118003845, "learning_rate": 0.00010902663148728214, "loss": 1.3785, "step": 35013 }, { "epoch": 0.4549909606703885, "grad_norm": 0.36726316809654236, "learning_rate": 0.00010902403202537075, "loss": 1.62, "step": 35014 }, { "epoch": 0.45500395521430437, "grad_norm": 0.3422521650791168, "learning_rate": 0.00010902143256345936, "loss": 1.2442, "step": 35015 }, { "epoch": 0.45501694975822027, "grad_norm": 0.4108089208602905, "learning_rate": 0.00010901883310154799, "loss": 1.3889, "step": 35016 }, { "epoch": 0.4550299443021361, "grad_norm": 0.43522727489471436, "learning_rate": 0.0001090162336396366, "loss": 1.4911, "step": 35017 }, { "epoch": 0.455042938846052, "grad_norm": 0.4079974591732025, "learning_rate": 0.00010901363417772521, "loss": 1.3812, "step": 35018 }, { "epoch": 0.4550559333899679, "grad_norm": 0.39696234464645386, "learning_rate": 0.00010901103471581385, "loss": 1.5166, "step": 35019 }, { "epoch": 0.45506892793388376, "grad_norm": 0.4703359603881836, "learning_rate": 0.00010900843525390246, "loss": 1.5466, "step": 35020 }, { "epoch": 0.45508192247779966, "grad_norm": 0.47171080112457275, "learning_rate": 0.00010900583579199107, "loss": 1.4436, "step": 35021 }, { "epoch": 0.4550949170217155, "grad_norm": 0.3912294805049896, "learning_rate": 0.00010900323633007968, "loss": 1.4944, "step": 35022 }, { "epoch": 0.4551079115656314, "grad_norm": 0.36724093556404114, "learning_rate": 0.0001090006368681683, "loss": 1.3478, "step": 35023 }, { "epoch": 0.45512090610954725, "grad_norm": 0.3526167571544647, "learning_rate": 0.00010899803740625691, "loss": 1.1736, "step": 35024 }, { "epoch": 0.45513390065346315, "grad_norm": 0.38478177785873413, "learning_rate": 0.00010899543794434552, "loss": 1.3599, "step": 35025 }, { "epoch": 0.455146895197379, "grad_norm": 0.46311745047569275, "learning_rate": 0.00010899283848243414, "loss": 1.4867, "step": 35026 }, { "epoch": 0.4551598897412949, "grad_norm": 0.4363219738006592, "learning_rate": 0.00010899023902052276, "loss": 1.2768, "step": 35027 }, { "epoch": 0.45517288428521074, "grad_norm": 0.3876301944255829, "learning_rate": 0.00010898763955861137, "loss": 1.4893, "step": 35028 }, { "epoch": 0.45518587882912664, "grad_norm": 0.3941620886325836, "learning_rate": 0.00010898504009669998, "loss": 1.3935, "step": 35029 }, { "epoch": 0.4551988733730425, "grad_norm": 0.23850257694721222, "learning_rate": 0.00010898244063478859, "loss": 1.1561, "step": 35030 }, { "epoch": 0.4552118679169584, "grad_norm": 0.44576647877693176, "learning_rate": 0.00010897984117287723, "loss": 1.4445, "step": 35031 }, { "epoch": 0.45522486246087424, "grad_norm": 0.37551572918891907, "learning_rate": 0.00010897724171096584, "loss": 1.3057, "step": 35032 }, { "epoch": 0.45523785700479014, "grad_norm": 0.4259045720100403, "learning_rate": 0.00010897464224905445, "loss": 1.48, "step": 35033 }, { "epoch": 0.455250851548706, "grad_norm": 0.35197895765304565, "learning_rate": 0.00010897204278714306, "loss": 1.344, "step": 35034 }, { "epoch": 0.4552638460926219, "grad_norm": 0.29217153787612915, "learning_rate": 0.00010896944332523169, "loss": 1.2978, "step": 35035 }, { "epoch": 0.4552768406365377, "grad_norm": 0.4392157793045044, "learning_rate": 0.0001089668438633203, "loss": 1.4654, "step": 35036 }, { "epoch": 0.45528983518045363, "grad_norm": 0.3665829002857208, "learning_rate": 0.00010896424440140891, "loss": 1.2654, "step": 35037 }, { "epoch": 0.4553028297243695, "grad_norm": 0.41133347153663635, "learning_rate": 0.00010896164493949752, "loss": 1.3439, "step": 35038 }, { "epoch": 0.4553158242682854, "grad_norm": 0.41872677206993103, "learning_rate": 0.00010895904547758616, "loss": 1.5281, "step": 35039 }, { "epoch": 0.4553288188122012, "grad_norm": 0.439369797706604, "learning_rate": 0.00010895644601567476, "loss": 1.3763, "step": 35040 }, { "epoch": 0.4553418133561171, "grad_norm": 0.40229979157447815, "learning_rate": 0.00010895384655376337, "loss": 1.5444, "step": 35041 }, { "epoch": 0.45535480790003297, "grad_norm": 0.4021202325820923, "learning_rate": 0.00010895124709185198, "loss": 1.4073, "step": 35042 }, { "epoch": 0.45536780244394887, "grad_norm": 0.3818398714065552, "learning_rate": 0.00010894864762994062, "loss": 1.4037, "step": 35043 }, { "epoch": 0.4553807969878647, "grad_norm": 0.39342767000198364, "learning_rate": 0.00010894604816802923, "loss": 1.4022, "step": 35044 }, { "epoch": 0.4553937915317806, "grad_norm": 0.36151599884033203, "learning_rate": 0.00010894344870611784, "loss": 1.2688, "step": 35045 }, { "epoch": 0.45540678607569646, "grad_norm": 0.36676326394081116, "learning_rate": 0.00010894084924420645, "loss": 1.2197, "step": 35046 }, { "epoch": 0.45541978061961236, "grad_norm": 0.3971310257911682, "learning_rate": 0.00010893824978229507, "loss": 1.379, "step": 35047 }, { "epoch": 0.4554327751635282, "grad_norm": 0.4179786443710327, "learning_rate": 0.00010893565032038368, "loss": 1.4474, "step": 35048 }, { "epoch": 0.4554457697074441, "grad_norm": 0.37974312901496887, "learning_rate": 0.0001089330508584723, "loss": 1.4708, "step": 35049 }, { "epoch": 0.45545876425135995, "grad_norm": 0.31294378638267517, "learning_rate": 0.0001089304513965609, "loss": 1.2887, "step": 35050 }, { "epoch": 0.45547175879527585, "grad_norm": 0.3727509379386902, "learning_rate": 0.00010892785193464954, "loss": 1.1632, "step": 35051 }, { "epoch": 0.4554847533391917, "grad_norm": 0.4583154618740082, "learning_rate": 0.00010892525247273814, "loss": 1.3536, "step": 35052 }, { "epoch": 0.4554977478831076, "grad_norm": 0.3647528886795044, "learning_rate": 0.00010892265301082675, "loss": 1.4213, "step": 35053 }, { "epoch": 0.45551074242702344, "grad_norm": 0.3386082053184509, "learning_rate": 0.00010892005354891539, "loss": 1.3928, "step": 35054 }, { "epoch": 0.45552373697093934, "grad_norm": 0.35960879921913147, "learning_rate": 0.000108917454087004, "loss": 1.2011, "step": 35055 }, { "epoch": 0.4555367315148552, "grad_norm": 0.439457505941391, "learning_rate": 0.00010891485462509261, "loss": 1.3372, "step": 35056 }, { "epoch": 0.4555497260587711, "grad_norm": 0.40421563386917114, "learning_rate": 0.00010891225516318122, "loss": 1.5815, "step": 35057 }, { "epoch": 0.45556272060268693, "grad_norm": 0.40897563099861145, "learning_rate": 0.00010890965570126985, "loss": 1.4969, "step": 35058 }, { "epoch": 0.45557571514660283, "grad_norm": 0.33841508626937866, "learning_rate": 0.00010890705623935846, "loss": 1.5263, "step": 35059 }, { "epoch": 0.4555887096905187, "grad_norm": 0.3402288556098938, "learning_rate": 0.00010890445677744707, "loss": 1.5226, "step": 35060 }, { "epoch": 0.4556017042344346, "grad_norm": 0.3883363902568817, "learning_rate": 0.00010890185731553568, "loss": 1.3213, "step": 35061 }, { "epoch": 0.4556146987783504, "grad_norm": 0.4305150508880615, "learning_rate": 0.00010889925785362432, "loss": 1.4375, "step": 35062 }, { "epoch": 0.4556276933222663, "grad_norm": 0.37741154432296753, "learning_rate": 0.00010889665839171293, "loss": 1.3679, "step": 35063 }, { "epoch": 0.45564068786618217, "grad_norm": 0.3971658945083618, "learning_rate": 0.00010889405892980154, "loss": 1.4676, "step": 35064 }, { "epoch": 0.45565368241009807, "grad_norm": 0.5680103898048401, "learning_rate": 0.00010889145946789014, "loss": 1.4097, "step": 35065 }, { "epoch": 0.4556666769540139, "grad_norm": 0.4103486239910126, "learning_rate": 0.00010888886000597878, "loss": 1.4053, "step": 35066 }, { "epoch": 0.4556796714979298, "grad_norm": 0.29176458716392517, "learning_rate": 0.00010888626054406739, "loss": 1.2888, "step": 35067 }, { "epoch": 0.45569266604184566, "grad_norm": 0.3674878180027008, "learning_rate": 0.000108883661082156, "loss": 1.3179, "step": 35068 }, { "epoch": 0.45570566058576156, "grad_norm": 0.3420224189758301, "learning_rate": 0.00010888106162024461, "loss": 1.3068, "step": 35069 }, { "epoch": 0.4557186551296774, "grad_norm": 0.3458615243434906, "learning_rate": 0.00010887846215833323, "loss": 1.2571, "step": 35070 }, { "epoch": 0.4557316496735933, "grad_norm": 0.4455699324607849, "learning_rate": 0.00010887586269642184, "loss": 1.4624, "step": 35071 }, { "epoch": 0.45574464421750915, "grad_norm": 0.44694995880126953, "learning_rate": 0.00010887326323451046, "loss": 1.4674, "step": 35072 }, { "epoch": 0.45575763876142505, "grad_norm": 0.4394267201423645, "learning_rate": 0.00010887066377259907, "loss": 1.5299, "step": 35073 }, { "epoch": 0.4557706333053409, "grad_norm": 0.5419975519180298, "learning_rate": 0.0001088680643106877, "loss": 1.5702, "step": 35074 }, { "epoch": 0.4557836278492568, "grad_norm": 0.4657742977142334, "learning_rate": 0.00010886546484877632, "loss": 1.3794, "step": 35075 }, { "epoch": 0.45579662239317265, "grad_norm": 0.3677768111228943, "learning_rate": 0.00010886286538686493, "loss": 1.3042, "step": 35076 }, { "epoch": 0.45580961693708855, "grad_norm": 0.38906824588775635, "learning_rate": 0.00010886026592495354, "loss": 1.3395, "step": 35077 }, { "epoch": 0.4558226114810044, "grad_norm": 0.441157728433609, "learning_rate": 0.00010885766646304216, "loss": 1.3994, "step": 35078 }, { "epoch": 0.4558356060249203, "grad_norm": 0.40818026661872864, "learning_rate": 0.00010885506700113077, "loss": 1.377, "step": 35079 }, { "epoch": 0.45584860056883614, "grad_norm": 0.3919728100299835, "learning_rate": 0.00010885246753921938, "loss": 1.4499, "step": 35080 }, { "epoch": 0.45586159511275204, "grad_norm": 0.4807562530040741, "learning_rate": 0.000108849868077308, "loss": 1.2835, "step": 35081 }, { "epoch": 0.4558745896566679, "grad_norm": 0.43233057856559753, "learning_rate": 0.00010884726861539662, "loss": 1.2688, "step": 35082 }, { "epoch": 0.4558875842005838, "grad_norm": 0.37879419326782227, "learning_rate": 0.00010884466915348523, "loss": 1.3436, "step": 35083 }, { "epoch": 0.45590057874449963, "grad_norm": 0.4727247953414917, "learning_rate": 0.00010884206969157384, "loss": 1.3208, "step": 35084 }, { "epoch": 0.45591357328841553, "grad_norm": 0.36132052540779114, "learning_rate": 0.00010883947022966245, "loss": 1.4261, "step": 35085 }, { "epoch": 0.4559265678323314, "grad_norm": 0.4445934593677521, "learning_rate": 0.00010883687076775109, "loss": 1.3316, "step": 35086 }, { "epoch": 0.4559395623762473, "grad_norm": 0.3887840211391449, "learning_rate": 0.0001088342713058397, "loss": 1.401, "step": 35087 }, { "epoch": 0.4559525569201631, "grad_norm": 0.41173064708709717, "learning_rate": 0.00010883167184392831, "loss": 1.3259, "step": 35088 }, { "epoch": 0.455965551464079, "grad_norm": 0.3394840657711029, "learning_rate": 0.00010882907238201692, "loss": 1.1398, "step": 35089 }, { "epoch": 0.45597854600799487, "grad_norm": 0.48709434270858765, "learning_rate": 0.00010882647292010555, "loss": 1.378, "step": 35090 }, { "epoch": 0.45599154055191077, "grad_norm": 0.3950914144515991, "learning_rate": 0.00010882387345819416, "loss": 1.4163, "step": 35091 }, { "epoch": 0.4560045350958266, "grad_norm": 0.4286964535713196, "learning_rate": 0.00010882127399628277, "loss": 1.5206, "step": 35092 }, { "epoch": 0.4560175296397425, "grad_norm": 0.339445024728775, "learning_rate": 0.00010881867453437141, "loss": 1.5024, "step": 35093 }, { "epoch": 0.45603052418365836, "grad_norm": 0.49044355750083923, "learning_rate": 0.00010881607507246, "loss": 1.3305, "step": 35094 }, { "epoch": 0.45604351872757426, "grad_norm": 0.39040762186050415, "learning_rate": 0.00010881347561054862, "loss": 1.2209, "step": 35095 }, { "epoch": 0.45605651327149016, "grad_norm": 0.475614458322525, "learning_rate": 0.00010881087614863723, "loss": 1.4488, "step": 35096 }, { "epoch": 0.456069507815406, "grad_norm": 0.42221200466156006, "learning_rate": 0.00010880827668672586, "loss": 1.479, "step": 35097 }, { "epoch": 0.4560825023593219, "grad_norm": 0.7619783878326416, "learning_rate": 0.00010880567722481448, "loss": 1.4547, "step": 35098 }, { "epoch": 0.45609549690323775, "grad_norm": 0.4462756812572479, "learning_rate": 0.00010880307776290309, "loss": 1.527, "step": 35099 }, { "epoch": 0.45610849144715365, "grad_norm": 0.5439983010292053, "learning_rate": 0.0001088004783009917, "loss": 1.4804, "step": 35100 }, { "epoch": 0.4561214859910695, "grad_norm": 0.3381611108779907, "learning_rate": 0.00010879787883908032, "loss": 1.2476, "step": 35101 }, { "epoch": 0.4561344805349854, "grad_norm": 0.49465855956077576, "learning_rate": 0.00010879527937716893, "loss": 1.4513, "step": 35102 }, { "epoch": 0.45614747507890124, "grad_norm": 0.34988129138946533, "learning_rate": 0.00010879267991525754, "loss": 1.3758, "step": 35103 }, { "epoch": 0.45616046962281714, "grad_norm": 0.37241464853286743, "learning_rate": 0.00010879008045334615, "loss": 1.3197, "step": 35104 }, { "epoch": 0.456173464166733, "grad_norm": 0.3859103322029114, "learning_rate": 0.00010878748099143479, "loss": 1.3371, "step": 35105 }, { "epoch": 0.4561864587106489, "grad_norm": 0.4179384410381317, "learning_rate": 0.0001087848815295234, "loss": 1.2031, "step": 35106 }, { "epoch": 0.45619945325456474, "grad_norm": 0.42201900482177734, "learning_rate": 0.000108782282067612, "loss": 1.3764, "step": 35107 }, { "epoch": 0.45621244779848064, "grad_norm": 0.4361856281757355, "learning_rate": 0.00010877968260570061, "loss": 1.2826, "step": 35108 }, { "epoch": 0.4562254423423965, "grad_norm": 0.3928603231906891, "learning_rate": 0.00010877708314378925, "loss": 1.4327, "step": 35109 }, { "epoch": 0.4562384368863124, "grad_norm": 0.3992486298084259, "learning_rate": 0.00010877448368187786, "loss": 1.3784, "step": 35110 }, { "epoch": 0.4562514314302282, "grad_norm": 0.30212706327438354, "learning_rate": 0.00010877188421996647, "loss": 1.3223, "step": 35111 }, { "epoch": 0.4562644259741441, "grad_norm": 0.43496283888816833, "learning_rate": 0.00010876928475805508, "loss": 1.4327, "step": 35112 }, { "epoch": 0.45627742051806, "grad_norm": 0.31552693247795105, "learning_rate": 0.0001087666852961437, "loss": 1.5971, "step": 35113 }, { "epoch": 0.4562904150619759, "grad_norm": 0.39178699254989624, "learning_rate": 0.00010876408583423232, "loss": 1.4298, "step": 35114 }, { "epoch": 0.4563034096058917, "grad_norm": 0.28121933341026306, "learning_rate": 0.00010876148637232093, "loss": 1.5715, "step": 35115 }, { "epoch": 0.4563164041498076, "grad_norm": 0.45745649933815, "learning_rate": 0.00010875888691040954, "loss": 1.5354, "step": 35116 }, { "epoch": 0.45632939869372346, "grad_norm": 0.3201655447483063, "learning_rate": 0.00010875628744849818, "loss": 1.3782, "step": 35117 }, { "epoch": 0.45634239323763937, "grad_norm": 0.33472123742103577, "learning_rate": 0.00010875368798658679, "loss": 1.4377, "step": 35118 }, { "epoch": 0.4563553877815552, "grad_norm": 0.5092899799346924, "learning_rate": 0.0001087510885246754, "loss": 1.2806, "step": 35119 }, { "epoch": 0.4563683823254711, "grad_norm": 0.3529927432537079, "learning_rate": 0.000108748489062764, "loss": 1.3199, "step": 35120 }, { "epoch": 0.45638137686938696, "grad_norm": 0.3585021495819092, "learning_rate": 0.00010874588960085263, "loss": 1.5215, "step": 35121 }, { "epoch": 0.45639437141330286, "grad_norm": 0.4989951550960541, "learning_rate": 0.00010874329013894125, "loss": 1.4494, "step": 35122 }, { "epoch": 0.4564073659572187, "grad_norm": 0.5339763760566711, "learning_rate": 0.00010874069067702986, "loss": 1.2754, "step": 35123 }, { "epoch": 0.4564203605011346, "grad_norm": 0.34773895144462585, "learning_rate": 0.00010873809121511847, "loss": 1.2247, "step": 35124 }, { "epoch": 0.45643335504505045, "grad_norm": 0.4995030164718628, "learning_rate": 0.00010873549175320709, "loss": 1.5771, "step": 35125 }, { "epoch": 0.45644634958896635, "grad_norm": 0.4398910403251648, "learning_rate": 0.0001087328922912957, "loss": 1.442, "step": 35126 }, { "epoch": 0.4564593441328822, "grad_norm": 0.3295517563819885, "learning_rate": 0.00010873029282938431, "loss": 1.3751, "step": 35127 }, { "epoch": 0.4564723386767981, "grad_norm": 0.5005344748497009, "learning_rate": 0.00010872769336747295, "loss": 1.4498, "step": 35128 }, { "epoch": 0.45648533322071394, "grad_norm": 0.5326129794120789, "learning_rate": 0.00010872509390556156, "loss": 1.4191, "step": 35129 }, { "epoch": 0.45649832776462984, "grad_norm": 0.4533020853996277, "learning_rate": 0.00010872249444365017, "loss": 1.4076, "step": 35130 }, { "epoch": 0.4565113223085457, "grad_norm": 0.370254248380661, "learning_rate": 0.00010871989498173878, "loss": 1.222, "step": 35131 }, { "epoch": 0.4565243168524616, "grad_norm": 0.4136718213558197, "learning_rate": 0.00010871729551982741, "loss": 1.4117, "step": 35132 }, { "epoch": 0.45653731139637743, "grad_norm": 0.41696450114250183, "learning_rate": 0.00010871469605791602, "loss": 1.3432, "step": 35133 }, { "epoch": 0.45655030594029333, "grad_norm": 0.4033799469470978, "learning_rate": 0.00010871209659600463, "loss": 1.5524, "step": 35134 }, { "epoch": 0.4565633004842092, "grad_norm": 0.3737904727458954, "learning_rate": 0.00010870949713409324, "loss": 1.468, "step": 35135 }, { "epoch": 0.4565762950281251, "grad_norm": 0.35311636328697205, "learning_rate": 0.00010870689767218187, "loss": 1.1839, "step": 35136 }, { "epoch": 0.4565892895720409, "grad_norm": 0.46390852332115173, "learning_rate": 0.00010870429821027048, "loss": 1.4522, "step": 35137 }, { "epoch": 0.4566022841159568, "grad_norm": 0.4372662603855133, "learning_rate": 0.00010870169874835909, "loss": 1.2583, "step": 35138 }, { "epoch": 0.45661527865987267, "grad_norm": 0.3897049129009247, "learning_rate": 0.0001086990992864477, "loss": 1.3216, "step": 35139 }, { "epoch": 0.45662827320378857, "grad_norm": 0.405662477016449, "learning_rate": 0.00010869649982453634, "loss": 1.4033, "step": 35140 }, { "epoch": 0.4566412677477044, "grad_norm": 0.4160982370376587, "learning_rate": 0.00010869390036262495, "loss": 1.5137, "step": 35141 }, { "epoch": 0.4566542622916203, "grad_norm": 0.40514206886291504, "learning_rate": 0.00010869130090071356, "loss": 1.2781, "step": 35142 }, { "epoch": 0.45666725683553616, "grad_norm": 0.33861401677131653, "learning_rate": 0.00010868870143880217, "loss": 1.3123, "step": 35143 }, { "epoch": 0.45668025137945206, "grad_norm": 0.4271220266819, "learning_rate": 0.0001086861019768908, "loss": 1.3868, "step": 35144 }, { "epoch": 0.4566932459233679, "grad_norm": 0.42899203300476074, "learning_rate": 0.0001086835025149794, "loss": 1.304, "step": 35145 }, { "epoch": 0.4567062404672838, "grad_norm": 0.3812744915485382, "learning_rate": 0.00010868090305306802, "loss": 1.2488, "step": 35146 }, { "epoch": 0.45671923501119965, "grad_norm": 0.3733411431312561, "learning_rate": 0.00010867830359115663, "loss": 1.4233, "step": 35147 }, { "epoch": 0.45673222955511555, "grad_norm": 0.3744480609893799, "learning_rate": 0.00010867570412924527, "loss": 1.366, "step": 35148 }, { "epoch": 0.4567452240990314, "grad_norm": 0.33710265159606934, "learning_rate": 0.00010867310466733386, "loss": 1.358, "step": 35149 }, { "epoch": 0.4567582186429473, "grad_norm": 0.39333394169807434, "learning_rate": 0.00010867050520542247, "loss": 1.4081, "step": 35150 }, { "epoch": 0.45677121318686315, "grad_norm": 0.4287506937980652, "learning_rate": 0.00010866790574351108, "loss": 1.3353, "step": 35151 }, { "epoch": 0.45678420773077905, "grad_norm": 0.437557190656662, "learning_rate": 0.00010866530628159972, "loss": 1.3684, "step": 35152 }, { "epoch": 0.4567972022746949, "grad_norm": 0.3804386258125305, "learning_rate": 0.00010866270681968833, "loss": 1.1482, "step": 35153 }, { "epoch": 0.4568101968186108, "grad_norm": 0.44130581617355347, "learning_rate": 0.00010866010735777694, "loss": 1.4892, "step": 35154 }, { "epoch": 0.45682319136252664, "grad_norm": 0.3461728096008301, "learning_rate": 0.00010865750789586556, "loss": 1.3876, "step": 35155 }, { "epoch": 0.45683618590644254, "grad_norm": 0.4408833384513855, "learning_rate": 0.00010865490843395418, "loss": 1.4337, "step": 35156 }, { "epoch": 0.4568491804503584, "grad_norm": 0.3418590724468231, "learning_rate": 0.00010865230897204279, "loss": 1.408, "step": 35157 }, { "epoch": 0.4568621749942743, "grad_norm": 0.2727314233779907, "learning_rate": 0.0001086497095101314, "loss": 1.2416, "step": 35158 }, { "epoch": 0.45687516953819013, "grad_norm": 0.4399529695510864, "learning_rate": 0.00010864711004822001, "loss": 1.419, "step": 35159 }, { "epoch": 0.45688816408210603, "grad_norm": 0.5010734796524048, "learning_rate": 0.00010864451058630865, "loss": 1.4272, "step": 35160 }, { "epoch": 0.4569011586260219, "grad_norm": 0.3213130831718445, "learning_rate": 0.00010864191112439726, "loss": 1.4723, "step": 35161 }, { "epoch": 0.4569141531699378, "grad_norm": 0.39463120698928833, "learning_rate": 0.00010863931166248586, "loss": 1.4758, "step": 35162 }, { "epoch": 0.4569271477138536, "grad_norm": 0.40219464898109436, "learning_rate": 0.00010863671220057447, "loss": 1.815, "step": 35163 }, { "epoch": 0.4569401422577695, "grad_norm": 0.44015780091285706, "learning_rate": 0.00010863411273866311, "loss": 1.3933, "step": 35164 }, { "epoch": 0.45695313680168537, "grad_norm": 0.3716558516025543, "learning_rate": 0.00010863151327675172, "loss": 1.3976, "step": 35165 }, { "epoch": 0.45696613134560127, "grad_norm": 0.4074903130531311, "learning_rate": 0.00010862891381484033, "loss": 1.5522, "step": 35166 }, { "epoch": 0.4569791258895171, "grad_norm": 0.48126131296157837, "learning_rate": 0.00010862631435292895, "loss": 1.3402, "step": 35167 }, { "epoch": 0.456992120433433, "grad_norm": 0.41142916679382324, "learning_rate": 0.00010862371489101757, "loss": 1.464, "step": 35168 }, { "epoch": 0.45700511497734886, "grad_norm": 0.4035705029964447, "learning_rate": 0.00010862111542910618, "loss": 1.4421, "step": 35169 }, { "epoch": 0.45701810952126476, "grad_norm": 0.3333204984664917, "learning_rate": 0.00010861851596719479, "loss": 1.4334, "step": 35170 }, { "epoch": 0.45703110406518066, "grad_norm": 0.40880200266838074, "learning_rate": 0.00010861591650528343, "loss": 1.1942, "step": 35171 }, { "epoch": 0.4570440986090965, "grad_norm": 0.404093474149704, "learning_rate": 0.00010861331704337204, "loss": 1.3103, "step": 35172 }, { "epoch": 0.4570570931530124, "grad_norm": 0.45474931597709656, "learning_rate": 0.00010861071758146065, "loss": 1.5753, "step": 35173 }, { "epoch": 0.45707008769692825, "grad_norm": 0.4474669098854065, "learning_rate": 0.00010860811811954924, "loss": 1.4011, "step": 35174 }, { "epoch": 0.45708308224084415, "grad_norm": 0.3757783770561218, "learning_rate": 0.00010860551865763788, "loss": 1.5408, "step": 35175 }, { "epoch": 0.45709607678476, "grad_norm": 0.30709052085876465, "learning_rate": 0.0001086029191957265, "loss": 1.3539, "step": 35176 }, { "epoch": 0.4571090713286759, "grad_norm": 0.3956267833709717, "learning_rate": 0.0001086003197338151, "loss": 1.4266, "step": 35177 }, { "epoch": 0.45712206587259174, "grad_norm": 0.45040905475616455, "learning_rate": 0.00010859772027190372, "loss": 1.3055, "step": 35178 }, { "epoch": 0.45713506041650764, "grad_norm": 0.2968219518661499, "learning_rate": 0.00010859512080999234, "loss": 1.4739, "step": 35179 }, { "epoch": 0.4571480549604235, "grad_norm": 0.41308730840682983, "learning_rate": 0.00010859252134808095, "loss": 1.2848, "step": 35180 }, { "epoch": 0.4571610495043394, "grad_norm": 0.37023502588272095, "learning_rate": 0.00010858992188616956, "loss": 1.3686, "step": 35181 }, { "epoch": 0.45717404404825523, "grad_norm": 0.26160967350006104, "learning_rate": 0.00010858732242425817, "loss": 1.1939, "step": 35182 }, { "epoch": 0.45718703859217114, "grad_norm": 0.2825961709022522, "learning_rate": 0.00010858472296234681, "loss": 1.4978, "step": 35183 }, { "epoch": 0.457200033136087, "grad_norm": 0.3860016465187073, "learning_rate": 0.00010858212350043542, "loss": 1.4197, "step": 35184 }, { "epoch": 0.4572130276800029, "grad_norm": 0.4376005530357361, "learning_rate": 0.00010857952403852403, "loss": 1.3316, "step": 35185 }, { "epoch": 0.4572260222239187, "grad_norm": 0.49659183621406555, "learning_rate": 0.00010857692457661264, "loss": 1.4649, "step": 35186 }, { "epoch": 0.4572390167678346, "grad_norm": 0.40608954429626465, "learning_rate": 0.00010857432511470127, "loss": 1.3845, "step": 35187 }, { "epoch": 0.4572520113117505, "grad_norm": 0.4390445947647095, "learning_rate": 0.00010857172565278988, "loss": 1.4882, "step": 35188 }, { "epoch": 0.4572650058556664, "grad_norm": 0.41920405626296997, "learning_rate": 0.00010856912619087849, "loss": 1.4217, "step": 35189 }, { "epoch": 0.4572780003995822, "grad_norm": 0.40508297085762024, "learning_rate": 0.0001085665267289671, "loss": 1.1064, "step": 35190 }, { "epoch": 0.4572909949434981, "grad_norm": 0.38219398260116577, "learning_rate": 0.00010856392726705573, "loss": 1.4556, "step": 35191 }, { "epoch": 0.45730398948741396, "grad_norm": 0.2865232825279236, "learning_rate": 0.00010856132780514434, "loss": 1.2694, "step": 35192 }, { "epoch": 0.45731698403132987, "grad_norm": 0.3481275737285614, "learning_rate": 0.00010855872834323295, "loss": 1.2036, "step": 35193 }, { "epoch": 0.4573299785752457, "grad_norm": 0.2854389548301697, "learning_rate": 0.00010855612888132156, "loss": 1.4205, "step": 35194 }, { "epoch": 0.4573429731191616, "grad_norm": 0.43644359707832336, "learning_rate": 0.0001085535294194102, "loss": 1.3665, "step": 35195 }, { "epoch": 0.45735596766307746, "grad_norm": 0.45816734433174133, "learning_rate": 0.00010855092995749881, "loss": 1.4786, "step": 35196 }, { "epoch": 0.45736896220699336, "grad_norm": 0.5114655494689941, "learning_rate": 0.00010854833049558742, "loss": 1.4149, "step": 35197 }, { "epoch": 0.4573819567509092, "grad_norm": 0.33392441272735596, "learning_rate": 0.00010854573103367603, "loss": 1.4848, "step": 35198 }, { "epoch": 0.4573949512948251, "grad_norm": 0.3224254250526428, "learning_rate": 0.00010854313157176465, "loss": 1.2942, "step": 35199 }, { "epoch": 0.45740794583874095, "grad_norm": 0.2591899335384369, "learning_rate": 0.00010854053210985326, "loss": 1.4688, "step": 35200 }, { "epoch": 0.45742094038265685, "grad_norm": 0.4480980932712555, "learning_rate": 0.00010853793264794188, "loss": 1.4249, "step": 35201 }, { "epoch": 0.4574339349265727, "grad_norm": 0.47865980863571167, "learning_rate": 0.00010853533318603051, "loss": 1.4411, "step": 35202 }, { "epoch": 0.4574469294704886, "grad_norm": 0.3556857407093048, "learning_rate": 0.00010853273372411912, "loss": 1.3199, "step": 35203 }, { "epoch": 0.45745992401440444, "grad_norm": 0.45210039615631104, "learning_rate": 0.00010853013426220772, "loss": 1.5048, "step": 35204 }, { "epoch": 0.45747291855832034, "grad_norm": 0.5351685285568237, "learning_rate": 0.00010852753480029633, "loss": 1.4864, "step": 35205 }, { "epoch": 0.4574859131022362, "grad_norm": 0.4799736738204956, "learning_rate": 0.00010852493533838497, "loss": 1.5032, "step": 35206 }, { "epoch": 0.4574989076461521, "grad_norm": 0.43956881761550903, "learning_rate": 0.00010852233587647358, "loss": 1.5017, "step": 35207 }, { "epoch": 0.45751190219006793, "grad_norm": 0.5154749155044556, "learning_rate": 0.00010851973641456219, "loss": 1.6559, "step": 35208 }, { "epoch": 0.45752489673398383, "grad_norm": 0.4488048553466797, "learning_rate": 0.0001085171369526508, "loss": 1.3764, "step": 35209 }, { "epoch": 0.4575378912778997, "grad_norm": 0.3972122073173523, "learning_rate": 0.00010851453749073943, "loss": 1.6702, "step": 35210 }, { "epoch": 0.4575508858218156, "grad_norm": 0.32879453897476196, "learning_rate": 0.00010851193802882804, "loss": 1.3974, "step": 35211 }, { "epoch": 0.4575638803657314, "grad_norm": 0.33414342999458313, "learning_rate": 0.00010850933856691665, "loss": 1.2937, "step": 35212 }, { "epoch": 0.4575768749096473, "grad_norm": 0.4241344630718231, "learning_rate": 0.00010850673910500526, "loss": 1.5058, "step": 35213 }, { "epoch": 0.45758986945356317, "grad_norm": 0.3188968598842621, "learning_rate": 0.0001085041396430939, "loss": 1.3327, "step": 35214 }, { "epoch": 0.45760286399747907, "grad_norm": 0.3835933804512024, "learning_rate": 0.00010850154018118251, "loss": 1.5318, "step": 35215 }, { "epoch": 0.4576158585413949, "grad_norm": 0.3659355342388153, "learning_rate": 0.00010849894071927111, "loss": 1.585, "step": 35216 }, { "epoch": 0.4576288530853108, "grad_norm": 0.44043996930122375, "learning_rate": 0.00010849634125735972, "loss": 1.1948, "step": 35217 }, { "epoch": 0.45764184762922666, "grad_norm": 0.44834479689598083, "learning_rate": 0.00010849374179544836, "loss": 1.3876, "step": 35218 }, { "epoch": 0.45765484217314256, "grad_norm": 0.3071345388889313, "learning_rate": 0.00010849114233353697, "loss": 1.3022, "step": 35219 }, { "epoch": 0.4576678367170584, "grad_norm": 0.3260709047317505, "learning_rate": 0.00010848854287162558, "loss": 1.2277, "step": 35220 }, { "epoch": 0.4576808312609743, "grad_norm": 0.35543254017829895, "learning_rate": 0.00010848594340971419, "loss": 1.3643, "step": 35221 }, { "epoch": 0.45769382580489015, "grad_norm": 0.3343386948108673, "learning_rate": 0.00010848334394780281, "loss": 1.2112, "step": 35222 }, { "epoch": 0.45770682034880605, "grad_norm": 0.39798232913017273, "learning_rate": 0.00010848074448589142, "loss": 1.5742, "step": 35223 }, { "epoch": 0.4577198148927219, "grad_norm": 0.3496485650539398, "learning_rate": 0.00010847814502398004, "loss": 1.3852, "step": 35224 }, { "epoch": 0.4577328094366378, "grad_norm": 0.3821001648902893, "learning_rate": 0.00010847554556206865, "loss": 1.1962, "step": 35225 }, { "epoch": 0.45774580398055364, "grad_norm": 0.28176069259643555, "learning_rate": 0.00010847294610015728, "loss": 1.1889, "step": 35226 }, { "epoch": 0.45775879852446955, "grad_norm": 0.42420098185539246, "learning_rate": 0.0001084703466382459, "loss": 1.2605, "step": 35227 }, { "epoch": 0.4577717930683854, "grad_norm": 0.4934530258178711, "learning_rate": 0.0001084677471763345, "loss": 1.5385, "step": 35228 }, { "epoch": 0.4577847876123013, "grad_norm": 0.43733447790145874, "learning_rate": 0.0001084651477144231, "loss": 1.3628, "step": 35229 }, { "epoch": 0.45779778215621714, "grad_norm": 0.4146369397640228, "learning_rate": 0.00010846254825251174, "loss": 1.4634, "step": 35230 }, { "epoch": 0.45781077670013304, "grad_norm": 0.36894673109054565, "learning_rate": 0.00010845994879060035, "loss": 1.3395, "step": 35231 }, { "epoch": 0.4578237712440489, "grad_norm": 0.37321045994758606, "learning_rate": 0.00010845734932868896, "loss": 1.2889, "step": 35232 }, { "epoch": 0.4578367657879648, "grad_norm": 0.37834569811820984, "learning_rate": 0.00010845474986677757, "loss": 1.3993, "step": 35233 }, { "epoch": 0.45784976033188063, "grad_norm": 0.3205845057964325, "learning_rate": 0.0001084521504048662, "loss": 1.1027, "step": 35234 }, { "epoch": 0.45786275487579653, "grad_norm": 0.42798498272895813, "learning_rate": 0.00010844955094295481, "loss": 1.1531, "step": 35235 }, { "epoch": 0.4578757494197124, "grad_norm": 0.4186842739582062, "learning_rate": 0.00010844695148104342, "loss": 1.4299, "step": 35236 }, { "epoch": 0.4578887439636283, "grad_norm": 0.41149231791496277, "learning_rate": 0.00010844435201913203, "loss": 1.4173, "step": 35237 }, { "epoch": 0.4579017385075441, "grad_norm": 0.3289128243923187, "learning_rate": 0.00010844175255722067, "loss": 1.4453, "step": 35238 }, { "epoch": 0.45791473305146, "grad_norm": 0.5800630450248718, "learning_rate": 0.00010843915309530928, "loss": 1.3749, "step": 35239 }, { "epoch": 0.45792772759537587, "grad_norm": 0.4329250752925873, "learning_rate": 0.00010843655363339789, "loss": 1.4118, "step": 35240 }, { "epoch": 0.45794072213929177, "grad_norm": 0.38029828667640686, "learning_rate": 0.00010843395417148652, "loss": 1.4105, "step": 35241 }, { "epoch": 0.4579537166832076, "grad_norm": 0.4577818214893341, "learning_rate": 0.00010843135470957513, "loss": 1.4297, "step": 35242 }, { "epoch": 0.4579667112271235, "grad_norm": 0.4916505217552185, "learning_rate": 0.00010842875524766374, "loss": 1.5481, "step": 35243 }, { "epoch": 0.45797970577103936, "grad_norm": 0.38707616925239563, "learning_rate": 0.00010842615578575235, "loss": 1.3447, "step": 35244 }, { "epoch": 0.45799270031495526, "grad_norm": 0.4361681044101715, "learning_rate": 0.00010842355632384099, "loss": 1.3706, "step": 35245 }, { "epoch": 0.4580056948588711, "grad_norm": 0.3618282973766327, "learning_rate": 0.00010842095686192958, "loss": 1.2397, "step": 35246 }, { "epoch": 0.458018689402787, "grad_norm": 0.3238358795642853, "learning_rate": 0.0001084183574000182, "loss": 1.2986, "step": 35247 }, { "epoch": 0.4580316839467029, "grad_norm": 0.37960800528526306, "learning_rate": 0.0001084157579381068, "loss": 1.2536, "step": 35248 }, { "epoch": 0.45804467849061875, "grad_norm": 0.44496873021125793, "learning_rate": 0.00010841315847619544, "loss": 1.5482, "step": 35249 }, { "epoch": 0.45805767303453465, "grad_norm": 0.3626144826412201, "learning_rate": 0.00010841055901428405, "loss": 1.2164, "step": 35250 }, { "epoch": 0.4580706675784505, "grad_norm": 0.36149296164512634, "learning_rate": 0.00010840795955237267, "loss": 1.2694, "step": 35251 }, { "epoch": 0.4580836621223664, "grad_norm": 0.3782879710197449, "learning_rate": 0.00010840536009046128, "loss": 1.2774, "step": 35252 }, { "epoch": 0.45809665666628224, "grad_norm": 0.48522794246673584, "learning_rate": 0.0001084027606285499, "loss": 1.5855, "step": 35253 }, { "epoch": 0.45810965121019814, "grad_norm": 0.4705965220928192, "learning_rate": 0.00010840016116663851, "loss": 1.3872, "step": 35254 }, { "epoch": 0.458122645754114, "grad_norm": 0.2958342134952545, "learning_rate": 0.00010839756170472712, "loss": 1.1364, "step": 35255 }, { "epoch": 0.4581356402980299, "grad_norm": 0.29890361428260803, "learning_rate": 0.00010839496224281573, "loss": 1.1778, "step": 35256 }, { "epoch": 0.45814863484194573, "grad_norm": 0.33634698390960693, "learning_rate": 0.00010839236278090437, "loss": 1.3648, "step": 35257 }, { "epoch": 0.45816162938586164, "grad_norm": 0.4534910321235657, "learning_rate": 0.00010838976331899297, "loss": 1.5126, "step": 35258 }, { "epoch": 0.4581746239297775, "grad_norm": 0.4674331247806549, "learning_rate": 0.00010838716385708158, "loss": 1.3965, "step": 35259 }, { "epoch": 0.4581876184736934, "grad_norm": 0.4149826765060425, "learning_rate": 0.00010838456439517019, "loss": 1.3724, "step": 35260 }, { "epoch": 0.4582006130176092, "grad_norm": 0.43923062086105347, "learning_rate": 0.00010838196493325883, "loss": 1.5823, "step": 35261 }, { "epoch": 0.4582136075615251, "grad_norm": 0.5054402947425842, "learning_rate": 0.00010837936547134744, "loss": 1.4279, "step": 35262 }, { "epoch": 0.45822660210544097, "grad_norm": 0.42773324251174927, "learning_rate": 0.00010837676600943605, "loss": 1.4269, "step": 35263 }, { "epoch": 0.4582395966493569, "grad_norm": 0.47015732526779175, "learning_rate": 0.00010837416654752466, "loss": 1.5479, "step": 35264 }, { "epoch": 0.4582525911932727, "grad_norm": 0.4380808174610138, "learning_rate": 0.00010837156708561329, "loss": 1.4651, "step": 35265 }, { "epoch": 0.4582655857371886, "grad_norm": 0.3586990237236023, "learning_rate": 0.0001083689676237019, "loss": 1.1728, "step": 35266 }, { "epoch": 0.45827858028110446, "grad_norm": 0.7863790392875671, "learning_rate": 0.00010836636816179051, "loss": 1.5038, "step": 35267 }, { "epoch": 0.45829157482502036, "grad_norm": 0.32435423135757446, "learning_rate": 0.00010836376869987912, "loss": 1.1982, "step": 35268 }, { "epoch": 0.4583045693689362, "grad_norm": 0.28237786889076233, "learning_rate": 0.00010836116923796776, "loss": 1.1257, "step": 35269 }, { "epoch": 0.4583175639128521, "grad_norm": 0.43614304065704346, "learning_rate": 0.00010835856977605637, "loss": 1.3819, "step": 35270 }, { "epoch": 0.45833055845676796, "grad_norm": 0.4806976318359375, "learning_rate": 0.00010835597031414497, "loss": 1.4418, "step": 35271 }, { "epoch": 0.45834355300068386, "grad_norm": 0.41072726249694824, "learning_rate": 0.00010835337085223358, "loss": 1.4635, "step": 35272 }, { "epoch": 0.4583565475445997, "grad_norm": 0.40402135252952576, "learning_rate": 0.00010835077139032221, "loss": 1.4885, "step": 35273 }, { "epoch": 0.4583695420885156, "grad_norm": 0.40256422758102417, "learning_rate": 0.00010834817192841083, "loss": 1.3439, "step": 35274 }, { "epoch": 0.45838253663243145, "grad_norm": 0.36553245782852173, "learning_rate": 0.00010834557246649944, "loss": 1.5257, "step": 35275 }, { "epoch": 0.45839553117634735, "grad_norm": 0.377447247505188, "learning_rate": 0.00010834297300458806, "loss": 1.3524, "step": 35276 }, { "epoch": 0.4584085257202632, "grad_norm": 0.2974754273891449, "learning_rate": 0.00010834037354267667, "loss": 1.1978, "step": 35277 }, { "epoch": 0.4584215202641791, "grad_norm": 0.37702056765556335, "learning_rate": 0.00010833777408076528, "loss": 1.3135, "step": 35278 }, { "epoch": 0.45843451480809494, "grad_norm": 0.2538222670555115, "learning_rate": 0.0001083351746188539, "loss": 1.5245, "step": 35279 }, { "epoch": 0.45844750935201084, "grad_norm": 0.4145028591156006, "learning_rate": 0.00010833257515694253, "loss": 1.4954, "step": 35280 }, { "epoch": 0.4584605038959267, "grad_norm": 0.4104863703250885, "learning_rate": 0.00010832997569503114, "loss": 1.1267, "step": 35281 }, { "epoch": 0.4584734984398426, "grad_norm": 0.4241240918636322, "learning_rate": 0.00010832737623311975, "loss": 1.2789, "step": 35282 }, { "epoch": 0.45848649298375843, "grad_norm": 0.4301542043685913, "learning_rate": 0.00010832477677120836, "loss": 1.4101, "step": 35283 }, { "epoch": 0.45849948752767433, "grad_norm": 0.38485342264175415, "learning_rate": 0.00010832217730929699, "loss": 1.4676, "step": 35284 }, { "epoch": 0.4585124820715902, "grad_norm": 0.39717787504196167, "learning_rate": 0.0001083195778473856, "loss": 1.5187, "step": 35285 }, { "epoch": 0.4585254766155061, "grad_norm": 0.3654116988182068, "learning_rate": 0.00010831697838547421, "loss": 1.3772, "step": 35286 }, { "epoch": 0.4585384711594219, "grad_norm": 0.3713887929916382, "learning_rate": 0.00010831437892356282, "loss": 1.2216, "step": 35287 }, { "epoch": 0.4585514657033378, "grad_norm": 0.46072137355804443, "learning_rate": 0.00010831177946165145, "loss": 1.4036, "step": 35288 }, { "epoch": 0.45856446024725367, "grad_norm": 0.47606176137924194, "learning_rate": 0.00010830917999974006, "loss": 1.5429, "step": 35289 }, { "epoch": 0.45857745479116957, "grad_norm": 0.4153774082660675, "learning_rate": 0.00010830658053782867, "loss": 1.4051, "step": 35290 }, { "epoch": 0.4585904493350854, "grad_norm": 0.4043768346309662, "learning_rate": 0.00010830398107591728, "loss": 1.4037, "step": 35291 }, { "epoch": 0.4586034438790013, "grad_norm": 0.3259259760379791, "learning_rate": 0.00010830138161400592, "loss": 1.3242, "step": 35292 }, { "epoch": 0.45861643842291716, "grad_norm": 0.414194256067276, "learning_rate": 0.00010829878215209453, "loss": 1.1967, "step": 35293 }, { "epoch": 0.45862943296683306, "grad_norm": 0.39186134934425354, "learning_rate": 0.00010829618269018314, "loss": 1.1955, "step": 35294 }, { "epoch": 0.4586424275107489, "grad_norm": 0.35264942049980164, "learning_rate": 0.00010829358322827175, "loss": 1.4243, "step": 35295 }, { "epoch": 0.4586554220546648, "grad_norm": 0.3861359655857086, "learning_rate": 0.00010829098376636037, "loss": 1.4644, "step": 35296 }, { "epoch": 0.45866841659858065, "grad_norm": 0.4659987986087799, "learning_rate": 0.00010828838430444899, "loss": 1.3118, "step": 35297 }, { "epoch": 0.45868141114249655, "grad_norm": 0.31979990005493164, "learning_rate": 0.0001082857848425376, "loss": 1.2998, "step": 35298 }, { "epoch": 0.4586944056864124, "grad_norm": 0.4205688238143921, "learning_rate": 0.00010828318538062621, "loss": 1.6669, "step": 35299 }, { "epoch": 0.4587074002303283, "grad_norm": 0.4614083468914032, "learning_rate": 0.00010828058591871483, "loss": 1.4177, "step": 35300 }, { "epoch": 0.45872039477424414, "grad_norm": 0.4444385766983032, "learning_rate": 0.00010827798645680344, "loss": 1.5298, "step": 35301 }, { "epoch": 0.45873338931816005, "grad_norm": 0.4552019536495209, "learning_rate": 0.00010827538699489205, "loss": 1.3475, "step": 35302 }, { "epoch": 0.4587463838620759, "grad_norm": 0.406088262796402, "learning_rate": 0.00010827278753298066, "loss": 1.3874, "step": 35303 }, { "epoch": 0.4587593784059918, "grad_norm": 0.3940732181072235, "learning_rate": 0.0001082701880710693, "loss": 1.2402, "step": 35304 }, { "epoch": 0.45877237294990764, "grad_norm": 0.3841974437236786, "learning_rate": 0.00010826758860915791, "loss": 1.3434, "step": 35305 }, { "epoch": 0.45878536749382354, "grad_norm": 0.33626532554626465, "learning_rate": 0.00010826498914724652, "loss": 1.4824, "step": 35306 }, { "epoch": 0.4587983620377394, "grad_norm": 0.3972049057483673, "learning_rate": 0.00010826238968533514, "loss": 1.2772, "step": 35307 }, { "epoch": 0.4588113565816553, "grad_norm": 0.4353432357311249, "learning_rate": 0.00010825979022342376, "loss": 1.4568, "step": 35308 }, { "epoch": 0.45882435112557113, "grad_norm": 0.44833609461784363, "learning_rate": 0.00010825719076151237, "loss": 1.5144, "step": 35309 }, { "epoch": 0.45883734566948703, "grad_norm": 0.31149277091026306, "learning_rate": 0.00010825459129960098, "loss": 1.3918, "step": 35310 }, { "epoch": 0.4588503402134029, "grad_norm": 0.35204797983169556, "learning_rate": 0.00010825199183768959, "loss": 1.3002, "step": 35311 }, { "epoch": 0.4588633347573188, "grad_norm": 0.41535937786102295, "learning_rate": 0.00010824939237577823, "loss": 1.5458, "step": 35312 }, { "epoch": 0.4588763293012346, "grad_norm": 0.35590389370918274, "learning_rate": 0.00010824679291386683, "loss": 1.3897, "step": 35313 }, { "epoch": 0.4588893238451505, "grad_norm": 0.4094884693622589, "learning_rate": 0.00010824419345195544, "loss": 1.2885, "step": 35314 }, { "epoch": 0.45890231838906637, "grad_norm": 0.5071133971214294, "learning_rate": 0.00010824159399004408, "loss": 1.4719, "step": 35315 }, { "epoch": 0.45891531293298227, "grad_norm": 0.49353688955307007, "learning_rate": 0.00010823899452813269, "loss": 1.5224, "step": 35316 }, { "epoch": 0.4589283074768981, "grad_norm": 0.4569607973098755, "learning_rate": 0.0001082363950662213, "loss": 1.4647, "step": 35317 }, { "epoch": 0.458941302020814, "grad_norm": 0.4611174166202545, "learning_rate": 0.00010823379560430991, "loss": 1.4312, "step": 35318 }, { "epoch": 0.45895429656472986, "grad_norm": 0.4620760679244995, "learning_rate": 0.00010823119614239853, "loss": 1.3677, "step": 35319 }, { "epoch": 0.45896729110864576, "grad_norm": 0.6529434323310852, "learning_rate": 0.00010822859668048715, "loss": 1.4053, "step": 35320 }, { "epoch": 0.4589802856525616, "grad_norm": 0.28861069679260254, "learning_rate": 0.00010822599721857576, "loss": 1.3218, "step": 35321 }, { "epoch": 0.4589932801964775, "grad_norm": 0.4238835573196411, "learning_rate": 0.00010822339775666437, "loss": 1.299, "step": 35322 }, { "epoch": 0.45900627474039335, "grad_norm": 0.2763981819152832, "learning_rate": 0.000108220798294753, "loss": 1.1626, "step": 35323 }, { "epoch": 0.45901926928430925, "grad_norm": 0.43273213505744934, "learning_rate": 0.00010821819883284162, "loss": 1.4008, "step": 35324 }, { "epoch": 0.45903226382822515, "grad_norm": 0.4475223422050476, "learning_rate": 0.00010821559937093023, "loss": 1.542, "step": 35325 }, { "epoch": 0.459045258372141, "grad_norm": 0.39497828483581543, "learning_rate": 0.00010821299990901882, "loss": 1.428, "step": 35326 }, { "epoch": 0.4590582529160569, "grad_norm": 0.46633705496788025, "learning_rate": 0.00010821040044710746, "loss": 1.3103, "step": 35327 }, { "epoch": 0.45907124745997274, "grad_norm": 0.42853882908821106, "learning_rate": 0.00010820780098519607, "loss": 1.44, "step": 35328 }, { "epoch": 0.45908424200388864, "grad_norm": 0.3508547246456146, "learning_rate": 0.00010820520152328468, "loss": 1.3196, "step": 35329 }, { "epoch": 0.4590972365478045, "grad_norm": 0.34700489044189453, "learning_rate": 0.0001082026020613733, "loss": 1.3816, "step": 35330 }, { "epoch": 0.4591102310917204, "grad_norm": 0.4014454185962677, "learning_rate": 0.00010820000259946192, "loss": 1.4173, "step": 35331 }, { "epoch": 0.45912322563563623, "grad_norm": 0.43163809180259705, "learning_rate": 0.00010819740313755053, "loss": 1.3621, "step": 35332 }, { "epoch": 0.45913622017955213, "grad_norm": 0.4336693286895752, "learning_rate": 0.00010819480367563914, "loss": 1.3109, "step": 35333 }, { "epoch": 0.459149214723468, "grad_norm": 0.37387824058532715, "learning_rate": 0.00010819220421372775, "loss": 1.4071, "step": 35334 }, { "epoch": 0.4591622092673839, "grad_norm": 0.375489205121994, "learning_rate": 0.00010818960475181639, "loss": 1.2277, "step": 35335 }, { "epoch": 0.4591752038112997, "grad_norm": 0.4288072884082794, "learning_rate": 0.000108187005289905, "loss": 1.5621, "step": 35336 }, { "epoch": 0.4591881983552156, "grad_norm": 0.37284740805625916, "learning_rate": 0.00010818440582799361, "loss": 1.153, "step": 35337 }, { "epoch": 0.45920119289913147, "grad_norm": 0.389782577753067, "learning_rate": 0.00010818180636608222, "loss": 1.2462, "step": 35338 }, { "epoch": 0.4592141874430474, "grad_norm": 0.36751076579093933, "learning_rate": 0.00010817920690417085, "loss": 1.2772, "step": 35339 }, { "epoch": 0.4592271819869632, "grad_norm": 0.2914290726184845, "learning_rate": 0.00010817660744225946, "loss": 1.1934, "step": 35340 }, { "epoch": 0.4592401765308791, "grad_norm": 0.46337658166885376, "learning_rate": 0.00010817400798034807, "loss": 1.4619, "step": 35341 }, { "epoch": 0.45925317107479496, "grad_norm": 0.40661898255348206, "learning_rate": 0.00010817140851843668, "loss": 1.4287, "step": 35342 }, { "epoch": 0.45926616561871086, "grad_norm": 0.37092944979667664, "learning_rate": 0.0001081688090565253, "loss": 1.3679, "step": 35343 }, { "epoch": 0.4592791601626267, "grad_norm": 0.4093421995639801, "learning_rate": 0.00010816620959461392, "loss": 1.3896, "step": 35344 }, { "epoch": 0.4592921547065426, "grad_norm": 0.35497674345970154, "learning_rate": 0.00010816361013270253, "loss": 1.3961, "step": 35345 }, { "epoch": 0.45930514925045846, "grad_norm": 0.42267873883247375, "learning_rate": 0.00010816101067079114, "loss": 1.3523, "step": 35346 }, { "epoch": 0.45931814379437436, "grad_norm": 0.31545206904411316, "learning_rate": 0.00010815841120887978, "loss": 1.2695, "step": 35347 }, { "epoch": 0.4593311383382902, "grad_norm": 0.412085622549057, "learning_rate": 0.00010815581174696839, "loss": 1.5897, "step": 35348 }, { "epoch": 0.4593441328822061, "grad_norm": 0.42375436425209045, "learning_rate": 0.000108153212285057, "loss": 1.2868, "step": 35349 }, { "epoch": 0.45935712742612195, "grad_norm": 0.4174690544605255, "learning_rate": 0.00010815061282314562, "loss": 1.2312, "step": 35350 }, { "epoch": 0.45937012197003785, "grad_norm": 0.4588826298713684, "learning_rate": 0.00010814801336123423, "loss": 1.4285, "step": 35351 }, { "epoch": 0.4593831165139537, "grad_norm": 0.3641478419303894, "learning_rate": 0.00010814541389932284, "loss": 1.3225, "step": 35352 }, { "epoch": 0.4593961110578696, "grad_norm": 0.33253809809684753, "learning_rate": 0.00010814281443741146, "loss": 1.2146, "step": 35353 }, { "epoch": 0.45940910560178544, "grad_norm": 0.35771578550338745, "learning_rate": 0.0001081402149755001, "loss": 1.4171, "step": 35354 }, { "epoch": 0.45942210014570134, "grad_norm": 0.32943418622016907, "learning_rate": 0.00010813761551358869, "loss": 1.292, "step": 35355 }, { "epoch": 0.4594350946896172, "grad_norm": 0.49043115973472595, "learning_rate": 0.0001081350160516773, "loss": 1.4025, "step": 35356 }, { "epoch": 0.4594480892335331, "grad_norm": 0.44581982493400574, "learning_rate": 0.00010813241658976591, "loss": 1.3602, "step": 35357 }, { "epoch": 0.45946108377744893, "grad_norm": 0.40665319561958313, "learning_rate": 0.00010812981712785455, "loss": 1.3481, "step": 35358 }, { "epoch": 0.45947407832136483, "grad_norm": 0.4171801805496216, "learning_rate": 0.00010812721766594316, "loss": 1.2928, "step": 35359 }, { "epoch": 0.4594870728652807, "grad_norm": 0.44965749979019165, "learning_rate": 0.00010812461820403177, "loss": 1.543, "step": 35360 }, { "epoch": 0.4595000674091966, "grad_norm": 0.433795690536499, "learning_rate": 0.00010812201874212038, "loss": 1.2605, "step": 35361 }, { "epoch": 0.4595130619531124, "grad_norm": 0.43946996331214905, "learning_rate": 0.00010811941928020901, "loss": 1.4665, "step": 35362 }, { "epoch": 0.4595260564970283, "grad_norm": 0.45820093154907227, "learning_rate": 0.00010811681981829762, "loss": 1.3561, "step": 35363 }, { "epoch": 0.45953905104094417, "grad_norm": 0.4809021055698395, "learning_rate": 0.00010811422035638623, "loss": 1.4838, "step": 35364 }, { "epoch": 0.45955204558486007, "grad_norm": 0.3748052716255188, "learning_rate": 0.00010811162089447484, "loss": 1.2569, "step": 35365 }, { "epoch": 0.4595650401287759, "grad_norm": 0.3864237070083618, "learning_rate": 0.00010810902143256348, "loss": 1.3356, "step": 35366 }, { "epoch": 0.4595780346726918, "grad_norm": 0.25659817457199097, "learning_rate": 0.00010810642197065209, "loss": 1.3025, "step": 35367 }, { "epoch": 0.45959102921660766, "grad_norm": 0.3454335629940033, "learning_rate": 0.00010810382250874069, "loss": 1.4603, "step": 35368 }, { "epoch": 0.45960402376052356, "grad_norm": 0.45242515206336975, "learning_rate": 0.0001081012230468293, "loss": 1.4749, "step": 35369 }, { "epoch": 0.4596170183044394, "grad_norm": 0.3889758884906769, "learning_rate": 0.00010809862358491794, "loss": 1.4544, "step": 35370 }, { "epoch": 0.4596300128483553, "grad_norm": 0.39321789145469666, "learning_rate": 0.00010809602412300655, "loss": 1.4275, "step": 35371 }, { "epoch": 0.45964300739227115, "grad_norm": 0.5043295621871948, "learning_rate": 0.00010809342466109516, "loss": 1.4521, "step": 35372 }, { "epoch": 0.45965600193618705, "grad_norm": 0.2803584635257721, "learning_rate": 0.00010809082519918377, "loss": 1.3983, "step": 35373 }, { "epoch": 0.4596689964801029, "grad_norm": 0.49717387557029724, "learning_rate": 0.00010808822573727239, "loss": 1.5593, "step": 35374 }, { "epoch": 0.4596819910240188, "grad_norm": 0.3836407959461212, "learning_rate": 0.000108085626275361, "loss": 1.457, "step": 35375 }, { "epoch": 0.45969498556793464, "grad_norm": 0.39679092168807983, "learning_rate": 0.00010808302681344962, "loss": 1.3771, "step": 35376 }, { "epoch": 0.45970798011185054, "grad_norm": 0.33145424723625183, "learning_rate": 0.00010808042735153823, "loss": 1.3228, "step": 35377 }, { "epoch": 0.4597209746557664, "grad_norm": 0.48426875472068787, "learning_rate": 0.00010807782788962686, "loss": 1.2804, "step": 35378 }, { "epoch": 0.4597339691996823, "grad_norm": 0.3295377194881439, "learning_rate": 0.00010807522842771547, "loss": 1.1717, "step": 35379 }, { "epoch": 0.45974696374359814, "grad_norm": 0.3442242443561554, "learning_rate": 0.00010807262896580409, "loss": 1.3973, "step": 35380 }, { "epoch": 0.45975995828751404, "grad_norm": 0.40900155901908875, "learning_rate": 0.00010807002950389268, "loss": 1.3272, "step": 35381 }, { "epoch": 0.4597729528314299, "grad_norm": 0.4533456861972809, "learning_rate": 0.00010806743004198132, "loss": 1.3745, "step": 35382 }, { "epoch": 0.4597859473753458, "grad_norm": 0.4188876748085022, "learning_rate": 0.00010806483058006993, "loss": 1.4518, "step": 35383 }, { "epoch": 0.45979894191926163, "grad_norm": 0.4020141065120697, "learning_rate": 0.00010806223111815854, "loss": 1.3151, "step": 35384 }, { "epoch": 0.45981193646317753, "grad_norm": 0.44772377610206604, "learning_rate": 0.00010805963165624715, "loss": 1.4599, "step": 35385 }, { "epoch": 0.4598249310070934, "grad_norm": 0.4504566788673401, "learning_rate": 0.00010805703219433578, "loss": 1.5226, "step": 35386 }, { "epoch": 0.4598379255510093, "grad_norm": 0.3728441894054413, "learning_rate": 0.00010805443273242439, "loss": 1.312, "step": 35387 }, { "epoch": 0.4598509200949251, "grad_norm": 0.3854468762874603, "learning_rate": 0.000108051833270513, "loss": 1.2771, "step": 35388 }, { "epoch": 0.459863914638841, "grad_norm": 0.40002134442329407, "learning_rate": 0.00010804923380860164, "loss": 1.4039, "step": 35389 }, { "epoch": 0.45987690918275687, "grad_norm": 0.3797298073768616, "learning_rate": 0.00010804663434669025, "loss": 1.3823, "step": 35390 }, { "epoch": 0.45988990372667277, "grad_norm": 0.3901807963848114, "learning_rate": 0.00010804403488477886, "loss": 1.4519, "step": 35391 }, { "epoch": 0.4599028982705886, "grad_norm": 0.48419633507728577, "learning_rate": 0.00010804143542286747, "loss": 1.5055, "step": 35392 }, { "epoch": 0.4599158928145045, "grad_norm": 0.31742924451828003, "learning_rate": 0.0001080388359609561, "loss": 1.2731, "step": 35393 }, { "epoch": 0.45992888735842036, "grad_norm": 0.3422873020172119, "learning_rate": 0.0001080362364990447, "loss": 1.4728, "step": 35394 }, { "epoch": 0.45994188190233626, "grad_norm": 0.4367825388908386, "learning_rate": 0.00010803363703713332, "loss": 1.4299, "step": 35395 }, { "epoch": 0.4599548764462521, "grad_norm": 0.3603319525718689, "learning_rate": 0.00010803103757522193, "loss": 1.394, "step": 35396 }, { "epoch": 0.459967870990168, "grad_norm": 0.2595377266407013, "learning_rate": 0.00010802843811331055, "loss": 1.2498, "step": 35397 }, { "epoch": 0.45998086553408385, "grad_norm": 0.3504444658756256, "learning_rate": 0.00010802583865139916, "loss": 1.2118, "step": 35398 }, { "epoch": 0.45999386007799975, "grad_norm": 0.3181664049625397, "learning_rate": 0.00010802323918948777, "loss": 1.2841, "step": 35399 }, { "epoch": 0.46000685462191565, "grad_norm": 0.33218568563461304, "learning_rate": 0.00010802063972757639, "loss": 1.3559, "step": 35400 }, { "epoch": 0.4600198491658315, "grad_norm": 0.4189182221889496, "learning_rate": 0.00010801804026566502, "loss": 1.2489, "step": 35401 }, { "epoch": 0.4600328437097474, "grad_norm": 0.5004599690437317, "learning_rate": 0.00010801544080375363, "loss": 1.4222, "step": 35402 }, { "epoch": 0.46004583825366324, "grad_norm": 0.3731621503829956, "learning_rate": 0.00010801284134184225, "loss": 1.4027, "step": 35403 }, { "epoch": 0.46005883279757914, "grad_norm": 0.45000743865966797, "learning_rate": 0.00010801024187993086, "loss": 1.3806, "step": 35404 }, { "epoch": 0.460071827341495, "grad_norm": 0.3740728497505188, "learning_rate": 0.00010800764241801948, "loss": 1.2653, "step": 35405 }, { "epoch": 0.4600848218854109, "grad_norm": 0.3123493194580078, "learning_rate": 0.00010800504295610809, "loss": 1.2877, "step": 35406 }, { "epoch": 0.46009781642932673, "grad_norm": 0.4068834185600281, "learning_rate": 0.0001080024434941967, "loss": 1.4178, "step": 35407 }, { "epoch": 0.46011081097324263, "grad_norm": 0.37257692217826843, "learning_rate": 0.00010799984403228531, "loss": 1.3142, "step": 35408 }, { "epoch": 0.4601238055171585, "grad_norm": 0.32890579104423523, "learning_rate": 0.00010799724457037395, "loss": 1.4415, "step": 35409 }, { "epoch": 0.4601368000610744, "grad_norm": 0.3722566068172455, "learning_rate": 0.00010799464510846255, "loss": 1.2349, "step": 35410 }, { "epoch": 0.4601497946049902, "grad_norm": 0.3525223433971405, "learning_rate": 0.00010799204564655116, "loss": 1.4428, "step": 35411 }, { "epoch": 0.4601627891489061, "grad_norm": 0.34788239002227783, "learning_rate": 0.00010798944618463977, "loss": 1.4219, "step": 35412 }, { "epoch": 0.46017578369282197, "grad_norm": 0.34184110164642334, "learning_rate": 0.00010798684672272841, "loss": 1.4317, "step": 35413 }, { "epoch": 0.46018877823673787, "grad_norm": 0.4213789105415344, "learning_rate": 0.00010798424726081702, "loss": 1.2621, "step": 35414 }, { "epoch": 0.4602017727806537, "grad_norm": 0.3489427864551544, "learning_rate": 0.00010798164779890563, "loss": 1.3305, "step": 35415 }, { "epoch": 0.4602147673245696, "grad_norm": 0.4887595772743225, "learning_rate": 0.00010797904833699424, "loss": 1.2549, "step": 35416 }, { "epoch": 0.46022776186848546, "grad_norm": 0.40959304571151733, "learning_rate": 0.00010797644887508287, "loss": 1.2858, "step": 35417 }, { "epoch": 0.46024075641240136, "grad_norm": 0.39794984459877014, "learning_rate": 0.00010797384941317148, "loss": 1.6147, "step": 35418 }, { "epoch": 0.4602537509563172, "grad_norm": 0.36695581674575806, "learning_rate": 0.00010797124995126009, "loss": 1.3242, "step": 35419 }, { "epoch": 0.4602667455002331, "grad_norm": 0.37768495082855225, "learning_rate": 0.0001079686504893487, "loss": 1.3552, "step": 35420 }, { "epoch": 0.46027974004414896, "grad_norm": 0.418940931558609, "learning_rate": 0.00010796605102743734, "loss": 1.4552, "step": 35421 }, { "epoch": 0.46029273458806486, "grad_norm": 0.4219701886177063, "learning_rate": 0.00010796345156552593, "loss": 1.4322, "step": 35422 }, { "epoch": 0.4603057291319807, "grad_norm": 0.335228830575943, "learning_rate": 0.00010796085210361455, "loss": 1.2513, "step": 35423 }, { "epoch": 0.4603187236758966, "grad_norm": 0.3027455806732178, "learning_rate": 0.00010795825264170318, "loss": 1.6087, "step": 35424 }, { "epoch": 0.46033171821981245, "grad_norm": 0.40976259112358093, "learning_rate": 0.0001079556531797918, "loss": 1.3497, "step": 35425 }, { "epoch": 0.46034471276372835, "grad_norm": 0.35381394624710083, "learning_rate": 0.0001079530537178804, "loss": 1.1744, "step": 35426 }, { "epoch": 0.4603577073076442, "grad_norm": 0.35212162137031555, "learning_rate": 0.00010795045425596902, "loss": 1.2204, "step": 35427 }, { "epoch": 0.4603707018515601, "grad_norm": 0.330342173576355, "learning_rate": 0.00010794785479405764, "loss": 1.34, "step": 35428 }, { "epoch": 0.46038369639547594, "grad_norm": 0.44823870062828064, "learning_rate": 0.00010794525533214625, "loss": 1.3155, "step": 35429 }, { "epoch": 0.46039669093939184, "grad_norm": 0.5320379137992859, "learning_rate": 0.00010794265587023486, "loss": 1.4441, "step": 35430 }, { "epoch": 0.4604096854833077, "grad_norm": 0.4652509391307831, "learning_rate": 0.00010794005640832347, "loss": 1.5682, "step": 35431 }, { "epoch": 0.4604226800272236, "grad_norm": 0.45554524660110474, "learning_rate": 0.00010793745694641211, "loss": 1.3535, "step": 35432 }, { "epoch": 0.46043567457113943, "grad_norm": 0.4099522531032562, "learning_rate": 0.00010793485748450072, "loss": 1.4467, "step": 35433 }, { "epoch": 0.46044866911505533, "grad_norm": 0.4228973984718323, "learning_rate": 0.00010793225802258933, "loss": 1.7375, "step": 35434 }, { "epoch": 0.4604616636589712, "grad_norm": 0.44818004965782166, "learning_rate": 0.00010792965856067793, "loss": 1.3276, "step": 35435 }, { "epoch": 0.4604746582028871, "grad_norm": 0.3492596447467804, "learning_rate": 0.00010792705909876657, "loss": 1.1877, "step": 35436 }, { "epoch": 0.4604876527468029, "grad_norm": 0.3943553864955902, "learning_rate": 0.00010792445963685518, "loss": 1.6689, "step": 35437 }, { "epoch": 0.4605006472907188, "grad_norm": 0.41357937455177307, "learning_rate": 0.00010792186017494379, "loss": 1.3888, "step": 35438 }, { "epoch": 0.46051364183463467, "grad_norm": 0.43631890416145325, "learning_rate": 0.0001079192607130324, "loss": 1.5142, "step": 35439 }, { "epoch": 0.46052663637855057, "grad_norm": 0.41835543513298035, "learning_rate": 0.00010791666125112103, "loss": 1.318, "step": 35440 }, { "epoch": 0.4605396309224664, "grad_norm": 0.4383360743522644, "learning_rate": 0.00010791406178920964, "loss": 1.4763, "step": 35441 }, { "epoch": 0.4605526254663823, "grad_norm": 0.29719245433807373, "learning_rate": 0.00010791146232729825, "loss": 1.26, "step": 35442 }, { "epoch": 0.46056562001029816, "grad_norm": 0.3552534580230713, "learning_rate": 0.00010790886286538686, "loss": 1.2009, "step": 35443 }, { "epoch": 0.46057861455421406, "grad_norm": 0.34179362654685974, "learning_rate": 0.0001079062634034755, "loss": 1.1932, "step": 35444 }, { "epoch": 0.4605916090981299, "grad_norm": 0.4392559826374054, "learning_rate": 0.00010790366394156411, "loss": 1.4365, "step": 35445 }, { "epoch": 0.4606046036420458, "grad_norm": 0.442568838596344, "learning_rate": 0.00010790106447965272, "loss": 1.4342, "step": 35446 }, { "epoch": 0.46061759818596165, "grad_norm": 0.385448694229126, "learning_rate": 0.00010789846501774133, "loss": 1.3128, "step": 35447 }, { "epoch": 0.46063059272987755, "grad_norm": 0.3692645728588104, "learning_rate": 0.00010789586555582995, "loss": 1.2831, "step": 35448 }, { "epoch": 0.4606435872737934, "grad_norm": 0.3242857754230499, "learning_rate": 0.00010789326609391857, "loss": 1.2167, "step": 35449 }, { "epoch": 0.4606565818177093, "grad_norm": 0.34786468744277954, "learning_rate": 0.00010789066663200718, "loss": 1.1588, "step": 35450 }, { "epoch": 0.46066957636162514, "grad_norm": 0.4622904658317566, "learning_rate": 0.00010788806717009579, "loss": 1.3661, "step": 35451 }, { "epoch": 0.46068257090554104, "grad_norm": 0.36055120825767517, "learning_rate": 0.00010788546770818441, "loss": 1.4405, "step": 35452 }, { "epoch": 0.4606955654494569, "grad_norm": 0.2418884038925171, "learning_rate": 0.00010788286824627302, "loss": 1.246, "step": 35453 }, { "epoch": 0.4607085599933728, "grad_norm": 0.4132326543331146, "learning_rate": 0.00010788026878436163, "loss": 1.3699, "step": 35454 }, { "epoch": 0.46072155453728864, "grad_norm": 0.38137102127075195, "learning_rate": 0.00010787766932245024, "loss": 1.5493, "step": 35455 }, { "epoch": 0.46073454908120454, "grad_norm": 0.3754677474498749, "learning_rate": 0.00010787506986053888, "loss": 1.3205, "step": 35456 }, { "epoch": 0.4607475436251204, "grad_norm": 0.356577068567276, "learning_rate": 0.0001078724703986275, "loss": 1.3615, "step": 35457 }, { "epoch": 0.4607605381690363, "grad_norm": 0.3879176676273346, "learning_rate": 0.0001078698709367161, "loss": 1.6491, "step": 35458 }, { "epoch": 0.4607735327129521, "grad_norm": 0.4713696539402008, "learning_rate": 0.00010786727147480472, "loss": 1.5473, "step": 35459 }, { "epoch": 0.46078652725686803, "grad_norm": 0.30050498247146606, "learning_rate": 0.00010786467201289334, "loss": 1.4121, "step": 35460 }, { "epoch": 0.4607995218007839, "grad_norm": 0.3920830488204956, "learning_rate": 0.00010786207255098195, "loss": 1.3375, "step": 35461 }, { "epoch": 0.4608125163446998, "grad_norm": 0.435604989528656, "learning_rate": 0.00010785947308907056, "loss": 1.3839, "step": 35462 }, { "epoch": 0.4608255108886156, "grad_norm": 0.4181916117668152, "learning_rate": 0.0001078568736271592, "loss": 1.4135, "step": 35463 }, { "epoch": 0.4608385054325315, "grad_norm": 0.36078110337257385, "learning_rate": 0.0001078542741652478, "loss": 1.4142, "step": 35464 }, { "epoch": 0.46085149997644737, "grad_norm": 0.3596632182598114, "learning_rate": 0.00010785167470333641, "loss": 1.5991, "step": 35465 }, { "epoch": 0.46086449452036327, "grad_norm": 0.5564031004905701, "learning_rate": 0.00010784907524142502, "loss": 1.4019, "step": 35466 }, { "epoch": 0.4608774890642791, "grad_norm": 0.36761605739593506, "learning_rate": 0.00010784647577951366, "loss": 1.3271, "step": 35467 }, { "epoch": 0.460890483608195, "grad_norm": 0.4357858896255493, "learning_rate": 0.00010784387631760227, "loss": 1.2326, "step": 35468 }, { "epoch": 0.46090347815211086, "grad_norm": 0.395498126745224, "learning_rate": 0.00010784127685569088, "loss": 1.3378, "step": 35469 }, { "epoch": 0.46091647269602676, "grad_norm": 0.3968724310398102, "learning_rate": 0.00010783867739377949, "loss": 1.3906, "step": 35470 }, { "epoch": 0.4609294672399426, "grad_norm": 0.4514101445674896, "learning_rate": 0.00010783607793186811, "loss": 1.408, "step": 35471 }, { "epoch": 0.4609424617838585, "grad_norm": 0.3034187853336334, "learning_rate": 0.00010783347846995673, "loss": 1.5912, "step": 35472 }, { "epoch": 0.46095545632777435, "grad_norm": 0.43063607811927795, "learning_rate": 0.00010783087900804534, "loss": 1.37, "step": 35473 }, { "epoch": 0.46096845087169025, "grad_norm": 0.4944630265235901, "learning_rate": 0.00010782827954613395, "loss": 1.4736, "step": 35474 }, { "epoch": 0.4609814454156061, "grad_norm": 0.35504454374313354, "learning_rate": 0.00010782568008422259, "loss": 1.4337, "step": 35475 }, { "epoch": 0.460994439959522, "grad_norm": 0.4347068965435028, "learning_rate": 0.0001078230806223112, "loss": 1.6104, "step": 35476 }, { "epoch": 0.4610074345034379, "grad_norm": 0.4077186584472656, "learning_rate": 0.0001078204811603998, "loss": 1.3612, "step": 35477 }, { "epoch": 0.46102042904735374, "grad_norm": 0.42140883207321167, "learning_rate": 0.0001078178816984884, "loss": 1.2237, "step": 35478 }, { "epoch": 0.46103342359126964, "grad_norm": 0.39524775743484497, "learning_rate": 0.00010781528223657704, "loss": 1.2876, "step": 35479 }, { "epoch": 0.4610464181351855, "grad_norm": 0.4483894109725952, "learning_rate": 0.00010781268277466565, "loss": 1.5246, "step": 35480 }, { "epoch": 0.4610594126791014, "grad_norm": 0.3846283555030823, "learning_rate": 0.00010781008331275426, "loss": 1.3072, "step": 35481 }, { "epoch": 0.46107240722301723, "grad_norm": 0.4728820025920868, "learning_rate": 0.00010780748385084288, "loss": 1.3395, "step": 35482 }, { "epoch": 0.46108540176693313, "grad_norm": 0.5418142676353455, "learning_rate": 0.0001078048843889315, "loss": 1.5274, "step": 35483 }, { "epoch": 0.461098396310849, "grad_norm": 0.4721275568008423, "learning_rate": 0.00010780228492702011, "loss": 1.3512, "step": 35484 }, { "epoch": 0.4611113908547649, "grad_norm": 0.4507755637168884, "learning_rate": 0.00010779968546510872, "loss": 1.564, "step": 35485 }, { "epoch": 0.4611243853986807, "grad_norm": 0.3780573606491089, "learning_rate": 0.00010779708600319733, "loss": 1.2574, "step": 35486 }, { "epoch": 0.4611373799425966, "grad_norm": 0.40787768363952637, "learning_rate": 0.00010779448654128597, "loss": 1.2394, "step": 35487 }, { "epoch": 0.46115037448651247, "grad_norm": 0.4653986096382141, "learning_rate": 0.00010779188707937458, "loss": 1.3631, "step": 35488 }, { "epoch": 0.46116336903042837, "grad_norm": 0.42890244722366333, "learning_rate": 0.00010778928761746319, "loss": 1.4325, "step": 35489 }, { "epoch": 0.4611763635743442, "grad_norm": 0.4308032989501953, "learning_rate": 0.00010778668815555179, "loss": 1.4119, "step": 35490 }, { "epoch": 0.4611893581182601, "grad_norm": 0.2435729205608368, "learning_rate": 0.00010778408869364043, "loss": 1.3705, "step": 35491 }, { "epoch": 0.46120235266217596, "grad_norm": 0.37892818450927734, "learning_rate": 0.00010778148923172904, "loss": 1.6448, "step": 35492 }, { "epoch": 0.46121534720609186, "grad_norm": 0.3605073094367981, "learning_rate": 0.00010777888976981765, "loss": 1.1891, "step": 35493 }, { "epoch": 0.4612283417500077, "grad_norm": 0.43432673811912537, "learning_rate": 0.00010777629030790626, "loss": 1.3704, "step": 35494 }, { "epoch": 0.4612413362939236, "grad_norm": 0.42827916145324707, "learning_rate": 0.00010777369084599489, "loss": 1.5073, "step": 35495 }, { "epoch": 0.46125433083783945, "grad_norm": 0.3474370241165161, "learning_rate": 0.0001077710913840835, "loss": 1.2173, "step": 35496 }, { "epoch": 0.46126732538175536, "grad_norm": 0.39356929063796997, "learning_rate": 0.00010776849192217211, "loss": 1.3685, "step": 35497 }, { "epoch": 0.4612803199256712, "grad_norm": 0.31644105911254883, "learning_rate": 0.00010776589246026075, "loss": 1.3982, "step": 35498 }, { "epoch": 0.4612933144695871, "grad_norm": 0.41147375106811523, "learning_rate": 0.00010776329299834936, "loss": 1.2236, "step": 35499 }, { "epoch": 0.46130630901350295, "grad_norm": 0.4182385802268982, "learning_rate": 0.00010776069353643797, "loss": 1.3749, "step": 35500 }, { "epoch": 0.46131930355741885, "grad_norm": 0.3499833643436432, "learning_rate": 0.00010775809407452658, "loss": 1.4099, "step": 35501 }, { "epoch": 0.4613322981013347, "grad_norm": 0.49604490399360657, "learning_rate": 0.0001077554946126152, "loss": 1.4814, "step": 35502 }, { "epoch": 0.4613452926452506, "grad_norm": 0.4186013340950012, "learning_rate": 0.00010775289515070381, "loss": 1.3458, "step": 35503 }, { "epoch": 0.46135828718916644, "grad_norm": 0.5104401111602783, "learning_rate": 0.00010775029568879242, "loss": 1.4289, "step": 35504 }, { "epoch": 0.46137128173308234, "grad_norm": 0.31747984886169434, "learning_rate": 0.00010774769622688104, "loss": 1.3687, "step": 35505 }, { "epoch": 0.4613842762769982, "grad_norm": 0.4271358251571655, "learning_rate": 0.00010774509676496966, "loss": 1.361, "step": 35506 }, { "epoch": 0.4613972708209141, "grad_norm": 0.26651349663734436, "learning_rate": 0.00010774249730305827, "loss": 1.3158, "step": 35507 }, { "epoch": 0.46141026536482993, "grad_norm": 0.47682732343673706, "learning_rate": 0.00010773989784114688, "loss": 1.4754, "step": 35508 }, { "epoch": 0.46142325990874583, "grad_norm": 0.45204803347587585, "learning_rate": 0.00010773729837923549, "loss": 1.309, "step": 35509 }, { "epoch": 0.4614362544526617, "grad_norm": 0.386753648519516, "learning_rate": 0.00010773469891732413, "loss": 1.402, "step": 35510 }, { "epoch": 0.4614492489965776, "grad_norm": 0.475290447473526, "learning_rate": 0.00010773209945541274, "loss": 1.2681, "step": 35511 }, { "epoch": 0.4614622435404934, "grad_norm": 0.3560897409915924, "learning_rate": 0.00010772949999350135, "loss": 1.2405, "step": 35512 }, { "epoch": 0.4614752380844093, "grad_norm": 0.4530765116214752, "learning_rate": 0.00010772690053158996, "loss": 1.4329, "step": 35513 }, { "epoch": 0.46148823262832517, "grad_norm": 0.3111562728881836, "learning_rate": 0.00010772430106967859, "loss": 1.1261, "step": 35514 }, { "epoch": 0.46150122717224107, "grad_norm": 0.39080044627189636, "learning_rate": 0.0001077217016077672, "loss": 1.399, "step": 35515 }, { "epoch": 0.4615142217161569, "grad_norm": 0.44555526971817017, "learning_rate": 0.00010771910214585581, "loss": 1.4906, "step": 35516 }, { "epoch": 0.4615272162600728, "grad_norm": 0.40763089060783386, "learning_rate": 0.00010771650268394442, "loss": 1.4015, "step": 35517 }, { "epoch": 0.46154021080398866, "grad_norm": 0.4661819636821747, "learning_rate": 0.00010771390322203306, "loss": 1.5345, "step": 35518 }, { "epoch": 0.46155320534790456, "grad_norm": 0.37947434186935425, "learning_rate": 0.00010771130376012166, "loss": 1.4025, "step": 35519 }, { "epoch": 0.4615661998918204, "grad_norm": 0.40372148156166077, "learning_rate": 0.00010770870429821027, "loss": 1.2942, "step": 35520 }, { "epoch": 0.4615791944357363, "grad_norm": 0.3758772015571594, "learning_rate": 0.00010770610483629888, "loss": 1.3351, "step": 35521 }, { "epoch": 0.46159218897965215, "grad_norm": 0.40708017349243164, "learning_rate": 0.00010770350537438752, "loss": 1.4488, "step": 35522 }, { "epoch": 0.46160518352356805, "grad_norm": 0.49547505378723145, "learning_rate": 0.00010770090591247613, "loss": 1.4185, "step": 35523 }, { "epoch": 0.4616181780674839, "grad_norm": 0.41007938981056213, "learning_rate": 0.00010769830645056474, "loss": 1.5052, "step": 35524 }, { "epoch": 0.4616311726113998, "grad_norm": 0.4089713394641876, "learning_rate": 0.00010769570698865335, "loss": 1.1844, "step": 35525 }, { "epoch": 0.46164416715531564, "grad_norm": 0.3328966498374939, "learning_rate": 0.00010769310752674197, "loss": 1.5177, "step": 35526 }, { "epoch": 0.46165716169923154, "grad_norm": 0.39055484533309937, "learning_rate": 0.00010769050806483058, "loss": 1.4707, "step": 35527 }, { "epoch": 0.4616701562431474, "grad_norm": 0.441169410943985, "learning_rate": 0.0001076879086029192, "loss": 1.423, "step": 35528 }, { "epoch": 0.4616831507870633, "grad_norm": 0.37846776843070984, "learning_rate": 0.0001076853091410078, "loss": 1.4166, "step": 35529 }, { "epoch": 0.46169614533097914, "grad_norm": 0.4110094904899597, "learning_rate": 0.00010768270967909644, "loss": 1.2504, "step": 35530 }, { "epoch": 0.46170913987489504, "grad_norm": 0.37566396594047546, "learning_rate": 0.00010768011021718505, "loss": 1.2148, "step": 35531 }, { "epoch": 0.4617221344188109, "grad_norm": 0.4372609555721283, "learning_rate": 0.00010767751075527365, "loss": 1.3089, "step": 35532 }, { "epoch": 0.4617351289627268, "grad_norm": 0.4025667905807495, "learning_rate": 0.00010767491129336226, "loss": 1.4338, "step": 35533 }, { "epoch": 0.4617481235066426, "grad_norm": 0.37487006187438965, "learning_rate": 0.0001076723118314509, "loss": 1.2738, "step": 35534 }, { "epoch": 0.46176111805055853, "grad_norm": 0.4419200122356415, "learning_rate": 0.00010766971236953951, "loss": 1.4552, "step": 35535 }, { "epoch": 0.4617741125944744, "grad_norm": 0.3112601041793823, "learning_rate": 0.00010766711290762812, "loss": 1.2519, "step": 35536 }, { "epoch": 0.4617871071383903, "grad_norm": 0.4411623775959015, "learning_rate": 0.00010766451344571675, "loss": 1.4343, "step": 35537 }, { "epoch": 0.4618001016823061, "grad_norm": 0.4714076817035675, "learning_rate": 0.00010766191398380536, "loss": 1.4513, "step": 35538 }, { "epoch": 0.461813096226222, "grad_norm": 0.44928380846977234, "learning_rate": 0.00010765931452189397, "loss": 1.4452, "step": 35539 }, { "epoch": 0.46182609077013786, "grad_norm": 0.4142343997955322, "learning_rate": 0.00010765671505998258, "loss": 1.3852, "step": 35540 }, { "epoch": 0.46183908531405377, "grad_norm": 0.41743364930152893, "learning_rate": 0.00010765411559807122, "loss": 1.4237, "step": 35541 }, { "epoch": 0.4618520798579696, "grad_norm": 0.4155822694301605, "learning_rate": 0.00010765151613615983, "loss": 1.4403, "step": 35542 }, { "epoch": 0.4618650744018855, "grad_norm": 0.2787812352180481, "learning_rate": 0.00010764891667424844, "loss": 1.0245, "step": 35543 }, { "epoch": 0.46187806894580136, "grad_norm": 0.5130904316902161, "learning_rate": 0.00010764631721233705, "loss": 1.3957, "step": 35544 }, { "epoch": 0.46189106348971726, "grad_norm": 0.40980926156044006, "learning_rate": 0.00010764371775042568, "loss": 1.2248, "step": 35545 }, { "epoch": 0.4619040580336331, "grad_norm": 0.4229581356048584, "learning_rate": 0.00010764111828851429, "loss": 1.4666, "step": 35546 }, { "epoch": 0.461917052577549, "grad_norm": 0.3763701319694519, "learning_rate": 0.0001076385188266029, "loss": 1.3778, "step": 35547 }, { "epoch": 0.46193004712146485, "grad_norm": 0.43439486622810364, "learning_rate": 0.00010763591936469151, "loss": 1.351, "step": 35548 }, { "epoch": 0.46194304166538075, "grad_norm": 0.3355864882469177, "learning_rate": 0.00010763331990278013, "loss": 1.2932, "step": 35549 }, { "epoch": 0.4619560362092966, "grad_norm": 0.32369762659072876, "learning_rate": 0.00010763072044086874, "loss": 1.2564, "step": 35550 }, { "epoch": 0.4619690307532125, "grad_norm": 0.42036017775535583, "learning_rate": 0.00010762812097895735, "loss": 1.4687, "step": 35551 }, { "epoch": 0.4619820252971284, "grad_norm": 0.49597010016441345, "learning_rate": 0.00010762552151704597, "loss": 1.564, "step": 35552 }, { "epoch": 0.46199501984104424, "grad_norm": 0.5177572965621948, "learning_rate": 0.0001076229220551346, "loss": 1.3751, "step": 35553 }, { "epoch": 0.46200801438496014, "grad_norm": 0.35545286536216736, "learning_rate": 0.00010762032259322321, "loss": 1.3324, "step": 35554 }, { "epoch": 0.462021008928876, "grad_norm": 0.4604887366294861, "learning_rate": 0.00010761772313131183, "loss": 1.4888, "step": 35555 }, { "epoch": 0.4620340034727919, "grad_norm": 0.4378258287906647, "learning_rate": 0.00010761512366940044, "loss": 1.5625, "step": 35556 }, { "epoch": 0.46204699801670773, "grad_norm": 0.499294251203537, "learning_rate": 0.00010761252420748906, "loss": 1.3961, "step": 35557 }, { "epoch": 0.46205999256062363, "grad_norm": 0.4454141855239868, "learning_rate": 0.00010760992474557767, "loss": 1.3144, "step": 35558 }, { "epoch": 0.4620729871045395, "grad_norm": 0.3458491861820221, "learning_rate": 0.00010760732528366628, "loss": 1.3078, "step": 35559 }, { "epoch": 0.4620859816484554, "grad_norm": 0.2784202992916107, "learning_rate": 0.0001076047258217549, "loss": 1.2677, "step": 35560 }, { "epoch": 0.4620989761923712, "grad_norm": 0.4437010586261749, "learning_rate": 0.00010760212635984352, "loss": 1.3901, "step": 35561 }, { "epoch": 0.4621119707362871, "grad_norm": 0.42371848225593567, "learning_rate": 0.00010759952689793213, "loss": 1.398, "step": 35562 }, { "epoch": 0.46212496528020297, "grad_norm": 0.45266059041023254, "learning_rate": 0.00010759692743602074, "loss": 1.3946, "step": 35563 }, { "epoch": 0.46213795982411887, "grad_norm": 0.4476514458656311, "learning_rate": 0.00010759432797410935, "loss": 1.5033, "step": 35564 }, { "epoch": 0.4621509543680347, "grad_norm": 0.3082617521286011, "learning_rate": 0.00010759172851219799, "loss": 1.361, "step": 35565 }, { "epoch": 0.4621639489119506, "grad_norm": 0.29336756467819214, "learning_rate": 0.0001075891290502866, "loss": 1.1328, "step": 35566 }, { "epoch": 0.46217694345586646, "grad_norm": 0.37239986658096313, "learning_rate": 0.00010758652958837521, "loss": 1.4044, "step": 35567 }, { "epoch": 0.46218993799978236, "grad_norm": 0.4684375822544098, "learning_rate": 0.00010758393012646382, "loss": 1.0556, "step": 35568 }, { "epoch": 0.4622029325436982, "grad_norm": 0.38781532645225525, "learning_rate": 0.00010758133066455245, "loss": 1.2834, "step": 35569 }, { "epoch": 0.4622159270876141, "grad_norm": 0.4089552164077759, "learning_rate": 0.00010757873120264106, "loss": 1.5639, "step": 35570 }, { "epoch": 0.46222892163152995, "grad_norm": 0.3284449577331543, "learning_rate": 0.00010757613174072967, "loss": 1.2763, "step": 35571 }, { "epoch": 0.46224191617544585, "grad_norm": 0.3963087499141693, "learning_rate": 0.0001075735322788183, "loss": 1.2888, "step": 35572 }, { "epoch": 0.4622549107193617, "grad_norm": 0.4717348515987396, "learning_rate": 0.00010757093281690692, "loss": 1.4842, "step": 35573 }, { "epoch": 0.4622679052632776, "grad_norm": 0.383375346660614, "learning_rate": 0.00010756833335499551, "loss": 1.3755, "step": 35574 }, { "epoch": 0.46228089980719345, "grad_norm": 0.34396892786026, "learning_rate": 0.00010756573389308413, "loss": 1.4222, "step": 35575 }, { "epoch": 0.46229389435110935, "grad_norm": 0.44204697012901306, "learning_rate": 0.00010756313443117276, "loss": 1.3069, "step": 35576 }, { "epoch": 0.4623068888950252, "grad_norm": 0.37377914786338806, "learning_rate": 0.00010756053496926137, "loss": 1.3046, "step": 35577 }, { "epoch": 0.4623198834389411, "grad_norm": 0.29263922572135925, "learning_rate": 0.00010755793550734999, "loss": 1.3045, "step": 35578 }, { "epoch": 0.46233287798285694, "grad_norm": 0.37501809000968933, "learning_rate": 0.0001075553360454386, "loss": 1.5368, "step": 35579 }, { "epoch": 0.46234587252677284, "grad_norm": 0.3148528039455414, "learning_rate": 0.00010755273658352722, "loss": 1.4602, "step": 35580 }, { "epoch": 0.4623588670706887, "grad_norm": 0.4157966375350952, "learning_rate": 0.00010755013712161583, "loss": 1.4566, "step": 35581 }, { "epoch": 0.4623718616146046, "grad_norm": 0.5455573797225952, "learning_rate": 0.00010754753765970444, "loss": 1.1508, "step": 35582 }, { "epoch": 0.46238485615852043, "grad_norm": 0.5336974859237671, "learning_rate": 0.00010754493819779305, "loss": 1.3638, "step": 35583 }, { "epoch": 0.46239785070243633, "grad_norm": 0.4476361572742462, "learning_rate": 0.00010754233873588169, "loss": 1.541, "step": 35584 }, { "epoch": 0.4624108452463522, "grad_norm": 0.2974816560745239, "learning_rate": 0.0001075397392739703, "loss": 1.4409, "step": 35585 }, { "epoch": 0.4624238397902681, "grad_norm": 0.46337515115737915, "learning_rate": 0.00010753713981205891, "loss": 1.2421, "step": 35586 }, { "epoch": 0.4624368343341839, "grad_norm": 0.4262804687023163, "learning_rate": 0.00010753454035014751, "loss": 1.3876, "step": 35587 }, { "epoch": 0.4624498288780998, "grad_norm": 0.3468291461467743, "learning_rate": 0.00010753194088823615, "loss": 1.4673, "step": 35588 }, { "epoch": 0.46246282342201567, "grad_norm": 0.36996781826019287, "learning_rate": 0.00010752934142632476, "loss": 1.3925, "step": 35589 }, { "epoch": 0.46247581796593157, "grad_norm": 0.3559987246990204, "learning_rate": 0.00010752674196441337, "loss": 1.3626, "step": 35590 }, { "epoch": 0.4624888125098474, "grad_norm": 0.41341087222099304, "learning_rate": 0.00010752414250250198, "loss": 1.4617, "step": 35591 }, { "epoch": 0.4625018070537633, "grad_norm": 0.39191341400146484, "learning_rate": 0.0001075215430405906, "loss": 1.3471, "step": 35592 }, { "epoch": 0.46251480159767916, "grad_norm": 0.36694279313087463, "learning_rate": 0.00010751894357867922, "loss": 1.5318, "step": 35593 }, { "epoch": 0.46252779614159506, "grad_norm": 0.36124345660209656, "learning_rate": 0.00010751634411676783, "loss": 1.3142, "step": 35594 }, { "epoch": 0.4625407906855109, "grad_norm": 0.3686319589614868, "learning_rate": 0.00010751374465485644, "loss": 1.6122, "step": 35595 }, { "epoch": 0.4625537852294268, "grad_norm": 0.23160549998283386, "learning_rate": 0.00010751114519294508, "loss": 1.3645, "step": 35596 }, { "epoch": 0.46256677977334265, "grad_norm": 0.44235989451408386, "learning_rate": 0.00010750854573103369, "loss": 1.3019, "step": 35597 }, { "epoch": 0.46257977431725855, "grad_norm": 0.4268600642681122, "learning_rate": 0.0001075059462691223, "loss": 1.5811, "step": 35598 }, { "epoch": 0.4625927688611744, "grad_norm": 0.3864820599555969, "learning_rate": 0.0001075033468072109, "loss": 1.3508, "step": 35599 }, { "epoch": 0.4626057634050903, "grad_norm": 0.34672778844833374, "learning_rate": 0.00010750074734529953, "loss": 1.2393, "step": 35600 }, { "epoch": 0.46261875794900614, "grad_norm": 0.4089890718460083, "learning_rate": 0.00010749814788338815, "loss": 1.3372, "step": 35601 }, { "epoch": 0.46263175249292204, "grad_norm": 0.48859190940856934, "learning_rate": 0.00010749554842147676, "loss": 1.4937, "step": 35602 }, { "epoch": 0.4626447470368379, "grad_norm": 0.3934830129146576, "learning_rate": 0.00010749294895956537, "loss": 1.3033, "step": 35603 }, { "epoch": 0.4626577415807538, "grad_norm": 0.4286805987358093, "learning_rate": 0.00010749034949765399, "loss": 1.6952, "step": 35604 }, { "epoch": 0.46267073612466963, "grad_norm": 0.41911569237709045, "learning_rate": 0.0001074877500357426, "loss": 1.4163, "step": 35605 }, { "epoch": 0.46268373066858554, "grad_norm": 0.29294317960739136, "learning_rate": 0.00010748515057383121, "loss": 1.3951, "step": 35606 }, { "epoch": 0.4626967252125014, "grad_norm": 0.3785363733768463, "learning_rate": 0.00010748255111191982, "loss": 1.3576, "step": 35607 }, { "epoch": 0.4627097197564173, "grad_norm": 0.457661509513855, "learning_rate": 0.00010747995165000846, "loss": 1.3655, "step": 35608 }, { "epoch": 0.4627227143003331, "grad_norm": 0.4276477098464966, "learning_rate": 0.00010747735218809707, "loss": 1.3633, "step": 35609 }, { "epoch": 0.462735708844249, "grad_norm": 0.39906471967697144, "learning_rate": 0.00010747475272618568, "loss": 1.4687, "step": 35610 }, { "epoch": 0.4627487033881649, "grad_norm": 0.3625788390636444, "learning_rate": 0.00010747215326427431, "loss": 1.5682, "step": 35611 }, { "epoch": 0.4627616979320808, "grad_norm": 0.38163772225379944, "learning_rate": 0.00010746955380236292, "loss": 1.3378, "step": 35612 }, { "epoch": 0.4627746924759966, "grad_norm": 0.47390687465667725, "learning_rate": 0.00010746695434045153, "loss": 1.3152, "step": 35613 }, { "epoch": 0.4627876870199125, "grad_norm": 0.36197197437286377, "learning_rate": 0.00010746435487854014, "loss": 1.3028, "step": 35614 }, { "epoch": 0.46280068156382836, "grad_norm": 0.4514126777648926, "learning_rate": 0.00010746175541662878, "loss": 1.5198, "step": 35615 }, { "epoch": 0.46281367610774427, "grad_norm": 0.5091932415962219, "learning_rate": 0.00010745915595471738, "loss": 1.4468, "step": 35616 }, { "epoch": 0.4628266706516601, "grad_norm": 0.38111212849617004, "learning_rate": 0.00010745655649280599, "loss": 1.4489, "step": 35617 }, { "epoch": 0.462839665195576, "grad_norm": 0.4679569900035858, "learning_rate": 0.0001074539570308946, "loss": 1.5628, "step": 35618 }, { "epoch": 0.46285265973949186, "grad_norm": 0.47226688265800476, "learning_rate": 0.00010745135756898324, "loss": 1.3734, "step": 35619 }, { "epoch": 0.46286565428340776, "grad_norm": 0.49694281816482544, "learning_rate": 0.00010744875810707185, "loss": 1.5586, "step": 35620 }, { "epoch": 0.4628786488273236, "grad_norm": 0.5413099527359009, "learning_rate": 0.00010744615864516046, "loss": 1.3559, "step": 35621 }, { "epoch": 0.4628916433712395, "grad_norm": 0.3707817792892456, "learning_rate": 0.00010744355918324907, "loss": 1.3946, "step": 35622 }, { "epoch": 0.46290463791515535, "grad_norm": 0.41101813316345215, "learning_rate": 0.0001074409597213377, "loss": 1.3852, "step": 35623 }, { "epoch": 0.46291763245907125, "grad_norm": 0.2865454852581024, "learning_rate": 0.0001074383602594263, "loss": 1.1666, "step": 35624 }, { "epoch": 0.4629306270029871, "grad_norm": 0.45384377241134644, "learning_rate": 0.00010743576079751492, "loss": 1.3639, "step": 35625 }, { "epoch": 0.462943621546903, "grad_norm": 0.3580816984176636, "learning_rate": 0.00010743316133560353, "loss": 1.2505, "step": 35626 }, { "epoch": 0.46295661609081884, "grad_norm": 0.458852082490921, "learning_rate": 0.00010743056187369217, "loss": 1.4553, "step": 35627 }, { "epoch": 0.46296961063473474, "grad_norm": 0.41107693314552307, "learning_rate": 0.00010742796241178076, "loss": 1.4153, "step": 35628 }, { "epoch": 0.46298260517865064, "grad_norm": 0.39565780758857727, "learning_rate": 0.00010742536294986937, "loss": 1.2137, "step": 35629 }, { "epoch": 0.4629955997225665, "grad_norm": 0.40981370210647583, "learning_rate": 0.00010742276348795798, "loss": 1.2962, "step": 35630 }, { "epoch": 0.4630085942664824, "grad_norm": 0.4478282332420349, "learning_rate": 0.00010742016402604662, "loss": 1.2749, "step": 35631 }, { "epoch": 0.46302158881039823, "grad_norm": 0.5453441739082336, "learning_rate": 0.00010741756456413523, "loss": 1.6012, "step": 35632 }, { "epoch": 0.46303458335431413, "grad_norm": 0.3136277198791504, "learning_rate": 0.00010741496510222384, "loss": 1.3389, "step": 35633 }, { "epoch": 0.46304757789823, "grad_norm": 0.44794800877571106, "learning_rate": 0.00010741236564031246, "loss": 1.4151, "step": 35634 }, { "epoch": 0.4630605724421459, "grad_norm": 0.42392364144325256, "learning_rate": 0.00010740976617840108, "loss": 1.3304, "step": 35635 }, { "epoch": 0.4630735669860617, "grad_norm": 0.376111775636673, "learning_rate": 0.00010740716671648969, "loss": 1.4198, "step": 35636 }, { "epoch": 0.4630865615299776, "grad_norm": 0.4805116653442383, "learning_rate": 0.0001074045672545783, "loss": 1.3806, "step": 35637 }, { "epoch": 0.46309955607389347, "grad_norm": 0.4525004029273987, "learning_rate": 0.00010740196779266691, "loss": 1.3111, "step": 35638 }, { "epoch": 0.46311255061780937, "grad_norm": 0.4817827641963959, "learning_rate": 0.00010739936833075555, "loss": 1.5681, "step": 35639 }, { "epoch": 0.4631255451617252, "grad_norm": 0.3991628587245941, "learning_rate": 0.00010739676886884416, "loss": 1.1895, "step": 35640 }, { "epoch": 0.4631385397056411, "grad_norm": 0.35473111271858215, "learning_rate": 0.00010739416940693276, "loss": 1.3662, "step": 35641 }, { "epoch": 0.46315153424955696, "grad_norm": 0.5121421813964844, "learning_rate": 0.00010739156994502137, "loss": 1.5058, "step": 35642 }, { "epoch": 0.46316452879347286, "grad_norm": 0.4408717155456543, "learning_rate": 0.00010738897048311001, "loss": 1.3839, "step": 35643 }, { "epoch": 0.4631775233373887, "grad_norm": 0.3886417746543884, "learning_rate": 0.00010738637102119862, "loss": 1.5396, "step": 35644 }, { "epoch": 0.4631905178813046, "grad_norm": 0.46628519892692566, "learning_rate": 0.00010738377155928723, "loss": 1.537, "step": 35645 }, { "epoch": 0.46320351242522045, "grad_norm": 0.41789451241493225, "learning_rate": 0.00010738117209737584, "loss": 1.3968, "step": 35646 }, { "epoch": 0.46321650696913635, "grad_norm": 0.346975177526474, "learning_rate": 0.00010737857263546447, "loss": 1.3174, "step": 35647 }, { "epoch": 0.4632295015130522, "grad_norm": 0.41786059737205505, "learning_rate": 0.00010737597317355308, "loss": 1.3239, "step": 35648 }, { "epoch": 0.4632424960569681, "grad_norm": 0.3134523034095764, "learning_rate": 0.00010737337371164169, "loss": 1.3524, "step": 35649 }, { "epoch": 0.46325549060088395, "grad_norm": 0.34346750378608704, "learning_rate": 0.00010737077424973032, "loss": 1.5149, "step": 35650 }, { "epoch": 0.46326848514479985, "grad_norm": 0.38218626379966736, "learning_rate": 0.00010736817478781894, "loss": 1.4093, "step": 35651 }, { "epoch": 0.4632814796887157, "grad_norm": 0.36848539113998413, "learning_rate": 0.00010736557532590755, "loss": 1.4656, "step": 35652 }, { "epoch": 0.4632944742326316, "grad_norm": 0.427184522151947, "learning_rate": 0.00010736297586399616, "loss": 1.4659, "step": 35653 }, { "epoch": 0.46330746877654744, "grad_norm": 0.4232839047908783, "learning_rate": 0.00010736037640208478, "loss": 1.5128, "step": 35654 }, { "epoch": 0.46332046332046334, "grad_norm": 0.42893972992897034, "learning_rate": 0.00010735777694017339, "loss": 1.367, "step": 35655 }, { "epoch": 0.4633334578643792, "grad_norm": 0.5212001800537109, "learning_rate": 0.000107355177478262, "loss": 1.3552, "step": 35656 }, { "epoch": 0.4633464524082951, "grad_norm": 0.3655027449131012, "learning_rate": 0.00010735257801635062, "loss": 1.5283, "step": 35657 }, { "epoch": 0.46335944695221093, "grad_norm": 0.4473346471786499, "learning_rate": 0.00010734997855443924, "loss": 1.5149, "step": 35658 }, { "epoch": 0.46337244149612683, "grad_norm": 0.3939672112464905, "learning_rate": 0.00010734737909252785, "loss": 1.2939, "step": 35659 }, { "epoch": 0.4633854360400427, "grad_norm": 0.3965427279472351, "learning_rate": 0.00010734477963061646, "loss": 1.5198, "step": 35660 }, { "epoch": 0.4633984305839586, "grad_norm": 0.3588354289531708, "learning_rate": 0.00010734218016870507, "loss": 1.3263, "step": 35661 }, { "epoch": 0.4634114251278744, "grad_norm": 0.33331888914108276, "learning_rate": 0.00010733958070679371, "loss": 1.3672, "step": 35662 }, { "epoch": 0.4634244196717903, "grad_norm": 0.28365492820739746, "learning_rate": 0.00010733698124488232, "loss": 1.4528, "step": 35663 }, { "epoch": 0.46343741421570617, "grad_norm": 0.36620762944221497, "learning_rate": 0.00010733438178297093, "loss": 1.1314, "step": 35664 }, { "epoch": 0.46345040875962207, "grad_norm": 0.21242588758468628, "learning_rate": 0.00010733178232105954, "loss": 1.2866, "step": 35665 }, { "epoch": 0.4634634033035379, "grad_norm": 0.4560832381248474, "learning_rate": 0.00010732918285914817, "loss": 1.3328, "step": 35666 }, { "epoch": 0.4634763978474538, "grad_norm": 0.46458005905151367, "learning_rate": 0.00010732658339723678, "loss": 1.2755, "step": 35667 }, { "epoch": 0.46348939239136966, "grad_norm": 0.3883011043071747, "learning_rate": 0.00010732398393532539, "loss": 1.4119, "step": 35668 }, { "epoch": 0.46350238693528556, "grad_norm": 0.31358370184898376, "learning_rate": 0.000107321384473414, "loss": 1.3884, "step": 35669 }, { "epoch": 0.4635153814792014, "grad_norm": 0.3999238610267639, "learning_rate": 0.00010731878501150262, "loss": 1.4935, "step": 35670 }, { "epoch": 0.4635283760231173, "grad_norm": 0.4081012010574341, "learning_rate": 0.00010731618554959124, "loss": 1.4539, "step": 35671 }, { "epoch": 0.46354137056703315, "grad_norm": 0.4759165346622467, "learning_rate": 0.00010731358608767985, "loss": 1.4321, "step": 35672 }, { "epoch": 0.46355436511094905, "grad_norm": 0.36935994029045105, "learning_rate": 0.00010731098662576846, "loss": 1.5067, "step": 35673 }, { "epoch": 0.4635673596548649, "grad_norm": 0.3831806480884552, "learning_rate": 0.0001073083871638571, "loss": 1.1431, "step": 35674 }, { "epoch": 0.4635803541987808, "grad_norm": 0.5320271253585815, "learning_rate": 0.0001073057877019457, "loss": 1.4181, "step": 35675 }, { "epoch": 0.46359334874269664, "grad_norm": 0.47719433903694153, "learning_rate": 0.00010730318824003432, "loss": 1.4503, "step": 35676 }, { "epoch": 0.46360634328661254, "grad_norm": 0.408518522977829, "learning_rate": 0.00010730058877812293, "loss": 1.4137, "step": 35677 }, { "epoch": 0.4636193378305284, "grad_norm": 0.47981736063957214, "learning_rate": 0.00010729798931621155, "loss": 1.3976, "step": 35678 }, { "epoch": 0.4636323323744443, "grad_norm": 0.4576692283153534, "learning_rate": 0.00010729538985430016, "loss": 1.4868, "step": 35679 }, { "epoch": 0.46364532691836013, "grad_norm": 0.38052305579185486, "learning_rate": 0.00010729279039238877, "loss": 1.4702, "step": 35680 }, { "epoch": 0.46365832146227604, "grad_norm": 0.4078505337238312, "learning_rate": 0.00010729019093047739, "loss": 1.345, "step": 35681 }, { "epoch": 0.4636713160061919, "grad_norm": 0.40665605664253235, "learning_rate": 0.00010728759146856602, "loss": 1.2464, "step": 35682 }, { "epoch": 0.4636843105501078, "grad_norm": 0.47068604826927185, "learning_rate": 0.00010728499200665462, "loss": 1.2225, "step": 35683 }, { "epoch": 0.4636973050940236, "grad_norm": 0.366054892539978, "learning_rate": 0.00010728239254474323, "loss": 1.3867, "step": 35684 }, { "epoch": 0.4637102996379395, "grad_norm": 0.45152339339256287, "learning_rate": 0.00010727979308283187, "loss": 1.4901, "step": 35685 }, { "epoch": 0.46372329418185537, "grad_norm": 0.3877542316913605, "learning_rate": 0.00010727719362092048, "loss": 1.5086, "step": 35686 }, { "epoch": 0.4637362887257713, "grad_norm": 0.40926992893218994, "learning_rate": 0.00010727459415900909, "loss": 1.4042, "step": 35687 }, { "epoch": 0.4637492832696871, "grad_norm": 0.42088642716407776, "learning_rate": 0.0001072719946970977, "loss": 1.2724, "step": 35688 }, { "epoch": 0.463762277813603, "grad_norm": 0.433689147233963, "learning_rate": 0.00010726939523518633, "loss": 1.3573, "step": 35689 }, { "epoch": 0.46377527235751886, "grad_norm": 0.3792582154273987, "learning_rate": 0.00010726679577327494, "loss": 1.3351, "step": 35690 }, { "epoch": 0.46378826690143476, "grad_norm": 0.3365780711174011, "learning_rate": 0.00010726419631136355, "loss": 1.381, "step": 35691 }, { "epoch": 0.4638012614453506, "grad_norm": 0.39113849401474, "learning_rate": 0.00010726159684945216, "loss": 1.5173, "step": 35692 }, { "epoch": 0.4638142559892665, "grad_norm": 0.5222766399383545, "learning_rate": 0.0001072589973875408, "loss": 1.4688, "step": 35693 }, { "epoch": 0.46382725053318236, "grad_norm": 0.32109957933425903, "learning_rate": 0.00010725639792562941, "loss": 1.3709, "step": 35694 }, { "epoch": 0.46384024507709826, "grad_norm": 0.4745495617389679, "learning_rate": 0.00010725379846371802, "loss": 1.3546, "step": 35695 }, { "epoch": 0.4638532396210141, "grad_norm": 0.3927459120750427, "learning_rate": 0.00010725119900180662, "loss": 1.4778, "step": 35696 }, { "epoch": 0.46386623416493, "grad_norm": 0.42750415205955505, "learning_rate": 0.00010724859953989526, "loss": 1.5684, "step": 35697 }, { "epoch": 0.46387922870884585, "grad_norm": 0.2993466854095459, "learning_rate": 0.00010724600007798387, "loss": 1.2642, "step": 35698 }, { "epoch": 0.46389222325276175, "grad_norm": 0.40829238295555115, "learning_rate": 0.00010724340061607248, "loss": 1.4845, "step": 35699 }, { "epoch": 0.4639052177966776, "grad_norm": 0.4349399209022522, "learning_rate": 0.00010724080115416109, "loss": 1.3762, "step": 35700 }, { "epoch": 0.4639182123405935, "grad_norm": 0.44317662715911865, "learning_rate": 0.00010723820169224971, "loss": 1.5711, "step": 35701 }, { "epoch": 0.46393120688450934, "grad_norm": 0.4665480852127075, "learning_rate": 0.00010723560223033832, "loss": 1.4481, "step": 35702 }, { "epoch": 0.46394420142842524, "grad_norm": 0.37910211086273193, "learning_rate": 0.00010723300276842693, "loss": 1.3125, "step": 35703 }, { "epoch": 0.46395719597234114, "grad_norm": 0.3588291108608246, "learning_rate": 0.00010723040330651555, "loss": 1.4044, "step": 35704 }, { "epoch": 0.463970190516257, "grad_norm": 0.32809385657310486, "learning_rate": 0.00010722780384460418, "loss": 1.2349, "step": 35705 }, { "epoch": 0.4639831850601729, "grad_norm": 0.38716432452201843, "learning_rate": 0.0001072252043826928, "loss": 1.4886, "step": 35706 }, { "epoch": 0.46399617960408873, "grad_norm": 0.3898129165172577, "learning_rate": 0.0001072226049207814, "loss": 1.4676, "step": 35707 }, { "epoch": 0.46400917414800463, "grad_norm": 0.4012194275856018, "learning_rate": 0.00010722000545887002, "loss": 1.4584, "step": 35708 }, { "epoch": 0.4640221686919205, "grad_norm": 0.3880549967288971, "learning_rate": 0.00010721740599695864, "loss": 1.3998, "step": 35709 }, { "epoch": 0.4640351632358364, "grad_norm": 0.33373868465423584, "learning_rate": 0.00010721480653504725, "loss": 1.3762, "step": 35710 }, { "epoch": 0.4640481577797522, "grad_norm": 0.4542563855648041, "learning_rate": 0.00010721220707313586, "loss": 1.5327, "step": 35711 }, { "epoch": 0.4640611523236681, "grad_norm": 0.3873993456363678, "learning_rate": 0.00010720960761122447, "loss": 1.3017, "step": 35712 }, { "epoch": 0.46407414686758397, "grad_norm": 0.4531840980052948, "learning_rate": 0.0001072070081493131, "loss": 1.3204, "step": 35713 }, { "epoch": 0.46408714141149987, "grad_norm": 0.33776605129241943, "learning_rate": 0.00010720440868740171, "loss": 1.6071, "step": 35714 }, { "epoch": 0.4641001359554157, "grad_norm": 0.4142756760120392, "learning_rate": 0.00010720180922549032, "loss": 1.3591, "step": 35715 }, { "epoch": 0.4641131304993316, "grad_norm": 0.36697250604629517, "learning_rate": 0.00010719920976357893, "loss": 1.3722, "step": 35716 }, { "epoch": 0.46412612504324746, "grad_norm": 0.345456600189209, "learning_rate": 0.00010719661030166757, "loss": 1.3067, "step": 35717 }, { "epoch": 0.46413911958716336, "grad_norm": 0.3545323610305786, "learning_rate": 0.00010719401083975618, "loss": 1.4086, "step": 35718 }, { "epoch": 0.4641521141310792, "grad_norm": 0.38930609822273254, "learning_rate": 0.00010719141137784479, "loss": 1.4761, "step": 35719 }, { "epoch": 0.4641651086749951, "grad_norm": 0.40637439489364624, "learning_rate": 0.0001071888119159334, "loss": 1.6331, "step": 35720 }, { "epoch": 0.46417810321891095, "grad_norm": 0.38163551688194275, "learning_rate": 0.00010718621245402203, "loss": 1.2254, "step": 35721 }, { "epoch": 0.46419109776282685, "grad_norm": 0.4472687840461731, "learning_rate": 0.00010718361299211064, "loss": 1.4667, "step": 35722 }, { "epoch": 0.4642040923067427, "grad_norm": 0.35414546728134155, "learning_rate": 0.00010718101353019925, "loss": 1.2344, "step": 35723 }, { "epoch": 0.4642170868506586, "grad_norm": 0.4409354031085968, "learning_rate": 0.00010717841406828789, "loss": 1.5897, "step": 35724 }, { "epoch": 0.46423008139457445, "grad_norm": 0.510757565498352, "learning_rate": 0.00010717581460637648, "loss": 1.3826, "step": 35725 }, { "epoch": 0.46424307593849035, "grad_norm": 0.521203339099884, "learning_rate": 0.0001071732151444651, "loss": 1.5454, "step": 35726 }, { "epoch": 0.4642560704824062, "grad_norm": 0.3957363963127136, "learning_rate": 0.0001071706156825537, "loss": 1.5085, "step": 35727 }, { "epoch": 0.4642690650263221, "grad_norm": 0.5520777702331543, "learning_rate": 0.00010716801622064234, "loss": 1.4804, "step": 35728 }, { "epoch": 0.46428205957023794, "grad_norm": 0.32879137992858887, "learning_rate": 0.00010716541675873095, "loss": 1.4548, "step": 35729 }, { "epoch": 0.46429505411415384, "grad_norm": 0.299887478351593, "learning_rate": 0.00010716281729681957, "loss": 1.1202, "step": 35730 }, { "epoch": 0.4643080486580697, "grad_norm": 0.3300218880176544, "learning_rate": 0.00010716021783490818, "loss": 1.3384, "step": 35731 }, { "epoch": 0.4643210432019856, "grad_norm": 0.36573949456214905, "learning_rate": 0.0001071576183729968, "loss": 1.2967, "step": 35732 }, { "epoch": 0.46433403774590143, "grad_norm": 0.4675024747848511, "learning_rate": 0.00010715501891108541, "loss": 1.3292, "step": 35733 }, { "epoch": 0.46434703228981733, "grad_norm": 0.37884876132011414, "learning_rate": 0.00010715241944917402, "loss": 1.4362, "step": 35734 }, { "epoch": 0.4643600268337332, "grad_norm": 0.35983002185821533, "learning_rate": 0.00010714981998726263, "loss": 1.283, "step": 35735 }, { "epoch": 0.4643730213776491, "grad_norm": 0.32473140954971313, "learning_rate": 0.00010714722052535127, "loss": 1.2105, "step": 35736 }, { "epoch": 0.4643860159215649, "grad_norm": 0.4478898048400879, "learning_rate": 0.00010714462106343988, "loss": 1.4179, "step": 35737 }, { "epoch": 0.4643990104654808, "grad_norm": 0.37548181414604187, "learning_rate": 0.00010714202160152848, "loss": 1.4042, "step": 35738 }, { "epoch": 0.46441200500939667, "grad_norm": 0.34154483675956726, "learning_rate": 0.00010713942213961709, "loss": 1.3242, "step": 35739 }, { "epoch": 0.46442499955331257, "grad_norm": 0.5131894946098328, "learning_rate": 0.00010713682267770573, "loss": 1.3617, "step": 35740 }, { "epoch": 0.4644379940972284, "grad_norm": 0.40826016664505005, "learning_rate": 0.00010713422321579434, "loss": 1.5178, "step": 35741 }, { "epoch": 0.4644509886411443, "grad_norm": 0.5026918649673462, "learning_rate": 0.00010713162375388295, "loss": 1.4838, "step": 35742 }, { "epoch": 0.46446398318506016, "grad_norm": 0.394258052110672, "learning_rate": 0.00010712902429197156, "loss": 1.2799, "step": 35743 }, { "epoch": 0.46447697772897606, "grad_norm": 0.3919675350189209, "learning_rate": 0.00010712642483006019, "loss": 1.493, "step": 35744 }, { "epoch": 0.4644899722728919, "grad_norm": 0.4354751408100128, "learning_rate": 0.0001071238253681488, "loss": 1.1643, "step": 35745 }, { "epoch": 0.4645029668168078, "grad_norm": 0.45295071601867676, "learning_rate": 0.00010712122590623741, "loss": 1.4635, "step": 35746 }, { "epoch": 0.46451596136072365, "grad_norm": 0.4486791491508484, "learning_rate": 0.00010711862644432602, "loss": 1.4049, "step": 35747 }, { "epoch": 0.46452895590463955, "grad_norm": 0.505698561668396, "learning_rate": 0.00010711602698241466, "loss": 1.4294, "step": 35748 }, { "epoch": 0.4645419504485554, "grad_norm": 0.4510813355445862, "learning_rate": 0.00010711342752050327, "loss": 1.3102, "step": 35749 }, { "epoch": 0.4645549449924713, "grad_norm": 0.3974493145942688, "learning_rate": 0.00010711082805859188, "loss": 1.3553, "step": 35750 }, { "epoch": 0.46456793953638714, "grad_norm": 0.40255334973335266, "learning_rate": 0.00010710822859668048, "loss": 1.2446, "step": 35751 }, { "epoch": 0.46458093408030304, "grad_norm": 0.4307808578014374, "learning_rate": 0.00010710562913476911, "loss": 1.4597, "step": 35752 }, { "epoch": 0.4645939286242189, "grad_norm": 0.33516693115234375, "learning_rate": 0.00010710302967285773, "loss": 1.3647, "step": 35753 }, { "epoch": 0.4646069231681348, "grad_norm": 0.43638190627098083, "learning_rate": 0.00010710043021094634, "loss": 1.6564, "step": 35754 }, { "epoch": 0.46461991771205063, "grad_norm": 0.3485713303089142, "learning_rate": 0.00010709783074903495, "loss": 1.4224, "step": 35755 }, { "epoch": 0.46463291225596653, "grad_norm": 0.35191163420677185, "learning_rate": 0.00010709523128712357, "loss": 1.1552, "step": 35756 }, { "epoch": 0.4646459067998824, "grad_norm": 0.3450961709022522, "learning_rate": 0.00010709263182521218, "loss": 1.5828, "step": 35757 }, { "epoch": 0.4646589013437983, "grad_norm": 0.43623632192611694, "learning_rate": 0.0001070900323633008, "loss": 1.5343, "step": 35758 }, { "epoch": 0.4646718958877141, "grad_norm": 0.26587674021720886, "learning_rate": 0.00010708743290138943, "loss": 1.2452, "step": 35759 }, { "epoch": 0.46468489043163, "grad_norm": 0.2967045307159424, "learning_rate": 0.00010708483343947804, "loss": 1.2431, "step": 35760 }, { "epoch": 0.46469788497554587, "grad_norm": 0.36637943983078003, "learning_rate": 0.00010708223397756665, "loss": 1.5139, "step": 35761 }, { "epoch": 0.4647108795194618, "grad_norm": 0.3731740415096283, "learning_rate": 0.00010707963451565526, "loss": 1.1725, "step": 35762 }, { "epoch": 0.4647238740633776, "grad_norm": 0.38088053464889526, "learning_rate": 0.00010707703505374389, "loss": 1.4296, "step": 35763 }, { "epoch": 0.4647368686072935, "grad_norm": 0.44334372878074646, "learning_rate": 0.0001070744355918325, "loss": 1.3919, "step": 35764 }, { "epoch": 0.46474986315120936, "grad_norm": 0.27963680028915405, "learning_rate": 0.00010707183612992111, "loss": 1.5254, "step": 35765 }, { "epoch": 0.46476285769512526, "grad_norm": 0.4246121048927307, "learning_rate": 0.00010706923666800972, "loss": 1.3882, "step": 35766 }, { "epoch": 0.4647758522390411, "grad_norm": 0.4229174852371216, "learning_rate": 0.00010706663720609835, "loss": 1.3092, "step": 35767 }, { "epoch": 0.464788846782957, "grad_norm": 0.3841497004032135, "learning_rate": 0.00010706403774418696, "loss": 1.4034, "step": 35768 }, { "epoch": 0.46480184132687286, "grad_norm": 0.4514086842536926, "learning_rate": 0.00010706143828227557, "loss": 1.3607, "step": 35769 }, { "epoch": 0.46481483587078876, "grad_norm": 0.4733964204788208, "learning_rate": 0.00010705883882036418, "loss": 1.5485, "step": 35770 }, { "epoch": 0.4648278304147046, "grad_norm": 0.3369889557361603, "learning_rate": 0.00010705623935845282, "loss": 1.2684, "step": 35771 }, { "epoch": 0.4648408249586205, "grad_norm": 0.3376230001449585, "learning_rate": 0.00010705363989654143, "loss": 1.314, "step": 35772 }, { "epoch": 0.46485381950253635, "grad_norm": 0.38799983263015747, "learning_rate": 0.00010705104043463004, "loss": 1.1607, "step": 35773 }, { "epoch": 0.46486681404645225, "grad_norm": 0.361493319272995, "learning_rate": 0.00010704844097271865, "loss": 1.3729, "step": 35774 }, { "epoch": 0.4648798085903681, "grad_norm": 0.40936052799224854, "learning_rate": 0.00010704584151080727, "loss": 1.6361, "step": 35775 }, { "epoch": 0.464892803134284, "grad_norm": 0.37491458654403687, "learning_rate": 0.00010704324204889589, "loss": 1.451, "step": 35776 }, { "epoch": 0.46490579767819984, "grad_norm": 0.40805554389953613, "learning_rate": 0.0001070406425869845, "loss": 1.3071, "step": 35777 }, { "epoch": 0.46491879222211574, "grad_norm": 0.47869864106178284, "learning_rate": 0.00010703804312507311, "loss": 1.4213, "step": 35778 }, { "epoch": 0.4649317867660316, "grad_norm": 0.4659895896911621, "learning_rate": 0.00010703544366316174, "loss": 1.3119, "step": 35779 }, { "epoch": 0.4649447813099475, "grad_norm": 0.3218241333961487, "learning_rate": 0.00010703284420125034, "loss": 1.4794, "step": 35780 }, { "epoch": 0.4649577758538634, "grad_norm": 0.3543262481689453, "learning_rate": 0.00010703024473933895, "loss": 1.3566, "step": 35781 }, { "epoch": 0.46497077039777923, "grad_norm": 0.4492974281311035, "learning_rate": 0.00010702764527742756, "loss": 1.4828, "step": 35782 }, { "epoch": 0.46498376494169513, "grad_norm": 0.39959627389907837, "learning_rate": 0.0001070250458155162, "loss": 1.3616, "step": 35783 }, { "epoch": 0.464996759485611, "grad_norm": 0.36129021644592285, "learning_rate": 0.00010702244635360481, "loss": 1.3493, "step": 35784 }, { "epoch": 0.4650097540295269, "grad_norm": 0.3807162642478943, "learning_rate": 0.00010701984689169342, "loss": 1.4973, "step": 35785 }, { "epoch": 0.4650227485734427, "grad_norm": 0.38066765666007996, "learning_rate": 0.00010701724742978204, "loss": 1.3361, "step": 35786 }, { "epoch": 0.4650357431173586, "grad_norm": 0.3309139013290405, "learning_rate": 0.00010701464796787066, "loss": 1.5189, "step": 35787 }, { "epoch": 0.46504873766127447, "grad_norm": 0.3651766777038574, "learning_rate": 0.00010701204850595927, "loss": 1.5088, "step": 35788 }, { "epoch": 0.46506173220519037, "grad_norm": 0.4041731655597687, "learning_rate": 0.00010700944904404788, "loss": 1.4147, "step": 35789 }, { "epoch": 0.4650747267491062, "grad_norm": 0.3698086738586426, "learning_rate": 0.00010700684958213649, "loss": 1.4646, "step": 35790 }, { "epoch": 0.4650877212930221, "grad_norm": 0.49434903264045715, "learning_rate": 0.00010700425012022513, "loss": 1.3139, "step": 35791 }, { "epoch": 0.46510071583693796, "grad_norm": 0.39973777532577515, "learning_rate": 0.00010700165065831374, "loss": 1.2901, "step": 35792 }, { "epoch": 0.46511371038085386, "grad_norm": 0.5106237530708313, "learning_rate": 0.00010699905119640234, "loss": 1.3648, "step": 35793 }, { "epoch": 0.4651267049247697, "grad_norm": 0.43667319416999817, "learning_rate": 0.00010699645173449095, "loss": 1.4993, "step": 35794 }, { "epoch": 0.4651396994686856, "grad_norm": 0.3264627754688263, "learning_rate": 0.00010699385227257959, "loss": 1.4228, "step": 35795 }, { "epoch": 0.46515269401260145, "grad_norm": 0.4814237654209137, "learning_rate": 0.0001069912528106682, "loss": 1.4044, "step": 35796 }, { "epoch": 0.46516568855651735, "grad_norm": 0.29576122760772705, "learning_rate": 0.00010698865334875681, "loss": 1.0928, "step": 35797 }, { "epoch": 0.4651786831004332, "grad_norm": 0.41525232791900635, "learning_rate": 0.00010698605388684543, "loss": 1.53, "step": 35798 }, { "epoch": 0.4651916776443491, "grad_norm": 0.4054959714412689, "learning_rate": 0.00010698345442493404, "loss": 1.4397, "step": 35799 }, { "epoch": 0.46520467218826494, "grad_norm": 0.36523696780204773, "learning_rate": 0.00010698085496302266, "loss": 1.5171, "step": 35800 }, { "epoch": 0.46521766673218085, "grad_norm": 0.347986102104187, "learning_rate": 0.00010697825550111127, "loss": 1.4319, "step": 35801 }, { "epoch": 0.4652306612760967, "grad_norm": 0.42010366916656494, "learning_rate": 0.0001069756560391999, "loss": 1.5793, "step": 35802 }, { "epoch": 0.4652436558200126, "grad_norm": 0.36897042393684387, "learning_rate": 0.00010697305657728852, "loss": 1.4585, "step": 35803 }, { "epoch": 0.46525665036392844, "grad_norm": 0.35504722595214844, "learning_rate": 0.00010697045711537713, "loss": 1.3805, "step": 35804 }, { "epoch": 0.46526964490784434, "grad_norm": 0.3372567892074585, "learning_rate": 0.00010696785765346572, "loss": 1.3338, "step": 35805 }, { "epoch": 0.4652826394517602, "grad_norm": 0.35057783126831055, "learning_rate": 0.00010696525819155436, "loss": 1.3047, "step": 35806 }, { "epoch": 0.4652956339956761, "grad_norm": 0.47402527928352356, "learning_rate": 0.00010696265872964297, "loss": 1.3457, "step": 35807 }, { "epoch": 0.46530862853959193, "grad_norm": 0.2996128797531128, "learning_rate": 0.00010696005926773158, "loss": 1.417, "step": 35808 }, { "epoch": 0.46532162308350783, "grad_norm": 0.39967092871665955, "learning_rate": 0.0001069574598058202, "loss": 1.336, "step": 35809 }, { "epoch": 0.4653346176274237, "grad_norm": 0.34992873668670654, "learning_rate": 0.00010695486034390882, "loss": 1.4117, "step": 35810 }, { "epoch": 0.4653476121713396, "grad_norm": 0.359475702047348, "learning_rate": 0.00010695226088199743, "loss": 1.3689, "step": 35811 }, { "epoch": 0.4653606067152554, "grad_norm": 0.45986121892929077, "learning_rate": 0.00010694966142008604, "loss": 1.3775, "step": 35812 }, { "epoch": 0.4653736012591713, "grad_norm": 0.3535115718841553, "learning_rate": 0.00010694706195817465, "loss": 1.5041, "step": 35813 }, { "epoch": 0.46538659580308717, "grad_norm": 0.34997013211250305, "learning_rate": 0.00010694446249626329, "loss": 1.2499, "step": 35814 }, { "epoch": 0.46539959034700307, "grad_norm": 0.4915198087692261, "learning_rate": 0.0001069418630343519, "loss": 1.4018, "step": 35815 }, { "epoch": 0.4654125848909189, "grad_norm": 0.35682377219200134, "learning_rate": 0.00010693926357244051, "loss": 1.5198, "step": 35816 }, { "epoch": 0.4654255794348348, "grad_norm": 0.41563135385513306, "learning_rate": 0.00010693666411052912, "loss": 1.35, "step": 35817 }, { "epoch": 0.46543857397875066, "grad_norm": 0.34668147563934326, "learning_rate": 0.00010693406464861775, "loss": 1.4723, "step": 35818 }, { "epoch": 0.46545156852266656, "grad_norm": 0.4953675866127014, "learning_rate": 0.00010693146518670636, "loss": 1.2904, "step": 35819 }, { "epoch": 0.4654645630665824, "grad_norm": 0.3268541395664215, "learning_rate": 0.00010692886572479497, "loss": 1.2884, "step": 35820 }, { "epoch": 0.4654775576104983, "grad_norm": 0.34770333766937256, "learning_rate": 0.00010692626626288358, "loss": 1.3666, "step": 35821 }, { "epoch": 0.46549055215441415, "grad_norm": 0.40397119522094727, "learning_rate": 0.0001069236668009722, "loss": 1.319, "step": 35822 }, { "epoch": 0.46550354669833005, "grad_norm": 0.5016922354698181, "learning_rate": 0.00010692106733906082, "loss": 1.3252, "step": 35823 }, { "epoch": 0.4655165412422459, "grad_norm": 0.3737371265888214, "learning_rate": 0.00010691846787714943, "loss": 1.4927, "step": 35824 }, { "epoch": 0.4655295357861618, "grad_norm": 0.3321307897567749, "learning_rate": 0.00010691586841523804, "loss": 1.3554, "step": 35825 }, { "epoch": 0.46554253033007764, "grad_norm": 0.4948640763759613, "learning_rate": 0.00010691326895332668, "loss": 1.492, "step": 35826 }, { "epoch": 0.46555552487399354, "grad_norm": 0.4327353239059448, "learning_rate": 0.00010691066949141529, "loss": 1.3797, "step": 35827 }, { "epoch": 0.4655685194179094, "grad_norm": 0.4067939221858978, "learning_rate": 0.0001069080700295039, "loss": 1.5799, "step": 35828 }, { "epoch": 0.4655815139618253, "grad_norm": 0.2698366343975067, "learning_rate": 0.00010690547056759251, "loss": 1.2538, "step": 35829 }, { "epoch": 0.46559450850574113, "grad_norm": 0.3806721270084381, "learning_rate": 0.00010690287110568113, "loss": 1.4179, "step": 35830 }, { "epoch": 0.46560750304965703, "grad_norm": 0.3619813621044159, "learning_rate": 0.00010690027164376974, "loss": 1.1798, "step": 35831 }, { "epoch": 0.4656204975935729, "grad_norm": 0.393474817276001, "learning_rate": 0.00010689767218185835, "loss": 1.2657, "step": 35832 }, { "epoch": 0.4656334921374888, "grad_norm": 0.4190252721309662, "learning_rate": 0.00010689507271994699, "loss": 1.3939, "step": 35833 }, { "epoch": 0.4656464866814046, "grad_norm": 0.37279942631721497, "learning_rate": 0.0001068924732580356, "loss": 1.2785, "step": 35834 }, { "epoch": 0.4656594812253205, "grad_norm": 0.4239563047885895, "learning_rate": 0.0001068898737961242, "loss": 1.4672, "step": 35835 }, { "epoch": 0.46567247576923637, "grad_norm": 0.4341200292110443, "learning_rate": 0.00010688727433421281, "loss": 1.4673, "step": 35836 }, { "epoch": 0.46568547031315227, "grad_norm": 0.49018964171409607, "learning_rate": 0.00010688467487230145, "loss": 1.4178, "step": 35837 }, { "epoch": 0.4656984648570681, "grad_norm": 0.38861218094825745, "learning_rate": 0.00010688207541039006, "loss": 1.2747, "step": 35838 }, { "epoch": 0.465711459400984, "grad_norm": 0.4218166768550873, "learning_rate": 0.00010687947594847867, "loss": 1.5383, "step": 35839 }, { "epoch": 0.46572445394489986, "grad_norm": 0.8024581670761108, "learning_rate": 0.00010687687648656728, "loss": 1.3643, "step": 35840 }, { "epoch": 0.46573744848881576, "grad_norm": 0.400626003742218, "learning_rate": 0.00010687427702465591, "loss": 1.3545, "step": 35841 }, { "epoch": 0.4657504430327316, "grad_norm": 0.37545350193977356, "learning_rate": 0.00010687167756274452, "loss": 1.3793, "step": 35842 }, { "epoch": 0.4657634375766475, "grad_norm": 0.3697686791419983, "learning_rate": 0.00010686907810083313, "loss": 1.268, "step": 35843 }, { "epoch": 0.46577643212056336, "grad_norm": 0.2834019958972931, "learning_rate": 0.00010686647863892174, "loss": 1.3754, "step": 35844 }, { "epoch": 0.46578942666447926, "grad_norm": 0.3353450298309326, "learning_rate": 0.00010686387917701038, "loss": 1.5451, "step": 35845 }, { "epoch": 0.4658024212083951, "grad_norm": 0.34883835911750793, "learning_rate": 0.00010686127971509899, "loss": 1.3699, "step": 35846 }, { "epoch": 0.465815415752311, "grad_norm": 0.3946426212787628, "learning_rate": 0.00010685868025318759, "loss": 1.328, "step": 35847 }, { "epoch": 0.46582841029622685, "grad_norm": 0.4300050735473633, "learning_rate": 0.0001068560807912762, "loss": 1.4763, "step": 35848 }, { "epoch": 0.46584140484014275, "grad_norm": 0.32959356904029846, "learning_rate": 0.00010685348132936484, "loss": 1.4269, "step": 35849 }, { "epoch": 0.4658543993840586, "grad_norm": 0.37990960478782654, "learning_rate": 0.00010685088186745345, "loss": 1.3496, "step": 35850 }, { "epoch": 0.4658673939279745, "grad_norm": 0.34632784128189087, "learning_rate": 0.00010684828240554206, "loss": 1.2209, "step": 35851 }, { "epoch": 0.46588038847189034, "grad_norm": 0.3987644910812378, "learning_rate": 0.00010684568294363067, "loss": 1.3843, "step": 35852 }, { "epoch": 0.46589338301580624, "grad_norm": 0.29834672808647156, "learning_rate": 0.00010684308348171929, "loss": 1.2073, "step": 35853 }, { "epoch": 0.4659063775597221, "grad_norm": 0.40767669677734375, "learning_rate": 0.0001068404840198079, "loss": 1.4338, "step": 35854 }, { "epoch": 0.465919372103638, "grad_norm": 0.3947330713272095, "learning_rate": 0.00010683788455789651, "loss": 1.2716, "step": 35855 }, { "epoch": 0.46593236664755383, "grad_norm": 0.35361751914024353, "learning_rate": 0.00010683528509598513, "loss": 1.4845, "step": 35856 }, { "epoch": 0.46594536119146973, "grad_norm": 0.39010900259017944, "learning_rate": 0.00010683268563407376, "loss": 1.4867, "step": 35857 }, { "epoch": 0.46595835573538563, "grad_norm": 0.3032688796520233, "learning_rate": 0.00010683008617216237, "loss": 1.2686, "step": 35858 }, { "epoch": 0.4659713502793015, "grad_norm": 0.3854002356529236, "learning_rate": 0.00010682748671025099, "loss": 1.6548, "step": 35859 }, { "epoch": 0.4659843448232174, "grad_norm": 0.39423030614852905, "learning_rate": 0.00010682488724833958, "loss": 1.4078, "step": 35860 }, { "epoch": 0.4659973393671332, "grad_norm": 0.36734601855278015, "learning_rate": 0.00010682228778642822, "loss": 1.4832, "step": 35861 }, { "epoch": 0.4660103339110491, "grad_norm": 0.42032724618911743, "learning_rate": 0.00010681968832451683, "loss": 1.4374, "step": 35862 }, { "epoch": 0.46602332845496497, "grad_norm": 0.3666577935218811, "learning_rate": 0.00010681708886260544, "loss": 1.4527, "step": 35863 }, { "epoch": 0.46603632299888087, "grad_norm": 0.3579496145248413, "learning_rate": 0.00010681448940069405, "loss": 1.13, "step": 35864 }, { "epoch": 0.4660493175427967, "grad_norm": 0.30104896426200867, "learning_rate": 0.00010681188993878268, "loss": 1.4693, "step": 35865 }, { "epoch": 0.4660623120867126, "grad_norm": 0.44286391139030457, "learning_rate": 0.00010680929047687129, "loss": 1.3679, "step": 35866 }, { "epoch": 0.46607530663062846, "grad_norm": 0.3589864671230316, "learning_rate": 0.0001068066910149599, "loss": 1.5356, "step": 35867 }, { "epoch": 0.46608830117454436, "grad_norm": 0.3279612362384796, "learning_rate": 0.00010680409155304851, "loss": 1.3134, "step": 35868 }, { "epoch": 0.4661012957184602, "grad_norm": 0.3832318186759949, "learning_rate": 0.00010680149209113715, "loss": 1.1652, "step": 35869 }, { "epoch": 0.4661142902623761, "grad_norm": 0.3821352422237396, "learning_rate": 0.00010679889262922576, "loss": 1.4673, "step": 35870 }, { "epoch": 0.46612728480629195, "grad_norm": 0.3807497024536133, "learning_rate": 0.00010679629316731437, "loss": 1.4372, "step": 35871 }, { "epoch": 0.46614027935020785, "grad_norm": 0.4463353753089905, "learning_rate": 0.000106793693705403, "loss": 1.2001, "step": 35872 }, { "epoch": 0.4661532738941237, "grad_norm": 0.4181680679321289, "learning_rate": 0.0001067910942434916, "loss": 1.4157, "step": 35873 }, { "epoch": 0.4661662684380396, "grad_norm": 0.4164464771747589, "learning_rate": 0.00010678849478158022, "loss": 1.3284, "step": 35874 }, { "epoch": 0.46617926298195544, "grad_norm": 0.2525698244571686, "learning_rate": 0.00010678589531966883, "loss": 1.1925, "step": 35875 }, { "epoch": 0.46619225752587135, "grad_norm": 0.311665803194046, "learning_rate": 0.00010678329585775745, "loss": 1.3553, "step": 35876 }, { "epoch": 0.4662052520697872, "grad_norm": 0.42646273970603943, "learning_rate": 0.00010678069639584606, "loss": 1.3552, "step": 35877 }, { "epoch": 0.4662182466137031, "grad_norm": 0.43143296241760254, "learning_rate": 0.00010677809693393467, "loss": 1.4818, "step": 35878 }, { "epoch": 0.46623124115761894, "grad_norm": 0.39880892634391785, "learning_rate": 0.00010677549747202329, "loss": 1.217, "step": 35879 }, { "epoch": 0.46624423570153484, "grad_norm": 0.30916833877563477, "learning_rate": 0.00010677289801011192, "loss": 1.1269, "step": 35880 }, { "epoch": 0.4662572302454507, "grad_norm": 0.4107491672039032, "learning_rate": 0.00010677029854820053, "loss": 1.4896, "step": 35881 }, { "epoch": 0.4662702247893666, "grad_norm": 0.5182363390922546, "learning_rate": 0.00010676769908628915, "loss": 1.415, "step": 35882 }, { "epoch": 0.46628321933328243, "grad_norm": 0.41161927580833435, "learning_rate": 0.00010676509962437776, "loss": 1.3017, "step": 35883 }, { "epoch": 0.46629621387719833, "grad_norm": 0.5144016146659851, "learning_rate": 0.00010676250016246638, "loss": 1.4735, "step": 35884 }, { "epoch": 0.4663092084211142, "grad_norm": 0.39478158950805664, "learning_rate": 0.00010675990070055499, "loss": 1.4625, "step": 35885 }, { "epoch": 0.4663222029650301, "grad_norm": 0.36517956852912903, "learning_rate": 0.0001067573012386436, "loss": 1.3703, "step": 35886 }, { "epoch": 0.4663351975089459, "grad_norm": 0.3599850833415985, "learning_rate": 0.00010675470177673221, "loss": 1.3172, "step": 35887 }, { "epoch": 0.4663481920528618, "grad_norm": 0.3892595171928406, "learning_rate": 0.00010675210231482085, "loss": 1.291, "step": 35888 }, { "epoch": 0.46636118659677767, "grad_norm": 0.35782092809677124, "learning_rate": 0.00010674950285290945, "loss": 1.345, "step": 35889 }, { "epoch": 0.46637418114069357, "grad_norm": 0.4267702102661133, "learning_rate": 0.00010674690339099806, "loss": 1.2876, "step": 35890 }, { "epoch": 0.4663871756846094, "grad_norm": 0.4545315206050873, "learning_rate": 0.00010674430392908667, "loss": 1.4805, "step": 35891 }, { "epoch": 0.4664001702285253, "grad_norm": 0.3772906959056854, "learning_rate": 0.00010674170446717531, "loss": 1.3717, "step": 35892 }, { "epoch": 0.46641316477244116, "grad_norm": 0.609588086605072, "learning_rate": 0.00010673910500526392, "loss": 1.5314, "step": 35893 }, { "epoch": 0.46642615931635706, "grad_norm": 0.4107741117477417, "learning_rate": 0.00010673650554335253, "loss": 1.408, "step": 35894 }, { "epoch": 0.4664391538602729, "grad_norm": 0.34701094031333923, "learning_rate": 0.00010673390608144114, "loss": 1.5644, "step": 35895 }, { "epoch": 0.4664521484041888, "grad_norm": 0.4221905469894409, "learning_rate": 0.00010673130661952977, "loss": 1.3026, "step": 35896 }, { "epoch": 0.46646514294810465, "grad_norm": 0.490134596824646, "learning_rate": 0.00010672870715761838, "loss": 1.5241, "step": 35897 }, { "epoch": 0.46647813749202055, "grad_norm": 0.4115753471851349, "learning_rate": 0.00010672610769570699, "loss": 1.4513, "step": 35898 }, { "epoch": 0.4664911320359364, "grad_norm": 0.38451868295669556, "learning_rate": 0.0001067235082337956, "loss": 1.3489, "step": 35899 }, { "epoch": 0.4665041265798523, "grad_norm": 0.43244633078575134, "learning_rate": 0.00010672090877188424, "loss": 1.371, "step": 35900 }, { "epoch": 0.46651712112376814, "grad_norm": 0.36380696296691895, "learning_rate": 0.00010671830930997285, "loss": 1.4971, "step": 35901 }, { "epoch": 0.46653011566768404, "grad_norm": 0.4719443619251251, "learning_rate": 0.00010671570984806145, "loss": 1.6278, "step": 35902 }, { "epoch": 0.4665431102115999, "grad_norm": 0.344825804233551, "learning_rate": 0.00010671311038615006, "loss": 1.3417, "step": 35903 }, { "epoch": 0.4665561047555158, "grad_norm": 0.40015196800231934, "learning_rate": 0.0001067105109242387, "loss": 1.3532, "step": 35904 }, { "epoch": 0.46656909929943163, "grad_norm": 0.45742979645729065, "learning_rate": 0.0001067079114623273, "loss": 1.5532, "step": 35905 }, { "epoch": 0.46658209384334753, "grad_norm": 0.33324095606803894, "learning_rate": 0.00010670531200041592, "loss": 1.3681, "step": 35906 }, { "epoch": 0.4665950883872634, "grad_norm": 0.1700827032327652, "learning_rate": 0.00010670271253850454, "loss": 1.2759, "step": 35907 }, { "epoch": 0.4666080829311793, "grad_norm": 0.3229829967021942, "learning_rate": 0.00010670011307659315, "loss": 1.3664, "step": 35908 }, { "epoch": 0.4666210774750951, "grad_norm": 0.539949893951416, "learning_rate": 0.00010669751361468176, "loss": 1.4729, "step": 35909 }, { "epoch": 0.466634072019011, "grad_norm": 0.42248407006263733, "learning_rate": 0.00010669491415277037, "loss": 1.4785, "step": 35910 }, { "epoch": 0.46664706656292687, "grad_norm": 0.42746496200561523, "learning_rate": 0.00010669231469085901, "loss": 1.3603, "step": 35911 }, { "epoch": 0.46666006110684277, "grad_norm": 0.3592301309108734, "learning_rate": 0.00010668971522894762, "loss": 1.2564, "step": 35912 }, { "epoch": 0.4666730556507586, "grad_norm": 0.42517951130867004, "learning_rate": 0.00010668711576703623, "loss": 1.3646, "step": 35913 }, { "epoch": 0.4666860501946745, "grad_norm": 0.4356907308101654, "learning_rate": 0.00010668451630512484, "loss": 1.5278, "step": 35914 }, { "epoch": 0.46669904473859036, "grad_norm": 0.3531091809272766, "learning_rate": 0.00010668191684321347, "loss": 1.5002, "step": 35915 }, { "epoch": 0.46671203928250626, "grad_norm": 0.35297533869743347, "learning_rate": 0.00010667931738130208, "loss": 1.6151, "step": 35916 }, { "epoch": 0.4667250338264221, "grad_norm": 0.41558703780174255, "learning_rate": 0.00010667671791939069, "loss": 1.4992, "step": 35917 }, { "epoch": 0.466738028370338, "grad_norm": 0.4532746970653534, "learning_rate": 0.0001066741184574793, "loss": 1.223, "step": 35918 }, { "epoch": 0.46675102291425385, "grad_norm": 0.3878476917743683, "learning_rate": 0.00010667151899556793, "loss": 1.3433, "step": 35919 }, { "epoch": 0.46676401745816976, "grad_norm": 0.43468937277793884, "learning_rate": 0.00010666891953365654, "loss": 1.2907, "step": 35920 }, { "epoch": 0.4667770120020856, "grad_norm": 0.3536732494831085, "learning_rate": 0.00010666632007174515, "loss": 1.4719, "step": 35921 }, { "epoch": 0.4667900065460015, "grad_norm": 0.47575825452804565, "learning_rate": 0.00010666372060983376, "loss": 1.6255, "step": 35922 }, { "epoch": 0.46680300108991735, "grad_norm": 0.4070780575275421, "learning_rate": 0.0001066611211479224, "loss": 1.5798, "step": 35923 }, { "epoch": 0.46681599563383325, "grad_norm": 0.41202831268310547, "learning_rate": 0.00010665852168601101, "loss": 1.3252, "step": 35924 }, { "epoch": 0.4668289901777491, "grad_norm": 0.3353930115699768, "learning_rate": 0.00010665592222409962, "loss": 1.3818, "step": 35925 }, { "epoch": 0.466841984721665, "grad_norm": 0.3823460042476654, "learning_rate": 0.00010665332276218823, "loss": 1.4417, "step": 35926 }, { "epoch": 0.46685497926558084, "grad_norm": 0.4184887707233429, "learning_rate": 0.00010665072330027685, "loss": 1.4133, "step": 35927 }, { "epoch": 0.46686797380949674, "grad_norm": 0.3773918151855469, "learning_rate": 0.00010664812383836546, "loss": 1.3159, "step": 35928 }, { "epoch": 0.4668809683534126, "grad_norm": 0.4540043771266937, "learning_rate": 0.00010664552437645408, "loss": 1.3884, "step": 35929 }, { "epoch": 0.4668939628973285, "grad_norm": 0.46505650877952576, "learning_rate": 0.00010664292491454269, "loss": 1.591, "step": 35930 }, { "epoch": 0.46690695744124433, "grad_norm": 0.4571464955806732, "learning_rate": 0.00010664032545263131, "loss": 1.454, "step": 35931 }, { "epoch": 0.46691995198516023, "grad_norm": 0.3607877194881439, "learning_rate": 0.00010663772599071992, "loss": 1.2839, "step": 35932 }, { "epoch": 0.46693294652907613, "grad_norm": 0.44766515493392944, "learning_rate": 0.00010663512652880853, "loss": 1.4078, "step": 35933 }, { "epoch": 0.466945941072992, "grad_norm": 0.47576385736465454, "learning_rate": 0.00010663252706689714, "loss": 1.4613, "step": 35934 }, { "epoch": 0.4669589356169079, "grad_norm": 0.3513639271259308, "learning_rate": 0.00010662992760498578, "loss": 1.2397, "step": 35935 }, { "epoch": 0.4669719301608237, "grad_norm": 0.5041713714599609, "learning_rate": 0.00010662732814307439, "loss": 1.5897, "step": 35936 }, { "epoch": 0.4669849247047396, "grad_norm": 0.3738866448402405, "learning_rate": 0.000106624728681163, "loss": 1.4591, "step": 35937 }, { "epoch": 0.46699791924865547, "grad_norm": 0.2819693684577942, "learning_rate": 0.00010662212921925161, "loss": 1.1778, "step": 35938 }, { "epoch": 0.46701091379257137, "grad_norm": 0.36967772245407104, "learning_rate": 0.00010661952975734024, "loss": 1.287, "step": 35939 }, { "epoch": 0.4670239083364872, "grad_norm": 0.29896414279937744, "learning_rate": 0.00010661693029542885, "loss": 1.4467, "step": 35940 }, { "epoch": 0.4670369028804031, "grad_norm": 0.36833497881889343, "learning_rate": 0.00010661433083351746, "loss": 1.5933, "step": 35941 }, { "epoch": 0.46704989742431896, "grad_norm": 0.36904987692832947, "learning_rate": 0.00010661173137160607, "loss": 1.4527, "step": 35942 }, { "epoch": 0.46706289196823486, "grad_norm": 0.3404006361961365, "learning_rate": 0.00010660913190969471, "loss": 1.5577, "step": 35943 }, { "epoch": 0.4670758865121507, "grad_norm": 0.3427305817604065, "learning_rate": 0.00010660653244778331, "loss": 1.3337, "step": 35944 }, { "epoch": 0.4670888810560666, "grad_norm": 0.41978099942207336, "learning_rate": 0.00010660393298587192, "loss": 1.3852, "step": 35945 }, { "epoch": 0.46710187559998245, "grad_norm": 0.40083521604537964, "learning_rate": 0.00010660133352396056, "loss": 1.3534, "step": 35946 }, { "epoch": 0.46711487014389835, "grad_norm": 0.5257160067558289, "learning_rate": 0.00010659873406204917, "loss": 1.4199, "step": 35947 }, { "epoch": 0.4671278646878142, "grad_norm": 0.43245503306388855, "learning_rate": 0.00010659613460013778, "loss": 1.4935, "step": 35948 }, { "epoch": 0.4671408592317301, "grad_norm": 0.41445133090019226, "learning_rate": 0.00010659353513822639, "loss": 1.5975, "step": 35949 }, { "epoch": 0.46715385377564594, "grad_norm": 0.3176117539405823, "learning_rate": 0.00010659093567631501, "loss": 1.407, "step": 35950 }, { "epoch": 0.46716684831956184, "grad_norm": 0.36174675822257996, "learning_rate": 0.00010658833621440362, "loss": 1.3153, "step": 35951 }, { "epoch": 0.4671798428634777, "grad_norm": 0.45502999424934387, "learning_rate": 0.00010658573675249224, "loss": 1.5634, "step": 35952 }, { "epoch": 0.4671928374073936, "grad_norm": 0.39223194122314453, "learning_rate": 0.00010658313729058085, "loss": 1.2524, "step": 35953 }, { "epoch": 0.46720583195130944, "grad_norm": 0.43161889910697937, "learning_rate": 0.00010658053782866948, "loss": 1.2831, "step": 35954 }, { "epoch": 0.46721882649522534, "grad_norm": 0.4511711299419403, "learning_rate": 0.0001065779383667581, "loss": 1.3442, "step": 35955 }, { "epoch": 0.4672318210391412, "grad_norm": 0.3932650685310364, "learning_rate": 0.0001065753389048467, "loss": 1.3912, "step": 35956 }, { "epoch": 0.4672448155830571, "grad_norm": 0.38475996255874634, "learning_rate": 0.0001065727394429353, "loss": 1.3811, "step": 35957 }, { "epoch": 0.46725781012697293, "grad_norm": 1.0559144020080566, "learning_rate": 0.00010657013998102394, "loss": 1.394, "step": 35958 }, { "epoch": 0.46727080467088883, "grad_norm": 0.4285690188407898, "learning_rate": 0.00010656754051911255, "loss": 1.2494, "step": 35959 }, { "epoch": 0.4672837992148047, "grad_norm": 0.45992931723594666, "learning_rate": 0.00010656494105720116, "loss": 1.4023, "step": 35960 }, { "epoch": 0.4672967937587206, "grad_norm": 0.4039871096611023, "learning_rate": 0.00010656234159528977, "loss": 1.4978, "step": 35961 }, { "epoch": 0.4673097883026364, "grad_norm": 0.3369520604610443, "learning_rate": 0.0001065597421333784, "loss": 1.3861, "step": 35962 }, { "epoch": 0.4673227828465523, "grad_norm": 0.3670004606246948, "learning_rate": 0.00010655714267146701, "loss": 1.4783, "step": 35963 }, { "epoch": 0.46733577739046817, "grad_norm": 0.464018315076828, "learning_rate": 0.00010655454320955562, "loss": 1.454, "step": 35964 }, { "epoch": 0.46734877193438407, "grad_norm": 0.3082347810268402, "learning_rate": 0.00010655194374764423, "loss": 1.1011, "step": 35965 }, { "epoch": 0.4673617664782999, "grad_norm": 0.46055933833122253, "learning_rate": 0.00010654934428573287, "loss": 1.3702, "step": 35966 }, { "epoch": 0.4673747610222158, "grad_norm": 0.4583047032356262, "learning_rate": 0.00010654674482382148, "loss": 1.4097, "step": 35967 }, { "epoch": 0.46738775556613166, "grad_norm": 0.34755653142929077, "learning_rate": 0.00010654414536191009, "loss": 1.2728, "step": 35968 }, { "epoch": 0.46740075011004756, "grad_norm": 0.44869697093963623, "learning_rate": 0.00010654154589999869, "loss": 1.4725, "step": 35969 }, { "epoch": 0.4674137446539634, "grad_norm": 0.3317490816116333, "learning_rate": 0.00010653894643808733, "loss": 1.244, "step": 35970 }, { "epoch": 0.4674267391978793, "grad_norm": 0.37015002965927124, "learning_rate": 0.00010653634697617594, "loss": 1.3567, "step": 35971 }, { "epoch": 0.46743973374179515, "grad_norm": 0.3949768543243408, "learning_rate": 0.00010653374751426455, "loss": 1.2916, "step": 35972 }, { "epoch": 0.46745272828571105, "grad_norm": 0.3815925121307373, "learning_rate": 0.00010653114805235316, "loss": 1.2673, "step": 35973 }, { "epoch": 0.4674657228296269, "grad_norm": 0.35038211941719055, "learning_rate": 0.00010652854859044178, "loss": 1.284, "step": 35974 }, { "epoch": 0.4674787173735428, "grad_norm": 0.3745165169239044, "learning_rate": 0.0001065259491285304, "loss": 1.3205, "step": 35975 }, { "epoch": 0.46749171191745864, "grad_norm": 0.5041611194610596, "learning_rate": 0.000106523349666619, "loss": 1.4573, "step": 35976 }, { "epoch": 0.46750470646137454, "grad_norm": 0.364639550447464, "learning_rate": 0.00010652075020470762, "loss": 1.3509, "step": 35977 }, { "epoch": 0.4675177010052904, "grad_norm": 0.3491594195365906, "learning_rate": 0.00010651815074279626, "loss": 1.1403, "step": 35978 }, { "epoch": 0.4675306955492063, "grad_norm": 0.3863477110862732, "learning_rate": 0.00010651555128088487, "loss": 1.5309, "step": 35979 }, { "epoch": 0.46754369009312213, "grad_norm": 0.41628289222717285, "learning_rate": 0.00010651295181897348, "loss": 1.5199, "step": 35980 }, { "epoch": 0.46755668463703803, "grad_norm": 0.41859912872314453, "learning_rate": 0.0001065103523570621, "loss": 1.3811, "step": 35981 }, { "epoch": 0.4675696791809539, "grad_norm": 0.3865775465965271, "learning_rate": 0.00010650775289515071, "loss": 1.332, "step": 35982 }, { "epoch": 0.4675826737248698, "grad_norm": 0.32492291927337646, "learning_rate": 0.00010650515343323932, "loss": 1.3899, "step": 35983 }, { "epoch": 0.4675956682687856, "grad_norm": 0.3021368384361267, "learning_rate": 0.00010650255397132793, "loss": 1.2621, "step": 35984 }, { "epoch": 0.4676086628127015, "grad_norm": 0.3440239727497101, "learning_rate": 0.00010649995450941657, "loss": 1.1482, "step": 35985 }, { "epoch": 0.46762165735661737, "grad_norm": 0.4269804358482361, "learning_rate": 0.00010649735504750517, "loss": 1.5055, "step": 35986 }, { "epoch": 0.46763465190053327, "grad_norm": 0.3038952648639679, "learning_rate": 0.00010649475558559378, "loss": 1.3385, "step": 35987 }, { "epoch": 0.4676476464444491, "grad_norm": 0.44491007924079895, "learning_rate": 0.00010649215612368239, "loss": 1.3492, "step": 35988 }, { "epoch": 0.467660640988365, "grad_norm": 0.3509809672832489, "learning_rate": 0.00010648955666177103, "loss": 1.5611, "step": 35989 }, { "epoch": 0.46767363553228086, "grad_norm": 0.31764981150627136, "learning_rate": 0.00010648695719985964, "loss": 1.3228, "step": 35990 }, { "epoch": 0.46768663007619676, "grad_norm": 0.4681842625141144, "learning_rate": 0.00010648435773794825, "loss": 1.3906, "step": 35991 }, { "epoch": 0.4676996246201126, "grad_norm": 0.3917446732521057, "learning_rate": 0.00010648175827603686, "loss": 1.4447, "step": 35992 }, { "epoch": 0.4677126191640285, "grad_norm": 0.4038437306880951, "learning_rate": 0.00010647915881412549, "loss": 1.3509, "step": 35993 }, { "epoch": 0.46772561370794435, "grad_norm": 0.32946309447288513, "learning_rate": 0.0001064765593522141, "loss": 1.0545, "step": 35994 }, { "epoch": 0.46773860825186026, "grad_norm": 0.44185343384742737, "learning_rate": 0.00010647395989030271, "loss": 1.417, "step": 35995 }, { "epoch": 0.4677516027957761, "grad_norm": 0.3740565776824951, "learning_rate": 0.00010647136042839132, "loss": 1.3879, "step": 35996 }, { "epoch": 0.467764597339692, "grad_norm": 0.5356307625770569, "learning_rate": 0.00010646876096647996, "loss": 1.4336, "step": 35997 }, { "epoch": 0.46777759188360785, "grad_norm": 0.46441715955734253, "learning_rate": 0.00010646616150456857, "loss": 1.4545, "step": 35998 }, { "epoch": 0.46779058642752375, "grad_norm": 0.410014808177948, "learning_rate": 0.00010646356204265717, "loss": 1.3621, "step": 35999 }, { "epoch": 0.4678035809714396, "grad_norm": 0.4279406666755676, "learning_rate": 0.00010646096258074578, "loss": 1.2898, "step": 36000 }, { "epoch": 0.4678165755153555, "grad_norm": 0.48044198751449585, "learning_rate": 0.00010645836311883442, "loss": 1.1618, "step": 36001 }, { "epoch": 0.46782957005927134, "grad_norm": 0.45341384410858154, "learning_rate": 0.00010645576365692303, "loss": 1.4305, "step": 36002 }, { "epoch": 0.46784256460318724, "grad_norm": 0.40557536482810974, "learning_rate": 0.00010645316419501164, "loss": 1.2023, "step": 36003 }, { "epoch": 0.4678555591471031, "grad_norm": 0.3305848240852356, "learning_rate": 0.00010645056473310025, "loss": 1.3105, "step": 36004 }, { "epoch": 0.467868553691019, "grad_norm": 0.4526832401752472, "learning_rate": 0.00010644796527118887, "loss": 1.4626, "step": 36005 }, { "epoch": 0.46788154823493483, "grad_norm": 0.47868451476097107, "learning_rate": 0.00010644536580927748, "loss": 1.367, "step": 36006 }, { "epoch": 0.46789454277885073, "grad_norm": 0.4907841980457306, "learning_rate": 0.0001064427663473661, "loss": 1.3488, "step": 36007 }, { "epoch": 0.4679075373227666, "grad_norm": 0.39040088653564453, "learning_rate": 0.0001064401668854547, "loss": 1.3353, "step": 36008 }, { "epoch": 0.4679205318666825, "grad_norm": 0.37176477909088135, "learning_rate": 0.00010643756742354334, "loss": 1.2878, "step": 36009 }, { "epoch": 0.4679335264105984, "grad_norm": 0.3674590587615967, "learning_rate": 0.00010643496796163195, "loss": 1.3325, "step": 36010 }, { "epoch": 0.4679465209545142, "grad_norm": 0.33845943212509155, "learning_rate": 0.00010643236849972055, "loss": 1.459, "step": 36011 }, { "epoch": 0.4679595154984301, "grad_norm": 0.4534614086151123, "learning_rate": 0.00010642976903780916, "loss": 1.5258, "step": 36012 }, { "epoch": 0.46797251004234597, "grad_norm": 0.35866764187812805, "learning_rate": 0.0001064271695758978, "loss": 1.2132, "step": 36013 }, { "epoch": 0.46798550458626187, "grad_norm": 0.3718441426753998, "learning_rate": 0.00010642457011398641, "loss": 1.4307, "step": 36014 }, { "epoch": 0.4679984991301777, "grad_norm": 0.36010098457336426, "learning_rate": 0.00010642197065207502, "loss": 1.3033, "step": 36015 }, { "epoch": 0.4680114936740936, "grad_norm": 0.37122467160224915, "learning_rate": 0.00010641937119016363, "loss": 1.3975, "step": 36016 }, { "epoch": 0.46802448821800946, "grad_norm": 0.39414042234420776, "learning_rate": 0.00010641677172825226, "loss": 1.3809, "step": 36017 }, { "epoch": 0.46803748276192536, "grad_norm": 0.4476868510246277, "learning_rate": 0.00010641417226634087, "loss": 1.3161, "step": 36018 }, { "epoch": 0.4680504773058412, "grad_norm": 0.4619145393371582, "learning_rate": 0.00010641157280442948, "loss": 1.3019, "step": 36019 }, { "epoch": 0.4680634718497571, "grad_norm": 0.4380275309085846, "learning_rate": 0.00010640897334251812, "loss": 1.3842, "step": 36020 }, { "epoch": 0.46807646639367295, "grad_norm": 0.35434839129447937, "learning_rate": 0.00010640637388060673, "loss": 1.2279, "step": 36021 }, { "epoch": 0.46808946093758885, "grad_norm": 0.3855600953102112, "learning_rate": 0.00010640377441869534, "loss": 1.6073, "step": 36022 }, { "epoch": 0.4681024554815047, "grad_norm": 0.40054965019226074, "learning_rate": 0.00010640117495678395, "loss": 1.3063, "step": 36023 }, { "epoch": 0.4681154500254206, "grad_norm": 0.37935322523117065, "learning_rate": 0.00010639857549487258, "loss": 1.4915, "step": 36024 }, { "epoch": 0.46812844456933644, "grad_norm": 0.43313562870025635, "learning_rate": 0.00010639597603296119, "loss": 1.31, "step": 36025 }, { "epoch": 0.46814143911325234, "grad_norm": 0.3727060854434967, "learning_rate": 0.0001063933765710498, "loss": 1.2527, "step": 36026 }, { "epoch": 0.4681544336571682, "grad_norm": 0.3850649893283844, "learning_rate": 0.00010639077710913841, "loss": 1.4338, "step": 36027 }, { "epoch": 0.4681674282010841, "grad_norm": 0.45306897163391113, "learning_rate": 0.00010638817764722703, "loss": 1.5184, "step": 36028 }, { "epoch": 0.46818042274499994, "grad_norm": 0.4427480399608612, "learning_rate": 0.00010638557818531564, "loss": 1.606, "step": 36029 }, { "epoch": 0.46819341728891584, "grad_norm": 0.393987774848938, "learning_rate": 0.00010638297872340425, "loss": 1.3951, "step": 36030 }, { "epoch": 0.4682064118328317, "grad_norm": 0.40190625190734863, "learning_rate": 0.00010638037926149287, "loss": 1.4212, "step": 36031 }, { "epoch": 0.4682194063767476, "grad_norm": 0.41310665011405945, "learning_rate": 0.0001063777797995815, "loss": 1.5712, "step": 36032 }, { "epoch": 0.4682324009206634, "grad_norm": 0.2970741093158722, "learning_rate": 0.00010637518033767011, "loss": 1.5025, "step": 36033 }, { "epoch": 0.46824539546457933, "grad_norm": 0.4671795964241028, "learning_rate": 0.00010637258087575873, "loss": 1.5475, "step": 36034 }, { "epoch": 0.4682583900084952, "grad_norm": 0.5359475016593933, "learning_rate": 0.00010636998141384734, "loss": 1.552, "step": 36035 }, { "epoch": 0.4682713845524111, "grad_norm": 0.4483073353767395, "learning_rate": 0.00010636738195193596, "loss": 1.5673, "step": 36036 }, { "epoch": 0.4682843790963269, "grad_norm": 0.3945772051811218, "learning_rate": 0.00010636478249002457, "loss": 1.4136, "step": 36037 }, { "epoch": 0.4682973736402428, "grad_norm": 0.45493283867836, "learning_rate": 0.00010636218302811318, "loss": 1.3821, "step": 36038 }, { "epoch": 0.46831036818415867, "grad_norm": 0.37724894285202026, "learning_rate": 0.0001063595835662018, "loss": 1.3312, "step": 36039 }, { "epoch": 0.46832336272807457, "grad_norm": 0.30855613946914673, "learning_rate": 0.00010635698410429043, "loss": 1.4425, "step": 36040 }, { "epoch": 0.4683363572719904, "grad_norm": 0.35224148631095886, "learning_rate": 0.00010635438464237903, "loss": 1.3959, "step": 36041 }, { "epoch": 0.4683493518159063, "grad_norm": 0.2714838981628418, "learning_rate": 0.00010635178518046764, "loss": 1.3151, "step": 36042 }, { "epoch": 0.46836234635982216, "grad_norm": 0.42199426889419556, "learning_rate": 0.00010634918571855625, "loss": 1.4751, "step": 36043 }, { "epoch": 0.46837534090373806, "grad_norm": 0.2458931803703308, "learning_rate": 0.00010634658625664489, "loss": 1.209, "step": 36044 }, { "epoch": 0.4683883354476539, "grad_norm": 0.40135452151298523, "learning_rate": 0.0001063439867947335, "loss": 1.4309, "step": 36045 }, { "epoch": 0.4684013299915698, "grad_norm": 0.4182669520378113, "learning_rate": 0.00010634138733282211, "loss": 1.4965, "step": 36046 }, { "epoch": 0.46841432453548565, "grad_norm": 0.5029852986335754, "learning_rate": 0.00010633878787091072, "loss": 1.5349, "step": 36047 }, { "epoch": 0.46842731907940155, "grad_norm": 0.40614351630210876, "learning_rate": 0.00010633618840899935, "loss": 1.2453, "step": 36048 }, { "epoch": 0.4684403136233174, "grad_norm": 0.5176266431808472, "learning_rate": 0.00010633358894708796, "loss": 1.3921, "step": 36049 }, { "epoch": 0.4684533081672333, "grad_norm": 0.40352463722229004, "learning_rate": 0.00010633098948517657, "loss": 1.4319, "step": 36050 }, { "epoch": 0.46846630271114914, "grad_norm": 0.4335116446018219, "learning_rate": 0.00010632839002326518, "loss": 1.305, "step": 36051 }, { "epoch": 0.46847929725506504, "grad_norm": 0.3310672640800476, "learning_rate": 0.00010632579056135382, "loss": 1.2521, "step": 36052 }, { "epoch": 0.4684922917989809, "grad_norm": 0.40273770689964294, "learning_rate": 0.00010632319109944241, "loss": 1.3588, "step": 36053 }, { "epoch": 0.4685052863428968, "grad_norm": 0.36818644404411316, "learning_rate": 0.00010632059163753103, "loss": 1.2179, "step": 36054 }, { "epoch": 0.46851828088681263, "grad_norm": 0.4689251780509949, "learning_rate": 0.00010631799217561966, "loss": 1.4448, "step": 36055 }, { "epoch": 0.46853127543072853, "grad_norm": 0.3490266501903534, "learning_rate": 0.00010631539271370827, "loss": 1.2343, "step": 36056 }, { "epoch": 0.4685442699746444, "grad_norm": 0.5208413600921631, "learning_rate": 0.00010631279325179689, "loss": 1.5922, "step": 36057 }, { "epoch": 0.4685572645185603, "grad_norm": 0.3412632644176483, "learning_rate": 0.0001063101937898855, "loss": 1.3741, "step": 36058 }, { "epoch": 0.4685702590624761, "grad_norm": 0.5062688589096069, "learning_rate": 0.00010630759432797412, "loss": 1.502, "step": 36059 }, { "epoch": 0.468583253606392, "grad_norm": 0.45065438747406006, "learning_rate": 0.00010630499486606273, "loss": 1.3298, "step": 36060 }, { "epoch": 0.46859624815030787, "grad_norm": 0.4056037664413452, "learning_rate": 0.00010630239540415134, "loss": 1.2575, "step": 36061 }, { "epoch": 0.46860924269422377, "grad_norm": 0.34066224098205566, "learning_rate": 0.00010629979594223995, "loss": 1.2316, "step": 36062 }, { "epoch": 0.4686222372381396, "grad_norm": 0.3735927939414978, "learning_rate": 0.00010629719648032859, "loss": 1.3024, "step": 36063 }, { "epoch": 0.4686352317820555, "grad_norm": 0.5083009004592896, "learning_rate": 0.0001062945970184172, "loss": 1.2928, "step": 36064 }, { "epoch": 0.46864822632597136, "grad_norm": 0.38366150856018066, "learning_rate": 0.00010629199755650581, "loss": 1.4082, "step": 36065 }, { "epoch": 0.46866122086988726, "grad_norm": 0.4026401937007904, "learning_rate": 0.00010628939809459441, "loss": 1.4272, "step": 36066 }, { "epoch": 0.4686742154138031, "grad_norm": 0.4562194347381592, "learning_rate": 0.00010628679863268305, "loss": 1.5625, "step": 36067 }, { "epoch": 0.468687209957719, "grad_norm": 0.4081043004989624, "learning_rate": 0.00010628419917077166, "loss": 1.5431, "step": 36068 }, { "epoch": 0.46870020450163485, "grad_norm": 0.5094277858734131, "learning_rate": 0.00010628159970886027, "loss": 1.4673, "step": 36069 }, { "epoch": 0.46871319904555075, "grad_norm": 0.24190929532051086, "learning_rate": 0.00010627900024694888, "loss": 1.2242, "step": 36070 }, { "epoch": 0.4687261935894666, "grad_norm": 0.4343477785587311, "learning_rate": 0.0001062764007850375, "loss": 1.3976, "step": 36071 }, { "epoch": 0.4687391881333825, "grad_norm": 0.3419545292854309, "learning_rate": 0.00010627380132312612, "loss": 1.4483, "step": 36072 }, { "epoch": 0.46875218267729835, "grad_norm": 0.38569071888923645, "learning_rate": 0.00010627120186121473, "loss": 1.5035, "step": 36073 }, { "epoch": 0.46876517722121425, "grad_norm": 0.3363886773586273, "learning_rate": 0.00010626860239930334, "loss": 1.2194, "step": 36074 }, { "epoch": 0.4687781717651301, "grad_norm": 0.3996689021587372, "learning_rate": 0.00010626600293739198, "loss": 1.5933, "step": 36075 }, { "epoch": 0.468791166309046, "grad_norm": 0.359098881483078, "learning_rate": 0.00010626340347548059, "loss": 1.2473, "step": 36076 }, { "epoch": 0.46880416085296184, "grad_norm": 0.4002552032470703, "learning_rate": 0.0001062608040135692, "loss": 1.4233, "step": 36077 }, { "epoch": 0.46881715539687774, "grad_norm": 0.3113689720630646, "learning_rate": 0.00010625820455165781, "loss": 1.2333, "step": 36078 }, { "epoch": 0.4688301499407936, "grad_norm": 0.4215928912162781, "learning_rate": 0.00010625560508974643, "loss": 1.3374, "step": 36079 }, { "epoch": 0.4688431444847095, "grad_norm": 0.3352264165878296, "learning_rate": 0.00010625300562783504, "loss": 1.3097, "step": 36080 }, { "epoch": 0.46885613902862533, "grad_norm": 0.4702156186103821, "learning_rate": 0.00010625040616592366, "loss": 1.5179, "step": 36081 }, { "epoch": 0.46886913357254123, "grad_norm": 0.4073571264743805, "learning_rate": 0.00010624780670401227, "loss": 1.4341, "step": 36082 }, { "epoch": 0.4688821281164571, "grad_norm": 0.4177555739879608, "learning_rate": 0.00010624520724210089, "loss": 1.3475, "step": 36083 }, { "epoch": 0.468895122660373, "grad_norm": 0.35482218861579895, "learning_rate": 0.0001062426077801895, "loss": 1.3678, "step": 36084 }, { "epoch": 0.4689081172042889, "grad_norm": 0.41617903113365173, "learning_rate": 0.00010624000831827811, "loss": 1.5235, "step": 36085 }, { "epoch": 0.4689211117482047, "grad_norm": 0.37736591696739197, "learning_rate": 0.00010623740885636672, "loss": 1.3755, "step": 36086 }, { "epoch": 0.4689341062921206, "grad_norm": 0.4453181326389313, "learning_rate": 0.00010623480939445536, "loss": 1.3108, "step": 36087 }, { "epoch": 0.46894710083603647, "grad_norm": 0.3362225592136383, "learning_rate": 0.00010623220993254397, "loss": 1.3551, "step": 36088 }, { "epoch": 0.46896009537995237, "grad_norm": 0.3900918662548065, "learning_rate": 0.00010622961047063258, "loss": 1.4762, "step": 36089 }, { "epoch": 0.4689730899238682, "grad_norm": 0.41579288244247437, "learning_rate": 0.0001062270110087212, "loss": 1.4177, "step": 36090 }, { "epoch": 0.4689860844677841, "grad_norm": 0.30973920226097107, "learning_rate": 0.00010622441154680982, "loss": 0.9713, "step": 36091 }, { "epoch": 0.46899907901169996, "grad_norm": 0.4345738887786865, "learning_rate": 0.00010622181208489843, "loss": 1.4786, "step": 36092 }, { "epoch": 0.46901207355561586, "grad_norm": 0.3496934771537781, "learning_rate": 0.00010621921262298704, "loss": 1.3796, "step": 36093 }, { "epoch": 0.4690250680995317, "grad_norm": 0.43879497051239014, "learning_rate": 0.00010621661316107568, "loss": 1.28, "step": 36094 }, { "epoch": 0.4690380626434476, "grad_norm": 0.43912777304649353, "learning_rate": 0.00010621401369916428, "loss": 1.4733, "step": 36095 }, { "epoch": 0.46905105718736345, "grad_norm": 0.3175583481788635, "learning_rate": 0.00010621141423725289, "loss": 1.2069, "step": 36096 }, { "epoch": 0.46906405173127935, "grad_norm": 0.3942506015300751, "learning_rate": 0.0001062088147753415, "loss": 1.4992, "step": 36097 }, { "epoch": 0.4690770462751952, "grad_norm": 0.4250079095363617, "learning_rate": 0.00010620621531343014, "loss": 1.4789, "step": 36098 }, { "epoch": 0.4690900408191111, "grad_norm": 0.40334662795066833, "learning_rate": 0.00010620361585151875, "loss": 1.3985, "step": 36099 }, { "epoch": 0.46910303536302694, "grad_norm": 0.360211044549942, "learning_rate": 0.00010620101638960736, "loss": 1.324, "step": 36100 }, { "epoch": 0.46911602990694284, "grad_norm": 0.43988221883773804, "learning_rate": 0.00010619841692769597, "loss": 1.485, "step": 36101 }, { "epoch": 0.4691290244508587, "grad_norm": 0.3439774513244629, "learning_rate": 0.0001061958174657846, "loss": 1.2393, "step": 36102 }, { "epoch": 0.4691420189947746, "grad_norm": 0.4070330858230591, "learning_rate": 0.0001061932180038732, "loss": 1.4063, "step": 36103 }, { "epoch": 0.46915501353869044, "grad_norm": 0.4167833626270294, "learning_rate": 0.00010619061854196182, "loss": 1.2319, "step": 36104 }, { "epoch": 0.46916800808260634, "grad_norm": 0.3463385999202728, "learning_rate": 0.00010618801908005043, "loss": 1.4347, "step": 36105 }, { "epoch": 0.4691810026265222, "grad_norm": 0.38114556670188904, "learning_rate": 0.00010618541961813906, "loss": 1.4742, "step": 36106 }, { "epoch": 0.4691939971704381, "grad_norm": 0.3757008910179138, "learning_rate": 0.00010618282015622768, "loss": 1.4366, "step": 36107 }, { "epoch": 0.4692069917143539, "grad_norm": 0.43338048458099365, "learning_rate": 0.00010618022069431627, "loss": 1.1882, "step": 36108 }, { "epoch": 0.46921998625826983, "grad_norm": 0.3667354881763458, "learning_rate": 0.00010617762123240488, "loss": 1.343, "step": 36109 }, { "epoch": 0.4692329808021857, "grad_norm": 0.33875957131385803, "learning_rate": 0.00010617502177049352, "loss": 1.2361, "step": 36110 }, { "epoch": 0.4692459753461016, "grad_norm": 0.4703972339630127, "learning_rate": 0.00010617242230858213, "loss": 1.3811, "step": 36111 }, { "epoch": 0.4692589698900174, "grad_norm": 0.43020904064178467, "learning_rate": 0.00010616982284667074, "loss": 1.47, "step": 36112 }, { "epoch": 0.4692719644339333, "grad_norm": 0.45791110396385193, "learning_rate": 0.00010616722338475935, "loss": 1.3289, "step": 36113 }, { "epoch": 0.46928495897784916, "grad_norm": 0.37642526626586914, "learning_rate": 0.00010616462392284798, "loss": 1.5013, "step": 36114 }, { "epoch": 0.46929795352176507, "grad_norm": 0.45203515887260437, "learning_rate": 0.00010616202446093659, "loss": 1.3386, "step": 36115 }, { "epoch": 0.4693109480656809, "grad_norm": 0.3627721071243286, "learning_rate": 0.0001061594249990252, "loss": 1.4903, "step": 36116 }, { "epoch": 0.4693239426095968, "grad_norm": 0.454927533864975, "learning_rate": 0.00010615682553711381, "loss": 1.2457, "step": 36117 }, { "epoch": 0.46933693715351266, "grad_norm": 0.3289344310760498, "learning_rate": 0.00010615422607520245, "loss": 1.3405, "step": 36118 }, { "epoch": 0.46934993169742856, "grad_norm": 0.3561593294143677, "learning_rate": 0.00010615162661329106, "loss": 1.2045, "step": 36119 }, { "epoch": 0.4693629262413444, "grad_norm": 0.46802428364753723, "learning_rate": 0.00010614902715137967, "loss": 1.4741, "step": 36120 }, { "epoch": 0.4693759207852603, "grad_norm": 0.46398335695266724, "learning_rate": 0.00010614642768946827, "loss": 1.5371, "step": 36121 }, { "epoch": 0.46938891532917615, "grad_norm": 0.40989935398101807, "learning_rate": 0.00010614382822755691, "loss": 1.3655, "step": 36122 }, { "epoch": 0.46940190987309205, "grad_norm": 0.3533971607685089, "learning_rate": 0.00010614122876564552, "loss": 1.3547, "step": 36123 }, { "epoch": 0.4694149044170079, "grad_norm": 0.31980404257774353, "learning_rate": 0.00010613862930373413, "loss": 1.2353, "step": 36124 }, { "epoch": 0.4694278989609238, "grad_norm": 0.35931599140167236, "learning_rate": 0.00010613602984182274, "loss": 1.4091, "step": 36125 }, { "epoch": 0.46944089350483964, "grad_norm": 0.4648946523666382, "learning_rate": 0.00010613343037991136, "loss": 1.4173, "step": 36126 }, { "epoch": 0.46945388804875554, "grad_norm": 0.37911030650138855, "learning_rate": 0.00010613083091799998, "loss": 1.3801, "step": 36127 }, { "epoch": 0.4694668825926714, "grad_norm": 0.4516185224056244, "learning_rate": 0.00010612823145608859, "loss": 1.4116, "step": 36128 }, { "epoch": 0.4694798771365873, "grad_norm": 0.4491935968399048, "learning_rate": 0.00010612563199417722, "loss": 1.409, "step": 36129 }, { "epoch": 0.46949287168050313, "grad_norm": 0.3451153635978699, "learning_rate": 0.00010612303253226584, "loss": 1.3469, "step": 36130 }, { "epoch": 0.46950586622441903, "grad_norm": 0.3741462528705597, "learning_rate": 0.00010612043307035445, "loss": 1.2449, "step": 36131 }, { "epoch": 0.4695188607683349, "grad_norm": 0.42647701501846313, "learning_rate": 0.00010611783360844306, "loss": 1.4542, "step": 36132 }, { "epoch": 0.4695318553122508, "grad_norm": 0.4272676706314087, "learning_rate": 0.00010611523414653168, "loss": 1.2582, "step": 36133 }, { "epoch": 0.4695448498561666, "grad_norm": 0.42716488242149353, "learning_rate": 0.00010611263468462029, "loss": 1.5576, "step": 36134 }, { "epoch": 0.4695578444000825, "grad_norm": 0.46606266498565674, "learning_rate": 0.0001061100352227089, "loss": 1.506, "step": 36135 }, { "epoch": 0.46957083894399837, "grad_norm": 0.3855647146701813, "learning_rate": 0.00010610743576079751, "loss": 1.5454, "step": 36136 }, { "epoch": 0.46958383348791427, "grad_norm": 0.36141031980514526, "learning_rate": 0.00010610483629888614, "loss": 1.3278, "step": 36137 }, { "epoch": 0.4695968280318301, "grad_norm": 0.47629648447036743, "learning_rate": 0.00010610223683697475, "loss": 1.5046, "step": 36138 }, { "epoch": 0.469609822575746, "grad_norm": 0.47294700145721436, "learning_rate": 0.00010609963737506336, "loss": 1.4909, "step": 36139 }, { "epoch": 0.46962281711966186, "grad_norm": 0.4332755208015442, "learning_rate": 0.00010609703791315197, "loss": 1.433, "step": 36140 }, { "epoch": 0.46963581166357776, "grad_norm": 0.4063400328159332, "learning_rate": 0.00010609443845124061, "loss": 1.4572, "step": 36141 }, { "epoch": 0.4696488062074936, "grad_norm": 0.352501779794693, "learning_rate": 0.00010609183898932922, "loss": 1.1663, "step": 36142 }, { "epoch": 0.4696618007514095, "grad_norm": 0.4391920268535614, "learning_rate": 0.00010608923952741783, "loss": 1.5187, "step": 36143 }, { "epoch": 0.46967479529532535, "grad_norm": 0.3294104337692261, "learning_rate": 0.00010608664006550644, "loss": 1.3397, "step": 36144 }, { "epoch": 0.46968778983924125, "grad_norm": 0.31026867032051086, "learning_rate": 0.00010608404060359507, "loss": 1.1955, "step": 36145 }, { "epoch": 0.4697007843831571, "grad_norm": 0.4363054037094116, "learning_rate": 0.00010608144114168368, "loss": 1.4178, "step": 36146 }, { "epoch": 0.469713778927073, "grad_norm": 0.4318297505378723, "learning_rate": 0.00010607884167977229, "loss": 1.4472, "step": 36147 }, { "epoch": 0.46972677347098885, "grad_norm": 0.4540519714355469, "learning_rate": 0.0001060762422178609, "loss": 1.3004, "step": 36148 }, { "epoch": 0.46973976801490475, "grad_norm": 0.3911750912666321, "learning_rate": 0.00010607364275594954, "loss": 1.347, "step": 36149 }, { "epoch": 0.4697527625588206, "grad_norm": 0.4785829782485962, "learning_rate": 0.00010607104329403814, "loss": 1.2467, "step": 36150 }, { "epoch": 0.4697657571027365, "grad_norm": 0.36873337626457214, "learning_rate": 0.00010606844383212675, "loss": 1.3852, "step": 36151 }, { "epoch": 0.46977875164665234, "grad_norm": 0.3357747793197632, "learning_rate": 0.00010606584437021536, "loss": 1.611, "step": 36152 }, { "epoch": 0.46979174619056824, "grad_norm": 0.45603954792022705, "learning_rate": 0.000106063244908304, "loss": 1.4254, "step": 36153 }, { "epoch": 0.4698047407344841, "grad_norm": 0.4814583659172058, "learning_rate": 0.0001060606454463926, "loss": 1.3516, "step": 36154 }, { "epoch": 0.4698177352784, "grad_norm": 0.4520021975040436, "learning_rate": 0.00010605804598448122, "loss": 1.3535, "step": 36155 }, { "epoch": 0.46983072982231583, "grad_norm": 0.37971118092536926, "learning_rate": 0.00010605544652256983, "loss": 1.4058, "step": 36156 }, { "epoch": 0.46984372436623173, "grad_norm": 0.4446203410625458, "learning_rate": 0.00010605284706065845, "loss": 1.4121, "step": 36157 }, { "epoch": 0.4698567189101476, "grad_norm": 0.38405925035476685, "learning_rate": 0.00010605024759874706, "loss": 1.31, "step": 36158 }, { "epoch": 0.4698697134540635, "grad_norm": 0.46519026160240173, "learning_rate": 0.00010604764813683567, "loss": 1.1816, "step": 36159 }, { "epoch": 0.4698827079979793, "grad_norm": 0.3670690655708313, "learning_rate": 0.00010604504867492429, "loss": 1.4234, "step": 36160 }, { "epoch": 0.4698957025418952, "grad_norm": 0.4492076337337494, "learning_rate": 0.00010604244921301292, "loss": 1.5289, "step": 36161 }, { "epoch": 0.4699086970858111, "grad_norm": 0.39481213688850403, "learning_rate": 0.00010603984975110153, "loss": 1.3903, "step": 36162 }, { "epoch": 0.46992169162972697, "grad_norm": 0.4510393738746643, "learning_rate": 0.00010603725028919013, "loss": 1.4053, "step": 36163 }, { "epoch": 0.46993468617364287, "grad_norm": 0.36320334672927856, "learning_rate": 0.00010603465082727874, "loss": 1.2432, "step": 36164 }, { "epoch": 0.4699476807175587, "grad_norm": 0.4383608102798462, "learning_rate": 0.00010603205136536738, "loss": 1.3882, "step": 36165 }, { "epoch": 0.4699606752614746, "grad_norm": 0.41992902755737305, "learning_rate": 0.00010602945190345599, "loss": 1.2374, "step": 36166 }, { "epoch": 0.46997366980539046, "grad_norm": 0.398365318775177, "learning_rate": 0.0001060268524415446, "loss": 1.3987, "step": 36167 }, { "epoch": 0.46998666434930636, "grad_norm": 0.47090840339660645, "learning_rate": 0.00010602425297963323, "loss": 1.5249, "step": 36168 }, { "epoch": 0.4699996588932222, "grad_norm": 0.37851324677467346, "learning_rate": 0.00010602165351772184, "loss": 1.2214, "step": 36169 }, { "epoch": 0.4700126534371381, "grad_norm": 0.3921733796596527, "learning_rate": 0.00010601905405581045, "loss": 1.374, "step": 36170 }, { "epoch": 0.47002564798105395, "grad_norm": 0.504626989364624, "learning_rate": 0.00010601645459389906, "loss": 1.451, "step": 36171 }, { "epoch": 0.47003864252496985, "grad_norm": 0.4621210992336273, "learning_rate": 0.0001060138551319877, "loss": 1.7462, "step": 36172 }, { "epoch": 0.4700516370688857, "grad_norm": 0.32782498002052307, "learning_rate": 0.00010601125567007631, "loss": 1.3671, "step": 36173 }, { "epoch": 0.4700646316128016, "grad_norm": 0.25939950346946716, "learning_rate": 0.00010600865620816492, "loss": 1.2563, "step": 36174 }, { "epoch": 0.47007762615671744, "grad_norm": 0.4152630865573883, "learning_rate": 0.00010600605674625352, "loss": 1.2558, "step": 36175 }, { "epoch": 0.47009062070063334, "grad_norm": 0.31859588623046875, "learning_rate": 0.00010600345728434216, "loss": 1.5866, "step": 36176 }, { "epoch": 0.4701036152445492, "grad_norm": 0.4142383933067322, "learning_rate": 0.00010600085782243077, "loss": 1.5393, "step": 36177 }, { "epoch": 0.4701166097884651, "grad_norm": 0.45889878273010254, "learning_rate": 0.00010599825836051938, "loss": 1.4033, "step": 36178 }, { "epoch": 0.47012960433238093, "grad_norm": 0.38180506229400635, "learning_rate": 0.00010599565889860799, "loss": 1.4913, "step": 36179 }, { "epoch": 0.47014259887629684, "grad_norm": 0.4229165315628052, "learning_rate": 0.00010599305943669661, "loss": 1.4389, "step": 36180 }, { "epoch": 0.4701555934202127, "grad_norm": 0.5370358228683472, "learning_rate": 0.00010599045997478522, "loss": 1.4634, "step": 36181 }, { "epoch": 0.4701685879641286, "grad_norm": 0.3709205090999603, "learning_rate": 0.00010598786051287383, "loss": 1.4102, "step": 36182 }, { "epoch": 0.4701815825080444, "grad_norm": 0.41304388642311096, "learning_rate": 0.00010598526105096245, "loss": 1.4797, "step": 36183 }, { "epoch": 0.4701945770519603, "grad_norm": 0.37425580620765686, "learning_rate": 0.00010598266158905108, "loss": 1.4339, "step": 36184 }, { "epoch": 0.4702075715958762, "grad_norm": 0.48043403029441833, "learning_rate": 0.0001059800621271397, "loss": 1.3831, "step": 36185 }, { "epoch": 0.4702205661397921, "grad_norm": 0.49232208728790283, "learning_rate": 0.0001059774626652283, "loss": 1.4723, "step": 36186 }, { "epoch": 0.4702335606837079, "grad_norm": 0.3689571022987366, "learning_rate": 0.00010597486320331692, "loss": 1.42, "step": 36187 }, { "epoch": 0.4702465552276238, "grad_norm": 0.43442389369010925, "learning_rate": 0.00010597226374140554, "loss": 1.3438, "step": 36188 }, { "epoch": 0.47025954977153966, "grad_norm": 0.3032548129558563, "learning_rate": 0.00010596966427949415, "loss": 1.333, "step": 36189 }, { "epoch": 0.47027254431545557, "grad_norm": 0.3889909088611603, "learning_rate": 0.00010596706481758276, "loss": 1.4233, "step": 36190 }, { "epoch": 0.4702855388593714, "grad_norm": 0.6050897240638733, "learning_rate": 0.00010596446535567137, "loss": 1.3794, "step": 36191 }, { "epoch": 0.4702985334032873, "grad_norm": 0.35007837414741516, "learning_rate": 0.00010596186589376, "loss": 1.5835, "step": 36192 }, { "epoch": 0.47031152794720316, "grad_norm": 0.36003994941711426, "learning_rate": 0.00010595926643184861, "loss": 1.4002, "step": 36193 }, { "epoch": 0.47032452249111906, "grad_norm": 0.490487277507782, "learning_rate": 0.00010595666696993722, "loss": 1.3871, "step": 36194 }, { "epoch": 0.4703375170350349, "grad_norm": 0.4509434103965759, "learning_rate": 0.00010595406750802583, "loss": 1.4859, "step": 36195 }, { "epoch": 0.4703505115789508, "grad_norm": 0.41708970069885254, "learning_rate": 0.00010595146804611447, "loss": 1.3753, "step": 36196 }, { "epoch": 0.47036350612286665, "grad_norm": 0.4493131935596466, "learning_rate": 0.00010594886858420308, "loss": 1.3018, "step": 36197 }, { "epoch": 0.47037650066678255, "grad_norm": 0.44261786341667175, "learning_rate": 0.00010594626912229169, "loss": 1.3897, "step": 36198 }, { "epoch": 0.4703894952106984, "grad_norm": 0.5020904541015625, "learning_rate": 0.0001059436696603803, "loss": 1.5236, "step": 36199 }, { "epoch": 0.4704024897546143, "grad_norm": 0.34281373023986816, "learning_rate": 0.00010594107019846893, "loss": 1.3461, "step": 36200 }, { "epoch": 0.47041548429853014, "grad_norm": 0.411078542470932, "learning_rate": 0.00010593847073655754, "loss": 1.3418, "step": 36201 }, { "epoch": 0.47042847884244604, "grad_norm": 0.4068673551082611, "learning_rate": 0.00010593587127464615, "loss": 1.4778, "step": 36202 }, { "epoch": 0.4704414733863619, "grad_norm": 0.40526413917541504, "learning_rate": 0.00010593327181273479, "loss": 1.3133, "step": 36203 }, { "epoch": 0.4704544679302778, "grad_norm": 0.31847894191741943, "learning_rate": 0.0001059306723508234, "loss": 1.4498, "step": 36204 }, { "epoch": 0.47046746247419363, "grad_norm": 0.38429343700408936, "learning_rate": 0.000105928072888912, "loss": 1.4061, "step": 36205 }, { "epoch": 0.47048045701810953, "grad_norm": 0.34437814354896545, "learning_rate": 0.0001059254734270006, "loss": 1.3711, "step": 36206 }, { "epoch": 0.4704934515620254, "grad_norm": 0.40567654371261597, "learning_rate": 0.00010592287396508924, "loss": 1.3001, "step": 36207 }, { "epoch": 0.4705064461059413, "grad_norm": 0.40077120065689087, "learning_rate": 0.00010592027450317785, "loss": 1.4081, "step": 36208 }, { "epoch": 0.4705194406498571, "grad_norm": 0.4596070945262909, "learning_rate": 0.00010591767504126646, "loss": 1.5516, "step": 36209 }, { "epoch": 0.470532435193773, "grad_norm": 0.4369564950466156, "learning_rate": 0.00010591507557935508, "loss": 1.5924, "step": 36210 }, { "epoch": 0.47054542973768887, "grad_norm": 0.4103551506996155, "learning_rate": 0.0001059124761174437, "loss": 1.2621, "step": 36211 }, { "epoch": 0.47055842428160477, "grad_norm": 0.4694206416606903, "learning_rate": 0.00010590987665553231, "loss": 1.3921, "step": 36212 }, { "epoch": 0.4705714188255206, "grad_norm": 0.5605316758155823, "learning_rate": 0.00010590727719362092, "loss": 1.5591, "step": 36213 }, { "epoch": 0.4705844133694365, "grad_norm": 0.44309210777282715, "learning_rate": 0.00010590467773170953, "loss": 1.3643, "step": 36214 }, { "epoch": 0.47059740791335236, "grad_norm": 0.4272473156452179, "learning_rate": 0.00010590207826979817, "loss": 1.2634, "step": 36215 }, { "epoch": 0.47061040245726826, "grad_norm": 0.49067726731300354, "learning_rate": 0.00010589947880788678, "loss": 1.5069, "step": 36216 }, { "epoch": 0.4706233970011841, "grad_norm": 0.4125564992427826, "learning_rate": 0.00010589687934597538, "loss": 1.3796, "step": 36217 }, { "epoch": 0.4706363915451, "grad_norm": 0.4353954493999481, "learning_rate": 0.00010589427988406399, "loss": 1.4034, "step": 36218 }, { "epoch": 0.47064938608901585, "grad_norm": 0.3864523768424988, "learning_rate": 0.00010589168042215263, "loss": 1.4728, "step": 36219 }, { "epoch": 0.47066238063293175, "grad_norm": 0.3618791997432709, "learning_rate": 0.00010588908096024124, "loss": 1.2799, "step": 36220 }, { "epoch": 0.4706753751768476, "grad_norm": 0.4934016764163971, "learning_rate": 0.00010588648149832985, "loss": 1.3761, "step": 36221 }, { "epoch": 0.4706883697207635, "grad_norm": 0.3758346736431122, "learning_rate": 0.00010588388203641846, "loss": 1.3483, "step": 36222 }, { "epoch": 0.47070136426467934, "grad_norm": 0.4102843999862671, "learning_rate": 0.00010588128257450709, "loss": 1.6026, "step": 36223 }, { "epoch": 0.47071435880859525, "grad_norm": 0.3525262773036957, "learning_rate": 0.0001058786831125957, "loss": 1.3236, "step": 36224 }, { "epoch": 0.4707273533525111, "grad_norm": 0.37301939725875854, "learning_rate": 0.00010587608365068431, "loss": 1.3284, "step": 36225 }, { "epoch": 0.470740347896427, "grad_norm": 0.4437795579433441, "learning_rate": 0.00010587348418877292, "loss": 1.3931, "step": 36226 }, { "epoch": 0.47075334244034284, "grad_norm": 0.3568170368671417, "learning_rate": 0.00010587088472686156, "loss": 1.2669, "step": 36227 }, { "epoch": 0.47076633698425874, "grad_norm": 0.3910858929157257, "learning_rate": 0.00010586828526495017, "loss": 1.4464, "step": 36228 }, { "epoch": 0.4707793315281746, "grad_norm": 0.3587120473384857, "learning_rate": 0.00010586568580303878, "loss": 1.3654, "step": 36229 }, { "epoch": 0.4707923260720905, "grad_norm": 0.4077349901199341, "learning_rate": 0.00010586308634112738, "loss": 1.3685, "step": 36230 }, { "epoch": 0.47080532061600633, "grad_norm": 0.3619917035102844, "learning_rate": 0.00010586048687921601, "loss": 1.3385, "step": 36231 }, { "epoch": 0.47081831515992223, "grad_norm": 0.33306464552879333, "learning_rate": 0.00010585788741730462, "loss": 1.3687, "step": 36232 }, { "epoch": 0.4708313097038381, "grad_norm": 0.4068826735019684, "learning_rate": 0.00010585528795539324, "loss": 1.4446, "step": 36233 }, { "epoch": 0.470844304247754, "grad_norm": 0.4388924837112427, "learning_rate": 0.00010585268849348185, "loss": 1.3201, "step": 36234 }, { "epoch": 0.4708572987916698, "grad_norm": 0.40682846307754517, "learning_rate": 0.00010585008903157047, "loss": 1.4232, "step": 36235 }, { "epoch": 0.4708702933355857, "grad_norm": 0.45418500900268555, "learning_rate": 0.00010584748956965908, "loss": 1.3276, "step": 36236 }, { "epoch": 0.4708832878795016, "grad_norm": 0.35614830255508423, "learning_rate": 0.00010584489010774769, "loss": 1.294, "step": 36237 }, { "epoch": 0.47089628242341747, "grad_norm": 0.3680225908756256, "learning_rate": 0.0001058422906458363, "loss": 1.5783, "step": 36238 }, { "epoch": 0.47090927696733337, "grad_norm": 0.4316016137599945, "learning_rate": 0.00010583969118392494, "loss": 1.4524, "step": 36239 }, { "epoch": 0.4709222715112492, "grad_norm": 0.504129946231842, "learning_rate": 0.00010583709172201355, "loss": 1.3935, "step": 36240 }, { "epoch": 0.4709352660551651, "grad_norm": 0.5460162162780762, "learning_rate": 0.00010583449226010216, "loss": 1.4641, "step": 36241 }, { "epoch": 0.47094826059908096, "grad_norm": 0.42141902446746826, "learning_rate": 0.00010583189279819079, "loss": 1.3465, "step": 36242 }, { "epoch": 0.47096125514299686, "grad_norm": 0.3766111135482788, "learning_rate": 0.0001058292933362794, "loss": 1.6174, "step": 36243 }, { "epoch": 0.4709742496869127, "grad_norm": 0.4819640517234802, "learning_rate": 0.00010582669387436801, "loss": 1.282, "step": 36244 }, { "epoch": 0.4709872442308286, "grad_norm": 0.37777456641197205, "learning_rate": 0.00010582409441245662, "loss": 1.3853, "step": 36245 }, { "epoch": 0.47100023877474445, "grad_norm": 0.4598681628704071, "learning_rate": 0.00010582149495054526, "loss": 1.2891, "step": 36246 }, { "epoch": 0.47101323331866035, "grad_norm": 0.4218175709247589, "learning_rate": 0.00010581889548863386, "loss": 1.2058, "step": 36247 }, { "epoch": 0.4710262278625762, "grad_norm": 0.3776571452617645, "learning_rate": 0.00010581629602672247, "loss": 1.3393, "step": 36248 }, { "epoch": 0.4710392224064921, "grad_norm": 0.4550991654396057, "learning_rate": 0.00010581369656481108, "loss": 1.4118, "step": 36249 }, { "epoch": 0.47105221695040794, "grad_norm": 0.45695239305496216, "learning_rate": 0.00010581109710289972, "loss": 1.3107, "step": 36250 }, { "epoch": 0.47106521149432384, "grad_norm": 0.46862199902534485, "learning_rate": 0.00010580849764098833, "loss": 1.5385, "step": 36251 }, { "epoch": 0.4710782060382397, "grad_norm": 0.48222196102142334, "learning_rate": 0.00010580589817907694, "loss": 1.5039, "step": 36252 }, { "epoch": 0.4710912005821556, "grad_norm": 0.4670088291168213, "learning_rate": 0.00010580329871716555, "loss": 1.3562, "step": 36253 }, { "epoch": 0.47110419512607143, "grad_norm": 0.3355959355831146, "learning_rate": 0.00010580069925525417, "loss": 1.0809, "step": 36254 }, { "epoch": 0.47111718966998734, "grad_norm": 0.4073513448238373, "learning_rate": 0.00010579809979334278, "loss": 1.3185, "step": 36255 }, { "epoch": 0.4711301842139032, "grad_norm": 0.5075585842132568, "learning_rate": 0.0001057955003314314, "loss": 1.6212, "step": 36256 }, { "epoch": 0.4711431787578191, "grad_norm": 0.44583189487457275, "learning_rate": 0.00010579290086952, "loss": 1.4864, "step": 36257 }, { "epoch": 0.4711561733017349, "grad_norm": 0.3509584665298462, "learning_rate": 0.00010579030140760864, "loss": 1.3396, "step": 36258 }, { "epoch": 0.4711691678456508, "grad_norm": 0.40845754742622375, "learning_rate": 0.00010578770194569724, "loss": 1.3736, "step": 36259 }, { "epoch": 0.47118216238956667, "grad_norm": 0.4085879325866699, "learning_rate": 0.00010578510248378585, "loss": 1.442, "step": 36260 }, { "epoch": 0.4711951569334826, "grad_norm": 0.3478677570819855, "learning_rate": 0.00010578250302187446, "loss": 1.2844, "step": 36261 }, { "epoch": 0.4712081514773984, "grad_norm": 0.33995118737220764, "learning_rate": 0.0001057799035599631, "loss": 1.4346, "step": 36262 }, { "epoch": 0.4712211460213143, "grad_norm": 0.3997320532798767, "learning_rate": 0.00010577730409805171, "loss": 1.4871, "step": 36263 }, { "epoch": 0.47123414056523016, "grad_norm": 0.4496322572231293, "learning_rate": 0.00010577470463614032, "loss": 1.3367, "step": 36264 }, { "epoch": 0.47124713510914606, "grad_norm": 0.394094854593277, "learning_rate": 0.00010577210517422893, "loss": 1.4986, "step": 36265 }, { "epoch": 0.4712601296530619, "grad_norm": 0.4957236349582672, "learning_rate": 0.00010576950571231756, "loss": 1.3492, "step": 36266 }, { "epoch": 0.4712731241969778, "grad_norm": 0.5052787065505981, "learning_rate": 0.00010576690625040617, "loss": 1.3471, "step": 36267 }, { "epoch": 0.47128611874089366, "grad_norm": 0.44330331683158875, "learning_rate": 0.00010576430678849478, "loss": 1.4152, "step": 36268 }, { "epoch": 0.47129911328480956, "grad_norm": 0.36066582798957825, "learning_rate": 0.00010576170732658339, "loss": 1.3811, "step": 36269 }, { "epoch": 0.4713121078287254, "grad_norm": 0.4625958204269409, "learning_rate": 0.00010575910786467203, "loss": 1.5199, "step": 36270 }, { "epoch": 0.4713251023726413, "grad_norm": 0.33577123284339905, "learning_rate": 0.00010575650840276064, "loss": 1.2746, "step": 36271 }, { "epoch": 0.47133809691655715, "grad_norm": 0.43062230944633484, "learning_rate": 0.00010575390894084924, "loss": 1.2863, "step": 36272 }, { "epoch": 0.47135109146047305, "grad_norm": 0.41660013794898987, "learning_rate": 0.00010575130947893785, "loss": 1.3425, "step": 36273 }, { "epoch": 0.4713640860043889, "grad_norm": 0.4076140224933624, "learning_rate": 0.00010574871001702649, "loss": 1.2959, "step": 36274 }, { "epoch": 0.4713770805483048, "grad_norm": 0.4022112190723419, "learning_rate": 0.0001057461105551151, "loss": 1.5153, "step": 36275 }, { "epoch": 0.47139007509222064, "grad_norm": 0.42412760853767395, "learning_rate": 0.00010574351109320371, "loss": 1.4482, "step": 36276 }, { "epoch": 0.47140306963613654, "grad_norm": 0.4352869391441345, "learning_rate": 0.00010574091163129232, "loss": 1.3564, "step": 36277 }, { "epoch": 0.4714160641800524, "grad_norm": 0.3720398247241974, "learning_rate": 0.00010573831216938094, "loss": 1.2117, "step": 36278 }, { "epoch": 0.4714290587239683, "grad_norm": 0.40006303787231445, "learning_rate": 0.00010573571270746956, "loss": 1.2733, "step": 36279 }, { "epoch": 0.47144205326788413, "grad_norm": 0.4771147072315216, "learning_rate": 0.00010573311324555817, "loss": 1.4599, "step": 36280 }, { "epoch": 0.47145504781180003, "grad_norm": 0.4586285352706909, "learning_rate": 0.0001057305137836468, "loss": 1.4978, "step": 36281 }, { "epoch": 0.4714680423557159, "grad_norm": 0.4073632061481476, "learning_rate": 0.00010572791432173542, "loss": 1.4803, "step": 36282 }, { "epoch": 0.4714810368996318, "grad_norm": 0.3795454800128937, "learning_rate": 0.00010572531485982403, "loss": 1.4133, "step": 36283 }, { "epoch": 0.4714940314435476, "grad_norm": 0.41940581798553467, "learning_rate": 0.00010572271539791264, "loss": 1.4424, "step": 36284 }, { "epoch": 0.4715070259874635, "grad_norm": 0.3589300513267517, "learning_rate": 0.00010572011593600126, "loss": 1.3594, "step": 36285 }, { "epoch": 0.47152002053137937, "grad_norm": 0.353777676820755, "learning_rate": 0.00010571751647408987, "loss": 1.4152, "step": 36286 }, { "epoch": 0.47153301507529527, "grad_norm": 0.42814257740974426, "learning_rate": 0.00010571491701217848, "loss": 1.3781, "step": 36287 }, { "epoch": 0.4715460096192111, "grad_norm": 0.3771434724330902, "learning_rate": 0.0001057123175502671, "loss": 1.1632, "step": 36288 }, { "epoch": 0.471559004163127, "grad_norm": 0.39288365840911865, "learning_rate": 0.00010570971808835572, "loss": 1.505, "step": 36289 }, { "epoch": 0.47157199870704286, "grad_norm": 0.44034984707832336, "learning_rate": 0.00010570711862644433, "loss": 1.335, "step": 36290 }, { "epoch": 0.47158499325095876, "grad_norm": 0.3641902506351471, "learning_rate": 0.00010570451916453294, "loss": 1.3598, "step": 36291 }, { "epoch": 0.4715979877948746, "grad_norm": 0.4167522192001343, "learning_rate": 0.00010570191970262155, "loss": 1.5018, "step": 36292 }, { "epoch": 0.4716109823387905, "grad_norm": 0.4199286997318268, "learning_rate": 0.00010569932024071019, "loss": 1.4505, "step": 36293 }, { "epoch": 0.47162397688270635, "grad_norm": 0.5240686535835266, "learning_rate": 0.0001056967207787988, "loss": 1.4292, "step": 36294 }, { "epoch": 0.47163697142662225, "grad_norm": 0.407202810049057, "learning_rate": 0.00010569412131688741, "loss": 1.4553, "step": 36295 }, { "epoch": 0.4716499659705381, "grad_norm": 0.29592278599739075, "learning_rate": 0.00010569152185497602, "loss": 1.3777, "step": 36296 }, { "epoch": 0.471662960514454, "grad_norm": 0.3960427939891815, "learning_rate": 0.00010568892239306465, "loss": 1.3455, "step": 36297 }, { "epoch": 0.47167595505836984, "grad_norm": 0.40755268931388855, "learning_rate": 0.00010568632293115326, "loss": 1.221, "step": 36298 }, { "epoch": 0.47168894960228575, "grad_norm": 0.373852014541626, "learning_rate": 0.00010568372346924187, "loss": 1.3157, "step": 36299 }, { "epoch": 0.4717019441462016, "grad_norm": 0.4340220093727112, "learning_rate": 0.00010568112400733048, "loss": 1.3878, "step": 36300 }, { "epoch": 0.4717149386901175, "grad_norm": 0.48475176095962524, "learning_rate": 0.0001056785245454191, "loss": 1.5269, "step": 36301 }, { "epoch": 0.47172793323403334, "grad_norm": 0.4359365403652191, "learning_rate": 0.00010567592508350772, "loss": 1.4827, "step": 36302 }, { "epoch": 0.47174092777794924, "grad_norm": 0.4574839770793915, "learning_rate": 0.00010567332562159633, "loss": 1.3209, "step": 36303 }, { "epoch": 0.4717539223218651, "grad_norm": 0.3582971394062042, "learning_rate": 0.00010567072615968494, "loss": 1.2927, "step": 36304 }, { "epoch": 0.471766916865781, "grad_norm": 0.46742722392082214, "learning_rate": 0.00010566812669777358, "loss": 1.3667, "step": 36305 }, { "epoch": 0.47177991140969683, "grad_norm": 0.3916938006877899, "learning_rate": 0.00010566552723586219, "loss": 1.5274, "step": 36306 }, { "epoch": 0.47179290595361273, "grad_norm": 0.44973987340927124, "learning_rate": 0.0001056629277739508, "loss": 1.5543, "step": 36307 }, { "epoch": 0.4718059004975286, "grad_norm": 0.2684609591960907, "learning_rate": 0.00010566032831203941, "loss": 1.3546, "step": 36308 }, { "epoch": 0.4718188950414445, "grad_norm": 0.3880734443664551, "learning_rate": 0.00010565772885012803, "loss": 1.3502, "step": 36309 }, { "epoch": 0.4718318895853603, "grad_norm": 0.31345921754837036, "learning_rate": 0.00010565512938821664, "loss": 1.2805, "step": 36310 }, { "epoch": 0.4718448841292762, "grad_norm": 0.3631223440170288, "learning_rate": 0.00010565252992630525, "loss": 1.4915, "step": 36311 }, { "epoch": 0.47185787867319207, "grad_norm": 0.3622957766056061, "learning_rate": 0.00010564993046439387, "loss": 1.3546, "step": 36312 }, { "epoch": 0.47187087321710797, "grad_norm": 0.4121716618537903, "learning_rate": 0.0001056473310024825, "loss": 1.5297, "step": 36313 }, { "epoch": 0.47188386776102387, "grad_norm": 0.3836487829685211, "learning_rate": 0.0001056447315405711, "loss": 1.239, "step": 36314 }, { "epoch": 0.4718968623049397, "grad_norm": 0.3267301321029663, "learning_rate": 0.00010564213207865971, "loss": 1.4449, "step": 36315 }, { "epoch": 0.4719098568488556, "grad_norm": 0.3430098295211792, "learning_rate": 0.00010563953261674835, "loss": 1.3664, "step": 36316 }, { "epoch": 0.47192285139277146, "grad_norm": 0.3294520676136017, "learning_rate": 0.00010563693315483696, "loss": 1.4008, "step": 36317 }, { "epoch": 0.47193584593668736, "grad_norm": 0.41586562991142273, "learning_rate": 0.00010563433369292557, "loss": 1.4017, "step": 36318 }, { "epoch": 0.4719488404806032, "grad_norm": 0.3817497491836548, "learning_rate": 0.00010563173423101418, "loss": 1.2898, "step": 36319 }, { "epoch": 0.4719618350245191, "grad_norm": 0.3957844376564026, "learning_rate": 0.00010562913476910281, "loss": 1.3424, "step": 36320 }, { "epoch": 0.47197482956843495, "grad_norm": 0.45254358649253845, "learning_rate": 0.00010562653530719142, "loss": 1.468, "step": 36321 }, { "epoch": 0.47198782411235085, "grad_norm": 0.44081953167915344, "learning_rate": 0.00010562393584528003, "loss": 1.3416, "step": 36322 }, { "epoch": 0.4720008186562667, "grad_norm": 0.4020347595214844, "learning_rate": 0.00010562133638336864, "loss": 1.2994, "step": 36323 }, { "epoch": 0.4720138132001826, "grad_norm": 0.2760070264339447, "learning_rate": 0.00010561873692145728, "loss": 1.3504, "step": 36324 }, { "epoch": 0.47202680774409844, "grad_norm": 0.33787959814071655, "learning_rate": 0.00010561613745954589, "loss": 1.3264, "step": 36325 }, { "epoch": 0.47203980228801434, "grad_norm": 0.40708714723587036, "learning_rate": 0.0001056135379976345, "loss": 1.5959, "step": 36326 }, { "epoch": 0.4720527968319302, "grad_norm": 0.4376273453235626, "learning_rate": 0.0001056109385357231, "loss": 1.3635, "step": 36327 }, { "epoch": 0.4720657913758461, "grad_norm": 0.4282427430152893, "learning_rate": 0.00010560833907381174, "loss": 1.2953, "step": 36328 }, { "epoch": 0.47207878591976193, "grad_norm": 0.466429740190506, "learning_rate": 0.00010560573961190035, "loss": 1.4216, "step": 36329 }, { "epoch": 0.47209178046367783, "grad_norm": 0.4172934889793396, "learning_rate": 0.00010560314014998896, "loss": 1.3632, "step": 36330 }, { "epoch": 0.4721047750075937, "grad_norm": 0.394612193107605, "learning_rate": 0.00010560054068807757, "loss": 1.3904, "step": 36331 }, { "epoch": 0.4721177695515096, "grad_norm": 0.3224700391292572, "learning_rate": 0.00010559794122616619, "loss": 1.2999, "step": 36332 }, { "epoch": 0.4721307640954254, "grad_norm": 0.4206881821155548, "learning_rate": 0.0001055953417642548, "loss": 1.3353, "step": 36333 }, { "epoch": 0.4721437586393413, "grad_norm": 0.40402668714523315, "learning_rate": 0.00010559274230234341, "loss": 1.4871, "step": 36334 }, { "epoch": 0.47215675318325717, "grad_norm": 0.3820200264453888, "learning_rate": 0.00010559014284043203, "loss": 1.3762, "step": 36335 }, { "epoch": 0.4721697477271731, "grad_norm": 0.45298463106155396, "learning_rate": 0.00010558754337852066, "loss": 1.2478, "step": 36336 }, { "epoch": 0.4721827422710889, "grad_norm": 0.3556426465511322, "learning_rate": 0.00010558494391660927, "loss": 1.4383, "step": 36337 }, { "epoch": 0.4721957368150048, "grad_norm": 0.4001607298851013, "learning_rate": 0.00010558234445469788, "loss": 1.3547, "step": 36338 }, { "epoch": 0.47220873135892066, "grad_norm": 0.40171200037002563, "learning_rate": 0.00010557974499278648, "loss": 1.5491, "step": 36339 }, { "epoch": 0.47222172590283656, "grad_norm": 0.3648524880409241, "learning_rate": 0.00010557714553087512, "loss": 1.4328, "step": 36340 }, { "epoch": 0.4722347204467524, "grad_norm": 0.3911897540092468, "learning_rate": 0.00010557454606896373, "loss": 1.3298, "step": 36341 }, { "epoch": 0.4722477149906683, "grad_norm": 0.4505600035190582, "learning_rate": 0.00010557194660705234, "loss": 1.4038, "step": 36342 }, { "epoch": 0.47226070953458416, "grad_norm": 0.44918322563171387, "learning_rate": 0.00010556934714514095, "loss": 1.4143, "step": 36343 }, { "epoch": 0.47227370407850006, "grad_norm": 0.4704125225543976, "learning_rate": 0.00010556674768322958, "loss": 1.3556, "step": 36344 }, { "epoch": 0.4722866986224159, "grad_norm": 0.3804517388343811, "learning_rate": 0.00010556414822131819, "loss": 1.4992, "step": 36345 }, { "epoch": 0.4722996931663318, "grad_norm": 0.33664894104003906, "learning_rate": 0.0001055615487594068, "loss": 1.5141, "step": 36346 }, { "epoch": 0.47231268771024765, "grad_norm": 0.3504297137260437, "learning_rate": 0.00010555894929749541, "loss": 1.4532, "step": 36347 }, { "epoch": 0.47232568225416355, "grad_norm": 0.4832122325897217, "learning_rate": 0.00010555634983558405, "loss": 1.221, "step": 36348 }, { "epoch": 0.4723386767980794, "grad_norm": 0.3068981468677521, "learning_rate": 0.00010555375037367266, "loss": 1.3667, "step": 36349 }, { "epoch": 0.4723516713419953, "grad_norm": 0.4074302017688751, "learning_rate": 0.00010555115091176127, "loss": 1.5373, "step": 36350 }, { "epoch": 0.47236466588591114, "grad_norm": 0.43595632910728455, "learning_rate": 0.00010554855144984988, "loss": 1.3623, "step": 36351 }, { "epoch": 0.47237766042982704, "grad_norm": 0.38892650604248047, "learning_rate": 0.0001055459519879385, "loss": 1.2509, "step": 36352 }, { "epoch": 0.4723906549737429, "grad_norm": 0.39849451184272766, "learning_rate": 0.00010554335252602712, "loss": 1.3672, "step": 36353 }, { "epoch": 0.4724036495176588, "grad_norm": 0.4168280363082886, "learning_rate": 0.00010554075306411573, "loss": 1.2418, "step": 36354 }, { "epoch": 0.47241664406157463, "grad_norm": 0.44292542338371277, "learning_rate": 0.00010553815360220437, "loss": 1.4295, "step": 36355 }, { "epoch": 0.47242963860549053, "grad_norm": 0.33598068356513977, "learning_rate": 0.00010553555414029296, "loss": 1.4127, "step": 36356 }, { "epoch": 0.4724426331494064, "grad_norm": 0.3550907373428345, "learning_rate": 0.00010553295467838157, "loss": 1.3317, "step": 36357 }, { "epoch": 0.4724556276933223, "grad_norm": 0.3526102304458618, "learning_rate": 0.00010553035521647018, "loss": 1.1941, "step": 36358 }, { "epoch": 0.4724686222372381, "grad_norm": 0.3644999563694, "learning_rate": 0.00010552775575455882, "loss": 1.3664, "step": 36359 }, { "epoch": 0.472481616781154, "grad_norm": 0.47779789566993713, "learning_rate": 0.00010552515629264743, "loss": 1.5838, "step": 36360 }, { "epoch": 0.47249461132506987, "grad_norm": 0.3704841434955597, "learning_rate": 0.00010552255683073604, "loss": 1.4593, "step": 36361 }, { "epoch": 0.47250760586898577, "grad_norm": 0.49029144644737244, "learning_rate": 0.00010551995736882466, "loss": 1.5015, "step": 36362 }, { "epoch": 0.4725206004129016, "grad_norm": 0.35636621713638306, "learning_rate": 0.00010551735790691328, "loss": 1.4052, "step": 36363 }, { "epoch": 0.4725335949568175, "grad_norm": 0.3917786180973053, "learning_rate": 0.00010551475844500189, "loss": 1.4715, "step": 36364 }, { "epoch": 0.47254658950073336, "grad_norm": 0.37086358666419983, "learning_rate": 0.0001055121589830905, "loss": 1.2612, "step": 36365 }, { "epoch": 0.47255958404464926, "grad_norm": 0.38486021757125854, "learning_rate": 0.00010550955952117911, "loss": 1.383, "step": 36366 }, { "epoch": 0.4725725785885651, "grad_norm": 0.34516850113868713, "learning_rate": 0.00010550696005926775, "loss": 1.3181, "step": 36367 }, { "epoch": 0.472585573132481, "grad_norm": 0.44308778643608093, "learning_rate": 0.00010550436059735636, "loss": 1.376, "step": 36368 }, { "epoch": 0.47259856767639685, "grad_norm": 0.3153568208217621, "learning_rate": 0.00010550176113544496, "loss": 1.3916, "step": 36369 }, { "epoch": 0.47261156222031275, "grad_norm": 0.43162310123443604, "learning_rate": 0.00010549916167353357, "loss": 1.482, "step": 36370 }, { "epoch": 0.4726245567642286, "grad_norm": 0.3791247606277466, "learning_rate": 0.00010549656221162221, "loss": 1.7045, "step": 36371 }, { "epoch": 0.4726375513081445, "grad_norm": 0.4568774104118347, "learning_rate": 0.00010549396274971082, "loss": 1.2982, "step": 36372 }, { "epoch": 0.47265054585206034, "grad_norm": 0.40682920813560486, "learning_rate": 0.00010549136328779943, "loss": 1.3991, "step": 36373 }, { "epoch": 0.47266354039597624, "grad_norm": 0.5829497575759888, "learning_rate": 0.00010548876382588804, "loss": 1.3291, "step": 36374 }, { "epoch": 0.4726765349398921, "grad_norm": 0.39848124980926514, "learning_rate": 0.00010548616436397667, "loss": 1.3377, "step": 36375 }, { "epoch": 0.472689529483808, "grad_norm": 0.5966970324516296, "learning_rate": 0.00010548356490206528, "loss": 1.294, "step": 36376 }, { "epoch": 0.47270252402772384, "grad_norm": 0.3550399839878082, "learning_rate": 0.00010548096544015389, "loss": 1.4642, "step": 36377 }, { "epoch": 0.47271551857163974, "grad_norm": 0.41060203313827515, "learning_rate": 0.0001054783659782425, "loss": 1.459, "step": 36378 }, { "epoch": 0.4727285131155556, "grad_norm": 0.44106003642082214, "learning_rate": 0.00010547576651633114, "loss": 1.4886, "step": 36379 }, { "epoch": 0.4727415076594715, "grad_norm": 0.4346536695957184, "learning_rate": 0.00010547316705441975, "loss": 1.4245, "step": 36380 }, { "epoch": 0.47275450220338733, "grad_norm": 0.4286676347255707, "learning_rate": 0.00010547056759250834, "loss": 1.5067, "step": 36381 }, { "epoch": 0.47276749674730323, "grad_norm": 0.45711028575897217, "learning_rate": 0.00010546796813059696, "loss": 1.4612, "step": 36382 }, { "epoch": 0.4727804912912191, "grad_norm": 0.3808406889438629, "learning_rate": 0.0001054653686686856, "loss": 1.4254, "step": 36383 }, { "epoch": 0.472793485835135, "grad_norm": 0.3095826804637909, "learning_rate": 0.0001054627692067742, "loss": 1.3425, "step": 36384 }, { "epoch": 0.4728064803790508, "grad_norm": 0.4152957499027252, "learning_rate": 0.00010546016974486282, "loss": 1.4307, "step": 36385 }, { "epoch": 0.4728194749229667, "grad_norm": 0.31122854351997375, "learning_rate": 0.00010545757028295143, "loss": 1.2689, "step": 36386 }, { "epoch": 0.47283246946688257, "grad_norm": 0.3050546646118164, "learning_rate": 0.00010545497082104005, "loss": 1.2804, "step": 36387 }, { "epoch": 0.47284546401079847, "grad_norm": 0.38738322257995605, "learning_rate": 0.00010545237135912866, "loss": 1.337, "step": 36388 }, { "epoch": 0.47285845855471437, "grad_norm": 0.3867758512496948, "learning_rate": 0.00010544977189721727, "loss": 1.438, "step": 36389 }, { "epoch": 0.4728714530986302, "grad_norm": 0.418655663728714, "learning_rate": 0.00010544717243530591, "loss": 1.4436, "step": 36390 }, { "epoch": 0.4728844476425461, "grad_norm": 0.4404284656047821, "learning_rate": 0.00010544457297339452, "loss": 1.5482, "step": 36391 }, { "epoch": 0.47289744218646196, "grad_norm": 0.4149889647960663, "learning_rate": 0.00010544197351148313, "loss": 1.4142, "step": 36392 }, { "epoch": 0.47291043673037786, "grad_norm": 0.42605578899383545, "learning_rate": 0.00010543937404957174, "loss": 1.6962, "step": 36393 }, { "epoch": 0.4729234312742937, "grad_norm": 0.3961978256702423, "learning_rate": 0.00010543677458766037, "loss": 1.4155, "step": 36394 }, { "epoch": 0.4729364258182096, "grad_norm": 0.4792509078979492, "learning_rate": 0.00010543417512574898, "loss": 1.4575, "step": 36395 }, { "epoch": 0.47294942036212545, "grad_norm": 0.3547539710998535, "learning_rate": 0.00010543157566383759, "loss": 1.4044, "step": 36396 }, { "epoch": 0.47296241490604135, "grad_norm": 0.3621586561203003, "learning_rate": 0.0001054289762019262, "loss": 1.1845, "step": 36397 }, { "epoch": 0.4729754094499572, "grad_norm": 0.46153756976127625, "learning_rate": 0.00010542637674001483, "loss": 1.4107, "step": 36398 }, { "epoch": 0.4729884039938731, "grad_norm": 0.3382875621318817, "learning_rate": 0.00010542377727810344, "loss": 1.3004, "step": 36399 }, { "epoch": 0.47300139853778894, "grad_norm": 0.5748820900917053, "learning_rate": 0.00010542117781619205, "loss": 1.5482, "step": 36400 }, { "epoch": 0.47301439308170484, "grad_norm": 0.4077175557613373, "learning_rate": 0.00010541857835428066, "loss": 1.2882, "step": 36401 }, { "epoch": 0.4730273876256207, "grad_norm": 0.45156893134117126, "learning_rate": 0.0001054159788923693, "loss": 1.2087, "step": 36402 }, { "epoch": 0.4730403821695366, "grad_norm": 0.43064963817596436, "learning_rate": 0.00010541337943045791, "loss": 1.5427, "step": 36403 }, { "epoch": 0.47305337671345243, "grad_norm": 0.3077194094657898, "learning_rate": 0.00010541077996854652, "loss": 1.21, "step": 36404 }, { "epoch": 0.47306637125736833, "grad_norm": 0.46736305952072144, "learning_rate": 0.00010540818050663513, "loss": 1.3801, "step": 36405 }, { "epoch": 0.4730793658012842, "grad_norm": 0.4443642497062683, "learning_rate": 0.00010540558104472375, "loss": 1.5445, "step": 36406 }, { "epoch": 0.4730923603452001, "grad_norm": 0.3929472863674164, "learning_rate": 0.00010540298158281236, "loss": 1.2371, "step": 36407 }, { "epoch": 0.4731053548891159, "grad_norm": 0.41135480999946594, "learning_rate": 0.00010540038212090098, "loss": 1.2134, "step": 36408 }, { "epoch": 0.4731183494330318, "grad_norm": 0.3893527388572693, "learning_rate": 0.00010539778265898959, "loss": 1.1874, "step": 36409 }, { "epoch": 0.47313134397694767, "grad_norm": 0.4100229740142822, "learning_rate": 0.00010539518319707822, "loss": 1.3522, "step": 36410 }, { "epoch": 0.47314433852086357, "grad_norm": 0.5223816633224487, "learning_rate": 0.00010539258373516682, "loss": 1.3644, "step": 36411 }, { "epoch": 0.4731573330647794, "grad_norm": 0.5095726847648621, "learning_rate": 0.00010538998427325543, "loss": 1.4357, "step": 36412 }, { "epoch": 0.4731703276086953, "grad_norm": 0.29291126132011414, "learning_rate": 0.00010538738481134404, "loss": 1.1927, "step": 36413 }, { "epoch": 0.47318332215261116, "grad_norm": 0.49224966764450073, "learning_rate": 0.00010538478534943268, "loss": 1.4498, "step": 36414 }, { "epoch": 0.47319631669652706, "grad_norm": 0.3949586749076843, "learning_rate": 0.00010538218588752129, "loss": 1.3013, "step": 36415 }, { "epoch": 0.4732093112404429, "grad_norm": 0.5705249309539795, "learning_rate": 0.0001053795864256099, "loss": 1.2863, "step": 36416 }, { "epoch": 0.4732223057843588, "grad_norm": 0.6028515100479126, "learning_rate": 0.00010537698696369851, "loss": 1.4612, "step": 36417 }, { "epoch": 0.47323530032827466, "grad_norm": 0.5409745573997498, "learning_rate": 0.00010537438750178714, "loss": 1.2278, "step": 36418 }, { "epoch": 0.47324829487219056, "grad_norm": 0.4503304958343506, "learning_rate": 0.00010537178803987575, "loss": 1.661, "step": 36419 }, { "epoch": 0.4732612894161064, "grad_norm": 0.47031888365745544, "learning_rate": 0.00010536918857796436, "loss": 1.2354, "step": 36420 }, { "epoch": 0.4732742839600223, "grad_norm": 0.33157649636268616, "learning_rate": 0.00010536658911605297, "loss": 1.2154, "step": 36421 }, { "epoch": 0.47328727850393815, "grad_norm": 0.3777521848678589, "learning_rate": 0.00010536398965414161, "loss": 1.4627, "step": 36422 }, { "epoch": 0.47330027304785405, "grad_norm": 0.3475722372531891, "learning_rate": 0.00010536139019223021, "loss": 1.4711, "step": 36423 }, { "epoch": 0.4733132675917699, "grad_norm": 0.4043409824371338, "learning_rate": 0.00010535879073031882, "loss": 1.3512, "step": 36424 }, { "epoch": 0.4733262621356858, "grad_norm": 0.46424999833106995, "learning_rate": 0.00010535619126840743, "loss": 1.5604, "step": 36425 }, { "epoch": 0.47333925667960164, "grad_norm": 0.29085108637809753, "learning_rate": 0.00010535359180649607, "loss": 1.4514, "step": 36426 }, { "epoch": 0.47335225122351754, "grad_norm": 0.35116955637931824, "learning_rate": 0.00010535099234458468, "loss": 1.4099, "step": 36427 }, { "epoch": 0.4733652457674334, "grad_norm": 0.35608649253845215, "learning_rate": 0.00010534839288267329, "loss": 1.2669, "step": 36428 }, { "epoch": 0.4733782403113493, "grad_norm": 0.4304165244102478, "learning_rate": 0.00010534579342076191, "loss": 1.394, "step": 36429 }, { "epoch": 0.47339123485526513, "grad_norm": 0.5003201961517334, "learning_rate": 0.00010534319395885052, "loss": 1.4146, "step": 36430 }, { "epoch": 0.47340422939918103, "grad_norm": 0.373423308134079, "learning_rate": 0.00010534059449693914, "loss": 1.2369, "step": 36431 }, { "epoch": 0.4734172239430969, "grad_norm": 0.40230199694633484, "learning_rate": 0.00010533799503502775, "loss": 1.6951, "step": 36432 }, { "epoch": 0.4734302184870128, "grad_norm": 0.41639116406440735, "learning_rate": 0.00010533539557311638, "loss": 1.5146, "step": 36433 }, { "epoch": 0.4734432130309286, "grad_norm": 0.33416110277175903, "learning_rate": 0.000105332796111205, "loss": 1.2722, "step": 36434 }, { "epoch": 0.4734562075748445, "grad_norm": 0.36078763008117676, "learning_rate": 0.0001053301966492936, "loss": 1.5051, "step": 36435 }, { "epoch": 0.47346920211876037, "grad_norm": 0.3371979594230652, "learning_rate": 0.0001053275971873822, "loss": 1.3258, "step": 36436 }, { "epoch": 0.47348219666267627, "grad_norm": 0.3522584140300751, "learning_rate": 0.00010532499772547084, "loss": 1.4431, "step": 36437 }, { "epoch": 0.4734951912065921, "grad_norm": 0.44137823581695557, "learning_rate": 0.00010532239826355945, "loss": 1.3905, "step": 36438 }, { "epoch": 0.473508185750508, "grad_norm": 0.409078985452652, "learning_rate": 0.00010531979880164806, "loss": 1.3691, "step": 36439 }, { "epoch": 0.47352118029442386, "grad_norm": 0.4190751314163208, "learning_rate": 0.00010531719933973667, "loss": 1.2521, "step": 36440 }, { "epoch": 0.47353417483833976, "grad_norm": 0.5178038477897644, "learning_rate": 0.0001053145998778253, "loss": 1.4101, "step": 36441 }, { "epoch": 0.4735471693822556, "grad_norm": 0.37695392966270447, "learning_rate": 0.00010531200041591391, "loss": 1.7097, "step": 36442 }, { "epoch": 0.4735601639261715, "grad_norm": 0.40205806493759155, "learning_rate": 0.00010530940095400252, "loss": 1.3161, "step": 36443 }, { "epoch": 0.47357315847008735, "grad_norm": 0.39444082975387573, "learning_rate": 0.00010530680149209113, "loss": 1.1433, "step": 36444 }, { "epoch": 0.47358615301400325, "grad_norm": 0.5432342290878296, "learning_rate": 0.00010530420203017977, "loss": 1.5462, "step": 36445 }, { "epoch": 0.4735991475579191, "grad_norm": 0.40053340792655945, "learning_rate": 0.00010530160256826838, "loss": 1.2002, "step": 36446 }, { "epoch": 0.473612142101835, "grad_norm": 0.40720003843307495, "learning_rate": 0.00010529900310635699, "loss": 1.3962, "step": 36447 }, { "epoch": 0.47362513664575084, "grad_norm": 0.4189891517162323, "learning_rate": 0.0001052964036444456, "loss": 1.4713, "step": 36448 }, { "epoch": 0.47363813118966674, "grad_norm": 0.3476797342300415, "learning_rate": 0.00010529380418253423, "loss": 1.3817, "step": 36449 }, { "epoch": 0.4736511257335826, "grad_norm": 0.44074803590774536, "learning_rate": 0.00010529120472062284, "loss": 1.2675, "step": 36450 }, { "epoch": 0.4736641202774985, "grad_norm": 0.34805727005004883, "learning_rate": 0.00010528860525871145, "loss": 1.2348, "step": 36451 }, { "epoch": 0.47367711482141434, "grad_norm": 0.40573394298553467, "learning_rate": 0.00010528600579680006, "loss": 1.3978, "step": 36452 }, { "epoch": 0.47369010936533024, "grad_norm": 0.41177698969841003, "learning_rate": 0.00010528340633488868, "loss": 1.2685, "step": 36453 }, { "epoch": 0.4737031039092461, "grad_norm": 0.41897687315940857, "learning_rate": 0.0001052808068729773, "loss": 1.3308, "step": 36454 }, { "epoch": 0.473716098453162, "grad_norm": 0.33357471227645874, "learning_rate": 0.0001052782074110659, "loss": 1.3519, "step": 36455 }, { "epoch": 0.4737290929970778, "grad_norm": 0.4196988344192505, "learning_rate": 0.00010527560794915452, "loss": 1.4598, "step": 36456 }, { "epoch": 0.47374208754099373, "grad_norm": 0.4668128788471222, "learning_rate": 0.00010527300848724316, "loss": 1.5618, "step": 36457 }, { "epoch": 0.4737550820849096, "grad_norm": 0.40311822295188904, "learning_rate": 0.00010527040902533177, "loss": 1.4825, "step": 36458 }, { "epoch": 0.4737680766288255, "grad_norm": 0.3562118411064148, "learning_rate": 0.00010526780956342038, "loss": 1.2388, "step": 36459 }, { "epoch": 0.4737810711727413, "grad_norm": 0.43443742394447327, "learning_rate": 0.00010526521010150899, "loss": 1.4315, "step": 36460 }, { "epoch": 0.4737940657166572, "grad_norm": 0.36753880977630615, "learning_rate": 0.00010526261063959761, "loss": 1.2069, "step": 36461 }, { "epoch": 0.47380706026057307, "grad_norm": 0.38647881150245667, "learning_rate": 0.00010526001117768622, "loss": 1.2645, "step": 36462 }, { "epoch": 0.47382005480448897, "grad_norm": 0.4401029050350189, "learning_rate": 0.00010525741171577483, "loss": 1.4452, "step": 36463 }, { "epoch": 0.4738330493484048, "grad_norm": 0.41859200596809387, "learning_rate": 0.00010525481225386347, "loss": 1.2808, "step": 36464 }, { "epoch": 0.4738460438923207, "grad_norm": 0.39292094111442566, "learning_rate": 0.00010525221279195207, "loss": 1.5636, "step": 36465 }, { "epoch": 0.4738590384362366, "grad_norm": 0.38070419430732727, "learning_rate": 0.00010524961333004068, "loss": 1.5081, "step": 36466 }, { "epoch": 0.47387203298015246, "grad_norm": 0.2946373522281647, "learning_rate": 0.00010524701386812929, "loss": 1.2769, "step": 36467 }, { "epoch": 0.47388502752406836, "grad_norm": 0.5027557015419006, "learning_rate": 0.00010524441440621793, "loss": 1.5535, "step": 36468 }, { "epoch": 0.4738980220679842, "grad_norm": 0.42623329162597656, "learning_rate": 0.00010524181494430654, "loss": 1.3172, "step": 36469 }, { "epoch": 0.4739110166119001, "grad_norm": 0.3594422936439514, "learning_rate": 0.00010523921548239515, "loss": 1.5453, "step": 36470 }, { "epoch": 0.47392401115581595, "grad_norm": 0.40100613236427307, "learning_rate": 0.00010523661602048376, "loss": 1.274, "step": 36471 }, { "epoch": 0.47393700569973185, "grad_norm": 0.4887884557247162, "learning_rate": 0.00010523401655857239, "loss": 1.4617, "step": 36472 }, { "epoch": 0.4739500002436477, "grad_norm": 0.3954545259475708, "learning_rate": 0.000105231417096661, "loss": 1.3242, "step": 36473 }, { "epoch": 0.4739629947875636, "grad_norm": 0.4259144961833954, "learning_rate": 0.00010522881763474961, "loss": 1.2899, "step": 36474 }, { "epoch": 0.47397598933147944, "grad_norm": 0.3954712152481079, "learning_rate": 0.00010522621817283822, "loss": 1.4941, "step": 36475 }, { "epoch": 0.47398898387539534, "grad_norm": 0.2992852032184601, "learning_rate": 0.00010522361871092686, "loss": 1.2807, "step": 36476 }, { "epoch": 0.4740019784193112, "grad_norm": 0.41336286067962646, "learning_rate": 0.00010522101924901547, "loss": 1.5216, "step": 36477 }, { "epoch": 0.4740149729632271, "grad_norm": 0.4363195598125458, "learning_rate": 0.00010521841978710407, "loss": 1.2006, "step": 36478 }, { "epoch": 0.47402796750714293, "grad_norm": 0.4402675926685333, "learning_rate": 0.00010521582032519268, "loss": 1.3084, "step": 36479 }, { "epoch": 0.47404096205105883, "grad_norm": 0.40029609203338623, "learning_rate": 0.00010521322086328131, "loss": 1.2702, "step": 36480 }, { "epoch": 0.4740539565949747, "grad_norm": 0.4639246463775635, "learning_rate": 0.00010521062140136993, "loss": 1.3475, "step": 36481 }, { "epoch": 0.4740669511388906, "grad_norm": 0.38363412022590637, "learning_rate": 0.00010520802193945854, "loss": 1.4488, "step": 36482 }, { "epoch": 0.4740799456828064, "grad_norm": 0.37412697076797485, "learning_rate": 0.00010520542247754715, "loss": 1.4547, "step": 36483 }, { "epoch": 0.4740929402267223, "grad_norm": 0.3534381091594696, "learning_rate": 0.00010520282301563577, "loss": 1.27, "step": 36484 }, { "epoch": 0.47410593477063817, "grad_norm": 0.2850721478462219, "learning_rate": 0.00010520022355372438, "loss": 1.0935, "step": 36485 }, { "epoch": 0.47411892931455407, "grad_norm": 0.31452807784080505, "learning_rate": 0.000105197624091813, "loss": 1.4362, "step": 36486 }, { "epoch": 0.4741319238584699, "grad_norm": 0.4571937918663025, "learning_rate": 0.0001051950246299016, "loss": 1.4426, "step": 36487 }, { "epoch": 0.4741449184023858, "grad_norm": 0.35101813077926636, "learning_rate": 0.00010519242516799024, "loss": 1.3846, "step": 36488 }, { "epoch": 0.47415791294630166, "grad_norm": 0.3200477957725525, "learning_rate": 0.00010518982570607885, "loss": 1.3804, "step": 36489 }, { "epoch": 0.47417090749021756, "grad_norm": 0.5071653127670288, "learning_rate": 0.00010518722624416746, "loss": 1.3948, "step": 36490 }, { "epoch": 0.4741839020341334, "grad_norm": 0.40244919061660767, "learning_rate": 0.00010518462678225606, "loss": 1.3707, "step": 36491 }, { "epoch": 0.4741968965780493, "grad_norm": 0.41514334082603455, "learning_rate": 0.0001051820273203447, "loss": 1.5459, "step": 36492 }, { "epoch": 0.47420989112196515, "grad_norm": 0.48066726326942444, "learning_rate": 0.00010517942785843331, "loss": 1.4711, "step": 36493 }, { "epoch": 0.47422288566588106, "grad_norm": 0.30234599113464355, "learning_rate": 0.00010517682839652192, "loss": 1.3041, "step": 36494 }, { "epoch": 0.4742358802097969, "grad_norm": 0.3929380774497986, "learning_rate": 0.00010517422893461053, "loss": 1.5004, "step": 36495 }, { "epoch": 0.4742488747537128, "grad_norm": 0.43211257457733154, "learning_rate": 0.00010517162947269916, "loss": 1.4852, "step": 36496 }, { "epoch": 0.47426186929762865, "grad_norm": 0.3946191966533661, "learning_rate": 0.00010516903001078777, "loss": 1.4782, "step": 36497 }, { "epoch": 0.47427486384154455, "grad_norm": 0.442671000957489, "learning_rate": 0.00010516643054887638, "loss": 1.3607, "step": 36498 }, { "epoch": 0.4742878583854604, "grad_norm": 0.3253670036792755, "learning_rate": 0.00010516383108696499, "loss": 1.4483, "step": 36499 }, { "epoch": 0.4743008529293763, "grad_norm": 0.4677087068557739, "learning_rate": 0.00010516123162505363, "loss": 1.3958, "step": 36500 }, { "epoch": 0.47431384747329214, "grad_norm": 0.33318865299224854, "learning_rate": 0.00010515863216314224, "loss": 1.3909, "step": 36501 }, { "epoch": 0.47432684201720804, "grad_norm": 0.23662245273590088, "learning_rate": 0.00010515603270123085, "loss": 1.0205, "step": 36502 }, { "epoch": 0.4743398365611239, "grad_norm": 0.30335503816604614, "learning_rate": 0.00010515343323931947, "loss": 1.0193, "step": 36503 }, { "epoch": 0.4743528311050398, "grad_norm": 0.39821380376815796, "learning_rate": 0.00010515083377740809, "loss": 1.2421, "step": 36504 }, { "epoch": 0.47436582564895563, "grad_norm": 0.4266816973686218, "learning_rate": 0.0001051482343154967, "loss": 1.2585, "step": 36505 }, { "epoch": 0.47437882019287153, "grad_norm": 0.44197338819503784, "learning_rate": 0.00010514563485358531, "loss": 1.3504, "step": 36506 }, { "epoch": 0.4743918147367874, "grad_norm": 0.3546936511993408, "learning_rate": 0.00010514303539167393, "loss": 1.2846, "step": 36507 }, { "epoch": 0.4744048092807033, "grad_norm": 0.4064730107784271, "learning_rate": 0.00010514043592976254, "loss": 1.3243, "step": 36508 }, { "epoch": 0.4744178038246191, "grad_norm": 0.42016807198524475, "learning_rate": 0.00010513783646785115, "loss": 1.3705, "step": 36509 }, { "epoch": 0.474430798368535, "grad_norm": 0.4166208505630493, "learning_rate": 0.00010513523700593976, "loss": 1.3553, "step": 36510 }, { "epoch": 0.47444379291245087, "grad_norm": 0.3893529772758484, "learning_rate": 0.0001051326375440284, "loss": 1.4648, "step": 36511 }, { "epoch": 0.47445678745636677, "grad_norm": 0.4707178473472595, "learning_rate": 0.00010513003808211701, "loss": 1.3906, "step": 36512 }, { "epoch": 0.4744697820002826, "grad_norm": 0.3569815158843994, "learning_rate": 0.00010512743862020562, "loss": 1.2468, "step": 36513 }, { "epoch": 0.4744827765441985, "grad_norm": 0.5508162379264832, "learning_rate": 0.00010512483915829424, "loss": 1.5861, "step": 36514 }, { "epoch": 0.47449577108811436, "grad_norm": 0.47720858454704285, "learning_rate": 0.00010512223969638286, "loss": 1.4275, "step": 36515 }, { "epoch": 0.47450876563203026, "grad_norm": 0.492017924785614, "learning_rate": 0.00010511964023447147, "loss": 1.4554, "step": 36516 }, { "epoch": 0.4745217601759461, "grad_norm": 0.3388065695762634, "learning_rate": 0.00010511704077256008, "loss": 1.5623, "step": 36517 }, { "epoch": 0.474534754719862, "grad_norm": 0.490404337644577, "learning_rate": 0.00010511444131064869, "loss": 1.3821, "step": 36518 }, { "epoch": 0.47454774926377785, "grad_norm": 0.452975869178772, "learning_rate": 0.00010511184184873733, "loss": 1.454, "step": 36519 }, { "epoch": 0.47456074380769375, "grad_norm": 0.43254348635673523, "learning_rate": 0.00010510924238682593, "loss": 1.3016, "step": 36520 }, { "epoch": 0.4745737383516096, "grad_norm": 0.4077914357185364, "learning_rate": 0.00010510664292491454, "loss": 1.3422, "step": 36521 }, { "epoch": 0.4745867328955255, "grad_norm": 0.49535638093948364, "learning_rate": 0.00010510404346300315, "loss": 1.3262, "step": 36522 }, { "epoch": 0.47459972743944134, "grad_norm": 0.49975574016571045, "learning_rate": 0.00010510144400109179, "loss": 1.5343, "step": 36523 }, { "epoch": 0.47461272198335724, "grad_norm": 0.31914758682250977, "learning_rate": 0.0001050988445391804, "loss": 1.2919, "step": 36524 }, { "epoch": 0.4746257165272731, "grad_norm": 0.38303831219673157, "learning_rate": 0.00010509624507726901, "loss": 1.4184, "step": 36525 }, { "epoch": 0.474638711071189, "grad_norm": 0.38439470529556274, "learning_rate": 0.00010509364561535762, "loss": 1.5687, "step": 36526 }, { "epoch": 0.47465170561510484, "grad_norm": 0.3509228527545929, "learning_rate": 0.00010509104615344625, "loss": 1.4026, "step": 36527 }, { "epoch": 0.47466470015902074, "grad_norm": 0.3775359094142914, "learning_rate": 0.00010508844669153486, "loss": 1.375, "step": 36528 }, { "epoch": 0.4746776947029366, "grad_norm": 0.4158408045768738, "learning_rate": 0.00010508584722962347, "loss": 1.4074, "step": 36529 }, { "epoch": 0.4746906892468525, "grad_norm": 0.3964065611362457, "learning_rate": 0.00010508324776771208, "loss": 1.3681, "step": 36530 }, { "epoch": 0.4747036837907683, "grad_norm": 0.3993860185146332, "learning_rate": 0.00010508064830580072, "loss": 1.2859, "step": 36531 }, { "epoch": 0.47471667833468423, "grad_norm": 0.4389224648475647, "learning_rate": 0.00010507804884388933, "loss": 1.4054, "step": 36532 }, { "epoch": 0.4747296728786001, "grad_norm": 0.37321236729621887, "learning_rate": 0.00010507544938197792, "loss": 1.5025, "step": 36533 }, { "epoch": 0.474742667422516, "grad_norm": 0.49972763657569885, "learning_rate": 0.00010507284992006654, "loss": 1.409, "step": 36534 }, { "epoch": 0.4747556619664318, "grad_norm": 0.43644779920578003, "learning_rate": 0.00010507025045815517, "loss": 1.4305, "step": 36535 }, { "epoch": 0.4747686565103477, "grad_norm": 0.3297608196735382, "learning_rate": 0.00010506765099624378, "loss": 1.3592, "step": 36536 }, { "epoch": 0.47478165105426356, "grad_norm": 0.4776918292045593, "learning_rate": 0.0001050650515343324, "loss": 1.4418, "step": 36537 }, { "epoch": 0.47479464559817947, "grad_norm": 0.4637238681316376, "learning_rate": 0.00010506245207242102, "loss": 1.3202, "step": 36538 }, { "epoch": 0.4748076401420953, "grad_norm": 0.35447439551353455, "learning_rate": 0.00010505985261050963, "loss": 1.5826, "step": 36539 }, { "epoch": 0.4748206346860112, "grad_norm": 0.5458242297172546, "learning_rate": 0.00010505725314859824, "loss": 1.548, "step": 36540 }, { "epoch": 0.47483362922992706, "grad_norm": 0.5168476104736328, "learning_rate": 0.00010505465368668685, "loss": 1.4047, "step": 36541 }, { "epoch": 0.47484662377384296, "grad_norm": 0.4175431728363037, "learning_rate": 0.00010505205422477549, "loss": 1.5352, "step": 36542 }, { "epoch": 0.47485961831775886, "grad_norm": 0.45470473170280457, "learning_rate": 0.0001050494547628641, "loss": 1.4061, "step": 36543 }, { "epoch": 0.4748726128616747, "grad_norm": 0.43922320008277893, "learning_rate": 0.00010504685530095271, "loss": 1.2992, "step": 36544 }, { "epoch": 0.4748856074055906, "grad_norm": 0.3885059356689453, "learning_rate": 0.00010504425583904131, "loss": 1.242, "step": 36545 }, { "epoch": 0.47489860194950645, "grad_norm": 0.2863497734069824, "learning_rate": 0.00010504165637712995, "loss": 1.1572, "step": 36546 }, { "epoch": 0.47491159649342235, "grad_norm": 0.39101648330688477, "learning_rate": 0.00010503905691521856, "loss": 1.3434, "step": 36547 }, { "epoch": 0.4749245910373382, "grad_norm": 0.41483575105667114, "learning_rate": 0.00010503645745330717, "loss": 1.5211, "step": 36548 }, { "epoch": 0.4749375855812541, "grad_norm": 0.4933433532714844, "learning_rate": 0.00010503385799139578, "loss": 1.3986, "step": 36549 }, { "epoch": 0.47495058012516994, "grad_norm": 0.37598884105682373, "learning_rate": 0.0001050312585294844, "loss": 1.3217, "step": 36550 }, { "epoch": 0.47496357466908584, "grad_norm": 0.3773234188556671, "learning_rate": 0.00010502865906757302, "loss": 1.328, "step": 36551 }, { "epoch": 0.4749765692130017, "grad_norm": 0.3681739866733551, "learning_rate": 0.00010502605960566163, "loss": 1.2298, "step": 36552 }, { "epoch": 0.4749895637569176, "grad_norm": 0.48149263858795166, "learning_rate": 0.00010502346014375024, "loss": 1.548, "step": 36553 }, { "epoch": 0.47500255830083343, "grad_norm": 0.4425208568572998, "learning_rate": 0.00010502086068183888, "loss": 1.4054, "step": 36554 }, { "epoch": 0.47501555284474933, "grad_norm": 0.4074486792087555, "learning_rate": 0.00010501826121992749, "loss": 1.4345, "step": 36555 }, { "epoch": 0.4750285473886652, "grad_norm": 0.3843323588371277, "learning_rate": 0.0001050156617580161, "loss": 1.286, "step": 36556 }, { "epoch": 0.4750415419325811, "grad_norm": 0.22954733669757843, "learning_rate": 0.00010501306229610471, "loss": 1.0923, "step": 36557 }, { "epoch": 0.4750545364764969, "grad_norm": 0.31725403666496277, "learning_rate": 0.00010501046283419333, "loss": 1.1898, "step": 36558 }, { "epoch": 0.4750675310204128, "grad_norm": 0.41434192657470703, "learning_rate": 0.00010500786337228194, "loss": 1.2978, "step": 36559 }, { "epoch": 0.47508052556432867, "grad_norm": 0.4388503432273865, "learning_rate": 0.00010500526391037056, "loss": 1.412, "step": 36560 }, { "epoch": 0.47509352010824457, "grad_norm": 0.34441328048706055, "learning_rate": 0.00010500266444845917, "loss": 1.6324, "step": 36561 }, { "epoch": 0.4751065146521604, "grad_norm": 0.32147669792175293, "learning_rate": 0.00010500006498654779, "loss": 1.1815, "step": 36562 }, { "epoch": 0.4751195091960763, "grad_norm": 0.6119058132171631, "learning_rate": 0.0001049974655246364, "loss": 1.4997, "step": 36563 }, { "epoch": 0.47513250373999216, "grad_norm": 0.28984853625297546, "learning_rate": 0.00010499486606272501, "loss": 1.1602, "step": 36564 }, { "epoch": 0.47514549828390806, "grad_norm": 0.39859700202941895, "learning_rate": 0.00010499226660081362, "loss": 1.5069, "step": 36565 }, { "epoch": 0.4751584928278239, "grad_norm": 0.3677665591239929, "learning_rate": 0.00010498966713890226, "loss": 1.4703, "step": 36566 }, { "epoch": 0.4751714873717398, "grad_norm": 0.32948583364486694, "learning_rate": 0.00010498706767699087, "loss": 1.185, "step": 36567 }, { "epoch": 0.47518448191565565, "grad_norm": 0.34933292865753174, "learning_rate": 0.00010498446821507948, "loss": 1.1793, "step": 36568 }, { "epoch": 0.47519747645957156, "grad_norm": 0.33874472975730896, "learning_rate": 0.0001049818687531681, "loss": 1.1813, "step": 36569 }, { "epoch": 0.4752104710034874, "grad_norm": 0.3756799101829529, "learning_rate": 0.00010497926929125672, "loss": 1.5125, "step": 36570 }, { "epoch": 0.4752234655474033, "grad_norm": 0.30656367540359497, "learning_rate": 0.00010497666982934533, "loss": 1.1415, "step": 36571 }, { "epoch": 0.47523646009131915, "grad_norm": 0.3955113887786865, "learning_rate": 0.00010497407036743394, "loss": 1.4206, "step": 36572 }, { "epoch": 0.47524945463523505, "grad_norm": 0.45511651039123535, "learning_rate": 0.00010497147090552255, "loss": 1.5368, "step": 36573 }, { "epoch": 0.4752624491791509, "grad_norm": 0.43069085478782654, "learning_rate": 0.00010496887144361119, "loss": 1.2757, "step": 36574 }, { "epoch": 0.4752754437230668, "grad_norm": 0.35471856594085693, "learning_rate": 0.00010496627198169979, "loss": 1.3408, "step": 36575 }, { "epoch": 0.47528843826698264, "grad_norm": 0.3758014142513275, "learning_rate": 0.0001049636725197884, "loss": 1.2191, "step": 36576 }, { "epoch": 0.47530143281089854, "grad_norm": 0.46816834807395935, "learning_rate": 0.00010496107305787704, "loss": 1.366, "step": 36577 }, { "epoch": 0.4753144273548144, "grad_norm": 0.305191308259964, "learning_rate": 0.00010495847359596565, "loss": 1.4379, "step": 36578 }, { "epoch": 0.4753274218987303, "grad_norm": 0.46390971541404724, "learning_rate": 0.00010495587413405426, "loss": 1.3382, "step": 36579 }, { "epoch": 0.47534041644264613, "grad_norm": 0.4372372031211853, "learning_rate": 0.00010495327467214287, "loss": 1.413, "step": 36580 }, { "epoch": 0.47535341098656203, "grad_norm": 0.4454105496406555, "learning_rate": 0.0001049506752102315, "loss": 1.3159, "step": 36581 }, { "epoch": 0.4753664055304779, "grad_norm": 0.49519380927085876, "learning_rate": 0.0001049480757483201, "loss": 1.4488, "step": 36582 }, { "epoch": 0.4753794000743938, "grad_norm": 0.41635656356811523, "learning_rate": 0.00010494547628640872, "loss": 1.3412, "step": 36583 }, { "epoch": 0.4753923946183096, "grad_norm": 0.392839252948761, "learning_rate": 0.00010494287682449733, "loss": 1.2337, "step": 36584 }, { "epoch": 0.4754053891622255, "grad_norm": 0.44731637835502625, "learning_rate": 0.00010494027736258596, "loss": 1.5325, "step": 36585 }, { "epoch": 0.47541838370614137, "grad_norm": 0.39682236313819885, "learning_rate": 0.00010493767790067458, "loss": 1.494, "step": 36586 }, { "epoch": 0.47543137825005727, "grad_norm": 0.399547278881073, "learning_rate": 0.00010493507843876317, "loss": 1.486, "step": 36587 }, { "epoch": 0.4754443727939731, "grad_norm": 0.3670264482498169, "learning_rate": 0.00010493247897685178, "loss": 1.5844, "step": 36588 }, { "epoch": 0.475457367337889, "grad_norm": 0.4040701389312744, "learning_rate": 0.00010492987951494042, "loss": 1.5111, "step": 36589 }, { "epoch": 0.47547036188180486, "grad_norm": 0.47769248485565186, "learning_rate": 0.00010492728005302903, "loss": 1.4891, "step": 36590 }, { "epoch": 0.47548335642572076, "grad_norm": 0.42718306183815, "learning_rate": 0.00010492468059111764, "loss": 1.5126, "step": 36591 }, { "epoch": 0.4754963509696366, "grad_norm": 0.3339137136936188, "learning_rate": 0.00010492208112920625, "loss": 1.2913, "step": 36592 }, { "epoch": 0.4755093455135525, "grad_norm": 0.35551586747169495, "learning_rate": 0.00010491948166729488, "loss": 1.4391, "step": 36593 }, { "epoch": 0.47552234005746835, "grad_norm": 0.542618453502655, "learning_rate": 0.00010491688220538349, "loss": 1.349, "step": 36594 }, { "epoch": 0.47553533460138425, "grad_norm": 0.3384997546672821, "learning_rate": 0.0001049142827434721, "loss": 1.2489, "step": 36595 }, { "epoch": 0.4755483291453001, "grad_norm": 0.3902345895767212, "learning_rate": 0.00010491168328156071, "loss": 1.3553, "step": 36596 }, { "epoch": 0.475561323689216, "grad_norm": 0.46782544255256653, "learning_rate": 0.00010490908381964935, "loss": 1.5085, "step": 36597 }, { "epoch": 0.47557431823313184, "grad_norm": 0.33022165298461914, "learning_rate": 0.00010490648435773796, "loss": 1.2355, "step": 36598 }, { "epoch": 0.47558731277704774, "grad_norm": 0.46046265959739685, "learning_rate": 0.00010490388489582657, "loss": 1.5841, "step": 36599 }, { "epoch": 0.4756003073209636, "grad_norm": 0.3958882987499237, "learning_rate": 0.00010490128543391517, "loss": 1.3544, "step": 36600 }, { "epoch": 0.4756133018648795, "grad_norm": 0.38472363352775574, "learning_rate": 0.00010489868597200381, "loss": 1.3404, "step": 36601 }, { "epoch": 0.47562629640879533, "grad_norm": 0.3318668305873871, "learning_rate": 0.00010489608651009242, "loss": 1.4403, "step": 36602 }, { "epoch": 0.47563929095271124, "grad_norm": 0.35916343331336975, "learning_rate": 0.00010489348704818103, "loss": 1.3123, "step": 36603 }, { "epoch": 0.4756522854966271, "grad_norm": 0.39827048778533936, "learning_rate": 0.00010489088758626964, "loss": 1.5645, "step": 36604 }, { "epoch": 0.475665280040543, "grad_norm": 0.43854910135269165, "learning_rate": 0.00010488828812435826, "loss": 1.2894, "step": 36605 }, { "epoch": 0.4756782745844588, "grad_norm": 0.4121466279029846, "learning_rate": 0.00010488568866244688, "loss": 1.4554, "step": 36606 }, { "epoch": 0.4756912691283747, "grad_norm": 0.35306692123413086, "learning_rate": 0.00010488308920053549, "loss": 1.1623, "step": 36607 }, { "epoch": 0.4757042636722906, "grad_norm": 0.34602034091949463, "learning_rate": 0.0001048804897386241, "loss": 1.345, "step": 36608 }, { "epoch": 0.4757172582162065, "grad_norm": 0.32779160141944885, "learning_rate": 0.00010487789027671273, "loss": 1.4213, "step": 36609 }, { "epoch": 0.4757302527601223, "grad_norm": 0.3613099455833435, "learning_rate": 0.00010487529081480135, "loss": 1.4689, "step": 36610 }, { "epoch": 0.4757432473040382, "grad_norm": 0.39151838421821594, "learning_rate": 0.00010487269135288996, "loss": 1.5746, "step": 36611 }, { "epoch": 0.47575624184795406, "grad_norm": 0.4524723291397095, "learning_rate": 0.00010487009189097858, "loss": 1.3822, "step": 36612 }, { "epoch": 0.47576923639186997, "grad_norm": 0.3835708200931549, "learning_rate": 0.00010486749242906719, "loss": 1.3472, "step": 36613 }, { "epoch": 0.4757822309357858, "grad_norm": 0.4190136194229126, "learning_rate": 0.0001048648929671558, "loss": 1.6112, "step": 36614 }, { "epoch": 0.4757952254797017, "grad_norm": 0.39557453989982605, "learning_rate": 0.00010486229350524441, "loss": 1.4293, "step": 36615 }, { "epoch": 0.47580822002361756, "grad_norm": 0.4559950530529022, "learning_rate": 0.00010485969404333305, "loss": 1.4053, "step": 36616 }, { "epoch": 0.47582121456753346, "grad_norm": 0.4888871908187866, "learning_rate": 0.00010485709458142165, "loss": 1.34, "step": 36617 }, { "epoch": 0.47583420911144936, "grad_norm": 0.5446009635925293, "learning_rate": 0.00010485449511951026, "loss": 1.3928, "step": 36618 }, { "epoch": 0.4758472036553652, "grad_norm": 0.4229432940483093, "learning_rate": 0.00010485189565759887, "loss": 1.5064, "step": 36619 }, { "epoch": 0.4758601981992811, "grad_norm": 0.3369017541408539, "learning_rate": 0.00010484929619568751, "loss": 1.558, "step": 36620 }, { "epoch": 0.47587319274319695, "grad_norm": 0.31404536962509155, "learning_rate": 0.00010484669673377612, "loss": 1.2749, "step": 36621 }, { "epoch": 0.47588618728711285, "grad_norm": 0.3842542767524719, "learning_rate": 0.00010484409727186473, "loss": 1.3331, "step": 36622 }, { "epoch": 0.4758991818310287, "grad_norm": 0.35615262389183044, "learning_rate": 0.00010484149780995334, "loss": 1.2082, "step": 36623 }, { "epoch": 0.4759121763749446, "grad_norm": 0.42282161116600037, "learning_rate": 0.00010483889834804197, "loss": 1.5044, "step": 36624 }, { "epoch": 0.47592517091886044, "grad_norm": 0.3108629882335663, "learning_rate": 0.00010483629888613058, "loss": 1.4641, "step": 36625 }, { "epoch": 0.47593816546277634, "grad_norm": 0.477609783411026, "learning_rate": 0.00010483369942421919, "loss": 1.3815, "step": 36626 }, { "epoch": 0.4759511600066922, "grad_norm": 0.3897896707057953, "learning_rate": 0.0001048310999623078, "loss": 1.3263, "step": 36627 }, { "epoch": 0.4759641545506081, "grad_norm": 0.3767321705818176, "learning_rate": 0.00010482850050039644, "loss": 1.5048, "step": 36628 }, { "epoch": 0.47597714909452393, "grad_norm": 0.4252581298351288, "learning_rate": 0.00010482590103848503, "loss": 1.4949, "step": 36629 }, { "epoch": 0.47599014363843983, "grad_norm": 0.3715384602546692, "learning_rate": 0.00010482330157657365, "loss": 1.4018, "step": 36630 }, { "epoch": 0.4760031381823557, "grad_norm": 0.43687939643859863, "learning_rate": 0.00010482070211466226, "loss": 1.3171, "step": 36631 }, { "epoch": 0.4760161327262716, "grad_norm": 0.3893674910068512, "learning_rate": 0.0001048181026527509, "loss": 1.2424, "step": 36632 }, { "epoch": 0.4760291272701874, "grad_norm": 0.3173537254333496, "learning_rate": 0.0001048155031908395, "loss": 1.2627, "step": 36633 }, { "epoch": 0.4760421218141033, "grad_norm": 0.37747469544410706, "learning_rate": 0.00010481290372892812, "loss": 1.2943, "step": 36634 }, { "epoch": 0.47605511635801917, "grad_norm": 0.38996821641921997, "learning_rate": 0.00010481030426701673, "loss": 1.3556, "step": 36635 }, { "epoch": 0.47606811090193507, "grad_norm": 0.4663032591342926, "learning_rate": 0.00010480770480510535, "loss": 1.6733, "step": 36636 }, { "epoch": 0.4760811054458509, "grad_norm": 0.3623739778995514, "learning_rate": 0.00010480510534319396, "loss": 1.4272, "step": 36637 }, { "epoch": 0.4760940999897668, "grad_norm": 0.39605262875556946, "learning_rate": 0.00010480250588128257, "loss": 1.4316, "step": 36638 }, { "epoch": 0.47610709453368266, "grad_norm": 0.42283758521080017, "learning_rate": 0.00010479990641937118, "loss": 1.3129, "step": 36639 }, { "epoch": 0.47612008907759856, "grad_norm": 0.3668553829193115, "learning_rate": 0.00010479730695745982, "loss": 1.5104, "step": 36640 }, { "epoch": 0.4761330836215144, "grad_norm": 0.38374996185302734, "learning_rate": 0.00010479470749554843, "loss": 1.446, "step": 36641 }, { "epoch": 0.4761460781654303, "grad_norm": 0.45210209488868713, "learning_rate": 0.00010479210803363703, "loss": 1.2748, "step": 36642 }, { "epoch": 0.47615907270934615, "grad_norm": 0.5122808218002319, "learning_rate": 0.00010478950857172564, "loss": 1.4623, "step": 36643 }, { "epoch": 0.47617206725326205, "grad_norm": 0.42158788442611694, "learning_rate": 0.00010478690910981428, "loss": 1.4949, "step": 36644 }, { "epoch": 0.4761850617971779, "grad_norm": 0.37460780143737793, "learning_rate": 0.00010478430964790289, "loss": 1.4465, "step": 36645 }, { "epoch": 0.4761980563410938, "grad_norm": 0.4514116942882538, "learning_rate": 0.0001047817101859915, "loss": 1.47, "step": 36646 }, { "epoch": 0.47621105088500965, "grad_norm": 0.37568414211273193, "learning_rate": 0.00010477911072408011, "loss": 1.2241, "step": 36647 }, { "epoch": 0.47622404542892555, "grad_norm": 0.42487233877182007, "learning_rate": 0.00010477651126216874, "loss": 1.3388, "step": 36648 }, { "epoch": 0.4762370399728414, "grad_norm": 0.4071959853172302, "learning_rate": 0.00010477391180025735, "loss": 1.4931, "step": 36649 }, { "epoch": 0.4762500345167573, "grad_norm": 0.3203359842300415, "learning_rate": 0.00010477131233834596, "loss": 1.2717, "step": 36650 }, { "epoch": 0.47626302906067314, "grad_norm": 0.46125444769859314, "learning_rate": 0.0001047687128764346, "loss": 1.2407, "step": 36651 }, { "epoch": 0.47627602360458904, "grad_norm": 0.3418574035167694, "learning_rate": 0.00010476611341452321, "loss": 1.2622, "step": 36652 }, { "epoch": 0.4762890181485049, "grad_norm": 0.32993462681770325, "learning_rate": 0.00010476351395261182, "loss": 1.4335, "step": 36653 }, { "epoch": 0.4763020126924208, "grad_norm": 0.4010336697101593, "learning_rate": 0.00010476091449070043, "loss": 1.5089, "step": 36654 }, { "epoch": 0.47631500723633663, "grad_norm": 0.3179466128349304, "learning_rate": 0.00010475831502878905, "loss": 1.3824, "step": 36655 }, { "epoch": 0.47632800178025253, "grad_norm": 0.4837910830974579, "learning_rate": 0.00010475571556687767, "loss": 1.5247, "step": 36656 }, { "epoch": 0.4763409963241684, "grad_norm": 0.3144797682762146, "learning_rate": 0.00010475311610496628, "loss": 1.2134, "step": 36657 }, { "epoch": 0.4763539908680843, "grad_norm": 0.441284716129303, "learning_rate": 0.00010475051664305489, "loss": 1.557, "step": 36658 }, { "epoch": 0.4763669854120001, "grad_norm": 0.36776813864707947, "learning_rate": 0.00010474791718114351, "loss": 1.5993, "step": 36659 }, { "epoch": 0.476379979955916, "grad_norm": 0.43245264887809753, "learning_rate": 0.00010474531771923212, "loss": 1.4374, "step": 36660 }, { "epoch": 0.47639297449983187, "grad_norm": 0.3009413480758667, "learning_rate": 0.00010474271825732073, "loss": 1.3869, "step": 36661 }, { "epoch": 0.47640596904374777, "grad_norm": 0.44934988021850586, "learning_rate": 0.00010474011879540934, "loss": 1.4371, "step": 36662 }, { "epoch": 0.4764189635876636, "grad_norm": 0.29567110538482666, "learning_rate": 0.00010473751933349798, "loss": 1.2406, "step": 36663 }, { "epoch": 0.4764319581315795, "grad_norm": 0.43415939807891846, "learning_rate": 0.0001047349198715866, "loss": 1.4056, "step": 36664 }, { "epoch": 0.47644495267549536, "grad_norm": 0.4963655471801758, "learning_rate": 0.0001047323204096752, "loss": 1.5326, "step": 36665 }, { "epoch": 0.47645794721941126, "grad_norm": 0.3924178183078766, "learning_rate": 0.00010472972094776382, "loss": 1.6769, "step": 36666 }, { "epoch": 0.4764709417633271, "grad_norm": 0.3905147910118103, "learning_rate": 0.00010472712148585244, "loss": 1.3099, "step": 36667 }, { "epoch": 0.476483936307243, "grad_norm": 0.3200380802154541, "learning_rate": 0.00010472452202394105, "loss": 1.4758, "step": 36668 }, { "epoch": 0.47649693085115885, "grad_norm": 0.41207966208457947, "learning_rate": 0.00010472192256202966, "loss": 1.3918, "step": 36669 }, { "epoch": 0.47650992539507475, "grad_norm": 0.4541751742362976, "learning_rate": 0.00010471932310011827, "loss": 1.4424, "step": 36670 }, { "epoch": 0.4765229199389906, "grad_norm": 0.4057697653770447, "learning_rate": 0.0001047167236382069, "loss": 1.3574, "step": 36671 }, { "epoch": 0.4765359144829065, "grad_norm": 0.3971586525440216, "learning_rate": 0.00010471412417629551, "loss": 1.4402, "step": 36672 }, { "epoch": 0.47654890902682234, "grad_norm": 0.3684231638908386, "learning_rate": 0.00010471152471438412, "loss": 1.3239, "step": 36673 }, { "epoch": 0.47656190357073824, "grad_norm": 0.3162926137447357, "learning_rate": 0.00010470892525247273, "loss": 1.5398, "step": 36674 }, { "epoch": 0.4765748981146541, "grad_norm": 0.37970682978630066, "learning_rate": 0.00010470632579056137, "loss": 1.3114, "step": 36675 }, { "epoch": 0.47658789265857, "grad_norm": 0.36380571126937866, "learning_rate": 0.00010470372632864998, "loss": 1.3879, "step": 36676 }, { "epoch": 0.47660088720248583, "grad_norm": 0.43981075286865234, "learning_rate": 0.00010470112686673859, "loss": 1.4355, "step": 36677 }, { "epoch": 0.47661388174640174, "grad_norm": 0.37799328565597534, "learning_rate": 0.0001046985274048272, "loss": 1.301, "step": 36678 }, { "epoch": 0.4766268762903176, "grad_norm": 0.4454060196876526, "learning_rate": 0.00010469592794291583, "loss": 1.3866, "step": 36679 }, { "epoch": 0.4766398708342335, "grad_norm": 0.27437472343444824, "learning_rate": 0.00010469332848100444, "loss": 1.3548, "step": 36680 }, { "epoch": 0.4766528653781493, "grad_norm": 0.3962048888206482, "learning_rate": 0.00010469072901909305, "loss": 1.3742, "step": 36681 }, { "epoch": 0.4766658599220652, "grad_norm": 0.27999815344810486, "learning_rate": 0.00010468812955718166, "loss": 1.5608, "step": 36682 }, { "epoch": 0.47667885446598107, "grad_norm": 0.3239463269710541, "learning_rate": 0.0001046855300952703, "loss": 1.2827, "step": 36683 }, { "epoch": 0.476691849009897, "grad_norm": 0.39868465065956116, "learning_rate": 0.0001046829306333589, "loss": 1.3061, "step": 36684 }, { "epoch": 0.4767048435538128, "grad_norm": 0.3230758011341095, "learning_rate": 0.0001046803311714475, "loss": 1.4353, "step": 36685 }, { "epoch": 0.4767178380977287, "grad_norm": 0.41743355989456177, "learning_rate": 0.00010467773170953614, "loss": 1.3816, "step": 36686 }, { "epoch": 0.47673083264164456, "grad_norm": 0.40204691886901855, "learning_rate": 0.00010467513224762475, "loss": 1.211, "step": 36687 }, { "epoch": 0.47674382718556046, "grad_norm": 0.45041894912719727, "learning_rate": 0.00010467253278571336, "loss": 1.3671, "step": 36688 }, { "epoch": 0.4767568217294763, "grad_norm": 0.4229316711425781, "learning_rate": 0.00010466993332380198, "loss": 1.5293, "step": 36689 }, { "epoch": 0.4767698162733922, "grad_norm": 0.43961793184280396, "learning_rate": 0.0001046673338618906, "loss": 1.4271, "step": 36690 }, { "epoch": 0.47678281081730806, "grad_norm": 0.3478478789329529, "learning_rate": 0.00010466473439997921, "loss": 1.3915, "step": 36691 }, { "epoch": 0.47679580536122396, "grad_norm": 0.38445302844047546, "learning_rate": 0.00010466213493806782, "loss": 1.2262, "step": 36692 }, { "epoch": 0.4768087999051398, "grad_norm": 0.3939126431941986, "learning_rate": 0.00010465953547615643, "loss": 1.2389, "step": 36693 }, { "epoch": 0.4768217944490557, "grad_norm": 0.39434489607810974, "learning_rate": 0.00010465693601424507, "loss": 1.4247, "step": 36694 }, { "epoch": 0.4768347889929716, "grad_norm": 0.3936539590358734, "learning_rate": 0.00010465433655233368, "loss": 1.3082, "step": 36695 }, { "epoch": 0.47684778353688745, "grad_norm": 0.28185704350471497, "learning_rate": 0.00010465173709042229, "loss": 1.1577, "step": 36696 }, { "epoch": 0.47686077808080335, "grad_norm": 0.44948533177375793, "learning_rate": 0.00010464913762851089, "loss": 1.3791, "step": 36697 }, { "epoch": 0.4768737726247192, "grad_norm": 0.3790379464626312, "learning_rate": 0.00010464653816659953, "loss": 1.292, "step": 36698 }, { "epoch": 0.4768867671686351, "grad_norm": 0.4188931882381439, "learning_rate": 0.00010464393870468814, "loss": 1.2847, "step": 36699 }, { "epoch": 0.47689976171255094, "grad_norm": 0.39663296937942505, "learning_rate": 0.00010464133924277675, "loss": 1.3052, "step": 36700 }, { "epoch": 0.47691275625646684, "grad_norm": 0.3618800640106201, "learning_rate": 0.00010463873978086536, "loss": 1.419, "step": 36701 }, { "epoch": 0.4769257508003827, "grad_norm": 0.40006691217422485, "learning_rate": 0.00010463614031895399, "loss": 1.5252, "step": 36702 }, { "epoch": 0.4769387453442986, "grad_norm": 0.47297531366348267, "learning_rate": 0.0001046335408570426, "loss": 1.5889, "step": 36703 }, { "epoch": 0.47695173988821443, "grad_norm": 0.4589444100856781, "learning_rate": 0.00010463094139513121, "loss": 1.5531, "step": 36704 }, { "epoch": 0.47696473443213033, "grad_norm": 0.3990491032600403, "learning_rate": 0.00010462834193321982, "loss": 1.4231, "step": 36705 }, { "epoch": 0.4769777289760462, "grad_norm": 0.4526325762271881, "learning_rate": 0.00010462574247130846, "loss": 1.3566, "step": 36706 }, { "epoch": 0.4769907235199621, "grad_norm": 0.4384739398956299, "learning_rate": 0.00010462314300939707, "loss": 1.4456, "step": 36707 }, { "epoch": 0.4770037180638779, "grad_norm": 0.37170732021331787, "learning_rate": 0.00010462054354748568, "loss": 1.3146, "step": 36708 }, { "epoch": 0.4770167126077938, "grad_norm": 0.3383696973323822, "learning_rate": 0.00010461794408557428, "loss": 1.3831, "step": 36709 }, { "epoch": 0.47702970715170967, "grad_norm": 0.43649008870124817, "learning_rate": 0.00010461534462366291, "loss": 1.5844, "step": 36710 }, { "epoch": 0.47704270169562557, "grad_norm": 0.38313889503479004, "learning_rate": 0.00010461274516175152, "loss": 1.4672, "step": 36711 }, { "epoch": 0.4770556962395414, "grad_norm": 0.35692378878593445, "learning_rate": 0.00010461014569984014, "loss": 1.4175, "step": 36712 }, { "epoch": 0.4770686907834573, "grad_norm": 0.4566735625267029, "learning_rate": 0.00010460754623792875, "loss": 1.5214, "step": 36713 }, { "epoch": 0.47708168532737316, "grad_norm": 0.4379623830318451, "learning_rate": 0.00010460494677601737, "loss": 1.4347, "step": 36714 }, { "epoch": 0.47709467987128906, "grad_norm": 0.47974613308906555, "learning_rate": 0.00010460234731410598, "loss": 1.403, "step": 36715 }, { "epoch": 0.4771076744152049, "grad_norm": 0.34905222058296204, "learning_rate": 0.00010459974785219459, "loss": 1.3785, "step": 36716 }, { "epoch": 0.4771206689591208, "grad_norm": 0.2997996211051941, "learning_rate": 0.0001045971483902832, "loss": 1.4868, "step": 36717 }, { "epoch": 0.47713366350303665, "grad_norm": 0.40797334909439087, "learning_rate": 0.00010459454892837184, "loss": 1.3461, "step": 36718 }, { "epoch": 0.47714665804695255, "grad_norm": 0.3335047662258148, "learning_rate": 0.00010459194946646045, "loss": 1.4098, "step": 36719 }, { "epoch": 0.4771596525908684, "grad_norm": 0.48377835750579834, "learning_rate": 0.00010458935000454906, "loss": 1.4923, "step": 36720 }, { "epoch": 0.4771726471347843, "grad_norm": 0.4701981544494629, "learning_rate": 0.00010458675054263767, "loss": 1.3209, "step": 36721 }, { "epoch": 0.47718564167870015, "grad_norm": 0.35936102271080017, "learning_rate": 0.0001045841510807263, "loss": 1.438, "step": 36722 }, { "epoch": 0.47719863622261605, "grad_norm": 0.5204327702522278, "learning_rate": 0.00010458155161881491, "loss": 1.5019, "step": 36723 }, { "epoch": 0.4772116307665319, "grad_norm": 0.38770392537117004, "learning_rate": 0.00010457895215690352, "loss": 1.299, "step": 36724 }, { "epoch": 0.4772246253104478, "grad_norm": 0.4550442397594452, "learning_rate": 0.00010457635269499216, "loss": 1.4058, "step": 36725 }, { "epoch": 0.47723761985436364, "grad_norm": 0.40520885586738586, "learning_rate": 0.00010457375323308076, "loss": 1.4137, "step": 36726 }, { "epoch": 0.47725061439827954, "grad_norm": 0.38424715399742126, "learning_rate": 0.00010457115377116937, "loss": 1.4156, "step": 36727 }, { "epoch": 0.4772636089421954, "grad_norm": 0.40615326166152954, "learning_rate": 0.00010456855430925798, "loss": 1.5019, "step": 36728 }, { "epoch": 0.4772766034861113, "grad_norm": 0.3706136643886566, "learning_rate": 0.00010456595484734662, "loss": 1.4819, "step": 36729 }, { "epoch": 0.47728959803002713, "grad_norm": 0.3748354911804199, "learning_rate": 0.00010456335538543523, "loss": 1.474, "step": 36730 }, { "epoch": 0.47730259257394303, "grad_norm": 0.5021761059761047, "learning_rate": 0.00010456075592352384, "loss": 1.441, "step": 36731 }, { "epoch": 0.4773155871178589, "grad_norm": 0.3414623737335205, "learning_rate": 0.00010455815646161245, "loss": 1.1933, "step": 36732 }, { "epoch": 0.4773285816617748, "grad_norm": 0.46403640508651733, "learning_rate": 0.00010455555699970107, "loss": 1.3232, "step": 36733 }, { "epoch": 0.4773415762056906, "grad_norm": 0.34871384501457214, "learning_rate": 0.00010455295753778968, "loss": 1.0337, "step": 36734 }, { "epoch": 0.4773545707496065, "grad_norm": 0.5148264765739441, "learning_rate": 0.0001045503580758783, "loss": 1.3805, "step": 36735 }, { "epoch": 0.47736756529352237, "grad_norm": 0.4619481861591339, "learning_rate": 0.0001045477586139669, "loss": 1.3468, "step": 36736 }, { "epoch": 0.47738055983743827, "grad_norm": 0.3305440843105316, "learning_rate": 0.00010454515915205554, "loss": 1.2342, "step": 36737 }, { "epoch": 0.4773935543813541, "grad_norm": 0.44203656911849976, "learning_rate": 0.00010454255969014416, "loss": 1.2909, "step": 36738 }, { "epoch": 0.47740654892527, "grad_norm": 0.37053728103637695, "learning_rate": 0.00010453996022823275, "loss": 1.1953, "step": 36739 }, { "epoch": 0.47741954346918586, "grad_norm": 0.3549066185951233, "learning_rate": 0.00010453736076632136, "loss": 1.4241, "step": 36740 }, { "epoch": 0.47743253801310176, "grad_norm": 0.4641919434070587, "learning_rate": 0.00010453476130441, "loss": 1.2895, "step": 36741 }, { "epoch": 0.4774455325570176, "grad_norm": 0.3824481666088104, "learning_rate": 0.00010453216184249861, "loss": 1.3224, "step": 36742 }, { "epoch": 0.4774585271009335, "grad_norm": 0.5151437520980835, "learning_rate": 0.00010452956238058722, "loss": 1.3017, "step": 36743 }, { "epoch": 0.47747152164484935, "grad_norm": 0.4059079885482788, "learning_rate": 0.00010452696291867583, "loss": 1.3959, "step": 36744 }, { "epoch": 0.47748451618876525, "grad_norm": 0.5170286297798157, "learning_rate": 0.00010452436345676446, "loss": 1.3562, "step": 36745 }, { "epoch": 0.4774975107326811, "grad_norm": 0.4208987057209015, "learning_rate": 0.00010452176399485307, "loss": 1.3479, "step": 36746 }, { "epoch": 0.477510505276597, "grad_norm": 0.38757646083831787, "learning_rate": 0.00010451916453294168, "loss": 1.3206, "step": 36747 }, { "epoch": 0.47752349982051284, "grad_norm": 0.4275495111942291, "learning_rate": 0.00010451656507103029, "loss": 1.3001, "step": 36748 }, { "epoch": 0.47753649436442874, "grad_norm": 0.4168182909488678, "learning_rate": 0.00010451396560911893, "loss": 1.5246, "step": 36749 }, { "epoch": 0.4775494889083446, "grad_norm": 0.4191395044326782, "learning_rate": 0.00010451136614720754, "loss": 1.3147, "step": 36750 }, { "epoch": 0.4775624834522605, "grad_norm": 0.4682447016239166, "learning_rate": 0.00010450876668529614, "loss": 1.3921, "step": 36751 }, { "epoch": 0.47757547799617633, "grad_norm": 0.5731196999549866, "learning_rate": 0.00010450616722338475, "loss": 1.3467, "step": 36752 }, { "epoch": 0.47758847254009223, "grad_norm": 0.3306954801082611, "learning_rate": 0.00010450356776147339, "loss": 1.3271, "step": 36753 }, { "epoch": 0.4776014670840081, "grad_norm": 0.3633701801300049, "learning_rate": 0.000104500968299562, "loss": 1.4138, "step": 36754 }, { "epoch": 0.477614461627924, "grad_norm": 0.3771771788597107, "learning_rate": 0.00010449836883765061, "loss": 1.2619, "step": 36755 }, { "epoch": 0.4776274561718398, "grad_norm": 0.7041621804237366, "learning_rate": 0.00010449576937573922, "loss": 1.35, "step": 36756 }, { "epoch": 0.4776404507157557, "grad_norm": 0.43414121866226196, "learning_rate": 0.00010449316991382784, "loss": 1.3361, "step": 36757 }, { "epoch": 0.47765344525967157, "grad_norm": 0.4131770730018616, "learning_rate": 0.00010449057045191645, "loss": 1.264, "step": 36758 }, { "epoch": 0.4776664398035875, "grad_norm": 0.48765864968299866, "learning_rate": 0.00010448797099000507, "loss": 1.4956, "step": 36759 }, { "epoch": 0.4776794343475033, "grad_norm": 0.36018362641334534, "learning_rate": 0.0001044853715280937, "loss": 1.4049, "step": 36760 }, { "epoch": 0.4776924288914192, "grad_norm": 0.4293461740016937, "learning_rate": 0.00010448277206618231, "loss": 1.3266, "step": 36761 }, { "epoch": 0.47770542343533506, "grad_norm": 0.41854169964790344, "learning_rate": 0.00010448017260427093, "loss": 1.3138, "step": 36762 }, { "epoch": 0.47771841797925096, "grad_norm": 0.38281795382499695, "learning_rate": 0.00010447757314235954, "loss": 1.4074, "step": 36763 }, { "epoch": 0.4777314125231668, "grad_norm": 0.40058383345603943, "learning_rate": 0.00010447497368044816, "loss": 1.4795, "step": 36764 }, { "epoch": 0.4777444070670827, "grad_norm": 0.41241368651390076, "learning_rate": 0.00010447237421853677, "loss": 1.5214, "step": 36765 }, { "epoch": 0.47775740161099856, "grad_norm": 0.40768712759017944, "learning_rate": 0.00010446977475662538, "loss": 1.3962, "step": 36766 }, { "epoch": 0.47777039615491446, "grad_norm": 0.48448818922042847, "learning_rate": 0.000104467175294714, "loss": 1.3316, "step": 36767 }, { "epoch": 0.4777833906988303, "grad_norm": 0.3831901252269745, "learning_rate": 0.00010446457583280262, "loss": 1.5922, "step": 36768 }, { "epoch": 0.4777963852427462, "grad_norm": 0.2974262535572052, "learning_rate": 0.00010446197637089123, "loss": 1.413, "step": 36769 }, { "epoch": 0.4778093797866621, "grad_norm": 0.4693393409252167, "learning_rate": 0.00010445937690897984, "loss": 1.448, "step": 36770 }, { "epoch": 0.47782237433057795, "grad_norm": 0.38295403122901917, "learning_rate": 0.00010445677744706845, "loss": 1.1731, "step": 36771 }, { "epoch": 0.47783536887449385, "grad_norm": 0.3569636046886444, "learning_rate": 0.00010445417798515709, "loss": 1.5175, "step": 36772 }, { "epoch": 0.4778483634184097, "grad_norm": 0.35745254158973694, "learning_rate": 0.0001044515785232457, "loss": 1.3236, "step": 36773 }, { "epoch": 0.4778613579623256, "grad_norm": 0.3957768380641937, "learning_rate": 0.00010444897906133431, "loss": 1.5653, "step": 36774 }, { "epoch": 0.47787435250624144, "grad_norm": 0.38975584506988525, "learning_rate": 0.00010444637959942292, "loss": 1.46, "step": 36775 }, { "epoch": 0.47788734705015734, "grad_norm": 0.36142873764038086, "learning_rate": 0.00010444378013751155, "loss": 1.4422, "step": 36776 }, { "epoch": 0.4779003415940732, "grad_norm": 0.35678035020828247, "learning_rate": 0.00010444118067560016, "loss": 1.3891, "step": 36777 }, { "epoch": 0.4779133361379891, "grad_norm": 0.2892293930053711, "learning_rate": 0.00010443858121368877, "loss": 1.1081, "step": 36778 }, { "epoch": 0.47792633068190493, "grad_norm": 0.5017591714859009, "learning_rate": 0.00010443598175177738, "loss": 1.4209, "step": 36779 }, { "epoch": 0.47793932522582083, "grad_norm": 0.402207612991333, "learning_rate": 0.00010443338228986602, "loss": 1.4113, "step": 36780 }, { "epoch": 0.4779523197697367, "grad_norm": 0.39133259654045105, "learning_rate": 0.00010443078282795461, "loss": 1.3819, "step": 36781 }, { "epoch": 0.4779653143136526, "grad_norm": 0.28217756748199463, "learning_rate": 0.00010442818336604323, "loss": 1.2681, "step": 36782 }, { "epoch": 0.4779783088575684, "grad_norm": 0.29145336151123047, "learning_rate": 0.00010442558390413184, "loss": 1.24, "step": 36783 }, { "epoch": 0.4779913034014843, "grad_norm": 0.33243465423583984, "learning_rate": 0.00010442298444222047, "loss": 1.3431, "step": 36784 }, { "epoch": 0.47800429794540017, "grad_norm": 0.41443878412246704, "learning_rate": 0.00010442038498030909, "loss": 1.3205, "step": 36785 }, { "epoch": 0.47801729248931607, "grad_norm": 0.47226008772850037, "learning_rate": 0.0001044177855183977, "loss": 1.46, "step": 36786 }, { "epoch": 0.4780302870332319, "grad_norm": 0.4109139144420624, "learning_rate": 0.00010441518605648631, "loss": 1.4472, "step": 36787 }, { "epoch": 0.4780432815771478, "grad_norm": 0.3589145839214325, "learning_rate": 0.00010441258659457493, "loss": 1.2934, "step": 36788 }, { "epoch": 0.47805627612106366, "grad_norm": 0.4250454306602478, "learning_rate": 0.00010440998713266354, "loss": 1.298, "step": 36789 }, { "epoch": 0.47806927066497956, "grad_norm": 0.3481067419052124, "learning_rate": 0.00010440738767075215, "loss": 1.2307, "step": 36790 }, { "epoch": 0.4780822652088954, "grad_norm": 0.3831326961517334, "learning_rate": 0.00010440478820884076, "loss": 1.4705, "step": 36791 }, { "epoch": 0.4780952597528113, "grad_norm": 0.402998149394989, "learning_rate": 0.0001044021887469294, "loss": 1.4058, "step": 36792 }, { "epoch": 0.47810825429672715, "grad_norm": 0.3856375813484192, "learning_rate": 0.000104399589285018, "loss": 1.4211, "step": 36793 }, { "epoch": 0.47812124884064305, "grad_norm": 0.38131654262542725, "learning_rate": 0.00010439698982310661, "loss": 1.1716, "step": 36794 }, { "epoch": 0.4781342433845589, "grad_norm": 0.3915690779685974, "learning_rate": 0.00010439439036119522, "loss": 1.2754, "step": 36795 }, { "epoch": 0.4781472379284748, "grad_norm": 0.47065722942352295, "learning_rate": 0.00010439179089928386, "loss": 1.4023, "step": 36796 }, { "epoch": 0.47816023247239064, "grad_norm": 0.3348385691642761, "learning_rate": 0.00010438919143737247, "loss": 1.2122, "step": 36797 }, { "epoch": 0.47817322701630655, "grad_norm": 0.4033292829990387, "learning_rate": 0.00010438659197546108, "loss": 1.3138, "step": 36798 }, { "epoch": 0.4781862215602224, "grad_norm": 0.4553394913673401, "learning_rate": 0.0001043839925135497, "loss": 1.5404, "step": 36799 }, { "epoch": 0.4781992161041383, "grad_norm": 0.5414491891860962, "learning_rate": 0.00010438139305163832, "loss": 1.4562, "step": 36800 }, { "epoch": 0.47821221064805414, "grad_norm": 0.42657220363616943, "learning_rate": 0.00010437879358972693, "loss": 1.5404, "step": 36801 }, { "epoch": 0.47822520519197004, "grad_norm": 0.3631247878074646, "learning_rate": 0.00010437619412781554, "loss": 1.2505, "step": 36802 }, { "epoch": 0.4782381997358859, "grad_norm": 0.34981024265289307, "learning_rate": 0.00010437359466590418, "loss": 1.4006, "step": 36803 }, { "epoch": 0.4782511942798018, "grad_norm": 0.4517924189567566, "learning_rate": 0.00010437099520399279, "loss": 1.2859, "step": 36804 }, { "epoch": 0.47826418882371763, "grad_norm": 0.36635440587997437, "learning_rate": 0.0001043683957420814, "loss": 1.349, "step": 36805 }, { "epoch": 0.47827718336763353, "grad_norm": 0.39470964670181274, "learning_rate": 0.00010436579628017, "loss": 1.5561, "step": 36806 }, { "epoch": 0.4782901779115494, "grad_norm": 0.3966015875339508, "learning_rate": 0.00010436319681825863, "loss": 1.2293, "step": 36807 }, { "epoch": 0.4783031724554653, "grad_norm": 0.45323726534843445, "learning_rate": 0.00010436059735634725, "loss": 1.4741, "step": 36808 }, { "epoch": 0.4783161669993811, "grad_norm": 0.3808179795742035, "learning_rate": 0.00010435799789443586, "loss": 1.3603, "step": 36809 }, { "epoch": 0.478329161543297, "grad_norm": 0.3350905776023865, "learning_rate": 0.00010435539843252447, "loss": 1.3055, "step": 36810 }, { "epoch": 0.47834215608721287, "grad_norm": 0.402875691652298, "learning_rate": 0.00010435279897061309, "loss": 1.4636, "step": 36811 }, { "epoch": 0.47835515063112877, "grad_norm": 0.3703416585922241, "learning_rate": 0.0001043501995087017, "loss": 1.3091, "step": 36812 }, { "epoch": 0.4783681451750446, "grad_norm": 0.31940892338752747, "learning_rate": 0.00010434760004679031, "loss": 1.4521, "step": 36813 }, { "epoch": 0.4783811397189605, "grad_norm": 0.3889502286911011, "learning_rate": 0.00010434500058487892, "loss": 1.3578, "step": 36814 }, { "epoch": 0.47839413426287636, "grad_norm": 0.3272900879383087, "learning_rate": 0.00010434240112296756, "loss": 1.1229, "step": 36815 }, { "epoch": 0.47840712880679226, "grad_norm": 0.4614048898220062, "learning_rate": 0.00010433980166105617, "loss": 1.3222, "step": 36816 }, { "epoch": 0.4784201233507081, "grad_norm": 0.44233179092407227, "learning_rate": 0.00010433720219914478, "loss": 1.4231, "step": 36817 }, { "epoch": 0.478433117894624, "grad_norm": 0.37301528453826904, "learning_rate": 0.0001043346027372334, "loss": 1.2571, "step": 36818 }, { "epoch": 0.47844611243853985, "grad_norm": 0.34721657633781433, "learning_rate": 0.00010433200327532202, "loss": 1.4824, "step": 36819 }, { "epoch": 0.47845910698245575, "grad_norm": 0.34401950240135193, "learning_rate": 0.00010432940381341063, "loss": 1.5498, "step": 36820 }, { "epoch": 0.4784721015263716, "grad_norm": 0.46975505352020264, "learning_rate": 0.00010432680435149924, "loss": 1.4109, "step": 36821 }, { "epoch": 0.4784850960702875, "grad_norm": 0.43760818243026733, "learning_rate": 0.00010432420488958785, "loss": 1.2845, "step": 36822 }, { "epoch": 0.47849809061420334, "grad_norm": 0.4412611126899719, "learning_rate": 0.00010432160542767648, "loss": 1.5004, "step": 36823 }, { "epoch": 0.47851108515811924, "grad_norm": 0.4533897936344147, "learning_rate": 0.00010431900596576509, "loss": 1.4093, "step": 36824 }, { "epoch": 0.4785240797020351, "grad_norm": 0.2852482199668884, "learning_rate": 0.0001043164065038537, "loss": 1.2727, "step": 36825 }, { "epoch": 0.478537074245951, "grad_norm": 0.3644379675388336, "learning_rate": 0.00010431380704194231, "loss": 1.1859, "step": 36826 }, { "epoch": 0.47855006878986683, "grad_norm": 0.4098767936229706, "learning_rate": 0.00010431120758003095, "loss": 1.3433, "step": 36827 }, { "epoch": 0.47856306333378273, "grad_norm": 0.3520409166812897, "learning_rate": 0.00010430860811811956, "loss": 1.4782, "step": 36828 }, { "epoch": 0.4785760578776986, "grad_norm": 0.3600299060344696, "learning_rate": 0.00010430600865620817, "loss": 1.4798, "step": 36829 }, { "epoch": 0.4785890524216145, "grad_norm": 0.3805144131183624, "learning_rate": 0.00010430340919429678, "loss": 1.3649, "step": 36830 }, { "epoch": 0.4786020469655303, "grad_norm": 0.3655862510204315, "learning_rate": 0.0001043008097323854, "loss": 1.5389, "step": 36831 }, { "epoch": 0.4786150415094462, "grad_norm": 0.3184344172477722, "learning_rate": 0.00010429821027047402, "loss": 1.337, "step": 36832 }, { "epoch": 0.47862803605336207, "grad_norm": 0.2409345954656601, "learning_rate": 0.00010429561080856263, "loss": 1.1638, "step": 36833 }, { "epoch": 0.47864103059727797, "grad_norm": 0.3876078724861145, "learning_rate": 0.00010429301134665127, "loss": 1.1411, "step": 36834 }, { "epoch": 0.4786540251411938, "grad_norm": 0.4354804754257202, "learning_rate": 0.00010429041188473986, "loss": 1.1954, "step": 36835 }, { "epoch": 0.4786670196851097, "grad_norm": 0.41031157970428467, "learning_rate": 0.00010428781242282847, "loss": 1.2795, "step": 36836 }, { "epoch": 0.47868001422902556, "grad_norm": 0.43959349393844604, "learning_rate": 0.00010428521296091708, "loss": 1.3217, "step": 36837 }, { "epoch": 0.47869300877294146, "grad_norm": 0.47185084223747253, "learning_rate": 0.00010428261349900572, "loss": 1.3637, "step": 36838 }, { "epoch": 0.4787060033168573, "grad_norm": 0.42578238248825073, "learning_rate": 0.00010428001403709433, "loss": 1.3615, "step": 36839 }, { "epoch": 0.4787189978607732, "grad_norm": 0.4404887557029724, "learning_rate": 0.00010427741457518294, "loss": 1.4168, "step": 36840 }, { "epoch": 0.47873199240468906, "grad_norm": 0.3674813508987427, "learning_rate": 0.00010427481511327156, "loss": 1.3231, "step": 36841 }, { "epoch": 0.47874498694860496, "grad_norm": 0.42611491680145264, "learning_rate": 0.00010427221565136018, "loss": 1.4343, "step": 36842 }, { "epoch": 0.4787579814925208, "grad_norm": 0.4254211187362671, "learning_rate": 0.00010426961618944879, "loss": 1.3801, "step": 36843 }, { "epoch": 0.4787709760364367, "grad_norm": 0.4078112542629242, "learning_rate": 0.0001042670167275374, "loss": 1.3886, "step": 36844 }, { "epoch": 0.47878397058035255, "grad_norm": 0.46777769923210144, "learning_rate": 0.00010426441726562601, "loss": 1.471, "step": 36845 }, { "epoch": 0.47879696512426845, "grad_norm": 0.474729984998703, "learning_rate": 0.00010426181780371465, "loss": 1.3362, "step": 36846 }, { "epoch": 0.47880995966818435, "grad_norm": 0.37238064408302307, "learning_rate": 0.00010425921834180326, "loss": 1.3655, "step": 36847 }, { "epoch": 0.4788229542121002, "grad_norm": 0.40230315923690796, "learning_rate": 0.00010425661887989186, "loss": 1.3212, "step": 36848 }, { "epoch": 0.4788359487560161, "grad_norm": 0.4350760579109192, "learning_rate": 0.00010425401941798047, "loss": 1.4981, "step": 36849 }, { "epoch": 0.47884894329993194, "grad_norm": 0.3952096104621887, "learning_rate": 0.00010425141995606911, "loss": 1.2596, "step": 36850 }, { "epoch": 0.47886193784384784, "grad_norm": 0.4227961599826813, "learning_rate": 0.00010424882049415772, "loss": 1.3455, "step": 36851 }, { "epoch": 0.4788749323877637, "grad_norm": 0.34530699253082275, "learning_rate": 0.00010424622103224633, "loss": 1.4071, "step": 36852 }, { "epoch": 0.4788879269316796, "grad_norm": 0.3591969311237335, "learning_rate": 0.00010424362157033494, "loss": 1.241, "step": 36853 }, { "epoch": 0.47890092147559543, "grad_norm": 0.3156588077545166, "learning_rate": 0.00010424102210842357, "loss": 1.4248, "step": 36854 }, { "epoch": 0.47891391601951133, "grad_norm": 0.2812151610851288, "learning_rate": 0.00010423842264651218, "loss": 1.0086, "step": 36855 }, { "epoch": 0.4789269105634272, "grad_norm": 0.314841628074646, "learning_rate": 0.00010423582318460079, "loss": 1.4356, "step": 36856 }, { "epoch": 0.4789399051073431, "grad_norm": 0.4753133952617645, "learning_rate": 0.0001042332237226894, "loss": 1.3079, "step": 36857 }, { "epoch": 0.4789528996512589, "grad_norm": 0.36326488852500916, "learning_rate": 0.00010423062426077804, "loss": 1.3631, "step": 36858 }, { "epoch": 0.4789658941951748, "grad_norm": 0.4626297950744629, "learning_rate": 0.00010422802479886665, "loss": 1.4716, "step": 36859 }, { "epoch": 0.47897888873909067, "grad_norm": 0.3644844591617584, "learning_rate": 0.00010422542533695526, "loss": 1.299, "step": 36860 }, { "epoch": 0.47899188328300657, "grad_norm": 0.4530969262123108, "learning_rate": 0.00010422282587504386, "loss": 1.4728, "step": 36861 }, { "epoch": 0.4790048778269224, "grad_norm": 0.41439902782440186, "learning_rate": 0.0001042202264131325, "loss": 1.4121, "step": 36862 }, { "epoch": 0.4790178723708383, "grad_norm": 0.4508451521396637, "learning_rate": 0.0001042176269512211, "loss": 1.4569, "step": 36863 }, { "epoch": 0.47903086691475416, "grad_norm": 0.3606402575969696, "learning_rate": 0.00010421502748930972, "loss": 1.4459, "step": 36864 }, { "epoch": 0.47904386145867006, "grad_norm": 0.3238128423690796, "learning_rate": 0.00010421242802739833, "loss": 1.3325, "step": 36865 }, { "epoch": 0.4790568560025859, "grad_norm": 0.38639646768569946, "learning_rate": 0.00010420982856548695, "loss": 1.4325, "step": 36866 }, { "epoch": 0.4790698505465018, "grad_norm": 0.44729191064834595, "learning_rate": 0.00010420722910357556, "loss": 1.4411, "step": 36867 }, { "epoch": 0.47908284509041765, "grad_norm": 0.46440497040748596, "learning_rate": 0.00010420462964166417, "loss": 1.3538, "step": 36868 }, { "epoch": 0.47909583963433355, "grad_norm": 0.4782659411430359, "learning_rate": 0.00010420203017975278, "loss": 1.4666, "step": 36869 }, { "epoch": 0.4791088341782494, "grad_norm": 0.32168638706207275, "learning_rate": 0.00010419943071784142, "loss": 1.3294, "step": 36870 }, { "epoch": 0.4791218287221653, "grad_norm": 0.46257731318473816, "learning_rate": 0.00010419683125593003, "loss": 1.3517, "step": 36871 }, { "epoch": 0.47913482326608114, "grad_norm": 0.44591307640075684, "learning_rate": 0.00010419423179401864, "loss": 1.4001, "step": 36872 }, { "epoch": 0.47914781780999705, "grad_norm": 0.38360798358917236, "learning_rate": 0.00010419163233210727, "loss": 1.2712, "step": 36873 }, { "epoch": 0.4791608123539129, "grad_norm": 0.37663766741752625, "learning_rate": 0.00010418903287019588, "loss": 1.4411, "step": 36874 }, { "epoch": 0.4791738068978288, "grad_norm": 0.49456003308296204, "learning_rate": 0.00010418643340828449, "loss": 1.2319, "step": 36875 }, { "epoch": 0.47918680144174464, "grad_norm": 0.4339449107646942, "learning_rate": 0.0001041838339463731, "loss": 1.4641, "step": 36876 }, { "epoch": 0.47919979598566054, "grad_norm": 0.3863827586174011, "learning_rate": 0.00010418123448446173, "loss": 1.3991, "step": 36877 }, { "epoch": 0.4792127905295764, "grad_norm": 0.3222728371620178, "learning_rate": 0.00010417863502255034, "loss": 1.3971, "step": 36878 }, { "epoch": 0.4792257850734923, "grad_norm": 0.4629512429237366, "learning_rate": 0.00010417603556063895, "loss": 1.4163, "step": 36879 }, { "epoch": 0.47923877961740813, "grad_norm": 0.3564627468585968, "learning_rate": 0.00010417343609872756, "loss": 1.3156, "step": 36880 }, { "epoch": 0.47925177416132403, "grad_norm": 0.4132477045059204, "learning_rate": 0.0001041708366368162, "loss": 1.4443, "step": 36881 }, { "epoch": 0.4792647687052399, "grad_norm": 0.4465820789337158, "learning_rate": 0.00010416823717490481, "loss": 1.2271, "step": 36882 }, { "epoch": 0.4792777632491558, "grad_norm": 0.44737479090690613, "learning_rate": 0.00010416563771299342, "loss": 1.5571, "step": 36883 }, { "epoch": 0.4792907577930716, "grad_norm": 0.40295544266700745, "learning_rate": 0.00010416303825108203, "loss": 1.4604, "step": 36884 }, { "epoch": 0.4793037523369875, "grad_norm": 0.3811386227607727, "learning_rate": 0.00010416043878917065, "loss": 1.2811, "step": 36885 }, { "epoch": 0.47931674688090337, "grad_norm": 0.2680179178714752, "learning_rate": 0.00010415783932725926, "loss": 1.1362, "step": 36886 }, { "epoch": 0.47932974142481927, "grad_norm": 0.4625927805900574, "learning_rate": 0.00010415523986534787, "loss": 1.3885, "step": 36887 }, { "epoch": 0.4793427359687351, "grad_norm": 0.503130316734314, "learning_rate": 0.00010415264040343649, "loss": 1.4347, "step": 36888 }, { "epoch": 0.479355730512651, "grad_norm": 0.37750691175460815, "learning_rate": 0.00010415004094152512, "loss": 1.2633, "step": 36889 }, { "epoch": 0.47936872505656686, "grad_norm": 0.4137214124202728, "learning_rate": 0.00010414744147961372, "loss": 1.2447, "step": 36890 }, { "epoch": 0.47938171960048276, "grad_norm": 0.41304516792297363, "learning_rate": 0.00010414484201770233, "loss": 1.4316, "step": 36891 }, { "epoch": 0.4793947141443986, "grad_norm": 0.45908215641975403, "learning_rate": 0.00010414224255579094, "loss": 1.442, "step": 36892 }, { "epoch": 0.4794077086883145, "grad_norm": 0.4554549753665924, "learning_rate": 0.00010413964309387958, "loss": 1.3797, "step": 36893 }, { "epoch": 0.47942070323223035, "grad_norm": 0.37162935733795166, "learning_rate": 0.00010413704363196819, "loss": 1.432, "step": 36894 }, { "epoch": 0.47943369777614625, "grad_norm": 0.4456905424594879, "learning_rate": 0.0001041344441700568, "loss": 1.4346, "step": 36895 }, { "epoch": 0.4794466923200621, "grad_norm": 0.42448943853378296, "learning_rate": 0.00010413184470814541, "loss": 1.5145, "step": 36896 }, { "epoch": 0.479459686863978, "grad_norm": 0.27512335777282715, "learning_rate": 0.00010412924524623404, "loss": 1.2322, "step": 36897 }, { "epoch": 0.47947268140789384, "grad_norm": 0.31826987862586975, "learning_rate": 0.00010412664578432265, "loss": 1.5515, "step": 36898 }, { "epoch": 0.47948567595180974, "grad_norm": 0.43376055359840393, "learning_rate": 0.00010412404632241126, "loss": 1.4572, "step": 36899 }, { "epoch": 0.4794986704957256, "grad_norm": 0.5189838409423828, "learning_rate": 0.00010412144686049987, "loss": 1.5409, "step": 36900 }, { "epoch": 0.4795116650396415, "grad_norm": 0.3767385482788086, "learning_rate": 0.00010411884739858851, "loss": 1.3058, "step": 36901 }, { "epoch": 0.47952465958355733, "grad_norm": 0.40498068928718567, "learning_rate": 0.00010411624793667712, "loss": 1.4339, "step": 36902 }, { "epoch": 0.47953765412747323, "grad_norm": 0.3439054489135742, "learning_rate": 0.00010411364847476572, "loss": 1.4034, "step": 36903 }, { "epoch": 0.4795506486713891, "grad_norm": 0.29294559359550476, "learning_rate": 0.00010411104901285433, "loss": 1.1773, "step": 36904 }, { "epoch": 0.479563643215305, "grad_norm": 0.2612760066986084, "learning_rate": 0.00010410844955094297, "loss": 1.2663, "step": 36905 }, { "epoch": 0.4795766377592208, "grad_norm": 0.4474465548992157, "learning_rate": 0.00010410585008903158, "loss": 1.4763, "step": 36906 }, { "epoch": 0.4795896323031367, "grad_norm": 0.32873237133026123, "learning_rate": 0.00010410325062712019, "loss": 1.446, "step": 36907 }, { "epoch": 0.47960262684705257, "grad_norm": 0.30595964193344116, "learning_rate": 0.0001041006511652088, "loss": 1.36, "step": 36908 }, { "epoch": 0.47961562139096847, "grad_norm": 0.45038941502571106, "learning_rate": 0.00010409805170329742, "loss": 1.4713, "step": 36909 }, { "epoch": 0.4796286159348843, "grad_norm": 0.4217996895313263, "learning_rate": 0.00010409545224138603, "loss": 1.2901, "step": 36910 }, { "epoch": 0.4796416104788002, "grad_norm": 0.49533677101135254, "learning_rate": 0.00010409285277947465, "loss": 1.404, "step": 36911 }, { "epoch": 0.47965460502271606, "grad_norm": 0.4637061655521393, "learning_rate": 0.00010409025331756328, "loss": 1.3639, "step": 36912 }, { "epoch": 0.47966759956663196, "grad_norm": 0.42781180143356323, "learning_rate": 0.0001040876538556519, "loss": 1.3643, "step": 36913 }, { "epoch": 0.4796805941105478, "grad_norm": 0.3007575571537018, "learning_rate": 0.0001040850543937405, "loss": 1.3189, "step": 36914 }, { "epoch": 0.4796935886544637, "grad_norm": 0.2834354341030121, "learning_rate": 0.0001040824549318291, "loss": 1.2854, "step": 36915 }, { "epoch": 0.47970658319837955, "grad_norm": 0.36999163031578064, "learning_rate": 0.00010407985546991774, "loss": 1.2525, "step": 36916 }, { "epoch": 0.47971957774229546, "grad_norm": 0.5623327493667603, "learning_rate": 0.00010407725600800635, "loss": 1.4012, "step": 36917 }, { "epoch": 0.4797325722862113, "grad_norm": 0.35479265451431274, "learning_rate": 0.00010407465654609496, "loss": 1.2478, "step": 36918 }, { "epoch": 0.4797455668301272, "grad_norm": 0.4296862483024597, "learning_rate": 0.00010407205708418357, "loss": 1.457, "step": 36919 }, { "epoch": 0.47975856137404305, "grad_norm": 0.49682939052581787, "learning_rate": 0.0001040694576222722, "loss": 1.408, "step": 36920 }, { "epoch": 0.47977155591795895, "grad_norm": 0.40639010071754456, "learning_rate": 0.00010406685816036081, "loss": 1.4331, "step": 36921 }, { "epoch": 0.47978455046187485, "grad_norm": 0.39165592193603516, "learning_rate": 0.00010406425869844942, "loss": 1.4263, "step": 36922 }, { "epoch": 0.4797975450057907, "grad_norm": 0.41559869050979614, "learning_rate": 0.00010406165923653803, "loss": 1.4388, "step": 36923 }, { "epoch": 0.4798105395497066, "grad_norm": 0.3431210517883301, "learning_rate": 0.00010405905977462667, "loss": 1.4256, "step": 36924 }, { "epoch": 0.47982353409362244, "grad_norm": 0.39178362488746643, "learning_rate": 0.00010405646031271528, "loss": 1.4861, "step": 36925 }, { "epoch": 0.47983652863753834, "grad_norm": 0.4565635919570923, "learning_rate": 0.00010405386085080389, "loss": 1.3268, "step": 36926 }, { "epoch": 0.4798495231814542, "grad_norm": 0.4415455758571625, "learning_rate": 0.0001040512613888925, "loss": 1.5069, "step": 36927 }, { "epoch": 0.4798625177253701, "grad_norm": 0.47685813903808594, "learning_rate": 0.00010404866192698113, "loss": 1.4058, "step": 36928 }, { "epoch": 0.47987551226928593, "grad_norm": 0.41550666093826294, "learning_rate": 0.00010404606246506974, "loss": 1.3966, "step": 36929 }, { "epoch": 0.47988850681320183, "grad_norm": 0.34026575088500977, "learning_rate": 0.00010404346300315835, "loss": 1.3131, "step": 36930 }, { "epoch": 0.4799015013571177, "grad_norm": 0.4055057466030121, "learning_rate": 0.00010404086354124696, "loss": 1.4229, "step": 36931 }, { "epoch": 0.4799144959010336, "grad_norm": 0.4502200484275818, "learning_rate": 0.00010403826407933558, "loss": 1.4216, "step": 36932 }, { "epoch": 0.4799274904449494, "grad_norm": 0.4330025017261505, "learning_rate": 0.0001040356646174242, "loss": 1.2733, "step": 36933 }, { "epoch": 0.4799404849888653, "grad_norm": 0.3005753457546234, "learning_rate": 0.0001040330651555128, "loss": 1.4774, "step": 36934 }, { "epoch": 0.47995347953278117, "grad_norm": 0.41319289803504944, "learning_rate": 0.00010403046569360142, "loss": 1.3624, "step": 36935 }, { "epoch": 0.47996647407669707, "grad_norm": 0.3968539535999298, "learning_rate": 0.00010402786623169005, "loss": 1.309, "step": 36936 }, { "epoch": 0.4799794686206129, "grad_norm": 0.3321796655654907, "learning_rate": 0.00010402526676977867, "loss": 1.4588, "step": 36937 }, { "epoch": 0.4799924631645288, "grad_norm": 0.38623952865600586, "learning_rate": 0.00010402266730786728, "loss": 1.3366, "step": 36938 }, { "epoch": 0.48000545770844466, "grad_norm": 0.38911959528923035, "learning_rate": 0.00010402006784595589, "loss": 1.4242, "step": 36939 }, { "epoch": 0.48001845225236056, "grad_norm": 0.3286307454109192, "learning_rate": 0.00010401746838404451, "loss": 1.3574, "step": 36940 }, { "epoch": 0.4800314467962764, "grad_norm": 0.4136251211166382, "learning_rate": 0.00010401486892213312, "loss": 1.4438, "step": 36941 }, { "epoch": 0.4800444413401923, "grad_norm": 0.33114486932754517, "learning_rate": 0.00010401226946022173, "loss": 1.2895, "step": 36942 }, { "epoch": 0.48005743588410815, "grad_norm": 0.3806767165660858, "learning_rate": 0.00010400966999831034, "loss": 1.4772, "step": 36943 }, { "epoch": 0.48007043042802405, "grad_norm": 0.34614482522010803, "learning_rate": 0.00010400707053639898, "loss": 1.3742, "step": 36944 }, { "epoch": 0.4800834249719399, "grad_norm": 0.4848361909389496, "learning_rate": 0.00010400447107448758, "loss": 1.5499, "step": 36945 }, { "epoch": 0.4800964195158558, "grad_norm": 0.3694782257080078, "learning_rate": 0.00010400187161257619, "loss": 1.331, "step": 36946 }, { "epoch": 0.48010941405977164, "grad_norm": 0.31021466851234436, "learning_rate": 0.00010399927215066483, "loss": 1.2764, "step": 36947 }, { "epoch": 0.48012240860368754, "grad_norm": 0.4170472025871277, "learning_rate": 0.00010399667268875344, "loss": 1.3761, "step": 36948 }, { "epoch": 0.4801354031476034, "grad_norm": 0.5247902870178223, "learning_rate": 0.00010399407322684205, "loss": 1.4719, "step": 36949 }, { "epoch": 0.4801483976915193, "grad_norm": 0.40657395124435425, "learning_rate": 0.00010399147376493066, "loss": 1.5526, "step": 36950 }, { "epoch": 0.48016139223543514, "grad_norm": 0.3284232020378113, "learning_rate": 0.00010398887430301929, "loss": 1.272, "step": 36951 }, { "epoch": 0.48017438677935104, "grad_norm": 0.34033140540122986, "learning_rate": 0.0001039862748411079, "loss": 1.299, "step": 36952 }, { "epoch": 0.4801873813232669, "grad_norm": 0.34505438804626465, "learning_rate": 0.00010398367537919651, "loss": 1.198, "step": 36953 }, { "epoch": 0.4802003758671828, "grad_norm": 0.32588204741477966, "learning_rate": 0.00010398107591728512, "loss": 1.2629, "step": 36954 }, { "epoch": 0.48021337041109863, "grad_norm": 0.4872061312198639, "learning_rate": 0.00010397847645537376, "loss": 1.4692, "step": 36955 }, { "epoch": 0.48022636495501453, "grad_norm": 0.38380318880081177, "learning_rate": 0.00010397587699346237, "loss": 1.2054, "step": 36956 }, { "epoch": 0.4802393594989304, "grad_norm": 0.4731670022010803, "learning_rate": 0.00010397327753155097, "loss": 1.3808, "step": 36957 }, { "epoch": 0.4802523540428463, "grad_norm": 0.4408159852027893, "learning_rate": 0.00010397067806963958, "loss": 1.331, "step": 36958 }, { "epoch": 0.4802653485867621, "grad_norm": 0.4369319975376129, "learning_rate": 0.00010396807860772821, "loss": 1.4748, "step": 36959 }, { "epoch": 0.480278343130678, "grad_norm": 0.4115489721298218, "learning_rate": 0.00010396547914581683, "loss": 1.435, "step": 36960 }, { "epoch": 0.48029133767459387, "grad_norm": 0.434533029794693, "learning_rate": 0.00010396287968390544, "loss": 1.588, "step": 36961 }, { "epoch": 0.48030433221850977, "grad_norm": 0.42437365651130676, "learning_rate": 0.00010396028022199405, "loss": 1.4981, "step": 36962 }, { "epoch": 0.4803173267624256, "grad_norm": 0.4613930583000183, "learning_rate": 0.00010395768076008267, "loss": 1.338, "step": 36963 }, { "epoch": 0.4803303213063415, "grad_norm": 0.5235838890075684, "learning_rate": 0.00010395508129817128, "loss": 1.3152, "step": 36964 }, { "epoch": 0.48034331585025736, "grad_norm": 0.48191890120506287, "learning_rate": 0.0001039524818362599, "loss": 1.4145, "step": 36965 }, { "epoch": 0.48035631039417326, "grad_norm": 0.40093857049942017, "learning_rate": 0.0001039498823743485, "loss": 1.3925, "step": 36966 }, { "epoch": 0.4803693049380891, "grad_norm": 0.42404696345329285, "learning_rate": 0.00010394728291243714, "loss": 1.4142, "step": 36967 }, { "epoch": 0.480382299482005, "grad_norm": 0.39218422770500183, "learning_rate": 0.00010394468345052575, "loss": 1.2273, "step": 36968 }, { "epoch": 0.48039529402592085, "grad_norm": 0.4282079339027405, "learning_rate": 0.00010394208398861436, "loss": 1.3739, "step": 36969 }, { "epoch": 0.48040828856983675, "grad_norm": 0.37125587463378906, "learning_rate": 0.00010393948452670296, "loss": 1.0972, "step": 36970 }, { "epoch": 0.4804212831137526, "grad_norm": 0.39503636956214905, "learning_rate": 0.0001039368850647916, "loss": 1.4776, "step": 36971 }, { "epoch": 0.4804342776576685, "grad_norm": 0.3474675118923187, "learning_rate": 0.00010393428560288021, "loss": 1.4618, "step": 36972 }, { "epoch": 0.48044727220158434, "grad_norm": 0.31738564372062683, "learning_rate": 0.00010393168614096882, "loss": 1.2595, "step": 36973 }, { "epoch": 0.48046026674550024, "grad_norm": 0.3933612108230591, "learning_rate": 0.00010392908667905743, "loss": 1.4622, "step": 36974 }, { "epoch": 0.4804732612894161, "grad_norm": 0.4312123656272888, "learning_rate": 0.00010392648721714606, "loss": 1.4021, "step": 36975 }, { "epoch": 0.480486255833332, "grad_norm": 0.36490944027900696, "learning_rate": 0.00010392388775523467, "loss": 1.3627, "step": 36976 }, { "epoch": 0.48049925037724783, "grad_norm": 0.45459774136543274, "learning_rate": 0.00010392128829332328, "loss": 1.4745, "step": 36977 }, { "epoch": 0.48051224492116373, "grad_norm": 0.3800516426563263, "learning_rate": 0.00010391868883141189, "loss": 1.4159, "step": 36978 }, { "epoch": 0.4805252394650796, "grad_norm": 0.4457409977912903, "learning_rate": 0.00010391608936950053, "loss": 1.379, "step": 36979 }, { "epoch": 0.4805382340089955, "grad_norm": 0.4562298059463501, "learning_rate": 0.00010391348990758914, "loss": 1.276, "step": 36980 }, { "epoch": 0.4805512285529113, "grad_norm": 0.3313678205013275, "learning_rate": 0.00010391089044567775, "loss": 1.4453, "step": 36981 }, { "epoch": 0.4805642230968272, "grad_norm": 0.47662195563316345, "learning_rate": 0.00010390829098376636, "loss": 1.6202, "step": 36982 }, { "epoch": 0.48057721764074307, "grad_norm": 0.4243510067462921, "learning_rate": 0.00010390569152185499, "loss": 1.4882, "step": 36983 }, { "epoch": 0.48059021218465897, "grad_norm": 0.41490447521209717, "learning_rate": 0.0001039030920599436, "loss": 1.5299, "step": 36984 }, { "epoch": 0.4806032067285748, "grad_norm": 0.35059666633605957, "learning_rate": 0.00010390049259803221, "loss": 1.2663, "step": 36985 }, { "epoch": 0.4806162012724907, "grad_norm": 0.33567747473716736, "learning_rate": 0.00010389789313612085, "loss": 1.212, "step": 36986 }, { "epoch": 0.48062919581640656, "grad_norm": 0.42701902985572815, "learning_rate": 0.00010389529367420944, "loss": 1.3989, "step": 36987 }, { "epoch": 0.48064219036032246, "grad_norm": 0.43308764696121216, "learning_rate": 0.00010389269421229805, "loss": 1.3765, "step": 36988 }, { "epoch": 0.4806551849042383, "grad_norm": 0.32546907663345337, "learning_rate": 0.00010389009475038666, "loss": 1.3773, "step": 36989 }, { "epoch": 0.4806681794481542, "grad_norm": 0.4054744839668274, "learning_rate": 0.0001038874952884753, "loss": 1.3867, "step": 36990 }, { "epoch": 0.48068117399207005, "grad_norm": 0.4095016121864319, "learning_rate": 0.00010388489582656391, "loss": 1.4368, "step": 36991 }, { "epoch": 0.48069416853598596, "grad_norm": 0.37877586483955383, "learning_rate": 0.00010388229636465252, "loss": 1.5284, "step": 36992 }, { "epoch": 0.4807071630799018, "grad_norm": 0.4957578480243683, "learning_rate": 0.00010387969690274114, "loss": 1.2913, "step": 36993 }, { "epoch": 0.4807201576238177, "grad_norm": 0.38614529371261597, "learning_rate": 0.00010387709744082976, "loss": 1.3449, "step": 36994 }, { "epoch": 0.48073315216773355, "grad_norm": 0.4471305012702942, "learning_rate": 0.00010387449797891837, "loss": 1.2938, "step": 36995 }, { "epoch": 0.48074614671164945, "grad_norm": 0.41543376445770264, "learning_rate": 0.00010387189851700698, "loss": 1.4888, "step": 36996 }, { "epoch": 0.4807591412555653, "grad_norm": 0.34518080949783325, "learning_rate": 0.00010386929905509559, "loss": 1.4321, "step": 36997 }, { "epoch": 0.4807721357994812, "grad_norm": 0.3992246687412262, "learning_rate": 0.00010386669959318423, "loss": 1.4655, "step": 36998 }, { "epoch": 0.4807851303433971, "grad_norm": 0.38836345076560974, "learning_rate": 0.00010386410013127283, "loss": 1.5897, "step": 36999 }, { "epoch": 0.48079812488731294, "grad_norm": 0.45370543003082275, "learning_rate": 0.00010386150066936144, "loss": 1.4741, "step": 37000 }, { "epoch": 0.48081111943122884, "grad_norm": 0.442688912153244, "learning_rate": 0.00010385890120745005, "loss": 1.4485, "step": 37001 }, { "epoch": 0.4808241139751447, "grad_norm": 0.3650788962841034, "learning_rate": 0.00010385630174553869, "loss": 1.2932, "step": 37002 }, { "epoch": 0.4808371085190606, "grad_norm": 0.3345593214035034, "learning_rate": 0.0001038537022836273, "loss": 1.4152, "step": 37003 }, { "epoch": 0.48085010306297643, "grad_norm": 0.45666927099227905, "learning_rate": 0.00010385110282171591, "loss": 1.3923, "step": 37004 }, { "epoch": 0.48086309760689233, "grad_norm": 0.34880349040031433, "learning_rate": 0.00010384850335980452, "loss": 1.359, "step": 37005 }, { "epoch": 0.4808760921508082, "grad_norm": 0.4138614237308502, "learning_rate": 0.00010384590389789315, "loss": 1.3376, "step": 37006 }, { "epoch": 0.4808890866947241, "grad_norm": 0.4633117914199829, "learning_rate": 0.00010384330443598176, "loss": 1.4949, "step": 37007 }, { "epoch": 0.4809020812386399, "grad_norm": 0.3345903158187866, "learning_rate": 0.00010384070497407037, "loss": 1.3897, "step": 37008 }, { "epoch": 0.4809150757825558, "grad_norm": 0.4556865692138672, "learning_rate": 0.00010383810551215898, "loss": 1.4332, "step": 37009 }, { "epoch": 0.48092807032647167, "grad_norm": 0.41012030839920044, "learning_rate": 0.00010383550605024762, "loss": 1.3258, "step": 37010 }, { "epoch": 0.48094106487038757, "grad_norm": 0.411729097366333, "learning_rate": 0.00010383290658833623, "loss": 1.5882, "step": 37011 }, { "epoch": 0.4809540594143034, "grad_norm": 0.3972322642803192, "learning_rate": 0.00010383030712642482, "loss": 1.2105, "step": 37012 }, { "epoch": 0.4809670539582193, "grad_norm": 0.3828829824924469, "learning_rate": 0.00010382770766451344, "loss": 1.3966, "step": 37013 }, { "epoch": 0.48098004850213516, "grad_norm": 0.33546182513237, "learning_rate": 0.00010382510820260207, "loss": 1.3979, "step": 37014 }, { "epoch": 0.48099304304605106, "grad_norm": 0.4454078674316406, "learning_rate": 0.00010382250874069068, "loss": 1.2634, "step": 37015 }, { "epoch": 0.4810060375899669, "grad_norm": 0.375124990940094, "learning_rate": 0.0001038199092787793, "loss": 1.2616, "step": 37016 }, { "epoch": 0.4810190321338828, "grad_norm": 0.30314013361930847, "learning_rate": 0.0001038173098168679, "loss": 1.3906, "step": 37017 }, { "epoch": 0.48103202667779865, "grad_norm": 0.4833199977874756, "learning_rate": 0.00010381471035495653, "loss": 1.6719, "step": 37018 }, { "epoch": 0.48104502122171455, "grad_norm": 0.2718501091003418, "learning_rate": 0.00010381211089304514, "loss": 1.3718, "step": 37019 }, { "epoch": 0.4810580157656304, "grad_norm": 0.3884361684322357, "learning_rate": 0.00010380951143113375, "loss": 1.3511, "step": 37020 }, { "epoch": 0.4810710103095463, "grad_norm": 0.3924647271633148, "learning_rate": 0.00010380691196922239, "loss": 1.194, "step": 37021 }, { "epoch": 0.48108400485346214, "grad_norm": 0.44883644580841064, "learning_rate": 0.000103804312507311, "loss": 1.249, "step": 37022 }, { "epoch": 0.48109699939737804, "grad_norm": 0.3738546669483185, "learning_rate": 0.00010380171304539961, "loss": 1.2924, "step": 37023 }, { "epoch": 0.4811099939412939, "grad_norm": 0.43245530128479004, "learning_rate": 0.00010379911358348822, "loss": 1.3289, "step": 37024 }, { "epoch": 0.4811229884852098, "grad_norm": 0.41660571098327637, "learning_rate": 0.00010379651412157685, "loss": 1.3814, "step": 37025 }, { "epoch": 0.48113598302912564, "grad_norm": 0.4113090932369232, "learning_rate": 0.00010379391465966546, "loss": 1.4656, "step": 37026 }, { "epoch": 0.48114897757304154, "grad_norm": 0.40604138374328613, "learning_rate": 0.00010379131519775407, "loss": 1.4552, "step": 37027 }, { "epoch": 0.4811619721169574, "grad_norm": 0.3968238830566406, "learning_rate": 0.00010378871573584268, "loss": 1.4294, "step": 37028 }, { "epoch": 0.4811749666608733, "grad_norm": 0.4388469159603119, "learning_rate": 0.0001037861162739313, "loss": 1.6759, "step": 37029 }, { "epoch": 0.4811879612047891, "grad_norm": 0.46258074045181274, "learning_rate": 0.00010378351681201992, "loss": 1.5526, "step": 37030 }, { "epoch": 0.48120095574870503, "grad_norm": 0.490599125623703, "learning_rate": 0.00010378091735010853, "loss": 1.5859, "step": 37031 }, { "epoch": 0.4812139502926209, "grad_norm": 0.4031514823436737, "learning_rate": 0.00010377831788819714, "loss": 1.443, "step": 37032 }, { "epoch": 0.4812269448365368, "grad_norm": 0.39933234453201294, "learning_rate": 0.00010377571842628578, "loss": 1.4039, "step": 37033 }, { "epoch": 0.4812399393804526, "grad_norm": 0.3389892578125, "learning_rate": 0.00010377311896437439, "loss": 1.2539, "step": 37034 }, { "epoch": 0.4812529339243685, "grad_norm": 0.4143637418746948, "learning_rate": 0.000103770519502463, "loss": 1.3954, "step": 37035 }, { "epoch": 0.48126592846828437, "grad_norm": 0.42558053135871887, "learning_rate": 0.00010376792004055161, "loss": 1.3849, "step": 37036 }, { "epoch": 0.48127892301220027, "grad_norm": 0.2608511447906494, "learning_rate": 0.00010376532057864023, "loss": 1.2232, "step": 37037 }, { "epoch": 0.4812919175561161, "grad_norm": 0.3625309467315674, "learning_rate": 0.00010376272111672884, "loss": 1.399, "step": 37038 }, { "epoch": 0.481304912100032, "grad_norm": 0.3443760871887207, "learning_rate": 0.00010376012165481745, "loss": 1.302, "step": 37039 }, { "epoch": 0.48131790664394786, "grad_norm": 0.40133070945739746, "learning_rate": 0.00010375752219290607, "loss": 1.43, "step": 37040 }, { "epoch": 0.48133090118786376, "grad_norm": 0.41258370876312256, "learning_rate": 0.00010375492273099469, "loss": 1.5163, "step": 37041 }, { "epoch": 0.4813438957317796, "grad_norm": 0.40800005197525024, "learning_rate": 0.0001037523232690833, "loss": 1.5115, "step": 37042 }, { "epoch": 0.4813568902756955, "grad_norm": 0.24220822751522064, "learning_rate": 0.00010374972380717191, "loss": 1.248, "step": 37043 }, { "epoch": 0.48136988481961135, "grad_norm": 0.49225279688835144, "learning_rate": 0.00010374712434526052, "loss": 1.732, "step": 37044 }, { "epoch": 0.48138287936352725, "grad_norm": 0.35616499185562134, "learning_rate": 0.00010374452488334916, "loss": 1.2149, "step": 37045 }, { "epoch": 0.4813958739074431, "grad_norm": 0.3266451954841614, "learning_rate": 0.00010374192542143777, "loss": 1.2898, "step": 37046 }, { "epoch": 0.481408868451359, "grad_norm": 0.43068891763687134, "learning_rate": 0.00010373932595952638, "loss": 1.2392, "step": 37047 }, { "epoch": 0.48142186299527484, "grad_norm": 0.3272014260292053, "learning_rate": 0.000103736726497615, "loss": 1.365, "step": 37048 }, { "epoch": 0.48143485753919074, "grad_norm": 0.37535202503204346, "learning_rate": 0.00010373412703570362, "loss": 1.5827, "step": 37049 }, { "epoch": 0.4814478520831066, "grad_norm": 0.5277919769287109, "learning_rate": 0.00010373152757379223, "loss": 1.5126, "step": 37050 }, { "epoch": 0.4814608466270225, "grad_norm": 0.42219579219818115, "learning_rate": 0.00010372892811188084, "loss": 1.3183, "step": 37051 }, { "epoch": 0.48147384117093833, "grad_norm": 0.38642510771751404, "learning_rate": 0.00010372632864996945, "loss": 1.6148, "step": 37052 }, { "epoch": 0.48148683571485423, "grad_norm": 0.3439028263092041, "learning_rate": 0.00010372372918805809, "loss": 1.2162, "step": 37053 }, { "epoch": 0.4814998302587701, "grad_norm": 0.45564141869544983, "learning_rate": 0.00010372112972614669, "loss": 1.1664, "step": 37054 }, { "epoch": 0.481512824802686, "grad_norm": 0.331000417470932, "learning_rate": 0.0001037185302642353, "loss": 1.2467, "step": 37055 }, { "epoch": 0.4815258193466018, "grad_norm": 0.4542824625968933, "learning_rate": 0.00010371593080232391, "loss": 1.4139, "step": 37056 }, { "epoch": 0.4815388138905177, "grad_norm": 0.41777390241622925, "learning_rate": 0.00010371333134041255, "loss": 1.4423, "step": 37057 }, { "epoch": 0.48155180843443357, "grad_norm": 0.4126218259334564, "learning_rate": 0.00010371073187850116, "loss": 1.3907, "step": 37058 }, { "epoch": 0.48156480297834947, "grad_norm": 0.4334541857242584, "learning_rate": 0.00010370813241658977, "loss": 1.4799, "step": 37059 }, { "epoch": 0.4815777975222653, "grad_norm": 0.3993145525455475, "learning_rate": 0.00010370553295467839, "loss": 1.2806, "step": 37060 }, { "epoch": 0.4815907920661812, "grad_norm": 0.4233919382095337, "learning_rate": 0.000103702933492767, "loss": 1.4537, "step": 37061 }, { "epoch": 0.48160378661009706, "grad_norm": 0.37088096141815186, "learning_rate": 0.00010370033403085561, "loss": 1.348, "step": 37062 }, { "epoch": 0.48161678115401296, "grad_norm": 0.4031206965446472, "learning_rate": 0.00010369773456894423, "loss": 1.5072, "step": 37063 }, { "epoch": 0.4816297756979288, "grad_norm": 0.4621102213859558, "learning_rate": 0.00010369513510703286, "loss": 1.5552, "step": 37064 }, { "epoch": 0.4816427702418447, "grad_norm": 0.43671950697898865, "learning_rate": 0.00010369253564512147, "loss": 1.2471, "step": 37065 }, { "epoch": 0.48165576478576055, "grad_norm": 0.5634618997573853, "learning_rate": 0.00010368993618321009, "loss": 1.443, "step": 37066 }, { "epoch": 0.48166875932967645, "grad_norm": 0.35647347569465637, "learning_rate": 0.00010368733672129868, "loss": 1.503, "step": 37067 }, { "epoch": 0.4816817538735923, "grad_norm": 0.40893930196762085, "learning_rate": 0.00010368473725938732, "loss": 1.3356, "step": 37068 }, { "epoch": 0.4816947484175082, "grad_norm": 0.4411131739616394, "learning_rate": 0.00010368213779747593, "loss": 1.4587, "step": 37069 }, { "epoch": 0.48170774296142405, "grad_norm": 0.508825957775116, "learning_rate": 0.00010367953833556454, "loss": 1.4409, "step": 37070 }, { "epoch": 0.48172073750533995, "grad_norm": 0.4320303201675415, "learning_rate": 0.00010367693887365315, "loss": 1.2929, "step": 37071 }, { "epoch": 0.4817337320492558, "grad_norm": 0.37547269463539124, "learning_rate": 0.00010367433941174178, "loss": 1.5674, "step": 37072 }, { "epoch": 0.4817467265931717, "grad_norm": 0.5642324686050415, "learning_rate": 0.00010367173994983039, "loss": 1.4954, "step": 37073 }, { "epoch": 0.48175972113708754, "grad_norm": 0.406318724155426, "learning_rate": 0.000103669140487919, "loss": 1.4271, "step": 37074 }, { "epoch": 0.48177271568100344, "grad_norm": 0.43324995040893555, "learning_rate": 0.00010366654102600761, "loss": 1.2625, "step": 37075 }, { "epoch": 0.48178571022491934, "grad_norm": 0.32785969972610474, "learning_rate": 0.00010366394156409625, "loss": 1.1535, "step": 37076 }, { "epoch": 0.4817987047688352, "grad_norm": 0.5001927018165588, "learning_rate": 0.00010366134210218486, "loss": 1.5432, "step": 37077 }, { "epoch": 0.4818116993127511, "grad_norm": 0.37814438343048096, "learning_rate": 0.00010365874264027347, "loss": 1.327, "step": 37078 }, { "epoch": 0.48182469385666693, "grad_norm": 0.41916894912719727, "learning_rate": 0.00010365614317836207, "loss": 1.218, "step": 37079 }, { "epoch": 0.48183768840058283, "grad_norm": 0.40626901388168335, "learning_rate": 0.0001036535437164507, "loss": 1.3679, "step": 37080 }, { "epoch": 0.4818506829444987, "grad_norm": 0.43168550729751587, "learning_rate": 0.00010365094425453932, "loss": 1.4426, "step": 37081 }, { "epoch": 0.4818636774884146, "grad_norm": 0.4354667067527771, "learning_rate": 0.00010364834479262793, "loss": 1.482, "step": 37082 }, { "epoch": 0.4818766720323304, "grad_norm": 0.34500545263290405, "learning_rate": 0.00010364574533071654, "loss": 1.4075, "step": 37083 }, { "epoch": 0.4818896665762463, "grad_norm": 0.8157206177711487, "learning_rate": 0.00010364314586880516, "loss": 1.2444, "step": 37084 }, { "epoch": 0.48190266112016217, "grad_norm": 0.334079384803772, "learning_rate": 0.00010364054640689377, "loss": 1.3834, "step": 37085 }, { "epoch": 0.48191565566407807, "grad_norm": 0.3525972068309784, "learning_rate": 0.00010363794694498239, "loss": 1.3397, "step": 37086 }, { "epoch": 0.4819286502079939, "grad_norm": 0.42938733100891113, "learning_rate": 0.000103635347483071, "loss": 1.3493, "step": 37087 }, { "epoch": 0.4819416447519098, "grad_norm": 0.35131803154945374, "learning_rate": 0.00010363274802115963, "loss": 1.3878, "step": 37088 }, { "epoch": 0.48195463929582566, "grad_norm": 0.43994802236557007, "learning_rate": 0.00010363014855924825, "loss": 1.2969, "step": 37089 }, { "epoch": 0.48196763383974156, "grad_norm": 0.20950429141521454, "learning_rate": 0.00010362754909733686, "loss": 1.1947, "step": 37090 }, { "epoch": 0.4819806283836574, "grad_norm": 0.47065064311027527, "learning_rate": 0.00010362494963542547, "loss": 1.3354, "step": 37091 }, { "epoch": 0.4819936229275733, "grad_norm": 0.2952079772949219, "learning_rate": 0.00010362235017351409, "loss": 1.5077, "step": 37092 }, { "epoch": 0.48200661747148915, "grad_norm": 0.4206715524196625, "learning_rate": 0.0001036197507116027, "loss": 1.3999, "step": 37093 }, { "epoch": 0.48201961201540505, "grad_norm": 0.4758639633655548, "learning_rate": 0.00010361715124969131, "loss": 1.4574, "step": 37094 }, { "epoch": 0.4820326065593209, "grad_norm": 0.3254123032093048, "learning_rate": 0.00010361455178777995, "loss": 1.4146, "step": 37095 }, { "epoch": 0.4820456011032368, "grad_norm": 0.3778538703918457, "learning_rate": 0.00010361195232586855, "loss": 1.3906, "step": 37096 }, { "epoch": 0.48205859564715264, "grad_norm": 0.4666554629802704, "learning_rate": 0.00010360935286395716, "loss": 1.3955, "step": 37097 }, { "epoch": 0.48207159019106854, "grad_norm": 0.424966424703598, "learning_rate": 0.00010360675340204577, "loss": 1.2918, "step": 37098 }, { "epoch": 0.4820845847349844, "grad_norm": 0.4617466628551483, "learning_rate": 0.00010360415394013441, "loss": 1.3452, "step": 37099 }, { "epoch": 0.4820975792789003, "grad_norm": 0.413888543844223, "learning_rate": 0.00010360155447822302, "loss": 1.3404, "step": 37100 }, { "epoch": 0.48211057382281614, "grad_norm": 0.34735554456710815, "learning_rate": 0.00010359895501631163, "loss": 1.3954, "step": 37101 }, { "epoch": 0.48212356836673204, "grad_norm": 0.40625226497650146, "learning_rate": 0.00010359635555440024, "loss": 1.4646, "step": 37102 }, { "epoch": 0.4821365629106479, "grad_norm": 0.4119158387184143, "learning_rate": 0.00010359375609248887, "loss": 1.2361, "step": 37103 }, { "epoch": 0.4821495574545638, "grad_norm": 0.3501068949699402, "learning_rate": 0.00010359115663057748, "loss": 1.4238, "step": 37104 }, { "epoch": 0.4821625519984796, "grad_norm": 0.36393558979034424, "learning_rate": 0.00010358855716866609, "loss": 1.2528, "step": 37105 }, { "epoch": 0.48217554654239553, "grad_norm": 0.47136953473091125, "learning_rate": 0.0001035859577067547, "loss": 1.194, "step": 37106 }, { "epoch": 0.4821885410863114, "grad_norm": 0.4365573525428772, "learning_rate": 0.00010358335824484334, "loss": 1.4229, "step": 37107 }, { "epoch": 0.4822015356302273, "grad_norm": 0.4839194118976593, "learning_rate": 0.00010358075878293195, "loss": 1.4068, "step": 37108 }, { "epoch": 0.4822145301741431, "grad_norm": 0.4345313012599945, "learning_rate": 0.00010357815932102055, "loss": 1.4235, "step": 37109 }, { "epoch": 0.482227524718059, "grad_norm": 0.4125238060951233, "learning_rate": 0.00010357555985910916, "loss": 1.2842, "step": 37110 }, { "epoch": 0.48224051926197486, "grad_norm": 0.457811564207077, "learning_rate": 0.0001035729603971978, "loss": 1.4778, "step": 37111 }, { "epoch": 0.48225351380589077, "grad_norm": 0.49580860137939453, "learning_rate": 0.0001035703609352864, "loss": 1.5206, "step": 37112 }, { "epoch": 0.4822665083498066, "grad_norm": 0.42422953248023987, "learning_rate": 0.00010356776147337502, "loss": 1.4659, "step": 37113 }, { "epoch": 0.4822795028937225, "grad_norm": 0.4670541286468506, "learning_rate": 0.00010356516201146363, "loss": 1.5048, "step": 37114 }, { "epoch": 0.48229249743763836, "grad_norm": 0.5135405659675598, "learning_rate": 0.00010356256254955225, "loss": 1.3222, "step": 37115 }, { "epoch": 0.48230549198155426, "grad_norm": 0.45545876026153564, "learning_rate": 0.00010355996308764086, "loss": 1.4475, "step": 37116 }, { "epoch": 0.4823184865254701, "grad_norm": 0.39179643988609314, "learning_rate": 0.00010355736362572947, "loss": 1.383, "step": 37117 }, { "epoch": 0.482331481069386, "grad_norm": 0.46913668513298035, "learning_rate": 0.00010355476416381808, "loss": 1.48, "step": 37118 }, { "epoch": 0.48234447561330185, "grad_norm": 0.32873645424842834, "learning_rate": 0.00010355216470190672, "loss": 1.4631, "step": 37119 }, { "epoch": 0.48235747015721775, "grad_norm": 0.3252403736114502, "learning_rate": 0.00010354956523999533, "loss": 1.329, "step": 37120 }, { "epoch": 0.4823704647011336, "grad_norm": 0.4843003451824188, "learning_rate": 0.00010354696577808393, "loss": 1.2881, "step": 37121 }, { "epoch": 0.4823834592450495, "grad_norm": 0.3669763207435608, "learning_rate": 0.00010354436631617254, "loss": 1.3835, "step": 37122 }, { "epoch": 0.48239645378896534, "grad_norm": 0.4133894741535187, "learning_rate": 0.00010354176685426118, "loss": 1.2308, "step": 37123 }, { "epoch": 0.48240944833288124, "grad_norm": 0.40545177459716797, "learning_rate": 0.00010353916739234979, "loss": 1.3157, "step": 37124 }, { "epoch": 0.4824224428767971, "grad_norm": 0.4551369845867157, "learning_rate": 0.0001035365679304384, "loss": 1.4425, "step": 37125 }, { "epoch": 0.482435437420713, "grad_norm": 0.447760671377182, "learning_rate": 0.00010353396846852701, "loss": 1.5473, "step": 37126 }, { "epoch": 0.48244843196462883, "grad_norm": 0.453803151845932, "learning_rate": 0.00010353136900661564, "loss": 1.6073, "step": 37127 }, { "epoch": 0.48246142650854473, "grad_norm": 0.4930380880832672, "learning_rate": 0.00010352876954470425, "loss": 1.4331, "step": 37128 }, { "epoch": 0.4824744210524606, "grad_norm": 0.3659795820713043, "learning_rate": 0.00010352617008279286, "loss": 1.4077, "step": 37129 }, { "epoch": 0.4824874155963765, "grad_norm": 0.40258270502090454, "learning_rate": 0.00010352357062088147, "loss": 1.2885, "step": 37130 }, { "epoch": 0.4825004101402923, "grad_norm": 0.4075701832771301, "learning_rate": 0.00010352097115897011, "loss": 1.4023, "step": 37131 }, { "epoch": 0.4825134046842082, "grad_norm": 0.3460240364074707, "learning_rate": 0.00010351837169705872, "loss": 1.4505, "step": 37132 }, { "epoch": 0.48252639922812407, "grad_norm": 0.39133960008621216, "learning_rate": 0.00010351577223514733, "loss": 1.4253, "step": 37133 }, { "epoch": 0.48253939377203997, "grad_norm": 0.3858380615711212, "learning_rate": 0.00010351317277323595, "loss": 1.3297, "step": 37134 }, { "epoch": 0.4825523883159558, "grad_norm": 0.4109053313732147, "learning_rate": 0.00010351057331132457, "loss": 1.2859, "step": 37135 }, { "epoch": 0.4825653828598717, "grad_norm": 0.3750620186328888, "learning_rate": 0.00010350797384941318, "loss": 1.4771, "step": 37136 }, { "epoch": 0.48257837740378756, "grad_norm": 0.3399812579154968, "learning_rate": 0.00010350537438750179, "loss": 1.0581, "step": 37137 }, { "epoch": 0.48259137194770346, "grad_norm": 0.41491013765335083, "learning_rate": 0.00010350277492559041, "loss": 1.5068, "step": 37138 }, { "epoch": 0.4826043664916193, "grad_norm": 0.35757890343666077, "learning_rate": 0.00010350017546367902, "loss": 1.5266, "step": 37139 }, { "epoch": 0.4826173610355352, "grad_norm": 0.43575409054756165, "learning_rate": 0.00010349757600176763, "loss": 1.4045, "step": 37140 }, { "epoch": 0.48263035557945105, "grad_norm": 0.2864450216293335, "learning_rate": 0.00010349497653985624, "loss": 1.2481, "step": 37141 }, { "epoch": 0.48264335012336695, "grad_norm": 0.4132607877254486, "learning_rate": 0.00010349237707794488, "loss": 1.5628, "step": 37142 }, { "epoch": 0.4826563446672828, "grad_norm": 0.37440818548202515, "learning_rate": 0.0001034897776160335, "loss": 1.2088, "step": 37143 }, { "epoch": 0.4826693392111987, "grad_norm": 0.44413092732429504, "learning_rate": 0.0001034871781541221, "loss": 1.3784, "step": 37144 }, { "epoch": 0.48268233375511455, "grad_norm": 0.3561576306819916, "learning_rate": 0.00010348457869221072, "loss": 1.3075, "step": 37145 }, { "epoch": 0.48269532829903045, "grad_norm": 0.3423554599285126, "learning_rate": 0.00010348197923029934, "loss": 1.3032, "step": 37146 }, { "epoch": 0.4827083228429463, "grad_norm": 0.4929901361465454, "learning_rate": 0.00010347937976838795, "loss": 1.4953, "step": 37147 }, { "epoch": 0.4827213173868622, "grad_norm": 0.4789813756942749, "learning_rate": 0.00010347678030647656, "loss": 1.2363, "step": 37148 }, { "epoch": 0.48273431193077804, "grad_norm": 0.3408944308757782, "learning_rate": 0.00010347418084456517, "loss": 1.3726, "step": 37149 }, { "epoch": 0.48274730647469394, "grad_norm": 0.2718135118484497, "learning_rate": 0.00010347158138265381, "loss": 1.2941, "step": 37150 }, { "epoch": 0.48276030101860984, "grad_norm": 0.43935272097587585, "learning_rate": 0.00010346898192074241, "loss": 1.2502, "step": 37151 }, { "epoch": 0.4827732955625257, "grad_norm": 0.4239197373390198, "learning_rate": 0.00010346638245883102, "loss": 1.1811, "step": 37152 }, { "epoch": 0.4827862901064416, "grad_norm": 0.43971508741378784, "learning_rate": 0.00010346378299691963, "loss": 1.3636, "step": 37153 }, { "epoch": 0.48279928465035743, "grad_norm": 0.39191752672195435, "learning_rate": 0.00010346118353500827, "loss": 1.3805, "step": 37154 }, { "epoch": 0.48281227919427333, "grad_norm": 0.24109315872192383, "learning_rate": 0.00010345858407309688, "loss": 1.2716, "step": 37155 }, { "epoch": 0.4828252737381892, "grad_norm": 0.45072266459465027, "learning_rate": 0.00010345598461118549, "loss": 1.5644, "step": 37156 }, { "epoch": 0.4828382682821051, "grad_norm": 0.3615192770957947, "learning_rate": 0.0001034533851492741, "loss": 1.469, "step": 37157 }, { "epoch": 0.4828512628260209, "grad_norm": 0.48113152384757996, "learning_rate": 0.00010345078568736272, "loss": 1.4839, "step": 37158 }, { "epoch": 0.4828642573699368, "grad_norm": 0.40739676356315613, "learning_rate": 0.00010344818622545134, "loss": 1.2597, "step": 37159 }, { "epoch": 0.48287725191385267, "grad_norm": 0.5238981246948242, "learning_rate": 0.00010344558676353995, "loss": 1.396, "step": 37160 }, { "epoch": 0.48289024645776857, "grad_norm": 0.35567423701286316, "learning_rate": 0.00010344298730162856, "loss": 1.4781, "step": 37161 }, { "epoch": 0.4829032410016844, "grad_norm": 0.47920507192611694, "learning_rate": 0.0001034403878397172, "loss": 1.5047, "step": 37162 }, { "epoch": 0.4829162355456003, "grad_norm": 0.36311790347099304, "learning_rate": 0.00010343778837780579, "loss": 1.5195, "step": 37163 }, { "epoch": 0.48292923008951616, "grad_norm": 0.410348504781723, "learning_rate": 0.0001034351889158944, "loss": 1.3969, "step": 37164 }, { "epoch": 0.48294222463343206, "grad_norm": 0.4122743308544159, "learning_rate": 0.00010343258945398302, "loss": 1.2918, "step": 37165 }, { "epoch": 0.4829552191773479, "grad_norm": 0.30620068311691284, "learning_rate": 0.00010342998999207165, "loss": 1.2396, "step": 37166 }, { "epoch": 0.4829682137212638, "grad_norm": 0.47491100430488586, "learning_rate": 0.00010342739053016026, "loss": 1.5018, "step": 37167 }, { "epoch": 0.48298120826517965, "grad_norm": 0.44979533553123474, "learning_rate": 0.00010342479106824887, "loss": 1.2966, "step": 37168 }, { "epoch": 0.48299420280909555, "grad_norm": 0.35663050413131714, "learning_rate": 0.0001034221916063375, "loss": 1.3509, "step": 37169 }, { "epoch": 0.4830071973530114, "grad_norm": 0.412049263715744, "learning_rate": 0.00010341959214442611, "loss": 1.3382, "step": 37170 }, { "epoch": 0.4830201918969273, "grad_norm": 0.42856305837631226, "learning_rate": 0.00010341699268251472, "loss": 1.5078, "step": 37171 }, { "epoch": 0.48303318644084314, "grad_norm": 0.3330698609352112, "learning_rate": 0.00010341439322060333, "loss": 1.4553, "step": 37172 }, { "epoch": 0.48304618098475904, "grad_norm": 0.4218689501285553, "learning_rate": 0.00010341179375869197, "loss": 1.2499, "step": 37173 }, { "epoch": 0.4830591755286749, "grad_norm": 0.3141877353191376, "learning_rate": 0.00010340919429678058, "loss": 1.483, "step": 37174 }, { "epoch": 0.4830721700725908, "grad_norm": 0.39505767822265625, "learning_rate": 0.00010340659483486919, "loss": 1.3425, "step": 37175 }, { "epoch": 0.48308516461650663, "grad_norm": 0.36962851881980896, "learning_rate": 0.00010340399537295779, "loss": 1.2817, "step": 37176 }, { "epoch": 0.48309815916042254, "grad_norm": 0.49067923426628113, "learning_rate": 0.00010340139591104643, "loss": 1.4719, "step": 37177 }, { "epoch": 0.4831111537043384, "grad_norm": 0.3833983540534973, "learning_rate": 0.00010339879644913504, "loss": 1.4658, "step": 37178 }, { "epoch": 0.4831241482482543, "grad_norm": 0.39992755651474, "learning_rate": 0.00010339619698722365, "loss": 1.3551, "step": 37179 }, { "epoch": 0.4831371427921701, "grad_norm": 0.3843168020248413, "learning_rate": 0.00010339359752531226, "loss": 1.3564, "step": 37180 }, { "epoch": 0.483150137336086, "grad_norm": 0.467490553855896, "learning_rate": 0.00010339099806340088, "loss": 1.4634, "step": 37181 }, { "epoch": 0.4831631318800019, "grad_norm": 0.5566713213920593, "learning_rate": 0.0001033883986014895, "loss": 1.3063, "step": 37182 }, { "epoch": 0.4831761264239178, "grad_norm": 0.36853906512260437, "learning_rate": 0.0001033857991395781, "loss": 1.5132, "step": 37183 }, { "epoch": 0.4831891209678336, "grad_norm": 0.36428385972976685, "learning_rate": 0.00010338319967766672, "loss": 1.4014, "step": 37184 }, { "epoch": 0.4832021155117495, "grad_norm": 0.45255762338638306, "learning_rate": 0.00010338060021575536, "loss": 1.4467, "step": 37185 }, { "epoch": 0.48321511005566536, "grad_norm": 0.34628692269325256, "learning_rate": 0.00010337800075384397, "loss": 1.3885, "step": 37186 }, { "epoch": 0.48322810459958127, "grad_norm": 0.46361613273620605, "learning_rate": 0.00010337540129193258, "loss": 1.5286, "step": 37187 }, { "epoch": 0.4832410991434971, "grad_norm": 0.4115747809410095, "learning_rate": 0.00010337280183002119, "loss": 1.3885, "step": 37188 }, { "epoch": 0.483254093687413, "grad_norm": 0.3923777639865875, "learning_rate": 0.00010337020236810981, "loss": 1.4679, "step": 37189 }, { "epoch": 0.48326708823132886, "grad_norm": 0.4257797598838806, "learning_rate": 0.00010336760290619842, "loss": 1.5457, "step": 37190 }, { "epoch": 0.48328008277524476, "grad_norm": 0.4121578633785248, "learning_rate": 0.00010336500344428703, "loss": 1.4574, "step": 37191 }, { "epoch": 0.4832930773191606, "grad_norm": 0.38433706760406494, "learning_rate": 0.00010336240398237565, "loss": 1.4184, "step": 37192 }, { "epoch": 0.4833060718630765, "grad_norm": 0.4299604296684265, "learning_rate": 0.00010335980452046427, "loss": 1.2056, "step": 37193 }, { "epoch": 0.48331906640699235, "grad_norm": 0.41900190711021423, "learning_rate": 0.00010335720505855288, "loss": 1.3693, "step": 37194 }, { "epoch": 0.48333206095090825, "grad_norm": 0.3741609454154968, "learning_rate": 0.00010335460559664149, "loss": 1.3709, "step": 37195 }, { "epoch": 0.4833450554948241, "grad_norm": 0.39460936188697815, "learning_rate": 0.0001033520061347301, "loss": 1.2977, "step": 37196 }, { "epoch": 0.48335805003874, "grad_norm": 0.3713022768497467, "learning_rate": 0.00010334940667281874, "loss": 1.3617, "step": 37197 }, { "epoch": 0.48337104458265584, "grad_norm": 0.42974406480789185, "learning_rate": 0.00010334680721090735, "loss": 1.5333, "step": 37198 }, { "epoch": 0.48338403912657174, "grad_norm": 0.41206085681915283, "learning_rate": 0.00010334420774899596, "loss": 1.4894, "step": 37199 }, { "epoch": 0.4833970336704876, "grad_norm": 0.2533392012119293, "learning_rate": 0.00010334160828708457, "loss": 1.3807, "step": 37200 }, { "epoch": 0.4834100282144035, "grad_norm": 0.37510421872138977, "learning_rate": 0.0001033390088251732, "loss": 1.3113, "step": 37201 }, { "epoch": 0.48342302275831933, "grad_norm": 0.32097989320755005, "learning_rate": 0.00010333640936326181, "loss": 1.3608, "step": 37202 }, { "epoch": 0.48343601730223523, "grad_norm": 0.48167771100997925, "learning_rate": 0.00010333380990135042, "loss": 1.3791, "step": 37203 }, { "epoch": 0.4834490118461511, "grad_norm": 0.466338574886322, "learning_rate": 0.00010333121043943903, "loss": 1.5742, "step": 37204 }, { "epoch": 0.483462006390067, "grad_norm": 0.43769240379333496, "learning_rate": 0.00010332861097752766, "loss": 1.4878, "step": 37205 }, { "epoch": 0.4834750009339828, "grad_norm": 0.42374518513679504, "learning_rate": 0.00010332601151561627, "loss": 1.2254, "step": 37206 }, { "epoch": 0.4834879954778987, "grad_norm": 0.35992535948753357, "learning_rate": 0.00010332341205370488, "loss": 1.3777, "step": 37207 }, { "epoch": 0.48350099002181457, "grad_norm": 0.4110301733016968, "learning_rate": 0.00010332081259179352, "loss": 1.347, "step": 37208 }, { "epoch": 0.48351398456573047, "grad_norm": 0.4136654734611511, "learning_rate": 0.00010331821312988213, "loss": 1.5877, "step": 37209 }, { "epoch": 0.4835269791096463, "grad_norm": 0.40613579750061035, "learning_rate": 0.00010331561366797074, "loss": 1.3394, "step": 37210 }, { "epoch": 0.4835399736535622, "grad_norm": 0.4845450520515442, "learning_rate": 0.00010331301420605935, "loss": 1.3919, "step": 37211 }, { "epoch": 0.48355296819747806, "grad_norm": 0.3628627061843872, "learning_rate": 0.00010331041474414797, "loss": 1.3804, "step": 37212 }, { "epoch": 0.48356596274139396, "grad_norm": 0.40418869256973267, "learning_rate": 0.00010330781528223658, "loss": 1.3988, "step": 37213 }, { "epoch": 0.4835789572853098, "grad_norm": 0.33032387495040894, "learning_rate": 0.0001033052158203252, "loss": 1.4622, "step": 37214 }, { "epoch": 0.4835919518292257, "grad_norm": 0.4323943257331848, "learning_rate": 0.0001033026163584138, "loss": 1.6106, "step": 37215 }, { "epoch": 0.48360494637314155, "grad_norm": 0.37121543288230896, "learning_rate": 0.00010330001689650244, "loss": 1.3792, "step": 37216 }, { "epoch": 0.48361794091705745, "grad_norm": 0.4017961323261261, "learning_rate": 0.00010329741743459105, "loss": 1.4648, "step": 37217 }, { "epoch": 0.4836309354609733, "grad_norm": 0.416471391916275, "learning_rate": 0.00010329481797267965, "loss": 1.6313, "step": 37218 }, { "epoch": 0.4836439300048892, "grad_norm": 0.34949880838394165, "learning_rate": 0.00010329221851076826, "loss": 1.2322, "step": 37219 }, { "epoch": 0.48365692454880505, "grad_norm": 0.47480031847953796, "learning_rate": 0.0001032896190488569, "loss": 1.4028, "step": 37220 }, { "epoch": 0.48366991909272095, "grad_norm": 0.4453912377357483, "learning_rate": 0.00010328701958694551, "loss": 1.3699, "step": 37221 }, { "epoch": 0.4836829136366368, "grad_norm": 0.41161414980888367, "learning_rate": 0.00010328442012503412, "loss": 1.2339, "step": 37222 }, { "epoch": 0.4836959081805527, "grad_norm": 0.3298141360282898, "learning_rate": 0.00010328182066312273, "loss": 1.5895, "step": 37223 }, { "epoch": 0.48370890272446854, "grad_norm": 0.34714871644973755, "learning_rate": 0.00010327922120121136, "loss": 1.3556, "step": 37224 }, { "epoch": 0.48372189726838444, "grad_norm": 0.48577478528022766, "learning_rate": 0.00010327662173929997, "loss": 1.2358, "step": 37225 }, { "epoch": 0.4837348918123003, "grad_norm": 0.3123428523540497, "learning_rate": 0.00010327402227738858, "loss": 1.4468, "step": 37226 }, { "epoch": 0.4837478863562162, "grad_norm": 0.4715064764022827, "learning_rate": 0.00010327142281547719, "loss": 1.4772, "step": 37227 }, { "epoch": 0.4837608809001321, "grad_norm": 0.4288148880004883, "learning_rate": 0.00010326882335356583, "loss": 1.4387, "step": 37228 }, { "epoch": 0.48377387544404793, "grad_norm": 0.3944775462150574, "learning_rate": 0.00010326622389165444, "loss": 1.5412, "step": 37229 }, { "epoch": 0.48378686998796383, "grad_norm": 0.3985206186771393, "learning_rate": 0.00010326362442974305, "loss": 1.3873, "step": 37230 }, { "epoch": 0.4837998645318797, "grad_norm": 0.46342065930366516, "learning_rate": 0.00010326102496783165, "loss": 1.5622, "step": 37231 }, { "epoch": 0.4838128590757956, "grad_norm": 0.441742867231369, "learning_rate": 0.00010325842550592029, "loss": 1.453, "step": 37232 }, { "epoch": 0.4838258536197114, "grad_norm": 0.39225175976753235, "learning_rate": 0.0001032558260440089, "loss": 1.4559, "step": 37233 }, { "epoch": 0.4838388481636273, "grad_norm": 0.3734672963619232, "learning_rate": 0.00010325322658209751, "loss": 1.2709, "step": 37234 }, { "epoch": 0.48385184270754317, "grad_norm": 0.3944225609302521, "learning_rate": 0.00010325062712018612, "loss": 1.3263, "step": 37235 }, { "epoch": 0.48386483725145907, "grad_norm": 0.46858030557632446, "learning_rate": 0.00010324802765827474, "loss": 1.4601, "step": 37236 }, { "epoch": 0.4838778317953749, "grad_norm": 0.36646565794944763, "learning_rate": 0.00010324542819636335, "loss": 1.1824, "step": 37237 }, { "epoch": 0.4838908263392908, "grad_norm": 0.3870489001274109, "learning_rate": 0.00010324282873445197, "loss": 1.3307, "step": 37238 }, { "epoch": 0.48390382088320666, "grad_norm": 0.44329240918159485, "learning_rate": 0.00010324022927254058, "loss": 1.2801, "step": 37239 }, { "epoch": 0.48391681542712256, "grad_norm": 0.38681545853614807, "learning_rate": 0.00010323762981062921, "loss": 1.3987, "step": 37240 }, { "epoch": 0.4839298099710384, "grad_norm": 0.4820795953273773, "learning_rate": 0.00010323503034871783, "loss": 1.3726, "step": 37241 }, { "epoch": 0.4839428045149543, "grad_norm": 0.29340028762817383, "learning_rate": 0.00010323243088680644, "loss": 1.4184, "step": 37242 }, { "epoch": 0.48395579905887015, "grad_norm": 0.34914180636405945, "learning_rate": 0.00010322983142489506, "loss": 1.1709, "step": 37243 }, { "epoch": 0.48396879360278605, "grad_norm": 0.3743126392364502, "learning_rate": 0.00010322723196298367, "loss": 1.4648, "step": 37244 }, { "epoch": 0.4839817881467019, "grad_norm": 0.37745729088783264, "learning_rate": 0.00010322463250107228, "loss": 1.1361, "step": 37245 }, { "epoch": 0.4839947826906178, "grad_norm": 0.4105580449104309, "learning_rate": 0.0001032220330391609, "loss": 1.4258, "step": 37246 }, { "epoch": 0.48400777723453364, "grad_norm": 0.5369956493377686, "learning_rate": 0.00010321943357724952, "loss": 1.3895, "step": 37247 }, { "epoch": 0.48402077177844954, "grad_norm": 0.39933329820632935, "learning_rate": 0.00010321683411533813, "loss": 1.3426, "step": 37248 }, { "epoch": 0.4840337663223654, "grad_norm": 0.43829116225242615, "learning_rate": 0.00010321423465342674, "loss": 1.378, "step": 37249 }, { "epoch": 0.4840467608662813, "grad_norm": 0.4701855480670929, "learning_rate": 0.00010321163519151535, "loss": 1.251, "step": 37250 }, { "epoch": 0.48405975541019713, "grad_norm": 0.43581947684288025, "learning_rate": 0.00010320903572960399, "loss": 1.536, "step": 37251 }, { "epoch": 0.48407274995411304, "grad_norm": 0.5097048878669739, "learning_rate": 0.0001032064362676926, "loss": 1.3069, "step": 37252 }, { "epoch": 0.4840857444980289, "grad_norm": 0.40764036774635315, "learning_rate": 0.00010320383680578121, "loss": 1.3116, "step": 37253 }, { "epoch": 0.4840987390419448, "grad_norm": 0.48757895827293396, "learning_rate": 0.00010320123734386982, "loss": 1.5043, "step": 37254 }, { "epoch": 0.4841117335858606, "grad_norm": 0.4363167881965637, "learning_rate": 0.00010319863788195845, "loss": 1.3365, "step": 37255 }, { "epoch": 0.4841247281297765, "grad_norm": 0.375189870595932, "learning_rate": 0.00010319603842004706, "loss": 1.3692, "step": 37256 }, { "epoch": 0.48413772267369237, "grad_norm": 0.38907134532928467, "learning_rate": 0.00010319343895813567, "loss": 1.3163, "step": 37257 }, { "epoch": 0.4841507172176083, "grad_norm": 0.5125067234039307, "learning_rate": 0.00010319083949622428, "loss": 1.4085, "step": 37258 }, { "epoch": 0.4841637117615241, "grad_norm": 0.39381349086761475, "learning_rate": 0.00010318824003431292, "loss": 1.342, "step": 37259 }, { "epoch": 0.48417670630544, "grad_norm": 0.42297041416168213, "learning_rate": 0.00010318564057240151, "loss": 1.6378, "step": 37260 }, { "epoch": 0.48418970084935586, "grad_norm": 0.4624077379703522, "learning_rate": 0.00010318304111049013, "loss": 1.5711, "step": 37261 }, { "epoch": 0.48420269539327176, "grad_norm": 0.35115256905555725, "learning_rate": 0.00010318044164857874, "loss": 1.5156, "step": 37262 }, { "epoch": 0.4842156899371876, "grad_norm": 0.35407575964927673, "learning_rate": 0.00010317784218666737, "loss": 1.2877, "step": 37263 }, { "epoch": 0.4842286844811035, "grad_norm": 0.3609103560447693, "learning_rate": 0.00010317524272475599, "loss": 1.2367, "step": 37264 }, { "epoch": 0.48424167902501936, "grad_norm": 0.5343315601348877, "learning_rate": 0.0001031726432628446, "loss": 1.575, "step": 37265 }, { "epoch": 0.48425467356893526, "grad_norm": 0.3595285415649414, "learning_rate": 0.00010317004380093321, "loss": 1.2495, "step": 37266 }, { "epoch": 0.4842676681128511, "grad_norm": 0.4144512116909027, "learning_rate": 0.00010316744433902183, "loss": 1.5043, "step": 37267 }, { "epoch": 0.484280662656767, "grad_norm": 0.36911237239837646, "learning_rate": 0.00010316484487711044, "loss": 1.1594, "step": 37268 }, { "epoch": 0.48429365720068285, "grad_norm": 0.40948423743247986, "learning_rate": 0.00010316224541519905, "loss": 1.33, "step": 37269 }, { "epoch": 0.48430665174459875, "grad_norm": 0.3155144453048706, "learning_rate": 0.00010315964595328766, "loss": 1.3853, "step": 37270 }, { "epoch": 0.4843196462885146, "grad_norm": 0.37012192606925964, "learning_rate": 0.0001031570464913763, "loss": 1.3815, "step": 37271 }, { "epoch": 0.4843326408324305, "grad_norm": 0.5392526388168335, "learning_rate": 0.00010315444702946491, "loss": 1.5236, "step": 37272 }, { "epoch": 0.48434563537634634, "grad_norm": 0.40471458435058594, "learning_rate": 0.00010315184756755351, "loss": 1.3766, "step": 37273 }, { "epoch": 0.48435862992026224, "grad_norm": 0.375217080116272, "learning_rate": 0.00010314924810564212, "loss": 1.3948, "step": 37274 }, { "epoch": 0.4843716244641781, "grad_norm": 0.4086691439151764, "learning_rate": 0.00010314664864373076, "loss": 1.4578, "step": 37275 }, { "epoch": 0.484384619008094, "grad_norm": 0.34421971440315247, "learning_rate": 0.00010314404918181937, "loss": 1.2724, "step": 37276 }, { "epoch": 0.48439761355200983, "grad_norm": 0.4066241383552551, "learning_rate": 0.00010314144971990798, "loss": 1.5079, "step": 37277 }, { "epoch": 0.48441060809592573, "grad_norm": 0.5166919231414795, "learning_rate": 0.00010313885025799659, "loss": 1.4763, "step": 37278 }, { "epoch": 0.4844236026398416, "grad_norm": 0.4615444242954254, "learning_rate": 0.00010313625079608522, "loss": 1.5526, "step": 37279 }, { "epoch": 0.4844365971837575, "grad_norm": 0.42272472381591797, "learning_rate": 0.00010313365133417383, "loss": 1.3383, "step": 37280 }, { "epoch": 0.4844495917276733, "grad_norm": 0.3164714574813843, "learning_rate": 0.00010313105187226244, "loss": 1.326, "step": 37281 }, { "epoch": 0.4844625862715892, "grad_norm": 0.4370535612106323, "learning_rate": 0.00010312845241035108, "loss": 1.4286, "step": 37282 }, { "epoch": 0.48447558081550507, "grad_norm": 0.43848681449890137, "learning_rate": 0.00010312585294843969, "loss": 1.4454, "step": 37283 }, { "epoch": 0.48448857535942097, "grad_norm": 0.542762279510498, "learning_rate": 0.0001031232534865283, "loss": 1.5935, "step": 37284 }, { "epoch": 0.4845015699033368, "grad_norm": 0.3079347312450409, "learning_rate": 0.00010312065402461691, "loss": 1.3663, "step": 37285 }, { "epoch": 0.4845145644472527, "grad_norm": 0.3275371193885803, "learning_rate": 0.00010311805456270553, "loss": 1.288, "step": 37286 }, { "epoch": 0.48452755899116856, "grad_norm": 0.4336625039577484, "learning_rate": 0.00010311545510079415, "loss": 1.2743, "step": 37287 }, { "epoch": 0.48454055353508446, "grad_norm": 0.3128984868526459, "learning_rate": 0.00010311285563888276, "loss": 1.0264, "step": 37288 }, { "epoch": 0.4845535480790003, "grad_norm": 0.40120580792427063, "learning_rate": 0.00010311025617697137, "loss": 1.3331, "step": 37289 }, { "epoch": 0.4845665426229162, "grad_norm": 0.4253067672252655, "learning_rate": 0.00010310765671505999, "loss": 1.4092, "step": 37290 }, { "epoch": 0.48457953716683205, "grad_norm": 0.5073135495185852, "learning_rate": 0.0001031050572531486, "loss": 1.5909, "step": 37291 }, { "epoch": 0.48459253171074795, "grad_norm": 0.3991956114768982, "learning_rate": 0.00010310245779123721, "loss": 1.3166, "step": 37292 }, { "epoch": 0.4846055262546638, "grad_norm": 0.46882501244544983, "learning_rate": 0.00010309985832932582, "loss": 1.5447, "step": 37293 }, { "epoch": 0.4846185207985797, "grad_norm": 0.35512053966522217, "learning_rate": 0.00010309725886741446, "loss": 1.3552, "step": 37294 }, { "epoch": 0.48463151534249554, "grad_norm": 0.4463934898376465, "learning_rate": 0.00010309465940550307, "loss": 1.5584, "step": 37295 }, { "epoch": 0.48464450988641145, "grad_norm": 0.38918331265449524, "learning_rate": 0.00010309205994359168, "loss": 1.4079, "step": 37296 }, { "epoch": 0.4846575044303273, "grad_norm": 0.4286218285560608, "learning_rate": 0.0001030894604816803, "loss": 1.5875, "step": 37297 }, { "epoch": 0.4846704989742432, "grad_norm": 0.3851255774497986, "learning_rate": 0.00010308686101976892, "loss": 1.5539, "step": 37298 }, { "epoch": 0.48468349351815904, "grad_norm": 0.47664251923561096, "learning_rate": 0.00010308426155785753, "loss": 1.2369, "step": 37299 }, { "epoch": 0.48469648806207494, "grad_norm": 0.3159470558166504, "learning_rate": 0.00010308166209594614, "loss": 1.4097, "step": 37300 }, { "epoch": 0.4847094826059908, "grad_norm": 0.3819974958896637, "learning_rate": 0.00010307906263403475, "loss": 1.3115, "step": 37301 }, { "epoch": 0.4847224771499067, "grad_norm": 0.44311949610710144, "learning_rate": 0.00010307646317212338, "loss": 1.4432, "step": 37302 }, { "epoch": 0.4847354716938226, "grad_norm": 0.39163726568222046, "learning_rate": 0.00010307386371021199, "loss": 1.4441, "step": 37303 }, { "epoch": 0.48474846623773843, "grad_norm": 0.3820706903934479, "learning_rate": 0.0001030712642483006, "loss": 1.3656, "step": 37304 }, { "epoch": 0.48476146078165433, "grad_norm": 0.4349375367164612, "learning_rate": 0.00010306866478638921, "loss": 1.378, "step": 37305 }, { "epoch": 0.4847744553255702, "grad_norm": 0.38828837871551514, "learning_rate": 0.00010306606532447785, "loss": 1.2803, "step": 37306 }, { "epoch": 0.4847874498694861, "grad_norm": 0.3196861743927002, "learning_rate": 0.00010306346586256646, "loss": 1.3451, "step": 37307 }, { "epoch": 0.4848004444134019, "grad_norm": 0.40137940645217896, "learning_rate": 0.00010306086640065507, "loss": 1.217, "step": 37308 }, { "epoch": 0.4848134389573178, "grad_norm": 0.4123448431491852, "learning_rate": 0.00010305826693874368, "loss": 1.3543, "step": 37309 }, { "epoch": 0.48482643350123367, "grad_norm": 0.38477200269699097, "learning_rate": 0.0001030556674768323, "loss": 1.292, "step": 37310 }, { "epoch": 0.48483942804514957, "grad_norm": 0.2858336865901947, "learning_rate": 0.00010305306801492092, "loss": 1.2048, "step": 37311 }, { "epoch": 0.4848524225890654, "grad_norm": 0.4781274199485779, "learning_rate": 0.00010305046855300953, "loss": 1.2614, "step": 37312 }, { "epoch": 0.4848654171329813, "grad_norm": 0.41455015540122986, "learning_rate": 0.00010304786909109814, "loss": 1.4943, "step": 37313 }, { "epoch": 0.48487841167689716, "grad_norm": 0.3462524712085724, "learning_rate": 0.00010304526962918678, "loss": 1.3053, "step": 37314 }, { "epoch": 0.48489140622081306, "grad_norm": 0.5161040425300598, "learning_rate": 0.00010304267016727537, "loss": 1.4796, "step": 37315 }, { "epoch": 0.4849044007647289, "grad_norm": 0.3433058559894562, "learning_rate": 0.00010304007070536398, "loss": 1.4006, "step": 37316 }, { "epoch": 0.4849173953086448, "grad_norm": 0.4409097135066986, "learning_rate": 0.00010303747124345262, "loss": 1.3829, "step": 37317 }, { "epoch": 0.48493038985256065, "grad_norm": 0.46034157276153564, "learning_rate": 0.00010303487178154123, "loss": 1.4914, "step": 37318 }, { "epoch": 0.48494338439647655, "grad_norm": 0.45123717188835144, "learning_rate": 0.00010303227231962984, "loss": 1.4212, "step": 37319 }, { "epoch": 0.4849563789403924, "grad_norm": 0.3614075779914856, "learning_rate": 0.00010302967285771845, "loss": 1.507, "step": 37320 }, { "epoch": 0.4849693734843083, "grad_norm": 0.33442336320877075, "learning_rate": 0.00010302707339580708, "loss": 1.4008, "step": 37321 }, { "epoch": 0.48498236802822414, "grad_norm": 0.3285088539123535, "learning_rate": 0.00010302447393389569, "loss": 1.3035, "step": 37322 }, { "epoch": 0.48499536257214004, "grad_norm": 0.3431672155857086, "learning_rate": 0.0001030218744719843, "loss": 1.3545, "step": 37323 }, { "epoch": 0.4850083571160559, "grad_norm": 0.33698031306266785, "learning_rate": 0.00010301927501007291, "loss": 1.2774, "step": 37324 }, { "epoch": 0.4850213516599718, "grad_norm": 0.40808069705963135, "learning_rate": 0.00010301667554816155, "loss": 1.4727, "step": 37325 }, { "epoch": 0.48503434620388763, "grad_norm": 0.40479081869125366, "learning_rate": 0.00010301407608625016, "loss": 1.3594, "step": 37326 }, { "epoch": 0.48504734074780353, "grad_norm": 0.43282344937324524, "learning_rate": 0.00010301147662433876, "loss": 1.3836, "step": 37327 }, { "epoch": 0.4850603352917194, "grad_norm": 0.3177891969680786, "learning_rate": 0.00010300887716242737, "loss": 1.5342, "step": 37328 }, { "epoch": 0.4850733298356353, "grad_norm": 0.46543434262275696, "learning_rate": 0.00010300627770051601, "loss": 1.4264, "step": 37329 }, { "epoch": 0.4850863243795511, "grad_norm": 0.37158024311065674, "learning_rate": 0.00010300367823860462, "loss": 1.3821, "step": 37330 }, { "epoch": 0.485099318923467, "grad_norm": 0.3421456217765808, "learning_rate": 0.00010300107877669323, "loss": 1.3901, "step": 37331 }, { "epoch": 0.48511231346738287, "grad_norm": 0.4326415956020355, "learning_rate": 0.00010299847931478184, "loss": 1.6147, "step": 37332 }, { "epoch": 0.4851253080112988, "grad_norm": 0.43219414353370667, "learning_rate": 0.00010299587985287046, "loss": 1.5064, "step": 37333 }, { "epoch": 0.4851383025552146, "grad_norm": 0.4410184919834137, "learning_rate": 0.00010299328039095908, "loss": 1.5092, "step": 37334 }, { "epoch": 0.4851512970991305, "grad_norm": 0.3448207676410675, "learning_rate": 0.00010299068092904769, "loss": 1.2973, "step": 37335 }, { "epoch": 0.48516429164304636, "grad_norm": 0.31727349758148193, "learning_rate": 0.0001029880814671363, "loss": 1.2197, "step": 37336 }, { "epoch": 0.48517728618696226, "grad_norm": 0.4648221731185913, "learning_rate": 0.00010298548200522494, "loss": 1.3931, "step": 37337 }, { "epoch": 0.4851902807308781, "grad_norm": 0.3920912742614746, "learning_rate": 0.00010298288254331355, "loss": 1.3688, "step": 37338 }, { "epoch": 0.485203275274794, "grad_norm": 0.5389154553413391, "learning_rate": 0.00010298028308140216, "loss": 1.5543, "step": 37339 }, { "epoch": 0.48521626981870986, "grad_norm": 0.39465224742889404, "learning_rate": 0.00010297768361949075, "loss": 1.5527, "step": 37340 }, { "epoch": 0.48522926436262576, "grad_norm": 0.4820897877216339, "learning_rate": 0.00010297508415757939, "loss": 1.5539, "step": 37341 }, { "epoch": 0.4852422589065416, "grad_norm": 0.41805174946784973, "learning_rate": 0.000102972484695668, "loss": 1.4556, "step": 37342 }, { "epoch": 0.4852552534504575, "grad_norm": 0.3391643762588501, "learning_rate": 0.00010296988523375661, "loss": 1.3382, "step": 37343 }, { "epoch": 0.48526824799437335, "grad_norm": 0.4018622636795044, "learning_rate": 0.00010296728577184523, "loss": 1.4233, "step": 37344 }, { "epoch": 0.48528124253828925, "grad_norm": 0.2915011942386627, "learning_rate": 0.00010296468630993385, "loss": 1.3075, "step": 37345 }, { "epoch": 0.4852942370822051, "grad_norm": 0.36640891432762146, "learning_rate": 0.00010296208684802246, "loss": 1.2986, "step": 37346 }, { "epoch": 0.485307231626121, "grad_norm": 0.29109883308410645, "learning_rate": 0.00010295948738611107, "loss": 1.1549, "step": 37347 }, { "epoch": 0.48532022617003684, "grad_norm": 0.4269489347934723, "learning_rate": 0.00010295688792419968, "loss": 1.4679, "step": 37348 }, { "epoch": 0.48533322071395274, "grad_norm": 0.3677750527858734, "learning_rate": 0.00010295428846228832, "loss": 1.3904, "step": 37349 }, { "epoch": 0.4853462152578686, "grad_norm": 0.4265616536140442, "learning_rate": 0.00010295168900037693, "loss": 1.3991, "step": 37350 }, { "epoch": 0.4853592098017845, "grad_norm": 0.4789171814918518, "learning_rate": 0.00010294908953846554, "loss": 1.6488, "step": 37351 }, { "epoch": 0.48537220434570033, "grad_norm": 0.4408518373966217, "learning_rate": 0.00010294649007655415, "loss": 1.4688, "step": 37352 }, { "epoch": 0.48538519888961623, "grad_norm": 0.3400838375091553, "learning_rate": 0.00010294389061464278, "loss": 1.3789, "step": 37353 }, { "epoch": 0.4853981934335321, "grad_norm": 0.41636016964912415, "learning_rate": 0.00010294129115273139, "loss": 1.4442, "step": 37354 }, { "epoch": 0.485411187977448, "grad_norm": 0.5291188955307007, "learning_rate": 0.00010293869169082, "loss": 1.4994, "step": 37355 }, { "epoch": 0.4854241825213638, "grad_norm": 0.4217016398906708, "learning_rate": 0.00010293609222890864, "loss": 1.4671, "step": 37356 }, { "epoch": 0.4854371770652797, "grad_norm": 0.37189963459968567, "learning_rate": 0.00010293349276699724, "loss": 1.2521, "step": 37357 }, { "epoch": 0.48545017160919557, "grad_norm": 0.5843049883842468, "learning_rate": 0.00010293089330508585, "loss": 1.4887, "step": 37358 }, { "epoch": 0.48546316615311147, "grad_norm": 0.43090713024139404, "learning_rate": 0.00010292829384317446, "loss": 1.3354, "step": 37359 }, { "epoch": 0.4854761606970273, "grad_norm": 0.3094753324985504, "learning_rate": 0.0001029256943812631, "loss": 1.4077, "step": 37360 }, { "epoch": 0.4854891552409432, "grad_norm": 0.3762724995613098, "learning_rate": 0.0001029230949193517, "loss": 1.3662, "step": 37361 }, { "epoch": 0.48550214978485906, "grad_norm": 0.41917455196380615, "learning_rate": 0.00010292049545744032, "loss": 1.6372, "step": 37362 }, { "epoch": 0.48551514432877496, "grad_norm": 0.3764312267303467, "learning_rate": 0.00010291789599552893, "loss": 1.5298, "step": 37363 }, { "epoch": 0.4855281388726908, "grad_norm": 0.3844401240348816, "learning_rate": 0.00010291529653361755, "loss": 1.279, "step": 37364 }, { "epoch": 0.4855411334166067, "grad_norm": 0.34436261653900146, "learning_rate": 0.00010291269707170616, "loss": 1.2351, "step": 37365 }, { "epoch": 0.48555412796052255, "grad_norm": 0.42525362968444824, "learning_rate": 0.00010291009760979477, "loss": 1.4303, "step": 37366 }, { "epoch": 0.48556712250443845, "grad_norm": 0.40984097123146057, "learning_rate": 0.00010290749814788339, "loss": 1.4325, "step": 37367 }, { "epoch": 0.4855801170483543, "grad_norm": 0.42023766040802, "learning_rate": 0.00010290489868597202, "loss": 1.4536, "step": 37368 }, { "epoch": 0.4855931115922702, "grad_norm": 0.31130924820899963, "learning_rate": 0.00010290229922406062, "loss": 1.4841, "step": 37369 }, { "epoch": 0.48560610613618604, "grad_norm": 0.4077592194080353, "learning_rate": 0.00010289969976214923, "loss": 1.4717, "step": 37370 }, { "epoch": 0.48561910068010194, "grad_norm": 0.486330509185791, "learning_rate": 0.00010289710030023784, "loss": 1.419, "step": 37371 }, { "epoch": 0.4856320952240178, "grad_norm": 0.40063440799713135, "learning_rate": 0.00010289450083832648, "loss": 1.4058, "step": 37372 }, { "epoch": 0.4856450897679337, "grad_norm": 0.4688246548175812, "learning_rate": 0.00010289190137641509, "loss": 1.5506, "step": 37373 }, { "epoch": 0.48565808431184954, "grad_norm": 0.4947185218334198, "learning_rate": 0.0001028893019145037, "loss": 1.3609, "step": 37374 }, { "epoch": 0.48567107885576544, "grad_norm": 0.3377351760864258, "learning_rate": 0.00010288670245259231, "loss": 1.4575, "step": 37375 }, { "epoch": 0.4856840733996813, "grad_norm": 0.4332377016544342, "learning_rate": 0.00010288410299068094, "loss": 1.3543, "step": 37376 }, { "epoch": 0.4856970679435972, "grad_norm": 0.5080888271331787, "learning_rate": 0.00010288150352876955, "loss": 1.2991, "step": 37377 }, { "epoch": 0.48571006248751303, "grad_norm": 0.3906863033771515, "learning_rate": 0.00010287890406685816, "loss": 1.4482, "step": 37378 }, { "epoch": 0.48572305703142893, "grad_norm": 0.3924091160297394, "learning_rate": 0.00010287630460494677, "loss": 1.296, "step": 37379 }, { "epoch": 0.48573605157534483, "grad_norm": 0.49593600630760193, "learning_rate": 0.00010287370514303541, "loss": 1.4692, "step": 37380 }, { "epoch": 0.4857490461192607, "grad_norm": 0.40313035249710083, "learning_rate": 0.00010287110568112402, "loss": 1.3329, "step": 37381 }, { "epoch": 0.4857620406631766, "grad_norm": 0.35620787739753723, "learning_rate": 0.00010286850621921262, "loss": 1.2116, "step": 37382 }, { "epoch": 0.4857750352070924, "grad_norm": 0.39096513390541077, "learning_rate": 0.00010286590675730123, "loss": 1.477, "step": 37383 }, { "epoch": 0.4857880297510083, "grad_norm": 0.3853517770767212, "learning_rate": 0.00010286330729538987, "loss": 1.4554, "step": 37384 }, { "epoch": 0.48580102429492417, "grad_norm": 0.452470600605011, "learning_rate": 0.00010286070783347848, "loss": 1.2566, "step": 37385 }, { "epoch": 0.48581401883884007, "grad_norm": 0.4798089563846588, "learning_rate": 0.00010285810837156709, "loss": 1.3867, "step": 37386 }, { "epoch": 0.4858270133827559, "grad_norm": 0.42197123169898987, "learning_rate": 0.0001028555089096557, "loss": 1.5808, "step": 37387 }, { "epoch": 0.4858400079266718, "grad_norm": 0.356563925743103, "learning_rate": 0.00010285290944774432, "loss": 1.3457, "step": 37388 }, { "epoch": 0.48585300247058766, "grad_norm": 0.48244062066078186, "learning_rate": 0.00010285030998583293, "loss": 1.3217, "step": 37389 }, { "epoch": 0.48586599701450356, "grad_norm": 0.4336363673210144, "learning_rate": 0.00010284771052392155, "loss": 1.4281, "step": 37390 }, { "epoch": 0.4858789915584194, "grad_norm": 0.30372154712677, "learning_rate": 0.00010284511106201018, "loss": 1.1803, "step": 37391 }, { "epoch": 0.4858919861023353, "grad_norm": 0.4251910150051117, "learning_rate": 0.0001028425116000988, "loss": 1.3865, "step": 37392 }, { "epoch": 0.48590498064625115, "grad_norm": 0.41322511434555054, "learning_rate": 0.0001028399121381874, "loss": 1.2287, "step": 37393 }, { "epoch": 0.48591797519016705, "grad_norm": 0.3717118501663208, "learning_rate": 0.00010283731267627602, "loss": 1.3218, "step": 37394 }, { "epoch": 0.4859309697340829, "grad_norm": 0.36697232723236084, "learning_rate": 0.00010283471321436464, "loss": 1.4102, "step": 37395 }, { "epoch": 0.4859439642779988, "grad_norm": 0.34533339738845825, "learning_rate": 0.00010283211375245325, "loss": 1.4473, "step": 37396 }, { "epoch": 0.48595695882191464, "grad_norm": 0.3668302595615387, "learning_rate": 0.00010282951429054186, "loss": 1.4159, "step": 37397 }, { "epoch": 0.48596995336583054, "grad_norm": 0.34340932965278625, "learning_rate": 0.00010282691482863047, "loss": 1.4616, "step": 37398 }, { "epoch": 0.4859829479097464, "grad_norm": 0.43444743752479553, "learning_rate": 0.0001028243153667191, "loss": 1.3618, "step": 37399 }, { "epoch": 0.4859959424536623, "grad_norm": 0.42633339762687683, "learning_rate": 0.00010282171590480771, "loss": 1.4518, "step": 37400 }, { "epoch": 0.48600893699757813, "grad_norm": 0.2840575575828552, "learning_rate": 0.00010281911644289632, "loss": 1.0753, "step": 37401 }, { "epoch": 0.48602193154149403, "grad_norm": 0.3826866149902344, "learning_rate": 0.00010281651698098493, "loss": 1.2496, "step": 37402 }, { "epoch": 0.4860349260854099, "grad_norm": 0.32163798809051514, "learning_rate": 0.00010281391751907357, "loss": 1.1408, "step": 37403 }, { "epoch": 0.4860479206293258, "grad_norm": 0.43064308166503906, "learning_rate": 0.00010281131805716218, "loss": 1.364, "step": 37404 }, { "epoch": 0.4860609151732416, "grad_norm": 0.34890252351760864, "learning_rate": 0.00010280871859525079, "loss": 1.5047, "step": 37405 }, { "epoch": 0.4860739097171575, "grad_norm": 0.46419596672058105, "learning_rate": 0.0001028061191333394, "loss": 1.3789, "step": 37406 }, { "epoch": 0.48608690426107337, "grad_norm": 0.439878910779953, "learning_rate": 0.00010280351967142803, "loss": 1.7154, "step": 37407 }, { "epoch": 0.48609989880498927, "grad_norm": 0.47209402918815613, "learning_rate": 0.00010280092020951664, "loss": 1.2824, "step": 37408 }, { "epoch": 0.4861128933489051, "grad_norm": 0.5429522395133972, "learning_rate": 0.00010279832074760525, "loss": 1.4048, "step": 37409 }, { "epoch": 0.486125887892821, "grad_norm": 0.38462162017822266, "learning_rate": 0.00010279572128569386, "loss": 1.4376, "step": 37410 }, { "epoch": 0.48613888243673686, "grad_norm": 0.3830171823501587, "learning_rate": 0.00010279312182378248, "loss": 1.4787, "step": 37411 }, { "epoch": 0.48615187698065276, "grad_norm": 0.2915896475315094, "learning_rate": 0.0001027905223618711, "loss": 1.2151, "step": 37412 }, { "epoch": 0.4861648715245686, "grad_norm": 0.523921549320221, "learning_rate": 0.0001027879228999597, "loss": 1.4017, "step": 37413 }, { "epoch": 0.4861778660684845, "grad_norm": 0.4897431433200836, "learning_rate": 0.00010278532343804832, "loss": 1.3038, "step": 37414 }, { "epoch": 0.48619086061240036, "grad_norm": 0.3100225627422333, "learning_rate": 0.00010278272397613695, "loss": 1.1286, "step": 37415 }, { "epoch": 0.48620385515631626, "grad_norm": 0.3244931697845459, "learning_rate": 0.00010278012451422557, "loss": 1.3238, "step": 37416 }, { "epoch": 0.4862168497002321, "grad_norm": 0.5111957788467407, "learning_rate": 0.00010277752505231418, "loss": 1.603, "step": 37417 }, { "epoch": 0.486229844244148, "grad_norm": 0.40632325410842896, "learning_rate": 0.00010277492559040279, "loss": 1.2489, "step": 37418 }, { "epoch": 0.48624283878806385, "grad_norm": 0.5140093564987183, "learning_rate": 0.00010277232612849141, "loss": 1.3283, "step": 37419 }, { "epoch": 0.48625583333197975, "grad_norm": 0.34243983030319214, "learning_rate": 0.00010276972666658002, "loss": 1.3781, "step": 37420 }, { "epoch": 0.4862688278758956, "grad_norm": 0.4177440106868744, "learning_rate": 0.00010276712720466863, "loss": 1.2799, "step": 37421 }, { "epoch": 0.4862818224198115, "grad_norm": 0.4968373775482178, "learning_rate": 0.00010276452774275724, "loss": 1.4755, "step": 37422 }, { "epoch": 0.48629481696372734, "grad_norm": 0.44247928261756897, "learning_rate": 0.00010276192828084588, "loss": 1.3122, "step": 37423 }, { "epoch": 0.48630781150764324, "grad_norm": 0.4300784170627594, "learning_rate": 0.00010275932881893448, "loss": 1.2055, "step": 37424 }, { "epoch": 0.4863208060515591, "grad_norm": 0.3489111363887787, "learning_rate": 0.00010275672935702309, "loss": 1.1697, "step": 37425 }, { "epoch": 0.486333800595475, "grad_norm": 0.4759625792503357, "learning_rate": 0.0001027541298951117, "loss": 1.3755, "step": 37426 }, { "epoch": 0.48634679513939083, "grad_norm": 0.45506060123443604, "learning_rate": 0.00010275153043320034, "loss": 1.4272, "step": 37427 }, { "epoch": 0.48635978968330673, "grad_norm": 0.5025519728660583, "learning_rate": 0.00010274893097128895, "loss": 1.5254, "step": 37428 }, { "epoch": 0.4863727842272226, "grad_norm": 0.4095103442668915, "learning_rate": 0.00010274633150937756, "loss": 1.5412, "step": 37429 }, { "epoch": 0.4863857787711385, "grad_norm": 0.39778974652290344, "learning_rate": 0.00010274373204746619, "loss": 1.3251, "step": 37430 }, { "epoch": 0.4863987733150543, "grad_norm": 0.4651887118816376, "learning_rate": 0.0001027411325855548, "loss": 1.5838, "step": 37431 }, { "epoch": 0.4864117678589702, "grad_norm": 0.3230370283126831, "learning_rate": 0.00010273853312364341, "loss": 1.7236, "step": 37432 }, { "epoch": 0.48642476240288607, "grad_norm": 0.3638368248939514, "learning_rate": 0.00010273593366173202, "loss": 1.4804, "step": 37433 }, { "epoch": 0.48643775694680197, "grad_norm": 0.413433313369751, "learning_rate": 0.00010273333419982066, "loss": 1.5228, "step": 37434 }, { "epoch": 0.4864507514907178, "grad_norm": 0.30923259258270264, "learning_rate": 0.00010273073473790927, "loss": 1.2484, "step": 37435 }, { "epoch": 0.4864637460346337, "grad_norm": 0.5297190546989441, "learning_rate": 0.00010272813527599788, "loss": 1.6067, "step": 37436 }, { "epoch": 0.48647674057854956, "grad_norm": 0.49403342604637146, "learning_rate": 0.00010272553581408648, "loss": 1.4081, "step": 37437 }, { "epoch": 0.48648973512246546, "grad_norm": 0.4745197892189026, "learning_rate": 0.00010272293635217511, "loss": 1.4268, "step": 37438 }, { "epoch": 0.4865027296663813, "grad_norm": 0.3124939799308777, "learning_rate": 0.00010272033689026372, "loss": 1.4561, "step": 37439 }, { "epoch": 0.4865157242102972, "grad_norm": 0.35278668999671936, "learning_rate": 0.00010271773742835234, "loss": 1.5195, "step": 37440 }, { "epoch": 0.48652871875421305, "grad_norm": 0.43536508083343506, "learning_rate": 0.00010271513796644095, "loss": 1.3404, "step": 37441 }, { "epoch": 0.48654171329812895, "grad_norm": 0.45758774876594543, "learning_rate": 0.00010271253850452957, "loss": 1.402, "step": 37442 }, { "epoch": 0.4865547078420448, "grad_norm": 0.4135849177837372, "learning_rate": 0.00010270993904261818, "loss": 1.5382, "step": 37443 }, { "epoch": 0.4865677023859607, "grad_norm": 0.4844321310520172, "learning_rate": 0.00010270733958070679, "loss": 1.4689, "step": 37444 }, { "epoch": 0.48658069692987654, "grad_norm": 0.3898744285106659, "learning_rate": 0.0001027047401187954, "loss": 1.2793, "step": 37445 }, { "epoch": 0.48659369147379244, "grad_norm": 0.4363359212875366, "learning_rate": 0.00010270214065688404, "loss": 1.4954, "step": 37446 }, { "epoch": 0.4866066860177083, "grad_norm": 0.30738916993141174, "learning_rate": 0.00010269954119497265, "loss": 1.1919, "step": 37447 }, { "epoch": 0.4866196805616242, "grad_norm": 0.368707537651062, "learning_rate": 0.00010269694173306126, "loss": 1.2665, "step": 37448 }, { "epoch": 0.48663267510554004, "grad_norm": 0.3278559446334839, "learning_rate": 0.00010269434227114987, "loss": 1.2652, "step": 37449 }, { "epoch": 0.48664566964945594, "grad_norm": 0.40336546301841736, "learning_rate": 0.0001026917428092385, "loss": 1.5781, "step": 37450 }, { "epoch": 0.4866586641933718, "grad_norm": 0.31379133462905884, "learning_rate": 0.00010268914334732711, "loss": 1.3177, "step": 37451 }, { "epoch": 0.4866716587372877, "grad_norm": 0.30316630005836487, "learning_rate": 0.00010268654388541572, "loss": 1.3077, "step": 37452 }, { "epoch": 0.4866846532812035, "grad_norm": 0.3171525299549103, "learning_rate": 0.00010268394442350433, "loss": 1.1972, "step": 37453 }, { "epoch": 0.48669764782511943, "grad_norm": 0.35748907923698425, "learning_rate": 0.00010268134496159296, "loss": 1.3951, "step": 37454 }, { "epoch": 0.48671064236903533, "grad_norm": 0.3408093750476837, "learning_rate": 0.00010267874549968157, "loss": 1.3051, "step": 37455 }, { "epoch": 0.4867236369129512, "grad_norm": 0.3556941747665405, "learning_rate": 0.00010267614603777018, "loss": 1.3009, "step": 37456 }, { "epoch": 0.4867366314568671, "grad_norm": 0.4327068626880646, "learning_rate": 0.00010267354657585879, "loss": 1.4912, "step": 37457 }, { "epoch": 0.4867496260007829, "grad_norm": 0.29644715785980225, "learning_rate": 0.00010267094711394743, "loss": 1.2904, "step": 37458 }, { "epoch": 0.4867626205446988, "grad_norm": 0.4700661599636078, "learning_rate": 0.00010266834765203604, "loss": 1.4357, "step": 37459 }, { "epoch": 0.48677561508861467, "grad_norm": 0.3661230504512787, "learning_rate": 0.00010266574819012465, "loss": 1.4969, "step": 37460 }, { "epoch": 0.48678860963253057, "grad_norm": 0.4363194704055786, "learning_rate": 0.00010266314872821326, "loss": 1.256, "step": 37461 }, { "epoch": 0.4868016041764464, "grad_norm": 0.4148009121417999, "learning_rate": 0.00010266054926630188, "loss": 1.2995, "step": 37462 }, { "epoch": 0.4868145987203623, "grad_norm": 0.28058937191963196, "learning_rate": 0.0001026579498043905, "loss": 1.3074, "step": 37463 }, { "epoch": 0.48682759326427816, "grad_norm": 0.4307621121406555, "learning_rate": 0.0001026553503424791, "loss": 1.4343, "step": 37464 }, { "epoch": 0.48684058780819406, "grad_norm": 0.4508236050605774, "learning_rate": 0.00010265275088056774, "loss": 1.398, "step": 37465 }, { "epoch": 0.4868535823521099, "grad_norm": 0.3972441554069519, "learning_rate": 0.00010265015141865634, "loss": 1.3853, "step": 37466 }, { "epoch": 0.4868665768960258, "grad_norm": 0.37339210510253906, "learning_rate": 0.00010264755195674495, "loss": 1.3517, "step": 37467 }, { "epoch": 0.48687957143994165, "grad_norm": 0.43052566051483154, "learning_rate": 0.00010264495249483356, "loss": 1.5119, "step": 37468 }, { "epoch": 0.48689256598385755, "grad_norm": 0.38009142875671387, "learning_rate": 0.0001026423530329222, "loss": 1.4826, "step": 37469 }, { "epoch": 0.4869055605277734, "grad_norm": 0.4008859694004059, "learning_rate": 0.00010263975357101081, "loss": 1.374, "step": 37470 }, { "epoch": 0.4869185550716893, "grad_norm": 0.38185590505599976, "learning_rate": 0.00010263715410909942, "loss": 1.417, "step": 37471 }, { "epoch": 0.48693154961560514, "grad_norm": 0.4229472875595093, "learning_rate": 0.00010263455464718803, "loss": 1.3421, "step": 37472 }, { "epoch": 0.48694454415952104, "grad_norm": 0.3342346251010895, "learning_rate": 0.00010263195518527666, "loss": 1.298, "step": 37473 }, { "epoch": 0.4869575387034369, "grad_norm": 0.40374815464019775, "learning_rate": 0.00010262935572336527, "loss": 1.3916, "step": 37474 }, { "epoch": 0.4869705332473528, "grad_norm": 0.30591002106666565, "learning_rate": 0.00010262675626145388, "loss": 1.1477, "step": 37475 }, { "epoch": 0.48698352779126863, "grad_norm": 0.3657781183719635, "learning_rate": 0.00010262415679954249, "loss": 1.3495, "step": 37476 }, { "epoch": 0.48699652233518453, "grad_norm": 0.3618139326572418, "learning_rate": 0.00010262155733763113, "loss": 1.2657, "step": 37477 }, { "epoch": 0.4870095168791004, "grad_norm": 0.3728991448879242, "learning_rate": 0.00010261895787571974, "loss": 1.4445, "step": 37478 }, { "epoch": 0.4870225114230163, "grad_norm": 0.39317935705184937, "learning_rate": 0.00010261635841380834, "loss": 1.4489, "step": 37479 }, { "epoch": 0.4870355059669321, "grad_norm": 0.37254852056503296, "learning_rate": 0.00010261375895189695, "loss": 1.1889, "step": 37480 }, { "epoch": 0.487048500510848, "grad_norm": 0.4452288746833801, "learning_rate": 0.00010261115948998559, "loss": 1.4366, "step": 37481 }, { "epoch": 0.48706149505476387, "grad_norm": 0.4262430965900421, "learning_rate": 0.0001026085600280742, "loss": 1.5928, "step": 37482 }, { "epoch": 0.48707448959867977, "grad_norm": 0.428069144487381, "learning_rate": 0.00010260596056616281, "loss": 1.2481, "step": 37483 }, { "epoch": 0.4870874841425956, "grad_norm": 0.40225380659103394, "learning_rate": 0.00010260336110425142, "loss": 1.3509, "step": 37484 }, { "epoch": 0.4871004786865115, "grad_norm": 0.32335609197616577, "learning_rate": 0.00010260076164234004, "loss": 1.4448, "step": 37485 }, { "epoch": 0.48711347323042736, "grad_norm": 0.41441813111305237, "learning_rate": 0.00010259816218042866, "loss": 1.4519, "step": 37486 }, { "epoch": 0.48712646777434326, "grad_norm": 0.4198852777481079, "learning_rate": 0.00010259556271851727, "loss": 1.4545, "step": 37487 }, { "epoch": 0.4871394623182591, "grad_norm": 0.384016215801239, "learning_rate": 0.00010259296325660588, "loss": 1.1892, "step": 37488 }, { "epoch": 0.487152456862175, "grad_norm": 0.4397445619106293, "learning_rate": 0.00010259036379469452, "loss": 1.4366, "step": 37489 }, { "epoch": 0.48716545140609085, "grad_norm": 0.4898951053619385, "learning_rate": 0.00010258776433278313, "loss": 1.4704, "step": 37490 }, { "epoch": 0.48717844595000676, "grad_norm": 0.4236140549182892, "learning_rate": 0.00010258516487087174, "loss": 1.3478, "step": 37491 }, { "epoch": 0.4871914404939226, "grad_norm": 0.3601870834827423, "learning_rate": 0.00010258256540896033, "loss": 1.3734, "step": 37492 }, { "epoch": 0.4872044350378385, "grad_norm": 0.3674706518650055, "learning_rate": 0.00010257996594704897, "loss": 1.3232, "step": 37493 }, { "epoch": 0.48721742958175435, "grad_norm": 0.4053773581981659, "learning_rate": 0.00010257736648513758, "loss": 1.4695, "step": 37494 }, { "epoch": 0.48723042412567025, "grad_norm": 0.3641587793827057, "learning_rate": 0.0001025747670232262, "loss": 1.5134, "step": 37495 }, { "epoch": 0.4872434186695861, "grad_norm": 0.3774276375770569, "learning_rate": 0.0001025721675613148, "loss": 1.5459, "step": 37496 }, { "epoch": 0.487256413213502, "grad_norm": 0.48215439915657043, "learning_rate": 0.00010256956809940343, "loss": 1.4752, "step": 37497 }, { "epoch": 0.48726940775741784, "grad_norm": 0.32459592819213867, "learning_rate": 0.00010256696863749204, "loss": 1.5509, "step": 37498 }, { "epoch": 0.48728240230133374, "grad_norm": 0.3688407242298126, "learning_rate": 0.00010256436917558065, "loss": 1.5481, "step": 37499 }, { "epoch": 0.4872953968452496, "grad_norm": 0.3435598909854889, "learning_rate": 0.00010256176971366926, "loss": 1.4308, "step": 37500 }, { "epoch": 0.4873083913891655, "grad_norm": 0.4777471125125885, "learning_rate": 0.0001025591702517579, "loss": 1.5712, "step": 37501 }, { "epoch": 0.48732138593308133, "grad_norm": 0.414863646030426, "learning_rate": 0.00010255657078984651, "loss": 1.3593, "step": 37502 }, { "epoch": 0.48733438047699723, "grad_norm": 0.31028228998184204, "learning_rate": 0.00010255397132793512, "loss": 1.0727, "step": 37503 }, { "epoch": 0.4873473750209131, "grad_norm": 0.39195629954338074, "learning_rate": 0.00010255137186602375, "loss": 1.2482, "step": 37504 }, { "epoch": 0.487360369564829, "grad_norm": 0.4595802426338196, "learning_rate": 0.00010254877240411236, "loss": 1.4205, "step": 37505 }, { "epoch": 0.4873733641087448, "grad_norm": 0.36123180389404297, "learning_rate": 0.00010254617294220097, "loss": 1.3052, "step": 37506 }, { "epoch": 0.4873863586526607, "grad_norm": 0.28263694047927856, "learning_rate": 0.00010254357348028958, "loss": 1.182, "step": 37507 }, { "epoch": 0.48739935319657657, "grad_norm": 0.36781421303749084, "learning_rate": 0.0001025409740183782, "loss": 1.3414, "step": 37508 }, { "epoch": 0.48741234774049247, "grad_norm": 0.36337050795555115, "learning_rate": 0.00010253837455646682, "loss": 1.397, "step": 37509 }, { "epoch": 0.4874253422844083, "grad_norm": 0.4091852307319641, "learning_rate": 0.00010253577509455543, "loss": 1.5214, "step": 37510 }, { "epoch": 0.4874383368283242, "grad_norm": 0.5029363036155701, "learning_rate": 0.00010253317563264404, "loss": 1.4945, "step": 37511 }, { "epoch": 0.48745133137224006, "grad_norm": 0.3988547623157501, "learning_rate": 0.00010253057617073268, "loss": 1.3492, "step": 37512 }, { "epoch": 0.48746432591615596, "grad_norm": 0.3847947120666504, "learning_rate": 0.00010252797670882129, "loss": 1.4872, "step": 37513 }, { "epoch": 0.4874773204600718, "grad_norm": 0.4792209267616272, "learning_rate": 0.0001025253772469099, "loss": 1.4328, "step": 37514 }, { "epoch": 0.4874903150039877, "grad_norm": 0.39129284024238586, "learning_rate": 0.00010252277778499851, "loss": 1.2987, "step": 37515 }, { "epoch": 0.48750330954790355, "grad_norm": 0.4576675593852997, "learning_rate": 0.00010252017832308713, "loss": 1.5845, "step": 37516 }, { "epoch": 0.48751630409181945, "grad_norm": 0.4679252803325653, "learning_rate": 0.00010251757886117574, "loss": 1.4264, "step": 37517 }, { "epoch": 0.4875292986357353, "grad_norm": 0.3119916021823883, "learning_rate": 0.00010251497939926435, "loss": 1.4119, "step": 37518 }, { "epoch": 0.4875422931796512, "grad_norm": 0.34806376695632935, "learning_rate": 0.00010251237993735297, "loss": 1.3144, "step": 37519 }, { "epoch": 0.48755528772356704, "grad_norm": 0.5126516819000244, "learning_rate": 0.0001025097804754416, "loss": 1.5164, "step": 37520 }, { "epoch": 0.48756828226748294, "grad_norm": 0.60047847032547, "learning_rate": 0.0001025071810135302, "loss": 1.4571, "step": 37521 }, { "epoch": 0.4875812768113988, "grad_norm": 0.43262067437171936, "learning_rate": 0.00010250458155161881, "loss": 1.3572, "step": 37522 }, { "epoch": 0.4875942713553147, "grad_norm": 0.3384360074996948, "learning_rate": 0.00010250198208970742, "loss": 1.2004, "step": 37523 }, { "epoch": 0.48760726589923054, "grad_norm": 0.396616131067276, "learning_rate": 0.00010249938262779606, "loss": 1.3489, "step": 37524 }, { "epoch": 0.48762026044314644, "grad_norm": 0.42384424805641174, "learning_rate": 0.00010249678316588467, "loss": 1.3587, "step": 37525 }, { "epoch": 0.4876332549870623, "grad_norm": 0.38283175230026245, "learning_rate": 0.00010249418370397328, "loss": 1.182, "step": 37526 }, { "epoch": 0.4876462495309782, "grad_norm": 0.2308875024318695, "learning_rate": 0.0001024915842420619, "loss": 1.1661, "step": 37527 }, { "epoch": 0.487659244074894, "grad_norm": 0.36702489852905273, "learning_rate": 0.00010248898478015052, "loss": 1.3899, "step": 37528 }, { "epoch": 0.48767223861880993, "grad_norm": 0.441802978515625, "learning_rate": 0.00010248638531823913, "loss": 1.2033, "step": 37529 }, { "epoch": 0.4876852331627258, "grad_norm": 0.4085250794887543, "learning_rate": 0.00010248378585632774, "loss": 1.5424, "step": 37530 }, { "epoch": 0.4876982277066417, "grad_norm": 0.33680853247642517, "learning_rate": 0.00010248118639441635, "loss": 1.3212, "step": 37531 }, { "epoch": 0.4877112222505576, "grad_norm": 0.5441884398460388, "learning_rate": 0.00010247858693250499, "loss": 1.4323, "step": 37532 }, { "epoch": 0.4877242167944734, "grad_norm": 0.3772077262401581, "learning_rate": 0.00010247598747059359, "loss": 1.5213, "step": 37533 }, { "epoch": 0.4877372113383893, "grad_norm": 0.3871772587299347, "learning_rate": 0.0001024733880086822, "loss": 1.2222, "step": 37534 }, { "epoch": 0.48775020588230517, "grad_norm": 0.39174339175224304, "learning_rate": 0.00010247078854677081, "loss": 1.4126, "step": 37535 }, { "epoch": 0.48776320042622107, "grad_norm": 0.39793696999549866, "learning_rate": 0.00010246818908485945, "loss": 1.5365, "step": 37536 }, { "epoch": 0.4877761949701369, "grad_norm": 0.4222224950790405, "learning_rate": 0.00010246558962294806, "loss": 1.4894, "step": 37537 }, { "epoch": 0.4877891895140528, "grad_norm": 0.30023080110549927, "learning_rate": 0.00010246299016103667, "loss": 1.2925, "step": 37538 }, { "epoch": 0.48780218405796866, "grad_norm": 0.4016575515270233, "learning_rate": 0.00010246039069912528, "loss": 1.2311, "step": 37539 }, { "epoch": 0.48781517860188456, "grad_norm": 0.4106665253639221, "learning_rate": 0.0001024577912372139, "loss": 1.3298, "step": 37540 }, { "epoch": 0.4878281731458004, "grad_norm": 0.4347458481788635, "learning_rate": 0.00010245519177530251, "loss": 1.4243, "step": 37541 }, { "epoch": 0.4878411676897163, "grad_norm": 0.496207594871521, "learning_rate": 0.00010245259231339113, "loss": 1.5363, "step": 37542 }, { "epoch": 0.48785416223363215, "grad_norm": 0.3152560591697693, "learning_rate": 0.00010244999285147976, "loss": 1.3085, "step": 37543 }, { "epoch": 0.48786715677754805, "grad_norm": 0.4746745228767395, "learning_rate": 0.00010244739338956837, "loss": 1.4684, "step": 37544 }, { "epoch": 0.4878801513214639, "grad_norm": 0.42024633288383484, "learning_rate": 0.00010244479392765699, "loss": 1.2928, "step": 37545 }, { "epoch": 0.4878931458653798, "grad_norm": 0.4881391227245331, "learning_rate": 0.00010244219446574558, "loss": 1.4311, "step": 37546 }, { "epoch": 0.48790614040929564, "grad_norm": 0.39378783106803894, "learning_rate": 0.00010243959500383422, "loss": 1.3778, "step": 37547 }, { "epoch": 0.48791913495321154, "grad_norm": 0.3820206820964813, "learning_rate": 0.00010243699554192283, "loss": 1.4847, "step": 37548 }, { "epoch": 0.4879321294971274, "grad_norm": 0.460351824760437, "learning_rate": 0.00010243439608001144, "loss": 1.5352, "step": 37549 }, { "epoch": 0.4879451240410433, "grad_norm": 0.4360557198524475, "learning_rate": 0.00010243179661810005, "loss": 1.3239, "step": 37550 }, { "epoch": 0.48795811858495913, "grad_norm": 0.4283132553100586, "learning_rate": 0.00010242919715618868, "loss": 1.5656, "step": 37551 }, { "epoch": 0.48797111312887503, "grad_norm": 0.5098937749862671, "learning_rate": 0.00010242659769427729, "loss": 1.4628, "step": 37552 }, { "epoch": 0.4879841076727909, "grad_norm": 0.5641338229179382, "learning_rate": 0.0001024239982323659, "loss": 1.3679, "step": 37553 }, { "epoch": 0.4879971022167068, "grad_norm": 0.35181304812431335, "learning_rate": 0.00010242139877045451, "loss": 1.2374, "step": 37554 }, { "epoch": 0.4880100967606226, "grad_norm": 0.4925462305545807, "learning_rate": 0.00010241879930854315, "loss": 1.3761, "step": 37555 }, { "epoch": 0.4880230913045385, "grad_norm": 0.4691649377346039, "learning_rate": 0.00010241619984663176, "loss": 1.412, "step": 37556 }, { "epoch": 0.48803608584845437, "grad_norm": 0.3909376859664917, "learning_rate": 0.00010241360038472037, "loss": 1.4768, "step": 37557 }, { "epoch": 0.48804908039237027, "grad_norm": 0.42458003759384155, "learning_rate": 0.00010241100092280898, "loss": 1.4398, "step": 37558 }, { "epoch": 0.4880620749362861, "grad_norm": 0.4558770954608917, "learning_rate": 0.0001024084014608976, "loss": 1.256, "step": 37559 }, { "epoch": 0.488075069480202, "grad_norm": 0.39437049627304077, "learning_rate": 0.00010240580199898622, "loss": 1.6077, "step": 37560 }, { "epoch": 0.48808806402411786, "grad_norm": 0.3441692590713501, "learning_rate": 0.00010240320253707483, "loss": 1.4481, "step": 37561 }, { "epoch": 0.48810105856803376, "grad_norm": 0.3714215159416199, "learning_rate": 0.00010240060307516344, "loss": 1.2567, "step": 37562 }, { "epoch": 0.4881140531119496, "grad_norm": 0.3831939399242401, "learning_rate": 0.00010239800361325206, "loss": 1.3913, "step": 37563 }, { "epoch": 0.4881270476558655, "grad_norm": 0.5002745389938354, "learning_rate": 0.00010239540415134067, "loss": 1.2416, "step": 37564 }, { "epoch": 0.48814004219978135, "grad_norm": 0.46416550874710083, "learning_rate": 0.00010239280468942929, "loss": 1.6217, "step": 37565 }, { "epoch": 0.48815303674369726, "grad_norm": 0.47698646783828735, "learning_rate": 0.0001023902052275179, "loss": 1.243, "step": 37566 }, { "epoch": 0.4881660312876131, "grad_norm": 0.3888038992881775, "learning_rate": 0.00010238760576560653, "loss": 1.3425, "step": 37567 }, { "epoch": 0.488179025831529, "grad_norm": 0.36654165387153625, "learning_rate": 0.00010238500630369514, "loss": 1.4717, "step": 37568 }, { "epoch": 0.48819202037544485, "grad_norm": 0.3708533048629761, "learning_rate": 0.00010238240684178376, "loss": 1.4543, "step": 37569 }, { "epoch": 0.48820501491936075, "grad_norm": 0.3568820059299469, "learning_rate": 0.00010237980737987237, "loss": 1.3072, "step": 37570 }, { "epoch": 0.4882180094632766, "grad_norm": 0.4768930971622467, "learning_rate": 0.00010237720791796099, "loss": 1.3287, "step": 37571 }, { "epoch": 0.4882310040071925, "grad_norm": 0.3724338114261627, "learning_rate": 0.0001023746084560496, "loss": 1.3614, "step": 37572 }, { "epoch": 0.48824399855110834, "grad_norm": 0.3386293947696686, "learning_rate": 0.00010237200899413821, "loss": 1.096, "step": 37573 }, { "epoch": 0.48825699309502424, "grad_norm": 0.4686654508113861, "learning_rate": 0.00010236940953222682, "loss": 1.6467, "step": 37574 }, { "epoch": 0.4882699876389401, "grad_norm": 0.38210493326187134, "learning_rate": 0.00010236681007031545, "loss": 1.5063, "step": 37575 }, { "epoch": 0.488282982182856, "grad_norm": 0.38487550616264343, "learning_rate": 0.00010236421060840406, "loss": 1.2872, "step": 37576 }, { "epoch": 0.48829597672677183, "grad_norm": 0.46951600909233093, "learning_rate": 0.00010236161114649267, "loss": 1.5463, "step": 37577 }, { "epoch": 0.48830897127068773, "grad_norm": 0.3652660846710205, "learning_rate": 0.00010235901168458131, "loss": 1.3737, "step": 37578 }, { "epoch": 0.4883219658146036, "grad_norm": 0.3704793155193329, "learning_rate": 0.00010235641222266992, "loss": 1.4383, "step": 37579 }, { "epoch": 0.4883349603585195, "grad_norm": 0.39712756872177124, "learning_rate": 0.00010235381276075853, "loss": 1.4024, "step": 37580 }, { "epoch": 0.4883479549024353, "grad_norm": 0.39344578981399536, "learning_rate": 0.00010235121329884714, "loss": 1.3374, "step": 37581 }, { "epoch": 0.4883609494463512, "grad_norm": 0.41694313287734985, "learning_rate": 0.00010234861383693577, "loss": 1.3853, "step": 37582 }, { "epoch": 0.48837394399026707, "grad_norm": 0.3142113983631134, "learning_rate": 0.00010234601437502438, "loss": 1.2767, "step": 37583 }, { "epoch": 0.48838693853418297, "grad_norm": 0.39100587368011475, "learning_rate": 0.00010234341491311299, "loss": 1.2014, "step": 37584 }, { "epoch": 0.4883999330780988, "grad_norm": 0.41757529973983765, "learning_rate": 0.0001023408154512016, "loss": 1.5005, "step": 37585 }, { "epoch": 0.4884129276220147, "grad_norm": 0.3109648525714874, "learning_rate": 0.00010233821598929024, "loss": 1.3451, "step": 37586 }, { "epoch": 0.48842592216593056, "grad_norm": 0.4887288510799408, "learning_rate": 0.00010233561652737885, "loss": 1.5348, "step": 37587 }, { "epoch": 0.48843891670984646, "grad_norm": 0.3605212867259979, "learning_rate": 0.00010233301706546744, "loss": 1.4812, "step": 37588 }, { "epoch": 0.4884519112537623, "grad_norm": 0.41585421562194824, "learning_rate": 0.00010233041760355606, "loss": 1.3376, "step": 37589 }, { "epoch": 0.4884649057976782, "grad_norm": 0.47421103715896606, "learning_rate": 0.0001023278181416447, "loss": 1.6893, "step": 37590 }, { "epoch": 0.48847790034159405, "grad_norm": 0.40023812651634216, "learning_rate": 0.0001023252186797333, "loss": 1.4516, "step": 37591 }, { "epoch": 0.48849089488550995, "grad_norm": 0.40364524722099304, "learning_rate": 0.00010232261921782192, "loss": 1.2565, "step": 37592 }, { "epoch": 0.4885038894294258, "grad_norm": 0.4134129583835602, "learning_rate": 0.00010232001975591053, "loss": 1.366, "step": 37593 }, { "epoch": 0.4885168839733417, "grad_norm": 0.6423876881599426, "learning_rate": 0.00010231742029399915, "loss": 1.455, "step": 37594 }, { "epoch": 0.48852987851725754, "grad_norm": 0.4163878262042999, "learning_rate": 0.00010231482083208776, "loss": 1.4924, "step": 37595 }, { "epoch": 0.48854287306117344, "grad_norm": 0.3769245147705078, "learning_rate": 0.00010231222137017637, "loss": 1.5278, "step": 37596 }, { "epoch": 0.4885558676050893, "grad_norm": 0.3027964234352112, "learning_rate": 0.00010230962190826498, "loss": 1.2966, "step": 37597 }, { "epoch": 0.4885688621490052, "grad_norm": 0.47191503643989563, "learning_rate": 0.00010230702244635362, "loss": 1.3539, "step": 37598 }, { "epoch": 0.48858185669292103, "grad_norm": 0.46247878670692444, "learning_rate": 0.00010230442298444223, "loss": 1.5996, "step": 37599 }, { "epoch": 0.48859485123683694, "grad_norm": 0.5506212115287781, "learning_rate": 0.00010230182352253084, "loss": 1.587, "step": 37600 }, { "epoch": 0.4886078457807528, "grad_norm": 0.34305495023727417, "learning_rate": 0.00010229922406061944, "loss": 1.2006, "step": 37601 }, { "epoch": 0.4886208403246687, "grad_norm": 0.3670308291912079, "learning_rate": 0.00010229662459870808, "loss": 1.5751, "step": 37602 }, { "epoch": 0.4886338348685845, "grad_norm": 0.38744115829467773, "learning_rate": 0.00010229402513679669, "loss": 1.2699, "step": 37603 }, { "epoch": 0.4886468294125004, "grad_norm": 0.3548983037471771, "learning_rate": 0.0001022914256748853, "loss": 1.4695, "step": 37604 }, { "epoch": 0.4886598239564163, "grad_norm": 0.4576367139816284, "learning_rate": 0.00010228882621297391, "loss": 1.2714, "step": 37605 }, { "epoch": 0.4886728185003322, "grad_norm": 0.3778396546840668, "learning_rate": 0.00010228622675106254, "loss": 1.3412, "step": 37606 }, { "epoch": 0.4886858130442481, "grad_norm": 0.30134060978889465, "learning_rate": 0.00010228362728915115, "loss": 1.2226, "step": 37607 }, { "epoch": 0.4886988075881639, "grad_norm": 0.4299182593822479, "learning_rate": 0.00010228102782723976, "loss": 1.6, "step": 37608 }, { "epoch": 0.4887118021320798, "grad_norm": 0.38845837116241455, "learning_rate": 0.00010227842836532837, "loss": 1.4929, "step": 37609 }, { "epoch": 0.48872479667599567, "grad_norm": 0.3826586902141571, "learning_rate": 0.00010227582890341701, "loss": 1.3567, "step": 37610 }, { "epoch": 0.48873779121991157, "grad_norm": 0.40499347448349, "learning_rate": 0.00010227322944150562, "loss": 1.3441, "step": 37611 }, { "epoch": 0.4887507857638274, "grad_norm": 0.42666059732437134, "learning_rate": 0.00010227062997959423, "loss": 1.3862, "step": 37612 }, { "epoch": 0.4887637803077433, "grad_norm": 0.37470707297325134, "learning_rate": 0.00010226803051768284, "loss": 1.4041, "step": 37613 }, { "epoch": 0.48877677485165916, "grad_norm": 0.33899909257888794, "learning_rate": 0.00010226543105577146, "loss": 1.4744, "step": 37614 }, { "epoch": 0.48878976939557506, "grad_norm": 0.3962824046611786, "learning_rate": 0.00010226283159386008, "loss": 1.3148, "step": 37615 }, { "epoch": 0.4888027639394909, "grad_norm": 0.4831918179988861, "learning_rate": 0.00010226023213194869, "loss": 1.3275, "step": 37616 }, { "epoch": 0.4888157584834068, "grad_norm": 0.3771432638168335, "learning_rate": 0.00010225763267003731, "loss": 1.4849, "step": 37617 }, { "epoch": 0.48882875302732265, "grad_norm": 0.32906803488731384, "learning_rate": 0.00010225503320812592, "loss": 1.3202, "step": 37618 }, { "epoch": 0.48884174757123855, "grad_norm": 0.40759846568107605, "learning_rate": 0.00010225243374621453, "loss": 1.5794, "step": 37619 }, { "epoch": 0.4888547421151544, "grad_norm": 0.42669370770454407, "learning_rate": 0.00010224983428430314, "loss": 1.2868, "step": 37620 }, { "epoch": 0.4888677366590703, "grad_norm": 0.3904821276664734, "learning_rate": 0.00010224723482239178, "loss": 1.4206, "step": 37621 }, { "epoch": 0.48888073120298614, "grad_norm": 0.4412972927093506, "learning_rate": 0.00010224463536048039, "loss": 1.4858, "step": 37622 }, { "epoch": 0.48889372574690204, "grad_norm": 0.37078166007995605, "learning_rate": 0.000102242035898569, "loss": 1.732, "step": 37623 }, { "epoch": 0.4889067202908179, "grad_norm": 0.3906558156013489, "learning_rate": 0.00010223943643665761, "loss": 1.4493, "step": 37624 }, { "epoch": 0.4889197148347338, "grad_norm": 0.4475751519203186, "learning_rate": 0.00010223683697474624, "loss": 1.4804, "step": 37625 }, { "epoch": 0.48893270937864963, "grad_norm": 0.3865770101547241, "learning_rate": 0.00010223423751283485, "loss": 1.5469, "step": 37626 }, { "epoch": 0.48894570392256553, "grad_norm": 0.40844962000846863, "learning_rate": 0.00010223163805092346, "loss": 1.4274, "step": 37627 }, { "epoch": 0.4889586984664814, "grad_norm": 0.3634379804134369, "learning_rate": 0.00010222903858901207, "loss": 1.5192, "step": 37628 }, { "epoch": 0.4889716930103973, "grad_norm": 0.46022528409957886, "learning_rate": 0.00010222643912710071, "loss": 1.389, "step": 37629 }, { "epoch": 0.4889846875543131, "grad_norm": 0.3965018391609192, "learning_rate": 0.00010222383966518931, "loss": 1.4472, "step": 37630 }, { "epoch": 0.488997682098229, "grad_norm": 0.4179500639438629, "learning_rate": 0.00010222124020327792, "loss": 1.153, "step": 37631 }, { "epoch": 0.48901067664214487, "grad_norm": 0.37954720854759216, "learning_rate": 0.00010221864074136653, "loss": 1.405, "step": 37632 }, { "epoch": 0.48902367118606077, "grad_norm": 0.4242473542690277, "learning_rate": 0.00010221604127945517, "loss": 1.3825, "step": 37633 }, { "epoch": 0.4890366657299766, "grad_norm": 0.4180866777896881, "learning_rate": 0.00010221344181754378, "loss": 1.4142, "step": 37634 }, { "epoch": 0.4890496602738925, "grad_norm": 0.4099809527397156, "learning_rate": 0.00010221084235563239, "loss": 1.5064, "step": 37635 }, { "epoch": 0.48906265481780836, "grad_norm": 0.356748104095459, "learning_rate": 0.000102208242893721, "loss": 1.5494, "step": 37636 }, { "epoch": 0.48907564936172426, "grad_norm": 0.4501465857028961, "learning_rate": 0.00010220564343180962, "loss": 1.5439, "step": 37637 }, { "epoch": 0.4890886439056401, "grad_norm": 0.4067637026309967, "learning_rate": 0.00010220304396989824, "loss": 1.37, "step": 37638 }, { "epoch": 0.489101638449556, "grad_norm": 0.4249280095100403, "learning_rate": 0.00010220044450798685, "loss": 1.3493, "step": 37639 }, { "epoch": 0.48911463299347185, "grad_norm": 0.3765154778957367, "learning_rate": 0.00010219784504607546, "loss": 1.4876, "step": 37640 }, { "epoch": 0.48912762753738775, "grad_norm": 0.41758087277412415, "learning_rate": 0.0001021952455841641, "loss": 1.4805, "step": 37641 }, { "epoch": 0.4891406220813036, "grad_norm": 0.43847647309303284, "learning_rate": 0.0001021926461222527, "loss": 1.4674, "step": 37642 }, { "epoch": 0.4891536166252195, "grad_norm": 0.47920501232147217, "learning_rate": 0.0001021900466603413, "loss": 1.3971, "step": 37643 }, { "epoch": 0.48916661116913535, "grad_norm": 0.3893868327140808, "learning_rate": 0.00010218744719842991, "loss": 1.4283, "step": 37644 }, { "epoch": 0.48917960571305125, "grad_norm": 0.3971408009529114, "learning_rate": 0.00010218484773651855, "loss": 1.2859, "step": 37645 }, { "epoch": 0.4891926002569671, "grad_norm": 0.3316631317138672, "learning_rate": 0.00010218224827460716, "loss": 1.3039, "step": 37646 }, { "epoch": 0.489205594800883, "grad_norm": 0.3308313488960266, "learning_rate": 0.00010217964881269577, "loss": 1.3086, "step": 37647 }, { "epoch": 0.48921858934479884, "grad_norm": 0.43697389960289, "learning_rate": 0.00010217704935078439, "loss": 1.306, "step": 37648 }, { "epoch": 0.48923158388871474, "grad_norm": 0.44559353590011597, "learning_rate": 0.00010217444988887301, "loss": 1.4088, "step": 37649 }, { "epoch": 0.4892445784326306, "grad_norm": 0.35831785202026367, "learning_rate": 0.00010217185042696162, "loss": 1.5006, "step": 37650 }, { "epoch": 0.4892575729765465, "grad_norm": 0.4659610390663147, "learning_rate": 0.00010216925096505023, "loss": 1.4051, "step": 37651 }, { "epoch": 0.48927056752046233, "grad_norm": 0.4211437404155731, "learning_rate": 0.00010216665150313887, "loss": 1.3132, "step": 37652 }, { "epoch": 0.48928356206437823, "grad_norm": 0.3066316545009613, "learning_rate": 0.00010216405204122748, "loss": 1.25, "step": 37653 }, { "epoch": 0.4892965566082941, "grad_norm": 0.4758424162864685, "learning_rate": 0.00010216145257931609, "loss": 1.6608, "step": 37654 }, { "epoch": 0.48930955115221, "grad_norm": 0.3729299008846283, "learning_rate": 0.0001021588531174047, "loss": 1.5147, "step": 37655 }, { "epoch": 0.4893225456961258, "grad_norm": 0.35858896374702454, "learning_rate": 0.00010215625365549333, "loss": 1.5821, "step": 37656 }, { "epoch": 0.4893355402400417, "grad_norm": 0.28876805305480957, "learning_rate": 0.00010215365419358194, "loss": 1.3293, "step": 37657 }, { "epoch": 0.48934853478395757, "grad_norm": 0.376742422580719, "learning_rate": 0.00010215105473167055, "loss": 1.413, "step": 37658 }, { "epoch": 0.48936152932787347, "grad_norm": 0.43379685282707214, "learning_rate": 0.00010214845526975916, "loss": 1.4242, "step": 37659 }, { "epoch": 0.4893745238717893, "grad_norm": 0.41608256101608276, "learning_rate": 0.00010214585580784778, "loss": 1.4972, "step": 37660 }, { "epoch": 0.4893875184157052, "grad_norm": 0.4789860248565674, "learning_rate": 0.0001021432563459364, "loss": 1.3728, "step": 37661 }, { "epoch": 0.48940051295962106, "grad_norm": 0.42090538144111633, "learning_rate": 0.000102140656884025, "loss": 1.3533, "step": 37662 }, { "epoch": 0.48941350750353696, "grad_norm": 0.3533662259578705, "learning_rate": 0.00010213805742211362, "loss": 1.4003, "step": 37663 }, { "epoch": 0.4894265020474528, "grad_norm": 0.4685594439506531, "learning_rate": 0.00010213545796020226, "loss": 1.529, "step": 37664 }, { "epoch": 0.4894394965913687, "grad_norm": 0.4252340793609619, "learning_rate": 0.00010213285849829087, "loss": 1.5663, "step": 37665 }, { "epoch": 0.48945249113528455, "grad_norm": 0.4390921890735626, "learning_rate": 0.00010213025903637948, "loss": 1.2333, "step": 37666 }, { "epoch": 0.48946548567920045, "grad_norm": 0.5438433289527893, "learning_rate": 0.00010212765957446809, "loss": 1.4152, "step": 37667 }, { "epoch": 0.4894784802231163, "grad_norm": 0.47453826665878296, "learning_rate": 0.00010212506011255671, "loss": 1.4378, "step": 37668 }, { "epoch": 0.4894914747670322, "grad_norm": 0.45358943939208984, "learning_rate": 0.00010212246065064532, "loss": 1.2644, "step": 37669 }, { "epoch": 0.48950446931094804, "grad_norm": 0.35541918873786926, "learning_rate": 0.00010211986118873393, "loss": 1.4041, "step": 37670 }, { "epoch": 0.48951746385486394, "grad_norm": 0.4046880602836609, "learning_rate": 0.00010211726172682255, "loss": 1.4442, "step": 37671 }, { "epoch": 0.4895304583987798, "grad_norm": 0.39502188563346863, "learning_rate": 0.00010211466226491117, "loss": 1.5049, "step": 37672 }, { "epoch": 0.4895434529426957, "grad_norm": 0.44652628898620605, "learning_rate": 0.00010211206280299978, "loss": 1.3335, "step": 37673 }, { "epoch": 0.48955644748661153, "grad_norm": 0.34121567010879517, "learning_rate": 0.00010210946334108839, "loss": 1.2714, "step": 37674 }, { "epoch": 0.48956944203052744, "grad_norm": 0.351938933134079, "learning_rate": 0.000102106863879177, "loss": 1.3949, "step": 37675 }, { "epoch": 0.4895824365744433, "grad_norm": 0.3815155327320099, "learning_rate": 0.00010210426441726564, "loss": 1.6603, "step": 37676 }, { "epoch": 0.4895954311183592, "grad_norm": 0.32269617915153503, "learning_rate": 0.00010210166495535425, "loss": 1.4687, "step": 37677 }, { "epoch": 0.489608425662275, "grad_norm": 0.3727602958679199, "learning_rate": 0.00010209906549344286, "loss": 1.4158, "step": 37678 }, { "epoch": 0.4896214202061909, "grad_norm": 0.3991808593273163, "learning_rate": 0.00010209646603153147, "loss": 1.4439, "step": 37679 }, { "epoch": 0.4896344147501068, "grad_norm": 0.42426377534866333, "learning_rate": 0.0001020938665696201, "loss": 1.3166, "step": 37680 }, { "epoch": 0.4896474092940227, "grad_norm": 0.42968907952308655, "learning_rate": 0.00010209126710770871, "loss": 1.3213, "step": 37681 }, { "epoch": 0.4896604038379385, "grad_norm": 0.4454449713230133, "learning_rate": 0.00010208866764579732, "loss": 1.4413, "step": 37682 }, { "epoch": 0.4896733983818544, "grad_norm": 0.3516608476638794, "learning_rate": 0.00010208606818388593, "loss": 1.1771, "step": 37683 }, { "epoch": 0.4896863929257703, "grad_norm": 0.3978886902332306, "learning_rate": 0.00010208346872197457, "loss": 1.5342, "step": 37684 }, { "epoch": 0.48969938746968616, "grad_norm": 0.2885812819004059, "learning_rate": 0.00010208086926006317, "loss": 1.1951, "step": 37685 }, { "epoch": 0.48971238201360207, "grad_norm": 0.5368260145187378, "learning_rate": 0.00010207826979815178, "loss": 1.4905, "step": 37686 }, { "epoch": 0.4897253765575179, "grad_norm": 0.5116410255432129, "learning_rate": 0.00010207567033624039, "loss": 1.5111, "step": 37687 }, { "epoch": 0.4897383711014338, "grad_norm": 0.4339734613895416, "learning_rate": 0.00010207307087432903, "loss": 1.1969, "step": 37688 }, { "epoch": 0.48975136564534966, "grad_norm": 0.37022683024406433, "learning_rate": 0.00010207047141241764, "loss": 1.5182, "step": 37689 }, { "epoch": 0.48976436018926556, "grad_norm": 0.31715548038482666, "learning_rate": 0.00010206787195050625, "loss": 1.4243, "step": 37690 }, { "epoch": 0.4897773547331814, "grad_norm": 0.4087826907634735, "learning_rate": 0.00010206527248859487, "loss": 1.3323, "step": 37691 }, { "epoch": 0.4897903492770973, "grad_norm": 0.37552472949028015, "learning_rate": 0.00010206267302668348, "loss": 1.4163, "step": 37692 }, { "epoch": 0.48980334382101315, "grad_norm": 0.4141238331794739, "learning_rate": 0.0001020600735647721, "loss": 1.2786, "step": 37693 }, { "epoch": 0.48981633836492905, "grad_norm": 0.5030965805053711, "learning_rate": 0.0001020574741028607, "loss": 1.5075, "step": 37694 }, { "epoch": 0.4898293329088449, "grad_norm": 0.3129291832447052, "learning_rate": 0.00010205487464094934, "loss": 1.4336, "step": 37695 }, { "epoch": 0.4898423274527608, "grad_norm": 0.5202312469482422, "learning_rate": 0.00010205227517903795, "loss": 1.4532, "step": 37696 }, { "epoch": 0.48985532199667664, "grad_norm": 0.3756698966026306, "learning_rate": 0.00010204967571712657, "loss": 1.3933, "step": 37697 }, { "epoch": 0.48986831654059254, "grad_norm": 0.41458624601364136, "learning_rate": 0.00010204707625521516, "loss": 1.4659, "step": 37698 }, { "epoch": 0.4898813110845084, "grad_norm": 0.2936573624610901, "learning_rate": 0.0001020444767933038, "loss": 1.3798, "step": 37699 }, { "epoch": 0.4898943056284243, "grad_norm": 0.43088358640670776, "learning_rate": 0.00010204187733139241, "loss": 1.3154, "step": 37700 }, { "epoch": 0.48990730017234013, "grad_norm": 0.3775368928909302, "learning_rate": 0.00010203927786948102, "loss": 1.5167, "step": 37701 }, { "epoch": 0.48992029471625603, "grad_norm": 0.32226327061653137, "learning_rate": 0.00010203667840756963, "loss": 1.4052, "step": 37702 }, { "epoch": 0.4899332892601719, "grad_norm": 0.4110506772994995, "learning_rate": 0.00010203407894565826, "loss": 1.6387, "step": 37703 }, { "epoch": 0.4899462838040878, "grad_norm": 0.3727943003177643, "learning_rate": 0.00010203147948374687, "loss": 1.185, "step": 37704 }, { "epoch": 0.4899592783480036, "grad_norm": 0.4041294455528259, "learning_rate": 0.00010202888002183548, "loss": 1.6619, "step": 37705 }, { "epoch": 0.4899722728919195, "grad_norm": 0.3528921902179718, "learning_rate": 0.00010202628055992409, "loss": 1.418, "step": 37706 }, { "epoch": 0.48998526743583537, "grad_norm": 0.3151302635669708, "learning_rate": 0.00010202368109801273, "loss": 1.3567, "step": 37707 }, { "epoch": 0.48999826197975127, "grad_norm": 0.28825438022613525, "learning_rate": 0.00010202108163610134, "loss": 1.3763, "step": 37708 }, { "epoch": 0.4900112565236671, "grad_norm": 0.35700079798698425, "learning_rate": 0.00010201848217418995, "loss": 1.0927, "step": 37709 }, { "epoch": 0.490024251067583, "grad_norm": 0.2915344536304474, "learning_rate": 0.00010201588271227855, "loss": 1.4072, "step": 37710 }, { "epoch": 0.49003724561149886, "grad_norm": 0.428102970123291, "learning_rate": 0.00010201328325036719, "loss": 1.4446, "step": 37711 }, { "epoch": 0.49005024015541476, "grad_norm": 0.3339066505432129, "learning_rate": 0.0001020106837884558, "loss": 1.1874, "step": 37712 }, { "epoch": 0.4900632346993306, "grad_norm": 0.3950852155685425, "learning_rate": 0.00010200808432654441, "loss": 1.3711, "step": 37713 }, { "epoch": 0.4900762292432465, "grad_norm": 0.30970823764801025, "learning_rate": 0.00010200548486463302, "loss": 1.2791, "step": 37714 }, { "epoch": 0.49008922378716235, "grad_norm": 0.3111138343811035, "learning_rate": 0.00010200288540272164, "loss": 1.3152, "step": 37715 }, { "epoch": 0.49010221833107825, "grad_norm": 0.3655491769313812, "learning_rate": 0.00010200028594081025, "loss": 1.4, "step": 37716 }, { "epoch": 0.4901152128749941, "grad_norm": 0.2901107370853424, "learning_rate": 0.00010199768647889886, "loss": 1.4504, "step": 37717 }, { "epoch": 0.49012820741891, "grad_norm": 0.3774755001068115, "learning_rate": 0.00010199508701698748, "loss": 1.5366, "step": 37718 }, { "epoch": 0.49014120196282585, "grad_norm": 0.36180928349494934, "learning_rate": 0.00010199248755507611, "loss": 1.1822, "step": 37719 }, { "epoch": 0.49015419650674175, "grad_norm": 0.4644837975502014, "learning_rate": 0.00010198988809316472, "loss": 1.3597, "step": 37720 }, { "epoch": 0.4901671910506576, "grad_norm": 0.35096731781959534, "learning_rate": 0.00010198728863125334, "loss": 1.5003, "step": 37721 }, { "epoch": 0.4901801855945735, "grad_norm": 0.38475143909454346, "learning_rate": 0.00010198468916934195, "loss": 1.3354, "step": 37722 }, { "epoch": 0.49019318013848934, "grad_norm": 0.43894243240356445, "learning_rate": 0.00010198208970743057, "loss": 1.4731, "step": 37723 }, { "epoch": 0.49020617468240524, "grad_norm": 0.4135128855705261, "learning_rate": 0.00010197949024551918, "loss": 1.3538, "step": 37724 }, { "epoch": 0.4902191692263211, "grad_norm": 0.3755813241004944, "learning_rate": 0.00010197689078360779, "loss": 1.2868, "step": 37725 }, { "epoch": 0.490232163770237, "grad_norm": 0.4043888449668884, "learning_rate": 0.00010197429132169643, "loss": 1.2852, "step": 37726 }, { "epoch": 0.49024515831415283, "grad_norm": 0.4277477264404297, "learning_rate": 0.00010197169185978503, "loss": 1.2776, "step": 37727 }, { "epoch": 0.49025815285806873, "grad_norm": 0.4013367295265198, "learning_rate": 0.00010196909239787364, "loss": 1.5241, "step": 37728 }, { "epoch": 0.4902711474019846, "grad_norm": 0.4822542071342468, "learning_rate": 0.00010196649293596225, "loss": 1.4106, "step": 37729 }, { "epoch": 0.4902841419459005, "grad_norm": 0.45983362197875977, "learning_rate": 0.00010196389347405089, "loss": 1.446, "step": 37730 }, { "epoch": 0.4902971364898163, "grad_norm": 0.45816919207572937, "learning_rate": 0.0001019612940121395, "loss": 1.4044, "step": 37731 }, { "epoch": 0.4903101310337322, "grad_norm": 0.36364760994911194, "learning_rate": 0.00010195869455022811, "loss": 1.3194, "step": 37732 }, { "epoch": 0.49032312557764807, "grad_norm": 0.4717870354652405, "learning_rate": 0.00010195609508831672, "loss": 1.5687, "step": 37733 }, { "epoch": 0.49033612012156397, "grad_norm": 0.3057982921600342, "learning_rate": 0.00010195349562640535, "loss": 1.2409, "step": 37734 }, { "epoch": 0.4903491146654798, "grad_norm": 0.36223775148391724, "learning_rate": 0.00010195089616449396, "loss": 1.4644, "step": 37735 }, { "epoch": 0.4903621092093957, "grad_norm": 0.4018089771270752, "learning_rate": 0.00010194829670258257, "loss": 1.3294, "step": 37736 }, { "epoch": 0.49037510375331156, "grad_norm": 0.2628757059574127, "learning_rate": 0.00010194569724067118, "loss": 1.3375, "step": 37737 }, { "epoch": 0.49038809829722746, "grad_norm": 0.4035939574241638, "learning_rate": 0.00010194309777875982, "loss": 1.37, "step": 37738 }, { "epoch": 0.4904010928411433, "grad_norm": 0.4631110727787018, "learning_rate": 0.00010194049831684843, "loss": 1.3162, "step": 37739 }, { "epoch": 0.4904140873850592, "grad_norm": 0.37760648131370544, "learning_rate": 0.00010193789885493702, "loss": 1.4845, "step": 37740 }, { "epoch": 0.49042708192897505, "grad_norm": 0.4469343423843384, "learning_rate": 0.00010193529939302564, "loss": 1.533, "step": 37741 }, { "epoch": 0.49044007647289095, "grad_norm": 0.5700821876525879, "learning_rate": 0.00010193269993111427, "loss": 1.5045, "step": 37742 }, { "epoch": 0.4904530710168068, "grad_norm": 0.36509913206100464, "learning_rate": 0.00010193010046920288, "loss": 1.3962, "step": 37743 }, { "epoch": 0.4904660655607227, "grad_norm": 0.4578285217285156, "learning_rate": 0.0001019275010072915, "loss": 1.4907, "step": 37744 }, { "epoch": 0.49047906010463854, "grad_norm": 0.3795306086540222, "learning_rate": 0.0001019249015453801, "loss": 1.2938, "step": 37745 }, { "epoch": 0.49049205464855444, "grad_norm": 0.37952038645744324, "learning_rate": 0.00010192230208346873, "loss": 1.3877, "step": 37746 }, { "epoch": 0.4905050491924703, "grad_norm": 0.39149701595306396, "learning_rate": 0.00010191970262155734, "loss": 1.3631, "step": 37747 }, { "epoch": 0.4905180437363862, "grad_norm": 0.4413684010505676, "learning_rate": 0.00010191710315964595, "loss": 1.5862, "step": 37748 }, { "epoch": 0.49053103828030203, "grad_norm": 0.4371064603328705, "learning_rate": 0.00010191450369773456, "loss": 1.4724, "step": 37749 }, { "epoch": 0.49054403282421793, "grad_norm": 0.4671314060688019, "learning_rate": 0.0001019119042358232, "loss": 1.4231, "step": 37750 }, { "epoch": 0.4905570273681338, "grad_norm": 0.4213501811027527, "learning_rate": 0.00010190930477391181, "loss": 1.3799, "step": 37751 }, { "epoch": 0.4905700219120497, "grad_norm": 0.4420798122882843, "learning_rate": 0.00010190670531200041, "loss": 1.5231, "step": 37752 }, { "epoch": 0.4905830164559655, "grad_norm": 0.4246748983860016, "learning_rate": 0.00010190410585008902, "loss": 1.4932, "step": 37753 }, { "epoch": 0.4905960109998814, "grad_norm": 0.3557030260562897, "learning_rate": 0.00010190150638817766, "loss": 1.1935, "step": 37754 }, { "epoch": 0.49060900554379727, "grad_norm": 0.32457053661346436, "learning_rate": 0.00010189890692626627, "loss": 1.2368, "step": 37755 }, { "epoch": 0.4906220000877132, "grad_norm": 0.3853646516799927, "learning_rate": 0.00010189630746435488, "loss": 1.4284, "step": 37756 }, { "epoch": 0.490634994631629, "grad_norm": 0.410287469625473, "learning_rate": 0.00010189370800244349, "loss": 1.3091, "step": 37757 }, { "epoch": 0.4906479891755449, "grad_norm": 0.34918105602264404, "learning_rate": 0.00010189110854053212, "loss": 1.2867, "step": 37758 }, { "epoch": 0.49066098371946076, "grad_norm": 0.41428571939468384, "learning_rate": 0.00010188850907862073, "loss": 1.291, "step": 37759 }, { "epoch": 0.49067397826337666, "grad_norm": 0.3405683636665344, "learning_rate": 0.00010188590961670934, "loss": 1.2035, "step": 37760 }, { "epoch": 0.49068697280729257, "grad_norm": 0.4508454203605652, "learning_rate": 0.00010188331015479795, "loss": 1.4995, "step": 37761 }, { "epoch": 0.4906999673512084, "grad_norm": 0.39756643772125244, "learning_rate": 0.00010188071069288659, "loss": 1.5601, "step": 37762 }, { "epoch": 0.4907129618951243, "grad_norm": 0.39856451749801636, "learning_rate": 0.0001018781112309752, "loss": 1.2366, "step": 37763 }, { "epoch": 0.49072595643904016, "grad_norm": 0.32271090149879456, "learning_rate": 0.00010187551176906381, "loss": 1.3093, "step": 37764 }, { "epoch": 0.49073895098295606, "grad_norm": 0.4080948829650879, "learning_rate": 0.00010187291230715243, "loss": 1.4169, "step": 37765 }, { "epoch": 0.4907519455268719, "grad_norm": 0.3551578223705292, "learning_rate": 0.00010187031284524104, "loss": 1.0811, "step": 37766 }, { "epoch": 0.4907649400707878, "grad_norm": 0.38670992851257324, "learning_rate": 0.00010186771338332966, "loss": 1.3593, "step": 37767 }, { "epoch": 0.49077793461470365, "grad_norm": 0.3447142541408539, "learning_rate": 0.00010186511392141827, "loss": 1.3037, "step": 37768 }, { "epoch": 0.49079092915861955, "grad_norm": 0.4881947338581085, "learning_rate": 0.00010186251445950689, "loss": 1.2725, "step": 37769 }, { "epoch": 0.4908039237025354, "grad_norm": 0.3881729245185852, "learning_rate": 0.0001018599149975955, "loss": 1.3703, "step": 37770 }, { "epoch": 0.4908169182464513, "grad_norm": 0.3906041979789734, "learning_rate": 0.00010185731553568411, "loss": 1.4503, "step": 37771 }, { "epoch": 0.49082991279036714, "grad_norm": 0.43087923526763916, "learning_rate": 0.00010185471607377272, "loss": 1.3202, "step": 37772 }, { "epoch": 0.49084290733428304, "grad_norm": 0.39940306544303894, "learning_rate": 0.00010185211661186136, "loss": 1.4144, "step": 37773 }, { "epoch": 0.4908559018781989, "grad_norm": 0.3941690921783447, "learning_rate": 0.00010184951714994997, "loss": 1.0833, "step": 37774 }, { "epoch": 0.4908688964221148, "grad_norm": 0.3753497302532196, "learning_rate": 0.00010184691768803858, "loss": 1.2368, "step": 37775 }, { "epoch": 0.49088189096603063, "grad_norm": 0.2929832935333252, "learning_rate": 0.0001018443182261272, "loss": 1.3899, "step": 37776 }, { "epoch": 0.49089488550994653, "grad_norm": 0.3228476345539093, "learning_rate": 0.00010184171876421582, "loss": 1.2458, "step": 37777 }, { "epoch": 0.4909078800538624, "grad_norm": 0.4038845896720886, "learning_rate": 0.00010183911930230443, "loss": 1.4574, "step": 37778 }, { "epoch": 0.4909208745977783, "grad_norm": 0.41748881340026855, "learning_rate": 0.00010183651984039304, "loss": 1.4961, "step": 37779 }, { "epoch": 0.4909338691416941, "grad_norm": 0.47549504041671753, "learning_rate": 0.00010183392037848165, "loss": 1.472, "step": 37780 }, { "epoch": 0.49094686368561, "grad_norm": 0.4866889417171478, "learning_rate": 0.00010183132091657028, "loss": 1.4587, "step": 37781 }, { "epoch": 0.49095985822952587, "grad_norm": 0.37075915932655334, "learning_rate": 0.00010182872145465889, "loss": 1.3511, "step": 37782 }, { "epoch": 0.49097285277344177, "grad_norm": 0.38241830468177795, "learning_rate": 0.0001018261219927475, "loss": 1.4365, "step": 37783 }, { "epoch": 0.4909858473173576, "grad_norm": 0.43948444724082947, "learning_rate": 0.00010182352253083611, "loss": 1.4915, "step": 37784 }, { "epoch": 0.4909988418612735, "grad_norm": 0.5068712830543518, "learning_rate": 0.00010182092306892475, "loss": 1.4348, "step": 37785 }, { "epoch": 0.49101183640518936, "grad_norm": 0.4951256513595581, "learning_rate": 0.00010181832360701336, "loss": 1.4303, "step": 37786 }, { "epoch": 0.49102483094910526, "grad_norm": 0.4821823835372925, "learning_rate": 0.00010181572414510197, "loss": 1.5166, "step": 37787 }, { "epoch": 0.4910378254930211, "grad_norm": 0.47578558325767517, "learning_rate": 0.00010181312468319058, "loss": 1.5105, "step": 37788 }, { "epoch": 0.491050820036937, "grad_norm": 0.33312493562698364, "learning_rate": 0.0001018105252212792, "loss": 1.3488, "step": 37789 }, { "epoch": 0.49106381458085285, "grad_norm": 0.40121138095855713, "learning_rate": 0.00010180792575936782, "loss": 1.3689, "step": 37790 }, { "epoch": 0.49107680912476875, "grad_norm": 0.48584118485450745, "learning_rate": 0.00010180532629745643, "loss": 1.3175, "step": 37791 }, { "epoch": 0.4910898036686846, "grad_norm": 0.4626264274120331, "learning_rate": 0.00010180272683554504, "loss": 1.5454, "step": 37792 }, { "epoch": 0.4911027982126005, "grad_norm": 0.42534276843070984, "learning_rate": 0.00010180012737363368, "loss": 1.4869, "step": 37793 }, { "epoch": 0.49111579275651635, "grad_norm": 0.4280928671360016, "learning_rate": 0.00010179752791172227, "loss": 1.348, "step": 37794 }, { "epoch": 0.49112878730043225, "grad_norm": 0.3070835769176483, "learning_rate": 0.00010179492844981088, "loss": 1.0555, "step": 37795 }, { "epoch": 0.4911417818443481, "grad_norm": 0.3631468415260315, "learning_rate": 0.0001017923289878995, "loss": 1.2557, "step": 37796 }, { "epoch": 0.491154776388264, "grad_norm": 0.27955323457717896, "learning_rate": 0.00010178972952598813, "loss": 1.2874, "step": 37797 }, { "epoch": 0.49116777093217984, "grad_norm": 0.49570029973983765, "learning_rate": 0.00010178713006407674, "loss": 1.4581, "step": 37798 }, { "epoch": 0.49118076547609574, "grad_norm": 0.5180969834327698, "learning_rate": 0.00010178453060216535, "loss": 1.5302, "step": 37799 }, { "epoch": 0.4911937600200116, "grad_norm": 0.4341398775577545, "learning_rate": 0.00010178193114025398, "loss": 1.3904, "step": 37800 }, { "epoch": 0.4912067545639275, "grad_norm": 0.402722030878067, "learning_rate": 0.00010177933167834259, "loss": 1.5318, "step": 37801 }, { "epoch": 0.49121974910784333, "grad_norm": 0.28964880108833313, "learning_rate": 0.0001017767322164312, "loss": 1.2989, "step": 37802 }, { "epoch": 0.49123274365175923, "grad_norm": 0.4683733284473419, "learning_rate": 0.00010177413275451981, "loss": 1.277, "step": 37803 }, { "epoch": 0.4912457381956751, "grad_norm": 0.3469351828098297, "learning_rate": 0.00010177153329260845, "loss": 1.4016, "step": 37804 }, { "epoch": 0.491258732739591, "grad_norm": 0.37269794940948486, "learning_rate": 0.00010176893383069706, "loss": 1.4587, "step": 37805 }, { "epoch": 0.4912717272835068, "grad_norm": 0.354561448097229, "learning_rate": 0.00010176633436878567, "loss": 1.2053, "step": 37806 }, { "epoch": 0.4912847218274227, "grad_norm": 0.2931618392467499, "learning_rate": 0.00010176373490687427, "loss": 1.3359, "step": 37807 }, { "epoch": 0.49129771637133857, "grad_norm": 0.4960029721260071, "learning_rate": 0.00010176113544496291, "loss": 1.4593, "step": 37808 }, { "epoch": 0.49131071091525447, "grad_norm": 0.4757802188396454, "learning_rate": 0.00010175853598305152, "loss": 1.2738, "step": 37809 }, { "epoch": 0.4913237054591703, "grad_norm": 0.4474790692329407, "learning_rate": 0.00010175593652114013, "loss": 1.352, "step": 37810 }, { "epoch": 0.4913367000030862, "grad_norm": 0.3817029893398285, "learning_rate": 0.00010175333705922874, "loss": 1.5706, "step": 37811 }, { "epoch": 0.49134969454700206, "grad_norm": 0.5639182329177856, "learning_rate": 0.00010175073759731736, "loss": 1.2678, "step": 37812 }, { "epoch": 0.49136268909091796, "grad_norm": 0.307658314704895, "learning_rate": 0.00010174813813540598, "loss": 1.1545, "step": 37813 }, { "epoch": 0.4913756836348338, "grad_norm": 0.525376558303833, "learning_rate": 0.00010174553867349459, "loss": 1.4462, "step": 37814 }, { "epoch": 0.4913886781787497, "grad_norm": 0.4329998195171356, "learning_rate": 0.0001017429392115832, "loss": 1.5696, "step": 37815 }, { "epoch": 0.49140167272266555, "grad_norm": 0.5260769128799438, "learning_rate": 0.00010174033974967184, "loss": 1.2952, "step": 37816 }, { "epoch": 0.49141466726658145, "grad_norm": 0.4057026505470276, "learning_rate": 0.00010173774028776045, "loss": 1.2807, "step": 37817 }, { "epoch": 0.4914276618104973, "grad_norm": 0.4023517072200775, "learning_rate": 0.00010173514082584906, "loss": 1.1707, "step": 37818 }, { "epoch": 0.4914406563544132, "grad_norm": 0.44709914922714233, "learning_rate": 0.00010173254136393767, "loss": 1.2879, "step": 37819 }, { "epoch": 0.49145365089832904, "grad_norm": 0.4642372727394104, "learning_rate": 0.00010172994190202629, "loss": 1.4165, "step": 37820 }, { "epoch": 0.49146664544224494, "grad_norm": 0.5262324810028076, "learning_rate": 0.0001017273424401149, "loss": 1.3735, "step": 37821 }, { "epoch": 0.4914796399861608, "grad_norm": 0.3654259443283081, "learning_rate": 0.00010172474297820351, "loss": 1.3548, "step": 37822 }, { "epoch": 0.4914926345300767, "grad_norm": 0.43560153245925903, "learning_rate": 0.00010172214351629213, "loss": 1.2534, "step": 37823 }, { "epoch": 0.49150562907399253, "grad_norm": 0.46837756037712097, "learning_rate": 0.00010171954405438075, "loss": 1.3993, "step": 37824 }, { "epoch": 0.49151862361790843, "grad_norm": 0.5833989977836609, "learning_rate": 0.00010171694459246936, "loss": 1.4821, "step": 37825 }, { "epoch": 0.4915316181618243, "grad_norm": 0.4069930911064148, "learning_rate": 0.00010171434513055797, "loss": 1.6053, "step": 37826 }, { "epoch": 0.4915446127057402, "grad_norm": 0.42060330510139465, "learning_rate": 0.00010171174566864658, "loss": 1.4774, "step": 37827 }, { "epoch": 0.491557607249656, "grad_norm": 0.438220739364624, "learning_rate": 0.00010170914620673522, "loss": 1.2956, "step": 37828 }, { "epoch": 0.4915706017935719, "grad_norm": 0.38657090067863464, "learning_rate": 0.00010170654674482383, "loss": 1.3139, "step": 37829 }, { "epoch": 0.49158359633748777, "grad_norm": 0.5094223618507385, "learning_rate": 0.00010170394728291244, "loss": 1.2433, "step": 37830 }, { "epoch": 0.49159659088140367, "grad_norm": 0.36727893352508545, "learning_rate": 0.00010170134782100105, "loss": 1.3583, "step": 37831 }, { "epoch": 0.4916095854253195, "grad_norm": 0.37785881757736206, "learning_rate": 0.00010169874835908968, "loss": 1.3786, "step": 37832 }, { "epoch": 0.4916225799692354, "grad_norm": 0.3355453908443451, "learning_rate": 0.00010169614889717829, "loss": 1.253, "step": 37833 }, { "epoch": 0.49163557451315126, "grad_norm": 0.3342718780040741, "learning_rate": 0.0001016935494352669, "loss": 1.3224, "step": 37834 }, { "epoch": 0.49164856905706716, "grad_norm": 0.4494706392288208, "learning_rate": 0.00010169094997335551, "loss": 1.5681, "step": 37835 }, { "epoch": 0.49166156360098306, "grad_norm": 0.3821546733379364, "learning_rate": 0.00010168835051144414, "loss": 1.4115, "step": 37836 }, { "epoch": 0.4916745581448989, "grad_norm": 0.45482900738716125, "learning_rate": 0.00010168575104953275, "loss": 1.4022, "step": 37837 }, { "epoch": 0.4916875526888148, "grad_norm": 0.3593827188014984, "learning_rate": 0.00010168315158762136, "loss": 1.5212, "step": 37838 }, { "epoch": 0.49170054723273066, "grad_norm": 0.40328511595726013, "learning_rate": 0.00010168055212571, "loss": 1.402, "step": 37839 }, { "epoch": 0.49171354177664656, "grad_norm": 0.3104299008846283, "learning_rate": 0.0001016779526637986, "loss": 1.2954, "step": 37840 }, { "epoch": 0.4917265363205624, "grad_norm": 0.39228469133377075, "learning_rate": 0.00010167535320188722, "loss": 1.302, "step": 37841 }, { "epoch": 0.4917395308644783, "grad_norm": 0.4786580801010132, "learning_rate": 0.00010167275373997583, "loss": 1.4602, "step": 37842 }, { "epoch": 0.49175252540839415, "grad_norm": 0.5236978530883789, "learning_rate": 0.00010167015427806445, "loss": 1.4209, "step": 37843 }, { "epoch": 0.49176551995231005, "grad_norm": 0.28668534755706787, "learning_rate": 0.00010166755481615306, "loss": 1.1925, "step": 37844 }, { "epoch": 0.4917785144962259, "grad_norm": 0.3784046471118927, "learning_rate": 0.00010166495535424167, "loss": 1.3308, "step": 37845 }, { "epoch": 0.4917915090401418, "grad_norm": 0.42403244972229004, "learning_rate": 0.00010166235589233029, "loss": 1.4588, "step": 37846 }, { "epoch": 0.49180450358405764, "grad_norm": 0.45586347579956055, "learning_rate": 0.00010165975643041892, "loss": 1.4756, "step": 37847 }, { "epoch": 0.49181749812797354, "grad_norm": 0.40865907073020935, "learning_rate": 0.00010165715696850753, "loss": 1.4586, "step": 37848 }, { "epoch": 0.4918304926718894, "grad_norm": 0.46635472774505615, "learning_rate": 0.00010165455750659613, "loss": 1.5099, "step": 37849 }, { "epoch": 0.4918434872158053, "grad_norm": 0.5214419364929199, "learning_rate": 0.00010165195804468474, "loss": 1.5649, "step": 37850 }, { "epoch": 0.49185648175972113, "grad_norm": 0.4230841398239136, "learning_rate": 0.00010164935858277338, "loss": 1.319, "step": 37851 }, { "epoch": 0.49186947630363703, "grad_norm": 0.4479019045829773, "learning_rate": 0.00010164675912086199, "loss": 1.341, "step": 37852 }, { "epoch": 0.4918824708475529, "grad_norm": 0.3817611336708069, "learning_rate": 0.0001016441596589506, "loss": 1.3844, "step": 37853 }, { "epoch": 0.4918954653914688, "grad_norm": 0.3892451226711273, "learning_rate": 0.00010164156019703921, "loss": 1.405, "step": 37854 }, { "epoch": 0.4919084599353846, "grad_norm": 0.33282044529914856, "learning_rate": 0.00010163896073512784, "loss": 1.1758, "step": 37855 }, { "epoch": 0.4919214544793005, "grad_norm": 0.37161576747894287, "learning_rate": 0.00010163636127321645, "loss": 1.2568, "step": 37856 }, { "epoch": 0.49193444902321637, "grad_norm": 0.47352680563926697, "learning_rate": 0.00010163376181130506, "loss": 1.3742, "step": 37857 }, { "epoch": 0.49194744356713227, "grad_norm": 0.5469909310340881, "learning_rate": 0.00010163116234939367, "loss": 1.1786, "step": 37858 }, { "epoch": 0.4919604381110481, "grad_norm": 0.4582150876522064, "learning_rate": 0.00010162856288748231, "loss": 1.3074, "step": 37859 }, { "epoch": 0.491973432654964, "grad_norm": 0.4114942252635956, "learning_rate": 0.00010162596342557092, "loss": 1.3766, "step": 37860 }, { "epoch": 0.49198642719887986, "grad_norm": 0.3566601276397705, "learning_rate": 0.00010162336396365953, "loss": 1.3678, "step": 37861 }, { "epoch": 0.49199942174279576, "grad_norm": 0.3429954946041107, "learning_rate": 0.00010162076450174813, "loss": 1.2739, "step": 37862 }, { "epoch": 0.4920124162867116, "grad_norm": 0.33289653062820435, "learning_rate": 0.00010161816503983677, "loss": 1.5205, "step": 37863 }, { "epoch": 0.4920254108306275, "grad_norm": 0.4267808794975281, "learning_rate": 0.00010161556557792538, "loss": 1.4195, "step": 37864 }, { "epoch": 0.49203840537454335, "grad_norm": 0.4022567868232727, "learning_rate": 0.00010161296611601399, "loss": 1.4574, "step": 37865 }, { "epoch": 0.49205139991845925, "grad_norm": 0.3995455503463745, "learning_rate": 0.0001016103666541026, "loss": 1.5695, "step": 37866 }, { "epoch": 0.4920643944623751, "grad_norm": 0.31412267684936523, "learning_rate": 0.00010160776719219122, "loss": 1.5187, "step": 37867 }, { "epoch": 0.492077389006291, "grad_norm": 0.4527989625930786, "learning_rate": 0.00010160516773027983, "loss": 1.4445, "step": 37868 }, { "epoch": 0.49209038355020684, "grad_norm": 0.45526984333992004, "learning_rate": 0.00010160256826836844, "loss": 1.2516, "step": 37869 }, { "epoch": 0.49210337809412275, "grad_norm": 0.5005548596382141, "learning_rate": 0.00010159996880645706, "loss": 1.4749, "step": 37870 }, { "epoch": 0.4921163726380386, "grad_norm": 0.41235893964767456, "learning_rate": 0.0001015973693445457, "loss": 1.3417, "step": 37871 }, { "epoch": 0.4921293671819545, "grad_norm": 0.4259625971317291, "learning_rate": 0.0001015947698826343, "loss": 1.4757, "step": 37872 }, { "epoch": 0.49214236172587034, "grad_norm": 0.3119475841522217, "learning_rate": 0.00010159217042072292, "loss": 1.4265, "step": 37873 }, { "epoch": 0.49215535626978624, "grad_norm": 0.44098174571990967, "learning_rate": 0.00010158957095881154, "loss": 1.3387, "step": 37874 }, { "epoch": 0.4921683508137021, "grad_norm": 0.44032105803489685, "learning_rate": 0.00010158697149690015, "loss": 1.4495, "step": 37875 }, { "epoch": 0.492181345357618, "grad_norm": 0.415775865316391, "learning_rate": 0.00010158437203498876, "loss": 1.5054, "step": 37876 }, { "epoch": 0.49219433990153383, "grad_norm": 0.427044153213501, "learning_rate": 0.00010158177257307737, "loss": 1.4435, "step": 37877 }, { "epoch": 0.49220733444544973, "grad_norm": 0.3788157105445862, "learning_rate": 0.000101579173111166, "loss": 1.2697, "step": 37878 }, { "epoch": 0.4922203289893656, "grad_norm": 0.41348734498023987, "learning_rate": 0.00010157657364925461, "loss": 1.2653, "step": 37879 }, { "epoch": 0.4922333235332815, "grad_norm": 0.41792169213294983, "learning_rate": 0.00010157397418734322, "loss": 1.4294, "step": 37880 }, { "epoch": 0.4922463180771973, "grad_norm": 0.4320823848247528, "learning_rate": 0.00010157137472543183, "loss": 1.4542, "step": 37881 }, { "epoch": 0.4922593126211132, "grad_norm": 0.48169055581092834, "learning_rate": 0.00010156877526352047, "loss": 1.3879, "step": 37882 }, { "epoch": 0.49227230716502907, "grad_norm": 0.420049786567688, "learning_rate": 0.00010156617580160908, "loss": 1.4864, "step": 37883 }, { "epoch": 0.49228530170894497, "grad_norm": 0.3211154043674469, "learning_rate": 0.00010156357633969769, "loss": 1.591, "step": 37884 }, { "epoch": 0.4922982962528608, "grad_norm": 0.4874553680419922, "learning_rate": 0.0001015609768777863, "loss": 1.4195, "step": 37885 }, { "epoch": 0.4923112907967767, "grad_norm": 0.42579734325408936, "learning_rate": 0.00010155837741587493, "loss": 1.3392, "step": 37886 }, { "epoch": 0.49232428534069256, "grad_norm": 0.3239997923374176, "learning_rate": 0.00010155577795396354, "loss": 1.2086, "step": 37887 }, { "epoch": 0.49233727988460846, "grad_norm": 0.344494104385376, "learning_rate": 0.00010155317849205215, "loss": 1.1687, "step": 37888 }, { "epoch": 0.4923502744285243, "grad_norm": 0.5118855237960815, "learning_rate": 0.00010155057903014076, "loss": 1.566, "step": 37889 }, { "epoch": 0.4923632689724402, "grad_norm": 0.38154539465904236, "learning_rate": 0.0001015479795682294, "loss": 1.4061, "step": 37890 }, { "epoch": 0.49237626351635605, "grad_norm": 0.3532242774963379, "learning_rate": 0.000101545380106318, "loss": 1.2443, "step": 37891 }, { "epoch": 0.49238925806027195, "grad_norm": 0.3777334690093994, "learning_rate": 0.0001015427806444066, "loss": 1.3516, "step": 37892 }, { "epoch": 0.4924022526041878, "grad_norm": 0.4226819574832916, "learning_rate": 0.00010154018118249522, "loss": 1.3131, "step": 37893 }, { "epoch": 0.4924152471481037, "grad_norm": 0.383455365896225, "learning_rate": 0.00010153758172058385, "loss": 1.4163, "step": 37894 }, { "epoch": 0.49242824169201954, "grad_norm": 0.4080467224121094, "learning_rate": 0.00010153498225867246, "loss": 1.4903, "step": 37895 }, { "epoch": 0.49244123623593544, "grad_norm": 0.5001527667045593, "learning_rate": 0.00010153238279676108, "loss": 1.3848, "step": 37896 }, { "epoch": 0.4924542307798513, "grad_norm": 0.2543201446533203, "learning_rate": 0.00010152978333484969, "loss": 1.1539, "step": 37897 }, { "epoch": 0.4924672253237672, "grad_norm": 0.45297348499298096, "learning_rate": 0.00010152718387293831, "loss": 1.326, "step": 37898 }, { "epoch": 0.49248021986768303, "grad_norm": 0.3008664548397064, "learning_rate": 0.00010152458441102692, "loss": 1.1783, "step": 37899 }, { "epoch": 0.49249321441159893, "grad_norm": 0.39896097779273987, "learning_rate": 0.00010152198494911553, "loss": 1.4346, "step": 37900 }, { "epoch": 0.4925062089555148, "grad_norm": 0.43429243564605713, "learning_rate": 0.00010151938548720414, "loss": 1.3471, "step": 37901 }, { "epoch": 0.4925192034994307, "grad_norm": 0.3289016783237457, "learning_rate": 0.00010151678602529278, "loss": 1.2925, "step": 37902 }, { "epoch": 0.4925321980433465, "grad_norm": 0.3625752031803131, "learning_rate": 0.00010151418656338139, "loss": 1.4543, "step": 37903 }, { "epoch": 0.4925451925872624, "grad_norm": 0.3840138912200928, "learning_rate": 0.00010151158710146999, "loss": 1.2094, "step": 37904 }, { "epoch": 0.49255818713117827, "grad_norm": 0.44661855697631836, "learning_rate": 0.0001015089876395586, "loss": 1.3202, "step": 37905 }, { "epoch": 0.49257118167509417, "grad_norm": 0.5159459710121155, "learning_rate": 0.00010150638817764724, "loss": 1.5165, "step": 37906 }, { "epoch": 0.49258417621901, "grad_norm": 0.41345009207725525, "learning_rate": 0.00010150378871573585, "loss": 1.5094, "step": 37907 }, { "epoch": 0.4925971707629259, "grad_norm": 0.4461956024169922, "learning_rate": 0.00010150118925382446, "loss": 1.5059, "step": 37908 }, { "epoch": 0.49261016530684176, "grad_norm": 0.39571529626846313, "learning_rate": 0.00010149858979191307, "loss": 1.3985, "step": 37909 }, { "epoch": 0.49262315985075766, "grad_norm": 0.4356691241264343, "learning_rate": 0.0001014959903300017, "loss": 1.3349, "step": 37910 }, { "epoch": 0.4926361543946735, "grad_norm": 0.4733138084411621, "learning_rate": 0.00010149339086809031, "loss": 1.368, "step": 37911 }, { "epoch": 0.4926491489385894, "grad_norm": 0.32814696431159973, "learning_rate": 0.00010149079140617892, "loss": 1.3374, "step": 37912 }, { "epoch": 0.4926621434825053, "grad_norm": 0.4171048700809479, "learning_rate": 0.00010148819194426756, "loss": 1.3955, "step": 37913 }, { "epoch": 0.49267513802642116, "grad_norm": 0.4319785237312317, "learning_rate": 0.00010148559248235617, "loss": 1.4779, "step": 37914 }, { "epoch": 0.49268813257033706, "grad_norm": 0.424127995967865, "learning_rate": 0.00010148299302044478, "loss": 1.5297, "step": 37915 }, { "epoch": 0.4927011271142529, "grad_norm": 0.37903890013694763, "learning_rate": 0.00010148039355853338, "loss": 1.2705, "step": 37916 }, { "epoch": 0.4927141216581688, "grad_norm": 0.35672634840011597, "learning_rate": 0.00010147779409662201, "loss": 1.6217, "step": 37917 }, { "epoch": 0.49272711620208465, "grad_norm": 0.3906625807285309, "learning_rate": 0.00010147519463471062, "loss": 1.2418, "step": 37918 }, { "epoch": 0.49274011074600055, "grad_norm": 0.40231311321258545, "learning_rate": 0.00010147259517279924, "loss": 1.4281, "step": 37919 }, { "epoch": 0.4927531052899164, "grad_norm": 0.4384446442127228, "learning_rate": 0.00010146999571088785, "loss": 1.4005, "step": 37920 }, { "epoch": 0.4927660998338323, "grad_norm": 0.4290167987346649, "learning_rate": 0.00010146739624897647, "loss": 1.3525, "step": 37921 }, { "epoch": 0.49277909437774814, "grad_norm": 0.46006879210472107, "learning_rate": 0.00010146479678706508, "loss": 1.4284, "step": 37922 }, { "epoch": 0.49279208892166404, "grad_norm": 0.4378680884838104, "learning_rate": 0.00010146219732515369, "loss": 1.4095, "step": 37923 }, { "epoch": 0.4928050834655799, "grad_norm": 0.2542547285556793, "learning_rate": 0.0001014595978632423, "loss": 1.1708, "step": 37924 }, { "epoch": 0.4928180780094958, "grad_norm": 0.39742404222488403, "learning_rate": 0.00010145699840133094, "loss": 1.3833, "step": 37925 }, { "epoch": 0.49283107255341163, "grad_norm": 0.4774917662143707, "learning_rate": 0.00010145439893941955, "loss": 1.286, "step": 37926 }, { "epoch": 0.49284406709732753, "grad_norm": 0.5005674958229065, "learning_rate": 0.00010145179947750816, "loss": 1.3239, "step": 37927 }, { "epoch": 0.4928570616412434, "grad_norm": 0.40683239698410034, "learning_rate": 0.00010144920001559677, "loss": 1.4927, "step": 37928 }, { "epoch": 0.4928700561851593, "grad_norm": 0.466808944940567, "learning_rate": 0.0001014466005536854, "loss": 1.4526, "step": 37929 }, { "epoch": 0.4928830507290751, "grad_norm": 0.5208826065063477, "learning_rate": 0.00010144400109177401, "loss": 1.3834, "step": 37930 }, { "epoch": 0.492896045272991, "grad_norm": 0.43565046787261963, "learning_rate": 0.00010144140162986262, "loss": 1.4564, "step": 37931 }, { "epoch": 0.49290903981690687, "grad_norm": 0.430633544921875, "learning_rate": 0.00010143880216795123, "loss": 1.4805, "step": 37932 }, { "epoch": 0.49292203436082277, "grad_norm": 0.47882312536239624, "learning_rate": 0.00010143620270603986, "loss": 1.5159, "step": 37933 }, { "epoch": 0.4929350289047386, "grad_norm": 0.42850902676582336, "learning_rate": 0.00010143360324412847, "loss": 1.3549, "step": 37934 }, { "epoch": 0.4929480234486545, "grad_norm": 0.41156506538391113, "learning_rate": 0.00010143100378221708, "loss": 1.327, "step": 37935 }, { "epoch": 0.49296101799257036, "grad_norm": 0.4543415904045105, "learning_rate": 0.00010142840432030569, "loss": 1.368, "step": 37936 }, { "epoch": 0.49297401253648626, "grad_norm": 0.4010559916496277, "learning_rate": 0.00010142580485839433, "loss": 1.2507, "step": 37937 }, { "epoch": 0.4929870070804021, "grad_norm": 0.5090144872665405, "learning_rate": 0.00010142320539648294, "loss": 1.4973, "step": 37938 }, { "epoch": 0.493000001624318, "grad_norm": 0.39104536175727844, "learning_rate": 0.00010142060593457155, "loss": 1.4625, "step": 37939 }, { "epoch": 0.49301299616823385, "grad_norm": 0.38204225897789, "learning_rate": 0.00010141800647266016, "loss": 1.3766, "step": 37940 }, { "epoch": 0.49302599071214975, "grad_norm": 0.44885388016700745, "learning_rate": 0.00010141540701074878, "loss": 1.4984, "step": 37941 }, { "epoch": 0.4930389852560656, "grad_norm": 0.5591496825218201, "learning_rate": 0.0001014128075488374, "loss": 1.4563, "step": 37942 }, { "epoch": 0.4930519797999815, "grad_norm": 0.48022717237472534, "learning_rate": 0.000101410208086926, "loss": 1.439, "step": 37943 }, { "epoch": 0.49306497434389734, "grad_norm": 0.4503045678138733, "learning_rate": 0.00010140760862501462, "loss": 1.5237, "step": 37944 }, { "epoch": 0.49307796888781324, "grad_norm": 0.3211735486984253, "learning_rate": 0.00010140500916310326, "loss": 1.4258, "step": 37945 }, { "epoch": 0.4930909634317291, "grad_norm": 0.5986831188201904, "learning_rate": 0.00010140240970119185, "loss": 1.4574, "step": 37946 }, { "epoch": 0.493103957975645, "grad_norm": 0.42015311121940613, "learning_rate": 0.00010139981023928046, "loss": 1.4285, "step": 37947 }, { "epoch": 0.49311695251956084, "grad_norm": 0.5040203332901001, "learning_rate": 0.0001013972107773691, "loss": 1.3174, "step": 37948 }, { "epoch": 0.49312994706347674, "grad_norm": 0.41828641295433044, "learning_rate": 0.00010139461131545771, "loss": 1.5426, "step": 37949 }, { "epoch": 0.4931429416073926, "grad_norm": 0.39809930324554443, "learning_rate": 0.00010139201185354632, "loss": 1.4105, "step": 37950 }, { "epoch": 0.4931559361513085, "grad_norm": 0.35233503580093384, "learning_rate": 0.00010138941239163493, "loss": 1.5495, "step": 37951 }, { "epoch": 0.49316893069522433, "grad_norm": 0.3805144727230072, "learning_rate": 0.00010138681292972356, "loss": 1.4797, "step": 37952 }, { "epoch": 0.49318192523914023, "grad_norm": 0.4082876741886139, "learning_rate": 0.00010138421346781217, "loss": 1.4722, "step": 37953 }, { "epoch": 0.4931949197830561, "grad_norm": 0.35256877541542053, "learning_rate": 0.00010138161400590078, "loss": 1.3716, "step": 37954 }, { "epoch": 0.493207914326972, "grad_norm": 0.49692463874816895, "learning_rate": 0.00010137901454398939, "loss": 1.3552, "step": 37955 }, { "epoch": 0.4932209088708878, "grad_norm": 0.4395529627799988, "learning_rate": 0.00010137641508207803, "loss": 1.214, "step": 37956 }, { "epoch": 0.4932339034148037, "grad_norm": 0.4587078094482422, "learning_rate": 0.00010137381562016664, "loss": 1.3421, "step": 37957 }, { "epoch": 0.49324689795871957, "grad_norm": 0.4206991493701935, "learning_rate": 0.00010137121615825524, "loss": 1.4955, "step": 37958 }, { "epoch": 0.49325989250263547, "grad_norm": 0.3584602177143097, "learning_rate": 0.00010136861669634385, "loss": 1.3994, "step": 37959 }, { "epoch": 0.4932728870465513, "grad_norm": 0.4250337779521942, "learning_rate": 0.00010136601723443249, "loss": 1.4151, "step": 37960 }, { "epoch": 0.4932858815904672, "grad_norm": 0.4662173390388489, "learning_rate": 0.0001013634177725211, "loss": 1.4237, "step": 37961 }, { "epoch": 0.49329887613438306, "grad_norm": 0.388580858707428, "learning_rate": 0.00010136081831060971, "loss": 1.2744, "step": 37962 }, { "epoch": 0.49331187067829896, "grad_norm": 0.32799094915390015, "learning_rate": 0.00010135821884869832, "loss": 1.1594, "step": 37963 }, { "epoch": 0.4933248652222148, "grad_norm": 0.4237835109233856, "learning_rate": 0.00010135561938678694, "loss": 1.5096, "step": 37964 }, { "epoch": 0.4933378597661307, "grad_norm": 0.3883950114250183, "learning_rate": 0.00010135301992487556, "loss": 1.3216, "step": 37965 }, { "epoch": 0.49335085431004655, "grad_norm": 0.35629549622535706, "learning_rate": 0.00010135042046296417, "loss": 1.3716, "step": 37966 }, { "epoch": 0.49336384885396245, "grad_norm": 0.40324509143829346, "learning_rate": 0.00010134782100105278, "loss": 1.4888, "step": 37967 }, { "epoch": 0.4933768433978783, "grad_norm": 0.4503353536128998, "learning_rate": 0.00010134522153914141, "loss": 1.194, "step": 37968 }, { "epoch": 0.4933898379417942, "grad_norm": 0.3677539825439453, "learning_rate": 0.00010134262207723003, "loss": 1.1002, "step": 37969 }, { "epoch": 0.49340283248571004, "grad_norm": 0.3776630759239197, "learning_rate": 0.00010134002261531864, "loss": 1.235, "step": 37970 }, { "epoch": 0.49341582702962594, "grad_norm": 0.3005678057670593, "learning_rate": 0.00010133742315340723, "loss": 1.2759, "step": 37971 }, { "epoch": 0.4934288215735418, "grad_norm": 0.37763339281082153, "learning_rate": 0.00010133482369149587, "loss": 1.3875, "step": 37972 }, { "epoch": 0.4934418161174577, "grad_norm": 0.3466019630432129, "learning_rate": 0.00010133222422958448, "loss": 1.4156, "step": 37973 }, { "epoch": 0.49345481066137353, "grad_norm": 0.3192394971847534, "learning_rate": 0.0001013296247676731, "loss": 1.3902, "step": 37974 }, { "epoch": 0.49346780520528943, "grad_norm": 0.33244433999061584, "learning_rate": 0.0001013270253057617, "loss": 1.6655, "step": 37975 }, { "epoch": 0.4934807997492053, "grad_norm": 0.4626505672931671, "learning_rate": 0.00010132442584385033, "loss": 1.4667, "step": 37976 }, { "epoch": 0.4934937942931212, "grad_norm": 0.5244022011756897, "learning_rate": 0.00010132182638193894, "loss": 1.2626, "step": 37977 }, { "epoch": 0.493506788837037, "grad_norm": 0.41166967153549194, "learning_rate": 0.00010131922692002755, "loss": 1.4199, "step": 37978 }, { "epoch": 0.4935197833809529, "grad_norm": 0.3562083840370178, "learning_rate": 0.00010131662745811616, "loss": 1.4426, "step": 37979 }, { "epoch": 0.49353277792486877, "grad_norm": 0.4225623309612274, "learning_rate": 0.0001013140279962048, "loss": 1.4487, "step": 37980 }, { "epoch": 0.49354577246878467, "grad_norm": 0.3182221055030823, "learning_rate": 0.00010131142853429341, "loss": 1.3752, "step": 37981 }, { "epoch": 0.4935587670127005, "grad_norm": 0.37063705921173096, "learning_rate": 0.00010130882907238202, "loss": 1.4088, "step": 37982 }, { "epoch": 0.4935717615566164, "grad_norm": 0.3993184268474579, "learning_rate": 0.00010130622961047063, "loss": 1.4983, "step": 37983 }, { "epoch": 0.49358475610053226, "grad_norm": 0.41423940658569336, "learning_rate": 0.00010130363014855926, "loss": 1.2387, "step": 37984 }, { "epoch": 0.49359775064444816, "grad_norm": 0.3359740972518921, "learning_rate": 0.00010130103068664787, "loss": 1.4156, "step": 37985 }, { "epoch": 0.493610745188364, "grad_norm": 0.37544915080070496, "learning_rate": 0.00010129843122473648, "loss": 1.3729, "step": 37986 }, { "epoch": 0.4936237397322799, "grad_norm": 0.4646163880825043, "learning_rate": 0.00010129583176282512, "loss": 1.5846, "step": 37987 }, { "epoch": 0.4936367342761958, "grad_norm": 0.4488069713115692, "learning_rate": 0.00010129323230091371, "loss": 1.4046, "step": 37988 }, { "epoch": 0.49364972882011166, "grad_norm": 0.42781439423561096, "learning_rate": 0.00010129063283900233, "loss": 1.5327, "step": 37989 }, { "epoch": 0.49366272336402756, "grad_norm": 0.38465964794158936, "learning_rate": 0.00010128803337709094, "loss": 1.5006, "step": 37990 }, { "epoch": 0.4936757179079434, "grad_norm": 0.4315025210380554, "learning_rate": 0.00010128543391517957, "loss": 1.5311, "step": 37991 }, { "epoch": 0.4936887124518593, "grad_norm": 0.3920021653175354, "learning_rate": 0.00010128283445326819, "loss": 1.5596, "step": 37992 }, { "epoch": 0.49370170699577515, "grad_norm": 0.39746302366256714, "learning_rate": 0.0001012802349913568, "loss": 1.2285, "step": 37993 }, { "epoch": 0.49371470153969105, "grad_norm": 0.32346588373184204, "learning_rate": 0.00010127763552944541, "loss": 1.3456, "step": 37994 }, { "epoch": 0.4937276960836069, "grad_norm": 0.43340837955474854, "learning_rate": 0.00010127503606753403, "loss": 1.5953, "step": 37995 }, { "epoch": 0.4937406906275228, "grad_norm": 0.4179762899875641, "learning_rate": 0.00010127243660562264, "loss": 1.4177, "step": 37996 }, { "epoch": 0.49375368517143864, "grad_norm": 0.2999354600906372, "learning_rate": 0.00010126983714371125, "loss": 1.1806, "step": 37997 }, { "epoch": 0.49376667971535454, "grad_norm": 0.441028356552124, "learning_rate": 0.00010126723768179986, "loss": 1.2673, "step": 37998 }, { "epoch": 0.4937796742592704, "grad_norm": 0.3732874095439911, "learning_rate": 0.0001012646382198885, "loss": 1.3215, "step": 37999 }, { "epoch": 0.4937926688031863, "grad_norm": 0.3397403657436371, "learning_rate": 0.0001012620387579771, "loss": 1.3268, "step": 38000 }, { "epoch": 0.49380566334710213, "grad_norm": 0.4676308333873749, "learning_rate": 0.00010125943929606571, "loss": 1.3105, "step": 38001 }, { "epoch": 0.49381865789101803, "grad_norm": 0.4722161293029785, "learning_rate": 0.00010125683983415432, "loss": 1.3708, "step": 38002 }, { "epoch": 0.4938316524349339, "grad_norm": 0.2911146283149719, "learning_rate": 0.00010125424037224296, "loss": 1.2611, "step": 38003 }, { "epoch": 0.4938446469788498, "grad_norm": 0.4634213447570801, "learning_rate": 0.00010125164091033157, "loss": 1.3869, "step": 38004 }, { "epoch": 0.4938576415227656, "grad_norm": 0.3736705183982849, "learning_rate": 0.00010124904144842018, "loss": 1.4028, "step": 38005 }, { "epoch": 0.4938706360666815, "grad_norm": 0.36504679918289185, "learning_rate": 0.00010124644198650879, "loss": 1.4478, "step": 38006 }, { "epoch": 0.49388363061059737, "grad_norm": 0.41356122493743896, "learning_rate": 0.00010124384252459742, "loss": 1.4253, "step": 38007 }, { "epoch": 0.49389662515451327, "grad_norm": 0.3205413818359375, "learning_rate": 0.00010124124306268603, "loss": 1.332, "step": 38008 }, { "epoch": 0.4939096196984291, "grad_norm": 0.48153674602508545, "learning_rate": 0.00010123864360077464, "loss": 1.4926, "step": 38009 }, { "epoch": 0.493922614242345, "grad_norm": 0.33620941638946533, "learning_rate": 0.00010123604413886325, "loss": 1.5743, "step": 38010 }, { "epoch": 0.49393560878626086, "grad_norm": 0.3998759388923645, "learning_rate": 0.00010123344467695189, "loss": 1.3283, "step": 38011 }, { "epoch": 0.49394860333017676, "grad_norm": 0.47262445092201233, "learning_rate": 0.0001012308452150405, "loss": 1.366, "step": 38012 }, { "epoch": 0.4939615978740926, "grad_norm": 0.4668542742729187, "learning_rate": 0.0001012282457531291, "loss": 1.4604, "step": 38013 }, { "epoch": 0.4939745924180085, "grad_norm": 0.4626637101173401, "learning_rate": 0.00010122564629121771, "loss": 1.565, "step": 38014 }, { "epoch": 0.49398758696192435, "grad_norm": 0.45223796367645264, "learning_rate": 0.00010122304682930635, "loss": 1.3877, "step": 38015 }, { "epoch": 0.49400058150584025, "grad_norm": 0.3459091782569885, "learning_rate": 0.00010122044736739496, "loss": 1.3367, "step": 38016 }, { "epoch": 0.4940135760497561, "grad_norm": 0.3948054313659668, "learning_rate": 0.00010121784790548357, "loss": 1.2261, "step": 38017 }, { "epoch": 0.494026570593672, "grad_norm": 0.3407975435256958, "learning_rate": 0.00010121524844357218, "loss": 1.3084, "step": 38018 }, { "epoch": 0.49403956513758784, "grad_norm": 0.3612014949321747, "learning_rate": 0.0001012126489816608, "loss": 1.3939, "step": 38019 }, { "epoch": 0.49405255968150374, "grad_norm": 0.29565030336380005, "learning_rate": 0.00010121004951974941, "loss": 1.4792, "step": 38020 }, { "epoch": 0.4940655542254196, "grad_norm": 0.3904449939727783, "learning_rate": 0.00010120745005783802, "loss": 1.3609, "step": 38021 }, { "epoch": 0.4940785487693355, "grad_norm": 0.4191446900367737, "learning_rate": 0.00010120485059592666, "loss": 1.3346, "step": 38022 }, { "epoch": 0.49409154331325134, "grad_norm": 0.45718222856521606, "learning_rate": 0.00010120225113401527, "loss": 1.3972, "step": 38023 }, { "epoch": 0.49410453785716724, "grad_norm": 0.39793214201927185, "learning_rate": 0.00010119965167210388, "loss": 1.4745, "step": 38024 }, { "epoch": 0.4941175324010831, "grad_norm": 0.42547401785850525, "learning_rate": 0.0001011970522101925, "loss": 1.4827, "step": 38025 }, { "epoch": 0.494130526944999, "grad_norm": 0.415231853723526, "learning_rate": 0.00010119445274828112, "loss": 1.4155, "step": 38026 }, { "epoch": 0.4941435214889148, "grad_norm": 0.4106965959072113, "learning_rate": 0.00010119185328636973, "loss": 1.5041, "step": 38027 }, { "epoch": 0.49415651603283073, "grad_norm": 0.3416058421134949, "learning_rate": 0.00010118925382445834, "loss": 1.2758, "step": 38028 }, { "epoch": 0.4941695105767466, "grad_norm": 0.42380955815315247, "learning_rate": 0.00010118665436254695, "loss": 1.2997, "step": 38029 }, { "epoch": 0.4941825051206625, "grad_norm": 0.3871223032474518, "learning_rate": 0.00010118405490063558, "loss": 1.2986, "step": 38030 }, { "epoch": 0.4941954996645783, "grad_norm": 0.44861894845962524, "learning_rate": 0.00010118145543872419, "loss": 1.3392, "step": 38031 }, { "epoch": 0.4942084942084942, "grad_norm": 0.4205858111381531, "learning_rate": 0.0001011788559768128, "loss": 1.5934, "step": 38032 }, { "epoch": 0.49422148875241007, "grad_norm": 0.4282827377319336, "learning_rate": 0.00010117625651490141, "loss": 1.422, "step": 38033 }, { "epoch": 0.49423448329632597, "grad_norm": 0.47219786047935486, "learning_rate": 0.00010117365705299005, "loss": 1.4074, "step": 38034 }, { "epoch": 0.4942474778402418, "grad_norm": 0.27033543586730957, "learning_rate": 0.00010117105759107866, "loss": 1.3483, "step": 38035 }, { "epoch": 0.4942604723841577, "grad_norm": 0.43650442361831665, "learning_rate": 0.00010116845812916727, "loss": 1.5132, "step": 38036 }, { "epoch": 0.49427346692807356, "grad_norm": 0.39370986819267273, "learning_rate": 0.00010116585866725588, "loss": 1.3898, "step": 38037 }, { "epoch": 0.49428646147198946, "grad_norm": 0.41571998596191406, "learning_rate": 0.0001011632592053445, "loss": 1.4039, "step": 38038 }, { "epoch": 0.4942994560159053, "grad_norm": 0.39217615127563477, "learning_rate": 0.00010116065974343312, "loss": 1.2554, "step": 38039 }, { "epoch": 0.4943124505598212, "grad_norm": 0.2416711300611496, "learning_rate": 0.00010115806028152173, "loss": 1.3932, "step": 38040 }, { "epoch": 0.49432544510373705, "grad_norm": 0.4639711380004883, "learning_rate": 0.00010115546081961034, "loss": 1.5079, "step": 38041 }, { "epoch": 0.49433843964765295, "grad_norm": 0.3492540121078491, "learning_rate": 0.00010115286135769896, "loss": 1.3331, "step": 38042 }, { "epoch": 0.4943514341915688, "grad_norm": 0.46703892946243286, "learning_rate": 0.00010115026189578757, "loss": 1.4683, "step": 38043 }, { "epoch": 0.4943644287354847, "grad_norm": 0.5077219605445862, "learning_rate": 0.00010114766243387618, "loss": 1.4193, "step": 38044 }, { "epoch": 0.49437742327940054, "grad_norm": 0.4638369083404541, "learning_rate": 0.0001011450629719648, "loss": 1.4103, "step": 38045 }, { "epoch": 0.49439041782331644, "grad_norm": 0.24610057473182678, "learning_rate": 0.00010114246351005343, "loss": 1.3075, "step": 38046 }, { "epoch": 0.4944034123672323, "grad_norm": 0.4314155578613281, "learning_rate": 0.00010113986404814204, "loss": 1.4749, "step": 38047 }, { "epoch": 0.4944164069111482, "grad_norm": 0.4158130884170532, "learning_rate": 0.00010113726458623066, "loss": 1.2552, "step": 38048 }, { "epoch": 0.49442940145506403, "grad_norm": 0.447780042886734, "learning_rate": 0.00010113466512431927, "loss": 1.386, "step": 38049 }, { "epoch": 0.49444239599897993, "grad_norm": 0.36035841703414917, "learning_rate": 0.00010113206566240789, "loss": 1.4623, "step": 38050 }, { "epoch": 0.4944553905428958, "grad_norm": 0.5047826766967773, "learning_rate": 0.0001011294662004965, "loss": 1.4424, "step": 38051 }, { "epoch": 0.4944683850868117, "grad_norm": 0.39765664935112, "learning_rate": 0.00010112686673858511, "loss": 1.2496, "step": 38052 }, { "epoch": 0.4944813796307275, "grad_norm": 0.3847762644290924, "learning_rate": 0.00010112426727667372, "loss": 1.3808, "step": 38053 }, { "epoch": 0.4944943741746434, "grad_norm": 0.34074097871780396, "learning_rate": 0.00010112166781476236, "loss": 1.4476, "step": 38054 }, { "epoch": 0.49450736871855927, "grad_norm": 0.3757489323616028, "learning_rate": 0.00010111906835285096, "loss": 1.4605, "step": 38055 }, { "epoch": 0.49452036326247517, "grad_norm": 0.30726081132888794, "learning_rate": 0.00010111646889093957, "loss": 1.1356, "step": 38056 }, { "epoch": 0.494533357806391, "grad_norm": 0.4372595250606537, "learning_rate": 0.00010111386942902818, "loss": 1.3606, "step": 38057 }, { "epoch": 0.4945463523503069, "grad_norm": 0.3018917441368103, "learning_rate": 0.00010111126996711682, "loss": 1.3616, "step": 38058 }, { "epoch": 0.49455934689422276, "grad_norm": 0.3442380726337433, "learning_rate": 0.00010110867050520543, "loss": 1.5974, "step": 38059 }, { "epoch": 0.49457234143813866, "grad_norm": 0.44635239243507385, "learning_rate": 0.00010110607104329404, "loss": 1.4694, "step": 38060 }, { "epoch": 0.4945853359820545, "grad_norm": 0.4729903042316437, "learning_rate": 0.00010110347158138267, "loss": 1.5467, "step": 38061 }, { "epoch": 0.4945983305259704, "grad_norm": 0.4541347622871399, "learning_rate": 0.00010110087211947128, "loss": 1.2839, "step": 38062 }, { "epoch": 0.49461132506988625, "grad_norm": 0.4162755012512207, "learning_rate": 0.00010109827265755989, "loss": 1.5634, "step": 38063 }, { "epoch": 0.49462431961380215, "grad_norm": 0.37976840138435364, "learning_rate": 0.0001010956731956485, "loss": 1.3361, "step": 38064 }, { "epoch": 0.49463731415771806, "grad_norm": 0.4237111210823059, "learning_rate": 0.00010109307373373714, "loss": 1.4506, "step": 38065 }, { "epoch": 0.4946503087016339, "grad_norm": 0.4040474593639374, "learning_rate": 0.00010109047427182575, "loss": 1.3602, "step": 38066 }, { "epoch": 0.4946633032455498, "grad_norm": 0.49096718430519104, "learning_rate": 0.00010108787480991436, "loss": 1.4101, "step": 38067 }, { "epoch": 0.49467629778946565, "grad_norm": 0.3960370421409607, "learning_rate": 0.00010108527534800296, "loss": 1.2334, "step": 38068 }, { "epoch": 0.49468929233338155, "grad_norm": 0.7094560265541077, "learning_rate": 0.0001010826758860916, "loss": 1.5798, "step": 38069 }, { "epoch": 0.4947022868772974, "grad_norm": 0.4135470390319824, "learning_rate": 0.0001010800764241802, "loss": 1.4375, "step": 38070 }, { "epoch": 0.4947152814212133, "grad_norm": 0.5411341190338135, "learning_rate": 0.00010107747696226882, "loss": 1.5351, "step": 38071 }, { "epoch": 0.49472827596512914, "grad_norm": 0.4583306312561035, "learning_rate": 0.00010107487750035743, "loss": 1.3228, "step": 38072 }, { "epoch": 0.49474127050904504, "grad_norm": 0.437509149312973, "learning_rate": 0.00010107227803844605, "loss": 1.5489, "step": 38073 }, { "epoch": 0.4947542650529609, "grad_norm": 0.4036235213279724, "learning_rate": 0.00010106967857653466, "loss": 1.5532, "step": 38074 }, { "epoch": 0.4947672595968768, "grad_norm": 0.504324197769165, "learning_rate": 0.00010106707911462327, "loss": 1.2805, "step": 38075 }, { "epoch": 0.49478025414079263, "grad_norm": 0.37474584579467773, "learning_rate": 0.00010106447965271188, "loss": 1.4843, "step": 38076 }, { "epoch": 0.49479324868470853, "grad_norm": 0.3595208525657654, "learning_rate": 0.00010106188019080052, "loss": 1.4961, "step": 38077 }, { "epoch": 0.4948062432286244, "grad_norm": 0.414688378572464, "learning_rate": 0.00010105928072888913, "loss": 1.3105, "step": 38078 }, { "epoch": 0.4948192377725403, "grad_norm": 0.4075782299041748, "learning_rate": 0.00010105668126697774, "loss": 1.4428, "step": 38079 }, { "epoch": 0.4948322323164561, "grad_norm": 0.3834506869316101, "learning_rate": 0.00010105408180506634, "loss": 1.4417, "step": 38080 }, { "epoch": 0.494845226860372, "grad_norm": 0.4564828872680664, "learning_rate": 0.00010105148234315498, "loss": 1.4381, "step": 38081 }, { "epoch": 0.49485822140428787, "grad_norm": 0.4751845598220825, "learning_rate": 0.00010104888288124359, "loss": 1.4776, "step": 38082 }, { "epoch": 0.49487121594820377, "grad_norm": 0.40767902135849, "learning_rate": 0.0001010462834193322, "loss": 1.4437, "step": 38083 }, { "epoch": 0.4948842104921196, "grad_norm": 0.40385448932647705, "learning_rate": 0.00010104368395742081, "loss": 1.462, "step": 38084 }, { "epoch": 0.4948972050360355, "grad_norm": 0.3900837302207947, "learning_rate": 0.00010104108449550944, "loss": 1.1103, "step": 38085 }, { "epoch": 0.49491019957995136, "grad_norm": 0.4465446472167969, "learning_rate": 0.00010103848503359805, "loss": 1.3841, "step": 38086 }, { "epoch": 0.49492319412386726, "grad_norm": 0.4772008955478668, "learning_rate": 0.00010103588557168666, "loss": 1.5146, "step": 38087 }, { "epoch": 0.4949361886677831, "grad_norm": 0.517224133014679, "learning_rate": 0.00010103328610977527, "loss": 1.5706, "step": 38088 }, { "epoch": 0.494949183211699, "grad_norm": 0.42201143503189087, "learning_rate": 0.00010103068664786391, "loss": 1.5045, "step": 38089 }, { "epoch": 0.49496217775561485, "grad_norm": 0.41769346594810486, "learning_rate": 0.00010102808718595252, "loss": 1.2715, "step": 38090 }, { "epoch": 0.49497517229953075, "grad_norm": 0.39498546719551086, "learning_rate": 0.00010102548772404113, "loss": 1.2837, "step": 38091 }, { "epoch": 0.4949881668434466, "grad_norm": 0.4292343556880951, "learning_rate": 0.00010102288826212974, "loss": 1.4145, "step": 38092 }, { "epoch": 0.4950011613873625, "grad_norm": 0.2744852900505066, "learning_rate": 0.00010102028880021836, "loss": 1.1836, "step": 38093 }, { "epoch": 0.49501415593127834, "grad_norm": 0.47898560762405396, "learning_rate": 0.00010101768933830698, "loss": 1.3381, "step": 38094 }, { "epoch": 0.49502715047519424, "grad_norm": 0.4035773277282715, "learning_rate": 0.00010101508987639559, "loss": 1.4612, "step": 38095 }, { "epoch": 0.4950401450191101, "grad_norm": 0.4253118634223938, "learning_rate": 0.00010101249041448422, "loss": 1.2472, "step": 38096 }, { "epoch": 0.495053139563026, "grad_norm": 0.4103727638721466, "learning_rate": 0.00010100989095257282, "loss": 1.3235, "step": 38097 }, { "epoch": 0.49506613410694184, "grad_norm": 0.34784695506095886, "learning_rate": 0.00010100729149066143, "loss": 1.4793, "step": 38098 }, { "epoch": 0.49507912865085774, "grad_norm": 0.3973008096218109, "learning_rate": 0.00010100469202875004, "loss": 1.3408, "step": 38099 }, { "epoch": 0.4950921231947736, "grad_norm": 0.39903563261032104, "learning_rate": 0.00010100209256683868, "loss": 1.2727, "step": 38100 }, { "epoch": 0.4951051177386895, "grad_norm": 0.4041145145893097, "learning_rate": 0.00010099949310492729, "loss": 1.513, "step": 38101 }, { "epoch": 0.4951181122826053, "grad_norm": 0.4685211479663849, "learning_rate": 0.0001009968936430159, "loss": 1.5599, "step": 38102 }, { "epoch": 0.49513110682652123, "grad_norm": 0.4299374520778656, "learning_rate": 0.00010099429418110451, "loss": 1.5151, "step": 38103 }, { "epoch": 0.4951441013704371, "grad_norm": 0.4188407063484192, "learning_rate": 0.00010099169471919314, "loss": 1.2669, "step": 38104 }, { "epoch": 0.495157095914353, "grad_norm": 0.4171513319015503, "learning_rate": 0.00010098909525728175, "loss": 1.3941, "step": 38105 }, { "epoch": 0.4951700904582688, "grad_norm": 0.40999868512153625, "learning_rate": 0.00010098649579537036, "loss": 1.4132, "step": 38106 }, { "epoch": 0.4951830850021847, "grad_norm": 0.30557218194007874, "learning_rate": 0.00010098389633345897, "loss": 1.373, "step": 38107 }, { "epoch": 0.49519607954610056, "grad_norm": 0.42449942231178284, "learning_rate": 0.00010098129687154761, "loss": 1.4162, "step": 38108 }, { "epoch": 0.49520907409001647, "grad_norm": 0.46149638295173645, "learning_rate": 0.00010097869740963622, "loss": 1.4531, "step": 38109 }, { "epoch": 0.4952220686339323, "grad_norm": 0.445597380399704, "learning_rate": 0.00010097609794772482, "loss": 1.3368, "step": 38110 }, { "epoch": 0.4952350631778482, "grad_norm": 0.5412014126777649, "learning_rate": 0.00010097349848581343, "loss": 1.4705, "step": 38111 }, { "epoch": 0.49524805772176406, "grad_norm": 0.4127027690410614, "learning_rate": 0.00010097089902390207, "loss": 1.2966, "step": 38112 }, { "epoch": 0.49526105226567996, "grad_norm": 0.3508322536945343, "learning_rate": 0.00010096829956199068, "loss": 1.3049, "step": 38113 }, { "epoch": 0.4952740468095958, "grad_norm": 0.4369240701198578, "learning_rate": 0.00010096570010007929, "loss": 1.3868, "step": 38114 }, { "epoch": 0.4952870413535117, "grad_norm": 0.3675299882888794, "learning_rate": 0.0001009631006381679, "loss": 1.3366, "step": 38115 }, { "epoch": 0.49530003589742755, "grad_norm": 0.39378783106803894, "learning_rate": 0.00010096050117625652, "loss": 1.6476, "step": 38116 }, { "epoch": 0.49531303044134345, "grad_norm": 0.49533969163894653, "learning_rate": 0.00010095790171434513, "loss": 1.3325, "step": 38117 }, { "epoch": 0.4953260249852593, "grad_norm": 0.3428540825843811, "learning_rate": 0.00010095530225243375, "loss": 1.4273, "step": 38118 }, { "epoch": 0.4953390195291752, "grad_norm": 0.4104280173778534, "learning_rate": 0.00010095270279052236, "loss": 1.4006, "step": 38119 }, { "epoch": 0.49535201407309104, "grad_norm": 0.3324749171733856, "learning_rate": 0.000100950103328611, "loss": 1.1713, "step": 38120 }, { "epoch": 0.49536500861700694, "grad_norm": 0.4256455600261688, "learning_rate": 0.0001009475038666996, "loss": 1.4412, "step": 38121 }, { "epoch": 0.4953780031609228, "grad_norm": 0.42627793550491333, "learning_rate": 0.0001009449044047882, "loss": 1.4096, "step": 38122 }, { "epoch": 0.4953909977048387, "grad_norm": 0.3781941533088684, "learning_rate": 0.00010094230494287681, "loss": 1.3487, "step": 38123 }, { "epoch": 0.49540399224875453, "grad_norm": 0.3275192975997925, "learning_rate": 0.00010093970548096545, "loss": 1.479, "step": 38124 }, { "epoch": 0.49541698679267043, "grad_norm": 0.270579069852829, "learning_rate": 0.00010093710601905406, "loss": 1.3982, "step": 38125 }, { "epoch": 0.4954299813365863, "grad_norm": 0.3849242925643921, "learning_rate": 0.00010093450655714267, "loss": 1.4817, "step": 38126 }, { "epoch": 0.4954429758805022, "grad_norm": 0.4871739447116852, "learning_rate": 0.00010093190709523128, "loss": 1.447, "step": 38127 }, { "epoch": 0.495455970424418, "grad_norm": 0.34128451347351074, "learning_rate": 0.00010092930763331991, "loss": 1.2267, "step": 38128 }, { "epoch": 0.4954689649683339, "grad_norm": 0.3936249613761902, "learning_rate": 0.00010092670817140852, "loss": 1.2636, "step": 38129 }, { "epoch": 0.49548195951224977, "grad_norm": 0.3078303635120392, "learning_rate": 0.00010092410870949713, "loss": 1.5205, "step": 38130 }, { "epoch": 0.49549495405616567, "grad_norm": 0.4474124610424042, "learning_rate": 0.00010092150924758574, "loss": 1.3391, "step": 38131 }, { "epoch": 0.4955079486000815, "grad_norm": 0.3340831398963928, "learning_rate": 0.00010091890978567438, "loss": 1.3877, "step": 38132 }, { "epoch": 0.4955209431439974, "grad_norm": 0.3591429889202118, "learning_rate": 0.00010091631032376299, "loss": 1.4964, "step": 38133 }, { "epoch": 0.49553393768791326, "grad_norm": 0.40667182207107544, "learning_rate": 0.0001009137108618516, "loss": 1.3645, "step": 38134 }, { "epoch": 0.49554693223182916, "grad_norm": 0.4645344913005829, "learning_rate": 0.00010091111139994023, "loss": 1.4408, "step": 38135 }, { "epoch": 0.495559926775745, "grad_norm": 0.5129203200340271, "learning_rate": 0.00010090851193802884, "loss": 1.4038, "step": 38136 }, { "epoch": 0.4955729213196609, "grad_norm": 0.37310346961021423, "learning_rate": 0.00010090591247611745, "loss": 1.3076, "step": 38137 }, { "epoch": 0.49558591586357675, "grad_norm": 0.4951123595237732, "learning_rate": 0.00010090331301420606, "loss": 1.3905, "step": 38138 }, { "epoch": 0.49559891040749265, "grad_norm": 0.40237176418304443, "learning_rate": 0.00010090071355229468, "loss": 1.2664, "step": 38139 }, { "epoch": 0.49561190495140856, "grad_norm": 0.4635857343673706, "learning_rate": 0.0001008981140903833, "loss": 1.2616, "step": 38140 }, { "epoch": 0.4956248994953244, "grad_norm": 0.5286194682121277, "learning_rate": 0.0001008955146284719, "loss": 1.3302, "step": 38141 }, { "epoch": 0.4956378940392403, "grad_norm": 0.5238842368125916, "learning_rate": 0.00010089291516656052, "loss": 1.5706, "step": 38142 }, { "epoch": 0.49565088858315615, "grad_norm": 0.4664166569709778, "learning_rate": 0.00010089031570464915, "loss": 1.6148, "step": 38143 }, { "epoch": 0.49566388312707205, "grad_norm": 0.44058454036712646, "learning_rate": 0.00010088771624273777, "loss": 1.4235, "step": 38144 }, { "epoch": 0.4956768776709879, "grad_norm": 0.5516623854637146, "learning_rate": 0.00010088511678082638, "loss": 1.3676, "step": 38145 }, { "epoch": 0.4956898722149038, "grad_norm": 0.4085869789123535, "learning_rate": 0.00010088251731891499, "loss": 1.3035, "step": 38146 }, { "epoch": 0.49570286675881964, "grad_norm": 0.4047647714614868, "learning_rate": 0.00010087991785700361, "loss": 1.3883, "step": 38147 }, { "epoch": 0.49571586130273554, "grad_norm": 0.34162425994873047, "learning_rate": 0.00010087731839509222, "loss": 1.3107, "step": 38148 }, { "epoch": 0.4957288558466514, "grad_norm": 0.45023488998413086, "learning_rate": 0.00010087471893318083, "loss": 1.5084, "step": 38149 }, { "epoch": 0.4957418503905673, "grad_norm": 0.40335461497306824, "learning_rate": 0.00010087211947126944, "loss": 1.4245, "step": 38150 }, { "epoch": 0.49575484493448313, "grad_norm": 0.4403659701347351, "learning_rate": 0.00010086952000935808, "loss": 1.3721, "step": 38151 }, { "epoch": 0.49576783947839903, "grad_norm": 0.4788984954357147, "learning_rate": 0.00010086692054744668, "loss": 1.4036, "step": 38152 }, { "epoch": 0.4957808340223149, "grad_norm": 0.31269076466560364, "learning_rate": 0.00010086432108553529, "loss": 1.2356, "step": 38153 }, { "epoch": 0.4957938285662308, "grad_norm": 0.2996072769165039, "learning_rate": 0.0001008617216236239, "loss": 1.4761, "step": 38154 }, { "epoch": 0.4958068231101466, "grad_norm": 0.446087509393692, "learning_rate": 0.00010085912216171254, "loss": 1.3956, "step": 38155 }, { "epoch": 0.4958198176540625, "grad_norm": 0.2937715947628021, "learning_rate": 0.00010085652269980115, "loss": 1.5011, "step": 38156 }, { "epoch": 0.49583281219797837, "grad_norm": 0.3581866919994354, "learning_rate": 0.00010085392323788976, "loss": 1.4644, "step": 38157 }, { "epoch": 0.49584580674189427, "grad_norm": 0.3880802392959595, "learning_rate": 0.00010085132377597837, "loss": 1.3598, "step": 38158 }, { "epoch": 0.4958588012858101, "grad_norm": 0.5193506479263306, "learning_rate": 0.000100848724314067, "loss": 1.2529, "step": 38159 }, { "epoch": 0.495871795829726, "grad_norm": 0.4425947666168213, "learning_rate": 0.00010084612485215561, "loss": 1.1278, "step": 38160 }, { "epoch": 0.49588479037364186, "grad_norm": 0.38908982276916504, "learning_rate": 0.00010084352539024422, "loss": 1.4684, "step": 38161 }, { "epoch": 0.49589778491755776, "grad_norm": 0.3877604603767395, "learning_rate": 0.00010084092592833283, "loss": 1.4409, "step": 38162 }, { "epoch": 0.4959107794614736, "grad_norm": 0.4295874834060669, "learning_rate": 0.00010083832646642147, "loss": 1.24, "step": 38163 }, { "epoch": 0.4959237740053895, "grad_norm": 0.3835473656654358, "learning_rate": 0.00010083572700451007, "loss": 1.3604, "step": 38164 }, { "epoch": 0.49593676854930535, "grad_norm": 0.4596775472164154, "learning_rate": 0.00010083312754259868, "loss": 1.203, "step": 38165 }, { "epoch": 0.49594976309322125, "grad_norm": 0.48684290051460266, "learning_rate": 0.00010083052808068729, "loss": 1.3622, "step": 38166 }, { "epoch": 0.4959627576371371, "grad_norm": 0.3998570740222931, "learning_rate": 0.00010082792861877593, "loss": 1.3433, "step": 38167 }, { "epoch": 0.495975752181053, "grad_norm": 0.42381227016448975, "learning_rate": 0.00010082532915686454, "loss": 1.2974, "step": 38168 }, { "epoch": 0.49598874672496884, "grad_norm": 0.46143975853919983, "learning_rate": 0.00010082272969495315, "loss": 1.3469, "step": 38169 }, { "epoch": 0.49600174126888474, "grad_norm": 0.3237660527229309, "learning_rate": 0.00010082013023304176, "loss": 1.5836, "step": 38170 }, { "epoch": 0.4960147358128006, "grad_norm": 0.40177983045578003, "learning_rate": 0.00010081753077113038, "loss": 1.4561, "step": 38171 }, { "epoch": 0.4960277303567165, "grad_norm": 0.39886289834976196, "learning_rate": 0.000100814931309219, "loss": 1.3258, "step": 38172 }, { "epoch": 0.49604072490063233, "grad_norm": 0.44304659962654114, "learning_rate": 0.0001008123318473076, "loss": 1.6858, "step": 38173 }, { "epoch": 0.49605371944454824, "grad_norm": 0.4643542766571045, "learning_rate": 0.00010080973238539624, "loss": 1.6115, "step": 38174 }, { "epoch": 0.4960667139884641, "grad_norm": 0.371380090713501, "learning_rate": 0.00010080713292348485, "loss": 1.3763, "step": 38175 }, { "epoch": 0.49607970853238, "grad_norm": 0.44603514671325684, "learning_rate": 0.00010080453346157346, "loss": 1.3742, "step": 38176 }, { "epoch": 0.4960927030762958, "grad_norm": 0.35637059807777405, "learning_rate": 0.00010080193399966206, "loss": 1.1879, "step": 38177 }, { "epoch": 0.4961056976202117, "grad_norm": 0.45656099915504456, "learning_rate": 0.0001007993345377507, "loss": 1.3443, "step": 38178 }, { "epoch": 0.4961186921641276, "grad_norm": 0.39014869928359985, "learning_rate": 0.00010079673507583931, "loss": 1.3491, "step": 38179 }, { "epoch": 0.4961316867080435, "grad_norm": 0.23265451192855835, "learning_rate": 0.00010079413561392792, "loss": 1.4216, "step": 38180 }, { "epoch": 0.4961446812519593, "grad_norm": 0.40407150983810425, "learning_rate": 0.00010079153615201653, "loss": 1.3644, "step": 38181 }, { "epoch": 0.4961576757958752, "grad_norm": 0.3973194360733032, "learning_rate": 0.00010078893669010516, "loss": 1.45, "step": 38182 }, { "epoch": 0.49617067033979106, "grad_norm": 0.46652403473854065, "learning_rate": 0.00010078633722819377, "loss": 1.4179, "step": 38183 }, { "epoch": 0.49618366488370697, "grad_norm": 0.4539624750614166, "learning_rate": 0.00010078373776628238, "loss": 1.2773, "step": 38184 }, { "epoch": 0.4961966594276228, "grad_norm": 0.4824382960796356, "learning_rate": 0.00010078113830437099, "loss": 1.443, "step": 38185 }, { "epoch": 0.4962096539715387, "grad_norm": 0.42717623710632324, "learning_rate": 0.00010077853884245963, "loss": 1.3168, "step": 38186 }, { "epoch": 0.49622264851545456, "grad_norm": 0.4497928023338318, "learning_rate": 0.00010077593938054824, "loss": 1.2448, "step": 38187 }, { "epoch": 0.49623564305937046, "grad_norm": 0.2845083475112915, "learning_rate": 0.00010077333991863685, "loss": 1.2271, "step": 38188 }, { "epoch": 0.4962486376032863, "grad_norm": 0.380996435880661, "learning_rate": 0.00010077074045672546, "loss": 1.3912, "step": 38189 }, { "epoch": 0.4962616321472022, "grad_norm": 0.4074379503726959, "learning_rate": 0.00010076814099481409, "loss": 1.5681, "step": 38190 }, { "epoch": 0.49627462669111805, "grad_norm": 0.3348466157913208, "learning_rate": 0.0001007655415329027, "loss": 1.2558, "step": 38191 }, { "epoch": 0.49628762123503395, "grad_norm": 0.40191611647605896, "learning_rate": 0.00010076294207099131, "loss": 1.5807, "step": 38192 }, { "epoch": 0.4963006157789498, "grad_norm": 0.333392471075058, "learning_rate": 0.00010076034260907992, "loss": 1.3125, "step": 38193 }, { "epoch": 0.4963136103228657, "grad_norm": 0.4480747580528259, "learning_rate": 0.00010075774314716854, "loss": 1.3607, "step": 38194 }, { "epoch": 0.49632660486678154, "grad_norm": 0.3587522506713867, "learning_rate": 0.00010075514368525715, "loss": 1.2453, "step": 38195 }, { "epoch": 0.49633959941069744, "grad_norm": 0.46608126163482666, "learning_rate": 0.00010075254422334576, "loss": 1.5219, "step": 38196 }, { "epoch": 0.4963525939546133, "grad_norm": 0.3593595325946808, "learning_rate": 0.00010074994476143438, "loss": 1.3089, "step": 38197 }, { "epoch": 0.4963655884985292, "grad_norm": 0.47288548946380615, "learning_rate": 0.00010074734529952301, "loss": 1.5494, "step": 38198 }, { "epoch": 0.49637858304244503, "grad_norm": 0.47059309482574463, "learning_rate": 0.00010074474583761162, "loss": 1.2216, "step": 38199 }, { "epoch": 0.49639157758636093, "grad_norm": 0.38760021328926086, "learning_rate": 0.00010074214637570024, "loss": 1.2507, "step": 38200 }, { "epoch": 0.4964045721302768, "grad_norm": 0.489494264125824, "learning_rate": 0.00010073954691378885, "loss": 1.5185, "step": 38201 }, { "epoch": 0.4964175666741927, "grad_norm": 0.38859695196151733, "learning_rate": 0.00010073694745187747, "loss": 1.4479, "step": 38202 }, { "epoch": 0.4964305612181085, "grad_norm": 0.5058392286300659, "learning_rate": 0.00010073434798996608, "loss": 1.5596, "step": 38203 }, { "epoch": 0.4964435557620244, "grad_norm": 0.4065495729446411, "learning_rate": 0.00010073174852805469, "loss": 1.3671, "step": 38204 }, { "epoch": 0.49645655030594027, "grad_norm": 0.5195760726928711, "learning_rate": 0.0001007291490661433, "loss": 1.5253, "step": 38205 }, { "epoch": 0.49646954484985617, "grad_norm": 0.3791457414627075, "learning_rate": 0.00010072654960423193, "loss": 1.2625, "step": 38206 }, { "epoch": 0.496482539393772, "grad_norm": 0.34332340955734253, "learning_rate": 0.00010072395014232054, "loss": 1.4659, "step": 38207 }, { "epoch": 0.4964955339376879, "grad_norm": 0.46960121393203735, "learning_rate": 0.00010072135068040915, "loss": 1.532, "step": 38208 }, { "epoch": 0.49650852848160376, "grad_norm": 0.3552747368812561, "learning_rate": 0.00010071875121849779, "loss": 1.4114, "step": 38209 }, { "epoch": 0.49652152302551966, "grad_norm": 0.43553653359413147, "learning_rate": 0.0001007161517565864, "loss": 1.3434, "step": 38210 }, { "epoch": 0.4965345175694355, "grad_norm": 0.43856823444366455, "learning_rate": 0.00010071355229467501, "loss": 1.4191, "step": 38211 }, { "epoch": 0.4965475121133514, "grad_norm": 0.3386930823326111, "learning_rate": 0.00010071095283276362, "loss": 1.1422, "step": 38212 }, { "epoch": 0.49656050665726725, "grad_norm": 0.34609055519104004, "learning_rate": 0.00010070835337085225, "loss": 1.3922, "step": 38213 }, { "epoch": 0.49657350120118315, "grad_norm": 0.48085880279541016, "learning_rate": 0.00010070575390894086, "loss": 1.3229, "step": 38214 }, { "epoch": 0.496586495745099, "grad_norm": 0.3659428358078003, "learning_rate": 0.00010070315444702947, "loss": 1.2003, "step": 38215 }, { "epoch": 0.4965994902890149, "grad_norm": 0.4486415982246399, "learning_rate": 0.00010070055498511808, "loss": 1.5332, "step": 38216 }, { "epoch": 0.4966124848329308, "grad_norm": 0.3283880054950714, "learning_rate": 0.00010069795552320672, "loss": 1.2941, "step": 38217 }, { "epoch": 0.49662547937684665, "grad_norm": 0.3791065812110901, "learning_rate": 0.00010069535606129533, "loss": 1.6234, "step": 38218 }, { "epoch": 0.49663847392076255, "grad_norm": 0.36310726404190063, "learning_rate": 0.00010069275659938392, "loss": 1.3119, "step": 38219 }, { "epoch": 0.4966514684646784, "grad_norm": 0.32768315076828003, "learning_rate": 0.00010069015713747254, "loss": 1.1876, "step": 38220 }, { "epoch": 0.4966644630085943, "grad_norm": 0.3908523917198181, "learning_rate": 0.00010068755767556117, "loss": 1.5275, "step": 38221 }, { "epoch": 0.49667745755251014, "grad_norm": 0.40550461411476135, "learning_rate": 0.00010068495821364978, "loss": 1.3288, "step": 38222 }, { "epoch": 0.49669045209642604, "grad_norm": 0.4728432893753052, "learning_rate": 0.0001006823587517384, "loss": 1.4515, "step": 38223 }, { "epoch": 0.4967034466403419, "grad_norm": 0.4008466303348541, "learning_rate": 0.000100679759289827, "loss": 1.4084, "step": 38224 }, { "epoch": 0.4967164411842578, "grad_norm": 0.45927658677101135, "learning_rate": 0.00010067715982791563, "loss": 1.4489, "step": 38225 }, { "epoch": 0.49672943572817363, "grad_norm": 0.4260965585708618, "learning_rate": 0.00010067456036600424, "loss": 1.3681, "step": 38226 }, { "epoch": 0.49674243027208953, "grad_norm": 0.5579988360404968, "learning_rate": 0.00010067196090409285, "loss": 1.3052, "step": 38227 }, { "epoch": 0.4967554248160054, "grad_norm": 0.39217960834503174, "learning_rate": 0.00010066936144218146, "loss": 1.6686, "step": 38228 }, { "epoch": 0.4967684193599213, "grad_norm": 0.42197975516319275, "learning_rate": 0.0001006667619802701, "loss": 1.2826, "step": 38229 }, { "epoch": 0.4967814139038371, "grad_norm": 0.41482314467430115, "learning_rate": 0.00010066416251835871, "loss": 1.3626, "step": 38230 }, { "epoch": 0.496794408447753, "grad_norm": 0.32807472348213196, "learning_rate": 0.00010066156305644732, "loss": 1.1998, "step": 38231 }, { "epoch": 0.49680740299166887, "grad_norm": 0.44601133465766907, "learning_rate": 0.00010065896359453592, "loss": 1.4028, "step": 38232 }, { "epoch": 0.49682039753558477, "grad_norm": 0.31964483857154846, "learning_rate": 0.00010065636413262456, "loss": 1.2503, "step": 38233 }, { "epoch": 0.4968333920795006, "grad_norm": 0.41199636459350586, "learning_rate": 0.00010065376467071317, "loss": 1.4452, "step": 38234 }, { "epoch": 0.4968463866234165, "grad_norm": 0.3892856538295746, "learning_rate": 0.00010065116520880178, "loss": 1.422, "step": 38235 }, { "epoch": 0.49685938116733236, "grad_norm": 0.3586713373661041, "learning_rate": 0.00010064856574689039, "loss": 1.3028, "step": 38236 }, { "epoch": 0.49687237571124826, "grad_norm": 0.37764057517051697, "learning_rate": 0.00010064596628497902, "loss": 1.4124, "step": 38237 }, { "epoch": 0.4968853702551641, "grad_norm": 0.3756992816925049, "learning_rate": 0.00010064336682306763, "loss": 1.3134, "step": 38238 }, { "epoch": 0.49689836479908, "grad_norm": 0.40205854177474976, "learning_rate": 0.00010064076736115624, "loss": 1.4236, "step": 38239 }, { "epoch": 0.49691135934299585, "grad_norm": 0.3651718199253082, "learning_rate": 0.00010063816789924485, "loss": 1.2067, "step": 38240 }, { "epoch": 0.49692435388691175, "grad_norm": 0.32568302750587463, "learning_rate": 0.00010063556843733349, "loss": 1.3528, "step": 38241 }, { "epoch": 0.4969373484308276, "grad_norm": 0.38428765535354614, "learning_rate": 0.0001006329689754221, "loss": 1.3754, "step": 38242 }, { "epoch": 0.4969503429747435, "grad_norm": 0.5420585870742798, "learning_rate": 0.00010063036951351071, "loss": 1.4364, "step": 38243 }, { "epoch": 0.49696333751865934, "grad_norm": 0.35904571413993835, "learning_rate": 0.0001006277700515993, "loss": 1.312, "step": 38244 }, { "epoch": 0.49697633206257524, "grad_norm": 0.32716450095176697, "learning_rate": 0.00010062517058968794, "loss": 1.4958, "step": 38245 }, { "epoch": 0.4969893266064911, "grad_norm": 0.39068934321403503, "learning_rate": 0.00010062257112777656, "loss": 1.306, "step": 38246 }, { "epoch": 0.497002321150407, "grad_norm": 0.41192135214805603, "learning_rate": 0.00010061997166586517, "loss": 1.4447, "step": 38247 }, { "epoch": 0.49701531569432283, "grad_norm": 0.38641610741615295, "learning_rate": 0.00010061737220395379, "loss": 1.4291, "step": 38248 }, { "epoch": 0.49702831023823874, "grad_norm": 0.37446409463882446, "learning_rate": 0.0001006147727420424, "loss": 1.5246, "step": 38249 }, { "epoch": 0.4970413047821546, "grad_norm": 0.47554197907447815, "learning_rate": 0.00010061217328013101, "loss": 1.3895, "step": 38250 }, { "epoch": 0.4970542993260705, "grad_norm": 0.34382396936416626, "learning_rate": 0.00010060957381821962, "loss": 1.2602, "step": 38251 }, { "epoch": 0.4970672938699863, "grad_norm": 0.38223326206207275, "learning_rate": 0.00010060697435630826, "loss": 1.4299, "step": 38252 }, { "epoch": 0.4970802884139022, "grad_norm": 0.37078580260276794, "learning_rate": 0.00010060437489439687, "loss": 1.4051, "step": 38253 }, { "epoch": 0.4970932829578181, "grad_norm": 0.45896589756011963, "learning_rate": 0.00010060177543248548, "loss": 1.4015, "step": 38254 }, { "epoch": 0.497106277501734, "grad_norm": 0.5382101535797119, "learning_rate": 0.0001005991759705741, "loss": 1.5385, "step": 38255 }, { "epoch": 0.4971192720456498, "grad_norm": 0.44979390501976013, "learning_rate": 0.00010059657650866272, "loss": 1.4383, "step": 38256 }, { "epoch": 0.4971322665895657, "grad_norm": 0.42438235878944397, "learning_rate": 0.00010059397704675133, "loss": 1.4412, "step": 38257 }, { "epoch": 0.49714526113348156, "grad_norm": 0.39806994795799255, "learning_rate": 0.00010059137758483994, "loss": 1.5253, "step": 38258 }, { "epoch": 0.49715825567739746, "grad_norm": 0.3334304690361023, "learning_rate": 0.00010058877812292855, "loss": 1.1342, "step": 38259 }, { "epoch": 0.4971712502213133, "grad_norm": 0.39490842819213867, "learning_rate": 0.00010058617866101719, "loss": 1.3501, "step": 38260 }, { "epoch": 0.4971842447652292, "grad_norm": 0.28818637132644653, "learning_rate": 0.00010058357919910579, "loss": 1.2178, "step": 38261 }, { "epoch": 0.49719723930914506, "grad_norm": 0.38212302327156067, "learning_rate": 0.0001005809797371944, "loss": 1.5273, "step": 38262 }, { "epoch": 0.49721023385306096, "grad_norm": 0.37797409296035767, "learning_rate": 0.00010057838027528301, "loss": 1.3841, "step": 38263 }, { "epoch": 0.4972232283969768, "grad_norm": 0.4057205021381378, "learning_rate": 0.00010057578081337165, "loss": 1.3569, "step": 38264 }, { "epoch": 0.4972362229408927, "grad_norm": 0.4671332538127899, "learning_rate": 0.00010057318135146026, "loss": 1.4952, "step": 38265 }, { "epoch": 0.49724921748480855, "grad_norm": 0.33796024322509766, "learning_rate": 0.00010057058188954887, "loss": 1.5194, "step": 38266 }, { "epoch": 0.49726221202872445, "grad_norm": 0.3439628779888153, "learning_rate": 0.00010056798242763748, "loss": 1.3359, "step": 38267 }, { "epoch": 0.4972752065726403, "grad_norm": 0.3598017990589142, "learning_rate": 0.0001005653829657261, "loss": 1.3682, "step": 38268 }, { "epoch": 0.4972882011165562, "grad_norm": 0.3693104088306427, "learning_rate": 0.00010056278350381471, "loss": 1.3575, "step": 38269 }, { "epoch": 0.49730119566047204, "grad_norm": 0.373563677072525, "learning_rate": 0.00010056018404190333, "loss": 1.2384, "step": 38270 }, { "epoch": 0.49731419020438794, "grad_norm": 0.4228780269622803, "learning_rate": 0.00010055758457999194, "loss": 1.4962, "step": 38271 }, { "epoch": 0.4973271847483038, "grad_norm": 0.47320982813835144, "learning_rate": 0.00010055498511808057, "loss": 1.2786, "step": 38272 }, { "epoch": 0.4973401792922197, "grad_norm": 0.5204222202301025, "learning_rate": 0.00010055238565616919, "loss": 1.2811, "step": 38273 }, { "epoch": 0.49735317383613553, "grad_norm": 0.32358795404434204, "learning_rate": 0.00010054978619425778, "loss": 1.3961, "step": 38274 }, { "epoch": 0.49736616838005143, "grad_norm": 0.35843977332115173, "learning_rate": 0.0001005471867323464, "loss": 1.4548, "step": 38275 }, { "epoch": 0.4973791629239673, "grad_norm": 0.4199320375919342, "learning_rate": 0.00010054458727043503, "loss": 1.3252, "step": 38276 }, { "epoch": 0.4973921574678832, "grad_norm": 0.4474913775920868, "learning_rate": 0.00010054198780852364, "loss": 1.4012, "step": 38277 }, { "epoch": 0.497405152011799, "grad_norm": 0.37559759616851807, "learning_rate": 0.00010053938834661225, "loss": 1.2223, "step": 38278 }, { "epoch": 0.4974181465557149, "grad_norm": 0.346578449010849, "learning_rate": 0.00010053678888470086, "loss": 1.3298, "step": 38279 }, { "epoch": 0.49743114109963077, "grad_norm": 0.3738930821418762, "learning_rate": 0.00010053418942278949, "loss": 1.282, "step": 38280 }, { "epoch": 0.49744413564354667, "grad_norm": 0.4037734866142273, "learning_rate": 0.0001005315899608781, "loss": 1.341, "step": 38281 }, { "epoch": 0.4974571301874625, "grad_norm": 0.4619690179824829, "learning_rate": 0.00010052899049896671, "loss": 1.6006, "step": 38282 }, { "epoch": 0.4974701247313784, "grad_norm": 0.3990549147129059, "learning_rate": 0.00010052639103705535, "loss": 1.4708, "step": 38283 }, { "epoch": 0.49748311927529426, "grad_norm": 0.3018411099910736, "learning_rate": 0.00010052379157514396, "loss": 1.1721, "step": 38284 }, { "epoch": 0.49749611381921016, "grad_norm": 0.42095980048179626, "learning_rate": 0.00010052119211323257, "loss": 1.3796, "step": 38285 }, { "epoch": 0.497509108363126, "grad_norm": 0.4593113362789154, "learning_rate": 0.00010051859265132117, "loss": 1.37, "step": 38286 }, { "epoch": 0.4975221029070419, "grad_norm": 0.4668925404548645, "learning_rate": 0.0001005159931894098, "loss": 1.5363, "step": 38287 }, { "epoch": 0.49753509745095775, "grad_norm": 0.3969510793685913, "learning_rate": 0.00010051339372749842, "loss": 1.5789, "step": 38288 }, { "epoch": 0.49754809199487365, "grad_norm": 0.44508442282676697, "learning_rate": 0.00010051079426558703, "loss": 1.3749, "step": 38289 }, { "epoch": 0.4975610865387895, "grad_norm": 0.4621100127696991, "learning_rate": 0.00010050819480367564, "loss": 1.501, "step": 38290 }, { "epoch": 0.4975740810827054, "grad_norm": 0.5140964388847351, "learning_rate": 0.00010050559534176426, "loss": 1.4981, "step": 38291 }, { "epoch": 0.49758707562662124, "grad_norm": 0.47806936502456665, "learning_rate": 0.00010050299587985287, "loss": 1.3719, "step": 38292 }, { "epoch": 0.49760007017053715, "grad_norm": 0.43101707100868225, "learning_rate": 0.00010050039641794149, "loss": 1.3017, "step": 38293 }, { "epoch": 0.49761306471445305, "grad_norm": 0.3664492666721344, "learning_rate": 0.0001004977969560301, "loss": 1.4411, "step": 38294 }, { "epoch": 0.4976260592583689, "grad_norm": 0.40567442774772644, "learning_rate": 0.00010049519749411873, "loss": 1.4569, "step": 38295 }, { "epoch": 0.4976390538022848, "grad_norm": 0.36975419521331787, "learning_rate": 0.00010049259803220735, "loss": 1.431, "step": 38296 }, { "epoch": 0.49765204834620064, "grad_norm": 0.350332111120224, "learning_rate": 0.00010048999857029596, "loss": 1.3976, "step": 38297 }, { "epoch": 0.49766504289011654, "grad_norm": 0.5274938344955444, "learning_rate": 0.00010048739910838457, "loss": 1.4596, "step": 38298 }, { "epoch": 0.4976780374340324, "grad_norm": 0.4731733202934265, "learning_rate": 0.00010048479964647319, "loss": 1.3106, "step": 38299 }, { "epoch": 0.4976910319779483, "grad_norm": 0.378708153963089, "learning_rate": 0.0001004822001845618, "loss": 1.319, "step": 38300 }, { "epoch": 0.49770402652186413, "grad_norm": 0.3066316545009613, "learning_rate": 0.00010047960072265041, "loss": 1.2844, "step": 38301 }, { "epoch": 0.49771702106578003, "grad_norm": 0.38162386417388916, "learning_rate": 0.00010047700126073902, "loss": 1.5328, "step": 38302 }, { "epoch": 0.4977300156096959, "grad_norm": 0.41711825132369995, "learning_rate": 0.00010047440179882765, "loss": 1.5256, "step": 38303 }, { "epoch": 0.4977430101536118, "grad_norm": 0.4321335554122925, "learning_rate": 0.00010047180233691626, "loss": 1.3369, "step": 38304 }, { "epoch": 0.4977560046975276, "grad_norm": 0.38317978382110596, "learning_rate": 0.00010046920287500487, "loss": 1.1886, "step": 38305 }, { "epoch": 0.4977689992414435, "grad_norm": 0.43253058195114136, "learning_rate": 0.00010046660341309348, "loss": 1.3418, "step": 38306 }, { "epoch": 0.49778199378535937, "grad_norm": 0.3355443775653839, "learning_rate": 0.00010046400395118212, "loss": 1.3033, "step": 38307 }, { "epoch": 0.49779498832927527, "grad_norm": 0.34117597341537476, "learning_rate": 0.00010046140448927073, "loss": 1.2317, "step": 38308 }, { "epoch": 0.4978079828731911, "grad_norm": 0.3505120575428009, "learning_rate": 0.00010045880502735934, "loss": 1.5223, "step": 38309 }, { "epoch": 0.497820977417107, "grad_norm": 0.4736119508743286, "learning_rate": 0.00010045620556544795, "loss": 1.372, "step": 38310 }, { "epoch": 0.49783397196102286, "grad_norm": 0.35333389043807983, "learning_rate": 0.00010045360610353658, "loss": 1.3972, "step": 38311 }, { "epoch": 0.49784696650493876, "grad_norm": 0.3708445727825165, "learning_rate": 0.00010045100664162519, "loss": 1.2807, "step": 38312 }, { "epoch": 0.4978599610488546, "grad_norm": 0.39415881037712097, "learning_rate": 0.0001004484071797138, "loss": 1.4524, "step": 38313 }, { "epoch": 0.4978729555927705, "grad_norm": 0.3399812579154968, "learning_rate": 0.00010044580771780241, "loss": 1.1114, "step": 38314 }, { "epoch": 0.49788595013668635, "grad_norm": 0.38117313385009766, "learning_rate": 0.00010044320825589105, "loss": 1.3756, "step": 38315 }, { "epoch": 0.49789894468060225, "grad_norm": 0.3988131582736969, "learning_rate": 0.00010044060879397965, "loss": 1.4865, "step": 38316 }, { "epoch": 0.4979119392245181, "grad_norm": 0.4566514492034912, "learning_rate": 0.00010043800933206826, "loss": 1.4258, "step": 38317 }, { "epoch": 0.497924933768434, "grad_norm": 0.31859347224235535, "learning_rate": 0.00010043540987015687, "loss": 1.4005, "step": 38318 }, { "epoch": 0.49793792831234984, "grad_norm": 0.41094642877578735, "learning_rate": 0.0001004328104082455, "loss": 1.2045, "step": 38319 }, { "epoch": 0.49795092285626574, "grad_norm": 0.2256426066160202, "learning_rate": 0.00010043021094633412, "loss": 1.1688, "step": 38320 }, { "epoch": 0.4979639174001816, "grad_norm": 0.4185001254081726, "learning_rate": 0.00010042761148442273, "loss": 1.2655, "step": 38321 }, { "epoch": 0.4979769119440975, "grad_norm": 0.38996875286102295, "learning_rate": 0.00010042501202251135, "loss": 1.4205, "step": 38322 }, { "epoch": 0.49798990648801333, "grad_norm": 0.3991868197917938, "learning_rate": 0.00010042241256059996, "loss": 1.1118, "step": 38323 }, { "epoch": 0.49800290103192923, "grad_norm": 0.3766166865825653, "learning_rate": 0.00010041981309868857, "loss": 1.3052, "step": 38324 }, { "epoch": 0.4980158955758451, "grad_norm": 0.447470486164093, "learning_rate": 0.00010041721363677718, "loss": 1.2146, "step": 38325 }, { "epoch": 0.498028890119761, "grad_norm": 0.3612557351589203, "learning_rate": 0.00010041461417486582, "loss": 1.3292, "step": 38326 }, { "epoch": 0.4980418846636768, "grad_norm": 0.43454593420028687, "learning_rate": 0.00010041201471295443, "loss": 1.3403, "step": 38327 }, { "epoch": 0.4980548792075927, "grad_norm": 0.3371090888977051, "learning_rate": 0.00010040941525104303, "loss": 1.305, "step": 38328 }, { "epoch": 0.49806787375150857, "grad_norm": 0.3942055404186249, "learning_rate": 0.00010040681578913164, "loss": 1.3997, "step": 38329 }, { "epoch": 0.4980808682954245, "grad_norm": 0.44579729437828064, "learning_rate": 0.00010040421632722028, "loss": 1.5097, "step": 38330 }, { "epoch": 0.4980938628393403, "grad_norm": 0.42233356833457947, "learning_rate": 0.00010040161686530889, "loss": 1.315, "step": 38331 }, { "epoch": 0.4981068573832562, "grad_norm": 0.3737773001194, "learning_rate": 0.0001003990174033975, "loss": 1.5219, "step": 38332 }, { "epoch": 0.49811985192717206, "grad_norm": 0.37200042605400085, "learning_rate": 0.00010039641794148611, "loss": 1.4068, "step": 38333 }, { "epoch": 0.49813284647108796, "grad_norm": 0.4165978729724884, "learning_rate": 0.00010039381847957474, "loss": 1.344, "step": 38334 }, { "epoch": 0.4981458410150038, "grad_norm": 0.40556177496910095, "learning_rate": 0.00010039121901766335, "loss": 1.5079, "step": 38335 }, { "epoch": 0.4981588355589197, "grad_norm": 0.38753145933151245, "learning_rate": 0.00010038861955575196, "loss": 1.321, "step": 38336 }, { "epoch": 0.49817183010283556, "grad_norm": 0.360279381275177, "learning_rate": 0.00010038602009384057, "loss": 1.3479, "step": 38337 }, { "epoch": 0.49818482464675146, "grad_norm": 0.414491206407547, "learning_rate": 0.00010038342063192921, "loss": 1.365, "step": 38338 }, { "epoch": 0.4981978191906673, "grad_norm": 0.4049168825149536, "learning_rate": 0.00010038082117001782, "loss": 1.2997, "step": 38339 }, { "epoch": 0.4982108137345832, "grad_norm": 0.37992316484451294, "learning_rate": 0.00010037822170810643, "loss": 1.3229, "step": 38340 }, { "epoch": 0.49822380827849905, "grad_norm": 0.4375469386577606, "learning_rate": 0.00010037562224619503, "loss": 1.4972, "step": 38341 }, { "epoch": 0.49823680282241495, "grad_norm": 0.36174288392066956, "learning_rate": 0.00010037302278428367, "loss": 1.4114, "step": 38342 }, { "epoch": 0.4982497973663308, "grad_norm": 0.34159186482429504, "learning_rate": 0.00010037042332237228, "loss": 1.2551, "step": 38343 }, { "epoch": 0.4982627919102467, "grad_norm": 0.34516406059265137, "learning_rate": 0.00010036782386046089, "loss": 1.0434, "step": 38344 }, { "epoch": 0.49827578645416254, "grad_norm": 0.39406538009643555, "learning_rate": 0.0001003652243985495, "loss": 1.4022, "step": 38345 }, { "epoch": 0.49828878099807844, "grad_norm": 0.402893990278244, "learning_rate": 0.00010036262493663812, "loss": 1.3157, "step": 38346 }, { "epoch": 0.4983017755419943, "grad_norm": 0.41723233461380005, "learning_rate": 0.00010036002547472673, "loss": 1.3904, "step": 38347 }, { "epoch": 0.4983147700859102, "grad_norm": 0.4517935514450073, "learning_rate": 0.00010035742601281534, "loss": 1.4417, "step": 38348 }, { "epoch": 0.49832776462982603, "grad_norm": 0.46642187237739563, "learning_rate": 0.00010035482655090396, "loss": 1.5132, "step": 38349 }, { "epoch": 0.49834075917374193, "grad_norm": 0.3711899220943451, "learning_rate": 0.0001003522270889926, "loss": 1.2458, "step": 38350 }, { "epoch": 0.4983537537176578, "grad_norm": 0.4891204237937927, "learning_rate": 0.0001003496276270812, "loss": 1.5312, "step": 38351 }, { "epoch": 0.4983667482615737, "grad_norm": 0.33175399899482727, "learning_rate": 0.00010034702816516982, "loss": 1.3662, "step": 38352 }, { "epoch": 0.4983797428054895, "grad_norm": 0.37135010957717896, "learning_rate": 0.00010034442870325843, "loss": 1.3684, "step": 38353 }, { "epoch": 0.4983927373494054, "grad_norm": 0.32203182578086853, "learning_rate": 0.00010034182924134705, "loss": 1.5153, "step": 38354 }, { "epoch": 0.49840573189332127, "grad_norm": 0.3203030526638031, "learning_rate": 0.00010033922977943566, "loss": 1.2636, "step": 38355 }, { "epoch": 0.49841872643723717, "grad_norm": 0.34687569737434387, "learning_rate": 0.00010033663031752427, "loss": 1.3934, "step": 38356 }, { "epoch": 0.498431720981153, "grad_norm": 0.34121039509773254, "learning_rate": 0.00010033403085561291, "loss": 1.4566, "step": 38357 }, { "epoch": 0.4984447155250689, "grad_norm": 0.44214165210723877, "learning_rate": 0.00010033143139370151, "loss": 1.452, "step": 38358 }, { "epoch": 0.49845771006898476, "grad_norm": 0.39845171570777893, "learning_rate": 0.00010032883193179012, "loss": 1.331, "step": 38359 }, { "epoch": 0.49847070461290066, "grad_norm": 0.4112400412559509, "learning_rate": 0.00010032623246987873, "loss": 1.6181, "step": 38360 }, { "epoch": 0.4984836991568165, "grad_norm": 0.36326760053634644, "learning_rate": 0.00010032363300796737, "loss": 1.4904, "step": 38361 }, { "epoch": 0.4984966937007324, "grad_norm": 0.4529985189437866, "learning_rate": 0.00010032103354605598, "loss": 1.4531, "step": 38362 }, { "epoch": 0.49850968824464825, "grad_norm": 0.3607131540775299, "learning_rate": 0.00010031843408414459, "loss": 1.17, "step": 38363 }, { "epoch": 0.49852268278856415, "grad_norm": 0.4582368731498718, "learning_rate": 0.0001003158346222332, "loss": 1.4177, "step": 38364 }, { "epoch": 0.49853567733248, "grad_norm": 0.36332833766937256, "learning_rate": 0.00010031323516032183, "loss": 1.4414, "step": 38365 }, { "epoch": 0.4985486718763959, "grad_norm": 0.38659659028053284, "learning_rate": 0.00010031063569841044, "loss": 1.2598, "step": 38366 }, { "epoch": 0.49856166642031174, "grad_norm": 0.502297580242157, "learning_rate": 0.00010030803623649905, "loss": 1.567, "step": 38367 }, { "epoch": 0.49857466096422765, "grad_norm": 0.37501534819602966, "learning_rate": 0.00010030543677458766, "loss": 1.205, "step": 38368 }, { "epoch": 0.49858765550814355, "grad_norm": 0.39188697934150696, "learning_rate": 0.0001003028373126763, "loss": 1.3353, "step": 38369 }, { "epoch": 0.4986006500520594, "grad_norm": 0.4263515770435333, "learning_rate": 0.0001003002378507649, "loss": 1.4962, "step": 38370 }, { "epoch": 0.4986136445959753, "grad_norm": 0.4459836184978485, "learning_rate": 0.0001002976383888535, "loss": 1.483, "step": 38371 }, { "epoch": 0.49862663913989114, "grad_norm": 0.41951653361320496, "learning_rate": 0.00010029503892694212, "loss": 1.4803, "step": 38372 }, { "epoch": 0.49863963368380704, "grad_norm": 0.5109822750091553, "learning_rate": 0.00010029243946503075, "loss": 1.4027, "step": 38373 }, { "epoch": 0.4986526282277229, "grad_norm": 0.405787855386734, "learning_rate": 0.00010028984000311936, "loss": 1.2191, "step": 38374 }, { "epoch": 0.4986656227716388, "grad_norm": 0.4110288918018341, "learning_rate": 0.00010028724054120798, "loss": 1.1934, "step": 38375 }, { "epoch": 0.49867861731555463, "grad_norm": 0.42943495512008667, "learning_rate": 0.00010028464107929659, "loss": 1.4349, "step": 38376 }, { "epoch": 0.49869161185947053, "grad_norm": 0.37985706329345703, "learning_rate": 0.00010028204161738521, "loss": 1.2487, "step": 38377 }, { "epoch": 0.4987046064033864, "grad_norm": 0.480416476726532, "learning_rate": 0.00010027944215547382, "loss": 1.4297, "step": 38378 }, { "epoch": 0.4987176009473023, "grad_norm": 0.3507551848888397, "learning_rate": 0.00010027684269356243, "loss": 1.3726, "step": 38379 }, { "epoch": 0.4987305954912181, "grad_norm": 0.3495630919933319, "learning_rate": 0.00010027424323165104, "loss": 1.4046, "step": 38380 }, { "epoch": 0.498743590035134, "grad_norm": 0.41064468026161194, "learning_rate": 0.00010027164376973968, "loss": 1.3079, "step": 38381 }, { "epoch": 0.49875658457904987, "grad_norm": 0.37248387932777405, "learning_rate": 0.00010026904430782829, "loss": 1.583, "step": 38382 }, { "epoch": 0.49876957912296577, "grad_norm": 0.5132301449775696, "learning_rate": 0.00010026644484591689, "loss": 1.3397, "step": 38383 }, { "epoch": 0.4987825736668816, "grad_norm": 0.43373948335647583, "learning_rate": 0.0001002638453840055, "loss": 1.5919, "step": 38384 }, { "epoch": 0.4987955682107975, "grad_norm": 0.42042919993400574, "learning_rate": 0.00010026124592209414, "loss": 1.2714, "step": 38385 }, { "epoch": 0.49880856275471336, "grad_norm": 0.47053229808807373, "learning_rate": 0.00010025864646018275, "loss": 1.5696, "step": 38386 }, { "epoch": 0.49882155729862926, "grad_norm": 0.3646489381790161, "learning_rate": 0.00010025604699827136, "loss": 1.4927, "step": 38387 }, { "epoch": 0.4988345518425451, "grad_norm": 0.47750934958457947, "learning_rate": 0.00010025344753635997, "loss": 1.3547, "step": 38388 }, { "epoch": 0.498847546386461, "grad_norm": 0.38673293590545654, "learning_rate": 0.0001002508480744486, "loss": 1.4759, "step": 38389 }, { "epoch": 0.49886054093037685, "grad_norm": 0.37473753094673157, "learning_rate": 0.00010024824861253721, "loss": 1.5265, "step": 38390 }, { "epoch": 0.49887353547429275, "grad_norm": 0.4663291871547699, "learning_rate": 0.00010024564915062582, "loss": 1.3869, "step": 38391 }, { "epoch": 0.4988865300182086, "grad_norm": 0.45083218812942505, "learning_rate": 0.00010024304968871443, "loss": 1.5214, "step": 38392 }, { "epoch": 0.4988995245621245, "grad_norm": 0.30985012650489807, "learning_rate": 0.00010024045022680307, "loss": 1.4202, "step": 38393 }, { "epoch": 0.49891251910604034, "grad_norm": 0.2806839048862457, "learning_rate": 0.00010023785076489168, "loss": 1.2232, "step": 38394 }, { "epoch": 0.49892551364995624, "grad_norm": 0.501163125038147, "learning_rate": 0.00010023525130298029, "loss": 1.4695, "step": 38395 }, { "epoch": 0.4989385081938721, "grad_norm": 0.4245503544807434, "learning_rate": 0.00010023265184106891, "loss": 1.3073, "step": 38396 }, { "epoch": 0.498951502737788, "grad_norm": 0.2749496102333069, "learning_rate": 0.00010023005237915752, "loss": 1.3263, "step": 38397 }, { "epoch": 0.49896449728170383, "grad_norm": 0.5117831826210022, "learning_rate": 0.00010022745291724613, "loss": 1.5459, "step": 38398 }, { "epoch": 0.49897749182561973, "grad_norm": 0.3722374439239502, "learning_rate": 0.00010022485345533475, "loss": 1.2086, "step": 38399 }, { "epoch": 0.4989904863695356, "grad_norm": 0.2619899809360504, "learning_rate": 0.00010022225399342337, "loss": 1.3916, "step": 38400 }, { "epoch": 0.4990034809134515, "grad_norm": 0.4068641662597656, "learning_rate": 0.00010021965453151198, "loss": 1.5562, "step": 38401 }, { "epoch": 0.4990164754573673, "grad_norm": 0.44853901863098145, "learning_rate": 0.00010021705506960059, "loss": 1.3538, "step": 38402 }, { "epoch": 0.4990294700012832, "grad_norm": 0.3799043893814087, "learning_rate": 0.0001002144556076892, "loss": 1.5034, "step": 38403 }, { "epoch": 0.49904246454519907, "grad_norm": 0.3458322584629059, "learning_rate": 0.00010021185614577784, "loss": 1.3797, "step": 38404 }, { "epoch": 0.49905545908911497, "grad_norm": 0.4032128155231476, "learning_rate": 0.00010020925668386645, "loss": 1.4429, "step": 38405 }, { "epoch": 0.4990684536330308, "grad_norm": 0.4673705995082855, "learning_rate": 0.00010020665722195506, "loss": 1.2205, "step": 38406 }, { "epoch": 0.4990814481769467, "grad_norm": 0.40213948488235474, "learning_rate": 0.00010020405776004367, "loss": 1.357, "step": 38407 }, { "epoch": 0.49909444272086256, "grad_norm": 0.27652838826179504, "learning_rate": 0.0001002014582981323, "loss": 1.1771, "step": 38408 }, { "epoch": 0.49910743726477846, "grad_norm": 0.3292090892791748, "learning_rate": 0.00010019885883622091, "loss": 1.2742, "step": 38409 }, { "epoch": 0.4991204318086943, "grad_norm": 0.3631819486618042, "learning_rate": 0.00010019625937430952, "loss": 1.3741, "step": 38410 }, { "epoch": 0.4991334263526102, "grad_norm": 0.34049108624458313, "learning_rate": 0.00010019365991239813, "loss": 1.3688, "step": 38411 }, { "epoch": 0.49914642089652606, "grad_norm": 0.43600165843963623, "learning_rate": 0.00010019106045048676, "loss": 1.5146, "step": 38412 }, { "epoch": 0.49915941544044196, "grad_norm": 0.4129011332988739, "learning_rate": 0.00010018846098857537, "loss": 1.4571, "step": 38413 }, { "epoch": 0.4991724099843578, "grad_norm": 0.44655948877334595, "learning_rate": 0.00010018586152666398, "loss": 1.3895, "step": 38414 }, { "epoch": 0.4991854045282737, "grad_norm": 0.437338650226593, "learning_rate": 0.00010018326206475259, "loss": 1.3987, "step": 38415 }, { "epoch": 0.49919839907218955, "grad_norm": 0.3843490481376648, "learning_rate": 0.00010018066260284123, "loss": 1.2321, "step": 38416 }, { "epoch": 0.49921139361610545, "grad_norm": 0.437357634305954, "learning_rate": 0.00010017806314092984, "loss": 1.4667, "step": 38417 }, { "epoch": 0.4992243881600213, "grad_norm": 0.41078388690948486, "learning_rate": 0.00010017546367901845, "loss": 1.1576, "step": 38418 }, { "epoch": 0.4992373827039372, "grad_norm": 0.3237869441509247, "learning_rate": 0.00010017286421710706, "loss": 1.3675, "step": 38419 }, { "epoch": 0.49925037724785304, "grad_norm": 0.40829840302467346, "learning_rate": 0.00010017026475519568, "loss": 1.4013, "step": 38420 }, { "epoch": 0.49926337179176894, "grad_norm": 0.4732895493507385, "learning_rate": 0.0001001676652932843, "loss": 1.2678, "step": 38421 }, { "epoch": 0.4992763663356848, "grad_norm": 0.42385509610176086, "learning_rate": 0.0001001650658313729, "loss": 1.4338, "step": 38422 }, { "epoch": 0.4992893608796007, "grad_norm": 0.5055204033851624, "learning_rate": 0.00010016246636946152, "loss": 1.2829, "step": 38423 }, { "epoch": 0.49930235542351653, "grad_norm": 0.4948206841945648, "learning_rate": 0.00010015986690755015, "loss": 1.4836, "step": 38424 }, { "epoch": 0.49931534996743243, "grad_norm": 0.3998386263847351, "learning_rate": 0.00010015726744563875, "loss": 1.4044, "step": 38425 }, { "epoch": 0.4993283445113483, "grad_norm": 0.3959960341453552, "learning_rate": 0.00010015466798372736, "loss": 1.5296, "step": 38426 }, { "epoch": 0.4993413390552642, "grad_norm": 0.40329161286354065, "learning_rate": 0.00010015206852181597, "loss": 1.4067, "step": 38427 }, { "epoch": 0.49935433359918, "grad_norm": 0.42405247688293457, "learning_rate": 0.00010014946905990461, "loss": 1.5782, "step": 38428 }, { "epoch": 0.4993673281430959, "grad_norm": 0.3255196213722229, "learning_rate": 0.00010014686959799322, "loss": 1.3941, "step": 38429 }, { "epoch": 0.49938032268701177, "grad_norm": 0.36559823155403137, "learning_rate": 0.00010014427013608183, "loss": 1.4683, "step": 38430 }, { "epoch": 0.49939331723092767, "grad_norm": 0.4623715281486511, "learning_rate": 0.00010014167067417046, "loss": 1.49, "step": 38431 }, { "epoch": 0.4994063117748435, "grad_norm": 0.41243699193000793, "learning_rate": 0.00010013907121225907, "loss": 1.2236, "step": 38432 }, { "epoch": 0.4994193063187594, "grad_norm": 0.39457741379737854, "learning_rate": 0.00010013647175034768, "loss": 1.3908, "step": 38433 }, { "epoch": 0.49943230086267526, "grad_norm": 0.38218870759010315, "learning_rate": 0.00010013387228843629, "loss": 1.5183, "step": 38434 }, { "epoch": 0.49944529540659116, "grad_norm": 0.47133591771125793, "learning_rate": 0.00010013127282652493, "loss": 1.3844, "step": 38435 }, { "epoch": 0.499458289950507, "grad_norm": 0.3822469711303711, "learning_rate": 0.00010012867336461354, "loss": 1.3654, "step": 38436 }, { "epoch": 0.4994712844944229, "grad_norm": 0.4620053172111511, "learning_rate": 0.00010012607390270215, "loss": 1.3816, "step": 38437 }, { "epoch": 0.49948427903833875, "grad_norm": 0.4246847331523895, "learning_rate": 0.00010012347444079075, "loss": 1.3448, "step": 38438 }, { "epoch": 0.49949727358225465, "grad_norm": 0.40838822722435, "learning_rate": 0.00010012087497887939, "loss": 1.2657, "step": 38439 }, { "epoch": 0.4995102681261705, "grad_norm": 0.48224392533302307, "learning_rate": 0.000100118275516968, "loss": 1.4957, "step": 38440 }, { "epoch": 0.4995232626700864, "grad_norm": 0.4037822186946869, "learning_rate": 0.00010011567605505661, "loss": 1.3534, "step": 38441 }, { "epoch": 0.49953625721400224, "grad_norm": 0.45346084237098694, "learning_rate": 0.00010011307659314522, "loss": 1.5174, "step": 38442 }, { "epoch": 0.49954925175791814, "grad_norm": 0.45121335983276367, "learning_rate": 0.00010011047713123384, "loss": 1.5282, "step": 38443 }, { "epoch": 0.499562246301834, "grad_norm": 0.39843347668647766, "learning_rate": 0.00010010787766932245, "loss": 1.3467, "step": 38444 }, { "epoch": 0.4995752408457499, "grad_norm": 0.3001526892185211, "learning_rate": 0.00010010527820741107, "loss": 1.2267, "step": 38445 }, { "epoch": 0.4995882353896658, "grad_norm": 0.3839610517024994, "learning_rate": 0.00010010267874549968, "loss": 1.3876, "step": 38446 }, { "epoch": 0.49960122993358164, "grad_norm": 0.3017396330833435, "learning_rate": 0.00010010007928358831, "loss": 1.3724, "step": 38447 }, { "epoch": 0.49961422447749754, "grad_norm": 0.33791786432266235, "learning_rate": 0.00010009747982167693, "loss": 1.2568, "step": 38448 }, { "epoch": 0.4996272190214134, "grad_norm": 0.575527012348175, "learning_rate": 0.00010009488035976554, "loss": 1.5191, "step": 38449 }, { "epoch": 0.4996402135653293, "grad_norm": 0.32862597703933716, "learning_rate": 0.00010009228089785413, "loss": 1.4963, "step": 38450 }, { "epoch": 0.49965320810924513, "grad_norm": 0.42390745878219604, "learning_rate": 0.00010008968143594277, "loss": 1.2572, "step": 38451 }, { "epoch": 0.49966620265316103, "grad_norm": 0.38366666436195374, "learning_rate": 0.00010008708197403138, "loss": 1.4559, "step": 38452 }, { "epoch": 0.4996791971970769, "grad_norm": 0.35566309094429016, "learning_rate": 0.00010008448251212, "loss": 1.4391, "step": 38453 }, { "epoch": 0.4996921917409928, "grad_norm": 0.48613864183425903, "learning_rate": 0.0001000818830502086, "loss": 1.4462, "step": 38454 }, { "epoch": 0.4997051862849086, "grad_norm": 0.45702236890792847, "learning_rate": 0.00010007928358829723, "loss": 1.4134, "step": 38455 }, { "epoch": 0.4997181808288245, "grad_norm": 0.4362688362598419, "learning_rate": 0.00010007668412638584, "loss": 1.4077, "step": 38456 }, { "epoch": 0.49973117537274037, "grad_norm": 0.40399742126464844, "learning_rate": 0.00010007408466447445, "loss": 1.3296, "step": 38457 }, { "epoch": 0.49974416991665627, "grad_norm": 0.36409419775009155, "learning_rate": 0.00010007148520256306, "loss": 1.2316, "step": 38458 }, { "epoch": 0.4997571644605721, "grad_norm": 0.3736996054649353, "learning_rate": 0.0001000688857406517, "loss": 1.2609, "step": 38459 }, { "epoch": 0.499770159004488, "grad_norm": 0.5145350694656372, "learning_rate": 0.00010006628627874031, "loss": 1.4305, "step": 38460 }, { "epoch": 0.49978315354840386, "grad_norm": 0.5018911361694336, "learning_rate": 0.00010006368681682892, "loss": 1.3918, "step": 38461 }, { "epoch": 0.49979614809231976, "grad_norm": 0.38357535004615784, "learning_rate": 0.00010006108735491753, "loss": 1.2867, "step": 38462 }, { "epoch": 0.4998091426362356, "grad_norm": 0.47817549109458923, "learning_rate": 0.00010005848789300616, "loss": 1.3797, "step": 38463 }, { "epoch": 0.4998221371801515, "grad_norm": 0.5503483414649963, "learning_rate": 0.00010005588843109477, "loss": 1.546, "step": 38464 }, { "epoch": 0.49983513172406735, "grad_norm": 0.46716955304145813, "learning_rate": 0.00010005328896918338, "loss": 1.4489, "step": 38465 }, { "epoch": 0.49984812626798325, "grad_norm": 0.3614872694015503, "learning_rate": 0.00010005068950727199, "loss": 1.1966, "step": 38466 }, { "epoch": 0.4998611208118991, "grad_norm": 0.36548638343811035, "learning_rate": 0.00010004809004536061, "loss": 1.3136, "step": 38467 }, { "epoch": 0.499874115355815, "grad_norm": 0.3795667290687561, "learning_rate": 0.00010004549058344923, "loss": 1.1901, "step": 38468 }, { "epoch": 0.49988710989973084, "grad_norm": 0.4318860173225403, "learning_rate": 0.00010004289112153784, "loss": 1.2734, "step": 38469 }, { "epoch": 0.49990010444364674, "grad_norm": 0.3916262686252594, "learning_rate": 0.00010004029165962647, "loss": 1.383, "step": 38470 }, { "epoch": 0.4999130989875626, "grad_norm": 0.45661166310310364, "learning_rate": 0.00010003769219771509, "loss": 1.4667, "step": 38471 }, { "epoch": 0.4999260935314785, "grad_norm": 0.3498865067958832, "learning_rate": 0.0001000350927358037, "loss": 1.4206, "step": 38472 }, { "epoch": 0.49993908807539433, "grad_norm": 0.41647881269454956, "learning_rate": 0.00010003249327389231, "loss": 1.072, "step": 38473 }, { "epoch": 0.49995208261931023, "grad_norm": 0.3985632658004761, "learning_rate": 0.00010002989381198093, "loss": 1.4362, "step": 38474 }, { "epoch": 0.4999650771632261, "grad_norm": 0.41346773505210876, "learning_rate": 0.00010002729435006954, "loss": 1.3832, "step": 38475 }, { "epoch": 0.499978071707142, "grad_norm": 0.3916938900947571, "learning_rate": 0.00010002469488815815, "loss": 1.3308, "step": 38476 }, { "epoch": 0.4999910662510578, "grad_norm": 0.40697628259658813, "learning_rate": 0.00010002209542624676, "loss": 1.4365, "step": 38477 }, { "epoch": 0.5000040607949737, "grad_norm": 0.39586490392684937, "learning_rate": 0.0001000194959643354, "loss": 1.3116, "step": 38478 }, { "epoch": 0.5000170553388896, "grad_norm": 0.4302930533885956, "learning_rate": 0.00010001689650242401, "loss": 1.2871, "step": 38479 }, { "epoch": 0.5000300498828054, "grad_norm": 0.39699772000312805, "learning_rate": 0.00010001429704051261, "loss": 1.4254, "step": 38480 }, { "epoch": 0.5000430444267213, "grad_norm": 0.33105841279029846, "learning_rate": 0.00010001169757860122, "loss": 1.3135, "step": 38481 }, { "epoch": 0.5000560389706372, "grad_norm": 0.363977313041687, "learning_rate": 0.00010000909811668986, "loss": 1.1808, "step": 38482 }, { "epoch": 0.5000690335145531, "grad_norm": 0.3896322548389435, "learning_rate": 0.00010000649865477847, "loss": 1.2437, "step": 38483 }, { "epoch": 0.5000820280584689, "grad_norm": 0.4553676247596741, "learning_rate": 0.00010000389919286708, "loss": 1.3405, "step": 38484 }, { "epoch": 0.5000950226023848, "grad_norm": 0.31064674258232117, "learning_rate": 0.00010000129973095569, "loss": 1.3983, "step": 38485 }, { "epoch": 0.5001080171463007, "grad_norm": 0.5178108811378479, "learning_rate": 9.99987002690443e-05, "loss": 1.3524, "step": 38486 }, { "epoch": 0.5001210116902166, "grad_norm": 0.4099293351173401, "learning_rate": 9.999610080713293e-05, "loss": 1.4344, "step": 38487 }, { "epoch": 0.5001340062341324, "grad_norm": 0.34738999605178833, "learning_rate": 9.999350134522154e-05, "loss": 1.1937, "step": 38488 }, { "epoch": 0.5001470007780483, "grad_norm": 0.4242170751094818, "learning_rate": 9.999090188331016e-05, "loss": 1.5287, "step": 38489 }, { "epoch": 0.5001599953219642, "grad_norm": 0.36498019099235535, "learning_rate": 9.998830242139877e-05, "loss": 1.3708, "step": 38490 }, { "epoch": 0.5001729898658801, "grad_norm": 0.37078672647476196, "learning_rate": 9.99857029594874e-05, "loss": 1.3509, "step": 38491 }, { "epoch": 0.5001859844097959, "grad_norm": 0.4015561044216156, "learning_rate": 9.9983103497576e-05, "loss": 1.4638, "step": 38492 }, { "epoch": 0.5001989789537118, "grad_norm": 0.39904215931892395, "learning_rate": 9.998050403566462e-05, "loss": 1.1768, "step": 38493 }, { "epoch": 0.5002119734976277, "grad_norm": 0.5054426193237305, "learning_rate": 9.997790457375323e-05, "loss": 1.5331, "step": 38494 }, { "epoch": 0.5002249680415436, "grad_norm": 0.39435285329818726, "learning_rate": 9.997530511184186e-05, "loss": 1.4728, "step": 38495 }, { "epoch": 0.5002379625854594, "grad_norm": 0.3388907015323639, "learning_rate": 9.997270564993047e-05, "loss": 1.4266, "step": 38496 }, { "epoch": 0.5002509571293753, "grad_norm": 0.4165821373462677, "learning_rate": 9.997010618801909e-05, "loss": 1.3252, "step": 38497 }, { "epoch": 0.5002639516732912, "grad_norm": 0.42864224314689636, "learning_rate": 9.99675067261077e-05, "loss": 1.3126, "step": 38498 }, { "epoch": 0.5002769462172071, "grad_norm": 0.3871716558933258, "learning_rate": 9.996490726419631e-05, "loss": 1.3973, "step": 38499 }, { "epoch": 0.5002899407611229, "grad_norm": 0.5376816987991333, "learning_rate": 9.996230780228492e-05, "loss": 1.5091, "step": 38500 }, { "epoch": 0.5003029353050388, "grad_norm": 0.4054618775844574, "learning_rate": 9.995970834037355e-05, "loss": 1.3816, "step": 38501 }, { "epoch": 0.5003159298489547, "grad_norm": 0.42202767729759216, "learning_rate": 9.995710887846216e-05, "loss": 1.4288, "step": 38502 }, { "epoch": 0.5003289243928706, "grad_norm": 0.48954129219055176, "learning_rate": 9.995450941655078e-05, "loss": 1.3647, "step": 38503 }, { "epoch": 0.5003419189367864, "grad_norm": 0.3736002445220947, "learning_rate": 9.99519099546394e-05, "loss": 1.3228, "step": 38504 }, { "epoch": 0.5003549134807023, "grad_norm": 0.4459300935268402, "learning_rate": 9.9949310492728e-05, "loss": 1.3371, "step": 38505 }, { "epoch": 0.5003679080246182, "grad_norm": 0.48064523935317993, "learning_rate": 9.994671103081663e-05, "loss": 1.322, "step": 38506 }, { "epoch": 0.5003809025685341, "grad_norm": 0.39444148540496826, "learning_rate": 9.994411156890524e-05, "loss": 1.4052, "step": 38507 }, { "epoch": 0.5003938971124499, "grad_norm": 0.4147461950778961, "learning_rate": 9.994151210699387e-05, "loss": 1.4575, "step": 38508 }, { "epoch": 0.5004068916563658, "grad_norm": 0.33071663975715637, "learning_rate": 9.993891264508248e-05, "loss": 1.3274, "step": 38509 }, { "epoch": 0.5004198862002817, "grad_norm": 0.38374415040016174, "learning_rate": 9.993631318317109e-05, "loss": 1.3927, "step": 38510 }, { "epoch": 0.5004328807441976, "grad_norm": 0.3648830056190491, "learning_rate": 9.99337137212597e-05, "loss": 1.4289, "step": 38511 }, { "epoch": 0.5004458752881134, "grad_norm": 0.43707484006881714, "learning_rate": 9.993111425934832e-05, "loss": 1.4538, "step": 38512 }, { "epoch": 0.5004588698320293, "grad_norm": 0.4712204933166504, "learning_rate": 9.992851479743693e-05, "loss": 1.312, "step": 38513 }, { "epoch": 0.5004718643759452, "grad_norm": 0.36538490653038025, "learning_rate": 9.992591533552556e-05, "loss": 1.1525, "step": 38514 }, { "epoch": 0.500484858919861, "grad_norm": 0.46205270290374756, "learning_rate": 9.992331587361417e-05, "loss": 1.3015, "step": 38515 }, { "epoch": 0.5004978534637768, "grad_norm": 0.39334115386009216, "learning_rate": 9.992071641170278e-05, "loss": 1.455, "step": 38516 }, { "epoch": 0.5005108480076927, "grad_norm": 0.38330259919166565, "learning_rate": 9.991811694979139e-05, "loss": 1.2619, "step": 38517 }, { "epoch": 0.5005238425516086, "grad_norm": 0.3695862591266632, "learning_rate": 9.991551748788002e-05, "loss": 1.395, "step": 38518 }, { "epoch": 0.5005368370955245, "grad_norm": 0.3695862591266632, "learning_rate": 9.991291802596863e-05, "loss": 1.5547, "step": 38519 }, { "epoch": 0.5005498316394403, "grad_norm": 0.34565216302871704, "learning_rate": 9.991031856405725e-05, "loss": 1.3367, "step": 38520 }, { "epoch": 0.5005628261833562, "grad_norm": 0.3844248950481415, "learning_rate": 9.990771910214586e-05, "loss": 1.3098, "step": 38521 }, { "epoch": 0.5005758207272721, "grad_norm": 0.4144669771194458, "learning_rate": 9.990511964023447e-05, "loss": 1.6605, "step": 38522 }, { "epoch": 0.500588815271188, "grad_norm": 0.43518581986427307, "learning_rate": 9.990252017832308e-05, "loss": 1.4268, "step": 38523 }, { "epoch": 0.5006018098151039, "grad_norm": 0.4018695652484894, "learning_rate": 9.989992071641171e-05, "loss": 1.4326, "step": 38524 }, { "epoch": 0.5006148043590197, "grad_norm": 0.4548830986022949, "learning_rate": 9.989732125450032e-05, "loss": 1.4517, "step": 38525 }, { "epoch": 0.5006277989029356, "grad_norm": 0.44514504075050354, "learning_rate": 9.989472179258894e-05, "loss": 1.8069, "step": 38526 }, { "epoch": 0.5006407934468515, "grad_norm": 0.3615792393684387, "learning_rate": 9.989212233067755e-05, "loss": 1.301, "step": 38527 }, { "epoch": 0.5006537879907674, "grad_norm": 0.3245408535003662, "learning_rate": 9.988952286876617e-05, "loss": 1.3064, "step": 38528 }, { "epoch": 0.5006667825346832, "grad_norm": 0.4558870494365692, "learning_rate": 9.988692340685478e-05, "loss": 1.4604, "step": 38529 }, { "epoch": 0.5006797770785991, "grad_norm": 0.46221646666526794, "learning_rate": 9.98843239449434e-05, "loss": 1.5992, "step": 38530 }, { "epoch": 0.500692771622515, "grad_norm": 0.38172295689582825, "learning_rate": 9.988172448303201e-05, "loss": 1.3839, "step": 38531 }, { "epoch": 0.5007057661664309, "grad_norm": 0.4264618158340454, "learning_rate": 9.987912502112064e-05, "loss": 1.3797, "step": 38532 }, { "epoch": 0.5007187607103467, "grad_norm": 0.32991787791252136, "learning_rate": 9.987652555920925e-05, "loss": 1.2988, "step": 38533 }, { "epoch": 0.5007317552542626, "grad_norm": 0.38463470339775085, "learning_rate": 9.987392609729786e-05, "loss": 1.2009, "step": 38534 }, { "epoch": 0.5007447497981785, "grad_norm": 0.430191308259964, "learning_rate": 9.987132663538647e-05, "loss": 1.3779, "step": 38535 }, { "epoch": 0.5007577443420944, "grad_norm": 0.4753797948360443, "learning_rate": 9.98687271734751e-05, "loss": 1.3091, "step": 38536 }, { "epoch": 0.5007707388860102, "grad_norm": 0.4227485954761505, "learning_rate": 9.98661277115637e-05, "loss": 1.6048, "step": 38537 }, { "epoch": 0.5007837334299261, "grad_norm": 0.5142156481742859, "learning_rate": 9.986352824965233e-05, "loss": 1.4795, "step": 38538 }, { "epoch": 0.500796727973842, "grad_norm": 0.44958633184432983, "learning_rate": 9.986092878774094e-05, "loss": 1.471, "step": 38539 }, { "epoch": 0.5008097225177579, "grad_norm": 0.39768242835998535, "learning_rate": 9.985832932582956e-05, "loss": 1.2665, "step": 38540 }, { "epoch": 0.5008227170616737, "grad_norm": 0.4176120162010193, "learning_rate": 9.985572986391816e-05, "loss": 1.5671, "step": 38541 }, { "epoch": 0.5008357116055896, "grad_norm": 0.5168113112449646, "learning_rate": 9.985313040200679e-05, "loss": 1.2514, "step": 38542 }, { "epoch": 0.5008487061495055, "grad_norm": 0.46453577280044556, "learning_rate": 9.985053094009541e-05, "loss": 1.2353, "step": 38543 }, { "epoch": 0.5008617006934214, "grad_norm": 0.4418941140174866, "learning_rate": 9.984793147818402e-05, "loss": 1.4894, "step": 38544 }, { "epoch": 0.5008746952373372, "grad_norm": 0.40218988060951233, "learning_rate": 9.984533201627265e-05, "loss": 1.3141, "step": 38545 }, { "epoch": 0.5008876897812531, "grad_norm": 0.4375206232070923, "learning_rate": 9.984273255436126e-05, "loss": 1.1583, "step": 38546 }, { "epoch": 0.500900684325169, "grad_norm": 0.45491257309913635, "learning_rate": 9.984013309244987e-05, "loss": 1.4359, "step": 38547 }, { "epoch": 0.5009136788690849, "grad_norm": 0.4308406412601471, "learning_rate": 9.983753363053848e-05, "loss": 1.5101, "step": 38548 }, { "epoch": 0.5009266734130007, "grad_norm": 0.25547799468040466, "learning_rate": 9.98349341686271e-05, "loss": 1.2887, "step": 38549 }, { "epoch": 0.5009396679569166, "grad_norm": 0.3874829411506653, "learning_rate": 9.983233470671571e-05, "loss": 1.2919, "step": 38550 }, { "epoch": 0.5009526625008325, "grad_norm": 0.47302722930908203, "learning_rate": 9.982973524480434e-05, "loss": 1.2919, "step": 38551 }, { "epoch": 0.5009656570447484, "grad_norm": 0.2810894548892975, "learning_rate": 9.982713578289295e-05, "loss": 1.3824, "step": 38552 }, { "epoch": 0.5009786515886642, "grad_norm": 0.43941783905029297, "learning_rate": 9.982453632098156e-05, "loss": 1.2925, "step": 38553 }, { "epoch": 0.5009916461325801, "grad_norm": 0.40376999974250793, "learning_rate": 9.982193685907017e-05, "loss": 1.5715, "step": 38554 }, { "epoch": 0.501004640676496, "grad_norm": 0.3744410574436188, "learning_rate": 9.98193373971588e-05, "loss": 1.3405, "step": 38555 }, { "epoch": 0.5010176352204119, "grad_norm": 0.44319969415664673, "learning_rate": 9.981673793524741e-05, "loss": 1.4598, "step": 38556 }, { "epoch": 0.5010306297643277, "grad_norm": 0.3882030248641968, "learning_rate": 9.981413847333603e-05, "loss": 1.1661, "step": 38557 }, { "epoch": 0.5010436243082436, "grad_norm": 0.37690043449401855, "learning_rate": 9.981153901142464e-05, "loss": 1.5137, "step": 38558 }, { "epoch": 0.5010566188521595, "grad_norm": 0.38836580514907837, "learning_rate": 9.980893954951325e-05, "loss": 1.3738, "step": 38559 }, { "epoch": 0.5010696133960754, "grad_norm": 0.41097086668014526, "learning_rate": 9.980634008760186e-05, "loss": 1.5838, "step": 38560 }, { "epoch": 0.5010826079399912, "grad_norm": 0.4168701469898224, "learning_rate": 9.980374062569049e-05, "loss": 1.3225, "step": 38561 }, { "epoch": 0.501095602483907, "grad_norm": 0.3650151789188385, "learning_rate": 9.98011411637791e-05, "loss": 1.7385, "step": 38562 }, { "epoch": 0.501108597027823, "grad_norm": 0.4143138825893402, "learning_rate": 9.979854170186772e-05, "loss": 1.3563, "step": 38563 }, { "epoch": 0.5011215915717389, "grad_norm": 0.34220296144485474, "learning_rate": 9.979594223995634e-05, "loss": 1.484, "step": 38564 }, { "epoch": 0.5011345861156546, "grad_norm": 0.38319987058639526, "learning_rate": 9.979334277804495e-05, "loss": 1.3797, "step": 38565 }, { "epoch": 0.5011475806595705, "grad_norm": 0.3852391839027405, "learning_rate": 9.979074331613356e-05, "loss": 1.2817, "step": 38566 }, { "epoch": 0.5011605752034864, "grad_norm": 0.39859554171562195, "learning_rate": 9.978814385422218e-05, "loss": 1.3184, "step": 38567 }, { "epoch": 0.5011735697474023, "grad_norm": 0.5399718284606934, "learning_rate": 9.978554439231079e-05, "loss": 1.2967, "step": 38568 }, { "epoch": 0.5011865642913181, "grad_norm": 0.45720845460891724, "learning_rate": 9.978294493039942e-05, "loss": 1.4042, "step": 38569 }, { "epoch": 0.501199558835234, "grad_norm": 0.42943382263183594, "learning_rate": 9.978034546848803e-05, "loss": 1.4733, "step": 38570 }, { "epoch": 0.5012125533791499, "grad_norm": 0.3036157786846161, "learning_rate": 9.977774600657664e-05, "loss": 1.202, "step": 38571 }, { "epoch": 0.5012255479230658, "grad_norm": 0.434392511844635, "learning_rate": 9.977514654466525e-05, "loss": 1.429, "step": 38572 }, { "epoch": 0.5012385424669816, "grad_norm": 0.42071211338043213, "learning_rate": 9.977254708275387e-05, "loss": 1.3919, "step": 38573 }, { "epoch": 0.5012515370108975, "grad_norm": 0.34020864963531494, "learning_rate": 9.976994762084249e-05, "loss": 1.4991, "step": 38574 }, { "epoch": 0.5012645315548134, "grad_norm": 0.36502739787101746, "learning_rate": 9.976734815893111e-05, "loss": 1.4106, "step": 38575 }, { "epoch": 0.5012775260987293, "grad_norm": 0.28484347462654114, "learning_rate": 9.976474869701972e-05, "loss": 1.3428, "step": 38576 }, { "epoch": 0.5012905206426451, "grad_norm": 0.448452353477478, "learning_rate": 9.976214923510833e-05, "loss": 1.2886, "step": 38577 }, { "epoch": 0.501303515186561, "grad_norm": 0.4120859205722809, "learning_rate": 9.975954977319694e-05, "loss": 1.2847, "step": 38578 }, { "epoch": 0.5013165097304769, "grad_norm": 0.3343343436717987, "learning_rate": 9.975695031128557e-05, "loss": 1.2084, "step": 38579 }, { "epoch": 0.5013295042743928, "grad_norm": 0.344948410987854, "learning_rate": 9.975435084937419e-05, "loss": 1.2986, "step": 38580 }, { "epoch": 0.5013424988183086, "grad_norm": 0.42767760157585144, "learning_rate": 9.97517513874628e-05, "loss": 1.3068, "step": 38581 }, { "epoch": 0.5013554933622245, "grad_norm": 0.376751571893692, "learning_rate": 9.974915192555143e-05, "loss": 1.3869, "step": 38582 }, { "epoch": 0.5013684879061404, "grad_norm": 0.44073906540870667, "learning_rate": 9.974655246364002e-05, "loss": 1.3422, "step": 38583 }, { "epoch": 0.5013814824500563, "grad_norm": 0.34657928347587585, "learning_rate": 9.974395300172865e-05, "loss": 1.5945, "step": 38584 }, { "epoch": 0.5013944769939721, "grad_norm": 0.4015274941921234, "learning_rate": 9.974135353981726e-05, "loss": 1.3947, "step": 38585 }, { "epoch": 0.501407471537888, "grad_norm": 0.42863163352012634, "learning_rate": 9.973875407790588e-05, "loss": 1.4117, "step": 38586 }, { "epoch": 0.5014204660818039, "grad_norm": 0.436213880777359, "learning_rate": 9.97361546159945e-05, "loss": 1.4672, "step": 38587 }, { "epoch": 0.5014334606257198, "grad_norm": 0.382867693901062, "learning_rate": 9.973355515408312e-05, "loss": 1.3857, "step": 38588 }, { "epoch": 0.5014464551696356, "grad_norm": 0.4462325870990753, "learning_rate": 9.973095569217172e-05, "loss": 1.5183, "step": 38589 }, { "epoch": 0.5014594497135515, "grad_norm": 0.34867414832115173, "learning_rate": 9.972835623026034e-05, "loss": 1.3722, "step": 38590 }, { "epoch": 0.5014724442574674, "grad_norm": 0.4105249047279358, "learning_rate": 9.972575676834895e-05, "loss": 1.2695, "step": 38591 }, { "epoch": 0.5014854388013833, "grad_norm": 0.43936577439308167, "learning_rate": 9.972315730643758e-05, "loss": 1.3983, "step": 38592 }, { "epoch": 0.5014984333452991, "grad_norm": 0.4319203197956085, "learning_rate": 9.972055784452619e-05, "loss": 1.5116, "step": 38593 }, { "epoch": 0.501511427889215, "grad_norm": 0.433403342962265, "learning_rate": 9.971795838261481e-05, "loss": 1.4435, "step": 38594 }, { "epoch": 0.5015244224331309, "grad_norm": 0.35866764187812805, "learning_rate": 9.971535892070341e-05, "loss": 1.3241, "step": 38595 }, { "epoch": 0.5015374169770468, "grad_norm": 0.41441044211387634, "learning_rate": 9.971275945879203e-05, "loss": 1.3137, "step": 38596 }, { "epoch": 0.5015504115209627, "grad_norm": 0.4729395806789398, "learning_rate": 9.971015999688065e-05, "loss": 1.308, "step": 38597 }, { "epoch": 0.5015634060648785, "grad_norm": 0.36779484152793884, "learning_rate": 9.970756053496927e-05, "loss": 1.5165, "step": 38598 }, { "epoch": 0.5015764006087944, "grad_norm": 0.3837573528289795, "learning_rate": 9.970496107305788e-05, "loss": 1.4158, "step": 38599 }, { "epoch": 0.5015893951527103, "grad_norm": 0.44191521406173706, "learning_rate": 9.97023616111465e-05, "loss": 1.4352, "step": 38600 }, { "epoch": 0.5016023896966262, "grad_norm": 0.4868957996368408, "learning_rate": 9.969976214923512e-05, "loss": 1.4142, "step": 38601 }, { "epoch": 0.501615384240542, "grad_norm": 0.3141123056411743, "learning_rate": 9.969716268732373e-05, "loss": 1.3555, "step": 38602 }, { "epoch": 0.5016283787844579, "grad_norm": 0.5004069805145264, "learning_rate": 9.969456322541234e-05, "loss": 1.45, "step": 38603 }, { "epoch": 0.5016413733283738, "grad_norm": 0.3260367214679718, "learning_rate": 9.969196376350096e-05, "loss": 1.3377, "step": 38604 }, { "epoch": 0.5016543678722897, "grad_norm": 0.42804521322250366, "learning_rate": 9.968936430158957e-05, "loss": 1.3283, "step": 38605 }, { "epoch": 0.5016673624162055, "grad_norm": 0.525400698184967, "learning_rate": 9.96867648396782e-05, "loss": 1.3515, "step": 38606 }, { "epoch": 0.5016803569601214, "grad_norm": 0.37241220474243164, "learning_rate": 9.968416537776681e-05, "loss": 1.3381, "step": 38607 }, { "epoch": 0.5016933515040373, "grad_norm": 0.36682796478271484, "learning_rate": 9.968156591585542e-05, "loss": 1.3541, "step": 38608 }, { "epoch": 0.5017063460479532, "grad_norm": 0.457168847322464, "learning_rate": 9.967896645394403e-05, "loss": 1.3386, "step": 38609 }, { "epoch": 0.501719340591869, "grad_norm": 0.4600222706794739, "learning_rate": 9.967636699203266e-05, "loss": 1.3159, "step": 38610 }, { "epoch": 0.5017323351357849, "grad_norm": 0.4694440960884094, "learning_rate": 9.967376753012127e-05, "loss": 1.5498, "step": 38611 }, { "epoch": 0.5017453296797008, "grad_norm": 0.4455929696559906, "learning_rate": 9.967116806820989e-05, "loss": 1.4967, "step": 38612 }, { "epoch": 0.5017583242236167, "grad_norm": 0.5129976868629456, "learning_rate": 9.96685686062985e-05, "loss": 1.6864, "step": 38613 }, { "epoch": 0.5017713187675324, "grad_norm": 0.3728010356426239, "learning_rate": 9.966596914438711e-05, "loss": 1.2252, "step": 38614 }, { "epoch": 0.5017843133114483, "grad_norm": 0.34261587262153625, "learning_rate": 9.966336968247572e-05, "loss": 1.2211, "step": 38615 }, { "epoch": 0.5017973078553642, "grad_norm": 0.43765607476234436, "learning_rate": 9.966077022056435e-05, "loss": 1.5892, "step": 38616 }, { "epoch": 0.5018103023992802, "grad_norm": 0.34832432866096497, "learning_rate": 9.965817075865297e-05, "loss": 1.234, "step": 38617 }, { "epoch": 0.5018232969431959, "grad_norm": 0.47121068835258484, "learning_rate": 9.965557129674158e-05, "loss": 1.4693, "step": 38618 }, { "epoch": 0.5018362914871118, "grad_norm": 0.47093334794044495, "learning_rate": 9.96529718348302e-05, "loss": 1.3836, "step": 38619 }, { "epoch": 0.5018492860310277, "grad_norm": 0.41128021478652954, "learning_rate": 9.96503723729188e-05, "loss": 1.4557, "step": 38620 }, { "epoch": 0.5018622805749436, "grad_norm": 0.4500844180583954, "learning_rate": 9.964777291100743e-05, "loss": 1.4342, "step": 38621 }, { "epoch": 0.5018752751188594, "grad_norm": 0.37473171949386597, "learning_rate": 9.964517344909604e-05, "loss": 1.1323, "step": 38622 }, { "epoch": 0.5018882696627753, "grad_norm": 0.389114111661911, "learning_rate": 9.964257398718467e-05, "loss": 1.3187, "step": 38623 }, { "epoch": 0.5019012642066912, "grad_norm": 0.40775954723358154, "learning_rate": 9.963997452527328e-05, "loss": 1.5444, "step": 38624 }, { "epoch": 0.5019142587506071, "grad_norm": 0.4420469105243683, "learning_rate": 9.963737506336189e-05, "loss": 1.544, "step": 38625 }, { "epoch": 0.5019272532945229, "grad_norm": 0.34388697147369385, "learning_rate": 9.96347756014505e-05, "loss": 1.1994, "step": 38626 }, { "epoch": 0.5019402478384388, "grad_norm": 0.3449591100215912, "learning_rate": 9.963217613953912e-05, "loss": 1.1656, "step": 38627 }, { "epoch": 0.5019532423823547, "grad_norm": 0.46845781803131104, "learning_rate": 9.962957667762773e-05, "loss": 1.4602, "step": 38628 }, { "epoch": 0.5019662369262706, "grad_norm": 0.31290364265441895, "learning_rate": 9.962697721571636e-05, "loss": 1.3737, "step": 38629 }, { "epoch": 0.5019792314701864, "grad_norm": 0.40682268142700195, "learning_rate": 9.962437775380497e-05, "loss": 1.4536, "step": 38630 }, { "epoch": 0.5019922260141023, "grad_norm": 0.4838141202926636, "learning_rate": 9.962177829189358e-05, "loss": 1.347, "step": 38631 }, { "epoch": 0.5020052205580182, "grad_norm": 0.4010850191116333, "learning_rate": 9.961917882998219e-05, "loss": 1.2836, "step": 38632 }, { "epoch": 0.5020182151019341, "grad_norm": 0.3325026333332062, "learning_rate": 9.961657936807082e-05, "loss": 1.3295, "step": 38633 }, { "epoch": 0.5020312096458499, "grad_norm": 0.38825252652168274, "learning_rate": 9.961397990615943e-05, "loss": 1.4019, "step": 38634 }, { "epoch": 0.5020442041897658, "grad_norm": 0.2932383120059967, "learning_rate": 9.961138044424805e-05, "loss": 1.4335, "step": 38635 }, { "epoch": 0.5020571987336817, "grad_norm": 0.4004434049129486, "learning_rate": 9.960878098233666e-05, "loss": 1.4192, "step": 38636 }, { "epoch": 0.5020701932775976, "grad_norm": 0.4199526309967041, "learning_rate": 9.960618152042527e-05, "loss": 1.2629, "step": 38637 }, { "epoch": 0.5020831878215134, "grad_norm": 0.38937196135520935, "learning_rate": 9.960358205851388e-05, "loss": 1.3638, "step": 38638 }, { "epoch": 0.5020961823654293, "grad_norm": 0.4084378778934479, "learning_rate": 9.960098259660251e-05, "loss": 1.5108, "step": 38639 }, { "epoch": 0.5021091769093452, "grad_norm": 0.38959378004074097, "learning_rate": 9.959838313469112e-05, "loss": 1.4613, "step": 38640 }, { "epoch": 0.5021221714532611, "grad_norm": 0.43904009461402893, "learning_rate": 9.959578367277974e-05, "loss": 1.4719, "step": 38641 }, { "epoch": 0.5021351659971769, "grad_norm": 0.4299328029155731, "learning_rate": 9.959318421086835e-05, "loss": 1.2845, "step": 38642 }, { "epoch": 0.5021481605410928, "grad_norm": 0.4287305176258087, "learning_rate": 9.959058474895698e-05, "loss": 1.5222, "step": 38643 }, { "epoch": 0.5021611550850087, "grad_norm": 0.34740859270095825, "learning_rate": 9.958798528704558e-05, "loss": 1.3907, "step": 38644 }, { "epoch": 0.5021741496289246, "grad_norm": 0.33845412731170654, "learning_rate": 9.95853858251342e-05, "loss": 1.4029, "step": 38645 }, { "epoch": 0.5021871441728404, "grad_norm": 0.41307365894317627, "learning_rate": 9.958278636322281e-05, "loss": 1.3485, "step": 38646 }, { "epoch": 0.5022001387167563, "grad_norm": 0.4106910228729248, "learning_rate": 9.958018690131144e-05, "loss": 1.3823, "step": 38647 }, { "epoch": 0.5022131332606722, "grad_norm": 0.40577781200408936, "learning_rate": 9.957758743940005e-05, "loss": 1.5168, "step": 38648 }, { "epoch": 0.5022261278045881, "grad_norm": 0.5476912260055542, "learning_rate": 9.957498797748867e-05, "loss": 1.4737, "step": 38649 }, { "epoch": 0.5022391223485039, "grad_norm": 0.4123094975948334, "learning_rate": 9.957238851557727e-05, "loss": 1.4891, "step": 38650 }, { "epoch": 0.5022521168924198, "grad_norm": 0.43160751461982727, "learning_rate": 9.95697890536659e-05, "loss": 1.3068, "step": 38651 }, { "epoch": 0.5022651114363357, "grad_norm": 0.3636293411254883, "learning_rate": 9.95671895917545e-05, "loss": 1.2352, "step": 38652 }, { "epoch": 0.5022781059802516, "grad_norm": 0.4430590569972992, "learning_rate": 9.956459012984313e-05, "loss": 1.4589, "step": 38653 }, { "epoch": 0.5022911005241674, "grad_norm": 0.4093201160430908, "learning_rate": 9.956199066793175e-05, "loss": 1.5918, "step": 38654 }, { "epoch": 0.5023040950680833, "grad_norm": 0.406325101852417, "learning_rate": 9.955939120602036e-05, "loss": 1.3187, "step": 38655 }, { "epoch": 0.5023170896119992, "grad_norm": 0.3986407220363617, "learning_rate": 9.955679174410898e-05, "loss": 1.332, "step": 38656 }, { "epoch": 0.5023300841559151, "grad_norm": 0.39856845140457153, "learning_rate": 9.955419228219759e-05, "loss": 1.4349, "step": 38657 }, { "epoch": 0.5023430786998309, "grad_norm": 0.38072022795677185, "learning_rate": 9.955159282028621e-05, "loss": 1.3082, "step": 38658 }, { "epoch": 0.5023560732437468, "grad_norm": 0.2640657424926758, "learning_rate": 9.954899335837482e-05, "loss": 1.4964, "step": 38659 }, { "epoch": 0.5023690677876627, "grad_norm": 0.4046262204647064, "learning_rate": 9.954639389646345e-05, "loss": 1.2537, "step": 38660 }, { "epoch": 0.5023820623315786, "grad_norm": 0.41886183619499207, "learning_rate": 9.954379443455206e-05, "loss": 1.1939, "step": 38661 }, { "epoch": 0.5023950568754944, "grad_norm": 0.32854607701301575, "learning_rate": 9.954119497264067e-05, "loss": 1.3968, "step": 38662 }, { "epoch": 0.5024080514194103, "grad_norm": 0.3777220845222473, "learning_rate": 9.953859551072928e-05, "loss": 1.4049, "step": 38663 }, { "epoch": 0.5024210459633262, "grad_norm": 0.47061794996261597, "learning_rate": 9.95359960488179e-05, "loss": 1.4819, "step": 38664 }, { "epoch": 0.502434040507242, "grad_norm": 0.3082543909549713, "learning_rate": 9.953339658690651e-05, "loss": 1.2269, "step": 38665 }, { "epoch": 0.5024470350511578, "grad_norm": 0.3603893518447876, "learning_rate": 9.953079712499514e-05, "loss": 1.383, "step": 38666 }, { "epoch": 0.5024600295950737, "grad_norm": 0.3619822859764099, "learning_rate": 9.952819766308375e-05, "loss": 1.4762, "step": 38667 }, { "epoch": 0.5024730241389896, "grad_norm": 0.34904932975769043, "learning_rate": 9.952559820117236e-05, "loss": 1.5327, "step": 38668 }, { "epoch": 0.5024860186829055, "grad_norm": 0.44304129481315613, "learning_rate": 9.952299873926097e-05, "loss": 1.3144, "step": 38669 }, { "epoch": 0.5024990132268213, "grad_norm": 0.25501298904418945, "learning_rate": 9.95203992773496e-05, "loss": 1.2992, "step": 38670 }, { "epoch": 0.5025120077707372, "grad_norm": 0.34362560510635376, "learning_rate": 9.95177998154382e-05, "loss": 1.377, "step": 38671 }, { "epoch": 0.5025250023146531, "grad_norm": 0.4482221305370331, "learning_rate": 9.951520035352683e-05, "loss": 1.367, "step": 38672 }, { "epoch": 0.502537996858569, "grad_norm": 0.41808897256851196, "learning_rate": 9.951260089161544e-05, "loss": 1.491, "step": 38673 }, { "epoch": 0.5025509914024849, "grad_norm": 0.3629595935344696, "learning_rate": 9.951000142970405e-05, "loss": 1.2651, "step": 38674 }, { "epoch": 0.5025639859464007, "grad_norm": 0.4167031943798065, "learning_rate": 9.950740196779266e-05, "loss": 1.5082, "step": 38675 }, { "epoch": 0.5025769804903166, "grad_norm": 0.40896928310394287, "learning_rate": 9.950480250588129e-05, "loss": 1.344, "step": 38676 }, { "epoch": 0.5025899750342325, "grad_norm": 0.4458857476711273, "learning_rate": 9.95022030439699e-05, "loss": 1.4298, "step": 38677 }, { "epoch": 0.5026029695781484, "grad_norm": 0.4971230626106262, "learning_rate": 9.949960358205852e-05, "loss": 1.4142, "step": 38678 }, { "epoch": 0.5026159641220642, "grad_norm": 0.47731155157089233, "learning_rate": 9.949700412014713e-05, "loss": 1.2656, "step": 38679 }, { "epoch": 0.5026289586659801, "grad_norm": 0.5380831360816956, "learning_rate": 9.949440465823575e-05, "loss": 1.4705, "step": 38680 }, { "epoch": 0.502641953209896, "grad_norm": 0.39273661375045776, "learning_rate": 9.949180519632436e-05, "loss": 1.3998, "step": 38681 }, { "epoch": 0.5026549477538119, "grad_norm": 0.49421244859695435, "learning_rate": 9.948920573441298e-05, "loss": 1.6533, "step": 38682 }, { "epoch": 0.5026679422977277, "grad_norm": 0.4012127220630646, "learning_rate": 9.948660627250159e-05, "loss": 1.3524, "step": 38683 }, { "epoch": 0.5026809368416436, "grad_norm": 0.3115918040275574, "learning_rate": 9.948400681059022e-05, "loss": 1.2522, "step": 38684 }, { "epoch": 0.5026939313855595, "grad_norm": 0.3512091040611267, "learning_rate": 9.948140734867883e-05, "loss": 1.2833, "step": 38685 }, { "epoch": 0.5027069259294754, "grad_norm": 0.45960733294487, "learning_rate": 9.947880788676744e-05, "loss": 1.3587, "step": 38686 }, { "epoch": 0.5027199204733912, "grad_norm": 0.35364776849746704, "learning_rate": 9.947620842485605e-05, "loss": 1.3527, "step": 38687 }, { "epoch": 0.5027329150173071, "grad_norm": 0.361489474773407, "learning_rate": 9.947360896294467e-05, "loss": 1.4331, "step": 38688 }, { "epoch": 0.502745909561223, "grad_norm": 0.45128002762794495, "learning_rate": 9.947100950103328e-05, "loss": 1.4558, "step": 38689 }, { "epoch": 0.5027589041051389, "grad_norm": 0.3806818723678589, "learning_rate": 9.946841003912191e-05, "loss": 1.3243, "step": 38690 }, { "epoch": 0.5027718986490547, "grad_norm": 0.2227061241865158, "learning_rate": 9.946581057721053e-05, "loss": 1.1331, "step": 38691 }, { "epoch": 0.5027848931929706, "grad_norm": 0.4346643388271332, "learning_rate": 9.946321111529913e-05, "loss": 1.3201, "step": 38692 }, { "epoch": 0.5027978877368865, "grad_norm": 0.37980860471725464, "learning_rate": 9.946061165338776e-05, "loss": 1.4911, "step": 38693 }, { "epoch": 0.5028108822808024, "grad_norm": 0.5231842994689941, "learning_rate": 9.945801219147637e-05, "loss": 1.5323, "step": 38694 }, { "epoch": 0.5028238768247182, "grad_norm": 0.37290629744529724, "learning_rate": 9.945541272956499e-05, "loss": 1.4404, "step": 38695 }, { "epoch": 0.5028368713686341, "grad_norm": 0.3669590651988983, "learning_rate": 9.94528132676536e-05, "loss": 1.4594, "step": 38696 }, { "epoch": 0.50284986591255, "grad_norm": 0.4750474691390991, "learning_rate": 9.945021380574223e-05, "loss": 1.3476, "step": 38697 }, { "epoch": 0.5028628604564659, "grad_norm": 0.48590344190597534, "learning_rate": 9.944761434383082e-05, "loss": 1.4391, "step": 38698 }, { "epoch": 0.5028758550003817, "grad_norm": 0.3370203673839569, "learning_rate": 9.944501488191945e-05, "loss": 1.3266, "step": 38699 }, { "epoch": 0.5028888495442976, "grad_norm": 0.34570881724357605, "learning_rate": 9.944241542000806e-05, "loss": 1.3115, "step": 38700 }, { "epoch": 0.5029018440882135, "grad_norm": 0.43542373180389404, "learning_rate": 9.943981595809668e-05, "loss": 1.2441, "step": 38701 }, { "epoch": 0.5029148386321294, "grad_norm": 0.46609562635421753, "learning_rate": 9.94372164961853e-05, "loss": 1.471, "step": 38702 }, { "epoch": 0.5029278331760452, "grad_norm": 0.5439035892486572, "learning_rate": 9.943461703427392e-05, "loss": 1.3798, "step": 38703 }, { "epoch": 0.5029408277199611, "grad_norm": 0.4468325078487396, "learning_rate": 9.943201757236253e-05, "loss": 1.3957, "step": 38704 }, { "epoch": 0.502953822263877, "grad_norm": 0.499202162027359, "learning_rate": 9.942941811045114e-05, "loss": 1.5699, "step": 38705 }, { "epoch": 0.5029668168077929, "grad_norm": 0.3493598401546478, "learning_rate": 9.942681864853975e-05, "loss": 1.4434, "step": 38706 }, { "epoch": 0.5029798113517087, "grad_norm": 0.41630589962005615, "learning_rate": 9.942421918662838e-05, "loss": 1.2732, "step": 38707 }, { "epoch": 0.5029928058956246, "grad_norm": 0.3793299198150635, "learning_rate": 9.942161972471699e-05, "loss": 1.4012, "step": 38708 }, { "epoch": 0.5030058004395405, "grad_norm": 0.417537659406662, "learning_rate": 9.941902026280561e-05, "loss": 1.4816, "step": 38709 }, { "epoch": 0.5030187949834564, "grad_norm": 0.4429754614830017, "learning_rate": 9.941642080089422e-05, "loss": 1.5595, "step": 38710 }, { "epoch": 0.5030317895273722, "grad_norm": 0.39553695917129517, "learning_rate": 9.941382133898283e-05, "loss": 1.4568, "step": 38711 }, { "epoch": 0.503044784071288, "grad_norm": 0.37764808535575867, "learning_rate": 9.941122187707144e-05, "loss": 1.276, "step": 38712 }, { "epoch": 0.503057778615204, "grad_norm": 0.37186166644096375, "learning_rate": 9.940862241516007e-05, "loss": 1.3691, "step": 38713 }, { "epoch": 0.5030707731591199, "grad_norm": 0.3819955885410309, "learning_rate": 9.940602295324868e-05, "loss": 1.3951, "step": 38714 }, { "epoch": 0.5030837677030356, "grad_norm": 0.321040540933609, "learning_rate": 9.94034234913373e-05, "loss": 1.3353, "step": 38715 }, { "epoch": 0.5030967622469515, "grad_norm": 0.3601124882698059, "learning_rate": 9.940082402942592e-05, "loss": 1.4068, "step": 38716 }, { "epoch": 0.5031097567908674, "grad_norm": 0.43082499504089355, "learning_rate": 9.939822456751453e-05, "loss": 1.4183, "step": 38717 }, { "epoch": 0.5031227513347833, "grad_norm": 0.3668937683105469, "learning_rate": 9.939562510560314e-05, "loss": 1.4715, "step": 38718 }, { "epoch": 0.5031357458786991, "grad_norm": 0.4214024543762207, "learning_rate": 9.939302564369176e-05, "loss": 1.5099, "step": 38719 }, { "epoch": 0.503148740422615, "grad_norm": 0.36616218090057373, "learning_rate": 9.939042618178037e-05, "loss": 1.3421, "step": 38720 }, { "epoch": 0.5031617349665309, "grad_norm": 0.3801290690898895, "learning_rate": 9.9387826719869e-05, "loss": 1.4003, "step": 38721 }, { "epoch": 0.5031747295104468, "grad_norm": 0.3637678623199463, "learning_rate": 9.938522725795761e-05, "loss": 1.3397, "step": 38722 }, { "epoch": 0.5031877240543626, "grad_norm": 0.4232293665409088, "learning_rate": 9.938262779604622e-05, "loss": 1.5857, "step": 38723 }, { "epoch": 0.5032007185982785, "grad_norm": 0.5212748646736145, "learning_rate": 9.938002833413483e-05, "loss": 1.4806, "step": 38724 }, { "epoch": 0.5032137131421944, "grad_norm": 0.41660821437835693, "learning_rate": 9.937742887222345e-05, "loss": 1.46, "step": 38725 }, { "epoch": 0.5032267076861103, "grad_norm": 0.46641552448272705, "learning_rate": 9.937482941031207e-05, "loss": 1.307, "step": 38726 }, { "epoch": 0.5032397022300261, "grad_norm": 0.35546934604644775, "learning_rate": 9.937222994840069e-05, "loss": 1.1158, "step": 38727 }, { "epoch": 0.503252696773942, "grad_norm": 0.36998599767684937, "learning_rate": 9.93696304864893e-05, "loss": 1.4282, "step": 38728 }, { "epoch": 0.5032656913178579, "grad_norm": 0.42102518677711487, "learning_rate": 9.936703102457791e-05, "loss": 1.275, "step": 38729 }, { "epoch": 0.5032786858617738, "grad_norm": 0.4139999747276306, "learning_rate": 9.936443156266654e-05, "loss": 1.5319, "step": 38730 }, { "epoch": 0.5032916804056896, "grad_norm": 0.4046679437160492, "learning_rate": 9.936183210075515e-05, "loss": 1.4493, "step": 38731 }, { "epoch": 0.5033046749496055, "grad_norm": 0.4516332447528839, "learning_rate": 9.935923263884377e-05, "loss": 1.55, "step": 38732 }, { "epoch": 0.5033176694935214, "grad_norm": 0.45310911536216736, "learning_rate": 9.935663317693238e-05, "loss": 1.4751, "step": 38733 }, { "epoch": 0.5033306640374373, "grad_norm": 0.4741683006286621, "learning_rate": 9.9354033715021e-05, "loss": 1.3601, "step": 38734 }, { "epoch": 0.5033436585813531, "grad_norm": 0.32811546325683594, "learning_rate": 9.93514342531096e-05, "loss": 1.2244, "step": 38735 }, { "epoch": 0.503356653125269, "grad_norm": 0.38871097564697266, "learning_rate": 9.934883479119823e-05, "loss": 1.4579, "step": 38736 }, { "epoch": 0.5033696476691849, "grad_norm": 0.4746989905834198, "learning_rate": 9.934623532928684e-05, "loss": 1.4118, "step": 38737 }, { "epoch": 0.5033826422131008, "grad_norm": 0.38275283575057983, "learning_rate": 9.934363586737546e-05, "loss": 1.3145, "step": 38738 }, { "epoch": 0.5033956367570166, "grad_norm": 0.3828461468219757, "learning_rate": 9.934103640546408e-05, "loss": 1.4685, "step": 38739 }, { "epoch": 0.5034086313009325, "grad_norm": 0.44905635714530945, "learning_rate": 9.933843694355269e-05, "loss": 1.6201, "step": 38740 }, { "epoch": 0.5034216258448484, "grad_norm": 0.36987483501434326, "learning_rate": 9.93358374816413e-05, "loss": 1.3206, "step": 38741 }, { "epoch": 0.5034346203887643, "grad_norm": 0.4826483726501465, "learning_rate": 9.933323801972992e-05, "loss": 1.422, "step": 38742 }, { "epoch": 0.5034476149326801, "grad_norm": 0.35331761837005615, "learning_rate": 9.933063855781853e-05, "loss": 1.6108, "step": 38743 }, { "epoch": 0.503460609476596, "grad_norm": 0.5542351007461548, "learning_rate": 9.932803909590716e-05, "loss": 1.4403, "step": 38744 }, { "epoch": 0.5034736040205119, "grad_norm": 0.37678608298301697, "learning_rate": 9.932543963399577e-05, "loss": 1.3974, "step": 38745 }, { "epoch": 0.5034865985644278, "grad_norm": 0.38738587498664856, "learning_rate": 9.932284017208439e-05, "loss": 1.2872, "step": 38746 }, { "epoch": 0.5034995931083436, "grad_norm": 0.3126541078090668, "learning_rate": 9.932024071017299e-05, "loss": 1.1688, "step": 38747 }, { "epoch": 0.5035125876522595, "grad_norm": 0.3494420647621155, "learning_rate": 9.931764124826161e-05, "loss": 1.3304, "step": 38748 }, { "epoch": 0.5035255821961754, "grad_norm": 0.43524011969566345, "learning_rate": 9.931504178635023e-05, "loss": 1.3694, "step": 38749 }, { "epoch": 0.5035385767400913, "grad_norm": 0.3468901216983795, "learning_rate": 9.931244232443885e-05, "loss": 1.4129, "step": 38750 }, { "epoch": 0.5035515712840072, "grad_norm": 0.37986692786216736, "learning_rate": 9.930984286252746e-05, "loss": 1.3642, "step": 38751 }, { "epoch": 0.503564565827923, "grad_norm": 0.4223717749118805, "learning_rate": 9.930724340061609e-05, "loss": 1.6082, "step": 38752 }, { "epoch": 0.5035775603718389, "grad_norm": 0.42933374643325806, "learning_rate": 9.930464393870468e-05, "loss": 1.186, "step": 38753 }, { "epoch": 0.5035905549157548, "grad_norm": 0.34949082136154175, "learning_rate": 9.930204447679331e-05, "loss": 1.3982, "step": 38754 }, { "epoch": 0.5036035494596707, "grad_norm": 0.37462541460990906, "learning_rate": 9.929944501488192e-05, "loss": 1.4427, "step": 38755 }, { "epoch": 0.5036165440035865, "grad_norm": 0.4260838031768799, "learning_rate": 9.929684555297054e-05, "loss": 1.3889, "step": 38756 }, { "epoch": 0.5036295385475024, "grad_norm": 0.2994789183139801, "learning_rate": 9.929424609105915e-05, "loss": 1.3296, "step": 38757 }, { "epoch": 0.5036425330914183, "grad_norm": 0.41050267219543457, "learning_rate": 9.929164662914778e-05, "loss": 1.3246, "step": 38758 }, { "epoch": 0.5036555276353342, "grad_norm": 0.4634935259819031, "learning_rate": 9.928904716723638e-05, "loss": 1.5292, "step": 38759 }, { "epoch": 0.50366852217925, "grad_norm": 0.4125591814517975, "learning_rate": 9.9286447705325e-05, "loss": 1.2944, "step": 38760 }, { "epoch": 0.5036815167231659, "grad_norm": 0.3591509759426117, "learning_rate": 9.928384824341361e-05, "loss": 1.2711, "step": 38761 }, { "epoch": 0.5036945112670818, "grad_norm": 0.4216979742050171, "learning_rate": 9.928124878150224e-05, "loss": 1.5474, "step": 38762 }, { "epoch": 0.5037075058109977, "grad_norm": 0.3817099332809448, "learning_rate": 9.927864931959085e-05, "loss": 1.4197, "step": 38763 }, { "epoch": 0.5037205003549134, "grad_norm": 0.3384837508201599, "learning_rate": 9.927604985767947e-05, "loss": 1.2255, "step": 38764 }, { "epoch": 0.5037334948988293, "grad_norm": 0.42455190420150757, "learning_rate": 9.927345039576808e-05, "loss": 1.5031, "step": 38765 }, { "epoch": 0.5037464894427452, "grad_norm": 0.42044880986213684, "learning_rate": 9.927085093385669e-05, "loss": 1.2972, "step": 38766 }, { "epoch": 0.5037594839866611, "grad_norm": 0.3709751069545746, "learning_rate": 9.926825147194532e-05, "loss": 1.3916, "step": 38767 }, { "epoch": 0.5037724785305769, "grad_norm": 0.40946483612060547, "learning_rate": 9.926565201003393e-05, "loss": 1.339, "step": 38768 }, { "epoch": 0.5037854730744928, "grad_norm": 0.3375990688800812, "learning_rate": 9.926305254812255e-05, "loss": 1.3489, "step": 38769 }, { "epoch": 0.5037984676184087, "grad_norm": 0.524073600769043, "learning_rate": 9.926045308621116e-05, "loss": 1.5011, "step": 38770 }, { "epoch": 0.5038114621623246, "grad_norm": 0.4260469675064087, "learning_rate": 9.925785362429977e-05, "loss": 1.3027, "step": 38771 }, { "epoch": 0.5038244567062404, "grad_norm": 0.34187766909599304, "learning_rate": 9.925525416238839e-05, "loss": 1.2047, "step": 38772 }, { "epoch": 0.5038374512501563, "grad_norm": 0.3449990749359131, "learning_rate": 9.925265470047701e-05, "loss": 1.1701, "step": 38773 }, { "epoch": 0.5038504457940722, "grad_norm": 0.43694669008255005, "learning_rate": 9.925005523856562e-05, "loss": 1.4895, "step": 38774 }, { "epoch": 0.5038634403379881, "grad_norm": 0.4068067967891693, "learning_rate": 9.924745577665425e-05, "loss": 1.3221, "step": 38775 }, { "epoch": 0.5038764348819039, "grad_norm": 0.5102118253707886, "learning_rate": 9.924485631474286e-05, "loss": 1.5115, "step": 38776 }, { "epoch": 0.5038894294258198, "grad_norm": 0.44939833879470825, "learning_rate": 9.924225685283147e-05, "loss": 1.7191, "step": 38777 }, { "epoch": 0.5039024239697357, "grad_norm": 0.4997943639755249, "learning_rate": 9.923965739092008e-05, "loss": 1.2966, "step": 38778 }, { "epoch": 0.5039154185136516, "grad_norm": 0.36760884523391724, "learning_rate": 9.92370579290087e-05, "loss": 1.5071, "step": 38779 }, { "epoch": 0.5039284130575674, "grad_norm": 0.43209052085876465, "learning_rate": 9.923445846709731e-05, "loss": 1.3443, "step": 38780 }, { "epoch": 0.5039414076014833, "grad_norm": 0.38719630241394043, "learning_rate": 9.923185900518594e-05, "loss": 1.3434, "step": 38781 }, { "epoch": 0.5039544021453992, "grad_norm": 0.3981705904006958, "learning_rate": 9.922925954327455e-05, "loss": 1.3683, "step": 38782 }, { "epoch": 0.5039673966893151, "grad_norm": 0.3411445617675781, "learning_rate": 9.922666008136316e-05, "loss": 1.2915, "step": 38783 }, { "epoch": 0.5039803912332309, "grad_norm": 0.37403544783592224, "learning_rate": 9.922406061945177e-05, "loss": 1.3626, "step": 38784 }, { "epoch": 0.5039933857771468, "grad_norm": 0.37046870589256287, "learning_rate": 9.92214611575404e-05, "loss": 1.3146, "step": 38785 }, { "epoch": 0.5040063803210627, "grad_norm": 0.43736395239830017, "learning_rate": 9.9218861695629e-05, "loss": 1.5803, "step": 38786 }, { "epoch": 0.5040193748649786, "grad_norm": 0.39136070013046265, "learning_rate": 9.921626223371763e-05, "loss": 1.4286, "step": 38787 }, { "epoch": 0.5040323694088944, "grad_norm": 0.3854241669178009, "learning_rate": 9.921366277180624e-05, "loss": 1.4189, "step": 38788 }, { "epoch": 0.5040453639528103, "grad_norm": 0.2745227515697479, "learning_rate": 9.921106330989485e-05, "loss": 1.3281, "step": 38789 }, { "epoch": 0.5040583584967262, "grad_norm": 0.3839605748653412, "learning_rate": 9.920846384798346e-05, "loss": 1.3319, "step": 38790 }, { "epoch": 0.5040713530406421, "grad_norm": 0.3870304524898529, "learning_rate": 9.920586438607209e-05, "loss": 1.1957, "step": 38791 }, { "epoch": 0.5040843475845579, "grad_norm": 0.407545268535614, "learning_rate": 9.92032649241607e-05, "loss": 1.5558, "step": 38792 }, { "epoch": 0.5040973421284738, "grad_norm": 0.44787779450416565, "learning_rate": 9.920066546224932e-05, "loss": 1.2102, "step": 38793 }, { "epoch": 0.5041103366723897, "grad_norm": 0.32703739404678345, "learning_rate": 9.919806600033793e-05, "loss": 1.5048, "step": 38794 }, { "epoch": 0.5041233312163056, "grad_norm": 0.3944266438484192, "learning_rate": 9.919546653842655e-05, "loss": 1.2845, "step": 38795 }, { "epoch": 0.5041363257602214, "grad_norm": 0.42107751965522766, "learning_rate": 9.919286707651516e-05, "loss": 1.3035, "step": 38796 }, { "epoch": 0.5041493203041373, "grad_norm": 0.4647158980369568, "learning_rate": 9.919026761460378e-05, "loss": 1.3693, "step": 38797 }, { "epoch": 0.5041623148480532, "grad_norm": 0.498198926448822, "learning_rate": 9.918766815269239e-05, "loss": 1.3105, "step": 38798 }, { "epoch": 0.5041753093919691, "grad_norm": 0.32860368490219116, "learning_rate": 9.918506869078102e-05, "loss": 1.3167, "step": 38799 }, { "epoch": 0.5041883039358849, "grad_norm": 0.5228272676467896, "learning_rate": 9.918246922886963e-05, "loss": 1.5022, "step": 38800 }, { "epoch": 0.5042012984798008, "grad_norm": 0.3779371678829193, "learning_rate": 9.917986976695824e-05, "loss": 1.2422, "step": 38801 }, { "epoch": 0.5042142930237167, "grad_norm": 0.488620400428772, "learning_rate": 9.917727030504686e-05, "loss": 1.472, "step": 38802 }, { "epoch": 0.5042272875676326, "grad_norm": 0.3369017541408539, "learning_rate": 9.917467084313547e-05, "loss": 1.4109, "step": 38803 }, { "epoch": 0.5042402821115484, "grad_norm": 0.44672322273254395, "learning_rate": 9.91720713812241e-05, "loss": 1.2417, "step": 38804 }, { "epoch": 0.5042532766554643, "grad_norm": 0.37324458360671997, "learning_rate": 9.916947191931271e-05, "loss": 1.395, "step": 38805 }, { "epoch": 0.5042662711993802, "grad_norm": 0.39913618564605713, "learning_rate": 9.916687245740133e-05, "loss": 1.2996, "step": 38806 }, { "epoch": 0.5042792657432961, "grad_norm": 0.3723594546318054, "learning_rate": 9.916427299548994e-05, "loss": 1.2374, "step": 38807 }, { "epoch": 0.5042922602872119, "grad_norm": 0.3540034890174866, "learning_rate": 9.916167353357855e-05, "loss": 1.2051, "step": 38808 }, { "epoch": 0.5043052548311278, "grad_norm": 0.3539971113204956, "learning_rate": 9.915907407166717e-05, "loss": 1.3571, "step": 38809 }, { "epoch": 0.5043182493750437, "grad_norm": 0.4498256742954254, "learning_rate": 9.915647460975579e-05, "loss": 1.3756, "step": 38810 }, { "epoch": 0.5043312439189596, "grad_norm": 0.44468775391578674, "learning_rate": 9.91538751478444e-05, "loss": 1.5797, "step": 38811 }, { "epoch": 0.5043442384628753, "grad_norm": 0.4160136878490448, "learning_rate": 9.915127568593303e-05, "loss": 1.2572, "step": 38812 }, { "epoch": 0.5043572330067913, "grad_norm": 0.39796027541160583, "learning_rate": 9.914867622402164e-05, "loss": 1.4628, "step": 38813 }, { "epoch": 0.5043702275507072, "grad_norm": 0.3821994662284851, "learning_rate": 9.914607676211025e-05, "loss": 1.3208, "step": 38814 }, { "epoch": 0.504383222094623, "grad_norm": 0.40463051199913025, "learning_rate": 9.914347730019886e-05, "loss": 1.41, "step": 38815 }, { "epoch": 0.5043962166385388, "grad_norm": 0.3927914500236511, "learning_rate": 9.914087783828748e-05, "loss": 1.3729, "step": 38816 }, { "epoch": 0.5044092111824547, "grad_norm": 0.453128844499588, "learning_rate": 9.91382783763761e-05, "loss": 1.4636, "step": 38817 }, { "epoch": 0.5044222057263706, "grad_norm": 0.40080153942108154, "learning_rate": 9.913567891446472e-05, "loss": 1.3695, "step": 38818 }, { "epoch": 0.5044352002702865, "grad_norm": 0.4682604670524597, "learning_rate": 9.913307945255333e-05, "loss": 1.5759, "step": 38819 }, { "epoch": 0.5044481948142023, "grad_norm": 0.4354437589645386, "learning_rate": 9.913047999064194e-05, "loss": 1.3116, "step": 38820 }, { "epoch": 0.5044611893581182, "grad_norm": 0.3466787040233612, "learning_rate": 9.912788052873055e-05, "loss": 1.353, "step": 38821 }, { "epoch": 0.5044741839020341, "grad_norm": 0.4939839243888855, "learning_rate": 9.912528106681918e-05, "loss": 1.5435, "step": 38822 }, { "epoch": 0.50448717844595, "grad_norm": 0.43136346340179443, "learning_rate": 9.912268160490779e-05, "loss": 1.4541, "step": 38823 }, { "epoch": 0.5045001729898658, "grad_norm": 0.4758949875831604, "learning_rate": 9.912008214299641e-05, "loss": 1.4675, "step": 38824 }, { "epoch": 0.5045131675337817, "grad_norm": 0.39081668853759766, "learning_rate": 9.911748268108502e-05, "loss": 1.4176, "step": 38825 }, { "epoch": 0.5045261620776976, "grad_norm": 0.4540448486804962, "learning_rate": 9.911488321917363e-05, "loss": 1.5372, "step": 38826 }, { "epoch": 0.5045391566216135, "grad_norm": 0.41783830523490906, "learning_rate": 9.911228375726224e-05, "loss": 1.5114, "step": 38827 }, { "epoch": 0.5045521511655294, "grad_norm": 0.41544675827026367, "learning_rate": 9.910968429535087e-05, "loss": 1.2909, "step": 38828 }, { "epoch": 0.5045651457094452, "grad_norm": 0.44033315777778625, "learning_rate": 9.910708483343948e-05, "loss": 1.3254, "step": 38829 }, { "epoch": 0.5045781402533611, "grad_norm": 0.41473838686943054, "learning_rate": 9.91044853715281e-05, "loss": 1.3971, "step": 38830 }, { "epoch": 0.504591134797277, "grad_norm": 0.3708394169807434, "learning_rate": 9.910188590961671e-05, "loss": 1.38, "step": 38831 }, { "epoch": 0.5046041293411929, "grad_norm": 0.35531821846961975, "learning_rate": 9.909928644770533e-05, "loss": 1.2907, "step": 38832 }, { "epoch": 0.5046171238851087, "grad_norm": 0.39857718348503113, "learning_rate": 9.909668698579394e-05, "loss": 1.2973, "step": 38833 }, { "epoch": 0.5046301184290246, "grad_norm": 0.4134206175804138, "learning_rate": 9.909408752388256e-05, "loss": 1.3216, "step": 38834 }, { "epoch": 0.5046431129729405, "grad_norm": 0.38989394903182983, "learning_rate": 9.909148806197117e-05, "loss": 1.3817, "step": 38835 }, { "epoch": 0.5046561075168564, "grad_norm": 0.41405975818634033, "learning_rate": 9.90888886000598e-05, "loss": 1.4141, "step": 38836 }, { "epoch": 0.5046691020607722, "grad_norm": 0.40060386061668396, "learning_rate": 9.908628913814841e-05, "loss": 1.2089, "step": 38837 }, { "epoch": 0.5046820966046881, "grad_norm": 0.43636980652809143, "learning_rate": 9.908368967623702e-05, "loss": 1.3592, "step": 38838 }, { "epoch": 0.504695091148604, "grad_norm": 0.4051605761051178, "learning_rate": 9.908109021432564e-05, "loss": 1.4045, "step": 38839 }, { "epoch": 0.5047080856925199, "grad_norm": 0.3659214973449707, "learning_rate": 9.907849075241425e-05, "loss": 1.2664, "step": 38840 }, { "epoch": 0.5047210802364357, "grad_norm": 0.296008437871933, "learning_rate": 9.907589129050288e-05, "loss": 1.3665, "step": 38841 }, { "epoch": 0.5047340747803516, "grad_norm": 0.45115482807159424, "learning_rate": 9.907329182859149e-05, "loss": 1.5345, "step": 38842 }, { "epoch": 0.5047470693242675, "grad_norm": 0.38254332542419434, "learning_rate": 9.90706923666801e-05, "loss": 1.5103, "step": 38843 }, { "epoch": 0.5047600638681834, "grad_norm": 0.5251911878585815, "learning_rate": 9.906809290476871e-05, "loss": 1.4813, "step": 38844 }, { "epoch": 0.5047730584120992, "grad_norm": 0.3894405961036682, "learning_rate": 9.906549344285734e-05, "loss": 1.3541, "step": 38845 }, { "epoch": 0.5047860529560151, "grad_norm": 0.40301117300987244, "learning_rate": 9.906289398094595e-05, "loss": 1.4809, "step": 38846 }, { "epoch": 0.504799047499931, "grad_norm": 0.3689195215702057, "learning_rate": 9.906029451903457e-05, "loss": 1.2862, "step": 38847 }, { "epoch": 0.5048120420438469, "grad_norm": 0.4489176273345947, "learning_rate": 9.905769505712318e-05, "loss": 1.4535, "step": 38848 }, { "epoch": 0.5048250365877627, "grad_norm": 0.4705716073513031, "learning_rate": 9.90550955952118e-05, "loss": 1.4097, "step": 38849 }, { "epoch": 0.5048380311316786, "grad_norm": 0.3903043270111084, "learning_rate": 9.90524961333004e-05, "loss": 1.3098, "step": 38850 }, { "epoch": 0.5048510256755945, "grad_norm": 0.40608447790145874, "learning_rate": 9.904989667138903e-05, "loss": 1.5943, "step": 38851 }, { "epoch": 0.5048640202195104, "grad_norm": 0.44438889622688293, "learning_rate": 9.904729720947764e-05, "loss": 1.2881, "step": 38852 }, { "epoch": 0.5048770147634262, "grad_norm": 0.3680826723575592, "learning_rate": 9.904469774756626e-05, "loss": 1.4008, "step": 38853 }, { "epoch": 0.5048900093073421, "grad_norm": 0.32832080125808716, "learning_rate": 9.904209828565487e-05, "loss": 1.2712, "step": 38854 }, { "epoch": 0.504903003851258, "grad_norm": 0.3889751732349396, "learning_rate": 9.90394988237435e-05, "loss": 1.1345, "step": 38855 }, { "epoch": 0.5049159983951739, "grad_norm": 0.43394264578819275, "learning_rate": 9.90368993618321e-05, "loss": 1.4156, "step": 38856 }, { "epoch": 0.5049289929390897, "grad_norm": 0.313986212015152, "learning_rate": 9.903429989992072e-05, "loss": 1.4014, "step": 38857 }, { "epoch": 0.5049419874830056, "grad_norm": 0.45069023966789246, "learning_rate": 9.903170043800933e-05, "loss": 1.44, "step": 38858 }, { "epoch": 0.5049549820269215, "grad_norm": 0.3618573844432831, "learning_rate": 9.902910097609796e-05, "loss": 1.4056, "step": 38859 }, { "epoch": 0.5049679765708374, "grad_norm": 0.37697628140449524, "learning_rate": 9.902650151418657e-05, "loss": 1.3655, "step": 38860 }, { "epoch": 0.5049809711147532, "grad_norm": 0.41419944167137146, "learning_rate": 9.902390205227519e-05, "loss": 1.4111, "step": 38861 }, { "epoch": 0.504993965658669, "grad_norm": 0.39759454131126404, "learning_rate": 9.902130259036379e-05, "loss": 1.4643, "step": 38862 }, { "epoch": 0.505006960202585, "grad_norm": 0.4913485646247864, "learning_rate": 9.901870312845241e-05, "loss": 1.5706, "step": 38863 }, { "epoch": 0.5050199547465009, "grad_norm": 0.3975781500339508, "learning_rate": 9.901610366654102e-05, "loss": 1.3024, "step": 38864 }, { "epoch": 0.5050329492904166, "grad_norm": 0.3487274646759033, "learning_rate": 9.901350420462965e-05, "loss": 1.4192, "step": 38865 }, { "epoch": 0.5050459438343325, "grad_norm": 0.3969731032848358, "learning_rate": 9.901090474271826e-05, "loss": 1.2585, "step": 38866 }, { "epoch": 0.5050589383782484, "grad_norm": 0.3640156686306, "learning_rate": 9.900830528080688e-05, "loss": 1.2446, "step": 38867 }, { "epoch": 0.5050719329221643, "grad_norm": 0.40280553698539734, "learning_rate": 9.90057058188955e-05, "loss": 1.1372, "step": 38868 }, { "epoch": 0.5050849274660801, "grad_norm": 0.39918363094329834, "learning_rate": 9.90031063569841e-05, "loss": 1.302, "step": 38869 }, { "epoch": 0.505097922009996, "grad_norm": 0.3400992155075073, "learning_rate": 9.900050689507272e-05, "loss": 1.5995, "step": 38870 }, { "epoch": 0.5051109165539119, "grad_norm": 0.37101495265960693, "learning_rate": 9.899790743316134e-05, "loss": 1.4047, "step": 38871 }, { "epoch": 0.5051239110978278, "grad_norm": 0.4693084955215454, "learning_rate": 9.899530797124995e-05, "loss": 1.4236, "step": 38872 }, { "epoch": 0.5051369056417436, "grad_norm": 0.3461991846561432, "learning_rate": 9.899270850933858e-05, "loss": 1.4447, "step": 38873 }, { "epoch": 0.5051499001856595, "grad_norm": 0.4144139885902405, "learning_rate": 9.899010904742719e-05, "loss": 1.3334, "step": 38874 }, { "epoch": 0.5051628947295754, "grad_norm": 0.4546304941177368, "learning_rate": 9.89875095855158e-05, "loss": 1.3361, "step": 38875 }, { "epoch": 0.5051758892734913, "grad_norm": 0.32750195264816284, "learning_rate": 9.898491012360442e-05, "loss": 1.2921, "step": 38876 }, { "epoch": 0.5051888838174071, "grad_norm": 0.346220999956131, "learning_rate": 9.898231066169303e-05, "loss": 1.358, "step": 38877 }, { "epoch": 0.505201878361323, "grad_norm": 0.4384554624557495, "learning_rate": 9.897971119978166e-05, "loss": 1.3008, "step": 38878 }, { "epoch": 0.5052148729052389, "grad_norm": 0.405154287815094, "learning_rate": 9.897711173787027e-05, "loss": 1.3757, "step": 38879 }, { "epoch": 0.5052278674491548, "grad_norm": 0.39751484990119934, "learning_rate": 9.897451227595888e-05, "loss": 1.5712, "step": 38880 }, { "epoch": 0.5052408619930706, "grad_norm": 0.37242722511291504, "learning_rate": 9.897191281404749e-05, "loss": 1.4477, "step": 38881 }, { "epoch": 0.5052538565369865, "grad_norm": 0.4137822389602661, "learning_rate": 9.896931335213612e-05, "loss": 1.4915, "step": 38882 }, { "epoch": 0.5052668510809024, "grad_norm": 0.3734016716480255, "learning_rate": 9.896671389022473e-05, "loss": 1.3301, "step": 38883 }, { "epoch": 0.5052798456248183, "grad_norm": 0.4223211109638214, "learning_rate": 9.896411442831335e-05, "loss": 1.5394, "step": 38884 }, { "epoch": 0.5052928401687341, "grad_norm": 0.3834093511104584, "learning_rate": 9.896151496640196e-05, "loss": 1.4788, "step": 38885 }, { "epoch": 0.50530583471265, "grad_norm": 0.40939652919769287, "learning_rate": 9.895891550449057e-05, "loss": 1.3724, "step": 38886 }, { "epoch": 0.5053188292565659, "grad_norm": 0.4255099296569824, "learning_rate": 9.895631604257918e-05, "loss": 1.398, "step": 38887 }, { "epoch": 0.5053318238004818, "grad_norm": 0.4145475924015045, "learning_rate": 9.895371658066781e-05, "loss": 1.3874, "step": 38888 }, { "epoch": 0.5053448183443976, "grad_norm": 0.35068678855895996, "learning_rate": 9.895111711875642e-05, "loss": 1.2846, "step": 38889 }, { "epoch": 0.5053578128883135, "grad_norm": 0.3572659492492676, "learning_rate": 9.894851765684504e-05, "loss": 1.4202, "step": 38890 }, { "epoch": 0.5053708074322294, "grad_norm": 0.4885404109954834, "learning_rate": 9.894591819493366e-05, "loss": 1.4595, "step": 38891 }, { "epoch": 0.5053838019761453, "grad_norm": 0.3293319344520569, "learning_rate": 9.894331873302227e-05, "loss": 1.2981, "step": 38892 }, { "epoch": 0.5053967965200611, "grad_norm": 0.43456119298934937, "learning_rate": 9.894071927111088e-05, "loss": 1.251, "step": 38893 }, { "epoch": 0.505409791063977, "grad_norm": 0.39235758781433105, "learning_rate": 9.89381198091995e-05, "loss": 1.5713, "step": 38894 }, { "epoch": 0.5054227856078929, "grad_norm": 0.39271003007888794, "learning_rate": 9.893552034728811e-05, "loss": 1.4574, "step": 38895 }, { "epoch": 0.5054357801518088, "grad_norm": 0.4353560209274292, "learning_rate": 9.893292088537674e-05, "loss": 1.3783, "step": 38896 }, { "epoch": 0.5054487746957246, "grad_norm": 0.41434329748153687, "learning_rate": 9.893032142346535e-05, "loss": 1.3336, "step": 38897 }, { "epoch": 0.5054617692396405, "grad_norm": 0.3412036597728729, "learning_rate": 9.892772196155396e-05, "loss": 1.1938, "step": 38898 }, { "epoch": 0.5054747637835564, "grad_norm": 0.37865379452705383, "learning_rate": 9.892512249964257e-05, "loss": 1.1599, "step": 38899 }, { "epoch": 0.5054877583274723, "grad_norm": 0.7900471687316895, "learning_rate": 9.89225230377312e-05, "loss": 1.4303, "step": 38900 }, { "epoch": 0.5055007528713881, "grad_norm": 0.42494142055511475, "learning_rate": 9.89199235758198e-05, "loss": 1.3065, "step": 38901 }, { "epoch": 0.505513747415304, "grad_norm": 0.3619062602519989, "learning_rate": 9.891732411390843e-05, "loss": 1.4312, "step": 38902 }, { "epoch": 0.5055267419592199, "grad_norm": 0.4374343752861023, "learning_rate": 9.891472465199704e-05, "loss": 1.2428, "step": 38903 }, { "epoch": 0.5055397365031358, "grad_norm": 0.3707488775253296, "learning_rate": 9.891212519008565e-05, "loss": 1.4428, "step": 38904 }, { "epoch": 0.5055527310470517, "grad_norm": 0.427950918674469, "learning_rate": 9.890952572817426e-05, "loss": 1.349, "step": 38905 }, { "epoch": 0.5055657255909675, "grad_norm": 0.39433446526527405, "learning_rate": 9.890692626626289e-05, "loss": 1.3938, "step": 38906 }, { "epoch": 0.5055787201348834, "grad_norm": 0.44621676206588745, "learning_rate": 9.89043268043515e-05, "loss": 1.5067, "step": 38907 }, { "epoch": 0.5055917146787993, "grad_norm": 0.4836346507072449, "learning_rate": 9.890172734244012e-05, "loss": 1.5258, "step": 38908 }, { "epoch": 0.5056047092227152, "grad_norm": 0.34925729036331177, "learning_rate": 9.889912788052873e-05, "loss": 1.2934, "step": 38909 }, { "epoch": 0.505617703766631, "grad_norm": 0.4168814420700073, "learning_rate": 9.889652841861736e-05, "loss": 1.339, "step": 38910 }, { "epoch": 0.5056306983105469, "grad_norm": 0.311712384223938, "learning_rate": 9.889392895670596e-05, "loss": 1.1879, "step": 38911 }, { "epoch": 0.5056436928544628, "grad_norm": 0.3923894166946411, "learning_rate": 9.889132949479458e-05, "loss": 1.3416, "step": 38912 }, { "epoch": 0.5056566873983787, "grad_norm": 0.32185640931129456, "learning_rate": 9.88887300328832e-05, "loss": 1.3659, "step": 38913 }, { "epoch": 0.5056696819422944, "grad_norm": 0.4125804603099823, "learning_rate": 9.888613057097182e-05, "loss": 1.2537, "step": 38914 }, { "epoch": 0.5056826764862103, "grad_norm": 0.4877341091632843, "learning_rate": 9.888353110906044e-05, "loss": 1.4629, "step": 38915 }, { "epoch": 0.5056956710301262, "grad_norm": 0.25266018509864807, "learning_rate": 9.888093164714905e-05, "loss": 1.1514, "step": 38916 }, { "epoch": 0.5057086655740421, "grad_norm": 0.39433324337005615, "learning_rate": 9.887833218523766e-05, "loss": 1.2624, "step": 38917 }, { "epoch": 0.5057216601179579, "grad_norm": 0.40970510244369507, "learning_rate": 9.887573272332627e-05, "loss": 1.316, "step": 38918 }, { "epoch": 0.5057346546618738, "grad_norm": 0.41925790905952454, "learning_rate": 9.88731332614149e-05, "loss": 1.4473, "step": 38919 }, { "epoch": 0.5057476492057897, "grad_norm": 0.4835989475250244, "learning_rate": 9.887053379950351e-05, "loss": 1.4043, "step": 38920 }, { "epoch": 0.5057606437497056, "grad_norm": 0.4846076965332031, "learning_rate": 9.886793433759213e-05, "loss": 1.2623, "step": 38921 }, { "epoch": 0.5057736382936214, "grad_norm": 0.39049088954925537, "learning_rate": 9.886533487568074e-05, "loss": 1.3817, "step": 38922 }, { "epoch": 0.5057866328375373, "grad_norm": 0.4195057451725006, "learning_rate": 9.886273541376935e-05, "loss": 1.4288, "step": 38923 }, { "epoch": 0.5057996273814532, "grad_norm": 0.34025663137435913, "learning_rate": 9.886013595185797e-05, "loss": 1.206, "step": 38924 }, { "epoch": 0.5058126219253691, "grad_norm": 0.3454870879650116, "learning_rate": 9.885753648994659e-05, "loss": 1.4942, "step": 38925 }, { "epoch": 0.5058256164692849, "grad_norm": 0.4575982391834259, "learning_rate": 9.88549370280352e-05, "loss": 1.3897, "step": 38926 }, { "epoch": 0.5058386110132008, "grad_norm": 0.442023903131485, "learning_rate": 9.885233756612382e-05, "loss": 1.3317, "step": 38927 }, { "epoch": 0.5058516055571167, "grad_norm": 0.32065555453300476, "learning_rate": 9.884973810421244e-05, "loss": 1.2771, "step": 38928 }, { "epoch": 0.5058646001010326, "grad_norm": 0.3846389949321747, "learning_rate": 9.884713864230105e-05, "loss": 1.467, "step": 38929 }, { "epoch": 0.5058775946449484, "grad_norm": 0.4648045003414154, "learning_rate": 9.884453918038966e-05, "loss": 1.3416, "step": 38930 }, { "epoch": 0.5058905891888643, "grad_norm": 0.3404772877693176, "learning_rate": 9.884193971847828e-05, "loss": 1.5463, "step": 38931 }, { "epoch": 0.5059035837327802, "grad_norm": 0.4219919443130493, "learning_rate": 9.883934025656689e-05, "loss": 1.4257, "step": 38932 }, { "epoch": 0.5059165782766961, "grad_norm": 0.6982499957084656, "learning_rate": 9.883674079465552e-05, "loss": 1.3381, "step": 38933 }, { "epoch": 0.5059295728206119, "grad_norm": 0.3580003082752228, "learning_rate": 9.883414133274413e-05, "loss": 1.451, "step": 38934 }, { "epoch": 0.5059425673645278, "grad_norm": 0.39006948471069336, "learning_rate": 9.883154187083274e-05, "loss": 1.3216, "step": 38935 }, { "epoch": 0.5059555619084437, "grad_norm": 0.6378609538078308, "learning_rate": 9.882894240892135e-05, "loss": 1.2982, "step": 38936 }, { "epoch": 0.5059685564523596, "grad_norm": 0.3807947635650635, "learning_rate": 9.882634294700997e-05, "loss": 1.2957, "step": 38937 }, { "epoch": 0.5059815509962754, "grad_norm": 0.4316331446170807, "learning_rate": 9.882374348509859e-05, "loss": 1.4239, "step": 38938 }, { "epoch": 0.5059945455401913, "grad_norm": 0.38596537709236145, "learning_rate": 9.882114402318721e-05, "loss": 1.4194, "step": 38939 }, { "epoch": 0.5060075400841072, "grad_norm": 0.41458335518836975, "learning_rate": 9.881854456127582e-05, "loss": 1.239, "step": 38940 }, { "epoch": 0.5060205346280231, "grad_norm": 0.4952690303325653, "learning_rate": 9.881594509936443e-05, "loss": 1.5313, "step": 38941 }, { "epoch": 0.5060335291719389, "grad_norm": 0.4348819851875305, "learning_rate": 9.881334563745304e-05, "loss": 1.2912, "step": 38942 }, { "epoch": 0.5060465237158548, "grad_norm": 0.4328376054763794, "learning_rate": 9.881074617554167e-05, "loss": 1.5716, "step": 38943 }, { "epoch": 0.5060595182597707, "grad_norm": 0.3950212299823761, "learning_rate": 9.880814671363028e-05, "loss": 1.3277, "step": 38944 }, { "epoch": 0.5060725128036866, "grad_norm": 0.4061471223831177, "learning_rate": 9.88055472517189e-05, "loss": 1.4136, "step": 38945 }, { "epoch": 0.5060855073476024, "grad_norm": 0.35404038429260254, "learning_rate": 9.880294778980751e-05, "loss": 1.4198, "step": 38946 }, { "epoch": 0.5060985018915183, "grad_norm": 0.3556307852268219, "learning_rate": 9.880034832789612e-05, "loss": 1.2953, "step": 38947 }, { "epoch": 0.5061114964354342, "grad_norm": 0.4540856182575226, "learning_rate": 9.879774886598474e-05, "loss": 1.336, "step": 38948 }, { "epoch": 0.5061244909793501, "grad_norm": 0.5268908739089966, "learning_rate": 9.879514940407336e-05, "loss": 1.3452, "step": 38949 }, { "epoch": 0.5061374855232659, "grad_norm": 0.4249366521835327, "learning_rate": 9.879254994216198e-05, "loss": 1.3342, "step": 38950 }, { "epoch": 0.5061504800671818, "grad_norm": 0.42012661695480347, "learning_rate": 9.87899504802506e-05, "loss": 1.349, "step": 38951 }, { "epoch": 0.5061634746110977, "grad_norm": 0.43349936604499817, "learning_rate": 9.878735101833922e-05, "loss": 1.4633, "step": 38952 }, { "epoch": 0.5061764691550136, "grad_norm": 0.4422926902770996, "learning_rate": 9.878475155642782e-05, "loss": 1.3919, "step": 38953 }, { "epoch": 0.5061894636989294, "grad_norm": 0.4255259335041046, "learning_rate": 9.878215209451644e-05, "loss": 1.4353, "step": 38954 }, { "epoch": 0.5062024582428453, "grad_norm": 0.38872551918029785, "learning_rate": 9.877955263260505e-05, "loss": 1.2441, "step": 38955 }, { "epoch": 0.5062154527867612, "grad_norm": 0.4083339273929596, "learning_rate": 9.877695317069368e-05, "loss": 1.4566, "step": 38956 }, { "epoch": 0.5062284473306771, "grad_norm": 0.3543145954608917, "learning_rate": 9.877435370878229e-05, "loss": 1.3937, "step": 38957 }, { "epoch": 0.5062414418745929, "grad_norm": 0.3212784230709076, "learning_rate": 9.877175424687091e-05, "loss": 1.1536, "step": 38958 }, { "epoch": 0.5062544364185088, "grad_norm": 0.25021910667419434, "learning_rate": 9.876915478495951e-05, "loss": 1.3443, "step": 38959 }, { "epoch": 0.5062674309624247, "grad_norm": 0.31177476048469543, "learning_rate": 9.876655532304813e-05, "loss": 1.2439, "step": 38960 }, { "epoch": 0.5062804255063406, "grad_norm": 0.3537243902683258, "learning_rate": 9.876395586113675e-05, "loss": 1.5179, "step": 38961 }, { "epoch": 0.5062934200502563, "grad_norm": 0.44082117080688477, "learning_rate": 9.876135639922537e-05, "loss": 1.4276, "step": 38962 }, { "epoch": 0.5063064145941722, "grad_norm": 0.36659103631973267, "learning_rate": 9.875875693731398e-05, "loss": 1.327, "step": 38963 }, { "epoch": 0.5063194091380882, "grad_norm": 0.3681972622871399, "learning_rate": 9.87561574754026e-05, "loss": 1.4731, "step": 38964 }, { "epoch": 0.506332403682004, "grad_norm": 0.3321286141872406, "learning_rate": 9.87535580134912e-05, "loss": 1.2838, "step": 38965 }, { "epoch": 0.5063453982259198, "grad_norm": 0.4140274226665497, "learning_rate": 9.875095855157983e-05, "loss": 1.5132, "step": 38966 }, { "epoch": 0.5063583927698357, "grad_norm": 0.4781914949417114, "learning_rate": 9.874835908966844e-05, "loss": 1.4204, "step": 38967 }, { "epoch": 0.5063713873137516, "grad_norm": 0.42894309759140015, "learning_rate": 9.874575962775706e-05, "loss": 1.3322, "step": 38968 }, { "epoch": 0.5063843818576675, "grad_norm": 0.3696809411048889, "learning_rate": 9.874316016584567e-05, "loss": 1.0725, "step": 38969 }, { "epoch": 0.5063973764015833, "grad_norm": 0.3757809102535248, "learning_rate": 9.87405607039343e-05, "loss": 1.3738, "step": 38970 }, { "epoch": 0.5064103709454992, "grad_norm": 0.4483136832714081, "learning_rate": 9.873796124202291e-05, "loss": 1.3979, "step": 38971 }, { "epoch": 0.5064233654894151, "grad_norm": 0.3810258209705353, "learning_rate": 9.873536178011152e-05, "loss": 1.3499, "step": 38972 }, { "epoch": 0.506436360033331, "grad_norm": 0.43310102820396423, "learning_rate": 9.873276231820013e-05, "loss": 1.2242, "step": 38973 }, { "epoch": 0.5064493545772468, "grad_norm": 0.5236282348632812, "learning_rate": 9.873016285628876e-05, "loss": 1.3746, "step": 38974 }, { "epoch": 0.5064623491211627, "grad_norm": 0.37277260422706604, "learning_rate": 9.872756339437737e-05, "loss": 1.2893, "step": 38975 }, { "epoch": 0.5064753436650786, "grad_norm": 0.4919889569282532, "learning_rate": 9.872496393246599e-05, "loss": 1.3748, "step": 38976 }, { "epoch": 0.5064883382089945, "grad_norm": 0.47015029191970825, "learning_rate": 9.87223644705546e-05, "loss": 1.3494, "step": 38977 }, { "epoch": 0.5065013327529104, "grad_norm": 0.41589295864105225, "learning_rate": 9.871976500864321e-05, "loss": 1.1958, "step": 38978 }, { "epoch": 0.5065143272968262, "grad_norm": 0.45331794023513794, "learning_rate": 9.871716554673182e-05, "loss": 1.5814, "step": 38979 }, { "epoch": 0.5065273218407421, "grad_norm": 0.3352465033531189, "learning_rate": 9.871456608482045e-05, "loss": 1.3548, "step": 38980 }, { "epoch": 0.506540316384658, "grad_norm": 0.359514981508255, "learning_rate": 9.871196662290906e-05, "loss": 1.2862, "step": 38981 }, { "epoch": 0.5065533109285739, "grad_norm": 0.4791959822177887, "learning_rate": 9.870936716099768e-05, "loss": 1.3639, "step": 38982 }, { "epoch": 0.5065663054724897, "grad_norm": 0.43867239356040955, "learning_rate": 9.87067676990863e-05, "loss": 1.3326, "step": 38983 }, { "epoch": 0.5065793000164056, "grad_norm": 0.44322293996810913, "learning_rate": 9.87041682371749e-05, "loss": 1.3724, "step": 38984 }, { "epoch": 0.5065922945603215, "grad_norm": 0.41319164633750916, "learning_rate": 9.870156877526352e-05, "loss": 1.2863, "step": 38985 }, { "epoch": 0.5066052891042374, "grad_norm": 0.31432044506073, "learning_rate": 9.869896931335214e-05, "loss": 1.2856, "step": 38986 }, { "epoch": 0.5066182836481532, "grad_norm": 0.4204002618789673, "learning_rate": 9.869636985144077e-05, "loss": 1.3697, "step": 38987 }, { "epoch": 0.5066312781920691, "grad_norm": 0.3710818290710449, "learning_rate": 9.869377038952938e-05, "loss": 1.5202, "step": 38988 }, { "epoch": 0.506644272735985, "grad_norm": 0.39185553789138794, "learning_rate": 9.869117092761799e-05, "loss": 1.1378, "step": 38989 }, { "epoch": 0.5066572672799009, "grad_norm": 0.4404742121696472, "learning_rate": 9.86885714657066e-05, "loss": 1.3626, "step": 38990 }, { "epoch": 0.5066702618238167, "grad_norm": 0.3249245882034302, "learning_rate": 9.868597200379522e-05, "loss": 1.2427, "step": 38991 }, { "epoch": 0.5066832563677326, "grad_norm": 0.4008246660232544, "learning_rate": 9.868337254188383e-05, "loss": 1.4032, "step": 38992 }, { "epoch": 0.5066962509116485, "grad_norm": 0.4222026467323303, "learning_rate": 9.868077307997246e-05, "loss": 1.5242, "step": 38993 }, { "epoch": 0.5067092454555644, "grad_norm": 0.5606344938278198, "learning_rate": 9.867817361806107e-05, "loss": 1.4008, "step": 38994 }, { "epoch": 0.5067222399994802, "grad_norm": 0.4248940646648407, "learning_rate": 9.867557415614968e-05, "loss": 1.2444, "step": 38995 }, { "epoch": 0.5067352345433961, "grad_norm": 0.47065213322639465, "learning_rate": 9.867297469423829e-05, "loss": 1.4587, "step": 38996 }, { "epoch": 0.506748229087312, "grad_norm": 0.36539891362190247, "learning_rate": 9.867037523232692e-05, "loss": 1.3329, "step": 38997 }, { "epoch": 0.5067612236312279, "grad_norm": 0.43173322081565857, "learning_rate": 9.866777577041553e-05, "loss": 1.5516, "step": 38998 }, { "epoch": 0.5067742181751437, "grad_norm": 0.37651559710502625, "learning_rate": 9.866517630850415e-05, "loss": 1.2981, "step": 38999 }, { "epoch": 0.5067872127190596, "grad_norm": 0.43880975246429443, "learning_rate": 9.866257684659276e-05, "loss": 1.5334, "step": 39000 }, { "epoch": 0.5068002072629755, "grad_norm": 0.5258310437202454, "learning_rate": 9.865997738468137e-05, "loss": 1.412, "step": 39001 }, { "epoch": 0.5068132018068914, "grad_norm": 0.364282488822937, "learning_rate": 9.865737792276998e-05, "loss": 1.4415, "step": 39002 }, { "epoch": 0.5068261963508072, "grad_norm": 0.425642728805542, "learning_rate": 9.865477846085861e-05, "loss": 1.3678, "step": 39003 }, { "epoch": 0.5068391908947231, "grad_norm": 0.4209429621696472, "learning_rate": 9.865217899894722e-05, "loss": 1.621, "step": 39004 }, { "epoch": 0.506852185438639, "grad_norm": 0.3565278649330139, "learning_rate": 9.864957953703584e-05, "loss": 1.3581, "step": 39005 }, { "epoch": 0.5068651799825549, "grad_norm": 0.44529885053634644, "learning_rate": 9.864698007512445e-05, "loss": 1.415, "step": 39006 }, { "epoch": 0.5068781745264707, "grad_norm": 0.48272332549095154, "learning_rate": 9.864438061321307e-05, "loss": 1.4084, "step": 39007 }, { "epoch": 0.5068911690703866, "grad_norm": 0.4302612543106079, "learning_rate": 9.864178115130168e-05, "loss": 1.2888, "step": 39008 }, { "epoch": 0.5069041636143025, "grad_norm": 0.4196456968784332, "learning_rate": 9.86391816893903e-05, "loss": 1.4415, "step": 39009 }, { "epoch": 0.5069171581582184, "grad_norm": 0.4898987114429474, "learning_rate": 9.863658222747891e-05, "loss": 1.487, "step": 39010 }, { "epoch": 0.5069301527021342, "grad_norm": 0.3473092019557953, "learning_rate": 9.863398276556754e-05, "loss": 1.353, "step": 39011 }, { "epoch": 0.50694314724605, "grad_norm": 0.37998655438423157, "learning_rate": 9.863138330365615e-05, "loss": 1.376, "step": 39012 }, { "epoch": 0.506956141789966, "grad_norm": 0.4746607542037964, "learning_rate": 9.862878384174477e-05, "loss": 1.5109, "step": 39013 }, { "epoch": 0.5069691363338819, "grad_norm": 0.32741448283195496, "learning_rate": 9.862618437983337e-05, "loss": 1.323, "step": 39014 }, { "epoch": 0.5069821308777976, "grad_norm": 0.39243850111961365, "learning_rate": 9.8623584917922e-05, "loss": 1.5047, "step": 39015 }, { "epoch": 0.5069951254217135, "grad_norm": 0.4433901906013489, "learning_rate": 9.86209854560106e-05, "loss": 1.4594, "step": 39016 }, { "epoch": 0.5070081199656294, "grad_norm": 0.3639475405216217, "learning_rate": 9.861838599409923e-05, "loss": 1.4614, "step": 39017 }, { "epoch": 0.5070211145095453, "grad_norm": 0.3475119471549988, "learning_rate": 9.861578653218784e-05, "loss": 1.3216, "step": 39018 }, { "epoch": 0.5070341090534611, "grad_norm": 0.37647977471351624, "learning_rate": 9.861318707027646e-05, "loss": 1.5529, "step": 39019 }, { "epoch": 0.507047103597377, "grad_norm": 0.31217002868652344, "learning_rate": 9.861058760836506e-05, "loss": 1.5192, "step": 39020 }, { "epoch": 0.5070600981412929, "grad_norm": 0.39649656414985657, "learning_rate": 9.860798814645369e-05, "loss": 1.4413, "step": 39021 }, { "epoch": 0.5070730926852088, "grad_norm": 0.26817792654037476, "learning_rate": 9.86053886845423e-05, "loss": 1.3552, "step": 39022 }, { "epoch": 0.5070860872291246, "grad_norm": 0.42410463094711304, "learning_rate": 9.860278922263092e-05, "loss": 1.3251, "step": 39023 }, { "epoch": 0.5070990817730405, "grad_norm": 0.4862684905529022, "learning_rate": 9.860018976071955e-05, "loss": 1.3354, "step": 39024 }, { "epoch": 0.5071120763169564, "grad_norm": 0.384348064661026, "learning_rate": 9.859759029880816e-05, "loss": 1.3511, "step": 39025 }, { "epoch": 0.5071250708608723, "grad_norm": 0.47815629839897156, "learning_rate": 9.859499083689677e-05, "loss": 1.4294, "step": 39026 }, { "epoch": 0.5071380654047881, "grad_norm": 0.26062968373298645, "learning_rate": 9.859239137498538e-05, "loss": 1.1733, "step": 39027 }, { "epoch": 0.507151059948704, "grad_norm": 0.35596537590026855, "learning_rate": 9.8589791913074e-05, "loss": 1.5939, "step": 39028 }, { "epoch": 0.5071640544926199, "grad_norm": 0.47660031914711, "learning_rate": 9.858719245116261e-05, "loss": 1.3364, "step": 39029 }, { "epoch": 0.5071770490365358, "grad_norm": 0.7936979532241821, "learning_rate": 9.858459298925124e-05, "loss": 1.2979, "step": 39030 }, { "epoch": 0.5071900435804516, "grad_norm": 0.4277774691581726, "learning_rate": 9.858199352733985e-05, "loss": 1.2778, "step": 39031 }, { "epoch": 0.5072030381243675, "grad_norm": 0.42370083928108215, "learning_rate": 9.857939406542846e-05, "loss": 1.3735, "step": 39032 }, { "epoch": 0.5072160326682834, "grad_norm": 0.28332576155662537, "learning_rate": 9.857679460351707e-05, "loss": 1.2286, "step": 39033 }, { "epoch": 0.5072290272121993, "grad_norm": 0.4282630980014801, "learning_rate": 9.85741951416057e-05, "loss": 1.3522, "step": 39034 }, { "epoch": 0.5072420217561151, "grad_norm": 0.2657581567764282, "learning_rate": 9.857159567969431e-05, "loss": 1.2845, "step": 39035 }, { "epoch": 0.507255016300031, "grad_norm": 0.5029018521308899, "learning_rate": 9.856899621778293e-05, "loss": 1.3669, "step": 39036 }, { "epoch": 0.5072680108439469, "grad_norm": 0.44701889157295227, "learning_rate": 9.856639675587154e-05, "loss": 1.4461, "step": 39037 }, { "epoch": 0.5072810053878628, "grad_norm": 0.45277971029281616, "learning_rate": 9.856379729396015e-05, "loss": 1.4303, "step": 39038 }, { "epoch": 0.5072939999317786, "grad_norm": 0.3852752447128296, "learning_rate": 9.856119783204876e-05, "loss": 1.4924, "step": 39039 }, { "epoch": 0.5073069944756945, "grad_norm": 0.3843984007835388, "learning_rate": 9.855859837013739e-05, "loss": 1.4405, "step": 39040 }, { "epoch": 0.5073199890196104, "grad_norm": 0.29779189825057983, "learning_rate": 9.8555998908226e-05, "loss": 1.5695, "step": 39041 }, { "epoch": 0.5073329835635263, "grad_norm": 0.5727490186691284, "learning_rate": 9.855339944631462e-05, "loss": 1.3484, "step": 39042 }, { "epoch": 0.5073459781074421, "grad_norm": 0.39712148904800415, "learning_rate": 9.855079998440324e-05, "loss": 1.5144, "step": 39043 }, { "epoch": 0.507358972651358, "grad_norm": 0.4007851779460907, "learning_rate": 9.854820052249185e-05, "loss": 1.3443, "step": 39044 }, { "epoch": 0.5073719671952739, "grad_norm": 0.3793415129184723, "learning_rate": 9.854560106058046e-05, "loss": 1.3422, "step": 39045 }, { "epoch": 0.5073849617391898, "grad_norm": 0.46783238649368286, "learning_rate": 9.854300159866908e-05, "loss": 1.5594, "step": 39046 }, { "epoch": 0.5073979562831056, "grad_norm": 0.38685062527656555, "learning_rate": 9.854040213675769e-05, "loss": 1.4416, "step": 39047 }, { "epoch": 0.5074109508270215, "grad_norm": 0.49881842732429504, "learning_rate": 9.853780267484632e-05, "loss": 1.3938, "step": 39048 }, { "epoch": 0.5074239453709374, "grad_norm": 0.4347364902496338, "learning_rate": 9.853520321293493e-05, "loss": 1.497, "step": 39049 }, { "epoch": 0.5074369399148533, "grad_norm": 0.45195472240448, "learning_rate": 9.853260375102354e-05, "loss": 1.4096, "step": 39050 }, { "epoch": 0.5074499344587691, "grad_norm": 0.37484413385391235, "learning_rate": 9.853000428911215e-05, "loss": 1.4546, "step": 39051 }, { "epoch": 0.507462929002685, "grad_norm": 0.37836262583732605, "learning_rate": 9.852740482720077e-05, "loss": 1.1198, "step": 39052 }, { "epoch": 0.5074759235466009, "grad_norm": 0.32535144686698914, "learning_rate": 9.852480536528939e-05, "loss": 1.4362, "step": 39053 }, { "epoch": 0.5074889180905168, "grad_norm": 0.45424124598503113, "learning_rate": 9.852220590337801e-05, "loss": 1.3379, "step": 39054 }, { "epoch": 0.5075019126344327, "grad_norm": 0.36031848192214966, "learning_rate": 9.851960644146662e-05, "loss": 1.4038, "step": 39055 }, { "epoch": 0.5075149071783485, "grad_norm": 0.40901780128479004, "learning_rate": 9.851700697955523e-05, "loss": 1.5447, "step": 39056 }, { "epoch": 0.5075279017222644, "grad_norm": 0.427380234003067, "learning_rate": 9.851440751764384e-05, "loss": 1.492, "step": 39057 }, { "epoch": 0.5075408962661803, "grad_norm": 0.38958027958869934, "learning_rate": 9.851180805573247e-05, "loss": 1.4365, "step": 39058 }, { "epoch": 0.5075538908100962, "grad_norm": 0.4590432643890381, "learning_rate": 9.850920859382108e-05, "loss": 1.4199, "step": 39059 }, { "epoch": 0.507566885354012, "grad_norm": 0.47250598669052124, "learning_rate": 9.85066091319097e-05, "loss": 1.3995, "step": 39060 }, { "epoch": 0.5075798798979279, "grad_norm": 0.36515259742736816, "learning_rate": 9.850400966999833e-05, "loss": 1.2916, "step": 39061 }, { "epoch": 0.5075928744418438, "grad_norm": 0.3330346345901489, "learning_rate": 9.850141020808692e-05, "loss": 1.5427, "step": 39062 }, { "epoch": 0.5076058689857597, "grad_norm": 0.42624878883361816, "learning_rate": 9.849881074617555e-05, "loss": 1.312, "step": 39063 }, { "epoch": 0.5076188635296754, "grad_norm": 0.44789859652519226, "learning_rate": 9.849621128426416e-05, "loss": 1.23, "step": 39064 }, { "epoch": 0.5076318580735913, "grad_norm": 0.37938860058784485, "learning_rate": 9.849361182235278e-05, "loss": 1.4681, "step": 39065 }, { "epoch": 0.5076448526175072, "grad_norm": 0.3430190682411194, "learning_rate": 9.84910123604414e-05, "loss": 1.4245, "step": 39066 }, { "epoch": 0.5076578471614231, "grad_norm": 0.4929802417755127, "learning_rate": 9.848841289853002e-05, "loss": 1.4092, "step": 39067 }, { "epoch": 0.5076708417053389, "grad_norm": 0.44188693165779114, "learning_rate": 9.848581343661862e-05, "loss": 1.2332, "step": 39068 }, { "epoch": 0.5076838362492548, "grad_norm": 0.391022264957428, "learning_rate": 9.848321397470724e-05, "loss": 1.2075, "step": 39069 }, { "epoch": 0.5076968307931707, "grad_norm": 0.41861647367477417, "learning_rate": 9.848061451279585e-05, "loss": 1.4019, "step": 39070 }, { "epoch": 0.5077098253370866, "grad_norm": 0.3480142056941986, "learning_rate": 9.847801505088448e-05, "loss": 1.4242, "step": 39071 }, { "epoch": 0.5077228198810024, "grad_norm": 0.3744737207889557, "learning_rate": 9.847541558897309e-05, "loss": 1.4627, "step": 39072 }, { "epoch": 0.5077358144249183, "grad_norm": 0.4499123692512512, "learning_rate": 9.847281612706171e-05, "loss": 1.4574, "step": 39073 }, { "epoch": 0.5077488089688342, "grad_norm": 0.34960511326789856, "learning_rate": 9.847021666515032e-05, "loss": 1.3298, "step": 39074 }, { "epoch": 0.5077618035127501, "grad_norm": 0.36144495010375977, "learning_rate": 9.846761720323893e-05, "loss": 1.2475, "step": 39075 }, { "epoch": 0.5077747980566659, "grad_norm": 0.41158127784729004, "learning_rate": 9.846501774132754e-05, "loss": 1.3089, "step": 39076 }, { "epoch": 0.5077877926005818, "grad_norm": 0.33869001269340515, "learning_rate": 9.846241827941617e-05, "loss": 1.4093, "step": 39077 }, { "epoch": 0.5078007871444977, "grad_norm": 0.3813190162181854, "learning_rate": 9.845981881750478e-05, "loss": 1.3357, "step": 39078 }, { "epoch": 0.5078137816884136, "grad_norm": 0.36648333072662354, "learning_rate": 9.84572193555934e-05, "loss": 1.3756, "step": 39079 }, { "epoch": 0.5078267762323294, "grad_norm": 0.4366242289543152, "learning_rate": 9.845461989368202e-05, "loss": 1.3843, "step": 39080 }, { "epoch": 0.5078397707762453, "grad_norm": 0.4592461585998535, "learning_rate": 9.845202043177063e-05, "loss": 1.3387, "step": 39081 }, { "epoch": 0.5078527653201612, "grad_norm": 0.35144972801208496, "learning_rate": 9.844942096985924e-05, "loss": 1.2979, "step": 39082 }, { "epoch": 0.5078657598640771, "grad_norm": 0.4178408086299896, "learning_rate": 9.844682150794786e-05, "loss": 1.2145, "step": 39083 }, { "epoch": 0.5078787544079929, "grad_norm": 0.5404536128044128, "learning_rate": 9.844422204603647e-05, "loss": 1.435, "step": 39084 }, { "epoch": 0.5078917489519088, "grad_norm": 0.42538902163505554, "learning_rate": 9.84416225841251e-05, "loss": 1.5258, "step": 39085 }, { "epoch": 0.5079047434958247, "grad_norm": 0.3881109654903412, "learning_rate": 9.843902312221371e-05, "loss": 1.5943, "step": 39086 }, { "epoch": 0.5079177380397406, "grad_norm": 0.373271107673645, "learning_rate": 9.843642366030232e-05, "loss": 1.4006, "step": 39087 }, { "epoch": 0.5079307325836564, "grad_norm": 0.38096803426742554, "learning_rate": 9.843382419839093e-05, "loss": 1.2663, "step": 39088 }, { "epoch": 0.5079437271275723, "grad_norm": 0.3490661680698395, "learning_rate": 9.843122473647955e-05, "loss": 1.3182, "step": 39089 }, { "epoch": 0.5079567216714882, "grad_norm": 0.3150794804096222, "learning_rate": 9.842862527456817e-05, "loss": 1.4092, "step": 39090 }, { "epoch": 0.5079697162154041, "grad_norm": 0.4496360719203949, "learning_rate": 9.842602581265679e-05, "loss": 1.4594, "step": 39091 }, { "epoch": 0.5079827107593199, "grad_norm": 0.4075656533241272, "learning_rate": 9.84234263507454e-05, "loss": 1.3182, "step": 39092 }, { "epoch": 0.5079957053032358, "grad_norm": 0.4551810920238495, "learning_rate": 9.842082688883401e-05, "loss": 1.3865, "step": 39093 }, { "epoch": 0.5080086998471517, "grad_norm": 0.34509798884391785, "learning_rate": 9.841822742692262e-05, "loss": 1.4242, "step": 39094 }, { "epoch": 0.5080216943910676, "grad_norm": 0.4474751949310303, "learning_rate": 9.841562796501125e-05, "loss": 1.3955, "step": 39095 }, { "epoch": 0.5080346889349834, "grad_norm": 0.3474105894565582, "learning_rate": 9.841302850309986e-05, "loss": 1.3519, "step": 39096 }, { "epoch": 0.5080476834788993, "grad_norm": 0.294693261384964, "learning_rate": 9.841042904118848e-05, "loss": 1.3867, "step": 39097 }, { "epoch": 0.5080606780228152, "grad_norm": 0.37856578826904297, "learning_rate": 9.84078295792771e-05, "loss": 1.3108, "step": 39098 }, { "epoch": 0.5080736725667311, "grad_norm": 0.41657671332359314, "learning_rate": 9.84052301173657e-05, "loss": 1.2933, "step": 39099 }, { "epoch": 0.5080866671106469, "grad_norm": 0.4832209348678589, "learning_rate": 9.840263065545433e-05, "loss": 1.504, "step": 39100 }, { "epoch": 0.5080996616545628, "grad_norm": 0.32037147879600525, "learning_rate": 9.840003119354294e-05, "loss": 1.3887, "step": 39101 }, { "epoch": 0.5081126561984787, "grad_norm": 0.4562925398349762, "learning_rate": 9.839743173163156e-05, "loss": 1.338, "step": 39102 }, { "epoch": 0.5081256507423946, "grad_norm": 0.41976720094680786, "learning_rate": 9.839483226972018e-05, "loss": 1.7332, "step": 39103 }, { "epoch": 0.5081386452863104, "grad_norm": 0.4253847897052765, "learning_rate": 9.839223280780879e-05, "loss": 1.4224, "step": 39104 }, { "epoch": 0.5081516398302263, "grad_norm": 0.3479412794113159, "learning_rate": 9.83896333458974e-05, "loss": 1.5214, "step": 39105 }, { "epoch": 0.5081646343741422, "grad_norm": 0.46863052248954773, "learning_rate": 9.838703388398602e-05, "loss": 1.4047, "step": 39106 }, { "epoch": 0.5081776289180581, "grad_norm": 0.33401528000831604, "learning_rate": 9.838443442207463e-05, "loss": 1.4721, "step": 39107 }, { "epoch": 0.5081906234619739, "grad_norm": 0.42769506573677063, "learning_rate": 9.838183496016326e-05, "loss": 1.5025, "step": 39108 }, { "epoch": 0.5082036180058898, "grad_norm": 0.38392290472984314, "learning_rate": 9.837923549825187e-05, "loss": 1.5524, "step": 39109 }, { "epoch": 0.5082166125498057, "grad_norm": 0.39655613899230957, "learning_rate": 9.837663603634048e-05, "loss": 1.4895, "step": 39110 }, { "epoch": 0.5082296070937216, "grad_norm": 0.24204331636428833, "learning_rate": 9.837403657442909e-05, "loss": 1.3271, "step": 39111 }, { "epoch": 0.5082426016376373, "grad_norm": 0.3175000250339508, "learning_rate": 9.837143711251771e-05, "loss": 1.357, "step": 39112 }, { "epoch": 0.5082555961815532, "grad_norm": 0.3003745675086975, "learning_rate": 9.836883765060633e-05, "loss": 1.3441, "step": 39113 }, { "epoch": 0.5082685907254691, "grad_norm": 0.43871405720710754, "learning_rate": 9.836623818869495e-05, "loss": 1.3118, "step": 39114 }, { "epoch": 0.508281585269385, "grad_norm": 0.37494778633117676, "learning_rate": 9.836363872678356e-05, "loss": 1.3706, "step": 39115 }, { "epoch": 0.5082945798133008, "grad_norm": 0.48025625944137573, "learning_rate": 9.836103926487219e-05, "loss": 1.4312, "step": 39116 }, { "epoch": 0.5083075743572167, "grad_norm": 0.33407461643218994, "learning_rate": 9.835843980296078e-05, "loss": 1.3946, "step": 39117 }, { "epoch": 0.5083205689011326, "grad_norm": 0.3723430633544922, "learning_rate": 9.835584034104941e-05, "loss": 1.422, "step": 39118 }, { "epoch": 0.5083335634450485, "grad_norm": 0.38238099217414856, "learning_rate": 9.835324087913802e-05, "loss": 1.4006, "step": 39119 }, { "epoch": 0.5083465579889643, "grad_norm": 0.33316078782081604, "learning_rate": 9.835064141722664e-05, "loss": 1.159, "step": 39120 }, { "epoch": 0.5083595525328802, "grad_norm": 0.39543792605400085, "learning_rate": 9.834804195531525e-05, "loss": 1.3235, "step": 39121 }, { "epoch": 0.5083725470767961, "grad_norm": 0.39916759729385376, "learning_rate": 9.834544249340388e-05, "loss": 1.5048, "step": 39122 }, { "epoch": 0.508385541620712, "grad_norm": 0.30125388503074646, "learning_rate": 9.834284303149248e-05, "loss": 1.4396, "step": 39123 }, { "epoch": 0.5083985361646278, "grad_norm": 0.38635626435279846, "learning_rate": 9.83402435695811e-05, "loss": 1.4106, "step": 39124 }, { "epoch": 0.5084115307085437, "grad_norm": 0.363315612077713, "learning_rate": 9.833764410766971e-05, "loss": 1.2013, "step": 39125 }, { "epoch": 0.5084245252524596, "grad_norm": 0.25509172677993774, "learning_rate": 9.833504464575834e-05, "loss": 1.3336, "step": 39126 }, { "epoch": 0.5084375197963755, "grad_norm": 0.36476683616638184, "learning_rate": 9.833244518384695e-05, "loss": 1.4105, "step": 39127 }, { "epoch": 0.5084505143402913, "grad_norm": 0.3915826082229614, "learning_rate": 9.832984572193557e-05, "loss": 1.2316, "step": 39128 }, { "epoch": 0.5084635088842072, "grad_norm": 0.4001163840293884, "learning_rate": 9.832724626002417e-05, "loss": 1.4599, "step": 39129 }, { "epoch": 0.5084765034281231, "grad_norm": 0.36212319135665894, "learning_rate": 9.832464679811279e-05, "loss": 1.325, "step": 39130 }, { "epoch": 0.508489497972039, "grad_norm": 0.3813785910606384, "learning_rate": 9.83220473362014e-05, "loss": 1.3975, "step": 39131 }, { "epoch": 0.5085024925159549, "grad_norm": 0.37070780992507935, "learning_rate": 9.831944787429003e-05, "loss": 1.3775, "step": 39132 }, { "epoch": 0.5085154870598707, "grad_norm": 0.4906761646270752, "learning_rate": 9.831684841237864e-05, "loss": 1.4019, "step": 39133 }, { "epoch": 0.5085284816037866, "grad_norm": 0.5181283354759216, "learning_rate": 9.831424895046726e-05, "loss": 1.3807, "step": 39134 }, { "epoch": 0.5085414761477025, "grad_norm": 0.3241029381752014, "learning_rate": 9.831164948855587e-05, "loss": 1.364, "step": 39135 }, { "epoch": 0.5085544706916184, "grad_norm": 0.4425843358039856, "learning_rate": 9.830905002664449e-05, "loss": 1.1851, "step": 39136 }, { "epoch": 0.5085674652355342, "grad_norm": 0.2734488248825073, "learning_rate": 9.830645056473311e-05, "loss": 1.4825, "step": 39137 }, { "epoch": 0.5085804597794501, "grad_norm": 0.38445886969566345, "learning_rate": 9.830385110282172e-05, "loss": 1.2179, "step": 39138 }, { "epoch": 0.508593454323366, "grad_norm": 0.45285069942474365, "learning_rate": 9.830125164091035e-05, "loss": 1.2794, "step": 39139 }, { "epoch": 0.5086064488672819, "grad_norm": 0.43517419695854187, "learning_rate": 9.829865217899896e-05, "loss": 1.3594, "step": 39140 }, { "epoch": 0.5086194434111977, "grad_norm": 0.4396021068096161, "learning_rate": 9.829605271708757e-05, "loss": 1.4293, "step": 39141 }, { "epoch": 0.5086324379551136, "grad_norm": 0.42517006397247314, "learning_rate": 9.829345325517618e-05, "loss": 1.2422, "step": 39142 }, { "epoch": 0.5086454324990295, "grad_norm": 0.3792587220668793, "learning_rate": 9.82908537932648e-05, "loss": 1.3222, "step": 39143 }, { "epoch": 0.5086584270429454, "grad_norm": 0.47380226850509644, "learning_rate": 9.828825433135341e-05, "loss": 1.3948, "step": 39144 }, { "epoch": 0.5086714215868612, "grad_norm": 0.37331247329711914, "learning_rate": 9.828565486944204e-05, "loss": 1.3523, "step": 39145 }, { "epoch": 0.5086844161307771, "grad_norm": 0.3739831745624542, "learning_rate": 9.828305540753065e-05, "loss": 1.1868, "step": 39146 }, { "epoch": 0.508697410674693, "grad_norm": 0.3964419662952423, "learning_rate": 9.828045594561926e-05, "loss": 1.2513, "step": 39147 }, { "epoch": 0.5087104052186089, "grad_norm": 0.39319562911987305, "learning_rate": 9.827785648370787e-05, "loss": 1.3898, "step": 39148 }, { "epoch": 0.5087233997625247, "grad_norm": 0.4910820722579956, "learning_rate": 9.82752570217965e-05, "loss": 1.2613, "step": 39149 }, { "epoch": 0.5087363943064406, "grad_norm": 0.4563422203063965, "learning_rate": 9.82726575598851e-05, "loss": 1.4288, "step": 39150 }, { "epoch": 0.5087493888503565, "grad_norm": 0.4918176829814911, "learning_rate": 9.827005809797373e-05, "loss": 1.505, "step": 39151 }, { "epoch": 0.5087623833942724, "grad_norm": 0.33197033405303955, "learning_rate": 9.826745863606234e-05, "loss": 1.2825, "step": 39152 }, { "epoch": 0.5087753779381882, "grad_norm": 0.3984086215496063, "learning_rate": 9.826485917415095e-05, "loss": 1.3721, "step": 39153 }, { "epoch": 0.5087883724821041, "grad_norm": 0.5225964188575745, "learning_rate": 9.826225971223956e-05, "loss": 1.3568, "step": 39154 }, { "epoch": 0.50880136702602, "grad_norm": 0.45638221502304077, "learning_rate": 9.825966025032819e-05, "loss": 1.3787, "step": 39155 }, { "epoch": 0.5088143615699359, "grad_norm": 0.44852665066719055, "learning_rate": 9.82570607884168e-05, "loss": 1.3562, "step": 39156 }, { "epoch": 0.5088273561138517, "grad_norm": 0.37948957085609436, "learning_rate": 9.825446132650542e-05, "loss": 1.4091, "step": 39157 }, { "epoch": 0.5088403506577676, "grad_norm": 0.20879511535167694, "learning_rate": 9.825186186459403e-05, "loss": 1.1155, "step": 39158 }, { "epoch": 0.5088533452016835, "grad_norm": 0.45720037817955017, "learning_rate": 9.824926240268265e-05, "loss": 1.4597, "step": 39159 }, { "epoch": 0.5088663397455994, "grad_norm": 0.3994595408439636, "learning_rate": 9.824666294077126e-05, "loss": 1.5436, "step": 39160 }, { "epoch": 0.5088793342895152, "grad_norm": 0.3915811777114868, "learning_rate": 9.824406347885988e-05, "loss": 1.3103, "step": 39161 }, { "epoch": 0.508892328833431, "grad_norm": 0.41099846363067627, "learning_rate": 9.824146401694849e-05, "loss": 1.263, "step": 39162 }, { "epoch": 0.508905323377347, "grad_norm": 0.4201980531215668, "learning_rate": 9.823886455503712e-05, "loss": 1.3202, "step": 39163 }, { "epoch": 0.5089183179212629, "grad_norm": 0.41457399725914, "learning_rate": 9.823626509312573e-05, "loss": 1.37, "step": 39164 }, { "epoch": 0.5089313124651786, "grad_norm": 0.31223076581954956, "learning_rate": 9.823366563121434e-05, "loss": 1.4406, "step": 39165 }, { "epoch": 0.5089443070090945, "grad_norm": 0.4344368577003479, "learning_rate": 9.823106616930295e-05, "loss": 1.3944, "step": 39166 }, { "epoch": 0.5089573015530104, "grad_norm": 0.3481143116950989, "learning_rate": 9.822846670739157e-05, "loss": 1.3729, "step": 39167 }, { "epoch": 0.5089702960969263, "grad_norm": 0.41374996304512024, "learning_rate": 9.822586724548018e-05, "loss": 1.4794, "step": 39168 }, { "epoch": 0.5089832906408421, "grad_norm": 0.3319886028766632, "learning_rate": 9.822326778356881e-05, "loss": 1.2633, "step": 39169 }, { "epoch": 0.508996285184758, "grad_norm": 0.4248144030570984, "learning_rate": 9.822066832165742e-05, "loss": 1.4331, "step": 39170 }, { "epoch": 0.5090092797286739, "grad_norm": 0.4264313578605652, "learning_rate": 9.821806885974603e-05, "loss": 1.2653, "step": 39171 }, { "epoch": 0.5090222742725898, "grad_norm": 0.3684108853340149, "learning_rate": 9.821546939783464e-05, "loss": 1.2743, "step": 39172 }, { "epoch": 0.5090352688165056, "grad_norm": 0.4924924373626709, "learning_rate": 9.821286993592327e-05, "loss": 1.4386, "step": 39173 }, { "epoch": 0.5090482633604215, "grad_norm": 0.4306740164756775, "learning_rate": 9.821027047401189e-05, "loss": 1.4667, "step": 39174 }, { "epoch": 0.5090612579043374, "grad_norm": 0.4500599801540375, "learning_rate": 9.82076710121005e-05, "loss": 1.3339, "step": 39175 }, { "epoch": 0.5090742524482533, "grad_norm": 0.34499624371528625, "learning_rate": 9.820507155018913e-05, "loss": 1.2237, "step": 39176 }, { "epoch": 0.5090872469921691, "grad_norm": 0.3862970173358917, "learning_rate": 9.820247208827774e-05, "loss": 1.2948, "step": 39177 }, { "epoch": 0.509100241536085, "grad_norm": 0.33407288789749146, "learning_rate": 9.819987262636635e-05, "loss": 1.4451, "step": 39178 }, { "epoch": 0.5091132360800009, "grad_norm": 0.3596533238887787, "learning_rate": 9.819727316445496e-05, "loss": 1.4126, "step": 39179 }, { "epoch": 0.5091262306239168, "grad_norm": 0.35112443566322327, "learning_rate": 9.819467370254358e-05, "loss": 1.2163, "step": 39180 }, { "epoch": 0.5091392251678326, "grad_norm": 0.30794188380241394, "learning_rate": 9.81920742406322e-05, "loss": 1.1117, "step": 39181 }, { "epoch": 0.5091522197117485, "grad_norm": 0.4689164161682129, "learning_rate": 9.818947477872082e-05, "loss": 1.3546, "step": 39182 }, { "epoch": 0.5091652142556644, "grad_norm": 0.5049583315849304, "learning_rate": 9.818687531680943e-05, "loss": 1.4558, "step": 39183 }, { "epoch": 0.5091782087995803, "grad_norm": 0.45010024309158325, "learning_rate": 9.818427585489804e-05, "loss": 1.1078, "step": 39184 }, { "epoch": 0.5091912033434961, "grad_norm": 0.49955010414123535, "learning_rate": 9.818167639298665e-05, "loss": 1.5761, "step": 39185 }, { "epoch": 0.509204197887412, "grad_norm": 0.38533174991607666, "learning_rate": 9.817907693107528e-05, "loss": 1.4038, "step": 39186 }, { "epoch": 0.5092171924313279, "grad_norm": 0.376058429479599, "learning_rate": 9.817647746916389e-05, "loss": 1.3392, "step": 39187 }, { "epoch": 0.5092301869752438, "grad_norm": 0.4223913252353668, "learning_rate": 9.817387800725251e-05, "loss": 1.3768, "step": 39188 }, { "epoch": 0.5092431815191596, "grad_norm": 0.3299860656261444, "learning_rate": 9.817127854534112e-05, "loss": 1.2803, "step": 39189 }, { "epoch": 0.5092561760630755, "grad_norm": 0.40014559030532837, "learning_rate": 9.816867908342973e-05, "loss": 1.2292, "step": 39190 }, { "epoch": 0.5092691706069914, "grad_norm": 0.4480552673339844, "learning_rate": 9.816607962151834e-05, "loss": 1.3112, "step": 39191 }, { "epoch": 0.5092821651509073, "grad_norm": 0.4934820234775543, "learning_rate": 9.816348015960697e-05, "loss": 1.4381, "step": 39192 }, { "epoch": 0.5092951596948231, "grad_norm": 0.35670486092567444, "learning_rate": 9.816088069769558e-05, "loss": 1.4641, "step": 39193 }, { "epoch": 0.509308154238739, "grad_norm": 0.36212441325187683, "learning_rate": 9.81582812357842e-05, "loss": 1.2408, "step": 39194 }, { "epoch": 0.5093211487826549, "grad_norm": 0.2892414331436157, "learning_rate": 9.815568177387282e-05, "loss": 1.2475, "step": 39195 }, { "epoch": 0.5093341433265708, "grad_norm": 0.33519724011421204, "learning_rate": 9.815308231196143e-05, "loss": 1.4386, "step": 39196 }, { "epoch": 0.5093471378704866, "grad_norm": 0.4160432517528534, "learning_rate": 9.815048285005004e-05, "loss": 1.4025, "step": 39197 }, { "epoch": 0.5093601324144025, "grad_norm": 0.3615199625492096, "learning_rate": 9.814788338813866e-05, "loss": 1.337, "step": 39198 }, { "epoch": 0.5093731269583184, "grad_norm": 0.4740733206272125, "learning_rate": 9.814528392622727e-05, "loss": 1.2486, "step": 39199 }, { "epoch": 0.5093861215022343, "grad_norm": 0.3478700816631317, "learning_rate": 9.81426844643159e-05, "loss": 1.3605, "step": 39200 }, { "epoch": 0.5093991160461501, "grad_norm": 0.42419081926345825, "learning_rate": 9.814008500240451e-05, "loss": 1.4241, "step": 39201 }, { "epoch": 0.509412110590066, "grad_norm": 0.4373956024646759, "learning_rate": 9.813748554049312e-05, "loss": 1.355, "step": 39202 }, { "epoch": 0.5094251051339819, "grad_norm": 0.34196627140045166, "learning_rate": 9.813488607858173e-05, "loss": 1.1621, "step": 39203 }, { "epoch": 0.5094380996778978, "grad_norm": 0.39044463634490967, "learning_rate": 9.813228661667035e-05, "loss": 1.3256, "step": 39204 }, { "epoch": 0.5094510942218136, "grad_norm": 0.414628267288208, "learning_rate": 9.812968715475897e-05, "loss": 1.2813, "step": 39205 }, { "epoch": 0.5094640887657295, "grad_norm": 0.4861219525337219, "learning_rate": 9.812708769284759e-05, "loss": 1.6509, "step": 39206 }, { "epoch": 0.5094770833096454, "grad_norm": 0.41792428493499756, "learning_rate": 9.81244882309362e-05, "loss": 1.651, "step": 39207 }, { "epoch": 0.5094900778535613, "grad_norm": 0.4257180988788605, "learning_rate": 9.812188876902481e-05, "loss": 1.3115, "step": 39208 }, { "epoch": 0.5095030723974772, "grad_norm": 0.46537283062934875, "learning_rate": 9.811928930711342e-05, "loss": 1.5039, "step": 39209 }, { "epoch": 0.509516066941393, "grad_norm": 0.3252220153808594, "learning_rate": 9.811668984520205e-05, "loss": 1.3486, "step": 39210 }, { "epoch": 0.5095290614853089, "grad_norm": 0.4003971815109253, "learning_rate": 9.811409038329067e-05, "loss": 1.2687, "step": 39211 }, { "epoch": 0.5095420560292248, "grad_norm": 0.48370397090911865, "learning_rate": 9.811149092137928e-05, "loss": 1.4693, "step": 39212 }, { "epoch": 0.5095550505731407, "grad_norm": 0.3933558464050293, "learning_rate": 9.810889145946789e-05, "loss": 1.3913, "step": 39213 }, { "epoch": 0.5095680451170564, "grad_norm": 0.36668825149536133, "learning_rate": 9.81062919975565e-05, "loss": 1.1639, "step": 39214 }, { "epoch": 0.5095810396609723, "grad_norm": 0.39010128378868103, "learning_rate": 9.810369253564513e-05, "loss": 1.5945, "step": 39215 }, { "epoch": 0.5095940342048882, "grad_norm": 0.38122034072875977, "learning_rate": 9.810109307373374e-05, "loss": 1.5294, "step": 39216 }, { "epoch": 0.5096070287488041, "grad_norm": 0.47352004051208496, "learning_rate": 9.809849361182236e-05, "loss": 1.5688, "step": 39217 }, { "epoch": 0.5096200232927199, "grad_norm": 0.37617090344429016, "learning_rate": 9.809589414991097e-05, "loss": 1.4715, "step": 39218 }, { "epoch": 0.5096330178366358, "grad_norm": 0.4637976884841919, "learning_rate": 9.80932946879996e-05, "loss": 1.52, "step": 39219 }, { "epoch": 0.5096460123805517, "grad_norm": 0.3982177972793579, "learning_rate": 9.80906952260882e-05, "loss": 1.3725, "step": 39220 }, { "epoch": 0.5096590069244676, "grad_norm": 0.3927983045578003, "learning_rate": 9.808809576417682e-05, "loss": 1.4453, "step": 39221 }, { "epoch": 0.5096720014683834, "grad_norm": 0.46731603145599365, "learning_rate": 9.808549630226543e-05, "loss": 1.4025, "step": 39222 }, { "epoch": 0.5096849960122993, "grad_norm": 0.4086238145828247, "learning_rate": 9.808289684035406e-05, "loss": 1.3231, "step": 39223 }, { "epoch": 0.5096979905562152, "grad_norm": 0.424526572227478, "learning_rate": 9.808029737844267e-05, "loss": 1.5135, "step": 39224 }, { "epoch": 0.5097109851001311, "grad_norm": 0.4239373505115509, "learning_rate": 9.807769791653129e-05, "loss": 1.3703, "step": 39225 }, { "epoch": 0.5097239796440469, "grad_norm": 0.3453957438468933, "learning_rate": 9.807509845461989e-05, "loss": 1.3502, "step": 39226 }, { "epoch": 0.5097369741879628, "grad_norm": 0.4526543617248535, "learning_rate": 9.807249899270851e-05, "loss": 1.4249, "step": 39227 }, { "epoch": 0.5097499687318787, "grad_norm": 0.4349013566970825, "learning_rate": 9.806989953079712e-05, "loss": 1.4725, "step": 39228 }, { "epoch": 0.5097629632757946, "grad_norm": 0.351409375667572, "learning_rate": 9.806730006888575e-05, "loss": 1.3898, "step": 39229 }, { "epoch": 0.5097759578197104, "grad_norm": 0.3883218765258789, "learning_rate": 9.806470060697436e-05, "loss": 1.2212, "step": 39230 }, { "epoch": 0.5097889523636263, "grad_norm": 0.34148740768432617, "learning_rate": 9.806210114506298e-05, "loss": 1.4885, "step": 39231 }, { "epoch": 0.5098019469075422, "grad_norm": 0.32814446091651917, "learning_rate": 9.805950168315158e-05, "loss": 1.352, "step": 39232 }, { "epoch": 0.5098149414514581, "grad_norm": 0.3692649006843567, "learning_rate": 9.80569022212402e-05, "loss": 1.3238, "step": 39233 }, { "epoch": 0.5098279359953739, "grad_norm": 0.39818865060806274, "learning_rate": 9.805430275932882e-05, "loss": 1.357, "step": 39234 }, { "epoch": 0.5098409305392898, "grad_norm": 0.5230275988578796, "learning_rate": 9.805170329741744e-05, "loss": 1.4339, "step": 39235 }, { "epoch": 0.5098539250832057, "grad_norm": 0.4490581452846527, "learning_rate": 9.804910383550605e-05, "loss": 1.4215, "step": 39236 }, { "epoch": 0.5098669196271216, "grad_norm": 0.47048327326774597, "learning_rate": 9.804650437359468e-05, "loss": 1.2651, "step": 39237 }, { "epoch": 0.5098799141710374, "grad_norm": 0.36902928352355957, "learning_rate": 9.804390491168329e-05, "loss": 1.4508, "step": 39238 }, { "epoch": 0.5098929087149533, "grad_norm": 0.36316853761672974, "learning_rate": 9.80413054497719e-05, "loss": 1.2027, "step": 39239 }, { "epoch": 0.5099059032588692, "grad_norm": 0.28990671038627625, "learning_rate": 9.803870598786051e-05, "loss": 1.4653, "step": 39240 }, { "epoch": 0.5099188978027851, "grad_norm": 0.4087803363800049, "learning_rate": 9.803610652594913e-05, "loss": 1.4851, "step": 39241 }, { "epoch": 0.5099318923467009, "grad_norm": 0.40900230407714844, "learning_rate": 9.803350706403775e-05, "loss": 1.4249, "step": 39242 }, { "epoch": 0.5099448868906168, "grad_norm": 0.43276965618133545, "learning_rate": 9.803090760212637e-05, "loss": 1.2779, "step": 39243 }, { "epoch": 0.5099578814345327, "grad_norm": 0.4382627308368683, "learning_rate": 9.802830814021498e-05, "loss": 1.3701, "step": 39244 }, { "epoch": 0.5099708759784486, "grad_norm": 0.3575490415096283, "learning_rate": 9.802570867830359e-05, "loss": 1.3614, "step": 39245 }, { "epoch": 0.5099838705223644, "grad_norm": 0.4890463054180145, "learning_rate": 9.80231092163922e-05, "loss": 1.4129, "step": 39246 }, { "epoch": 0.5099968650662803, "grad_norm": 0.5059266686439514, "learning_rate": 9.802050975448083e-05, "loss": 1.5434, "step": 39247 }, { "epoch": 0.5100098596101962, "grad_norm": 0.3912336826324463, "learning_rate": 9.801791029256945e-05, "loss": 1.2874, "step": 39248 }, { "epoch": 0.5100228541541121, "grad_norm": 0.5042760968208313, "learning_rate": 9.801531083065806e-05, "loss": 1.3362, "step": 39249 }, { "epoch": 0.5100358486980279, "grad_norm": 0.4623928964138031, "learning_rate": 9.801271136874667e-05, "loss": 1.4268, "step": 39250 }, { "epoch": 0.5100488432419438, "grad_norm": 0.36923345923423767, "learning_rate": 9.801011190683528e-05, "loss": 1.3538, "step": 39251 }, { "epoch": 0.5100618377858597, "grad_norm": 0.34344062209129333, "learning_rate": 9.800751244492391e-05, "loss": 1.0877, "step": 39252 }, { "epoch": 0.5100748323297756, "grad_norm": 0.38448214530944824, "learning_rate": 9.800491298301252e-05, "loss": 1.377, "step": 39253 }, { "epoch": 0.5100878268736914, "grad_norm": 0.37683412432670593, "learning_rate": 9.800231352110114e-05, "loss": 1.1389, "step": 39254 }, { "epoch": 0.5101008214176073, "grad_norm": 0.4299337565898895, "learning_rate": 9.799971405918976e-05, "loss": 1.4249, "step": 39255 }, { "epoch": 0.5101138159615232, "grad_norm": 0.3300829529762268, "learning_rate": 9.799711459727837e-05, "loss": 1.6148, "step": 39256 }, { "epoch": 0.5101268105054391, "grad_norm": 0.4856196939945221, "learning_rate": 9.799451513536698e-05, "loss": 1.5518, "step": 39257 }, { "epoch": 0.5101398050493549, "grad_norm": 0.35708701610565186, "learning_rate": 9.79919156734556e-05, "loss": 1.4513, "step": 39258 }, { "epoch": 0.5101527995932708, "grad_norm": 0.3373710811138153, "learning_rate": 9.798931621154421e-05, "loss": 1.325, "step": 39259 }, { "epoch": 0.5101657941371867, "grad_norm": 0.4144609570503235, "learning_rate": 9.798671674963284e-05, "loss": 1.4631, "step": 39260 }, { "epoch": 0.5101787886811026, "grad_norm": 0.33534038066864014, "learning_rate": 9.798411728772145e-05, "loss": 1.4006, "step": 39261 }, { "epoch": 0.5101917832250183, "grad_norm": 0.35719284415245056, "learning_rate": 9.798151782581006e-05, "loss": 1.2885, "step": 39262 }, { "epoch": 0.5102047777689342, "grad_norm": 0.39487341046333313, "learning_rate": 9.797891836389867e-05, "loss": 1.3962, "step": 39263 }, { "epoch": 0.5102177723128501, "grad_norm": 0.45949462056159973, "learning_rate": 9.79763189019873e-05, "loss": 1.5093, "step": 39264 }, { "epoch": 0.510230766856766, "grad_norm": 0.42805638909339905, "learning_rate": 9.79737194400759e-05, "loss": 1.5651, "step": 39265 }, { "epoch": 0.5102437614006818, "grad_norm": 0.4566091299057007, "learning_rate": 9.797111997816453e-05, "loss": 1.5625, "step": 39266 }, { "epoch": 0.5102567559445977, "grad_norm": 0.34924131631851196, "learning_rate": 9.796852051625314e-05, "loss": 1.447, "step": 39267 }, { "epoch": 0.5102697504885136, "grad_norm": 0.5608017444610596, "learning_rate": 9.796592105434175e-05, "loss": 1.4102, "step": 39268 }, { "epoch": 0.5102827450324295, "grad_norm": 0.3222692906856537, "learning_rate": 9.796332159243036e-05, "loss": 1.1683, "step": 39269 }, { "epoch": 0.5102957395763453, "grad_norm": 0.4071926772594452, "learning_rate": 9.796072213051899e-05, "loss": 1.5976, "step": 39270 }, { "epoch": 0.5103087341202612, "grad_norm": 0.36689645051956177, "learning_rate": 9.79581226686076e-05, "loss": 1.3351, "step": 39271 }, { "epoch": 0.5103217286641771, "grad_norm": 0.44225069880485535, "learning_rate": 9.795552320669622e-05, "loss": 1.2521, "step": 39272 }, { "epoch": 0.510334723208093, "grad_norm": 0.4344242215156555, "learning_rate": 9.795292374478483e-05, "loss": 1.3839, "step": 39273 }, { "epoch": 0.5103477177520088, "grad_norm": 0.3886382579803467, "learning_rate": 9.795032428287344e-05, "loss": 1.4903, "step": 39274 }, { "epoch": 0.5103607122959247, "grad_norm": 0.48813384771347046, "learning_rate": 9.794772482096206e-05, "loss": 1.4209, "step": 39275 }, { "epoch": 0.5103737068398406, "grad_norm": 0.4030526280403137, "learning_rate": 9.794512535905068e-05, "loss": 1.5349, "step": 39276 }, { "epoch": 0.5103867013837565, "grad_norm": 0.44391322135925293, "learning_rate": 9.794252589713929e-05, "loss": 1.374, "step": 39277 }, { "epoch": 0.5103996959276723, "grad_norm": 0.4312180280685425, "learning_rate": 9.793992643522792e-05, "loss": 1.3403, "step": 39278 }, { "epoch": 0.5104126904715882, "grad_norm": 0.31165820360183716, "learning_rate": 9.793732697331653e-05, "loss": 1.0907, "step": 39279 }, { "epoch": 0.5104256850155041, "grad_norm": 0.49674123525619507, "learning_rate": 9.793472751140515e-05, "loss": 1.5206, "step": 39280 }, { "epoch": 0.51043867955942, "grad_norm": 0.4100476801395416, "learning_rate": 9.793212804949375e-05, "loss": 1.3613, "step": 39281 }, { "epoch": 0.5104516741033359, "grad_norm": 0.4155726730823517, "learning_rate": 9.792952858758237e-05, "loss": 1.4438, "step": 39282 }, { "epoch": 0.5104646686472517, "grad_norm": 0.38523587584495544, "learning_rate": 9.792692912567098e-05, "loss": 1.6815, "step": 39283 }, { "epoch": 0.5104776631911676, "grad_norm": 0.3895767331123352, "learning_rate": 9.792432966375961e-05, "loss": 1.5873, "step": 39284 }, { "epoch": 0.5104906577350835, "grad_norm": 0.4110410511493683, "learning_rate": 9.792173020184823e-05, "loss": 1.4763, "step": 39285 }, { "epoch": 0.5105036522789994, "grad_norm": 0.2810681462287903, "learning_rate": 9.791913073993684e-05, "loss": 1.3509, "step": 39286 }, { "epoch": 0.5105166468229152, "grad_norm": 0.43798908591270447, "learning_rate": 9.791653127802545e-05, "loss": 1.163, "step": 39287 }, { "epoch": 0.5105296413668311, "grad_norm": 0.3786788284778595, "learning_rate": 9.791393181611407e-05, "loss": 1.3499, "step": 39288 }, { "epoch": 0.510542635910747, "grad_norm": 0.3901594281196594, "learning_rate": 9.791133235420269e-05, "loss": 1.3245, "step": 39289 }, { "epoch": 0.5105556304546629, "grad_norm": 0.3720622956752777, "learning_rate": 9.79087328922913e-05, "loss": 1.2647, "step": 39290 }, { "epoch": 0.5105686249985787, "grad_norm": 0.5036496520042419, "learning_rate": 9.790613343037993e-05, "loss": 1.3893, "step": 39291 }, { "epoch": 0.5105816195424946, "grad_norm": 0.3622131943702698, "learning_rate": 9.790353396846854e-05, "loss": 1.1889, "step": 39292 }, { "epoch": 0.5105946140864105, "grad_norm": 0.33060070872306824, "learning_rate": 9.790093450655715e-05, "loss": 1.6825, "step": 39293 }, { "epoch": 0.5106076086303264, "grad_norm": 0.36880016326904297, "learning_rate": 9.789833504464576e-05, "loss": 1.3704, "step": 39294 }, { "epoch": 0.5106206031742422, "grad_norm": 0.3040933609008789, "learning_rate": 9.789573558273438e-05, "loss": 1.287, "step": 39295 }, { "epoch": 0.5106335977181581, "grad_norm": 0.3780258297920227, "learning_rate": 9.7893136120823e-05, "loss": 1.1544, "step": 39296 }, { "epoch": 0.510646592262074, "grad_norm": 0.33794379234313965, "learning_rate": 9.789053665891162e-05, "loss": 1.3383, "step": 39297 }, { "epoch": 0.5106595868059899, "grad_norm": 0.34207865595817566, "learning_rate": 9.788793719700023e-05, "loss": 1.402, "step": 39298 }, { "epoch": 0.5106725813499057, "grad_norm": 0.3777764141559601, "learning_rate": 9.788533773508884e-05, "loss": 1.5684, "step": 39299 }, { "epoch": 0.5106855758938216, "grad_norm": 0.43486225605010986, "learning_rate": 9.788273827317745e-05, "loss": 1.3576, "step": 39300 }, { "epoch": 0.5106985704377375, "grad_norm": 0.34987571835517883, "learning_rate": 9.788013881126608e-05, "loss": 1.6069, "step": 39301 }, { "epoch": 0.5107115649816534, "grad_norm": 0.37480953335762024, "learning_rate": 9.787753934935469e-05, "loss": 1.0948, "step": 39302 }, { "epoch": 0.5107245595255692, "grad_norm": 0.5006477236747742, "learning_rate": 9.787493988744331e-05, "loss": 1.5047, "step": 39303 }, { "epoch": 0.5107375540694851, "grad_norm": 0.28755712509155273, "learning_rate": 9.787234042553192e-05, "loss": 1.2244, "step": 39304 }, { "epoch": 0.510750548613401, "grad_norm": 0.47323811054229736, "learning_rate": 9.786974096362053e-05, "loss": 1.4976, "step": 39305 }, { "epoch": 0.5107635431573169, "grad_norm": 0.4157203733921051, "learning_rate": 9.786714150170914e-05, "loss": 1.4582, "step": 39306 }, { "epoch": 0.5107765377012327, "grad_norm": 0.4243549704551697, "learning_rate": 9.786454203979777e-05, "loss": 1.5274, "step": 39307 }, { "epoch": 0.5107895322451486, "grad_norm": 0.32249003648757935, "learning_rate": 9.786194257788638e-05, "loss": 1.3357, "step": 39308 }, { "epoch": 0.5108025267890645, "grad_norm": 0.4227658212184906, "learning_rate": 9.7859343115975e-05, "loss": 1.4523, "step": 39309 }, { "epoch": 0.5108155213329804, "grad_norm": 0.3778490424156189, "learning_rate": 9.785674365406361e-05, "loss": 1.3866, "step": 39310 }, { "epoch": 0.5108285158768961, "grad_norm": 0.395142138004303, "learning_rate": 9.785414419215223e-05, "loss": 1.3202, "step": 39311 }, { "epoch": 0.510841510420812, "grad_norm": 0.552781879901886, "learning_rate": 9.785154473024084e-05, "loss": 1.6138, "step": 39312 }, { "epoch": 0.510854504964728, "grad_norm": 0.4020445644855499, "learning_rate": 9.784894526832946e-05, "loss": 1.5534, "step": 39313 }, { "epoch": 0.5108674995086439, "grad_norm": 0.3734736144542694, "learning_rate": 9.784634580641807e-05, "loss": 1.2685, "step": 39314 }, { "epoch": 0.5108804940525596, "grad_norm": 0.34066110849380493, "learning_rate": 9.78437463445067e-05, "loss": 1.2275, "step": 39315 }, { "epoch": 0.5108934885964755, "grad_norm": 0.39267152547836304, "learning_rate": 9.784114688259531e-05, "loss": 1.3644, "step": 39316 }, { "epoch": 0.5109064831403914, "grad_norm": 0.3191804587841034, "learning_rate": 9.783854742068392e-05, "loss": 1.3228, "step": 39317 }, { "epoch": 0.5109194776843073, "grad_norm": 0.41228315234184265, "learning_rate": 9.783594795877253e-05, "loss": 1.5185, "step": 39318 }, { "epoch": 0.5109324722282231, "grad_norm": 0.4741588532924652, "learning_rate": 9.783334849686115e-05, "loss": 1.2785, "step": 39319 }, { "epoch": 0.510945466772139, "grad_norm": 0.3594357669353485, "learning_rate": 9.783074903494976e-05, "loss": 1.2011, "step": 39320 }, { "epoch": 0.5109584613160549, "grad_norm": 0.3708149492740631, "learning_rate": 9.782814957303839e-05, "loss": 1.4429, "step": 39321 }, { "epoch": 0.5109714558599708, "grad_norm": 0.36683204770088196, "learning_rate": 9.782555011112701e-05, "loss": 1.2601, "step": 39322 }, { "epoch": 0.5109844504038866, "grad_norm": 0.35370591282844543, "learning_rate": 9.782295064921561e-05, "loss": 1.4153, "step": 39323 }, { "epoch": 0.5109974449478025, "grad_norm": 0.5464272499084473, "learning_rate": 9.782035118730424e-05, "loss": 1.3843, "step": 39324 }, { "epoch": 0.5110104394917184, "grad_norm": 0.38672149181365967, "learning_rate": 9.781775172539285e-05, "loss": 1.4909, "step": 39325 }, { "epoch": 0.5110234340356343, "grad_norm": 0.37967124581336975, "learning_rate": 9.781515226348147e-05, "loss": 1.4825, "step": 39326 }, { "epoch": 0.5110364285795501, "grad_norm": 0.40882858633995056, "learning_rate": 9.781255280157008e-05, "loss": 1.4778, "step": 39327 }, { "epoch": 0.511049423123466, "grad_norm": 0.43306443095207214, "learning_rate": 9.78099533396587e-05, "loss": 1.2781, "step": 39328 }, { "epoch": 0.5110624176673819, "grad_norm": 0.3639138340950012, "learning_rate": 9.78073538777473e-05, "loss": 1.2938, "step": 39329 }, { "epoch": 0.5110754122112978, "grad_norm": 0.3407760262489319, "learning_rate": 9.780475441583593e-05, "loss": 1.4925, "step": 39330 }, { "epoch": 0.5110884067552136, "grad_norm": 0.4410576820373535, "learning_rate": 9.780215495392454e-05, "loss": 1.5092, "step": 39331 }, { "epoch": 0.5111014012991295, "grad_norm": 0.4604591727256775, "learning_rate": 9.779955549201316e-05, "loss": 1.3982, "step": 39332 }, { "epoch": 0.5111143958430454, "grad_norm": 0.45295262336730957, "learning_rate": 9.779695603010177e-05, "loss": 1.4926, "step": 39333 }, { "epoch": 0.5111273903869613, "grad_norm": 0.5161308646202087, "learning_rate": 9.77943565681904e-05, "loss": 1.4366, "step": 39334 }, { "epoch": 0.5111403849308771, "grad_norm": 0.38867849111557007, "learning_rate": 9.7791757106279e-05, "loss": 1.5089, "step": 39335 }, { "epoch": 0.511153379474793, "grad_norm": 0.3477202355861664, "learning_rate": 9.778915764436762e-05, "loss": 1.2176, "step": 39336 }, { "epoch": 0.5111663740187089, "grad_norm": 0.41309410333633423, "learning_rate": 9.778655818245623e-05, "loss": 1.3876, "step": 39337 }, { "epoch": 0.5111793685626248, "grad_norm": 0.4485919177532196, "learning_rate": 9.778395872054486e-05, "loss": 1.5452, "step": 39338 }, { "epoch": 0.5111923631065406, "grad_norm": 0.4592667520046234, "learning_rate": 9.778135925863347e-05, "loss": 1.3997, "step": 39339 }, { "epoch": 0.5112053576504565, "grad_norm": 0.34632593393325806, "learning_rate": 9.777875979672209e-05, "loss": 1.2485, "step": 39340 }, { "epoch": 0.5112183521943724, "grad_norm": 0.41406524181365967, "learning_rate": 9.77761603348107e-05, "loss": 1.3343, "step": 39341 }, { "epoch": 0.5112313467382883, "grad_norm": 0.26073506474494934, "learning_rate": 9.777356087289931e-05, "loss": 1.1468, "step": 39342 }, { "epoch": 0.5112443412822041, "grad_norm": 0.4435719847679138, "learning_rate": 9.777096141098792e-05, "loss": 1.407, "step": 39343 }, { "epoch": 0.51125733582612, "grad_norm": 0.5298016667366028, "learning_rate": 9.776836194907655e-05, "loss": 1.2463, "step": 39344 }, { "epoch": 0.5112703303700359, "grad_norm": 0.5006462335586548, "learning_rate": 9.776576248716516e-05, "loss": 1.4353, "step": 39345 }, { "epoch": 0.5112833249139518, "grad_norm": 0.4609855115413666, "learning_rate": 9.776316302525378e-05, "loss": 1.3395, "step": 39346 }, { "epoch": 0.5112963194578676, "grad_norm": 0.3615628182888031, "learning_rate": 9.77605635633424e-05, "loss": 1.2369, "step": 39347 }, { "epoch": 0.5113093140017835, "grad_norm": 0.4094845950603485, "learning_rate": 9.7757964101431e-05, "loss": 1.426, "step": 39348 }, { "epoch": 0.5113223085456994, "grad_norm": 0.3444294333457947, "learning_rate": 9.775536463951962e-05, "loss": 1.481, "step": 39349 }, { "epoch": 0.5113353030896153, "grad_norm": 0.38126417994499207, "learning_rate": 9.775276517760824e-05, "loss": 1.4755, "step": 39350 }, { "epoch": 0.5113482976335311, "grad_norm": 0.29789432883262634, "learning_rate": 9.775016571569685e-05, "loss": 1.2326, "step": 39351 }, { "epoch": 0.511361292177447, "grad_norm": 0.3457271456718445, "learning_rate": 9.774756625378548e-05, "loss": 1.1788, "step": 39352 }, { "epoch": 0.5113742867213629, "grad_norm": 0.4097297489643097, "learning_rate": 9.774496679187409e-05, "loss": 1.4295, "step": 39353 }, { "epoch": 0.5113872812652788, "grad_norm": 0.4819372296333313, "learning_rate": 9.77423673299627e-05, "loss": 1.4335, "step": 39354 }, { "epoch": 0.5114002758091946, "grad_norm": 0.29294726252555847, "learning_rate": 9.773976786805131e-05, "loss": 1.2147, "step": 39355 }, { "epoch": 0.5114132703531105, "grad_norm": 0.3596588671207428, "learning_rate": 9.773716840613993e-05, "loss": 1.4775, "step": 39356 }, { "epoch": 0.5114262648970264, "grad_norm": 0.4221736788749695, "learning_rate": 9.773456894422854e-05, "loss": 1.3847, "step": 39357 }, { "epoch": 0.5114392594409423, "grad_norm": 0.3831305205821991, "learning_rate": 9.773196948231717e-05, "loss": 1.344, "step": 39358 }, { "epoch": 0.5114522539848582, "grad_norm": 0.4026622474193573, "learning_rate": 9.772937002040578e-05, "loss": 1.2924, "step": 39359 }, { "epoch": 0.511465248528774, "grad_norm": 0.36827659606933594, "learning_rate": 9.772677055849439e-05, "loss": 1.2902, "step": 39360 }, { "epoch": 0.5114782430726899, "grad_norm": 0.32670995593070984, "learning_rate": 9.772417109658302e-05, "loss": 1.3947, "step": 39361 }, { "epoch": 0.5114912376166058, "grad_norm": 0.28596577048301697, "learning_rate": 9.772157163467163e-05, "loss": 1.1981, "step": 39362 }, { "epoch": 0.5115042321605217, "grad_norm": 0.3686067759990692, "learning_rate": 9.771897217276025e-05, "loss": 1.1774, "step": 39363 }, { "epoch": 0.5115172267044374, "grad_norm": 0.4227554500102997, "learning_rate": 9.771637271084886e-05, "loss": 1.2169, "step": 39364 }, { "epoch": 0.5115302212483533, "grad_norm": 0.39554816484451294, "learning_rate": 9.771377324893747e-05, "loss": 1.3379, "step": 39365 }, { "epoch": 0.5115432157922692, "grad_norm": 0.39307141304016113, "learning_rate": 9.771117378702608e-05, "loss": 1.4063, "step": 39366 }, { "epoch": 0.5115562103361851, "grad_norm": 0.40418288111686707, "learning_rate": 9.770857432511471e-05, "loss": 1.3704, "step": 39367 }, { "epoch": 0.5115692048801009, "grad_norm": 0.40767961740493774, "learning_rate": 9.770597486320332e-05, "loss": 1.4369, "step": 39368 }, { "epoch": 0.5115821994240168, "grad_norm": 0.48716607689857483, "learning_rate": 9.770337540129194e-05, "loss": 1.3882, "step": 39369 }, { "epoch": 0.5115951939679327, "grad_norm": 0.4574127793312073, "learning_rate": 9.770077593938055e-05, "loss": 1.5414, "step": 39370 }, { "epoch": 0.5116081885118486, "grad_norm": 0.35702288150787354, "learning_rate": 9.769817647746917e-05, "loss": 1.2803, "step": 39371 }, { "epoch": 0.5116211830557644, "grad_norm": 0.43470922112464905, "learning_rate": 9.769557701555778e-05, "loss": 1.3974, "step": 39372 }, { "epoch": 0.5116341775996803, "grad_norm": 0.3507862985134125, "learning_rate": 9.76929775536464e-05, "loss": 1.4185, "step": 39373 }, { "epoch": 0.5116471721435962, "grad_norm": 0.4715103805065155, "learning_rate": 9.769037809173501e-05, "loss": 1.5131, "step": 39374 }, { "epoch": 0.5116601666875121, "grad_norm": 0.3318878710269928, "learning_rate": 9.768777862982364e-05, "loss": 1.1944, "step": 39375 }, { "epoch": 0.5116731612314279, "grad_norm": 0.4630686044692993, "learning_rate": 9.768517916791225e-05, "loss": 1.2553, "step": 39376 }, { "epoch": 0.5116861557753438, "grad_norm": 0.43654918670654297, "learning_rate": 9.768257970600086e-05, "loss": 1.3665, "step": 39377 }, { "epoch": 0.5116991503192597, "grad_norm": 0.392220675945282, "learning_rate": 9.767998024408947e-05, "loss": 1.2356, "step": 39378 }, { "epoch": 0.5117121448631756, "grad_norm": 0.49385005235671997, "learning_rate": 9.76773807821781e-05, "loss": 1.5824, "step": 39379 }, { "epoch": 0.5117251394070914, "grad_norm": 0.3665119707584381, "learning_rate": 9.76747813202667e-05, "loss": 1.3948, "step": 39380 }, { "epoch": 0.5117381339510073, "grad_norm": 0.3519100844860077, "learning_rate": 9.767218185835533e-05, "loss": 1.1824, "step": 39381 }, { "epoch": 0.5117511284949232, "grad_norm": 0.44039052724838257, "learning_rate": 9.766958239644394e-05, "loss": 1.3796, "step": 39382 }, { "epoch": 0.5117641230388391, "grad_norm": 0.4043805003166199, "learning_rate": 9.766698293453256e-05, "loss": 1.4108, "step": 39383 }, { "epoch": 0.5117771175827549, "grad_norm": 0.4009416401386261, "learning_rate": 9.766438347262116e-05, "loss": 1.2185, "step": 39384 }, { "epoch": 0.5117901121266708, "grad_norm": 0.39937108755111694, "learning_rate": 9.766178401070979e-05, "loss": 1.3887, "step": 39385 }, { "epoch": 0.5118031066705867, "grad_norm": 0.3723333477973938, "learning_rate": 9.76591845487984e-05, "loss": 1.3458, "step": 39386 }, { "epoch": 0.5118161012145026, "grad_norm": 0.42617493867874146, "learning_rate": 9.765658508688702e-05, "loss": 1.3766, "step": 39387 }, { "epoch": 0.5118290957584184, "grad_norm": 0.4693741798400879, "learning_rate": 9.765398562497563e-05, "loss": 1.3107, "step": 39388 }, { "epoch": 0.5118420903023343, "grad_norm": 0.3658781051635742, "learning_rate": 9.765138616306426e-05, "loss": 1.2589, "step": 39389 }, { "epoch": 0.5118550848462502, "grad_norm": 0.35842278599739075, "learning_rate": 9.764878670115285e-05, "loss": 1.3256, "step": 39390 }, { "epoch": 0.5118680793901661, "grad_norm": 0.46786054968833923, "learning_rate": 9.764618723924148e-05, "loss": 1.5239, "step": 39391 }, { "epoch": 0.5118810739340819, "grad_norm": 0.5196093916893005, "learning_rate": 9.764358777733009e-05, "loss": 1.406, "step": 39392 }, { "epoch": 0.5118940684779978, "grad_norm": 0.5426972508430481, "learning_rate": 9.764098831541871e-05, "loss": 1.3829, "step": 39393 }, { "epoch": 0.5119070630219137, "grad_norm": 0.3667483329772949, "learning_rate": 9.763838885350733e-05, "loss": 1.4969, "step": 39394 }, { "epoch": 0.5119200575658296, "grad_norm": 0.42266958951950073, "learning_rate": 9.763578939159595e-05, "loss": 1.2915, "step": 39395 }, { "epoch": 0.5119330521097454, "grad_norm": 0.3246303200721741, "learning_rate": 9.763318992968456e-05, "loss": 1.2917, "step": 39396 }, { "epoch": 0.5119460466536613, "grad_norm": 0.4075184762477875, "learning_rate": 9.763059046777317e-05, "loss": 1.596, "step": 39397 }, { "epoch": 0.5119590411975772, "grad_norm": 0.34920117259025574, "learning_rate": 9.76279910058618e-05, "loss": 1.4556, "step": 39398 }, { "epoch": 0.5119720357414931, "grad_norm": 0.38396698236465454, "learning_rate": 9.762539154395041e-05, "loss": 1.2795, "step": 39399 }, { "epoch": 0.5119850302854089, "grad_norm": 0.32200586795806885, "learning_rate": 9.762279208203903e-05, "loss": 1.5126, "step": 39400 }, { "epoch": 0.5119980248293248, "grad_norm": 0.3854883313179016, "learning_rate": 9.762019262012764e-05, "loss": 1.3088, "step": 39401 }, { "epoch": 0.5120110193732407, "grad_norm": 0.46787258982658386, "learning_rate": 9.761759315821625e-05, "loss": 1.4954, "step": 39402 }, { "epoch": 0.5120240139171566, "grad_norm": 0.3591521084308624, "learning_rate": 9.761499369630486e-05, "loss": 1.2844, "step": 39403 }, { "epoch": 0.5120370084610724, "grad_norm": 0.32724836468696594, "learning_rate": 9.761239423439349e-05, "loss": 1.2669, "step": 39404 }, { "epoch": 0.5120500030049883, "grad_norm": 0.3592516779899597, "learning_rate": 9.76097947724821e-05, "loss": 1.2995, "step": 39405 }, { "epoch": 0.5120629975489042, "grad_norm": 0.38421496748924255, "learning_rate": 9.760719531057072e-05, "loss": 1.2846, "step": 39406 }, { "epoch": 0.5120759920928201, "grad_norm": 0.4078837037086487, "learning_rate": 9.760459584865934e-05, "loss": 1.3143, "step": 39407 }, { "epoch": 0.5120889866367359, "grad_norm": 0.3870948851108551, "learning_rate": 9.760199638674795e-05, "loss": 1.267, "step": 39408 }, { "epoch": 0.5121019811806518, "grad_norm": 0.4500604271888733, "learning_rate": 9.759939692483656e-05, "loss": 1.3788, "step": 39409 }, { "epoch": 0.5121149757245677, "grad_norm": 0.3979552984237671, "learning_rate": 9.759679746292518e-05, "loss": 1.4474, "step": 39410 }, { "epoch": 0.5121279702684836, "grad_norm": 0.4356045424938202, "learning_rate": 9.759419800101379e-05, "loss": 1.3508, "step": 39411 }, { "epoch": 0.5121409648123993, "grad_norm": 0.37838214635849, "learning_rate": 9.759159853910242e-05, "loss": 1.2991, "step": 39412 }, { "epoch": 0.5121539593563152, "grad_norm": 0.3353853225708008, "learning_rate": 9.758899907719103e-05, "loss": 1.3965, "step": 39413 }, { "epoch": 0.5121669539002311, "grad_norm": 0.36219966411590576, "learning_rate": 9.758639961527964e-05, "loss": 1.2188, "step": 39414 }, { "epoch": 0.512179948444147, "grad_norm": 0.3651082515716553, "learning_rate": 9.758380015336825e-05, "loss": 1.3183, "step": 39415 }, { "epoch": 0.5121929429880628, "grad_norm": 0.4207509160041809, "learning_rate": 9.758120069145687e-05, "loss": 1.4562, "step": 39416 }, { "epoch": 0.5122059375319787, "grad_norm": 0.3921317160129547, "learning_rate": 9.757860122954549e-05, "loss": 1.256, "step": 39417 }, { "epoch": 0.5122189320758946, "grad_norm": 0.47295695543289185, "learning_rate": 9.757600176763411e-05, "loss": 1.3844, "step": 39418 }, { "epoch": 0.5122319266198105, "grad_norm": 0.3705073595046997, "learning_rate": 9.757340230572272e-05, "loss": 1.4691, "step": 39419 }, { "epoch": 0.5122449211637263, "grad_norm": 0.3770361542701721, "learning_rate": 9.757080284381133e-05, "loss": 1.3377, "step": 39420 }, { "epoch": 0.5122579157076422, "grad_norm": 0.34595030546188354, "learning_rate": 9.756820338189994e-05, "loss": 1.4983, "step": 39421 }, { "epoch": 0.5122709102515581, "grad_norm": 0.4200112223625183, "learning_rate": 9.756560391998857e-05, "loss": 1.3209, "step": 39422 }, { "epoch": 0.512283904795474, "grad_norm": 0.44866743683815, "learning_rate": 9.756300445807718e-05, "loss": 1.3751, "step": 39423 }, { "epoch": 0.5122968993393898, "grad_norm": 0.43044301867485046, "learning_rate": 9.75604049961658e-05, "loss": 1.4599, "step": 39424 }, { "epoch": 0.5123098938833057, "grad_norm": 0.31085312366485596, "learning_rate": 9.755780553425441e-05, "loss": 1.3919, "step": 39425 }, { "epoch": 0.5123228884272216, "grad_norm": 0.42447277903556824, "learning_rate": 9.755520607234302e-05, "loss": 1.4848, "step": 39426 }, { "epoch": 0.5123358829711375, "grad_norm": 0.41111063957214355, "learning_rate": 9.755260661043164e-05, "loss": 1.5893, "step": 39427 }, { "epoch": 0.5123488775150533, "grad_norm": 0.3333425223827362, "learning_rate": 9.755000714852026e-05, "loss": 1.183, "step": 39428 }, { "epoch": 0.5123618720589692, "grad_norm": 0.3708946108818054, "learning_rate": 9.754740768660887e-05, "loss": 1.2738, "step": 39429 }, { "epoch": 0.5123748666028851, "grad_norm": 0.4210493564605713, "learning_rate": 9.75448082246975e-05, "loss": 1.3708, "step": 39430 }, { "epoch": 0.512387861146801, "grad_norm": 0.41169503331184387, "learning_rate": 9.75422087627861e-05, "loss": 1.3323, "step": 39431 }, { "epoch": 0.5124008556907168, "grad_norm": 0.41974085569381714, "learning_rate": 9.753960930087472e-05, "loss": 1.5254, "step": 39432 }, { "epoch": 0.5124138502346327, "grad_norm": 0.4018036723136902, "learning_rate": 9.753700983896334e-05, "loss": 1.2555, "step": 39433 }, { "epoch": 0.5124268447785486, "grad_norm": 0.4323458969593048, "learning_rate": 9.753441037705195e-05, "loss": 1.5239, "step": 39434 }, { "epoch": 0.5124398393224645, "grad_norm": 0.48945188522338867, "learning_rate": 9.753181091514058e-05, "loss": 1.3043, "step": 39435 }, { "epoch": 0.5124528338663804, "grad_norm": 0.3842877447605133, "learning_rate": 9.752921145322919e-05, "loss": 1.3522, "step": 39436 }, { "epoch": 0.5124658284102962, "grad_norm": 0.3731060028076172, "learning_rate": 9.752661199131781e-05, "loss": 1.2713, "step": 39437 }, { "epoch": 0.5124788229542121, "grad_norm": 0.4615040123462677, "learning_rate": 9.752401252940642e-05, "loss": 1.3999, "step": 39438 }, { "epoch": 0.512491817498128, "grad_norm": 0.45647865533828735, "learning_rate": 9.752141306749503e-05, "loss": 1.3285, "step": 39439 }, { "epoch": 0.5125048120420439, "grad_norm": 0.4202924072742462, "learning_rate": 9.751881360558365e-05, "loss": 1.4438, "step": 39440 }, { "epoch": 0.5125178065859597, "grad_norm": 0.32791075110435486, "learning_rate": 9.751621414367227e-05, "loss": 1.3588, "step": 39441 }, { "epoch": 0.5125308011298756, "grad_norm": 0.3537735044956207, "learning_rate": 9.751361468176088e-05, "loss": 1.4101, "step": 39442 }, { "epoch": 0.5125437956737915, "grad_norm": 0.4185813367366791, "learning_rate": 9.75110152198495e-05, "loss": 1.3588, "step": 39443 }, { "epoch": 0.5125567902177074, "grad_norm": 0.35989633202552795, "learning_rate": 9.750841575793812e-05, "loss": 1.3411, "step": 39444 }, { "epoch": 0.5125697847616232, "grad_norm": 0.26838019490242004, "learning_rate": 9.750581629602673e-05, "loss": 1.2136, "step": 39445 }, { "epoch": 0.5125827793055391, "grad_norm": 0.5781556367874146, "learning_rate": 9.750321683411534e-05, "loss": 1.462, "step": 39446 }, { "epoch": 0.512595773849455, "grad_norm": 0.37682318687438965, "learning_rate": 9.750061737220396e-05, "loss": 1.2769, "step": 39447 }, { "epoch": 0.5126087683933709, "grad_norm": 0.41718778014183044, "learning_rate": 9.749801791029257e-05, "loss": 1.3063, "step": 39448 }, { "epoch": 0.5126217629372867, "grad_norm": 0.47583910822868347, "learning_rate": 9.74954184483812e-05, "loss": 1.3256, "step": 39449 }, { "epoch": 0.5126347574812026, "grad_norm": 0.42350900173187256, "learning_rate": 9.749281898646981e-05, "loss": 1.5627, "step": 39450 }, { "epoch": 0.5126477520251185, "grad_norm": 0.49644577503204346, "learning_rate": 9.749021952455842e-05, "loss": 1.3146, "step": 39451 }, { "epoch": 0.5126607465690344, "grad_norm": 0.4172748625278473, "learning_rate": 9.748762006264703e-05, "loss": 1.4177, "step": 39452 }, { "epoch": 0.5126737411129502, "grad_norm": 0.5760789513587952, "learning_rate": 9.748502060073566e-05, "loss": 1.324, "step": 39453 }, { "epoch": 0.5126867356568661, "grad_norm": 0.363398015499115, "learning_rate": 9.748242113882427e-05, "loss": 1.5696, "step": 39454 }, { "epoch": 0.512699730200782, "grad_norm": 0.41613560914993286, "learning_rate": 9.747982167691289e-05, "loss": 1.4868, "step": 39455 }, { "epoch": 0.5127127247446979, "grad_norm": 0.4418090879917145, "learning_rate": 9.74772222150015e-05, "loss": 1.3785, "step": 39456 }, { "epoch": 0.5127257192886137, "grad_norm": 0.28069639205932617, "learning_rate": 9.747462275309011e-05, "loss": 1.2093, "step": 39457 }, { "epoch": 0.5127387138325296, "grad_norm": 0.4408092796802521, "learning_rate": 9.747202329117872e-05, "loss": 1.1734, "step": 39458 }, { "epoch": 0.5127517083764455, "grad_norm": 0.3360148072242737, "learning_rate": 9.746942382926735e-05, "loss": 1.2117, "step": 39459 }, { "epoch": 0.5127647029203614, "grad_norm": 0.42431241273880005, "learning_rate": 9.746682436735596e-05, "loss": 1.462, "step": 39460 }, { "epoch": 0.5127776974642771, "grad_norm": 0.4087930917739868, "learning_rate": 9.746422490544458e-05, "loss": 1.4759, "step": 39461 }, { "epoch": 0.512790692008193, "grad_norm": 0.5163681507110596, "learning_rate": 9.74616254435332e-05, "loss": 1.4411, "step": 39462 }, { "epoch": 0.512803686552109, "grad_norm": 0.3801685571670532, "learning_rate": 9.74590259816218e-05, "loss": 1.5252, "step": 39463 }, { "epoch": 0.5128166810960249, "grad_norm": 0.5072066783905029, "learning_rate": 9.745642651971042e-05, "loss": 1.3552, "step": 39464 }, { "epoch": 0.5128296756399406, "grad_norm": 0.3318372070789337, "learning_rate": 9.745382705779904e-05, "loss": 1.4463, "step": 39465 }, { "epoch": 0.5128426701838565, "grad_norm": 0.5001636147499084, "learning_rate": 9.745122759588765e-05, "loss": 1.4276, "step": 39466 }, { "epoch": 0.5128556647277724, "grad_norm": 0.3696518540382385, "learning_rate": 9.744862813397628e-05, "loss": 1.2401, "step": 39467 }, { "epoch": 0.5128686592716883, "grad_norm": 0.29905152320861816, "learning_rate": 9.744602867206489e-05, "loss": 1.1754, "step": 39468 }, { "epoch": 0.5128816538156041, "grad_norm": 0.334574818611145, "learning_rate": 9.74434292101535e-05, "loss": 1.2266, "step": 39469 }, { "epoch": 0.51289464835952, "grad_norm": 0.4141849875450134, "learning_rate": 9.744082974824212e-05, "loss": 1.3044, "step": 39470 }, { "epoch": 0.5129076429034359, "grad_norm": 0.4707565903663635, "learning_rate": 9.743823028633073e-05, "loss": 1.4208, "step": 39471 }, { "epoch": 0.5129206374473518, "grad_norm": 0.2881731390953064, "learning_rate": 9.743563082441936e-05, "loss": 1.2949, "step": 39472 }, { "epoch": 0.5129336319912676, "grad_norm": 0.48114830255508423, "learning_rate": 9.743303136250797e-05, "loss": 1.2387, "step": 39473 }, { "epoch": 0.5129466265351835, "grad_norm": 0.481478214263916, "learning_rate": 9.743043190059658e-05, "loss": 1.2808, "step": 39474 }, { "epoch": 0.5129596210790994, "grad_norm": 0.3560812175273895, "learning_rate": 9.742783243868519e-05, "loss": 1.5284, "step": 39475 }, { "epoch": 0.5129726156230153, "grad_norm": 0.41663411259651184, "learning_rate": 9.742523297677381e-05, "loss": 1.4942, "step": 39476 }, { "epoch": 0.5129856101669311, "grad_norm": 0.38447460532188416, "learning_rate": 9.742263351486243e-05, "loss": 1.4993, "step": 39477 }, { "epoch": 0.512998604710847, "grad_norm": 0.36054709553718567, "learning_rate": 9.742003405295105e-05, "loss": 1.2638, "step": 39478 }, { "epoch": 0.5130115992547629, "grad_norm": 0.4156758785247803, "learning_rate": 9.741743459103966e-05, "loss": 1.431, "step": 39479 }, { "epoch": 0.5130245937986788, "grad_norm": 0.3394497334957123, "learning_rate": 9.741483512912827e-05, "loss": 1.3306, "step": 39480 }, { "epoch": 0.5130375883425946, "grad_norm": 0.46876341104507446, "learning_rate": 9.741223566721688e-05, "loss": 1.5192, "step": 39481 }, { "epoch": 0.5130505828865105, "grad_norm": 0.4184974730014801, "learning_rate": 9.740963620530551e-05, "loss": 1.3944, "step": 39482 }, { "epoch": 0.5130635774304264, "grad_norm": 0.46014732122421265, "learning_rate": 9.740703674339412e-05, "loss": 1.3378, "step": 39483 }, { "epoch": 0.5130765719743423, "grad_norm": 0.37448692321777344, "learning_rate": 9.740443728148274e-05, "loss": 1.3002, "step": 39484 }, { "epoch": 0.5130895665182581, "grad_norm": 0.40476009249687195, "learning_rate": 9.740183781957135e-05, "loss": 1.5501, "step": 39485 }, { "epoch": 0.513102561062174, "grad_norm": 0.41558411717414856, "learning_rate": 9.739923835765998e-05, "loss": 1.4701, "step": 39486 }, { "epoch": 0.5131155556060899, "grad_norm": 0.44448649883270264, "learning_rate": 9.739663889574858e-05, "loss": 1.5139, "step": 39487 }, { "epoch": 0.5131285501500058, "grad_norm": 0.43545255064964294, "learning_rate": 9.73940394338372e-05, "loss": 1.3608, "step": 39488 }, { "epoch": 0.5131415446939216, "grad_norm": 0.42789942026138306, "learning_rate": 9.739143997192581e-05, "loss": 1.362, "step": 39489 }, { "epoch": 0.5131545392378375, "grad_norm": 0.35289517045021057, "learning_rate": 9.738884051001444e-05, "loss": 1.1577, "step": 39490 }, { "epoch": 0.5131675337817534, "grad_norm": 0.3240531384944916, "learning_rate": 9.738624104810305e-05, "loss": 1.4687, "step": 39491 }, { "epoch": 0.5131805283256693, "grad_norm": 0.4367976486682892, "learning_rate": 9.738364158619167e-05, "loss": 1.4106, "step": 39492 }, { "epoch": 0.5131935228695851, "grad_norm": 0.3416028916835785, "learning_rate": 9.738104212428027e-05, "loss": 1.2996, "step": 39493 }, { "epoch": 0.513206517413501, "grad_norm": 0.393548846244812, "learning_rate": 9.737844266236889e-05, "loss": 1.1513, "step": 39494 }, { "epoch": 0.5132195119574169, "grad_norm": 0.4714078903198242, "learning_rate": 9.73758432004575e-05, "loss": 1.5379, "step": 39495 }, { "epoch": 0.5132325065013328, "grad_norm": 0.3536761999130249, "learning_rate": 9.737324373854613e-05, "loss": 1.3223, "step": 39496 }, { "epoch": 0.5132455010452486, "grad_norm": 0.4359242618083954, "learning_rate": 9.737064427663474e-05, "loss": 1.4809, "step": 39497 }, { "epoch": 0.5132584955891645, "grad_norm": 0.38661080598831177, "learning_rate": 9.736804481472336e-05, "loss": 1.3359, "step": 39498 }, { "epoch": 0.5132714901330804, "grad_norm": 0.49218955636024475, "learning_rate": 9.736544535281197e-05, "loss": 1.5116, "step": 39499 }, { "epoch": 0.5132844846769963, "grad_norm": 0.3969157934188843, "learning_rate": 9.736284589090059e-05, "loss": 1.592, "step": 39500 }, { "epoch": 0.5132974792209121, "grad_norm": 0.3856186270713806, "learning_rate": 9.73602464289892e-05, "loss": 1.3175, "step": 39501 }, { "epoch": 0.513310473764828, "grad_norm": 0.6555805206298828, "learning_rate": 9.735764696707782e-05, "loss": 1.4554, "step": 39502 }, { "epoch": 0.5133234683087439, "grad_norm": 0.4810357093811035, "learning_rate": 9.735504750516643e-05, "loss": 1.4785, "step": 39503 }, { "epoch": 0.5133364628526598, "grad_norm": 0.41863593459129333, "learning_rate": 9.735244804325506e-05, "loss": 1.2289, "step": 39504 }, { "epoch": 0.5133494573965756, "grad_norm": 0.4902775287628174, "learning_rate": 9.734984858134367e-05, "loss": 1.4196, "step": 39505 }, { "epoch": 0.5133624519404915, "grad_norm": 0.319583922624588, "learning_rate": 9.734724911943228e-05, "loss": 1.2695, "step": 39506 }, { "epoch": 0.5133754464844074, "grad_norm": 0.43820762634277344, "learning_rate": 9.73446496575209e-05, "loss": 1.582, "step": 39507 }, { "epoch": 0.5133884410283233, "grad_norm": 0.3769552409648895, "learning_rate": 9.734205019560951e-05, "loss": 1.3192, "step": 39508 }, { "epoch": 0.513401435572239, "grad_norm": 0.3626807928085327, "learning_rate": 9.733945073369814e-05, "loss": 1.3745, "step": 39509 }, { "epoch": 0.513414430116155, "grad_norm": 0.3693755567073822, "learning_rate": 9.733685127178675e-05, "loss": 1.3985, "step": 39510 }, { "epoch": 0.5134274246600709, "grad_norm": 0.37674593925476074, "learning_rate": 9.733425180987536e-05, "loss": 1.3443, "step": 39511 }, { "epoch": 0.5134404192039868, "grad_norm": 0.36541858315467834, "learning_rate": 9.733165234796397e-05, "loss": 1.3516, "step": 39512 }, { "epoch": 0.5134534137479027, "grad_norm": 0.3605427145957947, "learning_rate": 9.73290528860526e-05, "loss": 1.2629, "step": 39513 }, { "epoch": 0.5134664082918184, "grad_norm": 0.38582688570022583, "learning_rate": 9.73264534241412e-05, "loss": 1.346, "step": 39514 }, { "epoch": 0.5134794028357343, "grad_norm": 0.3768206834793091, "learning_rate": 9.732385396222983e-05, "loss": 1.5225, "step": 39515 }, { "epoch": 0.5134923973796502, "grad_norm": 0.47960832715034485, "learning_rate": 9.732125450031844e-05, "loss": 1.336, "step": 39516 }, { "epoch": 0.5135053919235661, "grad_norm": 0.3815993368625641, "learning_rate": 9.731865503840705e-05, "loss": 1.3352, "step": 39517 }, { "epoch": 0.5135183864674819, "grad_norm": 0.45172834396362305, "learning_rate": 9.731605557649566e-05, "loss": 1.4784, "step": 39518 }, { "epoch": 0.5135313810113978, "grad_norm": 0.3518627882003784, "learning_rate": 9.731345611458429e-05, "loss": 1.3606, "step": 39519 }, { "epoch": 0.5135443755553137, "grad_norm": 0.38611143827438354, "learning_rate": 9.73108566526729e-05, "loss": 1.1186, "step": 39520 }, { "epoch": 0.5135573700992296, "grad_norm": 0.4205883741378784, "learning_rate": 9.730825719076152e-05, "loss": 1.5008, "step": 39521 }, { "epoch": 0.5135703646431454, "grad_norm": 0.4126805365085602, "learning_rate": 9.730565772885013e-05, "loss": 1.3918, "step": 39522 }, { "epoch": 0.5135833591870613, "grad_norm": 0.35251301527023315, "learning_rate": 9.730305826693875e-05, "loss": 1.2244, "step": 39523 }, { "epoch": 0.5135963537309772, "grad_norm": 0.48083367943763733, "learning_rate": 9.730045880502736e-05, "loss": 1.5146, "step": 39524 }, { "epoch": 0.5136093482748931, "grad_norm": 0.44507211446762085, "learning_rate": 9.729785934311598e-05, "loss": 1.3203, "step": 39525 }, { "epoch": 0.5136223428188089, "grad_norm": 0.4734783172607422, "learning_rate": 9.729525988120459e-05, "loss": 1.3451, "step": 39526 }, { "epoch": 0.5136353373627248, "grad_norm": 0.2835654020309448, "learning_rate": 9.729266041929322e-05, "loss": 1.2581, "step": 39527 }, { "epoch": 0.5136483319066407, "grad_norm": 0.3735317587852478, "learning_rate": 9.729006095738183e-05, "loss": 1.4256, "step": 39528 }, { "epoch": 0.5136613264505566, "grad_norm": 0.4685862958431244, "learning_rate": 9.728746149547044e-05, "loss": 1.3742, "step": 39529 }, { "epoch": 0.5136743209944724, "grad_norm": 0.3782171308994293, "learning_rate": 9.728486203355905e-05, "loss": 1.4356, "step": 39530 }, { "epoch": 0.5136873155383883, "grad_norm": 0.40176665782928467, "learning_rate": 9.728226257164767e-05, "loss": 1.3824, "step": 39531 }, { "epoch": 0.5137003100823042, "grad_norm": 0.3948041796684265, "learning_rate": 9.727966310973628e-05, "loss": 1.3063, "step": 39532 }, { "epoch": 0.5137133046262201, "grad_norm": 0.4475201964378357, "learning_rate": 9.727706364782491e-05, "loss": 1.4033, "step": 39533 }, { "epoch": 0.5137262991701359, "grad_norm": 0.41835859417915344, "learning_rate": 9.727446418591352e-05, "loss": 1.4636, "step": 39534 }, { "epoch": 0.5137392937140518, "grad_norm": 0.39145171642303467, "learning_rate": 9.727186472400213e-05, "loss": 1.4561, "step": 39535 }, { "epoch": 0.5137522882579677, "grad_norm": 0.3844452500343323, "learning_rate": 9.726926526209074e-05, "loss": 1.3852, "step": 39536 }, { "epoch": 0.5137652828018836, "grad_norm": 0.3905602693557739, "learning_rate": 9.726666580017937e-05, "loss": 1.2901, "step": 39537 }, { "epoch": 0.5137782773457994, "grad_norm": 0.40003377199172974, "learning_rate": 9.726406633826798e-05, "loss": 1.3927, "step": 39538 }, { "epoch": 0.5137912718897153, "grad_norm": 0.4175763428211212, "learning_rate": 9.72614668763566e-05, "loss": 1.5071, "step": 39539 }, { "epoch": 0.5138042664336312, "grad_norm": 0.5333362221717834, "learning_rate": 9.725886741444521e-05, "loss": 1.5085, "step": 39540 }, { "epoch": 0.5138172609775471, "grad_norm": 0.4528881311416626, "learning_rate": 9.725626795253384e-05, "loss": 1.3516, "step": 39541 }, { "epoch": 0.5138302555214629, "grad_norm": 0.36816319823265076, "learning_rate": 9.725366849062243e-05, "loss": 1.3222, "step": 39542 }, { "epoch": 0.5138432500653788, "grad_norm": 0.4030925929546356, "learning_rate": 9.725106902871106e-05, "loss": 1.5036, "step": 39543 }, { "epoch": 0.5138562446092947, "grad_norm": 0.44089964032173157, "learning_rate": 9.724846956679968e-05, "loss": 1.3359, "step": 39544 }, { "epoch": 0.5138692391532106, "grad_norm": 0.2526116967201233, "learning_rate": 9.72458701048883e-05, "loss": 1.3223, "step": 39545 }, { "epoch": 0.5138822336971264, "grad_norm": 0.39631563425064087, "learning_rate": 9.724327064297692e-05, "loss": 1.2672, "step": 39546 }, { "epoch": 0.5138952282410423, "grad_norm": 0.440799355506897, "learning_rate": 9.724067118106553e-05, "loss": 1.3313, "step": 39547 }, { "epoch": 0.5139082227849582, "grad_norm": 0.3612401485443115, "learning_rate": 9.723807171915414e-05, "loss": 1.3459, "step": 39548 }, { "epoch": 0.5139212173288741, "grad_norm": 0.4177757203578949, "learning_rate": 9.723547225724275e-05, "loss": 1.3998, "step": 39549 }, { "epoch": 0.5139342118727899, "grad_norm": 0.42302751541137695, "learning_rate": 9.723287279533138e-05, "loss": 1.4905, "step": 39550 }, { "epoch": 0.5139472064167058, "grad_norm": 0.3923909366130829, "learning_rate": 9.723027333341999e-05, "loss": 1.4384, "step": 39551 }, { "epoch": 0.5139602009606217, "grad_norm": 0.45204275846481323, "learning_rate": 9.722767387150861e-05, "loss": 1.5433, "step": 39552 }, { "epoch": 0.5139731955045376, "grad_norm": 0.3555446267127991, "learning_rate": 9.722507440959722e-05, "loss": 1.3435, "step": 39553 }, { "epoch": 0.5139861900484534, "grad_norm": 0.4486521780490875, "learning_rate": 9.722247494768583e-05, "loss": 1.5393, "step": 39554 }, { "epoch": 0.5139991845923693, "grad_norm": 0.41485920548439026, "learning_rate": 9.721987548577444e-05, "loss": 1.5329, "step": 39555 }, { "epoch": 0.5140121791362852, "grad_norm": 0.42809656262397766, "learning_rate": 9.721727602386307e-05, "loss": 1.252, "step": 39556 }, { "epoch": 0.5140251736802011, "grad_norm": 0.3964829742908478, "learning_rate": 9.721467656195168e-05, "loss": 1.3866, "step": 39557 }, { "epoch": 0.5140381682241169, "grad_norm": 0.41932404041290283, "learning_rate": 9.72120771000403e-05, "loss": 1.4595, "step": 39558 }, { "epoch": 0.5140511627680328, "grad_norm": 0.30819523334503174, "learning_rate": 9.720947763812892e-05, "loss": 1.2134, "step": 39559 }, { "epoch": 0.5140641573119487, "grad_norm": 0.4475322365760803, "learning_rate": 9.720687817621753e-05, "loss": 1.4524, "step": 39560 }, { "epoch": 0.5140771518558646, "grad_norm": 0.3976454734802246, "learning_rate": 9.720427871430614e-05, "loss": 1.3617, "step": 39561 }, { "epoch": 0.5140901463997803, "grad_norm": 0.474162220954895, "learning_rate": 9.720167925239476e-05, "loss": 1.3463, "step": 39562 }, { "epoch": 0.5141031409436962, "grad_norm": 0.3635695278644562, "learning_rate": 9.719907979048337e-05, "loss": 1.3694, "step": 39563 }, { "epoch": 0.5141161354876121, "grad_norm": 0.40568265318870544, "learning_rate": 9.7196480328572e-05, "loss": 1.3383, "step": 39564 }, { "epoch": 0.514129130031528, "grad_norm": 0.46171995997428894, "learning_rate": 9.719388086666061e-05, "loss": 1.6526, "step": 39565 }, { "epoch": 0.5141421245754438, "grad_norm": 0.29516011476516724, "learning_rate": 9.719128140474922e-05, "loss": 1.2467, "step": 39566 }, { "epoch": 0.5141551191193597, "grad_norm": 0.3648897111415863, "learning_rate": 9.718868194283783e-05, "loss": 1.2264, "step": 39567 }, { "epoch": 0.5141681136632756, "grad_norm": 0.9092336893081665, "learning_rate": 9.718608248092645e-05, "loss": 1.6395, "step": 39568 }, { "epoch": 0.5141811082071915, "grad_norm": 0.3998500108718872, "learning_rate": 9.718348301901507e-05, "loss": 1.3913, "step": 39569 }, { "epoch": 0.5141941027511073, "grad_norm": 0.39480289816856384, "learning_rate": 9.718088355710369e-05, "loss": 1.3587, "step": 39570 }, { "epoch": 0.5142070972950232, "grad_norm": 0.4250936806201935, "learning_rate": 9.71782840951923e-05, "loss": 1.2375, "step": 39571 }, { "epoch": 0.5142200918389391, "grad_norm": 0.42685559391975403, "learning_rate": 9.717568463328091e-05, "loss": 1.3746, "step": 39572 }, { "epoch": 0.514233086382855, "grad_norm": 0.6645125150680542, "learning_rate": 9.717308517136952e-05, "loss": 1.4749, "step": 39573 }, { "epoch": 0.5142460809267708, "grad_norm": 0.42402684688568115, "learning_rate": 9.717048570945815e-05, "loss": 1.5369, "step": 39574 }, { "epoch": 0.5142590754706867, "grad_norm": 0.38953161239624023, "learning_rate": 9.716788624754676e-05, "loss": 1.4201, "step": 39575 }, { "epoch": 0.5142720700146026, "grad_norm": 0.44474655389785767, "learning_rate": 9.716528678563538e-05, "loss": 1.3513, "step": 39576 }, { "epoch": 0.5142850645585185, "grad_norm": 0.3826843500137329, "learning_rate": 9.7162687323724e-05, "loss": 1.3743, "step": 39577 }, { "epoch": 0.5142980591024343, "grad_norm": 0.4314892888069153, "learning_rate": 9.71600878618126e-05, "loss": 1.3754, "step": 39578 }, { "epoch": 0.5143110536463502, "grad_norm": 0.4289112687110901, "learning_rate": 9.715748839990122e-05, "loss": 1.3172, "step": 39579 }, { "epoch": 0.5143240481902661, "grad_norm": 0.3868785500526428, "learning_rate": 9.715488893798984e-05, "loss": 1.4085, "step": 39580 }, { "epoch": 0.514337042734182, "grad_norm": 0.4201281666755676, "learning_rate": 9.715228947607846e-05, "loss": 1.4202, "step": 39581 }, { "epoch": 0.5143500372780978, "grad_norm": 0.4700254797935486, "learning_rate": 9.714969001416708e-05, "loss": 1.4354, "step": 39582 }, { "epoch": 0.5143630318220137, "grad_norm": 0.4276207983493805, "learning_rate": 9.714709055225569e-05, "loss": 1.3266, "step": 39583 }, { "epoch": 0.5143760263659296, "grad_norm": 0.47100603580474854, "learning_rate": 9.71444910903443e-05, "loss": 1.4775, "step": 39584 }, { "epoch": 0.5143890209098455, "grad_norm": 0.4283423125743866, "learning_rate": 9.714189162843292e-05, "loss": 1.2042, "step": 39585 }, { "epoch": 0.5144020154537613, "grad_norm": 0.38923966884613037, "learning_rate": 9.713929216652153e-05, "loss": 1.3739, "step": 39586 }, { "epoch": 0.5144150099976772, "grad_norm": 0.3972034454345703, "learning_rate": 9.713669270461016e-05, "loss": 1.3613, "step": 39587 }, { "epoch": 0.5144280045415931, "grad_norm": 0.35842642188072205, "learning_rate": 9.713409324269877e-05, "loss": 1.4798, "step": 39588 }, { "epoch": 0.514440999085509, "grad_norm": 0.2264215052127838, "learning_rate": 9.713149378078739e-05, "loss": 1.2238, "step": 39589 }, { "epoch": 0.5144539936294249, "grad_norm": 0.4745984971523285, "learning_rate": 9.712889431887599e-05, "loss": 1.3859, "step": 39590 }, { "epoch": 0.5144669881733407, "grad_norm": 0.4654080271720886, "learning_rate": 9.712629485696461e-05, "loss": 1.5567, "step": 39591 }, { "epoch": 0.5144799827172566, "grad_norm": 0.41530296206474304, "learning_rate": 9.712369539505323e-05, "loss": 1.4064, "step": 39592 }, { "epoch": 0.5144929772611725, "grad_norm": 0.46620091795921326, "learning_rate": 9.712109593314185e-05, "loss": 1.4118, "step": 39593 }, { "epoch": 0.5145059718050884, "grad_norm": 0.4257492125034332, "learning_rate": 9.711849647123046e-05, "loss": 1.53, "step": 39594 }, { "epoch": 0.5145189663490042, "grad_norm": 0.34607985615730286, "learning_rate": 9.711589700931909e-05, "loss": 1.4411, "step": 39595 }, { "epoch": 0.5145319608929201, "grad_norm": 0.35827481746673584, "learning_rate": 9.711329754740768e-05, "loss": 1.4432, "step": 39596 }, { "epoch": 0.514544955436836, "grad_norm": 0.37934985756874084, "learning_rate": 9.711069808549631e-05, "loss": 1.5502, "step": 39597 }, { "epoch": 0.5145579499807519, "grad_norm": 0.33695855736732483, "learning_rate": 9.710809862358492e-05, "loss": 1.0542, "step": 39598 }, { "epoch": 0.5145709445246677, "grad_norm": 0.41489583253860474, "learning_rate": 9.710549916167354e-05, "loss": 1.2033, "step": 39599 }, { "epoch": 0.5145839390685836, "grad_norm": 0.5226443409919739, "learning_rate": 9.710289969976215e-05, "loss": 1.5031, "step": 39600 }, { "epoch": 0.5145969336124995, "grad_norm": 0.488183856010437, "learning_rate": 9.710030023785078e-05, "loss": 1.4077, "step": 39601 }, { "epoch": 0.5146099281564154, "grad_norm": 0.4549994170665741, "learning_rate": 9.709770077593939e-05, "loss": 1.4703, "step": 39602 }, { "epoch": 0.5146229227003312, "grad_norm": 0.46214890480041504, "learning_rate": 9.7095101314028e-05, "loss": 1.4448, "step": 39603 }, { "epoch": 0.5146359172442471, "grad_norm": 0.35513556003570557, "learning_rate": 9.709250185211661e-05, "loss": 1.1875, "step": 39604 }, { "epoch": 0.514648911788163, "grad_norm": 0.441771924495697, "learning_rate": 9.708990239020524e-05, "loss": 1.4226, "step": 39605 }, { "epoch": 0.5146619063320789, "grad_norm": 0.3566654622554779, "learning_rate": 9.708730292829385e-05, "loss": 1.4356, "step": 39606 }, { "epoch": 0.5146749008759947, "grad_norm": 0.4244002103805542, "learning_rate": 9.708470346638247e-05, "loss": 1.3938, "step": 39607 }, { "epoch": 0.5146878954199106, "grad_norm": 0.46922242641448975, "learning_rate": 9.708210400447108e-05, "loss": 1.5661, "step": 39608 }, { "epoch": 0.5147008899638265, "grad_norm": 0.34383782744407654, "learning_rate": 9.707950454255969e-05, "loss": 1.3873, "step": 39609 }, { "epoch": 0.5147138845077424, "grad_norm": 0.3621924817562103, "learning_rate": 9.70769050806483e-05, "loss": 1.4737, "step": 39610 }, { "epoch": 0.5147268790516581, "grad_norm": 0.41974350810050964, "learning_rate": 9.707430561873693e-05, "loss": 1.2503, "step": 39611 }, { "epoch": 0.514739873595574, "grad_norm": 0.4380539655685425, "learning_rate": 9.707170615682554e-05, "loss": 1.4793, "step": 39612 }, { "epoch": 0.51475286813949, "grad_norm": 0.4075803756713867, "learning_rate": 9.706910669491416e-05, "loss": 1.3166, "step": 39613 }, { "epoch": 0.5147658626834059, "grad_norm": 0.3006149232387543, "learning_rate": 9.706650723300277e-05, "loss": 1.1098, "step": 39614 }, { "epoch": 0.5147788572273216, "grad_norm": 0.48501020669937134, "learning_rate": 9.706390777109139e-05, "loss": 1.2855, "step": 39615 }, { "epoch": 0.5147918517712375, "grad_norm": 0.3923147916793823, "learning_rate": 9.706130830918e-05, "loss": 1.3287, "step": 39616 }, { "epoch": 0.5148048463151534, "grad_norm": 0.33656594157218933, "learning_rate": 9.705870884726862e-05, "loss": 1.3181, "step": 39617 }, { "epoch": 0.5148178408590693, "grad_norm": 0.3882770240306854, "learning_rate": 9.705610938535724e-05, "loss": 1.3012, "step": 39618 }, { "epoch": 0.5148308354029851, "grad_norm": 0.3996526598930359, "learning_rate": 9.705350992344586e-05, "loss": 1.2602, "step": 39619 }, { "epoch": 0.514843829946901, "grad_norm": 0.4422810971736908, "learning_rate": 9.705091046153447e-05, "loss": 1.3228, "step": 39620 }, { "epoch": 0.5148568244908169, "grad_norm": 0.4335898756980896, "learning_rate": 9.704831099962308e-05, "loss": 1.3476, "step": 39621 }, { "epoch": 0.5148698190347328, "grad_norm": 0.4514157474040985, "learning_rate": 9.70457115377117e-05, "loss": 1.398, "step": 39622 }, { "epoch": 0.5148828135786486, "grad_norm": 0.448866069316864, "learning_rate": 9.704311207580031e-05, "loss": 1.5592, "step": 39623 }, { "epoch": 0.5148958081225645, "grad_norm": 0.42010459303855896, "learning_rate": 9.704051261388894e-05, "loss": 1.5571, "step": 39624 }, { "epoch": 0.5149088026664804, "grad_norm": 0.4195231795310974, "learning_rate": 9.703791315197755e-05, "loss": 1.4552, "step": 39625 }, { "epoch": 0.5149217972103963, "grad_norm": 0.4092920124530792, "learning_rate": 9.703531369006616e-05, "loss": 1.429, "step": 39626 }, { "epoch": 0.5149347917543121, "grad_norm": 0.43227365612983704, "learning_rate": 9.703271422815477e-05, "loss": 1.3812, "step": 39627 }, { "epoch": 0.514947786298228, "grad_norm": 0.4294111132621765, "learning_rate": 9.70301147662434e-05, "loss": 1.397, "step": 39628 }, { "epoch": 0.5149607808421439, "grad_norm": 0.4405810534954071, "learning_rate": 9.7027515304332e-05, "loss": 1.6005, "step": 39629 }, { "epoch": 0.5149737753860598, "grad_norm": 0.3107149302959442, "learning_rate": 9.702491584242063e-05, "loss": 1.2027, "step": 39630 }, { "epoch": 0.5149867699299756, "grad_norm": 0.49265652894973755, "learning_rate": 9.702231638050924e-05, "loss": 1.3595, "step": 39631 }, { "epoch": 0.5149997644738915, "grad_norm": 0.4751925766468048, "learning_rate": 9.701971691859785e-05, "loss": 1.3913, "step": 39632 }, { "epoch": 0.5150127590178074, "grad_norm": 0.3191274404525757, "learning_rate": 9.701711745668646e-05, "loss": 1.1649, "step": 39633 }, { "epoch": 0.5150257535617233, "grad_norm": 0.403568834066391, "learning_rate": 9.701451799477509e-05, "loss": 1.4818, "step": 39634 }, { "epoch": 0.5150387481056391, "grad_norm": 0.4050021171569824, "learning_rate": 9.70119185328637e-05, "loss": 1.4224, "step": 39635 }, { "epoch": 0.515051742649555, "grad_norm": 0.4036855101585388, "learning_rate": 9.700931907095232e-05, "loss": 1.598, "step": 39636 }, { "epoch": 0.5150647371934709, "grad_norm": 0.6103993654251099, "learning_rate": 9.700671960904093e-05, "loss": 1.5138, "step": 39637 }, { "epoch": 0.5150777317373868, "grad_norm": 0.4858066141605377, "learning_rate": 9.700412014712954e-05, "loss": 1.3041, "step": 39638 }, { "epoch": 0.5150907262813026, "grad_norm": 0.4496895968914032, "learning_rate": 9.700152068521816e-05, "loss": 1.4757, "step": 39639 }, { "epoch": 0.5151037208252185, "grad_norm": 0.3962342441082001, "learning_rate": 9.699892122330678e-05, "loss": 1.3397, "step": 39640 }, { "epoch": 0.5151167153691344, "grad_norm": 0.4005768895149231, "learning_rate": 9.699632176139539e-05, "loss": 1.2683, "step": 39641 }, { "epoch": 0.5151297099130503, "grad_norm": 0.4648260474205017, "learning_rate": 9.699372229948402e-05, "loss": 1.3596, "step": 39642 }, { "epoch": 0.5151427044569661, "grad_norm": 0.269114226102829, "learning_rate": 9.699112283757263e-05, "loss": 1.5445, "step": 39643 }, { "epoch": 0.515155699000882, "grad_norm": 0.3677331209182739, "learning_rate": 9.698852337566125e-05, "loss": 1.1855, "step": 39644 }, { "epoch": 0.5151686935447979, "grad_norm": 0.38550108671188354, "learning_rate": 9.698592391374985e-05, "loss": 1.3194, "step": 39645 }, { "epoch": 0.5151816880887138, "grad_norm": 0.424008309841156, "learning_rate": 9.698332445183847e-05, "loss": 1.3432, "step": 39646 }, { "epoch": 0.5151946826326296, "grad_norm": 0.38239532709121704, "learning_rate": 9.698072498992708e-05, "loss": 1.2656, "step": 39647 }, { "epoch": 0.5152076771765455, "grad_norm": 0.4066025912761688, "learning_rate": 9.697812552801571e-05, "loss": 1.3697, "step": 39648 }, { "epoch": 0.5152206717204614, "grad_norm": 0.46402406692504883, "learning_rate": 9.697552606610432e-05, "loss": 1.466, "step": 39649 }, { "epoch": 0.5152336662643773, "grad_norm": 0.370604008436203, "learning_rate": 9.697292660419294e-05, "loss": 1.1553, "step": 39650 }, { "epoch": 0.5152466608082931, "grad_norm": 0.33773642778396606, "learning_rate": 9.697032714228154e-05, "loss": 1.3052, "step": 39651 }, { "epoch": 0.515259655352209, "grad_norm": 0.4245837926864624, "learning_rate": 9.696772768037017e-05, "loss": 1.1692, "step": 39652 }, { "epoch": 0.5152726498961249, "grad_norm": 0.37763452529907227, "learning_rate": 9.696512821845878e-05, "loss": 1.3546, "step": 39653 }, { "epoch": 0.5152856444400408, "grad_norm": 0.3826252818107605, "learning_rate": 9.69625287565474e-05, "loss": 1.3515, "step": 39654 }, { "epoch": 0.5152986389839566, "grad_norm": 0.40364518761634827, "learning_rate": 9.695992929463603e-05, "loss": 1.3175, "step": 39655 }, { "epoch": 0.5153116335278725, "grad_norm": 0.3601331114768982, "learning_rate": 9.695732983272464e-05, "loss": 1.3983, "step": 39656 }, { "epoch": 0.5153246280717884, "grad_norm": 0.4910103678703308, "learning_rate": 9.695473037081325e-05, "loss": 1.4178, "step": 39657 }, { "epoch": 0.5153376226157043, "grad_norm": 0.3635617196559906, "learning_rate": 9.695213090890186e-05, "loss": 1.4992, "step": 39658 }, { "epoch": 0.51535061715962, "grad_norm": 0.4244706630706787, "learning_rate": 9.694953144699048e-05, "loss": 1.3825, "step": 39659 }, { "epoch": 0.515363611703536, "grad_norm": 0.4413972795009613, "learning_rate": 9.69469319850791e-05, "loss": 1.2561, "step": 39660 }, { "epoch": 0.5153766062474519, "grad_norm": 0.4252251088619232, "learning_rate": 9.694433252316772e-05, "loss": 1.5165, "step": 39661 }, { "epoch": 0.5153896007913678, "grad_norm": 0.38003408908843994, "learning_rate": 9.694173306125633e-05, "loss": 1.4168, "step": 39662 }, { "epoch": 0.5154025953352837, "grad_norm": 0.4139915108680725, "learning_rate": 9.693913359934494e-05, "loss": 1.34, "step": 39663 }, { "epoch": 0.5154155898791994, "grad_norm": 0.41516634821891785, "learning_rate": 9.693653413743355e-05, "loss": 1.3181, "step": 39664 }, { "epoch": 0.5154285844231153, "grad_norm": 0.40572717785835266, "learning_rate": 9.693393467552218e-05, "loss": 1.3159, "step": 39665 }, { "epoch": 0.5154415789670312, "grad_norm": 0.43985527753829956, "learning_rate": 9.693133521361079e-05, "loss": 1.3505, "step": 39666 }, { "epoch": 0.5154545735109471, "grad_norm": 0.38294097781181335, "learning_rate": 9.692873575169941e-05, "loss": 1.4556, "step": 39667 }, { "epoch": 0.5154675680548629, "grad_norm": 0.40433546900749207, "learning_rate": 9.692613628978802e-05, "loss": 1.2838, "step": 39668 }, { "epoch": 0.5154805625987788, "grad_norm": 0.33904653787612915, "learning_rate": 9.692353682787663e-05, "loss": 1.4997, "step": 39669 }, { "epoch": 0.5154935571426947, "grad_norm": 0.4727816581726074, "learning_rate": 9.692093736596524e-05, "loss": 1.3736, "step": 39670 }, { "epoch": 0.5155065516866106, "grad_norm": 0.4772983193397522, "learning_rate": 9.691833790405387e-05, "loss": 1.3239, "step": 39671 }, { "epoch": 0.5155195462305264, "grad_norm": 0.295014888048172, "learning_rate": 9.691573844214248e-05, "loss": 1.1912, "step": 39672 }, { "epoch": 0.5155325407744423, "grad_norm": 0.3867585361003876, "learning_rate": 9.69131389802311e-05, "loss": 1.4461, "step": 39673 }, { "epoch": 0.5155455353183582, "grad_norm": 0.39707034826278687, "learning_rate": 9.691053951831971e-05, "loss": 1.4088, "step": 39674 }, { "epoch": 0.5155585298622741, "grad_norm": 0.27397042512893677, "learning_rate": 9.690794005640833e-05, "loss": 1.1664, "step": 39675 }, { "epoch": 0.5155715244061899, "grad_norm": 0.39887845516204834, "learning_rate": 9.690534059449694e-05, "loss": 1.4501, "step": 39676 }, { "epoch": 0.5155845189501058, "grad_norm": 0.3815763592720032, "learning_rate": 9.690274113258556e-05, "loss": 1.277, "step": 39677 }, { "epoch": 0.5155975134940217, "grad_norm": 0.44615644216537476, "learning_rate": 9.690014167067417e-05, "loss": 1.4794, "step": 39678 }, { "epoch": 0.5156105080379376, "grad_norm": 0.4535128176212311, "learning_rate": 9.68975422087628e-05, "loss": 1.3566, "step": 39679 }, { "epoch": 0.5156235025818534, "grad_norm": 0.37491074204444885, "learning_rate": 9.689494274685141e-05, "loss": 1.1551, "step": 39680 }, { "epoch": 0.5156364971257693, "grad_norm": 0.3964325785636902, "learning_rate": 9.689234328494002e-05, "loss": 1.4746, "step": 39681 }, { "epoch": 0.5156494916696852, "grad_norm": 0.4339883327484131, "learning_rate": 9.688974382302863e-05, "loss": 1.4004, "step": 39682 }, { "epoch": 0.5156624862136011, "grad_norm": 0.42420902848243713, "learning_rate": 9.688714436111725e-05, "loss": 1.343, "step": 39683 }, { "epoch": 0.5156754807575169, "grad_norm": 0.4737933874130249, "learning_rate": 9.688454489920586e-05, "loss": 1.5135, "step": 39684 }, { "epoch": 0.5156884753014328, "grad_norm": 0.39823395013809204, "learning_rate": 9.688194543729449e-05, "loss": 1.3754, "step": 39685 }, { "epoch": 0.5157014698453487, "grad_norm": 0.3858322203159332, "learning_rate": 9.68793459753831e-05, "loss": 1.455, "step": 39686 }, { "epoch": 0.5157144643892646, "grad_norm": 0.385077565908432, "learning_rate": 9.687674651347171e-05, "loss": 1.3362, "step": 39687 }, { "epoch": 0.5157274589331804, "grad_norm": 0.3967994153499603, "learning_rate": 9.687414705156032e-05, "loss": 1.519, "step": 39688 }, { "epoch": 0.5157404534770963, "grad_norm": 0.40071114897727966, "learning_rate": 9.687154758964895e-05, "loss": 1.4978, "step": 39689 }, { "epoch": 0.5157534480210122, "grad_norm": 0.38770627975463867, "learning_rate": 9.686894812773756e-05, "loss": 1.4567, "step": 39690 }, { "epoch": 0.5157664425649281, "grad_norm": 0.4315352737903595, "learning_rate": 9.686634866582618e-05, "loss": 1.4558, "step": 39691 }, { "epoch": 0.5157794371088439, "grad_norm": 0.36199596524238586, "learning_rate": 9.68637492039148e-05, "loss": 1.4656, "step": 39692 }, { "epoch": 0.5157924316527598, "grad_norm": 0.4555199146270752, "learning_rate": 9.68611497420034e-05, "loss": 1.4621, "step": 39693 }, { "epoch": 0.5158054261966757, "grad_norm": 0.3427242040634155, "learning_rate": 9.685855028009203e-05, "loss": 1.2882, "step": 39694 }, { "epoch": 0.5158184207405916, "grad_norm": 0.3931252062320709, "learning_rate": 9.685595081818064e-05, "loss": 1.4466, "step": 39695 }, { "epoch": 0.5158314152845074, "grad_norm": 0.41686004400253296, "learning_rate": 9.685335135626926e-05, "loss": 1.4782, "step": 39696 }, { "epoch": 0.5158444098284233, "grad_norm": 0.448379784822464, "learning_rate": 9.685075189435787e-05, "loss": 1.4749, "step": 39697 }, { "epoch": 0.5158574043723392, "grad_norm": 0.4665580093860626, "learning_rate": 9.68481524324465e-05, "loss": 1.4845, "step": 39698 }, { "epoch": 0.5158703989162551, "grad_norm": 0.344743013381958, "learning_rate": 9.68455529705351e-05, "loss": 1.4014, "step": 39699 }, { "epoch": 0.5158833934601709, "grad_norm": 0.17722588777542114, "learning_rate": 9.684295350862372e-05, "loss": 0.9985, "step": 39700 }, { "epoch": 0.5158963880040868, "grad_norm": 0.3760932385921478, "learning_rate": 9.684035404671233e-05, "loss": 1.5692, "step": 39701 }, { "epoch": 0.5159093825480027, "grad_norm": 0.3735870122909546, "learning_rate": 9.683775458480096e-05, "loss": 1.3973, "step": 39702 }, { "epoch": 0.5159223770919186, "grad_norm": 0.42061847448349, "learning_rate": 9.683515512288957e-05, "loss": 1.4381, "step": 39703 }, { "epoch": 0.5159353716358344, "grad_norm": 0.4074678122997284, "learning_rate": 9.683255566097819e-05, "loss": 1.3, "step": 39704 }, { "epoch": 0.5159483661797503, "grad_norm": 0.4492395520210266, "learning_rate": 9.68299561990668e-05, "loss": 1.4017, "step": 39705 }, { "epoch": 0.5159613607236662, "grad_norm": 0.35534337162971497, "learning_rate": 9.682735673715541e-05, "loss": 1.48, "step": 39706 }, { "epoch": 0.5159743552675821, "grad_norm": 0.38247454166412354, "learning_rate": 9.682475727524402e-05, "loss": 1.2794, "step": 39707 }, { "epoch": 0.5159873498114979, "grad_norm": 0.4027169644832611, "learning_rate": 9.682215781333265e-05, "loss": 1.3626, "step": 39708 }, { "epoch": 0.5160003443554138, "grad_norm": 0.42067059874534607, "learning_rate": 9.681955835142126e-05, "loss": 1.5148, "step": 39709 }, { "epoch": 0.5160133388993297, "grad_norm": 0.43157386779785156, "learning_rate": 9.681695888950988e-05, "loss": 1.2895, "step": 39710 }, { "epoch": 0.5160263334432456, "grad_norm": 0.4166816473007202, "learning_rate": 9.68143594275985e-05, "loss": 1.3646, "step": 39711 }, { "epoch": 0.5160393279871613, "grad_norm": 0.4115425944328308, "learning_rate": 9.68117599656871e-05, "loss": 1.2935, "step": 39712 }, { "epoch": 0.5160523225310772, "grad_norm": 0.3313804566860199, "learning_rate": 9.680916050377572e-05, "loss": 1.4153, "step": 39713 }, { "epoch": 0.5160653170749931, "grad_norm": 0.4072376787662506, "learning_rate": 9.680656104186434e-05, "loss": 1.4457, "step": 39714 }, { "epoch": 0.516078311618909, "grad_norm": 0.4364437162876129, "learning_rate": 9.680396157995295e-05, "loss": 1.4303, "step": 39715 }, { "epoch": 0.5160913061628248, "grad_norm": 0.3545844554901123, "learning_rate": 9.680136211804158e-05, "loss": 1.3587, "step": 39716 }, { "epoch": 0.5161043007067407, "grad_norm": 0.4468756318092346, "learning_rate": 9.679876265613019e-05, "loss": 1.3551, "step": 39717 }, { "epoch": 0.5161172952506566, "grad_norm": 0.4774535894393921, "learning_rate": 9.67961631942188e-05, "loss": 1.3784, "step": 39718 }, { "epoch": 0.5161302897945725, "grad_norm": 0.2824321389198303, "learning_rate": 9.679356373230741e-05, "loss": 1.372, "step": 39719 }, { "epoch": 0.5161432843384883, "grad_norm": 0.32836994528770447, "learning_rate": 9.679096427039603e-05, "loss": 1.4254, "step": 39720 }, { "epoch": 0.5161562788824042, "grad_norm": 0.3381592333316803, "learning_rate": 9.678836480848465e-05, "loss": 1.3536, "step": 39721 }, { "epoch": 0.5161692734263201, "grad_norm": 0.42833465337753296, "learning_rate": 9.678576534657327e-05, "loss": 1.5646, "step": 39722 }, { "epoch": 0.516182267970236, "grad_norm": 0.31299206614494324, "learning_rate": 9.678316588466188e-05, "loss": 1.2645, "step": 39723 }, { "epoch": 0.5161952625141518, "grad_norm": 0.4437852203845978, "learning_rate": 9.678056642275049e-05, "loss": 1.429, "step": 39724 }, { "epoch": 0.5162082570580677, "grad_norm": 0.42146965861320496, "learning_rate": 9.67779669608391e-05, "loss": 1.3933, "step": 39725 }, { "epoch": 0.5162212516019836, "grad_norm": 0.385261207818985, "learning_rate": 9.677536749892773e-05, "loss": 1.384, "step": 39726 }, { "epoch": 0.5162342461458995, "grad_norm": 0.4106166362762451, "learning_rate": 9.677276803701634e-05, "loss": 1.3839, "step": 39727 }, { "epoch": 0.5162472406898153, "grad_norm": 0.441175252199173, "learning_rate": 9.677016857510496e-05, "loss": 1.348, "step": 39728 }, { "epoch": 0.5162602352337312, "grad_norm": 0.40549442172050476, "learning_rate": 9.676756911319357e-05, "loss": 1.3247, "step": 39729 }, { "epoch": 0.5162732297776471, "grad_norm": 0.43162834644317627, "learning_rate": 9.676496965128218e-05, "loss": 1.5191, "step": 39730 }, { "epoch": 0.516286224321563, "grad_norm": 0.3366645574569702, "learning_rate": 9.676237018937081e-05, "loss": 1.4965, "step": 39731 }, { "epoch": 0.5162992188654788, "grad_norm": 0.45134490728378296, "learning_rate": 9.675977072745942e-05, "loss": 1.4184, "step": 39732 }, { "epoch": 0.5163122134093947, "grad_norm": 0.49368637800216675, "learning_rate": 9.675717126554804e-05, "loss": 1.3428, "step": 39733 }, { "epoch": 0.5163252079533106, "grad_norm": 0.4337972104549408, "learning_rate": 9.675457180363666e-05, "loss": 1.4095, "step": 39734 }, { "epoch": 0.5163382024972265, "grad_norm": 0.4925230145454407, "learning_rate": 9.675197234172527e-05, "loss": 1.2615, "step": 39735 }, { "epoch": 0.5163511970411423, "grad_norm": 0.42128655314445496, "learning_rate": 9.674937287981388e-05, "loss": 1.3554, "step": 39736 }, { "epoch": 0.5163641915850582, "grad_norm": 0.31247860193252563, "learning_rate": 9.67467734179025e-05, "loss": 1.3697, "step": 39737 }, { "epoch": 0.5163771861289741, "grad_norm": 0.4025348424911499, "learning_rate": 9.674417395599111e-05, "loss": 1.3012, "step": 39738 }, { "epoch": 0.51639018067289, "grad_norm": 0.36298179626464844, "learning_rate": 9.674157449407974e-05, "loss": 1.1191, "step": 39739 }, { "epoch": 0.5164031752168059, "grad_norm": 0.3341827988624573, "learning_rate": 9.673897503216835e-05, "loss": 1.1723, "step": 39740 }, { "epoch": 0.5164161697607217, "grad_norm": 0.5221813321113586, "learning_rate": 9.673637557025696e-05, "loss": 1.4704, "step": 39741 }, { "epoch": 0.5164291643046376, "grad_norm": 0.4140227437019348, "learning_rate": 9.673377610834557e-05, "loss": 1.3517, "step": 39742 }, { "epoch": 0.5164421588485535, "grad_norm": 0.43374601006507874, "learning_rate": 9.67311766464342e-05, "loss": 1.1578, "step": 39743 }, { "epoch": 0.5164551533924694, "grad_norm": 0.37288859486579895, "learning_rate": 9.67285771845228e-05, "loss": 1.3072, "step": 39744 }, { "epoch": 0.5164681479363852, "grad_norm": 0.3364974856376648, "learning_rate": 9.672597772261143e-05, "loss": 1.4689, "step": 39745 }, { "epoch": 0.5164811424803011, "grad_norm": 0.42575275897979736, "learning_rate": 9.672337826070004e-05, "loss": 1.1894, "step": 39746 }, { "epoch": 0.516494137024217, "grad_norm": 0.343377947807312, "learning_rate": 9.672077879878866e-05, "loss": 1.3062, "step": 39747 }, { "epoch": 0.5165071315681329, "grad_norm": 0.408843457698822, "learning_rate": 9.671817933687726e-05, "loss": 1.2573, "step": 39748 }, { "epoch": 0.5165201261120487, "grad_norm": 0.4673076868057251, "learning_rate": 9.671557987496589e-05, "loss": 1.2183, "step": 39749 }, { "epoch": 0.5165331206559646, "grad_norm": 0.40669989585876465, "learning_rate": 9.67129804130545e-05, "loss": 1.2672, "step": 39750 }, { "epoch": 0.5165461151998805, "grad_norm": 0.3623637855052948, "learning_rate": 9.671038095114312e-05, "loss": 1.4054, "step": 39751 }, { "epoch": 0.5165591097437964, "grad_norm": 0.4518243372440338, "learning_rate": 9.670778148923173e-05, "loss": 1.4425, "step": 39752 }, { "epoch": 0.5165721042877122, "grad_norm": 0.38863709568977356, "learning_rate": 9.670518202732036e-05, "loss": 1.0689, "step": 39753 }, { "epoch": 0.5165850988316281, "grad_norm": 0.4522719383239746, "learning_rate": 9.670258256540896e-05, "loss": 1.2337, "step": 39754 }, { "epoch": 0.516598093375544, "grad_norm": 0.4815397560596466, "learning_rate": 9.669998310349758e-05, "loss": 1.4118, "step": 39755 }, { "epoch": 0.5166110879194599, "grad_norm": 0.5005756616592407, "learning_rate": 9.669738364158619e-05, "loss": 1.043, "step": 39756 }, { "epoch": 0.5166240824633757, "grad_norm": 0.4224078357219696, "learning_rate": 9.669478417967481e-05, "loss": 1.434, "step": 39757 }, { "epoch": 0.5166370770072916, "grad_norm": 0.4652017652988434, "learning_rate": 9.669218471776343e-05, "loss": 1.2413, "step": 39758 }, { "epoch": 0.5166500715512075, "grad_norm": 0.24948734045028687, "learning_rate": 9.668958525585205e-05, "loss": 1.2508, "step": 39759 }, { "epoch": 0.5166630660951234, "grad_norm": 0.35661581158638, "learning_rate": 9.668698579394065e-05, "loss": 1.1946, "step": 39760 }, { "epoch": 0.5166760606390391, "grad_norm": 0.4033752679824829, "learning_rate": 9.668438633202927e-05, "loss": 1.4716, "step": 39761 }, { "epoch": 0.516689055182955, "grad_norm": 0.47122421860694885, "learning_rate": 9.668178687011788e-05, "loss": 1.41, "step": 39762 }, { "epoch": 0.516702049726871, "grad_norm": 0.3335406184196472, "learning_rate": 9.667918740820651e-05, "loss": 1.3852, "step": 39763 }, { "epoch": 0.5167150442707868, "grad_norm": 0.5164328217506409, "learning_rate": 9.667658794629512e-05, "loss": 1.5719, "step": 39764 }, { "epoch": 0.5167280388147026, "grad_norm": 0.4332098662853241, "learning_rate": 9.667398848438374e-05, "loss": 1.5024, "step": 39765 }, { "epoch": 0.5167410333586185, "grad_norm": 0.40759408473968506, "learning_rate": 9.667138902247235e-05, "loss": 1.3069, "step": 39766 }, { "epoch": 0.5167540279025344, "grad_norm": 0.38008755445480347, "learning_rate": 9.666878956056096e-05, "loss": 1.4045, "step": 39767 }, { "epoch": 0.5167670224464503, "grad_norm": 0.45996609330177307, "learning_rate": 9.666619009864959e-05, "loss": 1.4022, "step": 39768 }, { "epoch": 0.5167800169903661, "grad_norm": 0.543403148651123, "learning_rate": 9.66635906367382e-05, "loss": 1.624, "step": 39769 }, { "epoch": 0.516793011534282, "grad_norm": 0.3805897831916809, "learning_rate": 9.666099117482682e-05, "loss": 1.2429, "step": 39770 }, { "epoch": 0.5168060060781979, "grad_norm": 0.3329651951789856, "learning_rate": 9.665839171291544e-05, "loss": 1.4482, "step": 39771 }, { "epoch": 0.5168190006221138, "grad_norm": 0.4025849997997284, "learning_rate": 9.665579225100405e-05, "loss": 1.4648, "step": 39772 }, { "epoch": 0.5168319951660296, "grad_norm": 0.37977755069732666, "learning_rate": 9.665319278909266e-05, "loss": 1.4802, "step": 39773 }, { "epoch": 0.5168449897099455, "grad_norm": 0.42401570081710815, "learning_rate": 9.665059332718128e-05, "loss": 1.4939, "step": 39774 }, { "epoch": 0.5168579842538614, "grad_norm": 0.37666407227516174, "learning_rate": 9.664799386526989e-05, "loss": 1.3415, "step": 39775 }, { "epoch": 0.5168709787977773, "grad_norm": 0.40926486253738403, "learning_rate": 9.664539440335852e-05, "loss": 1.2638, "step": 39776 }, { "epoch": 0.5168839733416931, "grad_norm": 0.43744245171546936, "learning_rate": 9.664279494144713e-05, "loss": 1.4516, "step": 39777 }, { "epoch": 0.516896967885609, "grad_norm": 0.41427087783813477, "learning_rate": 9.664019547953574e-05, "loss": 1.4958, "step": 39778 }, { "epoch": 0.5169099624295249, "grad_norm": 0.42561405897140503, "learning_rate": 9.663759601762435e-05, "loss": 1.4731, "step": 39779 }, { "epoch": 0.5169229569734408, "grad_norm": 0.3866291046142578, "learning_rate": 9.663499655571297e-05, "loss": 1.41, "step": 39780 }, { "epoch": 0.5169359515173566, "grad_norm": 0.40552273392677307, "learning_rate": 9.663239709380159e-05, "loss": 1.2356, "step": 39781 }, { "epoch": 0.5169489460612725, "grad_norm": 0.42753568291664124, "learning_rate": 9.662979763189021e-05, "loss": 1.4893, "step": 39782 }, { "epoch": 0.5169619406051884, "grad_norm": 0.4228987991809845, "learning_rate": 9.662719816997882e-05, "loss": 1.4465, "step": 39783 }, { "epoch": 0.5169749351491043, "grad_norm": 0.33820393681526184, "learning_rate": 9.662459870806743e-05, "loss": 1.2627, "step": 39784 }, { "epoch": 0.5169879296930201, "grad_norm": 0.38927245140075684, "learning_rate": 9.662199924615604e-05, "loss": 1.2624, "step": 39785 }, { "epoch": 0.517000924236936, "grad_norm": 0.4500969350337982, "learning_rate": 9.661939978424467e-05, "loss": 1.3489, "step": 39786 }, { "epoch": 0.5170139187808519, "grad_norm": 0.47534242272377014, "learning_rate": 9.661680032233328e-05, "loss": 1.3918, "step": 39787 }, { "epoch": 0.5170269133247678, "grad_norm": 0.4441378116607666, "learning_rate": 9.66142008604219e-05, "loss": 1.3423, "step": 39788 }, { "epoch": 0.5170399078686836, "grad_norm": 0.45378854870796204, "learning_rate": 9.661160139851051e-05, "loss": 1.295, "step": 39789 }, { "epoch": 0.5170529024125995, "grad_norm": 0.4024064242839813, "learning_rate": 9.660900193659912e-05, "loss": 1.3863, "step": 39790 }, { "epoch": 0.5170658969565154, "grad_norm": 0.3129641115665436, "learning_rate": 9.660640247468774e-05, "loss": 1.2861, "step": 39791 }, { "epoch": 0.5170788915004313, "grad_norm": 0.5997129082679749, "learning_rate": 9.660380301277636e-05, "loss": 1.4397, "step": 39792 }, { "epoch": 0.5170918860443471, "grad_norm": 0.34037235379219055, "learning_rate": 9.660120355086497e-05, "loss": 1.5064, "step": 39793 }, { "epoch": 0.517104880588263, "grad_norm": 0.4425402879714966, "learning_rate": 9.65986040889536e-05, "loss": 1.5402, "step": 39794 }, { "epoch": 0.5171178751321789, "grad_norm": 0.4149685502052307, "learning_rate": 9.65960046270422e-05, "loss": 1.4033, "step": 39795 }, { "epoch": 0.5171308696760948, "grad_norm": 0.40826696157455444, "learning_rate": 9.659340516513082e-05, "loss": 1.4657, "step": 39796 }, { "epoch": 0.5171438642200106, "grad_norm": 0.27776089310646057, "learning_rate": 9.659080570321943e-05, "loss": 1.3309, "step": 39797 }, { "epoch": 0.5171568587639265, "grad_norm": 0.4421710669994354, "learning_rate": 9.658820624130805e-05, "loss": 1.4369, "step": 39798 }, { "epoch": 0.5171698533078424, "grad_norm": 0.4532667398452759, "learning_rate": 9.658560677939666e-05, "loss": 1.5156, "step": 39799 }, { "epoch": 0.5171828478517583, "grad_norm": 0.40888866782188416, "learning_rate": 9.658300731748529e-05, "loss": 1.3444, "step": 39800 }, { "epoch": 0.5171958423956741, "grad_norm": 0.4366384744644165, "learning_rate": 9.65804078555739e-05, "loss": 1.4396, "step": 39801 }, { "epoch": 0.51720883693959, "grad_norm": 0.4267401397228241, "learning_rate": 9.657780839366251e-05, "loss": 1.3359, "step": 39802 }, { "epoch": 0.5172218314835059, "grad_norm": 0.372907817363739, "learning_rate": 9.657520893175112e-05, "loss": 1.4627, "step": 39803 }, { "epoch": 0.5172348260274218, "grad_norm": 0.40663430094718933, "learning_rate": 9.657260946983975e-05, "loss": 1.3293, "step": 39804 }, { "epoch": 0.5172478205713376, "grad_norm": 0.32831236720085144, "learning_rate": 9.657001000792837e-05, "loss": 1.4796, "step": 39805 }, { "epoch": 0.5172608151152535, "grad_norm": 0.41455942392349243, "learning_rate": 9.656741054601698e-05, "loss": 1.2911, "step": 39806 }, { "epoch": 0.5172738096591694, "grad_norm": 0.4237598478794098, "learning_rate": 9.65648110841056e-05, "loss": 1.3466, "step": 39807 }, { "epoch": 0.5172868042030853, "grad_norm": 0.5078092813491821, "learning_rate": 9.656221162219422e-05, "loss": 1.4124, "step": 39808 }, { "epoch": 0.517299798747001, "grad_norm": 0.43971511721611023, "learning_rate": 9.655961216028283e-05, "loss": 1.4124, "step": 39809 }, { "epoch": 0.517312793290917, "grad_norm": 0.378388911485672, "learning_rate": 9.655701269837144e-05, "loss": 1.3398, "step": 39810 }, { "epoch": 0.5173257878348329, "grad_norm": 0.3556126356124878, "learning_rate": 9.655441323646006e-05, "loss": 1.1744, "step": 39811 }, { "epoch": 0.5173387823787488, "grad_norm": 0.35911452770233154, "learning_rate": 9.655181377454867e-05, "loss": 1.2825, "step": 39812 }, { "epoch": 0.5173517769226645, "grad_norm": 0.3927856981754303, "learning_rate": 9.65492143126373e-05, "loss": 1.1968, "step": 39813 }, { "epoch": 0.5173647714665804, "grad_norm": 0.31085440516471863, "learning_rate": 9.654661485072591e-05, "loss": 1.3163, "step": 39814 }, { "epoch": 0.5173777660104963, "grad_norm": 0.3252396583557129, "learning_rate": 9.654401538881452e-05, "loss": 1.3741, "step": 39815 }, { "epoch": 0.5173907605544122, "grad_norm": 0.4776071310043335, "learning_rate": 9.654141592690313e-05, "loss": 1.4171, "step": 39816 }, { "epoch": 0.5174037550983281, "grad_norm": 0.33488571643829346, "learning_rate": 9.653881646499176e-05, "loss": 1.2847, "step": 39817 }, { "epoch": 0.5174167496422439, "grad_norm": 0.43468913435935974, "learning_rate": 9.653621700308037e-05, "loss": 1.2042, "step": 39818 }, { "epoch": 0.5174297441861598, "grad_norm": 0.3939213752746582, "learning_rate": 9.653361754116899e-05, "loss": 1.5204, "step": 39819 }, { "epoch": 0.5174427387300757, "grad_norm": 0.4318459630012512, "learning_rate": 9.65310180792576e-05, "loss": 1.5786, "step": 39820 }, { "epoch": 0.5174557332739916, "grad_norm": 0.36560386419296265, "learning_rate": 9.652841861734621e-05, "loss": 1.2486, "step": 39821 }, { "epoch": 0.5174687278179074, "grad_norm": 0.473521888256073, "learning_rate": 9.652581915543482e-05, "loss": 1.4041, "step": 39822 }, { "epoch": 0.5174817223618233, "grad_norm": 0.5077974796295166, "learning_rate": 9.652321969352345e-05, "loss": 1.449, "step": 39823 }, { "epoch": 0.5174947169057392, "grad_norm": 0.41945451498031616, "learning_rate": 9.652062023161206e-05, "loss": 1.4509, "step": 39824 }, { "epoch": 0.5175077114496551, "grad_norm": 0.4337124228477478, "learning_rate": 9.651802076970068e-05, "loss": 1.5603, "step": 39825 }, { "epoch": 0.5175207059935709, "grad_norm": 0.3450642228126526, "learning_rate": 9.65154213077893e-05, "loss": 1.3649, "step": 39826 }, { "epoch": 0.5175337005374868, "grad_norm": 0.4827076196670532, "learning_rate": 9.65128218458779e-05, "loss": 1.435, "step": 39827 }, { "epoch": 0.5175466950814027, "grad_norm": 0.37749630212783813, "learning_rate": 9.651022238396652e-05, "loss": 1.4146, "step": 39828 }, { "epoch": 0.5175596896253186, "grad_norm": 0.34670841693878174, "learning_rate": 9.650762292205514e-05, "loss": 1.2127, "step": 39829 }, { "epoch": 0.5175726841692344, "grad_norm": 0.4276048541069031, "learning_rate": 9.650502346014375e-05, "loss": 1.7008, "step": 39830 }, { "epoch": 0.5175856787131503, "grad_norm": 0.3143344223499298, "learning_rate": 9.650242399823238e-05, "loss": 1.2626, "step": 39831 }, { "epoch": 0.5175986732570662, "grad_norm": 0.3719705045223236, "learning_rate": 9.649982453632099e-05, "loss": 1.3233, "step": 39832 }, { "epoch": 0.5176116678009821, "grad_norm": 0.37398555874824524, "learning_rate": 9.64972250744096e-05, "loss": 1.2199, "step": 39833 }, { "epoch": 0.5176246623448979, "grad_norm": 0.5182440876960754, "learning_rate": 9.649462561249821e-05, "loss": 1.4561, "step": 39834 }, { "epoch": 0.5176376568888138, "grad_norm": 0.3954979181289673, "learning_rate": 9.649202615058683e-05, "loss": 1.2075, "step": 39835 }, { "epoch": 0.5176506514327297, "grad_norm": 0.2489006072282791, "learning_rate": 9.648942668867544e-05, "loss": 1.1991, "step": 39836 }, { "epoch": 0.5176636459766456, "grad_norm": 0.4750337600708008, "learning_rate": 9.648682722676407e-05, "loss": 1.4766, "step": 39837 }, { "epoch": 0.5176766405205614, "grad_norm": 0.43312010169029236, "learning_rate": 9.648422776485268e-05, "loss": 1.4388, "step": 39838 }, { "epoch": 0.5176896350644773, "grad_norm": 0.408152312040329, "learning_rate": 9.648162830294129e-05, "loss": 1.3923, "step": 39839 }, { "epoch": 0.5177026296083932, "grad_norm": 0.4312520921230316, "learning_rate": 9.64790288410299e-05, "loss": 1.3184, "step": 39840 }, { "epoch": 0.5177156241523091, "grad_norm": 0.3170221149921417, "learning_rate": 9.647642937911853e-05, "loss": 1.1946, "step": 39841 }, { "epoch": 0.5177286186962249, "grad_norm": 0.4742594361305237, "learning_rate": 9.647382991720715e-05, "loss": 1.4002, "step": 39842 }, { "epoch": 0.5177416132401408, "grad_norm": 0.42664453387260437, "learning_rate": 9.647123045529576e-05, "loss": 1.4475, "step": 39843 }, { "epoch": 0.5177546077840567, "grad_norm": 0.4326609969139099, "learning_rate": 9.646863099338437e-05, "loss": 1.3398, "step": 39844 }, { "epoch": 0.5177676023279726, "grad_norm": 0.44383418560028076, "learning_rate": 9.646603153147298e-05, "loss": 1.4035, "step": 39845 }, { "epoch": 0.5177805968718884, "grad_norm": 0.3674788475036621, "learning_rate": 9.646343206956161e-05, "loss": 1.424, "step": 39846 }, { "epoch": 0.5177935914158043, "grad_norm": 0.3745240271091461, "learning_rate": 9.646083260765022e-05, "loss": 1.2085, "step": 39847 }, { "epoch": 0.5178065859597202, "grad_norm": 0.34725555777549744, "learning_rate": 9.645823314573884e-05, "loss": 1.3426, "step": 39848 }, { "epoch": 0.5178195805036361, "grad_norm": 0.5519569516181946, "learning_rate": 9.645563368382745e-05, "loss": 1.4524, "step": 39849 }, { "epoch": 0.5178325750475519, "grad_norm": 0.4079243838787079, "learning_rate": 9.645303422191608e-05, "loss": 1.2667, "step": 39850 }, { "epoch": 0.5178455695914678, "grad_norm": 0.433389276266098, "learning_rate": 9.645043476000468e-05, "loss": 1.457, "step": 39851 }, { "epoch": 0.5178585641353837, "grad_norm": 0.4642544090747833, "learning_rate": 9.64478352980933e-05, "loss": 1.4968, "step": 39852 }, { "epoch": 0.5178715586792996, "grad_norm": 0.5511395335197449, "learning_rate": 9.644523583618191e-05, "loss": 1.4382, "step": 39853 }, { "epoch": 0.5178845532232154, "grad_norm": 0.3607686460018158, "learning_rate": 9.644263637427054e-05, "loss": 1.3017, "step": 39854 }, { "epoch": 0.5178975477671313, "grad_norm": 0.31322386860847473, "learning_rate": 9.644003691235915e-05, "loss": 1.4886, "step": 39855 }, { "epoch": 0.5179105423110472, "grad_norm": 0.3395468294620514, "learning_rate": 9.643743745044777e-05, "loss": 1.1908, "step": 39856 }, { "epoch": 0.5179235368549631, "grad_norm": 0.4174630045890808, "learning_rate": 9.643483798853637e-05, "loss": 1.3528, "step": 39857 }, { "epoch": 0.5179365313988789, "grad_norm": 0.34309256076812744, "learning_rate": 9.6432238526625e-05, "loss": 1.1808, "step": 39858 }, { "epoch": 0.5179495259427948, "grad_norm": 0.4298976957798004, "learning_rate": 9.64296390647136e-05, "loss": 1.2467, "step": 39859 }, { "epoch": 0.5179625204867107, "grad_norm": 0.26785096526145935, "learning_rate": 9.642703960280223e-05, "loss": 1.264, "step": 39860 }, { "epoch": 0.5179755150306266, "grad_norm": 0.43830612301826477, "learning_rate": 9.642444014089084e-05, "loss": 1.3387, "step": 39861 }, { "epoch": 0.5179885095745423, "grad_norm": 0.45528340339660645, "learning_rate": 9.642184067897946e-05, "loss": 1.3545, "step": 39862 }, { "epoch": 0.5180015041184582, "grad_norm": 0.4136285185813904, "learning_rate": 9.641924121706806e-05, "loss": 1.4656, "step": 39863 }, { "epoch": 0.5180144986623741, "grad_norm": 0.4535764753818512, "learning_rate": 9.641664175515669e-05, "loss": 1.4776, "step": 39864 }, { "epoch": 0.51802749320629, "grad_norm": 0.365784227848053, "learning_rate": 9.64140422932453e-05, "loss": 1.2931, "step": 39865 }, { "epoch": 0.5180404877502058, "grad_norm": 0.43346884846687317, "learning_rate": 9.641144283133392e-05, "loss": 1.4059, "step": 39866 }, { "epoch": 0.5180534822941217, "grad_norm": 0.3246438503265381, "learning_rate": 9.640884336942253e-05, "loss": 1.3472, "step": 39867 }, { "epoch": 0.5180664768380376, "grad_norm": 0.482332319021225, "learning_rate": 9.640624390751116e-05, "loss": 1.2797, "step": 39868 }, { "epoch": 0.5180794713819535, "grad_norm": 0.4121081233024597, "learning_rate": 9.640364444559977e-05, "loss": 1.5128, "step": 39869 }, { "epoch": 0.5180924659258693, "grad_norm": 0.37354040145874023, "learning_rate": 9.640104498368838e-05, "loss": 1.4179, "step": 39870 }, { "epoch": 0.5181054604697852, "grad_norm": 0.5037780404090881, "learning_rate": 9.639844552177699e-05, "loss": 1.5753, "step": 39871 }, { "epoch": 0.5181184550137011, "grad_norm": 0.40410736203193665, "learning_rate": 9.639584605986561e-05, "loss": 1.3085, "step": 39872 }, { "epoch": 0.518131449557617, "grad_norm": 0.4117945432662964, "learning_rate": 9.639324659795423e-05, "loss": 1.1837, "step": 39873 }, { "epoch": 0.5181444441015328, "grad_norm": 0.5411643981933594, "learning_rate": 9.639064713604285e-05, "loss": 1.3347, "step": 39874 }, { "epoch": 0.5181574386454487, "grad_norm": 0.4504357874393463, "learning_rate": 9.638804767413146e-05, "loss": 1.3478, "step": 39875 }, { "epoch": 0.5181704331893646, "grad_norm": 0.6726453304290771, "learning_rate": 9.638544821222007e-05, "loss": 1.4861, "step": 39876 }, { "epoch": 0.5181834277332805, "grad_norm": 0.5539174675941467, "learning_rate": 9.638284875030868e-05, "loss": 1.5024, "step": 39877 }, { "epoch": 0.5181964222771963, "grad_norm": 0.5677586197853088, "learning_rate": 9.638024928839731e-05, "loss": 1.588, "step": 39878 }, { "epoch": 0.5182094168211122, "grad_norm": 0.3257874846458435, "learning_rate": 9.637764982648593e-05, "loss": 1.3361, "step": 39879 }, { "epoch": 0.5182224113650281, "grad_norm": 0.3789377212524414, "learning_rate": 9.637505036457454e-05, "loss": 1.5491, "step": 39880 }, { "epoch": 0.518235405908944, "grad_norm": 0.35225480794906616, "learning_rate": 9.637245090266315e-05, "loss": 1.3352, "step": 39881 }, { "epoch": 0.5182484004528598, "grad_norm": 0.35333380103111267, "learning_rate": 9.636985144075176e-05, "loss": 1.458, "step": 39882 }, { "epoch": 0.5182613949967757, "grad_norm": 0.3810538053512573, "learning_rate": 9.636725197884039e-05, "loss": 1.5951, "step": 39883 }, { "epoch": 0.5182743895406916, "grad_norm": 0.5290937423706055, "learning_rate": 9.6364652516929e-05, "loss": 1.2644, "step": 39884 }, { "epoch": 0.5182873840846075, "grad_norm": 0.33791014552116394, "learning_rate": 9.636205305501762e-05, "loss": 1.4552, "step": 39885 }, { "epoch": 0.5183003786285233, "grad_norm": 0.36395886540412903, "learning_rate": 9.635945359310623e-05, "loss": 1.2726, "step": 39886 }, { "epoch": 0.5183133731724392, "grad_norm": 0.4548851251602173, "learning_rate": 9.635685413119485e-05, "loss": 1.364, "step": 39887 }, { "epoch": 0.5183263677163551, "grad_norm": 0.36717653274536133, "learning_rate": 9.635425466928346e-05, "loss": 1.4723, "step": 39888 }, { "epoch": 0.518339362260271, "grad_norm": 0.4422442615032196, "learning_rate": 9.635165520737208e-05, "loss": 1.3459, "step": 39889 }, { "epoch": 0.5183523568041868, "grad_norm": 0.4063000977039337, "learning_rate": 9.634905574546069e-05, "loss": 1.3891, "step": 39890 }, { "epoch": 0.5183653513481027, "grad_norm": 0.4366697072982788, "learning_rate": 9.634645628354932e-05, "loss": 1.3686, "step": 39891 }, { "epoch": 0.5183783458920186, "grad_norm": 0.2905206084251404, "learning_rate": 9.634385682163793e-05, "loss": 1.3038, "step": 39892 }, { "epoch": 0.5183913404359345, "grad_norm": 0.450033962726593, "learning_rate": 9.634125735972654e-05, "loss": 1.2095, "step": 39893 }, { "epoch": 0.5184043349798504, "grad_norm": 0.2715192437171936, "learning_rate": 9.633865789781515e-05, "loss": 1.4236, "step": 39894 }, { "epoch": 0.5184173295237662, "grad_norm": 0.40323707461357117, "learning_rate": 9.633605843590377e-05, "loss": 1.2899, "step": 39895 }, { "epoch": 0.5184303240676821, "grad_norm": 0.46104416251182556, "learning_rate": 9.633345897399238e-05, "loss": 1.4659, "step": 39896 }, { "epoch": 0.518443318611598, "grad_norm": 0.4568787217140198, "learning_rate": 9.633085951208101e-05, "loss": 1.7088, "step": 39897 }, { "epoch": 0.5184563131555139, "grad_norm": 0.44228917360305786, "learning_rate": 9.632826005016962e-05, "loss": 1.4369, "step": 39898 }, { "epoch": 0.5184693076994297, "grad_norm": 0.40326258540153503, "learning_rate": 9.632566058825823e-05, "loss": 1.3603, "step": 39899 }, { "epoch": 0.5184823022433456, "grad_norm": 0.5430493354797363, "learning_rate": 9.632306112634684e-05, "loss": 1.4205, "step": 39900 }, { "epoch": 0.5184952967872615, "grad_norm": 0.3709203898906708, "learning_rate": 9.632046166443547e-05, "loss": 1.357, "step": 39901 }, { "epoch": 0.5185082913311774, "grad_norm": 0.4018144905567169, "learning_rate": 9.631786220252408e-05, "loss": 1.2487, "step": 39902 }, { "epoch": 0.5185212858750932, "grad_norm": 0.3554931581020355, "learning_rate": 9.63152627406127e-05, "loss": 1.3591, "step": 39903 }, { "epoch": 0.5185342804190091, "grad_norm": 0.42651045322418213, "learning_rate": 9.631266327870131e-05, "loss": 1.3985, "step": 39904 }, { "epoch": 0.518547274962925, "grad_norm": 0.35157230496406555, "learning_rate": 9.631006381678992e-05, "loss": 1.4667, "step": 39905 }, { "epoch": 0.5185602695068409, "grad_norm": 0.4239737391471863, "learning_rate": 9.630746435487853e-05, "loss": 1.4522, "step": 39906 }, { "epoch": 0.5185732640507567, "grad_norm": 0.3595733046531677, "learning_rate": 9.630486489296716e-05, "loss": 1.5159, "step": 39907 }, { "epoch": 0.5185862585946726, "grad_norm": 0.39164459705352783, "learning_rate": 9.630226543105577e-05, "loss": 1.5744, "step": 39908 }, { "epoch": 0.5185992531385885, "grad_norm": 0.35062751173973083, "learning_rate": 9.62996659691444e-05, "loss": 1.3477, "step": 39909 }, { "epoch": 0.5186122476825044, "grad_norm": 0.5065329670906067, "learning_rate": 9.6297066507233e-05, "loss": 1.3549, "step": 39910 }, { "epoch": 0.5186252422264201, "grad_norm": 0.41593268513679504, "learning_rate": 9.629446704532163e-05, "loss": 1.4537, "step": 39911 }, { "epoch": 0.518638236770336, "grad_norm": 0.4158235192298889, "learning_rate": 9.629186758341023e-05, "loss": 1.2937, "step": 39912 }, { "epoch": 0.518651231314252, "grad_norm": 0.3900696635246277, "learning_rate": 9.628926812149885e-05, "loss": 1.5217, "step": 39913 }, { "epoch": 0.5186642258581678, "grad_norm": 0.47524160146713257, "learning_rate": 9.628666865958746e-05, "loss": 1.5086, "step": 39914 }, { "epoch": 0.5186772204020836, "grad_norm": 0.40152740478515625, "learning_rate": 9.628406919767609e-05, "loss": 1.5261, "step": 39915 }, { "epoch": 0.5186902149459995, "grad_norm": 0.4031531810760498, "learning_rate": 9.628146973576471e-05, "loss": 1.2405, "step": 39916 }, { "epoch": 0.5187032094899154, "grad_norm": 0.42054039239883423, "learning_rate": 9.627887027385332e-05, "loss": 1.3224, "step": 39917 }, { "epoch": 0.5187162040338313, "grad_norm": 0.44588547945022583, "learning_rate": 9.627627081194193e-05, "loss": 1.3928, "step": 39918 }, { "epoch": 0.5187291985777471, "grad_norm": 0.4453255534172058, "learning_rate": 9.627367135003054e-05, "loss": 1.4701, "step": 39919 }, { "epoch": 0.518742193121663, "grad_norm": 0.4229930341243744, "learning_rate": 9.627107188811917e-05, "loss": 1.4649, "step": 39920 }, { "epoch": 0.5187551876655789, "grad_norm": 0.32511550188064575, "learning_rate": 9.626847242620778e-05, "loss": 1.3025, "step": 39921 }, { "epoch": 0.5187681822094948, "grad_norm": 0.46386486291885376, "learning_rate": 9.62658729642964e-05, "loss": 1.3834, "step": 39922 }, { "epoch": 0.5187811767534106, "grad_norm": 0.43011224269866943, "learning_rate": 9.626327350238502e-05, "loss": 1.4239, "step": 39923 }, { "epoch": 0.5187941712973265, "grad_norm": 0.4847968518733978, "learning_rate": 9.626067404047363e-05, "loss": 1.4064, "step": 39924 }, { "epoch": 0.5188071658412424, "grad_norm": 0.3036381006240845, "learning_rate": 9.625807457856224e-05, "loss": 1.4301, "step": 39925 }, { "epoch": 0.5188201603851583, "grad_norm": 0.5004080533981323, "learning_rate": 9.625547511665086e-05, "loss": 1.3712, "step": 39926 }, { "epoch": 0.5188331549290741, "grad_norm": 0.4350956082344055, "learning_rate": 9.625287565473947e-05, "loss": 1.3599, "step": 39927 }, { "epoch": 0.51884614947299, "grad_norm": 0.36283931136131287, "learning_rate": 9.62502761928281e-05, "loss": 1.2028, "step": 39928 }, { "epoch": 0.5188591440169059, "grad_norm": 0.5054888725280762, "learning_rate": 9.624767673091671e-05, "loss": 1.3245, "step": 39929 }, { "epoch": 0.5188721385608218, "grad_norm": 0.45617255568504333, "learning_rate": 9.624507726900532e-05, "loss": 1.2946, "step": 39930 }, { "epoch": 0.5188851331047376, "grad_norm": 0.4867818057537079, "learning_rate": 9.624247780709393e-05, "loss": 1.5105, "step": 39931 }, { "epoch": 0.5188981276486535, "grad_norm": 0.3775416612625122, "learning_rate": 9.623987834518255e-05, "loss": 1.479, "step": 39932 }, { "epoch": 0.5189111221925694, "grad_norm": 0.4536754786968231, "learning_rate": 9.623727888327117e-05, "loss": 1.3278, "step": 39933 }, { "epoch": 0.5189241167364853, "grad_norm": 0.4363205134868622, "learning_rate": 9.623467942135979e-05, "loss": 1.5842, "step": 39934 }, { "epoch": 0.5189371112804011, "grad_norm": 0.40178975462913513, "learning_rate": 9.62320799594484e-05, "loss": 1.3725, "step": 39935 }, { "epoch": 0.518950105824317, "grad_norm": 0.34961575269699097, "learning_rate": 9.622948049753701e-05, "loss": 1.348, "step": 39936 }, { "epoch": 0.5189631003682329, "grad_norm": 0.341223806142807, "learning_rate": 9.622688103562562e-05, "loss": 1.2666, "step": 39937 }, { "epoch": 0.5189760949121488, "grad_norm": 0.417461097240448, "learning_rate": 9.622428157371425e-05, "loss": 1.5806, "step": 39938 }, { "epoch": 0.5189890894560646, "grad_norm": 0.39730849862098694, "learning_rate": 9.622168211180286e-05, "loss": 1.4125, "step": 39939 }, { "epoch": 0.5190020839999805, "grad_norm": 0.4442359209060669, "learning_rate": 9.621908264989148e-05, "loss": 1.3605, "step": 39940 }, { "epoch": 0.5190150785438964, "grad_norm": 0.41551077365875244, "learning_rate": 9.62164831879801e-05, "loss": 1.373, "step": 39941 }, { "epoch": 0.5190280730878123, "grad_norm": 0.4439445436000824, "learning_rate": 9.62138837260687e-05, "loss": 1.3941, "step": 39942 }, { "epoch": 0.5190410676317281, "grad_norm": 0.4588831663131714, "learning_rate": 9.621128426415732e-05, "loss": 1.3986, "step": 39943 }, { "epoch": 0.519054062175644, "grad_norm": 0.46232840418815613, "learning_rate": 9.620868480224594e-05, "loss": 1.4797, "step": 39944 }, { "epoch": 0.5190670567195599, "grad_norm": 0.34890320897102356, "learning_rate": 9.620608534033455e-05, "loss": 1.4271, "step": 39945 }, { "epoch": 0.5190800512634758, "grad_norm": 0.4595262110233307, "learning_rate": 9.620348587842318e-05, "loss": 1.4983, "step": 39946 }, { "epoch": 0.5190930458073916, "grad_norm": 0.3593679368495941, "learning_rate": 9.620088641651179e-05, "loss": 1.182, "step": 39947 }, { "epoch": 0.5191060403513075, "grad_norm": 0.413931280374527, "learning_rate": 9.61982869546004e-05, "loss": 1.3121, "step": 39948 }, { "epoch": 0.5191190348952234, "grad_norm": 0.4017539322376251, "learning_rate": 9.619568749268901e-05, "loss": 1.1954, "step": 39949 }, { "epoch": 0.5191320294391393, "grad_norm": 0.45622384548187256, "learning_rate": 9.619308803077763e-05, "loss": 1.4627, "step": 39950 }, { "epoch": 0.5191450239830551, "grad_norm": 0.38428208231925964, "learning_rate": 9.619048856886624e-05, "loss": 1.3374, "step": 39951 }, { "epoch": 0.519158018526971, "grad_norm": 0.3753044903278351, "learning_rate": 9.618788910695487e-05, "loss": 1.3387, "step": 39952 }, { "epoch": 0.5191710130708869, "grad_norm": 0.39697080850601196, "learning_rate": 9.618528964504349e-05, "loss": 1.3966, "step": 39953 }, { "epoch": 0.5191840076148028, "grad_norm": 0.46103984117507935, "learning_rate": 9.618269018313209e-05, "loss": 1.3871, "step": 39954 }, { "epoch": 0.5191970021587186, "grad_norm": 0.4444766640663147, "learning_rate": 9.618009072122071e-05, "loss": 1.3533, "step": 39955 }, { "epoch": 0.5192099967026345, "grad_norm": 0.3617517352104187, "learning_rate": 9.617749125930933e-05, "loss": 1.485, "step": 39956 }, { "epoch": 0.5192229912465504, "grad_norm": 0.4572962522506714, "learning_rate": 9.617489179739795e-05, "loss": 1.2551, "step": 39957 }, { "epoch": 0.5192359857904663, "grad_norm": 0.4589724838733673, "learning_rate": 9.617229233548656e-05, "loss": 1.5344, "step": 39958 }, { "epoch": 0.519248980334382, "grad_norm": 0.4317861795425415, "learning_rate": 9.616969287357519e-05, "loss": 1.2749, "step": 39959 }, { "epoch": 0.519261974878298, "grad_norm": 0.4390122592449188, "learning_rate": 9.616709341166378e-05, "loss": 1.4224, "step": 39960 }, { "epoch": 0.5192749694222139, "grad_norm": 0.41167759895324707, "learning_rate": 9.616449394975241e-05, "loss": 1.3775, "step": 39961 }, { "epoch": 0.5192879639661298, "grad_norm": 0.39599475264549255, "learning_rate": 9.616189448784102e-05, "loss": 1.3691, "step": 39962 }, { "epoch": 0.5193009585100455, "grad_norm": 0.39877018332481384, "learning_rate": 9.615929502592964e-05, "loss": 1.516, "step": 39963 }, { "epoch": 0.5193139530539614, "grad_norm": 0.3969963788986206, "learning_rate": 9.615669556401825e-05, "loss": 1.3909, "step": 39964 }, { "epoch": 0.5193269475978773, "grad_norm": 0.358370840549469, "learning_rate": 9.615409610210688e-05, "loss": 1.3347, "step": 39965 }, { "epoch": 0.5193399421417932, "grad_norm": 0.39317548274993896, "learning_rate": 9.615149664019548e-05, "loss": 1.4535, "step": 39966 }, { "epoch": 0.5193529366857091, "grad_norm": 0.32412514090538025, "learning_rate": 9.61488971782841e-05, "loss": 1.4027, "step": 39967 }, { "epoch": 0.5193659312296249, "grad_norm": 0.3520224988460541, "learning_rate": 9.614629771637271e-05, "loss": 1.4431, "step": 39968 }, { "epoch": 0.5193789257735408, "grad_norm": 0.4667593538761139, "learning_rate": 9.614369825446134e-05, "loss": 1.5183, "step": 39969 }, { "epoch": 0.5193919203174567, "grad_norm": 0.4510015547275543, "learning_rate": 9.614109879254995e-05, "loss": 1.2715, "step": 39970 }, { "epoch": 0.5194049148613726, "grad_norm": 0.47441092133522034, "learning_rate": 9.613849933063857e-05, "loss": 1.4617, "step": 39971 }, { "epoch": 0.5194179094052884, "grad_norm": 0.4711161255836487, "learning_rate": 9.613589986872718e-05, "loss": 1.4063, "step": 39972 }, { "epoch": 0.5194309039492043, "grad_norm": 0.32250046730041504, "learning_rate": 9.613330040681579e-05, "loss": 1.3051, "step": 39973 }, { "epoch": 0.5194438984931202, "grad_norm": 0.42298680543899536, "learning_rate": 9.61307009449044e-05, "loss": 1.3107, "step": 39974 }, { "epoch": 0.5194568930370361, "grad_norm": 0.3625754117965698, "learning_rate": 9.612810148299303e-05, "loss": 1.3295, "step": 39975 }, { "epoch": 0.5194698875809519, "grad_norm": 0.44999998807907104, "learning_rate": 9.612550202108164e-05, "loss": 1.5497, "step": 39976 }, { "epoch": 0.5194828821248678, "grad_norm": 0.36292049288749695, "learning_rate": 9.612290255917026e-05, "loss": 1.3161, "step": 39977 }, { "epoch": 0.5194958766687837, "grad_norm": 0.40168920159339905, "learning_rate": 9.612030309725887e-05, "loss": 1.376, "step": 39978 }, { "epoch": 0.5195088712126996, "grad_norm": 0.41131851077079773, "learning_rate": 9.611770363534749e-05, "loss": 1.4771, "step": 39979 }, { "epoch": 0.5195218657566154, "grad_norm": 0.3358024060726166, "learning_rate": 9.61151041734361e-05, "loss": 1.4687, "step": 39980 }, { "epoch": 0.5195348603005313, "grad_norm": 0.41045722365379333, "learning_rate": 9.611250471152472e-05, "loss": 1.512, "step": 39981 }, { "epoch": 0.5195478548444472, "grad_norm": 0.37855643033981323, "learning_rate": 9.610990524961333e-05, "loss": 1.457, "step": 39982 }, { "epoch": 0.5195608493883631, "grad_norm": 0.6064843535423279, "learning_rate": 9.610730578770196e-05, "loss": 1.4736, "step": 39983 }, { "epoch": 0.5195738439322789, "grad_norm": 0.43973588943481445, "learning_rate": 9.610470632579057e-05, "loss": 1.4998, "step": 39984 }, { "epoch": 0.5195868384761948, "grad_norm": 0.45167720317840576, "learning_rate": 9.610210686387918e-05, "loss": 1.4831, "step": 39985 }, { "epoch": 0.5195998330201107, "grad_norm": 0.4005807936191559, "learning_rate": 9.609950740196779e-05, "loss": 1.4136, "step": 39986 }, { "epoch": 0.5196128275640266, "grad_norm": 0.31736108660697937, "learning_rate": 9.609690794005641e-05, "loss": 1.2567, "step": 39987 }, { "epoch": 0.5196258221079424, "grad_norm": 0.4036203622817993, "learning_rate": 9.609430847814502e-05, "loss": 1.3578, "step": 39988 }, { "epoch": 0.5196388166518583, "grad_norm": 0.3702143430709839, "learning_rate": 9.609170901623365e-05, "loss": 1.378, "step": 39989 }, { "epoch": 0.5196518111957742, "grad_norm": 0.4238418638706207, "learning_rate": 9.608910955432226e-05, "loss": 1.537, "step": 39990 }, { "epoch": 0.5196648057396901, "grad_norm": 0.43518656492233276, "learning_rate": 9.608651009241087e-05, "loss": 1.257, "step": 39991 }, { "epoch": 0.5196778002836059, "grad_norm": 0.42913341522216797, "learning_rate": 9.60839106304995e-05, "loss": 1.4435, "step": 39992 }, { "epoch": 0.5196907948275218, "grad_norm": 0.4853131175041199, "learning_rate": 9.60813111685881e-05, "loss": 1.4001, "step": 39993 }, { "epoch": 0.5197037893714377, "grad_norm": 0.3982795774936676, "learning_rate": 9.607871170667673e-05, "loss": 1.1793, "step": 39994 }, { "epoch": 0.5197167839153536, "grad_norm": 0.5129113793373108, "learning_rate": 9.607611224476534e-05, "loss": 1.4162, "step": 39995 }, { "epoch": 0.5197297784592694, "grad_norm": 0.40881386399269104, "learning_rate": 9.607351278285395e-05, "loss": 1.5367, "step": 39996 }, { "epoch": 0.5197427730031853, "grad_norm": 0.4761309325695038, "learning_rate": 9.607091332094256e-05, "loss": 1.5653, "step": 39997 }, { "epoch": 0.5197557675471012, "grad_norm": 0.477521687746048, "learning_rate": 9.606831385903119e-05, "loss": 1.2835, "step": 39998 }, { "epoch": 0.5197687620910171, "grad_norm": 0.3403060734272003, "learning_rate": 9.60657143971198e-05, "loss": 1.0931, "step": 39999 }, { "epoch": 0.5197817566349329, "grad_norm": 0.3844358026981354, "learning_rate": 9.606311493520842e-05, "loss": 1.5826, "step": 40000 } ], "logging_steps": 1, "max_steps": 76955, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.5725484746843685e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }