{ "best_metric": 0.5096783638000488, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.07796257796257797, "eval_steps": 25, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005197505197505198, "grad_norm": 0.32254523038864136, "learning_rate": 2.9999999999999997e-05, "loss": 0.4008, "step": 1 }, { "epoch": 0.0005197505197505198, "eval_loss": 0.7325336933135986, "eval_runtime": 11.447, "eval_samples_per_second": 4.368, "eval_steps_per_second": 0.612, "step": 1 }, { "epoch": 0.0010395010395010396, "grad_norm": 0.20825836062431335, "learning_rate": 5.9999999999999995e-05, "loss": 0.477, "step": 2 }, { "epoch": 0.0015592515592515593, "grad_norm": 0.22193248569965363, "learning_rate": 8.999999999999999e-05, "loss": 0.5648, "step": 3 }, { "epoch": 0.002079002079002079, "grad_norm": 0.2992827594280243, "learning_rate": 0.00011999999999999999, "loss": 0.5735, "step": 4 }, { "epoch": 0.002598752598752599, "grad_norm": 0.21446818113327026, "learning_rate": 0.00015, "loss": 0.5238, "step": 5 }, { "epoch": 0.0031185031185031187, "grad_norm": 0.2041025012731552, "learning_rate": 0.00017999999999999998, "loss": 0.5336, "step": 6 }, { "epoch": 0.0036382536382536385, "grad_norm": 0.3615117073059082, "learning_rate": 0.00020999999999999998, "loss": 0.5941, "step": 7 }, { "epoch": 0.004158004158004158, "grad_norm": 0.3850712180137634, "learning_rate": 0.00023999999999999998, "loss": 0.5643, "step": 8 }, { "epoch": 0.004677754677754678, "grad_norm": 0.2402263879776001, "learning_rate": 0.00027, "loss": 0.5537, "step": 9 }, { "epoch": 0.005197505197505198, "grad_norm": 0.23989036679267883, "learning_rate": 0.0003, "loss": 0.5733, "step": 10 }, { "epoch": 0.005717255717255718, "grad_norm": 0.22981992363929749, "learning_rate": 0.0002999794957488703, "loss": 0.5729, "step": 11 }, { "epoch": 0.006237006237006237, "grad_norm": 0.27373000979423523, "learning_rate": 0.0002999179886011389, "loss": 0.5999, "step": 12 }, { "epoch": 0.006756756756756757, "grad_norm": 0.30983805656433105, "learning_rate": 0.0002998154953722457, "loss": 0.5903, "step": 13 }, { "epoch": 0.007276507276507277, "grad_norm": 0.2732364237308502, "learning_rate": 0.00029967204408281613, "loss": 0.5295, "step": 14 }, { "epoch": 0.007796257796257797, "grad_norm": 0.2508494257926941, "learning_rate": 0.00029948767395100045, "loss": 0.5413, "step": 15 }, { "epoch": 0.008316008316008316, "grad_norm": 0.2607696056365967, "learning_rate": 0.0002992624353817517, "loss": 0.6063, "step": 16 }, { "epoch": 0.008835758835758836, "grad_norm": 0.2565303444862366, "learning_rate": 0.0002989963899530457, "loss": 0.574, "step": 17 }, { "epoch": 0.009355509355509356, "grad_norm": 0.35899603366851807, "learning_rate": 0.00029868961039904624, "loss": 0.5968, "step": 18 }, { "epoch": 0.009875259875259876, "grad_norm": 0.27378013730049133, "learning_rate": 0.00029834218059022024, "loss": 0.6434, "step": 19 }, { "epoch": 0.010395010395010396, "grad_norm": 0.26049938797950745, "learning_rate": 0.00029795419551040833, "loss": 0.6407, "step": 20 }, { "epoch": 0.010914760914760915, "grad_norm": 0.23414486646652222, "learning_rate": 0.00029752576123085736, "loss": 0.5113, "step": 21 }, { "epoch": 0.011434511434511435, "grad_norm": 0.26911309361457825, "learning_rate": 0.0002970569948812214, "loss": 0.6066, "step": 22 }, { "epoch": 0.011954261954261955, "grad_norm": 0.26919862627983093, "learning_rate": 0.0002965480246175399, "loss": 0.6391, "step": 23 }, { "epoch": 0.012474012474012475, "grad_norm": 0.28419116139411926, "learning_rate": 0.0002959989895872009, "loss": 0.5721, "step": 24 }, { "epoch": 0.012993762993762994, "grad_norm": 0.3130952715873718, "learning_rate": 0.0002954100398908995, "loss": 0.5899, "step": 25 }, { "epoch": 0.012993762993762994, "eval_loss": 0.540122389793396, "eval_runtime": 9.7128, "eval_samples_per_second": 5.148, "eval_steps_per_second": 0.721, "step": 25 }, { "epoch": 0.013513513513513514, "grad_norm": 0.4303876459598541, "learning_rate": 0.0002947813365416023, "loss": 0.6755, "step": 26 }, { "epoch": 0.014033264033264034, "grad_norm": 0.36919379234313965, "learning_rate": 0.0002941130514205272, "loss": 0.5538, "step": 27 }, { "epoch": 0.014553014553014554, "grad_norm": 0.33030325174331665, "learning_rate": 0.0002934053672301536, "loss": 0.6243, "step": 28 }, { "epoch": 0.015072765072765074, "grad_norm": 0.34687164425849915, "learning_rate": 0.00029265847744427303, "loss": 0.604, "step": 29 }, { "epoch": 0.015592515592515593, "grad_norm": 0.3886769115924835, "learning_rate": 0.00029187258625509513, "loss": 0.5096, "step": 30 }, { "epoch": 0.016112266112266113, "grad_norm": 0.3807097375392914, "learning_rate": 0.00029104790851742417, "loss": 0.6, "step": 31 }, { "epoch": 0.016632016632016633, "grad_norm": 0.36538225412368774, "learning_rate": 0.0002901846696899191, "loss": 0.6471, "step": 32 }, { "epoch": 0.017151767151767153, "grad_norm": 0.5048696994781494, "learning_rate": 0.00028928310577345606, "loss": 0.6444, "step": 33 }, { "epoch": 0.017671517671517672, "grad_norm": 0.553619921207428, "learning_rate": 0.0002883434632466077, "loss": 0.6453, "step": 34 }, { "epoch": 0.018191268191268192, "grad_norm": 0.4972146153450012, "learning_rate": 0.00028736599899825856, "loss": 0.6088, "step": 35 }, { "epoch": 0.018711018711018712, "grad_norm": 0.47668373584747314, "learning_rate": 0.00028635098025737434, "loss": 0.6455, "step": 36 }, { "epoch": 0.019230769230769232, "grad_norm": 0.4956304728984833, "learning_rate": 0.00028529868451994384, "loss": 0.6739, "step": 37 }, { "epoch": 0.01975051975051975, "grad_norm": 0.4820132851600647, "learning_rate": 0.0002842093994731145, "loss": 0.6184, "step": 38 }, { "epoch": 0.02027027027027027, "grad_norm": 0.5695618987083435, "learning_rate": 0.00028308342291654174, "loss": 0.6457, "step": 39 }, { "epoch": 0.02079002079002079, "grad_norm": 0.6215941905975342, "learning_rate": 0.00028192106268097334, "loss": 0.7308, "step": 40 }, { "epoch": 0.02130977130977131, "grad_norm": 0.7127547264099121, "learning_rate": 0.00028072263654409154, "loss": 0.6398, "step": 41 }, { "epoch": 0.02182952182952183, "grad_norm": 0.7794021368026733, "learning_rate": 0.0002794884721436361, "loss": 0.7203, "step": 42 }, { "epoch": 0.02234927234927235, "grad_norm": 0.6468542814254761, "learning_rate": 0.00027821890688783083, "loss": 0.7176, "step": 43 }, { "epoch": 0.02286902286902287, "grad_norm": 0.7985377311706543, "learning_rate": 0.0002769142878631403, "loss": 0.7033, "step": 44 }, { "epoch": 0.02338877338877339, "grad_norm": 0.7809744477272034, "learning_rate": 0.00027557497173937923, "loss": 0.8077, "step": 45 }, { "epoch": 0.02390852390852391, "grad_norm": 0.9611108303070068, "learning_rate": 0.000274201324672203, "loss": 0.841, "step": 46 }, { "epoch": 0.02442827442827443, "grad_norm": 1.0788683891296387, "learning_rate": 0.00027279372220300385, "loss": 0.8095, "step": 47 }, { "epoch": 0.02494802494802495, "grad_norm": 0.8641987442970276, "learning_rate": 0.0002713525491562421, "loss": 0.8089, "step": 48 }, { "epoch": 0.02546777546777547, "grad_norm": 1.215991497039795, "learning_rate": 0.00026987819953423867, "loss": 0.7884, "step": 49 }, { "epoch": 0.02598752598752599, "grad_norm": 1.888974905014038, "learning_rate": 0.00026837107640945905, "loss": 0.9488, "step": 50 }, { "epoch": 0.02598752598752599, "eval_loss": 0.5958743691444397, "eval_runtime": 9.7046, "eval_samples_per_second": 5.152, "eval_steps_per_second": 0.721, "step": 50 }, { "epoch": 0.02650727650727651, "grad_norm": 0.4678969085216522, "learning_rate": 0.0002668315918143169, "loss": 0.427, "step": 51 }, { "epoch": 0.02702702702702703, "grad_norm": 0.3184508681297302, "learning_rate": 0.00026526016662852886, "loss": 0.5281, "step": 52 }, { "epoch": 0.02754677754677755, "grad_norm": 0.25341641902923584, "learning_rate": 0.00026365723046405023, "loss": 0.4908, "step": 53 }, { "epoch": 0.028066528066528068, "grad_norm": 0.23108318448066711, "learning_rate": 0.0002620232215476231, "loss": 0.483, "step": 54 }, { "epoch": 0.028586278586278588, "grad_norm": 0.2675793170928955, "learning_rate": 0.0002603585866009697, "loss": 0.4906, "step": 55 }, { "epoch": 0.029106029106029108, "grad_norm": 0.26088351011276245, "learning_rate": 0.00025866378071866334, "loss": 0.5292, "step": 56 }, { "epoch": 0.029625779625779627, "grad_norm": 0.23946167528629303, "learning_rate": 0.00025693926724370956, "loss": 0.5545, "step": 57 }, { "epoch": 0.030145530145530147, "grad_norm": 0.26028525829315186, "learning_rate": 0.00025518551764087326, "loss": 0.5487, "step": 58 }, { "epoch": 0.030665280665280667, "grad_norm": 0.26847925782203674, "learning_rate": 0.00025340301136778483, "loss": 0.4912, "step": 59 }, { "epoch": 0.031185031185031187, "grad_norm": 0.2771508991718292, "learning_rate": 0.00025159223574386114, "loss": 0.5403, "step": 60 }, { "epoch": 0.03170478170478171, "grad_norm": 0.2898350954055786, "learning_rate": 0.0002497536858170772, "loss": 0.5789, "step": 61 }, { "epoch": 0.032224532224532226, "grad_norm": 0.25867152214050293, "learning_rate": 0.00024788786422862526, "loss": 0.5064, "step": 62 }, { "epoch": 0.032744282744282746, "grad_norm": 0.2536691129207611, "learning_rate": 0.00024599528107549745, "loss": 0.5948, "step": 63 }, { "epoch": 0.033264033264033266, "grad_norm": 0.26933252811431885, "learning_rate": 0.00024407645377103054, "loss": 0.5598, "step": 64 }, { "epoch": 0.033783783783783786, "grad_norm": 0.24513447284698486, "learning_rate": 0.00024213190690345018, "loss": 0.5593, "step": 65 }, { "epoch": 0.034303534303534305, "grad_norm": 0.3047233819961548, "learning_rate": 0.00024016217209245374, "loss": 0.5761, "step": 66 }, { "epoch": 0.034823284823284825, "grad_norm": 0.258892685174942, "learning_rate": 0.00023816778784387094, "loss": 0.5551, "step": 67 }, { "epoch": 0.035343035343035345, "grad_norm": 0.22836266458034515, "learning_rate": 0.0002361492994024415, "loss": 0.541, "step": 68 }, { "epoch": 0.035862785862785865, "grad_norm": 0.25990796089172363, "learning_rate": 0.0002341072586027509, "loss": 0.5461, "step": 69 }, { "epoch": 0.036382536382536385, "grad_norm": 0.25337180495262146, "learning_rate": 0.00023204222371836405, "loss": 0.6029, "step": 70 }, { "epoch": 0.036902286902286904, "grad_norm": 0.2999207079410553, "learning_rate": 0.00022995475930919905, "loss": 0.5432, "step": 71 }, { "epoch": 0.037422037422037424, "grad_norm": 0.3323523700237274, "learning_rate": 0.00022784543606718227, "loss": 0.585, "step": 72 }, { "epoch": 0.037941787941787944, "grad_norm": 0.3124600648880005, "learning_rate": 0.00022571483066022657, "loss": 0.5428, "step": 73 }, { "epoch": 0.038461538461538464, "grad_norm": 0.30299338698387146, "learning_rate": 0.0002235635255745762, "loss": 0.5492, "step": 74 }, { "epoch": 0.03898128898128898, "grad_norm": 0.32373517751693726, "learning_rate": 0.00022139210895556104, "loss": 0.5737, "step": 75 }, { "epoch": 0.03898128898128898, "eval_loss": 0.5234647989273071, "eval_runtime": 10.397, "eval_samples_per_second": 4.809, "eval_steps_per_second": 0.673, "step": 75 }, { "epoch": 0.0395010395010395, "grad_norm": 0.2775340974330902, "learning_rate": 0.00021920117444680317, "loss": 0.5398, "step": 76 }, { "epoch": 0.04002079002079002, "grad_norm": 0.34127601981163025, "learning_rate": 0.00021699132102792097, "loss": 0.5863, "step": 77 }, { "epoch": 0.04054054054054054, "grad_norm": 0.32936781644821167, "learning_rate": 0.0002147631528507739, "loss": 0.5702, "step": 78 }, { "epoch": 0.04106029106029106, "grad_norm": 0.32185012102127075, "learning_rate": 0.00021251727907429355, "loss": 0.5604, "step": 79 }, { "epoch": 0.04158004158004158, "grad_norm": 0.3725656270980835, "learning_rate": 0.0002102543136979454, "loss": 0.5096, "step": 80 }, { "epoch": 0.0420997920997921, "grad_norm": 0.43758270144462585, "learning_rate": 0.0002079748753938678, "loss": 0.6213, "step": 81 }, { "epoch": 0.04261954261954262, "grad_norm": 0.4397500157356262, "learning_rate": 0.0002056795873377331, "loss": 0.5776, "step": 82 }, { "epoch": 0.04313929313929314, "grad_norm": 0.41127029061317444, "learning_rate": 0.00020336907703837748, "loss": 0.5965, "step": 83 }, { "epoch": 0.04365904365904366, "grad_norm": 0.41310176253318787, "learning_rate": 0.00020104397616624645, "loss": 0.5568, "step": 84 }, { "epoch": 0.04417879417879418, "grad_norm": 0.5008036494255066, "learning_rate": 0.00019870492038070252, "loss": 0.5604, "step": 85 }, { "epoch": 0.0446985446985447, "grad_norm": 0.5008141398429871, "learning_rate": 0.0001963525491562421, "loss": 0.6194, "step": 86 }, { "epoch": 0.04521829521829522, "grad_norm": 0.5280492901802063, "learning_rate": 0.0001939875056076697, "loss": 0.5421, "step": 87 }, { "epoch": 0.04573804573804574, "grad_norm": 0.509255588054657, "learning_rate": 0.00019161043631427666, "loss": 0.5172, "step": 88 }, { "epoch": 0.04625779625779626, "grad_norm": 0.5679823160171509, "learning_rate": 0.00018922199114307294, "loss": 0.6475, "step": 89 }, { "epoch": 0.04677754677754678, "grad_norm": 0.6964303851127625, "learning_rate": 0.00018682282307111987, "loss": 0.6635, "step": 90 }, { "epoch": 0.0472972972972973, "grad_norm": 0.6682361960411072, "learning_rate": 0.00018441358800701273, "loss": 0.6719, "step": 91 }, { "epoch": 0.04781704781704782, "grad_norm": 0.6329060792922974, "learning_rate": 0.00018199494461156203, "loss": 0.5602, "step": 92 }, { "epoch": 0.04833679833679834, "grad_norm": 0.7174376249313354, "learning_rate": 0.000179567554117722, "loss": 0.5954, "step": 93 }, { "epoch": 0.04885654885654886, "grad_norm": 0.8649759888648987, "learning_rate": 0.00017713208014981648, "loss": 0.7054, "step": 94 }, { "epoch": 0.04937629937629938, "grad_norm": 0.8587183356285095, "learning_rate": 0.00017468918854211007, "loss": 0.7378, "step": 95 }, { "epoch": 0.0498960498960499, "grad_norm": 0.9487538933753967, "learning_rate": 0.00017223954715677627, "loss": 0.673, "step": 96 }, { "epoch": 0.05041580041580042, "grad_norm": 0.9698479771614075, "learning_rate": 0.00016978382570131034, "loss": 0.6394, "step": 97 }, { "epoch": 0.05093555093555094, "grad_norm": 1.1705135107040405, "learning_rate": 0.00016732269554543794, "loss": 0.7881, "step": 98 }, { "epoch": 0.05145530145530146, "grad_norm": 1.147175908088684, "learning_rate": 0.00016485682953756942, "loss": 0.8175, "step": 99 }, { "epoch": 0.05197505197505198, "grad_norm": 1.5819910764694214, "learning_rate": 0.00016238690182084986, "loss": 0.8187, "step": 100 }, { "epoch": 0.05197505197505198, "eval_loss": 0.5713622570037842, "eval_runtime": 10.7862, "eval_samples_per_second": 4.636, "eval_steps_per_second": 0.649, "step": 100 }, { "epoch": 0.0524948024948025, "grad_norm": 0.3077780604362488, "learning_rate": 0.0001599135876488549, "loss": 0.3882, "step": 101 }, { "epoch": 0.05301455301455302, "grad_norm": 0.33069512248039246, "learning_rate": 0.00015743756320098332, "loss": 0.4347, "step": 102 }, { "epoch": 0.05353430353430354, "grad_norm": 0.2965315580368042, "learning_rate": 0.0001549595053975962, "loss": 0.4631, "step": 103 }, { "epoch": 0.05405405405405406, "grad_norm": 0.2445397824048996, "learning_rate": 0.00015248009171495378, "loss": 0.5486, "step": 104 }, { "epoch": 0.05457380457380458, "grad_norm": 0.24396640062332153, "learning_rate": 0.00015, "loss": 0.5298, "step": 105 }, { "epoch": 0.0550935550935551, "grad_norm": 0.2391216903924942, "learning_rate": 0.00014751990828504622, "loss": 0.5678, "step": 106 }, { "epoch": 0.055613305613305616, "grad_norm": 0.2599236071109772, "learning_rate": 0.00014504049460240375, "loss": 0.5262, "step": 107 }, { "epoch": 0.056133056133056136, "grad_norm": 0.2846571207046509, "learning_rate": 0.00014256243679901663, "loss": 0.5928, "step": 108 }, { "epoch": 0.056652806652806656, "grad_norm": 0.23013561964035034, "learning_rate": 0.00014008641235114508, "loss": 0.5225, "step": 109 }, { "epoch": 0.057172557172557176, "grad_norm": 0.2311960905790329, "learning_rate": 0.00013761309817915014, "loss": 0.5661, "step": 110 }, { "epoch": 0.057692307692307696, "grad_norm": 0.2271488606929779, "learning_rate": 0.00013514317046243058, "loss": 0.5381, "step": 111 }, { "epoch": 0.058212058212058215, "grad_norm": 0.30388909578323364, "learning_rate": 0.00013267730445456208, "loss": 0.5819, "step": 112 }, { "epoch": 0.058731808731808735, "grad_norm": 0.23286861181259155, "learning_rate": 0.00013021617429868963, "loss": 0.5441, "step": 113 }, { "epoch": 0.059251559251559255, "grad_norm": 0.21719612181186676, "learning_rate": 0.00012776045284322368, "loss": 0.5258, "step": 114 }, { "epoch": 0.059771309771309775, "grad_norm": 0.23778563737869263, "learning_rate": 0.00012531081145788987, "loss": 0.592, "step": 115 }, { "epoch": 0.060291060291060294, "grad_norm": 0.25448745489120483, "learning_rate": 0.00012286791985018355, "loss": 0.5558, "step": 116 }, { "epoch": 0.060810810810810814, "grad_norm": 0.2358551174402237, "learning_rate": 0.00012043244588227796, "loss": 0.5425, "step": 117 }, { "epoch": 0.061330561330561334, "grad_norm": 0.2500148415565491, "learning_rate": 0.00011800505538843798, "loss": 0.5688, "step": 118 }, { "epoch": 0.061850311850311854, "grad_norm": 0.23617619276046753, "learning_rate": 0.00011558641199298727, "loss": 0.569, "step": 119 }, { "epoch": 0.062370062370062374, "grad_norm": 0.2460618019104004, "learning_rate": 0.00011317717692888012, "loss": 0.5258, "step": 120 }, { "epoch": 0.0628898128898129, "grad_norm": 0.25035300850868225, "learning_rate": 0.00011077800885692702, "loss": 0.5452, "step": 121 }, { "epoch": 0.06340956340956341, "grad_norm": 0.2552037835121155, "learning_rate": 0.00010838956368572334, "loss": 0.5386, "step": 122 }, { "epoch": 0.06392931392931393, "grad_norm": 0.2568017244338989, "learning_rate": 0.0001060124943923303, "loss": 0.5054, "step": 123 }, { "epoch": 0.06444906444906445, "grad_norm": 0.3237874507904053, "learning_rate": 0.0001036474508437579, "loss": 0.5206, "step": 124 }, { "epoch": 0.06496881496881497, "grad_norm": 0.32453036308288574, "learning_rate": 0.00010129507961929748, "loss": 0.6088, "step": 125 }, { "epoch": 0.06496881496881497, "eval_loss": 0.5060738325119019, "eval_runtime": 10.7588, "eval_samples_per_second": 4.647, "eval_steps_per_second": 0.651, "step": 125 }, { "epoch": 0.06548856548856549, "grad_norm": 0.33183586597442627, "learning_rate": 9.895602383375353e-05, "loss": 0.5469, "step": 126 }, { "epoch": 0.06600831600831601, "grad_norm": 0.35012179613113403, "learning_rate": 9.663092296162251e-05, "loss": 0.597, "step": 127 }, { "epoch": 0.06652806652806653, "grad_norm": 0.3140457570552826, "learning_rate": 9.432041266226686e-05, "loss": 0.5094, "step": 128 }, { "epoch": 0.06704781704781705, "grad_norm": 0.34678980708122253, "learning_rate": 9.202512460613219e-05, "loss": 0.5487, "step": 129 }, { "epoch": 0.06756756756756757, "grad_norm": 0.33957555890083313, "learning_rate": 8.97456863020546e-05, "loss": 0.5276, "step": 130 }, { "epoch": 0.06808731808731809, "grad_norm": 0.3753758370876312, "learning_rate": 8.748272092570646e-05, "loss": 0.5253, "step": 131 }, { "epoch": 0.06860706860706861, "grad_norm": 0.36970919370651245, "learning_rate": 8.523684714922608e-05, "loss": 0.5681, "step": 132 }, { "epoch": 0.06912681912681913, "grad_norm": 0.38359901309013367, "learning_rate": 8.300867897207903e-05, "loss": 0.5572, "step": 133 }, { "epoch": 0.06964656964656965, "grad_norm": 0.40657931566238403, "learning_rate": 8.079882555319684e-05, "loss": 0.5439, "step": 134 }, { "epoch": 0.07016632016632017, "grad_norm": 0.46938619017601013, "learning_rate": 7.860789104443896e-05, "loss": 0.6336, "step": 135 }, { "epoch": 0.07068607068607069, "grad_norm": 0.5070523619651794, "learning_rate": 7.643647442542382e-05, "loss": 0.6577, "step": 136 }, { "epoch": 0.07120582120582121, "grad_norm": 0.499677836894989, "learning_rate": 7.428516933977347e-05, "loss": 0.6595, "step": 137 }, { "epoch": 0.07172557172557173, "grad_norm": 0.5089967250823975, "learning_rate": 7.215456393281776e-05, "loss": 0.6318, "step": 138 }, { "epoch": 0.07224532224532225, "grad_norm": 0.5152573585510254, "learning_rate": 7.004524069080096e-05, "loss": 0.5942, "step": 139 }, { "epoch": 0.07276507276507277, "grad_norm": 0.60390305519104, "learning_rate": 6.795777628163599e-05, "loss": 0.6321, "step": 140 }, { "epoch": 0.07328482328482329, "grad_norm": 0.6076339483261108, "learning_rate": 6.58927413972491e-05, "loss": 0.5815, "step": 141 }, { "epoch": 0.07380457380457381, "grad_norm": 0.6375054121017456, "learning_rate": 6.385070059755846e-05, "loss": 0.6469, "step": 142 }, { "epoch": 0.07432432432432433, "grad_norm": 0.6294693350791931, "learning_rate": 6.183221215612904e-05, "loss": 0.6006, "step": 143 }, { "epoch": 0.07484407484407485, "grad_norm": 0.7831677794456482, "learning_rate": 5.983782790754623e-05, "loss": 0.5724, "step": 144 }, { "epoch": 0.07536382536382537, "grad_norm": 0.7516241073608398, "learning_rate": 5.786809309654982e-05, "loss": 0.5826, "step": 145 }, { "epoch": 0.07588357588357589, "grad_norm": 0.9946649074554443, "learning_rate": 5.592354622896944e-05, "loss": 0.7365, "step": 146 }, { "epoch": 0.07640332640332641, "grad_norm": 0.8894773125648499, "learning_rate": 5.40047189245025e-05, "loss": 0.7055, "step": 147 }, { "epoch": 0.07692307692307693, "grad_norm": 1.0826866626739502, "learning_rate": 5.211213577137469e-05, "loss": 0.6749, "step": 148 }, { "epoch": 0.07744282744282745, "grad_norm": 1.1890193223953247, "learning_rate": 5.024631418292274e-05, "loss": 0.7258, "step": 149 }, { "epoch": 0.07796257796257797, "grad_norm": 1.688950538635254, "learning_rate": 4.840776425613886e-05, "loss": 0.8355, "step": 150 }, { "epoch": 0.07796257796257797, "eval_loss": 0.5096783638000488, "eval_runtime": 9.6989, "eval_samples_per_second": 5.155, "eval_steps_per_second": 0.722, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.5105929355237786e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }