{ "best_metric": 11.920330047607422, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.061833359097232955, "eval_steps": 25, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003091667954861648, "grad_norm": 0.011082077398896217, "learning_rate": 2.9999999999999997e-05, "loss": 11.9332, "step": 1 }, { "epoch": 0.0003091667954861648, "eval_loss": 11.933963775634766, "eval_runtime": 0.3137, "eval_samples_per_second": 159.412, "eval_steps_per_second": 22.318, "step": 1 }, { "epoch": 0.0006183335909723295, "grad_norm": 0.010560370050370693, "learning_rate": 5.9999999999999995e-05, "loss": 11.9329, "step": 2 }, { "epoch": 0.0009275003864584944, "grad_norm": 0.014500396326184273, "learning_rate": 8.999999999999999e-05, "loss": 11.935, "step": 3 }, { "epoch": 0.001236667181944659, "grad_norm": 0.012597071006894112, "learning_rate": 0.00011999999999999999, "loss": 11.9331, "step": 4 }, { "epoch": 0.001545833977430824, "grad_norm": 0.011617187410593033, "learning_rate": 0.00015, "loss": 11.936, "step": 5 }, { "epoch": 0.0018550007729169888, "grad_norm": 0.014956319704651833, "learning_rate": 0.00017999999999999998, "loss": 11.9321, "step": 6 }, { "epoch": 0.0021641675684031534, "grad_norm": 0.009852694347500801, "learning_rate": 0.00020999999999999998, "loss": 11.9326, "step": 7 }, { "epoch": 0.002473334363889318, "grad_norm": 0.013949165120720863, "learning_rate": 0.00023999999999999998, "loss": 11.9325, "step": 8 }, { "epoch": 0.002782501159375483, "grad_norm": 0.011659414507448673, "learning_rate": 0.00027, "loss": 11.9319, "step": 9 }, { "epoch": 0.003091667954861648, "grad_norm": 0.011611159890890121, "learning_rate": 0.0003, "loss": 11.9336, "step": 10 }, { "epoch": 0.0034008347503478127, "grad_norm": 0.014143702574074268, "learning_rate": 0.0002999794957488703, "loss": 11.9317, "step": 11 }, { "epoch": 0.0037100015458339775, "grad_norm": 0.016038434579968452, "learning_rate": 0.0002999179886011389, "loss": 11.9345, "step": 12 }, { "epoch": 0.004019168341320142, "grad_norm": 0.017547190189361572, "learning_rate": 0.0002998154953722457, "loss": 11.9337, "step": 13 }, { "epoch": 0.004328335136806307, "grad_norm": 0.012028004042804241, "learning_rate": 0.00029967204408281613, "loss": 11.933, "step": 14 }, { "epoch": 0.004637501932292472, "grad_norm": 0.01833096332848072, "learning_rate": 0.00029948767395100045, "loss": 11.9338, "step": 15 }, { "epoch": 0.004946668727778636, "grad_norm": 0.017104797065258026, "learning_rate": 0.0002992624353817517, "loss": 11.9336, "step": 16 }, { "epoch": 0.005255835523264802, "grad_norm": 0.024092281237244606, "learning_rate": 0.0002989963899530457, "loss": 11.9342, "step": 17 }, { "epoch": 0.005565002318750966, "grad_norm": 0.022879665717482567, "learning_rate": 0.00029868961039904624, "loss": 11.9317, "step": 18 }, { "epoch": 0.005874169114237131, "grad_norm": 0.022642433643341064, "learning_rate": 0.00029834218059022024, "loss": 11.9338, "step": 19 }, { "epoch": 0.006183335909723296, "grad_norm": 0.020425595343112946, "learning_rate": 0.00029795419551040833, "loss": 11.934, "step": 20 }, { "epoch": 0.00649250270520946, "grad_norm": 0.022517619654536247, "learning_rate": 0.00029752576123085736, "loss": 11.9328, "step": 21 }, { "epoch": 0.006801669500695625, "grad_norm": 0.02568974532186985, "learning_rate": 0.0002970569948812214, "loss": 11.9333, "step": 22 }, { "epoch": 0.00711083629618179, "grad_norm": 0.024739008396863937, "learning_rate": 0.0002965480246175399, "loss": 11.931, "step": 23 }, { "epoch": 0.007420003091667955, "grad_norm": 0.032434217631816864, "learning_rate": 0.0002959989895872009, "loss": 11.93, "step": 24 }, { "epoch": 0.007729169887154119, "grad_norm": 0.025767942890524864, "learning_rate": 0.0002954100398908995, "loss": 11.9326, "step": 25 }, { "epoch": 0.007729169887154119, "eval_loss": 11.931744575500488, "eval_runtime": 0.3111, "eval_samples_per_second": 160.746, "eval_steps_per_second": 22.504, "step": 25 }, { "epoch": 0.008038336682640285, "grad_norm": 0.04258275404572487, "learning_rate": 0.0002947813365416023, "loss": 11.9318, "step": 26 }, { "epoch": 0.008347503478126449, "grad_norm": 0.02904968149960041, "learning_rate": 0.0002941130514205272, "loss": 11.932, "step": 27 }, { "epoch": 0.008656670273612613, "grad_norm": 0.041743796318769455, "learning_rate": 0.0002934053672301536, "loss": 11.9299, "step": 28 }, { "epoch": 0.00896583706909878, "grad_norm": 0.03630542382597923, "learning_rate": 0.00029265847744427303, "loss": 11.9305, "step": 29 }, { "epoch": 0.009275003864584944, "grad_norm": 0.0299307182431221, "learning_rate": 0.00029187258625509513, "loss": 11.9302, "step": 30 }, { "epoch": 0.009584170660071108, "grad_norm": 0.0390288382768631, "learning_rate": 0.00029104790851742417, "loss": 11.9308, "step": 31 }, { "epoch": 0.009893337455557273, "grad_norm": 0.0380086749792099, "learning_rate": 0.0002901846696899191, "loss": 11.9298, "step": 32 }, { "epoch": 0.010202504251043437, "grad_norm": 0.03649143502116203, "learning_rate": 0.00028928310577345606, "loss": 11.9307, "step": 33 }, { "epoch": 0.010511671046529603, "grad_norm": 0.05261366814374924, "learning_rate": 0.0002883434632466077, "loss": 11.9291, "step": 34 }, { "epoch": 0.010820837842015768, "grad_norm": 0.04119294881820679, "learning_rate": 0.00028736599899825856, "loss": 11.9301, "step": 35 }, { "epoch": 0.011130004637501932, "grad_norm": 0.05613045021891594, "learning_rate": 0.00028635098025737434, "loss": 11.9284, "step": 36 }, { "epoch": 0.011439171432988096, "grad_norm": 0.05253393575549126, "learning_rate": 0.00028529868451994384, "loss": 11.9265, "step": 37 }, { "epoch": 0.011748338228474263, "grad_norm": 0.048098281025886536, "learning_rate": 0.0002842093994731145, "loss": 11.9272, "step": 38 }, { "epoch": 0.012057505023960427, "grad_norm": 0.05176955461502075, "learning_rate": 0.00028308342291654174, "loss": 11.9252, "step": 39 }, { "epoch": 0.012366671819446591, "grad_norm": 0.04504713788628578, "learning_rate": 0.00028192106268097334, "loss": 11.924, "step": 40 }, { "epoch": 0.012675838614932756, "grad_norm": 0.040081366896629333, "learning_rate": 0.00028072263654409154, "loss": 11.9252, "step": 41 }, { "epoch": 0.01298500541041892, "grad_norm": 0.04069807380437851, "learning_rate": 0.0002794884721436361, "loss": 11.9262, "step": 42 }, { "epoch": 0.013294172205905086, "grad_norm": 0.0486382395029068, "learning_rate": 0.00027821890688783083, "loss": 11.9255, "step": 43 }, { "epoch": 0.01360333900139125, "grad_norm": 0.04034913703799248, "learning_rate": 0.0002769142878631403, "loss": 11.9265, "step": 44 }, { "epoch": 0.013912505796877415, "grad_norm": 0.053514041006565094, "learning_rate": 0.00027557497173937923, "loss": 11.9251, "step": 45 }, { "epoch": 0.01422167259236358, "grad_norm": 0.04568411037325859, "learning_rate": 0.000274201324672203, "loss": 11.924, "step": 46 }, { "epoch": 0.014530839387849746, "grad_norm": 0.039840299636125565, "learning_rate": 0.00027279372220300385, "loss": 11.9258, "step": 47 }, { "epoch": 0.01484000618333591, "grad_norm": 0.042658720165491104, "learning_rate": 0.0002713525491562421, "loss": 11.9236, "step": 48 }, { "epoch": 0.015149172978822074, "grad_norm": 0.043333932757377625, "learning_rate": 0.00026987819953423867, "loss": 11.9252, "step": 49 }, { "epoch": 0.015458339774308239, "grad_norm": 0.04685673117637634, "learning_rate": 0.00026837107640945905, "loss": 11.9263, "step": 50 }, { "epoch": 0.015458339774308239, "eval_loss": 11.922983169555664, "eval_runtime": 0.312, "eval_samples_per_second": 160.278, "eval_steps_per_second": 22.439, "step": 50 }, { "epoch": 0.015767506569794403, "grad_norm": 0.031208673492074013, "learning_rate": 0.0002668315918143169, "loss": 11.9262, "step": 51 }, { "epoch": 0.01607667336528057, "grad_norm": 0.0354318805038929, "learning_rate": 0.00026526016662852886, "loss": 11.9228, "step": 52 }, { "epoch": 0.016385840160766732, "grad_norm": 0.030587337911128998, "learning_rate": 0.00026365723046405023, "loss": 11.9219, "step": 53 }, { "epoch": 0.016695006956252898, "grad_norm": 0.03195672482252121, "learning_rate": 0.0002620232215476231, "loss": 11.9218, "step": 54 }, { "epoch": 0.017004173751739064, "grad_norm": 0.0215512253344059, "learning_rate": 0.0002603585866009697, "loss": 11.9224, "step": 55 }, { "epoch": 0.017313340547225227, "grad_norm": 0.022623790428042412, "learning_rate": 0.00025866378071866334, "loss": 11.9209, "step": 56 }, { "epoch": 0.017622507342711393, "grad_norm": 0.01941165328025818, "learning_rate": 0.00025693926724370956, "loss": 11.9218, "step": 57 }, { "epoch": 0.01793167413819756, "grad_norm": 0.026768945157527924, "learning_rate": 0.00025518551764087326, "loss": 11.9228, "step": 58 }, { "epoch": 0.018240840933683722, "grad_norm": 0.02009095624089241, "learning_rate": 0.00025340301136778483, "loss": 11.9226, "step": 59 }, { "epoch": 0.018550007729169888, "grad_norm": 0.025961967185139656, "learning_rate": 0.00025159223574386114, "loss": 11.9202, "step": 60 }, { "epoch": 0.01885917452465605, "grad_norm": 0.018053073436021805, "learning_rate": 0.0002497536858170772, "loss": 11.9212, "step": 61 }, { "epoch": 0.019168341320142217, "grad_norm": 0.018706616014242172, "learning_rate": 0.00024788786422862526, "loss": 11.921, "step": 62 }, { "epoch": 0.019477508115628383, "grad_norm": 0.01734154112637043, "learning_rate": 0.00024599528107549745, "loss": 11.9215, "step": 63 }, { "epoch": 0.019786674911114546, "grad_norm": 0.0244760625064373, "learning_rate": 0.00024407645377103054, "loss": 11.9225, "step": 64 }, { "epoch": 0.02009584170660071, "grad_norm": 0.021497942507267, "learning_rate": 0.00024213190690345018, "loss": 11.9235, "step": 65 }, { "epoch": 0.020405008502086874, "grad_norm": 0.02055254578590393, "learning_rate": 0.00024016217209245374, "loss": 11.9207, "step": 66 }, { "epoch": 0.02071417529757304, "grad_norm": 0.012986048124730587, "learning_rate": 0.00023816778784387094, "loss": 11.9197, "step": 67 }, { "epoch": 0.021023342093059207, "grad_norm": 0.018432628363370895, "learning_rate": 0.0002361492994024415, "loss": 11.9214, "step": 68 }, { "epoch": 0.02133250888854537, "grad_norm": 0.019169418141245842, "learning_rate": 0.0002341072586027509, "loss": 11.9201, "step": 69 }, { "epoch": 0.021641675684031535, "grad_norm": 0.013602902181446552, "learning_rate": 0.00023204222371836405, "loss": 11.9208, "step": 70 }, { "epoch": 0.0219508424795177, "grad_norm": 0.018773363903164864, "learning_rate": 0.00022995475930919905, "loss": 11.9222, "step": 71 }, { "epoch": 0.022260009275003864, "grad_norm": 0.01886664144694805, "learning_rate": 0.00022784543606718227, "loss": 11.9218, "step": 72 }, { "epoch": 0.02256917607049003, "grad_norm": 0.03048502467572689, "learning_rate": 0.00022571483066022657, "loss": 11.9205, "step": 73 }, { "epoch": 0.022878342865976193, "grad_norm": 0.024672266095876694, "learning_rate": 0.0002235635255745762, "loss": 11.9238, "step": 74 }, { "epoch": 0.02318750966146236, "grad_norm": 0.015406430698931217, "learning_rate": 0.00022139210895556104, "loss": 11.9207, "step": 75 }, { "epoch": 0.02318750966146236, "eval_loss": 11.921063423156738, "eval_runtime": 0.3135, "eval_samples_per_second": 159.472, "eval_steps_per_second": 22.326, "step": 75 }, { "epoch": 0.023496676456948525, "grad_norm": 0.018032951280474663, "learning_rate": 0.00021920117444680317, "loss": 11.9235, "step": 76 }, { "epoch": 0.023805843252434688, "grad_norm": 0.01647859252989292, "learning_rate": 0.00021699132102792097, "loss": 11.923, "step": 77 }, { "epoch": 0.024115010047920854, "grad_norm": 0.019747408106923103, "learning_rate": 0.0002147631528507739, "loss": 11.922, "step": 78 }, { "epoch": 0.024424176843407017, "grad_norm": 0.01621292717754841, "learning_rate": 0.00021251727907429355, "loss": 11.9214, "step": 79 }, { "epoch": 0.024733343638893183, "grad_norm": 0.02479792758822441, "learning_rate": 0.0002102543136979454, "loss": 11.9244, "step": 80 }, { "epoch": 0.02504251043437935, "grad_norm": 0.014410406351089478, "learning_rate": 0.0002079748753938678, "loss": 11.9194, "step": 81 }, { "epoch": 0.02535167722986551, "grad_norm": 0.021437469869852066, "learning_rate": 0.0002056795873377331, "loss": 11.9216, "step": 82 }, { "epoch": 0.025660844025351678, "grad_norm": 0.019916469231247902, "learning_rate": 0.00020336907703837748, "loss": 11.9208, "step": 83 }, { "epoch": 0.02597001082083784, "grad_norm": 0.012713909149169922, "learning_rate": 0.00020104397616624645, "loss": 11.9202, "step": 84 }, { "epoch": 0.026279177616324007, "grad_norm": 0.013600163161754608, "learning_rate": 0.00019870492038070252, "loss": 11.9218, "step": 85 }, { "epoch": 0.026588344411810173, "grad_norm": 0.01585683971643448, "learning_rate": 0.0001963525491562421, "loss": 11.9209, "step": 86 }, { "epoch": 0.026897511207296335, "grad_norm": 0.01941349171102047, "learning_rate": 0.0001939875056076697, "loss": 11.9209, "step": 87 }, { "epoch": 0.0272066780027825, "grad_norm": 0.0167936310172081, "learning_rate": 0.00019161043631427666, "loss": 11.9213, "step": 88 }, { "epoch": 0.027515844798268668, "grad_norm": 0.01990320347249508, "learning_rate": 0.00018922199114307294, "loss": 11.9225, "step": 89 }, { "epoch": 0.02782501159375483, "grad_norm": 0.01512246485799551, "learning_rate": 0.00018682282307111987, "loss": 11.9201, "step": 90 }, { "epoch": 0.028134178389240996, "grad_norm": 0.015815284103155136, "learning_rate": 0.00018441358800701273, "loss": 11.9207, "step": 91 }, { "epoch": 0.02844334518472716, "grad_norm": 0.01741873286664486, "learning_rate": 0.00018199494461156203, "loss": 11.9188, "step": 92 }, { "epoch": 0.028752511980213325, "grad_norm": 0.013978100381791592, "learning_rate": 0.000179567554117722, "loss": 11.9198, "step": 93 }, { "epoch": 0.02906167877569949, "grad_norm": 0.022170469164848328, "learning_rate": 0.00017713208014981648, "loss": 11.9207, "step": 94 }, { "epoch": 0.029370845571185654, "grad_norm": 0.020483536645770073, "learning_rate": 0.00017468918854211007, "loss": 11.9209, "step": 95 }, { "epoch": 0.02968001236667182, "grad_norm": 0.015020486898720264, "learning_rate": 0.00017223954715677627, "loss": 11.9203, "step": 96 }, { "epoch": 0.029989179162157983, "grad_norm": 0.023938920348882675, "learning_rate": 0.00016978382570131034, "loss": 11.9204, "step": 97 }, { "epoch": 0.03029834595764415, "grad_norm": 0.01607600599527359, "learning_rate": 0.00016732269554543794, "loss": 11.9194, "step": 98 }, { "epoch": 0.030607512753130315, "grad_norm": 0.019864091649651527, "learning_rate": 0.00016485682953756942, "loss": 11.9219, "step": 99 }, { "epoch": 0.030916679548616478, "grad_norm": 0.036099571734666824, "learning_rate": 0.00016238690182084986, "loss": 11.9224, "step": 100 }, { "epoch": 0.030916679548616478, "eval_loss": 11.920659065246582, "eval_runtime": 0.3122, "eval_samples_per_second": 160.146, "eval_steps_per_second": 22.42, "step": 100 }, { "epoch": 0.031225846344102644, "grad_norm": 0.01348921936005354, "learning_rate": 0.0001599135876488549, "loss": 11.9204, "step": 101 }, { "epoch": 0.031535013139588806, "grad_norm": 0.0159300584346056, "learning_rate": 0.00015743756320098332, "loss": 11.9216, "step": 102 }, { "epoch": 0.031844179935074976, "grad_norm": 0.00881663616746664, "learning_rate": 0.0001549595053975962, "loss": 11.9207, "step": 103 }, { "epoch": 0.03215334673056114, "grad_norm": 0.011139464564621449, "learning_rate": 0.00015248009171495378, "loss": 11.9201, "step": 104 }, { "epoch": 0.0324625135260473, "grad_norm": 0.020778406411409378, "learning_rate": 0.00015, "loss": 11.9204, "step": 105 }, { "epoch": 0.032771680321533464, "grad_norm": 0.01628156565129757, "learning_rate": 0.00014751990828504622, "loss": 11.9197, "step": 106 }, { "epoch": 0.033080847117019634, "grad_norm": 0.016714414581656456, "learning_rate": 0.00014504049460240375, "loss": 11.9209, "step": 107 }, { "epoch": 0.033390013912505796, "grad_norm": 0.014749337919056416, "learning_rate": 0.00014256243679901663, "loss": 11.9208, "step": 108 }, { "epoch": 0.03369918070799196, "grad_norm": 0.009742476977407932, "learning_rate": 0.00014008641235114508, "loss": 11.9199, "step": 109 }, { "epoch": 0.03400834750347813, "grad_norm": 0.020538056269288063, "learning_rate": 0.00013761309817915014, "loss": 11.9208, "step": 110 }, { "epoch": 0.03431751429896429, "grad_norm": 0.01151787769049406, "learning_rate": 0.00013514317046243058, "loss": 11.9215, "step": 111 }, { "epoch": 0.034626681094450454, "grad_norm": 0.010870077647268772, "learning_rate": 0.00013267730445456208, "loss": 11.92, "step": 112 }, { "epoch": 0.03493584788993662, "grad_norm": 0.015953300520777702, "learning_rate": 0.00013021617429868963, "loss": 11.9199, "step": 113 }, { "epoch": 0.035245014685422786, "grad_norm": 0.01493159495294094, "learning_rate": 0.00012776045284322368, "loss": 11.9207, "step": 114 }, { "epoch": 0.03555418148090895, "grad_norm": 0.01618473045527935, "learning_rate": 0.00012531081145788987, "loss": 11.9195, "step": 115 }, { "epoch": 0.03586334827639512, "grad_norm": 0.011493315920233727, "learning_rate": 0.00012286791985018355, "loss": 11.9207, "step": 116 }, { "epoch": 0.03617251507188128, "grad_norm": 0.016949845477938652, "learning_rate": 0.00012043244588227796, "loss": 11.919, "step": 117 }, { "epoch": 0.036481681867367444, "grad_norm": 0.015888245776295662, "learning_rate": 0.00011800505538843798, "loss": 11.9216, "step": 118 }, { "epoch": 0.036790848662853606, "grad_norm": 0.014239443466067314, "learning_rate": 0.00011558641199298727, "loss": 11.9203, "step": 119 }, { "epoch": 0.037100015458339776, "grad_norm": 0.013258833438158035, "learning_rate": 0.00011317717692888012, "loss": 11.9217, "step": 120 }, { "epoch": 0.03740918225382594, "grad_norm": 0.01647377945482731, "learning_rate": 0.00011077800885692702, "loss": 11.9225, "step": 121 }, { "epoch": 0.0377183490493121, "grad_norm": 0.013093708083033562, "learning_rate": 0.00010838956368572334, "loss": 11.9203, "step": 122 }, { "epoch": 0.03802751584479827, "grad_norm": 0.01176339853554964, "learning_rate": 0.0001060124943923303, "loss": 11.92, "step": 123 }, { "epoch": 0.038336682640284434, "grad_norm": 0.015000365674495697, "learning_rate": 0.0001036474508437579, "loss": 11.9198, "step": 124 }, { "epoch": 0.038645849435770596, "grad_norm": 0.013567100279033184, "learning_rate": 0.00010129507961929748, "loss": 11.9203, "step": 125 }, { "epoch": 0.038645849435770596, "eval_loss": 11.920571327209473, "eval_runtime": 0.3125, "eval_samples_per_second": 160.021, "eval_steps_per_second": 22.403, "step": 125 }, { "epoch": 0.038955016231256766, "grad_norm": 0.02192496880888939, "learning_rate": 9.895602383375353e-05, "loss": 11.92, "step": 126 }, { "epoch": 0.03926418302674293, "grad_norm": 0.010150052607059479, "learning_rate": 9.663092296162251e-05, "loss": 11.92, "step": 127 }, { "epoch": 0.03957334982222909, "grad_norm": 0.00961561780422926, "learning_rate": 9.432041266226686e-05, "loss": 11.9207, "step": 128 }, { "epoch": 0.03988251661771526, "grad_norm": 0.011108974926173687, "learning_rate": 9.202512460613219e-05, "loss": 11.9206, "step": 129 }, { "epoch": 0.04019168341320142, "grad_norm": 0.014158480800688267, "learning_rate": 8.97456863020546e-05, "loss": 11.9209, "step": 130 }, { "epoch": 0.040500850208687586, "grad_norm": 0.009706816636025906, "learning_rate": 8.748272092570646e-05, "loss": 11.9201, "step": 131 }, { "epoch": 0.04081001700417375, "grad_norm": 0.013424043543636799, "learning_rate": 8.523684714922608e-05, "loss": 11.9223, "step": 132 }, { "epoch": 0.04111918379965992, "grad_norm": 0.01730511151254177, "learning_rate": 8.300867897207903e-05, "loss": 11.9221, "step": 133 }, { "epoch": 0.04142835059514608, "grad_norm": 0.02061345800757408, "learning_rate": 8.079882555319684e-05, "loss": 11.9211, "step": 134 }, { "epoch": 0.041737517390632244, "grad_norm": 0.01282366644591093, "learning_rate": 7.860789104443896e-05, "loss": 11.9208, "step": 135 }, { "epoch": 0.04204668418611841, "grad_norm": 0.013586231507360935, "learning_rate": 7.643647442542382e-05, "loss": 11.9198, "step": 136 }, { "epoch": 0.042355850981604576, "grad_norm": 0.01187863852828741, "learning_rate": 7.428516933977347e-05, "loss": 11.9203, "step": 137 }, { "epoch": 0.04266501777709074, "grad_norm": 0.015415227971971035, "learning_rate": 7.215456393281776e-05, "loss": 11.9204, "step": 138 }, { "epoch": 0.04297418457257691, "grad_norm": 0.014501603320240974, "learning_rate": 7.004524069080096e-05, "loss": 11.9204, "step": 139 }, { "epoch": 0.04328335136806307, "grad_norm": 0.021058429032564163, "learning_rate": 6.795777628163599e-05, "loss": 11.9197, "step": 140 }, { "epoch": 0.04359251816354923, "grad_norm": 0.01594395935535431, "learning_rate": 6.58927413972491e-05, "loss": 11.9219, "step": 141 }, { "epoch": 0.0439016849590354, "grad_norm": 0.02341919206082821, "learning_rate": 6.385070059755846e-05, "loss": 11.9186, "step": 142 }, { "epoch": 0.044210851754521566, "grad_norm": 0.02027195319533348, "learning_rate": 6.183221215612904e-05, "loss": 11.9204, "step": 143 }, { "epoch": 0.04452001855000773, "grad_norm": 0.016454853117465973, "learning_rate": 5.983782790754623e-05, "loss": 11.9212, "step": 144 }, { "epoch": 0.04482918534549389, "grad_norm": 0.017924537882208824, "learning_rate": 5.786809309654982e-05, "loss": 11.9185, "step": 145 }, { "epoch": 0.04513835214098006, "grad_norm": 0.01718844473361969, "learning_rate": 5.592354622896944e-05, "loss": 11.9207, "step": 146 }, { "epoch": 0.04544751893646622, "grad_norm": 0.02122536674141884, "learning_rate": 5.40047189245025e-05, "loss": 11.9202, "step": 147 }, { "epoch": 0.045756685731952386, "grad_norm": 0.022999104112386703, "learning_rate": 5.211213577137469e-05, "loss": 11.9187, "step": 148 }, { "epoch": 0.046065852527438556, "grad_norm": 0.018542252480983734, "learning_rate": 5.024631418292274e-05, "loss": 11.9204, "step": 149 }, { "epoch": 0.04637501932292472, "grad_norm": 0.03244847059249878, "learning_rate": 4.840776425613886e-05, "loss": 11.9199, "step": 150 }, { "epoch": 0.04637501932292472, "eval_loss": 11.920450210571289, "eval_runtime": 0.312, "eval_samples_per_second": 160.265, "eval_steps_per_second": 22.437, "step": 150 }, { "epoch": 0.04668418611841088, "grad_norm": 0.018326055258512497, "learning_rate": 4.659698863221513e-05, "loss": 11.9223, "step": 151 }, { "epoch": 0.04699335291389705, "grad_norm": 0.015073740854859352, "learning_rate": 4.481448235912671e-05, "loss": 11.9185, "step": 152 }, { "epoch": 0.04730251970938321, "grad_norm": 0.014074956998229027, "learning_rate": 4.306073275629044e-05, "loss": 11.9191, "step": 153 }, { "epoch": 0.047611686504869376, "grad_norm": 0.017671145498752594, "learning_rate": 4.133621928133665e-05, "loss": 11.9207, "step": 154 }, { "epoch": 0.04792085330035554, "grad_norm": 0.016020517796278, "learning_rate": 3.964141339903026e-05, "loss": 11.9197, "step": 155 }, { "epoch": 0.04823002009584171, "grad_norm": 0.019396420568227768, "learning_rate": 3.797677845237696e-05, "loss": 11.9215, "step": 156 }, { "epoch": 0.04853918689132787, "grad_norm": 0.010420717298984528, "learning_rate": 3.634276953594982e-05, "loss": 11.9209, "step": 157 }, { "epoch": 0.04884835368681403, "grad_norm": 0.01945878006517887, "learning_rate": 3.473983337147118e-05, "loss": 11.9211, "step": 158 }, { "epoch": 0.0491575204823002, "grad_norm": 0.015013448894023895, "learning_rate": 3.316840818568315e-05, "loss": 11.9209, "step": 159 }, { "epoch": 0.049466687277786366, "grad_norm": 0.01401391439139843, "learning_rate": 3.162892359054098e-05, "loss": 11.9211, "step": 160 }, { "epoch": 0.04977585407327253, "grad_norm": 0.017041126266121864, "learning_rate": 3.0121800465761293e-05, "loss": 11.9195, "step": 161 }, { "epoch": 0.0500850208687587, "grad_norm": 0.01230812631547451, "learning_rate": 2.8647450843757897e-05, "loss": 11.92, "step": 162 }, { "epoch": 0.05039418766424486, "grad_norm": 0.013004007749259472, "learning_rate": 2.7206277796996144e-05, "loss": 11.9208, "step": 163 }, { "epoch": 0.05070335445973102, "grad_norm": 0.01057471428066492, "learning_rate": 2.5798675327796993e-05, "loss": 11.9199, "step": 164 }, { "epoch": 0.05101252125521719, "grad_norm": 0.013515602797269821, "learning_rate": 2.4425028260620715e-05, "loss": 11.9216, "step": 165 }, { "epoch": 0.051321688050703355, "grad_norm": 0.01369836088269949, "learning_rate": 2.3085712136859668e-05, "loss": 11.921, "step": 166 }, { "epoch": 0.05163085484618952, "grad_norm": 0.012219869531691074, "learning_rate": 2.178109311216913e-05, "loss": 11.9192, "step": 167 }, { "epoch": 0.05194002164167568, "grad_norm": 0.018385522067546844, "learning_rate": 2.0511527856363912e-05, "loss": 11.9201, "step": 168 }, { "epoch": 0.05224918843716185, "grad_norm": 0.018301496282219887, "learning_rate": 1.927736345590839e-05, "loss": 11.9198, "step": 169 }, { "epoch": 0.05255835523264801, "grad_norm": 0.01974300853908062, "learning_rate": 1.8078937319026654e-05, "loss": 11.9206, "step": 170 }, { "epoch": 0.052867522028134176, "grad_norm": 0.010678272694349289, "learning_rate": 1.6916577083458228e-05, "loss": 11.9222, "step": 171 }, { "epoch": 0.053176688823620345, "grad_norm": 0.019584400579333305, "learning_rate": 1.579060052688548e-05, "loss": 11.9203, "step": 172 }, { "epoch": 0.05348585561910651, "grad_norm": 0.012021908536553383, "learning_rate": 1.4701315480056164e-05, "loss": 11.9207, "step": 173 }, { "epoch": 0.05379502241459267, "grad_norm": 0.016563573852181435, "learning_rate": 1.3649019742625623e-05, "loss": 11.9191, "step": 174 }, { "epoch": 0.05410418921007884, "grad_norm": 0.013715994544327259, "learning_rate": 1.2634001001741373e-05, "loss": 11.9195, "step": 175 }, { "epoch": 0.05410418921007884, "eval_loss": 11.920323371887207, "eval_runtime": 0.3127, "eval_samples_per_second": 159.882, "eval_steps_per_second": 22.383, "step": 175 }, { "epoch": 0.054413356005565, "grad_norm": 0.016443433240056038, "learning_rate": 1.1656536753392287e-05, "loss": 11.9194, "step": 176 }, { "epoch": 0.054722522801051166, "grad_norm": 0.020550759509205818, "learning_rate": 1.0716894226543953e-05, "loss": 11.9196, "step": 177 }, { "epoch": 0.055031689596537335, "grad_norm": 0.008727199397981167, "learning_rate": 9.815330310080887e-06, "loss": 11.921, "step": 178 }, { "epoch": 0.0553408563920235, "grad_norm": 0.016882145777344704, "learning_rate": 8.952091482575824e-06, "loss": 11.9219, "step": 179 }, { "epoch": 0.05565002318750966, "grad_norm": 0.014393302612006664, "learning_rate": 8.127413744904804e-06, "loss": 11.9213, "step": 180 }, { "epoch": 0.05595918998299582, "grad_norm": 0.024645017459988594, "learning_rate": 7.34152255572697e-06, "loss": 11.9191, "step": 181 }, { "epoch": 0.05626835677848199, "grad_norm": 0.015890292823314667, "learning_rate": 6.594632769846353e-06, "loss": 11.9192, "step": 182 }, { "epoch": 0.056577523573968155, "grad_norm": 0.015051585622131824, "learning_rate": 5.886948579472778e-06, "loss": 11.921, "step": 183 }, { "epoch": 0.05688669036945432, "grad_norm": 0.0134736318141222, "learning_rate": 5.218663458397715e-06, "loss": 11.9198, "step": 184 }, { "epoch": 0.05719585716494049, "grad_norm": 0.018503418192267418, "learning_rate": 4.589960109100444e-06, "loss": 11.921, "step": 185 }, { "epoch": 0.05750502396042665, "grad_norm": 0.013675318099558353, "learning_rate": 4.001010412799138e-06, "loss": 11.9198, "step": 186 }, { "epoch": 0.05781419075591281, "grad_norm": 0.016024842858314514, "learning_rate": 3.451975382460109e-06, "loss": 11.9193, "step": 187 }, { "epoch": 0.05812335755139898, "grad_norm": 0.01202288456261158, "learning_rate": 2.9430051187785962e-06, "loss": 11.9214, "step": 188 }, { "epoch": 0.058432524346885145, "grad_norm": 0.014596652239561081, "learning_rate": 2.4742387691426445e-06, "loss": 11.9172, "step": 189 }, { "epoch": 0.05874169114237131, "grad_norm": 0.013716932386159897, "learning_rate": 2.0458044895916513e-06, "loss": 11.9202, "step": 190 }, { "epoch": 0.05905085793785748, "grad_norm": 0.02387954853475094, "learning_rate": 1.6578194097797258e-06, "loss": 11.9207, "step": 191 }, { "epoch": 0.05936002473334364, "grad_norm": 0.014165020547807217, "learning_rate": 1.3103896009537207e-06, "loss": 11.9204, "step": 192 }, { "epoch": 0.0596691915288298, "grad_norm": 0.025135591626167297, "learning_rate": 1.0036100469542786e-06, "loss": 11.9194, "step": 193 }, { "epoch": 0.059978358324315965, "grad_norm": 0.02197529375553131, "learning_rate": 7.375646182482875e-07, "loss": 11.922, "step": 194 }, { "epoch": 0.060287525119802135, "grad_norm": 0.015857767313718796, "learning_rate": 5.123260489995229e-07, "loss": 11.9212, "step": 195 }, { "epoch": 0.0605966919152883, "grad_norm": 0.012184608727693558, "learning_rate": 3.2795591718381975e-07, "loss": 11.9211, "step": 196 }, { "epoch": 0.06090585871077446, "grad_norm": 0.020152973011136055, "learning_rate": 1.8450462775428942e-07, "loss": 11.919, "step": 197 }, { "epoch": 0.06121502550626063, "grad_norm": 0.02711832895874977, "learning_rate": 8.201139886109264e-08, "loss": 11.921, "step": 198 }, { "epoch": 0.06152419230174679, "grad_norm": 0.018907776102423668, "learning_rate": 2.0504251129649374e-08, "loss": 11.9194, "step": 199 }, { "epoch": 0.061833359097232955, "grad_norm": 0.029230380430817604, "learning_rate": 0.0, "loss": 11.9202, "step": 200 }, { "epoch": 0.061833359097232955, "eval_loss": 11.920330047607422, "eval_runtime": 0.312, "eval_samples_per_second": 160.246, "eval_steps_per_second": 22.434, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 318524620800.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }