{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 2181,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0004585052728106373, "grad_norm": 25.35940676221757, "learning_rate": 4.5662100456621004e-08, "loss": 1.4356, "step": 1},
    {"epoch": 0.0022925263640531865, "grad_norm": 23.277460508193656, "learning_rate": 2.2831050228310502e-07, "loss": 1.4178, "step": 5},
    {"epoch": 0.004585052728106373, "grad_norm": 15.396159390081614, "learning_rate": 4.5662100456621004e-07, "loss": 1.3928, "step": 10},
    {"epoch": 0.0068775790921595595, "grad_norm": 9.927996187561872, "learning_rate": 6.849315068493151e-07, "loss": 1.2487, "step": 15},
    {"epoch": 0.009170105456212746, "grad_norm": 8.936136397262343, "learning_rate": 9.132420091324201e-07, "loss": 1.1467, "step": 20},
    {"epoch": 0.011462631820265932, "grad_norm": 3.9419002716272007, "learning_rate": 1.1415525114155251e-06, "loss": 1.0321, "step": 25},
    {"epoch": 0.013755158184319119, "grad_norm": 3.2824292809209212, "learning_rate": 1.3698630136986302e-06, "loss": 0.9911, "step": 30},
    {"epoch": 0.016047684548372305, "grad_norm": 3.198808731865913, "learning_rate": 1.5981735159817353e-06, "loss": 0.9499, "step": 35},
    {"epoch": 0.018340210912425492, "grad_norm": 3.200026153105945, "learning_rate": 1.8264840182648401e-06, "loss": 0.9394, "step": 40},
    {"epoch": 0.02063273727647868, "grad_norm": 3.1015042038551264, "learning_rate": 2.0547945205479454e-06, "loss": 0.9374, "step": 45},
    {"epoch": 0.022925263640531865, "grad_norm": 3.0638884680066116, "learning_rate": 2.2831050228310503e-06, "loss": 0.9366, "step": 50},
    {"epoch": 0.02521779000458505, "grad_norm": 3.1218708697344337, "learning_rate": 2.511415525114155e-06, "loss": 0.9072, "step": 55},
    {"epoch": 0.027510316368638238, "grad_norm": 3.030931859384564, "learning_rate": 2.7397260273972604e-06, "loss": 0.896, "step": 60},
    {"epoch": 0.029802842732691424, "grad_norm": 3.183215428730836, "learning_rate": 2.9680365296803653e-06, "loss": 0.904, "step": 65},
    {"epoch": 0.03209536909674461, "grad_norm": 3.1193991823217884, "learning_rate": 3.1963470319634706e-06, "loss": 0.8992, "step": 70},
    {"epoch": 0.0343878954607978, "grad_norm": 3.1562480345048662, "learning_rate": 3.4246575342465754e-06, "loss": 0.9008, "step": 75},
    {"epoch": 0.036680421824850984, "grad_norm": 3.1106379275365263, "learning_rate": 3.6529680365296803e-06, "loss": 0.8835, "step": 80},
    {"epoch": 0.03897294818890417, "grad_norm": 3.1659334626442455, "learning_rate": 3.881278538812785e-06, "loss": 0.8798, "step": 85},
    {"epoch": 0.04126547455295736, "grad_norm": 3.1010027836059533, "learning_rate": 4.109589041095891e-06, "loss": 0.879, "step": 90},
    {"epoch": 0.04355800091701054, "grad_norm": 3.3519588401192273, "learning_rate": 4.337899543378996e-06, "loss": 0.8615, "step": 95},
    {"epoch": 0.04585052728106373, "grad_norm": 3.049285908948199, "learning_rate": 4.566210045662101e-06, "loss": 0.8529, "step": 100},
    {"epoch": 0.048143053645116916, "grad_norm": 3.109756439871898, "learning_rate": 4.7945205479452054e-06, "loss": 0.8654, "step": 105},
    {"epoch": 0.0504355800091701, "grad_norm": 3.1513505710159335, "learning_rate": 5.02283105022831e-06, "loss": 0.8663, "step": 110},
    {"epoch": 0.05272810637322329, "grad_norm": 3.1767156567086614, "learning_rate": 5.251141552511416e-06, "loss": 0.8613, "step": 115},
    {"epoch": 0.055020632737276476, "grad_norm": 3.453537287264967, "learning_rate": 5.479452054794521e-06, "loss": 0.8771, "step": 120},
    {"epoch": 0.05731315910132966, "grad_norm": 3.013155684535603, "learning_rate": 5.7077625570776266e-06, "loss": 0.8473, "step": 125},
    {"epoch": 0.05960568546538285, "grad_norm": 3.425642520518735, "learning_rate": 5.936073059360731e-06, "loss": 0.8521, "step": 130},
    {"epoch": 0.061898211829436035, "grad_norm": 3.031927176672884, "learning_rate": 6.164383561643836e-06, "loss": 0.84, "step": 135},
    {"epoch": 0.06419073819348922, "grad_norm": 3.239390421336056, "learning_rate": 6.392694063926941e-06, "loss": 0.859, "step": 140},
    {"epoch": 0.06648326455754242, "grad_norm": 3.017820442924467, "learning_rate": 6.621004566210046e-06, "loss": 0.86, "step": 145},
    {"epoch": 0.0687757909215956, "grad_norm": 3.0002036905279503, "learning_rate": 6.849315068493151e-06, "loss": 0.8525, "step": 150},
    {"epoch": 0.07106831728564879, "grad_norm": 3.1828998491124016, "learning_rate": 7.077625570776257e-06, "loss": 0.8433, "step": 155},
    {"epoch": 0.07336084364970197, "grad_norm": 3.087610569097963, "learning_rate": 7.305936073059361e-06, "loss": 0.8361, "step": 160},
    {"epoch": 0.07565337001375516, "grad_norm": 3.115099552868115, "learning_rate": 7.534246575342466e-06, "loss": 0.8436, "step": 165},
    {"epoch": 0.07794589637780834, "grad_norm": 3.1551201699069282, "learning_rate": 7.76255707762557e-06, "loss": 0.8311, "step": 170},
    {"epoch": 0.08023842274186153, "grad_norm": 3.2013023977541617, "learning_rate": 7.990867579908676e-06, "loss": 0.8244, "step": 175},
    {"epoch": 0.08253094910591471, "grad_norm": 3.1031180959674716, "learning_rate": 8.219178082191782e-06, "loss": 0.8362, "step": 180},
    {"epoch": 0.08482347546996791, "grad_norm": 3.056534274967503, "learning_rate": 8.447488584474887e-06, "loss": 0.827, "step": 185},
    {"epoch": 0.08711600183402109, "grad_norm": 2.8738007240926016, "learning_rate": 8.675799086757991e-06, "loss": 0.8264, "step": 190},
    {"epoch": 0.08940852819807428, "grad_norm": 2.9833947743009044, "learning_rate": 8.904109589041097e-06, "loss": 0.8364, "step": 195},
    {"epoch": 0.09170105456212746, "grad_norm": 3.0590617698737606, "learning_rate": 9.132420091324201e-06, "loss": 0.8385, "step": 200},
    {"epoch": 0.09399358092618065, "grad_norm": 2.9544649860589964, "learning_rate": 9.360730593607307e-06, "loss": 0.8306, "step": 205},
    {"epoch": 0.09628610729023383, "grad_norm": 3.156467119939513, "learning_rate": 9.589041095890411e-06, "loss": 0.812, "step": 210},
    {"epoch": 0.09857863365428703, "grad_norm": 3.241792877196348, "learning_rate": 9.817351598173517e-06, "loss": 0.8098, "step": 215},
    {"epoch": 0.1008711600183402, "grad_norm": 3.329896188306964, "learning_rate": 9.999993590241675e-06, "loss": 0.8321, "step": 220},
    {"epoch": 0.1031636863823934, "grad_norm": 2.961456684151267, "learning_rate": 9.999769250425817e-06, "loss": 0.8296, "step": 225},
    {"epoch": 0.10545621274644658, "grad_norm": 3.0123856993460723, "learning_rate": 9.999224439127452e-06, "loss": 0.8223, "step": 230},
    {"epoch": 0.10774873911049977, "grad_norm": 3.1722352404227263, "learning_rate": 9.998359191267488e-06, "loss": 0.8183, "step": 235},
    {"epoch": 0.11004126547455295, "grad_norm": 3.339283823835408, "learning_rate": 9.997173562305937e-06, "loss": 0.812, "step": 240},
    {"epoch": 0.11233379183860615, "grad_norm": 3.051005936600519, "learning_rate": 9.995667628238362e-06, "loss": 0.8159, "step": 245},
    {"epoch": 0.11462631820265932, "grad_norm": 3.621892868476315, "learning_rate": 9.993841485591e-06, "loss": 0.8265, "step": 250},
    {"epoch": 0.11691884456671252, "grad_norm": 3.1501195933267727, "learning_rate": 9.991695251414584e-06, "loss": 0.7829, "step": 255},
    {"epoch": 0.1192113709307657, "grad_norm": 3.2077051728198436, "learning_rate": 9.989229063276829e-06, "loss": 0.8061, "step": 260},
    {"epoch": 0.12150389729481889, "grad_norm": 2.813867856532736, "learning_rate": 9.986443079253628e-06, "loss": 0.8088, "step": 265},
    {"epoch": 0.12379642365887207, "grad_norm": 2.953479405448006, "learning_rate": 9.983337477918904e-06, "loss": 0.8013, "step": 270},
    {"epoch": 0.12608895002292525, "grad_norm": 2.9765536692485752, "learning_rate": 9.979912458333179e-06, "loss": 0.8112, "step": 275},
    {"epoch": 0.12838147638697844, "grad_norm": 2.9261553011693313, "learning_rate": 9.976168240030804e-06, "loss": 0.797, "step": 280},
    {"epoch": 0.13067400275103164, "grad_norm": 2.7549890848982668, "learning_rate": 9.972105063005895e-06, "loss": 0.8047, "step": 285},
    {"epoch": 0.13296652911508483, "grad_norm": 2.783923747108222, "learning_rate": 9.96772318769694e-06, "loss": 0.8045, "step": 290},
    {"epoch": 0.13525905547913802, "grad_norm": 2.922181282361273, "learning_rate": 9.96302289497012e-06, "loss": 0.7891, "step": 295},
    {"epoch": 0.1375515818431912, "grad_norm": 2.8387565382348807, "learning_rate": 9.958004486101293e-06, "loss": 0.7756, "step": 300},
    {"epoch": 0.13984410820724438, "grad_norm": 2.869327340764152, "learning_rate": 9.952668282756692e-06, "loss": 0.8027, "step": 305},
    {"epoch": 0.14213663457129758, "grad_norm": 2.874303723785054, "learning_rate": 9.947014626972298e-06, "loss": 0.7826, "step": 310},
    {"epoch": 0.14442916093535077, "grad_norm": 2.737834462358364, "learning_rate": 9.941043881131928e-06, "loss": 0.7702, "step": 315},
    {"epoch": 0.14672168729940394, "grad_norm": 2.858629644409334, "learning_rate": 9.934756427943996e-06, "loss": 0.7761, "step": 320},
    {"epoch": 0.14901421366345713, "grad_norm": 2.941702373835629, "learning_rate": 9.92815267041699e-06, "loss": 0.7778, "step": 325},
    {"epoch": 0.15130674002751032, "grad_norm": 2.832449171435636, "learning_rate": 9.921233031833639e-06, "loss": 0.7747, "step": 330},
    {"epoch": 0.15359926639156352, "grad_norm": 2.838327247569131, "learning_rate": 9.913997955723777e-06, "loss": 0.7798, "step": 335},
    {"epoch": 0.15589179275561668, "grad_norm": 3.0053878829121357, "learning_rate": 9.90644790583592e-06, "loss": 0.7504, "step": 340},
    {"epoch": 0.15818431911966988, "grad_norm": 2.737407601036532, "learning_rate": 9.898583366107539e-06, "loss": 0.7655, "step": 345},
    {"epoch": 0.16047684548372307, "grad_norm": 3.0259958169837717, "learning_rate": 9.890404840634037e-06, "loss": 0.7582, "step": 350},
    {"epoch": 0.16276937184777626, "grad_norm": 2.804766086619055, "learning_rate": 9.881912853636445e-06, "loss": 0.7747, "step": 355},
    {"epoch": 0.16506189821182943, "grad_norm": 2.7915942235581785, "learning_rate": 9.873107949427815e-06, "loss": 0.7584, "step": 360},
    {"epoch": 0.16735442457588262, "grad_norm": 2.8708773578370588, "learning_rate": 9.863990692378333e-06, "loss": 0.7538, "step": 365},
    {"epoch": 0.16964695093993581, "grad_norm": 2.8372441642155097, "learning_rate": 9.854561666879148e-06, "loss": 0.7457, "step": 370},
    {"epoch": 0.171939477303989, "grad_norm": 2.7820083192682197, "learning_rate": 9.844821477304904e-06, "loss": 0.775, "step": 375},
    {"epoch": 0.17423200366804217, "grad_norm": 2.6780715561867066, "learning_rate": 9.834770747975015e-06, "loss": 0.7442, "step": 380},
    {"epoch": 0.17652453003209537, "grad_norm": 2.7545319149727763, "learning_rate": 9.824410123113634e-06, "loss": 0.7416, "step": 385},
    {"epoch": 0.17881705639614856, "grad_norm": 2.6402444423405225, "learning_rate": 9.813740266808375e-06, "loss": 0.7362, "step": 390},
    {"epoch": 0.18110958276020175, "grad_norm": 2.730909608534738, "learning_rate": 9.802761862967731e-06, "loss": 0.7252, "step": 395},
    {"epoch": 0.18340210912425492, "grad_norm": 2.9284254959639355, "learning_rate": 9.791475615277248e-06, "loss": 0.7453, "step": 400},
    {"epoch": 0.1856946354883081, "grad_norm": 2.790088757652803, "learning_rate": 9.779882247154419e-06, "loss": 0.7344, "step": 405},
    {"epoch": 0.1879871618523613, "grad_norm": 2.725250925456166, "learning_rate": 9.76798250170231e-06, "loss": 0.7246, "step": 410},
    {"epoch": 0.1902796882164145, "grad_norm": 2.667869321574359, "learning_rate": 9.755777141661937e-06, "loss": 0.7193, "step": 415},
    {"epoch": 0.19257221458046767, "grad_norm": 2.5119646512097997, "learning_rate": 9.743266949363368e-06, "loss": 0.7402, "step": 420},
    {"epoch": 0.19486474094452086, "grad_norm": 2.847215415311532, "learning_rate": 9.730452726675583e-06, "loss": 0.7173, "step": 425},
    {"epoch": 0.19715726730857405, "grad_norm": 2.779126735326216, "learning_rate": 9.717335294955078e-06, "loss": 0.7157, "step": 430},
    {"epoch": 0.19944979367262725, "grad_norm": 3.4561646981046454, "learning_rate": 9.703915494993215e-06, "loss": 0.7312, "step": 435},
    {"epoch": 0.2017423200366804, "grad_norm": 2.7730394910581913, "learning_rate": 9.690194186962326e-06, "loss": 0.7335, "step": 440},
    {"epoch": 0.2040348464007336, "grad_norm": 2.859201150645261, "learning_rate": 9.676172250360583e-06, "loss": 0.7383, "step": 445},
    {"epoch": 0.2063273727647868, "grad_norm": 2.9209175577350313, "learning_rate": 9.66185058395563e-06, "loss": 0.7263, "step": 450},
    {"epoch": 0.20861989912884, "grad_norm": 2.704547531489439, "learning_rate": 9.647230105726963e-06, "loss": 0.7143, "step": 455},
    {"epoch": 0.21091242549289316, "grad_norm": 2.670951446360455, "learning_rate": 9.632311752807097e-06, "loss": 0.7307, "step": 460},
    {"epoch": 0.21320495185694635, "grad_norm": 3.2268092839390485, "learning_rate": 9.617096481421498e-06, "loss": 0.6985, "step": 465},
    {"epoch": 0.21549747822099954, "grad_norm": 2.939723635315935, "learning_rate": 9.601585266827288e-06, "loss": 0.7181, "step": 470},
    {"epoch": 0.21779000458505274, "grad_norm": 2.7240300289732082, "learning_rate": 9.58577910325074e-06, "loss": 0.7079, "step": 475},
    {"epoch": 0.2200825309491059, "grad_norm": 2.7348057628577815, "learning_rate": 9.569679003823542e-06, "loss": 0.7063, "step": 480},
    {"epoch": 0.2223750573131591, "grad_norm": 2.6209148336683894, "learning_rate": 9.55328600051787e-06, "loss": 0.7019, "step": 485},
    {"epoch": 0.2246675836772123, "grad_norm": 2.7094717894075093, "learning_rate": 9.536601144080224e-06, "loss": 0.6933, "step": 490},
    {"epoch": 0.22696011004126548, "grad_norm": 2.6005478056383393, "learning_rate": 9.5196255039641e-06, "loss": 0.7008, "step": 495},
    {"epoch": 0.22925263640531865, "grad_norm": 2.9435017052734933, "learning_rate": 9.502360168261424e-06, "loss": 0.7168, "step": 500},
    {"epoch": 0.23154516276937184, "grad_norm": 15.281241231781962, "learning_rate": 9.48480624363281e-06, "loss": 0.6968, "step": 505},
    {"epoch": 0.23383768913342504, "grad_norm": 2.803746155734926, "learning_rate": 9.46696485523664e-06, "loss": 0.7176, "step": 510},
    {"epoch": 0.23613021549747823, "grad_norm": 2.9572910983459275, "learning_rate": 9.448837146656924e-06, "loss": 0.6983, "step": 515},
    {"epoch": 0.2384227418615314, "grad_norm": 2.66575290909559, "learning_rate": 9.430424279830014e-06, "loss": 0.679, "step": 520},
    {"epoch": 0.2407152682255846, "grad_norm": 2.6071015601683056, "learning_rate": 9.411727434970121e-06, "loss": 0.6796, "step": 525},
    {"epoch": 0.24300779458963778, "grad_norm": 2.6190152299969975, "learning_rate": 9.392747810493675e-06, "loss": 0.6922, "step": 530},
    {"epoch": 0.24530032095369098, "grad_norm": 2.9035286162764624, "learning_rate": 9.373486622942494e-06, "loss": 0.6881, "step": 535},
    {"epoch": 0.24759284731774414, "grad_norm": 2.722112266367375, "learning_rate": 9.353945106905822e-06, "loss": 0.691, "step": 540},
    {"epoch": 0.24988537368179733, "grad_norm": 2.8551591177378173, "learning_rate": 9.334124514941185e-06, "loss": 0.6786, "step": 545},
    {"epoch": 0.2521779000458505, "grad_norm": 2.789372421806793, "learning_rate": 9.314026117494116e-06, "loss": 0.6965, "step": 550},
    {"epoch": 0.2544704264099037, "grad_norm": 2.943178087845294, "learning_rate": 9.29365120281671e-06, "loss": 0.6734, "step": 555},
    {"epoch": 0.2567629527739569, "grad_norm": 2.9269593678262678, "learning_rate": 9.273001076885059e-06, "loss": 0.6567, "step": 560},
    {"epoch": 0.2590554791380101, "grad_norm": 2.7577714835234457, "learning_rate": 9.252077063315545e-06, "loss": 0.6628, "step": 565},
    {"epoch": 0.2613480055020633, "grad_norm": 2.595587224144848, "learning_rate": 9.230880503279991e-06, "loss": 0.6593, "step": 570},
    {"epoch": 0.26364053186611647, "grad_norm": 2.6421320876444425, "learning_rate": 9.209412755419703e-06, "loss": 0.6616, "step": 575},
    {"epoch": 0.26593305823016966, "grad_norm": 2.5889083746551487, "learning_rate": 9.18767519575838e-06, "loss": 0.6574, "step": 580},
    {"epoch": 0.26822558459422285, "grad_norm": 2.644361824371662, "learning_rate": 9.165669217613919e-06, "loss": 0.6631, "step": 585},
    {"epoch": 0.27051811095827605, "grad_norm": 2.7328270481402166, "learning_rate": 9.143396231509102e-06, "loss": 0.6591, "step": 590},
    {"epoch": 0.2728106373223292, "grad_norm": 2.6202953814608247, "learning_rate": 9.12085766508119e-06, "loss": 0.6465, "step": 595},
    {"epoch": 0.2751031636863824, "grad_norm": 2.688621083531908, "learning_rate": 9.098054962990415e-06, "loss": 0.6678, "step": 600},
    {"epoch": 0.2773956900504356, "grad_norm": 2.684577688850206, "learning_rate": 9.074989586827375e-06, "loss": 0.6478, "step": 605},
    {"epoch": 0.27968821641448877, "grad_norm": 2.6991742230220708, "learning_rate": 9.05166301501936e-06, "loss": 0.6575, "step": 610},
    {"epoch": 0.28198074277854196, "grad_norm": 2.8422733898390353, "learning_rate": 9.028076742735583e-06, "loss": 0.6606, "step": 615},
    {"epoch": 0.28427326914259515, "grad_norm": 3.3111069999457174, "learning_rate": 9.004232281791341e-06, "loss": 0.6501, "step": 620},
    {"epoch": 0.28656579550664835, "grad_norm": 2.8352207612326676, "learning_rate": 8.980131160551118e-06, "loss": 0.6497, "step": 625},
    {"epoch": 0.28885832187070154, "grad_norm": 2.622577509095012, "learning_rate": 8.955774923830618e-06, "loss": 0.6265, "step": 630},
    {"epoch": 0.2911508482347547, "grad_norm": 2.6180287881898363, "learning_rate": 8.931165132797747e-06, "loss": 0.6397, "step": 635},
    {"epoch": 0.29344337459880787, "grad_norm": 2.7463986227282713, "learning_rate": 8.906303364872545e-06, "loss": 0.6668, "step": 640},
    {"epoch": 0.29573590096286106, "grad_norm": 2.6468423935127254, "learning_rate": 8.881191213626084e-06, "loss": 0.6393, "step": 645},
    {"epoch": 0.29802842732691426, "grad_norm": 2.6005030935816245, "learning_rate": 8.855830288678311e-06, "loss": 0.644, "step": 650},
    {"epoch": 0.30032095369096745, "grad_norm": 2.7192686848560554, "learning_rate": 8.83022221559489e-06, "loss": 0.6479, "step": 655},
    {"epoch": 0.30261348005502064, "grad_norm": 2.673457233400223, "learning_rate": 8.804368635783002e-06, "loss": 0.6384, "step": 660},
    {"epoch": 0.30490600641907384, "grad_norm": 2.850654385793331, "learning_rate": 8.778271206386135e-06, "loss": 0.6456, "step": 665},
    {"epoch": 0.30719853278312703, "grad_norm": 2.6958806241423643, "learning_rate": 8.751931600177863e-06, "loss": 0.6025, "step": 670},
    {"epoch": 0.30949105914718017, "grad_norm": 2.764991202053115, "learning_rate": 8.725351505454631e-06, "loss": 0.6194, "step": 675},
    {"epoch": 0.31178358551123336, "grad_norm": 2.6590991144561906, "learning_rate": 8.69853262592754e-06, "loss": 0.6348, "step": 680},
    {"epoch": 0.31407611187528656, "grad_norm": 2.708732600879308, "learning_rate": 8.671476680613134e-06, "loss": 0.6411, "step": 685},
    {"epoch": 0.31636863823933975, "grad_norm": 2.5456418831079457, "learning_rate": 8.644185403723231e-06, "loss": 0.6138, "step": 690},
    {"epoch": 0.31866116460339294, "grad_norm": 2.903106819651818, "learning_rate": 8.616660544553754e-06, "loss": 0.6237, "step": 695},
    {"epoch": 0.32095369096744614, "grad_norm": 2.7280408027219942, "learning_rate": 8.588903867372607e-06, "loss": 0.6138, "step": 700},
    {"epoch": 0.32324621733149933, "grad_norm": 2.886662280669305, "learning_rate": 8.560917151306594e-06, "loss": 0.6066, "step": 705},
    {"epoch": 0.3255387436955525, "grad_norm": 2.6016420791711994, "learning_rate": 8.53270219022738e-06, "loss": 0.6126, "step": 710},
    {"epoch": 0.32783127005960566, "grad_norm": 2.5696831024854827, "learning_rate": 8.50426079263651e-06, "loss": 0.6191, "step": 715},
    {"epoch": 0.33012379642365886, "grad_norm": 2.789642739261612, "learning_rate": 8.475594781549483e-06, "loss": 0.6171, "step": 720},
    {"epoch": 0.33241632278771205, "grad_norm": 2.662350967821026, "learning_rate": 8.446705994378913e-06, "loss": 0.6262, "step": 725},
    {"epoch": 0.33470884915176524, "grad_norm": 2.749133969632543, "learning_rate": 8.417596282816742e-06, "loss": 0.6084, "step": 730},
    {"epoch": 0.33700137551581844, "grad_norm": 2.8389384155162736, "learning_rate": 8.388267512715565e-06, "loss": 0.6089, "step": 735},
    {"epoch": 0.33929390187987163, "grad_norm": 2.6423715957870115, "learning_rate": 8.358721563969027e-06, "loss": 0.5912, "step": 740},
    {"epoch": 0.3415864282439248, "grad_norm": 2.582427374014035, "learning_rate": 8.328960330391325e-06, "loss": 0.6015, "step": 745},
    {"epoch": 0.343878954607978, "grad_norm": 2.5641005198848763, "learning_rate": 8.298985719595824e-06, "loss": 0.6127, "step": 750},
    {"epoch": 0.34617148097203115, "grad_norm": 2.573968171901929, "learning_rate": 8.268799652872786e-06, "loss": 0.6108, "step": 755},
    {"epoch": 0.34846400733608435, "grad_norm": 2.555840575858041, "learning_rate": 8.23840406506621e-06, "loss": 0.6013, "step": 760},
    {"epoch": 0.35075653370013754, "grad_norm": 2.608505400595271, "learning_rate": 8.207800904449829e-06, "loss": 0.5868, "step": 765},
    {"epoch": 0.35304906006419073, "grad_norm": 2.564041005915397, "learning_rate": 8.176992132602221e-06, "loss": 0.5935, "step": 770},
    {"epoch": 0.3553415864282439, "grad_norm": 2.835188198766609, "learning_rate": 8.145979724281079e-06, "loss": 0.577, "step": 775},
    {"epoch": 0.3576341127922971, "grad_norm": 2.624154236961289, "learning_rate": 8.114765667296628e-06, "loss": 0.5807, "step": 780},
    {"epoch": 0.3599266391563503, "grad_norm": 2.803920892055745, "learning_rate": 8.083351962384234e-06, "loss": 0.5827, "step": 785},
    {"epoch": 0.3622191655204035, "grad_norm": 2.7453769474392438, "learning_rate": 8.051740623076132e-06, "loss": 0.5743, "step": 790},
    {"epoch": 0.36451169188445665, "grad_norm": 2.642012832230722, "learning_rate": 8.019933675572389e-06, "loss": 0.5924, "step": 795},
    {"epoch": 0.36680421824850984, "grad_norm": 2.5959618878893496, "learning_rate": 7.987933158611013e-06, "loss": 0.5765, "step": 800},
    {"epoch": 0.36909674461256303, "grad_norm": 2.6981842811728107, "learning_rate": 7.95574112333729e-06, "loss": 0.5636, "step": 805},
    {"epoch": 0.3713892709766162, "grad_norm": 2.7155825019244246, "learning_rate": 7.923359633172299e-06, "loss": 0.5676, "step": 810},
    {"epoch": 0.3736817973406694, "grad_norm": 2.722727252289237, "learning_rate": 7.890790763680658e-06, "loss": 0.5849, "step": 815},
    {"epoch": 0.3759743237047226, "grad_norm": 2.5941959497564073, "learning_rate": 7.85803660243749e-06, "loss": 0.582, "step": 820},
    {"epoch": 0.3782668500687758, "grad_norm": 2.448527666302428, "learning_rate": 7.8250992488946e-06, "loss": 0.586, "step": 825},
    {"epoch": 0.380559376432829, "grad_norm": 2.786081596311819, "learning_rate": 7.791980814245931e-06, "loss": 0.5547, "step": 830},
    {"epoch": 0.38285190279688214, "grad_norm": 2.6225345564151237, "learning_rate": 7.758683421292217e-06, "loss": 0.5562, "step": 835},
    {"epoch": 0.38514442916093533, "grad_norm": 2.495977821656378, "learning_rate": 7.72520920430493e-06, "loss": 0.5728, "step": 840},
    {"epoch": 0.3874369555249885, "grad_norm": 2.5523314447232535, "learning_rate": 7.691560308889478e-06, "loss": 0.5748, "step": 845},
    {"epoch": 0.3897294818890417, "grad_norm": 2.702511447586494, "learning_rate": 7.657738891847679e-06, "loss": 0.5651, "step": 850},
    {"epoch": 0.3920220082530949, "grad_norm": 2.6729070020445533, "learning_rate": 7.623747121039512e-06, "loss": 0.5716, "step": 855},
    {"epoch": 0.3943145346171481, "grad_norm": 2.7351708064638665, "learning_rate": 7.589587175244162e-06, "loss": 0.565, "step": 860},
    {"epoch": 0.3966070609812013, "grad_norm": 2.5916997954156638, "learning_rate": 7.555261244020371e-06, "loss": 0.5691, "step": 865},
    {"epoch": 0.3988995873452545, "grad_norm": 2.4806248685486407, "learning_rate": 7.520771527566093e-06, "loss": 0.5672, "step": 870},
    {"epoch": 0.40119211370930763, "grad_norm": 2.691711711440267, "learning_rate": 7.486120236577464e-06, "loss": 0.5555, "step": 875},
    {"epoch": 0.4034846400733608, "grad_norm": 2.6506103202422797, "learning_rate": 7.451309592107104e-06, "loss": 0.5548, "step": 880},
    {"epoch": 0.405777166437414, "grad_norm": 2.5210545941984983, "learning_rate": 7.416341825421755e-06, "loss": 0.573, "step": 885},
    {"epoch": 0.4080696928014672, "grad_norm": 2.7103495153803627, "learning_rate": 7.381219177859257e-06, "loss": 0.5428, "step": 890},
    {"epoch": 0.4103622191655204, "grad_norm": 2.5223081344987826, "learning_rate": 7.345943900684896e-06, "loss": 0.5605, "step": 895},
    {"epoch": 0.4126547455295736, "grad_norm": 2.5684242617186364, "learning_rate": 7.310518254947092e-06, "loss": 0.5432, "step": 900},
    {"epoch": 0.4149472718936268, "grad_norm": 2.8905063764239327, "learning_rate": 7.274944511332479e-06, "loss": 0.5355, "step": 905},
    {"epoch": 0.41723979825768, "grad_norm": 2.7288840976281543, "learning_rate": 7.239224950020359e-06, "loss": 0.5583, "step": 910},
    {"epoch": 0.4195323246217332, "grad_norm": 2.573090270715344, "learning_rate": 7.203361860536544e-06, "loss": 0.5528, "step": 915},
    {"epoch": 0.4218248509857863, "grad_norm": 2.7074335935753897, "learning_rate": 7.167357541606613e-06, "loss": 0.5457, "step": 920},
    {"epoch": 0.4241173773498395, "grad_norm": 2.6225623425429614, "learning_rate": 7.131214301008564e-06, "loss": 0.5405, "step": 925},
    {"epoch": 0.4264099037138927, "grad_norm": 2.638186367850455, "learning_rate": 7.094934455424889e-06, "loss": 0.5457, "step": 930},
    {"epoch": 0.4287024300779459, "grad_norm": 2.663625944879504, "learning_rate": 7.058520330294087e-06, "loss": 0.5499, "step": 935},
    {"epoch": 0.4309949564419991, "grad_norm": 2.594656111210185, "learning_rate": 7.021974259661607e-06, "loss": 0.5471, "step": 940},
    {"epoch": 0.4332874828060523, "grad_norm": 2.558300587882855, "learning_rate": 6.985298586030241e-06, "loss": 0.5465, "step": 945},
    {"epoch": 0.4355800091701055, "grad_norm": 2.6435075817238425, "learning_rate": 6.948495660209983e-06, "loss": 0.5331, "step": 950},
    {"epoch": 0.43787253553415867, "grad_norm": 2.494991656905618, "learning_rate": 6.9115678411673345e-06, "loss": 0.5371, "step": 955},
    {"epoch": 0.4401650618982118, "grad_norm": 2.4881542600695643, "learning_rate": 6.8745174958741164e-06, "loss": 0.5329, "step": 960},
    {"epoch": 0.442457588262265, "grad_norm": 2.552409503690461, "learning_rate": 6.837346999155743e-06, "loss": 0.532, "step": 965},
    {"epoch": 0.4447501146263182, "grad_norm": 2.4970182042863445, "learning_rate": 6.800058733539003e-06, "loss": 0.5376, "step": 970},
    {"epoch": 0.4470426409903714, "grad_norm": 2.468594629574796, "learning_rate": 6.762655089099353e-06, "loss": 0.513, "step": 975},
    {"epoch": 0.4493351673544246, "grad_norm": 2.5797501981324453, "learning_rate": 6.725138463307714e-06, "loss": 0.5408, "step": 980},
    {"epoch": 0.4516276937184778, "grad_norm": 2.8482359445979246, "learning_rate": 6.687511260876799e-06, "loss": 0.5189, "step": 985},
    {"epoch": 0.45392022008253097, "grad_norm": 2.6612518014120816, "learning_rate": 6.649775893606982e-06, "loss": 0.5318, "step": 990},
    {"epoch": 0.45621274644658416, "grad_norm": 2.5372082111080347, "learning_rate": 6.611934780231704e-06, "loss": 0.5076, "step": 995},
    {"epoch": 0.4585052728106373, "grad_norm": 2.4460238122171916, "learning_rate": 6.573990346262445e-06, "loss": 0.5028, "step": 1000},
    {"epoch": 0.4607977991746905, "grad_norm": 2.5523381259232747, "learning_rate": 6.535945023833249e-06, "loss": 0.5188, "step": 1005},
    {"epoch": 0.4630903255387437, "grad_norm": 2.6717883324323104, "learning_rate": 6.497801251544833e-06, "loss": 0.5137, "step": 1010},
    {"epoch": 0.4653828519027969, "grad_norm": 2.4441200104866763, "learning_rate": 6.459561474308278e-06, "loss": 0.513, "step": 1015},
    {"epoch": 0.4676753782668501, "grad_norm": 2.4626953473958046, "learning_rate": 6.421228143188325e-06, "loss": 0.5241, "step": 1020},
    {"epoch": 0.46996790463090327, "grad_norm": 2.414799048761899, "learning_rate": 6.382803715246254e-06, "loss": 0.5265, "step": 1025},
    {"epoch": 0.47226043099495646, "grad_norm": 2.661888186403354, "learning_rate": 6.344290653382408e-06, "loss": 0.5122, "step": 1030},
    {"epoch": 0.47455295735900965, "grad_norm": 2.705613301623184, "learning_rate": 6.305691426178316e-06, "loss": 0.5076, "step": 1035},
    {"epoch": 0.4768454837230628, "grad_norm": 2.5901180556298007, "learning_rate": 6.267008507738472e-06, "loss": 0.5309, "step": 1040},
    {"epoch": 0.479138010087116, "grad_norm": 2.5393961483789345, "learning_rate": 6.228244377531747e-06, "loss": 0.506, "step": 1045},
    {"epoch": 0.4814305364511692, "grad_norm": 2.5959034041763154, "learning_rate": 6.189401520232464e-06, "loss": 0.5065, "step": 1050},
    {"epoch": 0.48372306281522237, "grad_norm": 2.6419168193929963, "learning_rate": 6.150482425561135e-06, "loss": 0.5189, "step": 1055},
    {"epoch": 0.48601558917927556, "grad_norm": 2.58024430648069, "learning_rate": 6.11148958812488e-06, "loss": 0.5071, "step": 1060},
    {"epoch": 0.48830811554332876, "grad_norm": 2.4501378891077987, "learning_rate": 6.072425507257528e-06, "loss": 0.5033, "step": 1065},
    {"epoch": 0.49060064190738195, "grad_norm": 2.783006969507733, "learning_rate": 6.033292686859414e-06, "loss": 0.4955, "step": 1070},
    {"epoch": 0.49289316827143514, "grad_norm": 2.428894458608491, "learning_rate": 5.99409363523689e-06, "loss": 0.4973, "step": 1075},
    {"epoch": 0.4951856946354883, "grad_norm": 2.7389561374869342, "learning_rate": 5.9548308649415486e-06, "loss": 0.5051, "step": 1080},
    {"epoch": 0.4974782209995415, "grad_norm": 2.5456232835838124, "learning_rate": 5.91550689260917e-06, "loss": 0.4935, "step": 1085},
    {"epoch": 0.49977074736359467, "grad_norm": 2.6057045786417685, "learning_rate": 5.876124238798424e-06, "loss": 0.501, "step": 1090},
    {"epoch": 0.5020632737276479, "grad_norm": 2.4695060680872873, "learning_rate": 5.836685427829296e-06, "loss": 0.5032, "step": 1095},
    {"epoch": 0.504355800091701, "grad_norm": 2.3783397469941376, "learning_rate": 5.797192987621293e-06, "loss": 0.4985, "step": 1100},
    {"epoch": 0.5066483264557542, "grad_norm": 2.491153548859691, "learning_rate": 5.7576494495314105e-06, "loss": 0.5043, "step": 1105},
    {"epoch": 0.5089408528198074, "grad_norm": 2.6062141152111673, "learning_rate": 5.718057348191874e-06, "loss": 0.4868, "step": 1110},
    {"epoch": 0.5112333791838606, "grad_norm": 2.5012205713207405, "learning_rate": 5.678419221347687e-06, "loss": 0.4979, "step": 1115},
    {"epoch": 0.5135259055479138, "grad_norm": 2.609877005241944, "learning_rate": 5.638737609693953e-06, "loss": 0.495, "step": 1120},
    {"epoch": 0.515818431911967, "grad_norm": 2.684672446431491, "learning_rate": 5.599015056713037e-06, "loss": 0.4823, "step": 1125},
    {"epoch": 0.5181109582760202, "grad_norm": 2.4771534112729228, "learning_rate": 5.559254108511531e-06, "loss": 0.5016, "step": 1130},
    {"epoch": 0.5204034846400734, "grad_norm": 2.46810743209868, "learning_rate": 5.519457313657056e-06, "loss": 0.4896, "step": 1135},
    {"epoch": 0.5226960110041265, "grad_norm": 2.5795208204825983, "learning_rate": 5.479627223014902e-06, "loss": 0.4886, "step": 1140},
    {"epoch": 0.5249885373681797, "grad_norm": 2.434086073989824, "learning_rate": 5.439766389584527e-06, "loss": 0.4865, "step": 1145},
    {"epoch": 0.5272810637322329, "grad_norm": 2.4538097489169934, "learning_rate": 5.399877368335922e-06, "loss": 0.4914, "step": 1150},
    {"epoch": 0.5295735900962861, "grad_norm": 2.5415775013932063, "learning_rate": 5.359962716045836e-06, "loss": 0.4936, "step": 1155},
    {"epoch": 0.5318661164603393, "grad_norm": 2.56697946552087, "learning_rate": 5.3200249911338986e-06, "loss": 0.4894, "step": 1160},
    {"epoch": 0.5341586428243925, "grad_norm": 2.572922499741503, "learning_rate": 5.280066753498632e-06, "loss": 0.4794, "step": 1165},
    {"epoch": 0.5364511691884457, "grad_norm": 2.623599926005301, "learning_rate": 5.240090564353365e-06, "loss": 0.4959, "step": 1170},
    {"epoch": 0.5387436955524989, "grad_norm": 2.4231120561633324, "learning_rate": 5.200098986062072e-06, "loss": 0.4753, "step": 1175},
    {"epoch": 0.5410362219165521, "grad_norm": 2.5196186316057108, "learning_rate": 5.160094581975127e-06, "loss": 0.4783, "step": 1180},
    {"epoch": 0.5433287482806052, "grad_norm": 2.527690400984075, "learning_rate": 5.1200799162650035e-06, "loss": 0.4916, "step": 1185},
    {"epoch": 0.5456212746446584, "grad_norm": 2.6015322908629415, "learning_rate": 5.080057553761917e-06, "loss": 0.4738, "step": 1190},
    {"epoch": 0.5479138010087116, "grad_norm": 2.3467602506879786, "learning_rate": 5.040030059789426e-06, "loss": 0.476, "step": 1195},
    {"epoch": 0.5502063273727648, "grad_norm": 2.570425940808593, "learning_rate": 5e-06, "loss": 0.4903, "step": 1200},
    {"epoch": 0.552498853736818, "grad_norm": 2.5543989632263284, "learning_rate": 4.9599699402105755e-06, "loss": 0.4673, "step": 1205},
    {"epoch": 0.5547913801008711, "grad_norm": 2.5213973685823277, "learning_rate": 4.919942446238085e-06, "loss": 0.4693, "step": 1210},
    {"epoch": 0.5570839064649243, "grad_norm": 2.4952425404718075, "learning_rate": 4.879920083734997e-06, "loss": 0.4692, "step": 1215},
    {"epoch": 0.5593764328289775, "grad_norm": 2.5419193115674776, "learning_rate": 4.839905418024875e-06, "loss": 0.4814, "step": 1220},
    {"epoch": 0.5616689591930307, "grad_norm": 2.558303192571574, "learning_rate": 4.7999010139379295e-06, "loss": 0.4698, "step": 1225},
    {"epoch": 0.5639614855570839, "grad_norm": 2.4678859101946315, "learning_rate": 4.759909435646636e-06, "loss": 0.4896, "step": 1230},
    {"epoch": 0.5662540119211371, "grad_norm": 2.6716519633665783, "learning_rate": 4.719933246501369e-06, "loss": 0.4852, "step": 1235},
    {"epoch": 0.5685465382851903, "grad_norm": 2.4330925797194807, "learning_rate": 4.679975008866103e-06, "loss": 0.4554, "step": 1240},
    {"epoch": 0.5708390646492435, "grad_norm": 2.437937005459216, "learning_rate": 4.640037283954165e-06, "loss": 0.4598, "step": 1245},
    {"epoch": 0.5731315910132967, "grad_norm": 2.413361545021729, "learning_rate": 4.6001226316640804e-06, "loss": 0.4739, "step": 1250},
    {"epoch": 0.5754241173773499, "grad_norm": 2.3552453394422503, "learning_rate": 4.5602336104154745e-06, "loss": 0.4646, "step": 1255},
    {"epoch": 0.5777166437414031, "grad_norm": 2.623470049632146, "learning_rate": 4.520372776985101e-06, "loss": 0.4579, "step": 1260},
    {"epoch": 0.5800091701054562, "grad_norm": 2.4219278336672874, "learning_rate": 4.480542686342946e-06, "loss": 0.4613, "step": 1265},
    {"epoch": 0.5823016964695094, "grad_norm": 2.517369439139374, "learning_rate": 4.440745891488471e-06, "loss": 0.4523, "step": 1270},
    {"epoch": 0.5845942228335625, "grad_norm": 2.501700820037027, "learning_rate": 4.400984943286965e-06, "loss": 0.4671, "step": 1275},
    {"epoch": 0.5868867491976157, "grad_norm": 2.4011689731614605, "learning_rate": 4.361262390306049e-06, "loss": 0.4527, "step": 1280},
    {"epoch": 0.5891792755616689, "grad_norm": 2.5994696717863706, "learning_rate": 4.321580778652316e-06, "loss": 0.4493, "step": 1285},
    {"epoch": 0.5914718019257221, "grad_norm": 2.491956972995198, "learning_rate": 4.2819426518081265e-06, "loss": 0.456, "step": 1290},
    {"epoch": 0.5937643282897753, "grad_norm": 2.4353572047335996, "learning_rate": 4.2423505504685894e-06, "loss": 0.4611, "step": 1295},
    {"epoch": 0.5960568546538285, "grad_norm": 2.4904358458702944, "learning_rate": 4.202807012378707e-06, "loss": 0.4546, "step": 1300},
    {"epoch": 0.5983493810178817, "grad_norm": 2.4617619082762636, "learning_rate": 4.163314572170704e-06, "loss": 0.458, "step": 1305},
    {"epoch": 0.6006419073819349, "grad_norm": 2.354023280982333, "learning_rate": 4.123875761201576e-06, "loss": 0.4433, "step": 1310},
    {"epoch": 0.6029344337459881, "grad_norm": 2.540723557518342, "learning_rate": 4.08449310739083e-06, "loss": 0.4484, "step": 1315},
    {"epoch": 0.6052269601100413, "grad_norm": 2.4043887566981446, "learning_rate": 4.045169135058452e-06, "loss": 0.4416, "step": 1320},
    {"epoch": 0.6075194864740945, "grad_norm": 2.481355244310724, "learning_rate": 4.0059063647631105e-06, "loss": 0.4645, "step": 1325},
    {"epoch": 0.6098120128381477, "grad_norm": 2.499493147862873, "learning_rate": 3.966707313140587e-06, "loss": 0.4542, "step": 1330},
    {"epoch": 0.6121045392022009, "grad_norm": 2.5034183191594477, "learning_rate": 3.927574492742473e-06, "loss": 0.4465, "step": 1335},
    {"epoch": 0.6143970655662541, "grad_norm": 2.450159706952634, "learning_rate": 3.888510411875121e-06, "loss": 0.4451, "step": 1340},
    {"epoch": 0.6166895919303071, "grad_norm": 2.437273107870038, "learning_rate": 3.849517574438866e-06, "loss": 0.4393, "step": 1345},
    {"epoch": 0.6189821182943603, "grad_norm": 2.4867270897195164, "learning_rate": 3.8105984797675364e-06, "loss": 0.4369, "step": 1350},
    {"epoch": 0.6212746446584135, "grad_norm": 2.4474532182002156, "learning_rate": 3.771755622468254e-06, "loss": 0.4459, "step": 1355},
    {"epoch": 0.6235671710224667, "grad_norm": 2.3883568752400737, "learning_rate": 3.7329914922615283e-06, "loss": 0.4414, "step": 1360},
    {"epoch": 0.6258596973865199, "grad_norm": 2.323604786191338, "learning_rate": 3.6943085738216855e-06, "loss": 0.4294, "step": 1365},
    {"epoch": 0.6281522237505731, "grad_norm": 2.5364327673030553, "learning_rate": 3.655709346617593e-06, "loss": 0.4482, "step": 1370},
    {"epoch": 0.6304447501146263, "grad_norm": 2.528211312039227, "learning_rate": 3.6171962847537466e-06, "loss": 0.4483, "step": 1375},
    {"epoch": 0.6327372764786795, "grad_norm": 2.4014535334880533, "learning_rate": 3.5787718568116764e-06, "loss": 0.4479, "step": 1380},
    {"epoch": 0.6350298028427327, "grad_norm": 2.6961239350559687, "learning_rate": 3.540438525691723e-06, "loss": 0.4375, "step": 1385},
    {"epoch": 0.6373223292067859, "grad_norm": 2.4568407427026027, "learning_rate": 3.502198748455169e-06, "loss": 0.4461, "step": 1390},
    {"epoch": 0.6396148555708391, "grad_norm": 2.444432290321262, "learning_rate": 3.464054976166753e-06, "loss": 0.4409, "step": 1395},
    {"epoch": 0.6419073819348923, "grad_norm": 2.3930367223498927, "learning_rate": 3.4260096537375553e-06, "loss": 0.433, "step": 1400},
    {"epoch": 0.6441999082989455, "grad_norm": 2.431394532574176, "learning_rate": 3.3880652197682974e-06, "loss": 0.4229, "step": 1405},
    {"epoch": 0.6464924346629987, "grad_norm": 2.434581693659057, "learning_rate": 3.3502241063930196e-06, "loss": 0.4389, "step": 1410},
    {"epoch": 0.6487849610270519, "grad_norm": 2.3993499417107156, "learning_rate": 3.3124887391232026e-06, "loss": 0.4219, "step": 1415},
    {"epoch": 0.651077487391105, "grad_norm": 2.476740652860741, "learning_rate": 3.2748615366922864e-06, "loss": 0.427, "step": 1420},
    {"epoch": 0.6533700137551581, "grad_norm": 2.507048548706466, "learning_rate": 3.2373449109006476e-06, "loss": 0.4341, "step": 1425},
    {"epoch": 0.6556625401192113, "grad_norm": 2.418497030941838, "learning_rate": 3.1999412664609986e-06, "loss": 0.4329, "step": 1430},
    {"epoch": 0.6579550664832645, "grad_norm": 2.4312888314629144, "learning_rate": 3.162653000844259e-06, "loss": 0.4227, "step": 1435},
    {"epoch": 0.6602475928473177, "grad_norm": 2.353877004261892, "learning_rate": 3.1254825041258852e-06, "loss": 0.4302, "step": 1440},
    {"epoch": 0.6625401192113709, "grad_norm": 2.381814531488306, "learning_rate": 3.0884321588326668e-06, "loss": 0.4376, "step": 1445},
    {"epoch": 0.6648326455754241, "grad_norm": 2.4501307973874287, "learning_rate": 3.051504339790019e-06, "loss": 0.4254, "step": 1450},
    {"epoch": 0.6671251719394773, "grad_norm": 2.459251255110059, "learning_rate": 3.0147014139697596e-06, "loss": 0.4263, "step": 1455},
    {"epoch": 0.6694176983035305, "grad_norm": 2.5254030222294466, "learning_rate": 2.978025740338396e-06, "loss": 0.4195, "step": 1460},
    {"epoch": 0.6717102246675837, "grad_norm": 2.2951603398964235, "learning_rate": 2.9414796697059155e-06, "loss": 0.4129, "step": 1465},
    {"epoch": 0.6740027510316369, "grad_norm": 2.364236291272217, "learning_rate": 2.905065544575114e-06, "loss": 0.4197, "step": 1470},
    {"epoch": 0.6762952773956901, "grad_norm": 2.4601102682369205, "learning_rate": 2.8687856989914393e-06, "loss": 0.4234, "step": 1475},
    {"epoch": 0.6785878037597433, "grad_norm": 2.686432591416178, "learning_rate": 2.8326424583933878e-06, "loss": 0.4223, "step": 1480},
    {"epoch": 0.6808803301237965, "grad_norm": 2.3448228852350788, "learning_rate": 2.796638139463456e-06, "loss": 0.4149, "step": 1485},
    {"epoch": 0.6831728564878496, "grad_norm": 2.317745266155718, "learning_rate": 2.7607750499796426e-06, "loss": 0.4161, "step": 1490},
    {"epoch": 0.6854653828519028, "grad_norm": 2.3719922106424725, "learning_rate": 2.725055488667522e-06, "loss": 0.4275, "step": 1495},
    {"epoch": 0.687757909215956, "grad_norm": 2.4553896347366746, "learning_rate": 2.689481745052908e-06, "loss": 0.3954, "step": 1500},
    {"epoch": 0.6900504355800092, "grad_norm": 2.471280707724599, "learning_rate": 2.6540560993151045e-06, "loss": 0.408, "step": 1505},
    {"epoch": 0.6923429619440623, "grad_norm": 2.375550619652342, "learning_rate": 2.6187808221407433e-06, "loss": 0.4091, "step": 1510},
    {"epoch": 0.6946354883081155, "grad_norm": 2.3794291144670865, "learning_rate": 2.5836581745782474e-06, "loss": 0.4203, "step": 1515},
    {"epoch": 0.6969280146721687, "grad_norm": 2.3959254909604133, "learning_rate": 2.5486904078928954e-06, "loss": 0.4019, "step": 1520},
    {"epoch": 0.6992205410362219, "grad_norm": 2.4572132670378593, "learning_rate": 2.5138797634225358e-06, "loss": 0.4025, "step": 1525},
    {"epoch": 0.7015130674002751, "grad_norm": 2.567664513817577, "learning_rate": 2.4792284724339077e-06, "loss": 0.4096, "step": 1530},
    {"epoch": 0.7038055937643283, "grad_norm": 2.473854002398598, "learning_rate": 2.4447387559796306e-06, "loss": 0.4129, "step": 1535},
    {"epoch": 0.7060981201283815, "grad_norm": 2.2347261984430844, "learning_rate": 2.410412824755839e-06, "loss": 0.4147, "step": 1540},
    {"epoch": 0.7083906464924347, "grad_norm": 2.45007211279529, "learning_rate": 2.3762528789604887e-06, "loss": 0.4159, "step": 1545},
    {"epoch": 0.7106831728564879, "grad_norm": 2.57319881552059, "learning_rate": 2.3422611081523215e-06, "loss": 0.4044, "step": 1550},
    {"epoch": 0.712975699220541, "grad_norm": 2.40694698697041, "learning_rate": 2.3084396911105233e-06, "loss": 0.3888, "step": 1555},
    {"epoch": 0.7152682255845942, "grad_norm": 2.6193951641238837, "learning_rate": 2.274790795695071e-06, "loss": 0.4186, "step": 1560},
    {"epoch": 0.7175607519486474, "grad_norm": 2.3915420788033686, "learning_rate": 2.2413165787077844e-06, "loss": 0.4105, "step": 1565},
    {"epoch": 0.7198532783127006, "grad_norm": 2.4922945082662706, "learning_rate": 2.20801918575407e-06, "loss": 0.41, "step": 1570},
    {"epoch": 0.7221458046767538, "grad_norm": 2.361018492853961, "learning_rate": 2.1749007511054005e-06, "loss": 0.4075, "step": 1575},
    {"epoch": 0.724438331040807, "grad_norm": 2.453915782459234, "learning_rate": 2.1419633975625113e-06, "loss": 0.4123, "step": 1580},
    {"epoch": 0.7267308574048602, "grad_norm": 2.2558599145275458, "learning_rate": 2.109209236319342e-06, "loss": 0.3971, "step": 1585},
    {"epoch": 0.7290233837689133, "grad_norm": 2.3942282865574103, "learning_rate": 2.076640366827703e-06, "loss": 0.4012, "step": 1590},
    {"epoch": 0.7313159101329665, "grad_norm": 2.4100293351001714, "learning_rate": 2.04425887666271e-06, "loss": 0.3926, "step": 1595},
    {"epoch": 0.7336084364970197, "grad_norm": 2.5693096989442927, "learning_rate": 2.0120668413889877e-06, "loss": 0.4021, "step": 1600},
    {"epoch": 0.7359009628610729, "grad_norm": 2.513834629858347, "learning_rate": 1.980066324427613e-06, "loss": 0.3926, "step": 1605},
    {"epoch": 0.7381934892251261, "grad_norm": 2.500502153829468, "learning_rate": 1.9482593769238695e-06, "loss": 0.3932, "step": 1610},
    {"epoch": 0.7404860155891793, "grad_norm": 2.2943690678553827, "learning_rate": 1.916648037615767e-06, "loss": 0.3961, "step": 1615},
    {"epoch": 0.7427785419532325, "grad_norm": 2.4947450845729904, "learning_rate": 1.8852343327033717e-06, "loss": 0.3918, "step": 1620},
    {"epoch": 0.7450710683172856, "grad_norm": 2.475640064869192, "learning_rate": 1.854020275718924e-06, "loss": 0.3953, "step": 1625},
    {"epoch": 0.7473635946813388, "grad_norm": 2.380898479266151, "learning_rate": 1.8230078673977802e-06, "loss": 0.3767, "step": 1630},
    {"epoch": 0.749656121045392, "grad_norm": 2.3124836007659444, "learning_rate": 1.7921990955501705e-06, "loss": 0.386, "step": 1635},
    {"epoch": 0.7519486474094452, "grad_norm": 2.3942291132445375, "learning_rate": 1.7615959349337914e-06, "loss": 0.3964, "step": 1640},
    {"epoch": 0.7542411737734984, "grad_norm": 2.4125792225674614, "learning_rate": 1.731200347127217e-06, "loss": 0.3918, "step": 1645},
    {"epoch": 0.7565337001375516, "grad_norm": 2.4570540617910788, "learning_rate": 1.7010142804041785e-06, "loss": 0.4012, "step": 1650},
    {"epoch": 0.7588262265016048, "grad_norm": 2.3060832536528006, "learning_rate": 1.6710396696086768e-06, "loss": 0.4026, "step": 1655},
    {"epoch": 0.761118752865658, "grad_norm": 2.357410070095031, "learning_rate": 1.6412784360309753e-06, "loss": 0.3876, "step": 1660},
    {"epoch": 0.7634112792297112, "grad_norm": 2.5569987658890434, "learning_rate": 1.611732487284437e-06, "loss": 0.3875, "step": 1665},
    {"epoch": 0.7657038055937643, "grad_norm": 2.5367416684876805, "learning_rate": 1.5824037171832595e-06, "loss": 0.3923, "step": 1670},
    {"epoch": 0.7679963319578175, "grad_norm": 2.370553404803813, "learning_rate": 1.5532940056210882e-06, "loss": 0.3916, "step": 1675},
    {"epoch": 0.7702888583218707, "grad_norm": 2.445473374507484, "learning_rate": 1.524405218450517e-06, "loss": 0.4005, "step": 1680},
    {"epoch": 0.7725813846859239, "grad_norm": 2.416383451707918, "learning_rate": 1.4957392073634912e-06, "loss": 0.385, "step": 1685},
    {"epoch": 0.774873911049977, "grad_norm": 2.4307180782279976, "learning_rate": 1.4672978097726204e-06, "loss": 0.3857, "step": 1690},
    {"epoch": 0.7771664374140302, "grad_norm": 2.4572760495599795, "learning_rate": 1.439082848693406e-06, "loss": 0.3916, "step": 1695},
    {"epoch": 0.7794589637780834, "grad_norm": 2.408412846059606, "learning_rate": 1.4110961326273936e-06, "loss": 0.3908, "step": 1700},
    {"epoch": 0.7817514901421366, "grad_norm": 2.6601098763821596, "learning_rate": 1.3833394554462477e-06, "loss": 0.3859, "step": 1705},
    {"epoch": 0.7840440165061898, "grad_norm": 2.520675032421566, "learning_rate": 1.35581459627677e-06, "loss": 0.3936, "step": 1710},
    {"epoch": 0.786336542870243, "grad_norm": 2.257467358094596, "learning_rate": 1.3285233193868663e-06, "loss": 0.3799, "step": 1715},
    {"epoch": 0.7886290692342962, "grad_norm": 2.327829634660073, "learning_rate": 1.3014673740724615e-06, "loss": 0.3876, "step": 1720},
    {"epoch": 0.7909215955983494, "grad_norm": 2.366347981314184, "learning_rate": 1.2746484945453691e-06, "loss": 0.3829, "step": 1725},
    {"epoch": 0.7932141219624026, "grad_norm": 2.391058577508851, "learning_rate": 1.2480683998221365e-06, "loss": 0.3825, "step": 1730},
    {"epoch": 0.7955066483264558, "grad_norm": 2.470899547865623, "learning_rate": 1.221728793613865e-06, "loss": 0.3895, "step": 1735},
    {"epoch": 0.797799174690509, "grad_norm": 2.399551521415764, "learning_rate": 1.1956313642169974e-06, "loss": 0.3846, "step": 1740},
    {"epoch": 0.8000917010545622, "grad_norm": 2.463312952219633, "learning_rate": 1.1697777844051105e-06, "loss": 0.3788, "step": 1745},
    {"epoch": 0.8023842274186153, "grad_norm": 2.4348320894873092, "learning_rate": 1.1441697113216893e-06, "loss": 0.3803, "step": 1750},
    {"epoch": 0.8046767537826685, "grad_norm": 2.385545108416876, "learning_rate": 1.1188087863739173e-06, "loss": 0.3859, "step": 1755},
    {"epoch": 0.8069692801467216, "grad_norm": 2.4484362721344195, "learning_rate": 1.0936966351274554e-06, "loss": 0.3739, "step": 1760},
    {"epoch": 0.8092618065107748, "grad_norm": 2.4361451039130317, "learning_rate": 1.0688348672022547e-06, "loss": 0.4012, "step": 1765},
    {"epoch": 0.811554332874828, "grad_norm": 2.5671935693516192, "learning_rate": 1.0442250761693829e-06, "loss": 0.3717, "step": 1770},
    {"epoch": 0.8138468592388812, "grad_norm": 2.3910678127476475, "learning_rate": 1.0198688394488837e-06, "loss": 0.3824, "step": 1775},
    {"epoch": 0.8161393856029344, "grad_norm": 2.4337476998865237, "learning_rate": 9.957677182086611e-07, "loss": 0.3754, "step": 1780},
    {"epoch": 0.8184319119669876, "grad_norm": 2.3930303860053055, "learning_rate": 9.719232572644189e-07, "loss": 0.3814, "step": 1785},
    {"epoch": 0.8207244383310408, "grad_norm": 2.4070725664187194, "learning_rate": 9.483369849806401e-07, "loss": 0.3681, "step": 1790},
    {"epoch": 0.823016964695094, "grad_norm": 2.4234654890940277, "learning_rate": 9.250104131726256e-07, "loss": 0.3748, "step": 1795},
    {"epoch": 0.8253094910591472, "grad_norm": 2.4405075201633486, "learning_rate": 9.019450370095867e-07, "loss": 0.3852, "step": 1800},
    {"epoch": 0.8276020174232004, "grad_norm": 2.4157009817816535, "learning_rate": 8.791423349188111e-07, "loss": 0.3738, "step": 1805},
    {"epoch": 0.8298945437872536, "grad_norm": 2.3817117068747695, "learning_rate": 8.566037684908985e-07, "loss": 0.3774, "step": 1810},
    {"epoch": 0.8321870701513068, "grad_norm": 2.643862606121901, "learning_rate": 8.343307823860819e-07, "loss": 0.3747, "step": 1815},
    {"epoch": 0.83447959651536, "grad_norm": 2.415451666660326, "learning_rate": 8.123248042416209e-07, "loss": 0.3807, "step": 1820},
    {"epoch": 0.8367721228794132, "grad_norm": 2.367699275816763, "learning_rate": 7.905872445802976e-07, "loss": 0.3819, "step": 1825},
    {"epoch": 0.8390646492434664, "grad_norm": 2.401428866906129, "learning_rate": 7.691194967200099e-07, "loss": 0.3773, "step": 1830},
    {"epoch": 0.8413571756075194, "grad_norm": 2.3851132017870444, "learning_rate": 7.47922936684457e-07, "loss": 0.3848, "step": 1835},
    {"epoch": 0.8436497019715726, "grad_norm": 2.334920050986847, "learning_rate": 7.269989231149432e-07,
|
"loss": 0.3646, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.8459422283356258, |
|
"grad_norm": 2.302533584527464, |
|
"learning_rate": 7.063487971832922e-07, |
|
"loss": 0.3719, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.848234754699679, |
|
"grad_norm": 2.4631469089449443, |
|
"learning_rate": 6.85973882505886e-07, |
|
"loss": 0.3951, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.8505272810637322, |
|
"grad_norm": 2.4860937019904426, |
|
"learning_rate": 6.658754850588161e-07, |
|
"loss": 0.3877, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.8528198074277854, |
|
"grad_norm": 2.366824744001058, |
|
"learning_rate": 6.460548930941801e-07, |
|
"loss": 0.3711, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.8551123337918386, |
|
"grad_norm": 2.587488334709295, |
|
"learning_rate": 6.265133770575066e-07, |
|
"loss": 0.366, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.8574048601558918, |
|
"grad_norm": 2.4606917803825072, |
|
"learning_rate": 6.072521895063255e-07, |
|
"loss": 0.3818, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.859697386519945, |
|
"grad_norm": 2.4967563072720576, |
|
"learning_rate": 5.882725650298787e-07, |
|
"loss": 0.3804, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.8619899128839982, |
|
"grad_norm": 2.4902108475668214, |
|
"learning_rate": 5.695757201699875e-07, |
|
"loss": 0.3751, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.8642824392480514, |
|
"grad_norm": 2.3545990508632713, |
|
"learning_rate": 5.511628533430769e-07, |
|
"loss": 0.3887, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.8665749656121046, |
|
"grad_norm": 2.4583864322248363, |
|
"learning_rate": 5.330351447633603e-07, |
|
"loss": 0.3846, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.8688674919761578, |
|
"grad_norm": 2.558178264129578, |
|
"learning_rate": 5.151937563671889e-07, |
|
"loss": 0.3761, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.871160018340211, |
|
"grad_norm": 2.4125538046249133, |
|
"learning_rate": 4.976398317385767e-07, |
|
"loss": 0.3789, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8734525447042641, |
|
"grad_norm": 2.5261586438137718, |
|
"learning_rate": 4.803744960358992e-07, |
|
"loss": 0.3692, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.8757450710683173, |
|
"grad_norm": 2.5343814063203913, |
|
"learning_rate": 4.633988559197761e-07, |
|
"loss": 0.3741, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.8780375974323704, |
|
"grad_norm": 2.5455270767430305, |
|
"learning_rate": 4.4671399948213233e-07, |
|
"loss": 0.3742, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.8803301237964236, |
|
"grad_norm": 2.4299267640638442, |
|
"learning_rate": 4.3032099617645874e-07, |
|
"loss": 0.3793, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.8826226501604768, |
|
"grad_norm": 2.5350282869807215, |
|
"learning_rate": 4.1422089674926113e-07, |
|
"loss": 0.3708, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.88491517652453, |
|
"grad_norm": 2.4052098639642745, |
|
"learning_rate": 3.984147331727128e-07, |
|
"loss": 0.3815, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.8872077028885832, |
|
"grad_norm": 2.440029806154777, |
|
"learning_rate": 3.829035185785035e-07, |
|
"loss": 0.3559, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.8895002292526364, |
|
"grad_norm": 2.4757422584836783, |
|
"learning_rate": 3.676882471929044e-07, |
|
"loss": 0.3724, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.8917927556166896, |
|
"grad_norm": 2.405181037542438, |
|
"learning_rate": 3.527698942730384e-07, |
|
"loss": 0.3678, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.8940852819807428, |
|
"grad_norm": 2.477077740628022, |
|
"learning_rate": 3.3814941604437155e-07, |
|
"loss": 0.3696, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.896377808344796, |
|
"grad_norm": 2.594645970360135, |
|
"learning_rate": 3.2382774963941823e-07, |
|
"loss": 0.3689, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.8986703347088492, |
|
"grad_norm": 2.4996924524050526, |
|
"learning_rate": 3.0980581303767576e-07, |
|
"loss": 0.3641, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.9009628610729024, |
|
"grad_norm": 2.5351766412057364, |
|
"learning_rate": 2.9608450500678566e-07, |
|
"loss": 0.3736, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.9032553874369555, |
|
"grad_norm": 2.4812985119515374, |
|
"learning_rate": 2.826647050449216e-07, |
|
"loss": 0.3652, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.9055479138010087, |
|
"grad_norm": 2.4498300583099506, |
|
"learning_rate": 2.69547273324417e-07, |
|
"loss": 0.3653, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.9078404401650619, |
|
"grad_norm": 2.546961383266402, |
|
"learning_rate": 2.5673305063663335e-07, |
|
"loss": 0.3723, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.9101329665291151, |
|
"grad_norm": 2.34777660611532, |
|
"learning_rate": 2.442228583380646e-07, |
|
"loss": 0.3596, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.9124254928931683, |
|
"grad_norm": 2.410870301241545, |
|
"learning_rate": 2.3201749829769083e-07, |
|
"loss": 0.3783, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.9147180192572214, |
|
"grad_norm": 2.519326020963244, |
|
"learning_rate": 2.201177528455828e-07, |
|
"loss": 0.3739, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.9170105456212746, |
|
"grad_norm": 2.4872058403028574, |
|
"learning_rate": 2.085243847227525e-07, |
|
"loss": 0.3768, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9193030719853278, |
|
"grad_norm": 2.4175176965392544, |
|
"learning_rate": 1.9723813703227013e-07, |
|
"loss": 0.3794, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.921595598349381, |
|
"grad_norm": 2.514035461894725, |
|
"learning_rate": 1.8625973319162605e-07, |
|
"loss": 0.3656, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.9238881247134342, |
|
"grad_norm": 2.4532676789082166, |
|
"learning_rate": 1.7558987688636675e-07, |
|
"loss": 0.361, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.9261806510774874, |
|
"grad_norm": 2.580005311393483, |
|
"learning_rate": 1.652292520249865e-07, |
|
"loss": 0.369, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.9284731774415406, |
|
"grad_norm": 2.359368965829793, |
|
"learning_rate": 1.5517852269509692e-07, |
|
"loss": 0.3571, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.9307657038055938, |
|
"grad_norm": 2.4993672807867178, |
|
"learning_rate": 1.4543833312085365e-07, |
|
"loss": 0.3588, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.933058230169647, |
|
"grad_norm": 2.41149322411576, |
|
"learning_rate": 1.360093076216673e-07, |
|
"loss": 0.3705, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.9353507565337001, |
|
"grad_norm": 2.474736948512413, |
|
"learning_rate": 1.2689205057218602e-07, |
|
"loss": 0.361, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.9376432828977533, |
|
"grad_norm": 2.3336360044904736, |
|
"learning_rate": 1.1808714636355634e-07, |
|
"loss": 0.3568, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.9399358092618065, |
|
"grad_norm": 2.566200023951429, |
|
"learning_rate": 1.0959515936596387e-07, |
|
"loss": 0.3783, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.9422283356258597, |
|
"grad_norm": 2.5160190954507264, |
|
"learning_rate": 1.014166338924627e-07, |
|
"loss": 0.372, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.9445208619899129, |
|
"grad_norm": 2.509256348018165, |
|
"learning_rate": 9.355209416408051e-08, |
|
"loss": 0.3853, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.9468133883539661, |
|
"grad_norm": 2.5224442995349152, |
|
"learning_rate": 8.600204427622438e-08, |
|
"loss": 0.365, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.9491059147180193, |
|
"grad_norm": 2.4001792608745602, |
|
"learning_rate": 7.876696816636276e-08, |
|
"loss": 0.3736, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.9513984410820725, |
|
"grad_norm": 2.4422332203602553, |
|
"learning_rate": 7.184732958301078e-08, |
|
"loss": 0.3651, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.9536909674461256, |
|
"grad_norm": 2.471890892444078, |
|
"learning_rate": 6.524357205600518e-08, |
|
"loss": 0.3624, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.9559834938101788, |
|
"grad_norm": 2.523417346804641, |
|
"learning_rate": 5.895611886807317e-08, |
|
"loss": 0.369, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.958276020174232, |
|
"grad_norm": 2.4584360575665776, |
|
"learning_rate": 5.2985373027702455e-08, |
|
"loss": 0.363, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.9605685465382852, |
|
"grad_norm": 2.467603595232153, |
|
"learning_rate": 4.733171724330854e-08, |
|
"loss": 0.3814, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.9628610729023384, |
|
"grad_norm": 2.5238201533198072, |
|
"learning_rate": 4.19955138987066e-08, |
|
"loss": 0.369, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9651535992663915, |
|
"grad_norm": 2.5600424647957807, |
|
"learning_rate": 3.697710502988006e-08, |
|
"loss": 0.3652, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.9674461256304447, |
|
"grad_norm": 2.475992842961113, |
|
"learning_rate": 3.2276812303060346e-08, |
|
"loss": 0.3741, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.9697386519944979, |
|
"grad_norm": 2.4735410644370606, |
|
"learning_rate": 2.7894936994106724e-08, |
|
"loss": 0.3571, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.9720311783585511, |
|
"grad_norm": 2.384962513457078, |
|
"learning_rate": 2.383175996919673e-08, |
|
"loss": 0.3654, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.9743237047226043, |
|
"grad_norm": 2.4369560907719414, |
|
"learning_rate": 2.008754166682225e-08, |
|
"loss": 0.3614, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.9766162310866575, |
|
"grad_norm": 2.334334624814976, |
|
"learning_rate": 1.6662522081097308e-08, |
|
"loss": 0.3598, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.9789087574507107, |
|
"grad_norm": 2.515966550970349, |
|
"learning_rate": 1.3556920746373714e-08, |
|
"loss": 0.3539, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.9812012838147639, |
|
"grad_norm": 2.4578356166282704, |
|
"learning_rate": 1.0770936723171199e-08, |
|
"loss": 0.3684, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.9834938101788171, |
|
"grad_norm": 2.534561019356648, |
|
"learning_rate": 8.304748585417077e-09, |
|
"loss": 0.3629, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.9857863365428703, |
|
"grad_norm": 2.4815228224834254, |
|
"learning_rate": 6.158514409000393e-09, |
|
"loss": 0.3617, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.9880788629069235, |
|
"grad_norm": 2.520302407708297, |
|
"learning_rate": 4.332371761638921e-09, |
|
"loss": 0.3716, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.9903713892709766, |
|
"grad_norm": 2.939805778253569, |
|
"learning_rate": 2.8264376940634332e-09, |
|
"loss": 0.3685, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.9926639156350298, |
|
"grad_norm": 2.6736093020039484, |
|
"learning_rate": 1.640808732513155e-09, |
|
"loss": 0.3724, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.994956441999083, |
|
"grad_norm": 2.3884833213363144, |
|
"learning_rate": 7.755608725490415e-10, |
|
"loss": 0.354, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.9972489683631361, |
|
"grad_norm": 2.378189457774983, |
|
"learning_rate": 2.307495741843413e-10, |
|
"loss": 0.356, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.9995414947271893, |
|
"grad_norm": 2.543796138070999, |
|
"learning_rate": 6.4097583263311725e-12, |
|
"loss": 0.3664, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_runtime": 2.6844, |
|
"eval_samples_per_second": 3.725, |
|
"eval_steps_per_second": 1.118, |
|
"step": 2181 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2181, |
|
"total_flos": 228328514519040.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 0.0089, |
|
"train_samples_per_second": 3927873.864, |
|
"train_steps_per_second": 245597.686 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2181, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 228328514519040.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |