{ |
|
"best_metric": 0.48741263151168823, |
|
"best_model_checkpoint": "/home/ray/default/save/checkpoint-1000", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1154, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004332755632582322, |
|
"grad_norm": 0.10313185304403305, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 1.0706, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.008665511265164644, |
|
"grad_norm": 0.10810094326734543, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 1.0694, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.012998266897746967, |
|
"grad_norm": 0.09731286019086838, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.0706, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01733102253032929, |
|
"grad_norm": 0.11459868401288986, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.0772, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021663778162911613, |
|
"grad_norm": 0.10845116525888443, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 1.0809, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.025996533795493933, |
|
"grad_norm": 0.12091381102800369, |
|
"learning_rate": 5e-06, |
|
"loss": 1.0737, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.030329289428076257, |
|
"grad_norm": 0.12315661460161209, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 1.0675, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03466204506065858, |
|
"grad_norm": 0.13054250180721283, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.0638, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0389948006932409, |
|
"grad_norm": 0.13414187729358673, |
|
"learning_rate": 7.5e-06, |
|
"loss": 1.0588, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.043327556325823226, |
|
"grad_norm": 0.12992985546588898, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 1.0512, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.047660311958405546, |
|
"grad_norm": 0.11500700563192368, |
|
"learning_rate": 9.166666666666666e-06, |
|
"loss": 1.0545, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.05199306759098787, |
|
"grad_norm": 0.10958714783191681, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0357, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05632582322357019, |
|
"grad_norm": 0.10853663086891174, |
|
"learning_rate": 1.0833333333333334e-05, |
|
"loss": 1.0268, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.060658578856152515, |
|
"grad_norm": 0.12498235702514648, |
|
"learning_rate": 1.1666666666666668e-05, |
|
"loss": 1.0135, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06499133448873484, |
|
"grad_norm": 0.11716682463884354, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.0014, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06932409012131716, |
|
"grad_norm": 0.09530466794967651, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.9824, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07365684575389948, |
|
"grad_norm": 0.09298353642225266, |
|
"learning_rate": 1.4166666666666668e-05, |
|
"loss": 0.9676, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0779896013864818, |
|
"grad_norm": 0.09433547407388687, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.9463, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08232235701906412, |
|
"grad_norm": 0.09255563467741013, |
|
"learning_rate": 1.5833333333333333e-05, |
|
"loss": 0.9341, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.08665511265164645, |
|
"grad_norm": 0.09951213002204895, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.9209, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09098786828422877, |
|
"grad_norm": 0.10951012372970581, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.8927, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.09532062391681109, |
|
"grad_norm": 0.110866479575634, |
|
"learning_rate": 1.8333333333333333e-05, |
|
"loss": 0.8755, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09965337954939342, |
|
"grad_norm": 0.11648208647966385, |
|
"learning_rate": 1.9166666666666667e-05, |
|
"loss": 0.8453, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.10398613518197573, |
|
"grad_norm": 0.12016862630844116, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8081, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.10831889081455806, |
|
"grad_norm": 0.12387488037347794, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.7784, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.11265164644714037, |
|
"grad_norm": 0.12779255211353302, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"loss": 0.7353, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1169844020797227, |
|
"grad_norm": 0.12649372220039368, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.7085, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.12131715771230503, |
|
"grad_norm": 0.1445430964231491, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.6753, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12564991334488734, |
|
"grad_norm": 0.1329505294561386, |
|
"learning_rate": 2.4166666666666667e-05, |
|
"loss": 0.6448, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.12998266897746968, |
|
"grad_norm": 0.13544394075870514, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.621, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.134315424610052, |
|
"grad_norm": 0.1351090520620346, |
|
"learning_rate": 2.5833333333333336e-05, |
|
"loss": 0.5997, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.1386481802426343, |
|
"grad_norm": 0.127303346991539, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.5791, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.14298093587521662, |
|
"grad_norm": 0.1476767510175705, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.57, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.14731369150779897, |
|
"grad_norm": 0.13856437802314758, |
|
"learning_rate": 2.8333333333333335e-05, |
|
"loss": 0.5645, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.15164644714038128, |
|
"grad_norm": 0.1533508449792862, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.5583, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1559792027729636, |
|
"grad_norm": 0.13325001299381256, |
|
"learning_rate": 3e-05, |
|
"loss": 0.555, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.16031195840554593, |
|
"grad_norm": 0.13416974246501923, |
|
"learning_rate": 3.0833333333333335e-05, |
|
"loss": 0.5439, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.16464471403812825, |
|
"grad_norm": 0.1278882771730423, |
|
"learning_rate": 3.1666666666666666e-05, |
|
"loss": 0.537, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.16897746967071056, |
|
"grad_norm": 0.14047101140022278, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.5381, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.1733102253032929, |
|
"grad_norm": 0.13340455293655396, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.5344, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.17764298093587522, |
|
"grad_norm": 0.13049094378948212, |
|
"learning_rate": 3.4166666666666666e-05, |
|
"loss": 0.5291, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.18197573656845753, |
|
"grad_norm": 0.16296444833278656, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.5342, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.18630849220103987, |
|
"grad_norm": 0.1682613343000412, |
|
"learning_rate": 3.5833333333333335e-05, |
|
"loss": 0.5285, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.19064124783362218, |
|
"grad_norm": 0.1439386010169983, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.5268, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1949740034662045, |
|
"grad_norm": 0.15248768031597137, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.5252, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.19930675909878684, |
|
"grad_norm": 0.1604214906692505, |
|
"learning_rate": 3.8333333333333334e-05, |
|
"loss": 0.5201, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.20363951473136915, |
|
"grad_norm": 0.16192543506622314, |
|
"learning_rate": 3.9166666666666665e-05, |
|
"loss": 0.5222, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.20797227036395147, |
|
"grad_norm": 0.14945088326931, |
|
"learning_rate": 4e-05, |
|
"loss": 0.5158, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2123050259965338, |
|
"grad_norm": 0.16204427182674408, |
|
"learning_rate": 4.0833333333333334e-05, |
|
"loss": 0.5127, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.21663778162911612, |
|
"grad_norm": 0.1618306040763855, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.5181, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.22097053726169844, |
|
"grad_norm": 0.14193885028362274, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.5164, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.22530329289428075, |
|
"grad_norm": 0.12552691996097565, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.5149, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2296360485268631, |
|
"grad_norm": 0.14471225440502167, |
|
"learning_rate": 4.4166666666666665e-05, |
|
"loss": 0.5137, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.2339688041594454, |
|
"grad_norm": 0.13988590240478516, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5066, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.23830155979202772, |
|
"grad_norm": 0.13964875042438507, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 0.5116, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.24263431542461006, |
|
"grad_norm": 0.12847208976745605, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.5095, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.24696707105719237, |
|
"grad_norm": 0.13142219185829163, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5077, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.2512998266897747, |
|
"grad_norm": 0.19098567962646484, |
|
"learning_rate": 4.8333333333333334e-05, |
|
"loss": 0.5099, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.255632582322357, |
|
"grad_norm": 0.1430283784866333, |
|
"learning_rate": 4.9166666666666665e-05, |
|
"loss": 0.5036, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.25996533795493937, |
|
"grad_norm": 0.12594453990459442, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5085, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.26429809358752165, |
|
"grad_norm": 0.14377984404563904, |
|
"learning_rate": 4.999577115486055e-05, |
|
"loss": 0.4978, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.268630849220104, |
|
"grad_norm": 0.12468158453702927, |
|
"learning_rate": 4.998308605009268e-05, |
|
"loss": 0.5095, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2729636048526863, |
|
"grad_norm": 0.12854167819023132, |
|
"learning_rate": 4.996194897716389e-05, |
|
"loss": 0.5037, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2772963604852686, |
|
"grad_norm": 0.13529527187347412, |
|
"learning_rate": 4.993236708690683e-05, |
|
"loss": 0.5058, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.28162911611785096, |
|
"grad_norm": 0.1377994418144226, |
|
"learning_rate": 4.9894350387100126e-05, |
|
"loss": 0.4998, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.28596187175043325, |
|
"grad_norm": 0.14942322671413422, |
|
"learning_rate": 4.984791173908267e-05, |
|
"loss": 0.5007, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2902946273830156, |
|
"grad_norm": 0.1366725116968155, |
|
"learning_rate": 4.9793066853402536e-05, |
|
"loss": 0.5038, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.29462738301559793, |
|
"grad_norm": 0.13133087754249573, |
|
"learning_rate": 4.9729834284501995e-05, |
|
"loss": 0.5062, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2989601386481802, |
|
"grad_norm": 0.11725670844316483, |
|
"learning_rate": 4.965823542444037e-05, |
|
"loss": 0.5025, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.30329289428076256, |
|
"grad_norm": 0.10628046840429306, |
|
"learning_rate": 4.9578294495656965e-05, |
|
"loss": 0.4999, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3076256499133449, |
|
"grad_norm": 0.13826170563697815, |
|
"learning_rate": 4.949003854277644e-05, |
|
"loss": 0.4978, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.3119584055459272, |
|
"grad_norm": 0.1305851936340332, |
|
"learning_rate": 4.9393497423459376e-05, |
|
"loss": 0.4997, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.31629116117850953, |
|
"grad_norm": 0.11465763300657272, |
|
"learning_rate": 4.928870379830124e-05, |
|
"loss": 0.5037, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.32062391681109187, |
|
"grad_norm": 0.15975706279277802, |
|
"learning_rate": 4.9175693119783013e-05, |
|
"loss": 0.4982, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.32495667244367415, |
|
"grad_norm": 0.15360799431800842, |
|
"learning_rate": 4.905450362027738e-05, |
|
"loss": 0.5013, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.3292894280762565, |
|
"grad_norm": 0.14006198942661285, |
|
"learning_rate": 4.8925176299114416e-05, |
|
"loss": 0.5008, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.33362218370883884, |
|
"grad_norm": 0.14255651831626892, |
|
"learning_rate": 4.878775490871121e-05, |
|
"loss": 0.4975, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.3379549393414211, |
|
"grad_norm": 0.12012791633605957, |
|
"learning_rate": 4.864228593977006e-05, |
|
"loss": 0.5047, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.34228769497400346, |
|
"grad_norm": 0.11433300375938416, |
|
"learning_rate": 4.848881860555035e-05, |
|
"loss": 0.4986, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3466204506065858, |
|
"grad_norm": 0.11102011054754257, |
|
"learning_rate": 4.832740482521931e-05, |
|
"loss": 0.4981, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3509532062391681, |
|
"grad_norm": 0.12340573221445084, |
|
"learning_rate": 4.815809920628738e-05, |
|
"loss": 0.4984, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.35528596187175043, |
|
"grad_norm": 0.1148650050163269, |
|
"learning_rate": 4.7980959026134044e-05, |
|
"loss": 0.4942, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3596187175043328, |
|
"grad_norm": 0.10840712487697601, |
|
"learning_rate": 4.7796044212630486e-05, |
|
"loss": 0.4903, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.36395147313691506, |
|
"grad_norm": 0.11093516647815704, |
|
"learning_rate": 4.7603417323865547e-05, |
|
"loss": 0.4957, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3682842287694974, |
|
"grad_norm": 0.1237047016620636, |
|
"learning_rate": 4.74031435269818e-05, |
|
"loss": 0.499, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.37261698440207974, |
|
"grad_norm": 0.11613244563341141, |
|
"learning_rate": 4.7195290576129034e-05, |
|
"loss": 0.4959, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.37694974003466203, |
|
"grad_norm": 0.10890854150056839, |
|
"learning_rate": 4.697992878954255e-05, |
|
"loss": 0.4944, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.38128249566724437, |
|
"grad_norm": 0.11364572495222092, |
|
"learning_rate": 4.6757131025753886e-05, |
|
"loss": 0.4909, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3856152512998267, |
|
"grad_norm": 0.12619757652282715, |
|
"learning_rate": 4.652697265894228e-05, |
|
"loss": 0.4966, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.389948006932409, |
|
"grad_norm": 0.11669816076755524, |
|
"learning_rate": 4.628953155343499e-05, |
|
"loss": 0.4956, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.39428076256499134, |
|
"grad_norm": 0.13808482885360718, |
|
"learning_rate": 4.604488803736523e-05, |
|
"loss": 0.4973, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3986135181975737, |
|
"grad_norm": 0.11171045899391174, |
|
"learning_rate": 4.579312487549649e-05, |
|
"loss": 0.4903, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.40294627383015597, |
|
"grad_norm": 0.11149395257234573, |
|
"learning_rate": 4.553432724122265e-05, |
|
"loss": 0.4999, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.4072790294627383, |
|
"grad_norm": 0.11308333277702332, |
|
"learning_rate": 4.526858268775313e-05, |
|
"loss": 0.4967, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.41161178509532065, |
|
"grad_norm": 0.10610105097293854, |
|
"learning_rate": 4.499598111849299e-05, |
|
"loss": 0.4936, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.41594454072790293, |
|
"grad_norm": 0.11356962472200394, |
|
"learning_rate": 4.471661475662792e-05, |
|
"loss": 0.493, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.4202772963604853, |
|
"grad_norm": 0.10310888290405273, |
|
"learning_rate": 4.443057811392445e-05, |
|
"loss": 0.5002, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.4246100519930676, |
|
"grad_norm": 0.11358631402254105, |
|
"learning_rate": 4.413796795875586e-05, |
|
"loss": 0.4983, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4289428076256499, |
|
"grad_norm": 0.12575574219226837, |
|
"learning_rate": 4.383888328336476e-05, |
|
"loss": 0.4949, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.43327556325823224, |
|
"grad_norm": 0.09878399968147278, |
|
"learning_rate": 4.3533425270373216e-05, |
|
"loss": 0.4953, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.43327556325823224, |
|
"eval_loss": 0.4938061535358429, |
|
"eval_runtime": 140.4236, |
|
"eval_samples_per_second": 2.685, |
|
"eval_steps_per_second": 0.677, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4376083188908146, |
|
"grad_norm": 0.11693672835826874, |
|
"learning_rate": 4.3221697258551906e-05, |
|
"loss": 0.4934, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.44194107452339687, |
|
"grad_norm": 0.11737816035747528, |
|
"learning_rate": 4.2903804707859835e-05, |
|
"loss": 0.4934, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4462738301559792, |
|
"grad_norm": 0.11542918533086777, |
|
"learning_rate": 4.257985516376644e-05, |
|
"loss": 0.4927, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.4506065857885615, |
|
"grad_norm": 0.10959002375602722, |
|
"learning_rate": 4.224995822086812e-05, |
|
"loss": 0.4935, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.45493934142114384, |
|
"grad_norm": 0.10852424800395966, |
|
"learning_rate": 4.191422548581154e-05, |
|
"loss": 0.4947, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.4592720970537262, |
|
"grad_norm": 0.11614954471588135, |
|
"learning_rate": 4.157277053953631e-05, |
|
"loss": 0.4935, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.46360485268630847, |
|
"grad_norm": 0.11209428310394287, |
|
"learning_rate": 4.1225708898849594e-05, |
|
"loss": 0.4975, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.4679376083188908, |
|
"grad_norm": 0.11714442819356918, |
|
"learning_rate": 4.0873157977346e-05, |
|
"loss": 0.4923, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.47227036395147315, |
|
"grad_norm": 0.1179489716887474, |
|
"learning_rate": 4.051523704568557e-05, |
|
"loss": 0.4939, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.47660311958405543, |
|
"grad_norm": 0.11872310936450958, |
|
"learning_rate": 4.0152067191243696e-05, |
|
"loss": 0.4927, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4809358752166378, |
|
"grad_norm": 0.1182030588388443, |
|
"learning_rate": 3.978377127714628e-05, |
|
"loss": 0.4908, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.4852686308492201, |
|
"grad_norm": 0.11203285306692123, |
|
"learning_rate": 3.941047390070419e-05, |
|
"loss": 0.4898, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4896013864818024, |
|
"grad_norm": 0.10341402143239975, |
|
"learning_rate": 3.903230135126113e-05, |
|
"loss": 0.4899, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.49393414211438474, |
|
"grad_norm": 0.11410869657993317, |
|
"learning_rate": 3.864938156746891e-05, |
|
"loss": 0.4914, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4982668977469671, |
|
"grad_norm": 0.11390075832605362, |
|
"learning_rate": 3.8261844094004815e-05, |
|
"loss": 0.4922, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5025996533795494, |
|
"grad_norm": 0.10876569896936417, |
|
"learning_rate": 3.7869820037745776e-05, |
|
"loss": 0.4964, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5069324090121318, |
|
"grad_norm": 0.11602895706892014, |
|
"learning_rate": 3.747344202341373e-05, |
|
"loss": 0.4945, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.511265164644714, |
|
"grad_norm": 0.12035666406154633, |
|
"learning_rate": 3.707284414870786e-05, |
|
"loss": 0.499, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5155979202772963, |
|
"grad_norm": 0.1848023533821106, |
|
"learning_rate": 3.666816193893817e-05, |
|
"loss": 0.4947, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.5199306759098787, |
|
"grad_norm": 0.12017575651407242, |
|
"learning_rate": 3.6259532301176335e-05, |
|
"loss": 0.4905, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.524263431542461, |
|
"grad_norm": 0.10070477426052094, |
|
"learning_rate": 3.5847093477938956e-05, |
|
"loss": 0.4974, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.5285961871750433, |
|
"grad_norm": 0.11581992357969284, |
|
"learning_rate": 3.543098500041906e-05, |
|
"loss": 0.4927, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5329289428076257, |
|
"grad_norm": 0.09777580201625824, |
|
"learning_rate": 3.501134764128167e-05, |
|
"loss": 0.4907, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.537261698440208, |
|
"grad_norm": 0.10121456533670425, |
|
"learning_rate": 3.458832336703929e-05, |
|
"loss": 0.491, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5415944540727903, |
|
"grad_norm": 0.10312589257955551, |
|
"learning_rate": 3.416205529002363e-05, |
|
"loss": 0.4942, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5459272097053726, |
|
"grad_norm": 0.10299069434404373, |
|
"learning_rate": 3.37326876199695e-05, |
|
"loss": 0.493, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.550259965337955, |
|
"grad_norm": 0.11771389842033386, |
|
"learning_rate": 3.3300365615227685e-05, |
|
"loss": 0.4942, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5545927209705372, |
|
"grad_norm": 0.1121087372303009, |
|
"learning_rate": 3.286523553362287e-05, |
|
"loss": 0.4946, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5589254766031195, |
|
"grad_norm": 0.10604743659496307, |
|
"learning_rate": 3.242744458297348e-05, |
|
"loss": 0.4907, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5632582322357019, |
|
"grad_norm": 0.10975582152605057, |
|
"learning_rate": 3.1987140871290236e-05, |
|
"loss": 0.4889, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5675909878682842, |
|
"grad_norm": 0.10636895149946213, |
|
"learning_rate": 3.154447335667001e-05, |
|
"loss": 0.4935, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5719237435008665, |
|
"grad_norm": 0.10076680779457092, |
|
"learning_rate": 3.1099591796902215e-05, |
|
"loss": 0.4925, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5762564991334489, |
|
"grad_norm": 0.10885344445705414, |
|
"learning_rate": 3.0652646698804585e-05, |
|
"loss": 0.4892, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5805892547660312, |
|
"grad_norm": 0.11372784525156021, |
|
"learning_rate": 3.0203789267305567e-05, |
|
"loss": 0.4918, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5849220103986135, |
|
"grad_norm": 0.11742879450321198, |
|
"learning_rate": 2.975317135429056e-05, |
|
"loss": 0.4885, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5892547660311959, |
|
"grad_norm": 0.09908663481473923, |
|
"learning_rate": 2.930094540722927e-05, |
|
"loss": 0.4889, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5935875216637782, |
|
"grad_norm": 0.11032566428184509, |
|
"learning_rate": 2.884726441760155e-05, |
|
"loss": 0.4936, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.5979202772963604, |
|
"grad_norm": 0.11033419519662857, |
|
"learning_rate": 2.8392281869139213e-05, |
|
"loss": 0.4925, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6022530329289428, |
|
"grad_norm": 0.12115441262722015, |
|
"learning_rate": 2.7936151685901373e-05, |
|
"loss": 0.49, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.6065857885615251, |
|
"grad_norm": 0.13125091791152954, |
|
"learning_rate": 2.747902818020067e-05, |
|
"loss": 0.4919, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6109185441941074, |
|
"grad_norm": 0.11565785109996796, |
|
"learning_rate": 2.7021066000398344e-05, |
|
"loss": 0.4928, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.6152512998266898, |
|
"grad_norm": 0.12640348076820374, |
|
"learning_rate": 2.6562420078585433e-05, |
|
"loss": 0.489, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6195840554592721, |
|
"grad_norm": 0.12297876179218292, |
|
"learning_rate": 2.6103245578168106e-05, |
|
"loss": 0.4919, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.6239168110918544, |
|
"grad_norm": 0.10511300712823868, |
|
"learning_rate": 2.564369784137472e-05, |
|
"loss": 0.4883, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.6282495667244368, |
|
"grad_norm": 0.10803347080945969, |
|
"learning_rate": 2.5183932336702297e-05, |
|
"loss": 0.4909, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.6325823223570191, |
|
"grad_norm": 0.11415662616491318, |
|
"learning_rate": 2.4724104606320445e-05, |
|
"loss": 0.4878, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.6369150779896013, |
|
"grad_norm": 0.10404873639345169, |
|
"learning_rate": 2.426437021345015e-05, |
|
"loss": 0.4901, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.6412478336221837, |
|
"grad_norm": 0.11284226924180984, |
|
"learning_rate": 2.3804884689735642e-05, |
|
"loss": 0.4906, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.645580589254766, |
|
"grad_norm": 0.1138242855668068, |
|
"learning_rate": 2.3345803482626797e-05, |
|
"loss": 0.494, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.6499133448873483, |
|
"grad_norm": 0.18223117291927338, |
|
"learning_rate": 2.288728190279014e-05, |
|
"loss": 0.4886, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6542461005199307, |
|
"grad_norm": 0.1119702160358429, |
|
"learning_rate": 2.2429475071565987e-05, |
|
"loss": 0.4908, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.658578856152513, |
|
"grad_norm": 0.11668991297483444, |
|
"learning_rate": 2.1972537868489797e-05, |
|
"loss": 0.4896, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6629116117850953, |
|
"grad_norm": 0.10587523132562637, |
|
"learning_rate": 2.151662487889518e-05, |
|
"loss": 0.4885, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.6672443674176777, |
|
"grad_norm": 0.10824766755104065, |
|
"learning_rate": 2.1061890341616558e-05, |
|
"loss": 0.4873, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.67157712305026, |
|
"grad_norm": 0.11111487448215485, |
|
"learning_rate": 2.060848809680893e-05, |
|
"loss": 0.4853, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6759098786828422, |
|
"grad_norm": 0.10754924267530441, |
|
"learning_rate": 2.0156571533902627e-05, |
|
"loss": 0.4849, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6802426343154246, |
|
"grad_norm": 0.10407605022192001, |
|
"learning_rate": 1.97062935397105e-05, |
|
"loss": 0.488, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.6845753899480069, |
|
"grad_norm": 0.1077013909816742, |
|
"learning_rate": 1.9257806446705116e-05, |
|
"loss": 0.4867, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6889081455805892, |
|
"grad_norm": 0.11459195613861084, |
|
"learning_rate": 1.8811261981483548e-05, |
|
"loss": 0.4911, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.6932409012131716, |
|
"grad_norm": 0.12345458567142487, |
|
"learning_rate": 1.8366811213437092e-05, |
|
"loss": 0.4888, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6975736568457539, |
|
"grad_norm": 0.10806908458471298, |
|
"learning_rate": 1.7924604503643367e-05, |
|
"loss": 0.4903, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.7019064124783362, |
|
"grad_norm": 0.12911449372768402, |
|
"learning_rate": 1.7484791453998006e-05, |
|
"loss": 0.4874, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7062391681109186, |
|
"grad_norm": 0.11117111146450043, |
|
"learning_rate": 1.7047520856603183e-05, |
|
"loss": 0.4895, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.7105719237435009, |
|
"grad_norm": 0.10993503779172897, |
|
"learning_rate": 1.6612940643430138e-05, |
|
"loss": 0.4909, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.7149046793760832, |
|
"grad_norm": 0.10971593856811523, |
|
"learning_rate": 1.618119783627263e-05, |
|
"loss": 0.4898, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.7192374350086655, |
|
"grad_norm": 0.12124701589345932, |
|
"learning_rate": 1.5752438497008405e-05, |
|
"loss": 0.4886, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7235701906412478, |
|
"grad_norm": 0.10207706689834595, |
|
"learning_rate": 1.5326807678185267e-05, |
|
"loss": 0.4893, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.7279029462738301, |
|
"grad_norm": 0.10885774344205856, |
|
"learning_rate": 1.490444937394879e-05, |
|
"loss": 0.4891, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.7322357019064125, |
|
"grad_norm": 0.10901923477649689, |
|
"learning_rate": 1.4485506471327914e-05, |
|
"loss": 0.4968, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.7365684575389948, |
|
"grad_norm": 0.11790922284126282, |
|
"learning_rate": 1.407012070189524e-05, |
|
"loss": 0.4861, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7409012131715771, |
|
"grad_norm": 0.11575620621442795, |
|
"learning_rate": 1.3658432593818149e-05, |
|
"loss": 0.4888, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.7452339688041595, |
|
"grad_norm": 0.11284902691841125, |
|
"learning_rate": 1.325058142431701e-05, |
|
"loss": 0.4879, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7495667244367418, |
|
"grad_norm": 0.11264630407094955, |
|
"learning_rate": 1.2846705172546675e-05, |
|
"loss": 0.4909, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.7538994800693241, |
|
"grad_norm": 0.11645620316267014, |
|
"learning_rate": 1.2446940472917099e-05, |
|
"loss": 0.485, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7582322357019065, |
|
"grad_norm": 0.10355979204177856, |
|
"learning_rate": 1.2051422568868833e-05, |
|
"loss": 0.4891, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.7625649913344887, |
|
"grad_norm": 0.10600101947784424, |
|
"learning_rate": 1.1660285267119167e-05, |
|
"loss": 0.4903, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.766897746967071, |
|
"grad_norm": 0.11851372569799423, |
|
"learning_rate": 1.1273660892394297e-05, |
|
"loss": 0.4895, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.7712305025996534, |
|
"grad_norm": 0.11533911526203156, |
|
"learning_rate": 1.0891680242662835e-05, |
|
"loss": 0.4843, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.7755632582322357, |
|
"grad_norm": 0.09746929258108139, |
|
"learning_rate": 1.051447254488591e-05, |
|
"loss": 0.4894, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.779896013864818, |
|
"grad_norm": 0.1193188726902008, |
|
"learning_rate": 1.0142165411298662e-05, |
|
"loss": 0.4884, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7842287694974004, |
|
"grad_norm": 0.11370085179805756, |
|
"learning_rate": 9.774884796238085e-06, |
|
"loss": 0.49, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.7885615251299827, |
|
"grad_norm": 0.1011599749326706, |
|
"learning_rate": 9.412754953531663e-06, |
|
"loss": 0.4884, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.792894280762565, |
|
"grad_norm": 0.11001604050397873, |
|
"learning_rate": 9.055898394461423e-06, |
|
"loss": 0.4883, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.7972270363951474, |
|
"grad_norm": 0.11630392074584961, |
|
"learning_rate": 8.704435846317386e-06, |
|
"loss": 0.4869, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.8015597920277296, |
|
"grad_norm": 0.10578346997499466, |
|
"learning_rate": 8.358486211554637e-06, |
|
"loss": 0.4887, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.8058925476603119, |
|
"grad_norm": 0.11845128238201141, |
|
"learning_rate": 8.018166527567672e-06, |
|
"loss": 0.4852, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8102253032928943, |
|
"grad_norm": 0.10032304376363754, |
|
"learning_rate": 7.683591927095824e-06, |
|
"loss": 0.4865, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.8145580589254766, |
|
"grad_norm": 0.10029744356870651, |
|
"learning_rate": 7.354875599272928e-06, |
|
"loss": 0.4902, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.8188908145580589, |
|
"grad_norm": 0.10729606449604034, |
|
"learning_rate": 7.0321287513346074e-06, |
|
"loss": 0.4879, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.8232235701906413, |
|
"grad_norm": 0.11077585816383362, |
|
"learning_rate": 6.715460570995988e-06, |
|
"loss": 0.4905, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.8275563258232236, |
|
"grad_norm": 0.112645223736763, |
|
"learning_rate": 6.404978189512681e-06, |
|
"loss": 0.492, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.8318890814558059, |
|
"grad_norm": 0.11094575375318527, |
|
"learning_rate": 6.100786645437481e-06, |
|
"loss": 0.4855, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.8362218370883883, |
|
"grad_norm": 0.10790159553289413, |
|
"learning_rate": 5.8029888490850005e-06, |
|
"loss": 0.4942, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.8405545927209706, |
|
"grad_norm": 0.11253953725099564, |
|
"learning_rate": 5.511685547716328e-06, |
|
"loss": 0.4829, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.8448873483535528, |
|
"grad_norm": 0.11395300179719925, |
|
"learning_rate": 5.226975291455477e-06, |
|
"loss": 0.4856, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.8492201039861352, |
|
"grad_norm": 0.1128508597612381, |
|
"learning_rate": 4.9489543999491045e-06, |
|
"loss": 0.4852, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.8535528596187175, |
|
"grad_norm": 0.11124628782272339, |
|
"learning_rate": 4.67771692978087e-06, |
|
"loss": 0.4885, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.8578856152512998, |
|
"grad_norm": 0.10993985086679459, |
|
"learning_rate": 4.413354642651369e-06, |
|
"loss": 0.4867, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8622183708838822, |
|
"grad_norm": 0.1036507710814476, |
|
"learning_rate": 4.1559569743344405e-06, |
|
"loss": 0.4839, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.8665511265164645, |
|
"grad_norm": 0.10577788203954697, |
|
"learning_rate": 3.90561100442036e-06, |
|
"loss": 0.4909, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8665511265164645, |
|
"eval_loss": 0.48741263151168823, |
|
"eval_runtime": 139.1562, |
|
"eval_samples_per_second": 2.709, |
|
"eval_steps_per_second": 0.683, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8708838821490468, |
|
"grad_norm": 0.10687436163425446, |
|
"learning_rate": 3.662401426856177e-06, |
|
"loss": 0.4912, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.8752166377816292, |
|
"grad_norm": 0.10468524694442749, |
|
"learning_rate": 3.4264105212930915e-06, |
|
"loss": 0.4882, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.8795493934142115, |
|
"grad_norm": 0.11409106850624084, |
|
"learning_rate": 3.197718125250618e-06, |
|
"loss": 0.4834, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.8838821490467937, |
|
"grad_norm": 0.1069907397031784, |
|
"learning_rate": 2.9764016071069434e-06, |
|
"loss": 0.4881, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8882149046793761, |
|
"grad_norm": 0.10976656526327133, |
|
"learning_rate": 2.7625358399246376e-06, |
|
"loss": 0.4852, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.8925476603119584, |
|
"grad_norm": 0.11071466654539108, |
|
"learning_rate": 2.5561931761205082e-06, |
|
"loss": 0.4879, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.8968804159445407, |
|
"grad_norm": 0.10794007033109665, |
|
"learning_rate": 2.3574434229882145e-06, |
|
"loss": 0.4865, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.901213171577123, |
|
"grad_norm": 0.11333485692739487, |
|
"learning_rate": 2.166353819081968e-06, |
|
"loss": 0.4855, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9055459272097054, |
|
"grad_norm": 0.10130605846643448, |
|
"learning_rate": 1.982989011469172e-06, |
|
"loss": 0.4868, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.9098786828422877, |
|
"grad_norm": 0.10454142093658447, |
|
"learning_rate": 1.8074110338598682e-06, |
|
"loss": 0.487, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.91421143847487, |
|
"grad_norm": 0.10157745331525803, |
|
"learning_rate": 1.6396792856202298e-06, |
|
"loss": 0.4881, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.9185441941074524, |
|
"grad_norm": 0.11562693864107132, |
|
"learning_rate": 1.479850511677322e-06, |
|
"loss": 0.4877, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.9228769497400346, |
|
"grad_norm": 0.12777337431907654, |
|
"learning_rate": 1.3279787833218537e-06, |
|
"loss": 0.4886, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.9272097053726169, |
|
"grad_norm": 0.10074026137590408, |
|
"learning_rate": 1.1841154799154374e-06, |
|
"loss": 0.4856, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.9315424610051993, |
|
"grad_norm": 0.11187005788087845, |
|
"learning_rate": 1.0483092715085879e-06, |
|
"loss": 0.4872, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.9358752166377816, |
|
"grad_norm": 0.10271576046943665, |
|
"learning_rate": 9.206061023752516e-07, |
|
"loss": 0.4867, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.9402079722703639, |
|
"grad_norm": 0.11686773598194122, |
|
"learning_rate": 8.010491754695177e-07, |
|
"loss": 0.4868, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.9445407279029463, |
|
"grad_norm": 0.12068697810173035, |
|
"learning_rate": 6.896789378097179e-07, |
|
"loss": 0.4911, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.9488734835355286, |
|
"grad_norm": 0.10469616949558258, |
|
"learning_rate": 5.865330667949115e-07, |
|
"loss": 0.485, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.9532062391681109, |
|
"grad_norm": 0.11302938312292099, |
|
"learning_rate": 4.916464574583251e-07, |
|
"loss": 0.4864, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9575389948006933, |
|
"grad_norm": 0.11449886858463287, |
|
"learning_rate": 4.050512106620913e-07, |
|
"loss": 0.4873, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.9618717504332756, |
|
"grad_norm": 0.1062023788690567, |
|
"learning_rate": 3.267766222372931e-07, |
|
"loss": 0.4885, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9662045060658578, |
|
"grad_norm": 0.11473377794027328, |
|
"learning_rate": 2.568491730729539e-07, |
|
"loss": 0.4869, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.9705372616984402, |
|
"grad_norm": 0.10777969658374786, |
|
"learning_rate": 1.95292520157353e-07, |
|
"loss": 0.489, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9748700173310225, |
|
"grad_norm": 0.10438723862171173, |
|
"learning_rate": 1.4212748857468926e-07, |
|
"loss": 0.4871, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.9792027729636048, |
|
"grad_norm": 0.10945618152618408, |
|
"learning_rate": 9.737206445979696e-08, |
|
"loss": 0.4875, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.9835355285961872, |
|
"grad_norm": 0.10462047904729843, |
|
"learning_rate": 6.104138891329659e-08, |
|
"loss": 0.4895, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.9878682842287695, |
|
"grad_norm": 0.1181010752916336, |
|
"learning_rate": 3.314775287923677e-08, |
|
"loss": 0.4857, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9922010398613518, |
|
"grad_norm": 0.10257267206907272, |
|
"learning_rate": 1.3700592986998217e-08, |
|
"loss": 0.4866, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.9965337954939342, |
|
"grad_norm": 0.10488082468509674, |
|
"learning_rate": 2.7064883587807345e-09, |
|
"loss": 0.4893, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1154, |
|
"total_flos": 8.066906520991826e+17, |
|
"train_loss": 0.5547944433354423, |
|
"train_runtime": 22221.3478, |
|
"train_samples_per_second": 0.831, |
|
"train_steps_per_second": 0.052 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1154, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.066906520991826e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |