|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 5914, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0033818058843422386, |
|
"grad_norm": 1.5605809688568115, |
|
"learning_rate": 9.991538331358945e-05, |
|
"loss": 1.5931, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006763611768684477, |
|
"grad_norm": 0.2704204320907593, |
|
"learning_rate": 9.974614994076833e-05, |
|
"loss": 0.6298, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.010145417653026716, |
|
"grad_norm": 0.20275332033634186, |
|
"learning_rate": 9.957691656794721e-05, |
|
"loss": 0.6147, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.013527223537368955, |
|
"grad_norm": 0.1859731376171112, |
|
"learning_rate": 9.940768319512609e-05, |
|
"loss": 0.5784, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.016909029421711193, |
|
"grad_norm": 0.20401914417743683, |
|
"learning_rate": 9.923844982230497e-05, |
|
"loss": 0.5696, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.020290835306053433, |
|
"grad_norm": 0.21193653345108032, |
|
"learning_rate": 9.906921644948385e-05, |
|
"loss": 0.565, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.023672641190395673, |
|
"grad_norm": 0.20135045051574707, |
|
"learning_rate": 9.889998307666272e-05, |
|
"loss": 0.5251, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02705444707473791, |
|
"grad_norm": 0.20686441659927368, |
|
"learning_rate": 9.873074970384161e-05, |
|
"loss": 0.5295, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03043625295908015, |
|
"grad_norm": 0.20559485256671906, |
|
"learning_rate": 9.856151633102048e-05, |
|
"loss": 0.5272, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.033818058843422386, |
|
"grad_norm": 0.23076851665973663, |
|
"learning_rate": 9.839228295819937e-05, |
|
"loss": 0.5284, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.037199864727764625, |
|
"grad_norm": 0.21439723670482635, |
|
"learning_rate": 9.822304958537824e-05, |
|
"loss": 0.5088, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.040581670612106865, |
|
"grad_norm": 0.23800402879714966, |
|
"learning_rate": 9.805381621255713e-05, |
|
"loss": 0.5123, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.043963476496449105, |
|
"grad_norm": 0.2768985629081726, |
|
"learning_rate": 9.7884582839736e-05, |
|
"loss": 0.4884, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.047345282380791345, |
|
"grad_norm": 0.26437142491340637, |
|
"learning_rate": 9.771534946691489e-05, |
|
"loss": 0.4748, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.05072708826513358, |
|
"grad_norm": 0.2534578740596771, |
|
"learning_rate": 9.754611609409376e-05, |
|
"loss": 0.4766, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05410889414947582, |
|
"grad_norm": 0.2550220787525177, |
|
"learning_rate": 9.737688272127265e-05, |
|
"loss": 0.4613, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05749070003381806, |
|
"grad_norm": 0.2937515377998352, |
|
"learning_rate": 9.720764934845152e-05, |
|
"loss": 0.4631, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0608725059181603, |
|
"grad_norm": 0.31693148612976074, |
|
"learning_rate": 9.703841597563041e-05, |
|
"loss": 0.4454, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06425431180250253, |
|
"grad_norm": 0.3209972679615021, |
|
"learning_rate": 9.686918260280928e-05, |
|
"loss": 0.4389, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06763611768684477, |
|
"grad_norm": 0.26218757033348083, |
|
"learning_rate": 9.669994922998817e-05, |
|
"loss": 0.4393, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07101792357118701, |
|
"grad_norm": 0.23638683557510376, |
|
"learning_rate": 9.653071585716704e-05, |
|
"loss": 0.393, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07439972945552925, |
|
"grad_norm": 0.2807103395462036, |
|
"learning_rate": 9.636148248434593e-05, |
|
"loss": 0.4012, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07778153533987149, |
|
"grad_norm": 0.29169249534606934, |
|
"learning_rate": 9.61922491115248e-05, |
|
"loss": 0.4146, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08116334122421373, |
|
"grad_norm": 0.26288753747940063, |
|
"learning_rate": 9.602301573870369e-05, |
|
"loss": 0.4185, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08454514710855597, |
|
"grad_norm": 0.2438131868839264, |
|
"learning_rate": 9.585378236588256e-05, |
|
"loss": 0.3786, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08792695299289821, |
|
"grad_norm": 0.2577499449253082, |
|
"learning_rate": 9.568454899306143e-05, |
|
"loss": 0.4075, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09130875887724045, |
|
"grad_norm": 0.2468397617340088, |
|
"learning_rate": 9.551531562024032e-05, |
|
"loss": 0.382, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.09469056476158269, |
|
"grad_norm": 0.27210426330566406, |
|
"learning_rate": 9.53460822474192e-05, |
|
"loss": 0.3853, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09807237064592493, |
|
"grad_norm": 0.2579999566078186, |
|
"learning_rate": 9.517684887459808e-05, |
|
"loss": 0.3816, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.10145417653026716, |
|
"grad_norm": 0.24862495064735413, |
|
"learning_rate": 9.500761550177695e-05, |
|
"loss": 0.3481, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1048359824146094, |
|
"grad_norm": 0.289042204618454, |
|
"learning_rate": 9.483838212895584e-05, |
|
"loss": 0.3961, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.10821778829895164, |
|
"grad_norm": 0.2919853627681732, |
|
"learning_rate": 9.466914875613471e-05, |
|
"loss": 0.3332, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.11159959418329388, |
|
"grad_norm": 0.25639480352401733, |
|
"learning_rate": 9.44999153833136e-05, |
|
"loss": 0.3644, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.11498140006763612, |
|
"grad_norm": 0.26778921484947205, |
|
"learning_rate": 9.433068201049247e-05, |
|
"loss": 0.3695, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.11836320595197836, |
|
"grad_norm": 0.24935244023799896, |
|
"learning_rate": 9.416144863767136e-05, |
|
"loss": 0.3433, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1217450118363206, |
|
"grad_norm": 0.2668505609035492, |
|
"learning_rate": 9.399221526485023e-05, |
|
"loss": 0.3689, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.12512681772066284, |
|
"grad_norm": 0.2972855865955353, |
|
"learning_rate": 9.382298189202912e-05, |
|
"loss": 0.3153, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.12850862360500506, |
|
"grad_norm": 0.2680118978023529, |
|
"learning_rate": 9.3653748519208e-05, |
|
"loss": 0.3332, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.13189042948934732, |
|
"grad_norm": 0.28004977107048035, |
|
"learning_rate": 9.348451514638688e-05, |
|
"loss": 0.342, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.13527223537368954, |
|
"grad_norm": 0.2519853115081787, |
|
"learning_rate": 9.331528177356575e-05, |
|
"loss": 0.3194, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1386540412580318, |
|
"grad_norm": 0.2775399386882782, |
|
"learning_rate": 9.314604840074464e-05, |
|
"loss": 0.3113, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.14203584714237402, |
|
"grad_norm": 0.27479955554008484, |
|
"learning_rate": 9.297681502792351e-05, |
|
"loss": 0.3148, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.14541765302671628, |
|
"grad_norm": 0.3637036681175232, |
|
"learning_rate": 9.28075816551024e-05, |
|
"loss": 0.3065, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1487994589110585, |
|
"grad_norm": 0.2898998260498047, |
|
"learning_rate": 9.263834828228127e-05, |
|
"loss": 0.2799, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.15218126479540076, |
|
"grad_norm": 0.295907199382782, |
|
"learning_rate": 9.246911490946015e-05, |
|
"loss": 0.2984, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.15556307067974298, |
|
"grad_norm": 0.3111323118209839, |
|
"learning_rate": 9.229988153663903e-05, |
|
"loss": 0.2854, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1589448765640852, |
|
"grad_norm": 0.2768048346042633, |
|
"learning_rate": 9.21306481638179e-05, |
|
"loss": 0.2895, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.16232668244842746, |
|
"grad_norm": 0.34783947467803955, |
|
"learning_rate": 9.19614147909968e-05, |
|
"loss": 0.2686, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1657084883327697, |
|
"grad_norm": 0.3182440400123596, |
|
"learning_rate": 9.179218141817567e-05, |
|
"loss": 0.2603, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.16909029421711194, |
|
"grad_norm": 0.3452625572681427, |
|
"learning_rate": 9.162294804535455e-05, |
|
"loss": 0.296, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17247210010145417, |
|
"grad_norm": 0.329106867313385, |
|
"learning_rate": 9.145371467253343e-05, |
|
"loss": 0.2665, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.17585390598579642, |
|
"grad_norm": 0.3779897689819336, |
|
"learning_rate": 9.128448129971231e-05, |
|
"loss": 0.2545, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.17923571187013865, |
|
"grad_norm": 0.33585628867149353, |
|
"learning_rate": 9.111524792689119e-05, |
|
"loss": 0.2683, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1826175177544809, |
|
"grad_norm": 0.27421489357948303, |
|
"learning_rate": 9.094601455407007e-05, |
|
"loss": 0.2585, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.18599932363882313, |
|
"grad_norm": 0.37735405564308167, |
|
"learning_rate": 9.077678118124895e-05, |
|
"loss": 0.2676, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.18938112952316538, |
|
"grad_norm": 0.30768734216690063, |
|
"learning_rate": 9.060754780842783e-05, |
|
"loss": 0.2543, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.1927629354075076, |
|
"grad_norm": 0.3587827682495117, |
|
"learning_rate": 9.04383144356067e-05, |
|
"loss": 0.2249, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.19614474129184986, |
|
"grad_norm": 0.3213902711868286, |
|
"learning_rate": 9.026908106278559e-05, |
|
"loss": 0.2443, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1995265471761921, |
|
"grad_norm": 0.4303820729255676, |
|
"learning_rate": 9.009984768996447e-05, |
|
"loss": 0.2256, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.2029083530605343, |
|
"grad_norm": 0.2948843538761139, |
|
"learning_rate": 8.993061431714335e-05, |
|
"loss": 0.2295, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.20629015894487657, |
|
"grad_norm": 0.3263229727745056, |
|
"learning_rate": 8.976138094432223e-05, |
|
"loss": 0.239, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.2096719648292188, |
|
"grad_norm": 0.3471197783946991, |
|
"learning_rate": 8.95921475715011e-05, |
|
"loss": 0.2227, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.21305377071356105, |
|
"grad_norm": 0.3152846693992615, |
|
"learning_rate": 8.942291419867999e-05, |
|
"loss": 0.2359, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.21643557659790327, |
|
"grad_norm": 0.4284135401248932, |
|
"learning_rate": 8.925368082585886e-05, |
|
"loss": 0.2453, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.21981738248224553, |
|
"grad_norm": 0.37850794196128845, |
|
"learning_rate": 8.908444745303775e-05, |
|
"loss": 0.2245, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.22319918836658775, |
|
"grad_norm": 0.38802939653396606, |
|
"learning_rate": 8.891521408021662e-05, |
|
"loss": 0.2227, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.22658099425093, |
|
"grad_norm": 0.32752034068107605, |
|
"learning_rate": 8.87459807073955e-05, |
|
"loss": 0.2008, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.22996280013527223, |
|
"grad_norm": 0.3419714570045471, |
|
"learning_rate": 8.857674733457438e-05, |
|
"loss": 0.2049, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2333446060196145, |
|
"grad_norm": 0.3725152313709259, |
|
"learning_rate": 8.840751396175327e-05, |
|
"loss": 0.1922, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.2367264119039567, |
|
"grad_norm": 0.3231143653392792, |
|
"learning_rate": 8.823828058893214e-05, |
|
"loss": 0.2094, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.24010821778829894, |
|
"grad_norm": 0.3786492347717285, |
|
"learning_rate": 8.806904721611103e-05, |
|
"loss": 0.1917, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.2434900236726412, |
|
"grad_norm": 0.3543280363082886, |
|
"learning_rate": 8.78998138432899e-05, |
|
"loss": 0.2005, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.24687182955698342, |
|
"grad_norm": 0.336761474609375, |
|
"learning_rate": 8.773058047046879e-05, |
|
"loss": 0.174, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2502536354413257, |
|
"grad_norm": 0.3629204332828522, |
|
"learning_rate": 8.756134709764766e-05, |
|
"loss": 0.1851, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2536354413256679, |
|
"grad_norm": 0.3555070459842682, |
|
"learning_rate": 8.739211372482655e-05, |
|
"loss": 0.1832, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2570172472100101, |
|
"grad_norm": 0.41113823652267456, |
|
"learning_rate": 8.722288035200542e-05, |
|
"loss": 0.1772, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2603990530943524, |
|
"grad_norm": 0.32061767578125, |
|
"learning_rate": 8.70536469791843e-05, |
|
"loss": 0.1707, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.26378085897869463, |
|
"grad_norm": 0.35991913080215454, |
|
"learning_rate": 8.688441360636318e-05, |
|
"loss": 0.1487, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.26716266486303686, |
|
"grad_norm": 0.3859257102012634, |
|
"learning_rate": 8.671518023354207e-05, |
|
"loss": 0.1688, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2705444707473791, |
|
"grad_norm": 0.31017613410949707, |
|
"learning_rate": 8.654594686072094e-05, |
|
"loss": 0.1624, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.27392627663172137, |
|
"grad_norm": 0.35503777861595154, |
|
"learning_rate": 8.637671348789981e-05, |
|
"loss": 0.1642, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2773080825160636, |
|
"grad_norm": 0.3433260917663574, |
|
"learning_rate": 8.62074801150787e-05, |
|
"loss": 0.1844, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2806898884004058, |
|
"grad_norm": 0.3865962624549866, |
|
"learning_rate": 8.603824674225757e-05, |
|
"loss": 0.1506, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.28407169428474804, |
|
"grad_norm": 0.3324780762195587, |
|
"learning_rate": 8.586901336943646e-05, |
|
"loss": 0.1632, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.28745350016909027, |
|
"grad_norm": 0.4064556062221527, |
|
"learning_rate": 8.569977999661533e-05, |
|
"loss": 0.1519, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.29083530605343255, |
|
"grad_norm": 0.3946615159511566, |
|
"learning_rate": 8.553054662379422e-05, |
|
"loss": 0.157, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.2942171119377748, |
|
"grad_norm": 0.3700334429740906, |
|
"learning_rate": 8.536131325097309e-05, |
|
"loss": 0.141, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.297598917822117, |
|
"grad_norm": 0.420950710773468, |
|
"learning_rate": 8.519207987815198e-05, |
|
"loss": 0.1488, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.30098072370645923, |
|
"grad_norm": 0.39633265137672424, |
|
"learning_rate": 8.502284650533085e-05, |
|
"loss": 0.1545, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.3043625295908015, |
|
"grad_norm": 0.38442814350128174, |
|
"learning_rate": 8.485361313250974e-05, |
|
"loss": 0.141, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.30774433547514374, |
|
"grad_norm": 0.41429537534713745, |
|
"learning_rate": 8.468437975968861e-05, |
|
"loss": 0.144, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.31112614135948596, |
|
"grad_norm": 0.3561369478702545, |
|
"learning_rate": 8.45151463868675e-05, |
|
"loss": 0.134, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.3145079472438282, |
|
"grad_norm": 0.3710062801837921, |
|
"learning_rate": 8.434591301404637e-05, |
|
"loss": 0.1457, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3178897531281704, |
|
"grad_norm": 0.3607446849346161, |
|
"learning_rate": 8.417667964122526e-05, |
|
"loss": 0.122, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.3212715590125127, |
|
"grad_norm": 0.34604352712631226, |
|
"learning_rate": 8.400744626840413e-05, |
|
"loss": 0.1414, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3246533648968549, |
|
"grad_norm": 0.27822059392929077, |
|
"learning_rate": 8.383821289558302e-05, |
|
"loss": 0.1281, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.32803517078119715, |
|
"grad_norm": 0.38080519437789917, |
|
"learning_rate": 8.366897952276189e-05, |
|
"loss": 0.1392, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3314169766655394, |
|
"grad_norm": 0.4061764180660248, |
|
"learning_rate": 8.349974614994078e-05, |
|
"loss": 0.1179, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.33479878254988166, |
|
"grad_norm": 0.28504157066345215, |
|
"learning_rate": 8.333051277711965e-05, |
|
"loss": 0.1284, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3381805884342239, |
|
"grad_norm": 0.3242844343185425, |
|
"learning_rate": 8.316127940429852e-05, |
|
"loss": 0.1184, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3415623943185661, |
|
"grad_norm": 0.3317604064941406, |
|
"learning_rate": 8.299204603147741e-05, |
|
"loss": 0.13, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.34494420020290834, |
|
"grad_norm": 0.39583227038383484, |
|
"learning_rate": 8.282281265865628e-05, |
|
"loss": 0.1146, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.3483260060872506, |
|
"grad_norm": 0.33169683814048767, |
|
"learning_rate": 8.265357928583517e-05, |
|
"loss": 0.1162, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.35170781197159284, |
|
"grad_norm": 0.38136041164398193, |
|
"learning_rate": 8.248434591301404e-05, |
|
"loss": 0.1268, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.35508961785593507, |
|
"grad_norm": 0.28301218152046204, |
|
"learning_rate": 8.231511254019293e-05, |
|
"loss": 0.1117, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3584714237402773, |
|
"grad_norm": 0.3950181007385254, |
|
"learning_rate": 8.21458791673718e-05, |
|
"loss": 0.1172, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.3618532296246195, |
|
"grad_norm": 0.40106379985809326, |
|
"learning_rate": 8.197664579455069e-05, |
|
"loss": 0.1134, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.3652350355089618, |
|
"grad_norm": 0.3036629259586334, |
|
"learning_rate": 8.180741242172956e-05, |
|
"loss": 0.0977, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.36861684139330403, |
|
"grad_norm": 0.35834598541259766, |
|
"learning_rate": 8.163817904890845e-05, |
|
"loss": 0.1226, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.37199864727764625, |
|
"grad_norm": 0.3107944130897522, |
|
"learning_rate": 8.146894567608732e-05, |
|
"loss": 0.0932, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3753804531619885, |
|
"grad_norm": 0.2708600163459778, |
|
"learning_rate": 8.129971230326621e-05, |
|
"loss": 0.1071, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.37876225904633076, |
|
"grad_norm": 0.3161390721797943, |
|
"learning_rate": 8.113047893044508e-05, |
|
"loss": 0.0951, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.382144064930673, |
|
"grad_norm": 0.2967537045478821, |
|
"learning_rate": 8.096124555762397e-05, |
|
"loss": 0.0961, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3855258708150152, |
|
"grad_norm": 0.3615525960922241, |
|
"learning_rate": 8.079201218480284e-05, |
|
"loss": 0.0955, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.38890767669935744, |
|
"grad_norm": 0.33469459414482117, |
|
"learning_rate": 8.062277881198173e-05, |
|
"loss": 0.1028, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3922894825836997, |
|
"grad_norm": 0.44299551844596863, |
|
"learning_rate": 8.04535454391606e-05, |
|
"loss": 0.1065, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.39567128846804195, |
|
"grad_norm": 0.3324688971042633, |
|
"learning_rate": 8.028431206633949e-05, |
|
"loss": 0.1026, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.3990530943523842, |
|
"grad_norm": 0.3845025599002838, |
|
"learning_rate": 8.011507869351836e-05, |
|
"loss": 0.1166, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.4024349002367264, |
|
"grad_norm": 0.36462751030921936, |
|
"learning_rate": 7.994584532069724e-05, |
|
"loss": 0.0955, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.4058167061210686, |
|
"grad_norm": 0.33478081226348877, |
|
"learning_rate": 7.977661194787612e-05, |
|
"loss": 0.0966, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4091985120054109, |
|
"grad_norm": 0.3839697539806366, |
|
"learning_rate": 7.9607378575055e-05, |
|
"loss": 0.1011, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.41258031788975313, |
|
"grad_norm": 0.33828845620155334, |
|
"learning_rate": 7.943814520223388e-05, |
|
"loss": 0.094, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.41596212377409536, |
|
"grad_norm": 0.35532712936401367, |
|
"learning_rate": 7.926891182941276e-05, |
|
"loss": 0.0771, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.4193439296584376, |
|
"grad_norm": 0.32812246680259705, |
|
"learning_rate": 7.909967845659164e-05, |
|
"loss": 0.0931, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.42272573554277987, |
|
"grad_norm": 0.4312772750854492, |
|
"learning_rate": 7.893044508377052e-05, |
|
"loss": 0.091, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.4261075414271221, |
|
"grad_norm": 0.38822853565216064, |
|
"learning_rate": 7.87612117109494e-05, |
|
"loss": 0.0936, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.4294893473114643, |
|
"grad_norm": 0.30626174807548523, |
|
"learning_rate": 7.859197833812828e-05, |
|
"loss": 0.0773, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.43287115319580655, |
|
"grad_norm": 0.36472904682159424, |
|
"learning_rate": 7.842274496530716e-05, |
|
"loss": 0.0776, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.43625295908014877, |
|
"grad_norm": 0.312610000371933, |
|
"learning_rate": 7.825351159248604e-05, |
|
"loss": 0.0882, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.43963476496449105, |
|
"grad_norm": 0.34325119853019714, |
|
"learning_rate": 7.808427821966492e-05, |
|
"loss": 0.0836, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4430165708488333, |
|
"grad_norm": 0.39519986510276794, |
|
"learning_rate": 7.79150448468438e-05, |
|
"loss": 0.084, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.4463983767331755, |
|
"grad_norm": 0.35263752937316895, |
|
"learning_rate": 7.774581147402268e-05, |
|
"loss": 0.0713, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.44978018261751773, |
|
"grad_norm": 0.36037376523017883, |
|
"learning_rate": 7.757657810120156e-05, |
|
"loss": 0.069, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.45316198850186, |
|
"grad_norm": 0.38161391019821167, |
|
"learning_rate": 7.740734472838044e-05, |
|
"loss": 0.0693, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.45654379438620224, |
|
"grad_norm": 0.3896481394767761, |
|
"learning_rate": 7.723811135555932e-05, |
|
"loss": 0.0793, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.45992560027054447, |
|
"grad_norm": 0.3284771740436554, |
|
"learning_rate": 7.70688779827382e-05, |
|
"loss": 0.0874, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.4633074061548867, |
|
"grad_norm": 0.3116714358329773, |
|
"learning_rate": 7.689964460991708e-05, |
|
"loss": 0.0734, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.466689212039229, |
|
"grad_norm": 0.29977601766586304, |
|
"learning_rate": 7.673041123709595e-05, |
|
"loss": 0.0696, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.4700710179235712, |
|
"grad_norm": 0.30298420786857605, |
|
"learning_rate": 7.656117786427484e-05, |
|
"loss": 0.0755, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.4734528238079134, |
|
"grad_norm": 0.3856925368309021, |
|
"learning_rate": 7.639194449145371e-05, |
|
"loss": 0.0791, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.47683462969225565, |
|
"grad_norm": 0.3421929180622101, |
|
"learning_rate": 7.62227111186326e-05, |
|
"loss": 0.0674, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.4802164355765979, |
|
"grad_norm": 0.3304542899131775, |
|
"learning_rate": 7.605347774581147e-05, |
|
"loss": 0.076, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.48359824146094016, |
|
"grad_norm": 0.29870203137397766, |
|
"learning_rate": 7.588424437299036e-05, |
|
"loss": 0.0703, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.4869800473452824, |
|
"grad_norm": 0.2472551167011261, |
|
"learning_rate": 7.571501100016923e-05, |
|
"loss": 0.069, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.4903618532296246, |
|
"grad_norm": 0.2860611081123352, |
|
"learning_rate": 7.554577762734812e-05, |
|
"loss": 0.0667, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.49374365911396684, |
|
"grad_norm": 0.3618302345275879, |
|
"learning_rate": 7.537654425452699e-05, |
|
"loss": 0.0597, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.4971254649983091, |
|
"grad_norm": 0.34842416644096375, |
|
"learning_rate": 7.520731088170588e-05, |
|
"loss": 0.0715, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.5005072708826513, |
|
"grad_norm": 0.2862137258052826, |
|
"learning_rate": 7.503807750888475e-05, |
|
"loss": 0.0778, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.5038890767669936, |
|
"grad_norm": 0.3283160626888275, |
|
"learning_rate": 7.486884413606364e-05, |
|
"loss": 0.073, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.5072708826513358, |
|
"grad_norm": 0.2540145218372345, |
|
"learning_rate": 7.469961076324251e-05, |
|
"loss": 0.067, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.510652688535678, |
|
"grad_norm": 0.32044336199760437, |
|
"learning_rate": 7.45303773904214e-05, |
|
"loss": 0.063, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.5140344944200202, |
|
"grad_norm": 0.3898891508579254, |
|
"learning_rate": 7.436114401760027e-05, |
|
"loss": 0.0695, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.5174163003043625, |
|
"grad_norm": 0.30922603607177734, |
|
"learning_rate": 7.419191064477916e-05, |
|
"loss": 0.071, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.5207981061887048, |
|
"grad_norm": 0.3513735234737396, |
|
"learning_rate": 7.402267727195803e-05, |
|
"loss": 0.0722, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.524179912073047, |
|
"grad_norm": 0.2806611955165863, |
|
"learning_rate": 7.38534438991369e-05, |
|
"loss": 0.0662, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.5275617179573893, |
|
"grad_norm": 0.32641470432281494, |
|
"learning_rate": 7.368421052631579e-05, |
|
"loss": 0.0645, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.5309435238417315, |
|
"grad_norm": 0.2627265751361847, |
|
"learning_rate": 7.351497715349466e-05, |
|
"loss": 0.0594, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.5343253297260737, |
|
"grad_norm": 0.2586800158023834, |
|
"learning_rate": 7.334574378067355e-05, |
|
"loss": 0.0603, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5377071356104159, |
|
"grad_norm": 0.24359896779060364, |
|
"learning_rate": 7.317651040785242e-05, |
|
"loss": 0.0638, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5410889414947582, |
|
"grad_norm": 0.3084506392478943, |
|
"learning_rate": 7.300727703503131e-05, |
|
"loss": 0.0658, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5444707473791004, |
|
"grad_norm": 0.29731103777885437, |
|
"learning_rate": 7.283804366221018e-05, |
|
"loss": 0.0548, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5478525532634427, |
|
"grad_norm": 0.25320005416870117, |
|
"learning_rate": 7.266881028938907e-05, |
|
"loss": 0.0533, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.551234359147785, |
|
"grad_norm": 0.274166077375412, |
|
"learning_rate": 7.249957691656794e-05, |
|
"loss": 0.048, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5546161650321272, |
|
"grad_norm": 0.29488497972488403, |
|
"learning_rate": 7.233034354374683e-05, |
|
"loss": 0.054, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.5579979709164694, |
|
"grad_norm": 0.2893562316894531, |
|
"learning_rate": 7.21611101709257e-05, |
|
"loss": 0.0589, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5613797768008116, |
|
"grad_norm": 0.29615455865859985, |
|
"learning_rate": 7.199187679810459e-05, |
|
"loss": 0.0524, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5647615826851539, |
|
"grad_norm": 0.3037727475166321, |
|
"learning_rate": 7.182264342528346e-05, |
|
"loss": 0.0486, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5681433885694961, |
|
"grad_norm": 0.2997746765613556, |
|
"learning_rate": 7.165341005246235e-05, |
|
"loss": 0.047, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5715251944538383, |
|
"grad_norm": 0.31440579891204834, |
|
"learning_rate": 7.148417667964122e-05, |
|
"loss": 0.0467, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5749070003381805, |
|
"grad_norm": 0.345804363489151, |
|
"learning_rate": 7.131494330682011e-05, |
|
"loss": 0.0543, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5782888062225229, |
|
"grad_norm": 0.3310382664203644, |
|
"learning_rate": 7.114570993399898e-05, |
|
"loss": 0.0561, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5816706121068651, |
|
"grad_norm": 0.33962053060531616, |
|
"learning_rate": 7.097647656117787e-05, |
|
"loss": 0.0468, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5850524179912073, |
|
"grad_norm": 0.303245484828949, |
|
"learning_rate": 7.080724318835674e-05, |
|
"loss": 0.0503, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.5884342238755496, |
|
"grad_norm": 0.3114894926548004, |
|
"learning_rate": 7.063800981553562e-05, |
|
"loss": 0.0482, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.5918160297598918, |
|
"grad_norm": 0.23478536307811737, |
|
"learning_rate": 7.04687764427145e-05, |
|
"loss": 0.0496, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.595197835644234, |
|
"grad_norm": 0.3116193413734436, |
|
"learning_rate": 7.029954306989338e-05, |
|
"loss": 0.0498, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5985796415285762, |
|
"grad_norm": 0.30709898471832275, |
|
"learning_rate": 7.013030969707226e-05, |
|
"loss": 0.0528, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.6019614474129185, |
|
"grad_norm": 0.3018944561481476, |
|
"learning_rate": 6.996107632425114e-05, |
|
"loss": 0.0466, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.6053432532972607, |
|
"grad_norm": 0.32787784934043884, |
|
"learning_rate": 6.979184295143002e-05, |
|
"loss": 0.0513, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.608725059181603, |
|
"grad_norm": 0.30039939284324646, |
|
"learning_rate": 6.962260957860891e-05, |
|
"loss": 0.0504, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6121068650659452, |
|
"grad_norm": 0.3414636552333832, |
|
"learning_rate": 6.94533762057878e-05, |
|
"loss": 0.0499, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.6154886709502875, |
|
"grad_norm": 0.20751340687274933, |
|
"learning_rate": 6.928414283296667e-05, |
|
"loss": 0.0437, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.6188704768346297, |
|
"grad_norm": 0.3419632315635681, |
|
"learning_rate": 6.911490946014556e-05, |
|
"loss": 0.049, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.6222522827189719, |
|
"grad_norm": 0.27273499965667725, |
|
"learning_rate": 6.894567608732443e-05, |
|
"loss": 0.0444, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.6256340886033142, |
|
"grad_norm": 0.31872832775115967, |
|
"learning_rate": 6.87764427145033e-05, |
|
"loss": 0.0535, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.6290158944876564, |
|
"grad_norm": 0.24325081706047058, |
|
"learning_rate": 6.860720934168219e-05, |
|
"loss": 0.049, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.6323977003719986, |
|
"grad_norm": 0.3723507821559906, |
|
"learning_rate": 6.843797596886106e-05, |
|
"loss": 0.0434, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.6357795062563408, |
|
"grad_norm": 0.24727103114128113, |
|
"learning_rate": 6.826874259603995e-05, |
|
"loss": 0.0452, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6391613121406832, |
|
"grad_norm": 0.3109544515609741, |
|
"learning_rate": 6.809950922321882e-05, |
|
"loss": 0.044, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.6425431180250254, |
|
"grad_norm": 0.27794697880744934, |
|
"learning_rate": 6.793027585039771e-05, |
|
"loss": 0.0403, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6459249239093676, |
|
"grad_norm": 0.3061220347881317, |
|
"learning_rate": 6.776104247757658e-05, |
|
"loss": 0.0425, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.6493067297937098, |
|
"grad_norm": 0.25127366185188293, |
|
"learning_rate": 6.759180910475547e-05, |
|
"loss": 0.0432, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.6526885356780521, |
|
"grad_norm": 0.26195022463798523, |
|
"learning_rate": 6.742257573193434e-05, |
|
"loss": 0.042, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.6560703415623943, |
|
"grad_norm": 0.23891383409500122, |
|
"learning_rate": 6.725334235911323e-05, |
|
"loss": 0.0404, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.6594521474467365, |
|
"grad_norm": 0.26952946186065674, |
|
"learning_rate": 6.70841089862921e-05, |
|
"loss": 0.0414, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6628339533310788, |
|
"grad_norm": 0.2608715295791626, |
|
"learning_rate": 6.691487561347099e-05, |
|
"loss": 0.0418, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.6662157592154211, |
|
"grad_norm": 0.3117476999759674, |
|
"learning_rate": 6.674564224064986e-05, |
|
"loss": 0.0417, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.6695975650997633, |
|
"grad_norm": 0.38972756266593933, |
|
"learning_rate": 6.657640886782875e-05, |
|
"loss": 0.0432, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.6729793709841055, |
|
"grad_norm": 0.23405376076698303, |
|
"learning_rate": 6.640717549500762e-05, |
|
"loss": 0.0448, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.6763611768684478, |
|
"grad_norm": 0.2317565679550171, |
|
"learning_rate": 6.623794212218651e-05, |
|
"loss": 0.0391, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.67974298275279, |
|
"grad_norm": 0.2788334786891937, |
|
"learning_rate": 6.606870874936538e-05, |
|
"loss": 0.041, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6831247886371322, |
|
"grad_norm": 0.2472919374704361, |
|
"learning_rate": 6.589947537654427e-05, |
|
"loss": 0.0413, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6865065945214744, |
|
"grad_norm": 0.31981515884399414, |
|
"learning_rate": 6.573024200372314e-05, |
|
"loss": 0.0394, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6898884004058167, |
|
"grad_norm": 0.3317371606826782, |
|
"learning_rate": 6.556100863090201e-05, |
|
"loss": 0.0405, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.6932702062901589, |
|
"grad_norm": 0.22539494931697845, |
|
"learning_rate": 6.53917752580809e-05, |
|
"loss": 0.0364, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.6966520121745012, |
|
"grad_norm": 0.2573080360889435, |
|
"learning_rate": 6.522254188525977e-05, |
|
"loss": 0.0404, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.7000338180588435, |
|
"grad_norm": 0.3429962992668152, |
|
"learning_rate": 6.505330851243866e-05, |
|
"loss": 0.0432, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.7034156239431857, |
|
"grad_norm": 0.26425597071647644, |
|
"learning_rate": 6.488407513961753e-05, |
|
"loss": 0.0383, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.7067974298275279, |
|
"grad_norm": 0.28535112738609314, |
|
"learning_rate": 6.471484176679642e-05, |
|
"loss": 0.042, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.7101792357118701, |
|
"grad_norm": 0.27274271845817566, |
|
"learning_rate": 6.45456083939753e-05, |
|
"loss": 0.0358, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7135610415962124, |
|
"grad_norm": 0.24476689100265503, |
|
"learning_rate": 6.437637502115418e-05, |
|
"loss": 0.0358, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.7169428474805546, |
|
"grad_norm": 0.295635461807251, |
|
"learning_rate": 6.420714164833305e-05, |
|
"loss": 0.0364, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.7203246533648968, |
|
"grad_norm": 0.3371022343635559, |
|
"learning_rate": 6.403790827551194e-05, |
|
"loss": 0.0381, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.723706459249239, |
|
"grad_norm": 0.24469931423664093, |
|
"learning_rate": 6.386867490269081e-05, |
|
"loss": 0.0384, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.7270882651335814, |
|
"grad_norm": 0.20693735778331757, |
|
"learning_rate": 6.36994415298697e-05, |
|
"loss": 0.0369, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.7304700710179236, |
|
"grad_norm": 0.24583208560943604, |
|
"learning_rate": 6.353020815704857e-05, |
|
"loss": 0.0367, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.7338518769022658, |
|
"grad_norm": 0.29009056091308594, |
|
"learning_rate": 6.336097478422746e-05, |
|
"loss": 0.038, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.7372336827866081, |
|
"grad_norm": 0.260128378868103, |
|
"learning_rate": 6.319174141140633e-05, |
|
"loss": 0.039, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.7406154886709503, |
|
"grad_norm": 0.2804548442363739, |
|
"learning_rate": 6.302250803858522e-05, |
|
"loss": 0.0399, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.7439972945552925, |
|
"grad_norm": 0.3354940116405487, |
|
"learning_rate": 6.28532746657641e-05, |
|
"loss": 0.0363, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7473791004396347, |
|
"grad_norm": 0.23447370529174805, |
|
"learning_rate": 6.268404129294298e-05, |
|
"loss": 0.042, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.750760906323977, |
|
"grad_norm": 0.26304641366004944, |
|
"learning_rate": 6.251480792012185e-05, |
|
"loss": 0.0364, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.7541427122083192, |
|
"grad_norm": 0.26174917817115784, |
|
"learning_rate": 6.234557454730073e-05, |
|
"loss": 0.0333, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.7575245180926615, |
|
"grad_norm": 0.2674359977245331, |
|
"learning_rate": 6.217634117447961e-05, |
|
"loss": 0.0387, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.7609063239770038, |
|
"grad_norm": 0.21239162981510162, |
|
"learning_rate": 6.200710780165849e-05, |
|
"loss": 0.0374, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.764288129861346, |
|
"grad_norm": 0.2588539719581604, |
|
"learning_rate": 6.183787442883737e-05, |
|
"loss": 0.0354, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.7676699357456882, |
|
"grad_norm": 0.253009557723999, |
|
"learning_rate": 6.166864105601625e-05, |
|
"loss": 0.0346, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.7710517416300304, |
|
"grad_norm": 0.2577788531780243, |
|
"learning_rate": 6.149940768319513e-05, |
|
"loss": 0.032, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.7744335475143727, |
|
"grad_norm": 0.2186667025089264, |
|
"learning_rate": 6.133017431037401e-05, |
|
"loss": 0.0329, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.7778153533987149, |
|
"grad_norm": 0.278125524520874, |
|
"learning_rate": 6.11609409375529e-05, |
|
"loss": 0.0338, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7811971592830571, |
|
"grad_norm": 0.3377946615219116, |
|
"learning_rate": 6.0991707564731774e-05, |
|
"loss": 0.037, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.7845789651673994, |
|
"grad_norm": 0.29264363646507263, |
|
"learning_rate": 6.082247419191065e-05, |
|
"loss": 0.0361, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.7879607710517417, |
|
"grad_norm": 0.23570716381072998, |
|
"learning_rate": 6.065324081908953e-05, |
|
"loss": 0.0368, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.7913425769360839, |
|
"grad_norm": 0.27007460594177246, |
|
"learning_rate": 6.048400744626841e-05, |
|
"loss": 0.0334, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.7947243828204261, |
|
"grad_norm": 0.2671580910682678, |
|
"learning_rate": 6.031477407344729e-05, |
|
"loss": 0.0342, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7981061887047683, |
|
"grad_norm": 0.2466389387845993, |
|
"learning_rate": 6.014554070062617e-05, |
|
"loss": 0.0403, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.8014879945891106, |
|
"grad_norm": 0.252189964056015, |
|
"learning_rate": 5.997630732780505e-05, |
|
"loss": 0.0336, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.8048698004734528, |
|
"grad_norm": 0.20659616589546204, |
|
"learning_rate": 5.980707395498393e-05, |
|
"loss": 0.0349, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.808251606357795, |
|
"grad_norm": 0.32111021876335144, |
|
"learning_rate": 5.963784058216281e-05, |
|
"loss": 0.0334, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.8116334122421373, |
|
"grad_norm": 0.21712100505828857, |
|
"learning_rate": 5.946860720934169e-05, |
|
"loss": 0.0325, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8150152181264796, |
|
"grad_norm": 0.2319028079509735, |
|
"learning_rate": 5.929937383652057e-05, |
|
"loss": 0.0316, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.8183970240108218, |
|
"grad_norm": 0.2731321156024933, |
|
"learning_rate": 5.9130140463699447e-05, |
|
"loss": 0.0319, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.821778829895164, |
|
"grad_norm": 0.25032860040664673, |
|
"learning_rate": 5.8960907090878327e-05, |
|
"loss": 0.0332, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.8251606357795063, |
|
"grad_norm": 0.2227882593870163, |
|
"learning_rate": 5.8791673718057207e-05, |
|
"loss": 0.0329, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.8285424416638485, |
|
"grad_norm": 0.2757587134838104, |
|
"learning_rate": 5.8622440345236087e-05, |
|
"loss": 0.0305, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.8319242475481907, |
|
"grad_norm": 0.23499512672424316, |
|
"learning_rate": 5.8453206972414966e-05, |
|
"loss": 0.0342, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.835306053432533, |
|
"grad_norm": 0.19104419648647308, |
|
"learning_rate": 5.8283973599593846e-05, |
|
"loss": 0.0341, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.8386878593168752, |
|
"grad_norm": 0.21760663390159607, |
|
"learning_rate": 5.8114740226772726e-05, |
|
"loss": 0.0287, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.8420696652012174, |
|
"grad_norm": 0.25679340958595276, |
|
"learning_rate": 5.7945506853951606e-05, |
|
"loss": 0.0356, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.8454514710855597, |
|
"grad_norm": 0.25008776783943176, |
|
"learning_rate": 5.777627348113048e-05, |
|
"loss": 0.0288, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.848833276969902, |
|
"grad_norm": 0.25389808416366577, |
|
"learning_rate": 5.760704010830936e-05, |
|
"loss": 0.0316, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.8522150828542442, |
|
"grad_norm": 0.2889176905155182, |
|
"learning_rate": 5.743780673548824e-05, |
|
"loss": 0.0333, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.8555968887385864, |
|
"grad_norm": 0.2402850240468979, |
|
"learning_rate": 5.726857336266712e-05, |
|
"loss": 0.0313, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.8589786946229286, |
|
"grad_norm": 0.217300683259964, |
|
"learning_rate": 5.7099339989846e-05, |
|
"loss": 0.0306, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.8623605005072709, |
|
"grad_norm": 0.32617250084877014, |
|
"learning_rate": 5.693010661702488e-05, |
|
"loss": 0.0293, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.8657423063916131, |
|
"grad_norm": 0.2338167428970337, |
|
"learning_rate": 5.676087324420376e-05, |
|
"loss": 0.0275, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.8691241122759553, |
|
"grad_norm": 0.2444329410791397, |
|
"learning_rate": 5.659163987138264e-05, |
|
"loss": 0.0281, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.8725059181602975, |
|
"grad_norm": 0.2600383758544922, |
|
"learning_rate": 5.642240649856152e-05, |
|
"loss": 0.0293, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.8758877240446399, |
|
"grad_norm": 0.19937843084335327, |
|
"learning_rate": 5.62531731257404e-05, |
|
"loss": 0.0327, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.8792695299289821, |
|
"grad_norm": 0.25247564911842346, |
|
"learning_rate": 5.608393975291928e-05, |
|
"loss": 0.0353, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8826513358133243, |
|
"grad_norm": 0.20916889607906342, |
|
"learning_rate": 5.591470638009816e-05, |
|
"loss": 0.0311, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.8860331416976666, |
|
"grad_norm": 0.26659512519836426, |
|
"learning_rate": 5.574547300727704e-05, |
|
"loss": 0.0299, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.8894149475820088, |
|
"grad_norm": 0.25376033782958984, |
|
"learning_rate": 5.557623963445592e-05, |
|
"loss": 0.03, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.892796753466351, |
|
"grad_norm": 0.20384323596954346, |
|
"learning_rate": 5.54070062616348e-05, |
|
"loss": 0.0302, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.8961785593506932, |
|
"grad_norm": 0.2751913070678711, |
|
"learning_rate": 5.523777288881368e-05, |
|
"loss": 0.0296, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.8995603652350355, |
|
"grad_norm": 0.20069202780723572, |
|
"learning_rate": 5.506853951599256e-05, |
|
"loss": 0.0261, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.9029421711193778, |
|
"grad_norm": 0.21684013307094574, |
|
"learning_rate": 5.489930614317144e-05, |
|
"loss": 0.0311, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.90632397700372, |
|
"grad_norm": 0.23668934404850006, |
|
"learning_rate": 5.473007277035032e-05, |
|
"loss": 0.0303, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.9097057828880623, |
|
"grad_norm": 0.24122962355613708, |
|
"learning_rate": 5.456083939752919e-05, |
|
"loss": 0.0294, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.9130875887724045, |
|
"grad_norm": 0.19830548763275146, |
|
"learning_rate": 5.439160602470807e-05, |
|
"loss": 0.0264, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.9164693946567467, |
|
"grad_norm": 0.20941804349422455, |
|
"learning_rate": 5.422237265188695e-05, |
|
"loss": 0.029, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.9198512005410889, |
|
"grad_norm": 0.2681092619895935, |
|
"learning_rate": 5.405313927906583e-05, |
|
"loss": 0.0304, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.9232330064254312, |
|
"grad_norm": 0.20790086686611176, |
|
"learning_rate": 5.388390590624471e-05, |
|
"loss": 0.027, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.9266148123097734, |
|
"grad_norm": 0.227376326918602, |
|
"learning_rate": 5.371467253342359e-05, |
|
"loss": 0.0267, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.9299966181941156, |
|
"grad_norm": 0.26523780822753906, |
|
"learning_rate": 5.354543916060247e-05, |
|
"loss": 0.0279, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.933378424078458, |
|
"grad_norm": 0.21116450428962708, |
|
"learning_rate": 5.337620578778135e-05, |
|
"loss": 0.027, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.9367602299628002, |
|
"grad_norm": 0.22488407790660858, |
|
"learning_rate": 5.320697241496023e-05, |
|
"loss": 0.0258, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.9401420358471424, |
|
"grad_norm": 0.23635748028755188, |
|
"learning_rate": 5.303773904213911e-05, |
|
"loss": 0.0277, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.9435238417314846, |
|
"grad_norm": 0.1881859302520752, |
|
"learning_rate": 5.286850566931799e-05, |
|
"loss": 0.026, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.9469056476158269, |
|
"grad_norm": 0.28154221177101135, |
|
"learning_rate": 5.269927229649687e-05, |
|
"loss": 0.0269, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.9502874535001691, |
|
"grad_norm": 0.2318260818719864, |
|
"learning_rate": 5.253003892367575e-05, |
|
"loss": 0.0271, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.9536692593845113, |
|
"grad_norm": 0.2238345593214035, |
|
"learning_rate": 5.236080555085463e-05, |
|
"loss": 0.0253, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.9570510652688535, |
|
"grad_norm": 0.2091434746980667, |
|
"learning_rate": 5.219157217803351e-05, |
|
"loss": 0.0251, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.9604328711531958, |
|
"grad_norm": 0.2255164235830307, |
|
"learning_rate": 5.202233880521239e-05, |
|
"loss": 0.0281, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.9638146770375381, |
|
"grad_norm": 0.245024174451828, |
|
"learning_rate": 5.185310543239127e-05, |
|
"loss": 0.0257, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.9671964829218803, |
|
"grad_norm": 0.18670466542243958, |
|
"learning_rate": 5.168387205957015e-05, |
|
"loss": 0.027, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.9705782888062225, |
|
"grad_norm": 0.22900165617465973, |
|
"learning_rate": 5.1514638686749025e-05, |
|
"loss": 0.0268, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.9739600946905648, |
|
"grad_norm": 0.2123824506998062, |
|
"learning_rate": 5.1345405313927905e-05, |
|
"loss": 0.0255, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.977341900574907, |
|
"grad_norm": 0.17296041548252106, |
|
"learning_rate": 5.1176171941106785e-05, |
|
"loss": 0.0256, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.9807237064592492, |
|
"grad_norm": 0.21980832517147064, |
|
"learning_rate": 5.1006938568285665e-05, |
|
"loss": 0.027, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9841055123435914, |
|
"grad_norm": 0.21194496750831604, |
|
"learning_rate": 5.0837705195464545e-05, |
|
"loss": 0.0253, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.9874873182279337, |
|
"grad_norm": 0.28237712383270264, |
|
"learning_rate": 5.0668471822643425e-05, |
|
"loss": 0.0261, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.9908691241122759, |
|
"grad_norm": 0.22561867535114288, |
|
"learning_rate": 5.0499238449822305e-05, |
|
"loss": 0.0263, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.9942509299966182, |
|
"grad_norm": 0.19687789678573608, |
|
"learning_rate": 5.0330005077001185e-05, |
|
"loss": 0.0253, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.9976327358809605, |
|
"grad_norm": 0.26815053820610046, |
|
"learning_rate": 5.0160771704180065e-05, |
|
"loss": 0.0315, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.025125615298748016, |
|
"eval_runtime": 350.9983, |
|
"eval_samples_per_second": 11.35, |
|
"eval_steps_per_second": 0.356, |
|
"step": 2957 |
|
}, |
|
{ |
|
"epoch": 1.0010145417653027, |
|
"grad_norm": 0.17259906232357025, |
|
"learning_rate": 4.9991538331358945e-05, |
|
"loss": 0.0235, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.004396347649645, |
|
"grad_norm": 0.23677606880664825, |
|
"learning_rate": 4.9822304958537825e-05, |
|
"loss": 0.0231, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.0077781535339871, |
|
"grad_norm": 0.2753681540489197, |
|
"learning_rate": 4.9653071585716705e-05, |
|
"loss": 0.0225, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.0111599594183294, |
|
"grad_norm": 0.1379474550485611, |
|
"learning_rate": 4.9483838212895585e-05, |
|
"loss": 0.0209, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.0145417653026716, |
|
"grad_norm": 0.1725788563489914, |
|
"learning_rate": 4.9314604840074465e-05, |
|
"loss": 0.0231, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0179235711870138, |
|
"grad_norm": 0.16469170153141022, |
|
"learning_rate": 4.9145371467253345e-05, |
|
"loss": 0.0235, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.021305377071356, |
|
"grad_norm": 0.1656704694032669, |
|
"learning_rate": 4.8976138094432225e-05, |
|
"loss": 0.0214, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.0246871829556983, |
|
"grad_norm": 0.22718942165374756, |
|
"learning_rate": 4.8806904721611104e-05, |
|
"loss": 0.0235, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.0280689888400405, |
|
"grad_norm": 0.18668334186077118, |
|
"learning_rate": 4.8637671348789984e-05, |
|
"loss": 0.0226, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.0314507947243827, |
|
"grad_norm": 0.1626449078321457, |
|
"learning_rate": 4.8468437975968864e-05, |
|
"loss": 0.0211, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.0348326006087252, |
|
"grad_norm": 0.14618806540966034, |
|
"learning_rate": 4.829920460314774e-05, |
|
"loss": 0.0224, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.0382144064930674, |
|
"grad_norm": 0.20951628684997559, |
|
"learning_rate": 4.812997123032662e-05, |
|
"loss": 0.0225, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.0415962123774096, |
|
"grad_norm": 0.15292339026927948, |
|
"learning_rate": 4.79607378575055e-05, |
|
"loss": 0.0236, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.0449780182617519, |
|
"grad_norm": 0.23356632888317108, |
|
"learning_rate": 4.779150448468438e-05, |
|
"loss": 0.022, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.048359824146094, |
|
"grad_norm": 0.17371255159378052, |
|
"learning_rate": 4.762227111186326e-05, |
|
"loss": 0.0221, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.0517416300304363, |
|
"grad_norm": 0.20166213810443878, |
|
"learning_rate": 4.745303773904214e-05, |
|
"loss": 0.0227, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.0551234359147785, |
|
"grad_norm": 0.18544991314411163, |
|
"learning_rate": 4.728380436622102e-05, |
|
"loss": 0.0227, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.0585052417991208, |
|
"grad_norm": 0.23601798713207245, |
|
"learning_rate": 4.71145709933999e-05, |
|
"loss": 0.022, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.061887047683463, |
|
"grad_norm": 0.20726293325424194, |
|
"learning_rate": 4.6945337620578784e-05, |
|
"loss": 0.021, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.0652688535678052, |
|
"grad_norm": 0.20292699337005615, |
|
"learning_rate": 4.6776104247757664e-05, |
|
"loss": 0.0214, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.0686506594521474, |
|
"grad_norm": 0.2641526162624359, |
|
"learning_rate": 4.6606870874936544e-05, |
|
"loss": 0.0222, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.0720324653364897, |
|
"grad_norm": 0.19379054009914398, |
|
"learning_rate": 4.6437637502115424e-05, |
|
"loss": 0.0213, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.0754142712208319, |
|
"grad_norm": 0.17844127118587494, |
|
"learning_rate": 4.6268404129294304e-05, |
|
"loss": 0.0247, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.0787960771051741, |
|
"grad_norm": 0.19252558052539825, |
|
"learning_rate": 4.6099170756473184e-05, |
|
"loss": 0.0215, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.0821778829895163, |
|
"grad_norm": 0.18375921249389648, |
|
"learning_rate": 4.5929937383652064e-05, |
|
"loss": 0.0225, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.0855596888738586, |
|
"grad_norm": 0.20377720892429352, |
|
"learning_rate": 4.576070401083094e-05, |
|
"loss": 0.0218, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.0889414947582008, |
|
"grad_norm": 0.17277295887470245, |
|
"learning_rate": 4.559147063800982e-05, |
|
"loss": 0.0203, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.092323300642543, |
|
"grad_norm": 0.17021779716014862, |
|
"learning_rate": 4.54222372651887e-05, |
|
"loss": 0.0195, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.0957051065268852, |
|
"grad_norm": 0.1649063676595688, |
|
"learning_rate": 4.525300389236758e-05, |
|
"loss": 0.0221, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.0990869124112277, |
|
"grad_norm": 0.12819623947143555, |
|
"learning_rate": 4.508377051954646e-05, |
|
"loss": 0.0216, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.10246871829557, |
|
"grad_norm": 0.20793533325195312, |
|
"learning_rate": 4.491453714672534e-05, |
|
"loss": 0.0205, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.1058505241799121, |
|
"grad_norm": 0.23723694682121277, |
|
"learning_rate": 4.474530377390422e-05, |
|
"loss": 0.0208, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.1092323300642544, |
|
"grad_norm": 0.20238593220710754, |
|
"learning_rate": 4.45760704010831e-05, |
|
"loss": 0.0212, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.1126141359485966, |
|
"grad_norm": 0.1898476779460907, |
|
"learning_rate": 4.440683702826198e-05, |
|
"loss": 0.0215, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.1159959418329388, |
|
"grad_norm": 0.1884363740682602, |
|
"learning_rate": 4.423760365544086e-05, |
|
"loss": 0.021, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.119377747717281, |
|
"grad_norm": 0.1632470339536667, |
|
"learning_rate": 4.406837028261974e-05, |
|
"loss": 0.0213, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.1227595536016233, |
|
"grad_norm": 0.1657821387052536, |
|
"learning_rate": 4.389913690979862e-05, |
|
"loss": 0.0203, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.1261413594859655, |
|
"grad_norm": 0.12199438363313675, |
|
"learning_rate": 4.37299035369775e-05, |
|
"loss": 0.0202, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.1295231653703077, |
|
"grad_norm": 0.16210108995437622, |
|
"learning_rate": 4.356067016415638e-05, |
|
"loss": 0.0205, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.13290497125465, |
|
"grad_norm": 0.18802976608276367, |
|
"learning_rate": 4.339143679133526e-05, |
|
"loss": 0.0207, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.1362867771389922, |
|
"grad_norm": 0.1435530036687851, |
|
"learning_rate": 4.322220341851414e-05, |
|
"loss": 0.0215, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.1396685830233344, |
|
"grad_norm": 0.25706106424331665, |
|
"learning_rate": 4.305297004569302e-05, |
|
"loss": 0.022, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.1430503889076766, |
|
"grad_norm": 0.19478261470794678, |
|
"learning_rate": 4.28837366728719e-05, |
|
"loss": 0.0207, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.1464321947920189, |
|
"grad_norm": 0.14196892082691193, |
|
"learning_rate": 4.271450330005077e-05, |
|
"loss": 0.0203, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.149814000676361, |
|
"grad_norm": 0.15680982172489166, |
|
"learning_rate": 4.254526992722965e-05, |
|
"loss": 0.0213, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.1531958065607033, |
|
"grad_norm": 0.1810910403728485, |
|
"learning_rate": 4.237603655440853e-05, |
|
"loss": 0.0213, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.1565776124450458, |
|
"grad_norm": 0.13093525171279907, |
|
"learning_rate": 4.220680318158741e-05, |
|
"loss": 0.0193, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.159959418329388, |
|
"grad_norm": 0.1480521410703659, |
|
"learning_rate": 4.203756980876629e-05, |
|
"loss": 0.0192, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.1633412242137302, |
|
"grad_norm": 0.20289231836795807, |
|
"learning_rate": 4.186833643594517e-05, |
|
"loss": 0.0189, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.1667230300980724, |
|
"grad_norm": 0.15804171562194824, |
|
"learning_rate": 4.169910306312405e-05, |
|
"loss": 0.0204, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.1701048359824147, |
|
"grad_norm": 0.12550042569637299, |
|
"learning_rate": 4.152986969030293e-05, |
|
"loss": 0.0193, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.1734866418667569, |
|
"grad_norm": 0.153706893324852, |
|
"learning_rate": 4.136063631748181e-05, |
|
"loss": 0.0204, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.1768684477510991, |
|
"grad_norm": 0.17369280755519867, |
|
"learning_rate": 4.119140294466069e-05, |
|
"loss": 0.0194, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.1802502536354413, |
|
"grad_norm": 0.20219433307647705, |
|
"learning_rate": 4.102216957183957e-05, |
|
"loss": 0.0234, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.1836320595197836, |
|
"grad_norm": 0.14274387061595917, |
|
"learning_rate": 4.085293619901845e-05, |
|
"loss": 0.0201, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.1870138654041258, |
|
"grad_norm": 0.1277850866317749, |
|
"learning_rate": 4.068370282619733e-05, |
|
"loss": 0.0197, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.190395671288468, |
|
"grad_norm": 0.12567327916622162, |
|
"learning_rate": 4.051446945337621e-05, |
|
"loss": 0.0193, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.1937774771728102, |
|
"grad_norm": 0.18639731407165527, |
|
"learning_rate": 4.034523608055509e-05, |
|
"loss": 0.0208, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.1971592830571525, |
|
"grad_norm": 0.17870990931987762, |
|
"learning_rate": 4.017600270773397e-05, |
|
"loss": 0.0207, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.2005410889414947, |
|
"grad_norm": 0.1992855817079544, |
|
"learning_rate": 4.000676933491285e-05, |
|
"loss": 0.0197, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.203922894825837, |
|
"grad_norm": 0.19009998440742493, |
|
"learning_rate": 3.983753596209173e-05, |
|
"loss": 0.0193, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.2073047007101791, |
|
"grad_norm": 0.2152644246816635, |
|
"learning_rate": 3.966830258927061e-05, |
|
"loss": 0.0193, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.2106865065945214, |
|
"grad_norm": 0.17644502222537994, |
|
"learning_rate": 3.949906921644948e-05, |
|
"loss": 0.0189, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.2140683124788638, |
|
"grad_norm": 0.16940535604953766, |
|
"learning_rate": 3.932983584362836e-05, |
|
"loss": 0.0195, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.217450118363206, |
|
"grad_norm": 0.1200951337814331, |
|
"learning_rate": 3.916060247080724e-05, |
|
"loss": 0.0205, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.2208319242475483, |
|
"grad_norm": 0.12686198949813843, |
|
"learning_rate": 3.899136909798612e-05, |
|
"loss": 0.0208, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.2242137301318905, |
|
"grad_norm": 0.16317637264728546, |
|
"learning_rate": 3.8822135725165e-05, |
|
"loss": 0.0198, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.2275955360162327, |
|
"grad_norm": 0.16301840543746948, |
|
"learning_rate": 3.865290235234388e-05, |
|
"loss": 0.0196, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.230977341900575, |
|
"grad_norm": 0.14271463453769684, |
|
"learning_rate": 3.848366897952276e-05, |
|
"loss": 0.0198, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.2343591477849172, |
|
"grad_norm": 0.13855944573879242, |
|
"learning_rate": 3.831443560670164e-05, |
|
"loss": 0.0192, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.2377409536692594, |
|
"grad_norm": 0.16393686830997467, |
|
"learning_rate": 3.814520223388052e-05, |
|
"loss": 0.0181, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.2411227595536016, |
|
"grad_norm": 0.13381467759609222, |
|
"learning_rate": 3.79759688610594e-05, |
|
"loss": 0.0199, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.2445045654379439, |
|
"grad_norm": 0.20619070529937744, |
|
"learning_rate": 3.780673548823828e-05, |
|
"loss": 0.02, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.247886371322286, |
|
"grad_norm": 0.1646890640258789, |
|
"learning_rate": 3.763750211541716e-05, |
|
"loss": 0.0207, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.2512681772066283, |
|
"grad_norm": 0.18202926218509674, |
|
"learning_rate": 3.746826874259604e-05, |
|
"loss": 0.0194, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.2546499830909705, |
|
"grad_norm": 0.16690859198570251, |
|
"learning_rate": 3.729903536977492e-05, |
|
"loss": 0.0198, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.2580317889753128, |
|
"grad_norm": 0.14021560549736023, |
|
"learning_rate": 3.71298019969538e-05, |
|
"loss": 0.019, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.261413594859655, |
|
"grad_norm": 0.1812976896762848, |
|
"learning_rate": 3.696056862413268e-05, |
|
"loss": 0.0202, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.2647954007439974, |
|
"grad_norm": 0.19519537687301636, |
|
"learning_rate": 3.679133525131156e-05, |
|
"loss": 0.0201, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.2681772066283394, |
|
"grad_norm": 0.1934451311826706, |
|
"learning_rate": 3.662210187849044e-05, |
|
"loss": 0.0191, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.2715590125126819, |
|
"grad_norm": 0.16278310120105743, |
|
"learning_rate": 3.645286850566932e-05, |
|
"loss": 0.0198, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.274940818397024, |
|
"grad_norm": 0.13240766525268555, |
|
"learning_rate": 3.6283635132848195e-05, |
|
"loss": 0.0185, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.2783226242813663, |
|
"grad_norm": 0.1842852085828781, |
|
"learning_rate": 3.6114401760027075e-05, |
|
"loss": 0.0192, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.2817044301657086, |
|
"grad_norm": 0.15183594822883606, |
|
"learning_rate": 3.5945168387205955e-05, |
|
"loss": 0.0208, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.2850862360500508, |
|
"grad_norm": 0.131156325340271, |
|
"learning_rate": 3.5775935014384835e-05, |
|
"loss": 0.0196, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.288468041934393, |
|
"grad_norm": 0.12825001776218414, |
|
"learning_rate": 3.5606701641563715e-05, |
|
"loss": 0.0192, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.2918498478187352, |
|
"grad_norm": 0.1289099007844925, |
|
"learning_rate": 3.5437468268742595e-05, |
|
"loss": 0.0193, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.2952316537030775, |
|
"grad_norm": 0.18116918206214905, |
|
"learning_rate": 3.5268234895921475e-05, |
|
"loss": 0.0181, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.2986134595874197, |
|
"grad_norm": 0.1824859082698822, |
|
"learning_rate": 3.5099001523100355e-05, |
|
"loss": 0.0189, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.301995265471762, |
|
"grad_norm": 0.18777935206890106, |
|
"learning_rate": 3.4929768150279235e-05, |
|
"loss": 0.0184, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.3053770713561041, |
|
"grad_norm": 0.11837860941886902, |
|
"learning_rate": 3.4760534777458115e-05, |
|
"loss": 0.0191, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.3087588772404464, |
|
"grad_norm": 0.13773775100708008, |
|
"learning_rate": 3.4591301404636995e-05, |
|
"loss": 0.0189, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.3121406831247886, |
|
"grad_norm": 0.148502916097641, |
|
"learning_rate": 3.4422068031815875e-05, |
|
"loss": 0.0184, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.3155224890091308, |
|
"grad_norm": 0.12410616874694824, |
|
"learning_rate": 3.4252834658994755e-05, |
|
"loss": 0.018, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.318904294893473, |
|
"grad_norm": 0.12317556142807007, |
|
"learning_rate": 3.4083601286173635e-05, |
|
"loss": 0.0177, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.3222861007778153, |
|
"grad_norm": 0.133961483836174, |
|
"learning_rate": 3.3914367913352515e-05, |
|
"loss": 0.0184, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.3256679066621575, |
|
"grad_norm": 0.1317528784275055, |
|
"learning_rate": 3.3745134540531395e-05, |
|
"loss": 0.02, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.3290497125465, |
|
"grad_norm": 0.14328566193580627, |
|
"learning_rate": 3.3575901167710275e-05, |
|
"loss": 0.0185, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.332431518430842, |
|
"grad_norm": 0.1426452249288559, |
|
"learning_rate": 3.3406667794889155e-05, |
|
"loss": 0.0187, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.3358133243151844, |
|
"grad_norm": 0.13055744767189026, |
|
"learning_rate": 3.323743442206803e-05, |
|
"loss": 0.0178, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.3391951301995266, |
|
"grad_norm": 0.14115405082702637, |
|
"learning_rate": 3.306820104924691e-05, |
|
"loss": 0.0201, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.3425769360838689, |
|
"grad_norm": 0.11101550608873367, |
|
"learning_rate": 3.289896767642579e-05, |
|
"loss": 0.0177, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.345958741968211, |
|
"grad_norm": 0.10724030435085297, |
|
"learning_rate": 3.272973430360467e-05, |
|
"loss": 0.0197, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.3493405478525533, |
|
"grad_norm": 0.15012751519680023, |
|
"learning_rate": 3.256050093078355e-05, |
|
"loss": 0.0184, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.3527223537368955, |
|
"grad_norm": 0.16174957156181335, |
|
"learning_rate": 3.239126755796243e-05, |
|
"loss": 0.0187, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.3561041596212378, |
|
"grad_norm": 0.12000412493944168, |
|
"learning_rate": 3.222203418514131e-05, |
|
"loss": 0.0176, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.35948596550558, |
|
"grad_norm": 0.14733338356018066, |
|
"learning_rate": 3.205280081232019e-05, |
|
"loss": 0.0177, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.3628677713899222, |
|
"grad_norm": 0.12138606607913971, |
|
"learning_rate": 3.188356743949907e-05, |
|
"loss": 0.0176, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.3662495772742644, |
|
"grad_norm": 0.1111988052725792, |
|
"learning_rate": 3.1714334066677954e-05, |
|
"loss": 0.0177, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.3696313831586067, |
|
"grad_norm": 0.22300873696804047, |
|
"learning_rate": 3.1545100693856834e-05, |
|
"loss": 0.0182, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.373013189042949, |
|
"grad_norm": 0.11778800189495087, |
|
"learning_rate": 3.1375867321035714e-05, |
|
"loss": 0.0176, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.3763949949272911, |
|
"grad_norm": 0.14025694131851196, |
|
"learning_rate": 3.1206633948214594e-05, |
|
"loss": 0.018, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.3797768008116333, |
|
"grad_norm": 0.22355903685092926, |
|
"learning_rate": 3.1037400575393474e-05, |
|
"loss": 0.02, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.3831586066959756, |
|
"grad_norm": 0.15728473663330078, |
|
"learning_rate": 3.0868167202572354e-05, |
|
"loss": 0.0175, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.386540412580318, |
|
"grad_norm": 0.13040538132190704, |
|
"learning_rate": 3.069893382975123e-05, |
|
"loss": 0.0177, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.38992221846466, |
|
"grad_norm": 0.13674882054328918, |
|
"learning_rate": 3.052970045693011e-05, |
|
"loss": 0.0189, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.3933040243490025, |
|
"grad_norm": 0.13062675297260284, |
|
"learning_rate": 3.036046708410899e-05, |
|
"loss": 0.0174, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.3966858302333445, |
|
"grad_norm": 0.07586494088172913, |
|
"learning_rate": 3.019123371128787e-05, |
|
"loss": 0.0174, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.400067636117687, |
|
"grad_norm": 0.12583595514297485, |
|
"learning_rate": 3.002200033846675e-05, |
|
"loss": 0.0174, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.4034494420020291, |
|
"grad_norm": 0.11460530757904053, |
|
"learning_rate": 2.9852766965645627e-05, |
|
"loss": 0.017, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.4068312478863714, |
|
"grad_norm": 0.14381511509418488, |
|
"learning_rate": 2.9683533592824507e-05, |
|
"loss": 0.0189, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.4102130537707136, |
|
"grad_norm": 0.12002012878656387, |
|
"learning_rate": 2.9514300220003387e-05, |
|
"loss": 0.0172, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.4135948596550558, |
|
"grad_norm": 0.1379200518131256, |
|
"learning_rate": 2.9345066847182267e-05, |
|
"loss": 0.018, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.416976665539398, |
|
"grad_norm": 0.098530113697052, |
|
"learning_rate": 2.9175833474361147e-05, |
|
"loss": 0.0172, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.4203584714237403, |
|
"grad_norm": 0.11576182395219803, |
|
"learning_rate": 2.9006600101540027e-05, |
|
"loss": 0.0187, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.4237402773080825, |
|
"grad_norm": 0.15769566595554352, |
|
"learning_rate": 2.8837366728718907e-05, |
|
"loss": 0.0179, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.4271220831924247, |
|
"grad_norm": 0.1257631480693817, |
|
"learning_rate": 2.8668133355897787e-05, |
|
"loss": 0.0174, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.430503889076767, |
|
"grad_norm": 0.09543973207473755, |
|
"learning_rate": 2.8498899983076667e-05, |
|
"loss": 0.0174, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.4338856949611092, |
|
"grad_norm": 0.12915478646755219, |
|
"learning_rate": 2.8329666610255544e-05, |
|
"loss": 0.0169, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.4372675008454514, |
|
"grad_norm": 0.14382801949977875, |
|
"learning_rate": 2.8160433237434424e-05, |
|
"loss": 0.0169, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.4406493067297936, |
|
"grad_norm": 0.1201377660036087, |
|
"learning_rate": 2.7991199864613304e-05, |
|
"loss": 0.0167, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.444031112614136, |
|
"grad_norm": 0.11754725128412247, |
|
"learning_rate": 2.7821966491792184e-05, |
|
"loss": 0.0213, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.447412918498478, |
|
"grad_norm": 0.0879068523645401, |
|
"learning_rate": 2.7652733118971064e-05, |
|
"loss": 0.0175, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.4507947243828205, |
|
"grad_norm": 0.10662252455949783, |
|
"learning_rate": 2.7483499746149944e-05, |
|
"loss": 0.019, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.4541765302671625, |
|
"grad_norm": 0.09592264890670776, |
|
"learning_rate": 2.7314266373328824e-05, |
|
"loss": 0.0182, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.457558336151505, |
|
"grad_norm": 0.19000816345214844, |
|
"learning_rate": 2.7145033000507703e-05, |
|
"loss": 0.0181, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.4609401420358472, |
|
"grad_norm": 0.18730321526527405, |
|
"learning_rate": 2.6975799627686583e-05, |
|
"loss": 0.017, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.4643219479201894, |
|
"grad_norm": 0.11948239058256149, |
|
"learning_rate": 2.680656625486546e-05, |
|
"loss": 0.0163, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.4677037538045317, |
|
"grad_norm": 0.11915738880634308, |
|
"learning_rate": 2.663733288204434e-05, |
|
"loss": 0.0176, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.471085559688874, |
|
"grad_norm": 0.11469963937997818, |
|
"learning_rate": 2.646809950922322e-05, |
|
"loss": 0.0174, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.4744673655732161, |
|
"grad_norm": 0.13674333691596985, |
|
"learning_rate": 2.62988661364021e-05, |
|
"loss": 0.0169, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.4778491714575583, |
|
"grad_norm": 0.1072223111987114, |
|
"learning_rate": 2.612963276358098e-05, |
|
"loss": 0.017, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.4812309773419006, |
|
"grad_norm": 0.12718436121940613, |
|
"learning_rate": 2.596039939075986e-05, |
|
"loss": 0.0185, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.4846127832262428, |
|
"grad_norm": 0.09312810003757477, |
|
"learning_rate": 2.579116601793874e-05, |
|
"loss": 0.0167, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.487994589110585, |
|
"grad_norm": 0.1793229579925537, |
|
"learning_rate": 2.562193264511762e-05, |
|
"loss": 0.0177, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.4913763949949272, |
|
"grad_norm": 0.10192491114139557, |
|
"learning_rate": 2.54526992722965e-05, |
|
"loss": 0.0163, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.4947582008792695, |
|
"grad_norm": 0.10123202204704285, |
|
"learning_rate": 2.528346589947538e-05, |
|
"loss": 0.017, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.4981400067636117, |
|
"grad_norm": 0.13236495852470398, |
|
"learning_rate": 2.5114232526654256e-05, |
|
"loss": 0.0172, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.5015218126479541, |
|
"grad_norm": 0.11559689790010452, |
|
"learning_rate": 2.4944999153833136e-05, |
|
"loss": 0.018, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.5049036185322962, |
|
"grad_norm": 0.1355433166027069, |
|
"learning_rate": 2.4775765781012016e-05, |
|
"loss": 0.0177, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.5082854244166386, |
|
"grad_norm": 0.09492843598127365, |
|
"learning_rate": 2.4606532408190896e-05, |
|
"loss": 0.0166, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.5116672303009806, |
|
"grad_norm": 0.14215636253356934, |
|
"learning_rate": 2.4437299035369776e-05, |
|
"loss": 0.0169, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.515049036185323, |
|
"grad_norm": 0.10210006684064865, |
|
"learning_rate": 2.4268065662548656e-05, |
|
"loss": 0.0168, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.518430842069665, |
|
"grad_norm": 0.14845940470695496, |
|
"learning_rate": 2.4098832289727536e-05, |
|
"loss": 0.0179, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.5218126479540075, |
|
"grad_norm": 0.18286077678203583, |
|
"learning_rate": 2.3929598916906416e-05, |
|
"loss": 0.0172, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.5251944538383497, |
|
"grad_norm": 0.11884652823209763, |
|
"learning_rate": 2.3760365544085296e-05, |
|
"loss": 0.0161, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.528576259722692, |
|
"grad_norm": 0.1169377937912941, |
|
"learning_rate": 2.3591132171264173e-05, |
|
"loss": 0.0168, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.5319580656070342, |
|
"grad_norm": 0.10672768950462341, |
|
"learning_rate": 2.3421898798443053e-05, |
|
"loss": 0.017, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.5353398714913764, |
|
"grad_norm": 0.06240490451455116, |
|
"learning_rate": 2.3252665425621933e-05, |
|
"loss": 0.0173, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.5387216773757186, |
|
"grad_norm": 0.12996572256088257, |
|
"learning_rate": 2.3083432052800813e-05, |
|
"loss": 0.0164, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.5421034832600609, |
|
"grad_norm": 0.10913591086864471, |
|
"learning_rate": 2.2914198679979693e-05, |
|
"loss": 0.0166, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.545485289144403, |
|
"grad_norm": 0.09768196195363998, |
|
"learning_rate": 2.2744965307158573e-05, |
|
"loss": 0.0173, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.5488670950287453, |
|
"grad_norm": 0.06998474895954132, |
|
"learning_rate": 2.2575731934337453e-05, |
|
"loss": 0.017, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.5522489009130875, |
|
"grad_norm": 0.07349732518196106, |
|
"learning_rate": 2.2406498561516333e-05, |
|
"loss": 0.0212, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.5556307067974298, |
|
"grad_norm": 0.13526155054569244, |
|
"learning_rate": 2.2237265188695212e-05, |
|
"loss": 0.016, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.5590125126817722, |
|
"grad_norm": 0.12724880874156952, |
|
"learning_rate": 2.206803181587409e-05, |
|
"loss": 0.0171, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.5623943185661142, |
|
"grad_norm": 0.08398682624101639, |
|
"learning_rate": 2.189879844305297e-05, |
|
"loss": 0.0168, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.5657761244504567, |
|
"grad_norm": 0.0917474552989006, |
|
"learning_rate": 2.172956507023185e-05, |
|
"loss": 0.0179, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.5691579303347987, |
|
"grad_norm": 0.12279586493968964, |
|
"learning_rate": 2.156033169741073e-05, |
|
"loss": 0.0166, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.5725397362191411, |
|
"grad_norm": 0.14498373866081238, |
|
"learning_rate": 2.139109832458961e-05, |
|
"loss": 0.0174, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.5759215421034831, |
|
"grad_norm": 0.1070031225681305, |
|
"learning_rate": 2.122186495176849e-05, |
|
"loss": 0.0164, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.5793033479878256, |
|
"grad_norm": 0.0644792914390564, |
|
"learning_rate": 2.105263157894737e-05, |
|
"loss": 0.0164, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.5826851538721676, |
|
"grad_norm": 0.13356371223926544, |
|
"learning_rate": 2.088339820612625e-05, |
|
"loss": 0.0171, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.58606695975651, |
|
"grad_norm": 0.061863575130701065, |
|
"learning_rate": 2.071416483330513e-05, |
|
"loss": 0.0158, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.5894487656408522, |
|
"grad_norm": 0.15974536538124084, |
|
"learning_rate": 2.0544931460484005e-05, |
|
"loss": 0.0168, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.5928305715251945, |
|
"grad_norm": 0.10037930309772491, |
|
"learning_rate": 2.037569808766289e-05, |
|
"loss": 0.016, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.5962123774095367, |
|
"grad_norm": 0.09223894774913788, |
|
"learning_rate": 2.020646471484177e-05, |
|
"loss": 0.0159, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.599594183293879, |
|
"grad_norm": 0.08334498107433319, |
|
"learning_rate": 2.003723134202065e-05, |
|
"loss": 0.0169, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.6029759891782212, |
|
"grad_norm": 0.127294659614563, |
|
"learning_rate": 1.986799796919953e-05, |
|
"loss": 0.0168, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.6063577950625634, |
|
"grad_norm": 0.0788227841258049, |
|
"learning_rate": 1.969876459637841e-05, |
|
"loss": 0.0159, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.6097396009469056, |
|
"grad_norm": 0.11868683248758316, |
|
"learning_rate": 1.952953122355729e-05, |
|
"loss": 0.0156, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.6131214068312478, |
|
"grad_norm": 0.09021388739347458, |
|
"learning_rate": 1.936029785073617e-05, |
|
"loss": 0.0169, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.6165032127155903, |
|
"grad_norm": 0.10817821323871613, |
|
"learning_rate": 1.9191064477915045e-05, |
|
"loss": 0.0168, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.6198850185999323, |
|
"grad_norm": 0.09980209171772003, |
|
"learning_rate": 1.9021831105093925e-05, |
|
"loss": 0.0163, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.6232668244842747, |
|
"grad_norm": 0.09977928549051285, |
|
"learning_rate": 1.8852597732272805e-05, |
|
"loss": 0.0165, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.6266486303686167, |
|
"grad_norm": 0.07304428517818451, |
|
"learning_rate": 1.8683364359451685e-05, |
|
"loss": 0.0159, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.6300304362529592, |
|
"grad_norm": 0.10551423579454422, |
|
"learning_rate": 1.8514130986630565e-05, |
|
"loss": 0.0167, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.6334122421373012, |
|
"grad_norm": 0.06610710173845291, |
|
"learning_rate": 1.8344897613809445e-05, |
|
"loss": 0.0164, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.6367940480216436, |
|
"grad_norm": 0.19381451606750488, |
|
"learning_rate": 1.8175664240988325e-05, |
|
"loss": 0.0166, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.6401758539059856, |
|
"grad_norm": 0.09147250652313232, |
|
"learning_rate": 1.8006430868167205e-05, |
|
"loss": 0.0163, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.643557659790328, |
|
"grad_norm": 0.1002357229590416, |
|
"learning_rate": 1.7837197495346085e-05, |
|
"loss": 0.0164, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.6469394656746703, |
|
"grad_norm": 0.08374287933111191, |
|
"learning_rate": 1.766796412252496e-05, |
|
"loss": 0.0168, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.6503212715590125, |
|
"grad_norm": 0.11019756644964218, |
|
"learning_rate": 1.749873074970384e-05, |
|
"loss": 0.0175, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.6537030774433548, |
|
"grad_norm": 0.06535135954618454, |
|
"learning_rate": 1.732949737688272e-05, |
|
"loss": 0.0157, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.657084883327697, |
|
"grad_norm": 0.0966026782989502, |
|
"learning_rate": 1.71602640040616e-05, |
|
"loss": 0.0159, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.6604666892120392, |
|
"grad_norm": 0.07409293204545975, |
|
"learning_rate": 1.699103063124048e-05, |
|
"loss": 0.0168, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.6638484950963814, |
|
"grad_norm": 0.13448843359947205, |
|
"learning_rate": 1.682179725841936e-05, |
|
"loss": 0.017, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.6672303009807237, |
|
"grad_norm": 0.11097908765077591, |
|
"learning_rate": 1.665256388559824e-05, |
|
"loss": 0.0164, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.670612106865066, |
|
"grad_norm": 0.08099315315485, |
|
"learning_rate": 1.648333051277712e-05, |
|
"loss": 0.0169, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.6739939127494083, |
|
"grad_norm": 0.10239287465810776, |
|
"learning_rate": 1.6314097139956e-05, |
|
"loss": 0.0168, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.6773757186337503, |
|
"grad_norm": 0.07663831859827042, |
|
"learning_rate": 1.614486376713488e-05, |
|
"loss": 0.0152, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.6807575245180928, |
|
"grad_norm": 0.07033158838748932, |
|
"learning_rate": 1.5975630394313758e-05, |
|
"loss": 0.0162, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.6841393304024348, |
|
"grad_norm": 0.10326120257377625, |
|
"learning_rate": 1.5806397021492638e-05, |
|
"loss": 0.0157, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.6875211362867772, |
|
"grad_norm": 0.12648841738700867, |
|
"learning_rate": 1.5637163648671518e-05, |
|
"loss": 0.0165, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.6909029421711192, |
|
"grad_norm": 0.07291960716247559, |
|
"learning_rate": 1.5467930275850398e-05, |
|
"loss": 0.0163, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.6942847480554617, |
|
"grad_norm": 0.062065865844488144, |
|
"learning_rate": 1.5298696903029278e-05, |
|
"loss": 0.0156, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.6976665539398037, |
|
"grad_norm": 0.08348127454519272, |
|
"learning_rate": 1.5129463530208158e-05, |
|
"loss": 0.0161, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.7010483598241462, |
|
"grad_norm": 0.05632692202925682, |
|
"learning_rate": 1.4960230157387036e-05, |
|
"loss": 0.0162, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.7044301657084884, |
|
"grad_norm": 0.08220499753952026, |
|
"learning_rate": 1.4790996784565916e-05, |
|
"loss": 0.0156, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.7078119715928306, |
|
"grad_norm": 0.09087113291025162, |
|
"learning_rate": 1.4621763411744796e-05, |
|
"loss": 0.0163, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.7111937774771728, |
|
"grad_norm": 0.07073336839675903, |
|
"learning_rate": 1.4452530038923676e-05, |
|
"loss": 0.0157, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.714575583361515, |
|
"grad_norm": 0.0676584392786026, |
|
"learning_rate": 1.4283296666102556e-05, |
|
"loss": 0.0156, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.7179573892458573, |
|
"grad_norm": 0.08832427114248276, |
|
"learning_rate": 1.4114063293281434e-05, |
|
"loss": 0.016, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.7213391951301995, |
|
"grad_norm": 0.14378510415554047, |
|
"learning_rate": 1.3944829920460314e-05, |
|
"loss": 0.0193, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.7247210010145417, |
|
"grad_norm": 0.0655590146780014, |
|
"learning_rate": 1.3775596547639194e-05, |
|
"loss": 0.0158, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.728102806898884, |
|
"grad_norm": 0.0975392535328865, |
|
"learning_rate": 1.3606363174818074e-05, |
|
"loss": 0.0163, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.7314846127832264, |
|
"grad_norm": 0.060749635100364685, |
|
"learning_rate": 1.3437129801996954e-05, |
|
"loss": 0.0154, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.7348664186675684, |
|
"grad_norm": 0.07566985487937927, |
|
"learning_rate": 1.3267896429175832e-05, |
|
"loss": 0.0165, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.7382482245519109, |
|
"grad_norm": 0.09878894686698914, |
|
"learning_rate": 1.3098663056354712e-05, |
|
"loss": 0.0159, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.7416300304362529, |
|
"grad_norm": 0.07339813560247421, |
|
"learning_rate": 1.2929429683533592e-05, |
|
"loss": 0.0166, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.7450118363205953, |
|
"grad_norm": 0.11021166294813156, |
|
"learning_rate": 1.2760196310712474e-05, |
|
"loss": 0.0153, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.7483936422049373, |
|
"grad_norm": 0.15555544197559357, |
|
"learning_rate": 1.2590962937891354e-05, |
|
"loss": 0.0156, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.7517754480892798, |
|
"grad_norm": 0.07689628005027771, |
|
"learning_rate": 1.2421729565070232e-05, |
|
"loss": 0.0156, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.7551572539736218, |
|
"grad_norm": 0.06506321579217911, |
|
"learning_rate": 1.2252496192249112e-05, |
|
"loss": 0.0151, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.7585390598579642, |
|
"grad_norm": 0.07474467903375626, |
|
"learning_rate": 1.2083262819427992e-05, |
|
"loss": 0.0152, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.7619208657423064, |
|
"grad_norm": 0.0740489810705185, |
|
"learning_rate": 1.191402944660687e-05, |
|
"loss": 0.0156, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.7653026716266487, |
|
"grad_norm": 0.05762564763426781, |
|
"learning_rate": 1.174479607378575e-05, |
|
"loss": 0.0148, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.768684477510991, |
|
"grad_norm": 0.06422838568687439, |
|
"learning_rate": 1.157556270096463e-05, |
|
"loss": 0.016, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.7720662833953331, |
|
"grad_norm": 0.06867840886116028, |
|
"learning_rate": 1.140632932814351e-05, |
|
"loss": 0.0148, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.7754480892796753, |
|
"grad_norm": 0.09431049972772598, |
|
"learning_rate": 1.123709595532239e-05, |
|
"loss": 0.0162, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.7788298951640176, |
|
"grad_norm": 0.13230328261852264, |
|
"learning_rate": 1.1067862582501269e-05, |
|
"loss": 0.016, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.7822117010483598, |
|
"grad_norm": 0.08878026902675629, |
|
"learning_rate": 1.089862920968015e-05, |
|
"loss": 0.0156, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.785593506932702, |
|
"grad_norm": 0.11089925467967987, |
|
"learning_rate": 1.072939583685903e-05, |
|
"loss": 0.0157, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.7889753128170442, |
|
"grad_norm": 0.08735585957765579, |
|
"learning_rate": 1.056016246403791e-05, |
|
"loss": 0.0161, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.7923571187013865, |
|
"grad_norm": 0.08069667965173721, |
|
"learning_rate": 1.0390929091216788e-05, |
|
"loss": 0.0154, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.795738924585729, |
|
"grad_norm": 0.09315579384565353, |
|
"learning_rate": 1.0221695718395668e-05, |
|
"loss": 0.0152, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.799120730470071, |
|
"grad_norm": 0.10615842044353485, |
|
"learning_rate": 1.0052462345574548e-05, |
|
"loss": 0.0158, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.8025025363544134, |
|
"grad_norm": 0.08738986402750015, |
|
"learning_rate": 9.883228972753428e-06, |
|
"loss": 0.0159, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.8058843422387554, |
|
"grad_norm": 0.08308973908424377, |
|
"learning_rate": 9.713995599932307e-06, |
|
"loss": 0.0154, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.8092661481230978, |
|
"grad_norm": 0.056414760649204254, |
|
"learning_rate": 9.544762227111187e-06, |
|
"loss": 0.0159, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.8126479540074398, |
|
"grad_norm": 0.11174483597278595, |
|
"learning_rate": 9.375528854290067e-06, |
|
"loss": 0.0157, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.8160297598917823, |
|
"grad_norm": 0.12031450867652893, |
|
"learning_rate": 9.206295481468947e-06, |
|
"loss": 0.016, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.8194115657761243, |
|
"grad_norm": 0.07165198028087616, |
|
"learning_rate": 9.037062108647826e-06, |
|
"loss": 0.0154, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.8227933716604667, |
|
"grad_norm": 0.09684468805789948, |
|
"learning_rate": 8.867828735826705e-06, |
|
"loss": 0.0161, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.826175177544809, |
|
"grad_norm": 0.06861387938261032, |
|
"learning_rate": 8.698595363005585e-06, |
|
"loss": 0.0157, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.8295569834291512, |
|
"grad_norm": 0.06754298508167267, |
|
"learning_rate": 8.529361990184465e-06, |
|
"loss": 0.0156, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.8329387893134934, |
|
"grad_norm": 0.07989432662725449, |
|
"learning_rate": 8.360128617363345e-06, |
|
"loss": 0.0166, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.8363205951978356, |
|
"grad_norm": 0.07799744606018066, |
|
"learning_rate": 8.190895244542223e-06, |
|
"loss": 0.0153, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.8397024010821779, |
|
"grad_norm": 0.07402709126472473, |
|
"learning_rate": 8.021661871721103e-06, |
|
"loss": 0.0158, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.84308420696652, |
|
"grad_norm": 0.05629491060972214, |
|
"learning_rate": 7.852428498899983e-06, |
|
"loss": 0.0158, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.8464660128508623, |
|
"grad_norm": 0.05271398648619652, |
|
"learning_rate": 7.683195126078863e-06, |
|
"loss": 0.0161, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.8498478187352045, |
|
"grad_norm": 0.06697215884923935, |
|
"learning_rate": 7.513961753257742e-06, |
|
"loss": 0.0151, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.853229624619547, |
|
"grad_norm": 0.06437050551176071, |
|
"learning_rate": 7.344728380436622e-06, |
|
"loss": 0.0162, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.856611430503889, |
|
"grad_norm": 0.0728522315621376, |
|
"learning_rate": 7.175495007615503e-06, |
|
"loss": 0.0157, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.8599932363882314, |
|
"grad_norm": 0.08476530015468597, |
|
"learning_rate": 7.006261634794383e-06, |
|
"loss": 0.0153, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.8633750422725734, |
|
"grad_norm": 0.06566642969846725, |
|
"learning_rate": 6.837028261973262e-06, |
|
"loss": 0.0148, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.866756848156916, |
|
"grad_norm": 0.09338746964931488, |
|
"learning_rate": 6.667794889152142e-06, |
|
"loss": 0.0157, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.870138654041258, |
|
"grad_norm": 0.05955313891172409, |
|
"learning_rate": 6.498561516331021e-06, |
|
"loss": 0.0157, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.8735204599256003, |
|
"grad_norm": 0.11766281723976135, |
|
"learning_rate": 6.329328143509901e-06, |
|
"loss": 0.0157, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.8769022658099423, |
|
"grad_norm": 0.06120260804891586, |
|
"learning_rate": 6.16009477068878e-06, |
|
"loss": 0.0154, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.8802840716942848, |
|
"grad_norm": 0.09879166632890701, |
|
"learning_rate": 5.99086139786766e-06, |
|
"loss": 0.015, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.883665877578627, |
|
"grad_norm": 0.0957876592874527, |
|
"learning_rate": 5.82162802504654e-06, |
|
"loss": 0.0161, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.8870476834629692, |
|
"grad_norm": 0.07821828126907349, |
|
"learning_rate": 5.652394652225419e-06, |
|
"loss": 0.0162, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.8904294893473115, |
|
"grad_norm": 0.06772307306528091, |
|
"learning_rate": 5.483161279404299e-06, |
|
"loss": 0.0153, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.8938112952316537, |
|
"grad_norm": 0.09948345273733139, |
|
"learning_rate": 5.313927906583178e-06, |
|
"loss": 0.0156, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.897193101115996, |
|
"grad_norm": 0.07948364317417145, |
|
"learning_rate": 5.144694533762058e-06, |
|
"loss": 0.017, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.9005749070003382, |
|
"grad_norm": 0.10279282182455063, |
|
"learning_rate": 4.975461160940937e-06, |
|
"loss": 0.0151, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.9039567128846804, |
|
"grad_norm": 0.08826450258493423, |
|
"learning_rate": 4.806227788119817e-06, |
|
"loss": 0.0154, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.9073385187690226, |
|
"grad_norm": 0.09827665984630585, |
|
"learning_rate": 4.636994415298697e-06, |
|
"loss": 0.0156, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.910720324653365, |
|
"grad_norm": 0.06779050081968307, |
|
"learning_rate": 4.467761042477576e-06, |
|
"loss": 0.0154, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.914102130537707, |
|
"grad_norm": 0.05314110219478607, |
|
"learning_rate": 4.298527669656456e-06, |
|
"loss": 0.0156, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.9174839364220495, |
|
"grad_norm": 0.0623813197016716, |
|
"learning_rate": 4.129294296835336e-06, |
|
"loss": 0.0163, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.9208657423063915, |
|
"grad_norm": 0.06000444293022156, |
|
"learning_rate": 3.960060924014216e-06, |
|
"loss": 0.0152, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.924247548190734, |
|
"grad_norm": 0.059466857463121414, |
|
"learning_rate": 3.7908275511930954e-06, |
|
"loss": 0.0152, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.927629354075076, |
|
"grad_norm": 0.04620284587144852, |
|
"learning_rate": 3.6215941783719754e-06, |
|
"loss": 0.0151, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.9310111599594184, |
|
"grad_norm": 0.129261776804924, |
|
"learning_rate": 3.452360805550855e-06, |
|
"loss": 0.0152, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.9343929658437604, |
|
"grad_norm": 0.07549330592155457, |
|
"learning_rate": 3.2831274327297345e-06, |
|
"loss": 0.0157, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.9377747717281029, |
|
"grad_norm": 0.0915280282497406, |
|
"learning_rate": 3.113894059908614e-06, |
|
"loss": 0.0148, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.941156577612445, |
|
"grad_norm": 0.09341620653867722, |
|
"learning_rate": 2.9446606870874936e-06, |
|
"loss": 0.0151, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.9445383834967873, |
|
"grad_norm": 0.07050041854381561, |
|
"learning_rate": 2.7754273142663736e-06, |
|
"loss": 0.0153, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.9479201893811295, |
|
"grad_norm": 0.05427863821387291, |
|
"learning_rate": 2.606193941445253e-06, |
|
"loss": 0.0149, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.9513019952654718, |
|
"grad_norm": 0.08598488569259644, |
|
"learning_rate": 2.4369605686241327e-06, |
|
"loss": 0.0149, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.954683801149814, |
|
"grad_norm": 0.08857986330986023, |
|
"learning_rate": 2.267727195803012e-06, |
|
"loss": 0.0147, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.9580656070341562, |
|
"grad_norm": 0.09314049780368805, |
|
"learning_rate": 2.098493822981892e-06, |
|
"loss": 0.0158, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.9614474129184984, |
|
"grad_norm": 0.057003553956747055, |
|
"learning_rate": 1.929260450160772e-06, |
|
"loss": 0.0155, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.9648292188028407, |
|
"grad_norm": 0.0754491537809372, |
|
"learning_rate": 1.7600270773396515e-06, |
|
"loss": 0.0152, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.9682110246871831, |
|
"grad_norm": 0.07122204452753067, |
|
"learning_rate": 1.5907937045185312e-06, |
|
"loss": 0.0152, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.9715928305715251, |
|
"grad_norm": 0.06150359660387039, |
|
"learning_rate": 1.4215603316974108e-06, |
|
"loss": 0.0144, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.9749746364558676, |
|
"grad_norm": 0.04940520599484444, |
|
"learning_rate": 1.2523269588762905e-06, |
|
"loss": 0.0149, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.9783564423402096, |
|
"grad_norm": 0.06114586442708969, |
|
"learning_rate": 1.08309358605517e-06, |
|
"loss": 0.0157, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.981738248224552, |
|
"grad_norm": 0.06917215138673782, |
|
"learning_rate": 9.138602132340497e-07, |
|
"loss": 0.0155, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.985120054108894, |
|
"grad_norm": 0.04913664981722832, |
|
"learning_rate": 7.446268404129294e-07, |
|
"loss": 0.0155, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.9885018599932365, |
|
"grad_norm": 0.07977385073900223, |
|
"learning_rate": 5.753934675918092e-07, |
|
"loss": 0.0144, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.9918836658775785, |
|
"grad_norm": 0.07336314022541046, |
|
"learning_rate": 4.061600947706888e-07, |
|
"loss": 0.015, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.995265471761921, |
|
"grad_norm": 0.05290009453892708, |
|
"learning_rate": 2.3692672194956846e-07, |
|
"loss": 0.0154, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.9986472776462632, |
|
"grad_norm": 0.06475949287414551, |
|
"learning_rate": 6.769334912844813e-08, |
|
"loss": 0.0151, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.016348782926797867, |
|
"eval_runtime": 342.3753, |
|
"eval_samples_per_second": 11.636, |
|
"eval_steps_per_second": 0.365, |
|
"step": 5914 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5914, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 8.364323669652013e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|