{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 28.571428571428573,
  "eval_steps": 500,
  "global_step": 7000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "grad_norm": 2.530505657196045,
      "learning_rate": 0.0002,
      "loss": 2.7431,
      "step": 10
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.857336163520813,
      "learning_rate": 0.0002,
      "loss": 1.8606,
      "step": 20
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.33056318759918213,
      "learning_rate": 0.0002,
      "loss": 1.6893,
      "step": 30
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2524290680885315,
      "learning_rate": 0.0002,
      "loss": 1.6122,
      "step": 40
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2743304669857025,
      "learning_rate": 0.0002,
      "loss": 1.586,
      "step": 50
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.23376622796058655,
      "learning_rate": 0.0002,
      "loss": 1.5535,
      "step": 60
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.25336822867393494,
      "learning_rate": 0.0002,
      "loss": 1.5735,
      "step": 70
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2602108418941498,
      "learning_rate": 0.0002,
      "loss": 1.5429,
      "step": 80
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.26768583059310913,
      "learning_rate": 0.0002,
      "loss": 1.5375,
      "step": 90
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.25630825757980347,
      "learning_rate": 0.0002,
      "loss": 1.526,
      "step": 100
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.25623977184295654,
      "learning_rate": 0.0002,
      "loss": 1.5298,
      "step": 110
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.25586453080177307,
      "learning_rate": 0.0002,
      "loss": 1.5183,
      "step": 120
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.23581761121749878,
      "learning_rate": 0.0002,
      "loss": 1.5248,
      "step": 130
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.23642699420452118,
      "learning_rate": 0.0002,
      "loss": 1.5168,
      "step": 140
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.24895624816417694,
      "learning_rate": 0.0002,
      "loss": 1.4975,
      "step": 150
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.2535015940666199,
      "learning_rate": 0.0002,
      "loss": 1.5084,
      "step": 160
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.23402313888072968,
      "learning_rate": 0.0002,
      "loss": 1.5068,
      "step": 170
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.2852034568786621,
      "learning_rate": 0.0002,
      "loss": 1.5113,
      "step": 180
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.2674921154975891,
      "learning_rate": 0.0002,
      "loss": 1.498,
      "step": 190
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.23999463021755219,
      "learning_rate": 0.0002,
      "loss": 1.4894,
      "step": 200
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.23633356392383575,
      "learning_rate": 0.0002,
      "loss": 1.4992,
      "step": 210
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.24229061603546143,
      "learning_rate": 0.0002,
      "loss": 1.4827,
      "step": 220
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2810875177383423,
      "learning_rate": 0.0002,
      "loss": 1.4954,
      "step": 230
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.2547338902950287,
      "learning_rate": 0.0002,
      "loss": 1.4881,
      "step": 240
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.2462625801563263,
      "learning_rate": 0.0002,
      "loss": 1.4704,
      "step": 250
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.2700079679489136,
      "learning_rate": 0.0002,
      "loss": 1.4527,
      "step": 260
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.26295939087867737,
      "learning_rate": 0.0002,
      "loss": 1.4482,
      "step": 270
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.26326537132263184,
      "learning_rate": 0.0002,
      "loss": 1.4453,
      "step": 280
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.28642284870147705,
      "learning_rate": 0.0002,
      "loss": 1.4439,
      "step": 290
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.26639145612716675,
      "learning_rate": 0.0002,
      "loss": 1.4398,
      "step": 300
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.2638935446739197,
      "learning_rate": 0.0002,
      "loss": 1.4545,
      "step": 310
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.2797674238681793,
      "learning_rate": 0.0002,
      "loss": 1.4487,
      "step": 320
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.2942632734775543,
      "learning_rate": 0.0002,
      "loss": 1.4441,
      "step": 330
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.2636421024799347,
      "learning_rate": 0.0002,
      "loss": 1.4401,
      "step": 340
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.2771311402320862,
      "learning_rate": 0.0002,
      "loss": 1.4477,
      "step": 350
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.26533156633377075,
      "learning_rate": 0.0002,
      "loss": 1.4404,
      "step": 360
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.27516719698905945,
      "learning_rate": 0.0002,
      "loss": 1.4409,
      "step": 370
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.2677710950374603,
      "learning_rate": 0.0002,
      "loss": 1.4414,
      "step": 380
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.30368202924728394,
      "learning_rate": 0.0002,
      "loss": 1.4334,
      "step": 390
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.2915685176849365,
      "learning_rate": 0.0002,
      "loss": 1.4434,
      "step": 400
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.2796754539012909,
      "learning_rate": 0.0002,
      "loss": 1.4368,
      "step": 410
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.2571493983268738,
      "learning_rate": 0.0002,
      "loss": 1.4388,
      "step": 420
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.2667161226272583,
      "learning_rate": 0.0002,
      "loss": 1.4286,
      "step": 430
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.26809781789779663,
      "learning_rate": 0.0002,
      "loss": 1.435,
      "step": 440
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.27343207597732544,
      "learning_rate": 0.0002,
      "loss": 1.4568,
      "step": 450
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.30309197306632996,
      "learning_rate": 0.0002,
      "loss": 1.4386,
      "step": 460
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.2669704556465149,
      "learning_rate": 0.0002,
      "loss": 1.4459,
      "step": 470
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.2722471058368683,
      "learning_rate": 0.0002,
      "loss": 1.4372,
      "step": 480
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.2800472676753998,
      "learning_rate": 0.0002,
      "loss": 1.4334,
      "step": 490
    },
    {
      "epoch": 2.04,
      "grad_norm": 0.2805831730365753,
      "learning_rate": 0.0002,
      "loss": 1.3786,
      "step": 500
    },
    {
      "epoch": 2.08,
      "grad_norm": 0.2959790527820587,
      "learning_rate": 0.0002,
      "loss": 1.387,
      "step": 510
    },
    {
      "epoch": 2.12,
      "grad_norm": 0.2905080318450928,
      "learning_rate": 0.0002,
      "loss": 1.3776,
      "step": 520
    },
    {
      "epoch": 2.16,
      "grad_norm": 0.29701876640319824,
      "learning_rate": 0.0002,
      "loss": 1.3894,
      "step": 530
    },
    {
      "epoch": 2.2,
      "grad_norm": 0.28363561630249023,
      "learning_rate": 0.0002,
      "loss": 1.388,
      "step": 540
    },
    {
      "epoch": 2.24,
      "grad_norm": 0.30612465739250183,
      "learning_rate": 0.0002,
      "loss": 1.3889,
      "step": 550
    },
    {
      "epoch": 2.29,
      "grad_norm": 0.3003120422363281,
      "learning_rate": 0.0002,
      "loss": 1.383,
      "step": 560
    },
    {
      "epoch": 2.33,
      "grad_norm": 0.3208465874195099,
      "learning_rate": 0.0002,
      "loss": 1.3983,
      "step": 570
    },
    {
      "epoch": 2.37,
      "grad_norm": 0.29617467522621155,
      "learning_rate": 0.0002,
      "loss": 1.3922,
      "step": 580
    },
    {
      "epoch": 2.41,
      "grad_norm": 0.3164435029029846,
      "learning_rate": 0.0002,
      "loss": 1.3845,
      "step": 590
    },
    {
      "epoch": 2.45,
      "grad_norm": 0.30847829580307007,
      "learning_rate": 0.0002,
      "loss": 1.3897,
      "step": 600
    },
    {
      "epoch": 2.49,
      "grad_norm": 0.29763272404670715,
      "learning_rate": 0.0002,
      "loss": 1.4008,
      "step": 610
    },
    {
      "epoch": 2.53,
      "grad_norm": 0.29194703698158264,
      "learning_rate": 0.0002,
      "loss": 1.3868,
      "step": 620
    },
    {
      "epoch": 2.57,
      "grad_norm": 0.300700843334198,
      "learning_rate": 0.0002,
      "loss": 1.3937,
      "step": 630
    },
    {
      "epoch": 2.61,
      "grad_norm": 0.307488352060318,
      "learning_rate": 0.0002,
      "loss": 1.3837,
      "step": 640
    },
    {
      "epoch": 2.65,
      "grad_norm": 0.29698067903518677,
      "learning_rate": 0.0002,
      "loss": 1.3958,
      "step": 650
    },
    {
      "epoch": 2.69,
      "grad_norm": 0.3050895035266876,
      "learning_rate": 0.0002,
      "loss": 1.3819,
      "step": 660
    },
    {
      "epoch": 2.73,
      "grad_norm": 0.3051309585571289,
      "learning_rate": 0.0002,
      "loss": 1.3798,
      "step": 670
    },
    {
      "epoch": 2.78,
      "grad_norm": 0.31160038709640503,
      "learning_rate": 0.0002,
      "loss": 1.3905,
      "step": 680
    },
    {
      "epoch": 2.82,
      "grad_norm": 0.28441959619522095,
      "learning_rate": 0.0002,
      "loss": 1.3931,
      "step": 690
    },
    {
      "epoch": 2.86,
      "grad_norm": 0.30637431144714355,
      "learning_rate": 0.0002,
      "loss": 1.3834,
      "step": 700
    },
    {
      "epoch": 2.9,
      "grad_norm": 0.3127947449684143,
      "learning_rate": 0.0002,
      "loss": 1.39,
      "step": 710
    },
    {
      "epoch": 2.94,
      "grad_norm": 0.29069823026657104,
      "learning_rate": 0.0002,
      "loss": 1.3918,
      "step": 720
    },
    {
      "epoch": 2.98,
      "grad_norm": 0.2803474962711334,
      "learning_rate": 0.0002,
      "loss": 1.3854,
      "step": 730
    },
    {
      "epoch": 3.02,
      "grad_norm": 0.30701911449432373,
      "learning_rate": 0.0002,
      "loss": 1.3639,
      "step": 740
    },
    {
      "epoch": 3.06,
      "grad_norm": 0.32369452714920044,
      "learning_rate": 0.0002,
      "loss": 1.3234,
      "step": 750
    },
    {
      "epoch": 3.1,
      "grad_norm": 0.32104864716529846,
      "learning_rate": 0.0002,
      "loss": 1.3304,
      "step": 760
    },
    {
      "epoch": 3.14,
      "grad_norm": 0.34017834067344666,
      "learning_rate": 0.0002,
      "loss": 1.3288,
      "step": 770
    },
    {
      "epoch": 3.18,
      "grad_norm": 0.3346630036830902,
      "learning_rate": 0.0002,
      "loss": 1.3336,
      "step": 780
    },
    {
      "epoch": 3.22,
      "grad_norm": 0.32203203439712524,
      "learning_rate": 0.0002,
      "loss": 1.3414,
      "step": 790
    },
    {
      "epoch": 3.27,
      "grad_norm": 0.32335567474365234,
      "learning_rate": 0.0002,
      "loss": 1.337,
      "step": 800
    },
    {
      "epoch": 3.31,
      "grad_norm": 0.3451857566833496,
      "learning_rate": 0.0002,
      "loss": 1.341,
      "step": 810
    },
    {
      "epoch": 3.35,
      "grad_norm": 0.3403126895427704,
      "learning_rate": 0.0002,
      "loss": 1.3458,
      "step": 820
    },
    {
      "epoch": 3.39,
      "grad_norm": 0.33177173137664795,
      "learning_rate": 0.0002,
      "loss": 1.3205,
      "step": 830
    },
    {
      "epoch": 3.43,
      "grad_norm": 0.3264249861240387,
      "learning_rate": 0.0002,
      "loss": 1.3406,
      "step": 840
    },
    {
      "epoch": 3.47,
      "grad_norm": 0.33892783522605896,
      "learning_rate": 0.0002,
      "loss": 1.3425,
      "step": 850
    },
    {
      "epoch": 3.51,
      "grad_norm": 0.33876103162765503,
      "learning_rate": 0.0002,
      "loss": 1.3456,
      "step": 860
    },
    {
      "epoch": 3.55,
      "grad_norm": 0.3388749957084656,
      "learning_rate": 0.0002,
      "loss": 1.3377,
      "step": 870
    },
    {
      "epoch": 3.59,
      "grad_norm": 0.33248665928840637,
      "learning_rate": 0.0002,
      "loss": 1.3423,
      "step": 880
    },
    {
      "epoch": 3.63,
      "grad_norm": 0.32318586111068726,
      "learning_rate": 0.0002,
      "loss": 1.342,
      "step": 890
    },
    {
      "epoch": 3.67,
      "grad_norm": 0.3268294334411621,
      "learning_rate": 0.0002,
      "loss": 1.3366,
      "step": 900
    },
    {
      "epoch": 3.71,
      "grad_norm": 0.3288852870464325,
      "learning_rate": 0.0002,
      "loss": 1.3469,
      "step": 910
    },
    {
      "epoch": 3.76,
      "grad_norm": 0.3296649158000946,
      "learning_rate": 0.0002,
      "loss": 1.3364,
      "step": 920
    },
    {
      "epoch": 3.8,
      "grad_norm": 0.34164395928382874,
      "learning_rate": 0.0002,
      "loss": 1.3491,
      "step": 930
    },
    {
      "epoch": 3.84,
      "grad_norm": 0.32904016971588135,
      "learning_rate": 0.0002,
      "loss": 1.3442,
      "step": 940
    },
    {
      "epoch": 3.88,
      "grad_norm": 0.3377957344055176,
      "learning_rate": 0.0002,
      "loss": 1.3502,
      "step": 950
    },
    {
      "epoch": 3.92,
      "grad_norm": 0.3297536075115204,
      "learning_rate": 0.0002,
      "loss": 1.3442,
      "step": 960
    },
    {
      "epoch": 3.96,
      "grad_norm": 0.34833455085754395,
      "learning_rate": 0.0002,
      "loss": 1.339,
      "step": 970
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.32754483819007874,
      "learning_rate": 0.0002,
      "loss": 1.3565,
      "step": 980
    },
    {
      "epoch": 4.04,
      "grad_norm": 0.3560408651828766,
      "learning_rate": 0.0002,
      "loss": 1.2724,
      "step": 990
    },
    {
      "epoch": 4.08,
      "grad_norm": 0.3614589273929596,
      "learning_rate": 0.0002,
      "loss": 1.2782,
      "step": 1000
    },
    {
      "epoch": 4.12,
      "grad_norm": 0.34403491020202637,
      "learning_rate": 0.0002,
      "loss": 1.2794,
      "step": 1010
    },
    {
      "epoch": 4.16,
      "grad_norm": 0.35711953043937683,
      "learning_rate": 0.0002,
      "loss": 1.2898,
      "step": 1020
    },
    {
      "epoch": 4.2,
      "grad_norm": 0.3667463958263397,
      "learning_rate": 0.0002,
      "loss": 1.2783,
      "step": 1030
    },
    {
      "epoch": 4.24,
      "grad_norm": 0.35901930928230286,
      "learning_rate": 0.0002,
      "loss": 1.2905,
      "step": 1040
    },
    {
      "epoch": 4.29,
      "grad_norm": 0.3650436997413635,
      "learning_rate": 0.0002,
      "loss": 1.2947,
      "step": 1050
    },
    {
      "epoch": 4.33,
      "grad_norm": 0.36757028102874756,
      "learning_rate": 0.0002,
      "loss": 1.2811,
      "step": 1060
    },
    {
      "epoch": 4.37,
      "grad_norm": 0.3632780611515045,
      "learning_rate": 0.0002,
      "loss": 1.2944,
      "step": 1070
    },
    {
      "epoch": 4.41,
      "grad_norm": 0.3927798271179199,
      "learning_rate": 0.0002,
      "loss": 1.2957,
      "step": 1080
    },
    {
      "epoch": 4.45,
      "grad_norm": 0.366366982460022,
      "learning_rate": 0.0002,
      "loss": 1.3086,
      "step": 1090
    },
    {
      "epoch": 4.49,
      "grad_norm": 0.37549176812171936,
      "learning_rate": 0.0002,
      "loss": 1.2879,
      "step": 1100
    },
    {
      "epoch": 4.53,
      "grad_norm": 0.3678297698497772,
      "learning_rate": 0.0002,
      "loss": 1.289,
      "step": 1110
    },
    {
      "epoch": 4.57,
      "grad_norm": 0.3672493100166321,
      "learning_rate": 0.0002,
      "loss": 1.29,
      "step": 1120
    },
    {
      "epoch": 4.61,
      "grad_norm": 0.3762926161289215,
      "learning_rate": 0.0002,
      "loss": 1.3001,
      "step": 1130
    },
    {
      "epoch": 4.65,
      "grad_norm": 0.36861684918403625,
      "learning_rate": 0.0002,
      "loss": 1.2947,
      "step": 1140
    },
    {
      "epoch": 4.69,
      "grad_norm": 0.3841796815395355,
      "learning_rate": 0.0002,
      "loss": 1.2968,
      "step": 1150
    },
    {
      "epoch": 4.73,
      "grad_norm": 0.37184152007102966,
      "learning_rate": 0.0002,
      "loss": 1.297,
      "step": 1160
    },
    {
      "epoch": 4.78,
      "grad_norm": 0.3747577369213104,
      "learning_rate": 0.0002,
      "loss": 1.2922,
      "step": 1170
    },
    {
      "epoch": 4.82,
      "grad_norm": 0.36549532413482666,
      "learning_rate": 0.0002,
      "loss": 1.2963,
      "step": 1180
    },
    {
      "epoch": 4.86,
      "grad_norm": 0.3813631534576416,
      "learning_rate": 0.0002,
      "loss": 1.313,
      "step": 1190
    },
    {
      "epoch": 4.9,
      "grad_norm": 0.3664815127849579,
      "learning_rate": 0.0002,
      "loss": 1.2999,
      "step": 1200
    },
    {
      "epoch": 4.94,
      "grad_norm": 0.38972264528274536,
      "learning_rate": 0.0002,
      "loss": 1.3066,
      "step": 1210
    },
    {
      "epoch": 4.98,
      "grad_norm": 0.3710123300552368,
      "learning_rate": 0.0002,
      "loss": 1.3133,
      "step": 1220
    },
    {
      "epoch": 5.02,
      "grad_norm": 0.3784914016723633,
      "learning_rate": 0.0002,
      "loss": 1.2625,
      "step": 1230
    },
    {
      "epoch": 5.06,
      "grad_norm": 0.416645348072052,
      "learning_rate": 0.0002,
      "loss": 1.236,
      "step": 1240
    },
    {
      "epoch": 5.1,
      "grad_norm": 0.3829210102558136,
      "learning_rate": 0.0002,
      "loss": 1.2314,
      "step": 1250
    },
    {
      "epoch": 5.14,
      "grad_norm": 0.38960739970207214,
      "learning_rate": 0.0002,
      "loss": 1.2239,
      "step": 1260
    },
    {
      "epoch": 5.18,
      "grad_norm": 0.3890588581562042,
      "learning_rate": 0.0002,
      "loss": 1.2399,
      "step": 1270
    },
    {
      "epoch": 5.22,
      "grad_norm": 0.4003150165081024,
      "learning_rate": 0.0002,
      "loss": 1.2365,
      "step": 1280
    },
    {
      "epoch": 5.27,
      "grad_norm": 0.4123793840408325,
      "learning_rate": 0.0002,
      "loss": 1.2429,
      "step": 1290
    },
    {
      "epoch": 5.31,
      "grad_norm": 0.43428850173950195,
      "learning_rate": 0.0002,
      "loss": 1.2407,
      "step": 1300
    },
    {
      "epoch": 5.35,
      "grad_norm": 0.4154336154460907,
      "learning_rate": 0.0002,
      "loss": 1.2417,
      "step": 1310
    },
    {
      "epoch": 5.39,
      "grad_norm": 0.40663963556289673,
      "learning_rate": 0.0002,
      "loss": 1.2517,
      "step": 1320
    },
    {
      "epoch": 5.43,
      "grad_norm": 0.40631529688835144,
      "learning_rate": 0.0002,
      "loss": 1.2617,
      "step": 1330
    },
    {
      "epoch": 5.47,
      "grad_norm": 0.4252465069293976,
      "learning_rate": 0.0002,
      "loss": 1.2564,
      "step": 1340
    },
    {
      "epoch": 5.51,
      "grad_norm": 0.4079826772212982,
      "learning_rate": 0.0002,
      "loss": 1.2554,
      "step": 1350
    },
    {
      "epoch": 5.55,
      "grad_norm": 0.41097384691238403,
      "learning_rate": 0.0002,
      "loss": 1.2503,
      "step": 1360
    },
    {
      "epoch": 5.59,
      "grad_norm": 0.42485907673835754,
      "learning_rate": 0.0002,
      "loss": 1.249,
      "step": 1370
    },
    {
      "epoch": 5.63,
      "grad_norm": 0.4294757544994354,
      "learning_rate": 0.0002,
      "loss": 1.2423,
      "step": 1380
    },
    {
      "epoch": 5.67,
      "grad_norm": 0.4221188426017761,
      "learning_rate": 0.0002,
      "loss": 1.25,
      "step": 1390
    },
    {
      "epoch": 5.71,
      "grad_norm": 0.3959333002567291,
      "learning_rate": 0.0002,
      "loss": 1.2428,
      "step": 1400
    },
    {
      "epoch": 5.76,
      "grad_norm": 0.3953990340232849,
      "learning_rate": 0.0002,
      "loss": 1.2706,
      "step": 1410
    },
    {
      "epoch": 5.8,
      "grad_norm": 0.3927258849143982,
      "learning_rate": 0.0002,
      "loss": 1.2535,
      "step": 1420
    },
    {
      "epoch": 5.84,
      "grad_norm": 0.4078606069087982,
      "learning_rate": 0.0002,
      "loss": 1.2764,
      "step": 1430
    },
    {
      "epoch": 5.88,
      "grad_norm": 0.3999166786670685,
      "learning_rate": 0.0002,
      "loss": 1.2641,
      "step": 1440
    },
    {
      "epoch": 5.92,
      "grad_norm": 0.39490771293640137,
      "learning_rate": 0.0002,
      "loss": 1.2693,
      "step": 1450
    },
    {
      "epoch": 5.96,
      "grad_norm": 0.4049830138683319,
      "learning_rate": 0.0002,
      "loss": 1.2672,
      "step": 1460
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.40505218505859375,
      "learning_rate": 0.0002,
      "loss": 1.276,
      "step": 1470
    },
    {
      "epoch": 6.04,
      "grad_norm": 0.45407840609550476,
      "learning_rate": 0.0002,
      "loss": 1.1899,
      "step": 1480
    },
    {
      "epoch": 6.08,
      "grad_norm": 0.43418335914611816,
      "learning_rate": 0.0002,
      "loss": 1.185,
      "step": 1490
    },
    {
      "epoch": 6.12,
      "grad_norm": 0.44777223467826843,
      "learning_rate": 0.0002,
      "loss": 1.1904,
      "step": 1500
    },
    {
      "epoch": 6.16,
      "grad_norm": 0.45681995153427124,
      "learning_rate": 0.0002,
      "loss": 1.1749,
      "step": 1510
    },
    {
      "epoch": 6.2,
      "grad_norm": 0.4477551281452179,
      "learning_rate": 0.0002,
      "loss": 1.1987,
      "step": 1520
    },
    {
      "epoch": 6.24,
      "grad_norm": 0.4677373766899109,
      "learning_rate": 0.0002,
      "loss": 1.2036,
      "step": 1530
    },
    {
      "epoch": 6.29,
      "grad_norm": 0.4448174238204956,
      "learning_rate": 0.0002,
      "loss": 1.1921,
      "step": 1540
    },
    {
      "epoch": 6.33,
      "grad_norm": 0.47542881965637207,
      "learning_rate": 0.0002,
      "loss": 1.2062,
      "step": 1550
    },
    {
      "epoch": 6.37,
      "grad_norm": 0.4679628312587738,
      "learning_rate": 0.0002,
      "loss": 1.1987,
      "step": 1560
    },
    {
      "epoch": 6.41,
      "grad_norm": 0.45168885588645935,
      "learning_rate": 0.0002,
      "loss": 1.2101,
      "step": 1570
    },
    {
      "epoch": 6.45,
      "grad_norm": 0.4579172730445862,
      "learning_rate": 0.0002,
      "loss": 1.2143,
      "step": 1580
    },
    {
      "epoch": 6.49,
      "grad_norm": 0.44909214973449707,
      "learning_rate": 0.0002,
      "loss": 1.2047,
      "step": 1590
    },
    {
      "epoch": 6.53,
      "grad_norm": 0.4445750415325165,
      "learning_rate": 0.0002,
      "loss": 1.2098,
      "step": 1600
    },
    {
      "epoch": 6.57,
      "grad_norm": 0.455679327249527,
      "learning_rate": 0.0002,
      "loss": 1.2149,
      "step": 1610
    },
    {
      "epoch": 6.61,
      "grad_norm": 0.44062715768814087,
      "learning_rate": 0.0002,
      "loss": 1.2209,
      "step": 1620
    },
    {
      "epoch": 6.65,
      "grad_norm": 0.4541163444519043,
      "learning_rate": 0.0002,
      "loss": 1.2164,
      "step": 1630
    },
    {
      "epoch": 6.69,
      "grad_norm": 0.43290677666664124,
      "learning_rate": 0.0002,
      "loss": 1.2092,
      "step": 1640
    },
    {
      "epoch": 6.73,
      "grad_norm": 0.44760000705718994,
      "learning_rate": 0.0002,
      "loss": 1.2211,
      "step": 1650
    },
    {
      "epoch": 6.78,
      "grad_norm": 0.4385235011577606,
      "learning_rate": 0.0002,
      "loss": 1.2205,
      "step": 1660
    },
    {
      "epoch": 6.82,
      "grad_norm": 0.44804102182388306,
      "learning_rate": 0.0002,
      "loss": 1.2244,
      "step": 1670
    },
    {
      "epoch": 6.86,
      "grad_norm": 0.4405447244644165,
      "learning_rate": 0.0002,
      "loss": 1.2388,
      "step": 1680
    },
    {
      "epoch": 6.9,
      "grad_norm": 0.4315509796142578,
      "learning_rate": 0.0002,
      "loss": 1.2322,
      "step": 1690
    },
    {
      "epoch": 6.94,
      "grad_norm": 0.44136691093444824,
      "learning_rate": 0.0002,
      "loss": 1.2252,
      "step": 1700
    },
    {
      "epoch": 6.98,
      "grad_norm": 0.44583484530448914,
      "learning_rate": 0.0002,
      "loss": 1.2238,
      "step": 1710
    },
    {
      "epoch": 7.02,
      "grad_norm": 0.471617728471756,
      "learning_rate": 0.0002,
      "loss": 1.1937,
      "step": 1720
    },
    {
      "epoch": 7.06,
      "grad_norm": 0.477583110332489,
      "learning_rate": 0.0002,
      "loss": 1.1456,
      "step": 1730
    },
    {
      "epoch": 7.1,
      "grad_norm": 0.4998668432235718,
      "learning_rate": 0.0002,
      "loss": 1.1457,
      "step": 1740
    },
    {
      "epoch": 7.14,
      "grad_norm": 0.4928749203681946,
      "learning_rate": 0.0002,
      "loss": 1.1546,
      "step": 1750
    },
    {
      "epoch": 7.18,
      "grad_norm": 0.49452856183052063,
      "learning_rate": 0.0002,
      "loss": 1.1453,
      "step": 1760
    },
    {
      "epoch": 7.22,
      "grad_norm": 0.5097588896751404,
      "learning_rate": 0.0002,
      "loss": 1.1501,
      "step": 1770
    },
    {
      "epoch": 7.27,
      "grad_norm": 0.4926673471927643,
      "learning_rate": 0.0002,
      "loss": 1.1606,
      "step": 1780
    },
    {
      "epoch": 7.31,
      "grad_norm": 0.5037740468978882,
      "learning_rate": 0.0002,
      "loss": 1.1675,
      "step": 1790
    },
    {
      "epoch": 7.35,
      "grad_norm": 0.4997263550758362,
      "learning_rate": 0.0002,
      "loss": 1.1653,
      "step": 1800
    },
    {
      "epoch": 7.39,
      "grad_norm": 0.4845424294471741,
      "learning_rate": 0.0002,
      "loss": 1.1544,
      "step": 1810
    },
    {
      "epoch": 7.43,
      "grad_norm": 0.49940812587738037,
      "learning_rate": 0.0002,
      "loss": 1.1812,
      "step": 1820
    },
    {
      "epoch": 7.47,
      "grad_norm": 0.4817129373550415,
      "learning_rate": 0.0002,
      "loss": 1.1714,
      "step": 1830
    },
    {
      "epoch": 7.51,
      "grad_norm": 0.4989680051803589,
      "learning_rate": 0.0002,
      "loss": 1.1741,
      "step": 1840
    },
    {
      "epoch": 7.55,
      "grad_norm": 0.5034418106079102,
      "learning_rate": 0.0002,
      "loss": 1.1808,
      "step": 1850
    },
    {
      "epoch": 7.59,
      "grad_norm": 0.5104469060897827,
      "learning_rate": 0.0002,
      "loss": 1.1773,
      "step": 1860
    },
    {
      "epoch": 7.63,
      "grad_norm": 0.4944339394569397,
      "learning_rate": 0.0002,
      "loss": 1.1758,
      "step": 1870
    },
    {
      "epoch": 7.67,
      "grad_norm": 0.4833206236362457,
      "learning_rate": 0.0002,
      "loss": 1.1786,
      "step": 1880
    },
    {
      "epoch": 7.71,
      "grad_norm": 0.48878568410873413,
      "learning_rate": 0.0002,
      "loss": 1.176,
      "step": 1890
    },
    {
      "epoch": 7.76,
      "grad_norm": 0.4889214038848877,
      "learning_rate": 0.0002,
      "loss": 1.1796,
      "step": 1900
    },
    {
      "epoch": 7.8,
      "grad_norm": 0.49351948499679565,
      "learning_rate": 0.0002,
      "loss": 1.1847,
      "step": 1910
    },
    {
      "epoch": 7.84,
      "grad_norm": 0.4790671169757843,
      "learning_rate": 0.0002,
      "loss": 1.184,
      "step": 1920
    },
    {
      "epoch": 7.88,
      "grad_norm": 0.49084314703941345,
      "learning_rate": 0.0002,
      "loss": 1.1905,
      "step": 1930
    },
    {
      "epoch": 7.92,
      "grad_norm": 0.49543461203575134,
      "learning_rate": 0.0002,
      "loss": 1.1998,
      "step": 1940
    },
    {
      "epoch": 7.96,
      "grad_norm": 0.4767250716686249,
      "learning_rate": 0.0002,
      "loss": 1.1921,
      "step": 1950
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.4650532901287079,
      "learning_rate": 0.0002,
      "loss": 1.1992,
      "step": 1960
    },
    {
      "epoch": 8.04,
      "grad_norm": 0.5073546767234802,
      "learning_rate": 0.0002,
      "loss": 1.1067,
      "step": 1970
    },
    {
      "epoch": 8.08,
      "grad_norm": 0.5141373872756958,
      "learning_rate": 0.0002,
      "loss": 1.1067,
      "step": 1980
    },
    {
      "epoch": 8.12,
      "grad_norm": 0.5466707348823547,
      "learning_rate": 0.0002,
      "loss": 1.106,
      "step": 1990
    },
    {
      "epoch": 8.16,
      "grad_norm": 0.5313953161239624,
      "learning_rate": 0.0002,
      "loss": 1.118,
      "step": 2000
    },
    {
      "epoch": 8.2,
      "grad_norm": 0.5327037572860718,
      "learning_rate": 0.0002,
      "loss": 1.1202,
      "step": 2010
    },
    {
      "epoch": 8.24,
      "grad_norm": 0.5393937230110168,
      "learning_rate": 0.0002,
      "loss": 1.1195,
      "step": 2020
    },
    {
      "epoch": 8.29,
      "grad_norm": 0.5502837896347046,
      "learning_rate": 0.0002,
      "loss": 1.1163,
      "step": 2030
    },
    {
      "epoch": 8.33,
      "grad_norm": 0.5364841818809509,
      "learning_rate": 0.0002,
      "loss": 1.131,
      "step": 2040
    },
    {
      "epoch": 8.37,
      "grad_norm": 0.5162956118583679,
      "learning_rate": 0.0002,
      "loss": 1.1349,
      "step": 2050
    },
    {
      "epoch": 8.41,
      "grad_norm": 0.5319573283195496,
      "learning_rate": 0.0002,
      "loss": 1.1331,
      "step": 2060
    },
    {
      "epoch": 8.45,
      "grad_norm": 0.5466243028640747,
      "learning_rate": 0.0002,
      "loss": 1.138,
      "step": 2070
    },
    {
      "epoch": 8.49,
      "grad_norm": 0.5623811483383179,
      "learning_rate": 0.0002,
      "loss": 1.1358,
      "step": 2080
    },
    {
      "epoch": 8.53,
      "grad_norm": 0.5489851832389832,
      "learning_rate": 0.0002,
      "loss": 1.1368,
      "step": 2090
    },
    {
      "epoch": 8.57,
      "grad_norm": 0.5441839694976807,
      "learning_rate": 0.0002,
      "loss": 1.1433,
      "step": 2100
    },
    {
      "epoch": 8.61,
      "grad_norm": 0.5348924398422241,
      "learning_rate": 0.0002,
      "loss": 1.1457,
      "step": 2110
    },
    {
      "epoch": 8.65,
      "grad_norm": 0.5622116327285767,
      "learning_rate": 0.0002,
      "loss": 1.1498,
      "step": 2120
    },
    {
      "epoch": 8.69,
      "grad_norm": 0.530335545539856,
      "learning_rate": 0.0002,
      "loss": 1.1316,
      "step": 2130
    },
    {
      "epoch": 8.73,
      "grad_norm": 0.529975175857544,
      "learning_rate": 0.0002,
      "loss": 1.1485,
      "step": 2140
    },
    {
      "epoch": 8.78,
      "grad_norm": 0.5368651151657104,
      "learning_rate": 0.0002,
      "loss": 1.1551,
      "step": 2150
    },
    {
      "epoch": 8.82,
      "grad_norm": 0.534313976764679,
      "learning_rate": 0.0002,
      "loss": 1.1513,
      "step": 2160
    },
    {
      "epoch": 8.86,
      "grad_norm": 0.5263510346412659,
      "learning_rate": 0.0002,
      "loss": 1.1538,
      "step": 2170
    },
    {
      "epoch": 8.9,
      "grad_norm": 0.5384341478347778,
      "learning_rate": 0.0002,
      "loss": 1.1493,
      "step": 2180
    },
    {
      "epoch": 8.94,
      "grad_norm": 0.5207712054252625,
      "learning_rate": 0.0002,
      "loss": 1.1633,
      "step": 2190
    },
    {
      "epoch": 8.98,
      "grad_norm": 0.5430459976196289,
      "learning_rate": 0.0002,
      "loss": 1.159,
      "step": 2200
    },
    {
      "epoch": 9.02,
      "grad_norm": 0.5784776210784912,
      "learning_rate": 0.0002,
      "loss": 1.1247,
      "step": 2210
    },
    {
      "epoch": 9.06,
      "grad_norm": 0.544442892074585,
      "learning_rate": 0.0002,
      "loss": 1.0716,
      "step": 2220
    },
    {
      "epoch": 9.1,
      "grad_norm": 0.559004545211792,
      "learning_rate": 0.0002,
      "loss": 1.0747,
      "step": 2230
    },
    {
      "epoch": 9.14,
      "grad_norm": 0.5693913698196411,
      "learning_rate": 0.0002,
      "loss": 1.0856,
      "step": 2240
    },
    {
      "epoch": 9.18,
      "grad_norm": 0.5680646896362305,
      "learning_rate": 0.0002,
      "loss": 1.071,
      "step": 2250
    },
    {
      "epoch": 9.22,
      "grad_norm": 0.5713136196136475,
      "learning_rate": 0.0002,
      "loss": 1.077,
      "step": 2260
    },
    {
      "epoch": 9.27,
      "grad_norm": 0.5708666443824768,
      "learning_rate": 0.0002,
      "loss": 1.09,
      "step": 2270
    },
    {
      "epoch": 9.31,
      "grad_norm": 0.5802373290061951,
      "learning_rate": 0.0002,
      "loss": 1.0949,
      "step": 2280
    },
    {
      "epoch": 9.35,
      "grad_norm": 0.5722843408584595,
      "learning_rate": 0.0002,
      "loss": 1.0973,
      "step": 2290
    },
    {
      "epoch": 9.39,
      "grad_norm": 0.5759919881820679,
      "learning_rate": 0.0002,
      "loss": 1.0966,
      "step": 2300
    },
    {
      "epoch": 9.43,
      "grad_norm": 0.609473466873169,
      "learning_rate": 0.0002,
      "loss": 1.107,
      "step": 2310
    },
    {
      "epoch": 9.47,
      "grad_norm": 0.5732268691062927,
      "learning_rate": 0.0002,
      "loss": 1.1016,
      "step": 2320
    },
    {
      "epoch": 9.51,
      "grad_norm": 0.5877196192741394,
      "learning_rate": 0.0002,
      "loss": 1.0994,
      "step": 2330
    },
    {
      "epoch": 9.55,
      "grad_norm": 0.5665601491928101,
      "learning_rate": 0.0002,
      "loss": 1.1022,
      "step": 2340
    },
    {
      "epoch": 9.59,
      "grad_norm": 0.5648427605628967,
      "learning_rate": 0.0002,
      "loss": 1.1135,
      "step": 2350
    },
    {
      "epoch": 9.63,
      "grad_norm": 0.5821457505226135,
      "learning_rate": 0.0002,
      "loss": 1.1129,
      "step": 2360
    },
    {
      "epoch": 9.67,
      "grad_norm": 0.5789903402328491,
      "learning_rate": 0.0002,
      "loss": 1.1108,
      "step": 2370
    },
    {
      "epoch": 9.71,
      "grad_norm": 0.5680549144744873,
      "learning_rate": 0.0002,
      "loss": 1.1149,
      "step": 2380
    },
    {
      "epoch": 9.76,
      "grad_norm": 0.5805366039276123,
      "learning_rate": 0.0002,
      "loss": 1.1176,
      "step": 2390
    },
    {
      "epoch": 9.8,
      "grad_norm": 0.575652003288269,
      "learning_rate": 0.0002,
      "loss": 1.1175,
      "step": 2400
    },
    {
      "epoch": 9.84,
      "grad_norm": 0.5799399614334106,
      "learning_rate": 0.0002,
      "loss": 1.1145,
      "step": 2410
    },
    {
      "epoch": 9.88,
      "grad_norm": 0.5476598143577576,
      "learning_rate": 0.0002,
      "loss": 1.1241,
      "step": 2420
    },
    {
      "epoch": 9.92,
      "grad_norm": 0.5581275224685669,
      "learning_rate": 0.0002,
      "loss": 1.1253,
      "step": 2430
    },
    {
      "epoch": 9.96,
      "grad_norm": 0.5669559836387634,
      "learning_rate": 0.0002,
      "loss": 1.1261,
      "step": 2440
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.5465613603591919,
      "learning_rate": 0.0002,
      "loss": 1.1384,
      "step": 2450
    },
    {
      "epoch": 10.04,
      "grad_norm": 0.6073935031890869,
      "learning_rate": 0.0002,
      "loss": 1.0359,
      "step": 2460
    },
    {
      "epoch": 10.08,
      "grad_norm": 0.6463605165481567,
      "learning_rate": 0.0002,
      "loss": 1.0479,
      "step": 2470
    },
    {
      "epoch": 10.12,
      "grad_norm": 0.6026530861854553,
      "learning_rate": 0.0002,
      "loss": 1.0404,
      "step": 2480
    },
    {
      "epoch": 10.16,
      "grad_norm": 0.6082301139831543,
      "learning_rate": 0.0002,
      "loss": 1.0428,
      "step": 2490
    },
    {
      "epoch": 10.2,
      "grad_norm": 0.612219512462616,
      "learning_rate": 0.0002,
      "loss": 1.0477,
      "step": 2500
    },
    {
      "epoch": 10.24,
      "grad_norm": 0.6004548668861389,
      "learning_rate": 0.0002,
      "loss": 1.052,
      "step": 2510
    },
    {
      "epoch": 10.29,
      "grad_norm": 0.6319270133972168,
      "learning_rate": 0.0002,
      "loss": 1.0643,
      "step": 2520
    },
    {
      "epoch": 10.33,
      "grad_norm": 0.644594132900238,
      "learning_rate": 0.0002,
      "loss": 1.0536,
      "step": 2530
    },
    {
      "epoch": 10.37,
      "grad_norm": 0.6086618304252625,
      "learning_rate": 0.0002,
      "loss": 1.0633,
      "step": 2540
    },
    {
      "epoch": 10.41,
      "grad_norm": 0.6085098385810852,
      "learning_rate": 0.0002,
      "loss": 1.0688,
      "step": 2550
    },
    {
      "epoch": 10.45,
      "grad_norm": 0.6334928870201111,
      "learning_rate": 0.0002,
      "loss": 1.0797,
      "step": 2560
    },
    {
      "epoch": 10.49,
      "grad_norm": 0.5865157246589661,
      "learning_rate": 0.0002,
      "loss": 1.0689,
      "step": 2570
    },
    {
      "epoch": 10.53,
      "grad_norm": 0.6139411926269531,
      "learning_rate": 0.0002,
      "loss": 1.0754,
      "step": 2580
    },
    {
      "epoch": 10.57,
      "grad_norm": 0.6306671500205994,
      "learning_rate": 0.0002,
      "loss": 1.0831,
      "step": 2590
    },
    {
      "epoch": 10.61,
      "grad_norm": 0.5879333019256592,
      "learning_rate": 0.0002,
      "loss": 1.0838,
      "step": 2600
    },
    {
      "epoch": 10.65,
      "grad_norm": 0.6128151416778564,
      "learning_rate": 0.0002,
      "loss": 1.0862,
      "step": 2610
    },
    {
      "epoch": 10.69,
      "grad_norm": 0.6163910031318665,
      "learning_rate": 0.0002,
      "loss": 1.0784,
      "step": 2620
    },
    {
      "epoch": 10.73,
      "grad_norm": 0.6311454772949219,
      "learning_rate": 0.0002,
      "loss": 1.0858,
      "step": 2630
    },
    {
      "epoch": 10.78,
      "grad_norm": 0.5947273373603821,
      "learning_rate": 0.0002,
      "loss": 1.0885,
      "step": 2640
    },
    {
      "epoch": 10.82,
      "grad_norm": 0.622706949710846,
      "learning_rate": 0.0002,
      "loss": 1.0953,
      "step": 2650
    },
    {
      "epoch": 10.86,
      "grad_norm": 0.606569230556488,
      "learning_rate": 0.0002,
      "loss": 1.092,
      "step": 2660
    },
    {
      "epoch": 10.9,
      "grad_norm": 0.6014845371246338,
      "learning_rate": 0.0002,
      "loss": 1.0998,
      "step": 2670
    },
    {
      "epoch": 10.94,
      "grad_norm": 0.6037503480911255,
      "learning_rate": 0.0002,
      "loss": 1.0997,
      "step": 2680
    },
    {
      "epoch": 10.98,
      "grad_norm": 0.5877541899681091,
      "learning_rate": 0.0002,
      "loss": 1.1033,
      "step": 2690
    },
    {
      "epoch": 11.02,
      "grad_norm": 0.6770003437995911,
      "learning_rate": 0.0002,
      "loss": 1.0473,
      "step": 2700
    },
    {
      "epoch": 11.06,
      "grad_norm": 0.6640401482582092,
      "learning_rate": 0.0002,
      "loss": 1.0122,
      "step": 2710
    },
    {
      "epoch": 11.1,
      "grad_norm": 0.6727384924888611,
      "learning_rate": 0.0002,
      "loss": 1.0112,
      "step": 2720
    },
    {
      "epoch": 11.14,
      "grad_norm": 0.6241724491119385,
      "learning_rate": 0.0002,
      "loss": 1.0163,
      "step": 2730
    },
    {
      "epoch": 11.18,
      "grad_norm": 0.6085687875747681,
      "learning_rate": 0.0002,
      "loss": 1.0273,
      "step": 2740
    },
    {
      "epoch": 11.22,
      "grad_norm": 0.6746591329574585,
      "learning_rate": 0.0002,
      "loss": 1.0187,
      "step": 2750
    },
    {
      "epoch": 11.27,
      "grad_norm": 0.6451332569122314,
      "learning_rate": 0.0002,
      "loss": 1.0229,
      "step": 2760
    },
    {
      "epoch": 11.31,
      "grad_norm": 0.6338198781013489,
      "learning_rate": 0.0002,
      "loss": 1.0256,
      "step": 2770
    },
    {
      "epoch": 11.35,
      "grad_norm": 0.6467145085334778,
      "learning_rate": 0.0002,
      "loss": 1.0301,
      "step": 2780
    },
    {
      "epoch": 11.39,
      "grad_norm": 0.6614335179328918,
      "learning_rate": 0.0002,
      "loss": 1.044,
      "step": 2790
    },
    {
      "epoch": 11.43,
      "grad_norm": 0.6373239755630493,
      "learning_rate": 0.0002,
      "loss": 1.0453,
      "step": 2800
    },
    {
      "epoch": 11.47,
      "grad_norm": 0.6863211393356323,
      "learning_rate": 0.0002,
      "loss": 1.0461,
      "step": 2810
    },
    {
      "epoch": 11.51,
      "grad_norm": 0.608102023601532,
      "learning_rate": 0.0002,
      "loss": 1.0356,
      "step": 2820
    },
    {
      "epoch": 11.55,
      "grad_norm": 0.6649318933486938,
      "learning_rate": 0.0002,
      "loss": 1.0521,
      "step": 2830
    },
    {
      "epoch": 11.59,
      "grad_norm": 0.6344706416130066,
      "learning_rate": 0.0002,
      "loss": 1.0556,
      "step": 2840
    },
    {
      "epoch": 11.63,
      "grad_norm": 0.6278392672538757,
      "learning_rate": 0.0002,
      "loss": 1.0476,
      "step": 2850
    },
    {
      "epoch": 11.67,
      "grad_norm": 0.6885977983474731,
      "learning_rate": 0.0002,
      "loss": 1.0565,
      "step": 2860
    },
    {
      "epoch": 11.71,
      "grad_norm": 0.6405135989189148,
      "learning_rate": 0.0002,
      "loss": 1.064,
      "step": 2870
    },
    {
      "epoch": 11.76,
      "grad_norm": 0.639030396938324,
      "learning_rate": 0.0002,
      "loss": 1.069,
      "step": 2880
    },
    {
      "epoch": 11.8,
      "grad_norm": 0.6438204050064087,
      "learning_rate": 0.0002,
      "loss": 1.0726,
      "step": 2890
    },
    {
      "epoch": 11.84,
      "grad_norm": 0.654549777507782,
      "learning_rate": 0.0002,
      "loss": 1.0697,
      "step": 2900
    },
    {
      "epoch": 11.88,
      "grad_norm": 0.626146674156189,
      "learning_rate": 0.0002,
      "loss": 1.0682,
      "step": 2910
    },
    {
      "epoch": 11.92,
      "grad_norm": 0.6238541007041931,
      "learning_rate": 0.0002,
      "loss": 1.0669,
      "step": 2920
    },
    {
      "epoch": 11.96,
      "grad_norm": 0.6308706402778625,
      "learning_rate": 0.0002,
      "loss": 1.07,
      "step": 2930
    },
    {
      "epoch": 12.0,
      "grad_norm": 0.6104647517204285,
      "learning_rate": 0.0002,
      "loss": 1.0726,
      "step": 2940
    },
    {
      "epoch": 12.04,
      "grad_norm": 0.6474860906600952,
      "learning_rate": 0.0002,
      "loss": 0.9839,
      "step": 2950
    },
    {
      "epoch": 12.08,
      "grad_norm": 0.647906482219696,
      "learning_rate": 0.0002,
      "loss": 0.982,
      "step": 2960
    },
    {
      "epoch": 12.12,
      "grad_norm": 0.6834690570831299,
      "learning_rate": 0.0002,
      "loss": 0.9793,
      "step": 2970
    },
    {
      "epoch": 12.16,
      "grad_norm": 0.6847568154335022,
      "learning_rate": 0.0002,
      "loss": 0.9796,
      "step": 2980
    },
    {
      "epoch": 12.2,
      "grad_norm": 0.6866670846939087,
      "learning_rate": 0.0002,
      "loss": 0.9938,
      "step": 2990
    },
    {
      "epoch": 12.24,
      "grad_norm": 0.6928325891494751,
      "learning_rate": 0.0002,
      "loss": 0.9911,
      "step": 3000
    },
    {
      "epoch": 12.29,
      "grad_norm": 0.680347204208374,
      "learning_rate": 0.0002,
      "loss": 1.0056,
      "step": 3010
    },
    {
      "epoch": 12.33,
      "grad_norm": 0.656384289264679,
      "learning_rate": 0.0002,
      "loss": 1.0116,
      "step": 3020
    },
    {
      "epoch": 12.37,
      "grad_norm": 0.6849060654640198,
      "learning_rate": 0.0002,
      "loss": 1.0191,
      "step": 3030
    },
    {
      "epoch": 12.41,
      "grad_norm": 0.6677911877632141,
      "learning_rate": 0.0002,
      "loss": 1.0152,
      "step": 3040
    },
    {
      "epoch": 12.45,
      "grad_norm": 0.697119414806366,
      "learning_rate": 0.0002,
      "loss": 1.0144,
      "step": 3050
    },
    {
      "epoch": 12.49,
      "grad_norm": 0.6748687028884888,
      "learning_rate": 0.0002,
      "loss": 1.0099,
      "step": 3060
    },
    {
      "epoch": 12.53,
      "grad_norm": 0.6613055467605591,
      "learning_rate": 0.0002,
      "loss": 1.0213,
      "step": 3070
    },
    {
      "epoch": 12.57,
      "grad_norm": 0.6857287287712097,
      "learning_rate": 0.0002,
      "loss": 1.0309,
      "step": 3080
    },
    {
      "epoch": 12.61,
      "grad_norm": 0.6758785843849182,
      "learning_rate": 0.0002,
      "loss": 1.0447,
      "step": 3090
    },
    {
      "epoch": 12.65,
      "grad_norm": 0.6673085689544678,
      "learning_rate": 0.0002,
      "loss": 1.0445,
      "step": 3100
    },
    {
      "epoch": 12.69,
      "grad_norm": 0.6487051844596863,
      "learning_rate": 0.0002,
      "loss": 1.0271,
      "step": 3110
    },
    {
      "epoch": 12.73,
      "grad_norm": 0.7024916410446167,
      "learning_rate": 0.0002,
      "loss": 1.0357,
      "step": 3120
    },
    {
      "epoch": 12.78,
      "grad_norm": 0.6643483638763428,
      "learning_rate": 0.0002,
      "loss": 1.0296,
      "step": 3130
    },
    {
      "epoch": 12.82,
      "grad_norm": 0.6508316993713379,
      "learning_rate": 0.0002,
      "loss": 1.05,
      "step": 3140
    },
    {
      "epoch": 12.86,
      "grad_norm": 0.6692200303077698,
      "learning_rate": 0.0002,
      "loss": 1.0419,
      "step": 3150
    },
    {
      "epoch": 12.9,
      "grad_norm": 0.6588943004608154,
      "learning_rate": 0.0002,
      "loss": 1.0413,
      "step": 3160
    },
    {
      "epoch": 12.94,
      "grad_norm": 0.7068329453468323,
      "learning_rate": 0.0002,
      "loss": 1.0469,
      "step": 3170
    },
    {
      "epoch": 12.98,
      "grad_norm": 0.638953685760498,
      "learning_rate": 0.0002,
      "loss": 1.0491,
      "step": 3180
    },
    {
      "epoch": 13.02,
      "grad_norm": 0.7413187026977539,
      "learning_rate": 0.0002,
      "loss": 1.0124,
      "step": 3190
    },
    {
      "epoch": 13.06,
      "grad_norm": 0.7384817600250244,
      "learning_rate": 0.0002,
      "loss": 0.9625,
      "step": 3200
    },
    {
      "epoch": 13.1,
      "grad_norm": 0.7144999504089355,
      "learning_rate": 0.0002,
      "loss": 0.9614,
      "step": 3210
    },
    {
      "epoch": 13.14,
      "grad_norm": 0.7315141558647156,
      "learning_rate": 0.0002,
      "loss": 0.9597,
      "step": 3220
    },
    {
      "epoch": 13.18,
      "grad_norm": 0.6805746555328369,
      "learning_rate": 0.0002,
      "loss": 0.9732,
      "step": 3230
    },
    {
      "epoch": 13.22,
      "grad_norm": 0.6961046457290649,
      "learning_rate": 0.0002,
      "loss": 0.9827,
      "step": 3240
    },
    {
      "epoch": 13.27,
      "grad_norm": 0.7174761891365051,
      "learning_rate": 0.0002,
      "loss": 0.9706,
      "step": 3250
    },
    {
      "epoch": 13.31,
      "grad_norm": 0.7133183479309082,
      "learning_rate": 0.0002,
      "loss": 0.9731,
      "step": 3260
    },
    {
      "epoch": 13.35,
      "grad_norm": 0.7056016325950623,
      "learning_rate": 0.0002,
      "loss": 0.9816,
      "step": 3270
    },
    {
      "epoch": 13.39,
      "grad_norm": 0.7086601853370667,
      "learning_rate": 0.0002,
      "loss": 0.9971,
      "step": 3280
    },
    {
      "epoch": 13.43,
      "grad_norm": 0.6886954307556152,
      "learning_rate": 0.0002,
      "loss": 0.9845,
      "step": 3290
    },
    {
      "epoch": 13.47,
      "grad_norm": 0.7162395119667053,
      "learning_rate": 0.0002,
      "loss": 0.9968,
      "step": 3300
    },
    {
      "epoch": 13.51,
      "grad_norm": 0.711045503616333,
      "learning_rate": 0.0002,
      "loss": 0.999,
      "step": 3310
    },
    {
      "epoch": 13.55,
      "grad_norm": 0.6801750659942627,
      "learning_rate": 0.0002,
      "loss": 0.9953,
      "step": 3320
    },
    {
      "epoch": 13.59,
      "grad_norm": 0.721878707408905,
      "learning_rate": 0.0002,
      "loss": 1.0077,
      "step": 3330
    },
    {
      "epoch": 13.63,
      "grad_norm": 0.6965034008026123,
      "learning_rate": 0.0002,
      "loss": 1.0135,
      "step": 3340
    },
    {
      "epoch": 13.67,
      "grad_norm": 0.7133123278617859,
      "learning_rate": 0.0002,
      "loss": 1.0047,
      "step": 3350
    },
    {
      "epoch": 13.71,
      "grad_norm": 0.7012418508529663,
      "learning_rate": 0.0002,
      "loss": 1.0086,
      "step": 3360
    },
    {
      "epoch": 13.76,
      "grad_norm": 0.6652898788452148,
      "learning_rate": 0.0002,
      "loss": 1.0069,
      "step": 3370
    },
    {
      "epoch": 13.8,
      "grad_norm": 0.6638497114181519,
      "learning_rate": 0.0002,
      "loss": 1.0126,
      "step": 3380
    },
    {
      "epoch": 13.84,
      "grad_norm": 0.6930794715881348,
      "learning_rate": 0.0002,
      "loss": 1.009,
      "step": 3390
    },
    {
      "epoch": 13.88,
      "grad_norm": 0.7075713276863098,
      "learning_rate": 0.0002,
      "loss": 1.0263,
      "step": 3400
    },
    {
      "epoch": 13.92,
      "grad_norm": 0.7108717560768127,
      "learning_rate": 0.0002,
      "loss": 1.0321,
      "step": 3410
    },
    {
      "epoch": 13.96,
      "grad_norm": 0.6672636866569519,
      "learning_rate": 0.0002,
      "loss": 1.0277,
      "step": 3420
    },
    {
      "epoch": 14.0,
      "grad_norm": 0.667779803276062,
      "learning_rate": 0.0002,
      "loss": 1.0282,
      "step": 3430
    },
    {
      "epoch": 14.04,
      "grad_norm": 0.7257822751998901,
      "learning_rate": 0.0002,
      "loss": 0.9243,
      "step": 3440
    },
    {
      "epoch": 14.08,
      "grad_norm": 0.7209460735321045,
      "learning_rate": 0.0002,
      "loss": 0.9293,
      "step": 3450
    },
    {
      "epoch": 14.12,
      "grad_norm": 0.773958146572113,
      "learning_rate": 0.0002,
      "loss": 0.9408,
      "step": 3460
    },
    {
      "epoch": 14.16,
      "grad_norm": 0.7353633642196655,
      "learning_rate": 0.0002,
      "loss": 0.939,
      "step": 3470
    },
    {
      "epoch": 14.2,
      "grad_norm": 0.7394313812255859,
      "learning_rate": 0.0002,
      "loss": 0.9425,
      "step": 3480
    },
    {
      "epoch": 14.24,
      "grad_norm": 0.7675743699073792,
      "learning_rate": 0.0002,
      "loss": 0.953,
      "step": 3490
    },
    {
      "epoch": 14.29,
      "grad_norm": 0.7433834075927734,
      "learning_rate": 0.0002,
      "loss": 0.9611,
      "step": 3500
    },
    {
      "epoch": 14.33,
      "grad_norm": 0.7248120903968811,
      "learning_rate": 0.0002,
      "loss": 0.9671,
      "step": 3510
    },
    {
      "epoch": 14.37,
      "grad_norm": 0.7551497220993042,
      "learning_rate": 0.0002,
      "loss": 0.9766,
      "step": 3520
    },
    {
      "epoch": 14.41,
      "grad_norm": 0.7119362354278564,
      "learning_rate": 0.0002,
      "loss": 0.9735,
      "step": 3530
    },
    {
      "epoch": 14.45,
      "grad_norm": 0.7423197031021118,
      "learning_rate": 0.0002,
      "loss": 0.97,
      "step": 3540
    },
    {
      "epoch": 14.49,
      "grad_norm": 0.7277734279632568,
      "learning_rate": 0.0002,
      "loss": 0.9675,
      "step": 3550
    },
    {
      "epoch": 14.53,
      "grad_norm": 0.7504876852035522,
      "learning_rate": 0.0002,
      "loss": 0.9821,
      "step": 3560
    },
    {
      "epoch": 14.57,
      "grad_norm": 0.7577353119850159,
      "learning_rate": 0.0002,
      "loss": 0.9813,
      "step": 3570
    },
    {
      "epoch": 14.61,
      "grad_norm": 0.7336677312850952,
      "learning_rate": 0.0002,
      "loss": 0.9858,
      "step": 3580
    },
    {
      "epoch": 14.65,
      "grad_norm": 0.7539398670196533,
      "learning_rate": 0.0002,
      "loss": 0.9973,
      "step": 3590
    },
    {
      "epoch": 14.69,
      "grad_norm": 0.7318299412727356,
      "learning_rate": 0.0002,
      "loss": 0.9886,
      "step": 3600
    },
    {
      "epoch": 14.73,
      "grad_norm": 0.6988388299942017,
      "learning_rate": 0.0002,
      "loss": 0.9927,
      "step": 3610
    },
    {
      "epoch": 14.78,
      "grad_norm": 0.7509441375732422,
      "learning_rate": 0.0002,
      "loss": 0.9999,
      "step": 3620
    },
    {
      "epoch": 14.82,
      "grad_norm": 0.7021549344062805,
      "learning_rate": 0.0002,
      "loss": 1.0011,
      "step": 3630
    },
    {
      "epoch": 14.86,
      "grad_norm": 0.7372128367424011,
      "learning_rate": 0.0002,
      "loss": 1.0,
      "step": 3640
    },
    {
      "epoch": 14.9,
      "grad_norm": 0.7391555905342102,
      "learning_rate": 0.0002,
      "loss": 0.9976,
      "step": 3650
    },
    {
      "epoch": 14.94,
      "grad_norm": 0.7135056853294373,
      "learning_rate": 0.0002,
      "loss": 1.0077,
      "step": 3660
    },
    {
      "epoch": 14.98,
      "grad_norm": 0.7650583982467651,
      "learning_rate": 0.0002,
      "loss": 0.9963,
      "step": 3670
    },
    {
      "epoch": 15.02,
      "grad_norm": 0.8550894856452942,
      "learning_rate": 0.0002,
      "loss": 0.9701,
      "step": 3680
    },
    {
      "epoch": 15.06,
      "grad_norm": 0.8099115490913391,
      "learning_rate": 0.0002,
      "loss": 0.9164,
      "step": 3690
    },
    {
      "epoch": 15.1,
      "grad_norm": 0.7715261578559875,
      "learning_rate": 0.0002,
      "loss": 0.9122,
      "step": 3700
    },
    {
      "epoch": 15.14,
      "grad_norm": 0.7662339806556702,
      "learning_rate": 0.0002,
      "loss": 0.9292,
      "step": 3710
    },
    {
      "epoch": 15.18,
      "grad_norm": 0.7850679755210876,
      "learning_rate": 0.0002,
      "loss": 0.927,
      "step": 3720
    },
    {
      "epoch": 15.22,
      "grad_norm": 0.775543212890625,
      "learning_rate": 0.0002,
      "loss": 0.9323,
      "step": 3730
    },
    {
      "epoch": 15.27,
      "grad_norm": 0.75054931640625,
      "learning_rate": 0.0002,
      "loss": 0.9387,
      "step": 3740
    },
    {
      "epoch": 15.31,
      "grad_norm": 0.758657693862915,
      "learning_rate": 0.0002,
      "loss": 0.9404,
      "step": 3750
    },
    {
      "epoch": 15.35,
      "grad_norm": 0.7817949056625366,
      "learning_rate": 0.0002,
      "loss": 0.9367,
      "step": 3760
    },
    {
      "epoch": 15.39,
      "grad_norm": 0.7654836773872375,
      "learning_rate": 0.0002,
      "loss": 0.9466,
      "step": 3770
    },
    {
      "epoch": 15.43,
      "grad_norm": 0.7418621778488159,
      "learning_rate": 0.0002,
      "loss": 0.9446,
      "step": 3780
    },
    {
      "epoch": 15.47,
      "grad_norm": 0.7546373009681702,
      "learning_rate": 0.0002,
      "loss": 0.9456,
      "step": 3790
    },
    {
      "epoch": 15.51,
      "grad_norm": 0.7481057047843933,
      "learning_rate": 0.0002,
      "loss": 0.9532,
      "step": 3800
    },
    {
      "epoch": 15.55,
      "grad_norm": 0.7622807025909424,
      "learning_rate": 0.0002,
      "loss": 0.9618,
      "step": 3810
    },
    {
      "epoch": 15.59,
      "grad_norm": 0.7714546322822571,
      "learning_rate": 0.0002,
      "loss": 0.9596,
      "step": 3820
    },
    {
      "epoch": 15.63,
      "grad_norm": 0.7699543237686157,
      "learning_rate": 0.0002,
      "loss": 0.9655,
      "step": 3830
    },
    {
      "epoch": 15.67,
      "grad_norm": 0.7812892198562622,
      "learning_rate": 0.0002,
      "loss": 0.9691,
      "step": 3840
    },
    {
      "epoch": 15.71,
      "grad_norm": 0.7776221036911011,
      "learning_rate": 0.0002,
      "loss": 0.9659,
      "step": 3850
    },
    {
      "epoch": 15.76,
      "grad_norm": 0.7372971773147583,
      "learning_rate": 0.0002,
      "loss": 0.9709,
      "step": 3860
    },
    {
      "epoch": 15.8,
      "grad_norm": 0.7554543018341064,
      "learning_rate": 0.0002,
      "loss": 0.9763,
      "step": 3870
    },
    {
      "epoch": 15.84,
      "grad_norm": 0.77300626039505,
      "learning_rate": 0.0002,
      "loss": 0.9856,
      "step": 3880
    },
    {
      "epoch": 15.88,
      "grad_norm": 0.7235614061355591,
      "learning_rate": 0.0002,
      "loss": 0.9782,
      "step": 3890
    },
    {
      "epoch": 15.92,
      "grad_norm": 0.7121375799179077,
      "learning_rate": 0.0002,
      "loss": 0.9886,
      "step": 3900
    },
    {
      "epoch": 15.96,
      "grad_norm": 0.7238226532936096,
      "learning_rate": 0.0002,
      "loss": 0.9896,
      "step": 3910
    },
    {
      "epoch": 16.0,
      "grad_norm": 0.7624945640563965,
      "learning_rate": 0.0002,
      "loss": 0.9885,
      "step": 3920
    },
    {
      "epoch": 16.04,
      "grad_norm": 0.7497121691703796,
      "learning_rate": 0.0002,
      "loss": 0.8911,
      "step": 3930
    },
    {
      "epoch": 16.08,
      "grad_norm": 0.7473620176315308,
      "learning_rate": 0.0002,
      "loss": 0.8938,
      "step": 3940
    },
    {
      "epoch": 16.12,
      "grad_norm": 0.75458824634552,
      "learning_rate": 0.0002,
      "loss": 0.8986,
      "step": 3950
    },
    {
      "epoch": 16.16,
      "grad_norm": 0.788559079170227,
      "learning_rate": 0.0002,
      "loss": 0.9065,
      "step": 3960
    },
    {
      "epoch": 16.2,
      "grad_norm": 0.8157529830932617,
      "learning_rate": 0.0002,
      "loss": 0.91,
      "step": 3970
    },
    {
      "epoch": 16.24,
      "grad_norm": 0.8045533895492554,
      "learning_rate": 0.0002,
      "loss": 0.9108,
      "step": 3980
    },
    {
      "epoch": 16.29,
      "grad_norm": 0.8106245398521423,
      "learning_rate": 0.0002,
      "loss": 0.9243,
      "step": 3990
    },
    {
      "epoch": 16.33,
      "grad_norm": 0.7821611762046814,
      "learning_rate": 0.0002,
      "loss": 0.9207,
      "step": 4000
    },
    {
      "epoch": 16.37,
      "grad_norm": 0.7940720915794373,
      "learning_rate": 0.0002,
      "loss": 0.9216,
      "step": 4010
    },
    {
      "epoch": 16.41,
      "grad_norm": 0.8178911209106445,
      "learning_rate": 0.0002,
      "loss": 0.9305,
      "step": 4020
    },
    {
      "epoch": 16.45,
      "grad_norm": 0.768060564994812,
      "learning_rate": 0.0002,
      "loss": 0.931,
      "step": 4030
    },
    {
      "epoch": 16.49,
      "grad_norm": 0.7514263987541199,
      "learning_rate": 0.0002,
      "loss": 0.9358,
      "step": 4040
    },
    {
      "epoch": 16.53,
      "grad_norm": 0.7860275506973267,
      "learning_rate": 0.0002,
      "loss": 0.9395,
      "step": 4050
    },
    {
      "epoch": 16.57,
      "grad_norm": 0.8064606785774231,
      "learning_rate": 0.0002,
      "loss": 0.947,
      "step": 4060
    },
    {
      "epoch": 16.61,
      "grad_norm": 0.7772034406661987,
      "learning_rate": 0.0002,
      "loss": 0.9334,
      "step": 4070
    },
    {
      "epoch": 16.65,
      "grad_norm": 0.7714650630950928,
      "learning_rate": 0.0002,
      "loss": 0.9493,
      "step": 4080
    },
    {
      "epoch": 16.69,
      "grad_norm": 0.7864323258399963,
      "learning_rate": 0.0002,
      "loss": 0.9594,
      "step": 4090
    },
    {
      "epoch": 16.73,
      "grad_norm": 0.7611095905303955,
      "learning_rate": 0.0002,
      "loss": 0.9526,
      "step": 4100
    },
    {
      "epoch": 16.78,
      "grad_norm": 0.7637573480606079,
      "learning_rate": 0.0002,
      "loss": 0.9504,
      "step": 4110
    },
    {
      "epoch": 16.82,
      "grad_norm": 0.7966247200965881,
      "learning_rate": 0.0002,
      "loss": 0.9617,
      "step": 4120
    },
    {
      "epoch": 16.86,
      "grad_norm": 0.7575141191482544,
      "learning_rate": 0.0002,
      "loss": 0.9726,
      "step": 4130
    },
    {
      "epoch": 16.9,
      "grad_norm": 0.7849740982055664,
      "learning_rate": 0.0002,
      "loss": 0.9674,
      "step": 4140
    },
    {
      "epoch": 16.94,
      "grad_norm": 0.7860226631164551,
      "learning_rate": 0.0002,
      "loss": 0.9645,
      "step": 4150
    },
    {
      "epoch": 16.98,
      "grad_norm": 0.7693030834197998,
      "learning_rate": 0.0002,
      "loss": 0.9714,
      "step": 4160
    },
    {
      "epoch": 17.02,
      "grad_norm": 0.8382769227027893,
      "learning_rate": 0.0002,
      "loss": 0.9205,
      "step": 4170
    },
    {
      "epoch": 17.06,
      "grad_norm": 0.8301939368247986,
      "learning_rate": 0.0002,
      "loss": 0.8819,
      "step": 4180
    },
    {
      "epoch": 17.1,
      "grad_norm": 0.820909321308136,
      "learning_rate": 0.0002,
      "loss": 0.8783,
      "step": 4190
    },
    {
      "epoch": 17.14,
      "grad_norm": 0.8272302150726318,
      "learning_rate": 0.0002,
      "loss": 0.8888,
      "step": 4200
    },
    {
      "epoch": 17.18,
      "grad_norm": 0.8278018236160278,
      "learning_rate": 0.0002,
      "loss": 0.8841,
      "step": 4210
    },
    {
      "epoch": 17.22,
      "grad_norm": 0.8393099308013916,
      "learning_rate": 0.0002,
      "loss": 0.8974,
      "step": 4220
    },
    {
      "epoch": 17.27,
      "grad_norm": 0.8167082071304321,
      "learning_rate": 0.0002,
      "loss": 0.8938,
      "step": 4230
    },
    {
      "epoch": 17.31,
      "grad_norm": 0.7832567691802979,
      "learning_rate": 0.0002,
      "loss": 0.905,
      "step": 4240
    },
    {
      "epoch": 17.35,
      "grad_norm": 0.794981837272644,
      "learning_rate": 0.0002,
      "loss": 0.9109,
      "step": 4250
    },
    {
      "epoch": 17.39,
      "grad_norm": 0.828493058681488,
      "learning_rate": 0.0002,
      "loss": 0.9087,
      "step": 4260
    },
    {
      "epoch": 17.43,
      "grad_norm": 0.7956497669219971,
      "learning_rate": 0.0002,
      "loss": 0.9181,
      "step": 4270
    },
    {
      "epoch": 17.47,
      "grad_norm": 0.8334782123565674,
      "learning_rate": 0.0002,
      "loss": 0.9177,
      "step": 4280
    },
    {
      "epoch": 17.51,
      "grad_norm": 0.820360004901886,
      "learning_rate": 0.0002,
      "loss": 0.9268,
      "step": 4290
    },
    {
      "epoch": 17.55,
      "grad_norm": 0.8249423503875732,
      "learning_rate": 0.0002,
      "loss": 0.9238,
      "step": 4300
    },
    {
      "epoch": 17.59,
      "grad_norm": 0.8076381683349609,
      "learning_rate": 0.0002,
      "loss": 0.9256,
      "step": 4310
    },
    {
      "epoch": 17.63,
      "grad_norm": 0.792510986328125,
      "learning_rate": 0.0002,
      "loss": 0.9404,
      "step": 4320
    },
    {
      "epoch": 17.67,
      "grad_norm": 0.835329532623291,
      "learning_rate": 0.0002,
      "loss": 0.9316,
      "step": 4330
    },
    {
      "epoch": 17.71,
      "grad_norm": 0.8437380790710449,
      "learning_rate": 0.0002,
      "loss": 0.9416,
      "step": 4340
    },
    {
      "epoch": 17.76,
      "grad_norm": 0.7976727485656738,
      "learning_rate": 0.0002,
      "loss": 0.9326,
      "step": 4350
    },
    {
      "epoch": 17.8,
      "grad_norm": 0.7968472242355347,
      "learning_rate": 0.0002,
      "loss": 0.9409,
      "step": 4360
    },
    {
      "epoch": 17.84,
      "grad_norm": 0.7687345147132874,
      "learning_rate": 0.0002,
      "loss": 0.9419,
      "step": 4370
    },
    {
      "epoch": 17.88,
      "grad_norm": 0.7815808653831482,
      "learning_rate": 0.0002,
      "loss": 0.9522,
      "step": 4380
    },
    {
      "epoch": 17.92,
      "grad_norm": 0.816125214099884,
      "learning_rate": 0.0002,
      "loss": 0.9427,
      "step": 4390
    },
    {
      "epoch": 17.96,
      "grad_norm": 0.7884161472320557,
      "learning_rate": 0.0002,
      "loss": 0.9494,
      "step": 4400
    },
    {
      "epoch": 18.0,
      "grad_norm": 0.7613294124603271,
      "learning_rate": 0.0002,
      "loss": 0.9592,
      "step": 4410
    },
    {
      "epoch": 18.04,
      "grad_norm": 0.7931154370307922,
      "learning_rate": 0.0002,
      "loss": 0.8652,
      "step": 4420
    },
    {
      "epoch": 18.08,
      "grad_norm": 0.855189323425293,
      "learning_rate": 0.0002,
      "loss": 0.8679,
      "step": 4430
    },
    {
      "epoch": 18.12,
      "grad_norm": 0.8522574305534363,
      "learning_rate": 0.0002,
      "loss": 0.8644,
      "step": 4440
    },
    {
      "epoch": 18.16,
      "grad_norm": 0.8371242880821228,
      "learning_rate": 0.0002,
      "loss": 0.8635,
      "step": 4450
    },
{ |
|
"epoch": 18.2, |
|
"grad_norm": 0.8409127593040466, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8723, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"grad_norm": 0.8761357665061951, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8783, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 18.29, |
|
"grad_norm": 0.8433621525764465, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8876, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"grad_norm": 0.8117638230323792, |
|
"learning_rate": 0.0002, |
|
"loss": 0.892, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"grad_norm": 0.8237631916999817, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8943, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"grad_norm": 0.8152850270271301, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8909, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"grad_norm": 0.9371042251586914, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8989, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 18.49, |
|
"grad_norm": 0.8668140172958374, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9023, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"grad_norm": 0.8652714490890503, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9007, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 18.57, |
|
"grad_norm": 0.8392309546470642, |
|
"learning_rate": 0.0002, |
|
"loss": 0.919, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"grad_norm": 0.8538224101066589, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9169, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 18.65, |
|
"grad_norm": 0.7933239340782166, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9183, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"grad_norm": 0.8141274452209473, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9179, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"grad_norm": 0.8790974617004395, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9297, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 18.78, |
|
"grad_norm": 0.8103710412979126, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9331, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 18.82, |
|
"grad_norm": 0.7885395288467407, |
|
"learning_rate": 0.0002, |
|
"loss": 0.931, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"grad_norm": 0.80265212059021, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9323, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 18.9, |
|
"grad_norm": 0.8537465929985046, |
|
"learning_rate": 0.0002, |
|
"loss": 0.932, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"grad_norm": 0.7968863844871521, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9288, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"grad_norm": 0.8105510473251343, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9361, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"grad_norm": 0.8854714632034302, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8973, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 19.06, |
|
"grad_norm": 0.8548041582107544, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8499, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 19.1, |
|
"grad_norm": 0.870018482208252, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8518, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 19.14, |
|
"grad_norm": 0.8623254895210266, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8517, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"grad_norm": 0.863254725933075, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8652, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 19.22, |
|
"grad_norm": 0.867749810218811, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8659, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"grad_norm": 0.8433037996292114, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8728, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"grad_norm": 0.8570507764816284, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8647, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"grad_norm": 0.830174446105957, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8837, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 19.39, |
|
"grad_norm": 0.8436498641967773, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8792, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 19.43, |
|
"grad_norm": 0.8207152485847473, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8945, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"grad_norm": 0.8116417527198792, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8813, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"grad_norm": 0.8591784834861755, |
|
"learning_rate": 0.0002, |
|
"loss": 0.885, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 19.55, |
|
"grad_norm": 0.8506168127059937, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8994, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 19.59, |
|
"grad_norm": 0.816745400428772, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8994, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 19.63, |
|
"grad_norm": 0.8365273475646973, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8909, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"grad_norm": 0.8306399583816528, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9127, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"grad_norm": 0.8440532088279724, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9106, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 19.76, |
|
"grad_norm": 0.8330156803131104, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9093, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 19.8, |
|
"grad_norm": 0.8263149857521057, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9057, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"grad_norm": 0.8197040557861328, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9145, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 19.88, |
|
"grad_norm": 0.8366541266441345, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9107, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 19.92, |
|
"grad_norm": 0.8039381504058838, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9162, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 19.96, |
|
"grad_norm": 0.8434823751449585, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9264, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.8329556584358215, |
|
"learning_rate": 0.0002, |
|
"loss": 0.923, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 20.04, |
|
"grad_norm": 0.8620508313179016, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8292, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 20.08, |
|
"grad_norm": 0.8627603650093079, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8284, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 20.12, |
|
"grad_norm": 0.8428844213485718, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8452, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"grad_norm": 0.8328269720077515, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8424, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"grad_norm": 0.8306934237480164, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8471, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 20.24, |
|
"grad_norm": 0.8320675492286682, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8498, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 20.29, |
|
"grad_norm": 0.8537613153457642, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8675, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 20.33, |
|
"grad_norm": 0.8733486533164978, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8682, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 20.37, |
|
"grad_norm": 0.8653712868690491, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8717, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 20.41, |
|
"grad_norm": 0.8138574957847595, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8655, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.45, |
|
"grad_norm": 0.8490427732467651, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8761, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 20.49, |
|
"grad_norm": 0.8796558380126953, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8751, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 20.53, |
|
"grad_norm": 0.8779765963554382, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8755, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 20.57, |
|
"grad_norm": 0.8556944131851196, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8799, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 20.61, |
|
"grad_norm": 0.8503381609916687, |
|
"learning_rate": 0.0002, |
|
"loss": 0.879, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"grad_norm": 0.837138831615448, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8818, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 20.69, |
|
"grad_norm": 0.8664600849151611, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8952, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 20.73, |
|
"grad_norm": 0.8737181425094604, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8997, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 20.78, |
|
"grad_norm": 0.8698042631149292, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9058, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 20.82, |
|
"grad_norm": 0.8353410959243774, |
|
"learning_rate": 0.0002, |
|
"loss": 0.905, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 20.86, |
|
"grad_norm": 0.8429268002510071, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9037, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"grad_norm": 0.8391215205192566, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9017, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 20.94, |
|
"grad_norm": 0.8510826826095581, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9075, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 20.98, |
|
"grad_norm": 0.8314224481582642, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9158, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"grad_norm": 0.9201707243919373, |
|
"learning_rate": 0.0002, |
|
"loss": 0.859, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 21.06, |
|
"grad_norm": 0.8647860884666443, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8171, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 21.1, |
|
"grad_norm": 0.8973507881164551, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8298, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 21.14, |
|
"grad_norm": 0.9124497175216675, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8338, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 21.18, |
|
"grad_norm": 0.8619136214256287, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8246, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 21.22, |
|
"grad_norm": 0.8383534550666809, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8379, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 21.27, |
|
"grad_norm": 0.8382851481437683, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8409, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 21.31, |
|
"grad_norm": 0.8646281957626343, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8475, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 21.35, |
|
"grad_norm": 0.9145320057868958, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8591, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 21.39, |
|
"grad_norm": 0.8727798461914062, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8564, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"grad_norm": 0.8649429082870483, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8569, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 21.47, |
|
"grad_norm": 0.8714181184768677, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8596, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 21.51, |
|
"grad_norm": 0.8774384260177612, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8713, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 21.55, |
|
"grad_norm": 0.8977541327476501, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8626, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 21.59, |
|
"grad_norm": 0.8677563667297363, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8784, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 21.63, |
|
"grad_norm": 0.8705365657806396, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8722, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"grad_norm": 0.8717777132987976, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8833, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 21.71, |
|
"grad_norm": 0.8530117273330688, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8829, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"grad_norm": 0.8550623655319214, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8797, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 21.8, |
|
"grad_norm": 0.8520897030830383, |
|
"learning_rate": 0.0002, |
|
"loss": 0.893, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 21.84, |
|
"grad_norm": 0.8366932272911072, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8916, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 21.88, |
|
"grad_norm": 0.8556586503982544, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8992, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 21.92, |
|
"grad_norm": 0.869047224521637, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8932, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"grad_norm": 0.8768820762634277, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8959, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 0.849677562713623, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8959, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 22.04, |
|
"grad_norm": 0.8397349119186401, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7987, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 22.08, |
|
"grad_norm": 0.8741442561149597, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8045, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 22.12, |
|
"grad_norm": 0.9024834036827087, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8124, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 22.16, |
|
"grad_norm": 0.8942772746086121, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8169, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 22.2, |
|
"grad_norm": 0.8915479183197021, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8271, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 22.24, |
|
"grad_norm": 0.9181793332099915, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8283, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"grad_norm": 0.8821321725845337, |
|
"learning_rate": 0.0002, |
|
"loss": 0.827, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 22.33, |
|
"grad_norm": 0.8514798879623413, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8345, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 22.37, |
|
"grad_norm": 0.8772318363189697, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8325, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 22.41, |
|
"grad_norm": 0.8917196989059448, |
|
"learning_rate": 0.0002, |
|
"loss": 0.854, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 22.45, |
|
"grad_norm": 0.8567050695419312, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8576, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.49, |
|
"grad_norm": 0.8994786143302917, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8482, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 22.53, |
|
"grad_norm": 0.8770323395729065, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8568, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 22.57, |
|
"grad_norm": 0.870095431804657, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8617, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 22.61, |
|
"grad_norm": 0.8750589489936829, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8668, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 22.65, |
|
"grad_norm": 0.8779079914093018, |
|
"learning_rate": 0.0002, |
|
"loss": 0.868, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 22.69, |
|
"grad_norm": 0.868596613407135, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8788, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"grad_norm": 0.8535884022712708, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8783, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 22.78, |
|
"grad_norm": 0.8387210965156555, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8717, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 22.82, |
|
"grad_norm": 0.8729193806648254, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8789, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 22.86, |
|
"grad_norm": 0.8541831970214844, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8804, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 22.9, |
|
"grad_norm": 0.8559602499008179, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8777, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 22.94, |
|
"grad_norm": 0.8594103455543518, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8841, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 22.98, |
|
"grad_norm": 0.884156346321106, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8936, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"grad_norm": 0.9736906290054321, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8374, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 23.06, |
|
"grad_norm": 0.9304066300392151, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8011, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 23.1, |
|
"grad_norm": 0.8798370361328125, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7971, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 23.14, |
|
"grad_norm": 0.8726293444633484, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8068, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 23.18, |
|
"grad_norm": 0.918418824672699, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8051, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 23.22, |
|
"grad_norm": 0.8974969387054443, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8089, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"grad_norm": 0.9124098420143127, |
|
"learning_rate": 0.0002, |
|
"loss": 0.821, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 23.31, |
|
"grad_norm": 0.8955199122428894, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8203, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 23.35, |
|
"grad_norm": 0.8901395201683044, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8324, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 23.39, |
|
"grad_norm": 0.9032876491546631, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8348, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 23.43, |
|
"grad_norm": 0.9339714646339417, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8424, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 23.47, |
|
"grad_norm": 0.8993861675262451, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8364, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 23.51, |
|
"grad_norm": 0.8764722347259521, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8457, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 23.55, |
|
"grad_norm": 0.8839731812477112, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8459, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 23.59, |
|
"grad_norm": 0.8804398775100708, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8564, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 23.63, |
|
"grad_norm": 0.922806978225708, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8613, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 23.67, |
|
"grad_norm": 0.8472769260406494, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8597, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 23.71, |
|
"grad_norm": 0.8768579959869385, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8738, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 23.76, |
|
"grad_norm": 0.895548403263092, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8569, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"grad_norm": 0.9207329154014587, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8677, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 23.84, |
|
"grad_norm": 0.8925573825836182, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8711, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 23.88, |
|
"grad_norm": 0.8939485549926758, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8735, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 23.92, |
|
"grad_norm": 0.8766481280326843, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8805, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 23.96, |
|
"grad_norm": 0.8925796747207642, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8695, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 0.8501390814781189, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8646, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"grad_norm": 0.8877448439598083, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7849, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 24.08, |
|
"grad_norm": 0.860008955001831, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7923, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 24.12, |
|
"grad_norm": 0.9319033622741699, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7965, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 24.16, |
|
"grad_norm": 0.928855299949646, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7961, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 24.2, |
|
"grad_norm": 0.9242786765098572, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7964, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 24.24, |
|
"grad_norm": 0.8834139108657837, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8113, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 24.29, |
|
"grad_norm": 0.8913507461547852, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8051, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 24.33, |
|
"grad_norm": 0.8687690496444702, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8158, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 24.37, |
|
"grad_norm": 0.9044011831283569, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8232, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 24.41, |
|
"grad_norm": 0.8891428709030151, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8234, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 24.45, |
|
"grad_norm": 0.910785436630249, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8343, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 24.49, |
|
"grad_norm": 0.8859900832176208, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8271, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"grad_norm": 0.8689603209495544, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8257, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 24.57, |
|
"grad_norm": 0.908275306224823, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8429, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 24.61, |
|
"grad_norm": 0.9454265832901001, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8423, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 24.65, |
|
"grad_norm": 0.874933123588562, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8464, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 24.69, |
|
"grad_norm": 0.9061406254768372, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8523, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 24.73, |
|
"grad_norm": 0.9278773665428162, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8487, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 24.78, |
|
"grad_norm": 0.924838125705719, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8463, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 24.82, |
|
"grad_norm": 0.9214856624603271, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8565, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 24.86, |
|
"grad_norm": 0.8918023705482483, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8645, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"grad_norm": 0.9123814702033997, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8645, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 24.94, |
|
"grad_norm": 0.8688812851905823, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8679, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 24.98, |
|
"grad_norm": 0.9204630851745605, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8685, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"grad_norm": 0.9178334474563599, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8285, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 25.06, |
|
"grad_norm": 0.915169894695282, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7686, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 25.1, |
|
"grad_norm": 0.8733026385307312, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7739, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 25.14, |
|
"grad_norm": 0.9435804486274719, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7821, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 25.18, |
|
"grad_norm": 0.9279885292053223, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7951, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 25.22, |
|
"grad_norm": 0.8908799886703491, |
|
"learning_rate": 0.0002, |
|
"loss": 0.796, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 25.27, |
|
"grad_norm": 0.9093460440635681, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8075, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 25.31, |
|
"grad_norm": 0.9308035373687744, |
|
"learning_rate": 0.0002, |
|
"loss": 0.808, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 25.35, |
|
"grad_norm": 0.9349119663238525, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8114, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 25.39, |
|
"grad_norm": 0.9189342856407166, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8171, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 25.43, |
|
"grad_norm": 0.9681046605110168, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8207, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"grad_norm": 0.9576332569122314, |
|
"learning_rate": 0.0002, |
|
"loss": 0.827, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 25.51, |
|
"grad_norm": 0.9052371382713318, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8129, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 25.55, |
|
"grad_norm": 0.9296489357948303, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8267, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 25.59, |
|
"grad_norm": 0.9182922840118408, |
|
"learning_rate": 0.0002, |
|
"loss": 0.831, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 25.63, |
|
"grad_norm": 0.9139277338981628, |
|
"learning_rate": 0.0002, |
|
"loss": 0.833, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 25.67, |
|
"grad_norm": 0.9042106866836548, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8393, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 25.71, |
|
"grad_norm": 0.9238719344139099, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8394, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 25.76, |
|
"grad_norm": 0.8891541957855225, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8513, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 25.8, |
|
"grad_norm": 0.8962535858154297, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8407, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 25.84, |
|
"grad_norm": 0.9288328886032104, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8504, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 25.88, |
|
"grad_norm": 0.8737248778343201, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8483, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 25.92, |
|
"grad_norm": 0.8971844911575317, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8455, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 25.96, |
|
"grad_norm": 0.9223408102989197, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8489, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 0.8855815529823303, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8668, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 26.04, |
|
"grad_norm": 0.9498929977416992, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7654, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 26.08, |
|
"grad_norm": 0.9300951957702637, |
|
"learning_rate": 0.0002, |
|
"loss": 0.773, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 26.12, |
|
"grad_norm": 0.9423607587814331, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7736, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 26.16, |
|
"grad_norm": 0.9024314284324646, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7751, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 26.2, |
|
"grad_norm": 0.9432561993598938, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7874, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 26.24, |
|
"grad_norm": 0.9214005470275879, |
|
"learning_rate": 0.0002, |
|
"loss": 0.792, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 26.29, |
|
"grad_norm": 0.911280632019043, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7925, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 26.33, |
|
"grad_norm": 0.9321004748344421, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8028, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 26.37, |
|
"grad_norm": 0.9027372598648071, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8058, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 26.41, |
|
"grad_norm": 0.9106818437576294, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8035, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 26.45, |
|
"grad_norm": 0.9135387539863586, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8075, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 26.49, |
|
"grad_norm": 0.9544934034347534, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8114, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 26.53, |
|
"grad_norm": 0.9143434762954712, |
|
"learning_rate": 0.0002, |
|
"loss": 0.814, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.57, |
|
"grad_norm": 0.9120767712593079, |
|
"learning_rate": 0.0002, |
|
"loss": 0.823, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 26.61, |
|
"grad_norm": 0.9214150905609131, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8263, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 26.65, |
|
"grad_norm": 0.909246563911438, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8313, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 26.69, |
|
"grad_norm": 0.8854065537452698, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8309, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 26.73, |
|
"grad_norm": 0.8996104001998901, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8276, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 26.78, |
|
"grad_norm": 0.9389463067054749, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8276, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 26.82, |
|
"grad_norm": 0.9044690132141113, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8384, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 26.86, |
|
"grad_norm": 0.9313196539878845, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8362, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 26.9, |
|
"grad_norm": 0.9324984550476074, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8483, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 26.94, |
|
"grad_norm": 0.9040074348449707, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8502, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 26.98, |
|
"grad_norm": 0.9061781764030457, |
|
"learning_rate": 0.0002, |
|
"loss": 0.839, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"grad_norm": 0.9643654823303223, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8103, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 27.06, |
|
"grad_norm": 0.9526975154876709, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7568, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"grad_norm": 0.9363268613815308, |
|
"learning_rate": 0.0002, |
|
"loss": 0.768, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 27.14, |
|
"grad_norm": 0.9408143162727356, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7657, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 27.18, |
|
"grad_norm": 0.9114474654197693, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7704, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 27.22, |
|
"grad_norm": 0.9404338002204895, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7752, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"grad_norm": 0.924858570098877, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7791, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 27.31, |
|
"grad_norm": 0.8972033262252808, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7875, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 27.35, |
|
"grad_norm": 0.9168384075164795, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7833, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 27.39, |
|
"grad_norm": 0.9331495761871338, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7934, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 27.43, |
|
"grad_norm": 0.9218735694885254, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8008, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 27.47, |
|
"grad_norm": 0.9129732847213745, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8017, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 27.51, |
|
"grad_norm": 0.927643895149231, |
|
"learning_rate": 0.0002, |
|
"loss": 0.806, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 27.55, |
|
"grad_norm": 0.9381563067436218, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8121, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 27.59, |
|
"grad_norm": 0.9063361287117004, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8076, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 27.63, |
|
"grad_norm": 0.9507086873054504, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8201, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 27.67, |
|
"grad_norm": 0.9145575165748596, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8215, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 27.71, |
|
"grad_norm": 0.9236624836921692, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8217, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 27.76, |
|
"grad_norm": 0.9380799531936646, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8268, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 27.8, |
|
"grad_norm": 0.9496703743934631, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8285, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 27.84, |
|
"grad_norm": 0.9168816208839417, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8323, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 27.88, |
|
"grad_norm": 0.9149271845817566, |
|
"learning_rate": 0.0002, |
|
"loss": 0.835, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 27.92, |
|
"grad_norm": 0.996411144733429, |
|
"learning_rate": 0.0002, |
|
"loss": 0.837, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 27.96, |
|
"grad_norm": 0.9247832894325256, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8409, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 0.9518089294433594, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8404, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 28.04, |
|
"grad_norm": 0.8640215396881104, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7456, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 28.08, |
|
"grad_norm": 0.9442835450172424, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7483, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 28.12, |
|
"grad_norm": 0.9471011757850647, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7511, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 28.16, |
|
"grad_norm": 0.9230690002441406, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7554, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 28.2, |
|
"grad_norm": 0.917255699634552, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7686, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 28.24, |
|
"grad_norm": 0.9495767951011658, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7692, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 28.29, |
|
"grad_norm": 0.9603391289710999, |
|
"learning_rate": 0.0002, |
|
"loss": 0.784, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 28.33, |
|
"grad_norm": 0.9270275235176086, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7817, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 28.37, |
|
"grad_norm": 0.966315507888794, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7857, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 28.41, |
|
"grad_norm": 0.9359502196311951, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7963, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 28.45, |
|
"grad_norm": 0.9358251094818115, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7962, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 28.49, |
|
"grad_norm": 0.8844741582870483, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8093, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 28.53, |
|
"grad_norm": 0.9237121939659119, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7941, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"grad_norm": 0.9162352681159973, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8014, |
|
"step": 7000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 41, |
|
"save_steps": 100, |
|
"total_flos": 2.74107564294144e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|