{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.8527131782945734,
  "eval_steps": 500,
  "global_step": 774,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003875968992248062,
      "grad_norm": 0.2644451856613159,
      "learning_rate": 6.6e-06,
      "loss": 2.7134,
      "step": 1
    },
    {
      "epoch": 0.007751937984496124,
      "grad_norm": 0.3978240191936493,
      "learning_rate": 1.32e-05,
      "loss": 3.1698,
      "step": 2
    },
    {
      "epoch": 0.011627906976744186,
      "grad_norm": 0.29106518626213074,
      "learning_rate": 1.98e-05,
      "loss": 2.8065,
      "step": 3
    },
    {
      "epoch": 0.015503875968992248,
      "grad_norm": 0.26795685291290283,
      "learning_rate": 2.64e-05,
      "loss": 2.7225,
      "step": 4
    },
    {
      "epoch": 0.01937984496124031,
      "grad_norm": 0.18800821900367737,
      "learning_rate": 3.3e-05,
      "loss": 2.3743,
      "step": 5
    },
    {
      "epoch": 0.023255813953488372,
      "grad_norm": 0.2275751829147339,
      "learning_rate": 3.96e-05,
      "loss": 2.474,
      "step": 6
    },
    {
      "epoch": 0.027131782945736434,
      "grad_norm": 0.37303999066352844,
      "learning_rate": 4.6200000000000005e-05,
      "loss": 2.9455,
      "step": 7
    },
    {
      "epoch": 0.031007751937984496,
      "grad_norm": 0.5618475079536438,
      "learning_rate": 5.28e-05,
      "loss": 3.0509,
      "step": 8
    },
    {
      "epoch": 0.03488372093023256,
      "grad_norm": 0.2064945548772812,
      "learning_rate": 5.94e-05,
      "loss": 2.3145,
      "step": 9
    },
    {
      "epoch": 0.03875968992248062,
      "grad_norm": 0.1987352818250656,
      "learning_rate": 6.6e-05,
      "loss": 2.6002,
      "step": 10
    },
    {
      "epoch": 0.04263565891472868,
      "grad_norm": 0.20411251485347748,
      "learning_rate": 7.26e-05,
      "loss": 2.4743,
      "step": 11
    },
    {
      "epoch": 0.046511627906976744,
      "grad_norm": 0.1682678759098053,
      "learning_rate": 7.92e-05,
      "loss": 2.2844,
      "step": 12
    },
    {
      "epoch": 0.050387596899224806,
      "grad_norm": 0.2902089059352875,
      "learning_rate": 8.58e-05,
      "loss": 2.7268,
      "step": 13
    },
    {
      "epoch": 0.05426356589147287,
      "grad_norm": 0.34279119968414307,
      "learning_rate": 9.240000000000001e-05,
      "loss": 2.8172,
      "step": 14
    },
    {
      "epoch": 0.05813953488372093,
      "grad_norm": 0.25758570432662964,
      "learning_rate": 9.9e-05,
      "loss": 2.5574,
      "step": 15
    },
    {
      "epoch": 0.06201550387596899,
      "grad_norm": 0.22630520164966583,
      "learning_rate": 0.0001056,
      "loss": 2.4886,
      "step": 16
    },
    {
      "epoch": 0.06589147286821706,
      "grad_norm": 0.24086053669452667,
      "learning_rate": 0.0001122,
      "loss": 2.1352,
      "step": 17
    },
    {
      "epoch": 0.06976744186046512,
      "grad_norm": 0.31760409474372864,
      "learning_rate": 0.0001188,
      "loss": 2.3757,
      "step": 18
    },
    {
      "epoch": 0.07364341085271318,
      "grad_norm": 0.27383461594581604,
      "learning_rate": 0.0001254,
      "loss": 2.2567,
      "step": 19
    },
    {
      "epoch": 0.07751937984496124,
      "grad_norm": 0.25780704617500305,
      "learning_rate": 0.000132,
      "loss": 2.497,
      "step": 20
    },
    {
      "epoch": 0.08139534883720931,
      "grad_norm": 0.20427410304546356,
      "learning_rate": 0.00013859999999999998,
      "loss": 2.2143,
      "step": 21
    },
    {
      "epoch": 0.08527131782945736,
      "grad_norm": 0.2947242558002472,
      "learning_rate": 0.0001452,
      "loss": 2.6828,
      "step": 22
    },
    {
      "epoch": 0.08914728682170543,
      "grad_norm": 0.23378527164459229,
      "learning_rate": 0.0001518,
      "loss": 2.3352,
      "step": 23
    },
    {
      "epoch": 0.09302325581395349,
      "grad_norm": 0.23147471249103546,
      "learning_rate": 0.0001584,
      "loss": 2.2845,
      "step": 24
    },
    {
      "epoch": 0.09689922480620156,
      "grad_norm": 0.21582217514514923,
      "learning_rate": 0.000165,
      "loss": 2.5595,
      "step": 25
    },
    {
      "epoch": 0.10077519379844961,
      "grad_norm": 0.1932438313961029,
      "learning_rate": 0.0001716,
      "loss": 2.3431,
      "step": 26
    },
    {
      "epoch": 0.10465116279069768,
      "grad_norm": 0.18576449155807495,
      "learning_rate": 0.00017820000000000002,
      "loss": 2.3948,
      "step": 27
    },
    {
      "epoch": 0.10852713178294573,
      "grad_norm": 0.1673685610294342,
      "learning_rate": 0.00018480000000000002,
      "loss": 2.2775,
      "step": 28
    },
    {
      "epoch": 0.1124031007751938,
      "grad_norm": 0.17178471386432648,
      "learning_rate": 0.0001914,
      "loss": 1.9809,
      "step": 29
    },
    {
      "epoch": 0.11627906976744186,
      "grad_norm": 0.17103907465934753,
      "learning_rate": 0.000198,
      "loss": 2.2033,
      "step": 30
    },
    {
      "epoch": 0.12015503875968993,
      "grad_norm": 0.17570005357265472,
      "learning_rate": 0.00020459999999999999,
      "loss": 2.1358,
      "step": 31
    },
    {
      "epoch": 0.12403100775193798,
      "grad_norm": 0.16803690791130066,
      "learning_rate": 0.0002112,
      "loss": 2.164,
      "step": 32
    },
    {
      "epoch": 0.12790697674418605,
      "grad_norm": 0.17673198878765106,
      "learning_rate": 0.0002178,
      "loss": 2.2587,
      "step": 33
    },
    {
      "epoch": 0.13178294573643412,
      "grad_norm": 0.16170482337474823,
      "learning_rate": 0.0002244,
      "loss": 2.1818,
      "step": 34
    },
    {
      "epoch": 0.13565891472868216,
      "grad_norm": 0.1507701873779297,
      "learning_rate": 0.00023099999999999998,
      "loss": 2.3218,
      "step": 35
    },
    {
      "epoch": 0.13953488372093023,
      "grad_norm": 0.16273871064186096,
      "learning_rate": 0.0002376,
      "loss": 2.1462,
      "step": 36
    },
    {
      "epoch": 0.1434108527131783,
      "grad_norm": 0.15038099884986877,
      "learning_rate": 0.00024419999999999997,
      "loss": 2.1007,
      "step": 37
    },
    {
      "epoch": 0.14728682170542637,
      "grad_norm": 0.19681762158870697,
      "learning_rate": 0.0002508,
      "loss": 2.3083,
      "step": 38
    },
    {
      "epoch": 0.1511627906976744,
      "grad_norm": 0.15614789724349976,
      "learning_rate": 0.0002574,
      "loss": 2.0723,
      "step": 39
    },
    {
      "epoch": 0.15503875968992248,
      "grad_norm": 0.14269952476024628,
      "learning_rate": 0.000264,
      "loss": 2.1325,
      "step": 40
    },
    {
      "epoch": 0.15891472868217055,
      "grad_norm": 0.16603954136371613,
      "learning_rate": 0.00027059999999999996,
      "loss": 2.2062,
      "step": 41
    },
    {
      "epoch": 0.16279069767441862,
      "grad_norm": 0.16374431550502777,
      "learning_rate": 0.00027719999999999996,
      "loss": 2.0389,
      "step": 42
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 0.13400350511074066,
      "learning_rate": 0.0002838,
      "loss": 1.9439,
      "step": 43
    },
    {
      "epoch": 0.17054263565891473,
      "grad_norm": 0.13477088510990143,
      "learning_rate": 0.0002904,
      "loss": 2.3466,
      "step": 44
    },
    {
      "epoch": 0.1744186046511628,
      "grad_norm": 0.14038704335689545,
      "learning_rate": 0.000297,
      "loss": 2.3133,
      "step": 45
    },
    {
      "epoch": 0.17829457364341086,
      "grad_norm": 0.14085592329502106,
      "learning_rate": 0.0003036,
      "loss": 2.3503,
      "step": 46
    },
    {
      "epoch": 0.1821705426356589,
      "grad_norm": 0.14309249818325043,
      "learning_rate": 0.0003102,
      "loss": 2.4691,
      "step": 47
    },
    {
      "epoch": 0.18604651162790697,
      "grad_norm": 0.1526148021221161,
      "learning_rate": 0.0003168,
      "loss": 2.1829,
      "step": 48
    },
    {
      "epoch": 0.18992248062015504,
      "grad_norm": 0.15030066668987274,
      "learning_rate": 0.0003234,
      "loss": 2.1198,
      "step": 49
    },
    {
      "epoch": 0.1937984496124031,
      "grad_norm": 0.13567374646663666,
      "learning_rate": 0.00033,
      "loss": 2.3342,
      "step": 50
    },
    {
      "epoch": 0.19767441860465115,
      "grad_norm": 0.14513419568538666,
      "learning_rate": 0.00032999844662654247,
      "loss": 2.1416,
      "step": 51
    },
    {
      "epoch": 0.20155038759689922,
      "grad_norm": 0.1639217734336853,
      "learning_rate": 0.00032999378653541785,
      "loss": 2.06,
      "step": 52
    },
    {
      "epoch": 0.2054263565891473,
      "grad_norm": 0.15029025077819824,
      "learning_rate": 0.0003299860198143701,
      "loss": 2.3455,
      "step": 53
    },
    {
      "epoch": 0.20930232558139536,
      "grad_norm": 0.15286165475845337,
      "learning_rate": 0.00032997514660963685,
      "loss": 2.2418,
      "step": 54
    },
    {
      "epoch": 0.2131782945736434,
      "grad_norm": 0.17834632098674774,
      "learning_rate": 0.00032996116712594723,
      "loss": 2.3385,
      "step": 55
    },
    {
      "epoch": 0.21705426356589147,
      "grad_norm": 0.1593712419271469,
      "learning_rate": 0.00032994408162651776,
      "loss": 2.2248,
      "step": 56
    },
    {
      "epoch": 0.22093023255813954,
      "grad_norm": 0.13914993405342102,
      "learning_rate": 0.00032992389043304726,
      "loss": 2.3005,
      "step": 57
    },
    {
      "epoch": 0.2248062015503876,
      "grad_norm": 0.13964952528476715,
      "learning_rate": 0.0003299005939257111,
      "loss": 2.284,
      "step": 58
    },
    {
      "epoch": 0.22868217054263565,
      "grad_norm": 0.151286780834198,
      "learning_rate": 0.0003298741925431539,
      "loss": 2.1277,
      "step": 59
    },
    {
      "epoch": 0.23255813953488372,
      "grad_norm": 0.12762194871902466,
      "learning_rate": 0.0003298446867824811,
      "loss": 2.046,
      "step": 60
    },
    {
      "epoch": 0.2364341085271318,
      "grad_norm": 0.13711339235305786,
      "learning_rate": 0.00032981207719924994,
      "loss": 2.1179,
      "step": 61
    },
    {
      "epoch": 0.24031007751937986,
      "grad_norm": 0.15553320944309235,
      "learning_rate": 0.0003297763644074587,
      "loss": 2.3921,
      "step": 62
    },
    {
      "epoch": 0.2441860465116279,
      "grad_norm": 0.14784659445285797,
      "learning_rate": 0.0003297375490795353,
      "loss": 2.072,
      "step": 63
    },
    {
      "epoch": 0.24806201550387597,
      "grad_norm": 0.14965656399726868,
      "learning_rate": 0.0003296956319463245,
      "loss": 2.314,
      "step": 64
    },
    {
      "epoch": 0.25193798449612403,
      "grad_norm": 0.14986053109169006,
      "learning_rate": 0.0003296506137970745,
      "loss": 2.16,
      "step": 65
    },
    {
      "epoch": 0.2558139534883721,
      "grad_norm": 0.14208155870437622,
      "learning_rate": 0.0003296024954794215,
      "loss": 1.9537,
      "step": 66
    },
    {
      "epoch": 0.2596899224806202,
      "grad_norm": 0.1486920267343521,
      "learning_rate": 0.0003295512778993743,
      "loss": 2.1949,
      "step": 67
    },
    {
      "epoch": 0.26356589147286824,
      "grad_norm": 0.14816056191921234,
      "learning_rate": 0.0003294969620212968,
      "loss": 2.274,
      "step": 68
    },
    {
      "epoch": 0.26744186046511625,
      "grad_norm": 0.14654242992401123,
      "learning_rate": 0.00032943954886789016,
      "loss": 2.3158,
      "step": 69
    },
    {
      "epoch": 0.2713178294573643,
      "grad_norm": 0.1430758833885193,
      "learning_rate": 0.0003293790395201734,
      "loss": 2.2095,
      "step": 70
    },
    {
      "epoch": 0.2751937984496124,
      "grad_norm": 0.13157042860984802,
      "learning_rate": 0.00032931543511746306,
      "loss": 2.3029,
      "step": 71
    },
    {
      "epoch": 0.27906976744186046,
      "grad_norm": 0.15158364176750183,
      "learning_rate": 0.0003292487368573518,
      "loss": 2.4751,
      "step": 72
    },
    {
      "epoch": 0.28294573643410853,
      "grad_norm": 0.14293590188026428,
      "learning_rate": 0.0003291789459956857,
      "loss": 2.2384,
      "step": 73
    },
    {
      "epoch": 0.2868217054263566,
      "grad_norm": 0.13672257959842682,
      "learning_rate": 0.00032910606384654086,
      "loss": 2.389,
      "step": 74
    },
    {
      "epoch": 0.29069767441860467,
      "grad_norm": 0.17896883189678192,
      "learning_rate": 0.00032903009178219834,
      "loss": 2.26,
      "step": 75
    },
    {
      "epoch": 0.29457364341085274,
      "grad_norm": 0.1485362946987152,
      "learning_rate": 0.0003289510312331187,
      "loss": 1.9685,
      "step": 76
    },
    {
      "epoch": 0.29844961240310075,
      "grad_norm": 0.1551189124584198,
      "learning_rate": 0.0003288688836879147,
      "loss": 2.0234,
      "step": 77
    },
    {
      "epoch": 0.3023255813953488,
      "grad_norm": 0.14495842158794403,
      "learning_rate": 0.0003287836506933235,
      "loss": 2.0907,
      "step": 78
    },
    {
      "epoch": 0.3062015503875969,
      "grad_norm": 0.16551914811134338,
      "learning_rate": 0.0003286953338541776,
      "loss": 2.1908,
      "step": 79
    },
    {
      "epoch": 0.31007751937984496,
      "grad_norm": 0.14821241796016693,
      "learning_rate": 0.0003286039348333743,
      "loss": 1.9845,
      "step": 80
    },
    {
      "epoch": 0.313953488372093,
      "grad_norm": 0.17918680608272552,
      "learning_rate": 0.0003285094553518446,
      "loss": 2.4009,
      "step": 81
    },
    {
      "epoch": 0.3178294573643411,
      "grad_norm": 0.15694545209407806,
      "learning_rate": 0.000328411897188521,
      "loss": 2.1897,
      "step": 82
    },
    {
      "epoch": 0.32170542635658916,
      "grad_norm": 0.17306634783744812,
      "learning_rate": 0.0003283112621803035,
      "loss": 2.2274,
      "step": 83
    },
    {
      "epoch": 0.32558139534883723,
      "grad_norm": 0.14742298424243927,
      "learning_rate": 0.0003282075522220255,
      "loss": 2.1474,
      "step": 84
    },
    {
      "epoch": 0.32945736434108525,
      "grad_norm": 0.17422489821910858,
      "learning_rate": 0.0003281007692664178,
      "loss": 2.3424,
      "step": 85
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.14565351605415344,
      "learning_rate": 0.0003279909153240722,
      "loss": 2.1367,
      "step": 86
    },
    {
      "epoch": 0.3372093023255814,
      "grad_norm": 0.16405850648880005,
      "learning_rate": 0.00032787799246340304,
      "loss": 2.0857,
      "step": 87
    },
    {
      "epoch": 0.34108527131782945,
      "grad_norm": 0.1722440868616104,
      "learning_rate": 0.0003277620028106089,
      "loss": 2.0891,
      "step": 88
    },
    {
      "epoch": 0.3449612403100775,
      "grad_norm": 0.1501605361700058,
      "learning_rate": 0.00032764294854963213,
      "loss": 2.1899,
      "step": 89
    },
    {
      "epoch": 0.3488372093023256,
      "grad_norm": 0.14743047952651978,
      "learning_rate": 0.0003275208319221179,
      "loss": 2.165,
      "step": 90
    },
    {
      "epoch": 0.35271317829457366,
      "grad_norm": 0.14289091527462006,
      "learning_rate": 0.00032739565522737216,
      "loss": 2.0733,
      "step": 91
    },
    {
      "epoch": 0.35658914728682173,
      "grad_norm": 0.13758860528469086,
      "learning_rate": 0.00032726742082231787,
      "loss": 2.185,
      "step": 92
    },
    {
      "epoch": 0.36046511627906974,
      "grad_norm": 0.13637179136276245,
      "learning_rate": 0.00032713613112145105,
      "loss": 2.2052,
      "step": 93
    },
    {
      "epoch": 0.3643410852713178,
      "grad_norm": 0.14775022864341736,
      "learning_rate": 0.0003270017885967952,
      "loss": 2.1404,
      "step": 94
    },
    {
      "epoch": 0.3682170542635659,
      "grad_norm": 0.1494913548231125,
      "learning_rate": 0.0003268643957778546,
      "loss": 1.9495,
      "step": 95
    },
    {
      "epoch": 0.37209302325581395,
      "grad_norm": 0.1620800644159317,
      "learning_rate": 0.00032672395525156715,
      "loss": 2.0199,
      "step": 96
    },
    {
      "epoch": 0.375968992248062,
      "grad_norm": 0.15806061029434204,
      "learning_rate": 0.0003265804696622549,
      "loss": 1.9932,
      "step": 97
    },
    {
      "epoch": 0.3798449612403101,
      "grad_norm": 0.16337577998638153,
      "learning_rate": 0.00032643394171157504,
      "loss": 2.1522,
      "step": 98
    },
    {
      "epoch": 0.38372093023255816,
      "grad_norm": 0.1602049618959427,
      "learning_rate": 0.0003262843741584684,
      "loss": 2.215,
      "step": 99
    },
    {
      "epoch": 0.3875968992248062,
      "grad_norm": 0.17709355056285858,
      "learning_rate": 0.00032613176981910805,
      "loss": 1.8921,
      "step": 100
    },
    {
      "epoch": 0.39147286821705424,
      "grad_norm": 0.14379487931728363,
      "learning_rate": 0.0003259761315668458,
      "loss": 2.0393,
      "step": 101
    },
    {
      "epoch": 0.3953488372093023,
      "grad_norm": 0.15646255016326904,
      "learning_rate": 0.0003258174623321583,
      "loss": 2.2783,
      "step": 102
    },
    {
      "epoch": 0.3992248062015504,
      "grad_norm": 0.17745360732078552,
      "learning_rate": 0.00032565576510259207,
      "loss": 2.0816,
      "step": 103
    },
    {
      "epoch": 0.40310077519379844,
      "grad_norm": 0.14521199464797974,
      "learning_rate": 0.0003254910429227069,
      "loss": 2.061,
      "step": 104
    },
    {
      "epoch": 0.4069767441860465,
      "grad_norm": 0.14865247905254364,
      "learning_rate": 0.0003253232988940186,
      "loss": 2.1272,
      "step": 105
    },
    {
      "epoch": 0.4108527131782946,
      "grad_norm": 0.15088239312171936,
      "learning_rate": 0.0003251525361749409,
      "loss": 1.9842,
      "step": 106
    },
    {
      "epoch": 0.41472868217054265,
      "grad_norm": 0.15884487330913544,
      "learning_rate": 0.00032497875798072557,
      "loss": 2.2848,
      "step": 107
    },
    {
      "epoch": 0.4186046511627907,
      "grad_norm": 0.13841639459133148,
      "learning_rate": 0.000324801967583402,
      "loss": 2.2508,
      "step": 108
    },
    {
      "epoch": 0.42248062015503873,
      "grad_norm": 0.15195606648921967,
      "learning_rate": 0.00032462216831171607,
      "loss": 2.2611,
      "step": 109
    },
    {
      "epoch": 0.4263565891472868,
      "grad_norm": 0.17446410655975342,
      "learning_rate": 0.0003244393635510664,
      "loss": 2.3199,
      "step": 110
    },
    {
      "epoch": 0.43023255813953487,
      "grad_norm": 0.1482134312391281,
      "learning_rate": 0.0003242535567434419,
      "loss": 2.106,
      "step": 111
    },
    {
      "epoch": 0.43410852713178294,
      "grad_norm": 0.17814427614212036,
      "learning_rate": 0.000324064751387356,
      "loss": 2.3546,
      "step": 112
    },
    {
      "epoch": 0.437984496124031,
      "grad_norm": 0.15309078991413116,
      "learning_rate": 0.00032387295103778114,
      "loss": 1.9944,
      "step": 113
    },
    {
      "epoch": 0.4418604651162791,
      "grad_norm": 0.14015917479991913,
      "learning_rate": 0.00032367815930608177,
      "loss": 2.2504,
      "step": 114
    },
    {
      "epoch": 0.44573643410852715,
      "grad_norm": 0.14815208315849304,
      "learning_rate": 0.0003234803798599466,
      "loss": 2.1902,
      "step": 115
    },
    {
      "epoch": 0.4496124031007752,
      "grad_norm": 0.14948517084121704,
      "learning_rate": 0.000323279616423319,
      "loss": 2.2916,
      "step": 116
    },
    {
      "epoch": 0.45348837209302323,
      "grad_norm": 0.18051539361476898,
      "learning_rate": 0.00032307587277632753,
      "loss": 2.2055,
      "step": 117
    },
    {
      "epoch": 0.4573643410852713,
      "grad_norm": 0.17824648320674896,
      "learning_rate": 0.00032286915275521423,
      "loss": 1.9562,
      "step": 118
    },
    {
      "epoch": 0.46124031007751937,
      "grad_norm": 0.15700489282608032,
      "learning_rate": 0.0003226594602522626,
      "loss": 2.0874,
      "step": 119
    },
    {
      "epoch": 0.46511627906976744,
      "grad_norm": 0.15389874577522278,
      "learning_rate": 0.00032244679921572447,
      "loss": 2.314,
      "step": 120
    },
    {
      "epoch": 0.4689922480620155,
      "grad_norm": 0.15873467922210693,
      "learning_rate": 0.0003222311736497454,
      "loss": 2.3363,
      "step": 121
    },
    {
      "epoch": 0.4728682170542636,
      "grad_norm": 0.15486344695091248,
      "learning_rate": 0.0003220125876142893,
      "loss": 2.2502,
      "step": 122
    },
    {
      "epoch": 0.47674418604651164,
      "grad_norm": 0.15193265676498413,
      "learning_rate": 0.00032179104522506234,
      "loss": 2.1667,
      "step": 123
    },
    {
      "epoch": 0.4806201550387597,
      "grad_norm": 0.17307482659816742,
      "learning_rate": 0.00032156655065343487,
      "loss": 2.2361,
      "step": 124
    },
    {
      "epoch": 0.4844961240310077,
      "grad_norm": 0.17050062119960785,
      "learning_rate": 0.00032133910812636346,
      "loss": 1.9976,
      "step": 125
    },
    {
      "epoch": 0.4883720930232558,
      "grad_norm": 0.15616333484649658,
      "learning_rate": 0.00032110872192631096,
      "loss": 2.2286,
      "step": 126
    },
    {
      "epoch": 0.49224806201550386,
      "grad_norm": 0.15690878033638,
      "learning_rate": 0.00032087539639116596,
      "loss": 2.2743,
      "step": 127
    },
    {
      "epoch": 0.49612403100775193,
      "grad_norm": 0.15837594866752625,
      "learning_rate": 0.000320639135914161,
      "loss": 2.2109,
      "step": 128
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.16106249392032623,
      "learning_rate": 0.0003203999449437902,
      "loss": 2.0109,
      "step": 129
    },
    {
      "epoch": 0.5038759689922481,
      "grad_norm": 0.15444137156009674,
      "learning_rate": 0.00032015782798372504,
      "loss": 1.8895,
      "step": 130
    },
    {
      "epoch": 0.5077519379844961,
      "grad_norm": 0.16820791363716125,
      "learning_rate": 0.0003199127895927299,
      "loss": 2.0923,
      "step": 131
    },
    {
      "epoch": 0.5116279069767442,
      "grad_norm": 0.17274028062820435,
      "learning_rate": 0.0003196648343845761,
      "loss": 2.1964,
      "step": 132
    },
    {
      "epoch": 0.5155038759689923,
      "grad_norm": 0.17962628602981567,
      "learning_rate": 0.00031941396702795505,
      "loss": 2.2031,
      "step": 133
    },
    {
      "epoch": 0.5193798449612403,
      "grad_norm": 0.15597763657569885,
      "learning_rate": 0.00031916019224639017,
      "loss": 1.9934,
      "step": 134
    },
    {
      "epoch": 0.5232558139534884,
      "grad_norm": 0.15726816654205322,
      "learning_rate": 0.00031890351481814844,
      "loss": 2.004,
      "step": 135
    },
    {
      "epoch": 0.5271317829457365,
      "grad_norm": 0.16479064524173737,
      "learning_rate": 0.0003186439395761498,
      "loss": 2.024,
      "step": 136
    },
    {
      "epoch": 0.5310077519379846,
      "grad_norm": 0.1560487300157547,
      "learning_rate": 0.00031838147140787656,
      "loss": 2.2763,
      "step": 137
    },
    {
      "epoch": 0.5348837209302325,
      "grad_norm": 0.15172263979911804,
      "learning_rate": 0.0003181161152552813,
      "loss": 2.2485,
      "step": 138
    },
    {
      "epoch": 0.5387596899224806,
      "grad_norm": 0.14550605416297913,
      "learning_rate": 0.00031784787611469377,
      "loss": 2.319,
      "step": 139
    },
    {
      "epoch": 0.5426356589147286,
      "grad_norm": 0.15684114396572113,
      "learning_rate": 0.0003175767590367269,
      "loss": 2.1815,
      "step": 140
    },
    {
      "epoch": 0.5465116279069767,
      "grad_norm": 0.15643136203289032,
      "learning_rate": 0.00031730276912618146,
      "loss": 2.2636,
      "step": 141
    },
    {
      "epoch": 0.5503875968992248,
      "grad_norm": 0.15671825408935547,
      "learning_rate": 0.00031702591154195023,
      "loss": 2.2284,
      "step": 142
    },
    {
      "epoch": 0.5542635658914729,
      "grad_norm": 0.16105671226978302,
      "learning_rate": 0.0003167461914969208,
      "loss": 2.2209,
      "step": 143
    },
    {
      "epoch": 0.5581395348837209,
      "grad_norm": 0.15666881203651428,
      "learning_rate": 0.0003164636142578775,
      "loss": 2.1917,
      "step": 144
    },
    {
      "epoch": 0.562015503875969,
      "grad_norm": 0.16629591584205627,
      "learning_rate": 0.00031617818514540164,
      "loss": 2.2236,
      "step": 145
    },
    {
      "epoch": 0.5658914728682171,
      "grad_norm": 0.15989044308662415,
      "learning_rate": 0.00031588990953377227,
      "loss": 2.1643,
      "step": 146
    },
    {
      "epoch": 0.5697674418604651,
      "grad_norm": 0.15829624235630035,
      "learning_rate": 0.0003155987928508643,
      "loss": 2.0441,
      "step": 147
    },
    {
      "epoch": 0.5736434108527132,
      "grad_norm": 0.16886377334594727,
      "learning_rate": 0.0003153048405780466,
      "loss": 2.3204,
      "step": 148
    },
    {
      "epoch": 0.5775193798449613,
      "grad_norm": 0.14943009614944458,
      "learning_rate": 0.0003150080582500786,
      "loss": 2.2651,
      "step": 149
    },
    {
      "epoch": 0.5813953488372093,
      "grad_norm": 0.14913472533226013,
      "learning_rate": 0.0003147084514550064,
      "loss": 2.0444,
      "step": 150
    },
    {
      "epoch": 0.5852713178294574,
      "grad_norm": 0.17818570137023926,
      "learning_rate": 0.0003144060258340569,
      "loss": 2.389,
      "step": 151
    },
    {
      "epoch": 0.5891472868217055,
      "grad_norm": 0.15725958347320557,
      "learning_rate": 0.0003141007870815326,
      "loss": 2.0757,
      "step": 152
    },
    {
      "epoch": 0.5930232558139535,
      "grad_norm": 0.17349454760551453,
      "learning_rate": 0.0003137927409447034,
      "loss": 2.0407,
      "step": 153
    },
    {
      "epoch": 0.5968992248062015,
      "grad_norm": 0.16550661623477936,
      "learning_rate": 0.0003134818932236989,
      "loss": 2.1548,
      "step": 154
    },
    {
      "epoch": 0.6007751937984496,
      "grad_norm": 0.17092445492744446,
      "learning_rate": 0.00031316824977139905,
      "loss": 2.1501,
      "step": 155
    },
    {
      "epoch": 0.6046511627906976,
      "grad_norm": 0.16574212908744812,
      "learning_rate": 0.00031285181649332413,
      "loss": 2.3126,
      "step": 156
    },
    {
      "epoch": 0.6085271317829457,
      "grad_norm": 0.18889443576335907,
      "learning_rate": 0.00031253259934752324,
      "loss": 2.0974,
      "step": 157
    },
    {
      "epoch": 0.6124031007751938,
      "grad_norm": 0.14138291776180267,
      "learning_rate": 0.0003122106043444623,
      "loss": 2.1352,
      "step": 158
    },
    {
      "epoch": 0.6162790697674418,
      "grad_norm": 0.15668374300003052,
      "learning_rate": 0.000311885837546911,
      "loss": 2.0748,
      "step": 159
    },
    {
      "epoch": 0.6201550387596899,
      "grad_norm": 0.19118763506412506,
      "learning_rate": 0.0003115583050698283,
      "loss": 2.126,
      "step": 160
    },
    {
      "epoch": 0.624031007751938,
      "grad_norm": 0.19029273092746735,
      "learning_rate": 0.0003112280130802476,
      "loss": 2.2312,
      "step": 161
    },
    {
      "epoch": 0.627906976744186,
      "grad_norm": 0.16116315126419067,
      "learning_rate": 0.0003108949677971607,
      "loss": 2.1802,
      "step": 162
    },
    {
      "epoch": 0.6317829457364341,
      "grad_norm": 0.15257392823696136,
      "learning_rate": 0.00031055917549140013,
      "loss": 1.8636,
      "step": 163
    },
    {
      "epoch": 0.6356589147286822,
      "grad_norm": 0.14367957413196564,
      "learning_rate": 0.00031022064248552197,
      "loss": 2.2663,
      "step": 164
    },
    {
      "epoch": 0.6395348837209303,
      "grad_norm": 0.155740886926651,
      "learning_rate": 0.0003098793751536858,
      "loss": 2.0618,
      "step": 165
    },
    {
      "epoch": 0.6434108527131783,
      "grad_norm": 0.15573202073574066,
      "learning_rate": 0.0003095353799215356,
      "loss": 2.147,
      "step": 166
    },
    {
      "epoch": 0.6472868217054264,
      "grad_norm": 0.17215795814990997,
      "learning_rate": 0.0003091886632660781,
      "loss": 2.381,
      "step": 167
    },
    {
      "epoch": 0.6511627906976745,
      "grad_norm": 0.16937744617462158,
      "learning_rate": 0.0003088392317155612,
      "loss": 2.0703,
      "step": 168
    },
    {
      "epoch": 0.6550387596899225,
      "grad_norm": 0.16653715074062347,
      "learning_rate": 0.000308487091849351,
      "loss": 2.3338,
      "step": 169
    },
    {
      "epoch": 0.6589147286821705,
      "grad_norm": 0.1645222008228302,
      "learning_rate": 0.0003081322502978077,
      "loss": 2.2578,
      "step": 170
    },
    {
      "epoch": 0.6627906976744186,
      "grad_norm": 0.17217321693897247,
      "learning_rate": 0.0003077747137421609,
      "loss": 2.3059,
      "step": 171
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.15005135536193848,
      "learning_rate": 0.000307414488914384,
      "loss": 2.3234,
      "step": 172
    },
    {
      "epoch": 0.6705426356589147,
      "grad_norm": 0.1632111668586731,
      "learning_rate": 0.0003070515825970671,
      "loss": 2.3237,
      "step": 173
    },
    {
      "epoch": 0.6744186046511628,
      "grad_norm": 0.16381341218948364,
      "learning_rate": 0.0003066860016232894,
      "loss": 2.2046,
      "step": 174
    },
    {
      "epoch": 0.6782945736434108,
      "grad_norm": 0.1917879581451416,
      "learning_rate": 0.00030631775287649077,
      "loss": 2.2253,
      "step": 175
    },
    {
      "epoch": 0.6821705426356589,
      "grad_norm": 0.16225776076316833,
      "learning_rate": 0.00030594684329034183,
      "loss": 2.1078,
      "step": 176
    },
    {
      "epoch": 0.686046511627907,
      "grad_norm": 0.16708023846149445,
      "learning_rate": 0.00030557327984861353,
      "loss": 2.3477,
      "step": 177
    },
    {
      "epoch": 0.689922480620155,
      "grad_norm": 0.1940573751926422,
      "learning_rate": 0.00030519706958504573,
      "loss": 2.133,
      "step": 178
    },
    {
      "epoch": 0.6937984496124031,
      "grad_norm": 0.16478034853935242,
      "learning_rate": 0.00030481821958321467,
      "loss": 1.9538,
      "step": 179
    },
    {
      "epoch": 0.6976744186046512,
      "grad_norm": 0.16942182183265686,
      "learning_rate": 0.00030443673697639955,
      "loss": 2.3055,
      "step": 180
    },
    {
      "epoch": 0.7015503875968992,
      "grad_norm": 0.16306325793266296,
      "learning_rate": 0.0003040526289474484,
      "loss": 2.0216,
      "step": 181
    },
    {
      "epoch": 0.7054263565891473,
      "grad_norm": 0.1586439609527588,
      "learning_rate": 0.0003036659027286425,
      "loss": 2.242,
      "step": 182
    },
    {
      "epoch": 0.7093023255813954,
      "grad_norm": 0.18067839741706848,
      "learning_rate": 0.00030327656560156065,
      "loss": 2.201,
      "step": 183
    },
    {
      "epoch": 0.7131782945736435,
      "grad_norm": 0.17424075305461884,
      "learning_rate": 0.00030288462489694176,
      "loss": 2.0878,
      "step": 184
    },
    {
      "epoch": 0.7170542635658915,
      "grad_norm": 0.17749184370040894,
      "learning_rate": 0.0003024900879945468,
      "loss": 2.0539,
      "step": 185
    },
    {
      "epoch": 0.7209302325581395,
      "grad_norm": 0.1625218391418457,
      "learning_rate": 0.00030209296232302016,
      "loss": 2.252,
      "step": 186
    },
    {
      "epoch": 0.7248062015503876,
      "grad_norm": 0.1539207398891449,
      "learning_rate": 0.0003016932553597493,
      "loss": 2.261,
      "step": 187
    },
    {
      "epoch": 0.7286821705426356,
      "grad_norm": 0.1533818393945694,
      "learning_rate": 0.00030129097463072455,
      "loss": 2.0823,
      "step": 188
    },
    {
      "epoch": 0.7325581395348837,
      "grad_norm": 0.16169801354408264,
      "learning_rate": 0.0003008861277103968,
      "loss": 2.3093,
      "step": 189
    },
    {
      "epoch": 0.7364341085271318,
      "grad_norm": 0.16680897772312164,
      "learning_rate": 0.00030047872222153525,
      "loss": 1.8175,
      "step": 190
    },
    {
      "epoch": 0.7403100775193798,
      "grad_norm": 0.15020348131656647,
      "learning_rate": 0.0003000687658350839,
      "loss": 2.054,
      "step": 191
    },
    {
      "epoch": 0.7441860465116279,
      "grad_norm": 0.1465124785900116,
      "learning_rate": 0.0002996562662700168,
      "loss": 1.9802,
      "step": 192
    },
    {
      "epoch": 0.748062015503876,
      "grad_norm": 0.1666540950536728,
      "learning_rate": 0.00029924123129319315,
      "loss": 2.2759,
      "step": 193
    },
    {
      "epoch": 0.751937984496124,
      "grad_norm": 0.20095106959342957,
      "learning_rate": 0.0002988236687192107,
      "loss": 2.2055,
      "step": 194
    },
    {
      "epoch": 0.7558139534883721,
      "grad_norm": 0.15560601651668549,
      "learning_rate": 0.00029840358641025877,
      "loss": 2.2051,
      "step": 195
    },
    {
      "epoch": 0.7596899224806202,
      "grad_norm": 0.16015595197677612,
      "learning_rate": 0.0002979809922759702,
      "loss": 2.1635,
      "step": 196
    },
    {
      "epoch": 0.7635658914728682,
      "grad_norm": 0.15207257866859436,
      "learning_rate": 0.00029755589427327237,
      "loss": 2.1105,
      "step": 197
    },
    {
      "epoch": 0.7674418604651163,
      "grad_norm": 0.18203739821910858,
      "learning_rate": 0.0002971283004062375,
      "loss": 2.0973,
      "step": 198
    },
    {
      "epoch": 0.7713178294573644,
      "grad_norm": 0.165207177400589,
      "learning_rate": 0.00029669821872593187,
      "loss": 2.2404,
      "step": 199
    },
    {
      "epoch": 0.7751937984496124,
      "grad_norm": 0.16326136887073517,
      "learning_rate": 0.000296265657330264,
      "loss": 2.0473,
      "step": 200
    },
    {
      "epoch": 0.7790697674418605,
      "grad_norm": 0.17581120133399963,
      "learning_rate": 0.0002958306243638327,
      "loss": 2.1448,
      "step": 201
    },
    {
      "epoch": 0.7829457364341085,
      "grad_norm": 0.16953004896640778,
      "learning_rate": 0.0002953931280177732,
      "loss": 1.9978,
      "step": 202
    },
    {
      "epoch": 0.7868217054263565,
      "grad_norm": 0.17670084536075592,
      "learning_rate": 0.0002949531765296033,
      "loss": 2.3193,
      "step": 203
    },
    {
      "epoch": 0.7906976744186046,
      "grad_norm": 0.16181717813014984,
      "learning_rate": 0.000294510778183068,
      "loss": 1.9897,
      "step": 204
    },
    {
      "epoch": 0.7945736434108527,
      "grad_norm": 0.16492588818073273,
      "learning_rate": 0.00029406594130798364,
      "loss": 2.2115,
      "step": 205
    },
    {
      "epoch": 0.7984496124031008,
      "grad_norm": 0.1594804972410202,
      "learning_rate": 0.00029361867428008104,
      "loss": 2.205,
      "step": 206
    },
    {
      "epoch": 0.8023255813953488,
      "grad_norm": 0.1583336740732193,
      "learning_rate": 0.0002931689855208479,
      "loss": 1.9226,
      "step": 207
    },
    {
      "epoch": 0.8062015503875969,
      "grad_norm": 0.18443737924098969,
      "learning_rate": 0.00029271688349737,
      "loss": 2.3384,
      "step": 208
    },
    {
      "epoch": 0.810077519379845,
      "grad_norm": 0.17095071077346802,
      "learning_rate": 0.00029226237672217196,
      "loss": 2.0513,
      "step": 209
    },
    {
      "epoch": 0.813953488372093,
      "grad_norm": 0.1657685935497284,
      "learning_rate": 0.00029180547375305704,
      "loss": 2.0806,
      "step": 210
    },
    {
      "epoch": 0.8178294573643411,
      "grad_norm": 0.16090308129787445,
      "learning_rate": 0.00029134618319294573,
      "loss": 2.1466,
      "step": 211
    },
    {
      "epoch": 0.8217054263565892,
      "grad_norm": 0.1775992214679718,
      "learning_rate": 0.00029088451368971396,
      "loss": 2.084,
      "step": 212
    },
    {
      "epoch": 0.8255813953488372,
      "grad_norm": 0.16048915684223175,
      "learning_rate": 0.00029042047393603034,
      "loss": 1.8937,
      "step": 213
    },
    {
      "epoch": 0.8294573643410853,
      "grad_norm": 0.17156359553337097,
      "learning_rate": 0.0002899540726691922,
      "loss": 2.2556,
      "step": 214
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.15980640053749084,
      "learning_rate": 0.0002894853186709613,
      "loss": 1.9849,
      "step": 215
    },
    {
      "epoch": 0.8372093023255814,
      "grad_norm": 0.15312501788139343,
      "learning_rate": 0.00028901422076739854,
      "loss": 2.2545,
      "step": 216
    },
    {
      "epoch": 0.8410852713178295,
      "grad_norm": 0.14746591448783875,
      "learning_rate": 0.0002885407878286976,
      "loss": 2.236,
      "step": 217
    },
    {
      "epoch": 0.8449612403100775,
      "grad_norm": 0.16490989923477173,
      "learning_rate": 0.0002880650287690181,
      "loss": 2.1494,
      "step": 218
    },
    {
      "epoch": 0.8488372093023255,
      "grad_norm": 0.1589692234992981,
      "learning_rate": 0.0002875869525463173,
      "loss": 2.1665,
      "step": 219
    },
    {
      "epoch": 0.8527131782945736,
      "grad_norm": 0.17528380453586578,
      "learning_rate": 0.0002871065681621823,
      "loss": 1.9999,
      "step": 220
    },
    {
      "epoch": 0.8565891472868217,
      "grad_norm": 0.1483374536037445,
      "learning_rate": 0.0002866238846616597,
      "loss": 2.1133,
      "step": 221
    },
    {
      "epoch": 0.8604651162790697,
      "grad_norm": 0.1600683033466339,
      "learning_rate": 0.00028613891113308566,
      "loss": 2.061,
      "step": 222
    },
    {
      "epoch": 0.8643410852713178,
      "grad_norm": 0.16365991532802582,
      "learning_rate": 0.000285651656707915,
      "loss": 2.0138,
      "step": 223
    },
    {
      "epoch": 0.8682170542635659,
      "grad_norm": 0.19533555209636688,
      "learning_rate": 0.0002851621305605486,
      "loss": 2.3357,
      "step": 224
    },
    {
      "epoch": 0.872093023255814,
      "grad_norm": 0.17096424102783203,
      "learning_rate": 0.0002846703419081614,
      "loss": 2.1513,
      "step": 225
    },
    {
      "epoch": 0.875968992248062,
      "grad_norm": 0.1619122326374054,
      "learning_rate": 0.00028417630001052844,
      "loss": 2.0947,
      "step": 226
    },
    {
      "epoch": 0.8798449612403101,
      "grad_norm": 0.16593049466609955,
      "learning_rate": 0.00028368001416985054,
      "loss": 1.9596,
      "step": 227
    },
    {
      "epoch": 0.8837209302325582,
      "grad_norm": 0.15227988362312317,
      "learning_rate": 0.0002831814937305792,
      "loss": 1.9498,
      "step": 228
    },
    {
      "epoch": 0.8875968992248062,
      "grad_norm": 0.16401511430740356,
      "learning_rate": 0.0002826807480792408,
      "loss": 2.2326,
      "step": 229
    },
    {
      "epoch": 0.8914728682170543,
      "grad_norm": 0.20172202587127686,
      "learning_rate": 0.0002821777866442596,
      "loss": 2.2973,
      "step": 230
    },
    {
      "epoch": 0.8953488372093024,
      "grad_norm": 0.15986618399620056,
      "learning_rate": 0.00028167261889578034,
      "loss": 1.9597,
      "step": 231
    },
    {
      "epoch": 0.8992248062015504,
      "grad_norm": 0.16535168886184692,
      "learning_rate": 0.0002811652543454899,
      "loss": 1.9582,
      "step": 232
    },
    {
      "epoch": 0.9031007751937985,
      "grad_norm": 0.18099620938301086,
      "learning_rate": 0.0002806557025464385,
      "loss": 2.3131,
      "step": 233
    },
    {
      "epoch": 0.9069767441860465,
      "grad_norm": 0.16411282122135162,
      "learning_rate": 0.0002801439730928591,
      "loss": 2.0324,
      "step": 234
    },
    {
      "epoch": 0.9108527131782945,
      "grad_norm": 0.1549196094274521,
      "learning_rate": 0.00027963007561998765,
      "loss": 2.0878,
      "step": 235
    },
    {
      "epoch": 0.9147286821705426,
      "grad_norm": 0.1656661331653595,
      "learning_rate": 0.00027911401980388093,
      "loss": 1.9896,
      "step": 236
    },
    {
      "epoch": 0.9186046511627907,
      "grad_norm": 0.1635042279958725,
      "learning_rate": 0.0002785958153612348,
      "loss": 1.9988,
      "step": 237
    },
    {
      "epoch": 0.9224806201550387,
      "grad_norm": 0.18084381520748138,
      "learning_rate": 0.0002780754720492012,
      "loss": 2.1724,
      "step": 238
    },
    {
      "epoch": 0.9263565891472868,
      "grad_norm": 0.1592792123556137,
      "learning_rate": 0.0002775529996652041,
      "loss": 2.1535,
      "step": 239
    },
    {
      "epoch": 0.9302325581395349,
      "grad_norm": 0.15081603825092316,
      "learning_rate": 0.00027702840804675553,
      "loss": 2.2441,
      "step": 240
    },
    {
      "epoch": 0.9341085271317829,
      "grad_norm": 0.1784433275461197,
      "learning_rate": 0.00027650170707127,
      "loss": 2.2237,
      "step": 241
    },
    {
      "epoch": 0.937984496124031,
      "grad_norm": 0.15613843500614166,
      "learning_rate": 0.00027597290665587855,
      "loss": 1.893,
      "step": 242
    },
    {
      "epoch": 0.9418604651162791,
      "grad_norm": 0.15053103864192963,
      "learning_rate": 0.0002754420167572421,
      "loss": 2.282,
      "step": 243
    },
    {
      "epoch": 0.9457364341085271,
      "grad_norm": 0.15055972337722778,
      "learning_rate": 0.0002749090473713641,
      "loss": 2.0519,
      "step": 244
    },
    {
      "epoch": 0.9496124031007752,
      "grad_norm": 0.15554535388946533,
      "learning_rate": 0.00027437400853340215,
      "loss": 1.9656,
      "step": 245
    },
    {
      "epoch": 0.9534883720930233,
      "grad_norm": 0.16338570415973663,
      "learning_rate": 0.00027383691031747885,
      "loss": 2.3299,
      "step": 246
    },
    {
      "epoch": 0.9573643410852714,
      "grad_norm": 0.16029350459575653,
      "learning_rate": 0.0002732977628364927,
      "loss": 2.0619,
      "step": 247
    },
    {
      "epoch": 0.9612403100775194,
      "grad_norm": 0.1667211353778839,
      "learning_rate": 0.0002727565762419271,
      "loss": 2.1308,
      "step": 248
    },
    {
      "epoch": 0.9651162790697675,
      "grad_norm": 0.1782902479171753,
      "learning_rate": 0.0002722133607236595,
      "loss": 1.9959,
      "step": 249
    },
    {
      "epoch": 0.9689922480620154,
      "grad_norm": 0.15829254686832428,
      "learning_rate": 0.0002716681265097696,
      "loss": 2.1666,
      "step": 250
    },
    {
      "epoch": 0.9728682170542635,
      "grad_norm": 0.15275566279888153,
      "learning_rate": 0.00027112088386634645,
      "loss": 2.2501,
      "step": 251
    },
    {
      "epoch": 0.9767441860465116,
      "grad_norm": 0.17710180580615997,
      "learning_rate": 0.00027057164309729557,
      "loss": 2.2943,
      "step": 252
    },
    {
      "epoch": 0.9806201550387597,
      "grad_norm": 0.14490962028503418,
      "learning_rate": 0.0002700204145441446,
      "loss": 2.1836,
      "step": 253
    },
    {
      "epoch": 0.9844961240310077,
      "grad_norm": 0.16613595187664032,
      "learning_rate": 0.0002694672085858487,
      "loss": 2.1681,
      "step": 254
    },
    {
      "epoch": 0.9883720930232558,
      "grad_norm": 0.15662476420402527,
      "learning_rate": 0.0002689120356385952,
      "loss": 1.9142,
      "step": 255
    },
    {
      "epoch": 0.9922480620155039,
      "grad_norm": 0.1657550036907196,
      "learning_rate": 0.0002683549061556074,
      "loss": 2.3238,
      "step": 256
    },
    {
      "epoch": 0.9961240310077519,
      "grad_norm": 0.1951078623533249,
      "learning_rate": 0.0002677958306269477,
      "loss": 2.368,
      "step": 257
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.14835147559642792,
      "learning_rate": 0.00026723481957932025,
      "loss": 1.9971,
      "step": 258
    },
    {
      "epoch": 1.003875968992248,
      "grad_norm": 0.1541881114244461,
      "learning_rate": 0.0002666718835758724,
      "loss": 2.1023,
      "step": 259
    },
    {
      "epoch": 1.0077519379844961,
      "grad_norm": 0.1681731790304184,
      "learning_rate": 0.00026610703321599643,
      "loss": 2.1575,
      "step": 260
    },
    {
      "epoch": 1.0116279069767442,
      "grad_norm": 0.1625296175479889,
      "learning_rate": 0.0002655402791351292,
      "loss": 2.0262,
      "step": 261
    },
    {
      "epoch": 1.0155038759689923,
      "grad_norm": 0.1610299050807953,
      "learning_rate": 0.0002649716320045527,
      "loss": 2.1875,
      "step": 262
    },
    {
      "epoch": 1.0193798449612403,
      "grad_norm": 0.16845448315143585,
      "learning_rate": 0.00026440110253119235,
      "loss": 2.1463,
      "step": 263
    },
    {
      "epoch": 1.0232558139534884,
      "grad_norm": 0.15429221093654633,
      "learning_rate": 0.00026382870145741594,
      "loss": 2.2537,
      "step": 264
    },
    {
      "epoch": 1.0271317829457365,
      "grad_norm": 0.16575424373149872,
      "learning_rate": 0.0002632544395608312,
      "loss": 2.1336,
      "step": 265
    },
    {
      "epoch": 1.0310077519379846,
      "grad_norm": 0.15692879259586334,
      "learning_rate": 0.0002626783276540828,
      "loss": 1.9072,
      "step": 266
    },
    {
      "epoch": 1.0348837209302326,
      "grad_norm": 0.20094941556453705,
      "learning_rate": 0.00026210037658464886,
      "loss": 2.1421,
      "step": 267
    },
    {
      "epoch": 1.0387596899224807,
      "grad_norm": 0.19174852967262268,
      "learning_rate": 0.00026152059723463664,
      "loss": 1.9939,
      "step": 268
    },
    {
      "epoch": 1.0426356589147288,
      "grad_norm": 0.17150387167930603,
      "learning_rate": 0.00026093900052057774,
      "loss": 2.1736,
      "step": 269
    },
    {
      "epoch": 1.0465116279069768,
      "grad_norm": 0.15520396828651428,
      "learning_rate": 0.0002603555973932225,
      "loss": 2.0932,
      "step": 270
    },
    {
      "epoch": 1.050387596899225,
      "grad_norm": 0.1705746054649353,
      "learning_rate": 0.0002597703988373336,
      "loss": 2.2486,
      "step": 271
    },
    {
      "epoch": 1.054263565891473,
      "grad_norm": 0.1639339029788971,
      "learning_rate": 0.0002591834158714797,
      "loss": 2.2351,
      "step": 272
    },
    {
      "epoch": 1.058139534883721,
      "grad_norm": 0.17867504060268402,
      "learning_rate": 0.00025859465954782744,
      "loss": 2.25,
      "step": 273
    },
    {
      "epoch": 1.062015503875969,
      "grad_norm": 0.20789186656475067,
      "learning_rate": 0.0002580041409519339,
      "loss": 1.9999,
      "step": 274
    },
    {
      "epoch": 1.065891472868217,
      "grad_norm": 0.15863926708698273,
      "learning_rate": 0.0002574118712025374,
      "loss": 2.0538,
      "step": 275
    },
    {
      "epoch": 1.069767441860465,
      "grad_norm": 0.16435660421848297,
      "learning_rate": 0.00025681786145134825,
      "loss": 2.0152,
      "step": 276
    },
    {
      "epoch": 1.073643410852713,
      "grad_norm": 0.1803198754787445,
      "learning_rate": 0.000256222122882839,
      "loss": 2.3157,
      "step": 277
    },
    {
      "epoch": 1.0775193798449612,
      "grad_norm": 0.19417981803417206,
      "learning_rate": 0.0002556246667140337,
      "loss": 2.1564,
      "step": 278
    },
    {
      "epoch": 1.003875968992248,
      "grad_norm": 0.17067064344882965,
      "learning_rate": 0.0002550255041942965,
      "loss": 1.8682,
      "step": 279
    },
    {
      "epoch": 1.0077519379844961,
      "grad_norm": 0.20531415939331055,
      "learning_rate": 0.0002544246466051203,
      "loss": 1.799,
      "step": 280
    },
    {
      "epoch": 1.0116279069767442,
      "grad_norm": 0.21423804759979248,
      "learning_rate": 0.000253822105259914,
      "loss": 1.623,
      "step": 281
    },
    {
      "epoch": 1.0155038759689923,
      "grad_norm": 0.17934457957744598,
      "learning_rate": 0.0002532178915037894,
      "loss": 1.8665,
      "step": 282
    },
    {
      "epoch": 1.0193798449612403,
      "grad_norm": 0.19024011492729187,
      "learning_rate": 0.0002526120167133479,
      "loss": 1.8837,
      "step": 283
    },
    {
      "epoch": 1.0232558139534884,
      "grad_norm": 0.26798877120018005,
      "learning_rate": 0.0002520044922964662,
      "loss": 1.8213,
      "step": 284
    },
    {
      "epoch": 1.0271317829457365,
      "grad_norm": 0.19613595306873322,
      "learning_rate": 0.0002513953296920811,
      "loss": 1.8714,
      "step": 285
    },
    {
      "epoch": 1.0310077519379846,
      "grad_norm": 0.17628167569637299,
      "learning_rate": 0.0002507845403699748,
      "loss": 1.8617,
      "step": 286
    },
    {
      "epoch": 1.0348837209302326,
      "grad_norm": 0.19648019969463348,
      "learning_rate": 0.0002501721358305584,
      "loss": 2.1,
      "step": 287
    },
    {
      "epoch": 1.0387596899224807,
      "grad_norm": 0.21330788731575012,
      "learning_rate": 0.0002495581276046557,
      "loss": 1.8563,
      "step": 288
    },
    {
      "epoch": 1.0426356589147288,
      "grad_norm": 0.18956711888313293,
      "learning_rate": 0.00024894252725328583,
      "loss": 1.769,
      "step": 289
    },
    {
      "epoch": 1.0465116279069768,
      "grad_norm": 0.17926643788814545,
      "learning_rate": 0.00024832534636744566,
      "loss": 1.8329,
      "step": 290
    },
    {
      "epoch": 1.050387596899225,
      "grad_norm": 0.1793479472398758,
      "learning_rate": 0.0002477065965678917,
      "loss": 1.5659,
      "step": 291
    },
    {
      "epoch": 1.054263565891473,
      "grad_norm": 0.19575071334838867,
      "learning_rate": 0.00024708628950492116,
      "loss": 1.7525,
      "step": 292
    },
    {
      "epoch": 1.058139534883721,
      "grad_norm": 0.20665033161640167,
      "learning_rate": 0.00024646443685815247,
      "loss": 1.7979,
      "step": 293
    },
    {
      "epoch": 1.062015503875969,
      "grad_norm": 0.2565125823020935,
      "learning_rate": 0.0002458410503363055,
      "loss": 1.8589,
      "step": 294
    },
    {
      "epoch": 1.0658914728682172,
      "grad_norm": 0.18845273554325104,
      "learning_rate": 0.0002452161416769813,
      "loss": 1.9473,
      "step": 295
    },
    {
      "epoch": 1.069767441860465,
      "grad_norm": 0.18714243173599243,
      "learning_rate": 0.0002445897226464408,
      "loss": 1.7174,
      "step": 296
    },
    {
      "epoch": 1.073643410852713,
      "grad_norm": 0.20173239707946777,
      "learning_rate": 0.0002439618050393833,
      "loss": 1.7722,
      "step": 297
    },
    {
      "epoch": 1.0775193798449612,
      "grad_norm": 0.1991068571805954,
      "learning_rate": 0.0002433324006787244,
      "loss": 1.8291,
      "step": 298
    },
    {
      "epoch": 1.0813953488372092,
      "grad_norm": 0.20716890692710876,
      "learning_rate": 0.00024270152141537367,
      "loss": 1.8379,
      "step": 299
    },
    {
      "epoch": 1.0852713178294573,
      "grad_norm": 0.18439684808254242,
      "learning_rate": 0.00024206917912801104,
      "loss": 1.7841,
      "step": 300
    },
    {
      "epoch": 1.0891472868217054,
      "grad_norm": 0.1679239273071289,
      "learning_rate": 0.00024143538572286356,
      "loss": 1.8962,
      "step": 301
    },
    {
      "epoch": 1.0930232558139534,
      "grad_norm": 0.1798442304134369,
      "learning_rate": 0.00024080015313348086,
      "loss": 1.9051,
      "step": 302
    },
    {
      "epoch": 1.0968992248062015,
      "grad_norm": 0.18058885633945465,
      "learning_rate": 0.00024016349332051082,
      "loss": 1.9452,
      "step": 303
    },
    {
      "epoch": 1.1007751937984496,
      "grad_norm": 0.17939876019954681,
      "learning_rate": 0.00023952541827147416,
      "loss": 1.7577,
      "step": 304
    },
    {
      "epoch": 1.1046511627906976,
      "grad_norm": 0.1852078139781952,
      "learning_rate": 0.00023888594000053855,
      "loss": 1.9129,
      "step": 305
    },
    {
      "epoch": 1.1085271317829457,
      "grad_norm": 0.18097913265228271,
      "learning_rate": 0.0002382450705482929,
      "loss": 1.8608,
      "step": 306
    },
    {
      "epoch": 1.1124031007751938,
      "grad_norm": 0.21096573770046234,
      "learning_rate": 0.00023760282198152007,
      "loss": 2.0269,
      "step": 307
    },
    {
      "epoch": 1.1162790697674418,
      "grad_norm": 0.18521122634410858,
      "learning_rate": 0.0002369592063929702,
      "loss": 1.7748,
      "step": 308
    },
    {
      "epoch": 1.12015503875969,
      "grad_norm": 0.20764781534671783,
      "learning_rate": 0.00023631423590113259,
      "loss": 1.5756,
      "step": 309
    },
    {
      "epoch": 1.124031007751938,
      "grad_norm": 0.17593301832675934,
      "learning_rate": 0.00023566792265000772,
      "loss": 1.7376,
      "step": 310
    },
    {
      "epoch": 1.127906976744186,
      "grad_norm": 0.1941826045513153,
      "learning_rate": 0.00023502027880887874,
      "loss": 2.0077,
      "step": 311
    },
    {
      "epoch": 1.1317829457364341,
      "grad_norm": 0.1869935393333435,
      "learning_rate": 0.00023437131657208193,
      "loss": 1.7087,
      "step": 312
    },
    {
      "epoch": 1.1356589147286822,
      "grad_norm": 0.18788675963878632,
      "learning_rate": 0.0002337210481587775,
      "loss": 1.5735,
      "step": 313
    },
    {
      "epoch": 1.1395348837209303,
      "grad_norm": 0.2027387022972107,
      "learning_rate": 0.0002330694858127193,
      "loss": 1.9059,
      "step": 314
    },
    {
      "epoch": 1.1434108527131783,
      "grad_norm": 0.18780571222305298,
      "learning_rate": 0.0002324166418020244,
      "loss": 1.7282,
      "step": 315
    },
    {
      "epoch": 1.1472868217054264,
      "grad_norm": 0.1976962685585022,
      "learning_rate": 0.00023176252841894187,
      "loss": 2.018,
      "step": 316
    },
    {
      "epoch": 1.1511627906976745,
      "grad_norm": 0.19255152344703674,
      "learning_rate": 0.00023110715797962177,
      "loss": 1.729,
      "step": 317
    },
    {
      "epoch": 1.1550387596899225,
      "grad_norm": 0.18749909102916718,
      "learning_rate": 0.00023045054282388273,
      "loss": 1.7549,
      "step": 318
    },
    {
      "epoch": 1.1589147286821706,
      "grad_norm": 0.22557330131530762,
      "learning_rate": 0.00022979269531497995,
      "loss": 2.0677,
      "step": 319
    },
    {
      "epoch": 1.1627906976744187,
      "grad_norm": 0.1746273934841156,
      "learning_rate": 0.0002291336278393724,
      "loss": 1.7589,
      "step": 320
    },
    {
      "epoch": 1.1666666666666667,
      "grad_norm": 0.17809656262397766,
      "learning_rate": 0.00022847335280648943,
      "loss": 1.7851,
      "step": 321
    },
    {
      "epoch": 1.1705426356589148,
      "grad_norm": 0.19532962143421173,
      "learning_rate": 0.0002278118826484972,
      "loss": 1.8515,
      "step": 322
    },
    {
      "epoch": 1.1744186046511629,
      "grad_norm": 0.20081496238708496,
      "learning_rate": 0.00022714922982006467,
      "loss": 1.7351,
      "step": 323
    },
    {
      "epoch": 1.178294573643411,
      "grad_norm": 0.22169405221939087,
      "learning_rate": 0.000226485406798129,
      "loss": 1.8978,
      "step": 324
    },
    {
      "epoch": 1.1821705426356588,
      "grad_norm": 0.1990557610988617,
      "learning_rate": 0.00022582042608166063,
      "loss": 1.7837,
      "step": 325
    },
    {
      "epoch": 1.1860465116279069,
      "grad_norm": 0.19927580654621124,
      "learning_rate": 0.00022515430019142793,
      "loss": 1.7193,
      "step": 326
    },
    {
      "epoch": 1.189922480620155,
      "grad_norm": 0.19535136222839355,
      "learning_rate": 0.00022448704166976166,
      "loss": 2.1294,
      "step": 327
    },
    {
      "epoch": 1.193798449612403,
      "grad_norm": 0.2005845010280609,
      "learning_rate": 0.00022381866308031839,
      "loss": 1.9261,
      "step": 328
    },
    {
      "epoch": 1.197674418604651,
      "grad_norm": 0.20202966034412384,
      "learning_rate": 0.00022314917700784438,
      "loss": 1.8773,
      "step": 329
    },
    {
      "epoch": 1.2015503875968991,
      "grad_norm": 0.23102688789367676,
      "learning_rate": 0.00022247859605793835,
      "loss": 1.7446,
      "step": 330
    },
    {
      "epoch": 1.2054263565891472,
      "grad_norm": 0.19356843829154968,
      "learning_rate": 0.00022180693285681419,
      "loss": 1.7991,
      "step": 331
    },
    {
      "epoch": 1.2093023255813953,
      "grad_norm": 0.22885212302207947,
      "learning_rate": 0.0002211342000510633,
      "loss": 1.6866,
      "step": 332
    },
    {
      "epoch": 1.2131782945736433,
      "grad_norm": 0.18752378225326538,
      "learning_rate": 0.00022046041030741645,
      "loss": 1.8063,
      "step": 333
    },
    {
      "epoch": 1.2170542635658914,
      "grad_norm": 0.18102799355983734,
      "learning_rate": 0.00021978557631250505,
      "loss": 2.0372,
      "step": 334
    },
    {
      "epoch": 1.2209302325581395,
      "grad_norm": 0.19595623016357422,
      "learning_rate": 0.0002191097107726228,
      "loss": 1.9375,
      "step": 335
    },
    {
      "epoch": 1.2248062015503876,
      "grad_norm": 0.1951194554567337,
      "learning_rate": 0.00021843282641348586,
      "loss": 1.9015,
      "step": 336
    },
    {
      "epoch": 1.2286821705426356,
      "grad_norm": 0.19051988422870636,
      "learning_rate": 0.00021775493597999359,
      "loss": 1.978,
      "step": 337
    },
    {
      "epoch": 1.2325581395348837,
      "grad_norm": 0.20707543194293976,
      "learning_rate": 0.00021707605223598853,
      "loss": 1.648,
      "step": 338
    },
    {
      "epoch": 1.2364341085271318,
      "grad_norm": 0.1884542554616928,
      "learning_rate": 0.00021639618796401595,
      "loss": 1.8844,
      "step": 339
    },
    {
      "epoch": 1.2403100775193798,
      "grad_norm": 0.19425931572914124,
      "learning_rate": 0.00021571535596508337,
      "loss": 1.8605,
      "step": 340
    },
    {
      "epoch": 1.244186046511628,
      "grad_norm": 0.20685045421123505,
      "learning_rate": 0.0002150335690584193,
      "loss": 1.865,
      "step": 341
    },
    {
      "epoch": 1.248062015503876,
      "grad_norm": 0.1994500607252121,
      "learning_rate": 0.00021435084008123202,
      "loss": 2.01,
      "step": 342
    },
    {
      "epoch": 1.251937984496124,
      "grad_norm": 0.18918661773204803,
      "learning_rate": 0.00021366718188846795,
      "loss": 1.9086,
      "step": 343
    },
    {
      "epoch": 1.255813953488372,
      "grad_norm": 0.18662329018115997,
      "learning_rate": 0.0002129826073525693,
      "loss": 1.7579,
      "step": 344
    },
    {
      "epoch": 1.2596899224806202,
      "grad_norm": 0.21391060948371887,
      "learning_rate": 0.00021229712936323206,
      "loss": 1.8439,
      "step": 345
    },
    {
      "epoch": 1.2635658914728682,
      "grad_norm": 0.19121584296226501,
      "learning_rate": 0.000211610760827163,
      "loss": 1.7058,
      "step": 346
    },
    {
      "epoch": 1.2674418604651163,
      "grad_norm": 0.19377876818180084,
      "learning_rate": 0.00021092351466783695,
      "loss": 1.7458,
      "step": 347
    },
    {
      "epoch": 1.2713178294573644,
      "grad_norm": 0.181095689535141,
      "learning_rate": 0.00021023540382525313,
      "loss": 1.8254,
      "step": 348
    },
    {
      "epoch": 1.2751937984496124,
      "grad_norm": 0.18665671348571777,
      "learning_rate": 0.00020954644125569186,
      "loss": 1.8718,
      "step": 349
    },
    {
      "epoch": 1.2790697674418605,
      "grad_norm": 0.19250045716762543,
      "learning_rate": 0.00020885663993147022,
      "loss": 1.9262,
      "step": 350
    },
    {
      "epoch": 1.2829457364341086,
      "grad_norm": 0.17288216948509216,
      "learning_rate": 0.00020816601284069818,
      "loss": 1.8269,
      "step": 351
    },
    {
      "epoch": 1.2868217054263567,
      "grad_norm": 0.18719065189361572,
      "learning_rate": 0.00020747457298703388,
      "loss": 1.6828,
      "step": 352
    },
    {
      "epoch": 1.2906976744186047,
      "grad_norm": 0.214838445186615,
      "learning_rate": 0.00020678233338943861,
      "loss": 1.6086,
      "step": 353
    },
    {
      "epoch": 1.2945736434108528,
      "grad_norm": 0.20970992743968964,
      "learning_rate": 0.0002060893070819321,
      "loss": 1.8411,
      "step": 354
    },
    {
      "epoch": 1.2984496124031009,
      "grad_norm": 0.19377633929252625,
      "learning_rate": 0.00020539550711334666,
      "loss": 1.7452,
      "step": 355
    },
    {
      "epoch": 1.302325581395349,
      "grad_norm": 0.21270965039730072,
      "learning_rate": 0.0002047009465470818,
      "loss": 1.8104,
      "step": 356
    },
    {
      "epoch": 1.306201550387597,
      "grad_norm": 0.20388975739479065,
      "learning_rate": 0.00020400563846085804,
      "loss": 1.8727,
      "step": 357
    },
    {
      "epoch": 1.310077519379845,
      "grad_norm": 0.20084132254123688,
      "learning_rate": 0.00020330959594647087,
      "loss": 1.833,
      "step": 358
    },
    {
      "epoch": 1.3139534883720931,
      "grad_norm": 0.21278367936611176,
      "learning_rate": 0.00020261283210954418,
      "loss": 1.8254,
      "step": 359
    },
    {
      "epoch": 1.3178294573643412,
      "grad_norm": 0.20003218948841095,
      "learning_rate": 0.00020191536006928338,
      "loss": 1.6369,
      "step": 360
    },
    {
      "epoch": 1.3217054263565893,
      "grad_norm": 0.1956341415643692,
      "learning_rate": 0.00020121719295822856,
      "loss": 1.56,
      "step": 361
    },
    {
      "epoch": 1.3255813953488373,
      "grad_norm": 0.2056959867477417,
      "learning_rate": 0.0002005183439220071,
      "loss": 1.9608,
      "step": 362
    },
    {
      "epoch": 1.3294573643410852,
      "grad_norm": 0.20560531318187714,
      "learning_rate": 0.00019981882611908616,
      "loss": 1.5359,
      "step": 363
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 0.20920397341251373,
      "learning_rate": 0.000199118652720525,
      "loss": 1.7916,
      "step": 364
    },
    {
      "epoch": 1.3372093023255813,
      "grad_norm": 0.20703092217445374,
      "learning_rate": 0.00019841783690972695,
      "loss": 1.7696,
      "step": 365
    },
    {
      "epoch": 1.3410852713178294,
      "grad_norm": 0.2005213499069214,
      "learning_rate": 0.00019771639188219118,
      "loss": 1.755,
      "step": 366
    },
    {
      "epoch": 1.3449612403100775,
      "grad_norm": 0.19476981461048126,
      "learning_rate": 0.00019701433084526416,
      "loss": 1.9473,
      "step": 367
    },
    {
      "epoch": 1.3488372093023255,
      "grad_norm": 0.1986457258462906,
      "learning_rate": 0.00019631166701789115,
      "loss": 1.5943,
      "step": 368
    },
    {
      "epoch": 1.3527131782945736,
      "grad_norm": 0.1900886595249176,
      "learning_rate": 0.0001956084136303671,
      "loss": 1.829,
      "step": 369
    },
    {
      "epoch": 1.3565891472868217,
      "grad_norm": 0.19796966016292572,
      "learning_rate": 0.00019490458392408777,
      "loss": 1.6652,
      "step": 370
    },
    {
      "epoch": 1.3604651162790697,
      "grad_norm": 0.19012434780597687,
      "learning_rate": 0.00019420019115130022,
|
"loss": 2.0372, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.3643410852713178, |
|
"grad_norm": 0.20295971632003784, |
|
"learning_rate": 0.0001934952485748534, |
|
"loss": 1.7472, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.3682170542635659, |
|
"grad_norm": 0.24137261509895325, |
|
"learning_rate": 0.00019278976946794838, |
|
"loss": 1.7415, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.372093023255814, |
|
"grad_norm": 0.19063839316368103, |
|
"learning_rate": 0.00019208376711388837, |
|
"loss": 1.6088, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.375968992248062, |
|
"grad_norm": 0.21088172495365143, |
|
"learning_rate": 0.00019137725480582884, |
|
"loss": 2.0018, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.37984496124031, |
|
"grad_norm": 0.18817083537578583, |
|
"learning_rate": 0.00019067024584652688, |
|
"loss": 1.7041, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.3837209302325582, |
|
"grad_norm": 0.19199052453041077, |
|
"learning_rate": 0.000189962753548091, |
|
"loss": 1.7152, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.3875968992248062, |
|
"grad_norm": 0.19253146648406982, |
|
"learning_rate": 0.00018925479123173042, |
|
"loss": 1.8655, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.3914728682170543, |
|
"grad_norm": 0.19239702820777893, |
|
"learning_rate": 0.00018854637222750418, |
|
"loss": 1.9605, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.3953488372093024, |
|
"grad_norm": 0.19195358455181122, |
|
"learning_rate": 0.00018783750987407015, |
|
"loss": 1.7392, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.3992248062015504, |
|
"grad_norm": 0.22022220492362976, |
|
"learning_rate": 0.00018712821751843398, |
|
"loss": 1.7857, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.4031007751937985, |
|
"grad_norm": 0.20397743582725525, |
|
"learning_rate": 0.0001864185085156978, |
|
"loss": 1.8367, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.4069767441860466, |
|
"grad_norm": 0.21266423165798187, |
|
"learning_rate": 0.00018570839622880848, |
|
"loss": 1.7252, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.4108527131782946, |
|
"grad_norm": 0.2537650465965271, |
|
"learning_rate": 0.0001849978940283065, |
|
"loss": 1.6755, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.4147286821705427, |
|
"grad_norm": 0.2007053792476654, |
|
"learning_rate": 0.00018428701529207367, |
|
"loss": 1.8301, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.4186046511627908, |
|
"grad_norm": 0.2502276301383972, |
|
"learning_rate": 0.0001835757734050817, |
|
"loss": 1.8444, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.4224806201550386, |
|
"grad_norm": 0.21285481750965118, |
|
"learning_rate": 0.00018286418175913986, |
|
"loss": 1.8921, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.4263565891472867, |
|
"grad_norm": 0.192243754863739, |
|
"learning_rate": 0.0001821522537526431, |
|
"loss": 1.7928, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.4302325581395348, |
|
"grad_norm": 0.18463876843452454, |
|
"learning_rate": 0.0001814400027903194, |
|
"loss": 1.7785, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.4341085271317828, |
|
"grad_norm": 0.19326354563236237, |
|
"learning_rate": 0.00018072744228297784, |
|
"loss": 1.8843, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.437984496124031, |
|
"grad_norm": 0.1868479996919632, |
|
"learning_rate": 0.00018001458564725572, |
|
"loss": 1.5857, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.441860465116279, |
|
"grad_norm": 0.23313790559768677, |
|
"learning_rate": 0.00017930144630536607, |
|
"loss": 1.8934, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.445736434108527, |
|
"grad_norm": 0.2015128880739212, |
|
"learning_rate": 0.00017858803768484497, |
|
"loss": 1.6785, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.449612403100775, |
|
"grad_norm": 0.20901648700237274, |
|
"learning_rate": 0.00017787437321829862, |
|
"loss": 1.6072, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.4534883720930232, |
|
"grad_norm": 0.21417102217674255, |
|
"learning_rate": 0.0001771604663431506, |
|
"loss": 1.8706, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.4573643410852712, |
|
"grad_norm": 0.26056888699531555, |
|
"learning_rate": 0.00017644633050138862, |
|
"loss": 1.8553, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.4612403100775193, |
|
"grad_norm": 0.19383443892002106, |
|
"learning_rate": 0.00017573197913931166, |
|
"loss": 1.7848, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.4651162790697674, |
|
"grad_norm": 0.2180427759885788, |
|
"learning_rate": 0.00017501742570727666, |
|
"loss": 1.8704, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.4689922480620154, |
|
"grad_norm": 0.2105623483657837, |
|
"learning_rate": 0.00017430268365944518, |
|
"loss": 1.6322, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.4728682170542635, |
|
"grad_norm": 0.19639936089515686, |
|
"learning_rate": 0.0001735877664535303, |
|
"loss": 1.7703, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.4767441860465116, |
|
"grad_norm": 0.18290045857429504, |
|
"learning_rate": 0.00017287268755054305, |
|
"loss": 1.7469, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.4806201550387597, |
|
"grad_norm": 0.20623917877674103, |
|
"learning_rate": 0.00017215746041453913, |
|
"loss": 1.7563, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.4844961240310077, |
|
"grad_norm": 0.1958610713481903, |
|
"learning_rate": 0.00017144209851236504, |
|
"loss": 1.8197, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.4883720930232558, |
|
"grad_norm": 0.1950949728488922, |
|
"learning_rate": 0.00017072661531340502, |
|
"loss": 1.8786, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.4922480620155039, |
|
"grad_norm": 0.21500743925571442, |
|
"learning_rate": 0.0001700110242893271, |
|
"loss": 1.7809, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.496124031007752, |
|
"grad_norm": 0.2010657638311386, |
|
"learning_rate": 0.00016929533891382945, |
|
"loss": 1.6953, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.19953016936779022, |
|
"learning_rate": 0.00016857957266238688, |
|
"loss": 1.466, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.503875968992248, |
|
"grad_norm": 0.19881819188594818, |
|
"learning_rate": 0.00016786373901199684, |
|
"loss": 1.9139, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.5077519379844961, |
|
"grad_norm": 0.1984422504901886, |
|
"learning_rate": 0.000167147851440926, |
|
"loss": 1.9282, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.5116279069767442, |
|
"grad_norm": 0.19584600627422333, |
|
"learning_rate": 0.00016643192342845602, |
|
"loss": 1.7367, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.5155038759689923, |
|
"grad_norm": 0.1895333230495453, |
|
"learning_rate": 0.00016571596845463044, |
|
"loss": 1.8496, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.5193798449612403, |
|
"grad_norm": 0.1868915855884552, |
|
"learning_rate": 0.000165, |
|
"loss": 1.7451, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.5232558139534884, |
|
"grad_norm": 0.192902609705925, |
|
"learning_rate": 0.00016428403154536962, |
|
"loss": 1.996, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.5271317829457365, |
|
"grad_norm": 0.22422343492507935, |
|
"learning_rate": 0.00016356807657154395, |
|
"loss": 1.8168, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.5310077519379846, |
|
"grad_norm": 0.1989205926656723, |
|
"learning_rate": 0.00016285214855907406, |
|
"loss": 1.8789, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.5348837209302326, |
|
"grad_norm": 0.19881393015384674, |
|
"learning_rate": 0.0001621362609880032, |
|
"loss": 1.732, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.5387596899224807, |
|
"grad_norm": 0.197114959359169, |
|
"learning_rate": 0.00016142042733761317, |
|
"loss": 1.7816, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.5426356589147288, |
|
"grad_norm": 0.20128700137138367, |
|
"learning_rate": 0.00016070466108617055, |
|
"loss": 1.8697, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.5465116279069768, |
|
"grad_norm": 0.2489478439092636, |
|
"learning_rate": 0.00015998897571067292, |
|
"loss": 1.7559, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.550387596899225, |
|
"grad_norm": 0.2148464322090149, |
|
"learning_rate": 0.00015927338468659497, |
|
"loss": 1.6178, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.554263565891473, |
|
"grad_norm": 0.18926125764846802, |
|
"learning_rate": 0.00015855790148763498, |
|
"loss": 1.8989, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.558139534883721, |
|
"grad_norm": 0.2206331193447113, |
|
"learning_rate": 0.00015784253958546092, |
|
"loss": 1.4578, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.562015503875969, |
|
"grad_norm": 0.20707029104232788, |
|
"learning_rate": 0.00015712731244945697, |
|
"loss": 1.8484, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.5658914728682172, |
|
"grad_norm": 0.2055550217628479, |
|
"learning_rate": 0.00015641223354646977, |
|
"loss": 1.7355, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.5697674418604652, |
|
"grad_norm": 0.22906459867954254, |
|
"learning_rate": 0.00015569731634055482, |
|
"loss": 1.8945, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.5736434108527133, |
|
"grad_norm": 0.1993347406387329, |
|
"learning_rate": 0.00015498257429272336, |
|
"loss": 1.6523, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.5775193798449614, |
|
"grad_norm": 0.20685455203056335, |
|
"learning_rate": 0.00015426802086068833, |
|
"loss": 1.7329, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.5813953488372094, |
|
"grad_norm": 0.2264314740896225, |
|
"learning_rate": 0.00015355366949861135, |
|
"loss": 1.7889, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.5852713178294575, |
|
"grad_norm": 0.23605959117412567, |
|
"learning_rate": 0.00015283953365684941, |
|
"loss": 1.8501, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.5891472868217056, |
|
"grad_norm": 0.1887684017419815, |
|
"learning_rate": 0.0001521256267817014, |
|
"loss": 1.7191, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.5930232558139537, |
|
"grad_norm": 0.1952105611562729, |
|
"learning_rate": 0.00015141196231515508, |
|
"loss": 1.7227, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.5968992248062015, |
|
"grad_norm": 0.19459757208824158, |
|
"learning_rate": 0.00015069855369463392, |
|
"loss": 1.4825, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.6007751937984496, |
|
"grad_norm": 0.20146332681179047, |
|
"learning_rate": 0.0001499854143527443, |
|
"loss": 1.8256, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.6046511627906976, |
|
"grad_norm": 0.2057608962059021, |
|
"learning_rate": 0.00014927255771702219, |
|
"loss": 1.6188, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.6085271317829457, |
|
"grad_norm": 0.2795553207397461, |
|
"learning_rate": 0.00014855999720968062, |
|
"loss": 1.5248, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.6124031007751938, |
|
"grad_norm": 0.20762935280799866, |
|
"learning_rate": 0.00014784774624735695, |
|
"loss": 1.812, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.6162790697674418, |
|
"grad_norm": 0.20336000621318817, |
|
"learning_rate": 0.00014713581824086014, |
|
"loss": 2.0534, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.62015503875969, |
|
"grad_norm": 0.20982185006141663, |
|
"learning_rate": 0.00014642422659491837, |
|
"loss": 1.708, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.624031007751938, |
|
"grad_norm": 0.19309498369693756, |
|
"learning_rate": 0.00014571298470792635, |
|
"loss": 1.7285, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.627906976744186, |
|
"grad_norm": 0.2094258964061737, |
|
"learning_rate": 0.00014500210597169353, |
|
"loss": 1.9404, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.6317829457364341, |
|
"grad_norm": 0.20995444059371948, |
|
"learning_rate": 0.00014429160377119152, |
|
"loss": 1.8059, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.6356589147286822, |
|
"grad_norm": 0.19962961971759796, |
|
"learning_rate": 0.00014358149148430225, |
|
"loss": 1.5697, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.6395348837209303, |
|
"grad_norm": 0.2032284438610077, |
|
"learning_rate": 0.000142871782481566, |
|
"loss": 1.8137, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.6434108527131783, |
|
"grad_norm": 0.19651709496974945, |
|
"learning_rate": 0.00014216249012592987, |
|
"loss": 1.7584, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.6472868217054264, |
|
"grad_norm": 0.20097319781780243, |
|
"learning_rate": 0.00014145362777249587, |
|
"loss": 1.875, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.6511627906976745, |
|
"grad_norm": 0.1974637806415558, |
|
"learning_rate": 0.00014074520876826955, |
|
"loss": 1.9157, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.6550387596899225, |
|
"grad_norm": 0.1912589967250824, |
|
"learning_rate": 0.00014003724645190898, |
|
"loss": 1.8198, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.6589147286821704, |
|
"grad_norm": 0.22384630143642426, |
|
"learning_rate": 0.00013932975415347314, |
|
"loss": 2.0134, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.6627906976744184, |
|
"grad_norm": 0.22261658310890198, |
|
"learning_rate": 0.00013862274519417118, |
|
"loss": 1.9937, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.20569778978824615, |
|
"learning_rate": 0.0001379162328861116, |
|
"loss": 1.7494, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.6705426356589146, |
|
"grad_norm": 0.19439737498760223, |
|
"learning_rate": 0.00013721023053205164, |
|
"loss": 1.8848, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.6744186046511627, |
|
"grad_norm": 0.1926531344652176, |
|
"learning_rate": 0.0001365047514251466, |
|
"loss": 1.8646, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.6782945736434107, |
|
"grad_norm": 0.2144368290901184, |
|
"learning_rate": 0.00013579980884869977, |
|
"loss": 1.8047, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.6821705426356588, |
|
"grad_norm": 0.20255987346172333, |
|
"learning_rate": 0.00013509541607591226, |
|
"loss": 1.8394, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.6860465116279069, |
|
"grad_norm": 0.1870235800743103, |
|
"learning_rate": 0.00013439158636963292, |
|
"loss": 1.8102, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.689922480620155, |
|
"grad_norm": 0.20020346343517303, |
|
"learning_rate": 0.0001336883329821089, |
|
"loss": 1.8804, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.693798449612403, |
|
"grad_norm": 0.1906907856464386, |
|
"learning_rate": 0.00013298566915473581, |
|
"loss": 1.9384, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.697674418604651, |
|
"grad_norm": 0.2018539309501648, |
|
"learning_rate": 0.00013228360811780882, |
|
"loss": 1.8863, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.7015503875968991, |
|
"grad_norm": 0.1981591433286667, |
|
"learning_rate": 0.00013158216309027305, |
|
"loss": 1.9656, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.7054263565891472, |
|
"grad_norm": 0.2033264935016632, |
|
"learning_rate": 0.000130881347279475, |
|
"loss": 1.6587, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.7093023255813953, |
|
"grad_norm": 0.1924613118171692, |
|
"learning_rate": 0.00013018117388091386, |
|
"loss": 1.6004, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.7131782945736433, |
|
"grad_norm": 0.19790911674499512, |
|
"learning_rate": 0.00012948165607799296, |
|
"loss": 1.8981, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.7170542635658914, |
|
"grad_norm": 0.20132358372211456, |
|
"learning_rate": 0.0001287828070417715, |
|
"loss": 1.9168, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.7209302325581395, |
|
"grad_norm": 0.19085679948329926, |
|
"learning_rate": 0.00012808463993071661, |
|
"loss": 1.6915, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.7248062015503876, |
|
"grad_norm": 0.2047281414270401, |
|
"learning_rate": 0.00012738716789045582, |
|
"loss": 1.6943, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.7286821705426356, |
|
"grad_norm": 0.21793071925640106, |
|
"learning_rate": 0.00012669040405352916, |
|
"loss": 1.9838, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.7325581395348837, |
|
"grad_norm": 0.21391068398952484, |
|
"learning_rate": 0.00012599436153914198, |
|
"loss": 1.8181, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.7364341085271318, |
|
"grad_norm": 0.21556377410888672, |
|
"learning_rate": 0.00012529905345291825, |
|
"loss": 1.8422, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.7403100775193798, |
|
"grad_norm": 0.2581706941127777, |
|
"learning_rate": 0.00012460449288665337, |
|
"loss": 1.9742, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.744186046511628, |
|
"grad_norm": 0.19443494081497192, |
|
"learning_rate": 0.00012391069291806792, |
|
"loss": 1.6538, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.748062015503876, |
|
"grad_norm": 0.1977704018354416, |
|
"learning_rate": 0.00012321766661056138, |
|
"loss": 1.5853, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.751937984496124, |
|
"grad_norm": 0.21871508657932281, |
|
"learning_rate": 0.00012252542701296612, |
|
"loss": 1.8603, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.755813953488372, |
|
"grad_norm": 0.21219857037067413, |
|
"learning_rate": 0.00012183398715930184, |
|
"loss": 2.0966, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.7596899224806202, |
|
"grad_norm": 0.2329765260219574, |
|
"learning_rate": 0.00012114336006852986, |
|
"loss": 1.62, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.7635658914728682, |
|
"grad_norm": 0.2001282125711441, |
|
"learning_rate": 0.00012045355874430818, |
|
"loss": 1.6761, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.7674418604651163, |
|
"grad_norm": 0.21139495074748993, |
|
"learning_rate": 0.0001197645961747469, |
|
"loss": 1.6687, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.7713178294573644, |
|
"grad_norm": 0.20851844549179077, |
|
"learning_rate": 0.00011907648533216309, |
|
"loss": 1.7873, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.7751937984496124, |
|
"grad_norm": 0.20665644109249115, |
|
"learning_rate": 0.00011838923917283697, |
|
"loss": 2.0133, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.7790697674418605, |
|
"grad_norm": 0.21509003639221191, |
|
"learning_rate": 0.00011770287063676794, |
|
"loss": 1.8063, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.7829457364341086, |
|
"grad_norm": 0.20424823462963104, |
|
"learning_rate": 0.0001170173926474307, |
|
"loss": 1.7912, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.7868217054263567, |
|
"grad_norm": 0.1964096873998642, |
|
"learning_rate": 0.00011633281811153209, |
|
"loss": 1.7892, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.7906976744186047, |
|
"grad_norm": 0.19199204444885254, |
|
"learning_rate": 0.00011564915991876793, |
|
"loss": 1.5681, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.7945736434108528, |
|
"grad_norm": 0.20553924143314362, |
|
"learning_rate": 0.00011496643094158072, |
|
"loss": 1.7004, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.7984496124031009, |
|
"grad_norm": 0.20590077340602875, |
|
"learning_rate": 0.00011428464403491667, |
|
"loss": 1.9121, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.802325581395349, |
|
"grad_norm": 0.2159814089536667, |
|
"learning_rate": 0.00011360381203598404, |
|
"loss": 1.7843, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.806201550387597, |
|
"grad_norm": 0.19363482296466827, |
|
"learning_rate": 0.00011292394776401152, |
|
"loss": 2.0043, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.810077519379845, |
|
"grad_norm": 0.20251871645450592, |
|
"learning_rate": 0.00011224506402000645, |
|
"loss": 1.7657, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.8139534883720931, |
|
"grad_norm": 0.2011270970106125, |
|
"learning_rate": 0.0001115671735865142, |
|
"loss": 1.9705, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.8178294573643412, |
|
"grad_norm": 0.20194721221923828, |
|
"learning_rate": 0.00011089028922737721, |
|
"loss": 1.7546, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.8217054263565893, |
|
"grad_norm": 0.19558173418045044, |
|
"learning_rate": 0.00011021442368749496, |
|
"loss": 1.7589, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.8255813953488373, |
|
"grad_norm": 0.1979272961616516, |
|
"learning_rate": 0.00010953958969258363, |
|
"loss": 1.9451, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.8294573643410854, |
|
"grad_norm": 0.1853906363248825, |
|
"learning_rate": 0.00010886579994893669, |
|
"loss": 1.6267, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.8333333333333335, |
|
"grad_norm": 0.22289901971817017, |
|
"learning_rate": 0.0001081930671431858, |
|
"loss": 1.8587, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.8372093023255816, |
|
"grad_norm": 0.1921209841966629, |
|
"learning_rate": 0.00010752140394206169, |
|
"loss": 1.543, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.8410852713178296, |
|
"grad_norm": 0.1961943507194519, |
|
"learning_rate": 0.00010685082299215565, |
|
"loss": 1.7071, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.8449612403100775, |
|
"grad_norm": 0.19070090353488922, |
|
"learning_rate": 0.00010618133691968159, |
|
"loss": 1.7627, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.8488372093023255, |
|
"grad_norm": 0.1972433626651764, |
|
"learning_rate": 0.00010551295833023834, |
|
"loss": 1.9585, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.8527131782945736, |
|
"grad_norm": 0.19546210765838623, |
|
"learning_rate": 0.00010484569980857207, |
|
"loss": 1.7804, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.8565891472868217, |
|
"grad_norm": 0.18603426218032837, |
|
"learning_rate": 0.00010417957391833937, |
|
"loss": 1.7544, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.8604651162790697, |
|
"grad_norm": 0.21424385905265808, |
|
"learning_rate": 0.00010351459320187102, |
|
"loss": 2.0136, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.8643410852713178, |
|
"grad_norm": 0.19600240886211395, |
|
"learning_rate": 0.00010285077017993536, |
|
"loss": 2.1243, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.8682170542635659, |
|
"grad_norm": 0.19789910316467285, |
|
"learning_rate": 0.00010218811735150283, |
|
"loss": 1.772, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.872093023255814, |
|
"grad_norm": 0.24993658065795898, |
|
"learning_rate": 0.0001015266471935106, |
|
"loss": 1.7537, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.875968992248062, |
|
"grad_norm": 0.2263271063566208, |
|
"learning_rate": 0.0001008663721606276, |
|
"loss": 1.6373, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.87984496124031, |
|
"grad_norm": 0.19725754857063293, |
|
"learning_rate": 0.00010020730468502006, |
|
"loss": 1.5511, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.8837209302325582, |
|
"grad_norm": 0.18990269303321838, |
|
"learning_rate": 9.954945717611734e-05, |
|
"loss": 1.9, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.8875968992248062, |
|
"grad_norm": 0.20127974450588226, |
|
"learning_rate": 9.889284202037826e-05, |
|
"loss": 1.8697, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.8914728682170543, |
|
"grad_norm": 0.19391153752803802, |
|
"learning_rate": 9.823747158105813e-05, |
|
"loss": 1.64, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.8953488372093024, |
|
"grad_norm": 0.19547265768051147, |
|
"learning_rate": 9.758335819797565e-05, |
|
"loss": 1.4242, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.8992248062015504, |
|
"grad_norm": 0.19835124909877777, |
|
"learning_rate": 9.693051418728067e-05, |
|
"loss": 1.6888, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.9031007751937985, |
|
"grad_norm": 0.2248801290988922, |
|
"learning_rate": 9.627895184122254e-05, |
|
"loss": 1.8677, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.9069767441860463, |
|
"grad_norm": 0.20312006771564484, |
|
"learning_rate": 9.562868342791809e-05, |
|
"loss": 1.7185, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.9108527131782944, |
|
"grad_norm": 0.2256229966878891, |
|
"learning_rate": 9.497972119112128e-05, |
|
"loss": 1.7775, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.9147286821705425, |
|
"grad_norm": 0.2004292607307434, |
|
"learning_rate": 9.433207734999223e-05, |
|
"loss": 1.7049, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.9186046511627906, |
|
"grad_norm": 0.19018398225307465, |
|
"learning_rate": 9.368576409886741e-05, |
|
"loss": 1.5486, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.9224806201550386, |
|
"grad_norm": 0.2169780731201172, |
|
"learning_rate": 9.304079360702984e-05, |
|
"loss": 1.7943, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.9263565891472867, |
|
"grad_norm": 0.18505652248859406, |
|
"learning_rate": 9.239717801847988e-05, |
|
"loss": 1.4213, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.9302325581395348, |
|
"grad_norm": 0.19745871424674988, |
|
"learning_rate": 9.175492945170716e-05, |
|
"loss": 1.7484, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.9341085271317828, |
|
"grad_norm": 0.19631022214889526, |
|
"learning_rate": 9.111405999946145e-05, |
|
"loss": 1.6813, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.937984496124031, |
|
"grad_norm": 0.20178832113742828, |
|
"learning_rate": 9.047458172852592e-05, |
|
"loss": 1.797, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.941860465116279, |
|
"grad_norm": 0.19533181190490723, |
|
"learning_rate": 8.983650667948915e-05, |
|
"loss": 1.7296, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.945736434108527, |
|
"grad_norm": 0.19685396552085876, |
|
"learning_rate": 8.919984686651917e-05, |
|
"loss": 1.9964, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.949612403100775, |
|
"grad_norm": 0.21287135779857635, |
|
"learning_rate": 8.856461427713651e-05, |
|
"loss": 1.7688, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.9534883720930232, |
|
"grad_norm": 0.19684074819087982, |
|
"learning_rate": 8.793082087198896e-05, |
|
"loss": 1.7588, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.9573643410852712, |
|
"grad_norm": 0.20424464344978333, |
|
"learning_rate": 8.729847858462635e-05, |
|
"loss": 1.804, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.9612403100775193, |
|
"grad_norm": 0.2178419530391693, |
|
"learning_rate": 8.666759932127563e-05, |
|
"loss": 1.7219, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.9651162790697674, |
|
"grad_norm": 0.20854228734970093, |
|
"learning_rate": 8.603819496061677e-05, |
|
"loss": 1.6547, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.9689922480620154, |
|
"grad_norm": 0.19146962463855743, |
|
"learning_rate": 8.54102773535592e-05, |
|
"loss": 1.7676, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.9728682170542635, |
|
"grad_norm": 0.20440424978733063, |
|
"learning_rate": 8.478385832301868e-05, |
|
"loss": 1.863, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.9767441860465116, |
|
"grad_norm": 0.19717960059642792, |
|
"learning_rate": 8.415894966369449e-05, |
|
"loss": 1.9114, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.9806201550387597, |
|
"grad_norm": 0.19504162669181824, |
|
"learning_rate": 8.353556314184756e-05, |
|
"loss": 1.8955, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.9844961240310077, |
|
"grad_norm": 0.21640577912330627, |
|
"learning_rate": 8.291371049507882e-05, |
|
"loss": 1.5879, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.9883720930232558, |
|
"grad_norm": 0.20818272233009338, |
|
"learning_rate": 8.229340343210828e-05, |
|
"loss": 1.6215, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.9922480620155039, |
|
"grad_norm": 0.199616938829422, |
|
"learning_rate": 8.167465363255434e-05, |
|
"loss": 1.8259, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.996124031007752, |
|
"grad_norm": 0.21627697348594666, |
|
"learning_rate": 8.105747274671419e-05, |
|
"loss": 2.042, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.2383459061384201, |
|
"learning_rate": 8.044187239534429e-05, |
|
"loss": 1.7217, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 2.003875968992248, |
|
"grad_norm": 0.2019493281841278, |
|
"learning_rate": 7.982786416944163e-05, |
|
"loss": 1.6845, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 2.007751937984496, |
|
"grad_norm": 0.2237274944782257, |
|
"learning_rate": 7.92154596300252e-05, |
|
"loss": 1.4392, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 2.011627906976744, |
|
"grad_norm": 0.22651292383670807, |
|
"learning_rate": 7.860467030791895e-05, |
|
"loss": 1.5396, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 2.0155038759689923, |
|
"grad_norm": 0.2347649782896042, |
|
"learning_rate": 7.799550770353385e-05, |
|
"loss": 1.7393, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.0193798449612403, |
|
"grad_norm": 0.22472216188907623, |
|
"learning_rate": 7.738798328665212e-05, |
|
"loss": 1.7223, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 2.0232558139534884, |
|
"grad_norm": 0.20871466398239136, |
|
"learning_rate": 7.678210849621058e-05, |
|
"loss": 1.6426, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 2.0271317829457365, |
|
"grad_norm": 0.20463484525680542, |
|
"learning_rate": 7.617789474008606e-05, |
|
"loss": 1.6959, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 2.0310077519379846, |
|
"grad_norm": 0.21245327591896057, |
|
"learning_rate": 7.55753533948797e-05, |
|
"loss": 1.4701, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 2.0348837209302326, |
|
"grad_norm": 0.20299085974693298, |
|
"learning_rate": 7.497449580570352e-05, |
|
"loss": 1.6918, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 2.0387596899224807, |
|
"grad_norm": 0.20907790958881378, |
|
"learning_rate": 7.437533328596635e-05, |
|
"loss": 2.0656, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 2.0426356589147288, |
|
"grad_norm": 0.20104455947875977, |
|
"learning_rate": 7.377787711716099e-05, |
|
"loss": 1.7928, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 2.046511627906977, |
|
"grad_norm": 0.20086194574832916, |
|
"learning_rate": 7.318213854865179e-05, |
|
"loss": 1.6931, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 2.050387596899225, |
|
"grad_norm": 0.19403457641601562, |
|
"learning_rate": 7.258812879746258e-05, |
|
"loss": 1.648, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 2.054263565891473, |
|
"grad_norm": 0.19223634898662567, |
|
"learning_rate": 7.19958590480661e-05, |
|
"loss": 1.7832, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.058139534883721, |
|
"grad_norm": 0.2628048360347748, |
|
"learning_rate": 7.140534045217254e-05, |
|
"loss": 1.8131, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 2.062015503875969, |
|
"grad_norm": 0.19258858263492584, |
|
"learning_rate": 7.08165841285204e-05, |
|
"loss": 2.0739, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.065891472868217, |
|
"grad_norm": 0.19849108159542084, |
|
"learning_rate": 7.02296011626664e-05, |
|
"loss": 1.5161, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 2.0697674418604652, |
|
"grad_norm": 0.2288181036710739, |
|
"learning_rate": 6.964440260677757e-05, |
|
"loss": 1.8076, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 2.003875968992248, |
|
"grad_norm": 0.2025528997182846, |
|
"learning_rate": 6.906099947942226e-05, |
|
"loss": 1.3829, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 2.007751937984496, |
|
"grad_norm": 0.2283174991607666, |
|
"learning_rate": 6.847940276536336e-05, |
|
"loss": 1.3538, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 2.011627906976744, |
|
"grad_norm": 0.22030316293239594, |
|
"learning_rate": 6.789962341535114e-05, |
|
"loss": 1.4349, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 2.0155038759689923, |
|
"grad_norm": 0.2190942019224167, |
|
"learning_rate": 6.732167234591726e-05, |
|
"loss": 1.3267, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 2.0193798449612403, |
|
"grad_norm": 0.24970054626464844, |
|
"learning_rate": 6.674556043916883e-05, |
|
"loss": 1.4094, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 2.0232558139534884, |
|
"grad_norm": 0.24456344544887543, |
|
"learning_rate": 6.617129854258406e-05, |
|
"loss": 1.129, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.0271317829457365, |
|
"grad_norm": 0.27006039023399353, |
|
"learning_rate": 6.559889746880764e-05, |
|
"loss": 1.1812, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 2.0310077519379846, |
|
"grad_norm": 0.27538713812828064, |
|
"learning_rate": 6.502836799544728e-05, |
|
"loss": 1.1389, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 2.0348837209302326, |
|
"grad_norm": 0.3555956482887268, |
|
"learning_rate": 6.445972086487074e-05, |
|
"loss": 1.5623, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 2.0387596899224807, |
|
"grad_norm": 0.35483232140541077, |
|
"learning_rate": 6.389296678400357e-05, |
|
"loss": 1.1459, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 2.0426356589147288, |
|
"grad_norm": 0.36105233430862427, |
|
"learning_rate": 6.332811642412762e-05, |
|
"loss": 1.2068, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 2.046511627906977, |
|
"grad_norm": 0.25484374165534973, |
|
"learning_rate": 6.276518042067982e-05, |
|
"loss": 1.3551, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 2.050387596899225, |
|
"grad_norm": 0.24508428573608398, |
|
"learning_rate": 6.22041693730523e-05, |
|
"loss": 1.3885, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 2.054263565891473, |
|
"grad_norm": 0.24084091186523438, |
|
"learning_rate": 6.164509384439258e-05, |
|
"loss": 1.2559, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 2.058139534883721, |
|
"grad_norm": 0.24399569630622864, |
|
"learning_rate": 6.10879643614048e-05, |
|
"loss": 1.2449, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 2.062015503875969, |
|
"grad_norm": 0.22874587774276733, |
|
"learning_rate": 6.053279141415124e-05, |
|
"loss": 1.2807, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.065891472868217, |
|
"grad_norm": 0.23427356779575348, |
|
"learning_rate": 5.997958545585541e-05, |
|
"loss": 1.223, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 2.0697674418604652, |
|
"grad_norm": 0.23317009210586548, |
|
"learning_rate": 5.9428356902704406e-05, |
|
"loss": 1.3503, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 2.0736434108527133, |
|
"grad_norm": 0.2481788992881775, |
|
"learning_rate": 5.887911613365358e-05, |
|
"loss": 1.5248, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 2.0775193798449614, |
|
"grad_norm": 0.2363855540752411, |
|
"learning_rate": 5.833187349023042e-05, |
|
"loss": 1.3902, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 2.0813953488372094, |
|
"grad_norm": 0.21973107755184174, |
|
"learning_rate": 5.778663927634054e-05, |
|
"loss": 1.1951, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.0852713178294575, |
|
"grad_norm": 0.24858590960502625, |
|
"learning_rate": 5.724342375807296e-05, |
|
"loss": 1.306, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 2.0891472868217056, |
|
"grad_norm": 0.2570536136627197, |
|
"learning_rate": 5.670223716350734e-05, |
|
"loss": 1.2423, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 2.0930232558139537, |
|
"grad_norm": 0.2506595849990845, |
|
"learning_rate": 5.616308968252116e-05, |
|
"loss": 1.5181, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 2.0968992248062017, |
|
"grad_norm": 0.2839462459087372, |
|
"learning_rate": 5.562599146659789e-05, |
|
"loss": 1.379, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 2.10077519379845, |
|
"grad_norm": 0.245258629322052, |
|
"learning_rate": 5.509095262863592e-05, |
|
"loss": 1.3119, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.104651162790698, |
|
"grad_norm": 0.25935980677604675, |
|
"learning_rate": 5.455798324275785e-05, |
|
"loss": 1.2769, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 2.108527131782946, |
|
"grad_norm": 0.2609214186668396, |
|
"learning_rate": 5.4027093344121484e-05, |
|
"loss": 1.3932, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 2.112403100775194, |
|
"grad_norm": 0.26635345816612244, |
|
"learning_rate": 5.3498292928730014e-05, |
|
"loss": 1.3211, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 2.116279069767442, |
|
"grad_norm": 0.28587472438812256, |
|
"learning_rate": 5.2971591953244504e-05, |
|
"loss": 1.1748, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 2.12015503875969, |
|
"grad_norm": 0.26677605509757996, |
|
"learning_rate": 5.244700033479588e-05, |
|
"loss": 1.2278, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 2.124031007751938, |
|
"grad_norm": 0.26765909790992737, |
|
"learning_rate": 5.192452795079885e-05, |
|
"loss": 1.2706, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 2.1279069767441863, |
|
"grad_norm": 0.28276336193084717, |
|
"learning_rate": 5.140418463876519e-05, |
|
"loss": 1.522, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 2.1317829457364343, |
|
"grad_norm": 0.2587178945541382, |
|
"learning_rate": 5.088598019611909e-05, |
|
"loss": 1.2271, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 2.135658914728682, |
|
"grad_norm": 0.24723570048809052, |
|
"learning_rate": 5.036992438001236e-05, |
|
"loss": 1.1214, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 2.13953488372093, |
|
"grad_norm": 0.2772403359413147, |
|
"learning_rate": 4.985602690714091e-05, |
|
"loss": 1.2235, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.143410852713178, |
|
"grad_norm": 0.24144676327705383, |
|
"learning_rate": 4.934429745356153e-05, |
|
"loss": 1.1351, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 2.147286821705426, |
|
"grad_norm": 0.24926505982875824, |
|
"learning_rate": 4.883474565451004e-05, |
|
"loss": 1.3695, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 2.1511627906976742, |
|
"grad_norm": 0.2533572316169739, |
|
"learning_rate": 4.832738110421967e-05, |
|
"loss": 1.2713, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 2.1550387596899223, |
|
"grad_norm": 0.2442951202392578, |
|
"learning_rate": 4.7822213355740394e-05, |
|
"loss": 1.4233, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 2.1589147286821704, |
|
"grad_norm": 0.2528780698776245, |
|
"learning_rate": 4.7319251920759175e-05, |
|
"loss": 1.4764, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 2.1627906976744184, |
|
"grad_norm": 0.26493072509765625, |
|
"learning_rate": 4.681850626942078e-05, |
|
"loss": 1.16, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 2.1666666666666665, |
|
"grad_norm": 0.23339848220348358, |
|
"learning_rate": 4.6319985830149517e-05, |
|
"loss": 1.2937, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 2.1705426356589146, |
|
"grad_norm": 0.25760483741760254, |
|
"learning_rate": 4.582369998947161e-05, |
|
"loss": 1.381, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 2.1744186046511627, |
|
"grad_norm": 0.28579002618789673, |
|
"learning_rate": 4.532965809183861e-05, |
|
"loss": 1.3233, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 2.1782945736434107, |
|
"grad_norm": 0.25108829140663147, |
|
"learning_rate": 4.4837869439451415e-05, |
|
"loss": 1.2818, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.182170542635659, |
|
"grad_norm": 0.23311974108219147, |
|
"learning_rate": 4.434834329208507e-05, |
|
"loss": 1.096, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 2.186046511627907, |
|
"grad_norm": 0.2580869495868683, |
|
"learning_rate": 4.3861088866914274e-05, |
|
"loss": 1.0973, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 2.189922480620155, |
|
"grad_norm": 0.24833805859088898, |
|
"learning_rate": 4.337611533834032e-05, |
|
"loss": 1.3511, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 2.193798449612403, |
|
"grad_norm": 0.26399341225624084, |
|
"learning_rate": 4.289343183781769e-05, |
|
"loss": 1.218, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 2.197674418604651, |
|
"grad_norm": 0.2563190162181854, |
|
"learning_rate": 4.241304745368273e-05, |
|
"loss": 1.2484, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 2.201550387596899, |
|
"grad_norm": 0.27118462324142456, |
|
"learning_rate": 4.193497123098196e-05, |
|
"loss": 1.2956, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 2.205426356589147, |
|
"grad_norm": 0.25910139083862305, |
|
"learning_rate": 4.14592121713024e-05, |
|
"loss": 1.3615, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 2.2093023255813953, |
|
"grad_norm": 0.2624959945678711, |
|
"learning_rate": 4.098577923260146e-05, |
|
"loss": 1.5, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 2.2131782945736433, |
|
"grad_norm": 0.26731380820274353, |
|
"learning_rate": 4.051468132903872e-05, |
|
"loss": 1.2989, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 2.2170542635658914, |
|
"grad_norm": 0.2650803327560425, |
|
"learning_rate": 4.004592733080782e-05, |
|
"loss": 1.5101, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.2209302325581395, |
|
"grad_norm": 0.23880143463611603, |
|
"learning_rate": 3.957952606396964e-05, |
|
"loss": 1.1257, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 2.2248062015503876, |
|
"grad_norm": 0.25078123807907104, |
|
"learning_rate": 3.911548631028602e-05, |
|
"loss": 1.4061, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 2.2286821705426356, |
|
"grad_norm": 0.2630424201488495, |
|
"learning_rate": 3.865381680705422e-05, |
|
"loss": 1.284, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 2.2325581395348837, |
|
"grad_norm": 0.2509412467479706, |
|
"learning_rate": 3.8194526246942975e-05, |
|
"loss": 1.4054, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 2.2364341085271318, |
|
"grad_norm": 0.24037672579288483, |
|
"learning_rate": 3.7737623277828025e-05, |
|
"loss": 1.3992, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 2.24031007751938, |
|
"grad_norm": 0.26423701643943787, |
|
"learning_rate": 3.728311650263008e-05, |
|
"loss": 1.461, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 2.244186046511628, |
|
"grad_norm": 0.25176694989204407, |
|
"learning_rate": 3.683101447915211e-05, |
|
"loss": 1.402, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 2.248062015503876, |
|
"grad_norm": 0.25743353366851807, |
|
"learning_rate": 3.6381325719918976e-05, |
|
"loss": 1.3649, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 2.251937984496124, |
|
"grad_norm": 0.24827177822589874, |
|
"learning_rate": 3.593405869201637e-05, |
|
"loss": 1.4252, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 2.255813953488372, |
|
"grad_norm": 0.2707473039627075, |
|
"learning_rate": 3.548922181693199e-05, |
|
"loss": 1.3534, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.25968992248062, |
|
"grad_norm": 0.2554126977920532, |
|
"learning_rate": 3.504682347039667e-05, |
|
"loss": 1.2142, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 2.2635658914728682, |
|
"grad_norm": 0.2582671642303467, |
|
"learning_rate": 3.460687198222681e-05, |
|
"loss": 1.4812, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 2.2674418604651163, |
|
"grad_norm": 0.24931831657886505, |
|
"learning_rate": 3.416937563616733e-05, |
|
"loss": 1.24, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 2.2713178294573644, |
|
"grad_norm": 0.254974901676178, |
|
"learning_rate": 3.373434266973601e-05, |
|
"loss": 1.3225, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 2.2751937984496124, |
|
"grad_norm": 0.25716301798820496, |
|
"learning_rate": 3.330178127406817e-05, |
|
"loss": 1.3147, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.2790697674418605, |
|
"grad_norm": 0.25187844038009644, |
|
"learning_rate": 3.2871699593762476e-05, |
|
"loss": 1.2468, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 2.2829457364341086, |
|
"grad_norm": 0.2536908686161041, |
|
"learning_rate": 3.24441057267276e-05, |
|
"loss": 1.3098, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 2.2868217054263567, |
|
"grad_norm": 0.28247135877609253, |
|
"learning_rate": 3.201900772402978e-05, |
|
"loss": 1.3243, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 2.2906976744186047, |
|
"grad_norm": 0.27947402000427246, |
|
"learning_rate": 3.159641358974126e-05, |
|
"loss": 1.272, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 2.294573643410853, |
|
"grad_norm": 0.27251169085502625, |
|
"learning_rate": 3.117633128078931e-05, |
|
"loss": 1.209, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.298449612403101, |
|
"grad_norm": 0.2831606864929199, |
|
"learning_rate": 3.0758768706806846e-05, |
|
"loss": 1.3171, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 2.302325581395349, |
|
"grad_norm": 0.33359742164611816, |
|
"learning_rate": 3.0343733729983185e-05, |
|
"loss": 1.2489, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 2.306201550387597, |
|
"grad_norm": 0.3156187832355499, |
|
"learning_rate": 2.9931234164916155e-05, |
|
"loss": 1.3697, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 2.310077519379845, |
|
"grad_norm": 0.24116098880767822, |
|
"learning_rate": 2.952127777846472e-05, |
|
"loss": 1.2184, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 2.313953488372093, |
|
"grad_norm": 0.30879032611846924, |
|
"learning_rate": 2.911387228960322e-05, |
|
"loss": 1.362, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 2.317829457364341, |
|
"grad_norm": 0.25808271765708923, |
|
"learning_rate": 2.8709025369275442e-05, |
|
"loss": 1.2858, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 2.3217054263565893, |
|
"grad_norm": 0.2646956443786621, |
|
"learning_rate": 2.8306744640250702e-05, |
|
"loss": 1.3307, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 2.3255813953488373, |
|
"grad_norm": 0.25518307089805603, |
|
"learning_rate": 2.790703767697985e-05, |
|
"loss": 1.2729, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 2.3294573643410854, |
|
"grad_norm": 0.2450859695672989, |
|
"learning_rate": 2.7509912005453224e-05, |
|
"loss": 1.067, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 2.3333333333333335, |
|
"grad_norm": 0.26299986243247986, |
|
"learning_rate": 2.7115375103058288e-05, |
|
"loss": 1.3176, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.3372093023255816, |
|
"grad_norm": 0.2573457956314087, |
|
"learning_rate": 2.672343439843936e-05, |
|
"loss": 1.2987, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 2.3410852713178296, |
|
"grad_norm": 0.25773999094963074, |
|
"learning_rate": 2.6334097271357512e-05, |
|
"loss": 1.3351, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.3449612403100777, |
|
"grad_norm": 0.2760131359100342, |
|
"learning_rate": 2.5947371052551607e-05, |
|
"loss": 1.212, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 2.3488372093023258, |
|
"grad_norm": 0.2653418779373169, |
|
"learning_rate": 2.556326302360044e-05, |
|
"loss": 1.2889, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 2.352713178294574, |
|
"grad_norm": 0.26841187477111816, |
|
"learning_rate": 2.5181780416785284e-05, |
|
"loss": 1.2536, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 2.356589147286822, |
|
"grad_norm": 0.25338879227638245, |
|
"learning_rate": 2.4802930414954242e-05, |
|
"loss": 1.2338, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 2.3604651162790695, |
|
"grad_norm": 0.27084293961524963, |
|
"learning_rate": 2.4426720151386478e-05, |
|
"loss": 1.5218, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 2.3643410852713176, |
|
"grad_norm": 0.27807363867759705, |
|
"learning_rate": 2.4053156709658235e-05, |
|
"loss": 1.355, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 2.3682170542635657, |
|
"grad_norm": 0.25639334321022034, |
|
"learning_rate": 2.3682247123509232e-05, |
|
"loss": 1.1381, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 2.3720930232558137, |
|
"grad_norm": 0.27499181032180786, |
|
"learning_rate": 2.3313998376710624e-05, |
|
"loss": 1.3216, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.375968992248062, |
|
"grad_norm": 0.2780856490135193, |
|
"learning_rate": 2.294841740293295e-05, |
|
"loss": 1.165, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 2.37984496124031, |
|
"grad_norm": 0.24923618137836456, |
|
"learning_rate": 2.2585511085615996e-05, |
|
"loss": 1.1451, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 2.383720930232558, |
|
"grad_norm": 0.2662268877029419, |
|
"learning_rate": 2.2225286257839073e-05, |
|
"loss": 1.4445, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 2.387596899224806, |
|
"grad_norm": 0.2631353735923767, |
|
"learning_rate": 2.1867749702192334e-05, |
|
"loss": 1.3777, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 2.391472868217054, |
|
"grad_norm": 0.2878567576408386, |
|
"learning_rate": 2.151290815064901e-05, |
|
"loss": 1.2377, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 2.395348837209302, |
|
"grad_norm": 0.2559579908847809, |
|
"learning_rate": 2.1160768284438775e-05, |
|
"loss": 1.3982, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 2.39922480620155, |
|
"grad_norm": 0.2648860514163971, |
|
"learning_rate": 2.0811336733921926e-05, |
|
"loss": 1.237, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 2.4031007751937983, |
|
"grad_norm": 0.2823598384857178, |
|
"learning_rate": 2.046462007846444e-05, |
|
"loss": 1.2538, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 2.4069767441860463, |
|
"grad_norm": 0.2508406639099121, |
|
"learning_rate": 2.0120624846314217e-05, |
|
"loss": 1.3027, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 2.4108527131782944, |
|
"grad_norm": 0.26299890875816345, |
|
"learning_rate": 1.9779357514478066e-05, |
|
"loss": 1.4021, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.4147286821705425, |
|
"grad_norm": 0.2663334608078003, |
|
"learning_rate": 1.944082450859986e-05, |
|
"loss": 1.4515, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 2.4186046511627906, |
|
"grad_norm": 0.25878193974494934, |
|
"learning_rate": 1.910503220283934e-05, |
|
"loss": 1.3473, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 2.4224806201550386, |
|
"grad_norm": 0.25262004137039185, |
|
"learning_rate": 1.8771986919752367e-05, |
|
"loss": 1.3389, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 2.4263565891472867, |
|
"grad_norm": 0.26410895586013794, |
|
"learning_rate": 1.844169493017171e-05, |
|
"loss": 1.3108, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 2.4302325581395348, |
|
"grad_norm": 0.24397780001163483, |
|
"learning_rate": 1.8114162453089036e-05, |
|
"loss": 1.2601, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 2.434108527131783, |
|
"grad_norm": 0.24081014096736908, |
|
"learning_rate": 1.778939565553765e-05, |
|
"loss": 1.2056, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 2.437984496124031, |
|
"grad_norm": 0.271356999874115, |
|
"learning_rate": 1.7467400652476762e-05, |
|
"loss": 1.4752, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 2.441860465116279, |
|
"grad_norm": 0.2456904500722885, |
|
"learning_rate": 1.7148183506675864e-05, |
|
"loss": 1.4085, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 2.445736434108527, |
|
"grad_norm": 0.2549789249897003, |
|
"learning_rate": 1.6831750228600952e-05, |
|
"loss": 1.4168, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 2.449612403100775, |
|
"grad_norm": 0.2592543363571167, |
|
"learning_rate": 1.6518106776301112e-05, |
|
"loss": 1.3086, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.453488372093023, |
|
"grad_norm": 0.254226952791214, |
|
"learning_rate": 1.620725905529663e-05, |
|
"loss": 1.2994, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 2.4573643410852712, |
|
"grad_norm": 0.2479204684495926, |
|
"learning_rate": 1.589921291846741e-05, |
|
"loss": 1.2711, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 2.4612403100775193, |
|
"grad_norm": 0.2645493447780609, |
|
"learning_rate": 1.5593974165943074e-05, |
|
"loss": 1.3929, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 2.4651162790697674, |
|
"grad_norm": 0.2703893780708313, |
|
"learning_rate": 1.5291548544993655e-05, |
|
"loss": 1.4931, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 2.4689922480620154, |
|
"grad_norm": 0.2592923939228058, |
|
"learning_rate": 1.4991941749921369e-05, |
|
"loss": 1.2048, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.4728682170542635, |
|
"grad_norm": 0.2813677489757538, |
|
"learning_rate": 1.4695159421953419e-05, |
|
"loss": 1.2728, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 2.4767441860465116, |
|
"grad_norm": 0.2619397044181824, |
|
"learning_rate": 1.4401207149135698e-05, |
|
"loss": 1.1384, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 2.4806201550387597, |
|
"grad_norm": 0.27888745069503784, |
|
"learning_rate": 1.411009046622775e-05, |
|
"loss": 1.3301, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 2.4844961240310077, |
|
"grad_norm": 0.2559435963630676, |
|
"learning_rate": 1.3821814854598378e-05, |
|
"loss": 1.2627, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 2.488372093023256, |
|
"grad_norm": 0.25446653366088867, |
|
"learning_rate": 1.3536385742122538e-05, |
|
"loss": 1.128, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.492248062015504, |
|
"grad_norm": 0.2939082682132721, |
|
"learning_rate": 1.3253808503079129e-05, |
|
"loss": 1.2637, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 2.496124031007752, |
|
"grad_norm": 0.24750369787216187, |
|
"learning_rate": 1.2974088458049774e-05, |
|
"loss": 1.2881, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.25020653009414673, |
|
"learning_rate": 1.2697230873818587e-05, |
|
"loss": 1.3799, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.503875968992248, |
|
"grad_norm": 0.25006988644599915, |
|
"learning_rate": 1.2423240963273123e-05, |
|
"loss": 1.2513, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 2.507751937984496, |
|
"grad_norm": 0.27393513917922974, |
|
"learning_rate": 1.2152123885306202e-05, |
|
"loss": 1.2718, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 2.511627906976744, |
|
"grad_norm": 0.2598724961280823, |
|
"learning_rate": 1.1883884744718704e-05, |
|
"loss": 1.4166, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 2.5155038759689923, |
|
"grad_norm": 0.31768569350242615, |
|
"learning_rate": 1.1618528592123451e-05, |
|
"loss": 1.2255, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 2.5193798449612403, |
|
"grad_norm": 0.2585943341255188, |
|
"learning_rate": 1.135606042385021e-05, |
|
"loss": 1.2544, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 2.5232558139534884, |
|
"grad_norm": 0.2472546398639679, |
|
"learning_rate": 1.1096485181851552e-05, |
|
"loss": 1.1121, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 2.5271317829457365, |
|
"grad_norm": 0.26214876770973206, |
|
"learning_rate": 1.0839807753609787e-05, |
|
"loss": 1.3611, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.5310077519379846, |
|
"grad_norm": 0.26527321338653564, |
|
"learning_rate": 1.0586032972044979e-05, |
|
"loss": 0.9816, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 2.5348837209302326, |
|
"grad_norm": 0.2589716911315918, |
|
"learning_rate": 1.0335165615423909e-05, |
|
"loss": 1.1664, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 2.5387596899224807, |
|
"grad_norm": 0.26980432868003845, |
|
"learning_rate": 1.0087210407270105e-05, |
|
"loss": 1.5304, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 2.5426356589147288, |
|
"grad_norm": 0.26864027976989746, |
|
"learning_rate": 9.842172016274983e-06, |
|
"loss": 1.4349, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 2.546511627906977, |
|
"grad_norm": 0.24509486556053162, |
|
"learning_rate": 9.600055056209803e-06, |
|
"loss": 1.3234, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 2.550387596899225, |
|
"grad_norm": 0.24848882853984833, |
|
"learning_rate": 9.360864085838973e-06, |
|
"loss": 1.3879, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 2.554263565891473, |
|
"grad_norm": 0.27096468210220337, |
|
"learning_rate": 9.124603608834071e-06, |
|
"loss": 1.4016, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 2.558139534883721, |
|
"grad_norm": 0.24919958412647247, |
|
"learning_rate": 8.891278073688985e-06, |
|
"loss": 1.3114, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 2.562015503875969, |
|
"grad_norm": 0.2680855691432953, |
|
"learning_rate": 8.660891873636498e-06, |
|
"loss": 1.2027, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 2.565891472868217, |
|
"grad_norm": 0.2762061655521393, |
|
"learning_rate": 8.433449346565108e-06, |
|
"loss": 1.4543, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.5697674418604652, |
|
"grad_norm": 0.252043753862381, |
|
"learning_rate": 8.208954774937692e-06, |
|
"loss": 1.2538, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 2.5736434108527133, |
|
"grad_norm": 0.27675777673721313, |
|
"learning_rate": 7.987412385710668e-06, |
|
"loss": 1.2478, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 2.5775193798449614, |
|
"grad_norm": 0.25470390915870667, |
|
"learning_rate": 7.768826350254634e-06, |
|
"loss": 1.3053, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 2.5813953488372094, |
|
"grad_norm": 0.25610047578811646, |
|
"learning_rate": 7.553200784275528e-06, |
|
"loss": 1.366, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 2.5852713178294575, |
|
"grad_norm": 0.25024861097335815, |
|
"learning_rate": 7.3405397477373775e-06, |
|
"loss": 1.3907, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 2.5891472868217056, |
|
"grad_norm": 0.2611881196498871, |
|
"learning_rate": 7.130847244785779e-06, |
|
"loss": 1.2982, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 2.5930232558139537, |
|
"grad_norm": 0.2670976519584656, |
|
"learning_rate": 6.9241272236724684e-06, |
|
"loss": 1.4931, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 2.5968992248062017, |
|
"grad_norm": 0.25510528683662415, |
|
"learning_rate": 6.720383576680977e-06, |
|
"loss": 1.3591, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 2.60077519379845, |
|
"grad_norm": 0.3071907162666321, |
|
"learning_rate": 6.519620140053416e-06, |
|
"loss": 1.4327, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 2.604651162790698, |
|
"grad_norm": 0.27962440252304077, |
|
"learning_rate": 6.321840693918205e-06, |
|
"loss": 1.1472, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.608527131782946, |
|
"grad_norm": 0.2648572623729706, |
|
"learning_rate": 6.127048962218875e-06, |
|
"loss": 1.5193, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 2.612403100775194, |
|
"grad_norm": 0.2580854892730713, |
|
"learning_rate": 5.9352486126439925e-06, |
|
"loss": 1.4547, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 2.616279069767442, |
|
"grad_norm": 0.26917338371276855, |
|
"learning_rate": 5.746443256558062e-06, |
|
"loss": 1.4236, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 2.62015503875969, |
|
"grad_norm": 0.2621963620185852, |
|
"learning_rate": 5.560636448933566e-06, |
|
"loss": 1.4411, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 2.624031007751938, |
|
"grad_norm": 0.297036737203598, |
|
"learning_rate": 5.377831688283975e-06, |
|
"loss": 1.4621, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 2.6279069767441863, |
|
"grad_norm": 0.2709357440471649, |
|
"learning_rate": 5.198032416597942e-06, |
|
"loss": 1.3117, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 2.6317829457364343, |
|
"grad_norm": 0.2600153982639313, |
|
"learning_rate": 5.021242019274458e-06, |
|
"loss": 1.2619, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 2.6356589147286824, |
|
"grad_norm": 0.28544628620147705, |
|
"learning_rate": 4.847463825059103e-06, |
|
"loss": 1.2316, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 2.6395348837209305, |
|
"grad_norm": 0.25166937708854675, |
|
"learning_rate": 4.6767011059813755e-06, |
|
"loss": 1.2125, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 2.6434108527131785, |
|
"grad_norm": 0.2691721022129059, |
|
"learning_rate": 4.5089570772931255e-06, |
|
"loss": 1.2869, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.6472868217054266, |
|
"grad_norm": 0.28785765171051025, |
|
"learning_rate": 4.344234897407919e-06, |
|
"loss": 1.3122, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 2.6511627906976747, |
|
"grad_norm": 0.24798186123371124, |
|
"learning_rate": 4.182537667841703e-06, |
|
"loss": 1.1715, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 2.6550387596899228, |
|
"grad_norm": 0.28408151865005493, |
|
"learning_rate": 4.023868433154223e-06, |
|
"loss": 1.3831, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 2.6589147286821704, |
|
"grad_norm": 0.2505200207233429, |
|
"learning_rate": 3.868230180891944e-06, |
|
"loss": 1.3047, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 2.6627906976744184, |
|
"grad_norm": 0.24836868047714233, |
|
"learning_rate": 3.7156258415315505e-06, |
|
"loss": 1.2543, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.2672933340072632, |
|
"learning_rate": 3.566058288424942e-06, |
|
"loss": 1.4565, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 2.6705426356589146, |
|
"grad_norm": 0.24122579395771027, |
|
"learning_rate": 3.4195303377450594e-06, |
|
"loss": 1.2132, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 2.6744186046511627, |
|
"grad_norm": 0.31489935517311096, |
|
"learning_rate": 3.2760447484328548e-06, |
|
"loss": 1.2152, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 2.6782945736434107, |
|
"grad_norm": 0.24762488901615143, |
|
"learning_rate": 3.1356042221453766e-06, |
|
"loss": 1.1724, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 2.682170542635659, |
|
"grad_norm": 0.2507333755493164, |
|
"learning_rate": 2.998211403204843e-06, |
|
"loss": 1.4529, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.686046511627907, |
|
"grad_norm": 0.27383190393447876, |
|
"learning_rate": 2.863868878548979e-06, |
|
"loss": 1.3259, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 2.689922480620155, |
|
"grad_norm": 0.24372157454490662, |
|
"learning_rate": 2.7325791776821445e-06, |
|
"loss": 1.2469, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 2.693798449612403, |
|
"grad_norm": 0.25599318742752075, |
|
"learning_rate": 2.60434477262785e-06, |
|
"loss": 1.3733, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 2.697674418604651, |
|
"grad_norm": 0.2459976226091385, |
|
"learning_rate": 2.4791680778820455e-06, |
|
"loss": 1.3466, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 2.701550387596899, |
|
"grad_norm": 0.2694660723209381, |
|
"learning_rate": 2.357051450367873e-06, |
|
"loss": 1.5102, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.705426356589147, |
|
"grad_norm": 0.27480435371398926, |
|
"learning_rate": 2.2379971893911144e-06, |
|
"loss": 1.4782, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 2.7093023255813953, |
|
"grad_norm": 0.253065288066864, |
|
"learning_rate": 2.122007536596961e-06, |
|
"loss": 1.1246, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 2.7131782945736433, |
|
"grad_norm": 0.269283652305603, |
|
"learning_rate": 2.0090846759278064e-06, |
|
"loss": 1.5083, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 2.7170542635658914, |
|
"grad_norm": 0.2605230510234833, |
|
"learning_rate": 1.8992307335821763e-06, |
|
"loss": 1.2764, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 2.7209302325581395, |
|
"grad_norm": 0.2701490819454193, |
|
"learning_rate": 1.7924477779745368e-06, |
|
"loss": 1.2148, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.7248062015503876, |
|
"grad_norm": 0.2751643657684326, |
|
"learning_rate": 1.688737819696533e-06, |
|
"loss": 1.3495, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 2.7286821705426356, |
|
"grad_norm": 0.25778815150260925, |
|
"learning_rate": 1.5881028114790319e-06, |
|
"loss": 1.3935, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 2.7325581395348837, |
|
"grad_norm": 0.26106202602386475, |
|
"learning_rate": 1.4905446481553752e-06, |
|
"loss": 1.3074, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 2.7364341085271318, |
|
"grad_norm": 0.30311137437820435, |
|
"learning_rate": 1.3960651666257135e-06, |
|
"loss": 1.5208, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 2.74031007751938, |
|
"grad_norm": 0.2914187014102936, |
|
"learning_rate": 1.304666145822383e-06, |
|
"loss": 1.2835, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.744186046511628, |
|
"grad_norm": 0.2645190358161926, |
|
"learning_rate": 1.2163493066764564e-06, |
|
"loss": 1.4204, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 2.748062015503876, |
|
"grad_norm": 0.3073311448097229, |
|
"learning_rate": 1.1311163120853002e-06, |
|
"loss": 1.4899, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 2.751937984496124, |
|
"grad_norm": 0.25737103819847107, |
|
"learning_rate": 1.0489687668813048e-06, |
|
"loss": 1.2717, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 2.755813953488372, |
|
"grad_norm": 0.27693504095077515, |
|
"learning_rate": 9.6990821780164e-07, |
|
"loss": 1.4203, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 2.75968992248062, |
|
"grad_norm": 0.3176767826080322, |
|
"learning_rate": 8.939361534591472e-07, |
|
"loss": 1.4588, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.7635658914728682, |
|
"grad_norm": 0.34483450651168823, |
|
"learning_rate": 8.21054004314275e-07, |
|
"loss": 1.2022, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 2.7674418604651163, |
|
"grad_norm": 0.33260253071784973, |
|
"learning_rate": 7.512631426481869e-07, |
|
"loss": 1.2149, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 2.7713178294573644, |
|
"grad_norm": 0.277261346578598, |
|
"learning_rate": 6.845648825369143e-07, |
|
"loss": 1.311, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 2.7751937984496124, |
|
"grad_norm": 0.32710519433021545, |
|
"learning_rate": 6.209604798265894e-07, |
|
"loss": 1.5075, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 2.7790697674418605, |
|
"grad_norm": 0.2529277205467224, |
|
"learning_rate": 5.604511321098504e-07, |
|
"loss": 1.4405, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.7829457364341086, |
|
"grad_norm": 0.273580938577652, |
|
"learning_rate": 5.030379787032185e-07, |
|
"loss": 1.3051, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 2.7868217054263567, |
|
"grad_norm": 0.2600751519203186, |
|
"learning_rate": 4.487221006257197e-07, |
|
"loss": 1.3619, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 2.7906976744186047, |
|
"grad_norm": 0.2721461057662964, |
|
"learning_rate": 3.9750452057847775e-07, |
|
"loss": 1.2126, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 2.794573643410853, |
|
"grad_norm": 0.24929043650627136, |
|
"learning_rate": 3.493862029254979e-07, |
|
"loss": 1.4246, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 2.798449612403101, |
|
"grad_norm": 0.25558316707611084, |
|
"learning_rate": 3.043680536754767e-07, |
|
"loss": 1.4952, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.802325581395349, |
|
"grad_norm": 0.3264126181602478, |
|
"learning_rate": 2.624509204647285e-07, |
|
"loss": 1.4849, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 2.806201550387597, |
|
"grad_norm": 0.25953492522239685, |
|
"learning_rate": 2.236355925413036e-07, |
|
"loss": 1.2316, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 2.810077519379845, |
|
"grad_norm": 0.25779008865356445, |
|
"learning_rate": 1.8792280075005829e-07, |
|
"loss": 1.2583, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 2.813953488372093, |
|
"grad_norm": 0.263167142868042, |
|
"learning_rate": 1.5531321751887928e-07, |
|
"loss": 1.507, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 2.817829457364341, |
|
"grad_norm": 0.2614395320415497, |
|
"learning_rate": 1.2580745684609872e-07, |
|
"loss": 1.2681, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.8217054263565893, |
|
"grad_norm": 0.2607451379299164, |
|
"learning_rate": 9.940607428888027e-08, |
|
"loss": 1.3721, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.8255813953488373, |
|
"grad_norm": 0.27197694778442383, |
|
"learning_rate": 7.610956695275895e-08, |
|
"loss": 1.3152, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 2.8294573643410854, |
|
"grad_norm": 0.25525933504104614, |
|
"learning_rate": 5.591837348228046e-08, |
|
"loss": 1.3361, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.8333333333333335, |
|
"grad_norm": 0.2669144868850708, |
|
"learning_rate": 3.883287405277602e-08, |
|
"loss": 1.2546, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 2.8372093023255816, |
|
"grad_norm": 0.2557834982872009, |
|
"learning_rate": 2.4853390363163142e-08, |
|
"loss": 1.1581, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.8410852713178296, |
|
"grad_norm": 0.26671895384788513, |
|
"learning_rate": 1.398018562993708e-08, |
|
"loss": 1.2647, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 2.8449612403100772, |
|
"grad_norm": 0.26321831345558167, |
|
"learning_rate": 6.213464582133232e-09, |
|
"loss": 1.2884, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 2.8488372093023253, |
|
"grad_norm": 0.2515983581542969, |
|
"learning_rate": 1.5533734575534641e-09, |
|
"loss": 1.412, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 2.8527131782945734, |
|
"grad_norm": 0.29071158170700073, |
|
"learning_rate": 0.0, |
|
"loss": 1.3587, |
|
"step": 774 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 774, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 258, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.6656805872756326e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|