|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9986348122866895, |
|
"eval_steps": 500, |
|
"global_step": 366, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005460750853242321, |
|
"grad_norm": 3.971062381322324, |
|
"learning_rate": 2.702702702702703e-07, |
|
"loss": 0.5016, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010921501706484642, |
|
"grad_norm": 4.14826323264813, |
|
"learning_rate": 5.405405405405406e-07, |
|
"loss": 0.5208, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.016382252559726963, |
|
"grad_norm": 3.745507693250097, |
|
"learning_rate": 8.108108108108109e-07, |
|
"loss": 0.4379, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.021843003412969283, |
|
"grad_norm": 3.8553920899968372, |
|
"learning_rate": 1.0810810810810812e-06, |
|
"loss": 0.4602, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.027303754266211604, |
|
"grad_norm": 4.006658163193665, |
|
"learning_rate": 1.3513513513513515e-06, |
|
"loss": 0.4578, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.032764505119453925, |
|
"grad_norm": 3.0693234037079433, |
|
"learning_rate": 1.6216216216216219e-06, |
|
"loss": 0.4416, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03822525597269624, |
|
"grad_norm": 2.3221220798320386, |
|
"learning_rate": 1.8918918918918922e-06, |
|
"loss": 0.4635, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04368600682593857, |
|
"grad_norm": 1.9622941289180669, |
|
"learning_rate": 2.1621621621621623e-06, |
|
"loss": 0.4471, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.049146757679180884, |
|
"grad_norm": 1.5785046223376498, |
|
"learning_rate": 2.432432432432433e-06, |
|
"loss": 0.3669, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.05460750853242321, |
|
"grad_norm": 1.592167130140165, |
|
"learning_rate": 2.702702702702703e-06, |
|
"loss": 0.4232, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.060068259385665526, |
|
"grad_norm": 1.4756853657479083, |
|
"learning_rate": 2.9729729729729736e-06, |
|
"loss": 0.4105, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.06552901023890785, |
|
"grad_norm": 1.327708040902143, |
|
"learning_rate": 3.2432432432432437e-06, |
|
"loss": 0.386, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07098976109215017, |
|
"grad_norm": 1.5458703208762807, |
|
"learning_rate": 3.513513513513514e-06, |
|
"loss": 0.405, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.07645051194539249, |
|
"grad_norm": 1.6200361209703527, |
|
"learning_rate": 3.7837837837837844e-06, |
|
"loss": 0.3908, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.08191126279863481, |
|
"grad_norm": 1.5715327605819764, |
|
"learning_rate": 4.0540540540540545e-06, |
|
"loss": 0.3972, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08737201365187713, |
|
"grad_norm": 1.2301811554389595, |
|
"learning_rate": 4.324324324324325e-06, |
|
"loss": 0.3739, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.09283276450511946, |
|
"grad_norm": 1.0413606131616007, |
|
"learning_rate": 4.594594594594596e-06, |
|
"loss": 0.3521, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.09829351535836177, |
|
"grad_norm": 1.1239884207253636, |
|
"learning_rate": 4.864864864864866e-06, |
|
"loss": 0.4348, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.1037542662116041, |
|
"grad_norm": 1.1123432515923368, |
|
"learning_rate": 5.135135135135135e-06, |
|
"loss": 0.3949, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.10921501706484642, |
|
"grad_norm": 1.0996938196641266, |
|
"learning_rate": 5.405405405405406e-06, |
|
"loss": 0.3775, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11467576791808874, |
|
"grad_norm": 1.0868866085505373, |
|
"learning_rate": 5.675675675675676e-06, |
|
"loss": 0.3721, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.12013651877133105, |
|
"grad_norm": 1.10559810934531, |
|
"learning_rate": 5.945945945945947e-06, |
|
"loss": 0.3884, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.12559726962457338, |
|
"grad_norm": 1.0187684787484814, |
|
"learning_rate": 6.2162162162162164e-06, |
|
"loss": 0.394, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.1310580204778157, |
|
"grad_norm": 0.9515401547070604, |
|
"learning_rate": 6.486486486486487e-06, |
|
"loss": 0.3664, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.13651877133105803, |
|
"grad_norm": 0.9873275768348283, |
|
"learning_rate": 6.7567567567567575e-06, |
|
"loss": 0.3872, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.14197952218430035, |
|
"grad_norm": 0.9420302821261468, |
|
"learning_rate": 7.027027027027028e-06, |
|
"loss": 0.3926, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.14744027303754267, |
|
"grad_norm": 0.8628951646680264, |
|
"learning_rate": 7.297297297297298e-06, |
|
"loss": 0.3395, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15290102389078497, |
|
"grad_norm": 0.8883050014456254, |
|
"learning_rate": 7.567567567567569e-06, |
|
"loss": 0.3692, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1583617747440273, |
|
"grad_norm": 0.9314104245334247, |
|
"learning_rate": 7.837837837837838e-06, |
|
"loss": 0.3562, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.16382252559726962, |
|
"grad_norm": 0.8388999546883599, |
|
"learning_rate": 8.108108108108109e-06, |
|
"loss": 0.3291, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16928327645051194, |
|
"grad_norm": 0.9110394289660935, |
|
"learning_rate": 8.378378378378378e-06, |
|
"loss": 0.3761, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.17474402730375427, |
|
"grad_norm": 0.8529619793059433, |
|
"learning_rate": 8.64864864864865e-06, |
|
"loss": 0.3634, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.1802047781569966, |
|
"grad_norm": 1.034615680095172, |
|
"learning_rate": 8.91891891891892e-06, |
|
"loss": 0.4073, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.18566552901023892, |
|
"grad_norm": 0.9654399340446536, |
|
"learning_rate": 9.189189189189191e-06, |
|
"loss": 0.3832, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.19112627986348124, |
|
"grad_norm": 0.8266008406999349, |
|
"learning_rate": 9.45945945945946e-06, |
|
"loss": 0.366, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.19658703071672354, |
|
"grad_norm": 1.0298041047732736, |
|
"learning_rate": 9.729729729729732e-06, |
|
"loss": 0.3828, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.20204778156996586, |
|
"grad_norm": 1.0253984164765952, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4253, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.2075085324232082, |
|
"grad_norm": 0.845565254392157, |
|
"learning_rate": 9.999772047343259e-06, |
|
"loss": 0.3426, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2129692832764505, |
|
"grad_norm": 0.9194984294487474, |
|
"learning_rate": 9.999088210158001e-06, |
|
"loss": 0.343, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.21843003412969283, |
|
"grad_norm": 0.8148322661922577, |
|
"learning_rate": 9.997948550797227e-06, |
|
"loss": 0.325, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.22389078498293516, |
|
"grad_norm": 0.9623977105015672, |
|
"learning_rate": 9.99635317317629e-06, |
|
"loss": 0.385, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.22935153583617748, |
|
"grad_norm": 0.814300333357098, |
|
"learning_rate": 9.994302222763415e-06, |
|
"loss": 0.3462, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2348122866894198, |
|
"grad_norm": 0.8725030955526336, |
|
"learning_rate": 9.991795886566443e-06, |
|
"loss": 0.3401, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.2402730375426621, |
|
"grad_norm": 0.9974480242764955, |
|
"learning_rate": 9.988834393115768e-06, |
|
"loss": 0.3424, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.24573378839590443, |
|
"grad_norm": 0.8805730275929089, |
|
"learning_rate": 9.98541801244351e-06, |
|
"loss": 0.3742, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.25119453924914675, |
|
"grad_norm": 0.8332420506302001, |
|
"learning_rate": 9.981547056058893e-06, |
|
"loss": 0.3435, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2566552901023891, |
|
"grad_norm": 0.9445729244701234, |
|
"learning_rate": 9.977221876919833e-06, |
|
"loss": 0.3442, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.2621160409556314, |
|
"grad_norm": 0.859922027597315, |
|
"learning_rate": 9.97244286940076e-06, |
|
"loss": 0.358, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2675767918088737, |
|
"grad_norm": 0.8022442917536148, |
|
"learning_rate": 9.967210469256657e-06, |
|
"loss": 0.3329, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.27303754266211605, |
|
"grad_norm": 0.8369993999252197, |
|
"learning_rate": 9.961525153583327e-06, |
|
"loss": 0.3474, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2784982935153584, |
|
"grad_norm": 0.8719055464818419, |
|
"learning_rate": 9.955387440773902e-06, |
|
"loss": 0.3364, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.2839590443686007, |
|
"grad_norm": 0.9269845480680101, |
|
"learning_rate": 9.948797890471552e-06, |
|
"loss": 0.3684, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.289419795221843, |
|
"grad_norm": 0.8246571303849338, |
|
"learning_rate": 9.94175710351848e-06, |
|
"loss": 0.3564, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.29488054607508535, |
|
"grad_norm": 0.9162125135698432, |
|
"learning_rate": 9.93426572190112e-06, |
|
"loss": 0.3526, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3003412969283277, |
|
"grad_norm": 0.9689985766932336, |
|
"learning_rate": 9.926324428691612e-06, |
|
"loss": 0.3825, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.30580204778156994, |
|
"grad_norm": 0.9203465649703365, |
|
"learning_rate": 9.917933947985508e-06, |
|
"loss": 0.3492, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.31126279863481227, |
|
"grad_norm": 0.810691112658576, |
|
"learning_rate": 9.909095044835755e-06, |
|
"loss": 0.3147, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.3167235494880546, |
|
"grad_norm": 0.8980168883992854, |
|
"learning_rate": 9.899808525182935e-06, |
|
"loss": 0.3351, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.3221843003412969, |
|
"grad_norm": 0.8843165617295874, |
|
"learning_rate": 9.89007523578178e-06, |
|
"loss": 0.3452, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.32764505119453924, |
|
"grad_norm": 0.8660715276442186, |
|
"learning_rate": 9.879896064123961e-06, |
|
"loss": 0.3601, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.33310580204778156, |
|
"grad_norm": 0.8638898824914902, |
|
"learning_rate": 9.869271938357168e-06, |
|
"loss": 0.3565, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.3385665529010239, |
|
"grad_norm": 0.8349466672789928, |
|
"learning_rate": 9.858203827200477e-06, |
|
"loss": 0.3592, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.3440273037542662, |
|
"grad_norm": 0.9346433422616252, |
|
"learning_rate": 9.846692739856023e-06, |
|
"loss": 0.3935, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.34948805460750854, |
|
"grad_norm": 0.8287634034991234, |
|
"learning_rate": 9.834739725916988e-06, |
|
"loss": 0.3089, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.35494880546075086, |
|
"grad_norm": 0.8040217244181859, |
|
"learning_rate": 9.822345875271884e-06, |
|
"loss": 0.313, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.3604095563139932, |
|
"grad_norm": 0.8053513263090958, |
|
"learning_rate": 9.80951231800518e-06, |
|
"loss": 0.3355, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.3658703071672355, |
|
"grad_norm": 0.7533298814162714, |
|
"learning_rate": 9.79624022429427e-06, |
|
"loss": 0.3067, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.37133105802047783, |
|
"grad_norm": 0.9386782501271983, |
|
"learning_rate": 9.782530804302763e-06, |
|
"loss": 0.3593, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.37679180887372016, |
|
"grad_norm": 0.8507056335702303, |
|
"learning_rate": 9.768385308070139e-06, |
|
"loss": 0.3629, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.3822525597269625, |
|
"grad_norm": 0.782049564136347, |
|
"learning_rate": 9.75380502539778e-06, |
|
"loss": 0.3458, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.38771331058020475, |
|
"grad_norm": 0.9109652113851044, |
|
"learning_rate": 9.738791285731353e-06, |
|
"loss": 0.348, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.3931740614334471, |
|
"grad_norm": 0.8457379953081087, |
|
"learning_rate": 9.723345458039595e-06, |
|
"loss": 0.3701, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.3986348122866894, |
|
"grad_norm": 0.8354411100444213, |
|
"learning_rate": 9.70746895068949e-06, |
|
"loss": 0.3453, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.4040955631399317, |
|
"grad_norm": 0.7824544054631952, |
|
"learning_rate": 9.691163211317853e-06, |
|
"loss": 0.3393, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.40955631399317405, |
|
"grad_norm": 0.7752036290890001, |
|
"learning_rate": 9.674429726699324e-06, |
|
"loss": 0.3121, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4150170648464164, |
|
"grad_norm": 0.9047037383020493, |
|
"learning_rate": 9.657270022610814e-06, |
|
"loss": 0.3507, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.4204778156996587, |
|
"grad_norm": 0.8453635648693023, |
|
"learning_rate": 9.63968566369238e-06, |
|
"loss": 0.3641, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.425938566552901, |
|
"grad_norm": 0.8290743120901927, |
|
"learning_rate": 9.62167825330455e-06, |
|
"loss": 0.3739, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.43139931740614335, |
|
"grad_norm": 0.8977215293449932, |
|
"learning_rate": 9.603249433382145e-06, |
|
"loss": 0.3185, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.43686006825938567, |
|
"grad_norm": 0.9078617748361664, |
|
"learning_rate": 9.584400884284546e-06, |
|
"loss": 0.3415, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.442320819112628, |
|
"grad_norm": 0.8589830385419883, |
|
"learning_rate": 9.565134324642491e-06, |
|
"loss": 0.3331, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4477815699658703, |
|
"grad_norm": 0.804380018393787, |
|
"learning_rate": 9.545451511201365e-06, |
|
"loss": 0.322, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.45324232081911264, |
|
"grad_norm": 0.8685230840996425, |
|
"learning_rate": 9.52535423866101e-06, |
|
"loss": 0.3476, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.45870307167235497, |
|
"grad_norm": 0.9643956240091752, |
|
"learning_rate": 9.504844339512096e-06, |
|
"loss": 0.3671, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.4641638225255973, |
|
"grad_norm": 0.8997894029115073, |
|
"learning_rate": 9.483923683869025e-06, |
|
"loss": 0.352, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.4696245733788396, |
|
"grad_norm": 0.9409163478885427, |
|
"learning_rate": 9.462594179299408e-06, |
|
"loss": 0.3533, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.4750853242320819, |
|
"grad_norm": 1.0349789755076704, |
|
"learning_rate": 9.440857770650139e-06, |
|
"loss": 0.3501, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.4805460750853242, |
|
"grad_norm": 0.7719492270463393, |
|
"learning_rate": 9.418716439870056e-06, |
|
"loss": 0.3092, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.48600682593856653, |
|
"grad_norm": 0.9082886643398166, |
|
"learning_rate": 9.396172205829235e-06, |
|
"loss": 0.3514, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.49146757679180886, |
|
"grad_norm": 0.8358389654564478, |
|
"learning_rate": 9.373227124134888e-06, |
|
"loss": 0.3489, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4969283276450512, |
|
"grad_norm": 0.8974753686960236, |
|
"learning_rate": 9.349883286943951e-06, |
|
"loss": 0.3632, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.5023890784982935, |
|
"grad_norm": 0.8827201059716774, |
|
"learning_rate": 9.326142822772301e-06, |
|
"loss": 0.3584, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.5078498293515359, |
|
"grad_norm": 0.813182991570662, |
|
"learning_rate": 9.302007896300697e-06, |
|
"loss": 0.3591, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5133105802047782, |
|
"grad_norm": 0.7442842781039997, |
|
"learning_rate": 9.27748070817738e-06, |
|
"loss": 0.3143, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.5187713310580204, |
|
"grad_norm": 0.906866423901588, |
|
"learning_rate": 9.252563494817426e-06, |
|
"loss": 0.3772, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5242320819112628, |
|
"grad_norm": 0.7894206448318375, |
|
"learning_rate": 9.227258528198832e-06, |
|
"loss": 0.3131, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.5296928327645051, |
|
"grad_norm": 0.8009536933279702, |
|
"learning_rate": 9.201568115655343e-06, |
|
"loss": 0.329, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.5351535836177475, |
|
"grad_norm": 0.8048929927509286, |
|
"learning_rate": 9.175494599666078e-06, |
|
"loss": 0.3278, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.5406143344709897, |
|
"grad_norm": 0.814222453793431, |
|
"learning_rate": 9.14904035764193e-06, |
|
"loss": 0.3225, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.5460750853242321, |
|
"grad_norm": 0.8732367458543802, |
|
"learning_rate": 9.122207801708802e-06, |
|
"loss": 0.3524, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5515358361774744, |
|
"grad_norm": 0.8134905761183274, |
|
"learning_rate": 9.094999378487659e-06, |
|
"loss": 0.3546, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.5569965870307167, |
|
"grad_norm": 0.8567463415353727, |
|
"learning_rate": 9.067417568871444e-06, |
|
"loss": 0.3548, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.562457337883959, |
|
"grad_norm": 0.8151562254810784, |
|
"learning_rate": 9.03946488779887e-06, |
|
"loss": 0.3439, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.5679180887372014, |
|
"grad_norm": 0.8746438505757359, |
|
"learning_rate": 9.0111438840251e-06, |
|
"loss": 0.3242, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.5733788395904437, |
|
"grad_norm": 0.8085121266810896, |
|
"learning_rate": 8.982457139889358e-06, |
|
"loss": 0.3598, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.578839590443686, |
|
"grad_norm": 0.8191769217039168, |
|
"learning_rate": 8.953407271079456e-06, |
|
"loss": 0.3425, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.5843003412969283, |
|
"grad_norm": 0.874872463262842, |
|
"learning_rate": 8.923996926393306e-06, |
|
"loss": 0.3795, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.5897610921501707, |
|
"grad_norm": 0.8469769243731713, |
|
"learning_rate": 8.894228787497389e-06, |
|
"loss": 0.3555, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.595221843003413, |
|
"grad_norm": 0.7907533312057188, |
|
"learning_rate": 8.864105568682245e-06, |
|
"loss": 0.3425, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.6006825938566553, |
|
"grad_norm": 0.9105392675920642, |
|
"learning_rate": 8.833630016614976e-06, |
|
"loss": 0.3214, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6061433447098976, |
|
"grad_norm": 0.7743632593675985, |
|
"learning_rate": 8.80280491008881e-06, |
|
"loss": 0.3477, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.6116040955631399, |
|
"grad_norm": 0.9007334854740756, |
|
"learning_rate": 8.771633059769712e-06, |
|
"loss": 0.3836, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.6170648464163823, |
|
"grad_norm": 0.810760704066922, |
|
"learning_rate": 8.740117307940123e-06, |
|
"loss": 0.3397, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.6225255972696245, |
|
"grad_norm": 0.9072711750424595, |
|
"learning_rate": 8.708260528239788e-06, |
|
"loss": 0.3389, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.6279863481228669, |
|
"grad_norm": 0.8621760047744049, |
|
"learning_rate": 8.676065625403733e-06, |
|
"loss": 0.3788, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.6334470989761092, |
|
"grad_norm": 0.8388020553913726, |
|
"learning_rate": 8.64353553499741e-06, |
|
"loss": 0.3274, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.6389078498293516, |
|
"grad_norm": 0.8591445453801212, |
|
"learning_rate": 8.610673223149036e-06, |
|
"loss": 0.3598, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.6443686006825938, |
|
"grad_norm": 0.8057297251815362, |
|
"learning_rate": 8.577481686279123e-06, |
|
"loss": 0.3522, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.6498293515358362, |
|
"grad_norm": 0.779914515334107, |
|
"learning_rate": 8.543963950827279e-06, |
|
"loss": 0.3416, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.6552901023890785, |
|
"grad_norm": 0.8241347234199242, |
|
"learning_rate": 8.51012307297624e-06, |
|
"loss": 0.341, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6607508532423209, |
|
"grad_norm": 0.7674873201219691, |
|
"learning_rate": 8.475962138373212e-06, |
|
"loss": 0.3268, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.6662116040955631, |
|
"grad_norm": 0.7983877124268901, |
|
"learning_rate": 8.441484261848514e-06, |
|
"loss": 0.3744, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.6716723549488055, |
|
"grad_norm": 0.924444516956386, |
|
"learning_rate": 8.406692587131569e-06, |
|
"loss": 0.341, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.6771331058020478, |
|
"grad_norm": 0.7648013324474998, |
|
"learning_rate": 8.371590286564247e-06, |
|
"loss": 0.3239, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.6825938566552902, |
|
"grad_norm": 0.8275858574184091, |
|
"learning_rate": 8.336180560811619e-06, |
|
"loss": 0.3588, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.6880546075085324, |
|
"grad_norm": 0.7874924257335151, |
|
"learning_rate": 8.30046663857011e-06, |
|
"loss": 0.3431, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.6935153583617747, |
|
"grad_norm": 0.8745641415217126, |
|
"learning_rate": 8.264451776273104e-06, |
|
"loss": 0.3489, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.6989761092150171, |
|
"grad_norm": 0.8858812955767805, |
|
"learning_rate": 8.228139257794012e-06, |
|
"loss": 0.3595, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.7044368600682593, |
|
"grad_norm": 0.812177330348684, |
|
"learning_rate": 8.191532394146865e-06, |
|
"loss": 0.328, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.7098976109215017, |
|
"grad_norm": 0.7755088132854933, |
|
"learning_rate": 8.154634523184389e-06, |
|
"loss": 0.3392, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.715358361774744, |
|
"grad_norm": 0.8715295143660003, |
|
"learning_rate": 8.117449009293668e-06, |
|
"loss": 0.3482, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.7208191126279864, |
|
"grad_norm": 0.7737148258150855, |
|
"learning_rate": 8.07997924308938e-06, |
|
"loss": 0.3258, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.7262798634812286, |
|
"grad_norm": 0.7616464397737633, |
|
"learning_rate": 8.042228641104622e-06, |
|
"loss": 0.3164, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.731740614334471, |
|
"grad_norm": 0.7706843428925245, |
|
"learning_rate": 8.004200645479403e-06, |
|
"loss": 0.3267, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.7372013651877133, |
|
"grad_norm": 0.807087784184599, |
|
"learning_rate": 7.965898723646777e-06, |
|
"loss": 0.3556, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7426621160409557, |
|
"grad_norm": 0.8571787444948499, |
|
"learning_rate": 7.927326368016677e-06, |
|
"loss": 0.349, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.7481228668941979, |
|
"grad_norm": 0.758611440956407, |
|
"learning_rate": 7.888487095657484e-06, |
|
"loss": 0.3301, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.7535836177474403, |
|
"grad_norm": 0.7924167608304931, |
|
"learning_rate": 7.849384447975322e-06, |
|
"loss": 0.3534, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.7590443686006826, |
|
"grad_norm": 0.8750460208633537, |
|
"learning_rate": 7.810021990391163e-06, |
|
"loss": 0.3405, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.764505119453925, |
|
"grad_norm": 0.7895037781717571, |
|
"learning_rate": 7.77040331201572e-06, |
|
"loss": 0.3678, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7699658703071672, |
|
"grad_norm": 0.8223142510608592, |
|
"learning_rate": 7.73053202532219e-06, |
|
"loss": 0.3469, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.7754266211604095, |
|
"grad_norm": 0.8172036706667312, |
|
"learning_rate": 7.690411765816864e-06, |
|
"loss": 0.3395, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.7808873720136519, |
|
"grad_norm": 0.7717521340469524, |
|
"learning_rate": 7.650046191707641e-06, |
|
"loss": 0.3352, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.7863481228668942, |
|
"grad_norm": 0.8745843729938327, |
|
"learning_rate": 7.609438983570461e-06, |
|
"loss": 0.34, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.7918088737201365, |
|
"grad_norm": 0.8462879664073518, |
|
"learning_rate": 7.5685938440137185e-06, |
|
"loss": 0.3434, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.7972696245733788, |
|
"grad_norm": 0.8887194246240154, |
|
"learning_rate": 7.527514497340642e-06, |
|
"loss": 0.3536, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.8027303754266212, |
|
"grad_norm": 0.720734982965855, |
|
"learning_rate": 7.486204689209719e-06, |
|
"loss": 0.3071, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.8081911262798634, |
|
"grad_norm": 0.7852180915891143, |
|
"learning_rate": 7.444668186293153e-06, |
|
"loss": 0.3318, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.8136518771331058, |
|
"grad_norm": 0.8169236397844766, |
|
"learning_rate": 7.402908775933419e-06, |
|
"loss": 0.3282, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.8191126279863481, |
|
"grad_norm": 0.8409146266167947, |
|
"learning_rate": 7.360930265797934e-06, |
|
"loss": 0.3592, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8245733788395905, |
|
"grad_norm": 0.7893736430445095, |
|
"learning_rate": 7.318736483531861e-06, |
|
"loss": 0.3455, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.8300341296928327, |
|
"grad_norm": 0.7092487578490618, |
|
"learning_rate": 7.2763312764091055e-06, |
|
"loss": 0.307, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.8354948805460751, |
|
"grad_norm": 0.7643841671055314, |
|
"learning_rate": 7.23371851098152e-06, |
|
"loss": 0.3104, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.8409556313993174, |
|
"grad_norm": 0.8743703462981528, |
|
"learning_rate": 7.190902072726336e-06, |
|
"loss": 0.3601, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.8464163822525598, |
|
"grad_norm": 0.8748161240027253, |
|
"learning_rate": 7.147885865691899e-06, |
|
"loss": 0.3592, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.851877133105802, |
|
"grad_norm": 0.6528952892311825, |
|
"learning_rate": 7.104673812141676e-06, |
|
"loss": 0.2919, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.8573378839590444, |
|
"grad_norm": 0.8161745547126792, |
|
"learning_rate": 7.061269852196633e-06, |
|
"loss": 0.345, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.8627986348122867, |
|
"grad_norm": 0.8321903783865391, |
|
"learning_rate": 7.017677943475962e-06, |
|
"loss": 0.321, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.868259385665529, |
|
"grad_norm": 0.83313681444351, |
|
"learning_rate": 6.973902060736226e-06, |
|
"loss": 0.3435, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.8737201365187713, |
|
"grad_norm": 0.7505151585539925, |
|
"learning_rate": 6.929946195508933e-06, |
|
"loss": 0.3163, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8791808873720136, |
|
"grad_norm": 0.7304802524364322, |
|
"learning_rate": 6.8858143557365865e-06, |
|
"loss": 0.328, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.884641638225256, |
|
"grad_norm": 0.8143420713928606, |
|
"learning_rate": 6.841510565407235e-06, |
|
"loss": 0.3341, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.8901023890784983, |
|
"grad_norm": 0.7567204344075086, |
|
"learning_rate": 6.797038864187564e-06, |
|
"loss": 0.3059, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.8955631399317406, |
|
"grad_norm": 0.7826567782778101, |
|
"learning_rate": 6.752403307054549e-06, |
|
"loss": 0.3283, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.9010238907849829, |
|
"grad_norm": 0.7886900433942758, |
|
"learning_rate": 6.707607963925725e-06, |
|
"loss": 0.3592, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.9064846416382253, |
|
"grad_norm": 0.820709571232716, |
|
"learning_rate": 6.66265691928808e-06, |
|
"loss": 0.3605, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.9119453924914676, |
|
"grad_norm": 0.7681789982648866, |
|
"learning_rate": 6.617554271825636e-06, |
|
"loss": 0.3051, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.9174061433447099, |
|
"grad_norm": 0.8006459558215293, |
|
"learning_rate": 6.5723041340457175e-06, |
|
"loss": 0.3542, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.9228668941979522, |
|
"grad_norm": 0.7333102829214887, |
|
"learning_rate": 6.526910631903973e-06, |
|
"loss": 0.3254, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.9283276450511946, |
|
"grad_norm": 0.7766899671870917, |
|
"learning_rate": 6.481377904428171e-06, |
|
"loss": 0.3297, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9337883959044369, |
|
"grad_norm": 0.8887532080533157, |
|
"learning_rate": 6.435710103340787e-06, |
|
"loss": 0.3531, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.9392491467576792, |
|
"grad_norm": 0.7606421967689092, |
|
"learning_rate": 6.3899113926804565e-06, |
|
"loss": 0.3279, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.9447098976109215, |
|
"grad_norm": 0.7894203946388427, |
|
"learning_rate": 6.3439859484222874e-06, |
|
"loss": 0.3206, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.9501706484641638, |
|
"grad_norm": 0.8106143896629081, |
|
"learning_rate": 6.297937958097094e-06, |
|
"loss": 0.3185, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.9556313993174061, |
|
"grad_norm": 0.7673331407317434, |
|
"learning_rate": 6.251771620409563e-06, |
|
"loss": 0.3408, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.9610921501706484, |
|
"grad_norm": 0.7678720102410665, |
|
"learning_rate": 6.205491144855432e-06, |
|
"loss": 0.3388, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.9665529010238908, |
|
"grad_norm": 0.8058357314804626, |
|
"learning_rate": 6.1591007513376425e-06, |
|
"loss": 0.348, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.9720136518771331, |
|
"grad_norm": 0.7150290944167804, |
|
"learning_rate": 6.112604669781572e-06, |
|
"loss": 0.3187, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.9774744027303754, |
|
"grad_norm": 0.7724885943742522, |
|
"learning_rate": 6.066007139749351e-06, |
|
"loss": 0.3112, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.9829351535836177, |
|
"grad_norm": 0.7079636144073458, |
|
"learning_rate": 6.019312410053286e-06, |
|
"loss": 0.3115, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.9883959044368601, |
|
"grad_norm": 0.7145124027416198, |
|
"learning_rate": 5.972524738368452e-06, |
|
"loss": 0.3015, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.9938566552901024, |
|
"grad_norm": 0.7747190577463166, |
|
"learning_rate": 5.925648390844476e-06, |
|
"loss": 0.3405, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.9993174061433447, |
|
"grad_norm": 0.7411495697672651, |
|
"learning_rate": 5.878687641716539e-06, |
|
"loss": 0.3241, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.004778156996587, |
|
"grad_norm": 2.2845026949908367, |
|
"learning_rate": 5.831646772915651e-06, |
|
"loss": 0.5887, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.0102389078498293, |
|
"grad_norm": 0.767067987662548, |
|
"learning_rate": 5.7845300736782205e-06, |
|
"loss": 0.2696, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.0156996587030718, |
|
"grad_norm": 0.6444558204539116, |
|
"learning_rate": 5.7373418401549565e-06, |
|
"loss": 0.2179, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.021160409556314, |
|
"grad_norm": 0.6326875020427418, |
|
"learning_rate": 5.690086375019135e-06, |
|
"loss": 0.2063, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.0266211604095563, |
|
"grad_norm": 0.6386737207560813, |
|
"learning_rate": 5.642767987074288e-06, |
|
"loss": 0.2395, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.0320819112627986, |
|
"grad_norm": 0.7191360204333792, |
|
"learning_rate": 5.595390990861311e-06, |
|
"loss": 0.2593, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0375426621160408, |
|
"grad_norm": 0.7139153218951271, |
|
"learning_rate": 5.547959706265068e-06, |
|
"loss": 0.25, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0430034129692833, |
|
"grad_norm": 0.7132174285440807, |
|
"learning_rate": 5.500478458120493e-06, |
|
"loss": 0.2656, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.0484641638225256, |
|
"grad_norm": 0.7373698973683902, |
|
"learning_rate": 5.45295157581825e-06, |
|
"loss": 0.2643, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.0539249146757679, |
|
"grad_norm": 0.6806301830473086, |
|
"learning_rate": 5.405383392909973e-06, |
|
"loss": 0.2521, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.0593856655290101, |
|
"grad_norm": 0.6785558579468979, |
|
"learning_rate": 5.357778246713131e-06, |
|
"loss": 0.254, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.0648464163822526, |
|
"grad_norm": 0.6701400051635917, |
|
"learning_rate": 5.310140477915544e-06, |
|
"loss": 0.2303, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.070307167235495, |
|
"grad_norm": 0.7408218245910705, |
|
"learning_rate": 5.262474430179597e-06, |
|
"loss": 0.2587, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.0757679180887372, |
|
"grad_norm": 0.652029496994987, |
|
"learning_rate": 5.2147844497461745e-06, |
|
"loss": 0.2201, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.0812286689419794, |
|
"grad_norm": 0.6145756306559864, |
|
"learning_rate": 5.1670748850383734e-06, |
|
"loss": 0.2131, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.086689419795222, |
|
"grad_norm": 0.6166424191446999, |
|
"learning_rate": 5.1193500862650045e-06, |
|
"loss": 0.2272, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.0921501706484642, |
|
"grad_norm": 0.6183167944122974, |
|
"learning_rate": 5.071614405023938e-06, |
|
"loss": 0.2239, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0976109215017065, |
|
"grad_norm": 0.7073017446193981, |
|
"learning_rate": 5.023872193905316e-06, |
|
"loss": 0.2564, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.1030716723549487, |
|
"grad_norm": 0.6785368103283103, |
|
"learning_rate": 4.976127806094685e-06, |
|
"loss": 0.2598, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.108532423208191, |
|
"grad_norm": 0.6686099383276679, |
|
"learning_rate": 4.928385594976063e-06, |
|
"loss": 0.2391, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.1139931740614335, |
|
"grad_norm": 0.6046536649329635, |
|
"learning_rate": 4.880649913734996e-06, |
|
"loss": 0.2111, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.1194539249146758, |
|
"grad_norm": 0.6455972829075776, |
|
"learning_rate": 4.832925114961629e-06, |
|
"loss": 0.2291, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.124914675767918, |
|
"grad_norm": 0.6294601922178525, |
|
"learning_rate": 4.785215550253826e-06, |
|
"loss": 0.2237, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.1303754266211605, |
|
"grad_norm": 0.6539972986726327, |
|
"learning_rate": 4.737525569820405e-06, |
|
"loss": 0.2415, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.1358361774744028, |
|
"grad_norm": 0.6841776523547041, |
|
"learning_rate": 4.689859522084457e-06, |
|
"loss": 0.2573, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.141296928327645, |
|
"grad_norm": 0.708275852733329, |
|
"learning_rate": 4.64222175328687e-06, |
|
"loss": 0.2535, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.1467576791808873, |
|
"grad_norm": 0.6327858698732379, |
|
"learning_rate": 4.594616607090028e-06, |
|
"loss": 0.2284, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.1522184300341296, |
|
"grad_norm": 0.6408257648532151, |
|
"learning_rate": 4.547048424181751e-06, |
|
"loss": 0.2294, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.157679180887372, |
|
"grad_norm": 0.6159123552870842, |
|
"learning_rate": 4.499521541879508e-06, |
|
"loss": 0.2226, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.1631399317406144, |
|
"grad_norm": 0.5823300781202381, |
|
"learning_rate": 4.452040293734934e-06, |
|
"loss": 0.2108, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.1686006825938566, |
|
"grad_norm": 0.6041391928150867, |
|
"learning_rate": 4.40460900913869e-06, |
|
"loss": 0.2224, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.174061433447099, |
|
"grad_norm": 0.6641306892375002, |
|
"learning_rate": 4.357232012925714e-06, |
|
"loss": 0.2384, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.1795221843003414, |
|
"grad_norm": 0.6503207204016519, |
|
"learning_rate": 4.309913624980866e-06, |
|
"loss": 0.2347, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.1849829351535837, |
|
"grad_norm": 0.62805580635999, |
|
"learning_rate": 4.262658159845046e-06, |
|
"loss": 0.229, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.190443686006826, |
|
"grad_norm": 0.6275617918722145, |
|
"learning_rate": 4.2154699263217794e-06, |
|
"loss": 0.2286, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.1959044368600682, |
|
"grad_norm": 0.7617460871871701, |
|
"learning_rate": 4.1683532270843505e-06, |
|
"loss": 0.2574, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.2013651877133107, |
|
"grad_norm": 0.6140110702778818, |
|
"learning_rate": 4.121312358283464e-06, |
|
"loss": 0.2149, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.206825938566553, |
|
"grad_norm": 0.6247259244040378, |
|
"learning_rate": 4.074351609155527e-06, |
|
"loss": 0.2381, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.2122866894197952, |
|
"grad_norm": 0.6591171660703798, |
|
"learning_rate": 4.0274752616315485e-06, |
|
"loss": 0.2344, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.2177474402730375, |
|
"grad_norm": 0.6436293629709356, |
|
"learning_rate": 3.980687589946715e-06, |
|
"loss": 0.2319, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.2232081911262798, |
|
"grad_norm": 0.7670288305294722, |
|
"learning_rate": 3.9339928602506505e-06, |
|
"loss": 0.2497, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.2286689419795223, |
|
"grad_norm": 0.6962942427446093, |
|
"learning_rate": 3.887395330218429e-06, |
|
"loss": 0.2336, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.2341296928327645, |
|
"grad_norm": 0.6965676226191562, |
|
"learning_rate": 3.840899248662358e-06, |
|
"loss": 0.2552, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.2395904436860068, |
|
"grad_norm": 0.6641042493963545, |
|
"learning_rate": 3.7945088551445698e-06, |
|
"loss": 0.2563, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.245051194539249, |
|
"grad_norm": 0.6196955536234605, |
|
"learning_rate": 3.748228379590438e-06, |
|
"loss": 0.2291, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.2505119453924913, |
|
"grad_norm": 0.6278753799486634, |
|
"learning_rate": 3.7020620419029095e-06, |
|
"loss": 0.2141, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.2559726962457338, |
|
"grad_norm": 0.6082644009007588, |
|
"learning_rate": 3.656014051577713e-06, |
|
"loss": 0.2122, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.261433447098976, |
|
"grad_norm": 0.6736359984841271, |
|
"learning_rate": 3.610088607319544e-06, |
|
"loss": 0.2367, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.2668941979522184, |
|
"grad_norm": 0.6329301932656438, |
|
"learning_rate": 3.5642898966592145e-06, |
|
"loss": 0.235, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.2723549488054609, |
|
"grad_norm": 0.619963960271499, |
|
"learning_rate": 3.518622095571831e-06, |
|
"loss": 0.2208, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.2778156996587031, |
|
"grad_norm": 0.6693990598652739, |
|
"learning_rate": 3.4730893680960267e-06, |
|
"loss": 0.2406, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.2832764505119454, |
|
"grad_norm": 0.6864592317171182, |
|
"learning_rate": 3.4276958659542838e-06, |
|
"loss": 0.243, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.2887372013651877, |
|
"grad_norm": 0.7236681291816511, |
|
"learning_rate": 3.382445728174365e-06, |
|
"loss": 0.2586, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.29419795221843, |
|
"grad_norm": 0.6176752667693888, |
|
"learning_rate": 3.3373430807119212e-06, |
|
"loss": 0.2251, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.2996587030716724, |
|
"grad_norm": 0.7022262485772638, |
|
"learning_rate": 3.292392036074277e-06, |
|
"loss": 0.2316, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.3051194539249147, |
|
"grad_norm": 0.6404240889042992, |
|
"learning_rate": 3.2475966929454505e-06, |
|
"loss": 0.2384, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.310580204778157, |
|
"grad_norm": 0.7080127357360425, |
|
"learning_rate": 3.202961135812437e-06, |
|
"loss": 0.248, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.3160409556313994, |
|
"grad_norm": 0.668638034601711, |
|
"learning_rate": 3.1584894345927663e-06, |
|
"loss": 0.2212, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.3215017064846417, |
|
"grad_norm": 0.6729621818012642, |
|
"learning_rate": 3.114185644263415e-06, |
|
"loss": 0.222, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.326962457337884, |
|
"grad_norm": 0.6723316181938683, |
|
"learning_rate": 3.0700538044910684e-06, |
|
"loss": 0.2246, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3324232081911263, |
|
"grad_norm": 0.6671061425745013, |
|
"learning_rate": 3.0260979392637753e-06, |
|
"loss": 0.2518, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.3378839590443685, |
|
"grad_norm": 0.5962051816320753, |
|
"learning_rate": 2.9823220565240396e-06, |
|
"loss": 0.2224, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.343344709897611, |
|
"grad_norm": 0.6511243444073086, |
|
"learning_rate": 2.9387301478033694e-06, |
|
"loss": 0.2521, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.3488054607508533, |
|
"grad_norm": 0.6112346949791394, |
|
"learning_rate": 2.8953261878583263e-06, |
|
"loss": 0.2164, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.3542662116040955, |
|
"grad_norm": 0.6844064518509092, |
|
"learning_rate": 2.852114134308104e-06, |
|
"loss": 0.2532, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.3597269624573378, |
|
"grad_norm": 0.6383549765315465, |
|
"learning_rate": 2.8090979272736663e-06, |
|
"loss": 0.2401, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.36518771331058, |
|
"grad_norm": 0.5944587646093914, |
|
"learning_rate": 2.766281489018482e-06, |
|
"loss": 0.2293, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.3706484641638226, |
|
"grad_norm": 0.6307687611402706, |
|
"learning_rate": 2.7236687235908953e-06, |
|
"loss": 0.2188, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.3761092150170648, |
|
"grad_norm": 0.6238745929037437, |
|
"learning_rate": 2.681263516468139e-06, |
|
"loss": 0.2475, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.3815699658703071, |
|
"grad_norm": 0.6373763499985442, |
|
"learning_rate": 2.6390697342020665e-06, |
|
"loss": 0.2343, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.3870307167235496, |
|
"grad_norm": 0.6734367954708225, |
|
"learning_rate": 2.5970912240665815e-06, |
|
"loss": 0.2353, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.3924914675767919, |
|
"grad_norm": 0.6582720475674197, |
|
"learning_rate": 2.5553318137068473e-06, |
|
"loss": 0.2474, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.3979522184300341, |
|
"grad_norm": 0.6052116695135601, |
|
"learning_rate": 2.5137953107902814e-06, |
|
"loss": 0.2322, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.4034129692832764, |
|
"grad_norm": 0.6552372504854818, |
|
"learning_rate": 2.472485502659358e-06, |
|
"loss": 0.2468, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.4088737201365187, |
|
"grad_norm": 0.6302233591025854, |
|
"learning_rate": 2.4314061559862836e-06, |
|
"loss": 0.2398, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.4143344709897612, |
|
"grad_norm": 0.654084080401935, |
|
"learning_rate": 2.3905610164295394e-06, |
|
"loss": 0.2329, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.4197952218430034, |
|
"grad_norm": 0.6485524489438387, |
|
"learning_rate": 2.3499538082923607e-06, |
|
"loss": 0.2446, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.4252559726962457, |
|
"grad_norm": 0.6260148485778105, |
|
"learning_rate": 2.309588234183137e-06, |
|
"loss": 0.215, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.430716723549488, |
|
"grad_norm": 0.6165139801898837, |
|
"learning_rate": 2.2694679746778116e-06, |
|
"loss": 0.2235, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.4361774744027302, |
|
"grad_norm": 0.6073663452431178, |
|
"learning_rate": 2.22959668798428e-06, |
|
"loss": 0.21, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.4416382252559727, |
|
"grad_norm": 0.6687068934985456, |
|
"learning_rate": 2.1899780096088375e-06, |
|
"loss": 0.2609, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.447098976109215, |
|
"grad_norm": 0.5999286753849784, |
|
"learning_rate": 2.1506155520246795e-06, |
|
"loss": 0.2275, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.4525597269624573, |
|
"grad_norm": 0.6562378405208374, |
|
"learning_rate": 2.1115129043425188e-06, |
|
"loss": 0.2577, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.4580204778156998, |
|
"grad_norm": 0.6564390690276799, |
|
"learning_rate": 2.072673631983323e-06, |
|
"loss": 0.2583, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.463481228668942, |
|
"grad_norm": 0.6175405413313049, |
|
"learning_rate": 2.0341012763532243e-06, |
|
"loss": 0.2252, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.4689419795221843, |
|
"grad_norm": 0.6294576454771881, |
|
"learning_rate": 1.995799354520598e-06, |
|
"loss": 0.2282, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.4744027303754266, |
|
"grad_norm": 0.6491408881404807, |
|
"learning_rate": 1.9577713588953797e-06, |
|
"loss": 0.2204, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.4798634812286688, |
|
"grad_norm": 0.626492440911862, |
|
"learning_rate": 1.9200207569106216e-06, |
|
"loss": 0.2363, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.4853242320819113, |
|
"grad_norm": 0.6328542452711655, |
|
"learning_rate": 1.8825509907063328e-06, |
|
"loss": 0.2312, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.4907849829351536, |
|
"grad_norm": 0.6502462448470019, |
|
"learning_rate": 1.8453654768156138e-06, |
|
"loss": 0.2512, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.4962457337883959, |
|
"grad_norm": 0.6012861830171234, |
|
"learning_rate": 1.8084676058531376e-06, |
|
"loss": 0.2285, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.5017064846416384, |
|
"grad_norm": 0.6834276368175269, |
|
"learning_rate": 1.771860742205988e-06, |
|
"loss": 0.2512, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.5071672354948804, |
|
"grad_norm": 0.6528553917231606, |
|
"learning_rate": 1.7355482237268983e-06, |
|
"loss": 0.2382, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.512627986348123, |
|
"grad_norm": 0.5819350883243867, |
|
"learning_rate": 1.6995333614298908e-06, |
|
"loss": 0.2097, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.5180887372013652, |
|
"grad_norm": 0.6231114790212122, |
|
"learning_rate": 1.6638194391883822e-06, |
|
"loss": 0.2352, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.5235494880546074, |
|
"grad_norm": 0.6082248229800555, |
|
"learning_rate": 1.6284097134357535e-06, |
|
"loss": 0.2241, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.52901023890785, |
|
"grad_norm": 0.5824479788396233, |
|
"learning_rate": 1.5933074128684333e-06, |
|
"loss": 0.2149, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.5344709897610922, |
|
"grad_norm": 0.6414988574509947, |
|
"learning_rate": 1.5585157381514875e-06, |
|
"loss": 0.2629, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.5399317406143345, |
|
"grad_norm": 0.6281763626708758, |
|
"learning_rate": 1.5240378616267887e-06, |
|
"loss": 0.2213, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.545392491467577, |
|
"grad_norm": 0.6342381052621304, |
|
"learning_rate": 1.4898769270237611e-06, |
|
"loss": 0.2469, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.550853242320819, |
|
"grad_norm": 0.63348306460088, |
|
"learning_rate": 1.4560360491727233e-06, |
|
"loss": 0.2369, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.5563139931740615, |
|
"grad_norm": 0.6211597914243564, |
|
"learning_rate": 1.4225183137208775e-06, |
|
"loss": 0.2464, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.5617747440273038, |
|
"grad_norm": 0.6287566376737247, |
|
"learning_rate": 1.389326776850966e-06, |
|
"loss": 0.2378, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.567235494880546, |
|
"grad_norm": 0.6155227503256832, |
|
"learning_rate": 1.3564644650025894e-06, |
|
"loss": 0.2501, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.5726962457337885, |
|
"grad_norm": 0.6705552802557545, |
|
"learning_rate": 1.323934374596268e-06, |
|
"loss": 0.2642, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.5781569965870306, |
|
"grad_norm": 0.6468155028186762, |
|
"learning_rate": 1.2917394717602123e-06, |
|
"loss": 0.2391, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.583617747440273, |
|
"grad_norm": 0.6059123207863814, |
|
"learning_rate": 1.2598826920598773e-06, |
|
"loss": 0.2471, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.5890784982935153, |
|
"grad_norm": 0.622906276078447, |
|
"learning_rate": 1.2283669402302878e-06, |
|
"loss": 0.2441, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.5945392491467576, |
|
"grad_norm": 0.6123734431964898, |
|
"learning_rate": 1.197195089911191e-06, |
|
"loss": 0.2359, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.6173696097759185, |
|
"learning_rate": 1.166369983385024e-06, |
|
"loss": 0.2302, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.6054607508532424, |
|
"grad_norm": 0.6309581460125253, |
|
"learning_rate": 1.1358944313177566e-06, |
|
"loss": 0.2312, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.6109215017064846, |
|
"grad_norm": 0.6390016010862638, |
|
"learning_rate": 1.1057712125026116e-06, |
|
"loss": 0.2442, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.6163822525597271, |
|
"grad_norm": 0.6276135127186313, |
|
"learning_rate": 1.0760030736066952e-06, |
|
"loss": 0.2297, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.6218430034129692, |
|
"grad_norm": 0.612896281499447, |
|
"learning_rate": 1.0465927289205452e-06, |
|
"loss": 0.2346, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6273037542662117, |
|
"grad_norm": 0.5859909784104106, |
|
"learning_rate": 1.0175428601106441e-06, |
|
"loss": 0.2119, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.632764505119454, |
|
"grad_norm": 0.6209786013367464, |
|
"learning_rate": 9.888561159748995e-07, |
|
"loss": 0.2432, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.6382252559726962, |
|
"grad_norm": 0.6352233419147338, |
|
"learning_rate": 9.605351122011308e-07, |
|
"loss": 0.2392, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6436860068259387, |
|
"grad_norm": 0.5719749997080835, |
|
"learning_rate": 9.325824311285564e-07, |
|
"loss": 0.2173, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.6491467576791807, |
|
"grad_norm": 0.6657684609315078, |
|
"learning_rate": 9.050006215123419e-07, |
|
"loss": 0.2606, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.6546075085324232, |
|
"grad_norm": 0.5728781873883457, |
|
"learning_rate": 8.777921982911996e-07, |
|
"loss": 0.2214, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.6600682593856655, |
|
"grad_norm": 0.6162844560531199, |
|
"learning_rate": 8.509596423580712e-07, |
|
"loss": 0.2464, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.6655290102389078, |
|
"grad_norm": 0.5952374213138941, |
|
"learning_rate": 8.245054003339247e-07, |
|
"loss": 0.226, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.6709897610921502, |
|
"grad_norm": 0.5961891699890017, |
|
"learning_rate": 7.984318843446593e-07, |
|
"loss": 0.2221, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.6764505119453925, |
|
"grad_norm": 0.5937715818201527, |
|
"learning_rate": 7.727414718011706e-07, |
|
"loss": 0.2117, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.6819112627986348, |
|
"grad_norm": 0.6853681886295775, |
|
"learning_rate": 7.474365051825749e-07, |
|
"loss": 0.257, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.6873720136518773, |
|
"grad_norm": 0.6359287265633253, |
|
"learning_rate": 7.225192918226215e-07, |
|
"loss": 0.2395, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.6928327645051193, |
|
"grad_norm": 0.5963026735275871, |
|
"learning_rate": 6.979921036993042e-07, |
|
"loss": 0.2233, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.6982935153583618, |
|
"grad_norm": 0.6319949149208983, |
|
"learning_rate": 6.738571772276997e-07, |
|
"loss": 0.2416, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.703754266211604, |
|
"grad_norm": 0.6203658184125409, |
|
"learning_rate": 6.501167130560515e-07, |
|
"loss": 0.2283, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.7092150170648464, |
|
"grad_norm": 0.5938364868062161, |
|
"learning_rate": 6.267728758651131e-07, |
|
"loss": 0.2302, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.7146757679180888, |
|
"grad_norm": 0.6087142064405593, |
|
"learning_rate": 6.038277941707671e-07, |
|
"loss": 0.2039, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.7201365187713311, |
|
"grad_norm": 0.6401443657158584, |
|
"learning_rate": 5.812835601299438e-07, |
|
"loss": 0.254, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.7255972696245734, |
|
"grad_norm": 0.5597403672115617, |
|
"learning_rate": 5.591422293498633e-07, |
|
"loss": 0.2074, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.7310580204778157, |
|
"grad_norm": 0.6510913944426799, |
|
"learning_rate": 5.374058207005945e-07, |
|
"loss": 0.242, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.736518771331058, |
|
"grad_norm": 0.5851164556251767, |
|
"learning_rate": 5.160763161309768e-07, |
|
"loss": 0.2208, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.7419795221843004, |
|
"grad_norm": 0.5863228147924701, |
|
"learning_rate": 4.951556604879049e-07, |
|
"loss": 0.2167, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.7474402730375427, |
|
"grad_norm": 0.6434185214532853, |
|
"learning_rate": 4.7464576133899043e-07, |
|
"loss": 0.2208, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.752901023890785, |
|
"grad_norm": 0.6536708886817881, |
|
"learning_rate": 4.545484887986368e-07, |
|
"loss": 0.2558, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.7583617747440274, |
|
"grad_norm": 0.5913503349988765, |
|
"learning_rate": 4.348656753575092e-07, |
|
"loss": 0.2412, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.7638225255972695, |
|
"grad_norm": 0.6056503972371652, |
|
"learning_rate": 4.1559911571545544e-07, |
|
"loss": 0.2302, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.769283276450512, |
|
"grad_norm": 0.6287557474105558, |
|
"learning_rate": 3.9675056661785563e-07, |
|
"loss": 0.2184, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.7747440273037542, |
|
"grad_norm": 0.6387002743522794, |
|
"learning_rate": 3.783217466954503e-07, |
|
"loss": 0.2302, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.7802047781569965, |
|
"grad_norm": 0.5919282825342829, |
|
"learning_rate": 3.603143363076217e-07, |
|
"loss": 0.2155, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.785665529010239, |
|
"grad_norm": 0.6473381627525607, |
|
"learning_rate": 3.427299773891868e-07, |
|
"loss": 0.2661, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.7911262798634813, |
|
"grad_norm": 0.5931604711623788, |
|
"learning_rate": 3.255702733006766e-07, |
|
"loss": 0.2338, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.7965870307167235, |
|
"grad_norm": 0.6129008231127687, |
|
"learning_rate": 3.088367886821481e-07, |
|
"loss": 0.2514, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.802047781569966, |
|
"grad_norm": 0.6062339345391793, |
|
"learning_rate": 2.925310493105099e-07, |
|
"loss": 0.208, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.807508532423208, |
|
"grad_norm": 0.640691825777882, |
|
"learning_rate": 2.7665454196040665e-07, |
|
"loss": 0.2568, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.8129692832764506, |
|
"grad_norm": 0.6228877907424235, |
|
"learning_rate": 2.6120871426864866e-07, |
|
"loss": 0.2445, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.8184300341296928, |
|
"grad_norm": 0.6109720444847905, |
|
"learning_rate": 2.4619497460222184e-07, |
|
"loss": 0.2408, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.823890784982935, |
|
"grad_norm": 0.6345094654832795, |
|
"learning_rate": 2.316146919298623e-07, |
|
"loss": 0.221, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.8293515358361776, |
|
"grad_norm": 0.5612296424074616, |
|
"learning_rate": 2.1746919569723858e-07, |
|
"loss": 0.2137, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.8348122866894196, |
|
"grad_norm": 0.5831279872950338, |
|
"learning_rate": 2.037597757057297e-07, |
|
"loss": 0.2178, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.8402730375426621, |
|
"grad_norm": 0.639477342500686, |
|
"learning_rate": 1.9048768199481983e-07, |
|
"loss": 0.2417, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.8457337883959044, |
|
"grad_norm": 0.5861730216308635, |
|
"learning_rate": 1.776541247281177e-07, |
|
"loss": 0.229, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.8511945392491467, |
|
"grad_norm": 0.6019956045442054, |
|
"learning_rate": 1.6526027408301227e-07, |
|
"loss": 0.2212, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.8566552901023892, |
|
"grad_norm": 0.59132952240458, |
|
"learning_rate": 1.5330726014397668e-07, |
|
"loss": 0.2301, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.8621160409556314, |
|
"grad_norm": 0.6078005334703832, |
|
"learning_rate": 1.417961727995254e-07, |
|
"loss": 0.2239, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.8675767918088737, |
|
"grad_norm": 0.585630251596369, |
|
"learning_rate": 1.307280616428336e-07, |
|
"loss": 0.2093, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.8730375426621162, |
|
"grad_norm": 0.6003005675220531, |
|
"learning_rate": 1.2010393587603975e-07, |
|
"loss": 0.2558, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.8784982935153582, |
|
"grad_norm": 0.6007862550074166, |
|
"learning_rate": 1.0992476421822052e-07, |
|
"loss": 0.2217, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.8839590443686007, |
|
"grad_norm": 0.5798264754441842, |
|
"learning_rate": 1.0019147481706626e-07, |
|
"loss": 0.2, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.889419795221843, |
|
"grad_norm": 0.5801152259959127, |
|
"learning_rate": 9.090495516424713e-08, |
|
"loss": 0.2219, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.8948805460750853, |
|
"grad_norm": 0.5923845690395708, |
|
"learning_rate": 8.206605201449447e-08, |
|
"loss": 0.2029, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.9003412969283278, |
|
"grad_norm": 0.5987478859251119, |
|
"learning_rate": 7.367557130838921e-08, |
|
"loss": 0.2256, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.9058020477815698, |
|
"grad_norm": 0.5842539453063824, |
|
"learning_rate": 6.573427809888067e-08, |
|
"loss": 0.2003, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.9112627986348123, |
|
"grad_norm": 0.6351293922330473, |
|
"learning_rate": 5.824289648152126e-08, |
|
"loss": 0.2395, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.9167235494880546, |
|
"grad_norm": 0.6674020151826169, |
|
"learning_rate": 5.120210952844873e-08, |
|
"loss": 0.271, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.9221843003412968, |
|
"grad_norm": 0.5675294646121625, |
|
"learning_rate": 4.461255922609986e-08, |
|
"loss": 0.2172, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.9276450511945393, |
|
"grad_norm": 0.6285436049388992, |
|
"learning_rate": 3.8474846416672874e-08, |
|
"loss": 0.2399, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.9331058020477816, |
|
"grad_norm": 0.6175868696026672, |
|
"learning_rate": 3.278953074334512e-08, |
|
"loss": 0.2212, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.9385665529010239, |
|
"grad_norm": 0.6190764065365002, |
|
"learning_rate": 2.75571305992417e-08, |
|
"loss": 0.2374, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.9440273037542664, |
|
"grad_norm": 0.6366754795576353, |
|
"learning_rate": 2.2778123080167136e-08, |
|
"loss": 0.2317, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.9494880546075084, |
|
"grad_norm": 0.6088811286257059, |
|
"learning_rate": 1.845294394110686e-08, |
|
"loss": 0.2161, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.954948805460751, |
|
"grad_norm": 0.5983150959140915, |
|
"learning_rate": 1.4581987556490095e-08, |
|
"loss": 0.2273, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.9604095563139932, |
|
"grad_norm": 0.6172951456789394, |
|
"learning_rate": 1.1165606884234182e-08, |
|
"loss": 0.2514, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.9658703071672354, |
|
"grad_norm": 0.6110206348502005, |
|
"learning_rate": 8.204113433559202e-09, |
|
"loss": 0.2248, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.971331058020478, |
|
"grad_norm": 0.5973553403473014, |
|
"learning_rate": 5.6977772365857105e-09, |
|
"loss": 0.2415, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.9767918088737202, |
|
"grad_norm": 0.6109000206055947, |
|
"learning_rate": 3.6468268237105364e-09, |
|
"loss": 0.2319, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.9822525597269625, |
|
"grad_norm": 0.6357125881065677, |
|
"learning_rate": 2.0514492027728928e-09, |
|
"loss": 0.2457, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.9877133105802047, |
|
"grad_norm": 0.669166818877388, |
|
"learning_rate": 9.117898419991333e-10, |
|
"loss": 0.2574, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.993174061433447, |
|
"grad_norm": 0.6418127603308712, |
|
"learning_rate": 2.2795265674113721e-10, |
|
"loss": 0.2499, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.9986348122866895, |
|
"grad_norm": 0.6075592770961785, |
|
"learning_rate": 0.0, |
|
"loss": 0.2179, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.9986348122866895, |
|
"step": 366, |
|
"total_flos": 86796684656640.0, |
|
"train_loss": 0.2950779539965541, |
|
"train_runtime": 2999.324, |
|
"train_samples_per_second": 15.628, |
|
"train_steps_per_second": 0.122 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 366, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 86796684656640.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|