|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.032734074612906575, |
|
"eval_steps": 500, |
|
"global_step": 630, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.967684234392304, |
|
"learning_rate": 8.722043470761813e-09, |
|
"loss": 1.4217, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0271119924702425, |
|
"learning_rate": 1.7444086941523626e-08, |
|
"loss": 1.3854, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0212962641045844, |
|
"learning_rate": 2.6166130412285438e-08, |
|
"loss": 1.421, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.969098210999342, |
|
"learning_rate": 3.488817388304725e-08, |
|
"loss": 1.3894, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9600462061407877, |
|
"learning_rate": 4.361021735380907e-08, |
|
"loss": 1.3803, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.074991458239833, |
|
"learning_rate": 5.2332260824570876e-08, |
|
"loss": 1.3815, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.026445479098331, |
|
"learning_rate": 6.105430429533269e-08, |
|
"loss": 1.3684, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9268992001978744, |
|
"learning_rate": 6.97763477660945e-08, |
|
"loss": 1.4307, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.020642302186043, |
|
"learning_rate": 7.849839123685631e-08, |
|
"loss": 1.3318, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0765469986494853, |
|
"learning_rate": 8.722043470761814e-08, |
|
"loss": 1.3942, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.081729945762728, |
|
"learning_rate": 9.594247817837994e-08, |
|
"loss": 1.3555, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0021774146670808, |
|
"learning_rate": 1.0466452164914175e-07, |
|
"loss": 1.4366, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.059550099782956, |
|
"learning_rate": 1.1338656511990357e-07, |
|
"loss": 1.3999, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9198920843423073, |
|
"learning_rate": 1.2210860859066538e-07, |
|
"loss": 1.3949, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.013793892067568, |
|
"learning_rate": 1.308306520614272e-07, |
|
"loss": 1.4062, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9189606946894022, |
|
"learning_rate": 1.39552695532189e-07, |
|
"loss": 1.401, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9989048069702475, |
|
"learning_rate": 1.4827473900295083e-07, |
|
"loss": 1.39, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.8873329349365613, |
|
"learning_rate": 1.5699678247371262e-07, |
|
"loss": 1.4061, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.8867416403445345, |
|
"learning_rate": 1.6571882594447446e-07, |
|
"loss": 1.4032, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9725069440958127, |
|
"learning_rate": 1.7444086941523627e-07, |
|
"loss": 1.3772, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0049844241627675, |
|
"learning_rate": 1.8316291288599806e-07, |
|
"loss": 1.3907, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.878905929699835, |
|
"learning_rate": 1.9188495635675987e-07, |
|
"loss": 1.4029, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.8262523355216014, |
|
"learning_rate": 2.006069998275217e-07, |
|
"loss": 1.3488, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.913459302454502, |
|
"learning_rate": 2.093290432982835e-07, |
|
"loss": 1.3888, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.924777837323026, |
|
"learning_rate": 2.1805108676904532e-07, |
|
"loss": 1.3423, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0238436977422207, |
|
"learning_rate": 2.2677313023980713e-07, |
|
"loss": 1.4083, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9404632380821036, |
|
"learning_rate": 2.3549517371056895e-07, |
|
"loss": 1.3325, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9624670572200222, |
|
"learning_rate": 2.4421721718133076e-07, |
|
"loss": 1.4221, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9307501329723236, |
|
"learning_rate": 2.5293926065209255e-07, |
|
"loss": 1.3827, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.8568182093611365, |
|
"learning_rate": 2.616613041228544e-07, |
|
"loss": 1.394, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0056868600386544, |
|
"learning_rate": 2.703833475936162e-07, |
|
"loss": 1.3585, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.928843591728527, |
|
"learning_rate": 2.79105391064378e-07, |
|
"loss": 1.3927, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.979059280785437, |
|
"learning_rate": 2.878274345351398e-07, |
|
"loss": 1.4622, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.948156874136048, |
|
"learning_rate": 2.9654947800590165e-07, |
|
"loss": 1.3583, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.883247676830032, |
|
"learning_rate": 3.0527152147666344e-07, |
|
"loss": 1.3533, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.058107134456641, |
|
"learning_rate": 3.1399356494742523e-07, |
|
"loss": 1.3904, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0535192707251237, |
|
"learning_rate": 3.2271560841818707e-07, |
|
"loss": 1.3978, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.897725634603183, |
|
"learning_rate": 3.314376518889489e-07, |
|
"loss": 1.3422, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.869420148217502, |
|
"learning_rate": 3.401596953597107e-07, |
|
"loss": 1.4465, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9815636403050676, |
|
"learning_rate": 3.4888173883047254e-07, |
|
"loss": 1.407, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9486391625075123, |
|
"learning_rate": 3.576037823012343e-07, |
|
"loss": 1.391, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9396534742524283, |
|
"learning_rate": 3.663258257719961e-07, |
|
"loss": 1.4191, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.954105365539368, |
|
"learning_rate": 3.7504786924275796e-07, |
|
"loss": 1.4039, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9899210526299482, |
|
"learning_rate": 3.8376991271351975e-07, |
|
"loss": 1.3606, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9715202250605515, |
|
"learning_rate": 3.924919561842816e-07, |
|
"loss": 1.3561, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.8726950910953133, |
|
"learning_rate": 4.012139996550434e-07, |
|
"loss": 1.4004, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9356806631744568, |
|
"learning_rate": 4.0993604312580517e-07, |
|
"loss": 1.3626, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.828385414972167, |
|
"learning_rate": 4.18658086596567e-07, |
|
"loss": 1.416, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.860775941749999, |
|
"learning_rate": 4.273801300673288e-07, |
|
"loss": 1.4232, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.908555437519401, |
|
"learning_rate": 4.3610217353809064e-07, |
|
"loss": 1.371, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0110809579629283, |
|
"learning_rate": 4.448242170088525e-07, |
|
"loss": 1.3886, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.8879956038976062, |
|
"learning_rate": 4.5354626047961427e-07, |
|
"loss": 1.3677, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9475494022582405, |
|
"learning_rate": 4.622683039503761e-07, |
|
"loss": 1.3596, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9208201499436677, |
|
"learning_rate": 4.709903474211379e-07, |
|
"loss": 1.3795, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9250691956165293, |
|
"learning_rate": 4.797123908918997e-07, |
|
"loss": 1.3521, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0059604294844795, |
|
"learning_rate": 4.884344343626615e-07, |
|
"loss": 1.3872, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9408348770709347, |
|
"learning_rate": 4.971564778334233e-07, |
|
"loss": 1.41, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9941841393676247, |
|
"learning_rate": 5.058785213041851e-07, |
|
"loss": 1.4293, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9287843738148744, |
|
"learning_rate": 5.146005647749469e-07, |
|
"loss": 1.411, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9468093301597533, |
|
"learning_rate": 5.233226082457088e-07, |
|
"loss": 1.3508, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.854893968299532, |
|
"learning_rate": 5.320446517164706e-07, |
|
"loss": 1.4042, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9224674242946387, |
|
"learning_rate": 5.407666951872324e-07, |
|
"loss": 1.4182, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.025591867750147, |
|
"learning_rate": 5.494887386579943e-07, |
|
"loss": 1.4162, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9351339151935045, |
|
"learning_rate": 5.58210782128756e-07, |
|
"loss": 1.2956, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9740598113404677, |
|
"learning_rate": 5.669328255995178e-07, |
|
"loss": 1.4181, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.8698658052840464, |
|
"learning_rate": 5.756548690702796e-07, |
|
"loss": 1.3885, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.847745090454755, |
|
"learning_rate": 5.843769125410415e-07, |
|
"loss": 1.4268, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.53006793293518, |
|
"learning_rate": 5.930989560118033e-07, |
|
"loss": 1.4369, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.02012038605618, |
|
"learning_rate": 6.01820999482565e-07, |
|
"loss": 1.406, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.8502665195139074, |
|
"learning_rate": 6.105430429533269e-07, |
|
"loss": 1.4273, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.966110454182966, |
|
"learning_rate": 6.192650864240887e-07, |
|
"loss": 1.3822, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9117409152319604, |
|
"learning_rate": 6.279871298948505e-07, |
|
"loss": 1.3758, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9583103749551247, |
|
"learning_rate": 6.367091733656124e-07, |
|
"loss": 1.3821, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9976941225678524, |
|
"learning_rate": 6.454312168363741e-07, |
|
"loss": 1.3818, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9208562033277237, |
|
"learning_rate": 6.541532603071359e-07, |
|
"loss": 1.4249, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9753607287344868, |
|
"learning_rate": 6.628753037778978e-07, |
|
"loss": 1.394, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.852059681146882, |
|
"learning_rate": 6.715973472486596e-07, |
|
"loss": 1.3735, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9721541467116075, |
|
"learning_rate": 6.803193907194214e-07, |
|
"loss": 1.3284, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.959460844473033, |
|
"learning_rate": 6.890414341901832e-07, |
|
"loss": 1.3998, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.8741209861248747, |
|
"learning_rate": 6.977634776609451e-07, |
|
"loss": 1.3696, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.916879772765294, |
|
"learning_rate": 7.064855211317069e-07, |
|
"loss": 1.4736, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.069300514821944, |
|
"learning_rate": 7.152075646024686e-07, |
|
"loss": 1.3341, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.966078167553101, |
|
"learning_rate": 7.239296080732305e-07, |
|
"loss": 1.3705, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.891366947828864, |
|
"learning_rate": 7.326516515439922e-07, |
|
"loss": 1.4036, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0015624379292687, |
|
"learning_rate": 7.41373695014754e-07, |
|
"loss": 1.3548, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9762714989519297, |
|
"learning_rate": 7.500957384855159e-07, |
|
"loss": 1.4038, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0417489896152325, |
|
"learning_rate": 7.588177819562777e-07, |
|
"loss": 1.3972, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.957367413809754, |
|
"learning_rate": 7.675398254270395e-07, |
|
"loss": 1.376, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9811178726545466, |
|
"learning_rate": 7.762618688978014e-07, |
|
"loss": 1.366, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9882512869731994, |
|
"learning_rate": 7.849839123685632e-07, |
|
"loss": 1.3835, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.878782548871192, |
|
"learning_rate": 7.93705955839325e-07, |
|
"loss": 1.3372, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.9971417051306344, |
|
"learning_rate": 8.024279993100868e-07, |
|
"loss": 1.4037, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 3.0231933357399066, |
|
"learning_rate": 8.111500427808487e-07, |
|
"loss": 1.3692, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.8777305685815024, |
|
"learning_rate": 8.198720862516103e-07, |
|
"loss": 1.3493, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.990027107011572, |
|
"learning_rate": 8.285941297223721e-07, |
|
"loss": 1.3586, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.818072292309908, |
|
"learning_rate": 8.37316173193134e-07, |
|
"loss": 1.3612, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.1271514088993144, |
|
"learning_rate": 8.460382166638958e-07, |
|
"loss": 1.336, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0169265133833396, |
|
"learning_rate": 8.547602601346576e-07, |
|
"loss": 1.3665, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9482660829507994, |
|
"learning_rate": 8.634823036054195e-07, |
|
"loss": 1.3551, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.087617940804514, |
|
"learning_rate": 8.722043470761813e-07, |
|
"loss": 1.3659, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.894747860556493, |
|
"learning_rate": 8.72204341205319e-07, |
|
"loss": 1.4032, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9567796592966546, |
|
"learning_rate": 8.722043235927325e-07, |
|
"loss": 1.3568, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.130564519854559, |
|
"learning_rate": 8.72204294238422e-07, |
|
"loss": 1.3567, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.98891915499642, |
|
"learning_rate": 8.722042531423884e-07, |
|
"loss": 1.3865, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.036577692929289, |
|
"learning_rate": 8.722042003046327e-07, |
|
"loss": 1.3901, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.001761214468399, |
|
"learning_rate": 8.722041357251567e-07, |
|
"loss": 1.4117, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.006505553779288, |
|
"learning_rate": 8.722040594039618e-07, |
|
"loss": 1.4083, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.970081989296889, |
|
"learning_rate": 8.722039713410501e-07, |
|
"loss": 1.3163, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9452680507051863, |
|
"learning_rate": 8.72203871536424e-07, |
|
"loss": 1.3367, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0057027079906216, |
|
"learning_rate": 8.722037599900863e-07, |
|
"loss": 1.3893, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9731127424958697, |
|
"learning_rate": 8.722036367020397e-07, |
|
"loss": 1.3604, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.884218184477182, |
|
"learning_rate": 8.722035016722879e-07, |
|
"loss": 1.4651, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.8763866239897644, |
|
"learning_rate": 8.722033549008343e-07, |
|
"loss": 1.3947, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.927176134008426, |
|
"learning_rate": 8.722031963876829e-07, |
|
"loss": 1.3667, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9924621440798664, |
|
"learning_rate": 8.72203026132838e-07, |
|
"loss": 1.3632, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0499504528400943, |
|
"learning_rate": 8.72202844136304e-07, |
|
"loss": 1.4176, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9684475725772392, |
|
"learning_rate": 8.722026503980863e-07, |
|
"loss": 1.4146, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9148264494394662, |
|
"learning_rate": 8.722024449181895e-07, |
|
"loss": 1.4205, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.947431519938494, |
|
"learning_rate": 8.722022276966194e-07, |
|
"loss": 1.3281, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.955626479582277, |
|
"learning_rate": 8.72201998733382e-07, |
|
"loss": 1.3465, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.028540598737732, |
|
"learning_rate": 8.722017580284832e-07, |
|
"loss": 1.3472, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9197378030040753, |
|
"learning_rate": 8.722015055819296e-07, |
|
"loss": 1.381, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0574320249257227, |
|
"learning_rate": 8.722012413937282e-07, |
|
"loss": 1.4225, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.913538682906452, |
|
"learning_rate": 8.722009654638856e-07, |
|
"loss": 1.3536, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.886066575609779, |
|
"learning_rate": 8.722006777924096e-07, |
|
"loss": 1.3736, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.957758024407401, |
|
"learning_rate": 8.722003783793081e-07, |
|
"loss": 1.3973, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.958995782934072, |
|
"learning_rate": 8.722000672245888e-07, |
|
"loss": 1.3954, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.987991679162308, |
|
"learning_rate": 8.721997443282602e-07, |
|
"loss": 1.3757, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0909686056252434, |
|
"learning_rate": 8.721994096903311e-07, |
|
"loss": 1.3462, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9477825764652494, |
|
"learning_rate": 8.721990633108104e-07, |
|
"loss": 1.4295, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.947921751933276, |
|
"learning_rate": 8.721987051897074e-07, |
|
"loss": 1.3854, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.8440480288328427, |
|
"learning_rate": 8.721983353270319e-07, |
|
"loss": 1.4106, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9792049006251906, |
|
"learning_rate": 8.721979537227935e-07, |
|
"loss": 1.3913, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9238731192373746, |
|
"learning_rate": 8.721975603770031e-07, |
|
"loss": 1.3695, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0773774260583155, |
|
"learning_rate": 8.721971552896706e-07, |
|
"loss": 1.3629, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9679219250269044, |
|
"learning_rate": 8.721967384608074e-07, |
|
"loss": 1.4205, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.030565370577699, |
|
"learning_rate": 8.721963098904246e-07, |
|
"loss": 1.4311, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0375097512582503, |
|
"learning_rate": 8.721958695785336e-07, |
|
"loss": 1.4069, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.92214551378445, |
|
"learning_rate": 8.721954175251462e-07, |
|
"loss": 1.422, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.1283303266405578, |
|
"learning_rate": 8.721949537302749e-07, |
|
"loss": 1.432, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.1049340924381705, |
|
"learning_rate": 8.72194478193932e-07, |
|
"loss": 1.3815, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.869253522521714, |
|
"learning_rate": 8.721939909161303e-07, |
|
"loss": 1.391, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.8752461370622306, |
|
"learning_rate": 8.721934918968828e-07, |
|
"loss": 1.3769, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.909751532098998, |
|
"learning_rate": 8.721929811362032e-07, |
|
"loss": 1.3995, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0462324191538572, |
|
"learning_rate": 8.72192458634105e-07, |
|
"loss": 1.3689, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9750194133891363, |
|
"learning_rate": 8.721919243906024e-07, |
|
"loss": 1.3707, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.932835851287147, |
|
"learning_rate": 8.721913784057099e-07, |
|
"loss": 1.3676, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9429516387295926, |
|
"learning_rate": 8.721908206794419e-07, |
|
"loss": 1.3731, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.994182650351975, |
|
"learning_rate": 8.721902512118136e-07, |
|
"loss": 1.3542, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.876833046636617, |
|
"learning_rate": 8.721896700028404e-07, |
|
"loss": 1.4124, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.945327518176284, |
|
"learning_rate": 8.721890770525377e-07, |
|
"loss": 1.4137, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 21.047240653653276, |
|
"learning_rate": 8.721884723609218e-07, |
|
"loss": 1.4264, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.029952956834553, |
|
"learning_rate": 8.721878559280086e-07, |
|
"loss": 1.4372, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.137230528895406, |
|
"learning_rate": 8.721872277538151e-07, |
|
"loss": 1.4019, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9696146910825694, |
|
"learning_rate": 8.72186587838358e-07, |
|
"loss": 1.4515, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.980760336638325, |
|
"learning_rate": 8.721859361816546e-07, |
|
"loss": 1.4203, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.086006727040003, |
|
"learning_rate": 8.721852727837222e-07, |
|
"loss": 1.3712, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.003419403761712, |
|
"learning_rate": 8.72184597644579e-07, |
|
"loss": 1.4107, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0463864266769773, |
|
"learning_rate": 8.72183910764243e-07, |
|
"loss": 1.4082, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.950362004991645, |
|
"learning_rate": 8.721832121427326e-07, |
|
"loss": 1.352, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.8779668562920815, |
|
"learning_rate": 8.721825017800669e-07, |
|
"loss": 1.4236, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0172810234945455, |
|
"learning_rate": 8.721817796762648e-07, |
|
"loss": 1.3871, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9726094865888224, |
|
"learning_rate": 8.721810458313457e-07, |
|
"loss": 1.349, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0322153773349334, |
|
"learning_rate": 8.721803002453297e-07, |
|
"loss": 1.3935, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0249194383352283, |
|
"learning_rate": 8.721795429182364e-07, |
|
"loss": 1.3849, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9938901642866718, |
|
"learning_rate": 8.721787738500866e-07, |
|
"loss": 1.4267, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.923274256584191, |
|
"learning_rate": 8.721779930409007e-07, |
|
"loss": 1.4283, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.8466301519873785, |
|
"learning_rate": 8.721772004906999e-07, |
|
"loss": 1.3842, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0052783960700165, |
|
"learning_rate": 8.721763961995056e-07, |
|
"loss": 1.4335, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0935980229307614, |
|
"learning_rate": 8.721755801673391e-07, |
|
"loss": 1.3751, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0201926532008505, |
|
"learning_rate": 8.721747523942229e-07, |
|
"loss": 1.383, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.964731945306275, |
|
"learning_rate": 8.721739128801788e-07, |
|
"loss": 1.3359, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.928301273992501, |
|
"learning_rate": 8.721730616252297e-07, |
|
"loss": 1.3461, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9759904501938617, |
|
"learning_rate": 8.721721986293985e-07, |
|
"loss": 1.3644, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.040389211247673, |
|
"learning_rate": 8.721713238927082e-07, |
|
"loss": 1.4341, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.005645814777094, |
|
"learning_rate": 8.721704374151826e-07, |
|
"loss": 1.3967, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.1272949994089823, |
|
"learning_rate": 8.721695391968456e-07, |
|
"loss": 1.3796, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0684527006439533, |
|
"learning_rate": 8.721686292377211e-07, |
|
"loss": 1.3905, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9348716760076603, |
|
"learning_rate": 8.721677075378338e-07, |
|
"loss": 1.3905, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9873597802095304, |
|
"learning_rate": 8.721667740972085e-07, |
|
"loss": 1.4103, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.979793648840201, |
|
"learning_rate": 8.721658289158703e-07, |
|
"loss": 1.3622, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.937468774579186, |
|
"learning_rate": 8.721648719938447e-07, |
|
"loss": 1.414, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.879104091071243, |
|
"learning_rate": 8.721639033311573e-07, |
|
"loss": 1.3108, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0663878291218203, |
|
"learning_rate": 8.721629229278344e-07, |
|
"loss": 1.3543, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9407287447315826, |
|
"learning_rate": 8.721619307839025e-07, |
|
"loss": 1.3753, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.967538550932994, |
|
"learning_rate": 8.721609268993879e-07, |
|
"loss": 1.3973, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.057519293009879, |
|
"learning_rate": 8.721599112743179e-07, |
|
"loss": 1.4036, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.936392616519391, |
|
"learning_rate": 8.721588839087197e-07, |
|
"loss": 1.4852, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9670464594249197, |
|
"learning_rate": 8.721578448026212e-07, |
|
"loss": 1.3643, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0273720809079663, |
|
"learning_rate": 8.721567939560502e-07, |
|
"loss": 1.4109, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0651462806238854, |
|
"learning_rate": 8.721557313690349e-07, |
|
"loss": 1.3599, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9487059919929326, |
|
"learning_rate": 8.721546570416042e-07, |
|
"loss": 1.3377, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0998938976371146, |
|
"learning_rate": 8.721535709737867e-07, |
|
"loss": 1.3685, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.940826121224176, |
|
"learning_rate": 8.721524731656118e-07, |
|
"loss": 1.4174, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.014412763659776, |
|
"learning_rate": 8.721513636171093e-07, |
|
"loss": 1.3758, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0608004542672678, |
|
"learning_rate": 8.721502423283086e-07, |
|
"loss": 1.3716, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.94244083669587, |
|
"learning_rate": 8.721491092992403e-07, |
|
"loss": 1.3937, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9911802591493144, |
|
"learning_rate": 8.721479645299345e-07, |
|
"loss": 1.4164, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0344694715702065, |
|
"learning_rate": 8.721468080204223e-07, |
|
"loss": 1.4167, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.865667879330454, |
|
"learning_rate": 8.72145639770735e-07, |
|
"loss": 1.4041, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.072739743995507, |
|
"learning_rate": 8.721444597809037e-07, |
|
"loss": 1.4133, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0276333439793843, |
|
"learning_rate": 8.721432680509603e-07, |
|
"loss": 1.3605, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0465572110487686, |
|
"learning_rate": 8.721420645809369e-07, |
|
"loss": 1.3134, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9542599421921376, |
|
"learning_rate": 8.721408493708659e-07, |
|
"loss": 1.4148, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.90720227559915, |
|
"learning_rate": 8.721396224207801e-07, |
|
"loss": 1.3997, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0246259249156617, |
|
"learning_rate": 8.721383837307123e-07, |
|
"loss": 1.4238, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9975853807488453, |
|
"learning_rate": 8.721371333006962e-07, |
|
"loss": 1.3879, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0021383888830258, |
|
"learning_rate": 8.721358711307651e-07, |
|
"loss": 1.3349, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0201205392426296, |
|
"learning_rate": 8.721345972209533e-07, |
|
"loss": 1.3692, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.972139743981842, |
|
"learning_rate": 8.721333115712948e-07, |
|
"loss": 1.3856, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9209251468195276, |
|
"learning_rate": 8.721320141818245e-07, |
|
"loss": 1.3726, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9320196507721277, |
|
"learning_rate": 8.721307050525772e-07, |
|
"loss": 1.4143, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0002549106343337, |
|
"learning_rate": 8.72129384183588e-07, |
|
"loss": 1.3897, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9582570275362206, |
|
"learning_rate": 8.721280515748928e-07, |
|
"loss": 1.3756, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0145408653891526, |
|
"learning_rate": 8.721267072265271e-07, |
|
"loss": 1.3929, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0495215598431553, |
|
"learning_rate": 8.721253511385274e-07, |
|
"loss": 1.4061, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9254365712957613, |
|
"learning_rate": 8.721239833109302e-07, |
|
"loss": 1.3903, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.997455071778174, |
|
"learning_rate": 8.72122603743772e-07, |
|
"loss": 1.4246, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9195114563849627, |
|
"learning_rate": 8.721212124370902e-07, |
|
"loss": 1.3968, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0160422542520706, |
|
"learning_rate": 8.721198093909225e-07, |
|
"loss": 1.4347, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.01404121750226, |
|
"learning_rate": 8.721183946053062e-07, |
|
"loss": 1.3945, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0611502119276692, |
|
"learning_rate": 8.721169680802796e-07, |
|
"loss": 1.3975, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0684020412598727, |
|
"learning_rate": 8.721155298158811e-07, |
|
"loss": 1.373, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9440206694677027, |
|
"learning_rate": 8.721140798121494e-07, |
|
"loss": 1.3432, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.974581850771121, |
|
"learning_rate": 8.721126180691237e-07, |
|
"loss": 1.3095, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9933489105960844, |
|
"learning_rate": 8.721111445868431e-07, |
|
"loss": 1.3885, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9831661987665528, |
|
"learning_rate": 8.721096593653475e-07, |
|
"loss": 1.3126, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.100080295310524, |
|
"learning_rate": 8.721081624046766e-07, |
|
"loss": 1.3567, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.989885190608965, |
|
"learning_rate": 8.72106653704871e-07, |
|
"loss": 1.3899, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0712176271885023, |
|
"learning_rate": 8.721051332659713e-07, |
|
"loss": 1.4208, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.062312373029536, |
|
"learning_rate": 8.721036010880183e-07, |
|
"loss": 1.4147, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9701616634317083, |
|
"learning_rate": 8.721020571710533e-07, |
|
"loss": 1.434, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9572643731393646, |
|
"learning_rate": 8.721005015151179e-07, |
|
"loss": 1.3795, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.997406682050713, |
|
"learning_rate": 8.720989341202539e-07, |
|
"loss": 1.4501, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9106126480356522, |
|
"learning_rate": 8.720973549865035e-07, |
|
"loss": 1.3684, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9444070042644817, |
|
"learning_rate": 8.720957641139094e-07, |
|
"loss": 1.4213, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.8728037311842822, |
|
"learning_rate": 8.720941615025142e-07, |
|
"loss": 1.3519, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.015889105815668, |
|
"learning_rate": 8.720925471523613e-07, |
|
"loss": 1.4162, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9419377055914744, |
|
"learning_rate": 8.72090921063494e-07, |
|
"loss": 1.3357, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.89008663153287, |
|
"learning_rate": 8.720892832359559e-07, |
|
"loss": 1.3647, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9219232048658736, |
|
"learning_rate": 8.720876336697914e-07, |
|
"loss": 1.4069, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.96537590149616, |
|
"learning_rate": 8.72085972365045e-07, |
|
"loss": 1.4118, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.883655732971505, |
|
"learning_rate": 8.720842993217609e-07, |
|
"loss": 1.4136, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9788747864733764, |
|
"learning_rate": 8.720826145399848e-07, |
|
"loss": 1.3976, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.875570982035785, |
|
"learning_rate": 8.720809180197616e-07, |
|
"loss": 1.426, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9926412719867304, |
|
"learning_rate": 8.720792097611372e-07, |
|
"loss": 1.3629, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.958723584893194, |
|
"learning_rate": 8.720774897641574e-07, |
|
"loss": 1.3918, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.968992238648431, |
|
"learning_rate": 8.720757580288688e-07, |
|
"loss": 1.4241, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.889688463405204, |
|
"learning_rate": 8.720740145553177e-07, |
|
"loss": 1.4101, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9993006762652312, |
|
"learning_rate": 8.720722593435512e-07, |
|
"loss": 1.3857, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.980847240255761, |
|
"learning_rate": 8.720704923936167e-07, |
|
"loss": 1.4077, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.884007977441845, |
|
"learning_rate": 8.720687137055615e-07, |
|
"loss": 1.3822, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9646728227580645, |
|
"learning_rate": 8.720669232794336e-07, |
|
"loss": 1.3737, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.893382783809774, |
|
"learning_rate": 8.720651211152813e-07, |
|
"loss": 1.3762, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.024418707419624, |
|
"learning_rate": 8.72063307213153e-07, |
|
"loss": 1.3546, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0252657870696495, |
|
"learning_rate": 8.720614815730977e-07, |
|
"loss": 1.3661, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0208601885030606, |
|
"learning_rate": 8.720596441951642e-07, |
|
"loss": 1.4182, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0552975630549954, |
|
"learning_rate": 8.720577950794024e-07, |
|
"loss": 1.38, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.916749346833794, |
|
"learning_rate": 8.720559342258619e-07, |
|
"loss": 1.4049, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.035247531851327, |
|
"learning_rate": 8.720540616345928e-07, |
|
"loss": 1.4256, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9902303644665205, |
|
"learning_rate": 8.720521773056454e-07, |
|
"loss": 1.3356, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.119044393884763, |
|
"learning_rate": 8.720502812390706e-07, |
|
"loss": 1.4103, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9552269954583803, |
|
"learning_rate": 8.720483734349194e-07, |
|
"loss": 1.3855, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.017213443982555, |
|
"learning_rate": 8.720464538932433e-07, |
|
"loss": 1.3902, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.925567645830183, |
|
"learning_rate": 8.720445226140937e-07, |
|
"loss": 1.4519, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.983266195022755, |
|
"learning_rate": 8.720425795975228e-07, |
|
"loss": 1.3971, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.992218221530755, |
|
"learning_rate": 8.720406248435828e-07, |
|
"loss": 1.4231, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9250759809857882, |
|
"learning_rate": 8.720386583523264e-07, |
|
"loss": 1.3877, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.022334104434051, |
|
"learning_rate": 8.720366801238065e-07, |
|
"loss": 1.4133, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.90248743689466, |
|
"learning_rate": 8.720346901580765e-07, |
|
"loss": 1.3889, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.006981215139682, |
|
"learning_rate": 8.720326884551899e-07, |
|
"loss": 1.3657, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.996511837199606, |
|
"learning_rate": 8.720306750152005e-07, |
|
"loss": 1.3918, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9853146230235317, |
|
"learning_rate": 8.720286498381625e-07, |
|
"loss": 1.3983, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.936322205558776, |
|
"learning_rate": 8.720266129241307e-07, |
|
"loss": 1.3549, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0336499138604984, |
|
"learning_rate": 8.720245642731596e-07, |
|
"loss": 1.3614, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9761367095195514, |
|
"learning_rate": 8.720225038853046e-07, |
|
"loss": 1.4223, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.8383029378391256, |
|
"learning_rate": 8.72020431760621e-07, |
|
"loss": 1.3027, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0012216332284964, |
|
"learning_rate": 8.720183478991647e-07, |
|
"loss": 1.307, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.202220665410359, |
|
"learning_rate": 8.720162523009919e-07, |
|
"loss": 1.3495, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.926537743004413, |
|
"learning_rate": 8.720141449661587e-07, |
|
"loss": 1.346, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.025411005245412, |
|
"learning_rate": 8.720120258947223e-07, |
|
"loss": 1.3581, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.010956090798263, |
|
"learning_rate": 8.720098950867392e-07, |
|
"loss": 1.3634, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.0383205174055727, |
|
"learning_rate": 8.720077525422671e-07, |
|
"loss": 1.3642, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.967895550740301, |
|
"learning_rate": 8.720055982613638e-07, |
|
"loss": 1.3841, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.9365373503076246, |
|
"learning_rate": 8.720034322440872e-07, |
|
"loss": 1.3527, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.1104370711279214, |
|
"learning_rate": 8.720012544904955e-07, |
|
"loss": 1.3483, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.952339289555111, |
|
"learning_rate": 8.719990650006473e-07, |
|
"loss": 1.3956, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.942959986729864, |
|
"learning_rate": 8.719968637746018e-07, |
|
"loss": 1.4256, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.1019407497257507, |
|
"learning_rate": 8.71994650812418e-07, |
|
"loss": 1.3786, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0501476908245984, |
|
"learning_rate": 8.719924261141557e-07, |
|
"loss": 1.4158, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9781394711393507, |
|
"learning_rate": 8.719901896798748e-07, |
|
"loss": 1.427, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0411329565229646, |
|
"learning_rate": 8.719879415096352e-07, |
|
"loss": 1.4281, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.022154009359811, |
|
"learning_rate": 8.719856816034978e-07, |
|
"loss": 1.435, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.965921869395771, |
|
"learning_rate": 8.719834099615232e-07, |
|
"loss": 1.3766, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.026938014636579, |
|
"learning_rate": 8.719811265837728e-07, |
|
"loss": 1.3612, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9969914810093115, |
|
"learning_rate": 8.719788314703078e-07, |
|
"loss": 1.3371, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.8906070169866545, |
|
"learning_rate": 8.719765246211902e-07, |
|
"loss": 1.3826, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0301378229830593, |
|
"learning_rate": 8.71974206036482e-07, |
|
"loss": 1.3937, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9673359121672145, |
|
"learning_rate": 8.719718757162457e-07, |
|
"loss": 1.3838, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.972360304451488, |
|
"learning_rate": 8.719695336605439e-07, |
|
"loss": 1.4382, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9493767886841242, |
|
"learning_rate": 8.7196717986944e-07, |
|
"loss": 1.3651, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.031661286320472, |
|
"learning_rate": 8.719648143429969e-07, |
|
"loss": 1.3482, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.920995409830151, |
|
"learning_rate": 8.719624370812787e-07, |
|
"loss": 1.4115, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.920727312220773, |
|
"learning_rate": 8.719600480843491e-07, |
|
"loss": 1.396, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.945684201201641, |
|
"learning_rate": 8.719576473522726e-07, |
|
"loss": 1.3557, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0651768014119956, |
|
"learning_rate": 8.719552348851139e-07, |
|
"loss": 1.389, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.1250149616393577, |
|
"learning_rate": 8.719528106829378e-07, |
|
"loss": 1.469, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.977539941978143, |
|
"learning_rate": 8.719503747458096e-07, |
|
"loss": 1.3536, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0745479693463924, |
|
"learning_rate": 8.71947927073795e-07, |
|
"loss": 1.3877, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.2160266553797667, |
|
"learning_rate": 8.719454676669596e-07, |
|
"loss": 1.3988, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.134783336833123, |
|
"learning_rate": 8.719429965253698e-07, |
|
"loss": 1.4104, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.908012731710042, |
|
"learning_rate": 8.719405136490924e-07, |
|
"loss": 1.4186, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9868078254055934, |
|
"learning_rate": 8.71938019038194e-07, |
|
"loss": 1.2836, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 9.747982306246115, |
|
"learning_rate": 8.719355126927416e-07, |
|
"loss": 1.3331, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9503527741257476, |
|
"learning_rate": 8.719329946128029e-07, |
|
"loss": 1.3993, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.000724848973538, |
|
"learning_rate": 8.719304647984458e-07, |
|
"loss": 1.3621, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0259025600243032, |
|
"learning_rate": 8.719279232497381e-07, |
|
"loss": 1.4128, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.95272448132235, |
|
"learning_rate": 8.719253699667485e-07, |
|
"loss": 1.4239, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.7978451838553986, |
|
"learning_rate": 8.719228049495456e-07, |
|
"loss": 1.3694, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.976459149812301, |
|
"learning_rate": 8.719202281981985e-07, |
|
"loss": 1.3413, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9711797841726377, |
|
"learning_rate": 8.719176397127765e-07, |
|
"loss": 1.3616, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0204333297285815, |
|
"learning_rate": 8.719150394933495e-07, |
|
"loss": 1.4126, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0181766925849556, |
|
"learning_rate": 8.719124275399874e-07, |
|
"loss": 1.4323, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9167139561107893, |
|
"learning_rate": 8.719098038527604e-07, |
|
"loss": 1.4484, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9378575085429013, |
|
"learning_rate": 8.719071684317393e-07, |
|
"loss": 1.3775, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.94943978740792, |
|
"learning_rate": 8.719045212769951e-07, |
|
"loss": 1.3897, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.958505276332381, |
|
"learning_rate": 8.719018623885988e-07, |
|
"loss": 1.394, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.145867503995205, |
|
"learning_rate": 8.718991917666222e-07, |
|
"loss": 1.379, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9734480635815728, |
|
"learning_rate": 8.718965094111372e-07, |
|
"loss": 1.3953, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9049260797132335, |
|
"learning_rate": 8.71893815322216e-07, |
|
"loss": 1.3827, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.98953226106279, |
|
"learning_rate": 8.718911094999311e-07, |
|
"loss": 1.3862, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.898375435464832, |
|
"learning_rate": 8.718883919443554e-07, |
|
"loss": 1.4134, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9986915176885027, |
|
"learning_rate": 8.718856626555621e-07, |
|
"loss": 1.391, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.054114308668177, |
|
"learning_rate": 8.718829216336246e-07, |
|
"loss": 1.414, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.966556344517333, |
|
"learning_rate": 8.718801688786166e-07, |
|
"loss": 1.4188, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.1040321426982134, |
|
"learning_rate": 8.718774043906126e-07, |
|
"loss": 1.3538, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0055709258768832, |
|
"learning_rate": 8.718746281696866e-07, |
|
"loss": 1.4413, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9449143169829277, |
|
"learning_rate": 8.718718402159136e-07, |
|
"loss": 1.3449, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9706428099571305, |
|
"learning_rate": 8.718690405293686e-07, |
|
"loss": 1.4158, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0814964982203112, |
|
"learning_rate": 8.718662291101268e-07, |
|
"loss": 1.3981, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.036501892302947, |
|
"learning_rate": 8.718634059582641e-07, |
|
"loss": 1.4047, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0667912243139535, |
|
"learning_rate": 8.718605710738567e-07, |
|
"loss": 1.4436, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.096788465549673, |
|
"learning_rate": 8.718577244569806e-07, |
|
"loss": 1.4332, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.031370455846918, |
|
"learning_rate": 8.718548661077125e-07, |
|
"loss": 1.3962, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9672083000240344, |
|
"learning_rate": 8.718519960261294e-07, |
|
"loss": 1.4205, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.007883947100492, |
|
"learning_rate": 8.718491142123086e-07, |
|
"loss": 1.3446, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9748300479515253, |
|
"learning_rate": 8.718462206663277e-07, |
|
"loss": 1.3854, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0397247689440396, |
|
"learning_rate": 8.718433153882645e-07, |
|
"loss": 1.4125, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.001960481680682, |
|
"learning_rate": 8.718403983781974e-07, |
|
"loss": 1.3947, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0664806351413088, |
|
"learning_rate": 8.718374696362047e-07, |
|
"loss": 1.3624, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0210504760194175, |
|
"learning_rate": 8.718345291623656e-07, |
|
"loss": 1.4671, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.971388652881855, |
|
"learning_rate": 8.718315769567588e-07, |
|
"loss": 1.3472, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.952431051174851, |
|
"learning_rate": 8.718286130194643e-07, |
|
"loss": 1.3779, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.12691241920494, |
|
"learning_rate": 8.718256373505615e-07, |
|
"loss": 1.4117, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0148285597590796, |
|
"learning_rate": 8.718226499501307e-07, |
|
"loss": 1.3676, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.98672263886442, |
|
"learning_rate": 8.718196508182523e-07, |
|
"loss": 1.4435, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.968930206941385, |
|
"learning_rate": 8.718166399550071e-07, |
|
"loss": 1.4378, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0425445228617187, |
|
"learning_rate": 8.718136173604761e-07, |
|
"loss": 1.3597, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9276879829658107, |
|
"learning_rate": 8.718105830347405e-07, |
|
"loss": 1.3689, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9317936344250413, |
|
"learning_rate": 8.718075369778825e-07, |
|
"loss": 1.3721, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9645490048095637, |
|
"learning_rate": 8.718044791899837e-07, |
|
"loss": 1.3987, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0365564821005977, |
|
"learning_rate": 8.718014096711265e-07, |
|
"loss": 1.3868, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9682770215203553, |
|
"learning_rate": 8.717983284213936e-07, |
|
"loss": 1.3415, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.134935849661195, |
|
"learning_rate": 8.717952354408679e-07, |
|
"loss": 1.3293, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.8124045479008384, |
|
"learning_rate": 8.717921307296327e-07, |
|
"loss": 1.4101, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.870855459457384, |
|
"learning_rate": 8.717890142877717e-07, |
|
"loss": 1.4129, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.996734500928963, |
|
"learning_rate": 8.717858861153686e-07, |
|
"loss": 1.4188, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.905301300393811, |
|
"learning_rate": 8.717827462125079e-07, |
|
"loss": 1.3503, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.963215198908182, |
|
"learning_rate": 8.717795945792739e-07, |
|
"loss": 1.3539, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.058342559604312, |
|
"learning_rate": 8.717764312157515e-07, |
|
"loss": 1.3911, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9650165681938128, |
|
"learning_rate": 8.717732561220258e-07, |
|
"loss": 1.4207, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9793800118049454, |
|
"learning_rate": 8.717700692981826e-07, |
|
"loss": 1.3691, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9162869272769556, |
|
"learning_rate": 8.717668707443075e-07, |
|
"loss": 1.395, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9636673086391485, |
|
"learning_rate": 8.717636604604865e-07, |
|
"loss": 1.4023, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.959298293762338, |
|
"learning_rate": 8.717604384468061e-07, |
|
"loss": 1.4328, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.965235704696395, |
|
"learning_rate": 8.717572047033532e-07, |
|
"loss": 1.4354, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.127226354296215, |
|
"learning_rate": 8.717539592302147e-07, |
|
"loss": 1.3904, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9772007199986428, |
|
"learning_rate": 8.717507020274781e-07, |
|
"loss": 1.3997, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.1654089044177103, |
|
"learning_rate": 8.717474330952311e-07, |
|
"loss": 1.3664, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.897024330171534, |
|
"learning_rate": 8.717441524335616e-07, |
|
"loss": 1.3815, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9908064118392947, |
|
"learning_rate": 8.717408600425579e-07, |
|
"loss": 1.4008, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.1674379998791475, |
|
"learning_rate": 8.717375559223089e-07, |
|
"loss": 1.4134, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.810265806813017, |
|
"learning_rate": 8.717342400729033e-07, |
|
"loss": 1.4046, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9789538191272626, |
|
"learning_rate": 8.717309124944306e-07, |
|
"loss": 1.3957, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.995888914357539, |
|
"learning_rate": 8.717275731869801e-07, |
|
"loss": 1.3823, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.94123727534938, |
|
"learning_rate": 8.71724222150642e-07, |
|
"loss": 1.3577, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.907211393678648, |
|
"learning_rate": 8.717208593855062e-07, |
|
"loss": 1.4016, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9549644446432546, |
|
"learning_rate": 8.717174848916635e-07, |
|
"loss": 1.3554, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.047404295254929, |
|
"learning_rate": 8.717140986692047e-07, |
|
"loss": 1.3977, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0015613969735218, |
|
"learning_rate": 8.717107007182211e-07, |
|
"loss": 1.4159, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.99276033713888, |
|
"learning_rate": 8.71707291038804e-07, |
|
"loss": 1.4194, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9613959661694427, |
|
"learning_rate": 8.717038696310452e-07, |
|
"loss": 1.4072, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.984569722219675, |
|
"learning_rate": 8.717004364950369e-07, |
|
"loss": 1.4018, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.927152138759416, |
|
"learning_rate": 8.716969916308715e-07, |
|
"loss": 1.4038, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0771539333400764, |
|
"learning_rate": 8.716935350386416e-07, |
|
"loss": 1.3754, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.970985940726889, |
|
"learning_rate": 8.716900667184406e-07, |
|
"loss": 1.4458, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0928065462412633, |
|
"learning_rate": 8.716865866703617e-07, |
|
"loss": 1.371, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9309896563133697, |
|
"learning_rate": 8.716830948944986e-07, |
|
"loss": 1.3509, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0455988834094736, |
|
"learning_rate": 8.716795913909452e-07, |
|
"loss": 1.3827, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.061462286190086, |
|
"learning_rate": 8.716760761597961e-07, |
|
"loss": 1.3926, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9080714574645516, |
|
"learning_rate": 8.716725492011458e-07, |
|
"loss": 1.4101, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9046604352395207, |
|
"learning_rate": 8.716690105150891e-07, |
|
"loss": 1.335, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.946411297505849, |
|
"learning_rate": 8.716654601017216e-07, |
|
"loss": 1.4109, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9811491538335915, |
|
"learning_rate": 8.716618979611386e-07, |
|
"loss": 1.4007, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.828391151750033, |
|
"learning_rate": 8.716583240934361e-07, |
|
"loss": 1.4194, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.861846008744537, |
|
"learning_rate": 8.716547384987104e-07, |
|
"loss": 1.3164, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.8751261295501274, |
|
"learning_rate": 8.716511411770581e-07, |
|
"loss": 1.4447, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.085208227054159, |
|
"learning_rate": 8.716475321285758e-07, |
|
"loss": 1.3732, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.926763955103524, |
|
"learning_rate": 8.716439113533609e-07, |
|
"loss": 1.427, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.021150407067775, |
|
"learning_rate": 8.716402788515107e-07, |
|
"loss": 1.4123, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0835953078832476, |
|
"learning_rate": 8.716366346231232e-07, |
|
"loss": 1.3225, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.084681723058033, |
|
"learning_rate": 8.716329786682964e-07, |
|
"loss": 1.4007, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9354891039496507, |
|
"learning_rate": 8.716293109871288e-07, |
|
"loss": 1.374, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.072363269165642, |
|
"learning_rate": 8.71625631579719e-07, |
|
"loss": 1.355, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9000514864569373, |
|
"learning_rate": 8.716219404461663e-07, |
|
"loss": 1.3718, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0562397719571766, |
|
"learning_rate": 8.716182375865698e-07, |
|
"loss": 1.3814, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0526646692685717, |
|
"learning_rate": 8.716145230010296e-07, |
|
"loss": 1.3772, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0966573543083538, |
|
"learning_rate": 8.716107966896452e-07, |
|
"loss": 1.4287, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0880361698977263, |
|
"learning_rate": 8.716070586525174e-07, |
|
"loss": 1.3751, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.832576232775376, |
|
"learning_rate": 8.716033088897465e-07, |
|
"loss": 1.416, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.965237748755546, |
|
"learning_rate": 8.715995474014337e-07, |
|
"loss": 1.3621, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0463957419921783, |
|
"learning_rate": 8.7159577418768e-07, |
|
"loss": 1.357, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9739015917876723, |
|
"learning_rate": 8.715919892485873e-07, |
|
"loss": 1.3873, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.8676852163892037, |
|
"learning_rate": 8.715881925842573e-07, |
|
"loss": 1.4051, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.088087320089484, |
|
"learning_rate": 8.715843841947923e-07, |
|
"loss": 1.3151, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0270665713855367, |
|
"learning_rate": 8.715805640802949e-07, |
|
"loss": 1.3679, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9869536543983366, |
|
"learning_rate": 8.715767322408678e-07, |
|
"loss": 1.3528, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9112845523257675, |
|
"learning_rate": 8.715728886766143e-07, |
|
"loss": 1.442, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0049960527344544, |
|
"learning_rate": 8.715690333876378e-07, |
|
"loss": 1.3681, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9921307040597664, |
|
"learning_rate": 8.715651663740421e-07, |
|
"loss": 1.4314, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0109768676656605, |
|
"learning_rate": 8.715612876359315e-07, |
|
"loss": 1.3847, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.039247694352697, |
|
"learning_rate": 8.715573971734103e-07, |
|
"loss": 1.4317, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.019898918932762, |
|
"learning_rate": 8.71553494986583e-07, |
|
"loss": 1.3623, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.00522079476457, |
|
"learning_rate": 8.71549581075555e-07, |
|
"loss": 1.3884, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9610972813683794, |
|
"learning_rate": 8.715456554404316e-07, |
|
"loss": 1.3315, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.967170476790787, |
|
"learning_rate": 8.715417180813185e-07, |
|
"loss": 1.4207, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9522241028635285, |
|
"learning_rate": 8.715377689983216e-07, |
|
"loss": 1.4012, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.928961724018662, |
|
"learning_rate": 8.715338081915475e-07, |
|
"loss": 1.3869, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.952221930130988, |
|
"learning_rate": 8.715298356611025e-07, |
|
"loss": 1.3703, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0536781428675335, |
|
"learning_rate": 8.715258514070937e-07, |
|
"loss": 1.3682, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.128913913582799, |
|
"learning_rate": 8.715218554296284e-07, |
|
"loss": 1.3435, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0301910736766318, |
|
"learning_rate": 8.715178477288141e-07, |
|
"loss": 1.3975, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.962217823506329, |
|
"learning_rate": 8.715138283047589e-07, |
|
"loss": 1.3488, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0039607070469647, |
|
"learning_rate": 8.715097971575708e-07, |
|
"loss": 1.378, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0602288102993938, |
|
"learning_rate": 8.715057542873585e-07, |
|
"loss": 1.3572, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.969434960629606, |
|
"learning_rate": 8.715016996942307e-07, |
|
"loss": 1.3713, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9917667276430477, |
|
"learning_rate": 8.714976333782967e-07, |
|
"loss": 1.4607, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.2046134996002436, |
|
"learning_rate": 8.714935553396659e-07, |
|
"loss": 1.3853, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.1097764355868733, |
|
"learning_rate": 8.714894655784481e-07, |
|
"loss": 1.371, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9715612348659124, |
|
"learning_rate": 8.714853640947534e-07, |
|
"loss": 1.392, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9321597700386333, |
|
"learning_rate": 8.714812508886925e-07, |
|
"loss": 1.4051, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.949095796893927, |
|
"learning_rate": 8.714771259603758e-07, |
|
"loss": 1.3469, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9290487074046756, |
|
"learning_rate": 8.714729893099144e-07, |
|
"loss": 1.3629, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0295532590400196, |
|
"learning_rate": 8.714688409374198e-07, |
|
"loss": 1.3689, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.965897130998238, |
|
"learning_rate": 8.714646808430036e-07, |
|
"loss": 1.3619, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0935335333535336, |
|
"learning_rate": 8.714605090267779e-07, |
|
"loss": 1.3781, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.986770714363214, |
|
"learning_rate": 8.71456325488855e-07, |
|
"loss": 1.364, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.950553960144967, |
|
"learning_rate": 8.714521302293475e-07, |
|
"loss": 1.4063, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.014174300238565, |
|
"learning_rate": 8.714479232483683e-07, |
|
"loss": 1.3676, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.24671603526281, |
|
"learning_rate": 8.714437045460308e-07, |
|
"loss": 1.3578, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.1623556544347724, |
|
"learning_rate": 8.714394741224484e-07, |
|
"loss": 1.3645, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0025864567325122, |
|
"learning_rate": 8.714352319777354e-07, |
|
"loss": 1.3607, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.94717894620833, |
|
"learning_rate": 8.714309781120056e-07, |
|
"loss": 1.3498, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.8964926477515935, |
|
"learning_rate": 8.714267125253735e-07, |
|
"loss": 1.3728, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.1656664213505823, |
|
"learning_rate": 8.714224352179544e-07, |
|
"loss": 1.4176, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.020188270295796, |
|
"learning_rate": 8.71418146189863e-07, |
|
"loss": 1.4194, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.978569270321128, |
|
"learning_rate": 8.71413845441215e-07, |
|
"loss": 1.3749, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9396855197827283, |
|
"learning_rate": 8.714095329721261e-07, |
|
"loss": 1.3795, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9422669465457387, |
|
"learning_rate": 8.714052087827125e-07, |
|
"loss": 1.3834, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.010282715520298, |
|
"learning_rate": 8.714008728730907e-07, |
|
"loss": 1.3531, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9717500175741116, |
|
"learning_rate": 8.713965252433773e-07, |
|
"loss": 1.3667, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.052704318228861, |
|
"learning_rate": 8.713921658936892e-07, |
|
"loss": 1.3456, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.528710625831412, |
|
"learning_rate": 8.713877948241442e-07, |
|
"loss": 1.3936, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9466834730933833, |
|
"learning_rate": 8.713834120348596e-07, |
|
"loss": 1.3217, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0996541898765226, |
|
"learning_rate": 8.713790175259536e-07, |
|
"loss": 1.3855, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.982252067970699, |
|
"learning_rate": 8.713746112975446e-07, |
|
"loss": 1.384, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.1109077626663844, |
|
"learning_rate": 8.713701933497509e-07, |
|
"loss": 1.3561, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0394061264038115, |
|
"learning_rate": 8.713657636826918e-07, |
|
"loss": 1.468, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0969706383479, |
|
"learning_rate": 8.713613222964863e-07, |
|
"loss": 1.3993, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0348133446475662, |
|
"learning_rate": 8.713568691912542e-07, |
|
"loss": 1.387, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.0273826285615195, |
|
"learning_rate": 8.713524043671153e-07, |
|
"loss": 1.3959, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.141130816615921, |
|
"learning_rate": 8.713479278241898e-07, |
|
"loss": 1.4479, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0490824173681945, |
|
"learning_rate": 8.713434395625983e-07, |
|
"loss": 1.3583, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9474474845957404, |
|
"learning_rate": 8.713389395824614e-07, |
|
"loss": 1.3344, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9486918699061118, |
|
"learning_rate": 8.713344278839005e-07, |
|
"loss": 1.4022, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9302492343255344, |
|
"learning_rate": 8.71329904467037e-07, |
|
"loss": 1.3344, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9531126631866753, |
|
"learning_rate": 8.713253693319929e-07, |
|
"loss": 1.3451, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.8555050387157213, |
|
"learning_rate": 8.713208224788899e-07, |
|
"loss": 1.3287, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0850474903019305, |
|
"learning_rate": 8.713162639078507e-07, |
|
"loss": 1.4153, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9916349483264004, |
|
"learning_rate": 8.71311693618998e-07, |
|
"loss": 1.4025, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0448173115664545, |
|
"learning_rate": 8.713071116124549e-07, |
|
"loss": 1.4129, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9652773442800022, |
|
"learning_rate": 8.713025178883445e-07, |
|
"loss": 1.3688, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.8707509231071127, |
|
"learning_rate": 8.712979124467906e-07, |
|
"loss": 1.3714, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.968740384281211, |
|
"learning_rate": 8.712932952879176e-07, |
|
"loss": 1.4012, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.938361663169202, |
|
"learning_rate": 8.712886664118492e-07, |
|
"loss": 1.406, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.1217977871072775, |
|
"learning_rate": 8.712840258187104e-07, |
|
"loss": 1.3822, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9353035115177915, |
|
"learning_rate": 8.71279373508626e-07, |
|
"loss": 1.3578, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.014072001165337, |
|
"learning_rate": 8.712747094817213e-07, |
|
"loss": 1.4454, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.969346395207918, |
|
"learning_rate": 8.71270033738122e-07, |
|
"loss": 1.3705, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.050484611788353, |
|
"learning_rate": 8.712653462779539e-07, |
|
"loss": 1.3551, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.940761479764165, |
|
"learning_rate": 8.71260647101343e-07, |
|
"loss": 1.4147, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9928956127151944, |
|
"learning_rate": 8.712559362084161e-07, |
|
"loss": 1.3604, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0125472864930805, |
|
"learning_rate": 8.712512135993e-07, |
|
"loss": 1.4183, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.940381713821346, |
|
"learning_rate": 8.712464792741218e-07, |
|
"loss": 1.4414, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0279511535103336, |
|
"learning_rate": 8.712417332330089e-07, |
|
"loss": 1.3505, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9922191158837954, |
|
"learning_rate": 8.712369754760892e-07, |
|
"loss": 1.4028, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0184990677208834, |
|
"learning_rate": 8.712322060034907e-07, |
|
"loss": 1.3465, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.05423073796875, |
|
"learning_rate": 8.712274248153418e-07, |
|
"loss": 1.3416, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.1326601737410025, |
|
"learning_rate": 8.712226319117715e-07, |
|
"loss": 1.3924, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.995785462842688, |
|
"learning_rate": 8.712178272929084e-07, |
|
"loss": 1.3895, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9760799944825806, |
|
"learning_rate": 8.712130109588823e-07, |
|
"loss": 1.4104, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9654696172148896, |
|
"learning_rate": 8.712081829098225e-07, |
|
"loss": 1.378, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9968608921090265, |
|
"learning_rate": 8.712033431458593e-07, |
|
"loss": 1.4264, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.969211803711655, |
|
"learning_rate": 8.711984916671229e-07, |
|
"loss": 1.3607, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.942118444739339, |
|
"learning_rate": 8.711936284737438e-07, |
|
"loss": 1.3899, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.98791974032871, |
|
"learning_rate": 8.711887535658529e-07, |
|
"loss": 1.3459, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9216444622419537, |
|
"learning_rate": 8.711838669435818e-07, |
|
"loss": 1.4116, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9987312946412206, |
|
"learning_rate": 8.711789686070618e-07, |
|
"loss": 1.4126, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9997602427201926, |
|
"learning_rate": 8.711740585564249e-07, |
|
"loss": 1.3392, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9736815652309683, |
|
"learning_rate": 8.711691367918032e-07, |
|
"loss": 1.3483, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0854886077878487, |
|
"learning_rate": 8.711642033133292e-07, |
|
"loss": 1.3842, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.061838785099914, |
|
"learning_rate": 8.711592581211358e-07, |
|
"loss": 1.3658, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.023168820723632, |
|
"learning_rate": 8.711543012153561e-07, |
|
"loss": 1.3929, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.970076769155023, |
|
"learning_rate": 8.711493325961236e-07, |
|
"loss": 1.3587, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.002070690376611, |
|
"learning_rate": 8.71144352263572e-07, |
|
"loss": 1.367, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.995354883459173, |
|
"learning_rate": 8.711393602178357e-07, |
|
"loss": 1.4185, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 24.26995863156332, |
|
"learning_rate": 8.711343564590487e-07, |
|
"loss": 1.3982, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.065387181865347, |
|
"learning_rate": 8.711293409873459e-07, |
|
"loss": 1.3668, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.965733738900826, |
|
"learning_rate": 8.711243138028624e-07, |
|
"loss": 1.3989, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.909922562005101, |
|
"learning_rate": 8.711192749057334e-07, |
|
"loss": 1.3548, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9485719715840824, |
|
"learning_rate": 8.711142242960946e-07, |
|
"loss": 1.3982, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0587521051463598, |
|
"learning_rate": 8.711091619740822e-07, |
|
"loss": 1.4208, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.016741791642564, |
|
"learning_rate": 8.711040879398322e-07, |
|
"loss": 1.3776, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.087201787399773, |
|
"learning_rate": 8.710990021934814e-07, |
|
"loss": 1.3509, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.1398002317055873, |
|
"learning_rate": 8.710939047351665e-07, |
|
"loss": 1.4054, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.003141522348306, |
|
"learning_rate": 8.710887955650252e-07, |
|
"loss": 1.3895, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0461324715788862, |
|
"learning_rate": 8.710836746831946e-07, |
|
"loss": 1.4143, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.956952816424475, |
|
"learning_rate": 8.710785420898127e-07, |
|
"loss": 1.4008, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.8987555743390168, |
|
"learning_rate": 8.710733977850179e-07, |
|
"loss": 1.3823, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0536968376300284, |
|
"learning_rate": 8.710682417689485e-07, |
|
"loss": 1.4578, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.969066979149947, |
|
"learning_rate": 8.710630740417435e-07, |
|
"loss": 1.428, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.1272335317584905, |
|
"learning_rate": 8.710578946035417e-07, |
|
"loss": 1.3562, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9753498641377742, |
|
"learning_rate": 8.710527034544828e-07, |
|
"loss": 1.3953, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.943913278878306, |
|
"learning_rate": 8.710475005947067e-07, |
|
"loss": 1.3626, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9513572324834727, |
|
"learning_rate": 8.710422860243531e-07, |
|
"loss": 1.3461, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0849634901753284, |
|
"learning_rate": 8.710370597435629e-07, |
|
"loss": 1.3663, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.138354533416878, |
|
"learning_rate": 8.710318217524763e-07, |
|
"loss": 1.3141, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.972312260895404, |
|
"learning_rate": 8.710265720512346e-07, |
|
"loss": 1.3633, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0591517440498097, |
|
"learning_rate": 8.710213106399791e-07, |
|
"loss": 1.3557, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.8524186182545987, |
|
"learning_rate": 8.710160375188516e-07, |
|
"loss": 1.3676, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9822890833732956, |
|
"learning_rate": 8.710107526879938e-07, |
|
"loss": 1.4331, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9176351551822868, |
|
"learning_rate": 8.710054561475481e-07, |
|
"loss": 1.371, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.94388048856757, |
|
"learning_rate": 8.71000147897657e-07, |
|
"loss": 1.393, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.085541520730895, |
|
"learning_rate": 8.709948279384639e-07, |
|
"loss": 1.3937, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9491352117439757, |
|
"learning_rate": 8.709894962701115e-07, |
|
"loss": 1.3526, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9933004115427444, |
|
"learning_rate": 8.709841528927436e-07, |
|
"loss": 1.3916, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.959458221957766, |
|
"learning_rate": 8.70978797806504e-07, |
|
"loss": 1.3462, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.963203467802987, |
|
"learning_rate": 8.709734310115368e-07, |
|
"loss": 1.3783, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9902951999179312, |
|
"learning_rate": 8.709680525079866e-07, |
|
"loss": 1.362, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.948654339418917, |
|
"learning_rate": 8.709626622959983e-07, |
|
"loss": 1.3841, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.001564005849718, |
|
"learning_rate": 8.709572603757169e-07, |
|
"loss": 1.3572, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0380595324448416, |
|
"learning_rate": 8.709518467472878e-07, |
|
"loss": 1.4171, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.896631790474921, |
|
"learning_rate": 8.709464214108568e-07, |
|
"loss": 1.3448, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9319635944784475, |
|
"learning_rate": 8.709409843665701e-07, |
|
"loss": 1.3917, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.963100470234962, |
|
"learning_rate": 8.709355356145739e-07, |
|
"loss": 1.3655, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.030553118915969, |
|
"learning_rate": 8.709300751550151e-07, |
|
"loss": 1.3365, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.4928530382016887, |
|
"learning_rate": 8.709246029880405e-07, |
|
"loss": 1.3662, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.030669221337684, |
|
"learning_rate": 8.709191191137976e-07, |
|
"loss": 1.4529, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.155457882933603, |
|
"learning_rate": 8.70913623532434e-07, |
|
"loss": 1.4022, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9856956704586115, |
|
"learning_rate": 8.709081162440975e-07, |
|
"loss": 1.3989, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9343339295028015, |
|
"learning_rate": 8.709025972489367e-07, |
|
"loss": 1.4212, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.1952129991940543, |
|
"learning_rate": 8.708970665471e-07, |
|
"loss": 1.4436, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.015106319776789, |
|
"learning_rate": 8.708915241387364e-07, |
|
"loss": 1.4422, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.022781702862628, |
|
"learning_rate": 8.708859700239951e-07, |
|
"loss": 1.3789, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.932454535389442, |
|
"learning_rate": 8.708804042030254e-07, |
|
"loss": 1.3783, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.1075106961875165, |
|
"learning_rate": 8.708748266759774e-07, |
|
"loss": 1.3904, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.8613959925148356, |
|
"learning_rate": 8.708692374430014e-07, |
|
"loss": 1.3701, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9916282178733447, |
|
"learning_rate": 8.708636365042476e-07, |
|
"loss": 1.3517, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.8578706799379554, |
|
"learning_rate": 8.70858023859867e-07, |
|
"loss": 1.4124, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0055644119433533, |
|
"learning_rate": 8.708523995100105e-07, |
|
"loss": 1.3869, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.027942807639001, |
|
"learning_rate": 8.708467634548298e-07, |
|
"loss": 1.3703, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.1584010302430836, |
|
"learning_rate": 8.708411156944765e-07, |
|
"loss": 1.3852, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.08434624814754, |
|
"learning_rate": 8.708354562291027e-07, |
|
"loss": 1.4008, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.937949504985518, |
|
"learning_rate": 8.708297850588607e-07, |
|
"loss": 1.4026, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0080974250898116, |
|
"learning_rate": 8.708241021839032e-07, |
|
"loss": 1.3992, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.986930909094618, |
|
"learning_rate": 8.708184076043833e-07, |
|
"loss": 1.3504, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.1912323292885363, |
|
"learning_rate": 8.708127013204543e-07, |
|
"loss": 1.3919, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.203030731079236, |
|
"learning_rate": 8.708069833322698e-07, |
|
"loss": 1.3601, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.097916109715531, |
|
"learning_rate": 8.708012536399837e-07, |
|
"loss": 1.3619, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0656903032705327, |
|
"learning_rate": 8.707955122437504e-07, |
|
"loss": 1.3162, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.002875260439292, |
|
"learning_rate": 8.707897591437243e-07, |
|
"loss": 1.389, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0143271490705077, |
|
"learning_rate": 8.707839943400606e-07, |
|
"loss": 1.3323, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.096313689982386, |
|
"learning_rate": 8.707782178329142e-07, |
|
"loss": 1.3813, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.960471861335116, |
|
"learning_rate": 8.707724296224408e-07, |
|
"loss": 1.3472, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0072247729027115, |
|
"learning_rate": 8.707666297087963e-07, |
|
"loss": 1.3522, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.949063388714746, |
|
"learning_rate": 8.707608180921366e-07, |
|
"loss": 1.3928, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.1121870350751064, |
|
"learning_rate": 8.707549947726183e-07, |
|
"loss": 1.4399, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9609559500955003, |
|
"learning_rate": 8.707491597503982e-07, |
|
"loss": 1.3898, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9585328248619587, |
|
"learning_rate": 8.707433130256336e-07, |
|
"loss": 1.379, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.918687510312111, |
|
"learning_rate": 8.707374545984816e-07, |
|
"loss": 1.4176, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.15488138702356, |
|
"learning_rate": 8.707315844691002e-07, |
|
"loss": 1.3706, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9610415881399677, |
|
"learning_rate": 8.707257026376471e-07, |
|
"loss": 1.3641, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0862098441133234, |
|
"learning_rate": 8.707198091042811e-07, |
|
"loss": 1.3893, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9328888264242106, |
|
"learning_rate": 8.707139038691606e-07, |
|
"loss": 1.333, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.97580891347495, |
|
"learning_rate": 8.707079869324446e-07, |
|
"loss": 1.3607, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.000962296933563, |
|
"learning_rate": 8.707020582942925e-07, |
|
"loss": 1.424, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.984700606909975, |
|
"learning_rate": 8.706961179548639e-07, |
|
"loss": 1.3912, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.998907818017932, |
|
"learning_rate": 8.706901659143189e-07, |
|
"loss": 1.4241, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.95254670370234, |
|
"learning_rate": 8.706842021728173e-07, |
|
"loss": 1.3759, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.134652976665768, |
|
"learning_rate": 8.706782267305202e-07, |
|
"loss": 1.3767, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.972318633221704, |
|
"learning_rate": 8.706722395875881e-07, |
|
"loss": 1.3648, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.8607560362869124, |
|
"learning_rate": 8.706662407441824e-07, |
|
"loss": 1.3946, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.8921206621422653, |
|
"learning_rate": 8.706602302004645e-07, |
|
"loss": 1.4396, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.949264928829496, |
|
"learning_rate": 8.706542079565962e-07, |
|
"loss": 1.3475, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.996499503605724, |
|
"learning_rate": 8.706481740127399e-07, |
|
"loss": 1.37, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9235932803770868, |
|
"learning_rate": 8.706421283690578e-07, |
|
"loss": 1.2987, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0655234851680824, |
|
"learning_rate": 8.706360710257128e-07, |
|
"loss": 1.3903, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.968020759665533, |
|
"learning_rate": 8.706300019828679e-07, |
|
"loss": 1.4227, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.116571254583397, |
|
"learning_rate": 8.706239212406866e-07, |
|
"loss": 1.4153, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0578559650775383, |
|
"learning_rate": 8.706178287993326e-07, |
|
"loss": 1.4168, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9399476742786907, |
|
"learning_rate": 8.706117246589699e-07, |
|
"loss": 1.3448, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.045715774895227, |
|
"learning_rate": 8.706056088197628e-07, |
|
"loss": 1.4323, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.8892619986413655, |
|
"learning_rate": 8.705994812818759e-07, |
|
"loss": 1.3688, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0537421060657257, |
|
"learning_rate": 8.705933420454745e-07, |
|
"loss": 1.2805, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.02315805498333, |
|
"learning_rate": 8.705871911107236e-07, |
|
"loss": 1.3664, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0968669538104705, |
|
"learning_rate": 8.70581028477789e-07, |
|
"loss": 1.4156, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.958411205891844, |
|
"learning_rate": 8.705748541468365e-07, |
|
"loss": 1.3879, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0644896141269213, |
|
"learning_rate": 8.705686681180324e-07, |
|
"loss": 1.406, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.9355330340666947, |
|
"learning_rate": 8.705624703915431e-07, |
|
"loss": 1.4157, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 3.0295134625264732, |
|
"learning_rate": 8.705562609675357e-07, |
|
"loss": 1.3595, |
|
"step": 630 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 19246, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 105, |
|
"total_flos": 164886478848000.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|