{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6454566185356987,
  "eval_steps": 500,
  "global_step": 72000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.000896467525744026, "grad_norm": 0.5214600563049316, "learning_rate": 0.0004999990104957794, "loss": 4.1615, "step": 100},
    {"epoch": 0.001792935051488052, "grad_norm": 0.5064386129379272, "learning_rate": 0.0004999960380279981, "loss": 3.6511, "step": 200},
    {"epoch": 0.002689402577232078, "grad_norm": 0.5299622416496277, "learning_rate": 0.0004999910826192423, "loss": 3.537, "step": 300},
    {"epoch": 0.003585870102976104, "grad_norm": 0.49445027112960815, "learning_rate": 0.0004999841443088176, "loss": 3.4766, "step": 400},
    {"epoch": 0.00448233762872013, "grad_norm": 0.5226134061813354, "learning_rate": 0.0004999752231517578, "loss": 3.4303, "step": 500},
    {"epoch": 0.005378805154464156, "grad_norm": 0.4533003568649292, "learning_rate": 0.0004999643192188245, "loss": 3.387, "step": 600},
    {"epoch": 0.0062752726802081825, "grad_norm": 0.4646398723125458, "learning_rate": 0.0004999514325965069, "loss": 3.3508, "step": 700},
    {"epoch": 0.007171740205952208, "grad_norm": 0.47208401560783386, "learning_rate": 0.0004999365633870197, "loss": 3.3181, "step": 800},
    {"epoch": 0.008068207731696234, "grad_norm": 0.45212841033935547, "learning_rate": 0.0004999197117083043, "loss": 3.2928, "step": 900},
    {"epoch": 0.00896467525744026, "grad_norm": 0.4529814124107361, "learning_rate": 0.0004999008776940261, "loss": 3.2734, "step": 1000},
    {"epoch": 0.009861142783184286, "grad_norm": 0.4323786795139313, "learning_rate": 0.0004998800614935745, "loss": 3.2607, "step": 1100},
    {"epoch": 0.010757610308928313, "grad_norm": 0.45383089780807495, "learning_rate": 0.0004998572632720614, "loss": 3.2526, "step": 1200},
    {"epoch": 0.011654077834672339, "grad_norm": 0.4236689805984497, "learning_rate": 0.0004998324832103195, "loss": 3.216, "step": 1300},
    {"epoch": 0.012550545360416365, "grad_norm": 0.3684336841106415, "learning_rate": 0.0004998057215049018, "loss": 3.2024, "step": 1400},
    {"epoch": 0.013447012886160391, "grad_norm": 0.39953121542930603, "learning_rate": 0.000499776978368079, "loss": 3.1853, "step": 1500},
    {"epoch": 0.014343480411904416, "grad_norm": 0.3904390335083008, "learning_rate": 0.0004997462540278386, "loss": 3.1799, "step": 1600},
    {"epoch": 0.015239947937648442, "grad_norm": 0.3936985433101654, "learning_rate": 0.0004997135487278825, "loss": 3.1613, "step": 1700},
    {"epoch": 0.016136415463392468, "grad_norm": 0.36848315596580505, "learning_rate": 0.0004996788627276258, "loss": 3.1438, "step": 1800},
    {"epoch": 0.017032882989136494, "grad_norm": 0.3514796495437622, "learning_rate": 0.0004996421963021939, "loss": 3.1358, "step": 1900},
    {"epoch": 0.01792935051488052, "grad_norm": 0.3697972893714905, "learning_rate": 0.0004996035497424208, "loss": 3.1413, "step": 2000},
    {"epoch": 0.018825818040624547, "grad_norm": 0.3728257715702057, "learning_rate": 0.0004995629233548467, "loss": 3.1187, "step": 2100},
    {"epoch": 0.019722285566368573, "grad_norm": 0.3539767861366272, "learning_rate": 0.0004995203174617158, "loss": 3.1038, "step": 2200},
    {"epoch": 0.0206187530921126, "grad_norm": 0.3395736813545227, "learning_rate": 0.0004994757324009732, "loss": 3.0942, "step": 2300},
    {"epoch": 0.021515220617856625, "grad_norm": 0.36709773540496826, "learning_rate": 0.0004994291685262627, "loss": 3.0961, "step": 2400},
    {"epoch": 0.02241168814360065, "grad_norm": 0.32454609870910645, "learning_rate": 0.0004993806262069239, "loss": 3.0847, "step": 2500},
    {"epoch": 0.023308155669344677, "grad_norm": 0.33812129497528076, "learning_rate": 0.000499330105827989, "loss": 3.0767, "step": 2600},
    {"epoch": 0.024204623195088704, "grad_norm": 0.33684587478637695, "learning_rate": 0.0004992776077901801, "loss": 3.0641, "step": 2700},
    {"epoch": 0.02510109072083273, "grad_norm": 0.3293500542640686, "learning_rate": 0.0004992231325099059, "loss": 3.0715, "step": 2800},
    {"epoch": 0.025997558246576756, "grad_norm": 0.3323623836040497, "learning_rate": 0.0004991666804192582, "loss": 3.055, "step": 2900},
    {"epoch": 0.026894025772320782, "grad_norm": 0.3230840861797333, "learning_rate": 0.0004991082519660089, "loss": 3.0366, "step": 3000},
    {"epoch": 0.02779049329806481, "grad_norm": 0.31436941027641296, "learning_rate": 0.0004990478476136059, "loss": 3.035, "step": 3100},
    {"epoch": 0.02868696082380883, "grad_norm": 0.32258903980255127, "learning_rate": 0.0004989854678411699, "loss": 3.0321, "step": 3200},
    {"epoch": 0.029583428349552857, "grad_norm": 0.3148656487464905, "learning_rate": 0.0004989211131434904, "loss": 3.0146, "step": 3300},
    {"epoch": 0.030479895875296883, "grad_norm": 0.31875699758529663, "learning_rate": 0.0004988547840310217, "loss": 3.0214, "step": 3400},
    {"epoch": 0.03137636340104091, "grad_norm": 0.3068808913230896, "learning_rate": 0.0004987864810298791, "loss": 3.0135, "step": 3500},
    {"epoch": 0.032272830926784936, "grad_norm": 0.3052406311035156, "learning_rate": 0.0004987162046818343, "loss": 3.0042, "step": 3600},
    {"epoch": 0.033169298452528966, "grad_norm": 0.3155302405357361, "learning_rate": 0.0004986439555443118, "loss": 3.008, "step": 3700},
    {"epoch": 0.03406576597827299, "grad_norm": 0.31643468141555786, "learning_rate": 0.0004985697341903834, "loss": 2.9999, "step": 3800},
    {"epoch": 0.03496223350401702, "grad_norm": 0.29758599400520325, "learning_rate": 0.0004984935412087651, "loss": 2.9957, "step": 3900},
    {"epoch": 0.03585870102976104, "grad_norm": 0.30581027269363403, "learning_rate": 0.0004984153772038109, "loss": 2.9873, "step": 4000},
    {"epoch": 0.03675516855550507, "grad_norm": 0.30977991223335266, "learning_rate": 0.0004983352427955092, "loss": 2.9864, "step": 4100},
    {"epoch": 0.03765163608124909, "grad_norm": 0.30028942227363586, "learning_rate": 0.0004982531386194774, "loss": 2.9801, "step": 4200},
    {"epoch": 0.038548103606993116, "grad_norm": 0.2785912752151489, "learning_rate": 0.0004981690653269564, "loss": 2.9707, "step": 4300},
    {"epoch": 0.039444571132737145, "grad_norm": 0.30985763669013977, "learning_rate": 0.0004980830235848066, "loss": 2.9673, "step": 4400},
    {"epoch": 0.04034103865848117, "grad_norm": 0.2833133339881897, "learning_rate": 0.0004979950140755015, "loss": 2.9683, "step": 4500},
    {"epoch": 0.0412375061842252, "grad_norm": 0.27662980556488037, "learning_rate": 0.0004979050374971228, "loss": 2.9525, "step": 4600},
    {"epoch": 0.04213397370996922, "grad_norm": 0.284396231174469, "learning_rate": 0.0004978130945633548, "loss": 2.9643, "step": 4700},
    {"epoch": 0.04303044123571325, "grad_norm": 0.2856046259403229, "learning_rate": 0.0004977191860034787, "loss": 2.9566, "step": 4800},
    {"epoch": 0.04392690876145727, "grad_norm": 0.27780982851982117, "learning_rate": 0.0004976233125623668, "loss": 2.9501, "step": 4900},
    {"epoch": 0.0448233762872013, "grad_norm": 0.282685786485672, "learning_rate": 0.0004975254750004767, "loss": 2.943, "step": 5000},
    {"epoch": 0.045719843812945325, "grad_norm": 0.28598085045814514, "learning_rate": 0.0004974256740938451, "loss": 2.9507, "step": 5100},
    {"epoch": 0.046616311338689355, "grad_norm": 0.2788136899471283, "learning_rate": 0.0004973239106340817, "loss": 2.9437, "step": 5200},
    {"epoch": 0.04751277886443338, "grad_norm": 0.2748227119445801, "learning_rate": 0.0004972201854283632, "loss": 2.9439, "step": 5300},
    {"epoch": 0.04840924639017741, "grad_norm": 0.27777764201164246, "learning_rate": 0.0004971144992994265, "loss": 2.9362, "step": 5400},
    {"epoch": 0.04930571391592143, "grad_norm": 0.273762583732605, "learning_rate": 0.0004970068530855621, "loss": 2.9336, "step": 5500},
    {"epoch": 0.05020218144166546, "grad_norm": 0.27693507075309753, "learning_rate": 0.0004968972476406079, "loss": 2.9285, "step": 5600},
    {"epoch": 0.05109864896740948, "grad_norm": 0.26276636123657227, "learning_rate": 0.0004967856838339424, "loss": 2.9161, "step": 5700},
    {"epoch": 0.05199511649315351, "grad_norm": 0.26518794894218445, "learning_rate": 0.0004966721625504772, "loss": 2.9268, "step": 5800},
    {"epoch": 0.052891584018897535, "grad_norm": 0.2554951012134552, "learning_rate": 0.0004965566846906506, "loss": 2.922, "step": 5900},
    {"epoch": 0.053788051544641564, "grad_norm": 0.25945886969566345, "learning_rate": 0.0004964392511704205, "loss": 2.9179, "step": 6000},
    {"epoch": 0.05468451907038559, "grad_norm": 0.2560843527317047, "learning_rate": 0.0004963198629212563, "loss": 2.9127, "step": 6100},
    {"epoch": 0.05558098659612962, "grad_norm": 0.29079943895339966, "learning_rate": 0.0004961985208901326, "loss": 2.9097, "step": 6200},
    {"epoch": 0.05647745412187364, "grad_norm": 0.2522130608558655, "learning_rate": 0.0004960752260395208, "loss": 2.918, "step": 6300},
    {"epoch": 0.05737392164761766, "grad_norm": 0.26022017002105713, "learning_rate": 0.0004959499793473822, "loss": 2.9088, "step": 6400},
    {"epoch": 0.05827038917336169, "grad_norm": 0.25249192118644714, "learning_rate": 0.0004958227818071597, "loss": 2.8943, "step": 6500},
    {"epoch": 0.059166856699105715, "grad_norm": 0.24774658679962158, "learning_rate": 0.0004956936344277703, "loss": 2.9003, "step": 6600},
    {"epoch": 0.060063324224849744, "grad_norm": 0.243002787232399, "learning_rate": 0.0004955625382335964, "loss": 2.8918, "step": 6700},
    {"epoch": 0.06095979175059377, "grad_norm": 0.2450036108493805, "learning_rate": 0.0004954294942644788, "loss": 2.8959, "step": 6800},
    {"epoch": 0.0618562592763378, "grad_norm": 0.24282772839069366, "learning_rate": 0.0004952945035757076, "loss": 2.8954, "step": 6900},
    {"epoch": 0.06275272680208183, "grad_norm": 0.24141670763492584, "learning_rate": 0.0004951575672380139, "loss": 2.8933, "step": 7000},
    {"epoch": 0.06364919432782584, "grad_norm": 0.23969806730747223, "learning_rate": 0.0004950186863375616, "loss": 2.8874, "step": 7100},
    {"epoch": 0.06454566185356987, "grad_norm": 0.24008285999298096, "learning_rate": 0.0004948778619759389, "loss": 2.8894, "step": 7200},
    {"epoch": 0.0654421293793139, "grad_norm": 0.24920688569545746, "learning_rate": 0.0004947350952701489, "loss": 2.8913, "step": 7300},
    {"epoch": 0.06633859690505793, "grad_norm": 0.23809005320072174, "learning_rate": 0.0004945903873526013, "loss": 2.8832, "step": 7400},
    {"epoch": 0.06723506443080195, "grad_norm": 0.2459540218114853, "learning_rate": 0.0004944437393711034, "loss": 2.8836, "step": 7500},
    {"epoch": 0.06813153195654598, "grad_norm": 0.2407006472349167, "learning_rate": 0.0004942951524888509, "loss": 2.8717, "step": 7600},
    {"epoch": 0.06902799948229, "grad_norm": 0.22954203188419342, "learning_rate": 0.0004941446278844185, "loss": 2.8766, "step": 7700},
    {"epoch": 0.06992446700803404, "grad_norm": 0.2322009801864624, "learning_rate": 0.0004939921667517505, "loss": 2.8691, "step": 7800},
    {"epoch": 0.07082093453377805, "grad_norm": 0.2393633872270584, "learning_rate": 0.0004938377703001521, "loss": 2.874, "step": 7900},
    {"epoch": 0.07171740205952208, "grad_norm": 0.23171070218086243, "learning_rate": 0.0004936814397542788, "loss": 2.8763, "step": 8000},
    {"epoch": 0.07261386958526611, "grad_norm": 0.23274166882038116, "learning_rate": 0.0004935231763541272, "loss": 2.8668, "step": 8100},
    {"epoch": 0.07351033711101014, "grad_norm": 0.23799176514148712, "learning_rate": 0.0004933629813550251, "loss": 2.8675, "step": 8200},
    {"epoch": 0.07440680463675416, "grad_norm": 0.2373514026403427, "learning_rate": 0.0004932008560276217, "loss": 2.8648, "step": 8300},
    {"epoch": 0.07530327216249819, "grad_norm": 0.22208142280578613, "learning_rate": 0.0004930368016578769, "loss": 2.8605, "step": 8400},
    {"epoch": 0.07619973968824222, "grad_norm": 0.230060413479805, "learning_rate": 0.0004928708195470521, "loss": 2.8626, "step": 8500},
    {"epoch": 0.07709620721398623, "grad_norm": 0.22975729405879974, "learning_rate": 0.000492702911011699, "loss": 2.8612, "step": 8600},
    {"epoch": 0.07799267473973026, "grad_norm": 0.2305164784193039, "learning_rate": 0.0004925330773836496, "loss": 2.8615, "step": 8700},
    {"epoch": 0.07888914226547429, "grad_norm": 0.22835880517959595, "learning_rate": 0.0004923613200100054, "loss": 2.8563, "step": 8800},
    {"epoch": 0.07978560979121832, "grad_norm": 0.23437829315662384, "learning_rate": 0.000492187640253127, "loss": 2.8429, "step": 8900},
    {"epoch": 0.08068207731696234, "grad_norm": 0.21821853518486023, "learning_rate": 0.0004920120394906231, "loss": 2.8556, "step": 9000},
    {"epoch": 0.08157854484270637, "grad_norm": 0.22391639649868011, "learning_rate": 0.0004918345191153395, "loss": 2.8439, "step": 9100},
    {"epoch": 0.0824750123684504, "grad_norm": 0.22824177145957947, "learning_rate": 0.0004916550805353483, "loss": 2.851, "step": 9200},
    {"epoch": 0.08337147989419443, "grad_norm": 0.22843202948570251, "learning_rate": 0.0004914737251739363, "loss": 2.8493, "step": 9300},
    {"epoch": 0.08426794741993844, "grad_norm": 0.22368155419826508, "learning_rate": 0.0004912904544695944, "loss": 2.8447, "step": 9400},
    {"epoch": 0.08516441494568247, "grad_norm": 0.22078120708465576, "learning_rate": 0.0004911052698760055, "loss": 2.8498, "step": 9500},
    {"epoch": 0.0860608824714265, "grad_norm": 0.21971289813518524, "learning_rate": 0.0004909181728620333, "loss": 2.846, "step": 9600},
    {"epoch": 0.08695734999717053, "grad_norm": 0.217596173286438, "learning_rate": 0.0004907291649117109, "loss": 2.8415, "step": 9700},
    {"epoch": 0.08785381752291455, "grad_norm": 0.2179151475429535, "learning_rate": 0.0004905382475242282, "loss": 2.8454, "step": 9800},
    {"epoch": 0.08875028504865858, "grad_norm": 0.2107924073934555, "learning_rate": 0.0004903454222139212, "loss": 2.8413, "step": 9900},
    {"epoch": 0.0896467525744026, "grad_norm": 0.20650827884674072, "learning_rate": 0.0004901506905102585, "loss": 2.825, "step": 10000},
    {"epoch": 0.09054322010014663, "grad_norm": 0.2175520658493042, "learning_rate": 0.0004899540539578308, "loss": 2.8297, "step": 10100},
    {"epoch": 0.09143968762589065, "grad_norm": 0.21441052854061127, "learning_rate": 0.0004897555141163376, "loss": 2.8265, "step": 10200},
    {"epoch": 0.09233615515163468, "grad_norm": 0.21620461344718933, "learning_rate": 0.0004895550725605748, "loss": 2.8266, "step": 10300},
    {"epoch": 0.09323262267737871, "grad_norm": 0.21449413895606995, "learning_rate": 0.0004893527308804227, "loss": 2.8308, "step": 10400},
    {"epoch": 0.09412909020312274, "grad_norm": 0.2166638970375061, "learning_rate": 0.0004891484906808332, "loss": 2.8243, "step": 10500},
    {"epoch": 0.09502555772886676, "grad_norm": 0.2123963087797165, "learning_rate": 0.0004889423535818172, "loss": 2.8224, "step": 10600},
    {"epoch": 0.09592202525461078, "grad_norm": 0.20958571135997772, "learning_rate": 0.0004887343212184311, "loss": 2.8235, "step": 10700},
    {"epoch": 0.09681849278035481, "grad_norm": 0.20609615743160248, "learning_rate": 0.0004885243952407648, "loss": 2.8304, "step": 10800},
    {"epoch": 0.09771496030609883, "grad_norm": 0.20418639481067657, "learning_rate": 0.000488312577313928, "loss": 2.8251, "step": 10900},
    {"epoch": 0.09861142783184286, "grad_norm": 0.20903444290161133, "learning_rate": 0.00048809886911803705, "loss": 2.8244, "step": 11000},
    {"epoch": 0.09950789535758689, "grad_norm": 0.20328710973262787, "learning_rate": 0.00048788327234820187, "loss": 2.8212, "step": 11100},
    {"epoch": 0.10040436288333092, "grad_norm": 0.21136407554149628, "learning_rate": 0.0004876657887145121, "loss": 2.8116, "step": 11200},
    {"epoch": 0.10130083040907494, "grad_norm": 0.20518264174461365, "learning_rate": 0.00048744641994202397, "loss": 2.8183, "step": 11300},
    {"epoch": 0.10219729793481896, "grad_norm": 0.20106618106365204, "learning_rate": 0.0004872251677707464, "loss": 2.8147, "step": 11400},
    {"epoch": 0.103093765460563, "grad_norm": 0.20347937941551208, "learning_rate": 0.00048700203395562715, "loss": 2.8081, "step": 11500},
    {"epoch": 0.10399023298630702, "grad_norm": 0.1995743364095688, "learning_rate": 0.000486777020266539, "loss": 2.8204, "step": 11600},
    {"epoch": 0.10488670051205104, "grad_norm": 0.20354920625686646, "learning_rate": 0.00048655012848826574, "loss": 2.8077, "step": 11700},
    {"epoch": 0.10578316803779507, "grad_norm": 0.1998705416917801, "learning_rate": 0.00048632136042048784, "loss": 2.8129, "step": 11800},
    {"epoch": 0.1066796355635391, "grad_norm": 0.19472312927246094, "learning_rate": 0.00048609071787776846, "loss": 2.8141, "step": 11900},
    {"epoch": 0.10757610308928313, "grad_norm": 0.1991991549730301, "learning_rate": 0.0004858582026895387, "loss": 2.8148, "step": 12000},
    {"epoch": 0.10847257061502714, "grad_norm": 0.20118767023086548, "learning_rate": 0.0004856238167000834, "loss": 2.8126, "step": 12100},
    {"epoch": 0.10936903814077117, "grad_norm": 0.20079652965068817, "learning_rate": 0.0004853875617685263, "loss": 2.8128, "step": 12200},
    {"epoch": 0.1102655056665152, "grad_norm": 0.20117126405239105, "learning_rate": 0.0004851494397688154, "loss": 2.8065, "step": 12300},
    {"epoch": 0.11116197319225923, "grad_norm": 0.19336390495300293, "learning_rate": 0.00048490945258970833, "loss": 2.8034, "step": 12400},
    {"epoch": 0.11205844071800325, "grad_norm": 0.20018301904201508, "learning_rate": 0.00048466760213475665, "loss": 2.8033, "step": 12500},
    {"epoch": 0.11295490824374728, "grad_norm": 0.19468124210834503, "learning_rate": 0.00048442389032229163, "loss": 2.7959, "step": 12600},
    {"epoch": 0.11385137576949131, "grad_norm": 0.1926909238100052, "learning_rate": 0.00048417831908540836, "loss": 2.7999, "step": 12700},
    {"epoch": 0.11474784329523532, "grad_norm": 0.19469407200813293, "learning_rate": 0.00048393089037195085, "loss": 2.8011, "step": 12800},
    {"epoch": 0.11564431082097935, "grad_norm": 0.19478073716163635, "learning_rate": 0.0004836816061444964, "loss": 2.8136, "step": 12900},
    {"epoch": 0.11654077834672338, "grad_norm": 0.19283322989940643, "learning_rate": 0.00048343046838033975, "loss": 2.7914, "step": 13000},
    {"epoch": 0.11743724587246741, "grad_norm": 0.19198790192604065, "learning_rate": 0.0004831774790714781, "loss": 2.8133, "step": 13100},
    {"epoch": 0.11833371339821143, "grad_norm": 0.18972335755825043, "learning_rate": 0.0004829226402245948, "loss": 2.7983, "step": 13200},
    {"epoch": 0.11923018092395546, "grad_norm": 0.18818192183971405, "learning_rate": 0.0004826659538610433, "loss": 2.7911, "step": 13300},
    {"epoch": 0.12012664844969949, "grad_norm": 0.19898109138011932, "learning_rate": 0.0004824074220168315, "loss": 2.7961, "step": 13400},
    {"epoch": 0.12102311597544352, "grad_norm": 0.1899373084306717, "learning_rate": 0.00048214704674260544, "loss": 2.7923, "step": 13500},
    {"epoch": 0.12191958350118753, "grad_norm": 0.19022135436534882, "learning_rate": 0.00048188483010363315, "loss": 2.7933, "step": 13600},
    {"epoch": 0.12281605102693156, "grad_norm": 0.19756515324115753, "learning_rate": 0.000481620774179788, "loss": 2.7928, "step": 13700},
    {"epoch": 0.1237125185526756, "grad_norm": 0.18114596605300903, "learning_rate": 0.00048135488106553246, "loss": 2.7896, "step": 13800},
    {"epoch": 0.12460898607841962, "grad_norm": 0.19043464958667755, "learning_rate": 0.0004810871528699013, "loss": 2.8021, "step": 13900},
    {"epoch": 0.12550545360416365, "grad_norm": 0.18425244092941284, "learning_rate": 0.000480817591716485, "loss": 2.7863, "step": 14000},
    {"epoch": 0.12640192112990767, "grad_norm": 0.186711385846138, "learning_rate": 0.00048054619974341293, "loss": 2.7866, "step": 14100},
    {"epoch": 0.12729838865565168, "grad_norm": 0.18656401336193085, "learning_rate": 0.00048027297910333634, "loss": 2.7888, "step": 14200},
    {"epoch": 0.12819485618139573, "grad_norm": 0.19122888147830963, "learning_rate": 0.00048000069146883305, "loss": 2.7899, "step": 14300},
    {"epoch": 0.12909132370713974, "grad_norm": 0.19195199012756348, "learning_rate": 0.000479723838243038, "loss": 2.7819, "step": 14400},
    {"epoch": 0.1299877912328838, "grad_norm": 0.1867845505475998, "learning_rate": 0.00047944516287311896, "loss": 2.7853, "step": 14500},
    {"epoch": 0.1308842587586278, "grad_norm": 0.18608401715755463, "learning_rate": 0.0004791646675694982, "loss": 2.7899, "step": 14600},
    {"epoch": 0.13178072628437182, "grad_norm": 0.18311014771461487, "learning_rate": 0.0004788823545570335, "loss": 2.7835, "step": 14700},
    {"epoch": 0.13267719381011586, "grad_norm": 0.18312698602676392, "learning_rate": 0.00047859822607500046, "loss": 2.7814, "step": 14800},
    {"epoch": 0.13357366133585988, "grad_norm": 0.1876700073480606, "learning_rate": 0.00047831228437707495, "loss": 2.7925, "step": 14900},
    {"epoch": 0.1344701288616039, "grad_norm": 0.18215568363666534, "learning_rate": 0.0004780245317313149, "loss": 2.7815, "step": 15000},
    {"epoch": 0.13536659638734794, "grad_norm": 0.18465806543827057, "learning_rate": 0.0004777349704201426, "loss": 2.7845, "step": 15100},
    {"epoch": 0.13626306391309195, "grad_norm": 0.18615123629570007, "learning_rate": 0.00047744360274032616, "loss": 2.7788, "step": 15200},
    {"epoch": 0.13715953143883597, "grad_norm": 0.18580564856529236, "learning_rate": 0.0004771504310029621, "loss": 2.7727, "step": 15300},
    {"epoch": 0.13805599896458, "grad_norm": 0.19075210392475128, "learning_rate": 0.00047685545753345615, "loss": 2.7777, "step": 15400},
    {"epoch": 0.13895246649032403, "grad_norm": 0.17943909764289856, "learning_rate": 0.00047655868467150534, "loss": 2.7842, "step": 15500},
    {"epoch": 0.13984893401606807, "grad_norm": 0.19626812636852264, "learning_rate": 0.00047626011477107925, "loss": 2.7736, "step": 15600},
    {"epoch": 0.1407454015418121, "grad_norm": 0.18597416579723358, "learning_rate": 0.0004759597502004014, "loss": 2.7789, "step": 15700},
    {"epoch": 0.1416418690675561, "grad_norm": 0.18150849640369415, "learning_rate": 0.0004756575933419304, "loss": 2.7771, "step": 15800},
    {"epoch": 0.14253833659330015, "grad_norm": 0.17691569030284882, "learning_rate": 0.00047535364659234095, "loss": 2.7689, "step": 15900},
    {"epoch": 0.14343480411904416, "grad_norm": 0.1855591982603073, "learning_rate": 0.00047504791236250535, "loss": 2.7755, "step": 16000},
    {"epoch": 0.14433127164478818, "grad_norm": 0.18740758299827576, "learning_rate": 0.00047474039307747354, "loss": 2.7756, "step": 16100},
    {"epoch": 0.14522773917053222, "grad_norm": 0.17985744774341583, "learning_rate": 0.00047443109117645466, "loss": 2.774, "step": 16200},
    {"epoch": 0.14612420669627624, "grad_norm": 0.17564240097999573, "learning_rate": 0.0004741200091127973, "loss": 2.7719, "step": 16300},
    {"epoch": 0.14702067422202028, "grad_norm": 0.17614321410655975, "learning_rate": 0.00047380714935396986, "loss": 2.7679, "step": 16400},
    {"epoch": 0.1479171417477643, "grad_norm": 0.17652449011802673, "learning_rate": 0.00047349251438154154, "loss": 2.77, "step": 16500},
    {"epoch": 0.1488136092735083, "grad_norm": 0.1796472817659378, "learning_rate": 0.0004731761066911622, "loss": 2.7652, "step": 16600},
    {"epoch": 0.14971007679925236, "grad_norm": 0.17830465734004974, "learning_rate": 0.00047285792879254274, "loss": 2.7684, "step": 16700},
    {"epoch": 0.15060654432499637, "grad_norm": 0.17882998287677765, "learning_rate": 0.000472537983209435, "loss": 2.7709, "step": 16800},
    {"epoch": 0.1515030118507404, "grad_norm": 0.17409980297088623, "learning_rate": 0.0004722162724796122, "loss": 2.7692, "step": 16900},
    {"epoch": 0.15239947937648443, "grad_norm": 0.1783556491136551, "learning_rate": 0.00047189279915484816, "loss": 2.7736, "step": 17000},
    {"epoch": 0.15329594690222845, "grad_norm": 0.17761445045471191, "learning_rate": 0.00047156756580089766, "loss": 2.7668, "step": 17100},
    {"epoch": 0.15419241442797246, "grad_norm": 0.17673403024673462, "learning_rate": 0.00047124057499747573, "loss": 2.7712, "step": 17200},
    {"epoch": 0.1550888819537165, "grad_norm": 0.1734510213136673, "learning_rate": 0.00047091182933823737, "loss": 2.7579, "step": 17300},
    {"epoch": 0.15598534947946052, "grad_norm": 0.17702840268611908, "learning_rate": 0.0004705813314307569, "loss": 2.7612, "step": 17400},
    {"epoch": 0.15688181700520457, "grad_norm": 0.17082242667675018, "learning_rate": 0.00047024908389650704, "loss": 2.7597, "step": 17500},
    {"epoch": 0.15777828453094858, "grad_norm": 0.18025079369544983, "learning_rate": 0.00046991508937083875, "loss": 2.7559, "step": 17600},
    {"epoch": 0.1586747520566926, "grad_norm": 0.17427673935890198, "learning_rate": 0.00046957935050295963, "loss": 2.7577, "step": 17700},
    {"epoch": 0.15957121958243664, "grad_norm": 0.17229335010051727, "learning_rate": 0.0004692418699559134, "loss": 2.7559, "step": 17800},
    {"epoch": 0.16046768710818066, "grad_norm": 0.17168091237545013, "learning_rate": 0.0004689026504065585, "loss": 2.7582, "step": 17900},
    {"epoch": 0.16136415463392467, "grad_norm": 0.17163404822349548, "learning_rate": 0.0004685616945455469, "loss": 2.7621, "step": 18000},
    {"epoch": 0.16226062215966872, "grad_norm": 0.16957524418830872, "learning_rate": 0.00046821900507730275, "loss": 2.7586, "step": 18100},
    {"epoch": 0.16315708968541273, "grad_norm": 0.165022075176239, "learning_rate": 0.0004678745847200012, "loss": 2.7541, "step": 18200},
    {"epoch": 0.16405355721115678, "grad_norm": 0.17016685009002686, "learning_rate": 0.00046752843620554655, "loss": 2.7522, "step": 18300},
    {"epoch": 0.1649500247369008, "grad_norm": 0.16751013696193695, "learning_rate": 0.00046718056227955043, "loss": 2.7501, "step": 18400},
    {"epoch": 0.1658464922626448, "grad_norm": 0.1747274398803711, "learning_rate": 0.0004668309657013106, "loss": 2.7551, "step": 18500},
    {"epoch": 0.16674295978838885, "grad_norm": 0.1704045832157135, "learning_rate": 0.0004664796492437884, "loss": 2.7522, "step": 18600},
    {"epoch": 0.16763942731413287, "grad_norm": 0.16978523135185242, "learning_rate": 0.0004661266156935873, "loss": 2.7494, "step": 18700},
    {"epoch": 0.16853589483987688, "grad_norm": 0.1745961755514145, "learning_rate": 0.0004657718678509303, "loss": 2.7546, "step": 18800},
    {"epoch": 0.16943236236562093, "grad_norm": 0.16892167925834656, "learning_rate": 0.0004654154085296382, "loss": 2.7597, "step": 18900},
    {"epoch": 0.17032882989136494, "grad_norm": 0.17090590298175812, "learning_rate": 0.000465057240557107, "loss": 2.7545, "step": 19000},
    {"epoch": 0.17122529741710896, "grad_norm": 0.1694989651441574, "learning_rate": 0.00046469736677428556, "loss": 2.7525, "step": 19100},
    {"epoch": 0.172121764942853, "grad_norm": 0.16777247190475464, "learning_rate": 0.00046433579003565286, "loss": 2.7478, "step": 19200},
    {"epoch": 0.17301823246859702, "grad_norm": 0.1667390614748001, "learning_rate": 0.00046397251320919584, "loss": 2.7606, "step": 19300},
    {"epoch": 0.17391469999434106, "grad_norm": 0.16497531533241272, "learning_rate": 0.00046360753917638604, "loss": 2.7459, "step": 19400},
    {"epoch": 0.17481116752008508, "grad_norm": 0.16512423753738403, "learning_rate": 0.00046324087083215727, "loss": 2.748, "step": 19500},
    {"epoch": 0.1757076350458291, "grad_norm": 0.1689414232969284, "learning_rate": 0.0004628725110848823, "loss": 2.7553, "step": 19600},
    {"epoch": 0.17660410257157313, "grad_norm": 0.17020919919013977, "learning_rate": 0.00046250246285635, "loss": 2.7498, "step": 19700},
    {"epoch": 0.17750057009731715, "grad_norm": 0.16616366803646088, "learning_rate": 0.00046213072908174213, "loss": 2.7459, "step": 19800},
    {"epoch": 0.17839703762306117, "grad_norm": 0.16394533216953278, "learning_rate": 0.00046175731270961, "loss": 2.7494, "step": 19900},
    {"epoch": 0.1792935051488052, "grad_norm": 0.1853574514389038, "learning_rate": 0.0004613822167018508, "loss": 2.7406, "step": 20000},
    {"epoch": 0.18018997267454923, "grad_norm": 0.16652604937553406, "learning_rate": 0.00046100544403368483, "loss": 2.7364, "step": 20100},
    {"epoch": 0.18108644020029327, "grad_norm": 0.16384656727313995, "learning_rate": 0.0004606269976936314, "loss": 2.747, "step": 20200},
    {"epoch": 0.18198290772603729, "grad_norm": 0.16769762337207794, "learning_rate": 0.00046024688068348484, "loss": 2.7465, "step": 20300},
    {"epoch": 0.1828793752517813, "grad_norm": 0.15770003199577332, "learning_rate": 0.0004598650960182915, "loss": 2.7459, "step": 20400},
    {"epoch": 0.18377584277752534, "grad_norm": 0.16936561465263367, "learning_rate": 0.00045948164672632514, "loss": 2.7496, "step": 20500},
    {"epoch": 0.18467231030326936, "grad_norm": 0.16903486847877502, "learning_rate": 0.0004590965358490632, "loss": 2.7447, "step": 20600},
    {"epoch": 0.18556877782901338, "grad_norm": 0.16535791754722595, "learning_rate": 0.00045870976644116267, "loss": 2.7375, "step": 20700},
    {"epoch": 0.18646524535475742, "grad_norm": 0.16924481093883514, "learning_rate": 0.0004583213415704358, "loss": 2.7494, "step": 20800},
    {"epoch": 0.18736171288050144, "grad_norm": 0.16382168233394623, "learning_rate": 0.00045793126431782584, "loss": 2.7411, "step": 20900},
    {"epoch": 0.18825818040624548, "grad_norm": 0.16305121779441833, "learning_rate": 0.00045753953777738233, "loss": 2.7465, "step": 21000},
    {"epoch": 0.1891546479319895, "grad_norm": 0.1712082326412201, "learning_rate": 0.00045714616505623703, "loss": 2.7368, "step": 21100},
    {"epoch": 0.1900511154577335, "grad_norm": 0.15486562252044678, "learning_rate": 0.00045675114927457895, "loss": 2.7385, "step": 21200},
    {"epoch": 0.19094758298347755, "grad_norm": 0.16223442554473877, "learning_rate": 0.0004563544935656296, "loss": 2.7404, "step": 21300},
    {"epoch": 0.19184405050922157, "grad_norm": 0.16115334630012512, "learning_rate": 0.0004559562010756185, "loss": 2.7325, "step": 21400},
    {"epoch": 0.19274051803496559, "grad_norm": 0.15933333337306976, "learning_rate": 0.0004555562749637574, "loss": 2.7438, "step": 21500},
    {"epoch": 0.19363698556070963, "grad_norm": 0.16439130902290344, "learning_rate": 0.00045515471840221623, "loss": 2.7353, "step": 21600},
    {"epoch": 0.19453345308645364, "grad_norm": 0.1617208570241928, "learning_rate": 0.0004547515345760973, "loss": 2.7307, "step": 21700},
    {"epoch": 0.19542992061219766, "grad_norm": 0.16066919267177582, "learning_rate": 0.00045434672668341017, "loss": 2.7379, "step": 21800},
    {"epoch": 0.1963263881379417, "grad_norm": 0.16751715540885925, "learning_rate": 0.0004539402979350464, "loss": 2.7363, "step": 21900},
    {"epoch": 0.19722285566368572, "grad_norm": 0.16093355417251587, "learning_rate": 0.00045353225155475384, "loss": 2.7412, "step": 22000},
    {"epoch": 0.19811932318942976, "grad_norm": 0.16404040157794952, "learning_rate": 0.00045312259077911153, "loss": 2.7436, "step": 22100},
    {"epoch": 0.19901579071517378, "grad_norm": 0.15982191264629364, "learning_rate": 0.0004527113188575034, "loss": 2.735, "step": 22200},
    {"epoch": 0.1999122582409178, "grad_norm": 0.15941210091114044, "learning_rate": 0.00045229843905209287, "loss": 2.7457, "step": 22300},
    {"epoch": 0.20080872576666184, "grad_norm": 0.15950894355773926, "learning_rate": 0.00045188395463779707, "loss": 2.736, "step": 22400},
    {"epoch": 0.20170519329240585, "grad_norm": 0.1595742106437683, "learning_rate": 0.0004514678689022606, "loss": 2.7345, "step": 22500},
    {"epoch": 0.20260166081814987, "grad_norm": 0.16472984850406647, "learning_rate": 0.00045105018514582953, "loss": 2.7316, "step": 22600},
    {"epoch": 0.2034981283438939, "grad_norm": 0.1674964427947998, "learning_rate": 0.0004506309066815254, "loss": 2.7273, "step": 22700},
    {"epoch": 0.20439459586963793, "grad_norm": 0.159923255443573, "learning_rate": 0.00045021003683501853, "loss": 2.7275, "step": 22800},
    {"epoch": 0.20529106339538197, "grad_norm": 0.16554875671863556, "learning_rate": 0.0004497875789446023, "loss": 2.7388, "step": 22900},
    {"epoch": 0.206187530921126, "grad_norm": 0.16701550781726837, "learning_rate": 0.0004493635363611659, "loss": 2.7303, "step": 23000},
    {"epoch": 0.20708399844687, "grad_norm": 0.16078519821166992, "learning_rate": 0.0004489379124481683, "loss": 2.7312, "step": 23100},
    {"epoch": 0.20798046597261405, "grad_norm": 0.16200828552246094, "learning_rate": 0.0004485107105816115, "loss": 2.7358, "step": 23200},
    {"epoch": 0.20887693349835806, "grad_norm": 0.15977811813354492, "learning_rate": 0.00044808193415001344, "loss": 2.7309, "step": 23300},
    {"epoch": 0.20977340102410208, "grad_norm": 0.16767093539237976, "learning_rate": 0.0004476515865543815, "loss": 2.7315, "step": 23400},
    {"epoch": 0.21066986854984612, "grad_norm": 0.15728136897087097, "learning_rate": 0.0004472196712081852, "loss": 2.735, "step": 23500},
    {"epoch": 0.21156633607559014, "grad_norm": 0.15546581149101257, "learning_rate": 0.0004467861915373295, "loss": 2.7301, "step": 23600},
    {"epoch": 0.21246280360133415, "grad_norm": 0.15766388177871704, "learning_rate": 0.0004463511509801273, "loss": 2.7248, "step": 23700},
    {"epoch": 0.2133592711270782, "grad_norm": 0.15457496047019958, "learning_rate": 0.00044591455298727213, "loss": 2.7234, "step": 23800},
    {"epoch": 0.21425573865282221, "grad_norm": 0.15532919764518738, "learning_rate": 0.0004454764010218112, "loss": 2.7234, "step": 23900},
    {"epoch": 0.21515220617856626, "grad_norm": 0.15373288094997406, "learning_rate": 0.00044503669855911756, "loss": 2.7254, "step": 24000},
    {"epoch": 0.21604867370431027, "grad_norm": 0.15703774988651276, "learning_rate": 0.00044459544908686236, "loss": 2.7221, "step": 24100},
    {"epoch": 0.2169451412300543, "grad_norm": 0.15091052651405334, "learning_rate": 0.000444152656104988, "loss": 2.7288, "step": 24200},
    {"epoch": 0.21784160875579833, "grad_norm": 0.15579521656036377, "learning_rate": 0.0004437083231256794, "loss": 2.7192, "step": 24300},
    {"epoch": 0.21873807628154235, "grad_norm": 0.15548183023929596, "learning_rate": 0.0004432624536733367, "loss": 2.735, "step": 24400},
    {"epoch": 0.21963454380728636, "grad_norm": 0.15607748925685883, "learning_rate": 0.00044281505128454713, "loss": 2.7218, "step": 24500},
    {"epoch": 0.2205310113330304, "grad_norm": 0.16912153363227844, "learning_rate": 0.00044236611950805707, "loss": 2.7198, "step": 24600},
    {"epoch": 0.22142747885877442, "grad_norm": 0.15691158175468445, "learning_rate": 0.0004419156619047439, "loss": 2.7233, "step": 24700},
    {"epoch": 0.22232394638451847, "grad_norm": 0.1513551026582718, "learning_rate": 0.0004414636820475875, "loss": 2.7198, "step": 24800},
    {"epoch": 0.22322041391026248, "grad_norm": 0.15347003936767578, "learning_rate": 0.0004410101835216422, "loss": 2.7185, "step": 24900},
    {"epoch": 0.2241168814360065, "grad_norm": 0.15076322853565216, "learning_rate": 0.00044055516992400827, "loss": 2.7232, "step": 25000},
    {"epoch": 0.22501334896175054, "grad_norm": 0.1548827737569809, "learning_rate": 0.00044009864486380335, "loss": 2.7195, "step": 25100},
    {"epoch": 0.22590981648749456, "grad_norm": 0.15956267714500427, "learning_rate": 0.0004396406119621338, "loss": 2.7231, "step": 25200},
    {"epoch": 0.22680628401323857, "grad_norm": 0.14837653934955597, "learning_rate": 0.00043918107485206603, "loss": 2.7211, "step": 25300},
    {"epoch": 0.22770275153898262, "grad_norm": 0.15196016430854797, "learning_rate": 0.0004387200371785977, "loss": 2.7209, "step": 25400},
    {"epoch": 0.22859921906472663, "grad_norm": 0.15298311412334442, "learning_rate": 0.00043825750259862873, "loss": 2.7132, "step": 25500},
    {"epoch": 0.22949568659047065, "grad_norm": 0.16172164678573608, "learning_rate": 0.00043779347478093225, "loss": 2.72, "step": 25600},
    {"epoch": 0.2303921541162147, "grad_norm": 0.15316250920295715, "learning_rate": 0.0004373279574061258, "loss": 2.7284, "step": 25700},
    {"epoch": 0.2312886216419587, "grad_norm": 0.1559763252735138, "learning_rate": 0.0004368609541666417, "loss": 2.7287, "step": 25800},
    {"epoch": 0.23218508916770275, "grad_norm": 0.15110167860984802, "learning_rate": 0.0004363924687666982, "loss": 2.7144, "step": 25900},
    {"epoch": 0.23308155669344677, "grad_norm": 0.15237966179847717, "learning_rate": 0.0004359225049222697, "loss": 2.7236, "step": 26000},
    {"epoch": 0.23397802421919078, "grad_norm": 0.1501353681087494, "learning_rate": 0.0004354510663610576, "loss": 2.7132, "step": 26100},
    {"epoch": 0.23487449174493483, "grad_norm": 0.15224196016788483, "learning_rate": 0.00043497815682246044, "loss": 2.7213, "step": 26200},
    {"epoch": 0.23577095927067884, "grad_norm": 0.1475629061460495, "learning_rate": 0.00043450378005754453, "loss": 2.7138, "step": 26300},
    {"epoch": 0.23666742679642286, "grad_norm": 0.15193389356136322, "learning_rate": 0.000434027939829014, "loss": 2.7079, "step": 26400},
    {"epoch": 0.2375638943221669, "grad_norm": 0.1544477641582489, "learning_rate": 0.00043355063991118095, "loss": 2.7213, "step": 26500},
    {"epoch": 0.23846036184791092, "grad_norm": 0.14745964109897614, "learning_rate": 0.0004330718840899357, "loss": 2.7175, "step": 26600},
    {"epoch": 0.23935682937365496, "grad_norm": 0.1513916403055191, "learning_rate": 0.00043259167616271644, "loss": 2.7168, "step": 26700},
    {"epoch": 0.24025329689939898, "grad_norm": 0.15179474651813507, "learning_rate": 0.00043211001993847967, "loss": 2.7099, "step": 26800},
    {"epoch": 0.241149764425143, "grad_norm": 0.1514696627855301, "learning_rate": 0.00043162691923766917, "loss": 2.7201, "step": 26900},
    {"epoch": 0.24204623195088704, "grad_norm": 0.15021684765815735, "learning_rate": 0.0004311423778921865, "loss": 2.7178, "step": 27000},
    {"epoch": 0.24294269947663105, "grad_norm": 0.15158309042453766, "learning_rate": 0.0004306563997453601, "loss": 2.7076, "step": 27100},
    {"epoch": 0.24383916700237507, "grad_norm": 0.14725959300994873, "learning_rate": 0.00043016898865191487, "loss": 2.7078, "step": 27200},
    {"epoch": 0.2447356345281191, "grad_norm": 0.14671307802200317, "learning_rate": 0.0004296801484779419, "loss": 2.7109, "step": 27300},
    {"epoch": 0.24563210205386313, "grad_norm": 0.1534958928823471, "learning_rate": 0.0004291898831008675, "loss": 2.7154, "step": 27400},
    {"epoch": 0.24652856957960714, "grad_norm": 0.14696376025676727, "learning_rate": 0.00042869819640942245, "loss": 2.704, "step": 27500},
    {"epoch": 0.2474250371053512, "grad_norm": 0.15045034885406494, "learning_rate": 0.00042820509230361134, "loss": 2.7145, "step": 27600},
    {"epoch": 0.2483215046310952, "grad_norm": 0.1504196673631668, "learning_rate": 0.0004277105746946814, "loss": 2.7076, "step": 27700},
    {"epoch": 0.24921797215683925, "grad_norm": 0.14724665880203247, "learning_rate": 0.00042721464750509195, "loss": 2.7034, "step": 27800},
    {"epoch": 0.25011443968258323, "grad_norm": 0.14712798595428467, "learning_rate": 0.00042671731466848253, "loss": 2.7139, "step": 27900},
    {"epoch": 0.2510109072083273, "grad_norm": 0.15577539801597595, "learning_rate": 0.0004262185801296422, "loss": 2.7077, "step": 28000},
    {"epoch": 0.2519073747340713, "grad_norm": 0.14991851150989532, "learning_rate": 0.0004257184478444785, "loss": 2.7028, "step": 28100},
    {"epoch": 0.25280384225981534, "grad_norm": 0.14650234580039978, "learning_rate": 0.00042521692177998537, "loss": 2.7069, "step": 28200},
    {"epoch": 0.25370030978555935, "grad_norm": 0.1475357860326767, "learning_rate": 0.0004247140059142123, "loss": 2.7074, "step": 28300},
    {"epoch": 0.25459677731130337, "grad_norm": 0.14877575635910034, "learning_rate": 0.0004242097042362322, "loss": 2.6997, "step": 28400},
    {"epoch": 0.25549324483704744, "grad_norm": 0.15285931527614594, "learning_rate": 0.0004237040207461104, "loss": 2.7016, "step": 28500},
    {"epoch": 0.25638971236279146, "grad_norm": 0.1494821012020111, "learning_rate": 0.00042319695945487257, "loss": 2.7093, "step": 28600},
    {"epoch": 0.25728617988853547, "grad_norm": 0.14891798794269562, "learning_rate": 0.00042268852438447297, "loss": 2.7037, "step": 28700},
    {"epoch": 0.2581826474142795, "grad_norm": 0.14432930946350098, "learning_rate": 0.0004221787195677623, "loss": 2.6977, "step": 28800},
    {"epoch": 0.2590791149400235, "grad_norm": 0.1493179351091385, "learning_rate": 0.0004216675490484561, "loss": 2.7135, "step": 28900},
    {"epoch": 0.2599755824657676, "grad_norm": 0.14740417897701263, "learning_rate": 0.00042115501688110257, "loss": 2.7079, "step": 29000},
    {"epoch": 0.2608720499915116, "grad_norm": 0.1458692103624344, "learning_rate": 0.0004206411271310502, "loss": 2.7029, "step": 29100},
    {"epoch": 0.2617685175172556, "grad_norm": 0.14873549342155457, "learning_rate": 0.00042012588387441586, "loss": 2.7096, "step": 29200},
    {"epoch": 0.2626649850429996, "grad_norm": 0.14170105755329132, "learning_rate": 0.00041960929119805215, "loss": 2.7045, "step": 29300},
    {"epoch": 0.26356145256874364, "grad_norm": 0.14562779664993286, "learning_rate": 0.00041909135319951495, "loss": 2.7044, "step": 29400},
    {"epoch": 0.26445792009448765, "grad_norm": 0.14775702357292175, "learning_rate": 0.00041857207398703154, "loss": 2.7016, "step": 29500},
    {"epoch": 0.2653543876202317, "grad_norm": 0.14575980603694916, "learning_rate": 0.0004180514576794673, "loss": 2.7095, "step": 29600},
    {"epoch": 0.26625085514597574, "grad_norm": 0.14411023259162903, "learning_rate": 0.0004175295084062931, "loss": 2.7031, "step": 29700},
    {"epoch": 0.26714732267171976, "grad_norm": 0.15136118233203888, "learning_rate": 0.0004170062303075531, "loss": 2.7052, "step": 29800},
    {"epoch": 0.26804379019746377, "grad_norm": 0.14870133996009827, "learning_rate": 0.00041648162753383144, "loss": 2.7044, "step": 29900},
    {"epoch": 0.2689402577232078, "grad_norm": 0.1507934033870697, "learning_rate": 0.0004159557042462193, "loss": 2.7113, "step": 30000},
    {"epoch": 0.26983672524895186, "grad_norm": 0.14786238968372345, "learning_rate": 0.0004154284646162822, "loss": 2.6978, "step": 30100},
    {"epoch": 0.2707331927746959, "grad_norm": 0.14871017634868622, "learning_rate": 0.00041489991282602667, "loss": 2.6969, "step": 30200},
    {"epoch": 0.2716296603004399, "grad_norm": 0.1434573084115982, "learning_rate": 0.00041437005306786716, "loss": 2.7015, "step": 30300},
    {"epoch": 0.2725261278261839, "grad_norm": 0.14657068252563477, "learning_rate": 0.0004138388895445928, "loss": 2.6979, "step": 30400},
    {"epoch": 0.2734225953519279, "grad_norm": 0.14329074323177338, "learning_rate": 0.00041330642646933397, "loss": 2.7004, "step": 30500},
    {"epoch": 0.27431906287767194, "grad_norm": 0.14054018259048462, "learning_rate": 0.00041277266806552906, "loss": 2.7023, "step": 30600},
    {"epoch": 0.275215530403416, "grad_norm": 0.1455359160900116, "learning_rate": 0.00041223761856689067, "loss": 2.6972, "step": 30700},
    {"epoch": 0.27611199792916, "grad_norm": 0.14418181777000427, "learning_rate": 0.0004117012822173725, "loss": 2.7018, "step": 30800},
    {"epoch": 0.27700846545490404, "grad_norm": 0.14956611394882202, "learning_rate": 0.0004111636632711353, "loss": 2.7062, "step": 30900},
    {"epoch": 0.27790493298064806, "grad_norm": 0.15083587169647217, "learning_rate": 0.00041062476599251297, "loss": 2.6939, "step": 31000},
    {"epoch": 0.27880140050639207, "grad_norm": 0.14435066282749176, "learning_rate": 0.00041008459465597947, "loss": 2.699, "step": 31100},
    {"epoch": 0.27969786803213614, "grad_norm": 0.14453698694705963, "learning_rate": 0.00040954315354611403, "loss": 2.7017, "step": 31200},
    {"epoch": 0.28059433555788016, "grad_norm": 0.14497888088226318, "learning_rate": 0.0004090004469575679, "loss": 2.7007, "step": 31300},
    {"epoch": 0.2814908030836242, "grad_norm": 0.1438531130552292, "learning_rate": 0.0004084564791950298, "loss": 2.7045, "step": 31400},
    {"epoch": 0.2823872706093682, "grad_norm": 0.14402426779270172, "learning_rate": 0.0004079112545731919, "loss": 2.6936, "step": 31500},
    {"epoch": 0.2832837381351122, "grad_norm": 0.14424774050712585, "learning_rate": 0.00040736477741671576, "loss": 2.6976, "step": 31600},
    {"epoch": 0.2841802056608562, "grad_norm": 0.1417878419160843, "learning_rate": 0.0004068170520601978, "loss": 2.6965, "step": 31700},
    {"epoch": 0.2850766731866003, "grad_norm": 0.14787960052490234, "learning_rate": 0.0004062680828481352, "loss": 2.6921, "step": 31800},
    {"epoch": 0.2859731407123443, "grad_norm": 0.14073611795902252, "learning_rate": 0.00040571787413489104, "loss": 2.6997, "step": 31900},
    {"epoch": 0.2868696082380883, "grad_norm": 0.13744498789310455, "learning_rate": 0.0004051664302846601, "loss": 2.6971, "step": 32000},
    {"epoch": 0.28776607576383234, "grad_norm": 0.1451827883720398, "learning_rate": 0.00040461375567143413, "loss": 2.6972, "step": 32100},
    {"epoch": 0.28866254328957636, "grad_norm": 0.14334948360919952, "learning_rate": 0.0004040598546789672, "loss": 2.6915, "step": 32200},
    {"epoch": 0.28955901081532043, "grad_norm": 0.14681456983089447, "learning_rate": 0.00040350473170074075, "loss": 2.6943, "step": 32300},
    {"epoch": 0.29045547834106444, "grad_norm": 0.14370225369930267, "learning_rate": 0.00040294839113992907, "loss": 2.6919, "step": 32400},
    {"epoch": 0.29135194586680846, "grad_norm": 0.14142639935016632, "learning_rate": 0.000402390837409364, "loss": 2.6997, "step": 32500},
    {"epoch": 0.2922484133925525, "grad_norm": 0.14680539071559906, "learning_rate": 0.0004018320749315001, "loss": 2.6939, "step": 32600},
    {"epoch": 0.2931448809182965, "grad_norm": 0.14420635998249054, "learning_rate": 0.00040127210813837975, "loss": 2.6892, "step": 32700},
    {"epoch": 0.29404134844404056, "grad_norm": 0.1437712162733078, "learning_rate": 0.00040071094147159776, "loss": 2.695, "step": 32800},
    {"epoch": 0.2949378159697846, "grad_norm": 0.14098462462425232, "learning_rate": 0.00040014857938226614, "loss": 2.6851, "step": 32900},
    {"epoch": 0.2958342834955286, "grad_norm": 0.13981753587722778, "learning_rate": 0.00039958502633097895, "loss": 2.6938, "step": 33000},
    {"epoch": 0.2967307510212726, "grad_norm": 0.13768906891345978, "learning_rate": 0.0003990202867877766, "loss": 2.6834, "step": 33100},
    {"epoch": 0.2976272185470166, "grad_norm": 0.13921628892421722, "learning_rate": 0.000398454365232111, "loss": 2.6977, "step": 33200},
    {"epoch": 0.29852368607276064, "grad_norm": 0.14550244808197021, "learning_rate": 0.0003978872661528094, "loss": 2.6897, "step": 33300},
    {"epoch": 0.2994201535985047, "grad_norm": 0.13877898454666138, "learning_rate": 0.00039731899404803905, "loss": 2.7007, "step": 33400},
    {"epoch": 0.30031662112424873, "grad_norm": 0.14092972874641418, "learning_rate": 0.00039674955342527165, "loss": 2.6964, "step": 33500},
    {"epoch": 0.30121308864999274, "grad_norm": 0.13762860000133514, "learning_rate": 0.00039617894880124716, "loss": 2.6908, "step": 33600},
    {"epoch": 0.30210955617573676, "grad_norm": 0.14549992978572845, "learning_rate": 0.00039560718470193866, "loss": 2.6874, "step": 33700},
    {"epoch": 0.3030060237014808, "grad_norm": 0.1463141292333603, "learning_rate": 0.00039503426566251575, "loss": 2.6867, "step": 33800},
    {"epoch": 0.30390249122722485, "grad_norm": 0.13913968205451965, "learning_rate": 0.0003944601962273091, "loss": 2.6949, "step": 33900},
    {"epoch": 0.30479895875296886, "grad_norm": 0.14150671660900116, "learning_rate": 0.000393884980949774, "loss": 2.6921, "step": 34000},
    {"epoch": 0.3056954262787129, "grad_norm": 0.14346466958522797, "learning_rate": 0.0003933086243924545, "loss": 2.6935, "step": 34100},
    {"epoch": 0.3065918938044569, "grad_norm": 0.14826858043670654, "learning_rate": 0.00039273113112694736, "loss": 2.6841, "step": 34200},
    {"epoch": 0.3074883613302009, "grad_norm": 0.13878753781318665, "learning_rate": 0.0003921525057338652, "loss": 2.6858, "step": 34300},
    {"epoch": 0.3083848288559449, "grad_norm": 0.1484604924917221, "learning_rate": 0.0003915727528028009, "loss": 2.6928, "step": 34400},
    {"epoch": 0.309281296381689, "grad_norm": 0.14599835872650146, "learning_rate": 0.00039099187693229066, "loss": 2.6878, "step": 34500},
    {"epoch": 0.310177763907433, "grad_norm": 0.14739172160625458, "learning_rate": 0.0003904098827297777, "loss": 2.6846, "step": 34600},
    {"epoch": 0.31107423143317703, "grad_norm": 0.14112310111522675, "learning_rate": 0.0003898267748115759, "loss": 2.6879, "step": 34700},
    {"epoch": 0.31197069895892104, "grad_norm": 0.13632678985595703, "learning_rate": 0.00038924255780283277, "loss": 2.6899, "step": 34800},
    {"epoch": 0.31286716648466506, "grad_norm": 0.14148631691932678, "learning_rate": 0.0003886572363374933, "loss": 2.6901, "step": 34900},
    {"epoch": 0.31376363401040913, "grad_norm": 0.14057014882564545, "learning_rate": 0.0003880708150582626, "loss": 2.6896, "step": 35000},
    {"epoch": 0.31466010153615315, "grad_norm": 0.1407003551721573, "learning_rate": 0.00038748329861656945, "loss": 2.6833, "step": 35100},
    {"epoch": 0.31555656906189716, "grad_norm": 0.1437043398618698, "learning_rate": 0.00038689469167252957, "loss": 2.6878, "step": 35200},
    {"epoch": 0.3164530365876412, "grad_norm": 0.13795500993728638, "learning_rate": 0.0003863049988949081, "loss": 2.6831, "step": 35300},
    {"epoch": 0.3173495041133852, "grad_norm": 0.13427403569221497, "learning_rate": 0.0003857142249610833, "loss": 2.6855, "step": 35400},
    {"epoch": 0.31824597163912927, "grad_norm": 0.14448797702789307, "learning_rate": 0.0003851223745570085, "loss": 2.6934, "step": 35500},
    {"epoch": 0.3191424391648733, "grad_norm": 0.13982614874839783, "learning_rate": 0.00038452945237717597, "loss": 2.6853, "step": 35600},
    {"epoch": 0.3200389066906173, "grad_norm": 0.1487363576889038, "learning_rate": 0.0003839354631245789, "loss": 2.6812, "step": 35700},
    {"epoch": 0.3209353742163613, "grad_norm": 0.1380891501903534, "learning_rate": 0.00038334041151067447, "loss": 2.6825, "step": 35800},
    {"epoch": 0.32183184174210533, "grad_norm": 0.14058321714401245, "learning_rate": 0.00038274430225534637, "loss": 2.6868, "step": 35900},
    {"epoch": 0.32272830926784934, "grad_norm": 0.1391235589981079, "learning_rate": 0.00038214714008686745, "loss": 2.6914, "step": 36000},
{ |
|
"epoch": 0.3236247767935934, |
|
"grad_norm": 0.13752683997154236, |
|
"learning_rate": 0.0003815489297418621, |
|
"loss": 2.6854, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.32452124431933743, |
|
"grad_norm": 0.14746305346488953, |
|
"learning_rate": 0.00038094967596526873, |
|
"loss": 2.6827, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.32541771184508145, |
|
"grad_norm": 0.14420130848884583, |
|
"learning_rate": 0.00038034938351030206, |
|
"loss": 2.6876, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.32631417937082546, |
|
"grad_norm": 0.1397886425256729, |
|
"learning_rate": 0.0003797480571384157, |
|
"loss": 2.6798, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.3272106468965695, |
|
"grad_norm": 0.14020013809204102, |
|
"learning_rate": 0.00037914570161926405, |
|
"loss": 2.6868, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.32810711442231355, |
|
"grad_norm": 0.13931307196617126, |
|
"learning_rate": 0.0003785423217306645, |
|
"loss": 2.6836, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.32900358194805757, |
|
"grad_norm": 0.14019936323165894, |
|
"learning_rate": 0.00037793792225855973, |
|
"loss": 2.6871, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.3299000494738016, |
|
"grad_norm": 0.13788673281669617, |
|
"learning_rate": 0.0003773325079969796, |
|
"loss": 2.6785, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.3307965169995456, |
|
"grad_norm": 0.13793878257274628, |
|
"learning_rate": 0.0003767260837480032, |
|
"loss": 2.6785, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.3316929845252896, |
|
"grad_norm": 0.13882067799568176, |
|
"learning_rate": 0.0003761186543217209, |
|
"loss": 2.6791, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.33258945205103363, |
|
"grad_norm": 0.1409679502248764, |
|
"learning_rate": 0.00037551022453619564, |
|
"loss": 2.6809, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.3334859195767777, |
|
"grad_norm": 0.1374359279870987, |
|
"learning_rate": 0.0003749007992174254, |
|
"loss": 2.6836, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.3343823871025217, |
|
"grad_norm": 0.14376886188983917, |
|
"learning_rate": 0.00037429038319930453, |
|
"loss": 2.6798, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.33527885462826573, |
|
"grad_norm": 0.1401790827512741, |
|
"learning_rate": 0.00037367898132358544, |
|
"loss": 2.6711, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.33617532215400975, |
|
"grad_norm": 0.13772746920585632, |
|
"learning_rate": 0.0003730665984398404, |
|
"loss": 2.6768, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.33707178967975376, |
|
"grad_norm": 0.14339695870876312, |
|
"learning_rate": 0.0003724532394054228, |
|
"loss": 2.6825, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.33796825720549784, |
|
"grad_norm": 0.13825613260269165, |
|
"learning_rate": 0.0003718389090854287, |
|
"loss": 2.6796, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.33886472473124185, |
|
"grad_norm": 0.13653145730495453, |
|
"learning_rate": 0.00037122361235265855, |
|
"loss": 2.6715, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.33976119225698587, |
|
"grad_norm": 0.13996466994285583, |
|
"learning_rate": 0.00037060735408757795, |
|
"loss": 2.6852, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.3406576597827299, |
|
"grad_norm": 0.13708461821079254, |
|
"learning_rate": 0.0003699901391782795, |
|
"loss": 2.6801, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.3415541273084739, |
|
"grad_norm": 0.1354062259197235, |
|
"learning_rate": 0.00036937197252044374, |
|
"loss": 2.6748, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.3424505948342179, |
|
"grad_norm": 0.13771981000900269, |
|
"learning_rate": 0.00036875285901730035, |
|
"loss": 2.6823, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.343347062359962, |
|
"grad_norm": 0.13679315149784088, |
|
"learning_rate": 0.0003681328035795892, |
|
"loss": 2.6733, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.344243529885706, |
|
"grad_norm": 0.14163535833358765, |
|
"learning_rate": 0.0003675118111255216, |
|
"loss": 2.6796, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.34513999741145, |
|
"grad_norm": 0.13825653493404388, |
|
"learning_rate": 0.00036688988658074124, |
|
"loss": 2.6762, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.34603646493719403, |
|
"grad_norm": 0.14156493544578552, |
|
"learning_rate": 0.00036626703487828487, |
|
"loss": 2.6694, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.34693293246293805, |
|
"grad_norm": 0.13615700602531433, |
|
"learning_rate": 0.0003656432609585435, |
|
"loss": 2.6779, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.3478293999886821, |
|
"grad_norm": 0.13561411201953888, |
|
"learning_rate": 0.0003650185697692229, |
|
"loss": 2.6838, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.34872586751442614, |
|
"grad_norm": 0.1366826444864273, |
|
"learning_rate": 0.0003643929662653046, |
|
"loss": 2.6663, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.34962233504017015, |
|
"grad_norm": 0.14089661836624146, |
|
"learning_rate": 0.00036376645540900663, |
|
"loss": 2.6708, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.35051880256591417, |
|
"grad_norm": 0.13665175437927246, |
|
"learning_rate": 0.000363139042169744, |
|
"loss": 2.6705, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.3514152700916582, |
|
"grad_norm": 0.13564077019691467, |
|
"learning_rate": 0.0003625107315240891, |
|
"loss": 2.6698, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.35231173761740225, |
|
"grad_norm": 0.14218759536743164, |
|
"learning_rate": 0.0003618815284557326, |
|
"loss": 2.6816, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.35320820514314627, |
|
"grad_norm": 0.14430591464042664, |
|
"learning_rate": 0.00036125143795544387, |
|
"loss": 2.6763, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.3541046726688903, |
|
"grad_norm": 0.13792584836483002, |
|
"learning_rate": 0.0003606204650210312, |
|
"loss": 2.6674, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.3550011401946343, |
|
"grad_norm": 0.136796236038208, |
|
"learning_rate": 0.0003599886146573022, |
|
"loss": 2.6718, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.3558976077203783, |
|
"grad_norm": 0.13962402939796448, |
|
"learning_rate": 0.00035935589187602426, |
|
"loss": 2.6776, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.35679407524612233, |
|
"grad_norm": 0.1402391791343689, |
|
"learning_rate": 0.0003587223016958845, |
|
"loss": 2.6739, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.3576905427718664, |
|
"grad_norm": 0.13909810781478882, |
|
"learning_rate": 0.0003580878491424504, |
|
"loss": 2.6799, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.3585870102976104, |
|
"grad_norm": 0.13748426735401154, |
|
"learning_rate": 0.0003574525392481295, |
|
"loss": 2.6765, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.35948347782335444, |
|
"grad_norm": 0.132966548204422, |
|
"learning_rate": 0.0003568163770521299, |
|
"loss": 2.6702, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.36037994534909845, |
|
"grad_norm": 0.13667432963848114, |
|
"learning_rate": 0.00035617936760041976, |
|
"loss": 2.6645, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.36127641287484247, |
|
"grad_norm": 0.14251597225666046, |
|
"learning_rate": 0.00035554151594568767, |
|
"loss": 2.6719, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.36217288040058654, |
|
"grad_norm": 0.1350788176059723, |
|
"learning_rate": 0.00035490282714730273, |
|
"loss": 2.6701, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.36306934792633055, |
|
"grad_norm": 0.1342398077249527, |
|
"learning_rate": 0.0003542633062712738, |
|
"loss": 2.6653, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.36396581545207457, |
|
"grad_norm": 0.13306699693202972, |
|
"learning_rate": 0.00035362295839020996, |
|
"loss": 2.6724, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.3648622829778186, |
|
"grad_norm": 0.13337896764278412, |
|
"learning_rate": 0.0003529817885832799, |
|
"loss": 2.6633, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.3657587505035626, |
|
"grad_norm": 0.13504943251609802, |
|
"learning_rate": 0.00035233980193617184, |
|
"loss": 2.6678, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.3666552180293066, |
|
"grad_norm": 0.1362612545490265, |
|
"learning_rate": 0.0003516970035410527, |
|
"loss": 2.6669, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.3675516855550507, |
|
"grad_norm": 0.1384200006723404, |
|
"learning_rate": 0.00035105339849652873, |
|
"loss": 2.6706, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.3684481530807947, |
|
"grad_norm": 0.1365375816822052, |
|
"learning_rate": 0.0003504089919076039, |
|
"loss": 2.6647, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.3693446206065387, |
|
"grad_norm": 0.13598588109016418, |
|
"learning_rate": 0.00034976378888564014, |
|
"loss": 2.6647, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.37024108813228274, |
|
"grad_norm": 0.13357794284820557, |
|
"learning_rate": 0.00034911779454831665, |
|
"loss": 2.6694, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.37113755565802675, |
|
"grad_norm": 0.13431541621685028, |
|
"learning_rate": 0.00034847101401958914, |
|
"loss": 2.6644, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.3720340231837708, |
|
"grad_norm": 0.13836424052715302, |
|
"learning_rate": 0.0003478234524296494, |
|
"loss": 2.6752, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.37293049070951484, |
|
"grad_norm": 0.13321442902088165, |
|
"learning_rate": 0.00034717511491488454, |
|
"loss": 2.6755, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.37382695823525885, |
|
"grad_norm": 0.13987848162651062, |
|
"learning_rate": 0.00034652600661783594, |
|
"loss": 2.6666, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.37472342576100287, |
|
"grad_norm": 0.1362723857164383, |
|
"learning_rate": 0.00034587613268715917, |
|
"loss": 2.6736, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.3756198932867469, |
|
"grad_norm": 0.13275477290153503, |
|
"learning_rate": 0.0003452254982775824, |
|
"loss": 2.6658, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.37651636081249096, |
|
"grad_norm": 0.13551685214042664, |
|
"learning_rate": 0.0003445741085498659, |
|
"loss": 2.6666, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.377412828338235, |
|
"grad_norm": 0.13056619465351105, |
|
"learning_rate": 0.00034392196867076124, |
|
"loss": 2.6638, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.378309295863979, |
|
"grad_norm": 0.1297016590833664, |
|
"learning_rate": 0.0003432690838129698, |
|
"loss": 2.6666, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.379205763389723, |
|
"grad_norm": 0.1312173455953598, |
|
"learning_rate": 0.00034261545915510223, |
|
"loss": 2.6665, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.380102230915467, |
|
"grad_norm": 0.13310836255550385, |
|
"learning_rate": 0.00034196109988163714, |
|
"loss": 2.6629, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.38099869844121104, |
|
"grad_norm": 0.13753747940063477, |
|
"learning_rate": 0.0003413060111828801, |
|
"loss": 2.6682, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.3818951659669551, |
|
"grad_norm": 0.13357776403427124, |
|
"learning_rate": 0.00034065019825492237, |
|
"loss": 2.6657, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.3827916334926991, |
|
"grad_norm": 0.13426382839679718, |
|
"learning_rate": 0.00033999366629959956, |
|
"loss": 2.6663, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.38368810101844314, |
|
"grad_norm": 0.14345508813858032, |
|
"learning_rate": 0.0003393364205244508, |
|
"loss": 2.6652, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.38458456854418716, |
|
"grad_norm": 0.1440078616142273, |
|
"learning_rate": 0.00033867846614267695, |
|
"loss": 2.6729, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.38548103606993117, |
|
"grad_norm": 0.1365692913532257, |
|
"learning_rate": 0.00033801980837309953, |
|
"loss": 2.6725, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.38637750359567524, |
|
"grad_norm": 0.13232523202896118, |
|
"learning_rate": 0.0003373604524401193, |
|
"loss": 2.6655, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.38727397112141926, |
|
"grad_norm": 0.13531488180160522, |
|
"learning_rate": 0.0003367004035736747, |
|
"loss": 2.6694, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.3881704386471633, |
|
"grad_norm": 0.1343296617269516, |
|
"learning_rate": 0.0003360396670092004, |
|
"loss": 2.6576, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.3890669061729073, |
|
"grad_norm": 0.13563938438892365, |
|
"learning_rate": 0.00033537824798758603, |
|
"loss": 2.6682, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.3899633736986513, |
|
"grad_norm": 0.13184553384780884, |
|
"learning_rate": 0.00033471615175513416, |
|
"loss": 2.6544, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.3908598412243953, |
|
"grad_norm": 0.13147491216659546, |
|
"learning_rate": 0.00033405338356351906, |
|
"loss": 2.659, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.3917563087501394, |
|
"grad_norm": 0.128835991024971, |
|
"learning_rate": 0.00033338994866974477, |
|
"loss": 2.668, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.3926527762758834, |
|
"grad_norm": 0.13172951340675354, |
|
"learning_rate": 0.00033272585233610363, |
|
"loss": 2.668, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.3935492438016274, |
|
"grad_norm": 0.13852158188819885, |
|
"learning_rate": 0.00033206109983013464, |
|
"loss": 2.6618, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.39444571132737144, |
|
"grad_norm": 0.13343538343906403, |
|
"learning_rate": 0.00033139569642458104, |
|
"loss": 2.6694, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.39534217885311546, |
|
"grad_norm": 0.13819359242916107, |
|
"learning_rate": 0.00033072964739734936, |
|
"loss": 2.6645, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.3962386463788595, |
|
"grad_norm": 0.14136740565299988, |
|
"learning_rate": 0.0003300629580314668, |
|
"loss": 2.6596, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.39713511390460354, |
|
"grad_norm": 0.13478617370128632, |
|
"learning_rate": 0.00032939563361503995, |
|
"loss": 2.6682, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.39803158143034756, |
|
"grad_norm": 0.13910335302352905, |
|
"learning_rate": 0.00032872767944121234, |
|
"loss": 2.6561, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.3989280489560916, |
|
"grad_norm": 0.12869343161582947, |
|
"learning_rate": 0.0003280591008081227, |
|
"loss": 2.6698, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.3998245164818356, |
|
"grad_norm": 0.13684409856796265, |
|
"learning_rate": 0.00032738990301886306, |
|
"loss": 2.6606, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.4007209840075796, |
|
"grad_norm": 0.13658647239208221, |
|
"learning_rate": 0.00032672009138143634, |
|
"loss": 2.6641, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.4016174515333237, |
|
"grad_norm": 0.13844211399555206, |
|
"learning_rate": 0.00032604967120871444, |
|
"loss": 2.6548, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.4025139190590677, |
|
"grad_norm": 0.12905430793762207, |
|
"learning_rate": 0.0003253786478183963, |
|
"loss": 2.6601, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.4034103865848117, |
|
"grad_norm": 0.1291283369064331, |
|
"learning_rate": 0.00032470702653296515, |
|
"loss": 2.659, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.4043068541105557, |
|
"grad_norm": 0.1292611062526703, |
|
"learning_rate": 0.0003240348126796471, |
|
"loss": 2.6575, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.40520332163629974, |
|
"grad_norm": 0.13455387949943542, |
|
"learning_rate": 0.00032336201159036815, |
|
"loss": 2.6514, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.4060997891620438, |
|
"grad_norm": 0.12952381372451782, |
|
"learning_rate": 0.00032268862860171214, |
|
"loss": 2.6559, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.4069962566877878, |
|
"grad_norm": 0.13285262882709503, |
|
"learning_rate": 0.00032201466905487885, |
|
"loss": 2.66, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.40789272421353184, |
|
"grad_norm": 0.13195694983005524, |
|
"learning_rate": 0.0003213401382956408, |
|
"loss": 2.6638, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.40878919173927586, |
|
"grad_norm": 0.12948913872241974, |
|
"learning_rate": 0.0003206650416743017, |
|
"loss": 2.6568, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.4096856592650199, |
|
"grad_norm": 0.13518835604190826, |
|
"learning_rate": 0.0003199893845456531, |
|
"loss": 2.6584, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.41058212679076395, |
|
"grad_norm": 0.13390909135341644, |
|
"learning_rate": 0.00031931317226893295, |
|
"loss": 2.6477, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.41147859431650796, |
|
"grad_norm": 0.13253462314605713, |
|
"learning_rate": 0.00031863641020778247, |
|
"loss": 2.6549, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.412375061842252, |
|
"grad_norm": 0.13521717488765717, |
|
"learning_rate": 0.0003179591037302035, |
|
"loss": 2.6527, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.413271529367996, |
|
"grad_norm": 0.14094848930835724, |
|
"learning_rate": 0.0003172812582085163, |
|
"loss": 2.6608, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.41416799689374, |
|
"grad_norm": 0.1350257843732834, |
|
"learning_rate": 0.0003166028790193166, |
|
"loss": 2.6568, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.415064464419484, |
|
"grad_norm": 0.1422998309135437, |
|
"learning_rate": 0.00031592397154343334, |
|
"loss": 2.6567, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.4159609319452281, |
|
"grad_norm": 0.12808671593666077, |
|
"learning_rate": 0.0003152445411658856, |
|
"loss": 2.6528, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.4168573994709721, |
|
"grad_norm": 0.12911584973335266, |
|
"learning_rate": 0.00031456459327584027, |
|
"loss": 2.6641, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.41775386699671613, |
|
"grad_norm": 0.13506324589252472, |
|
"learning_rate": 0.00031388413326656885, |
|
"loss": 2.655, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.41865033452246014, |
|
"grad_norm": 0.13051171600818634, |
|
"learning_rate": 0.00031320316653540495, |
|
"loss": 2.6625, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.41954680204820416, |
|
"grad_norm": 0.1310121864080429, |
|
"learning_rate": 0.00031252169848370155, |
|
"loss": 2.6587, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.42044326957394823, |
|
"grad_norm": 0.13700339198112488, |
|
"learning_rate": 0.00031183973451678806, |
|
"loss": 2.6501, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.42133973709969225, |
|
"grad_norm": 0.13302241265773773, |
|
"learning_rate": 0.0003111572800439273, |
|
"loss": 2.6557, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.42223620462543626, |
|
"grad_norm": 0.13645824790000916, |
|
"learning_rate": 0.00031047434047827294, |
|
"loss": 2.655, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.4231326721511803, |
|
"grad_norm": 0.13405530154705048, |
|
"learning_rate": 0.0003097909212368261, |
|
"loss": 2.6616, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.4240291396769243, |
|
"grad_norm": 0.1315753012895584, |
|
"learning_rate": 0.0003091070277403927, |
|
"loss": 2.6516, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.4249256072026683, |
|
"grad_norm": 0.13063915073871613, |
|
"learning_rate": 0.0003084226654135406, |
|
"loss": 2.6581, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.4258220747284124, |
|
"grad_norm": 0.1367933750152588, |
|
"learning_rate": 0.00030773783968455614, |
|
"loss": 2.6541, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.4267185422541564, |
|
"grad_norm": 0.13026690483093262, |
|
"learning_rate": 0.0003070525559854015, |
|
"loss": 2.651, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.4276150097799004, |
|
"grad_norm": 0.13831181824207306, |
|
"learning_rate": 0.00030636681975167114, |
|
"loss": 2.6596, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.42851147730564443, |
|
"grad_norm": 0.13564659655094147, |
|
"learning_rate": 0.0003056806364225493, |
|
"loss": 2.6502, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.42940794483138844, |
|
"grad_norm": 0.13234366476535797, |
|
"learning_rate": 0.00030499401144076636, |
|
"loss": 2.647, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.4303044123571325, |
|
"grad_norm": 0.13098286092281342, |
|
"learning_rate": 0.00030430695025255596, |
|
"loss": 2.6519, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.43120087988287653, |
|
"grad_norm": 0.13402335345745087, |
|
"learning_rate": 0.00030361945830761146, |
|
"loss": 2.6581, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.43209734740862055, |
|
"grad_norm": 0.13898345828056335, |
|
"learning_rate": 0.0003029315410590431, |
|
"loss": 2.652, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.43299381493436456, |
|
"grad_norm": 0.1286890059709549, |
|
"learning_rate": 0.00030224320396333456, |
|
"loss": 2.6529, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.4338902824601086, |
|
"grad_norm": 0.13320975005626678, |
|
"learning_rate": 0.00030155445248029975, |
|
"loss": 2.6533, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.4347867499858526, |
|
"grad_norm": 0.128444641828537, |
|
"learning_rate": 0.00030086529207303935, |
|
"loss": 2.6537, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.43568321751159667, |
|
"grad_norm": 0.1326085776090622, |
|
"learning_rate": 0.00030017572820789765, |
|
"loss": 2.6522, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.4365796850373407, |
|
"grad_norm": 0.13274775445461273, |
|
"learning_rate": 0.00029948576635441905, |
|
"loss": 2.6506, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.4374761525630847, |
|
"grad_norm": 0.13352380692958832, |
|
"learning_rate": 0.0002987954119853048, |
|
"loss": 2.6552, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.4383726200888287, |
|
"grad_norm": 0.13431380689144135, |
|
"learning_rate": 0.0002981046705763696, |
|
"loss": 2.6532, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.43926908761457273, |
|
"grad_norm": 0.1325884610414505, |
|
"learning_rate": 0.0002974135476064981, |
|
"loss": 2.6586, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.4401655551403168, |
|
"grad_norm": 0.13121579587459564, |
|
"learning_rate": 0.0002967220485576013, |
|
"loss": 2.6486, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.4410620226660608, |
|
"grad_norm": 0.12927518784999847, |
|
"learning_rate": 0.0002960301789145733, |
|
"loss": 2.6553, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.44195849019180483, |
|
"grad_norm": 0.1333087980747223, |
|
"learning_rate": 0.0002953379441652478, |
|
"loss": 2.6525, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.44285495771754885, |
|
"grad_norm": 0.13250477612018585, |
|
"learning_rate": 0.0002946453498003543, |
|
"loss": 2.6441, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.44375142524329286, |
|
"grad_norm": 0.13053111732006073, |
|
"learning_rate": 0.00029395240131347507, |
|
"loss": 2.6549, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.44464789276903693, |
|
"grad_norm": 0.1300216168165207, |
|
"learning_rate": 0.00029325910420100083, |
|
"loss": 2.6509, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.44554436029478095, |
|
"grad_norm": 0.13556109368801117, |
|
"learning_rate": 0.00029256546396208766, |
|
"loss": 2.6403, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.44644082782052497, |
|
"grad_norm": 0.12819913029670715, |
|
"learning_rate": 0.00029187148609861353, |
|
"loss": 2.6435, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.447337295346269, |
|
"grad_norm": 0.1305069774389267, |
|
"learning_rate": 0.0002911771761151342, |
|
"loss": 2.654, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.448233762872013, |
|
"grad_norm": 0.1274418979883194, |
|
"learning_rate": 0.0002904825395188397, |
|
"loss": 2.65, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.449130230397757, |
|
"grad_norm": 0.13119454681873322, |
|
"learning_rate": 0.0002897875818195111, |
|
"loss": 2.6608, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.4500266979235011, |
|
"grad_norm": 0.13181808590888977, |
|
"learning_rate": 0.00028909230852947575, |
|
"loss": 2.6457, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.4509231654492451, |
|
"grad_norm": 0.1294698864221573, |
|
"learning_rate": 0.00028839672516356495, |
|
"loss": 2.6537, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.4518196329749891, |
|
"grad_norm": 0.13653060793876648, |
|
"learning_rate": 0.00028770083723906904, |
|
"loss": 2.6575, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.45271610050073313, |
|
"grad_norm": 0.12975502014160156, |
|
"learning_rate": 0.0002870046502756942, |
|
"loss": 2.645, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.45361256802647715, |
|
"grad_norm": 0.13355258107185364, |
|
"learning_rate": 0.0002863081697955187, |
|
"loss": 2.6435, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.4545090355522212, |
|
"grad_norm": 0.1305021345615387, |
|
"learning_rate": 0.00028561140132294863, |
|
"loss": 2.6454, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.45540550307796523, |
|
"grad_norm": 0.13262712955474854, |
|
"learning_rate": 0.00028491435038467466, |
|
"loss": 2.6534, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.45630197060370925, |
|
"grad_norm": 0.13376927375793457, |
|
"learning_rate": 0.00028421702250962786, |
|
"loss": 2.646, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.45719843812945327, |
|
"grad_norm": 0.13164885342121124, |
|
"learning_rate": 0.0002835194232289361, |
|
"loss": 2.6567, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.4580949056551973, |
|
"grad_norm": 0.13637390732765198, |
|
"learning_rate": 0.0002828215580758798, |
|
"loss": 2.6466, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.4589913731809413, |
|
"grad_norm": 0.13366563618183136, |
|
"learning_rate": 0.0002821234325858482, |
|
"loss": 2.6475, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.45988784070668537, |
|
"grad_norm": 0.1311609447002411, |
|
"learning_rate": 0.0002814250522962956, |
|
"loss": 2.653, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.4607843082324294, |
|
"grad_norm": 0.13002333045005798, |
|
"learning_rate": 0.0002807264227466975, |
|
"loss": 2.6472, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.4616807757581734, |
|
"grad_norm": 0.12868809700012207, |
|
"learning_rate": 0.00028002754947850623, |
|
"loss": 2.6347, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.4625772432839174, |
|
"grad_norm": 0.1310162991285324, |
|
"learning_rate": 0.00027932843803510755, |
|
"loss": 2.6414, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.46347371080966143, |
|
"grad_norm": 0.12595106661319733, |
|
"learning_rate": 0.00027862909396177615, |
|
"loss": 2.6427, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.4643701783354055, |
|
"grad_norm": 0.130056232213974, |
|
"learning_rate": 0.000277929522805632, |
|
"loss": 2.6461, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.4652666458611495, |
|
"grad_norm": 0.12970997393131256, |
|
"learning_rate": 0.00027722973011559633, |
|
"loss": 2.6483, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.46616311338689354, |
|
"grad_norm": 0.13723085820674896, |
|
"learning_rate": 0.00027652972144234745, |
|
"loss": 2.6384, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.46705958091263755, |
|
"grad_norm": 0.13320958614349365, |
|
"learning_rate": 0.000275829502338277, |
|
"loss": 2.6573, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.46795604843838157, |
|
"grad_norm": 0.1280314177274704, |
|
"learning_rate": 0.00027512907835744547, |
|
"loss": 2.6469, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.46885251596412564, |
|
"grad_norm": 0.13650980591773987, |
|
"learning_rate": 0.0002744284550555385, |
|
"loss": 2.6457, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.46974898348986965, |
|
"grad_norm": 0.1296728104352951, |
|
"learning_rate": 0.0002737276379898229, |
|
"loss": 2.6386, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.47064545101561367, |
|
"grad_norm": 0.13340330123901367, |
|
"learning_rate": 0.0002730266327191023, |
|
"loss": 2.6419, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.4715419185413577, |
|
"grad_norm": 0.1351691633462906, |
|
"learning_rate": 0.0002723254448036731, |
|
"loss": 2.6474, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.4724383860671017, |
|
"grad_norm": 0.135370671749115, |
|
"learning_rate": 0.00027162407980528037, |
|
"loss": 2.6486, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.4733348535928457, |
|
"grad_norm": 0.12964408099651337, |
|
"learning_rate": 0.0002709225432870738, |
|
"loss": 2.6437, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.4742313211185898, |
|
"grad_norm": 0.12840570509433746, |
|
"learning_rate": 0.0002702208408135637, |
|
"loss": 2.6409, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.4751277886443338, |
|
"grad_norm": 0.13321109116077423, |
|
"learning_rate": 0.00026951897795057653, |
|
"loss": 2.6443, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.4760242561700778, |
|
"grad_norm": 0.13473448157310486, |
|
"learning_rate": 0.0002688169602652113, |
|
"loss": 2.6404, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.47692072369582184, |
|
"grad_norm": 0.1295260488986969, |
|
"learning_rate": 0.00026811479332579445, |
|
"loss": 2.6359, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.47781719122156585, |
|
"grad_norm": 0.13443715870380402, |
|
"learning_rate": 0.0002674124827018368, |
|
"loss": 2.6423, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.4787136587473099, |
|
"grad_norm": 0.13394586741924286, |
|
"learning_rate": 0.0002667100339639886, |
|
"loss": 2.6353, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.47961012627305394, |
|
"grad_norm": 0.1299637407064438, |
|
"learning_rate": 0.00026600745268399583, |
|
"loss": 2.6376, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.48050659379879795, |
|
"grad_norm": 0.12686701118946075, |
|
"learning_rate": 0.0002653047444346556, |
|
"loss": 2.6426, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.48140306132454197, |
|
"grad_norm": 0.13109557330608368, |
|
"learning_rate": 0.00026460191478977203, |
|
"loss": 2.6413, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.482299528850286, |
|
"grad_norm": 0.12681497633457184, |
|
"learning_rate": 0.00026389896932411234, |
|
"loss": 2.6469, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.48319599637603, |
|
"grad_norm": 0.12837442755699158, |
|
"learning_rate": 0.0002631959136133624, |
|
"loss": 2.6444, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.4840924639017741, |
|
"grad_norm": 0.13397662341594696, |
|
"learning_rate": 0.0002624927532340825, |
|
"loss": 2.6385, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.4849889314275181, |
|
"grad_norm": 0.1271597146987915, |
|
"learning_rate": 0.00026178949376366316, |
|
"loss": 2.637, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.4858853989532621, |
|
"grad_norm": 0.12960323691368103, |
|
"learning_rate": 0.00026108614078028077, |
|
"loss": 2.6347, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.4867818664790061, |
|
"grad_norm": 0.13047392666339874, |
|
"learning_rate": 0.0002603826998628536, |
|
"loss": 2.6414, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.48767833400475014, |
|
"grad_norm": 0.1311124563217163, |
|
"learning_rate": 0.0002596791765909973, |
|
"loss": 2.6422, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.4885748015304942, |
|
"grad_norm": 0.13057972490787506, |
|
"learning_rate": 0.00025897557654498086, |
|
"loss": 2.6416, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.4894712690562382, |
|
"grad_norm": 0.12886947393417358, |
|
"learning_rate": 0.0002582719053056822, |
|
"loss": 2.6402, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.49036773658198224, |
|
"grad_norm": 0.12779085338115692, |
|
"learning_rate": 0.00025756816845454384, |
|
"loss": 2.6349, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.49126420410772625, |
|
"grad_norm": 0.12994657456874847, |
|
"learning_rate": 0.0002568643715735288, |
|
"loss": 2.6384, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.49216067163347027, |
|
"grad_norm": 0.1322993040084839, |
|
"learning_rate": 0.00025616052024507625, |
|
"loss": 2.6413, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.4930571391592143, |
|
"grad_norm": 0.1368289738893509, |
|
"learning_rate": 0.00025545662005205716, |
|
"loss": 2.641, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.49395360668495836, |
|
"grad_norm": 0.1309647113084793, |
|
"learning_rate": 0.00025475267657773027, |
|
"loss": 2.6389, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.4948500742107024, |
|
"grad_norm": 0.13188649713993073, |
|
"learning_rate": 0.0002540486954056975, |
|
"loss": 2.6445, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.4957465417364464, |
|
"grad_norm": 0.13085578382015228, |
|
"learning_rate": 0.0002533446821198597, |
|
"loss": 2.6373, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.4966430092621904, |
|
"grad_norm": 0.13119032979011536, |
|
"learning_rate": 0.0002526406423043725, |
|
"loss": 2.6449, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.4975394767879344, |
|
"grad_norm": 0.1324346512556076, |
|
"learning_rate": 0.0002519365815436021, |
|
"loss": 2.6332, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.4984359443136785, |
|
"grad_norm": 0.13353855907917023, |
|
"learning_rate": 0.0002512325054220807, |
|
"loss": 2.6365, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.4993324118394225, |
|
"grad_norm": 0.13728708028793335, |
|
"learning_rate": 0.0002505284195244624, |
|
"loss": 2.6378, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.5002288793651665, |
|
"grad_norm": 0.12542614340782166, |
|
"learning_rate": 0.0002498243294354787, |
|
"loss": 2.6436, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.5011253468909106, |
|
"grad_norm": 0.12808193266391754, |
|
"learning_rate": 0.0002491202407398945, |
|
"loss": 2.6439, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.5020218144166546, |
|
"grad_norm": 0.1336560845375061, |
|
"learning_rate": 0.0002484161590224637, |
|
"loss": 2.6317, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.5029182819423986, |
|
"grad_norm": 0.1294037103652954, |
|
"learning_rate": 0.0002477120898678847, |
|
"loss": 2.6397, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.5038147494681426, |
|
"grad_norm": 0.12634062767028809, |
|
"learning_rate": 0.0002470080388607563, |
|
"loss": 2.6345, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.5047112169938867, |
|
"grad_norm": 0.12850134074687958, |
|
"learning_rate": 0.0002463040115855333, |
|
"loss": 2.6355, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.5056076845196307, |
|
"grad_norm": 0.13224093616008759, |
|
"learning_rate": 0.00024560001362648233, |
|
"loss": 2.6333, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.5065041520453747, |
|
"grad_norm": 0.129663348197937, |
|
"learning_rate": 0.00024489605056763757, |
|
"loss": 2.6363, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.5074006195711187, |
|
"grad_norm": 0.1328994631767273, |
|
"learning_rate": 0.00024419212799275627, |
|
"loss": 2.6307, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.5082970870968627, |
|
"grad_norm": 0.12859898805618286, |
|
"learning_rate": 0.0002434882514852745, |
|
"loss": 2.638, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.5091935546226067, |
|
"grad_norm": 0.13029265403747559, |
|
"learning_rate": 0.0002427844266282631, |
|
"loss": 2.6369, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.5100900221483509, |
|
"grad_norm": 0.1283547580242157, |
|
"learning_rate": 0.00024208065900438309, |
|
"loss": 2.634, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.5109864896740949, |
|
"grad_norm": 0.12610678374767303, |
|
"learning_rate": 0.00024137695419584163, |
|
"loss": 2.6389, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.5118829571998389, |
|
"grad_norm": 0.12752105295658112, |
|
"learning_rate": 0.0002406733177843475, |
|
"loss": 2.6364, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.5127794247255829, |
|
"grad_norm": 0.13300961256027222, |
|
"learning_rate": 0.0002399697553510671, |
|
"loss": 2.6272, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.5136758922513269, |
|
"grad_norm": 0.1362060010433197, |
|
"learning_rate": 0.00023926627247657987, |
|
"loss": 2.6327, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.5145723597770709, |
|
"grad_norm": 0.12719836831092834, |
|
"learning_rate": 0.00023856287474083437, |
|
"loss": 2.6335, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.515468827302815, |
|
"grad_norm": 0.13754625618457794, |
|
"learning_rate": 0.0002378595677231038, |
|
"loss": 2.627, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.516365294828559, |
|
"grad_norm": 0.13351771235466003, |
|
"learning_rate": 0.00023715635700194185, |
|
"loss": 2.641, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.517261762354303, |
|
"grad_norm": 0.12546195089817047, |
|
"learning_rate": 0.0002364532481551382, |
|
"loss": 2.639, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.518158229880047, |
|
"grad_norm": 0.13150018453598022, |
|
"learning_rate": 0.00023575024675967464, |
|
"loss": 2.6328, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.519054697405791, |
|
"grad_norm": 0.12908001244068146, |
|
"learning_rate": 0.00023504735839168062, |
|
"loss": 2.6236, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.5199511649315351, |
|
"grad_norm": 0.12657436728477478, |
|
"learning_rate": 0.00023434458862638919, |
|
"loss": 2.6363, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.5208476324572792, |
|
"grad_norm": 0.13314677774906158, |
|
"learning_rate": 0.0002336419430380924, |
|
"loss": 2.6304, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.5217440999830232, |
|
"grad_norm": 0.1295042335987091, |
|
"learning_rate": 0.0002329394272000976, |
|
"loss": 2.6269, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.5226405675087672, |
|
"grad_norm": 0.13147129118442535, |
|
"learning_rate": 0.00023223704668468275, |
|
"loss": 2.6355, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.5235370350345112, |
|
"grad_norm": 0.13861297070980072, |
|
"learning_rate": 0.0002315348070630526, |
|
"loss": 2.6307, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.5244335025602552, |
|
"grad_norm": 0.12782630324363708, |
|
"learning_rate": 0.0002308327139052943, |
|
"loss": 2.6396, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.5253299700859992, |
|
"grad_norm": 0.1270517110824585, |
|
"learning_rate": 0.00023013077278033334, |
|
"loss": 2.6343, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.5262264376117433, |
|
"grad_norm": 0.12780235707759857, |
|
"learning_rate": 0.00022942898925588912, |
|
"loss": 2.635, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.5271229051374873, |
|
"grad_norm": 0.12590976059436798, |
|
"learning_rate": 0.00022872736889843116, |
|
"loss": 2.6274, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.5280193726632313, |
|
"grad_norm": 0.1275419443845749, |
|
"learning_rate": 0.0002280259172731346, |
|
"loss": 2.6303, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.5289158401889753, |
|
"grad_norm": 0.12726274132728577, |
|
"learning_rate": 0.00022732463994383641, |
|
"loss": 2.6297, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.5298123077147194, |
|
"grad_norm": 0.13650214672088623, |
|
"learning_rate": 0.00022662354247299086, |
|
"loss": 2.6233, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.5307087752404634, |
|
"grad_norm": 0.12631866335868835, |
|
"learning_rate": 0.00022592263042162577, |
|
"loss": 2.6333, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.5316052427662075, |
|
"grad_norm": 0.12630638480186462, |
|
"learning_rate": 0.00022522190934929801, |
|
"loss": 2.6275, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.5325017102919515, |
|
"grad_norm": 0.12765736877918243, |
|
"learning_rate": 0.00022452138481404986, |
|
"loss": 2.6306, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.5333981778176955, |
|
"grad_norm": 0.12750397622585297, |
|
"learning_rate": 0.00022382106237236457, |
|
"loss": 2.6271, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.5342946453434395, |
|
"grad_norm": 0.13162820041179657, |
|
"learning_rate": 0.0002231209475791225, |
|
"loss": 2.6287, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.5351911128691835, |
|
"grad_norm": 0.13135115802288055, |
|
"learning_rate": 0.0002224210459875568, |
|
"loss": 2.6322, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.5360875803949275, |
|
"grad_norm": 0.12867547571659088, |
|
"learning_rate": 0.00022172136314920968, |
|
"loss": 2.6353, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.5369840479206716, |
|
"grad_norm": 0.1288844645023346, |
|
"learning_rate": 0.00022102190461388818, |
|
"loss": 2.6355, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.5378805154464156, |
|
"grad_norm": 0.12920548021793365, |
|
"learning_rate": 0.00022032267592962023, |
|
"loss": 2.6274, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.5387769829721596, |
|
"grad_norm": 0.1328190267086029, |
|
"learning_rate": 0.0002196236826426104, |
|
"loss": 2.6318, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.5396734504979037, |
|
"grad_norm": 0.12695330381393433, |
|
"learning_rate": 0.00021892493029719652, |
|
"loss": 2.6287, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.5405699180236477, |
|
"grad_norm": 0.13476692140102386, |
|
"learning_rate": 0.00021822642443580483, |
|
"loss": 2.6335, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.5414663855493917, |
|
"grad_norm": 0.13216422498226166, |
|
"learning_rate": 0.00021752817059890673, |
|
"loss": 2.6262, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.5423628530751358, |
|
"grad_norm": 0.13027572631835938, |
|
"learning_rate": 0.00021683017432497464, |
|
"loss": 2.6189, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.5432593206008798, |
|
"grad_norm": 0.12778045237064362, |
|
"learning_rate": 0.00021613244115043802, |
|
"loss": 2.6295, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.5441557881266238, |
|
"grad_norm": 0.13152672350406647, |
|
"learning_rate": 0.00021543497660963924, |
|
"loss": 2.6303, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.5450522556523678, |
|
"grad_norm": 0.12792283296585083, |
|
"learning_rate": 0.00021473778623479006, |
|
"loss": 2.6272, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.5459487231781118, |
|
"grad_norm": 0.1273530125617981, |
|
"learning_rate": 0.00021404087555592762, |
|
"loss": 2.625, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.5468451907038558, |
|
"grad_norm": 0.1318267434835434, |
|
"learning_rate": 0.0002133442501008705, |
|
"loss": 2.6254, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.5477416582295999, |
|
"grad_norm": 0.13691310584545135, |
|
"learning_rate": 0.00021264791539517486, |
|
"loss": 2.6211, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.5486381257553439, |
|
"grad_norm": 0.12379534542560577, |
|
"learning_rate": 0.00021195187696209072, |
|
"loss": 2.6248, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.549534593281088, |
|
"grad_norm": 0.13373887538909912, |
|
"learning_rate": 0.00021125614032251822, |
|
"loss": 2.6304, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.550431060806832, |
|
"grad_norm": 0.13529092073440552, |
|
"learning_rate": 0.00021056071099496333, |
|
"loss": 2.6268, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.551327528332576, |
|
"grad_norm": 0.13044504821300507, |
|
"learning_rate": 0.0002098655944954948, |
|
"loss": 2.6265, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.55222399585832, |
|
"grad_norm": 0.12602747976779938, |
|
"learning_rate": 0.00020917079633770005, |
|
"loss": 2.6248, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.5531204633840641, |
|
"grad_norm": 0.12802156805992126, |
|
"learning_rate": 0.00020847632203264128, |
|
"loss": 2.6194, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.5540169309098081, |
|
"grad_norm": 0.1250976324081421, |
|
"learning_rate": 0.000207782177088812, |
|
"loss": 2.6297, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.5549133984355521, |
|
"grad_norm": 0.13608118891716003, |
|
"learning_rate": 0.00020708836701209332, |
|
"loss": 2.6286, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.5558098659612961, |
|
"grad_norm": 0.1274801641702652, |
|
"learning_rate": 0.00020639489730571014, |
|
"loss": 2.6283, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.5567063334870401, |
|
"grad_norm": 0.12696389853954315, |
|
"learning_rate": 0.0002057017734701877, |
|
"loss": 2.6262, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.5576028010127841, |
|
"grad_norm": 0.12838256359100342, |
|
"learning_rate": 0.00020501592697065653, |
|
"loss": 2.6261, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.5584992685385282, |
|
"grad_norm": 0.12481874227523804, |
|
"learning_rate": 0.00020432350777158638, |
|
"loss": 2.6197, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.5593957360642723, |
|
"grad_norm": 0.13098376989364624, |
|
"learning_rate": 0.00020363145087340989, |
|
"loss": 2.6218, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.5602922035900163, |
|
"grad_norm": 0.1315186321735382, |
|
"learning_rate": 0.0002029397617654461, |
|
"loss": 2.6229, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.5611886711157603, |
|
"grad_norm": 0.13062401115894318, |
|
"learning_rate": 0.00020224844593409665, |
|
"loss": 2.6224, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.5620851386415043, |
|
"grad_norm": 0.12941262125968933, |
|
"learning_rate": 0.00020155750886280245, |
|
"loss": 2.6264, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.5629816061672483, |
|
"grad_norm": 0.12870369851589203, |
|
"learning_rate": 0.00020086695603199994, |
|
"loss": 2.6176, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.5638780736929924, |
|
"grad_norm": 0.12999162077903748, |
|
"learning_rate": 0.00020017679291907804, |
|
"loss": 2.6217, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.5647745412187364, |
|
"grad_norm": 0.12885361909866333, |
|
"learning_rate": 0.00019948702499833436, |
|
"loss": 2.6248, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.5656710087444804, |
|
"grad_norm": 0.12850242853164673, |
|
"learning_rate": 0.00019879765774093188, |
|
"loss": 2.6276, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.5665674762702244, |
|
"grad_norm": 0.12949827313423157, |
|
"learning_rate": 0.0001981086966148556, |
|
"loss": 2.6191, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.5674639437959684, |
|
"grad_norm": 0.1278415471315384, |
|
"learning_rate": 0.00019742014708486904, |
|
"loss": 2.6321, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.5683604113217124, |
|
"grad_norm": 0.12578001618385315, |
|
"learning_rate": 0.00019673201461247125, |
|
"loss": 2.616, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.5692568788474566, |
|
"grad_norm": 0.12869875133037567, |
|
"learning_rate": 0.00019604430465585277, |
|
"loss": 2.6195, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.5701533463732006, |
|
"grad_norm": 0.12808062136173248, |
|
"learning_rate": 0.0001953570226698532, |
|
"loss": 2.6231, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.5710498138989446, |
|
"grad_norm": 0.13366073369979858, |
|
"learning_rate": 0.0001946701741059174, |
|
"loss": 2.6234, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.5719462814246886, |
|
"grad_norm": 0.12796130776405334, |
|
"learning_rate": 0.00019398376441205227, |
|
"loss": 2.6268, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.5728427489504326, |
|
"grad_norm": 0.13759686052799225, |
|
"learning_rate": 0.00019329779903278375, |
|
"loss": 2.619, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.5737392164761766, |
|
"grad_norm": 0.12661823630332947, |
|
"learning_rate": 0.00019261228340911352, |
|
"loss": 2.6152, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.5746356840019207, |
|
"grad_norm": 0.13385765254497528, |
|
"learning_rate": 0.00019192722297847587, |
|
"loss": 2.6217, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.5755321515276647, |
|
"grad_norm": 0.1298641860485077, |
|
"learning_rate": 0.00019124262317469443, |
|
"loss": 2.622, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.5764286190534087, |
|
"grad_norm": 0.12806908786296844, |
|
"learning_rate": 0.00019056532844060364, |
|
"loss": 2.6181, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.5773250865791527, |
|
"grad_norm": 0.12761425971984863, |
|
"learning_rate": 0.00018988849575433026, |
|
"loss": 2.6178, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.5782215541048968, |
|
"grad_norm": 0.1286136656999588, |
|
"learning_rate": 0.00018920530080606497, |
|
"loss": 2.6202, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.5791180216306409, |
|
"grad_norm": 0.12874256074428558, |
|
"learning_rate": 0.0001885225880746421, |
|
"loss": 2.6064, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.5800144891563849, |
|
"grad_norm": 0.12678521871566772, |
|
"learning_rate": 0.00018784036297526366, |
|
"loss": 2.6256, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.5809109566821289, |
|
"grad_norm": 0.1318819671869278, |
|
"learning_rate": 0.0001871586309192639, |
|
"loss": 2.6207, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.5818074242078729, |
|
"grad_norm": 0.13057227432727814, |
|
"learning_rate": 0.00018647739731406606, |
|
"loss": 2.6188, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.5827038917336169, |
|
"grad_norm": 0.1309261918067932, |
|
"learning_rate": 0.00018579666756314, |
|
"loss": 2.6225, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.5836003592593609, |
|
"grad_norm": 0.12833261489868164, |
|
"learning_rate": 0.0001851164470659589, |
|
"loss": 2.6151, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.584496826785105, |
|
"grad_norm": 0.1268189400434494, |
|
"learning_rate": 0.00018443674121795678, |
|
"loss": 2.6177, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.585393294310849, |
|
"grad_norm": 0.1255330890417099, |
|
"learning_rate": 0.00018375755541048526, |
|
"loss": 2.6206, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.586289761836593, |
|
"grad_norm": 0.1327618807554245, |
|
"learning_rate": 0.0001830788950307713, |
|
"loss": 2.6236, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.587186229362337, |
|
"grad_norm": 0.12842676043510437, |
|
"learning_rate": 0.0001824007654618739, |
|
"loss": 2.6176, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.5880826968880811, |
|
"grad_norm": 0.1308692991733551, |
|
"learning_rate": 0.00018172317208264212, |
|
"loss": 2.6171, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.5889791644138251, |
|
"grad_norm": 0.12851682305335999, |
|
"learning_rate": 0.0001810461202676717, |
|
"loss": 2.6164, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.5898756319395692, |
|
"grad_norm": 0.14155201613903046, |
|
"learning_rate": 0.00018036961538726314, |
|
"loss": 2.6198, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.5907720994653132, |
|
"grad_norm": 0.1263512820005417, |
|
"learning_rate": 0.0001796936628073782, |
|
"loss": 2.6202, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.5916685669910572, |
|
"grad_norm": 0.12849842011928558, |
|
"learning_rate": 0.00017901826788959825, |
|
"loss": 2.6154, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.5925650345168012, |
|
"grad_norm": 0.12791509926319122, |
|
"learning_rate": 0.0001783434359910811, |
|
"loss": 2.6201, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.5934615020425452, |
|
"grad_norm": 0.12755149602890015, |
|
"learning_rate": 0.00017766917246451902, |
|
"loss": 2.6166, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.5943579695682892, |
|
"grad_norm": 0.13783405721187592, |
|
"learning_rate": 0.00017699548265809578, |
|
"loss": 2.6165, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.5952544370940333, |
|
"grad_norm": 0.1294228881597519, |
|
"learning_rate": 0.00017632237191544462, |
|
"loss": 2.6155, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.5961509046197773, |
|
"grad_norm": 0.13272565603256226, |
|
"learning_rate": 0.00017564984557560555, |
|
"loss": 2.6116, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.5970473721455213, |
|
"grad_norm": 0.13658390939235687, |
|
"learning_rate": 0.00017497790897298327, |
|
"loss": 2.6159, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.5979438396712654, |
|
"grad_norm": 0.13049334287643433, |
|
"learning_rate": 0.00017430656743730477, |
|
"loss": 2.6191, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.5988403071970094, |
|
"grad_norm": 0.13098062574863434, |
|
"learning_rate": 0.00017363582629357705, |
|
"loss": 2.618, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.5997367747227534, |
|
"grad_norm": 0.12643533945083618, |
|
"learning_rate": 0.00017296569086204479, |
|
"loss": 2.6172, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.6006332422484975, |
|
"grad_norm": 0.12678323686122894, |
|
"learning_rate": 0.00017229616645814813, |
|
"loss": 2.6185, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.6015297097742415, |
|
"grad_norm": 0.12956561148166656, |
|
"learning_rate": 0.00017162725839248077, |
|
"loss": 2.6089, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.6024261772999855, |
|
"grad_norm": 0.129245787858963, |
|
"learning_rate": 0.00017095897197074773, |
|
"loss": 2.6043, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.6033226448257295, |
|
"grad_norm": 0.12439849972724915, |
|
"learning_rate": 0.00017029131249372305, |
|
"loss": 2.6174, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.6042191123514735, |
|
"grad_norm": 0.1366392970085144, |
|
"learning_rate": 0.00016962428525720807, |
|
"loss": 2.623, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.6051155798772175, |
|
"grad_norm": 0.12857592105865479, |
|
"learning_rate": 0.00016895789555198908, |
|
"loss": 2.61, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.6060120474029616, |
|
"grad_norm": 0.1319228857755661, |
|
"learning_rate": 0.00016829214866379572, |
|
"loss": 2.611, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.6069085149287056, |
|
"grad_norm": 0.1241278126835823, |
|
"learning_rate": 0.00016762704987325884, |
|
"loss": 2.6049, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.6078049824544497, |
|
"grad_norm": 0.13301433622837067, |
|
"learning_rate": 0.00016696260445586865, |
|
"loss": 2.6089, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.6087014499801937, |
|
"grad_norm": 0.1289006769657135, |
|
"learning_rate": 0.00016629881768193271, |
|
"loss": 2.6123, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.6095979175059377, |
|
"grad_norm": 0.1281334012746811, |
|
"learning_rate": 0.00016563569481653457, |
|
"loss": 2.6086, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.6104943850316817, |
|
"grad_norm": 0.13358446955680847, |
|
"learning_rate": 0.00016497324111949146, |
|
"loss": 2.6162, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.6113908525574258, |
|
"grad_norm": 0.12993699312210083, |
|
"learning_rate": 0.00016431146184531304, |
|
"loss": 2.6077, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.6122873200831698, |
|
"grad_norm": 0.12562768161296844, |
|
"learning_rate": 0.00016365036224315948, |
|
"loss": 2.6099, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.6131837876089138, |
|
"grad_norm": 0.12500709295272827, |
|
"learning_rate": 0.00016298994755679986, |
|
"loss": 2.6186, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.6140802551346578, |
|
"grad_norm": 0.13538555800914764, |
|
"learning_rate": 0.00016233681683644314, |
|
"loss": 2.6057, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.6149767226604018, |
|
"grad_norm": 0.13071191310882568, |
|
"learning_rate": 0.00016167778071145164, |
|
"loss": 2.6108, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.6158731901861458, |
|
"grad_norm": 0.12502072751522064, |
|
"learning_rate": 0.00016101944514855299, |
|
"loss": 2.6098, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.6167696577118899, |
|
"grad_norm": 0.130848690867424, |
|
"learning_rate": 0.0001603618153695922, |
|
"loss": 2.6067, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.617666125237634, |
|
"grad_norm": 0.12595491111278534, |
|
"learning_rate": 0.0001597048965908161, |
|
"loss": 2.6009, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.618562592763378, |
|
"grad_norm": 0.13049688935279846, |
|
"learning_rate": 0.0001590486940228319, |
|
"loss": 2.6069, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.619459060289122, |
|
"grad_norm": 0.1312711089849472, |
|
"learning_rate": 0.00015839321287056585, |
|
"loss": 2.6088, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.620355527814866, |
|
"grad_norm": 0.13168184459209442, |
|
"learning_rate": 0.00015773845833322208, |
|
"loss": 2.6188, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.62125199534061, |
|
"grad_norm": 0.12749746441841125, |
|
"learning_rate": 0.00015708443560424141, |
|
"loss": 2.6151, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.6221484628663541, |
|
"grad_norm": 0.13357709348201752, |
|
"learning_rate": 0.0001564311498712599, |
|
"loss": 2.6064, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.6230449303920981, |
|
"grad_norm": 0.12775705754756927, |
|
"learning_rate": 0.00015577860631606783, |
|
"loss": 2.6118, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.6239413979178421, |
|
"grad_norm": 0.13166511058807373, |
|
"learning_rate": 0.00015512681011456859, |
|
"loss": 2.602, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.6248378654435861, |
|
"grad_norm": 0.1261209398508072, |
|
"learning_rate": 0.00015447576643673778, |
|
"loss": 2.61, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 0.6257343329693301, |
|
"grad_norm": 0.12618456780910492, |
|
"learning_rate": 0.00015382548044658194, |
|
"loss": 2.6102, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.6266308004950741, |
|
"grad_norm": 0.12650653719902039, |
|
"learning_rate": 0.00015317595730209783, |
|
"loss": 2.6137, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 0.6275272680208183, |
|
"grad_norm": 0.12881776690483093, |
|
"learning_rate": 0.0001525272021552313, |
|
"loss": 2.6134, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.6284237355465623, |
|
"grad_norm": 0.1257714331150055, |
|
"learning_rate": 0.00015187922015183638, |
|
"loss": 2.6172, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 0.6293202030723063, |
|
"grad_norm": 0.12860926985740662, |
|
"learning_rate": 0.00015123201643163495, |
|
"loss": 2.6093, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.6302166705980503, |
|
"grad_norm": 0.1268460750579834, |
|
"learning_rate": 0.0001505855961281754, |
|
"loss": 2.6065, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 0.6311131381237943, |
|
"grad_norm": 0.1278506964445114, |
|
"learning_rate": 0.00014993996436879232, |
|
"loss": 2.6117, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.6320096056495383, |
|
"grad_norm": 0.1345282644033432, |
|
"learning_rate": 0.00014930157071006556, |
|
"loss": 2.6175, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.6329060731752824, |
|
"grad_norm": 0.13263201713562012, |
|
"learning_rate": 0.0001486575233826827, |
|
"loss": 2.6045, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.6338025407010264, |
|
"grad_norm": 0.12888827919960022, |
|
"learning_rate": 0.00014801427989263638, |
|
"loss": 2.606, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 0.6346990082267704, |
|
"grad_norm": 0.12627027928829193, |
|
"learning_rate": 0.00014737184534206293, |
|
"loss": 2.6056, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.6355954757525144, |
|
"grad_norm": 0.12864083051681519, |
|
"learning_rate": 0.00014673022482668242, |
|
"loss": 2.6085, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 0.6364919432782585, |
|
"grad_norm": 0.1318010538816452, |
|
"learning_rate": 0.00014608942343575788, |
|
"loss": 2.6087, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.6373884108040025, |
|
"grad_norm": 0.12685802578926086, |
|
"learning_rate": 0.0001454494462520551, |
|
"loss": 2.6069, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 0.6382848783297466, |
|
"grad_norm": 0.12846295535564423, |
|
"learning_rate": 0.0001448102983518025, |
|
"loss": 2.6005, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.6391813458554906, |
|
"grad_norm": 0.1338125467300415, |
|
"learning_rate": 0.00014417198480465068, |
|
"loss": 2.6043, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 0.6400778133812346, |
|
"grad_norm": 0.1277831345796585, |
|
"learning_rate": 0.00014353451067363227, |
|
"loss": 2.6048, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.6409742809069786, |
|
"grad_norm": 0.1271323561668396, |
|
"learning_rate": 0.0001428978810151216, |
|
"loss": 2.6066, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.6418707484327226, |
|
"grad_norm": 0.12640222907066345, |
|
"learning_rate": 0.00014226210087879494, |
|
"loss": 2.6082, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.6427672159584666, |
|
"grad_norm": 0.12706422805786133, |
|
"learning_rate": 0.0001416271753075902, |
|
"loss": 2.6041, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 0.6436636834842107, |
|
"grad_norm": 0.12946538627147675, |
|
"learning_rate": 0.0001409931093376667, |
|
"loss": 2.6042, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.6445601510099547, |
|
"grad_norm": 0.13379494845867157, |
|
"learning_rate": 0.00014035990799836584, |
|
"loss": 2.6064, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 0.6454566185356987, |
|
"grad_norm": 0.13055512309074402, |
|
"learning_rate": 0.00013972757631217075, |
|
"loss": 2.6067, |
|
"step": 72000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 111548, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 2000, |
|
"total_flos": 3.857626077069312e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |