{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7733994606041957,
  "eval_steps": 500,
  "global_step": 21364,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0011222328814234257,
      "grad_norm": 37.096622467041016,
      "learning_rate": 1.0157273918741808e-06,
      "loss": 8.8686,
      "step": 31
    },
    {
      "epoch": 0.0022444657628468514,
      "grad_norm": 13.880346298217773,
      "learning_rate": 2.0314547837483616e-06,
      "loss": 7.6419,
      "step": 62
    },
    {
      "epoch": 0.0033666986442702773,
      "grad_norm": 16.09684944152832,
      "learning_rate": 3.0471821756225426e-06,
      "loss": 6.4382,
      "step": 93
    },
    {
      "epoch": 0.004488931525693703,
      "grad_norm": 19.170230865478516,
      "learning_rate": 4.062909567496723e-06,
      "loss": 5.3399,
      "step": 124
    },
    {
      "epoch": 0.005611164407117128,
      "grad_norm": 24.654130935668945,
      "learning_rate": 5.078636959370905e-06,
      "loss": 4.7646,
      "step": 155
    },
    {
      "epoch": 0.006733397288540555,
      "grad_norm": 24.712974548339844,
      "learning_rate": 6.094364351245085e-06,
      "loss": 4.4667,
      "step": 186
    },
    {
      "epoch": 0.00785563016996398,
      "grad_norm": 17.238990783691406,
      "learning_rate": 7.110091743119267e-06,
      "loss": 4.2168,
      "step": 217
    },
    {
      "epoch": 0.008977863051387406,
      "grad_norm": 20.40213394165039,
      "learning_rate": 8.125819134993446e-06,
      "loss": 4.0355,
      "step": 248
    },
    {
      "epoch": 0.010100095932810832,
      "grad_norm": 15.052313804626465,
      "learning_rate": 9.141546526867629e-06,
      "loss": 3.8458,
      "step": 279
    },
    {
      "epoch": 0.011222328814234257,
      "grad_norm": 18.802026748657227,
      "learning_rate": 1.015727391874181e-05,
      "loss": 3.6688,
      "step": 310
    },
    {
      "epoch": 0.012344561695657683,
      "grad_norm": 16.62171745300293,
      "learning_rate": 1.117300131061599e-05,
      "loss": 3.52,
      "step": 341
    },
    {
      "epoch": 0.01346679457708111,
      "grad_norm": 16.29236602783203,
      "learning_rate": 1.218872870249017e-05,
      "loss": 3.402,
      "step": 372
    },
    {
      "epoch": 0.014589027458504534,
      "grad_norm": 11.65068531036377,
      "learning_rate": 1.3204456094364351e-05,
      "loss": 3.2829,
      "step": 403
    },
    {
      "epoch": 0.01571126033992796,
      "grad_norm": 10.617654800415039,
      "learning_rate": 1.4220183486238533e-05,
      "loss": 3.2008,
      "step": 434
    },
    {
      "epoch": 0.016833493221351387,
      "grad_norm": 10.611294746398926,
      "learning_rate": 1.5235910878112714e-05,
      "loss": 3.1249,
      "step": 465
    },
    {
      "epoch": 0.01795572610277481,
      "grad_norm": 9.946114540100098,
      "learning_rate": 1.6251638269986893e-05,
      "loss": 3.0503,
      "step": 496
    },
    {
      "epoch": 0.019077958984198236,
      "grad_norm": 10.92148494720459,
      "learning_rate": 1.7267365661861077e-05,
      "loss": 2.9903,
      "step": 527
    },
    {
      "epoch": 0.020200191865621664,
      "grad_norm": 8.329671859741211,
      "learning_rate": 1.8283093053735257e-05,
      "loss": 2.9261,
      "step": 558
    },
    {
      "epoch": 0.02132242474704509,
      "grad_norm": 7.897571086883545,
      "learning_rate": 1.9298820445609438e-05,
      "loss": 2.889,
      "step": 589
    },
    {
      "epoch": 0.022444657628468513,
      "grad_norm": 7.548309326171875,
      "learning_rate": 2.031454783748362e-05,
      "loss": 2.7945,
      "step": 620
    },
    {
      "epoch": 0.02356689050989194,
      "grad_norm": 8.54383659362793,
      "learning_rate": 2.13302752293578e-05,
      "loss": 2.7538,
      "step": 651
    },
    {
      "epoch": 0.024689123391315366,
      "grad_norm": 7.025435924530029,
      "learning_rate": 2.234600262123198e-05,
      "loss": 2.7075,
      "step": 682
    },
    {
      "epoch": 0.02581135627273879,
      "grad_norm": 7.59956169128418,
      "learning_rate": 2.336173001310616e-05,
      "loss": 2.6625,
      "step": 713
    },
    {
      "epoch": 0.02693358915416222,
      "grad_norm": 6.982921123504639,
      "learning_rate": 2.437745740498034e-05,
      "loss": 2.6248,
      "step": 744
    },
    {
      "epoch": 0.028055822035585643,
      "grad_norm": 6.033556938171387,
      "learning_rate": 2.5393184796854525e-05,
      "loss": 2.5724,
      "step": 775
    },
    {
      "epoch": 0.029178054917009068,
      "grad_norm": 6.674008846282959,
      "learning_rate": 2.6408912188728702e-05,
      "loss": 2.5292,
      "step": 806
    },
    {
      "epoch": 0.030300287798432492,
      "grad_norm": 6.499022006988525,
      "learning_rate": 2.7424639580602886e-05,
      "loss": 2.496,
      "step": 837
    },
    {
      "epoch": 0.03142252067985592,
      "grad_norm": 6.163687229156494,
      "learning_rate": 2.8440366972477066e-05,
      "loss": 2.4541,
      "step": 868
    },
    {
      "epoch": 0.032544753561279345,
      "grad_norm": 5.20266580581665,
      "learning_rate": 2.9456094364351244e-05,
      "loss": 2.449,
      "step": 899
    },
    {
      "epoch": 0.03366698644270277,
      "grad_norm": 5.6633830070495605,
      "learning_rate": 3.0471821756225428e-05,
      "loss": 2.4085,
      "step": 930
    },
    {
      "epoch": 0.034789219324126194,
      "grad_norm": 6.414912700653076,
      "learning_rate": 3.148754914809961e-05,
      "loss": 2.3791,
      "step": 961
    },
    {
      "epoch": 0.03591145220554962,
      "grad_norm": 4.983119964599609,
      "learning_rate": 3.2503276539973785e-05,
      "loss": 2.3505,
      "step": 992
    },
    {
      "epoch": 0.03703368508697305,
      "grad_norm": 5.280698299407959,
      "learning_rate": 3.351900393184797e-05,
      "loss": 2.3191,
      "step": 1023
    },
    {
      "epoch": 0.03815591796839647,
      "grad_norm": 5.565277099609375,
      "learning_rate": 3.453473132372215e-05,
      "loss": 2.2957,
      "step": 1054
    },
    {
      "epoch": 0.0392781508498199,
      "grad_norm": 5.02451753616333,
      "learning_rate": 3.555045871559633e-05,
      "loss": 2.2618,
      "step": 1085
    },
    {
      "epoch": 0.04040038373124333,
      "grad_norm": 4.424225807189941,
      "learning_rate": 3.6566186107470514e-05,
      "loss": 2.2512,
      "step": 1116
    },
    {
      "epoch": 0.04152261661266675,
      "grad_norm": 6.270051002502441,
      "learning_rate": 3.7581913499344695e-05,
      "loss": 2.2354,
      "step": 1147
    },
    {
      "epoch": 0.04264484949409018,
      "grad_norm": 14.256332397460938,
      "learning_rate": 3.8597640891218876e-05,
      "loss": 2.2364,
      "step": 1178
    },
    {
      "epoch": 0.043767082375513605,
      "grad_norm": 4.837010383605957,
      "learning_rate": 3.9613368283093056e-05,
      "loss": 2.2346,
      "step": 1209
    },
    {
      "epoch": 0.044889315256937026,
      "grad_norm": 3.9555633068084717,
      "learning_rate": 4.062909567496724e-05,
      "loss": 2.2003,
      "step": 1240
    },
    {
      "epoch": 0.046011548138360454,
      "grad_norm": 4.136904716491699,
      "learning_rate": 4.164482306684142e-05,
      "loss": 2.2056,
      "step": 1271
    },
    {
      "epoch": 0.04713378101978388,
      "grad_norm": 4.25378942489624,
      "learning_rate": 4.26605504587156e-05,
      "loss": 2.1395,
      "step": 1302
    },
    {
      "epoch": 0.048256013901207304,
      "grad_norm": 3.6108360290527344,
      "learning_rate": 4.367627785058978e-05,
      "loss": 2.1296,
      "step": 1333
    },
    {
      "epoch": 0.04937824678263073,
      "grad_norm": 3.66212797164917,
      "learning_rate": 4.469200524246396e-05,
      "loss": 2.1316,
      "step": 1364
    },
    {
      "epoch": 0.05050047966405416,
      "grad_norm": 3.5523183345794678,
      "learning_rate": 4.570773263433814e-05,
      "loss": 2.1381,
      "step": 1395
    },
    {
      "epoch": 0.05162271254547758,
      "grad_norm": 3.710803747177124,
      "learning_rate": 4.672346002621232e-05,
      "loss": 2.1296,
      "step": 1426
    },
    {
      "epoch": 0.05274494542690101,
      "grad_norm": 3.346266031265259,
      "learning_rate": 4.77391874180865e-05,
      "loss": 2.0755,
      "step": 1457
    },
    {
      "epoch": 0.05386717830832444,
      "grad_norm": 3.264901876449585,
      "learning_rate": 4.875491480996068e-05,
      "loss": 2.0902,
      "step": 1488
    },
    {
      "epoch": 0.05498941118974786,
      "grad_norm": 3.031913995742798,
      "learning_rate": 4.977064220183487e-05,
      "loss": 2.1002,
      "step": 1519
    },
    {
      "epoch": 0.056111644071171286,
      "grad_norm": 3.3827006816864014,
      "learning_rate": 4.9999915451558777e-05,
      "loss": 2.111,
      "step": 1550
    },
    {
      "epoch": 0.057233876952594714,
      "grad_norm": 3.5572054386138916,
      "learning_rate": 4.999955597496219e-05,
      "loss": 2.0809,
      "step": 1581
    },
    {
      "epoch": 0.058356109834018136,
      "grad_norm": 3.2875311374664307,
      "learning_rate": 4.9998914381774255e-05,
      "loss": 2.0562,
      "step": 1612
    },
    {
      "epoch": 0.059478342715441564,
      "grad_norm": 2.903362274169922,
      "learning_rate": 4.999799067923527e-05,
      "loss": 2.0598,
      "step": 1643
    },
    {
      "epoch": 0.060600575596864985,
      "grad_norm": 2.980804681777954,
      "learning_rate": 4.999678487776908e-05,
      "loss": 2.0458,
      "step": 1674
    },
    {
      "epoch": 0.06172280847828841,
      "grad_norm": 2.880610466003418,
      "learning_rate": 4.9995296990983006e-05,
      "loss": 2.0433,
      "step": 1705
    },
    {
      "epoch": 0.06284504135971183,
      "grad_norm": 2.7269234657287598,
      "learning_rate": 4.999352703566763e-05,
      "loss": 2.0189,
      "step": 1736
    },
    {
      "epoch": 0.06396727424113527,
      "grad_norm": 2.808084487915039,
      "learning_rate": 4.999147503179668e-05,
      "loss": 2.0083,
      "step": 1767
    },
    {
      "epoch": 0.06508950712255869,
      "grad_norm": 2.925065279006958,
      "learning_rate": 4.998914100252672e-05,
      "loss": 2.001,
      "step": 1798
    },
    {
      "epoch": 0.06621174000398211,
      "grad_norm": 2.996300458908081,
      "learning_rate": 4.998652497419696e-05,
      "loss": 1.9877,
      "step": 1829
    },
    {
      "epoch": 0.06733397288540555,
      "grad_norm": 2.6028084754943848,
      "learning_rate": 4.9983626976328927e-05,
      "loss": 1.9778,
      "step": 1860
    },
    {
      "epoch": 0.06845620576682897,
      "grad_norm": 2.4577603340148926,
      "learning_rate": 4.998044704162613e-05,
      "loss": 1.9998,
      "step": 1891
    },
    {
      "epoch": 0.06957843864825239,
      "grad_norm": 2.4269509315490723,
      "learning_rate": 4.9976985205973705e-05,
      "loss": 1.9813,
      "step": 1922
    },
    {
      "epoch": 0.07070067152967582,
      "grad_norm": 2.6069250106811523,
      "learning_rate": 4.997324150843799e-05,
      "loss": 1.9781,
      "step": 1953
    },
    {
      "epoch": 0.07182290441109924,
      "grad_norm": 2.5287699699401855,
      "learning_rate": 4.99692159912661e-05,
      "loss": 1.9684,
      "step": 1984
    },
    {
      "epoch": 0.07294513729252267,
      "grad_norm": 2.6519899368286133,
      "learning_rate": 4.996490869988546e-05,
      "loss": 1.9821,
      "step": 2015
    },
    {
      "epoch": 0.0740673701739461,
      "grad_norm": 2.525928497314453,
      "learning_rate": 4.996031968290326e-05,
      "loss": 1.9512,
      "step": 2046
    },
    {
      "epoch": 0.07518960305536952,
      "grad_norm": 2.4517486095428467,
      "learning_rate": 4.995544899210594e-05,
      "loss": 1.9283,
      "step": 2077
    },
    {
      "epoch": 0.07631183593679294,
      "grad_norm": 2.7807457447052,
      "learning_rate": 4.9950296682458583e-05,
      "loss": 1.9448,
      "step": 2108
    },
    {
      "epoch": 0.07743406881821638,
      "grad_norm": 2.4739558696746826,
      "learning_rate": 4.994486281210429e-05,
      "loss": 1.946,
      "step": 2139
    },
    {
      "epoch": 0.0785563016996398,
      "grad_norm": 2.6515214443206787,
      "learning_rate": 4.9939147442363566e-05,
      "loss": 1.9474,
      "step": 2170
    },
    {
      "epoch": 0.07967853458106322,
      "grad_norm": 2.8361852169036865,
      "learning_rate": 4.9933150637733574e-05,
      "loss": 1.9463,
      "step": 2201
    },
    {
      "epoch": 0.08080076746248666,
      "grad_norm": 2.332261323928833,
      "learning_rate": 4.992687246588743e-05,
      "loss": 1.9607,
      "step": 2232
    },
    {
      "epoch": 0.08192300034391008,
      "grad_norm": 2.3486499786376953,
      "learning_rate": 4.992031299767347e-05,
      "loss": 1.9248,
      "step": 2263
    },
    {
      "epoch": 0.0830452332253335,
      "grad_norm": 3.125208616256714,
      "learning_rate": 4.9913472307114386e-05,
      "loss": 1.9088,
      "step": 2294
    },
    {
      "epoch": 0.08416746610675693,
      "grad_norm": 2.2809853553771973,
      "learning_rate": 4.9906350471406446e-05,
      "loss": 1.9199,
      "step": 2325
    },
    {
      "epoch": 0.08528969898818035,
      "grad_norm": 2.567641258239746,
      "learning_rate": 4.989894757091861e-05,
      "loss": 1.9054,
      "step": 2356
    },
    {
      "epoch": 0.08641193186960378,
      "grad_norm": 2.2755303382873535,
      "learning_rate": 4.989126368919158e-05,
      "loss": 1.903,
      "step": 2387
    },
    {
      "epoch": 0.08753416475102721,
      "grad_norm": 2.147775888442993,
      "learning_rate": 4.988329891293693e-05,
      "loss": 1.8993,
      "step": 2418
    },
    {
      "epoch": 0.08865639763245063,
      "grad_norm": 2.2279839515686035,
      "learning_rate": 4.987505333203608e-05,
      "loss": 1.905,
      "step": 2449
    },
    {
      "epoch": 0.08977863051387405,
      "grad_norm": 2.317538022994995,
      "learning_rate": 4.9866527039539276e-05,
      "loss": 1.8776,
      "step": 2480
    },
    {
      "epoch": 0.09090086339529749,
      "grad_norm": 2.296868324279785,
      "learning_rate": 4.9857720131664594e-05,
      "loss": 1.8714,
      "step": 2511
    },
    {
      "epoch": 0.09202309627672091,
      "grad_norm": 2.282538890838623,
      "learning_rate": 4.9848632707796773e-05,
      "loss": 1.8765,
      "step": 2542
    },
    {
      "epoch": 0.09314532915814433,
      "grad_norm": 2.1396827697753906,
      "learning_rate": 4.9839264870486155e-05,
      "loss": 1.8827,
      "step": 2573
    },
    {
      "epoch": 0.09426756203956776,
      "grad_norm": 2.1897048950195312,
      "learning_rate": 4.9829616725447526e-05,
      "loss": 1.8655,
      "step": 2604
    },
    {
      "epoch": 0.09538979492099119,
      "grad_norm": 2.1385130882263184,
      "learning_rate": 4.981968838155888e-05,
      "loss": 1.8768,
      "step": 2635
    },
    {
      "epoch": 0.09651202780241461,
      "grad_norm": 2.264171600341797,
      "learning_rate": 4.980947995086024e-05,
      "loss": 1.8734,
      "step": 2666
    },
    {
      "epoch": 0.09763426068383804,
      "grad_norm": 2.089871883392334,
      "learning_rate": 4.979899154855234e-05,
      "loss": 1.8516,
      "step": 2697
    },
    {
      "epoch": 0.09875649356526146,
      "grad_norm": 2.092179298400879,
      "learning_rate": 4.9788223292995386e-05,
      "loss": 1.8729,
      "step": 2728
    },
    {
      "epoch": 0.09987872644668488,
      "grad_norm": 2.3216769695281982,
      "learning_rate": 4.977717530570768e-05,
      "loss": 1.8673,
      "step": 2759
    },
    {
      "epoch": 0.10100095932810832,
      "grad_norm": 2.104457139968872,
      "learning_rate": 4.976584771136425e-05,
      "loss": 1.8734,
      "step": 2790
    },
    {
      "epoch": 0.10212319220953174,
      "grad_norm": 2.236363649368286,
      "learning_rate": 4.975424063779547e-05,
      "loss": 1.8316,
      "step": 2821
    },
    {
      "epoch": 0.10324542509095516,
      "grad_norm": 2.264967203140259,
      "learning_rate": 4.974235421598557e-05,
      "loss": 1.8614,
      "step": 2852
    },
    {
      "epoch": 0.1043676579723786,
      "grad_norm": 2.1815454959869385,
      "learning_rate": 4.973018858007122e-05,
      "loss": 1.8365,
      "step": 2883
    },
    {
      "epoch": 0.10548989085380202,
      "grad_norm": 2.049677848815918,
      "learning_rate": 4.9717743867339963e-05,
      "loss": 1.8454,
      "step": 2914
    },
    {
      "epoch": 0.10661212373522544,
      "grad_norm": 1.9844895601272583,
      "learning_rate": 4.9705020218228695e-05,
      "loss": 1.8419,
      "step": 2945
    },
    {
      "epoch": 0.10773435661664887,
      "grad_norm": 2.052708387374878,
      "learning_rate": 4.969201777632205e-05,
      "loss": 1.8509,
      "step": 2976
    },
    {
      "epoch": 0.1088565894980723,
      "grad_norm": 2.014535665512085,
      "learning_rate": 4.9678736688350846e-05,
      "loss": 1.8129,
      "step": 3007
    },
    {
      "epoch": 0.10997882237949572,
      "grad_norm": 1.9768311977386475,
      "learning_rate": 4.966517710419033e-05,
      "loss": 1.8375,
      "step": 3038
    },
    {
      "epoch": 0.11110105526091915,
      "grad_norm": 2.046293258666992,
      "learning_rate": 4.965133917685858e-05,
      "loss": 1.8132,
      "step": 3069
    },
    {
      "epoch": 0.11222328814234257,
      "grad_norm": 2.104555368423462,
      "learning_rate": 4.9637223062514714e-05,
      "loss": 1.8147,
      "step": 3100
    },
    {
      "epoch": 0.113345521023766,
      "grad_norm": 2.04533052444458,
      "learning_rate": 4.962282892045718e-05,
      "loss": 1.8591,
      "step": 3131
    },
    {
      "epoch": 0.11446775390518943,
      "grad_norm": 1.967282772064209,
      "learning_rate": 4.9608156913121904e-05,
      "loss": 1.7966,
      "step": 3162
    },
    {
      "epoch": 0.11558998678661285,
      "grad_norm": 2.092106342315674,
      "learning_rate": 4.959320720608049e-05,
      "loss": 1.8301,
      "step": 3193
    },
    {
      "epoch": 0.11671221966803627,
      "grad_norm": 2.0512046813964844,
      "learning_rate": 4.9577979968038354e-05,
      "loss": 1.8211,
      "step": 3224
    },
    {
      "epoch": 0.11783445254945969,
      "grad_norm": 1.9260915517807007,
      "learning_rate": 4.956247537083282e-05,
      "loss": 1.7989,
      "step": 3255
    },
    {
      "epoch": 0.11895668543088313,
      "grad_norm": 2.0938026905059814,
      "learning_rate": 4.9546693589431145e-05,
      "loss": 1.8336,
      "step": 3286
    },
    {
      "epoch": 0.12007891831230655,
      "grad_norm": 1.9972988367080688,
      "learning_rate": 4.9530634801928595e-05,
      "loss": 1.8147,
      "step": 3317
    },
    {
      "epoch": 0.12120115119372997,
      "grad_norm": 1.9120224714279175,
      "learning_rate": 4.9514299189546395e-05,
      "loss": 1.8028,
      "step": 3348
    },
    {
      "epoch": 0.1223233840751534,
      "grad_norm": 1.959033727645874,
      "learning_rate": 4.949768693662973e-05,
      "loss": 1.8281,
      "step": 3379
    },
    {
      "epoch": 0.12344561695657683,
      "grad_norm": 1.9182357788085938,
      "learning_rate": 4.948079823064559e-05,
      "loss": 1.8165,
      "step": 3410
    },
    {
      "epoch": 0.12456784983800025,
      "grad_norm": 1.9079999923706055,
      "learning_rate": 4.946363326218074e-05,
      "loss": 1.7916,
      "step": 3441
    },
    {
      "epoch": 0.12569008271942367,
      "grad_norm": 1.916276216506958,
      "learning_rate": 4.9446192224939525e-05,
      "loss": 1.8086,
      "step": 3472
    },
    {
      "epoch": 0.1268123156008471,
      "grad_norm": 1.903389811515808,
      "learning_rate": 4.942847531574167e-05,
      "loss": 1.8116,
      "step": 3503
    },
    {
      "epoch": 0.12793454848227054,
      "grad_norm": 2.064885139465332,
      "learning_rate": 4.941048273452008e-05,
      "loss": 1.8144,
      "step": 3534
    },
    {
      "epoch": 0.12905678136369395,
      "grad_norm": 2.1314241886138916,
      "learning_rate": 4.9392214684318605e-05,
      "loss": 1.7943,
      "step": 3565
    },
    {
      "epoch": 0.13017901424511738,
      "grad_norm": 2.0061681270599365,
      "learning_rate": 4.93736713712897e-05,
      "loss": 1.794,
      "step": 3596
    },
    {
      "epoch": 0.13130124712654082,
      "grad_norm": 1.9408286809921265,
      "learning_rate": 4.9354853004692124e-05,
      "loss": 1.7882,
      "step": 3627
    },
    {
      "epoch": 0.13242348000796422,
      "grad_norm": 1.8884766101837158,
      "learning_rate": 4.93357597968886e-05,
      "loss": 1.7846,
      "step": 3658
    },
    {
      "epoch": 0.13354571288938766,
      "grad_norm": 1.9393378496170044,
      "learning_rate": 4.931639196334338e-05,
      "loss": 1.7923,
      "step": 3689
    },
    {
      "epoch": 0.1346679457708111,
      "grad_norm": 1.8815410137176514,
      "learning_rate": 4.9296749722619826e-05,
      "loss": 1.7939,
      "step": 3720
    },
    {
      "epoch": 0.1357901786522345,
      "grad_norm": 1.8603038787841797,
      "learning_rate": 4.9276833296377966e-05,
      "loss": 1.7589,
      "step": 3751
    },
    {
      "epoch": 0.13691241153365794,
      "grad_norm": 1.775247573852539,
      "learning_rate": 4.925664290937196e-05,
      "loss": 1.7897,
      "step": 3782
    },
    {
      "epoch": 0.13803464441508137,
      "grad_norm": 1.8576780557632446,
      "learning_rate": 4.9236178789447576e-05,
      "loss": 1.7908,
      "step": 3813
    },
    {
      "epoch": 0.13915687729650478,
      "grad_norm": 1.800264596939087,
      "learning_rate": 4.921544116753962e-05,
      "loss": 1.7736,
      "step": 3844
    },
    {
      "epoch": 0.1402791101779282,
      "grad_norm": 1.9730401039123535,
      "learning_rate": 4.919443027766935e-05,
      "loss": 1.7639,
      "step": 3875
    },
    {
      "epoch": 0.14140134305935165,
      "grad_norm": 1.8654968738555908,
      "learning_rate": 4.91731463569418e-05,
      "loss": 1.7477,
      "step": 3906
    },
    {
      "epoch": 0.14252357594077505,
      "grad_norm": 1.8131386041641235,
      "learning_rate": 4.915158964554312e-05,
      "loss": 1.7887,
      "step": 3937
    },
    {
      "epoch": 0.1436458088221985,
      "grad_norm": 1.8576264381408691,
      "learning_rate": 4.912976038673786e-05,
      "loss": 1.7779,
      "step": 3968
    },
    {
      "epoch": 0.14476804170362192,
      "grad_norm": 1.8940199613571167,
      "learning_rate": 4.9107658826866254e-05,
      "loss": 1.7653,
      "step": 3999
    },
    {
      "epoch": 0.14589027458504533,
      "grad_norm": 1.7727802991867065,
      "learning_rate": 4.908528521534139e-05,
      "loss": 1.7809,
      "step": 4030
    },
    {
      "epoch": 0.14701250746646877,
      "grad_norm": 1.7416553497314453,
      "learning_rate": 4.906263980464644e-05,
      "loss": 1.7605,
      "step": 4061
    },
    {
      "epoch": 0.1481347403478922,
      "grad_norm": 1.82987642288208,
      "learning_rate": 4.903972285033178e-05,
      "loss": 1.7554,
      "step": 4092
    },
    {
      "epoch": 0.1492569732293156,
      "grad_norm": 1.916339635848999,
      "learning_rate": 4.901653461101213e-05,
      "loss": 1.7872,
      "step": 4123
    },
    {
      "epoch": 0.15037920611073904,
      "grad_norm": 1.8903008699417114,
      "learning_rate": 4.8993075348363626e-05,
      "loss": 1.782,
      "step": 4154
    },
    {
      "epoch": 0.15150143899216248,
      "grad_norm": 1.9334847927093506,
      "learning_rate": 4.896934532712084e-05,
      "loss": 1.7565,
      "step": 4185
    },
    {
      "epoch": 0.1526236718735859,
      "grad_norm": 1.7778478860855103,
      "learning_rate": 4.8945344815073846e-05,
      "loss": 1.7613,
      "step": 4216
    },
    {
      "epoch": 0.15374590475500932,
      "grad_norm": 1.7348295450210571,
      "learning_rate": 4.892107408306516e-05,
      "loss": 1.7512,
      "step": 4247
    },
    {
      "epoch": 0.15486813763643276,
      "grad_norm": 1.7189710140228271,
      "learning_rate": 4.889653340498669e-05,
      "loss": 1.741,
      "step": 4278
    },
    {
      "epoch": 0.15599037051785616,
      "grad_norm": 1.8557075262069702,
      "learning_rate": 4.8871723057776664e-05,
      "loss": 1.7471,
      "step": 4309
    },
    {
      "epoch": 0.1571126033992796,
      "grad_norm": 1.7188880443572998,
      "learning_rate": 4.8846643321416476e-05,
      "loss": 1.7492,
      "step": 4340
    },
    {
      "epoch": 0.15823483628070303,
      "grad_norm": 1.6712063550949097,
      "learning_rate": 4.882129447892753e-05,
      "loss": 1.7434,
      "step": 4371
    },
    {
      "epoch": 0.15935706916212644,
      "grad_norm": 1.7652437686920166,
      "learning_rate": 4.8795676816368076e-05,
      "loss": 1.7422,
      "step": 4402
    },
    {
      "epoch": 0.16047930204354988,
      "grad_norm": 1.7910144329071045,
      "learning_rate": 4.876979062282995e-05,
      "loss": 1.7635,
      "step": 4433
    },
    {
      "epoch": 0.1616015349249733,
      "grad_norm": 1.9248684644699097,
      "learning_rate": 4.8743636190435325e-05,
      "loss": 1.7401,
      "step": 4464
    },
    {
      "epoch": 0.16272376780639672,
      "grad_norm": 1.828202486038208,
      "learning_rate": 4.871721381433344e-05,
      "loss": 1.7419,
      "step": 4495
    },
    {
      "epoch": 0.16384600068782015,
      "grad_norm": 1.7170790433883667,
      "learning_rate": 4.869052379269719e-05,
      "loss": 1.7562,
      "step": 4526
    },
    {
      "epoch": 0.1649682335692436,
      "grad_norm": 1.753203272819519,
      "learning_rate": 4.866356642671985e-05,
      "loss": 1.7569,
      "step": 4557
    },
    {
      "epoch": 0.166090466450667,
      "grad_norm": 1.7906442880630493,
      "learning_rate": 4.8636342020611634e-05,
      "loss": 1.7376,
      "step": 4588
    },
    {
      "epoch": 0.16721269933209043,
      "grad_norm": 1.7113378047943115,
      "learning_rate": 4.860885088159626e-05,
      "loss": 1.7386,
      "step": 4619
    },
    {
      "epoch": 0.16833493221351387,
      "grad_norm": 1.7997937202453613,
      "learning_rate": 4.858109331990751e-05,
      "loss": 1.7531,
      "step": 4650
    },
    {
      "epoch": 0.16945716509493727,
      "grad_norm": 1.76421320438385,
      "learning_rate": 4.855306964878567e-05,
      "loss": 1.7402,
      "step": 4681
    },
    {
      "epoch": 0.1705793979763607,
      "grad_norm": 1.7803616523742676,
      "learning_rate": 4.8524780184474084e-05,
      "loss": 1.7345,
      "step": 4712
    },
    {
      "epoch": 0.17170163085778414,
      "grad_norm": 1.7763142585754395,
      "learning_rate": 4.8496225246215496e-05,
      "loss": 1.7469,
      "step": 4743
    },
    {
      "epoch": 0.17282386373920755,
      "grad_norm": 1.728219747543335,
      "learning_rate": 4.8467405156248505e-05,
      "loss": 1.7182,
      "step": 4774
    },
    {
      "epoch": 0.17394609662063099,
      "grad_norm": 1.7837860584259033,
      "learning_rate": 4.843832023980392e-05,
      "loss": 1.739,
      "step": 4805
    },
    {
      "epoch": 0.17506832950205442,
      "grad_norm": 1.7005128860473633,
      "learning_rate": 4.840897082510106e-05,
      "loss": 1.7377,
      "step": 4836
    },
    {
      "epoch": 0.17619056238347783,
      "grad_norm": 1.6570392847061157,
      "learning_rate": 4.8379357243344084e-05,
      "loss": 1.712,
      "step": 4867
    },
    {
      "epoch": 0.17731279526490126,
      "grad_norm": 1.6575350761413574,
      "learning_rate": 4.8349479828718236e-05,
      "loss": 1.7147,
      "step": 4898
    },
    {
      "epoch": 0.1784350281463247,
      "grad_norm": 1.8768808841705322,
      "learning_rate": 4.8319338918386075e-05,
      "loss": 1.7312,
      "step": 4929
    },
    {
      "epoch": 0.1795572610277481,
      "grad_norm": 1.7145389318466187,
      "learning_rate": 4.828893485248369e-05,
      "loss": 1.7221,
      "step": 4960
    },
    {
      "epoch": 0.18067949390917154,
      "grad_norm": 1.834173560142517,
      "learning_rate": 4.825826797411682e-05,
      "loss": 1.7322,
      "step": 4991
    },
    {
      "epoch": 0.18180172679059498,
      "grad_norm": 1.7125933170318604,
      "learning_rate": 4.822733862935702e-05,
      "loss": 1.7156,
      "step": 5022
    },
    {
      "epoch": 0.18292395967201838,
      "grad_norm": 1.7470024824142456,
      "learning_rate": 4.819614716723775e-05,
      "loss": 1.7176,
      "step": 5053
    },
    {
      "epoch": 0.18404619255344182,
      "grad_norm": 1.7042289972305298,
      "learning_rate": 4.8164693939750425e-05,
      "loss": 1.7192,
      "step": 5084
    },
    {
      "epoch": 0.18516842543486525,
      "grad_norm": 1.6803418397903442,
      "learning_rate": 4.813297930184042e-05,
      "loss": 1.7197,
      "step": 5115
    },
    {
      "epoch": 0.18629065831628866,
      "grad_norm": 1.7296956777572632,
      "learning_rate": 4.810100361140314e-05,
      "loss": 1.72,
      "step": 5146
    },
    {
      "epoch": 0.1874128911977121,
      "grad_norm": 1.6245464086532593,
      "learning_rate": 4.8068767229279885e-05,
      "loss": 1.7081,
      "step": 5177
    },
    {
      "epoch": 0.18853512407913553,
      "grad_norm": 1.7138885259628296,
      "learning_rate": 4.8036270519253854e-05,
      "loss": 1.7068,
      "step": 5208
    },
    {
      "epoch": 0.18965735696055894,
      "grad_norm": 1.704185128211975,
      "learning_rate": 4.8003513848046e-05,
      "loss": 1.7219,
      "step": 5239
    },
    {
      "epoch": 0.19077958984198237,
      "grad_norm": 1.712551236152649,
      "learning_rate": 4.79704975853109e-05,
      "loss": 1.7118,
      "step": 5270
    },
    {
      "epoch": 0.1919018227234058,
      "grad_norm": 1.7193052768707275,
      "learning_rate": 4.793722210363262e-05,
      "loss": 1.7195,
      "step": 5301
    },
    {
      "epoch": 0.19302405560482921,
      "grad_norm": 1.5574607849121094,
      "learning_rate": 4.7903687778520414e-05,
      "loss": 1.7286,
      "step": 5332
    },
    {
      "epoch": 0.19414628848625265,
      "grad_norm": 1.7480719089508057,
      "learning_rate": 4.7869894988404593e-05,
      "loss": 1.6957,
      "step": 5363
    },
    {
      "epoch": 0.19526852136767608,
      "grad_norm": 1.7487633228302002,
      "learning_rate": 4.783584411463221e-05,
      "loss": 1.7203,
      "step": 5394
    },
    {
      "epoch": 0.1963907542490995,
      "grad_norm": 1.6720587015151978,
      "learning_rate": 4.780153554146274e-05,
      "loss": 1.7009,
      "step": 5425
    },
    {
      "epoch": 0.19751298713052293,
      "grad_norm": 1.6622951030731201,
      "learning_rate": 4.7766969656063766e-05,
      "loss": 1.7049,
      "step": 5456
    },
    {
      "epoch": 0.19863522001194636,
      "grad_norm": 1.656158208847046,
      "learning_rate": 4.773214684850662e-05,
      "loss": 1.7104,
      "step": 5487
    },
    {
      "epoch": 0.19975745289336977,
      "grad_norm": 1.6559454202651978,
      "learning_rate": 4.769706751176193e-05,
      "loss": 1.7089,
      "step": 5518
    },
    {
      "epoch": 0.2008796857747932,
      "grad_norm": 1.7262494564056396,
      "learning_rate": 4.7661732041695264e-05,
      "loss": 1.7143,
      "step": 5549
    },
    {
      "epoch": 0.20200191865621664,
      "grad_norm": 1.6877381801605225,
      "learning_rate": 4.762614083706258e-05,
      "loss": 1.7134,
      "step": 5580
    },
    {
      "epoch": 0.20312415153764005,
      "grad_norm": 1.5669549703598022,
      "learning_rate": 4.759029429950581e-05,
      "loss": 1.7213,
      "step": 5611
    },
    {
      "epoch": 0.20424638441906348,
      "grad_norm": 1.7044217586517334,
      "learning_rate": 4.7554192833548235e-05,
      "loss": 1.7185,
      "step": 5642
    },
    {
      "epoch": 0.20536861730048692,
      "grad_norm": 1.6999757289886475,
      "learning_rate": 4.751783684659e-05,
      "loss": 1.7163,
      "step": 5673
    },
    {
      "epoch": 0.20649085018191032,
      "grad_norm": 1.6043522357940674,
      "learning_rate": 4.748122674890348e-05,
      "loss": 1.7031,
      "step": 5704
    },
    {
      "epoch": 0.20761308306333376,
      "grad_norm": 1.7062305212020874,
      "learning_rate": 4.7444362953628654e-05,
      "loss": 1.7035,
      "step": 5735
    },
    {
      "epoch": 0.2087353159447572,
      "grad_norm": 1.6612005233764648,
      "learning_rate": 4.7407245876768424e-05,
      "loss": 1.6981,
      "step": 5766
    },
    {
      "epoch": 0.2098575488261806,
      "grad_norm": 1.7277076244354248,
      "learning_rate": 4.736987593718397e-05,
      "loss": 1.7161,
      "step": 5797
    },
    {
      "epoch": 0.21097978170760404,
      "grad_norm": 1.705458402633667,
      "learning_rate": 4.733225355658999e-05,
      "loss": 1.6854,
      "step": 5828
    },
    {
      "epoch": 0.21210201458902747,
      "grad_norm": 1.629443883895874,
      "learning_rate": 4.7294379159549926e-05,
      "loss": 1.7025,
      "step": 5859
    },
    {
      "epoch": 0.21322424747045088,
      "grad_norm": 1.613192081451416,
      "learning_rate": 4.725625317347119e-05,
      "loss": 1.6992,
      "step": 5890
    },
    {
      "epoch": 0.2143464803518743,
      "grad_norm": 1.6801332235336304,
      "learning_rate": 4.7217876028600374e-05,
      "loss": 1.6798,
      "step": 5921
    },
    {
      "epoch": 0.21546871323329775,
      "grad_norm": 1.6418830156326294,
      "learning_rate": 4.717924815801832e-05,
      "loss": 1.6918,
      "step": 5952
    },
    {
      "epoch": 0.21659094611472116,
      "grad_norm": 1.6128371953964233,
      "learning_rate": 4.714036999763532e-05,
      "loss": 1.706,
      "step": 5983
    },
    {
      "epoch": 0.2177131789961446,
      "grad_norm": 1.71291983127594,
      "learning_rate": 4.7101241986186116e-05,
      "loss": 1.6861,
      "step": 6014
    },
    {
      "epoch": 0.21883541187756803,
      "grad_norm": 1.5903745889663696,
      "learning_rate": 4.7061864565225e-05,
      "loss": 1.6886,
      "step": 6045
    },
    {
      "epoch": 0.21995764475899143,
      "grad_norm": 1.71088445186615,
      "learning_rate": 4.702223817912081e-05,
      "loss": 1.7003,
      "step": 6076
    },
    {
      "epoch": 0.22107987764041487,
      "grad_norm": 1.541530966758728,
      "learning_rate": 4.698236327505195e-05,
      "loss": 1.6956,
      "step": 6107
    },
    {
      "epoch": 0.2222021105218383,
      "grad_norm": 1.539455533027649,
      "learning_rate": 4.694224030300127e-05,
      "loss": 1.6833,
      "step": 6138
    },
    {
      "epoch": 0.2233243434032617,
      "grad_norm": 1.688120722770691,
      "learning_rate": 4.690186971575107e-05,
      "loss": 1.6973,
      "step": 6169
    },
    {
      "epoch": 0.22444657628468515,
      "grad_norm": 1.6934964656829834,
      "learning_rate": 4.6861251968877916e-05,
      "loss": 1.6979,
      "step": 6200
    },
    {
      "epoch": 0.22556880916610858,
      "grad_norm": 1.6558688879013062,
      "learning_rate": 4.68203875207476e-05,
      "loss": 1.6925,
      "step": 6231
    },
    {
      "epoch": 0.226691042047532,
      "grad_norm": 1.6245280504226685,
      "learning_rate": 4.677927683250983e-05,
      "loss": 1.6688,
      "step": 6262
    },
    {
      "epoch": 0.22781327492895542,
      "grad_norm": 1.5808422565460205,
      "learning_rate": 4.6737920368093156e-05,
      "loss": 1.688,
      "step": 6293
    },
    {
      "epoch": 0.22893550781037886,
      "grad_norm": 1.5224875211715698,
      "learning_rate": 4.669631859419965e-05,
      "loss": 1.6864,
      "step": 6324
    },
    {
      "epoch": 0.23005774069180226,
      "grad_norm": 1.5904366970062256,
      "learning_rate": 4.6654471980299676e-05,
      "loss": 1.6893,
      "step": 6355
    },
    {
      "epoch": 0.2311799735732257,
      "grad_norm": 1.6145131587982178,
      "learning_rate": 4.661238099862658e-05,
      "loss": 1.6818,
      "step": 6386
    },
    {
      "epoch": 0.23230220645464913,
      "grad_norm": 1.6297610998153687,
      "learning_rate": 4.657004612417138e-05,
      "loss": 1.687,
      "step": 6417
    },
    {
      "epoch": 0.23342443933607254,
      "grad_norm": 1.6199692487716675,
      "learning_rate": 4.6527467834677374e-05,
      "loss": 1.6945,
      "step": 6448
    },
    {
      "epoch": 0.23454667221749598,
      "grad_norm": 1.5439369678497314,
      "learning_rate": 4.648464661063478e-05,
      "loss": 1.6926,
      "step": 6479
    },
    {
      "epoch": 0.23566890509891938,
      "grad_norm": 1.6095410585403442,
      "learning_rate": 4.6441582935275264e-05,
      "loss": 1.689,
      "step": 6510
    },
    {
      "epoch": 0.23679113798034282,
      "grad_norm": 1.4971855878829956,
      "learning_rate": 4.6398277294566586e-05,
      "loss": 1.6622,
      "step": 6541
    },
    {
      "epoch": 0.23791337086176625,
      "grad_norm": 1.53174889087677,
      "learning_rate": 4.6354730177207e-05,
      "loss": 1.6785,
      "step": 6572
    },
    {
      "epoch": 0.23903560374318966,
      "grad_norm": 1.4567692279815674,
      "learning_rate": 4.6310942074619787e-05,
      "loss": 1.6776,
      "step": 6603
    },
    {
      "epoch": 0.2401578366246131,
      "grad_norm": 1.6813284158706665,
      "learning_rate": 4.626691348094777e-05,
      "loss": 1.6692,
      "step": 6634
    },
    {
      "epoch": 0.24128006950603653,
      "grad_norm": 1.5593857765197754,
      "learning_rate": 4.622264489304762e-05,
      "loss": 1.6811,
      "step": 6665
    },
    {
      "epoch": 0.24240230238745994,
      "grad_norm": 1.5681389570236206,
      "learning_rate": 4.617813681048434e-05,
      "loss": 1.689,
      "step": 6696
    },
    {
      "epoch": 0.24352453526888337,
      "grad_norm": 1.6402842998504639,
      "learning_rate": 4.61333897355256e-05,
      "loss": 1.6621,
      "step": 6727
    },
    {
      "epoch": 0.2446467681503068,
      "grad_norm": 1.642669677734375,
      "learning_rate": 4.608840417313604e-05,
      "loss": 1.6562,
      "step": 6758
    },
    {
      "epoch": 0.24576900103173022,
      "grad_norm": 1.6442660093307495,
      "learning_rate": 4.6043180630971646e-05,
      "loss": 1.6721,
      "step": 6789
    },
    {
      "epoch": 0.24689123391315365,
      "grad_norm": 1.5577408075332642,
      "learning_rate": 4.599771961937391e-05,
      "loss": 1.6837,
      "step": 6820
    },
    {
      "epoch": 0.2480134667945771,
      "grad_norm": 1.8555899858474731,
      "learning_rate": 4.5952021651364204e-05,
      "loss": 1.6739,
      "step": 6851
    },
    {
      "epoch": 0.2491356996760005,
      "grad_norm": 1.667812466621399,
      "learning_rate": 4.590608724263786e-05,
      "loss": 1.6704,
      "step": 6882
    },
    {
      "epoch": 0.25025793255742396,
      "grad_norm": 1.6642868518829346,
      "learning_rate": 4.585991691155845e-05,
      "loss": 1.6784,
      "step": 6913
    },
    {
      "epoch": 0.25138016543884734,
      "grad_norm": 1.6429824829101562,
      "learning_rate": 4.581351117915188e-05,
      "loss": 1.6729,
      "step": 6944
    },
    {
      "epoch": 0.25250239832027077,
      "grad_norm": 1.6268694400787354,
      "learning_rate": 4.5766870569100534e-05,
      "loss": 1.6657,
      "step": 6975
    },
    {
      "epoch": 0.2536246312016942,
      "grad_norm": 1.496177315711975,
      "learning_rate": 4.571999560773736e-05,
      "loss": 1.6611,
      "step": 7006
    },
    {
      "epoch": 0.25474686408311764,
      "grad_norm": 1.7032805681228638,
      "learning_rate": 4.5672886824039915e-05,
      "loss": 1.6816,
      "step": 7037
    },
    {
      "epoch": 0.2558690969645411,
      "grad_norm": 1.791925072669983,
      "learning_rate": 4.5625544749624435e-05,
      "loss": 1.6689,
      "step": 7068
    },
    {
      "epoch": 0.2569913298459645,
      "grad_norm": 1.5614711046218872,
      "learning_rate": 4.5577969918739794e-05,
      "loss": 1.6647,
      "step": 7099
    },
    {
      "epoch": 0.2581135627273879,
      "grad_norm": 1.517112135887146,
      "learning_rate": 4.5530162868261486e-05,
      "loss": 1.6614,
      "step": 7130
    },
    {
      "epoch": 0.2592357956088113,
      "grad_norm": 1.5636824369430542,
      "learning_rate": 4.548212413768558e-05,
      "loss": 1.6599,
      "step": 7161
    },
    {
      "epoch": 0.26035802849023476,
      "grad_norm": 1.5803399085998535,
      "learning_rate": 4.543385426912261e-05,
      "loss": 1.6558,
      "step": 7192
    },
    {
      "epoch": 0.2614802613716582,
      "grad_norm": 1.6228526830673218,
      "learning_rate": 4.53853538072915e-05,
      "loss": 1.6778,
      "step": 7223
    },
    {
      "epoch": 0.26260249425308163,
      "grad_norm": 1.5660549402236938,
      "learning_rate": 4.533662329951336e-05,
      "loss": 1.6827,
      "step": 7254
    },
    {
      "epoch": 0.26372472713450507,
      "grad_norm": 1.555421233177185,
      "learning_rate": 4.528766329570536e-05,
      "loss": 1.6755,
      "step": 7285
    },
    {
      "epoch": 0.26484696001592845,
      "grad_norm": 1.603285312652588,
      "learning_rate": 4.523847434837447e-05,
      "loss": 1.6455,
      "step": 7316
    },
    {
      "epoch": 0.2659691928973519,
      "grad_norm": 1.510772943496704,
      "learning_rate": 4.518905701261128e-05,
      "loss": 1.6736,
      "step": 7347
    },
    {
      "epoch": 0.2670914257787753,
      "grad_norm": 1.6260360479354858,
      "learning_rate": 4.5139411846083715e-05,
      "loss": 1.6643,
      "step": 7378
    },
    {
      "epoch": 0.26821365866019875,
      "grad_norm": 3.0237209796905518,
      "learning_rate": 4.508953940903073e-05,
      "loss": 1.6615,
      "step": 7409
    },
    {
      "epoch": 0.2693358915416222,
      "grad_norm": 1.4725430011749268,
      "learning_rate": 4.5039440264255994e-05,
      "loss": 1.6582,
      "step": 7440
    },
    {
      "epoch": 0.2704581244230456,
      "grad_norm": 1.5135307312011719,
      "learning_rate": 4.498911497712155e-05,
      "loss": 1.6754,
      "step": 7471
    },
    {
      "epoch": 0.271580357304469,
      "grad_norm": 1.5741811990737915,
      "learning_rate": 4.493856411554142e-05,
      "loss": 1.6889,
      "step": 7502
    },
    {
      "epoch": 0.27270259018589244,
      "grad_norm": 1.5469688177108765,
      "learning_rate": 4.4887788249975206e-05,
      "loss": 1.6542,
      "step": 7533
    },
    {
      "epoch": 0.27382482306731587,
      "grad_norm": 1.4596927165985107,
      "learning_rate": 4.4836787953421656e-05,
      "loss": 1.6365,
      "step": 7564
    },
    {
      "epoch": 0.2749470559487393,
      "grad_norm": 1.566522479057312,
      "learning_rate": 4.478556380141218e-05,
      "loss": 1.657,
      "step": 7595
    },
    {
      "epoch": 0.27606928883016274,
      "grad_norm": 1.5141624212265015,
      "learning_rate": 4.4734116372004375e-05,
      "loss": 1.6695,
      "step": 7626
    },
    {
      "epoch": 0.2771915217115862,
      "grad_norm": 1.4138630628585815,
      "learning_rate": 4.4682446245775477e-05,
      "loss": 1.6638,
      "step": 7657
    },
    {
      "epoch": 0.27831375459300955,
      "grad_norm": 1.4885402917861938,
      "learning_rate": 4.463055400581586e-05,
      "loss": 1.6817,
      "step": 7688
    },
    {
      "epoch": 0.279435987474433,
      "grad_norm": 1.645486831665039,
      "learning_rate": 4.4578440237722374e-05,
      "loss": 1.6392,
      "step": 7719
    },
    {
      "epoch": 0.2805582203558564,
      "grad_norm": 1.5977535247802734,
      "learning_rate": 4.452610552959183e-05,
      "loss": 1.6557,
      "step": 7750
    },
    {
      "epoch": 0.28168045323727986,
      "grad_norm": 1.6347745656967163,
      "learning_rate": 4.447355047201428e-05,
      "loss": 1.6573,
      "step": 7781
    },
    {
      "epoch": 0.2828026861187033,
      "grad_norm": 1.5288081169128418,
      "learning_rate": 4.4420775658066414e-05,
      "loss": 1.638,
      "step": 7812
    },
    {
      "epoch": 0.28392491900012673,
      "grad_norm": 1.4643625020980835,
      "learning_rate": 4.436778168330484e-05,
      "loss": 1.6402,
      "step": 7843
    },
    {
      "epoch": 0.2850471518815501,
      "grad_norm": 1.568663239479065,
      "learning_rate": 4.4314569145759353e-05,
      "loss": 1.6565,
      "step": 7874
    },
    {
      "epoch": 0.28616938476297354,
      "grad_norm": 1.476515293121338,
      "learning_rate": 4.42611386459262e-05,
      "loss": 1.6709,
      "step": 7905
    },
    {
      "epoch": 0.287291617644397,
      "grad_norm": 1.532404899597168,
      "learning_rate": 4.420749078676133e-05,
      "loss": 1.6333,
      "step": 7936
    },
    {
      "epoch": 0.2884138505258204,
      "grad_norm": 1.5388779640197754,
      "learning_rate": 4.4153626173673516e-05,
      "loss": 1.6494,
      "step": 7967
    },
    {
      "epoch": 0.28953608340724385,
      "grad_norm": 1.5787324905395508,
      "learning_rate": 4.409954541451762e-05,
      "loss": 1.6362,
      "step": 7998
    },
    {
      "epoch": 0.2906583162886673,
      "grad_norm": 1.4780092239379883,
      "learning_rate": 4.404524911958764e-05,
      "loss": 1.643,
      "step": 8029
    },
    {
      "epoch": 0.29178054917009066,
      "grad_norm": 1.5434736013412476,
      "learning_rate": 4.399073790160989e-05,
      "loss": 1.6472,
      "step": 8060
    },
    {
      "epoch": 0.2929027820515141,
      "grad_norm": 1.4898840188980103,
      "learning_rate": 4.393601237573607e-05,
      "loss": 1.6483,
      "step": 8091
    },
    {
      "epoch": 0.29402501493293753,
      "grad_norm": 1.5529502630233765,
      "learning_rate": 4.388107315953628e-05,
      "loss": 1.6291,
      "step": 8122
    },
    {
      "epoch": 0.29514724781436097,
      "grad_norm": 1.4831997156143188,
      "learning_rate": 4.382592087299212e-05,
      "loss": 1.6518,
      "step": 8153
    },
    {
      "epoch": 0.2962694806957844,
      "grad_norm": 1.4568578004837036,
      "learning_rate": 4.377055613848964e-05,
      "loss": 1.6465,
      "step": 8184
    },
    {
      "epoch": 0.29739171357720784,
      "grad_norm": 1.4941576719284058,
      "learning_rate": 4.3714979580812355e-05,
      "loss": 1.634,
      "step": 8215
    },
    {
      "epoch": 0.2985139464586312,
      "grad_norm": 1.5891722440719604,
      "learning_rate": 4.365919182713416e-05,
      "loss": 1.6422,
      "step": 8246
    },
    {
      "epoch": 0.29963617934005465,
      "grad_norm": 1.5435233116149902,
      "learning_rate": 4.360319350701226e-05,
      "loss": 1.6446,
      "step": 8277
    },
    {
      "epoch": 0.3007584122214781,
      "grad_norm": 1.4754277467727661,
      "learning_rate": 4.3546985252380115e-05,
      "loss": 1.655,
      "step": 8308
    },
    {
      "epoch": 0.3018806451029015,
      "grad_norm": 1.5463342666625977,
      "learning_rate": 4.349056769754021e-05,
      "loss": 1.6407,
      "step": 8339
    },
    {
      "epoch": 0.30300287798432496,
      "grad_norm": 1.4847484827041626,
      "learning_rate": 4.3433941479156994e-05,
      "loss": 1.65,
      "step": 8370
    },
    {
      "epoch": 0.3041251108657484,
      "grad_norm": 1.475669264793396,
      "learning_rate": 4.3377107236249647e-05,
      "loss": 1.6398,
      "step": 8401
    },
    {
      "epoch": 0.3052473437471718,
      "grad_norm": 1.558566689491272,
      "learning_rate": 4.332006561018488e-05,
      "loss": 1.6501,
      "step": 8432
    },
    {
      "epoch": 0.3063695766285952,
      "grad_norm": 1.5497310161590576,
      "learning_rate": 4.3262817244669683e-05,
      "loss": 1.6371,
      "step": 8463
    },
    {
      "epoch": 0.30749180951001864,
      "grad_norm": 1.464553952217102,
      "learning_rate": 4.3205362785744083e-05,
      "loss": 1.6766,
      "step": 8494
    },
    {
      "epoch": 0.3086140423914421,
      "grad_norm": 1.5198413133621216,
      "learning_rate": 4.314770288177384e-05,
      "loss": 1.633,
      "step": 8525
    },
    {
      "epoch": 0.3097362752728655,
      "grad_norm": 1.5493290424346924,
      "learning_rate": 4.308983818344313e-05,
      "loss": 1.6465,
      "step": 8556
    },
    {
      "epoch": 0.31085850815428895,
      "grad_norm": 1.4413405656814575,
      "learning_rate": 4.3031769343747206e-05,
      "loss": 1.6463,
      "step": 8587
    },
    {
      "epoch": 0.31198074103571233,
      "grad_norm": 1.508507251739502,
      "learning_rate": 4.297349701798505e-05,
      "loss": 1.6262,
      "step": 8618
    },
    {
      "epoch": 0.31310297391713576,
      "grad_norm": 1.560054063796997,
      "learning_rate": 4.2915021863751916e-05,
      "loss": 1.6484,
      "step": 8649
    },
    {
      "epoch": 0.3142252067985592,
      "grad_norm": 1.495651125907898,
      "learning_rate": 4.285634454093198e-05,
      "loss": 1.6329,
      "step": 8680
    },
    {
      "epoch": 0.31534743967998263,
      "grad_norm": 1.481740117073059,
      "learning_rate": 4.279746571169086e-05,
      "loss": 1.6274,
      "step": 8711
    },
    {
      "epoch": 0.31646967256140607,
      "grad_norm": 1.53792142868042,
      "learning_rate": 4.2738386040468136e-05,
      "loss": 1.6252,
      "step": 8742
    },
    {
      "epoch": 0.31759190544282945,
      "grad_norm": 1.4411643743515015,
      "learning_rate": 4.2679106193969866e-05,
      "loss": 1.6423,
      "step": 8773
    },
    {
      "epoch": 0.3187141383242529,
      "grad_norm": 1.5158967971801758,
      "learning_rate": 4.261962684116106e-05,
      "loss": 1.6596,
      "step": 8804
    },
    {
      "epoch": 0.3198363712056763,
      "grad_norm": 1.6026604175567627,
      "learning_rate": 4.2559948653258145e-05,
      "loss": 1.6399,
      "step": 8835
    },
    {
      "epoch": 0.32095860408709975,
      "grad_norm": 1.4422760009765625,
      "learning_rate": 4.250007230372134e-05,
      "loss": 1.646,
      "step": 8866
    },
    {
      "epoch": 0.3220808369685232,
      "grad_norm": 1.4450057744979858,
      "learning_rate": 4.2439998468247126e-05,
      "loss": 1.6311,
      "step": 8897
    },
    {
      "epoch": 0.3232030698499466,
      "grad_norm": 1.432768702507019,
      "learning_rate": 4.2379727824760566e-05,
      "loss": 1.6234,
      "step": 8928
    },
    {
      "epoch": 0.32432530273137,
      "grad_norm": 1.5206103324890137,
      "learning_rate": 4.231926105340768e-05,
      "loss": 1.6268,
      "step": 8959
    },
    {
      "epoch": 0.32544753561279344,
      "grad_norm": 1.5703397989273071,
      "learning_rate": 4.225859883654776e-05,
      "loss": 1.6409,
      "step": 8990
    },
    {
      "epoch": 0.32656976849421687,
      "grad_norm": 1.4549362659454346,
      "learning_rate": 4.219774185874569e-05,
      "loss": 1.6471,
      "step": 9021
    },
    {
      "epoch": 0.3276920013756403,
      "grad_norm": 1.669263243675232,
      "learning_rate": 4.213669080676418e-05,
      "loss": 1.6355,
      "step": 9052
    },
    {
      "epoch": 0.32881423425706374,
      "grad_norm": 1.4004725217819214,
      "learning_rate": 4.2075446369556056e-05,
      "loss": 1.6046,
      "step": 9083
    },
    {
      "epoch": 0.3299364671384872,
      "grad_norm": 1.4844101667404175,
      "learning_rate": 4.201400923825648e-05,
      "loss": 1.6357,
      "step": 9114
    },
    {
      "epoch": 0.33105870001991056,
      "grad_norm": 1.5377836227416992,
      "learning_rate": 4.195238010617511e-05,
      "loss": 1.6425,
      "step": 9145
    },
    {
      "epoch": 0.332180932901334,
      "grad_norm": 1.4880887269973755,
      "learning_rate": 4.1890559668788344e-05,
      "loss": 1.6368,
      "step": 9176
    },
    {
      "epoch": 0.3333031657827574,
      "grad_norm": 1.5786559581756592,
      "learning_rate": 4.1828548623731405e-05,
      "loss": 1.6327,
      "step": 9207
    },
    {
      "epoch": 0.33442539866418086,
      "grad_norm": 1.4619288444519043,
      "learning_rate": 4.1766347670790506e-05,
      "loss": 1.6431,
      "step": 9238
    },
    {
      "epoch": 0.3355476315456043,
      "grad_norm": 1.4946295022964478,
      "learning_rate": 4.170395751189495e-05,
      "loss": 1.6265,
      "step": 9269
    },
    {
      "epoch": 0.33666986442702773,
      "grad_norm": 1.4698960781097412,
      "learning_rate": 4.164137885110921e-05,
      "loss": 1.6356,
      "step": 9300
    },
    {
      "epoch": 0.3377920973084511,
      "grad_norm": 1.4136701822280884,
      "learning_rate": 4.157861239462495e-05,
      "loss": 1.606,
      "step": 9331
    },
    {
      "epoch": 0.33891433018987455,
      "grad_norm": 1.5250601768493652,
      "learning_rate": 4.1515658850753114e-05,
      "loss": 1.6266,
      "step": 9362
    },
    {
      "epoch": 0.340036563071298,
      "grad_norm": 1.5827070474624634,
      "learning_rate": 4.145251892991588e-05,
      "loss": 1.618,
      "step": 9393
    },
    {
      "epoch": 0.3411587959527214,
      "grad_norm": 1.4887738227844238,
      "learning_rate": 4.138919334463868e-05,
      "loss": 1.6196,
      "step": 9424
    },
    {
      "epoch": 0.34228102883414485,
      "grad_norm": 1.5627696514129639,
      "learning_rate": 4.1325682809542124e-05,
      "loss": 1.6155,
      "step": 9455
    },
    {
      "epoch": 0.3434032617155683,
      "grad_norm": 1.4552607536315918,
      "learning_rate": 4.126198804133398e-05,
      "loss": 1.6272,
      "step": 9486
    },
    {
      "epoch": 0.34452549459699167,
      "grad_norm": 1.5104546546936035,
      "learning_rate": 4.1198109758801055e-05,
      "loss": 1.6245,
      "step": 9517
    },
    {
      "epoch": 0.3456477274784151,
      "grad_norm": 1.4588383436203003,
      "learning_rate": 4.113404868280107e-05,
      "loss": 1.6285,
      "step": 9548
    },
    {
      "epoch": 0.34676996035983854,
      "grad_norm": 1.40166437625885,
      "learning_rate": 4.106980553625457e-05,
      "loss": 1.6181,
      "step": 9579
    },
    {
      "epoch": 0.34789219324126197,
      "grad_norm": 1.4949356317520142,
      "learning_rate": 4.100538104413674e-05,
      "loss": 1.6148,
      "step": 9610
    },
    {
      "epoch": 0.3490144261226854,
      "grad_norm": 1.4863393306732178,
      "learning_rate": 4.09407759334692e-05,
      "loss": 1.6218,
      "step": 9641
    },
    {
      "epoch": 0.35013665900410884,
      "grad_norm": 1.4831593036651611,
      "learning_rate": 4.087599093331186e-05,
      "loss": 1.6201,
      "step": 9672
    },
    {
      "epoch": 0.3512588918855322,
      "grad_norm": 1.487328052520752,
      "learning_rate": 4.081102677475462e-05,
      "loss": 1.6203,
      "step": 9703
    },
    {
      "epoch": 0.35238112476695566,
      "grad_norm": 1.560600996017456,
      "learning_rate": 4.0745884190909194e-05,
      "loss": 1.6099,
      "step": 9734
    },
    {
      "epoch": 0.3535033576483791,
      "grad_norm": 1.45511794090271,
      "learning_rate": 4.0680563916900796e-05,
      "loss": 1.6494,
      "step": 9765
    },
    {
      "epoch": 0.3546255905298025,
      "grad_norm": 1.4966280460357666,
      "learning_rate": 4.0615066689859815e-05,
      "loss": 1.6157,
      "step": 9796
    },
    {
      "epoch": 0.35574782341122596,
      "grad_norm": 1.4888532161712646,
      "learning_rate": 4.0549393248913584e-05,
      "loss": 1.6203,
      "step": 9827
    },
    {
      "epoch": 0.3568700562926494,
      "grad_norm": 1.5495861768722534,
      "learning_rate": 4.048354433517794e-05,
      "loss": 1.6131,
      "step": 9858
    },
    {
      "epoch": 0.3579922891740728,
      "grad_norm": 1.4991432428359985,
      "learning_rate": 4.0417520691748916e-05,
      "loss": 1.6371,
      "step": 9889
    },
    {
      "epoch": 0.3591145220554962,
      "grad_norm": 1.5163663625717163,
      "learning_rate": 4.035132306369438e-05,
      "loss": 1.5911,
      "step": 9920
    },
    {
      "epoch": 0.36023675493691965,
      "grad_norm": 1.439622402191162,
      "learning_rate": 4.028495219804555e-05,
      "loss": 1.6218,
      "step": 9951
    },
    {
      "epoch": 0.3613589878183431,
      "grad_norm": 1.4068893194198608,
      "learning_rate": 4.021840884378864e-05,
      "loss": 1.6284,
      "step": 9982
    },
    {
      "epoch": 0.3624812206997665,
      "grad_norm": 1.4577332735061646,
      "learning_rate": 4.015169375185633e-05,
      "loss": 1.6104,
      "step": 10013
    },
    {
      "epoch": 0.36360345358118995,
      "grad_norm": 1.448833703994751,
      "learning_rate": 4.0084807675119396e-05,
      "loss": 1.6299,
      "step": 10044
    },
    {
      "epoch": 0.36472568646261333,
      "grad_norm": 1.440450668334961,
      "learning_rate": 4.0017751368378106e-05,
      "loss": 1.6255,
      "step": 10075
    },
    {
      "epoch": 0.36584791934403676,
      "grad_norm": 1.3380858898162842,
      "learning_rate": 3.995052558835377e-05,
      "loss": 1.6162,
      "step": 10106
    },
    {
      "epoch": 0.3669701522254602,
      "grad_norm": 1.4549713134765625,
      "learning_rate": 3.988313109368017e-05,
      "loss": 1.6181,
      "step": 10137
    },
    {
      "epoch": 0.36809238510688363,
      "grad_norm": 1.4933863878250122,
      "learning_rate": 3.981556864489504e-05,
      "loss": 1.634,
      "step": 10168
    },
    {
      "epoch": 0.36921461798830707,
      "grad_norm": 1.5157703161239624,
      "learning_rate": 3.974783900443142e-05,
      "loss": 1.6258,
      "step": 10199
    },
    {
      "epoch": 0.3703368508697305,
      "grad_norm": 1.464006781578064,
      "learning_rate": 3.9679942936609095e-05,
      "loss": 1.6235,
      "step": 10230
    },
    {
      "epoch": 0.3714590837511539,
      "grad_norm": 1.3768154382705688,
      "learning_rate": 3.961188120762596e-05,
      "loss": 1.6044,
      "step": 10261
    },
    {
      "epoch": 0.3725813166325773,
      "grad_norm": 1.4427024126052856,
      "learning_rate": 3.954365458554938e-05,
      "loss": 1.6403,
      "step": 10292
    },
    {
      "epoch": 0.37370354951400075,
      "grad_norm": 1.3831264972686768,
      "learning_rate": 3.947526384030751e-05,
      "loss": 1.6136,
      "step": 10323
    },
    {
      "epoch": 0.3748257823954242,
      "grad_norm": 1.4275633096694946,
      "learning_rate": 3.9406709743680624e-05,
      "loss": 1.6167,
      "step": 10354
    },
    {
      "epoch": 0.3759480152768476,
      "grad_norm": 1.4378384351730347,
      "learning_rate": 3.9337993069292366e-05,
      "loss": 1.6231,
      "step": 10385
    },
    {
      "epoch": 0.37707024815827106,
      "grad_norm": 1.3743884563446045,
      "learning_rate": 3.926911459260109e-05,
      "loss": 1.6171,
      "step": 10416
    },
    {
      "epoch": 0.37819248103969444,
      "grad_norm": 1.496160864830017,
      "learning_rate": 3.920007509089102e-05,
      "loss": 1.6234,
      "step": 10447
    },
    {
      "epoch": 0.3793147139211179,
| "grad_norm": 1.4610028266906738, | |
| "learning_rate": 3.913087534326357e-05, | |
| "loss": 1.5963, | |
| "step": 10478 | |
| }, | |
| { | |
| "epoch": 0.3804369468025413, | |
| "grad_norm": 1.483314037322998, | |
| "learning_rate": 3.9061516130628475e-05, | |
| "loss": 1.6021, | |
| "step": 10509 | |
| }, | |
| { | |
| "epoch": 0.38155917968396474, | |
| "grad_norm": 1.4944846630096436, | |
| "learning_rate": 3.8991998235695025e-05, | |
| "loss": 1.5833, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 0.3826814125653882, | |
| "grad_norm": 1.3831861019134521, | |
| "learning_rate": 3.8922322442963224e-05, | |
| "loss": 1.624, | |
| "step": 10571 | |
| }, | |
| { | |
| "epoch": 0.3838036454468116, | |
| "grad_norm": 1.4178634881973267, | |
| "learning_rate": 3.885248953871491e-05, | |
| "loss": 1.6188, | |
| "step": 10602 | |
| }, | |
| { | |
| "epoch": 0.384925878328235, | |
| "grad_norm": 1.4889320135116577, | |
| "learning_rate": 3.8782500311004915e-05, | |
| "loss": 1.608, | |
| "step": 10633 | |
| }, | |
| { | |
| "epoch": 0.38604811120965843, | |
| "grad_norm": 1.3335620164871216, | |
| "learning_rate": 3.871235554965218e-05, | |
| "loss": 1.6182, | |
| "step": 10664 | |
| }, | |
| { | |
| "epoch": 0.38717034409108186, | |
| "grad_norm": 1.4620449542999268, | |
| "learning_rate": 3.864205604623078e-05, | |
| "loss": 1.5848, | |
| "step": 10695 | |
| }, | |
| { | |
| "epoch": 0.3882925769725053, | |
| "grad_norm": 1.3857917785644531, | |
| "learning_rate": 3.857160259406107e-05, | |
| "loss": 1.6048, | |
| "step": 10726 | |
| }, | |
| { | |
| "epoch": 0.38941480985392873, | |
| "grad_norm": 1.4226957559585571, | |
| "learning_rate": 3.8500995988200674e-05, | |
| "loss": 1.6052, | |
| "step": 10757 | |
| }, | |
| { | |
| "epoch": 0.39053704273535217, | |
| "grad_norm": 1.478182077407837, | |
| "learning_rate": 3.843023702543556e-05, | |
| "loss": 1.6268, | |
| "step": 10788 | |
| }, | |
| { | |
| "epoch": 0.39165927561677555, | |
| "grad_norm": 1.431401014328003, | |
| "learning_rate": 3.8359326504270984e-05, | |
| "loss": 1.6176, | |
| "step": 10819 | |
| }, | |
| { | |
| "epoch": 0.392781508498199, | |
| "grad_norm": 1.339880108833313, | |
| "learning_rate": 3.828826522492255e-05, | |
| "loss": 1.5902, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.3939037413796224, | |
| "grad_norm": 1.4537174701690674, | |
| "learning_rate": 3.821705398930713e-05, | |
| "loss": 1.6107, | |
| "step": 10881 | |
| }, | |
| { | |
| "epoch": 0.39502597426104585, | |
| "grad_norm": 1.3559256792068481, | |
| "learning_rate": 3.814569360103385e-05, | |
| "loss": 1.5879, | |
| "step": 10912 | |
| }, | |
| { | |
| "epoch": 0.3961482071424693, | |
| "grad_norm": 1.3561891317367554, | |
| "learning_rate": 3.807418486539499e-05, | |
| "loss": 1.6162, | |
| "step": 10943 | |
| }, | |
| { | |
| "epoch": 0.3972704400238927, | |
| "grad_norm": 1.471112847328186, | |
| "learning_rate": 3.80025285893569e-05, | |
| "loss": 1.5968, | |
| "step": 10974 | |
| }, | |
| { | |
| "epoch": 0.3983926729053161, | |
| "grad_norm": 1.3438925743103027, | |
| "learning_rate": 3.793072558155093e-05, | |
| "loss": 1.5876, | |
| "step": 11005 | |
| }, | |
| { | |
| "epoch": 0.39951490578673954, | |
| "grad_norm": 1.4102482795715332, | |
| "learning_rate": 3.785877665226426e-05, | |
| "loss": 1.5886, | |
| "step": 11036 | |
| }, | |
| { | |
| "epoch": 0.400637138668163, | |
| "grad_norm": 1.4435259103775024, | |
| "learning_rate": 3.778668261343079e-05, | |
| "loss": 1.5999, | |
| "step": 11067 | |
| }, | |
| { | |
| "epoch": 0.4017593715495864, | |
| "grad_norm": 1.4556541442871094, | |
| "learning_rate": 3.771444427862192e-05, | |
| "loss": 1.6185, | |
| "step": 11098 | |
| }, | |
| { | |
| "epoch": 0.40288160443100984, | |
| "grad_norm": 1.370553970336914, | |
| "learning_rate": 3.7642062463037465e-05, | |
| "loss": 1.6005, | |
| "step": 11129 | |
| }, | |
| { | |
| "epoch": 0.4040038373124333, | |
| "grad_norm": 1.368855595588684, | |
| "learning_rate": 3.7569537983496373e-05, | |
| "loss": 1.6024, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 0.40512607019385666, | |
| "grad_norm": 1.4200265407562256, | |
| "learning_rate": 3.749687165842753e-05, | |
| "loss": 1.6082, | |
| "step": 11191 | |
| }, | |
| { | |
| "epoch": 0.4062483030752801, | |
| "grad_norm": 1.4704499244689941, | |
| "learning_rate": 3.7424064307860536e-05, | |
| "loss": 1.6227, | |
| "step": 11222 | |
| }, | |
| { | |
| "epoch": 0.40737053595670353, | |
| "grad_norm": 1.3868876695632935, | |
| "learning_rate": 3.735111675341645e-05, | |
| "loss": 1.6008, | |
| "step": 11253 | |
| }, | |
| { | |
| "epoch": 0.40849276883812696, | |
| "grad_norm": 1.473650574684143, | |
| "learning_rate": 3.7278029818298524e-05, | |
| "loss": 1.5825, | |
| "step": 11284 | |
| }, | |
| { | |
| "epoch": 0.4096150017195504, | |
| "grad_norm": 1.412559986114502, | |
| "learning_rate": 3.720480432728287e-05, | |
| "loss": 1.5971, | |
| "step": 11315 | |
| }, | |
| { | |
| "epoch": 0.41073723460097383, | |
| "grad_norm": 1.4288370609283447, | |
| "learning_rate": 3.71314411067092e-05, | |
| "loss": 1.6079, | |
| "step": 11346 | |
| }, | |
| { | |
| "epoch": 0.4118594674823972, | |
| "grad_norm": 1.4781348705291748, | |
| "learning_rate": 3.70579409844715e-05, | |
| "loss": 1.5904, | |
| "step": 11377 | |
| }, | |
| { | |
| "epoch": 0.41298170036382065, | |
| "grad_norm": 1.377030611038208, | |
| "learning_rate": 3.698430479000865e-05, | |
| "loss": 1.5804, | |
| "step": 11408 | |
| }, | |
| { | |
| "epoch": 0.4141039332452441, | |
| "grad_norm": 1.4176589250564575, | |
| "learning_rate": 3.691053335429509e-05, | |
| "loss": 1.6046, | |
| "step": 11439 | |
| }, | |
| { | |
| "epoch": 0.4152261661266675, | |
| "grad_norm": 1.4933243989944458, | |
| "learning_rate": 3.683662750983147e-05, | |
| "loss": 1.6018, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 0.41634839900809095, | |
| "grad_norm": 1.4382365942001343, | |
| "learning_rate": 3.676258809063518e-05, | |
| "loss": 1.5962, | |
| "step": 11501 | |
| }, | |
| { | |
| "epoch": 0.4174706318895144, | |
| "grad_norm": 1.468005657196045, | |
| "learning_rate": 3.6688415932231004e-05, | |
| "loss": 1.6044, | |
| "step": 11532 | |
| }, | |
| { | |
| "epoch": 0.41859286477093777, | |
| "grad_norm": 1.4858007431030273, | |
| "learning_rate": 3.661411187164166e-05, | |
| "loss": 1.5973, | |
| "step": 11563 | |
| }, | |
| { | |
| "epoch": 0.4197150976523612, | |
| "grad_norm": 1.457524061203003, | |
| "learning_rate": 3.65396767473784e-05, | |
| "loss": 1.5872, | |
| "step": 11594 | |
| }, | |
| { | |
| "epoch": 0.42083733053378464, | |
| "grad_norm": 1.4685806035995483, | |
| "learning_rate": 3.6465111399431465e-05, | |
| "loss": 1.6072, | |
| "step": 11625 | |
| }, | |
| { | |
| "epoch": 0.42195956341520807, | |
| "grad_norm": 1.4355812072753906, | |
| "learning_rate": 3.6390416669260674e-05, | |
| "loss": 1.6005, | |
| "step": 11656 | |
| }, | |
| { | |
| "epoch": 0.4230817962966315, | |
| "grad_norm": 1.4105843305587769, | |
| "learning_rate": 3.63155933997859e-05, | |
| "loss": 1.5999, | |
| "step": 11687 | |
| }, | |
| { | |
| "epoch": 0.42420402917805494, | |
| "grad_norm": 1.4515639543533325, | |
| "learning_rate": 3.624064243537758e-05, | |
| "loss": 1.5903, | |
| "step": 11718 | |
| }, | |
| { | |
| "epoch": 0.4253262620594783, | |
| "grad_norm": 1.4507205486297607, | |
| "learning_rate": 3.616556462184716e-05, | |
| "loss": 1.6004, | |
| "step": 11749 | |
| }, | |
| { | |
| "epoch": 0.42644849494090176, | |
| "grad_norm": 1.3846348524093628, | |
| "learning_rate": 3.609036080643755e-05, | |
| "loss": 1.5878, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 0.4275707278223252, | |
| "grad_norm": 1.4062190055847168, | |
| "learning_rate": 3.60150318378136e-05, | |
| "loss": 1.6049, | |
| "step": 11811 | |
| }, | |
| { | |
| "epoch": 0.4286929607037486, | |
| "grad_norm": 1.5231355428695679, | |
| "learning_rate": 3.5939578566052465e-05, | |
| "loss": 1.5972, | |
| "step": 11842 | |
| }, | |
| { | |
| "epoch": 0.42981519358517206, | |
| "grad_norm": 1.4500449895858765, | |
| "learning_rate": 3.586400184263408e-05, | |
| "loss": 1.5918, | |
| "step": 11873 | |
| }, | |
| { | |
| "epoch": 0.4309374264665955, | |
| "grad_norm": 1.415440559387207, | |
| "learning_rate": 3.578830252043148e-05, | |
| "loss": 1.6111, | |
| "step": 11904 | |
| }, | |
| { | |
| "epoch": 0.4320596593480189, | |
| "grad_norm": 1.3857108354568481, | |
| "learning_rate": 3.571248145370125e-05, | |
| "loss": 1.5882, | |
| "step": 11935 | |
| }, | |
| { | |
| "epoch": 0.4331818922294423, | |
| "grad_norm": 1.442830204963684, | |
| "learning_rate": 3.5636539498073794e-05, | |
| "loss": 1.587, | |
| "step": 11966 | |
| }, | |
| { | |
| "epoch": 0.43430412511086575, | |
| "grad_norm": 1.3706488609313965, | |
| "learning_rate": 3.556047751054378e-05, | |
| "loss": 1.5942, | |
| "step": 11997 | |
| }, | |
| { | |
| "epoch": 0.4354263579922892, | |
| "grad_norm": 1.450567364692688, | |
| "learning_rate": 3.548429634946039e-05, | |
| "loss": 1.6011, | |
| "step": 12028 | |
| }, | |
| { | |
| "epoch": 0.4365485908737126, | |
| "grad_norm": 1.4172272682189941, | |
| "learning_rate": 3.540799687451768e-05, | |
| "loss": 1.5726, | |
| "step": 12059 | |
| }, | |
| { | |
| "epoch": 0.43767082375513605, | |
| "grad_norm": 1.4156157970428467, | |
| "learning_rate": 3.533157994674485e-05, | |
| "loss": 1.5848, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 0.43879305663655943, | |
| "grad_norm": 1.3843419551849365, | |
| "learning_rate": 3.5255046428496546e-05, | |
| "loss": 1.5893, | |
| "step": 12121 | |
| }, | |
| { | |
| "epoch": 0.43991528951798287, | |
| "grad_norm": 1.43569815158844, | |
| "learning_rate": 3.517839718344311e-05, | |
| "loss": 1.5922, | |
| "step": 12152 | |
| }, | |
| { | |
| "epoch": 0.4410375223994063, | |
| "grad_norm": 1.4200314283370972, | |
| "learning_rate": 3.510163307656086e-05, | |
| "loss": 1.6047, | |
| "step": 12183 | |
| }, | |
| { | |
| "epoch": 0.44215975528082974, | |
| "grad_norm": 1.4956674575805664, | |
| "learning_rate": 3.5024754974122324e-05, | |
| "loss": 1.5802, | |
| "step": 12214 | |
| }, | |
| { | |
| "epoch": 0.44328198816225317, | |
| "grad_norm": 1.4289231300354004, | |
| "learning_rate": 3.494776374368643e-05, | |
| "loss": 1.6193, | |
| "step": 12245 | |
| }, | |
| { | |
| "epoch": 0.4444042210436766, | |
| "grad_norm": 1.389282464981079, | |
| "learning_rate": 3.4870660254088724e-05, | |
| "loss": 1.5977, | |
| "step": 12276 | |
| }, | |
| { | |
| "epoch": 0.4455264539251, | |
| "grad_norm": 1.4207974672317505, | |
| "learning_rate": 3.479344537543164e-05, | |
| "loss": 1.5789, | |
| "step": 12307 | |
| }, | |
| { | |
| "epoch": 0.4466486868065234, | |
| "grad_norm": 1.355353832244873, | |
| "learning_rate": 3.4716119979074565e-05, | |
| "loss": 1.5889, | |
| "step": 12338 | |
| }, | |
| { | |
| "epoch": 0.44777091968794686, | |
| "grad_norm": 1.3336408138275146, | |
| "learning_rate": 3.463868493762412e-05, | |
| "loss": 1.5865, | |
| "step": 12369 | |
| }, | |
| { | |
| "epoch": 0.4488931525693703, | |
| "grad_norm": 1.5265244245529175, | |
| "learning_rate": 3.456114112492418e-05, | |
| "loss": 1.5993, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.4500153854507937, | |
| "grad_norm": 1.4629555940628052, | |
| "learning_rate": 3.4483489416046164e-05, | |
| "loss": 1.5982, | |
| "step": 12431 | |
| }, | |
| { | |
| "epoch": 0.45113761833221716, | |
| "grad_norm": 1.43988835811615, | |
| "learning_rate": 3.440573068727905e-05, | |
| "loss": 1.5816, | |
| "step": 12462 | |
| }, | |
| { | |
| "epoch": 0.45225985121364054, | |
| "grad_norm": 1.4607633352279663, | |
| "learning_rate": 3.4327865816119495e-05, | |
| "loss": 1.571, | |
| "step": 12493 | |
| }, | |
| { | |
| "epoch": 0.453382084095064, | |
| "grad_norm": 1.3664649724960327, | |
| "learning_rate": 3.4249895681262025e-05, | |
| "loss": 1.5736, | |
| "step": 12524 | |
| }, | |
| { | |
| "epoch": 0.4545043169764874, | |
| "grad_norm": 1.436094880104065, | |
| "learning_rate": 3.417182116258899e-05, | |
| "loss": 1.5829, | |
| "step": 12555 | |
| }, | |
| { | |
| "epoch": 0.45562654985791085, | |
| "grad_norm": 1.3681309223175049, | |
| "learning_rate": 3.409364314116074e-05, | |
| "loss": 1.5938, | |
| "step": 12586 | |
| }, | |
| { | |
| "epoch": 0.4567487827393343, | |
| "grad_norm": 1.3929277658462524, | |
| "learning_rate": 3.401536249920559e-05, | |
| "loss": 1.572, | |
| "step": 12617 | |
| }, | |
| { | |
| "epoch": 0.4578710156207577, | |
| "grad_norm": 1.3980777263641357, | |
| "learning_rate": 3.393698012010998e-05, | |
| "loss": 1.5941, | |
| "step": 12648 | |
| }, | |
| { | |
| "epoch": 0.4589932485021811, | |
| "grad_norm": 1.4055850505828857, | |
| "learning_rate": 3.385849688840839e-05, | |
| "loss": 1.5818, | |
| "step": 12679 | |
| }, | |
| { | |
| "epoch": 0.46011548138360453, | |
| "grad_norm": 1.3678046464920044, | |
| "learning_rate": 3.3779913689773414e-05, | |
| "loss": 1.5759, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 0.46123771426502796, | |
| "grad_norm": 1.468201994895935, | |
| "learning_rate": 3.370123141100578e-05, | |
| "loss": 1.5792, | |
| "step": 12741 | |
| }, | |
| { | |
| "epoch": 0.4623599471464514, | |
| "grad_norm": 1.346614122390747, | |
| "learning_rate": 3.3622450940024305e-05, | |
| "loss": 1.5983, | |
| "step": 12772 | |
| }, | |
| { | |
| "epoch": 0.46348218002787483, | |
| "grad_norm": 1.3895704746246338, | |
| "learning_rate": 3.35435731658559e-05, | |
| "loss": 1.5809, | |
| "step": 12803 | |
| }, | |
| { | |
| "epoch": 0.46460441290929827, | |
| "grad_norm": 1.3664804697036743, | |
| "learning_rate": 3.346459897862552e-05, | |
| "loss": 1.5788, | |
| "step": 12834 | |
| }, | |
| { | |
| "epoch": 0.46572664579072165, | |
| "grad_norm": 1.4561264514923096, | |
| "learning_rate": 3.338552926954613e-05, | |
| "loss": 1.5867, | |
| "step": 12865 | |
| }, | |
| { | |
| "epoch": 0.4668488786721451, | |
| "grad_norm": 1.3407316207885742, | |
| "learning_rate": 3.330636493090868e-05, | |
| "loss": 1.5729, | |
| "step": 12896 | |
| }, | |
| { | |
| "epoch": 0.4679711115535685, | |
| "grad_norm": 1.3465179204940796, | |
| "learning_rate": 3.322710685607193e-05, | |
| "loss": 1.5915, | |
| "step": 12927 | |
| }, | |
| { | |
| "epoch": 0.46909334443499195, | |
| "grad_norm": 1.553585171699524, | |
| "learning_rate": 3.314775593945251e-05, | |
| "loss": 1.5875, | |
| "step": 12958 | |
| }, | |
| { | |
| "epoch": 0.4702155773164154, | |
| "grad_norm": 1.3964170217514038, | |
| "learning_rate": 3.3068313076514714e-05, | |
| "loss": 1.5783, | |
| "step": 12989 | |
| }, | |
| { | |
| "epoch": 0.47133781019783877, | |
| "grad_norm": 1.3884953260421753, | |
| "learning_rate": 3.298877916376047e-05, | |
| "loss": 1.5577, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 0.4724600430792622, | |
| "grad_norm": 1.3421337604522705, | |
| "learning_rate": 3.290915509871915e-05, | |
| "loss": 1.5791, | |
| "step": 13051 | |
| }, | |
| { | |
| "epoch": 0.47358227596068564, | |
| "grad_norm": 1.297429084777832, | |
| "learning_rate": 3.282944177993753e-05, | |
| "loss": 1.5699, | |
| "step": 13082 | |
| }, | |
| { | |
| "epoch": 0.4747045088421091, | |
| "grad_norm": 1.3672280311584473, | |
| "learning_rate": 3.274964010696957e-05, | |
| "loss": 1.5711, | |
| "step": 13113 | |
| }, | |
| { | |
| "epoch": 0.4758267417235325, | |
| "grad_norm": 1.4202091693878174, | |
| "learning_rate": 3.266975098036629e-05, | |
| "loss": 1.5679, | |
| "step": 13144 | |
| }, | |
| { | |
| "epoch": 0.47694897460495594, | |
| "grad_norm": 1.383973479270935, | |
| "learning_rate": 3.258977530166562e-05, | |
| "loss": 1.6019, | |
| "step": 13175 | |
| }, | |
| { | |
| "epoch": 0.4780712074863793, | |
| "grad_norm": 1.3134119510650635, | |
| "learning_rate": 3.250971397338227e-05, | |
| "loss": 1.5721, | |
| "step": 13206 | |
| }, | |
| { | |
| "epoch": 0.47919344036780276, | |
| "grad_norm": 1.3229272365570068, | |
| "learning_rate": 3.2429567898997404e-05, | |
| "loss": 1.5812, | |
| "step": 13237 | |
| }, | |
| { | |
| "epoch": 0.4803156732492262, | |
| "grad_norm": 1.2991341352462769, | |
| "learning_rate": 3.234933798294859e-05, | |
| "loss": 1.5793, | |
| "step": 13268 | |
| }, | |
| { | |
| "epoch": 0.48143790613064963, | |
| "grad_norm": 1.384522795677185, | |
| "learning_rate": 3.2269025130619535e-05, | |
| "loss": 1.5592, | |
| "step": 13299 | |
| }, | |
| { | |
| "epoch": 0.48256013901207306, | |
| "grad_norm": 1.3743617534637451, | |
| "learning_rate": 3.218863024832985e-05, | |
| "loss": 1.5785, | |
| "step": 13330 | |
| }, | |
| { | |
| "epoch": 0.4836823718934965, | |
| "grad_norm": 1.4512649774551392, | |
| "learning_rate": 3.2108154243324864e-05, | |
| "loss": 1.5703, | |
| "step": 13361 | |
| }, | |
| { | |
| "epoch": 0.4848046047749199, | |
| "grad_norm": 1.2982932329177856, | |
| "learning_rate": 3.2027598023765345e-05, | |
| "loss": 1.5609, | |
| "step": 13392 | |
| }, | |
| { | |
| "epoch": 0.4859268376563433, | |
| "grad_norm": 1.3747495412826538, | |
| "learning_rate": 3.194696249871729e-05, | |
| "loss": 1.5766, | |
| "step": 13423 | |
| }, | |
| { | |
| "epoch": 0.48704907053776675, | |
| "grad_norm": 1.3155137300491333, | |
| "learning_rate": 3.186624857814164e-05, | |
| "loss": 1.57, | |
| "step": 13454 | |
| }, | |
| { | |
| "epoch": 0.4881713034191902, | |
| "grad_norm": 1.4094924926757812, | |
| "learning_rate": 3.178545717288401e-05, | |
| "loss": 1.5855, | |
| "step": 13485 | |
| }, | |
| { | |
| "epoch": 0.4892935363006136, | |
| "grad_norm": 1.3931294679641724, | |
| "learning_rate": 3.170458919466444e-05, | |
| "loss": 1.5486, | |
| "step": 13516 | |
| }, | |
| { | |
| "epoch": 0.49041576918203705, | |
| "grad_norm": 1.48263418674469, | |
| "learning_rate": 3.1623645556067063e-05, | |
| "loss": 1.5829, | |
| "step": 13547 | |
| }, | |
| { | |
| "epoch": 0.49153800206346043, | |
| "grad_norm": 1.3016873598098755, | |
| "learning_rate": 3.154262717052985e-05, | |
| "loss": 1.5808, | |
| "step": 13578 | |
| }, | |
| { | |
| "epoch": 0.49266023494488387, | |
| "grad_norm": 1.623724102973938, | |
| "learning_rate": 3.146153495233426e-05, | |
| "loss": 1.5582, | |
| "step": 13609 | |
| }, | |
| { | |
| "epoch": 0.4937824678263073, | |
| "grad_norm": 1.3603851795196533, | |
| "learning_rate": 3.1380369816594944e-05, | |
| "loss": 1.5703, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 0.49490470070773074, | |
| "grad_norm": 1.4793063402175903, | |
| "learning_rate": 3.129913267924946e-05, | |
| "loss": 1.5739, | |
| "step": 13671 | |
| }, | |
| { | |
| "epoch": 0.4960269335891542, | |
| "grad_norm": 1.4615710973739624, | |
| "learning_rate": 3.121782445704782e-05, | |
| "loss": 1.5846, | |
| "step": 13702 | |
| }, | |
| { | |
| "epoch": 0.4971491664705776, | |
| "grad_norm": 1.419823408126831, | |
| "learning_rate": 3.11364460675423e-05, | |
| "loss": 1.5702, | |
| "step": 13733 | |
| }, | |
| { | |
| "epoch": 0.498271399352001, | |
| "grad_norm": 1.429337501525879, | |
| "learning_rate": 3.1054998429076934e-05, | |
| "loss": 1.5825, | |
| "step": 13764 | |
| }, | |
| { | |
| "epoch": 0.4993936322334244, | |
| "grad_norm": 1.3171850442886353, | |
| "learning_rate": 3.097348246077728e-05, | |
| "loss": 1.5721, | |
| "step": 13795 | |
| }, | |
| { | |
| "epoch": 0.5005158651148479, | |
| "grad_norm": 1.487111210823059, | |
| "learning_rate": 3.0891899082539924e-05, | |
| "loss": 1.5879, | |
| "step": 13826 | |
| }, | |
| { | |
| "epoch": 0.5016380979962712, | |
| "grad_norm": 1.4311749935150146, | |
| "learning_rate": 3.0810249215022233e-05, | |
| "loss": 1.5843, | |
| "step": 13857 | |
| }, | |
| { | |
| "epoch": 0.5027603308776947, | |
| "grad_norm": 1.468863844871521, | |
| "learning_rate": 3.0728533779631865e-05, | |
| "loss": 1.5884, | |
| "step": 13888 | |
| }, | |
| { | |
| "epoch": 0.5038825637591181, | |
| "grad_norm": 1.3970764875411987, | |
| "learning_rate": 3.064675369851637e-05, | |
| "loss": 1.5769, | |
| "step": 13919 | |
| }, | |
| { | |
| "epoch": 0.5050047966405415, | |
| "grad_norm": 1.3623278141021729, | |
| "learning_rate": 3.056490989455289e-05, | |
| "loss": 1.5706, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 0.506127029521965, | |
| "grad_norm": 1.3077219724655151, | |
| "learning_rate": 3.0483003291337596e-05, | |
| "loss": 1.5761, | |
| "step": 13981 | |
| }, | |
| { | |
| "epoch": 0.5072492624033884, | |
| "grad_norm": 1.3295941352844238, | |
| "learning_rate": 3.040103481317539e-05, | |
| "loss": 1.5776, | |
| "step": 14012 | |
| }, | |
| { | |
| "epoch": 0.5083714952848118, | |
| "grad_norm": 1.3900631666183472, | |
| "learning_rate": 3.03190053850694e-05, | |
| "loss": 1.5777, | |
| "step": 14043 | |
| }, | |
| { | |
| "epoch": 0.5094937281662353, | |
| "grad_norm": 1.3359615802764893, | |
| "learning_rate": 3.0236915932710573e-05, | |
| "loss": 1.5569, | |
| "step": 14074 | |
| }, | |
| { | |
| "epoch": 0.5106159610476587, | |
| "grad_norm": 1.2790296077728271, | |
| "learning_rate": 3.0154767382467232e-05, | |
| "loss": 1.5598, | |
| "step": 14105 | |
| }, | |
| { | |
| "epoch": 0.5117381939290822, | |
| "grad_norm": 1.5767478942871094, | |
| "learning_rate": 3.0072560661374582e-05, | |
| "loss": 1.5483, | |
| "step": 14136 | |
| }, | |
| { | |
| "epoch": 0.5128604268105056, | |
| "grad_norm": 1.343381404876709, | |
| "learning_rate": 2.999029669712431e-05, | |
| "loss": 1.5689, | |
| "step": 14167 | |
| }, | |
| { | |
| "epoch": 0.513982659691929, | |
| "grad_norm": 1.4147651195526123, | |
| "learning_rate": 2.990797641805408e-05, | |
| "loss": 1.5643, | |
| "step": 14198 | |
| }, | |
| { | |
| "epoch": 0.5151048925733523, | |
| "grad_norm": 1.3360931873321533, | |
| "learning_rate": 2.982560075313704e-05, | |
| "loss": 1.5689, | |
| "step": 14229 | |
| }, | |
| { | |
| "epoch": 0.5162271254547758, | |
| "grad_norm": 1.458016037940979, | |
| "learning_rate": 2.9743170631971368e-05, | |
| "loss": 1.5633, | |
| "step": 14260 | |
| }, | |
| { | |
| "epoch": 0.5173493583361992, | |
| "grad_norm": 1.430955171585083, | |
| "learning_rate": 2.9660686984769792e-05, | |
| "loss": 1.5559, | |
| "step": 14291 | |
| }, | |
| { | |
| "epoch": 0.5184715912176227, | |
| "grad_norm": 1.3806464672088623, | |
| "learning_rate": 2.9578150742349047e-05, | |
| "loss": 1.577, | |
| "step": 14322 | |
| }, | |
| { | |
| "epoch": 0.5195938240990461, | |
| "grad_norm": 1.359813928604126, | |
| "learning_rate": 2.949556283611942e-05, | |
| "loss": 1.5485, | |
| "step": 14353 | |
| }, | |
| { | |
| "epoch": 0.5207160569804695, | |
| "grad_norm": 1.4222601652145386, | |
| "learning_rate": 2.9412924198074206e-05, | |
| "loss": 1.575, | |
| "step": 14384 | |
| }, | |
| { | |
| "epoch": 0.521838289861893, | |
| "grad_norm": 1.3186180591583252, | |
| "learning_rate": 2.9330235760779208e-05, | |
| "loss": 1.5744, | |
| "step": 14415 | |
| }, | |
| { | |
| "epoch": 0.5229605227433164, | |
| "grad_norm": 1.3309999704360962, | |
| "learning_rate": 2.9247498457362188e-05, | |
| "loss": 1.5664, | |
| "step": 14446 | |
| }, | |
| { | |
| "epoch": 0.5240827556247398, | |
| "grad_norm": 1.368514060974121, | |
| "learning_rate": 2.9164713221502373e-05, | |
| "loss": 1.56, | |
| "step": 14477 | |
| }, | |
| { | |
| "epoch": 0.5252049885061633, | |
| "grad_norm": 1.3132268190383911, | |
| "learning_rate": 2.9081880987419912e-05, | |
| "loss": 1.563, | |
| "step": 14508 | |
| }, | |
| { | |
| "epoch": 0.5263272213875867, | |
| "grad_norm": 1.431347131729126, | |
| "learning_rate": 2.8999002689865296e-05, | |
| "loss": 1.5612, | |
| "step": 14539 | |
| }, | |
| { | |
| "epoch": 0.5274494542690101, | |
| "grad_norm": 1.303941249847412, | |
| "learning_rate": 2.8916079264108852e-05, | |
| "loss": 1.5601, | |
| "step": 14570 | |
| }, | |
| { | |
| "epoch": 0.5285716871504335, | |
| "grad_norm": 1.4077236652374268, | |
| "learning_rate": 2.883311164593017e-05, | |
| "loss": 1.5516, | |
| "step": 14601 | |
| }, | |
| { | |
| "epoch": 0.5296939200318569, | |
| "grad_norm": 1.3132708072662354, | |
| "learning_rate": 2.875010077160754e-05, | |
| "loss": 1.5538, | |
| "step": 14632 | |
| }, | |
| { | |
| "epoch": 0.5308161529132803, | |
| "grad_norm": 1.2660679817199707, | |
| "learning_rate": 2.866704757790741e-05, | |
| "loss": 1.5652, | |
| "step": 14663 | |
| }, | |
| { | |
| "epoch": 0.5319383857947038, | |
| "grad_norm": 1.4541290998458862, | |
| "learning_rate": 2.858395300207376e-05, | |
| "loss": 1.5602, | |
| "step": 14694 | |
| }, | |
| { | |
| "epoch": 0.5330606186761272, | |
| "grad_norm": 1.3694487810134888, | |
| "learning_rate": 2.8500817981817607e-05, | |
| "loss": 1.5483, | |
| "step": 14725 | |
| }, | |
| { | |
| "epoch": 0.5341828515575506, | |
| "grad_norm": 1.3493553400039673, | |
| "learning_rate": 2.8417643455306336e-05, | |
| "loss": 1.5539, | |
| "step": 14756 | |
| }, | |
| { | |
| "epoch": 0.5353050844389741, | |
| "grad_norm": 1.4280232191085815, | |
| "learning_rate": 2.8334430361153185e-05, | |
| "loss": 1.5672, | |
| "step": 14787 | |
| }, | |
| { | |
| "epoch": 0.5364273173203975, | |
| "grad_norm": 1.3430079221725464, | |
| "learning_rate": 2.8251179638406612e-05, | |
| "loss": 1.5474, | |
| "step": 14818 | |
| }, | |
| { | |
| "epoch": 0.5375495502018209, | |
| "grad_norm": 1.3380746841430664, | |
| "learning_rate": 2.8167892226539704e-05, | |
| "loss": 1.5508, | |
| "step": 14849 | |
| }, | |
| { | |
| "epoch": 0.5386717830832444, | |
| "grad_norm": 1.3501845598220825, | |
| "learning_rate": 2.8084569065439588e-05, | |
| "loss": 1.5656, | |
| "step": 14880 | |
| }, | |
| { | |
| "epoch": 0.5397940159646678, | |
| "grad_norm": 1.3564043045043945, | |
| "learning_rate": 2.8001211095396807e-05, | |
| "loss": 1.5726, | |
| "step": 14911 | |
| }, | |
| { | |
| "epoch": 0.5409162488460912, | |
| "grad_norm": 1.3949267864227295, | |
| "learning_rate": 2.791781925709473e-05, | |
| "loss": 1.5635, | |
| "step": 14942 | |
| }, | |
| { | |
| "epoch": 0.5420384817275146, | |
| "grad_norm": 1.4317481517791748, | |
| "learning_rate": 2.7834394491598908e-05, | |
| "loss": 1.5447, | |
| "step": 14973 | |
| }, | |
| { | |
| "epoch": 0.543160714608938, | |
| "grad_norm": 1.396610140800476, | |
| "learning_rate": 2.7750937740346485e-05, | |
| "loss": 1.557, | |
| "step": 15004 | |
| }, | |
| { | |
| "epoch": 0.5442829474903614, | |
| "grad_norm": 1.369884967803955, | |
| "learning_rate": 2.7667449945135564e-05, | |
| "loss": 1.5672, | |
| "step": 15035 | |
| }, | |
| { | |
| "epoch": 0.5454051803717849, | |
| "grad_norm": 1.4686237573623657, | |
| "learning_rate": 2.7583932048114557e-05, | |
| "loss": 1.572, | |
| "step": 15066 | |
| }, | |
| { | |
| "epoch": 0.5465274132532083, | |
| "grad_norm": 1.524717926979065, | |
| "learning_rate": 2.7500384991771587e-05, | |
| "loss": 1.5537, | |
| "step": 15097 | |
| }, | |
| { | |
| "epoch": 0.5476496461346317, | |
| "grad_norm": 1.3461147546768188, | |
| "learning_rate": 2.7416809718923825e-05, | |
| "loss": 1.5321, | |
| "step": 15128 | |
| }, | |
| { | |
| "epoch": 0.5487718790160552, | |
| "grad_norm": 1.3704477548599243, | |
| "learning_rate": 2.7333207172706864e-05, | |
| "loss": 1.5677, | |
| "step": 15159 | |
| }, | |
| { | |
| "epoch": 0.5498941118974786, | |
| "grad_norm": 1.3601664304733276, | |
| "learning_rate": 2.7249578296564088e-05, | |
| "loss": 1.5577, | |
| "step": 15190 | |
| }, | |
| { | |
| "epoch": 0.551016344778902, | |
| "grad_norm": 1.4055489301681519, | |
| "learning_rate": 2.7165924034235973e-05, | |
| "loss": 1.5453, | |
| "step": 15221 | |
| }, | |
| { | |
| "epoch": 0.5521385776603255, | |
| "grad_norm": 1.3587946891784668, | |
| "learning_rate": 2.708224532974953e-05, | |
| "loss": 1.5401, | |
| "step": 15252 | |
| }, | |
| { | |
| "epoch": 0.5532608105417489, | |
| "grad_norm": 1.3209632635116577, | |
| "learning_rate": 2.6998543127407538e-05, | |
| "loss": 1.5383, | |
| "step": 15283 | |
| }, | |
| { | |
| "epoch": 0.5543830434231724, | |
| "grad_norm": 1.294921636581421, | |
| "learning_rate": 2.6914818371777988e-05, | |
| "loss": 1.5734, | |
| "step": 15314 | |
| }, | |
| { | |
| "epoch": 0.5555052763045957, | |
| "grad_norm": 1.6017462015151978, | |
| "learning_rate": 2.6831072007683373e-05, | |
| "loss": 1.5702, | |
| "step": 15345 | |
| }, | |
| { | |
| "epoch": 0.5566275091860191, | |
| "grad_norm": 1.3644670248031616, | |
| "learning_rate": 2.6747304980190018e-05, | |
| "loss": 1.571, | |
| "step": 15376 | |
| }, | |
| { | |
| "epoch": 0.5577497420674425, | |
| "grad_norm": 1.3694461584091187, | |
| "learning_rate": 2.6663518234597453e-05, | |
| "loss": 1.5398, | |
| "step": 15407 | |
| }, | |
| { | |
| "epoch": 0.558871974948866, | |
| "grad_norm": 1.3380069732666016, | |
| "learning_rate": 2.6579712716427696e-05, | |
| "loss": 1.5628, | |
| "step": 15438 | |
| }, | |
| { | |
| "epoch": 0.5599942078302894, | |
| "grad_norm": 1.322144627571106, | |
| "learning_rate": 2.6495889371414652e-05, | |
| "loss": 1.5682, | |
| "step": 15469 | |
| }, | |
| { | |
| "epoch": 0.5611164407117128, | |
| "grad_norm": 1.3240221738815308, | |
| "learning_rate": 2.6412049145493367e-05, | |
| "loss": 1.5506, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.5622386735931363, | |
| "grad_norm": 1.3131070137023926, | |
| "learning_rate": 2.632819298478939e-05, | |
| "loss": 1.5529, | |
| "step": 15531 | |
| }, | |
| { | |
| "epoch": 0.5633609064745597, | |
| "grad_norm": 1.3907220363616943, | |
| "learning_rate": 2.6244321835608105e-05, | |
| "loss": 1.547, | |
| "step": 15562 | |
| }, | |
| { | |
| "epoch": 0.5644831393559832, | |
| "grad_norm": 1.233981966972351, | |
| "learning_rate": 2.6160436644424024e-05, | |
| "loss": 1.5377, | |
| "step": 15593 | |
| }, | |
| { | |
| "epoch": 0.5656053722374066, | |
| "grad_norm": 1.443326711654663, | |
| "learning_rate": 2.6076538357870133e-05, | |
| "loss": 1.5788, | |
| "step": 15624 | |
| }, | |
| { | |
| "epoch": 0.56672760511883, | |
| "grad_norm": 1.4688999652862549, | |
| "learning_rate": 2.5992627922727196e-05, | |
| "loss": 1.5629, | |
| "step": 15655 | |
| }, | |
| { | |
| "epoch": 0.5678498380002535, | |
| "grad_norm": 1.3365731239318848, | |
| "learning_rate": 2.5908706285913066e-05, | |
| "loss": 1.5544, | |
| "step": 15686 | |
| }, | |
| { | |
| "epoch": 0.5689720708816768, | |
| "grad_norm": 1.3793649673461914, | |
| "learning_rate": 2.5824774394472008e-05, | |
| "loss": 1.5317, | |
| "step": 15717 | |
| }, | |
| { | |
| "epoch": 0.5700943037631002, | |
| "grad_norm": 1.417433738708496, | |
| "learning_rate": 2.5740833195563996e-05, | |
| "loss": 1.5506, | |
| "step": 15748 | |
| }, | |
| { | |
| "epoch": 0.5712165366445237, | |
| "grad_norm": 1.346710443496704, | |
| "learning_rate": 2.5656883636454067e-05, | |
| "loss": 1.5462, | |
| "step": 15779 | |
| }, | |
| { | |
| "epoch": 0.5723387695259471, | |
| "grad_norm": 1.4065468311309814, | |
| "learning_rate": 2.557292666450159e-05, | |
| "loss": 1.5464, | |
| "step": 15810 | |
| }, | |
| { | |
| "epoch": 0.5734610024073705, | |
| "grad_norm": 1.3797588348388672, | |
| "learning_rate": 2.5488963227149566e-05, | |
| "loss": 1.565, | |
| "step": 15841 | |
| }, | |
| { | |
| "epoch": 0.574583235288794, | |
| "grad_norm": 1.2842196226119995, | |
| "learning_rate": 2.5404994271913983e-05, | |
| "loss": 1.5489, | |
| "step": 15872 | |
| }, | |
| { | |
| "epoch": 0.5757054681702174, | |
| "grad_norm": 1.368696689605713, | |
| "learning_rate": 2.5321020746373085e-05, | |
| "loss": 1.5358, | |
| "step": 15903 | |
| }, | |
| { | |
| "epoch": 0.5768277010516408, | |
| "grad_norm": 1.3306961059570312, | |
| "learning_rate": 2.52370435981567e-05, | |
| "loss": 1.541, | |
| "step": 15934 | |
| }, | |
| { | |
| "epoch": 0.5779499339330643, | |
| "grad_norm": 1.286727786064148, | |
| "learning_rate": 2.5153063774935533e-05, | |
| "loss": 1.533, | |
| "step": 15965 | |
| }, | |
| { | |
| "epoch": 0.5790721668144877, | |
| "grad_norm": 1.434964656829834, | |
| "learning_rate": 2.506908222441045e-05, | |
| "loss": 1.5404, | |
| "step": 15996 | |
| }, | |
| { | |
| "epoch": 0.5801943996959111, | |
| "grad_norm": 1.3955284357070923, | |
| "learning_rate": 2.498509989430187e-05, | |
| "loss": 1.5532, | |
| "step": 16027 | |
| }, | |
| { | |
| "epoch": 0.5813166325773346, | |
| "grad_norm": 1.3676408529281616, | |
| "learning_rate": 2.4901117732338958e-05, | |
| "loss": 1.5263, | |
| "step": 16058 | |
| }, | |
| { | |
| "epoch": 0.5824388654587579, | |
| "grad_norm": 1.3900113105773926, | |
| "learning_rate": 2.481713668624899e-05, | |
| "loss": 1.5465, | |
| "step": 16089 | |
| }, | |
| { | |
| "epoch": 0.5835610983401813, | |
| "grad_norm": 1.3808554410934448, | |
| "learning_rate": 2.4733157703746663e-05, | |
| "loss": 1.5332, | |
| "step": 16120 | |
| }, | |
| { | |
| "epoch": 0.5846833312216048, | |
| "grad_norm": 1.2974086999893188, | |
| "learning_rate": 2.4649181732523392e-05, | |
| "loss": 1.5562, | |
| "step": 16151 | |
| }, | |
| { | |
| "epoch": 0.5858055641030282, | |
| "grad_norm": 1.4109300374984741, | |
| "learning_rate": 2.4565209720236582e-05, | |
| "loss": 1.5273, | |
| "step": 16182 | |
| }, | |
| { | |
| "epoch": 0.5869277969844516, | |
| "grad_norm": 1.3626701831817627, | |
| "learning_rate": 2.4481242614498975e-05, | |
| "loss": 1.5311, | |
| "step": 16213 | |
| }, | |
| { | |
| "epoch": 0.5880500298658751, | |
| "grad_norm": 1.3017241954803467, | |
| "learning_rate": 2.439728136286796e-05, | |
| "loss": 1.5522, | |
| "step": 16244 | |
| }, | |
| { | |
| "epoch": 0.5891722627472985, | |
| "grad_norm": 1.349171757698059, | |
| "learning_rate": 2.4313326912834852e-05, | |
| "loss": 1.5262, | |
| "step": 16275 | |
| }, | |
| { | |
| "epoch": 0.5902944956287219, | |
| "grad_norm": 1.3548376560211182, | |
| "learning_rate": 2.4229380211814206e-05, | |
| "loss": 1.5455, | |
| "step": 16306 | |
| }, | |
| { | |
| "epoch": 0.5914167285101454, | |
| "grad_norm": 1.412003755569458, | |
| "learning_rate": 2.4145442207133124e-05, | |
| "loss": 1.5634, | |
| "step": 16337 | |
| }, | |
| { | |
| "epoch": 0.5925389613915688, | |
| "grad_norm": 1.3400499820709229, | |
| "learning_rate": 2.406151384602059e-05, | |
| "loss": 1.5398, | |
| "step": 16368 | |
| }, | |
| { | |
| "epoch": 0.5936611942729922, | |
| "grad_norm": 1.3035651445388794, | |
| "learning_rate": 2.3977596075596747e-05, | |
| "loss": 1.5289, | |
| "step": 16399 | |
| }, | |
| { | |
| "epoch": 0.5947834271544157, | |
| "grad_norm": 1.322824478149414, | |
| "learning_rate": 2.3893689842862223e-05, | |
| "loss": 1.5509, | |
| "step": 16430 | |
| }, | |
| { | |
| "epoch": 0.595905660035839, | |
| "grad_norm": 1.3810386657714844, | |
| "learning_rate": 2.3809796094687475e-05, | |
| "loss": 1.5439, | |
| "step": 16461 | |
| }, | |
| { | |
| "epoch": 0.5970278929172624, | |
| "grad_norm": 1.399760127067566, | |
| "learning_rate": 2.372591577780202e-05, | |
| "loss": 1.5459, | |
| "step": 16492 | |
| }, | |
| { | |
| "epoch": 0.5981501257986859, | |
| "grad_norm": 1.3253116607666016, | |
| "learning_rate": 2.3642049838783838e-05, | |
| "loss": 1.5556, | |
| "step": 16523 | |
| }, | |
| { | |
| "epoch": 0.5992723586801093, | |
| "grad_norm": 1.3376234769821167, | |
| "learning_rate": 2.3558199224048666e-05, | |
| "loss": 1.5322, | |
| "step": 16554 | |
| }, | |
| { | |
| "epoch": 0.6003945915615327, | |
| "grad_norm": 1.274533748626709, | |
| "learning_rate": 2.347436487983929e-05, | |
| "loss": 1.5288, | |
| "step": 16585 | |
| }, | |
| { | |
| "epoch": 0.6015168244429562, | |
| "grad_norm": 1.3756400346755981, | |
| "learning_rate": 2.3390547752214888e-05, | |
| "loss": 1.5287, | |
| "step": 16616 | |
| }, | |
| { | |
| "epoch": 0.6026390573243796, | |
| "grad_norm": 1.391845941543579, | |
| "learning_rate": 2.330674878704035e-05, | |
| "loss": 1.5329, | |
| "step": 16647 | |
| }, | |
| { | |
| "epoch": 0.603761290205803, | |
| "grad_norm": 1.414237380027771, | |
| "learning_rate": 2.322296892997561e-05, | |
| "loss": 1.5482, | |
| "step": 16678 | |
| }, | |
| { | |
| "epoch": 0.6048835230872265, | |
| "grad_norm": 1.3953816890716553, | |
| "learning_rate": 2.313920912646497e-05, | |
| "loss": 1.5372, | |
| "step": 16709 | |
| }, | |
| { | |
| "epoch": 0.6060057559686499, | |
| "grad_norm": 1.3669557571411133, | |
| "learning_rate": 2.305547032172643e-05, | |
| "loss": 1.5522, | |
| "step": 16740 | |
| }, | |
| { | |
| "epoch": 0.6071279888500734, | |
| "grad_norm": 1.3847616910934448, | |
| "learning_rate": 2.2971753460741014e-05, | |
| "loss": 1.5314, | |
| "step": 16771 | |
| }, | |
| { | |
| "epoch": 0.6082502217314968, | |
| "grad_norm": 1.2923661470413208, | |
| "learning_rate": 2.288805948824212e-05, | |
| "loss": 1.5434, | |
| "step": 16802 | |
| }, | |
| { | |
| "epoch": 0.6093724546129201, | |
| "grad_norm": 1.3146955966949463, | |
| "learning_rate": 2.2804389348704858e-05, | |
| "loss": 1.5442, | |
| "step": 16833 | |
| }, | |
| { | |
| "epoch": 0.6104946874943435, | |
| "grad_norm": 1.362166166305542, | |
| "learning_rate": 2.2720743986335374e-05, | |
| "loss": 1.546, | |
| "step": 16864 | |
| }, | |
| { | |
| "epoch": 0.611616920375767, | |
| "grad_norm": 1.3853099346160889, | |
| "learning_rate": 2.2637124345060233e-05, | |
| "loss": 1.5385, | |
| "step": 16895 | |
| }, | |
| { | |
| "epoch": 0.6127391532571904, | |
| "grad_norm": 1.3611940145492554, | |
| "learning_rate": 2.2553531368515695e-05, | |
| "loss": 1.5577, | |
| "step": 16926 | |
| }, | |
| { | |
| "epoch": 0.6138613861386139, | |
| "grad_norm": 1.3302477598190308, | |
| "learning_rate": 2.2469966000037144e-05, | |
| "loss": 1.5566, | |
| "step": 16957 | |
| }, | |
| { | |
| "epoch": 0.6149836190200373, | |
| "grad_norm": 1.3969210386276245, | |
| "learning_rate": 2.2386429182648417e-05, | |
| "loss": 1.5459, | |
| "step": 16988 | |
| }, | |
| { | |
| "epoch": 0.6161058519014607, | |
| "grad_norm": 1.3878018856048584, | |
| "learning_rate": 2.230292185905114e-05, | |
| "loss": 1.5295, | |
| "step": 17019 | |
| }, | |
| { | |
| "epoch": 0.6172280847828842, | |
| "grad_norm": 1.3366162776947021, | |
| "learning_rate": 2.2219444971614116e-05, | |
| "loss": 1.5485, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 0.6183503176643076, | |
| "grad_norm": 1.3503491878509521, | |
| "learning_rate": 2.2135999462362655e-05, | |
| "loss": 1.5266, | |
| "step": 17081 | |
| }, | |
| { | |
| "epoch": 0.619472550545731, | |
| "grad_norm": 1.3379223346710205, | |
| "learning_rate": 2.2052586272968003e-05, | |
| "loss": 1.5366, | |
| "step": 17112 | |
| }, | |
| { | |
| "epoch": 0.6205947834271545, | |
| "grad_norm": 1.299849033355713, | |
| "learning_rate": 2.196920634473666e-05, | |
| "loss": 1.5315, | |
| "step": 17143 | |
| }, | |
| { | |
| "epoch": 0.6217170163085779, | |
| "grad_norm": 1.3590292930603027, | |
| "learning_rate": 2.1885860618599787e-05, | |
| "loss": 1.5332, | |
| "step": 17174 | |
| }, | |
| { | |
| "epoch": 0.6228392491900012, | |
| "grad_norm": 1.3150153160095215, | |
| "learning_rate": 2.1802550035102577e-05, | |
| "loss": 1.5197, | |
| "step": 17205 | |
| }, | |
| { | |
| "epoch": 0.6239614820714247, | |
| "grad_norm": 1.3216016292572021, | |
| "learning_rate": 2.171927553439363e-05, | |
| "loss": 1.5344, | |
| "step": 17236 | |
| }, | |
| { | |
| "epoch": 0.6250837149528481, | |
| "grad_norm": 1.3521660566329956, | |
| "learning_rate": 2.1636038056214376e-05, | |
| "loss": 1.5236, | |
| "step": 17267 | |
| }, | |
| { | |
| "epoch": 0.6262059478342715, | |
| "grad_norm": 1.4077104330062866, | |
| "learning_rate": 2.155283853988844e-05, | |
| "loss": 1.5318, | |
| "step": 17298 | |
| }, | |
| { | |
| "epoch": 0.627328180715695, | |
| "grad_norm": 1.4986066818237305, | |
| "learning_rate": 2.146967792431106e-05, | |
| "loss": 1.5466, | |
| "step": 17329 | |
| }, | |
| { | |
| "epoch": 0.6284504135971184, | |
| "grad_norm": 1.2227765321731567, | |
| "learning_rate": 2.138655714793849e-05, | |
| "loss": 1.5345, | |
| "step": 17360 | |
| }, | |
| { | |
| "epoch": 0.6295726464785418, | |
| "grad_norm": 1.3314886093139648, | |
| "learning_rate": 2.1303477148777367e-05, | |
| "loss": 1.5376, | |
| "step": 17391 | |
| }, | |
| { | |
| "epoch": 0.6306948793599653, | |
| "grad_norm": 1.3682267665863037, | |
| "learning_rate": 2.122043886437421e-05, | |
| "loss": 1.5313, | |
| "step": 17422 | |
| }, | |
| { | |
| "epoch": 0.6318171122413887, | |
| "grad_norm": 1.3226497173309326, | |
| "learning_rate": 2.1137443231804765e-05, | |
| "loss": 1.5361, | |
| "step": 17453 | |
| }, | |
| { | |
| "epoch": 0.6329393451228121, | |
| "grad_norm": 1.3603419065475464, | |
| "learning_rate": 2.105449118766347e-05, | |
| "loss": 1.5353, | |
| "step": 17484 | |
| }, | |
| { | |
| "epoch": 0.6340615780042356, | |
| "grad_norm": 1.3611435890197754, | |
| "learning_rate": 2.097158366805287e-05, | |
| "loss": 1.5449, | |
| "step": 17515 | |
| }, | |
| { | |
| "epoch": 0.6351838108856589, | |
| "grad_norm": 1.3318766355514526, | |
| "learning_rate": 2.0888721608573047e-05, | |
| "loss": 1.5194, | |
| "step": 17546 | |
| }, | |
| { | |
| "epoch": 0.6363060437670823, | |
| "grad_norm": 1.3144105672836304, | |
| "learning_rate": 2.0805905944311087e-05, | |
| "loss": 1.5288, | |
| "step": 17577 | |
| }, | |
| { | |
| "epoch": 0.6374282766485058, | |
| "grad_norm": 1.3346774578094482, | |
| "learning_rate": 2.0723137609830497e-05, | |
| "loss": 1.5278, | |
| "step": 17608 | |
| }, | |
| { | |
| "epoch": 0.6385505095299292, | |
| "grad_norm": 1.4217780828475952, | |
| "learning_rate": 2.0640417539160686e-05, | |
| "loss": 1.5467, | |
| "step": 17639 | |
| }, | |
| { | |
| "epoch": 0.6396727424113526, | |
| "grad_norm": 1.3335380554199219, | |
| "learning_rate": 2.0557746665786427e-05, | |
| "loss": 1.5506, | |
| "step": 17670 | |
| }, | |
| { | |
| "epoch": 0.6407949752927761, | |
| "grad_norm": 1.3793307542800903, | |
| "learning_rate": 2.0475125922637256e-05, | |
| "loss": 1.5172, | |
| "step": 17701 | |
| }, | |
| { | |
| "epoch": 0.6419172081741995, | |
| "grad_norm": 1.3435157537460327, | |
| "learning_rate": 2.0392556242077047e-05, | |
| "loss": 1.5137, | |
| "step": 17732 | |
| }, | |
| { | |
| "epoch": 0.6430394410556229, | |
| "grad_norm": 1.3066918849945068, | |
| "learning_rate": 2.031003855589343e-05, | |
| "loss": 1.5184, | |
| "step": 17763 | |
| }, | |
| { | |
| "epoch": 0.6441616739370464, | |
| "grad_norm": 1.4214332103729248, | |
| "learning_rate": 2.022757379528727e-05, | |
| "loss": 1.5239, | |
| "step": 17794 | |
| }, | |
| { | |
| "epoch": 0.6452839068184698, | |
| "grad_norm": 1.3571085929870605, | |
| "learning_rate": 2.0145162890862184e-05, | |
| "loss": 1.5234, | |
| "step": 17825 | |
| }, | |
| { | |
| "epoch": 0.6464061396998932, | |
| "grad_norm": 1.2680344581604004, | |
| "learning_rate": 2.0062806772614022e-05, | |
| "loss": 1.5207, | |
| "step": 17856 | |
| }, | |
| { | |
| "epoch": 0.6475283725813167, | |
| "grad_norm": 1.3365403413772583, | |
| "learning_rate": 1.9980506369920392e-05, | |
| "loss": 1.5457, | |
| "step": 17887 | |
| }, | |
| { | |
| "epoch": 0.64865060546274, | |
| "grad_norm": 1.3576997518539429, | |
| "learning_rate": 1.989826261153015e-05, | |
| "loss": 1.516, | |
| "step": 17918 | |
| }, | |
| { | |
| "epoch": 0.6497728383441634, | |
| "grad_norm": 1.3189170360565186, | |
| "learning_rate": 1.9816076425552923e-05, | |
| "loss": 1.5204, | |
| "step": 17949 | |
| }, | |
| { | |
| "epoch": 0.6508950712255869, | |
| "grad_norm": 1.2855075597763062, | |
| "learning_rate": 1.9733948739448676e-05, | |
| "loss": 1.5131, | |
| "step": 17980 | |
| }, | |
| { | |
| "epoch": 0.6520173041070103, | |
| "grad_norm": 1.3004227876663208, | |
| "learning_rate": 1.9651880480017155e-05, | |
| "loss": 1.5495, | |
| "step": 18011 | |
| }, | |
| { | |
| "epoch": 0.6531395369884337, | |
| "grad_norm": 1.3858931064605713, | |
| "learning_rate": 1.9569872573387516e-05, | |
| "loss": 1.529, | |
| "step": 18042 | |
| }, | |
| { | |
| "epoch": 0.6542617698698572, | |
| "grad_norm": 1.378490686416626, | |
| "learning_rate": 1.9487925945007854e-05, | |
| "loss": 1.5281, | |
| "step": 18073 | |
| }, | |
| { | |
| "epoch": 0.6553840027512806, | |
| "grad_norm": 1.317062258720398, | |
| "learning_rate": 1.9406041519634726e-05, | |
| "loss": 1.5294, | |
| "step": 18104 | |
| }, | |
| { | |
| "epoch": 0.656506235632704, | |
| "grad_norm": 1.313314437866211, | |
| "learning_rate": 1.932422022132275e-05, | |
| "loss": 1.5343, | |
| "step": 18135 | |
| }, | |
| { | |
| "epoch": 0.6576284685141275, | |
| "grad_norm": 1.3339669704437256, | |
| "learning_rate": 1.924246297341414e-05, | |
| "loss": 1.5203, | |
| "step": 18166 | |
| }, | |
| { | |
| "epoch": 0.6587507013955509, | |
| "grad_norm": 1.298256516456604, | |
| "learning_rate": 1.9160770698528338e-05, | |
| "loss": 1.5297, | |
| "step": 18197 | |
| }, | |
| { | |
| "epoch": 0.6598729342769744, | |
| "grad_norm": 1.322373628616333, | |
| "learning_rate": 1.907914431855156e-05, | |
| "loss": 1.5307, | |
| "step": 18228 | |
| }, | |
| { | |
| "epoch": 0.6609951671583978, | |
| "grad_norm": 1.403425931930542, | |
| "learning_rate": 1.8997584754626412e-05, | |
| "loss": 1.5279, | |
| "step": 18259 | |
| }, | |
| { | |
| "epoch": 0.6621174000398211, | |
| "grad_norm": 1.3005762100219727, | |
| "learning_rate": 1.8916092927141486e-05, | |
| "loss": 1.5325, | |
| "step": 18290 | |
| }, | |
| { | |
| "epoch": 0.6632396329212445, | |
| "grad_norm": 1.3655368089675903, | |
| "learning_rate": 1.883466975572098e-05, | |
| "loss": 1.54, | |
| "step": 18321 | |
| }, | |
| { | |
| "epoch": 0.664361865802668, | |
| "grad_norm": 1.376219391822815, | |
| "learning_rate": 1.8753316159214312e-05, | |
| "loss": 1.518, | |
| "step": 18352 | |
| }, | |
| { | |
| "epoch": 0.6654840986840914, | |
| "grad_norm": 1.3264917135238647, | |
| "learning_rate": 1.8672033055685766e-05, | |
| "loss": 1.5108, | |
| "step": 18383 | |
| }, | |
| { | |
| "epoch": 0.6666063315655149, | |
| "grad_norm": 1.4083831310272217, | |
| "learning_rate": 1.8590821362404116e-05, | |
| "loss": 1.5252, | |
| "step": 18414 | |
| }, | |
| { | |
| "epoch": 0.6677285644469383, | |
| "grad_norm": 1.302178978919983, | |
| "learning_rate": 1.8509681995832294e-05, | |
| "loss": 1.4972, | |
| "step": 18445 | |
| }, | |
| { | |
| "epoch": 0.6688507973283617, | |
| "grad_norm": 1.3290973901748657, | |
| "learning_rate": 1.8428615871617004e-05, | |
| "loss": 1.5343, | |
| "step": 18476 | |
| }, | |
| { | |
| "epoch": 0.6699730302097852, | |
| "grad_norm": 1.4198294878005981, | |
| "learning_rate": 1.8347623904578448e-05, | |
| "loss": 1.5272, | |
| "step": 18507 | |
| }, | |
| { | |
| "epoch": 0.6710952630912086, | |
| "grad_norm": 1.2832363843917847, | |
| "learning_rate": 1.8266707008699975e-05, | |
| "loss": 1.5351, | |
| "step": 18538 | |
| }, | |
| { | |
| "epoch": 0.672217495972632, | |
| "grad_norm": 1.367154836654663, | |
| "learning_rate": 1.818586609711774e-05, | |
| "loss": 1.5236, | |
| "step": 18569 | |
| }, | |
| { | |
| "epoch": 0.6733397288540555, | |
| "grad_norm": 1.3867367506027222, | |
| "learning_rate": 1.8105102082110462e-05, | |
| "loss": 1.5141, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.6744619617354789, | |
| "grad_norm": 1.3272528648376465, | |
| "learning_rate": 1.8024415875089058e-05, | |
| "loss": 1.5459, | |
| "step": 18631 | |
| }, | |
| { | |
| "epoch": 0.6755841946169022, | |
| "grad_norm": 1.4012340307235718, | |
| "learning_rate": 1.7943808386586407e-05, | |
| "loss": 1.5082, | |
| "step": 18662 | |
| }, | |
| { | |
| "epoch": 0.6767064274983257, | |
| "grad_norm": 1.3309136629104614, | |
| "learning_rate": 1.7863280526247073e-05, | |
| "loss": 1.5207, | |
| "step": 18693 | |
| }, | |
| { | |
| "epoch": 0.6778286603797491, | |
| "grad_norm": 1.3469054698944092, | |
| "learning_rate": 1.7782833202817003e-05, | |
| "loss": 1.5301, | |
| "step": 18724 | |
| }, | |
| { | |
| "epoch": 0.6789508932611725, | |
| "grad_norm": 1.3786745071411133, | |
| "learning_rate": 1.7702467324133327e-05, | |
| "loss": 1.5236, | |
| "step": 18755 | |
| }, | |
| { | |
| "epoch": 0.680073126142596, | |
| "grad_norm": 1.3620835542678833, | |
| "learning_rate": 1.7622183797114042e-05, | |
| "loss": 1.5288, | |
| "step": 18786 | |
| }, | |
| { | |
| "epoch": 0.6811953590240194, | |
| "grad_norm": 1.3298254013061523, | |
| "learning_rate": 1.7541983527747838e-05, | |
| "loss": 1.5208, | |
| "step": 18817 | |
| }, | |
| { | |
| "epoch": 0.6823175919054428, | |
| "grad_norm": 1.2911970615386963, | |
| "learning_rate": 1.746186742108387e-05, | |
| "loss": 1.5172, | |
| "step": 18848 | |
| }, | |
| { | |
| "epoch": 0.6834398247868663, | |
| "grad_norm": 1.30719792842865, | |
| "learning_rate": 1.73818363812215e-05, | |
| "loss": 1.5206, | |
| "step": 18879 | |
| }, | |
| { | |
| "epoch": 0.6845620576682897, | |
| "grad_norm": 1.3682974576950073, | |
| "learning_rate": 1.7301891311300153e-05, | |
| "loss": 1.5126, | |
| "step": 18910 | |
| }, | |
| { | |
| "epoch": 0.6856842905497131, | |
| "grad_norm": 1.3172578811645508, | |
| "learning_rate": 1.7222033113489055e-05, | |
| "loss": 1.506, | |
| "step": 18941 | |
| }, | |
| { | |
| "epoch": 0.6868065234311366, | |
| "grad_norm": 1.3976131677627563, | |
| "learning_rate": 1.7142262688977127e-05, | |
| "loss": 1.5161, | |
| "step": 18972 | |
| }, | |
| { | |
| "epoch": 0.68792875631256, | |
| "grad_norm": 1.3834096193313599, | |
| "learning_rate": 1.7062580937962764e-05, | |
| "loss": 1.5156, | |
| "step": 19003 | |
| }, | |
| { | |
| "epoch": 0.6890509891939833, | |
| "grad_norm": 1.2939929962158203, | |
| "learning_rate": 1.698298875964369e-05, | |
| "loss": 1.5111, | |
| "step": 19034 | |
| }, | |
| { | |
| "epoch": 0.6901732220754068, | |
| "grad_norm": 1.416242241859436, | |
| "learning_rate": 1.690348705220684e-05, | |
| "loss": 1.5112, | |
| "step": 19065 | |
| }, | |
| { | |
| "epoch": 0.6912954549568302, | |
| "grad_norm": 1.4598749876022339, | |
| "learning_rate": 1.6824076712818156e-05, | |
| "loss": 1.5074, | |
| "step": 19096 | |
| }, | |
| { | |
| "epoch": 0.6924176878382536, | |
| "grad_norm": 1.403602123260498, | |
| "learning_rate": 1.6744758637612533e-05, | |
| "loss": 1.5049, | |
| "step": 19127 | |
| }, | |
| { | |
| "epoch": 0.6935399207196771, | |
| "grad_norm": 1.328615665435791, | |
| "learning_rate": 1.6665533721683664e-05, | |
| "loss": 1.5182, | |
| "step": 19158 | |
| }, | |
| { | |
| "epoch": 0.6946621536011005, | |
| "grad_norm": 1.3603520393371582, | |
| "learning_rate": 1.6586402859073974e-05, | |
| "loss": 1.5303, | |
| "step": 19189 | |
| }, | |
| { | |
| "epoch": 0.6957843864825239, | |
| "grad_norm": 1.4252129793167114, | |
| "learning_rate": 1.6507366942764463e-05, | |
| "loss": 1.5364, | |
| "step": 19220 | |
| }, | |
| { | |
| "epoch": 0.6969066193639474, | |
| "grad_norm": 1.2863671779632568, | |
| "learning_rate": 1.6428426864664732e-05, | |
| "loss": 1.5243, | |
| "step": 19251 | |
| }, | |
| { | |
| "epoch": 0.6980288522453708, | |
| "grad_norm": 1.298772931098938, | |
| "learning_rate": 1.6349583515602816e-05, | |
| "loss": 1.5254, | |
| "step": 19282 | |
| }, | |
| { | |
| "epoch": 0.6991510851267942, | |
| "grad_norm": 1.3208067417144775, | |
| "learning_rate": 1.6270837785315208e-05, | |
| "loss": 1.517, | |
| "step": 19313 | |
| }, | |
| { | |
| "epoch": 0.7002733180082177, | |
| "grad_norm": 1.4582445621490479, | |
| "learning_rate": 1.619219056243676e-05, | |
| "loss": 1.5156, | |
| "step": 19344 | |
| }, | |
| { | |
| "epoch": 0.7013955508896411, | |
| "grad_norm": 1.3674423694610596, | |
| "learning_rate": 1.6113642734490698e-05, | |
| "loss": 1.5056, | |
| "step": 19375 | |
| }, | |
| { | |
| "epoch": 0.7025177837710644, | |
| "grad_norm": 1.289265513420105, | |
| "learning_rate": 1.6035195187878577e-05, | |
| "loss": 1.5151, | |
| "step": 19406 | |
| }, | |
| { | |
| "epoch": 0.7036400166524879, | |
| "grad_norm": 1.3161633014678955, | |
| "learning_rate": 1.5956848807870305e-05, | |
| "loss": 1.5206, | |
| "step": 19437 | |
| }, | |
| { | |
| "epoch": 0.7047622495339113, | |
| "grad_norm": 1.3161797523498535, | |
| "learning_rate": 1.587860447859413e-05, | |
| "loss": 1.5132, | |
| "step": 19468 | |
| }, | |
| { | |
| "epoch": 0.7058844824153347, | |
| "grad_norm": 1.3772165775299072, | |
| "learning_rate": 1.5800463083026686e-05, | |
| "loss": 1.5273, | |
| "step": 19499 | |
| }, | |
| { | |
| "epoch": 0.7070067152967582, | |
| "grad_norm": 1.3191962242126465, | |
| "learning_rate": 1.572242550298298e-05, | |
| "loss": 1.5238, | |
| "step": 19530 | |
| }, | |
| { | |
| "epoch": 0.7081289481781816, | |
| "grad_norm": 1.3758587837219238, | |
| "learning_rate": 1.56444926191065e-05, | |
| "loss": 1.5242, | |
| "step": 19561 | |
| }, | |
| { | |
| "epoch": 0.709251181059605, | |
| "grad_norm": 1.3456153869628906, | |
| "learning_rate": 1.5566665310859257e-05, | |
| "loss": 1.5109, | |
| "step": 19592 | |
| }, | |
| { | |
| "epoch": 0.7103734139410285, | |
| "grad_norm": 1.3654590845108032, | |
| "learning_rate": 1.5488944456511846e-05, | |
| "loss": 1.5092, | |
| "step": 19623 | |
| }, | |
| { | |
| "epoch": 0.7114956468224519, | |
| "grad_norm": 1.2868263721466064, | |
| "learning_rate": 1.5411330933133546e-05, | |
| "loss": 1.534, | |
| "step": 19654 | |
| }, | |
| { | |
| "epoch": 0.7126178797038754, | |
| "grad_norm": 1.3140943050384521, | |
| "learning_rate": 1.533382561658241e-05, | |
| "loss": 1.5381, | |
| "step": 19685 | |
| }, | |
| { | |
| "epoch": 0.7137401125852988, | |
| "grad_norm": 1.353061556816101, | |
| "learning_rate": 1.525642938149541e-05, | |
| "loss": 1.5133, | |
| "step": 19716 | |
| }, | |
| { | |
| "epoch": 0.7148623454667222, | |
| "grad_norm": 1.378933072090149, | |
| "learning_rate": 1.5179143101278536e-05, | |
| "loss": 1.514, | |
| "step": 19747 | |
| }, | |
| { | |
| "epoch": 0.7159845783481456, | |
| "grad_norm": 1.3969671726226807, | |
| "learning_rate": 1.5101967648096955e-05, | |
| "loss": 1.5255, | |
| "step": 19778 | |
| }, | |
| { | |
| "epoch": 0.717106811229569, | |
| "grad_norm": 1.3627468347549438, | |
| "learning_rate": 1.5024903892865172e-05, | |
| "loss": 1.5168, | |
| "step": 19809 | |
| }, | |
| { | |
| "epoch": 0.7182290441109924, | |
| "grad_norm": 1.3613289594650269, | |
| "learning_rate": 1.4947952705237184e-05, | |
| "loss": 1.532, | |
| "step": 19840 | |
| }, | |
| { | |
| "epoch": 0.7193512769924159, | |
| "grad_norm": 1.3214402198791504, | |
| "learning_rate": 1.4871114953596682e-05, | |
| "loss": 1.5236, | |
| "step": 19871 | |
| }, | |
| { | |
| "epoch": 0.7204735098738393, | |
| "grad_norm": 1.3939237594604492, | |
| "learning_rate": 1.4794391505047256e-05, | |
| "loss": 1.521, | |
| "step": 19902 | |
| }, | |
| { | |
| "epoch": 0.7215957427552627, | |
| "grad_norm": 1.384696364402771, | |
| "learning_rate": 1.4717783225402596e-05, | |
| "loss": 1.5118, | |
| "step": 19933 | |
| }, | |
| { | |
| "epoch": 0.7227179756366862, | |
| "grad_norm": 1.286145806312561, | |
| "learning_rate": 1.4641290979176735e-05, | |
| "loss": 1.522, | |
| "step": 19964 | |
| }, | |
| { | |
| "epoch": 0.7238402085181096, | |
| "grad_norm": 1.380027413368225, | |
| "learning_rate": 1.4564915629574246e-05, | |
| "loss": 1.5147, | |
| "step": 19995 | |
| }, | |
| { | |
| "epoch": 0.724962441399533, | |
| "grad_norm": 1.372430443763733, | |
| "learning_rate": 1.4488658038480601e-05, | |
| "loss": 1.5132, | |
| "step": 20026 | |
| }, | |
| { | |
| "epoch": 0.7260846742809565, | |
| "grad_norm": 1.3200669288635254, | |
| "learning_rate": 1.4412519066452323e-05, | |
| "loss": 1.4935, | |
| "step": 20057 | |
| }, | |
| { | |
| "epoch": 0.7272069071623799, | |
| "grad_norm": 1.3791152238845825, | |
| "learning_rate": 1.4336499572707373e-05, | |
| "loss": 1.5242, | |
| "step": 20088 | |
| }, | |
| { | |
| "epoch": 0.7283291400438033, | |
| "grad_norm": 1.287310004234314, | |
| "learning_rate": 1.4260600415115433e-05, | |
| "loss": 1.5098, | |
| "step": 20119 | |
| }, | |
| { | |
| "epoch": 0.7294513729252267, | |
| "grad_norm": 1.307353138923645, | |
| "learning_rate": 1.4184822450188137e-05, | |
| "loss": 1.5098, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 0.7305736058066501, | |
| "grad_norm": 1.3487526178359985, | |
| "learning_rate": 1.410916653306954e-05, | |
| "loss": 1.5167, | |
| "step": 20181 | |
| }, | |
| { | |
| "epoch": 0.7316958386880735, | |
| "grad_norm": 1.3626441955566406, | |
| "learning_rate": 1.403363351752639e-05, | |
| "loss": 1.5005, | |
| "step": 20212 | |
| }, | |
| { | |
| "epoch": 0.732818071569497, | |
| "grad_norm": 1.3192275762557983, | |
| "learning_rate": 1.3958224255938485e-05, | |
| "loss": 1.5191, | |
| "step": 20243 | |
| }, | |
| { | |
| "epoch": 0.7339403044509204, | |
| "grad_norm": 1.336755633354187, | |
| "learning_rate": 1.388293959928911e-05, | |
| "loss": 1.5223, | |
| "step": 20274 | |
| }, | |
| { | |
| "epoch": 0.7350625373323438, | |
| "grad_norm": 1.3645100593566895, | |
| "learning_rate": 1.3807780397155379e-05, | |
| "loss": 1.5156, | |
| "step": 20305 | |
| }, | |
| { | |
| "epoch": 0.7361847702137673, | |
| "grad_norm": 1.3681402206420898, | |
| "learning_rate": 1.3732747497698655e-05, | |
| "loss": 1.5065, | |
| "step": 20336 | |
| }, | |
| { | |
| "epoch": 0.7373070030951907, | |
| "grad_norm": 1.3669005632400513, | |
| "learning_rate": 1.3657841747655038e-05, | |
| "loss": 1.5148, | |
| "step": 20367 | |
| }, | |
| { | |
| "epoch": 0.7384292359766141, | |
| "grad_norm": 1.349400281906128, | |
| "learning_rate": 1.3583063992325706e-05, | |
| "loss": 1.5234, | |
| "step": 20398 | |
| }, | |
| { | |
| "epoch": 0.7395514688580376, | |
| "grad_norm": 1.3764326572418213, | |
| "learning_rate": 1.3508415075567496e-05, | |
| "loss": 1.5019, | |
| "step": 20429 | |
| }, | |
| { | |
| "epoch": 0.740673701739461, | |
| "grad_norm": 1.5416663885116577, | |
| "learning_rate": 1.343389583978327e-05, | |
| "loss": 1.5188, | |
| "step": 20460 | |
| }, | |
| { | |
| "epoch": 0.7417959346208844, | |
| "grad_norm": 1.3264429569244385, | |
| "learning_rate": 1.3359507125912468e-05, | |
| "loss": 1.5041, | |
| "step": 20491 | |
| }, | |
| { | |
| "epoch": 0.7429181675023078, | |
| "grad_norm": 1.3554550409317017, | |
| "learning_rate": 1.3285249773421627e-05, | |
| "loss": 1.5207, | |
| "step": 20522 | |
| }, | |
| { | |
| "epoch": 0.7440404003837312, | |
| "grad_norm": 1.31184983253479, | |
| "learning_rate": 1.3211124620294884e-05, | |
| "loss": 1.5257, | |
| "step": 20553 | |
| }, | |
| { | |
| "epoch": 0.7451626332651546, | |
| "grad_norm": 1.3225113153457642, | |
| "learning_rate": 1.313713250302451e-05, | |
| "loss": 1.5196, | |
| "step": 20584 | |
| }, | |
| { | |
| "epoch": 0.7462848661465781, | |
| "grad_norm": 1.3386696577072144, | |
| "learning_rate": 1.3063274256601479e-05, | |
| "loss": 1.5174, | |
| "step": 20615 | |
| }, | |
| { | |
| "epoch": 0.7474070990280015, | |
| "grad_norm": 1.423807978630066, | |
| "learning_rate": 1.2989550714506086e-05, | |
| "loss": 1.4968, | |
| "step": 20646 | |
| }, | |
| { | |
| "epoch": 0.7485293319094249, | |
| "grad_norm": 1.2833530902862549, | |
| "learning_rate": 1.291596270869846e-05, | |
| "loss": 1.491, | |
| "step": 20677 | |
| }, | |
| { | |
| "epoch": 0.7496515647908484, | |
| "grad_norm": 1.2796401977539062, | |
| "learning_rate": 1.284251106960927e-05, | |
| "loss": 1.5062, | |
| "step": 20708 | |
| }, | |
| { | |
| "epoch": 0.7507737976722718, | |
| "grad_norm": 1.3797061443328857, | |
| "learning_rate": 1.2769196626130263e-05, | |
| "loss": 1.5152, | |
| "step": 20739 | |
| }, | |
| { | |
| "epoch": 0.7518960305536952, | |
| "grad_norm": 1.4489312171936035, | |
| "learning_rate": 1.2696020205604969e-05, | |
| "loss": 1.5122, | |
| "step": 20770 | |
| }, | |
| { | |
| "epoch": 0.7530182634351187, | |
| "grad_norm": 1.3305705785751343, | |
| "learning_rate": 1.2622982633819359e-05, | |
| "loss": 1.5143, | |
| "step": 20801 | |
| }, | |
| { | |
| "epoch": 0.7541404963165421, | |
| "grad_norm": 1.3734405040740967, | |
| "learning_rate": 1.2550084734992484e-05, | |
| "loss": 1.513, | |
| "step": 20832 | |
| }, | |
| { | |
| "epoch": 0.7552627291979656, | |
| "grad_norm": 1.2886455059051514, | |
| "learning_rate": 1.247732733176724e-05, | |
| "loss": 1.489, | |
| "step": 20863 | |
| }, | |
| { | |
| "epoch": 0.7563849620793889, | |
| "grad_norm": 1.4357209205627441, | |
| "learning_rate": 1.2404711245201044e-05, | |
| "loss": 1.5179, | |
| "step": 20894 | |
| }, | |
| { | |
| "epoch": 0.7575071949608123, | |
| "grad_norm": 1.294068455696106, | |
| "learning_rate": 1.2332237294756535e-05, | |
| "loss": 1.5151, | |
| "step": 20925 | |
| }, | |
| { | |
| "epoch": 0.7586294278422357, | |
| "grad_norm": 1.3966395854949951, | |
| "learning_rate": 1.225990629829241e-05, | |
| "loss": 1.5127, | |
| "step": 20956 | |
| }, | |
| { | |
| "epoch": 0.7597516607236592, | |
| "grad_norm": 1.3190878629684448, | |
| "learning_rate": 1.2187719072054136e-05, | |
| "loss": 1.5063, | |
| "step": 20987 | |
| }, | |
| { | |
| "epoch": 0.7608738936050826, | |
| "grad_norm": 1.2927324771881104, | |
| "learning_rate": 1.2115676430664735e-05, | |
| "loss": 1.4923, | |
| "step": 21018 | |
| }, | |
| { | |
| "epoch": 0.761996126486506, | |
| "grad_norm": 1.3363546133041382, | |
| "learning_rate": 1.2043779187115647e-05, | |
| "loss": 1.4928, | |
| "step": 21049 | |
| }, | |
| { | |
| "epoch": 0.7631183593679295, | |
| "grad_norm": 1.322825312614441, | |
| "learning_rate": 1.1972028152757476e-05, | |
| "loss": 1.5116, | |
| "step": 21080 | |
| }, | |
| { | |
| "epoch": 0.7642405922493529, | |
| "grad_norm": 1.396026372909546, | |
| "learning_rate": 1.1900424137290889e-05, | |
| "loss": 1.5089, | |
| "step": 21111 | |
| }, | |
| { | |
| "epoch": 0.7653628251307764, | |
| "grad_norm": 1.358963966369629, | |
| "learning_rate": 1.1828967948757482e-05, | |
| "loss": 1.505, | |
| "step": 21142 | |
| }, | |
| { | |
| "epoch": 0.7664850580121998, | |
| "grad_norm": 1.3169891834259033, | |
| "learning_rate": 1.175766039353062e-05, | |
| "loss": 1.5115, | |
| "step": 21173 | |
| }, | |
| { | |
| "epoch": 0.7676072908936232, | |
| "grad_norm": 1.3406434059143066, | |
| "learning_rate": 1.1686502276306382e-05, | |
| "loss": 1.5093, | |
| "step": 21204 | |
| }, | |
| { | |
| "epoch": 0.7687295237750467, | |
| "grad_norm": 1.3709667921066284, | |
| "learning_rate": 1.1615494400094445e-05, | |
| "loss": 1.5017, | |
| "step": 21235 | |
| }, | |
| { | |
| "epoch": 0.76985175665647, | |
| "grad_norm": 1.4957972764968872, | |
| "learning_rate": 1.1544637566209029e-05, | |
| "loss": 1.5121, | |
| "step": 21266 | |
| }, | |
| { | |
| "epoch": 0.7709739895378934, | |
| "grad_norm": 1.3525892496109009, | |
| "learning_rate": 1.1473932574259886e-05, | |
| "loss": 1.4934, | |
| "step": 21297 | |
| }, | |
| { | |
| "epoch": 0.7720962224193169, | |
| "grad_norm": 1.3251068592071533, | |
| "learning_rate": 1.1403380222143247e-05, | |
| "loss": 1.4858, | |
| "step": 21328 | |
| }, | |
| { | |
| "epoch": 0.7732184553007403, | |
| "grad_norm": 1.3417954444885254, | |
| "learning_rate": 1.1332981306032808e-05, | |
| "loss": 1.5119, | |
| "step": 21359 | |
| } | |
| ], | |
| "logging_steps": 31, | |
| "max_steps": 30517, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 3052, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.5847615378155897e+19, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
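
The trailer above pins down the run's configuration: losses are logged every 31 optimizer steps (`logging_steps`), checkpoints are written every 3,052 steps (`save_steps`), and the schedule targets 30,517 steps across 2 epochs. The last logged entry sits at step 21,359 (epoch ≈ 0.773), with loss hovering around 1.5 while the learning rate decays from about 1.68e-05 to 1.13e-05 over this stretch. As a minimal sketch of how one might read this state back, assuming it is saved under the Trainer's conventional name `trainer_state.json` (the log itself does not name the file), the following Python uses only the standard library:

```python
import json

# Minimal sketch: load the trainer state shown above and summarize the
# logged history. The file name is an assumption; any path to this JSON works.
with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]          # list of per-log-step records
first, last = history[0], history[-1]   # each has epoch/grad_norm/learning_rate/loss/step

print(f"logged entries : {len(history)} (one every {state['logging_steps']} steps)")
print(f"loss           : {first['loss']:.4f} -> {last['loss']:.4f}")
print(f"learning rate  : {first['learning_rate']:.3e} -> {last['learning_rate']:.3e}")

# Fraction of the scheduled run completed, judged by the last logged epoch
# against num_train_epochs.
frac = last["epoch"] / state["num_train_epochs"]
print(f"progress       : step {last['step']} / {state['max_steps']} ({frac:.1%} of the run)")
```

From the same `history` list one could, for instance, plot `loss` or `grad_norm` against `step` (e.g. with matplotlib) to inspect the plateau visible in the entries above; nothing in the state file constrains how it is consumed.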