|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.15600928255231186,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00015600928255231187,
      "grad_norm": 0.5937075614929199,
      "learning_rate": 2.666666666666667e-06,
      "loss": 3.7681,
      "step": 1
    },
    {
      "epoch": 0.00031201856510462375,
      "grad_norm": 0.5836828947067261,
      "learning_rate": 5.333333333333334e-06,
      "loss": 3.8601,
      "step": 2
    },
    {
      "epoch": 0.00046802784765693557,
      "grad_norm": 0.6111788749694824,
      "learning_rate": 8.000000000000001e-06,
      "loss": 3.643,
      "step": 3
    },
    {
      "epoch": 0.0006240371302092475,
      "grad_norm": 0.5807424783706665,
      "learning_rate": 1.0666666666666667e-05,
      "loss": 3.724,
      "step": 4
    },
    {
      "epoch": 0.0007800464127615594,
      "grad_norm": 0.5708947777748108,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 3.9727,
      "step": 5
    },
    {
      "epoch": 0.0009360556953138711,
      "grad_norm": 0.5662252902984619,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 3.6801,
      "step": 6
    },
    {
      "epoch": 0.001092064977866183,
      "grad_norm": 0.5653729438781738,
      "learning_rate": 1.866666666666667e-05,
      "loss": 3.6898,
      "step": 7
    },
    {
      "epoch": 0.001248074260418495,
      "grad_norm": 0.5451233983039856,
      "learning_rate": 2.1333333333333335e-05,
      "loss": 3.5484,
      "step": 8
    },
    {
      "epoch": 0.0014040835429708068,
      "grad_norm": 0.5682435035705566,
      "learning_rate": 2.4e-05,
      "loss": 3.538,
      "step": 9
    },
    {
      "epoch": 0.0015600928255231187,
      "grad_norm": 0.6180667877197266,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 3.6757,
      "step": 10
    },
    {
      "epoch": 0.0017161021080754305,
      "grad_norm": 0.6358373165130615,
      "learning_rate": 2.9333333333333336e-05,
      "loss": 3.6489,
      "step": 11
    },
    {
      "epoch": 0.0018721113906277423,
      "grad_norm": 0.6643233895301819,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 3.4313,
      "step": 12
    },
    {
      "epoch": 0.0020281206731800542,
      "grad_norm": 0.6591399908065796,
      "learning_rate": 3.466666666666667e-05,
      "loss": 3.472,
      "step": 13
    },
    {
      "epoch": 0.002184129955732366,
      "grad_norm": 0.8929205536842346,
      "learning_rate": 3.733333333333334e-05,
      "loss": 3.3531,
      "step": 14
    },
    {
      "epoch": 0.0023401392382846778,
      "grad_norm": 1.2845464944839478,
      "learning_rate": 4e-05,
      "loss": 3.143,
      "step": 15
    },
    {
      "epoch": 0.00249614852083699,
      "grad_norm": 1.216373085975647,
      "learning_rate": 4.266666666666667e-05,
      "loss": 3.1297,
      "step": 16
    },
    {
      "epoch": 0.0026521578033893017,
      "grad_norm": 0.9192391633987427,
      "learning_rate": 4.5333333333333335e-05,
      "loss": 2.9826,
      "step": 17
    },
    {
      "epoch": 0.0028081670859416135,
      "grad_norm": 0.8917486667633057,
      "learning_rate": 4.8e-05,
      "loss": 2.9068,
      "step": 18
    },
    {
      "epoch": 0.0029641763684939253,
      "grad_norm": 0.7141512632369995,
      "learning_rate": 5.0666666666666674e-05,
      "loss": 2.7797,
      "step": 19
    },
    {
      "epoch": 0.0031201856510462375,
      "grad_norm": 0.8795380592346191,
      "learning_rate": 5.333333333333333e-05,
      "loss": 2.2265,
      "step": 20
    },
    {
      "epoch": 0.0032761949335985492,
      "grad_norm": 1.047784447669983,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 2.6089,
      "step": 21
    },
    {
      "epoch": 0.003432204216150861,
      "grad_norm": 1.0959978103637695,
      "learning_rate": 5.866666666666667e-05,
      "loss": 2.3416,
      "step": 22
    },
    {
      "epoch": 0.0035882134987031728,
      "grad_norm": 1.283445954322815,
      "learning_rate": 6.133333333333334e-05,
      "loss": 2.0565,
      "step": 23
    },
    {
      "epoch": 0.0037442227812554845,
      "grad_norm": 1.655569314956665,
      "learning_rate": 6.400000000000001e-05,
      "loss": 1.6648,
      "step": 24
    },
    {
      "epoch": 0.0039002320638077967,
      "grad_norm": 1.4048818349838257,
      "learning_rate": 6.666666666666667e-05,
      "loss": 1.7566,
      "step": 25
    },
    {
      "epoch": 0.0040562413463601085,
      "grad_norm": 1.0755441188812256,
      "learning_rate": 6.933333333333334e-05,
      "loss": 1.6591,
      "step": 26
    },
    {
      "epoch": 0.00421225062891242,
      "grad_norm": 0.7240940928459167,
      "learning_rate": 7.2e-05,
      "loss": 2.21,
      "step": 27
    },
    {
      "epoch": 0.004368259911464732,
      "grad_norm": 0.48980680108070374,
      "learning_rate": 7.466666666666667e-05,
      "loss": 1.8157,
      "step": 28
    },
    {
      "epoch": 0.004524269194017044,
      "grad_norm": 0.4145239591598511,
      "learning_rate": 7.733333333333333e-05,
      "loss": 1.8679,
      "step": 29
    },
    {
      "epoch": 0.0046802784765693555,
      "grad_norm": 0.3905705213546753,
      "learning_rate": 8e-05,
      "loss": 1.5733,
      "step": 30
    },
    {
      "epoch": 0.004836287759121667,
      "grad_norm": 0.40969792008399963,
      "learning_rate": 8.266666666666667e-05,
      "loss": 1.531,
      "step": 31
    },
    {
      "epoch": 0.00499229704167398,
      "grad_norm": 0.4269125759601593,
      "learning_rate": 8.533333333333334e-05,
      "loss": 1.3705,
      "step": 32
    },
    {
      "epoch": 0.005148306324226292,
      "grad_norm": 0.5876020789146423,
      "learning_rate": 8.800000000000001e-05,
      "loss": 1.4055,
      "step": 33
    },
    {
      "epoch": 0.0053043156067786035,
      "grad_norm": 0.39753037691116333,
      "learning_rate": 9.066666666666667e-05,
      "loss": 1.5403,
      "step": 34
    },
    {
      "epoch": 0.005460324889330915,
      "grad_norm": 0.4157419800758362,
      "learning_rate": 9.333333333333334e-05,
      "loss": 1.5698,
      "step": 35
    },
    {
      "epoch": 0.005616334171883227,
      "grad_norm": 0.4430864155292511,
      "learning_rate": 9.6e-05,
      "loss": 1.5466,
      "step": 36
    },
    {
      "epoch": 0.005772343454435539,
      "grad_norm": 0.5259338021278381,
      "learning_rate": 9.866666666666668e-05,
      "loss": 1.5878,
      "step": 37
    },
    {
      "epoch": 0.0059283527369878505,
      "grad_norm": 0.4409235417842865,
      "learning_rate": 0.00010133333333333335,
      "loss": 1.4203,
      "step": 38
    },
    {
      "epoch": 0.006084362019540162,
      "grad_norm": 0.5432307124137878,
      "learning_rate": 0.00010400000000000001,
      "loss": 1.3843,
      "step": 39
    },
    {
      "epoch": 0.006240371302092475,
      "grad_norm": 0.5789123177528381,
      "learning_rate": 0.00010666666666666667,
      "loss": 1.4649,
      "step": 40
    },
    {
      "epoch": 0.006396380584644787,
      "grad_norm": 0.5596875548362732,
      "learning_rate": 0.00010933333333333333,
      "loss": 1.3569,
      "step": 41
    },
    {
      "epoch": 0.0065523898671970985,
      "grad_norm": 0.6517161726951599,
      "learning_rate": 0.00011200000000000001,
      "loss": 1.4306,
      "step": 42
    },
    {
      "epoch": 0.00670839914974941,
      "grad_norm": 0.7665486335754395,
      "learning_rate": 0.00011466666666666667,
      "loss": 1.5369,
      "step": 43
    },
    {
      "epoch": 0.006864408432301722,
      "grad_norm": 0.8421632647514343,
      "learning_rate": 0.00011733333333333334,
      "loss": 1.5651,
      "step": 44
    },
    {
      "epoch": 0.007020417714854034,
      "grad_norm": 0.8437005877494812,
      "learning_rate": 0.00012,
      "loss": 1.33,
      "step": 45
    },
    {
      "epoch": 0.0071764269974063455,
      "grad_norm": 0.3544560670852661,
      "learning_rate": 0.00012266666666666668,
      "loss": 1.5436,
      "step": 46
    },
    {
      "epoch": 0.007332436279958657,
      "grad_norm": 0.35725343227386475,
      "learning_rate": 0.00012533333333333334,
      "loss": 1.5792,
      "step": 47
    },
    {
      "epoch": 0.007488445562510969,
      "grad_norm": 0.7203790545463562,
      "learning_rate": 0.00012800000000000002,
      "loss": 1.6707,
      "step": 48
    },
    {
      "epoch": 0.007644454845063282,
      "grad_norm": 0.352791428565979,
      "learning_rate": 0.00013066666666666668,
      "loss": 1.5251,
      "step": 49
    },
    {
      "epoch": 0.0078004641276155934,
      "grad_norm": 0.49014368653297424,
      "learning_rate": 0.00013333333333333334,
      "loss": 1.3478,
      "step": 50
    },
    {
      "epoch": 0.007956473410167905,
      "grad_norm": 0.29890525341033936,
      "learning_rate": 0.00013600000000000003,
      "loss": 1.3164,
      "step": 51
    },
    {
      "epoch": 0.008112482692720217,
      "grad_norm": 0.34632885456085205,
      "learning_rate": 0.00013866666666666669,
      "loss": 1.3785,
      "step": 52
    },
    {
      "epoch": 0.008268491975272529,
      "grad_norm": 0.3631187677383423,
      "learning_rate": 0.00014133333333333334,
      "loss": 1.7873,
      "step": 53
    },
    {
      "epoch": 0.00842450125782484,
      "grad_norm": 0.290487140417099,
      "learning_rate": 0.000144,
      "loss": 1.1681,
      "step": 54
    },
    {
      "epoch": 0.008580510540377152,
      "grad_norm": 0.3136501610279083,
      "learning_rate": 0.00014666666666666666,
      "loss": 1.1332,
      "step": 55
    },
    {
      "epoch": 0.008736519822929464,
      "grad_norm": 0.3708946406841278,
      "learning_rate": 0.00014933333333333335,
      "loss": 1.4316,
      "step": 56
    },
    {
      "epoch": 0.008892529105481776,
      "grad_norm": 0.3645316958427429,
      "learning_rate": 0.000152,
      "loss": 1.3522,
      "step": 57
    },
    {
      "epoch": 0.009048538388034088,
      "grad_norm": 0.4074520170688629,
      "learning_rate": 0.00015466666666666667,
      "loss": 1.5344,
      "step": 58
    },
    {
      "epoch": 0.0092045476705864,
      "grad_norm": 0.3106740713119507,
      "learning_rate": 0.00015733333333333333,
      "loss": 1.2959,
      "step": 59
    },
    {
      "epoch": 0.009360556953138711,
      "grad_norm": 0.32623976469039917,
      "learning_rate": 0.00016,
      "loss": 1.6472,
      "step": 60
    },
    {
      "epoch": 0.009516566235691023,
      "grad_norm": 0.35396724939346313,
      "learning_rate": 0.00016266666666666667,
      "loss": 1.2655,
      "step": 61
    },
    {
      "epoch": 0.009672575518243335,
      "grad_norm": 0.3455830216407776,
      "learning_rate": 0.00016533333333333333,
      "loss": 1.2153,
      "step": 62
    },
    {
      "epoch": 0.009828584800795648,
      "grad_norm": 0.3116808235645294,
      "learning_rate": 0.000168,
      "loss": 1.0851,
      "step": 63
    },
    {
      "epoch": 0.00998459408334796,
      "grad_norm": 0.3416989743709564,
      "learning_rate": 0.00017066666666666668,
      "loss": 1.5828,
      "step": 64
    },
    {
      "epoch": 0.010140603365900272,
      "grad_norm": 0.3509654104709625,
      "learning_rate": 0.00017333333333333334,
      "loss": 1.4832,
      "step": 65
    },
    {
      "epoch": 0.010296612648452583,
      "grad_norm": 0.3034147322177887,
      "learning_rate": 0.00017600000000000002,
      "loss": 1.4326,
      "step": 66
    },
    {
      "epoch": 0.010452621931004895,
      "grad_norm": 0.3084355890750885,
      "learning_rate": 0.00017866666666666668,
      "loss": 1.2452,
      "step": 67
    },
    {
      "epoch": 0.010608631213557207,
      "grad_norm": 0.3001956343650818,
      "learning_rate": 0.00018133333333333334,
      "loss": 1.2484,
      "step": 68
    },
    {
      "epoch": 0.010764640496109519,
      "grad_norm": 0.30605360865592957,
      "learning_rate": 0.00018400000000000003,
      "loss": 1.2137,
      "step": 69
    },
    {
      "epoch": 0.01092064977866183,
      "grad_norm": 0.32967764139175415,
      "learning_rate": 0.0001866666666666667,
      "loss": 1.4,
      "step": 70
    },
    {
      "epoch": 0.011076659061214142,
      "grad_norm": 0.3161776661872864,
      "learning_rate": 0.00018933333333333335,
      "loss": 1.3203,
      "step": 71
    },
    {
      "epoch": 0.011232668343766454,
      "grad_norm": 0.28808867931365967,
      "learning_rate": 0.000192,
      "loss": 1.3034,
      "step": 72
    },
    {
      "epoch": 0.011388677626318766,
      "grad_norm": 0.2804367244243622,
      "learning_rate": 0.0001946666666666667,
      "loss": 1.2753,
      "step": 73
    },
    {
      "epoch": 0.011544686908871078,
      "grad_norm": 0.30980467796325684,
      "learning_rate": 0.00019733333333333335,
      "loss": 1.3733,
      "step": 74
    },
    {
      "epoch": 0.01170069619142339,
      "grad_norm": 0.31240588426589966,
      "learning_rate": 0.0002,
      "loss": 1.1602,
      "step": 75
    },
    {
      "epoch": 0.011856705473975701,
      "grad_norm": 0.28906041383743286,
      "learning_rate": 0.00019999991608372393,
      "loss": 1.3243,
      "step": 76
    },
    {
      "epoch": 0.012012714756528013,
      "grad_norm": 0.2740985155105591,
      "learning_rate": 0.00019999966433503652,
      "loss": 1.1853,
      "step": 77
    },
    {
      "epoch": 0.012168724039080325,
      "grad_norm": 0.30425482988357544,
      "learning_rate": 0.0001999992447543603,
      "loss": 1.3282,
      "step": 78
    },
    {
      "epoch": 0.012324733321632636,
      "grad_norm": 0.3216018080711365,
      "learning_rate": 0.00019999865734239946,
      "loss": 1.3696,
      "step": 79
    },
    {
      "epoch": 0.01248074260418495,
      "grad_norm": 0.34770438075065613,
      "learning_rate": 0.00019999790210013988,
      "loss": 1.261,
      "step": 80
    },
    {
      "epoch": 0.012636751886737262,
      "grad_norm": 0.3883892297744751,
      "learning_rate": 0.0001999969790288491,
      "loss": 1.5873,
      "step": 81
    },
    {
      "epoch": 0.012792761169289573,
      "grad_norm": 0.3061410188674927,
      "learning_rate": 0.00019999588813007633,
      "loss": 1.5559,
      "step": 82
    },
    {
      "epoch": 0.012948770451841885,
      "grad_norm": 0.3044775128364563,
      "learning_rate": 0.00019999462940565243,
      "loss": 1.2439,
      "step": 83
    },
    {
      "epoch": 0.013104779734394197,
      "grad_norm": 0.3562803864479065,
      "learning_rate": 0.00019999320285769,
      "loss": 1.4121,
      "step": 84
    },
    {
      "epoch": 0.013260789016946509,
      "grad_norm": 0.3367731273174286,
      "learning_rate": 0.0001999916084885832,
      "loss": 1.1937,
      "step": 85
    },
    {
      "epoch": 0.01341679829949882,
      "grad_norm": 0.3613661527633667,
      "learning_rate": 0.00019998984630100792,
      "loss": 1.4267,
      "step": 86
    },
    {
      "epoch": 0.013572807582051132,
      "grad_norm": 0.30924999713897705,
      "learning_rate": 0.0001999879162979217,
      "loss": 1.3358,
      "step": 87
    },
    {
      "epoch": 0.013728816864603444,
      "grad_norm": 0.34925562143325806,
      "learning_rate": 0.0001999858184825637,
      "loss": 1.3758,
      "step": 88
    },
    {
      "epoch": 0.013884826147155756,
      "grad_norm": 0.3182036280632019,
      "learning_rate": 0.00019998355285845475,
      "loss": 1.3151,
      "step": 89
    },
    {
      "epoch": 0.014040835429708068,
      "grad_norm": 0.6028950810432434,
      "learning_rate": 0.0001999811194293973,
      "loss": 1.3797,
      "step": 90
    },
    {
      "epoch": 0.01419684471226038,
      "grad_norm": 0.3221015930175781,
      "learning_rate": 0.00019997851819947537,
      "loss": 1.3293,
      "step": 91
    },
    {
      "epoch": 0.014352853994812691,
      "grad_norm": 0.3003532290458679,
      "learning_rate": 0.00019997574917305478,
      "loss": 1.5671,
      "step": 92
    },
    {
      "epoch": 0.014508863277365003,
      "grad_norm": 0.32144418358802795,
      "learning_rate": 0.00019997281235478278,
      "loss": 1.3733,
      "step": 93
    },
    {
      "epoch": 0.014664872559917315,
      "grad_norm": 1.3427015542984009,
      "learning_rate": 0.00019996970774958836,
      "loss": 1.246,
      "step": 94
    },
    {
      "epoch": 0.014820881842469626,
      "grad_norm": 0.3254302144050598,
      "learning_rate": 0.00019996643536268204,
      "loss": 1.3829,
      "step": 95
    },
    {
      "epoch": 0.014976891125021938,
      "grad_norm": 0.2829325795173645,
      "learning_rate": 0.0001999629951995559,
      "loss": 1.2176,
      "step": 96
    },
    {
      "epoch": 0.01513290040757425,
      "grad_norm": 0.2943004071712494,
      "learning_rate": 0.00019995938726598373,
      "loss": 1.4021,
      "step": 97
    },
    {
      "epoch": 0.015288909690126563,
      "grad_norm": 0.2698727548122406,
      "learning_rate": 0.00019995561156802079,
      "loss": 1.2897,
      "step": 98
    },
    {
      "epoch": 0.015444918972678875,
      "grad_norm": 0.32416194677352905,
      "learning_rate": 0.0001999516681120039,
      "loss": 1.218,
      "step": 99
    },
    {
      "epoch": 0.015600928255231187,
      "grad_norm": 0.3309131860733032,
      "learning_rate": 0.00019994755690455152,
      "loss": 1.4658,
      "step": 100
    },
    {
      "epoch": 0.015756937537783497,
      "grad_norm": 0.31126394867897034,
      "learning_rate": 0.0001999432779525635,
      "loss": 1.5518,
      "step": 101
    },
    {
      "epoch": 0.01591294682033581,
      "grad_norm": 0.28427934646606445,
      "learning_rate": 0.0001999388312632214,
      "loss": 1.1435,
      "step": 102
    },
    {
      "epoch": 0.01606895610288812,
      "grad_norm": 0.28065958619117737,
      "learning_rate": 0.00019993421684398824,
      "loss": 1.3537,
      "step": 103
    },
    {
      "epoch": 0.016224965385440434,
      "grad_norm": 0.3787417411804199,
      "learning_rate": 0.00019992943470260844,
      "loss": 1.2151,
      "step": 104
    },
    {
      "epoch": 0.016380974667992744,
      "grad_norm": 0.32704487442970276,
      "learning_rate": 0.00019992448484710797,
      "loss": 1.1383,
      "step": 105
    },
    {
      "epoch": 0.016536983950545057,
      "grad_norm": 0.34436190128326416,
      "learning_rate": 0.00019991936728579437,
      "loss": 1.3949,
      "step": 106
    },
    {
      "epoch": 0.01669299323309737,
      "grad_norm": 0.29938092827796936,
      "learning_rate": 0.00019991408202725655,
      "loss": 1.2821,
      "step": 107
    },
    {
      "epoch": 0.01684900251564968,
      "grad_norm": 0.3192508816719055,
      "learning_rate": 0.0001999086290803649,
      "loss": 1.3655,
      "step": 108
    },
    {
      "epoch": 0.017005011798201995,
      "grad_norm": 0.2626635730266571,
      "learning_rate": 0.00019990300845427125,
      "loss": 1.2366,
      "step": 109
    },
    {
      "epoch": 0.017161021080754305,
      "grad_norm": 0.288725882768631,
      "learning_rate": 0.0001998972201584088,
      "loss": 1.0589,
      "step": 110
    },
    {
      "epoch": 0.017317030363306618,
      "grad_norm": 0.3340204358100891,
      "learning_rate": 0.00019989126420249221,
      "loss": 1.7077,
      "step": 111
    },
    {
      "epoch": 0.017473039645858928,
      "grad_norm": 0.27165043354034424,
      "learning_rate": 0.00019988514059651752,
      "loss": 1.3596,
      "step": 112
    },
    {
      "epoch": 0.01762904892841124,
      "grad_norm": 0.2751217186450958,
      "learning_rate": 0.00019987884935076213,
      "loss": 1.281,
      "step": 113
    },
    {
      "epoch": 0.01778505821096355,
      "grad_norm": 0.2712443172931671,
      "learning_rate": 0.00019987239047578482,
      "loss": 1.2686,
      "step": 114
    },
    {
      "epoch": 0.017941067493515865,
      "grad_norm": 0.2898474931716919,
      "learning_rate": 0.00019986576398242566,
      "loss": 1.2425,
      "step": 115
    },
    {
      "epoch": 0.018097076776068175,
      "grad_norm": 0.29883307218551636,
      "learning_rate": 0.00019985896988180605,
      "loss": 1.6326,
      "step": 116
    },
    {
      "epoch": 0.01825308605862049,
      "grad_norm": 0.2548903524875641,
      "learning_rate": 0.00019985200818532875,
      "loss": 1.317,
      "step": 117
    },
    {
      "epoch": 0.0184090953411728,
      "grad_norm": 0.260768860578537,
      "learning_rate": 0.0001998448789046777,
      "loss": 1.4137,
      "step": 118
    },
    {
      "epoch": 0.018565104623725112,
      "grad_norm": 0.27813923358917236,
      "learning_rate": 0.00019983758205181822,
      "loss": 1.1758,
      "step": 119
    },
    {
      "epoch": 0.018721113906277422,
      "grad_norm": 0.29539602994918823,
      "learning_rate": 0.00019983011763899673,
      "loss": 1.2805,
      "step": 120
    },
    {
      "epoch": 0.018877123188829736,
      "grad_norm": 0.2691763937473297,
      "learning_rate": 0.00019982248567874098,
      "loss": 1.3098,
      "step": 121
    },
    {
      "epoch": 0.019033132471382046,
      "grad_norm": 0.2895521819591522,
      "learning_rate": 0.00019981468618385988,
      "loss": 1.1475,
      "step": 122
    },
    {
      "epoch": 0.01918914175393436,
      "grad_norm": 0.24555402994155884,
      "learning_rate": 0.00019980671916744352,
      "loss": 1.075,
      "step": 123
    },
    {
      "epoch": 0.01934515103648667,
      "grad_norm": 0.29935726523399353,
      "learning_rate": 0.00019979858464286317,
      "loss": 1.278,
      "step": 124
    },
    {
      "epoch": 0.019501160319038983,
      "grad_norm": 0.3469449579715729,
      "learning_rate": 0.00019979028262377118,
      "loss": 1.602,
      "step": 125
    },
    {
      "epoch": 0.019657169601591296,
      "grad_norm": 0.2707567811012268,
      "learning_rate": 0.00019978181312410104,
      "loss": 1.3181,
      "step": 126
    },
    {
      "epoch": 0.019813178884143606,
      "grad_norm": 0.32349273562431335,
      "learning_rate": 0.00019977317615806737,
      "loss": 1.4862,
      "step": 127
    },
    {
      "epoch": 0.01996918816669592,
      "grad_norm": 0.24527911841869354,
      "learning_rate": 0.00019976437174016573,
      "loss": 1.169,
      "step": 128
    },
    {
      "epoch": 0.02012519744924823,
      "grad_norm": 0.2882062494754791,
      "learning_rate": 0.00019975539988517288,
      "loss": 1.275,
      "step": 129
    },
    {
      "epoch": 0.020281206731800543,
      "grad_norm": 0.3206437826156616,
      "learning_rate": 0.00019974626060814647,
      "loss": 1.682,
      "step": 130
    },
    {
      "epoch": 0.020437216014352853,
      "grad_norm": 0.3423447012901306,
      "learning_rate": 0.0001997369539244252,
      "loss": 1.2018,
      "step": 131
    },
    {
      "epoch": 0.020593225296905167,
      "grad_norm": 0.29081955552101135,
      "learning_rate": 0.0001997274798496287,
      "loss": 1.5849,
      "step": 132
    },
    {
      "epoch": 0.020749234579457477,
      "grad_norm": 0.2659798860549927,
      "learning_rate": 0.00019971783839965756,
      "loss": 1.1371,
      "step": 133
    },
    {
      "epoch": 0.02090524386200979,
      "grad_norm": 0.3395417034626007,
      "learning_rate": 0.00019970802959069328,
      "loss": 1.5046,
      "step": 134
    },
    {
      "epoch": 0.0210612531445621,
      "grad_norm": 0.22527103126049042,
      "learning_rate": 0.00019969805343919821,
      "loss": 1.0543,
      "step": 135
    },
    {
      "epoch": 0.021217262427114414,
      "grad_norm": 0.30680522322654724,
      "learning_rate": 0.0001996879099619156,
      "loss": 1.5067,
      "step": 136
    },
    {
      "epoch": 0.021373271709666724,
      "grad_norm": 0.22828875482082367,
      "learning_rate": 0.00019967759917586953,
      "loss": 1.1201,
      "step": 137
    },
    {
      "epoch": 0.021529280992219037,
      "grad_norm": 0.2578384280204773,
      "learning_rate": 0.00019966712109836476,
      "loss": 1.104,
      "step": 138
    },
    {
      "epoch": 0.021685290274771347,
      "grad_norm": 0.23175813257694244,
      "learning_rate": 0.000199656475746987,
      "loss": 0.9706,
      "step": 139
    },
    {
      "epoch": 0.02184129955732366,
      "grad_norm": 0.29308339953422546,
      "learning_rate": 0.00019964566313960264,
      "loss": 1.4769,
      "step": 140
    },
    {
      "epoch": 0.02199730883987597,
      "grad_norm": 0.3059382438659668,
      "learning_rate": 0.0001996346832943587,
      "loss": 1.4555,
      "step": 141
    },
    {
      "epoch": 0.022153318122428284,
      "grad_norm": 0.2929370701313019,
      "learning_rate": 0.00019962353622968295,
      "loss": 1.4051,
      "step": 142
    },
    {
      "epoch": 0.022309327404980598,
      "grad_norm": 0.24365079402923584,
      "learning_rate": 0.00019961222196428378,
      "loss": 1.189,
      "step": 143
    },
    {
      "epoch": 0.022465336687532908,
      "grad_norm": 0.27418485283851624,
      "learning_rate": 0.0001996007405171502,
      "loss": 1.206,
      "step": 144
    },
    {
      "epoch": 0.02262134597008522,
      "grad_norm": 0.2554856836795807,
      "learning_rate": 0.00019958909190755187,
      "loss": 1.4053,
      "step": 145
    },
    {
      "epoch": 0.02277735525263753,
      "grad_norm": 0.2674770951271057,
      "learning_rate": 0.00019957727615503888,
      "loss": 1.2412,
      "step": 146
    },
    {
      "epoch": 0.022933364535189845,
      "grad_norm": 0.3177204728126526,
      "learning_rate": 0.00019956529327944198,
      "loss": 1.4231,
      "step": 147
    },
    {
      "epoch": 0.023089373817742155,
      "grad_norm": 0.2678688168525696,
      "learning_rate": 0.00019955314330087225,
      "loss": 1.2494,
      "step": 148
    },
    {
      "epoch": 0.02324538310029447,
      "grad_norm": 0.28164568543434143,
      "learning_rate": 0.00019954082623972142,
      "loss": 1.2008,
      "step": 149
    },
    {
      "epoch": 0.02340139238284678,
      "grad_norm": 0.2897564172744751,
      "learning_rate": 0.0001995283421166614,
      "loss": 1.463,
      "step": 150
    },
    {
      "epoch": 0.023557401665399092,
      "grad_norm": 0.276509165763855,
      "learning_rate": 0.00019951569095264473,
      "loss": 1.4891,
      "step": 151
    },
    {
      "epoch": 0.023713410947951402,
      "grad_norm": 0.2585453689098358,
      "learning_rate": 0.0001995028727689041,
      "loss": 1.1551,
      "step": 152
    },
    {
      "epoch": 0.023869420230503716,
      "grad_norm": 0.25659292936325073,
      "learning_rate": 0.00019948988758695263,
      "loss": 1.1622,
      "step": 153
    },
    {
      "epoch": 0.024025429513056026,
      "grad_norm": 0.27132928371429443,
      "learning_rate": 0.00019947673542858367,
      "loss": 1.2015,
      "step": 154
    },
    {
      "epoch": 0.02418143879560834,
      "grad_norm": 0.2951599955558777,
      "learning_rate": 0.00019946341631587087,
      "loss": 1.1842,
      "step": 155
    },
    {
      "epoch": 0.02433744807816065,
      "grad_norm": 0.3114786148071289,
      "learning_rate": 0.00019944993027116797,
      "loss": 1.4509,
      "step": 156
    },
    {
      "epoch": 0.024493457360712963,
      "grad_norm": 0.25183674693107605,
      "learning_rate": 0.00019943627731710897,
      "loss": 1.1474,
      "step": 157
    },
    {
      "epoch": 0.024649466643265273,
      "grad_norm": 0.2717629075050354,
      "learning_rate": 0.00019942245747660796,
      "loss": 1.2899,
      "step": 158
    },
    {
      "epoch": 0.024805475925817586,
      "grad_norm": 0.2532605826854706,
      "learning_rate": 0.00019940847077285916,
      "loss": 1.0811,
      "step": 159
    },
    {
      "epoch": 0.0249614852083699,
      "grad_norm": 0.2951716482639313,
      "learning_rate": 0.0001993943172293368,
      "loss": 1.6252,
      "step": 160
    },
    {
      "epoch": 0.02511749449092221,
      "grad_norm": 0.29894542694091797,
      "learning_rate": 0.0001993799968697951,
      "loss": 1.3754,
      "step": 161
    },
    {
      "epoch": 0.025273503773474523,
      "grad_norm": 0.28648853302001953,
      "learning_rate": 0.00019936550971826834,
      "loss": 1.2769,
      "step": 162
    },
    {
      "epoch": 0.025429513056026833,
      "grad_norm": 0.2540144920349121,
      "learning_rate": 0.00019935085579907063,
      "loss": 1.281,
      "step": 163
    },
    {
      "epoch": 0.025585522338579147,
      "grad_norm": 0.30044910311698914,
      "learning_rate": 0.00019933603513679605,
      "loss": 1.1689,
      "step": 164
    },
    {
      "epoch": 0.025741531621131457,
      "grad_norm": 0.31799909472465515,
      "learning_rate": 0.00019932104775631846,
      "loss": 1.287,
      "step": 165
    },
    {
      "epoch": 0.02589754090368377,
      "grad_norm": 0.290565550327301,
      "learning_rate": 0.0001993058936827916,
      "loss": 1.4751,
      "step": 166
    },
    {
      "epoch": 0.02605355018623608,
      "grad_norm": 0.28967443108558655,
      "learning_rate": 0.00019929057294164893,
      "loss": 1.2459,
      "step": 167
    },
    {
      "epoch": 0.026209559468788394,
      "grad_norm": 0.25141966342926025,
      "learning_rate": 0.0001992750855586036,
      "loss": 1.1215,
      "step": 168
    },
    {
      "epoch": 0.026365568751340704,
      "grad_norm": 0.2819644808769226,
      "learning_rate": 0.00019925943155964856,
      "loss": 1.5238,
      "step": 169
    },
    {
      "epoch": 0.026521578033893017,
      "grad_norm": 0.2336016446352005,
      "learning_rate": 0.00019924361097105623,
      "loss": 1.2218,
      "step": 170
    },
    {
      "epoch": 0.026677587316445327,
      "grad_norm": 0.23773479461669922,
      "learning_rate": 0.00019922762381937878,
      "loss": 1.0842,
      "step": 171
    },
    {
      "epoch": 0.02683359659899764,
      "grad_norm": 0.266222208738327,
      "learning_rate": 0.0001992114701314478,
      "loss": 1.2076,
      "step": 172
    },
    {
      "epoch": 0.02698960588154995,
      "grad_norm": 0.29275181889533997,
      "learning_rate": 0.00019919514993437445,
      "loss": 1.3901,
      "step": 173
    },
    {
      "epoch": 0.027145615164102264,
      "grad_norm": 0.2334383726119995,
      "learning_rate": 0.00019917866325554938,
      "loss": 1.2012,
      "step": 174
    },
    {
      "epoch": 0.027301624446654574,
      "grad_norm": 0.293888121843338,
      "learning_rate": 0.00019916201012264254,
      "loss": 1.6131,
      "step": 175
    },
    {
      "epoch": 0.027457633729206888,
      "grad_norm": 0.3042750954627991,
      "learning_rate": 0.0001991451905636033,
      "loss": 1.3144,
      "step": 176
    },
    {
      "epoch": 0.027613643011759198,
      "grad_norm": 0.2652626633644104,
      "learning_rate": 0.00019912820460666044,
      "loss": 1.4368,
      "step": 177
    },
    {
      "epoch": 0.02776965229431151,
      "grad_norm": 0.28741374611854553,
      "learning_rate": 0.00019911105228032186,
      "loss": 1.4643,
      "step": 178
    },
    {
      "epoch": 0.027925661576863825,
      "grad_norm": 0.2808038890361786,
      "learning_rate": 0.00019909373361337476,
      "loss": 1.3013,
      "step": 179
    },
    {
      "epoch": 0.028081670859416135,
      "grad_norm": 0.22930848598480225,
      "learning_rate": 0.0001990762486348855,
      "loss": 1.0587,
      "step": 180
    },
    {
      "epoch": 0.02823768014196845,
      "grad_norm": 0.24289073050022125,
      "learning_rate": 0.00019905859737419956,
      "loss": 1.1174,
      "step": 181
    },
    {
      "epoch": 0.02839368942452076,
      "grad_norm": 0.2626672685146332,
      "learning_rate": 0.00019904077986094152,
      "loss": 1.1746,
      "step": 182
    },
    {
      "epoch": 0.028549698707073072,
      "grad_norm": 0.3174870014190674,
      "learning_rate": 0.00019902279612501493,
      "loss": 1.4464,
      "step": 183
    },
    {
      "epoch": 0.028705707989625382,
      "grad_norm": 0.2851637303829193,
      "learning_rate": 0.0001990046461966024,
      "loss": 1.3527,
      "step": 184
    },
    {
      "epoch": 0.028861717272177696,
      "grad_norm": 0.2576538622379303,
      "learning_rate": 0.00019898633010616542,
      "loss": 1.2546,
      "step": 185
    },
    {
      "epoch": 0.029017726554730006,
      "grad_norm": 0.2922312319278717,
      "learning_rate": 0.0001989678478844443,
      "loss": 1.1445,
      "step": 186
    },
    {
      "epoch": 0.02917373583728232,
      "grad_norm": 0.25312724709510803,
      "learning_rate": 0.00019894919956245824,
      "loss": 1.0533,
      "step": 187
    },
    {
      "epoch": 0.02932974511983463,
      "grad_norm": 0.3193413019180298,
      "learning_rate": 0.00019893038517150525,
      "loss": 1.655,
      "step": 188
    },
    {
      "epoch": 0.029485754402386943,
      "grad_norm": 0.26104092597961426,
      "learning_rate": 0.00019891140474316194,
      "loss": 1.5094,
      "step": 189
    },
    {
      "epoch": 0.029641763684939253,
      "grad_norm": 0.2679871916770935,
      "learning_rate": 0.00019889225830928365,
      "loss": 1.3535,
      "step": 190
    },
    {
      "epoch": 0.029797772967491566,
      "grad_norm": 0.2835332751274109,
      "learning_rate": 0.00019887294590200435,
      "loss": 1.647,
      "step": 191
    },
    {
      "epoch": 0.029953782250043876,
      "grad_norm": 0.2309991866350174,
      "learning_rate": 0.00019885346755373656,
      "loss": 1.1869,
      "step": 192
    },
    {
      "epoch": 0.03010979153259619,
      "grad_norm": 0.28801408410072327,
      "learning_rate": 0.00019883382329717128,
      "loss": 1.4037,
      "step": 193
    },
    {
      "epoch": 0.0302658008151485,
      "grad_norm": 0.309851735830307,
      "learning_rate": 0.00019881401316527793,
      "loss": 1.2832,
      "step": 194
    },
    {
      "epoch": 0.030421810097700813,
      "grad_norm": 0.27529048919677734,
      "learning_rate": 0.0001987940371913044,
      "loss": 1.5466,
      "step": 195
    },
    {
      "epoch": 0.030577819380253127,
      "grad_norm": 0.25759854912757874,
      "learning_rate": 0.00019877389540877687,
      "loss": 1.2432,
      "step": 196
    },
    {
      "epoch": 0.030733828662805437,
      "grad_norm": 0.27557173371315,
      "learning_rate": 0.0001987535878514998,
      "loss": 1.5681,
      "step": 197
    },
    {
      "epoch": 0.03088983794535775,
      "grad_norm": 0.25760918855667114,
      "learning_rate": 0.0001987331145535559,
      "loss": 1.3067,
      "step": 198
    },
    {
      "epoch": 0.03104584722791006,
      "grad_norm": 0.299180269241333,
      "learning_rate": 0.000198712475549306,
      "loss": 1.4642,
      "step": 199
    },
    {
      "epoch": 0.031201856510462374,
      "grad_norm": 0.2398681640625,
      "learning_rate": 0.00019869167087338907,
      "loss": 1.0748,
      "step": 200
    },
    {
      "epoch": 0.03135786579301469,
      "grad_norm": 0.2560211420059204,
      "learning_rate": 0.00019867070056072214,
      "loss": 1.2508,
      "step": 201
    },
    {
      "epoch": 0.031513875075566994,
      "grad_norm": 0.25509408116340637,
      "learning_rate": 0.00019864956464650025,
      "loss": 1.4073,
      "step": 202
    },
    {
      "epoch": 0.03166988435811931,
      "grad_norm": 0.27500587701797485,
      "learning_rate": 0.00019862826316619628,
      "loss": 1.3473,
      "step": 203
    },
    {
      "epoch": 0.03182589364067162,
      "grad_norm": 0.2923906445503235,
      "learning_rate": 0.0001986067961555611,
      "loss": 1.4293,
      "step": 204
    },
    {
      "epoch": 0.031981902923223934,
      "grad_norm": 0.24456267058849335,
      "learning_rate": 0.00019858516365062334,
      "loss": 1.2196,
      "step": 205
    },
    {
      "epoch": 0.03213791220577624,
      "grad_norm": 0.3021962344646454,
      "learning_rate": 0.00019856336568768935,
      "loss": 1.5066,
      "step": 206
    },
    {
      "epoch": 0.032293921488328554,
      "grad_norm": 0.2485729455947876,
      "learning_rate": 0.00019854140230334322,
      "loss": 1.2002,
      "step": 207
    },
    {
      "epoch": 0.03244993077088087,
      "grad_norm": 0.26055216789245605,
      "learning_rate": 0.0001985192735344467,
      "loss": 1.3207,
      "step": 208
    },
    {
      "epoch": 0.03260594005343318,
      "grad_norm": 0.2658592760562897,
      "learning_rate": 0.00019849697941813898,
      "loss": 0.9025,
      "step": 209
    },
    {
      "epoch": 0.03276194933598549,
      "grad_norm": 0.30481112003326416,
      "learning_rate": 0.00019847451999183694,
      "loss": 1.5238,
      "step": 210
    },
    {
      "epoch": 0.0329179586185378,
      "grad_norm": 0.28382736444473267,
      "learning_rate": 0.00019845189529323475,
      "loss": 1.3224,
      "step": 211
    },
    {
      "epoch": 0.033073967901090115,
      "grad_norm": 0.2757686972618103,
      "learning_rate": 0.00019842910536030403,
      "loss": 1.3672,
      "step": 212
    },
    {
      "epoch": 0.03322997718364243,
      "grad_norm": 0.2743508219718933,
      "learning_rate": 0.00019840615023129372,
      "loss": 1.3628,
      "step": 213
    },
    {
      "epoch": 0.03338598646619474,
      "grad_norm": 0.26412197947502136,
      "learning_rate": 0.00019838302994472997,
      "loss": 1.141,
      "step": 214
    },
    {
      "epoch": 0.03354199574874705,
      "grad_norm": 0.2859683632850647,
      "learning_rate": 0.0001983597445394162,
      "loss": 1.1566,
      "step": 215
    },
    {
      "epoch": 0.03369800503129936,
      "grad_norm": 0.24881964921951294,
      "learning_rate": 0.00019833629405443284,
      "loss": 1.2038,
      "step": 216
    },
    {
      "epoch": 0.033854014313851676,
      "grad_norm": 0.25597479939460754,
      "learning_rate": 0.0001983126785291375,
      "loss": 0.9913,
      "step": 217
    },
    {
      "epoch": 0.03401002359640399,
      "grad_norm": 0.26771095395088196,
      "learning_rate": 0.00019828889800316466,
      "loss": 1.5417,
      "step": 218
    },
    {
      "epoch": 0.034166032878956296,
      "grad_norm": 0.2678371071815491,
      "learning_rate": 0.00019826495251642578,
      "loss": 1.208,
      "step": 219
    },
    {
      "epoch": 0.03432204216150861,
      "grad_norm": 0.2947763204574585,
      "learning_rate": 0.00019824084210910925,
      "loss": 1.3908,
      "step": 220
    },
    {
      "epoch": 0.03447805144406092,
      "grad_norm": 0.2821643650531769,
      "learning_rate": 0.00019821656682168012,
      "loss": 1.6573,
      "step": 221
    },
    {
      "epoch": 0.034634060726613236,
      "grad_norm": 0.24507346749305725,
      "learning_rate": 0.00019819212669488026,
      "loss": 1.0647,
      "step": 222
    },
    {
      "epoch": 0.03479007000916554,
      "grad_norm": 0.2718466520309448,
      "learning_rate": 0.00019816752176972813,
      "loss": 1.3013,
      "step": 223
    },
    {
      "epoch": 0.034946079291717856,
      "grad_norm": 0.2902746796607971,
      "learning_rate": 0.0001981427520875188,
      "loss": 1.2212,
      "step": 224
    },
    {
      "epoch": 0.03510208857427017,
      "grad_norm": 0.25822389125823975,
      "learning_rate": 0.0001981178176898239,
      "loss": 1.4543,
      "step": 225
    },
    {
      "epoch": 0.03525809785682248,
      "grad_norm": 0.3506292700767517,
      "learning_rate": 0.00019809271861849145,
      "loss": 1.8549,
      "step": 226
    },
    {
      "epoch": 0.03541410713937479,
      "grad_norm": 0.2610777020454407,
      "learning_rate": 0.00019806745491564586,
      "loss": 1.3161,
      "step": 227
    },
    {
      "epoch": 0.0355701164219271,
      "grad_norm": 0.29803603887557983,
      "learning_rate": 0.0001980420266236878,
      "loss": 1.2983,
      "step": 228
    },
    {
      "epoch": 0.03572612570447942,
      "grad_norm": 0.24572676420211792,
      "learning_rate": 0.0001980164337852943,
      "loss": 1.291,
      "step": 229
    },
    {
      "epoch": 0.03588213498703173,
      "grad_norm": 0.25573092699050903,
      "learning_rate": 0.00019799067644341844,
      "loss": 1.3207,
      "step": 230
    },
    {
      "epoch": 0.036038144269584044,
      "grad_norm": 0.28766271471977234,
      "learning_rate": 0.00019796475464128942,
      "loss": 1.4527,
      "step": 231
    },
    {
      "epoch": 0.03619415355213635,
      "grad_norm": 0.2636454701423645,
      "learning_rate": 0.00019793866842241243,
      "loss": 1.3899,
      "step": 232
    },
    {
      "epoch": 0.036350162834688664,
      "grad_norm": 0.3094368577003479,
      "learning_rate": 0.00019791241783056874,
      "loss": 1.2935,
      "step": 233
    },
    {
      "epoch": 0.03650617211724098,
      "grad_norm": 0.2588469088077545,
      "learning_rate": 0.00019788600290981525,
      "loss": 1.2457,
      "step": 234
    },
    {
      "epoch": 0.03666218139979329,
      "grad_norm": 0.26457706093788147,
      "learning_rate": 0.0001978594237044849,
      "loss": 1.1753,
      "step": 235
    },
    {
      "epoch": 0.0368181906823456,
      "grad_norm": 0.2559141516685486,
      "learning_rate": 0.0001978326802591862,
      "loss": 1.2004,
      "step": 236
    },
    {
      "epoch": 0.03697419996489791,
      "grad_norm": 0.2815738320350647,
      "learning_rate": 0.00019780577261880336,
      "loss": 1.3706,
      "step": 237
    },
    {
      "epoch": 0.037130209247450224,
      "grad_norm": 0.2584588825702667,
      "learning_rate": 0.0001977787008284962,
      "loss": 1.4192,
      "step": 238
    },
    {
      "epoch": 0.03728621853000254,
      "grad_norm": 0.290865421295166,
      "learning_rate": 0.00019775146493369994,
      "loss": 1.2308,
      "step": 239
    },
    {
      "epoch": 0.037442227812554844,
      "grad_norm": 0.2788088023662567,
      "learning_rate": 0.0001977240649801253,
      "loss": 1.2095,
      "step": 240
    },
    {
      "epoch": 0.03759823709510716,
      "grad_norm": 0.28903988003730774,
      "learning_rate": 0.00019769650101375837,
      "loss": 1.5138,
      "step": 241
    },
    {
      "epoch": 0.03775424637765947,
      "grad_norm": 0.29985305666923523,
      "learning_rate": 0.00019766877308086036,
      "loss": 1.4594,
      "step": 242
    },
    {
      "epoch": 0.037910255660211785,
      "grad_norm": 0.3033303916454315,
      "learning_rate": 0.00019764088122796783,
      "loss": 1.6108,
      "step": 243
    },
    {
      "epoch": 0.03806626494276409,
      "grad_norm": 0.2854767143726349,
      "learning_rate": 0.0001976128255018924,
      "loss": 1.377,
      "step": 244
    },
    {
      "epoch": 0.038222274225316405,
      "grad_norm": 0.30725011229515076,
      "learning_rate": 0.00019758460594972068,
      "loss": 1.2651,
      "step": 245
    },
    {
      "epoch": 0.03837828350786872,
      "grad_norm": 0.28218191862106323,
      "learning_rate": 0.00019755622261881427,
      "loss": 1.4354,
      "step": 246
    },
    {
      "epoch": 0.03853429279042103,
      "grad_norm": 0.2794611155986786,
      "learning_rate": 0.00019752767555680968,
      "loss": 1.4666,
      "step": 247
    },
    {
      "epoch": 0.03869030207297334,
      "grad_norm": 0.2824796736240387,
      "learning_rate": 0.00019749896481161808,
      "loss": 1.3645,
      "step": 248
    },
    {
      "epoch": 0.03884631135552565,
      "grad_norm": 0.26165372133255005,
      "learning_rate": 0.00019747009043142555,
      "loss": 1.3445,
      "step": 249
    },
    {
      "epoch": 0.039002320638077966,
      "grad_norm": 0.29985979199409485,
      "learning_rate": 0.00019744105246469263,
      "loss": 1.4558,
      "step": 250
    },
    {
      "epoch": 0.03915832992063028,
      "grad_norm": 0.25439903140068054,
      "learning_rate": 0.00019741185096015448,
      "loss": 1.1075,
      "step": 251
    },
    {
      "epoch": 0.03931433920318259,
      "grad_norm": 0.2533755898475647,
      "learning_rate": 0.00019738248596682078,
      "loss": 1.0891,
      "step": 252
    },
    {
      "epoch": 0.0394703484857349,
      "grad_norm": 0.27487608790397644,
      "learning_rate": 0.0001973529575339755,
      "loss": 1.3128,
      "step": 253
    },
    {
      "epoch": 0.03962635776828721,
      "grad_norm": 0.27824172377586365,
      "learning_rate": 0.00019732326571117703,
      "loss": 1.4045,
      "step": 254
    },
    {
      "epoch": 0.039782367050839526,
      "grad_norm": 0.27959418296813965,
      "learning_rate": 0.00019729341054825782,
      "loss": 1.2169,
      "step": 255
    },
    {
      "epoch": 0.03993837633339184,
      "grad_norm": 0.3103275001049042,
      "learning_rate": 0.00019726339209532462,
      "loss": 1.3043,
      "step": 256
    },
    {
      "epoch": 0.040094385615944146,
      "grad_norm": 0.2712806463241577,
      "learning_rate": 0.00019723321040275815,
      "loss": 1.1747,
      "step": 257
    },
    {
      "epoch": 0.04025039489849646,
      "grad_norm": 0.2961602210998535,
      "learning_rate": 0.0001972028655212131,
      "loss": 1.5744,
      "step": 258
    },
    {
      "epoch": 0.04040640418104877,
      "grad_norm": 0.2686194181442261,
      "learning_rate": 0.00019717235750161806,
      "loss": 1.2442,
      "step": 259
    },
    {
      "epoch": 0.04056241346360109,
      "grad_norm": 0.2742723822593689,
      "learning_rate": 0.00019714168639517544,
      "loss": 1.3225,
      "step": 260
    },
    {
      "epoch": 0.04071842274615339,
      "grad_norm": 0.28742754459381104,
      "learning_rate": 0.00019711085225336132,
      "loss": 1.3711,
      "step": 261
    },
    {
      "epoch": 0.04087443202870571,
      "grad_norm": 0.30374589562416077,
      "learning_rate": 0.00019707985512792543,
      "loss": 1.215,
      "step": 262
    },
    {
      "epoch": 0.04103044131125802,
      "grad_norm": 0.2738686800003052,
      "learning_rate": 0.00019704869507089105,
      "loss": 1.4628,
      "step": 263
    },
    {
      "epoch": 0.041186450593810334,
      "grad_norm": 0.2695278823375702,
      "learning_rate": 0.0001970173721345549,
      "loss": 1.4632,
      "step": 264
    },
    {
      "epoch": 0.04134245987636264,
      "grad_norm": 0.2954547107219696,
      "learning_rate": 0.00019698588637148703,
      "loss": 1.2785,
      "step": 265
    },
    {
      "epoch": 0.041498469158914954,
      "grad_norm": 0.2756305932998657,
      "learning_rate": 0.00019695423783453088,
      "loss": 1.4258,
      "step": 266
    },
    {
      "epoch": 0.04165447844146727,
      "grad_norm": 0.2642769515514374,
      "learning_rate": 0.00019692242657680286,
      "loss": 1.3034,
      "step": 267
    },
    {
      "epoch": 0.04181048772401958,
      "grad_norm": 0.2760365307331085,
      "learning_rate": 0.00019689045265169273,
      "loss": 1.5845,
      "step": 268
    },
    {
      "epoch": 0.041966497006571894,
      "grad_norm": 0.23845522105693817,
      "learning_rate": 0.0001968583161128631,
      "loss": 1.113,
      "step": 269
    },
    {
      "epoch": 0.0421225062891242,
      "grad_norm": 0.2855961322784424,
      "learning_rate": 0.0001968260170142496,
      "loss": 1.4019,
      "step": 270
    },
    {
      "epoch": 0.042278515571676514,
      "grad_norm": 0.26462671160697937,
      "learning_rate": 0.00019679355541006054,
      "loss": 1.2425,
      "step": 271
    },
    {
      "epoch": 0.04243452485422883,
      "grad_norm": 0.28468820452690125,
      "learning_rate": 0.00019676093135477713,
      "loss": 1.6525,
      "step": 272
    },
    {
      "epoch": 0.04259053413678114,
      "grad_norm": 0.3233076333999634,
      "learning_rate": 0.0001967281449031531,
      "loss": 1.2168,
      "step": 273
    },
    {
      "epoch": 0.04274654341933345,
      "grad_norm": 0.2688952684402466,
      "learning_rate": 0.00019669519611021486,
      "loss": 1.3948,
      "step": 274
    },
    {
      "epoch": 0.04290255270188576,
      "grad_norm": 0.25911059975624084,
      "learning_rate": 0.00019666208503126112,
      "loss": 1.2875,
      "step": 275
    },
    {
      "epoch": 0.043058561984438075,
      "grad_norm": 0.2789272964000702,
      "learning_rate": 0.00019662881172186313,
      "loss": 1.257,
      "step": 276
    },
    {
      "epoch": 0.04321457126699039,
      "grad_norm": 0.26854726672172546,
      "learning_rate": 0.00019659537623786428,
      "loss": 1.4554,
      "step": 277
    },
    {
      "epoch": 0.043370580549542695,
      "grad_norm": 0.31813284754753113,
      "learning_rate": 0.00019656177863538026,
      "loss": 1.667,
      "step": 278
    },
    {
      "epoch": 0.04352658983209501,
      "grad_norm": 0.2801772356033325,
      "learning_rate": 0.00019652801897079869,
      "loss": 1.4555,
      "step": 279
    },
    {
      "epoch": 0.04368259911464732,
      "grad_norm": 0.30256757140159607,
      "learning_rate": 0.00019649409730077935,
      "loss": 1.2628,
      "step": 280
    },
    {
      "epoch": 0.043838608397199635,
      "grad_norm": 0.2807087302207947,
      "learning_rate": 0.00019646001368225382,
      "loss": 1.5143,
      "step": 281
    },
    {
      "epoch": 0.04399461767975194,
      "grad_norm": 0.27217531204223633,
      "learning_rate": 0.0001964257681724255,
      "loss": 1.5372,
      "step": 282
    },
    {
      "epoch": 0.044150626962304255,
      "grad_norm": 0.2996511459350586,
      "learning_rate": 0.00019639136082876953,
      "loss": 1.2692,
      "step": 283
    },
    {
      "epoch": 0.04430663624485657,
      "grad_norm": 0.263231098651886,
      "learning_rate": 0.00019635679170903258,
      "loss": 1.2328,
      "step": 284
    },
    {
      "epoch": 0.04446264552740888,
      "grad_norm": 0.3060413897037506,
      "learning_rate": 0.00019632206087123296,
      "loss": 1.5173,
      "step": 285
    },
    {
      "epoch": 0.044618654809961196,
      "grad_norm": 0.25136467814445496,
      "learning_rate": 0.00019628716837366027,
      "loss": 1.1781,
      "step": 286
    },
    {
      "epoch": 0.0447746640925135,
      "grad_norm": 0.27105534076690674,
      "learning_rate": 0.00019625211427487548,
      "loss": 1.4542,
      "step": 287
    },
    {
      "epoch": 0.044930673375065816,
      "grad_norm": 0.27552956342697144,
      "learning_rate": 0.00019621689863371083,
      "loss": 1.3352,
      "step": 288
    },
    {
      "epoch": 0.04508668265761813,
      "grad_norm": 0.26462072134017944,
      "learning_rate": 0.00019618152150926955,
      "loss": 1.2531,
      "step": 289
    },
    {
      "epoch": 0.04524269194017044,
      "grad_norm": 0.2736480236053467,
      "learning_rate": 0.000196145982960926,
      "loss": 1.402,
      "step": 290
    },
    {
      "epoch": 0.04539870122272275,
      "grad_norm": 0.2739974856376648,
      "learning_rate": 0.00019611028304832546,
      "loss": 1.4881,
      "step": 291
    },
    {
      "epoch": 0.04555471050527506,
      "grad_norm": 0.25353673100471497,
      "learning_rate": 0.000196074421831384,
      "loss": 1.3935,
      "step": 292
    },
    {
      "epoch": 0.04571071978782738,
      "grad_norm": 0.2595098614692688,
      "learning_rate": 0.00019603839937028838,
      "loss": 1.3306,
      "step": 293
    },
    {
      "epoch": 0.04586672907037969,
      "grad_norm": 0.27779051661491394,
      "learning_rate": 0.00019600221572549606,
      "loss": 1.5111,
      "step": 294
    },
    {
      "epoch": 0.046022738352932,
      "grad_norm": 0.26458942890167236,
      "learning_rate": 0.00019596587095773495,
      "loss": 1.1354,
      "step": 295
    },
    {
      "epoch": 0.04617874763548431,
      "grad_norm": 0.3711000084877014,
      "learning_rate": 0.00019592936512800342,
      "loss": 1.387,
      "step": 296
    },
    {
      "epoch": 0.046334756918036624,
      "grad_norm": 0.26172423362731934,
      "learning_rate": 0.00019589269829757008,
      "loss": 1.1995,
      "step": 297
    },
    {
      "epoch": 0.04649076620058894,
      "grad_norm": 0.30684447288513184,
      "learning_rate": 0.00019585587052797389,
      "loss": 1.2853,
      "step": 298
    },
    {
      "epoch": 0.046646775483141244,
      "grad_norm": 0.27383920550346375,
      "learning_rate": 0.00019581888188102375,
      "loss": 1.1397,
      "step": 299
    },
    {
      "epoch": 0.04680278476569356,
      "grad_norm": 0.28926682472229004,
      "learning_rate": 0.00019578173241879872,
      "loss": 1.2977,
      "step": 300
    },
    {
      "epoch": 0.04695879404824587,
      "grad_norm": 0.2573678195476532,
      "learning_rate": 0.00019574442220364767,
      "loss": 1.315,
      "step": 301
    },
    {
      "epoch": 0.047114803330798184,
      "grad_norm": 0.286785751581192,
      "learning_rate": 0.00019570695129818926,
      "loss": 1.196,
      "step": 302
    },
    {
      "epoch": 0.0472708126133505,
      "grad_norm": 0.26392433047294617,
      "learning_rate": 0.0001956693197653119,
      "loss": 1.067,
      "step": 303
    },
    {
      "epoch": 0.047426821895902804,
      "grad_norm": 0.29351645708084106,
      "learning_rate": 0.00019563152766817354,
      "loss": 1.2977,
      "step": 304
    },
    {
      "epoch": 0.04758283117845512,
      "grad_norm": 0.3556276857852936,
      "learning_rate": 0.00019559357507020162,
      "loss": 1.1268,
      "step": 305
    },
    {
      "epoch": 0.04773884046100743,
      "grad_norm": 0.3044413924217224,
      "learning_rate": 0.00019555546203509297,
      "loss": 1.3528,
      "step": 306
    },
    {
      "epoch": 0.047894849743559745,
      "grad_norm": 0.25455671548843384,
      "learning_rate": 0.00019551718862681364,
      "loss": 1.2099,
      "step": 307
    },
    {
      "epoch": 0.04805085902611205,
      "grad_norm": 0.2863021492958069,
      "learning_rate": 0.00019547875490959885,
      "loss": 1.514,
      "step": 308
    },
    {
      "epoch": 0.048206868308664365,
      "grad_norm": 0.2713131010532379,
      "learning_rate": 0.00019544016094795295,
      "loss": 1.2479,
      "step": 309
    },
    {
      "epoch": 0.04836287759121668,
      "grad_norm": 0.28673309087753296,
      "learning_rate": 0.00019540140680664913,
      "loss": 1.4822,
      "step": 310
    },
    {
      "epoch": 0.04851888687376899,
      "grad_norm": 0.28506314754486084,
      "learning_rate": 0.00019536249255072948,
      "loss": 1.1714,
      "step": 311
    },
    {
      "epoch": 0.0486748961563213,
      "grad_norm": 0.2814370393753052,
      "learning_rate": 0.00019532341824550479,
      "loss": 1.3045,
      "step": 312
    },
    {
      "epoch": 0.04883090543887361,
      "grad_norm": 0.2505611181259155,
      "learning_rate": 0.0001952841839565544,
      "loss": 1.1565,
      "step": 313
    },
    {
      "epoch": 0.048986914721425925,
      "grad_norm": 0.27159830927848816,
      "learning_rate": 0.0001952447897497263,
      "loss": 1.0939,
      "step": 314
    },
    {
      "epoch": 0.04914292400397824,
      "grad_norm": 0.27552008628845215,
      "learning_rate": 0.00019520523569113677,
      "loss": 1.4382,
      "step": 315
    },
    {
      "epoch": 0.049298933286530545,
      "grad_norm": 0.2567708492279053,
      "learning_rate": 0.00019516552184717037,
      "loss": 1.2241,
      "step": 316
    },
    {
      "epoch": 0.04945494256908286,
      "grad_norm": 0.27663713693618774,
      "learning_rate": 0.00019512564828447988,
      "loss": 1.2449,
      "step": 317
    },
    {
      "epoch": 0.04961095185163517,
      "grad_norm": 0.2683660089969635,
      "learning_rate": 0.0001950856150699861,
      "loss": 1.1652,
      "step": 318
    },
    {
      "epoch": 0.049766961134187486,
      "grad_norm": 0.25226572155952454,
      "learning_rate": 0.0001950454222708778,
      "loss": 1.1307,
      "step": 319
    },
    {
      "epoch": 0.0499229704167398,
      "grad_norm": 0.23380513489246368,
      "learning_rate": 0.0001950050699546116,
      "loss": 1.1257,
      "step": 320
    },
    {
      "epoch": 0.050078979699292106,
      "grad_norm": 0.2385280281305313,
      "learning_rate": 0.0001949645581889118,
      "loss": 0.9917,
      "step": 321
    },
    {
      "epoch": 0.05023498898184442,
      "grad_norm": 0.23746567964553833,
      "learning_rate": 0.00019492388704177036,
      "loss": 1.1364,
      "step": 322
    },
    {
      "epoch": 0.05039099826439673,
      "grad_norm": 0.27820831537246704,
      "learning_rate": 0.00019488305658144667,
      "loss": 1.3707,
      "step": 323
    },
    {
      "epoch": 0.050547007546949047,
      "grad_norm": 0.2663419544696808,
      "learning_rate": 0.00019484206687646753,
      "loss": 1.3662,
      "step": 324
    },
    {
      "epoch": 0.05070301682950135,
      "grad_norm": 0.27196773886680603,
      "learning_rate": 0.00019480091799562704,
      "loss": 1.2766,
      "step": 325
    },
    {
      "epoch": 0.05085902611205367,
      "grad_norm": 0.296779602766037,
      "learning_rate": 0.00019475961000798645,
      "loss": 1.5789,
      "step": 326
    },
    {
      "epoch": 0.05101503539460598,
      "grad_norm": 0.3267677128314972,
      "learning_rate": 0.0001947181429828739,
      "loss": 1.2782,
      "step": 327
    },
    {
      "epoch": 0.051171044677158294,
      "grad_norm": 0.2852894067764282,
      "learning_rate": 0.00019467651698988462,
      "loss": 1.1466,
      "step": 328
    },
    {
      "epoch": 0.0513270539597106,
      "grad_norm": 0.2959722876548767,
      "learning_rate": 0.0001946347320988806,
      "loss": 1.1929,
      "step": 329
    },
    {
      "epoch": 0.051483063242262914,
      "grad_norm": 0.25998443365097046,
      "learning_rate": 0.00019459278837999046,
      "loss": 1.4104,
      "step": 330
    },
    {
      "epoch": 0.05163907252481523,
      "grad_norm": 0.27319809794425964,
      "learning_rate": 0.00019455068590360942,
      "loss": 1.417,
      "step": 331
    },
    {
      "epoch": 0.05179508180736754,
      "grad_norm": 0.22395959496498108,
      "learning_rate": 0.00019450842474039913,
      "loss": 1.2159,
      "step": 332
    },
    {
      "epoch": 0.05195109108991985,
      "grad_norm": 0.24947980046272278,
      "learning_rate": 0.00019446600496128758,
      "loss": 1.1063,
      "step": 333
    },
    {
      "epoch": 0.05210710037247216,
      "grad_norm": 0.235429584980011,
      "learning_rate": 0.00019442342663746902,
      "loss": 1.2234,
      "step": 334
    },
    {
      "epoch": 0.052263109655024474,
      "grad_norm": 0.27443963289260864,
      "learning_rate": 0.00019438068984040365,
      "loss": 1.2038,
      "step": 335
    },
    {
      "epoch": 0.05241911893757679,
      "grad_norm": 0.26688772439956665,
      "learning_rate": 0.00019433779464181778,
      "loss": 1.2956,
      "step": 336
    },
    {
      "epoch": 0.052575128220129094,
      "grad_norm": 0.23804551362991333,
      "learning_rate": 0.00019429474111370352,
      "loss": 0.9525,
      "step": 337
    },
    {
      "epoch": 0.05273113750268141,
      "grad_norm": 0.262890487909317,
      "learning_rate": 0.0001942515293283187,
      "loss": 1.2713,
      "step": 338
    },
    {
      "epoch": 0.05288714678523372,
      "grad_norm": 0.29796820878982544,
      "learning_rate": 0.00019420815935818672,
      "loss": 1.5058,
      "step": 339
    },
    {
      "epoch": 0.053043156067786035,
      "grad_norm": 0.275143563747406,
      "learning_rate": 0.00019416463127609656,
      "loss": 1.2604,
      "step": 340
    },
    {
      "epoch": 0.05319916535033835,
      "grad_norm": 0.27801284193992615,
      "learning_rate": 0.00019412094515510248,
      "loss": 1.2588,
      "step": 341
    },
    {
      "epoch": 0.053355174632890655,
      "grad_norm": 0.2604374885559082,
      "learning_rate": 0.00019407710106852404,
      "loss": 1.1432,
      "step": 342
    },
    {
      "epoch": 0.05351118391544297,
      "grad_norm": 0.2863079011440277,
      "learning_rate": 0.00019403309908994586,
      "loss": 1.4854,
      "step": 343
    },
    {
      "epoch": 0.05366719319799528,
      "grad_norm": 0.2515758275985718,
      "learning_rate": 0.00019398893929321761,
      "loss": 1.1682,
      "step": 344
    },
    {
      "epoch": 0.053823202480547595,
      "grad_norm": 0.27037686109542847,
      "learning_rate": 0.00019394462175245381,
      "loss": 1.3679,
      "step": 345
    },
    {
      "epoch": 0.0539792117630999,
      "grad_norm": 0.2368054836988449,
      "learning_rate": 0.00019390014654203369,
      "loss": 1.1406,
      "step": 346
    },
    {
      "epoch": 0.054135221045652215,
      "grad_norm": 0.27759966254234314,
      "learning_rate": 0.0001938555137366011,
      "loss": 1.1669,
      "step": 347
    },
    {
      "epoch": 0.05429123032820453,
      "grad_norm": 0.3004835546016693,
      "learning_rate": 0.00019381072341106452,
      "loss": 1.4811,
      "step": 348
    },
    {
      "epoch": 0.05444723961075684,
      "grad_norm": 0.30656251311302185,
      "learning_rate": 0.0001937657756405966,
      "loss": 1.515,
      "step": 349
    },
    {
      "epoch": 0.05460324889330915,
      "grad_norm": 0.31442925333976746,
      "learning_rate": 0.00019372067050063438,
      "loss": 1.4848,
      "step": 350
    },
    {
      "epoch": 0.05475925817586146,
      "grad_norm": 0.2230207473039627,
      "learning_rate": 0.00019367540806687893,
      "loss": 0.9535,
      "step": 351
    },
    {
      "epoch": 0.054915267458413776,
      "grad_norm": 0.2552795708179474,
      "learning_rate": 0.0001936299884152954,
      "loss": 1.2254,
      "step": 352
    },
    {
      "epoch": 0.05507127674096609,
      "grad_norm": 0.29775241017341614,
      "learning_rate": 0.0001935844116221127,
      "loss": 1.3821,
      "step": 353
    },
    {
      "epoch": 0.055227286023518396,
      "grad_norm": 0.24480530619621277,
      "learning_rate": 0.00019353867776382354,
      "loss": 1.1073,
      "step": 354
    },
    {
      "epoch": 0.05538329530607071,
      "grad_norm": 0.2612270414829254,
      "learning_rate": 0.00019349278691718427,
      "loss": 1.3114,
      "step": 355
    },
    {
      "epoch": 0.05553930458862302,
      "grad_norm": 0.307085245847702,
      "learning_rate": 0.0001934467391592146,
      "loss": 1.3602,
      "step": 356
    },
    {
      "epoch": 0.055695313871175336,
      "grad_norm": 0.2688599228858948,
      "learning_rate": 0.00019340053456719768,
      "loss": 1.4347,
      "step": 357
    },
    {
      "epoch": 0.05585132315372765,
      "grad_norm": 0.25372791290283203,
      "learning_rate": 0.00019335417321867987,
      "loss": 1.3468,
      "step": 358
    },
    {
      "epoch": 0.05600733243627996,
      "grad_norm": 0.2706502377986908,
      "learning_rate": 0.0001933076551914706,
      "loss": 1.4489,
      "step": 359
    },
    {
      "epoch": 0.05616334171883227,
      "grad_norm": 0.22997525334358215,
      "learning_rate": 0.00019326098056364222,
      "loss": 1.1305,
      "step": 360
    },
    {
      "epoch": 0.056319351001384584,
      "grad_norm": 0.30573347210884094,
      "learning_rate": 0.00019321414941353003,
      "loss": 1.4231,
      "step": 361
    },
    {
      "epoch": 0.0564753602839369,
      "grad_norm": 0.30873847007751465,
      "learning_rate": 0.00019316716181973188,
      "loss": 1.3478,
      "step": 362
    },
    {
      "epoch": 0.056631369566489204,
      "grad_norm": 0.2514902651309967,
      "learning_rate": 0.00019312001786110828,
      "loss": 1.2094,
      "step": 363
    },
    {
      "epoch": 0.05678737884904152,
      "grad_norm": 0.26067742705345154,
      "learning_rate": 0.00019307271761678213,
      "loss": 1.5841,
      "step": 364
    },
    {
      "epoch": 0.05694338813159383,
      "grad_norm": 0.23508694767951965,
      "learning_rate": 0.00019302526116613864,
      "loss": 1.103,
      "step": 365
    },
    {
      "epoch": 0.057099397414146144,
      "grad_norm": 0.24878567457199097,
|
"learning_rate": 0.00019297764858882514, |
|
"loss": 1.0968, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.05725540669669845, |
|
"grad_norm": 0.23707476258277893, |
|
"learning_rate": 0.00019292987996475113, |
|
"loss": 1.0831, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.057411415979250764, |
|
"grad_norm": 0.2691617012023926, |
|
"learning_rate": 0.0001928819553740878, |
|
"loss": 1.2254, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.05756742526180308, |
|
"grad_norm": 0.26831138134002686, |
|
"learning_rate": 0.00019283387489726827, |
|
"loss": 1.3084, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.05772343454435539, |
|
"grad_norm": 0.281770259141922, |
|
"learning_rate": 0.00019278563861498723, |
|
"loss": 1.3377, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.0578794438269077, |
|
"grad_norm": 0.2634589970111847, |
|
"learning_rate": 0.00019273724660820088, |
|
"loss": 1.2648, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.05803545310946001, |
|
"grad_norm": 0.27592259645462036, |
|
"learning_rate": 0.00019268869895812672, |
|
"loss": 1.2751, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.058191462392012325, |
|
"grad_norm": 0.23107245564460754, |
|
"learning_rate": 0.00019263999574624355, |
|
"loss": 1.2651, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.05834747167456464, |
|
"grad_norm": 0.2582552134990692, |
|
"learning_rate": 0.0001925911370542912, |
|
"loss": 1.4914, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.05850348095711695, |
|
"grad_norm": 0.27152058482170105, |
|
"learning_rate": 0.00019254212296427044, |
|
"loss": 1.2227, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.05865949023966926, |
|
"grad_norm": 0.23554329574108124, |
|
"learning_rate": 0.00019249295355844285, |
|
"loss": 1.4113, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.05881549952222157, |
|
"grad_norm": 0.2793971300125122, |
|
"learning_rate": 0.00019244362891933077, |
|
"loss": 1.3325, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.058971508804773885, |
|
"grad_norm": 0.2800885736942291, |
|
"learning_rate": 0.00019239414912971696, |
|
"loss": 1.358, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.0591275180873262, |
|
"grad_norm": 0.27139201760292053, |
|
"learning_rate": 0.0001923445142726446, |
|
"loss": 1.2269, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.059283527369878505, |
|
"grad_norm": 0.276579886674881, |
|
"learning_rate": 0.0001922947244314172, |
|
"loss": 1.1521, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.05943953665243082, |
|
"grad_norm": 0.28917452692985535, |
|
"learning_rate": 0.0001922447796895982, |
|
"loss": 1.2803, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.05959554593498313, |
|
"grad_norm": 0.28668197989463806, |
|
"learning_rate": 0.00019219468013101124, |
|
"loss": 1.4025, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.059751555217535446, |
|
"grad_norm": 0.2973851263523102, |
|
"learning_rate": 0.00019214442583973966, |
|
"loss": 1.5472, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.05990756450008775, |
|
"grad_norm": 0.25934460759162903, |
|
"learning_rate": 0.00019209401690012653, |
|
"loss": 1.2496, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.060063573782640066, |
|
"grad_norm": 0.22885724902153015, |
|
"learning_rate": 0.00019204345339677442, |
|
"loss": 1.2088, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.06021958306519238, |
|
"grad_norm": 0.28346025943756104, |
|
"learning_rate": 0.00019199273541454538, |
|
"loss": 1.1561, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.06037559234774469, |
|
"grad_norm": 0.2574789822101593, |
|
"learning_rate": 0.00019194186303856067, |
|
"loss": 1.3209, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.060531601630297, |
|
"grad_norm": 0.26535728573799133, |
|
"learning_rate": 0.00019189083635420075, |
|
"loss": 1.3022, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.06068761091284931, |
|
"grad_norm": 0.2844642698764801, |
|
"learning_rate": 0.00019183965544710495, |
|
"loss": 1.3881, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.060843620195401626, |
|
"grad_norm": 0.24562187492847443, |
|
"learning_rate": 0.00019178832040317155, |
|
"loss": 1.159, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.06099962947795394, |
|
"grad_norm": 0.25778669118881226, |
|
"learning_rate": 0.0001917368313085574, |
|
"loss": 1.5154, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.061155638760506253, |
|
"grad_norm": 0.22877171635627747, |
|
"learning_rate": 0.00019168518824967795, |
|
"loss": 1.201, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.06131164804305856, |
|
"grad_norm": 0.2764502465724945, |
|
"learning_rate": 0.00019163339131320718, |
|
"loss": 1.4165, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.061467657325610874, |
|
"grad_norm": 0.23493847250938416, |
|
"learning_rate": 0.00019158144058607708, |
|
"loss": 1.1334, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.06162366660816319, |
|
"grad_norm": 0.2605098783969879, |
|
"learning_rate": 0.00019152933615547798, |
|
"loss": 1.1613, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.0617796758907155, |
|
"grad_norm": 0.23720701038837433, |
|
"learning_rate": 0.000191477078108858, |
|
"loss": 1.1966, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.06193568517326781, |
|
"grad_norm": 0.27043676376342773, |
|
"learning_rate": 0.00019142466653392318, |
|
"loss": 1.2793, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.06209169445582012, |
|
"grad_norm": 0.27630025148391724, |
|
"learning_rate": 0.0001913721015186372, |
|
"loss": 1.3858, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.062247703738372434, |
|
"grad_norm": 0.29454129934310913, |
|
"learning_rate": 0.0001913193831512213, |
|
"loss": 1.5234, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.06240371302092475, |
|
"grad_norm": 0.26943233609199524, |
|
"learning_rate": 0.00019126651152015403, |
|
"loss": 1.3181, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06255972230347706, |
|
"grad_norm": 0.28831520676612854, |
|
"learning_rate": 0.0001912134867141712, |
|
"loss": 1.46, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.06271573158602937, |
|
"grad_norm": 0.26342567801475525, |
|
"learning_rate": 0.0001911603088222657, |
|
"loss": 1.4073, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.06287174086858167, |
|
"grad_norm": 0.2623300552368164, |
|
"learning_rate": 0.0001911069779336873, |
|
"loss": 1.3473, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.06302775015113399, |
|
"grad_norm": 0.25125861167907715, |
|
"learning_rate": 0.00019105349413794272, |
|
"loss": 1.0346, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.0631837594336863, |
|
"grad_norm": 0.30890092253685, |
|
"learning_rate": 0.00019099985752479506, |
|
"loss": 1.5751, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.06333976871623861, |
|
"grad_norm": 0.31404733657836914, |
|
"learning_rate": 0.00019094606818426403, |
|
"loss": 1.5458, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.06349577799879093, |
|
"grad_norm": 0.2684463858604431, |
|
"learning_rate": 0.00019089212620662568, |
|
"loss": 1.2342, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.06365178728134324, |
|
"grad_norm": 0.2748461365699768, |
|
"learning_rate": 0.00019083803168241223, |
|
"loss": 1.3353, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.06380779656389556, |
|
"grad_norm": 0.3061840832233429, |
|
"learning_rate": 0.00019078378470241183, |
|
"loss": 1.3197, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.06396380584644787, |
|
"grad_norm": 0.25601011514663696, |
|
"learning_rate": 0.00019072938535766865, |
|
"loss": 1.3904, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.06411981512900018, |
|
"grad_norm": 0.2844060957431793, |
|
"learning_rate": 0.00019067483373948243, |
|
"loss": 1.42, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.06427582441155248, |
|
"grad_norm": 0.2969295382499695, |
|
"learning_rate": 0.00019062012993940859, |
|
"loss": 1.4255, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.0644318336941048, |
|
"grad_norm": 0.2655050456523895, |
|
"learning_rate": 0.00019056527404925789, |
|
"loss": 1.1618, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.06458784297665711, |
|
"grad_norm": 0.2571544349193573, |
|
"learning_rate": 0.00019051026616109638, |
|
"loss": 1.2064, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.06474385225920942, |
|
"grad_norm": 0.29847028851509094, |
|
"learning_rate": 0.0001904551063672452, |
|
"loss": 1.2847, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.06489986154176174, |
|
"grad_norm": 0.24265627562999725, |
|
"learning_rate": 0.00019039979476028043, |
|
"loss": 1.2745, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.06505587082431405, |
|
"grad_norm": 0.24038730561733246, |
|
"learning_rate": 0.000190344331433033, |
|
"loss": 1.2761, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.06521188010686636, |
|
"grad_norm": 0.26194193959236145, |
|
"learning_rate": 0.00019028871647858834, |
|
"loss": 1.5021, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.06536788938941868, |
|
"grad_norm": 0.2636980712413788, |
|
"learning_rate": 0.00019023294999028653, |
|
"loss": 1.5029, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.06552389867197098, |
|
"grad_norm": 0.26995277404785156, |
|
"learning_rate": 0.00019017703206172185, |
|
"loss": 1.3068, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.06567990795452329, |
|
"grad_norm": 0.26835623383522034, |
|
"learning_rate": 0.0001901209627867428, |
|
"loss": 1.2868, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.0658359172370756, |
|
"grad_norm": 0.24785400927066803, |
|
"learning_rate": 0.0001900647422594519, |
|
"loss": 1.1875, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.06599192651962792, |
|
"grad_norm": 0.3184250593185425, |
|
"learning_rate": 0.0001900083705742054, |
|
"loss": 1.3802, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.06614793580218023, |
|
"grad_norm": 0.2850029766559601, |
|
"learning_rate": 0.00018995184782561345, |
|
"loss": 1.3043, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.06630394508473254, |
|
"grad_norm": 0.2940841317176819, |
|
"learning_rate": 0.00018989517410853955, |
|
"loss": 1.287, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.06645995436728486, |
|
"grad_norm": 0.2668844163417816, |
|
"learning_rate": 0.0001898383495181007, |
|
"loss": 1.3723, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.06661596364983717, |
|
"grad_norm": 0.2814147472381592, |
|
"learning_rate": 0.00018978137414966698, |
|
"loss": 1.2339, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.06677197293238948, |
|
"grad_norm": 0.3722403049468994, |
|
"learning_rate": 0.0001897242480988617, |
|
"loss": 1.2755, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.06692798221494178, |
|
"grad_norm": 0.2689428925514221, |
|
"learning_rate": 0.00018966697146156092, |
|
"loss": 1.4238, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.0670839914974941, |
|
"grad_norm": 0.29616808891296387, |
|
"learning_rate": 0.00018960954433389345, |
|
"loss": 1.3167, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.06724000078004641, |
|
"grad_norm": 0.2477925419807434, |
|
"learning_rate": 0.0001895519668122408, |
|
"loss": 1.1773, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.06739601006259872, |
|
"grad_norm": 0.23961544036865234, |
|
"learning_rate": 0.0001894942389932367, |
|
"loss": 1.1387, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.06755201934515104, |
|
"grad_norm": 0.26128751039505005, |
|
"learning_rate": 0.00018943636097376726, |
|
"loss": 1.0468, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.06770802862770335, |
|
"grad_norm": 0.33279022574424744, |
|
"learning_rate": 0.00018937833285097066, |
|
"loss": 1.8791, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.06786403791025566, |
|
"grad_norm": 0.2876769006252289, |
|
"learning_rate": 0.00018932015472223693, |
|
"loss": 1.3633, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.06802004719280798, |
|
"grad_norm": 0.24108922481536865, |
|
"learning_rate": 0.00018926182668520792, |
|
"loss": 1.2012, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.06817605647536028, |
|
"grad_norm": 0.29062169790267944, |
|
"learning_rate": 0.0001892033488377771, |
|
"loss": 1.3658, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.06833206575791259, |
|
"grad_norm": 0.26536259055137634, |
|
"learning_rate": 0.0001891447212780893, |
|
"loss": 1.2464, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.0684880750404649, |
|
"grad_norm": 0.2940811514854431, |
|
"learning_rate": 0.0001890859441045407, |
|
"loss": 1.4609, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.06864408432301722, |
|
"grad_norm": 0.27625903487205505, |
|
"learning_rate": 0.0001890270174157784, |
|
"loss": 1.4098, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.06880009360556953, |
|
"grad_norm": 0.2586573362350464, |
|
"learning_rate": 0.00018896794131070073, |
|
"loss": 1.3857, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.06895610288812185, |
|
"grad_norm": 0.28287774324417114, |
|
"learning_rate": 0.0001889087158884565, |
|
"loss": 1.2967, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.06911211217067416, |
|
"grad_norm": 0.2692122459411621, |
|
"learning_rate": 0.00018884934124844532, |
|
"loss": 1.5216, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.06926812145322647, |
|
"grad_norm": 0.3004090189933777, |
|
"learning_rate": 0.00018878981749031716, |
|
"loss": 1.1913, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.06942413073577879, |
|
"grad_norm": 0.253542423248291, |
|
"learning_rate": 0.00018873014471397224, |
|
"loss": 1.1299, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.06958014001833109, |
|
"grad_norm": 0.3034575283527374, |
|
"learning_rate": 0.00018867032301956088, |
|
"loss": 1.3577, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.0697361493008834, |
|
"grad_norm": 0.31302767992019653, |
|
"learning_rate": 0.00018861035250748343, |
|
"loss": 1.6029, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.06989215858343571, |
|
"grad_norm": 0.26993393898010254, |
|
"learning_rate": 0.00018855023327838983, |
|
"loss": 1.2035, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.07004816786598803, |
|
"grad_norm": 0.27148422598838806, |
|
"learning_rate": 0.00018848996543317982, |
|
"loss": 1.5843, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.07020417714854034, |
|
"grad_norm": 0.2631765305995941, |
|
"learning_rate": 0.00018842954907300236, |
|
"loss": 1.2641, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.07036018643109265, |
|
"grad_norm": 0.2621013820171356, |
|
"learning_rate": 0.00018836898429925585, |
|
"loss": 1.2167, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.07051619571364497, |
|
"grad_norm": 0.25064215064048767, |
|
"learning_rate": 0.0001883082712135877, |
|
"loss": 1.2631, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.07067220499619728, |
|
"grad_norm": 0.2558056712150574, |
|
"learning_rate": 0.00018824740991789415, |
|
"loss": 0.9964, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.07082821427874958, |
|
"grad_norm": 0.2675093412399292, |
|
"learning_rate": 0.00018818640051432035, |
|
"loss": 1.4953, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.07098422356130189, |
|
"grad_norm": 0.2550821006298065, |
|
"learning_rate": 0.0001881252431052599, |
|
"loss": 1.1283, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.0711402328438542, |
|
"grad_norm": 0.24893717467784882, |
|
"learning_rate": 0.00018806393779335483, |
|
"loss": 1.1725, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.07129624212640652, |
|
"grad_norm": 0.24471914768218994, |
|
"learning_rate": 0.00018800248468149543, |
|
"loss": 1.19, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.07145225140895883, |
|
"grad_norm": 0.27745166420936584, |
|
"learning_rate": 0.00018794088387282, |
|
"loss": 1.6347, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.07160826069151115, |
|
"grad_norm": 0.2930917739868164, |
|
"learning_rate": 0.00018787913547071484, |
|
"loss": 1.5139, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.07176426997406346, |
|
"grad_norm": 0.2656380534172058, |
|
"learning_rate": 0.00018781723957881372, |
|
"loss": 1.1726, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.07192027925661577, |
|
"grad_norm": 0.27983731031417847, |
|
"learning_rate": 0.0001877551963009982, |
|
"loss": 1.3818, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.07207628853916809, |
|
"grad_norm": 0.2744976580142975, |
|
"learning_rate": 0.0001876930057413971, |
|
"loss": 1.2756, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.07223229782172039, |
|
"grad_norm": 0.2684760093688965, |
|
"learning_rate": 0.00018763066800438636, |
|
"loss": 1.2302, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.0723883071042727, |
|
"grad_norm": 0.25079357624053955, |
|
"learning_rate": 0.00018756818319458907, |
|
"loss": 1.1575, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.07254431638682501, |
|
"grad_norm": 0.2802796959877014, |
|
"learning_rate": 0.000187505551416875, |
|
"loss": 1.3711, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.07270032566937733, |
|
"grad_norm": 0.7640414237976074, |
|
"learning_rate": 0.0001874427727763607, |
|
"loss": 1.3431, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.07285633495192964, |
|
"grad_norm": 0.265717089176178, |
|
"learning_rate": 0.0001873798473784092, |
|
"loss": 1.1778, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.07301234423448195, |
|
"grad_norm": 0.23273074626922607, |
|
"learning_rate": 0.00018731677532862976, |
|
"loss": 1.02, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.07316835351703427, |
|
"grad_norm": 0.248812735080719, |
|
"learning_rate": 0.00018725355673287778, |
|
"loss": 1.1423, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.07332436279958658, |
|
"grad_norm": 0.24919858574867249, |
|
"learning_rate": 0.00018719019169725472, |
|
"loss": 1.2377, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.07348037208213888, |
|
"grad_norm": 0.25503799319267273, |
|
"learning_rate": 0.00018712668032810768, |
|
"loss": 1.3236, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.0736363813646912, |
|
"grad_norm": 0.28893566131591797, |
|
"learning_rate": 0.00018706302273202943, |
|
"loss": 1.4662, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.07379239064724351, |
|
"grad_norm": 0.2384706735610962, |
|
"learning_rate": 0.00018699921901585813, |
|
"loss": 1.2817, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.07394839992979582, |
|
"grad_norm": 0.2527397572994232, |
|
"learning_rate": 0.0001869352692866772, |
|
"loss": 1.1766, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.07410440921234814, |
|
"grad_norm": 0.25340378284454346, |
|
"learning_rate": 0.00018687117365181512, |
|
"loss": 1.1876, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.07426041849490045, |
|
"grad_norm": 0.2570219039916992, |
|
"learning_rate": 0.00018680693221884517, |
|
"loss": 1.3472, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.07441642777745276, |
|
"grad_norm": 0.25267085433006287, |
|
"learning_rate": 0.00018674254509558544, |
|
"loss": 1.5048, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.07457243706000508, |
|
"grad_norm": 0.24603790044784546, |
|
"learning_rate": 0.00018667801239009846, |
|
"loss": 1.276, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.07472844634255738, |
|
"grad_norm": 0.2434520423412323, |
|
"learning_rate": 0.00018661333421069113, |
|
"loss": 1.3999, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.07488445562510969, |
|
"grad_norm": 0.27032792568206787, |
|
"learning_rate": 0.00018654851066591448, |
|
"loss": 1.3909, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.075040464907662, |
|
"grad_norm": 0.26559844613075256, |
|
"learning_rate": 0.00018648354186456348, |
|
"loss": 1.2931, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.07519647419021432, |
|
"grad_norm": 0.2563202679157257, |
|
"learning_rate": 0.000186418427915677, |
|
"loss": 1.2773, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.07535248347276663, |
|
"grad_norm": 0.2463751882314682, |
|
"learning_rate": 0.00018635316892853741, |
|
"loss": 1.4017, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.07550849275531894, |
|
"grad_norm": 0.26452189683914185, |
|
"learning_rate": 0.00018628776501267052, |
|
"loss": 1.2236, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.07566450203787126, |
|
"grad_norm": 0.48540955781936646, |
|
"learning_rate": 0.0001862222162778454, |
|
"loss": 1.1676, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.07582051132042357, |
|
"grad_norm": 0.2931404411792755, |
|
"learning_rate": 0.0001861565228340742, |
|
"loss": 1.3877, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.07597652060297588, |
|
"grad_norm": 0.2707270383834839, |
|
"learning_rate": 0.00018609068479161182, |
|
"loss": 1.2828, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.07613252988552818, |
|
"grad_norm": 0.25902295112609863, |
|
"learning_rate": 0.00018602470226095603, |
|
"loss": 1.2393, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.0762885391680805, |
|
"grad_norm": 0.27907291054725647, |
|
"learning_rate": 0.00018595857535284692, |
|
"loss": 1.1944, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.07644454845063281, |
|
"grad_norm": 0.3079850375652313, |
|
"learning_rate": 0.00018589230417826697, |
|
"loss": 1.3686, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.07660055773318512, |
|
"grad_norm": 0.250303715467453, |
|
"learning_rate": 0.00018582588884844084, |
|
"loss": 1.2497, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.07675656701573744, |
|
"grad_norm": 0.260257750749588, |
|
"learning_rate": 0.00018575932947483502, |
|
"loss": 1.4186, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.07691257629828975, |
|
"grad_norm": 0.2537723481655121, |
|
"learning_rate": 0.00018569262616915784, |
|
"loss": 1.28, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.07706858558084206, |
|
"grad_norm": 0.21861004829406738, |
|
"learning_rate": 0.00018562577904335912, |
|
"loss": 0.9705, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.07722459486339438, |
|
"grad_norm": 0.322566956281662, |
|
"learning_rate": 0.00018555878820963013, |
|
"loss": 1.4941, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.07738060414594668, |
|
"grad_norm": 0.24904873967170715, |
|
"learning_rate": 0.00018549165378040327, |
|
"loss": 1.2277, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.07753661342849899, |
|
"grad_norm": 0.2692057490348816, |
|
"learning_rate": 0.00018542437586835202, |
|
"loss": 1.3786, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.0776926227110513, |
|
"grad_norm": 0.27876508235931396, |
|
"learning_rate": 0.00018535695458639056, |
|
"loss": 1.3822, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.07784863199360362, |
|
"grad_norm": 0.2497495859861374, |
|
"learning_rate": 0.00018528939004767376, |
|
"loss": 1.1872, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.07800464127615593, |
|
"grad_norm": 0.28155678510665894, |
|
"learning_rate": 0.00018522168236559695, |
|
"loss": 1.2253, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07800464127615593, |
|
"eval_loss": 1.3168833255767822, |
|
"eval_runtime": 110.9584, |
|
"eval_samples_per_second": 38.51, |
|
"eval_steps_per_second": 4.822, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07816065055870824, |
|
"grad_norm": 0.25162461400032043, |
|
"learning_rate": 0.0001851538316537956, |
|
"loss": 1.2308, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.07831665984126056, |
|
"grad_norm": 0.33541133999824524, |
|
"learning_rate": 0.0001850858380261453, |
|
"loss": 1.2788, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.07847266912381287, |
|
"grad_norm": 0.29069721698760986, |
|
"learning_rate": 0.00018501770159676156, |
|
"loss": 1.4186, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.07862867840636519, |
|
"grad_norm": 0.24337412416934967, |
|
"learning_rate": 0.0001849494224799994, |
|
"loss": 1.2268, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.07878468768891748, |
|
"grad_norm": 0.2503622770309448, |
|
"learning_rate": 0.00018488100079045344, |
|
"loss": 1.1121, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.0789406969714698, |
|
"grad_norm": 0.3061240017414093, |
|
"learning_rate": 0.0001848124366429576, |
|
"loss": 1.4207, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.07909670625402211, |
|
"grad_norm": 0.3209320902824402, |
|
"learning_rate": 0.00018474373015258473, |
|
"loss": 1.3531, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.07925271553657443, |
|
"grad_norm": 0.26510298252105713, |
|
"learning_rate": 0.0001846748814346468, |
|
"loss": 1.1614, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.07940872481912674, |
|
"grad_norm": 0.24753335118293762, |
|
"learning_rate": 0.00018460589060469425, |
|
"loss": 1.2711, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.07956473410167905, |
|
"grad_norm": 0.2837298512458801, |
|
"learning_rate": 0.00018453675777851627, |
|
"loss": 1.2325, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.07972074338423137, |
|
"grad_norm": 0.30447372794151306, |
|
"learning_rate": 0.00018446748307214019, |
|
"loss": 1.2425, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.07987675266678368, |
|
"grad_norm": 0.27281391620635986, |
|
"learning_rate": 0.0001843980666018315, |
|
"loss": 1.3095, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.08003276194933598, |
|
"grad_norm": 0.27750325202941895, |
|
"learning_rate": 0.00018432850848409363, |
|
"loss": 1.5124, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.08018877123188829, |
|
"grad_norm": 0.32551145553588867, |
|
"learning_rate": 0.00018425880883566782, |
|
"loss": 1.5727, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.0803447805144406, |
|
"grad_norm": 0.29455453157424927, |
|
"learning_rate": 0.0001841889677735327, |
|
"loss": 1.1937, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.08050078979699292, |
|
"grad_norm": 0.271435022354126, |
|
"learning_rate": 0.00018411898541490434, |
|
"loss": 1.3523, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.08065679907954523, |
|
"grad_norm": 0.28192776441574097, |
|
"learning_rate": 0.0001840488618772359, |
|
"loss": 1.4196, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.08081280836209755, |
|
"grad_norm": 0.32622769474983215, |
|
"learning_rate": 0.00018397859727821748, |
|
"loss": 1.3939, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.08096881764464986, |
|
"grad_norm": 0.26916465163230896, |
|
"learning_rate": 0.00018390819173577598, |
|
"loss": 1.315, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.08112482692720217, |
|
"grad_norm": 0.2807716429233551, |
|
"learning_rate": 0.00018383764536807485, |
|
"loss": 1.4009, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.08128083620975449, |
|
"grad_norm": 0.2609405517578125, |
|
"learning_rate": 0.00018376695829351377, |
|
"loss": 0.9599, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.08143684549230679, |
|
"grad_norm": 0.27300071716308594, |
|
"learning_rate": 0.00018369613063072874, |
|
"loss": 1.2349, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.0815928547748591, |
|
"grad_norm": 0.26670917868614197, |
|
"learning_rate": 0.00018362516249859163, |
|
"loss": 1.2895, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.08174886405741141, |
|
"grad_norm": 0.2805304527282715, |
|
"learning_rate": 0.00018355405401621001, |
|
"loss": 1.3661, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.08190487333996373, |
|
"grad_norm": 0.25124502182006836, |
|
"learning_rate": 0.00018348280530292713, |
|
"loss": 1.2215, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.08206088262251604, |
|
"grad_norm": 0.2374117225408554, |
|
"learning_rate": 0.00018341141647832147, |
|
"loss": 1.1662, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.08221689190506835, |
|
"grad_norm": 0.2681942582130432, |
|
"learning_rate": 0.00018333988766220676, |
|
"loss": 1.3256, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.08237290118762067, |
|
"grad_norm": 0.26264506578445435, |
|
"learning_rate": 0.0001832682189746316, |
|
"loss": 1.1417, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.08252891047017298, |
|
"grad_norm": 0.2661115527153015, |
|
"learning_rate": 0.00018319641053587938, |
|
"loss": 1.2202, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.08268491975272528, |
|
"grad_norm": 0.23459146916866302, |
|
"learning_rate": 0.0001831244624664681, |
|
"loss": 1.0511, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.0828409290352776, |
|
"grad_norm": 0.31903690099716187, |
|
"learning_rate": 0.00018305237488714995, |
|
"loss": 1.565, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.08299693831782991, |
|
"grad_norm": 0.28528186678886414, |
|
"learning_rate": 0.00018298014791891137, |
|
"loss": 1.5023, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.08315294760038222, |
|
"grad_norm": 0.2572003901004791, |
|
"learning_rate": 0.00018290778168297277, |
|
"loss": 1.1518, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.08330895688293453, |
|
"grad_norm": 0.27797260880470276, |
|
"learning_rate": 0.00018283527630078825, |
|
"loss": 1.344, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.08346496616548685, |
|
"grad_norm": 0.3142591416835785, |
|
"learning_rate": 0.0001827626318940454, |
|
"loss": 1.4126, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.08362097544803916, |
|
"grad_norm": 0.2703491151332855, |
|
"learning_rate": 0.00018268984858466522, |
|
"loss": 1.2156, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.08377698473059147, |
|
"grad_norm": 0.29505112767219543, |
|
"learning_rate": 0.00018261692649480175, |
|
"loss": 1.421, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.08393299401314379, |
|
"grad_norm": 0.2756875157356262, |
|
"learning_rate": 0.00018254386574684204, |
|
"loss": 1.4858, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.08408900329569609, |
|
"grad_norm": 0.2744990885257721, |
|
"learning_rate": 0.0001824706664634058, |
|
"loss": 1.3441, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.0842450125782484, |
|
"grad_norm": 0.2834165096282959, |
|
"learning_rate": 0.00018239732876734527, |
|
"loss": 1.4142, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.08440102186080072, |
|
"grad_norm": 0.2717669904232025, |
|
"learning_rate": 0.0001823238527817449, |
|
"loss": 1.3199, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.08455703114335303, |
|
"grad_norm": 0.26433441042900085, |
|
"learning_rate": 0.00018225023862992142, |
|
"loss": 1.3197, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.08471304042590534, |
|
"grad_norm": 0.27460265159606934, |
|
"learning_rate": 0.00018217648643542323, |
|
"loss": 1.216, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.08486904970845766, |
|
"grad_norm": 0.26642194390296936, |
|
"learning_rate": 0.0001821025963220306, |
|
"loss": 1.1716, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.08502505899100997, |
|
"grad_norm": 0.2999640703201294, |
|
"learning_rate": 0.00018202856841375518, |
|
"loss": 1.394, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.08518106827356228, |
|
"grad_norm": 0.2676008641719818, |
|
"learning_rate": 0.00018195440283483988, |
|
"loss": 1.2725, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.08533707755611458, |
|
"grad_norm": 0.26116111874580383, |
|
"learning_rate": 0.0001818800997097587, |
|
"loss": 1.329, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.0854930868386669, |
|
"grad_norm": 0.26923874020576477, |
|
"learning_rate": 0.00018180565916321647, |
|
"loss": 1.2228, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.08564909612121921, |
|
"grad_norm": 0.2784603536128998, |
|
"learning_rate": 0.0001817310813201486, |
|
"loss": 1.1249, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.08580510540377152, |
|
"grad_norm": 0.27981552481651306, |
|
"learning_rate": 0.0001816563663057211, |
|
"loss": 1.2778, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.08596111468632384, |
|
"grad_norm": 0.2464422732591629, |
|
"learning_rate": 0.00018158151424533002, |
|
"loss": 1.0316, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.08611712396887615, |
|
"grad_norm": 0.23159442842006683, |
|
"learning_rate": 0.00018150652526460146, |
|
"loss": 0.9794, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.08627313325142846, |
|
"grad_norm": 0.28374752402305603, |
|
"learning_rate": 0.00018143139948939137, |
|
"loss": 1.0572, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.08642914253398078, |
|
"grad_norm": 0.28464943170547485, |
|
"learning_rate": 0.00018135613704578526, |
|
"loss": 1.024, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.08658515181653309, |
|
"grad_norm": 0.23248714208602905, |
|
"learning_rate": 0.000181280738060098, |
|
"loss": 0.9151, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.08674116109908539, |
|
"grad_norm": 0.2613517940044403, |
|
"learning_rate": 0.00018120520265887363, |
|
"loss": 1.2155, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.0868971703816377, |
|
"grad_norm": 0.2925867438316345, |
|
"learning_rate": 0.00018112953096888516, |
|
"loss": 1.2136, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.08705317966419002, |
|
"grad_norm": 0.3145943582057953, |
|
"learning_rate": 0.00018105372311713432, |
|
"loss": 1.4368, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.08720918894674233, |
|
"grad_norm": 0.29513052105903625, |
|
"learning_rate": 0.0001809777792308513, |
|
"loss": 1.4516, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.08736519822929464, |
|
"grad_norm": 0.22099293768405914, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 1.0234, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.08752120751184696, |
|
"grad_norm": 0.24346297979354858, |
|
"learning_rate": 0.0001808254838647513, |
|
"loss": 1.3492, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.08767721679439927, |
|
"grad_norm": 0.2770818769931793, |
|
"learning_rate": 0.00018074913264053545, |
|
"loss": 1.4692, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.08783322607695158, |
|
"grad_norm": 0.2789641320705414, |
|
"learning_rate": 0.00018067264589298945, |
|
"loss": 1.3942, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.08798923535950388, |
|
"grad_norm": 0.2892186939716339, |
|
"learning_rate": 0.00018059602375048293, |
|
"loss": 1.3621, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.0881452446420562, |
|
"grad_norm": 0.28431588411331177, |
|
"learning_rate": 0.00018051926634161282, |
|
"loss": 1.3073, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.08830125392460851, |
|
"grad_norm": 0.3204723000526428, |
|
"learning_rate": 0.00018044237379520305, |
|
"loss": 1.8154, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.08845726320716082, |
|
"grad_norm": 0.2658674716949463, |
|
"learning_rate": 0.0001803653462403043, |
|
"loss": 1.1807, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.08861327248971314, |
|
"grad_norm": 0.2409079521894455, |
|
"learning_rate": 0.0001802881838061939, |
|
"loss": 1.2165, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.08876928177226545, |
|
"grad_norm": 0.25896573066711426, |
|
"learning_rate": 0.00018021088662237552, |
|
"loss": 1.1993, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.08892529105481776, |
|
"grad_norm": 0.27663204073905945, |
|
"learning_rate": 0.00018013345481857903, |
|
"loss": 1.1241, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.08908130033737008, |
|
"grad_norm": 0.2892790734767914, |
|
"learning_rate": 0.00018005588852476015, |
|
"loss": 1.6163, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.08923730961992239, |
|
"grad_norm": 0.30898550152778625, |
|
"learning_rate": 0.00017997818787110042, |
|
"loss": 1.2483, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.08939331890247469, |
|
"grad_norm": 0.23732271790504456, |
|
"learning_rate": 0.0001799003529880068, |
|
"loss": 1.1204, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.089549328185027, |
|
"grad_norm": 0.2597337067127228, |
|
"learning_rate": 0.0001798223840061116, |
|
"loss": 1.258, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.08970533746757932, |
|
"grad_norm": 0.31342512369155884, |
|
"learning_rate": 0.00017974428105627208, |
|
"loss": 1.4074, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.08986134675013163, |
|
"grad_norm": 0.30252331495285034, |
|
"learning_rate": 0.00017966604426957047, |
|
"loss": 1.2059, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.09001735603268395, |
|
"grad_norm": 0.29326415061950684, |
|
"learning_rate": 0.00017958767377731358, |
|
"loss": 1.4294, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.09017336531523626, |
|
"grad_norm": 0.2915484607219696, |
|
"learning_rate": 0.00017950916971103259, |
|
"loss": 1.3728, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.09032937459778857, |
|
"grad_norm": 0.2966526746749878, |
|
"learning_rate": 0.00017943053220248283, |
|
"loss": 1.5332, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.09048538388034089, |
|
"grad_norm": 0.24311012029647827, |
|
"learning_rate": 0.0001793517613836437, |
|
"loss": 1.1254, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.09064139316289319, |
|
"grad_norm": 0.2950594127178192, |
|
"learning_rate": 0.00017927285738671825, |
|
"loss": 1.7255, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.0907974024454455, |
|
"grad_norm": 0.24679097533226013, |
|
"learning_rate": 0.00017919382034413305, |
|
"loss": 1.2781, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.09095341172799781, |
|
"grad_norm": 0.2747292220592499, |
|
"learning_rate": 0.00017911465038853805, |
|
"loss": 1.3434, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.09110942101055013, |
|
"grad_norm": 0.30099523067474365, |
|
"learning_rate": 0.00017903534765280614, |
|
"loss": 1.4518, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.09126543029310244, |
|
"grad_norm": 0.2866073548793793, |
|
"learning_rate": 0.00017895591227003315, |
|
"loss": 1.1706, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.09142143957565475, |
|
"grad_norm": 0.28832805156707764, |
|
"learning_rate": 0.00017887634437353754, |
|
"loss": 1.2271, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.09157744885820707, |
|
"grad_norm": 0.3714962601661682, |
|
"learning_rate": 0.00017879664409686008, |
|
"loss": 1.4474, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.09173345814075938, |
|
"grad_norm": 0.30591243505477905, |
|
"learning_rate": 0.00017871681157376383, |
|
"loss": 1.0327, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.0918894674233117, |
|
"grad_norm": 0.3032775819301605, |
|
"learning_rate": 0.00017863684693823374, |
|
"loss": 1.6824, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.092045476705864, |
|
"grad_norm": 0.26961666345596313, |
|
"learning_rate": 0.00017855675032447648, |
|
"loss": 1.1249, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.0922014859884163, |
|
"grad_norm": 0.2679152488708496, |
|
"learning_rate": 0.00017847652186692026, |
|
"loss": 1.2182, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.09235749527096862, |
|
"grad_norm": 0.24089114367961884, |
|
"learning_rate": 0.00017839616170021452, |
|
"loss": 1.1095, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.09251350455352093, |
|
"grad_norm": 0.25100457668304443, |
|
"learning_rate": 0.00017831566995922985, |
|
"loss": 1.1441, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.09266951383607325, |
|
"grad_norm": 0.2766099274158478, |
|
"learning_rate": 0.0001782350467790575, |
|
"loss": 1.1893, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.09282552311862556, |
|
"grad_norm": 0.2666013240814209, |
|
"learning_rate": 0.00017815429229500946, |
|
"loss": 1.1802, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.09298153240117787, |
|
"grad_norm": 0.28148403763771057, |
|
"learning_rate": 0.00017807340664261802, |
|
"loss": 1.3232, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.09313754168373019, |
|
"grad_norm": 0.23684674501419067, |
|
"learning_rate": 0.00017799238995763568, |
|
"loss": 1.1869, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.09329355096628249, |
|
"grad_norm": 0.2614571154117584, |
|
"learning_rate": 0.00017791124237603477, |
|
"loss": 1.4023, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.0934495602488348, |
|
"grad_norm": 0.3051559329032898, |
|
"learning_rate": 0.00017782996403400736, |
|
"loss": 1.407, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.09360556953138711, |
|
"grad_norm": 0.2745681405067444, |
|
"learning_rate": 0.00017774855506796496, |
|
"loss": 1.3265, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09376157881393943, |
|
"grad_norm": 0.2689257860183716, |
|
"learning_rate": 0.0001776670156145383, |
|
"loss": 1.3046, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.09391758809649174, |
|
"grad_norm": 0.29333195090293884, |
|
"learning_rate": 0.00017758534581057718, |
|
"loss": 1.2624, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.09407359737904405, |
|
"grad_norm": 0.30287420749664307, |
|
"learning_rate": 0.00017750354579315004, |
|
"loss": 1.28, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.09422960666159637, |
|
"grad_norm": 0.27796801924705505, |
|
"learning_rate": 0.00017742161569954398, |
|
"loss": 1.3305, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.09438561594414868, |
|
"grad_norm": 0.2703540325164795, |
|
"learning_rate": 0.0001773395556672644, |
|
"loss": 1.4356, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.094541625226701, |
|
"grad_norm": 0.26395589113235474, |
|
"learning_rate": 0.0001772573658340347, |
|
"loss": 1.1984, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.0946976345092533, |
|
"grad_norm": 0.2784560024738312, |
|
"learning_rate": 0.0001771750463377962, |
|
"loss": 1.3625, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.09485364379180561, |
|
"grad_norm": 0.31962451338768005, |
|
"learning_rate": 0.00017709259731670774, |
|
"loss": 1.3956, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.09500965307435792, |
|
"grad_norm": 0.274460107088089, |
|
"learning_rate": 0.00017701001890914572, |
|
"loss": 1.3071, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.09516566235691024, |
|
"grad_norm": 0.25924167037010193, |
|
"learning_rate": 0.00017692731125370354, |
|
"loss": 1.034, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.09532167163946255, |
|
"grad_norm": 0.3091680705547333, |
|
"learning_rate": 0.00017684447448919154, |
|
"loss": 1.4134, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.09547768092201486, |
|
"grad_norm": 0.25753480195999146, |
|
"learning_rate": 0.00017676150875463686, |
|
"loss": 1.2074, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.09563369020456718, |
|
"grad_norm": 0.27256032824516296, |
|
"learning_rate": 0.0001766784141892829, |
|
"loss": 1.3758, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.09578969948711949, |
|
"grad_norm": 0.24764277040958405, |
|
"learning_rate": 0.0001765951909325895, |
|
"loss": 1.0436, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.09594570876967179, |
|
"grad_norm": 0.2722652554512024, |
|
"learning_rate": 0.00017651183912423228, |
|
"loss": 1.3623, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.0961017180522241, |
|
"grad_norm": 0.27056217193603516, |
|
"learning_rate": 0.0001764283589041028, |
|
"loss": 1.2525, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.09625772733477642, |
|
"grad_norm": 0.27987945079803467, |
|
"learning_rate": 0.00017634475041230797, |
|
"loss": 1.5075, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.09641373661732873, |
|
"grad_norm": 0.29397958517074585, |
|
"learning_rate": 0.00017626101378917004, |
|
"loss": 1.3681, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.09656974589988104, |
|
"grad_norm": 0.2876337766647339, |
|
"learning_rate": 0.0001761771491752264, |
|
"loss": 1.5848, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.09672575518243336, |
|
"grad_norm": 0.237448051571846, |
|
"learning_rate": 0.0001760931567112291, |
|
"loss": 1.0918, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.09688176446498567, |
|
"grad_norm": 0.29513096809387207, |
|
"learning_rate": 0.0001760090365381449, |
|
"loss": 1.3236, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.09703777374753798, |
|
"grad_norm": 0.263920396566391, |
|
"learning_rate": 0.0001759247887971548, |
|
"loss": 1.4573, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.0971937830300903, |
|
"grad_norm": 0.31876271963119507, |
|
"learning_rate": 0.00017584041362965396, |
|
"loss": 1.3874, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.0973497923126426, |
|
"grad_norm": 0.30635690689086914, |
|
"learning_rate": 0.0001757559111772513, |
|
"loss": 1.2355, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.09750580159519491, |
|
"grad_norm": 0.25926241278648376, |
|
"learning_rate": 0.00017567128158176953, |
|
"loss": 1.2641, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.09766181087774722, |
|
"grad_norm": 0.2862091660499573, |
|
"learning_rate": 0.0001755865249852446, |
|
"loss": 1.3818, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.09781782016029954, |
|
"grad_norm": 0.2540535628795624, |
|
"learning_rate": 0.00017550164152992573, |
|
"loss": 1.3807, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.09797382944285185, |
|
"grad_norm": 0.30917900800704956, |
|
"learning_rate": 0.00017541663135827492, |
|
"loss": 1.1053, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.09812983872540416, |
|
"grad_norm": 0.30465036630630493, |
|
"learning_rate": 0.000175331494612967, |
|
"loss": 1.4489, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.09828584800795648, |
|
"grad_norm": 0.3043782711029053, |
|
"learning_rate": 0.00017524623143688902, |
|
"loss": 1.4544, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.09844185729050879, |
|
"grad_norm": 0.2681322991847992, |
|
"learning_rate": 0.00017516084197314046, |
|
"loss": 1.1926, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.09859786657306109, |
|
"grad_norm": 0.33450305461883545, |
|
"learning_rate": 0.00017507532636503256, |
|
"loss": 1.4383, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.0987538758556134, |
|
"grad_norm": 0.2626807987689972, |
|
"learning_rate": 0.00017498968475608838, |
|
"loss": 1.1565, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.09890988513816572, |
|
"grad_norm": 0.2553156912326813, |
|
"learning_rate": 0.00017490391729004244, |
|
"loss": 1.1327, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.09906589442071803, |
|
"grad_norm": 0.23390045762062073, |
|
"learning_rate": 0.00017481802411084042, |
|
"loss": 0.9856, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.09922190370327034, |
|
"grad_norm": 0.29881760478019714, |
|
"learning_rate": 0.00017473200536263905, |
|
"loss": 1.362, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.09937791298582266, |
|
"grad_norm": 0.2904150187969208, |
|
"learning_rate": 0.0001746458611898058, |
|
"loss": 1.242, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.09953392226837497, |
|
"grad_norm": 0.24842409789562225, |
|
"learning_rate": 0.00017455959173691863, |
|
"loss": 1.2694, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.09968993155092729, |
|
"grad_norm": 0.3337212800979614, |
|
"learning_rate": 0.00017447319714876579, |
|
"loss": 1.2554, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.0998459408334796, |
|
"grad_norm": 0.24105407297611237, |
|
"learning_rate": 0.00017438667757034546, |
|
"loss": 1.0582, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1000019501160319, |
|
"grad_norm": 0.24266989529132843, |
|
"learning_rate": 0.00017430003314686569, |
|
"loss": 1.2125, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.10015795939858421, |
|
"grad_norm": 0.2654808461666107, |
|
"learning_rate": 0.00017421326402374405, |
|
"loss": 1.3229, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.10031396868113653, |
|
"grad_norm": 0.21931445598602295, |
|
"learning_rate": 0.00017412637034660734, |
|
"loss": 1.1168, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.10046997796368884, |
|
"grad_norm": 0.28860512375831604, |
|
"learning_rate": 0.0001740393522612915, |
|
"loss": 1.3681, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.10062598724624115, |
|
"grad_norm": 0.2736460566520691, |
|
"learning_rate": 0.0001739522099138411, |
|
"loss": 1.4054, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.10078199652879347, |
|
"grad_norm": 0.23222267627716064, |
|
"learning_rate": 0.00017386494345050942, |
|
"loss": 1.0973, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.10093800581134578, |
|
"grad_norm": 0.2684474587440491, |
|
"learning_rate": 0.000173777553017758, |
|
"loss": 1.0637, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.10109401509389809, |
|
"grad_norm": 0.2648880183696747, |
|
"learning_rate": 0.00017369003876225642, |
|
"loss": 1.5162, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.10125002437645039, |
|
"grad_norm": 0.26263687014579773, |
|
"learning_rate": 0.00017360240083088213, |
|
"loss": 1.3613, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.1014060336590027, |
|
"grad_norm": 0.2455459088087082, |
|
"learning_rate": 0.00017351463937072004, |
|
"loss": 1.3927, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.10156204294155502, |
|
"grad_norm": 0.273078590631485, |
|
"learning_rate": 0.00017342675452906248, |
|
"loss": 1.2485, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.10171805222410733, |
|
"grad_norm": 0.24480541050434113, |
|
"learning_rate": 0.00017333874645340884, |
|
"loss": 1.0656, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.10187406150665965, |
|
"grad_norm": 0.24994470179080963, |
|
"learning_rate": 0.0001732506152914653, |
|
"loss": 1.3653, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.10203007078921196, |
|
"grad_norm": 0.26110485196113586, |
|
"learning_rate": 0.00017316236119114463, |
|
"loss": 1.392, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.10218608007176427, |
|
"grad_norm": 0.30197709798812866, |
|
"learning_rate": 0.00017307398430056593, |
|
"loss": 1.5184, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.10234208935431659, |
|
"grad_norm": 0.26577743887901306, |
|
"learning_rate": 0.00017298548476805446, |
|
"loss": 1.4611, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.10249809863686889, |
|
"grad_norm": 0.2677333950996399, |
|
"learning_rate": 0.00017289686274214118, |
|
"loss": 1.3282, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.1026541079194212, |
|
"grad_norm": 0.2508523762226105, |
|
"learning_rate": 0.00017280811837156268, |
|
"loss": 1.1331, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.10281011720197351, |
|
"grad_norm": 0.24873429536819458, |
|
"learning_rate": 0.00017271925180526094, |
|
"loss": 1.1351, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.10296612648452583, |
|
"grad_norm": 0.2559413015842438, |
|
"learning_rate": 0.00017263026319238301, |
|
"loss": 1.245, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.10312213576707814, |
|
"grad_norm": 0.29988738894462585, |
|
"learning_rate": 0.0001725411526822807, |
|
"loss": 1.4004, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.10327814504963045, |
|
"grad_norm": 0.29719191789627075, |
|
"learning_rate": 0.0001724519204245105, |
|
"loss": 1.5687, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.10343415433218277, |
|
"grad_norm": 0.30810216069221497, |
|
"learning_rate": 0.0001723625665688331, |
|
"loss": 1.3712, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.10359016361473508, |
|
"grad_norm": 0.2754259407520294, |
|
"learning_rate": 0.00017227309126521348, |
|
"loss": 1.2083, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.1037461728972874, |
|
"grad_norm": 0.26548734307289124, |
|
"learning_rate": 0.00017218349466382023, |
|
"loss": 1.2657, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.1039021821798397, |
|
"grad_norm": 0.26369354128837585, |
|
"learning_rate": 0.00017209377691502565, |
|
"loss": 1.3359, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.10405819146239201, |
|
"grad_norm": 0.2526211440563202, |
|
"learning_rate": 0.0001720039381694053, |
|
"loss": 1.0633, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.10421420074494432, |
|
"grad_norm": 0.2874252498149872, |
|
"learning_rate": 0.00017191397857773788, |
|
"loss": 1.2833, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.10437021002749663, |
|
"grad_norm": 0.26982390880584717, |
|
"learning_rate": 0.00017182389829100485, |
|
"loss": 1.1843, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.10452621931004895, |
|
"grad_norm": 0.29615074396133423, |
|
"learning_rate": 0.00017173369746039025, |
|
"loss": 1.2992, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.10468222859260126, |
|
"grad_norm": 0.29073938727378845, |
|
"learning_rate": 0.00017164337623728045, |
|
"loss": 1.5432, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.10483823787515358, |
|
"grad_norm": 0.2858506143093109, |
|
"learning_rate": 0.00017155293477326384, |
|
"loss": 1.4446, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.10499424715770589, |
|
"grad_norm": 0.2399512678384781, |
|
"learning_rate": 0.00017146237322013068, |
|
"loss": 1.1643, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.10515025644025819, |
|
"grad_norm": 0.2796498239040375, |
|
"learning_rate": 0.00017137169172987268, |
|
"loss": 1.3158, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.1053062657228105, |
|
"grad_norm": 0.26859599351882935, |
|
"learning_rate": 0.00017128089045468294, |
|
"loss": 1.1761, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.10546227500536282, |
|
"grad_norm": 0.2749616503715515, |
|
"learning_rate": 0.00017118996954695553, |
|
"loss": 1.0586, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.10561828428791513, |
|
"grad_norm": 0.27312207221984863, |
|
"learning_rate": 0.00017109892915928535, |
|
"loss": 1.1367, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.10577429357046744, |
|
"grad_norm": 0.29626578092575073, |
|
"learning_rate": 0.00017100776944446781, |
|
"loss": 1.4223, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.10593030285301976, |
|
"grad_norm": 0.24335867166519165, |
|
"learning_rate": 0.00017091649055549855, |
|
"loss": 1.1041, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.10608631213557207, |
|
"grad_norm": 0.3017411530017853, |
|
"learning_rate": 0.0001708250926455733, |
|
"loss": 1.2854, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.10624232141812438, |
|
"grad_norm": 0.2864495515823364, |
|
"learning_rate": 0.00017073357586808752, |
|
"loss": 1.2539, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.1063983307006767, |
|
"grad_norm": 0.27407294511795044, |
|
"learning_rate": 0.0001706419403766361, |
|
"loss": 1.3136, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.106554339983229, |
|
"grad_norm": 0.3100734055042267, |
|
"learning_rate": 0.00017055018632501325, |
|
"loss": 1.3231, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.10671034926578131, |
|
"grad_norm": 0.3091520071029663, |
|
"learning_rate": 0.00017045831386721213, |
|
"loss": 1.3513, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.10686635854833362, |
|
"grad_norm": 0.2930145561695099, |
|
"learning_rate": 0.00017036632315742462, |
|
"loss": 1.3292, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.10702236783088594, |
|
"grad_norm": 0.30808883905410767, |
|
"learning_rate": 0.00017027421435004112, |
|
"loss": 1.6094, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.10717837711343825, |
|
"grad_norm": 0.2715398073196411, |
|
"learning_rate": 0.00017018198759965016, |
|
"loss": 1.3641, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.10733438639599056, |
|
"grad_norm": 0.2844456732273102, |
|
"learning_rate": 0.00017008964306103823, |
|
"loss": 1.3933, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.10749039567854288, |
|
"grad_norm": 0.258504718542099, |
|
"learning_rate": 0.00016999718088918955, |
|
"loss": 1.0621, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.10764640496109519, |
|
"grad_norm": 0.28674831986427307, |
|
"learning_rate": 0.00016990460123928575, |
|
"loss": 1.2759, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.10780241424364749, |
|
"grad_norm": 0.3062899708747864, |
|
"learning_rate": 0.0001698119042667056, |
|
"loss": 1.1537, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.1079584235261998, |
|
"grad_norm": 0.2539708614349365, |
|
"learning_rate": 0.00016971909012702483, |
|
"loss": 1.1463, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.10811443280875212, |
|
"grad_norm": 0.30207210779190063, |
|
"learning_rate": 0.00016962615897601573, |
|
"loss": 1.4219, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.10827044209130443, |
|
"grad_norm": 0.28675806522369385, |
|
"learning_rate": 0.00016953311096964705, |
|
"loss": 1.1476, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.10842645137385674, |
|
"grad_norm": 0.33274316787719727, |
|
"learning_rate": 0.00016943994626408363, |
|
"loss": 1.3351, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.10858246065640906, |
|
"grad_norm": 0.2725004553794861, |
|
"learning_rate": 0.00016934666501568617, |
|
"loss": 1.1795, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.10873846993896137, |
|
"grad_norm": 0.29064077138900757, |
|
"learning_rate": 0.00016925326738101098, |
|
"loss": 1.4255, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.10889447922151368, |
|
"grad_norm": 0.3007811903953552, |
|
"learning_rate": 0.00016915975351680968, |
|
"loss": 1.1951, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.109050488504066, |
|
"grad_norm": 0.26098549365997314, |
|
"learning_rate": 0.000169066123580029, |
|
"loss": 1.0585, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.1092064977866183, |
|
"grad_norm": 0.36355966329574585, |
|
"learning_rate": 0.00016897237772781044, |
|
"loss": 1.2911, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.10936250706917061, |
|
"grad_norm": 0.2830749750137329, |
|
"learning_rate": 0.00016887851611749005, |
|
"loss": 1.4469, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.10951851635172292, |
|
"grad_norm": 0.3175537884235382, |
|
"learning_rate": 0.00016878453890659814, |
|
"loss": 1.4589, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.10967452563427524, |
|
"grad_norm": 0.2898159623146057, |
|
"learning_rate": 0.0001686904462528591, |
|
"loss": 1.4318, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.10983053491682755, |
|
"grad_norm": 0.28991106152534485, |
|
"learning_rate": 0.000168596238314191, |
|
"loss": 1.3293, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.10998654419937987, |
|
"grad_norm": 0.27654772996902466, |
|
"learning_rate": 0.00016850191524870546, |
|
"loss": 1.4909, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.11014255348193218, |
|
"grad_norm": 0.29537513852119446, |
|
"learning_rate": 0.00016840747721470731, |
|
"loss": 1.4512, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.11029856276448449, |
|
"grad_norm": 0.2656291723251343, |
|
"learning_rate": 0.00016831292437069427, |
|
"loss": 1.0375, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.11045457204703679, |
|
"grad_norm": 0.3286688029766083, |
|
"learning_rate": 0.00016821825687535674, |
|
"loss": 1.3478, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.1106105813295891, |
|
"grad_norm": 0.2618601322174072, |
|
"learning_rate": 0.00016812347488757772, |
|
"loss": 1.3448, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.11076659061214142, |
|
"grad_norm": 0.29108762741088867, |
|
"learning_rate": 0.00016802857856643215, |
|
"loss": 1.3479, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.11092259989469373, |
|
"grad_norm": 0.3029685914516449, |
|
"learning_rate": 0.00016793356807118695, |
|
"loss": 1.2162, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.11107860917724605, |
|
"grad_norm": 0.2573980689048767, |
|
"learning_rate": 0.00016783844356130071, |
|
"loss": 1.0927, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.11123461845979836, |
|
"grad_norm": 0.2836451828479767, |
|
"learning_rate": 0.0001677432051964233, |
|
"loss": 1.2136, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.11139062774235067, |
|
"grad_norm": 0.2437037229537964, |
|
"learning_rate": 0.0001676478531363957, |
|
"loss": 1.0671, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.11154663702490299, |
|
"grad_norm": 0.2603608965873718, |
|
"learning_rate": 0.00016755238754124965, |
|
"loss": 1.2128, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.1117026463074553, |
|
"grad_norm": 0.2617943286895752, |
|
"learning_rate": 0.00016745680857120757, |
|
"loss": 1.3305, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.1118586555900076, |
|
"grad_norm": 0.27264609932899475, |
|
"learning_rate": 0.00016736111638668204, |
|
"loss": 1.3456, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.11201466487255991, |
|
"grad_norm": 0.33472567796707153, |
|
"learning_rate": 0.00016726531114827573, |
|
"loss": 1.2517, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.11217067415511223, |
|
"grad_norm": 0.2825791835784912, |
|
"learning_rate": 0.00016716939301678098, |
|
"loss": 1.3156, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.11232668343766454, |
|
"grad_norm": 0.2815983295440674, |
|
"learning_rate": 0.00016707336215317968, |
|
"loss": 1.2376, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.11248269272021685, |
|
"grad_norm": 0.3158409595489502, |
|
"learning_rate": 0.00016697721871864284, |
|
"loss": 1.5252, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.11263870200276917, |
|
"grad_norm": 0.27121129631996155, |
|
"learning_rate": 0.00016688096287453046, |
|
"loss": 1.3603, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.11279471128532148, |
|
"grad_norm": 0.2568758428096771, |
|
"learning_rate": 0.00016678459478239118, |
|
"loss": 1.1337, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.1129507205678738, |
|
"grad_norm": 0.26672929525375366, |
|
"learning_rate": 0.00016668811460396202, |
|
"loss": 1.1728, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.1131067298504261, |
|
"grad_norm": 0.2683919370174408, |
|
"learning_rate": 0.00016659152250116812, |
|
"loss": 1.2833, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.11326273913297841, |
|
"grad_norm": 0.2757527232170105, |
|
"learning_rate": 0.00016649481863612248, |
|
"loss": 1.0544, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.11341874841553072, |
|
"grad_norm": 0.2571371793746948, |
|
"learning_rate": 0.0001663980031711257, |
|
"loss": 1.1212, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.11357475769808303, |
|
"grad_norm": 0.2757047116756439, |
|
"learning_rate": 0.00016630107626866558, |
|
"loss": 1.1771, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.11373076698063535, |
|
"grad_norm": 0.262979120016098, |
|
"learning_rate": 0.00016620403809141705, |
|
"loss": 0.9962, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.11388677626318766, |
|
"grad_norm": 0.26567909121513367, |
|
"learning_rate": 0.00016610688880224178, |
|
"loss": 1.3037, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.11404278554573997, |
|
"grad_norm": 0.27931660413742065, |
|
"learning_rate": 0.00016600962856418782, |
|
"loss": 1.1863, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.11419879482829229, |
|
"grad_norm": 0.25071558356285095, |
|
"learning_rate": 0.00016591225754048963, |
|
"loss": 1.1437, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.1143548041108446, |
|
"grad_norm": 0.2775113880634308, |
|
"learning_rate": 0.00016581477589456734, |
|
"loss": 1.2152, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.1145108133933969, |
|
"grad_norm": 0.25055718421936035, |
|
"learning_rate": 0.00016571718379002705, |
|
"loss": 1.1479, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.11466682267594921, |
|
"grad_norm": 0.25468993186950684, |
|
"learning_rate": 0.00016561948139065996, |
|
"loss": 1.148, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.11482283195850153, |
|
"grad_norm": 0.26385918259620667, |
|
"learning_rate": 0.00016552166886044253, |
|
"loss": 1.3473, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.11497884124105384, |
|
"grad_norm": 0.27051180601119995, |
|
"learning_rate": 0.00016542374636353604, |
|
"loss": 1.196, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.11513485052360616, |
|
"grad_norm": 0.32731276750564575, |
|
"learning_rate": 0.0001653257140642863, |
|
"loss": 1.4514, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.11529085980615847, |
|
"grad_norm": 0.26046180725097656, |
|
"learning_rate": 0.00016522757212722344, |
|
"loss": 1.2186, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.11544686908871078, |
|
"grad_norm": 0.2661746144294739, |
|
"learning_rate": 0.00016512932071706152, |
|
"loss": 1.123, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1156028783712631, |
|
"grad_norm": 0.25739923119544983, |
|
"learning_rate": 0.0001650309599986985, |
|
"loss": 1.1832, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.1157588876538154, |
|
"grad_norm": 0.30230990052223206, |
|
"learning_rate": 0.00016493249013721558, |
|
"loss": 1.5064, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.11591489693636771, |
|
"grad_norm": 0.25831449031829834, |
|
"learning_rate": 0.00016483391129787727, |
|
"loss": 1.1212, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.11607090621892002, |
|
"grad_norm": 0.24019654095172882, |
|
"learning_rate": 0.000164735223646131, |
|
"loss": 1.1555, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.11622691550147234, |
|
"grad_norm": 0.28396427631378174, |
|
"learning_rate": 0.0001646364273476067, |
|
"loss": 1.4754, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.11638292478402465, |
|
"grad_norm": 0.28211066126823425, |
|
"learning_rate": 0.00016453752256811674, |
|
"loss": 1.526, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.11653893406657696, |
|
"grad_norm": 0.2596474289894104, |
|
"learning_rate": 0.00016443850947365558, |
|
"loss": 1.2072, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.11669494334912928, |
|
"grad_norm": 0.25947293639183044, |
|
"learning_rate": 0.0001643393882303994, |
|
"loss": 1.3467, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.11685095263168159, |
|
"grad_norm": 0.30946600437164307, |
|
"learning_rate": 0.00016424015900470587, |
|
"loss": 1.3948, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.1170069619142339, |
|
"grad_norm": 0.3172161281108856, |
|
"learning_rate": 0.000164140821963114, |
|
"loss": 1.745, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.1171629711967862, |
|
"grad_norm": 0.26674196124076843, |
|
"learning_rate": 0.00016404137727234365, |
|
"loss": 1.5021, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.11731898047933852, |
|
"grad_norm": 0.26941999793052673, |
|
"learning_rate": 0.00016394182509929536, |
|
"loss": 1.2651, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.11747498976189083, |
|
"grad_norm": 0.29353249073028564, |
|
"learning_rate": 0.00016384216561105014, |
|
"loss": 1.2397, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.11763099904444314, |
|
"grad_norm": 0.2547638416290283, |
|
"learning_rate": 0.000163742398974869, |
|
"loss": 1.1032, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.11778700832699546, |
|
"grad_norm": 0.25621354579925537, |
|
"learning_rate": 0.00016364252535819282, |
|
"loss": 1.0842, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.11794301760954777, |
|
"grad_norm": 0.25465261936187744, |
|
"learning_rate": 0.00016354254492864211, |
|
"loss": 0.9941, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.11809902689210008, |
|
"grad_norm": 0.25726544857025146, |
|
"learning_rate": 0.00016344245785401653, |
|
"loss": 1.2613, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.1182550361746524, |
|
"grad_norm": 0.2696760594844818, |
|
"learning_rate": 0.00016334226430229475, |
|
"loss": 1.1349, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.1184110454572047, |
|
"grad_norm": 0.29465997219085693, |
|
"learning_rate": 0.00016324196444163423, |
|
"loss": 1.3099, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.11856705473975701, |
|
"grad_norm": 0.2854841351509094, |
|
"learning_rate": 0.00016314155844037074, |
|
"loss": 1.1648, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.11872306402230932, |
|
"grad_norm": 0.28557366132736206, |
|
"learning_rate": 0.0001630410464670182, |
|
"loss": 1.4045, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.11887907330486164, |
|
"grad_norm": 0.337882936000824, |
|
"learning_rate": 0.00016294042869026851, |
|
"loss": 1.4391, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.11903508258741395, |
|
"grad_norm": 0.25410857796669006, |
|
"learning_rate": 0.000162839705278991, |
|
"loss": 1.025, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.11919109186996626, |
|
"grad_norm": 0.2944369614124298, |
|
"learning_rate": 0.0001627388764022323, |
|
"loss": 1.3339, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.11934710115251858, |
|
"grad_norm": 0.30941835045814514, |
|
"learning_rate": 0.0001626379422292162, |
|
"loss": 1.5238, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.11950311043507089, |
|
"grad_norm": 0.2796765863895416, |
|
"learning_rate": 0.000162536902929343, |
|
"loss": 1.1711, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.1196591197176232, |
|
"grad_norm": 0.2882195711135864, |
|
"learning_rate": 0.00016243575867218958, |
|
"loss": 1.2852, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.1198151290001755, |
|
"grad_norm": 0.29050207138061523, |
|
"learning_rate": 0.00016233450962750893, |
|
"loss": 1.2789, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.11997113828272782, |
|
"grad_norm": 0.2745670974254608, |
|
"learning_rate": 0.00016223315596522987, |
|
"loss": 1.2741, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.12012714756528013, |
|
"grad_norm": 0.29764166474342346, |
|
"learning_rate": 0.0001621316978554569, |
|
"loss": 1.3636, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.12028315684783245, |
|
"grad_norm": 0.29131025075912476, |
|
"learning_rate": 0.00016203013546846966, |
|
"loss": 1.5137, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.12043916613038476, |
|
"grad_norm": 0.3370944857597351, |
|
"learning_rate": 0.00016192846897472297, |
|
"loss": 1.5541, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.12059517541293707, |
|
"grad_norm": 0.2678642272949219, |
|
"learning_rate": 0.0001618266985448463, |
|
"loss": 1.2024, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.12075118469548939, |
|
"grad_norm": 0.27655884623527527, |
|
"learning_rate": 0.00016172482434964353, |
|
"loss": 1.1084, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.1209071939780417, |
|
"grad_norm": 0.23235641419887543, |
|
"learning_rate": 0.00016162284656009274, |
|
"loss": 0.8548, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.121063203260594, |
|
"grad_norm": 0.2860414683818817, |
|
"learning_rate": 0.00016152076534734584, |
|
"loss": 1.5026, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.12121921254314631, |
|
"grad_norm": 0.2980406582355499, |
|
"learning_rate": 0.00016141858088272837, |
|
"loss": 1.3692, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.12137522182569863, |
|
"grad_norm": 0.29564347863197327, |
|
"learning_rate": 0.00016131629333773908, |
|
"loss": 1.6193, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.12153123110825094, |
|
"grad_norm": 0.250028520822525, |
|
"learning_rate": 0.0001612139028840498, |
|
"loss": 1.3295, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.12168724039080325, |
|
"grad_norm": 0.25812971591949463, |
|
"learning_rate": 0.00016111140969350503, |
|
"loss": 1.1061, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.12184324967335557, |
|
"grad_norm": 0.2702666223049164, |
|
"learning_rate": 0.0001610088139381217, |
|
"loss": 1.2846, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.12199925895590788, |
|
"grad_norm": 0.24256417155265808, |
|
"learning_rate": 0.00016090611579008888, |
|
"loss": 1.081, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.1221552682384602, |
|
"grad_norm": 0.3177904784679413, |
|
"learning_rate": 0.00016080331542176753, |
|
"loss": 1.5862, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.12231127752101251, |
|
"grad_norm": 0.25483664870262146, |
|
"learning_rate": 0.00016070041300569012, |
|
"loss": 1.1939, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.1224672868035648, |
|
"grad_norm": 0.23578673601150513, |
|
"learning_rate": 0.00016059740871456036, |
|
"loss": 1.0371, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.12262329608611712, |
|
"grad_norm": 0.28674736618995667, |
|
"learning_rate": 0.000160494302721253, |
|
"loss": 1.4739, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.12277930536866943, |
|
"grad_norm": 0.29090616106987, |
|
"learning_rate": 0.0001603910951988135, |
|
"loss": 1.3862, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.12293531465122175, |
|
"grad_norm": 0.2792899012565613, |
|
"learning_rate": 0.00016028778632045762, |
|
"loss": 1.3731, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.12309132393377406, |
|
"grad_norm": 0.2683924436569214, |
|
"learning_rate": 0.00016018437625957133, |
|
"loss": 1.4514, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.12324733321632637, |
|
"grad_norm": 0.331752747297287, |
|
"learning_rate": 0.00016008086518971037, |
|
"loss": 1.0936, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.12340334249887869, |
|
"grad_norm": 0.32185712456703186, |
|
"learning_rate": 0.0001599772532846, |
|
"loss": 1.7093, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.123559351781431, |
|
"grad_norm": 0.28801560401916504, |
|
"learning_rate": 0.0001598735407181347, |
|
"loss": 1.2923, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.1237153610639833, |
|
"grad_norm": 0.2626672387123108, |
|
"learning_rate": 0.00015976972766437795, |
|
"loss": 1.196, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.12387137034653561, |
|
"grad_norm": 0.30561795830726624, |
|
"learning_rate": 0.00015966581429756183, |
|
"loss": 1.5151, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.12402737962908793, |
|
"grad_norm": 0.2764839828014374, |
|
"learning_rate": 0.00015956180079208682, |
|
"loss": 1.231, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.12418338891164024, |
|
"grad_norm": 0.2506803870201111, |
|
"learning_rate": 0.00015945768732252144, |
|
"loss": 1.0394, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.12433939819419255, |
|
"grad_norm": 0.28655874729156494, |
|
"learning_rate": 0.00015935347406360192, |
|
"loss": 1.4689, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.12449540747674487, |
|
"grad_norm": 0.26048576831817627, |
|
"learning_rate": 0.00015924916119023212, |
|
"loss": 1.218, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.12465141675929718, |
|
"grad_norm": 0.26712656021118164, |
|
"learning_rate": 0.00015914474887748295, |
|
"loss": 1.232, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.1248074260418495, |
|
"grad_norm": 0.2652023434638977, |
|
"learning_rate": 0.00015904023730059228, |
|
"loss": 1.0205, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12496343532440181, |
|
"grad_norm": 0.3364275097846985, |
|
"learning_rate": 0.0001589356266349645, |
|
"loss": 1.4919, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.12511944460695412, |
|
"grad_norm": 0.218467116355896, |
|
"learning_rate": 0.00015883091705617045, |
|
"loss": 0.8939, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.12527545388950642, |
|
"grad_norm": 0.2554807960987091, |
|
"learning_rate": 0.00015872610873994685, |
|
"loss": 1.2568, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.12543146317205875, |
|
"grad_norm": 0.2742806673049927, |
|
"learning_rate": 0.00015862120186219613, |
|
"loss": 1.0565, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.12558747245461105, |
|
"grad_norm": 0.23994481563568115, |
|
"learning_rate": 0.00015851619659898623, |
|
"loss": 0.9631, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.12574348173716335, |
|
"grad_norm": 0.29549404978752136, |
|
"learning_rate": 0.00015841109312655016, |
|
"loss": 1.2073, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.12589949101971568, |
|
"grad_norm": 0.27470991015434265, |
|
"learning_rate": 0.00015830589162128572, |
|
"loss": 1.2345, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.12605550030226798, |
|
"grad_norm": 0.27652519941329956, |
|
"learning_rate": 0.00015820059225975531, |
|
"loss": 1.2456, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.1262115095848203, |
|
"grad_norm": 0.2571077346801758, |
|
"learning_rate": 0.0001580951952186856, |
|
"loss": 1.0009, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.1263675188673726, |
|
"grad_norm": 0.27721402049064636, |
|
"learning_rate": 0.000157989700674967, |
|
"loss": 1.2101, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.12652352814992493, |
|
"grad_norm": 0.29823631048202515, |
|
"learning_rate": 0.00015788410880565379, |
|
"loss": 1.3992, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.12667953743247723, |
|
"grad_norm": 0.28366366028785706, |
|
"learning_rate": 0.00015777841978796347, |
|
"loss": 1.005, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.12683554671502956, |
|
"grad_norm": 0.3597376048564911, |
|
"learning_rate": 0.0001576726337992766, |
|
"loss": 1.6046, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.12699155599758186, |
|
"grad_norm": 0.27407100796699524, |
|
"learning_rate": 0.00015756675101713657, |
|
"loss": 1.0167, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.12714756528013416, |
|
"grad_norm": 0.3212680220603943, |
|
"learning_rate": 0.00015746077161924905, |
|
"loss": 1.4425, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.12730357456268648, |
|
"grad_norm": 0.25150859355926514, |
|
"learning_rate": 0.00015735469578348208, |
|
"loss": 1.2482, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.12745958384523878, |
|
"grad_norm": 0.2753000855445862, |
|
"learning_rate": 0.00015724852368786537, |
|
"loss": 1.3006, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.1276155931277911, |
|
"grad_norm": 0.27500027418136597, |
|
"learning_rate": 0.0001571422555105903, |
|
"loss": 1.2095, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.1277716024103434, |
|
"grad_norm": 0.2696485221385956, |
|
"learning_rate": 0.0001570358914300094, |
|
"loss": 1.1708, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.12792761169289574, |
|
"grad_norm": 0.2486962080001831, |
|
"learning_rate": 0.00015692943162463628, |
|
"loss": 1.0531, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.12808362097544804, |
|
"grad_norm": 0.265824556350708, |
|
"learning_rate": 0.00015682287627314515, |
|
"loss": 1.0712, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.12823963025800036, |
|
"grad_norm": 0.2963060140609741, |
|
"learning_rate": 0.00015671622555437053, |
|
"loss": 1.3806, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.12839563954055266, |
|
"grad_norm": 0.2849713861942291, |
|
"learning_rate": 0.00015660947964730708, |
|
"loss": 1.2242, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.12855164882310496, |
|
"grad_norm": 0.25108298659324646, |
|
"learning_rate": 0.0001565026387311092, |
|
"loss": 1.1128, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.1287076581056573, |
|
"grad_norm": 0.27622735500335693, |
|
"learning_rate": 0.00015639570298509064, |
|
"loss": 1.3599, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.1288636673882096, |
|
"grad_norm": 0.29195183515548706, |
|
"learning_rate": 0.0001562886725887245, |
|
"loss": 1.2931, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.12901967667076192, |
|
"grad_norm": 0.2943118214607239, |
|
"learning_rate": 0.00015618154772164256, |
|
"loss": 1.5802, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.12917568595331422, |
|
"grad_norm": 0.26325714588165283, |
|
"learning_rate": 0.00015607432856363525, |
|
"loss": 1.2455, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.12933169523586655, |
|
"grad_norm": 0.286743700504303, |
|
"learning_rate": 0.00015596701529465117, |
|
"loss": 1.3008, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.12948770451841884, |
|
"grad_norm": 0.2844702899456024, |
|
"learning_rate": 0.00015585960809479696, |
|
"loss": 1.3737, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.12964371380097114, |
|
"grad_norm": 0.25531789660453796, |
|
"learning_rate": 0.00015575210714433686, |
|
"loss": 1.1425, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.12979972308352347, |
|
"grad_norm": 0.26921185851097107, |
|
"learning_rate": 0.00015564451262369247, |
|
"loss": 1.106, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.12995573236607577, |
|
"grad_norm": 0.28271836042404175, |
|
"learning_rate": 0.00015553682471344238, |
|
"loss": 1.3681, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.1301117416486281, |
|
"grad_norm": 0.26876282691955566, |
|
"learning_rate": 0.00015542904359432198, |
|
"loss": 1.112, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.1302677509311804, |
|
"grad_norm": 0.2895980179309845, |
|
"learning_rate": 0.00015532116944722308, |
|
"loss": 1.1285, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.13042376021373273, |
|
"grad_norm": 0.2612462639808655, |
|
"learning_rate": 0.00015521320245319363, |
|
"loss": 1.2669, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.13057976949628503, |
|
"grad_norm": 0.30689284205436707, |
|
"learning_rate": 0.00015510514279343734, |
|
"loss": 1.3512, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.13073577877883735, |
|
"grad_norm": 0.2981073558330536, |
|
"learning_rate": 0.00015499699064931355, |
|
"loss": 1.1284, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.13089178806138965, |
|
"grad_norm": 0.2637684643268585, |
|
"learning_rate": 0.00015488874620233674, |
|
"loss": 1.0698, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.13104779734394195, |
|
"grad_norm": 0.3048469126224518, |
|
"learning_rate": 0.0001547804096341763, |
|
"loss": 1.5209, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.13120380662649428, |
|
"grad_norm": 0.2396387904882431, |
|
"learning_rate": 0.00015467198112665632, |
|
"loss": 0.9584, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.13135981590904658, |
|
"grad_norm": 0.27103736996650696, |
|
"learning_rate": 0.0001545634608617551, |
|
"loss": 1.2846, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.1315158251915989, |
|
"grad_norm": 0.2971721589565277, |
|
"learning_rate": 0.00015445484902160491, |
|
"loss": 1.6074, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.1316718344741512, |
|
"grad_norm": 0.2440243512392044, |
|
"learning_rate": 0.00015434614578849188, |
|
"loss": 1.045, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.13182784375670353, |
|
"grad_norm": 0.30210787057876587, |
|
"learning_rate": 0.00015423735134485536, |
|
"loss": 1.2948, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.13198385303925583, |
|
"grad_norm": 0.25344711542129517, |
|
"learning_rate": 0.00015412846587328782, |
|
"loss": 1.2089, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.13213986232180816, |
|
"grad_norm": 0.2884974479675293, |
|
"learning_rate": 0.0001540194895565346, |
|
"loss": 1.1123, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.13229587160436046, |
|
"grad_norm": 0.28012582659721375, |
|
"learning_rate": 0.00015391042257749336, |
|
"loss": 1.2269, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.13245188088691276, |
|
"grad_norm": 0.26394879817962646, |
|
"learning_rate": 0.00015380126511921403, |
|
"loss": 1.4469, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.1326078901694651, |
|
"grad_norm": 0.2717582583427429, |
|
"learning_rate": 0.0001536920173648984, |
|
"loss": 1.1494, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1327638994520174, |
|
"grad_norm": 0.2968549132347107, |
|
"learning_rate": 0.00015358267949789966, |
|
"loss": 1.1903, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.13291990873456971, |
|
"grad_norm": 0.2570381164550781, |
|
"learning_rate": 0.00015347325170172245, |
|
"loss": 1.1035, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.133075918017122, |
|
"grad_norm": 0.3070929944515228, |
|
"learning_rate": 0.0001533637341600221, |
|
"loss": 1.4062, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.13323192729967434, |
|
"grad_norm": 0.2886407971382141, |
|
"learning_rate": 0.0001532541270566049, |
|
"loss": 1.3491, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.13338793658222664, |
|
"grad_norm": 0.2572009861469269, |
|
"learning_rate": 0.00015314443057542703, |
|
"loss": 1.2643, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.13354394586477897, |
|
"grad_norm": 0.2768828272819519, |
|
"learning_rate": 0.00015303464490059506, |
|
"loss": 1.1444, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.13369995514733127, |
|
"grad_norm": 0.3006720542907715, |
|
"learning_rate": 0.00015292477021636497, |
|
"loss": 1.2172, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.13385596442988357, |
|
"grad_norm": 0.24407751858234406, |
|
"learning_rate": 0.0001528148067071423, |
|
"loss": 0.9457, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.1340119737124359, |
|
"grad_norm": 0.25638723373413086, |
|
"learning_rate": 0.00015270475455748166, |
|
"loss": 1.1478, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.1341679829949882, |
|
"grad_norm": 0.24834637343883514, |
|
"learning_rate": 0.00015259461395208628, |
|
"loss": 0.9835, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.13432399227754052, |
|
"grad_norm": 0.2611735463142395, |
|
"learning_rate": 0.00015248438507580806, |
|
"loss": 1.125, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.13448000156009282, |
|
"grad_norm": 0.3239066004753113, |
|
"learning_rate": 0.00015237406811364682, |
|
"loss": 1.1973, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.13463601084264515, |
|
"grad_norm": 0.2662723958492279, |
|
"learning_rate": 0.0001522636632507504, |
|
"loss": 1.1115, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.13479202012519745, |
|
"grad_norm": 0.26053330302238464, |
|
"learning_rate": 0.00015215317067241414, |
|
"loss": 1.0885, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.13494802940774975, |
|
"grad_norm": 0.337984561920166, |
|
"learning_rate": 0.00015204259056408046, |
|
"loss": 0.8782, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.13510403869030208, |
|
"grad_norm": 0.2965889871120453, |
|
"learning_rate": 0.00015193192311133884, |
|
"loss": 1.3198, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.13526004797285437, |
|
"grad_norm": 0.3056474030017853, |
|
"learning_rate": 0.00015182116849992526, |
|
"loss": 1.5133, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.1354160572554067, |
|
"grad_norm": 0.29193446040153503, |
|
"learning_rate": 0.00015171032691572206, |
|
"loss": 1.2365, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.135572066537959, |
|
"grad_norm": 0.28123265504837036, |
|
"learning_rate": 0.00015159939854475743, |
|
"loss": 1.1654, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.13572807582051133, |
|
"grad_norm": 0.3033466041088104, |
|
"learning_rate": 0.00015148838357320537, |
|
"loss": 1.5473, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.13588408510306363, |
|
"grad_norm": 0.26069045066833496, |
|
"learning_rate": 0.00015137728218738502, |
|
"loss": 1.2213, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.13604009438561596, |
|
"grad_norm": 0.3010377883911133, |
|
"learning_rate": 0.0001512660945737608, |
|
"loss": 1.1906, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.13619610366816826, |
|
"grad_norm": 0.2615121304988861, |
|
"learning_rate": 0.00015115482091894165, |
|
"loss": 1.0807, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.13635211295072056, |
|
"grad_norm": 0.27064162492752075, |
|
"learning_rate": 0.00015104346140968095, |
|
"loss": 1.3376, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.13650812223327288, |
|
"grad_norm": 0.26106327772140503, |
|
"learning_rate": 0.00015093201623287631, |
|
"loss": 1.2357, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.13666413151582518, |
|
"grad_norm": 0.26505109667778015, |
|
"learning_rate": 0.00015082048557556893, |
|
"loss": 1.4311, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.1368201407983775, |
|
"grad_norm": 0.2965877950191498, |
|
"learning_rate": 0.00015070886962494358, |
|
"loss": 1.3246, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.1369761500809298, |
|
"grad_norm": 0.3173799216747284, |
|
"learning_rate": 0.0001505971685683282, |
|
"loss": 1.4795, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.13713215936348214, |
|
"grad_norm": 0.2562354505062103, |
|
"learning_rate": 0.00015048538259319346, |
|
"loss": 1.0112, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.13728816864603444, |
|
"grad_norm": 0.2736887037754059, |
|
"learning_rate": 0.00015037351188715265, |
|
"loss": 1.3539, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.13744417792858676, |
|
"grad_norm": 0.30376073718070984, |
|
"learning_rate": 0.00015026155663796123, |
|
"loss": 1.2837, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.13760018721113906, |
|
"grad_norm": 0.3052879869937897, |
|
"learning_rate": 0.00015014951703351653, |
|
"loss": 1.3994, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.13775619649369136, |
|
"grad_norm": 0.25414812564849854, |
|
"learning_rate": 0.00015003739326185751, |
|
"loss": 0.9258, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.1379122057762437, |
|
"grad_norm": 0.33165043592453003, |
|
"learning_rate": 0.00014992518551116434, |
|
"loss": 1.4427, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.138068215058796, |
|
"grad_norm": 0.2764113247394562, |
|
"learning_rate": 0.00014981289396975817, |
|
"loss": 1.3084, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.13822422434134832, |
|
"grad_norm": 0.3221314251422882, |
|
"learning_rate": 0.0001497005188261007, |
|
"loss": 1.0262, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.13838023362390062, |
|
"grad_norm": 0.24285611510276794, |
|
"learning_rate": 0.0001495880602687941, |
|
"loss": 1.1275, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.13853624290645294, |
|
"grad_norm": 0.27305787801742554, |
|
"learning_rate": 0.00014947551848658034, |
|
"loss": 1.3409, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.13869225218900524, |
|
"grad_norm": 0.29822468757629395, |
|
"learning_rate": 0.00014936289366834123, |
|
"loss": 1.3696, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.13884826147155757, |
|
"grad_norm": 0.259112685918808, |
|
"learning_rate": 0.00014925018600309785, |
|
"loss": 1.2456, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.13900427075410987, |
|
"grad_norm": 0.28749990463256836, |
|
"learning_rate": 0.00014913739568001033, |
|
"loss": 1.2809, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.13916028003666217, |
|
"grad_norm": 0.24120725691318512, |
|
"learning_rate": 0.0001490245228883776, |
|
"loss": 1.1092, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.1393162893192145, |
|
"grad_norm": 0.2791595160961151, |
|
"learning_rate": 0.0001489115678176369, |
|
"loss": 1.024, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.1394722986017668, |
|
"grad_norm": 0.260062038898468, |
|
"learning_rate": 0.00014879853065736365, |
|
"loss": 1.1766, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.13962830788431912, |
|
"grad_norm": 0.2642684280872345, |
|
"learning_rate": 0.00014868541159727096, |
|
"loss": 1.3869, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.13978431716687142, |
|
"grad_norm": 0.2463667243719101, |
|
"learning_rate": 0.00014857221082720948, |
|
"loss": 1.0662, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.13994032644942375, |
|
"grad_norm": 0.2916738986968994, |
|
"learning_rate": 0.0001484589285371669, |
|
"loss": 1.3209, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.14009633573197605, |
|
"grad_norm": 0.27236512303352356, |
|
"learning_rate": 0.0001483455649172678, |
|
"loss": 1.1833, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.14025234501452835, |
|
"grad_norm": 0.2619946002960205, |
|
"learning_rate": 0.0001482321201577733, |
|
"loss": 1.3137, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.14040835429708068, |
|
"grad_norm": 0.31396883726119995, |
|
"learning_rate": 0.00014811859444908052, |
|
"loss": 1.3727, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.14056436357963298, |
|
"grad_norm": 0.25572189688682556, |
|
"learning_rate": 0.0001480049879817226, |
|
"loss": 1.1046, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.1407203728621853, |
|
"grad_norm": 0.2937905490398407, |
|
"learning_rate": 0.0001478913009463682, |
|
"loss": 1.3542, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.1408763821447376, |
|
"grad_norm": 0.253520131111145, |
|
"learning_rate": 0.00014777753353382119, |
|
"loss": 1.2329, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.14103239142728993, |
|
"grad_norm": 0.32491999864578247, |
|
"learning_rate": 0.00014766368593502026, |
|
"loss": 1.3285, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.14118840070984223, |
|
"grad_norm": 0.2527139484882355, |
|
"learning_rate": 0.00014754975834103877, |
|
"loss": 1.1277, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.14134440999239456, |
|
"grad_norm": 0.275272399187088, |
|
"learning_rate": 0.00014743575094308431, |
|
"loss": 1.4177, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.14150041927494686, |
|
"grad_norm": 0.26013612747192383, |
|
"learning_rate": 0.0001473216639324984, |
|
"loss": 1.2476, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.14165642855749916, |
|
"grad_norm": 0.28431418538093567, |
|
"learning_rate": 0.0001472074975007562, |
|
"loss": 1.3947, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.1418124378400515, |
|
"grad_norm": 0.2629927396774292, |
|
"learning_rate": 0.0001470932518394661, |
|
"loss": 1.1587, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.14196844712260379, |
|
"grad_norm": 0.2944284975528717, |
|
"learning_rate": 0.00014697892714036958, |
|
"loss": 1.342, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.1421244564051561, |
|
"grad_norm": 0.31365662813186646, |
|
"learning_rate": 0.00014686452359534066, |
|
"loss": 1.4326, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.1422804656877084, |
|
"grad_norm": 0.255875825881958, |
|
"learning_rate": 0.0001467500413963857, |
|
"loss": 1.2305, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.14243647497026074, |
|
"grad_norm": 0.2717350423336029, |
|
"learning_rate": 0.00014663548073564316, |
|
"loss": 1.1965, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.14259248425281304, |
|
"grad_norm": 0.28059136867523193, |
|
"learning_rate": 0.00014652084180538302, |
|
"loss": 1.3361, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.14274849353536537, |
|
"grad_norm": 0.2790951430797577, |
|
"learning_rate": 0.00014640612479800686, |
|
"loss": 1.2785, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.14290450281791767, |
|
"grad_norm": 0.24599488079547882, |
|
"learning_rate": 0.00014629132990604706, |
|
"loss": 1.2433, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.14306051210046997, |
|
"grad_norm": 0.288792222738266, |
|
"learning_rate": 0.00014617645732216685, |
|
"loss": 1.1779, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.1432165213830223, |
|
"grad_norm": 0.3035881221294403, |
|
"learning_rate": 0.00014606150723915984, |
|
"loss": 1.3885, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.1433725306655746, |
|
"grad_norm": 0.28884077072143555, |
|
"learning_rate": 0.00014594647984994964, |
|
"loss": 1.3079, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.14352853994812692, |
|
"grad_norm": 0.26054033637046814, |
|
"learning_rate": 0.00014583137534758967, |
|
"loss": 1.1897, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.14368454923067922, |
|
"grad_norm": 0.31249237060546875, |
|
"learning_rate": 0.00014571619392526278, |
|
"loss": 1.4518, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.14384055851323155, |
|
"grad_norm": 0.27947118878364563, |
|
"learning_rate": 0.0001456009357762809, |
|
"loss": 1.2305, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.14399656779578385, |
|
"grad_norm": 0.2928619980812073, |
|
"learning_rate": 0.00014548560109408466, |
|
"loss": 1.3645, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.14415257707833617, |
|
"grad_norm": 0.2735868990421295, |
|
"learning_rate": 0.00014537019007224324, |
|
"loss": 1.4351, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.14430858636088847, |
|
"grad_norm": 0.30757883191108704, |
|
"learning_rate": 0.00014525470290445392, |
|
"loss": 1.4073, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.14446459564344077, |
|
"grad_norm": 0.28719013929367065, |
|
"learning_rate": 0.00014513913978454168, |
|
"loss": 1.2918, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.1446206049259931, |
|
"grad_norm": 0.2720332145690918, |
|
"learning_rate": 0.00014502350090645917, |
|
"loss": 1.2763, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.1447766142085454, |
|
"grad_norm": 0.24720966815948486, |
|
"learning_rate": 0.000144907786464286, |
|
"loss": 1.0549, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.14493262349109773, |
|
"grad_norm": 0.3164946138858795, |
|
"learning_rate": 0.0001447919966522287, |
|
"loss": 1.1007, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.14508863277365003, |
|
"grad_norm": 0.2940044105052948, |
|
"learning_rate": 0.00014467613166462023, |
|
"loss": 1.2818, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.14524464205620236, |
|
"grad_norm": 0.34050655364990234, |
|
"learning_rate": 0.00014456019169591978, |
|
"loss": 1.2618, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.14540065133875466, |
|
"grad_norm": 0.24612417817115784, |
|
"learning_rate": 0.0001444441769407124, |
|
"loss": 0.991, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.14555666062130695, |
|
"grad_norm": 0.2636529505252838, |
|
"learning_rate": 0.00014432808759370854, |
|
"loss": 1.4259, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.14571266990385928, |
|
"grad_norm": 0.2628234624862671, |
|
"learning_rate": 0.00014421192384974396, |
|
"loss": 1.2545, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.14586867918641158, |
|
"grad_norm": 0.2733708918094635, |
|
"learning_rate": 0.00014409568590377918, |
|
"loss": 1.1442, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.1460246884689639, |
|
"grad_norm": 0.24912774562835693, |
|
"learning_rate": 0.0001439793739508994, |
|
"loss": 1.039, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.1461806977515162, |
|
"grad_norm": 0.2927952706813812, |
|
"learning_rate": 0.00014386298818631386, |
|
"loss": 1.179, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.14633670703406854, |
|
"grad_norm": 0.29066377878189087, |
|
"learning_rate": 0.0001437465288053558, |
|
"loss": 1.2024, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.14649271631662084, |
|
"grad_norm": 0.2862846553325653, |
|
"learning_rate": 0.00014362999600348196, |
|
"loss": 1.1401, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.14664872559917316, |
|
"grad_norm": 0.3009769022464752, |
|
"learning_rate": 0.00014351338997627234, |
|
"loss": 1.3966, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.14680473488172546, |
|
"grad_norm": 0.31753668189048767, |
|
"learning_rate": 0.00014339671091942978, |
|
"loss": 1.4626, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.14696074416427776, |
|
"grad_norm": 0.28623080253601074, |
|
"learning_rate": 0.0001432799590287797, |
|
"loss": 1.2841, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.1471167534468301, |
|
"grad_norm": 0.3344881534576416, |
|
"learning_rate": 0.00014316313450026986, |
|
"loss": 1.5589, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.1472727627293824, |
|
"grad_norm": 0.3132301867008209, |
|
"learning_rate": 0.00014304623752996973, |
|
"loss": 1.4286, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.14742877201193472, |
|
"grad_norm": 0.299078106880188, |
|
"learning_rate": 0.00014292926831407061, |
|
"loss": 1.2099, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.14758478129448702, |
|
"grad_norm": 0.27058905363082886, |
|
"learning_rate": 0.0001428122270488848, |
|
"loss": 1.2331, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.14774079057703934, |
|
"grad_norm": 0.3202461004257202, |
|
"learning_rate": 0.00014269511393084572, |
|
"loss": 1.0677, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.14789679985959164, |
|
"grad_norm": 0.3005964756011963, |
|
"learning_rate": 0.00014257792915650728, |
|
"loss": 1.3382, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.14805280914214397, |
|
"grad_norm": 0.28587067127227783, |
|
"learning_rate": 0.00014246067292254366, |
|
"loss": 1.2216, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.14820881842469627, |
|
"grad_norm": 0.27515730261802673, |
|
"learning_rate": 0.00014234334542574906, |
|
"loss": 1.1608, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.14836482770724857, |
|
"grad_norm": 0.26588740944862366, |
|
"learning_rate": 0.00014222594686303706, |
|
"loss": 1.1547, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.1485208369898009, |
|
"grad_norm": 0.3122014105319977, |
|
"learning_rate": 0.00014210847743144087, |
|
"loss": 1.3642, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.1486768462723532, |
|
"grad_norm": 0.34852224588394165, |
|
"learning_rate": 0.00014199093732811225, |
|
"loss": 1.4751, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.14883285555490552, |
|
"grad_norm": 0.2674144208431244, |
|
"learning_rate": 0.00014187332675032188, |
|
"loss": 1.2941, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.14898886483745782, |
|
"grad_norm": 0.30863744020462036, |
|
"learning_rate": 0.00014175564589545854, |
|
"loss": 1.298, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.14914487412001015, |
|
"grad_norm": 0.26412221789360046, |
|
"learning_rate": 0.00014163789496102902, |
|
"loss": 1.218, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.14930088340256245, |
|
"grad_norm": 0.2920873761177063, |
|
"learning_rate": 0.0001415200741446577, |
|
"loss": 1.5198, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.14945689268511475, |
|
"grad_norm": 0.29869547486305237, |
|
"learning_rate": 0.00014140218364408632, |
|
"loss": 1.3896, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.14961290196766708, |
|
"grad_norm": 0.2696417570114136, |
|
"learning_rate": 0.00014128422365717347, |
|
"loss": 1.2046, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.14976891125021938, |
|
"grad_norm": 0.27298402786254883, |
|
"learning_rate": 0.0001411661943818944, |
|
"loss": 1.3599, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.1499249205327717, |
|
"grad_norm": 0.27962544560432434, |
|
"learning_rate": 0.0001410480960163407, |
|
"loss": 1.25, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.150080929815324, |
|
"grad_norm": 0.2612510323524475, |
|
"learning_rate": 0.00014092992875871979, |
|
"loss": 1.1053, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.15023693909787633, |
|
"grad_norm": 0.27618667483329773, |
|
"learning_rate": 0.00014081169280735488, |
|
"loss": 1.3871, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.15039294838042863, |
|
"grad_norm": 0.24976608157157898, |
|
"learning_rate": 0.00014069338836068433, |
|
"loss": 1.2613, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.15054895766298096, |
|
"grad_norm": 0.267610102891922, |
|
"learning_rate": 0.00014057501561726157, |
|
"loss": 1.0631, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.15070496694553326, |
|
"grad_norm": 0.29677531123161316, |
|
"learning_rate": 0.00014045657477575448, |
|
"loss": 1.3567, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.15086097622808556, |
|
"grad_norm": 0.29539185762405396, |
|
"learning_rate": 0.0001403380660349455, |
|
"loss": 1.1386, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.15101698551063789, |
|
"grad_norm": 0.2691122889518738, |
|
"learning_rate": 0.00014021948959373076, |
|
"loss": 1.1089, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.15117299479319019, |
|
"grad_norm": 0.24394790828227997, |
|
"learning_rate": 0.0001401008456511202, |
|
"loss": 1.1893, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.1513290040757425, |
|
"grad_norm": 0.2849481403827667, |
|
"learning_rate": 0.0001399821344062369, |
|
"loss": 1.4775, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.1514850133582948, |
|
"grad_norm": 0.2634568512439728, |
|
"learning_rate": 0.00013986335605831705, |
|
"loss": 1.1655, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.15164102264084714, |
|
"grad_norm": 0.269879013299942, |
|
"learning_rate": 0.00013974451080670934, |
|
"loss": 1.2047, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.15179703192339944, |
|
"grad_norm": 0.27636033296585083, |
|
"learning_rate": 0.0001396255988508748, |
|
"loss": 1.2987, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.15195304120595177, |
|
"grad_norm": 0.2572225332260132, |
|
"learning_rate": 0.00013950662039038643, |
|
"loss": 1.3322, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.15210905048850407, |
|
"grad_norm": 0.2573801279067993, |
|
"learning_rate": 0.00013938757562492873, |
|
"loss": 1.2547, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.15226505977105637, |
|
"grad_norm": 0.3160158395767212, |
|
"learning_rate": 0.00013926846475429766, |
|
"loss": 1.5537, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.1524210690536087, |
|
"grad_norm": 0.30125337839126587, |
|
"learning_rate": 0.00013914928797839995, |
|
"loss": 1.0853, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.152577078336161, |
|
"grad_norm": 0.25772640109062195, |
|
"learning_rate": 0.0001390300454972531, |
|
"loss": 1.198, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.15273308761871332, |
|
"grad_norm": 0.257586270570755, |
|
"learning_rate": 0.0001389107375109848, |
|
"loss": 1.086, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.15288909690126562, |
|
"grad_norm": 0.2763863205909729, |
|
"learning_rate": 0.00013879136421983266, |
|
"loss": 1.2639, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.15304510618381795, |
|
"grad_norm": 0.2751125991344452, |
|
"learning_rate": 0.00013867192582414393, |
|
"loss": 1.2473, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.15320111546637025, |
|
"grad_norm": 0.3138543367385864, |
|
"learning_rate": 0.0001385524225243751, |
|
"loss": 1.3107, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.15335712474892257, |
|
"grad_norm": 0.27820733189582825, |
|
"learning_rate": 0.00013843285452109166, |
|
"loss": 1.048, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.15351313403147487, |
|
"grad_norm": 0.25756746530532837, |
|
"learning_rate": 0.00013831322201496757, |
|
"loss": 1.0374, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.15366914331402717, |
|
"grad_norm": 0.332603394985199, |
|
"learning_rate": 0.0001381935252067852, |
|
"loss": 1.3359, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.1538251525965795, |
|
"grad_norm": 0.33936744928359985, |
|
"learning_rate": 0.00013807376429743467, |
|
"loss": 1.5814, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.1539811618791318, |
|
"grad_norm": 0.2748062014579773, |
|
"learning_rate": 0.00013795393948791383, |
|
"loss": 1.201, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.15413717116168413, |
|
"grad_norm": 0.26038771867752075, |
|
"learning_rate": 0.0001378340509793277, |
|
"loss": 1.2087, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.15429318044423643, |
|
"grad_norm": 0.24746748805046082, |
|
"learning_rate": 0.00013771409897288822, |
|
"loss": 1.0487, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.15444918972678875, |
|
"grad_norm": 0.270280122756958, |
|
"learning_rate": 0.0001375940836699139, |
|
"loss": 1.1529, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.15460519900934105, |
|
"grad_norm": 0.28278234601020813, |
|
"learning_rate": 0.00013747400527182953, |
|
"loss": 1.4292, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.15476120829189335, |
|
"grad_norm": 0.3091171681880951, |
|
"learning_rate": 0.0001373538639801657, |
|
"loss": 1.2118, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.15491721757444568, |
|
"grad_norm": 0.264275461435318, |
|
"learning_rate": 0.0001372336599965586, |
|
"loss": 1.2727, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.15507322685699798, |
|
"grad_norm": 0.3125738799571991, |
|
"learning_rate": 0.00013711339352274966, |
|
"loss": 1.3389, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.1552292361395503, |
|
"grad_norm": 0.2750801146030426, |
|
"learning_rate": 0.0001369930647605852, |
|
"loss": 1.1031, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.1553852454221026, |
|
"grad_norm": 0.274777889251709, |
|
"learning_rate": 0.00013687267391201605, |
|
"loss": 1.4329, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.15554125470465494, |
|
"grad_norm": 0.28475117683410645, |
|
"learning_rate": 0.00013675222117909717, |
|
"loss": 1.1914, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.15569726398720724, |
|
"grad_norm": 0.27364879846572876, |
|
"learning_rate": 0.00013663170676398752, |
|
"loss": 1.1511, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.15585327326975956, |
|
"grad_norm": 0.310995489358902, |
|
"learning_rate": 0.00013651113086894952, |
|
"loss": 1.0349, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.15600928255231186, |
|
"grad_norm": 0.2910314202308655, |
|
"learning_rate": 0.00013639049369634876, |
|
"loss": 1.3302, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15600928255231186, |
|
"eval_loss": 1.2771576642990112, |
|
"eval_runtime": 110.8263, |
|
"eval_samples_per_second": 38.556, |
|
"eval_steps_per_second": 4.827, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 2500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.144559113202074e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|