|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.9919316563834837, |
|
"eval_steps": 500, |
|
"global_step": 101064, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003955070400253125, |
|
"grad_norm": 0.3985116183757782, |
|
"learning_rate": 1.5e-06, |
|
"loss": 2.3445, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.00791014080050625, |
|
"grad_norm": 0.5253750681877136, |
|
"learning_rate": 3e-06, |
|
"loss": 2.2315, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.011865211200759373, |
|
"grad_norm": 0.7505590319633484, |
|
"learning_rate": 4.5e-06, |
|
"loss": 2.0777, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0158202816010125, |
|
"grad_norm": 0.7067473530769348, |
|
"learning_rate": 6e-06, |
|
"loss": 2.0165, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.019775352001265623, |
|
"grad_norm": 0.8964686393737793, |
|
"learning_rate": 7.5e-06, |
|
"loss": 1.9872, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.023730422401518746, |
|
"grad_norm": 1.0233873128890991, |
|
"learning_rate": 9e-06, |
|
"loss": 1.9621, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02768549280177187, |
|
"grad_norm": 1.0008875131607056, |
|
"learning_rate": 1.05e-05, |
|
"loss": 1.9519, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.031640563202025, |
|
"grad_norm": 0.9941542148590088, |
|
"learning_rate": 1.2e-05, |
|
"loss": 1.922, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.03559563360227812, |
|
"grad_norm": 1.0296485424041748, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"loss": 1.9305, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.039550704002531245, |
|
"grad_norm": 1.056834101676941, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.9199, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04350577440278437, |
|
"grad_norm": 1.1616301536560059, |
|
"learning_rate": 1.4999976437535872e-05, |
|
"loss": 1.9058, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.04746084480303749, |
|
"grad_norm": 1.0753331184387207, |
|
"learning_rate": 1.4999905750291538e-05, |
|
"loss": 1.9321, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.05141591520329062, |
|
"grad_norm": 1.3542801141738892, |
|
"learning_rate": 1.4999787938711148e-05, |
|
"loss": 1.8942, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.05537098560354374, |
|
"grad_norm": 1.2391093969345093, |
|
"learning_rate": 1.499962300353495e-05, |
|
"loss": 1.8677, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.05932605600379687, |
|
"grad_norm": 1.0844732522964478, |
|
"learning_rate": 1.4999410945799291e-05, |
|
"loss": 1.8723, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.06328112640405, |
|
"grad_norm": 1.0814001560211182, |
|
"learning_rate": 1.499915176683659e-05, |
|
"loss": 1.8715, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.06723619680430312, |
|
"grad_norm": 1.2003209590911865, |
|
"learning_rate": 1.4998845468275357e-05, |
|
"loss": 1.8739, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.07119126720455624, |
|
"grad_norm": 1.2625234127044678, |
|
"learning_rate": 1.4998492052040163e-05, |
|
"loss": 1.8475, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.07514633760480936, |
|
"grad_norm": 1.144286870956421, |
|
"learning_rate": 1.499809152035164e-05, |
|
"loss": 1.8517, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.07910140800506249, |
|
"grad_norm": 1.2154852151870728, |
|
"learning_rate": 1.4997643875726454e-05, |
|
"loss": 1.8262, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08305647840531562, |
|
"grad_norm": 1.389930009841919, |
|
"learning_rate": 1.4997149120977304e-05, |
|
"loss": 1.8419, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.08701154880556874, |
|
"grad_norm": 1.265163779258728, |
|
"learning_rate": 1.4996607259212892e-05, |
|
"loss": 1.8344, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.09096661920582186, |
|
"grad_norm": 1.1876202821731567, |
|
"learning_rate": 1.4996018293837914e-05, |
|
"loss": 1.8356, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.09492168960607499, |
|
"grad_norm": 1.2677972316741943, |
|
"learning_rate": 1.4995382228553028e-05, |
|
"loss": 1.8463, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.09887676000632811, |
|
"grad_norm": 1.206444501876831, |
|
"learning_rate": 1.4994699067354838e-05, |
|
"loss": 1.8256, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.10283183040658124, |
|
"grad_norm": 1.2112213373184204, |
|
"learning_rate": 1.4993968814535867e-05, |
|
"loss": 1.8627, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.10678690080683437, |
|
"grad_norm": 1.1587677001953125, |
|
"learning_rate": 1.4993191474684532e-05, |
|
"loss": 1.8458, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.11074197120708748, |
|
"grad_norm": 1.2365622520446777, |
|
"learning_rate": 1.4992367052685107e-05, |
|
"loss": 1.8383, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.11469704160734061, |
|
"grad_norm": 1.179803490638733, |
|
"learning_rate": 1.4991495553717708e-05, |
|
"loss": 1.8305, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.11865211200759374, |
|
"grad_norm": 1.336897850036621, |
|
"learning_rate": 1.499057698325824e-05, |
|
"loss": 1.8381, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.12260718240784686, |
|
"grad_norm": 1.346336007118225, |
|
"learning_rate": 1.498961134707838e-05, |
|
"loss": 1.8467, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.1265622528081, |
|
"grad_norm": 1.2718663215637207, |
|
"learning_rate": 1.4988598651245534e-05, |
|
"loss": 1.8076, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.1305173232083531, |
|
"grad_norm": 1.1807332038879395, |
|
"learning_rate": 1.4987538902122799e-05, |
|
"loss": 1.8212, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.13447239360860624, |
|
"grad_norm": 1.4749420881271362, |
|
"learning_rate": 1.4986432106368917e-05, |
|
"loss": 1.8039, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.13842746400885936, |
|
"grad_norm": 1.2670525312423706, |
|
"learning_rate": 1.4985278270938247e-05, |
|
"loss": 1.8137, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.14238253440911247, |
|
"grad_norm": 1.5255069732666016, |
|
"learning_rate": 1.4984077403080711e-05, |
|
"loss": 1.8161, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.1463376048093656, |
|
"grad_norm": 1.1995693445205688, |
|
"learning_rate": 1.4982829510341751e-05, |
|
"loss": 1.8104, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.15029267520961873, |
|
"grad_norm": 1.3007076978683472, |
|
"learning_rate": 1.4981534600562279e-05, |
|
"loss": 1.7952, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.15424774560987187, |
|
"grad_norm": 1.4348576068878174, |
|
"learning_rate": 1.4980192681878635e-05, |
|
"loss": 1.819, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.15820281601012498, |
|
"grad_norm": 1.3245015144348145, |
|
"learning_rate": 1.4978803762722526e-05, |
|
"loss": 1.8043, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1621578864103781, |
|
"grad_norm": 1.3621593713760376, |
|
"learning_rate": 1.4977367851820984e-05, |
|
"loss": 1.7992, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.16611295681063123, |
|
"grad_norm": 1.2361946105957031, |
|
"learning_rate": 1.4975884958196297e-05, |
|
"loss": 1.8179, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.17006802721088435, |
|
"grad_norm": 1.5746525526046753, |
|
"learning_rate": 1.4974355091165972e-05, |
|
"loss": 1.8045, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.1740230976111375, |
|
"grad_norm": 1.4326754808425903, |
|
"learning_rate": 1.497277826034265e-05, |
|
"loss": 1.8155, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.1779781680113906, |
|
"grad_norm": 1.3772553205490112, |
|
"learning_rate": 1.4971154475634081e-05, |
|
"loss": 1.7838, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.18193323841164372, |
|
"grad_norm": 1.4580802917480469, |
|
"learning_rate": 1.4969483747243023e-05, |
|
"loss": 1.7997, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.18588830881189686, |
|
"grad_norm": 1.2635383605957031, |
|
"learning_rate": 1.4967766085667204e-05, |
|
"loss": 1.8091, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.18984337921214997, |
|
"grad_norm": 1.523247241973877, |
|
"learning_rate": 1.496600150169925e-05, |
|
"loss": 1.8086, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.1937984496124031, |
|
"grad_norm": 1.4835641384124756, |
|
"learning_rate": 1.496419000642661e-05, |
|
"loss": 1.8001, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.19775352001265623, |
|
"grad_norm": 1.3758567571640015, |
|
"learning_rate": 1.4962331611231496e-05, |
|
"loss": 1.7773, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.20170859041290934, |
|
"grad_norm": 1.343885898590088, |
|
"learning_rate": 1.4960426327790808e-05, |
|
"loss": 1.7884, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.20566366081316248, |
|
"grad_norm": 1.4712055921554565, |
|
"learning_rate": 1.4958474168076061e-05, |
|
"loss": 1.7904, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.2096187312134156, |
|
"grad_norm": 1.3729618787765503, |
|
"learning_rate": 1.4956475144353305e-05, |
|
"loss": 1.7883, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.21357380161366873, |
|
"grad_norm": 1.4087861776351929, |
|
"learning_rate": 1.4954429269183049e-05, |
|
"loss": 1.7764, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.21752887201392185, |
|
"grad_norm": 1.359567642211914, |
|
"learning_rate": 1.4952336555420194e-05, |
|
"loss": 1.7522, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.22148394241417496, |
|
"grad_norm": 1.5321180820465088, |
|
"learning_rate": 1.4950197016213935e-05, |
|
"loss": 1.7858, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.2254390128144281, |
|
"grad_norm": 1.4172133207321167, |
|
"learning_rate": 1.4948010665007694e-05, |
|
"loss": 1.7889, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.22939408321468122, |
|
"grad_norm": 1.389819622039795, |
|
"learning_rate": 1.4945777515539018e-05, |
|
"loss": 1.7787, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.23334915361493436, |
|
"grad_norm": 1.319344162940979, |
|
"learning_rate": 1.4943497581839515e-05, |
|
"loss": 1.7832, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.23730422401518747, |
|
"grad_norm": 1.3472243547439575, |
|
"learning_rate": 1.4941170878234739e-05, |
|
"loss": 1.7708, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24125929441544058, |
|
"grad_norm": 1.319574236869812, |
|
"learning_rate": 1.4938797419344127e-05, |
|
"loss": 1.8013, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.24521436481569373, |
|
"grad_norm": 1.3281052112579346, |
|
"learning_rate": 1.4936377220080886e-05, |
|
"loss": 1.7657, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.24916943521594684, |
|
"grad_norm": 1.4613900184631348, |
|
"learning_rate": 1.4933910295651914e-05, |
|
"loss": 1.7955, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.2531245056162, |
|
"grad_norm": 1.6248208284378052, |
|
"learning_rate": 1.4931396661557699e-05, |
|
"loss": 1.7775, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.2570795760164531, |
|
"grad_norm": 1.4050931930541992, |
|
"learning_rate": 1.492883633359221e-05, |
|
"loss": 1.749, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.2610346464167062, |
|
"grad_norm": 1.3959672451019287, |
|
"learning_rate": 1.4926229327842822e-05, |
|
"loss": 1.7735, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.2649897168169593, |
|
"grad_norm": 1.3407930135726929, |
|
"learning_rate": 1.4923575660690197e-05, |
|
"loss": 1.7685, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.2689447872172125, |
|
"grad_norm": 1.4229165315628052, |
|
"learning_rate": 1.4920875348808181e-05, |
|
"loss": 1.769, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.2728998576174656, |
|
"grad_norm": 1.3997846841812134, |
|
"learning_rate": 1.4918128409163712e-05, |
|
"loss": 1.7804, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.2768549280177187, |
|
"grad_norm": 1.7216688394546509, |
|
"learning_rate": 1.4915334859016699e-05, |
|
"loss": 1.7699, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.28080999841797183, |
|
"grad_norm": 1.4955641031265259, |
|
"learning_rate": 1.491249471591992e-05, |
|
"loss": 1.7615, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.28476506881822494, |
|
"grad_norm": 1.631832480430603, |
|
"learning_rate": 1.4909607997718917e-05, |
|
"loss": 1.7708, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.2887201392184781, |
|
"grad_norm": 1.4372748136520386, |
|
"learning_rate": 1.4906674722551872e-05, |
|
"loss": 1.7618, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.2926752096187312, |
|
"grad_norm": 1.3430101871490479, |
|
"learning_rate": 1.4903694908849506e-05, |
|
"loss": 1.7734, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.29663028001898434, |
|
"grad_norm": 1.4826927185058594, |
|
"learning_rate": 1.4900668575334953e-05, |
|
"loss": 1.7679, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.30058535041923745, |
|
"grad_norm": 1.4105191230773926, |
|
"learning_rate": 1.4897595741023642e-05, |
|
"loss": 1.7666, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.30454042081949056, |
|
"grad_norm": 1.3381356000900269, |
|
"learning_rate": 1.4894476425223191e-05, |
|
"loss": 1.7697, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.30849549121974373, |
|
"grad_norm": 1.3745373487472534, |
|
"learning_rate": 1.4891310647533266e-05, |
|
"loss": 1.7707, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.31245056161999685, |
|
"grad_norm": 1.3524316549301147, |
|
"learning_rate": 1.488809842784548e-05, |
|
"loss": 1.7515, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.31640563202024996, |
|
"grad_norm": 1.4299299716949463, |
|
"learning_rate": 1.4884839786343242e-05, |
|
"loss": 1.7799, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.3203607024205031, |
|
"grad_norm": 1.4132308959960938, |
|
"learning_rate": 1.4881534743501656e-05, |
|
"loss": 1.7258, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.3243157728207562, |
|
"grad_norm": 1.4797372817993164, |
|
"learning_rate": 1.4878183320087377e-05, |
|
"loss": 1.7657, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.32827084322100936, |
|
"grad_norm": 1.3737105131149292, |
|
"learning_rate": 1.4874785537158479e-05, |
|
"loss": 1.7845, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.33222591362126247, |
|
"grad_norm": 1.4676398038864136, |
|
"learning_rate": 1.4871341416064337e-05, |
|
"loss": 1.7652, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.3361809840215156, |
|
"grad_norm": 1.3782434463500977, |
|
"learning_rate": 1.4867850978445476e-05, |
|
"loss": 1.7516, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 1.5370761156082153, |
|
"learning_rate": 1.4864314246233448e-05, |
|
"loss": 1.75, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.3440911248220218, |
|
"grad_norm": 1.4677528142929077, |
|
"learning_rate": 1.486073124165068e-05, |
|
"loss": 1.7518, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.348046195222275, |
|
"grad_norm": 1.4215251207351685, |
|
"learning_rate": 1.4857101987210359e-05, |
|
"loss": 1.7634, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.3520012656225281, |
|
"grad_norm": 1.4959337711334229, |
|
"learning_rate": 1.4853426505716261e-05, |
|
"loss": 1.7491, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.3559563360227812, |
|
"grad_norm": 1.4005351066589355, |
|
"learning_rate": 1.4849704820262627e-05, |
|
"loss": 1.7713, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3599114064230343, |
|
"grad_norm": 1.4689812660217285, |
|
"learning_rate": 1.484593695423401e-05, |
|
"loss": 1.7448, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.36386647682328743, |
|
"grad_norm": 1.5371148586273193, |
|
"learning_rate": 1.4842122931305133e-05, |
|
"loss": 1.7452, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.3678215472235406, |
|
"grad_norm": 1.4465723037719727, |
|
"learning_rate": 1.4838262775440741e-05, |
|
"loss": 1.7452, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.3717766176237937, |
|
"grad_norm": 1.5890401601791382, |
|
"learning_rate": 1.4834356510895436e-05, |
|
"loss": 1.737, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.37573168802404683, |
|
"grad_norm": 1.4862806797027588, |
|
"learning_rate": 1.4830404162213549e-05, |
|
"loss": 1.7426, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.37968675842429994, |
|
"grad_norm": 1.5449295043945312, |
|
"learning_rate": 1.4826405754228963e-05, |
|
"loss": 1.7379, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.38364182882455306, |
|
"grad_norm": 1.5151877403259277, |
|
"learning_rate": 1.482236131206497e-05, |
|
"loss": 1.7269, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.3875968992248062, |
|
"grad_norm": 1.600046157836914, |
|
"learning_rate": 1.4818270861134113e-05, |
|
"loss": 1.7556, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.39155196962505934, |
|
"grad_norm": 1.4293779134750366, |
|
"learning_rate": 1.4814134427138015e-05, |
|
"loss": 1.7368, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.39550704002531245, |
|
"grad_norm": 1.378175973892212, |
|
"learning_rate": 1.4809952036067231e-05, |
|
"loss": 1.7405, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.39946211042556556, |
|
"grad_norm": 1.417622447013855, |
|
"learning_rate": 1.4805723714201079e-05, |
|
"loss": 1.7484, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.4034171808258187, |
|
"grad_norm": 1.5106312036514282, |
|
"learning_rate": 1.4801449488107477e-05, |
|
"loss": 1.7218, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.40737225122607185, |
|
"grad_norm": 1.5248609781265259, |
|
"learning_rate": 1.4797129384642768e-05, |
|
"loss": 1.7328, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.41132732162632496, |
|
"grad_norm": 1.4607023000717163, |
|
"learning_rate": 1.4792763430951562e-05, |
|
"loss": 1.7131, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.4152823920265781, |
|
"grad_norm": 1.4600701332092285, |
|
"learning_rate": 1.4788351654466556e-05, |
|
"loss": 1.7418, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.4192374624268312, |
|
"grad_norm": 1.3468823432922363, |
|
"learning_rate": 1.4783894082908377e-05, |
|
"loss": 1.7649, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.4231925328270843, |
|
"grad_norm": 1.5118048191070557, |
|
"learning_rate": 1.4779390744285386e-05, |
|
"loss": 1.7233, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.42714760322733747, |
|
"grad_norm": 1.5199166536331177, |
|
"learning_rate": 1.4774841666893515e-05, |
|
"loss": 1.7238, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.4311026736275906, |
|
"grad_norm": 1.6537836790084839, |
|
"learning_rate": 1.4770246879316097e-05, |
|
"loss": 1.7216, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.4350577440278437, |
|
"grad_norm": 1.37918221950531, |
|
"learning_rate": 1.4765606410423666e-05, |
|
"loss": 1.7481, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.4390128144280968, |
|
"grad_norm": 1.526502013206482, |
|
"learning_rate": 1.4760920289373791e-05, |
|
"loss": 1.7141, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.4429678848283499, |
|
"grad_norm": 1.3577282428741455, |
|
"learning_rate": 1.4756188545610884e-05, |
|
"loss": 1.7507, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.4469229552286031, |
|
"grad_norm": 1.557986855506897, |
|
"learning_rate": 1.475141120886603e-05, |
|
"loss": 1.7103, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.4508780256288562, |
|
"grad_norm": 1.638221025466919, |
|
"learning_rate": 1.474658830915678e-05, |
|
"loss": 1.7363, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.4548330960291093, |
|
"grad_norm": 1.472142219543457, |
|
"learning_rate": 1.474171987678697e-05, |
|
"loss": 1.7331, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.45878816642936243, |
|
"grad_norm": 1.4680249691009521, |
|
"learning_rate": 1.4736805942346542e-05, |
|
"loss": 1.7273, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.46274323682961555, |
|
"grad_norm": 1.4165573120117188, |
|
"learning_rate": 1.4731846536711337e-05, |
|
"loss": 1.7159, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.4666983072298687, |
|
"grad_norm": 2.1816458702087402, |
|
"learning_rate": 1.4726841691042902e-05, |
|
"loss": 1.7236, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.4706533776301218, |
|
"grad_norm": 1.5376547574996948, |
|
"learning_rate": 1.4721791436788307e-05, |
|
"loss": 1.7227, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.47460844803037494, |
|
"grad_norm": 1.6850054264068604, |
|
"learning_rate": 1.4716695805679932e-05, |
|
"loss": 1.7116, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.47856351843062805, |
|
"grad_norm": 1.7338590621948242, |
|
"learning_rate": 1.471155482973528e-05, |
|
"loss": 1.7129, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.48251858883088117, |
|
"grad_norm": 1.4183164834976196, |
|
"learning_rate": 1.4706368541256762e-05, |
|
"loss": 1.7267, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.48647365923113434, |
|
"grad_norm": 1.7117156982421875, |
|
"learning_rate": 1.4701136972831513e-05, |
|
"loss": 1.7149, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.49042872963138745, |
|
"grad_norm": 1.4747951030731201, |
|
"learning_rate": 1.4695860157331169e-05, |
|
"loss": 1.7218, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.49438380003164056, |
|
"grad_norm": 1.6341221332550049, |
|
"learning_rate": 1.4690538127911672e-05, |
|
"loss": 1.7331, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.4983388704318937, |
|
"grad_norm": 1.4981880187988281, |
|
"learning_rate": 1.4685170918013054e-05, |
|
"loss": 1.7182, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.5022939408321468, |
|
"grad_norm": 1.5774872303009033, |
|
"learning_rate": 1.4679758561359232e-05, |
|
"loss": 1.7154, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.5062490112324, |
|
"grad_norm": 1.5503437519073486, |
|
"learning_rate": 1.4674301091957795e-05, |
|
"loss": 1.716, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.5102040816326531, |
|
"grad_norm": 1.5208927392959595, |
|
"learning_rate": 1.4668798544099795e-05, |
|
"loss": 1.7041, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.5141591520329062, |
|
"grad_norm": 1.8089638948440552, |
|
"learning_rate": 1.4663250952359516e-05, |
|
"loss": 1.7276, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5181142224331593, |
|
"grad_norm": 1.5653834342956543, |
|
"learning_rate": 1.4657658351594275e-05, |
|
"loss": 1.7164, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.5220692928334124, |
|
"grad_norm": 1.7017031908035278, |
|
"learning_rate": 1.4652020776944194e-05, |
|
"loss": 1.7053, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.5260243632336655, |
|
"grad_norm": 1.6849620342254639, |
|
"learning_rate": 1.4646338263831977e-05, |
|
"loss": 1.7134, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.5299794336339186, |
|
"grad_norm": 1.8098126649856567, |
|
"learning_rate": 1.4640610847962699e-05, |
|
"loss": 1.7158, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.5339345040341718, |
|
"grad_norm": 1.7234479188919067, |
|
"learning_rate": 1.4634838565323563e-05, |
|
"loss": 1.7229, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.537889574434425, |
|
"grad_norm": 1.35356867313385, |
|
"learning_rate": 1.4629021452183695e-05, |
|
"loss": 1.715, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.5418446448346781, |
|
"grad_norm": 1.5286564826965332, |
|
"learning_rate": 1.4623159545093895e-05, |
|
"loss": 1.7011, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.5457997152349312, |
|
"grad_norm": 1.5586360692977905, |
|
"learning_rate": 1.4617252880886427e-05, |
|
"loss": 1.6978, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.5497547856351843, |
|
"grad_norm": 1.5301753282546997, |
|
"learning_rate": 1.461130149667477e-05, |
|
"loss": 1.6984, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.5537098560354374, |
|
"grad_norm": 1.6551586389541626, |
|
"learning_rate": 1.4605305429853402e-05, |
|
"loss": 1.6935, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5576649264356905, |
|
"grad_norm": 1.522283911705017, |
|
"learning_rate": 1.4599264718097552e-05, |
|
"loss": 1.6795, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.5616199968359437, |
|
"grad_norm": 1.519173502922058, |
|
"learning_rate": 1.4593179399362967e-05, |
|
"loss": 1.6948, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.5655750672361968, |
|
"grad_norm": 1.582780122756958, |
|
"learning_rate": 1.4587049511885675e-05, |
|
"loss": 1.7168, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.5695301376364499, |
|
"grad_norm": 1.5130764245986938, |
|
"learning_rate": 1.458087509418174e-05, |
|
"loss": 1.7049, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.573485208036703, |
|
"grad_norm": 1.581992268562317, |
|
"learning_rate": 1.4574656185047033e-05, |
|
"loss": 1.695, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.5774402784369562, |
|
"grad_norm": 1.4675225019454956, |
|
"learning_rate": 1.456839282355697e-05, |
|
"loss": 1.7015, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.5813953488372093, |
|
"grad_norm": 1.5948406457901, |
|
"learning_rate": 1.4562085049066282e-05, |
|
"loss": 1.7129, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.5853504192374624, |
|
"grad_norm": 1.8901729583740234, |
|
"learning_rate": 1.4555732901208756e-05, |
|
"loss": 1.7062, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.5893054896377156, |
|
"grad_norm": 1.6940269470214844, |
|
"learning_rate": 1.4549336419896993e-05, |
|
"loss": 1.7025, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.5932605600379687, |
|
"grad_norm": 1.5160539150238037, |
|
"learning_rate": 1.454289564532216e-05, |
|
"loss": 1.688, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5972156304382218, |
|
"grad_norm": 1.6424893140792847, |
|
"learning_rate": 1.4536410617953726e-05, |
|
"loss": 1.696, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.6011707008384749, |
|
"grad_norm": 1.492990493774414, |
|
"learning_rate": 1.4529881378539218e-05, |
|
"loss": 1.6768, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.605125771238728, |
|
"grad_norm": 1.7309181690216064, |
|
"learning_rate": 1.452330796810396e-05, |
|
"loss": 1.6972, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.6090808416389811, |
|
"grad_norm": 1.684484601020813, |
|
"learning_rate": 1.451669042795082e-05, |
|
"loss": 1.6903, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.6130359120392342, |
|
"grad_norm": 1.5465792417526245, |
|
"learning_rate": 1.4510028799659944e-05, |
|
"loss": 1.714, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.6169909824394875, |
|
"grad_norm": 1.8257033824920654, |
|
"learning_rate": 1.4503323125088501e-05, |
|
"loss": 1.6894, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.6209460528397406, |
|
"grad_norm": 1.5299944877624512, |
|
"learning_rate": 1.4496573446370414e-05, |
|
"loss": 1.6944, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.6249011232399937, |
|
"grad_norm": 1.7090293169021606, |
|
"learning_rate": 1.44897798059161e-05, |
|
"loss": 1.6878, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.6288561936402468, |
|
"grad_norm": 1.690470576286316, |
|
"learning_rate": 1.4482942246412203e-05, |
|
"loss": 1.6807, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.6328112640404999, |
|
"grad_norm": 1.8261181116104126, |
|
"learning_rate": 1.4476060810821319e-05, |
|
"loss": 1.6887, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.636766334440753, |
|
"grad_norm": 1.5878318548202515, |
|
"learning_rate": 1.4469135542381741e-05, |
|
"loss": 1.6618, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.6407214048410061, |
|
"grad_norm": 1.5003888607025146, |
|
"learning_rate": 1.4462166484607167e-05, |
|
"loss": 1.6734, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.6446764752412593, |
|
"grad_norm": 1.7296781539916992, |
|
"learning_rate": 1.445515368128645e-05, |
|
"loss": 1.6712, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.6486315456415124, |
|
"grad_norm": 1.6283060312271118, |
|
"learning_rate": 1.4448097176483299e-05, |
|
"loss": 1.6963, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.6525866160417655, |
|
"grad_norm": 1.5867258310317993, |
|
"learning_rate": 1.444099701453602e-05, |
|
"loss": 1.6834, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.6565416864420187, |
|
"grad_norm": 1.8763879537582397, |
|
"learning_rate": 1.4433853240057229e-05, |
|
"loss": 1.6811, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.6604967568422718, |
|
"grad_norm": 1.5323275327682495, |
|
"learning_rate": 1.4426665897933574e-05, |
|
"loss": 1.6778, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.6644518272425249, |
|
"grad_norm": 1.581667184829712, |
|
"learning_rate": 1.4419435033325455e-05, |
|
"loss": 1.6926, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.668406897642778, |
|
"grad_norm": 1.6673179864883423, |
|
"learning_rate": 1.441216069166673e-05, |
|
"loss": 1.6806, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.6723619680430312, |
|
"grad_norm": 1.8026336431503296, |
|
"learning_rate": 1.4404842918664446e-05, |
|
"loss": 1.6829, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6763170384432843, |
|
"grad_norm": 1.6094428300857544, |
|
"learning_rate": 1.4397481760298542e-05, |
|
"loss": 1.6763, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 1.565843105316162, |
|
"learning_rate": 1.4390077262821559e-05, |
|
"loss": 1.659, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.6842271792437905, |
|
"grad_norm": 1.7567963600158691, |
|
"learning_rate": 1.4382629472758346e-05, |
|
"loss": 1.666, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.6881822496440436, |
|
"grad_norm": 1.591693639755249, |
|
"learning_rate": 1.4375138436905786e-05, |
|
"loss": 1.6666, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.6921373200442967, |
|
"grad_norm": 1.638576865196228, |
|
"learning_rate": 1.436760420233248e-05, |
|
"loss": 1.6554, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.69609239044455, |
|
"grad_norm": 1.7055751085281372, |
|
"learning_rate": 1.4360026816378462e-05, |
|
"loss": 1.671, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.7000474608448031, |
|
"grad_norm": 1.6867974996566772, |
|
"learning_rate": 1.4352406326654905e-05, |
|
"loss": 1.6722, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.7040025312450562, |
|
"grad_norm": 1.7862675189971924, |
|
"learning_rate": 1.4344742781043809e-05, |
|
"loss": 1.6965, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.7079576016453093, |
|
"grad_norm": 1.7989298105239868, |
|
"learning_rate": 1.4337036227697715e-05, |
|
"loss": 1.6762, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.7119126720455624, |
|
"grad_norm": 1.7017799615859985, |
|
"learning_rate": 1.4329286715039392e-05, |
|
"loss": 1.6614, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.7158677424458155, |
|
"grad_norm": 1.6665624380111694, |
|
"learning_rate": 1.4321494291761537e-05, |
|
"loss": 1.662, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.7198228128460686, |
|
"grad_norm": 1.9826371669769287, |
|
"learning_rate": 1.4313659006826468e-05, |
|
"loss": 1.6638, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.7237778832463218, |
|
"grad_norm": 1.7711797952651978, |
|
"learning_rate": 1.430578090946582e-05, |
|
"loss": 1.6625, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.7277329536465749, |
|
"grad_norm": 1.8430758714675903, |
|
"learning_rate": 1.4297860049180223e-05, |
|
"loss": 1.6824, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.731688024046828, |
|
"grad_norm": 1.8522826433181763, |
|
"learning_rate": 1.4289896475739012e-05, |
|
"loss": 1.6599, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.7356430944470812, |
|
"grad_norm": 1.6422381401062012, |
|
"learning_rate": 1.4281890239179897e-05, |
|
"loss": 1.6584, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.7395981648473343, |
|
"grad_norm": 1.7232320308685303, |
|
"learning_rate": 1.4273841389808653e-05, |
|
"loss": 1.6604, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.7435532352475874, |
|
"grad_norm": 1.6723573207855225, |
|
"learning_rate": 1.4265749978198805e-05, |
|
"loss": 1.6654, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.7475083056478405, |
|
"grad_norm": 1.776376485824585, |
|
"learning_rate": 1.4257616055191316e-05, |
|
"loss": 1.672, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.7514633760480937, |
|
"grad_norm": 1.9001519680023193, |
|
"learning_rate": 1.4249439671894253e-05, |
|
"loss": 1.6473, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7554184464483468, |
|
"grad_norm": 1.5602456331253052, |
|
"learning_rate": 1.4241220879682484e-05, |
|
"loss": 1.6558, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.7593735168485999, |
|
"grad_norm": 1.6195124387741089, |
|
"learning_rate": 1.423295973019734e-05, |
|
"loss": 1.6704, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.763328587248853, |
|
"grad_norm": 1.7354437112808228, |
|
"learning_rate": 1.4224656275346295e-05, |
|
"loss": 1.6697, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.7672836576491061, |
|
"grad_norm": 1.6655138731002808, |
|
"learning_rate": 1.4216310567302648e-05, |
|
"loss": 1.6567, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.7712387280493592, |
|
"grad_norm": 1.9168803691864014, |
|
"learning_rate": 1.4207922658505184e-05, |
|
"loss": 1.6398, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.7751937984496124, |
|
"grad_norm": 1.7918130159378052, |
|
"learning_rate": 1.4199492601657848e-05, |
|
"loss": 1.654, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.7791488688498656, |
|
"grad_norm": 1.7283716201782227, |
|
"learning_rate": 1.4191020449729417e-05, |
|
"loss": 1.6534, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.7831039392501187, |
|
"grad_norm": 1.860144019126892, |
|
"learning_rate": 1.4182506255953167e-05, |
|
"loss": 1.6553, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.7870590096503718, |
|
"grad_norm": 1.7320619821548462, |
|
"learning_rate": 1.4173950073826531e-05, |
|
"loss": 1.6586, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.7910140800506249, |
|
"grad_norm": 1.7704521417617798, |
|
"learning_rate": 1.4165351957110772e-05, |
|
"loss": 1.6599, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.794969150450878, |
|
"grad_norm": 2.051400661468506, |
|
"learning_rate": 1.4156711959830644e-05, |
|
"loss": 1.6695, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.7989242208511311, |
|
"grad_norm": 1.7863457202911377, |
|
"learning_rate": 1.4148030136274043e-05, |
|
"loss": 1.6538, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.8028792912513842, |
|
"grad_norm": 1.85243558883667, |
|
"learning_rate": 1.413930654099168e-05, |
|
"loss": 1.63, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.8068343616516374, |
|
"grad_norm": 1.7953428030014038, |
|
"learning_rate": 1.413054122879673e-05, |
|
"loss": 1.6374, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.8107894320518905, |
|
"grad_norm": 1.8957959413528442, |
|
"learning_rate": 1.4121734254764482e-05, |
|
"loss": 1.6445, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.8147445024521437, |
|
"grad_norm": 1.7762993574142456, |
|
"learning_rate": 1.4112885674232011e-05, |
|
"loss": 1.6503, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.8186995728523968, |
|
"grad_norm": 1.723813772201538, |
|
"learning_rate": 1.410399554279781e-05, |
|
"loss": 1.6416, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.8226546432526499, |
|
"grad_norm": 1.9667476415634155, |
|
"learning_rate": 1.4095063916321456e-05, |
|
"loss": 1.6297, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.826609713652903, |
|
"grad_norm": 1.8855000734329224, |
|
"learning_rate": 1.4086090850923246e-05, |
|
"loss": 1.6684, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.8305647840531561, |
|
"grad_norm": 1.8669531345367432, |
|
"learning_rate": 1.4077076402983857e-05, |
|
"loss": 1.6344, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8345198544534093, |
|
"grad_norm": 2.0968127250671387, |
|
"learning_rate": 1.4068020629143985e-05, |
|
"loss": 1.6524, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.8384749248536624, |
|
"grad_norm": 1.712428092956543, |
|
"learning_rate": 1.4058923586303988e-05, |
|
"loss": 1.6333, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.8424299952539155, |
|
"grad_norm": 1.7560315132141113, |
|
"learning_rate": 1.4049785331623534e-05, |
|
"loss": 1.6337, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.8463850656541686, |
|
"grad_norm": 1.875779390335083, |
|
"learning_rate": 1.4040605922521231e-05, |
|
"loss": 1.6328, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.8503401360544217, |
|
"grad_norm": 1.9142519235610962, |
|
"learning_rate": 1.403138541667428e-05, |
|
"loss": 1.6333, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.8542952064546749, |
|
"grad_norm": 2.263770580291748, |
|
"learning_rate": 1.4022123872018107e-05, |
|
"loss": 1.635, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.858250276854928, |
|
"grad_norm": 1.838889479637146, |
|
"learning_rate": 1.4012821346745995e-05, |
|
"loss": 1.6328, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.8622053472551812, |
|
"grad_norm": 1.9877907037734985, |
|
"learning_rate": 1.400347789930872e-05, |
|
"loss": 1.6427, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.8661604176554343, |
|
"grad_norm": 1.9885168075561523, |
|
"learning_rate": 1.399409358841419e-05, |
|
"loss": 1.637, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.8701154880556874, |
|
"grad_norm": 1.8428804874420166, |
|
"learning_rate": 1.3984668473027065e-05, |
|
"loss": 1.6377, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.8740705584559405, |
|
"grad_norm": 1.9875133037567139, |
|
"learning_rate": 1.39752026123684e-05, |
|
"loss": 1.6248, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.8780256288561936, |
|
"grad_norm": 2.0525448322296143, |
|
"learning_rate": 1.3965696065915262e-05, |
|
"loss": 1.6501, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.8819806992564467, |
|
"grad_norm": 1.9695172309875488, |
|
"learning_rate": 1.3956148893400357e-05, |
|
"loss": 1.6308, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.8859357696566998, |
|
"grad_norm": 2.084592580795288, |
|
"learning_rate": 1.3946561154811664e-05, |
|
"loss": 1.6335, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.889890840056953, |
|
"grad_norm": 1.7602378129959106, |
|
"learning_rate": 1.3936932910392048e-05, |
|
"loss": 1.6195, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.8938459104572062, |
|
"grad_norm": 2.0157277584075928, |
|
"learning_rate": 1.3927264220638889e-05, |
|
"loss": 1.6395, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.8978009808574593, |
|
"grad_norm": 2.184307336807251, |
|
"learning_rate": 1.391755514630369e-05, |
|
"loss": 1.6448, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.9017560512577124, |
|
"grad_norm": 1.9747377634048462, |
|
"learning_rate": 1.390780574839171e-05, |
|
"loss": 1.6302, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.9057111216579655, |
|
"grad_norm": 2.1203644275665283, |
|
"learning_rate": 1.3898016088161575e-05, |
|
"loss": 1.6447, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.9096661920582186, |
|
"grad_norm": 2.0279908180236816, |
|
"learning_rate": 1.3888186227124885e-05, |
|
"loss": 1.622, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.9136212624584718, |
|
"grad_norm": 1.9809517860412598, |
|
"learning_rate": 1.3878316227045846e-05, |
|
"loss": 1.6189, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.9175763328587249, |
|
"grad_norm": 2.1499814987182617, |
|
"learning_rate": 1.386840614994086e-05, |
|
"loss": 1.6192, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.921531403258978, |
|
"grad_norm": 1.9230985641479492, |
|
"learning_rate": 1.3858456058078148e-05, |
|
"loss": 1.6251, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.9254864736592311, |
|
"grad_norm": 2.1623005867004395, |
|
"learning_rate": 1.3848466013977365e-05, |
|
"loss": 1.6145, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.9294415440594842, |
|
"grad_norm": 2.0579729080200195, |
|
"learning_rate": 1.3838436080409188e-05, |
|
"loss": 1.628, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.9333966144597374, |
|
"grad_norm": 1.9566960334777832, |
|
"learning_rate": 1.3828366320394937e-05, |
|
"loss": 1.6317, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.9373516848599905, |
|
"grad_norm": 1.9498804807662964, |
|
"learning_rate": 1.3818256797206177e-05, |
|
"loss": 1.6107, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.9413067552602437, |
|
"grad_norm": 1.8530848026275635, |
|
"learning_rate": 1.3808107574364312e-05, |
|
"loss": 1.6186, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.9452618256604968, |
|
"grad_norm": 2.0309464931488037, |
|
"learning_rate": 1.3797918715640197e-05, |
|
"loss": 1.6208, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.9492168960607499, |
|
"grad_norm": 2.0447559356689453, |
|
"learning_rate": 1.3787690285053732e-05, |
|
"loss": 1.6226, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.953171966461003, |
|
"grad_norm": 2.198214292526245, |
|
"learning_rate": 1.3777422346873453e-05, |
|
"loss": 1.5954, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.9571270368612561, |
|
"grad_norm": 2.0797901153564453, |
|
"learning_rate": 1.3767114965616143e-05, |
|
"loss": 1.6378, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.9610821072615092, |
|
"grad_norm": 2.361177444458008, |
|
"learning_rate": 1.3756768206046418e-05, |
|
"loss": 1.6207, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.9650371776617623, |
|
"grad_norm": 2.194758653640747, |
|
"learning_rate": 1.3746382133176314e-05, |
|
"loss": 1.6147, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.9689922480620154, |
|
"grad_norm": 2.1259610652923584, |
|
"learning_rate": 1.3735956812264893e-05, |
|
"loss": 1.583, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.9729473184622687, |
|
"grad_norm": 2.084428548812866, |
|
"learning_rate": 1.372549230881782e-05, |
|
"loss": 1.6257, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.9769023888625218, |
|
"grad_norm": 2.0704309940338135, |
|
"learning_rate": 1.3714988688586958e-05, |
|
"loss": 1.6062, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.9808574592627749, |
|
"grad_norm": 1.9661308526992798, |
|
"learning_rate": 1.3704446017569953e-05, |
|
"loss": 1.6164, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.984812529663028, |
|
"grad_norm": 2.1788337230682373, |
|
"learning_rate": 1.3693864362009821e-05, |
|
"loss": 1.6188, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.9887676000632811, |
|
"grad_norm": 1.9492045640945435, |
|
"learning_rate": 1.3683243788394534e-05, |
|
"loss": 1.609, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.9927226704635342, |
|
"grad_norm": 2.2324581146240234, |
|
"learning_rate": 1.3672584363456587e-05, |
|
"loss": 1.6058, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.9966777408637874, |
|
"grad_norm": 2.14666485786438, |
|
"learning_rate": 1.3661886154172602e-05, |
|
"loss": 1.6059, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 1.0006328112640406, |
|
"grad_norm": 2.1909172534942627, |
|
"learning_rate": 1.3651149227762893e-05, |
|
"loss": 1.6006, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 1.0025726272461015, |
|
"grad_norm": 2.143050193786621, |
|
"learning_rate": 1.3640373651691044e-05, |
|
"loss": 1.5379, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.0065305153170268, |
|
"grad_norm": 2.6964924335479736, |
|
"learning_rate": 1.3629559493663487e-05, |
|
"loss": 1.507, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.0104884033879522, |
|
"grad_norm": 2.556349992752075, |
|
"learning_rate": 1.361870682162908e-05, |
|
"loss": 1.5266, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.0144462914588774, |
|
"grad_norm": 2.2218785285949707, |
|
"learning_rate": 1.3607815703778673e-05, |
|
"loss": 1.4867, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 1.0184041795298029, |
|
"grad_norm": 2.5236029624938965, |
|
"learning_rate": 1.3596886208544687e-05, |
|
"loss": 1.4879, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.0223620676007283, |
|
"grad_norm": 2.5998125076293945, |
|
"learning_rate": 1.3585918404600679e-05, |
|
"loss": 1.524, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 1.0263199556716536, |
|
"grad_norm": 2.6749346256256104, |
|
"learning_rate": 1.3574912360860912e-05, |
|
"loss": 1.5148, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.030277843742579, |
|
"grad_norm": 2.4651994705200195, |
|
"learning_rate": 1.3563868146479921e-05, |
|
"loss": 1.4905, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 1.0342357318135043, |
|
"grad_norm": 2.4166133403778076, |
|
"learning_rate": 1.3552785830852084e-05, |
|
"loss": 1.5155, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.0381936198844297, |
|
"grad_norm": 2.2493224143981934, |
|
"learning_rate": 1.3541665483611175e-05, |
|
"loss": 1.515, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 1.042151507955355, |
|
"grad_norm": 2.7555856704711914, |
|
"learning_rate": 1.3530507174629938e-05, |
|
"loss": 1.5136, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 1.0461093960262804, |
|
"grad_norm": 2.7666208744049072, |
|
"learning_rate": 1.3519310974019639e-05, |
|
"loss": 1.4917, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.0500672840972056, |
|
"grad_norm": 2.7659711837768555, |
|
"learning_rate": 1.3508076952129634e-05, |
|
"loss": 1.5076, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 1.054025172168131, |
|
"grad_norm": 2.5507092475891113, |
|
"learning_rate": 1.3496805179546919e-05, |
|
"loss": 1.5052, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 1.0579830602390565, |
|
"grad_norm": 2.302542209625244, |
|
"learning_rate": 1.3485495727095687e-05, |
|
"loss": 1.5034, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 1.0619409483099818, |
|
"grad_norm": 2.578275203704834, |
|
"learning_rate": 1.3474148665836894e-05, |
|
"loss": 1.4886, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 1.0658988363809072, |
|
"grad_norm": 2.354796886444092, |
|
"learning_rate": 1.3462764067067799e-05, |
|
"loss": 1.506, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.0698567244518324, |
|
"grad_norm": 2.5606160163879395, |
|
"learning_rate": 1.345134200232152e-05, |
|
"loss": 1.4931, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 1.073814612522758, |
|
"grad_norm": 2.46881365776062, |
|
"learning_rate": 1.343988254336659e-05, |
|
"loss": 1.503, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 1.0777725005936831, |
|
"grad_norm": 2.657731771469116, |
|
"learning_rate": 1.3428385762206498e-05, |
|
"loss": 1.5064, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 1.0817303886646086, |
|
"grad_norm": 2.3708932399749756, |
|
"learning_rate": 1.3416851731079244e-05, |
|
"loss": 1.4943, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 1.0856882767355338, |
|
"grad_norm": 2.6182353496551514, |
|
"learning_rate": 1.340528052245688e-05, |
|
"loss": 1.5001, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.0896461648064593, |
|
"grad_norm": 2.7265477180480957, |
|
"learning_rate": 1.3393672209045055e-05, |
|
"loss": 1.4794, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 1.0936040528773847, |
|
"grad_norm": 2.6186697483062744, |
|
"learning_rate": 1.3382026863782559e-05, |
|
"loss": 1.4878, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 1.09756194094831, |
|
"grad_norm": 3.3754959106445312, |
|
"learning_rate": 1.3370344559840868e-05, |
|
"loss": 1.4769, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 1.1015198290192354, |
|
"grad_norm": 3.0805869102478027, |
|
"learning_rate": 1.3358625370623684e-05, |
|
"loss": 1.5098, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 1.1054777170901606, |
|
"grad_norm": 2.626561403274536, |
|
"learning_rate": 1.334686936976646e-05, |
|
"loss": 1.4965, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.109435605161086, |
|
"grad_norm": 2.6148223876953125, |
|
"learning_rate": 1.333507663113596e-05, |
|
"loss": 1.4973, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 1.1133934932320113, |
|
"grad_norm": 2.9437952041625977, |
|
"learning_rate": 1.3323247228829781e-05, |
|
"loss": 1.4861, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 1.1173513813029368, |
|
"grad_norm": 2.7327873706817627, |
|
"learning_rate": 1.3311381237175882e-05, |
|
"loss": 1.4725, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 1.121309269373862, |
|
"grad_norm": 2.8548924922943115, |
|
"learning_rate": 1.3299478730732134e-05, |
|
"loss": 1.4825, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 1.1252671574447874, |
|
"grad_norm": 2.638568878173828, |
|
"learning_rate": 1.3287539784285839e-05, |
|
"loss": 1.4715, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.129225045515713, |
|
"grad_norm": 3.1752021312713623, |
|
"learning_rate": 1.327556447285326e-05, |
|
"loss": 1.4749, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 1.1331829335866381, |
|
"grad_norm": 2.8398923873901367, |
|
"learning_rate": 1.3263552871679156e-05, |
|
"loss": 1.4901, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 1.1371408216575636, |
|
"grad_norm": 2.708963632583618, |
|
"learning_rate": 1.3251505056236312e-05, |
|
"loss": 1.4805, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 1.1410987097284888, |
|
"grad_norm": 2.9168691635131836, |
|
"learning_rate": 1.3239421102225049e-05, |
|
"loss": 1.4653, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 1.1450565977994143, |
|
"grad_norm": 2.523481845855713, |
|
"learning_rate": 1.322730108557276e-05, |
|
"loss": 1.4822, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.1490144858703395, |
|
"grad_norm": 3.2788479328155518, |
|
"learning_rate": 1.3215145082433436e-05, |
|
"loss": 1.4685, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 1.152972373941265, |
|
"grad_norm": 2.765491485595703, |
|
"learning_rate": 1.3202953169187181e-05, |
|
"loss": 1.4878, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 1.1569302620121902, |
|
"grad_norm": 2.9480185508728027, |
|
"learning_rate": 1.3190725422439734e-05, |
|
"loss": 1.4607, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 1.1608881500831156, |
|
"grad_norm": 2.9779725074768066, |
|
"learning_rate": 1.3178461919021984e-05, |
|
"loss": 1.4724, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 1.164846038154041, |
|
"grad_norm": 2.768763780593872, |
|
"learning_rate": 1.3166162735989497e-05, |
|
"loss": 1.4989, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.1688039262249663, |
|
"grad_norm": 3.2208807468414307, |
|
"learning_rate": 1.3153827950622019e-05, |
|
"loss": 1.4695, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 1.1727618142958918, |
|
"grad_norm": 2.7532846927642822, |
|
"learning_rate": 1.3141457640423002e-05, |
|
"loss": 1.4841, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 1.176719702366817, |
|
"grad_norm": 3.399897575378418, |
|
"learning_rate": 1.3129051883119107e-05, |
|
"loss": 1.4627, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 1.1806775904377425, |
|
"grad_norm": 2.892542600631714, |
|
"learning_rate": 1.311661075665973e-05, |
|
"loss": 1.4754, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 1.1846354785086677, |
|
"grad_norm": 2.6261606216430664, |
|
"learning_rate": 1.310413433921649e-05, |
|
"loss": 1.4847, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1885933665795931, |
|
"grad_norm": 2.6923210620880127, |
|
"learning_rate": 1.3091622709182762e-05, |
|
"loss": 1.4722, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 1.1925512546505184, |
|
"grad_norm": 3.0266880989074707, |
|
"learning_rate": 1.3079075945173164e-05, |
|
"loss": 1.4922, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 1.1965091427214438, |
|
"grad_norm": 2.9252758026123047, |
|
"learning_rate": 1.306649412602308e-05, |
|
"loss": 1.4692, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 1.2004670307923693, |
|
"grad_norm": 2.757887125015259, |
|
"learning_rate": 1.305387733078815e-05, |
|
"loss": 1.465, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 1.2044249188632945, |
|
"grad_norm": 3.536576271057129, |
|
"learning_rate": 1.304122563874379e-05, |
|
"loss": 1.4625, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.20838280693422, |
|
"grad_norm": 2.765883684158325, |
|
"learning_rate": 1.3028539129384668e-05, |
|
"loss": 1.4509, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 1.2123406950051452, |
|
"grad_norm": 2.8738183975219727, |
|
"learning_rate": 1.3015817882424235e-05, |
|
"loss": 1.4421, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 1.2162985830760706, |
|
"grad_norm": 2.8780033588409424, |
|
"learning_rate": 1.3003061977794207e-05, |
|
"loss": 1.4749, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 1.2202564711469959, |
|
"grad_norm": 3.499163866043091, |
|
"learning_rate": 1.2990271495644059e-05, |
|
"loss": 1.4564, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 1.2242143592179213, |
|
"grad_norm": 2.900714159011841, |
|
"learning_rate": 1.297744651634053e-05, |
|
"loss": 1.4642, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.2281722472888466, |
|
"grad_norm": 2.939680337905884, |
|
"learning_rate": 1.2964587120467122e-05, |
|
"loss": 1.4512, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 1.232130135359772, |
|
"grad_norm": 3.2539045810699463, |
|
"learning_rate": 1.2951693388823577e-05, |
|
"loss": 1.4478, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 1.2360880234306975, |
|
"grad_norm": 3.0612869262695312, |
|
"learning_rate": 1.293876540242539e-05, |
|
"loss": 1.4674, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 1.2400459115016227, |
|
"grad_norm": 2.9142935276031494, |
|
"learning_rate": 1.2925803242503287e-05, |
|
"loss": 1.4704, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 1.2440037995725481, |
|
"grad_norm": 3.0163750648498535, |
|
"learning_rate": 1.291280699050271e-05, |
|
"loss": 1.4433, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.2479616876434734, |
|
"grad_norm": 2.712174654006958, |
|
"learning_rate": 1.289977672808332e-05, |
|
"loss": 1.4637, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 1.2519195757143988, |
|
"grad_norm": 2.7295100688934326, |
|
"learning_rate": 1.2886712537118475e-05, |
|
"loss": 1.4599, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 1.2558774637853243, |
|
"grad_norm": 3.3285765647888184, |
|
"learning_rate": 1.2873614499694717e-05, |
|
"loss": 1.4432, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 1.2598353518562495, |
|
"grad_norm": 3.4205710887908936, |
|
"learning_rate": 1.2860482698111254e-05, |
|
"loss": 1.4571, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 1.2637932399271747, |
|
"grad_norm": 2.865621328353882, |
|
"learning_rate": 1.2847317214879451e-05, |
|
"loss": 1.4434, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.2677511279981002, |
|
"grad_norm": 2.7961373329162598, |
|
"learning_rate": 1.2834118132722296e-05, |
|
"loss": 1.4305, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 1.2717090160690256, |
|
"grad_norm": 2.86441707611084, |
|
"learning_rate": 1.2820885534573903e-05, |
|
"loss": 1.4592, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 1.2756669041399509, |
|
"grad_norm": 3.3199241161346436, |
|
"learning_rate": 1.2807619503578964e-05, |
|
"loss": 1.451, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 1.2796247922108763, |
|
"grad_norm": 2.9922525882720947, |
|
"learning_rate": 1.2794320123092248e-05, |
|
"loss": 1.4447, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 1.2835826802818016, |
|
"grad_norm": 2.9650540351867676, |
|
"learning_rate": 1.2780987476678072e-05, |
|
"loss": 1.4492, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.287540568352727, |
|
"grad_norm": 3.2158126831054688, |
|
"learning_rate": 1.2767621648109765e-05, |
|
"loss": 1.428, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 1.2914984564236525, |
|
"grad_norm": 3.462463617324829, |
|
"learning_rate": 1.275422272136916e-05, |
|
"loss": 1.4355, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 1.2954563444945777, |
|
"grad_norm": 2.9805209636688232, |
|
"learning_rate": 1.2740790780646048e-05, |
|
"loss": 1.4153, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 1.299414232565503, |
|
"grad_norm": 3.3206562995910645, |
|
"learning_rate": 1.2727325910337665e-05, |
|
"loss": 1.425, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 1.3033721206364284, |
|
"grad_norm": 3.306009531021118, |
|
"learning_rate": 1.2713828195048149e-05, |
|
"loss": 1.4398, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.3073300087073538, |
|
"grad_norm": 3.6652069091796875, |
|
"learning_rate": 1.2700297719588015e-05, |
|
"loss": 1.4126, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 1.311287896778279, |
|
"grad_norm": 3.067331314086914, |
|
"learning_rate": 1.268673456897362e-05, |
|
"loss": 1.4453, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 1.3152457848492045, |
|
"grad_norm": 3.4072649478912354, |
|
"learning_rate": 1.2673138828426633e-05, |
|
"loss": 1.4195, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 1.3192036729201297, |
|
"grad_norm": 3.721276044845581, |
|
"learning_rate": 1.2659510583373492e-05, |
|
"loss": 1.4308, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 1.3231615609910552, |
|
"grad_norm": 3.381657361984253, |
|
"learning_rate": 1.2645849919444875e-05, |
|
"loss": 1.4102, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.3271194490619806, |
|
"grad_norm": 3.2021045684814453, |
|
"learning_rate": 1.2632156922475153e-05, |
|
"loss": 1.4248, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 1.3310773371329059, |
|
"grad_norm": 3.0373260974884033, |
|
"learning_rate": 1.2618431678501862e-05, |
|
"loss": 1.4156, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 1.335035225203831, |
|
"grad_norm": 2.7702269554138184, |
|
"learning_rate": 1.2604674273765154e-05, |
|
"loss": 1.4511, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 1.3389931132747566, |
|
"grad_norm": 3.3153131008148193, |
|
"learning_rate": 1.2590884794707254e-05, |
|
"loss": 1.4523, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 1.342951001345682, |
|
"grad_norm": 3.249516248703003, |
|
"learning_rate": 1.2577063327971927e-05, |
|
"loss": 1.4225, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.3469088894166072, |
|
"grad_norm": 3.879835367202759, |
|
"learning_rate": 1.2563209960403921e-05, |
|
"loss": 1.4248, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 1.3508667774875327, |
|
"grad_norm": 3.548116445541382, |
|
"learning_rate": 1.2549324779048432e-05, |
|
"loss": 1.4248, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 1.354824665558458, |
|
"grad_norm": 3.109065294265747, |
|
"learning_rate": 1.253540787115055e-05, |
|
"loss": 1.4269, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 1.3587825536293834, |
|
"grad_norm": 3.3330225944519043, |
|
"learning_rate": 1.2521459324154708e-05, |
|
"loss": 1.4354, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 1.3627404417003088, |
|
"grad_norm": 3.0380284786224365, |
|
"learning_rate": 1.2507479225704149e-05, |
|
"loss": 1.3966, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.366698329771234, |
|
"grad_norm": 3.3855364322662354, |
|
"learning_rate": 1.2493467663640356e-05, |
|
"loss": 1.402, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 1.3706562178421593, |
|
"grad_norm": 3.3429582118988037, |
|
"learning_rate": 1.247942472600251e-05, |
|
"loss": 1.4315, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 1.3746141059130847, |
|
"grad_norm": 3.0505242347717285, |
|
"learning_rate": 1.2465350501026931e-05, |
|
"loss": 1.425, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 1.3785719939840102, |
|
"grad_norm": 3.4241063594818115, |
|
"learning_rate": 1.245124507714654e-05, |
|
"loss": 1.4179, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 1.3825298820549354, |
|
"grad_norm": 3.458108901977539, |
|
"learning_rate": 1.2437108542990274e-05, |
|
"loss": 1.4133, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.3864877701258609, |
|
"grad_norm": 3.9022340774536133, |
|
"learning_rate": 1.2422940987382556e-05, |
|
"loss": 1.4112, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 1.3904456581967861, |
|
"grad_norm": 3.5141968727111816, |
|
"learning_rate": 1.240874249934273e-05, |
|
"loss": 1.4453, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 1.3944035462677116, |
|
"grad_norm": 3.4254074096679688, |
|
"learning_rate": 1.2394513168084485e-05, |
|
"loss": 1.4096, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 1.398361434338637, |
|
"grad_norm": 3.460205316543579, |
|
"learning_rate": 1.2380253083015321e-05, |
|
"loss": 1.4145, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 1.4023193224095623, |
|
"grad_norm": 3.7515103816986084, |
|
"learning_rate": 1.236596233373597e-05, |
|
"loss": 1.4132, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.4062772104804875, |
|
"grad_norm": 3.2565503120422363, |
|
"learning_rate": 1.2351641010039833e-05, |
|
"loss": 1.3945, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 1.410235098551413, |
|
"grad_norm": 3.754737138748169, |
|
"learning_rate": 1.2337289201912429e-05, |
|
"loss": 1.381, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 1.4141929866223384, |
|
"grad_norm": 3.7933449745178223, |
|
"learning_rate": 1.2322906999530811e-05, |
|
"loss": 1.3943, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 1.4181508746932636, |
|
"grad_norm": 3.108177900314331, |
|
"learning_rate": 1.2308494493263014e-05, |
|
"loss": 1.4127, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 1.422108762764189, |
|
"grad_norm": 3.393486499786377, |
|
"learning_rate": 1.2294051773667482e-05, |
|
"loss": 1.3921, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.4260666508351143, |
|
"grad_norm": 3.9485793113708496, |
|
"learning_rate": 1.22795789314925e-05, |
|
"loss": 1.42, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 1.4300245389060398, |
|
"grad_norm": 3.353940725326538, |
|
"learning_rate": 1.2265076057675615e-05, |
|
"loss": 1.412, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 1.4339824269769652, |
|
"grad_norm": 3.861928939819336, |
|
"learning_rate": 1.2250543243343082e-05, |
|
"loss": 1.3952, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 1.4379403150478904, |
|
"grad_norm": 2.9782791137695312, |
|
"learning_rate": 1.2235980579809283e-05, |
|
"loss": 1.3872, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 1.4418982031188157, |
|
"grad_norm": 3.552558660507202, |
|
"learning_rate": 1.2221388158576142e-05, |
|
"loss": 1.3855, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.4458560911897411, |
|
"grad_norm": 3.034158229827881, |
|
"learning_rate": 1.2206766071332568e-05, |
|
"loss": 1.4028, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 1.4498139792606666, |
|
"grad_norm": 3.669677495956421, |
|
"learning_rate": 1.219211440995387e-05, |
|
"loss": 1.3865, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 1.4537718673315918, |
|
"grad_norm": 3.4838759899139404, |
|
"learning_rate": 1.2177433266501182e-05, |
|
"loss": 1.3987, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.4577297554025173, |
|
"grad_norm": 3.4691314697265625, |
|
"learning_rate": 1.2162722733220877e-05, |
|
"loss": 1.3939, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 1.4616876434734425, |
|
"grad_norm": 3.5910284519195557, |
|
"learning_rate": 1.2147982902544004e-05, |
|
"loss": 1.3972, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.465645531544368, |
|
"grad_norm": 3.2121059894561768, |
|
"learning_rate": 1.2133213867085686e-05, |
|
"loss": 1.3762, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 1.4696034196152934, |
|
"grad_norm": 3.8289687633514404, |
|
"learning_rate": 1.2118415719644557e-05, |
|
"loss": 1.4032, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 1.4735613076862186, |
|
"grad_norm": 3.597191095352173, |
|
"learning_rate": 1.2103588553202167e-05, |
|
"loss": 1.3925, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 1.4775191957571439, |
|
"grad_norm": 3.4253151416778564, |
|
"learning_rate": 1.2088732460922407e-05, |
|
"loss": 1.3715, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 1.4814770838280693, |
|
"grad_norm": 3.624340772628784, |
|
"learning_rate": 1.2073847536150912e-05, |
|
"loss": 1.387, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.4854349718989948, |
|
"grad_norm": 3.2783761024475098, |
|
"learning_rate": 1.2058933872414484e-05, |
|
"loss": 1.3837, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 1.48939285996992, |
|
"grad_norm": 3.377274990081787, |
|
"learning_rate": 1.2043991563420501e-05, |
|
"loss": 1.3515, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 1.4933507480408454, |
|
"grad_norm": 3.676497459411621, |
|
"learning_rate": 1.2029020703056327e-05, |
|
"loss": 1.3647, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 1.4973086361117707, |
|
"grad_norm": 4.441483020782471, |
|
"learning_rate": 1.2014021385388727e-05, |
|
"loss": 1.3594, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 1.5012665241826961, |
|
"grad_norm": 4.011296272277832, |
|
"learning_rate": 1.1998993704663267e-05, |
|
"loss": 1.3855, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.5052244122536216, |
|
"grad_norm": 3.3986339569091797, |
|
"learning_rate": 1.1983937755303735e-05, |
|
"loss": 1.3549, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 1.5091823003245468, |
|
"grad_norm": 3.2860589027404785, |
|
"learning_rate": 1.1968853631911532e-05, |
|
"loss": 1.3481, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 1.513140188395472, |
|
"grad_norm": 4.623264789581299, |
|
"learning_rate": 1.1953741429265089e-05, |
|
"loss": 1.3609, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 1.5170980764663975, |
|
"grad_norm": 3.3357603549957275, |
|
"learning_rate": 1.1938601242319269e-05, |
|
"loss": 1.373, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 1.521055964537323, |
|
"grad_norm": 3.6516709327697754, |
|
"learning_rate": 1.1923433166204768e-05, |
|
"loss": 1.3568, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.5250138526082484, |
|
"grad_norm": 4.045721530914307, |
|
"learning_rate": 1.1908237296227522e-05, |
|
"loss": 1.3419, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 1.5289717406791736, |
|
"grad_norm": 3.331871271133423, |
|
"learning_rate": 1.1893013727868098e-05, |
|
"loss": 1.3575, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 1.5329296287500989, |
|
"grad_norm": 3.959519624710083, |
|
"learning_rate": 1.1877762556781109e-05, |
|
"loss": 1.3464, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 1.5368875168210243, |
|
"grad_norm": 4.424190998077393, |
|
"learning_rate": 1.1862483878794596e-05, |
|
"loss": 1.3593, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 1.5408454048919498, |
|
"grad_norm": 4.052654266357422, |
|
"learning_rate": 1.1847177789909441e-05, |
|
"loss": 1.3474, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.544803292962875, |
|
"grad_norm": 3.552598237991333, |
|
"learning_rate": 1.1831844386298758e-05, |
|
"loss": 1.346, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 1.5487611810338002, |
|
"grad_norm": 3.979213237762451, |
|
"learning_rate": 1.1816483764307286e-05, |
|
"loss": 1.3557, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 1.5527190691047257, |
|
"grad_norm": 4.073390960693359, |
|
"learning_rate": 1.1801096020450786e-05, |
|
"loss": 1.3658, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 1.5566769571756511, |
|
"grad_norm": 4.211179256439209, |
|
"learning_rate": 1.1785681251415431e-05, |
|
"loss": 1.346, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 1.5606348452465766, |
|
"grad_norm": 3.6185340881347656, |
|
"learning_rate": 1.177023955405721e-05, |
|
"loss": 1.3686, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.5645927333175018, |
|
"grad_norm": 3.7389111518859863, |
|
"learning_rate": 1.1754771025401307e-05, |
|
"loss": 1.3536, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 1.568550621388427, |
|
"grad_norm": 4.2574357986450195, |
|
"learning_rate": 1.1739275762641494e-05, |
|
"loss": 1.352, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 1.5725085094593525, |
|
"grad_norm": 3.516805410385132, |
|
"learning_rate": 1.1723753863139529e-05, |
|
"loss": 1.3411, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 1.576466397530278, |
|
"grad_norm": 3.5958383083343506, |
|
"learning_rate": 1.1708205424424521e-05, |
|
"loss": 1.3433, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 1.5804242856012032, |
|
"grad_norm": 3.995814323425293, |
|
"learning_rate": 1.1692630544192354e-05, |
|
"loss": 1.3529, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.5843821736721284, |
|
"grad_norm": 3.817218780517578, |
|
"learning_rate": 1.1677029320305041e-05, |
|
"loss": 1.3469, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 1.5883400617430539, |
|
"grad_norm": 4.439276695251465, |
|
"learning_rate": 1.1661401850790119e-05, |
|
"loss": 1.3466, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 1.5922979498139793, |
|
"grad_norm": 3.7181553840637207, |
|
"learning_rate": 1.1645748233840044e-05, |
|
"loss": 1.3476, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 1.5962558378849048, |
|
"grad_norm": 10.218334197998047, |
|
"learning_rate": 1.1630068567811557e-05, |
|
"loss": 1.3602, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 1.60021372595583, |
|
"grad_norm": 4.133950710296631, |
|
"learning_rate": 1.1614362951225075e-05, |
|
"loss": 1.3485, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.6041716140267552, |
|
"grad_norm": 4.007839202880859, |
|
"learning_rate": 1.1598631482764074e-05, |
|
"loss": 1.3594, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 1.6081295020976807, |
|
"grad_norm": 4.194820404052734, |
|
"learning_rate": 1.1582874261274463e-05, |
|
"loss": 1.3383, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 1.6120873901686061, |
|
"grad_norm": 4.193638801574707, |
|
"learning_rate": 1.1567091385763965e-05, |
|
"loss": 1.3715, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 1.6160452782395314, |
|
"grad_norm": 3.5800745487213135, |
|
"learning_rate": 1.15512829554015e-05, |
|
"loss": 1.3494, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 1.6200031663104566, |
|
"grad_norm": 4.548177719116211, |
|
"learning_rate": 1.1535449069516552e-05, |
|
"loss": 1.3442, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.623961054381382, |
|
"grad_norm": 3.7496066093444824, |
|
"learning_rate": 1.1519589827598553e-05, |
|
"loss": 1.329, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 1.6279189424523075, |
|
"grad_norm": 4.815052032470703, |
|
"learning_rate": 1.1503705329296252e-05, |
|
"loss": 1.32, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 1.631876830523233, |
|
"grad_norm": 4.1927103996276855, |
|
"learning_rate": 1.14877956744171e-05, |
|
"loss": 1.3172, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 1.6358347185941582, |
|
"grad_norm": 3.9431440830230713, |
|
"learning_rate": 1.1471860962926604e-05, |
|
"loss": 1.3271, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 1.6397926066650834, |
|
"grad_norm": 4.615567684173584, |
|
"learning_rate": 1.1455901294947722e-05, |
|
"loss": 1.348, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.6437504947360089, |
|
"grad_norm": 4.151221752166748, |
|
"learning_rate": 1.143991677076021e-05, |
|
"loss": 1.3336, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 1.6477083828069343, |
|
"grad_norm": 4.409358978271484, |
|
"learning_rate": 1.142390749080001e-05, |
|
"loss": 1.3254, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 1.6516662708778596, |
|
"grad_norm": 4.490970134735107, |
|
"learning_rate": 1.140787355565861e-05, |
|
"loss": 1.3303, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 1.6556241589487848, |
|
"grad_norm": 4.116312026977539, |
|
"learning_rate": 1.1391815066082418e-05, |
|
"loss": 1.3315, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 1.6595820470197102, |
|
"grad_norm": 4.251399517059326, |
|
"learning_rate": 1.1375732122972124e-05, |
|
"loss": 1.3243, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.6635399350906357, |
|
"grad_norm": 4.184506416320801, |
|
"learning_rate": 1.1359624827382062e-05, |
|
"loss": 1.3226, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 1.6674978231615611, |
|
"grad_norm": 4.628664970397949, |
|
"learning_rate": 1.134349328051959e-05, |
|
"loss": 1.3399, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 1.6714557112324864, |
|
"grad_norm": 4.529860496520996, |
|
"learning_rate": 1.132733758374444e-05, |
|
"loss": 1.2967, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 1.6754135993034116, |
|
"grad_norm": 4.5048699378967285, |
|
"learning_rate": 1.1311157838568083e-05, |
|
"loss": 1.3255, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 1.679371487374337, |
|
"grad_norm": 4.321528911590576, |
|
"learning_rate": 1.1294954146653094e-05, |
|
"loss": 1.311, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.6833293754452625, |
|
"grad_norm": 4.919022083282471, |
|
"learning_rate": 1.1278726609812523e-05, |
|
"loss": 1.3219, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 1.6872872635161877, |
|
"grad_norm": 4.146111965179443, |
|
"learning_rate": 1.126247533000923e-05, |
|
"loss": 1.298, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 1.691245151587113, |
|
"grad_norm": 4.071747779846191, |
|
"learning_rate": 1.1246200409355271e-05, |
|
"loss": 1.313, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 1.6952030396580384, |
|
"grad_norm": 3.8871426582336426, |
|
"learning_rate": 1.1229901950111245e-05, |
|
"loss": 1.3176, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 1.6991609277289639, |
|
"grad_norm": 3.9479401111602783, |
|
"learning_rate": 1.1213580054685644e-05, |
|
"loss": 1.3112, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.7031188157998893, |
|
"grad_norm": 4.039346694946289, |
|
"learning_rate": 1.1197234825634222e-05, |
|
"loss": 1.3109, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 1.7070767038708146, |
|
"grad_norm": 4.356393814086914, |
|
"learning_rate": 1.1180866365659346e-05, |
|
"loss": 1.3202, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 1.7110345919417398, |
|
"grad_norm": 4.11832857131958, |
|
"learning_rate": 1.1164474777609351e-05, |
|
"loss": 1.313, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 1.7149924800126652, |
|
"grad_norm": 4.101129531860352, |
|
"learning_rate": 1.1148060164477887e-05, |
|
"loss": 1.2968, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 1.7189503680835907, |
|
"grad_norm": 3.728778600692749, |
|
"learning_rate": 1.1131622629403289e-05, |
|
"loss": 1.2869, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.722908256154516, |
|
"grad_norm": 3.846654176712036, |
|
"learning_rate": 1.1115162275667909e-05, |
|
"loss": 1.3054, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 1.7268661442254412, |
|
"grad_norm": 4.967803478240967, |
|
"learning_rate": 1.1098679206697474e-05, |
|
"loss": 1.3165, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 1.7308240322963666, |
|
"grad_norm": 4.610755443572998, |
|
"learning_rate": 1.1082173526060454e-05, |
|
"loss": 1.3234, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 1.734781920367292, |
|
"grad_norm": 4.377742290496826, |
|
"learning_rate": 1.1065645337467375e-05, |
|
"loss": 1.3261, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 1.7387398084382175, |
|
"grad_norm": 5.010995864868164, |
|
"learning_rate": 1.1049094744770201e-05, |
|
"loss": 1.3026, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.7426976965091427, |
|
"grad_norm": 4.31913423538208, |
|
"learning_rate": 1.1032521851961665e-05, |
|
"loss": 1.2697, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 1.746655584580068, |
|
"grad_norm": 4.2657060623168945, |
|
"learning_rate": 1.1015926763174617e-05, |
|
"loss": 1.3101, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 1.7506134726509934, |
|
"grad_norm": 3.801684617996216, |
|
"learning_rate": 1.0999309582681372e-05, |
|
"loss": 1.268, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 1.7545713607219189, |
|
"grad_norm": 4.505929946899414, |
|
"learning_rate": 1.0982670414893057e-05, |
|
"loss": 1.2957, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 1.7585292487928441, |
|
"grad_norm": 3.837562084197998, |
|
"learning_rate": 1.0966009364358948e-05, |
|
"loss": 1.3078, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.7624871368637693, |
|
"grad_norm": 4.854923248291016, |
|
"learning_rate": 1.0949326535765823e-05, |
|
"loss": 1.3119, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 1.7664450249346948, |
|
"grad_norm": 5.621912002563477, |
|
"learning_rate": 1.0932622033937294e-05, |
|
"loss": 1.2847, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 1.7704029130056202, |
|
"grad_norm": 4.009350776672363, |
|
"learning_rate": 1.0915895963833152e-05, |
|
"loss": 1.3006, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 1.7743608010765457, |
|
"grad_norm": 3.6888113021850586, |
|
"learning_rate": 1.0899148430548716e-05, |
|
"loss": 1.3032, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 1.778318689147471, |
|
"grad_norm": 4.511534690856934, |
|
"learning_rate": 1.0882379539314155e-05, |
|
"loss": 1.2615, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.7822765772183962, |
|
"grad_norm": 5.086874485015869, |
|
"learning_rate": 1.0865589395493845e-05, |
|
"loss": 1.2634, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 1.7862344652893216, |
|
"grad_norm": 4.865400791168213, |
|
"learning_rate": 1.0848778104585692e-05, |
|
"loss": 1.2876, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 1.790192353360247, |
|
"grad_norm": 4.537430763244629, |
|
"learning_rate": 1.0831945772220487e-05, |
|
"loss": 1.2764, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 1.7941502414311723, |
|
"grad_norm": 4.448334693908691, |
|
"learning_rate": 1.0815092504161214e-05, |
|
"loss": 1.2814, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 1.7981081295020975, |
|
"grad_norm": 4.650451183319092, |
|
"learning_rate": 1.0798218406302422e-05, |
|
"loss": 1.2819, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.802066017573023, |
|
"grad_norm": 4.031219005584717, |
|
"learning_rate": 1.0781323584669524e-05, |
|
"loss": 1.2729, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 1.8060239056439484, |
|
"grad_norm": 4.477336883544922, |
|
"learning_rate": 1.0764408145418157e-05, |
|
"loss": 1.2586, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 1.8099817937148739, |
|
"grad_norm": 4.118893146514893, |
|
"learning_rate": 1.0747472194833506e-05, |
|
"loss": 1.2591, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 1.8139396817857991, |
|
"grad_norm": 4.766265392303467, |
|
"learning_rate": 1.073051583932963e-05, |
|
"loss": 1.2693, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 1.8178975698567243, |
|
"grad_norm": 5.545733451843262, |
|
"learning_rate": 1.0713539185448795e-05, |
|
"loss": 1.2691, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.8218554579276498, |
|
"grad_norm": 4.723430633544922, |
|
"learning_rate": 1.069654233986082e-05, |
|
"loss": 1.2582, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 1.8258133459985753, |
|
"grad_norm": 4.899106025695801, |
|
"learning_rate": 1.0679525409362387e-05, |
|
"loss": 1.2802, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 1.8297712340695005, |
|
"grad_norm": 4.531938552856445, |
|
"learning_rate": 1.066248850087638e-05, |
|
"loss": 1.2683, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 1.8337291221404257, |
|
"grad_norm": 4.174386501312256, |
|
"learning_rate": 1.0645431721451212e-05, |
|
"loss": 1.2538, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 1.8376870102113512, |
|
"grad_norm": 4.827451229095459, |
|
"learning_rate": 1.0628355178260147e-05, |
|
"loss": 1.2571, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.8416448982822766, |
|
"grad_norm": 4.749929904937744, |
|
"learning_rate": 1.0611258978600638e-05, |
|
"loss": 1.2759, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 1.845602786353202, |
|
"grad_norm": 4.659051418304443, |
|
"learning_rate": 1.0594143229893643e-05, |
|
"loss": 1.2698, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 1.8495606744241273, |
|
"grad_norm": 4.492525100708008, |
|
"learning_rate": 1.057700803968295e-05, |
|
"loss": 1.2583, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 1.8535185624950525, |
|
"grad_norm": 4.04518461227417, |
|
"learning_rate": 1.0559853515634509e-05, |
|
"loss": 1.268, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 1.857476450565978, |
|
"grad_norm": 5.029372215270996, |
|
"learning_rate": 1.054267976553575e-05, |
|
"loss": 1.238, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.8614343386369034, |
|
"grad_norm": 4.29434871673584, |
|
"learning_rate": 1.05254868972949e-05, |
|
"loss": 1.2431, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 1.8653922267078287, |
|
"grad_norm": 4.874353408813477, |
|
"learning_rate": 1.050827501894032e-05, |
|
"loss": 1.2339, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 1.869350114778754, |
|
"grad_norm": 4.865941047668457, |
|
"learning_rate": 1.0491044238619817e-05, |
|
"loss": 1.2427, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 1.8733080028496794, |
|
"grad_norm": 4.548977375030518, |
|
"learning_rate": 1.0473794664599957e-05, |
|
"loss": 1.2586, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 1.8772658909206048, |
|
"grad_norm": 5.584561824798584, |
|
"learning_rate": 1.0456526405265402e-05, |
|
"loss": 1.2388, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.8812237789915303, |
|
"grad_norm": 4.570620536804199, |
|
"learning_rate": 1.0439239569118215e-05, |
|
"loss": 1.2499, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 1.8851816670624555, |
|
"grad_norm": 4.9521379470825195, |
|
"learning_rate": 1.0421934264777186e-05, |
|
"loss": 1.2486, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 1.8891395551333807, |
|
"grad_norm": 4.744478702545166, |
|
"learning_rate": 1.0404610600977141e-05, |
|
"loss": 1.2428, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 1.8930974432043062, |
|
"grad_norm": 4.681623935699463, |
|
"learning_rate": 1.0387268686568275e-05, |
|
"loss": 1.2577, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 1.8970553312752316, |
|
"grad_norm": 4.406890392303467, |
|
"learning_rate": 1.0369908630515445e-05, |
|
"loss": 1.2367, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.9010132193461569, |
|
"grad_norm": 4.650542259216309, |
|
"learning_rate": 1.0352530541897507e-05, |
|
"loss": 1.243, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 1.904971107417082, |
|
"grad_norm": 5.0188164710998535, |
|
"learning_rate": 1.0335134529906619e-05, |
|
"loss": 1.2222, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 1.9089289954880075, |
|
"grad_norm": 4.498706340789795, |
|
"learning_rate": 1.0317720703847554e-05, |
|
"loss": 1.2508, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 1.912886883558933, |
|
"grad_norm": 4.909203052520752, |
|
"learning_rate": 1.0300289173137021e-05, |
|
"loss": 1.2241, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 1.9168447716298584, |
|
"grad_norm": 4.85788631439209, |
|
"learning_rate": 1.0282840047302967e-05, |
|
"loss": 1.2268, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.9208026597007837, |
|
"grad_norm": 4.537557601928711, |
|
"learning_rate": 1.0265373435983907e-05, |
|
"loss": 1.2515, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 1.924760547771709, |
|
"grad_norm": 4.660990238189697, |
|
"learning_rate": 1.0247889448928208e-05, |
|
"loss": 1.2531, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 1.9287184358426344, |
|
"grad_norm": 4.9931511878967285, |
|
"learning_rate": 1.0230388195993424e-05, |
|
"loss": 1.2336, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 1.9326763239135598, |
|
"grad_norm": 4.516580581665039, |
|
"learning_rate": 1.0212869787145594e-05, |
|
"loss": 1.2292, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 1.936634211984485, |
|
"grad_norm": 4.5982255935668945, |
|
"learning_rate": 1.0195334332458552e-05, |
|
"loss": 1.2417, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.9405921000554103, |
|
"grad_norm": 5.071137428283691, |
|
"learning_rate": 1.0177781942113238e-05, |
|
"loss": 1.2194, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 1.9445499881263357, |
|
"grad_norm": 4.76341438293457, |
|
"learning_rate": 1.0160212726397001e-05, |
|
"loss": 1.2207, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 1.9485078761972612, |
|
"grad_norm": 4.945827484130859, |
|
"learning_rate": 1.0142626795702916e-05, |
|
"loss": 1.208, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 1.9524657642681866, |
|
"grad_norm": 5.068126678466797, |
|
"learning_rate": 1.0125024260529075e-05, |
|
"loss": 1.2372, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 1.9564236523391119, |
|
"grad_norm": 5.238717079162598, |
|
"learning_rate": 1.010740523147791e-05, |
|
"loss": 1.2287, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.960381540410037, |
|
"grad_norm": 4.586404800415039, |
|
"learning_rate": 1.008976981925548e-05, |
|
"loss": 1.2023, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 1.9643394284809625, |
|
"grad_norm": 5.284154415130615, |
|
"learning_rate": 1.0072118134670792e-05, |
|
"loss": 1.2254, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 1.968297316551888, |
|
"grad_norm": 4.639484882354736, |
|
"learning_rate": 1.0054450288635098e-05, |
|
"loss": 1.2298, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 1.9722552046228132, |
|
"grad_norm": 5.642242908477783, |
|
"learning_rate": 1.003676639216119e-05, |
|
"loss": 1.2196, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 1.9762130926937385, |
|
"grad_norm": 5.445943355560303, |
|
"learning_rate": 1.0019066556362718e-05, |
|
"loss": 1.2035, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.980170980764664, |
|
"grad_norm": 5.127743244171143, |
|
"learning_rate": 1.000135089245348e-05, |
|
"loss": 1.2281, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 1.9841288688355894, |
|
"grad_norm": 4.978102684020996, |
|
"learning_rate": 9.98361951174673e-06, |
|
"loss": 1.22, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 1.9880867569065148, |
|
"grad_norm": 4.681839466094971, |
|
"learning_rate": 9.965872525654468e-06, |
|
"loss": 1.2011, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 1.99204464497744, |
|
"grad_norm": 5.249551296234131, |
|
"learning_rate": 9.948110045686763e-06, |
|
"loss": 1.1931, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 1.9960025330483653, |
|
"grad_norm": 4.80012845993042, |
|
"learning_rate": 9.930332183451022e-06, |
|
"loss": 1.2282, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.9999604211192907, |
|
"grad_norm": 5.271570205688477, |
|
"learning_rate": 9.912539050651309e-06, |
|
"loss": 1.1956, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 2.0018986590720305, |
|
"grad_norm": 7.154332637786865, |
|
"learning_rate": 9.894730759087638e-06, |
|
"loss": 1.1201, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 2.005854198805427, |
|
"grad_norm": 5.223172187805176, |
|
"learning_rate": 9.876907420655272e-06, |
|
"loss": 1.0108, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 2.0098097385388236, |
|
"grad_norm": 5.335000038146973, |
|
"learning_rate": 9.859069147344016e-06, |
|
"loss": 1.0126, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 2.01376527827222, |
|
"grad_norm": 5.62663459777832, |
|
"learning_rate": 9.841216051237518e-06, |
|
"loss": 1.0281, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.0177208180056168, |
|
"grad_norm": 6.090157508850098, |
|
"learning_rate": 9.82334824451256e-06, |
|
"loss": 1.017, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 2.0216763577390133, |
|
"grad_norm": 5.461913108825684, |
|
"learning_rate": 9.805465839438357e-06, |
|
"loss": 1.0429, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 2.02563189747241, |
|
"grad_norm": 4.846314907073975, |
|
"learning_rate": 9.78756894837585e-06, |
|
"loss": 0.9998, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 2.029587437205807, |
|
"grad_norm": 6.22315788269043, |
|
"learning_rate": 9.769657683776997e-06, |
|
"loss": 1.0199, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 2.0335429769392035, |
|
"grad_norm": 5.477888107299805, |
|
"learning_rate": 9.75173215818408e-06, |
|
"loss": 1.0033, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 2.0374985166726, |
|
"grad_norm": 6.461225509643555, |
|
"learning_rate": 9.733792484228973e-06, |
|
"loss": 1.0282, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 2.0414540564059966, |
|
"grad_norm": 6.070066928863525, |
|
"learning_rate": 9.715838774632455e-06, |
|
"loss": 1.0251, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 2.045409596139393, |
|
"grad_norm": 6.459588050842285, |
|
"learning_rate": 9.697871142203504e-06, |
|
"loss": 0.9998, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 2.04936513587279, |
|
"grad_norm": 6.001095771789551, |
|
"learning_rate": 9.679889699838565e-06, |
|
"loss": 0.9961, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 2.0533206756061864, |
|
"grad_norm": 5.671863555908203, |
|
"learning_rate": 9.661894560520868e-06, |
|
"loss": 1.0103, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.057276215339583, |
|
"grad_norm": 6.713314056396484, |
|
"learning_rate": 9.643885837319692e-06, |
|
"loss": 1.0084, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 2.0612317550729795, |
|
"grad_norm": 5.7298903465271, |
|
"learning_rate": 9.62586364338968e-06, |
|
"loss": 1.0145, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 2.0651872948063765, |
|
"grad_norm": 6.283379554748535, |
|
"learning_rate": 9.607828091970112e-06, |
|
"loss": 1.0144, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 2.069142834539773, |
|
"grad_norm": 5.741368293762207, |
|
"learning_rate": 9.589779296384196e-06, |
|
"loss": 0.979, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 2.0730983742731697, |
|
"grad_norm": 5.731276035308838, |
|
"learning_rate": 9.571717370038356e-06, |
|
"loss": 0.9804, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.0770539140065662, |
|
"grad_norm": 6.783426761627197, |
|
"learning_rate": 9.553642426421525e-06, |
|
"loss": 1.0146, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 2.081009453739963, |
|
"grad_norm": 5.622731685638428, |
|
"learning_rate": 9.535554579104428e-06, |
|
"loss": 0.9786, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 2.0849649934733594, |
|
"grad_norm": 5.445565700531006, |
|
"learning_rate": 9.517453941738864e-06, |
|
"loss": 1.0033, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 2.088920533206756, |
|
"grad_norm": 6.861485958099365, |
|
"learning_rate": 9.499340628056999e-06, |
|
"loss": 1.0041, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 2.0928760729401525, |
|
"grad_norm": 6.059487342834473, |
|
"learning_rate": 9.481214751870645e-06, |
|
"loss": 1.0032, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.096831612673549, |
|
"grad_norm": 6.398611068725586, |
|
"learning_rate": 9.463076427070551e-06, |
|
"loss": 1.0016, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 2.100787152406946, |
|
"grad_norm": 6.830140590667725, |
|
"learning_rate": 9.444925767625685e-06, |
|
"loss": 0.9879, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 2.1047426921403427, |
|
"grad_norm": 6.739964008331299, |
|
"learning_rate": 9.42676288758252e-06, |
|
"loss": 0.9824, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 2.1086982318737393, |
|
"grad_norm": 6.853861331939697, |
|
"learning_rate": 9.408587901064308e-06, |
|
"loss": 0.9888, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 2.112653771607136, |
|
"grad_norm": 5.317558765411377, |
|
"learning_rate": 9.390400922270373e-06, |
|
"loss": 0.9797, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.1166093113405324, |
|
"grad_norm": 6.174235820770264, |
|
"learning_rate": 9.372202065475395e-06, |
|
"loss": 0.9894, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 2.120564851073929, |
|
"grad_norm": 5.993900299072266, |
|
"learning_rate": 9.353991445028679e-06, |
|
"loss": 0.9936, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 2.1245203908073256, |
|
"grad_norm": 6.061595916748047, |
|
"learning_rate": 9.335769175353455e-06, |
|
"loss": 0.974, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 2.128475930540722, |
|
"grad_norm": 6.07938289642334, |
|
"learning_rate": 9.317535370946135e-06, |
|
"loss": 0.9915, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 2.1324314702741187, |
|
"grad_norm": 5.920003890991211, |
|
"learning_rate": 9.299290146375627e-06, |
|
"loss": 0.9919, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.1363870100075157, |
|
"grad_norm": 5.969104290008545, |
|
"learning_rate": 9.281033616282572e-06, |
|
"loss": 0.9942, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 2.1403425497409123, |
|
"grad_norm": 6.8970947265625, |
|
"learning_rate": 9.262765895378667e-06, |
|
"loss": 0.9804, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 2.144298089474309, |
|
"grad_norm": 5.78402042388916, |
|
"learning_rate": 9.244487098445915e-06, |
|
"loss": 0.9709, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 2.1482536292077055, |
|
"grad_norm": 6.274438381195068, |
|
"learning_rate": 9.226197340335915e-06, |
|
"loss": 0.9823, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 2.152209168941102, |
|
"grad_norm": 6.251999378204346, |
|
"learning_rate": 9.20789673596914e-06, |
|
"loss": 0.988, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.1561647086744986, |
|
"grad_norm": 6.099167346954346, |
|
"learning_rate": 9.189585400334208e-06, |
|
"loss": 0.9768, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 2.160120248407895, |
|
"grad_norm": 7.657092094421387, |
|
"learning_rate": 9.171263448487175e-06, |
|
"loss": 0.9743, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 2.1640757881412918, |
|
"grad_norm": 6.438418388366699, |
|
"learning_rate": 9.152930995550795e-06, |
|
"loss": 0.9871, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 2.1680313278746883, |
|
"grad_norm": 6.303830623626709, |
|
"learning_rate": 9.134588156713804e-06, |
|
"loss": 0.9851, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 2.1719868676080853, |
|
"grad_norm": 6.700922966003418, |
|
"learning_rate": 9.116235047230198e-06, |
|
"loss": 0.9758, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.175942407341482, |
|
"grad_norm": 7.142539978027344, |
|
"learning_rate": 9.097871782418505e-06, |
|
"loss": 0.9637, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 2.1798979470748785, |
|
"grad_norm": 4.764535903930664, |
|
"learning_rate": 9.079498477661064e-06, |
|
"loss": 1.0001, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 2.183853486808275, |
|
"grad_norm": 7.237622261047363, |
|
"learning_rate": 9.0611152484033e-06, |
|
"loss": 0.9806, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 2.1878090265416716, |
|
"grad_norm": 5.451415061950684, |
|
"learning_rate": 9.042722210152988e-06, |
|
"loss": 0.9846, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 2.191764566275068, |
|
"grad_norm": 7.0478291511535645, |
|
"learning_rate": 9.02431947847955e-06, |
|
"loss": 0.9526, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.195720106008465, |
|
"grad_norm": 6.550572872161865, |
|
"learning_rate": 9.005907169013302e-06, |
|
"loss": 0.957, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 2.1996756457418614, |
|
"grad_norm": 6.440634727478027, |
|
"learning_rate": 8.987485397444744e-06, |
|
"loss": 0.9597, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 2.203631185475258, |
|
"grad_norm": 6.600515842437744, |
|
"learning_rate": 8.969054279523838e-06, |
|
"loss": 0.9765, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 2.207586725208655, |
|
"grad_norm": 6.594021797180176, |
|
"learning_rate": 8.950613931059262e-06, |
|
"loss": 0.9706, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 2.2115422649420515, |
|
"grad_norm": 6.733046531677246, |
|
"learning_rate": 8.932164467917696e-06, |
|
"loss": 0.9315, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.215497804675448, |
|
"grad_norm": 6.6295928955078125, |
|
"learning_rate": 8.913706006023086e-06, |
|
"loss": 0.9543, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 2.2194533444088447, |
|
"grad_norm": 6.274661064147949, |
|
"learning_rate": 8.89523866135593e-06, |
|
"loss": 0.9809, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 2.2234088841422412, |
|
"grad_norm": 6.869567394256592, |
|
"learning_rate": 8.876762549952532e-06, |
|
"loss": 0.9645, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 2.227364423875638, |
|
"grad_norm": 6.911426067352295, |
|
"learning_rate": 8.858277787904283e-06, |
|
"loss": 0.9601, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 2.2313199636090344, |
|
"grad_norm": 5.583986759185791, |
|
"learning_rate": 8.839784491356926e-06, |
|
"loss": 0.9339, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.235275503342431, |
|
"grad_norm": 6.394748210906982, |
|
"learning_rate": 8.821282776509828e-06, |
|
"loss": 0.941, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 2.2392310430758275, |
|
"grad_norm": 5.726268291473389, |
|
"learning_rate": 8.802772759615259e-06, |
|
"loss": 0.9332, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 2.243186582809224, |
|
"grad_norm": 7.450072288513184, |
|
"learning_rate": 8.784254556977645e-06, |
|
"loss": 0.9577, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 2.247142122542621, |
|
"grad_norm": 6.079563140869141, |
|
"learning_rate": 8.76572828495285e-06, |
|
"loss": 0.9683, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 2.2510976622760177, |
|
"grad_norm": 7.1676836013793945, |
|
"learning_rate": 8.747194059947439e-06, |
|
"loss": 0.9258, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.2550532020094143, |
|
"grad_norm": 6.986599445343018, |
|
"learning_rate": 8.728651998417948e-06, |
|
"loss": 0.9305, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 2.259008741742811, |
|
"grad_norm": 5.925799369812012, |
|
"learning_rate": 8.710102216870154e-06, |
|
"loss": 0.9482, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 2.2629642814762074, |
|
"grad_norm": 6.0704731941223145, |
|
"learning_rate": 8.69154483185834e-06, |
|
"loss": 0.9256, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 2.266919821209604, |
|
"grad_norm": 7.043192386627197, |
|
"learning_rate": 8.672979959984563e-06, |
|
"loss": 0.9564, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 2.2708753609430006, |
|
"grad_norm": 5.858555793762207, |
|
"learning_rate": 8.654407717897925e-06, |
|
"loss": 0.9329, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.274830900676397, |
|
"grad_norm": 6.873895168304443, |
|
"learning_rate": 8.635828222293834e-06, |
|
"loss": 0.9305, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 2.2787864404097937, |
|
"grad_norm": 7.290801048278809, |
|
"learning_rate": 8.617241589913279e-06, |
|
"loss": 0.9471, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 2.2827419801431903, |
|
"grad_norm": 6.579754829406738, |
|
"learning_rate": 8.598647937542088e-06, |
|
"loss": 0.925, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 2.2866975198765873, |
|
"grad_norm": 6.567941665649414, |
|
"learning_rate": 8.5800473820102e-06, |
|
"loss": 0.9461, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 2.290653059609984, |
|
"grad_norm": 6.696491718292236, |
|
"learning_rate": 8.561440040190929e-06, |
|
"loss": 0.9557, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.2946085993433805, |
|
"grad_norm": 6.369635581970215, |
|
"learning_rate": 8.542826029000221e-06, |
|
"loss": 0.9515, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 2.298564139076777, |
|
"grad_norm": 6.151893138885498, |
|
"learning_rate": 8.524205465395942e-06, |
|
"loss": 0.9202, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 2.3025196788101736, |
|
"grad_norm": 7.51953649520874, |
|
"learning_rate": 8.505578466377119e-06, |
|
"loss": 0.9508, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 2.30647521854357, |
|
"grad_norm": 6.321157932281494, |
|
"learning_rate": 8.486945148983215e-06, |
|
"loss": 0.9225, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 2.3104307582769668, |
|
"grad_norm": 7.385085582733154, |
|
"learning_rate": 8.468305630293403e-06, |
|
"loss": 0.9446, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.3143862980103633, |
|
"grad_norm": 7.018087387084961, |
|
"learning_rate": 8.449660027425807e-06, |
|
"loss": 0.9214, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 2.31834183774376, |
|
"grad_norm": 8.585993766784668, |
|
"learning_rate": 8.431008457536794e-06, |
|
"loss": 0.9326, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 2.322297377477157, |
|
"grad_norm": 7.048767566680908, |
|
"learning_rate": 8.412351037820208e-06, |
|
"loss": 0.9418, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 2.3262529172105535, |
|
"grad_norm": 6.425539016723633, |
|
"learning_rate": 8.393687885506664e-06, |
|
"loss": 0.9113, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 2.33020845694395, |
|
"grad_norm": 6.738077163696289, |
|
"learning_rate": 8.375019117862787e-06, |
|
"loss": 0.9182, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.3341639966773466, |
|
"grad_norm": 6.531277656555176, |
|
"learning_rate": 8.356344852190489e-06, |
|
"loss": 0.9346, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 2.338119536410743, |
|
"grad_norm": 7.382998943328857, |
|
"learning_rate": 8.337665205826229e-06, |
|
"loss": 0.9214, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 2.34207507614414, |
|
"grad_norm": 8.067983627319336, |
|
"learning_rate": 8.31898029614027e-06, |
|
"loss": 0.9384, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 2.3460306158775364, |
|
"grad_norm": 7.663227558135986, |
|
"learning_rate": 8.300290240535956e-06, |
|
"loss": 0.9214, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 2.349986155610933, |
|
"grad_norm": 7.118049144744873, |
|
"learning_rate": 8.281595156448947e-06, |
|
"loss": 0.9168, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 2.3539416953443295, |
|
"grad_norm": 6.668679714202881, |
|
"learning_rate": 8.262895161346518e-06, |
|
"loss": 0.8854, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 2.3578972350777265, |
|
"grad_norm": 5.939303874969482, |
|
"learning_rate": 8.24419037272679e-06, |
|
"loss": 0.9283, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 2.361852774811123, |
|
"grad_norm": 7.534061431884766, |
|
"learning_rate": 8.225480908118005e-06, |
|
"loss": 0.9209, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 2.3658083145445197, |
|
"grad_norm": 6.419159889221191, |
|
"learning_rate": 8.206766885077787e-06, |
|
"loss": 0.9132, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 2.3697638542779162, |
|
"grad_norm": 6.306091785430908, |
|
"learning_rate": 8.188048421192401e-06, |
|
"loss": 0.9236, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.373719394011313, |
|
"grad_norm": 7.05325984954834, |
|
"learning_rate": 8.169325634076016e-06, |
|
"loss": 0.9022, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 2.3776749337447094, |
|
"grad_norm": 7.843636989593506, |
|
"learning_rate": 8.15059864136997e-06, |
|
"loss": 0.8979, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 2.381630473478106, |
|
"grad_norm": 6.075581073760986, |
|
"learning_rate": 8.131867560742015e-06, |
|
"loss": 0.9238, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 2.3855860132115025, |
|
"grad_norm": 8.050814628601074, |
|
"learning_rate": 8.113132509885597e-06, |
|
"loss": 0.8741, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 2.389541552944899, |
|
"grad_norm": 6.963539123535156, |
|
"learning_rate": 8.09439360651911e-06, |
|
"loss": 0.9003, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.393497092678296, |
|
"grad_norm": 6.0645365715026855, |
|
"learning_rate": 8.075650968385143e-06, |
|
"loss": 0.8924, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 2.3974526324116927, |
|
"grad_norm": 6.974462032318115, |
|
"learning_rate": 8.056904713249768e-06, |
|
"loss": 0.9095, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 2.4014081721450893, |
|
"grad_norm": 6.542774200439453, |
|
"learning_rate": 8.03815495890177e-06, |
|
"loss": 0.8817, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 2.405363711878486, |
|
"grad_norm": 6.8144426345825195, |
|
"learning_rate": 8.019401823151932e-06, |
|
"loss": 0.8771, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 2.4093192516118824, |
|
"grad_norm": 7.659158229827881, |
|
"learning_rate": 8.00064542383227e-06, |
|
"loss": 0.9049, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.413274791345279, |
|
"grad_norm": 8.229734420776367, |
|
"learning_rate": 7.981885878795319e-06, |
|
"loss": 0.8774, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 2.4172303310786756, |
|
"grad_norm": 6.553606033325195, |
|
"learning_rate": 7.963123305913372e-06, |
|
"loss": 0.8937, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 2.421185870812072, |
|
"grad_norm": 6.9676737785339355, |
|
"learning_rate": 7.94435782307775e-06, |
|
"loss": 0.8998, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 2.4251414105454687, |
|
"grad_norm": 7.392206192016602, |
|
"learning_rate": 7.925589548198057e-06, |
|
"loss": 0.8958, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 2.4290969502788657, |
|
"grad_norm": 6.842816352844238, |
|
"learning_rate": 7.906818599201438e-06, |
|
"loss": 0.9044, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.4330524900122623, |
|
"grad_norm": 7.204736709594727, |
|
"learning_rate": 7.88804509403185e-06, |
|
"loss": 0.884, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 2.437008029745659, |
|
"grad_norm": 9.083603858947754, |
|
"learning_rate": 7.869269150649295e-06, |
|
"loss": 0.8925, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 2.4409635694790555, |
|
"grad_norm": 6.365993499755859, |
|
"learning_rate": 7.850490887029114e-06, |
|
"loss": 0.8889, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 2.444919109212452, |
|
"grad_norm": 7.185299873352051, |
|
"learning_rate": 7.83171042116121e-06, |
|
"loss": 0.8878, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 2.4488746489458486, |
|
"grad_norm": 6.3879313468933105, |
|
"learning_rate": 7.812927871049331e-06, |
|
"loss": 0.8851, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.452830188679245, |
|
"grad_norm": 6.968358993530273, |
|
"learning_rate": 7.794143354710324e-06, |
|
"loss": 0.9103, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 2.4567857284126418, |
|
"grad_norm": 6.729606628417969, |
|
"learning_rate": 7.775356990173385e-06, |
|
"loss": 0.9025, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 2.4607412681460383, |
|
"grad_norm": 7.090347766876221, |
|
"learning_rate": 7.756568895479326e-06, |
|
"loss": 0.8676, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 2.4646968078794353, |
|
"grad_norm": 6.751077651977539, |
|
"learning_rate": 7.737779188679826e-06, |
|
"loss": 0.9007, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 2.468652347612832, |
|
"grad_norm": 6.40106725692749, |
|
"learning_rate": 7.7189879878367e-06, |
|
"loss": 0.8638, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.4726078873462285, |
|
"grad_norm": 6.530547142028809, |
|
"learning_rate": 7.700195411021144e-06, |
|
"loss": 0.8462, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 2.476563427079625, |
|
"grad_norm": 7.113738536834717, |
|
"learning_rate": 7.681401576313006e-06, |
|
"loss": 0.8792, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 2.4805189668130216, |
|
"grad_norm": 6.407541751861572, |
|
"learning_rate": 7.66260660180003e-06, |
|
"loss": 0.8497, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 2.484474506546418, |
|
"grad_norm": 6.465631484985352, |
|
"learning_rate": 7.643810605577129e-06, |
|
"loss": 0.8836, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 2.488430046279815, |
|
"grad_norm": 6.723154544830322, |
|
"learning_rate": 7.625013705745631e-06, |
|
"loss": 0.861, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.4923855860132114, |
|
"grad_norm": 7.3425374031066895, |
|
"learning_rate": 7.606216020412547e-06, |
|
"loss": 0.8444, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 2.496341125746608, |
|
"grad_norm": 7.729851722717285, |
|
"learning_rate": 7.5874176676898145e-06, |
|
"loss": 0.8566, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 2.500296665480005, |
|
"grad_norm": 7.338516712188721, |
|
"learning_rate": 7.568618765693576e-06, |
|
"loss": 0.8695, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 2.5042522052134015, |
|
"grad_norm": 6.624641418457031, |
|
"learning_rate": 7.549819432543413e-06, |
|
"loss": 0.8575, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 2.508207744946798, |
|
"grad_norm": 7.888009071350098, |
|
"learning_rate": 7.531019786361626e-06, |
|
"loss": 0.8407, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.5121632846801947, |
|
"grad_norm": 6.383202075958252, |
|
"learning_rate": 7.512219945272479e-06, |
|
"loss": 0.8558, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 2.5161188244135912, |
|
"grad_norm": 7.635325908660889, |
|
"learning_rate": 7.493420027401461e-06, |
|
"loss": 0.8549, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 2.520074364146988, |
|
"grad_norm": 7.805452346801758, |
|
"learning_rate": 7.474620150874539e-06, |
|
"loss": 0.8518, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 2.5240299038803844, |
|
"grad_norm": 7.43981409072876, |
|
"learning_rate": 7.455820433817427e-06, |
|
"loss": 0.8319, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 2.527985443613781, |
|
"grad_norm": 7.546152114868164, |
|
"learning_rate": 7.437020994354832e-06, |
|
"loss": 0.8696, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.5319409833471775, |
|
"grad_norm": 7.283714771270752, |
|
"learning_rate": 7.418221950609721e-06, |
|
"loss": 0.8252, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 2.5358965230805746, |
|
"grad_norm": 6.995747089385986, |
|
"learning_rate": 7.399423420702571e-06, |
|
"loss": 0.8304, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 2.5398520628139707, |
|
"grad_norm": 7.93922233581543, |
|
"learning_rate": 7.380625522750634e-06, |
|
"loss": 0.8077, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 2.5438076025473677, |
|
"grad_norm": 7.082808494567871, |
|
"learning_rate": 7.361828374867182e-06, |
|
"loss": 0.8093, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 2.5477631422807643, |
|
"grad_norm": 7.408179759979248, |
|
"learning_rate": 7.343032095160793e-06, |
|
"loss": 0.8371, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.551718682014161, |
|
"grad_norm": 6.96567964553833, |
|
"learning_rate": 7.324236801734566e-06, |
|
"loss": 0.8307, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 2.5556742217475574, |
|
"grad_norm": 6.544528007507324, |
|
"learning_rate": 7.305442612685424e-06, |
|
"loss": 0.8196, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 2.559629761480954, |
|
"grad_norm": 7.655238151550293, |
|
"learning_rate": 7.2866496461033335e-06, |
|
"loss": 0.8378, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 2.5635853012143506, |
|
"grad_norm": 7.466569423675537, |
|
"learning_rate": 7.267858020070598e-06, |
|
"loss": 0.8538, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 2.567540840947747, |
|
"grad_norm": 6.754441261291504, |
|
"learning_rate": 7.249067852661077e-06, |
|
"loss": 0.848, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.571496380681144, |
|
"grad_norm": 7.796608924865723, |
|
"learning_rate": 7.230279261939484e-06, |
|
"loss": 0.8364, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 2.5754519204145403, |
|
"grad_norm": 7.588559150695801, |
|
"learning_rate": 7.211492365960615e-06, |
|
"loss": 0.87, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 2.5794074601479373, |
|
"grad_norm": 6.7228312492370605, |
|
"learning_rate": 7.19270728276862e-06, |
|
"loss": 0.8605, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 2.583362999881334, |
|
"grad_norm": 7.157680511474609, |
|
"learning_rate": 7.173924130396258e-06, |
|
"loss": 0.8147, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 2.5873185396147305, |
|
"grad_norm": 7.738386154174805, |
|
"learning_rate": 7.155143026864159e-06, |
|
"loss": 0.8195, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.591274079348127, |
|
"grad_norm": 7.59410285949707, |
|
"learning_rate": 7.136364090180074e-06, |
|
"loss": 0.8337, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 2.5952296190815236, |
|
"grad_norm": 6.822688102722168, |
|
"learning_rate": 7.1175874383381455e-06, |
|
"loss": 0.83, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 2.59918515881492, |
|
"grad_norm": 8.452685356140137, |
|
"learning_rate": 7.098813189318153e-06, |
|
"loss": 0.8319, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 2.6031406985483168, |
|
"grad_norm": 6.633552551269531, |
|
"learning_rate": 7.080041461084785e-06, |
|
"loss": 0.8334, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 2.6070962382817138, |
|
"grad_norm": 7.842954635620117, |
|
"learning_rate": 7.061272371586885e-06, |
|
"loss": 0.8062, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.61105177801511, |
|
"grad_norm": 6.577658653259277, |
|
"learning_rate": 7.0425060387567215e-06, |
|
"loss": 0.8413, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 2.615007317748507, |
|
"grad_norm": 7.939908027648926, |
|
"learning_rate": 7.023742580509236e-06, |
|
"loss": 0.8127, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 2.6189628574819035, |
|
"grad_norm": 7.662463188171387, |
|
"learning_rate": 7.004982114741316e-06, |
|
"loss": 0.7996, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 2.6229183972153, |
|
"grad_norm": 6.9583635330200195, |
|
"learning_rate": 6.986224759331036e-06, |
|
"loss": 0.8232, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 2.6268739369486966, |
|
"grad_norm": 8.075411796569824, |
|
"learning_rate": 6.967470632136942e-06, |
|
"loss": 0.7982, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.630829476682093, |
|
"grad_norm": 7.972960472106934, |
|
"learning_rate": 6.948719850997278e-06, |
|
"loss": 0.836, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 2.63478501641549, |
|
"grad_norm": 7.087239742279053, |
|
"learning_rate": 6.92997253372928e-06, |
|
"loss": 0.8047, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 2.6387405561488864, |
|
"grad_norm": 7.775416374206543, |
|
"learning_rate": 6.911228798128405e-06, |
|
"loss": 0.8307, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 2.6426960958822834, |
|
"grad_norm": 7.944192409515381, |
|
"learning_rate": 6.892488761967621e-06, |
|
"loss": 0.8103, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 2.6466516356156795, |
|
"grad_norm": 7.953451156616211, |
|
"learning_rate": 6.8737525429966335e-06, |
|
"loss": 0.8285, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.6506071753490765, |
|
"grad_norm": 9.518111228942871, |
|
"learning_rate": 6.855020258941179e-06, |
|
"loss": 0.807, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 2.654562715082473, |
|
"grad_norm": 7.476140975952148, |
|
"learning_rate": 6.836292027502261e-06, |
|
"loss": 0.7977, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 2.6585182548158697, |
|
"grad_norm": 7.218750953674316, |
|
"learning_rate": 6.8175679663554224e-06, |
|
"loss": 0.8084, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 2.6624737945492662, |
|
"grad_norm": 8.890192031860352, |
|
"learning_rate": 6.798848193149998e-06, |
|
"loss": 0.7993, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 2.666429334282663, |
|
"grad_norm": 8.358793258666992, |
|
"learning_rate": 6.78013282550839e-06, |
|
"loss": 0.8348, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.6703848740160594, |
|
"grad_norm": 8.094076156616211, |
|
"learning_rate": 6.761421981025306e-06, |
|
"loss": 0.8185, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 2.674340413749456, |
|
"grad_norm": 8.059944152832031, |
|
"learning_rate": 6.742715777267045e-06, |
|
"loss": 0.8074, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 2.678295953482853, |
|
"grad_norm": 7.6637282371521, |
|
"learning_rate": 6.724014331770736e-06, |
|
"loss": 0.7986, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 2.682251493216249, |
|
"grad_norm": 8.657379150390625, |
|
"learning_rate": 6.705317762043622e-06, |
|
"loss": 0.8139, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 2.686207032949646, |
|
"grad_norm": 7.7766337394714355, |
|
"learning_rate": 6.6866261855623e-06, |
|
"loss": 0.8059, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.6901625726830427, |
|
"grad_norm": 7.386256694793701, |
|
"learning_rate": 6.667939719772001e-06, |
|
"loss": 0.7984, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 2.6941181124164393, |
|
"grad_norm": 8.123387336730957, |
|
"learning_rate": 6.649258482085834e-06, |
|
"loss": 0.7747, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 2.698073652149836, |
|
"grad_norm": 7.112607479095459, |
|
"learning_rate": 6.6305825898840686e-06, |
|
"loss": 0.7937, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 2.7020291918832324, |
|
"grad_norm": 8.249123573303223, |
|
"learning_rate": 6.611912160513378e-06, |
|
"loss": 0.7727, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 2.705984731616629, |
|
"grad_norm": 7.522756576538086, |
|
"learning_rate": 6.5932473112861184e-06, |
|
"loss": 0.7639, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.7099402713500256, |
|
"grad_norm": 9.555477142333984, |
|
"learning_rate": 6.574588159479576e-06, |
|
"loss": 0.8042, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 2.7138958110834226, |
|
"grad_norm": 6.161262035369873, |
|
"learning_rate": 6.555934822335251e-06, |
|
"loss": 0.8121, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 2.7178513508168187, |
|
"grad_norm": 8.5349702835083, |
|
"learning_rate": 6.53728741705809e-06, |
|
"loss": 0.7859, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 2.7218068905502157, |
|
"grad_norm": 9.312299728393555, |
|
"learning_rate": 6.518646060815787e-06, |
|
"loss": 0.7879, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 2.7257624302836123, |
|
"grad_norm": 6.545658111572266, |
|
"learning_rate": 6.50001087073801e-06, |
|
"loss": 0.7874, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.729717970017009, |
|
"grad_norm": 8.577535629272461, |
|
"learning_rate": 6.4813819639157e-06, |
|
"loss": 0.7922, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 2.7336735097504055, |
|
"grad_norm": 8.140532493591309, |
|
"learning_rate": 6.462759457400304e-06, |
|
"loss": 0.7909, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 2.737629049483802, |
|
"grad_norm": 8.126466751098633, |
|
"learning_rate": 6.444143468203064e-06, |
|
"loss": 0.7711, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 2.7415845892171986, |
|
"grad_norm": 8.574898719787598, |
|
"learning_rate": 6.425534113294266e-06, |
|
"loss": 0.7845, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 2.745540128950595, |
|
"grad_norm": 8.131915092468262, |
|
"learning_rate": 6.4069315096025075e-06, |
|
"loss": 0.7602, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.749495668683992, |
|
"grad_norm": 7.594697952270508, |
|
"learning_rate": 6.388335774013979e-06, |
|
"loss": 0.7758, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 2.7534512084173883, |
|
"grad_norm": 7.075917720794678, |
|
"learning_rate": 6.369747023371698e-06, |
|
"loss": 0.775, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 2.7574067481507853, |
|
"grad_norm": 6.975893974304199, |
|
"learning_rate": 6.351165374474812e-06, |
|
"loss": 0.7949, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 2.761362287884182, |
|
"grad_norm": 8.883713722229004, |
|
"learning_rate": 6.332590944077832e-06, |
|
"loss": 0.7902, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 2.7653178276175785, |
|
"grad_norm": 7.061893939971924, |
|
"learning_rate": 6.31402384888992e-06, |
|
"loss": 0.7679, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.769273367350975, |
|
"grad_norm": 7.214616775512695, |
|
"learning_rate": 6.295464205574145e-06, |
|
"loss": 0.7628, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 2.7732289070843716, |
|
"grad_norm": 8.735739707946777, |
|
"learning_rate": 6.276912130746758e-06, |
|
"loss": 0.7693, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 2.777184446817768, |
|
"grad_norm": 7.975681781768799, |
|
"learning_rate": 6.258367740976447e-06, |
|
"loss": 0.7804, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 2.781139986551165, |
|
"grad_norm": 7.6719255447387695, |
|
"learning_rate": 6.239831152783622e-06, |
|
"loss": 0.7647, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 2.7850955262845614, |
|
"grad_norm": 7.42200231552124, |
|
"learning_rate": 6.221302482639665e-06, |
|
"loss": 0.7633, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.789051066017958, |
|
"grad_norm": 6.804473400115967, |
|
"learning_rate": 6.202781846966211e-06, |
|
"loss": 0.7689, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 2.793006605751355, |
|
"grad_norm": 9.550134658813477, |
|
"learning_rate": 6.184269362134409e-06, |
|
"loss": 0.7762, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 2.7969621454847515, |
|
"grad_norm": 8.442462921142578, |
|
"learning_rate": 6.165765144464196e-06, |
|
"loss": 0.752, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 2.800917685218148, |
|
"grad_norm": 7.215641975402832, |
|
"learning_rate": 6.147269310223561e-06, |
|
"loss": 0.7751, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 2.8048732249515447, |
|
"grad_norm": 7.342108249664307, |
|
"learning_rate": 6.128781975627822e-06, |
|
"loss": 0.7838, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.8088287646849412, |
|
"grad_norm": 7.887964248657227, |
|
"learning_rate": 6.110303256838884e-06, |
|
"loss": 0.755, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 2.812784304418338, |
|
"grad_norm": 7.857334613800049, |
|
"learning_rate": 6.091833269964524e-06, |
|
"loss": 0.7606, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 2.8167398441517344, |
|
"grad_norm": 6.394157886505127, |
|
"learning_rate": 6.073372131057644e-06, |
|
"loss": 0.7729, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 2.820695383885131, |
|
"grad_norm": 7.473947048187256, |
|
"learning_rate": 6.054919956115565e-06, |
|
"loss": 0.7462, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 2.8246509236185275, |
|
"grad_norm": 7.744419574737549, |
|
"learning_rate": 6.036476861079267e-06, |
|
"loss": 0.7493, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.8286064633519246, |
|
"grad_norm": 7.414424419403076, |
|
"learning_rate": 6.018042961832697e-06, |
|
"loss": 0.7574, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 2.832562003085321, |
|
"grad_norm": 9.066329956054688, |
|
"learning_rate": 5.999618374202004e-06, |
|
"loss": 0.7342, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 2.8365175428187177, |
|
"grad_norm": 8.319313049316406, |
|
"learning_rate": 5.981203213954842e-06, |
|
"loss": 0.7503, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 2.8404730825521143, |
|
"grad_norm": 7.850893020629883, |
|
"learning_rate": 5.962797596799623e-06, |
|
"loss": 0.7589, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 2.844428622285511, |
|
"grad_norm": 7.3950300216674805, |
|
"learning_rate": 5.9444016383848016e-06, |
|
"loss": 0.7339, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.8483841620189074, |
|
"grad_norm": 7.9359450340271, |
|
"learning_rate": 5.926015454298135e-06, |
|
"loss": 0.7339, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 2.852339701752304, |
|
"grad_norm": 7.991437911987305, |
|
"learning_rate": 5.907639160065978e-06, |
|
"loss": 0.7539, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 2.8562952414857006, |
|
"grad_norm": 8.999034881591797, |
|
"learning_rate": 5.88927287115253e-06, |
|
"loss": 0.7305, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 2.860250781219097, |
|
"grad_norm": 9.365317344665527, |
|
"learning_rate": 5.870916702959137e-06, |
|
"loss": 0.7632, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 2.864206320952494, |
|
"grad_norm": 7.4115824699401855, |
|
"learning_rate": 5.852570770823541e-06, |
|
"loss": 0.7425, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.8681618606858907, |
|
"grad_norm": 8.937634468078613, |
|
"learning_rate": 5.834235190019177e-06, |
|
"loss": 0.7133, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 2.8721174004192873, |
|
"grad_norm": 7.646848201751709, |
|
"learning_rate": 5.8159100757544335e-06, |
|
"loss": 0.7018, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 2.876072940152684, |
|
"grad_norm": 10.104166984558105, |
|
"learning_rate": 5.797595543171939e-06, |
|
"loss": 0.7148, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 2.8800284798860805, |
|
"grad_norm": 8.80380630493164, |
|
"learning_rate": 5.779291707347829e-06, |
|
"loss": 0.7148, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 2.883984019619477, |
|
"grad_norm": 9.618269920349121, |
|
"learning_rate": 5.760998683291034e-06, |
|
"loss": 0.7501, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.8879395593528736, |
|
"grad_norm": 8.776264190673828, |
|
"learning_rate": 5.742716585942541e-06, |
|
"loss": 0.7338, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 2.89189509908627, |
|
"grad_norm": 8.551417350769043, |
|
"learning_rate": 5.724445530174691e-06, |
|
"loss": 0.7408, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 2.8958506388196668, |
|
"grad_norm": 7.699435710906982, |
|
"learning_rate": 5.706185630790439e-06, |
|
"loss": 0.7334, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 2.8998061785530638, |
|
"grad_norm": 7.05002498626709, |
|
"learning_rate": 5.687937002522653e-06, |
|
"loss": 0.7347, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 2.9037617182864603, |
|
"grad_norm": 7.351695537567139, |
|
"learning_rate": 5.669699760033361e-06, |
|
"loss": 0.7302, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.907717258019857, |
|
"grad_norm": 8.033913612365723, |
|
"learning_rate": 5.651474017913075e-06, |
|
"loss": 0.7161, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 2.9116727977532535, |
|
"grad_norm": 8.220582962036133, |
|
"learning_rate": 5.633259890680022e-06, |
|
"loss": 0.6886, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 2.91562833748665, |
|
"grad_norm": 7.48123025894165, |
|
"learning_rate": 5.615057492779469e-06, |
|
"loss": 0.7375, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 2.9195838772200466, |
|
"grad_norm": 9.332476615905762, |
|
"learning_rate": 5.59686693858297e-06, |
|
"loss": 0.7198, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 2.923539416953443, |
|
"grad_norm": 7.856235027313232, |
|
"learning_rate": 5.57868834238767e-06, |
|
"loss": 0.7097, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.92749495668684, |
|
"grad_norm": 8.680730819702148, |
|
"learning_rate": 5.560521818415573e-06, |
|
"loss": 0.711, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 2.9314504964202364, |
|
"grad_norm": 8.170439720153809, |
|
"learning_rate": 5.542367480812831e-06, |
|
"loss": 0.717, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 2.9354060361536334, |
|
"grad_norm": 9.730953216552734, |
|
"learning_rate": 5.524225443649023e-06, |
|
"loss": 0.698, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 2.9393615758870295, |
|
"grad_norm": 9.266618728637695, |
|
"learning_rate": 5.5060958209164475e-06, |
|
"loss": 0.7083, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 2.9433171156204265, |
|
"grad_norm": 8.5849027633667, |
|
"learning_rate": 5.487978726529389e-06, |
|
"loss": 0.7114, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.947272655353823, |
|
"grad_norm": 8.211541175842285, |
|
"learning_rate": 5.46987427432342e-06, |
|
"loss": 0.7097, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 2.9512281950872197, |
|
"grad_norm": 7.857696533203125, |
|
"learning_rate": 5.451782578054672e-06, |
|
"loss": 0.7058, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 2.9551837348206162, |
|
"grad_norm": 9.360848426818848, |
|
"learning_rate": 5.433703751399134e-06, |
|
"loss": 0.7026, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 2.959139274554013, |
|
"grad_norm": 7.426985740661621, |
|
"learning_rate": 5.415637907951924e-06, |
|
"loss": 0.7183, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 2.9630948142874094, |
|
"grad_norm": 8.63637638092041, |
|
"learning_rate": 5.397585161226589e-06, |
|
"loss": 0.7005, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.967050354020806, |
|
"grad_norm": 8.272201538085938, |
|
"learning_rate": 5.379545624654378e-06, |
|
"loss": 0.6982, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 2.971005893754203, |
|
"grad_norm": 7.2338433265686035, |
|
"learning_rate": 5.3615194115835435e-06, |
|
"loss": 0.7259, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 2.974961433487599, |
|
"grad_norm": 8.305835723876953, |
|
"learning_rate": 5.343506635278613e-06, |
|
"loss": 0.7132, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 2.978916973220996, |
|
"grad_norm": 8.030599594116211, |
|
"learning_rate": 5.325507408919701e-06, |
|
"loss": 0.6689, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 2.9828725129543927, |
|
"grad_norm": 7.884495735168457, |
|
"learning_rate": 5.3075218456017635e-06, |
|
"loss": 0.7073, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.9868280526877893, |
|
"grad_norm": 7.318825721740723, |
|
"learning_rate": 5.289550058333928e-06, |
|
"loss": 0.7116, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 2.990783592421186, |
|
"grad_norm": 7.598905563354492, |
|
"learning_rate": 5.271592160038745e-06, |
|
"loss": 0.6911, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 2.9947391321545824, |
|
"grad_norm": 8.233808517456055, |
|
"learning_rate": 5.2536482635515085e-06, |
|
"loss": 0.7179, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 2.998694671887979, |
|
"grad_norm": 10.12309455871582, |
|
"learning_rate": 5.235718481619525e-06, |
|
"loss": 0.6761, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 3.0006328112640404, |
|
"grad_norm": 8.129044532775879, |
|
"learning_rate": 5.217802926901425e-06, |
|
"loss": 0.6679, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 3.0045878816642935, |
|
"grad_norm": 7.123943328857422, |
|
"learning_rate": 5.199901711966435e-06, |
|
"loss": 0.547, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 3.0085429520645466, |
|
"grad_norm": 7.461557388305664, |
|
"learning_rate": 5.182014949293688e-06, |
|
"loss": 0.5426, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 3.0124980224647997, |
|
"grad_norm": 10.335936546325684, |
|
"learning_rate": 5.164142751271501e-06, |
|
"loss": 0.5431, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 3.016453092865053, |
|
"grad_norm": 10.355412483215332, |
|
"learning_rate": 5.146285230196684e-06, |
|
"loss": 0.5642, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 3.020408163265306, |
|
"grad_norm": 7.330338001251221, |
|
"learning_rate": 5.12844249827382e-06, |
|
"loss": 0.5149, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 3.024363233665559, |
|
"grad_norm": 8.793408393859863, |
|
"learning_rate": 5.1106146676145755e-06, |
|
"loss": 0.5409, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 3.028318304065812, |
|
"grad_norm": 10.123501777648926, |
|
"learning_rate": 5.092801850236977e-06, |
|
"loss": 0.553, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 3.0322733744660657, |
|
"grad_norm": 7.840044021606445, |
|
"learning_rate": 5.075004158064728e-06, |
|
"loss": 0.5339, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 3.036228444866319, |
|
"grad_norm": 7.7024736404418945, |
|
"learning_rate": 5.057221702926488e-06, |
|
"loss": 0.5595, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 3.040183515266572, |
|
"grad_norm": 8.08385944366455, |
|
"learning_rate": 5.039454596555181e-06, |
|
"loss": 0.5555, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 3.044138585666825, |
|
"grad_norm": 9.490301132202148, |
|
"learning_rate": 5.021702950587289e-06, |
|
"loss": 0.5487, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 3.048093656067078, |
|
"grad_norm": 8.39875316619873, |
|
"learning_rate": 5.00396687656215e-06, |
|
"loss": 0.5362, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 3.0520487264673313, |
|
"grad_norm": 10.81480884552002, |
|
"learning_rate": 4.986246485921261e-06, |
|
"loss": 0.5416, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 3.0560037968675844, |
|
"grad_norm": 7.355183124542236, |
|
"learning_rate": 4.968541890007572e-06, |
|
"loss": 0.5241, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 3.0599588672678375, |
|
"grad_norm": 7.786324977874756, |
|
"learning_rate": 4.950853200064792e-06, |
|
"loss": 0.5428, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 3.0639139376680906, |
|
"grad_norm": 8.595235824584961, |
|
"learning_rate": 4.933180527236687e-06, |
|
"loss": 0.5429, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 3.0678690080683437, |
|
"grad_norm": 7.885761260986328, |
|
"learning_rate": 4.915523982566383e-06, |
|
"loss": 0.553, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 3.071824078468597, |
|
"grad_norm": 9.978317260742188, |
|
"learning_rate": 4.897883676995664e-06, |
|
"loss": 0.5847, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 3.07577914886885, |
|
"grad_norm": 9.08287239074707, |
|
"learning_rate": 4.8802597213642856e-06, |
|
"loss": 0.5586, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 3.079734219269103, |
|
"grad_norm": 9.484689712524414, |
|
"learning_rate": 4.862652226409261e-06, |
|
"loss": 0.5278, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.083689289669356, |
|
"grad_norm": 9.68605899810791, |
|
"learning_rate": 4.845061302764191e-06, |
|
"loss": 0.5358, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 3.0876443600696093, |
|
"grad_norm": 10.46710205078125, |
|
"learning_rate": 4.827487060958533e-06, |
|
"loss": 0.507, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 3.0915994304698624, |
|
"grad_norm": 9.789128303527832, |
|
"learning_rate": 4.80992961141695e-06, |
|
"loss": 0.5263, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 3.0955545008701155, |
|
"grad_norm": 8.19245719909668, |
|
"learning_rate": 4.792389064458569e-06, |
|
"loss": 0.5488, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 3.0995095712703686, |
|
"grad_norm": 8.210286140441895, |
|
"learning_rate": 4.774865530296338e-06, |
|
"loss": 0.5138, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 3.1034646416706217, |
|
"grad_norm": 11.341084480285645, |
|
"learning_rate": 4.757359119036288e-06, |
|
"loss": 0.5524, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 3.107419712070875, |
|
"grad_norm": 8.606145858764648, |
|
"learning_rate": 4.739869940676872e-06, |
|
"loss": 0.5401, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 3.111374782471128, |
|
"grad_norm": 8.024489402770996, |
|
"learning_rate": 4.722398105108257e-06, |
|
"loss": 0.541, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 3.115329852871381, |
|
"grad_norm": 6.819253444671631, |
|
"learning_rate": 4.704943722111649e-06, |
|
"loss": 0.5267, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 3.119284923271634, |
|
"grad_norm": 11.054219245910645, |
|
"learning_rate": 4.6875069013585795e-06, |
|
"loss": 0.5078, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.1232399936718873, |
|
"grad_norm": 8.629249572753906, |
|
"learning_rate": 4.670087752410245e-06, |
|
"loss": 0.55, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 3.1271950640721404, |
|
"grad_norm": 10.402203559875488, |
|
"learning_rate": 4.652686384716795e-06, |
|
"loss": 0.5301, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 3.1311501344723935, |
|
"grad_norm": 9.260662078857422, |
|
"learning_rate": 4.635302907616659e-06, |
|
"loss": 0.5051, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 3.1351052048726467, |
|
"grad_norm": 9.835843086242676, |
|
"learning_rate": 4.617937430335848e-06, |
|
"loss": 0.517, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 3.1390602752728998, |
|
"grad_norm": 8.685378074645996, |
|
"learning_rate": 4.600590061987283e-06, |
|
"loss": 0.5537, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 3.143015345673153, |
|
"grad_norm": 4.874628067016602, |
|
"learning_rate": 4.583260911570091e-06, |
|
"loss": 0.5297, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 3.146970416073406, |
|
"grad_norm": 10.263830184936523, |
|
"learning_rate": 4.5659500879689395e-06, |
|
"loss": 0.5084, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 3.150925486473659, |
|
"grad_norm": 8.627128601074219, |
|
"learning_rate": 4.548657699953333e-06, |
|
"loss": 0.5186, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 3.1548805568739122, |
|
"grad_norm": 12.04641056060791, |
|
"learning_rate": 4.531383856176949e-06, |
|
"loss": 0.511, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 3.1588356272741653, |
|
"grad_norm": 9.611827850341797, |
|
"learning_rate": 4.514128665176935e-06, |
|
"loss": 0.5326, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.1627906976744184, |
|
"grad_norm": 8.870594024658203, |
|
"learning_rate": 4.49689223537325e-06, |
|
"loss": 0.5208, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 3.1667457680746716, |
|
"grad_norm": 9.362154006958008, |
|
"learning_rate": 4.479674675067954e-06, |
|
"loss": 0.5293, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 3.1707008384749247, |
|
"grad_norm": 8.815319061279297, |
|
"learning_rate": 4.462476092444563e-06, |
|
"loss": 0.5224, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 3.174655908875178, |
|
"grad_norm": 8.474082946777344, |
|
"learning_rate": 4.44529659556733e-06, |
|
"loss": 0.5282, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 3.178610979275431, |
|
"grad_norm": 8.675308227539062, |
|
"learning_rate": 4.428136292380605e-06, |
|
"loss": 0.5292, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 3.1825660496756845, |
|
"grad_norm": 8.218753814697266, |
|
"learning_rate": 4.410995290708121e-06, |
|
"loss": 0.5235, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 3.1865211200759376, |
|
"grad_norm": 9.475656509399414, |
|
"learning_rate": 4.3938736982523485e-06, |
|
"loss": 0.5016, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 3.1904761904761907, |
|
"grad_norm": 6.521944522857666, |
|
"learning_rate": 4.3767716225937895e-06, |
|
"loss": 0.5392, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 3.194431260876444, |
|
"grad_norm": 11.293815612792969, |
|
"learning_rate": 4.359689171190329e-06, |
|
"loss": 0.5172, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 3.198386331276697, |
|
"grad_norm": 8.898575782775879, |
|
"learning_rate": 4.342626451376533e-06, |
|
"loss": 0.4907, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.20234140167695, |
|
"grad_norm": 9.884123802185059, |
|
"learning_rate": 4.325583570363003e-06, |
|
"loss": 0.494, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 3.206296472077203, |
|
"grad_norm": 9.265763282775879, |
|
"learning_rate": 4.308560635235669e-06, |
|
"loss": 0.4984, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 3.2102515424774563, |
|
"grad_norm": 9.925707817077637, |
|
"learning_rate": 4.2915577529551505e-06, |
|
"loss": 0.4789, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 3.2142066128777094, |
|
"grad_norm": 8.641393661499023, |
|
"learning_rate": 4.274575030356057e-06, |
|
"loss": 0.5038, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 3.2181616832779625, |
|
"grad_norm": 8.077740669250488, |
|
"learning_rate": 4.257612574146341e-06, |
|
"loss": 0.5143, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 3.2221167536782156, |
|
"grad_norm": 7.956730365753174, |
|
"learning_rate": 4.240670490906596e-06, |
|
"loss": 0.5002, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 3.2260718240784687, |
|
"grad_norm": 9.40575122833252, |
|
"learning_rate": 4.2237488870894245e-06, |
|
"loss": 0.5153, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 3.230026894478722, |
|
"grad_norm": 11.024624824523926, |
|
"learning_rate": 4.2068478690187355e-06, |
|
"loss": 0.5146, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 3.233981964878975, |
|
"grad_norm": 10.030312538146973, |
|
"learning_rate": 4.189967542889106e-06, |
|
"loss": 0.4978, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 3.237937035279228, |
|
"grad_norm": 8.483134269714355, |
|
"learning_rate": 4.173108014765084e-06, |
|
"loss": 0.5021, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.241892105679481, |
|
"grad_norm": 9.623600959777832, |
|
"learning_rate": 4.156269390580546e-06, |
|
"loss": 0.5053, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 3.2458471760797343, |
|
"grad_norm": 9.081612586975098, |
|
"learning_rate": 4.139451776138015e-06, |
|
"loss": 0.5038, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 3.2498022464799874, |
|
"grad_norm": 7.760807991027832, |
|
"learning_rate": 4.122655277108014e-06, |
|
"loss": 0.4946, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 3.2537573168802405, |
|
"grad_norm": 9.095376014709473, |
|
"learning_rate": 4.1058799990283814e-06, |
|
"loss": 0.4955, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 3.2577123872804936, |
|
"grad_norm": 7.644027233123779, |
|
"learning_rate": 4.08912604730362e-06, |
|
"loss": 0.4959, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 3.2616674576807467, |
|
"grad_norm": 10.785470962524414, |
|
"learning_rate": 4.072393527204232e-06, |
|
"loss": 0.5038, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 3.265622528081, |
|
"grad_norm": 10.49952220916748, |
|
"learning_rate": 4.055682543866063e-06, |
|
"loss": 0.5116, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 3.269577598481253, |
|
"grad_norm": 11.771068572998047, |
|
"learning_rate": 4.038993202289629e-06, |
|
"loss": 0.4978, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 3.273532668881506, |
|
"grad_norm": 8.821813583374023, |
|
"learning_rate": 4.022325607339466e-06, |
|
"loss": 0.4768, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 3.277487739281759, |
|
"grad_norm": 13.049178123474121, |
|
"learning_rate": 4.005679863743468e-06, |
|
"loss": 0.5041, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.2814428096820123, |
|
"grad_norm": 9.644794464111328, |
|
"learning_rate": 3.989056076092235e-06, |
|
"loss": 0.5031, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 3.2853978800822654, |
|
"grad_norm": 7.407175540924072, |
|
"learning_rate": 3.972454348838405e-06, |
|
"loss": 0.4883, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 3.2893529504825185, |
|
"grad_norm": 13.028958320617676, |
|
"learning_rate": 3.95587478629601e-06, |
|
"loss": 0.4805, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 3.2933080208827716, |
|
"grad_norm": 7.644301414489746, |
|
"learning_rate": 3.9393174926398005e-06, |
|
"loss": 0.5093, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 3.2972630912830247, |
|
"grad_norm": 8.546706199645996, |
|
"learning_rate": 3.922782571904621e-06, |
|
"loss": 0.4667, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 3.301218161683278, |
|
"grad_norm": 10.010725975036621, |
|
"learning_rate": 3.906270127984725e-06, |
|
"loss": 0.5127, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 3.305173232083531, |
|
"grad_norm": 12.253199577331543, |
|
"learning_rate": 3.889780264633153e-06, |
|
"loss": 0.4845, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 3.309128302483784, |
|
"grad_norm": 7.043243408203125, |
|
"learning_rate": 3.873313085461048e-06, |
|
"loss": 0.5092, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 3.313083372884037, |
|
"grad_norm": 9.297669410705566, |
|
"learning_rate": 3.856868693937032e-06, |
|
"loss": 0.4827, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 3.3170384432842903, |
|
"grad_norm": 8.122245788574219, |
|
"learning_rate": 3.840447193386537e-06, |
|
"loss": 0.4863, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.3209935136845434, |
|
"grad_norm": 8.875086784362793, |
|
"learning_rate": 3.8240486869911725e-06, |
|
"loss": 0.5004, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 3.3249485840847965, |
|
"grad_norm": 8.587276458740234, |
|
"learning_rate": 3.807673277788062e-06, |
|
"loss": 0.4888, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 3.3289036544850497, |
|
"grad_norm": 10.221160888671875, |
|
"learning_rate": 3.791321068669202e-06, |
|
"loss": 0.4717, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 3.3328587248853028, |
|
"grad_norm": 8.61902141571045, |
|
"learning_rate": 3.7749921623808154e-06, |
|
"loss": 0.5057, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 3.336813795285556, |
|
"grad_norm": 9.123618125915527, |
|
"learning_rate": 3.758686661522711e-06, |
|
"loss": 0.4895, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 3.340768865685809, |
|
"grad_norm": 10.211472511291504, |
|
"learning_rate": 3.7424046685476288e-06, |
|
"loss": 0.5143, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 3.344723936086062, |
|
"grad_norm": 8.340943336486816, |
|
"learning_rate": 3.726146285760601e-06, |
|
"loss": 0.4589, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 3.3486790064863152, |
|
"grad_norm": 8.512166976928711, |
|
"learning_rate": 3.7099116153183117e-06, |
|
"loss": 0.4511, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 3.3526340768865683, |
|
"grad_norm": 7.626883506774902, |
|
"learning_rate": 3.6937007592284475e-06, |
|
"loss": 0.5123, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 3.356589147286822, |
|
"grad_norm": 7.530759334564209, |
|
"learning_rate": 3.677513819349073e-06, |
|
"loss": 0.4814, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.360544217687075, |
|
"grad_norm": 11.007448196411133, |
|
"learning_rate": 3.6613508973879676e-06, |
|
"loss": 0.4716, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 3.364499288087328, |
|
"grad_norm": 10.204156875610352, |
|
"learning_rate": 3.6452120949020044e-06, |
|
"loss": 0.4846, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 3.3684543584875812, |
|
"grad_norm": 9.174517631530762, |
|
"learning_rate": 3.629097513296501e-06, |
|
"loss": 0.4722, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 3.3724094288878343, |
|
"grad_norm": 7.779313564300537, |
|
"learning_rate": 3.613007253824597e-06, |
|
"loss": 0.4753, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 3.3763644992880875, |
|
"grad_norm": 9.513938903808594, |
|
"learning_rate": 3.596941417586599e-06, |
|
"loss": 0.4838, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 3.3803195696883406, |
|
"grad_norm": 7.560364723205566, |
|
"learning_rate": 3.5809001055293585e-06, |
|
"loss": 0.4951, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 3.3842746400885937, |
|
"grad_norm": 9.338923454284668, |
|
"learning_rate": 3.5648834184456293e-06, |
|
"loss": 0.4985, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 3.388229710488847, |
|
"grad_norm": 8.588788032531738, |
|
"learning_rate": 3.548891456973448e-06, |
|
"loss": 0.4826, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 3.3921847808891, |
|
"grad_norm": 7.1329545974731445, |
|
"learning_rate": 3.53292432159548e-06, |
|
"loss": 0.4604, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 3.396139851289353, |
|
"grad_norm": 10.372248649597168, |
|
"learning_rate": 3.5169821126384154e-06, |
|
"loss": 0.5027, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.400094921689606, |
|
"grad_norm": 8.757039070129395, |
|
"learning_rate": 3.501064930272304e-06, |
|
"loss": 0.5149, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 3.4040499920898593, |
|
"grad_norm": 10.216232299804688, |
|
"learning_rate": 3.485172874509963e-06, |
|
"loss": 0.4668, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 3.4080050624901124, |
|
"grad_norm": 8.485111236572266, |
|
"learning_rate": 3.46930604520632e-06, |
|
"loss": 0.4549, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 3.4119601328903655, |
|
"grad_norm": 8.599973678588867, |
|
"learning_rate": 3.4534645420578053e-06, |
|
"loss": 0.4729, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 3.4159152032906186, |
|
"grad_norm": 10.100400924682617, |
|
"learning_rate": 3.43764846460171e-06, |
|
"loss": 0.4587, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 3.4198702736908717, |
|
"grad_norm": 11.832128524780273, |
|
"learning_rate": 3.4218579122155695e-06, |
|
"loss": 0.4667, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 3.423825344091125, |
|
"grad_norm": 9.493983268737793, |
|
"learning_rate": 3.406092984116533e-06, |
|
"loss": 0.4581, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 3.427780414491378, |
|
"grad_norm": 9.855833053588867, |
|
"learning_rate": 3.3903537793607546e-06, |
|
"loss": 0.4718, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 3.431735484891631, |
|
"grad_norm": 12.606931686401367, |
|
"learning_rate": 3.3746403968427506e-06, |
|
"loss": 0.471, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 3.435690555291884, |
|
"grad_norm": 7.937397480010986, |
|
"learning_rate": 3.3589529352947908e-06, |
|
"loss": 0.4752, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.4396456256921373, |
|
"grad_norm": 9.963780403137207, |
|
"learning_rate": 3.3432914932862728e-06, |
|
"loss": 0.4724, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 3.4436006960923904, |
|
"grad_norm": 8.761956214904785, |
|
"learning_rate": 3.3276561692231115e-06, |
|
"loss": 0.4714, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 3.4475557664926435, |
|
"grad_norm": 9.65771198272705, |
|
"learning_rate": 3.312047061347109e-06, |
|
"loss": 0.4505, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 3.4515108368928966, |
|
"grad_norm": 7.428150177001953, |
|
"learning_rate": 3.296464267735344e-06, |
|
"loss": 0.4636, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 3.4554659072931497, |
|
"grad_norm": 8.332456588745117, |
|
"learning_rate": 3.2809078862995496e-06, |
|
"loss": 0.4822, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 3.459420977693403, |
|
"grad_norm": 14.15750789642334, |
|
"learning_rate": 3.265378014785513e-06, |
|
"loss": 0.4496, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 3.463376048093656, |
|
"grad_norm": 8.370489120483398, |
|
"learning_rate": 3.2498747507724433e-06, |
|
"loss": 0.4668, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 3.467331118493909, |
|
"grad_norm": 11.614065170288086, |
|
"learning_rate": 3.234398191672367e-06, |
|
"loss": 0.4625, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 3.471286188894162, |
|
"grad_norm": 7.348938941955566, |
|
"learning_rate": 3.2189484347295134e-06, |
|
"loss": 0.4417, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 3.4752412592944153, |
|
"grad_norm": 11.13962459564209, |
|
"learning_rate": 3.2035255770197124e-06, |
|
"loss": 0.4827, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.4791963296946684, |
|
"grad_norm": 6.6790056228637695, |
|
"learning_rate": 3.1881297154497676e-06, |
|
"loss": 0.481, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 3.4831514000949215, |
|
"grad_norm": 8.494084358215332, |
|
"learning_rate": 3.1727609467568688e-06, |
|
"loss": 0.4458, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 3.4871064704951746, |
|
"grad_norm": 8.11501693725586, |
|
"learning_rate": 3.1574193675079564e-06, |
|
"loss": 0.4547, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 3.4910615408954278, |
|
"grad_norm": 9.092681884765625, |
|
"learning_rate": 3.142105074099147e-06, |
|
"loss": 0.476, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 3.4950166112956813, |
|
"grad_norm": 10.274823188781738, |
|
"learning_rate": 3.1268181627550997e-06, |
|
"loss": 0.454, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 3.4989716816959344, |
|
"grad_norm": 11.079930305480957, |
|
"learning_rate": 3.1115587295284326e-06, |
|
"loss": 0.4498, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 3.5029267520961875, |
|
"grad_norm": 8.612648010253906, |
|
"learning_rate": 3.0963268702991023e-06, |
|
"loss": 0.4183, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 3.5068818224964406, |
|
"grad_norm": 9.691105842590332, |
|
"learning_rate": 3.0811226807738126e-06, |
|
"loss": 0.4367, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 3.5108368928966938, |
|
"grad_norm": 10.901847839355469, |
|
"learning_rate": 3.065946256485406e-06, |
|
"loss": 0.4764, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 3.514791963296947, |
|
"grad_norm": 9.77769660949707, |
|
"learning_rate": 3.050797692792275e-06, |
|
"loss": 0.468, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.5187470336972, |
|
"grad_norm": 10.783480644226074, |
|
"learning_rate": 3.035677084877748e-06, |
|
"loss": 0.465, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 3.522702104097453, |
|
"grad_norm": 9.860946655273438, |
|
"learning_rate": 3.0205845277494995e-06, |
|
"loss": 0.4447, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 3.526657174497706, |
|
"grad_norm": 9.348694801330566, |
|
"learning_rate": 3.00552011623895e-06, |
|
"loss": 0.459, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 3.5306122448979593, |
|
"grad_norm": 7.3600358963012695, |
|
"learning_rate": 2.990483945000679e-06, |
|
"loss": 0.4423, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 3.5345673152982124, |
|
"grad_norm": 9.740511894226074, |
|
"learning_rate": 2.975476108511817e-06, |
|
"loss": 0.4435, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 3.5385223856984656, |
|
"grad_norm": 8.59338665008545, |
|
"learning_rate": 2.9604967010714583e-06, |
|
"loss": 0.43, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 3.5424774560987187, |
|
"grad_norm": 11.468289375305176, |
|
"learning_rate": 2.9455458168000685e-06, |
|
"loss": 0.4368, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 3.546432526498972, |
|
"grad_norm": 9.191219329833984, |
|
"learning_rate": 2.9306235496388993e-06, |
|
"loss": 0.4495, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 3.550387596899225, |
|
"grad_norm": 10.588215827941895, |
|
"learning_rate": 2.91572999334938e-06, |
|
"loss": 0.443, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 3.554342667299478, |
|
"grad_norm": 8.200735092163086, |
|
"learning_rate": 2.9008652415125554e-06, |
|
"loss": 0.4623, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.558297737699731, |
|
"grad_norm": 8.150663375854492, |
|
"learning_rate": 2.8860293875284642e-06, |
|
"loss": 0.4423, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 3.5622528080999842, |
|
"grad_norm": 8.815475463867188, |
|
"learning_rate": 2.8712225246155885e-06, |
|
"loss": 0.4275, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 3.5662078785002373, |
|
"grad_norm": 9.670441627502441, |
|
"learning_rate": 2.856444745810235e-06, |
|
"loss": 0.4411, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 3.5701629489004905, |
|
"grad_norm": 11.169974327087402, |
|
"learning_rate": 2.8416961439659787e-06, |
|
"loss": 0.4527, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 3.5741180193007436, |
|
"grad_norm": 9.71986198425293, |
|
"learning_rate": 2.8269768117530577e-06, |
|
"loss": 0.424, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 3.5780730897009967, |
|
"grad_norm": 8.910456657409668, |
|
"learning_rate": 2.8122868416578013e-06, |
|
"loss": 0.4291, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 3.58202816010125, |
|
"grad_norm": 9.892890930175781, |
|
"learning_rate": 2.797626325982046e-06, |
|
"loss": 0.427, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 3.585983230501503, |
|
"grad_norm": 8.138750076293945, |
|
"learning_rate": 2.7829953568425623e-06, |
|
"loss": 0.4279, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 3.589938300901756, |
|
"grad_norm": 10.201644897460938, |
|
"learning_rate": 2.7683940261704656e-06, |
|
"loss": 0.4333, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 3.593893371302009, |
|
"grad_norm": 9.497387886047363, |
|
"learning_rate": 2.753822425710642e-06, |
|
"loss": 0.4275, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.5978484417022623, |
|
"grad_norm": 11.225207328796387, |
|
"learning_rate": 2.739280647021174e-06, |
|
"loss": 0.4401, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 3.6018035121025154, |
|
"grad_norm": 8.41219711303711, |
|
"learning_rate": 2.724768781472769e-06, |
|
"loss": 0.4213, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 3.6057585825027685, |
|
"grad_norm": 8.49161148071289, |
|
"learning_rate": 2.7102869202481757e-06, |
|
"loss": 0.4298, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 3.6097136529030216, |
|
"grad_norm": 7.584569931030273, |
|
"learning_rate": 2.695835154341616e-06, |
|
"loss": 0.4455, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 3.6136687233032747, |
|
"grad_norm": 9.672845840454102, |
|
"learning_rate": 2.6814135745582114e-06, |
|
"loss": 0.4161, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 3.617623793703528, |
|
"grad_norm": 11.135618209838867, |
|
"learning_rate": 2.6670222715134223e-06, |
|
"loss": 0.4158, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 3.621578864103781, |
|
"grad_norm": 10.18213176727295, |
|
"learning_rate": 2.652661335632462e-06, |
|
"loss": 0.4273, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 3.625533934504034, |
|
"grad_norm": 9.0409517288208, |
|
"learning_rate": 2.6383308571497417e-06, |
|
"loss": 0.4403, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 3.629489004904287, |
|
"grad_norm": 10.783854484558105, |
|
"learning_rate": 2.6240309261082944e-06, |
|
"loss": 0.4412, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 3.6334440753045403, |
|
"grad_norm": 8.486966133117676, |
|
"learning_rate": 2.609761632359221e-06, |
|
"loss": 0.4397, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.6373991457047934, |
|
"grad_norm": 10.486551284790039, |
|
"learning_rate": 2.5955230655611086e-06, |
|
"loss": 0.4348, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 3.6413542161050465, |
|
"grad_norm": 8.568268775939941, |
|
"learning_rate": 2.5813153151794913e-06, |
|
"loss": 0.4267, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 3.6453092865052996, |
|
"grad_norm": 9.572600364685059, |
|
"learning_rate": 2.567138470486257e-06, |
|
"loss": 0.4165, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 3.6492643569055527, |
|
"grad_norm": 9.160146713256836, |
|
"learning_rate": 2.5529926205591102e-06, |
|
"loss": 0.4161, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 3.653219427305806, |
|
"grad_norm": 7.692389488220215, |
|
"learning_rate": 2.538877854281012e-06, |
|
"loss": 0.3944, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 3.657174497706059, |
|
"grad_norm": 10.177444458007812, |
|
"learning_rate": 2.5247942603396016e-06, |
|
"loss": 0.4301, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 3.661129568106312, |
|
"grad_norm": 7.599040508270264, |
|
"learning_rate": 2.5107419272266704e-06, |
|
"loss": 0.4338, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 3.665084638506565, |
|
"grad_norm": 9.309414863586426, |
|
"learning_rate": 2.4967209432375644e-06, |
|
"loss": 0.4286, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 3.6690397089068183, |
|
"grad_norm": 8.994830131530762, |
|
"learning_rate": 2.4827313964706737e-06, |
|
"loss": 0.4082, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 3.6729947793070714, |
|
"grad_norm": 9.818035125732422, |
|
"learning_rate": 2.4687733748268454e-06, |
|
"loss": 0.41, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.6769498497073245, |
|
"grad_norm": 9.885952949523926, |
|
"learning_rate": 2.4548469660088533e-06, |
|
"loss": 0.4075, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 3.6809049201075776, |
|
"grad_norm": 8.293312072753906, |
|
"learning_rate": 2.4409522575208308e-06, |
|
"loss": 0.3905, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 3.684859990507831, |
|
"grad_norm": 9.278348922729492, |
|
"learning_rate": 2.4270893366677303e-06, |
|
"loss": 0.4255, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 3.6888150609080843, |
|
"grad_norm": 9.701757431030273, |
|
"learning_rate": 2.4132582905547684e-06, |
|
"loss": 0.3935, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 3.6927701313083374, |
|
"grad_norm": 10.177591323852539, |
|
"learning_rate": 2.3994592060868913e-06, |
|
"loss": 0.4118, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 3.6967252017085905, |
|
"grad_norm": 10.166543960571289, |
|
"learning_rate": 2.3856921699682113e-06, |
|
"loss": 0.4117, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 3.7006802721088436, |
|
"grad_norm": 9.657312393188477, |
|
"learning_rate": 2.371957268701475e-06, |
|
"loss": 0.4194, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 3.7046353425090968, |
|
"grad_norm": 9.794391632080078, |
|
"learning_rate": 2.35825458858751e-06, |
|
"loss": 0.4267, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 3.70859041290935, |
|
"grad_norm": 8.307416915893555, |
|
"learning_rate": 2.344584215724699e-06, |
|
"loss": 0.3905, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 3.712545483309603, |
|
"grad_norm": 9.993391036987305, |
|
"learning_rate": 2.3309462360084153e-06, |
|
"loss": 0.4013, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.716500553709856, |
|
"grad_norm": 9.117337226867676, |
|
"learning_rate": 2.3173407351305035e-06, |
|
"loss": 0.4329, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 3.720455624110109, |
|
"grad_norm": 7.99422550201416, |
|
"learning_rate": 2.303767798578727e-06, |
|
"loss": 0.3951, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 3.7244106945103623, |
|
"grad_norm": 7.283392429351807, |
|
"learning_rate": 2.2902275116362455e-06, |
|
"loss": 0.4213, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 3.7283657649106154, |
|
"grad_norm": 7.147285461425781, |
|
"learning_rate": 2.2767199593810643e-06, |
|
"loss": 0.4145, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 3.7323208353108686, |
|
"grad_norm": 7.999619007110596, |
|
"learning_rate": 2.263245226685507e-06, |
|
"loss": 0.4164, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 3.7362759057111217, |
|
"grad_norm": 7.173412322998047, |
|
"learning_rate": 2.2498033982156786e-06, |
|
"loss": 0.4375, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 3.740230976111375, |
|
"grad_norm": 9.304818153381348, |
|
"learning_rate": 2.236394558430945e-06, |
|
"loss": 0.392, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 3.744186046511628, |
|
"grad_norm": 11.311491966247559, |
|
"learning_rate": 2.2230187915833827e-06, |
|
"loss": 0.3951, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 3.748141116911881, |
|
"grad_norm": 8.677116394042969, |
|
"learning_rate": 2.2096761817172735e-06, |
|
"loss": 0.406, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 3.752096187312134, |
|
"grad_norm": 8.232828140258789, |
|
"learning_rate": 2.196366812668544e-06, |
|
"loss": 0.411, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.7560512577123872, |
|
"grad_norm": 7.7508440017700195, |
|
"learning_rate": 2.1830907680642782e-06, |
|
"loss": 0.425, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 3.7600063281126404, |
|
"grad_norm": 9.16795539855957, |
|
"learning_rate": 2.1698481313221564e-06, |
|
"loss": 0.4124, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 3.7639613985128935, |
|
"grad_norm": 9.288763046264648, |
|
"learning_rate": 2.156638985649958e-06, |
|
"loss": 0.3808, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 3.7679164689131466, |
|
"grad_norm": 8.190459251403809, |
|
"learning_rate": 2.1434634140450197e-06, |
|
"loss": 0.41, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 3.7718715393133997, |
|
"grad_norm": 9.141033172607422, |
|
"learning_rate": 2.1303214992937243e-06, |
|
"loss": 0.3768, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 3.775826609713653, |
|
"grad_norm": 10.411824226379395, |
|
"learning_rate": 2.117213323970973e-06, |
|
"loss": 0.3955, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 3.779781680113906, |
|
"grad_norm": 8.365334510803223, |
|
"learning_rate": 2.1041389704396817e-06, |
|
"loss": 0.4249, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 3.783736750514159, |
|
"grad_norm": 7.489829063415527, |
|
"learning_rate": 2.0910985208502444e-06, |
|
"loss": 0.4026, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 3.787691820914412, |
|
"grad_norm": 10.931512832641602, |
|
"learning_rate": 2.078092057140026e-06, |
|
"loss": 0.3997, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 3.7916468913146653, |
|
"grad_norm": 7.846652507781982, |
|
"learning_rate": 2.065119661032849e-06, |
|
"loss": 0.395, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.7956019617149184, |
|
"grad_norm": 9.316243171691895, |
|
"learning_rate": 2.0521814140384813e-06, |
|
"loss": 0.41, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 3.7995570321151715, |
|
"grad_norm": 7.889522552490234, |
|
"learning_rate": 2.0392773974521163e-06, |
|
"loss": 0.3711, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 3.803512102515425, |
|
"grad_norm": 8.276979446411133, |
|
"learning_rate": 2.026407692353868e-06, |
|
"loss": 0.3864, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 3.807467172915678, |
|
"grad_norm": 7.635178565979004, |
|
"learning_rate": 2.0135723796082577e-06, |
|
"loss": 0.3854, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 3.8114222433159313, |
|
"grad_norm": 10.312528610229492, |
|
"learning_rate": 2.0007715398637154e-06, |
|
"loss": 0.4005, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 3.8153773137161844, |
|
"grad_norm": 9.488860130310059, |
|
"learning_rate": 1.9880052535520586e-06, |
|
"loss": 0.3974, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 3.8193323841164375, |
|
"grad_norm": 7.628594875335693, |
|
"learning_rate": 1.975273600888002e-06, |
|
"loss": 0.386, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 3.8232874545166906, |
|
"grad_norm": 8.586271286010742, |
|
"learning_rate": 1.9625766618686344e-06, |
|
"loss": 0.4291, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 3.8272425249169437, |
|
"grad_norm": 12.247819900512695, |
|
"learning_rate": 1.9499145162729406e-06, |
|
"loss": 0.3928, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 3.831197595317197, |
|
"grad_norm": 10.7797269821167, |
|
"learning_rate": 1.9372872436612757e-06, |
|
"loss": 0.3932, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.83515266571745, |
|
"grad_norm": 9.973164558410645, |
|
"learning_rate": 1.92469492337489e-06, |
|
"loss": 0.4081, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 3.839107736117703, |
|
"grad_norm": 8.402750015258789, |
|
"learning_rate": 1.912137634535398e-06, |
|
"loss": 0.3671, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 3.843062806517956, |
|
"grad_norm": 7.674239158630371, |
|
"learning_rate": 1.8996154560443199e-06, |
|
"loss": 0.426, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 3.8470178769182093, |
|
"grad_norm": 12.181373596191406, |
|
"learning_rate": 1.8871284665825514e-06, |
|
"loss": 0.3842, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 3.8509729473184624, |
|
"grad_norm": 10.594054222106934, |
|
"learning_rate": 1.8746767446098952e-06, |
|
"loss": 0.3929, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 3.8549280177187155, |
|
"grad_norm": 9.901941299438477, |
|
"learning_rate": 1.86226036836455e-06, |
|
"loss": 0.3947, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 3.8588830881189686, |
|
"grad_norm": 8.486770629882812, |
|
"learning_rate": 1.8498794158626267e-06, |
|
"loss": 0.3876, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 3.8628381585192217, |
|
"grad_norm": 9.486550331115723, |
|
"learning_rate": 1.8375339648976577e-06, |
|
"loss": 0.3769, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 3.866793228919475, |
|
"grad_norm": 8.092984199523926, |
|
"learning_rate": 1.8252240930401123e-06, |
|
"loss": 0.3927, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 3.870748299319728, |
|
"grad_norm": 11.440380096435547, |
|
"learning_rate": 1.8129498776368985e-06, |
|
"loss": 0.3913, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 3.874703369719981, |
|
"grad_norm": 8.888964653015137, |
|
"learning_rate": 1.8007113958108853e-06, |
|
"loss": 0.3608, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 3.878658440120234, |
|
"grad_norm": 9.67924690246582, |
|
"learning_rate": 1.7885087244604153e-06, |
|
"loss": 0.3869, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 3.8826135105204873, |
|
"grad_norm": 8.57454776763916, |
|
"learning_rate": 1.7763419402588263e-06, |
|
"loss": 0.3739, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 3.8865685809207404, |
|
"grad_norm": 10.284082412719727, |
|
"learning_rate": 1.7642111196539621e-06, |
|
"loss": 0.3835, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 3.8905236513209935, |
|
"grad_norm": 10.843579292297363, |
|
"learning_rate": 1.7521163388676956e-06, |
|
"loss": 0.4116, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 3.8944787217212467, |
|
"grad_norm": 9.84794807434082, |
|
"learning_rate": 1.7400576738954468e-06, |
|
"loss": 0.4025, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 3.8984337921214998, |
|
"grad_norm": 8.804971694946289, |
|
"learning_rate": 1.728035200505718e-06, |
|
"loss": 0.3527, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 3.902388862521753, |
|
"grad_norm": 9.877714157104492, |
|
"learning_rate": 1.716048994239598e-06, |
|
"loss": 0.3746, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 3.906343932922006, |
|
"grad_norm": 8.774710655212402, |
|
"learning_rate": 1.7040991304103059e-06, |
|
"loss": 0.37, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 3.910299003322259, |
|
"grad_norm": 9.542379379272461, |
|
"learning_rate": 1.692185684102699e-06, |
|
"loss": 0.3734, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 3.914254073722512, |
|
"grad_norm": 10.886748313903809, |
|
"learning_rate": 1.6803087301728216e-06, |
|
"loss": 0.3437, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 3.9182091441227653, |
|
"grad_norm": 9.636738777160645, |
|
"learning_rate": 1.6684683432474177e-06, |
|
"loss": 0.3882, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 3.9221642145230184, |
|
"grad_norm": 7.8506269454956055, |
|
"learning_rate": 1.656664597723476e-06, |
|
"loss": 0.3702, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 3.9261192849232716, |
|
"grad_norm": 9.247983932495117, |
|
"learning_rate": 1.64489756776775e-06, |
|
"loss": 0.3562, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 3.9300743553235247, |
|
"grad_norm": 9.73788070678711, |
|
"learning_rate": 1.633167327316297e-06, |
|
"loss": 0.3913, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 3.934029425723778, |
|
"grad_norm": 9.978226661682129, |
|
"learning_rate": 1.6214739500740142e-06, |
|
"loss": 0.3628, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 3.937984496124031, |
|
"grad_norm": 8.844416618347168, |
|
"learning_rate": 1.6098175095141815e-06, |
|
"loss": 0.3713, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 3.941939566524284, |
|
"grad_norm": 8.160172462463379, |
|
"learning_rate": 1.5981980788779863e-06, |
|
"loss": 0.3669, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 3.945894636924537, |
|
"grad_norm": 11.400806427001953, |
|
"learning_rate": 1.5866157311740733e-06, |
|
"loss": 0.3758, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 3.9498497073247902, |
|
"grad_norm": 10.661243438720703, |
|
"learning_rate": 1.5750705391780833e-06, |
|
"loss": 0.3869, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 3.9538047777250434, |
|
"grad_norm": 6.112812519073486, |
|
"learning_rate": 1.5635625754321935e-06, |
|
"loss": 0.3829, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 3.9577598481252965, |
|
"grad_norm": 9.569903373718262, |
|
"learning_rate": 1.55209191224467e-06, |
|
"loss": 0.3836, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 3.9617149185255496, |
|
"grad_norm": 10.391584396362305, |
|
"learning_rate": 1.5406586216894019e-06, |
|
"loss": 0.3854, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 3.9656699889258027, |
|
"grad_norm": 9.75516414642334, |
|
"learning_rate": 1.5292627756054542e-06, |
|
"loss": 0.3837, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 3.969625059326056, |
|
"grad_norm": 7.6981706619262695, |
|
"learning_rate": 1.517904445596617e-06, |
|
"loss": 0.396, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 3.973580129726309, |
|
"grad_norm": 9.208215713500977, |
|
"learning_rate": 1.506583703030958e-06, |
|
"loss": 0.3652, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 3.977535200126562, |
|
"grad_norm": 11.380250930786133, |
|
"learning_rate": 1.495300619040367e-06, |
|
"loss": 0.3833, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 3.981490270526815, |
|
"grad_norm": 8.134552955627441, |
|
"learning_rate": 1.4840552645201126e-06, |
|
"loss": 0.3723, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 3.9854453409270683, |
|
"grad_norm": 10.506682395935059, |
|
"learning_rate": 1.4728477101283977e-06, |
|
"loss": 0.3825, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 3.9894004113273214, |
|
"grad_norm": 9.069189071655273, |
|
"learning_rate": 1.4616780262859177e-06, |
|
"loss": 0.3804, |
|
"step": 101000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 126330, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 25266, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1903543369015073e+20, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|