{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 53680, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007451564828614009, "grad_norm": 1.7114099264144897, "learning_rate": 0.0002, "loss": 1.8316, "step": 10 }, { "epoch": 0.0014903129657228018, "grad_norm": 2.16106915473938, "learning_rate": 0.0002, "loss": 1.9128, "step": 20 }, { "epoch": 0.0022354694485842027, "grad_norm": 1.9972808361053467, "learning_rate": 0.0002, "loss": 1.8523, "step": 30 }, { "epoch": 0.0029806259314456036, "grad_norm": 2.058236598968506, "learning_rate": 0.0002, "loss": 1.7295, "step": 40 }, { "epoch": 0.0037257824143070045, "grad_norm": 2.164706230163574, "learning_rate": 0.0002, "loss": 1.6538, "step": 50 }, { "epoch": 0.004470938897168405, "grad_norm": 1.7097787857055664, "learning_rate": 0.0002, "loss": 1.7809, "step": 60 }, { "epoch": 0.005216095380029807, "grad_norm": 1.7462966442108154, "learning_rate": 0.0002, "loss": 1.7701, "step": 70 }, { "epoch": 0.005961251862891207, "grad_norm": 2.259033679962158, "learning_rate": 0.0002, "loss": 1.7231, "step": 80 }, { "epoch": 0.0067064083457526085, "grad_norm": 2.3188297748565674, "learning_rate": 0.0002, "loss": 1.9569, "step": 90 }, { "epoch": 0.007451564828614009, "grad_norm": 1.7893937826156616, "learning_rate": 0.0002, "loss": 2.5206, "step": 100 }, { "epoch": 0.00819672131147541, "grad_norm": 1.4487690925598145, "learning_rate": 0.0002, "loss": 2.5691, "step": 110 }, { "epoch": 0.00894187779433681, "grad_norm": 1.6098469495773315, "learning_rate": 0.0002, "loss": 2.5209, "step": 120 }, { "epoch": 0.009687034277198211, "grad_norm": 1.4259092807769775, "learning_rate": 0.0002, "loss": 2.5239, "step": 130 }, { "epoch": 0.010432190760059613, "grad_norm": 1.4447660446166992, "learning_rate": 0.0002, "loss": 2.6533, "step": 140 }, { "epoch": 0.011177347242921014, "grad_norm": 1.341070532798767, "learning_rate": 0.0002, "loss": 2.6231, "step": 150 }, { "epoch": 0.011922503725782414, "grad_norm": 1.2440979480743408, "learning_rate": 0.0002, "loss": 2.4615, "step": 160 }, { "epoch": 0.012667660208643815, "grad_norm": 1.4133532047271729, "learning_rate": 0.0002, "loss": 2.5564, "step": 170 }, { "epoch": 0.013412816691505217, "grad_norm": 1.3202648162841797, "learning_rate": 0.0002, "loss": 2.4586, "step": 180 }, { "epoch": 0.014157973174366617, "grad_norm": 1.2608141899108887, "learning_rate": 0.0002, "loss": 2.4948, "step": 190 }, { "epoch": 0.014903129657228018, "grad_norm": 1.3800815343856812, "learning_rate": 0.0002, "loss": 2.4191, "step": 200 }, { "epoch": 0.01564828614008942, "grad_norm": 1.4190888404846191, "learning_rate": 0.0002, "loss": 2.576, "step": 210 }, { "epoch": 0.01639344262295082, "grad_norm": 1.36716628074646, "learning_rate": 0.0002, "loss": 2.4893, "step": 220 }, { "epoch": 0.01713859910581222, "grad_norm": 1.4020277261734009, "learning_rate": 0.0002, "loss": 2.4958, "step": 230 }, { "epoch": 0.01788375558867362, "grad_norm": 1.4159200191497803, "learning_rate": 0.0002, "loss": 2.2937, "step": 240 }, { "epoch": 0.018628912071535022, "grad_norm": 1.6275138854980469, "learning_rate": 0.0002, "loss": 2.3386, "step": 250 }, { "epoch": 0.019374068554396422, "grad_norm": 1.3647948503494263, "learning_rate": 0.0002, "loss": 2.5081, "step": 260 }, { "epoch": 0.020119225037257823, "grad_norm": 1.237382173538208, "learning_rate": 0.0002, "loss": 2.4055, "step": 270 }, { "epoch": 0.020864381520119227, "grad_norm": 1.3185752630233765, "learning_rate": 0.0002, "loss": 2.5449, "step": 280 }, { "epoch": 0.021609538002980627, "grad_norm": 1.454095721244812, "learning_rate": 0.0002, "loss": 2.5499, "step": 290 }, { "epoch": 0.022354694485842028, "grad_norm": 1.4591819047927856, "learning_rate": 0.0002, "loss": 2.4029, "step": 300 }, { "epoch": 0.023099850968703428, "grad_norm": 1.3012772798538208, "learning_rate": 0.0002, "loss": 2.3855, "step": 310 }, { "epoch": 0.02384500745156483, "grad_norm": 1.326201319694519, "learning_rate": 0.0002, "loss": 2.6307, "step": 320 }, { "epoch": 0.02459016393442623, "grad_norm": 1.3953168392181396, "learning_rate": 0.0002, "loss": 2.6101, "step": 330 }, { "epoch": 0.02533532041728763, "grad_norm": 1.5380605459213257, "learning_rate": 0.0002, "loss": 2.4536, "step": 340 }, { "epoch": 0.02608047690014903, "grad_norm": 1.5608468055725098, "learning_rate": 0.0002, "loss": 2.1291, "step": 350 }, { "epoch": 0.026825633383010434, "grad_norm": 1.316112756729126, "learning_rate": 0.0002, "loss": 2.5011, "step": 360 }, { "epoch": 0.027570789865871834, "grad_norm": 1.4802881479263306, "learning_rate": 0.0002, "loss": 2.4637, "step": 370 }, { "epoch": 0.028315946348733235, "grad_norm": 1.213468074798584, "learning_rate": 0.0002, "loss": 2.5339, "step": 380 }, { "epoch": 0.029061102831594635, "grad_norm": 1.3106322288513184, "learning_rate": 0.0002, "loss": 2.5736, "step": 390 }, { "epoch": 0.029806259314456036, "grad_norm": 1.7102941274642944, "learning_rate": 0.0002, "loss": 2.4255, "step": 400 }, { "epoch": 0.030551415797317436, "grad_norm": 2.0210812091827393, "learning_rate": 0.0002, "loss": 2.4833, "step": 410 }, { "epoch": 0.03129657228017884, "grad_norm": 1.38539457321167, "learning_rate": 0.0002, "loss": 2.3001, "step": 420 }, { "epoch": 0.03204172876304024, "grad_norm": 1.439780592918396, "learning_rate": 0.0002, "loss": 2.5553, "step": 430 }, { "epoch": 0.03278688524590164, "grad_norm": 1.488784670829773, "learning_rate": 0.0002, "loss": 2.3695, "step": 440 }, { "epoch": 0.03353204172876304, "grad_norm": 1.4806923866271973, "learning_rate": 0.0002, "loss": 2.4231, "step": 450 }, { "epoch": 0.03427719821162444, "grad_norm": 1.3404014110565186, "learning_rate": 0.0002, "loss": 2.5017, "step": 460 }, { "epoch": 0.03502235469448584, "grad_norm": 1.6606372594833374, "learning_rate": 0.0002, "loss": 2.4502, "step": 470 }, { "epoch": 0.03576751117734724, "grad_norm": 1.6447430849075317, "learning_rate": 0.0002, "loss": 2.2351, "step": 480 }, { "epoch": 0.03651266766020864, "grad_norm": 1.2761911153793335, "learning_rate": 0.0002, "loss": 2.3787, "step": 490 }, { "epoch": 0.037257824143070044, "grad_norm": 1.4461835622787476, "learning_rate": 0.0002, "loss": 2.5047, "step": 500 }, { "epoch": 0.038002980625931444, "grad_norm": 1.9162489175796509, "learning_rate": 0.0002, "loss": 2.3294, "step": 510 }, { "epoch": 0.038748137108792845, "grad_norm": 1.6676952838897705, "learning_rate": 0.0002, "loss": 2.4176, "step": 520 }, { "epoch": 0.039493293591654245, "grad_norm": 1.7947252988815308, "learning_rate": 0.0002, "loss": 2.3651, "step": 530 }, { "epoch": 0.040238450074515646, "grad_norm": 1.4677958488464355, "learning_rate": 0.0002, "loss": 2.6263, "step": 540 }, { "epoch": 0.040983606557377046, "grad_norm": 1.4735350608825684, "learning_rate": 0.0002, "loss": 2.4261, "step": 550 }, { "epoch": 0.041728763040238454, "grad_norm": 1.4515550136566162, "learning_rate": 0.0002, "loss": 2.4795, "step": 560 }, { "epoch": 0.042473919523099854, "grad_norm": 1.6137213706970215, "learning_rate": 0.0002, "loss": 2.5102, "step": 570 }, { "epoch": 0.043219076005961254, "grad_norm": 1.6257590055465698, "learning_rate": 0.0002, "loss": 2.4218, "step": 580 }, { "epoch": 0.043964232488822655, "grad_norm": 1.515711784362793, "learning_rate": 0.0002, "loss": 2.3959, "step": 590 }, { "epoch": 0.044709388971684055, "grad_norm": 1.4846817255020142, "learning_rate": 0.0002, "loss": 2.4579, "step": 600 }, { "epoch": 0.045454545454545456, "grad_norm": 1.7104601860046387, "learning_rate": 0.0002, "loss": 2.4271, "step": 610 }, { "epoch": 0.046199701937406856, "grad_norm": 1.4951586723327637, "learning_rate": 0.0002, "loss": 2.4518, "step": 620 }, { "epoch": 0.04694485842026826, "grad_norm": 1.4925544261932373, "learning_rate": 0.0002, "loss": 2.4987, "step": 630 }, { "epoch": 0.04769001490312966, "grad_norm": 1.4516690969467163, "learning_rate": 0.0002, "loss": 2.4968, "step": 640 }, { "epoch": 0.04843517138599106, "grad_norm": 1.6420326232910156, "learning_rate": 0.0002, "loss": 2.4214, "step": 650 }, { "epoch": 0.04918032786885246, "grad_norm": 1.6954593658447266, "learning_rate": 0.0002, "loss": 2.4363, "step": 660 }, { "epoch": 0.04992548435171386, "grad_norm": 1.6805527210235596, "learning_rate": 0.0002, "loss": 2.2638, "step": 670 }, { "epoch": 0.05067064083457526, "grad_norm": 1.5352181196212769, "learning_rate": 0.0002, "loss": 2.3839, "step": 680 }, { "epoch": 0.05141579731743666, "grad_norm": 1.696765661239624, "learning_rate": 0.0002, "loss": 2.4535, "step": 690 }, { "epoch": 0.05216095380029806, "grad_norm": 1.4545871019363403, "learning_rate": 0.0002, "loss": 2.3242, "step": 700 }, { "epoch": 0.05290611028315946, "grad_norm": 1.4923096895217896, "learning_rate": 0.0002, "loss": 2.4713, "step": 710 }, { "epoch": 0.05365126676602087, "grad_norm": 1.483655571937561, "learning_rate": 0.0002, "loss": 2.4, "step": 720 }, { "epoch": 0.05439642324888227, "grad_norm": 1.5965553522109985, "learning_rate": 0.0002, "loss": 2.5278, "step": 730 }, { "epoch": 0.05514157973174367, "grad_norm": 1.788888931274414, "learning_rate": 0.0002, "loss": 2.5566, "step": 740 }, { "epoch": 0.05588673621460507, "grad_norm": 1.612257957458496, "learning_rate": 0.0002, "loss": 2.4796, "step": 750 }, { "epoch": 0.05663189269746647, "grad_norm": 1.4303230047225952, "learning_rate": 0.0002, "loss": 2.4086, "step": 760 }, { "epoch": 0.05737704918032787, "grad_norm": 1.534578800201416, "learning_rate": 0.0002, "loss": 2.6292, "step": 770 }, { "epoch": 0.05812220566318927, "grad_norm": 1.4243452548980713, "learning_rate": 0.0002, "loss": 2.5726, "step": 780 }, { "epoch": 0.05886736214605067, "grad_norm": 1.3264392614364624, "learning_rate": 0.0002, "loss": 2.552, "step": 790 }, { "epoch": 0.05961251862891207, "grad_norm": 1.251664400100708, "learning_rate": 0.0002, "loss": 2.2833, "step": 800 }, { "epoch": 0.06035767511177347, "grad_norm": 1.5511835813522339, "learning_rate": 0.0002, "loss": 2.5349, "step": 810 }, { "epoch": 0.06110283159463487, "grad_norm": 1.3495992422103882, "learning_rate": 0.0002, "loss": 2.2091, "step": 820 }, { "epoch": 0.06184798807749627, "grad_norm": 1.343342661857605, "learning_rate": 0.0002, "loss": 2.6122, "step": 830 }, { "epoch": 0.06259314456035768, "grad_norm": 1.568200707435608, "learning_rate": 0.0002, "loss": 2.3481, "step": 840 }, { "epoch": 0.06333830104321908, "grad_norm": 1.6847620010375977, "learning_rate": 0.0002, "loss": 2.4552, "step": 850 }, { "epoch": 0.06408345752608048, "grad_norm": 2.0226731300354004, "learning_rate": 0.0002, "loss": 2.4509, "step": 860 }, { "epoch": 0.06482861400894188, "grad_norm": 1.8059664964675903, "learning_rate": 0.0002, "loss": 2.4808, "step": 870 }, { "epoch": 0.06557377049180328, "grad_norm": 1.5953114032745361, "learning_rate": 0.0002, "loss": 2.5304, "step": 880 }, { "epoch": 0.06631892697466468, "grad_norm": 1.7731475830078125, "learning_rate": 0.0002, "loss": 2.258, "step": 890 }, { "epoch": 0.06706408345752608, "grad_norm": 1.8624380826950073, "learning_rate": 0.0002, "loss": 2.3362, "step": 900 }, { "epoch": 0.06780923994038748, "grad_norm": 1.6222556829452515, "learning_rate": 0.0002, "loss": 2.4066, "step": 910 }, { "epoch": 0.06855439642324888, "grad_norm": 1.7085922956466675, "learning_rate": 0.0002, "loss": 2.5117, "step": 920 }, { "epoch": 0.06929955290611028, "grad_norm": 1.6273415088653564, "learning_rate": 0.0002, "loss": 2.5034, "step": 930 }, { "epoch": 0.07004470938897168, "grad_norm": 1.5758957862854004, "learning_rate": 0.0002, "loss": 2.4744, "step": 940 }, { "epoch": 0.07078986587183309, "grad_norm": 1.7586950063705444, "learning_rate": 0.0002, "loss": 2.6029, "step": 950 }, { "epoch": 0.07153502235469449, "grad_norm": 1.7354320287704468, "learning_rate": 0.0002, "loss": 2.4572, "step": 960 }, { "epoch": 0.07228017883755589, "grad_norm": 1.440529704093933, "learning_rate": 0.0002, "loss": 2.4439, "step": 970 }, { "epoch": 0.07302533532041729, "grad_norm": 1.4725509881973267, "learning_rate": 0.0002, "loss": 2.4279, "step": 980 }, { "epoch": 0.07377049180327869, "grad_norm": 1.6604877710342407, "learning_rate": 0.0002, "loss": 2.4856, "step": 990 }, { "epoch": 0.07451564828614009, "grad_norm": 1.5507324934005737, "learning_rate": 0.0002, "loss": 2.5235, "step": 1000 }, { "epoch": 0.07526080476900149, "grad_norm": 1.6070083379745483, "learning_rate": 0.0002, "loss": 2.3157, "step": 1010 }, { "epoch": 0.07600596125186289, "grad_norm": 1.5939691066741943, "learning_rate": 0.0002, "loss": 2.5106, "step": 1020 }, { "epoch": 0.07675111773472429, "grad_norm": 1.68025803565979, "learning_rate": 0.0002, "loss": 2.4918, "step": 1030 }, { "epoch": 0.07749627421758569, "grad_norm": 1.6039890050888062, "learning_rate": 0.0002, "loss": 2.5583, "step": 1040 }, { "epoch": 0.07824143070044709, "grad_norm": 1.8732694387435913, "learning_rate": 0.0002, "loss": 2.2141, "step": 1050 }, { "epoch": 0.07898658718330849, "grad_norm": 1.4604535102844238, "learning_rate": 0.0002, "loss": 2.5011, "step": 1060 }, { "epoch": 0.07973174366616989, "grad_norm": 1.5717531442642212, "learning_rate": 0.0002, "loss": 2.6481, "step": 1070 }, { "epoch": 0.08047690014903129, "grad_norm": 1.6097593307495117, "learning_rate": 0.0002, "loss": 2.6329, "step": 1080 }, { "epoch": 0.08122205663189269, "grad_norm": 1.6716941595077515, "learning_rate": 0.0002, "loss": 2.2852, "step": 1090 }, { "epoch": 0.08196721311475409, "grad_norm": 1.4320539236068726, "learning_rate": 0.0002, "loss": 2.5, "step": 1100 }, { "epoch": 0.08271236959761549, "grad_norm": 1.76832914352417, "learning_rate": 0.0002, "loss": 2.4243, "step": 1110 }, { "epoch": 0.08345752608047691, "grad_norm": 1.5858458280563354, "learning_rate": 0.0002, "loss": 2.4635, "step": 1120 }, { "epoch": 0.08420268256333831, "grad_norm": 1.627699851989746, "learning_rate": 0.0002, "loss": 2.429, "step": 1130 }, { "epoch": 0.08494783904619971, "grad_norm": 1.723833441734314, "learning_rate": 0.0002, "loss": 2.5082, "step": 1140 }, { "epoch": 0.08569299552906111, "grad_norm": 1.6531169414520264, "learning_rate": 0.0002, "loss": 2.3722, "step": 1150 }, { "epoch": 0.08643815201192251, "grad_norm": 1.6412060260772705, "learning_rate": 0.0002, "loss": 2.4493, "step": 1160 }, { "epoch": 0.08718330849478391, "grad_norm": 1.6279152631759644, "learning_rate": 0.0002, "loss": 2.6463, "step": 1170 }, { "epoch": 0.08792846497764531, "grad_norm": 1.9763994216918945, "learning_rate": 0.0002, "loss": 2.5753, "step": 1180 }, { "epoch": 0.08867362146050671, "grad_norm": 1.5985665321350098, "learning_rate": 0.0002, "loss": 2.6349, "step": 1190 }, { "epoch": 0.08941877794336811, "grad_norm": 1.683590054512024, "learning_rate": 0.0002, "loss": 2.4348, "step": 1200 }, { "epoch": 0.09016393442622951, "grad_norm": 1.55161452293396, "learning_rate": 0.0002, "loss": 2.2263, "step": 1210 }, { "epoch": 0.09090909090909091, "grad_norm": 1.7723956108093262, "learning_rate": 0.0002, "loss": 2.5187, "step": 1220 }, { "epoch": 0.09165424739195231, "grad_norm": 2.007422924041748, "learning_rate": 0.0002, "loss": 2.2847, "step": 1230 }, { "epoch": 0.09239940387481371, "grad_norm": 1.6839237213134766, "learning_rate": 0.0002, "loss": 2.395, "step": 1240 }, { "epoch": 0.09314456035767511, "grad_norm": 1.8041167259216309, "learning_rate": 0.0002, "loss": 2.4219, "step": 1250 }, { "epoch": 0.09388971684053651, "grad_norm": 1.3533605337142944, "learning_rate": 0.0002, "loss": 2.1161, "step": 1260 }, { "epoch": 0.09463487332339791, "grad_norm": 1.5705360174179077, "learning_rate": 0.0002, "loss": 2.4224, "step": 1270 }, { "epoch": 0.09538002980625931, "grad_norm": 1.3010971546173096, "learning_rate": 0.0002, "loss": 2.7023, "step": 1280 }, { "epoch": 0.09612518628912071, "grad_norm": 1.61898934841156, "learning_rate": 0.0002, "loss": 2.4894, "step": 1290 }, { "epoch": 0.09687034277198212, "grad_norm": 1.570351243019104, "learning_rate": 0.0002, "loss": 2.6117, "step": 1300 }, { "epoch": 0.09761549925484352, "grad_norm": 1.5680445432662964, "learning_rate": 0.0002, "loss": 2.4442, "step": 1310 }, { "epoch": 0.09836065573770492, "grad_norm": 1.6972705125808716, "learning_rate": 0.0002, "loss": 2.4776, "step": 1320 }, { "epoch": 0.09910581222056632, "grad_norm": 1.4837958812713623, "learning_rate": 0.0002, "loss": 2.4469, "step": 1330 }, { "epoch": 0.09985096870342772, "grad_norm": 1.241639494895935, "learning_rate": 0.0002, "loss": 2.2837, "step": 1340 }, { "epoch": 0.10059612518628912, "grad_norm": 1.6141420602798462, "learning_rate": 0.0002, "loss": 2.4861, "step": 1350 }, { "epoch": 0.10134128166915052, "grad_norm": 1.3432039022445679, "learning_rate": 0.0002, "loss": 2.2677, "step": 1360 }, { "epoch": 0.10208643815201192, "grad_norm": 1.4515589475631714, "learning_rate": 0.0002, "loss": 2.4508, "step": 1370 }, { "epoch": 0.10283159463487332, "grad_norm": 1.6723533868789673, "learning_rate": 0.0002, "loss": 2.4985, "step": 1380 }, { "epoch": 0.10357675111773472, "grad_norm": 1.6158736944198608, "learning_rate": 0.0002, "loss": 2.5645, "step": 1390 }, { "epoch": 0.10432190760059612, "grad_norm": 1.7271533012390137, "learning_rate": 0.0002, "loss": 2.367, "step": 1400 }, { "epoch": 0.10506706408345752, "grad_norm": 1.1721924543380737, "learning_rate": 0.0002, "loss": 2.3531, "step": 1410 }, { "epoch": 0.10581222056631892, "grad_norm": 1.509311318397522, "learning_rate": 0.0002, "loss": 2.3562, "step": 1420 }, { "epoch": 0.10655737704918032, "grad_norm": 1.8995124101638794, "learning_rate": 0.0002, "loss": 2.325, "step": 1430 }, { "epoch": 0.10730253353204174, "grad_norm": 1.5647929906845093, "learning_rate": 0.0002, "loss": 2.306, "step": 1440 }, { "epoch": 0.10804769001490314, "grad_norm": 1.8889915943145752, "learning_rate": 0.0002, "loss": 2.3319, "step": 1450 }, { "epoch": 0.10879284649776454, "grad_norm": 1.7834174633026123, "learning_rate": 0.0002, "loss": 2.4158, "step": 1460 }, { "epoch": 0.10953800298062594, "grad_norm": 1.7446699142456055, "learning_rate": 0.0002, "loss": 2.5858, "step": 1470 }, { "epoch": 0.11028315946348734, "grad_norm": 1.6821390390396118, "learning_rate": 0.0002, "loss": 2.6529, "step": 1480 }, { "epoch": 0.11102831594634874, "grad_norm": 1.7135263681411743, "learning_rate": 0.0002, "loss": 2.4174, "step": 1490 }, { "epoch": 0.11177347242921014, "grad_norm": 1.6189689636230469, "learning_rate": 0.0002, "loss": 2.3491, "step": 1500 }, { "epoch": 0.11251862891207154, "grad_norm": 1.9968479871749878, "learning_rate": 0.0002, "loss": 2.3949, "step": 1510 }, { "epoch": 0.11326378539493294, "grad_norm": 1.7658995389938354, "learning_rate": 0.0002, "loss": 2.3559, "step": 1520 }, { "epoch": 0.11400894187779434, "grad_norm": 1.5398634672164917, "learning_rate": 0.0002, "loss": 2.3183, "step": 1530 }, { "epoch": 0.11475409836065574, "grad_norm": 1.558796763420105, "learning_rate": 0.0002, "loss": 2.5318, "step": 1540 }, { "epoch": 0.11549925484351714, "grad_norm": 1.7153369188308716, "learning_rate": 0.0002, "loss": 2.42, "step": 1550 }, { "epoch": 0.11624441132637854, "grad_norm": 1.8950843811035156, "learning_rate": 0.0002, "loss": 2.4338, "step": 1560 }, { "epoch": 0.11698956780923994, "grad_norm": 1.9553101062774658, "learning_rate": 0.0002, "loss": 2.3172, "step": 1570 }, { "epoch": 0.11773472429210134, "grad_norm": 2.0377368927001953, "learning_rate": 0.0002, "loss": 2.5465, "step": 1580 }, { "epoch": 0.11847988077496274, "grad_norm": 1.596413254737854, "learning_rate": 0.0002, "loss": 2.3895, "step": 1590 }, { "epoch": 0.11922503725782414, "grad_norm": 1.9717952013015747, "learning_rate": 0.0002, "loss": 2.33, "step": 1600 }, { "epoch": 0.11997019374068554, "grad_norm": 1.597959041595459, "learning_rate": 0.0002, "loss": 2.4298, "step": 1610 }, { "epoch": 0.12071535022354694, "grad_norm": 1.7834532260894775, "learning_rate": 0.0002, "loss": 2.4906, "step": 1620 }, { "epoch": 0.12146050670640834, "grad_norm": 1.7709592580795288, "learning_rate": 0.0002, "loss": 2.4961, "step": 1630 }, { "epoch": 0.12220566318926974, "grad_norm": 1.448915719985962, "learning_rate": 0.0002, "loss": 2.2579, "step": 1640 }, { "epoch": 0.12295081967213115, "grad_norm": 1.9186158180236816, "learning_rate": 0.0002, "loss": 2.6403, "step": 1650 }, { "epoch": 0.12369597615499255, "grad_norm": 1.7312026023864746, "learning_rate": 0.0002, "loss": 2.3614, "step": 1660 }, { "epoch": 0.12444113263785395, "grad_norm": 1.6252959966659546, "learning_rate": 0.0002, "loss": 2.1939, "step": 1670 }, { "epoch": 0.12518628912071536, "grad_norm": 1.485950231552124, "learning_rate": 0.0002, "loss": 2.3911, "step": 1680 }, { "epoch": 0.12593144560357675, "grad_norm": 2.1096255779266357, "learning_rate": 0.0002, "loss": 2.3905, "step": 1690 }, { "epoch": 0.12667660208643816, "grad_norm": 1.9784533977508545, "learning_rate": 0.0002, "loss": 2.5378, "step": 1700 }, { "epoch": 0.12742175856929955, "grad_norm": 1.6286430358886719, "learning_rate": 0.0002, "loss": 2.3893, "step": 1710 }, { "epoch": 0.12816691505216096, "grad_norm": 1.6326884031295776, "learning_rate": 0.0002, "loss": 2.2981, "step": 1720 }, { "epoch": 0.12891207153502235, "grad_norm": 1.8827307224273682, "learning_rate": 0.0002, "loss": 2.386, "step": 1730 }, { "epoch": 0.12965722801788376, "grad_norm": 1.5798028707504272, "learning_rate": 0.0002, "loss": 2.4409, "step": 1740 }, { "epoch": 0.13040238450074515, "grad_norm": 1.701941728591919, "learning_rate": 0.0002, "loss": 2.621, "step": 1750 }, { "epoch": 0.13114754098360656, "grad_norm": 2.052530527114868, "learning_rate": 0.0002, "loss": 2.5072, "step": 1760 }, { "epoch": 0.13189269746646795, "grad_norm": 1.7951122522354126, "learning_rate": 0.0002, "loss": 2.6495, "step": 1770 }, { "epoch": 0.13263785394932937, "grad_norm": 1.713895320892334, "learning_rate": 0.0002, "loss": 2.4414, "step": 1780 }, { "epoch": 0.13338301043219075, "grad_norm": 1.7314261198043823, "learning_rate": 0.0002, "loss": 2.5448, "step": 1790 }, { "epoch": 0.13412816691505217, "grad_norm": 2.169917583465576, "learning_rate": 0.0002, "loss": 2.3403, "step": 1800 }, { "epoch": 0.13487332339791355, "grad_norm": 2.260791063308716, "learning_rate": 0.0002, "loss": 2.3851, "step": 1810 }, { "epoch": 0.13561847988077497, "grad_norm": 1.5603179931640625, "learning_rate": 0.0002, "loss": 2.385, "step": 1820 }, { "epoch": 0.13636363636363635, "grad_norm": 1.854830026626587, "learning_rate": 0.0002, "loss": 2.5961, "step": 1830 }, { "epoch": 0.13710879284649777, "grad_norm": 2.0177578926086426, "learning_rate": 0.0002, "loss": 2.5359, "step": 1840 }, { "epoch": 0.13785394932935915, "grad_norm": 1.6412891149520874, "learning_rate": 0.0002, "loss": 2.5614, "step": 1850 }, { "epoch": 0.13859910581222057, "grad_norm": 1.498461127281189, "learning_rate": 0.0002, "loss": 2.4013, "step": 1860 }, { "epoch": 0.13934426229508196, "grad_norm": 1.7153528928756714, "learning_rate": 0.0002, "loss": 2.4077, "step": 1870 }, { "epoch": 0.14008941877794337, "grad_norm": 1.8465684652328491, "learning_rate": 0.0002, "loss": 2.4389, "step": 1880 }, { "epoch": 0.14083457526080476, "grad_norm": 1.687004566192627, "learning_rate": 0.0002, "loss": 2.629, "step": 1890 }, { "epoch": 0.14157973174366617, "grad_norm": 2.4556238651275635, "learning_rate": 0.0002, "loss": 2.4681, "step": 1900 }, { "epoch": 0.14232488822652756, "grad_norm": 1.6830357313156128, "learning_rate": 0.0002, "loss": 2.5962, "step": 1910 }, { "epoch": 0.14307004470938897, "grad_norm": 1.5959599018096924, "learning_rate": 0.0002, "loss": 2.3358, "step": 1920 }, { "epoch": 0.14381520119225039, "grad_norm": 1.9852540493011475, "learning_rate": 0.0002, "loss": 2.4023, "step": 1930 }, { "epoch": 0.14456035767511177, "grad_norm": 1.334253191947937, "learning_rate": 0.0002, "loss": 2.3208, "step": 1940 }, { "epoch": 0.1453055141579732, "grad_norm": 1.7445403337478638, "learning_rate": 0.0002, "loss": 2.7472, "step": 1950 }, { "epoch": 0.14605067064083457, "grad_norm": 2.1274497509002686, "learning_rate": 0.0002, "loss": 2.4091, "step": 1960 }, { "epoch": 0.146795827123696, "grad_norm": 1.9186792373657227, "learning_rate": 0.0002, "loss": 2.5525, "step": 1970 }, { "epoch": 0.14754098360655737, "grad_norm": 1.7423287630081177, "learning_rate": 0.0002, "loss": 2.4312, "step": 1980 }, { "epoch": 0.1482861400894188, "grad_norm": 1.6370166540145874, "learning_rate": 0.0002, "loss": 2.6371, "step": 1990 }, { "epoch": 0.14903129657228018, "grad_norm": 1.812752366065979, "learning_rate": 0.0002, "loss": 2.4224, "step": 2000 }, { "epoch": 0.1497764530551416, "grad_norm": 1.6510322093963623, "learning_rate": 0.0002, "loss": 2.4596, "step": 2010 }, { "epoch": 0.15052160953800298, "grad_norm": 1.7658458948135376, "learning_rate": 0.0002, "loss": 2.4772, "step": 2020 }, { "epoch": 0.1512667660208644, "grad_norm": 1.8121706247329712, "learning_rate": 0.0002, "loss": 2.3523, "step": 2030 }, { "epoch": 0.15201192250372578, "grad_norm": 1.7872707843780518, "learning_rate": 0.0002, "loss": 2.4009, "step": 2040 }, { "epoch": 0.1527570789865872, "grad_norm": 1.8161529302597046, "learning_rate": 0.0002, "loss": 2.2658, "step": 2050 }, { "epoch": 0.15350223546944858, "grad_norm": 3.1155641078948975, "learning_rate": 0.0002, "loss": 2.3572, "step": 2060 }, { "epoch": 0.15424739195231, "grad_norm": 3.371666193008423, "learning_rate": 0.0002, "loss": 2.5711, "step": 2070 }, { "epoch": 0.15499254843517138, "grad_norm": 2.1471800804138184, "learning_rate": 0.0002, "loss": 2.4525, "step": 2080 }, { "epoch": 0.1557377049180328, "grad_norm": 1.849825382232666, "learning_rate": 0.0002, "loss": 2.472, "step": 2090 }, { "epoch": 0.15648286140089418, "grad_norm": 1.9633269309997559, "learning_rate": 0.0002, "loss": 2.5328, "step": 2100 }, { "epoch": 0.1572280178837556, "grad_norm": 2.1593234539031982, "learning_rate": 0.0002, "loss": 2.5724, "step": 2110 }, { "epoch": 0.15797317436661698, "grad_norm": 1.8166416883468628, "learning_rate": 0.0002, "loss": 2.5599, "step": 2120 }, { "epoch": 0.1587183308494784, "grad_norm": 2.0000736713409424, "learning_rate": 0.0002, "loss": 2.5174, "step": 2130 }, { "epoch": 0.15946348733233978, "grad_norm": 2.016064167022705, "learning_rate": 0.0002, "loss": 2.4799, "step": 2140 }, { "epoch": 0.1602086438152012, "grad_norm": 2.289914846420288, "learning_rate": 0.0002, "loss": 2.4579, "step": 2150 }, { "epoch": 0.16095380029806258, "grad_norm": 2.0967648029327393, "learning_rate": 0.0002, "loss": 2.3974, "step": 2160 }, { "epoch": 0.161698956780924, "grad_norm": 1.9680815935134888, "learning_rate": 0.0002, "loss": 2.5335, "step": 2170 }, { "epoch": 0.16244411326378538, "grad_norm": 1.659155011177063, "learning_rate": 0.0002, "loss": 2.2785, "step": 2180 }, { "epoch": 0.1631892697466468, "grad_norm": 1.719580054283142, "learning_rate": 0.0002, "loss": 2.5193, "step": 2190 }, { "epoch": 0.16393442622950818, "grad_norm": 1.4163504838943481, "learning_rate": 0.0002, "loss": 2.4477, "step": 2200 }, { "epoch": 0.1646795827123696, "grad_norm": 1.4476326704025269, "learning_rate": 0.0002, "loss": 2.3973, "step": 2210 }, { "epoch": 0.16542473919523099, "grad_norm": 1.7087070941925049, "learning_rate": 0.0002, "loss": 2.5413, "step": 2220 }, { "epoch": 0.1661698956780924, "grad_norm": 1.813745379447937, "learning_rate": 0.0002, "loss": 2.3435, "step": 2230 }, { "epoch": 0.16691505216095381, "grad_norm": 2.2703053951263428, "learning_rate": 0.0002, "loss": 2.5846, "step": 2240 }, { "epoch": 0.1676602086438152, "grad_norm": 1.743831753730774, "learning_rate": 0.0002, "loss": 2.2399, "step": 2250 }, { "epoch": 0.16840536512667661, "grad_norm": 1.7628560066223145, "learning_rate": 0.0002, "loss": 2.5024, "step": 2260 }, { "epoch": 0.169150521609538, "grad_norm": 2.0905685424804688, "learning_rate": 0.0002, "loss": 2.3357, "step": 2270 }, { "epoch": 0.16989567809239942, "grad_norm": 1.7532408237457275, "learning_rate": 0.0002, "loss": 2.4824, "step": 2280 }, { "epoch": 0.1706408345752608, "grad_norm": 1.749121069908142, "learning_rate": 0.0002, "loss": 2.3893, "step": 2290 }, { "epoch": 0.17138599105812222, "grad_norm": 1.529807448387146, "learning_rate": 0.0002, "loss": 2.3581, "step": 2300 }, { "epoch": 0.1721311475409836, "grad_norm": 1.8684520721435547, "learning_rate": 0.0002, "loss": 2.3585, "step": 2310 }, { "epoch": 0.17287630402384502, "grad_norm": 1.7635807991027832, "learning_rate": 0.0002, "loss": 2.3862, "step": 2320 }, { "epoch": 0.1736214605067064, "grad_norm": 1.6727739572525024, "learning_rate": 0.0002, "loss": 2.5992, "step": 2330 }, { "epoch": 0.17436661698956782, "grad_norm": 1.7773075103759766, "learning_rate": 0.0002, "loss": 2.4468, "step": 2340 }, { "epoch": 0.1751117734724292, "grad_norm": 2.184798002243042, "learning_rate": 0.0002, "loss": 2.4697, "step": 2350 }, { "epoch": 0.17585692995529062, "grad_norm": 1.7383967638015747, "learning_rate": 0.0002, "loss": 2.4937, "step": 2360 }, { "epoch": 0.176602086438152, "grad_norm": 2.211831569671631, "learning_rate": 0.0002, "loss": 2.471, "step": 2370 }, { "epoch": 0.17734724292101342, "grad_norm": 1.5768284797668457, "learning_rate": 0.0002, "loss": 2.4485, "step": 2380 }, { "epoch": 0.1780923994038748, "grad_norm": 1.7147942781448364, "learning_rate": 0.0002, "loss": 2.3187, "step": 2390 }, { "epoch": 0.17883755588673622, "grad_norm": 1.5341167449951172, "learning_rate": 0.0002, "loss": 2.3842, "step": 2400 }, { "epoch": 0.1795827123695976, "grad_norm": 1.8212217092514038, "learning_rate": 0.0002, "loss": 2.2845, "step": 2410 }, { "epoch": 0.18032786885245902, "grad_norm": 1.5502907037734985, "learning_rate": 0.0002, "loss": 2.4956, "step": 2420 }, { "epoch": 0.1810730253353204, "grad_norm": 1.8575736284255981, "learning_rate": 0.0002, "loss": 2.4574, "step": 2430 }, { "epoch": 0.18181818181818182, "grad_norm": 1.594504475593567, "learning_rate": 0.0002, "loss": 2.4808, "step": 2440 }, { "epoch": 0.1825633383010432, "grad_norm": 1.864490032196045, "learning_rate": 0.0002, "loss": 2.6226, "step": 2450 }, { "epoch": 0.18330849478390462, "grad_norm": 1.9114779233932495, "learning_rate": 0.0002, "loss": 2.5032, "step": 2460 }, { "epoch": 0.184053651266766, "grad_norm": 2.119196653366089, "learning_rate": 0.0002, "loss": 2.4699, "step": 2470 }, { "epoch": 0.18479880774962743, "grad_norm": 1.8797602653503418, "learning_rate": 0.0002, "loss": 2.4705, "step": 2480 }, { "epoch": 0.1855439642324888, "grad_norm": 1.985633134841919, "learning_rate": 0.0002, "loss": 2.4216, "step": 2490 }, { "epoch": 0.18628912071535023, "grad_norm": 1.831678032875061, "learning_rate": 0.0002, "loss": 2.538, "step": 2500 }, { "epoch": 0.1870342771982116, "grad_norm": 1.7474476099014282, "learning_rate": 0.0002, "loss": 2.6348, "step": 2510 }, { "epoch": 0.18777943368107303, "grad_norm": 1.7333717346191406, "learning_rate": 0.0002, "loss": 2.4576, "step": 2520 }, { "epoch": 0.1885245901639344, "grad_norm": 2.0141854286193848, "learning_rate": 0.0002, "loss": 2.4277, "step": 2530 }, { "epoch": 0.18926974664679583, "grad_norm": 1.7154362201690674, "learning_rate": 0.0002, "loss": 2.428, "step": 2540 }, { "epoch": 0.19001490312965721, "grad_norm": 2.11297607421875, "learning_rate": 0.0002, "loss": 2.4868, "step": 2550 }, { "epoch": 0.19076005961251863, "grad_norm": 1.8067042827606201, "learning_rate": 0.0002, "loss": 2.4782, "step": 2560 }, { "epoch": 0.19150521609538004, "grad_norm": 2.3354551792144775, "learning_rate": 0.0002, "loss": 2.5139, "step": 2570 }, { "epoch": 0.19225037257824143, "grad_norm": 1.7424496412277222, "learning_rate": 0.0002, "loss": 2.4437, "step": 2580 }, { "epoch": 0.19299552906110284, "grad_norm": 2.1761672496795654, "learning_rate": 0.0002, "loss": 2.4239, "step": 2590 }, { "epoch": 0.19374068554396423, "grad_norm": 2.0857791900634766, "learning_rate": 0.0002, "loss": 2.2968, "step": 2600 }, { "epoch": 0.19448584202682564, "grad_norm": 2.1806142330169678, "learning_rate": 0.0002, "loss": 2.571, "step": 2610 }, { "epoch": 0.19523099850968703, "grad_norm": 1.7046921253204346, "learning_rate": 0.0002, "loss": 2.5633, "step": 2620 }, { "epoch": 0.19597615499254845, "grad_norm": 1.876073956489563, "learning_rate": 0.0002, "loss": 2.4484, "step": 2630 }, { "epoch": 0.19672131147540983, "grad_norm": 1.948696255683899, "learning_rate": 0.0002, "loss": 2.4158, "step": 2640 }, { "epoch": 0.19746646795827125, "grad_norm": 2.022000551223755, "learning_rate": 0.0002, "loss": 2.4718, "step": 2650 }, { "epoch": 0.19821162444113263, "grad_norm": 1.776353120803833, "learning_rate": 0.0002, "loss": 2.5832, "step": 2660 }, { "epoch": 0.19895678092399405, "grad_norm": 1.9837231636047363, "learning_rate": 0.0002, "loss": 2.5152, "step": 2670 }, { "epoch": 0.19970193740685543, "grad_norm": 1.8511583805084229, "learning_rate": 0.0002, "loss": 2.3081, "step": 2680 }, { "epoch": 0.20044709388971685, "grad_norm": 1.7415555715560913, "learning_rate": 0.0002, "loss": 2.3807, "step": 2690 }, { "epoch": 0.20119225037257824, "grad_norm": 1.9808768033981323, "learning_rate": 0.0002, "loss": 2.4238, "step": 2700 }, { "epoch": 0.20193740685543965, "grad_norm": 1.6676552295684814, "learning_rate": 0.0002, "loss": 2.4074, "step": 2710 }, { "epoch": 0.20268256333830104, "grad_norm": 2.046172857284546, "learning_rate": 0.0002, "loss": 2.5146, "step": 2720 }, { "epoch": 0.20342771982116245, "grad_norm": 2.0430774688720703, "learning_rate": 0.0002, "loss": 2.348, "step": 2730 }, { "epoch": 0.20417287630402384, "grad_norm": 1.978427767753601, "learning_rate": 0.0002, "loss": 2.4703, "step": 2740 }, { "epoch": 0.20491803278688525, "grad_norm": 2.2341721057891846, "learning_rate": 0.0002, "loss": 2.424, "step": 2750 }, { "epoch": 0.20566318926974664, "grad_norm": 1.874298095703125, "learning_rate": 0.0002, "loss": 2.5449, "step": 2760 }, { "epoch": 0.20640834575260805, "grad_norm": 2.0746755599975586, "learning_rate": 0.0002, "loss": 2.4735, "step": 2770 }, { "epoch": 0.20715350223546944, "grad_norm": 1.671237587928772, "learning_rate": 0.0002, "loss": 2.4185, "step": 2780 }, { "epoch": 0.20789865871833085, "grad_norm": 1.7436130046844482, "learning_rate": 0.0002, "loss": 2.5611, "step": 2790 }, { "epoch": 0.20864381520119224, "grad_norm": 1.991050124168396, "learning_rate": 0.0002, "loss": 2.5108, "step": 2800 }, { "epoch": 0.20938897168405365, "grad_norm": 1.893971562385559, "learning_rate": 0.0002, "loss": 2.4371, "step": 2810 }, { "epoch": 0.21013412816691504, "grad_norm": 2.048959970474243, "learning_rate": 0.0002, "loss": 2.4887, "step": 2820 }, { "epoch": 0.21087928464977646, "grad_norm": 1.4141104221343994, "learning_rate": 0.0002, "loss": 2.3666, "step": 2830 }, { "epoch": 0.21162444113263784, "grad_norm": 1.9136624336242676, "learning_rate": 0.0002, "loss": 2.5108, "step": 2840 }, { "epoch": 0.21236959761549926, "grad_norm": 1.8413383960723877, "learning_rate": 0.0002, "loss": 2.5956, "step": 2850 }, { "epoch": 0.21311475409836064, "grad_norm": 1.8322744369506836, "learning_rate": 0.0002, "loss": 2.2766, "step": 2860 }, { "epoch": 0.21385991058122206, "grad_norm": 2.0261011123657227, "learning_rate": 0.0002, "loss": 2.5494, "step": 2870 }, { "epoch": 0.21460506706408347, "grad_norm": 1.7044886350631714, "learning_rate": 0.0002, "loss": 2.453, "step": 2880 }, { "epoch": 0.21535022354694486, "grad_norm": 1.8733025789260864, "learning_rate": 0.0002, "loss": 2.4455, "step": 2890 }, { "epoch": 0.21609538002980627, "grad_norm": 1.7016903162002563, "learning_rate": 0.0002, "loss": 2.542, "step": 2900 }, { "epoch": 0.21684053651266766, "grad_norm": 1.8775333166122437, "learning_rate": 0.0002, "loss": 2.5536, "step": 2910 }, { "epoch": 0.21758569299552907, "grad_norm": 1.9875683784484863, "learning_rate": 0.0002, "loss": 2.365, "step": 2920 }, { "epoch": 0.21833084947839046, "grad_norm": 1.9816802740097046, "learning_rate": 0.0002, "loss": 2.5781, "step": 2930 }, { "epoch": 0.21907600596125187, "grad_norm": 2.2699484825134277, "learning_rate": 0.0002, "loss": 2.369, "step": 2940 }, { "epoch": 0.21982116244411326, "grad_norm": 1.795271635055542, "learning_rate": 0.0002, "loss": 2.5805, "step": 2950 }, { "epoch": 0.22056631892697467, "grad_norm": 2.1191015243530273, "learning_rate": 0.0002, "loss": 2.522, "step": 2960 }, { "epoch": 0.22131147540983606, "grad_norm": 1.8573635816574097, "learning_rate": 0.0002, "loss": 2.5885, "step": 2970 }, { "epoch": 0.22205663189269748, "grad_norm": 1.8743098974227905, "learning_rate": 0.0002, "loss": 2.4619, "step": 2980 }, { "epoch": 0.22280178837555886, "grad_norm": 2.030251979827881, "learning_rate": 0.0002, "loss": 2.4544, "step": 2990 }, { "epoch": 0.22354694485842028, "grad_norm": 1.7911770343780518, "learning_rate": 0.0002, "loss": 2.3729, "step": 3000 }, { "epoch": 0.22429210134128166, "grad_norm": 1.7852712869644165, "learning_rate": 0.0002, "loss": 2.411, "step": 3010 }, { "epoch": 0.22503725782414308, "grad_norm": 2.3478708267211914, "learning_rate": 0.0002, "loss": 2.4913, "step": 3020 }, { "epoch": 0.22578241430700446, "grad_norm": 1.9944161176681519, "learning_rate": 0.0002, "loss": 2.5399, "step": 3030 }, { "epoch": 0.22652757078986588, "grad_norm": 2.0674550533294678, "learning_rate": 0.0002, "loss": 2.4142, "step": 3040 }, { "epoch": 0.22727272727272727, "grad_norm": 1.7529555559158325, "learning_rate": 0.0002, "loss": 2.4124, "step": 3050 }, { "epoch": 0.22801788375558868, "grad_norm": 1.950682520866394, "learning_rate": 0.0002, "loss": 2.5125, "step": 3060 }, { "epoch": 0.22876304023845007, "grad_norm": 1.9079375267028809, "learning_rate": 0.0002, "loss": 2.2691, "step": 3070 }, { "epoch": 0.22950819672131148, "grad_norm": 2.3478922843933105, "learning_rate": 0.0002, "loss": 2.6288, "step": 3080 }, { "epoch": 0.23025335320417287, "grad_norm": 1.7229981422424316, "learning_rate": 0.0002, "loss": 2.4475, "step": 3090 }, { "epoch": 0.23099850968703428, "grad_norm": 1.970567226409912, "learning_rate": 0.0002, "loss": 2.6176, "step": 3100 }, { "epoch": 0.23174366616989567, "grad_norm": 2.1065728664398193, "learning_rate": 0.0002, "loss": 2.5292, "step": 3110 }, { "epoch": 0.23248882265275708, "grad_norm": 2.137432336807251, "learning_rate": 0.0002, "loss": 2.5971, "step": 3120 }, { "epoch": 0.23323397913561847, "grad_norm": 1.9328407049179077, "learning_rate": 0.0002, "loss": 2.4158, "step": 3130 }, { "epoch": 0.23397913561847988, "grad_norm": 1.981392741203308, "learning_rate": 0.0002, "loss": 2.524, "step": 3140 }, { "epoch": 0.23472429210134127, "grad_norm": 2.098461866378784, "learning_rate": 0.0002, "loss": 2.4319, "step": 3150 }, { "epoch": 0.23546944858420268, "grad_norm": 2.2548506259918213, "learning_rate": 0.0002, "loss": 2.608, "step": 3160 }, { "epoch": 0.23621460506706407, "grad_norm": 2.1737749576568604, "learning_rate": 0.0002, "loss": 2.4759, "step": 3170 }, { "epoch": 0.23695976154992549, "grad_norm": 2.0309979915618896, "learning_rate": 0.0002, "loss": 2.3443, "step": 3180 }, { "epoch": 0.23770491803278687, "grad_norm": 2.115638494491577, "learning_rate": 0.0002, "loss": 2.5721, "step": 3190 }, { "epoch": 0.23845007451564829, "grad_norm": 2.2513480186462402, "learning_rate": 0.0002, "loss": 2.4294, "step": 3200 }, { "epoch": 0.2391952309985097, "grad_norm": 1.984811544418335, "learning_rate": 0.0002, "loss": 2.5133, "step": 3210 }, { "epoch": 0.2399403874813711, "grad_norm": 1.9788024425506592, "learning_rate": 0.0002, "loss": 2.3574, "step": 3220 }, { "epoch": 0.2406855439642325, "grad_norm": 1.8824642896652222, "learning_rate": 0.0002, "loss": 2.3478, "step": 3230 }, { "epoch": 0.2414307004470939, "grad_norm": 1.9927945137023926, "learning_rate": 0.0002, "loss": 2.6732, "step": 3240 }, { "epoch": 0.2421758569299553, "grad_norm": 1.7224589586257935, "learning_rate": 0.0002, "loss": 2.5057, "step": 3250 }, { "epoch": 0.2429210134128167, "grad_norm": 1.787179708480835, "learning_rate": 0.0002, "loss": 2.4139, "step": 3260 }, { "epoch": 0.2436661698956781, "grad_norm": 1.9321664571762085, "learning_rate": 0.0002, "loss": 2.5715, "step": 3270 }, { "epoch": 0.2444113263785395, "grad_norm": 1.9817686080932617, "learning_rate": 0.0002, "loss": 2.5466, "step": 3280 }, { "epoch": 0.2451564828614009, "grad_norm": 1.8614225387573242, "learning_rate": 0.0002, "loss": 2.4936, "step": 3290 }, { "epoch": 0.2459016393442623, "grad_norm": 1.6355311870574951, "learning_rate": 0.0002, "loss": 2.3833, "step": 3300 }, { "epoch": 0.2466467958271237, "grad_norm": 1.9222790002822876, "learning_rate": 0.0002, "loss": 2.3884, "step": 3310 }, { "epoch": 0.2473919523099851, "grad_norm": 1.8250503540039062, "learning_rate": 0.0002, "loss": 2.4804, "step": 3320 }, { "epoch": 0.2481371087928465, "grad_norm": 1.9070334434509277, "learning_rate": 0.0002, "loss": 2.4982, "step": 3330 }, { "epoch": 0.2488822652757079, "grad_norm": 2.0816781520843506, "learning_rate": 0.0002, "loss": 2.188, "step": 3340 }, { "epoch": 0.2496274217585693, "grad_norm": 2.8250019550323486, "learning_rate": 0.0002, "loss": 2.1956, "step": 3350 }, { "epoch": 0.2503725782414307, "grad_norm": 2.0118353366851807, "learning_rate": 0.0002, "loss": 2.3953, "step": 3360 }, { "epoch": 0.2511177347242921, "grad_norm": 1.9895654916763306, "learning_rate": 0.0002, "loss": 2.4261, "step": 3370 }, { "epoch": 0.2518628912071535, "grad_norm": 1.6906330585479736, "learning_rate": 0.0002, "loss": 2.3543, "step": 3380 }, { "epoch": 0.2526080476900149, "grad_norm": 1.8363487720489502, "learning_rate": 0.0002, "loss": 2.6099, "step": 3390 }, { "epoch": 0.2533532041728763, "grad_norm": 1.8634984493255615, "learning_rate": 0.0002, "loss": 2.4341, "step": 3400 }, { "epoch": 0.2540983606557377, "grad_norm": 1.7584588527679443, "learning_rate": 0.0002, "loss": 2.3242, "step": 3410 }, { "epoch": 0.2548435171385991, "grad_norm": 1.542602300643921, "learning_rate": 0.0002, "loss": 2.5926, "step": 3420 }, { "epoch": 0.2555886736214605, "grad_norm": 1.7898099422454834, "learning_rate": 0.0002, "loss": 2.5387, "step": 3430 }, { "epoch": 0.2563338301043219, "grad_norm": 1.9139480590820312, "learning_rate": 0.0002, "loss": 2.1946, "step": 3440 }, { "epoch": 0.2570789865871833, "grad_norm": 1.7210464477539062, "learning_rate": 0.0002, "loss": 2.6303, "step": 3450 }, { "epoch": 0.2578241430700447, "grad_norm": 2.010288953781128, "learning_rate": 0.0002, "loss": 2.6645, "step": 3460 }, { "epoch": 0.2585692995529061, "grad_norm": 2.2922821044921875, "learning_rate": 0.0002, "loss": 2.353, "step": 3470 }, { "epoch": 0.2593144560357675, "grad_norm": 1.7492296695709229, "learning_rate": 0.0002, "loss": 2.5581, "step": 3480 }, { "epoch": 0.2600596125186289, "grad_norm": 1.845924735069275, "learning_rate": 0.0002, "loss": 2.4135, "step": 3490 }, { "epoch": 0.2608047690014903, "grad_norm": 1.926946759223938, "learning_rate": 0.0002, "loss": 2.5024, "step": 3500 }, { "epoch": 0.2615499254843517, "grad_norm": 2.1195075511932373, "learning_rate": 0.0002, "loss": 2.663, "step": 3510 }, { "epoch": 0.26229508196721313, "grad_norm": 1.7661279439926147, "learning_rate": 0.0002, "loss": 2.5196, "step": 3520 }, { "epoch": 0.2630402384500745, "grad_norm": 2.357307195663452, "learning_rate": 0.0002, "loss": 2.3874, "step": 3530 }, { "epoch": 0.2637853949329359, "grad_norm": 1.9379281997680664, "learning_rate": 0.0002, "loss": 2.5646, "step": 3540 }, { "epoch": 0.26453055141579734, "grad_norm": 1.89481782913208, "learning_rate": 0.0002, "loss": 2.5963, "step": 3550 }, { "epoch": 0.26527570789865873, "grad_norm": 1.9536447525024414, "learning_rate": 0.0002, "loss": 2.2617, "step": 3560 }, { "epoch": 0.2660208643815201, "grad_norm": 1.9169467687606812, "learning_rate": 0.0002, "loss": 2.4662, "step": 3570 }, { "epoch": 0.2667660208643815, "grad_norm": 2.1301636695861816, "learning_rate": 0.0002, "loss": 2.4761, "step": 3580 }, { "epoch": 0.26751117734724295, "grad_norm": 1.9134154319763184, "learning_rate": 0.0002, "loss": 2.5022, "step": 3590 }, { "epoch": 0.26825633383010433, "grad_norm": 2.04421067237854, "learning_rate": 0.0002, "loss": 2.3228, "step": 3600 }, { "epoch": 0.2690014903129657, "grad_norm": 1.7705532312393188, "learning_rate": 0.0002, "loss": 2.4066, "step": 3610 }, { "epoch": 0.2697466467958271, "grad_norm": 2.080298900604248, "learning_rate": 0.0002, "loss": 2.6129, "step": 3620 }, { "epoch": 0.27049180327868855, "grad_norm": 1.6139470338821411, "learning_rate": 0.0002, "loss": 2.5801, "step": 3630 }, { "epoch": 0.27123695976154993, "grad_norm": 2.054302453994751, "learning_rate": 0.0002, "loss": 2.3711, "step": 3640 }, { "epoch": 0.2719821162444113, "grad_norm": 2.338289260864258, "learning_rate": 0.0002, "loss": 2.5838, "step": 3650 }, { "epoch": 0.2727272727272727, "grad_norm": 1.7468085289001465, "learning_rate": 0.0002, "loss": 2.1876, "step": 3660 }, { "epoch": 0.27347242921013415, "grad_norm": 2.027275562286377, "learning_rate": 0.0002, "loss": 2.5105, "step": 3670 }, { "epoch": 0.27421758569299554, "grad_norm": 2.229505777359009, "learning_rate": 0.0002, "loss": 2.6577, "step": 3680 }, { "epoch": 0.2749627421758569, "grad_norm": 1.9320789575576782, "learning_rate": 0.0002, "loss": 2.2976, "step": 3690 }, { "epoch": 0.2757078986587183, "grad_norm": 2.186555862426758, "learning_rate": 0.0002, "loss": 2.3886, "step": 3700 }, { "epoch": 0.27645305514157975, "grad_norm": 1.918982982635498, "learning_rate": 0.0002, "loss": 2.5599, "step": 3710 }, { "epoch": 0.27719821162444114, "grad_norm": 1.8183304071426392, "learning_rate": 0.0002, "loss": 2.5157, "step": 3720 }, { "epoch": 0.2779433681073025, "grad_norm": 1.994059443473816, "learning_rate": 0.0002, "loss": 2.5395, "step": 3730 }, { "epoch": 0.2786885245901639, "grad_norm": 1.7807224988937378, "learning_rate": 0.0002, "loss": 2.3498, "step": 3740 }, { "epoch": 0.27943368107302535, "grad_norm": 2.1187775135040283, "learning_rate": 0.0002, "loss": 2.5581, "step": 3750 }, { "epoch": 0.28017883755588674, "grad_norm": 1.8966116905212402, "learning_rate": 0.0002, "loss": 2.5547, "step": 3760 }, { "epoch": 0.2809239940387481, "grad_norm": 2.044442653656006, "learning_rate": 0.0002, "loss": 2.5357, "step": 3770 }, { "epoch": 0.2816691505216095, "grad_norm": 1.8322268724441528, "learning_rate": 0.0002, "loss": 2.5751, "step": 3780 }, { "epoch": 0.28241430700447095, "grad_norm": 2.1471657752990723, "learning_rate": 0.0002, "loss": 2.5739, "step": 3790 }, { "epoch": 0.28315946348733234, "grad_norm": 1.54653000831604, "learning_rate": 0.0002, "loss": 2.4101, "step": 3800 }, { "epoch": 0.28390461997019373, "grad_norm": 2.0441126823425293, "learning_rate": 0.0002, "loss": 2.4344, "step": 3810 }, { "epoch": 0.2846497764530551, "grad_norm": 2.10286021232605, "learning_rate": 0.0002, "loss": 2.4168, "step": 3820 }, { "epoch": 0.28539493293591656, "grad_norm": 2.1255736351013184, "learning_rate": 0.0002, "loss": 2.4448, "step": 3830 }, { "epoch": 0.28614008941877794, "grad_norm": 1.830354928970337, "learning_rate": 0.0002, "loss": 2.3284, "step": 3840 }, { "epoch": 0.28688524590163933, "grad_norm": 1.9017317295074463, "learning_rate": 0.0002, "loss": 2.51, "step": 3850 }, { "epoch": 0.28763040238450077, "grad_norm": 2.0456435680389404, "learning_rate": 0.0002, "loss": 2.5099, "step": 3860 }, { "epoch": 0.28837555886736216, "grad_norm": 2.036862373352051, "learning_rate": 0.0002, "loss": 2.418, "step": 3870 }, { "epoch": 0.28912071535022354, "grad_norm": 1.9456264972686768, "learning_rate": 0.0002, "loss": 2.371, "step": 3880 }, { "epoch": 0.28986587183308493, "grad_norm": 2.3842172622680664, "learning_rate": 0.0002, "loss": 2.5213, "step": 3890 }, { "epoch": 0.2906110283159464, "grad_norm": 1.9152741432189941, "learning_rate": 0.0002, "loss": 2.5191, "step": 3900 }, { "epoch": 0.29135618479880776, "grad_norm": 1.9884922504425049, "learning_rate": 0.0002, "loss": 2.4272, "step": 3910 }, { "epoch": 0.29210134128166915, "grad_norm": 1.7578502893447876, "learning_rate": 0.0002, "loss": 2.4975, "step": 3920 }, { "epoch": 0.29284649776453053, "grad_norm": 2.5302071571350098, "learning_rate": 0.0002, "loss": 2.3564, "step": 3930 }, { "epoch": 0.293591654247392, "grad_norm": 2.2181880474090576, "learning_rate": 0.0002, "loss": 2.6365, "step": 3940 }, { "epoch": 0.29433681073025336, "grad_norm": 1.8359179496765137, "learning_rate": 0.0002, "loss": 2.4825, "step": 3950 }, { "epoch": 0.29508196721311475, "grad_norm": 1.8330084085464478, "learning_rate": 0.0002, "loss": 2.3746, "step": 3960 }, { "epoch": 0.29582712369597614, "grad_norm": 2.0700743198394775, "learning_rate": 0.0002, "loss": 2.4456, "step": 3970 }, { "epoch": 0.2965722801788376, "grad_norm": 2.0610342025756836, "learning_rate": 0.0002, "loss": 2.5419, "step": 3980 }, { "epoch": 0.29731743666169896, "grad_norm": 2.0633559226989746, "learning_rate": 0.0002, "loss": 2.4911, "step": 3990 }, { "epoch": 0.29806259314456035, "grad_norm": 1.894938588142395, "learning_rate": 0.0002, "loss": 2.5311, "step": 4000 }, { "epoch": 0.29880774962742174, "grad_norm": 2.153447389602661, "learning_rate": 0.0002, "loss": 2.5135, "step": 4010 }, { "epoch": 0.2995529061102832, "grad_norm": 1.8810901641845703, "learning_rate": 0.0002, "loss": 2.5123, "step": 4020 }, { "epoch": 0.30029806259314457, "grad_norm": 2.828382730484009, "learning_rate": 0.0002, "loss": 2.4624, "step": 4030 }, { "epoch": 0.30104321907600595, "grad_norm": 1.5955597162246704, "learning_rate": 0.0002, "loss": 2.4029, "step": 4040 }, { "epoch": 0.30178837555886734, "grad_norm": 2.158923864364624, "learning_rate": 0.0002, "loss": 2.3133, "step": 4050 }, { "epoch": 0.3025335320417288, "grad_norm": 2.044170379638672, "learning_rate": 0.0002, "loss": 2.5106, "step": 4060 }, { "epoch": 0.30327868852459017, "grad_norm": 2.1641745567321777, "learning_rate": 0.0002, "loss": 2.3511, "step": 4070 }, { "epoch": 0.30402384500745155, "grad_norm": 1.7815321683883667, "learning_rate": 0.0002, "loss": 2.47, "step": 4080 }, { "epoch": 0.30476900149031294, "grad_norm": 1.7963491678237915, "learning_rate": 0.0002, "loss": 2.431, "step": 4090 }, { "epoch": 0.3055141579731744, "grad_norm": 1.8735893964767456, "learning_rate": 0.0002, "loss": 2.4197, "step": 4100 }, { "epoch": 0.30625931445603577, "grad_norm": 1.8785910606384277, "learning_rate": 0.0002, "loss": 2.4681, "step": 4110 }, { "epoch": 0.30700447093889716, "grad_norm": 2.711383104324341, "learning_rate": 0.0002, "loss": 2.5373, "step": 4120 }, { "epoch": 0.30774962742175854, "grad_norm": 1.837888479232788, "learning_rate": 0.0002, "loss": 2.4393, "step": 4130 }, { "epoch": 0.30849478390462, "grad_norm": 2.044309616088867, "learning_rate": 0.0002, "loss": 2.5764, "step": 4140 }, { "epoch": 0.30923994038748137, "grad_norm": 2.052886486053467, "learning_rate": 0.0002, "loss": 2.5038, "step": 4150 }, { "epoch": 0.30998509687034276, "grad_norm": 2.0241613388061523, "learning_rate": 0.0002, "loss": 2.6383, "step": 4160 }, { "epoch": 0.3107302533532042, "grad_norm": 1.9976118803024292, "learning_rate": 0.0002, "loss": 2.5346, "step": 4170 }, { "epoch": 0.3114754098360656, "grad_norm": 1.7011710405349731, "learning_rate": 0.0002, "loss": 2.5725, "step": 4180 }, { "epoch": 0.312220566318927, "grad_norm": 2.266010284423828, "learning_rate": 0.0002, "loss": 2.473, "step": 4190 }, { "epoch": 0.31296572280178836, "grad_norm": 1.793825626373291, "learning_rate": 0.0002, "loss": 2.3846, "step": 4200 }, { "epoch": 0.3137108792846498, "grad_norm": 2.258878469467163, "learning_rate": 0.0002, "loss": 2.6184, "step": 4210 }, { "epoch": 0.3144560357675112, "grad_norm": 2.1145365238189697, "learning_rate": 0.0002, "loss": 2.5497, "step": 4220 }, { "epoch": 0.3152011922503726, "grad_norm": 2.1772029399871826, "learning_rate": 0.0002, "loss": 2.4528, "step": 4230 }, { "epoch": 0.31594634873323396, "grad_norm": 2.5457372665405273, "learning_rate": 0.0002, "loss": 2.6091, "step": 4240 }, { "epoch": 0.3166915052160954, "grad_norm": 1.8056083917617798, "learning_rate": 0.0002, "loss": 2.48, "step": 4250 }, { "epoch": 0.3174366616989568, "grad_norm": 2.0121543407440186, "learning_rate": 0.0002, "loss": 2.5622, "step": 4260 }, { "epoch": 0.3181818181818182, "grad_norm": 2.6106455326080322, "learning_rate": 0.0002, "loss": 2.5243, "step": 4270 }, { "epoch": 0.31892697466467956, "grad_norm": 1.9244214296340942, "learning_rate": 0.0002, "loss": 2.4204, "step": 4280 }, { "epoch": 0.319672131147541, "grad_norm": 2.1080305576324463, "learning_rate": 0.0002, "loss": 2.4296, "step": 4290 }, { "epoch": 0.3204172876304024, "grad_norm": 2.309900999069214, "learning_rate": 0.0002, "loss": 2.4689, "step": 4300 }, { "epoch": 0.3211624441132638, "grad_norm": 2.2716972827911377, "learning_rate": 0.0002, "loss": 2.3885, "step": 4310 }, { "epoch": 0.32190760059612517, "grad_norm": 2.2576944828033447, "learning_rate": 0.0002, "loss": 2.3281, "step": 4320 }, { "epoch": 0.3226527570789866, "grad_norm": 1.943267583847046, "learning_rate": 0.0002, "loss": 2.4674, "step": 4330 }, { "epoch": 0.323397913561848, "grad_norm": 1.886509895324707, "learning_rate": 0.0002, "loss": 2.491, "step": 4340 }, { "epoch": 0.3241430700447094, "grad_norm": 1.9226173162460327, "learning_rate": 0.0002, "loss": 2.4899, "step": 4350 }, { "epoch": 0.32488822652757077, "grad_norm": 2.016324043273926, "learning_rate": 0.0002, "loss": 2.4347, "step": 4360 }, { "epoch": 0.3256333830104322, "grad_norm": 1.8798201084136963, "learning_rate": 0.0002, "loss": 2.7045, "step": 4370 }, { "epoch": 0.3263785394932936, "grad_norm": 2.0897727012634277, "learning_rate": 0.0002, "loss": 2.4779, "step": 4380 }, { "epoch": 0.327123695976155, "grad_norm": 2.4956274032592773, "learning_rate": 0.0002, "loss": 2.5188, "step": 4390 }, { "epoch": 0.32786885245901637, "grad_norm": 2.08056378364563, "learning_rate": 0.0002, "loss": 2.4447, "step": 4400 }, { "epoch": 0.3286140089418778, "grad_norm": 2.2066187858581543, "learning_rate": 0.0002, "loss": 2.5474, "step": 4410 }, { "epoch": 0.3293591654247392, "grad_norm": 1.8168262243270874, "learning_rate": 0.0002, "loss": 2.5207, "step": 4420 }, { "epoch": 0.3301043219076006, "grad_norm": 2.649477481842041, "learning_rate": 0.0002, "loss": 2.4138, "step": 4430 }, { "epoch": 0.33084947839046197, "grad_norm": 2.3449325561523438, "learning_rate": 0.0002, "loss": 2.5474, "step": 4440 }, { "epoch": 0.3315946348733234, "grad_norm": 1.7262647151947021, "learning_rate": 0.0002, "loss": 2.5911, "step": 4450 }, { "epoch": 0.3323397913561848, "grad_norm": 2.0296733379364014, "learning_rate": 0.0002, "loss": 2.5417, "step": 4460 }, { "epoch": 0.3330849478390462, "grad_norm": 2.036099672317505, "learning_rate": 0.0002, "loss": 2.3912, "step": 4470 }, { "epoch": 0.33383010432190763, "grad_norm": 1.9934238195419312, "learning_rate": 0.0002, "loss": 2.5022, "step": 4480 }, { "epoch": 0.334575260804769, "grad_norm": 2.1589412689208984, "learning_rate": 0.0002, "loss": 2.5989, "step": 4490 }, { "epoch": 0.3353204172876304, "grad_norm": 1.8449981212615967, "learning_rate": 0.0002, "loss": 2.233, "step": 4500 }, { "epoch": 0.3360655737704918, "grad_norm": 1.7662941217422485, "learning_rate": 0.0002, "loss": 2.5521, "step": 4510 }, { "epoch": 0.33681073025335323, "grad_norm": 2.3483681678771973, "learning_rate": 0.0002, "loss": 2.4731, "step": 4520 }, { "epoch": 0.3375558867362146, "grad_norm": 1.8929402828216553, "learning_rate": 0.0002, "loss": 2.4925, "step": 4530 }, { "epoch": 0.338301043219076, "grad_norm": 1.988408088684082, "learning_rate": 0.0002, "loss": 2.5581, "step": 4540 }, { "epoch": 0.3390461997019374, "grad_norm": 2.3717474937438965, "learning_rate": 0.0002, "loss": 2.5353, "step": 4550 }, { "epoch": 0.33979135618479883, "grad_norm": 2.058992862701416, "learning_rate": 0.0002, "loss": 2.4383, "step": 4560 }, { "epoch": 0.3405365126676602, "grad_norm": 2.3037335872650146, "learning_rate": 0.0002, "loss": 2.6187, "step": 4570 }, { "epoch": 0.3412816691505216, "grad_norm": 2.0833449363708496, "learning_rate": 0.0002, "loss": 2.6367, "step": 4580 }, { "epoch": 0.342026825633383, "grad_norm": 1.6986418962478638, "learning_rate": 0.0002, "loss": 2.343, "step": 4590 }, { "epoch": 0.34277198211624443, "grad_norm": 4.1914472579956055, "learning_rate": 0.0002, "loss": 2.6611, "step": 4600 }, { "epoch": 0.3435171385991058, "grad_norm": 2.1022298336029053, "learning_rate": 0.0002, "loss": 2.5226, "step": 4610 }, { "epoch": 0.3442622950819672, "grad_norm": 2.1484179496765137, "learning_rate": 0.0002, "loss": 2.3475, "step": 4620 }, { "epoch": 0.3450074515648286, "grad_norm": 2.1597182750701904, "learning_rate": 0.0002, "loss": 2.5566, "step": 4630 }, { "epoch": 0.34575260804769004, "grad_norm": 2.0153634548187256, "learning_rate": 0.0002, "loss": 2.399, "step": 4640 }, { "epoch": 0.3464977645305514, "grad_norm": 4.205637454986572, "learning_rate": 0.0002, "loss": 2.4738, "step": 4650 }, { "epoch": 0.3472429210134128, "grad_norm": 1.965390682220459, "learning_rate": 0.0002, "loss": 2.5928, "step": 4660 }, { "epoch": 0.3479880774962742, "grad_norm": 2.318188428878784, "learning_rate": 0.0002, "loss": 2.3906, "step": 4670 }, { "epoch": 0.34873323397913564, "grad_norm": 2.16817045211792, "learning_rate": 0.0002, "loss": 2.5063, "step": 4680 }, { "epoch": 0.349478390461997, "grad_norm": 1.8313651084899902, "learning_rate": 0.0002, "loss": 2.3634, "step": 4690 }, { "epoch": 0.3502235469448584, "grad_norm": 2.083974599838257, "learning_rate": 0.0002, "loss": 2.4984, "step": 4700 }, { "epoch": 0.3509687034277198, "grad_norm": 2.033154249191284, "learning_rate": 0.0002, "loss": 2.5167, "step": 4710 }, { "epoch": 0.35171385991058124, "grad_norm": 1.7030832767486572, "learning_rate": 0.0002, "loss": 2.3332, "step": 4720 }, { "epoch": 0.3524590163934426, "grad_norm": 2.236445188522339, "learning_rate": 0.0002, "loss": 2.5903, "step": 4730 }, { "epoch": 0.353204172876304, "grad_norm": 2.0333669185638428, "learning_rate": 0.0002, "loss": 2.6599, "step": 4740 }, { "epoch": 0.3539493293591654, "grad_norm": 2.043572425842285, "learning_rate": 0.0002, "loss": 2.5685, "step": 4750 }, { "epoch": 0.35469448584202684, "grad_norm": 1.7955186367034912, "learning_rate": 0.0002, "loss": 2.4531, "step": 4760 }, { "epoch": 0.3554396423248882, "grad_norm": 2.064957857131958, "learning_rate": 0.0002, "loss": 2.5, "step": 4770 }, { "epoch": 0.3561847988077496, "grad_norm": 2.2814271450042725, "learning_rate": 0.0002, "loss": 2.3527, "step": 4780 }, { "epoch": 0.356929955290611, "grad_norm": 2.179020881652832, "learning_rate": 0.0002, "loss": 2.4888, "step": 4790 }, { "epoch": 0.35767511177347244, "grad_norm": 1.7410861253738403, "learning_rate": 0.0002, "loss": 2.3833, "step": 4800 }, { "epoch": 0.35842026825633383, "grad_norm": 2.1326522827148438, "learning_rate": 0.0002, "loss": 2.5156, "step": 4810 }, { "epoch": 0.3591654247391952, "grad_norm": 2.075561761856079, "learning_rate": 0.0002, "loss": 2.4855, "step": 4820 }, { "epoch": 0.35991058122205666, "grad_norm": 2.168584108352661, "learning_rate": 0.0002, "loss": 2.5904, "step": 4830 }, { "epoch": 0.36065573770491804, "grad_norm": 2.0109267234802246, "learning_rate": 0.0002, "loss": 2.502, "step": 4840 }, { "epoch": 0.36140089418777943, "grad_norm": 1.8693374395370483, "learning_rate": 0.0002, "loss": 2.531, "step": 4850 }, { "epoch": 0.3621460506706408, "grad_norm": 2.561384439468384, "learning_rate": 0.0002, "loss": 2.2466, "step": 4860 }, { "epoch": 0.36289120715350226, "grad_norm": 2.319011688232422, "learning_rate": 0.0002, "loss": 2.4845, "step": 4870 }, { "epoch": 0.36363636363636365, "grad_norm": 2.0164289474487305, "learning_rate": 0.0002, "loss": 2.6448, "step": 4880 }, { "epoch": 0.36438152011922503, "grad_norm": 1.8674942255020142, "learning_rate": 0.0002, "loss": 2.4344, "step": 4890 }, { "epoch": 0.3651266766020864, "grad_norm": 1.8700525760650635, "learning_rate": 0.0002, "loss": 2.412, "step": 4900 }, { "epoch": 0.36587183308494786, "grad_norm": 1.9874043464660645, "learning_rate": 0.0002, "loss": 2.5567, "step": 4910 }, { "epoch": 0.36661698956780925, "grad_norm": 1.914652943611145, "learning_rate": 0.0002, "loss": 2.5984, "step": 4920 }, { "epoch": 0.36736214605067063, "grad_norm": 2.1003236770629883, "learning_rate": 0.0002, "loss": 2.6851, "step": 4930 }, { "epoch": 0.368107302533532, "grad_norm": 1.9648061990737915, "learning_rate": 0.0002, "loss": 2.4207, "step": 4940 }, { "epoch": 0.36885245901639346, "grad_norm": 2.0232656002044678, "learning_rate": 0.0002, "loss": 2.4922, "step": 4950 }, { "epoch": 0.36959761549925485, "grad_norm": 2.0865535736083984, "learning_rate": 0.0002, "loss": 2.6409, "step": 4960 }, { "epoch": 0.37034277198211624, "grad_norm": 1.8709211349487305, "learning_rate": 0.0002, "loss": 2.3267, "step": 4970 }, { "epoch": 0.3710879284649776, "grad_norm": 2.093190908432007, "learning_rate": 0.0002, "loss": 2.3293, "step": 4980 }, { "epoch": 0.37183308494783907, "grad_norm": 2.2843077182769775, "learning_rate": 0.0002, "loss": 2.5344, "step": 4990 }, { "epoch": 0.37257824143070045, "grad_norm": 1.9902077913284302, "learning_rate": 0.0002, "loss": 2.4221, "step": 5000 }, { "epoch": 0.37332339791356184, "grad_norm": 1.827880859375, "learning_rate": 0.0002, "loss": 2.3791, "step": 5010 }, { "epoch": 0.3740685543964232, "grad_norm": 2.0986554622650146, "learning_rate": 0.0002, "loss": 2.5067, "step": 5020 }, { "epoch": 0.37481371087928467, "grad_norm": 2.260951519012451, "learning_rate": 0.0002, "loss": 2.5156, "step": 5030 }, { "epoch": 0.37555886736214605, "grad_norm": 2.1205878257751465, "learning_rate": 0.0002, "loss": 2.524, "step": 5040 }, { "epoch": 0.37630402384500744, "grad_norm": 1.7332857847213745, "learning_rate": 0.0002, "loss": 2.4875, "step": 5050 }, { "epoch": 0.3770491803278688, "grad_norm": 2.1427862644195557, "learning_rate": 0.0002, "loss": 2.3529, "step": 5060 }, { "epoch": 0.37779433681073027, "grad_norm": 2.6252365112304688, "learning_rate": 0.0002, "loss": 2.6104, "step": 5070 }, { "epoch": 0.37853949329359166, "grad_norm": 2.0142056941986084, "learning_rate": 0.0002, "loss": 2.5434, "step": 5080 }, { "epoch": 0.37928464977645304, "grad_norm": 2.4451770782470703, "learning_rate": 0.0002, "loss": 2.4673, "step": 5090 }, { "epoch": 0.38002980625931443, "grad_norm": 2.5081820487976074, "learning_rate": 0.0002, "loss": 2.4091, "step": 5100 }, { "epoch": 0.38077496274217587, "grad_norm": 2.149099588394165, "learning_rate": 0.0002, "loss": 2.4014, "step": 5110 }, { "epoch": 0.38152011922503726, "grad_norm": 2.3913164138793945, "learning_rate": 0.0002, "loss": 2.437, "step": 5120 }, { "epoch": 0.38226527570789864, "grad_norm": 2.1281864643096924, "learning_rate": 0.0002, "loss": 2.5205, "step": 5130 }, { "epoch": 0.3830104321907601, "grad_norm": 1.710132122039795, "learning_rate": 0.0002, "loss": 2.3987, "step": 5140 }, { "epoch": 0.3837555886736215, "grad_norm": 2.173602342605591, "learning_rate": 0.0002, "loss": 2.5606, "step": 5150 }, { "epoch": 0.38450074515648286, "grad_norm": 2.032154083251953, "learning_rate": 0.0002, "loss": 2.5223, "step": 5160 }, { "epoch": 0.38524590163934425, "grad_norm": 2.1516082286834717, "learning_rate": 0.0002, "loss": 2.4851, "step": 5170 }, { "epoch": 0.3859910581222057, "grad_norm": 1.9636366367340088, "learning_rate": 0.0002, "loss": 2.3907, "step": 5180 }, { "epoch": 0.3867362146050671, "grad_norm": 2.3789303302764893, "learning_rate": 0.0002, "loss": 2.3962, "step": 5190 }, { "epoch": 0.38748137108792846, "grad_norm": 2.102897882461548, "learning_rate": 0.0002, "loss": 2.6373, "step": 5200 }, { "epoch": 0.38822652757078985, "grad_norm": 2.0494508743286133, "learning_rate": 0.0002, "loss": 2.4939, "step": 5210 }, { "epoch": 0.3889716840536513, "grad_norm": 1.95903480052948, "learning_rate": 0.0002, "loss": 2.4768, "step": 5220 }, { "epoch": 0.3897168405365127, "grad_norm": 2.15665864944458, "learning_rate": 0.0002, "loss": 2.5624, "step": 5230 }, { "epoch": 0.39046199701937406, "grad_norm": 2.1582460403442383, "learning_rate": 0.0002, "loss": 2.4983, "step": 5240 }, { "epoch": 0.39120715350223545, "grad_norm": 1.939427375793457, "learning_rate": 0.0002, "loss": 2.4378, "step": 5250 }, { "epoch": 0.3919523099850969, "grad_norm": 1.7251907587051392, "learning_rate": 0.0002, "loss": 2.3679, "step": 5260 }, { "epoch": 0.3926974664679583, "grad_norm": 1.9326486587524414, "learning_rate": 0.0002, "loss": 2.5102, "step": 5270 }, { "epoch": 0.39344262295081966, "grad_norm": 2.2457611560821533, "learning_rate": 0.0002, "loss": 2.6181, "step": 5280 }, { "epoch": 0.39418777943368105, "grad_norm": 2.027223825454712, "learning_rate": 0.0002, "loss": 2.3882, "step": 5290 }, { "epoch": 0.3949329359165425, "grad_norm": 2.135723829269409, "learning_rate": 0.0002, "loss": 2.3614, "step": 5300 }, { "epoch": 0.3956780923994039, "grad_norm": 2.2400708198547363, "learning_rate": 0.0002, "loss": 2.4794, "step": 5310 }, { "epoch": 0.39642324888226527, "grad_norm": 2.0185799598693848, "learning_rate": 0.0002, "loss": 2.5809, "step": 5320 }, { "epoch": 0.39716840536512665, "grad_norm": 2.2141106128692627, "learning_rate": 0.0002, "loss": 2.5126, "step": 5330 }, { "epoch": 0.3979135618479881, "grad_norm": 2.2849326133728027, "learning_rate": 0.0002, "loss": 2.3855, "step": 5340 }, { "epoch": 0.3986587183308495, "grad_norm": 1.7335988283157349, "learning_rate": 0.0002, "loss": 2.3463, "step": 5350 }, { "epoch": 0.39940387481371087, "grad_norm": 1.9355789422988892, "learning_rate": 0.0002, "loss": 2.5056, "step": 5360 }, { "epoch": 0.40014903129657226, "grad_norm": 1.7740095853805542, "learning_rate": 0.0002, "loss": 2.5791, "step": 5370 }, { "epoch": 0.4008941877794337, "grad_norm": 1.7497676610946655, "learning_rate": 0.0002, "loss": 2.4308, "step": 5380 }, { "epoch": 0.4016393442622951, "grad_norm": 2.0560314655303955, "learning_rate": 0.0002, "loss": 2.4581, "step": 5390 }, { "epoch": 0.40238450074515647, "grad_norm": 2.065265655517578, "learning_rate": 0.0002, "loss": 2.5172, "step": 5400 }, { "epoch": 0.40312965722801786, "grad_norm": 2.282453775405884, "learning_rate": 0.0002, "loss": 2.3409, "step": 5410 }, { "epoch": 0.4038748137108793, "grad_norm": 2.3466989994049072, "learning_rate": 0.0002, "loss": 2.5305, "step": 5420 }, { "epoch": 0.4046199701937407, "grad_norm": 2.2113606929779053, "learning_rate": 0.0002, "loss": 2.6027, "step": 5430 }, { "epoch": 0.40536512667660207, "grad_norm": 2.149338483810425, "learning_rate": 0.0002, "loss": 2.3704, "step": 5440 }, { "epoch": 0.4061102831594635, "grad_norm": 2.129307270050049, "learning_rate": 0.0002, "loss": 2.3367, "step": 5450 }, { "epoch": 0.4068554396423249, "grad_norm": 2.2620790004730225, "learning_rate": 0.0002, "loss": 2.5256, "step": 5460 }, { "epoch": 0.4076005961251863, "grad_norm": 2.623889684677124, "learning_rate": 0.0002, "loss": 2.5516, "step": 5470 }, { "epoch": 0.4083457526080477, "grad_norm": 1.7352521419525146, "learning_rate": 0.0002, "loss": 2.2739, "step": 5480 }, { "epoch": 0.4090909090909091, "grad_norm": 2.0829408168792725, "learning_rate": 0.0002, "loss": 2.5512, "step": 5490 }, { "epoch": 0.4098360655737705, "grad_norm": 2.000159978866577, "learning_rate": 0.0002, "loss": 2.3084, "step": 5500 }, { "epoch": 0.4105812220566319, "grad_norm": 2.665837526321411, "learning_rate": 0.0002, "loss": 2.3492, "step": 5510 }, { "epoch": 0.4113263785394933, "grad_norm": 2.220322608947754, "learning_rate": 0.0002, "loss": 2.5528, "step": 5520 }, { "epoch": 0.4120715350223547, "grad_norm": 2.322227954864502, "learning_rate": 0.0002, "loss": 2.4756, "step": 5530 }, { "epoch": 0.4128166915052161, "grad_norm": 2.0118496417999268, "learning_rate": 0.0002, "loss": 2.564, "step": 5540 }, { "epoch": 0.4135618479880775, "grad_norm": 2.0772318840026855, "learning_rate": 0.0002, "loss": 2.583, "step": 5550 }, { "epoch": 0.4143070044709389, "grad_norm": 2.002246141433716, "learning_rate": 0.0002, "loss": 2.6718, "step": 5560 }, { "epoch": 0.4150521609538003, "grad_norm": 2.208174228668213, "learning_rate": 0.0002, "loss": 2.4075, "step": 5570 }, { "epoch": 0.4157973174366617, "grad_norm": 2.2165260314941406, "learning_rate": 0.0002, "loss": 2.6347, "step": 5580 }, { "epoch": 0.4165424739195231, "grad_norm": 2.365262508392334, "learning_rate": 0.0002, "loss": 2.5209, "step": 5590 }, { "epoch": 0.4172876304023845, "grad_norm": 2.264592170715332, "learning_rate": 0.0002, "loss": 2.4414, "step": 5600 }, { "epoch": 0.4180327868852459, "grad_norm": 1.9426195621490479, "learning_rate": 0.0002, "loss": 2.313, "step": 5610 }, { "epoch": 0.4187779433681073, "grad_norm": 2.155670642852783, "learning_rate": 0.0002, "loss": 2.5946, "step": 5620 }, { "epoch": 0.4195230998509687, "grad_norm": 1.9506397247314453, "learning_rate": 0.0002, "loss": 2.4709, "step": 5630 }, { "epoch": 0.4202682563338301, "grad_norm": 2.3226125240325928, "learning_rate": 0.0002, "loss": 2.3527, "step": 5640 }, { "epoch": 0.4210134128166915, "grad_norm": 1.6601407527923584, "learning_rate": 0.0002, "loss": 2.4622, "step": 5650 }, { "epoch": 0.4217585692995529, "grad_norm": 2.126014471054077, "learning_rate": 0.0002, "loss": 2.4602, "step": 5660 }, { "epoch": 0.4225037257824143, "grad_norm": 2.292633295059204, "learning_rate": 0.0002, "loss": 2.7211, "step": 5670 }, { "epoch": 0.4232488822652757, "grad_norm": 2.1793317794799805, "learning_rate": 0.0002, "loss": 2.4626, "step": 5680 }, { "epoch": 0.4239940387481371, "grad_norm": 2.196563720703125, "learning_rate": 0.0002, "loss": 2.5533, "step": 5690 }, { "epoch": 0.4247391952309985, "grad_norm": 2.425448417663574, "learning_rate": 0.0002, "loss": 2.4851, "step": 5700 }, { "epoch": 0.4254843517138599, "grad_norm": 2.2193682193756104, "learning_rate": 0.0002, "loss": 2.3698, "step": 5710 }, { "epoch": 0.4262295081967213, "grad_norm": 2.054805040359497, "learning_rate": 0.0002, "loss": 2.5059, "step": 5720 }, { "epoch": 0.4269746646795827, "grad_norm": 1.8185921907424927, "learning_rate": 0.0002, "loss": 2.5176, "step": 5730 }, { "epoch": 0.4277198211624441, "grad_norm": 1.9967896938323975, "learning_rate": 0.0002, "loss": 2.4504, "step": 5740 }, { "epoch": 0.4284649776453055, "grad_norm": 2.662285327911377, "learning_rate": 0.0002, "loss": 2.4244, "step": 5750 }, { "epoch": 0.42921013412816694, "grad_norm": 2.16428804397583, "learning_rate": 0.0002, "loss": 2.2946, "step": 5760 }, { "epoch": 0.42995529061102833, "grad_norm": 2.0115864276885986, "learning_rate": 0.0002, "loss": 2.4657, "step": 5770 }, { "epoch": 0.4307004470938897, "grad_norm": 2.388542413711548, "learning_rate": 0.0002, "loss": 2.5151, "step": 5780 }, { "epoch": 0.4314456035767511, "grad_norm": 2.0310490131378174, "learning_rate": 0.0002, "loss": 2.3604, "step": 5790 }, { "epoch": 0.43219076005961254, "grad_norm": 1.8433557748794556, "learning_rate": 0.0002, "loss": 2.5056, "step": 5800 }, { "epoch": 0.43293591654247393, "grad_norm": 2.093193769454956, "learning_rate": 0.0002, "loss": 2.5444, "step": 5810 }, { "epoch": 0.4336810730253353, "grad_norm": 2.3394739627838135, "learning_rate": 0.0002, "loss": 2.4693, "step": 5820 }, { "epoch": 0.4344262295081967, "grad_norm": 2.294088363647461, "learning_rate": 0.0002, "loss": 2.2499, "step": 5830 }, { "epoch": 0.43517138599105815, "grad_norm": 2.3843374252319336, "learning_rate": 0.0002, "loss": 2.5797, "step": 5840 }, { "epoch": 0.43591654247391953, "grad_norm": 2.0269100666046143, "learning_rate": 0.0002, "loss": 2.5623, "step": 5850 }, { "epoch": 0.4366616989567809, "grad_norm": 2.5061495304107666, "learning_rate": 0.0002, "loss": 2.3113, "step": 5860 }, { "epoch": 0.4374068554396423, "grad_norm": 2.5287938117980957, "learning_rate": 0.0002, "loss": 2.3915, "step": 5870 }, { "epoch": 0.43815201192250375, "grad_norm": 2.0537567138671875, "learning_rate": 0.0002, "loss": 2.6012, "step": 5880 }, { "epoch": 0.43889716840536513, "grad_norm": 2.5927767753601074, "learning_rate": 0.0002, "loss": 2.4097, "step": 5890 }, { "epoch": 0.4396423248882265, "grad_norm": 2.193775177001953, "learning_rate": 0.0002, "loss": 2.4681, "step": 5900 }, { "epoch": 0.4403874813710879, "grad_norm": 1.99056077003479, "learning_rate": 0.0002, "loss": 2.4156, "step": 5910 }, { "epoch": 0.44113263785394935, "grad_norm": 2.4100918769836426, "learning_rate": 0.0002, "loss": 2.5194, "step": 5920 }, { "epoch": 0.44187779433681074, "grad_norm": 2.178215980529785, "learning_rate": 0.0002, "loss": 2.4394, "step": 5930 }, { "epoch": 0.4426229508196721, "grad_norm": 2.3645856380462646, "learning_rate": 0.0002, "loss": 2.4488, "step": 5940 }, { "epoch": 0.4433681073025335, "grad_norm": 1.8661149740219116, "learning_rate": 0.0002, "loss": 2.4434, "step": 5950 }, { "epoch": 0.44411326378539495, "grad_norm": 1.9467486143112183, "learning_rate": 0.0002, "loss": 2.462, "step": 5960 }, { "epoch": 0.44485842026825634, "grad_norm": 2.4336559772491455, "learning_rate": 0.0002, "loss": 2.6196, "step": 5970 }, { "epoch": 0.4456035767511177, "grad_norm": 2.2935919761657715, "learning_rate": 0.0002, "loss": 2.6004, "step": 5980 }, { "epoch": 0.4463487332339791, "grad_norm": 2.2421000003814697, "learning_rate": 0.0002, "loss": 2.4005, "step": 5990 }, { "epoch": 0.44709388971684055, "grad_norm": 1.7802613973617554, "learning_rate": 0.0002, "loss": 2.4354, "step": 6000 }, { "epoch": 0.44783904619970194, "grad_norm": 2.0072882175445557, "learning_rate": 0.0002, "loss": 2.4345, "step": 6010 }, { "epoch": 0.4485842026825633, "grad_norm": 1.9635995626449585, "learning_rate": 0.0002, "loss": 2.5976, "step": 6020 }, { "epoch": 0.4493293591654247, "grad_norm": 2.6438255310058594, "learning_rate": 0.0002, "loss": 2.4887, "step": 6030 }, { "epoch": 0.45007451564828616, "grad_norm": 2.0173120498657227, "learning_rate": 0.0002, "loss": 2.6285, "step": 6040 }, { "epoch": 0.45081967213114754, "grad_norm": 2.0639147758483887, "learning_rate": 0.0002, "loss": 2.6229, "step": 6050 }, { "epoch": 0.45156482861400893, "grad_norm": 2.2229113578796387, "learning_rate": 0.0002, "loss": 2.3337, "step": 6060 }, { "epoch": 0.4523099850968703, "grad_norm": 2.0675735473632812, "learning_rate": 0.0002, "loss": 2.3311, "step": 6070 }, { "epoch": 0.45305514157973176, "grad_norm": 2.0282604694366455, "learning_rate": 0.0002, "loss": 2.5878, "step": 6080 }, { "epoch": 0.45380029806259314, "grad_norm": 2.4298789501190186, "learning_rate": 0.0002, "loss": 2.5402, "step": 6090 }, { "epoch": 0.45454545454545453, "grad_norm": 2.1782422065734863, "learning_rate": 0.0002, "loss": 2.3497, "step": 6100 }, { "epoch": 0.455290611028316, "grad_norm": 2.3076884746551514, "learning_rate": 0.0002, "loss": 2.4455, "step": 6110 }, { "epoch": 0.45603576751117736, "grad_norm": 2.051884651184082, "learning_rate": 0.0002, "loss": 2.4875, "step": 6120 }, { "epoch": 0.45678092399403875, "grad_norm": 2.0429365634918213, "learning_rate": 0.0002, "loss": 2.5699, "step": 6130 }, { "epoch": 0.45752608047690013, "grad_norm": 2.0632383823394775, "learning_rate": 0.0002, "loss": 2.5193, "step": 6140 }, { "epoch": 0.4582712369597616, "grad_norm": 2.0191586017608643, "learning_rate": 0.0002, "loss": 2.4983, "step": 6150 }, { "epoch": 0.45901639344262296, "grad_norm": 2.1893763542175293, "learning_rate": 0.0002, "loss": 2.4648, "step": 6160 }, { "epoch": 0.45976154992548435, "grad_norm": 2.150398015975952, "learning_rate": 0.0002, "loss": 2.6308, "step": 6170 }, { "epoch": 0.46050670640834573, "grad_norm": 2.084906578063965, "learning_rate": 0.0002, "loss": 2.5127, "step": 6180 }, { "epoch": 0.4612518628912072, "grad_norm": 2.3456621170043945, "learning_rate": 0.0002, "loss": 2.5092, "step": 6190 }, { "epoch": 0.46199701937406856, "grad_norm": 2.259631633758545, "learning_rate": 0.0002, "loss": 2.4077, "step": 6200 }, { "epoch": 0.46274217585692995, "grad_norm": 1.9756247997283936, "learning_rate": 0.0002, "loss": 2.3623, "step": 6210 }, { "epoch": 0.46348733233979134, "grad_norm": 1.8990384340286255, "learning_rate": 0.0002, "loss": 2.5315, "step": 6220 }, { "epoch": 0.4642324888226528, "grad_norm": 2.1848907470703125, "learning_rate": 0.0002, "loss": 2.5418, "step": 6230 }, { "epoch": 0.46497764530551416, "grad_norm": 2.267343521118164, "learning_rate": 0.0002, "loss": 2.4859, "step": 6240 }, { "epoch": 0.46572280178837555, "grad_norm": 2.3617684841156006, "learning_rate": 0.0002, "loss": 2.3138, "step": 6250 }, { "epoch": 0.46646795827123694, "grad_norm": 1.972421646118164, "learning_rate": 0.0002, "loss": 2.5332, "step": 6260 }, { "epoch": 0.4672131147540984, "grad_norm": 2.0907230377197266, "learning_rate": 0.0002, "loss": 2.7235, "step": 6270 }, { "epoch": 0.46795827123695977, "grad_norm": 2.452946901321411, "learning_rate": 0.0002, "loss": 2.4452, "step": 6280 }, { "epoch": 0.46870342771982115, "grad_norm": 2.432163953781128, "learning_rate": 0.0002, "loss": 2.4138, "step": 6290 }, { "epoch": 0.46944858420268254, "grad_norm": 2.2494499683380127, "learning_rate": 0.0002, "loss": 2.4877, "step": 6300 }, { "epoch": 0.470193740685544, "grad_norm": 1.6413583755493164, "learning_rate": 0.0002, "loss": 2.5623, "step": 6310 }, { "epoch": 0.47093889716840537, "grad_norm": 2.4607837200164795, "learning_rate": 0.0002, "loss": 2.4455, "step": 6320 }, { "epoch": 0.47168405365126675, "grad_norm": 2.0634145736694336, "learning_rate": 0.0002, "loss": 2.5643, "step": 6330 }, { "epoch": 0.47242921013412814, "grad_norm": 2.0293703079223633, "learning_rate": 0.0002, "loss": 2.4346, "step": 6340 }, { "epoch": 0.4731743666169896, "grad_norm": 2.567640781402588, "learning_rate": 0.0002, "loss": 2.2669, "step": 6350 }, { "epoch": 0.47391952309985097, "grad_norm": 2.118058204650879, "learning_rate": 0.0002, "loss": 2.5486, "step": 6360 }, { "epoch": 0.47466467958271236, "grad_norm": 2.1259288787841797, "learning_rate": 0.0002, "loss": 2.5674, "step": 6370 }, { "epoch": 0.47540983606557374, "grad_norm": 2.1126372814178467, "learning_rate": 0.0002, "loss": 2.6126, "step": 6380 }, { "epoch": 0.4761549925484352, "grad_norm": 1.626694917678833, "learning_rate": 0.0002, "loss": 2.5019, "step": 6390 }, { "epoch": 0.47690014903129657, "grad_norm": 2.0831708908081055, "learning_rate": 0.0002, "loss": 2.4327, "step": 6400 }, { "epoch": 0.47764530551415796, "grad_norm": 1.9350471496582031, "learning_rate": 0.0002, "loss": 2.4867, "step": 6410 }, { "epoch": 0.4783904619970194, "grad_norm": 2.1217947006225586, "learning_rate": 0.0002, "loss": 2.5254, "step": 6420 }, { "epoch": 0.4791356184798808, "grad_norm": 2.3201823234558105, "learning_rate": 0.0002, "loss": 2.4434, "step": 6430 }, { "epoch": 0.4798807749627422, "grad_norm": 2.2600150108337402, "learning_rate": 0.0002, "loss": 2.4616, "step": 6440 }, { "epoch": 0.48062593144560356, "grad_norm": 2.360180377960205, "learning_rate": 0.0002, "loss": 2.4934, "step": 6450 }, { "epoch": 0.481371087928465, "grad_norm": 2.4700534343719482, "learning_rate": 0.0002, "loss": 2.3958, "step": 6460 }, { "epoch": 0.4821162444113264, "grad_norm": 2.0691604614257812, "learning_rate": 0.0002, "loss": 2.4818, "step": 6470 }, { "epoch": 0.4828614008941878, "grad_norm": 1.950579285621643, "learning_rate": 0.0002, "loss": 2.4003, "step": 6480 }, { "epoch": 0.48360655737704916, "grad_norm": 2.0040478706359863, "learning_rate": 0.0002, "loss": 2.5706, "step": 6490 }, { "epoch": 0.4843517138599106, "grad_norm": 2.4494431018829346, "learning_rate": 0.0002, "loss": 2.4676, "step": 6500 }, { "epoch": 0.485096870342772, "grad_norm": 2.25048565864563, "learning_rate": 0.0002, "loss": 2.6437, "step": 6510 }, { "epoch": 0.4858420268256334, "grad_norm": 2.4388575553894043, "learning_rate": 0.0002, "loss": 2.4013, "step": 6520 }, { "epoch": 0.48658718330849476, "grad_norm": 2.7190568447113037, "learning_rate": 0.0002, "loss": 2.1267, "step": 6530 }, { "epoch": 0.4873323397913562, "grad_norm": 2.00464129447937, "learning_rate": 0.0002, "loss": 2.246, "step": 6540 }, { "epoch": 0.4880774962742176, "grad_norm": 2.230637550354004, "learning_rate": 0.0002, "loss": 2.3657, "step": 6550 }, { "epoch": 0.488822652757079, "grad_norm": 2.030550479888916, "learning_rate": 0.0002, "loss": 2.5825, "step": 6560 }, { "epoch": 0.48956780923994037, "grad_norm": 1.7973986864089966, "learning_rate": 0.0002, "loss": 2.2891, "step": 6570 }, { "epoch": 0.4903129657228018, "grad_norm": 2.1454880237579346, "learning_rate": 0.0002, "loss": 2.3413, "step": 6580 }, { "epoch": 0.4910581222056632, "grad_norm": 2.1549787521362305, "learning_rate": 0.0002, "loss": 2.3444, "step": 6590 }, { "epoch": 0.4918032786885246, "grad_norm": 2.1284945011138916, "learning_rate": 0.0002, "loss": 2.5555, "step": 6600 }, { "epoch": 0.49254843517138597, "grad_norm": 2.23075008392334, "learning_rate": 0.0002, "loss": 2.3384, "step": 6610 }, { "epoch": 0.4932935916542474, "grad_norm": 2.489455223083496, "learning_rate": 0.0002, "loss": 2.6383, "step": 6620 }, { "epoch": 0.4940387481371088, "grad_norm": 2.2579898834228516, "learning_rate": 0.0002, "loss": 2.6226, "step": 6630 }, { "epoch": 0.4947839046199702, "grad_norm": 2.3494224548339844, "learning_rate": 0.0002, "loss": 2.502, "step": 6640 }, { "epoch": 0.49552906110283157, "grad_norm": 2.082937240600586, "learning_rate": 0.0002, "loss": 2.5592, "step": 6650 }, { "epoch": 0.496274217585693, "grad_norm": 2.0874314308166504, "learning_rate": 0.0002, "loss": 2.5922, "step": 6660 }, { "epoch": 0.4970193740685544, "grad_norm": 2.1997947692871094, "learning_rate": 0.0002, "loss": 2.6147, "step": 6670 }, { "epoch": 0.4977645305514158, "grad_norm": 2.215691089630127, "learning_rate": 0.0002, "loss": 2.4961, "step": 6680 }, { "epoch": 0.49850968703427717, "grad_norm": 2.699936866760254, "learning_rate": 0.0002, "loss": 2.6774, "step": 6690 }, { "epoch": 0.4992548435171386, "grad_norm": 2.2943921089172363, "learning_rate": 0.0002, "loss": 2.4801, "step": 6700 }, { "epoch": 0.5, "grad_norm": 2.3712542057037354, "learning_rate": 0.0002, "loss": 2.4751, "step": 6710 }, { "epoch": 0.5007451564828614, "grad_norm": 2.5637362003326416, "learning_rate": 0.0002, "loss": 2.2542, "step": 6720 }, { "epoch": 0.5014903129657228, "grad_norm": 2.330156087875366, "learning_rate": 0.0002, "loss": 2.4645, "step": 6730 }, { "epoch": 0.5022354694485842, "grad_norm": 2.053035020828247, "learning_rate": 0.0002, "loss": 2.5752, "step": 6740 }, { "epoch": 0.5029806259314457, "grad_norm": 2.305776834487915, "learning_rate": 0.0002, "loss": 2.5567, "step": 6750 }, { "epoch": 0.503725782414307, "grad_norm": 2.023801326751709, "learning_rate": 0.0002, "loss": 2.3164, "step": 6760 }, { "epoch": 0.5044709388971684, "grad_norm": 2.477642059326172, "learning_rate": 0.0002, "loss": 2.5777, "step": 6770 }, { "epoch": 0.5052160953800298, "grad_norm": 2.298116683959961, "learning_rate": 0.0002, "loss": 2.4506, "step": 6780 }, { "epoch": 0.5059612518628912, "grad_norm": 2.2904672622680664, "learning_rate": 0.0002, "loss": 2.3897, "step": 6790 }, { "epoch": 0.5067064083457526, "grad_norm": 2.0368447303771973, "learning_rate": 0.0002, "loss": 2.42, "step": 6800 }, { "epoch": 0.507451564828614, "grad_norm": 2.016451835632324, "learning_rate": 0.0002, "loss": 2.3806, "step": 6810 }, { "epoch": 0.5081967213114754, "grad_norm": 1.8767539262771606, "learning_rate": 0.0002, "loss": 2.3789, "step": 6820 }, { "epoch": 0.5089418777943369, "grad_norm": 2.399251937866211, "learning_rate": 0.0002, "loss": 2.5804, "step": 6830 }, { "epoch": 0.5096870342771982, "grad_norm": 2.187103509902954, "learning_rate": 0.0002, "loss": 2.5263, "step": 6840 }, { "epoch": 0.5104321907600596, "grad_norm": 1.9529699087142944, "learning_rate": 0.0002, "loss": 2.423, "step": 6850 }, { "epoch": 0.511177347242921, "grad_norm": 4.577221393585205, "learning_rate": 0.0002, "loss": 2.5348, "step": 6860 }, { "epoch": 0.5119225037257824, "grad_norm": 2.331979513168335, "learning_rate": 0.0002, "loss": 2.6584, "step": 6870 }, { "epoch": 0.5126676602086438, "grad_norm": 2.2853405475616455, "learning_rate": 0.0002, "loss": 2.4096, "step": 6880 }, { "epoch": 0.5134128166915052, "grad_norm": 2.516995906829834, "learning_rate": 0.0002, "loss": 2.4762, "step": 6890 }, { "epoch": 0.5141579731743666, "grad_norm": 2.183138370513916, "learning_rate": 0.0002, "loss": 2.4239, "step": 6900 }, { "epoch": 0.5149031296572281, "grad_norm": 2.2524988651275635, "learning_rate": 0.0002, "loss": 2.4089, "step": 6910 }, { "epoch": 0.5156482861400894, "grad_norm": 2.2979516983032227, "learning_rate": 0.0002, "loss": 2.443, "step": 6920 }, { "epoch": 0.5163934426229508, "grad_norm": 2.099586009979248, "learning_rate": 0.0002, "loss": 2.4679, "step": 6930 }, { "epoch": 0.5171385991058122, "grad_norm": 2.2441070079803467, "learning_rate": 0.0002, "loss": 2.5278, "step": 6940 }, { "epoch": 0.5178837555886736, "grad_norm": 2.376936197280884, "learning_rate": 0.0002, "loss": 2.5222, "step": 6950 }, { "epoch": 0.518628912071535, "grad_norm": 2.196542739868164, "learning_rate": 0.0002, "loss": 2.4336, "step": 6960 }, { "epoch": 0.5193740685543964, "grad_norm": 2.265864610671997, "learning_rate": 0.0002, "loss": 2.4876, "step": 6970 }, { "epoch": 0.5201192250372578, "grad_norm": 2.2958121299743652, "learning_rate": 0.0002, "loss": 2.6756, "step": 6980 }, { "epoch": 0.5208643815201193, "grad_norm": 2.2851719856262207, "learning_rate": 0.0002, "loss": 2.4873, "step": 6990 }, { "epoch": 0.5216095380029806, "grad_norm": 2.3405418395996094, "learning_rate": 0.0002, "loss": 2.3907, "step": 7000 }, { "epoch": 0.522354694485842, "grad_norm": 2.4564900398254395, "learning_rate": 0.0002, "loss": 2.5203, "step": 7010 }, { "epoch": 0.5230998509687034, "grad_norm": 2.123331308364868, "learning_rate": 0.0002, "loss": 2.3261, "step": 7020 }, { "epoch": 0.5238450074515648, "grad_norm": 2.0890355110168457, "learning_rate": 0.0002, "loss": 2.5784, "step": 7030 }, { "epoch": 0.5245901639344263, "grad_norm": 2.4165306091308594, "learning_rate": 0.0002, "loss": 2.4716, "step": 7040 }, { "epoch": 0.5253353204172876, "grad_norm": 2.1103525161743164, "learning_rate": 0.0002, "loss": 2.5825, "step": 7050 }, { "epoch": 0.526080476900149, "grad_norm": 2.33457612991333, "learning_rate": 0.0002, "loss": 2.3823, "step": 7060 }, { "epoch": 0.5268256333830105, "grad_norm": 2.0597524642944336, "learning_rate": 0.0002, "loss": 2.4671, "step": 7070 }, { "epoch": 0.5275707898658718, "grad_norm": 3.1226096153259277, "learning_rate": 0.0002, "loss": 2.4017, "step": 7080 }, { "epoch": 0.5283159463487332, "grad_norm": 2.0051512718200684, "learning_rate": 0.0002, "loss": 2.359, "step": 7090 }, { "epoch": 0.5290611028315947, "grad_norm": 2.500908136367798, "learning_rate": 0.0002, "loss": 2.6868, "step": 7100 }, { "epoch": 0.529806259314456, "grad_norm": 2.04001784324646, "learning_rate": 0.0002, "loss": 2.717, "step": 7110 }, { "epoch": 0.5305514157973175, "grad_norm": 2.715292453765869, "learning_rate": 0.0002, "loss": 2.5725, "step": 7120 }, { "epoch": 0.5312965722801788, "grad_norm": 2.3398818969726562, "learning_rate": 0.0002, "loss": 2.4834, "step": 7130 }, { "epoch": 0.5320417287630402, "grad_norm": 2.456146240234375, "learning_rate": 0.0002, "loss": 2.4792, "step": 7140 }, { "epoch": 0.5327868852459017, "grad_norm": 2.2321231365203857, "learning_rate": 0.0002, "loss": 2.6432, "step": 7150 }, { "epoch": 0.533532041728763, "grad_norm": 2.06449294090271, "learning_rate": 0.0002, "loss": 2.3722, "step": 7160 }, { "epoch": 0.5342771982116244, "grad_norm": 1.98611581325531, "learning_rate": 0.0002, "loss": 2.2874, "step": 7170 }, { "epoch": 0.5350223546944859, "grad_norm": 2.2005727291107178, "learning_rate": 0.0002, "loss": 2.3682, "step": 7180 }, { "epoch": 0.5357675111773472, "grad_norm": 2.3024485111236572, "learning_rate": 0.0002, "loss": 2.5261, "step": 7190 }, { "epoch": 0.5365126676602087, "grad_norm": 2.2706873416900635, "learning_rate": 0.0002, "loss": 2.5962, "step": 7200 }, { "epoch": 0.53725782414307, "grad_norm": 1.8197662830352783, "learning_rate": 0.0002, "loss": 2.5705, "step": 7210 }, { "epoch": 0.5380029806259314, "grad_norm": 1.9607528448104858, "learning_rate": 0.0002, "loss": 2.3861, "step": 7220 }, { "epoch": 0.5387481371087929, "grad_norm": 2.5178678035736084, "learning_rate": 0.0002, "loss": 2.5357, "step": 7230 }, { "epoch": 0.5394932935916542, "grad_norm": 2.2647557258605957, "learning_rate": 0.0002, "loss": 2.3796, "step": 7240 }, { "epoch": 0.5402384500745157, "grad_norm": 2.090864419937134, "learning_rate": 0.0002, "loss": 2.5204, "step": 7250 }, { "epoch": 0.5409836065573771, "grad_norm": 2.0715291500091553, "learning_rate": 0.0002, "loss": 2.5327, "step": 7260 }, { "epoch": 0.5417287630402384, "grad_norm": 1.7275205850601196, "learning_rate": 0.0002, "loss": 2.2495, "step": 7270 }, { "epoch": 0.5424739195230999, "grad_norm": 2.1511425971984863, "learning_rate": 0.0002, "loss": 2.4449, "step": 7280 }, { "epoch": 0.5432190760059612, "grad_norm": 2.3975725173950195, "learning_rate": 0.0002, "loss": 2.6171, "step": 7290 }, { "epoch": 0.5439642324888226, "grad_norm": 2.278902769088745, "learning_rate": 0.0002, "loss": 2.4238, "step": 7300 }, { "epoch": 0.5447093889716841, "grad_norm": 2.1751863956451416, "learning_rate": 0.0002, "loss": 2.3223, "step": 7310 }, { "epoch": 0.5454545454545454, "grad_norm": 2.1303343772888184, "learning_rate": 0.0002, "loss": 2.292, "step": 7320 }, { "epoch": 0.5461997019374069, "grad_norm": 2.0914053916931152, "learning_rate": 0.0002, "loss": 2.6599, "step": 7330 }, { "epoch": 0.5469448584202683, "grad_norm": 2.3275091648101807, "learning_rate": 0.0002, "loss": 2.4827, "step": 7340 }, { "epoch": 0.5476900149031296, "grad_norm": 2.047351598739624, "learning_rate": 0.0002, "loss": 2.4824, "step": 7350 }, { "epoch": 0.5484351713859911, "grad_norm": 2.2209582328796387, "learning_rate": 0.0002, "loss": 2.5068, "step": 7360 }, { "epoch": 0.5491803278688525, "grad_norm": 2.029001235961914, "learning_rate": 0.0002, "loss": 2.3278, "step": 7370 }, { "epoch": 0.5499254843517138, "grad_norm": 2.0651822090148926, "learning_rate": 0.0002, "loss": 2.4738, "step": 7380 }, { "epoch": 0.5506706408345753, "grad_norm": 1.8926769495010376, "learning_rate": 0.0002, "loss": 2.312, "step": 7390 }, { "epoch": 0.5514157973174366, "grad_norm": 2.1285948753356934, "learning_rate": 0.0002, "loss": 2.515, "step": 7400 }, { "epoch": 0.5521609538002981, "grad_norm": 2.026381254196167, "learning_rate": 0.0002, "loss": 2.6837, "step": 7410 }, { "epoch": 0.5529061102831595, "grad_norm": 2.052429437637329, "learning_rate": 0.0002, "loss": 2.3549, "step": 7420 }, { "epoch": 0.5536512667660208, "grad_norm": 2.634350538253784, "learning_rate": 0.0002, "loss": 2.4303, "step": 7430 }, { "epoch": 0.5543964232488823, "grad_norm": 2.1491518020629883, "learning_rate": 0.0002, "loss": 2.5716, "step": 7440 }, { "epoch": 0.5551415797317437, "grad_norm": 1.9551408290863037, "learning_rate": 0.0002, "loss": 2.4071, "step": 7450 }, { "epoch": 0.555886736214605, "grad_norm": 2.2387406826019287, "learning_rate": 0.0002, "loss": 2.383, "step": 7460 }, { "epoch": 0.5566318926974665, "grad_norm": 2.1040196418762207, "learning_rate": 0.0002, "loss": 2.3814, "step": 7470 }, { "epoch": 0.5573770491803278, "grad_norm": 2.3352060317993164, "learning_rate": 0.0002, "loss": 2.5804, "step": 7480 }, { "epoch": 0.5581222056631893, "grad_norm": 2.1420586109161377, "learning_rate": 0.0002, "loss": 2.3543, "step": 7490 }, { "epoch": 0.5588673621460507, "grad_norm": 2.3646399974823, "learning_rate": 0.0002, "loss": 2.3857, "step": 7500 }, { "epoch": 0.559612518628912, "grad_norm": 1.9728518724441528, "learning_rate": 0.0002, "loss": 2.5749, "step": 7510 }, { "epoch": 0.5603576751117735, "grad_norm": 2.2200262546539307, "learning_rate": 0.0002, "loss": 2.5357, "step": 7520 }, { "epoch": 0.5611028315946349, "grad_norm": 2.305957078933716, "learning_rate": 0.0002, "loss": 2.4489, "step": 7530 }, { "epoch": 0.5618479880774963, "grad_norm": 2.1752254962921143, "learning_rate": 0.0002, "loss": 2.2947, "step": 7540 }, { "epoch": 0.5625931445603577, "grad_norm": 2.1745660305023193, "learning_rate": 0.0002, "loss": 2.5143, "step": 7550 }, { "epoch": 0.563338301043219, "grad_norm": 1.8101173639297485, "learning_rate": 0.0002, "loss": 2.5449, "step": 7560 }, { "epoch": 0.5640834575260805, "grad_norm": 2.5516979694366455, "learning_rate": 0.0002, "loss": 2.419, "step": 7570 }, { "epoch": 0.5648286140089419, "grad_norm": 2.087670087814331, "learning_rate": 0.0002, "loss": 2.5587, "step": 7580 }, { "epoch": 0.5655737704918032, "grad_norm": 2.1860992908477783, "learning_rate": 0.0002, "loss": 2.6475, "step": 7590 }, { "epoch": 0.5663189269746647, "grad_norm": 2.522256851196289, "learning_rate": 0.0002, "loss": 2.7054, "step": 7600 }, { "epoch": 0.5670640834575261, "grad_norm": 2.6697170734405518, "learning_rate": 0.0002, "loss": 2.5817, "step": 7610 }, { "epoch": 0.5678092399403875, "grad_norm": 2.129748821258545, "learning_rate": 0.0002, "loss": 2.4949, "step": 7620 }, { "epoch": 0.5685543964232489, "grad_norm": 1.946333646774292, "learning_rate": 0.0002, "loss": 2.4987, "step": 7630 }, { "epoch": 0.5692995529061102, "grad_norm": 2.6684484481811523, "learning_rate": 0.0002, "loss": 2.612, "step": 7640 }, { "epoch": 0.5700447093889717, "grad_norm": 2.1237940788269043, "learning_rate": 0.0002, "loss": 2.5082, "step": 7650 }, { "epoch": 0.5707898658718331, "grad_norm": 2.68740177154541, "learning_rate": 0.0002, "loss": 2.5652, "step": 7660 }, { "epoch": 0.5715350223546944, "grad_norm": 2.360792875289917, "learning_rate": 0.0002, "loss": 2.4631, "step": 7670 }, { "epoch": 0.5722801788375559, "grad_norm": 2.3592369556427, "learning_rate": 0.0002, "loss": 2.4923, "step": 7680 }, { "epoch": 0.5730253353204173, "grad_norm": 2.328521251678467, "learning_rate": 0.0002, "loss": 2.4656, "step": 7690 }, { "epoch": 0.5737704918032787, "grad_norm": 2.1476731300354004, "learning_rate": 0.0002, "loss": 2.3461, "step": 7700 }, { "epoch": 0.5745156482861401, "grad_norm": 2.1318559646606445, "learning_rate": 0.0002, "loss": 2.4251, "step": 7710 }, { "epoch": 0.5752608047690015, "grad_norm": 1.997536301612854, "learning_rate": 0.0002, "loss": 2.5326, "step": 7720 }, { "epoch": 0.5760059612518629, "grad_norm": 2.2597386837005615, "learning_rate": 0.0002, "loss": 2.6773, "step": 7730 }, { "epoch": 0.5767511177347243, "grad_norm": 2.8514564037323, "learning_rate": 0.0002, "loss": 2.5119, "step": 7740 }, { "epoch": 0.5774962742175856, "grad_norm": 2.486799716949463, "learning_rate": 0.0002, "loss": 2.6904, "step": 7750 }, { "epoch": 0.5782414307004471, "grad_norm": 2.237799882888794, "learning_rate": 0.0002, "loss": 2.5005, "step": 7760 }, { "epoch": 0.5789865871833085, "grad_norm": 2.4719021320343018, "learning_rate": 0.0002, "loss": 2.5874, "step": 7770 }, { "epoch": 0.5797317436661699, "grad_norm": 2.2470688819885254, "learning_rate": 0.0002, "loss": 2.5151, "step": 7780 }, { "epoch": 0.5804769001490313, "grad_norm": 2.4005558490753174, "learning_rate": 0.0002, "loss": 2.6334, "step": 7790 }, { "epoch": 0.5812220566318927, "grad_norm": 2.0954015254974365, "learning_rate": 0.0002, "loss": 2.3567, "step": 7800 }, { "epoch": 0.5819672131147541, "grad_norm": 2.228788375854492, "learning_rate": 0.0002, "loss": 2.6451, "step": 7810 }, { "epoch": 0.5827123695976155, "grad_norm": 1.78871488571167, "learning_rate": 0.0002, "loss": 2.6242, "step": 7820 }, { "epoch": 0.5834575260804769, "grad_norm": 1.7899997234344482, "learning_rate": 0.0002, "loss": 2.4975, "step": 7830 }, { "epoch": 0.5842026825633383, "grad_norm": 2.1144442558288574, "learning_rate": 0.0002, "loss": 2.6146, "step": 7840 }, { "epoch": 0.5849478390461997, "grad_norm": 1.8392325639724731, "learning_rate": 0.0002, "loss": 2.4227, "step": 7850 }, { "epoch": 0.5856929955290611, "grad_norm": 1.8613855838775635, "learning_rate": 0.0002, "loss": 2.5583, "step": 7860 }, { "epoch": 0.5864381520119225, "grad_norm": 2.1754300594329834, "learning_rate": 0.0002, "loss": 2.6082, "step": 7870 }, { "epoch": 0.587183308494784, "grad_norm": 2.4607224464416504, "learning_rate": 0.0002, "loss": 2.3939, "step": 7880 }, { "epoch": 0.5879284649776453, "grad_norm": 2.129397392272949, "learning_rate": 0.0002, "loss": 2.5056, "step": 7890 }, { "epoch": 0.5886736214605067, "grad_norm": 2.302616596221924, "learning_rate": 0.0002, "loss": 2.4976, "step": 7900 }, { "epoch": 0.589418777943368, "grad_norm": 3.0152175426483154, "learning_rate": 0.0002, "loss": 2.3935, "step": 7910 }, { "epoch": 0.5901639344262295, "grad_norm": 2.051461696624756, "learning_rate": 0.0002, "loss": 2.4928, "step": 7920 }, { "epoch": 0.5909090909090909, "grad_norm": 2.1009974479675293, "learning_rate": 0.0002, "loss": 2.6526, "step": 7930 }, { "epoch": 0.5916542473919523, "grad_norm": 2.290898561477661, "learning_rate": 0.0002, "loss": 2.5205, "step": 7940 }, { "epoch": 0.5923994038748137, "grad_norm": 2.202995777130127, "learning_rate": 0.0002, "loss": 2.5271, "step": 7950 }, { "epoch": 0.5931445603576752, "grad_norm": 2.211921453475952, "learning_rate": 0.0002, "loss": 2.6762, "step": 7960 }, { "epoch": 0.5938897168405365, "grad_norm": 2.294769763946533, "learning_rate": 0.0002, "loss": 2.5565, "step": 7970 }, { "epoch": 0.5946348733233979, "grad_norm": 2.0982816219329834, "learning_rate": 0.0002, "loss": 2.4512, "step": 7980 }, { "epoch": 0.5953800298062594, "grad_norm": 2.14776611328125, "learning_rate": 0.0002, "loss": 2.5092, "step": 7990 }, { "epoch": 0.5961251862891207, "grad_norm": 2.2703404426574707, "learning_rate": 0.0002, "loss": 2.3723, "step": 8000 }, { "epoch": 0.5968703427719821, "grad_norm": 2.265050172805786, "learning_rate": 0.0002, "loss": 2.4642, "step": 8010 }, { "epoch": 0.5976154992548435, "grad_norm": 2.3132333755493164, "learning_rate": 0.0002, "loss": 2.5886, "step": 8020 }, { "epoch": 0.5983606557377049, "grad_norm": 2.4410815238952637, "learning_rate": 0.0002, "loss": 2.3898, "step": 8030 }, { "epoch": 0.5991058122205664, "grad_norm": 2.282869577407837, "learning_rate": 0.0002, "loss": 2.3925, "step": 8040 }, { "epoch": 0.5998509687034277, "grad_norm": 2.3430824279785156, "learning_rate": 0.0002, "loss": 2.5852, "step": 8050 }, { "epoch": 0.6005961251862891, "grad_norm": 2.7821292877197266, "learning_rate": 0.0002, "loss": 2.1127, "step": 8060 }, { "epoch": 0.6013412816691506, "grad_norm": 2.4642081260681152, "learning_rate": 0.0002, "loss": 2.5766, "step": 8070 }, { "epoch": 0.6020864381520119, "grad_norm": 2.013272285461426, "learning_rate": 0.0002, "loss": 2.4526, "step": 8080 }, { "epoch": 0.6028315946348733, "grad_norm": 2.0950276851654053, "learning_rate": 0.0002, "loss": 2.672, "step": 8090 }, { "epoch": 0.6035767511177347, "grad_norm": 2.2408697605133057, "learning_rate": 0.0002, "loss": 2.4886, "step": 8100 }, { "epoch": 0.6043219076005961, "grad_norm": 2.4338343143463135, "learning_rate": 0.0002, "loss": 2.4129, "step": 8110 }, { "epoch": 0.6050670640834576, "grad_norm": 2.3819990158081055, "learning_rate": 0.0002, "loss": 2.7111, "step": 8120 }, { "epoch": 0.6058122205663189, "grad_norm": 2.3578953742980957, "learning_rate": 0.0002, "loss": 2.5222, "step": 8130 }, { "epoch": 0.6065573770491803, "grad_norm": 2.0468990802764893, "learning_rate": 0.0002, "loss": 2.4965, "step": 8140 }, { "epoch": 0.6073025335320418, "grad_norm": 2.439807415008545, "learning_rate": 0.0002, "loss": 2.6172, "step": 8150 }, { "epoch": 0.6080476900149031, "grad_norm": 2.1083173751831055, "learning_rate": 0.0002, "loss": 2.5669, "step": 8160 }, { "epoch": 0.6087928464977646, "grad_norm": 2.1767308712005615, "learning_rate": 0.0002, "loss": 2.5498, "step": 8170 }, { "epoch": 0.6095380029806259, "grad_norm": 2.1427078247070312, "learning_rate": 0.0002, "loss": 2.4109, "step": 8180 }, { "epoch": 0.6102831594634873, "grad_norm": 2.282959222793579, "learning_rate": 0.0002, "loss": 2.5091, "step": 8190 }, { "epoch": 0.6110283159463488, "grad_norm": 2.2628536224365234, "learning_rate": 0.0002, "loss": 2.5386, "step": 8200 }, { "epoch": 0.6117734724292101, "grad_norm": 2.706434488296509, "learning_rate": 0.0002, "loss": 2.5989, "step": 8210 }, { "epoch": 0.6125186289120715, "grad_norm": 2.3741445541381836, "learning_rate": 0.0002, "loss": 2.2712, "step": 8220 }, { "epoch": 0.613263785394933, "grad_norm": 2.2221875190734863, "learning_rate": 0.0002, "loss": 2.5309, "step": 8230 }, { "epoch": 0.6140089418777943, "grad_norm": 1.9854212999343872, "learning_rate": 0.0002, "loss": 2.2749, "step": 8240 }, { "epoch": 0.6147540983606558, "grad_norm": 2.229374885559082, "learning_rate": 0.0002, "loss": 2.5129, "step": 8250 }, { "epoch": 0.6154992548435171, "grad_norm": 2.4126970767974854, "learning_rate": 0.0002, "loss": 2.325, "step": 8260 }, { "epoch": 0.6162444113263785, "grad_norm": 2.2903852462768555, "learning_rate": 0.0002, "loss": 2.5048, "step": 8270 }, { "epoch": 0.61698956780924, "grad_norm": 2.691183090209961, "learning_rate": 0.0002, "loss": 2.4766, "step": 8280 }, { "epoch": 0.6177347242921013, "grad_norm": 2.386356830596924, "learning_rate": 0.0002, "loss": 2.5979, "step": 8290 }, { "epoch": 0.6184798807749627, "grad_norm": 2.695887804031372, "learning_rate": 0.0002, "loss": 2.642, "step": 8300 }, { "epoch": 0.6192250372578242, "grad_norm": 2.5322632789611816, "learning_rate": 0.0002, "loss": 2.5457, "step": 8310 }, { "epoch": 0.6199701937406855, "grad_norm": 2.29015851020813, "learning_rate": 0.0002, "loss": 2.5388, "step": 8320 }, { "epoch": 0.620715350223547, "grad_norm": 2.326113700866699, "learning_rate": 0.0002, "loss": 2.6757, "step": 8330 }, { "epoch": 0.6214605067064084, "grad_norm": 2.18438458442688, "learning_rate": 0.0002, "loss": 2.3377, "step": 8340 }, { "epoch": 0.6222056631892697, "grad_norm": 2.195730686187744, "learning_rate": 0.0002, "loss": 2.334, "step": 8350 }, { "epoch": 0.6229508196721312, "grad_norm": 2.1896743774414062, "learning_rate": 0.0002, "loss": 2.4039, "step": 8360 }, { "epoch": 0.6236959761549925, "grad_norm": 2.47771954536438, "learning_rate": 0.0002, "loss": 2.625, "step": 8370 }, { "epoch": 0.624441132637854, "grad_norm": 2.5502564907073975, "learning_rate": 0.0002, "loss": 2.5164, "step": 8380 }, { "epoch": 0.6251862891207154, "grad_norm": 1.9382567405700684, "learning_rate": 0.0002, "loss": 2.5153, "step": 8390 }, { "epoch": 0.6259314456035767, "grad_norm": 2.078873872756958, "learning_rate": 0.0002, "loss": 2.4817, "step": 8400 }, { "epoch": 0.6266766020864382, "grad_norm": 2.1636760234832764, "learning_rate": 0.0002, "loss": 2.7134, "step": 8410 }, { "epoch": 0.6274217585692996, "grad_norm": 2.2006876468658447, "learning_rate": 0.0002, "loss": 2.6016, "step": 8420 }, { "epoch": 0.6281669150521609, "grad_norm": 2.364816665649414, "learning_rate": 0.0002, "loss": 2.5201, "step": 8430 }, { "epoch": 0.6289120715350224, "grad_norm": 2.450207471847534, "learning_rate": 0.0002, "loss": 2.6406, "step": 8440 }, { "epoch": 0.6296572280178837, "grad_norm": 2.3795676231384277, "learning_rate": 0.0002, "loss": 2.2957, "step": 8450 }, { "epoch": 0.6304023845007451, "grad_norm": 2.1926169395446777, "learning_rate": 0.0002, "loss": 2.2288, "step": 8460 }, { "epoch": 0.6311475409836066, "grad_norm": 1.9396635293960571, "learning_rate": 0.0002, "loss": 2.3839, "step": 8470 }, { "epoch": 0.6318926974664679, "grad_norm": 2.6635711193084717, "learning_rate": 0.0002, "loss": 2.4389, "step": 8480 }, { "epoch": 0.6326378539493294, "grad_norm": 2.657240390777588, "learning_rate": 0.0002, "loss": 2.3886, "step": 8490 }, { "epoch": 0.6333830104321908, "grad_norm": 2.050353765487671, "learning_rate": 0.0002, "loss": 2.3714, "step": 8500 }, { "epoch": 0.6341281669150521, "grad_norm": 2.3058016300201416, "learning_rate": 0.0002, "loss": 2.4301, "step": 8510 }, { "epoch": 0.6348733233979136, "grad_norm": 2.3272721767425537, "learning_rate": 0.0002, "loss": 2.442, "step": 8520 }, { "epoch": 0.6356184798807749, "grad_norm": 2.105719566345215, "learning_rate": 0.0002, "loss": 2.3532, "step": 8530 }, { "epoch": 0.6363636363636364, "grad_norm": 2.2481689453125, "learning_rate": 0.0002, "loss": 2.7017, "step": 8540 }, { "epoch": 0.6371087928464978, "grad_norm": 2.0684092044830322, "learning_rate": 0.0002, "loss": 2.5492, "step": 8550 }, { "epoch": 0.6378539493293591, "grad_norm": 2.2087674140930176, "learning_rate": 0.0002, "loss": 2.4946, "step": 8560 }, { "epoch": 0.6385991058122206, "grad_norm": 2.0686557292938232, "learning_rate": 0.0002, "loss": 2.603, "step": 8570 }, { "epoch": 0.639344262295082, "grad_norm": 2.223733901977539, "learning_rate": 0.0002, "loss": 2.5515, "step": 8580 }, { "epoch": 0.6400894187779433, "grad_norm": 2.0543527603149414, "learning_rate": 0.0002, "loss": 2.2859, "step": 8590 }, { "epoch": 0.6408345752608048, "grad_norm": 2.119685411453247, "learning_rate": 0.0002, "loss": 2.4915, "step": 8600 }, { "epoch": 0.6415797317436661, "grad_norm": 2.1664891242980957, "learning_rate": 0.0002, "loss": 2.4224, "step": 8610 }, { "epoch": 0.6423248882265276, "grad_norm": 2.2479021549224854, "learning_rate": 0.0002, "loss": 2.5316, "step": 8620 }, { "epoch": 0.643070044709389, "grad_norm": 2.2841110229492188, "learning_rate": 0.0002, "loss": 2.6606, "step": 8630 }, { "epoch": 0.6438152011922503, "grad_norm": 2.4399871826171875, "learning_rate": 0.0002, "loss": 2.5727, "step": 8640 }, { "epoch": 0.6445603576751118, "grad_norm": 1.9307136535644531, "learning_rate": 0.0002, "loss": 2.3302, "step": 8650 }, { "epoch": 0.6453055141579732, "grad_norm": 2.2575156688690186, "learning_rate": 0.0002, "loss": 2.5079, "step": 8660 }, { "epoch": 0.6460506706408345, "grad_norm": 2.704486131668091, "learning_rate": 0.0002, "loss": 2.64, "step": 8670 }, { "epoch": 0.646795827123696, "grad_norm": 2.2060296535491943, "learning_rate": 0.0002, "loss": 2.5639, "step": 8680 }, { "epoch": 0.6475409836065574, "grad_norm": 2.2497682571411133, "learning_rate": 0.0002, "loss": 2.5753, "step": 8690 }, { "epoch": 0.6482861400894188, "grad_norm": 2.1391713619232178, "learning_rate": 0.0002, "loss": 2.5179, "step": 8700 }, { "epoch": 0.6490312965722802, "grad_norm": 2.4493465423583984, "learning_rate": 0.0002, "loss": 2.5404, "step": 8710 }, { "epoch": 0.6497764530551415, "grad_norm": 2.2963478565216064, "learning_rate": 0.0002, "loss": 2.395, "step": 8720 }, { "epoch": 0.650521609538003, "grad_norm": 2.3371636867523193, "learning_rate": 0.0002, "loss": 2.649, "step": 8730 }, { "epoch": 0.6512667660208644, "grad_norm": 2.1336076259613037, "learning_rate": 0.0002, "loss": 2.5782, "step": 8740 }, { "epoch": 0.6520119225037257, "grad_norm": 1.9927014112472534, "learning_rate": 0.0002, "loss": 2.4791, "step": 8750 }, { "epoch": 0.6527570789865872, "grad_norm": 2.5760622024536133, "learning_rate": 0.0002, "loss": 2.5103, "step": 8760 }, { "epoch": 0.6535022354694486, "grad_norm": 2.3019092082977295, "learning_rate": 0.0002, "loss": 2.5403, "step": 8770 }, { "epoch": 0.65424739195231, "grad_norm": 2.1122303009033203, "learning_rate": 0.0002, "loss": 2.4911, "step": 8780 }, { "epoch": 0.6549925484351714, "grad_norm": 2.4388267993927, "learning_rate": 0.0002, "loss": 2.5928, "step": 8790 }, { "epoch": 0.6557377049180327, "grad_norm": 2.3956820964813232, "learning_rate": 0.0002, "loss": 2.4162, "step": 8800 }, { "epoch": 0.6564828614008942, "grad_norm": 2.251885175704956, "learning_rate": 0.0002, "loss": 2.3572, "step": 8810 }, { "epoch": 0.6572280178837556, "grad_norm": 2.1196508407592773, "learning_rate": 0.0002, "loss": 2.271, "step": 8820 }, { "epoch": 0.657973174366617, "grad_norm": 2.6327478885650635, "learning_rate": 0.0002, "loss": 2.5528, "step": 8830 }, { "epoch": 0.6587183308494784, "grad_norm": 3.1525380611419678, "learning_rate": 0.0002, "loss": 2.671, "step": 8840 }, { "epoch": 0.6594634873323398, "grad_norm": 2.371023178100586, "learning_rate": 0.0002, "loss": 2.3042, "step": 8850 }, { "epoch": 0.6602086438152012, "grad_norm": 2.1151058673858643, "learning_rate": 0.0002, "loss": 2.3586, "step": 8860 }, { "epoch": 0.6609538002980626, "grad_norm": 2.2981162071228027, "learning_rate": 0.0002, "loss": 2.3033, "step": 8870 }, { "epoch": 0.6616989567809239, "grad_norm": 2.3385653495788574, "learning_rate": 0.0002, "loss": 2.4125, "step": 8880 }, { "epoch": 0.6624441132637854, "grad_norm": 2.282998561859131, "learning_rate": 0.0002, "loss": 2.5846, "step": 8890 }, { "epoch": 0.6631892697466468, "grad_norm": 2.276402473449707, "learning_rate": 0.0002, "loss": 2.5881, "step": 8900 }, { "epoch": 0.6639344262295082, "grad_norm": 2.753835439682007, "learning_rate": 0.0002, "loss": 2.5402, "step": 8910 }, { "epoch": 0.6646795827123696, "grad_norm": 2.155869960784912, "learning_rate": 0.0002, "loss": 2.3382, "step": 8920 }, { "epoch": 0.665424739195231, "grad_norm": 2.25738263130188, "learning_rate": 0.0002, "loss": 2.5028, "step": 8930 }, { "epoch": 0.6661698956780924, "grad_norm": 2.3716089725494385, "learning_rate": 0.0002, "loss": 2.4135, "step": 8940 }, { "epoch": 0.6669150521609538, "grad_norm": 2.5012192726135254, "learning_rate": 0.0002, "loss": 2.4297, "step": 8950 }, { "epoch": 0.6676602086438153, "grad_norm": 2.177103281021118, "learning_rate": 0.0002, "loss": 2.401, "step": 8960 }, { "epoch": 0.6684053651266766, "grad_norm": 2.500803232192993, "learning_rate": 0.0002, "loss": 2.6323, "step": 8970 }, { "epoch": 0.669150521609538, "grad_norm": 2.7237913608551025, "learning_rate": 0.0002, "loss": 2.4543, "step": 8980 }, { "epoch": 0.6698956780923994, "grad_norm": 2.0506207942962646, "learning_rate": 0.0002, "loss": 2.3305, "step": 8990 }, { "epoch": 0.6706408345752608, "grad_norm": 2.182495355606079, "learning_rate": 0.0002, "loss": 2.3134, "step": 9000 }, { "epoch": 0.6713859910581222, "grad_norm": 2.3970160484313965, "learning_rate": 0.0002, "loss": 2.6011, "step": 9010 }, { "epoch": 0.6721311475409836, "grad_norm": 2.2599663734436035, "learning_rate": 0.0002, "loss": 2.2633, "step": 9020 }, { "epoch": 0.672876304023845, "grad_norm": 2.202136754989624, "learning_rate": 0.0002, "loss": 2.5181, "step": 9030 }, { "epoch": 0.6736214605067065, "grad_norm": 2.4681708812713623, "learning_rate": 0.0002, "loss": 2.4997, "step": 9040 }, { "epoch": 0.6743666169895678, "grad_norm": 2.455841302871704, "learning_rate": 0.0002, "loss": 2.67, "step": 9050 }, { "epoch": 0.6751117734724292, "grad_norm": 2.679401397705078, "learning_rate": 0.0002, "loss": 2.4532, "step": 9060 }, { "epoch": 0.6758569299552906, "grad_norm": 2.003723621368408, "learning_rate": 0.0002, "loss": 2.6612, "step": 9070 }, { "epoch": 0.676602086438152, "grad_norm": 2.256204128265381, "learning_rate": 0.0002, "loss": 2.6165, "step": 9080 }, { "epoch": 0.6773472429210134, "grad_norm": 2.3091988563537598, "learning_rate": 0.0002, "loss": 2.5914, "step": 9090 }, { "epoch": 0.6780923994038748, "grad_norm": 2.3021037578582764, "learning_rate": 0.0002, "loss": 2.5028, "step": 9100 }, { "epoch": 0.6788375558867362, "grad_norm": 2.1524159908294678, "learning_rate": 0.0002, "loss": 2.496, "step": 9110 }, { "epoch": 0.6795827123695977, "grad_norm": 2.2679061889648438, "learning_rate": 0.0002, "loss": 2.5092, "step": 9120 }, { "epoch": 0.680327868852459, "grad_norm": 2.5078539848327637, "learning_rate": 0.0002, "loss": 2.2919, "step": 9130 }, { "epoch": 0.6810730253353204, "grad_norm": 2.5029258728027344, "learning_rate": 0.0002, "loss": 2.4382, "step": 9140 }, { "epoch": 0.6818181818181818, "grad_norm": 2.5736641883850098, "learning_rate": 0.0002, "loss": 2.4778, "step": 9150 }, { "epoch": 0.6825633383010432, "grad_norm": 2.327671527862549, "learning_rate": 0.0002, "loss": 2.5559, "step": 9160 }, { "epoch": 0.6833084947839047, "grad_norm": 2.310634136199951, "learning_rate": 0.0002, "loss": 2.4736, "step": 9170 }, { "epoch": 0.684053651266766, "grad_norm": 2.2063729763031006, "learning_rate": 0.0002, "loss": 2.5829, "step": 9180 }, { "epoch": 0.6847988077496274, "grad_norm": 2.5868451595306396, "learning_rate": 0.0002, "loss": 2.4465, "step": 9190 }, { "epoch": 0.6855439642324889, "grad_norm": 1.984372854232788, "learning_rate": 0.0002, "loss": 2.4654, "step": 9200 }, { "epoch": 0.6862891207153502, "grad_norm": 2.2977192401885986, "learning_rate": 0.0002, "loss": 2.5643, "step": 9210 }, { "epoch": 0.6870342771982116, "grad_norm": 2.3139028549194336, "learning_rate": 0.0002, "loss": 2.7359, "step": 9220 }, { "epoch": 0.687779433681073, "grad_norm": 2.124882698059082, "learning_rate": 0.0002, "loss": 2.4335, "step": 9230 }, { "epoch": 0.6885245901639344, "grad_norm": 2.434063673019409, "learning_rate": 0.0002, "loss": 2.3576, "step": 9240 }, { "epoch": 0.6892697466467959, "grad_norm": 2.290684700012207, "learning_rate": 0.0002, "loss": 2.7062, "step": 9250 }, { "epoch": 0.6900149031296572, "grad_norm": 2.5476014614105225, "learning_rate": 0.0002, "loss": 2.6915, "step": 9260 }, { "epoch": 0.6907600596125186, "grad_norm": 2.1429226398468018, "learning_rate": 0.0002, "loss": 2.5736, "step": 9270 }, { "epoch": 0.6915052160953801, "grad_norm": 2.399526834487915, "learning_rate": 0.0002, "loss": 2.3798, "step": 9280 }, { "epoch": 0.6922503725782414, "grad_norm": 2.5570576190948486, "learning_rate": 0.0002, "loss": 2.5188, "step": 9290 }, { "epoch": 0.6929955290611028, "grad_norm": 2.248030185699463, "learning_rate": 0.0002, "loss": 2.4761, "step": 9300 }, { "epoch": 0.6937406855439643, "grad_norm": 2.649503231048584, "learning_rate": 0.0002, "loss": 2.6177, "step": 9310 }, { "epoch": 0.6944858420268256, "grad_norm": 2.1536803245544434, "learning_rate": 0.0002, "loss": 2.5347, "step": 9320 }, { "epoch": 0.6952309985096871, "grad_norm": 2.3103137016296387, "learning_rate": 0.0002, "loss": 2.4589, "step": 9330 }, { "epoch": 0.6959761549925484, "grad_norm": 2.5560615062713623, "learning_rate": 0.0002, "loss": 2.6268, "step": 9340 }, { "epoch": 0.6967213114754098, "grad_norm": 2.149562120437622, "learning_rate": 0.0002, "loss": 2.7589, "step": 9350 }, { "epoch": 0.6974664679582713, "grad_norm": 2.180457592010498, "learning_rate": 0.0002, "loss": 2.3707, "step": 9360 }, { "epoch": 0.6982116244411326, "grad_norm": 2.1361920833587646, "learning_rate": 0.0002, "loss": 2.5647, "step": 9370 }, { "epoch": 0.698956780923994, "grad_norm": 2.5958340167999268, "learning_rate": 0.0002, "loss": 2.6107, "step": 9380 }, { "epoch": 0.6997019374068555, "grad_norm": 2.298337936401367, "learning_rate": 0.0002, "loss": 2.5033, "step": 9390 }, { "epoch": 0.7004470938897168, "grad_norm": 2.5497617721557617, "learning_rate": 0.0002, "loss": 2.6554, "step": 9400 }, { "epoch": 0.7011922503725783, "grad_norm": 2.4965898990631104, "learning_rate": 0.0002, "loss": 2.2896, "step": 9410 }, { "epoch": 0.7019374068554396, "grad_norm": 1.6895242929458618, "learning_rate": 0.0002, "loss": 2.5128, "step": 9420 }, { "epoch": 0.702682563338301, "grad_norm": 2.5977084636688232, "learning_rate": 0.0002, "loss": 2.6261, "step": 9430 }, { "epoch": 0.7034277198211625, "grad_norm": 2.2413127422332764, "learning_rate": 0.0002, "loss": 2.5724, "step": 9440 }, { "epoch": 0.7041728763040238, "grad_norm": 1.9375770092010498, "learning_rate": 0.0002, "loss": 2.3573, "step": 9450 }, { "epoch": 0.7049180327868853, "grad_norm": 2.1468889713287354, "learning_rate": 0.0002, "loss": 2.4854, "step": 9460 }, { "epoch": 0.7056631892697467, "grad_norm": 2.3307502269744873, "learning_rate": 0.0002, "loss": 2.6102, "step": 9470 }, { "epoch": 0.706408345752608, "grad_norm": 2.025935411453247, "learning_rate": 0.0002, "loss": 2.5878, "step": 9480 }, { "epoch": 0.7071535022354695, "grad_norm": 2.0282442569732666, "learning_rate": 0.0002, "loss": 2.634, "step": 9490 }, { "epoch": 0.7078986587183308, "grad_norm": 2.3716142177581787, "learning_rate": 0.0002, "loss": 2.523, "step": 9500 }, { "epoch": 0.7086438152011922, "grad_norm": 2.1333203315734863, "learning_rate": 0.0002, "loss": 2.4948, "step": 9510 }, { "epoch": 0.7093889716840537, "grad_norm": 2.215022563934326, "learning_rate": 0.0002, "loss": 2.4586, "step": 9520 }, { "epoch": 0.710134128166915, "grad_norm": 2.392059087753296, "learning_rate": 0.0002, "loss": 2.7049, "step": 9530 }, { "epoch": 0.7108792846497765, "grad_norm": 2.0697712898254395, "learning_rate": 0.0002, "loss": 2.4652, "step": 9540 }, { "epoch": 0.7116244411326379, "grad_norm": 2.451186418533325, "learning_rate": 0.0002, "loss": 2.6232, "step": 9550 }, { "epoch": 0.7123695976154992, "grad_norm": 2.7246387004852295, "learning_rate": 0.0002, "loss": 2.6331, "step": 9560 }, { "epoch": 0.7131147540983607, "grad_norm": 2.2628626823425293, "learning_rate": 0.0002, "loss": 2.4974, "step": 9570 }, { "epoch": 0.713859910581222, "grad_norm": 2.2943952083587646, "learning_rate": 0.0002, "loss": 2.5514, "step": 9580 }, { "epoch": 0.7146050670640834, "grad_norm": 2.394134044647217, "learning_rate": 0.0002, "loss": 2.4207, "step": 9590 }, { "epoch": 0.7153502235469449, "grad_norm": 2.2121686935424805, "learning_rate": 0.0002, "loss": 2.4458, "step": 9600 }, { "epoch": 0.7160953800298062, "grad_norm": 2.43963885307312, "learning_rate": 0.0002, "loss": 2.5707, "step": 9610 }, { "epoch": 0.7168405365126677, "grad_norm": 2.44991135597229, "learning_rate": 0.0002, "loss": 2.3977, "step": 9620 }, { "epoch": 0.7175856929955291, "grad_norm": 2.600816488265991, "learning_rate": 0.0002, "loss": 2.6366, "step": 9630 }, { "epoch": 0.7183308494783904, "grad_norm": 2.5606367588043213, "learning_rate": 0.0002, "loss": 2.3104, "step": 9640 }, { "epoch": 0.7190760059612519, "grad_norm": 2.0649945735931396, "learning_rate": 0.0002, "loss": 2.4726, "step": 9650 }, { "epoch": 0.7198211624441133, "grad_norm": 2.463927984237671, "learning_rate": 0.0002, "loss": 2.437, "step": 9660 }, { "epoch": 0.7205663189269746, "grad_norm": 2.189600706100464, "learning_rate": 0.0002, "loss": 2.6419, "step": 9670 }, { "epoch": 0.7213114754098361, "grad_norm": 1.8361003398895264, "learning_rate": 0.0002, "loss": 2.3392, "step": 9680 }, { "epoch": 0.7220566318926974, "grad_norm": 2.4830501079559326, "learning_rate": 0.0002, "loss": 2.6257, "step": 9690 }, { "epoch": 0.7228017883755589, "grad_norm": 2.311711072921753, "learning_rate": 0.0002, "loss": 2.4023, "step": 9700 }, { "epoch": 0.7235469448584203, "grad_norm": 1.9280378818511963, "learning_rate": 0.0002, "loss": 2.6589, "step": 9710 }, { "epoch": 0.7242921013412816, "grad_norm": 2.1411705017089844, "learning_rate": 0.0002, "loss": 2.539, "step": 9720 }, { "epoch": 0.7250372578241431, "grad_norm": 2.262427568435669, "learning_rate": 0.0002, "loss": 2.3129, "step": 9730 }, { "epoch": 0.7257824143070045, "grad_norm": 2.005398988723755, "learning_rate": 0.0002, "loss": 2.4774, "step": 9740 }, { "epoch": 0.7265275707898659, "grad_norm": 2.4369115829467773, "learning_rate": 0.0002, "loss": 2.5077, "step": 9750 }, { "epoch": 0.7272727272727273, "grad_norm": 2.5426080226898193, "learning_rate": 0.0002, "loss": 2.4542, "step": 9760 }, { "epoch": 0.7280178837555886, "grad_norm": 2.222259044647217, "learning_rate": 0.0002, "loss": 2.5733, "step": 9770 }, { "epoch": 0.7287630402384501, "grad_norm": 3.0009191036224365, "learning_rate": 0.0002, "loss": 2.5769, "step": 9780 }, { "epoch": 0.7295081967213115, "grad_norm": 2.354903221130371, "learning_rate": 0.0002, "loss": 2.4887, "step": 9790 }, { "epoch": 0.7302533532041728, "grad_norm": 2.4170987606048584, "learning_rate": 0.0002, "loss": 2.3048, "step": 9800 }, { "epoch": 0.7309985096870343, "grad_norm": 2.6301980018615723, "learning_rate": 0.0002, "loss": 2.2994, "step": 9810 }, { "epoch": 0.7317436661698957, "grad_norm": 1.6262503862380981, "learning_rate": 0.0002, "loss": 2.0802, "step": 9820 }, { "epoch": 0.732488822652757, "grad_norm": 2.165588855743408, "learning_rate": 0.0002, "loss": 2.5009, "step": 9830 }, { "epoch": 0.7332339791356185, "grad_norm": 2.3280584812164307, "learning_rate": 0.0002, "loss": 2.5506, "step": 9840 }, { "epoch": 0.7339791356184798, "grad_norm": 2.505038261413574, "learning_rate": 0.0002, "loss": 2.4491, "step": 9850 }, { "epoch": 0.7347242921013413, "grad_norm": 2.5628268718719482, "learning_rate": 0.0002, "loss": 2.5678, "step": 9860 }, { "epoch": 0.7354694485842027, "grad_norm": 2.371814489364624, "learning_rate": 0.0002, "loss": 2.4223, "step": 9870 }, { "epoch": 0.736214605067064, "grad_norm": 1.9160370826721191, "learning_rate": 0.0002, "loss": 2.5315, "step": 9880 }, { "epoch": 0.7369597615499255, "grad_norm": 2.015497922897339, "learning_rate": 0.0002, "loss": 2.6611, "step": 9890 }, { "epoch": 0.7377049180327869, "grad_norm": 2.402764081954956, "learning_rate": 0.0002, "loss": 2.5224, "step": 9900 }, { "epoch": 0.7384500745156483, "grad_norm": 2.2813656330108643, "learning_rate": 0.0002, "loss": 2.6005, "step": 9910 }, { "epoch": 0.7391952309985097, "grad_norm": 2.1747665405273438, "learning_rate": 0.0002, "loss": 2.6124, "step": 9920 }, { "epoch": 0.7399403874813711, "grad_norm": 2.8765082359313965, "learning_rate": 0.0002, "loss": 2.4271, "step": 9930 }, { "epoch": 0.7406855439642325, "grad_norm": 2.9332666397094727, "learning_rate": 0.0002, "loss": 2.5666, "step": 9940 }, { "epoch": 0.7414307004470939, "grad_norm": 2.209160566329956, "learning_rate": 0.0002, "loss": 2.5214, "step": 9950 }, { "epoch": 0.7421758569299552, "grad_norm": 2.141798734664917, "learning_rate": 0.0002, "loss": 2.5629, "step": 9960 }, { "epoch": 0.7429210134128167, "grad_norm": 2.4280612468719482, "learning_rate": 0.0002, "loss": 2.6251, "step": 9970 }, { "epoch": 0.7436661698956781, "grad_norm": 2.588738441467285, "learning_rate": 0.0002, "loss": 2.5636, "step": 9980 }, { "epoch": 0.7444113263785395, "grad_norm": 2.423440456390381, "learning_rate": 0.0002, "loss": 2.4463, "step": 9990 }, { "epoch": 0.7451564828614009, "grad_norm": 2.3735451698303223, "learning_rate": 0.0002, "loss": 2.2597, "step": 10000 }, { "epoch": 0.7459016393442623, "grad_norm": 2.585657835006714, "learning_rate": 0.0002, "loss": 2.5384, "step": 10010 }, { "epoch": 0.7466467958271237, "grad_norm": 2.6739962100982666, "learning_rate": 0.0002, "loss": 2.413, "step": 10020 }, { "epoch": 0.7473919523099851, "grad_norm": 2.564932346343994, "learning_rate": 0.0002, "loss": 2.4084, "step": 10030 }, { "epoch": 0.7481371087928465, "grad_norm": 2.0360469818115234, "learning_rate": 0.0002, "loss": 2.5756, "step": 10040 }, { "epoch": 0.7488822652757079, "grad_norm": 2.26521897315979, "learning_rate": 0.0002, "loss": 2.3981, "step": 10050 }, { "epoch": 0.7496274217585693, "grad_norm": 2.3003921508789062, "learning_rate": 0.0002, "loss": 2.599, "step": 10060 }, { "epoch": 0.7503725782414307, "grad_norm": 2.3010787963867188, "learning_rate": 0.0002, "loss": 2.6884, "step": 10070 }, { "epoch": 0.7511177347242921, "grad_norm": 2.4945406913757324, "learning_rate": 0.0002, "loss": 2.6397, "step": 10080 }, { "epoch": 0.7518628912071535, "grad_norm": 2.391580104827881, "learning_rate": 0.0002, "loss": 2.4918, "step": 10090 }, { "epoch": 0.7526080476900149, "grad_norm": 2.094149589538574, "learning_rate": 0.0002, "loss": 2.5465, "step": 10100 }, { "epoch": 0.7533532041728763, "grad_norm": 2.440086603164673, "learning_rate": 0.0002, "loss": 2.665, "step": 10110 }, { "epoch": 0.7540983606557377, "grad_norm": 2.482935905456543, "learning_rate": 0.0002, "loss": 2.5492, "step": 10120 }, { "epoch": 0.7548435171385991, "grad_norm": 2.4048640727996826, "learning_rate": 0.0002, "loss": 2.2824, "step": 10130 }, { "epoch": 0.7555886736214605, "grad_norm": 2.5780625343322754, "learning_rate": 0.0002, "loss": 2.5935, "step": 10140 }, { "epoch": 0.7563338301043219, "grad_norm": 1.9736360311508179, "learning_rate": 0.0002, "loss": 2.6113, "step": 10150 }, { "epoch": 0.7570789865871833, "grad_norm": 2.432325839996338, "learning_rate": 0.0002, "loss": 2.5351, "step": 10160 }, { "epoch": 0.7578241430700448, "grad_norm": 1.9205713272094727, "learning_rate": 0.0002, "loss": 2.4855, "step": 10170 }, { "epoch": 0.7585692995529061, "grad_norm": 2.043088436126709, "learning_rate": 0.0002, "loss": 2.5828, "step": 10180 }, { "epoch": 0.7593144560357675, "grad_norm": 3.1941723823547363, "learning_rate": 0.0002, "loss": 2.6751, "step": 10190 }, { "epoch": 0.7600596125186289, "grad_norm": 2.210202693939209, "learning_rate": 0.0002, "loss": 2.4979, "step": 10200 }, { "epoch": 0.7608047690014903, "grad_norm": 2.0645289421081543, "learning_rate": 0.0002, "loss": 2.5, "step": 10210 }, { "epoch": 0.7615499254843517, "grad_norm": 2.4983456134796143, "learning_rate": 0.0002, "loss": 2.501, "step": 10220 }, { "epoch": 0.7622950819672131, "grad_norm": 2.4388515949249268, "learning_rate": 0.0002, "loss": 2.5407, "step": 10230 }, { "epoch": 0.7630402384500745, "grad_norm": 1.960217833518982, "learning_rate": 0.0002, "loss": 2.3579, "step": 10240 }, { "epoch": 0.763785394932936, "grad_norm": 2.206451177597046, "learning_rate": 0.0002, "loss": 2.5991, "step": 10250 }, { "epoch": 0.7645305514157973, "grad_norm": 3.0600955486297607, "learning_rate": 0.0002, "loss": 2.534, "step": 10260 }, { "epoch": 0.7652757078986587, "grad_norm": 2.1760904788970947, "learning_rate": 0.0002, "loss": 2.5936, "step": 10270 }, { "epoch": 0.7660208643815202, "grad_norm": 2.1374666690826416, "learning_rate": 0.0002, "loss": 2.3698, "step": 10280 }, { "epoch": 0.7667660208643815, "grad_norm": 1.8994402885437012, "learning_rate": 0.0002, "loss": 2.5065, "step": 10290 }, { "epoch": 0.767511177347243, "grad_norm": 1.7293874025344849, "learning_rate": 0.0002, "loss": 2.5013, "step": 10300 }, { "epoch": 0.7682563338301043, "grad_norm": 2.463646650314331, "learning_rate": 0.0002, "loss": 2.606, "step": 10310 }, { "epoch": 0.7690014903129657, "grad_norm": 2.521929979324341, "learning_rate": 0.0002, "loss": 2.3497, "step": 10320 }, { "epoch": 0.7697466467958272, "grad_norm": 2.0710620880126953, "learning_rate": 0.0002, "loss": 2.3254, "step": 10330 }, { "epoch": 0.7704918032786885, "grad_norm": 2.2939655780792236, "learning_rate": 0.0002, "loss": 2.5636, "step": 10340 }, { "epoch": 0.7712369597615499, "grad_norm": 2.3778505325317383, "learning_rate": 0.0002, "loss": 2.426, "step": 10350 }, { "epoch": 0.7719821162444114, "grad_norm": 2.2639496326446533, "learning_rate": 0.0002, "loss": 2.5813, "step": 10360 }, { "epoch": 0.7727272727272727, "grad_norm": 1.964016318321228, "learning_rate": 0.0002, "loss": 2.554, "step": 10370 }, { "epoch": 0.7734724292101341, "grad_norm": 2.1532862186431885, "learning_rate": 0.0002, "loss": 2.4438, "step": 10380 }, { "epoch": 0.7742175856929955, "grad_norm": 2.354395627975464, "learning_rate": 0.0002, "loss": 2.6894, "step": 10390 }, { "epoch": 0.7749627421758569, "grad_norm": 2.0978426933288574, "learning_rate": 0.0002, "loss": 2.5408, "step": 10400 }, { "epoch": 0.7757078986587184, "grad_norm": 2.2250595092773438, "learning_rate": 0.0002, "loss": 2.4457, "step": 10410 }, { "epoch": 0.7764530551415797, "grad_norm": 2.65061354637146, "learning_rate": 0.0002, "loss": 2.3338, "step": 10420 }, { "epoch": 0.7771982116244411, "grad_norm": 2.519925117492676, "learning_rate": 0.0002, "loss": 2.6637, "step": 10430 }, { "epoch": 0.7779433681073026, "grad_norm": 1.8276631832122803, "learning_rate": 0.0002, "loss": 2.4743, "step": 10440 }, { "epoch": 0.7786885245901639, "grad_norm": 2.745326042175293, "learning_rate": 0.0002, "loss": 2.4987, "step": 10450 }, { "epoch": 0.7794336810730254, "grad_norm": 2.3664751052856445, "learning_rate": 0.0002, "loss": 2.6172, "step": 10460 }, { "epoch": 0.7801788375558867, "grad_norm": 2.136486768722534, "learning_rate": 0.0002, "loss": 2.581, "step": 10470 }, { "epoch": 0.7809239940387481, "grad_norm": 2.13596773147583, "learning_rate": 0.0002, "loss": 2.4819, "step": 10480 }, { "epoch": 0.7816691505216096, "grad_norm": 2.0259013175964355, "learning_rate": 0.0002, "loss": 2.4299, "step": 10490 }, { "epoch": 0.7824143070044709, "grad_norm": 2.3508143424987793, "learning_rate": 0.0002, "loss": 2.6114, "step": 10500 }, { "epoch": 0.7831594634873323, "grad_norm": 2.127795457839966, "learning_rate": 0.0002, "loss": 2.5393, "step": 10510 }, { "epoch": 0.7839046199701938, "grad_norm": 2.1637115478515625, "learning_rate": 0.0002, "loss": 2.6723, "step": 10520 }, { "epoch": 0.7846497764530551, "grad_norm": 2.2965123653411865, "learning_rate": 0.0002, "loss": 2.5549, "step": 10530 }, { "epoch": 0.7853949329359166, "grad_norm": 2.3020741939544678, "learning_rate": 0.0002, "loss": 2.5971, "step": 10540 }, { "epoch": 0.786140089418778, "grad_norm": 2.3106722831726074, "learning_rate": 0.0002, "loss": 2.5852, "step": 10550 }, { "epoch": 0.7868852459016393, "grad_norm": 2.332108736038208, "learning_rate": 0.0002, "loss": 2.3608, "step": 10560 }, { "epoch": 0.7876304023845008, "grad_norm": 2.337618589401245, "learning_rate": 0.0002, "loss": 2.3785, "step": 10570 }, { "epoch": 0.7883755588673621, "grad_norm": 2.413151502609253, "learning_rate": 0.0002, "loss": 2.6424, "step": 10580 }, { "epoch": 0.7891207153502235, "grad_norm": 1.9372957944869995, "learning_rate": 0.0002, "loss": 2.5991, "step": 10590 }, { "epoch": 0.789865871833085, "grad_norm": 2.435525417327881, "learning_rate": 0.0002, "loss": 2.5649, "step": 10600 }, { "epoch": 0.7906110283159463, "grad_norm": 2.385913372039795, "learning_rate": 0.0002, "loss": 2.386, "step": 10610 }, { "epoch": 0.7913561847988078, "grad_norm": 2.0109829902648926, "learning_rate": 0.0002, "loss": 2.4575, "step": 10620 }, { "epoch": 0.7921013412816692, "grad_norm": 2.28608775138855, "learning_rate": 0.0002, "loss": 2.5546, "step": 10630 }, { "epoch": 0.7928464977645305, "grad_norm": 2.1904425621032715, "learning_rate": 0.0002, "loss": 2.4409, "step": 10640 }, { "epoch": 0.793591654247392, "grad_norm": 2.351679801940918, "learning_rate": 0.0002, "loss": 2.6908, "step": 10650 }, { "epoch": 0.7943368107302533, "grad_norm": 2.2788383960723877, "learning_rate": 0.0002, "loss": 2.4305, "step": 10660 }, { "epoch": 0.7950819672131147, "grad_norm": 2.0748603343963623, "learning_rate": 0.0002, "loss": 2.2788, "step": 10670 }, { "epoch": 0.7958271236959762, "grad_norm": 2.1148083209991455, "learning_rate": 0.0002, "loss": 2.5214, "step": 10680 }, { "epoch": 0.7965722801788375, "grad_norm": 2.439086675643921, "learning_rate": 0.0002, "loss": 2.572, "step": 10690 }, { "epoch": 0.797317436661699, "grad_norm": 2.302316188812256, "learning_rate": 0.0002, "loss": 2.6505, "step": 10700 }, { "epoch": 0.7980625931445604, "grad_norm": 1.977591633796692, "learning_rate": 0.0002, "loss": 2.6918, "step": 10710 }, { "epoch": 0.7988077496274217, "grad_norm": 2.8533225059509277, "learning_rate": 0.0002, "loss": 2.6504, "step": 10720 }, { "epoch": 0.7995529061102832, "grad_norm": 2.3402719497680664, "learning_rate": 0.0002, "loss": 2.5394, "step": 10730 }, { "epoch": 0.8002980625931445, "grad_norm": 1.8375385999679565, "learning_rate": 0.0002, "loss": 2.4388, "step": 10740 }, { "epoch": 0.801043219076006, "grad_norm": 2.375514030456543, "learning_rate": 0.0002, "loss": 2.2368, "step": 10750 }, { "epoch": 0.8017883755588674, "grad_norm": 2.218656063079834, "learning_rate": 0.0002, "loss": 2.6589, "step": 10760 }, { "epoch": 0.8025335320417287, "grad_norm": 2.560631513595581, "learning_rate": 0.0002, "loss": 2.3786, "step": 10770 }, { "epoch": 0.8032786885245902, "grad_norm": 2.4114229679107666, "learning_rate": 0.0002, "loss": 2.5688, "step": 10780 }, { "epoch": 0.8040238450074516, "grad_norm": 2.948805570602417, "learning_rate": 0.0002, "loss": 2.5296, "step": 10790 }, { "epoch": 0.8047690014903129, "grad_norm": 2.384042501449585, "learning_rate": 0.0002, "loss": 2.6218, "step": 10800 }, { "epoch": 0.8055141579731744, "grad_norm": 2.3185818195343018, "learning_rate": 0.0002, "loss": 2.5289, "step": 10810 }, { "epoch": 0.8062593144560357, "grad_norm": 2.2557380199432373, "learning_rate": 0.0002, "loss": 2.4439, "step": 10820 }, { "epoch": 0.8070044709388972, "grad_norm": 2.0974535942077637, "learning_rate": 0.0002, "loss": 2.4567, "step": 10830 }, { "epoch": 0.8077496274217586, "grad_norm": 2.480273962020874, "learning_rate": 0.0002, "loss": 2.4788, "step": 10840 }, { "epoch": 0.8084947839046199, "grad_norm": 2.4017157554626465, "learning_rate": 0.0002, "loss": 2.4639, "step": 10850 }, { "epoch": 0.8092399403874814, "grad_norm": 2.3682639598846436, "learning_rate": 0.0002, "loss": 2.5533, "step": 10860 }, { "epoch": 0.8099850968703428, "grad_norm": 2.693796157836914, "learning_rate": 0.0002, "loss": 2.4756, "step": 10870 }, { "epoch": 0.8107302533532041, "grad_norm": 2.371288776397705, "learning_rate": 0.0002, "loss": 2.4941, "step": 10880 }, { "epoch": 0.8114754098360656, "grad_norm": 2.4420065879821777, "learning_rate": 0.0002, "loss": 2.5901, "step": 10890 }, { "epoch": 0.812220566318927, "grad_norm": 2.154177665710449, "learning_rate": 0.0002, "loss": 2.4709, "step": 10900 }, { "epoch": 0.8129657228017884, "grad_norm": 2.3035285472869873, "learning_rate": 0.0002, "loss": 2.2832, "step": 10910 }, { "epoch": 0.8137108792846498, "grad_norm": 2.2672736644744873, "learning_rate": 0.0002, "loss": 2.8001, "step": 10920 }, { "epoch": 0.8144560357675111, "grad_norm": 2.507875442504883, "learning_rate": 0.0002, "loss": 2.5625, "step": 10930 }, { "epoch": 0.8152011922503726, "grad_norm": 2.542093276977539, "learning_rate": 0.0002, "loss": 2.5738, "step": 10940 }, { "epoch": 0.815946348733234, "grad_norm": 2.764739990234375, "learning_rate": 0.0002, "loss": 2.6649, "step": 10950 }, { "epoch": 0.8166915052160953, "grad_norm": 2.2417197227478027, "learning_rate": 0.0002, "loss": 2.5701, "step": 10960 }, { "epoch": 0.8174366616989568, "grad_norm": 2.3203530311584473, "learning_rate": 0.0002, "loss": 2.57, "step": 10970 }, { "epoch": 0.8181818181818182, "grad_norm": 2.8699557781219482, "learning_rate": 0.0002, "loss": 2.5023, "step": 10980 }, { "epoch": 0.8189269746646796, "grad_norm": 2.4314422607421875, "learning_rate": 0.0002, "loss": 2.5016, "step": 10990 }, { "epoch": 0.819672131147541, "grad_norm": 2.330038547515869, "learning_rate": 0.0002, "loss": 2.3436, "step": 11000 }, { "epoch": 0.8204172876304023, "grad_norm": 2.177577257156372, "learning_rate": 0.0002, "loss": 2.5652, "step": 11010 }, { "epoch": 0.8211624441132638, "grad_norm": 1.8706187009811401, "learning_rate": 0.0002, "loss": 2.7162, "step": 11020 }, { "epoch": 0.8219076005961252, "grad_norm": 2.2145588397979736, "learning_rate": 0.0002, "loss": 2.5284, "step": 11030 }, { "epoch": 0.8226527570789866, "grad_norm": 2.344332456588745, "learning_rate": 0.0002, "loss": 2.6446, "step": 11040 }, { "epoch": 0.823397913561848, "grad_norm": 2.391944646835327, "learning_rate": 0.0002, "loss": 2.5797, "step": 11050 }, { "epoch": 0.8241430700447094, "grad_norm": 2.2976560592651367, "learning_rate": 0.0002, "loss": 2.3885, "step": 11060 }, { "epoch": 0.8248882265275708, "grad_norm": 2.0775372982025146, "learning_rate": 0.0002, "loss": 2.5791, "step": 11070 }, { "epoch": 0.8256333830104322, "grad_norm": 2.264828681945801, "learning_rate": 0.0002, "loss": 2.6107, "step": 11080 }, { "epoch": 0.8263785394932935, "grad_norm": 2.1409361362457275, "learning_rate": 0.0002, "loss": 2.5045, "step": 11090 }, { "epoch": 0.827123695976155, "grad_norm": 2.565410614013672, "learning_rate": 0.0002, "loss": 2.5521, "step": 11100 }, { "epoch": 0.8278688524590164, "grad_norm": 2.375131607055664, "learning_rate": 0.0002, "loss": 2.3359, "step": 11110 }, { "epoch": 0.8286140089418778, "grad_norm": 2.2293524742126465, "learning_rate": 0.0002, "loss": 2.3675, "step": 11120 }, { "epoch": 0.8293591654247392, "grad_norm": 2.4063050746917725, "learning_rate": 0.0002, "loss": 2.4713, "step": 11130 }, { "epoch": 0.8301043219076006, "grad_norm": 2.408867597579956, "learning_rate": 0.0002, "loss": 2.5096, "step": 11140 }, { "epoch": 0.830849478390462, "grad_norm": 2.5441880226135254, "learning_rate": 0.0002, "loss": 2.5106, "step": 11150 }, { "epoch": 0.8315946348733234, "grad_norm": 2.539547920227051, "learning_rate": 0.0002, "loss": 2.4346, "step": 11160 }, { "epoch": 0.8323397913561847, "grad_norm": 2.414623260498047, "learning_rate": 0.0002, "loss": 2.5962, "step": 11170 }, { "epoch": 0.8330849478390462, "grad_norm": 2.219356060028076, "learning_rate": 0.0002, "loss": 2.5249, "step": 11180 }, { "epoch": 0.8338301043219076, "grad_norm": 2.43898606300354, "learning_rate": 0.0002, "loss": 2.6075, "step": 11190 }, { "epoch": 0.834575260804769, "grad_norm": 2.644263744354248, "learning_rate": 0.0002, "loss": 2.5456, "step": 11200 }, { "epoch": 0.8353204172876304, "grad_norm": 2.2671680450439453, "learning_rate": 0.0002, "loss": 2.4596, "step": 11210 }, { "epoch": 0.8360655737704918, "grad_norm": 2.3897671699523926, "learning_rate": 0.0002, "loss": 2.5104, "step": 11220 }, { "epoch": 0.8368107302533532, "grad_norm": 2.2913897037506104, "learning_rate": 0.0002, "loss": 2.6487, "step": 11230 }, { "epoch": 0.8375558867362146, "grad_norm": 2.287472724914551, "learning_rate": 0.0002, "loss": 2.6213, "step": 11240 }, { "epoch": 0.8383010432190761, "grad_norm": 2.184960126876831, "learning_rate": 0.0002, "loss": 2.559, "step": 11250 }, { "epoch": 0.8390461997019374, "grad_norm": 2.3212270736694336, "learning_rate": 0.0002, "loss": 2.4804, "step": 11260 }, { "epoch": 0.8397913561847988, "grad_norm": 2.361088991165161, "learning_rate": 0.0002, "loss": 2.7022, "step": 11270 }, { "epoch": 0.8405365126676602, "grad_norm": 2.6766278743743896, "learning_rate": 0.0002, "loss": 2.5002, "step": 11280 }, { "epoch": 0.8412816691505216, "grad_norm": 2.4925765991210938, "learning_rate": 0.0002, "loss": 2.5971, "step": 11290 }, { "epoch": 0.842026825633383, "grad_norm": 2.4175381660461426, "learning_rate": 0.0002, "loss": 2.5388, "step": 11300 }, { "epoch": 0.8427719821162444, "grad_norm": 2.3096563816070557, "learning_rate": 0.0002, "loss": 2.6924, "step": 11310 }, { "epoch": 0.8435171385991058, "grad_norm": 2.5850837230682373, "learning_rate": 0.0002, "loss": 2.5236, "step": 11320 }, { "epoch": 0.8442622950819673, "grad_norm": 2.491636276245117, "learning_rate": 0.0002, "loss": 2.3833, "step": 11330 }, { "epoch": 0.8450074515648286, "grad_norm": 2.627934455871582, "learning_rate": 0.0002, "loss": 2.4309, "step": 11340 }, { "epoch": 0.84575260804769, "grad_norm": 2.562291383743286, "learning_rate": 0.0002, "loss": 2.6193, "step": 11350 }, { "epoch": 0.8464977645305514, "grad_norm": 2.1559548377990723, "learning_rate": 0.0002, "loss": 2.3262, "step": 11360 }, { "epoch": 0.8472429210134128, "grad_norm": 2.06011962890625, "learning_rate": 0.0002, "loss": 2.4812, "step": 11370 }, { "epoch": 0.8479880774962743, "grad_norm": 1.6976170539855957, "learning_rate": 0.0002, "loss": 2.482, "step": 11380 }, { "epoch": 0.8487332339791356, "grad_norm": 2.2001445293426514, "learning_rate": 0.0002, "loss": 2.6044, "step": 11390 }, { "epoch": 0.849478390461997, "grad_norm": 2.1249303817749023, "learning_rate": 0.0002, "loss": 2.395, "step": 11400 }, { "epoch": 0.8502235469448585, "grad_norm": 2.477747678756714, "learning_rate": 0.0002, "loss": 2.3953, "step": 11410 }, { "epoch": 0.8509687034277198, "grad_norm": 2.436199426651001, "learning_rate": 0.0002, "loss": 2.535, "step": 11420 }, { "epoch": 0.8517138599105812, "grad_norm": 2.53760027885437, "learning_rate": 0.0002, "loss": 2.4221, "step": 11430 }, { "epoch": 0.8524590163934426, "grad_norm": 2.1996359825134277, "learning_rate": 0.0002, "loss": 2.3221, "step": 11440 }, { "epoch": 0.853204172876304, "grad_norm": 2.1827547550201416, "learning_rate": 0.0002, "loss": 2.4165, "step": 11450 }, { "epoch": 0.8539493293591655, "grad_norm": 2.6674787998199463, "learning_rate": 0.0002, "loss": 2.524, "step": 11460 }, { "epoch": 0.8546944858420268, "grad_norm": 2.6001546382904053, "learning_rate": 0.0002, "loss": 2.6605, "step": 11470 }, { "epoch": 0.8554396423248882, "grad_norm": 2.287266254425049, "learning_rate": 0.0002, "loss": 2.6006, "step": 11480 }, { "epoch": 0.8561847988077497, "grad_norm": 2.252857208251953, "learning_rate": 0.0002, "loss": 2.544, "step": 11490 }, { "epoch": 0.856929955290611, "grad_norm": 2.273512601852417, "learning_rate": 0.0002, "loss": 2.5486, "step": 11500 }, { "epoch": 0.8576751117734724, "grad_norm": 2.7023732662200928, "learning_rate": 0.0002, "loss": 2.592, "step": 11510 }, { "epoch": 0.8584202682563339, "grad_norm": 2.303313970565796, "learning_rate": 0.0002, "loss": 2.5199, "step": 11520 }, { "epoch": 0.8591654247391952, "grad_norm": 2.4638290405273438, "learning_rate": 0.0002, "loss": 2.5568, "step": 11530 }, { "epoch": 0.8599105812220567, "grad_norm": 2.451723337173462, "learning_rate": 0.0002, "loss": 2.6141, "step": 11540 }, { "epoch": 0.860655737704918, "grad_norm": 2.3285202980041504, "learning_rate": 0.0002, "loss": 2.5687, "step": 11550 }, { "epoch": 0.8614008941877794, "grad_norm": 2.085536241531372, "learning_rate": 0.0002, "loss": 2.6193, "step": 11560 }, { "epoch": 0.8621460506706409, "grad_norm": 2.5319440364837646, "learning_rate": 0.0002, "loss": 2.5323, "step": 11570 }, { "epoch": 0.8628912071535022, "grad_norm": 2.246924638748169, "learning_rate": 0.0002, "loss": 2.4782, "step": 11580 }, { "epoch": 0.8636363636363636, "grad_norm": 2.1741697788238525, "learning_rate": 0.0002, "loss": 2.4644, "step": 11590 }, { "epoch": 0.8643815201192251, "grad_norm": 2.434746265411377, "learning_rate": 0.0002, "loss": 2.3757, "step": 11600 }, { "epoch": 0.8651266766020864, "grad_norm": 2.406317710876465, "learning_rate": 0.0002, "loss": 2.5596, "step": 11610 }, { "epoch": 0.8658718330849479, "grad_norm": 2.0323896408081055, "learning_rate": 0.0002, "loss": 2.491, "step": 11620 }, { "epoch": 0.8666169895678092, "grad_norm": 1.8637727499008179, "learning_rate": 0.0002, "loss": 2.5684, "step": 11630 }, { "epoch": 0.8673621460506706, "grad_norm": 2.324962854385376, "learning_rate": 0.0002, "loss": 2.5031, "step": 11640 }, { "epoch": 0.8681073025335321, "grad_norm": 2.261016607284546, "learning_rate": 0.0002, "loss": 2.4632, "step": 11650 }, { "epoch": 0.8688524590163934, "grad_norm": 2.4027981758117676, "learning_rate": 0.0002, "loss": 2.5733, "step": 11660 }, { "epoch": 0.8695976154992549, "grad_norm": 2.2393276691436768, "learning_rate": 0.0002, "loss": 2.4513, "step": 11670 }, { "epoch": 0.8703427719821163, "grad_norm": 2.2299957275390625, "learning_rate": 0.0002, "loss": 2.4286, "step": 11680 }, { "epoch": 0.8710879284649776, "grad_norm": 3.5715925693511963, "learning_rate": 0.0002, "loss": 2.6196, "step": 11690 }, { "epoch": 0.8718330849478391, "grad_norm": 2.3358867168426514, "learning_rate": 0.0002, "loss": 2.6385, "step": 11700 }, { "epoch": 0.8725782414307004, "grad_norm": 2.237659454345703, "learning_rate": 0.0002, "loss": 2.5753, "step": 11710 }, { "epoch": 0.8733233979135618, "grad_norm": 2.2841837406158447, "learning_rate": 0.0002, "loss": 2.6415, "step": 11720 }, { "epoch": 0.8740685543964233, "grad_norm": 2.559757709503174, "learning_rate": 0.0002, "loss": 2.5872, "step": 11730 }, { "epoch": 0.8748137108792846, "grad_norm": 2.5326175689697266, "learning_rate": 0.0002, "loss": 2.4485, "step": 11740 }, { "epoch": 0.875558867362146, "grad_norm": 2.234788656234741, "learning_rate": 0.0002, "loss": 2.2943, "step": 11750 }, { "epoch": 0.8763040238450075, "grad_norm": 2.4031131267547607, "learning_rate": 0.0002, "loss": 2.5176, "step": 11760 }, { "epoch": 0.8770491803278688, "grad_norm": 2.832005023956299, "learning_rate": 0.0002, "loss": 2.5063, "step": 11770 }, { "epoch": 0.8777943368107303, "grad_norm": 2.0008394718170166, "learning_rate": 0.0002, "loss": 2.5267, "step": 11780 }, { "epoch": 0.8785394932935916, "grad_norm": 2.0909340381622314, "learning_rate": 0.0002, "loss": 2.5573, "step": 11790 }, { "epoch": 0.879284649776453, "grad_norm": 2.437964677810669, "learning_rate": 0.0002, "loss": 2.6744, "step": 11800 }, { "epoch": 0.8800298062593145, "grad_norm": 2.1106114387512207, "learning_rate": 0.0002, "loss": 2.5049, "step": 11810 }, { "epoch": 0.8807749627421758, "grad_norm": 2.672287940979004, "learning_rate": 0.0002, "loss": 2.7222, "step": 11820 }, { "epoch": 0.8815201192250373, "grad_norm": 2.584033727645874, "learning_rate": 0.0002, "loss": 2.5348, "step": 11830 }, { "epoch": 0.8822652757078987, "grad_norm": 2.440488576889038, "learning_rate": 0.0002, "loss": 2.4024, "step": 11840 }, { "epoch": 0.88301043219076, "grad_norm": 2.302765130996704, "learning_rate": 0.0002, "loss": 2.441, "step": 11850 }, { "epoch": 0.8837555886736215, "grad_norm": 3.5020196437835693, "learning_rate": 0.0002, "loss": 2.5192, "step": 11860 }, { "epoch": 0.8845007451564829, "grad_norm": 1.8765350580215454, "learning_rate": 0.0002, "loss": 2.5156, "step": 11870 }, { "epoch": 0.8852459016393442, "grad_norm": 2.4863150119781494, "learning_rate": 0.0002, "loss": 2.5239, "step": 11880 }, { "epoch": 0.8859910581222057, "grad_norm": 2.492176055908203, "learning_rate": 0.0002, "loss": 2.5697, "step": 11890 }, { "epoch": 0.886736214605067, "grad_norm": 3.2195818424224854, "learning_rate": 0.0002, "loss": 2.6402, "step": 11900 }, { "epoch": 0.8874813710879285, "grad_norm": 2.2776970863342285, "learning_rate": 0.0002, "loss": 2.4884, "step": 11910 }, { "epoch": 0.8882265275707899, "grad_norm": 2.553642749786377, "learning_rate": 0.0002, "loss": 2.6425, "step": 11920 }, { "epoch": 0.8889716840536512, "grad_norm": 2.269604206085205, "learning_rate": 0.0002, "loss": 2.5291, "step": 11930 }, { "epoch": 0.8897168405365127, "grad_norm": 2.3157992362976074, "learning_rate": 0.0002, "loss": 2.7461, "step": 11940 }, { "epoch": 0.8904619970193741, "grad_norm": 2.3829448223114014, "learning_rate": 0.0002, "loss": 2.6959, "step": 11950 }, { "epoch": 0.8912071535022354, "grad_norm": 2.368562698364258, "learning_rate": 0.0002, "loss": 2.3614, "step": 11960 }, { "epoch": 0.8919523099850969, "grad_norm": 2.5107600688934326, "learning_rate": 0.0002, "loss": 2.5187, "step": 11970 }, { "epoch": 0.8926974664679582, "grad_norm": 2.376817226409912, "learning_rate": 0.0002, "loss": 2.5952, "step": 11980 }, { "epoch": 0.8934426229508197, "grad_norm": 2.224245071411133, "learning_rate": 0.0002, "loss": 2.4105, "step": 11990 }, { "epoch": 0.8941877794336811, "grad_norm": 2.118123769760132, "learning_rate": 0.0002, "loss": 2.442, "step": 12000 }, { "epoch": 0.8949329359165424, "grad_norm": 2.6127233505249023, "learning_rate": 0.0002, "loss": 2.3396, "step": 12010 }, { "epoch": 0.8956780923994039, "grad_norm": 2.319699287414551, "learning_rate": 0.0002, "loss": 2.5345, "step": 12020 }, { "epoch": 0.8964232488822653, "grad_norm": 2.394341468811035, "learning_rate": 0.0002, "loss": 2.6386, "step": 12030 }, { "epoch": 0.8971684053651267, "grad_norm": 1.9906131029129028, "learning_rate": 0.0002, "loss": 2.3783, "step": 12040 }, { "epoch": 0.8979135618479881, "grad_norm": 2.038069009780884, "learning_rate": 0.0002, "loss": 2.4852, "step": 12050 }, { "epoch": 0.8986587183308494, "grad_norm": 2.18753981590271, "learning_rate": 0.0002, "loss": 2.3601, "step": 12060 }, { "epoch": 0.8994038748137109, "grad_norm": 2.302685499191284, "learning_rate": 0.0002, "loss": 2.4762, "step": 12070 }, { "epoch": 0.9001490312965723, "grad_norm": 2.177448272705078, "learning_rate": 0.0002, "loss": 2.3813, "step": 12080 }, { "epoch": 0.9008941877794336, "grad_norm": 2.6771061420440674, "learning_rate": 0.0002, "loss": 2.5623, "step": 12090 }, { "epoch": 0.9016393442622951, "grad_norm": 2.364640712738037, "learning_rate": 0.0002, "loss": 2.4862, "step": 12100 }, { "epoch": 0.9023845007451565, "grad_norm": 2.2396554946899414, "learning_rate": 0.0002, "loss": 2.3349, "step": 12110 }, { "epoch": 0.9031296572280179, "grad_norm": 2.5424320697784424, "learning_rate": 0.0002, "loss": 2.5552, "step": 12120 }, { "epoch": 0.9038748137108793, "grad_norm": 2.2599573135375977, "learning_rate": 0.0002, "loss": 2.4783, "step": 12130 }, { "epoch": 0.9046199701937406, "grad_norm": 2.3719987869262695, "learning_rate": 0.0002, "loss": 2.3134, "step": 12140 }, { "epoch": 0.9053651266766021, "grad_norm": 2.3063857555389404, "learning_rate": 0.0002, "loss": 2.375, "step": 12150 }, { "epoch": 0.9061102831594635, "grad_norm": 2.3336477279663086, "learning_rate": 0.0002, "loss": 2.5508, "step": 12160 }, { "epoch": 0.9068554396423248, "grad_norm": 2.325166940689087, "learning_rate": 0.0002, "loss": 2.5011, "step": 12170 }, { "epoch": 0.9076005961251863, "grad_norm": 2.2718868255615234, "learning_rate": 0.0002, "loss": 2.6404, "step": 12180 }, { "epoch": 0.9083457526080477, "grad_norm": 2.607816696166992, "learning_rate": 0.0002, "loss": 2.5262, "step": 12190 }, { "epoch": 0.9090909090909091, "grad_norm": 2.1004855632781982, "learning_rate": 0.0002, "loss": 2.5023, "step": 12200 }, { "epoch": 0.9098360655737705, "grad_norm": 2.336148977279663, "learning_rate": 0.0002, "loss": 2.6659, "step": 12210 }, { "epoch": 0.910581222056632, "grad_norm": 2.531151294708252, "learning_rate": 0.0002, "loss": 2.7188, "step": 12220 }, { "epoch": 0.9113263785394933, "grad_norm": 2.559283494949341, "learning_rate": 0.0002, "loss": 2.6394, "step": 12230 }, { "epoch": 0.9120715350223547, "grad_norm": 2.272649049758911, "learning_rate": 0.0002, "loss": 2.6711, "step": 12240 }, { "epoch": 0.912816691505216, "grad_norm": 2.356740713119507, "learning_rate": 0.0002, "loss": 2.6009, "step": 12250 }, { "epoch": 0.9135618479880775, "grad_norm": 2.4409749507904053, "learning_rate": 0.0002, "loss": 2.6274, "step": 12260 }, { "epoch": 0.9143070044709389, "grad_norm": 2.7427172660827637, "learning_rate": 0.0002, "loss": 2.7499, "step": 12270 }, { "epoch": 0.9150521609538003, "grad_norm": 2.309023380279541, "learning_rate": 0.0002, "loss": 2.3997, "step": 12280 }, { "epoch": 0.9157973174366617, "grad_norm": 2.1871066093444824, "learning_rate": 0.0002, "loss": 2.6141, "step": 12290 }, { "epoch": 0.9165424739195231, "grad_norm": 2.556929588317871, "learning_rate": 0.0002, "loss": 2.4421, "step": 12300 }, { "epoch": 0.9172876304023845, "grad_norm": 2.5296781063079834, "learning_rate": 0.0002, "loss": 2.5385, "step": 12310 }, { "epoch": 0.9180327868852459, "grad_norm": 2.4364492893218994, "learning_rate": 0.0002, "loss": 2.5862, "step": 12320 }, { "epoch": 0.9187779433681073, "grad_norm": 2.1894443035125732, "learning_rate": 0.0002, "loss": 2.5224, "step": 12330 }, { "epoch": 0.9195230998509687, "grad_norm": 2.3544352054595947, "learning_rate": 0.0002, "loss": 2.4424, "step": 12340 }, { "epoch": 0.9202682563338301, "grad_norm": 2.2780649662017822, "learning_rate": 0.0002, "loss": 2.4522, "step": 12350 }, { "epoch": 0.9210134128166915, "grad_norm": 2.3933236598968506, "learning_rate": 0.0002, "loss": 2.4272, "step": 12360 }, { "epoch": 0.9217585692995529, "grad_norm": 2.260446071624756, "learning_rate": 0.0002, "loss": 2.4688, "step": 12370 }, { "epoch": 0.9225037257824144, "grad_norm": 2.4620795249938965, "learning_rate": 0.0002, "loss": 2.1951, "step": 12380 }, { "epoch": 0.9232488822652757, "grad_norm": 2.4303131103515625, "learning_rate": 0.0002, "loss": 2.2946, "step": 12390 }, { "epoch": 0.9239940387481371, "grad_norm": 2.8419077396392822, "learning_rate": 0.0002, "loss": 2.4692, "step": 12400 }, { "epoch": 0.9247391952309985, "grad_norm": 2.9645135402679443, "learning_rate": 0.0002, "loss": 2.6461, "step": 12410 }, { "epoch": 0.9254843517138599, "grad_norm": 2.391739845275879, "learning_rate": 0.0002, "loss": 2.5335, "step": 12420 }, { "epoch": 0.9262295081967213, "grad_norm": 2.686976194381714, "learning_rate": 0.0002, "loss": 2.6333, "step": 12430 }, { "epoch": 0.9269746646795827, "grad_norm": 2.723388195037842, "learning_rate": 0.0002, "loss": 2.354, "step": 12440 }, { "epoch": 0.9277198211624441, "grad_norm": 2.4658613204956055, "learning_rate": 0.0002, "loss": 2.5475, "step": 12450 }, { "epoch": 0.9284649776453056, "grad_norm": 2.6882481575012207, "learning_rate": 0.0002, "loss": 2.3591, "step": 12460 }, { "epoch": 0.9292101341281669, "grad_norm": 2.646805763244629, "learning_rate": 0.0002, "loss": 2.4765, "step": 12470 }, { "epoch": 0.9299552906110283, "grad_norm": 2.4550538063049316, "learning_rate": 0.0002, "loss": 2.6768, "step": 12480 }, { "epoch": 0.9307004470938898, "grad_norm": 2.296886920928955, "learning_rate": 0.0002, "loss": 2.6753, "step": 12490 }, { "epoch": 0.9314456035767511, "grad_norm": 2.1057779788970947, "learning_rate": 0.0002, "loss": 2.5129, "step": 12500 }, { "epoch": 0.9321907600596125, "grad_norm": 2.1545591354370117, "learning_rate": 0.0002, "loss": 2.4823, "step": 12510 }, { "epoch": 0.9329359165424739, "grad_norm": 2.354118824005127, "learning_rate": 0.0002, "loss": 2.6566, "step": 12520 }, { "epoch": 0.9336810730253353, "grad_norm": 2.294067859649658, "learning_rate": 0.0002, "loss": 2.5172, "step": 12530 }, { "epoch": 0.9344262295081968, "grad_norm": 2.2037270069122314, "learning_rate": 0.0002, "loss": 2.5489, "step": 12540 }, { "epoch": 0.9351713859910581, "grad_norm": 2.276435136795044, "learning_rate": 0.0002, "loss": 2.3769, "step": 12550 }, { "epoch": 0.9359165424739195, "grad_norm": 2.4285993576049805, "learning_rate": 0.0002, "loss": 2.7328, "step": 12560 }, { "epoch": 0.936661698956781, "grad_norm": 2.661801338195801, "learning_rate": 0.0002, "loss": 2.5124, "step": 12570 }, { "epoch": 0.9374068554396423, "grad_norm": 1.9602450132369995, "learning_rate": 0.0002, "loss": 2.4483, "step": 12580 }, { "epoch": 0.9381520119225037, "grad_norm": 2.1745917797088623, "learning_rate": 0.0002, "loss": 2.4838, "step": 12590 }, { "epoch": 0.9388971684053651, "grad_norm": 2.373079776763916, "learning_rate": 0.0002, "loss": 2.6714, "step": 12600 }, { "epoch": 0.9396423248882265, "grad_norm": 2.567545175552368, "learning_rate": 0.0002, "loss": 2.6517, "step": 12610 }, { "epoch": 0.940387481371088, "grad_norm": 2.426687240600586, "learning_rate": 0.0002, "loss": 2.698, "step": 12620 }, { "epoch": 0.9411326378539493, "grad_norm": 2.2519803047180176, "learning_rate": 0.0002, "loss": 2.3313, "step": 12630 }, { "epoch": 0.9418777943368107, "grad_norm": 2.221134662628174, "learning_rate": 0.0002, "loss": 2.4458, "step": 12640 }, { "epoch": 0.9426229508196722, "grad_norm": 2.3901071548461914, "learning_rate": 0.0002, "loss": 2.6864, "step": 12650 }, { "epoch": 0.9433681073025335, "grad_norm": 2.3478126525878906, "learning_rate": 0.0002, "loss": 2.5446, "step": 12660 }, { "epoch": 0.944113263785395, "grad_norm": 2.271385908126831, "learning_rate": 0.0002, "loss": 2.6798, "step": 12670 }, { "epoch": 0.9448584202682563, "grad_norm": 2.620520830154419, "learning_rate": 0.0002, "loss": 2.5766, "step": 12680 }, { "epoch": 0.9456035767511177, "grad_norm": 2.460322380065918, "learning_rate": 0.0002, "loss": 2.3643, "step": 12690 }, { "epoch": 0.9463487332339792, "grad_norm": 2.526590585708618, "learning_rate": 0.0002, "loss": 2.4779, "step": 12700 }, { "epoch": 0.9470938897168405, "grad_norm": 2.0752880573272705, "learning_rate": 0.0002, "loss": 2.4393, "step": 12710 }, { "epoch": 0.9478390461997019, "grad_norm": 2.8343307971954346, "learning_rate": 0.0002, "loss": 2.5893, "step": 12720 }, { "epoch": 0.9485842026825634, "grad_norm": 2.369196653366089, "learning_rate": 0.0002, "loss": 2.5725, "step": 12730 }, { "epoch": 0.9493293591654247, "grad_norm": 2.3680763244628906, "learning_rate": 0.0002, "loss": 2.3934, "step": 12740 }, { "epoch": 0.9500745156482862, "grad_norm": 2.43011736869812, "learning_rate": 0.0002, "loss": 2.1303, "step": 12750 }, { "epoch": 0.9508196721311475, "grad_norm": 2.4753246307373047, "learning_rate": 0.0002, "loss": 2.6349, "step": 12760 }, { "epoch": 0.9515648286140089, "grad_norm": 2.558833122253418, "learning_rate": 0.0002, "loss": 2.523, "step": 12770 }, { "epoch": 0.9523099850968704, "grad_norm": 2.847932815551758, "learning_rate": 0.0002, "loss": 2.4538, "step": 12780 }, { "epoch": 0.9530551415797317, "grad_norm": 2.3892436027526855, "learning_rate": 0.0002, "loss": 2.3756, "step": 12790 }, { "epoch": 0.9538002980625931, "grad_norm": 2.214956760406494, "learning_rate": 0.0002, "loss": 2.4268, "step": 12800 }, { "epoch": 0.9545454545454546, "grad_norm": 2.3451969623565674, "learning_rate": 0.0002, "loss": 2.423, "step": 12810 }, { "epoch": 0.9552906110283159, "grad_norm": 2.4705963134765625, "learning_rate": 0.0002, "loss": 2.6488, "step": 12820 }, { "epoch": 0.9560357675111774, "grad_norm": 2.7425971031188965, "learning_rate": 0.0002, "loss": 2.5578, "step": 12830 }, { "epoch": 0.9567809239940388, "grad_norm": 2.3511667251586914, "learning_rate": 0.0002, "loss": 2.3301, "step": 12840 }, { "epoch": 0.9575260804769001, "grad_norm": 2.307358980178833, "learning_rate": 0.0002, "loss": 2.4876, "step": 12850 }, { "epoch": 0.9582712369597616, "grad_norm": 2.233081102371216, "learning_rate": 0.0002, "loss": 2.2724, "step": 12860 }, { "epoch": 0.9590163934426229, "grad_norm": 2.7037274837493896, "learning_rate": 0.0002, "loss": 2.3392, "step": 12870 }, { "epoch": 0.9597615499254843, "grad_norm": 2.435098886489868, "learning_rate": 0.0002, "loss": 2.5374, "step": 12880 }, { "epoch": 0.9605067064083458, "grad_norm": 2.4666409492492676, "learning_rate": 0.0002, "loss": 2.5573, "step": 12890 }, { "epoch": 0.9612518628912071, "grad_norm": 2.260859251022339, "learning_rate": 0.0002, "loss": 2.4869, "step": 12900 }, { "epoch": 0.9619970193740686, "grad_norm": 2.661461353302002, "learning_rate": 0.0002, "loss": 2.5166, "step": 12910 }, { "epoch": 0.96274217585693, "grad_norm": 2.374238967895508, "learning_rate": 0.0002, "loss": 2.5812, "step": 12920 }, { "epoch": 0.9634873323397913, "grad_norm": 2.1044247150421143, "learning_rate": 0.0002, "loss": 2.5499, "step": 12930 }, { "epoch": 0.9642324888226528, "grad_norm": 2.3307137489318848, "learning_rate": 0.0002, "loss": 2.6675, "step": 12940 }, { "epoch": 0.9649776453055141, "grad_norm": 2.323369264602661, "learning_rate": 0.0002, "loss": 2.6393, "step": 12950 }, { "epoch": 0.9657228017883756, "grad_norm": 2.198737621307373, "learning_rate": 0.0002, "loss": 2.659, "step": 12960 }, { "epoch": 0.966467958271237, "grad_norm": 2.636990547180176, "learning_rate": 0.0002, "loss": 2.3037, "step": 12970 }, { "epoch": 0.9672131147540983, "grad_norm": 2.4657247066497803, "learning_rate": 0.0002, "loss": 2.4913, "step": 12980 }, { "epoch": 0.9679582712369598, "grad_norm": 2.854710340499878, "learning_rate": 0.0002, "loss": 2.6958, "step": 12990 }, { "epoch": 0.9687034277198212, "grad_norm": 1.9861502647399902, "learning_rate": 0.0002, "loss": 2.4177, "step": 13000 }, { "epoch": 0.9694485842026825, "grad_norm": 2.2618319988250732, "learning_rate": 0.0002, "loss": 2.6304, "step": 13010 }, { "epoch": 0.970193740685544, "grad_norm": 2.253206491470337, "learning_rate": 0.0002, "loss": 2.539, "step": 13020 }, { "epoch": 0.9709388971684053, "grad_norm": 2.4439284801483154, "learning_rate": 0.0002, "loss": 2.6837, "step": 13030 }, { "epoch": 0.9716840536512668, "grad_norm": 2.5577943325042725, "learning_rate": 0.0002, "loss": 2.6352, "step": 13040 }, { "epoch": 0.9724292101341282, "grad_norm": 2.345137596130371, "learning_rate": 0.0002, "loss": 2.5917, "step": 13050 }, { "epoch": 0.9731743666169895, "grad_norm": 2.3082261085510254, "learning_rate": 0.0002, "loss": 2.484, "step": 13060 }, { "epoch": 0.973919523099851, "grad_norm": 2.542104721069336, "learning_rate": 0.0002, "loss": 2.5961, "step": 13070 }, { "epoch": 0.9746646795827124, "grad_norm": 2.420619487762451, "learning_rate": 0.0002, "loss": 2.6367, "step": 13080 }, { "epoch": 0.9754098360655737, "grad_norm": 2.230170488357544, "learning_rate": 0.0002, "loss": 2.4443, "step": 13090 }, { "epoch": 0.9761549925484352, "grad_norm": 2.4202942848205566, "learning_rate": 0.0002, "loss": 2.4689, "step": 13100 }, { "epoch": 0.9769001490312966, "grad_norm": 2.6881895065307617, "learning_rate": 0.0002, "loss": 2.4001, "step": 13110 }, { "epoch": 0.977645305514158, "grad_norm": 1.8320391178131104, "learning_rate": 0.0002, "loss": 2.545, "step": 13120 }, { "epoch": 0.9783904619970194, "grad_norm": 2.4107582569122314, "learning_rate": 0.0002, "loss": 2.5337, "step": 13130 }, { "epoch": 0.9791356184798807, "grad_norm": 2.323265790939331, "learning_rate": 0.0002, "loss": 2.57, "step": 13140 }, { "epoch": 0.9798807749627422, "grad_norm": 2.5477750301361084, "learning_rate": 0.0002, "loss": 2.5769, "step": 13150 }, { "epoch": 0.9806259314456036, "grad_norm": 3.1621835231781006, "learning_rate": 0.0002, "loss": 2.4171, "step": 13160 }, { "epoch": 0.981371087928465, "grad_norm": 2.4050159454345703, "learning_rate": 0.0002, "loss": 2.5969, "step": 13170 }, { "epoch": 0.9821162444113264, "grad_norm": 2.261742353439331, "learning_rate": 0.0002, "loss": 2.5855, "step": 13180 }, { "epoch": 0.9828614008941878, "grad_norm": 2.472062349319458, "learning_rate": 0.0002, "loss": 2.6003, "step": 13190 }, { "epoch": 0.9836065573770492, "grad_norm": 2.328455924987793, "learning_rate": 0.0002, "loss": 2.6398, "step": 13200 }, { "epoch": 0.9843517138599106, "grad_norm": 2.5007076263427734, "learning_rate": 0.0002, "loss": 2.6027, "step": 13210 }, { "epoch": 0.9850968703427719, "grad_norm": 2.1729538440704346, "learning_rate": 0.0002, "loss": 2.5315, "step": 13220 }, { "epoch": 0.9858420268256334, "grad_norm": 2.4826149940490723, "learning_rate": 0.0002, "loss": 2.5213, "step": 13230 }, { "epoch": 0.9865871833084948, "grad_norm": 2.1284587383270264, "learning_rate": 0.0002, "loss": 2.3732, "step": 13240 }, { "epoch": 0.9873323397913562, "grad_norm": 2.260220766067505, "learning_rate": 0.0002, "loss": 2.5361, "step": 13250 }, { "epoch": 0.9880774962742176, "grad_norm": 2.427016019821167, "learning_rate": 0.0002, "loss": 2.7074, "step": 13260 }, { "epoch": 0.988822652757079, "grad_norm": 2.6063551902770996, "learning_rate": 0.0002, "loss": 2.7052, "step": 13270 }, { "epoch": 0.9895678092399404, "grad_norm": 2.5883805751800537, "learning_rate": 0.0002, "loss": 2.6739, "step": 13280 }, { "epoch": 0.9903129657228018, "grad_norm": 2.7127444744110107, "learning_rate": 0.0002, "loss": 2.5796, "step": 13290 }, { "epoch": 0.9910581222056631, "grad_norm": 2.2932395935058594, "learning_rate": 0.0002, "loss": 2.6266, "step": 13300 }, { "epoch": 0.9918032786885246, "grad_norm": 2.1839466094970703, "learning_rate": 0.0002, "loss": 2.4277, "step": 13310 }, { "epoch": 0.992548435171386, "grad_norm": 2.1350860595703125, "learning_rate": 0.0002, "loss": 2.4021, "step": 13320 }, { "epoch": 0.9932935916542474, "grad_norm": 2.821213960647583, "learning_rate": 0.0002, "loss": 2.4104, "step": 13330 }, { "epoch": 0.9940387481371088, "grad_norm": 2.3386242389678955, "learning_rate": 0.0002, "loss": 2.6611, "step": 13340 }, { "epoch": 0.9947839046199702, "grad_norm": 2.477780342102051, "learning_rate": 0.0002, "loss": 2.5641, "step": 13350 }, { "epoch": 0.9955290611028316, "grad_norm": 2.1101603507995605, "learning_rate": 0.0002, "loss": 2.3935, "step": 13360 }, { "epoch": 0.996274217585693, "grad_norm": 2.371098041534424, "learning_rate": 0.0002, "loss": 2.4277, "step": 13370 }, { "epoch": 0.9970193740685543, "grad_norm": 2.4047231674194336, "learning_rate": 0.0002, "loss": 2.5452, "step": 13380 }, { "epoch": 0.9977645305514158, "grad_norm": 2.2400450706481934, "learning_rate": 0.0002, "loss": 2.5556, "step": 13390 }, { "epoch": 0.9985096870342772, "grad_norm": 2.187760829925537, "learning_rate": 0.0002, "loss": 2.645, "step": 13400 }, { "epoch": 0.9992548435171386, "grad_norm": 2.3010387420654297, "learning_rate": 0.0002, "loss": 2.4113, "step": 13410 }, { "epoch": 1.0, "grad_norm": 2.8511722087860107, "learning_rate": 0.0002, "loss": 2.5681, "step": 13420 }, { "epoch": 1.0, "eval_runtime": 2766.9434, "eval_samples_per_second": 4.85, "eval_steps_per_second": 0.606, "step": 13420 }, { "epoch": 1.0007451564828613, "grad_norm": 2.541956901550293, "learning_rate": 0.0002, "loss": 2.4417, "step": 13430 }, { "epoch": 1.0014903129657229, "grad_norm": 2.2113637924194336, "learning_rate": 0.0002, "loss": 2.3728, "step": 13440 }, { "epoch": 1.0022354694485842, "grad_norm": 2.6709325313568115, "learning_rate": 0.0002, "loss": 2.3259, "step": 13450 }, { "epoch": 1.0029806259314455, "grad_norm": 2.1708545684814453, "learning_rate": 0.0002, "loss": 2.164, "step": 13460 }, { "epoch": 1.003725782414307, "grad_norm": 2.2694172859191895, "learning_rate": 0.0002, "loss": 2.37, "step": 13470 }, { "epoch": 1.0044709388971684, "grad_norm": 2.839197874069214, "learning_rate": 0.0002, "loss": 2.383, "step": 13480 }, { "epoch": 1.0052160953800298, "grad_norm": 2.7437849044799805, "learning_rate": 0.0002, "loss": 2.3811, "step": 13490 }, { "epoch": 1.0059612518628913, "grad_norm": 2.8862855434417725, "learning_rate": 0.0002, "loss": 2.2016, "step": 13500 }, { "epoch": 1.0067064083457526, "grad_norm": 2.6776318550109863, "learning_rate": 0.0002, "loss": 2.2084, "step": 13510 }, { "epoch": 1.007451564828614, "grad_norm": 2.473053455352783, "learning_rate": 0.0002, "loss": 2.4357, "step": 13520 }, { "epoch": 1.0081967213114753, "grad_norm": 2.5545501708984375, "learning_rate": 0.0002, "loss": 2.4439, "step": 13530 }, { "epoch": 1.0089418777943369, "grad_norm": 2.6826674938201904, "learning_rate": 0.0002, "loss": 2.5071, "step": 13540 }, { "epoch": 1.0096870342771982, "grad_norm": 2.596944570541382, "learning_rate": 0.0002, "loss": 2.5319, "step": 13550 }, { "epoch": 1.0104321907600595, "grad_norm": 2.0925228595733643, "learning_rate": 0.0002, "loss": 2.1662, "step": 13560 }, { "epoch": 1.011177347242921, "grad_norm": 2.8665518760681152, "learning_rate": 0.0002, "loss": 2.5069, "step": 13570 }, { "epoch": 1.0119225037257824, "grad_norm": 2.4106383323669434, "learning_rate": 0.0002, "loss": 2.3902, "step": 13580 }, { "epoch": 1.0126676602086437, "grad_norm": 2.5520753860473633, "learning_rate": 0.0002, "loss": 2.4638, "step": 13590 }, { "epoch": 1.0134128166915053, "grad_norm": 2.505903482437134, "learning_rate": 0.0002, "loss": 2.5054, "step": 13600 }, { "epoch": 1.0141579731743666, "grad_norm": 2.5196523666381836, "learning_rate": 0.0002, "loss": 2.1964, "step": 13610 }, { "epoch": 1.014903129657228, "grad_norm": 2.414044141769409, "learning_rate": 0.0002, "loss": 2.2939, "step": 13620 }, { "epoch": 1.0156482861400895, "grad_norm": 2.586638927459717, "learning_rate": 0.0002, "loss": 2.403, "step": 13630 }, { "epoch": 1.0163934426229508, "grad_norm": 2.509230852127075, "learning_rate": 0.0002, "loss": 2.3131, "step": 13640 }, { "epoch": 1.0171385991058122, "grad_norm": 2.8576319217681885, "learning_rate": 0.0002, "loss": 2.3398, "step": 13650 }, { "epoch": 1.0178837555886737, "grad_norm": 3.0436184406280518, "learning_rate": 0.0002, "loss": 2.4026, "step": 13660 }, { "epoch": 1.018628912071535, "grad_norm": 2.6709108352661133, "learning_rate": 0.0002, "loss": 2.3594, "step": 13670 }, { "epoch": 1.0193740685543964, "grad_norm": 2.6364598274230957, "learning_rate": 0.0002, "loss": 2.3834, "step": 13680 }, { "epoch": 1.0201192250372577, "grad_norm": 2.666531562805176, "learning_rate": 0.0002, "loss": 2.4294, "step": 13690 }, { "epoch": 1.0208643815201193, "grad_norm": 2.945071220397949, "learning_rate": 0.0002, "loss": 2.4322, "step": 13700 }, { "epoch": 1.0216095380029806, "grad_norm": 2.6446971893310547, "learning_rate": 0.0002, "loss": 2.3047, "step": 13710 }, { "epoch": 1.022354694485842, "grad_norm": 2.769773483276367, "learning_rate": 0.0002, "loss": 2.328, "step": 13720 }, { "epoch": 1.0230998509687035, "grad_norm": 2.7484028339385986, "learning_rate": 0.0002, "loss": 2.4968, "step": 13730 }, { "epoch": 1.0238450074515648, "grad_norm": 2.563288927078247, "learning_rate": 0.0002, "loss": 2.3943, "step": 13740 }, { "epoch": 1.0245901639344261, "grad_norm": 2.358903408050537, "learning_rate": 0.0002, "loss": 2.3394, "step": 13750 }, { "epoch": 1.0253353204172877, "grad_norm": 3.086336851119995, "learning_rate": 0.0002, "loss": 2.4493, "step": 13760 }, { "epoch": 1.026080476900149, "grad_norm": 2.4959349632263184, "learning_rate": 0.0002, "loss": 2.3651, "step": 13770 }, { "epoch": 1.0268256333830104, "grad_norm": 2.5488016605377197, "learning_rate": 0.0002, "loss": 2.3654, "step": 13780 }, { "epoch": 1.027570789865872, "grad_norm": 2.6956136226654053, "learning_rate": 0.0002, "loss": 2.5436, "step": 13790 }, { "epoch": 1.0283159463487332, "grad_norm": 2.507394790649414, "learning_rate": 0.0002, "loss": 2.393, "step": 13800 }, { "epoch": 1.0290611028315946, "grad_norm": 2.583897113800049, "learning_rate": 0.0002, "loss": 2.369, "step": 13810 }, { "epoch": 1.0298062593144561, "grad_norm": 2.804624080657959, "learning_rate": 0.0002, "loss": 2.4578, "step": 13820 }, { "epoch": 1.0305514157973175, "grad_norm": 2.8353066444396973, "learning_rate": 0.0002, "loss": 2.415, "step": 13830 }, { "epoch": 1.0312965722801788, "grad_norm": 2.8079729080200195, "learning_rate": 0.0002, "loss": 2.3498, "step": 13840 }, { "epoch": 1.0320417287630403, "grad_norm": 3.153747320175171, "learning_rate": 0.0002, "loss": 2.4533, "step": 13850 }, { "epoch": 1.0327868852459017, "grad_norm": 2.170792579650879, "learning_rate": 0.0002, "loss": 2.2922, "step": 13860 }, { "epoch": 1.033532041728763, "grad_norm": 2.5001676082611084, "learning_rate": 0.0002, "loss": 2.4288, "step": 13870 }, { "epoch": 1.0342771982116243, "grad_norm": 2.776643753051758, "learning_rate": 0.0002, "loss": 2.3554, "step": 13880 }, { "epoch": 1.035022354694486, "grad_norm": 2.7371437549591064, "learning_rate": 0.0002, "loss": 2.3946, "step": 13890 }, { "epoch": 1.0357675111773472, "grad_norm": 2.1921558380126953, "learning_rate": 0.0002, "loss": 2.344, "step": 13900 }, { "epoch": 1.0365126676602086, "grad_norm": 2.4711101055145264, "learning_rate": 0.0002, "loss": 2.4239, "step": 13910 }, { "epoch": 1.03725782414307, "grad_norm": 1.9869732856750488, "learning_rate": 0.0002, "loss": 2.3064, "step": 13920 }, { "epoch": 1.0380029806259314, "grad_norm": 2.50216007232666, "learning_rate": 0.0002, "loss": 2.5143, "step": 13930 }, { "epoch": 1.0387481371087928, "grad_norm": 2.3530120849609375, "learning_rate": 0.0002, "loss": 2.46, "step": 13940 }, { "epoch": 1.0394932935916543, "grad_norm": 2.8673856258392334, "learning_rate": 0.0002, "loss": 2.3562, "step": 13950 }, { "epoch": 1.0402384500745157, "grad_norm": 2.380708932876587, "learning_rate": 0.0002, "loss": 2.4367, "step": 13960 }, { "epoch": 1.040983606557377, "grad_norm": 2.6121153831481934, "learning_rate": 0.0002, "loss": 2.4539, "step": 13970 }, { "epoch": 1.0417287630402385, "grad_norm": 2.1682288646698, "learning_rate": 0.0002, "loss": 2.3299, "step": 13980 }, { "epoch": 1.0424739195230999, "grad_norm": 2.493143081665039, "learning_rate": 0.0002, "loss": 2.2522, "step": 13990 }, { "epoch": 1.0432190760059612, "grad_norm": 2.3164801597595215, "learning_rate": 0.0002, "loss": 2.3501, "step": 14000 }, { "epoch": 1.0439642324888228, "grad_norm": 2.372344493865967, "learning_rate": 0.0002, "loss": 2.4671, "step": 14010 }, { "epoch": 1.044709388971684, "grad_norm": 2.2820746898651123, "learning_rate": 0.0002, "loss": 2.3155, "step": 14020 }, { "epoch": 1.0454545454545454, "grad_norm": 2.676579475402832, "learning_rate": 0.0002, "loss": 2.3697, "step": 14030 }, { "epoch": 1.046199701937407, "grad_norm": 2.5534327030181885, "learning_rate": 0.0002, "loss": 2.1582, "step": 14040 }, { "epoch": 1.0469448584202683, "grad_norm": 2.6932196617126465, "learning_rate": 0.0002, "loss": 2.3688, "step": 14050 }, { "epoch": 1.0476900149031296, "grad_norm": 2.8461594581604004, "learning_rate": 0.0002, "loss": 2.6371, "step": 14060 }, { "epoch": 1.048435171385991, "grad_norm": 2.3841376304626465, "learning_rate": 0.0002, "loss": 2.2052, "step": 14070 }, { "epoch": 1.0491803278688525, "grad_norm": 2.358163356781006, "learning_rate": 0.0002, "loss": 2.2105, "step": 14080 }, { "epoch": 1.0499254843517138, "grad_norm": 2.9143970012664795, "learning_rate": 0.0002, "loss": 2.3372, "step": 14090 }, { "epoch": 1.0506706408345752, "grad_norm": 2.2948029041290283, "learning_rate": 0.0002, "loss": 2.4952, "step": 14100 }, { "epoch": 1.0514157973174367, "grad_norm": 2.725156307220459, "learning_rate": 0.0002, "loss": 2.4442, "step": 14110 }, { "epoch": 1.052160953800298, "grad_norm": 2.590603828430176, "learning_rate": 0.0002, "loss": 2.4739, "step": 14120 }, { "epoch": 1.0529061102831594, "grad_norm": 2.1326684951782227, "learning_rate": 0.0002, "loss": 2.3584, "step": 14130 }, { "epoch": 1.053651266766021, "grad_norm": 2.2197265625, "learning_rate": 0.0002, "loss": 2.4694, "step": 14140 }, { "epoch": 1.0543964232488823, "grad_norm": 2.6531333923339844, "learning_rate": 0.0002, "loss": 2.3527, "step": 14150 }, { "epoch": 1.0551415797317436, "grad_norm": 2.60204815864563, "learning_rate": 0.0002, "loss": 2.4601, "step": 14160 }, { "epoch": 1.0558867362146052, "grad_norm": 1.9536486864089966, "learning_rate": 0.0002, "loss": 2.3322, "step": 14170 }, { "epoch": 1.0566318926974665, "grad_norm": 2.206132650375366, "learning_rate": 0.0002, "loss": 2.388, "step": 14180 }, { "epoch": 1.0573770491803278, "grad_norm": 2.372546911239624, "learning_rate": 0.0002, "loss": 2.4207, "step": 14190 }, { "epoch": 1.0581222056631894, "grad_norm": 2.5340518951416016, "learning_rate": 0.0002, "loss": 2.4014, "step": 14200 }, { "epoch": 1.0588673621460507, "grad_norm": 2.513394594192505, "learning_rate": 0.0002, "loss": 2.5686, "step": 14210 }, { "epoch": 1.059612518628912, "grad_norm": 2.649597644805908, "learning_rate": 0.0002, "loss": 2.4436, "step": 14220 }, { "epoch": 1.0603576751117734, "grad_norm": 2.531172513961792, "learning_rate": 0.0002, "loss": 2.2963, "step": 14230 }, { "epoch": 1.061102831594635, "grad_norm": 2.393568515777588, "learning_rate": 0.0002, "loss": 2.2868, "step": 14240 }, { "epoch": 1.0618479880774963, "grad_norm": 2.474477767944336, "learning_rate": 0.0002, "loss": 2.2407, "step": 14250 }, { "epoch": 1.0625931445603576, "grad_norm": 2.3346738815307617, "learning_rate": 0.0002, "loss": 2.3307, "step": 14260 }, { "epoch": 1.0633383010432191, "grad_norm": 2.627002239227295, "learning_rate": 0.0002, "loss": 2.5363, "step": 14270 }, { "epoch": 1.0640834575260805, "grad_norm": 2.602421522140503, "learning_rate": 0.0002, "loss": 2.5165, "step": 14280 }, { "epoch": 1.0648286140089418, "grad_norm": 2.7509329319000244, "learning_rate": 0.0002, "loss": 2.3219, "step": 14290 }, { "epoch": 1.0655737704918034, "grad_norm": 2.4855947494506836, "learning_rate": 0.0002, "loss": 2.5004, "step": 14300 }, { "epoch": 1.0663189269746647, "grad_norm": 2.7259209156036377, "learning_rate": 0.0002, "loss": 2.3724, "step": 14310 }, { "epoch": 1.067064083457526, "grad_norm": 2.578622579574585, "learning_rate": 0.0002, "loss": 2.3865, "step": 14320 }, { "epoch": 1.0678092399403876, "grad_norm": 2.640737771987915, "learning_rate": 0.0002, "loss": 2.2266, "step": 14330 }, { "epoch": 1.068554396423249, "grad_norm": 2.536515712738037, "learning_rate": 0.0002, "loss": 2.4457, "step": 14340 }, { "epoch": 1.0692995529061102, "grad_norm": 2.3752591609954834, "learning_rate": 0.0002, "loss": 2.3925, "step": 14350 }, { "epoch": 1.0700447093889718, "grad_norm": 2.668381929397583, "learning_rate": 0.0002, "loss": 2.3275, "step": 14360 }, { "epoch": 1.0707898658718331, "grad_norm": 2.636784315109253, "learning_rate": 0.0002, "loss": 2.3801, "step": 14370 }, { "epoch": 1.0715350223546944, "grad_norm": 2.4896252155303955, "learning_rate": 0.0002, "loss": 2.3056, "step": 14380 }, { "epoch": 1.072280178837556, "grad_norm": 2.3532116413116455, "learning_rate": 0.0002, "loss": 2.2644, "step": 14390 }, { "epoch": 1.0730253353204173, "grad_norm": 2.7929883003234863, "learning_rate": 0.0002, "loss": 2.326, "step": 14400 }, { "epoch": 1.0737704918032787, "grad_norm": 2.66312837600708, "learning_rate": 0.0002, "loss": 2.4889, "step": 14410 }, { "epoch": 1.07451564828614, "grad_norm": 2.480541706085205, "learning_rate": 0.0002, "loss": 2.4544, "step": 14420 }, { "epoch": 1.0752608047690015, "grad_norm": 2.473315954208374, "learning_rate": 0.0002, "loss": 2.4154, "step": 14430 }, { "epoch": 1.0760059612518629, "grad_norm": 2.4049172401428223, "learning_rate": 0.0002, "loss": 2.3524, "step": 14440 }, { "epoch": 1.0767511177347242, "grad_norm": 2.4751241207122803, "learning_rate": 0.0002, "loss": 2.1488, "step": 14450 }, { "epoch": 1.0774962742175858, "grad_norm": 2.5170867443084717, "learning_rate": 0.0002, "loss": 2.5725, "step": 14460 }, { "epoch": 1.078241430700447, "grad_norm": 2.9997737407684326, "learning_rate": 0.0002, "loss": 2.4776, "step": 14470 }, { "epoch": 1.0789865871833084, "grad_norm": 2.396097421646118, "learning_rate": 0.0002, "loss": 2.249, "step": 14480 }, { "epoch": 1.07973174366617, "grad_norm": 3.120967149734497, "learning_rate": 0.0002, "loss": 2.4061, "step": 14490 }, { "epoch": 1.0804769001490313, "grad_norm": 3.100985527038574, "learning_rate": 0.0002, "loss": 2.4715, "step": 14500 }, { "epoch": 1.0812220566318926, "grad_norm": 2.4423913955688477, "learning_rate": 0.0002, "loss": 2.296, "step": 14510 }, { "epoch": 1.0819672131147542, "grad_norm": 2.439854860305786, "learning_rate": 0.0002, "loss": 2.4691, "step": 14520 }, { "epoch": 1.0827123695976155, "grad_norm": 2.5412724018096924, "learning_rate": 0.0002, "loss": 2.3789, "step": 14530 }, { "epoch": 1.0834575260804769, "grad_norm": 1.9563956260681152, "learning_rate": 0.0002, "loss": 2.571, "step": 14540 }, { "epoch": 1.0842026825633384, "grad_norm": 2.4468328952789307, "learning_rate": 0.0002, "loss": 2.3139, "step": 14550 }, { "epoch": 1.0849478390461997, "grad_norm": 2.4930431842803955, "learning_rate": 0.0002, "loss": 2.313, "step": 14560 }, { "epoch": 1.085692995529061, "grad_norm": 2.567399501800537, "learning_rate": 0.0002, "loss": 2.4385, "step": 14570 }, { "epoch": 1.0864381520119224, "grad_norm": 2.871107339859009, "learning_rate": 0.0002, "loss": 2.5201, "step": 14580 }, { "epoch": 1.087183308494784, "grad_norm": 2.9992733001708984, "learning_rate": 0.0002, "loss": 2.2764, "step": 14590 }, { "epoch": 1.0879284649776453, "grad_norm": 2.3706576824188232, "learning_rate": 0.0002, "loss": 2.604, "step": 14600 }, { "epoch": 1.0886736214605066, "grad_norm": 2.642545461654663, "learning_rate": 0.0002, "loss": 2.3878, "step": 14610 }, { "epoch": 1.0894187779433682, "grad_norm": 2.364602565765381, "learning_rate": 0.0002, "loss": 2.338, "step": 14620 }, { "epoch": 1.0901639344262295, "grad_norm": 2.9395387172698975, "learning_rate": 0.0002, "loss": 2.3848, "step": 14630 }, { "epoch": 1.0909090909090908, "grad_norm": 3.6950175762176514, "learning_rate": 0.0002, "loss": 2.4532, "step": 14640 }, { "epoch": 1.0916542473919524, "grad_norm": 2.566967487335205, "learning_rate": 0.0002, "loss": 2.5999, "step": 14650 }, { "epoch": 1.0923994038748137, "grad_norm": 2.3279924392700195, "learning_rate": 0.0002, "loss": 2.5143, "step": 14660 }, { "epoch": 1.093144560357675, "grad_norm": 3.0130693912506104, "learning_rate": 0.0002, "loss": 2.4298, "step": 14670 }, { "epoch": 1.0938897168405366, "grad_norm": 2.0942749977111816, "learning_rate": 0.0002, "loss": 2.394, "step": 14680 }, { "epoch": 1.094634873323398, "grad_norm": 2.4784209728240967, "learning_rate": 0.0002, "loss": 2.474, "step": 14690 }, { "epoch": 1.0953800298062593, "grad_norm": 2.7817776203155518, "learning_rate": 0.0002, "loss": 2.3312, "step": 14700 }, { "epoch": 1.0961251862891208, "grad_norm": 2.4290931224823, "learning_rate": 0.0002, "loss": 2.3258, "step": 14710 }, { "epoch": 1.0968703427719821, "grad_norm": 2.543133020401001, "learning_rate": 0.0002, "loss": 2.4291, "step": 14720 }, { "epoch": 1.0976154992548435, "grad_norm": 2.4686405658721924, "learning_rate": 0.0002, "loss": 2.2782, "step": 14730 }, { "epoch": 1.098360655737705, "grad_norm": 2.5764386653900146, "learning_rate": 0.0002, "loss": 2.3708, "step": 14740 }, { "epoch": 1.0991058122205664, "grad_norm": 2.827773094177246, "learning_rate": 0.0002, "loss": 2.3506, "step": 14750 }, { "epoch": 1.0998509687034277, "grad_norm": 2.2853167057037354, "learning_rate": 0.0002, "loss": 2.4782, "step": 14760 }, { "epoch": 1.100596125186289, "grad_norm": 2.094871997833252, "learning_rate": 0.0002, "loss": 2.3033, "step": 14770 }, { "epoch": 1.1013412816691506, "grad_norm": 2.252192735671997, "learning_rate": 0.0002, "loss": 2.5064, "step": 14780 }, { "epoch": 1.102086438152012, "grad_norm": 2.530860424041748, "learning_rate": 0.0002, "loss": 2.3885, "step": 14790 }, { "epoch": 1.1028315946348732, "grad_norm": 2.822106122970581, "learning_rate": 0.0002, "loss": 2.4288, "step": 14800 }, { "epoch": 1.1035767511177348, "grad_norm": 2.6030118465423584, "learning_rate": 0.0002, "loss": 2.3705, "step": 14810 }, { "epoch": 1.1043219076005961, "grad_norm": 3.0381968021392822, "learning_rate": 0.0002, "loss": 2.3168, "step": 14820 }, { "epoch": 1.1050670640834575, "grad_norm": 2.1125636100769043, "learning_rate": 0.0002, "loss": 2.2316, "step": 14830 }, { "epoch": 1.105812220566319, "grad_norm": 2.649933338165283, "learning_rate": 0.0002, "loss": 2.3767, "step": 14840 }, { "epoch": 1.1065573770491803, "grad_norm": 2.4779107570648193, "learning_rate": 0.0002, "loss": 2.4179, "step": 14850 }, { "epoch": 1.1073025335320417, "grad_norm": 2.370229721069336, "learning_rate": 0.0002, "loss": 2.3823, "step": 14860 }, { "epoch": 1.1080476900149032, "grad_norm": 2.200868844985962, "learning_rate": 0.0002, "loss": 2.4024, "step": 14870 }, { "epoch": 1.1087928464977646, "grad_norm": 2.385791063308716, "learning_rate": 0.0002, "loss": 2.5435, "step": 14880 }, { "epoch": 1.1095380029806259, "grad_norm": 2.731098175048828, "learning_rate": 0.0002, "loss": 2.4073, "step": 14890 }, { "epoch": 1.1102831594634874, "grad_norm": 2.1808290481567383, "learning_rate": 0.0002, "loss": 2.4306, "step": 14900 }, { "epoch": 1.1110283159463488, "grad_norm": 2.3113632202148438, "learning_rate": 0.0002, "loss": 2.4484, "step": 14910 }, { "epoch": 1.11177347242921, "grad_norm": 2.473583459854126, "learning_rate": 0.0002, "loss": 2.3573, "step": 14920 }, { "epoch": 1.1125186289120714, "grad_norm": 2.4973528385162354, "learning_rate": 0.0002, "loss": 2.4679, "step": 14930 }, { "epoch": 1.113263785394933, "grad_norm": 2.698992967605591, "learning_rate": 0.0002, "loss": 2.573, "step": 14940 }, { "epoch": 1.1140089418777943, "grad_norm": 2.4769937992095947, "learning_rate": 0.0002, "loss": 2.5083, "step": 14950 }, { "epoch": 1.1147540983606556, "grad_norm": 2.54304838180542, "learning_rate": 0.0002, "loss": 2.3611, "step": 14960 }, { "epoch": 1.1154992548435172, "grad_norm": 2.456918478012085, "learning_rate": 0.0002, "loss": 2.512, "step": 14970 }, { "epoch": 1.1162444113263785, "grad_norm": 2.8047049045562744, "learning_rate": 0.0002, "loss": 2.4403, "step": 14980 }, { "epoch": 1.1169895678092399, "grad_norm": 2.716897964477539, "learning_rate": 0.0002, "loss": 2.4872, "step": 14990 }, { "epoch": 1.1177347242921014, "grad_norm": 2.525214910507202, "learning_rate": 0.0002, "loss": 2.3246, "step": 15000 }, { "epoch": 1.1184798807749627, "grad_norm": 2.955278158187866, "learning_rate": 0.0002, "loss": 2.4764, "step": 15010 }, { "epoch": 1.119225037257824, "grad_norm": 2.454103708267212, "learning_rate": 0.0002, "loss": 2.4668, "step": 15020 }, { "epoch": 1.1199701937406856, "grad_norm": 2.495340585708618, "learning_rate": 0.0002, "loss": 2.3346, "step": 15030 }, { "epoch": 1.120715350223547, "grad_norm": 2.2860970497131348, "learning_rate": 0.0002, "loss": 2.5325, "step": 15040 }, { "epoch": 1.1214605067064083, "grad_norm": 2.4267046451568604, "learning_rate": 0.0002, "loss": 2.3052, "step": 15050 }, { "epoch": 1.1222056631892698, "grad_norm": 2.8328089714050293, "learning_rate": 0.0002, "loss": 2.412, "step": 15060 }, { "epoch": 1.1229508196721312, "grad_norm": 2.378267288208008, "learning_rate": 0.0002, "loss": 2.3667, "step": 15070 }, { "epoch": 1.1236959761549925, "grad_norm": 2.625948667526245, "learning_rate": 0.0002, "loss": 2.3818, "step": 15080 }, { "epoch": 1.124441132637854, "grad_norm": 2.269430160522461, "learning_rate": 0.0002, "loss": 2.5105, "step": 15090 }, { "epoch": 1.1251862891207154, "grad_norm": 2.5549607276916504, "learning_rate": 0.0002, "loss": 2.4043, "step": 15100 }, { "epoch": 1.1259314456035767, "grad_norm": 2.5551154613494873, "learning_rate": 0.0002, "loss": 2.4814, "step": 15110 }, { "epoch": 1.1266766020864383, "grad_norm": 2.48346209526062, "learning_rate": 0.0002, "loss": 2.2917, "step": 15120 }, { "epoch": 1.1274217585692996, "grad_norm": 2.214686632156372, "learning_rate": 0.0002, "loss": 2.4799, "step": 15130 }, { "epoch": 1.128166915052161, "grad_norm": 2.6155035495758057, "learning_rate": 0.0002, "loss": 2.4024, "step": 15140 }, { "epoch": 1.1289120715350223, "grad_norm": 2.485732316970825, "learning_rate": 0.0002, "loss": 2.4218, "step": 15150 }, { "epoch": 1.1296572280178838, "grad_norm": 2.489976167678833, "learning_rate": 0.0002, "loss": 2.4819, "step": 15160 }, { "epoch": 1.1304023845007451, "grad_norm": 2.557816743850708, "learning_rate": 0.0002, "loss": 2.404, "step": 15170 }, { "epoch": 1.1311475409836065, "grad_norm": 2.362257242202759, "learning_rate": 0.0002, "loss": 2.413, "step": 15180 }, { "epoch": 1.131892697466468, "grad_norm": 2.555720806121826, "learning_rate": 0.0002, "loss": 2.4822, "step": 15190 }, { "epoch": 1.1326378539493294, "grad_norm": 2.301452398300171, "learning_rate": 0.0002, "loss": 2.4775, "step": 15200 }, { "epoch": 1.1333830104321907, "grad_norm": 2.718770980834961, "learning_rate": 0.0002, "loss": 2.4039, "step": 15210 }, { "epoch": 1.1341281669150522, "grad_norm": 2.561338424682617, "learning_rate": 0.0002, "loss": 2.4091, "step": 15220 }, { "epoch": 1.1348733233979136, "grad_norm": 2.619065523147583, "learning_rate": 0.0002, "loss": 2.5057, "step": 15230 }, { "epoch": 1.135618479880775, "grad_norm": 3.1791832447052, "learning_rate": 0.0002, "loss": 2.4179, "step": 15240 }, { "epoch": 1.1363636363636362, "grad_norm": 2.456456422805786, "learning_rate": 0.0002, "loss": 2.2546, "step": 15250 }, { "epoch": 1.1371087928464978, "grad_norm": 2.696702718734741, "learning_rate": 0.0002, "loss": 2.3953, "step": 15260 }, { "epoch": 1.1378539493293591, "grad_norm": 2.065814733505249, "learning_rate": 0.0002, "loss": 2.1887, "step": 15270 }, { "epoch": 1.1385991058122205, "grad_norm": 2.3281009197235107, "learning_rate": 0.0002, "loss": 2.4815, "step": 15280 }, { "epoch": 1.139344262295082, "grad_norm": 2.4700629711151123, "learning_rate": 0.0002, "loss": 2.3037, "step": 15290 }, { "epoch": 1.1400894187779433, "grad_norm": 2.3414204120635986, "learning_rate": 0.0002, "loss": 2.469, "step": 15300 }, { "epoch": 1.1408345752608047, "grad_norm": 2.8033456802368164, "learning_rate": 0.0002, "loss": 2.2909, "step": 15310 }, { "epoch": 1.1415797317436662, "grad_norm": 2.513645648956299, "learning_rate": 0.0002, "loss": 2.3957, "step": 15320 }, { "epoch": 1.1423248882265276, "grad_norm": 3.029627799987793, "learning_rate": 0.0002, "loss": 2.4792, "step": 15330 }, { "epoch": 1.1430700447093889, "grad_norm": 2.7976560592651367, "learning_rate": 0.0002, "loss": 2.4781, "step": 15340 }, { "epoch": 1.1438152011922504, "grad_norm": 2.6071226596832275, "learning_rate": 0.0002, "loss": 2.4836, "step": 15350 }, { "epoch": 1.1445603576751118, "grad_norm": 2.4131622314453125, "learning_rate": 0.0002, "loss": 2.4202, "step": 15360 }, { "epoch": 1.145305514157973, "grad_norm": 2.6419029235839844, "learning_rate": 0.0002, "loss": 2.5272, "step": 15370 }, { "epoch": 1.1460506706408347, "grad_norm": 2.893653154373169, "learning_rate": 0.0002, "loss": 2.5116, "step": 15380 }, { "epoch": 1.146795827123696, "grad_norm": 2.739550828933716, "learning_rate": 0.0002, "loss": 2.3836, "step": 15390 }, { "epoch": 1.1475409836065573, "grad_norm": 2.9094793796539307, "learning_rate": 0.0002, "loss": 2.3208, "step": 15400 }, { "epoch": 1.1482861400894189, "grad_norm": 2.420971155166626, "learning_rate": 0.0002, "loss": 2.3705, "step": 15410 }, { "epoch": 1.1490312965722802, "grad_norm": 2.5930230617523193, "learning_rate": 0.0002, "loss": 2.5412, "step": 15420 }, { "epoch": 1.1497764530551415, "grad_norm": 2.6284139156341553, "learning_rate": 0.0002, "loss": 2.2475, "step": 15430 }, { "epoch": 1.150521609538003, "grad_norm": 2.640470266342163, "learning_rate": 0.0002, "loss": 2.4292, "step": 15440 }, { "epoch": 1.1512667660208644, "grad_norm": 2.7943484783172607, "learning_rate": 0.0002, "loss": 2.4816, "step": 15450 }, { "epoch": 1.1520119225037257, "grad_norm": 2.281245470046997, "learning_rate": 0.0002, "loss": 2.3931, "step": 15460 }, { "epoch": 1.1527570789865873, "grad_norm": 2.250092029571533, "learning_rate": 0.0002, "loss": 2.4882, "step": 15470 }, { "epoch": 1.1535022354694486, "grad_norm": 2.4525022506713867, "learning_rate": 0.0002, "loss": 2.4132, "step": 15480 }, { "epoch": 1.15424739195231, "grad_norm": 2.4779772758483887, "learning_rate": 0.0002, "loss": 2.3182, "step": 15490 }, { "epoch": 1.1549925484351713, "grad_norm": 2.4261624813079834, "learning_rate": 0.0002, "loss": 2.5122, "step": 15500 }, { "epoch": 1.1557377049180328, "grad_norm": 2.989243745803833, "learning_rate": 0.0002, "loss": 2.5352, "step": 15510 }, { "epoch": 1.1564828614008942, "grad_norm": 3.4298269748687744, "learning_rate": 0.0002, "loss": 2.4098, "step": 15520 }, { "epoch": 1.1572280178837555, "grad_norm": 2.6402604579925537, "learning_rate": 0.0002, "loss": 2.4133, "step": 15530 }, { "epoch": 1.157973174366617, "grad_norm": 2.4061222076416016, "learning_rate": 0.0002, "loss": 2.3814, "step": 15540 }, { "epoch": 1.1587183308494784, "grad_norm": 2.915400266647339, "learning_rate": 0.0002, "loss": 2.3256, "step": 15550 }, { "epoch": 1.1594634873323397, "grad_norm": 2.7170588970184326, "learning_rate": 0.0002, "loss": 2.4919, "step": 15560 }, { "epoch": 1.1602086438152013, "grad_norm": 2.5556771755218506, "learning_rate": 0.0002, "loss": 2.5706, "step": 15570 }, { "epoch": 1.1609538002980626, "grad_norm": 2.762441873550415, "learning_rate": 0.0002, "loss": 2.3807, "step": 15580 }, { "epoch": 1.161698956780924, "grad_norm": 2.6300220489501953, "learning_rate": 0.0002, "loss": 2.405, "step": 15590 }, { "epoch": 1.1624441132637853, "grad_norm": 2.586217164993286, "learning_rate": 0.0002, "loss": 2.5505, "step": 15600 }, { "epoch": 1.1631892697466468, "grad_norm": 2.6956429481506348, "learning_rate": 0.0002, "loss": 2.259, "step": 15610 }, { "epoch": 1.1639344262295082, "grad_norm": 2.6788172721862793, "learning_rate": 0.0002, "loss": 2.281, "step": 15620 }, { "epoch": 1.1646795827123695, "grad_norm": 2.6697628498077393, "learning_rate": 0.0002, "loss": 2.4952, "step": 15630 }, { "epoch": 1.165424739195231, "grad_norm": 2.434091567993164, "learning_rate": 0.0002, "loss": 2.5588, "step": 15640 }, { "epoch": 1.1661698956780924, "grad_norm": 2.590214252471924, "learning_rate": 0.0002, "loss": 2.4701, "step": 15650 }, { "epoch": 1.1669150521609537, "grad_norm": 2.3940234184265137, "learning_rate": 0.0002, "loss": 2.4546, "step": 15660 }, { "epoch": 1.1676602086438153, "grad_norm": 2.5530643463134766, "learning_rate": 0.0002, "loss": 2.3913, "step": 15670 }, { "epoch": 1.1684053651266766, "grad_norm": 2.599776029586792, "learning_rate": 0.0002, "loss": 2.3379, "step": 15680 }, { "epoch": 1.169150521609538, "grad_norm": 2.3956868648529053, "learning_rate": 0.0002, "loss": 2.4249, "step": 15690 }, { "epoch": 1.1698956780923995, "grad_norm": 2.866769313812256, "learning_rate": 0.0002, "loss": 2.5113, "step": 15700 }, { "epoch": 1.1706408345752608, "grad_norm": 2.3817973136901855, "learning_rate": 0.0002, "loss": 2.4549, "step": 15710 }, { "epoch": 1.1713859910581221, "grad_norm": 2.8611204624176025, "learning_rate": 0.0002, "loss": 2.4499, "step": 15720 }, { "epoch": 1.1721311475409837, "grad_norm": 2.5031049251556396, "learning_rate": 0.0002, "loss": 2.474, "step": 15730 }, { "epoch": 1.172876304023845, "grad_norm": 2.7926108837127686, "learning_rate": 0.0002, "loss": 2.6565, "step": 15740 }, { "epoch": 1.1736214605067063, "grad_norm": 2.480978488922119, "learning_rate": 0.0002, "loss": 2.5754, "step": 15750 }, { "epoch": 1.174366616989568, "grad_norm": 2.831561803817749, "learning_rate": 0.0002, "loss": 2.2785, "step": 15760 }, { "epoch": 1.1751117734724292, "grad_norm": 2.7527670860290527, "learning_rate": 0.0002, "loss": 2.4922, "step": 15770 }, { "epoch": 1.1758569299552906, "grad_norm": 2.640598773956299, "learning_rate": 0.0002, "loss": 2.4216, "step": 15780 }, { "epoch": 1.1766020864381521, "grad_norm": 2.8140854835510254, "learning_rate": 0.0002, "loss": 2.509, "step": 15790 }, { "epoch": 1.1773472429210134, "grad_norm": 2.448840618133545, "learning_rate": 0.0002, "loss": 2.2896, "step": 15800 }, { "epoch": 1.1780923994038748, "grad_norm": 2.5497422218322754, "learning_rate": 0.0002, "loss": 2.329, "step": 15810 }, { "epoch": 1.1788375558867363, "grad_norm": 2.042785882949829, "learning_rate": 0.0002, "loss": 2.4259, "step": 15820 }, { "epoch": 1.1795827123695977, "grad_norm": 2.9720048904418945, "learning_rate": 0.0002, "loss": 2.2438, "step": 15830 }, { "epoch": 1.180327868852459, "grad_norm": 2.516237258911133, "learning_rate": 0.0002, "loss": 2.4921, "step": 15840 }, { "epoch": 1.1810730253353203, "grad_norm": 2.7879035472869873, "learning_rate": 0.0002, "loss": 2.407, "step": 15850 }, { "epoch": 1.1818181818181819, "grad_norm": 2.4089086055755615, "learning_rate": 0.0002, "loss": 2.4276, "step": 15860 }, { "epoch": 1.1825633383010432, "grad_norm": 2.6390304565429688, "learning_rate": 0.0002, "loss": 2.428, "step": 15870 }, { "epoch": 1.1833084947839045, "grad_norm": 3.222198963165283, "learning_rate": 0.0002, "loss": 2.3888, "step": 15880 }, { "epoch": 1.184053651266766, "grad_norm": 2.4840657711029053, "learning_rate": 0.0002, "loss": 2.4878, "step": 15890 }, { "epoch": 1.1847988077496274, "grad_norm": 2.1169028282165527, "learning_rate": 0.0002, "loss": 2.424, "step": 15900 }, { "epoch": 1.1855439642324888, "grad_norm": 2.243370294570923, "learning_rate": 0.0002, "loss": 2.2954, "step": 15910 }, { "epoch": 1.1862891207153503, "grad_norm": 2.8752856254577637, "learning_rate": 0.0002, "loss": 2.3783, "step": 15920 }, { "epoch": 1.1870342771982116, "grad_norm": 2.4351089000701904, "learning_rate": 0.0002, "loss": 2.347, "step": 15930 }, { "epoch": 1.187779433681073, "grad_norm": 2.618943214416504, "learning_rate": 0.0002, "loss": 2.4062, "step": 15940 }, { "epoch": 1.1885245901639343, "grad_norm": 2.5448083877563477, "learning_rate": 0.0002, "loss": 2.4515, "step": 15950 }, { "epoch": 1.1892697466467959, "grad_norm": 2.431405544281006, "learning_rate": 0.0002, "loss": 2.4382, "step": 15960 }, { "epoch": 1.1900149031296572, "grad_norm": 2.914207696914673, "learning_rate": 0.0002, "loss": 2.5043, "step": 15970 }, { "epoch": 1.1907600596125185, "grad_norm": 2.584994316101074, "learning_rate": 0.0002, "loss": 2.3106, "step": 15980 }, { "epoch": 1.19150521609538, "grad_norm": 2.518873691558838, "learning_rate": 0.0002, "loss": 2.3248, "step": 15990 }, { "epoch": 1.1922503725782414, "grad_norm": 2.7757396697998047, "learning_rate": 0.0002, "loss": 2.2855, "step": 16000 }, { "epoch": 1.1929955290611027, "grad_norm": 2.546560764312744, "learning_rate": 0.0002, "loss": 2.2436, "step": 16010 }, { "epoch": 1.1937406855439643, "grad_norm": 2.4890432357788086, "learning_rate": 0.0002, "loss": 2.5493, "step": 16020 }, { "epoch": 1.1944858420268256, "grad_norm": 2.523747682571411, "learning_rate": 0.0002, "loss": 2.3246, "step": 16030 }, { "epoch": 1.195230998509687, "grad_norm": 2.861879825592041, "learning_rate": 0.0002, "loss": 2.3921, "step": 16040 }, { "epoch": 1.1959761549925485, "grad_norm": 2.518486261367798, "learning_rate": 0.0002, "loss": 2.4388, "step": 16050 }, { "epoch": 1.1967213114754098, "grad_norm": 2.4700870513916016, "learning_rate": 0.0002, "loss": 2.5634, "step": 16060 }, { "epoch": 1.1974664679582712, "grad_norm": 2.402773141860962, "learning_rate": 0.0002, "loss": 2.3364, "step": 16070 }, { "epoch": 1.1982116244411327, "grad_norm": 2.673292636871338, "learning_rate": 0.0002, "loss": 2.5241, "step": 16080 }, { "epoch": 1.198956780923994, "grad_norm": 2.3342599868774414, "learning_rate": 0.0002, "loss": 2.56, "step": 16090 }, { "epoch": 1.1997019374068554, "grad_norm": 2.9310660362243652, "learning_rate": 0.0002, "loss": 2.492, "step": 16100 }, { "epoch": 1.200447093889717, "grad_norm": 2.380824327468872, "learning_rate": 0.0002, "loss": 2.511, "step": 16110 }, { "epoch": 1.2011922503725783, "grad_norm": 2.292423963546753, "learning_rate": 0.0002, "loss": 2.5952, "step": 16120 }, { "epoch": 1.2019374068554396, "grad_norm": 2.2969772815704346, "learning_rate": 0.0002, "loss": 2.3879, "step": 16130 }, { "epoch": 1.2026825633383011, "grad_norm": 2.4675862789154053, "learning_rate": 0.0002, "loss": 2.4499, "step": 16140 }, { "epoch": 1.2034277198211625, "grad_norm": 2.35262393951416, "learning_rate": 0.0002, "loss": 2.4689, "step": 16150 }, { "epoch": 1.2041728763040238, "grad_norm": 2.7040700912475586, "learning_rate": 0.0002, "loss": 2.547, "step": 16160 }, { "epoch": 1.2049180327868854, "grad_norm": 2.248189926147461, "learning_rate": 0.0002, "loss": 2.5511, "step": 16170 }, { "epoch": 1.2056631892697467, "grad_norm": 2.631309747695923, "learning_rate": 0.0002, "loss": 2.5509, "step": 16180 }, { "epoch": 1.206408345752608, "grad_norm": 2.66676926612854, "learning_rate": 0.0002, "loss": 2.555, "step": 16190 }, { "epoch": 1.2071535022354694, "grad_norm": 2.6497623920440674, "learning_rate": 0.0002, "loss": 2.445, "step": 16200 }, { "epoch": 1.207898658718331, "grad_norm": 2.9009816646575928, "learning_rate": 0.0002, "loss": 2.1566, "step": 16210 }, { "epoch": 1.2086438152011922, "grad_norm": 2.3509867191314697, "learning_rate": 0.0002, "loss": 2.5733, "step": 16220 }, { "epoch": 1.2093889716840536, "grad_norm": 2.4485838413238525, "learning_rate": 0.0002, "loss": 2.4517, "step": 16230 }, { "epoch": 1.2101341281669151, "grad_norm": 2.4066200256347656, "learning_rate": 0.0002, "loss": 2.4892, "step": 16240 }, { "epoch": 1.2108792846497765, "grad_norm": 2.5041356086730957, "learning_rate": 0.0002, "loss": 2.4068, "step": 16250 }, { "epoch": 1.2116244411326378, "grad_norm": 2.457516670227051, "learning_rate": 0.0002, "loss": 2.4337, "step": 16260 }, { "epoch": 1.2123695976154993, "grad_norm": 2.524627923965454, "learning_rate": 0.0002, "loss": 2.6584, "step": 16270 }, { "epoch": 1.2131147540983607, "grad_norm": 2.730018138885498, "learning_rate": 0.0002, "loss": 2.4592, "step": 16280 }, { "epoch": 1.213859910581222, "grad_norm": 3.1194841861724854, "learning_rate": 0.0002, "loss": 2.5211, "step": 16290 }, { "epoch": 1.2146050670640836, "grad_norm": 2.484910726547241, "learning_rate": 0.0002, "loss": 2.4436, "step": 16300 }, { "epoch": 1.2153502235469449, "grad_norm": 2.5972893238067627, "learning_rate": 0.0002, "loss": 2.5412, "step": 16310 }, { "epoch": 1.2160953800298062, "grad_norm": 2.599231719970703, "learning_rate": 0.0002, "loss": 2.5554, "step": 16320 }, { "epoch": 1.2168405365126675, "grad_norm": 2.5555241107940674, "learning_rate": 0.0002, "loss": 2.5618, "step": 16330 }, { "epoch": 1.217585692995529, "grad_norm": 2.6577017307281494, "learning_rate": 0.0002, "loss": 2.3419, "step": 16340 }, { "epoch": 1.2183308494783904, "grad_norm": 2.239018440246582, "learning_rate": 0.0002, "loss": 2.4517, "step": 16350 }, { "epoch": 1.2190760059612518, "grad_norm": 2.2550699710845947, "learning_rate": 0.0002, "loss": 2.5087, "step": 16360 }, { "epoch": 1.2198211624441133, "grad_norm": 2.7445180416107178, "learning_rate": 0.0002, "loss": 2.5011, "step": 16370 }, { "epoch": 1.2205663189269746, "grad_norm": 2.637608528137207, "learning_rate": 0.0002, "loss": 2.3431, "step": 16380 }, { "epoch": 1.221311475409836, "grad_norm": 2.4551427364349365, "learning_rate": 0.0002, "loss": 2.4485, "step": 16390 }, { "epoch": 1.2220566318926975, "grad_norm": 2.595010757446289, "learning_rate": 0.0002, "loss": 2.6419, "step": 16400 }, { "epoch": 1.2228017883755589, "grad_norm": 3.2001779079437256, "learning_rate": 0.0002, "loss": 2.3009, "step": 16410 }, { "epoch": 1.2235469448584202, "grad_norm": 2.3680529594421387, "learning_rate": 0.0002, "loss": 2.4545, "step": 16420 }, { "epoch": 1.2242921013412817, "grad_norm": 2.284846782684326, "learning_rate": 0.0002, "loss": 2.4662, "step": 16430 }, { "epoch": 1.225037257824143, "grad_norm": 2.7528367042541504, "learning_rate": 0.0002, "loss": 2.504, "step": 16440 }, { "epoch": 1.2257824143070044, "grad_norm": 2.5540902614593506, "learning_rate": 0.0002, "loss": 2.5621, "step": 16450 }, { "epoch": 1.226527570789866, "grad_norm": 2.6369526386260986, "learning_rate": 0.0002, "loss": 2.4563, "step": 16460 }, { "epoch": 1.2272727272727273, "grad_norm": 2.560155153274536, "learning_rate": 0.0002, "loss": 2.3171, "step": 16470 }, { "epoch": 1.2280178837555886, "grad_norm": 2.8841707706451416, "learning_rate": 0.0002, "loss": 2.5621, "step": 16480 }, { "epoch": 1.2287630402384502, "grad_norm": 2.587996482849121, "learning_rate": 0.0002, "loss": 2.5119, "step": 16490 }, { "epoch": 1.2295081967213115, "grad_norm": 2.455828905105591, "learning_rate": 0.0002, "loss": 2.3992, "step": 16500 }, { "epoch": 1.2302533532041728, "grad_norm": 2.5524632930755615, "learning_rate": 0.0002, "loss": 2.5209, "step": 16510 }, { "epoch": 1.2309985096870344, "grad_norm": 2.436572551727295, "learning_rate": 0.0002, "loss": 2.4511, "step": 16520 }, { "epoch": 1.2317436661698957, "grad_norm": 2.5850322246551514, "learning_rate": 0.0002, "loss": 2.3393, "step": 16530 }, { "epoch": 1.232488822652757, "grad_norm": 2.884225845336914, "learning_rate": 0.0002, "loss": 2.4517, "step": 16540 }, { "epoch": 1.2332339791356184, "grad_norm": 2.3954968452453613, "learning_rate": 0.0002, "loss": 2.5374, "step": 16550 }, { "epoch": 1.23397913561848, "grad_norm": 2.329937696456909, "learning_rate": 0.0002, "loss": 2.631, "step": 16560 }, { "epoch": 1.2347242921013413, "grad_norm": 1.706188440322876, "learning_rate": 0.0002, "loss": 2.3386, "step": 16570 }, { "epoch": 1.2354694485842026, "grad_norm": 2.635439395904541, "learning_rate": 0.0002, "loss": 2.408, "step": 16580 }, { "epoch": 1.2362146050670642, "grad_norm": 3.037846326828003, "learning_rate": 0.0002, "loss": 2.3775, "step": 16590 }, { "epoch": 1.2369597615499255, "grad_norm": 2.435914993286133, "learning_rate": 0.0002, "loss": 2.4828, "step": 16600 }, { "epoch": 1.2377049180327868, "grad_norm": 2.3864994049072266, "learning_rate": 0.0002, "loss": 2.307, "step": 16610 }, { "epoch": 1.2384500745156484, "grad_norm": 2.5248770713806152, "learning_rate": 0.0002, "loss": 2.4023, "step": 16620 }, { "epoch": 1.2391952309985097, "grad_norm": 2.4763107299804688, "learning_rate": 0.0002, "loss": 2.5354, "step": 16630 }, { "epoch": 1.239940387481371, "grad_norm": 2.6904945373535156, "learning_rate": 0.0002, "loss": 2.5673, "step": 16640 }, { "epoch": 1.2406855439642326, "grad_norm": 2.5238559246063232, "learning_rate": 0.0002, "loss": 2.4272, "step": 16650 }, { "epoch": 1.241430700447094, "grad_norm": 2.451791763305664, "learning_rate": 0.0002, "loss": 2.5297, "step": 16660 }, { "epoch": 1.2421758569299552, "grad_norm": 2.5765328407287598, "learning_rate": 0.0002, "loss": 2.4879, "step": 16670 }, { "epoch": 1.2429210134128166, "grad_norm": 2.518141269683838, "learning_rate": 0.0002, "loss": 2.3585, "step": 16680 }, { "epoch": 1.2436661698956781, "grad_norm": 2.4746904373168945, "learning_rate": 0.0002, "loss": 2.5626, "step": 16690 }, { "epoch": 1.2444113263785395, "grad_norm": 2.6540675163269043, "learning_rate": 0.0002, "loss": 2.4304, "step": 16700 }, { "epoch": 1.2451564828614008, "grad_norm": 1.8600475788116455, "learning_rate": 0.0002, "loss": 2.394, "step": 16710 }, { "epoch": 1.2459016393442623, "grad_norm": 2.5817408561706543, "learning_rate": 0.0002, "loss": 2.5504, "step": 16720 }, { "epoch": 1.2466467958271237, "grad_norm": 2.5648725032806396, "learning_rate": 0.0002, "loss": 2.2296, "step": 16730 }, { "epoch": 1.247391952309985, "grad_norm": 2.600857734680176, "learning_rate": 0.0002, "loss": 2.3259, "step": 16740 }, { "epoch": 1.2481371087928466, "grad_norm": 2.6139464378356934, "learning_rate": 0.0002, "loss": 2.4264, "step": 16750 }, { "epoch": 1.248882265275708, "grad_norm": 2.387685537338257, "learning_rate": 0.0002, "loss": 2.6434, "step": 16760 }, { "epoch": 1.2496274217585692, "grad_norm": 2.712690830230713, "learning_rate": 0.0002, "loss": 2.461, "step": 16770 }, { "epoch": 1.2503725782414308, "grad_norm": 2.4673256874084473, "learning_rate": 0.0002, "loss": 2.4162, "step": 16780 }, { "epoch": 1.251117734724292, "grad_norm": 2.5331919193267822, "learning_rate": 0.0002, "loss": 2.591, "step": 16790 }, { "epoch": 1.2518628912071534, "grad_norm": 2.48547625541687, "learning_rate": 0.0002, "loss": 2.2615, "step": 16800 }, { "epoch": 1.252608047690015, "grad_norm": 2.41300368309021, "learning_rate": 0.0002, "loss": 2.3431, "step": 16810 }, { "epoch": 1.2533532041728763, "grad_norm": 2.566871166229248, "learning_rate": 0.0002, "loss": 2.3982, "step": 16820 }, { "epoch": 1.2540983606557377, "grad_norm": 2.4768545627593994, "learning_rate": 0.0002, "loss": 2.346, "step": 16830 }, { "epoch": 1.2548435171385992, "grad_norm": 2.677468776702881, "learning_rate": 0.0002, "loss": 2.2478, "step": 16840 }, { "epoch": 1.2555886736214605, "grad_norm": 2.2552690505981445, "learning_rate": 0.0002, "loss": 2.5072, "step": 16850 }, { "epoch": 1.2563338301043219, "grad_norm": 2.868969202041626, "learning_rate": 0.0002, "loss": 2.2606, "step": 16860 }, { "epoch": 1.2570789865871834, "grad_norm": 2.6906239986419678, "learning_rate": 0.0002, "loss": 2.5686, "step": 16870 }, { "epoch": 1.2578241430700448, "grad_norm": 2.433225393295288, "learning_rate": 0.0002, "loss": 2.3974, "step": 16880 }, { "epoch": 1.258569299552906, "grad_norm": 2.37595534324646, "learning_rate": 0.0002, "loss": 2.4342, "step": 16890 }, { "epoch": 1.2593144560357676, "grad_norm": 2.5448384284973145, "learning_rate": 0.0002, "loss": 2.3194, "step": 16900 }, { "epoch": 1.260059612518629, "grad_norm": 2.858198404312134, "learning_rate": 0.0002, "loss": 2.4199, "step": 16910 }, { "epoch": 1.2608047690014903, "grad_norm": 2.546701431274414, "learning_rate": 0.0002, "loss": 2.4509, "step": 16920 }, { "epoch": 1.2615499254843516, "grad_norm": 2.5379738807678223, "learning_rate": 0.0002, "loss": 2.4427, "step": 16930 }, { "epoch": 1.2622950819672132, "grad_norm": 2.6814608573913574, "learning_rate": 0.0002, "loss": 2.4686, "step": 16940 }, { "epoch": 1.2630402384500745, "grad_norm": 2.3676536083221436, "learning_rate": 0.0002, "loss": 2.4649, "step": 16950 }, { "epoch": 1.2637853949329358, "grad_norm": 2.789276123046875, "learning_rate": 0.0002, "loss": 2.3954, "step": 16960 }, { "epoch": 1.2645305514157974, "grad_norm": 2.770944595336914, "learning_rate": 0.0002, "loss": 2.5665, "step": 16970 }, { "epoch": 1.2652757078986587, "grad_norm": 2.385180950164795, "learning_rate": 0.0002, "loss": 2.227, "step": 16980 }, { "epoch": 1.26602086438152, "grad_norm": 2.8469879627227783, "learning_rate": 0.0002, "loss": 2.2351, "step": 16990 }, { "epoch": 1.2667660208643814, "grad_norm": 2.438333034515381, "learning_rate": 0.0002, "loss": 2.3147, "step": 17000 }, { "epoch": 1.267511177347243, "grad_norm": 2.537980794906616, "learning_rate": 0.0002, "loss": 2.4113, "step": 17010 }, { "epoch": 1.2682563338301043, "grad_norm": 2.5566976070404053, "learning_rate": 0.0002, "loss": 2.3972, "step": 17020 }, { "epoch": 1.2690014903129656, "grad_norm": 2.5756947994232178, "learning_rate": 0.0002, "loss": 2.3624, "step": 17030 }, { "epoch": 1.2697466467958272, "grad_norm": 2.6613008975982666, "learning_rate": 0.0002, "loss": 2.4689, "step": 17040 }, { "epoch": 1.2704918032786885, "grad_norm": 2.802849054336548, "learning_rate": 0.0002, "loss": 2.3772, "step": 17050 }, { "epoch": 1.2712369597615498, "grad_norm": 2.221499443054199, "learning_rate": 0.0002, "loss": 2.5116, "step": 17060 }, { "epoch": 1.2719821162444114, "grad_norm": 2.3637337684631348, "learning_rate": 0.0002, "loss": 2.4486, "step": 17070 }, { "epoch": 1.2727272727272727, "grad_norm": 2.217311382293701, "learning_rate": 0.0002, "loss": 2.4252, "step": 17080 }, { "epoch": 1.273472429210134, "grad_norm": 2.36021089553833, "learning_rate": 0.0002, "loss": 2.5555, "step": 17090 }, { "epoch": 1.2742175856929956, "grad_norm": 2.5092270374298096, "learning_rate": 0.0002, "loss": 2.298, "step": 17100 }, { "epoch": 1.274962742175857, "grad_norm": 2.396413803100586, "learning_rate": 0.0002, "loss": 2.113, "step": 17110 }, { "epoch": 1.2757078986587183, "grad_norm": 2.5738630294799805, "learning_rate": 0.0002, "loss": 2.2539, "step": 17120 }, { "epoch": 1.2764530551415798, "grad_norm": 2.5101852416992188, "learning_rate": 0.0002, "loss": 2.3469, "step": 17130 }, { "epoch": 1.2771982116244411, "grad_norm": 2.4591407775878906, "learning_rate": 0.0002, "loss": 2.6166, "step": 17140 }, { "epoch": 1.2779433681073025, "grad_norm": 2.526982069015503, "learning_rate": 0.0002, "loss": 2.4245, "step": 17150 }, { "epoch": 1.278688524590164, "grad_norm": 2.8430566787719727, "learning_rate": 0.0002, "loss": 2.3319, "step": 17160 }, { "epoch": 1.2794336810730254, "grad_norm": 3.2182416915893555, "learning_rate": 0.0002, "loss": 2.4737, "step": 17170 }, { "epoch": 1.2801788375558867, "grad_norm": 1.936545491218567, "learning_rate": 0.0002, "loss": 2.3686, "step": 17180 }, { "epoch": 1.2809239940387482, "grad_norm": 2.754791736602783, "learning_rate": 0.0002, "loss": 2.5935, "step": 17190 }, { "epoch": 1.2816691505216096, "grad_norm": 2.615044116973877, "learning_rate": 0.0002, "loss": 2.1516, "step": 17200 }, { "epoch": 1.282414307004471, "grad_norm": 2.3363049030303955, "learning_rate": 0.0002, "loss": 2.4082, "step": 17210 }, { "epoch": 1.2831594634873325, "grad_norm": 2.6004695892333984, "learning_rate": 0.0002, "loss": 2.2609, "step": 17220 }, { "epoch": 1.2839046199701938, "grad_norm": 2.6206114292144775, "learning_rate": 0.0002, "loss": 2.6122, "step": 17230 }, { "epoch": 1.2846497764530551, "grad_norm": 2.5685789585113525, "learning_rate": 0.0002, "loss": 2.4993, "step": 17240 }, { "epoch": 1.2853949329359167, "grad_norm": 2.702047824859619, "learning_rate": 0.0002, "loss": 2.5556, "step": 17250 }, { "epoch": 1.286140089418778, "grad_norm": 2.7429511547088623, "learning_rate": 0.0002, "loss": 2.3498, "step": 17260 }, { "epoch": 1.2868852459016393, "grad_norm": 2.4050281047821045, "learning_rate": 0.0002, "loss": 2.4434, "step": 17270 }, { "epoch": 1.2876304023845009, "grad_norm": 2.3521718978881836, "learning_rate": 0.0002, "loss": 2.2286, "step": 17280 }, { "epoch": 1.2883755588673622, "grad_norm": 2.6510558128356934, "learning_rate": 0.0002, "loss": 2.4448, "step": 17290 }, { "epoch": 1.2891207153502235, "grad_norm": 2.949458599090576, "learning_rate": 0.0002, "loss": 2.6088, "step": 17300 }, { "epoch": 1.2898658718330849, "grad_norm": 2.5707809925079346, "learning_rate": 0.0002, "loss": 2.5102, "step": 17310 }, { "epoch": 1.2906110283159464, "grad_norm": 2.6018505096435547, "learning_rate": 0.0002, "loss": 2.5562, "step": 17320 }, { "epoch": 1.2913561847988078, "grad_norm": 2.5191781520843506, "learning_rate": 0.0002, "loss": 2.2755, "step": 17330 }, { "epoch": 1.292101341281669, "grad_norm": 2.482866048812866, "learning_rate": 0.0002, "loss": 2.3706, "step": 17340 }, { "epoch": 1.2928464977645304, "grad_norm": 2.840179681777954, "learning_rate": 0.0002, "loss": 2.6733, "step": 17350 }, { "epoch": 1.293591654247392, "grad_norm": 2.3511884212493896, "learning_rate": 0.0002, "loss": 2.1673, "step": 17360 }, { "epoch": 1.2943368107302533, "grad_norm": 2.9184696674346924, "learning_rate": 0.0002, "loss": 2.4761, "step": 17370 }, { "epoch": 1.2950819672131146, "grad_norm": 2.500131130218506, "learning_rate": 0.0002, "loss": 2.5081, "step": 17380 }, { "epoch": 1.2958271236959762, "grad_norm": 2.2473769187927246, "learning_rate": 0.0002, "loss": 2.421, "step": 17390 }, { "epoch": 1.2965722801788375, "grad_norm": 2.166339159011841, "learning_rate": 0.0002, "loss": 2.4269, "step": 17400 }, { "epoch": 1.2973174366616989, "grad_norm": 2.546684741973877, "learning_rate": 0.0002, "loss": 2.3341, "step": 17410 }, { "epoch": 1.2980625931445604, "grad_norm": 2.3546717166900635, "learning_rate": 0.0002, "loss": 2.2886, "step": 17420 }, { "epoch": 1.2988077496274217, "grad_norm": 3.0054266452789307, "learning_rate": 0.0002, "loss": 2.5172, "step": 17430 }, { "epoch": 1.299552906110283, "grad_norm": 2.7535972595214844, "learning_rate": 0.0002, "loss": 2.5921, "step": 17440 }, { "epoch": 1.3002980625931446, "grad_norm": 2.568647861480713, "learning_rate": 0.0002, "loss": 2.4328, "step": 17450 }, { "epoch": 1.301043219076006, "grad_norm": 2.550978899002075, "learning_rate": 0.0002, "loss": 2.4866, "step": 17460 }, { "epoch": 1.3017883755588673, "grad_norm": 2.5628457069396973, "learning_rate": 0.0002, "loss": 2.4284, "step": 17470 }, { "epoch": 1.3025335320417288, "grad_norm": 2.3512864112854004, "learning_rate": 0.0002, "loss": 2.4614, "step": 17480 }, { "epoch": 1.3032786885245902, "grad_norm": 2.6441545486450195, "learning_rate": 0.0002, "loss": 2.4846, "step": 17490 }, { "epoch": 1.3040238450074515, "grad_norm": 2.4340755939483643, "learning_rate": 0.0002, "loss": 2.2236, "step": 17500 }, { "epoch": 1.304769001490313, "grad_norm": 2.4928324222564697, "learning_rate": 0.0002, "loss": 2.4082, "step": 17510 }, { "epoch": 1.3055141579731744, "grad_norm": 2.5223214626312256, "learning_rate": 0.0002, "loss": 2.5462, "step": 17520 }, { "epoch": 1.3062593144560357, "grad_norm": 2.824338674545288, "learning_rate": 0.0002, "loss": 2.4898, "step": 17530 }, { "epoch": 1.3070044709388973, "grad_norm": 2.299622058868408, "learning_rate": 0.0002, "loss": 2.4033, "step": 17540 }, { "epoch": 1.3077496274217586, "grad_norm": 2.4792845249176025, "learning_rate": 0.0002, "loss": 2.5937, "step": 17550 }, { "epoch": 1.30849478390462, "grad_norm": 2.402050495147705, "learning_rate": 0.0002, "loss": 2.5102, "step": 17560 }, { "epoch": 1.3092399403874815, "grad_norm": 2.833794593811035, "learning_rate": 0.0002, "loss": 2.6331, "step": 17570 }, { "epoch": 1.3099850968703428, "grad_norm": 2.4737417697906494, "learning_rate": 0.0002, "loss": 2.2122, "step": 17580 }, { "epoch": 1.3107302533532041, "grad_norm": 2.520129680633545, "learning_rate": 0.0002, "loss": 2.4989, "step": 17590 }, { "epoch": 1.3114754098360657, "grad_norm": 2.310004472732544, "learning_rate": 0.0002, "loss": 2.3101, "step": 17600 }, { "epoch": 1.312220566318927, "grad_norm": 2.863279342651367, "learning_rate": 0.0002, "loss": 2.4879, "step": 17610 }, { "epoch": 1.3129657228017884, "grad_norm": 2.590439558029175, "learning_rate": 0.0002, "loss": 2.6645, "step": 17620 }, { "epoch": 1.31371087928465, "grad_norm": 2.840513229370117, "learning_rate": 0.0002, "loss": 2.4547, "step": 17630 }, { "epoch": 1.3144560357675112, "grad_norm": 2.6092474460601807, "learning_rate": 0.0002, "loss": 2.5813, "step": 17640 }, { "epoch": 1.3152011922503726, "grad_norm": 2.347975969314575, "learning_rate": 0.0002, "loss": 2.3886, "step": 17650 }, { "epoch": 1.315946348733234, "grad_norm": 2.4244983196258545, "learning_rate": 0.0002, "loss": 2.4909, "step": 17660 }, { "epoch": 1.3166915052160955, "grad_norm": 2.1615536212921143, "learning_rate": 0.0002, "loss": 2.4675, "step": 17670 }, { "epoch": 1.3174366616989568, "grad_norm": 2.350623846054077, "learning_rate": 0.0002, "loss": 2.4184, "step": 17680 }, { "epoch": 1.3181818181818181, "grad_norm": 2.4277079105377197, "learning_rate": 0.0002, "loss": 2.5482, "step": 17690 }, { "epoch": 1.3189269746646795, "grad_norm": 2.431518793106079, "learning_rate": 0.0002, "loss": 2.5813, "step": 17700 }, { "epoch": 1.319672131147541, "grad_norm": 2.4477312564849854, "learning_rate": 0.0002, "loss": 2.4473, "step": 17710 }, { "epoch": 1.3204172876304023, "grad_norm": 2.8006045818328857, "learning_rate": 0.0002, "loss": 2.639, "step": 17720 }, { "epoch": 1.3211624441132637, "grad_norm": 2.362448215484619, "learning_rate": 0.0002, "loss": 2.3965, "step": 17730 }, { "epoch": 1.3219076005961252, "grad_norm": 2.489379405975342, "learning_rate": 0.0002, "loss": 2.4664, "step": 17740 }, { "epoch": 1.3226527570789866, "grad_norm": 2.386716365814209, "learning_rate": 0.0002, "loss": 2.5855, "step": 17750 }, { "epoch": 1.3233979135618479, "grad_norm": 2.5991947650909424, "learning_rate": 0.0002, "loss": 2.5633, "step": 17760 }, { "epoch": 1.3241430700447094, "grad_norm": 2.499544858932495, "learning_rate": 0.0002, "loss": 2.4749, "step": 17770 }, { "epoch": 1.3248882265275708, "grad_norm": 2.4425177574157715, "learning_rate": 0.0002, "loss": 2.5387, "step": 17780 }, { "epoch": 1.325633383010432, "grad_norm": 2.6338937282562256, "learning_rate": 0.0002, "loss": 2.4913, "step": 17790 }, { "epoch": 1.3263785394932937, "grad_norm": 2.280900716781616, "learning_rate": 0.0002, "loss": 2.548, "step": 17800 }, { "epoch": 1.327123695976155, "grad_norm": 2.5411298274993896, "learning_rate": 0.0002, "loss": 2.5131, "step": 17810 }, { "epoch": 1.3278688524590163, "grad_norm": 2.546914577484131, "learning_rate": 0.0002, "loss": 2.667, "step": 17820 }, { "epoch": 1.3286140089418779, "grad_norm": 2.6048712730407715, "learning_rate": 0.0002, "loss": 2.3578, "step": 17830 }, { "epoch": 1.3293591654247392, "grad_norm": 2.714755058288574, "learning_rate": 0.0002, "loss": 2.6083, "step": 17840 }, { "epoch": 1.3301043219076005, "grad_norm": 2.559908151626587, "learning_rate": 0.0002, "loss": 2.4031, "step": 17850 }, { "epoch": 1.330849478390462, "grad_norm": 2.308744192123413, "learning_rate": 0.0002, "loss": 2.3735, "step": 17860 }, { "epoch": 1.3315946348733234, "grad_norm": 2.331125497817993, "learning_rate": 0.0002, "loss": 2.2689, "step": 17870 }, { "epoch": 1.3323397913561847, "grad_norm": 2.7024810314178467, "learning_rate": 0.0002, "loss": 2.4517, "step": 17880 }, { "epoch": 1.3330849478390463, "grad_norm": 2.6669280529022217, "learning_rate": 0.0002, "loss": 2.3356, "step": 17890 }, { "epoch": 1.3338301043219076, "grad_norm": 2.739065408706665, "learning_rate": 0.0002, "loss": 2.3587, "step": 17900 }, { "epoch": 1.334575260804769, "grad_norm": 2.8512372970581055, "learning_rate": 0.0002, "loss": 2.5018, "step": 17910 }, { "epoch": 1.3353204172876305, "grad_norm": 2.336021900177002, "learning_rate": 0.0002, "loss": 2.3453, "step": 17920 }, { "epoch": 1.3360655737704918, "grad_norm": 2.4425575733184814, "learning_rate": 0.0002, "loss": 2.5346, "step": 17930 }, { "epoch": 1.3368107302533532, "grad_norm": 3.027134656906128, "learning_rate": 0.0002, "loss": 2.3945, "step": 17940 }, { "epoch": 1.3375558867362147, "grad_norm": 2.451101541519165, "learning_rate": 0.0002, "loss": 2.4273, "step": 17950 }, { "epoch": 1.338301043219076, "grad_norm": 2.5687952041625977, "learning_rate": 0.0002, "loss": 2.5681, "step": 17960 }, { "epoch": 1.3390461997019374, "grad_norm": 2.5672757625579834, "learning_rate": 0.0002, "loss": 2.5521, "step": 17970 }, { "epoch": 1.339791356184799, "grad_norm": 2.4332363605499268, "learning_rate": 0.0002, "loss": 2.4988, "step": 17980 }, { "epoch": 1.3405365126676603, "grad_norm": 2.3175182342529297, "learning_rate": 0.0002, "loss": 2.1908, "step": 17990 }, { "epoch": 1.3412816691505216, "grad_norm": 2.677385091781616, "learning_rate": 0.0002, "loss": 2.6282, "step": 18000 }, { "epoch": 1.342026825633383, "grad_norm": 2.5982978343963623, "learning_rate": 0.0002, "loss": 2.3879, "step": 18010 }, { "epoch": 1.3427719821162445, "grad_norm": 2.657008171081543, "learning_rate": 0.0002, "loss": 2.5917, "step": 18020 }, { "epoch": 1.3435171385991058, "grad_norm": 2.4030990600585938, "learning_rate": 0.0002, "loss": 2.4942, "step": 18030 }, { "epoch": 1.3442622950819672, "grad_norm": 2.760948657989502, "learning_rate": 0.0002, "loss": 2.5241, "step": 18040 }, { "epoch": 1.3450074515648285, "grad_norm": 2.4696218967437744, "learning_rate": 0.0002, "loss": 2.5653, "step": 18050 }, { "epoch": 1.34575260804769, "grad_norm": 2.8503217697143555, "learning_rate": 0.0002, "loss": 2.5897, "step": 18060 }, { "epoch": 1.3464977645305514, "grad_norm": 2.43888258934021, "learning_rate": 0.0002, "loss": 2.5546, "step": 18070 }, { "epoch": 1.3472429210134127, "grad_norm": 2.212247610092163, "learning_rate": 0.0002, "loss": 2.2402, "step": 18080 }, { "epoch": 1.3479880774962743, "grad_norm": 2.612032413482666, "learning_rate": 0.0002, "loss": 2.4855, "step": 18090 }, { "epoch": 1.3487332339791356, "grad_norm": 1.9392924308776855, "learning_rate": 0.0002, "loss": 2.485, "step": 18100 }, { "epoch": 1.349478390461997, "grad_norm": 2.618718147277832, "learning_rate": 0.0002, "loss": 2.4073, "step": 18110 }, { "epoch": 1.3502235469448585, "grad_norm": 2.3610072135925293, "learning_rate": 0.0002, "loss": 2.5229, "step": 18120 }, { "epoch": 1.3509687034277198, "grad_norm": 2.595275640487671, "learning_rate": 0.0002, "loss": 2.4553, "step": 18130 }, { "epoch": 1.3517138599105811, "grad_norm": 2.991643190383911, "learning_rate": 0.0002, "loss": 2.676, "step": 18140 }, { "epoch": 1.3524590163934427, "grad_norm": 2.907435655593872, "learning_rate": 0.0002, "loss": 2.5444, "step": 18150 }, { "epoch": 1.353204172876304, "grad_norm": 2.5445618629455566, "learning_rate": 0.0002, "loss": 2.2842, "step": 18160 }, { "epoch": 1.3539493293591653, "grad_norm": 2.4656434059143066, "learning_rate": 0.0002, "loss": 2.4092, "step": 18170 }, { "epoch": 1.354694485842027, "grad_norm": 2.802135467529297, "learning_rate": 0.0002, "loss": 2.546, "step": 18180 }, { "epoch": 1.3554396423248882, "grad_norm": 2.503904342651367, "learning_rate": 0.0002, "loss": 2.1597, "step": 18190 }, { "epoch": 1.3561847988077496, "grad_norm": 2.6705873012542725, "learning_rate": 0.0002, "loss": 2.5067, "step": 18200 }, { "epoch": 1.3569299552906111, "grad_norm": 2.7576780319213867, "learning_rate": 0.0002, "loss": 2.5998, "step": 18210 }, { "epoch": 1.3576751117734724, "grad_norm": 2.821742296218872, "learning_rate": 0.0002, "loss": 2.5604, "step": 18220 }, { "epoch": 1.3584202682563338, "grad_norm": 2.8857474327087402, "learning_rate": 0.0002, "loss": 2.4744, "step": 18230 }, { "epoch": 1.3591654247391953, "grad_norm": 2.5896153450012207, "learning_rate": 0.0002, "loss": 2.4459, "step": 18240 }, { "epoch": 1.3599105812220567, "grad_norm": 2.5406441688537598, "learning_rate": 0.0002, "loss": 2.6658, "step": 18250 }, { "epoch": 1.360655737704918, "grad_norm": 2.489504337310791, "learning_rate": 0.0002, "loss": 2.4687, "step": 18260 }, { "epoch": 1.3614008941877795, "grad_norm": 2.5663485527038574, "learning_rate": 0.0002, "loss": 2.4253, "step": 18270 }, { "epoch": 1.3621460506706409, "grad_norm": 2.5449881553649902, "learning_rate": 0.0002, "loss": 2.4748, "step": 18280 }, { "epoch": 1.3628912071535022, "grad_norm": 2.5304982662200928, "learning_rate": 0.0002, "loss": 2.5618, "step": 18290 }, { "epoch": 1.3636363636363638, "grad_norm": 2.8851704597473145, "learning_rate": 0.0002, "loss": 2.3942, "step": 18300 }, { "epoch": 1.364381520119225, "grad_norm": 2.3670241832733154, "learning_rate": 0.0002, "loss": 2.5914, "step": 18310 }, { "epoch": 1.3651266766020864, "grad_norm": 2.421011447906494, "learning_rate": 0.0002, "loss": 2.4694, "step": 18320 }, { "epoch": 1.365871833084948, "grad_norm": 2.5096373558044434, "learning_rate": 0.0002, "loss": 2.5616, "step": 18330 }, { "epoch": 1.3666169895678093, "grad_norm": 2.6197783946990967, "learning_rate": 0.0002, "loss": 2.351, "step": 18340 }, { "epoch": 1.3673621460506706, "grad_norm": 2.3040313720703125, "learning_rate": 0.0002, "loss": 2.5583, "step": 18350 }, { "epoch": 1.368107302533532, "grad_norm": 2.4461286067962646, "learning_rate": 0.0002, "loss": 2.5253, "step": 18360 }, { "epoch": 1.3688524590163935, "grad_norm": 2.6195592880249023, "learning_rate": 0.0002, "loss": 2.4353, "step": 18370 }, { "epoch": 1.3695976154992549, "grad_norm": 2.711920976638794, "learning_rate": 0.0002, "loss": 2.4055, "step": 18380 }, { "epoch": 1.3703427719821162, "grad_norm": 2.2745280265808105, "learning_rate": 0.0002, "loss": 2.2759, "step": 18390 }, { "epoch": 1.3710879284649775, "grad_norm": 2.446627140045166, "learning_rate": 0.0002, "loss": 2.3023, "step": 18400 }, { "epoch": 1.371833084947839, "grad_norm": 2.3650870323181152, "learning_rate": 0.0002, "loss": 2.1179, "step": 18410 }, { "epoch": 1.3725782414307004, "grad_norm": 2.3850514888763428, "learning_rate": 0.0002, "loss": 2.5995, "step": 18420 }, { "epoch": 1.3733233979135617, "grad_norm": 2.4336416721343994, "learning_rate": 0.0002, "loss": 2.5989, "step": 18430 }, { "epoch": 1.3740685543964233, "grad_norm": 2.4156320095062256, "learning_rate": 0.0002, "loss": 2.4384, "step": 18440 }, { "epoch": 1.3748137108792846, "grad_norm": 2.216463327407837, "learning_rate": 0.0002, "loss": 2.288, "step": 18450 }, { "epoch": 1.375558867362146, "grad_norm": 2.4132537841796875, "learning_rate": 0.0002, "loss": 2.5789, "step": 18460 }, { "epoch": 1.3763040238450075, "grad_norm": 2.4926469326019287, "learning_rate": 0.0002, "loss": 2.2998, "step": 18470 }, { "epoch": 1.3770491803278688, "grad_norm": 2.515826940536499, "learning_rate": 0.0002, "loss": 2.4162, "step": 18480 }, { "epoch": 1.3777943368107302, "grad_norm": 2.26667857170105, "learning_rate": 0.0002, "loss": 2.5093, "step": 18490 }, { "epoch": 1.3785394932935917, "grad_norm": 2.739278793334961, "learning_rate": 0.0002, "loss": 2.6155, "step": 18500 }, { "epoch": 1.379284649776453, "grad_norm": 2.4237091541290283, "learning_rate": 0.0002, "loss": 2.5357, "step": 18510 }, { "epoch": 1.3800298062593144, "grad_norm": 2.349325180053711, "learning_rate": 0.0002, "loss": 2.6382, "step": 18520 }, { "epoch": 1.380774962742176, "grad_norm": 2.0295040607452393, "learning_rate": 0.0002, "loss": 2.2775, "step": 18530 }, { "epoch": 1.3815201192250373, "grad_norm": 2.54569411277771, "learning_rate": 0.0002, "loss": 2.6035, "step": 18540 }, { "epoch": 1.3822652757078986, "grad_norm": 2.625166416168213, "learning_rate": 0.0002, "loss": 2.3252, "step": 18550 }, { "epoch": 1.3830104321907601, "grad_norm": 2.5229341983795166, "learning_rate": 0.0002, "loss": 2.5045, "step": 18560 }, { "epoch": 1.3837555886736215, "grad_norm": 2.5632095336914062, "learning_rate": 0.0002, "loss": 2.4786, "step": 18570 }, { "epoch": 1.3845007451564828, "grad_norm": 2.6461541652679443, "learning_rate": 0.0002, "loss": 2.3556, "step": 18580 }, { "epoch": 1.3852459016393444, "grad_norm": 2.256908893585205, "learning_rate": 0.0002, "loss": 2.4763, "step": 18590 }, { "epoch": 1.3859910581222057, "grad_norm": 2.930579662322998, "learning_rate": 0.0002, "loss": 2.3258, "step": 18600 }, { "epoch": 1.386736214605067, "grad_norm": 2.400162696838379, "learning_rate": 0.0002, "loss": 2.5043, "step": 18610 }, { "epoch": 1.3874813710879286, "grad_norm": 2.6638431549072266, "learning_rate": 0.0002, "loss": 2.3977, "step": 18620 }, { "epoch": 1.38822652757079, "grad_norm": 2.60463285446167, "learning_rate": 0.0002, "loss": 2.4366, "step": 18630 }, { "epoch": 1.3889716840536512, "grad_norm": 2.412717342376709, "learning_rate": 0.0002, "loss": 2.4246, "step": 18640 }, { "epoch": 1.3897168405365128, "grad_norm": 3.0523712635040283, "learning_rate": 0.0002, "loss": 2.4677, "step": 18650 }, { "epoch": 1.3904619970193741, "grad_norm": 2.9874138832092285, "learning_rate": 0.0002, "loss": 2.612, "step": 18660 }, { "epoch": 1.3912071535022354, "grad_norm": 2.532456398010254, "learning_rate": 0.0002, "loss": 2.4691, "step": 18670 }, { "epoch": 1.391952309985097, "grad_norm": 2.7136852741241455, "learning_rate": 0.0002, "loss": 2.4155, "step": 18680 }, { "epoch": 1.3926974664679583, "grad_norm": 2.5134053230285645, "learning_rate": 0.0002, "loss": 2.5658, "step": 18690 }, { "epoch": 1.3934426229508197, "grad_norm": 2.934316396713257, "learning_rate": 0.0002, "loss": 2.5088, "step": 18700 }, { "epoch": 1.394187779433681, "grad_norm": 2.23718523979187, "learning_rate": 0.0002, "loss": 2.3933, "step": 18710 }, { "epoch": 1.3949329359165425, "grad_norm": 2.1990694999694824, "learning_rate": 0.0002, "loss": 2.3508, "step": 18720 }, { "epoch": 1.3956780923994039, "grad_norm": 2.4167299270629883, "learning_rate": 0.0002, "loss": 2.5543, "step": 18730 }, { "epoch": 1.3964232488822652, "grad_norm": 2.468496561050415, "learning_rate": 0.0002, "loss": 2.4384, "step": 18740 }, { "epoch": 1.3971684053651265, "grad_norm": 2.5203256607055664, "learning_rate": 0.0002, "loss": 2.4853, "step": 18750 }, { "epoch": 1.397913561847988, "grad_norm": 2.669184446334839, "learning_rate": 0.0002, "loss": 2.4931, "step": 18760 }, { "epoch": 1.3986587183308494, "grad_norm": 2.7965617179870605, "learning_rate": 0.0002, "loss": 2.6282, "step": 18770 }, { "epoch": 1.3994038748137108, "grad_norm": 2.606688976287842, "learning_rate": 0.0002, "loss": 2.5169, "step": 18780 }, { "epoch": 1.4001490312965723, "grad_norm": 3.333808422088623, "learning_rate": 0.0002, "loss": 2.4799, "step": 18790 }, { "epoch": 1.4008941877794336, "grad_norm": 2.470353841781616, "learning_rate": 0.0002, "loss": 2.6148, "step": 18800 }, { "epoch": 1.401639344262295, "grad_norm": 3.7731544971466064, "learning_rate": 0.0002, "loss": 2.5979, "step": 18810 }, { "epoch": 1.4023845007451565, "grad_norm": 2.7229580879211426, "learning_rate": 0.0002, "loss": 2.5466, "step": 18820 }, { "epoch": 1.4031296572280179, "grad_norm": 2.463998556137085, "learning_rate": 0.0002, "loss": 2.5742, "step": 18830 }, { "epoch": 1.4038748137108792, "grad_norm": 2.2875285148620605, "learning_rate": 0.0002, "loss": 2.3219, "step": 18840 }, { "epoch": 1.4046199701937407, "grad_norm": 2.479088068008423, "learning_rate": 0.0002, "loss": 2.2547, "step": 18850 }, { "epoch": 1.405365126676602, "grad_norm": 2.7648444175720215, "learning_rate": 0.0002, "loss": 2.4328, "step": 18860 }, { "epoch": 1.4061102831594634, "grad_norm": 2.649199962615967, "learning_rate": 0.0002, "loss": 2.5119, "step": 18870 }, { "epoch": 1.406855439642325, "grad_norm": 2.6668455600738525, "learning_rate": 0.0002, "loss": 2.3982, "step": 18880 }, { "epoch": 1.4076005961251863, "grad_norm": 2.207362651824951, "learning_rate": 0.0002, "loss": 2.5986, "step": 18890 }, { "epoch": 1.4083457526080476, "grad_norm": 2.383084535598755, "learning_rate": 0.0002, "loss": 2.3486, "step": 18900 }, { "epoch": 1.4090909090909092, "grad_norm": 2.490837574005127, "learning_rate": 0.0002, "loss": 2.4754, "step": 18910 }, { "epoch": 1.4098360655737705, "grad_norm": 2.479707717895508, "learning_rate": 0.0002, "loss": 2.4408, "step": 18920 }, { "epoch": 1.4105812220566318, "grad_norm": 2.9965310096740723, "learning_rate": 0.0002, "loss": 2.5574, "step": 18930 }, { "epoch": 1.4113263785394934, "grad_norm": 2.707078456878662, "learning_rate": 0.0002, "loss": 2.4176, "step": 18940 }, { "epoch": 1.4120715350223547, "grad_norm": 2.5538723468780518, "learning_rate": 0.0002, "loss": 2.4505, "step": 18950 }, { "epoch": 1.412816691505216, "grad_norm": 2.399869441986084, "learning_rate": 0.0002, "loss": 2.391, "step": 18960 }, { "epoch": 1.4135618479880776, "grad_norm": 2.150087594985962, "learning_rate": 0.0002, "loss": 2.5095, "step": 18970 }, { "epoch": 1.414307004470939, "grad_norm": 2.7738189697265625, "learning_rate": 0.0002, "loss": 2.4873, "step": 18980 }, { "epoch": 1.4150521609538003, "grad_norm": 2.8907742500305176, "learning_rate": 0.0002, "loss": 2.7251, "step": 18990 }, { "epoch": 1.4157973174366618, "grad_norm": 2.500542640686035, "learning_rate": 0.0002, "loss": 2.6472, "step": 19000 }, { "epoch": 1.4165424739195231, "grad_norm": 2.930190324783325, "learning_rate": 0.0002, "loss": 2.4754, "step": 19010 }, { "epoch": 1.4172876304023845, "grad_norm": 2.717930793762207, "learning_rate": 0.0002, "loss": 2.5804, "step": 19020 }, { "epoch": 1.418032786885246, "grad_norm": 2.6405129432678223, "learning_rate": 0.0002, "loss": 2.4247, "step": 19030 }, { "epoch": 1.4187779433681074, "grad_norm": 2.3362009525299072, "learning_rate": 0.0002, "loss": 2.4234, "step": 19040 }, { "epoch": 1.4195230998509687, "grad_norm": 2.1805837154388428, "learning_rate": 0.0002, "loss": 2.4447, "step": 19050 }, { "epoch": 1.42026825633383, "grad_norm": 2.530463695526123, "learning_rate": 0.0002, "loss": 2.5781, "step": 19060 }, { "epoch": 1.4210134128166916, "grad_norm": 2.360027313232422, "learning_rate": 0.0002, "loss": 2.4854, "step": 19070 }, { "epoch": 1.421758569299553, "grad_norm": 2.552933931350708, "learning_rate": 0.0002, "loss": 2.4995, "step": 19080 }, { "epoch": 1.4225037257824142, "grad_norm": 2.643507719039917, "learning_rate": 0.0002, "loss": 2.4029, "step": 19090 }, { "epoch": 1.4232488822652756, "grad_norm": 2.3608195781707764, "learning_rate": 0.0002, "loss": 2.3169, "step": 19100 }, { "epoch": 1.4239940387481371, "grad_norm": 2.282942295074463, "learning_rate": 0.0002, "loss": 2.632, "step": 19110 }, { "epoch": 1.4247391952309985, "grad_norm": 2.8628063201904297, "learning_rate": 0.0002, "loss": 2.3114, "step": 19120 }, { "epoch": 1.4254843517138598, "grad_norm": 2.357126235961914, "learning_rate": 0.0002, "loss": 2.483, "step": 19130 }, { "epoch": 1.4262295081967213, "grad_norm": 2.1612229347229004, "learning_rate": 0.0002, "loss": 2.4844, "step": 19140 }, { "epoch": 1.4269746646795827, "grad_norm": 2.248215436935425, "learning_rate": 0.0002, "loss": 2.3824, "step": 19150 }, { "epoch": 1.427719821162444, "grad_norm": 1.9331451654434204, "learning_rate": 0.0002, "loss": 2.3143, "step": 19160 }, { "epoch": 1.4284649776453056, "grad_norm": 2.4009180068969727, "learning_rate": 0.0002, "loss": 2.382, "step": 19170 }, { "epoch": 1.4292101341281669, "grad_norm": 2.6418373584747314, "learning_rate": 0.0002, "loss": 2.4591, "step": 19180 }, { "epoch": 1.4299552906110282, "grad_norm": 2.737656593322754, "learning_rate": 0.0002, "loss": 2.3134, "step": 19190 }, { "epoch": 1.4307004470938898, "grad_norm": 2.2396743297576904, "learning_rate": 0.0002, "loss": 2.4965, "step": 19200 }, { "epoch": 1.431445603576751, "grad_norm": 2.542550802230835, "learning_rate": 0.0002, "loss": 2.4396, "step": 19210 }, { "epoch": 1.4321907600596124, "grad_norm": 2.6923890113830566, "learning_rate": 0.0002, "loss": 2.3213, "step": 19220 }, { "epoch": 1.432935916542474, "grad_norm": 2.479887008666992, "learning_rate": 0.0002, "loss": 2.528, "step": 19230 }, { "epoch": 1.4336810730253353, "grad_norm": 2.2909326553344727, "learning_rate": 0.0002, "loss": 2.4784, "step": 19240 }, { "epoch": 1.4344262295081966, "grad_norm": 2.543410301208496, "learning_rate": 0.0002, "loss": 2.603, "step": 19250 }, { "epoch": 1.4351713859910582, "grad_norm": 2.4764413833618164, "learning_rate": 0.0002, "loss": 2.3813, "step": 19260 }, { "epoch": 1.4359165424739195, "grad_norm": 2.4031872749328613, "learning_rate": 0.0002, "loss": 2.4507, "step": 19270 }, { "epoch": 1.4366616989567809, "grad_norm": 2.174621105194092, "learning_rate": 0.0002, "loss": 2.505, "step": 19280 }, { "epoch": 1.4374068554396424, "grad_norm": 2.6859323978424072, "learning_rate": 0.0002, "loss": 2.5794, "step": 19290 }, { "epoch": 1.4381520119225037, "grad_norm": 2.5702786445617676, "learning_rate": 0.0002, "loss": 2.429, "step": 19300 }, { "epoch": 1.438897168405365, "grad_norm": 2.4313433170318604, "learning_rate": 0.0002, "loss": 2.4518, "step": 19310 }, { "epoch": 1.4396423248882266, "grad_norm": 2.800537347793579, "learning_rate": 0.0002, "loss": 2.4637, "step": 19320 }, { "epoch": 1.440387481371088, "grad_norm": 2.221001625061035, "learning_rate": 0.0002, "loss": 2.5614, "step": 19330 }, { "epoch": 1.4411326378539493, "grad_norm": 2.630506753921509, "learning_rate": 0.0002, "loss": 2.5497, "step": 19340 }, { "epoch": 1.4418777943368108, "grad_norm": 3.166796922683716, "learning_rate": 0.0002, "loss": 2.4768, "step": 19350 }, { "epoch": 1.4426229508196722, "grad_norm": 2.3211870193481445, "learning_rate": 0.0002, "loss": 2.4818, "step": 19360 }, { "epoch": 1.4433681073025335, "grad_norm": 2.428395986557007, "learning_rate": 0.0002, "loss": 2.2887, "step": 19370 }, { "epoch": 1.444113263785395, "grad_norm": 2.2605369091033936, "learning_rate": 0.0002, "loss": 2.5042, "step": 19380 }, { "epoch": 1.4448584202682564, "grad_norm": 2.404771089553833, "learning_rate": 0.0002, "loss": 2.5354, "step": 19390 }, { "epoch": 1.4456035767511177, "grad_norm": 2.3696742057800293, "learning_rate": 0.0002, "loss": 2.5734, "step": 19400 }, { "epoch": 1.446348733233979, "grad_norm": 2.0790860652923584, "learning_rate": 0.0002, "loss": 2.379, "step": 19410 }, { "epoch": 1.4470938897168406, "grad_norm": 2.8864376544952393, "learning_rate": 0.0002, "loss": 2.5431, "step": 19420 }, { "epoch": 1.447839046199702, "grad_norm": 2.4749741554260254, "learning_rate": 0.0002, "loss": 2.7492, "step": 19430 }, { "epoch": 1.4485842026825633, "grad_norm": 2.4821410179138184, "learning_rate": 0.0002, "loss": 2.6195, "step": 19440 }, { "epoch": 1.4493293591654246, "grad_norm": 2.2254769802093506, "learning_rate": 0.0002, "loss": 2.5504, "step": 19450 }, { "epoch": 1.4500745156482862, "grad_norm": 2.85517954826355, "learning_rate": 0.0002, "loss": 2.5548, "step": 19460 }, { "epoch": 1.4508196721311475, "grad_norm": 2.567664384841919, "learning_rate": 0.0002, "loss": 2.5434, "step": 19470 }, { "epoch": 1.4515648286140088, "grad_norm": 2.842428684234619, "learning_rate": 0.0002, "loss": 2.6169, "step": 19480 }, { "epoch": 1.4523099850968704, "grad_norm": 2.630206346511841, "learning_rate": 0.0002, "loss": 2.46, "step": 19490 }, { "epoch": 1.4530551415797317, "grad_norm": 2.2468862533569336, "learning_rate": 0.0002, "loss": 2.5329, "step": 19500 }, { "epoch": 1.453800298062593, "grad_norm": 2.688547134399414, "learning_rate": 0.0002, "loss": 2.4376, "step": 19510 }, { "epoch": 1.4545454545454546, "grad_norm": 2.439633846282959, "learning_rate": 0.0002, "loss": 2.572, "step": 19520 }, { "epoch": 1.455290611028316, "grad_norm": 2.374743700027466, "learning_rate": 0.0002, "loss": 2.579, "step": 19530 }, { "epoch": 1.4560357675111772, "grad_norm": 2.5189223289489746, "learning_rate": 0.0002, "loss": 2.4794, "step": 19540 }, { "epoch": 1.4567809239940388, "grad_norm": 2.6235687732696533, "learning_rate": 0.0002, "loss": 2.4074, "step": 19550 }, { "epoch": 1.4575260804769001, "grad_norm": 2.6033565998077393, "learning_rate": 0.0002, "loss": 2.4671, "step": 19560 }, { "epoch": 1.4582712369597615, "grad_norm": 2.5955042839050293, "learning_rate": 0.0002, "loss": 2.4777, "step": 19570 }, { "epoch": 1.459016393442623, "grad_norm": 2.8207786083221436, "learning_rate": 0.0002, "loss": 2.5766, "step": 19580 }, { "epoch": 1.4597615499254843, "grad_norm": 2.597846269607544, "learning_rate": 0.0002, "loss": 2.4987, "step": 19590 }, { "epoch": 1.4605067064083457, "grad_norm": 2.6083250045776367, "learning_rate": 0.0002, "loss": 2.6282, "step": 19600 }, { "epoch": 1.4612518628912072, "grad_norm": 2.8631696701049805, "learning_rate": 0.0002, "loss": 2.3029, "step": 19610 }, { "epoch": 1.4619970193740686, "grad_norm": 2.701707124710083, "learning_rate": 0.0002, "loss": 2.4164, "step": 19620 }, { "epoch": 1.46274217585693, "grad_norm": 2.4737026691436768, "learning_rate": 0.0002, "loss": 2.4605, "step": 19630 }, { "epoch": 1.4634873323397914, "grad_norm": 2.5299856662750244, "learning_rate": 0.0002, "loss": 2.2568, "step": 19640 }, { "epoch": 1.4642324888226528, "grad_norm": 2.5376803874969482, "learning_rate": 0.0002, "loss": 2.4407, "step": 19650 }, { "epoch": 1.464977645305514, "grad_norm": 2.8622679710388184, "learning_rate": 0.0002, "loss": 2.4903, "step": 19660 }, { "epoch": 1.4657228017883757, "grad_norm": 2.7106306552886963, "learning_rate": 0.0002, "loss": 2.4316, "step": 19670 }, { "epoch": 1.466467958271237, "grad_norm": 2.3677496910095215, "learning_rate": 0.0002, "loss": 2.4907, "step": 19680 }, { "epoch": 1.4672131147540983, "grad_norm": 2.7566051483154297, "learning_rate": 0.0002, "loss": 2.5288, "step": 19690 }, { "epoch": 1.4679582712369599, "grad_norm": 2.2991981506347656, "learning_rate": 0.0002, "loss": 2.4065, "step": 19700 }, { "epoch": 1.4687034277198212, "grad_norm": 2.579111337661743, "learning_rate": 0.0002, "loss": 2.5721, "step": 19710 }, { "epoch": 1.4694485842026825, "grad_norm": 2.5662269592285156, "learning_rate": 0.0002, "loss": 2.5849, "step": 19720 }, { "epoch": 1.470193740685544, "grad_norm": 2.307398796081543, "learning_rate": 0.0002, "loss": 2.3272, "step": 19730 }, { "epoch": 1.4709388971684054, "grad_norm": 2.695326566696167, "learning_rate": 0.0002, "loss": 2.6097, "step": 19740 }, { "epoch": 1.4716840536512668, "grad_norm": 2.547471523284912, "learning_rate": 0.0002, "loss": 2.6456, "step": 19750 }, { "epoch": 1.472429210134128, "grad_norm": 2.5473928451538086, "learning_rate": 0.0002, "loss": 2.6685, "step": 19760 }, { "epoch": 1.4731743666169896, "grad_norm": 2.6228318214416504, "learning_rate": 0.0002, "loss": 2.3921, "step": 19770 }, { "epoch": 1.473919523099851, "grad_norm": 2.489250659942627, "learning_rate": 0.0002, "loss": 2.3033, "step": 19780 }, { "epoch": 1.4746646795827123, "grad_norm": 2.4918441772460938, "learning_rate": 0.0002, "loss": 2.6749, "step": 19790 }, { "epoch": 1.4754098360655736, "grad_norm": 2.6294076442718506, "learning_rate": 0.0002, "loss": 2.5153, "step": 19800 }, { "epoch": 1.4761549925484352, "grad_norm": 2.7032785415649414, "learning_rate": 0.0002, "loss": 2.4346, "step": 19810 }, { "epoch": 1.4769001490312965, "grad_norm": 3.086024761199951, "learning_rate": 0.0002, "loss": 2.3755, "step": 19820 }, { "epoch": 1.4776453055141578, "grad_norm": 2.7240495681762695, "learning_rate": 0.0002, "loss": 2.4115, "step": 19830 }, { "epoch": 1.4783904619970194, "grad_norm": 2.3864595890045166, "learning_rate": 0.0002, "loss": 2.4623, "step": 19840 }, { "epoch": 1.4791356184798807, "grad_norm": 2.517160177230835, "learning_rate": 0.0002, "loss": 2.2625, "step": 19850 }, { "epoch": 1.479880774962742, "grad_norm": 2.8101651668548584, "learning_rate": 0.0002, "loss": 2.3999, "step": 19860 }, { "epoch": 1.4806259314456036, "grad_norm": 2.6499056816101074, "learning_rate": 0.0002, "loss": 2.4251, "step": 19870 }, { "epoch": 1.481371087928465, "grad_norm": 2.563832998275757, "learning_rate": 0.0002, "loss": 2.3608, "step": 19880 }, { "epoch": 1.4821162444113263, "grad_norm": 2.905278205871582, "learning_rate": 0.0002, "loss": 2.4904, "step": 19890 }, { "epoch": 1.4828614008941878, "grad_norm": 2.7486226558685303, "learning_rate": 0.0002, "loss": 2.5536, "step": 19900 }, { "epoch": 1.4836065573770492, "grad_norm": 3.1326828002929688, "learning_rate": 0.0002, "loss": 2.4822, "step": 19910 }, { "epoch": 1.4843517138599105, "grad_norm": 2.983778476715088, "learning_rate": 0.0002, "loss": 2.5207, "step": 19920 }, { "epoch": 1.485096870342772, "grad_norm": 2.391115427017212, "learning_rate": 0.0002, "loss": 2.4728, "step": 19930 }, { "epoch": 1.4858420268256334, "grad_norm": 2.789459466934204, "learning_rate": 0.0002, "loss": 2.3897, "step": 19940 }, { "epoch": 1.4865871833084947, "grad_norm": 2.1479434967041016, "learning_rate": 0.0002, "loss": 2.4227, "step": 19950 }, { "epoch": 1.4873323397913563, "grad_norm": 2.5585100650787354, "learning_rate": 0.0002, "loss": 2.5672, "step": 19960 }, { "epoch": 1.4880774962742176, "grad_norm": 2.8484928607940674, "learning_rate": 0.0002, "loss": 2.5627, "step": 19970 }, { "epoch": 1.488822652757079, "grad_norm": 2.4347646236419678, "learning_rate": 0.0002, "loss": 2.4704, "step": 19980 }, { "epoch": 1.4895678092399405, "grad_norm": 2.467780351638794, "learning_rate": 0.0002, "loss": 2.4175, "step": 19990 }, { "epoch": 1.4903129657228018, "grad_norm": 2.4302515983581543, "learning_rate": 0.0002, "loss": 2.602, "step": 20000 }, { "epoch": 1.4910581222056631, "grad_norm": 2.5407824516296387, "learning_rate": 0.0002, "loss": 2.3174, "step": 20010 }, { "epoch": 1.4918032786885247, "grad_norm": 2.509092330932617, "learning_rate": 0.0002, "loss": 2.4737, "step": 20020 }, { "epoch": 1.492548435171386, "grad_norm": 2.512086868286133, "learning_rate": 0.0002, "loss": 2.4616, "step": 20030 }, { "epoch": 1.4932935916542474, "grad_norm": 2.5899012088775635, "learning_rate": 0.0002, "loss": 2.6209, "step": 20040 }, { "epoch": 1.494038748137109, "grad_norm": 2.5134575366973877, "learning_rate": 0.0002, "loss": 2.4823, "step": 20050 }, { "epoch": 1.4947839046199702, "grad_norm": 2.4115593433380127, "learning_rate": 0.0002, "loss": 2.4584, "step": 20060 }, { "epoch": 1.4955290611028316, "grad_norm": 2.5678627490997314, "learning_rate": 0.0002, "loss": 2.4916, "step": 20070 }, { "epoch": 1.4962742175856931, "grad_norm": 3.107654333114624, "learning_rate": 0.0002, "loss": 2.3733, "step": 20080 }, { "epoch": 1.4970193740685545, "grad_norm": 2.282750368118286, "learning_rate": 0.0002, "loss": 2.4918, "step": 20090 }, { "epoch": 1.4977645305514158, "grad_norm": 2.589705228805542, "learning_rate": 0.0002, "loss": 2.4723, "step": 20100 }, { "epoch": 1.4985096870342771, "grad_norm": 2.139538288116455, "learning_rate": 0.0002, "loss": 2.463, "step": 20110 }, { "epoch": 1.4992548435171387, "grad_norm": 2.6076650619506836, "learning_rate": 0.0002, "loss": 2.4384, "step": 20120 }, { "epoch": 1.5, "grad_norm": 2.653719186782837, "learning_rate": 0.0002, "loss": 2.3014, "step": 20130 }, { "epoch": 1.5007451564828616, "grad_norm": 3.960010051727295, "learning_rate": 0.0002, "loss": 2.407, "step": 20140 }, { "epoch": 1.5014903129657227, "grad_norm": 2.3678810596466064, "learning_rate": 0.0002, "loss": 2.3797, "step": 20150 }, { "epoch": 1.5022354694485842, "grad_norm": 2.61295485496521, "learning_rate": 0.0002, "loss": 2.4066, "step": 20160 }, { "epoch": 1.5029806259314458, "grad_norm": 2.4393184185028076, "learning_rate": 0.0002, "loss": 2.3545, "step": 20170 }, { "epoch": 1.5037257824143069, "grad_norm": 2.455565929412842, "learning_rate": 0.0002, "loss": 2.4903, "step": 20180 }, { "epoch": 1.5044709388971684, "grad_norm": 2.7440402507781982, "learning_rate": 0.0002, "loss": 2.5455, "step": 20190 }, { "epoch": 1.5052160953800298, "grad_norm": 2.6550514698028564, "learning_rate": 0.0002, "loss": 2.4654, "step": 20200 }, { "epoch": 1.505961251862891, "grad_norm": 2.5063323974609375, "learning_rate": 0.0002, "loss": 2.4515, "step": 20210 }, { "epoch": 1.5067064083457526, "grad_norm": 2.9979264736175537, "learning_rate": 0.0002, "loss": 2.3564, "step": 20220 }, { "epoch": 1.507451564828614, "grad_norm": 2.5606510639190674, "learning_rate": 0.0002, "loss": 2.5883, "step": 20230 }, { "epoch": 1.5081967213114753, "grad_norm": 2.191575050354004, "learning_rate": 0.0002, "loss": 2.4795, "step": 20240 }, { "epoch": 1.5089418777943369, "grad_norm": 2.5303499698638916, "learning_rate": 0.0002, "loss": 2.41, "step": 20250 }, { "epoch": 1.5096870342771982, "grad_norm": 2.602943181991577, "learning_rate": 0.0002, "loss": 2.4533, "step": 20260 }, { "epoch": 1.5104321907600595, "grad_norm": 2.667649030685425, "learning_rate": 0.0002, "loss": 2.5397, "step": 20270 }, { "epoch": 1.511177347242921, "grad_norm": 2.7517497539520264, "learning_rate": 0.0002, "loss": 2.5885, "step": 20280 }, { "epoch": 1.5119225037257824, "grad_norm": 2.7016963958740234, "learning_rate": 0.0002, "loss": 2.2389, "step": 20290 }, { "epoch": 1.5126676602086437, "grad_norm": 3.5088577270507812, "learning_rate": 0.0002, "loss": 2.5337, "step": 20300 }, { "epoch": 1.5134128166915053, "grad_norm": 2.009373188018799, "learning_rate": 0.0002, "loss": 2.5013, "step": 20310 }, { "epoch": 1.5141579731743666, "grad_norm": 3.012927770614624, "learning_rate": 0.0002, "loss": 2.487, "step": 20320 }, { "epoch": 1.514903129657228, "grad_norm": 2.512377977371216, "learning_rate": 0.0002, "loss": 2.6162, "step": 20330 }, { "epoch": 1.5156482861400895, "grad_norm": 2.7566356658935547, "learning_rate": 0.0002, "loss": 2.4332, "step": 20340 }, { "epoch": 1.5163934426229508, "grad_norm": 2.620819330215454, "learning_rate": 0.0002, "loss": 2.451, "step": 20350 }, { "epoch": 1.5171385991058122, "grad_norm": 2.378424882888794, "learning_rate": 0.0002, "loss": 2.3835, "step": 20360 }, { "epoch": 1.5178837555886737, "grad_norm": 2.6105260848999023, "learning_rate": 0.0002, "loss": 2.5465, "step": 20370 }, { "epoch": 1.518628912071535, "grad_norm": 2.4131839275360107, "learning_rate": 0.0002, "loss": 2.3061, "step": 20380 }, { "epoch": 1.5193740685543964, "grad_norm": 2.658151149749756, "learning_rate": 0.0002, "loss": 2.4822, "step": 20390 }, { "epoch": 1.520119225037258, "grad_norm": 2.169219732284546, "learning_rate": 0.0002, "loss": 2.4033, "step": 20400 }, { "epoch": 1.5208643815201193, "grad_norm": 2.769595146179199, "learning_rate": 0.0002, "loss": 2.57, "step": 20410 }, { "epoch": 1.5216095380029806, "grad_norm": 2.6904990673065186, "learning_rate": 0.0002, "loss": 2.4361, "step": 20420 }, { "epoch": 1.5223546944858422, "grad_norm": 2.26855206489563, "learning_rate": 0.0002, "loss": 2.4516, "step": 20430 }, { "epoch": 1.5230998509687033, "grad_norm": 2.5474822521209717, "learning_rate": 0.0002, "loss": 2.4087, "step": 20440 }, { "epoch": 1.5238450074515648, "grad_norm": 2.5763962268829346, "learning_rate": 0.0002, "loss": 2.5081, "step": 20450 }, { "epoch": 1.5245901639344264, "grad_norm": 2.8625242710113525, "learning_rate": 0.0002, "loss": 2.5304, "step": 20460 }, { "epoch": 1.5253353204172875, "grad_norm": 2.5730020999908447, "learning_rate": 0.0002, "loss": 2.5999, "step": 20470 }, { "epoch": 1.526080476900149, "grad_norm": 2.704777717590332, "learning_rate": 0.0002, "loss": 2.5252, "step": 20480 }, { "epoch": 1.5268256333830106, "grad_norm": 2.8790910243988037, "learning_rate": 0.0002, "loss": 2.4782, "step": 20490 }, { "epoch": 1.5275707898658717, "grad_norm": 2.793997287750244, "learning_rate": 0.0002, "loss": 2.5271, "step": 20500 }, { "epoch": 1.5283159463487332, "grad_norm": 2.4312236309051514, "learning_rate": 0.0002, "loss": 2.7423, "step": 20510 }, { "epoch": 1.5290611028315948, "grad_norm": 2.508166551589966, "learning_rate": 0.0002, "loss": 2.5464, "step": 20520 }, { "epoch": 1.529806259314456, "grad_norm": 2.4821505546569824, "learning_rate": 0.0002, "loss": 2.5888, "step": 20530 }, { "epoch": 1.5305514157973175, "grad_norm": 2.0851380825042725, "learning_rate": 0.0002, "loss": 2.1732, "step": 20540 }, { "epoch": 1.5312965722801788, "grad_norm": 2.1268575191497803, "learning_rate": 0.0002, "loss": 2.5131, "step": 20550 }, { "epoch": 1.5320417287630401, "grad_norm": 3.2750792503356934, "learning_rate": 0.0002, "loss": 2.3624, "step": 20560 }, { "epoch": 1.5327868852459017, "grad_norm": 2.771404266357422, "learning_rate": 0.0002, "loss": 2.2204, "step": 20570 }, { "epoch": 1.533532041728763, "grad_norm": 3.1132712364196777, "learning_rate": 0.0002, "loss": 2.5077, "step": 20580 }, { "epoch": 1.5342771982116243, "grad_norm": 2.637928009033203, "learning_rate": 0.0002, "loss": 2.5237, "step": 20590 }, { "epoch": 1.535022354694486, "grad_norm": 3.4630746841430664, "learning_rate": 0.0002, "loss": 2.4241, "step": 20600 }, { "epoch": 1.5357675111773472, "grad_norm": 3.054542303085327, "learning_rate": 0.0002, "loss": 2.6337, "step": 20610 }, { "epoch": 1.5365126676602086, "grad_norm": 2.7666263580322266, "learning_rate": 0.0002, "loss": 2.4202, "step": 20620 }, { "epoch": 1.53725782414307, "grad_norm": 2.8477747440338135, "learning_rate": 0.0002, "loss": 2.4838, "step": 20630 }, { "epoch": 1.5380029806259314, "grad_norm": 2.61881422996521, "learning_rate": 0.0002, "loss": 2.2668, "step": 20640 }, { "epoch": 1.5387481371087928, "grad_norm": 2.792616367340088, "learning_rate": 0.0002, "loss": 2.6354, "step": 20650 }, { "epoch": 1.5394932935916543, "grad_norm": 2.7468929290771484, "learning_rate": 0.0002, "loss": 2.5306, "step": 20660 }, { "epoch": 1.5402384500745157, "grad_norm": 2.3328466415405273, "learning_rate": 0.0002, "loss": 2.5575, "step": 20670 }, { "epoch": 1.540983606557377, "grad_norm": 2.406003475189209, "learning_rate": 0.0002, "loss": 2.4008, "step": 20680 }, { "epoch": 1.5417287630402385, "grad_norm": 2.558863878250122, "learning_rate": 0.0002, "loss": 2.482, "step": 20690 }, { "epoch": 1.5424739195230999, "grad_norm": 2.814772605895996, "learning_rate": 0.0002, "loss": 2.4789, "step": 20700 }, { "epoch": 1.5432190760059612, "grad_norm": 2.6838860511779785, "learning_rate": 0.0002, "loss": 2.5793, "step": 20710 }, { "epoch": 1.5439642324888228, "grad_norm": 2.651115655899048, "learning_rate": 0.0002, "loss": 2.3765, "step": 20720 }, { "epoch": 1.544709388971684, "grad_norm": 2.7897510528564453, "learning_rate": 0.0002, "loss": 2.5824, "step": 20730 }, { "epoch": 1.5454545454545454, "grad_norm": 2.5708913803100586, "learning_rate": 0.0002, "loss": 2.3102, "step": 20740 }, { "epoch": 1.546199701937407, "grad_norm": 2.648242235183716, "learning_rate": 0.0002, "loss": 2.4887, "step": 20750 }, { "epoch": 1.5469448584202683, "grad_norm": 2.639145612716675, "learning_rate": 0.0002, "loss": 2.3971, "step": 20760 }, { "epoch": 1.5476900149031296, "grad_norm": 2.5040032863616943, "learning_rate": 0.0002, "loss": 2.5788, "step": 20770 }, { "epoch": 1.5484351713859912, "grad_norm": 2.368126392364502, "learning_rate": 0.0002, "loss": 2.4651, "step": 20780 }, { "epoch": 1.5491803278688525, "grad_norm": 2.808847427368164, "learning_rate": 0.0002, "loss": 2.4613, "step": 20790 }, { "epoch": 1.5499254843517138, "grad_norm": 2.632382392883301, "learning_rate": 0.0002, "loss": 2.5921, "step": 20800 }, { "epoch": 1.5506706408345754, "grad_norm": 2.4991965293884277, "learning_rate": 0.0002, "loss": 2.6584, "step": 20810 }, { "epoch": 1.5514157973174365, "grad_norm": 2.2685933113098145, "learning_rate": 0.0002, "loss": 2.6257, "step": 20820 }, { "epoch": 1.552160953800298, "grad_norm": 2.494006872177124, "learning_rate": 0.0002, "loss": 2.4663, "step": 20830 }, { "epoch": 1.5529061102831596, "grad_norm": 2.848593235015869, "learning_rate": 0.0002, "loss": 2.6125, "step": 20840 }, { "epoch": 1.5536512667660207, "grad_norm": 2.5735042095184326, "learning_rate": 0.0002, "loss": 2.3904, "step": 20850 }, { "epoch": 1.5543964232488823, "grad_norm": 2.3776659965515137, "learning_rate": 0.0002, "loss": 2.397, "step": 20860 }, { "epoch": 1.5551415797317438, "grad_norm": 2.362820863723755, "learning_rate": 0.0002, "loss": 2.499, "step": 20870 }, { "epoch": 1.555886736214605, "grad_norm": 2.4711267948150635, "learning_rate": 0.0002, "loss": 2.366, "step": 20880 }, { "epoch": 1.5566318926974665, "grad_norm": 2.8080945014953613, "learning_rate": 0.0002, "loss": 2.4686, "step": 20890 }, { "epoch": 1.5573770491803278, "grad_norm": 2.614722967147827, "learning_rate": 0.0002, "loss": 2.4868, "step": 20900 }, { "epoch": 1.5581222056631892, "grad_norm": 2.4597301483154297, "learning_rate": 0.0002, "loss": 2.5285, "step": 20910 }, { "epoch": 1.5588673621460507, "grad_norm": 2.947422981262207, "learning_rate": 0.0002, "loss": 2.6405, "step": 20920 }, { "epoch": 1.559612518628912, "grad_norm": 2.4457414150238037, "learning_rate": 0.0002, "loss": 2.6311, "step": 20930 }, { "epoch": 1.5603576751117734, "grad_norm": 2.5841987133026123, "learning_rate": 0.0002, "loss": 2.605, "step": 20940 }, { "epoch": 1.561102831594635, "grad_norm": 2.63436222076416, "learning_rate": 0.0002, "loss": 2.5732, "step": 20950 }, { "epoch": 1.5618479880774963, "grad_norm": 2.898693799972534, "learning_rate": 0.0002, "loss": 2.5725, "step": 20960 }, { "epoch": 1.5625931445603576, "grad_norm": 2.2288978099823, "learning_rate": 0.0002, "loss": 2.2325, "step": 20970 }, { "epoch": 1.5633383010432191, "grad_norm": 2.741015672683716, "learning_rate": 0.0002, "loss": 2.406, "step": 20980 }, { "epoch": 1.5640834575260805, "grad_norm": 2.3615951538085938, "learning_rate": 0.0002, "loss": 2.4796, "step": 20990 }, { "epoch": 1.5648286140089418, "grad_norm": 2.332751750946045, "learning_rate": 0.0002, "loss": 2.3425, "step": 21000 }, { "epoch": 1.5655737704918034, "grad_norm": 2.585552453994751, "learning_rate": 0.0002, "loss": 2.5401, "step": 21010 }, { "epoch": 1.5663189269746647, "grad_norm": 2.596543550491333, "learning_rate": 0.0002, "loss": 2.2485, "step": 21020 }, { "epoch": 1.567064083457526, "grad_norm": 2.5111849308013916, "learning_rate": 0.0002, "loss": 2.3883, "step": 21030 }, { "epoch": 1.5678092399403876, "grad_norm": 2.949110984802246, "learning_rate": 0.0002, "loss": 2.626, "step": 21040 }, { "epoch": 1.568554396423249, "grad_norm": 2.5220885276794434, "learning_rate": 0.0002, "loss": 2.6748, "step": 21050 }, { "epoch": 1.5692995529061102, "grad_norm": 2.4245173931121826, "learning_rate": 0.0002, "loss": 2.4853, "step": 21060 }, { "epoch": 1.5700447093889718, "grad_norm": 2.2944424152374268, "learning_rate": 0.0002, "loss": 2.3863, "step": 21070 }, { "epoch": 1.5707898658718331, "grad_norm": 2.557868003845215, "learning_rate": 0.0002, "loss": 2.5679, "step": 21080 }, { "epoch": 1.5715350223546944, "grad_norm": 2.418034315109253, "learning_rate": 0.0002, "loss": 2.3904, "step": 21090 }, { "epoch": 1.572280178837556, "grad_norm": 2.6931450366973877, "learning_rate": 0.0002, "loss": 2.5675, "step": 21100 }, { "epoch": 1.5730253353204173, "grad_norm": 2.8950247764587402, "learning_rate": 0.0002, "loss": 2.4783, "step": 21110 }, { "epoch": 1.5737704918032787, "grad_norm": 2.3878729343414307, "learning_rate": 0.0002, "loss": 2.3789, "step": 21120 }, { "epoch": 1.5745156482861402, "grad_norm": 2.6210811138153076, "learning_rate": 0.0002, "loss": 2.5938, "step": 21130 }, { "epoch": 1.5752608047690015, "grad_norm": 2.540480852127075, "learning_rate": 0.0002, "loss": 2.6351, "step": 21140 }, { "epoch": 1.5760059612518629, "grad_norm": 2.7353644371032715, "learning_rate": 0.0002, "loss": 2.3959, "step": 21150 }, { "epoch": 1.5767511177347244, "grad_norm": 2.555797576904297, "learning_rate": 0.0002, "loss": 2.7066, "step": 21160 }, { "epoch": 1.5774962742175855, "grad_norm": 2.2357959747314453, "learning_rate": 0.0002, "loss": 2.4063, "step": 21170 }, { "epoch": 1.578241430700447, "grad_norm": 2.2032105922698975, "learning_rate": 0.0002, "loss": 2.5256, "step": 21180 }, { "epoch": 1.5789865871833086, "grad_norm": 2.421905994415283, "learning_rate": 0.0002, "loss": 2.5101, "step": 21190 }, { "epoch": 1.5797317436661698, "grad_norm": 2.3932013511657715, "learning_rate": 0.0002, "loss": 2.5003, "step": 21200 }, { "epoch": 1.5804769001490313, "grad_norm": 2.751396656036377, "learning_rate": 0.0002, "loss": 2.6125, "step": 21210 }, { "epoch": 1.5812220566318929, "grad_norm": 2.415769100189209, "learning_rate": 0.0002, "loss": 2.3935, "step": 21220 }, { "epoch": 1.581967213114754, "grad_norm": 2.7542152404785156, "learning_rate": 0.0002, "loss": 2.5242, "step": 21230 }, { "epoch": 1.5827123695976155, "grad_norm": 2.4142708778381348, "learning_rate": 0.0002, "loss": 2.5011, "step": 21240 }, { "epoch": 1.5834575260804769, "grad_norm": 2.716956615447998, "learning_rate": 0.0002, "loss": 2.5144, "step": 21250 }, { "epoch": 1.5842026825633382, "grad_norm": 2.8631787300109863, "learning_rate": 0.0002, "loss": 2.5118, "step": 21260 }, { "epoch": 1.5849478390461997, "grad_norm": 2.676856517791748, "learning_rate": 0.0002, "loss": 2.4845, "step": 21270 }, { "epoch": 1.585692995529061, "grad_norm": 2.477172374725342, "learning_rate": 0.0002, "loss": 2.5516, "step": 21280 }, { "epoch": 1.5864381520119224, "grad_norm": 2.3883090019226074, "learning_rate": 0.0002, "loss": 2.5904, "step": 21290 }, { "epoch": 1.587183308494784, "grad_norm": 2.0615110397338867, "learning_rate": 0.0002, "loss": 2.4803, "step": 21300 }, { "epoch": 1.5879284649776453, "grad_norm": 2.6857569217681885, "learning_rate": 0.0002, "loss": 2.6149, "step": 21310 }, { "epoch": 1.5886736214605066, "grad_norm": 2.224165678024292, "learning_rate": 0.0002, "loss": 2.4841, "step": 21320 }, { "epoch": 1.5894187779433682, "grad_norm": 2.5804173946380615, "learning_rate": 0.0002, "loss": 2.7102, "step": 21330 }, { "epoch": 1.5901639344262295, "grad_norm": 2.559598445892334, "learning_rate": 0.0002, "loss": 2.3667, "step": 21340 }, { "epoch": 1.5909090909090908, "grad_norm": 2.5414412021636963, "learning_rate": 0.0002, "loss": 2.44, "step": 21350 }, { "epoch": 1.5916542473919524, "grad_norm": 2.575948476791382, "learning_rate": 0.0002, "loss": 2.6272, "step": 21360 }, { "epoch": 1.5923994038748137, "grad_norm": 2.558068037033081, "learning_rate": 0.0002, "loss": 2.2373, "step": 21370 }, { "epoch": 1.593144560357675, "grad_norm": 2.6954293251037598, "learning_rate": 0.0002, "loss": 2.241, "step": 21380 }, { "epoch": 1.5938897168405366, "grad_norm": 2.49922251701355, "learning_rate": 0.0002, "loss": 2.5755, "step": 21390 }, { "epoch": 1.594634873323398, "grad_norm": 3.0789153575897217, "learning_rate": 0.0002, "loss": 2.5445, "step": 21400 }, { "epoch": 1.5953800298062593, "grad_norm": 2.4592039585113525, "learning_rate": 0.0002, "loss": 2.4833, "step": 21410 }, { "epoch": 1.5961251862891208, "grad_norm": 2.977599620819092, "learning_rate": 0.0002, "loss": 2.6318, "step": 21420 }, { "epoch": 1.5968703427719821, "grad_norm": 2.868572950363159, "learning_rate": 0.0002, "loss": 2.6332, "step": 21430 }, { "epoch": 1.5976154992548435, "grad_norm": 2.8400204181671143, "learning_rate": 0.0002, "loss": 2.6106, "step": 21440 }, { "epoch": 1.598360655737705, "grad_norm": 3.0851080417633057, "learning_rate": 0.0002, "loss": 2.4113, "step": 21450 }, { "epoch": 1.5991058122205664, "grad_norm": 2.4475607872009277, "learning_rate": 0.0002, "loss": 2.5, "step": 21460 }, { "epoch": 1.5998509687034277, "grad_norm": 2.248441696166992, "learning_rate": 0.0002, "loss": 2.2983, "step": 21470 }, { "epoch": 1.6005961251862892, "grad_norm": 2.1876697540283203, "learning_rate": 0.0002, "loss": 2.5876, "step": 21480 }, { "epoch": 1.6013412816691506, "grad_norm": 2.766340494155884, "learning_rate": 0.0002, "loss": 2.6814, "step": 21490 }, { "epoch": 1.602086438152012, "grad_norm": 2.4883816242218018, "learning_rate": 0.0002, "loss": 2.4444, "step": 21500 }, { "epoch": 1.6028315946348735, "grad_norm": 2.7515087127685547, "learning_rate": 0.0002, "loss": 2.5638, "step": 21510 }, { "epoch": 1.6035767511177346, "grad_norm": 2.400191068649292, "learning_rate": 0.0002, "loss": 2.5826, "step": 21520 }, { "epoch": 1.6043219076005961, "grad_norm": 2.65370774269104, "learning_rate": 0.0002, "loss": 2.3286, "step": 21530 }, { "epoch": 1.6050670640834577, "grad_norm": 2.525686264038086, "learning_rate": 0.0002, "loss": 2.5799, "step": 21540 }, { "epoch": 1.6058122205663188, "grad_norm": 2.6483981609344482, "learning_rate": 0.0002, "loss": 2.4719, "step": 21550 }, { "epoch": 1.6065573770491803, "grad_norm": 2.4052860736846924, "learning_rate": 0.0002, "loss": 2.6085, "step": 21560 }, { "epoch": 1.6073025335320419, "grad_norm": 2.373023271560669, "learning_rate": 0.0002, "loss": 2.5449, "step": 21570 }, { "epoch": 1.608047690014903, "grad_norm": 2.792961835861206, "learning_rate": 0.0002, "loss": 2.5756, "step": 21580 }, { "epoch": 1.6087928464977646, "grad_norm": 2.024534225463867, "learning_rate": 0.0002, "loss": 2.4098, "step": 21590 }, { "epoch": 1.6095380029806259, "grad_norm": 2.550271511077881, "learning_rate": 0.0002, "loss": 2.7197, "step": 21600 }, { "epoch": 1.6102831594634872, "grad_norm": 2.357914686203003, "learning_rate": 0.0002, "loss": 2.4916, "step": 21610 }, { "epoch": 1.6110283159463488, "grad_norm": 2.6073484420776367, "learning_rate": 0.0002, "loss": 2.5941, "step": 21620 }, { "epoch": 1.61177347242921, "grad_norm": 2.532940626144409, "learning_rate": 0.0002, "loss": 2.469, "step": 21630 }, { "epoch": 1.6125186289120714, "grad_norm": 2.5476560592651367, "learning_rate": 0.0002, "loss": 2.4917, "step": 21640 }, { "epoch": 1.613263785394933, "grad_norm": 2.505892038345337, "learning_rate": 0.0002, "loss": 2.4284, "step": 21650 }, { "epoch": 1.6140089418777943, "grad_norm": 2.5390076637268066, "learning_rate": 0.0002, "loss": 2.6434, "step": 21660 }, { "epoch": 1.6147540983606556, "grad_norm": 1.909812092781067, "learning_rate": 0.0002, "loss": 2.3956, "step": 21670 }, { "epoch": 1.6154992548435172, "grad_norm": 2.746105670928955, "learning_rate": 0.0002, "loss": 2.6643, "step": 21680 }, { "epoch": 1.6162444113263785, "grad_norm": 2.7096972465515137, "learning_rate": 0.0002, "loss": 2.3153, "step": 21690 }, { "epoch": 1.6169895678092399, "grad_norm": 2.1821792125701904, "learning_rate": 0.0002, "loss": 2.4432, "step": 21700 }, { "epoch": 1.6177347242921014, "grad_norm": 2.201249599456787, "learning_rate": 0.0002, "loss": 2.2265, "step": 21710 }, { "epoch": 1.6184798807749627, "grad_norm": 2.475764274597168, "learning_rate": 0.0002, "loss": 2.4643, "step": 21720 }, { "epoch": 1.619225037257824, "grad_norm": 2.8647007942199707, "learning_rate": 0.0002, "loss": 2.4923, "step": 21730 }, { "epoch": 1.6199701937406856, "grad_norm": 2.39050555229187, "learning_rate": 0.0002, "loss": 2.5228, "step": 21740 }, { "epoch": 1.620715350223547, "grad_norm": 2.2536306381225586, "learning_rate": 0.0002, "loss": 2.4505, "step": 21750 }, { "epoch": 1.6214605067064083, "grad_norm": 2.4495081901550293, "learning_rate": 0.0002, "loss": 2.4158, "step": 21760 }, { "epoch": 1.6222056631892698, "grad_norm": 2.486243486404419, "learning_rate": 0.0002, "loss": 2.5142, "step": 21770 }, { "epoch": 1.6229508196721312, "grad_norm": 2.2863104343414307, "learning_rate": 0.0002, "loss": 2.5569, "step": 21780 }, { "epoch": 1.6236959761549925, "grad_norm": 2.1945390701293945, "learning_rate": 0.0002, "loss": 2.362, "step": 21790 }, { "epoch": 1.624441132637854, "grad_norm": 2.59427809715271, "learning_rate": 0.0002, "loss": 2.5408, "step": 21800 }, { "epoch": 1.6251862891207154, "grad_norm": 2.5718109607696533, "learning_rate": 0.0002, "loss": 2.628, "step": 21810 }, { "epoch": 1.6259314456035767, "grad_norm": 2.5133166313171387, "learning_rate": 0.0002, "loss": 2.5077, "step": 21820 }, { "epoch": 1.6266766020864383, "grad_norm": 2.313368797302246, "learning_rate": 0.0002, "loss": 2.5912, "step": 21830 }, { "epoch": 1.6274217585692996, "grad_norm": 2.2794532775878906, "learning_rate": 0.0002, "loss": 2.3284, "step": 21840 }, { "epoch": 1.628166915052161, "grad_norm": 2.508225202560425, "learning_rate": 0.0002, "loss": 2.5363, "step": 21850 }, { "epoch": 1.6289120715350225, "grad_norm": 2.6698861122131348, "learning_rate": 0.0002, "loss": 2.4758, "step": 21860 }, { "epoch": 1.6296572280178836, "grad_norm": 2.500823497772217, "learning_rate": 0.0002, "loss": 2.3382, "step": 21870 }, { "epoch": 1.6304023845007451, "grad_norm": 2.7005372047424316, "learning_rate": 0.0002, "loss": 2.281, "step": 21880 }, { "epoch": 1.6311475409836067, "grad_norm": 2.6699464321136475, "learning_rate": 0.0002, "loss": 2.5618, "step": 21890 }, { "epoch": 1.6318926974664678, "grad_norm": 2.852407932281494, "learning_rate": 0.0002, "loss": 2.5993, "step": 21900 }, { "epoch": 1.6326378539493294, "grad_norm": 2.408608913421631, "learning_rate": 0.0002, "loss": 2.5603, "step": 21910 }, { "epoch": 1.633383010432191, "grad_norm": 2.828063726425171, "learning_rate": 0.0002, "loss": 2.5029, "step": 21920 }, { "epoch": 1.634128166915052, "grad_norm": 2.4551167488098145, "learning_rate": 0.0002, "loss": 2.3027, "step": 21930 }, { "epoch": 1.6348733233979136, "grad_norm": 2.5409445762634277, "learning_rate": 0.0002, "loss": 2.4344, "step": 21940 }, { "epoch": 1.635618479880775, "grad_norm": 2.7620127201080322, "learning_rate": 0.0002, "loss": 2.6531, "step": 21950 }, { "epoch": 1.6363636363636362, "grad_norm": 2.699997663497925, "learning_rate": 0.0002, "loss": 2.4624, "step": 21960 }, { "epoch": 1.6371087928464978, "grad_norm": 2.9198567867279053, "learning_rate": 0.0002, "loss": 2.5269, "step": 21970 }, { "epoch": 1.6378539493293591, "grad_norm": 2.7812252044677734, "learning_rate": 0.0002, "loss": 2.5048, "step": 21980 }, { "epoch": 1.6385991058122205, "grad_norm": 2.7388384342193604, "learning_rate": 0.0002, "loss": 2.2389, "step": 21990 }, { "epoch": 1.639344262295082, "grad_norm": 2.064328670501709, "learning_rate": 0.0002, "loss": 2.4192, "step": 22000 }, { "epoch": 1.6400894187779433, "grad_norm": 2.4596285820007324, "learning_rate": 0.0002, "loss": 2.543, "step": 22010 }, { "epoch": 1.6408345752608047, "grad_norm": 2.510390043258667, "learning_rate": 0.0002, "loss": 2.3457, "step": 22020 }, { "epoch": 1.6415797317436662, "grad_norm": 2.2899527549743652, "learning_rate": 0.0002, "loss": 2.523, "step": 22030 }, { "epoch": 1.6423248882265276, "grad_norm": 2.8313241004943848, "learning_rate": 0.0002, "loss": 2.4442, "step": 22040 }, { "epoch": 1.6430700447093889, "grad_norm": 2.6460535526275635, "learning_rate": 0.0002, "loss": 2.4327, "step": 22050 }, { "epoch": 1.6438152011922504, "grad_norm": 2.5774576663970947, "learning_rate": 0.0002, "loss": 2.6181, "step": 22060 }, { "epoch": 1.6445603576751118, "grad_norm": 2.517756700515747, "learning_rate": 0.0002, "loss": 2.5624, "step": 22070 }, { "epoch": 1.645305514157973, "grad_norm": 2.382204055786133, "learning_rate": 0.0002, "loss": 2.6089, "step": 22080 }, { "epoch": 1.6460506706408347, "grad_norm": 2.5505146980285645, "learning_rate": 0.0002, "loss": 2.5738, "step": 22090 }, { "epoch": 1.646795827123696, "grad_norm": 3.1729090213775635, "learning_rate": 0.0002, "loss": 2.5716, "step": 22100 }, { "epoch": 1.6475409836065573, "grad_norm": 2.7010695934295654, "learning_rate": 0.0002, "loss": 2.346, "step": 22110 }, { "epoch": 1.6482861400894189, "grad_norm": 2.2861719131469727, "learning_rate": 0.0002, "loss": 2.4943, "step": 22120 }, { "epoch": 1.6490312965722802, "grad_norm": 2.2667644023895264, "learning_rate": 0.0002, "loss": 2.5656, "step": 22130 }, { "epoch": 1.6497764530551415, "grad_norm": 2.9026103019714355, "learning_rate": 0.0002, "loss": 2.5225, "step": 22140 }, { "epoch": 1.650521609538003, "grad_norm": 2.4847819805145264, "learning_rate": 0.0002, "loss": 2.5498, "step": 22150 }, { "epoch": 1.6512667660208644, "grad_norm": 2.3765196800231934, "learning_rate": 0.0002, "loss": 2.4714, "step": 22160 }, { "epoch": 1.6520119225037257, "grad_norm": 2.203185796737671, "learning_rate": 0.0002, "loss": 2.5005, "step": 22170 }, { "epoch": 1.6527570789865873, "grad_norm": 2.480102300643921, "learning_rate": 0.0002, "loss": 2.5686, "step": 22180 }, { "epoch": 1.6535022354694486, "grad_norm": 2.299968957901001, "learning_rate": 0.0002, "loss": 2.5814, "step": 22190 }, { "epoch": 1.65424739195231, "grad_norm": 3.0211429595947266, "learning_rate": 0.0002, "loss": 2.6236, "step": 22200 }, { "epoch": 1.6549925484351715, "grad_norm": 2.7016682624816895, "learning_rate": 0.0002, "loss": 2.5385, "step": 22210 }, { "epoch": 1.6557377049180326, "grad_norm": 2.5749051570892334, "learning_rate": 0.0002, "loss": 2.4144, "step": 22220 }, { "epoch": 1.6564828614008942, "grad_norm": 2.0706841945648193, "learning_rate": 0.0002, "loss": 2.4751, "step": 22230 }, { "epoch": 1.6572280178837557, "grad_norm": 2.4944183826446533, "learning_rate": 0.0002, "loss": 2.5371, "step": 22240 }, { "epoch": 1.6579731743666168, "grad_norm": 2.600158214569092, "learning_rate": 0.0002, "loss": 2.5388, "step": 22250 }, { "epoch": 1.6587183308494784, "grad_norm": 2.428926706314087, "learning_rate": 0.0002, "loss": 2.4767, "step": 22260 }, { "epoch": 1.65946348733234, "grad_norm": 2.805734395980835, "learning_rate": 0.0002, "loss": 2.4908, "step": 22270 }, { "epoch": 1.660208643815201, "grad_norm": 2.7497949600219727, "learning_rate": 0.0002, "loss": 2.4993, "step": 22280 }, { "epoch": 1.6609538002980626, "grad_norm": 2.559528350830078, "learning_rate": 0.0002, "loss": 2.2774, "step": 22290 }, { "epoch": 1.661698956780924, "grad_norm": 2.5140788555145264, "learning_rate": 0.0002, "loss": 2.5504, "step": 22300 }, { "epoch": 1.6624441132637853, "grad_norm": 2.3234243392944336, "learning_rate": 0.0002, "loss": 2.6684, "step": 22310 }, { "epoch": 1.6631892697466468, "grad_norm": 2.3977694511413574, "learning_rate": 0.0002, "loss": 2.7416, "step": 22320 }, { "epoch": 1.6639344262295082, "grad_norm": 2.2138288021087646, "learning_rate": 0.0002, "loss": 2.2312, "step": 22330 }, { "epoch": 1.6646795827123695, "grad_norm": 2.4575119018554688, "learning_rate": 0.0002, "loss": 2.549, "step": 22340 }, { "epoch": 1.665424739195231, "grad_norm": 2.426833152770996, "learning_rate": 0.0002, "loss": 2.449, "step": 22350 }, { "epoch": 1.6661698956780924, "grad_norm": 3.055542230606079, "learning_rate": 0.0002, "loss": 2.4273, "step": 22360 }, { "epoch": 1.6669150521609537, "grad_norm": 2.5469939708709717, "learning_rate": 0.0002, "loss": 2.6418, "step": 22370 }, { "epoch": 1.6676602086438153, "grad_norm": 2.537905216217041, "learning_rate": 0.0002, "loss": 2.5369, "step": 22380 }, { "epoch": 1.6684053651266766, "grad_norm": 2.696962833404541, "learning_rate": 0.0002, "loss": 2.6064, "step": 22390 }, { "epoch": 1.669150521609538, "grad_norm": 2.4675662517547607, "learning_rate": 0.0002, "loss": 2.515, "step": 22400 }, { "epoch": 1.6698956780923995, "grad_norm": 2.486266851425171, "learning_rate": 0.0002, "loss": 2.4349, "step": 22410 }, { "epoch": 1.6706408345752608, "grad_norm": 2.529770612716675, "learning_rate": 0.0002, "loss": 2.3911, "step": 22420 }, { "epoch": 1.6713859910581221, "grad_norm": 2.3345956802368164, "learning_rate": 0.0002, "loss": 2.5192, "step": 22430 }, { "epoch": 1.6721311475409837, "grad_norm": 2.8987648487091064, "learning_rate": 0.0002, "loss": 2.355, "step": 22440 }, { "epoch": 1.672876304023845, "grad_norm": 2.4978036880493164, "learning_rate": 0.0002, "loss": 2.5969, "step": 22450 }, { "epoch": 1.6736214605067063, "grad_norm": 2.2257959842681885, "learning_rate": 0.0002, "loss": 2.5473, "step": 22460 }, { "epoch": 1.674366616989568, "grad_norm": 2.3007636070251465, "learning_rate": 0.0002, "loss": 2.5016, "step": 22470 }, { "epoch": 1.6751117734724292, "grad_norm": 2.6325652599334717, "learning_rate": 0.0002, "loss": 2.4148, "step": 22480 }, { "epoch": 1.6758569299552906, "grad_norm": 2.6435465812683105, "learning_rate": 0.0002, "loss": 2.5434, "step": 22490 }, { "epoch": 1.6766020864381521, "grad_norm": 2.9889986515045166, "learning_rate": 0.0002, "loss": 2.6347, "step": 22500 }, { "epoch": 1.6773472429210134, "grad_norm": 3.051703691482544, "learning_rate": 0.0002, "loss": 2.5085, "step": 22510 }, { "epoch": 1.6780923994038748, "grad_norm": 2.7691986560821533, "learning_rate": 0.0002, "loss": 2.4834, "step": 22520 }, { "epoch": 1.6788375558867363, "grad_norm": 2.565810441970825, "learning_rate": 0.0002, "loss": 2.4304, "step": 22530 }, { "epoch": 1.6795827123695977, "grad_norm": 2.5967001914978027, "learning_rate": 0.0002, "loss": 2.4379, "step": 22540 }, { "epoch": 1.680327868852459, "grad_norm": 1.9632105827331543, "learning_rate": 0.0002, "loss": 2.2073, "step": 22550 }, { "epoch": 1.6810730253353205, "grad_norm": 2.458916664123535, "learning_rate": 0.0002, "loss": 2.443, "step": 22560 }, { "epoch": 1.6818181818181817, "grad_norm": 2.67900013923645, "learning_rate": 0.0002, "loss": 2.6422, "step": 22570 }, { "epoch": 1.6825633383010432, "grad_norm": 2.3862104415893555, "learning_rate": 0.0002, "loss": 2.5824, "step": 22580 }, { "epoch": 1.6833084947839048, "grad_norm": 2.43456768989563, "learning_rate": 0.0002, "loss": 2.4033, "step": 22590 }, { "epoch": 1.6840536512667659, "grad_norm": 2.6378731727600098, "learning_rate": 0.0002, "loss": 2.511, "step": 22600 }, { "epoch": 1.6847988077496274, "grad_norm": 2.612466335296631, "learning_rate": 0.0002, "loss": 2.5721, "step": 22610 }, { "epoch": 1.685543964232489, "grad_norm": 2.5247201919555664, "learning_rate": 0.0002, "loss": 2.2889, "step": 22620 }, { "epoch": 1.68628912071535, "grad_norm": 2.7932889461517334, "learning_rate": 0.0002, "loss": 2.5143, "step": 22630 }, { "epoch": 1.6870342771982116, "grad_norm": 2.531528949737549, "learning_rate": 0.0002, "loss": 2.6344, "step": 22640 }, { "epoch": 1.687779433681073, "grad_norm": 2.5633575916290283, "learning_rate": 0.0002, "loss": 2.6162, "step": 22650 }, { "epoch": 1.6885245901639343, "grad_norm": 2.114488124847412, "learning_rate": 0.0002, "loss": 2.5851, "step": 22660 }, { "epoch": 1.6892697466467959, "grad_norm": 2.478349208831787, "learning_rate": 0.0002, "loss": 2.4753, "step": 22670 }, { "epoch": 1.6900149031296572, "grad_norm": 2.538219690322876, "learning_rate": 0.0002, "loss": 2.5749, "step": 22680 }, { "epoch": 1.6907600596125185, "grad_norm": 2.557431221008301, "learning_rate": 0.0002, "loss": 2.5866, "step": 22690 }, { "epoch": 1.69150521609538, "grad_norm": 2.831338882446289, "learning_rate": 0.0002, "loss": 2.7184, "step": 22700 }, { "epoch": 1.6922503725782414, "grad_norm": 2.0015451908111572, "learning_rate": 0.0002, "loss": 2.4181, "step": 22710 }, { "epoch": 1.6929955290611027, "grad_norm": 2.3533787727355957, "learning_rate": 0.0002, "loss": 2.3283, "step": 22720 }, { "epoch": 1.6937406855439643, "grad_norm": 2.209768295288086, "learning_rate": 0.0002, "loss": 2.5522, "step": 22730 }, { "epoch": 1.6944858420268256, "grad_norm": 2.5806851387023926, "learning_rate": 0.0002, "loss": 2.6221, "step": 22740 }, { "epoch": 1.695230998509687, "grad_norm": 2.6998038291931152, "learning_rate": 0.0002, "loss": 2.5525, "step": 22750 }, { "epoch": 1.6959761549925485, "grad_norm": 2.318673610687256, "learning_rate": 0.0002, "loss": 2.394, "step": 22760 }, { "epoch": 1.6967213114754098, "grad_norm": 1.767236590385437, "learning_rate": 0.0002, "loss": 2.2679, "step": 22770 }, { "epoch": 1.6974664679582712, "grad_norm": 2.5516786575317383, "learning_rate": 0.0002, "loss": 2.4601, "step": 22780 }, { "epoch": 1.6982116244411327, "grad_norm": 2.5691072940826416, "learning_rate": 0.0002, "loss": 2.4903, "step": 22790 }, { "epoch": 1.698956780923994, "grad_norm": 2.6032519340515137, "learning_rate": 0.0002, "loss": 2.5091, "step": 22800 }, { "epoch": 1.6997019374068554, "grad_norm": 2.57243013381958, "learning_rate": 0.0002, "loss": 2.3827, "step": 22810 }, { "epoch": 1.700447093889717, "grad_norm": 2.560321092605591, "learning_rate": 0.0002, "loss": 2.587, "step": 22820 }, { "epoch": 1.7011922503725783, "grad_norm": 2.3471126556396484, "learning_rate": 0.0002, "loss": 2.5432, "step": 22830 }, { "epoch": 1.7019374068554396, "grad_norm": 2.7333359718322754, "learning_rate": 0.0002, "loss": 2.4153, "step": 22840 }, { "epoch": 1.7026825633383011, "grad_norm": 2.481062173843384, "learning_rate": 0.0002, "loss": 2.6483, "step": 22850 }, { "epoch": 1.7034277198211625, "grad_norm": 2.814061164855957, "learning_rate": 0.0002, "loss": 2.5051, "step": 22860 }, { "epoch": 1.7041728763040238, "grad_norm": 2.8423242568969727, "learning_rate": 0.0002, "loss": 2.6922, "step": 22870 }, { "epoch": 1.7049180327868854, "grad_norm": 2.553450107574463, "learning_rate": 0.0002, "loss": 2.5812, "step": 22880 }, { "epoch": 1.7056631892697467, "grad_norm": 2.6540987491607666, "learning_rate": 0.0002, "loss": 2.5114, "step": 22890 }, { "epoch": 1.706408345752608, "grad_norm": 2.4300739765167236, "learning_rate": 0.0002, "loss": 2.6493, "step": 22900 }, { "epoch": 1.7071535022354696, "grad_norm": 2.4339499473571777, "learning_rate": 0.0002, "loss": 2.5105, "step": 22910 }, { "epoch": 1.7078986587183307, "grad_norm": 2.5802414417266846, "learning_rate": 0.0002, "loss": 2.4289, "step": 22920 }, { "epoch": 1.7086438152011922, "grad_norm": 2.7700533866882324, "learning_rate": 0.0002, "loss": 2.6301, "step": 22930 }, { "epoch": 1.7093889716840538, "grad_norm": 2.8011083602905273, "learning_rate": 0.0002, "loss": 2.3991, "step": 22940 }, { "epoch": 1.710134128166915, "grad_norm": 2.746286630630493, "learning_rate": 0.0002, "loss": 2.396, "step": 22950 }, { "epoch": 1.7108792846497765, "grad_norm": 2.3285419940948486, "learning_rate": 0.0002, "loss": 2.536, "step": 22960 }, { "epoch": 1.711624441132638, "grad_norm": 2.7214415073394775, "learning_rate": 0.0002, "loss": 2.5631, "step": 22970 }, { "epoch": 1.7123695976154991, "grad_norm": 2.2611494064331055, "learning_rate": 0.0002, "loss": 2.4555, "step": 22980 }, { "epoch": 1.7131147540983607, "grad_norm": 2.569819688796997, "learning_rate": 0.0002, "loss": 2.4973, "step": 22990 }, { "epoch": 1.713859910581222, "grad_norm": 2.3823273181915283, "learning_rate": 0.0002, "loss": 2.5048, "step": 23000 }, { "epoch": 1.7146050670640833, "grad_norm": 2.369661569595337, "learning_rate": 0.0002, "loss": 2.351, "step": 23010 }, { "epoch": 1.7153502235469449, "grad_norm": 2.551729679107666, "learning_rate": 0.0002, "loss": 2.5331, "step": 23020 }, { "epoch": 1.7160953800298062, "grad_norm": 2.7406301498413086, "learning_rate": 0.0002, "loss": 2.3774, "step": 23030 }, { "epoch": 1.7168405365126675, "grad_norm": 3.1042418479919434, "learning_rate": 0.0002, "loss": 2.5131, "step": 23040 }, { "epoch": 1.717585692995529, "grad_norm": 2.6012461185455322, "learning_rate": 0.0002, "loss": 2.635, "step": 23050 }, { "epoch": 1.7183308494783904, "grad_norm": 2.7491185665130615, "learning_rate": 0.0002, "loss": 2.5063, "step": 23060 }, { "epoch": 1.7190760059612518, "grad_norm": 2.6317410469055176, "learning_rate": 0.0002, "loss": 2.6614, "step": 23070 }, { "epoch": 1.7198211624441133, "grad_norm": 2.699126958847046, "learning_rate": 0.0002, "loss": 2.6547, "step": 23080 }, { "epoch": 1.7205663189269746, "grad_norm": 2.882514715194702, "learning_rate": 0.0002, "loss": 2.5183, "step": 23090 }, { "epoch": 1.721311475409836, "grad_norm": 2.217384099960327, "learning_rate": 0.0002, "loss": 2.4662, "step": 23100 }, { "epoch": 1.7220566318926975, "grad_norm": 2.645479679107666, "learning_rate": 0.0002, "loss": 2.5276, "step": 23110 }, { "epoch": 1.7228017883755589, "grad_norm": 3.0477075576782227, "learning_rate": 0.0002, "loss": 2.5021, "step": 23120 }, { "epoch": 1.7235469448584202, "grad_norm": 2.56258225440979, "learning_rate": 0.0002, "loss": 2.5864, "step": 23130 }, { "epoch": 1.7242921013412817, "grad_norm": 2.4579217433929443, "learning_rate": 0.0002, "loss": 2.5179, "step": 23140 }, { "epoch": 1.725037257824143, "grad_norm": 2.761368989944458, "learning_rate": 0.0002, "loss": 2.4578, "step": 23150 }, { "epoch": 1.7257824143070044, "grad_norm": 2.7409048080444336, "learning_rate": 0.0002, "loss": 2.5305, "step": 23160 }, { "epoch": 1.726527570789866, "grad_norm": 2.3490233421325684, "learning_rate": 0.0002, "loss": 2.4945, "step": 23170 }, { "epoch": 1.7272727272727273, "grad_norm": 2.397792339324951, "learning_rate": 0.0002, "loss": 2.5054, "step": 23180 }, { "epoch": 1.7280178837555886, "grad_norm": 2.8195900917053223, "learning_rate": 0.0002, "loss": 2.5461, "step": 23190 }, { "epoch": 1.7287630402384502, "grad_norm": 2.2869858741760254, "learning_rate": 0.0002, "loss": 2.4762, "step": 23200 }, { "epoch": 1.7295081967213115, "grad_norm": 2.136507272720337, "learning_rate": 0.0002, "loss": 2.5111, "step": 23210 }, { "epoch": 1.7302533532041728, "grad_norm": 2.2523751258850098, "learning_rate": 0.0002, "loss": 2.3777, "step": 23220 }, { "epoch": 1.7309985096870344, "grad_norm": 2.7627720832824707, "learning_rate": 0.0002, "loss": 2.3795, "step": 23230 }, { "epoch": 1.7317436661698957, "grad_norm": 2.663520336151123, "learning_rate": 0.0002, "loss": 2.4765, "step": 23240 }, { "epoch": 1.732488822652757, "grad_norm": 2.7753946781158447, "learning_rate": 0.0002, "loss": 2.6572, "step": 23250 }, { "epoch": 1.7332339791356186, "grad_norm": 2.697758436203003, "learning_rate": 0.0002, "loss": 2.5019, "step": 23260 }, { "epoch": 1.7339791356184797, "grad_norm": 2.4751906394958496, "learning_rate": 0.0002, "loss": 2.5409, "step": 23270 }, { "epoch": 1.7347242921013413, "grad_norm": 2.4185609817504883, "learning_rate": 0.0002, "loss": 2.5397, "step": 23280 }, { "epoch": 1.7354694485842028, "grad_norm": 2.462890148162842, "learning_rate": 0.0002, "loss": 2.3603, "step": 23290 }, { "epoch": 1.736214605067064, "grad_norm": 2.575814723968506, "learning_rate": 0.0002, "loss": 2.6263, "step": 23300 }, { "epoch": 1.7369597615499255, "grad_norm": 2.7433903217315674, "learning_rate": 0.0002, "loss": 2.3266, "step": 23310 }, { "epoch": 1.737704918032787, "grad_norm": 2.4978883266448975, "learning_rate": 0.0002, "loss": 2.4926, "step": 23320 }, { "epoch": 1.7384500745156481, "grad_norm": 2.297142744064331, "learning_rate": 0.0002, "loss": 2.6361, "step": 23330 }, { "epoch": 1.7391952309985097, "grad_norm": 2.436873435974121, "learning_rate": 0.0002, "loss": 2.4963, "step": 23340 }, { "epoch": 1.7399403874813713, "grad_norm": 2.3684535026550293, "learning_rate": 0.0002, "loss": 2.451, "step": 23350 }, { "epoch": 1.7406855439642324, "grad_norm": 2.752638101577759, "learning_rate": 0.0002, "loss": 2.3857, "step": 23360 }, { "epoch": 1.741430700447094, "grad_norm": 3.9165122509002686, "learning_rate": 0.0002, "loss": 2.552, "step": 23370 }, { "epoch": 1.7421758569299552, "grad_norm": 2.561709403991699, "learning_rate": 0.0002, "loss": 2.5888, "step": 23380 }, { "epoch": 1.7429210134128166, "grad_norm": 2.5623152256011963, "learning_rate": 0.0002, "loss": 2.3984, "step": 23390 }, { "epoch": 1.7436661698956781, "grad_norm": 2.3049354553222656, "learning_rate": 0.0002, "loss": 2.3511, "step": 23400 }, { "epoch": 1.7444113263785395, "grad_norm": 2.6003150939941406, "learning_rate": 0.0002, "loss": 2.4049, "step": 23410 }, { "epoch": 1.7451564828614008, "grad_norm": 2.4863944053649902, "learning_rate": 0.0002, "loss": 2.3698, "step": 23420 }, { "epoch": 1.7459016393442623, "grad_norm": 2.313157558441162, "learning_rate": 0.0002, "loss": 2.489, "step": 23430 }, { "epoch": 1.7466467958271237, "grad_norm": 2.4840941429138184, "learning_rate": 0.0002, "loss": 2.4372, "step": 23440 }, { "epoch": 1.747391952309985, "grad_norm": 2.6886258125305176, "learning_rate": 0.0002, "loss": 2.4685, "step": 23450 }, { "epoch": 1.7481371087928466, "grad_norm": 2.4791266918182373, "learning_rate": 0.0002, "loss": 2.4431, "step": 23460 }, { "epoch": 1.748882265275708, "grad_norm": 2.7931692600250244, "learning_rate": 0.0002, "loss": 2.2753, "step": 23470 }, { "epoch": 1.7496274217585692, "grad_norm": 2.818281650543213, "learning_rate": 0.0002, "loss": 2.4702, "step": 23480 }, { "epoch": 1.7503725782414308, "grad_norm": 2.638704299926758, "learning_rate": 0.0002, "loss": 2.3848, "step": 23490 }, { "epoch": 1.751117734724292, "grad_norm": 2.501591682434082, "learning_rate": 0.0002, "loss": 2.5548, "step": 23500 }, { "epoch": 1.7518628912071534, "grad_norm": 2.969492197036743, "learning_rate": 0.0002, "loss": 2.5042, "step": 23510 }, { "epoch": 1.752608047690015, "grad_norm": 2.4512195587158203, "learning_rate": 0.0002, "loss": 2.2923, "step": 23520 }, { "epoch": 1.7533532041728763, "grad_norm": 2.2900497913360596, "learning_rate": 0.0002, "loss": 2.4102, "step": 23530 }, { "epoch": 1.7540983606557377, "grad_norm": 2.6613259315490723, "learning_rate": 0.0002, "loss": 2.277, "step": 23540 }, { "epoch": 1.7548435171385992, "grad_norm": 2.427893877029419, "learning_rate": 0.0002, "loss": 2.5847, "step": 23550 }, { "epoch": 1.7555886736214605, "grad_norm": 2.631917715072632, "learning_rate": 0.0002, "loss": 2.6333, "step": 23560 }, { "epoch": 1.7563338301043219, "grad_norm": 2.395526170730591, "learning_rate": 0.0002, "loss": 2.5131, "step": 23570 }, { "epoch": 1.7570789865871834, "grad_norm": 2.392958641052246, "learning_rate": 0.0002, "loss": 2.5297, "step": 23580 }, { "epoch": 1.7578241430700448, "grad_norm": 2.660301923751831, "learning_rate": 0.0002, "loss": 2.529, "step": 23590 }, { "epoch": 1.758569299552906, "grad_norm": 2.4387199878692627, "learning_rate": 0.0002, "loss": 2.5486, "step": 23600 }, { "epoch": 1.7593144560357676, "grad_norm": 2.463621139526367, "learning_rate": 0.0002, "loss": 2.6065, "step": 23610 }, { "epoch": 1.7600596125186287, "grad_norm": 2.4696829319000244, "learning_rate": 0.0002, "loss": 2.5175, "step": 23620 }, { "epoch": 1.7608047690014903, "grad_norm": 2.802701473236084, "learning_rate": 0.0002, "loss": 2.5997, "step": 23630 }, { "epoch": 1.7615499254843519, "grad_norm": 2.8156678676605225, "learning_rate": 0.0002, "loss": 2.5085, "step": 23640 }, { "epoch": 1.762295081967213, "grad_norm": 2.4574429988861084, "learning_rate": 0.0002, "loss": 2.5127, "step": 23650 }, { "epoch": 1.7630402384500745, "grad_norm": 2.26594614982605, "learning_rate": 0.0002, "loss": 2.6283, "step": 23660 }, { "epoch": 1.763785394932936, "grad_norm": 2.458120107650757, "learning_rate": 0.0002, "loss": 2.4844, "step": 23670 }, { "epoch": 1.7645305514157972, "grad_norm": 2.4478704929351807, "learning_rate": 0.0002, "loss": 2.596, "step": 23680 }, { "epoch": 1.7652757078986587, "grad_norm": 2.5973260402679443, "learning_rate": 0.0002, "loss": 2.7246, "step": 23690 }, { "epoch": 1.7660208643815203, "grad_norm": 2.2895405292510986, "learning_rate": 0.0002, "loss": 2.5442, "step": 23700 }, { "epoch": 1.7667660208643814, "grad_norm": 3.0286567211151123, "learning_rate": 0.0002, "loss": 2.4685, "step": 23710 }, { "epoch": 1.767511177347243, "grad_norm": 2.95426869392395, "learning_rate": 0.0002, "loss": 2.6306, "step": 23720 }, { "epoch": 1.7682563338301043, "grad_norm": 2.4281368255615234, "learning_rate": 0.0002, "loss": 2.4162, "step": 23730 }, { "epoch": 1.7690014903129656, "grad_norm": 2.274803876876831, "learning_rate": 0.0002, "loss": 2.2333, "step": 23740 }, { "epoch": 1.7697466467958272, "grad_norm": 2.6567139625549316, "learning_rate": 0.0002, "loss": 2.5369, "step": 23750 }, { "epoch": 1.7704918032786885, "grad_norm": 2.1148862838745117, "learning_rate": 0.0002, "loss": 2.4882, "step": 23760 }, { "epoch": 1.7712369597615498, "grad_norm": 2.397024631500244, "learning_rate": 0.0002, "loss": 2.5951, "step": 23770 }, { "epoch": 1.7719821162444114, "grad_norm": 2.556600570678711, "learning_rate": 0.0002, "loss": 2.4878, "step": 23780 }, { "epoch": 1.7727272727272727, "grad_norm": 2.361325979232788, "learning_rate": 0.0002, "loss": 2.6025, "step": 23790 }, { "epoch": 1.773472429210134, "grad_norm": 2.221918821334839, "learning_rate": 0.0002, "loss": 2.4707, "step": 23800 }, { "epoch": 1.7742175856929956, "grad_norm": 2.5794131755828857, "learning_rate": 0.0002, "loss": 2.3977, "step": 23810 }, { "epoch": 1.774962742175857, "grad_norm": 2.4929275512695312, "learning_rate": 0.0002, "loss": 2.6056, "step": 23820 }, { "epoch": 1.7757078986587183, "grad_norm": 2.478788375854492, "learning_rate": 0.0002, "loss": 2.5346, "step": 23830 }, { "epoch": 1.7764530551415798, "grad_norm": 2.7491824626922607, "learning_rate": 0.0002, "loss": 2.2653, "step": 23840 }, { "epoch": 1.7771982116244411, "grad_norm": 2.842250108718872, "learning_rate": 0.0002, "loss": 2.5534, "step": 23850 }, { "epoch": 1.7779433681073025, "grad_norm": 2.78625226020813, "learning_rate": 0.0002, "loss": 2.529, "step": 23860 }, { "epoch": 1.778688524590164, "grad_norm": 2.5092759132385254, "learning_rate": 0.0002, "loss": 2.3547, "step": 23870 }, { "epoch": 1.7794336810730254, "grad_norm": 2.5346925258636475, "learning_rate": 0.0002, "loss": 2.5704, "step": 23880 }, { "epoch": 1.7801788375558867, "grad_norm": 2.1645240783691406, "learning_rate": 0.0002, "loss": 2.4889, "step": 23890 }, { "epoch": 1.7809239940387482, "grad_norm": 2.653005361557007, "learning_rate": 0.0002, "loss": 2.4614, "step": 23900 }, { "epoch": 1.7816691505216096, "grad_norm": 3.1483113765716553, "learning_rate": 0.0002, "loss": 2.5343, "step": 23910 }, { "epoch": 1.782414307004471, "grad_norm": 2.815312385559082, "learning_rate": 0.0002, "loss": 2.491, "step": 23920 }, { "epoch": 1.7831594634873325, "grad_norm": 2.5819027423858643, "learning_rate": 0.0002, "loss": 2.4377, "step": 23930 }, { "epoch": 1.7839046199701938, "grad_norm": 2.691847085952759, "learning_rate": 0.0002, "loss": 2.5363, "step": 23940 }, { "epoch": 1.7846497764530551, "grad_norm": 3.1236226558685303, "learning_rate": 0.0002, "loss": 2.6327, "step": 23950 }, { "epoch": 1.7853949329359167, "grad_norm": 2.2760941982269287, "learning_rate": 0.0002, "loss": 2.4321, "step": 23960 }, { "epoch": 1.786140089418778, "grad_norm": 2.5152201652526855, "learning_rate": 0.0002, "loss": 2.519, "step": 23970 }, { "epoch": 1.7868852459016393, "grad_norm": 2.2634105682373047, "learning_rate": 0.0002, "loss": 2.4622, "step": 23980 }, { "epoch": 1.7876304023845009, "grad_norm": 2.6219863891601562, "learning_rate": 0.0002, "loss": 2.6106, "step": 23990 }, { "epoch": 1.788375558867362, "grad_norm": 3.093374013900757, "learning_rate": 0.0002, "loss": 2.4982, "step": 24000 }, { "epoch": 1.7891207153502235, "grad_norm": 2.5569963455200195, "learning_rate": 0.0002, "loss": 2.4287, "step": 24010 }, { "epoch": 1.789865871833085, "grad_norm": 2.514453887939453, "learning_rate": 0.0002, "loss": 2.5517, "step": 24020 }, { "epoch": 1.7906110283159462, "grad_norm": 2.578756809234619, "learning_rate": 0.0002, "loss": 2.5094, "step": 24030 }, { "epoch": 1.7913561847988078, "grad_norm": 2.6853983402252197, "learning_rate": 0.0002, "loss": 2.6338, "step": 24040 }, { "epoch": 1.7921013412816693, "grad_norm": 2.5106022357940674, "learning_rate": 0.0002, "loss": 2.5402, "step": 24050 }, { "epoch": 1.7928464977645304, "grad_norm": 2.823246479034424, "learning_rate": 0.0002, "loss": 2.321, "step": 24060 }, { "epoch": 1.793591654247392, "grad_norm": 2.782550096511841, "learning_rate": 0.0002, "loss": 2.6001, "step": 24070 }, { "epoch": 1.7943368107302533, "grad_norm": 2.8128268718719482, "learning_rate": 0.0002, "loss": 2.3526, "step": 24080 }, { "epoch": 1.7950819672131146, "grad_norm": 2.4673032760620117, "learning_rate": 0.0002, "loss": 2.2312, "step": 24090 }, { "epoch": 1.7958271236959762, "grad_norm": 2.503586530685425, "learning_rate": 0.0002, "loss": 2.4475, "step": 24100 }, { "epoch": 1.7965722801788375, "grad_norm": 2.3996407985687256, "learning_rate": 0.0002, "loss": 2.5589, "step": 24110 }, { "epoch": 1.7973174366616989, "grad_norm": 2.5348939895629883, "learning_rate": 0.0002, "loss": 2.2477, "step": 24120 }, { "epoch": 1.7980625931445604, "grad_norm": 2.695479393005371, "learning_rate": 0.0002, "loss": 2.5913, "step": 24130 }, { "epoch": 1.7988077496274217, "grad_norm": 2.827122211456299, "learning_rate": 0.0002, "loss": 2.3807, "step": 24140 }, { "epoch": 1.799552906110283, "grad_norm": 2.496100425720215, "learning_rate": 0.0002, "loss": 2.4569, "step": 24150 }, { "epoch": 1.8002980625931446, "grad_norm": 2.7637364864349365, "learning_rate": 0.0002, "loss": 2.4109, "step": 24160 }, { "epoch": 1.801043219076006, "grad_norm": 2.378218650817871, "learning_rate": 0.0002, "loss": 2.3436, "step": 24170 }, { "epoch": 1.8017883755588673, "grad_norm": 2.8666129112243652, "learning_rate": 0.0002, "loss": 2.6058, "step": 24180 }, { "epoch": 1.8025335320417288, "grad_norm": 2.1839277744293213, "learning_rate": 0.0002, "loss": 2.3226, "step": 24190 }, { "epoch": 1.8032786885245902, "grad_norm": 2.1608381271362305, "learning_rate": 0.0002, "loss": 2.5368, "step": 24200 }, { "epoch": 1.8040238450074515, "grad_norm": 2.1796987056732178, "learning_rate": 0.0002, "loss": 2.5402, "step": 24210 }, { "epoch": 1.804769001490313, "grad_norm": 2.9191579818725586, "learning_rate": 0.0002, "loss": 2.5582, "step": 24220 }, { "epoch": 1.8055141579731744, "grad_norm": 2.621483087539673, "learning_rate": 0.0002, "loss": 2.4998, "step": 24230 }, { "epoch": 1.8062593144560357, "grad_norm": 2.5693342685699463, "learning_rate": 0.0002, "loss": 2.6363, "step": 24240 }, { "epoch": 1.8070044709388973, "grad_norm": 2.3040270805358887, "learning_rate": 0.0002, "loss": 2.3897, "step": 24250 }, { "epoch": 1.8077496274217586, "grad_norm": 2.5259714126586914, "learning_rate": 0.0002, "loss": 2.3625, "step": 24260 }, { "epoch": 1.80849478390462, "grad_norm": 2.1863303184509277, "learning_rate": 0.0002, "loss": 2.5026, "step": 24270 }, { "epoch": 1.8092399403874815, "grad_norm": 2.6622605323791504, "learning_rate": 0.0002, "loss": 2.5436, "step": 24280 }, { "epoch": 1.8099850968703428, "grad_norm": 2.9189093112945557, "learning_rate": 0.0002, "loss": 2.6779, "step": 24290 }, { "epoch": 1.8107302533532041, "grad_norm": 2.7089121341705322, "learning_rate": 0.0002, "loss": 2.6477, "step": 24300 }, { "epoch": 1.8114754098360657, "grad_norm": 2.326711654663086, "learning_rate": 0.0002, "loss": 2.5291, "step": 24310 }, { "epoch": 1.812220566318927, "grad_norm": 2.570425033569336, "learning_rate": 0.0002, "loss": 2.4593, "step": 24320 }, { "epoch": 1.8129657228017884, "grad_norm": 3.20462703704834, "learning_rate": 0.0002, "loss": 2.5166, "step": 24330 }, { "epoch": 1.81371087928465, "grad_norm": 2.2558157444000244, "learning_rate": 0.0002, "loss": 2.5881, "step": 24340 }, { "epoch": 1.814456035767511, "grad_norm": 2.6994922161102295, "learning_rate": 0.0002, "loss": 2.5089, "step": 24350 }, { "epoch": 1.8152011922503726, "grad_norm": 2.84627103805542, "learning_rate": 0.0002, "loss": 2.4608, "step": 24360 }, { "epoch": 1.8159463487332341, "grad_norm": 2.4853367805480957, "learning_rate": 0.0002, "loss": 2.2028, "step": 24370 }, { "epoch": 1.8166915052160952, "grad_norm": 2.2818758487701416, "learning_rate": 0.0002, "loss": 2.471, "step": 24380 }, { "epoch": 1.8174366616989568, "grad_norm": 2.9547371864318848, "learning_rate": 0.0002, "loss": 2.5479, "step": 24390 }, { "epoch": 1.8181818181818183, "grad_norm": 2.700378656387329, "learning_rate": 0.0002, "loss": 2.5568, "step": 24400 }, { "epoch": 1.8189269746646795, "grad_norm": 2.5123701095581055, "learning_rate": 0.0002, "loss": 2.3738, "step": 24410 }, { "epoch": 1.819672131147541, "grad_norm": 2.8290090560913086, "learning_rate": 0.0002, "loss": 2.5638, "step": 24420 }, { "epoch": 1.8204172876304023, "grad_norm": 2.747896671295166, "learning_rate": 0.0002, "loss": 2.754, "step": 24430 }, { "epoch": 1.8211624441132637, "grad_norm": 2.2470264434814453, "learning_rate": 0.0002, "loss": 2.4369, "step": 24440 }, { "epoch": 1.8219076005961252, "grad_norm": 2.4347453117370605, "learning_rate": 0.0002, "loss": 2.4282, "step": 24450 }, { "epoch": 1.8226527570789866, "grad_norm": 2.6316144466400146, "learning_rate": 0.0002, "loss": 2.4912, "step": 24460 }, { "epoch": 1.8233979135618479, "grad_norm": 2.4304425716400146, "learning_rate": 0.0002, "loss": 2.5277, "step": 24470 }, { "epoch": 1.8241430700447094, "grad_norm": 2.7673425674438477, "learning_rate": 0.0002, "loss": 2.6021, "step": 24480 }, { "epoch": 1.8248882265275708, "grad_norm": 2.2696149349212646, "learning_rate": 0.0002, "loss": 2.4493, "step": 24490 }, { "epoch": 1.825633383010432, "grad_norm": 2.58644962310791, "learning_rate": 0.0002, "loss": 2.5297, "step": 24500 }, { "epoch": 1.8263785394932937, "grad_norm": 3.001873254776001, "learning_rate": 0.0002, "loss": 2.3753, "step": 24510 }, { "epoch": 1.827123695976155, "grad_norm": 2.724271297454834, "learning_rate": 0.0002, "loss": 2.4127, "step": 24520 }, { "epoch": 1.8278688524590163, "grad_norm": 2.4911880493164062, "learning_rate": 0.0002, "loss": 2.7232, "step": 24530 }, { "epoch": 1.8286140089418779, "grad_norm": 2.659914493560791, "learning_rate": 0.0002, "loss": 2.4794, "step": 24540 }, { "epoch": 1.8293591654247392, "grad_norm": 2.4134860038757324, "learning_rate": 0.0002, "loss": 2.5825, "step": 24550 }, { "epoch": 1.8301043219076005, "grad_norm": 2.6322641372680664, "learning_rate": 0.0002, "loss": 2.4633, "step": 24560 }, { "epoch": 1.830849478390462, "grad_norm": 2.942608118057251, "learning_rate": 0.0002, "loss": 2.7876, "step": 24570 }, { "epoch": 1.8315946348733234, "grad_norm": 2.5157153606414795, "learning_rate": 0.0002, "loss": 2.4832, "step": 24580 }, { "epoch": 1.8323397913561847, "grad_norm": 2.5834529399871826, "learning_rate": 0.0002, "loss": 2.4591, "step": 24590 }, { "epoch": 1.8330849478390463, "grad_norm": 2.607112169265747, "learning_rate": 0.0002, "loss": 2.5687, "step": 24600 }, { "epoch": 1.8338301043219076, "grad_norm": 2.7782909870147705, "learning_rate": 0.0002, "loss": 2.5903, "step": 24610 }, { "epoch": 1.834575260804769, "grad_norm": 2.6779706478118896, "learning_rate": 0.0002, "loss": 2.561, "step": 24620 }, { "epoch": 1.8353204172876305, "grad_norm": 2.514186143875122, "learning_rate": 0.0002, "loss": 2.5755, "step": 24630 }, { "epoch": 1.8360655737704918, "grad_norm": 2.3403496742248535, "learning_rate": 0.0002, "loss": 2.4768, "step": 24640 }, { "epoch": 1.8368107302533532, "grad_norm": 2.3311824798583984, "learning_rate": 0.0002, "loss": 2.5229, "step": 24650 }, { "epoch": 1.8375558867362147, "grad_norm": 1.7865532636642456, "learning_rate": 0.0002, "loss": 2.5142, "step": 24660 }, { "epoch": 1.838301043219076, "grad_norm": 2.4601778984069824, "learning_rate": 0.0002, "loss": 2.5981, "step": 24670 }, { "epoch": 1.8390461997019374, "grad_norm": 2.6825180053710938, "learning_rate": 0.0002, "loss": 2.5526, "step": 24680 }, { "epoch": 1.839791356184799, "grad_norm": 2.428622245788574, "learning_rate": 0.0002, "loss": 2.2803, "step": 24690 }, { "epoch": 1.84053651266766, "grad_norm": 2.353142261505127, "learning_rate": 0.0002, "loss": 2.4896, "step": 24700 }, { "epoch": 1.8412816691505216, "grad_norm": 2.611560106277466, "learning_rate": 0.0002, "loss": 2.6822, "step": 24710 }, { "epoch": 1.8420268256333832, "grad_norm": 2.8267176151275635, "learning_rate": 0.0002, "loss": 2.6869, "step": 24720 }, { "epoch": 1.8427719821162443, "grad_norm": 2.8418166637420654, "learning_rate": 0.0002, "loss": 2.5455, "step": 24730 }, { "epoch": 1.8435171385991058, "grad_norm": 2.458555221557617, "learning_rate": 0.0002, "loss": 2.6303, "step": 24740 }, { "epoch": 1.8442622950819674, "grad_norm": 2.2593302726745605, "learning_rate": 0.0002, "loss": 2.5129, "step": 24750 }, { "epoch": 1.8450074515648285, "grad_norm": 2.7010560035705566, "learning_rate": 0.0002, "loss": 2.3973, "step": 24760 }, { "epoch": 1.84575260804769, "grad_norm": 2.4872236251831055, "learning_rate": 0.0002, "loss": 2.6178, "step": 24770 }, { "epoch": 1.8464977645305514, "grad_norm": 2.882812738418579, "learning_rate": 0.0002, "loss": 2.533, "step": 24780 }, { "epoch": 1.8472429210134127, "grad_norm": 2.354396104812622, "learning_rate": 0.0002, "loss": 2.5111, "step": 24790 }, { "epoch": 1.8479880774962743, "grad_norm": 2.5242695808410645, "learning_rate": 0.0002, "loss": 2.5467, "step": 24800 }, { "epoch": 1.8487332339791356, "grad_norm": 2.3869271278381348, "learning_rate": 0.0002, "loss": 2.578, "step": 24810 }, { "epoch": 1.849478390461997, "grad_norm": 2.6933507919311523, "learning_rate": 0.0002, "loss": 2.6325, "step": 24820 }, { "epoch": 1.8502235469448585, "grad_norm": 2.445830821990967, "learning_rate": 0.0002, "loss": 2.5435, "step": 24830 }, { "epoch": 1.8509687034277198, "grad_norm": 2.1250483989715576, "learning_rate": 0.0002, "loss": 2.6424, "step": 24840 }, { "epoch": 1.8517138599105811, "grad_norm": 2.1476426124572754, "learning_rate": 0.0002, "loss": 2.5599, "step": 24850 }, { "epoch": 1.8524590163934427, "grad_norm": 2.7636313438415527, "learning_rate": 0.0002, "loss": 2.6807, "step": 24860 }, { "epoch": 1.853204172876304, "grad_norm": 2.6150963306427, "learning_rate": 0.0002, "loss": 2.5823, "step": 24870 }, { "epoch": 1.8539493293591653, "grad_norm": 2.9421913623809814, "learning_rate": 0.0002, "loss": 2.5598, "step": 24880 }, { "epoch": 1.854694485842027, "grad_norm": 2.576680898666382, "learning_rate": 0.0002, "loss": 2.4352, "step": 24890 }, { "epoch": 1.8554396423248882, "grad_norm": 2.372847557067871, "learning_rate": 0.0002, "loss": 2.4352, "step": 24900 }, { "epoch": 1.8561847988077496, "grad_norm": 2.7840962409973145, "learning_rate": 0.0002, "loss": 2.4458, "step": 24910 }, { "epoch": 1.8569299552906111, "grad_norm": 2.608435869216919, "learning_rate": 0.0002, "loss": 2.3279, "step": 24920 }, { "epoch": 1.8576751117734724, "grad_norm": 2.2083051204681396, "learning_rate": 0.0002, "loss": 2.5685, "step": 24930 }, { "epoch": 1.8584202682563338, "grad_norm": 2.6166601181030273, "learning_rate": 0.0002, "loss": 2.5726, "step": 24940 }, { "epoch": 1.8591654247391953, "grad_norm": 2.4802517890930176, "learning_rate": 0.0002, "loss": 2.4114, "step": 24950 }, { "epoch": 1.8599105812220567, "grad_norm": 2.6344408988952637, "learning_rate": 0.0002, "loss": 2.6164, "step": 24960 }, { "epoch": 1.860655737704918, "grad_norm": 2.4686670303344727, "learning_rate": 0.0002, "loss": 2.4948, "step": 24970 }, { "epoch": 1.8614008941877795, "grad_norm": 2.556004762649536, "learning_rate": 0.0002, "loss": 2.5358, "step": 24980 }, { "epoch": 1.8621460506706409, "grad_norm": 2.642232656478882, "learning_rate": 0.0002, "loss": 2.4455, "step": 24990 }, { "epoch": 1.8628912071535022, "grad_norm": 2.6357104778289795, "learning_rate": 0.0002, "loss": 2.4734, "step": 25000 }, { "epoch": 1.8636363636363638, "grad_norm": 2.7195780277252197, "learning_rate": 0.0002, "loss": 2.5011, "step": 25010 }, { "epoch": 1.864381520119225, "grad_norm": 2.306607961654663, "learning_rate": 0.0002, "loss": 2.4745, "step": 25020 }, { "epoch": 1.8651266766020864, "grad_norm": 2.3458077907562256, "learning_rate": 0.0002, "loss": 2.5006, "step": 25030 }, { "epoch": 1.865871833084948, "grad_norm": 2.6288564205169678, "learning_rate": 0.0002, "loss": 2.4656, "step": 25040 }, { "epoch": 1.866616989567809, "grad_norm": 2.5845961570739746, "learning_rate": 0.0002, "loss": 2.4353, "step": 25050 }, { "epoch": 1.8673621460506706, "grad_norm": 2.3820455074310303, "learning_rate": 0.0002, "loss": 2.5236, "step": 25060 }, { "epoch": 1.8681073025335322, "grad_norm": 2.8274917602539062, "learning_rate": 0.0002, "loss": 2.5278, "step": 25070 }, { "epoch": 1.8688524590163933, "grad_norm": 2.551588296890259, "learning_rate": 0.0002, "loss": 2.5355, "step": 25080 }, { "epoch": 1.8695976154992549, "grad_norm": 2.707473039627075, "learning_rate": 0.0002, "loss": 2.4049, "step": 25090 }, { "epoch": 1.8703427719821164, "grad_norm": 2.40470290184021, "learning_rate": 0.0002, "loss": 2.5121, "step": 25100 }, { "epoch": 1.8710879284649775, "grad_norm": 2.507206916809082, "learning_rate": 0.0002, "loss": 2.2627, "step": 25110 }, { "epoch": 1.871833084947839, "grad_norm": 2.4339914321899414, "learning_rate": 0.0002, "loss": 2.5134, "step": 25120 }, { "epoch": 1.8725782414307004, "grad_norm": 2.4960076808929443, "learning_rate": 0.0002, "loss": 2.5428, "step": 25130 }, { "epoch": 1.8733233979135617, "grad_norm": 3.661783218383789, "learning_rate": 0.0002, "loss": 2.6599, "step": 25140 }, { "epoch": 1.8740685543964233, "grad_norm": 2.513418197631836, "learning_rate": 0.0002, "loss": 2.4901, "step": 25150 }, { "epoch": 1.8748137108792846, "grad_norm": 2.3090083599090576, "learning_rate": 0.0002, "loss": 2.5672, "step": 25160 }, { "epoch": 1.875558867362146, "grad_norm": 2.645890951156616, "learning_rate": 0.0002, "loss": 2.5898, "step": 25170 }, { "epoch": 1.8763040238450075, "grad_norm": 3.336200714111328, "learning_rate": 0.0002, "loss": 2.6214, "step": 25180 }, { "epoch": 1.8770491803278688, "grad_norm": 2.3214824199676514, "learning_rate": 0.0002, "loss": 2.6054, "step": 25190 }, { "epoch": 1.8777943368107302, "grad_norm": 2.3875439167022705, "learning_rate": 0.0002, "loss": 2.5562, "step": 25200 }, { "epoch": 1.8785394932935917, "grad_norm": 2.643754482269287, "learning_rate": 0.0002, "loss": 2.5102, "step": 25210 }, { "epoch": 1.879284649776453, "grad_norm": 2.7053935527801514, "learning_rate": 0.0002, "loss": 2.3707, "step": 25220 }, { "epoch": 1.8800298062593144, "grad_norm": 2.6861038208007812, "learning_rate": 0.0002, "loss": 2.7176, "step": 25230 }, { "epoch": 1.880774962742176, "grad_norm": 2.6391537189483643, "learning_rate": 0.0002, "loss": 2.3108, "step": 25240 }, { "epoch": 1.8815201192250373, "grad_norm": 2.6725473403930664, "learning_rate": 0.0002, "loss": 2.6606, "step": 25250 }, { "epoch": 1.8822652757078986, "grad_norm": 2.843163013458252, "learning_rate": 0.0002, "loss": 2.4298, "step": 25260 }, { "epoch": 1.8830104321907601, "grad_norm": 2.280057430267334, "learning_rate": 0.0002, "loss": 2.5212, "step": 25270 }, { "epoch": 1.8837555886736215, "grad_norm": 2.221116304397583, "learning_rate": 0.0002, "loss": 2.4512, "step": 25280 }, { "epoch": 1.8845007451564828, "grad_norm": 2.413114547729492, "learning_rate": 0.0002, "loss": 2.4593, "step": 25290 }, { "epoch": 1.8852459016393444, "grad_norm": 2.225498914718628, "learning_rate": 0.0002, "loss": 2.3172, "step": 25300 }, { "epoch": 1.8859910581222057, "grad_norm": 2.6563146114349365, "learning_rate": 0.0002, "loss": 2.5154, "step": 25310 }, { "epoch": 1.886736214605067, "grad_norm": 2.232198715209961, "learning_rate": 0.0002, "loss": 2.3323, "step": 25320 }, { "epoch": 1.8874813710879286, "grad_norm": 2.4613037109375, "learning_rate": 0.0002, "loss": 2.5738, "step": 25330 }, { "epoch": 1.88822652757079, "grad_norm": 2.7953736782073975, "learning_rate": 0.0002, "loss": 2.4914, "step": 25340 }, { "epoch": 1.8889716840536512, "grad_norm": 2.416280508041382, "learning_rate": 0.0002, "loss": 2.643, "step": 25350 }, { "epoch": 1.8897168405365128, "grad_norm": 2.5031518936157227, "learning_rate": 0.0002, "loss": 2.3818, "step": 25360 }, { "epoch": 1.8904619970193741, "grad_norm": 2.5476391315460205, "learning_rate": 0.0002, "loss": 2.6249, "step": 25370 }, { "epoch": 1.8912071535022354, "grad_norm": 2.5421531200408936, "learning_rate": 0.0002, "loss": 2.6841, "step": 25380 }, { "epoch": 1.891952309985097, "grad_norm": 2.7282488346099854, "learning_rate": 0.0002, "loss": 2.6655, "step": 25390 }, { "epoch": 1.8926974664679581, "grad_norm": 2.47148060798645, "learning_rate": 0.0002, "loss": 2.3096, "step": 25400 }, { "epoch": 1.8934426229508197, "grad_norm": 2.394070625305176, "learning_rate": 0.0002, "loss": 2.6875, "step": 25410 }, { "epoch": 1.8941877794336812, "grad_norm": 2.7232706546783447, "learning_rate": 0.0002, "loss": 2.5139, "step": 25420 }, { "epoch": 1.8949329359165423, "grad_norm": 2.7696988582611084, "learning_rate": 0.0002, "loss": 2.5473, "step": 25430 }, { "epoch": 1.8956780923994039, "grad_norm": 2.484501838684082, "learning_rate": 0.0002, "loss": 2.6315, "step": 25440 }, { "epoch": 1.8964232488822654, "grad_norm": 2.669006109237671, "learning_rate": 0.0002, "loss": 2.4127, "step": 25450 }, { "epoch": 1.8971684053651265, "grad_norm": 2.8366858959198, "learning_rate": 0.0002, "loss": 2.6871, "step": 25460 }, { "epoch": 1.897913561847988, "grad_norm": 2.7077786922454834, "learning_rate": 0.0002, "loss": 2.3913, "step": 25470 }, { "epoch": 1.8986587183308494, "grad_norm": 2.649768829345703, "learning_rate": 0.0002, "loss": 2.6283, "step": 25480 }, { "epoch": 1.8994038748137108, "grad_norm": 2.4160945415496826, "learning_rate": 0.0002, "loss": 2.5643, "step": 25490 }, { "epoch": 1.9001490312965723, "grad_norm": 2.4574332237243652, "learning_rate": 0.0002, "loss": 2.1872, "step": 25500 }, { "epoch": 1.9008941877794336, "grad_norm": 2.4507484436035156, "learning_rate": 0.0002, "loss": 2.532, "step": 25510 }, { "epoch": 1.901639344262295, "grad_norm": 1.882723093032837, "learning_rate": 0.0002, "loss": 2.4446, "step": 25520 }, { "epoch": 1.9023845007451565, "grad_norm": 2.4363670349121094, "learning_rate": 0.0002, "loss": 2.3938, "step": 25530 }, { "epoch": 1.9031296572280179, "grad_norm": 2.288465738296509, "learning_rate": 0.0002, "loss": 2.5107, "step": 25540 }, { "epoch": 1.9038748137108792, "grad_norm": 2.611039400100708, "learning_rate": 0.0002, "loss": 2.5329, "step": 25550 }, { "epoch": 1.9046199701937407, "grad_norm": 2.746476173400879, "learning_rate": 0.0002, "loss": 2.4759, "step": 25560 }, { "epoch": 1.905365126676602, "grad_norm": 2.628481864929199, "learning_rate": 0.0002, "loss": 2.6533, "step": 25570 }, { "epoch": 1.9061102831594634, "grad_norm": 2.5151212215423584, "learning_rate": 0.0002, "loss": 2.4787, "step": 25580 }, { "epoch": 1.906855439642325, "grad_norm": 2.3716254234313965, "learning_rate": 0.0002, "loss": 2.5077, "step": 25590 }, { "epoch": 1.9076005961251863, "grad_norm": 2.687274932861328, "learning_rate": 0.0002, "loss": 2.3605, "step": 25600 }, { "epoch": 1.9083457526080476, "grad_norm": 2.3180296421051025, "learning_rate": 0.0002, "loss": 2.4078, "step": 25610 }, { "epoch": 1.9090909090909092, "grad_norm": 2.033092737197876, "learning_rate": 0.0002, "loss": 2.2995, "step": 25620 }, { "epoch": 1.9098360655737705, "grad_norm": 2.2614009380340576, "learning_rate": 0.0002, "loss": 2.5323, "step": 25630 }, { "epoch": 1.9105812220566318, "grad_norm": 2.1245622634887695, "learning_rate": 0.0002, "loss": 2.4616, "step": 25640 }, { "epoch": 1.9113263785394934, "grad_norm": 2.5503997802734375, "learning_rate": 0.0002, "loss": 2.6637, "step": 25650 }, { "epoch": 1.9120715350223547, "grad_norm": 2.7903940677642822, "learning_rate": 0.0002, "loss": 2.6795, "step": 25660 }, { "epoch": 1.912816691505216, "grad_norm": 2.427300453186035, "learning_rate": 0.0002, "loss": 2.4091, "step": 25670 }, { "epoch": 1.9135618479880776, "grad_norm": 2.535790205001831, "learning_rate": 0.0002, "loss": 2.3897, "step": 25680 }, { "epoch": 1.914307004470939, "grad_norm": 2.60248064994812, "learning_rate": 0.0002, "loss": 2.5952, "step": 25690 }, { "epoch": 1.9150521609538003, "grad_norm": 2.7798526287078857, "learning_rate": 0.0002, "loss": 2.4811, "step": 25700 }, { "epoch": 1.9157973174366618, "grad_norm": 2.0604312419891357, "learning_rate": 0.0002, "loss": 2.6361, "step": 25710 }, { "epoch": 1.9165424739195231, "grad_norm": 2.663926124572754, "learning_rate": 0.0002, "loss": 2.4111, "step": 25720 }, { "epoch": 1.9172876304023845, "grad_norm": 2.741672992706299, "learning_rate": 0.0002, "loss": 2.391, "step": 25730 }, { "epoch": 1.918032786885246, "grad_norm": 2.5494937896728516, "learning_rate": 0.0002, "loss": 2.497, "step": 25740 }, { "epoch": 1.9187779433681071, "grad_norm": 2.7732324600219727, "learning_rate": 0.0002, "loss": 2.3377, "step": 25750 }, { "epoch": 1.9195230998509687, "grad_norm": 2.3186347484588623, "learning_rate": 0.0002, "loss": 2.6408, "step": 25760 }, { "epoch": 1.9202682563338302, "grad_norm": 2.6721131801605225, "learning_rate": 0.0002, "loss": 2.5869, "step": 25770 }, { "epoch": 1.9210134128166914, "grad_norm": 2.4322500228881836, "learning_rate": 0.0002, "loss": 2.4635, "step": 25780 }, { "epoch": 1.921758569299553, "grad_norm": 2.7271809577941895, "learning_rate": 0.0002, "loss": 2.6242, "step": 25790 }, { "epoch": 1.9225037257824145, "grad_norm": 2.2470762729644775, "learning_rate": 0.0002, "loss": 2.5125, "step": 25800 }, { "epoch": 1.9232488822652756, "grad_norm": 2.861534833908081, "learning_rate": 0.0002, "loss": 2.5176, "step": 25810 }, { "epoch": 1.9239940387481371, "grad_norm": 2.7653250694274902, "learning_rate": 0.0002, "loss": 2.4254, "step": 25820 }, { "epoch": 1.9247391952309985, "grad_norm": 2.630427122116089, "learning_rate": 0.0002, "loss": 2.4022, "step": 25830 }, { "epoch": 1.9254843517138598, "grad_norm": 2.5337681770324707, "learning_rate": 0.0002, "loss": 2.497, "step": 25840 }, { "epoch": 1.9262295081967213, "grad_norm": 2.4699361324310303, "learning_rate": 0.0002, "loss": 2.6214, "step": 25850 }, { "epoch": 1.9269746646795827, "grad_norm": 1.9499725103378296, "learning_rate": 0.0002, "loss": 2.4598, "step": 25860 }, { "epoch": 1.927719821162444, "grad_norm": 2.4422264099121094, "learning_rate": 0.0002, "loss": 2.4889, "step": 25870 }, { "epoch": 1.9284649776453056, "grad_norm": 2.2586450576782227, "learning_rate": 0.0002, "loss": 2.5206, "step": 25880 }, { "epoch": 1.9292101341281669, "grad_norm": 2.6377463340759277, "learning_rate": 0.0002, "loss": 2.3293, "step": 25890 }, { "epoch": 1.9299552906110282, "grad_norm": 2.7808732986450195, "learning_rate": 0.0002, "loss": 2.5287, "step": 25900 }, { "epoch": 1.9307004470938898, "grad_norm": 3.0270299911499023, "learning_rate": 0.0002, "loss": 2.5512, "step": 25910 }, { "epoch": 1.931445603576751, "grad_norm": 2.4635636806488037, "learning_rate": 0.0002, "loss": 2.5143, "step": 25920 }, { "epoch": 1.9321907600596124, "grad_norm": 3.6546430587768555, "learning_rate": 0.0002, "loss": 2.4352, "step": 25930 }, { "epoch": 1.932935916542474, "grad_norm": 2.299994707107544, "learning_rate": 0.0002, "loss": 2.4868, "step": 25940 }, { "epoch": 1.9336810730253353, "grad_norm": 2.5070691108703613, "learning_rate": 0.0002, "loss": 2.5456, "step": 25950 }, { "epoch": 1.9344262295081966, "grad_norm": 2.400691509246826, "learning_rate": 0.0002, "loss": 2.6418, "step": 25960 }, { "epoch": 1.9351713859910582, "grad_norm": 2.6068758964538574, "learning_rate": 0.0002, "loss": 2.5137, "step": 25970 }, { "epoch": 1.9359165424739195, "grad_norm": 2.5092999935150146, "learning_rate": 0.0002, "loss": 2.5346, "step": 25980 }, { "epoch": 1.9366616989567809, "grad_norm": 2.6502528190612793, "learning_rate": 0.0002, "loss": 2.6351, "step": 25990 }, { "epoch": 1.9374068554396424, "grad_norm": 2.406677722930908, "learning_rate": 0.0002, "loss": 2.4647, "step": 26000 }, { "epoch": 1.9381520119225037, "grad_norm": 2.532393217086792, "learning_rate": 0.0002, "loss": 2.4782, "step": 26010 }, { "epoch": 1.938897168405365, "grad_norm": 2.3414087295532227, "learning_rate": 0.0002, "loss": 2.4257, "step": 26020 }, { "epoch": 1.9396423248882266, "grad_norm": 2.684752941131592, "learning_rate": 0.0002, "loss": 2.3947, "step": 26030 }, { "epoch": 1.940387481371088, "grad_norm": 2.432410955429077, "learning_rate": 0.0002, "loss": 2.623, "step": 26040 }, { "epoch": 1.9411326378539493, "grad_norm": 3.1778249740600586, "learning_rate": 0.0002, "loss": 2.4038, "step": 26050 }, { "epoch": 1.9418777943368108, "grad_norm": 2.836688280105591, "learning_rate": 0.0002, "loss": 2.5303, "step": 26060 }, { "epoch": 1.9426229508196722, "grad_norm": 2.4091341495513916, "learning_rate": 0.0002, "loss": 2.4783, "step": 26070 }, { "epoch": 1.9433681073025335, "grad_norm": 2.5459396839141846, "learning_rate": 0.0002, "loss": 2.5745, "step": 26080 }, { "epoch": 1.944113263785395, "grad_norm": 2.6074795722961426, "learning_rate": 0.0002, "loss": 2.5106, "step": 26090 }, { "epoch": 1.9448584202682562, "grad_norm": 2.8824479579925537, "learning_rate": 0.0002, "loss": 2.4438, "step": 26100 }, { "epoch": 1.9456035767511177, "grad_norm": 2.3653016090393066, "learning_rate": 0.0002, "loss": 2.5536, "step": 26110 }, { "epoch": 1.9463487332339793, "grad_norm": 2.3197124004364014, "learning_rate": 0.0002, "loss": 2.4542, "step": 26120 }, { "epoch": 1.9470938897168404, "grad_norm": 2.626267194747925, "learning_rate": 0.0002, "loss": 2.4828, "step": 26130 }, { "epoch": 1.947839046199702, "grad_norm": 2.914698362350464, "learning_rate": 0.0002, "loss": 2.5634, "step": 26140 }, { "epoch": 1.9485842026825635, "grad_norm": 2.8641860485076904, "learning_rate": 0.0002, "loss": 2.6208, "step": 26150 }, { "epoch": 1.9493293591654246, "grad_norm": 2.823122262954712, "learning_rate": 0.0002, "loss": 2.5639, "step": 26160 }, { "epoch": 1.9500745156482862, "grad_norm": 2.152498722076416, "learning_rate": 0.0002, "loss": 2.5226, "step": 26170 }, { "epoch": 1.9508196721311475, "grad_norm": 2.7207183837890625, "learning_rate": 0.0002, "loss": 2.4951, "step": 26180 }, { "epoch": 1.9515648286140088, "grad_norm": 2.4285998344421387, "learning_rate": 0.0002, "loss": 2.5207, "step": 26190 }, { "epoch": 1.9523099850968704, "grad_norm": 2.4538893699645996, "learning_rate": 0.0002, "loss": 2.4034, "step": 26200 }, { "epoch": 1.9530551415797317, "grad_norm": 2.513335704803467, "learning_rate": 0.0002, "loss": 2.4913, "step": 26210 }, { "epoch": 1.953800298062593, "grad_norm": 2.8927149772644043, "learning_rate": 0.0002, "loss": 2.5541, "step": 26220 }, { "epoch": 1.9545454545454546, "grad_norm": 2.610621452331543, "learning_rate": 0.0002, "loss": 2.3861, "step": 26230 }, { "epoch": 1.955290611028316, "grad_norm": 2.7638297080993652, "learning_rate": 0.0002, "loss": 2.452, "step": 26240 }, { "epoch": 1.9560357675111772, "grad_norm": 2.722166061401367, "learning_rate": 0.0002, "loss": 2.6235, "step": 26250 }, { "epoch": 1.9567809239940388, "grad_norm": 2.614750862121582, "learning_rate": 0.0002, "loss": 2.4784, "step": 26260 }, { "epoch": 1.9575260804769001, "grad_norm": 2.5103914737701416, "learning_rate": 0.0002, "loss": 2.5742, "step": 26270 }, { "epoch": 1.9582712369597615, "grad_norm": 2.72584867477417, "learning_rate": 0.0002, "loss": 2.379, "step": 26280 }, { "epoch": 1.959016393442623, "grad_norm": 2.343048095703125, "learning_rate": 0.0002, "loss": 2.5236, "step": 26290 }, { "epoch": 1.9597615499254843, "grad_norm": 2.357050895690918, "learning_rate": 0.0002, "loss": 2.4987, "step": 26300 }, { "epoch": 1.9605067064083457, "grad_norm": 2.71625018119812, "learning_rate": 0.0002, "loss": 2.596, "step": 26310 }, { "epoch": 1.9612518628912072, "grad_norm": 2.790226459503174, "learning_rate": 0.0002, "loss": 2.4313, "step": 26320 }, { "epoch": 1.9619970193740686, "grad_norm": 2.4327445030212402, "learning_rate": 0.0002, "loss": 2.5478, "step": 26330 }, { "epoch": 1.96274217585693, "grad_norm": 2.637118339538574, "learning_rate": 0.0002, "loss": 2.638, "step": 26340 }, { "epoch": 1.9634873323397914, "grad_norm": 2.398437023162842, "learning_rate": 0.0002, "loss": 2.3303, "step": 26350 }, { "epoch": 1.9642324888226528, "grad_norm": 2.131314277648926, "learning_rate": 0.0002, "loss": 2.2881, "step": 26360 }, { "epoch": 1.964977645305514, "grad_norm": 2.55584716796875, "learning_rate": 0.0002, "loss": 2.5168, "step": 26370 }, { "epoch": 1.9657228017883757, "grad_norm": 2.5380923748016357, "learning_rate": 0.0002, "loss": 2.362, "step": 26380 }, { "epoch": 1.966467958271237, "grad_norm": 2.3342056274414062, "learning_rate": 0.0002, "loss": 2.6316, "step": 26390 }, { "epoch": 1.9672131147540983, "grad_norm": 2.483621835708618, "learning_rate": 0.0002, "loss": 2.4311, "step": 26400 }, { "epoch": 1.9679582712369599, "grad_norm": 3.239633560180664, "learning_rate": 0.0002, "loss": 2.4224, "step": 26410 }, { "epoch": 1.9687034277198212, "grad_norm": 2.420544147491455, "learning_rate": 0.0002, "loss": 2.4733, "step": 26420 }, { "epoch": 1.9694485842026825, "grad_norm": 2.3765010833740234, "learning_rate": 0.0002, "loss": 2.519, "step": 26430 }, { "epoch": 1.970193740685544, "grad_norm": 2.2879652976989746, "learning_rate": 0.0002, "loss": 2.5747, "step": 26440 }, { "epoch": 1.9709388971684052, "grad_norm": 2.4816901683807373, "learning_rate": 0.0002, "loss": 2.566, "step": 26450 }, { "epoch": 1.9716840536512668, "grad_norm": 2.6422929763793945, "learning_rate": 0.0002, "loss": 2.4609, "step": 26460 }, { "epoch": 1.9724292101341283, "grad_norm": 3.031636953353882, "learning_rate": 0.0002, "loss": 2.4401, "step": 26470 }, { "epoch": 1.9731743666169894, "grad_norm": 1.9829246997833252, "learning_rate": 0.0002, "loss": 2.4296, "step": 26480 }, { "epoch": 1.973919523099851, "grad_norm": 2.687513828277588, "learning_rate": 0.0002, "loss": 2.5862, "step": 26490 }, { "epoch": 1.9746646795827125, "grad_norm": 2.684131145477295, "learning_rate": 0.0002, "loss": 2.6292, "step": 26500 }, { "epoch": 1.9754098360655736, "grad_norm": 2.090724468231201, "learning_rate": 0.0002, "loss": 2.3809, "step": 26510 }, { "epoch": 1.9761549925484352, "grad_norm": 2.503957748413086, "learning_rate": 0.0002, "loss": 2.662, "step": 26520 }, { "epoch": 1.9769001490312967, "grad_norm": 2.3615572452545166, "learning_rate": 0.0002, "loss": 2.5989, "step": 26530 }, { "epoch": 1.9776453055141578, "grad_norm": 2.630366086959839, "learning_rate": 0.0002, "loss": 2.5245, "step": 26540 }, { "epoch": 1.9783904619970194, "grad_norm": 2.7511889934539795, "learning_rate": 0.0002, "loss": 2.4983, "step": 26550 }, { "epoch": 1.9791356184798807, "grad_norm": 2.7013278007507324, "learning_rate": 0.0002, "loss": 2.6212, "step": 26560 }, { "epoch": 1.979880774962742, "grad_norm": 2.5615251064300537, "learning_rate": 0.0002, "loss": 2.5408, "step": 26570 }, { "epoch": 1.9806259314456036, "grad_norm": 2.737309217453003, "learning_rate": 0.0002, "loss": 2.5117, "step": 26580 }, { "epoch": 1.981371087928465, "grad_norm": 2.0868828296661377, "learning_rate": 0.0002, "loss": 2.3501, "step": 26590 }, { "epoch": 1.9821162444113263, "grad_norm": 2.7384564876556396, "learning_rate": 0.0002, "loss": 2.4741, "step": 26600 }, { "epoch": 1.9828614008941878, "grad_norm": 2.5381107330322266, "learning_rate": 0.0002, "loss": 2.5183, "step": 26610 }, { "epoch": 1.9836065573770492, "grad_norm": 2.441673994064331, "learning_rate": 0.0002, "loss": 2.6689, "step": 26620 }, { "epoch": 1.9843517138599105, "grad_norm": 2.331639528274536, "learning_rate": 0.0002, "loss": 2.4582, "step": 26630 }, { "epoch": 1.985096870342772, "grad_norm": 2.777003288269043, "learning_rate": 0.0002, "loss": 2.4258, "step": 26640 }, { "epoch": 1.9858420268256334, "grad_norm": 2.382567882537842, "learning_rate": 0.0002, "loss": 2.4606, "step": 26650 }, { "epoch": 1.9865871833084947, "grad_norm": 2.5854926109313965, "learning_rate": 0.0002, "loss": 2.558, "step": 26660 }, { "epoch": 1.9873323397913563, "grad_norm": 2.8911728858947754, "learning_rate": 0.0002, "loss": 2.6694, "step": 26670 }, { "epoch": 1.9880774962742176, "grad_norm": 2.597140073776245, "learning_rate": 0.0002, "loss": 2.5155, "step": 26680 }, { "epoch": 1.988822652757079, "grad_norm": 2.733752489089966, "learning_rate": 0.0002, "loss": 2.7112, "step": 26690 }, { "epoch": 1.9895678092399405, "grad_norm": 2.311286687850952, "learning_rate": 0.0002, "loss": 2.3415, "step": 26700 }, { "epoch": 1.9903129657228018, "grad_norm": 2.7273147106170654, "learning_rate": 0.0002, "loss": 2.6524, "step": 26710 }, { "epoch": 1.9910581222056631, "grad_norm": 2.367246389389038, "learning_rate": 0.0002, "loss": 2.5106, "step": 26720 }, { "epoch": 1.9918032786885247, "grad_norm": 2.5700063705444336, "learning_rate": 0.0002, "loss": 2.5936, "step": 26730 }, { "epoch": 1.992548435171386, "grad_norm": 2.5638208389282227, "learning_rate": 0.0002, "loss": 2.6146, "step": 26740 }, { "epoch": 1.9932935916542474, "grad_norm": 1.9451677799224854, "learning_rate": 0.0002, "loss": 2.4673, "step": 26750 }, { "epoch": 1.994038748137109, "grad_norm": 2.4559242725372314, "learning_rate": 0.0002, "loss": 2.5946, "step": 26760 }, { "epoch": 1.9947839046199702, "grad_norm": 2.445701837539673, "learning_rate": 0.0002, "loss": 2.7494, "step": 26770 }, { "epoch": 1.9955290611028316, "grad_norm": 2.668653964996338, "learning_rate": 0.0002, "loss": 2.3548, "step": 26780 }, { "epoch": 1.9962742175856931, "grad_norm": 2.523761749267578, "learning_rate": 0.0002, "loss": 2.4606, "step": 26790 }, { "epoch": 1.9970193740685542, "grad_norm": 2.4471657276153564, "learning_rate": 0.0002, "loss": 2.4458, "step": 26800 }, { "epoch": 1.9977645305514158, "grad_norm": 2.2107632160186768, "learning_rate": 0.0002, "loss": 2.5186, "step": 26810 }, { "epoch": 1.9985096870342773, "grad_norm": 2.4634511470794678, "learning_rate": 0.0002, "loss": 2.5912, "step": 26820 }, { "epoch": 1.9992548435171384, "grad_norm": 2.3914170265197754, "learning_rate": 0.0002, "loss": 2.4112, "step": 26830 }, { "epoch": 2.0, "grad_norm": 2.793278455734253, "learning_rate": 0.0002, "loss": 2.4186, "step": 26840 }, { "epoch": 2.0, "eval_runtime": 2778.8528, "eval_samples_per_second": 4.829, "eval_steps_per_second": 0.604, "step": 26840 }, { "epoch": 2.0007451564828616, "grad_norm": 2.427016258239746, "learning_rate": 0.0002, "loss": 2.2428, "step": 26850 }, { "epoch": 2.0014903129657227, "grad_norm": 2.445763111114502, "learning_rate": 0.0002, "loss": 2.4872, "step": 26860 }, { "epoch": 2.002235469448584, "grad_norm": 2.4963574409484863, "learning_rate": 0.0002, "loss": 2.4217, "step": 26870 }, { "epoch": 2.0029806259314458, "grad_norm": 3.122708320617676, "learning_rate": 0.0002, "loss": 2.482, "step": 26880 }, { "epoch": 2.003725782414307, "grad_norm": 2.769930124282837, "learning_rate": 0.0002, "loss": 2.3721, "step": 26890 }, { "epoch": 2.0044709388971684, "grad_norm": 2.6897122859954834, "learning_rate": 0.0002, "loss": 2.2882, "step": 26900 }, { "epoch": 2.00521609538003, "grad_norm": 2.7737324237823486, "learning_rate": 0.0002, "loss": 2.4593, "step": 26910 }, { "epoch": 2.005961251862891, "grad_norm": 2.649350643157959, "learning_rate": 0.0002, "loss": 2.5489, "step": 26920 }, { "epoch": 2.0067064083457526, "grad_norm": 2.721299886703491, "learning_rate": 0.0002, "loss": 2.4069, "step": 26930 }, { "epoch": 2.007451564828614, "grad_norm": 2.5351850986480713, "learning_rate": 0.0002, "loss": 2.3394, "step": 26940 }, { "epoch": 2.0081967213114753, "grad_norm": 2.717834949493408, "learning_rate": 0.0002, "loss": 2.505, "step": 26950 }, { "epoch": 2.008941877794337, "grad_norm": 2.9618914127349854, "learning_rate": 0.0002, "loss": 2.4794, "step": 26960 }, { "epoch": 2.0096870342771984, "grad_norm": 2.4195728302001953, "learning_rate": 0.0002, "loss": 2.3318, "step": 26970 }, { "epoch": 2.0104321907600595, "grad_norm": 3.099935531616211, "learning_rate": 0.0002, "loss": 2.2687, "step": 26980 }, { "epoch": 2.011177347242921, "grad_norm": 2.5944323539733887, "learning_rate": 0.0002, "loss": 2.5544, "step": 26990 }, { "epoch": 2.0119225037257826, "grad_norm": 2.521864175796509, "learning_rate": 0.0002, "loss": 2.4157, "step": 27000 }, { "epoch": 2.0126676602086437, "grad_norm": 2.8137166500091553, "learning_rate": 0.0002, "loss": 2.2001, "step": 27010 }, { "epoch": 2.0134128166915053, "grad_norm": 2.695870876312256, "learning_rate": 0.0002, "loss": 2.3827, "step": 27020 }, { "epoch": 2.0141579731743664, "grad_norm": 2.4329819679260254, "learning_rate": 0.0002, "loss": 2.2124, "step": 27030 }, { "epoch": 2.014903129657228, "grad_norm": 2.539754629135132, "learning_rate": 0.0002, "loss": 2.3536, "step": 27040 }, { "epoch": 2.0156482861400895, "grad_norm": 2.557807207107544, "learning_rate": 0.0002, "loss": 2.3424, "step": 27050 }, { "epoch": 2.0163934426229506, "grad_norm": 3.0624873638153076, "learning_rate": 0.0002, "loss": 2.5087, "step": 27060 }, { "epoch": 2.017138599105812, "grad_norm": 2.791489601135254, "learning_rate": 0.0002, "loss": 2.515, "step": 27070 }, { "epoch": 2.0178837555886737, "grad_norm": 2.6769886016845703, "learning_rate": 0.0002, "loss": 2.5366, "step": 27080 }, { "epoch": 2.018628912071535, "grad_norm": 2.861119270324707, "learning_rate": 0.0002, "loss": 2.5285, "step": 27090 }, { "epoch": 2.0193740685543964, "grad_norm": 2.2920444011688232, "learning_rate": 0.0002, "loss": 2.3931, "step": 27100 }, { "epoch": 2.020119225037258, "grad_norm": 2.878737211227417, "learning_rate": 0.0002, "loss": 2.2288, "step": 27110 }, { "epoch": 2.020864381520119, "grad_norm": 2.782578706741333, "learning_rate": 0.0002, "loss": 2.2606, "step": 27120 }, { "epoch": 2.0216095380029806, "grad_norm": 2.6214089393615723, "learning_rate": 0.0002, "loss": 2.4091, "step": 27130 }, { "epoch": 2.022354694485842, "grad_norm": 2.628899335861206, "learning_rate": 0.0002, "loss": 2.3615, "step": 27140 }, { "epoch": 2.0230998509687033, "grad_norm": 2.4732677936553955, "learning_rate": 0.0002, "loss": 2.2547, "step": 27150 }, { "epoch": 2.023845007451565, "grad_norm": 2.5701940059661865, "learning_rate": 0.0002, "loss": 2.3523, "step": 27160 }, { "epoch": 2.0245901639344264, "grad_norm": 2.6380484104156494, "learning_rate": 0.0002, "loss": 2.2513, "step": 27170 }, { "epoch": 2.0253353204172875, "grad_norm": 2.5996925830841064, "learning_rate": 0.0002, "loss": 2.3843, "step": 27180 }, { "epoch": 2.026080476900149, "grad_norm": 2.727452516555786, "learning_rate": 0.0002, "loss": 2.2922, "step": 27190 }, { "epoch": 2.0268256333830106, "grad_norm": 2.9830238819122314, "learning_rate": 0.0002, "loss": 2.313, "step": 27200 }, { "epoch": 2.0275707898658717, "grad_norm": 2.7571492195129395, "learning_rate": 0.0002, "loss": 2.3851, "step": 27210 }, { "epoch": 2.0283159463487332, "grad_norm": 2.3931894302368164, "learning_rate": 0.0002, "loss": 2.0832, "step": 27220 }, { "epoch": 2.029061102831595, "grad_norm": 2.732896566390991, "learning_rate": 0.0002, "loss": 2.4687, "step": 27230 }, { "epoch": 2.029806259314456, "grad_norm": 2.637854814529419, "learning_rate": 0.0002, "loss": 2.487, "step": 27240 }, { "epoch": 2.0305514157973175, "grad_norm": 2.2118914127349854, "learning_rate": 0.0002, "loss": 2.3466, "step": 27250 }, { "epoch": 2.031296572280179, "grad_norm": 2.7009260654449463, "learning_rate": 0.0002, "loss": 2.3756, "step": 27260 }, { "epoch": 2.03204172876304, "grad_norm": 2.3376128673553467, "learning_rate": 0.0002, "loss": 2.4311, "step": 27270 }, { "epoch": 2.0327868852459017, "grad_norm": 2.7543601989746094, "learning_rate": 0.0002, "loss": 2.2645, "step": 27280 }, { "epoch": 2.0335320417287632, "grad_norm": 2.2563414573669434, "learning_rate": 0.0002, "loss": 2.3356, "step": 27290 }, { "epoch": 2.0342771982116243, "grad_norm": 2.4909472465515137, "learning_rate": 0.0002, "loss": 2.4706, "step": 27300 }, { "epoch": 2.035022354694486, "grad_norm": 2.689833164215088, "learning_rate": 0.0002, "loss": 2.5033, "step": 27310 }, { "epoch": 2.0357675111773474, "grad_norm": 2.7386205196380615, "learning_rate": 0.0002, "loss": 2.4948, "step": 27320 }, { "epoch": 2.0365126676602086, "grad_norm": 2.537919759750366, "learning_rate": 0.0002, "loss": 2.4454, "step": 27330 }, { "epoch": 2.03725782414307, "grad_norm": 2.522033214569092, "learning_rate": 0.0002, "loss": 2.2687, "step": 27340 }, { "epoch": 2.0380029806259317, "grad_norm": 2.6754488945007324, "learning_rate": 0.0002, "loss": 2.2479, "step": 27350 }, { "epoch": 2.0387481371087928, "grad_norm": 2.134866952896118, "learning_rate": 0.0002, "loss": 2.3477, "step": 27360 }, { "epoch": 2.0394932935916543, "grad_norm": 2.3635213375091553, "learning_rate": 0.0002, "loss": 2.4031, "step": 27370 }, { "epoch": 2.0402384500745154, "grad_norm": 2.4664597511291504, "learning_rate": 0.0002, "loss": 2.5241, "step": 27380 }, { "epoch": 2.040983606557377, "grad_norm": 2.7248127460479736, "learning_rate": 0.0002, "loss": 2.4092, "step": 27390 }, { "epoch": 2.0417287630402385, "grad_norm": 2.75724458694458, "learning_rate": 0.0002, "loss": 2.4693, "step": 27400 }, { "epoch": 2.0424739195230996, "grad_norm": 2.580315113067627, "learning_rate": 0.0002, "loss": 2.4291, "step": 27410 }, { "epoch": 2.043219076005961, "grad_norm": 3.07570743560791, "learning_rate": 0.0002, "loss": 2.4226, "step": 27420 }, { "epoch": 2.0439642324888228, "grad_norm": 2.4915974140167236, "learning_rate": 0.0002, "loss": 2.4481, "step": 27430 }, { "epoch": 2.044709388971684, "grad_norm": 2.7171037197113037, "learning_rate": 0.0002, "loss": 2.2091, "step": 27440 }, { "epoch": 2.0454545454545454, "grad_norm": 3.0568337440490723, "learning_rate": 0.0002, "loss": 2.3815, "step": 27450 }, { "epoch": 2.046199701937407, "grad_norm": 2.659975051879883, "learning_rate": 0.0002, "loss": 2.4473, "step": 27460 }, { "epoch": 2.046944858420268, "grad_norm": 2.7344188690185547, "learning_rate": 0.0002, "loss": 2.429, "step": 27470 }, { "epoch": 2.0476900149031296, "grad_norm": 2.360349416732788, "learning_rate": 0.0002, "loss": 2.2687, "step": 27480 }, { "epoch": 2.048435171385991, "grad_norm": 2.6498806476593018, "learning_rate": 0.0002, "loss": 2.4305, "step": 27490 }, { "epoch": 2.0491803278688523, "grad_norm": 2.6238315105438232, "learning_rate": 0.0002, "loss": 2.3188, "step": 27500 }, { "epoch": 2.049925484351714, "grad_norm": 2.6448466777801514, "learning_rate": 0.0002, "loss": 2.286, "step": 27510 }, { "epoch": 2.0506706408345754, "grad_norm": 2.3168444633483887, "learning_rate": 0.0002, "loss": 2.2408, "step": 27520 }, { "epoch": 2.0514157973174365, "grad_norm": 2.7542989253997803, "learning_rate": 0.0002, "loss": 2.2652, "step": 27530 }, { "epoch": 2.052160953800298, "grad_norm": 2.794304132461548, "learning_rate": 0.0002, "loss": 2.3382, "step": 27540 }, { "epoch": 2.0529061102831596, "grad_norm": 2.4550607204437256, "learning_rate": 0.0002, "loss": 2.5653, "step": 27550 }, { "epoch": 2.0536512667660207, "grad_norm": 2.7925267219543457, "learning_rate": 0.0002, "loss": 2.3883, "step": 27560 }, { "epoch": 2.0543964232488823, "grad_norm": 2.732017993927002, "learning_rate": 0.0002, "loss": 2.5619, "step": 27570 }, { "epoch": 2.055141579731744, "grad_norm": 2.7166635990142822, "learning_rate": 0.0002, "loss": 2.3683, "step": 27580 }, { "epoch": 2.055886736214605, "grad_norm": 2.5912301540374756, "learning_rate": 0.0002, "loss": 2.3318, "step": 27590 }, { "epoch": 2.0566318926974665, "grad_norm": 2.5270578861236572, "learning_rate": 0.0002, "loss": 2.2477, "step": 27600 }, { "epoch": 2.057377049180328, "grad_norm": 2.8963217735290527, "learning_rate": 0.0002, "loss": 2.3905, "step": 27610 }, { "epoch": 2.058122205663189, "grad_norm": 2.306488513946533, "learning_rate": 0.0002, "loss": 2.3994, "step": 27620 }, { "epoch": 2.0588673621460507, "grad_norm": 2.74562406539917, "learning_rate": 0.0002, "loss": 2.2255, "step": 27630 }, { "epoch": 2.0596125186289123, "grad_norm": 2.907052755355835, "learning_rate": 0.0002, "loss": 2.4501, "step": 27640 }, { "epoch": 2.0603576751117734, "grad_norm": 2.6865978240966797, "learning_rate": 0.0002, "loss": 2.4872, "step": 27650 }, { "epoch": 2.061102831594635, "grad_norm": 2.829334259033203, "learning_rate": 0.0002, "loss": 2.3885, "step": 27660 }, { "epoch": 2.0618479880774965, "grad_norm": 2.276557445526123, "learning_rate": 0.0002, "loss": 2.2833, "step": 27670 }, { "epoch": 2.0625931445603576, "grad_norm": 2.7757015228271484, "learning_rate": 0.0002, "loss": 2.4336, "step": 27680 }, { "epoch": 2.063338301043219, "grad_norm": 2.630664587020874, "learning_rate": 0.0002, "loss": 2.466, "step": 27690 }, { "epoch": 2.0640834575260807, "grad_norm": 2.500433921813965, "learning_rate": 0.0002, "loss": 2.4625, "step": 27700 }, { "epoch": 2.064828614008942, "grad_norm": 2.5673367977142334, "learning_rate": 0.0002, "loss": 2.3708, "step": 27710 }, { "epoch": 2.0655737704918034, "grad_norm": 2.798462390899658, "learning_rate": 0.0002, "loss": 2.5025, "step": 27720 }, { "epoch": 2.066318926974665, "grad_norm": 2.660876989364624, "learning_rate": 0.0002, "loss": 2.3465, "step": 27730 }, { "epoch": 2.067064083457526, "grad_norm": 2.681070566177368, "learning_rate": 0.0002, "loss": 2.4037, "step": 27740 }, { "epoch": 2.0678092399403876, "grad_norm": 2.4489269256591797, "learning_rate": 0.0002, "loss": 2.3239, "step": 27750 }, { "epoch": 2.0685543964232487, "grad_norm": 2.980785608291626, "learning_rate": 0.0002, "loss": 2.4702, "step": 27760 }, { "epoch": 2.0692995529061102, "grad_norm": 2.7260890007019043, "learning_rate": 0.0002, "loss": 2.3616, "step": 27770 }, { "epoch": 2.070044709388972, "grad_norm": 2.714730739593506, "learning_rate": 0.0002, "loss": 2.3833, "step": 27780 }, { "epoch": 2.070789865871833, "grad_norm": 2.3693652153015137, "learning_rate": 0.0002, "loss": 2.2471, "step": 27790 }, { "epoch": 2.0715350223546944, "grad_norm": 2.6847126483917236, "learning_rate": 0.0002, "loss": 2.5155, "step": 27800 }, { "epoch": 2.072280178837556, "grad_norm": 3.285604476928711, "learning_rate": 0.0002, "loss": 2.5073, "step": 27810 }, { "epoch": 2.073025335320417, "grad_norm": 2.9383273124694824, "learning_rate": 0.0002, "loss": 2.401, "step": 27820 }, { "epoch": 2.0737704918032787, "grad_norm": 2.5305216312408447, "learning_rate": 0.0002, "loss": 2.49, "step": 27830 }, { "epoch": 2.07451564828614, "grad_norm": 3.088163375854492, "learning_rate": 0.0002, "loss": 2.3074, "step": 27840 }, { "epoch": 2.0752608047690013, "grad_norm": 2.9205684661865234, "learning_rate": 0.0002, "loss": 2.3076, "step": 27850 }, { "epoch": 2.076005961251863, "grad_norm": 2.644702196121216, "learning_rate": 0.0002, "loss": 2.3757, "step": 27860 }, { "epoch": 2.0767511177347244, "grad_norm": 2.7440295219421387, "learning_rate": 0.0002, "loss": 2.4924, "step": 27870 }, { "epoch": 2.0774962742175855, "grad_norm": 2.949817657470703, "learning_rate": 0.0002, "loss": 2.343, "step": 27880 }, { "epoch": 2.078241430700447, "grad_norm": 2.5574519634246826, "learning_rate": 0.0002, "loss": 2.4203, "step": 27890 }, { "epoch": 2.0789865871833086, "grad_norm": 2.6132795810699463, "learning_rate": 0.0002, "loss": 2.5225, "step": 27900 }, { "epoch": 2.0797317436661698, "grad_norm": 2.1226987838745117, "learning_rate": 0.0002, "loss": 2.1986, "step": 27910 }, { "epoch": 2.0804769001490313, "grad_norm": 2.5743627548217773, "learning_rate": 0.0002, "loss": 2.3704, "step": 27920 }, { "epoch": 2.081222056631893, "grad_norm": 2.4740493297576904, "learning_rate": 0.0002, "loss": 2.2522, "step": 27930 }, { "epoch": 2.081967213114754, "grad_norm": 2.5864386558532715, "learning_rate": 0.0002, "loss": 2.4904, "step": 27940 }, { "epoch": 2.0827123695976155, "grad_norm": 2.6593196392059326, "learning_rate": 0.0002, "loss": 2.2939, "step": 27950 }, { "epoch": 2.083457526080477, "grad_norm": 2.5160462856292725, "learning_rate": 0.0002, "loss": 2.5326, "step": 27960 }, { "epoch": 2.084202682563338, "grad_norm": 2.8797519207000732, "learning_rate": 0.0002, "loss": 2.4039, "step": 27970 }, { "epoch": 2.0849478390461997, "grad_norm": 2.435398578643799, "learning_rate": 0.0002, "loss": 2.4237, "step": 27980 }, { "epoch": 2.0856929955290613, "grad_norm": 2.6593923568725586, "learning_rate": 0.0002, "loss": 2.5967, "step": 27990 }, { "epoch": 2.0864381520119224, "grad_norm": 2.205998182296753, "learning_rate": 0.0002, "loss": 2.2983, "step": 28000 }, { "epoch": 2.087183308494784, "grad_norm": 2.313530445098877, "learning_rate": 0.0002, "loss": 2.291, "step": 28010 }, { "epoch": 2.0879284649776455, "grad_norm": 2.9315454959869385, "learning_rate": 0.0002, "loss": 2.311, "step": 28020 }, { "epoch": 2.0886736214605066, "grad_norm": 2.7529959678649902, "learning_rate": 0.0002, "loss": 2.2777, "step": 28030 }, { "epoch": 2.089418777943368, "grad_norm": 2.668069362640381, "learning_rate": 0.0002, "loss": 2.6066, "step": 28040 }, { "epoch": 2.0901639344262297, "grad_norm": 2.849188804626465, "learning_rate": 0.0002, "loss": 2.535, "step": 28050 }, { "epoch": 2.090909090909091, "grad_norm": 2.613250970840454, "learning_rate": 0.0002, "loss": 2.2796, "step": 28060 }, { "epoch": 2.0916542473919524, "grad_norm": 2.3394582271575928, "learning_rate": 0.0002, "loss": 2.4765, "step": 28070 }, { "epoch": 2.092399403874814, "grad_norm": 2.5623340606689453, "learning_rate": 0.0002, "loss": 2.4527, "step": 28080 }, { "epoch": 2.093144560357675, "grad_norm": 2.4075920581817627, "learning_rate": 0.0002, "loss": 2.2952, "step": 28090 }, { "epoch": 2.0938897168405366, "grad_norm": 2.763248920440674, "learning_rate": 0.0002, "loss": 2.2659, "step": 28100 }, { "epoch": 2.0946348733233977, "grad_norm": 2.7755019664764404, "learning_rate": 0.0002, "loss": 2.622, "step": 28110 }, { "epoch": 2.0953800298062593, "grad_norm": 2.6472527980804443, "learning_rate": 0.0002, "loss": 2.4746, "step": 28120 }, { "epoch": 2.096125186289121, "grad_norm": 3.476677417755127, "learning_rate": 0.0002, "loss": 2.3413, "step": 28130 }, { "epoch": 2.096870342771982, "grad_norm": 2.5230743885040283, "learning_rate": 0.0002, "loss": 2.3565, "step": 28140 }, { "epoch": 2.0976154992548435, "grad_norm": 2.7483949661254883, "learning_rate": 0.0002, "loss": 2.3682, "step": 28150 }, { "epoch": 2.098360655737705, "grad_norm": 2.6513829231262207, "learning_rate": 0.0002, "loss": 2.2721, "step": 28160 }, { "epoch": 2.099105812220566, "grad_norm": 2.6120636463165283, "learning_rate": 0.0002, "loss": 2.6904, "step": 28170 }, { "epoch": 2.0998509687034277, "grad_norm": 2.4628775119781494, "learning_rate": 0.0002, "loss": 2.4378, "step": 28180 }, { "epoch": 2.1005961251862892, "grad_norm": 2.5905325412750244, "learning_rate": 0.0002, "loss": 2.2657, "step": 28190 }, { "epoch": 2.1013412816691504, "grad_norm": 2.9600167274475098, "learning_rate": 0.0002, "loss": 2.2855, "step": 28200 }, { "epoch": 2.102086438152012, "grad_norm": 3.109748363494873, "learning_rate": 0.0002, "loss": 2.4388, "step": 28210 }, { "epoch": 2.1028315946348735, "grad_norm": 2.639881134033203, "learning_rate": 0.0002, "loss": 2.4958, "step": 28220 }, { "epoch": 2.1035767511177346, "grad_norm": 2.901447057723999, "learning_rate": 0.0002, "loss": 2.6251, "step": 28230 }, { "epoch": 2.104321907600596, "grad_norm": 2.4745752811431885, "learning_rate": 0.0002, "loss": 2.5932, "step": 28240 }, { "epoch": 2.1050670640834577, "grad_norm": 2.363933801651001, "learning_rate": 0.0002, "loss": 2.3875, "step": 28250 }, { "epoch": 2.105812220566319, "grad_norm": 2.6219677925109863, "learning_rate": 0.0002, "loss": 2.3415, "step": 28260 }, { "epoch": 2.1065573770491803, "grad_norm": 2.6669254302978516, "learning_rate": 0.0002, "loss": 2.5103, "step": 28270 }, { "epoch": 2.107302533532042, "grad_norm": 2.7276668548583984, "learning_rate": 0.0002, "loss": 2.2377, "step": 28280 }, { "epoch": 2.108047690014903, "grad_norm": 2.473850727081299, "learning_rate": 0.0002, "loss": 2.4347, "step": 28290 }, { "epoch": 2.1087928464977646, "grad_norm": 2.89056396484375, "learning_rate": 0.0002, "loss": 2.3286, "step": 28300 }, { "epoch": 2.109538002980626, "grad_norm": 2.4817709922790527, "learning_rate": 0.0002, "loss": 2.5692, "step": 28310 }, { "epoch": 2.110283159463487, "grad_norm": 1.887121319770813, "learning_rate": 0.0002, "loss": 2.2894, "step": 28320 }, { "epoch": 2.1110283159463488, "grad_norm": 2.799809694290161, "learning_rate": 0.0002, "loss": 2.4199, "step": 28330 }, { "epoch": 2.1117734724292103, "grad_norm": 2.481428861618042, "learning_rate": 0.0002, "loss": 2.3954, "step": 28340 }, { "epoch": 2.1125186289120714, "grad_norm": 2.573892831802368, "learning_rate": 0.0002, "loss": 2.4221, "step": 28350 }, { "epoch": 2.113263785394933, "grad_norm": 2.6230032444000244, "learning_rate": 0.0002, "loss": 2.3498, "step": 28360 }, { "epoch": 2.1140089418777945, "grad_norm": 2.3305563926696777, "learning_rate": 0.0002, "loss": 2.4446, "step": 28370 }, { "epoch": 2.1147540983606556, "grad_norm": 2.297919750213623, "learning_rate": 0.0002, "loss": 2.452, "step": 28380 }, { "epoch": 2.115499254843517, "grad_norm": 3.0434772968292236, "learning_rate": 0.0002, "loss": 2.3133, "step": 28390 }, { "epoch": 2.1162444113263787, "grad_norm": 3.005995750427246, "learning_rate": 0.0002, "loss": 2.3543, "step": 28400 }, { "epoch": 2.11698956780924, "grad_norm": 2.65181565284729, "learning_rate": 0.0002, "loss": 2.2972, "step": 28410 }, { "epoch": 2.1177347242921014, "grad_norm": 2.5356462001800537, "learning_rate": 0.0002, "loss": 2.3888, "step": 28420 }, { "epoch": 2.118479880774963, "grad_norm": 2.4722752571105957, "learning_rate": 0.0002, "loss": 2.4412, "step": 28430 }, { "epoch": 2.119225037257824, "grad_norm": 2.7374267578125, "learning_rate": 0.0002, "loss": 2.5805, "step": 28440 }, { "epoch": 2.1199701937406856, "grad_norm": 2.93241548538208, "learning_rate": 0.0002, "loss": 2.5802, "step": 28450 }, { "epoch": 2.1207153502235467, "grad_norm": 2.6112005710601807, "learning_rate": 0.0002, "loss": 2.485, "step": 28460 }, { "epoch": 2.1214605067064083, "grad_norm": 2.2507147789001465, "learning_rate": 0.0002, "loss": 2.3001, "step": 28470 }, { "epoch": 2.12220566318927, "grad_norm": 2.8266193866729736, "learning_rate": 0.0002, "loss": 2.3678, "step": 28480 }, { "epoch": 2.122950819672131, "grad_norm": 2.5680441856384277, "learning_rate": 0.0002, "loss": 2.5827, "step": 28490 }, { "epoch": 2.1236959761549925, "grad_norm": 2.623478889465332, "learning_rate": 0.0002, "loss": 2.3191, "step": 28500 }, { "epoch": 2.124441132637854, "grad_norm": 2.591749906539917, "learning_rate": 0.0002, "loss": 2.4871, "step": 28510 }, { "epoch": 2.125186289120715, "grad_norm": 2.7326436042785645, "learning_rate": 0.0002, "loss": 2.3359, "step": 28520 }, { "epoch": 2.1259314456035767, "grad_norm": 3.0076076984405518, "learning_rate": 0.0002, "loss": 2.3586, "step": 28530 }, { "epoch": 2.1266766020864383, "grad_norm": 2.5301334857940674, "learning_rate": 0.0002, "loss": 2.4504, "step": 28540 }, { "epoch": 2.1274217585692994, "grad_norm": 2.3998358249664307, "learning_rate": 0.0002, "loss": 2.2544, "step": 28550 }, { "epoch": 2.128166915052161, "grad_norm": 2.7175776958465576, "learning_rate": 0.0002, "loss": 2.582, "step": 28560 }, { "epoch": 2.1289120715350225, "grad_norm": 2.4228036403656006, "learning_rate": 0.0002, "loss": 2.3565, "step": 28570 }, { "epoch": 2.1296572280178836, "grad_norm": 2.6200060844421387, "learning_rate": 0.0002, "loss": 2.1041, "step": 28580 }, { "epoch": 2.130402384500745, "grad_norm": 2.493910312652588, "learning_rate": 0.0002, "loss": 2.3126, "step": 28590 }, { "epoch": 2.1311475409836067, "grad_norm": 3.021311044692993, "learning_rate": 0.0002, "loss": 2.5049, "step": 28600 }, { "epoch": 2.131892697466468, "grad_norm": 2.4209420680999756, "learning_rate": 0.0002, "loss": 2.3074, "step": 28610 }, { "epoch": 2.1326378539493294, "grad_norm": 2.2820701599121094, "learning_rate": 0.0002, "loss": 2.3154, "step": 28620 }, { "epoch": 2.133383010432191, "grad_norm": 2.4041130542755127, "learning_rate": 0.0002, "loss": 2.4206, "step": 28630 }, { "epoch": 2.134128166915052, "grad_norm": 2.7592175006866455, "learning_rate": 0.0002, "loss": 2.4367, "step": 28640 }, { "epoch": 2.1348733233979136, "grad_norm": 2.982658863067627, "learning_rate": 0.0002, "loss": 2.4851, "step": 28650 }, { "epoch": 2.135618479880775, "grad_norm": 2.370884656906128, "learning_rate": 0.0002, "loss": 2.3921, "step": 28660 }, { "epoch": 2.1363636363636362, "grad_norm": 2.121638298034668, "learning_rate": 0.0002, "loss": 2.4153, "step": 28670 }, { "epoch": 2.137108792846498, "grad_norm": 2.6304523944854736, "learning_rate": 0.0002, "loss": 2.5541, "step": 28680 }, { "epoch": 2.1378539493293593, "grad_norm": 2.711233139038086, "learning_rate": 0.0002, "loss": 2.3325, "step": 28690 }, { "epoch": 2.1385991058122205, "grad_norm": 2.832350969314575, "learning_rate": 0.0002, "loss": 2.4829, "step": 28700 }, { "epoch": 2.139344262295082, "grad_norm": 2.737445831298828, "learning_rate": 0.0002, "loss": 2.5671, "step": 28710 }, { "epoch": 2.1400894187779436, "grad_norm": 2.456326484680176, "learning_rate": 0.0002, "loss": 2.4368, "step": 28720 }, { "epoch": 2.1408345752608047, "grad_norm": 2.3802084922790527, "learning_rate": 0.0002, "loss": 2.5299, "step": 28730 }, { "epoch": 2.1415797317436662, "grad_norm": 2.521446943283081, "learning_rate": 0.0002, "loss": 2.51, "step": 28740 }, { "epoch": 2.1423248882265273, "grad_norm": 2.6229171752929688, "learning_rate": 0.0002, "loss": 2.4292, "step": 28750 }, { "epoch": 2.143070044709389, "grad_norm": 2.573530435562134, "learning_rate": 0.0002, "loss": 2.5116, "step": 28760 }, { "epoch": 2.1438152011922504, "grad_norm": 2.3284008502960205, "learning_rate": 0.0002, "loss": 2.4296, "step": 28770 }, { "epoch": 2.144560357675112, "grad_norm": 2.8102099895477295, "learning_rate": 0.0002, "loss": 2.4006, "step": 28780 }, { "epoch": 2.145305514157973, "grad_norm": 2.7067055702209473, "learning_rate": 0.0002, "loss": 2.5078, "step": 28790 }, { "epoch": 2.1460506706408347, "grad_norm": 2.4002151489257812, "learning_rate": 0.0002, "loss": 2.6035, "step": 28800 }, { "epoch": 2.1467958271236958, "grad_norm": 2.9272592067718506, "learning_rate": 0.0002, "loss": 2.6574, "step": 28810 }, { "epoch": 2.1475409836065573, "grad_norm": 2.3437740802764893, "learning_rate": 0.0002, "loss": 2.3623, "step": 28820 }, { "epoch": 2.148286140089419, "grad_norm": 2.795177459716797, "learning_rate": 0.0002, "loss": 2.4582, "step": 28830 }, { "epoch": 2.14903129657228, "grad_norm": 2.934332847595215, "learning_rate": 0.0002, "loss": 2.5275, "step": 28840 }, { "epoch": 2.1497764530551415, "grad_norm": 2.446051597595215, "learning_rate": 0.0002, "loss": 2.3525, "step": 28850 }, { "epoch": 2.150521609538003, "grad_norm": 2.6848883628845215, "learning_rate": 0.0002, "loss": 2.5435, "step": 28860 }, { "epoch": 2.151266766020864, "grad_norm": 2.80637788772583, "learning_rate": 0.0002, "loss": 2.3997, "step": 28870 }, { "epoch": 2.1520119225037257, "grad_norm": 2.7611582279205322, "learning_rate": 0.0002, "loss": 2.4226, "step": 28880 }, { "epoch": 2.1527570789865873, "grad_norm": 3.1131093502044678, "learning_rate": 0.0002, "loss": 2.2822, "step": 28890 }, { "epoch": 2.1535022354694484, "grad_norm": 2.372504949569702, "learning_rate": 0.0002, "loss": 2.3701, "step": 28900 }, { "epoch": 2.15424739195231, "grad_norm": 2.3762528896331787, "learning_rate": 0.0002, "loss": 2.4664, "step": 28910 }, { "epoch": 2.1549925484351715, "grad_norm": 2.665152072906494, "learning_rate": 0.0002, "loss": 2.5005, "step": 28920 }, { "epoch": 2.1557377049180326, "grad_norm": 2.8510541915893555, "learning_rate": 0.0002, "loss": 2.5503, "step": 28930 }, { "epoch": 2.156482861400894, "grad_norm": 2.647361993789673, "learning_rate": 0.0002, "loss": 2.2592, "step": 28940 }, { "epoch": 2.1572280178837557, "grad_norm": 2.626680612564087, "learning_rate": 0.0002, "loss": 2.4075, "step": 28950 }, { "epoch": 2.157973174366617, "grad_norm": 3.47946834564209, "learning_rate": 0.0002, "loss": 2.5345, "step": 28960 }, { "epoch": 2.1587183308494784, "grad_norm": 2.671481132507324, "learning_rate": 0.0002, "loss": 2.2957, "step": 28970 }, { "epoch": 2.15946348733234, "grad_norm": 2.696821689605713, "learning_rate": 0.0002, "loss": 2.3831, "step": 28980 }, { "epoch": 2.160208643815201, "grad_norm": 2.814481019973755, "learning_rate": 0.0002, "loss": 2.4499, "step": 28990 }, { "epoch": 2.1609538002980626, "grad_norm": 2.933293342590332, "learning_rate": 0.0002, "loss": 2.3267, "step": 29000 }, { "epoch": 2.161698956780924, "grad_norm": 2.5142786502838135, "learning_rate": 0.0002, "loss": 2.349, "step": 29010 }, { "epoch": 2.1624441132637853, "grad_norm": 2.8400211334228516, "learning_rate": 0.0002, "loss": 2.4176, "step": 29020 }, { "epoch": 2.163189269746647, "grad_norm": 2.3746659755706787, "learning_rate": 0.0002, "loss": 2.4822, "step": 29030 }, { "epoch": 2.1639344262295084, "grad_norm": 2.615368604660034, "learning_rate": 0.0002, "loss": 2.4141, "step": 29040 }, { "epoch": 2.1646795827123695, "grad_norm": 2.766857385635376, "learning_rate": 0.0002, "loss": 2.5287, "step": 29050 }, { "epoch": 2.165424739195231, "grad_norm": 2.6013858318328857, "learning_rate": 0.0002, "loss": 2.5461, "step": 29060 }, { "epoch": 2.1661698956780926, "grad_norm": 2.6237449645996094, "learning_rate": 0.0002, "loss": 2.4418, "step": 29070 }, { "epoch": 2.1669150521609537, "grad_norm": 2.9323627948760986, "learning_rate": 0.0002, "loss": 2.504, "step": 29080 }, { "epoch": 2.1676602086438153, "grad_norm": 2.950990915298462, "learning_rate": 0.0002, "loss": 2.2819, "step": 29090 }, { "epoch": 2.168405365126677, "grad_norm": 2.595555305480957, "learning_rate": 0.0002, "loss": 2.4392, "step": 29100 }, { "epoch": 2.169150521609538, "grad_norm": 2.9946279525756836, "learning_rate": 0.0002, "loss": 2.538, "step": 29110 }, { "epoch": 2.1698956780923995, "grad_norm": 2.4822959899902344, "learning_rate": 0.0002, "loss": 2.329, "step": 29120 }, { "epoch": 2.170640834575261, "grad_norm": 2.95805287361145, "learning_rate": 0.0002, "loss": 2.5216, "step": 29130 }, { "epoch": 2.171385991058122, "grad_norm": 2.540740966796875, "learning_rate": 0.0002, "loss": 2.3522, "step": 29140 }, { "epoch": 2.1721311475409837, "grad_norm": 2.614377737045288, "learning_rate": 0.0002, "loss": 2.4523, "step": 29150 }, { "epoch": 2.172876304023845, "grad_norm": 2.5126142501831055, "learning_rate": 0.0002, "loss": 2.4256, "step": 29160 }, { "epoch": 2.1736214605067063, "grad_norm": 2.8608033657073975, "learning_rate": 0.0002, "loss": 2.3527, "step": 29170 }, { "epoch": 2.174366616989568, "grad_norm": 2.863196849822998, "learning_rate": 0.0002, "loss": 2.3969, "step": 29180 }, { "epoch": 2.175111773472429, "grad_norm": 2.6187872886657715, "learning_rate": 0.0002, "loss": 2.569, "step": 29190 }, { "epoch": 2.1758569299552906, "grad_norm": 2.520378351211548, "learning_rate": 0.0002, "loss": 2.4414, "step": 29200 }, { "epoch": 2.176602086438152, "grad_norm": 2.438509464263916, "learning_rate": 0.0002, "loss": 2.5206, "step": 29210 }, { "epoch": 2.1773472429210132, "grad_norm": 2.899704694747925, "learning_rate": 0.0002, "loss": 2.4476, "step": 29220 }, { "epoch": 2.178092399403875, "grad_norm": 2.8411431312561035, "learning_rate": 0.0002, "loss": 2.3632, "step": 29230 }, { "epoch": 2.1788375558867363, "grad_norm": 3.046539783477783, "learning_rate": 0.0002, "loss": 2.4188, "step": 29240 }, { "epoch": 2.1795827123695974, "grad_norm": 2.8408050537109375, "learning_rate": 0.0002, "loss": 2.5058, "step": 29250 }, { "epoch": 2.180327868852459, "grad_norm": 1.9899271726608276, "learning_rate": 0.0002, "loss": 2.3903, "step": 29260 }, { "epoch": 2.1810730253353205, "grad_norm": 3.0790789127349854, "learning_rate": 0.0002, "loss": 2.4835, "step": 29270 }, { "epoch": 2.1818181818181817, "grad_norm": 2.649176836013794, "learning_rate": 0.0002, "loss": 2.381, "step": 29280 }, { "epoch": 2.182563338301043, "grad_norm": 2.7816390991210938, "learning_rate": 0.0002, "loss": 2.5655, "step": 29290 }, { "epoch": 2.1833084947839048, "grad_norm": 2.8856558799743652, "learning_rate": 0.0002, "loss": 2.4248, "step": 29300 }, { "epoch": 2.184053651266766, "grad_norm": 2.5775249004364014, "learning_rate": 0.0002, "loss": 2.4948, "step": 29310 }, { "epoch": 2.1847988077496274, "grad_norm": 2.680647134780884, "learning_rate": 0.0002, "loss": 2.4791, "step": 29320 }, { "epoch": 2.185543964232489, "grad_norm": 2.7082343101501465, "learning_rate": 0.0002, "loss": 2.2716, "step": 29330 }, { "epoch": 2.18628912071535, "grad_norm": 2.76863956451416, "learning_rate": 0.0002, "loss": 2.3834, "step": 29340 }, { "epoch": 2.1870342771982116, "grad_norm": 2.652808666229248, "learning_rate": 0.0002, "loss": 2.5556, "step": 29350 }, { "epoch": 2.187779433681073, "grad_norm": 2.7211995124816895, "learning_rate": 0.0002, "loss": 2.428, "step": 29360 }, { "epoch": 2.1885245901639343, "grad_norm": 2.6517481803894043, "learning_rate": 0.0002, "loss": 2.3506, "step": 29370 }, { "epoch": 2.189269746646796, "grad_norm": 2.680163621902466, "learning_rate": 0.0002, "loss": 2.3581, "step": 29380 }, { "epoch": 2.1900149031296574, "grad_norm": 2.6171483993530273, "learning_rate": 0.0002, "loss": 2.3894, "step": 29390 }, { "epoch": 2.1907600596125185, "grad_norm": 2.645461320877075, "learning_rate": 0.0002, "loss": 2.4044, "step": 29400 }, { "epoch": 2.19150521609538, "grad_norm": 2.607429265975952, "learning_rate": 0.0002, "loss": 2.3481, "step": 29410 }, { "epoch": 2.1922503725782416, "grad_norm": 2.634819269180298, "learning_rate": 0.0002, "loss": 2.3877, "step": 29420 }, { "epoch": 2.1929955290611027, "grad_norm": 2.682586193084717, "learning_rate": 0.0002, "loss": 2.3752, "step": 29430 }, { "epoch": 2.1937406855439643, "grad_norm": 2.5644547939300537, "learning_rate": 0.0002, "loss": 2.4439, "step": 29440 }, { "epoch": 2.194485842026826, "grad_norm": 2.639521837234497, "learning_rate": 0.0002, "loss": 2.4411, "step": 29450 }, { "epoch": 2.195230998509687, "grad_norm": 2.455165147781372, "learning_rate": 0.0002, "loss": 2.6128, "step": 29460 }, { "epoch": 2.1959761549925485, "grad_norm": 2.800022602081299, "learning_rate": 0.0002, "loss": 2.4411, "step": 29470 }, { "epoch": 2.19672131147541, "grad_norm": 2.4787418842315674, "learning_rate": 0.0002, "loss": 2.4363, "step": 29480 }, { "epoch": 2.197466467958271, "grad_norm": 2.7749505043029785, "learning_rate": 0.0002, "loss": 2.4775, "step": 29490 }, { "epoch": 2.1982116244411327, "grad_norm": 2.472622871398926, "learning_rate": 0.0002, "loss": 2.3768, "step": 29500 }, { "epoch": 2.198956780923994, "grad_norm": 2.560708999633789, "learning_rate": 0.0002, "loss": 2.4336, "step": 29510 }, { "epoch": 2.1997019374068554, "grad_norm": 2.5658538341522217, "learning_rate": 0.0002, "loss": 2.389, "step": 29520 }, { "epoch": 2.200447093889717, "grad_norm": 2.6552112102508545, "learning_rate": 0.0002, "loss": 2.1017, "step": 29530 }, { "epoch": 2.201192250372578, "grad_norm": 2.23706316947937, "learning_rate": 0.0002, "loss": 2.4461, "step": 29540 }, { "epoch": 2.2019374068554396, "grad_norm": 2.763774871826172, "learning_rate": 0.0002, "loss": 2.4351, "step": 29550 }, { "epoch": 2.202682563338301, "grad_norm": 2.5139219760894775, "learning_rate": 0.0002, "loss": 2.351, "step": 29560 }, { "epoch": 2.2034277198211623, "grad_norm": 2.893038272857666, "learning_rate": 0.0002, "loss": 2.4692, "step": 29570 }, { "epoch": 2.204172876304024, "grad_norm": 2.818425178527832, "learning_rate": 0.0002, "loss": 2.4447, "step": 29580 }, { "epoch": 2.2049180327868854, "grad_norm": 2.331808567047119, "learning_rate": 0.0002, "loss": 2.4306, "step": 29590 }, { "epoch": 2.2056631892697465, "grad_norm": 3.2033214569091797, "learning_rate": 0.0002, "loss": 2.5642, "step": 29600 }, { "epoch": 2.206408345752608, "grad_norm": 2.41534686088562, "learning_rate": 0.0002, "loss": 2.3699, "step": 29610 }, { "epoch": 2.2071535022354696, "grad_norm": 2.8032174110412598, "learning_rate": 0.0002, "loss": 2.6087, "step": 29620 }, { "epoch": 2.2078986587183307, "grad_norm": 2.4665184020996094, "learning_rate": 0.0002, "loss": 2.4702, "step": 29630 }, { "epoch": 2.2086438152011922, "grad_norm": 2.474245309829712, "learning_rate": 0.0002, "loss": 2.392, "step": 29640 }, { "epoch": 2.209388971684054, "grad_norm": 2.9560275077819824, "learning_rate": 0.0002, "loss": 2.5555, "step": 29650 }, { "epoch": 2.210134128166915, "grad_norm": 2.4646859169006348, "learning_rate": 0.0002, "loss": 2.3858, "step": 29660 }, { "epoch": 2.2108792846497765, "grad_norm": 2.8833131790161133, "learning_rate": 0.0002, "loss": 2.3451, "step": 29670 }, { "epoch": 2.211624441132638, "grad_norm": 2.540769577026367, "learning_rate": 0.0002, "loss": 2.256, "step": 29680 }, { "epoch": 2.212369597615499, "grad_norm": 3.0184593200683594, "learning_rate": 0.0002, "loss": 2.4306, "step": 29690 }, { "epoch": 2.2131147540983607, "grad_norm": 2.6296427249908447, "learning_rate": 0.0002, "loss": 2.5813, "step": 29700 }, { "epoch": 2.2138599105812222, "grad_norm": 2.667278528213501, "learning_rate": 0.0002, "loss": 2.4874, "step": 29710 }, { "epoch": 2.2146050670640833, "grad_norm": 2.660968065261841, "learning_rate": 0.0002, "loss": 2.3787, "step": 29720 }, { "epoch": 2.215350223546945, "grad_norm": 2.5494511127471924, "learning_rate": 0.0002, "loss": 2.4766, "step": 29730 }, { "epoch": 2.2160953800298064, "grad_norm": 2.503387928009033, "learning_rate": 0.0002, "loss": 2.4909, "step": 29740 }, { "epoch": 2.2168405365126675, "grad_norm": 2.6121363639831543, "learning_rate": 0.0002, "loss": 2.5237, "step": 29750 }, { "epoch": 2.217585692995529, "grad_norm": 2.4190587997436523, "learning_rate": 0.0002, "loss": 2.3553, "step": 29760 }, { "epoch": 2.2183308494783907, "grad_norm": 2.8600144386291504, "learning_rate": 0.0002, "loss": 2.4106, "step": 29770 }, { "epoch": 2.2190760059612518, "grad_norm": 3.615830898284912, "learning_rate": 0.0002, "loss": 2.5998, "step": 29780 }, { "epoch": 2.2198211624441133, "grad_norm": 3.1991500854492188, "learning_rate": 0.0002, "loss": 2.6738, "step": 29790 }, { "epoch": 2.220566318926975, "grad_norm": 2.7967700958251953, "learning_rate": 0.0002, "loss": 2.4236, "step": 29800 }, { "epoch": 2.221311475409836, "grad_norm": 2.68129301071167, "learning_rate": 0.0002, "loss": 2.4228, "step": 29810 }, { "epoch": 2.2220566318926975, "grad_norm": 2.7020351886749268, "learning_rate": 0.0002, "loss": 2.3199, "step": 29820 }, { "epoch": 2.222801788375559, "grad_norm": 2.417415142059326, "learning_rate": 0.0002, "loss": 2.344, "step": 29830 }, { "epoch": 2.22354694485842, "grad_norm": 2.5789639949798584, "learning_rate": 0.0002, "loss": 2.3901, "step": 29840 }, { "epoch": 2.2242921013412817, "grad_norm": 2.623487710952759, "learning_rate": 0.0002, "loss": 2.3944, "step": 29850 }, { "epoch": 2.225037257824143, "grad_norm": 2.554654598236084, "learning_rate": 0.0002, "loss": 2.3396, "step": 29860 }, { "epoch": 2.2257824143070044, "grad_norm": 2.624058485031128, "learning_rate": 0.0002, "loss": 2.222, "step": 29870 }, { "epoch": 2.226527570789866, "grad_norm": 2.9472296237945557, "learning_rate": 0.0002, "loss": 2.5653, "step": 29880 }, { "epoch": 2.227272727272727, "grad_norm": 2.7747509479522705, "learning_rate": 0.0002, "loss": 2.3922, "step": 29890 }, { "epoch": 2.2280178837555886, "grad_norm": 2.717242956161499, "learning_rate": 0.0002, "loss": 2.3198, "step": 29900 }, { "epoch": 2.22876304023845, "grad_norm": 2.829017400741577, "learning_rate": 0.0002, "loss": 2.2318, "step": 29910 }, { "epoch": 2.2295081967213113, "grad_norm": 2.65274715423584, "learning_rate": 0.0002, "loss": 2.3987, "step": 29920 }, { "epoch": 2.230253353204173, "grad_norm": 2.738203287124634, "learning_rate": 0.0002, "loss": 2.2976, "step": 29930 }, { "epoch": 2.2309985096870344, "grad_norm": 2.4520084857940674, "learning_rate": 0.0002, "loss": 2.5559, "step": 29940 }, { "epoch": 2.2317436661698955, "grad_norm": 2.829719066619873, "learning_rate": 0.0002, "loss": 2.3981, "step": 29950 }, { "epoch": 2.232488822652757, "grad_norm": 2.8684730529785156, "learning_rate": 0.0002, "loss": 2.4195, "step": 29960 }, { "epoch": 2.2332339791356186, "grad_norm": 2.597276449203491, "learning_rate": 0.0002, "loss": 2.4955, "step": 29970 }, { "epoch": 2.2339791356184797, "grad_norm": 2.678597927093506, "learning_rate": 0.0002, "loss": 2.3952, "step": 29980 }, { "epoch": 2.2347242921013413, "grad_norm": 2.6332530975341797, "learning_rate": 0.0002, "loss": 2.6308, "step": 29990 }, { "epoch": 2.235469448584203, "grad_norm": 2.6793925762176514, "learning_rate": 0.0002, "loss": 2.3281, "step": 30000 }, { "epoch": 2.236214605067064, "grad_norm": 2.714738607406616, "learning_rate": 0.0002, "loss": 2.4091, "step": 30010 }, { "epoch": 2.2369597615499255, "grad_norm": 2.5853700637817383, "learning_rate": 0.0002, "loss": 2.4243, "step": 30020 }, { "epoch": 2.237704918032787, "grad_norm": 2.972341775894165, "learning_rate": 0.0002, "loss": 2.4984, "step": 30030 }, { "epoch": 2.238450074515648, "grad_norm": 2.5449442863464355, "learning_rate": 0.0002, "loss": 2.4433, "step": 30040 }, { "epoch": 2.2391952309985097, "grad_norm": 2.6521236896514893, "learning_rate": 0.0002, "loss": 2.5521, "step": 30050 }, { "epoch": 2.2399403874813713, "grad_norm": 2.510643482208252, "learning_rate": 0.0002, "loss": 2.5315, "step": 30060 }, { "epoch": 2.2406855439642324, "grad_norm": 2.896329641342163, "learning_rate": 0.0002, "loss": 2.4374, "step": 30070 }, { "epoch": 2.241430700447094, "grad_norm": 2.6130764484405518, "learning_rate": 0.0002, "loss": 2.4338, "step": 30080 }, { "epoch": 2.2421758569299555, "grad_norm": 2.5632150173187256, "learning_rate": 0.0002, "loss": 2.4224, "step": 30090 }, { "epoch": 2.2429210134128166, "grad_norm": 2.6753768920898438, "learning_rate": 0.0002, "loss": 2.4606, "step": 30100 }, { "epoch": 2.243666169895678, "grad_norm": 2.60931396484375, "learning_rate": 0.0002, "loss": 2.4416, "step": 30110 }, { "epoch": 2.2444113263785397, "grad_norm": 2.428415536880493, "learning_rate": 0.0002, "loss": 2.596, "step": 30120 }, { "epoch": 2.245156482861401, "grad_norm": 2.97552752494812, "learning_rate": 0.0002, "loss": 2.6441, "step": 30130 }, { "epoch": 2.2459016393442623, "grad_norm": 2.513252019882202, "learning_rate": 0.0002, "loss": 2.5045, "step": 30140 }, { "epoch": 2.246646795827124, "grad_norm": 2.529350996017456, "learning_rate": 0.0002, "loss": 2.2703, "step": 30150 }, { "epoch": 2.247391952309985, "grad_norm": 2.729717254638672, "learning_rate": 0.0002, "loss": 2.5772, "step": 30160 }, { "epoch": 2.2481371087928466, "grad_norm": 2.8661959171295166, "learning_rate": 0.0002, "loss": 2.5031, "step": 30170 }, { "epoch": 2.248882265275708, "grad_norm": 2.6999034881591797, "learning_rate": 0.0002, "loss": 2.4389, "step": 30180 }, { "epoch": 2.2496274217585692, "grad_norm": 3.331155300140381, "learning_rate": 0.0002, "loss": 2.5102, "step": 30190 }, { "epoch": 2.2503725782414308, "grad_norm": 2.8043971061706543, "learning_rate": 0.0002, "loss": 2.3276, "step": 30200 }, { "epoch": 2.251117734724292, "grad_norm": 2.57314133644104, "learning_rate": 0.0002, "loss": 2.3967, "step": 30210 }, { "epoch": 2.2518628912071534, "grad_norm": 2.738511085510254, "learning_rate": 0.0002, "loss": 2.5336, "step": 30220 }, { "epoch": 2.252608047690015, "grad_norm": 2.7028000354766846, "learning_rate": 0.0002, "loss": 2.4197, "step": 30230 }, { "epoch": 2.2533532041728765, "grad_norm": 1.9753464460372925, "learning_rate": 0.0002, "loss": 2.2315, "step": 30240 }, { "epoch": 2.2540983606557377, "grad_norm": 2.5343260765075684, "learning_rate": 0.0002, "loss": 2.656, "step": 30250 }, { "epoch": 2.254843517138599, "grad_norm": 3.3517582416534424, "learning_rate": 0.0002, "loss": 2.5387, "step": 30260 }, { "epoch": 2.2555886736214603, "grad_norm": 2.795591115951538, "learning_rate": 0.0002, "loss": 2.428, "step": 30270 }, { "epoch": 2.256333830104322, "grad_norm": 2.458268404006958, "learning_rate": 0.0002, "loss": 2.6109, "step": 30280 }, { "epoch": 2.2570789865871834, "grad_norm": 2.965975761413574, "learning_rate": 0.0002, "loss": 2.265, "step": 30290 }, { "epoch": 2.2578241430700445, "grad_norm": 2.6871607303619385, "learning_rate": 0.0002, "loss": 2.3368, "step": 30300 }, { "epoch": 2.258569299552906, "grad_norm": 2.760650157928467, "learning_rate": 0.0002, "loss": 2.4479, "step": 30310 }, { "epoch": 2.2593144560357676, "grad_norm": 2.5536069869995117, "learning_rate": 0.0002, "loss": 2.3555, "step": 30320 }, { "epoch": 2.2600596125186287, "grad_norm": 2.6912643909454346, "learning_rate": 0.0002, "loss": 2.4761, "step": 30330 }, { "epoch": 2.2608047690014903, "grad_norm": 2.6149418354034424, "learning_rate": 0.0002, "loss": 2.3776, "step": 30340 }, { "epoch": 2.261549925484352, "grad_norm": 2.6882479190826416, "learning_rate": 0.0002, "loss": 2.4329, "step": 30350 }, { "epoch": 2.262295081967213, "grad_norm": 2.7375595569610596, "learning_rate": 0.0002, "loss": 2.4161, "step": 30360 }, { "epoch": 2.2630402384500745, "grad_norm": 2.4539854526519775, "learning_rate": 0.0002, "loss": 2.4432, "step": 30370 }, { "epoch": 2.263785394932936, "grad_norm": 2.7913661003112793, "learning_rate": 0.0002, "loss": 2.6048, "step": 30380 }, { "epoch": 2.264530551415797, "grad_norm": 2.822239875793457, "learning_rate": 0.0002, "loss": 2.6286, "step": 30390 }, { "epoch": 2.2652757078986587, "grad_norm": 2.82560658454895, "learning_rate": 0.0002, "loss": 2.2957, "step": 30400 }, { "epoch": 2.2660208643815203, "grad_norm": 2.30305814743042, "learning_rate": 0.0002, "loss": 2.5297, "step": 30410 }, { "epoch": 2.2667660208643814, "grad_norm": 2.366816520690918, "learning_rate": 0.0002, "loss": 2.3713, "step": 30420 }, { "epoch": 2.267511177347243, "grad_norm": 2.849090814590454, "learning_rate": 0.0002, "loss": 2.2744, "step": 30430 }, { "epoch": 2.2682563338301045, "grad_norm": 2.4802587032318115, "learning_rate": 0.0002, "loss": 2.2494, "step": 30440 }, { "epoch": 2.2690014903129656, "grad_norm": 2.9670379161834717, "learning_rate": 0.0002, "loss": 2.5435, "step": 30450 }, { "epoch": 2.269746646795827, "grad_norm": 2.596202850341797, "learning_rate": 0.0002, "loss": 2.428, "step": 30460 }, { "epoch": 2.2704918032786887, "grad_norm": 2.6651835441589355, "learning_rate": 0.0002, "loss": 2.3748, "step": 30470 }, { "epoch": 2.27123695976155, "grad_norm": 2.2997448444366455, "learning_rate": 0.0002, "loss": 2.5528, "step": 30480 }, { "epoch": 2.2719821162444114, "grad_norm": 2.415158271789551, "learning_rate": 0.0002, "loss": 2.3274, "step": 30490 }, { "epoch": 2.2727272727272725, "grad_norm": 2.361969470977783, "learning_rate": 0.0002, "loss": 2.5814, "step": 30500 }, { "epoch": 2.273472429210134, "grad_norm": 2.340508222579956, "learning_rate": 0.0002, "loss": 2.2694, "step": 30510 }, { "epoch": 2.2742175856929956, "grad_norm": 2.5094423294067383, "learning_rate": 0.0002, "loss": 2.4632, "step": 30520 }, { "epoch": 2.274962742175857, "grad_norm": 2.8565590381622314, "learning_rate": 0.0002, "loss": 2.2868, "step": 30530 }, { "epoch": 2.2757078986587183, "grad_norm": 2.701770067214966, "learning_rate": 0.0002, "loss": 2.4865, "step": 30540 }, { "epoch": 2.27645305514158, "grad_norm": 2.518066644668579, "learning_rate": 0.0002, "loss": 2.5408, "step": 30550 }, { "epoch": 2.277198211624441, "grad_norm": 2.743431806564331, "learning_rate": 0.0002, "loss": 2.4585, "step": 30560 }, { "epoch": 2.2779433681073025, "grad_norm": 2.8521339893341064, "learning_rate": 0.0002, "loss": 2.3772, "step": 30570 }, { "epoch": 2.278688524590164, "grad_norm": 2.967759370803833, "learning_rate": 0.0002, "loss": 2.5984, "step": 30580 }, { "epoch": 2.2794336810730256, "grad_norm": 2.4297335147857666, "learning_rate": 0.0002, "loss": 2.4854, "step": 30590 }, { "epoch": 2.2801788375558867, "grad_norm": 2.547301769256592, "learning_rate": 0.0002, "loss": 2.478, "step": 30600 }, { "epoch": 2.2809239940387482, "grad_norm": 2.791649341583252, "learning_rate": 0.0002, "loss": 2.5139, "step": 30610 }, { "epoch": 2.2816691505216093, "grad_norm": 2.7930760383605957, "learning_rate": 0.0002, "loss": 2.5727, "step": 30620 }, { "epoch": 2.282414307004471, "grad_norm": 2.5463852882385254, "learning_rate": 0.0002, "loss": 2.5221, "step": 30630 }, { "epoch": 2.2831594634873325, "grad_norm": 2.653203248977661, "learning_rate": 0.0002, "loss": 2.3023, "step": 30640 }, { "epoch": 2.2839046199701936, "grad_norm": 3.0527825355529785, "learning_rate": 0.0002, "loss": 2.2436, "step": 30650 }, { "epoch": 2.284649776453055, "grad_norm": 2.5038249492645264, "learning_rate": 0.0002, "loss": 2.5085, "step": 30660 }, { "epoch": 2.2853949329359167, "grad_norm": 2.571519374847412, "learning_rate": 0.0002, "loss": 2.2828, "step": 30670 }, { "epoch": 2.2861400894187778, "grad_norm": 2.742832660675049, "learning_rate": 0.0002, "loss": 2.4795, "step": 30680 }, { "epoch": 2.2868852459016393, "grad_norm": 2.4409167766571045, "learning_rate": 0.0002, "loss": 2.4449, "step": 30690 }, { "epoch": 2.287630402384501, "grad_norm": 2.7374167442321777, "learning_rate": 0.0002, "loss": 2.2682, "step": 30700 }, { "epoch": 2.288375558867362, "grad_norm": 2.6040146350860596, "learning_rate": 0.0002, "loss": 2.4203, "step": 30710 }, { "epoch": 2.2891207153502235, "grad_norm": 2.70377779006958, "learning_rate": 0.0002, "loss": 2.3425, "step": 30720 }, { "epoch": 2.289865871833085, "grad_norm": 2.382824420928955, "learning_rate": 0.0002, "loss": 2.3675, "step": 30730 }, { "epoch": 2.290611028315946, "grad_norm": 2.6663031578063965, "learning_rate": 0.0002, "loss": 2.5198, "step": 30740 }, { "epoch": 2.2913561847988078, "grad_norm": 2.4763283729553223, "learning_rate": 0.0002, "loss": 2.3904, "step": 30750 }, { "epoch": 2.2921013412816693, "grad_norm": 2.601113796234131, "learning_rate": 0.0002, "loss": 2.5542, "step": 30760 }, { "epoch": 2.2928464977645304, "grad_norm": 2.841099739074707, "learning_rate": 0.0002, "loss": 2.5474, "step": 30770 }, { "epoch": 2.293591654247392, "grad_norm": 2.850001573562622, "learning_rate": 0.0002, "loss": 2.4976, "step": 30780 }, { "epoch": 2.2943368107302535, "grad_norm": 2.646151304244995, "learning_rate": 0.0002, "loss": 2.4373, "step": 30790 }, { "epoch": 2.2950819672131146, "grad_norm": 2.71795916557312, "learning_rate": 0.0002, "loss": 2.3082, "step": 30800 }, { "epoch": 2.295827123695976, "grad_norm": 2.5677895545959473, "learning_rate": 0.0002, "loss": 2.3645, "step": 30810 }, { "epoch": 2.2965722801788377, "grad_norm": 2.6931703090667725, "learning_rate": 0.0002, "loss": 2.5397, "step": 30820 }, { "epoch": 2.297317436661699, "grad_norm": 2.990161418914795, "learning_rate": 0.0002, "loss": 2.5421, "step": 30830 }, { "epoch": 2.2980625931445604, "grad_norm": 2.717191696166992, "learning_rate": 0.0002, "loss": 2.4656, "step": 30840 }, { "epoch": 2.2988077496274215, "grad_norm": 3.0563786029815674, "learning_rate": 0.0002, "loss": 2.5854, "step": 30850 }, { "epoch": 2.299552906110283, "grad_norm": 2.8930904865264893, "learning_rate": 0.0002, "loss": 2.3787, "step": 30860 }, { "epoch": 2.3002980625931446, "grad_norm": 2.5913820266723633, "learning_rate": 0.0002, "loss": 2.3729, "step": 30870 }, { "epoch": 2.301043219076006, "grad_norm": 2.668519973754883, "learning_rate": 0.0002, "loss": 2.2982, "step": 30880 }, { "epoch": 2.3017883755588673, "grad_norm": 2.6482491493225098, "learning_rate": 0.0002, "loss": 2.4505, "step": 30890 }, { "epoch": 2.302533532041729, "grad_norm": 2.7581119537353516, "learning_rate": 0.0002, "loss": 2.5304, "step": 30900 }, { "epoch": 2.30327868852459, "grad_norm": 2.807406425476074, "learning_rate": 0.0002, "loss": 2.6199, "step": 30910 }, { "epoch": 2.3040238450074515, "grad_norm": 2.669703245162964, "learning_rate": 0.0002, "loss": 2.3566, "step": 30920 }, { "epoch": 2.304769001490313, "grad_norm": 2.4919867515563965, "learning_rate": 0.0002, "loss": 2.4593, "step": 30930 }, { "epoch": 2.3055141579731746, "grad_norm": 2.781989574432373, "learning_rate": 0.0002, "loss": 2.3244, "step": 30940 }, { "epoch": 2.3062593144560357, "grad_norm": 2.7712695598602295, "learning_rate": 0.0002, "loss": 2.3854, "step": 30950 }, { "epoch": 2.3070044709388973, "grad_norm": 2.8249754905700684, "learning_rate": 0.0002, "loss": 2.6018, "step": 30960 }, { "epoch": 2.3077496274217584, "grad_norm": 2.443105459213257, "learning_rate": 0.0002, "loss": 2.2767, "step": 30970 }, { "epoch": 2.30849478390462, "grad_norm": 2.589513063430786, "learning_rate": 0.0002, "loss": 2.5455, "step": 30980 }, { "epoch": 2.3092399403874815, "grad_norm": 2.7437689304351807, "learning_rate": 0.0002, "loss": 2.7474, "step": 30990 }, { "epoch": 2.3099850968703426, "grad_norm": 2.4255599975585938, "learning_rate": 0.0002, "loss": 2.3217, "step": 31000 }, { "epoch": 2.310730253353204, "grad_norm": 2.56787371635437, "learning_rate": 0.0002, "loss": 2.3108, "step": 31010 }, { "epoch": 2.3114754098360657, "grad_norm": 2.7535786628723145, "learning_rate": 0.0002, "loss": 2.4737, "step": 31020 }, { "epoch": 2.312220566318927, "grad_norm": 2.5729820728302, "learning_rate": 0.0002, "loss": 2.3027, "step": 31030 }, { "epoch": 2.3129657228017884, "grad_norm": 2.6444244384765625, "learning_rate": 0.0002, "loss": 2.4639, "step": 31040 }, { "epoch": 2.31371087928465, "grad_norm": 2.6988863945007324, "learning_rate": 0.0002, "loss": 2.4188, "step": 31050 }, { "epoch": 2.314456035767511, "grad_norm": 3.3972041606903076, "learning_rate": 0.0002, "loss": 2.639, "step": 31060 }, { "epoch": 2.3152011922503726, "grad_norm": 2.741131544113159, "learning_rate": 0.0002, "loss": 2.484, "step": 31070 }, { "epoch": 2.315946348733234, "grad_norm": 2.761235475540161, "learning_rate": 0.0002, "loss": 2.4808, "step": 31080 }, { "epoch": 2.3166915052160952, "grad_norm": 2.5519707202911377, "learning_rate": 0.0002, "loss": 2.5373, "step": 31090 }, { "epoch": 2.317436661698957, "grad_norm": 2.4959230422973633, "learning_rate": 0.0002, "loss": 2.5488, "step": 31100 }, { "epoch": 2.3181818181818183, "grad_norm": 2.7000091075897217, "learning_rate": 0.0002, "loss": 2.2065, "step": 31110 }, { "epoch": 2.3189269746646795, "grad_norm": 2.97749662399292, "learning_rate": 0.0002, "loss": 2.3805, "step": 31120 }, { "epoch": 2.319672131147541, "grad_norm": 2.6675100326538086, "learning_rate": 0.0002, "loss": 2.5932, "step": 31130 }, { "epoch": 2.3204172876304026, "grad_norm": 2.4122798442840576, "learning_rate": 0.0002, "loss": 2.47, "step": 31140 }, { "epoch": 2.3211624441132637, "grad_norm": 2.4250221252441406, "learning_rate": 0.0002, "loss": 2.3444, "step": 31150 }, { "epoch": 2.321907600596125, "grad_norm": 2.625974178314209, "learning_rate": 0.0002, "loss": 2.6722, "step": 31160 }, { "epoch": 2.3226527570789868, "grad_norm": 2.5637755393981934, "learning_rate": 0.0002, "loss": 2.4901, "step": 31170 }, { "epoch": 2.323397913561848, "grad_norm": 2.491208076477051, "learning_rate": 0.0002, "loss": 2.3771, "step": 31180 }, { "epoch": 2.3241430700447094, "grad_norm": 2.751556634902954, "learning_rate": 0.0002, "loss": 2.3967, "step": 31190 }, { "epoch": 2.3248882265275705, "grad_norm": 2.622948169708252, "learning_rate": 0.0002, "loss": 2.4714, "step": 31200 }, { "epoch": 2.325633383010432, "grad_norm": 2.500760078430176, "learning_rate": 0.0002, "loss": 2.4578, "step": 31210 }, { "epoch": 2.3263785394932937, "grad_norm": 2.7233376502990723, "learning_rate": 0.0002, "loss": 2.5258, "step": 31220 }, { "epoch": 2.327123695976155, "grad_norm": 2.32476806640625, "learning_rate": 0.0002, "loss": 2.431, "step": 31230 }, { "epoch": 2.3278688524590163, "grad_norm": 2.283064603805542, "learning_rate": 0.0002, "loss": 2.2992, "step": 31240 }, { "epoch": 2.328614008941878, "grad_norm": 2.767536163330078, "learning_rate": 0.0002, "loss": 2.4628, "step": 31250 }, { "epoch": 2.329359165424739, "grad_norm": 3.01369047164917, "learning_rate": 0.0002, "loss": 2.3962, "step": 31260 }, { "epoch": 2.3301043219076005, "grad_norm": 2.545297622680664, "learning_rate": 0.0002, "loss": 2.5169, "step": 31270 }, { "epoch": 2.330849478390462, "grad_norm": 2.610537528991699, "learning_rate": 0.0002, "loss": 2.5468, "step": 31280 }, { "epoch": 2.3315946348733236, "grad_norm": 2.7675936222076416, "learning_rate": 0.0002, "loss": 2.2562, "step": 31290 }, { "epoch": 2.3323397913561847, "grad_norm": 2.5579588413238525, "learning_rate": 0.0002, "loss": 2.5384, "step": 31300 }, { "epoch": 2.3330849478390463, "grad_norm": 2.905510902404785, "learning_rate": 0.0002, "loss": 2.3248, "step": 31310 }, { "epoch": 2.3338301043219074, "grad_norm": 2.6101415157318115, "learning_rate": 0.0002, "loss": 2.5987, "step": 31320 }, { "epoch": 2.334575260804769, "grad_norm": 2.5008299350738525, "learning_rate": 0.0002, "loss": 2.2796, "step": 31330 }, { "epoch": 2.3353204172876305, "grad_norm": 2.7054800987243652, "learning_rate": 0.0002, "loss": 2.2972, "step": 31340 }, { "epoch": 2.3360655737704916, "grad_norm": 2.9480528831481934, "learning_rate": 0.0002, "loss": 2.624, "step": 31350 }, { "epoch": 2.336810730253353, "grad_norm": 2.9812073707580566, "learning_rate": 0.0002, "loss": 2.5909, "step": 31360 }, { "epoch": 2.3375558867362147, "grad_norm": 2.557450294494629, "learning_rate": 0.0002, "loss": 2.3268, "step": 31370 }, { "epoch": 2.338301043219076, "grad_norm": 2.770230770111084, "learning_rate": 0.0002, "loss": 2.4681, "step": 31380 }, { "epoch": 2.3390461997019374, "grad_norm": 2.71527099609375, "learning_rate": 0.0002, "loss": 2.4039, "step": 31390 }, { "epoch": 2.339791356184799, "grad_norm": 2.635856866836548, "learning_rate": 0.0002, "loss": 2.4766, "step": 31400 }, { "epoch": 2.34053651266766, "grad_norm": 2.7687203884124756, "learning_rate": 0.0002, "loss": 2.4795, "step": 31410 }, { "epoch": 2.3412816691505216, "grad_norm": 2.6635115146636963, "learning_rate": 0.0002, "loss": 2.3895, "step": 31420 }, { "epoch": 2.342026825633383, "grad_norm": 2.6172454357147217, "learning_rate": 0.0002, "loss": 2.7309, "step": 31430 }, { "epoch": 2.3427719821162443, "grad_norm": 3.0446231365203857, "learning_rate": 0.0002, "loss": 2.3944, "step": 31440 }, { "epoch": 2.343517138599106, "grad_norm": 2.6833817958831787, "learning_rate": 0.0002, "loss": 2.412, "step": 31450 }, { "epoch": 2.3442622950819674, "grad_norm": 2.333240509033203, "learning_rate": 0.0002, "loss": 2.5243, "step": 31460 }, { "epoch": 2.3450074515648285, "grad_norm": 2.985318422317505, "learning_rate": 0.0002, "loss": 2.4594, "step": 31470 }, { "epoch": 2.34575260804769, "grad_norm": 2.4295361042022705, "learning_rate": 0.0002, "loss": 2.3086, "step": 31480 }, { "epoch": 2.3464977645305516, "grad_norm": 2.663954734802246, "learning_rate": 0.0002, "loss": 2.5138, "step": 31490 }, { "epoch": 2.3472429210134127, "grad_norm": 2.842383861541748, "learning_rate": 0.0002, "loss": 2.649, "step": 31500 }, { "epoch": 2.3479880774962743, "grad_norm": 2.4584267139434814, "learning_rate": 0.0002, "loss": 2.2672, "step": 31510 }, { "epoch": 2.348733233979136, "grad_norm": 2.6671886444091797, "learning_rate": 0.0002, "loss": 2.3535, "step": 31520 }, { "epoch": 2.349478390461997, "grad_norm": 2.6735708713531494, "learning_rate": 0.0002, "loss": 2.4926, "step": 31530 }, { "epoch": 2.3502235469448585, "grad_norm": 2.579631805419922, "learning_rate": 0.0002, "loss": 2.1678, "step": 31540 }, { "epoch": 2.3509687034277196, "grad_norm": 2.5396575927734375, "learning_rate": 0.0002, "loss": 2.4117, "step": 31550 }, { "epoch": 2.351713859910581, "grad_norm": 2.7359862327575684, "learning_rate": 0.0002, "loss": 2.459, "step": 31560 }, { "epoch": 2.3524590163934427, "grad_norm": 2.8085885047912598, "learning_rate": 0.0002, "loss": 2.3537, "step": 31570 }, { "epoch": 2.3532041728763042, "grad_norm": 3.020338773727417, "learning_rate": 0.0002, "loss": 2.5794, "step": 31580 }, { "epoch": 2.3539493293591653, "grad_norm": 2.755391836166382, "learning_rate": 0.0002, "loss": 2.3765, "step": 31590 }, { "epoch": 2.354694485842027, "grad_norm": 2.8029239177703857, "learning_rate": 0.0002, "loss": 2.3851, "step": 31600 }, { "epoch": 2.355439642324888, "grad_norm": 2.442831516265869, "learning_rate": 0.0002, "loss": 2.426, "step": 31610 }, { "epoch": 2.3561847988077496, "grad_norm": 2.6783061027526855, "learning_rate": 0.0002, "loss": 2.5379, "step": 31620 }, { "epoch": 2.356929955290611, "grad_norm": 2.8545401096343994, "learning_rate": 0.0002, "loss": 2.3918, "step": 31630 }, { "epoch": 2.3576751117734727, "grad_norm": 2.6297447681427, "learning_rate": 0.0002, "loss": 2.5717, "step": 31640 }, { "epoch": 2.3584202682563338, "grad_norm": 2.3793184757232666, "learning_rate": 0.0002, "loss": 2.4338, "step": 31650 }, { "epoch": 2.3591654247391953, "grad_norm": 2.4211175441741943, "learning_rate": 0.0002, "loss": 2.5335, "step": 31660 }, { "epoch": 2.3599105812220564, "grad_norm": 2.7338030338287354, "learning_rate": 0.0002, "loss": 2.4089, "step": 31670 }, { "epoch": 2.360655737704918, "grad_norm": 2.5373005867004395, "learning_rate": 0.0002, "loss": 2.5354, "step": 31680 }, { "epoch": 2.3614008941877795, "grad_norm": 2.932400703430176, "learning_rate": 0.0002, "loss": 2.4069, "step": 31690 }, { "epoch": 2.3621460506706407, "grad_norm": 2.6089446544647217, "learning_rate": 0.0002, "loss": 2.4368, "step": 31700 }, { "epoch": 2.362891207153502, "grad_norm": 2.8511080741882324, "learning_rate": 0.0002, "loss": 2.3916, "step": 31710 }, { "epoch": 2.3636363636363638, "grad_norm": 3.1267642974853516, "learning_rate": 0.0002, "loss": 2.5248, "step": 31720 }, { "epoch": 2.364381520119225, "grad_norm": 2.741081953048706, "learning_rate": 0.0002, "loss": 2.3826, "step": 31730 }, { "epoch": 2.3651266766020864, "grad_norm": 2.7541043758392334, "learning_rate": 0.0002, "loss": 2.4853, "step": 31740 }, { "epoch": 2.365871833084948, "grad_norm": 2.6684322357177734, "learning_rate": 0.0002, "loss": 2.2149, "step": 31750 }, { "epoch": 2.366616989567809, "grad_norm": 2.5884592533111572, "learning_rate": 0.0002, "loss": 2.5272, "step": 31760 }, { "epoch": 2.3673621460506706, "grad_norm": 2.832197666168213, "learning_rate": 0.0002, "loss": 2.548, "step": 31770 }, { "epoch": 2.368107302533532, "grad_norm": 2.574288845062256, "learning_rate": 0.0002, "loss": 2.333, "step": 31780 }, { "epoch": 2.3688524590163933, "grad_norm": 2.654503107070923, "learning_rate": 0.0002, "loss": 2.4165, "step": 31790 }, { "epoch": 2.369597615499255, "grad_norm": 2.2965807914733887, "learning_rate": 0.0002, "loss": 2.5123, "step": 31800 }, { "epoch": 2.3703427719821164, "grad_norm": 2.757343292236328, "learning_rate": 0.0002, "loss": 2.5358, "step": 31810 }, { "epoch": 2.3710879284649775, "grad_norm": 2.404266119003296, "learning_rate": 0.0002, "loss": 2.4523, "step": 31820 }, { "epoch": 2.371833084947839, "grad_norm": 2.5798537731170654, "learning_rate": 0.0002, "loss": 2.5129, "step": 31830 }, { "epoch": 2.3725782414307006, "grad_norm": 2.3345487117767334, "learning_rate": 0.0002, "loss": 2.4274, "step": 31840 }, { "epoch": 2.3733233979135617, "grad_norm": 2.4548275470733643, "learning_rate": 0.0002, "loss": 2.6288, "step": 31850 }, { "epoch": 2.3740685543964233, "grad_norm": 2.830592632293701, "learning_rate": 0.0002, "loss": 2.4632, "step": 31860 }, { "epoch": 2.374813710879285, "grad_norm": 2.5509464740753174, "learning_rate": 0.0002, "loss": 2.4051, "step": 31870 }, { "epoch": 2.375558867362146, "grad_norm": 2.498534679412842, "learning_rate": 0.0002, "loss": 2.4469, "step": 31880 }, { "epoch": 2.3763040238450075, "grad_norm": 2.4056124687194824, "learning_rate": 0.0002, "loss": 2.4079, "step": 31890 }, { "epoch": 2.3770491803278686, "grad_norm": 2.7303104400634766, "learning_rate": 0.0002, "loss": 2.3214, "step": 31900 }, { "epoch": 2.37779433681073, "grad_norm": 2.8305602073669434, "learning_rate": 0.0002, "loss": 2.4894, "step": 31910 }, { "epoch": 2.3785394932935917, "grad_norm": 3.2949180603027344, "learning_rate": 0.0002, "loss": 2.4071, "step": 31920 }, { "epoch": 2.3792846497764533, "grad_norm": 2.639760971069336, "learning_rate": 0.0002, "loss": 2.5399, "step": 31930 }, { "epoch": 2.3800298062593144, "grad_norm": 2.7378196716308594, "learning_rate": 0.0002, "loss": 2.4692, "step": 31940 }, { "epoch": 2.380774962742176, "grad_norm": 2.9000864028930664, "learning_rate": 0.0002, "loss": 2.6328, "step": 31950 }, { "epoch": 2.381520119225037, "grad_norm": 2.8372485637664795, "learning_rate": 0.0002, "loss": 2.5593, "step": 31960 }, { "epoch": 2.3822652757078986, "grad_norm": 2.391453504562378, "learning_rate": 0.0002, "loss": 2.4111, "step": 31970 }, { "epoch": 2.38301043219076, "grad_norm": 2.5468664169311523, "learning_rate": 0.0002, "loss": 2.5173, "step": 31980 }, { "epoch": 2.3837555886736217, "grad_norm": 2.59735107421875, "learning_rate": 0.0002, "loss": 2.5609, "step": 31990 }, { "epoch": 2.384500745156483, "grad_norm": 2.841322660446167, "learning_rate": 0.0002, "loss": 2.4528, "step": 32000 }, { "epoch": 2.3852459016393444, "grad_norm": 2.7470717430114746, "learning_rate": 0.0002, "loss": 2.6136, "step": 32010 }, { "epoch": 2.3859910581222055, "grad_norm": 3.1498608589172363, "learning_rate": 0.0002, "loss": 2.4515, "step": 32020 }, { "epoch": 2.386736214605067, "grad_norm": 2.6419332027435303, "learning_rate": 0.0002, "loss": 2.6439, "step": 32030 }, { "epoch": 2.3874813710879286, "grad_norm": 2.7486846446990967, "learning_rate": 0.0002, "loss": 2.3877, "step": 32040 }, { "epoch": 2.3882265275707897, "grad_norm": 2.418884515762329, "learning_rate": 0.0002, "loss": 2.3455, "step": 32050 }, { "epoch": 2.3889716840536512, "grad_norm": 2.8571934700012207, "learning_rate": 0.0002, "loss": 2.5127, "step": 32060 }, { "epoch": 2.389716840536513, "grad_norm": 2.683866262435913, "learning_rate": 0.0002, "loss": 2.604, "step": 32070 }, { "epoch": 2.390461997019374, "grad_norm": 2.7434074878692627, "learning_rate": 0.0002, "loss": 2.4338, "step": 32080 }, { "epoch": 2.3912071535022354, "grad_norm": 2.5270578861236572, "learning_rate": 0.0002, "loss": 2.555, "step": 32090 }, { "epoch": 2.391952309985097, "grad_norm": 2.526337146759033, "learning_rate": 0.0002, "loss": 2.5349, "step": 32100 }, { "epoch": 2.392697466467958, "grad_norm": 2.61208176612854, "learning_rate": 0.0002, "loss": 2.634, "step": 32110 }, { "epoch": 2.3934426229508197, "grad_norm": 2.6599786281585693, "learning_rate": 0.0002, "loss": 2.2547, "step": 32120 }, { "epoch": 2.394187779433681, "grad_norm": 2.4096555709838867, "learning_rate": 0.0002, "loss": 2.4555, "step": 32130 }, { "epoch": 2.3949329359165423, "grad_norm": 2.4262518882751465, "learning_rate": 0.0002, "loss": 2.5405, "step": 32140 }, { "epoch": 2.395678092399404, "grad_norm": 2.738983392715454, "learning_rate": 0.0002, "loss": 2.5729, "step": 32150 }, { "epoch": 2.3964232488822654, "grad_norm": 2.9784836769104004, "learning_rate": 0.0002, "loss": 2.5058, "step": 32160 }, { "epoch": 2.3971684053651265, "grad_norm": 2.204904556274414, "learning_rate": 0.0002, "loss": 2.4696, "step": 32170 }, { "epoch": 2.397913561847988, "grad_norm": 2.905687093734741, "learning_rate": 0.0002, "loss": 2.5, "step": 32180 }, { "epoch": 2.3986587183308496, "grad_norm": 2.540802240371704, "learning_rate": 0.0002, "loss": 2.7352, "step": 32190 }, { "epoch": 2.3994038748137108, "grad_norm": 2.4641029834747314, "learning_rate": 0.0002, "loss": 2.2883, "step": 32200 }, { "epoch": 2.4001490312965723, "grad_norm": 2.787086248397827, "learning_rate": 0.0002, "loss": 2.3687, "step": 32210 }, { "epoch": 2.400894187779434, "grad_norm": 2.6492245197296143, "learning_rate": 0.0002, "loss": 2.5022, "step": 32220 }, { "epoch": 2.401639344262295, "grad_norm": 2.8292956352233887, "learning_rate": 0.0002, "loss": 2.5244, "step": 32230 }, { "epoch": 2.4023845007451565, "grad_norm": 2.6720879077911377, "learning_rate": 0.0002, "loss": 2.3531, "step": 32240 }, { "epoch": 2.4031296572280176, "grad_norm": 2.6397554874420166, "learning_rate": 0.0002, "loss": 2.6169, "step": 32250 }, { "epoch": 2.403874813710879, "grad_norm": 2.494509696960449, "learning_rate": 0.0002, "loss": 2.6833, "step": 32260 }, { "epoch": 2.4046199701937407, "grad_norm": 2.6733009815216064, "learning_rate": 0.0002, "loss": 2.5063, "step": 32270 }, { "epoch": 2.4053651266766023, "grad_norm": 2.522325277328491, "learning_rate": 0.0002, "loss": 2.4202, "step": 32280 }, { "epoch": 2.4061102831594634, "grad_norm": 2.934260845184326, "learning_rate": 0.0002, "loss": 2.6828, "step": 32290 }, { "epoch": 2.406855439642325, "grad_norm": 2.7040140628814697, "learning_rate": 0.0002, "loss": 2.4384, "step": 32300 }, { "epoch": 2.407600596125186, "grad_norm": 2.9310309886932373, "learning_rate": 0.0002, "loss": 2.5099, "step": 32310 }, { "epoch": 2.4083457526080476, "grad_norm": 2.7560479640960693, "learning_rate": 0.0002, "loss": 2.4807, "step": 32320 }, { "epoch": 2.409090909090909, "grad_norm": 2.6352450847625732, "learning_rate": 0.0002, "loss": 2.5188, "step": 32330 }, { "epoch": 2.4098360655737707, "grad_norm": 2.613154888153076, "learning_rate": 0.0002, "loss": 2.6614, "step": 32340 }, { "epoch": 2.410581222056632, "grad_norm": 2.5213232040405273, "learning_rate": 0.0002, "loss": 2.6163, "step": 32350 }, { "epoch": 2.4113263785394934, "grad_norm": 2.757983684539795, "learning_rate": 0.0002, "loss": 2.4484, "step": 32360 }, { "epoch": 2.4120715350223545, "grad_norm": 2.594177007675171, "learning_rate": 0.0002, "loss": 2.4938, "step": 32370 }, { "epoch": 2.412816691505216, "grad_norm": 2.2750751972198486, "learning_rate": 0.0002, "loss": 2.3242, "step": 32380 }, { "epoch": 2.4135618479880776, "grad_norm": 3.5427920818328857, "learning_rate": 0.0002, "loss": 2.6142, "step": 32390 }, { "epoch": 2.4143070044709387, "grad_norm": 2.4356930255889893, "learning_rate": 0.0002, "loss": 2.2886, "step": 32400 }, { "epoch": 2.4150521609538003, "grad_norm": 3.0683324337005615, "learning_rate": 0.0002, "loss": 2.3534, "step": 32410 }, { "epoch": 2.415797317436662, "grad_norm": 2.464430809020996, "learning_rate": 0.0002, "loss": 2.3722, "step": 32420 }, { "epoch": 2.416542473919523, "grad_norm": 2.803317070007324, "learning_rate": 0.0002, "loss": 2.4745, "step": 32430 }, { "epoch": 2.4172876304023845, "grad_norm": 2.4788084030151367, "learning_rate": 0.0002, "loss": 2.4501, "step": 32440 }, { "epoch": 2.418032786885246, "grad_norm": 2.686638832092285, "learning_rate": 0.0002, "loss": 2.5698, "step": 32450 }, { "epoch": 2.418777943368107, "grad_norm": 3.0381052494049072, "learning_rate": 0.0002, "loss": 2.5183, "step": 32460 }, { "epoch": 2.4195230998509687, "grad_norm": 2.5471017360687256, "learning_rate": 0.0002, "loss": 2.5867, "step": 32470 }, { "epoch": 2.4202682563338302, "grad_norm": 2.2520577907562256, "learning_rate": 0.0002, "loss": 2.3733, "step": 32480 }, { "epoch": 2.4210134128166914, "grad_norm": 2.5006518363952637, "learning_rate": 0.0002, "loss": 2.5879, "step": 32490 }, { "epoch": 2.421758569299553, "grad_norm": 2.8727335929870605, "learning_rate": 0.0002, "loss": 2.489, "step": 32500 }, { "epoch": 2.4225037257824145, "grad_norm": 2.567643165588379, "learning_rate": 0.0002, "loss": 2.4855, "step": 32510 }, { "epoch": 2.4232488822652756, "grad_norm": 2.561478614807129, "learning_rate": 0.0002, "loss": 2.4656, "step": 32520 }, { "epoch": 2.423994038748137, "grad_norm": 2.8764541149139404, "learning_rate": 0.0002, "loss": 2.2183, "step": 32530 }, { "epoch": 2.4247391952309987, "grad_norm": 2.419424057006836, "learning_rate": 0.0002, "loss": 2.3354, "step": 32540 }, { "epoch": 2.42548435171386, "grad_norm": 2.3081166744232178, "learning_rate": 0.0002, "loss": 2.3535, "step": 32550 }, { "epoch": 2.4262295081967213, "grad_norm": 2.464271068572998, "learning_rate": 0.0002, "loss": 2.437, "step": 32560 }, { "epoch": 2.426974664679583, "grad_norm": 2.919722080230713, "learning_rate": 0.0002, "loss": 2.6343, "step": 32570 }, { "epoch": 2.427719821162444, "grad_norm": 2.682070016860962, "learning_rate": 0.0002, "loss": 2.453, "step": 32580 }, { "epoch": 2.4284649776453056, "grad_norm": 2.3702690601348877, "learning_rate": 0.0002, "loss": 2.3294, "step": 32590 }, { "epoch": 2.429210134128167, "grad_norm": 2.7352712154388428, "learning_rate": 0.0002, "loss": 2.6568, "step": 32600 }, { "epoch": 2.429955290611028, "grad_norm": 2.396092653274536, "learning_rate": 0.0002, "loss": 2.5299, "step": 32610 }, { "epoch": 2.4307004470938898, "grad_norm": 2.298727512359619, "learning_rate": 0.0002, "loss": 2.4894, "step": 32620 }, { "epoch": 2.4314456035767513, "grad_norm": 2.5034990310668945, "learning_rate": 0.0002, "loss": 2.5429, "step": 32630 }, { "epoch": 2.4321907600596124, "grad_norm": 2.662663221359253, "learning_rate": 0.0002, "loss": 2.5597, "step": 32640 }, { "epoch": 2.432935916542474, "grad_norm": 2.516818046569824, "learning_rate": 0.0002, "loss": 2.4248, "step": 32650 }, { "epoch": 2.433681073025335, "grad_norm": 2.416969060897827, "learning_rate": 0.0002, "loss": 2.5677, "step": 32660 }, { "epoch": 2.4344262295081966, "grad_norm": 2.575011968612671, "learning_rate": 0.0002, "loss": 2.5937, "step": 32670 }, { "epoch": 2.435171385991058, "grad_norm": 2.692478895187378, "learning_rate": 0.0002, "loss": 2.5584, "step": 32680 }, { "epoch": 2.4359165424739198, "grad_norm": 3.400285243988037, "learning_rate": 0.0002, "loss": 2.415, "step": 32690 }, { "epoch": 2.436661698956781, "grad_norm": 3.0482239723205566, "learning_rate": 0.0002, "loss": 2.6148, "step": 32700 }, { "epoch": 2.4374068554396424, "grad_norm": 2.66845965385437, "learning_rate": 0.0002, "loss": 2.4034, "step": 32710 }, { "epoch": 2.4381520119225035, "grad_norm": 2.6935739517211914, "learning_rate": 0.0002, "loss": 2.1729, "step": 32720 }, { "epoch": 2.438897168405365, "grad_norm": 2.8061110973358154, "learning_rate": 0.0002, "loss": 2.3881, "step": 32730 }, { "epoch": 2.4396423248882266, "grad_norm": 2.6738741397857666, "learning_rate": 0.0002, "loss": 2.4427, "step": 32740 }, { "epoch": 2.4403874813710877, "grad_norm": 2.7789554595947266, "learning_rate": 0.0002, "loss": 2.3962, "step": 32750 }, { "epoch": 2.4411326378539493, "grad_norm": 2.6854817867279053, "learning_rate": 0.0002, "loss": 2.4369, "step": 32760 }, { "epoch": 2.441877794336811, "grad_norm": 2.570359706878662, "learning_rate": 0.0002, "loss": 2.4015, "step": 32770 }, { "epoch": 2.442622950819672, "grad_norm": 2.7053561210632324, "learning_rate": 0.0002, "loss": 2.4896, "step": 32780 }, { "epoch": 2.4433681073025335, "grad_norm": 2.5930299758911133, "learning_rate": 0.0002, "loss": 2.4455, "step": 32790 }, { "epoch": 2.444113263785395, "grad_norm": 2.827674627304077, "learning_rate": 0.0002, "loss": 2.4475, "step": 32800 }, { "epoch": 2.444858420268256, "grad_norm": 2.686424732208252, "learning_rate": 0.0002, "loss": 2.4927, "step": 32810 }, { "epoch": 2.4456035767511177, "grad_norm": 2.965540647506714, "learning_rate": 0.0002, "loss": 2.673, "step": 32820 }, { "epoch": 2.4463487332339793, "grad_norm": 2.8941571712493896, "learning_rate": 0.0002, "loss": 2.5195, "step": 32830 }, { "epoch": 2.4470938897168404, "grad_norm": 3.103050947189331, "learning_rate": 0.0002, "loss": 2.5864, "step": 32840 }, { "epoch": 2.447839046199702, "grad_norm": 2.7334511280059814, "learning_rate": 0.0002, "loss": 2.4724, "step": 32850 }, { "epoch": 2.4485842026825635, "grad_norm": 2.449275493621826, "learning_rate": 0.0002, "loss": 2.5776, "step": 32860 }, { "epoch": 2.4493293591654246, "grad_norm": 2.5807056427001953, "learning_rate": 0.0002, "loss": 2.536, "step": 32870 }, { "epoch": 2.450074515648286, "grad_norm": 2.7957377433776855, "learning_rate": 0.0002, "loss": 2.6551, "step": 32880 }, { "epoch": 2.4508196721311477, "grad_norm": 2.858997106552124, "learning_rate": 0.0002, "loss": 2.3838, "step": 32890 }, { "epoch": 2.451564828614009, "grad_norm": 2.8242721557617188, "learning_rate": 0.0002, "loss": 2.3843, "step": 32900 }, { "epoch": 2.4523099850968704, "grad_norm": 2.914250612258911, "learning_rate": 0.0002, "loss": 2.3497, "step": 32910 }, { "epoch": 2.453055141579732, "grad_norm": 2.325657606124878, "learning_rate": 0.0002, "loss": 2.5284, "step": 32920 }, { "epoch": 2.453800298062593, "grad_norm": 2.4877982139587402, "learning_rate": 0.0002, "loss": 2.5986, "step": 32930 }, { "epoch": 2.4545454545454546, "grad_norm": 2.6177945137023926, "learning_rate": 0.0002, "loss": 2.5368, "step": 32940 }, { "epoch": 2.455290611028316, "grad_norm": 2.1501779556274414, "learning_rate": 0.0002, "loss": 2.3093, "step": 32950 }, { "epoch": 2.4560357675111772, "grad_norm": 2.546349048614502, "learning_rate": 0.0002, "loss": 2.3231, "step": 32960 }, { "epoch": 2.456780923994039, "grad_norm": 2.3199493885040283, "learning_rate": 0.0002, "loss": 2.5672, "step": 32970 }, { "epoch": 2.4575260804769004, "grad_norm": 2.532883405685425, "learning_rate": 0.0002, "loss": 2.2814, "step": 32980 }, { "epoch": 2.4582712369597615, "grad_norm": 2.9254093170166016, "learning_rate": 0.0002, "loss": 2.5414, "step": 32990 }, { "epoch": 2.459016393442623, "grad_norm": 2.5555853843688965, "learning_rate": 0.0002, "loss": 2.5877, "step": 33000 }, { "epoch": 2.459761549925484, "grad_norm": 2.520555019378662, "learning_rate": 0.0002, "loss": 2.4599, "step": 33010 }, { "epoch": 2.4605067064083457, "grad_norm": 2.652587413787842, "learning_rate": 0.0002, "loss": 2.5855, "step": 33020 }, { "epoch": 2.4612518628912072, "grad_norm": 2.7828288078308105, "learning_rate": 0.0002, "loss": 2.5077, "step": 33030 }, { "epoch": 2.461997019374069, "grad_norm": 2.4250879287719727, "learning_rate": 0.0002, "loss": 2.6537, "step": 33040 }, { "epoch": 2.46274217585693, "grad_norm": 2.376924753189087, "learning_rate": 0.0002, "loss": 2.5013, "step": 33050 }, { "epoch": 2.4634873323397914, "grad_norm": 2.6665396690368652, "learning_rate": 0.0002, "loss": 2.452, "step": 33060 }, { "epoch": 2.4642324888226526, "grad_norm": 2.920982599258423, "learning_rate": 0.0002, "loss": 2.4782, "step": 33070 }, { "epoch": 2.464977645305514, "grad_norm": 1.9443694353103638, "learning_rate": 0.0002, "loss": 2.2695, "step": 33080 }, { "epoch": 2.4657228017883757, "grad_norm": 2.8950319290161133, "learning_rate": 0.0002, "loss": 2.6325, "step": 33090 }, { "epoch": 2.4664679582712368, "grad_norm": 3.0620553493499756, "learning_rate": 0.0002, "loss": 2.3013, "step": 33100 }, { "epoch": 2.4672131147540983, "grad_norm": 2.653696060180664, "learning_rate": 0.0002, "loss": 2.5691, "step": 33110 }, { "epoch": 2.46795827123696, "grad_norm": 2.347299337387085, "learning_rate": 0.0002, "loss": 2.2995, "step": 33120 }, { "epoch": 2.468703427719821, "grad_norm": 2.8021976947784424, "learning_rate": 0.0002, "loss": 2.6018, "step": 33130 }, { "epoch": 2.4694485842026825, "grad_norm": 1.9987612962722778, "learning_rate": 0.0002, "loss": 2.4832, "step": 33140 }, { "epoch": 2.470193740685544, "grad_norm": 2.7334144115448, "learning_rate": 0.0002, "loss": 2.4264, "step": 33150 }, { "epoch": 2.470938897168405, "grad_norm": 2.6771295070648193, "learning_rate": 0.0002, "loss": 2.5609, "step": 33160 }, { "epoch": 2.4716840536512668, "grad_norm": 2.628952741622925, "learning_rate": 0.0002, "loss": 2.5076, "step": 33170 }, { "epoch": 2.4724292101341283, "grad_norm": 2.5400376319885254, "learning_rate": 0.0002, "loss": 2.6434, "step": 33180 }, { "epoch": 2.4731743666169894, "grad_norm": 2.332078695297241, "learning_rate": 0.0002, "loss": 2.4067, "step": 33190 }, { "epoch": 2.473919523099851, "grad_norm": 2.7730212211608887, "learning_rate": 0.0002, "loss": 2.4936, "step": 33200 }, { "epoch": 2.4746646795827125, "grad_norm": 2.5415468215942383, "learning_rate": 0.0002, "loss": 2.6113, "step": 33210 }, { "epoch": 2.4754098360655736, "grad_norm": 2.6376805305480957, "learning_rate": 0.0002, "loss": 2.4445, "step": 33220 }, { "epoch": 2.476154992548435, "grad_norm": 2.808635711669922, "learning_rate": 0.0002, "loss": 2.4696, "step": 33230 }, { "epoch": 2.4769001490312967, "grad_norm": 2.3896541595458984, "learning_rate": 0.0002, "loss": 2.316, "step": 33240 }, { "epoch": 2.477645305514158, "grad_norm": 2.628732919692993, "learning_rate": 0.0002, "loss": 2.661, "step": 33250 }, { "epoch": 2.4783904619970194, "grad_norm": 2.627577543258667, "learning_rate": 0.0002, "loss": 2.5131, "step": 33260 }, { "epoch": 2.479135618479881, "grad_norm": 2.3876442909240723, "learning_rate": 0.0002, "loss": 2.3684, "step": 33270 }, { "epoch": 2.479880774962742, "grad_norm": 2.680107355117798, "learning_rate": 0.0002, "loss": 2.3554, "step": 33280 }, { "epoch": 2.4806259314456036, "grad_norm": 2.5032846927642822, "learning_rate": 0.0002, "loss": 2.4945, "step": 33290 }, { "epoch": 2.481371087928465, "grad_norm": 2.6249349117279053, "learning_rate": 0.0002, "loss": 2.574, "step": 33300 }, { "epoch": 2.4821162444113263, "grad_norm": 2.3198981285095215, "learning_rate": 0.0002, "loss": 2.2761, "step": 33310 }, { "epoch": 2.482861400894188, "grad_norm": 2.745832920074463, "learning_rate": 0.0002, "loss": 2.2473, "step": 33320 }, { "epoch": 2.4836065573770494, "grad_norm": 2.7251551151275635, "learning_rate": 0.0002, "loss": 2.5399, "step": 33330 }, { "epoch": 2.4843517138599105, "grad_norm": 2.507437229156494, "learning_rate": 0.0002, "loss": 2.4234, "step": 33340 }, { "epoch": 2.485096870342772, "grad_norm": 2.7611029148101807, "learning_rate": 0.0002, "loss": 2.4828, "step": 33350 }, { "epoch": 2.485842026825633, "grad_norm": 2.7167751789093018, "learning_rate": 0.0002, "loss": 2.3991, "step": 33360 }, { "epoch": 2.4865871833084947, "grad_norm": 2.4960172176361084, "learning_rate": 0.0002, "loss": 2.3095, "step": 33370 }, { "epoch": 2.4873323397913563, "grad_norm": 2.461411237716675, "learning_rate": 0.0002, "loss": 2.4042, "step": 33380 }, { "epoch": 2.488077496274218, "grad_norm": 2.855609655380249, "learning_rate": 0.0002, "loss": 2.6609, "step": 33390 }, { "epoch": 2.488822652757079, "grad_norm": 2.677143096923828, "learning_rate": 0.0002, "loss": 2.4067, "step": 33400 }, { "epoch": 2.4895678092399405, "grad_norm": 2.7277297973632812, "learning_rate": 0.0002, "loss": 2.4223, "step": 33410 }, { "epoch": 2.4903129657228016, "grad_norm": 2.5944976806640625, "learning_rate": 0.0002, "loss": 2.622, "step": 33420 }, { "epoch": 2.491058122205663, "grad_norm": 2.656810998916626, "learning_rate": 0.0002, "loss": 2.5017, "step": 33430 }, { "epoch": 2.4918032786885247, "grad_norm": 2.643364429473877, "learning_rate": 0.0002, "loss": 2.5823, "step": 33440 }, { "epoch": 2.492548435171386, "grad_norm": 2.4666240215301514, "learning_rate": 0.0002, "loss": 2.4402, "step": 33450 }, { "epoch": 2.4932935916542474, "grad_norm": 2.453465700149536, "learning_rate": 0.0002, "loss": 2.6123, "step": 33460 }, { "epoch": 2.494038748137109, "grad_norm": 2.574659824371338, "learning_rate": 0.0002, "loss": 2.642, "step": 33470 }, { "epoch": 2.49478390461997, "grad_norm": 2.6516501903533936, "learning_rate": 0.0002, "loss": 2.3554, "step": 33480 }, { "epoch": 2.4955290611028316, "grad_norm": 2.5587661266326904, "learning_rate": 0.0002, "loss": 2.3586, "step": 33490 }, { "epoch": 2.496274217585693, "grad_norm": 2.8648366928100586, "learning_rate": 0.0002, "loss": 2.392, "step": 33500 }, { "epoch": 2.4970193740685542, "grad_norm": 2.6029820442199707, "learning_rate": 0.0002, "loss": 2.4047, "step": 33510 }, { "epoch": 2.497764530551416, "grad_norm": 3.100987672805786, "learning_rate": 0.0002, "loss": 2.4915, "step": 33520 }, { "epoch": 2.4985096870342773, "grad_norm": 2.6375701427459717, "learning_rate": 0.0002, "loss": 2.3645, "step": 33530 }, { "epoch": 2.4992548435171384, "grad_norm": 2.5169084072113037, "learning_rate": 0.0002, "loss": 2.3556, "step": 33540 }, { "epoch": 2.5, "grad_norm": 2.6907997131347656, "learning_rate": 0.0002, "loss": 2.4695, "step": 33550 }, { "epoch": 2.5007451564828616, "grad_norm": 2.838801622390747, "learning_rate": 0.0002, "loss": 2.6255, "step": 33560 }, { "epoch": 2.5014903129657227, "grad_norm": 2.6259567737579346, "learning_rate": 0.0002, "loss": 2.232, "step": 33570 }, { "epoch": 2.502235469448584, "grad_norm": 2.487612247467041, "learning_rate": 0.0002, "loss": 2.4681, "step": 33580 }, { "epoch": 2.5029806259314458, "grad_norm": 2.7123425006866455, "learning_rate": 0.0002, "loss": 2.4245, "step": 33590 }, { "epoch": 2.503725782414307, "grad_norm": 2.5011355876922607, "learning_rate": 0.0002, "loss": 2.3437, "step": 33600 }, { "epoch": 2.5044709388971684, "grad_norm": 2.726006507873535, "learning_rate": 0.0002, "loss": 2.5336, "step": 33610 }, { "epoch": 2.50521609538003, "grad_norm": 2.6538584232330322, "learning_rate": 0.0002, "loss": 2.5752, "step": 33620 }, { "epoch": 2.505961251862891, "grad_norm": 2.8791120052337646, "learning_rate": 0.0002, "loss": 2.5624, "step": 33630 }, { "epoch": 2.5067064083457526, "grad_norm": 2.7906670570373535, "learning_rate": 0.0002, "loss": 2.566, "step": 33640 }, { "epoch": 2.5074515648286138, "grad_norm": 2.980694532394409, "learning_rate": 0.0002, "loss": 2.6706, "step": 33650 }, { "epoch": 2.5081967213114753, "grad_norm": 2.641268014907837, "learning_rate": 0.0002, "loss": 2.4101, "step": 33660 }, { "epoch": 2.508941877794337, "grad_norm": 2.835050344467163, "learning_rate": 0.0002, "loss": 2.4747, "step": 33670 }, { "epoch": 2.5096870342771984, "grad_norm": 2.5962984561920166, "learning_rate": 0.0002, "loss": 2.3435, "step": 33680 }, { "epoch": 2.5104321907600595, "grad_norm": 2.6460578441619873, "learning_rate": 0.0002, "loss": 2.4282, "step": 33690 }, { "epoch": 2.511177347242921, "grad_norm": 2.415022373199463, "learning_rate": 0.0002, "loss": 2.3002, "step": 33700 }, { "epoch": 2.511922503725782, "grad_norm": 2.628546953201294, "learning_rate": 0.0002, "loss": 2.6371, "step": 33710 }, { "epoch": 2.5126676602086437, "grad_norm": 2.697800636291504, "learning_rate": 0.0002, "loss": 2.302, "step": 33720 }, { "epoch": 2.5134128166915053, "grad_norm": 2.8529772758483887, "learning_rate": 0.0002, "loss": 2.5254, "step": 33730 }, { "epoch": 2.514157973174367, "grad_norm": 2.5316379070281982, "learning_rate": 0.0002, "loss": 2.4038, "step": 33740 }, { "epoch": 2.514903129657228, "grad_norm": 2.350818634033203, "learning_rate": 0.0002, "loss": 2.3349, "step": 33750 }, { "epoch": 2.5156482861400895, "grad_norm": 2.5626444816589355, "learning_rate": 0.0002, "loss": 2.5337, "step": 33760 }, { "epoch": 2.5163934426229506, "grad_norm": 2.1654140949249268, "learning_rate": 0.0002, "loss": 2.3723, "step": 33770 }, { "epoch": 2.517138599105812, "grad_norm": 3.0190258026123047, "learning_rate": 0.0002, "loss": 2.5135, "step": 33780 }, { "epoch": 2.5178837555886737, "grad_norm": 2.6092681884765625, "learning_rate": 0.0002, "loss": 2.4508, "step": 33790 }, { "epoch": 2.5186289120715353, "grad_norm": 3.0868630409240723, "learning_rate": 0.0002, "loss": 2.4954, "step": 33800 }, { "epoch": 2.5193740685543964, "grad_norm": 2.437244176864624, "learning_rate": 0.0002, "loss": 2.5313, "step": 33810 }, { "epoch": 2.520119225037258, "grad_norm": 2.7770519256591797, "learning_rate": 0.0002, "loss": 2.4047, "step": 33820 }, { "epoch": 2.520864381520119, "grad_norm": 2.540032386779785, "learning_rate": 0.0002, "loss": 2.3, "step": 33830 }, { "epoch": 2.5216095380029806, "grad_norm": 2.8454859256744385, "learning_rate": 0.0002, "loss": 2.4243, "step": 33840 }, { "epoch": 2.522354694485842, "grad_norm": 2.4793384075164795, "learning_rate": 0.0002, "loss": 2.441, "step": 33850 }, { "epoch": 2.5230998509687033, "grad_norm": 2.884309768676758, "learning_rate": 0.0002, "loss": 2.6058, "step": 33860 }, { "epoch": 2.523845007451565, "grad_norm": 2.188300371170044, "learning_rate": 0.0002, "loss": 2.2676, "step": 33870 }, { "epoch": 2.5245901639344264, "grad_norm": 2.1281039714813232, "learning_rate": 0.0002, "loss": 2.4825, "step": 33880 }, { "epoch": 2.5253353204172875, "grad_norm": 2.872929096221924, "learning_rate": 0.0002, "loss": 2.6046, "step": 33890 }, { "epoch": 2.526080476900149, "grad_norm": 2.7643649578094482, "learning_rate": 0.0002, "loss": 2.5626, "step": 33900 }, { "epoch": 2.5268256333830106, "grad_norm": 2.42836856842041, "learning_rate": 0.0002, "loss": 2.5225, "step": 33910 }, { "epoch": 2.5275707898658717, "grad_norm": 2.7934534549713135, "learning_rate": 0.0002, "loss": 2.6089, "step": 33920 }, { "epoch": 2.5283159463487332, "grad_norm": 2.9219272136688232, "learning_rate": 0.0002, "loss": 2.4704, "step": 33930 }, { "epoch": 2.529061102831595, "grad_norm": 2.470702648162842, "learning_rate": 0.0002, "loss": 2.5143, "step": 33940 }, { "epoch": 2.529806259314456, "grad_norm": 2.40101957321167, "learning_rate": 0.0002, "loss": 2.4033, "step": 33950 }, { "epoch": 2.5305514157973175, "grad_norm": 2.4224092960357666, "learning_rate": 0.0002, "loss": 2.3719, "step": 33960 }, { "epoch": 2.531296572280179, "grad_norm": 2.8269007205963135, "learning_rate": 0.0002, "loss": 2.6289, "step": 33970 }, { "epoch": 2.53204172876304, "grad_norm": 2.590324878692627, "learning_rate": 0.0002, "loss": 2.3846, "step": 33980 }, { "epoch": 2.5327868852459017, "grad_norm": 2.609064817428589, "learning_rate": 0.0002, "loss": 2.5036, "step": 33990 }, { "epoch": 2.533532041728763, "grad_norm": 2.3954341411590576, "learning_rate": 0.0002, "loss": 2.3507, "step": 34000 }, { "epoch": 2.5342771982116243, "grad_norm": 2.4784460067749023, "learning_rate": 0.0002, "loss": 2.5389, "step": 34010 }, { "epoch": 2.535022354694486, "grad_norm": 2.3329291343688965, "learning_rate": 0.0002, "loss": 2.4564, "step": 34020 }, { "epoch": 2.5357675111773474, "grad_norm": 2.564966917037964, "learning_rate": 0.0002, "loss": 2.5259, "step": 34030 }, { "epoch": 2.5365126676602086, "grad_norm": 2.7907464504241943, "learning_rate": 0.0002, "loss": 2.5996, "step": 34040 }, { "epoch": 2.53725782414307, "grad_norm": 2.61496901512146, "learning_rate": 0.0002, "loss": 2.4294, "step": 34050 }, { "epoch": 2.538002980625931, "grad_norm": 2.5736818313598633, "learning_rate": 0.0002, "loss": 2.2709, "step": 34060 }, { "epoch": 2.5387481371087928, "grad_norm": 2.3524608612060547, "learning_rate": 0.0002, "loss": 2.2737, "step": 34070 }, { "epoch": 2.5394932935916543, "grad_norm": 2.7142202854156494, "learning_rate": 0.0002, "loss": 2.4911, "step": 34080 }, { "epoch": 2.540238450074516, "grad_norm": 2.64286470413208, "learning_rate": 0.0002, "loss": 2.5254, "step": 34090 }, { "epoch": 2.540983606557377, "grad_norm": 3.114713191986084, "learning_rate": 0.0002, "loss": 2.5674, "step": 34100 }, { "epoch": 2.5417287630402385, "grad_norm": 3.056455373764038, "learning_rate": 0.0002, "loss": 2.4114, "step": 34110 }, { "epoch": 2.5424739195230996, "grad_norm": 2.2923319339752197, "learning_rate": 0.0002, "loss": 2.372, "step": 34120 }, { "epoch": 2.543219076005961, "grad_norm": 3.0862503051757812, "learning_rate": 0.0002, "loss": 2.5038, "step": 34130 }, { "epoch": 2.5439642324888228, "grad_norm": 2.7312214374542236, "learning_rate": 0.0002, "loss": 2.7273, "step": 34140 }, { "epoch": 2.5447093889716843, "grad_norm": 2.667236089706421, "learning_rate": 0.0002, "loss": 2.5896, "step": 34150 }, { "epoch": 2.5454545454545454, "grad_norm": 2.5944461822509766, "learning_rate": 0.0002, "loss": 2.6472, "step": 34160 }, { "epoch": 2.546199701937407, "grad_norm": 2.375399589538574, "learning_rate": 0.0002, "loss": 2.5534, "step": 34170 }, { "epoch": 2.546944858420268, "grad_norm": 2.3310904502868652, "learning_rate": 0.0002, "loss": 2.3438, "step": 34180 }, { "epoch": 2.5476900149031296, "grad_norm": 2.3396718502044678, "learning_rate": 0.0002, "loss": 2.629, "step": 34190 }, { "epoch": 2.548435171385991, "grad_norm": 2.717461109161377, "learning_rate": 0.0002, "loss": 2.5391, "step": 34200 }, { "epoch": 2.5491803278688527, "grad_norm": 2.5795769691467285, "learning_rate": 0.0002, "loss": 2.4663, "step": 34210 }, { "epoch": 2.549925484351714, "grad_norm": 2.31074857711792, "learning_rate": 0.0002, "loss": 2.3499, "step": 34220 }, { "epoch": 2.5506706408345754, "grad_norm": 2.2600789070129395, "learning_rate": 0.0002, "loss": 2.4089, "step": 34230 }, { "epoch": 2.5514157973174365, "grad_norm": 2.5004208087921143, "learning_rate": 0.0002, "loss": 2.4895, "step": 34240 }, { "epoch": 2.552160953800298, "grad_norm": 2.4714882373809814, "learning_rate": 0.0002, "loss": 2.3912, "step": 34250 }, { "epoch": 2.5529061102831596, "grad_norm": 2.5733115673065186, "learning_rate": 0.0002, "loss": 2.4612, "step": 34260 }, { "epoch": 2.5536512667660207, "grad_norm": 2.537883996963501, "learning_rate": 0.0002, "loss": 2.5025, "step": 34270 }, { "epoch": 2.5543964232488823, "grad_norm": 2.867398262023926, "learning_rate": 0.0002, "loss": 2.4764, "step": 34280 }, { "epoch": 2.555141579731744, "grad_norm": 2.3895552158355713, "learning_rate": 0.0002, "loss": 2.6755, "step": 34290 }, { "epoch": 2.555886736214605, "grad_norm": 2.6244559288024902, "learning_rate": 0.0002, "loss": 2.5904, "step": 34300 }, { "epoch": 2.5566318926974665, "grad_norm": 3.0970191955566406, "learning_rate": 0.0002, "loss": 2.4423, "step": 34310 }, { "epoch": 2.557377049180328, "grad_norm": 2.543386459350586, "learning_rate": 0.0002, "loss": 2.4975, "step": 34320 }, { "epoch": 2.558122205663189, "grad_norm": 2.6352686882019043, "learning_rate": 0.0002, "loss": 2.5865, "step": 34330 }, { "epoch": 2.5588673621460507, "grad_norm": 3.1985583305358887, "learning_rate": 0.0002, "loss": 2.6501, "step": 34340 }, { "epoch": 2.559612518628912, "grad_norm": 2.206559896469116, "learning_rate": 0.0002, "loss": 2.5045, "step": 34350 }, { "epoch": 2.5603576751117734, "grad_norm": 2.626335620880127, "learning_rate": 0.0002, "loss": 2.4685, "step": 34360 }, { "epoch": 2.561102831594635, "grad_norm": 2.7654409408569336, "learning_rate": 0.0002, "loss": 2.2565, "step": 34370 }, { "epoch": 2.5618479880774965, "grad_norm": 2.5158398151397705, "learning_rate": 0.0002, "loss": 2.5521, "step": 34380 }, { "epoch": 2.5625931445603576, "grad_norm": 3.0391643047332764, "learning_rate": 0.0002, "loss": 2.5027, "step": 34390 }, { "epoch": 2.563338301043219, "grad_norm": 2.9316203594207764, "learning_rate": 0.0002, "loss": 2.3832, "step": 34400 }, { "epoch": 2.5640834575260802, "grad_norm": 2.54579496383667, "learning_rate": 0.0002, "loss": 2.4208, "step": 34410 }, { "epoch": 2.564828614008942, "grad_norm": 2.8894309997558594, "learning_rate": 0.0002, "loss": 2.4221, "step": 34420 }, { "epoch": 2.5655737704918034, "grad_norm": 2.742469072341919, "learning_rate": 0.0002, "loss": 2.5528, "step": 34430 }, { "epoch": 2.566318926974665, "grad_norm": 2.7238597869873047, "learning_rate": 0.0002, "loss": 2.5734, "step": 34440 }, { "epoch": 2.567064083457526, "grad_norm": 2.892765998840332, "learning_rate": 0.0002, "loss": 2.3945, "step": 34450 }, { "epoch": 2.5678092399403876, "grad_norm": 2.4451138973236084, "learning_rate": 0.0002, "loss": 2.5518, "step": 34460 }, { "epoch": 2.5685543964232487, "grad_norm": 2.573063850402832, "learning_rate": 0.0002, "loss": 2.4919, "step": 34470 }, { "epoch": 2.5692995529061102, "grad_norm": 2.7531449794769287, "learning_rate": 0.0002, "loss": 2.5301, "step": 34480 }, { "epoch": 2.570044709388972, "grad_norm": 2.6223232746124268, "learning_rate": 0.0002, "loss": 2.6511, "step": 34490 }, { "epoch": 2.5707898658718333, "grad_norm": 2.8373148441314697, "learning_rate": 0.0002, "loss": 2.3971, "step": 34500 }, { "epoch": 2.5715350223546944, "grad_norm": 2.770490884780884, "learning_rate": 0.0002, "loss": 2.5108, "step": 34510 }, { "epoch": 2.572280178837556, "grad_norm": 2.5854387283325195, "learning_rate": 0.0002, "loss": 2.5076, "step": 34520 }, { "epoch": 2.573025335320417, "grad_norm": 2.411576986312866, "learning_rate": 0.0002, "loss": 2.6648, "step": 34530 }, { "epoch": 2.5737704918032787, "grad_norm": 2.5123982429504395, "learning_rate": 0.0002, "loss": 2.5907, "step": 34540 }, { "epoch": 2.57451564828614, "grad_norm": 2.831042528152466, "learning_rate": 0.0002, "loss": 2.5356, "step": 34550 }, { "epoch": 2.5752608047690018, "grad_norm": 2.643927574157715, "learning_rate": 0.0002, "loss": 2.4261, "step": 34560 }, { "epoch": 2.576005961251863, "grad_norm": 2.698838949203491, "learning_rate": 0.0002, "loss": 2.4362, "step": 34570 }, { "epoch": 2.5767511177347244, "grad_norm": 2.549325704574585, "learning_rate": 0.0002, "loss": 2.5073, "step": 34580 }, { "epoch": 2.5774962742175855, "grad_norm": 2.3992767333984375, "learning_rate": 0.0002, "loss": 2.468, "step": 34590 }, { "epoch": 2.578241430700447, "grad_norm": 3.1624796390533447, "learning_rate": 0.0002, "loss": 2.5036, "step": 34600 }, { "epoch": 2.5789865871833086, "grad_norm": 2.8962011337280273, "learning_rate": 0.0002, "loss": 2.5846, "step": 34610 }, { "epoch": 2.5797317436661698, "grad_norm": 2.2955880165100098, "learning_rate": 0.0002, "loss": 2.5806, "step": 34620 }, { "epoch": 2.5804769001490313, "grad_norm": 2.375588893890381, "learning_rate": 0.0002, "loss": 2.4193, "step": 34630 }, { "epoch": 2.581222056631893, "grad_norm": 2.7803938388824463, "learning_rate": 0.0002, "loss": 2.4429, "step": 34640 }, { "epoch": 2.581967213114754, "grad_norm": 2.5117294788360596, "learning_rate": 0.0002, "loss": 2.459, "step": 34650 }, { "epoch": 2.5827123695976155, "grad_norm": 2.65371036529541, "learning_rate": 0.0002, "loss": 2.5789, "step": 34660 }, { "epoch": 2.583457526080477, "grad_norm": 2.6229374408721924, "learning_rate": 0.0002, "loss": 2.6575, "step": 34670 }, { "epoch": 2.584202682563338, "grad_norm": 3.0383689403533936, "learning_rate": 0.0002, "loss": 2.5808, "step": 34680 }, { "epoch": 2.5849478390461997, "grad_norm": 2.6986963748931885, "learning_rate": 0.0002, "loss": 2.3615, "step": 34690 }, { "epoch": 2.585692995529061, "grad_norm": 2.7407946586608887, "learning_rate": 0.0002, "loss": 2.1574, "step": 34700 }, { "epoch": 2.5864381520119224, "grad_norm": 2.9318466186523438, "learning_rate": 0.0002, "loss": 2.6112, "step": 34710 }, { "epoch": 2.587183308494784, "grad_norm": 2.139223337173462, "learning_rate": 0.0002, "loss": 2.4195, "step": 34720 }, { "epoch": 2.5879284649776455, "grad_norm": 2.886835813522339, "learning_rate": 0.0002, "loss": 2.4984, "step": 34730 }, { "epoch": 2.5886736214605066, "grad_norm": 2.40415358543396, "learning_rate": 0.0002, "loss": 2.5024, "step": 34740 }, { "epoch": 2.589418777943368, "grad_norm": 2.612410306930542, "learning_rate": 0.0002, "loss": 2.4832, "step": 34750 }, { "epoch": 2.5901639344262293, "grad_norm": 2.2832210063934326, "learning_rate": 0.0002, "loss": 2.4649, "step": 34760 }, { "epoch": 2.590909090909091, "grad_norm": 2.763620615005493, "learning_rate": 0.0002, "loss": 2.3836, "step": 34770 }, { "epoch": 2.5916542473919524, "grad_norm": 2.6266095638275146, "learning_rate": 0.0002, "loss": 2.4563, "step": 34780 }, { "epoch": 2.592399403874814, "grad_norm": 2.604623317718506, "learning_rate": 0.0002, "loss": 2.5474, "step": 34790 }, { "epoch": 2.593144560357675, "grad_norm": 2.5370583534240723, "learning_rate": 0.0002, "loss": 2.4926, "step": 34800 }, { "epoch": 2.5938897168405366, "grad_norm": 2.7688779830932617, "learning_rate": 0.0002, "loss": 2.5277, "step": 34810 }, { "epoch": 2.5946348733233977, "grad_norm": 1.6677604913711548, "learning_rate": 0.0002, "loss": 2.4469, "step": 34820 }, { "epoch": 2.5953800298062593, "grad_norm": 2.6081464290618896, "learning_rate": 0.0002, "loss": 2.6667, "step": 34830 }, { "epoch": 2.596125186289121, "grad_norm": 2.504087209701538, "learning_rate": 0.0002, "loss": 2.4811, "step": 34840 }, { "epoch": 2.5968703427719824, "grad_norm": 2.514352798461914, "learning_rate": 0.0002, "loss": 2.4831, "step": 34850 }, { "epoch": 2.5976154992548435, "grad_norm": 2.6763663291931152, "learning_rate": 0.0002, "loss": 2.4431, "step": 34860 }, { "epoch": 2.598360655737705, "grad_norm": 2.6285643577575684, "learning_rate": 0.0002, "loss": 2.6107, "step": 34870 }, { "epoch": 2.599105812220566, "grad_norm": 2.520859956741333, "learning_rate": 0.0002, "loss": 2.5125, "step": 34880 }, { "epoch": 2.5998509687034277, "grad_norm": 2.3472795486450195, "learning_rate": 0.0002, "loss": 2.308, "step": 34890 }, { "epoch": 2.6005961251862892, "grad_norm": 2.735682249069214, "learning_rate": 0.0002, "loss": 2.5909, "step": 34900 }, { "epoch": 2.601341281669151, "grad_norm": 3.270073652267456, "learning_rate": 0.0002, "loss": 2.6196, "step": 34910 }, { "epoch": 2.602086438152012, "grad_norm": 2.6279194355010986, "learning_rate": 0.0002, "loss": 2.519, "step": 34920 }, { "epoch": 2.6028315946348735, "grad_norm": 2.125945568084717, "learning_rate": 0.0002, "loss": 2.4861, "step": 34930 }, { "epoch": 2.6035767511177346, "grad_norm": 2.817042589187622, "learning_rate": 0.0002, "loss": 2.4631, "step": 34940 }, { "epoch": 2.604321907600596, "grad_norm": 2.75313138961792, "learning_rate": 0.0002, "loss": 2.304, "step": 34950 }, { "epoch": 2.6050670640834577, "grad_norm": 2.5719428062438965, "learning_rate": 0.0002, "loss": 2.4361, "step": 34960 }, { "epoch": 2.605812220566319, "grad_norm": 2.4278576374053955, "learning_rate": 0.0002, "loss": 2.5518, "step": 34970 }, { "epoch": 2.6065573770491803, "grad_norm": 2.707906484603882, "learning_rate": 0.0002, "loss": 2.5591, "step": 34980 }, { "epoch": 2.607302533532042, "grad_norm": 3.9694321155548096, "learning_rate": 0.0002, "loss": 2.5416, "step": 34990 }, { "epoch": 2.608047690014903, "grad_norm": 2.7127926349639893, "learning_rate": 0.0002, "loss": 2.3842, "step": 35000 }, { "epoch": 2.6087928464977646, "grad_norm": 2.6059675216674805, "learning_rate": 0.0002, "loss": 2.4429, "step": 35010 }, { "epoch": 2.609538002980626, "grad_norm": 2.09088134765625, "learning_rate": 0.0002, "loss": 2.3938, "step": 35020 }, { "epoch": 2.610283159463487, "grad_norm": 3.14701509475708, "learning_rate": 0.0002, "loss": 2.3627, "step": 35030 }, { "epoch": 2.6110283159463488, "grad_norm": 2.85632061958313, "learning_rate": 0.0002, "loss": 2.6743, "step": 35040 }, { "epoch": 2.61177347242921, "grad_norm": 2.6178882122039795, "learning_rate": 0.0002, "loss": 2.5535, "step": 35050 }, { "epoch": 2.6125186289120714, "grad_norm": 2.384420394897461, "learning_rate": 0.0002, "loss": 2.5122, "step": 35060 }, { "epoch": 2.613263785394933, "grad_norm": 2.4369149208068848, "learning_rate": 0.0002, "loss": 2.6472, "step": 35070 }, { "epoch": 2.6140089418777945, "grad_norm": 2.6348259449005127, "learning_rate": 0.0002, "loss": 2.4447, "step": 35080 }, { "epoch": 2.6147540983606556, "grad_norm": 2.780752420425415, "learning_rate": 0.0002, "loss": 2.5351, "step": 35090 }, { "epoch": 2.615499254843517, "grad_norm": 2.450267791748047, "learning_rate": 0.0002, "loss": 2.3382, "step": 35100 }, { "epoch": 2.6162444113263783, "grad_norm": 2.720033645629883, "learning_rate": 0.0002, "loss": 2.3684, "step": 35110 }, { "epoch": 2.61698956780924, "grad_norm": 2.995755910873413, "learning_rate": 0.0002, "loss": 2.4707, "step": 35120 }, { "epoch": 2.6177347242921014, "grad_norm": 2.7804386615753174, "learning_rate": 0.0002, "loss": 2.5992, "step": 35130 }, { "epoch": 2.618479880774963, "grad_norm": 2.687592029571533, "learning_rate": 0.0002, "loss": 2.6438, "step": 35140 }, { "epoch": 2.619225037257824, "grad_norm": 2.72942852973938, "learning_rate": 0.0002, "loss": 2.5031, "step": 35150 }, { "epoch": 2.6199701937406856, "grad_norm": 2.743436813354492, "learning_rate": 0.0002, "loss": 2.5397, "step": 35160 }, { "epoch": 2.6207153502235467, "grad_norm": 2.544156789779663, "learning_rate": 0.0002, "loss": 2.579, "step": 35170 }, { "epoch": 2.6214605067064083, "grad_norm": 2.2848076820373535, "learning_rate": 0.0002, "loss": 2.42, "step": 35180 }, { "epoch": 2.62220566318927, "grad_norm": 2.5742909908294678, "learning_rate": 0.0002, "loss": 2.4444, "step": 35190 }, { "epoch": 2.6229508196721314, "grad_norm": 2.081449270248413, "learning_rate": 0.0002, "loss": 2.4044, "step": 35200 }, { "epoch": 2.6236959761549925, "grad_norm": 2.488731622695923, "learning_rate": 0.0002, "loss": 2.5191, "step": 35210 }, { "epoch": 2.624441132637854, "grad_norm": 2.7537927627563477, "learning_rate": 0.0002, "loss": 2.6245, "step": 35220 }, { "epoch": 2.625186289120715, "grad_norm": 2.9869134426116943, "learning_rate": 0.0002, "loss": 2.4964, "step": 35230 }, { "epoch": 2.6259314456035767, "grad_norm": 2.343207836151123, "learning_rate": 0.0002, "loss": 2.3746, "step": 35240 }, { "epoch": 2.6266766020864383, "grad_norm": 2.5939278602600098, "learning_rate": 0.0002, "loss": 2.5991, "step": 35250 }, { "epoch": 2.6274217585693, "grad_norm": 2.727090835571289, "learning_rate": 0.0002, "loss": 2.2542, "step": 35260 }, { "epoch": 2.628166915052161, "grad_norm": 2.4267163276672363, "learning_rate": 0.0002, "loss": 2.3105, "step": 35270 }, { "epoch": 2.6289120715350225, "grad_norm": 2.565988540649414, "learning_rate": 0.0002, "loss": 2.6, "step": 35280 }, { "epoch": 2.6296572280178836, "grad_norm": 2.4375131130218506, "learning_rate": 0.0002, "loss": 2.5983, "step": 35290 }, { "epoch": 2.630402384500745, "grad_norm": 2.661345958709717, "learning_rate": 0.0002, "loss": 2.3076, "step": 35300 }, { "epoch": 2.6311475409836067, "grad_norm": 2.9548776149749756, "learning_rate": 0.0002, "loss": 2.5269, "step": 35310 }, { "epoch": 2.631892697466468, "grad_norm": 2.780071496963501, "learning_rate": 0.0002, "loss": 2.5164, "step": 35320 }, { "epoch": 2.6326378539493294, "grad_norm": 3.2674901485443115, "learning_rate": 0.0002, "loss": 2.3561, "step": 35330 }, { "epoch": 2.633383010432191, "grad_norm": 2.833890438079834, "learning_rate": 0.0002, "loss": 2.2951, "step": 35340 }, { "epoch": 2.634128166915052, "grad_norm": 2.463898181915283, "learning_rate": 0.0002, "loss": 2.5373, "step": 35350 }, { "epoch": 2.6348733233979136, "grad_norm": 2.6374974250793457, "learning_rate": 0.0002, "loss": 2.5007, "step": 35360 }, { "epoch": 2.635618479880775, "grad_norm": 2.711229085922241, "learning_rate": 0.0002, "loss": 2.4471, "step": 35370 }, { "epoch": 2.6363636363636362, "grad_norm": 2.583221435546875, "learning_rate": 0.0002, "loss": 2.5363, "step": 35380 }, { "epoch": 2.637108792846498, "grad_norm": 2.668238878250122, "learning_rate": 0.0002, "loss": 2.5197, "step": 35390 }, { "epoch": 2.637853949329359, "grad_norm": 2.6465303897857666, "learning_rate": 0.0002, "loss": 2.2952, "step": 35400 }, { "epoch": 2.6385991058122205, "grad_norm": 2.5099635124206543, "learning_rate": 0.0002, "loss": 2.547, "step": 35410 }, { "epoch": 2.639344262295082, "grad_norm": 2.978407621383667, "learning_rate": 0.0002, "loss": 2.563, "step": 35420 }, { "epoch": 2.6400894187779436, "grad_norm": 2.6208372116088867, "learning_rate": 0.0002, "loss": 2.6188, "step": 35430 }, { "epoch": 2.6408345752608047, "grad_norm": 3.47013258934021, "learning_rate": 0.0002, "loss": 2.5536, "step": 35440 }, { "epoch": 2.6415797317436662, "grad_norm": 2.438246726989746, "learning_rate": 0.0002, "loss": 2.385, "step": 35450 }, { "epoch": 2.6423248882265273, "grad_norm": 2.5819106101989746, "learning_rate": 0.0002, "loss": 2.5517, "step": 35460 }, { "epoch": 2.643070044709389, "grad_norm": 2.8637759685516357, "learning_rate": 0.0002, "loss": 2.48, "step": 35470 }, { "epoch": 2.6438152011922504, "grad_norm": 2.3931562900543213, "learning_rate": 0.0002, "loss": 2.3955, "step": 35480 }, { "epoch": 2.644560357675112, "grad_norm": 3.022334575653076, "learning_rate": 0.0002, "loss": 2.3759, "step": 35490 }, { "epoch": 2.645305514157973, "grad_norm": 2.471989154815674, "learning_rate": 0.0002, "loss": 2.4271, "step": 35500 }, { "epoch": 2.6460506706408347, "grad_norm": 2.57979679107666, "learning_rate": 0.0002, "loss": 2.5865, "step": 35510 }, { "epoch": 2.6467958271236958, "grad_norm": 2.5882067680358887, "learning_rate": 0.0002, "loss": 2.5651, "step": 35520 }, { "epoch": 2.6475409836065573, "grad_norm": 2.924689531326294, "learning_rate": 0.0002, "loss": 2.5692, "step": 35530 }, { "epoch": 2.648286140089419, "grad_norm": 2.678907632827759, "learning_rate": 0.0002, "loss": 2.3231, "step": 35540 }, { "epoch": 2.6490312965722804, "grad_norm": 2.9073657989501953, "learning_rate": 0.0002, "loss": 2.4733, "step": 35550 }, { "epoch": 2.6497764530551415, "grad_norm": 2.6203291416168213, "learning_rate": 0.0002, "loss": 2.4252, "step": 35560 }, { "epoch": 2.650521609538003, "grad_norm": 2.8620171546936035, "learning_rate": 0.0002, "loss": 2.5345, "step": 35570 }, { "epoch": 2.651266766020864, "grad_norm": 3.0765066146850586, "learning_rate": 0.0002, "loss": 2.5476, "step": 35580 }, { "epoch": 2.6520119225037257, "grad_norm": 2.6432547569274902, "learning_rate": 0.0002, "loss": 2.6744, "step": 35590 }, { "epoch": 2.6527570789865873, "grad_norm": 2.4183008670806885, "learning_rate": 0.0002, "loss": 2.4373, "step": 35600 }, { "epoch": 2.653502235469449, "grad_norm": 2.6401169300079346, "learning_rate": 0.0002, "loss": 2.6115, "step": 35610 }, { "epoch": 2.65424739195231, "grad_norm": 2.336107015609741, "learning_rate": 0.0002, "loss": 2.5014, "step": 35620 }, { "epoch": 2.6549925484351715, "grad_norm": 2.7248358726501465, "learning_rate": 0.0002, "loss": 2.3869, "step": 35630 }, { "epoch": 2.6557377049180326, "grad_norm": 2.5932023525238037, "learning_rate": 0.0002, "loss": 2.6997, "step": 35640 }, { "epoch": 2.656482861400894, "grad_norm": 2.6041550636291504, "learning_rate": 0.0002, "loss": 2.4897, "step": 35650 }, { "epoch": 2.6572280178837557, "grad_norm": 2.6969892978668213, "learning_rate": 0.0002, "loss": 2.564, "step": 35660 }, { "epoch": 2.657973174366617, "grad_norm": 2.610666275024414, "learning_rate": 0.0002, "loss": 2.6471, "step": 35670 }, { "epoch": 2.6587183308494784, "grad_norm": 2.631580114364624, "learning_rate": 0.0002, "loss": 2.5464, "step": 35680 }, { "epoch": 2.65946348733234, "grad_norm": 2.7000410556793213, "learning_rate": 0.0002, "loss": 2.5847, "step": 35690 }, { "epoch": 2.660208643815201, "grad_norm": 2.5906224250793457, "learning_rate": 0.0002, "loss": 2.5968, "step": 35700 }, { "epoch": 2.6609538002980626, "grad_norm": 2.6354689598083496, "learning_rate": 0.0002, "loss": 2.6701, "step": 35710 }, { "epoch": 2.661698956780924, "grad_norm": 2.598720073699951, "learning_rate": 0.0002, "loss": 2.5396, "step": 35720 }, { "epoch": 2.6624441132637853, "grad_norm": 2.6841044425964355, "learning_rate": 0.0002, "loss": 2.5725, "step": 35730 }, { "epoch": 2.663189269746647, "grad_norm": 2.7917165756225586, "learning_rate": 0.0002, "loss": 2.5492, "step": 35740 }, { "epoch": 2.663934426229508, "grad_norm": 2.852966547012329, "learning_rate": 0.0002, "loss": 2.5787, "step": 35750 }, { "epoch": 2.6646795827123695, "grad_norm": 2.4196488857269287, "learning_rate": 0.0002, "loss": 2.6144, "step": 35760 }, { "epoch": 2.665424739195231, "grad_norm": 2.710623264312744, "learning_rate": 0.0002, "loss": 2.669, "step": 35770 }, { "epoch": 2.6661698956780926, "grad_norm": 2.8702971935272217, "learning_rate": 0.0002, "loss": 2.3902, "step": 35780 }, { "epoch": 2.6669150521609537, "grad_norm": 2.8416409492492676, "learning_rate": 0.0002, "loss": 2.6465, "step": 35790 }, { "epoch": 2.6676602086438153, "grad_norm": 2.761198043823242, "learning_rate": 0.0002, "loss": 2.3041, "step": 35800 }, { "epoch": 2.6684053651266764, "grad_norm": 2.7360453605651855, "learning_rate": 0.0002, "loss": 2.3746, "step": 35810 }, { "epoch": 2.669150521609538, "grad_norm": 2.5194859504699707, "learning_rate": 0.0002, "loss": 2.5007, "step": 35820 }, { "epoch": 2.6698956780923995, "grad_norm": 2.272688388824463, "learning_rate": 0.0002, "loss": 2.3409, "step": 35830 }, { "epoch": 2.670640834575261, "grad_norm": 2.8286778926849365, "learning_rate": 0.0002, "loss": 2.4279, "step": 35840 }, { "epoch": 2.671385991058122, "grad_norm": 2.7673115730285645, "learning_rate": 0.0002, "loss": 2.6094, "step": 35850 }, { "epoch": 2.6721311475409837, "grad_norm": 2.627052068710327, "learning_rate": 0.0002, "loss": 2.4081, "step": 35860 }, { "epoch": 2.672876304023845, "grad_norm": 2.9847543239593506, "learning_rate": 0.0002, "loss": 2.5277, "step": 35870 }, { "epoch": 2.6736214605067063, "grad_norm": 2.7096521854400635, "learning_rate": 0.0002, "loss": 2.4191, "step": 35880 }, { "epoch": 2.674366616989568, "grad_norm": 2.454817056655884, "learning_rate": 0.0002, "loss": 2.5331, "step": 35890 }, { "epoch": 2.6751117734724295, "grad_norm": 2.617335557937622, "learning_rate": 0.0002, "loss": 2.2716, "step": 35900 }, { "epoch": 2.6758569299552906, "grad_norm": 2.6219711303710938, "learning_rate": 0.0002, "loss": 2.4334, "step": 35910 }, { "epoch": 2.676602086438152, "grad_norm": 2.2985072135925293, "learning_rate": 0.0002, "loss": 2.5012, "step": 35920 }, { "epoch": 2.6773472429210132, "grad_norm": 2.7559444904327393, "learning_rate": 0.0002, "loss": 2.5972, "step": 35930 }, { "epoch": 2.678092399403875, "grad_norm": 2.789966583251953, "learning_rate": 0.0002, "loss": 2.4791, "step": 35940 }, { "epoch": 2.6788375558867363, "grad_norm": 2.9208645820617676, "learning_rate": 0.0002, "loss": 2.5503, "step": 35950 }, { "epoch": 2.679582712369598, "grad_norm": 2.563593864440918, "learning_rate": 0.0002, "loss": 2.6192, "step": 35960 }, { "epoch": 2.680327868852459, "grad_norm": 2.91192364692688, "learning_rate": 0.0002, "loss": 2.3414, "step": 35970 }, { "epoch": 2.6810730253353205, "grad_norm": 2.6559500694274902, "learning_rate": 0.0002, "loss": 2.3, "step": 35980 }, { "epoch": 2.6818181818181817, "grad_norm": 2.29866886138916, "learning_rate": 0.0002, "loss": 2.463, "step": 35990 }, { "epoch": 2.682563338301043, "grad_norm": 2.475616216659546, "learning_rate": 0.0002, "loss": 2.456, "step": 36000 }, { "epoch": 2.6833084947839048, "grad_norm": 2.6832971572875977, "learning_rate": 0.0002, "loss": 2.5678, "step": 36010 }, { "epoch": 2.684053651266766, "grad_norm": 2.717376708984375, "learning_rate": 0.0002, "loss": 2.5248, "step": 36020 }, { "epoch": 2.6847988077496274, "grad_norm": 3.192349910736084, "learning_rate": 0.0002, "loss": 2.3875, "step": 36030 }, { "epoch": 2.685543964232489, "grad_norm": 2.5880353450775146, "learning_rate": 0.0002, "loss": 2.5578, "step": 36040 }, { "epoch": 2.68628912071535, "grad_norm": 2.367215394973755, "learning_rate": 0.0002, "loss": 2.553, "step": 36050 }, { "epoch": 2.6870342771982116, "grad_norm": 2.3446109294891357, "learning_rate": 0.0002, "loss": 2.5302, "step": 36060 }, { "epoch": 2.687779433681073, "grad_norm": 2.9561381340026855, "learning_rate": 0.0002, "loss": 2.7431, "step": 36070 }, { "epoch": 2.6885245901639343, "grad_norm": 2.4364163875579834, "learning_rate": 0.0002, "loss": 2.4944, "step": 36080 }, { "epoch": 2.689269746646796, "grad_norm": 2.623122215270996, "learning_rate": 0.0002, "loss": 2.5464, "step": 36090 }, { "epoch": 2.690014903129657, "grad_norm": 2.6617910861968994, "learning_rate": 0.0002, "loss": 2.5365, "step": 36100 }, { "epoch": 2.6907600596125185, "grad_norm": 2.458702564239502, "learning_rate": 0.0002, "loss": 2.525, "step": 36110 }, { "epoch": 2.69150521609538, "grad_norm": 2.783048629760742, "learning_rate": 0.0002, "loss": 2.4975, "step": 36120 }, { "epoch": 2.6922503725782416, "grad_norm": 3.2591092586517334, "learning_rate": 0.0002, "loss": 2.5694, "step": 36130 }, { "epoch": 2.6929955290611027, "grad_norm": 2.77760910987854, "learning_rate": 0.0002, "loss": 2.4527, "step": 36140 }, { "epoch": 2.6937406855439643, "grad_norm": 2.6666505336761475, "learning_rate": 0.0002, "loss": 2.5507, "step": 36150 }, { "epoch": 2.6944858420268254, "grad_norm": 2.61444354057312, "learning_rate": 0.0002, "loss": 2.4439, "step": 36160 }, { "epoch": 2.695230998509687, "grad_norm": 2.2256479263305664, "learning_rate": 0.0002, "loss": 2.3892, "step": 36170 }, { "epoch": 2.6959761549925485, "grad_norm": 2.4084644317626953, "learning_rate": 0.0002, "loss": 2.5677, "step": 36180 }, { "epoch": 2.69672131147541, "grad_norm": 3.0166308879852295, "learning_rate": 0.0002, "loss": 2.4645, "step": 36190 }, { "epoch": 2.697466467958271, "grad_norm": 2.671741008758545, "learning_rate": 0.0002, "loss": 2.6156, "step": 36200 }, { "epoch": 2.6982116244411327, "grad_norm": 2.723830223083496, "learning_rate": 0.0002, "loss": 2.4664, "step": 36210 }, { "epoch": 2.698956780923994, "grad_norm": 2.855217695236206, "learning_rate": 0.0002, "loss": 2.481, "step": 36220 }, { "epoch": 2.6997019374068554, "grad_norm": 2.242652416229248, "learning_rate": 0.0002, "loss": 2.3911, "step": 36230 }, { "epoch": 2.700447093889717, "grad_norm": 2.4699301719665527, "learning_rate": 0.0002, "loss": 2.5208, "step": 36240 }, { "epoch": 2.7011922503725785, "grad_norm": 2.6784889698028564, "learning_rate": 0.0002, "loss": 2.2951, "step": 36250 }, { "epoch": 2.7019374068554396, "grad_norm": 2.4282853603363037, "learning_rate": 0.0002, "loss": 2.6418, "step": 36260 }, { "epoch": 2.702682563338301, "grad_norm": 2.9294357299804688, "learning_rate": 0.0002, "loss": 2.6716, "step": 36270 }, { "epoch": 2.7034277198211623, "grad_norm": 2.729619264602661, "learning_rate": 0.0002, "loss": 2.4832, "step": 36280 }, { "epoch": 2.704172876304024, "grad_norm": 2.574918031692505, "learning_rate": 0.0002, "loss": 2.4267, "step": 36290 }, { "epoch": 2.7049180327868854, "grad_norm": 2.5243308544158936, "learning_rate": 0.0002, "loss": 2.4349, "step": 36300 }, { "epoch": 2.705663189269747, "grad_norm": 2.761279821395874, "learning_rate": 0.0002, "loss": 2.5064, "step": 36310 }, { "epoch": 2.706408345752608, "grad_norm": 2.487800121307373, "learning_rate": 0.0002, "loss": 2.481, "step": 36320 }, { "epoch": 2.7071535022354696, "grad_norm": 2.707909107208252, "learning_rate": 0.0002, "loss": 2.5022, "step": 36330 }, { "epoch": 2.7078986587183307, "grad_norm": 2.6682002544403076, "learning_rate": 0.0002, "loss": 2.5912, "step": 36340 }, { "epoch": 2.7086438152011922, "grad_norm": 2.786775588989258, "learning_rate": 0.0002, "loss": 2.4986, "step": 36350 }, { "epoch": 2.709388971684054, "grad_norm": 2.84505295753479, "learning_rate": 0.0002, "loss": 2.4681, "step": 36360 }, { "epoch": 2.710134128166915, "grad_norm": 2.364448308944702, "learning_rate": 0.0002, "loss": 2.5899, "step": 36370 }, { "epoch": 2.7108792846497765, "grad_norm": 2.7492170333862305, "learning_rate": 0.0002, "loss": 2.3887, "step": 36380 }, { "epoch": 2.711624441132638, "grad_norm": 2.7889928817749023, "learning_rate": 0.0002, "loss": 2.5625, "step": 36390 }, { "epoch": 2.712369597615499, "grad_norm": 2.5459094047546387, "learning_rate": 0.0002, "loss": 2.6412, "step": 36400 }, { "epoch": 2.7131147540983607, "grad_norm": 2.648048162460327, "learning_rate": 0.0002, "loss": 2.5925, "step": 36410 }, { "epoch": 2.7138599105812222, "grad_norm": 2.7833564281463623, "learning_rate": 0.0002, "loss": 2.3773, "step": 36420 }, { "epoch": 2.7146050670640833, "grad_norm": 2.9601666927337646, "learning_rate": 0.0002, "loss": 2.7538, "step": 36430 }, { "epoch": 2.715350223546945, "grad_norm": 3.0814125537872314, "learning_rate": 0.0002, "loss": 2.6718, "step": 36440 }, { "epoch": 2.716095380029806, "grad_norm": 2.4058563709259033, "learning_rate": 0.0002, "loss": 2.6188, "step": 36450 }, { "epoch": 2.7168405365126675, "grad_norm": 2.7348439693450928, "learning_rate": 0.0002, "loss": 2.6306, "step": 36460 }, { "epoch": 2.717585692995529, "grad_norm": 2.448627471923828, "learning_rate": 0.0002, "loss": 2.5885, "step": 36470 }, { "epoch": 2.7183308494783907, "grad_norm": 2.4670491218566895, "learning_rate": 0.0002, "loss": 2.44, "step": 36480 }, { "epoch": 2.7190760059612518, "grad_norm": 2.3683083057403564, "learning_rate": 0.0002, "loss": 2.5304, "step": 36490 }, { "epoch": 2.7198211624441133, "grad_norm": 2.6678662300109863, "learning_rate": 0.0002, "loss": 2.6104, "step": 36500 }, { "epoch": 2.7205663189269744, "grad_norm": 2.6376142501831055, "learning_rate": 0.0002, "loss": 2.5446, "step": 36510 }, { "epoch": 2.721311475409836, "grad_norm": 2.51070237159729, "learning_rate": 0.0002, "loss": 2.5938, "step": 36520 }, { "epoch": 2.7220566318926975, "grad_norm": 2.3976802825927734, "learning_rate": 0.0002, "loss": 2.3664, "step": 36530 }, { "epoch": 2.722801788375559, "grad_norm": 2.557739496231079, "learning_rate": 0.0002, "loss": 2.6302, "step": 36540 }, { "epoch": 2.72354694485842, "grad_norm": 2.489729404449463, "learning_rate": 0.0002, "loss": 2.325, "step": 36550 }, { "epoch": 2.7242921013412817, "grad_norm": 2.4591825008392334, "learning_rate": 0.0002, "loss": 2.4624, "step": 36560 }, { "epoch": 2.725037257824143, "grad_norm": 2.7376599311828613, "learning_rate": 0.0002, "loss": 2.3916, "step": 36570 }, { "epoch": 2.7257824143070044, "grad_norm": 2.656623125076294, "learning_rate": 0.0002, "loss": 2.48, "step": 36580 }, { "epoch": 2.726527570789866, "grad_norm": 2.71795392036438, "learning_rate": 0.0002, "loss": 2.5828, "step": 36590 }, { "epoch": 2.7272727272727275, "grad_norm": 2.759921073913574, "learning_rate": 0.0002, "loss": 2.5211, "step": 36600 }, { "epoch": 2.7280178837555886, "grad_norm": 2.558744430541992, "learning_rate": 0.0002, "loss": 2.5161, "step": 36610 }, { "epoch": 2.72876304023845, "grad_norm": 2.899496078491211, "learning_rate": 0.0002, "loss": 2.3968, "step": 36620 }, { "epoch": 2.7295081967213113, "grad_norm": 2.5663375854492188, "learning_rate": 0.0002, "loss": 2.4568, "step": 36630 }, { "epoch": 2.730253353204173, "grad_norm": 2.4600677490234375, "learning_rate": 0.0002, "loss": 2.4366, "step": 36640 }, { "epoch": 2.7309985096870344, "grad_norm": 2.5599944591522217, "learning_rate": 0.0002, "loss": 2.3192, "step": 36650 }, { "epoch": 2.731743666169896, "grad_norm": 2.5939974784851074, "learning_rate": 0.0002, "loss": 2.6118, "step": 36660 }, { "epoch": 2.732488822652757, "grad_norm": 2.5912539958953857, "learning_rate": 0.0002, "loss": 2.5891, "step": 36670 }, { "epoch": 2.7332339791356186, "grad_norm": 3.277555465698242, "learning_rate": 0.0002, "loss": 2.3884, "step": 36680 }, { "epoch": 2.7339791356184797, "grad_norm": 2.8053581714630127, "learning_rate": 0.0002, "loss": 2.4551, "step": 36690 }, { "epoch": 2.7347242921013413, "grad_norm": 2.5936853885650635, "learning_rate": 0.0002, "loss": 2.513, "step": 36700 }, { "epoch": 2.735469448584203, "grad_norm": 2.418405294418335, "learning_rate": 0.0002, "loss": 2.2918, "step": 36710 }, { "epoch": 2.736214605067064, "grad_norm": 2.522139072418213, "learning_rate": 0.0002, "loss": 2.4492, "step": 36720 }, { "epoch": 2.7369597615499255, "grad_norm": 2.8760268688201904, "learning_rate": 0.0002, "loss": 2.4819, "step": 36730 }, { "epoch": 2.737704918032787, "grad_norm": 2.54500150680542, "learning_rate": 0.0002, "loss": 2.4401, "step": 36740 }, { "epoch": 2.738450074515648, "grad_norm": 3.1572742462158203, "learning_rate": 0.0002, "loss": 2.3991, "step": 36750 }, { "epoch": 2.7391952309985097, "grad_norm": 2.786902666091919, "learning_rate": 0.0002, "loss": 2.5974, "step": 36760 }, { "epoch": 2.7399403874813713, "grad_norm": 2.5400989055633545, "learning_rate": 0.0002, "loss": 2.5479, "step": 36770 }, { "epoch": 2.7406855439642324, "grad_norm": 2.4169130325317383, "learning_rate": 0.0002, "loss": 2.4016, "step": 36780 }, { "epoch": 2.741430700447094, "grad_norm": 2.450286865234375, "learning_rate": 0.0002, "loss": 2.5699, "step": 36790 }, { "epoch": 2.742175856929955, "grad_norm": 2.757652759552002, "learning_rate": 0.0002, "loss": 2.5433, "step": 36800 }, { "epoch": 2.7429210134128166, "grad_norm": 2.3215696811676025, "learning_rate": 0.0002, "loss": 2.4423, "step": 36810 }, { "epoch": 2.743666169895678, "grad_norm": 2.422499895095825, "learning_rate": 0.0002, "loss": 2.2945, "step": 36820 }, { "epoch": 2.7444113263785397, "grad_norm": 2.4259049892425537, "learning_rate": 0.0002, "loss": 2.5629, "step": 36830 }, { "epoch": 2.745156482861401, "grad_norm": 2.6686112880706787, "learning_rate": 0.0002, "loss": 2.4427, "step": 36840 }, { "epoch": 2.7459016393442623, "grad_norm": 2.5597991943359375, "learning_rate": 0.0002, "loss": 2.4121, "step": 36850 }, { "epoch": 2.7466467958271235, "grad_norm": 2.713035821914673, "learning_rate": 0.0002, "loss": 2.5649, "step": 36860 }, { "epoch": 2.747391952309985, "grad_norm": 2.730883836746216, "learning_rate": 0.0002, "loss": 2.3433, "step": 36870 }, { "epoch": 2.7481371087928466, "grad_norm": 2.833503007888794, "learning_rate": 0.0002, "loss": 2.4411, "step": 36880 }, { "epoch": 2.748882265275708, "grad_norm": 2.7715904712677, "learning_rate": 0.0002, "loss": 2.4082, "step": 36890 }, { "epoch": 2.7496274217585692, "grad_norm": 2.716165065765381, "learning_rate": 0.0002, "loss": 2.3817, "step": 36900 }, { "epoch": 2.7503725782414308, "grad_norm": 2.7706308364868164, "learning_rate": 0.0002, "loss": 2.5381, "step": 36910 }, { "epoch": 2.751117734724292, "grad_norm": 2.569395065307617, "learning_rate": 0.0002, "loss": 2.4863, "step": 36920 }, { "epoch": 2.7518628912071534, "grad_norm": 2.589057207107544, "learning_rate": 0.0002, "loss": 2.5399, "step": 36930 }, { "epoch": 2.752608047690015, "grad_norm": 2.632874011993408, "learning_rate": 0.0002, "loss": 2.4628, "step": 36940 }, { "epoch": 2.7533532041728765, "grad_norm": 3.023503541946411, "learning_rate": 0.0002, "loss": 2.5736, "step": 36950 }, { "epoch": 2.7540983606557377, "grad_norm": 2.538649559020996, "learning_rate": 0.0002, "loss": 2.4441, "step": 36960 }, { "epoch": 2.754843517138599, "grad_norm": 2.65378737449646, "learning_rate": 0.0002, "loss": 2.5479, "step": 36970 }, { "epoch": 2.7555886736214603, "grad_norm": 2.514577865600586, "learning_rate": 0.0002, "loss": 2.3853, "step": 36980 }, { "epoch": 2.756333830104322, "grad_norm": 2.3040006160736084, "learning_rate": 0.0002, "loss": 2.5896, "step": 36990 }, { "epoch": 2.7570789865871834, "grad_norm": 2.7404825687408447, "learning_rate": 0.0002, "loss": 2.7121, "step": 37000 }, { "epoch": 2.757824143070045, "grad_norm": 2.6444921493530273, "learning_rate": 0.0002, "loss": 2.4906, "step": 37010 }, { "epoch": 2.758569299552906, "grad_norm": 2.368401288986206, "learning_rate": 0.0002, "loss": 2.413, "step": 37020 }, { "epoch": 2.7593144560357676, "grad_norm": 2.6097865104675293, "learning_rate": 0.0002, "loss": 2.567, "step": 37030 }, { "epoch": 2.7600596125186287, "grad_norm": 2.694190740585327, "learning_rate": 0.0002, "loss": 2.57, "step": 37040 }, { "epoch": 2.7608047690014903, "grad_norm": 2.5604560375213623, "learning_rate": 0.0002, "loss": 2.4598, "step": 37050 }, { "epoch": 2.761549925484352, "grad_norm": 2.912165641784668, "learning_rate": 0.0002, "loss": 2.6524, "step": 37060 }, { "epoch": 2.762295081967213, "grad_norm": 3.2894179821014404, "learning_rate": 0.0002, "loss": 2.5217, "step": 37070 }, { "epoch": 2.7630402384500745, "grad_norm": 2.828571319580078, "learning_rate": 0.0002, "loss": 2.5196, "step": 37080 }, { "epoch": 2.763785394932936, "grad_norm": 2.4312257766723633, "learning_rate": 0.0002, "loss": 2.3419, "step": 37090 }, { "epoch": 2.764530551415797, "grad_norm": 2.4520037174224854, "learning_rate": 0.0002, "loss": 2.4494, "step": 37100 }, { "epoch": 2.7652757078986587, "grad_norm": 2.708139181137085, "learning_rate": 0.0002, "loss": 2.5631, "step": 37110 }, { "epoch": 2.7660208643815203, "grad_norm": 2.8655529022216797, "learning_rate": 0.0002, "loss": 2.5352, "step": 37120 }, { "epoch": 2.7667660208643814, "grad_norm": 2.484017848968506, "learning_rate": 0.0002, "loss": 2.4615, "step": 37130 }, { "epoch": 2.767511177347243, "grad_norm": 2.4956047534942627, "learning_rate": 0.0002, "loss": 2.5465, "step": 37140 }, { "epoch": 2.768256333830104, "grad_norm": 2.5548791885375977, "learning_rate": 0.0002, "loss": 2.4609, "step": 37150 }, { "epoch": 2.7690014903129656, "grad_norm": 2.561833381652832, "learning_rate": 0.0002, "loss": 2.5994, "step": 37160 }, { "epoch": 2.769746646795827, "grad_norm": 2.9793152809143066, "learning_rate": 0.0002, "loss": 2.432, "step": 37170 }, { "epoch": 2.7704918032786887, "grad_norm": 2.6743390560150146, "learning_rate": 0.0002, "loss": 2.4788, "step": 37180 }, { "epoch": 2.77123695976155, "grad_norm": 2.461435317993164, "learning_rate": 0.0002, "loss": 2.5217, "step": 37190 }, { "epoch": 2.7719821162444114, "grad_norm": 2.5211734771728516, "learning_rate": 0.0002, "loss": 2.613, "step": 37200 }, { "epoch": 2.7727272727272725, "grad_norm": 2.824679136276245, "learning_rate": 0.0002, "loss": 2.6382, "step": 37210 }, { "epoch": 2.773472429210134, "grad_norm": 2.458425521850586, "learning_rate": 0.0002, "loss": 2.4033, "step": 37220 }, { "epoch": 2.7742175856929956, "grad_norm": 2.7605643272399902, "learning_rate": 0.0002, "loss": 2.4153, "step": 37230 }, { "epoch": 2.774962742175857, "grad_norm": 2.3235936164855957, "learning_rate": 0.0002, "loss": 2.5107, "step": 37240 }, { "epoch": 2.7757078986587183, "grad_norm": 2.2972285747528076, "learning_rate": 0.0002, "loss": 2.5371, "step": 37250 }, { "epoch": 2.77645305514158, "grad_norm": 2.7939791679382324, "learning_rate": 0.0002, "loss": 2.3748, "step": 37260 }, { "epoch": 2.777198211624441, "grad_norm": 2.5067827701568604, "learning_rate": 0.0002, "loss": 2.3598, "step": 37270 }, { "epoch": 2.7779433681073025, "grad_norm": 2.909909248352051, "learning_rate": 0.0002, "loss": 2.5853, "step": 37280 }, { "epoch": 2.778688524590164, "grad_norm": 2.569460868835449, "learning_rate": 0.0002, "loss": 2.5193, "step": 37290 }, { "epoch": 2.7794336810730256, "grad_norm": 3.214890956878662, "learning_rate": 0.0002, "loss": 2.4651, "step": 37300 }, { "epoch": 2.7801788375558867, "grad_norm": 2.867858648300171, "learning_rate": 0.0002, "loss": 2.6508, "step": 37310 }, { "epoch": 2.7809239940387482, "grad_norm": 2.7677152156829834, "learning_rate": 0.0002, "loss": 2.5517, "step": 37320 }, { "epoch": 2.7816691505216093, "grad_norm": 2.633157253265381, "learning_rate": 0.0002, "loss": 2.4063, "step": 37330 }, { "epoch": 2.782414307004471, "grad_norm": 2.4681646823883057, "learning_rate": 0.0002, "loss": 2.4698, "step": 37340 }, { "epoch": 2.7831594634873325, "grad_norm": 2.595750570297241, "learning_rate": 0.0002, "loss": 2.398, "step": 37350 }, { "epoch": 2.783904619970194, "grad_norm": 2.4892077445983887, "learning_rate": 0.0002, "loss": 2.5829, "step": 37360 }, { "epoch": 2.784649776453055, "grad_norm": 2.4976730346679688, "learning_rate": 0.0002, "loss": 2.3726, "step": 37370 }, { "epoch": 2.7853949329359167, "grad_norm": 2.330193519592285, "learning_rate": 0.0002, "loss": 2.4521, "step": 37380 }, { "epoch": 2.7861400894187778, "grad_norm": 2.6689720153808594, "learning_rate": 0.0002, "loss": 2.414, "step": 37390 }, { "epoch": 2.7868852459016393, "grad_norm": 2.6920714378356934, "learning_rate": 0.0002, "loss": 2.5038, "step": 37400 }, { "epoch": 2.787630402384501, "grad_norm": 2.4694406986236572, "learning_rate": 0.0002, "loss": 2.3686, "step": 37410 }, { "epoch": 2.788375558867362, "grad_norm": 2.5774457454681396, "learning_rate": 0.0002, "loss": 2.3996, "step": 37420 }, { "epoch": 2.7891207153502235, "grad_norm": 2.640498638153076, "learning_rate": 0.0002, "loss": 2.4829, "step": 37430 }, { "epoch": 2.789865871833085, "grad_norm": 2.770134925842285, "learning_rate": 0.0002, "loss": 2.5329, "step": 37440 }, { "epoch": 2.790611028315946, "grad_norm": 2.9241816997528076, "learning_rate": 0.0002, "loss": 2.4869, "step": 37450 }, { "epoch": 2.7913561847988078, "grad_norm": 2.629659652709961, "learning_rate": 0.0002, "loss": 2.4289, "step": 37460 }, { "epoch": 2.7921013412816693, "grad_norm": 2.6241800785064697, "learning_rate": 0.0002, "loss": 2.5179, "step": 37470 }, { "epoch": 2.7928464977645304, "grad_norm": 2.5617072582244873, "learning_rate": 0.0002, "loss": 2.5042, "step": 37480 }, { "epoch": 2.793591654247392, "grad_norm": 2.659287452697754, "learning_rate": 0.0002, "loss": 2.4369, "step": 37490 }, { "epoch": 2.794336810730253, "grad_norm": 2.6287639141082764, "learning_rate": 0.0002, "loss": 2.4064, "step": 37500 }, { "epoch": 2.7950819672131146, "grad_norm": 2.7507472038269043, "learning_rate": 0.0002, "loss": 2.4495, "step": 37510 }, { "epoch": 2.795827123695976, "grad_norm": 2.509035348892212, "learning_rate": 0.0002, "loss": 2.4707, "step": 37520 }, { "epoch": 2.7965722801788377, "grad_norm": 2.2699403762817383, "learning_rate": 0.0002, "loss": 2.3909, "step": 37530 }, { "epoch": 2.797317436661699, "grad_norm": 2.630939245223999, "learning_rate": 0.0002, "loss": 2.4802, "step": 37540 }, { "epoch": 2.7980625931445604, "grad_norm": 3.045865058898926, "learning_rate": 0.0002, "loss": 2.4884, "step": 37550 }, { "epoch": 2.7988077496274215, "grad_norm": 2.486050605773926, "learning_rate": 0.0002, "loss": 2.4905, "step": 37560 }, { "epoch": 2.799552906110283, "grad_norm": 2.318683624267578, "learning_rate": 0.0002, "loss": 2.2404, "step": 37570 }, { "epoch": 2.8002980625931446, "grad_norm": 2.7278268337249756, "learning_rate": 0.0002, "loss": 2.5899, "step": 37580 }, { "epoch": 2.801043219076006, "grad_norm": 2.8368096351623535, "learning_rate": 0.0002, "loss": 2.5105, "step": 37590 }, { "epoch": 2.8017883755588673, "grad_norm": 2.7143986225128174, "learning_rate": 0.0002, "loss": 2.4156, "step": 37600 }, { "epoch": 2.802533532041729, "grad_norm": 2.6735024452209473, "learning_rate": 0.0002, "loss": 2.3569, "step": 37610 }, { "epoch": 2.80327868852459, "grad_norm": 2.700565814971924, "learning_rate": 0.0002, "loss": 2.485, "step": 37620 }, { "epoch": 2.8040238450074515, "grad_norm": 2.5607948303222656, "learning_rate": 0.0002, "loss": 2.5495, "step": 37630 }, { "epoch": 2.804769001490313, "grad_norm": 2.044367790222168, "learning_rate": 0.0002, "loss": 2.452, "step": 37640 }, { "epoch": 2.8055141579731746, "grad_norm": 2.569173574447632, "learning_rate": 0.0002, "loss": 2.554, "step": 37650 }, { "epoch": 2.8062593144560357, "grad_norm": 2.6920878887176514, "learning_rate": 0.0002, "loss": 2.5635, "step": 37660 }, { "epoch": 2.8070044709388973, "grad_norm": 2.5670292377471924, "learning_rate": 0.0002, "loss": 2.4568, "step": 37670 }, { "epoch": 2.8077496274217584, "grad_norm": 2.1942081451416016, "learning_rate": 0.0002, "loss": 2.4599, "step": 37680 }, { "epoch": 2.80849478390462, "grad_norm": 2.5298988819122314, "learning_rate": 0.0002, "loss": 2.4536, "step": 37690 }, { "epoch": 2.8092399403874815, "grad_norm": 2.82232666015625, "learning_rate": 0.0002, "loss": 2.4608, "step": 37700 }, { "epoch": 2.809985096870343, "grad_norm": 2.4265050888061523, "learning_rate": 0.0002, "loss": 2.4723, "step": 37710 }, { "epoch": 2.810730253353204, "grad_norm": 2.705826759338379, "learning_rate": 0.0002, "loss": 2.4147, "step": 37720 }, { "epoch": 2.8114754098360657, "grad_norm": 2.584636926651001, "learning_rate": 0.0002, "loss": 2.5809, "step": 37730 }, { "epoch": 2.812220566318927, "grad_norm": 2.8928425312042236, "learning_rate": 0.0002, "loss": 2.7184, "step": 37740 }, { "epoch": 2.8129657228017884, "grad_norm": 2.567809581756592, "learning_rate": 0.0002, "loss": 2.4948, "step": 37750 }, { "epoch": 2.81371087928465, "grad_norm": 2.8919975757598877, "learning_rate": 0.0002, "loss": 2.4801, "step": 37760 }, { "epoch": 2.814456035767511, "grad_norm": 2.455564022064209, "learning_rate": 0.0002, "loss": 2.5779, "step": 37770 }, { "epoch": 2.8152011922503726, "grad_norm": 2.659975528717041, "learning_rate": 0.0002, "loss": 2.6608, "step": 37780 }, { "epoch": 2.815946348733234, "grad_norm": 2.6775760650634766, "learning_rate": 0.0002, "loss": 2.4948, "step": 37790 }, { "epoch": 2.8166915052160952, "grad_norm": 2.86912202835083, "learning_rate": 0.0002, "loss": 2.3338, "step": 37800 }, { "epoch": 2.817436661698957, "grad_norm": 2.8781628608703613, "learning_rate": 0.0002, "loss": 2.6059, "step": 37810 }, { "epoch": 2.8181818181818183, "grad_norm": 2.688969135284424, "learning_rate": 0.0002, "loss": 2.792, "step": 37820 }, { "epoch": 2.8189269746646795, "grad_norm": 2.80010986328125, "learning_rate": 0.0002, "loss": 2.3393, "step": 37830 }, { "epoch": 2.819672131147541, "grad_norm": 2.4589450359344482, "learning_rate": 0.0002, "loss": 2.5836, "step": 37840 }, { "epoch": 2.820417287630402, "grad_norm": 2.3914148807525635, "learning_rate": 0.0002, "loss": 2.5583, "step": 37850 }, { "epoch": 2.8211624441132637, "grad_norm": 2.734759569168091, "learning_rate": 0.0002, "loss": 2.624, "step": 37860 }, { "epoch": 2.821907600596125, "grad_norm": 2.7147603034973145, "learning_rate": 0.0002, "loss": 2.6149, "step": 37870 }, { "epoch": 2.8226527570789868, "grad_norm": 2.4781367778778076, "learning_rate": 0.0002, "loss": 2.5671, "step": 37880 }, { "epoch": 2.823397913561848, "grad_norm": 2.6984505653381348, "learning_rate": 0.0002, "loss": 2.5764, "step": 37890 }, { "epoch": 2.8241430700447094, "grad_norm": 2.6264562606811523, "learning_rate": 0.0002, "loss": 2.272, "step": 37900 }, { "epoch": 2.8248882265275705, "grad_norm": 2.978095054626465, "learning_rate": 0.0002, "loss": 2.483, "step": 37910 }, { "epoch": 2.825633383010432, "grad_norm": 2.6886980533599854, "learning_rate": 0.0002, "loss": 2.6149, "step": 37920 }, { "epoch": 2.8263785394932937, "grad_norm": 2.677035331726074, "learning_rate": 0.0002, "loss": 2.6447, "step": 37930 }, { "epoch": 2.827123695976155, "grad_norm": 3.3771278858184814, "learning_rate": 0.0002, "loss": 2.7219, "step": 37940 }, { "epoch": 2.8278688524590163, "grad_norm": 1.9655869007110596, "learning_rate": 0.0002, "loss": 2.4848, "step": 37950 }, { "epoch": 2.828614008941878, "grad_norm": 2.6271440982818604, "learning_rate": 0.0002, "loss": 2.4456, "step": 37960 }, { "epoch": 2.829359165424739, "grad_norm": 2.5585412979125977, "learning_rate": 0.0002, "loss": 2.7289, "step": 37970 }, { "epoch": 2.8301043219076005, "grad_norm": 2.735304117202759, "learning_rate": 0.0002, "loss": 2.4382, "step": 37980 }, { "epoch": 2.830849478390462, "grad_norm": 2.609837055206299, "learning_rate": 0.0002, "loss": 2.4991, "step": 37990 }, { "epoch": 2.8315946348733236, "grad_norm": 2.7447588443756104, "learning_rate": 0.0002, "loss": 2.5325, "step": 38000 }, { "epoch": 2.8323397913561847, "grad_norm": 2.400519847869873, "learning_rate": 0.0002, "loss": 2.6211, "step": 38010 }, { "epoch": 2.8330849478390463, "grad_norm": 2.8218953609466553, "learning_rate": 0.0002, "loss": 2.6096, "step": 38020 }, { "epoch": 2.8338301043219074, "grad_norm": 2.7308266162872314, "learning_rate": 0.0002, "loss": 2.557, "step": 38030 }, { "epoch": 2.834575260804769, "grad_norm": 2.5944695472717285, "learning_rate": 0.0002, "loss": 2.5554, "step": 38040 }, { "epoch": 2.8353204172876305, "grad_norm": 2.427563428878784, "learning_rate": 0.0002, "loss": 2.3361, "step": 38050 }, { "epoch": 2.836065573770492, "grad_norm": 2.4476137161254883, "learning_rate": 0.0002, "loss": 2.4115, "step": 38060 }, { "epoch": 2.836810730253353, "grad_norm": 3.2641639709472656, "learning_rate": 0.0002, "loss": 2.4905, "step": 38070 }, { "epoch": 2.8375558867362147, "grad_norm": 2.3455142974853516, "learning_rate": 0.0002, "loss": 2.4734, "step": 38080 }, { "epoch": 2.838301043219076, "grad_norm": 2.834339141845703, "learning_rate": 0.0002, "loss": 2.5312, "step": 38090 }, { "epoch": 2.8390461997019374, "grad_norm": 2.8637094497680664, "learning_rate": 0.0002, "loss": 2.4174, "step": 38100 }, { "epoch": 2.839791356184799, "grad_norm": 3.0524630546569824, "learning_rate": 0.0002, "loss": 2.5523, "step": 38110 }, { "epoch": 2.84053651266766, "grad_norm": 2.649812936782837, "learning_rate": 0.0002, "loss": 2.2958, "step": 38120 }, { "epoch": 2.8412816691505216, "grad_norm": 2.645263433456421, "learning_rate": 0.0002, "loss": 2.4867, "step": 38130 }, { "epoch": 2.842026825633383, "grad_norm": 2.0363569259643555, "learning_rate": 0.0002, "loss": 2.3287, "step": 38140 }, { "epoch": 2.8427719821162443, "grad_norm": 2.7432138919830322, "learning_rate": 0.0002, "loss": 2.6093, "step": 38150 }, { "epoch": 2.843517138599106, "grad_norm": 2.721869945526123, "learning_rate": 0.0002, "loss": 2.6309, "step": 38160 }, { "epoch": 2.8442622950819674, "grad_norm": 2.674832582473755, "learning_rate": 0.0002, "loss": 2.3478, "step": 38170 }, { "epoch": 2.8450074515648285, "grad_norm": 2.252639055252075, "learning_rate": 0.0002, "loss": 2.3417, "step": 38180 }, { "epoch": 2.84575260804769, "grad_norm": 2.549644708633423, "learning_rate": 0.0002, "loss": 2.3126, "step": 38190 }, { "epoch": 2.846497764530551, "grad_norm": 2.77371883392334, "learning_rate": 0.0002, "loss": 2.6122, "step": 38200 }, { "epoch": 2.8472429210134127, "grad_norm": 2.7712173461914062, "learning_rate": 0.0002, "loss": 2.4918, "step": 38210 }, { "epoch": 2.8479880774962743, "grad_norm": 2.5276310443878174, "learning_rate": 0.0002, "loss": 2.443, "step": 38220 }, { "epoch": 2.848733233979136, "grad_norm": 2.3760945796966553, "learning_rate": 0.0002, "loss": 2.4374, "step": 38230 }, { "epoch": 2.849478390461997, "grad_norm": 2.5871782302856445, "learning_rate": 0.0002, "loss": 2.571, "step": 38240 }, { "epoch": 2.8502235469448585, "grad_norm": 2.721224069595337, "learning_rate": 0.0002, "loss": 2.582, "step": 38250 }, { "epoch": 2.8509687034277196, "grad_norm": 2.55556058883667, "learning_rate": 0.0002, "loss": 2.3885, "step": 38260 }, { "epoch": 2.851713859910581, "grad_norm": 2.8083126544952393, "learning_rate": 0.0002, "loss": 2.4455, "step": 38270 }, { "epoch": 2.8524590163934427, "grad_norm": 2.5174221992492676, "learning_rate": 0.0002, "loss": 2.5159, "step": 38280 }, { "epoch": 2.8532041728763042, "grad_norm": 2.380772590637207, "learning_rate": 0.0002, "loss": 2.6566, "step": 38290 }, { "epoch": 2.8539493293591653, "grad_norm": 2.3448495864868164, "learning_rate": 0.0002, "loss": 2.4935, "step": 38300 }, { "epoch": 2.854694485842027, "grad_norm": 2.500901699066162, "learning_rate": 0.0002, "loss": 2.3849, "step": 38310 }, { "epoch": 2.855439642324888, "grad_norm": 2.658292293548584, "learning_rate": 0.0002, "loss": 2.3852, "step": 38320 }, { "epoch": 2.8561847988077496, "grad_norm": 2.5677103996276855, "learning_rate": 0.0002, "loss": 2.467, "step": 38330 }, { "epoch": 2.856929955290611, "grad_norm": 2.411125898361206, "learning_rate": 0.0002, "loss": 2.4616, "step": 38340 }, { "epoch": 2.8576751117734727, "grad_norm": 2.651226043701172, "learning_rate": 0.0002, "loss": 2.4536, "step": 38350 }, { "epoch": 2.8584202682563338, "grad_norm": 2.6440541744232178, "learning_rate": 0.0002, "loss": 2.349, "step": 38360 }, { "epoch": 2.8591654247391953, "grad_norm": 2.4641504287719727, "learning_rate": 0.0002, "loss": 2.5248, "step": 38370 }, { "epoch": 2.8599105812220564, "grad_norm": 2.716660499572754, "learning_rate": 0.0002, "loss": 2.606, "step": 38380 }, { "epoch": 2.860655737704918, "grad_norm": 2.255239963531494, "learning_rate": 0.0002, "loss": 2.4059, "step": 38390 }, { "epoch": 2.8614008941877795, "grad_norm": 2.145322322845459, "learning_rate": 0.0002, "loss": 2.4944, "step": 38400 }, { "epoch": 2.862146050670641, "grad_norm": 2.833284378051758, "learning_rate": 0.0002, "loss": 2.5779, "step": 38410 }, { "epoch": 2.862891207153502, "grad_norm": 2.970714569091797, "learning_rate": 0.0002, "loss": 2.4606, "step": 38420 }, { "epoch": 2.8636363636363638, "grad_norm": 2.7407591342926025, "learning_rate": 0.0002, "loss": 2.5038, "step": 38430 }, { "epoch": 2.864381520119225, "grad_norm": 2.734581708908081, "learning_rate": 0.0002, "loss": 2.4065, "step": 38440 }, { "epoch": 2.8651266766020864, "grad_norm": 2.5742697715759277, "learning_rate": 0.0002, "loss": 2.359, "step": 38450 }, { "epoch": 2.865871833084948, "grad_norm": 2.6709296703338623, "learning_rate": 0.0002, "loss": 2.4409, "step": 38460 }, { "epoch": 2.866616989567809, "grad_norm": 2.3500664234161377, "learning_rate": 0.0002, "loss": 2.3481, "step": 38470 }, { "epoch": 2.8673621460506706, "grad_norm": 2.6254522800445557, "learning_rate": 0.0002, "loss": 2.6385, "step": 38480 }, { "epoch": 2.868107302533532, "grad_norm": 2.2674224376678467, "learning_rate": 0.0002, "loss": 2.5171, "step": 38490 }, { "epoch": 2.8688524590163933, "grad_norm": 2.6358728408813477, "learning_rate": 0.0002, "loss": 2.649, "step": 38500 }, { "epoch": 2.869597615499255, "grad_norm": 2.7351884841918945, "learning_rate": 0.0002, "loss": 2.6956, "step": 38510 }, { "epoch": 2.8703427719821164, "grad_norm": 2.099317789077759, "learning_rate": 0.0002, "loss": 2.4007, "step": 38520 }, { "epoch": 2.8710879284649775, "grad_norm": 2.7157390117645264, "learning_rate": 0.0002, "loss": 2.6054, "step": 38530 }, { "epoch": 2.871833084947839, "grad_norm": 2.5156712532043457, "learning_rate": 0.0002, "loss": 2.4476, "step": 38540 }, { "epoch": 2.8725782414307, "grad_norm": 2.307621955871582, "learning_rate": 0.0002, "loss": 2.3631, "step": 38550 }, { "epoch": 2.8733233979135617, "grad_norm": 2.7251169681549072, "learning_rate": 0.0002, "loss": 2.4591, "step": 38560 }, { "epoch": 2.8740685543964233, "grad_norm": 2.367175340652466, "learning_rate": 0.0002, "loss": 2.5013, "step": 38570 }, { "epoch": 2.874813710879285, "grad_norm": 3.0403735637664795, "learning_rate": 0.0002, "loss": 2.4797, "step": 38580 }, { "epoch": 2.875558867362146, "grad_norm": 2.884767770767212, "learning_rate": 0.0002, "loss": 2.159, "step": 38590 }, { "epoch": 2.8763040238450075, "grad_norm": 2.2452404499053955, "learning_rate": 0.0002, "loss": 2.4509, "step": 38600 }, { "epoch": 2.8770491803278686, "grad_norm": 2.496917486190796, "learning_rate": 0.0002, "loss": 2.5203, "step": 38610 }, { "epoch": 2.87779433681073, "grad_norm": 2.7510409355163574, "learning_rate": 0.0002, "loss": 2.5307, "step": 38620 }, { "epoch": 2.8785394932935917, "grad_norm": 2.698000907897949, "learning_rate": 0.0002, "loss": 2.625, "step": 38630 }, { "epoch": 2.8792846497764533, "grad_norm": 2.3727385997772217, "learning_rate": 0.0002, "loss": 2.612, "step": 38640 }, { "epoch": 2.8800298062593144, "grad_norm": 2.5714304447174072, "learning_rate": 0.0002, "loss": 2.5383, "step": 38650 }, { "epoch": 2.880774962742176, "grad_norm": 2.4994068145751953, "learning_rate": 0.0002, "loss": 2.3407, "step": 38660 }, { "epoch": 2.881520119225037, "grad_norm": 2.728253126144409, "learning_rate": 0.0002, "loss": 2.4754, "step": 38670 }, { "epoch": 2.8822652757078986, "grad_norm": 2.7636911869049072, "learning_rate": 0.0002, "loss": 2.5834, "step": 38680 }, { "epoch": 2.88301043219076, "grad_norm": 2.6368253231048584, "learning_rate": 0.0002, "loss": 2.526, "step": 38690 }, { "epoch": 2.8837555886736217, "grad_norm": 2.8573157787323, "learning_rate": 0.0002, "loss": 2.5318, "step": 38700 }, { "epoch": 2.884500745156483, "grad_norm": 2.7631237506866455, "learning_rate": 0.0002, "loss": 2.4429, "step": 38710 }, { "epoch": 2.8852459016393444, "grad_norm": 2.6455793380737305, "learning_rate": 0.0002, "loss": 2.5363, "step": 38720 }, { "epoch": 2.8859910581222055, "grad_norm": 2.3246517181396484, "learning_rate": 0.0002, "loss": 2.379, "step": 38730 }, { "epoch": 2.886736214605067, "grad_norm": 2.690464735031128, "learning_rate": 0.0002, "loss": 2.6405, "step": 38740 }, { "epoch": 2.8874813710879286, "grad_norm": 2.527547597885132, "learning_rate": 0.0002, "loss": 2.6174, "step": 38750 }, { "epoch": 2.88822652757079, "grad_norm": 2.4176671504974365, "learning_rate": 0.0002, "loss": 2.4597, "step": 38760 }, { "epoch": 2.8889716840536512, "grad_norm": 2.7073466777801514, "learning_rate": 0.0002, "loss": 2.5065, "step": 38770 }, { "epoch": 2.889716840536513, "grad_norm": 2.439682722091675, "learning_rate": 0.0002, "loss": 2.3043, "step": 38780 }, { "epoch": 2.890461997019374, "grad_norm": 2.4189679622650146, "learning_rate": 0.0002, "loss": 2.4287, "step": 38790 }, { "epoch": 2.8912071535022354, "grad_norm": 2.4912519454956055, "learning_rate": 0.0002, "loss": 2.4595, "step": 38800 }, { "epoch": 2.891952309985097, "grad_norm": 2.4591786861419678, "learning_rate": 0.0002, "loss": 2.5589, "step": 38810 }, { "epoch": 2.892697466467958, "grad_norm": 2.469432830810547, "learning_rate": 0.0002, "loss": 2.5173, "step": 38820 }, { "epoch": 2.8934426229508197, "grad_norm": 2.421070098876953, "learning_rate": 0.0002, "loss": 2.4513, "step": 38830 }, { "epoch": 2.894187779433681, "grad_norm": 2.4555258750915527, "learning_rate": 0.0002, "loss": 2.6341, "step": 38840 }, { "epoch": 2.8949329359165423, "grad_norm": 2.3373961448669434, "learning_rate": 0.0002, "loss": 2.5724, "step": 38850 }, { "epoch": 2.895678092399404, "grad_norm": 2.7534141540527344, "learning_rate": 0.0002, "loss": 2.4472, "step": 38860 }, { "epoch": 2.8964232488822654, "grad_norm": 2.53497052192688, "learning_rate": 0.0002, "loss": 2.1598, "step": 38870 }, { "epoch": 2.8971684053651265, "grad_norm": 2.5229415893554688, "learning_rate": 0.0002, "loss": 2.581, "step": 38880 }, { "epoch": 2.897913561847988, "grad_norm": 2.3807928562164307, "learning_rate": 0.0002, "loss": 2.4987, "step": 38890 }, { "epoch": 2.898658718330849, "grad_norm": 2.803424596786499, "learning_rate": 0.0002, "loss": 2.6143, "step": 38900 }, { "epoch": 2.8994038748137108, "grad_norm": 2.6139283180236816, "learning_rate": 0.0002, "loss": 2.403, "step": 38910 }, { "epoch": 2.9001490312965723, "grad_norm": 2.9887192249298096, "learning_rate": 0.0002, "loss": 2.6108, "step": 38920 }, { "epoch": 2.900894187779434, "grad_norm": 2.6957292556762695, "learning_rate": 0.0002, "loss": 2.4734, "step": 38930 }, { "epoch": 2.901639344262295, "grad_norm": 2.6717658042907715, "learning_rate": 0.0002, "loss": 2.5778, "step": 38940 }, { "epoch": 2.9023845007451565, "grad_norm": 2.75540828704834, "learning_rate": 0.0002, "loss": 2.5948, "step": 38950 }, { "epoch": 2.9031296572280176, "grad_norm": 2.5205585956573486, "learning_rate": 0.0002, "loss": 2.5108, "step": 38960 }, { "epoch": 2.903874813710879, "grad_norm": 2.2808632850646973, "learning_rate": 0.0002, "loss": 2.5035, "step": 38970 }, { "epoch": 2.9046199701937407, "grad_norm": 2.8300139904022217, "learning_rate": 0.0002, "loss": 2.6883, "step": 38980 }, { "epoch": 2.9053651266766023, "grad_norm": 2.3492043018341064, "learning_rate": 0.0002, "loss": 2.517, "step": 38990 }, { "epoch": 2.9061102831594634, "grad_norm": 2.677483081817627, "learning_rate": 0.0002, "loss": 2.5173, "step": 39000 }, { "epoch": 2.906855439642325, "grad_norm": 2.076521635055542, "learning_rate": 0.0002, "loss": 2.3087, "step": 39010 }, { "epoch": 2.907600596125186, "grad_norm": 2.9172234535217285, "learning_rate": 0.0002, "loss": 2.4004, "step": 39020 }, { "epoch": 2.9083457526080476, "grad_norm": 2.752596616744995, "learning_rate": 0.0002, "loss": 2.4206, "step": 39030 }, { "epoch": 2.909090909090909, "grad_norm": 2.586287260055542, "learning_rate": 0.0002, "loss": 2.5331, "step": 39040 }, { "epoch": 2.9098360655737707, "grad_norm": 2.6482787132263184, "learning_rate": 0.0002, "loss": 2.5963, "step": 39050 }, { "epoch": 2.910581222056632, "grad_norm": 2.513068914413452, "learning_rate": 0.0002, "loss": 2.6465, "step": 39060 }, { "epoch": 2.9113263785394934, "grad_norm": 2.8123700618743896, "learning_rate": 0.0002, "loss": 2.5256, "step": 39070 }, { "epoch": 2.9120715350223545, "grad_norm": 2.57496976852417, "learning_rate": 0.0002, "loss": 2.4337, "step": 39080 }, { "epoch": 2.912816691505216, "grad_norm": 2.719500780105591, "learning_rate": 0.0002, "loss": 2.672, "step": 39090 }, { "epoch": 2.9135618479880776, "grad_norm": 2.6564457416534424, "learning_rate": 0.0002, "loss": 2.5579, "step": 39100 }, { "epoch": 2.914307004470939, "grad_norm": 2.656972646713257, "learning_rate": 0.0002, "loss": 2.532, "step": 39110 }, { "epoch": 2.9150521609538003, "grad_norm": 2.5278375148773193, "learning_rate": 0.0002, "loss": 2.3322, "step": 39120 }, { "epoch": 2.915797317436662, "grad_norm": 2.6466145515441895, "learning_rate": 0.0002, "loss": 2.5936, "step": 39130 }, { "epoch": 2.916542473919523, "grad_norm": 2.8328826427459717, "learning_rate": 0.0002, "loss": 2.5601, "step": 39140 }, { "epoch": 2.9172876304023845, "grad_norm": 2.4768784046173096, "learning_rate": 0.0002, "loss": 2.4364, "step": 39150 }, { "epoch": 2.918032786885246, "grad_norm": 3.1645164489746094, "learning_rate": 0.0002, "loss": 2.5898, "step": 39160 }, { "epoch": 2.918777943368107, "grad_norm": 2.8019328117370605, "learning_rate": 0.0002, "loss": 2.5901, "step": 39170 }, { "epoch": 2.9195230998509687, "grad_norm": 3.203110933303833, "learning_rate": 0.0002, "loss": 2.4048, "step": 39180 }, { "epoch": 2.9202682563338302, "grad_norm": 3.061889410018921, "learning_rate": 0.0002, "loss": 2.6781, "step": 39190 }, { "epoch": 2.9210134128166914, "grad_norm": 2.735222101211548, "learning_rate": 0.0002, "loss": 2.5379, "step": 39200 }, { "epoch": 2.921758569299553, "grad_norm": 2.0281805992126465, "learning_rate": 0.0002, "loss": 2.4894, "step": 39210 }, { "epoch": 2.9225037257824145, "grad_norm": 2.89827561378479, "learning_rate": 0.0002, "loss": 2.3229, "step": 39220 }, { "epoch": 2.9232488822652756, "grad_norm": 2.8243911266326904, "learning_rate": 0.0002, "loss": 2.2712, "step": 39230 }, { "epoch": 2.923994038748137, "grad_norm": 2.6554248332977295, "learning_rate": 0.0002, "loss": 2.4221, "step": 39240 }, { "epoch": 2.9247391952309982, "grad_norm": 2.6550304889678955, "learning_rate": 0.0002, "loss": 2.5087, "step": 39250 }, { "epoch": 2.92548435171386, "grad_norm": 2.154710054397583, "learning_rate": 0.0002, "loss": 2.4395, "step": 39260 }, { "epoch": 2.9262295081967213, "grad_norm": 2.5351250171661377, "learning_rate": 0.0002, "loss": 2.7179, "step": 39270 }, { "epoch": 2.926974664679583, "grad_norm": 2.533205986022949, "learning_rate": 0.0002, "loss": 2.5654, "step": 39280 }, { "epoch": 2.927719821162444, "grad_norm": 2.13325572013855, "learning_rate": 0.0002, "loss": 2.6483, "step": 39290 }, { "epoch": 2.9284649776453056, "grad_norm": 2.66166353225708, "learning_rate": 0.0002, "loss": 2.5622, "step": 39300 }, { "epoch": 2.9292101341281667, "grad_norm": 2.33381724357605, "learning_rate": 0.0002, "loss": 2.555, "step": 39310 }, { "epoch": 2.929955290611028, "grad_norm": 2.7781875133514404, "learning_rate": 0.0002, "loss": 2.6041, "step": 39320 }, { "epoch": 2.9307004470938898, "grad_norm": 2.579591989517212, "learning_rate": 0.0002, "loss": 2.4904, "step": 39330 }, { "epoch": 2.9314456035767513, "grad_norm": 2.5462992191314697, "learning_rate": 0.0002, "loss": 2.6305, "step": 39340 }, { "epoch": 2.9321907600596124, "grad_norm": 2.6036219596862793, "learning_rate": 0.0002, "loss": 2.6255, "step": 39350 }, { "epoch": 2.932935916542474, "grad_norm": 2.4048843383789062, "learning_rate": 0.0002, "loss": 2.4814, "step": 39360 }, { "epoch": 2.933681073025335, "grad_norm": 2.5451865196228027, "learning_rate": 0.0002, "loss": 2.4788, "step": 39370 }, { "epoch": 2.9344262295081966, "grad_norm": 2.7191903591156006, "learning_rate": 0.0002, "loss": 2.5476, "step": 39380 }, { "epoch": 2.935171385991058, "grad_norm": 2.3997859954833984, "learning_rate": 0.0002, "loss": 2.6285, "step": 39390 }, { "epoch": 2.9359165424739198, "grad_norm": 2.5516610145568848, "learning_rate": 0.0002, "loss": 2.5219, "step": 39400 }, { "epoch": 2.936661698956781, "grad_norm": 2.5959670543670654, "learning_rate": 0.0002, "loss": 2.5965, "step": 39410 }, { "epoch": 2.9374068554396424, "grad_norm": 2.9444220066070557, "learning_rate": 0.0002, "loss": 2.5746, "step": 39420 }, { "epoch": 2.9381520119225035, "grad_norm": 2.497292995452881, "learning_rate": 0.0002, "loss": 2.5914, "step": 39430 }, { "epoch": 2.938897168405365, "grad_norm": 2.524833917617798, "learning_rate": 0.0002, "loss": 2.581, "step": 39440 }, { "epoch": 2.9396423248882266, "grad_norm": 2.8581857681274414, "learning_rate": 0.0002, "loss": 2.5388, "step": 39450 }, { "epoch": 2.940387481371088, "grad_norm": 2.8836469650268555, "learning_rate": 0.0002, "loss": 2.5906, "step": 39460 }, { "epoch": 2.9411326378539493, "grad_norm": 2.530555248260498, "learning_rate": 0.0002, "loss": 2.6937, "step": 39470 }, { "epoch": 2.941877794336811, "grad_norm": 2.8848953247070312, "learning_rate": 0.0002, "loss": 2.4355, "step": 39480 }, { "epoch": 2.942622950819672, "grad_norm": 2.4546191692352295, "learning_rate": 0.0002, "loss": 2.5684, "step": 39490 }, { "epoch": 2.9433681073025335, "grad_norm": 3.093285083770752, "learning_rate": 0.0002, "loss": 2.4457, "step": 39500 }, { "epoch": 2.944113263785395, "grad_norm": 2.597832679748535, "learning_rate": 0.0002, "loss": 2.402, "step": 39510 }, { "epoch": 2.944858420268256, "grad_norm": 2.5404412746429443, "learning_rate": 0.0002, "loss": 2.5724, "step": 39520 }, { "epoch": 2.9456035767511177, "grad_norm": 3.019563674926758, "learning_rate": 0.0002, "loss": 2.3523, "step": 39530 }, { "epoch": 2.9463487332339793, "grad_norm": 3.1213128566741943, "learning_rate": 0.0002, "loss": 2.6512, "step": 39540 }, { "epoch": 2.9470938897168404, "grad_norm": 2.413450002670288, "learning_rate": 0.0002, "loss": 2.4045, "step": 39550 }, { "epoch": 2.947839046199702, "grad_norm": 2.7260587215423584, "learning_rate": 0.0002, "loss": 2.6666, "step": 39560 }, { "epoch": 2.9485842026825635, "grad_norm": 3.4645745754241943, "learning_rate": 0.0002, "loss": 2.6023, "step": 39570 }, { "epoch": 2.9493293591654246, "grad_norm": 2.8158528804779053, "learning_rate": 0.0002, "loss": 2.4214, "step": 39580 }, { "epoch": 2.950074515648286, "grad_norm": 2.712031841278076, "learning_rate": 0.0002, "loss": 2.5187, "step": 39590 }, { "epoch": 2.9508196721311473, "grad_norm": 2.5256295204162598, "learning_rate": 0.0002, "loss": 2.4577, "step": 39600 }, { "epoch": 2.951564828614009, "grad_norm": 2.72895884513855, "learning_rate": 0.0002, "loss": 2.5944, "step": 39610 }, { "epoch": 2.9523099850968704, "grad_norm": 2.040031909942627, "learning_rate": 0.0002, "loss": 2.4583, "step": 39620 }, { "epoch": 2.953055141579732, "grad_norm": 2.666433811187744, "learning_rate": 0.0002, "loss": 2.5275, "step": 39630 }, { "epoch": 2.953800298062593, "grad_norm": 2.6652700901031494, "learning_rate": 0.0002, "loss": 2.3713, "step": 39640 }, { "epoch": 2.9545454545454546, "grad_norm": 2.743901014328003, "learning_rate": 0.0002, "loss": 2.5227, "step": 39650 }, { "epoch": 2.9552906110283157, "grad_norm": 2.845050811767578, "learning_rate": 0.0002, "loss": 2.5615, "step": 39660 }, { "epoch": 2.9560357675111772, "grad_norm": 2.8907158374786377, "learning_rate": 0.0002, "loss": 2.5205, "step": 39670 }, { "epoch": 2.956780923994039, "grad_norm": 2.6992757320404053, "learning_rate": 0.0002, "loss": 2.4661, "step": 39680 }, { "epoch": 2.9575260804769004, "grad_norm": 2.9309325218200684, "learning_rate": 0.0002, "loss": 2.6417, "step": 39690 }, { "epoch": 2.9582712369597615, "grad_norm": 2.801201581954956, "learning_rate": 0.0002, "loss": 2.495, "step": 39700 }, { "epoch": 2.959016393442623, "grad_norm": 2.763702869415283, "learning_rate": 0.0002, "loss": 2.4012, "step": 39710 }, { "epoch": 2.959761549925484, "grad_norm": 2.6679039001464844, "learning_rate": 0.0002, "loss": 2.6976, "step": 39720 }, { "epoch": 2.9605067064083457, "grad_norm": 2.567474603652954, "learning_rate": 0.0002, "loss": 2.6038, "step": 39730 }, { "epoch": 2.9612518628912072, "grad_norm": 3.114344596862793, "learning_rate": 0.0002, "loss": 2.4868, "step": 39740 }, { "epoch": 2.961997019374069, "grad_norm": 2.5168049335479736, "learning_rate": 0.0002, "loss": 2.5279, "step": 39750 }, { "epoch": 2.96274217585693, "grad_norm": 3.0251882076263428, "learning_rate": 0.0002, "loss": 2.6403, "step": 39760 }, { "epoch": 2.9634873323397914, "grad_norm": 2.606680393218994, "learning_rate": 0.0002, "loss": 2.6361, "step": 39770 }, { "epoch": 2.9642324888226526, "grad_norm": 2.699082612991333, "learning_rate": 0.0002, "loss": 2.5908, "step": 39780 }, { "epoch": 2.964977645305514, "grad_norm": 2.8698062896728516, "learning_rate": 0.0002, "loss": 2.4495, "step": 39790 }, { "epoch": 2.9657228017883757, "grad_norm": 2.898927927017212, "learning_rate": 0.0002, "loss": 2.4352, "step": 39800 }, { "epoch": 2.966467958271237, "grad_norm": 2.3014628887176514, "learning_rate": 0.0002, "loss": 2.5411, "step": 39810 }, { "epoch": 2.9672131147540983, "grad_norm": 2.9208085536956787, "learning_rate": 0.0002, "loss": 2.4678, "step": 39820 }, { "epoch": 2.96795827123696, "grad_norm": 2.51240873336792, "learning_rate": 0.0002, "loss": 2.101, "step": 39830 }, { "epoch": 2.968703427719821, "grad_norm": 2.7985494136810303, "learning_rate": 0.0002, "loss": 2.6076, "step": 39840 }, { "epoch": 2.9694485842026825, "grad_norm": 2.5904476642608643, "learning_rate": 0.0002, "loss": 2.4389, "step": 39850 }, { "epoch": 2.970193740685544, "grad_norm": 2.6392509937286377, "learning_rate": 0.0002, "loss": 2.3973, "step": 39860 }, { "epoch": 2.970938897168405, "grad_norm": 1.774827241897583, "learning_rate": 0.0002, "loss": 2.4029, "step": 39870 }, { "epoch": 2.9716840536512668, "grad_norm": 2.54217529296875, "learning_rate": 0.0002, "loss": 2.6573, "step": 39880 }, { "epoch": 2.9724292101341283, "grad_norm": 2.6354312896728516, "learning_rate": 0.0002, "loss": 2.3592, "step": 39890 }, { "epoch": 2.9731743666169894, "grad_norm": 2.6568002700805664, "learning_rate": 0.0002, "loss": 2.5426, "step": 39900 }, { "epoch": 2.973919523099851, "grad_norm": 2.660454273223877, "learning_rate": 0.0002, "loss": 2.4449, "step": 39910 }, { "epoch": 2.9746646795827125, "grad_norm": 2.9466612339019775, "learning_rate": 0.0002, "loss": 2.48, "step": 39920 }, { "epoch": 2.9754098360655736, "grad_norm": 2.5808732509613037, "learning_rate": 0.0002, "loss": 2.6554, "step": 39930 }, { "epoch": 2.976154992548435, "grad_norm": 2.8035290241241455, "learning_rate": 0.0002, "loss": 2.8002, "step": 39940 }, { "epoch": 2.9769001490312967, "grad_norm": 2.615812301635742, "learning_rate": 0.0002, "loss": 2.4047, "step": 39950 }, { "epoch": 2.977645305514158, "grad_norm": 2.588972330093384, "learning_rate": 0.0002, "loss": 2.495, "step": 39960 }, { "epoch": 2.9783904619970194, "grad_norm": 2.669950008392334, "learning_rate": 0.0002, "loss": 2.5453, "step": 39970 }, { "epoch": 2.979135618479881, "grad_norm": 2.609656572341919, "learning_rate": 0.0002, "loss": 2.549, "step": 39980 }, { "epoch": 2.979880774962742, "grad_norm": 2.9678401947021484, "learning_rate": 0.0002, "loss": 2.5067, "step": 39990 }, { "epoch": 2.9806259314456036, "grad_norm": 1.8499410152435303, "learning_rate": 0.0002, "loss": 2.5054, "step": 40000 }, { "epoch": 2.9813710879284647, "grad_norm": 2.4903769493103027, "learning_rate": 0.0002, "loss": 2.4195, "step": 40010 }, { "epoch": 2.9821162444113263, "grad_norm": 2.87322998046875, "learning_rate": 0.0002, "loss": 2.4816, "step": 40020 }, { "epoch": 2.982861400894188, "grad_norm": 2.6124825477600098, "learning_rate": 0.0002, "loss": 2.3755, "step": 40030 }, { "epoch": 2.9836065573770494, "grad_norm": 2.674150228500366, "learning_rate": 0.0002, "loss": 2.4891, "step": 40040 }, { "epoch": 2.9843517138599105, "grad_norm": 2.499284267425537, "learning_rate": 0.0002, "loss": 2.5349, "step": 40050 }, { "epoch": 2.985096870342772, "grad_norm": 2.7705399990081787, "learning_rate": 0.0002, "loss": 2.4778, "step": 40060 }, { "epoch": 2.985842026825633, "grad_norm": 2.2329294681549072, "learning_rate": 0.0002, "loss": 2.4104, "step": 40070 }, { "epoch": 2.9865871833084947, "grad_norm": 2.8790485858917236, "learning_rate": 0.0002, "loss": 2.4753, "step": 40080 }, { "epoch": 2.9873323397913563, "grad_norm": 2.469627618789673, "learning_rate": 0.0002, "loss": 2.6872, "step": 40090 }, { "epoch": 2.988077496274218, "grad_norm": 2.38393235206604, "learning_rate": 0.0002, "loss": 2.6073, "step": 40100 }, { "epoch": 2.988822652757079, "grad_norm": 2.2364330291748047, "learning_rate": 0.0002, "loss": 2.4443, "step": 40110 }, { "epoch": 2.9895678092399405, "grad_norm": 2.5776116847991943, "learning_rate": 0.0002, "loss": 2.532, "step": 40120 }, { "epoch": 2.9903129657228016, "grad_norm": 2.5935330390930176, "learning_rate": 0.0002, "loss": 2.4889, "step": 40130 }, { "epoch": 2.991058122205663, "grad_norm": 2.6229746341705322, "learning_rate": 0.0002, "loss": 2.5562, "step": 40140 }, { "epoch": 2.9918032786885247, "grad_norm": 2.5080349445343018, "learning_rate": 0.0002, "loss": 2.4328, "step": 40150 }, { "epoch": 2.9925484351713862, "grad_norm": 2.5937001705169678, "learning_rate": 0.0002, "loss": 2.591, "step": 40160 }, { "epoch": 2.9932935916542474, "grad_norm": 2.3092591762542725, "learning_rate": 0.0002, "loss": 2.4414, "step": 40170 }, { "epoch": 2.994038748137109, "grad_norm": 2.848226308822632, "learning_rate": 0.0002, "loss": 2.4109, "step": 40180 }, { "epoch": 2.99478390461997, "grad_norm": 2.3122453689575195, "learning_rate": 0.0002, "loss": 2.3277, "step": 40190 }, { "epoch": 2.9955290611028316, "grad_norm": 2.513367176055908, "learning_rate": 0.0002, "loss": 2.5037, "step": 40200 }, { "epoch": 2.996274217585693, "grad_norm": 2.57222843170166, "learning_rate": 0.0002, "loss": 2.5487, "step": 40210 }, { "epoch": 2.9970193740685542, "grad_norm": 2.354405164718628, "learning_rate": 0.0002, "loss": 2.386, "step": 40220 }, { "epoch": 2.997764530551416, "grad_norm": 2.828702688217163, "learning_rate": 0.0002, "loss": 2.3264, "step": 40230 }, { "epoch": 2.9985096870342773, "grad_norm": 2.779327392578125, "learning_rate": 0.0002, "loss": 2.3778, "step": 40240 }, { "epoch": 2.9992548435171384, "grad_norm": 2.4226698875427246, "learning_rate": 0.0002, "loss": 2.1855, "step": 40250 }, { "epoch": 3.0, "grad_norm": 2.918992042541504, "learning_rate": 0.0002, "loss": 2.4347, "step": 40260 }, { "epoch": 3.0, "eval_runtime": 2763.3152, "eval_samples_per_second": 4.856, "eval_steps_per_second": 0.607, "step": 40260 }, { "epoch": 3.0007451564828616, "grad_norm": 2.5024285316467285, "learning_rate": 0.0002, "loss": 2.3297, "step": 40270 }, { "epoch": 3.0014903129657227, "grad_norm": 2.8226559162139893, "learning_rate": 0.0002, "loss": 2.3661, "step": 40280 }, { "epoch": 3.002235469448584, "grad_norm": 2.758744716644287, "learning_rate": 0.0002, "loss": 2.4495, "step": 40290 }, { "epoch": 3.0029806259314458, "grad_norm": 2.426999568939209, "learning_rate": 0.0002, "loss": 2.2649, "step": 40300 }, { "epoch": 3.003725782414307, "grad_norm": 2.6783957481384277, "learning_rate": 0.0002, "loss": 2.3838, "step": 40310 }, { "epoch": 3.0044709388971684, "grad_norm": 3.014652967453003, "learning_rate": 0.0002, "loss": 2.1913, "step": 40320 }, { "epoch": 3.00521609538003, "grad_norm": 2.7266387939453125, "learning_rate": 0.0002, "loss": 2.3369, "step": 40330 }, { "epoch": 3.005961251862891, "grad_norm": 2.7019906044006348, "learning_rate": 0.0002, "loss": 2.1718, "step": 40340 }, { "epoch": 3.0067064083457526, "grad_norm": 2.5368382930755615, "learning_rate": 0.0002, "loss": 2.4286, "step": 40350 }, { "epoch": 3.007451564828614, "grad_norm": 2.9685909748077393, "learning_rate": 0.0002, "loss": 2.3881, "step": 40360 }, { "epoch": 3.0081967213114753, "grad_norm": 2.656423330307007, "learning_rate": 0.0002, "loss": 2.3198, "step": 40370 }, { "epoch": 3.008941877794337, "grad_norm": 2.7926347255706787, "learning_rate": 0.0002, "loss": 2.2615, "step": 40380 }, { "epoch": 3.0096870342771984, "grad_norm": 3.065396308898926, "learning_rate": 0.0002, "loss": 2.4156, "step": 40390 }, { "epoch": 3.0104321907600595, "grad_norm": 2.8727266788482666, "learning_rate": 0.0002, "loss": 2.429, "step": 40400 }, { "epoch": 3.011177347242921, "grad_norm": 2.9066572189331055, "learning_rate": 0.0002, "loss": 2.3824, "step": 40410 }, { "epoch": 3.0119225037257826, "grad_norm": 2.813734769821167, "learning_rate": 0.0002, "loss": 2.2325, "step": 40420 }, { "epoch": 3.0126676602086437, "grad_norm": 2.808375120162964, "learning_rate": 0.0002, "loss": 2.3175, "step": 40430 }, { "epoch": 3.0134128166915053, "grad_norm": 2.6436479091644287, "learning_rate": 0.0002, "loss": 2.4796, "step": 40440 }, { "epoch": 3.0141579731743664, "grad_norm": 2.557705879211426, "learning_rate": 0.0002, "loss": 2.5264, "step": 40450 }, { "epoch": 3.014903129657228, "grad_norm": 2.7825839519500732, "learning_rate": 0.0002, "loss": 2.4484, "step": 40460 }, { "epoch": 3.0156482861400895, "grad_norm": 2.4266088008880615, "learning_rate": 0.0002, "loss": 2.4146, "step": 40470 }, { "epoch": 3.0163934426229506, "grad_norm": 2.8366827964782715, "learning_rate": 0.0002, "loss": 2.3706, "step": 40480 }, { "epoch": 3.017138599105812, "grad_norm": 2.837256669998169, "learning_rate": 0.0002, "loss": 2.4929, "step": 40490 }, { "epoch": 3.0178837555886737, "grad_norm": 2.7179813385009766, "learning_rate": 0.0002, "loss": 2.365, "step": 40500 }, { "epoch": 3.018628912071535, "grad_norm": 2.3554391860961914, "learning_rate": 0.0002, "loss": 2.2821, "step": 40510 }, { "epoch": 3.0193740685543964, "grad_norm": 2.696993112564087, "learning_rate": 0.0002, "loss": 2.5171, "step": 40520 }, { "epoch": 3.020119225037258, "grad_norm": 2.1214194297790527, "learning_rate": 0.0002, "loss": 2.2159, "step": 40530 }, { "epoch": 3.020864381520119, "grad_norm": 2.8695015907287598, "learning_rate": 0.0002, "loss": 2.3214, "step": 40540 }, { "epoch": 3.0216095380029806, "grad_norm": 2.551729679107666, "learning_rate": 0.0002, "loss": 2.2563, "step": 40550 }, { "epoch": 3.022354694485842, "grad_norm": 2.3707385063171387, "learning_rate": 0.0002, "loss": 2.4613, "step": 40560 }, { "epoch": 3.0230998509687033, "grad_norm": 2.9850101470947266, "learning_rate": 0.0002, "loss": 2.37, "step": 40570 }, { "epoch": 3.023845007451565, "grad_norm": 2.5724594593048096, "learning_rate": 0.0002, "loss": 2.3877, "step": 40580 }, { "epoch": 3.0245901639344264, "grad_norm": 2.7039642333984375, "learning_rate": 0.0002, "loss": 2.2501, "step": 40590 }, { "epoch": 3.0253353204172875, "grad_norm": 2.7257907390594482, "learning_rate": 0.0002, "loss": 2.3512, "step": 40600 }, { "epoch": 3.026080476900149, "grad_norm": 2.788710355758667, "learning_rate": 0.0002, "loss": 2.3724, "step": 40610 }, { "epoch": 3.0268256333830106, "grad_norm": 2.3942348957061768, "learning_rate": 0.0002, "loss": 2.288, "step": 40620 }, { "epoch": 3.0275707898658717, "grad_norm": 2.6751253604888916, "learning_rate": 0.0002, "loss": 2.4059, "step": 40630 }, { "epoch": 3.0283159463487332, "grad_norm": 3.1774163246154785, "learning_rate": 0.0002, "loss": 2.4086, "step": 40640 }, { "epoch": 3.029061102831595, "grad_norm": 2.69118595123291, "learning_rate": 0.0002, "loss": 2.4347, "step": 40650 }, { "epoch": 3.029806259314456, "grad_norm": 2.844470262527466, "learning_rate": 0.0002, "loss": 2.3638, "step": 40660 }, { "epoch": 3.0305514157973175, "grad_norm": 2.662158966064453, "learning_rate": 0.0002, "loss": 2.3734, "step": 40670 }, { "epoch": 3.031296572280179, "grad_norm": 2.9637556076049805, "learning_rate": 0.0002, "loss": 2.3224, "step": 40680 }, { "epoch": 3.03204172876304, "grad_norm": 2.62660813331604, "learning_rate": 0.0002, "loss": 2.4436, "step": 40690 }, { "epoch": 3.0327868852459017, "grad_norm": 2.9725089073181152, "learning_rate": 0.0002, "loss": 2.5596, "step": 40700 }, { "epoch": 3.0335320417287632, "grad_norm": 2.9883205890655518, "learning_rate": 0.0002, "loss": 2.2181, "step": 40710 }, { "epoch": 3.0342771982116243, "grad_norm": 2.632417917251587, "learning_rate": 0.0002, "loss": 2.3767, "step": 40720 }, { "epoch": 3.035022354694486, "grad_norm": 2.510411262512207, "learning_rate": 0.0002, "loss": 2.3296, "step": 40730 }, { "epoch": 3.0357675111773474, "grad_norm": 2.5027692317962646, "learning_rate": 0.0002, "loss": 2.2664, "step": 40740 }, { "epoch": 3.0365126676602086, "grad_norm": 2.8952243328094482, "learning_rate": 0.0002, "loss": 2.3159, "step": 40750 }, { "epoch": 3.03725782414307, "grad_norm": 2.4545536041259766, "learning_rate": 0.0002, "loss": 2.5346, "step": 40760 }, { "epoch": 3.0380029806259317, "grad_norm": 2.8499226570129395, "learning_rate": 0.0002, "loss": 2.3085, "step": 40770 }, { "epoch": 3.0387481371087928, "grad_norm": 3.025698184967041, "learning_rate": 0.0002, "loss": 2.5594, "step": 40780 }, { "epoch": 3.0394932935916543, "grad_norm": 2.6746528148651123, "learning_rate": 0.0002, "loss": 2.4489, "step": 40790 }, { "epoch": 3.0402384500745154, "grad_norm": 3.0967001914978027, "learning_rate": 0.0002, "loss": 2.2821, "step": 40800 }, { "epoch": 3.040983606557377, "grad_norm": 2.5189292430877686, "learning_rate": 0.0002, "loss": 2.1087, "step": 40810 }, { "epoch": 3.0417287630402385, "grad_norm": 2.407550573348999, "learning_rate": 0.0002, "loss": 2.4971, "step": 40820 }, { "epoch": 3.0424739195230996, "grad_norm": 2.7270169258117676, "learning_rate": 0.0002, "loss": 2.3941, "step": 40830 }, { "epoch": 3.043219076005961, "grad_norm": 2.8451573848724365, "learning_rate": 0.0002, "loss": 2.4935, "step": 40840 }, { "epoch": 3.0439642324888228, "grad_norm": 2.520591974258423, "learning_rate": 0.0002, "loss": 2.4202, "step": 40850 }, { "epoch": 3.044709388971684, "grad_norm": 2.5729188919067383, "learning_rate": 0.0002, "loss": 2.3634, "step": 40860 }, { "epoch": 3.0454545454545454, "grad_norm": 2.5250637531280518, "learning_rate": 0.0002, "loss": 2.3602, "step": 40870 }, { "epoch": 3.046199701937407, "grad_norm": 2.7925519943237305, "learning_rate": 0.0002, "loss": 2.4729, "step": 40880 }, { "epoch": 3.046944858420268, "grad_norm": 2.4078192710876465, "learning_rate": 0.0002, "loss": 2.4195, "step": 40890 }, { "epoch": 3.0476900149031296, "grad_norm": 2.648886203765869, "learning_rate": 0.0002, "loss": 2.4189, "step": 40900 }, { "epoch": 3.048435171385991, "grad_norm": 2.811570167541504, "learning_rate": 0.0002, "loss": 2.3038, "step": 40910 }, { "epoch": 3.0491803278688523, "grad_norm": 2.7872612476348877, "learning_rate": 0.0002, "loss": 2.3317, "step": 40920 }, { "epoch": 3.049925484351714, "grad_norm": 2.933014154434204, "learning_rate": 0.0002, "loss": 2.398, "step": 40930 }, { "epoch": 3.0506706408345754, "grad_norm": 2.8727405071258545, "learning_rate": 0.0002, "loss": 2.2654, "step": 40940 }, { "epoch": 3.0514157973174365, "grad_norm": 2.7194955348968506, "learning_rate": 0.0002, "loss": 2.5749, "step": 40950 }, { "epoch": 3.052160953800298, "grad_norm": 2.616337776184082, "learning_rate": 0.0002, "loss": 2.4542, "step": 40960 }, { "epoch": 3.0529061102831596, "grad_norm": 2.4622654914855957, "learning_rate": 0.0002, "loss": 2.3492, "step": 40970 }, { "epoch": 3.0536512667660207, "grad_norm": 2.7078351974487305, "learning_rate": 0.0002, "loss": 2.3654, "step": 40980 }, { "epoch": 3.0543964232488823, "grad_norm": 2.5035488605499268, "learning_rate": 0.0002, "loss": 2.4756, "step": 40990 }, { "epoch": 3.055141579731744, "grad_norm": 2.7884819507598877, "learning_rate": 0.0002, "loss": 2.3663, "step": 41000 }, { "epoch": 3.055886736214605, "grad_norm": 2.593498945236206, "learning_rate": 0.0002, "loss": 2.3312, "step": 41010 }, { "epoch": 3.0566318926974665, "grad_norm": 2.6816306114196777, "learning_rate": 0.0002, "loss": 2.4835, "step": 41020 }, { "epoch": 3.057377049180328, "grad_norm": 2.6609058380126953, "learning_rate": 0.0002, "loss": 2.4931, "step": 41030 }, { "epoch": 3.058122205663189, "grad_norm": 2.7509803771972656, "learning_rate": 0.0002, "loss": 2.3721, "step": 41040 }, { "epoch": 3.0588673621460507, "grad_norm": 2.3781402111053467, "learning_rate": 0.0002, "loss": 2.3468, "step": 41050 }, { "epoch": 3.0596125186289123, "grad_norm": 2.922632932662964, "learning_rate": 0.0002, "loss": 2.525, "step": 41060 }, { "epoch": 3.0603576751117734, "grad_norm": 2.792450189590454, "learning_rate": 0.0002, "loss": 2.2651, "step": 41070 }, { "epoch": 3.061102831594635, "grad_norm": 2.9025795459747314, "learning_rate": 0.0002, "loss": 2.4843, "step": 41080 }, { "epoch": 3.0618479880774965, "grad_norm": 2.6068289279937744, "learning_rate": 0.0002, "loss": 2.4153, "step": 41090 }, { "epoch": 3.0625931445603576, "grad_norm": 2.5537047386169434, "learning_rate": 0.0002, "loss": 2.4612, "step": 41100 }, { "epoch": 3.063338301043219, "grad_norm": 2.9191601276397705, "learning_rate": 0.0002, "loss": 2.5681, "step": 41110 }, { "epoch": 3.0640834575260807, "grad_norm": 2.4749388694763184, "learning_rate": 0.0002, "loss": 2.2255, "step": 41120 }, { "epoch": 3.064828614008942, "grad_norm": 3.2458298206329346, "learning_rate": 0.0002, "loss": 2.3751, "step": 41130 }, { "epoch": 3.0655737704918034, "grad_norm": 2.4875826835632324, "learning_rate": 0.0002, "loss": 2.4063, "step": 41140 }, { "epoch": 3.066318926974665, "grad_norm": 2.788435697555542, "learning_rate": 0.0002, "loss": 2.4848, "step": 41150 }, { "epoch": 3.067064083457526, "grad_norm": 2.731872320175171, "learning_rate": 0.0002, "loss": 2.4219, "step": 41160 }, { "epoch": 3.0678092399403876, "grad_norm": 2.5522799491882324, "learning_rate": 0.0002, "loss": 2.2948, "step": 41170 }, { "epoch": 3.0685543964232487, "grad_norm": 2.494781255722046, "learning_rate": 0.0002, "loss": 2.4205, "step": 41180 }, { "epoch": 3.0692995529061102, "grad_norm": 2.828057289123535, "learning_rate": 0.0002, "loss": 2.4637, "step": 41190 }, { "epoch": 3.070044709388972, "grad_norm": 2.98608660697937, "learning_rate": 0.0002, "loss": 2.4205, "step": 41200 }, { "epoch": 3.070789865871833, "grad_norm": 2.9125189781188965, "learning_rate": 0.0002, "loss": 2.3945, "step": 41210 }, { "epoch": 3.0715350223546944, "grad_norm": 2.599982261657715, "learning_rate": 0.0002, "loss": 2.4643, "step": 41220 }, { "epoch": 3.072280178837556, "grad_norm": 2.4792168140411377, "learning_rate": 0.0002, "loss": 2.368, "step": 41230 }, { "epoch": 3.073025335320417, "grad_norm": 2.604639768600464, "learning_rate": 0.0002, "loss": 2.3853, "step": 41240 }, { "epoch": 3.0737704918032787, "grad_norm": 2.617868185043335, "learning_rate": 0.0002, "loss": 2.342, "step": 41250 }, { "epoch": 3.07451564828614, "grad_norm": 2.1132025718688965, "learning_rate": 0.0002, "loss": 2.2172, "step": 41260 }, { "epoch": 3.0752608047690013, "grad_norm": 2.4275896549224854, "learning_rate": 0.0002, "loss": 2.0727, "step": 41270 }, { "epoch": 3.076005961251863, "grad_norm": 2.285426616668701, "learning_rate": 0.0002, "loss": 2.2816, "step": 41280 }, { "epoch": 3.0767511177347244, "grad_norm": 2.9328603744506836, "learning_rate": 0.0002, "loss": 2.3676, "step": 41290 }, { "epoch": 3.0774962742175855, "grad_norm": 2.247255325317383, "learning_rate": 0.0002, "loss": 2.2789, "step": 41300 }, { "epoch": 3.078241430700447, "grad_norm": 2.8094482421875, "learning_rate": 0.0002, "loss": 2.5418, "step": 41310 }, { "epoch": 3.0789865871833086, "grad_norm": 2.7178444862365723, "learning_rate": 0.0002, "loss": 2.4174, "step": 41320 }, { "epoch": 3.0797317436661698, "grad_norm": 3.3467769622802734, "learning_rate": 0.0002, "loss": 2.459, "step": 41330 }, { "epoch": 3.0804769001490313, "grad_norm": 2.7289459705352783, "learning_rate": 0.0002, "loss": 2.3743, "step": 41340 }, { "epoch": 3.081222056631893, "grad_norm": 3.2940895557403564, "learning_rate": 0.0002, "loss": 2.4498, "step": 41350 }, { "epoch": 3.081967213114754, "grad_norm": 2.498302459716797, "learning_rate": 0.0002, "loss": 2.4309, "step": 41360 }, { "epoch": 3.0827123695976155, "grad_norm": 2.9865078926086426, "learning_rate": 0.0002, "loss": 2.585, "step": 41370 }, { "epoch": 3.083457526080477, "grad_norm": 3.016395092010498, "learning_rate": 0.0002, "loss": 2.6466, "step": 41380 }, { "epoch": 3.084202682563338, "grad_norm": 2.7781736850738525, "learning_rate": 0.0002, "loss": 2.3803, "step": 41390 }, { "epoch": 3.0849478390461997, "grad_norm": 2.8058199882507324, "learning_rate": 0.0002, "loss": 2.3414, "step": 41400 }, { "epoch": 3.0856929955290613, "grad_norm": 2.8715202808380127, "learning_rate": 0.0002, "loss": 2.2693, "step": 41410 }, { "epoch": 3.0864381520119224, "grad_norm": 2.7370269298553467, "learning_rate": 0.0002, "loss": 2.4274, "step": 41420 }, { "epoch": 3.087183308494784, "grad_norm": 2.570589065551758, "learning_rate": 0.0002, "loss": 2.5473, "step": 41430 }, { "epoch": 3.0879284649776455, "grad_norm": 2.858039617538452, "learning_rate": 0.0002, "loss": 2.4307, "step": 41440 }, { "epoch": 3.0886736214605066, "grad_norm": 2.4094789028167725, "learning_rate": 0.0002, "loss": 2.3369, "step": 41450 }, { "epoch": 3.089418777943368, "grad_norm": 2.665861129760742, "learning_rate": 0.0002, "loss": 2.4736, "step": 41460 }, { "epoch": 3.0901639344262297, "grad_norm": 2.7448792457580566, "learning_rate": 0.0002, "loss": 2.2552, "step": 41470 }, { "epoch": 3.090909090909091, "grad_norm": 3.320862293243408, "learning_rate": 0.0002, "loss": 2.361, "step": 41480 }, { "epoch": 3.0916542473919524, "grad_norm": 2.623382568359375, "learning_rate": 0.0002, "loss": 2.2576, "step": 41490 }, { "epoch": 3.092399403874814, "grad_norm": 2.757305383682251, "learning_rate": 0.0002, "loss": 2.4842, "step": 41500 }, { "epoch": 3.093144560357675, "grad_norm": 3.2521588802337646, "learning_rate": 0.0002, "loss": 2.4941, "step": 41510 }, { "epoch": 3.0938897168405366, "grad_norm": 2.6293327808380127, "learning_rate": 0.0002, "loss": 2.3642, "step": 41520 }, { "epoch": 3.0946348733233977, "grad_norm": 2.7597970962524414, "learning_rate": 0.0002, "loss": 2.4894, "step": 41530 }, { "epoch": 3.0953800298062593, "grad_norm": 3.117297887802124, "learning_rate": 0.0002, "loss": 2.5951, "step": 41540 }, { "epoch": 3.096125186289121, "grad_norm": 2.641249656677246, "learning_rate": 0.0002, "loss": 2.2928, "step": 41550 }, { "epoch": 3.096870342771982, "grad_norm": 3.0518288612365723, "learning_rate": 0.0002, "loss": 2.3452, "step": 41560 }, { "epoch": 3.0976154992548435, "grad_norm": 2.6108360290527344, "learning_rate": 0.0002, "loss": 2.2999, "step": 41570 }, { "epoch": 3.098360655737705, "grad_norm": 3.087437629699707, "learning_rate": 0.0002, "loss": 2.4094, "step": 41580 }, { "epoch": 3.099105812220566, "grad_norm": 2.4869635105133057, "learning_rate": 0.0002, "loss": 2.4677, "step": 41590 }, { "epoch": 3.0998509687034277, "grad_norm": 3.1462340354919434, "learning_rate": 0.0002, "loss": 2.5562, "step": 41600 }, { "epoch": 3.1005961251862892, "grad_norm": 2.7001521587371826, "learning_rate": 0.0002, "loss": 2.4251, "step": 41610 }, { "epoch": 3.1013412816691504, "grad_norm": 3.1031558513641357, "learning_rate": 0.0002, "loss": 2.492, "step": 41620 }, { "epoch": 3.102086438152012, "grad_norm": 2.6600401401519775, "learning_rate": 0.0002, "loss": 2.4346, "step": 41630 }, { "epoch": 3.1028315946348735, "grad_norm": 2.673090934753418, "learning_rate": 0.0002, "loss": 2.3663, "step": 41640 }, { "epoch": 3.1035767511177346, "grad_norm": 2.6423120498657227, "learning_rate": 0.0002, "loss": 2.486, "step": 41650 }, { "epoch": 3.104321907600596, "grad_norm": 2.6316373348236084, "learning_rate": 0.0002, "loss": 2.5157, "step": 41660 }, { "epoch": 3.1050670640834577, "grad_norm": 2.9219932556152344, "learning_rate": 0.0002, "loss": 2.2875, "step": 41670 }, { "epoch": 3.105812220566319, "grad_norm": 3.2191224098205566, "learning_rate": 0.0002, "loss": 2.3327, "step": 41680 }, { "epoch": 3.1065573770491803, "grad_norm": 2.6431362628936768, "learning_rate": 0.0002, "loss": 2.4269, "step": 41690 }, { "epoch": 3.107302533532042, "grad_norm": 2.56723952293396, "learning_rate": 0.0002, "loss": 2.3461, "step": 41700 }, { "epoch": 3.108047690014903, "grad_norm": 3.1009645462036133, "learning_rate": 0.0002, "loss": 2.62, "step": 41710 }, { "epoch": 3.1087928464977646, "grad_norm": 2.805692672729492, "learning_rate": 0.0002, "loss": 2.2908, "step": 41720 }, { "epoch": 3.109538002980626, "grad_norm": 2.93380069732666, "learning_rate": 0.0002, "loss": 2.4379, "step": 41730 }, { "epoch": 3.110283159463487, "grad_norm": 2.5862841606140137, "learning_rate": 0.0002, "loss": 2.3883, "step": 41740 }, { "epoch": 3.1110283159463488, "grad_norm": 2.6500649452209473, "learning_rate": 0.0002, "loss": 2.3901, "step": 41750 }, { "epoch": 3.1117734724292103, "grad_norm": 2.738546371459961, "learning_rate": 0.0002, "loss": 2.4566, "step": 41760 }, { "epoch": 3.1125186289120714, "grad_norm": 2.519313335418701, "learning_rate": 0.0002, "loss": 2.4855, "step": 41770 }, { "epoch": 3.113263785394933, "grad_norm": 2.5396151542663574, "learning_rate": 0.0002, "loss": 2.4722, "step": 41780 }, { "epoch": 3.1140089418777945, "grad_norm": 2.6931238174438477, "learning_rate": 0.0002, "loss": 2.5307, "step": 41790 }, { "epoch": 3.1147540983606556, "grad_norm": 2.5568339824676514, "learning_rate": 0.0002, "loss": 2.3558, "step": 41800 }, { "epoch": 3.115499254843517, "grad_norm": 2.8651018142700195, "learning_rate": 0.0002, "loss": 2.3666, "step": 41810 }, { "epoch": 3.1162444113263787, "grad_norm": 2.6825473308563232, "learning_rate": 0.0002, "loss": 2.4556, "step": 41820 }, { "epoch": 3.11698956780924, "grad_norm": 2.327749490737915, "learning_rate": 0.0002, "loss": 2.3705, "step": 41830 }, { "epoch": 3.1177347242921014, "grad_norm": 2.5712668895721436, "learning_rate": 0.0002, "loss": 2.5078, "step": 41840 }, { "epoch": 3.118479880774963, "grad_norm": 2.770862102508545, "learning_rate": 0.0002, "loss": 2.4223, "step": 41850 }, { "epoch": 3.119225037257824, "grad_norm": 2.7376694679260254, "learning_rate": 0.0002, "loss": 2.0986, "step": 41860 }, { "epoch": 3.1199701937406856, "grad_norm": 2.3979244232177734, "learning_rate": 0.0002, "loss": 2.3534, "step": 41870 }, { "epoch": 3.1207153502235467, "grad_norm": 3.0536673069000244, "learning_rate": 0.0002, "loss": 2.4107, "step": 41880 }, { "epoch": 3.1214605067064083, "grad_norm": 2.726121187210083, "learning_rate": 0.0002, "loss": 2.3308, "step": 41890 }, { "epoch": 3.12220566318927, "grad_norm": 2.5417113304138184, "learning_rate": 0.0002, "loss": 2.3748, "step": 41900 }, { "epoch": 3.122950819672131, "grad_norm": 2.813762664794922, "learning_rate": 0.0002, "loss": 2.4617, "step": 41910 }, { "epoch": 3.1236959761549925, "grad_norm": 2.67824649810791, "learning_rate": 0.0002, "loss": 2.391, "step": 41920 }, { "epoch": 3.124441132637854, "grad_norm": 2.5998127460479736, "learning_rate": 0.0002, "loss": 2.417, "step": 41930 }, { "epoch": 3.125186289120715, "grad_norm": 2.6877424716949463, "learning_rate": 0.0002, "loss": 2.154, "step": 41940 }, { "epoch": 3.1259314456035767, "grad_norm": 2.592630386352539, "learning_rate": 0.0002, "loss": 2.2968, "step": 41950 }, { "epoch": 3.1266766020864383, "grad_norm": 2.4909517765045166, "learning_rate": 0.0002, "loss": 2.2333, "step": 41960 }, { "epoch": 3.1274217585692994, "grad_norm": 2.7702407836914062, "learning_rate": 0.0002, "loss": 2.329, "step": 41970 }, { "epoch": 3.128166915052161, "grad_norm": 3.2048144340515137, "learning_rate": 0.0002, "loss": 2.5344, "step": 41980 }, { "epoch": 3.1289120715350225, "grad_norm": 2.452103614807129, "learning_rate": 0.0002, "loss": 2.3947, "step": 41990 }, { "epoch": 3.1296572280178836, "grad_norm": 3.0650296211242676, "learning_rate": 0.0002, "loss": 2.6355, "step": 42000 }, { "epoch": 3.130402384500745, "grad_norm": 3.1083576679229736, "learning_rate": 0.0002, "loss": 2.3897, "step": 42010 }, { "epoch": 3.1311475409836067, "grad_norm": 2.4616737365722656, "learning_rate": 0.0002, "loss": 2.4941, "step": 42020 }, { "epoch": 3.131892697466468, "grad_norm": 2.4813618659973145, "learning_rate": 0.0002, "loss": 2.2795, "step": 42030 }, { "epoch": 3.1326378539493294, "grad_norm": 2.4839460849761963, "learning_rate": 0.0002, "loss": 2.4753, "step": 42040 }, { "epoch": 3.133383010432191, "grad_norm": 2.5165300369262695, "learning_rate": 0.0002, "loss": 2.4168, "step": 42050 }, { "epoch": 3.134128166915052, "grad_norm": 3.2887141704559326, "learning_rate": 0.0002, "loss": 2.2558, "step": 42060 }, { "epoch": 3.1348733233979136, "grad_norm": 2.8972301483154297, "learning_rate": 0.0002, "loss": 2.5248, "step": 42070 }, { "epoch": 3.135618479880775, "grad_norm": 2.6605656147003174, "learning_rate": 0.0002, "loss": 2.3511, "step": 42080 }, { "epoch": 3.1363636363636362, "grad_norm": 2.9574508666992188, "learning_rate": 0.0002, "loss": 2.5589, "step": 42090 }, { "epoch": 3.137108792846498, "grad_norm": 2.4659640789031982, "learning_rate": 0.0002, "loss": 2.4887, "step": 42100 }, { "epoch": 3.1378539493293593, "grad_norm": 2.799456834793091, "learning_rate": 0.0002, "loss": 2.4112, "step": 42110 }, { "epoch": 3.1385991058122205, "grad_norm": 2.674593925476074, "learning_rate": 0.0002, "loss": 2.472, "step": 42120 }, { "epoch": 3.139344262295082, "grad_norm": 2.741276979446411, "learning_rate": 0.0002, "loss": 2.438, "step": 42130 }, { "epoch": 3.1400894187779436, "grad_norm": 3.0024397373199463, "learning_rate": 0.0002, "loss": 2.4534, "step": 42140 }, { "epoch": 3.1408345752608047, "grad_norm": 2.393557548522949, "learning_rate": 0.0002, "loss": 2.4285, "step": 42150 }, { "epoch": 3.1415797317436662, "grad_norm": 2.408137798309326, "learning_rate": 0.0002, "loss": 2.2754, "step": 42160 }, { "epoch": 3.1423248882265273, "grad_norm": 2.6676998138427734, "learning_rate": 0.0002, "loss": 2.4991, "step": 42170 }, { "epoch": 3.143070044709389, "grad_norm": 2.7383525371551514, "learning_rate": 0.0002, "loss": 2.4517, "step": 42180 }, { "epoch": 3.1438152011922504, "grad_norm": 3.209341526031494, "learning_rate": 0.0002, "loss": 2.4987, "step": 42190 }, { "epoch": 3.144560357675112, "grad_norm": 2.7689366340637207, "learning_rate": 0.0002, "loss": 2.3316, "step": 42200 }, { "epoch": 3.145305514157973, "grad_norm": 2.7870845794677734, "learning_rate": 0.0002, "loss": 2.4651, "step": 42210 }, { "epoch": 3.1460506706408347, "grad_norm": 2.8037571907043457, "learning_rate": 0.0002, "loss": 2.4491, "step": 42220 }, { "epoch": 3.1467958271236958, "grad_norm": 2.640016555786133, "learning_rate": 0.0002, "loss": 2.2967, "step": 42230 }, { "epoch": 3.1475409836065573, "grad_norm": 2.958155870437622, "learning_rate": 0.0002, "loss": 2.1981, "step": 42240 }, { "epoch": 3.148286140089419, "grad_norm": 3.091360569000244, "learning_rate": 0.0002, "loss": 2.5147, "step": 42250 }, { "epoch": 3.14903129657228, "grad_norm": 3.4414288997650146, "learning_rate": 0.0002, "loss": 2.4308, "step": 42260 }, { "epoch": 3.1497764530551415, "grad_norm": 3.074572801589966, "learning_rate": 0.0002, "loss": 2.5425, "step": 42270 }, { "epoch": 3.150521609538003, "grad_norm": 2.8518035411834717, "learning_rate": 0.0002, "loss": 2.5321, "step": 42280 }, { "epoch": 3.151266766020864, "grad_norm": 2.689629077911377, "learning_rate": 0.0002, "loss": 2.527, "step": 42290 }, { "epoch": 3.1520119225037257, "grad_norm": 2.6013498306274414, "learning_rate": 0.0002, "loss": 2.3945, "step": 42300 }, { "epoch": 3.1527570789865873, "grad_norm": 2.6785435676574707, "learning_rate": 0.0002, "loss": 2.5889, "step": 42310 }, { "epoch": 3.1535022354694484, "grad_norm": 2.7143375873565674, "learning_rate": 0.0002, "loss": 2.4548, "step": 42320 }, { "epoch": 3.15424739195231, "grad_norm": 2.794013261795044, "learning_rate": 0.0002, "loss": 2.4599, "step": 42330 }, { "epoch": 3.1549925484351715, "grad_norm": 2.9435579776763916, "learning_rate": 0.0002, "loss": 2.4458, "step": 42340 }, { "epoch": 3.1557377049180326, "grad_norm": 2.210059642791748, "learning_rate": 0.0002, "loss": 2.4752, "step": 42350 }, { "epoch": 3.156482861400894, "grad_norm": 3.0881826877593994, "learning_rate": 0.0002, "loss": 2.3679, "step": 42360 }, { "epoch": 3.1572280178837557, "grad_norm": 2.6826508045196533, "learning_rate": 0.0002, "loss": 2.3735, "step": 42370 }, { "epoch": 3.157973174366617, "grad_norm": 2.657874822616577, "learning_rate": 0.0002, "loss": 2.3878, "step": 42380 }, { "epoch": 3.1587183308494784, "grad_norm": 2.6119277477264404, "learning_rate": 0.0002, "loss": 2.4191, "step": 42390 }, { "epoch": 3.15946348733234, "grad_norm": 2.929684638977051, "learning_rate": 0.0002, "loss": 2.3763, "step": 42400 }, { "epoch": 3.160208643815201, "grad_norm": 2.660856246948242, "learning_rate": 0.0002, "loss": 2.4806, "step": 42410 }, { "epoch": 3.1609538002980626, "grad_norm": 2.3593862056732178, "learning_rate": 0.0002, "loss": 2.2756, "step": 42420 }, { "epoch": 3.161698956780924, "grad_norm": 3.1583991050720215, "learning_rate": 0.0002, "loss": 2.4429, "step": 42430 }, { "epoch": 3.1624441132637853, "grad_norm": 2.3144760131835938, "learning_rate": 0.0002, "loss": 2.3234, "step": 42440 }, { "epoch": 3.163189269746647, "grad_norm": 2.6270995140075684, "learning_rate": 0.0002, "loss": 2.4479, "step": 42450 }, { "epoch": 3.1639344262295084, "grad_norm": 2.9927475452423096, "learning_rate": 0.0002, "loss": 2.4985, "step": 42460 }, { "epoch": 3.1646795827123695, "grad_norm": 2.8037357330322266, "learning_rate": 0.0002, "loss": 2.3792, "step": 42470 }, { "epoch": 3.165424739195231, "grad_norm": 2.522001028060913, "learning_rate": 0.0002, "loss": 2.5035, "step": 42480 }, { "epoch": 3.1661698956780926, "grad_norm": 2.7954230308532715, "learning_rate": 0.0002, "loss": 2.5186, "step": 42490 }, { "epoch": 3.1669150521609537, "grad_norm": 2.842353105545044, "learning_rate": 0.0002, "loss": 2.3828, "step": 42500 }, { "epoch": 3.1676602086438153, "grad_norm": 3.6176884174346924, "learning_rate": 0.0002, "loss": 2.4387, "step": 42510 }, { "epoch": 3.168405365126677, "grad_norm": 2.5962915420532227, "learning_rate": 0.0002, "loss": 2.3441, "step": 42520 }, { "epoch": 3.169150521609538, "grad_norm": 2.856750726699829, "learning_rate": 0.0002, "loss": 2.4871, "step": 42530 }, { "epoch": 3.1698956780923995, "grad_norm": 2.5241379737854004, "learning_rate": 0.0002, "loss": 2.4527, "step": 42540 }, { "epoch": 3.170640834575261, "grad_norm": 3.149193525314331, "learning_rate": 0.0002, "loss": 2.4242, "step": 42550 }, { "epoch": 3.171385991058122, "grad_norm": 2.600130081176758, "learning_rate": 0.0002, "loss": 2.4865, "step": 42560 }, { "epoch": 3.1721311475409837, "grad_norm": 2.598031520843506, "learning_rate": 0.0002, "loss": 2.4508, "step": 42570 }, { "epoch": 3.172876304023845, "grad_norm": 2.6382107734680176, "learning_rate": 0.0002, "loss": 2.4075, "step": 42580 }, { "epoch": 3.1736214605067063, "grad_norm": 2.6536340713500977, "learning_rate": 0.0002, "loss": 2.2179, "step": 42590 }, { "epoch": 3.174366616989568, "grad_norm": 2.2409815788269043, "learning_rate": 0.0002, "loss": 2.4891, "step": 42600 }, { "epoch": 3.175111773472429, "grad_norm": 2.8284525871276855, "learning_rate": 0.0002, "loss": 2.4104, "step": 42610 }, { "epoch": 3.1758569299552906, "grad_norm": 2.8395347595214844, "learning_rate": 0.0002, "loss": 2.4732, "step": 42620 }, { "epoch": 3.176602086438152, "grad_norm": 2.7434206008911133, "learning_rate": 0.0002, "loss": 2.5753, "step": 42630 }, { "epoch": 3.1773472429210132, "grad_norm": 2.5308117866516113, "learning_rate": 0.0002, "loss": 2.4494, "step": 42640 }, { "epoch": 3.178092399403875, "grad_norm": 2.765371322631836, "learning_rate": 0.0002, "loss": 2.2449, "step": 42650 }, { "epoch": 3.1788375558867363, "grad_norm": 2.844912052154541, "learning_rate": 0.0002, "loss": 2.5911, "step": 42660 }, { "epoch": 3.1795827123695974, "grad_norm": 2.811023712158203, "learning_rate": 0.0002, "loss": 2.2395, "step": 42670 }, { "epoch": 3.180327868852459, "grad_norm": 2.2983829975128174, "learning_rate": 0.0002, "loss": 2.2522, "step": 42680 }, { "epoch": 3.1810730253353205, "grad_norm": 2.5697834491729736, "learning_rate": 0.0002, "loss": 2.4592, "step": 42690 }, { "epoch": 3.1818181818181817, "grad_norm": 2.6632912158966064, "learning_rate": 0.0002, "loss": 2.3589, "step": 42700 }, { "epoch": 3.182563338301043, "grad_norm": 2.8424110412597656, "learning_rate": 0.0002, "loss": 2.4862, "step": 42710 }, { "epoch": 3.1833084947839048, "grad_norm": 2.80009126663208, "learning_rate": 0.0002, "loss": 2.3216, "step": 42720 }, { "epoch": 3.184053651266766, "grad_norm": 2.937347173690796, "learning_rate": 0.0002, "loss": 2.5931, "step": 42730 }, { "epoch": 3.1847988077496274, "grad_norm": 3.170665740966797, "learning_rate": 0.0002, "loss": 2.3354, "step": 42740 }, { "epoch": 3.185543964232489, "grad_norm": 2.7834672927856445, "learning_rate": 0.0002, "loss": 2.3822, "step": 42750 }, { "epoch": 3.18628912071535, "grad_norm": 2.834599733352661, "learning_rate": 0.0002, "loss": 2.5047, "step": 42760 }, { "epoch": 3.1870342771982116, "grad_norm": 2.9384005069732666, "learning_rate": 0.0002, "loss": 2.4439, "step": 42770 }, { "epoch": 3.187779433681073, "grad_norm": 2.7702577114105225, "learning_rate": 0.0002, "loss": 2.4609, "step": 42780 }, { "epoch": 3.1885245901639343, "grad_norm": 2.811617612838745, "learning_rate": 0.0002, "loss": 2.5652, "step": 42790 }, { "epoch": 3.189269746646796, "grad_norm": 2.532407283782959, "learning_rate": 0.0002, "loss": 2.3831, "step": 42800 }, { "epoch": 3.1900149031296574, "grad_norm": 2.6427395343780518, "learning_rate": 0.0002, "loss": 2.5223, "step": 42810 }, { "epoch": 3.1907600596125185, "grad_norm": 2.6416213512420654, "learning_rate": 0.0002, "loss": 2.2635, "step": 42820 }, { "epoch": 3.19150521609538, "grad_norm": 2.5367422103881836, "learning_rate": 0.0002, "loss": 2.2955, "step": 42830 }, { "epoch": 3.1922503725782416, "grad_norm": 2.593743085861206, "learning_rate": 0.0002, "loss": 2.5609, "step": 42840 }, { "epoch": 3.1929955290611027, "grad_norm": 2.4231393337249756, "learning_rate": 0.0002, "loss": 2.2702, "step": 42850 }, { "epoch": 3.1937406855439643, "grad_norm": 2.6415021419525146, "learning_rate": 0.0002, "loss": 2.4803, "step": 42860 }, { "epoch": 3.194485842026826, "grad_norm": 3.0961923599243164, "learning_rate": 0.0002, "loss": 2.5815, "step": 42870 }, { "epoch": 3.195230998509687, "grad_norm": 2.677943229675293, "learning_rate": 0.0002, "loss": 2.4246, "step": 42880 }, { "epoch": 3.1959761549925485, "grad_norm": 2.9092931747436523, "learning_rate": 0.0002, "loss": 2.4639, "step": 42890 }, { "epoch": 3.19672131147541, "grad_norm": 2.8990132808685303, "learning_rate": 0.0002, "loss": 2.5542, "step": 42900 }, { "epoch": 3.197466467958271, "grad_norm": 2.2910470962524414, "learning_rate": 0.0002, "loss": 2.5081, "step": 42910 }, { "epoch": 3.1982116244411327, "grad_norm": 2.7519373893737793, "learning_rate": 0.0002, "loss": 2.2503, "step": 42920 }, { "epoch": 3.198956780923994, "grad_norm": 2.626312017440796, "learning_rate": 0.0002, "loss": 2.3392, "step": 42930 }, { "epoch": 3.1997019374068554, "grad_norm": 2.6032698154449463, "learning_rate": 0.0002, "loss": 2.4991, "step": 42940 }, { "epoch": 3.200447093889717, "grad_norm": 2.6117911338806152, "learning_rate": 0.0002, "loss": 2.6928, "step": 42950 }, { "epoch": 3.201192250372578, "grad_norm": 2.7212131023406982, "learning_rate": 0.0002, "loss": 2.0789, "step": 42960 }, { "epoch": 3.2019374068554396, "grad_norm": 2.6502857208251953, "learning_rate": 0.0002, "loss": 2.5775, "step": 42970 }, { "epoch": 3.202682563338301, "grad_norm": 2.800010919570923, "learning_rate": 0.0002, "loss": 2.4754, "step": 42980 }, { "epoch": 3.2034277198211623, "grad_norm": 2.5369884967803955, "learning_rate": 0.0002, "loss": 2.4423, "step": 42990 }, { "epoch": 3.204172876304024, "grad_norm": 3.0359115600585938, "learning_rate": 0.0002, "loss": 2.4593, "step": 43000 }, { "epoch": 3.2049180327868854, "grad_norm": 2.6096441745758057, "learning_rate": 0.0002, "loss": 2.3148, "step": 43010 }, { "epoch": 3.2056631892697465, "grad_norm": 3.0144314765930176, "learning_rate": 0.0002, "loss": 2.3979, "step": 43020 }, { "epoch": 3.206408345752608, "grad_norm": 2.607226610183716, "learning_rate": 0.0002, "loss": 2.5429, "step": 43030 }, { "epoch": 3.2071535022354696, "grad_norm": 3.2121124267578125, "learning_rate": 0.0002, "loss": 2.5202, "step": 43040 }, { "epoch": 3.2078986587183307, "grad_norm": 2.5059688091278076, "learning_rate": 0.0002, "loss": 2.4008, "step": 43050 }, { "epoch": 3.2086438152011922, "grad_norm": 2.7579286098480225, "learning_rate": 0.0002, "loss": 2.4386, "step": 43060 }, { "epoch": 3.209388971684054, "grad_norm": 2.520742654800415, "learning_rate": 0.0002, "loss": 2.3709, "step": 43070 }, { "epoch": 3.210134128166915, "grad_norm": 2.72102952003479, "learning_rate": 0.0002, "loss": 2.1754, "step": 43080 }, { "epoch": 3.2108792846497765, "grad_norm": 2.840904474258423, "learning_rate": 0.0002, "loss": 2.5463, "step": 43090 }, { "epoch": 3.211624441132638, "grad_norm": 2.4814138412475586, "learning_rate": 0.0002, "loss": 2.3215, "step": 43100 }, { "epoch": 3.212369597615499, "grad_norm": 2.4107296466827393, "learning_rate": 0.0002, "loss": 2.4029, "step": 43110 }, { "epoch": 3.2131147540983607, "grad_norm": 2.8633036613464355, "learning_rate": 0.0002, "loss": 2.2321, "step": 43120 }, { "epoch": 3.2138599105812222, "grad_norm": 2.631967067718506, "learning_rate": 0.0002, "loss": 2.2269, "step": 43130 }, { "epoch": 3.2146050670640833, "grad_norm": 2.8928747177124023, "learning_rate": 0.0002, "loss": 2.4295, "step": 43140 }, { "epoch": 3.215350223546945, "grad_norm": 2.65671443939209, "learning_rate": 0.0002, "loss": 2.4534, "step": 43150 }, { "epoch": 3.2160953800298064, "grad_norm": 2.9897003173828125, "learning_rate": 0.0002, "loss": 2.5198, "step": 43160 }, { "epoch": 3.2168405365126675, "grad_norm": 2.7278244495391846, "learning_rate": 0.0002, "loss": 2.4829, "step": 43170 }, { "epoch": 3.217585692995529, "grad_norm": 2.805060625076294, "learning_rate": 0.0002, "loss": 2.3381, "step": 43180 }, { "epoch": 3.2183308494783907, "grad_norm": 2.806382656097412, "learning_rate": 0.0002, "loss": 2.3296, "step": 43190 }, { "epoch": 3.2190760059612518, "grad_norm": 3.0220911502838135, "learning_rate": 0.0002, "loss": 2.514, "step": 43200 }, { "epoch": 3.2198211624441133, "grad_norm": 2.6727559566497803, "learning_rate": 0.0002, "loss": 2.4302, "step": 43210 }, { "epoch": 3.220566318926975, "grad_norm": 2.733079433441162, "learning_rate": 0.0002, "loss": 2.5175, "step": 43220 }, { "epoch": 3.221311475409836, "grad_norm": 2.369826555252075, "learning_rate": 0.0002, "loss": 2.1816, "step": 43230 }, { "epoch": 3.2220566318926975, "grad_norm": 2.4770312309265137, "learning_rate": 0.0002, "loss": 2.6042, "step": 43240 }, { "epoch": 3.222801788375559, "grad_norm": 2.658463954925537, "learning_rate": 0.0002, "loss": 2.3651, "step": 43250 }, { "epoch": 3.22354694485842, "grad_norm": 2.8133113384246826, "learning_rate": 0.0002, "loss": 2.585, "step": 43260 }, { "epoch": 3.2242921013412817, "grad_norm": 2.6553611755371094, "learning_rate": 0.0002, "loss": 2.3669, "step": 43270 }, { "epoch": 3.225037257824143, "grad_norm": 2.66679048538208, "learning_rate": 0.0002, "loss": 2.6358, "step": 43280 }, { "epoch": 3.2257824143070044, "grad_norm": 2.5681023597717285, "learning_rate": 0.0002, "loss": 2.3225, "step": 43290 }, { "epoch": 3.226527570789866, "grad_norm": 3.2260591983795166, "learning_rate": 0.0002, "loss": 2.4329, "step": 43300 }, { "epoch": 3.227272727272727, "grad_norm": 2.7905280590057373, "learning_rate": 0.0002, "loss": 2.4839, "step": 43310 }, { "epoch": 3.2280178837555886, "grad_norm": 3.111307382583618, "learning_rate": 0.0002, "loss": 2.2841, "step": 43320 }, { "epoch": 3.22876304023845, "grad_norm": 2.8080315589904785, "learning_rate": 0.0002, "loss": 2.4087, "step": 43330 }, { "epoch": 3.2295081967213113, "grad_norm": 2.8237740993499756, "learning_rate": 0.0002, "loss": 2.4916, "step": 43340 }, { "epoch": 3.230253353204173, "grad_norm": 2.3456661701202393, "learning_rate": 0.0002, "loss": 2.3679, "step": 43350 }, { "epoch": 3.2309985096870344, "grad_norm": 2.5504918098449707, "learning_rate": 0.0002, "loss": 2.4728, "step": 43360 }, { "epoch": 3.2317436661698955, "grad_norm": 2.742598295211792, "learning_rate": 0.0002, "loss": 2.5054, "step": 43370 }, { "epoch": 3.232488822652757, "grad_norm": 2.7378361225128174, "learning_rate": 0.0002, "loss": 2.4379, "step": 43380 }, { "epoch": 3.2332339791356186, "grad_norm": 2.609257936477661, "learning_rate": 0.0002, "loss": 2.5783, "step": 43390 }, { "epoch": 3.2339791356184797, "grad_norm": 2.4723076820373535, "learning_rate": 0.0002, "loss": 2.4199, "step": 43400 }, { "epoch": 3.2347242921013413, "grad_norm": 2.965663194656372, "learning_rate": 0.0002, "loss": 2.4963, "step": 43410 }, { "epoch": 3.235469448584203, "grad_norm": 2.788440704345703, "learning_rate": 0.0002, "loss": 2.4947, "step": 43420 }, { "epoch": 3.236214605067064, "grad_norm": 2.882891893386841, "learning_rate": 0.0002, "loss": 2.4839, "step": 43430 }, { "epoch": 3.2369597615499255, "grad_norm": 2.6174352169036865, "learning_rate": 0.0002, "loss": 2.332, "step": 43440 }, { "epoch": 3.237704918032787, "grad_norm": 2.465498685836792, "learning_rate": 0.0002, "loss": 2.2894, "step": 43450 }, { "epoch": 3.238450074515648, "grad_norm": 2.473252773284912, "learning_rate": 0.0002, "loss": 2.4923, "step": 43460 }, { "epoch": 3.2391952309985097, "grad_norm": 2.660106897354126, "learning_rate": 0.0002, "loss": 2.3363, "step": 43470 }, { "epoch": 3.2399403874813713, "grad_norm": 2.8930771350860596, "learning_rate": 0.0002, "loss": 2.5892, "step": 43480 }, { "epoch": 3.2406855439642324, "grad_norm": 2.360569953918457, "learning_rate": 0.0002, "loss": 2.4016, "step": 43490 }, { "epoch": 3.241430700447094, "grad_norm": 2.7150566577911377, "learning_rate": 0.0002, "loss": 2.1884, "step": 43500 }, { "epoch": 3.2421758569299555, "grad_norm": 3.106088876724243, "learning_rate": 0.0002, "loss": 2.5107, "step": 43510 }, { "epoch": 3.2429210134128166, "grad_norm": 2.799938201904297, "learning_rate": 0.0002, "loss": 2.5407, "step": 43520 }, { "epoch": 3.243666169895678, "grad_norm": 2.440535306930542, "learning_rate": 0.0002, "loss": 2.5892, "step": 43530 }, { "epoch": 3.2444113263785397, "grad_norm": 2.66219162940979, "learning_rate": 0.0002, "loss": 2.3643, "step": 43540 }, { "epoch": 3.245156482861401, "grad_norm": 2.7151572704315186, "learning_rate": 0.0002, "loss": 2.3527, "step": 43550 }, { "epoch": 3.2459016393442623, "grad_norm": 3.217926025390625, "learning_rate": 0.0002, "loss": 2.3433, "step": 43560 }, { "epoch": 3.246646795827124, "grad_norm": 2.634303569793701, "learning_rate": 0.0002, "loss": 2.3928, "step": 43570 }, { "epoch": 3.247391952309985, "grad_norm": 2.9070944786071777, "learning_rate": 0.0002, "loss": 2.4142, "step": 43580 }, { "epoch": 3.2481371087928466, "grad_norm": 3.3297340869903564, "learning_rate": 0.0002, "loss": 2.4862, "step": 43590 }, { "epoch": 3.248882265275708, "grad_norm": 2.632563591003418, "learning_rate": 0.0002, "loss": 2.5569, "step": 43600 }, { "epoch": 3.2496274217585692, "grad_norm": 3.3055930137634277, "learning_rate": 0.0002, "loss": 2.4756, "step": 43610 }, { "epoch": 3.2503725782414308, "grad_norm": 2.854285955429077, "learning_rate": 0.0002, "loss": 2.5673, "step": 43620 }, { "epoch": 3.251117734724292, "grad_norm": 2.5545718669891357, "learning_rate": 0.0002, "loss": 2.4522, "step": 43630 }, { "epoch": 3.2518628912071534, "grad_norm": 2.046332836151123, "learning_rate": 0.0002, "loss": 2.2818, "step": 43640 }, { "epoch": 3.252608047690015, "grad_norm": 2.8132576942443848, "learning_rate": 0.0002, "loss": 2.4598, "step": 43650 }, { "epoch": 3.2533532041728765, "grad_norm": 3.0209426879882812, "learning_rate": 0.0002, "loss": 2.5342, "step": 43660 }, { "epoch": 3.2540983606557377, "grad_norm": 2.7068018913269043, "learning_rate": 0.0002, "loss": 2.3042, "step": 43670 }, { "epoch": 3.254843517138599, "grad_norm": 2.8782992362976074, "learning_rate": 0.0002, "loss": 2.4472, "step": 43680 }, { "epoch": 3.2555886736214603, "grad_norm": 2.6785826683044434, "learning_rate": 0.0002, "loss": 2.4746, "step": 43690 }, { "epoch": 3.256333830104322, "grad_norm": 2.71146821975708, "learning_rate": 0.0002, "loss": 2.5153, "step": 43700 }, { "epoch": 3.2570789865871834, "grad_norm": 2.8306570053100586, "learning_rate": 0.0002, "loss": 2.3877, "step": 43710 }, { "epoch": 3.2578241430700445, "grad_norm": 2.9262166023254395, "learning_rate": 0.0002, "loss": 2.5242, "step": 43720 }, { "epoch": 3.258569299552906, "grad_norm": 2.766948938369751, "learning_rate": 0.0002, "loss": 2.3074, "step": 43730 }, { "epoch": 3.2593144560357676, "grad_norm": 2.673241376876831, "learning_rate": 0.0002, "loss": 2.4664, "step": 43740 }, { "epoch": 3.2600596125186287, "grad_norm": 2.917844295501709, "learning_rate": 0.0002, "loss": 2.3575, "step": 43750 }, { "epoch": 3.2608047690014903, "grad_norm": 2.763775587081909, "learning_rate": 0.0002, "loss": 2.4072, "step": 43760 }, { "epoch": 3.261549925484352, "grad_norm": 2.166604518890381, "learning_rate": 0.0002, "loss": 2.3257, "step": 43770 }, { "epoch": 3.262295081967213, "grad_norm": 2.9719974994659424, "learning_rate": 0.0002, "loss": 2.3358, "step": 43780 }, { "epoch": 3.2630402384500745, "grad_norm": 3.200921058654785, "learning_rate": 0.0002, "loss": 2.5461, "step": 43790 }, { "epoch": 3.263785394932936, "grad_norm": 2.6361167430877686, "learning_rate": 0.0002, "loss": 2.6172, "step": 43800 }, { "epoch": 3.264530551415797, "grad_norm": 2.4378700256347656, "learning_rate": 0.0002, "loss": 2.4782, "step": 43810 }, { "epoch": 3.2652757078986587, "grad_norm": 2.8756327629089355, "learning_rate": 0.0002, "loss": 2.6013, "step": 43820 }, { "epoch": 3.2660208643815203, "grad_norm": 2.74397611618042, "learning_rate": 0.0002, "loss": 2.6196, "step": 43830 }, { "epoch": 3.2667660208643814, "grad_norm": 2.8466079235076904, "learning_rate": 0.0002, "loss": 2.4868, "step": 43840 }, { "epoch": 3.267511177347243, "grad_norm": 2.6650137901306152, "learning_rate": 0.0002, "loss": 2.461, "step": 43850 }, { "epoch": 3.2682563338301045, "grad_norm": 2.747535228729248, "learning_rate": 0.0002, "loss": 2.3765, "step": 43860 }, { "epoch": 3.2690014903129656, "grad_norm": 2.7829294204711914, "learning_rate": 0.0002, "loss": 2.4128, "step": 43870 }, { "epoch": 3.269746646795827, "grad_norm": 2.3735265731811523, "learning_rate": 0.0002, "loss": 2.5265, "step": 43880 }, { "epoch": 3.2704918032786887, "grad_norm": 2.747103214263916, "learning_rate": 0.0002, "loss": 2.5282, "step": 43890 }, { "epoch": 3.27123695976155, "grad_norm": 2.760833740234375, "learning_rate": 0.0002, "loss": 2.4452, "step": 43900 }, { "epoch": 3.2719821162444114, "grad_norm": 2.7421011924743652, "learning_rate": 0.0002, "loss": 2.4462, "step": 43910 }, { "epoch": 3.2727272727272725, "grad_norm": 2.950409412384033, "learning_rate": 0.0002, "loss": 2.5802, "step": 43920 }, { "epoch": 3.273472429210134, "grad_norm": 2.5211143493652344, "learning_rate": 0.0002, "loss": 2.3641, "step": 43930 }, { "epoch": 3.2742175856929956, "grad_norm": 3.1419506072998047, "learning_rate": 0.0002, "loss": 2.4876, "step": 43940 }, { "epoch": 3.274962742175857, "grad_norm": 2.7190613746643066, "learning_rate": 0.0002, "loss": 2.4003, "step": 43950 }, { "epoch": 3.2757078986587183, "grad_norm": 2.7831997871398926, "learning_rate": 0.0002, "loss": 2.4456, "step": 43960 }, { "epoch": 3.27645305514158, "grad_norm": 2.8021488189697266, "learning_rate": 0.0002, "loss": 2.3307, "step": 43970 }, { "epoch": 3.277198211624441, "grad_norm": 2.8400144577026367, "learning_rate": 0.0002, "loss": 2.4423, "step": 43980 }, { "epoch": 3.2779433681073025, "grad_norm": 2.451012134552002, "learning_rate": 0.0002, "loss": 2.3158, "step": 43990 }, { "epoch": 3.278688524590164, "grad_norm": 2.821969985961914, "learning_rate": 0.0002, "loss": 2.318, "step": 44000 }, { "epoch": 3.2794336810730256, "grad_norm": 2.844849109649658, "learning_rate": 0.0002, "loss": 2.4889, "step": 44010 }, { "epoch": 3.2801788375558867, "grad_norm": 2.737499237060547, "learning_rate": 0.0002, "loss": 2.4165, "step": 44020 }, { "epoch": 3.2809239940387482, "grad_norm": 2.477479934692383, "learning_rate": 0.0002, "loss": 2.4084, "step": 44030 }, { "epoch": 3.2816691505216093, "grad_norm": 2.523198366165161, "learning_rate": 0.0002, "loss": 2.4999, "step": 44040 }, { "epoch": 3.282414307004471, "grad_norm": 2.695603609085083, "learning_rate": 0.0002, "loss": 2.3774, "step": 44050 }, { "epoch": 3.2831594634873325, "grad_norm": 3.03360652923584, "learning_rate": 0.0002, "loss": 2.2998, "step": 44060 }, { "epoch": 3.2839046199701936, "grad_norm": 2.572307825088501, "learning_rate": 0.0002, "loss": 2.3314, "step": 44070 }, { "epoch": 3.284649776453055, "grad_norm": 2.493046283721924, "learning_rate": 0.0002, "loss": 2.5724, "step": 44080 }, { "epoch": 3.2853949329359167, "grad_norm": 2.80645489692688, "learning_rate": 0.0002, "loss": 2.4075, "step": 44090 }, { "epoch": 3.2861400894187778, "grad_norm": 2.564210891723633, "learning_rate": 0.0002, "loss": 2.4697, "step": 44100 }, { "epoch": 3.2868852459016393, "grad_norm": 2.9380991458892822, "learning_rate": 0.0002, "loss": 2.4112, "step": 44110 }, { "epoch": 3.287630402384501, "grad_norm": 2.7709860801696777, "learning_rate": 0.0002, "loss": 2.5449, "step": 44120 }, { "epoch": 3.288375558867362, "grad_norm": 2.371518611907959, "learning_rate": 0.0002, "loss": 2.2587, "step": 44130 }, { "epoch": 3.2891207153502235, "grad_norm": 2.40743350982666, "learning_rate": 0.0002, "loss": 2.4445, "step": 44140 }, { "epoch": 3.289865871833085, "grad_norm": 3.0384521484375, "learning_rate": 0.0002, "loss": 2.3444, "step": 44150 }, { "epoch": 3.290611028315946, "grad_norm": 2.498652935028076, "learning_rate": 0.0002, "loss": 2.4962, "step": 44160 }, { "epoch": 3.2913561847988078, "grad_norm": 3.0309088230133057, "learning_rate": 0.0002, "loss": 2.4336, "step": 44170 }, { "epoch": 3.2921013412816693, "grad_norm": 2.711760997772217, "learning_rate": 0.0002, "loss": 2.3514, "step": 44180 }, { "epoch": 3.2928464977645304, "grad_norm": 2.884289264678955, "learning_rate": 0.0002, "loss": 2.5209, "step": 44190 }, { "epoch": 3.293591654247392, "grad_norm": 2.5556466579437256, "learning_rate": 0.0002, "loss": 2.4795, "step": 44200 }, { "epoch": 3.2943368107302535, "grad_norm": 2.5224225521087646, "learning_rate": 0.0002, "loss": 2.3033, "step": 44210 }, { "epoch": 3.2950819672131146, "grad_norm": 2.700383424758911, "learning_rate": 0.0002, "loss": 2.4198, "step": 44220 }, { "epoch": 3.295827123695976, "grad_norm": 2.7120893001556396, "learning_rate": 0.0002, "loss": 2.3386, "step": 44230 }, { "epoch": 3.2965722801788377, "grad_norm": 2.6876585483551025, "learning_rate": 0.0002, "loss": 2.3047, "step": 44240 }, { "epoch": 3.297317436661699, "grad_norm": 2.7172040939331055, "learning_rate": 0.0002, "loss": 2.5499, "step": 44250 }, { "epoch": 3.2980625931445604, "grad_norm": 2.645282506942749, "learning_rate": 0.0002, "loss": 2.4294, "step": 44260 }, { "epoch": 3.2988077496274215, "grad_norm": 2.8003482818603516, "learning_rate": 0.0002, "loss": 2.4373, "step": 44270 }, { "epoch": 3.299552906110283, "grad_norm": 2.8162996768951416, "learning_rate": 0.0002, "loss": 2.4679, "step": 44280 }, { "epoch": 3.3002980625931446, "grad_norm": 2.7134716510772705, "learning_rate": 0.0002, "loss": 2.3455, "step": 44290 }, { "epoch": 3.301043219076006, "grad_norm": 2.854041337966919, "learning_rate": 0.0002, "loss": 2.5039, "step": 44300 }, { "epoch": 3.3017883755588673, "grad_norm": 2.794156789779663, "learning_rate": 0.0002, "loss": 2.6137, "step": 44310 }, { "epoch": 3.302533532041729, "grad_norm": 2.626420021057129, "learning_rate": 0.0002, "loss": 2.5824, "step": 44320 }, { "epoch": 3.30327868852459, "grad_norm": 2.318218946456909, "learning_rate": 0.0002, "loss": 2.6067, "step": 44330 }, { "epoch": 3.3040238450074515, "grad_norm": 2.7970023155212402, "learning_rate": 0.0002, "loss": 2.4504, "step": 44340 }, { "epoch": 3.304769001490313, "grad_norm": 2.773334503173828, "learning_rate": 0.0002, "loss": 2.6143, "step": 44350 }, { "epoch": 3.3055141579731746, "grad_norm": 2.5117685794830322, "learning_rate": 0.0002, "loss": 2.2903, "step": 44360 }, { "epoch": 3.3062593144560357, "grad_norm": 2.768136501312256, "learning_rate": 0.0002, "loss": 2.4649, "step": 44370 }, { "epoch": 3.3070044709388973, "grad_norm": 2.2928473949432373, "learning_rate": 0.0002, "loss": 2.4589, "step": 44380 }, { "epoch": 3.3077496274217584, "grad_norm": 2.4605629444122314, "learning_rate": 0.0002, "loss": 2.324, "step": 44390 }, { "epoch": 3.30849478390462, "grad_norm": 2.7441422939300537, "learning_rate": 0.0002, "loss": 2.333, "step": 44400 }, { "epoch": 3.3092399403874815, "grad_norm": 2.4191830158233643, "learning_rate": 0.0002, "loss": 2.5422, "step": 44410 }, { "epoch": 3.3099850968703426, "grad_norm": 2.452444314956665, "learning_rate": 0.0002, "loss": 2.377, "step": 44420 }, { "epoch": 3.310730253353204, "grad_norm": 2.9849095344543457, "learning_rate": 0.0002, "loss": 2.5103, "step": 44430 }, { "epoch": 3.3114754098360657, "grad_norm": 2.7864060401916504, "learning_rate": 0.0002, "loss": 2.4538, "step": 44440 }, { "epoch": 3.312220566318927, "grad_norm": 2.8097054958343506, "learning_rate": 0.0002, "loss": 2.4729, "step": 44450 }, { "epoch": 3.3129657228017884, "grad_norm": 2.5728414058685303, "learning_rate": 0.0002, "loss": 2.4751, "step": 44460 }, { "epoch": 3.31371087928465, "grad_norm": 2.7810518741607666, "learning_rate": 0.0002, "loss": 2.5308, "step": 44470 }, { "epoch": 3.314456035767511, "grad_norm": 2.845767021179199, "learning_rate": 0.0002, "loss": 2.4061, "step": 44480 }, { "epoch": 3.3152011922503726, "grad_norm": 2.721355438232422, "learning_rate": 0.0002, "loss": 2.5016, "step": 44490 }, { "epoch": 3.315946348733234, "grad_norm": 2.6908435821533203, "learning_rate": 0.0002, "loss": 2.576, "step": 44500 }, { "epoch": 3.3166915052160952, "grad_norm": 2.6813907623291016, "learning_rate": 0.0002, "loss": 2.6311, "step": 44510 }, { "epoch": 3.317436661698957, "grad_norm": 3.2077786922454834, "learning_rate": 0.0002, "loss": 2.5925, "step": 44520 }, { "epoch": 3.3181818181818183, "grad_norm": 2.6613762378692627, "learning_rate": 0.0002, "loss": 2.506, "step": 44530 }, { "epoch": 3.3189269746646795, "grad_norm": 2.0632989406585693, "learning_rate": 0.0002, "loss": 2.3812, "step": 44540 }, { "epoch": 3.319672131147541, "grad_norm": 2.6608712673187256, "learning_rate": 0.0002, "loss": 2.319, "step": 44550 }, { "epoch": 3.3204172876304026, "grad_norm": 3.02986216545105, "learning_rate": 0.0002, "loss": 2.2761, "step": 44560 }, { "epoch": 3.3211624441132637, "grad_norm": 2.6622636318206787, "learning_rate": 0.0002, "loss": 2.4661, "step": 44570 }, { "epoch": 3.321907600596125, "grad_norm": 2.753340721130371, "learning_rate": 0.0002, "loss": 2.4398, "step": 44580 }, { "epoch": 3.3226527570789868, "grad_norm": 2.8317463397979736, "learning_rate": 0.0002, "loss": 2.4028, "step": 44590 }, { "epoch": 3.323397913561848, "grad_norm": 2.6618127822875977, "learning_rate": 0.0002, "loss": 2.469, "step": 44600 }, { "epoch": 3.3241430700447094, "grad_norm": 2.7465057373046875, "learning_rate": 0.0002, "loss": 2.6312, "step": 44610 }, { "epoch": 3.3248882265275705, "grad_norm": 2.6905951499938965, "learning_rate": 0.0002, "loss": 2.5652, "step": 44620 }, { "epoch": 3.325633383010432, "grad_norm": 2.609553337097168, "learning_rate": 0.0002, "loss": 2.4733, "step": 44630 }, { "epoch": 3.3263785394932937, "grad_norm": 3.0250022411346436, "learning_rate": 0.0002, "loss": 2.2852, "step": 44640 }, { "epoch": 3.327123695976155, "grad_norm": 2.7133560180664062, "learning_rate": 0.0002, "loss": 2.6097, "step": 44650 }, { "epoch": 3.3278688524590163, "grad_norm": 2.479445695877075, "learning_rate": 0.0002, "loss": 2.4231, "step": 44660 }, { "epoch": 3.328614008941878, "grad_norm": 2.7370223999023438, "learning_rate": 0.0002, "loss": 2.359, "step": 44670 }, { "epoch": 3.329359165424739, "grad_norm": 2.5486905574798584, "learning_rate": 0.0002, "loss": 2.5112, "step": 44680 }, { "epoch": 3.3301043219076005, "grad_norm": 2.0843801498413086, "learning_rate": 0.0002, "loss": 2.4368, "step": 44690 }, { "epoch": 3.330849478390462, "grad_norm": 2.692003011703491, "learning_rate": 0.0002, "loss": 2.4591, "step": 44700 }, { "epoch": 3.3315946348733236, "grad_norm": 2.4572720527648926, "learning_rate": 0.0002, "loss": 2.521, "step": 44710 }, { "epoch": 3.3323397913561847, "grad_norm": 2.675017833709717, "learning_rate": 0.0002, "loss": 2.5644, "step": 44720 }, { "epoch": 3.3330849478390463, "grad_norm": 2.6414129734039307, "learning_rate": 0.0002, "loss": 2.601, "step": 44730 }, { "epoch": 3.3338301043219074, "grad_norm": 2.5128352642059326, "learning_rate": 0.0002, "loss": 2.4304, "step": 44740 }, { "epoch": 3.334575260804769, "grad_norm": 2.5863492488861084, "learning_rate": 0.0002, "loss": 2.3684, "step": 44750 }, { "epoch": 3.3353204172876305, "grad_norm": 2.2314746379852295, "learning_rate": 0.0002, "loss": 2.5097, "step": 44760 }, { "epoch": 3.3360655737704916, "grad_norm": 2.5924713611602783, "learning_rate": 0.0002, "loss": 2.5148, "step": 44770 }, { "epoch": 3.336810730253353, "grad_norm": 2.298295497894287, "learning_rate": 0.0002, "loss": 2.3235, "step": 44780 }, { "epoch": 3.3375558867362147, "grad_norm": 2.701958417892456, "learning_rate": 0.0002, "loss": 2.4991, "step": 44790 }, { "epoch": 3.338301043219076, "grad_norm": 2.766066074371338, "learning_rate": 0.0002, "loss": 2.4484, "step": 44800 }, { "epoch": 3.3390461997019374, "grad_norm": 2.684408187866211, "learning_rate": 0.0002, "loss": 2.5959, "step": 44810 }, { "epoch": 3.339791356184799, "grad_norm": 2.5606040954589844, "learning_rate": 0.0002, "loss": 2.3181, "step": 44820 }, { "epoch": 3.34053651266766, "grad_norm": 2.261032819747925, "learning_rate": 0.0002, "loss": 2.3889, "step": 44830 }, { "epoch": 3.3412816691505216, "grad_norm": 2.6086981296539307, "learning_rate": 0.0002, "loss": 2.3264, "step": 44840 }, { "epoch": 3.342026825633383, "grad_norm": 2.7089896202087402, "learning_rate": 0.0002, "loss": 2.4724, "step": 44850 }, { "epoch": 3.3427719821162443, "grad_norm": 2.4596564769744873, "learning_rate": 0.0002, "loss": 2.3453, "step": 44860 }, { "epoch": 3.343517138599106, "grad_norm": 3.0206546783447266, "learning_rate": 0.0002, "loss": 2.274, "step": 44870 }, { "epoch": 3.3442622950819674, "grad_norm": 2.696455717086792, "learning_rate": 0.0002, "loss": 2.5678, "step": 44880 }, { "epoch": 3.3450074515648285, "grad_norm": 2.755286693572998, "learning_rate": 0.0002, "loss": 2.6242, "step": 44890 }, { "epoch": 3.34575260804769, "grad_norm": 2.4545297622680664, "learning_rate": 0.0002, "loss": 2.4866, "step": 44900 }, { "epoch": 3.3464977645305516, "grad_norm": 1.9408307075500488, "learning_rate": 0.0002, "loss": 2.2509, "step": 44910 }, { "epoch": 3.3472429210134127, "grad_norm": 2.969522714614868, "learning_rate": 0.0002, "loss": 2.5758, "step": 44920 }, { "epoch": 3.3479880774962743, "grad_norm": 2.4225828647613525, "learning_rate": 0.0002, "loss": 2.2521, "step": 44930 }, { "epoch": 3.348733233979136, "grad_norm": 2.567896842956543, "learning_rate": 0.0002, "loss": 2.5203, "step": 44940 }, { "epoch": 3.349478390461997, "grad_norm": 2.6760551929473877, "learning_rate": 0.0002, "loss": 2.4264, "step": 44950 }, { "epoch": 3.3502235469448585, "grad_norm": 2.5255861282348633, "learning_rate": 0.0002, "loss": 2.5705, "step": 44960 }, { "epoch": 3.3509687034277196, "grad_norm": 2.5304393768310547, "learning_rate": 0.0002, "loss": 2.3158, "step": 44970 }, { "epoch": 3.351713859910581, "grad_norm": 2.5755674839019775, "learning_rate": 0.0002, "loss": 2.4185, "step": 44980 }, { "epoch": 3.3524590163934427, "grad_norm": 2.782089948654175, "learning_rate": 0.0002, "loss": 2.5002, "step": 44990 }, { "epoch": 3.3532041728763042, "grad_norm": 2.9056670665740967, "learning_rate": 0.0002, "loss": 2.5654, "step": 45000 }, { "epoch": 3.3539493293591653, "grad_norm": 3.098062038421631, "learning_rate": 0.0002, "loss": 2.4097, "step": 45010 }, { "epoch": 3.354694485842027, "grad_norm": 2.8015189170837402, "learning_rate": 0.0002, "loss": 2.4039, "step": 45020 }, { "epoch": 3.355439642324888, "grad_norm": 2.5579729080200195, "learning_rate": 0.0002, "loss": 2.4874, "step": 45030 }, { "epoch": 3.3561847988077496, "grad_norm": 2.528961181640625, "learning_rate": 0.0002, "loss": 2.4139, "step": 45040 }, { "epoch": 3.356929955290611, "grad_norm": 2.753173828125, "learning_rate": 0.0002, "loss": 2.2572, "step": 45050 }, { "epoch": 3.3576751117734727, "grad_norm": 2.733590841293335, "learning_rate": 0.0002, "loss": 2.571, "step": 45060 }, { "epoch": 3.3584202682563338, "grad_norm": 2.6290042400360107, "learning_rate": 0.0002, "loss": 2.5703, "step": 45070 }, { "epoch": 3.3591654247391953, "grad_norm": 2.5969390869140625, "learning_rate": 0.0002, "loss": 2.3944, "step": 45080 }, { "epoch": 3.3599105812220564, "grad_norm": 2.6536154747009277, "learning_rate": 0.0002, "loss": 2.4431, "step": 45090 }, { "epoch": 3.360655737704918, "grad_norm": 2.6232528686523438, "learning_rate": 0.0002, "loss": 2.502, "step": 45100 }, { "epoch": 3.3614008941877795, "grad_norm": 2.800161361694336, "learning_rate": 0.0002, "loss": 2.3932, "step": 45110 }, { "epoch": 3.3621460506706407, "grad_norm": 2.5421383380889893, "learning_rate": 0.0002, "loss": 2.527, "step": 45120 }, { "epoch": 3.362891207153502, "grad_norm": 2.968223810195923, "learning_rate": 0.0002, "loss": 2.6396, "step": 45130 }, { "epoch": 3.3636363636363638, "grad_norm": 2.4359583854675293, "learning_rate": 0.0002, "loss": 2.3573, "step": 45140 }, { "epoch": 3.364381520119225, "grad_norm": 2.3761839866638184, "learning_rate": 0.0002, "loss": 2.6412, "step": 45150 }, { "epoch": 3.3651266766020864, "grad_norm": 2.7938098907470703, "learning_rate": 0.0002, "loss": 2.5008, "step": 45160 }, { "epoch": 3.365871833084948, "grad_norm": 2.7078990936279297, "learning_rate": 0.0002, "loss": 2.627, "step": 45170 }, { "epoch": 3.366616989567809, "grad_norm": 2.781672477722168, "learning_rate": 0.0002, "loss": 2.5451, "step": 45180 }, { "epoch": 3.3673621460506706, "grad_norm": 2.7269418239593506, "learning_rate": 0.0002, "loss": 2.3616, "step": 45190 }, { "epoch": 3.368107302533532, "grad_norm": 2.7787158489227295, "learning_rate": 0.0002, "loss": 2.4846, "step": 45200 }, { "epoch": 3.3688524590163933, "grad_norm": 2.631345748901367, "learning_rate": 0.0002, "loss": 2.556, "step": 45210 }, { "epoch": 3.369597615499255, "grad_norm": 2.876192092895508, "learning_rate": 0.0002, "loss": 2.5152, "step": 45220 }, { "epoch": 3.3703427719821164, "grad_norm": 2.6566519737243652, "learning_rate": 0.0002, "loss": 2.6127, "step": 45230 }, { "epoch": 3.3710879284649775, "grad_norm": 3.0558583736419678, "learning_rate": 0.0002, "loss": 2.5733, "step": 45240 }, { "epoch": 3.371833084947839, "grad_norm": 2.9002668857574463, "learning_rate": 0.0002, "loss": 2.5031, "step": 45250 }, { "epoch": 3.3725782414307006, "grad_norm": 2.6893272399902344, "learning_rate": 0.0002, "loss": 2.5278, "step": 45260 }, { "epoch": 3.3733233979135617, "grad_norm": 2.7107250690460205, "learning_rate": 0.0002, "loss": 2.6275, "step": 45270 }, { "epoch": 3.3740685543964233, "grad_norm": 2.816999912261963, "learning_rate": 0.0002, "loss": 2.5468, "step": 45280 }, { "epoch": 3.374813710879285, "grad_norm": 2.670982599258423, "learning_rate": 0.0002, "loss": 2.4967, "step": 45290 }, { "epoch": 3.375558867362146, "grad_norm": 2.483621835708618, "learning_rate": 0.0002, "loss": 2.2782, "step": 45300 }, { "epoch": 3.3763040238450075, "grad_norm": 2.733739137649536, "learning_rate": 0.0002, "loss": 2.3869, "step": 45310 }, { "epoch": 3.3770491803278686, "grad_norm": 2.6917941570281982, "learning_rate": 0.0002, "loss": 2.4299, "step": 45320 }, { "epoch": 3.37779433681073, "grad_norm": 2.675069570541382, "learning_rate": 0.0002, "loss": 2.5749, "step": 45330 }, { "epoch": 3.3785394932935917, "grad_norm": 2.6892285346984863, "learning_rate": 0.0002, "loss": 2.387, "step": 45340 }, { "epoch": 3.3792846497764533, "grad_norm": 3.1701433658599854, "learning_rate": 0.0002, "loss": 2.5472, "step": 45350 }, { "epoch": 3.3800298062593144, "grad_norm": 2.8027126789093018, "learning_rate": 0.0002, "loss": 2.4505, "step": 45360 }, { "epoch": 3.380774962742176, "grad_norm": 3.0367190837860107, "learning_rate": 0.0002, "loss": 2.6113, "step": 45370 }, { "epoch": 3.381520119225037, "grad_norm": 2.723576545715332, "learning_rate": 0.0002, "loss": 2.3816, "step": 45380 }, { "epoch": 3.3822652757078986, "grad_norm": 2.6705434322357178, "learning_rate": 0.0002, "loss": 2.4231, "step": 45390 }, { "epoch": 3.38301043219076, "grad_norm": 2.584972381591797, "learning_rate": 0.0002, "loss": 2.3369, "step": 45400 }, { "epoch": 3.3837555886736217, "grad_norm": 2.5815274715423584, "learning_rate": 0.0002, "loss": 2.3362, "step": 45410 }, { "epoch": 3.384500745156483, "grad_norm": 2.7649471759796143, "learning_rate": 0.0002, "loss": 2.3817, "step": 45420 }, { "epoch": 3.3852459016393444, "grad_norm": 2.435465097427368, "learning_rate": 0.0002, "loss": 2.5519, "step": 45430 }, { "epoch": 3.3859910581222055, "grad_norm": 2.7562053203582764, "learning_rate": 0.0002, "loss": 2.4509, "step": 45440 }, { "epoch": 3.386736214605067, "grad_norm": 2.829549789428711, "learning_rate": 0.0002, "loss": 2.4191, "step": 45450 }, { "epoch": 3.3874813710879286, "grad_norm": 2.9970972537994385, "learning_rate": 0.0002, "loss": 2.5063, "step": 45460 }, { "epoch": 3.3882265275707897, "grad_norm": 3.018434524536133, "learning_rate": 0.0002, "loss": 2.4994, "step": 45470 }, { "epoch": 3.3889716840536512, "grad_norm": 2.6649057865142822, "learning_rate": 0.0002, "loss": 2.421, "step": 45480 }, { "epoch": 3.389716840536513, "grad_norm": 2.2870116233825684, "learning_rate": 0.0002, "loss": 2.5703, "step": 45490 }, { "epoch": 3.390461997019374, "grad_norm": 2.1558291912078857, "learning_rate": 0.0002, "loss": 2.618, "step": 45500 }, { "epoch": 3.3912071535022354, "grad_norm": 2.7797162532806396, "learning_rate": 0.0002, "loss": 2.404, "step": 45510 }, { "epoch": 3.391952309985097, "grad_norm": 3.0129196643829346, "learning_rate": 0.0002, "loss": 2.3708, "step": 45520 }, { "epoch": 3.392697466467958, "grad_norm": 2.453110694885254, "learning_rate": 0.0002, "loss": 2.5923, "step": 45530 }, { "epoch": 3.3934426229508197, "grad_norm": 2.845862865447998, "learning_rate": 0.0002, "loss": 2.5138, "step": 45540 }, { "epoch": 3.394187779433681, "grad_norm": 3.013814687728882, "learning_rate": 0.0002, "loss": 2.6143, "step": 45550 }, { "epoch": 3.3949329359165423, "grad_norm": 2.650480270385742, "learning_rate": 0.0002, "loss": 2.6253, "step": 45560 }, { "epoch": 3.395678092399404, "grad_norm": 2.6634345054626465, "learning_rate": 0.0002, "loss": 2.4054, "step": 45570 }, { "epoch": 3.3964232488822654, "grad_norm": 2.5562565326690674, "learning_rate": 0.0002, "loss": 2.3089, "step": 45580 }, { "epoch": 3.3971684053651265, "grad_norm": 2.798459768295288, "learning_rate": 0.0002, "loss": 2.3853, "step": 45590 }, { "epoch": 3.397913561847988, "grad_norm": 2.3773062229156494, "learning_rate": 0.0002, "loss": 2.5745, "step": 45600 }, { "epoch": 3.3986587183308496, "grad_norm": 2.7547972202301025, "learning_rate": 0.0002, "loss": 2.4046, "step": 45610 }, { "epoch": 3.3994038748137108, "grad_norm": 2.549546003341675, "learning_rate": 0.0002, "loss": 2.6018, "step": 45620 }, { "epoch": 3.4001490312965723, "grad_norm": 2.604703903198242, "learning_rate": 0.0002, "loss": 2.5778, "step": 45630 }, { "epoch": 3.400894187779434, "grad_norm": 2.515007495880127, "learning_rate": 0.0002, "loss": 2.4235, "step": 45640 }, { "epoch": 3.401639344262295, "grad_norm": 3.1332290172576904, "learning_rate": 0.0002, "loss": 2.5225, "step": 45650 }, { "epoch": 3.4023845007451565, "grad_norm": 2.7233545780181885, "learning_rate": 0.0002, "loss": 2.5562, "step": 45660 }, { "epoch": 3.4031296572280176, "grad_norm": 2.413351058959961, "learning_rate": 0.0002, "loss": 2.4198, "step": 45670 }, { "epoch": 3.403874813710879, "grad_norm": 2.706968307495117, "learning_rate": 0.0002, "loss": 2.44, "step": 45680 }, { "epoch": 3.4046199701937407, "grad_norm": 2.8159170150756836, "learning_rate": 0.0002, "loss": 2.499, "step": 45690 }, { "epoch": 3.4053651266766023, "grad_norm": 2.412567615509033, "learning_rate": 0.0002, "loss": 2.4767, "step": 45700 }, { "epoch": 3.4061102831594634, "grad_norm": 2.582634210586548, "learning_rate": 0.0002, "loss": 2.3958, "step": 45710 }, { "epoch": 3.406855439642325, "grad_norm": 2.5285251140594482, "learning_rate": 0.0002, "loss": 2.332, "step": 45720 }, { "epoch": 3.407600596125186, "grad_norm": 2.7439749240875244, "learning_rate": 0.0002, "loss": 2.3391, "step": 45730 }, { "epoch": 3.4083457526080476, "grad_norm": 2.710297107696533, "learning_rate": 0.0002, "loss": 2.5074, "step": 45740 }, { "epoch": 3.409090909090909, "grad_norm": 2.586346387863159, "learning_rate": 0.0002, "loss": 2.5411, "step": 45750 }, { "epoch": 3.4098360655737707, "grad_norm": 2.2955081462860107, "learning_rate": 0.0002, "loss": 2.5316, "step": 45760 }, { "epoch": 3.410581222056632, "grad_norm": 2.482043504714966, "learning_rate": 0.0002, "loss": 2.5112, "step": 45770 }, { "epoch": 3.4113263785394934, "grad_norm": 2.5250322818756104, "learning_rate": 0.0002, "loss": 2.4582, "step": 45780 }, { "epoch": 3.4120715350223545, "grad_norm": 3.7372679710388184, "learning_rate": 0.0002, "loss": 2.6059, "step": 45790 }, { "epoch": 3.412816691505216, "grad_norm": 2.6757683753967285, "learning_rate": 0.0002, "loss": 2.421, "step": 45800 }, { "epoch": 3.4135618479880776, "grad_norm": 2.1661860942840576, "learning_rate": 0.0002, "loss": 2.243, "step": 45810 }, { "epoch": 3.4143070044709387, "grad_norm": 2.95503830909729, "learning_rate": 0.0002, "loss": 2.5708, "step": 45820 }, { "epoch": 3.4150521609538003, "grad_norm": 2.38747239112854, "learning_rate": 0.0002, "loss": 2.4036, "step": 45830 }, { "epoch": 3.415797317436662, "grad_norm": 2.2388579845428467, "learning_rate": 0.0002, "loss": 2.4164, "step": 45840 }, { "epoch": 3.416542473919523, "grad_norm": 2.6116092205047607, "learning_rate": 0.0002, "loss": 2.6254, "step": 45850 }, { "epoch": 3.4172876304023845, "grad_norm": 2.408705472946167, "learning_rate": 0.0002, "loss": 2.6464, "step": 45860 }, { "epoch": 3.418032786885246, "grad_norm": 2.4243757724761963, "learning_rate": 0.0002, "loss": 2.6336, "step": 45870 }, { "epoch": 3.418777943368107, "grad_norm": 2.9750640392303467, "learning_rate": 0.0002, "loss": 2.4791, "step": 45880 }, { "epoch": 3.4195230998509687, "grad_norm": 2.836698055267334, "learning_rate": 0.0002, "loss": 2.6173, "step": 45890 }, { "epoch": 3.4202682563338302, "grad_norm": 2.734327793121338, "learning_rate": 0.0002, "loss": 2.3956, "step": 45900 }, { "epoch": 3.4210134128166914, "grad_norm": 2.6513071060180664, "learning_rate": 0.0002, "loss": 2.5761, "step": 45910 }, { "epoch": 3.421758569299553, "grad_norm": 2.8348026275634766, "learning_rate": 0.0002, "loss": 2.4796, "step": 45920 }, { "epoch": 3.4225037257824145, "grad_norm": 2.5720503330230713, "learning_rate": 0.0002, "loss": 2.6315, "step": 45930 }, { "epoch": 3.4232488822652756, "grad_norm": 2.497056245803833, "learning_rate": 0.0002, "loss": 2.4317, "step": 45940 }, { "epoch": 3.423994038748137, "grad_norm": 2.849952459335327, "learning_rate": 0.0002, "loss": 2.4361, "step": 45950 }, { "epoch": 3.4247391952309987, "grad_norm": 2.823962688446045, "learning_rate": 0.0002, "loss": 2.6059, "step": 45960 }, { "epoch": 3.42548435171386, "grad_norm": 2.484067440032959, "learning_rate": 0.0002, "loss": 2.3086, "step": 45970 }, { "epoch": 3.4262295081967213, "grad_norm": 3.0087854862213135, "learning_rate": 0.0002, "loss": 2.3751, "step": 45980 }, { "epoch": 3.426974664679583, "grad_norm": 2.5996415615081787, "learning_rate": 0.0002, "loss": 2.5007, "step": 45990 }, { "epoch": 3.427719821162444, "grad_norm": 2.758151054382324, "learning_rate": 0.0002, "loss": 2.4652, "step": 46000 }, { "epoch": 3.4284649776453056, "grad_norm": 2.789275646209717, "learning_rate": 0.0002, "loss": 2.645, "step": 46010 }, { "epoch": 3.429210134128167, "grad_norm": 2.859830141067505, "learning_rate": 0.0002, "loss": 2.5338, "step": 46020 }, { "epoch": 3.429955290611028, "grad_norm": 2.25303053855896, "learning_rate": 0.0002, "loss": 2.2158, "step": 46030 }, { "epoch": 3.4307004470938898, "grad_norm": 3.3532886505126953, "learning_rate": 0.0002, "loss": 2.335, "step": 46040 }, { "epoch": 3.4314456035767513, "grad_norm": 2.6834700107574463, "learning_rate": 0.0002, "loss": 2.662, "step": 46050 }, { "epoch": 3.4321907600596124, "grad_norm": 2.6147358417510986, "learning_rate": 0.0002, "loss": 2.4504, "step": 46060 }, { "epoch": 3.432935916542474, "grad_norm": 2.483168363571167, "learning_rate": 0.0002, "loss": 2.241, "step": 46070 }, { "epoch": 3.433681073025335, "grad_norm": 2.5020689964294434, "learning_rate": 0.0002, "loss": 2.2708, "step": 46080 }, { "epoch": 3.4344262295081966, "grad_norm": 2.6769652366638184, "learning_rate": 0.0002, "loss": 2.4544, "step": 46090 }, { "epoch": 3.435171385991058, "grad_norm": 2.70047926902771, "learning_rate": 0.0002, "loss": 2.5237, "step": 46100 }, { "epoch": 3.4359165424739198, "grad_norm": 2.588719367980957, "learning_rate": 0.0002, "loss": 2.4592, "step": 46110 }, { "epoch": 3.436661698956781, "grad_norm": 2.694007158279419, "learning_rate": 0.0002, "loss": 2.5565, "step": 46120 }, { "epoch": 3.4374068554396424, "grad_norm": 2.5668649673461914, "learning_rate": 0.0002, "loss": 2.414, "step": 46130 }, { "epoch": 3.4381520119225035, "grad_norm": 2.8535616397857666, "learning_rate": 0.0002, "loss": 2.5938, "step": 46140 }, { "epoch": 3.438897168405365, "grad_norm": 2.704505443572998, "learning_rate": 0.0002, "loss": 2.488, "step": 46150 }, { "epoch": 3.4396423248882266, "grad_norm": 2.4701411724090576, "learning_rate": 0.0002, "loss": 2.3884, "step": 46160 }, { "epoch": 3.4403874813710877, "grad_norm": 2.898048162460327, "learning_rate": 0.0002, "loss": 2.5974, "step": 46170 }, { "epoch": 3.4411326378539493, "grad_norm": 2.504690170288086, "learning_rate": 0.0002, "loss": 2.5142, "step": 46180 }, { "epoch": 3.441877794336811, "grad_norm": 2.4398272037506104, "learning_rate": 0.0002, "loss": 2.3432, "step": 46190 }, { "epoch": 3.442622950819672, "grad_norm": 2.6902806758880615, "learning_rate": 0.0002, "loss": 2.5055, "step": 46200 }, { "epoch": 3.4433681073025335, "grad_norm": 3.127070426940918, "learning_rate": 0.0002, "loss": 2.3998, "step": 46210 }, { "epoch": 3.444113263785395, "grad_norm": 2.8394556045532227, "learning_rate": 0.0002, "loss": 2.5031, "step": 46220 }, { "epoch": 3.444858420268256, "grad_norm": 2.2959282398223877, "learning_rate": 0.0002, "loss": 2.4069, "step": 46230 }, { "epoch": 3.4456035767511177, "grad_norm": 2.881746530532837, "learning_rate": 0.0002, "loss": 2.4572, "step": 46240 }, { "epoch": 3.4463487332339793, "grad_norm": 2.929504871368408, "learning_rate": 0.0002, "loss": 2.2962, "step": 46250 }, { "epoch": 3.4470938897168404, "grad_norm": 2.570570945739746, "learning_rate": 0.0002, "loss": 2.3323, "step": 46260 }, { "epoch": 3.447839046199702, "grad_norm": 2.8684487342834473, "learning_rate": 0.0002, "loss": 2.5943, "step": 46270 }, { "epoch": 3.4485842026825635, "grad_norm": 2.15544056892395, "learning_rate": 0.0002, "loss": 2.4587, "step": 46280 }, { "epoch": 3.4493293591654246, "grad_norm": 2.5828864574432373, "learning_rate": 0.0002, "loss": 2.5151, "step": 46290 }, { "epoch": 3.450074515648286, "grad_norm": 2.892545461654663, "learning_rate": 0.0002, "loss": 2.4213, "step": 46300 }, { "epoch": 3.4508196721311477, "grad_norm": 2.867642879486084, "learning_rate": 0.0002, "loss": 2.4857, "step": 46310 }, { "epoch": 3.451564828614009, "grad_norm": 3.811511278152466, "learning_rate": 0.0002, "loss": 2.5187, "step": 46320 }, { "epoch": 3.4523099850968704, "grad_norm": 1.9235666990280151, "learning_rate": 0.0002, "loss": 2.2494, "step": 46330 }, { "epoch": 3.453055141579732, "grad_norm": 2.4899580478668213, "learning_rate": 0.0002, "loss": 2.3877, "step": 46340 }, { "epoch": 3.453800298062593, "grad_norm": 2.7740957736968994, "learning_rate": 0.0002, "loss": 2.5916, "step": 46350 }, { "epoch": 3.4545454545454546, "grad_norm": 3.30594539642334, "learning_rate": 0.0002, "loss": 2.5507, "step": 46360 }, { "epoch": 3.455290611028316, "grad_norm": 2.3682942390441895, "learning_rate": 0.0002, "loss": 2.5072, "step": 46370 }, { "epoch": 3.4560357675111772, "grad_norm": 2.640885829925537, "learning_rate": 0.0002, "loss": 2.5571, "step": 46380 }, { "epoch": 3.456780923994039, "grad_norm": 2.810642719268799, "learning_rate": 0.0002, "loss": 2.4887, "step": 46390 }, { "epoch": 3.4575260804769004, "grad_norm": 2.425133228302002, "learning_rate": 0.0002, "loss": 2.3365, "step": 46400 }, { "epoch": 3.4582712369597615, "grad_norm": 2.6030843257904053, "learning_rate": 0.0002, "loss": 2.475, "step": 46410 }, { "epoch": 3.459016393442623, "grad_norm": 2.4549689292907715, "learning_rate": 0.0002, "loss": 2.4761, "step": 46420 }, { "epoch": 3.459761549925484, "grad_norm": 2.5607504844665527, "learning_rate": 0.0002, "loss": 2.4621, "step": 46430 }, { "epoch": 3.4605067064083457, "grad_norm": 2.6467771530151367, "learning_rate": 0.0002, "loss": 2.5109, "step": 46440 }, { "epoch": 3.4612518628912072, "grad_norm": 2.1029367446899414, "learning_rate": 0.0002, "loss": 2.4962, "step": 46450 }, { "epoch": 3.461997019374069, "grad_norm": 2.5813140869140625, "learning_rate": 0.0002, "loss": 2.6477, "step": 46460 }, { "epoch": 3.46274217585693, "grad_norm": 2.906320571899414, "learning_rate": 0.0002, "loss": 2.5958, "step": 46470 }, { "epoch": 3.4634873323397914, "grad_norm": 2.431466579437256, "learning_rate": 0.0002, "loss": 2.5054, "step": 46480 }, { "epoch": 3.4642324888226526, "grad_norm": 2.375662088394165, "learning_rate": 0.0002, "loss": 2.4448, "step": 46490 }, { "epoch": 3.464977645305514, "grad_norm": 2.675147771835327, "learning_rate": 0.0002, "loss": 2.4303, "step": 46500 }, { "epoch": 3.4657228017883757, "grad_norm": 2.9978744983673096, "learning_rate": 0.0002, "loss": 2.3751, "step": 46510 }, { "epoch": 3.4664679582712368, "grad_norm": 2.483147621154785, "learning_rate": 0.0002, "loss": 2.4115, "step": 46520 }, { "epoch": 3.4672131147540983, "grad_norm": 2.3755555152893066, "learning_rate": 0.0002, "loss": 2.2329, "step": 46530 }, { "epoch": 3.46795827123696, "grad_norm": 2.9722342491149902, "learning_rate": 0.0002, "loss": 2.66, "step": 46540 }, { "epoch": 3.468703427719821, "grad_norm": 2.3765087127685547, "learning_rate": 0.0002, "loss": 2.3346, "step": 46550 }, { "epoch": 3.4694485842026825, "grad_norm": 3.10359263420105, "learning_rate": 0.0002, "loss": 2.4465, "step": 46560 }, { "epoch": 3.470193740685544, "grad_norm": 2.499055862426758, "learning_rate": 0.0002, "loss": 2.4433, "step": 46570 }, { "epoch": 3.470938897168405, "grad_norm": 2.9596831798553467, "learning_rate": 0.0002, "loss": 2.6495, "step": 46580 }, { "epoch": 3.4716840536512668, "grad_norm": 2.490100622177124, "learning_rate": 0.0002, "loss": 2.4167, "step": 46590 }, { "epoch": 3.4724292101341283, "grad_norm": 2.3789775371551514, "learning_rate": 0.0002, "loss": 2.3696, "step": 46600 }, { "epoch": 3.4731743666169894, "grad_norm": 2.522531032562256, "learning_rate": 0.0002, "loss": 2.416, "step": 46610 }, { "epoch": 3.473919523099851, "grad_norm": 1.9312344789505005, "learning_rate": 0.0002, "loss": 2.3704, "step": 46620 }, { "epoch": 3.4746646795827125, "grad_norm": 2.6317503452301025, "learning_rate": 0.0002, "loss": 2.4834, "step": 46630 }, { "epoch": 3.4754098360655736, "grad_norm": 2.854759693145752, "learning_rate": 0.0002, "loss": 2.3015, "step": 46640 }, { "epoch": 3.476154992548435, "grad_norm": 2.843125581741333, "learning_rate": 0.0002, "loss": 2.6554, "step": 46650 }, { "epoch": 3.4769001490312967, "grad_norm": 2.7469377517700195, "learning_rate": 0.0002, "loss": 2.5061, "step": 46660 }, { "epoch": 3.477645305514158, "grad_norm": 2.6205174922943115, "learning_rate": 0.0002, "loss": 2.256, "step": 46670 }, { "epoch": 3.4783904619970194, "grad_norm": 2.8084607124328613, "learning_rate": 0.0002, "loss": 2.5537, "step": 46680 }, { "epoch": 3.479135618479881, "grad_norm": 2.556391716003418, "learning_rate": 0.0002, "loss": 2.3825, "step": 46690 }, { "epoch": 3.479880774962742, "grad_norm": 2.547045946121216, "learning_rate": 0.0002, "loss": 2.5505, "step": 46700 }, { "epoch": 3.4806259314456036, "grad_norm": 2.9189865589141846, "learning_rate": 0.0002, "loss": 2.3604, "step": 46710 }, { "epoch": 3.481371087928465, "grad_norm": 2.6150434017181396, "learning_rate": 0.0002, "loss": 2.5506, "step": 46720 }, { "epoch": 3.4821162444113263, "grad_norm": 2.9110896587371826, "learning_rate": 0.0002, "loss": 2.5469, "step": 46730 }, { "epoch": 3.482861400894188, "grad_norm": 2.4996886253356934, "learning_rate": 0.0002, "loss": 2.4058, "step": 46740 }, { "epoch": 3.4836065573770494, "grad_norm": 2.6568424701690674, "learning_rate": 0.0002, "loss": 2.4281, "step": 46750 }, { "epoch": 3.4843517138599105, "grad_norm": 2.790637254714966, "learning_rate": 0.0002, "loss": 2.2597, "step": 46760 }, { "epoch": 3.485096870342772, "grad_norm": 2.712463855743408, "learning_rate": 0.0002, "loss": 2.6115, "step": 46770 }, { "epoch": 3.485842026825633, "grad_norm": 2.838874340057373, "learning_rate": 0.0002, "loss": 2.5807, "step": 46780 }, { "epoch": 3.4865871833084947, "grad_norm": 2.435051441192627, "learning_rate": 0.0002, "loss": 2.2417, "step": 46790 }, { "epoch": 3.4873323397913563, "grad_norm": 2.4676756858825684, "learning_rate": 0.0002, "loss": 2.4674, "step": 46800 }, { "epoch": 3.488077496274218, "grad_norm": 2.871751070022583, "learning_rate": 0.0002, "loss": 2.5139, "step": 46810 }, { "epoch": 3.488822652757079, "grad_norm": 2.740755319595337, "learning_rate": 0.0002, "loss": 2.459, "step": 46820 }, { "epoch": 3.4895678092399405, "grad_norm": 2.719113349914551, "learning_rate": 0.0002, "loss": 2.2784, "step": 46830 }, { "epoch": 3.4903129657228016, "grad_norm": 2.9216134548187256, "learning_rate": 0.0002, "loss": 2.572, "step": 46840 }, { "epoch": 3.491058122205663, "grad_norm": 2.4576525688171387, "learning_rate": 0.0002, "loss": 2.2147, "step": 46850 }, { "epoch": 3.4918032786885247, "grad_norm": 2.628617525100708, "learning_rate": 0.0002, "loss": 2.6005, "step": 46860 }, { "epoch": 3.492548435171386, "grad_norm": 2.8615686893463135, "learning_rate": 0.0002, "loss": 2.4068, "step": 46870 }, { "epoch": 3.4932935916542474, "grad_norm": 2.8863871097564697, "learning_rate": 0.0002, "loss": 2.5519, "step": 46880 }, { "epoch": 3.494038748137109, "grad_norm": 2.529972553253174, "learning_rate": 0.0002, "loss": 2.5626, "step": 46890 }, { "epoch": 3.49478390461997, "grad_norm": 2.534062385559082, "learning_rate": 0.0002, "loss": 2.4962, "step": 46900 }, { "epoch": 3.4955290611028316, "grad_norm": 2.641014575958252, "learning_rate": 0.0002, "loss": 2.3222, "step": 46910 }, { "epoch": 3.496274217585693, "grad_norm": 2.8244290351867676, "learning_rate": 0.0002, "loss": 2.4623, "step": 46920 }, { "epoch": 3.4970193740685542, "grad_norm": 2.8250889778137207, "learning_rate": 0.0002, "loss": 2.5361, "step": 46930 }, { "epoch": 3.497764530551416, "grad_norm": 3.2792413234710693, "learning_rate": 0.0002, "loss": 2.6656, "step": 46940 }, { "epoch": 3.4985096870342773, "grad_norm": 2.25415301322937, "learning_rate": 0.0002, "loss": 2.4449, "step": 46950 }, { "epoch": 3.4992548435171384, "grad_norm": 2.867708921432495, "learning_rate": 0.0002, "loss": 2.3822, "step": 46960 }, { "epoch": 3.5, "grad_norm": 2.8867411613464355, "learning_rate": 0.0002, "loss": 2.4606, "step": 46970 }, { "epoch": 3.5007451564828616, "grad_norm": 2.8825507164001465, "learning_rate": 0.0002, "loss": 2.5032, "step": 46980 }, { "epoch": 3.5014903129657227, "grad_norm": 3.2540154457092285, "learning_rate": 0.0002, "loss": 2.4558, "step": 46990 }, { "epoch": 3.502235469448584, "grad_norm": 2.831401824951172, "learning_rate": 0.0002, "loss": 2.54, "step": 47000 }, { "epoch": 3.5029806259314458, "grad_norm": 2.632877826690674, "learning_rate": 0.0002, "loss": 2.3713, "step": 47010 }, { "epoch": 3.503725782414307, "grad_norm": 2.836522340774536, "learning_rate": 0.0002, "loss": 2.5631, "step": 47020 }, { "epoch": 3.5044709388971684, "grad_norm": 2.5134811401367188, "learning_rate": 0.0002, "loss": 2.4654, "step": 47030 }, { "epoch": 3.50521609538003, "grad_norm": 2.8926382064819336, "learning_rate": 0.0002, "loss": 2.525, "step": 47040 }, { "epoch": 3.505961251862891, "grad_norm": 2.3543901443481445, "learning_rate": 0.0002, "loss": 2.3846, "step": 47050 }, { "epoch": 3.5067064083457526, "grad_norm": 2.8736624717712402, "learning_rate": 0.0002, "loss": 2.608, "step": 47060 }, { "epoch": 3.5074515648286138, "grad_norm": 2.5337979793548584, "learning_rate": 0.0002, "loss": 2.5153, "step": 47070 }, { "epoch": 3.5081967213114753, "grad_norm": 2.630679130554199, "learning_rate": 0.0002, "loss": 2.6418, "step": 47080 }, { "epoch": 3.508941877794337, "grad_norm": 2.820647954940796, "learning_rate": 0.0002, "loss": 2.5799, "step": 47090 }, { "epoch": 3.5096870342771984, "grad_norm": 2.707824230194092, "learning_rate": 0.0002, "loss": 2.3407, "step": 47100 }, { "epoch": 3.5104321907600595, "grad_norm": 2.6139681339263916, "learning_rate": 0.0002, "loss": 2.5448, "step": 47110 }, { "epoch": 3.511177347242921, "grad_norm": 2.887916326522827, "learning_rate": 0.0002, "loss": 2.4502, "step": 47120 }, { "epoch": 3.511922503725782, "grad_norm": 2.730395555496216, "learning_rate": 0.0002, "loss": 2.4459, "step": 47130 }, { "epoch": 3.5126676602086437, "grad_norm": 2.41025447845459, "learning_rate": 0.0002, "loss": 2.5996, "step": 47140 }, { "epoch": 3.5134128166915053, "grad_norm": 2.7070274353027344, "learning_rate": 0.0002, "loss": 2.5929, "step": 47150 }, { "epoch": 3.514157973174367, "grad_norm": 2.963433265686035, "learning_rate": 0.0002, "loss": 2.5335, "step": 47160 }, { "epoch": 3.514903129657228, "grad_norm": 2.7727930545806885, "learning_rate": 0.0002, "loss": 2.5952, "step": 47170 }, { "epoch": 3.5156482861400895, "grad_norm": 2.868478536605835, "learning_rate": 0.0002, "loss": 2.4693, "step": 47180 }, { "epoch": 3.5163934426229506, "grad_norm": 2.550471305847168, "learning_rate": 0.0002, "loss": 2.6188, "step": 47190 }, { "epoch": 3.517138599105812, "grad_norm": 2.536785125732422, "learning_rate": 0.0002, "loss": 2.513, "step": 47200 }, { "epoch": 3.5178837555886737, "grad_norm": 2.470963716506958, "learning_rate": 0.0002, "loss": 2.4627, "step": 47210 }, { "epoch": 3.5186289120715353, "grad_norm": 2.5856993198394775, "learning_rate": 0.0002, "loss": 2.4106, "step": 47220 }, { "epoch": 3.5193740685543964, "grad_norm": 2.3822851181030273, "learning_rate": 0.0002, "loss": 2.4622, "step": 47230 }, { "epoch": 3.520119225037258, "grad_norm": 2.6513640880584717, "learning_rate": 0.0002, "loss": 2.3524, "step": 47240 }, { "epoch": 3.520864381520119, "grad_norm": 2.421938419342041, "learning_rate": 0.0002, "loss": 2.5163, "step": 47250 }, { "epoch": 3.5216095380029806, "grad_norm": 2.7255730628967285, "learning_rate": 0.0002, "loss": 2.4664, "step": 47260 }, { "epoch": 3.522354694485842, "grad_norm": 2.8659775257110596, "learning_rate": 0.0002, "loss": 2.5989, "step": 47270 }, { "epoch": 3.5230998509687033, "grad_norm": 2.8360795974731445, "learning_rate": 0.0002, "loss": 2.5135, "step": 47280 }, { "epoch": 3.523845007451565, "grad_norm": 2.400752544403076, "learning_rate": 0.0002, "loss": 2.2798, "step": 47290 }, { "epoch": 3.5245901639344264, "grad_norm": 2.741732120513916, "learning_rate": 0.0002, "loss": 2.5847, "step": 47300 }, { "epoch": 3.5253353204172875, "grad_norm": 2.8098509311676025, "learning_rate": 0.0002, "loss": 2.4662, "step": 47310 }, { "epoch": 3.526080476900149, "grad_norm": 2.437133312225342, "learning_rate": 0.0002, "loss": 2.528, "step": 47320 }, { "epoch": 3.5268256333830106, "grad_norm": 2.986236572265625, "learning_rate": 0.0002, "loss": 2.5172, "step": 47330 }, { "epoch": 3.5275707898658717, "grad_norm": 2.7789604663848877, "learning_rate": 0.0002, "loss": 2.6075, "step": 47340 }, { "epoch": 3.5283159463487332, "grad_norm": 2.8106555938720703, "learning_rate": 0.0002, "loss": 2.3869, "step": 47350 }, { "epoch": 3.529061102831595, "grad_norm": 2.428912878036499, "learning_rate": 0.0002, "loss": 2.5395, "step": 47360 }, { "epoch": 3.529806259314456, "grad_norm": 2.528440237045288, "learning_rate": 0.0002, "loss": 2.3945, "step": 47370 }, { "epoch": 3.5305514157973175, "grad_norm": 2.547029972076416, "learning_rate": 0.0002, "loss": 2.4825, "step": 47380 }, { "epoch": 3.531296572280179, "grad_norm": 2.567345142364502, "learning_rate": 0.0002, "loss": 2.4825, "step": 47390 }, { "epoch": 3.53204172876304, "grad_norm": 2.5996034145355225, "learning_rate": 0.0002, "loss": 2.5573, "step": 47400 }, { "epoch": 3.5327868852459017, "grad_norm": 2.373216390609741, "learning_rate": 0.0002, "loss": 2.432, "step": 47410 }, { "epoch": 3.533532041728763, "grad_norm": 2.4746549129486084, "learning_rate": 0.0002, "loss": 2.6546, "step": 47420 }, { "epoch": 3.5342771982116243, "grad_norm": 2.6175284385681152, "learning_rate": 0.0002, "loss": 2.5659, "step": 47430 }, { "epoch": 3.535022354694486, "grad_norm": 2.7137603759765625, "learning_rate": 0.0002, "loss": 2.5188, "step": 47440 }, { "epoch": 3.5357675111773474, "grad_norm": 2.842714548110962, "learning_rate": 0.0002, "loss": 2.637, "step": 47450 }, { "epoch": 3.5365126676602086, "grad_norm": 2.494274139404297, "learning_rate": 0.0002, "loss": 2.4436, "step": 47460 }, { "epoch": 3.53725782414307, "grad_norm": 2.0920238494873047, "learning_rate": 0.0002, "loss": 2.3784, "step": 47470 }, { "epoch": 3.538002980625931, "grad_norm": 2.5640475749969482, "learning_rate": 0.0002, "loss": 2.5922, "step": 47480 }, { "epoch": 3.5387481371087928, "grad_norm": 2.3668644428253174, "learning_rate": 0.0002, "loss": 2.4113, "step": 47490 }, { "epoch": 3.5394932935916543, "grad_norm": 2.7140488624572754, "learning_rate": 0.0002, "loss": 2.6541, "step": 47500 }, { "epoch": 3.540238450074516, "grad_norm": 2.655503511428833, "learning_rate": 0.0002, "loss": 2.6018, "step": 47510 }, { "epoch": 3.540983606557377, "grad_norm": 2.689578056335449, "learning_rate": 0.0002, "loss": 2.4236, "step": 47520 }, { "epoch": 3.5417287630402385, "grad_norm": 2.8998870849609375, "learning_rate": 0.0002, "loss": 2.4237, "step": 47530 }, { "epoch": 3.5424739195230996, "grad_norm": 2.749411106109619, "learning_rate": 0.0002, "loss": 2.3785, "step": 47540 }, { "epoch": 3.543219076005961, "grad_norm": 2.72086238861084, "learning_rate": 0.0002, "loss": 2.3861, "step": 47550 }, { "epoch": 3.5439642324888228, "grad_norm": 2.7207980155944824, "learning_rate": 0.0002, "loss": 2.6309, "step": 47560 }, { "epoch": 3.5447093889716843, "grad_norm": 2.5211992263793945, "learning_rate": 0.0002, "loss": 2.6079, "step": 47570 }, { "epoch": 3.5454545454545454, "grad_norm": 2.592176914215088, "learning_rate": 0.0002, "loss": 2.6657, "step": 47580 }, { "epoch": 3.546199701937407, "grad_norm": 2.651425838470459, "learning_rate": 0.0002, "loss": 2.432, "step": 47590 }, { "epoch": 3.546944858420268, "grad_norm": 2.9301748275756836, "learning_rate": 0.0002, "loss": 2.6111, "step": 47600 }, { "epoch": 3.5476900149031296, "grad_norm": 2.4946770668029785, "learning_rate": 0.0002, "loss": 2.5022, "step": 47610 }, { "epoch": 3.548435171385991, "grad_norm": 2.465942144393921, "learning_rate": 0.0002, "loss": 2.4284, "step": 47620 }, { "epoch": 3.5491803278688527, "grad_norm": 2.899252414703369, "learning_rate": 0.0002, "loss": 2.203, "step": 47630 }, { "epoch": 3.549925484351714, "grad_norm": 2.9829139709472656, "learning_rate": 0.0002, "loss": 2.4265, "step": 47640 }, { "epoch": 3.5506706408345754, "grad_norm": 3.0602529048919678, "learning_rate": 0.0002, "loss": 2.4456, "step": 47650 }, { "epoch": 3.5514157973174365, "grad_norm": 2.6664137840270996, "learning_rate": 0.0002, "loss": 2.4857, "step": 47660 }, { "epoch": 3.552160953800298, "grad_norm": 2.6339783668518066, "learning_rate": 0.0002, "loss": 2.4292, "step": 47670 }, { "epoch": 3.5529061102831596, "grad_norm": 2.6717617511749268, "learning_rate": 0.0002, "loss": 2.7226, "step": 47680 }, { "epoch": 3.5536512667660207, "grad_norm": 2.6578307151794434, "learning_rate": 0.0002, "loss": 2.5738, "step": 47690 }, { "epoch": 3.5543964232488823, "grad_norm": 3.1757662296295166, "learning_rate": 0.0002, "loss": 2.3902, "step": 47700 }, { "epoch": 3.555141579731744, "grad_norm": 2.820986032485962, "learning_rate": 0.0002, "loss": 2.5761, "step": 47710 }, { "epoch": 3.555886736214605, "grad_norm": 2.5975351333618164, "learning_rate": 0.0002, "loss": 2.4182, "step": 47720 }, { "epoch": 3.5566318926974665, "grad_norm": 2.1608307361602783, "learning_rate": 0.0002, "loss": 2.4476, "step": 47730 }, { "epoch": 3.557377049180328, "grad_norm": 2.800523519515991, "learning_rate": 0.0002, "loss": 2.5175, "step": 47740 }, { "epoch": 3.558122205663189, "grad_norm": 2.303067207336426, "learning_rate": 0.0002, "loss": 2.3914, "step": 47750 }, { "epoch": 3.5588673621460507, "grad_norm": 2.3079776763916016, "learning_rate": 0.0002, "loss": 2.4874, "step": 47760 }, { "epoch": 3.559612518628912, "grad_norm": 3.0130832195281982, "learning_rate": 0.0002, "loss": 2.6741, "step": 47770 }, { "epoch": 3.5603576751117734, "grad_norm": 2.6367170810699463, "learning_rate": 0.0002, "loss": 2.3853, "step": 47780 }, { "epoch": 3.561102831594635, "grad_norm": 2.8482704162597656, "learning_rate": 0.0002, "loss": 2.4991, "step": 47790 }, { "epoch": 3.5618479880774965, "grad_norm": 2.4554948806762695, "learning_rate": 0.0002, "loss": 2.4502, "step": 47800 }, { "epoch": 3.5625931445603576, "grad_norm": 2.6724441051483154, "learning_rate": 0.0002, "loss": 2.6487, "step": 47810 }, { "epoch": 3.563338301043219, "grad_norm": 2.8899343013763428, "learning_rate": 0.0002, "loss": 2.6546, "step": 47820 }, { "epoch": 3.5640834575260802, "grad_norm": 3.299750804901123, "learning_rate": 0.0002, "loss": 2.6725, "step": 47830 }, { "epoch": 3.564828614008942, "grad_norm": 2.936067819595337, "learning_rate": 0.0002, "loss": 2.5936, "step": 47840 }, { "epoch": 3.5655737704918034, "grad_norm": 2.8651251792907715, "learning_rate": 0.0002, "loss": 2.5748, "step": 47850 }, { "epoch": 3.566318926974665, "grad_norm": 2.662198066711426, "learning_rate": 0.0002, "loss": 2.5171, "step": 47860 }, { "epoch": 3.567064083457526, "grad_norm": 2.711688995361328, "learning_rate": 0.0002, "loss": 2.542, "step": 47870 }, { "epoch": 3.5678092399403876, "grad_norm": 2.569240093231201, "learning_rate": 0.0002, "loss": 2.5765, "step": 47880 }, { "epoch": 3.5685543964232487, "grad_norm": 3.035104990005493, "learning_rate": 0.0002, "loss": 2.6961, "step": 47890 }, { "epoch": 3.5692995529061102, "grad_norm": 2.7538046836853027, "learning_rate": 0.0002, "loss": 2.4598, "step": 47900 }, { "epoch": 3.570044709388972, "grad_norm": 1.9985271692276, "learning_rate": 0.0002, "loss": 2.3044, "step": 47910 }, { "epoch": 3.5707898658718333, "grad_norm": 2.6245272159576416, "learning_rate": 0.0002, "loss": 2.6054, "step": 47920 }, { "epoch": 3.5715350223546944, "grad_norm": 2.6232852935791016, "learning_rate": 0.0002, "loss": 2.5152, "step": 47930 }, { "epoch": 3.572280178837556, "grad_norm": 2.8082447052001953, "learning_rate": 0.0002, "loss": 2.4981, "step": 47940 }, { "epoch": 3.573025335320417, "grad_norm": 2.4905755519866943, "learning_rate": 0.0002, "loss": 2.6797, "step": 47950 }, { "epoch": 3.5737704918032787, "grad_norm": 2.3986294269561768, "learning_rate": 0.0002, "loss": 2.3939, "step": 47960 }, { "epoch": 3.57451564828614, "grad_norm": 2.645498037338257, "learning_rate": 0.0002, "loss": 2.5753, "step": 47970 }, { "epoch": 3.5752608047690018, "grad_norm": 2.494290828704834, "learning_rate": 0.0002, "loss": 2.3679, "step": 47980 }, { "epoch": 3.576005961251863, "grad_norm": 2.542083740234375, "learning_rate": 0.0002, "loss": 2.53, "step": 47990 }, { "epoch": 3.5767511177347244, "grad_norm": 2.529242753982544, "learning_rate": 0.0002, "loss": 2.5242, "step": 48000 }, { "epoch": 3.5774962742175855, "grad_norm": 2.7906339168548584, "learning_rate": 0.0002, "loss": 2.4736, "step": 48010 }, { "epoch": 3.578241430700447, "grad_norm": 2.607820510864258, "learning_rate": 0.0002, "loss": 2.5297, "step": 48020 }, { "epoch": 3.5789865871833086, "grad_norm": 2.3591980934143066, "learning_rate": 0.0002, "loss": 2.6442, "step": 48030 }, { "epoch": 3.5797317436661698, "grad_norm": 2.4848647117614746, "learning_rate": 0.0002, "loss": 2.4396, "step": 48040 }, { "epoch": 3.5804769001490313, "grad_norm": 2.30979323387146, "learning_rate": 0.0002, "loss": 2.4167, "step": 48050 }, { "epoch": 3.581222056631893, "grad_norm": 2.738574981689453, "learning_rate": 0.0002, "loss": 2.5074, "step": 48060 }, { "epoch": 3.581967213114754, "grad_norm": 2.5661182403564453, "learning_rate": 0.0002, "loss": 2.3834, "step": 48070 }, { "epoch": 3.5827123695976155, "grad_norm": 2.47078013420105, "learning_rate": 0.0002, "loss": 2.5172, "step": 48080 }, { "epoch": 3.583457526080477, "grad_norm": 2.635650157928467, "learning_rate": 0.0002, "loss": 2.5179, "step": 48090 }, { "epoch": 3.584202682563338, "grad_norm": 2.555915117263794, "learning_rate": 0.0002, "loss": 2.3665, "step": 48100 }, { "epoch": 3.5849478390461997, "grad_norm": 2.8681654930114746, "learning_rate": 0.0002, "loss": 2.3979, "step": 48110 }, { "epoch": 3.585692995529061, "grad_norm": 2.8930814266204834, "learning_rate": 0.0002, "loss": 2.5871, "step": 48120 }, { "epoch": 3.5864381520119224, "grad_norm": 2.3275046348571777, "learning_rate": 0.0002, "loss": 2.4286, "step": 48130 }, { "epoch": 3.587183308494784, "grad_norm": 2.7267794609069824, "learning_rate": 0.0002, "loss": 2.6153, "step": 48140 }, { "epoch": 3.5879284649776455, "grad_norm": 2.8347225189208984, "learning_rate": 0.0002, "loss": 2.4797, "step": 48150 }, { "epoch": 3.5886736214605066, "grad_norm": 2.776869058609009, "learning_rate": 0.0002, "loss": 2.4546, "step": 48160 }, { "epoch": 3.589418777943368, "grad_norm": 2.580460548400879, "learning_rate": 0.0002, "loss": 2.4808, "step": 48170 }, { "epoch": 3.5901639344262293, "grad_norm": 2.462087869644165, "learning_rate": 0.0002, "loss": 2.4598, "step": 48180 }, { "epoch": 3.590909090909091, "grad_norm": 2.611598491668701, "learning_rate": 0.0002, "loss": 2.3196, "step": 48190 }, { "epoch": 3.5916542473919524, "grad_norm": 2.7346081733703613, "learning_rate": 0.0002, "loss": 2.6164, "step": 48200 }, { "epoch": 3.592399403874814, "grad_norm": 2.7156240940093994, "learning_rate": 0.0002, "loss": 2.5327, "step": 48210 }, { "epoch": 3.593144560357675, "grad_norm": 2.703343629837036, "learning_rate": 0.0002, "loss": 2.4702, "step": 48220 }, { "epoch": 3.5938897168405366, "grad_norm": 3.1075315475463867, "learning_rate": 0.0002, "loss": 2.5256, "step": 48230 }, { "epoch": 3.5946348733233977, "grad_norm": 3.027200937271118, "learning_rate": 0.0002, "loss": 2.4926, "step": 48240 }, { "epoch": 3.5953800298062593, "grad_norm": 2.59000825881958, "learning_rate": 0.0002, "loss": 2.0317, "step": 48250 }, { "epoch": 3.596125186289121, "grad_norm": 2.602517604827881, "learning_rate": 0.0002, "loss": 2.3541, "step": 48260 }, { "epoch": 3.5968703427719824, "grad_norm": 2.692680835723877, "learning_rate": 0.0002, "loss": 2.4874, "step": 48270 }, { "epoch": 3.5976154992548435, "grad_norm": 1.833561658859253, "learning_rate": 0.0002, "loss": 2.3711, "step": 48280 }, { "epoch": 3.598360655737705, "grad_norm": 3.1557459831237793, "learning_rate": 0.0002, "loss": 2.5707, "step": 48290 }, { "epoch": 3.599105812220566, "grad_norm": 2.482980728149414, "learning_rate": 0.0002, "loss": 2.4062, "step": 48300 }, { "epoch": 3.5998509687034277, "grad_norm": 2.730379104614258, "learning_rate": 0.0002, "loss": 2.6087, "step": 48310 }, { "epoch": 3.6005961251862892, "grad_norm": 2.50185227394104, "learning_rate": 0.0002, "loss": 2.4351, "step": 48320 }, { "epoch": 3.601341281669151, "grad_norm": 2.4742274284362793, "learning_rate": 0.0002, "loss": 2.3752, "step": 48330 }, { "epoch": 3.602086438152012, "grad_norm": 2.677802801132202, "learning_rate": 0.0002, "loss": 2.4924, "step": 48340 }, { "epoch": 3.6028315946348735, "grad_norm": 2.7922158241271973, "learning_rate": 0.0002, "loss": 2.5855, "step": 48350 }, { "epoch": 3.6035767511177346, "grad_norm": 3.383920431137085, "learning_rate": 0.0002, "loss": 2.5071, "step": 48360 }, { "epoch": 3.604321907600596, "grad_norm": 2.9359383583068848, "learning_rate": 0.0002, "loss": 2.361, "step": 48370 }, { "epoch": 3.6050670640834577, "grad_norm": 2.7278084754943848, "learning_rate": 0.0002, "loss": 2.641, "step": 48380 }, { "epoch": 3.605812220566319, "grad_norm": 2.810471296310425, "learning_rate": 0.0002, "loss": 2.6345, "step": 48390 }, { "epoch": 3.6065573770491803, "grad_norm": 2.4108266830444336, "learning_rate": 0.0002, "loss": 2.5544, "step": 48400 }, { "epoch": 3.607302533532042, "grad_norm": 2.8318967819213867, "learning_rate": 0.0002, "loss": 2.52, "step": 48410 }, { "epoch": 3.608047690014903, "grad_norm": 2.8543920516967773, "learning_rate": 0.0002, "loss": 2.5537, "step": 48420 }, { "epoch": 3.6087928464977646, "grad_norm": 2.791773796081543, "learning_rate": 0.0002, "loss": 2.4955, "step": 48430 }, { "epoch": 3.609538002980626, "grad_norm": 2.8440630435943604, "learning_rate": 0.0002, "loss": 2.4265, "step": 48440 }, { "epoch": 3.610283159463487, "grad_norm": 2.4981586933135986, "learning_rate": 0.0002, "loss": 2.4647, "step": 48450 }, { "epoch": 3.6110283159463488, "grad_norm": 2.399179458618164, "learning_rate": 0.0002, "loss": 2.5682, "step": 48460 }, { "epoch": 3.61177347242921, "grad_norm": 2.578258991241455, "learning_rate": 0.0002, "loss": 2.5542, "step": 48470 }, { "epoch": 3.6125186289120714, "grad_norm": 2.8405141830444336, "learning_rate": 0.0002, "loss": 2.5277, "step": 48480 }, { "epoch": 3.613263785394933, "grad_norm": 2.530799150466919, "learning_rate": 0.0002, "loss": 2.644, "step": 48490 }, { "epoch": 3.6140089418777945, "grad_norm": 2.7181057929992676, "learning_rate": 0.0002, "loss": 2.4457, "step": 48500 }, { "epoch": 3.6147540983606556, "grad_norm": 2.577636480331421, "learning_rate": 0.0002, "loss": 2.4226, "step": 48510 }, { "epoch": 3.615499254843517, "grad_norm": 3.4074344635009766, "learning_rate": 0.0002, "loss": 2.5066, "step": 48520 }, { "epoch": 3.6162444113263783, "grad_norm": 2.69319748878479, "learning_rate": 0.0002, "loss": 2.5189, "step": 48530 }, { "epoch": 3.61698956780924, "grad_norm": 2.8684957027435303, "learning_rate": 0.0002, "loss": 2.5323, "step": 48540 }, { "epoch": 3.6177347242921014, "grad_norm": 2.828727960586548, "learning_rate": 0.0002, "loss": 2.5495, "step": 48550 }, { "epoch": 3.618479880774963, "grad_norm": 2.4730985164642334, "learning_rate": 0.0002, "loss": 2.5354, "step": 48560 }, { "epoch": 3.619225037257824, "grad_norm": 2.840999126434326, "learning_rate": 0.0002, "loss": 2.5194, "step": 48570 }, { "epoch": 3.6199701937406856, "grad_norm": 2.5371737480163574, "learning_rate": 0.0002, "loss": 2.4109, "step": 48580 }, { "epoch": 3.6207153502235467, "grad_norm": 3.4489569664001465, "learning_rate": 0.0002, "loss": 2.6168, "step": 48590 }, { "epoch": 3.6214605067064083, "grad_norm": 2.614786386489868, "learning_rate": 0.0002, "loss": 2.4671, "step": 48600 }, { "epoch": 3.62220566318927, "grad_norm": 2.5180749893188477, "learning_rate": 0.0002, "loss": 2.4171, "step": 48610 }, { "epoch": 3.6229508196721314, "grad_norm": 2.856128692626953, "learning_rate": 0.0002, "loss": 2.3804, "step": 48620 }, { "epoch": 3.6236959761549925, "grad_norm": 2.196049213409424, "learning_rate": 0.0002, "loss": 2.4736, "step": 48630 }, { "epoch": 3.624441132637854, "grad_norm": 2.728996515274048, "learning_rate": 0.0002, "loss": 2.4175, "step": 48640 }, { "epoch": 3.625186289120715, "grad_norm": 2.3340585231781006, "learning_rate": 0.0002, "loss": 2.4509, "step": 48650 }, { "epoch": 3.6259314456035767, "grad_norm": 2.537264823913574, "learning_rate": 0.0002, "loss": 2.3519, "step": 48660 }, { "epoch": 3.6266766020864383, "grad_norm": 2.4871232509613037, "learning_rate": 0.0002, "loss": 2.3988, "step": 48670 }, { "epoch": 3.6274217585693, "grad_norm": 2.5547709465026855, "learning_rate": 0.0002, "loss": 2.2731, "step": 48680 }, { "epoch": 3.628166915052161, "grad_norm": 2.8591160774230957, "learning_rate": 0.0002, "loss": 2.6361, "step": 48690 }, { "epoch": 3.6289120715350225, "grad_norm": 2.216677665710449, "learning_rate": 0.0002, "loss": 2.3692, "step": 48700 }, { "epoch": 3.6296572280178836, "grad_norm": 2.721316337585449, "learning_rate": 0.0002, "loss": 2.4691, "step": 48710 }, { "epoch": 3.630402384500745, "grad_norm": 2.8119468688964844, "learning_rate": 0.0002, "loss": 2.397, "step": 48720 }, { "epoch": 3.6311475409836067, "grad_norm": 3.7255992889404297, "learning_rate": 0.0002, "loss": 2.461, "step": 48730 }, { "epoch": 3.631892697466468, "grad_norm": 2.680361032485962, "learning_rate": 0.0002, "loss": 2.2952, "step": 48740 }, { "epoch": 3.6326378539493294, "grad_norm": 2.1289005279541016, "learning_rate": 0.0002, "loss": 2.4144, "step": 48750 }, { "epoch": 3.633383010432191, "grad_norm": 2.259798765182495, "learning_rate": 0.0002, "loss": 2.6487, "step": 48760 }, { "epoch": 3.634128166915052, "grad_norm": 2.293454170227051, "learning_rate": 0.0002, "loss": 2.5502, "step": 48770 }, { "epoch": 3.6348733233979136, "grad_norm": 2.636841297149658, "learning_rate": 0.0002, "loss": 2.5529, "step": 48780 }, { "epoch": 3.635618479880775, "grad_norm": 2.310394287109375, "learning_rate": 0.0002, "loss": 2.4676, "step": 48790 }, { "epoch": 3.6363636363636362, "grad_norm": 2.545966625213623, "learning_rate": 0.0002, "loss": 2.4672, "step": 48800 }, { "epoch": 3.637108792846498, "grad_norm": 2.9500443935394287, "learning_rate": 0.0002, "loss": 2.4827, "step": 48810 }, { "epoch": 3.637853949329359, "grad_norm": 2.543187141418457, "learning_rate": 0.0002, "loss": 2.4132, "step": 48820 }, { "epoch": 3.6385991058122205, "grad_norm": 2.2847654819488525, "learning_rate": 0.0002, "loss": 2.4768, "step": 48830 }, { "epoch": 3.639344262295082, "grad_norm": 2.72090482711792, "learning_rate": 0.0002, "loss": 2.5947, "step": 48840 }, { "epoch": 3.6400894187779436, "grad_norm": 2.562582015991211, "learning_rate": 0.0002, "loss": 2.5658, "step": 48850 }, { "epoch": 3.6408345752608047, "grad_norm": 2.780815839767456, "learning_rate": 0.0002, "loss": 2.5017, "step": 48860 }, { "epoch": 3.6415797317436662, "grad_norm": 2.9853780269622803, "learning_rate": 0.0002, "loss": 2.6693, "step": 48870 }, { "epoch": 3.6423248882265273, "grad_norm": 2.79302716255188, "learning_rate": 0.0002, "loss": 2.4589, "step": 48880 }, { "epoch": 3.643070044709389, "grad_norm": 2.7018563747406006, "learning_rate": 0.0002, "loss": 2.3826, "step": 48890 }, { "epoch": 3.6438152011922504, "grad_norm": 2.699528694152832, "learning_rate": 0.0002, "loss": 2.3877, "step": 48900 }, { "epoch": 3.644560357675112, "grad_norm": 2.4166371822357178, "learning_rate": 0.0002, "loss": 2.5869, "step": 48910 }, { "epoch": 3.645305514157973, "grad_norm": 2.5733654499053955, "learning_rate": 0.0002, "loss": 2.5201, "step": 48920 }, { "epoch": 3.6460506706408347, "grad_norm": 2.7172818183898926, "learning_rate": 0.0002, "loss": 2.4568, "step": 48930 }, { "epoch": 3.6467958271236958, "grad_norm": 2.5636215209960938, "learning_rate": 0.0002, "loss": 2.5941, "step": 48940 }, { "epoch": 3.6475409836065573, "grad_norm": 2.592777729034424, "learning_rate": 0.0002, "loss": 2.3906, "step": 48950 }, { "epoch": 3.648286140089419, "grad_norm": 2.5706088542938232, "learning_rate": 0.0002, "loss": 2.4504, "step": 48960 }, { "epoch": 3.6490312965722804, "grad_norm": 2.48825740814209, "learning_rate": 0.0002, "loss": 2.4888, "step": 48970 }, { "epoch": 3.6497764530551415, "grad_norm": 2.727311372756958, "learning_rate": 0.0002, "loss": 2.4781, "step": 48980 }, { "epoch": 3.650521609538003, "grad_norm": 2.3753762245178223, "learning_rate": 0.0002, "loss": 2.504, "step": 48990 }, { "epoch": 3.651266766020864, "grad_norm": 2.639061450958252, "learning_rate": 0.0002, "loss": 2.5593, "step": 49000 }, { "epoch": 3.6520119225037257, "grad_norm": 2.736478090286255, "learning_rate": 0.0002, "loss": 2.5617, "step": 49010 }, { "epoch": 3.6527570789865873, "grad_norm": 2.6167259216308594, "learning_rate": 0.0002, "loss": 2.7529, "step": 49020 }, { "epoch": 3.653502235469449, "grad_norm": 2.2042272090911865, "learning_rate": 0.0002, "loss": 2.4023, "step": 49030 }, { "epoch": 3.65424739195231, "grad_norm": 2.191154956817627, "learning_rate": 0.0002, "loss": 2.5161, "step": 49040 }, { "epoch": 3.6549925484351715, "grad_norm": 2.672147035598755, "learning_rate": 0.0002, "loss": 2.4459, "step": 49050 }, { "epoch": 3.6557377049180326, "grad_norm": 2.6667487621307373, "learning_rate": 0.0002, "loss": 2.5033, "step": 49060 }, { "epoch": 3.656482861400894, "grad_norm": 2.488171100616455, "learning_rate": 0.0002, "loss": 2.3307, "step": 49070 }, { "epoch": 3.6572280178837557, "grad_norm": 2.9280152320861816, "learning_rate": 0.0002, "loss": 2.5912, "step": 49080 }, { "epoch": 3.657973174366617, "grad_norm": 2.767899513244629, "learning_rate": 0.0002, "loss": 2.5289, "step": 49090 }, { "epoch": 3.6587183308494784, "grad_norm": 2.595731019973755, "learning_rate": 0.0002, "loss": 2.6172, "step": 49100 }, { "epoch": 3.65946348733234, "grad_norm": 2.531205177307129, "learning_rate": 0.0002, "loss": 2.5384, "step": 49110 }, { "epoch": 3.660208643815201, "grad_norm": 2.5499446392059326, "learning_rate": 0.0002, "loss": 2.4912, "step": 49120 }, { "epoch": 3.6609538002980626, "grad_norm": 2.692110300064087, "learning_rate": 0.0002, "loss": 2.6736, "step": 49130 }, { "epoch": 3.661698956780924, "grad_norm": 2.5231432914733887, "learning_rate": 0.0002, "loss": 2.4245, "step": 49140 }, { "epoch": 3.6624441132637853, "grad_norm": 2.7942330837249756, "learning_rate": 0.0002, "loss": 2.5642, "step": 49150 }, { "epoch": 3.663189269746647, "grad_norm": 3.3361740112304688, "learning_rate": 0.0002, "loss": 2.5024, "step": 49160 }, { "epoch": 3.663934426229508, "grad_norm": 3.006382465362549, "learning_rate": 0.0002, "loss": 2.5587, "step": 49170 }, { "epoch": 3.6646795827123695, "grad_norm": 2.637521743774414, "learning_rate": 0.0002, "loss": 2.5168, "step": 49180 }, { "epoch": 3.665424739195231, "grad_norm": 2.8171842098236084, "learning_rate": 0.0002, "loss": 2.6647, "step": 49190 }, { "epoch": 3.6661698956780926, "grad_norm": 2.3812084197998047, "learning_rate": 0.0002, "loss": 2.2236, "step": 49200 }, { "epoch": 3.6669150521609537, "grad_norm": 2.4592621326446533, "learning_rate": 0.0002, "loss": 2.3351, "step": 49210 }, { "epoch": 3.6676602086438153, "grad_norm": 2.5426881313323975, "learning_rate": 0.0002, "loss": 2.5694, "step": 49220 }, { "epoch": 3.6684053651266764, "grad_norm": 2.749530553817749, "learning_rate": 0.0002, "loss": 2.4955, "step": 49230 }, { "epoch": 3.669150521609538, "grad_norm": 3.183913230895996, "learning_rate": 0.0002, "loss": 2.7195, "step": 49240 }, { "epoch": 3.6698956780923995, "grad_norm": 2.6278300285339355, "learning_rate": 0.0002, "loss": 2.5673, "step": 49250 }, { "epoch": 3.670640834575261, "grad_norm": 2.996420383453369, "learning_rate": 0.0002, "loss": 2.5843, "step": 49260 }, { "epoch": 3.671385991058122, "grad_norm": 2.6500403881073, "learning_rate": 0.0002, "loss": 2.6122, "step": 49270 }, { "epoch": 3.6721311475409837, "grad_norm": 3.047901153564453, "learning_rate": 0.0002, "loss": 2.4905, "step": 49280 }, { "epoch": 3.672876304023845, "grad_norm": 2.8154449462890625, "learning_rate": 0.0002, "loss": 2.4668, "step": 49290 }, { "epoch": 3.6736214605067063, "grad_norm": 2.3584389686584473, "learning_rate": 0.0002, "loss": 2.3198, "step": 49300 }, { "epoch": 3.674366616989568, "grad_norm": 2.881648063659668, "learning_rate": 0.0002, "loss": 2.4651, "step": 49310 }, { "epoch": 3.6751117734724295, "grad_norm": 2.5896761417388916, "learning_rate": 0.0002, "loss": 2.241, "step": 49320 }, { "epoch": 3.6758569299552906, "grad_norm": 2.812044858932495, "learning_rate": 0.0002, "loss": 2.5172, "step": 49330 }, { "epoch": 3.676602086438152, "grad_norm": 2.66312575340271, "learning_rate": 0.0002, "loss": 2.4133, "step": 49340 }, { "epoch": 3.6773472429210132, "grad_norm": 2.790090799331665, "learning_rate": 0.0002, "loss": 2.709, "step": 49350 }, { "epoch": 3.678092399403875, "grad_norm": 2.4893035888671875, "learning_rate": 0.0002, "loss": 2.4402, "step": 49360 }, { "epoch": 3.6788375558867363, "grad_norm": 2.754606246948242, "learning_rate": 0.0002, "loss": 2.4593, "step": 49370 }, { "epoch": 3.679582712369598, "grad_norm": 2.878617763519287, "learning_rate": 0.0002, "loss": 2.6367, "step": 49380 }, { "epoch": 3.680327868852459, "grad_norm": 2.854499101638794, "learning_rate": 0.0002, "loss": 2.4631, "step": 49390 }, { "epoch": 3.6810730253353205, "grad_norm": 2.849299669265747, "learning_rate": 0.0002, "loss": 2.465, "step": 49400 }, { "epoch": 3.6818181818181817, "grad_norm": 2.570016860961914, "learning_rate": 0.0002, "loss": 2.4901, "step": 49410 }, { "epoch": 3.682563338301043, "grad_norm": 2.6923599243164062, "learning_rate": 0.0002, "loss": 2.4682, "step": 49420 }, { "epoch": 3.6833084947839048, "grad_norm": 2.5435593128204346, "learning_rate": 0.0002, "loss": 2.5621, "step": 49430 }, { "epoch": 3.684053651266766, "grad_norm": 2.8608274459838867, "learning_rate": 0.0002, "loss": 2.586, "step": 49440 }, { "epoch": 3.6847988077496274, "grad_norm": 2.5079166889190674, "learning_rate": 0.0002, "loss": 2.4451, "step": 49450 }, { "epoch": 3.685543964232489, "grad_norm": 2.874114990234375, "learning_rate": 0.0002, "loss": 2.446, "step": 49460 }, { "epoch": 3.68628912071535, "grad_norm": 2.727252960205078, "learning_rate": 0.0002, "loss": 2.4395, "step": 49470 }, { "epoch": 3.6870342771982116, "grad_norm": 2.734837532043457, "learning_rate": 0.0002, "loss": 2.4344, "step": 49480 }, { "epoch": 3.687779433681073, "grad_norm": 2.5745677947998047, "learning_rate": 0.0002, "loss": 2.3926, "step": 49490 }, { "epoch": 3.6885245901639343, "grad_norm": 2.9296648502349854, "learning_rate": 0.0002, "loss": 2.458, "step": 49500 }, { "epoch": 3.689269746646796, "grad_norm": 2.666250705718994, "learning_rate": 0.0002, "loss": 2.5119, "step": 49510 }, { "epoch": 3.690014903129657, "grad_norm": 2.7720589637756348, "learning_rate": 0.0002, "loss": 2.4189, "step": 49520 }, { "epoch": 3.6907600596125185, "grad_norm": 2.5077097415924072, "learning_rate": 0.0002, "loss": 2.486, "step": 49530 }, { "epoch": 3.69150521609538, "grad_norm": 2.7415547370910645, "learning_rate": 0.0002, "loss": 2.4732, "step": 49540 }, { "epoch": 3.6922503725782416, "grad_norm": 3.023948907852173, "learning_rate": 0.0002, "loss": 2.4565, "step": 49550 }, { "epoch": 3.6929955290611027, "grad_norm": 2.926791191101074, "learning_rate": 0.0002, "loss": 2.3835, "step": 49560 }, { "epoch": 3.6937406855439643, "grad_norm": 2.64837646484375, "learning_rate": 0.0002, "loss": 2.5395, "step": 49570 }, { "epoch": 3.6944858420268254, "grad_norm": 3.1777780055999756, "learning_rate": 0.0002, "loss": 2.6367, "step": 49580 }, { "epoch": 3.695230998509687, "grad_norm": 2.567836284637451, "learning_rate": 0.0002, "loss": 2.7127, "step": 49590 }, { "epoch": 3.6959761549925485, "grad_norm": 2.6760616302490234, "learning_rate": 0.0002, "loss": 2.5427, "step": 49600 }, { "epoch": 3.69672131147541, "grad_norm": 2.6477506160736084, "learning_rate": 0.0002, "loss": 2.446, "step": 49610 }, { "epoch": 3.697466467958271, "grad_norm": 2.555225372314453, "learning_rate": 0.0002, "loss": 2.4988, "step": 49620 }, { "epoch": 3.6982116244411327, "grad_norm": 2.3987672328948975, "learning_rate": 0.0002, "loss": 2.476, "step": 49630 }, { "epoch": 3.698956780923994, "grad_norm": 2.7095890045166016, "learning_rate": 0.0002, "loss": 2.5311, "step": 49640 }, { "epoch": 3.6997019374068554, "grad_norm": 2.7306811809539795, "learning_rate": 0.0002, "loss": 2.5193, "step": 49650 }, { "epoch": 3.700447093889717, "grad_norm": 2.5883631706237793, "learning_rate": 0.0002, "loss": 2.5261, "step": 49660 }, { "epoch": 3.7011922503725785, "grad_norm": 2.3863844871520996, "learning_rate": 0.0002, "loss": 2.4421, "step": 49670 }, { "epoch": 3.7019374068554396, "grad_norm": 2.6333467960357666, "learning_rate": 0.0002, "loss": 2.5035, "step": 49680 }, { "epoch": 3.702682563338301, "grad_norm": 2.6204724311828613, "learning_rate": 0.0002, "loss": 2.3942, "step": 49690 }, { "epoch": 3.7034277198211623, "grad_norm": 2.580791711807251, "learning_rate": 0.0002, "loss": 2.205, "step": 49700 }, { "epoch": 3.704172876304024, "grad_norm": 2.720482587814331, "learning_rate": 0.0002, "loss": 2.4407, "step": 49710 }, { "epoch": 3.7049180327868854, "grad_norm": 2.664541006088257, "learning_rate": 0.0002, "loss": 2.4447, "step": 49720 }, { "epoch": 3.705663189269747, "grad_norm": 2.4946305751800537, "learning_rate": 0.0002, "loss": 2.5258, "step": 49730 }, { "epoch": 3.706408345752608, "grad_norm": 2.704585552215576, "learning_rate": 0.0002, "loss": 2.3665, "step": 49740 }, { "epoch": 3.7071535022354696, "grad_norm": 2.7640819549560547, "learning_rate": 0.0002, "loss": 2.5606, "step": 49750 }, { "epoch": 3.7078986587183307, "grad_norm": 2.631479263305664, "learning_rate": 0.0002, "loss": 2.5774, "step": 49760 }, { "epoch": 3.7086438152011922, "grad_norm": 2.800203800201416, "learning_rate": 0.0002, "loss": 2.4952, "step": 49770 }, { "epoch": 3.709388971684054, "grad_norm": 2.66166090965271, "learning_rate": 0.0002, "loss": 2.5133, "step": 49780 }, { "epoch": 3.710134128166915, "grad_norm": 2.272296905517578, "learning_rate": 0.0002, "loss": 2.5408, "step": 49790 }, { "epoch": 3.7108792846497765, "grad_norm": 2.591278076171875, "learning_rate": 0.0002, "loss": 2.5519, "step": 49800 }, { "epoch": 3.711624441132638, "grad_norm": 2.4364070892333984, "learning_rate": 0.0002, "loss": 2.4554, "step": 49810 }, { "epoch": 3.712369597615499, "grad_norm": 2.7269444465637207, "learning_rate": 0.0002, "loss": 2.4394, "step": 49820 }, { "epoch": 3.7131147540983607, "grad_norm": 2.7368903160095215, "learning_rate": 0.0002, "loss": 2.5778, "step": 49830 }, { "epoch": 3.7138599105812222, "grad_norm": 2.6224026679992676, "learning_rate": 0.0002, "loss": 2.4692, "step": 49840 }, { "epoch": 3.7146050670640833, "grad_norm": 2.544132709503174, "learning_rate": 0.0002, "loss": 2.5929, "step": 49850 }, { "epoch": 3.715350223546945, "grad_norm": 2.141615629196167, "learning_rate": 0.0002, "loss": 2.4091, "step": 49860 }, { "epoch": 3.716095380029806, "grad_norm": 2.357640027999878, "learning_rate": 0.0002, "loss": 2.5352, "step": 49870 }, { "epoch": 3.7168405365126675, "grad_norm": 2.8612751960754395, "learning_rate": 0.0002, "loss": 2.6656, "step": 49880 }, { "epoch": 3.717585692995529, "grad_norm": 2.558424472808838, "learning_rate": 0.0002, "loss": 2.4203, "step": 49890 }, { "epoch": 3.7183308494783907, "grad_norm": 2.3906445503234863, "learning_rate": 0.0002, "loss": 2.4509, "step": 49900 }, { "epoch": 3.7190760059612518, "grad_norm": 2.647514581680298, "learning_rate": 0.0002, "loss": 2.5821, "step": 49910 }, { "epoch": 3.7198211624441133, "grad_norm": 2.710005283355713, "learning_rate": 0.0002, "loss": 2.5298, "step": 49920 }, { "epoch": 3.7205663189269744, "grad_norm": 2.5558953285217285, "learning_rate": 0.0002, "loss": 2.4721, "step": 49930 }, { "epoch": 3.721311475409836, "grad_norm": 2.446017265319824, "learning_rate": 0.0002, "loss": 2.4778, "step": 49940 }, { "epoch": 3.7220566318926975, "grad_norm": 3.0394492149353027, "learning_rate": 0.0002, "loss": 2.3313, "step": 49950 }, { "epoch": 3.722801788375559, "grad_norm": 2.5545358657836914, "learning_rate": 0.0002, "loss": 2.6545, "step": 49960 }, { "epoch": 3.72354694485842, "grad_norm": 2.7570934295654297, "learning_rate": 0.0002, "loss": 2.4509, "step": 49970 }, { "epoch": 3.7242921013412817, "grad_norm": 2.592240571975708, "learning_rate": 0.0002, "loss": 2.4852, "step": 49980 }, { "epoch": 3.725037257824143, "grad_norm": 2.751447916030884, "learning_rate": 0.0002, "loss": 2.3889, "step": 49990 }, { "epoch": 3.7257824143070044, "grad_norm": 2.723025321960449, "learning_rate": 0.0002, "loss": 2.3713, "step": 50000 }, { "epoch": 3.726527570789866, "grad_norm": 2.759801149368286, "learning_rate": 0.0002, "loss": 2.507, "step": 50010 }, { "epoch": 3.7272727272727275, "grad_norm": 2.721895933151245, "learning_rate": 0.0002, "loss": 2.4909, "step": 50020 }, { "epoch": 3.7280178837555886, "grad_norm": 2.8816866874694824, "learning_rate": 0.0002, "loss": 2.5981, "step": 50030 }, { "epoch": 3.72876304023845, "grad_norm": 2.328068256378174, "learning_rate": 0.0002, "loss": 2.5311, "step": 50040 }, { "epoch": 3.7295081967213113, "grad_norm": 2.826514482498169, "learning_rate": 0.0002, "loss": 2.4776, "step": 50050 }, { "epoch": 3.730253353204173, "grad_norm": 2.855038642883301, "learning_rate": 0.0002, "loss": 2.5622, "step": 50060 }, { "epoch": 3.7309985096870344, "grad_norm": 2.6433498859405518, "learning_rate": 0.0002, "loss": 2.4781, "step": 50070 }, { "epoch": 3.731743666169896, "grad_norm": 2.565962076187134, "learning_rate": 0.0002, "loss": 2.5446, "step": 50080 }, { "epoch": 3.732488822652757, "grad_norm": 2.8265738487243652, "learning_rate": 0.0002, "loss": 2.534, "step": 50090 }, { "epoch": 3.7332339791356186, "grad_norm": 2.9599409103393555, "learning_rate": 0.0002, "loss": 2.4206, "step": 50100 }, { "epoch": 3.7339791356184797, "grad_norm": 2.669797897338867, "learning_rate": 0.0002, "loss": 2.6088, "step": 50110 }, { "epoch": 3.7347242921013413, "grad_norm": 2.8583147525787354, "learning_rate": 0.0002, "loss": 2.5827, "step": 50120 }, { "epoch": 3.735469448584203, "grad_norm": 2.217470407485962, "learning_rate": 0.0002, "loss": 2.4562, "step": 50130 }, { "epoch": 3.736214605067064, "grad_norm": 3.1019763946533203, "learning_rate": 0.0002, "loss": 2.4064, "step": 50140 }, { "epoch": 3.7369597615499255, "grad_norm": 2.963712692260742, "learning_rate": 0.0002, "loss": 2.5403, "step": 50150 }, { "epoch": 3.737704918032787, "grad_norm": 3.015799045562744, "learning_rate": 0.0002, "loss": 2.6094, "step": 50160 }, { "epoch": 3.738450074515648, "grad_norm": 2.435353994369507, "learning_rate": 0.0002, "loss": 2.4104, "step": 50170 }, { "epoch": 3.7391952309985097, "grad_norm": 2.5293540954589844, "learning_rate": 0.0002, "loss": 2.5578, "step": 50180 }, { "epoch": 3.7399403874813713, "grad_norm": 2.1452317237854004, "learning_rate": 0.0002, "loss": 2.4667, "step": 50190 }, { "epoch": 3.7406855439642324, "grad_norm": 2.675736665725708, "learning_rate": 0.0002, "loss": 2.6145, "step": 50200 }, { "epoch": 3.741430700447094, "grad_norm": 2.4674720764160156, "learning_rate": 0.0002, "loss": 2.3061, "step": 50210 }, { "epoch": 3.742175856929955, "grad_norm": 2.5854485034942627, "learning_rate": 0.0002, "loss": 2.6369, "step": 50220 }, { "epoch": 3.7429210134128166, "grad_norm": 2.5845370292663574, "learning_rate": 0.0002, "loss": 2.4753, "step": 50230 }, { "epoch": 3.743666169895678, "grad_norm": 2.4826760292053223, "learning_rate": 0.0002, "loss": 2.4353, "step": 50240 }, { "epoch": 3.7444113263785397, "grad_norm": 2.491818428039551, "learning_rate": 0.0002, "loss": 2.3542, "step": 50250 }, { "epoch": 3.745156482861401, "grad_norm": 2.7059671878814697, "learning_rate": 0.0002, "loss": 2.3786, "step": 50260 }, { "epoch": 3.7459016393442623, "grad_norm": 2.488055944442749, "learning_rate": 0.0002, "loss": 2.3972, "step": 50270 }, { "epoch": 3.7466467958271235, "grad_norm": 2.8024628162384033, "learning_rate": 0.0002, "loss": 2.4418, "step": 50280 }, { "epoch": 3.747391952309985, "grad_norm": 2.639986991882324, "learning_rate": 0.0002, "loss": 2.5661, "step": 50290 }, { "epoch": 3.7481371087928466, "grad_norm": 2.477285623550415, "learning_rate": 0.0002, "loss": 2.4308, "step": 50300 }, { "epoch": 3.748882265275708, "grad_norm": 2.709949254989624, "learning_rate": 0.0002, "loss": 2.6701, "step": 50310 }, { "epoch": 3.7496274217585692, "grad_norm": 2.567444324493408, "learning_rate": 0.0002, "loss": 2.6772, "step": 50320 }, { "epoch": 3.7503725782414308, "grad_norm": 2.8398115634918213, "learning_rate": 0.0002, "loss": 2.5978, "step": 50330 }, { "epoch": 3.751117734724292, "grad_norm": 2.623926877975464, "learning_rate": 0.0002, "loss": 2.4706, "step": 50340 }, { "epoch": 3.7518628912071534, "grad_norm": 2.404278039932251, "learning_rate": 0.0002, "loss": 2.6895, "step": 50350 }, { "epoch": 3.752608047690015, "grad_norm": 2.811429023742676, "learning_rate": 0.0002, "loss": 2.3915, "step": 50360 }, { "epoch": 3.7533532041728765, "grad_norm": 2.551865339279175, "learning_rate": 0.0002, "loss": 2.3965, "step": 50370 }, { "epoch": 3.7540983606557377, "grad_norm": 3.0696215629577637, "learning_rate": 0.0002, "loss": 2.4566, "step": 50380 }, { "epoch": 3.754843517138599, "grad_norm": 2.6258938312530518, "learning_rate": 0.0002, "loss": 2.564, "step": 50390 }, { "epoch": 3.7555886736214603, "grad_norm": 2.8297739028930664, "learning_rate": 0.0002, "loss": 2.7246, "step": 50400 }, { "epoch": 3.756333830104322, "grad_norm": 2.9615511894226074, "learning_rate": 0.0002, "loss": 2.6202, "step": 50410 }, { "epoch": 3.7570789865871834, "grad_norm": 2.3409204483032227, "learning_rate": 0.0002, "loss": 2.5359, "step": 50420 }, { "epoch": 3.757824143070045, "grad_norm": 2.545732021331787, "learning_rate": 0.0002, "loss": 2.5465, "step": 50430 }, { "epoch": 3.758569299552906, "grad_norm": 2.5160508155822754, "learning_rate": 0.0002, "loss": 2.5209, "step": 50440 }, { "epoch": 3.7593144560357676, "grad_norm": 2.9805400371551514, "learning_rate": 0.0002, "loss": 2.4882, "step": 50450 }, { "epoch": 3.7600596125186287, "grad_norm": 2.4430508613586426, "learning_rate": 0.0002, "loss": 2.5019, "step": 50460 }, { "epoch": 3.7608047690014903, "grad_norm": 2.3823506832122803, "learning_rate": 0.0002, "loss": 2.3845, "step": 50470 }, { "epoch": 3.761549925484352, "grad_norm": 2.54958176612854, "learning_rate": 0.0002, "loss": 2.4257, "step": 50480 }, { "epoch": 3.762295081967213, "grad_norm": 2.992579460144043, "learning_rate": 0.0002, "loss": 2.5295, "step": 50490 }, { "epoch": 3.7630402384500745, "grad_norm": 2.7364273071289062, "learning_rate": 0.0002, "loss": 2.7259, "step": 50500 }, { "epoch": 3.763785394932936, "grad_norm": 2.527580499649048, "learning_rate": 0.0002, "loss": 2.5548, "step": 50510 }, { "epoch": 3.764530551415797, "grad_norm": 2.6764330863952637, "learning_rate": 0.0002, "loss": 2.4295, "step": 50520 }, { "epoch": 3.7652757078986587, "grad_norm": 2.7412283420562744, "learning_rate": 0.0002, "loss": 2.5146, "step": 50530 }, { "epoch": 3.7660208643815203, "grad_norm": 2.5869789123535156, "learning_rate": 0.0002, "loss": 2.5206, "step": 50540 }, { "epoch": 3.7667660208643814, "grad_norm": 2.71201491355896, "learning_rate": 0.0002, "loss": 2.4387, "step": 50550 }, { "epoch": 3.767511177347243, "grad_norm": 2.881373167037964, "learning_rate": 0.0002, "loss": 2.5828, "step": 50560 }, { "epoch": 3.768256333830104, "grad_norm": 2.6753411293029785, "learning_rate": 0.0002, "loss": 2.6769, "step": 50570 }, { "epoch": 3.7690014903129656, "grad_norm": 2.626857042312622, "learning_rate": 0.0002, "loss": 2.5313, "step": 50580 }, { "epoch": 3.769746646795827, "grad_norm": 2.0912129878997803, "learning_rate": 0.0002, "loss": 2.1949, "step": 50590 }, { "epoch": 3.7704918032786887, "grad_norm": 2.5169942378997803, "learning_rate": 0.0002, "loss": 2.4955, "step": 50600 }, { "epoch": 3.77123695976155, "grad_norm": 2.7082998752593994, "learning_rate": 0.0002, "loss": 2.5901, "step": 50610 }, { "epoch": 3.7719821162444114, "grad_norm": 2.681004762649536, "learning_rate": 0.0002, "loss": 2.5101, "step": 50620 }, { "epoch": 3.7727272727272725, "grad_norm": 2.7208945751190186, "learning_rate": 0.0002, "loss": 2.4098, "step": 50630 }, { "epoch": 3.773472429210134, "grad_norm": 2.369579553604126, "learning_rate": 0.0002, "loss": 2.4761, "step": 50640 }, { "epoch": 3.7742175856929956, "grad_norm": 2.708627223968506, "learning_rate": 0.0002, "loss": 2.5875, "step": 50650 }, { "epoch": 3.774962742175857, "grad_norm": 2.6574110984802246, "learning_rate": 0.0002, "loss": 2.4478, "step": 50660 }, { "epoch": 3.7757078986587183, "grad_norm": 2.582005739212036, "learning_rate": 0.0002, "loss": 2.4235, "step": 50670 }, { "epoch": 3.77645305514158, "grad_norm": 2.9919252395629883, "learning_rate": 0.0002, "loss": 2.5915, "step": 50680 }, { "epoch": 3.777198211624441, "grad_norm": 2.489131212234497, "learning_rate": 0.0002, "loss": 2.5323, "step": 50690 }, { "epoch": 3.7779433681073025, "grad_norm": 3.1119613647460938, "learning_rate": 0.0002, "loss": 2.4534, "step": 50700 }, { "epoch": 3.778688524590164, "grad_norm": 2.639336585998535, "learning_rate": 0.0002, "loss": 2.5858, "step": 50710 }, { "epoch": 3.7794336810730256, "grad_norm": 2.6446197032928467, "learning_rate": 0.0002, "loss": 2.4894, "step": 50720 }, { "epoch": 3.7801788375558867, "grad_norm": 2.721428871154785, "learning_rate": 0.0002, "loss": 2.6664, "step": 50730 }, { "epoch": 3.7809239940387482, "grad_norm": 3.0219340324401855, "learning_rate": 0.0002, "loss": 2.5447, "step": 50740 }, { "epoch": 3.7816691505216093, "grad_norm": 2.371525764465332, "learning_rate": 0.0002, "loss": 2.2999, "step": 50750 }, { "epoch": 3.782414307004471, "grad_norm": 3.023408889770508, "learning_rate": 0.0002, "loss": 2.4849, "step": 50760 }, { "epoch": 3.7831594634873325, "grad_norm": 2.1331787109375, "learning_rate": 0.0002, "loss": 2.5371, "step": 50770 }, { "epoch": 3.783904619970194, "grad_norm": 2.6607348918914795, "learning_rate": 0.0002, "loss": 2.5061, "step": 50780 }, { "epoch": 3.784649776453055, "grad_norm": 2.316195011138916, "learning_rate": 0.0002, "loss": 2.5484, "step": 50790 }, { "epoch": 3.7853949329359167, "grad_norm": 2.7560927867889404, "learning_rate": 0.0002, "loss": 2.4545, "step": 50800 }, { "epoch": 3.7861400894187778, "grad_norm": 2.3662703037261963, "learning_rate": 0.0002, "loss": 2.5492, "step": 50810 }, { "epoch": 3.7868852459016393, "grad_norm": 2.9224653244018555, "learning_rate": 0.0002, "loss": 2.6868, "step": 50820 }, { "epoch": 3.787630402384501, "grad_norm": 2.669923782348633, "learning_rate": 0.0002, "loss": 2.3945, "step": 50830 }, { "epoch": 3.788375558867362, "grad_norm": 2.4683589935302734, "learning_rate": 0.0002, "loss": 2.6, "step": 50840 }, { "epoch": 3.7891207153502235, "grad_norm": 2.4863712787628174, "learning_rate": 0.0002, "loss": 2.6107, "step": 50850 }, { "epoch": 3.789865871833085, "grad_norm": 2.4746665954589844, "learning_rate": 0.0002, "loss": 2.421, "step": 50860 }, { "epoch": 3.790611028315946, "grad_norm": 2.53080153465271, "learning_rate": 0.0002, "loss": 2.2403, "step": 50870 }, { "epoch": 3.7913561847988078, "grad_norm": 2.8547794818878174, "learning_rate": 0.0002, "loss": 2.4001, "step": 50880 }, { "epoch": 3.7921013412816693, "grad_norm": 2.5060040950775146, "learning_rate": 0.0002, "loss": 2.4906, "step": 50890 }, { "epoch": 3.7928464977645304, "grad_norm": 2.542915105819702, "learning_rate": 0.0002, "loss": 2.3689, "step": 50900 }, { "epoch": 3.793591654247392, "grad_norm": 2.546762704849243, "learning_rate": 0.0002, "loss": 2.5893, "step": 50910 }, { "epoch": 3.794336810730253, "grad_norm": 2.9480457305908203, "learning_rate": 0.0002, "loss": 2.4305, "step": 50920 }, { "epoch": 3.7950819672131146, "grad_norm": 2.469069719314575, "learning_rate": 0.0002, "loss": 2.3765, "step": 50930 }, { "epoch": 3.795827123695976, "grad_norm": 2.714322328567505, "learning_rate": 0.0002, "loss": 2.5366, "step": 50940 }, { "epoch": 3.7965722801788377, "grad_norm": 2.614262580871582, "learning_rate": 0.0002, "loss": 2.721, "step": 50950 }, { "epoch": 3.797317436661699, "grad_norm": 2.443918466567993, "learning_rate": 0.0002, "loss": 2.3982, "step": 50960 }, { "epoch": 3.7980625931445604, "grad_norm": 2.338960647583008, "learning_rate": 0.0002, "loss": 2.3152, "step": 50970 }, { "epoch": 3.7988077496274215, "grad_norm": 2.9768331050872803, "learning_rate": 0.0002, "loss": 2.4752, "step": 50980 }, { "epoch": 3.799552906110283, "grad_norm": 2.6073148250579834, "learning_rate": 0.0002, "loss": 2.4918, "step": 50990 }, { "epoch": 3.8002980625931446, "grad_norm": 2.7576828002929688, "learning_rate": 0.0002, "loss": 2.5247, "step": 51000 }, { "epoch": 3.801043219076006, "grad_norm": 2.710273504257202, "learning_rate": 0.0002, "loss": 2.6895, "step": 51010 }, { "epoch": 3.8017883755588673, "grad_norm": 2.608431339263916, "learning_rate": 0.0002, "loss": 2.4041, "step": 51020 }, { "epoch": 3.802533532041729, "grad_norm": 2.511503219604492, "learning_rate": 0.0002, "loss": 2.4616, "step": 51030 }, { "epoch": 3.80327868852459, "grad_norm": 2.6001126766204834, "learning_rate": 0.0002, "loss": 2.467, "step": 51040 }, { "epoch": 3.8040238450074515, "grad_norm": 2.7922747135162354, "learning_rate": 0.0002, "loss": 2.5395, "step": 51050 }, { "epoch": 3.804769001490313, "grad_norm": 2.260394334793091, "learning_rate": 0.0002, "loss": 2.2577, "step": 51060 }, { "epoch": 3.8055141579731746, "grad_norm": 2.6235592365264893, "learning_rate": 0.0002, "loss": 2.3754, "step": 51070 }, { "epoch": 3.8062593144560357, "grad_norm": 2.6814324855804443, "learning_rate": 0.0002, "loss": 2.495, "step": 51080 }, { "epoch": 3.8070044709388973, "grad_norm": 1.8012224435806274, "learning_rate": 0.0002, "loss": 2.4035, "step": 51090 }, { "epoch": 3.8077496274217584, "grad_norm": 2.7128658294677734, "learning_rate": 0.0002, "loss": 2.6043, "step": 51100 }, { "epoch": 3.80849478390462, "grad_norm": 2.600168466567993, "learning_rate": 0.0002, "loss": 2.6245, "step": 51110 }, { "epoch": 3.8092399403874815, "grad_norm": 2.9342854022979736, "learning_rate": 0.0002, "loss": 2.5065, "step": 51120 }, { "epoch": 3.809985096870343, "grad_norm": 2.6882317066192627, "learning_rate": 0.0002, "loss": 2.5987, "step": 51130 }, { "epoch": 3.810730253353204, "grad_norm": 2.672231674194336, "learning_rate": 0.0002, "loss": 2.5405, "step": 51140 }, { "epoch": 3.8114754098360657, "grad_norm": 2.7051048278808594, "learning_rate": 0.0002, "loss": 2.5367, "step": 51150 }, { "epoch": 3.812220566318927, "grad_norm": 2.8326873779296875, "learning_rate": 0.0002, "loss": 2.6169, "step": 51160 }, { "epoch": 3.8129657228017884, "grad_norm": 2.9274744987487793, "learning_rate": 0.0002, "loss": 2.6881, "step": 51170 }, { "epoch": 3.81371087928465, "grad_norm": 2.338207721710205, "learning_rate": 0.0002, "loss": 2.5462, "step": 51180 }, { "epoch": 3.814456035767511, "grad_norm": 2.5919198989868164, "learning_rate": 0.0002, "loss": 2.2888, "step": 51190 }, { "epoch": 3.8152011922503726, "grad_norm": 2.5705173015594482, "learning_rate": 0.0002, "loss": 2.5768, "step": 51200 }, { "epoch": 3.815946348733234, "grad_norm": 2.6229724884033203, "learning_rate": 0.0002, "loss": 2.4647, "step": 51210 }, { "epoch": 3.8166915052160952, "grad_norm": 2.2743210792541504, "learning_rate": 0.0002, "loss": 2.3109, "step": 51220 }, { "epoch": 3.817436661698957, "grad_norm": 2.2738277912139893, "learning_rate": 0.0002, "loss": 2.5418, "step": 51230 }, { "epoch": 3.8181818181818183, "grad_norm": 2.817492961883545, "learning_rate": 0.0002, "loss": 2.5758, "step": 51240 }, { "epoch": 3.8189269746646795, "grad_norm": 2.5765511989593506, "learning_rate": 0.0002, "loss": 2.428, "step": 51250 }, { "epoch": 3.819672131147541, "grad_norm": 2.62100887298584, "learning_rate": 0.0002, "loss": 2.2739, "step": 51260 }, { "epoch": 3.820417287630402, "grad_norm": 2.599499225616455, "learning_rate": 0.0002, "loss": 2.6626, "step": 51270 }, { "epoch": 3.8211624441132637, "grad_norm": 3.347637176513672, "learning_rate": 0.0002, "loss": 2.441, "step": 51280 }, { "epoch": 3.821907600596125, "grad_norm": 2.842421293258667, "learning_rate": 0.0002, "loss": 2.5185, "step": 51290 }, { "epoch": 3.8226527570789868, "grad_norm": 2.7661526203155518, "learning_rate": 0.0002, "loss": 2.5964, "step": 51300 }, { "epoch": 3.823397913561848, "grad_norm": 2.7636051177978516, "learning_rate": 0.0002, "loss": 2.6437, "step": 51310 }, { "epoch": 3.8241430700447094, "grad_norm": 2.534313678741455, "learning_rate": 0.0002, "loss": 2.4475, "step": 51320 }, { "epoch": 3.8248882265275705, "grad_norm": 1.8897809982299805, "learning_rate": 0.0002, "loss": 2.1495, "step": 51330 }, { "epoch": 3.825633383010432, "grad_norm": 2.465552806854248, "learning_rate": 0.0002, "loss": 2.4313, "step": 51340 }, { "epoch": 3.8263785394932937, "grad_norm": 2.8640079498291016, "learning_rate": 0.0002, "loss": 2.6838, "step": 51350 }, { "epoch": 3.827123695976155, "grad_norm": 1.6258368492126465, "learning_rate": 0.0002, "loss": 2.3335, "step": 51360 }, { "epoch": 3.8278688524590163, "grad_norm": 2.6712746620178223, "learning_rate": 0.0002, "loss": 2.5755, "step": 51370 }, { "epoch": 3.828614008941878, "grad_norm": 2.809267282485962, "learning_rate": 0.0002, "loss": 2.4954, "step": 51380 }, { "epoch": 3.829359165424739, "grad_norm": 2.894386053085327, "learning_rate": 0.0002, "loss": 2.4332, "step": 51390 }, { "epoch": 3.8301043219076005, "grad_norm": 2.8683619499206543, "learning_rate": 0.0002, "loss": 2.4112, "step": 51400 }, { "epoch": 3.830849478390462, "grad_norm": 2.383283853530884, "learning_rate": 0.0002, "loss": 2.5905, "step": 51410 }, { "epoch": 3.8315946348733236, "grad_norm": 2.6616315841674805, "learning_rate": 0.0002, "loss": 2.416, "step": 51420 }, { "epoch": 3.8323397913561847, "grad_norm": 3.09716796875, "learning_rate": 0.0002, "loss": 2.5738, "step": 51430 }, { "epoch": 3.8330849478390463, "grad_norm": 2.646336793899536, "learning_rate": 0.0002, "loss": 2.5713, "step": 51440 }, { "epoch": 3.8338301043219074, "grad_norm": 2.847592353820801, "learning_rate": 0.0002, "loss": 2.3993, "step": 51450 }, { "epoch": 3.834575260804769, "grad_norm": 2.573282480239868, "learning_rate": 0.0002, "loss": 2.4144, "step": 51460 }, { "epoch": 3.8353204172876305, "grad_norm": 2.621114492416382, "learning_rate": 0.0002, "loss": 2.418, "step": 51470 }, { "epoch": 3.836065573770492, "grad_norm": 2.757758855819702, "learning_rate": 0.0002, "loss": 2.3968, "step": 51480 }, { "epoch": 3.836810730253353, "grad_norm": 2.8561811447143555, "learning_rate": 0.0002, "loss": 2.4822, "step": 51490 }, { "epoch": 3.8375558867362147, "grad_norm": 2.84928035736084, "learning_rate": 0.0002, "loss": 2.2998, "step": 51500 }, { "epoch": 3.838301043219076, "grad_norm": 2.6037542819976807, "learning_rate": 0.0002, "loss": 2.4761, "step": 51510 }, { "epoch": 3.8390461997019374, "grad_norm": 3.122798442840576, "learning_rate": 0.0002, "loss": 2.6203, "step": 51520 }, { "epoch": 3.839791356184799, "grad_norm": 2.524787664413452, "learning_rate": 0.0002, "loss": 2.4676, "step": 51530 }, { "epoch": 3.84053651266766, "grad_norm": 2.385199785232544, "learning_rate": 0.0002, "loss": 2.4442, "step": 51540 }, { "epoch": 3.8412816691505216, "grad_norm": 2.4337494373321533, "learning_rate": 0.0002, "loss": 2.5004, "step": 51550 }, { "epoch": 3.842026825633383, "grad_norm": 2.2421205043792725, "learning_rate": 0.0002, "loss": 2.362, "step": 51560 }, { "epoch": 3.8427719821162443, "grad_norm": 2.355182647705078, "learning_rate": 0.0002, "loss": 2.4003, "step": 51570 }, { "epoch": 3.843517138599106, "grad_norm": 2.7259955406188965, "learning_rate": 0.0002, "loss": 2.362, "step": 51580 }, { "epoch": 3.8442622950819674, "grad_norm": 2.447803258895874, "learning_rate": 0.0002, "loss": 2.4592, "step": 51590 }, { "epoch": 3.8450074515648285, "grad_norm": 3.1207966804504395, "learning_rate": 0.0002, "loss": 2.3862, "step": 51600 }, { "epoch": 3.84575260804769, "grad_norm": 3.0702199935913086, "learning_rate": 0.0002, "loss": 2.58, "step": 51610 }, { "epoch": 3.846497764530551, "grad_norm": 2.7334744930267334, "learning_rate": 0.0002, "loss": 2.5699, "step": 51620 }, { "epoch": 3.8472429210134127, "grad_norm": 2.2549920082092285, "learning_rate": 0.0002, "loss": 2.3604, "step": 51630 }, { "epoch": 3.8479880774962743, "grad_norm": 2.400271415710449, "learning_rate": 0.0002, "loss": 2.6297, "step": 51640 }, { "epoch": 3.848733233979136, "grad_norm": 2.1789305210113525, "learning_rate": 0.0002, "loss": 2.5277, "step": 51650 }, { "epoch": 3.849478390461997, "grad_norm": 2.701901435852051, "learning_rate": 0.0002, "loss": 2.4133, "step": 51660 }, { "epoch": 3.8502235469448585, "grad_norm": 2.7472198009490967, "learning_rate": 0.0002, "loss": 2.3137, "step": 51670 }, { "epoch": 3.8509687034277196, "grad_norm": 2.6537559032440186, "learning_rate": 0.0002, "loss": 2.5885, "step": 51680 }, { "epoch": 3.851713859910581, "grad_norm": 2.521488666534424, "learning_rate": 0.0002, "loss": 2.5628, "step": 51690 }, { "epoch": 3.8524590163934427, "grad_norm": 2.5507121086120605, "learning_rate": 0.0002, "loss": 2.4122, "step": 51700 }, { "epoch": 3.8532041728763042, "grad_norm": 2.589373826980591, "learning_rate": 0.0002, "loss": 2.6058, "step": 51710 }, { "epoch": 3.8539493293591653, "grad_norm": 2.9518160820007324, "learning_rate": 0.0002, "loss": 2.5514, "step": 51720 }, { "epoch": 3.854694485842027, "grad_norm": 2.976022243499756, "learning_rate": 0.0002, "loss": 2.556, "step": 51730 }, { "epoch": 3.855439642324888, "grad_norm": 2.435248374938965, "learning_rate": 0.0002, "loss": 2.4229, "step": 51740 }, { "epoch": 3.8561847988077496, "grad_norm": 2.6801443099975586, "learning_rate": 0.0002, "loss": 2.4686, "step": 51750 }, { "epoch": 3.856929955290611, "grad_norm": 1.9394075870513916, "learning_rate": 0.0002, "loss": 2.2974, "step": 51760 }, { "epoch": 3.8576751117734727, "grad_norm": 2.496823310852051, "learning_rate": 0.0002, "loss": 2.3496, "step": 51770 }, { "epoch": 3.8584202682563338, "grad_norm": 3.03865122795105, "learning_rate": 0.0002, "loss": 2.2881, "step": 51780 }, { "epoch": 3.8591654247391953, "grad_norm": 2.5730419158935547, "learning_rate": 0.0002, "loss": 2.392, "step": 51790 }, { "epoch": 3.8599105812220564, "grad_norm": 2.6450493335723877, "learning_rate": 0.0002, "loss": 2.5092, "step": 51800 }, { "epoch": 3.860655737704918, "grad_norm": 2.5688185691833496, "learning_rate": 0.0002, "loss": 2.366, "step": 51810 }, { "epoch": 3.8614008941877795, "grad_norm": 2.4910011291503906, "learning_rate": 0.0002, "loss": 2.4483, "step": 51820 }, { "epoch": 3.862146050670641, "grad_norm": 3.078287363052368, "learning_rate": 0.0002, "loss": 2.6476, "step": 51830 }, { "epoch": 3.862891207153502, "grad_norm": 2.2944555282592773, "learning_rate": 0.0002, "loss": 2.5578, "step": 51840 }, { "epoch": 3.8636363636363638, "grad_norm": 2.777221202850342, "learning_rate": 0.0002, "loss": 2.4892, "step": 51850 }, { "epoch": 3.864381520119225, "grad_norm": 2.459630250930786, "learning_rate": 0.0002, "loss": 2.5713, "step": 51860 }, { "epoch": 3.8651266766020864, "grad_norm": 3.0153188705444336, "learning_rate": 0.0002, "loss": 2.6051, "step": 51870 }, { "epoch": 3.865871833084948, "grad_norm": 2.6499626636505127, "learning_rate": 0.0002, "loss": 2.5795, "step": 51880 }, { "epoch": 3.866616989567809, "grad_norm": 2.9734129905700684, "learning_rate": 0.0002, "loss": 2.5746, "step": 51890 }, { "epoch": 3.8673621460506706, "grad_norm": 2.2308945655822754, "learning_rate": 0.0002, "loss": 2.4392, "step": 51900 }, { "epoch": 3.868107302533532, "grad_norm": 2.5618507862091064, "learning_rate": 0.0002, "loss": 2.4746, "step": 51910 }, { "epoch": 3.8688524590163933, "grad_norm": 2.3245627880096436, "learning_rate": 0.0002, "loss": 2.4258, "step": 51920 }, { "epoch": 3.869597615499255, "grad_norm": 2.2224037647247314, "learning_rate": 0.0002, "loss": 2.5592, "step": 51930 }, { "epoch": 3.8703427719821164, "grad_norm": 3.254966974258423, "learning_rate": 0.0002, "loss": 2.5822, "step": 51940 }, { "epoch": 3.8710879284649775, "grad_norm": 2.469639778137207, "learning_rate": 0.0002, "loss": 2.4718, "step": 51950 }, { "epoch": 3.871833084947839, "grad_norm": 2.6414148807525635, "learning_rate": 0.0002, "loss": 2.5728, "step": 51960 }, { "epoch": 3.8725782414307, "grad_norm": 2.460594892501831, "learning_rate": 0.0002, "loss": 2.3055, "step": 51970 }, { "epoch": 3.8733233979135617, "grad_norm": 2.6977572441101074, "learning_rate": 0.0002, "loss": 2.5869, "step": 51980 }, { "epoch": 3.8740685543964233, "grad_norm": 2.5198538303375244, "learning_rate": 0.0002, "loss": 2.3507, "step": 51990 }, { "epoch": 3.874813710879285, "grad_norm": 2.7170722484588623, "learning_rate": 0.0002, "loss": 2.6903, "step": 52000 }, { "epoch": 3.875558867362146, "grad_norm": 2.7183640003204346, "learning_rate": 0.0002, "loss": 2.6414, "step": 52010 }, { "epoch": 3.8763040238450075, "grad_norm": 2.5241034030914307, "learning_rate": 0.0002, "loss": 2.2518, "step": 52020 }, { "epoch": 3.8770491803278686, "grad_norm": 2.6409521102905273, "learning_rate": 0.0002, "loss": 2.4558, "step": 52030 }, { "epoch": 3.87779433681073, "grad_norm": 2.5587220191955566, "learning_rate": 0.0002, "loss": 2.3879, "step": 52040 }, { "epoch": 3.8785394932935917, "grad_norm": 2.8432421684265137, "learning_rate": 0.0002, "loss": 2.6653, "step": 52050 }, { "epoch": 3.8792846497764533, "grad_norm": 3.088017225265503, "learning_rate": 0.0002, "loss": 2.5824, "step": 52060 }, { "epoch": 3.8800298062593144, "grad_norm": 2.6075172424316406, "learning_rate": 0.0002, "loss": 2.2853, "step": 52070 }, { "epoch": 3.880774962742176, "grad_norm": 2.3779594898223877, "learning_rate": 0.0002, "loss": 2.5866, "step": 52080 }, { "epoch": 3.881520119225037, "grad_norm": 2.727888584136963, "learning_rate": 0.0002, "loss": 2.6519, "step": 52090 }, { "epoch": 3.8822652757078986, "grad_norm": 3.6070122718811035, "learning_rate": 0.0002, "loss": 2.4691, "step": 52100 }, { "epoch": 3.88301043219076, "grad_norm": 2.8092737197875977, "learning_rate": 0.0002, "loss": 2.5209, "step": 52110 }, { "epoch": 3.8837555886736217, "grad_norm": 2.367863416671753, "learning_rate": 0.0002, "loss": 2.3449, "step": 52120 }, { "epoch": 3.884500745156483, "grad_norm": 2.1919851303100586, "learning_rate": 0.0002, "loss": 2.6012, "step": 52130 }, { "epoch": 3.8852459016393444, "grad_norm": 2.689823865890503, "learning_rate": 0.0002, "loss": 2.5961, "step": 52140 }, { "epoch": 3.8859910581222055, "grad_norm": 2.8224596977233887, "learning_rate": 0.0002, "loss": 2.4739, "step": 52150 }, { "epoch": 3.886736214605067, "grad_norm": 2.892871856689453, "learning_rate": 0.0002, "loss": 2.6218, "step": 52160 }, { "epoch": 3.8874813710879286, "grad_norm": 2.5560922622680664, "learning_rate": 0.0002, "loss": 2.4748, "step": 52170 }, { "epoch": 3.88822652757079, "grad_norm": 2.412459135055542, "learning_rate": 0.0002, "loss": 2.5312, "step": 52180 }, { "epoch": 3.8889716840536512, "grad_norm": 2.6065220832824707, "learning_rate": 0.0002, "loss": 2.3348, "step": 52190 }, { "epoch": 3.889716840536513, "grad_norm": 2.771390199661255, "learning_rate": 0.0002, "loss": 2.4942, "step": 52200 }, { "epoch": 3.890461997019374, "grad_norm": 2.511683702468872, "learning_rate": 0.0002, "loss": 2.5767, "step": 52210 }, { "epoch": 3.8912071535022354, "grad_norm": 2.926457643508911, "learning_rate": 0.0002, "loss": 2.4977, "step": 52220 }, { "epoch": 3.891952309985097, "grad_norm": 2.466374397277832, "learning_rate": 0.0002, "loss": 2.5909, "step": 52230 }, { "epoch": 3.892697466467958, "grad_norm": 2.454491376876831, "learning_rate": 0.0002, "loss": 2.2894, "step": 52240 }, { "epoch": 3.8934426229508197, "grad_norm": 2.548574209213257, "learning_rate": 0.0002, "loss": 2.4649, "step": 52250 }, { "epoch": 3.894187779433681, "grad_norm": 2.4088494777679443, "learning_rate": 0.0002, "loss": 2.1674, "step": 52260 }, { "epoch": 3.8949329359165423, "grad_norm": 2.7018039226531982, "learning_rate": 0.0002, "loss": 2.5642, "step": 52270 }, { "epoch": 3.895678092399404, "grad_norm": 2.7482492923736572, "learning_rate": 0.0002, "loss": 2.4304, "step": 52280 }, { "epoch": 3.8964232488822654, "grad_norm": 2.329071521759033, "learning_rate": 0.0002, "loss": 2.4925, "step": 52290 }, { "epoch": 3.8971684053651265, "grad_norm": 2.745163917541504, "learning_rate": 0.0002, "loss": 2.465, "step": 52300 }, { "epoch": 3.897913561847988, "grad_norm": 2.767301559448242, "learning_rate": 0.0002, "loss": 2.5761, "step": 52310 }, { "epoch": 3.898658718330849, "grad_norm": 2.6464600563049316, "learning_rate": 0.0002, "loss": 2.3783, "step": 52320 }, { "epoch": 3.8994038748137108, "grad_norm": 2.457310199737549, "learning_rate": 0.0002, "loss": 2.0868, "step": 52330 }, { "epoch": 3.9001490312965723, "grad_norm": 2.3915839195251465, "learning_rate": 0.0002, "loss": 2.4427, "step": 52340 }, { "epoch": 3.900894187779434, "grad_norm": 2.6058714389801025, "learning_rate": 0.0002, "loss": 2.4721, "step": 52350 }, { "epoch": 3.901639344262295, "grad_norm": 3.087757110595703, "learning_rate": 0.0002, "loss": 2.567, "step": 52360 }, { "epoch": 3.9023845007451565, "grad_norm": 2.904249429702759, "learning_rate": 0.0002, "loss": 2.4766, "step": 52370 }, { "epoch": 3.9031296572280176, "grad_norm": 2.673194169998169, "learning_rate": 0.0002, "loss": 2.6861, "step": 52380 }, { "epoch": 3.903874813710879, "grad_norm": 2.4433786869049072, "learning_rate": 0.0002, "loss": 2.4555, "step": 52390 }, { "epoch": 3.9046199701937407, "grad_norm": 2.482530117034912, "learning_rate": 0.0002, "loss": 2.5579, "step": 52400 }, { "epoch": 3.9053651266766023, "grad_norm": 2.7197022438049316, "learning_rate": 0.0002, "loss": 2.4326, "step": 52410 }, { "epoch": 3.9061102831594634, "grad_norm": 2.534710645675659, "learning_rate": 0.0002, "loss": 2.7553, "step": 52420 }, { "epoch": 3.906855439642325, "grad_norm": 2.1356122493743896, "learning_rate": 0.0002, "loss": 2.2721, "step": 52430 }, { "epoch": 3.907600596125186, "grad_norm": 2.3630571365356445, "learning_rate": 0.0002, "loss": 2.5597, "step": 52440 }, { "epoch": 3.9083457526080476, "grad_norm": 2.6591246128082275, "learning_rate": 0.0002, "loss": 2.4836, "step": 52450 }, { "epoch": 3.909090909090909, "grad_norm": 2.511927604675293, "learning_rate": 0.0002, "loss": 2.6249, "step": 52460 }, { "epoch": 3.9098360655737707, "grad_norm": 2.8676466941833496, "learning_rate": 0.0002, "loss": 2.4936, "step": 52470 }, { "epoch": 3.910581222056632, "grad_norm": 2.7902708053588867, "learning_rate": 0.0002, "loss": 2.5414, "step": 52480 }, { "epoch": 3.9113263785394934, "grad_norm": 2.6967098712921143, "learning_rate": 0.0002, "loss": 2.5556, "step": 52490 }, { "epoch": 3.9120715350223545, "grad_norm": 2.569732904434204, "learning_rate": 0.0002, "loss": 2.4874, "step": 52500 }, { "epoch": 3.912816691505216, "grad_norm": 2.595181465148926, "learning_rate": 0.0002, "loss": 2.5599, "step": 52510 }, { "epoch": 3.9135618479880776, "grad_norm": 2.3675436973571777, "learning_rate": 0.0002, "loss": 2.6449, "step": 52520 }, { "epoch": 3.914307004470939, "grad_norm": 2.709636688232422, "learning_rate": 0.0002, "loss": 2.5443, "step": 52530 }, { "epoch": 3.9150521609538003, "grad_norm": 2.7258338928222656, "learning_rate": 0.0002, "loss": 2.3612, "step": 52540 }, { "epoch": 3.915797317436662, "grad_norm": 3.1132118701934814, "learning_rate": 0.0002, "loss": 2.3272, "step": 52550 }, { "epoch": 3.916542473919523, "grad_norm": 2.628192186355591, "learning_rate": 0.0002, "loss": 2.4815, "step": 52560 }, { "epoch": 3.9172876304023845, "grad_norm": 2.5047247409820557, "learning_rate": 0.0002, "loss": 2.4923, "step": 52570 }, { "epoch": 3.918032786885246, "grad_norm": 2.726745843887329, "learning_rate": 0.0002, "loss": 2.4188, "step": 52580 }, { "epoch": 3.918777943368107, "grad_norm": 2.729163646697998, "learning_rate": 0.0002, "loss": 2.5176, "step": 52590 }, { "epoch": 3.9195230998509687, "grad_norm": 2.8599231243133545, "learning_rate": 0.0002, "loss": 2.5171, "step": 52600 }, { "epoch": 3.9202682563338302, "grad_norm": 2.2114243507385254, "learning_rate": 0.0002, "loss": 2.5073, "step": 52610 }, { "epoch": 3.9210134128166914, "grad_norm": 2.164640426635742, "learning_rate": 0.0002, "loss": 2.6172, "step": 52620 }, { "epoch": 3.921758569299553, "grad_norm": 2.7725958824157715, "learning_rate": 0.0002, "loss": 2.5268, "step": 52630 }, { "epoch": 3.9225037257824145, "grad_norm": 2.6083829402923584, "learning_rate": 0.0002, "loss": 2.4921, "step": 52640 }, { "epoch": 3.9232488822652756, "grad_norm": 2.948575496673584, "learning_rate": 0.0002, "loss": 2.5642, "step": 52650 }, { "epoch": 3.923994038748137, "grad_norm": 2.494868755340576, "learning_rate": 0.0002, "loss": 2.4473, "step": 52660 }, { "epoch": 3.9247391952309982, "grad_norm": 3.170365333557129, "learning_rate": 0.0002, "loss": 2.542, "step": 52670 }, { "epoch": 3.92548435171386, "grad_norm": 2.6471188068389893, "learning_rate": 0.0002, "loss": 2.436, "step": 52680 }, { "epoch": 3.9262295081967213, "grad_norm": 2.868462562561035, "learning_rate": 0.0002, "loss": 2.4097, "step": 52690 }, { "epoch": 3.926974664679583, "grad_norm": 2.8815088272094727, "learning_rate": 0.0002, "loss": 2.5023, "step": 52700 }, { "epoch": 3.927719821162444, "grad_norm": 2.6703245639801025, "learning_rate": 0.0002, "loss": 2.5889, "step": 52710 }, { "epoch": 3.9284649776453056, "grad_norm": 2.9111456871032715, "learning_rate": 0.0002, "loss": 2.3997, "step": 52720 }, { "epoch": 3.9292101341281667, "grad_norm": 3.0684051513671875, "learning_rate": 0.0002, "loss": 2.379, "step": 52730 }, { "epoch": 3.929955290611028, "grad_norm": 2.901998996734619, "learning_rate": 0.0002, "loss": 2.6043, "step": 52740 }, { "epoch": 3.9307004470938898, "grad_norm": 3.4542715549468994, "learning_rate": 0.0002, "loss": 2.2915, "step": 52750 }, { "epoch": 3.9314456035767513, "grad_norm": 2.3926374912261963, "learning_rate": 0.0002, "loss": 2.2738, "step": 52760 }, { "epoch": 3.9321907600596124, "grad_norm": 2.790262222290039, "learning_rate": 0.0002, "loss": 2.4374, "step": 52770 }, { "epoch": 3.932935916542474, "grad_norm": 3.287041664123535, "learning_rate": 0.0002, "loss": 2.3272, "step": 52780 }, { "epoch": 3.933681073025335, "grad_norm": 2.6632344722747803, "learning_rate": 0.0002, "loss": 2.619, "step": 52790 }, { "epoch": 3.9344262295081966, "grad_norm": 2.1901233196258545, "learning_rate": 0.0002, "loss": 2.3979, "step": 52800 }, { "epoch": 3.935171385991058, "grad_norm": 2.72855544090271, "learning_rate": 0.0002, "loss": 2.5868, "step": 52810 }, { "epoch": 3.9359165424739198, "grad_norm": 2.598860502243042, "learning_rate": 0.0002, "loss": 2.4699, "step": 52820 }, { "epoch": 3.936661698956781, "grad_norm": 2.5119919776916504, "learning_rate": 0.0002, "loss": 2.3506, "step": 52830 }, { "epoch": 3.9374068554396424, "grad_norm": 2.60629940032959, "learning_rate": 0.0002, "loss": 2.5943, "step": 52840 }, { "epoch": 3.9381520119225035, "grad_norm": 2.4986929893493652, "learning_rate": 0.0002, "loss": 2.4542, "step": 52850 }, { "epoch": 3.938897168405365, "grad_norm": 2.564370632171631, "learning_rate": 0.0002, "loss": 2.4587, "step": 52860 }, { "epoch": 3.9396423248882266, "grad_norm": 3.0465457439422607, "learning_rate": 0.0002, "loss": 2.5511, "step": 52870 }, { "epoch": 3.940387481371088, "grad_norm": 2.802629232406616, "learning_rate": 0.0002, "loss": 2.5935, "step": 52880 }, { "epoch": 3.9411326378539493, "grad_norm": 2.5224101543426514, "learning_rate": 0.0002, "loss": 2.4176, "step": 52890 }, { "epoch": 3.941877794336811, "grad_norm": 2.601170301437378, "learning_rate": 0.0002, "loss": 2.4259, "step": 52900 }, { "epoch": 3.942622950819672, "grad_norm": 2.618849515914917, "learning_rate": 0.0002, "loss": 2.5917, "step": 52910 }, { "epoch": 3.9433681073025335, "grad_norm": 2.3083279132843018, "learning_rate": 0.0002, "loss": 2.4501, "step": 52920 }, { "epoch": 3.944113263785395, "grad_norm": 2.783717155456543, "learning_rate": 0.0002, "loss": 2.6085, "step": 52930 }, { "epoch": 3.944858420268256, "grad_norm": 2.4879956245422363, "learning_rate": 0.0002, "loss": 2.3325, "step": 52940 }, { "epoch": 3.9456035767511177, "grad_norm": 2.765442132949829, "learning_rate": 0.0002, "loss": 2.5773, "step": 52950 }, { "epoch": 3.9463487332339793, "grad_norm": 2.7897586822509766, "learning_rate": 0.0002, "loss": 2.4599, "step": 52960 }, { "epoch": 3.9470938897168404, "grad_norm": 2.905817747116089, "learning_rate": 0.0002, "loss": 2.475, "step": 52970 }, { "epoch": 3.947839046199702, "grad_norm": 2.8461194038391113, "learning_rate": 0.0002, "loss": 2.6288, "step": 52980 }, { "epoch": 3.9485842026825635, "grad_norm": 2.5353071689605713, "learning_rate": 0.0002, "loss": 2.4977, "step": 52990 }, { "epoch": 3.9493293591654246, "grad_norm": 2.5417919158935547, "learning_rate": 0.0002, "loss": 2.4847, "step": 53000 }, { "epoch": 3.950074515648286, "grad_norm": 2.5590386390686035, "learning_rate": 0.0002, "loss": 2.4574, "step": 53010 }, { "epoch": 3.9508196721311473, "grad_norm": 2.5833194255828857, "learning_rate": 0.0002, "loss": 2.1748, "step": 53020 }, { "epoch": 3.951564828614009, "grad_norm": 2.8139731884002686, "learning_rate": 0.0002, "loss": 2.5048, "step": 53030 }, { "epoch": 3.9523099850968704, "grad_norm": 2.520247459411621, "learning_rate": 0.0002, "loss": 2.5423, "step": 53040 }, { "epoch": 3.953055141579732, "grad_norm": 2.8328166007995605, "learning_rate": 0.0002, "loss": 2.7476, "step": 53050 }, { "epoch": 3.953800298062593, "grad_norm": 2.491218328475952, "learning_rate": 0.0002, "loss": 2.5617, "step": 53060 }, { "epoch": 3.9545454545454546, "grad_norm": 2.5774359703063965, "learning_rate": 0.0002, "loss": 2.4691, "step": 53070 }, { "epoch": 3.9552906110283157, "grad_norm": 2.8533051013946533, "learning_rate": 0.0002, "loss": 2.7179, "step": 53080 }, { "epoch": 3.9560357675111772, "grad_norm": 2.5596423149108887, "learning_rate": 0.0002, "loss": 2.5002, "step": 53090 }, { "epoch": 3.956780923994039, "grad_norm": 2.4977810382843018, "learning_rate": 0.0002, "loss": 2.5137, "step": 53100 }, { "epoch": 3.9575260804769004, "grad_norm": 2.417628526687622, "learning_rate": 0.0002, "loss": 2.4268, "step": 53110 }, { "epoch": 3.9582712369597615, "grad_norm": 2.7879631519317627, "learning_rate": 0.0002, "loss": 2.6652, "step": 53120 }, { "epoch": 3.959016393442623, "grad_norm": 2.732900857925415, "learning_rate": 0.0002, "loss": 2.4687, "step": 53130 }, { "epoch": 3.959761549925484, "grad_norm": 2.5908401012420654, "learning_rate": 0.0002, "loss": 2.4273, "step": 53140 }, { "epoch": 3.9605067064083457, "grad_norm": 3.021219253540039, "learning_rate": 0.0002, "loss": 2.4994, "step": 53150 }, { "epoch": 3.9612518628912072, "grad_norm": 2.95470929145813, "learning_rate": 0.0002, "loss": 2.4703, "step": 53160 }, { "epoch": 3.961997019374069, "grad_norm": 2.4512953758239746, "learning_rate": 0.0002, "loss": 2.5641, "step": 53170 }, { "epoch": 3.96274217585693, "grad_norm": 2.5563712120056152, "learning_rate": 0.0002, "loss": 2.5442, "step": 53180 }, { "epoch": 3.9634873323397914, "grad_norm": 2.6706528663635254, "learning_rate": 0.0002, "loss": 2.5561, "step": 53190 }, { "epoch": 3.9642324888226526, "grad_norm": 2.7212600708007812, "learning_rate": 0.0002, "loss": 2.4722, "step": 53200 }, { "epoch": 3.964977645305514, "grad_norm": 2.527886390686035, "learning_rate": 0.0002, "loss": 2.4376, "step": 53210 }, { "epoch": 3.9657228017883757, "grad_norm": 2.4174673557281494, "learning_rate": 0.0002, "loss": 2.5337, "step": 53220 }, { "epoch": 3.966467958271237, "grad_norm": 3.1026086807250977, "learning_rate": 0.0002, "loss": 2.4212, "step": 53230 }, { "epoch": 3.9672131147540983, "grad_norm": 2.696936845779419, "learning_rate": 0.0002, "loss": 2.4115, "step": 53240 }, { "epoch": 3.96795827123696, "grad_norm": 2.7065093517303467, "learning_rate": 0.0002, "loss": 2.575, "step": 53250 }, { "epoch": 3.968703427719821, "grad_norm": 2.599903106689453, "learning_rate": 0.0002, "loss": 2.4136, "step": 53260 }, { "epoch": 3.9694485842026825, "grad_norm": 2.7001261711120605, "learning_rate": 0.0002, "loss": 2.4217, "step": 53270 }, { "epoch": 3.970193740685544, "grad_norm": 2.7080583572387695, "learning_rate": 0.0002, "loss": 2.6516, "step": 53280 }, { "epoch": 3.970938897168405, "grad_norm": 2.549535036087036, "learning_rate": 0.0002, "loss": 2.4606, "step": 53290 }, { "epoch": 3.9716840536512668, "grad_norm": 3.246875047683716, "learning_rate": 0.0002, "loss": 2.6515, "step": 53300 }, { "epoch": 3.9724292101341283, "grad_norm": 2.5322539806365967, "learning_rate": 0.0002, "loss": 2.4486, "step": 53310 }, { "epoch": 3.9731743666169894, "grad_norm": 2.2940611839294434, "learning_rate": 0.0002, "loss": 2.358, "step": 53320 }, { "epoch": 3.973919523099851, "grad_norm": 2.6890695095062256, "learning_rate": 0.0002, "loss": 2.5776, "step": 53330 }, { "epoch": 3.9746646795827125, "grad_norm": 2.7836878299713135, "learning_rate": 0.0002, "loss": 2.3885, "step": 53340 }, { "epoch": 3.9754098360655736, "grad_norm": 2.572012186050415, "learning_rate": 0.0002, "loss": 2.3641, "step": 53350 }, { "epoch": 3.976154992548435, "grad_norm": 2.936228036880493, "learning_rate": 0.0002, "loss": 2.4106, "step": 53360 }, { "epoch": 3.9769001490312967, "grad_norm": 2.7177422046661377, "learning_rate": 0.0002, "loss": 2.6147, "step": 53370 }, { "epoch": 3.977645305514158, "grad_norm": 2.8363125324249268, "learning_rate": 0.0002, "loss": 2.5204, "step": 53380 }, { "epoch": 3.9783904619970194, "grad_norm": 2.6096811294555664, "learning_rate": 0.0002, "loss": 2.3503, "step": 53390 }, { "epoch": 3.979135618479881, "grad_norm": 2.363093852996826, "learning_rate": 0.0002, "loss": 2.7152, "step": 53400 }, { "epoch": 3.979880774962742, "grad_norm": 2.8034679889678955, "learning_rate": 0.0002, "loss": 2.5279, "step": 53410 }, { "epoch": 3.9806259314456036, "grad_norm": 2.614542245864868, "learning_rate": 0.0002, "loss": 2.429, "step": 53420 }, { "epoch": 3.9813710879284647, "grad_norm": 2.7522165775299072, "learning_rate": 0.0002, "loss": 2.547, "step": 53430 }, { "epoch": 3.9821162444113263, "grad_norm": 2.595388412475586, "learning_rate": 0.0002, "loss": 2.566, "step": 53440 }, { "epoch": 3.982861400894188, "grad_norm": 2.463608741760254, "learning_rate": 0.0002, "loss": 2.4284, "step": 53450 }, { "epoch": 3.9836065573770494, "grad_norm": 2.377199172973633, "learning_rate": 0.0002, "loss": 2.5547, "step": 53460 }, { "epoch": 3.9843517138599105, "grad_norm": 2.985874652862549, "learning_rate": 0.0002, "loss": 2.5976, "step": 53470 }, { "epoch": 3.985096870342772, "grad_norm": 2.601820707321167, "learning_rate": 0.0002, "loss": 2.5576, "step": 53480 }, { "epoch": 3.985842026825633, "grad_norm": 3.1383955478668213, "learning_rate": 0.0002, "loss": 2.4143, "step": 53490 }, { "epoch": 3.9865871833084947, "grad_norm": 2.46256685256958, "learning_rate": 0.0002, "loss": 2.6677, "step": 53500 }, { "epoch": 3.9873323397913563, "grad_norm": 2.43536376953125, "learning_rate": 0.0002, "loss": 2.526, "step": 53510 }, { "epoch": 3.988077496274218, "grad_norm": 2.3847920894622803, "learning_rate": 0.0002, "loss": 2.6184, "step": 53520 }, { "epoch": 3.988822652757079, "grad_norm": 2.5792295932769775, "learning_rate": 0.0002, "loss": 2.6166, "step": 53530 }, { "epoch": 3.9895678092399405, "grad_norm": 2.792299509048462, "learning_rate": 0.0002, "loss": 2.6626, "step": 53540 }, { "epoch": 3.9903129657228016, "grad_norm": 2.445161819458008, "learning_rate": 0.0002, "loss": 2.5005, "step": 53550 }, { "epoch": 3.991058122205663, "grad_norm": 3.263998031616211, "learning_rate": 0.0002, "loss": 2.5312, "step": 53560 }, { "epoch": 3.9918032786885247, "grad_norm": 2.557817220687866, "learning_rate": 0.0002, "loss": 2.503, "step": 53570 }, { "epoch": 3.9925484351713862, "grad_norm": 2.727743625640869, "learning_rate": 0.0002, "loss": 2.6169, "step": 53580 }, { "epoch": 3.9932935916542474, "grad_norm": 2.6156795024871826, "learning_rate": 0.0002, "loss": 2.5591, "step": 53590 }, { "epoch": 3.994038748137109, "grad_norm": 2.661540985107422, "learning_rate": 0.0002, "loss": 2.5086, "step": 53600 }, { "epoch": 3.99478390461997, "grad_norm": 2.503107786178589, "learning_rate": 0.0002, "loss": 2.5765, "step": 53610 }, { "epoch": 3.9955290611028316, "grad_norm": 2.6578187942504883, "learning_rate": 0.0002, "loss": 2.4175, "step": 53620 }, { "epoch": 3.996274217585693, "grad_norm": 2.996702194213867, "learning_rate": 0.0002, "loss": 2.694, "step": 53630 }, { "epoch": 3.9970193740685542, "grad_norm": 2.9681408405303955, "learning_rate": 0.0002, "loss": 2.4948, "step": 53640 }, { "epoch": 3.997764530551416, "grad_norm": 2.7573134899139404, "learning_rate": 0.0002, "loss": 2.6957, "step": 53650 }, { "epoch": 3.9985096870342773, "grad_norm": 2.6993305683135986, "learning_rate": 0.0002, "loss": 2.588, "step": 53660 }, { "epoch": 3.9992548435171384, "grad_norm": 2.6153762340545654, "learning_rate": 0.0002, "loss": 2.5999, "step": 53670 }, { "epoch": 4.0, "grad_norm": 3.1838414669036865, "learning_rate": 0.0002, "loss": 2.4747, "step": 53680 }, { "epoch": 4.0, "eval_runtime": 2866.1708, "eval_samples_per_second": 4.682, "eval_steps_per_second": 0.585, "step": 53680 } ], "logging_steps": 10, "max_steps": 80520, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.734753409106688e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }