diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 91.0, - "global_step": 18367713, + "epoch": 100.0, + "global_step": 20184300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -221417,11 +221417,21908 @@ "eval_samples_per_second": 1737.64, "eval_steps_per_second": 72.407, "step": 18367713 + }, + { + "epoch": 91.0, + "learning_rate": 4.5155665542030194e-06, + "loss": 1.8238, + "step": 18368000 + }, + { + "epoch": 91.0, + "learning_rate": 4.5143279677769355e-06, + "loss": 1.7877, + "step": 18368500 + }, + { + "epoch": 91.01, + "learning_rate": 4.513091858523704e-06, + "loss": 1.8172, + "step": 18369000 + }, + { + "epoch": 91.01, + "learning_rate": 4.51185327209762e-06, + "loss": 1.7793, + "step": 18369500 + }, + { + "epoch": 91.01, + "learning_rate": 4.5106146856715365e-06, + "loss": 1.7981, + "step": 18370000 + }, + { + "epoch": 91.01, + "learning_rate": 4.509376099245453e-06, + "loss": 1.7923, + "step": 18370500 + }, + { + "epoch": 91.02, + "learning_rate": 4.5081375128193695e-06, + "loss": 1.8031, + "step": 18371000 + }, + { + "epoch": 91.02, + "learning_rate": 4.506898926393286e-06, + "loss": 1.795, + "step": 18371500 + }, + { + "epoch": 91.02, + "learning_rate": 4.5056603399672025e-06, + "loss": 1.8148, + "step": 18372000 + }, + { + "epoch": 91.02, + "learning_rate": 4.504421753541119e-06, + "loss": 1.7991, + "step": 18372500 + }, + { + "epoch": 91.03, + "learning_rate": 4.5031831671150355e-06, + "loss": 1.8063, + "step": 18373000 + }, + { + "epoch": 91.03, + "learning_rate": 4.501944580688952e-06, + "loss": 1.8167, + "step": 18373500 + }, + { + "epoch": 91.03, + "learning_rate": 4.50070847143572e-06, + "loss": 1.7992, + "step": 18374000 + }, + { + "epoch": 91.03, + "learning_rate": 4.4994698850096365e-06, + "loss": 1.8178, + "step": 18374500 + }, + { + "epoch": 91.04, + "learning_rate": 4.498233775756405e-06, + "loss": 1.7876, + "step": 18375000 + }, + { + "epoch": 91.04, + "learning_rate": 4.496997666503173e-06, + "loss": 1.8133, + "step": 18375500 + }, + { + "epoch": 91.04, + "learning_rate": 4.49575908007709e-06, + "loss": 1.7867, + "step": 18376000 + }, + { + "epoch": 91.04, + "learning_rate": 4.494520493651006e-06, + "loss": 1.7892, + "step": 18376500 + }, + { + "epoch": 91.05, + "learning_rate": 4.493284384397774e-06, + "loss": 1.7739, + "step": 18377000 + }, + { + "epoch": 91.05, + "learning_rate": 4.492045797971691e-06, + "loss": 1.8005, + "step": 18377500 + }, + { + "epoch": 91.05, + "learning_rate": 4.490807211545608e-06, + "loss": 1.814, + "step": 18378000 + }, + { + "epoch": 91.05, + "learning_rate": 4.489568625119524e-06, + "loss": 1.8055, + "step": 18378500 + }, + { + "epoch": 91.06, + "learning_rate": 4.48833003869344e-06, + "loss": 1.8141, + "step": 18379000 + }, + { + "epoch": 91.06, + "learning_rate": 4.487091452267356e-06, + "loss": 1.7972, + "step": 18379500 + }, + { + "epoch": 91.06, + "learning_rate": 4.485852865841273e-06, + "loss": 1.8121, + "step": 18380000 + }, + { + "epoch": 91.06, + "learning_rate": 4.484614279415189e-06, + "loss": 1.8006, + "step": 18380500 + }, + { + "epoch": 91.07, + "learning_rate": 4.483375692989105e-06, + "loss": 1.8122, + "step": 18381000 + }, + { + "epoch": 91.07, + "learning_rate": 4.4821371065630215e-06, + "loss": 1.813, + "step": 18381500 + }, + { + "epoch": 91.07, + "learning_rate": 4.48090099730979e-06, + "loss": 1.8126, + "step": 18382000 + }, + { + "epoch": 91.07, + "learning_rate": 4.479662410883706e-06, + "loss": 1.7956, + "step": 18382500 + }, + { + "epoch": 91.08, + "learning_rate": 4.478423824457623e-06, + "loss": 1.802, + "step": 18383000 + }, + { + "epoch": 91.08, + "learning_rate": 4.477185238031539e-06, + "loss": 1.7802, + "step": 18383500 + }, + { + "epoch": 91.08, + "learning_rate": 4.475946651605456e-06, + "loss": 1.8093, + "step": 18384000 + }, + { + "epoch": 91.08, + "learning_rate": 4.474710542352224e-06, + "loss": 1.8001, + "step": 18384500 + }, + { + "epoch": 91.09, + "learning_rate": 4.473471955926141e-06, + "loss": 1.8319, + "step": 18385000 + }, + { + "epoch": 91.09, + "learning_rate": 4.472233369500057e-06, + "loss": 1.8107, + "step": 18385500 + }, + { + "epoch": 91.09, + "learning_rate": 4.470994783073973e-06, + "loss": 1.8162, + "step": 18386000 + }, + { + "epoch": 91.09, + "learning_rate": 4.4697561966478895e-06, + "loss": 1.8314, + "step": 18386500 + }, + { + "epoch": 91.1, + "learning_rate": 4.468517610221806e-06, + "loss": 1.8067, + "step": 18387000 + }, + { + "epoch": 91.1, + "learning_rate": 4.467279023795723e-06, + "loss": 1.8121, + "step": 18387500 + }, + { + "epoch": 91.1, + "learning_rate": 4.4660404373696385e-06, + "loss": 1.7994, + "step": 18388000 + }, + { + "epoch": 91.1, + "learning_rate": 4.4648018509435555e-06, + "loss": 1.7916, + "step": 18388500 + }, + { + "epoch": 91.11, + "learning_rate": 4.4635632645174716e-06, + "loss": 1.8057, + "step": 18389000 + }, + { + "epoch": 91.11, + "learning_rate": 4.4623246780913885e-06, + "loss": 1.7974, + "step": 18389500 + }, + { + "epoch": 91.11, + "learning_rate": 4.4610885688381564e-06, + "loss": 1.7834, + "step": 18390000 + }, + { + "epoch": 91.11, + "learning_rate": 4.459849982412073e-06, + "loss": 1.8185, + "step": 18390500 + }, + { + "epoch": 91.12, + "learning_rate": 4.4586113959859895e-06, + "loss": 1.8006, + "step": 18391000 + }, + { + "epoch": 91.12, + "learning_rate": 4.4573728095599055e-06, + "loss": 1.7961, + "step": 18391500 + }, + { + "epoch": 91.12, + "learning_rate": 4.456134223133822e-06, + "loss": 1.7955, + "step": 18392000 + }, + { + "epoch": 91.12, + "learning_rate": 4.4548956367077385e-06, + "loss": 1.8343, + "step": 18392500 + }, + { + "epoch": 91.13, + "learning_rate": 4.453657050281655e-06, + "loss": 1.8246, + "step": 18393000 + }, + { + "epoch": 91.13, + "learning_rate": 4.4524209410284234e-06, + "loss": 1.7989, + "step": 18393500 + }, + { + "epoch": 91.13, + "learning_rate": 4.451184831775191e-06, + "loss": 1.8199, + "step": 18394000 + }, + { + "epoch": 91.13, + "learning_rate": 4.449946245349108e-06, + "loss": 1.8138, + "step": 18394500 + }, + { + "epoch": 91.14, + "learning_rate": 4.448707658923024e-06, + "loss": 1.822, + "step": 18395000 + }, + { + "epoch": 91.14, + "learning_rate": 4.447469072496941e-06, + "loss": 1.7962, + "step": 18395500 + }, + { + "epoch": 91.14, + "learning_rate": 4.446230486070857e-06, + "loss": 1.7837, + "step": 18396000 + }, + { + "epoch": 91.14, + "learning_rate": 4.4449918996447735e-06, + "loss": 1.7973, + "step": 18396500 + }, + { + "epoch": 91.15, + "learning_rate": 4.44375331321869e-06, + "loss": 1.8241, + "step": 18397000 + }, + { + "epoch": 91.15, + "learning_rate": 4.442517203965458e-06, + "loss": 1.8237, + "step": 18397500 + }, + { + "epoch": 91.15, + "learning_rate": 4.4412786175393745e-06, + "loss": 1.8102, + "step": 18398000 + }, + { + "epoch": 91.15, + "learning_rate": 4.440040031113291e-06, + "loss": 1.8286, + "step": 18398500 + }, + { + "epoch": 91.16, + "learning_rate": 4.4388014446872075e-06, + "loss": 1.7764, + "step": 18399000 + }, + { + "epoch": 91.16, + "learning_rate": 4.4375628582611236e-06, + "loss": 1.8141, + "step": 18399500 + }, + { + "epoch": 91.16, + "learning_rate": 4.4363242718350405e-06, + "loss": 1.7887, + "step": 18400000 + }, + { + "epoch": 91.16, + "learning_rate": 4.4350856854089566e-06, + "loss": 1.823, + "step": 18400500 + }, + { + "epoch": 91.16, + "learning_rate": 4.4338470989828735e-06, + "loss": 1.7915, + "step": 18401000 + }, + { + "epoch": 91.17, + "learning_rate": 4.43260851255679e-06, + "loss": 1.8139, + "step": 18401500 + }, + { + "epoch": 91.17, + "learning_rate": 4.431369926130706e-06, + "loss": 1.8145, + "step": 18402000 + }, + { + "epoch": 91.17, + "learning_rate": 4.430131339704622e-06, + "loss": 1.7938, + "step": 18402500 + }, + { + "epoch": 91.17, + "learning_rate": 4.428892753278539e-06, + "loss": 1.8088, + "step": 18403000 + }, + { + "epoch": 91.18, + "learning_rate": 4.427654166852455e-06, + "loss": 1.8114, + "step": 18403500 + }, + { + "epoch": 91.18, + "learning_rate": 4.426415580426372e-06, + "loss": 1.8043, + "step": 18404000 + }, + { + "epoch": 91.18, + "learning_rate": 4.425176994000287e-06, + "loss": 1.7982, + "step": 18404500 + }, + { + "epoch": 91.18, + "learning_rate": 4.423938407574204e-06, + "loss": 1.7848, + "step": 18405000 + }, + { + "epoch": 91.19, + "learning_rate": 4.422699821148121e-06, + "loss": 1.8017, + "step": 18405500 + }, + { + "epoch": 91.19, + "learning_rate": 4.421463711894889e-06, + "loss": 1.8133, + "step": 18406000 + }, + { + "epoch": 91.19, + "learning_rate": 4.420230079814509e-06, + "loss": 1.8233, + "step": 18406500 + }, + { + "epoch": 91.19, + "learning_rate": 4.418991493388426e-06, + "loss": 1.7882, + "step": 18407000 + }, + { + "epoch": 91.2, + "learning_rate": 4.4177529069623424e-06, + "loss": 1.7908, + "step": 18407500 + }, + { + "epoch": 91.2, + "learning_rate": 4.416516797709111e-06, + "loss": 1.8051, + "step": 18408000 + }, + { + "epoch": 91.2, + "learning_rate": 4.415278211283027e-06, + "loss": 1.8144, + "step": 18408500 + }, + { + "epoch": 91.2, + "learning_rate": 4.414039624856943e-06, + "loss": 1.8121, + "step": 18409000 + }, + { + "epoch": 91.21, + "learning_rate": 4.4128010384308595e-06, + "loss": 1.7947, + "step": 18409500 + }, + { + "epoch": 91.21, + "learning_rate": 4.411562452004776e-06, + "loss": 1.7938, + "step": 18410000 + }, + { + "epoch": 91.21, + "learning_rate": 4.4103238655786925e-06, + "loss": 1.8087, + "step": 18410500 + }, + { + "epoch": 91.21, + "learning_rate": 4.4090852791526086e-06, + "loss": 1.8004, + "step": 18411000 + }, + { + "epoch": 91.22, + "learning_rate": 4.407849169899377e-06, + "loss": 1.8073, + "step": 18411500 + }, + { + "epoch": 91.22, + "learning_rate": 4.406610583473294e-06, + "loss": 1.7987, + "step": 18412000 + }, + { + "epoch": 91.22, + "learning_rate": 4.4053719970472095e-06, + "loss": 1.7817, + "step": 18412500 + }, + { + "epoch": 91.22, + "learning_rate": 4.4041334106211265e-06, + "loss": 1.7861, + "step": 18413000 + }, + { + "epoch": 91.23, + "learning_rate": 4.402894824195043e-06, + "loss": 1.8135, + "step": 18413500 + }, + { + "epoch": 91.23, + "learning_rate": 4.401658714941811e-06, + "loss": 1.7576, + "step": 18414000 + }, + { + "epoch": 91.23, + "learning_rate": 4.4004201285157274e-06, + "loss": 1.8048, + "step": 18414500 + }, + { + "epoch": 91.23, + "learning_rate": 4.399181542089644e-06, + "loss": 1.7548, + "step": 18415000 + }, + { + "epoch": 91.24, + "learning_rate": 4.3979429556635605e-06, + "loss": 1.8192, + "step": 18415500 + }, + { + "epoch": 91.24, + "learning_rate": 4.396706846410329e-06, + "loss": 1.8046, + "step": 18416000 + }, + { + "epoch": 91.24, + "learning_rate": 4.395468259984245e-06, + "loss": 1.8224, + "step": 18416500 + }, + { + "epoch": 91.24, + "learning_rate": 4.3942296735581614e-06, + "loss": 1.7869, + "step": 18417000 + }, + { + "epoch": 91.25, + "learning_rate": 4.392991087132078e-06, + "loss": 1.8152, + "step": 18417500 + }, + { + "epoch": 91.25, + "learning_rate": 4.3917525007059944e-06, + "loss": 1.8083, + "step": 18418000 + }, + { + "epoch": 91.25, + "learning_rate": 4.390513914279911e-06, + "loss": 1.8127, + "step": 18418500 + }, + { + "epoch": 91.25, + "learning_rate": 4.3892753278538274e-06, + "loss": 1.8243, + "step": 18419000 + }, + { + "epoch": 91.26, + "learning_rate": 4.3880367414277435e-06, + "loss": 1.7837, + "step": 18419500 + }, + { + "epoch": 91.26, + "learning_rate": 4.38679815500166e-06, + "loss": 1.8011, + "step": 18420000 + }, + { + "epoch": 91.26, + "learning_rate": 4.3855595685755765e-06, + "loss": 1.7814, + "step": 18420500 + }, + { + "epoch": 91.26, + "learning_rate": 4.384320982149493e-06, + "loss": 1.8039, + "step": 18421000 + }, + { + "epoch": 91.27, + "learning_rate": 4.383082395723409e-06, + "loss": 1.7995, + "step": 18421500 + }, + { + "epoch": 91.27, + "learning_rate": 4.3818462864701775e-06, + "loss": 1.7886, + "step": 18422000 + }, + { + "epoch": 91.27, + "learning_rate": 4.380612654389798e-06, + "loss": 1.8072, + "step": 18422500 + }, + { + "epoch": 91.27, + "learning_rate": 4.379374067963714e-06, + "loss": 1.8142, + "step": 18423000 + }, + { + "epoch": 91.28, + "learning_rate": 4.378135481537631e-06, + "loss": 1.8281, + "step": 18423500 + }, + { + "epoch": 91.28, + "learning_rate": 4.376896895111547e-06, + "loss": 1.8153, + "step": 18424000 + }, + { + "epoch": 91.28, + "learning_rate": 4.375658308685464e-06, + "loss": 1.7936, + "step": 18424500 + }, + { + "epoch": 91.28, + "learning_rate": 4.3744197222593795e-06, + "loss": 1.7945, + "step": 18425000 + }, + { + "epoch": 91.29, + "learning_rate": 4.373181135833296e-06, + "loss": 1.7936, + "step": 18425500 + }, + { + "epoch": 91.29, + "learning_rate": 4.3719425494072125e-06, + "loss": 1.8093, + "step": 18426000 + }, + { + "epoch": 91.29, + "learning_rate": 4.370703962981129e-06, + "loss": 1.8145, + "step": 18426500 + }, + { + "epoch": 91.29, + "learning_rate": 4.3694653765550455e-06, + "loss": 1.7938, + "step": 18427000 + }, + { + "epoch": 91.3, + "learning_rate": 4.3682267901289615e-06, + "loss": 1.7991, + "step": 18427500 + }, + { + "epoch": 91.3, + "learning_rate": 4.3669882037028785e-06, + "loss": 1.8111, + "step": 18428000 + }, + { + "epoch": 91.3, + "learning_rate": 4.3657496172767946e-06, + "loss": 1.7835, + "step": 18428500 + }, + { + "epoch": 91.3, + "learning_rate": 4.3645135080235625e-06, + "loss": 1.8006, + "step": 18429000 + }, + { + "epoch": 91.31, + "learning_rate": 4.3632749215974794e-06, + "loss": 1.8044, + "step": 18429500 + }, + { + "epoch": 91.31, + "learning_rate": 4.362036335171396e-06, + "loss": 1.8151, + "step": 18430000 + }, + { + "epoch": 91.31, + "learning_rate": 4.3607977487453125e-06, + "loss": 1.7854, + "step": 18430500 + }, + { + "epoch": 91.31, + "learning_rate": 4.3595591623192285e-06, + "loss": 1.8164, + "step": 18431000 + }, + { + "epoch": 91.32, + "learning_rate": 4.358320575893145e-06, + "loss": 1.8253, + "step": 18431500 + }, + { + "epoch": 91.32, + "learning_rate": 4.3570819894670615e-06, + "loss": 1.8103, + "step": 18432000 + }, + { + "epoch": 91.32, + "learning_rate": 4.355843403040978e-06, + "loss": 1.7965, + "step": 18432500 + }, + { + "epoch": 91.32, + "learning_rate": 4.3546072937877464e-06, + "loss": 1.8188, + "step": 18433000 + }, + { + "epoch": 91.33, + "learning_rate": 4.3533687073616625e-06, + "loss": 1.807, + "step": 18433500 + }, + { + "epoch": 91.33, + "learning_rate": 4.352130120935579e-06, + "loss": 1.8107, + "step": 18434000 + }, + { + "epoch": 91.33, + "learning_rate": 4.350891534509495e-06, + "loss": 1.8244, + "step": 18434500 + }, + { + "epoch": 91.33, + "learning_rate": 4.349652948083412e-06, + "loss": 1.782, + "step": 18435000 + }, + { + "epoch": 91.34, + "learning_rate": 4.348414361657328e-06, + "loss": 1.8092, + "step": 18435500 + }, + { + "epoch": 91.34, + "learning_rate": 4.347175775231245e-06, + "loss": 1.8002, + "step": 18436000 + }, + { + "epoch": 91.34, + "learning_rate": 4.345937188805161e-06, + "loss": 1.8015, + "step": 18436500 + }, + { + "epoch": 91.34, + "learning_rate": 4.344703556724781e-06, + "loss": 1.7918, + "step": 18437000 + }, + { + "epoch": 91.35, + "learning_rate": 4.343467447471549e-06, + "loss": 1.8164, + "step": 18437500 + }, + { + "epoch": 91.35, + "learning_rate": 4.342228861045466e-06, + "loss": 1.8096, + "step": 18438000 + }, + { + "epoch": 91.35, + "learning_rate": 4.340990274619382e-06, + "loss": 1.8126, + "step": 18438500 + }, + { + "epoch": 91.35, + "learning_rate": 4.339751688193299e-06, + "loss": 1.8234, + "step": 18439000 + }, + { + "epoch": 91.36, + "learning_rate": 4.338513101767215e-06, + "loss": 1.7843, + "step": 18439500 + }, + { + "epoch": 91.36, + "learning_rate": 4.3372745153411315e-06, + "loss": 1.7892, + "step": 18440000 + }, + { + "epoch": 91.36, + "learning_rate": 4.3360359289150475e-06, + "loss": 1.7983, + "step": 18440500 + }, + { + "epoch": 91.36, + "learning_rate": 4.3347973424889645e-06, + "loss": 1.7977, + "step": 18441000 + }, + { + "epoch": 91.37, + "learning_rate": 4.3335587560628805e-06, + "loss": 1.7831, + "step": 18441500 + }, + { + "epoch": 91.37, + "learning_rate": 4.332322646809649e-06, + "loss": 1.7906, + "step": 18442000 + }, + { + "epoch": 91.37, + "learning_rate": 4.3310840603835654e-06, + "loss": 1.8172, + "step": 18442500 + }, + { + "epoch": 91.37, + "learning_rate": 4.329847951130334e-06, + "loss": 1.8025, + "step": 18443000 + }, + { + "epoch": 91.38, + "learning_rate": 4.32860936470425e-06, + "loss": 1.7938, + "step": 18443500 + }, + { + "epoch": 91.38, + "learning_rate": 4.327370778278167e-06, + "loss": 1.7916, + "step": 18444000 + }, + { + "epoch": 91.38, + "learning_rate": 4.326132191852083e-06, + "loss": 1.7927, + "step": 18444500 + }, + { + "epoch": 91.38, + "learning_rate": 4.324893605425999e-06, + "loss": 1.7992, + "step": 18445000 + }, + { + "epoch": 91.39, + "learning_rate": 4.323655018999916e-06, + "loss": 1.8043, + "step": 18445500 + }, + { + "epoch": 91.39, + "learning_rate": 4.322418909746684e-06, + "loss": 1.8067, + "step": 18446000 + }, + { + "epoch": 91.39, + "learning_rate": 4.321180323320601e-06, + "loss": 1.8118, + "step": 18446500 + }, + { + "epoch": 91.39, + "learning_rate": 4.319941736894517e-06, + "loss": 1.8203, + "step": 18447000 + }, + { + "epoch": 91.4, + "learning_rate": 4.318703150468434e-06, + "loss": 1.8081, + "step": 18447500 + }, + { + "epoch": 91.4, + "learning_rate": 4.3174645640423495e-06, + "loss": 1.7989, + "step": 18448000 + }, + { + "epoch": 91.4, + "learning_rate": 4.316225977616266e-06, + "loss": 1.7943, + "step": 18448500 + }, + { + "epoch": 91.4, + "learning_rate": 4.3149873911901825e-06, + "loss": 1.7726, + "step": 18449000 + }, + { + "epoch": 91.41, + "learning_rate": 4.313748804764099e-06, + "loss": 1.7879, + "step": 18449500 + }, + { + "epoch": 91.41, + "learning_rate": 4.3125102183380155e-06, + "loss": 1.8255, + "step": 18450000 + }, + { + "epoch": 91.41, + "learning_rate": 4.311271631911932e-06, + "loss": 1.8035, + "step": 18450500 + }, + { + "epoch": 91.41, + "learning_rate": 4.310033045485848e-06, + "loss": 1.7864, + "step": 18451000 + }, + { + "epoch": 91.42, + "learning_rate": 4.3087969362326165e-06, + "loss": 1.8104, + "step": 18451500 + }, + { + "epoch": 91.42, + "learning_rate": 4.3075583498065325e-06, + "loss": 1.8231, + "step": 18452000 + }, + { + "epoch": 91.42, + "learning_rate": 4.3063197633804495e-06, + "loss": 1.8403, + "step": 18452500 + }, + { + "epoch": 91.42, + "learning_rate": 4.3050811769543656e-06, + "loss": 1.8119, + "step": 18453000 + }, + { + "epoch": 91.43, + "learning_rate": 4.3038425905282825e-06, + "loss": 1.8011, + "step": 18453500 + }, + { + "epoch": 91.43, + "learning_rate": 4.302608958447902e-06, + "loss": 1.8145, + "step": 18454000 + }, + { + "epoch": 91.43, + "learning_rate": 4.301370372021819e-06, + "loss": 1.8077, + "step": 18454500 + }, + { + "epoch": 91.43, + "learning_rate": 4.300131785595735e-06, + "loss": 1.7986, + "step": 18455000 + }, + { + "epoch": 91.43, + "learning_rate": 4.298893199169652e-06, + "loss": 1.7884, + "step": 18455500 + }, + { + "epoch": 91.44, + "learning_rate": 4.297654612743568e-06, + "loss": 1.8042, + "step": 18456000 + }, + { + "epoch": 91.44, + "learning_rate": 4.2964160263174844e-06, + "loss": 1.7959, + "step": 18456500 + }, + { + "epoch": 91.44, + "learning_rate": 4.295179917064253e-06, + "loss": 1.7996, + "step": 18457000 + }, + { + "epoch": 91.44, + "learning_rate": 4.293941330638169e-06, + "loss": 1.8079, + "step": 18457500 + }, + { + "epoch": 91.45, + "learning_rate": 4.292702744212085e-06, + "loss": 1.8014, + "step": 18458000 + }, + { + "epoch": 91.45, + "learning_rate": 4.291464157786002e-06, + "loss": 1.8148, + "step": 18458500 + }, + { + "epoch": 91.45, + "learning_rate": 4.290225571359919e-06, + "loss": 1.7937, + "step": 18459000 + }, + { + "epoch": 91.45, + "learning_rate": 4.2889869849338345e-06, + "loss": 1.8136, + "step": 18459500 + }, + { + "epoch": 91.46, + "learning_rate": 4.287750875680603e-06, + "loss": 1.7974, + "step": 18460000 + }, + { + "epoch": 91.46, + "learning_rate": 4.28651228925452e-06, + "loss": 1.7971, + "step": 18460500 + }, + { + "epoch": 91.46, + "learning_rate": 4.285273702828436e-06, + "loss": 1.7939, + "step": 18461000 + }, + { + "epoch": 91.46, + "learning_rate": 4.284035116402352e-06, + "loss": 1.8095, + "step": 18461500 + }, + { + "epoch": 91.47, + "learning_rate": 4.282796529976269e-06, + "loss": 1.788, + "step": 18462000 + }, + { + "epoch": 91.47, + "learning_rate": 4.281560420723037e-06, + "loss": 1.8043, + "step": 18462500 + }, + { + "epoch": 91.47, + "learning_rate": 4.280321834296954e-06, + "loss": 1.8013, + "step": 18463000 + }, + { + "epoch": 91.47, + "learning_rate": 4.27908324787087e-06, + "loss": 1.8226, + "step": 18463500 + }, + { + "epoch": 91.48, + "learning_rate": 4.277844661444786e-06, + "loss": 1.791, + "step": 18464000 + }, + { + "epoch": 91.48, + "learning_rate": 4.2766060750187025e-06, + "loss": 1.7857, + "step": 18464500 + }, + { + "epoch": 91.48, + "learning_rate": 4.275367488592619e-06, + "loss": 1.809, + "step": 18465000 + }, + { + "epoch": 91.48, + "learning_rate": 4.2741289021665355e-06, + "loss": 1.8259, + "step": 18465500 + }, + { + "epoch": 91.49, + "learning_rate": 4.272890315740452e-06, + "loss": 1.8109, + "step": 18466000 + }, + { + "epoch": 91.49, + "learning_rate": 4.2716517293143685e-06, + "loss": 1.7935, + "step": 18466500 + }, + { + "epoch": 91.49, + "learning_rate": 4.2704131428882846e-06, + "loss": 1.7839, + "step": 18467000 + }, + { + "epoch": 91.49, + "learning_rate": 4.269174556462201e-06, + "loss": 1.8161, + "step": 18467500 + }, + { + "epoch": 91.5, + "learning_rate": 4.2679384472089694e-06, + "loss": 1.7774, + "step": 18468000 + }, + { + "epoch": 91.5, + "learning_rate": 4.2666998607828855e-06, + "loss": 1.799, + "step": 18468500 + }, + { + "epoch": 91.5, + "learning_rate": 4.2654612743568025e-06, + "loss": 1.7922, + "step": 18469000 + }, + { + "epoch": 91.5, + "learning_rate": 4.2642226879307185e-06, + "loss": 1.8079, + "step": 18469500 + }, + { + "epoch": 91.51, + "learning_rate": 4.262986578677487e-06, + "loss": 1.8125, + "step": 18470000 + }, + { + "epoch": 91.51, + "learning_rate": 4.2617479922514034e-06, + "loss": 1.7899, + "step": 18470500 + }, + { + "epoch": 91.51, + "learning_rate": 4.2605094058253195e-06, + "loss": 1.8053, + "step": 18471000 + }, + { + "epoch": 91.51, + "learning_rate": 4.2592708193992364e-06, + "loss": 1.8038, + "step": 18471500 + }, + { + "epoch": 91.52, + "learning_rate": 4.2580322329731525e-06, + "loss": 1.7897, + "step": 18472000 + }, + { + "epoch": 91.52, + "learning_rate": 4.2567961237199205e-06, + "loss": 1.7926, + "step": 18472500 + }, + { + "epoch": 91.52, + "learning_rate": 4.255557537293837e-06, + "loss": 1.8311, + "step": 18473000 + }, + { + "epoch": 91.52, + "learning_rate": 4.254318950867754e-06, + "loss": 1.8104, + "step": 18473500 + }, + { + "epoch": 91.53, + "learning_rate": 4.25308036444167e-06, + "loss": 1.8184, + "step": 18474000 + }, + { + "epoch": 91.53, + "learning_rate": 4.251844255188438e-06, + "loss": 1.8073, + "step": 18474500 + }, + { + "epoch": 91.53, + "learning_rate": 4.250605668762355e-06, + "loss": 1.8114, + "step": 18475000 + }, + { + "epoch": 91.53, + "learning_rate": 4.249369559509123e-06, + "loss": 1.7977, + "step": 18475500 + }, + { + "epoch": 91.54, + "learning_rate": 4.24813097308304e-06, + "loss": 1.7972, + "step": 18476000 + }, + { + "epoch": 91.54, + "learning_rate": 4.246892386656956e-06, + "loss": 1.8107, + "step": 18476500 + }, + { + "epoch": 91.54, + "learning_rate": 4.245653800230872e-06, + "loss": 1.7936, + "step": 18477000 + }, + { + "epoch": 91.54, + "learning_rate": 4.244417690977641e-06, + "loss": 1.812, + "step": 18477500 + }, + { + "epoch": 91.55, + "learning_rate": 4.243179104551557e-06, + "loss": 1.812, + "step": 18478000 + }, + { + "epoch": 91.55, + "learning_rate": 4.241940518125474e-06, + "loss": 1.8184, + "step": 18478500 + }, + { + "epoch": 91.55, + "learning_rate": 4.240704408872242e-06, + "loss": 1.7866, + "step": 18479000 + }, + { + "epoch": 91.55, + "learning_rate": 4.239465822446159e-06, + "loss": 1.7878, + "step": 18479500 + }, + { + "epoch": 91.56, + "learning_rate": 4.238227236020075e-06, + "loss": 1.8121, + "step": 18480000 + }, + { + "epoch": 91.56, + "learning_rate": 4.236988649593992e-06, + "loss": 1.7962, + "step": 18480500 + }, + { + "epoch": 91.56, + "learning_rate": 4.235750063167908e-06, + "loss": 1.7987, + "step": 18481000 + }, + { + "epoch": 91.56, + "learning_rate": 4.234511476741824e-06, + "loss": 1.8132, + "step": 18481500 + }, + { + "epoch": 91.57, + "learning_rate": 4.23327289031574e-06, + "loss": 1.8095, + "step": 18482000 + }, + { + "epoch": 91.57, + "learning_rate": 4.232034303889657e-06, + "loss": 1.8208, + "step": 18482500 + }, + { + "epoch": 91.57, + "learning_rate": 4.230795717463573e-06, + "loss": 1.788, + "step": 18483000 + }, + { + "epoch": 91.57, + "learning_rate": 4.22955713103749e-06, + "loss": 1.8069, + "step": 18483500 + }, + { + "epoch": 91.58, + "learning_rate": 4.2283185446114055e-06, + "loss": 1.7952, + "step": 18484000 + }, + { + "epoch": 91.58, + "learning_rate": 4.227079958185322e-06, + "loss": 1.8092, + "step": 18484500 + }, + { + "epoch": 91.58, + "learning_rate": 4.2258413717592385e-06, + "loss": 1.8073, + "step": 18485000 + }, + { + "epoch": 91.58, + "learning_rate": 4.2246027853331554e-06, + "loss": 1.7964, + "step": 18485500 + }, + { + "epoch": 91.59, + "learning_rate": 4.2233641989070715e-06, + "loss": 1.8035, + "step": 18486000 + }, + { + "epoch": 91.59, + "learning_rate": 4.222125612480988e-06, + "loss": 1.8093, + "step": 18486500 + }, + { + "epoch": 91.59, + "learning_rate": 4.2208870260549045e-06, + "loss": 1.8137, + "step": 18487000 + }, + { + "epoch": 91.59, + "learning_rate": 4.219648439628821e-06, + "loss": 1.7957, + "step": 18487500 + }, + { + "epoch": 91.6, + "learning_rate": 4.2184098532027375e-06, + "loss": 1.8314, + "step": 18488000 + }, + { + "epoch": 91.6, + "learning_rate": 4.217171266776653e-06, + "loss": 1.805, + "step": 18488500 + }, + { + "epoch": 91.6, + "learning_rate": 4.21593268035057e-06, + "loss": 1.7765, + "step": 18489000 + }, + { + "epoch": 91.6, + "learning_rate": 4.214694093924486e-06, + "loss": 1.7965, + "step": 18489500 + }, + { + "epoch": 91.61, + "learning_rate": 4.213455507498403e-06, + "loss": 1.7834, + "step": 18490000 + }, + { + "epoch": 91.61, + "learning_rate": 4.212216921072319e-06, + "loss": 1.7969, + "step": 18490500 + }, + { + "epoch": 91.61, + "learning_rate": 4.2109832889919395e-06, + "loss": 1.7914, + "step": 18491000 + }, + { + "epoch": 91.61, + "learning_rate": 4.209747179738708e-06, + "loss": 1.8087, + "step": 18491500 + }, + { + "epoch": 91.62, + "learning_rate": 4.208508593312624e-06, + "loss": 1.788, + "step": 18492000 + }, + { + "epoch": 91.62, + "learning_rate": 4.2072700068865404e-06, + "loss": 1.7911, + "step": 18492500 + }, + { + "epoch": 91.62, + "learning_rate": 4.206031420460457e-06, + "loss": 1.8222, + "step": 18493000 + }, + { + "epoch": 91.62, + "learning_rate": 4.2047928340343735e-06, + "loss": 1.8048, + "step": 18493500 + }, + { + "epoch": 91.63, + "learning_rate": 4.2035542476082895e-06, + "loss": 1.7843, + "step": 18494000 + }, + { + "epoch": 91.63, + "learning_rate": 4.202315661182206e-06, + "loss": 1.8122, + "step": 18494500 + }, + { + "epoch": 91.63, + "learning_rate": 4.201079551928975e-06, + "loss": 1.7916, + "step": 18495000 + }, + { + "epoch": 91.63, + "learning_rate": 4.1998409655028905e-06, + "loss": 1.824, + "step": 18495500 + }, + { + "epoch": 91.64, + "learning_rate": 4.19860485624966e-06, + "loss": 1.8045, + "step": 18496000 + }, + { + "epoch": 91.64, + "learning_rate": 4.197366269823575e-06, + "loss": 1.7892, + "step": 18496500 + }, + { + "epoch": 91.64, + "learning_rate": 4.196127683397492e-06, + "loss": 1.7754, + "step": 18497000 + }, + { + "epoch": 91.64, + "learning_rate": 4.194889096971408e-06, + "loss": 1.8226, + "step": 18497500 + }, + { + "epoch": 91.65, + "learning_rate": 4.193650510545325e-06, + "loss": 1.8181, + "step": 18498000 + }, + { + "epoch": 91.65, + "learning_rate": 4.192414401292093e-06, + "loss": 1.7891, + "step": 18498500 + }, + { + "epoch": 91.65, + "learning_rate": 4.19117581486601e-06, + "loss": 1.8071, + "step": 18499000 + }, + { + "epoch": 91.65, + "learning_rate": 4.189937228439926e-06, + "loss": 1.7856, + "step": 18499500 + }, + { + "epoch": 91.66, + "learning_rate": 4.188698642013842e-06, + "loss": 1.766, + "step": 18500000 + }, + { + "epoch": 91.66, + "learning_rate": 4.1874600555877585e-06, + "loss": 1.8005, + "step": 18500500 + }, + { + "epoch": 91.66, + "learning_rate": 4.186223946334527e-06, + "loss": 1.8345, + "step": 18501000 + }, + { + "epoch": 91.66, + "learning_rate": 4.184985359908443e-06, + "loss": 1.7774, + "step": 18501500 + }, + { + "epoch": 91.67, + "learning_rate": 4.18374677348236e-06, + "loss": 1.8103, + "step": 18502000 + }, + { + "epoch": 91.67, + "learning_rate": 4.182508187056276e-06, + "loss": 1.8228, + "step": 18502500 + }, + { + "epoch": 91.67, + "learning_rate": 4.181269600630193e-06, + "loss": 1.8207, + "step": 18503000 + }, + { + "epoch": 91.67, + "learning_rate": 4.180031014204109e-06, + "loss": 1.7878, + "step": 18503500 + }, + { + "epoch": 91.68, + "learning_rate": 4.1787924277780255e-06, + "loss": 1.819, + "step": 18504000 + }, + { + "epoch": 91.68, + "learning_rate": 4.177553841351942e-06, + "loss": 1.835, + "step": 18504500 + }, + { + "epoch": 91.68, + "learning_rate": 4.1763152549258585e-06, + "loss": 1.7822, + "step": 18505000 + }, + { + "epoch": 91.68, + "learning_rate": 4.1750766684997745e-06, + "loss": 1.7858, + "step": 18505500 + }, + { + "epoch": 91.69, + "learning_rate": 4.173838082073691e-06, + "loss": 1.7809, + "step": 18506000 + }, + { + "epoch": 91.69, + "learning_rate": 4.1725994956476076e-06, + "loss": 1.8198, + "step": 18506500 + }, + { + "epoch": 91.69, + "learning_rate": 4.1713633863943755e-06, + "loss": 1.8045, + "step": 18507000 + }, + { + "epoch": 91.69, + "learning_rate": 4.170129754313996e-06, + "loss": 1.7881, + "step": 18507500 + }, + { + "epoch": 91.7, + "learning_rate": 4.168891167887913e-06, + "loss": 1.8333, + "step": 18508000 + }, + { + "epoch": 91.7, + "learning_rate": 4.16765258146183e-06, + "loss": 1.8031, + "step": 18508500 + }, + { + "epoch": 91.7, + "learning_rate": 4.166413995035745e-06, + "loss": 1.8146, + "step": 18509000 + }, + { + "epoch": 91.7, + "learning_rate": 4.165175408609662e-06, + "loss": 1.8147, + "step": 18509500 + }, + { + "epoch": 91.7, + "learning_rate": 4.163936822183578e-06, + "loss": 1.8014, + "step": 18510000 + }, + { + "epoch": 91.71, + "learning_rate": 4.162698235757495e-06, + "loss": 1.8102, + "step": 18510500 + }, + { + "epoch": 91.71, + "learning_rate": 4.161459649331411e-06, + "loss": 1.8136, + "step": 18511000 + }, + { + "epoch": 91.71, + "learning_rate": 4.160221062905327e-06, + "loss": 1.8024, + "step": 18511500 + }, + { + "epoch": 91.71, + "learning_rate": 4.1589824764792435e-06, + "loss": 1.811, + "step": 18512000 + }, + { + "epoch": 91.72, + "learning_rate": 4.15774389005316e-06, + "loss": 1.8004, + "step": 18512500 + }, + { + "epoch": 91.72, + "learning_rate": 4.1565053036270765e-06, + "loss": 1.8176, + "step": 18513000 + }, + { + "epoch": 91.72, + "learning_rate": 4.155266717200993e-06, + "loss": 1.8076, + "step": 18513500 + }, + { + "epoch": 91.72, + "learning_rate": 4.1540281307749095e-06, + "loss": 1.8059, + "step": 18514000 + }, + { + "epoch": 91.73, + "learning_rate": 4.152792021521678e-06, + "loss": 1.7871, + "step": 18514500 + }, + { + "epoch": 91.73, + "learning_rate": 4.1515534350955935e-06, + "loss": 1.8129, + "step": 18515000 + }, + { + "epoch": 91.73, + "learning_rate": 4.1503148486695105e-06, + "loss": 1.811, + "step": 18515500 + }, + { + "epoch": 91.73, + "learning_rate": 4.149076262243427e-06, + "loss": 1.8213, + "step": 18516000 + }, + { + "epoch": 91.74, + "learning_rate": 4.1478376758173435e-06, + "loss": 1.8046, + "step": 18516500 + }, + { + "epoch": 91.74, + "learning_rate": 4.14659908939126e-06, + "loss": 1.8373, + "step": 18517000 + }, + { + "epoch": 91.74, + "learning_rate": 4.145362980138028e-06, + "loss": 1.8008, + "step": 18517500 + }, + { + "epoch": 91.74, + "learning_rate": 4.144126870884796e-06, + "loss": 1.7921, + "step": 18518000 + }, + { + "epoch": 91.75, + "learning_rate": 4.142888284458713e-06, + "loss": 1.799, + "step": 18518500 + }, + { + "epoch": 91.75, + "learning_rate": 4.14164969803263e-06, + "loss": 1.804, + "step": 18519000 + }, + { + "epoch": 91.75, + "learning_rate": 4.1404111116065454e-06, + "loss": 1.8077, + "step": 18519500 + }, + { + "epoch": 91.75, + "learning_rate": 4.139172525180462e-06, + "loss": 1.7993, + "step": 18520000 + }, + { + "epoch": 91.76, + "learning_rate": 4.1379339387543784e-06, + "loss": 1.7859, + "step": 18520500 + }, + { + "epoch": 91.76, + "learning_rate": 4.136695352328295e-06, + "loss": 1.8017, + "step": 18521000 + }, + { + "epoch": 91.76, + "learning_rate": 4.1354567659022114e-06, + "loss": 1.8077, + "step": 18521500 + }, + { + "epoch": 91.76, + "learning_rate": 4.1342181794761275e-06, + "loss": 1.814, + "step": 18522000 + }, + { + "epoch": 91.77, + "learning_rate": 4.132979593050044e-06, + "loss": 1.7972, + "step": 18522500 + }, + { + "epoch": 91.77, + "learning_rate": 4.1317410066239605e-06, + "loss": 1.7909, + "step": 18523000 + }, + { + "epoch": 91.77, + "learning_rate": 4.130502420197877e-06, + "loss": 1.8196, + "step": 18523500 + }, + { + "epoch": 91.77, + "learning_rate": 4.1292663109446454e-06, + "loss": 1.7972, + "step": 18524000 + }, + { + "epoch": 91.78, + "learning_rate": 4.1280277245185615e-06, + "loss": 1.8111, + "step": 18524500 + }, + { + "epoch": 91.78, + "learning_rate": 4.1267891380924784e-06, + "loss": 1.8073, + "step": 18525000 + }, + { + "epoch": 91.78, + "learning_rate": 4.125550551666394e-06, + "loss": 1.8265, + "step": 18525500 + }, + { + "epoch": 91.78, + "learning_rate": 4.124314442413163e-06, + "loss": 1.8081, + "step": 18526000 + }, + { + "epoch": 91.79, + "learning_rate": 4.123075855987079e-06, + "loss": 1.8251, + "step": 18526500 + }, + { + "epoch": 91.79, + "learning_rate": 4.121839746733848e-06, + "loss": 1.799, + "step": 18527000 + }, + { + "epoch": 91.79, + "learning_rate": 4.120601160307764e-06, + "loss": 1.8239, + "step": 18527500 + }, + { + "epoch": 91.79, + "learning_rate": 4.11936257388168e-06, + "loss": 1.8177, + "step": 18528000 + }, + { + "epoch": 91.8, + "learning_rate": 4.1181239874555965e-06, + "loss": 1.7974, + "step": 18528500 + }, + { + "epoch": 91.8, + "learning_rate": 4.116885401029513e-06, + "loss": 1.796, + "step": 18529000 + }, + { + "epoch": 91.8, + "learning_rate": 4.11564681460343e-06, + "loss": 1.8176, + "step": 18529500 + }, + { + "epoch": 91.8, + "learning_rate": 4.1144082281773456e-06, + "loss": 1.7928, + "step": 18530000 + }, + { + "epoch": 91.81, + "learning_rate": 4.1131696417512625e-06, + "loss": 1.781, + "step": 18530500 + }, + { + "epoch": 91.81, + "learning_rate": 4.1119310553251786e-06, + "loss": 1.8005, + "step": 18531000 + }, + { + "epoch": 91.81, + "learning_rate": 4.110694946071947e-06, + "loss": 1.8261, + "step": 18531500 + }, + { + "epoch": 91.81, + "learning_rate": 4.1094563596458634e-06, + "loss": 1.8171, + "step": 18532000 + }, + { + "epoch": 91.82, + "learning_rate": 4.10821777321978e-06, + "loss": 1.7985, + "step": 18532500 + }, + { + "epoch": 91.82, + "learning_rate": 4.106981663966548e-06, + "loss": 1.7756, + "step": 18533000 + }, + { + "epoch": 91.82, + "learning_rate": 4.105743077540465e-06, + "loss": 1.7974, + "step": 18533500 + }, + { + "epoch": 91.82, + "learning_rate": 4.104504491114381e-06, + "loss": 1.8083, + "step": 18534000 + }, + { + "epoch": 91.83, + "learning_rate": 4.1032659046882974e-06, + "loss": 1.794, + "step": 18534500 + }, + { + "epoch": 91.83, + "learning_rate": 4.1020273182622135e-06, + "loss": 1.7819, + "step": 18535000 + }, + { + "epoch": 91.83, + "learning_rate": 4.1007887318361304e-06, + "loss": 1.8181, + "step": 18535500 + }, + { + "epoch": 91.83, + "learning_rate": 4.0995501454100465e-06, + "loss": 1.8284, + "step": 18536000 + }, + { + "epoch": 91.84, + "learning_rate": 4.0983115589839634e-06, + "loss": 1.7931, + "step": 18536500 + }, + { + "epoch": 91.84, + "learning_rate": 4.097072972557879e-06, + "loss": 1.8073, + "step": 18537000 + }, + { + "epoch": 91.84, + "learning_rate": 4.095834386131796e-06, + "loss": 1.8103, + "step": 18537500 + }, + { + "epoch": 91.84, + "learning_rate": 4.094595799705712e-06, + "loss": 1.8161, + "step": 18538000 + }, + { + "epoch": 91.85, + "learning_rate": 4.093357213279629e-06, + "loss": 1.8134, + "step": 18538500 + }, + { + "epoch": 91.85, + "learning_rate": 4.092118626853545e-06, + "loss": 1.8162, + "step": 18539000 + }, + { + "epoch": 91.85, + "learning_rate": 4.090880040427461e-06, + "loss": 1.8092, + "step": 18539500 + }, + { + "epoch": 91.85, + "learning_rate": 4.089641454001378e-06, + "loss": 1.8184, + "step": 18540000 + }, + { + "epoch": 91.86, + "learning_rate": 4.088402867575294e-06, + "loss": 1.7973, + "step": 18540500 + }, + { + "epoch": 91.86, + "learning_rate": 4.087166758322063e-06, + "loss": 1.8087, + "step": 18541000 + }, + { + "epoch": 91.86, + "learning_rate": 4.085928171895979e-06, + "loss": 1.7835, + "step": 18541500 + }, + { + "epoch": 91.86, + "learning_rate": 4.084689585469896e-06, + "loss": 1.8248, + "step": 18542000 + }, + { + "epoch": 91.87, + "learning_rate": 4.0834534762166636e-06, + "loss": 1.8075, + "step": 18542500 + }, + { + "epoch": 91.87, + "learning_rate": 4.0822148897905805e-06, + "loss": 1.8304, + "step": 18543000 + }, + { + "epoch": 91.87, + "learning_rate": 4.080976303364497e-06, + "loss": 1.7839, + "step": 18543500 + }, + { + "epoch": 91.87, + "learning_rate": 4.079737716938413e-06, + "loss": 1.7811, + "step": 18544000 + }, + { + "epoch": 91.88, + "learning_rate": 4.078499130512329e-06, + "loss": 1.7882, + "step": 18544500 + }, + { + "epoch": 91.88, + "learning_rate": 4.077260544086246e-06, + "loss": 1.8047, + "step": 18545000 + }, + { + "epoch": 91.88, + "learning_rate": 4.076021957660162e-06, + "loss": 1.7723, + "step": 18545500 + }, + { + "epoch": 91.88, + "learning_rate": 4.0747858484069306e-06, + "loss": 1.8135, + "step": 18546000 + }, + { + "epoch": 91.89, + "learning_rate": 4.073547261980847e-06, + "loss": 1.8053, + "step": 18546500 + }, + { + "epoch": 91.89, + "learning_rate": 4.0723086755547636e-06, + "loss": 1.8002, + "step": 18547000 + }, + { + "epoch": 91.89, + "learning_rate": 4.0710725663015315e-06, + "loss": 1.8031, + "step": 18547500 + }, + { + "epoch": 91.89, + "learning_rate": 4.0698339798754485e-06, + "loss": 1.7937, + "step": 18548000 + }, + { + "epoch": 91.9, + "learning_rate": 4.068595393449364e-06, + "loss": 1.7992, + "step": 18548500 + }, + { + "epoch": 91.9, + "learning_rate": 4.067356807023281e-06, + "loss": 1.8354, + "step": 18549000 + }, + { + "epoch": 91.9, + "learning_rate": 4.066118220597197e-06, + "loss": 1.7761, + "step": 18549500 + }, + { + "epoch": 91.9, + "learning_rate": 4.064879634171114e-06, + "loss": 1.7857, + "step": 18550000 + }, + { + "epoch": 91.91, + "learning_rate": 4.06364104774503e-06, + "loss": 1.8107, + "step": 18550500 + }, + { + "epoch": 91.91, + "learning_rate": 4.062402461318946e-06, + "loss": 1.8407, + "step": 18551000 + }, + { + "epoch": 91.91, + "learning_rate": 4.061166352065715e-06, + "loss": 1.8099, + "step": 18551500 + }, + { + "epoch": 91.91, + "learning_rate": 4.059930242812483e-06, + "loss": 1.8294, + "step": 18552000 + }, + { + "epoch": 91.92, + "learning_rate": 4.0586916563863995e-06, + "loss": 1.7949, + "step": 18552500 + }, + { + "epoch": 91.92, + "learning_rate": 4.057455547133168e-06, + "loss": 1.793, + "step": 18553000 + }, + { + "epoch": 91.92, + "learning_rate": 4.056216960707084e-06, + "loss": 1.8161, + "step": 18553500 + }, + { + "epoch": 91.92, + "learning_rate": 4.0549783742810005e-06, + "loss": 1.8104, + "step": 18554000 + }, + { + "epoch": 91.93, + "learning_rate": 4.0537397878549166e-06, + "loss": 1.7961, + "step": 18554500 + }, + { + "epoch": 91.93, + "learning_rate": 4.0525012014288335e-06, + "loss": 1.8115, + "step": 18555000 + }, + { + "epoch": 91.93, + "learning_rate": 4.0512626150027496e-06, + "loss": 1.809, + "step": 18555500 + }, + { + "epoch": 91.93, + "learning_rate": 4.0500240285766665e-06, + "loss": 1.8076, + "step": 18556000 + }, + { + "epoch": 91.94, + "learning_rate": 4.0487879193234345e-06, + "loss": 1.8158, + "step": 18556500 + }, + { + "epoch": 91.94, + "learning_rate": 4.047549332897351e-06, + "loss": 1.7911, + "step": 18557000 + }, + { + "epoch": 91.94, + "learning_rate": 4.0463107464712675e-06, + "loss": 1.8222, + "step": 18557500 + }, + { + "epoch": 91.94, + "learning_rate": 4.0450721600451835e-06, + "loss": 1.7996, + "step": 18558000 + }, + { + "epoch": 91.95, + "learning_rate": 4.0438335736191005e-06, + "loss": 1.7807, + "step": 18558500 + }, + { + "epoch": 91.95, + "learning_rate": 4.0425949871930165e-06, + "loss": 1.8013, + "step": 18559000 + }, + { + "epoch": 91.95, + "learning_rate": 4.0413564007669335e-06, + "loss": 1.8112, + "step": 18559500 + }, + { + "epoch": 91.95, + "learning_rate": 4.040117814340849e-06, + "loss": 1.8027, + "step": 18560000 + }, + { + "epoch": 91.96, + "learning_rate": 4.038879227914766e-06, + "loss": 1.8141, + "step": 18560500 + }, + { + "epoch": 91.96, + "learning_rate": 4.037640641488682e-06, + "loss": 1.8385, + "step": 18561000 + }, + { + "epoch": 91.96, + "learning_rate": 4.036402055062599e-06, + "loss": 1.8135, + "step": 18561500 + }, + { + "epoch": 91.96, + "learning_rate": 4.035163468636515e-06, + "loss": 1.7873, + "step": 18562000 + }, + { + "epoch": 91.97, + "learning_rate": 4.033924882210431e-06, + "loss": 1.8105, + "step": 18562500 + }, + { + "epoch": 91.97, + "learning_rate": 4.032686295784347e-06, + "loss": 1.8076, + "step": 18563000 + }, + { + "epoch": 91.97, + "learning_rate": 4.031447709358264e-06, + "loss": 1.7805, + "step": 18563500 + }, + { + "epoch": 91.97, + "learning_rate": 4.030209122932181e-06, + "loss": 1.7807, + "step": 18564000 + }, + { + "epoch": 91.97, + "learning_rate": 4.028970536506097e-06, + "loss": 1.8205, + "step": 18564500 + }, + { + "epoch": 91.98, + "learning_rate": 4.027734427252865e-06, + "loss": 1.8214, + "step": 18565000 + }, + { + "epoch": 91.98, + "learning_rate": 4.026498317999634e-06, + "loss": 1.7879, + "step": 18565500 + }, + { + "epoch": 91.98, + "learning_rate": 4.02525973157355e-06, + "loss": 1.7966, + "step": 18566000 + }, + { + "epoch": 91.98, + "learning_rate": 4.0240236223203185e-06, + "loss": 1.7945, + "step": 18566500 + }, + { + "epoch": 91.99, + "learning_rate": 4.0227850358942346e-06, + "loss": 1.8153, + "step": 18567000 + }, + { + "epoch": 91.99, + "learning_rate": 4.0215464494681515e-06, + "loss": 1.8056, + "step": 18567500 + }, + { + "epoch": 91.99, + "learning_rate": 4.020307863042068e-06, + "loss": 1.7954, + "step": 18568000 + }, + { + "epoch": 91.99, + "learning_rate": 4.019069276615984e-06, + "loss": 1.8094, + "step": 18568500 + }, + { + "epoch": 92.0, + "learning_rate": 4.017830690189901e-06, + "loss": 1.803, + "step": 18569000 + }, + { + "epoch": 92.0, + "learning_rate": 4.016592103763817e-06, + "loss": 1.7962, + "step": 18569500 + }, + { + "epoch": 92.0, + "eval_accuracy": 0.6854704703021435, + "eval_accuracy_mlm": 0.646739844825033, + "eval_accuracy_nsp": 0.8680650614412513, + "eval_loss": 2.325012445449829, + "eval_runtime": 147.0403, + "eval_samples_per_second": 1733.939, + "eval_steps_per_second": 72.252, + "step": 18569556 + }, + { + "epoch": 92.0, + "learning_rate": 4.015353517337734e-06, + "loss": 1.7942, + "step": 18570000 + }, + { + "epoch": 92.0, + "learning_rate": 4.014114930911649e-06, + "loss": 1.791, + "step": 18570500 + }, + { + "epoch": 92.01, + "learning_rate": 4.012876344485566e-06, + "loss": 1.7914, + "step": 18571000 + }, + { + "epoch": 92.01, + "learning_rate": 4.011637758059482e-06, + "loss": 1.7903, + "step": 18571500 + }, + { + "epoch": 92.01, + "learning_rate": 4.010399171633399e-06, + "loss": 1.7944, + "step": 18572000 + }, + { + "epoch": 92.01, + "learning_rate": 4.009160585207315e-06, + "loss": 1.7605, + "step": 18572500 + }, + { + "epoch": 92.02, + "learning_rate": 4.007921998781231e-06, + "loss": 1.7905, + "step": 18573000 + }, + { + "epoch": 92.02, + "learning_rate": 4.006685889528e-06, + "loss": 1.8214, + "step": 18573500 + }, + { + "epoch": 92.02, + "learning_rate": 4.005447303101916e-06, + "loss": 1.8067, + "step": 18574000 + }, + { + "epoch": 92.02, + "learning_rate": 4.004208716675832e-06, + "loss": 1.8162, + "step": 18574500 + }, + { + "epoch": 92.03, + "learning_rate": 4.002970130249749e-06, + "loss": 1.7994, + "step": 18575000 + }, + { + "epoch": 92.03, + "learning_rate": 4.001731543823665e-06, + "loss": 1.7994, + "step": 18575500 + }, + { + "epoch": 92.03, + "learning_rate": 4.000492957397582e-06, + "loss": 1.7949, + "step": 18576000 + }, + { + "epoch": 92.03, + "learning_rate": 3.99925684814435e-06, + "loss": 1.7809, + "step": 18576500 + }, + { + "epoch": 92.04, + "learning_rate": 3.998018261718267e-06, + "loss": 1.8145, + "step": 18577000 + }, + { + "epoch": 92.04, + "learning_rate": 3.996782152465035e-06, + "loss": 1.7966, + "step": 18577500 + }, + { + "epoch": 92.04, + "learning_rate": 3.9955460432118035e-06, + "loss": 1.8132, + "step": 18578000 + }, + { + "epoch": 92.04, + "learning_rate": 3.99430745678572e-06, + "loss": 1.7801, + "step": 18578500 + }, + { + "epoch": 92.05, + "learning_rate": 3.9930688703596365e-06, + "loss": 1.7611, + "step": 18579000 + }, + { + "epoch": 92.05, + "learning_rate": 3.991830283933553e-06, + "loss": 1.7756, + "step": 18579500 + }, + { + "epoch": 92.05, + "learning_rate": 3.990591697507469e-06, + "loss": 1.8085, + "step": 18580000 + }, + { + "epoch": 92.05, + "learning_rate": 3.989353111081385e-06, + "loss": 1.7942, + "step": 18580500 + }, + { + "epoch": 92.06, + "learning_rate": 3.988114524655302e-06, + "loss": 1.7956, + "step": 18581000 + }, + { + "epoch": 92.06, + "learning_rate": 3.986875938229219e-06, + "loss": 1.8205, + "step": 18581500 + }, + { + "epoch": 92.06, + "learning_rate": 3.985637351803134e-06, + "loss": 1.7842, + "step": 18582000 + }, + { + "epoch": 92.06, + "learning_rate": 3.984398765377051e-06, + "loss": 1.7868, + "step": 18582500 + }, + { + "epoch": 92.07, + "learning_rate": 3.983160178950967e-06, + "loss": 1.7991, + "step": 18583000 + }, + { + "epoch": 92.07, + "learning_rate": 3.981921592524884e-06, + "loss": 1.7914, + "step": 18583500 + }, + { + "epoch": 92.07, + "learning_rate": 3.9806830060988e-06, + "loss": 1.7998, + "step": 18584000 + }, + { + "epoch": 92.07, + "learning_rate": 3.979446896845569e-06, + "loss": 1.7963, + "step": 18584500 + }, + { + "epoch": 92.08, + "learning_rate": 3.978210787592337e-06, + "loss": 1.8, + "step": 18585000 + }, + { + "epoch": 92.08, + "learning_rate": 3.9769722011662536e-06, + "loss": 1.7837, + "step": 18585500 + }, + { + "epoch": 92.08, + "learning_rate": 3.97573361474017e-06, + "loss": 1.8154, + "step": 18586000 + }, + { + "epoch": 92.08, + "learning_rate": 3.974495028314086e-06, + "loss": 1.8086, + "step": 18586500 + }, + { + "epoch": 92.09, + "learning_rate": 3.973256441888002e-06, + "loss": 1.789, + "step": 18587000 + }, + { + "epoch": 92.09, + "learning_rate": 3.972022809807623e-06, + "loss": 1.7797, + "step": 18587500 + }, + { + "epoch": 92.09, + "learning_rate": 3.9707842233815394e-06, + "loss": 1.8133, + "step": 18588000 + }, + { + "epoch": 92.09, + "learning_rate": 3.969545636955456e-06, + "loss": 1.7945, + "step": 18588500 + }, + { + "epoch": 92.1, + "learning_rate": 3.968307050529372e-06, + "loss": 1.7803, + "step": 18589000 + }, + { + "epoch": 92.1, + "learning_rate": 3.9670684641032885e-06, + "loss": 1.7893, + "step": 18589500 + }, + { + "epoch": 92.1, + "learning_rate": 3.965829877677205e-06, + "loss": 1.8154, + "step": 18590000 + }, + { + "epoch": 92.1, + "learning_rate": 3.9645912912511215e-06, + "loss": 1.7905, + "step": 18590500 + }, + { + "epoch": 92.11, + "learning_rate": 3.963352704825038e-06, + "loss": 1.7984, + "step": 18591000 + }, + { + "epoch": 92.11, + "learning_rate": 3.962114118398954e-06, + "loss": 1.7857, + "step": 18591500 + }, + { + "epoch": 92.11, + "learning_rate": 3.96087553197287e-06, + "loss": 1.8122, + "step": 18592000 + }, + { + "epoch": 92.11, + "learning_rate": 3.959636945546787e-06, + "loss": 1.7881, + "step": 18592500 + }, + { + "epoch": 92.12, + "learning_rate": 3.958398359120703e-06, + "loss": 1.8289, + "step": 18593000 + }, + { + "epoch": 92.12, + "learning_rate": 3.957162249867472e-06, + "loss": 1.7944, + "step": 18593500 + }, + { + "epoch": 92.12, + "learning_rate": 3.9559261406142396e-06, + "loss": 1.7887, + "step": 18594000 + }, + { + "epoch": 92.12, + "learning_rate": 3.9546875541881565e-06, + "loss": 1.8001, + "step": 18594500 + }, + { + "epoch": 92.13, + "learning_rate": 3.9534489677620726e-06, + "loss": 1.7967, + "step": 18595000 + }, + { + "epoch": 92.13, + "learning_rate": 3.9522103813359895e-06, + "loss": 1.7522, + "step": 18595500 + }, + { + "epoch": 92.13, + "learning_rate": 3.950971794909905e-06, + "loss": 1.778, + "step": 18596000 + }, + { + "epoch": 92.13, + "learning_rate": 3.949733208483822e-06, + "loss": 1.7917, + "step": 18596500 + }, + { + "epoch": 92.14, + "learning_rate": 3.948494622057739e-06, + "loss": 1.7987, + "step": 18597000 + }, + { + "epoch": 92.14, + "learning_rate": 3.947256035631655e-06, + "loss": 1.8182, + "step": 18597500 + }, + { + "epoch": 92.14, + "learning_rate": 3.946019926378423e-06, + "loss": 1.7826, + "step": 18598000 + }, + { + "epoch": 92.14, + "learning_rate": 3.9447813399523396e-06, + "loss": 1.7938, + "step": 18598500 + }, + { + "epoch": 92.15, + "learning_rate": 3.943542753526256e-06, + "loss": 1.8187, + "step": 18599000 + }, + { + "epoch": 92.15, + "learning_rate": 3.942304167100172e-06, + "loss": 1.7993, + "step": 18599500 + }, + { + "epoch": 92.15, + "learning_rate": 3.941065580674089e-06, + "loss": 1.8146, + "step": 18600000 + }, + { + "epoch": 92.15, + "learning_rate": 3.939829471420857e-06, + "loss": 1.7917, + "step": 18600500 + }, + { + "epoch": 92.16, + "learning_rate": 3.9385908849947735e-06, + "loss": 1.8213, + "step": 18601000 + }, + { + "epoch": 92.16, + "learning_rate": 3.93735229856869e-06, + "loss": 1.8038, + "step": 18601500 + }, + { + "epoch": 92.16, + "learning_rate": 3.9361137121426065e-06, + "loss": 1.7937, + "step": 18602000 + }, + { + "epoch": 92.16, + "learning_rate": 3.934875125716523e-06, + "loss": 1.7866, + "step": 18602500 + }, + { + "epoch": 92.17, + "learning_rate": 3.933636539290439e-06, + "loss": 1.8031, + "step": 18603000 + }, + { + "epoch": 92.17, + "learning_rate": 3.932397952864355e-06, + "loss": 1.7807, + "step": 18603500 + }, + { + "epoch": 92.17, + "learning_rate": 3.931161843611124e-06, + "loss": 1.8292, + "step": 18604000 + }, + { + "epoch": 92.17, + "learning_rate": 3.92992325718504e-06, + "loss": 1.8037, + "step": 18604500 + }, + { + "epoch": 92.18, + "learning_rate": 3.928684670758957e-06, + "loss": 1.8093, + "step": 18605000 + }, + { + "epoch": 92.18, + "learning_rate": 3.927446084332873e-06, + "loss": 1.8048, + "step": 18605500 + }, + { + "epoch": 92.18, + "learning_rate": 3.92620749790679e-06, + "loss": 1.7917, + "step": 18606000 + }, + { + "epoch": 92.18, + "learning_rate": 3.924968911480705e-06, + "loss": 1.7729, + "step": 18606500 + }, + { + "epoch": 92.19, + "learning_rate": 3.923730325054622e-06, + "loss": 1.8028, + "step": 18607000 + }, + { + "epoch": 92.19, + "learning_rate": 3.922491738628539e-06, + "loss": 1.8052, + "step": 18607500 + }, + { + "epoch": 92.19, + "learning_rate": 3.921255629375307e-06, + "loss": 1.8231, + "step": 18608000 + }, + { + "epoch": 92.19, + "learning_rate": 3.920017042949223e-06, + "loss": 1.7973, + "step": 18608500 + }, + { + "epoch": 92.2, + "learning_rate": 3.91877845652314e-06, + "loss": 1.8125, + "step": 18609000 + }, + { + "epoch": 92.2, + "learning_rate": 3.917542347269908e-06, + "loss": 1.775, + "step": 18609500 + }, + { + "epoch": 92.2, + "learning_rate": 3.9163037608438246e-06, + "loss": 1.7731, + "step": 18610000 + }, + { + "epoch": 92.2, + "learning_rate": 3.915065174417741e-06, + "loss": 1.8171, + "step": 18610500 + }, + { + "epoch": 92.21, + "learning_rate": 3.913826587991657e-06, + "loss": 1.8017, + "step": 18611000 + }, + { + "epoch": 92.21, + "learning_rate": 3.912588001565574e-06, + "loss": 1.795, + "step": 18611500 + }, + { + "epoch": 92.21, + "learning_rate": 3.91134941513949e-06, + "loss": 1.7984, + "step": 18612000 + }, + { + "epoch": 92.21, + "learning_rate": 3.910110828713407e-06, + "loss": 1.7969, + "step": 18612500 + }, + { + "epoch": 92.22, + "learning_rate": 3.908872242287323e-06, + "loss": 1.7622, + "step": 18613000 + }, + { + "epoch": 92.22, + "learning_rate": 3.907633655861239e-06, + "loss": 1.7718, + "step": 18613500 + }, + { + "epoch": 92.22, + "learning_rate": 3.906397546608008e-06, + "loss": 1.7906, + "step": 18614000 + }, + { + "epoch": 92.22, + "learning_rate": 3.905158960181924e-06, + "loss": 1.8051, + "step": 18614500 + }, + { + "epoch": 92.23, + "learning_rate": 3.903925328101544e-06, + "loss": 1.8252, + "step": 18615000 + }, + { + "epoch": 92.23, + "learning_rate": 3.9026867416754605e-06, + "loss": 1.7962, + "step": 18615500 + }, + { + "epoch": 92.23, + "learning_rate": 3.901448155249377e-06, + "loss": 1.8158, + "step": 18616000 + }, + { + "epoch": 92.23, + "learning_rate": 3.9002095688232935e-06, + "loss": 1.7909, + "step": 18616500 + }, + { + "epoch": 92.24, + "learning_rate": 3.89897098239721e-06, + "loss": 1.807, + "step": 18617000 + }, + { + "epoch": 92.24, + "learning_rate": 3.8977323959711265e-06, + "loss": 1.7949, + "step": 18617500 + }, + { + "epoch": 92.24, + "learning_rate": 3.896493809545043e-06, + "loss": 1.7948, + "step": 18618000 + }, + { + "epoch": 92.24, + "learning_rate": 3.8952552231189595e-06, + "loss": 1.7675, + "step": 18618500 + }, + { + "epoch": 92.24, + "learning_rate": 3.894016636692875e-06, + "loss": 1.786, + "step": 18619000 + }, + { + "epoch": 92.25, + "learning_rate": 3.892778050266792e-06, + "loss": 1.8001, + "step": 18619500 + }, + { + "epoch": 92.25, + "learning_rate": 3.89154194101356e-06, + "loss": 1.8181, + "step": 18620000 + }, + { + "epoch": 92.25, + "learning_rate": 3.890305831760329e-06, + "loss": 1.7722, + "step": 18620500 + }, + { + "epoch": 92.25, + "learning_rate": 3.889067245334245e-06, + "loss": 1.7821, + "step": 18621000 + }, + { + "epoch": 92.26, + "learning_rate": 3.8878286589081615e-06, + "loss": 1.7842, + "step": 18621500 + }, + { + "epoch": 92.26, + "learning_rate": 3.8865900724820775e-06, + "loss": 1.7998, + "step": 18622000 + }, + { + "epoch": 92.26, + "learning_rate": 3.8853514860559945e-06, + "loss": 1.7808, + "step": 18622500 + }, + { + "epoch": 92.26, + "learning_rate": 3.8841128996299106e-06, + "loss": 1.7926, + "step": 18623000 + }, + { + "epoch": 92.27, + "learning_rate": 3.882874313203827e-06, + "loss": 1.8011, + "step": 18623500 + }, + { + "epoch": 92.27, + "learning_rate": 3.881635726777743e-06, + "loss": 1.8053, + "step": 18624000 + }, + { + "epoch": 92.27, + "learning_rate": 3.88039714035166e-06, + "loss": 1.7997, + "step": 18624500 + }, + { + "epoch": 92.27, + "learning_rate": 3.8791585539255766e-06, + "loss": 1.8283, + "step": 18625000 + }, + { + "epoch": 92.28, + "learning_rate": 3.877919967499493e-06, + "loss": 1.8005, + "step": 18625500 + }, + { + "epoch": 92.28, + "learning_rate": 3.876681381073409e-06, + "loss": 1.8112, + "step": 18626000 + }, + { + "epoch": 92.28, + "learning_rate": 3.875442794647325e-06, + "loss": 1.7964, + "step": 18626500 + }, + { + "epoch": 92.28, + "learning_rate": 3.874204208221242e-06, + "loss": 1.7872, + "step": 18627000 + }, + { + "epoch": 92.29, + "learning_rate": 3.87296809896801e-06, + "loss": 1.7815, + "step": 18627500 + }, + { + "epoch": 92.29, + "learning_rate": 3.8717319897147785e-06, + "loss": 1.8043, + "step": 18628000 + }, + { + "epoch": 92.29, + "learning_rate": 3.870493403288695e-06, + "loss": 1.8238, + "step": 18628500 + }, + { + "epoch": 92.29, + "learning_rate": 3.8692548168626115e-06, + "loss": 1.791, + "step": 18629000 + }, + { + "epoch": 92.3, + "learning_rate": 3.868016230436528e-06, + "loss": 1.8019, + "step": 18629500 + }, + { + "epoch": 92.3, + "learning_rate": 3.866780121183296e-06, + "loss": 1.7865, + "step": 18630000 + }, + { + "epoch": 92.3, + "learning_rate": 3.8655415347572125e-06, + "loss": 1.7919, + "step": 18630500 + }, + { + "epoch": 92.3, + "learning_rate": 3.8643029483311294e-06, + "loss": 1.7932, + "step": 18631000 + }, + { + "epoch": 92.31, + "learning_rate": 3.8630643619050455e-06, + "loss": 1.8104, + "step": 18631500 + }, + { + "epoch": 92.31, + "learning_rate": 3.861828252651814e-06, + "loss": 1.7993, + "step": 18632000 + }, + { + "epoch": 92.31, + "learning_rate": 3.86058966622573e-06, + "loss": 1.7779, + "step": 18632500 + }, + { + "epoch": 92.31, + "learning_rate": 3.8593510797996465e-06, + "loss": 1.8061, + "step": 18633000 + }, + { + "epoch": 92.32, + "learning_rate": 3.8581124933735626e-06, + "loss": 1.8067, + "step": 18633500 + }, + { + "epoch": 92.32, + "learning_rate": 3.8568739069474795e-06, + "loss": 1.8043, + "step": 18634000 + }, + { + "epoch": 92.32, + "learning_rate": 3.8556353205213956e-06, + "loss": 1.7822, + "step": 18634500 + }, + { + "epoch": 92.32, + "learning_rate": 3.854399211268164e-06, + "loss": 1.8302, + "step": 18635000 + }, + { + "epoch": 92.33, + "learning_rate": 3.8531606248420805e-06, + "loss": 1.7923, + "step": 18635500 + }, + { + "epoch": 92.33, + "learning_rate": 3.8519220384159965e-06, + "loss": 1.7932, + "step": 18636000 + }, + { + "epoch": 92.33, + "learning_rate": 3.850683451989913e-06, + "loss": 1.8017, + "step": 18636500 + }, + { + "epoch": 92.33, + "learning_rate": 3.8494448655638295e-06, + "loss": 1.8092, + "step": 18637000 + }, + { + "epoch": 92.34, + "learning_rate": 3.848206279137746e-06, + "loss": 1.8035, + "step": 18637500 + }, + { + "epoch": 92.34, + "learning_rate": 3.8469676927116626e-06, + "loss": 1.8179, + "step": 18638000 + }, + { + "epoch": 92.34, + "learning_rate": 3.845729106285579e-06, + "loss": 1.7945, + "step": 18638500 + }, + { + "epoch": 92.34, + "learning_rate": 3.844490519859495e-06, + "loss": 1.7748, + "step": 18639000 + }, + { + "epoch": 92.35, + "learning_rate": 3.843251933433412e-06, + "loss": 1.7926, + "step": 18639500 + }, + { + "epoch": 92.35, + "learning_rate": 3.842013347007328e-06, + "loss": 1.7776, + "step": 18640000 + }, + { + "epoch": 92.35, + "learning_rate": 3.8407772377540965e-06, + "loss": 1.816, + "step": 18640500 + }, + { + "epoch": 92.35, + "learning_rate": 3.839538651328013e-06, + "loss": 1.8118, + "step": 18641000 + }, + { + "epoch": 92.36, + "learning_rate": 3.8383000649019295e-06, + "loss": 1.7924, + "step": 18641500 + }, + { + "epoch": 92.36, + "learning_rate": 3.837061478475845e-06, + "loss": 1.8098, + "step": 18642000 + }, + { + "epoch": 92.36, + "learning_rate": 3.835822892049762e-06, + "loss": 1.8093, + "step": 18642500 + }, + { + "epoch": 92.36, + "learning_rate": 3.8345867827965305e-06, + "loss": 1.8058, + "step": 18643000 + }, + { + "epoch": 92.37, + "learning_rate": 3.833348196370447e-06, + "loss": 1.8155, + "step": 18643500 + }, + { + "epoch": 92.37, + "learning_rate": 3.832109609944363e-06, + "loss": 1.7955, + "step": 18644000 + }, + { + "epoch": 92.37, + "learning_rate": 3.83087102351828e-06, + "loss": 1.7809, + "step": 18644500 + }, + { + "epoch": 92.37, + "learning_rate": 3.829632437092196e-06, + "loss": 1.7978, + "step": 18645000 + }, + { + "epoch": 92.38, + "learning_rate": 3.828393850666112e-06, + "loss": 1.8403, + "step": 18645500 + }, + { + "epoch": 92.38, + "learning_rate": 3.827155264240028e-06, + "loss": 1.7823, + "step": 18646000 + }, + { + "epoch": 92.38, + "learning_rate": 3.825916677813945e-06, + "loss": 1.7931, + "step": 18646500 + }, + { + "epoch": 92.38, + "learning_rate": 3.824678091387861e-06, + "loss": 1.8006, + "step": 18647000 + }, + { + "epoch": 92.39, + "learning_rate": 3.82344198213463e-06, + "loss": 1.8078, + "step": 18647500 + }, + { + "epoch": 92.39, + "learning_rate": 3.822203395708546e-06, + "loss": 1.7972, + "step": 18648000 + }, + { + "epoch": 92.39, + "learning_rate": 3.820964809282463e-06, + "loss": 1.7782, + "step": 18648500 + }, + { + "epoch": 92.39, + "learning_rate": 3.819726222856379e-06, + "loss": 1.7916, + "step": 18649000 + }, + { + "epoch": 92.4, + "learning_rate": 3.818487636430295e-06, + "loss": 1.8131, + "step": 18649500 + }, + { + "epoch": 92.4, + "learning_rate": 3.817249050004212e-06, + "loss": 1.8025, + "step": 18650000 + }, + { + "epoch": 92.4, + "learning_rate": 3.816010463578128e-06, + "loss": 1.8131, + "step": 18650500 + }, + { + "epoch": 92.4, + "learning_rate": 3.814771877152045e-06, + "loss": 1.8002, + "step": 18651000 + }, + { + "epoch": 92.41, + "learning_rate": 3.8135357678988127e-06, + "loss": 1.7952, + "step": 18651500 + }, + { + "epoch": 92.41, + "learning_rate": 3.8122971814727292e-06, + "loss": 1.7876, + "step": 18652000 + }, + { + "epoch": 92.41, + "learning_rate": 3.8110585950466453e-06, + "loss": 1.7738, + "step": 18652500 + }, + { + "epoch": 92.41, + "learning_rate": 3.809820008620562e-06, + "loss": 1.7869, + "step": 18653000 + }, + { + "epoch": 92.42, + "learning_rate": 3.8085838993673302e-06, + "loss": 1.801, + "step": 18653500 + }, + { + "epoch": 92.42, + "learning_rate": 3.8073453129412467e-06, + "loss": 1.7885, + "step": 18654000 + }, + { + "epoch": 92.42, + "learning_rate": 3.806106726515163e-06, + "loss": 1.7944, + "step": 18654500 + }, + { + "epoch": 92.42, + "learning_rate": 3.8048681400890797e-06, + "loss": 1.7974, + "step": 18655000 + }, + { + "epoch": 92.43, + "learning_rate": 3.8036295536629954e-06, + "loss": 1.7943, + "step": 18655500 + }, + { + "epoch": 92.43, + "learning_rate": 3.8023909672369123e-06, + "loss": 1.7931, + "step": 18656000 + }, + { + "epoch": 92.43, + "learning_rate": 3.8011523808108284e-06, + "loss": 1.8046, + "step": 18656500 + }, + { + "epoch": 92.43, + "learning_rate": 3.799913794384745e-06, + "loss": 1.7738, + "step": 18657000 + }, + { + "epoch": 92.44, + "learning_rate": 3.7986776851315133e-06, + "loss": 1.8027, + "step": 18657500 + }, + { + "epoch": 92.44, + "learning_rate": 3.79743909870543e-06, + "loss": 1.8111, + "step": 18658000 + }, + { + "epoch": 92.44, + "learning_rate": 3.796202989452198e-06, + "loss": 1.7983, + "step": 18658500 + }, + { + "epoch": 92.44, + "learning_rate": 3.7949644030261147e-06, + "loss": 1.8013, + "step": 18659000 + }, + { + "epoch": 92.45, + "learning_rate": 3.793728293772883e-06, + "loss": 1.7954, + "step": 18659500 + }, + { + "epoch": 92.45, + "learning_rate": 3.7924897073467996e-06, + "loss": 1.8054, + "step": 18660000 + }, + { + "epoch": 92.45, + "learning_rate": 3.7912511209207157e-06, + "loss": 1.8, + "step": 18660500 + }, + { + "epoch": 92.45, + "learning_rate": 3.790012534494632e-06, + "loss": 1.7803, + "step": 18661000 + }, + { + "epoch": 92.46, + "learning_rate": 3.7887764252414005e-06, + "loss": 1.7881, + "step": 18661500 + }, + { + "epoch": 92.46, + "learning_rate": 3.787537838815317e-06, + "loss": 1.846, + "step": 18662000 + }, + { + "epoch": 92.46, + "learning_rate": 3.786299252389233e-06, + "loss": 1.8087, + "step": 18662500 + }, + { + "epoch": 92.46, + "learning_rate": 3.78506066596315e-06, + "loss": 1.7996, + "step": 18663000 + }, + { + "epoch": 92.47, + "learning_rate": 3.7838220795370657e-06, + "loss": 1.8094, + "step": 18663500 + }, + { + "epoch": 92.47, + "learning_rate": 3.7825834931109826e-06, + "loss": 1.8313, + "step": 18664000 + }, + { + "epoch": 92.47, + "learning_rate": 3.7813449066848987e-06, + "loss": 1.8266, + "step": 18664500 + }, + { + "epoch": 92.47, + "learning_rate": 3.7801087974316675e-06, + "loss": 1.8024, + "step": 18665000 + }, + { + "epoch": 92.48, + "learning_rate": 3.778872688178436e-06, + "loss": 1.7978, + "step": 18665500 + }, + { + "epoch": 92.48, + "learning_rate": 3.7776341017523524e-06, + "loss": 1.8009, + "step": 18666000 + }, + { + "epoch": 92.48, + "learning_rate": 3.7763955153262685e-06, + "loss": 1.8048, + "step": 18666500 + }, + { + "epoch": 92.48, + "learning_rate": 3.775156928900185e-06, + "loss": 1.7995, + "step": 18667000 + }, + { + "epoch": 92.49, + "learning_rate": 3.773918342474101e-06, + "loss": 1.7928, + "step": 18667500 + }, + { + "epoch": 92.49, + "learning_rate": 3.7726797560480176e-06, + "loss": 1.8094, + "step": 18668000 + }, + { + "epoch": 92.49, + "learning_rate": 3.7714411696219337e-06, + "loss": 1.8093, + "step": 18668500 + }, + { + "epoch": 92.49, + "learning_rate": 3.7702025831958506e-06, + "loss": 1.7915, + "step": 18669000 + }, + { + "epoch": 92.5, + "learning_rate": 3.768963996769767e-06, + "loss": 1.8067, + "step": 18669500 + }, + { + "epoch": 92.5, + "learning_rate": 3.767725410343683e-06, + "loss": 1.8169, + "step": 18670000 + }, + { + "epoch": 92.5, + "learning_rate": 3.766489301090452e-06, + "loss": 1.7854, + "step": 18670500 + }, + { + "epoch": 92.5, + "learning_rate": 3.765250714664368e-06, + "loss": 1.8003, + "step": 18671000 + }, + { + "epoch": 92.51, + "learning_rate": 3.7640121282382846e-06, + "loss": 1.807, + "step": 18671500 + }, + { + "epoch": 92.51, + "learning_rate": 3.7627735418122007e-06, + "loss": 1.7963, + "step": 18672000 + }, + { + "epoch": 92.51, + "learning_rate": 3.7615374325589695e-06, + "loss": 1.8021, + "step": 18672500 + }, + { + "epoch": 92.51, + "learning_rate": 3.7602988461328856e-06, + "loss": 1.8256, + "step": 18673000 + }, + { + "epoch": 92.51, + "learning_rate": 3.759060259706802e-06, + "loss": 1.7738, + "step": 18673500 + }, + { + "epoch": 92.52, + "learning_rate": 3.757821673280718e-06, + "loss": 1.8055, + "step": 18674000 + }, + { + "epoch": 92.52, + "learning_rate": 3.756583086854635e-06, + "loss": 1.7977, + "step": 18674500 + }, + { + "epoch": 92.52, + "learning_rate": 3.7553445004285507e-06, + "loss": 1.7798, + "step": 18675000 + }, + { + "epoch": 92.52, + "learning_rate": 3.7541059140024677e-06, + "loss": 1.7864, + "step": 18675500 + }, + { + "epoch": 92.53, + "learning_rate": 3.7528673275763837e-06, + "loss": 1.8009, + "step": 18676000 + }, + { + "epoch": 92.53, + "learning_rate": 3.7516287411503002e-06, + "loss": 1.7966, + "step": 18676500 + }, + { + "epoch": 92.53, + "learning_rate": 3.7503926318970686e-06, + "loss": 1.8138, + "step": 18677000 + }, + { + "epoch": 92.53, + "learning_rate": 3.749154045470985e-06, + "loss": 1.7875, + "step": 18677500 + }, + { + "epoch": 92.54, + "learning_rate": 3.7479154590449012e-06, + "loss": 1.7925, + "step": 18678000 + }, + { + "epoch": 92.54, + "learning_rate": 3.7466768726188177e-06, + "loss": 1.7753, + "step": 18678500 + }, + { + "epoch": 92.54, + "learning_rate": 3.745438286192734e-06, + "loss": 1.8137, + "step": 18679000 + }, + { + "epoch": 92.54, + "learning_rate": 3.7441996997666503e-06, + "loss": 1.7997, + "step": 18679500 + }, + { + "epoch": 92.55, + "learning_rate": 3.7429611133405672e-06, + "loss": 1.7901, + "step": 18680000 + }, + { + "epoch": 92.55, + "learning_rate": 3.7417225269144833e-06, + "loss": 1.8008, + "step": 18680500 + }, + { + "epoch": 92.55, + "learning_rate": 3.7404839404884e-06, + "loss": 1.8076, + "step": 18681000 + }, + { + "epoch": 92.55, + "learning_rate": 3.739245354062316e-06, + "loss": 1.8097, + "step": 18681500 + }, + { + "epoch": 92.56, + "learning_rate": 3.7380067676362324e-06, + "loss": 1.7805, + "step": 18682000 + }, + { + "epoch": 92.56, + "learning_rate": 3.736773135555853e-06, + "loss": 1.7976, + "step": 18682500 + }, + { + "epoch": 92.56, + "learning_rate": 3.7355345491297696e-06, + "loss": 1.8022, + "step": 18683000 + }, + { + "epoch": 92.56, + "learning_rate": 3.7342959627036857e-06, + "loss": 1.8189, + "step": 18683500 + }, + { + "epoch": 92.57, + "learning_rate": 3.733057376277602e-06, + "loss": 1.8047, + "step": 18684000 + }, + { + "epoch": 92.57, + "learning_rate": 3.7318187898515183e-06, + "loss": 1.7727, + "step": 18684500 + }, + { + "epoch": 92.57, + "learning_rate": 3.730580203425435e-06, + "loss": 1.8078, + "step": 18685000 + }, + { + "epoch": 92.57, + "learning_rate": 3.729341616999351e-06, + "loss": 1.7802, + "step": 18685500 + }, + { + "epoch": 92.58, + "learning_rate": 3.7281030305732678e-06, + "loss": 1.7937, + "step": 18686000 + }, + { + "epoch": 92.58, + "learning_rate": 3.726864444147184e-06, + "loss": 1.8274, + "step": 18686500 + }, + { + "epoch": 92.58, + "learning_rate": 3.7256283348939527e-06, + "loss": 1.7949, + "step": 18687000 + }, + { + "epoch": 92.58, + "learning_rate": 3.7243897484678688e-06, + "loss": 1.7974, + "step": 18687500 + }, + { + "epoch": 92.59, + "learning_rate": 3.7231511620417853e-06, + "loss": 1.8059, + "step": 18688000 + }, + { + "epoch": 92.59, + "learning_rate": 3.7219125756157013e-06, + "loss": 1.8018, + "step": 18688500 + }, + { + "epoch": 92.59, + "learning_rate": 3.720673989189618e-06, + "loss": 1.7764, + "step": 18689000 + }, + { + "epoch": 92.59, + "learning_rate": 3.7194378799363862e-06, + "loss": 1.7763, + "step": 18689500 + }, + { + "epoch": 92.6, + "learning_rate": 3.7181992935103027e-06, + "loss": 1.7828, + "step": 18690000 + }, + { + "epoch": 92.6, + "learning_rate": 3.716960707084219e-06, + "loss": 1.8032, + "step": 18690500 + }, + { + "epoch": 92.6, + "learning_rate": 3.7157221206581353e-06, + "loss": 1.7766, + "step": 18691000 + }, + { + "epoch": 92.6, + "learning_rate": 3.7144835342320514e-06, + "loss": 1.8173, + "step": 18691500 + }, + { + "epoch": 92.61, + "learning_rate": 3.7132449478059683e-06, + "loss": 1.8019, + "step": 18692000 + }, + { + "epoch": 92.61, + "learning_rate": 3.712006361379885e-06, + "loss": 1.8074, + "step": 18692500 + }, + { + "epoch": 92.61, + "learning_rate": 3.710767774953801e-06, + "loss": 1.7749, + "step": 18693000 + }, + { + "epoch": 92.61, + "learning_rate": 3.7095291885277174e-06, + "loss": 1.788, + "step": 18693500 + }, + { + "epoch": 92.62, + "learning_rate": 3.7082906021016335e-06, + "loss": 1.8198, + "step": 18694000 + }, + { + "epoch": 92.62, + "learning_rate": 3.7070544928484023e-06, + "loss": 1.7973, + "step": 18694500 + }, + { + "epoch": 92.62, + "learning_rate": 3.7058159064223184e-06, + "loss": 1.8137, + "step": 18695000 + }, + { + "epoch": 92.62, + "learning_rate": 3.704577319996235e-06, + "loss": 1.8035, + "step": 18695500 + }, + { + "epoch": 92.63, + "learning_rate": 3.703338733570151e-06, + "loss": 1.8089, + "step": 18696000 + }, + { + "epoch": 92.63, + "learning_rate": 3.702100147144068e-06, + "loss": 1.7845, + "step": 18696500 + }, + { + "epoch": 92.63, + "learning_rate": 3.7008615607179836e-06, + "loss": 1.8107, + "step": 18697000 + }, + { + "epoch": 92.63, + "learning_rate": 3.6996229742919005e-06, + "loss": 1.7973, + "step": 18697500 + }, + { + "epoch": 92.64, + "learning_rate": 3.6983893422115208e-06, + "loss": 1.7956, + "step": 18698000 + }, + { + "epoch": 92.64, + "learning_rate": 3.6971507557854377e-06, + "loss": 1.7992, + "step": 18698500 + }, + { + "epoch": 92.64, + "learning_rate": 3.6959121693593538e-06, + "loss": 1.7948, + "step": 18699000 + }, + { + "epoch": 92.64, + "learning_rate": 3.6946760601061226e-06, + "loss": 1.7959, + "step": 18699500 + }, + { + "epoch": 92.65, + "learning_rate": 3.6934374736800387e-06, + "loss": 1.7861, + "step": 18700000 + }, + { + "epoch": 92.65, + "learning_rate": 3.692198887253955e-06, + "loss": 1.804, + "step": 18700500 + }, + { + "epoch": 92.65, + "learning_rate": 3.6909603008278712e-06, + "loss": 1.8015, + "step": 18701000 + }, + { + "epoch": 92.65, + "learning_rate": 3.6897217144017878e-06, + "loss": 1.7854, + "step": 18701500 + }, + { + "epoch": 92.66, + "learning_rate": 3.688485605148556e-06, + "loss": 1.7849, + "step": 18702000 + }, + { + "epoch": 92.66, + "learning_rate": 3.6872470187224726e-06, + "loss": 1.8001, + "step": 18702500 + }, + { + "epoch": 92.66, + "learning_rate": 3.6860084322963887e-06, + "loss": 1.7843, + "step": 18703000 + }, + { + "epoch": 92.66, + "learning_rate": 3.6847698458703057e-06, + "loss": 1.7801, + "step": 18703500 + }, + { + "epoch": 92.67, + "learning_rate": 3.6835312594442213e-06, + "loss": 1.7778, + "step": 18704000 + }, + { + "epoch": 92.67, + "learning_rate": 3.6822926730181382e-06, + "loss": 1.8161, + "step": 18704500 + }, + { + "epoch": 92.67, + "learning_rate": 3.681056563764906e-06, + "loss": 1.8009, + "step": 18705000 + }, + { + "epoch": 92.67, + "learning_rate": 3.679817977338823e-06, + "loss": 1.7838, + "step": 18705500 + }, + { + "epoch": 92.68, + "learning_rate": 3.678579390912739e-06, + "loss": 1.7842, + "step": 18706000 + }, + { + "epoch": 92.68, + "learning_rate": 3.6773408044866557e-06, + "loss": 1.801, + "step": 18706500 + }, + { + "epoch": 92.68, + "learning_rate": 3.676102218060572e-06, + "loss": 1.7955, + "step": 18707000 + }, + { + "epoch": 92.68, + "learning_rate": 3.6748636316344883e-06, + "loss": 1.795, + "step": 18707500 + }, + { + "epoch": 92.69, + "learning_rate": 3.6736250452084052e-06, + "loss": 1.7963, + "step": 18708000 + }, + { + "epoch": 92.69, + "learning_rate": 3.672386458782321e-06, + "loss": 1.8035, + "step": 18708500 + }, + { + "epoch": 92.69, + "learning_rate": 3.671147872356238e-06, + "loss": 1.7925, + "step": 18709000 + }, + { + "epoch": 92.69, + "learning_rate": 3.669909285930154e-06, + "loss": 1.8016, + "step": 18709500 + }, + { + "epoch": 92.7, + "learning_rate": 3.6686706995040704e-06, + "loss": 1.7748, + "step": 18710000 + }, + { + "epoch": 92.7, + "learning_rate": 3.6674321130779865e-06, + "loss": 1.7911, + "step": 18710500 + }, + { + "epoch": 92.7, + "learning_rate": 3.666193526651903e-06, + "loss": 1.803, + "step": 18711000 + }, + { + "epoch": 92.7, + "learning_rate": 3.6649574173986714e-06, + "loss": 1.795, + "step": 18711500 + }, + { + "epoch": 92.71, + "learning_rate": 3.663718830972588e-06, + "loss": 1.8069, + "step": 18712000 + }, + { + "epoch": 92.71, + "learning_rate": 3.6624827217193563e-06, + "loss": 1.7909, + "step": 18712500 + }, + { + "epoch": 92.71, + "learning_rate": 3.6612441352932728e-06, + "loss": 1.8073, + "step": 18713000 + }, + { + "epoch": 92.71, + "learning_rate": 3.660005548867189e-06, + "loss": 1.803, + "step": 18713500 + }, + { + "epoch": 92.72, + "learning_rate": 3.6587669624411054e-06, + "loss": 1.8134, + "step": 18714000 + }, + { + "epoch": 92.72, + "learning_rate": 3.6575283760150214e-06, + "loss": 1.7902, + "step": 18714500 + }, + { + "epoch": 92.72, + "learning_rate": 3.6562922667617907e-06, + "loss": 1.8082, + "step": 18715000 + }, + { + "epoch": 92.72, + "learning_rate": 3.6550561575085586e-06, + "loss": 1.8025, + "step": 18715500 + }, + { + "epoch": 92.73, + "learning_rate": 3.6538175710824756e-06, + "loss": 1.7943, + "step": 18716000 + }, + { + "epoch": 92.73, + "learning_rate": 3.6525814618292435e-06, + "loss": 1.7941, + "step": 18716500 + }, + { + "epoch": 92.73, + "learning_rate": 3.6513428754031604e-06, + "loss": 1.7861, + "step": 18717000 + }, + { + "epoch": 92.73, + "learning_rate": 3.6501042889770765e-06, + "loss": 1.7814, + "step": 18717500 + }, + { + "epoch": 92.74, + "learning_rate": 3.648865702550993e-06, + "loss": 1.7979, + "step": 18718000 + }, + { + "epoch": 92.74, + "learning_rate": 3.647627116124909e-06, + "loss": 1.7945, + "step": 18718500 + }, + { + "epoch": 92.74, + "learning_rate": 3.6463885296988256e-06, + "loss": 1.7903, + "step": 18719000 + }, + { + "epoch": 92.74, + "learning_rate": 3.6451499432727417e-06, + "loss": 1.794, + "step": 18719500 + }, + { + "epoch": 92.75, + "learning_rate": 3.643911356846658e-06, + "loss": 1.7847, + "step": 18720000 + }, + { + "epoch": 92.75, + "learning_rate": 3.6426727704205743e-06, + "loss": 1.7952, + "step": 18720500 + }, + { + "epoch": 92.75, + "learning_rate": 3.641434183994491e-06, + "loss": 1.7908, + "step": 18721000 + }, + { + "epoch": 92.75, + "learning_rate": 3.640195597568407e-06, + "loss": 1.7752, + "step": 18721500 + }, + { + "epoch": 92.76, + "learning_rate": 3.638957011142324e-06, + "loss": 1.7919, + "step": 18722000 + }, + { + "epoch": 92.76, + "learning_rate": 3.6377184247162403e-06, + "loss": 1.8039, + "step": 18722500 + }, + { + "epoch": 92.76, + "learning_rate": 3.6364798382901564e-06, + "loss": 1.8153, + "step": 18723000 + }, + { + "epoch": 92.76, + "learning_rate": 3.635241251864073e-06, + "loss": 1.802, + "step": 18723500 + }, + { + "epoch": 92.77, + "learning_rate": 3.634002665437989e-06, + "loss": 1.7867, + "step": 18724000 + }, + { + "epoch": 92.77, + "learning_rate": 3.6327640790119055e-06, + "loss": 1.8005, + "step": 18724500 + }, + { + "epoch": 92.77, + "learning_rate": 3.6315254925858216e-06, + "loss": 1.7985, + "step": 18725000 + }, + { + "epoch": 92.77, + "learning_rate": 3.6302869061597385e-06, + "loss": 1.7919, + "step": 18725500 + }, + { + "epoch": 92.78, + "learning_rate": 3.629048319733654e-06, + "loss": 1.7924, + "step": 18726000 + }, + { + "epoch": 92.78, + "learning_rate": 3.627809733307571e-06, + "loss": 1.7747, + "step": 18726500 + }, + { + "epoch": 92.78, + "learning_rate": 3.626573624054339e-06, + "loss": 1.8057, + "step": 18727000 + }, + { + "epoch": 92.78, + "learning_rate": 3.625335037628256e-06, + "loss": 1.8206, + "step": 18727500 + }, + { + "epoch": 92.78, + "learning_rate": 3.6241014055478762e-06, + "loss": 1.7886, + "step": 18728000 + }, + { + "epoch": 92.79, + "learning_rate": 3.622862819121793e-06, + "loss": 1.8036, + "step": 18728500 + }, + { + "epoch": 92.79, + "learning_rate": 3.6216242326957092e-06, + "loss": 1.7855, + "step": 18729000 + }, + { + "epoch": 92.79, + "learning_rate": 3.6203856462696257e-06, + "loss": 1.8131, + "step": 18729500 + }, + { + "epoch": 92.79, + "learning_rate": 3.619147059843542e-06, + "loss": 1.7968, + "step": 18730000 + }, + { + "epoch": 92.8, + "learning_rate": 3.6179084734174583e-06, + "loss": 1.8049, + "step": 18730500 + }, + { + "epoch": 92.8, + "learning_rate": 3.6166723641642267e-06, + "loss": 1.8029, + "step": 18731000 + }, + { + "epoch": 92.8, + "learning_rate": 3.6154337777381432e-06, + "loss": 1.8157, + "step": 18731500 + }, + { + "epoch": 92.8, + "learning_rate": 3.6141976684849116e-06, + "loss": 1.8092, + "step": 18732000 + }, + { + "epoch": 92.81, + "learning_rate": 3.612959082058828e-06, + "loss": 1.7927, + "step": 18732500 + }, + { + "epoch": 92.81, + "learning_rate": 3.611720495632744e-06, + "loss": 1.8063, + "step": 18733000 + }, + { + "epoch": 92.81, + "learning_rate": 3.610484386379513e-06, + "loss": 1.8148, + "step": 18733500 + }, + { + "epoch": 92.81, + "learning_rate": 3.609245799953429e-06, + "loss": 1.8215, + "step": 18734000 + }, + { + "epoch": 92.82, + "learning_rate": 3.6080096907001983e-06, + "loss": 1.8029, + "step": 18734500 + }, + { + "epoch": 92.82, + "learning_rate": 3.606771104274114e-06, + "loss": 1.7792, + "step": 18735000 + }, + { + "epoch": 92.82, + "learning_rate": 3.605532517848031e-06, + "loss": 1.794, + "step": 18735500 + }, + { + "epoch": 92.82, + "learning_rate": 3.6042939314219466e-06, + "loss": 1.8045, + "step": 18736000 + }, + { + "epoch": 92.83, + "learning_rate": 3.6030553449958635e-06, + "loss": 1.7848, + "step": 18736500 + }, + { + "epoch": 92.83, + "learning_rate": 3.6018167585697796e-06, + "loss": 1.769, + "step": 18737000 + }, + { + "epoch": 92.83, + "learning_rate": 3.600578172143696e-06, + "loss": 1.7874, + "step": 18737500 + }, + { + "epoch": 92.83, + "learning_rate": 3.599339585717612e-06, + "loss": 1.7835, + "step": 18738000 + }, + { + "epoch": 92.84, + "learning_rate": 3.5981009992915287e-06, + "loss": 1.7826, + "step": 18738500 + }, + { + "epoch": 92.84, + "learning_rate": 3.5968624128654447e-06, + "loss": 1.7916, + "step": 18739000 + }, + { + "epoch": 92.84, + "learning_rate": 3.5956238264393612e-06, + "loss": 1.8233, + "step": 18739500 + }, + { + "epoch": 92.84, + "learning_rate": 3.594385240013278e-06, + "loss": 1.8259, + "step": 18740000 + }, + { + "epoch": 92.85, + "learning_rate": 3.5931466535871943e-06, + "loss": 1.7696, + "step": 18740500 + }, + { + "epoch": 92.85, + "learning_rate": 3.5919080671611108e-06, + "loss": 1.7935, + "step": 18741000 + }, + { + "epoch": 92.85, + "learning_rate": 3.590669480735027e-06, + "loss": 1.8018, + "step": 18741500 + }, + { + "epoch": 92.85, + "learning_rate": 3.5894308943089433e-06, + "loss": 1.7933, + "step": 18742000 + }, + { + "epoch": 92.86, + "learning_rate": 3.5881923078828594e-06, + "loss": 1.7779, + "step": 18742500 + }, + { + "epoch": 92.86, + "learning_rate": 3.586953721456776e-06, + "loss": 1.7873, + "step": 18743000 + }, + { + "epoch": 92.86, + "learning_rate": 3.585715135030692e-06, + "loss": 1.7958, + "step": 18743500 + }, + { + "epoch": 92.86, + "learning_rate": 3.584476548604609e-06, + "loss": 1.8019, + "step": 18744000 + }, + { + "epoch": 92.87, + "learning_rate": 3.583240439351377e-06, + "loss": 1.8079, + "step": 18744500 + }, + { + "epoch": 92.87, + "learning_rate": 3.582004330098146e-06, + "loss": 1.8157, + "step": 18745000 + }, + { + "epoch": 92.87, + "learning_rate": 3.580765743672062e-06, + "loss": 1.8049, + "step": 18745500 + }, + { + "epoch": 92.87, + "learning_rate": 3.5795271572459787e-06, + "loss": 1.8048, + "step": 18746000 + }, + { + "epoch": 92.88, + "learning_rate": 3.578288570819895e-06, + "loss": 1.8246, + "step": 18746500 + }, + { + "epoch": 92.88, + "learning_rate": 3.5770499843938113e-06, + "loss": 1.7996, + "step": 18747000 + }, + { + "epoch": 92.88, + "learning_rate": 3.5758113979677274e-06, + "loss": 1.8273, + "step": 18747500 + }, + { + "epoch": 92.88, + "learning_rate": 3.574572811541644e-06, + "loss": 1.799, + "step": 18748000 + }, + { + "epoch": 92.89, + "learning_rate": 3.573334225115561e-06, + "loss": 1.7968, + "step": 18748500 + }, + { + "epoch": 92.89, + "learning_rate": 3.5720981158623288e-06, + "loss": 1.8109, + "step": 18749000 + }, + { + "epoch": 92.89, + "learning_rate": 3.570859529436245e-06, + "loss": 1.7984, + "step": 18749500 + }, + { + "epoch": 92.89, + "learning_rate": 3.5696234201830137e-06, + "loss": 1.7914, + "step": 18750000 + }, + { + "epoch": 92.9, + "learning_rate": 3.5683848337569298e-06, + "loss": 1.7895, + "step": 18750500 + }, + { + "epoch": 92.9, + "learning_rate": 3.5671462473308463e-06, + "loss": 1.7943, + "step": 18751000 + }, + { + "epoch": 92.9, + "learning_rate": 3.5659076609047623e-06, + "loss": 1.7716, + "step": 18751500 + }, + { + "epoch": 92.9, + "learning_rate": 3.5646690744786793e-06, + "loss": 1.7891, + "step": 18752000 + }, + { + "epoch": 92.91, + "learning_rate": 3.5634304880525958e-06, + "loss": 1.8066, + "step": 18752500 + }, + { + "epoch": 92.91, + "learning_rate": 3.562191901626512e-06, + "loss": 1.8156, + "step": 18753000 + }, + { + "epoch": 92.91, + "learning_rate": 3.5609533152004284e-06, + "loss": 1.8296, + "step": 18753500 + }, + { + "epoch": 92.91, + "learning_rate": 3.5597147287743444e-06, + "loss": 1.7795, + "step": 18754000 + }, + { + "epoch": 92.92, + "learning_rate": 3.5584786195211132e-06, + "loss": 1.7846, + "step": 18754500 + }, + { + "epoch": 92.92, + "learning_rate": 3.5572425102678816e-06, + "loss": 1.8085, + "step": 18755000 + }, + { + "epoch": 92.92, + "learning_rate": 3.556003923841798e-06, + "loss": 1.8018, + "step": 18755500 + }, + { + "epoch": 92.92, + "learning_rate": 3.5547678145885665e-06, + "loss": 1.7998, + "step": 18756000 + }, + { + "epoch": 92.93, + "learning_rate": 3.553529228162483e-06, + "loss": 1.7875, + "step": 18756500 + }, + { + "epoch": 92.93, + "learning_rate": 3.552290641736399e-06, + "loss": 1.795, + "step": 18757000 + }, + { + "epoch": 92.93, + "learning_rate": 3.551052055310316e-06, + "loss": 1.8066, + "step": 18757500 + }, + { + "epoch": 92.93, + "learning_rate": 3.5498134688842317e-06, + "loss": 1.8114, + "step": 18758000 + }, + { + "epoch": 92.94, + "learning_rate": 3.5485748824581486e-06, + "loss": 1.7943, + "step": 18758500 + }, + { + "epoch": 92.94, + "learning_rate": 3.5473362960320647e-06, + "loss": 1.7935, + "step": 18759000 + }, + { + "epoch": 92.94, + "learning_rate": 3.546097709605981e-06, + "loss": 1.8119, + "step": 18759500 + }, + { + "epoch": 92.94, + "learning_rate": 3.5448591231798973e-06, + "loss": 1.7947, + "step": 18760000 + }, + { + "epoch": 92.95, + "learning_rate": 3.543623013926666e-06, + "loss": 1.7872, + "step": 18760500 + }, + { + "epoch": 92.95, + "learning_rate": 3.5423869046734345e-06, + "loss": 1.8065, + "step": 18761000 + }, + { + "epoch": 92.95, + "learning_rate": 3.541148318247351e-06, + "loss": 1.796, + "step": 18761500 + }, + { + "epoch": 92.95, + "learning_rate": 3.539909731821267e-06, + "loss": 1.7993, + "step": 18762000 + }, + { + "epoch": 92.96, + "learning_rate": 3.5386711453951836e-06, + "loss": 1.801, + "step": 18762500 + }, + { + "epoch": 92.96, + "learning_rate": 3.537435036141952e-06, + "loss": 1.8112, + "step": 18763000 + }, + { + "epoch": 92.96, + "learning_rate": 3.5361964497158685e-06, + "loss": 1.7721, + "step": 18763500 + }, + { + "epoch": 92.96, + "learning_rate": 3.5349578632897846e-06, + "loss": 1.7738, + "step": 18764000 + }, + { + "epoch": 92.97, + "learning_rate": 3.5337192768637015e-06, + "loss": 1.8087, + "step": 18764500 + }, + { + "epoch": 92.97, + "learning_rate": 3.532480690437617e-06, + "loss": 1.8099, + "step": 18765000 + }, + { + "epoch": 92.97, + "learning_rate": 3.531242104011534e-06, + "loss": 1.7839, + "step": 18765500 + }, + { + "epoch": 92.97, + "learning_rate": 3.53000351758545e-06, + "loss": 1.792, + "step": 18766000 + }, + { + "epoch": 92.98, + "learning_rate": 3.5287649311593666e-06, + "loss": 1.765, + "step": 18766500 + }, + { + "epoch": 92.98, + "learning_rate": 3.5275263447332827e-06, + "loss": 1.806, + "step": 18767000 + }, + { + "epoch": 92.98, + "learning_rate": 3.5262877583071992e-06, + "loss": 1.806, + "step": 18767500 + }, + { + "epoch": 92.98, + "learning_rate": 3.525049171881116e-06, + "loss": 1.7692, + "step": 18768000 + }, + { + "epoch": 92.99, + "learning_rate": 3.523810585455032e-06, + "loss": 1.8051, + "step": 18768500 + }, + { + "epoch": 92.99, + "learning_rate": 3.5225719990289487e-06, + "loss": 1.7909, + "step": 18769000 + }, + { + "epoch": 92.99, + "learning_rate": 3.521333412602865e-06, + "loss": 1.7806, + "step": 18769500 + }, + { + "epoch": 92.99, + "learning_rate": 3.5200948261767813e-06, + "loss": 1.7932, + "step": 18770000 + }, + { + "epoch": 93.0, + "learning_rate": 3.5188562397506974e-06, + "loss": 1.7938, + "step": 18770500 + }, + { + "epoch": 93.0, + "learning_rate": 3.5176201304974662e-06, + "loss": 1.7922, + "step": 18771000 + }, + { + "epoch": 93.0, + "eval_accuracy": 0.6856451325370596, + "eval_accuracy_mlm": 0.6470140033120039, + "eval_accuracy_nsp": 0.8679983840539067, + "eval_loss": 2.300736904144287, + "eval_runtime": 146.8864, + "eval_samples_per_second": 1735.756, + "eval_steps_per_second": 72.328, + "step": 18771399 + }, + { + "epoch": 93.0, + "learning_rate": 3.5163815440713823e-06, + "loss": 1.7909, + "step": 18771500 + }, + { + "epoch": 93.0, + "learning_rate": 3.515142957645299e-06, + "loss": 1.7971, + "step": 18772000 + }, + { + "epoch": 93.01, + "learning_rate": 3.513906848392067e-06, + "loss": 1.8084, + "step": 18772500 + }, + { + "epoch": 93.01, + "learning_rate": 3.5126682619659837e-06, + "loss": 1.7777, + "step": 18773000 + }, + { + "epoch": 93.01, + "learning_rate": 3.5114296755398998e-06, + "loss": 1.808, + "step": 18773500 + }, + { + "epoch": 93.01, + "learning_rate": 3.5101910891138167e-06, + "loss": 1.7864, + "step": 18774000 + }, + { + "epoch": 93.02, + "learning_rate": 3.5089525026877324e-06, + "loss": 1.8119, + "step": 18774500 + }, + { + "epoch": 93.02, + "learning_rate": 3.5077139162616493e-06, + "loss": 1.7687, + "step": 18775000 + }, + { + "epoch": 93.02, + "learning_rate": 3.506475329835565e-06, + "loss": 1.8017, + "step": 18775500 + }, + { + "epoch": 93.02, + "learning_rate": 3.505236743409482e-06, + "loss": 1.805, + "step": 18776000 + }, + { + "epoch": 93.03, + "learning_rate": 3.503998156983398e-06, + "loss": 1.8142, + "step": 18776500 + }, + { + "epoch": 93.03, + "learning_rate": 3.5027595705573145e-06, + "loss": 1.7926, + "step": 18777000 + }, + { + "epoch": 93.03, + "learning_rate": 3.501525938476935e-06, + "loss": 1.787, + "step": 18777500 + }, + { + "epoch": 93.03, + "learning_rate": 3.5002873520508517e-06, + "loss": 1.8028, + "step": 18778000 + }, + { + "epoch": 93.04, + "learning_rate": 3.4990487656247677e-06, + "loss": 1.7826, + "step": 18778500 + }, + { + "epoch": 93.04, + "learning_rate": 3.4978101791986842e-06, + "loss": 1.7904, + "step": 18779000 + }, + { + "epoch": 93.04, + "learning_rate": 3.4965715927726003e-06, + "loss": 1.8014, + "step": 18779500 + }, + { + "epoch": 93.04, + "learning_rate": 3.495333006346517e-06, + "loss": 1.7931, + "step": 18780000 + }, + { + "epoch": 93.05, + "learning_rate": 3.4940944199204338e-06, + "loss": 1.8065, + "step": 18780500 + }, + { + "epoch": 93.05, + "learning_rate": 3.4928583106672017e-06, + "loss": 1.7975, + "step": 18781000 + }, + { + "epoch": 93.05, + "learning_rate": 3.4916197242411187e-06, + "loss": 1.7969, + "step": 18781500 + }, + { + "epoch": 93.05, + "learning_rate": 3.4903811378150347e-06, + "loss": 1.8054, + "step": 18782000 + }, + { + "epoch": 93.05, + "learning_rate": 3.4891425513889512e-06, + "loss": 1.8042, + "step": 18782500 + }, + { + "epoch": 93.06, + "learning_rate": 3.4879064421357196e-06, + "loss": 1.8058, + "step": 18783000 + }, + { + "epoch": 93.06, + "learning_rate": 3.486667855709636e-06, + "loss": 1.8059, + "step": 18783500 + }, + { + "epoch": 93.06, + "learning_rate": 3.485429269283552e-06, + "loss": 1.7913, + "step": 18784000 + }, + { + "epoch": 93.06, + "learning_rate": 3.4841906828574687e-06, + "loss": 1.8087, + "step": 18784500 + }, + { + "epoch": 93.07, + "learning_rate": 3.482952096431385e-06, + "loss": 1.7985, + "step": 18785000 + }, + { + "epoch": 93.07, + "learning_rate": 3.4817135100053017e-06, + "loss": 1.785, + "step": 18785500 + }, + { + "epoch": 93.07, + "learning_rate": 3.4804749235792174e-06, + "loss": 1.7971, + "step": 18786000 + }, + { + "epoch": 93.07, + "learning_rate": 3.4792363371531343e-06, + "loss": 1.799, + "step": 18786500 + }, + { + "epoch": 93.08, + "learning_rate": 3.47799775072705e-06, + "loss": 1.7865, + "step": 18787000 + }, + { + "epoch": 93.08, + "learning_rate": 3.476759164300967e-06, + "loss": 1.7974, + "step": 18787500 + }, + { + "epoch": 93.08, + "learning_rate": 3.4755230550477353e-06, + "loss": 1.7878, + "step": 18788000 + }, + { + "epoch": 93.08, + "learning_rate": 3.4742844686216518e-06, + "loss": 1.8141, + "step": 18788500 + }, + { + "epoch": 93.09, + "learning_rate": 3.473045882195568e-06, + "loss": 1.7915, + "step": 18789000 + }, + { + "epoch": 93.09, + "learning_rate": 3.4718072957694844e-06, + "loss": 1.7877, + "step": 18789500 + }, + { + "epoch": 93.09, + "learning_rate": 3.4705711865162528e-06, + "loss": 1.8053, + "step": 18790000 + }, + { + "epoch": 93.09, + "learning_rate": 3.4693326000901693e-06, + "loss": 1.7782, + "step": 18790500 + }, + { + "epoch": 93.1, + "learning_rate": 3.4680940136640853e-06, + "loss": 1.8005, + "step": 18791000 + }, + { + "epoch": 93.1, + "learning_rate": 3.466855427238002e-06, + "loss": 1.8045, + "step": 18791500 + }, + { + "epoch": 93.1, + "learning_rate": 3.4656193179847702e-06, + "loss": 1.7819, + "step": 18792000 + }, + { + "epoch": 93.1, + "learning_rate": 3.4643807315586867e-06, + "loss": 1.7948, + "step": 18792500 + }, + { + "epoch": 93.11, + "learning_rate": 3.463142145132603e-06, + "loss": 1.7962, + "step": 18793000 + }, + { + "epoch": 93.11, + "learning_rate": 3.4619035587065197e-06, + "loss": 1.8065, + "step": 18793500 + }, + { + "epoch": 93.11, + "learning_rate": 3.4606649722804363e-06, + "loss": 1.7931, + "step": 18794000 + }, + { + "epoch": 93.11, + "learning_rate": 3.4594288630272046e-06, + "loss": 1.7973, + "step": 18794500 + }, + { + "epoch": 93.12, + "learning_rate": 3.4581902766011207e-06, + "loss": 1.8238, + "step": 18795000 + }, + { + "epoch": 93.12, + "learning_rate": 3.4569516901750372e-06, + "loss": 1.7787, + "step": 18795500 + }, + { + "epoch": 93.12, + "learning_rate": 3.4557155809218056e-06, + "loss": 1.7894, + "step": 18796000 + }, + { + "epoch": 93.12, + "learning_rate": 3.454476994495722e-06, + "loss": 1.7675, + "step": 18796500 + }, + { + "epoch": 93.13, + "learning_rate": 3.453238408069638e-06, + "loss": 1.7817, + "step": 18797000 + }, + { + "epoch": 93.13, + "learning_rate": 3.4519998216435547e-06, + "loss": 1.7877, + "step": 18797500 + }, + { + "epoch": 93.13, + "learning_rate": 3.4507612352174716e-06, + "loss": 1.8045, + "step": 18798000 + }, + { + "epoch": 93.13, + "learning_rate": 3.4495226487913873e-06, + "loss": 1.8174, + "step": 18798500 + }, + { + "epoch": 93.14, + "learning_rate": 3.4482840623653042e-06, + "loss": 1.788, + "step": 18799000 + }, + { + "epoch": 93.14, + "learning_rate": 3.4470454759392203e-06, + "loss": 1.781, + "step": 18799500 + }, + { + "epoch": 93.14, + "learning_rate": 3.445806889513137e-06, + "loss": 1.7755, + "step": 18800000 + }, + { + "epoch": 93.14, + "learning_rate": 3.444568303087053e-06, + "loss": 1.7809, + "step": 18800500 + }, + { + "epoch": 93.15, + "learning_rate": 3.4433297166609694e-06, + "loss": 1.7728, + "step": 18801000 + }, + { + "epoch": 93.15, + "learning_rate": 3.4420911302348855e-06, + "loss": 1.7942, + "step": 18801500 + }, + { + "epoch": 93.15, + "learning_rate": 3.440852543808802e-06, + "loss": 1.808, + "step": 18802000 + }, + { + "epoch": 93.15, + "learning_rate": 3.439613957382718e-06, + "loss": 1.8147, + "step": 18802500 + }, + { + "epoch": 93.16, + "learning_rate": 3.438377848129487e-06, + "loss": 1.79, + "step": 18803000 + }, + { + "epoch": 93.16, + "learning_rate": 3.437139261703403e-06, + "loss": 1.7756, + "step": 18803500 + }, + { + "epoch": 93.16, + "learning_rate": 3.43590067527732e-06, + "loss": 1.7875, + "step": 18804000 + }, + { + "epoch": 93.16, + "learning_rate": 3.4346620888512355e-06, + "loss": 1.7897, + "step": 18804500 + }, + { + "epoch": 93.17, + "learning_rate": 3.4334235024251525e-06, + "loss": 1.7788, + "step": 18805000 + }, + { + "epoch": 93.17, + "learning_rate": 3.432184915999069e-06, + "loss": 1.7844, + "step": 18805500 + }, + { + "epoch": 93.17, + "learning_rate": 3.430946329572985e-06, + "loss": 1.806, + "step": 18806000 + }, + { + "epoch": 93.17, + "learning_rate": 3.4297077431469015e-06, + "loss": 1.8158, + "step": 18806500 + }, + { + "epoch": 93.18, + "learning_rate": 3.42847163389367e-06, + "loss": 1.776, + "step": 18807000 + }, + { + "epoch": 93.18, + "learning_rate": 3.4272330474675864e-06, + "loss": 1.8004, + "step": 18807500 + }, + { + "epoch": 93.18, + "learning_rate": 3.4259944610415025e-06, + "loss": 1.8022, + "step": 18808000 + }, + { + "epoch": 93.18, + "learning_rate": 3.4247558746154194e-06, + "loss": 1.769, + "step": 18808500 + }, + { + "epoch": 93.19, + "learning_rate": 3.4235197653621874e-06, + "loss": 1.7843, + "step": 18809000 + }, + { + "epoch": 93.19, + "learning_rate": 3.422283656108956e-06, + "loss": 1.7783, + "step": 18809500 + }, + { + "epoch": 93.19, + "learning_rate": 3.4210450696828723e-06, + "loss": 1.7873, + "step": 18810000 + }, + { + "epoch": 93.19, + "learning_rate": 3.4198064832567892e-06, + "loss": 1.8051, + "step": 18810500 + }, + { + "epoch": 93.2, + "learning_rate": 3.4185678968307053e-06, + "loss": 1.7925, + "step": 18811000 + }, + { + "epoch": 93.2, + "learning_rate": 3.417329310404622e-06, + "loss": 1.7948, + "step": 18811500 + }, + { + "epoch": 93.2, + "learning_rate": 3.416090723978538e-06, + "loss": 1.7905, + "step": 18812000 + }, + { + "epoch": 93.2, + "learning_rate": 3.4148521375524544e-06, + "loss": 1.7954, + "step": 18812500 + }, + { + "epoch": 93.21, + "learning_rate": 3.4136135511263705e-06, + "loss": 1.7696, + "step": 18813000 + }, + { + "epoch": 93.21, + "learning_rate": 3.412374964700287e-06, + "loss": 1.8064, + "step": 18813500 + }, + { + "epoch": 93.21, + "learning_rate": 3.4111388554470554e-06, + "loss": 1.7944, + "step": 18814000 + }, + { + "epoch": 93.21, + "learning_rate": 3.409900269020972e-06, + "loss": 1.7974, + "step": 18814500 + }, + { + "epoch": 93.22, + "learning_rate": 3.408661682594888e-06, + "loss": 1.811, + "step": 18815000 + }, + { + "epoch": 93.22, + "learning_rate": 3.407423096168805e-06, + "loss": 1.7815, + "step": 18815500 + }, + { + "epoch": 93.22, + "learning_rate": 3.4061845097427205e-06, + "loss": 1.7933, + "step": 18816000 + }, + { + "epoch": 93.22, + "learning_rate": 3.4049459233166375e-06, + "loss": 1.7976, + "step": 18816500 + }, + { + "epoch": 93.23, + "learning_rate": 3.4037073368905536e-06, + "loss": 1.7908, + "step": 18817000 + }, + { + "epoch": 93.23, + "learning_rate": 3.40246875046447e-06, + "loss": 1.7914, + "step": 18817500 + }, + { + "epoch": 93.23, + "learning_rate": 3.4012301640383866e-06, + "loss": 1.7888, + "step": 18818000 + }, + { + "epoch": 93.23, + "learning_rate": 3.3999915776123026e-06, + "loss": 1.7645, + "step": 18818500 + }, + { + "epoch": 93.24, + "learning_rate": 3.3987554683590715e-06, + "loss": 1.7788, + "step": 18819000 + }, + { + "epoch": 93.24, + "learning_rate": 3.3975168819329875e-06, + "loss": 1.7963, + "step": 18819500 + }, + { + "epoch": 93.24, + "learning_rate": 3.396280772679756e-06, + "loss": 1.7745, + "step": 18820000 + }, + { + "epoch": 93.24, + "learning_rate": 3.3950446634265247e-06, + "loss": 1.7872, + "step": 18820500 + }, + { + "epoch": 93.25, + "learning_rate": 3.393806077000441e-06, + "loss": 1.7889, + "step": 18821000 + }, + { + "epoch": 93.25, + "learning_rate": 3.3925674905743573e-06, + "loss": 1.7835, + "step": 18821500 + }, + { + "epoch": 93.25, + "learning_rate": 3.3913289041482734e-06, + "loss": 1.7679, + "step": 18822000 + }, + { + "epoch": 93.25, + "learning_rate": 3.3900903177221903e-06, + "loss": 1.8077, + "step": 18822500 + }, + { + "epoch": 93.26, + "learning_rate": 3.3888542084689583e-06, + "loss": 1.7918, + "step": 18823000 + }, + { + "epoch": 93.26, + "learning_rate": 3.3876156220428752e-06, + "loss": 1.8009, + "step": 18823500 + }, + { + "epoch": 93.26, + "learning_rate": 3.386379512789643e-06, + "loss": 1.7837, + "step": 18824000 + }, + { + "epoch": 93.26, + "learning_rate": 3.38514092636356e-06, + "loss": 1.8044, + "step": 18824500 + }, + { + "epoch": 93.27, + "learning_rate": 3.383902339937476e-06, + "loss": 1.7845, + "step": 18825000 + }, + { + "epoch": 93.27, + "learning_rate": 3.3826637535113927e-06, + "loss": 1.7732, + "step": 18825500 + }, + { + "epoch": 93.27, + "learning_rate": 3.381425167085309e-06, + "loss": 1.7758, + "step": 18826000 + }, + { + "epoch": 93.27, + "learning_rate": 3.3801865806592253e-06, + "loss": 1.7686, + "step": 18826500 + }, + { + "epoch": 93.28, + "learning_rate": 3.378947994233142e-06, + "loss": 1.7982, + "step": 18827000 + }, + { + "epoch": 93.28, + "learning_rate": 3.377709407807058e-06, + "loss": 1.8107, + "step": 18827500 + }, + { + "epoch": 93.28, + "learning_rate": 3.376470821380975e-06, + "loss": 1.7992, + "step": 18828000 + }, + { + "epoch": 93.28, + "learning_rate": 3.3752322349548904e-06, + "loss": 1.8088, + "step": 18828500 + }, + { + "epoch": 93.29, + "learning_rate": 3.3739936485288074e-06, + "loss": 1.795, + "step": 18829000 + }, + { + "epoch": 93.29, + "learning_rate": 3.3727550621027235e-06, + "loss": 1.7879, + "step": 18829500 + }, + { + "epoch": 93.29, + "learning_rate": 3.37151647567664e-06, + "loss": 1.7848, + "step": 18830000 + }, + { + "epoch": 93.29, + "learning_rate": 3.370277889250556e-06, + "loss": 1.7911, + "step": 18830500 + }, + { + "epoch": 93.3, + "learning_rate": 3.369041779997325e-06, + "loss": 1.7904, + "step": 18831000 + }, + { + "epoch": 93.3, + "learning_rate": 3.367803193571241e-06, + "loss": 1.7744, + "step": 18831500 + }, + { + "epoch": 93.3, + "learning_rate": 3.3665670843180097e-06, + "loss": 1.7686, + "step": 18832000 + }, + { + "epoch": 93.3, + "learning_rate": 3.365328497891926e-06, + "loss": 1.7784, + "step": 18832500 + }, + { + "epoch": 93.31, + "learning_rate": 3.3640899114658423e-06, + "loss": 1.7707, + "step": 18833000 + }, + { + "epoch": 93.31, + "learning_rate": 3.3628513250397584e-06, + "loss": 1.7967, + "step": 18833500 + }, + { + "epoch": 93.31, + "learning_rate": 3.3616127386136753e-06, + "loss": 1.8034, + "step": 18834000 + }, + { + "epoch": 93.31, + "learning_rate": 3.360374152187591e-06, + "loss": 1.8126, + "step": 18834500 + }, + { + "epoch": 93.32, + "learning_rate": 3.359135565761508e-06, + "loss": 1.783, + "step": 18835000 + }, + { + "epoch": 93.32, + "learning_rate": 3.3578969793354244e-06, + "loss": 1.8016, + "step": 18835500 + }, + { + "epoch": 93.32, + "learning_rate": 3.3566583929093405e-06, + "loss": 1.7832, + "step": 18836000 + }, + { + "epoch": 93.32, + "learning_rate": 3.3554222836561093e-06, + "loss": 1.7859, + "step": 18836500 + }, + { + "epoch": 93.33, + "learning_rate": 3.3541836972300254e-06, + "loss": 1.7931, + "step": 18837000 + }, + { + "epoch": 93.33, + "learning_rate": 3.352945110803942e-06, + "loss": 1.7688, + "step": 18837500 + }, + { + "epoch": 93.33, + "learning_rate": 3.351706524377858e-06, + "loss": 1.7864, + "step": 18838000 + }, + { + "epoch": 93.33, + "learning_rate": 3.350467937951775e-06, + "loss": 1.8019, + "step": 18838500 + }, + { + "epoch": 93.33, + "learning_rate": 3.3492293515256906e-06, + "loss": 1.8103, + "step": 18839000 + }, + { + "epoch": 93.34, + "learning_rate": 3.34799324227246e-06, + "loss": 1.7715, + "step": 18839500 + }, + { + "epoch": 93.34, + "learning_rate": 3.346754655846376e-06, + "loss": 1.7754, + "step": 18840000 + }, + { + "epoch": 93.34, + "learning_rate": 3.3455160694202924e-06, + "loss": 1.7888, + "step": 18840500 + }, + { + "epoch": 93.34, + "learning_rate": 3.3442774829942085e-06, + "loss": 1.8043, + "step": 18841000 + }, + { + "epoch": 93.35, + "learning_rate": 3.343038896568125e-06, + "loss": 1.8024, + "step": 18841500 + }, + { + "epoch": 93.35, + "learning_rate": 3.341800310142041e-06, + "loss": 1.7954, + "step": 18842000 + }, + { + "epoch": 93.35, + "learning_rate": 3.3405617237159576e-06, + "loss": 1.812, + "step": 18842500 + }, + { + "epoch": 93.35, + "learning_rate": 3.3393231372898736e-06, + "loss": 1.7865, + "step": 18843000 + }, + { + "epoch": 93.36, + "learning_rate": 3.3380895052094948e-06, + "loss": 1.7886, + "step": 18843500 + }, + { + "epoch": 93.36, + "learning_rate": 3.336853395956263e-06, + "loss": 1.7683, + "step": 18844000 + }, + { + "epoch": 93.36, + "learning_rate": 3.3356148095301796e-06, + "loss": 1.8, + "step": 18844500 + }, + { + "epoch": 93.36, + "learning_rate": 3.3343762231040957e-06, + "loss": 1.7887, + "step": 18845000 + }, + { + "epoch": 93.37, + "learning_rate": 3.3331376366780127e-06, + "loss": 1.7836, + "step": 18845500 + }, + { + "epoch": 93.37, + "learning_rate": 3.3318990502519283e-06, + "loss": 1.7939, + "step": 18846000 + }, + { + "epoch": 93.37, + "learning_rate": 3.3306604638258452e-06, + "loss": 1.8068, + "step": 18846500 + }, + { + "epoch": 93.37, + "learning_rate": 3.329421877399761e-06, + "loss": 1.7737, + "step": 18847000 + }, + { + "epoch": 93.38, + "learning_rate": 3.32818576814653e-06, + "loss": 1.7752, + "step": 18847500 + }, + { + "epoch": 93.38, + "learning_rate": 3.3269471817204462e-06, + "loss": 1.7801, + "step": 18848000 + }, + { + "epoch": 93.38, + "learning_rate": 3.3257085952943627e-06, + "loss": 1.7871, + "step": 18848500 + }, + { + "epoch": 93.38, + "learning_rate": 3.324470008868279e-06, + "loss": 1.7793, + "step": 18849000 + }, + { + "epoch": 93.39, + "learning_rate": 3.3232314224421953e-06, + "loss": 1.7739, + "step": 18849500 + }, + { + "epoch": 93.39, + "learning_rate": 3.3219953131889637e-06, + "loss": 1.844, + "step": 18850000 + }, + { + "epoch": 93.39, + "learning_rate": 3.32075672676288e-06, + "loss": 1.7834, + "step": 18850500 + }, + { + "epoch": 93.39, + "learning_rate": 3.3195181403367963e-06, + "loss": 1.782, + "step": 18851000 + }, + { + "epoch": 93.4, + "learning_rate": 3.3182795539107128e-06, + "loss": 1.7855, + "step": 18851500 + }, + { + "epoch": 93.4, + "learning_rate": 3.3170409674846297e-06, + "loss": 1.7836, + "step": 18852000 + }, + { + "epoch": 93.4, + "learning_rate": 3.315802381058546e-06, + "loss": 1.8044, + "step": 18852500 + }, + { + "epoch": 93.4, + "learning_rate": 3.3145637946324623e-06, + "loss": 1.8091, + "step": 18853000 + }, + { + "epoch": 93.41, + "learning_rate": 3.3133252082063784e-06, + "loss": 1.7835, + "step": 18853500 + }, + { + "epoch": 93.41, + "learning_rate": 3.312086621780295e-06, + "loss": 1.7842, + "step": 18854000 + }, + { + "epoch": 93.41, + "learning_rate": 3.310848035354211e-06, + "loss": 1.8017, + "step": 18854500 + }, + { + "epoch": 93.41, + "learning_rate": 3.3096119261009798e-06, + "loss": 1.8002, + "step": 18855000 + }, + { + "epoch": 93.42, + "learning_rate": 3.308373339674896e-06, + "loss": 1.7813, + "step": 18855500 + }, + { + "epoch": 93.42, + "learning_rate": 3.3071347532488124e-06, + "loss": 1.8115, + "step": 18856000 + }, + { + "epoch": 93.42, + "learning_rate": 3.3058961668227284e-06, + "loss": 1.8006, + "step": 18856500 + }, + { + "epoch": 93.42, + "learning_rate": 3.3046575803966454e-06, + "loss": 1.8026, + "step": 18857000 + }, + { + "epoch": 93.43, + "learning_rate": 3.3034214711434133e-06, + "loss": 1.8005, + "step": 18857500 + }, + { + "epoch": 93.43, + "learning_rate": 3.3021828847173303e-06, + "loss": 1.8104, + "step": 18858000 + }, + { + "epoch": 93.43, + "learning_rate": 3.3009467754640982e-06, + "loss": 1.7934, + "step": 18858500 + }, + { + "epoch": 93.43, + "learning_rate": 3.299708189038015e-06, + "loss": 1.8208, + "step": 18859000 + }, + { + "epoch": 93.44, + "learning_rate": 3.2984696026119312e-06, + "loss": 1.8072, + "step": 18859500 + }, + { + "epoch": 93.44, + "learning_rate": 3.2972310161858477e-06, + "loss": 1.7895, + "step": 18860000 + }, + { + "epoch": 93.44, + "learning_rate": 3.295992429759764e-06, + "loss": 1.7906, + "step": 18860500 + }, + { + "epoch": 93.44, + "learning_rate": 3.2947538433336803e-06, + "loss": 1.7928, + "step": 18861000 + }, + { + "epoch": 93.45, + "learning_rate": 3.2935152569075964e-06, + "loss": 1.7974, + "step": 18861500 + }, + { + "epoch": 93.45, + "learning_rate": 3.292276670481513e-06, + "loss": 1.7892, + "step": 18862000 + }, + { + "epoch": 93.45, + "learning_rate": 3.2910405612282813e-06, + "loss": 1.7789, + "step": 18862500 + }, + { + "epoch": 93.45, + "learning_rate": 3.289801974802198e-06, + "loss": 1.8062, + "step": 18863000 + }, + { + "epoch": 93.46, + "learning_rate": 3.288563388376114e-06, + "loss": 1.7808, + "step": 18863500 + }, + { + "epoch": 93.46, + "learning_rate": 3.287324801950031e-06, + "loss": 1.7965, + "step": 18864000 + }, + { + "epoch": 93.46, + "learning_rate": 3.2860862155239473e-06, + "loss": 1.8078, + "step": 18864500 + }, + { + "epoch": 93.46, + "learning_rate": 3.2848501062707157e-06, + "loss": 1.7781, + "step": 18865000 + }, + { + "epoch": 93.47, + "learning_rate": 3.2836139970174837e-06, + "loss": 1.791, + "step": 18865500 + }, + { + "epoch": 93.47, + "learning_rate": 3.2823754105914006e-06, + "loss": 1.7911, + "step": 18866000 + }, + { + "epoch": 93.47, + "learning_rate": 3.2811368241653167e-06, + "loss": 1.8188, + "step": 18866500 + }, + { + "epoch": 93.47, + "learning_rate": 3.2799007149120855e-06, + "loss": 1.7859, + "step": 18867000 + }, + { + "epoch": 93.48, + "learning_rate": 3.2786621284860016e-06, + "loss": 1.791, + "step": 18867500 + }, + { + "epoch": 93.48, + "learning_rate": 3.277423542059918e-06, + "loss": 1.7928, + "step": 18868000 + }, + { + "epoch": 93.48, + "learning_rate": 3.276184955633834e-06, + "loss": 1.7614, + "step": 18868500 + }, + { + "epoch": 93.48, + "learning_rate": 3.2749463692077507e-06, + "loss": 1.7915, + "step": 18869000 + }, + { + "epoch": 93.49, + "learning_rate": 3.2737077827816676e-06, + "loss": 1.7935, + "step": 18869500 + }, + { + "epoch": 93.49, + "learning_rate": 3.2724691963555832e-06, + "loss": 1.7815, + "step": 18870000 + }, + { + "epoch": 93.49, + "learning_rate": 3.2712306099295e-06, + "loss": 1.7901, + "step": 18870500 + }, + { + "epoch": 93.49, + "learning_rate": 3.2699920235034162e-06, + "loss": 1.7793, + "step": 18871000 + }, + { + "epoch": 93.5, + "learning_rate": 3.2687534370773327e-06, + "loss": 1.8129, + "step": 18871500 + }, + { + "epoch": 93.5, + "learning_rate": 3.267514850651249e-06, + "loss": 1.7768, + "step": 18872000 + }, + { + "epoch": 93.5, + "learning_rate": 3.2662762642251653e-06, + "loss": 1.8013, + "step": 18872500 + }, + { + "epoch": 93.5, + "learning_rate": 3.2650376777990814e-06, + "loss": 1.7743, + "step": 18873000 + }, + { + "epoch": 93.51, + "learning_rate": 3.263799091372998e-06, + "loss": 1.7868, + "step": 18873500 + }, + { + "epoch": 93.51, + "learning_rate": 3.262560504946914e-06, + "loss": 1.8108, + "step": 18874000 + }, + { + "epoch": 93.51, + "learning_rate": 3.261324395693683e-06, + "loss": 1.7759, + "step": 18874500 + }, + { + "epoch": 93.51, + "learning_rate": 3.260085809267599e-06, + "loss": 1.8062, + "step": 18875000 + }, + { + "epoch": 93.52, + "learning_rate": 3.258847222841516e-06, + "loss": 1.7874, + "step": 18875500 + }, + { + "epoch": 93.52, + "learning_rate": 3.2576086364154315e-06, + "loss": 1.7901, + "step": 18876000 + }, + { + "epoch": 93.52, + "learning_rate": 3.2563700499893484e-06, + "loss": 1.7821, + "step": 18876500 + }, + { + "epoch": 93.52, + "learning_rate": 3.2551339407361164e-06, + "loss": 1.8003, + "step": 18877000 + }, + { + "epoch": 93.53, + "learning_rate": 3.2538953543100333e-06, + "loss": 1.7838, + "step": 18877500 + }, + { + "epoch": 93.53, + "learning_rate": 3.2526567678839494e-06, + "loss": 1.7834, + "step": 18878000 + }, + { + "epoch": 93.53, + "learning_rate": 3.251418181457866e-06, + "loss": 1.7996, + "step": 18878500 + }, + { + "epoch": 93.53, + "learning_rate": 3.2501820722046343e-06, + "loss": 1.7932, + "step": 18879000 + }, + { + "epoch": 93.54, + "learning_rate": 3.2489434857785508e-06, + "loss": 1.7715, + "step": 18879500 + }, + { + "epoch": 93.54, + "learning_rate": 3.247707376525319e-06, + "loss": 1.7839, + "step": 18880000 + }, + { + "epoch": 93.54, + "learning_rate": 3.246471267272088e-06, + "loss": 1.8048, + "step": 18880500 + }, + { + "epoch": 93.54, + "learning_rate": 3.245232680846004e-06, + "loss": 1.7741, + "step": 18881000 + }, + { + "epoch": 93.55, + "learning_rate": 3.243996571592773e-06, + "loss": 1.8109, + "step": 18881500 + }, + { + "epoch": 93.55, + "learning_rate": 3.242757985166689e-06, + "loss": 1.7854, + "step": 18882000 + }, + { + "epoch": 93.55, + "learning_rate": 3.2415193987406054e-06, + "loss": 1.7956, + "step": 18882500 + }, + { + "epoch": 93.55, + "learning_rate": 3.2402808123145215e-06, + "loss": 1.7822, + "step": 18883000 + }, + { + "epoch": 93.56, + "learning_rate": 3.2390422258884385e-06, + "loss": 1.8077, + "step": 18883500 + }, + { + "epoch": 93.56, + "learning_rate": 3.237803639462354e-06, + "loss": 1.7845, + "step": 18884000 + }, + { + "epoch": 93.56, + "learning_rate": 3.236565053036271e-06, + "loss": 1.7939, + "step": 18884500 + }, + { + "epoch": 93.56, + "learning_rate": 3.2353264666101875e-06, + "loss": 1.7957, + "step": 18885000 + }, + { + "epoch": 93.57, + "learning_rate": 3.2340878801841036e-06, + "loss": 1.7861, + "step": 18885500 + }, + { + "epoch": 93.57, + "learning_rate": 3.23284929375802e-06, + "loss": 1.7794, + "step": 18886000 + }, + { + "epoch": 93.57, + "learning_rate": 3.2316107073319362e-06, + "loss": 1.8044, + "step": 18886500 + }, + { + "epoch": 93.57, + "learning_rate": 3.230372120905853e-06, + "loss": 1.786, + "step": 18887000 + }, + { + "epoch": 93.58, + "learning_rate": 3.229133534479769e-06, + "loss": 1.7841, + "step": 18887500 + }, + { + "epoch": 93.58, + "learning_rate": 3.227897425226538e-06, + "loss": 1.7995, + "step": 18888000 + }, + { + "epoch": 93.58, + "learning_rate": 3.2266588388004537e-06, + "loss": 1.7828, + "step": 18888500 + }, + { + "epoch": 93.58, + "learning_rate": 3.2254202523743706e-06, + "loss": 1.8069, + "step": 18889000 + }, + { + "epoch": 93.59, + "learning_rate": 3.2241816659482867e-06, + "loss": 1.7938, + "step": 18889500 + }, + { + "epoch": 93.59, + "learning_rate": 3.222948033867908e-06, + "loss": 1.7846, + "step": 18890000 + }, + { + "epoch": 93.59, + "learning_rate": 3.221709447441824e-06, + "loss": 1.816, + "step": 18890500 + }, + { + "epoch": 93.59, + "learning_rate": 3.2204708610157404e-06, + "loss": 1.7686, + "step": 18891000 + }, + { + "epoch": 93.6, + "learning_rate": 3.2192322745896565e-06, + "loss": 1.8122, + "step": 18891500 + }, + { + "epoch": 93.6, + "learning_rate": 3.217993688163573e-06, + "loss": 1.7839, + "step": 18892000 + }, + { + "epoch": 93.6, + "learning_rate": 3.2167600560831937e-06, + "loss": 1.8037, + "step": 18892500 + }, + { + "epoch": 93.6, + "learning_rate": 3.2155214696571093e-06, + "loss": 1.8005, + "step": 18893000 + }, + { + "epoch": 93.6, + "learning_rate": 3.2142828832310263e-06, + "loss": 1.792, + "step": 18893500 + }, + { + "epoch": 93.61, + "learning_rate": 3.2130442968049428e-06, + "loss": 1.7934, + "step": 18894000 + }, + { + "epoch": 93.61, + "learning_rate": 3.211805710378859e-06, + "loss": 1.8092, + "step": 18894500 + }, + { + "epoch": 93.61, + "learning_rate": 3.2105671239527758e-06, + "loss": 1.7724, + "step": 18895000 + }, + { + "epoch": 93.61, + "learning_rate": 3.2093285375266914e-06, + "loss": 1.7932, + "step": 18895500 + }, + { + "epoch": 93.62, + "learning_rate": 3.2080899511006084e-06, + "loss": 1.7813, + "step": 18896000 + }, + { + "epoch": 93.62, + "learning_rate": 3.206851364674524e-06, + "loss": 1.7776, + "step": 18896500 + }, + { + "epoch": 93.62, + "learning_rate": 3.205612778248441e-06, + "loss": 1.78, + "step": 18897000 + }, + { + "epoch": 93.62, + "learning_rate": 3.204374191822357e-06, + "loss": 1.7979, + "step": 18897500 + }, + { + "epoch": 93.63, + "learning_rate": 3.2031356053962735e-06, + "loss": 1.811, + "step": 18898000 + }, + { + "epoch": 93.63, + "learning_rate": 3.2018970189701896e-06, + "loss": 1.7966, + "step": 18898500 + }, + { + "epoch": 93.63, + "learning_rate": 3.200658432544106e-06, + "loss": 1.7694, + "step": 18899000 + }, + { + "epoch": 93.63, + "learning_rate": 3.199419846118023e-06, + "loss": 1.7716, + "step": 18899500 + }, + { + "epoch": 93.64, + "learning_rate": 3.198183736864791e-06, + "loss": 1.7826, + "step": 18900000 + }, + { + "epoch": 93.64, + "learning_rate": 3.196945150438707e-06, + "loss": 1.8124, + "step": 18900500 + }, + { + "epoch": 93.64, + "learning_rate": 3.1957065640126236e-06, + "loss": 1.7932, + "step": 18901000 + }, + { + "epoch": 93.64, + "learning_rate": 3.1944679775865405e-06, + "loss": 1.7874, + "step": 18901500 + }, + { + "epoch": 93.65, + "learning_rate": 3.193231868333309e-06, + "loss": 1.8039, + "step": 18902000 + }, + { + "epoch": 93.65, + "learning_rate": 3.1919932819072254e-06, + "loss": 1.8027, + "step": 18902500 + }, + { + "epoch": 93.65, + "learning_rate": 3.1907546954811415e-06, + "loss": 1.8178, + "step": 18903000 + }, + { + "epoch": 93.65, + "learning_rate": 3.189516109055058e-06, + "loss": 1.7851, + "step": 18903500 + }, + { + "epoch": 93.66, + "learning_rate": 3.188277522628974e-06, + "loss": 1.7899, + "step": 18904000 + }, + { + "epoch": 93.66, + "learning_rate": 3.1870389362028906e-06, + "loss": 1.806, + "step": 18904500 + }, + { + "epoch": 93.66, + "learning_rate": 3.1858003497768067e-06, + "loss": 1.8083, + "step": 18905000 + }, + { + "epoch": 93.66, + "learning_rate": 3.1845617633507236e-06, + "loss": 1.7889, + "step": 18905500 + }, + { + "epoch": 93.67, + "learning_rate": 3.1833231769246393e-06, + "loss": 1.7814, + "step": 18906000 + }, + { + "epoch": 93.67, + "learning_rate": 3.182084590498556e-06, + "loss": 1.7792, + "step": 18906500 + }, + { + "epoch": 93.67, + "learning_rate": 3.180848481245324e-06, + "loss": 1.7903, + "step": 18907000 + }, + { + "epoch": 93.67, + "learning_rate": 3.1796123719920934e-06, + "loss": 1.7905, + "step": 18907500 + }, + { + "epoch": 93.68, + "learning_rate": 3.178373785566009e-06, + "loss": 1.7716, + "step": 18908000 + }, + { + "epoch": 93.68, + "learning_rate": 3.177135199139926e-06, + "loss": 1.7834, + "step": 18908500 + }, + { + "epoch": 93.68, + "learning_rate": 3.175896612713842e-06, + "loss": 1.7942, + "step": 18909000 + }, + { + "epoch": 93.68, + "learning_rate": 3.1746580262877585e-06, + "loss": 1.777, + "step": 18909500 + }, + { + "epoch": 93.69, + "learning_rate": 3.1734194398616746e-06, + "loss": 1.7944, + "step": 18910000 + }, + { + "epoch": 93.69, + "learning_rate": 3.1721833306084434e-06, + "loss": 1.8017, + "step": 18910500 + }, + { + "epoch": 93.69, + "learning_rate": 3.1709447441823595e-06, + "loss": 1.779, + "step": 18911000 + }, + { + "epoch": 93.69, + "learning_rate": 3.169706157756276e-06, + "loss": 1.7946, + "step": 18911500 + }, + { + "epoch": 93.7, + "learning_rate": 3.1684700485030444e-06, + "loss": 1.8024, + "step": 18912000 + }, + { + "epoch": 93.7, + "learning_rate": 3.167231462076961e-06, + "loss": 1.8022, + "step": 18912500 + }, + { + "epoch": 93.7, + "learning_rate": 3.165992875650877e-06, + "loss": 1.8069, + "step": 18913000 + }, + { + "epoch": 93.7, + "learning_rate": 3.164754289224794e-06, + "loss": 1.7762, + "step": 18913500 + }, + { + "epoch": 93.71, + "learning_rate": 3.1635157027987096e-06, + "loss": 1.759, + "step": 18914000 + }, + { + "epoch": 93.71, + "learning_rate": 3.1622771163726265e-06, + "loss": 1.7864, + "step": 18914500 + }, + { + "epoch": 93.71, + "learning_rate": 3.161038529946543e-06, + "loss": 1.7889, + "step": 18915000 + }, + { + "epoch": 93.71, + "learning_rate": 3.159799943520459e-06, + "loss": 1.7922, + "step": 18915500 + }, + { + "epoch": 93.72, + "learning_rate": 3.1585613570943756e-06, + "loss": 1.7913, + "step": 18916000 + }, + { + "epoch": 93.72, + "learning_rate": 3.1573227706682917e-06, + "loss": 1.7879, + "step": 18916500 + }, + { + "epoch": 93.72, + "learning_rate": 3.1560841842422086e-06, + "loss": 1.7706, + "step": 18917000 + }, + { + "epoch": 93.72, + "learning_rate": 3.1548455978161243e-06, + "loss": 1.7973, + "step": 18917500 + }, + { + "epoch": 93.73, + "learning_rate": 3.153607011390041e-06, + "loss": 1.8206, + "step": 18918000 + }, + { + "epoch": 93.73, + "learning_rate": 3.1523684249639573e-06, + "loss": 1.7904, + "step": 18918500 + }, + { + "epoch": 93.73, + "learning_rate": 3.151132315710726e-06, + "loss": 1.7843, + "step": 18919000 + }, + { + "epoch": 93.73, + "learning_rate": 3.149893729284642e-06, + "loss": 1.794, + "step": 18919500 + }, + { + "epoch": 93.74, + "learning_rate": 3.1486551428585587e-06, + "loss": 1.7932, + "step": 18920000 + }, + { + "epoch": 93.74, + "learning_rate": 3.1474165564324748e-06, + "loss": 1.7917, + "step": 18920500 + }, + { + "epoch": 93.74, + "learning_rate": 3.1461779700063913e-06, + "loss": 1.7982, + "step": 18921000 + }, + { + "epoch": 93.74, + "learning_rate": 3.1449393835803073e-06, + "loss": 1.7867, + "step": 18921500 + }, + { + "epoch": 93.75, + "learning_rate": 3.143703274327076e-06, + "loss": 1.8069, + "step": 18922000 + }, + { + "epoch": 93.75, + "learning_rate": 3.1424646879009922e-06, + "loss": 1.7897, + "step": 18922500 + }, + { + "epoch": 93.75, + "learning_rate": 3.1412261014749087e-06, + "loss": 1.77, + "step": 18923000 + }, + { + "epoch": 93.75, + "learning_rate": 3.139987515048825e-06, + "loss": 1.8031, + "step": 18923500 + }, + { + "epoch": 93.76, + "learning_rate": 3.1387489286227417e-06, + "loss": 1.7908, + "step": 18924000 + }, + { + "epoch": 93.76, + "learning_rate": 3.1375103421966582e-06, + "loss": 1.7824, + "step": 18924500 + }, + { + "epoch": 93.76, + "learning_rate": 3.1362717557705743e-06, + "loss": 1.8045, + "step": 18925000 + }, + { + "epoch": 93.76, + "learning_rate": 3.135033169344491e-06, + "loss": 1.8008, + "step": 18925500 + }, + { + "epoch": 93.77, + "learning_rate": 3.133794582918407e-06, + "loss": 1.7845, + "step": 18926000 + }, + { + "epoch": 93.77, + "learning_rate": 3.1325584736651757e-06, + "loss": 1.7937, + "step": 18926500 + }, + { + "epoch": 93.77, + "learning_rate": 3.131319887239092e-06, + "loss": 1.77, + "step": 18927000 + }, + { + "epoch": 93.77, + "learning_rate": 3.1300813008130083e-06, + "loss": 1.7937, + "step": 18927500 + }, + { + "epoch": 93.78, + "learning_rate": 3.1288427143869244e-06, + "loss": 1.8022, + "step": 18928000 + }, + { + "epoch": 93.78, + "learning_rate": 3.1276041279608413e-06, + "loss": 1.7752, + "step": 18928500 + }, + { + "epoch": 93.78, + "learning_rate": 3.1263680187076093e-06, + "loss": 1.7846, + "step": 18929000 + }, + { + "epoch": 93.78, + "learning_rate": 3.125129432281526e-06, + "loss": 1.7893, + "step": 18929500 + }, + { + "epoch": 93.79, + "learning_rate": 3.1238908458554423e-06, + "loss": 1.804, + "step": 18930000 + }, + { + "epoch": 93.79, + "learning_rate": 3.1226547366022107e-06, + "loss": 1.7819, + "step": 18930500 + }, + { + "epoch": 93.79, + "learning_rate": 3.121416150176127e-06, + "loss": 1.8084, + "step": 18931000 + }, + { + "epoch": 93.79, + "learning_rate": 3.1201775637500433e-06, + "loss": 1.7843, + "step": 18931500 + }, + { + "epoch": 93.8, + "learning_rate": 3.11893897732396e-06, + "loss": 1.7786, + "step": 18932000 + }, + { + "epoch": 93.8, + "learning_rate": 3.1177003908978763e-06, + "loss": 1.7851, + "step": 18932500 + }, + { + "epoch": 93.8, + "learning_rate": 3.1164618044717928e-06, + "loss": 1.7898, + "step": 18933000 + }, + { + "epoch": 93.8, + "learning_rate": 3.115225695218561e-06, + "loss": 1.7748, + "step": 18933500 + }, + { + "epoch": 93.81, + "learning_rate": 3.1139871087924777e-06, + "loss": 1.807, + "step": 18934000 + }, + { + "epoch": 93.81, + "learning_rate": 3.112750999539246e-06, + "loss": 1.7932, + "step": 18934500 + }, + { + "epoch": 93.81, + "learning_rate": 3.111512413113162e-06, + "loss": 1.8051, + "step": 18935000 + }, + { + "epoch": 93.81, + "learning_rate": 3.110273826687079e-06, + "loss": 1.81, + "step": 18935500 + }, + { + "epoch": 93.82, + "learning_rate": 3.109035240260995e-06, + "loss": 1.7973, + "step": 18936000 + }, + { + "epoch": 93.82, + "learning_rate": 3.1077966538349116e-06, + "loss": 1.7701, + "step": 18936500 + }, + { + "epoch": 93.82, + "learning_rate": 3.10656054458168e-06, + "loss": 1.8103, + "step": 18937000 + }, + { + "epoch": 93.82, + "learning_rate": 3.1053219581555965e-06, + "loss": 1.8216, + "step": 18937500 + }, + { + "epoch": 93.83, + "learning_rate": 3.1040858489023645e-06, + "loss": 1.7571, + "step": 18938000 + }, + { + "epoch": 93.83, + "learning_rate": 3.1028472624762814e-06, + "loss": 1.7926, + "step": 18938500 + }, + { + "epoch": 93.83, + "learning_rate": 3.1016086760501975e-06, + "loss": 1.8123, + "step": 18939000 + }, + { + "epoch": 93.83, + "learning_rate": 3.100370089624114e-06, + "loss": 1.785, + "step": 18939500 + }, + { + "epoch": 93.84, + "learning_rate": 3.0991315031980305e-06, + "loss": 1.7812, + "step": 18940000 + }, + { + "epoch": 93.84, + "learning_rate": 3.0978929167719466e-06, + "loss": 1.7968, + "step": 18940500 + }, + { + "epoch": 93.84, + "learning_rate": 3.096654330345863e-06, + "loss": 1.7751, + "step": 18941000 + }, + { + "epoch": 93.84, + "learning_rate": 3.095415743919779e-06, + "loss": 1.7904, + "step": 18941500 + }, + { + "epoch": 93.85, + "learning_rate": 3.094179634666548e-06, + "loss": 1.7737, + "step": 18942000 + }, + { + "epoch": 93.85, + "learning_rate": 3.0929410482404645e-06, + "loss": 1.7832, + "step": 18942500 + }, + { + "epoch": 93.85, + "learning_rate": 3.091704938987233e-06, + "loss": 1.7803, + "step": 18943000 + }, + { + "epoch": 93.85, + "learning_rate": 3.0904663525611494e-06, + "loss": 1.8158, + "step": 18943500 + }, + { + "epoch": 93.86, + "learning_rate": 3.0892277661350655e-06, + "loss": 1.7918, + "step": 18944000 + }, + { + "epoch": 93.86, + "learning_rate": 3.087989179708982e-06, + "loss": 1.7865, + "step": 18944500 + }, + { + "epoch": 93.86, + "learning_rate": 3.086750593282898e-06, + "loss": 1.7823, + "step": 18945000 + }, + { + "epoch": 93.86, + "learning_rate": 3.0855120068568146e-06, + "loss": 1.7789, + "step": 18945500 + }, + { + "epoch": 93.87, + "learning_rate": 3.084273420430731e-06, + "loss": 1.771, + "step": 18946000 + }, + { + "epoch": 93.87, + "learning_rate": 3.083034834004647e-06, + "loss": 1.7853, + "step": 18946500 + }, + { + "epoch": 93.87, + "learning_rate": 3.0817962475785637e-06, + "loss": 1.7984, + "step": 18947000 + }, + { + "epoch": 93.87, + "learning_rate": 3.080560138325332e-06, + "loss": 1.8054, + "step": 18947500 + }, + { + "epoch": 93.87, + "learning_rate": 3.0793215518992485e-06, + "loss": 1.7869, + "step": 18948000 + }, + { + "epoch": 93.88, + "learning_rate": 3.0780829654731646e-06, + "loss": 1.7762, + "step": 18948500 + }, + { + "epoch": 93.88, + "learning_rate": 3.076844379047081e-06, + "loss": 1.8133, + "step": 18949000 + }, + { + "epoch": 93.88, + "learning_rate": 3.0756057926209976e-06, + "loss": 1.7843, + "step": 18949500 + }, + { + "epoch": 93.88, + "learning_rate": 3.074367206194914e-06, + "loss": 1.8145, + "step": 18950000 + }, + { + "epoch": 93.89, + "learning_rate": 3.0731310969416825e-06, + "loss": 1.8061, + "step": 18950500 + }, + { + "epoch": 93.89, + "learning_rate": 3.071894987688451e-06, + "loss": 1.7989, + "step": 18951000 + }, + { + "epoch": 93.89, + "learning_rate": 3.0706564012623674e-06, + "loss": 1.8015, + "step": 18951500 + }, + { + "epoch": 93.89, + "learning_rate": 3.0694178148362835e-06, + "loss": 1.7971, + "step": 18952000 + }, + { + "epoch": 93.9, + "learning_rate": 3.0681792284102004e-06, + "loss": 1.7661, + "step": 18952500 + }, + { + "epoch": 93.9, + "learning_rate": 3.0669406419841165e-06, + "loss": 1.7656, + "step": 18953000 + }, + { + "epoch": 93.9, + "learning_rate": 3.065702055558033e-06, + "loss": 1.7871, + "step": 18953500 + }, + { + "epoch": 93.9, + "learning_rate": 3.0644634691319495e-06, + "loss": 1.7849, + "step": 18954000 + }, + { + "epoch": 93.91, + "learning_rate": 3.0632248827058656e-06, + "loss": 1.7845, + "step": 18954500 + }, + { + "epoch": 93.91, + "learning_rate": 3.061986296279782e-06, + "loss": 1.776, + "step": 18955000 + }, + { + "epoch": 93.91, + "learning_rate": 3.060747709853698e-06, + "loss": 1.7764, + "step": 18955500 + }, + { + "epoch": 93.91, + "learning_rate": 3.0595091234276147e-06, + "loss": 1.783, + "step": 18956000 + }, + { + "epoch": 93.92, + "learning_rate": 3.0582705370015308e-06, + "loss": 1.7904, + "step": 18956500 + }, + { + "epoch": 93.92, + "learning_rate": 3.0570319505754473e-06, + "loss": 1.7831, + "step": 18957000 + }, + { + "epoch": 93.92, + "learning_rate": 3.0557933641493638e-06, + "loss": 1.7751, + "step": 18957500 + }, + { + "epoch": 93.92, + "learning_rate": 3.05455477772328e-06, + "loss": 1.8009, + "step": 18958000 + }, + { + "epoch": 93.93, + "learning_rate": 3.0533161912971968e-06, + "loss": 1.7967, + "step": 18958500 + }, + { + "epoch": 93.93, + "learning_rate": 3.052077604871113e-06, + "loss": 1.7878, + "step": 18959000 + }, + { + "epoch": 93.93, + "learning_rate": 3.0508390184450294e-06, + "loss": 1.7758, + "step": 18959500 + }, + { + "epoch": 93.93, + "learning_rate": 3.0496004320189454e-06, + "loss": 1.7797, + "step": 18960000 + }, + { + "epoch": 93.94, + "learning_rate": 3.048361845592862e-06, + "loss": 1.8009, + "step": 18960500 + }, + { + "epoch": 93.94, + "learning_rate": 3.0471232591667785e-06, + "loss": 1.7813, + "step": 18961000 + }, + { + "epoch": 93.94, + "learning_rate": 3.0458846727406945e-06, + "loss": 1.7787, + "step": 18961500 + }, + { + "epoch": 93.94, + "learning_rate": 3.0446485634874633e-06, + "loss": 1.7881, + "step": 18962000 + }, + { + "epoch": 93.95, + "learning_rate": 3.0434149314070836e-06, + "loss": 1.8035, + "step": 18962500 + }, + { + "epoch": 93.95, + "learning_rate": 3.042176344981e-06, + "loss": 1.8033, + "step": 18963000 + }, + { + "epoch": 93.95, + "learning_rate": 3.0409377585549166e-06, + "loss": 1.8143, + "step": 18963500 + }, + { + "epoch": 93.95, + "learning_rate": 3.039699172128833e-06, + "loss": 1.7874, + "step": 18964000 + }, + { + "epoch": 93.96, + "learning_rate": 3.0384605857027492e-06, + "loss": 1.7938, + "step": 18964500 + }, + { + "epoch": 93.96, + "learning_rate": 3.0372219992766657e-06, + "loss": 1.7641, + "step": 18965000 + }, + { + "epoch": 93.96, + "learning_rate": 3.0359834128505822e-06, + "loss": 1.7898, + "step": 18965500 + }, + { + "epoch": 93.96, + "learning_rate": 3.0347448264244983e-06, + "loss": 1.798, + "step": 18966000 + }, + { + "epoch": 93.97, + "learning_rate": 3.033506239998415e-06, + "loss": 1.7702, + "step": 18966500 + }, + { + "epoch": 93.97, + "learning_rate": 3.0322726079180355e-06, + "loss": 1.802, + "step": 18967000 + }, + { + "epoch": 93.97, + "learning_rate": 3.031034021491952e-06, + "loss": 1.7983, + "step": 18967500 + }, + { + "epoch": 93.97, + "learning_rate": 3.029795435065868e-06, + "loss": 1.8094, + "step": 18968000 + }, + { + "epoch": 93.98, + "learning_rate": 3.0285568486397846e-06, + "loss": 1.7761, + "step": 18968500 + }, + { + "epoch": 93.98, + "learning_rate": 3.027318262213701e-06, + "loss": 1.7913, + "step": 18969000 + }, + { + "epoch": 93.98, + "learning_rate": 3.026079675787617e-06, + "loss": 1.7928, + "step": 18969500 + }, + { + "epoch": 93.98, + "learning_rate": 3.0248410893615337e-06, + "loss": 1.7787, + "step": 18970000 + }, + { + "epoch": 93.99, + "learning_rate": 3.0236025029354498e-06, + "loss": 1.7798, + "step": 18970500 + }, + { + "epoch": 93.99, + "learning_rate": 3.0223639165093663e-06, + "loss": 1.7707, + "step": 18971000 + }, + { + "epoch": 93.99, + "learning_rate": 3.0211278072561347e-06, + "loss": 1.7885, + "step": 18971500 + }, + { + "epoch": 93.99, + "learning_rate": 3.019889220830051e-06, + "loss": 1.7921, + "step": 18972000 + }, + { + "epoch": 94.0, + "learning_rate": 3.01865311157682e-06, + "loss": 1.8087, + "step": 18972500 + }, + { + "epoch": 94.0, + "learning_rate": 3.017414525150736e-06, + "loss": 1.8017, + "step": 18973000 + }, + { + "epoch": 94.0, + "eval_accuracy": 0.6856816384535349, + "eval_accuracy_mlm": 0.6469539919371597, + "eval_accuracy_nsp": 0.8684219815735079, + "eval_loss": 2.311725616455078, + "eval_runtime": 147.0378, + "eval_samples_per_second": 1733.969, + "eval_steps_per_second": 72.254, + "step": 18973242 + }, + { + "epoch": 94.0, + "learning_rate": 3.0161759387246526e-06, + "loss": 1.7706, + "step": 18973500 + }, + { + "epoch": 94.0, + "learning_rate": 3.014939829471421e-06, + "loss": 1.7808, + "step": 18974000 + }, + { + "epoch": 94.01, + "learning_rate": 3.0137012430453374e-06, + "loss": 1.7786, + "step": 18974500 + }, + { + "epoch": 94.01, + "learning_rate": 3.0124626566192535e-06, + "loss": 1.7906, + "step": 18975000 + }, + { + "epoch": 94.01, + "learning_rate": 3.01122407019317e-06, + "loss": 1.7993, + "step": 18975500 + }, + { + "epoch": 94.01, + "learning_rate": 3.0099854837670865e-06, + "loss": 1.8006, + "step": 18976000 + }, + { + "epoch": 94.02, + "learning_rate": 3.0087468973410026e-06, + "loss": 1.7736, + "step": 18976500 + }, + { + "epoch": 94.02, + "learning_rate": 3.007508310914919e-06, + "loss": 1.7823, + "step": 18977000 + }, + { + "epoch": 94.02, + "learning_rate": 3.0062697244888356e-06, + "loss": 1.7935, + "step": 18977500 + }, + { + "epoch": 94.02, + "learning_rate": 3.005031138062752e-06, + "loss": 1.7881, + "step": 18978000 + }, + { + "epoch": 94.03, + "learning_rate": 3.003792551636668e-06, + "loss": 1.7739, + "step": 18978500 + }, + { + "epoch": 94.03, + "learning_rate": 3.0025539652105847e-06, + "loss": 1.8044, + "step": 18979000 + }, + { + "epoch": 94.03, + "learning_rate": 3.0013153787845012e-06, + "loss": 1.8031, + "step": 18979500 + }, + { + "epoch": 94.03, + "learning_rate": 3.0000792695312696e-06, + "loss": 1.7797, + "step": 18980000 + }, + { + "epoch": 94.04, + "learning_rate": 2.998840683105186e-06, + "loss": 1.7759, + "step": 18980500 + }, + { + "epoch": 94.04, + "learning_rate": 2.9976045738519545e-06, + "loss": 1.7828, + "step": 18981000 + }, + { + "epoch": 94.04, + "learning_rate": 2.996365987425871e-06, + "loss": 1.7745, + "step": 18981500 + }, + { + "epoch": 94.04, + "learning_rate": 2.995127400999787e-06, + "loss": 1.7915, + "step": 18982000 + }, + { + "epoch": 94.05, + "learning_rate": 2.9938888145737036e-06, + "loss": 1.7697, + "step": 18982500 + }, + { + "epoch": 94.05, + "learning_rate": 2.9926502281476197e-06, + "loss": 1.779, + "step": 18983000 + }, + { + "epoch": 94.05, + "learning_rate": 2.991411641721536e-06, + "loss": 1.7818, + "step": 18983500 + }, + { + "epoch": 94.05, + "learning_rate": 2.9901730552954527e-06, + "loss": 1.7884, + "step": 18984000 + }, + { + "epoch": 94.06, + "learning_rate": 2.9889344688693688e-06, + "loss": 1.7988, + "step": 18984500 + }, + { + "epoch": 94.06, + "learning_rate": 2.9876958824432853e-06, + "loss": 1.766, + "step": 18985000 + }, + { + "epoch": 94.06, + "learning_rate": 2.9864572960172013e-06, + "loss": 1.7899, + "step": 18985500 + }, + { + "epoch": 94.06, + "learning_rate": 2.985218709591118e-06, + "loss": 1.786, + "step": 18986000 + }, + { + "epoch": 94.07, + "learning_rate": 2.9839801231650343e-06, + "loss": 1.7699, + "step": 18986500 + }, + { + "epoch": 94.07, + "learning_rate": 2.982741536738951e-06, + "loss": 1.7587, + "step": 18987000 + }, + { + "epoch": 94.07, + "learning_rate": 2.9815029503128674e-06, + "loss": 1.793, + "step": 18987500 + }, + { + "epoch": 94.07, + "learning_rate": 2.9802668410596353e-06, + "loss": 1.7885, + "step": 18988000 + }, + { + "epoch": 94.08, + "learning_rate": 2.9790282546335522e-06, + "loss": 1.7987, + "step": 18988500 + }, + { + "epoch": 94.08, + "learning_rate": 2.9777896682074683e-06, + "loss": 1.7905, + "step": 18989000 + }, + { + "epoch": 94.08, + "learning_rate": 2.976551081781385e-06, + "loss": 1.7878, + "step": 18989500 + }, + { + "epoch": 94.08, + "learning_rate": 2.975312495355301e-06, + "loss": 1.8126, + "step": 18990000 + }, + { + "epoch": 94.09, + "learning_rate": 2.9740739089292174e-06, + "loss": 1.7934, + "step": 18990500 + }, + { + "epoch": 94.09, + "learning_rate": 2.972835322503134e-06, + "loss": 1.8086, + "step": 18991000 + }, + { + "epoch": 94.09, + "learning_rate": 2.9715992132499023e-06, + "loss": 1.7799, + "step": 18991500 + }, + { + "epoch": 94.09, + "learning_rate": 2.970363103996671e-06, + "loss": 1.7925, + "step": 18992000 + }, + { + "epoch": 94.1, + "learning_rate": 2.969124517570587e-06, + "loss": 1.7913, + "step": 18992500 + }, + { + "epoch": 94.1, + "learning_rate": 2.9678859311445037e-06, + "loss": 1.798, + "step": 18993000 + }, + { + "epoch": 94.1, + "learning_rate": 2.9666473447184198e-06, + "loss": 1.789, + "step": 18993500 + }, + { + "epoch": 94.1, + "learning_rate": 2.9654087582923363e-06, + "loss": 1.7856, + "step": 18994000 + }, + { + "epoch": 94.11, + "learning_rate": 2.964170171866253e-06, + "loss": 1.7789, + "step": 18994500 + }, + { + "epoch": 94.11, + "learning_rate": 2.962934062613021e-06, + "loss": 1.7783, + "step": 18995000 + }, + { + "epoch": 94.11, + "learning_rate": 2.9616954761869377e-06, + "loss": 1.7762, + "step": 18995500 + }, + { + "epoch": 94.11, + "learning_rate": 2.9604568897608538e-06, + "loss": 1.7801, + "step": 18996000 + }, + { + "epoch": 94.12, + "learning_rate": 2.9592183033347703e-06, + "loss": 1.7886, + "step": 18996500 + }, + { + "epoch": 94.12, + "learning_rate": 2.9579797169086864e-06, + "loss": 1.802, + "step": 18997000 + }, + { + "epoch": 94.12, + "learning_rate": 2.956741130482603e-06, + "loss": 1.8109, + "step": 18997500 + }, + { + "epoch": 94.12, + "learning_rate": 2.9555025440565194e-06, + "loss": 1.7921, + "step": 18998000 + }, + { + "epoch": 94.13, + "learning_rate": 2.9542639576304354e-06, + "loss": 1.7824, + "step": 18998500 + }, + { + "epoch": 94.13, + "learning_rate": 2.953025371204352e-06, + "loss": 1.785, + "step": 18999000 + }, + { + "epoch": 94.13, + "learning_rate": 2.9517892619511203e-06, + "loss": 1.7788, + "step": 18999500 + }, + { + "epoch": 94.13, + "learning_rate": 2.950550675525037e-06, + "loss": 1.7781, + "step": 19000000 + }, + { + "epoch": 94.14, + "learning_rate": 2.949312089098953e-06, + "loss": 1.8023, + "step": 19000500 + }, + { + "epoch": 94.14, + "learning_rate": 2.94807350267287e-06, + "loss": 1.7759, + "step": 19001000 + }, + { + "epoch": 94.14, + "learning_rate": 2.946834916246786e-06, + "loss": 1.8143, + "step": 19001500 + }, + { + "epoch": 94.14, + "learning_rate": 2.9455963298207024e-06, + "loss": 1.7864, + "step": 19002000 + }, + { + "epoch": 94.14, + "learning_rate": 2.944357743394619e-06, + "loss": 1.7917, + "step": 19002500 + }, + { + "epoch": 94.15, + "learning_rate": 2.943119156968535e-06, + "loss": 1.7953, + "step": 19003000 + }, + { + "epoch": 94.15, + "learning_rate": 2.9418805705424515e-06, + "loss": 1.7768, + "step": 19003500 + }, + { + "epoch": 94.15, + "learning_rate": 2.9406419841163676e-06, + "loss": 1.7558, + "step": 19004000 + }, + { + "epoch": 94.15, + "learning_rate": 2.939403397690284e-06, + "loss": 1.7807, + "step": 19004500 + }, + { + "epoch": 94.16, + "learning_rate": 2.9381648112642006e-06, + "loss": 1.7821, + "step": 19005000 + }, + { + "epoch": 94.16, + "learning_rate": 2.9369262248381167e-06, + "loss": 1.7927, + "step": 19005500 + }, + { + "epoch": 94.16, + "learning_rate": 2.935687638412033e-06, + "loss": 1.7877, + "step": 19006000 + }, + { + "epoch": 94.16, + "learning_rate": 2.934454006331654e-06, + "loss": 1.7728, + "step": 19006500 + }, + { + "epoch": 94.17, + "learning_rate": 2.9332178970784227e-06, + "loss": 1.7957, + "step": 19007000 + }, + { + "epoch": 94.17, + "learning_rate": 2.9319793106523388e-06, + "loss": 1.7903, + "step": 19007500 + }, + { + "epoch": 94.17, + "learning_rate": 2.9307407242262553e-06, + "loss": 1.7897, + "step": 19008000 + }, + { + "epoch": 94.17, + "learning_rate": 2.9295021378001714e-06, + "loss": 1.7797, + "step": 19008500 + }, + { + "epoch": 94.18, + "learning_rate": 2.928263551374088e-06, + "loss": 1.7694, + "step": 19009000 + }, + { + "epoch": 94.18, + "learning_rate": 2.9270249649480044e-06, + "loss": 1.7736, + "step": 19009500 + }, + { + "epoch": 94.18, + "learning_rate": 2.9257863785219205e-06, + "loss": 1.7912, + "step": 19010000 + }, + { + "epoch": 94.18, + "learning_rate": 2.924547792095837e-06, + "loss": 1.7731, + "step": 19010500 + }, + { + "epoch": 94.19, + "learning_rate": 2.9233116828426054e-06, + "loss": 1.8031, + "step": 19011000 + }, + { + "epoch": 94.19, + "learning_rate": 2.922073096416522e-06, + "loss": 1.78, + "step": 19011500 + }, + { + "epoch": 94.19, + "learning_rate": 2.920834509990438e-06, + "loss": 1.8002, + "step": 19012000 + }, + { + "epoch": 94.19, + "learning_rate": 2.9195959235643544e-06, + "loss": 1.7856, + "step": 19012500 + }, + { + "epoch": 94.2, + "learning_rate": 2.9183598143111233e-06, + "loss": 1.7818, + "step": 19013000 + }, + { + "epoch": 94.2, + "learning_rate": 2.9171237050578916e-06, + "loss": 1.7625, + "step": 19013500 + }, + { + "epoch": 94.2, + "learning_rate": 2.915885118631808e-06, + "loss": 1.7899, + "step": 19014000 + }, + { + "epoch": 94.2, + "learning_rate": 2.9146465322057242e-06, + "loss": 1.783, + "step": 19014500 + }, + { + "epoch": 94.21, + "learning_rate": 2.9134079457796407e-06, + "loss": 1.7868, + "step": 19015000 + }, + { + "epoch": 94.21, + "learning_rate": 2.912171836526409e-06, + "loss": 1.7605, + "step": 19015500 + }, + { + "epoch": 94.21, + "learning_rate": 2.9109332501003256e-06, + "loss": 1.7741, + "step": 19016000 + }, + { + "epoch": 94.21, + "learning_rate": 2.909697140847094e-06, + "loss": 1.7783, + "step": 19016500 + }, + { + "epoch": 94.22, + "learning_rate": 2.9084585544210105e-06, + "loss": 1.8065, + "step": 19017000 + }, + { + "epoch": 94.22, + "learning_rate": 2.907219967994927e-06, + "loss": 1.7691, + "step": 19017500 + }, + { + "epoch": 94.22, + "learning_rate": 2.905981381568843e-06, + "loss": 1.798, + "step": 19018000 + }, + { + "epoch": 94.22, + "learning_rate": 2.9047427951427596e-06, + "loss": 1.7897, + "step": 19018500 + }, + { + "epoch": 94.23, + "learning_rate": 2.9035042087166757e-06, + "loss": 1.7859, + "step": 19019000 + }, + { + "epoch": 94.23, + "learning_rate": 2.902265622290592e-06, + "loss": 1.7506, + "step": 19019500 + }, + { + "epoch": 94.23, + "learning_rate": 2.9010270358645087e-06, + "loss": 1.7807, + "step": 19020000 + }, + { + "epoch": 94.23, + "learning_rate": 2.899790926611277e-06, + "loss": 1.8035, + "step": 19020500 + }, + { + "epoch": 94.24, + "learning_rate": 2.8985523401851936e-06, + "loss": 1.7819, + "step": 19021000 + }, + { + "epoch": 94.24, + "learning_rate": 2.89731375375911e-06, + "loss": 1.7718, + "step": 19021500 + }, + { + "epoch": 94.24, + "learning_rate": 2.8960751673330266e-06, + "loss": 1.7901, + "step": 19022000 + }, + { + "epoch": 94.24, + "learning_rate": 2.8948365809069427e-06, + "loss": 1.7961, + "step": 19022500 + }, + { + "epoch": 94.25, + "learning_rate": 2.893597994480859e-06, + "loss": 1.7958, + "step": 19023000 + }, + { + "epoch": 94.25, + "learning_rate": 2.8923594080547753e-06, + "loss": 1.7719, + "step": 19023500 + }, + { + "epoch": 94.25, + "learning_rate": 2.8911208216286918e-06, + "loss": 1.7711, + "step": 19024000 + }, + { + "epoch": 94.25, + "learning_rate": 2.8898822352026083e-06, + "loss": 1.8014, + "step": 19024500 + }, + { + "epoch": 94.26, + "learning_rate": 2.8886436487765243e-06, + "loss": 1.787, + "step": 19025000 + }, + { + "epoch": 94.26, + "learning_rate": 2.887405062350441e-06, + "loss": 1.7888, + "step": 19025500 + }, + { + "epoch": 94.26, + "learning_rate": 2.8861689530972092e-06, + "loss": 1.7918, + "step": 19026000 + }, + { + "epoch": 94.26, + "learning_rate": 2.8849303666711257e-06, + "loss": 1.7551, + "step": 19026500 + }, + { + "epoch": 94.27, + "learning_rate": 2.883691780245042e-06, + "loss": 1.7809, + "step": 19027000 + }, + { + "epoch": 94.27, + "learning_rate": 2.8824531938189583e-06, + "loss": 1.7595, + "step": 19027500 + }, + { + "epoch": 94.27, + "learning_rate": 2.881214607392875e-06, + "loss": 1.7917, + "step": 19028000 + }, + { + "epoch": 94.27, + "learning_rate": 2.879976020966791e-06, + "loss": 1.7628, + "step": 19028500 + }, + { + "epoch": 94.28, + "learning_rate": 2.878737434540708e-06, + "loss": 1.7992, + "step": 19029000 + }, + { + "epoch": 94.28, + "learning_rate": 2.877498848114624e-06, + "loss": 1.7884, + "step": 19029500 + }, + { + "epoch": 94.28, + "learning_rate": 2.8762602616885404e-06, + "loss": 1.782, + "step": 19030000 + }, + { + "epoch": 94.28, + "learning_rate": 2.8750216752624565e-06, + "loss": 1.8174, + "step": 19030500 + }, + { + "epoch": 94.29, + "learning_rate": 2.873783088836373e-06, + "loss": 1.7862, + "step": 19031000 + }, + { + "epoch": 94.29, + "learning_rate": 2.8725445024102895e-06, + "loss": 1.7875, + "step": 19031500 + }, + { + "epoch": 94.29, + "learning_rate": 2.8713059159842056e-06, + "loss": 1.7931, + "step": 19032000 + }, + { + "epoch": 94.29, + "learning_rate": 2.8700698067309744e-06, + "loss": 1.7708, + "step": 19032500 + }, + { + "epoch": 94.3, + "learning_rate": 2.8688312203048905e-06, + "loss": 1.8122, + "step": 19033000 + }, + { + "epoch": 94.3, + "learning_rate": 2.867592633878807e-06, + "loss": 1.8106, + "step": 19033500 + }, + { + "epoch": 94.3, + "learning_rate": 2.8663565246255754e-06, + "loss": 1.7678, + "step": 19034000 + }, + { + "epoch": 94.3, + "learning_rate": 2.865117938199492e-06, + "loss": 1.7791, + "step": 19034500 + }, + { + "epoch": 94.31, + "learning_rate": 2.8638793517734084e-06, + "loss": 1.7877, + "step": 19035000 + }, + { + "epoch": 94.31, + "learning_rate": 2.8626407653473245e-06, + "loss": 1.7883, + "step": 19035500 + }, + { + "epoch": 94.31, + "learning_rate": 2.861402178921241e-06, + "loss": 1.7926, + "step": 19036000 + }, + { + "epoch": 94.31, + "learning_rate": 2.8601660696680094e-06, + "loss": 1.7699, + "step": 19036500 + }, + { + "epoch": 94.32, + "learning_rate": 2.858929960414778e-06, + "loss": 1.7886, + "step": 19037000 + }, + { + "epoch": 94.32, + "learning_rate": 2.8576913739886943e-06, + "loss": 1.8086, + "step": 19037500 + }, + { + "epoch": 94.32, + "learning_rate": 2.8564527875626108e-06, + "loss": 1.7875, + "step": 19038000 + }, + { + "epoch": 94.32, + "learning_rate": 2.855214201136527e-06, + "loss": 1.7865, + "step": 19038500 + }, + { + "epoch": 94.33, + "learning_rate": 2.8539756147104433e-06, + "loss": 1.7723, + "step": 19039000 + }, + { + "epoch": 94.33, + "learning_rate": 2.85273702828436e-06, + "loss": 1.7876, + "step": 19039500 + }, + { + "epoch": 94.33, + "learning_rate": 2.8515009190311282e-06, + "loss": 1.789, + "step": 19040000 + }, + { + "epoch": 94.33, + "learning_rate": 2.8502623326050447e-06, + "loss": 1.7817, + "step": 19040500 + }, + { + "epoch": 94.34, + "learning_rate": 2.849023746178961e-06, + "loss": 1.776, + "step": 19041000 + }, + { + "epoch": 94.34, + "learning_rate": 2.8477851597528773e-06, + "loss": 1.7838, + "step": 19041500 + }, + { + "epoch": 94.34, + "learning_rate": 2.8465465733267934e-06, + "loss": 1.7771, + "step": 19042000 + }, + { + "epoch": 94.34, + "learning_rate": 2.84530798690071e-06, + "loss": 1.7854, + "step": 19042500 + }, + { + "epoch": 94.35, + "learning_rate": 2.8440718776474787e-06, + "loss": 1.7869, + "step": 19043000 + }, + { + "epoch": 94.35, + "learning_rate": 2.842833291221395e-06, + "loss": 1.7829, + "step": 19043500 + }, + { + "epoch": 94.35, + "learning_rate": 2.8415947047953113e-06, + "loss": 1.7785, + "step": 19044000 + }, + { + "epoch": 94.35, + "learning_rate": 2.8403561183692274e-06, + "loss": 1.7703, + "step": 19044500 + }, + { + "epoch": 94.36, + "learning_rate": 2.8391175319431443e-06, + "loss": 1.7743, + "step": 19045000 + }, + { + "epoch": 94.36, + "learning_rate": 2.8378789455170604e-06, + "loss": 1.7859, + "step": 19045500 + }, + { + "epoch": 94.36, + "learning_rate": 2.836640359090977e-06, + "loss": 1.7851, + "step": 19046000 + }, + { + "epoch": 94.36, + "learning_rate": 2.8354017726648934e-06, + "loss": 1.7743, + "step": 19046500 + }, + { + "epoch": 94.37, + "learning_rate": 2.8341631862388095e-06, + "loss": 1.7792, + "step": 19047000 + }, + { + "epoch": 94.37, + "learning_rate": 2.832924599812726e-06, + "loss": 1.785, + "step": 19047500 + }, + { + "epoch": 94.37, + "learning_rate": 2.831686013386642e-06, + "loss": 1.7986, + "step": 19048000 + }, + { + "epoch": 94.37, + "learning_rate": 2.830449904133411e-06, + "loss": 1.7905, + "step": 19048500 + }, + { + "epoch": 94.38, + "learning_rate": 2.829211317707327e-06, + "loss": 1.7828, + "step": 19049000 + }, + { + "epoch": 94.38, + "learning_rate": 2.8279727312812435e-06, + "loss": 1.7617, + "step": 19049500 + }, + { + "epoch": 94.38, + "learning_rate": 2.82673414485516e-06, + "loss": 1.784, + "step": 19050000 + }, + { + "epoch": 94.38, + "learning_rate": 2.825495558429076e-06, + "loss": 1.7956, + "step": 19050500 + }, + { + "epoch": 94.39, + "learning_rate": 2.8242569720029926e-06, + "loss": 1.7666, + "step": 19051000 + }, + { + "epoch": 94.39, + "learning_rate": 2.8230183855769086e-06, + "loss": 1.8154, + "step": 19051500 + }, + { + "epoch": 94.39, + "learning_rate": 2.8217822763236774e-06, + "loss": 1.8179, + "step": 19052000 + }, + { + "epoch": 94.39, + "learning_rate": 2.820546167070446e-06, + "loss": 1.7849, + "step": 19052500 + }, + { + "epoch": 94.4, + "learning_rate": 2.8193075806443623e-06, + "loss": 1.7849, + "step": 19053000 + }, + { + "epoch": 94.4, + "learning_rate": 2.818068994218279e-06, + "loss": 1.7889, + "step": 19053500 + }, + { + "epoch": 94.4, + "learning_rate": 2.8168328849650472e-06, + "loss": 1.7754, + "step": 19054000 + }, + { + "epoch": 94.4, + "learning_rate": 2.8155942985389637e-06, + "loss": 1.8015, + "step": 19054500 + }, + { + "epoch": 94.41, + "learning_rate": 2.81435571211288e-06, + "loss": 1.7805, + "step": 19055000 + }, + { + "epoch": 94.41, + "learning_rate": 2.8131171256867963e-06, + "loss": 1.8082, + "step": 19055500 + }, + { + "epoch": 94.41, + "learning_rate": 2.8118785392607124e-06, + "loss": 1.7813, + "step": 19056000 + }, + { + "epoch": 94.41, + "learning_rate": 2.810639952834629e-06, + "loss": 1.8022, + "step": 19056500 + }, + { + "epoch": 94.41, + "learning_rate": 2.8094013664085454e-06, + "loss": 1.7762, + "step": 19057000 + }, + { + "epoch": 94.42, + "learning_rate": 2.808162779982462e-06, + "loss": 1.7979, + "step": 19057500 + }, + { + "epoch": 94.42, + "learning_rate": 2.8069266707292303e-06, + "loss": 1.764, + "step": 19058000 + }, + { + "epoch": 94.42, + "learning_rate": 2.8056880843031464e-06, + "loss": 1.7663, + "step": 19058500 + }, + { + "epoch": 94.42, + "learning_rate": 2.8044494978770633e-06, + "loss": 1.7839, + "step": 19059000 + }, + { + "epoch": 94.43, + "learning_rate": 2.8032109114509794e-06, + "loss": 1.7816, + "step": 19059500 + }, + { + "epoch": 94.43, + "learning_rate": 2.801972325024896e-06, + "loss": 1.791, + "step": 19060000 + }, + { + "epoch": 94.43, + "learning_rate": 2.8007362157716643e-06, + "loss": 1.8139, + "step": 19060500 + }, + { + "epoch": 94.43, + "learning_rate": 2.7994976293455808e-06, + "loss": 1.7884, + "step": 19061000 + }, + { + "epoch": 94.44, + "learning_rate": 2.798259042919497e-06, + "loss": 1.7868, + "step": 19061500 + }, + { + "epoch": 94.44, + "learning_rate": 2.7970204564934134e-06, + "loss": 1.7891, + "step": 19062000 + }, + { + "epoch": 94.44, + "learning_rate": 2.79578187006733e-06, + "loss": 1.7903, + "step": 19062500 + }, + { + "epoch": 94.44, + "learning_rate": 2.794543283641246e-06, + "loss": 1.7806, + "step": 19063000 + }, + { + "epoch": 94.45, + "learning_rate": 2.7933046972151625e-06, + "loss": 1.7768, + "step": 19063500 + }, + { + "epoch": 94.45, + "learning_rate": 2.7920661107890785e-06, + "loss": 1.8142, + "step": 19064000 + }, + { + "epoch": 94.45, + "learning_rate": 2.790827524362995e-06, + "loss": 1.7767, + "step": 19064500 + }, + { + "epoch": 94.45, + "learning_rate": 2.7895889379369115e-06, + "loss": 1.7811, + "step": 19065000 + }, + { + "epoch": 94.46, + "learning_rate": 2.7883503515108276e-06, + "loss": 1.7939, + "step": 19065500 + }, + { + "epoch": 94.46, + "learning_rate": 2.787111765084744e-06, + "loss": 1.7768, + "step": 19066000 + }, + { + "epoch": 94.46, + "learning_rate": 2.7858731786586606e-06, + "loss": 1.7838, + "step": 19066500 + }, + { + "epoch": 94.46, + "learning_rate": 2.784634592232577e-06, + "loss": 1.8244, + "step": 19067000 + }, + { + "epoch": 94.47, + "learning_rate": 2.7833960058064932e-06, + "loss": 1.8021, + "step": 19067500 + }, + { + "epoch": 94.47, + "learning_rate": 2.7821574193804097e-06, + "loss": 1.7898, + "step": 19068000 + }, + { + "epoch": 94.47, + "learning_rate": 2.7809188329543262e-06, + "loss": 1.7873, + "step": 19068500 + }, + { + "epoch": 94.47, + "learning_rate": 2.7796802465282423e-06, + "loss": 1.7725, + "step": 19069000 + }, + { + "epoch": 94.48, + "learning_rate": 2.778444137275011e-06, + "loss": 1.753, + "step": 19069500 + }, + { + "epoch": 94.48, + "learning_rate": 2.777205550848927e-06, + "loss": 1.788, + "step": 19070000 + }, + { + "epoch": 94.48, + "learning_rate": 2.7759669644228437e-06, + "loss": 1.7931, + "step": 19070500 + }, + { + "epoch": 94.48, + "learning_rate": 2.77472837799676e-06, + "loss": 1.7979, + "step": 19071000 + }, + { + "epoch": 94.49, + "learning_rate": 2.7734922687435286e-06, + "loss": 1.7824, + "step": 19071500 + }, + { + "epoch": 94.49, + "learning_rate": 2.772253682317445e-06, + "loss": 1.7889, + "step": 19072000 + }, + { + "epoch": 94.49, + "learning_rate": 2.771015095891361e-06, + "loss": 1.7819, + "step": 19072500 + }, + { + "epoch": 94.49, + "learning_rate": 2.7697765094652777e-06, + "loss": 1.7773, + "step": 19073000 + }, + { + "epoch": 94.5, + "learning_rate": 2.7685379230391938e-06, + "loss": 1.7764, + "step": 19073500 + }, + { + "epoch": 94.5, + "learning_rate": 2.7673018137859626e-06, + "loss": 1.7796, + "step": 19074000 + }, + { + "epoch": 94.5, + "learning_rate": 2.7660632273598787e-06, + "loss": 1.789, + "step": 19074500 + }, + { + "epoch": 94.5, + "learning_rate": 2.764824640933795e-06, + "loss": 1.7888, + "step": 19075000 + }, + { + "epoch": 94.51, + "learning_rate": 2.7635860545077117e-06, + "loss": 1.7988, + "step": 19075500 + }, + { + "epoch": 94.51, + "learning_rate": 2.7623474680816278e-06, + "loss": 1.7856, + "step": 19076000 + }, + { + "epoch": 94.51, + "learning_rate": 2.7611088816555443e-06, + "loss": 1.7791, + "step": 19076500 + }, + { + "epoch": 94.51, + "learning_rate": 2.7598702952294603e-06, + "loss": 1.8048, + "step": 19077000 + }, + { + "epoch": 94.52, + "learning_rate": 2.7586317088033773e-06, + "loss": 1.7938, + "step": 19077500 + }, + { + "epoch": 94.52, + "learning_rate": 2.7573931223772933e-06, + "loss": 1.7729, + "step": 19078000 + }, + { + "epoch": 94.52, + "learning_rate": 2.75615453595121e-06, + "loss": 1.7658, + "step": 19078500 + }, + { + "epoch": 94.52, + "learning_rate": 2.7549184266979782e-06, + "loss": 1.7645, + "step": 19079000 + }, + { + "epoch": 94.53, + "learning_rate": 2.7536798402718947e-06, + "loss": 1.7661, + "step": 19079500 + }, + { + "epoch": 94.53, + "learning_rate": 2.752443731018663e-06, + "loss": 1.8053, + "step": 19080000 + }, + { + "epoch": 94.53, + "learning_rate": 2.7512051445925796e-06, + "loss": 1.7842, + "step": 19080500 + }, + { + "epoch": 94.53, + "learning_rate": 2.749966558166496e-06, + "loss": 1.769, + "step": 19081000 + }, + { + "epoch": 94.54, + "learning_rate": 2.7487279717404122e-06, + "loss": 1.7903, + "step": 19081500 + }, + { + "epoch": 94.54, + "learning_rate": 2.7474893853143287e-06, + "loss": 1.7793, + "step": 19082000 + }, + { + "epoch": 94.54, + "learning_rate": 2.746250798888245e-06, + "loss": 1.7721, + "step": 19082500 + }, + { + "epoch": 94.54, + "learning_rate": 2.7450122124621613e-06, + "loss": 1.7806, + "step": 19083000 + }, + { + "epoch": 94.55, + "learning_rate": 2.743773626036078e-06, + "loss": 1.7845, + "step": 19083500 + }, + { + "epoch": 94.55, + "learning_rate": 2.742537516782846e-06, + "loss": 1.8153, + "step": 19084000 + }, + { + "epoch": 94.55, + "learning_rate": 2.7412989303567627e-06, + "loss": 1.7922, + "step": 19084500 + }, + { + "epoch": 94.55, + "learning_rate": 2.740062821103531e-06, + "loss": 1.7773, + "step": 19085000 + }, + { + "epoch": 94.56, + "learning_rate": 2.7388242346774476e-06, + "loss": 1.7878, + "step": 19085500 + }, + { + "epoch": 94.56, + "learning_rate": 2.7375856482513637e-06, + "loss": 1.7595, + "step": 19086000 + }, + { + "epoch": 94.56, + "learning_rate": 2.73634706182528e-06, + "loss": 1.7657, + "step": 19086500 + }, + { + "epoch": 94.56, + "learning_rate": 2.7351084753991967e-06, + "loss": 1.8135, + "step": 19087000 + }, + { + "epoch": 94.57, + "learning_rate": 2.7338698889731128e-06, + "loss": 1.7815, + "step": 19087500 + }, + { + "epoch": 94.57, + "learning_rate": 2.7326313025470293e-06, + "loss": 1.7969, + "step": 19088000 + }, + { + "epoch": 94.57, + "learning_rate": 2.7313927161209454e-06, + "loss": 1.7668, + "step": 19088500 + }, + { + "epoch": 94.57, + "learning_rate": 2.730154129694862e-06, + "loss": 1.7799, + "step": 19089000 + }, + { + "epoch": 94.58, + "learning_rate": 2.7289155432687784e-06, + "loss": 1.7694, + "step": 19089500 + }, + { + "epoch": 94.58, + "learning_rate": 2.727676956842695e-06, + "loss": 1.7835, + "step": 19090000 + }, + { + "epoch": 94.58, + "learning_rate": 2.7264383704166114e-06, + "loss": 1.7843, + "step": 19090500 + }, + { + "epoch": 94.58, + "learning_rate": 2.7252022611633793e-06, + "loss": 1.8114, + "step": 19091000 + }, + { + "epoch": 94.59, + "learning_rate": 2.723966151910148e-06, + "loss": 1.7716, + "step": 19091500 + }, + { + "epoch": 94.59, + "learning_rate": 2.7227275654840642e-06, + "loss": 1.8063, + "step": 19092000 + }, + { + "epoch": 94.59, + "learning_rate": 2.7214889790579807e-06, + "loss": 1.7947, + "step": 19092500 + }, + { + "epoch": 94.59, + "learning_rate": 2.7202503926318972e-06, + "loss": 1.7934, + "step": 19093000 + }, + { + "epoch": 94.6, + "learning_rate": 2.7190118062058137e-06, + "loss": 1.7819, + "step": 19093500 + }, + { + "epoch": 94.6, + "learning_rate": 2.71777321977973e-06, + "loss": 1.7793, + "step": 19094000 + }, + { + "epoch": 94.6, + "learning_rate": 2.7165371105264986e-06, + "loss": 1.7779, + "step": 19094500 + }, + { + "epoch": 94.6, + "learning_rate": 2.715301001273267e-06, + "loss": 1.7786, + "step": 19095000 + }, + { + "epoch": 94.61, + "learning_rate": 2.714062414847183e-06, + "loss": 1.7944, + "step": 19095500 + }, + { + "epoch": 94.61, + "learning_rate": 2.7128238284210996e-06, + "loss": 1.8044, + "step": 19096000 + }, + { + "epoch": 94.61, + "learning_rate": 2.711585241995016e-06, + "loss": 1.789, + "step": 19096500 + }, + { + "epoch": 94.61, + "learning_rate": 2.7103466555689326e-06, + "loss": 1.7962, + "step": 19097000 + }, + { + "epoch": 94.62, + "learning_rate": 2.7091080691428487e-06, + "loss": 1.7924, + "step": 19097500 + }, + { + "epoch": 94.62, + "learning_rate": 2.707869482716765e-06, + "loss": 1.7919, + "step": 19098000 + }, + { + "epoch": 94.62, + "learning_rate": 2.7066308962906817e-06, + "loss": 1.7673, + "step": 19098500 + }, + { + "epoch": 94.62, + "learning_rate": 2.70539478703745e-06, + "loss": 1.7832, + "step": 19099000 + }, + { + "epoch": 94.63, + "learning_rate": 2.7041562006113666e-06, + "loss": 1.7558, + "step": 19099500 + }, + { + "epoch": 94.63, + "learning_rate": 2.7029176141852827e-06, + "loss": 1.7939, + "step": 19100000 + }, + { + "epoch": 94.63, + "learning_rate": 2.701679027759199e-06, + "loss": 1.7735, + "step": 19100500 + }, + { + "epoch": 94.63, + "learning_rate": 2.7004404413331153e-06, + "loss": 1.7668, + "step": 19101000 + }, + { + "epoch": 94.64, + "learning_rate": 2.699204332079884e-06, + "loss": 1.7858, + "step": 19101500 + }, + { + "epoch": 94.64, + "learning_rate": 2.6979657456538006e-06, + "loss": 1.7556, + "step": 19102000 + }, + { + "epoch": 94.64, + "learning_rate": 2.6967271592277167e-06, + "loss": 1.7833, + "step": 19102500 + }, + { + "epoch": 94.64, + "learning_rate": 2.695488572801633e-06, + "loss": 1.7727, + "step": 19103000 + }, + { + "epoch": 94.65, + "learning_rate": 2.6942499863755492e-06, + "loss": 1.8078, + "step": 19103500 + }, + { + "epoch": 94.65, + "learning_rate": 2.6930113999494657e-06, + "loss": 1.777, + "step": 19104000 + }, + { + "epoch": 94.65, + "learning_rate": 2.691775290696234e-06, + "loss": 1.7803, + "step": 19104500 + }, + { + "epoch": 94.65, + "learning_rate": 2.6905367042701506e-06, + "loss": 1.768, + "step": 19105000 + }, + { + "epoch": 94.66, + "learning_rate": 2.689298117844067e-06, + "loss": 1.7802, + "step": 19105500 + }, + { + "epoch": 94.66, + "learning_rate": 2.6880620085908355e-06, + "loss": 1.7846, + "step": 19106000 + }, + { + "epoch": 94.66, + "learning_rate": 2.686823422164752e-06, + "loss": 1.8075, + "step": 19106500 + }, + { + "epoch": 94.66, + "learning_rate": 2.685584835738668e-06, + "loss": 1.7818, + "step": 19107000 + }, + { + "epoch": 94.67, + "learning_rate": 2.6843462493125846e-06, + "loss": 1.7729, + "step": 19107500 + }, + { + "epoch": 94.67, + "learning_rate": 2.6831076628865007e-06, + "loss": 1.787, + "step": 19108000 + }, + { + "epoch": 94.67, + "learning_rate": 2.681869076460417e-06, + "loss": 1.7819, + "step": 19108500 + }, + { + "epoch": 94.67, + "learning_rate": 2.6806304900343337e-06, + "loss": 1.7744, + "step": 19109000 + }, + { + "epoch": 94.68, + "learning_rate": 2.67939190360825e-06, + "loss": 1.7872, + "step": 19109500 + }, + { + "epoch": 94.68, + "learning_rate": 2.6781533171821667e-06, + "loss": 1.7856, + "step": 19110000 + }, + { + "epoch": 94.68, + "learning_rate": 2.676917207928935e-06, + "loss": 1.7854, + "step": 19110500 + }, + { + "epoch": 94.68, + "learning_rate": 2.6756786215028516e-06, + "loss": 1.7898, + "step": 19111000 + }, + { + "epoch": 94.68, + "learning_rate": 2.6744400350767677e-06, + "loss": 1.7831, + "step": 19111500 + }, + { + "epoch": 94.69, + "learning_rate": 2.673201448650684e-06, + "loss": 1.7926, + "step": 19112000 + }, + { + "epoch": 94.69, + "learning_rate": 2.6719628622246003e-06, + "loss": 1.7796, + "step": 19112500 + }, + { + "epoch": 94.69, + "learning_rate": 2.670726752971369e-06, + "loss": 1.7571, + "step": 19113000 + }, + { + "epoch": 94.69, + "learning_rate": 2.6694881665452856e-06, + "loss": 1.7926, + "step": 19113500 + }, + { + "epoch": 94.7, + "learning_rate": 2.6682495801192017e-06, + "loss": 1.7767, + "step": 19114000 + }, + { + "epoch": 94.7, + "learning_rate": 2.667010993693118e-06, + "loss": 1.786, + "step": 19114500 + }, + { + "epoch": 94.7, + "learning_rate": 2.6657724072670343e-06, + "loss": 1.7915, + "step": 19115000 + }, + { + "epoch": 94.7, + "learning_rate": 2.664536298013803e-06, + "loss": 1.7664, + "step": 19115500 + }, + { + "epoch": 94.71, + "learning_rate": 2.6633001887605714e-06, + "loss": 1.7858, + "step": 19116000 + }, + { + "epoch": 94.71, + "learning_rate": 2.662061602334488e-06, + "loss": 1.7976, + "step": 19116500 + }, + { + "epoch": 94.71, + "learning_rate": 2.660823015908404e-06, + "loss": 1.7741, + "step": 19117000 + }, + { + "epoch": 94.71, + "learning_rate": 2.659586906655173e-06, + "loss": 1.795, + "step": 19117500 + }, + { + "epoch": 94.72, + "learning_rate": 2.6583483202290893e-06, + "loss": 1.7809, + "step": 19118000 + }, + { + "epoch": 94.72, + "learning_rate": 2.6571097338030054e-06, + "loss": 1.7669, + "step": 19118500 + }, + { + "epoch": 94.72, + "learning_rate": 2.655871147376922e-06, + "loss": 1.7951, + "step": 19119000 + }, + { + "epoch": 94.72, + "learning_rate": 2.654632560950838e-06, + "loss": 1.7775, + "step": 19119500 + }, + { + "epoch": 94.73, + "learning_rate": 2.6533939745247545e-06, + "loss": 1.7972, + "step": 19120000 + }, + { + "epoch": 94.73, + "learning_rate": 2.652155388098671e-06, + "loss": 1.7852, + "step": 19120500 + }, + { + "epoch": 94.73, + "learning_rate": 2.650916801672587e-06, + "loss": 1.777, + "step": 19121000 + }, + { + "epoch": 94.73, + "learning_rate": 2.6496782152465036e-06, + "loss": 1.8212, + "step": 19121500 + }, + { + "epoch": 94.74, + "learning_rate": 2.6484396288204197e-06, + "loss": 1.7855, + "step": 19122000 + }, + { + "epoch": 94.74, + "learning_rate": 2.647201042394336e-06, + "loss": 1.7689, + "step": 19122500 + }, + { + "epoch": 94.74, + "learning_rate": 2.6459624559682527e-06, + "loss": 1.8029, + "step": 19123000 + }, + { + "epoch": 94.74, + "learning_rate": 2.644723869542169e-06, + "loss": 1.7764, + "step": 19123500 + }, + { + "epoch": 94.75, + "learning_rate": 2.6434852831160853e-06, + "loss": 1.7802, + "step": 19124000 + }, + { + "epoch": 94.75, + "learning_rate": 2.642246696690002e-06, + "loss": 1.778, + "step": 19124500 + }, + { + "epoch": 94.75, + "learning_rate": 2.6410081102639183e-06, + "loss": 1.7692, + "step": 19125000 + }, + { + "epoch": 94.75, + "learning_rate": 2.6397695238378344e-06, + "loss": 1.779, + "step": 19125500 + }, + { + "epoch": 94.76, + "learning_rate": 2.638530937411751e-06, + "loss": 1.776, + "step": 19126000 + }, + { + "epoch": 94.76, + "learning_rate": 2.637292350985667e-06, + "loss": 1.7913, + "step": 19126500 + }, + { + "epoch": 94.76, + "learning_rate": 2.6360562417324358e-06, + "loss": 1.8007, + "step": 19127000 + }, + { + "epoch": 94.76, + "learning_rate": 2.6348176553063523e-06, + "loss": 1.794, + "step": 19127500 + }, + { + "epoch": 94.77, + "learning_rate": 2.6335790688802684e-06, + "loss": 1.7763, + "step": 19128000 + }, + { + "epoch": 94.77, + "learning_rate": 2.632340482454185e-06, + "loss": 1.7839, + "step": 19128500 + }, + { + "epoch": 94.77, + "learning_rate": 2.6311043732009532e-06, + "loss": 1.7907, + "step": 19129000 + }, + { + "epoch": 94.77, + "learning_rate": 2.6298657867748698e-06, + "loss": 1.7654, + "step": 19129500 + }, + { + "epoch": 94.78, + "learning_rate": 2.628627200348786e-06, + "loss": 1.7717, + "step": 19130000 + }, + { + "epoch": 94.78, + "learning_rate": 2.6273886139227023e-06, + "loss": 1.7832, + "step": 19130500 + }, + { + "epoch": 94.78, + "learning_rate": 2.626150027496619e-06, + "loss": 1.8032, + "step": 19131000 + }, + { + "epoch": 94.78, + "learning_rate": 2.624911441070535e-06, + "loss": 1.8002, + "step": 19131500 + }, + { + "epoch": 94.79, + "learning_rate": 2.623672854644452e-06, + "loss": 1.7984, + "step": 19132000 + }, + { + "epoch": 94.79, + "learning_rate": 2.622434268218368e-06, + "loss": 1.7995, + "step": 19132500 + }, + { + "epoch": 94.79, + "learning_rate": 2.6211956817922844e-06, + "loss": 1.7806, + "step": 19133000 + }, + { + "epoch": 94.79, + "learning_rate": 2.6199570953662005e-06, + "loss": 1.766, + "step": 19133500 + }, + { + "epoch": 94.8, + "learning_rate": 2.618718508940117e-06, + "loss": 1.7993, + "step": 19134000 + }, + { + "epoch": 94.8, + "learning_rate": 2.6174823996868854e-06, + "loss": 1.7861, + "step": 19134500 + }, + { + "epoch": 94.8, + "learning_rate": 2.616243813260802e-06, + "loss": 1.7629, + "step": 19135000 + }, + { + "epoch": 94.8, + "learning_rate": 2.6150052268347184e-06, + "loss": 1.7818, + "step": 19135500 + }, + { + "epoch": 94.81, + "learning_rate": 2.613769117581487e-06, + "loss": 1.7672, + "step": 19136000 + }, + { + "epoch": 94.81, + "learning_rate": 2.6125305311554033e-06, + "loss": 1.7833, + "step": 19136500 + }, + { + "epoch": 94.81, + "learning_rate": 2.6112919447293194e-06, + "loss": 1.8134, + "step": 19137000 + }, + { + "epoch": 94.81, + "learning_rate": 2.610055835476088e-06, + "loss": 1.7798, + "step": 19137500 + }, + { + "epoch": 94.82, + "learning_rate": 2.6088172490500043e-06, + "loss": 1.7839, + "step": 19138000 + }, + { + "epoch": 94.82, + "learning_rate": 2.6075786626239208e-06, + "loss": 1.78, + "step": 19138500 + }, + { + "epoch": 94.82, + "learning_rate": 2.6063400761978373e-06, + "loss": 1.7945, + "step": 19139000 + }, + { + "epoch": 94.82, + "learning_rate": 2.6051014897717534e-06, + "loss": 1.7941, + "step": 19139500 + }, + { + "epoch": 94.83, + "learning_rate": 2.60386290334567e-06, + "loss": 1.7754, + "step": 19140000 + }, + { + "epoch": 94.83, + "learning_rate": 2.602624316919586e-06, + "loss": 1.7676, + "step": 19140500 + }, + { + "epoch": 94.83, + "learning_rate": 2.6013857304935025e-06, + "loss": 1.7822, + "step": 19141000 + }, + { + "epoch": 94.83, + "learning_rate": 2.600147144067419e-06, + "loss": 1.8139, + "step": 19141500 + }, + { + "epoch": 94.84, + "learning_rate": 2.598908557641335e-06, + "loss": 1.7825, + "step": 19142000 + }, + { + "epoch": 94.84, + "learning_rate": 2.597672448388104e-06, + "loss": 1.7897, + "step": 19142500 + }, + { + "epoch": 94.84, + "learning_rate": 2.59643386196202e-06, + "loss": 1.7896, + "step": 19143000 + }, + { + "epoch": 94.84, + "learning_rate": 2.5951977527087887e-06, + "loss": 1.7815, + "step": 19143500 + }, + { + "epoch": 94.85, + "learning_rate": 2.593959166282705e-06, + "loss": 1.8005, + "step": 19144000 + }, + { + "epoch": 94.85, + "learning_rate": 2.5927205798566213e-06, + "loss": 1.7781, + "step": 19144500 + }, + { + "epoch": 94.85, + "learning_rate": 2.5914819934305374e-06, + "loss": 1.7633, + "step": 19145000 + }, + { + "epoch": 94.85, + "learning_rate": 2.590243407004454e-06, + "loss": 1.7878, + "step": 19145500 + }, + { + "epoch": 94.86, + "learning_rate": 2.5890072977512227e-06, + "loss": 1.7861, + "step": 19146000 + }, + { + "epoch": 94.86, + "learning_rate": 2.587768711325139e-06, + "loss": 1.8038, + "step": 19146500 + }, + { + "epoch": 94.86, + "learning_rate": 2.5865301248990553e-06, + "loss": 1.7828, + "step": 19147000 + }, + { + "epoch": 94.86, + "learning_rate": 2.5852915384729714e-06, + "loss": 1.7884, + "step": 19147500 + }, + { + "epoch": 94.87, + "learning_rate": 2.5840529520468883e-06, + "loss": 1.7782, + "step": 19148000 + }, + { + "epoch": 94.87, + "learning_rate": 2.5828143656208044e-06, + "loss": 1.7777, + "step": 19148500 + }, + { + "epoch": 94.87, + "learning_rate": 2.581575779194721e-06, + "loss": 1.7758, + "step": 19149000 + }, + { + "epoch": 94.87, + "learning_rate": 2.580337192768637e-06, + "loss": 1.7844, + "step": 19149500 + }, + { + "epoch": 94.88, + "learning_rate": 2.579101083515406e-06, + "loss": 1.8014, + "step": 19150000 + }, + { + "epoch": 94.88, + "learning_rate": 2.5778624970893223e-06, + "loss": 1.8092, + "step": 19150500 + }, + { + "epoch": 94.88, + "learning_rate": 2.5766239106632384e-06, + "loss": 1.7891, + "step": 19151000 + }, + { + "epoch": 94.88, + "learning_rate": 2.575385324237155e-06, + "loss": 1.7632, + "step": 19151500 + }, + { + "epoch": 94.89, + "learning_rate": 2.574146737811071e-06, + "loss": 1.7766, + "step": 19152000 + }, + { + "epoch": 94.89, + "learning_rate": 2.5729081513849875e-06, + "loss": 1.782, + "step": 19152500 + }, + { + "epoch": 94.89, + "learning_rate": 2.571669564958904e-06, + "loss": 1.7702, + "step": 19153000 + }, + { + "epoch": 94.89, + "learning_rate": 2.57043097853282e-06, + "loss": 1.7791, + "step": 19153500 + }, + { + "epoch": 94.9, + "learning_rate": 2.569194869279589e-06, + "loss": 1.7834, + "step": 19154000 + }, + { + "epoch": 94.9, + "learning_rate": 2.567956282853505e-06, + "loss": 1.7946, + "step": 19154500 + }, + { + "epoch": 94.9, + "learning_rate": 2.5667201736002738e-06, + "loss": 1.7825, + "step": 19155000 + }, + { + "epoch": 94.9, + "learning_rate": 2.56548158717419e-06, + "loss": 1.7861, + "step": 19155500 + }, + { + "epoch": 94.91, + "learning_rate": 2.5642430007481063e-06, + "loss": 1.7611, + "step": 19156000 + }, + { + "epoch": 94.91, + "learning_rate": 2.5630044143220224e-06, + "loss": 1.8016, + "step": 19156500 + }, + { + "epoch": 94.91, + "learning_rate": 2.561765827895939e-06, + "loss": 1.7969, + "step": 19157000 + }, + { + "epoch": 94.91, + "learning_rate": 2.5605272414698554e-06, + "loss": 1.7756, + "step": 19157500 + }, + { + "epoch": 94.92, + "learning_rate": 2.5592886550437715e-06, + "loss": 1.7743, + "step": 19158000 + }, + { + "epoch": 94.92, + "learning_rate": 2.5580500686176884e-06, + "loss": 1.7728, + "step": 19158500 + }, + { + "epoch": 94.92, + "learning_rate": 2.5568114821916045e-06, + "loss": 1.7987, + "step": 19159000 + }, + { + "epoch": 94.92, + "learning_rate": 2.555572895765521e-06, + "loss": 1.755, + "step": 19159500 + }, + { + "epoch": 94.93, + "learning_rate": 2.554336786512289e-06, + "loss": 1.7985, + "step": 19160000 + }, + { + "epoch": 94.93, + "learning_rate": 2.553098200086206e-06, + "loss": 1.7689, + "step": 19160500 + }, + { + "epoch": 94.93, + "learning_rate": 2.551859613660122e-06, + "loss": 1.7855, + "step": 19161000 + }, + { + "epoch": 94.93, + "learning_rate": 2.5506210272340385e-06, + "loss": 1.7808, + "step": 19161500 + }, + { + "epoch": 94.94, + "learning_rate": 2.5493849179808073e-06, + "loss": 1.779, + "step": 19162000 + }, + { + "epoch": 94.94, + "learning_rate": 2.5481463315547234e-06, + "loss": 1.8109, + "step": 19162500 + }, + { + "epoch": 94.94, + "learning_rate": 2.5469102223014918e-06, + "loss": 1.7911, + "step": 19163000 + }, + { + "epoch": 94.94, + "learning_rate": 2.5456716358754083e-06, + "loss": 1.8005, + "step": 19163500 + }, + { + "epoch": 94.95, + "learning_rate": 2.544433049449325e-06, + "loss": 1.7797, + "step": 19164000 + }, + { + "epoch": 94.95, + "learning_rate": 2.543194463023241e-06, + "loss": 1.7902, + "step": 19164500 + }, + { + "epoch": 94.95, + "learning_rate": 2.5419558765971574e-06, + "loss": 1.7855, + "step": 19165000 + }, + { + "epoch": 94.95, + "learning_rate": 2.540717290171074e-06, + "loss": 1.7704, + "step": 19165500 + }, + { + "epoch": 94.95, + "learning_rate": 2.53947870374499e-06, + "loss": 1.7872, + "step": 19166000 + }, + { + "epoch": 94.96, + "learning_rate": 2.5382401173189065e-06, + "loss": 1.7741, + "step": 19166500 + }, + { + "epoch": 94.96, + "learning_rate": 2.5370015308928226e-06, + "loss": 1.7751, + "step": 19167000 + }, + { + "epoch": 94.96, + "learning_rate": 2.535762944466739e-06, + "loss": 1.8031, + "step": 19167500 + }, + { + "epoch": 94.96, + "learning_rate": 2.5345243580406556e-06, + "loss": 1.7732, + "step": 19168000 + }, + { + "epoch": 94.97, + "learning_rate": 2.533288248787424e-06, + "loss": 1.7678, + "step": 19168500 + }, + { + "epoch": 94.97, + "learning_rate": 2.5320496623613405e-06, + "loss": 1.7752, + "step": 19169000 + }, + { + "epoch": 94.97, + "learning_rate": 2.5308110759352565e-06, + "loss": 1.7934, + "step": 19169500 + }, + { + "epoch": 94.97, + "learning_rate": 2.529572489509173e-06, + "loss": 1.7499, + "step": 19170000 + }, + { + "epoch": 94.98, + "learning_rate": 2.528333903083089e-06, + "loss": 1.7852, + "step": 19170500 + }, + { + "epoch": 94.98, + "learning_rate": 2.527095316657006e-06, + "loss": 1.7836, + "step": 19171000 + }, + { + "epoch": 94.98, + "learning_rate": 2.5258592074037744e-06, + "loss": 1.7839, + "step": 19171500 + }, + { + "epoch": 94.98, + "learning_rate": 2.5246206209776905e-06, + "loss": 1.7973, + "step": 19172000 + }, + { + "epoch": 94.99, + "learning_rate": 2.523382034551607e-06, + "loss": 1.783, + "step": 19172500 + }, + { + "epoch": 94.99, + "learning_rate": 2.5221434481255235e-06, + "loss": 1.7964, + "step": 19173000 + }, + { + "epoch": 94.99, + "learning_rate": 2.52090486169944e-06, + "loss": 1.781, + "step": 19173500 + }, + { + "epoch": 94.99, + "learning_rate": 2.519666275273356e-06, + "loss": 1.7983, + "step": 19174000 + }, + { + "epoch": 95.0, + "learning_rate": 2.518430166020125e-06, + "loss": 1.7738, + "step": 19174500 + }, + { + "epoch": 95.0, + "learning_rate": 2.517191579594041e-06, + "loss": 1.7774, + "step": 19175000 + }, + { + "epoch": 95.0, + "eval_accuracy": 0.6867611295760832, + "eval_accuracy_mlm": 0.6480042502386186, + "eval_accuracy_nsp": 0.8695947191509223, + "eval_loss": 2.3160958290100098, + "eval_runtime": 147.1337, + "eval_samples_per_second": 1732.839, + "eval_steps_per_second": 72.206, + "step": 19175085 + }, + { + "epoch": 95.0, + "learning_rate": 2.5159554703408094e-06, + "loss": 1.7699, + "step": 19175500 + }, + { + "epoch": 95.0, + "learning_rate": 2.514716883914726e-06, + "loss": 1.7926, + "step": 19176000 + }, + { + "epoch": 95.01, + "learning_rate": 2.5134782974886424e-06, + "loss": 1.766, + "step": 19176500 + }, + { + "epoch": 95.01, + "learning_rate": 2.512239711062559e-06, + "loss": 1.7971, + "step": 19177000 + }, + { + "epoch": 95.01, + "learning_rate": 2.511001124636475e-06, + "loss": 1.7513, + "step": 19177500 + }, + { + "epoch": 95.01, + "learning_rate": 2.5097625382103915e-06, + "loss": 1.7864, + "step": 19178000 + }, + { + "epoch": 95.02, + "learning_rate": 2.50852642895716e-06, + "loss": 1.7785, + "step": 19178500 + }, + { + "epoch": 95.02, + "learning_rate": 2.5072878425310764e-06, + "loss": 1.7682, + "step": 19179000 + }, + { + "epoch": 95.02, + "learning_rate": 2.5060492561049925e-06, + "loss": 1.79, + "step": 19179500 + }, + { + "epoch": 95.02, + "learning_rate": 2.504810669678909e-06, + "loss": 1.7668, + "step": 19180000 + }, + { + "epoch": 95.03, + "learning_rate": 2.5035720832528255e-06, + "loss": 1.7786, + "step": 19180500 + }, + { + "epoch": 95.03, + "learning_rate": 2.5023334968267415e-06, + "loss": 1.8015, + "step": 19181000 + }, + { + "epoch": 95.03, + "learning_rate": 2.5010973875735104e-06, + "loss": 1.7668, + "step": 19181500 + }, + { + "epoch": 95.03, + "learning_rate": 2.4998588011474264e-06, + "loss": 1.7944, + "step": 19182000 + }, + { + "epoch": 95.04, + "learning_rate": 2.498620214721343e-06, + "loss": 1.7903, + "step": 19182500 + }, + { + "epoch": 95.04, + "learning_rate": 2.4973816282952594e-06, + "loss": 1.7741, + "step": 19183000 + }, + { + "epoch": 95.04, + "learning_rate": 2.4961430418691755e-06, + "loss": 1.7717, + "step": 19183500 + }, + { + "epoch": 95.04, + "learning_rate": 2.494904455443092e-06, + "loss": 1.7897, + "step": 19184000 + }, + { + "epoch": 95.05, + "learning_rate": 2.493665869017008e-06, + "loss": 1.7654, + "step": 19184500 + }, + { + "epoch": 95.05, + "learning_rate": 2.4924272825909246e-06, + "loss": 1.7832, + "step": 19185000 + }, + { + "epoch": 95.05, + "learning_rate": 2.491188696164841e-06, + "loss": 1.7861, + "step": 19185500 + }, + { + "epoch": 95.05, + "learning_rate": 2.4899501097387576e-06, + "loss": 1.8033, + "step": 19186000 + }, + { + "epoch": 95.06, + "learning_rate": 2.4887115233126737e-06, + "loss": 1.7701, + "step": 19186500 + }, + { + "epoch": 95.06, + "learning_rate": 2.4874754140594425e-06, + "loss": 1.7972, + "step": 19187000 + }, + { + "epoch": 95.06, + "learning_rate": 2.486236827633359e-06, + "loss": 1.7841, + "step": 19187500 + }, + { + "epoch": 95.06, + "learning_rate": 2.484998241207275e-06, + "loss": 1.7831, + "step": 19188000 + }, + { + "epoch": 95.07, + "learning_rate": 2.4837596547811916e-06, + "loss": 1.7622, + "step": 19188500 + }, + { + "epoch": 95.07, + "learning_rate": 2.4825210683551077e-06, + "loss": 1.7803, + "step": 19189000 + }, + { + "epoch": 95.07, + "learning_rate": 2.481282481929024e-06, + "loss": 1.7915, + "step": 19189500 + }, + { + "epoch": 95.07, + "learning_rate": 2.4800438955029407e-06, + "loss": 1.7947, + "step": 19190000 + }, + { + "epoch": 95.08, + "learning_rate": 2.4788053090768568e-06, + "loss": 1.7716, + "step": 19190500 + }, + { + "epoch": 95.08, + "learning_rate": 2.4775691998236256e-06, + "loss": 1.7854, + "step": 19191000 + }, + { + "epoch": 95.08, + "learning_rate": 2.4763306133975417e-06, + "loss": 1.7624, + "step": 19191500 + }, + { + "epoch": 95.08, + "learning_rate": 2.475092026971458e-06, + "loss": 1.7697, + "step": 19192000 + }, + { + "epoch": 95.09, + "learning_rate": 2.4738534405453743e-06, + "loss": 1.774, + "step": 19192500 + }, + { + "epoch": 95.09, + "learning_rate": 2.4726148541192908e-06, + "loss": 1.7863, + "step": 19193000 + }, + { + "epoch": 95.09, + "learning_rate": 2.471378744866059e-06, + "loss": 1.7803, + "step": 19193500 + }, + { + "epoch": 95.09, + "learning_rate": 2.4701401584399757e-06, + "loss": 1.799, + "step": 19194000 + }, + { + "epoch": 95.1, + "learning_rate": 2.468901572013892e-06, + "loss": 1.767, + "step": 19194500 + }, + { + "epoch": 95.1, + "learning_rate": 2.4676629855878082e-06, + "loss": 1.781, + "step": 19195000 + }, + { + "epoch": 95.1, + "learning_rate": 2.4664243991617247e-06, + "loss": 1.7648, + "step": 19195500 + }, + { + "epoch": 95.1, + "learning_rate": 2.4651858127356412e-06, + "loss": 1.7906, + "step": 19196000 + }, + { + "epoch": 95.11, + "learning_rate": 2.4639472263095578e-06, + "loss": 1.7788, + "step": 19196500 + }, + { + "epoch": 95.11, + "learning_rate": 2.462708639883474e-06, + "loss": 1.7864, + "step": 19197000 + }, + { + "epoch": 95.11, + "learning_rate": 2.4614700534573903e-06, + "loss": 1.7809, + "step": 19197500 + }, + { + "epoch": 95.11, + "learning_rate": 2.460231467031307e-06, + "loss": 1.7882, + "step": 19198000 + }, + { + "epoch": 95.12, + "learning_rate": 2.4589953577780752e-06, + "loss": 1.7968, + "step": 19198500 + }, + { + "epoch": 95.12, + "learning_rate": 2.4577567713519917e-06, + "loss": 1.7892, + "step": 19199000 + }, + { + "epoch": 95.12, + "learning_rate": 2.456518184925908e-06, + "loss": 1.8076, + "step": 19199500 + }, + { + "epoch": 95.12, + "learning_rate": 2.4552795984998243e-06, + "loss": 1.7728, + "step": 19200000 + }, + { + "epoch": 95.13, + "learning_rate": 2.4540410120737404e-06, + "loss": 1.7617, + "step": 19200500 + }, + { + "epoch": 95.13, + "learning_rate": 2.452804902820509e-06, + "loss": 1.786, + "step": 19201000 + }, + { + "epoch": 95.13, + "learning_rate": 2.4515663163944257e-06, + "loss": 1.7917, + "step": 19201500 + }, + { + "epoch": 95.13, + "learning_rate": 2.450330207141194e-06, + "loss": 1.7594, + "step": 19202000 + }, + { + "epoch": 95.14, + "learning_rate": 2.449094097887963e-06, + "loss": 1.7743, + "step": 19202500 + }, + { + "epoch": 95.14, + "learning_rate": 2.447855511461879e-06, + "loss": 1.769, + "step": 19203000 + }, + { + "epoch": 95.14, + "learning_rate": 2.4466169250357955e-06, + "loss": 1.779, + "step": 19203500 + }, + { + "epoch": 95.14, + "learning_rate": 2.4453783386097116e-06, + "loss": 1.7958, + "step": 19204000 + }, + { + "epoch": 95.15, + "learning_rate": 2.444139752183628e-06, + "loss": 1.7733, + "step": 19204500 + }, + { + "epoch": 95.15, + "learning_rate": 2.442901165757544e-06, + "loss": 1.7757, + "step": 19205000 + }, + { + "epoch": 95.15, + "learning_rate": 2.4416625793314607e-06, + "loss": 1.7666, + "step": 19205500 + }, + { + "epoch": 95.15, + "learning_rate": 2.440423992905377e-06, + "loss": 1.8024, + "step": 19206000 + }, + { + "epoch": 95.16, + "learning_rate": 2.4391854064792933e-06, + "loss": 1.7743, + "step": 19206500 + }, + { + "epoch": 95.16, + "learning_rate": 2.4379468200532098e-06, + "loss": 1.7901, + "step": 19207000 + }, + { + "epoch": 95.16, + "learning_rate": 2.436710710799978e-06, + "loss": 1.7956, + "step": 19207500 + }, + { + "epoch": 95.16, + "learning_rate": 2.4354721243738946e-06, + "loss": 1.7703, + "step": 19208000 + }, + { + "epoch": 95.17, + "learning_rate": 2.434233537947811e-06, + "loss": 1.7647, + "step": 19208500 + }, + { + "epoch": 95.17, + "learning_rate": 2.4329974286945795e-06, + "loss": 1.7695, + "step": 19209000 + }, + { + "epoch": 95.17, + "learning_rate": 2.431761319441348e-06, + "loss": 1.7887, + "step": 19209500 + }, + { + "epoch": 95.17, + "learning_rate": 2.4305227330152644e-06, + "loss": 1.77, + "step": 19210000 + }, + { + "epoch": 95.18, + "learning_rate": 2.429284146589181e-06, + "loss": 1.7812, + "step": 19210500 + }, + { + "epoch": 95.18, + "learning_rate": 2.428045560163097e-06, + "loss": 1.8016, + "step": 19211000 + }, + { + "epoch": 95.18, + "learning_rate": 2.426809450909866e-06, + "loss": 1.7719, + "step": 19211500 + }, + { + "epoch": 95.18, + "learning_rate": 2.425570864483782e-06, + "loss": 1.7753, + "step": 19212000 + }, + { + "epoch": 95.19, + "learning_rate": 2.4243347552305507e-06, + "loss": 1.8011, + "step": 19212500 + }, + { + "epoch": 95.19, + "learning_rate": 2.423096168804467e-06, + "loss": 1.7846, + "step": 19213000 + }, + { + "epoch": 95.19, + "learning_rate": 2.4218575823783833e-06, + "loss": 1.776, + "step": 19213500 + }, + { + "epoch": 95.19, + "learning_rate": 2.4206189959523e-06, + "loss": 1.7702, + "step": 19214000 + }, + { + "epoch": 95.2, + "learning_rate": 2.419380409526216e-06, + "loss": 1.7859, + "step": 19214500 + }, + { + "epoch": 95.2, + "learning_rate": 2.4181418231001324e-06, + "loss": 1.7739, + "step": 19215000 + }, + { + "epoch": 95.2, + "learning_rate": 2.4169032366740485e-06, + "loss": 1.7756, + "step": 19215500 + }, + { + "epoch": 95.2, + "learning_rate": 2.415664650247965e-06, + "loss": 1.789, + "step": 19216000 + }, + { + "epoch": 95.21, + "learning_rate": 2.4144260638218815e-06, + "loss": 1.7904, + "step": 19216500 + }, + { + "epoch": 95.21, + "learning_rate": 2.413187477395798e-06, + "loss": 1.7745, + "step": 19217000 + }, + { + "epoch": 95.21, + "learning_rate": 2.4119488909697145e-06, + "loss": 1.7847, + "step": 19217500 + }, + { + "epoch": 95.21, + "learning_rate": 2.4107103045436306e-06, + "loss": 1.7718, + "step": 19218000 + }, + { + "epoch": 95.22, + "learning_rate": 2.409471718117547e-06, + "loss": 1.7868, + "step": 19218500 + }, + { + "epoch": 95.22, + "learning_rate": 2.408233131691463e-06, + "loss": 1.7799, + "step": 19219000 + }, + { + "epoch": 95.22, + "learning_rate": 2.4069945452653797e-06, + "loss": 1.7666, + "step": 19219500 + }, + { + "epoch": 95.22, + "learning_rate": 2.405755958839296e-06, + "loss": 1.7843, + "step": 19220000 + }, + { + "epoch": 95.23, + "learning_rate": 2.4045198495860646e-06, + "loss": 1.7739, + "step": 19220500 + }, + { + "epoch": 95.23, + "learning_rate": 2.403281263159981e-06, + "loss": 1.7625, + "step": 19221000 + }, + { + "epoch": 95.23, + "learning_rate": 2.402042676733897e-06, + "loss": 1.7715, + "step": 19221500 + }, + { + "epoch": 95.23, + "learning_rate": 2.4008040903078136e-06, + "loss": 1.7747, + "step": 19222000 + }, + { + "epoch": 95.23, + "learning_rate": 2.3995655038817297e-06, + "loss": 1.7736, + "step": 19222500 + }, + { + "epoch": 95.24, + "learning_rate": 2.3983269174556462e-06, + "loss": 1.7791, + "step": 19223000 + }, + { + "epoch": 95.24, + "learning_rate": 2.3970883310295627e-06, + "loss": 1.7653, + "step": 19223500 + }, + { + "epoch": 95.24, + "learning_rate": 2.395849744603479e-06, + "loss": 1.7886, + "step": 19224000 + }, + { + "epoch": 95.24, + "learning_rate": 2.3946111581773957e-06, + "loss": 1.7865, + "step": 19224500 + }, + { + "epoch": 95.25, + "learning_rate": 2.393372571751312e-06, + "loss": 1.7772, + "step": 19225000 + }, + { + "epoch": 95.25, + "learning_rate": 2.3921339853252283e-06, + "loss": 1.7801, + "step": 19225500 + }, + { + "epoch": 95.25, + "learning_rate": 2.3908953988991444e-06, + "loss": 1.7852, + "step": 19226000 + }, + { + "epoch": 95.25, + "learning_rate": 2.3896592896459132e-06, + "loss": 1.7986, + "step": 19226500 + }, + { + "epoch": 95.26, + "learning_rate": 2.3884207032198293e-06, + "loss": 1.7721, + "step": 19227000 + }, + { + "epoch": 95.26, + "learning_rate": 2.387182116793746e-06, + "loss": 1.7649, + "step": 19227500 + }, + { + "epoch": 95.26, + "learning_rate": 2.3859435303676623e-06, + "loss": 1.793, + "step": 19228000 + }, + { + "epoch": 95.26, + "learning_rate": 2.3847049439415784e-06, + "loss": 1.7781, + "step": 19228500 + }, + { + "epoch": 95.27, + "learning_rate": 2.383466357515495e-06, + "loss": 1.7878, + "step": 19229000 + }, + { + "epoch": 95.27, + "learning_rate": 2.3822302482622633e-06, + "loss": 1.7951, + "step": 19229500 + }, + { + "epoch": 95.27, + "learning_rate": 2.3809916618361798e-06, + "loss": 1.7914, + "step": 19230000 + }, + { + "epoch": 95.27, + "learning_rate": 2.379753075410096e-06, + "loss": 1.7843, + "step": 19230500 + }, + { + "epoch": 95.28, + "learning_rate": 2.3785144889840124e-06, + "loss": 1.7815, + "step": 19231000 + }, + { + "epoch": 95.28, + "learning_rate": 2.377275902557929e-06, + "loss": 1.7633, + "step": 19231500 + }, + { + "epoch": 95.28, + "learning_rate": 2.3760397933046973e-06, + "loss": 1.7666, + "step": 19232000 + }, + { + "epoch": 95.28, + "learning_rate": 2.3748012068786138e-06, + "loss": 1.7821, + "step": 19232500 + }, + { + "epoch": 95.29, + "learning_rate": 2.37356262045253e-06, + "loss": 1.7747, + "step": 19233000 + }, + { + "epoch": 95.29, + "learning_rate": 2.3723265111992987e-06, + "loss": 1.7888, + "step": 19233500 + }, + { + "epoch": 95.29, + "learning_rate": 2.3710879247732147e-06, + "loss": 1.7766, + "step": 19234000 + }, + { + "epoch": 95.29, + "learning_rate": 2.3698493383471312e-06, + "loss": 1.784, + "step": 19234500 + }, + { + "epoch": 95.3, + "learning_rate": 2.3686107519210477e-06, + "loss": 1.7766, + "step": 19235000 + }, + { + "epoch": 95.3, + "learning_rate": 2.367372165494964e-06, + "loss": 1.7869, + "step": 19235500 + }, + { + "epoch": 95.3, + "learning_rate": 2.3661360562417326e-06, + "loss": 1.7785, + "step": 19236000 + }, + { + "epoch": 95.3, + "learning_rate": 2.3648974698156487e-06, + "loss": 1.8124, + "step": 19236500 + }, + { + "epoch": 95.31, + "learning_rate": 2.3636588833895652e-06, + "loss": 1.7888, + "step": 19237000 + }, + { + "epoch": 95.31, + "learning_rate": 2.3624202969634813e-06, + "loss": 1.7629, + "step": 19237500 + }, + { + "epoch": 95.31, + "learning_rate": 2.36118418771025e-06, + "loss": 1.7613, + "step": 19238000 + }, + { + "epoch": 95.31, + "learning_rate": 2.3599456012841666e-06, + "loss": 1.7812, + "step": 19238500 + }, + { + "epoch": 95.32, + "learning_rate": 2.3587070148580827e-06, + "loss": 1.7722, + "step": 19239000 + }, + { + "epoch": 95.32, + "learning_rate": 2.357468428431999e-06, + "loss": 1.7904, + "step": 19239500 + }, + { + "epoch": 95.32, + "learning_rate": 2.3562298420059157e-06, + "loss": 1.7838, + "step": 19240000 + }, + { + "epoch": 95.32, + "learning_rate": 2.3549912555798322e-06, + "loss": 1.7761, + "step": 19240500 + }, + { + "epoch": 95.33, + "learning_rate": 2.3537526691537483e-06, + "loss": 1.7916, + "step": 19241000 + }, + { + "epoch": 95.33, + "learning_rate": 2.352514082727665e-06, + "loss": 1.7503, + "step": 19241500 + }, + { + "epoch": 95.33, + "learning_rate": 2.351275496301581e-06, + "loss": 1.7839, + "step": 19242000 + }, + { + "epoch": 95.33, + "learning_rate": 2.3500369098754974e-06, + "loss": 1.7655, + "step": 19242500 + }, + { + "epoch": 95.34, + "learning_rate": 2.348798323449414e-06, + "loss": 1.7803, + "step": 19243000 + }, + { + "epoch": 95.34, + "learning_rate": 2.3475622141961823e-06, + "loss": 1.7911, + "step": 19243500 + }, + { + "epoch": 95.34, + "learning_rate": 2.346326104942951e-06, + "loss": 1.8022, + "step": 19244000 + }, + { + "epoch": 95.34, + "learning_rate": 2.345087518516867e-06, + "loss": 1.7884, + "step": 19244500 + }, + { + "epoch": 95.35, + "learning_rate": 2.3438489320907837e-06, + "loss": 1.7738, + "step": 19245000 + }, + { + "epoch": 95.35, + "learning_rate": 2.3426103456646998e-06, + "loss": 1.7733, + "step": 19245500 + }, + { + "epoch": 95.35, + "learning_rate": 2.3413717592386163e-06, + "loss": 1.7866, + "step": 19246000 + }, + { + "epoch": 95.35, + "learning_rate": 2.3401331728125328e-06, + "loss": 1.7518, + "step": 19246500 + }, + { + "epoch": 95.36, + "learning_rate": 2.338894586386449e-06, + "loss": 1.783, + "step": 19247000 + }, + { + "epoch": 95.36, + "learning_rate": 2.3376559999603653e-06, + "loss": 1.7806, + "step": 19247500 + }, + { + "epoch": 95.36, + "learning_rate": 2.3364198907071337e-06, + "loss": 1.7813, + "step": 19248000 + }, + { + "epoch": 95.36, + "learning_rate": 2.3351813042810502e-06, + "loss": 1.7883, + "step": 19248500 + }, + { + "epoch": 95.37, + "learning_rate": 2.3339427178549663e-06, + "loss": 1.786, + "step": 19249000 + }, + { + "epoch": 95.37, + "learning_rate": 2.332704131428883e-06, + "loss": 1.7645, + "step": 19249500 + }, + { + "epoch": 95.37, + "learning_rate": 2.3314655450027993e-06, + "loss": 1.7768, + "step": 19250000 + }, + { + "epoch": 95.37, + "learning_rate": 2.3302269585767154e-06, + "loss": 1.8067, + "step": 19250500 + }, + { + "epoch": 95.38, + "learning_rate": 2.3289908493234842e-06, + "loss": 1.7615, + "step": 19251000 + }, + { + "epoch": 95.38, + "learning_rate": 2.3277522628974003e-06, + "loss": 1.7923, + "step": 19251500 + }, + { + "epoch": 95.38, + "learning_rate": 2.326513676471317e-06, + "loss": 1.7584, + "step": 19252000 + }, + { + "epoch": 95.38, + "learning_rate": 2.3252750900452333e-06, + "loss": 1.7694, + "step": 19252500 + }, + { + "epoch": 95.39, + "learning_rate": 2.32403650361915e-06, + "loss": 1.7763, + "step": 19253000 + }, + { + "epoch": 95.39, + "learning_rate": 2.3227979171930663e-06, + "loss": 1.7745, + "step": 19253500 + }, + { + "epoch": 95.39, + "learning_rate": 2.3215593307669824e-06, + "loss": 1.7969, + "step": 19254000 + }, + { + "epoch": 95.39, + "learning_rate": 2.320323221513751e-06, + "loss": 1.7941, + "step": 19254500 + }, + { + "epoch": 95.4, + "learning_rate": 2.319087112260519e-06, + "loss": 1.8008, + "step": 19255000 + }, + { + "epoch": 95.4, + "learning_rate": 2.317848525834436e-06, + "loss": 1.769, + "step": 19255500 + }, + { + "epoch": 95.4, + "learning_rate": 2.316609939408352e-06, + "loss": 1.7494, + "step": 19256000 + }, + { + "epoch": 95.4, + "learning_rate": 2.3153713529822687e-06, + "loss": 1.7927, + "step": 19256500 + }, + { + "epoch": 95.41, + "learning_rate": 2.3141327665561848e-06, + "loss": 1.7827, + "step": 19257000 + }, + { + "epoch": 95.41, + "learning_rate": 2.3128941801301013e-06, + "loss": 1.7973, + "step": 19257500 + }, + { + "epoch": 95.41, + "learning_rate": 2.31165807087687e-06, + "loss": 1.7943, + "step": 19258000 + }, + { + "epoch": 95.41, + "learning_rate": 2.310419484450786e-06, + "loss": 1.7655, + "step": 19258500 + }, + { + "epoch": 95.42, + "learning_rate": 2.3091808980247027e-06, + "loss": 1.7814, + "step": 19259000 + }, + { + "epoch": 95.42, + "learning_rate": 2.3079423115986187e-06, + "loss": 1.7706, + "step": 19259500 + }, + { + "epoch": 95.42, + "learning_rate": 2.3067037251725353e-06, + "loss": 1.7797, + "step": 19260000 + }, + { + "epoch": 95.42, + "learning_rate": 2.3054651387464513e-06, + "loss": 1.7858, + "step": 19260500 + }, + { + "epoch": 95.43, + "learning_rate": 2.304226552320368e-06, + "loss": 1.7738, + "step": 19261000 + }, + { + "epoch": 95.43, + "learning_rate": 2.3029879658942843e-06, + "loss": 1.7789, + "step": 19261500 + }, + { + "epoch": 95.43, + "learning_rate": 2.3017493794682004e-06, + "loss": 1.8064, + "step": 19262000 + }, + { + "epoch": 95.43, + "learning_rate": 2.3005132702149692e-06, + "loss": 1.7772, + "step": 19262500 + }, + { + "epoch": 95.44, + "learning_rate": 2.2992746837888853e-06, + "loss": 1.7657, + "step": 19263000 + }, + { + "epoch": 95.44, + "learning_rate": 2.298036097362802e-06, + "loss": 1.7974, + "step": 19263500 + }, + { + "epoch": 95.44, + "learning_rate": 2.2967975109367183e-06, + "loss": 1.7792, + "step": 19264000 + }, + { + "epoch": 95.44, + "learning_rate": 2.2955614016834867e-06, + "loss": 1.7666, + "step": 19264500 + }, + { + "epoch": 95.45, + "learning_rate": 2.2943228152574032e-06, + "loss": 1.7571, + "step": 19265000 + }, + { + "epoch": 95.45, + "learning_rate": 2.2930842288313193e-06, + "loss": 1.7567, + "step": 19265500 + }, + { + "epoch": 95.45, + "learning_rate": 2.291845642405236e-06, + "loss": 1.7521, + "step": 19266000 + }, + { + "epoch": 95.45, + "learning_rate": 2.2906070559791523e-06, + "loss": 1.7603, + "step": 19266500 + }, + { + "epoch": 95.46, + "learning_rate": 2.289368469553069e-06, + "loss": 1.7682, + "step": 19267000 + }, + { + "epoch": 95.46, + "learning_rate": 2.288129883126985e-06, + "loss": 1.7923, + "step": 19267500 + }, + { + "epoch": 95.46, + "learning_rate": 2.2868912967009014e-06, + "loss": 1.7772, + "step": 19268000 + }, + { + "epoch": 95.46, + "learning_rate": 2.285652710274818e-06, + "loss": 1.77, + "step": 19268500 + }, + { + "epoch": 95.47, + "learning_rate": 2.284414123848734e-06, + "loss": 1.806, + "step": 19269000 + }, + { + "epoch": 95.47, + "learning_rate": 2.2831755374226505e-06, + "loss": 1.7583, + "step": 19269500 + }, + { + "epoch": 95.47, + "learning_rate": 2.2819369509965666e-06, + "loss": 1.7915, + "step": 19270000 + }, + { + "epoch": 95.47, + "learning_rate": 2.2807008417433354e-06, + "loss": 1.8001, + "step": 19270500 + }, + { + "epoch": 95.48, + "learning_rate": 2.2794622553172515e-06, + "loss": 1.7579, + "step": 19271000 + }, + { + "epoch": 95.48, + "learning_rate": 2.278223668891168e-06, + "loss": 1.7743, + "step": 19271500 + }, + { + "epoch": 95.48, + "learning_rate": 2.2769850824650845e-06, + "loss": 1.7612, + "step": 19272000 + }, + { + "epoch": 95.48, + "learning_rate": 2.275748973211853e-06, + "loss": 1.7548, + "step": 19272500 + }, + { + "epoch": 95.49, + "learning_rate": 2.2745103867857694e-06, + "loss": 1.7796, + "step": 19273000 + }, + { + "epoch": 95.49, + "learning_rate": 2.2732718003596854e-06, + "loss": 1.7831, + "step": 19273500 + }, + { + "epoch": 95.49, + "learning_rate": 2.272033213933602e-06, + "loss": 1.7765, + "step": 19274000 + }, + { + "epoch": 95.49, + "learning_rate": 2.270794627507518e-06, + "loss": 1.772, + "step": 19274500 + }, + { + "epoch": 95.5, + "learning_rate": 2.2695560410814345e-06, + "loss": 1.7667, + "step": 19275000 + }, + { + "epoch": 95.5, + "learning_rate": 2.2683199318282033e-06, + "loss": 1.7932, + "step": 19275500 + }, + { + "epoch": 95.5, + "learning_rate": 2.2670813454021194e-06, + "loss": 1.7702, + "step": 19276000 + }, + { + "epoch": 95.5, + "learning_rate": 2.265842758976036e-06, + "loss": 1.7743, + "step": 19276500 + }, + { + "epoch": 95.5, + "learning_rate": 2.264604172549952e-06, + "loss": 1.776, + "step": 19277000 + }, + { + "epoch": 95.51, + "learning_rate": 2.263365586123869e-06, + "loss": 1.7782, + "step": 19277500 + }, + { + "epoch": 95.51, + "learning_rate": 2.262126999697785e-06, + "loss": 1.8033, + "step": 19278000 + }, + { + "epoch": 95.51, + "learning_rate": 2.2608884132717015e-06, + "loss": 1.7861, + "step": 19278500 + }, + { + "epoch": 95.51, + "learning_rate": 2.25965230401847e-06, + "loss": 1.7779, + "step": 19279000 + }, + { + "epoch": 95.52, + "learning_rate": 2.2584137175923864e-06, + "loss": 1.7673, + "step": 19279500 + }, + { + "epoch": 95.52, + "learning_rate": 2.257177608339155e-06, + "loss": 1.7834, + "step": 19280000 + }, + { + "epoch": 95.52, + "learning_rate": 2.2559390219130713e-06, + "loss": 1.7692, + "step": 19280500 + }, + { + "epoch": 95.52, + "learning_rate": 2.254700435486988e-06, + "loss": 1.7755, + "step": 19281000 + }, + { + "epoch": 95.53, + "learning_rate": 2.253461849060904e-06, + "loss": 1.7784, + "step": 19281500 + }, + { + "epoch": 95.53, + "learning_rate": 2.2522232626348204e-06, + "loss": 1.7838, + "step": 19282000 + }, + { + "epoch": 95.53, + "learning_rate": 2.2509846762087365e-06, + "loss": 1.7835, + "step": 19282500 + }, + { + "epoch": 95.53, + "learning_rate": 2.249746089782653e-06, + "loss": 1.78, + "step": 19283000 + }, + { + "epoch": 95.54, + "learning_rate": 2.2485075033565695e-06, + "loss": 1.7876, + "step": 19283500 + }, + { + "epoch": 95.54, + "learning_rate": 2.247271394103338e-06, + "loss": 1.781, + "step": 19284000 + }, + { + "epoch": 95.54, + "learning_rate": 2.2460352848501067e-06, + "loss": 1.7781, + "step": 19284500 + }, + { + "epoch": 95.54, + "learning_rate": 2.2447966984240228e-06, + "loss": 1.7746, + "step": 19285000 + }, + { + "epoch": 95.55, + "learning_rate": 2.2435581119979393e-06, + "loss": 1.7811, + "step": 19285500 + }, + { + "epoch": 95.55, + "learning_rate": 2.2423195255718553e-06, + "loss": 1.7791, + "step": 19286000 + }, + { + "epoch": 95.55, + "learning_rate": 2.241083416318624e-06, + "loss": 1.7726, + "step": 19286500 + }, + { + "epoch": 95.55, + "learning_rate": 2.2398448298925402e-06, + "loss": 1.7936, + "step": 19287000 + }, + { + "epoch": 95.56, + "learning_rate": 2.2386062434664567e-06, + "loss": 1.8031, + "step": 19287500 + }, + { + "epoch": 95.56, + "learning_rate": 2.2373676570403732e-06, + "loss": 1.7963, + "step": 19288000 + }, + { + "epoch": 95.56, + "learning_rate": 2.2361290706142893e-06, + "loss": 1.7779, + "step": 19288500 + }, + { + "epoch": 95.56, + "learning_rate": 2.234890484188206e-06, + "loss": 1.7586, + "step": 19289000 + }, + { + "epoch": 95.57, + "learning_rate": 2.233651897762122e-06, + "loss": 1.8082, + "step": 19289500 + }, + { + "epoch": 95.57, + "learning_rate": 2.2324133113360384e-06, + "loss": 1.7824, + "step": 19290000 + }, + { + "epoch": 95.57, + "learning_rate": 2.231174724909955e-06, + "loss": 1.7797, + "step": 19290500 + }, + { + "epoch": 95.57, + "learning_rate": 2.2299386156567233e-06, + "loss": 1.7757, + "step": 19291000 + }, + { + "epoch": 95.58, + "learning_rate": 2.22870002923064e-06, + "loss": 1.7681, + "step": 19291500 + }, + { + "epoch": 95.58, + "learning_rate": 2.227463919977408e-06, + "loss": 1.7798, + "step": 19292000 + }, + { + "epoch": 95.58, + "learning_rate": 2.2262253335513247e-06, + "loss": 1.777, + "step": 19292500 + }, + { + "epoch": 95.58, + "learning_rate": 2.2249867471252408e-06, + "loss": 1.7618, + "step": 19293000 + }, + { + "epoch": 95.59, + "learning_rate": 2.2237481606991573e-06, + "loss": 1.7747, + "step": 19293500 + }, + { + "epoch": 95.59, + "learning_rate": 2.222509574273074e-06, + "loss": 1.795, + "step": 19294000 + }, + { + "epoch": 95.59, + "learning_rate": 2.221273465019842e-06, + "loss": 1.7661, + "step": 19294500 + }, + { + "epoch": 95.59, + "learning_rate": 2.2200348785937587e-06, + "loss": 1.7753, + "step": 19295000 + }, + { + "epoch": 95.6, + "learning_rate": 2.2187962921676748e-06, + "loss": 1.7859, + "step": 19295500 + }, + { + "epoch": 95.6, + "learning_rate": 2.2175577057415913e-06, + "loss": 1.7889, + "step": 19296000 + }, + { + "epoch": 95.6, + "learning_rate": 2.2163191193155078e-06, + "loss": 1.777, + "step": 19296500 + }, + { + "epoch": 95.6, + "learning_rate": 2.2150805328894243e-06, + "loss": 1.7994, + "step": 19297000 + }, + { + "epoch": 95.61, + "learning_rate": 2.2138419464633404e-06, + "loss": 1.784, + "step": 19297500 + }, + { + "epoch": 95.61, + "learning_rate": 2.212603360037257e-06, + "loss": 1.7798, + "step": 19298000 + }, + { + "epoch": 95.61, + "learning_rate": 2.2113647736111734e-06, + "loss": 1.7655, + "step": 19298500 + }, + { + "epoch": 95.61, + "learning_rate": 2.2101261871850894e-06, + "loss": 1.7662, + "step": 19299000 + }, + { + "epoch": 95.62, + "learning_rate": 2.2088900779318583e-06, + "loss": 1.788, + "step": 19299500 + }, + { + "epoch": 95.62, + "learning_rate": 2.2076514915057743e-06, + "loss": 1.7931, + "step": 19300000 + }, + { + "epoch": 95.62, + "learning_rate": 2.206412905079691e-06, + "loss": 1.7769, + "step": 19300500 + }, + { + "epoch": 95.62, + "learning_rate": 2.205174318653607e-06, + "loss": 1.7652, + "step": 19301000 + }, + { + "epoch": 95.63, + "learning_rate": 2.2039357322275234e-06, + "loss": 1.7962, + "step": 19301500 + }, + { + "epoch": 95.63, + "learning_rate": 2.20269714580144e-06, + "loss": 1.772, + "step": 19302000 + }, + { + "epoch": 95.63, + "learning_rate": 2.201458559375356e-06, + "loss": 1.7937, + "step": 19302500 + }, + { + "epoch": 95.63, + "learning_rate": 2.2002199729492725e-06, + "loss": 1.7844, + "step": 19303000 + }, + { + "epoch": 95.64, + "learning_rate": 2.1989813865231886e-06, + "loss": 1.7538, + "step": 19303500 + }, + { + "epoch": 95.64, + "learning_rate": 2.1977452772699574e-06, + "loss": 1.7793, + "step": 19304000 + }, + { + "epoch": 95.64, + "learning_rate": 2.1965066908438735e-06, + "loss": 1.7916, + "step": 19304500 + }, + { + "epoch": 95.64, + "learning_rate": 2.19526810441779e-06, + "loss": 1.7497, + "step": 19305000 + }, + { + "epoch": 95.65, + "learning_rate": 2.194031995164559e-06, + "loss": 1.7705, + "step": 19305500 + }, + { + "epoch": 95.65, + "learning_rate": 2.192793408738475e-06, + "loss": 1.7758, + "step": 19306000 + }, + { + "epoch": 95.65, + "learning_rate": 2.1915548223123914e-06, + "loss": 1.7949, + "step": 19306500 + }, + { + "epoch": 95.65, + "learning_rate": 2.1903187130591598e-06, + "loss": 1.7774, + "step": 19307000 + }, + { + "epoch": 95.66, + "learning_rate": 2.1890801266330763e-06, + "loss": 1.795, + "step": 19307500 + }, + { + "epoch": 95.66, + "learning_rate": 2.1878415402069924e-06, + "loss": 1.7584, + "step": 19308000 + }, + { + "epoch": 95.66, + "learning_rate": 2.186602953780909e-06, + "loss": 1.7712, + "step": 19308500 + }, + { + "epoch": 95.66, + "learning_rate": 2.1853643673548254e-06, + "loss": 1.7751, + "step": 19309000 + }, + { + "epoch": 95.67, + "learning_rate": 2.184125780928742e-06, + "loss": 1.7937, + "step": 19309500 + }, + { + "epoch": 95.67, + "learning_rate": 2.1828871945026584e-06, + "loss": 1.7669, + "step": 19310000 + }, + { + "epoch": 95.67, + "learning_rate": 2.1816486080765745e-06, + "loss": 1.7961, + "step": 19310500 + }, + { + "epoch": 95.67, + "learning_rate": 2.180410021650491e-06, + "loss": 1.79, + "step": 19311000 + }, + { + "epoch": 95.68, + "learning_rate": 2.1791739123972594e-06, + "loss": 1.7637, + "step": 19311500 + }, + { + "epoch": 95.68, + "learning_rate": 2.177935325971176e-06, + "loss": 1.7751, + "step": 19312000 + }, + { + "epoch": 95.68, + "learning_rate": 2.1766992167179442e-06, + "loss": 1.7949, + "step": 19312500 + }, + { + "epoch": 95.68, + "learning_rate": 2.1754606302918607e-06, + "loss": 1.7857, + "step": 19313000 + }, + { + "epoch": 95.69, + "learning_rate": 2.1742220438657773e-06, + "loss": 1.7743, + "step": 19313500 + }, + { + "epoch": 95.69, + "learning_rate": 2.1729834574396933e-06, + "loss": 1.7623, + "step": 19314000 + }, + { + "epoch": 95.69, + "learning_rate": 2.17174487101361e-06, + "loss": 1.7695, + "step": 19314500 + }, + { + "epoch": 95.69, + "learning_rate": 2.170506284587526e-06, + "loss": 1.7634, + "step": 19315000 + }, + { + "epoch": 95.7, + "learning_rate": 2.1692676981614424e-06, + "loss": 1.7728, + "step": 19315500 + }, + { + "epoch": 95.7, + "learning_rate": 2.1680291117353585e-06, + "loss": 1.7798, + "step": 19316000 + }, + { + "epoch": 95.7, + "learning_rate": 2.166790525309275e-06, + "loss": 1.786, + "step": 19316500 + }, + { + "epoch": 95.7, + "learning_rate": 2.1655519388831915e-06, + "loss": 1.7546, + "step": 19317000 + }, + { + "epoch": 95.71, + "learning_rate": 2.1643133524571076e-06, + "loss": 1.7667, + "step": 19317500 + }, + { + "epoch": 95.71, + "learning_rate": 2.1630747660310245e-06, + "loss": 1.7648, + "step": 19318000 + }, + { + "epoch": 95.71, + "learning_rate": 2.1618386567777925e-06, + "loss": 1.7661, + "step": 19318500 + }, + { + "epoch": 95.71, + "learning_rate": 2.1606025475245613e-06, + "loss": 1.7738, + "step": 19319000 + }, + { + "epoch": 95.72, + "learning_rate": 2.1593639610984774e-06, + "loss": 1.7729, + "step": 19319500 + }, + { + "epoch": 95.72, + "learning_rate": 2.158125374672394e-06, + "loss": 1.7653, + "step": 19320000 + }, + { + "epoch": 95.72, + "learning_rate": 2.1568867882463104e-06, + "loss": 1.7913, + "step": 19320500 + }, + { + "epoch": 95.72, + "learning_rate": 2.1556482018202265e-06, + "loss": 1.7664, + "step": 19321000 + }, + { + "epoch": 95.73, + "learning_rate": 2.1544096153941434e-06, + "loss": 1.7951, + "step": 19321500 + }, + { + "epoch": 95.73, + "learning_rate": 2.1531710289680595e-06, + "loss": 1.7883, + "step": 19322000 + }, + { + "epoch": 95.73, + "learning_rate": 2.151932442541976e-06, + "loss": 1.7627, + "step": 19322500 + }, + { + "epoch": 95.73, + "learning_rate": 2.1506963332887444e-06, + "loss": 1.7895, + "step": 19323000 + }, + { + "epoch": 95.74, + "learning_rate": 2.149457746862661e-06, + "loss": 1.7951, + "step": 19323500 + }, + { + "epoch": 95.74, + "learning_rate": 2.148219160436577e-06, + "loss": 1.7846, + "step": 19324000 + }, + { + "epoch": 95.74, + "learning_rate": 2.1469805740104935e-06, + "loss": 1.7595, + "step": 19324500 + }, + { + "epoch": 95.74, + "learning_rate": 2.1457444647572623e-06, + "loss": 1.7894, + "step": 19325000 + }, + { + "epoch": 95.75, + "learning_rate": 2.1445058783311783e-06, + "loss": 1.7823, + "step": 19325500 + }, + { + "epoch": 95.75, + "learning_rate": 2.143272246250799e-06, + "loss": 1.7673, + "step": 19326000 + }, + { + "epoch": 95.75, + "learning_rate": 2.142033659824715e-06, + "loss": 1.7956, + "step": 19326500 + }, + { + "epoch": 95.75, + "learning_rate": 2.1407950733986316e-06, + "loss": 1.7881, + "step": 19327000 + }, + { + "epoch": 95.76, + "learning_rate": 2.1395564869725477e-06, + "loss": 1.786, + "step": 19327500 + }, + { + "epoch": 95.76, + "learning_rate": 2.1383179005464646e-06, + "loss": 1.7762, + "step": 19328000 + }, + { + "epoch": 95.76, + "learning_rate": 2.1370793141203807e-06, + "loss": 1.7826, + "step": 19328500 + }, + { + "epoch": 95.76, + "learning_rate": 2.1358407276942972e-06, + "loss": 1.7769, + "step": 19329000 + }, + { + "epoch": 95.77, + "learning_rate": 2.1346021412682137e-06, + "loss": 1.7692, + "step": 19329500 + }, + { + "epoch": 95.77, + "learning_rate": 2.13336355484213e-06, + "loss": 1.757, + "step": 19330000 + }, + { + "epoch": 95.77, + "learning_rate": 2.1321249684160463e-06, + "loss": 1.7852, + "step": 19330500 + }, + { + "epoch": 95.77, + "learning_rate": 2.1308863819899624e-06, + "loss": 1.7794, + "step": 19331000 + }, + { + "epoch": 95.77, + "learning_rate": 2.129647795563879e-06, + "loss": 1.7686, + "step": 19331500 + }, + { + "epoch": 95.78, + "learning_rate": 2.1284092091377954e-06, + "loss": 1.7793, + "step": 19332000 + }, + { + "epoch": 95.78, + "learning_rate": 2.1271706227117115e-06, + "loss": 1.7668, + "step": 19332500 + }, + { + "epoch": 95.78, + "learning_rate": 2.1259345134584803e-06, + "loss": 1.7765, + "step": 19333000 + }, + { + "epoch": 95.78, + "learning_rate": 2.1246959270323964e-06, + "loss": 1.7793, + "step": 19333500 + }, + { + "epoch": 95.79, + "learning_rate": 2.123457340606313e-06, + "loss": 1.7761, + "step": 19334000 + }, + { + "epoch": 95.79, + "learning_rate": 2.122218754180229e-06, + "loss": 1.7803, + "step": 19334500 + }, + { + "epoch": 95.79, + "learning_rate": 2.1209801677541455e-06, + "loss": 1.7993, + "step": 19335000 + }, + { + "epoch": 95.79, + "learning_rate": 2.119746535673766e-06, + "loss": 1.7927, + "step": 19335500 + }, + { + "epoch": 95.8, + "learning_rate": 2.118510426420535e-06, + "loss": 1.7933, + "step": 19336000 + }, + { + "epoch": 95.8, + "learning_rate": 2.1172718399944515e-06, + "loss": 1.7701, + "step": 19336500 + }, + { + "epoch": 95.8, + "learning_rate": 2.1160332535683675e-06, + "loss": 1.7852, + "step": 19337000 + }, + { + "epoch": 95.8, + "learning_rate": 2.114794667142284e-06, + "loss": 1.7595, + "step": 19337500 + }, + { + "epoch": 95.81, + "learning_rate": 2.1135560807162e-06, + "loss": 1.7846, + "step": 19338000 + }, + { + "epoch": 95.81, + "learning_rate": 2.1123174942901166e-06, + "loss": 1.7756, + "step": 19338500 + }, + { + "epoch": 95.81, + "learning_rate": 2.1110789078640327e-06, + "loss": 1.7763, + "step": 19339000 + }, + { + "epoch": 95.81, + "learning_rate": 2.1098403214379492e-06, + "loss": 1.7705, + "step": 19339500 + }, + { + "epoch": 95.82, + "learning_rate": 2.1086017350118657e-06, + "loss": 1.7811, + "step": 19340000 + }, + { + "epoch": 95.82, + "learning_rate": 2.1073631485857822e-06, + "loss": 1.7544, + "step": 19340500 + }, + { + "epoch": 95.82, + "learning_rate": 2.1061245621596987e-06, + "loss": 1.7634, + "step": 19341000 + }, + { + "epoch": 95.82, + "learning_rate": 2.104885975733615e-06, + "loss": 1.7715, + "step": 19341500 + }, + { + "epoch": 95.83, + "learning_rate": 2.1036473893075313e-06, + "loss": 1.7645, + "step": 19342000 + }, + { + "epoch": 95.83, + "learning_rate": 2.1024088028814474e-06, + "loss": 1.7916, + "step": 19342500 + }, + { + "epoch": 95.83, + "learning_rate": 2.101170216455364e-06, + "loss": 1.7634, + "step": 19343000 + }, + { + "epoch": 95.83, + "learning_rate": 2.0999316300292804e-06, + "loss": 1.7928, + "step": 19343500 + }, + { + "epoch": 95.84, + "learning_rate": 2.098695520776049e-06, + "loss": 1.7647, + "step": 19344000 + }, + { + "epoch": 95.84, + "learning_rate": 2.0974569343499653e-06, + "loss": 1.7659, + "step": 19344500 + }, + { + "epoch": 95.84, + "learning_rate": 2.0962183479238814e-06, + "loss": 1.747, + "step": 19345000 + }, + { + "epoch": 95.84, + "learning_rate": 2.094979761497798e-06, + "loss": 1.7817, + "step": 19345500 + }, + { + "epoch": 95.85, + "learning_rate": 2.0937436522445663e-06, + "loss": 1.7599, + "step": 19346000 + }, + { + "epoch": 95.85, + "learning_rate": 2.0925050658184828e-06, + "loss": 1.7689, + "step": 19346500 + }, + { + "epoch": 95.85, + "learning_rate": 2.0912664793923993e-06, + "loss": 1.7888, + "step": 19347000 + }, + { + "epoch": 95.85, + "learning_rate": 2.0900278929663154e-06, + "loss": 1.7823, + "step": 19347500 + }, + { + "epoch": 95.86, + "learning_rate": 2.088789306540232e-06, + "loss": 1.7693, + "step": 19348000 + }, + { + "epoch": 95.86, + "learning_rate": 2.0875531972870003e-06, + "loss": 1.7625, + "step": 19348500 + }, + { + "epoch": 95.86, + "learning_rate": 2.0863146108609168e-06, + "loss": 1.7887, + "step": 19349000 + }, + { + "epoch": 95.86, + "learning_rate": 2.085076024434833e-06, + "loss": 1.7924, + "step": 19349500 + }, + { + "epoch": 95.87, + "learning_rate": 2.0838374380087493e-06, + "loss": 1.7861, + "step": 19350000 + }, + { + "epoch": 95.87, + "learning_rate": 2.082598851582666e-06, + "loss": 1.7927, + "step": 19350500 + }, + { + "epoch": 95.87, + "learning_rate": 2.0813627423294342e-06, + "loss": 1.7761, + "step": 19351000 + }, + { + "epoch": 95.87, + "learning_rate": 2.080126633076203e-06, + "loss": 1.7589, + "step": 19351500 + }, + { + "epoch": 95.88, + "learning_rate": 2.078888046650119e-06, + "loss": 1.7784, + "step": 19352000 + }, + { + "epoch": 95.88, + "learning_rate": 2.0776494602240356e-06, + "loss": 1.7953, + "step": 19352500 + }, + { + "epoch": 95.88, + "learning_rate": 2.076413350970804e-06, + "loss": 1.7706, + "step": 19353000 + }, + { + "epoch": 95.88, + "learning_rate": 2.0751747645447205e-06, + "loss": 1.7818, + "step": 19353500 + }, + { + "epoch": 95.89, + "learning_rate": 2.0739361781186366e-06, + "loss": 1.7869, + "step": 19354000 + }, + { + "epoch": 95.89, + "learning_rate": 2.072697591692553e-06, + "loss": 1.7663, + "step": 19354500 + }, + { + "epoch": 95.89, + "learning_rate": 2.071461482439322e-06, + "loss": 1.7977, + "step": 19355000 + }, + { + "epoch": 95.89, + "learning_rate": 2.070222896013238e-06, + "loss": 1.791, + "step": 19355500 + }, + { + "epoch": 95.9, + "learning_rate": 2.0689843095871545e-06, + "loss": 1.7679, + "step": 19356000 + }, + { + "epoch": 95.9, + "learning_rate": 2.0677457231610706e-06, + "loss": 1.788, + "step": 19356500 + }, + { + "epoch": 95.9, + "learning_rate": 2.066507136734987e-06, + "loss": 1.7774, + "step": 19357000 + }, + { + "epoch": 95.9, + "learning_rate": 2.0652685503089036e-06, + "loss": 1.7657, + "step": 19357500 + }, + { + "epoch": 95.91, + "learning_rate": 2.06402996388282e-06, + "loss": 1.7578, + "step": 19358000 + }, + { + "epoch": 95.91, + "learning_rate": 2.062791377456736e-06, + "loss": 1.7751, + "step": 19358500 + }, + { + "epoch": 95.91, + "learning_rate": 2.061555268203505e-06, + "loss": 1.7794, + "step": 19359000 + }, + { + "epoch": 95.91, + "learning_rate": 2.0603166817774215e-06, + "loss": 1.7497, + "step": 19359500 + }, + { + "epoch": 95.92, + "learning_rate": 2.0590780953513376e-06, + "loss": 1.7672, + "step": 19360000 + }, + { + "epoch": 95.92, + "learning_rate": 2.057839508925254e-06, + "loss": 1.7984, + "step": 19360500 + }, + { + "epoch": 95.92, + "learning_rate": 2.05660092249917e-06, + "loss": 1.7865, + "step": 19361000 + }, + { + "epoch": 95.92, + "learning_rate": 2.055364813245939e-06, + "loss": 1.7728, + "step": 19361500 + }, + { + "epoch": 95.93, + "learning_rate": 2.054126226819855e-06, + "loss": 1.7761, + "step": 19362000 + }, + { + "epoch": 95.93, + "learning_rate": 2.0528876403937716e-06, + "loss": 1.7643, + "step": 19362500 + }, + { + "epoch": 95.93, + "learning_rate": 2.051649053967688e-06, + "loss": 1.7969, + "step": 19363000 + }, + { + "epoch": 95.93, + "learning_rate": 2.050410467541604e-06, + "loss": 1.7856, + "step": 19363500 + }, + { + "epoch": 95.94, + "learning_rate": 2.0491718811155206e-06, + "loss": 1.7894, + "step": 19364000 + }, + { + "epoch": 95.94, + "learning_rate": 2.0479332946894367e-06, + "loss": 1.7699, + "step": 19364500 + }, + { + "epoch": 95.94, + "learning_rate": 2.0466947082633532e-06, + "loss": 1.8014, + "step": 19365000 + }, + { + "epoch": 95.94, + "learning_rate": 2.0454561218372697e-06, + "loss": 1.7585, + "step": 19365500 + }, + { + "epoch": 95.95, + "learning_rate": 2.044217535411186e-06, + "loss": 1.7759, + "step": 19366000 + }, + { + "epoch": 95.95, + "learning_rate": 2.0429789489851023e-06, + "loss": 1.798, + "step": 19366500 + }, + { + "epoch": 95.95, + "learning_rate": 2.041740362559019e-06, + "loss": 1.7799, + "step": 19367000 + }, + { + "epoch": 95.95, + "learning_rate": 2.0405017761329353e-06, + "loss": 1.7999, + "step": 19367500 + }, + { + "epoch": 95.96, + "learning_rate": 2.0392631897068514e-06, + "loss": 1.7584, + "step": 19368000 + }, + { + "epoch": 95.96, + "learning_rate": 2.038024603280768e-06, + "loss": 1.7941, + "step": 19368500 + }, + { + "epoch": 95.96, + "learning_rate": 2.0367860168546844e-06, + "loss": 1.766, + "step": 19369000 + }, + { + "epoch": 95.96, + "learning_rate": 2.035549907601453e-06, + "loss": 1.7744, + "step": 19369500 + }, + { + "epoch": 95.97, + "learning_rate": 2.0343113211753693e-06, + "loss": 1.79, + "step": 19370000 + }, + { + "epoch": 95.97, + "learning_rate": 2.0330727347492854e-06, + "loss": 1.7835, + "step": 19370500 + }, + { + "epoch": 95.97, + "learning_rate": 2.031836625496054e-06, + "loss": 1.7816, + "step": 19371000 + }, + { + "epoch": 95.97, + "learning_rate": 2.0305980390699703e-06, + "loss": 1.764, + "step": 19371500 + }, + { + "epoch": 95.98, + "learning_rate": 2.029359452643887e-06, + "loss": 1.7758, + "step": 19372000 + }, + { + "epoch": 95.98, + "learning_rate": 2.028120866217803e-06, + "loss": 1.7707, + "step": 19372500 + }, + { + "epoch": 95.98, + "learning_rate": 2.0268822797917194e-06, + "loss": 1.7951, + "step": 19373000 + }, + { + "epoch": 95.98, + "learning_rate": 2.02564864771134e-06, + "loss": 1.7834, + "step": 19373500 + }, + { + "epoch": 95.99, + "learning_rate": 2.0244100612852566e-06, + "loss": 1.7795, + "step": 19374000 + }, + { + "epoch": 95.99, + "learning_rate": 2.023171474859173e-06, + "loss": 1.7742, + "step": 19374500 + }, + { + "epoch": 95.99, + "learning_rate": 2.021932888433089e-06, + "loss": 1.7895, + "step": 19375000 + }, + { + "epoch": 95.99, + "learning_rate": 2.0206943020070057e-06, + "loss": 1.764, + "step": 19375500 + }, + { + "epoch": 96.0, + "learning_rate": 2.0194557155809217e-06, + "loss": 1.8001, + "step": 19376000 + }, + { + "epoch": 96.0, + "learning_rate": 2.0182171291548382e-06, + "loss": 1.791, + "step": 19376500 + }, + { + "epoch": 96.0, + "eval_accuracy": 0.6873047121683882, + "eval_accuracy_mlm": 0.6488415920257267, + "eval_accuracy_nsp": 0.8686180915362862, + "eval_loss": 2.316138982772827, + "eval_runtime": 147.2368, + "eval_samples_per_second": 1731.625, + "eval_steps_per_second": 72.156, + "step": 19376928 + }, + { + "epoch": 96.0, + "learning_rate": 2.0169785427287548e-06, + "loss": 1.7872, + "step": 19377000 + }, + { + "epoch": 96.0, + "learning_rate": 2.015739956302671e-06, + "loss": 1.7637, + "step": 19377500 + }, + { + "epoch": 96.01, + "learning_rate": 2.0145038470494396e-06, + "loss": 1.7844, + "step": 19378000 + }, + { + "epoch": 96.01, + "learning_rate": 2.0132652606233557e-06, + "loss": 1.7857, + "step": 19378500 + }, + { + "epoch": 96.01, + "learning_rate": 2.0120266741972722e-06, + "loss": 1.7675, + "step": 19379000 + }, + { + "epoch": 96.01, + "learning_rate": 2.0107880877711883e-06, + "loss": 1.7646, + "step": 19379500 + }, + { + "epoch": 96.02, + "learning_rate": 2.009549501345105e-06, + "loss": 1.7882, + "step": 19380000 + }, + { + "epoch": 96.02, + "learning_rate": 2.0083133920918736e-06, + "loss": 1.7747, + "step": 19380500 + }, + { + "epoch": 96.02, + "learning_rate": 2.0070748056657897e-06, + "loss": 1.7838, + "step": 19381000 + }, + { + "epoch": 96.02, + "learning_rate": 2.005836219239706e-06, + "loss": 1.7776, + "step": 19381500 + }, + { + "epoch": 96.03, + "learning_rate": 2.0046001099864746e-06, + "loss": 1.7752, + "step": 19382000 + }, + { + "epoch": 96.03, + "learning_rate": 2.003361523560391e-06, + "loss": 1.7769, + "step": 19382500 + }, + { + "epoch": 96.03, + "learning_rate": 2.002122937134307e-06, + "loss": 1.7679, + "step": 19383000 + }, + { + "epoch": 96.03, + "learning_rate": 2.0008843507082237e-06, + "loss": 1.7943, + "step": 19383500 + }, + { + "epoch": 96.04, + "learning_rate": 1.99964576428214e-06, + "loss": 1.7702, + "step": 19384000 + }, + { + "epoch": 96.04, + "learning_rate": 1.9984071778560567e-06, + "loss": 1.7644, + "step": 19384500 + }, + { + "epoch": 96.04, + "learning_rate": 1.997168591429973e-06, + "loss": 1.7493, + "step": 19385000 + }, + { + "epoch": 96.04, + "learning_rate": 1.9959300050038893e-06, + "loss": 1.7654, + "step": 19385500 + }, + { + "epoch": 96.04, + "learning_rate": 1.9946914185778058e-06, + "loss": 1.7654, + "step": 19386000 + }, + { + "epoch": 96.05, + "learning_rate": 1.993452832151722e-06, + "loss": 1.7713, + "step": 19386500 + }, + { + "epoch": 96.05, + "learning_rate": 1.9922142457256384e-06, + "loss": 1.7692, + "step": 19387000 + }, + { + "epoch": 96.05, + "learning_rate": 1.990975659299555e-06, + "loss": 1.7794, + "step": 19387500 + }, + { + "epoch": 96.05, + "learning_rate": 1.9897395500463233e-06, + "loss": 1.7653, + "step": 19388000 + }, + { + "epoch": 96.06, + "learning_rate": 1.9885009636202398e-06, + "loss": 1.786, + "step": 19388500 + }, + { + "epoch": 96.06, + "learning_rate": 1.987262377194156e-06, + "loss": 1.751, + "step": 19389000 + }, + { + "epoch": 96.06, + "learning_rate": 1.9860237907680724e-06, + "loss": 1.7677, + "step": 19389500 + }, + { + "epoch": 96.06, + "learning_rate": 1.9847852043419884e-06, + "loss": 1.775, + "step": 19390000 + }, + { + "epoch": 96.07, + "learning_rate": 1.9835515722616095e-06, + "loss": 1.7797, + "step": 19390500 + }, + { + "epoch": 96.07, + "learning_rate": 1.982315463008378e-06, + "loss": 1.7773, + "step": 19391000 + }, + { + "epoch": 96.07, + "learning_rate": 1.9810768765822944e-06, + "loss": 1.7536, + "step": 19391500 + }, + { + "epoch": 96.07, + "learning_rate": 1.9798382901562105e-06, + "loss": 1.7718, + "step": 19392000 + }, + { + "epoch": 96.08, + "learning_rate": 1.978599703730127e-06, + "loss": 1.7754, + "step": 19392500 + }, + { + "epoch": 96.08, + "learning_rate": 1.9773611173040435e-06, + "loss": 1.7857, + "step": 19393000 + }, + { + "epoch": 96.08, + "learning_rate": 1.9761225308779596e-06, + "loss": 1.7729, + "step": 19393500 + }, + { + "epoch": 96.08, + "learning_rate": 1.974883944451876e-06, + "loss": 1.7891, + "step": 19394000 + }, + { + "epoch": 96.09, + "learning_rate": 1.9736478351986445e-06, + "loss": 1.7645, + "step": 19394500 + }, + { + "epoch": 96.09, + "learning_rate": 1.972409248772561e-06, + "loss": 1.7889, + "step": 19395000 + }, + { + "epoch": 96.09, + "learning_rate": 1.971170662346477e-06, + "loss": 1.7585, + "step": 19395500 + }, + { + "epoch": 96.09, + "learning_rate": 1.9699320759203936e-06, + "loss": 1.7713, + "step": 19396000 + }, + { + "epoch": 96.1, + "learning_rate": 1.96869348949431e-06, + "loss": 1.7819, + "step": 19396500 + }, + { + "epoch": 96.1, + "learning_rate": 1.9674573802410785e-06, + "loss": 1.7963, + "step": 19397000 + }, + { + "epoch": 96.1, + "learning_rate": 1.966218793814995e-06, + "loss": 1.783, + "step": 19397500 + }, + { + "epoch": 96.1, + "learning_rate": 1.964980207388911e-06, + "loss": 1.7834, + "step": 19398000 + }, + { + "epoch": 96.11, + "learning_rate": 1.9637416209628276e-06, + "loss": 1.7907, + "step": 19398500 + }, + { + "epoch": 96.11, + "learning_rate": 1.9625030345367437e-06, + "loss": 1.7696, + "step": 19399000 + }, + { + "epoch": 96.11, + "learning_rate": 1.96126444811066e-06, + "loss": 1.7712, + "step": 19399500 + }, + { + "epoch": 96.11, + "learning_rate": 1.9600258616845767e-06, + "loss": 1.7661, + "step": 19400000 + }, + { + "epoch": 96.12, + "learning_rate": 1.958789752431345e-06, + "loss": 1.7976, + "step": 19400500 + }, + { + "epoch": 96.12, + "learning_rate": 1.9575511660052616e-06, + "loss": 1.7645, + "step": 19401000 + }, + { + "epoch": 96.12, + "learning_rate": 1.956312579579178e-06, + "loss": 1.7648, + "step": 19401500 + }, + { + "epoch": 96.12, + "learning_rate": 1.9550739931530946e-06, + "loss": 1.8045, + "step": 19402000 + }, + { + "epoch": 96.13, + "learning_rate": 1.9538354067270106e-06, + "loss": 1.7874, + "step": 19402500 + }, + { + "epoch": 96.13, + "learning_rate": 1.9525992974737795e-06, + "loss": 1.7568, + "step": 19403000 + }, + { + "epoch": 96.13, + "learning_rate": 1.9513607110476955e-06, + "loss": 1.7455, + "step": 19403500 + }, + { + "epoch": 96.13, + "learning_rate": 1.950122124621612e-06, + "loss": 1.7711, + "step": 19404000 + }, + { + "epoch": 96.14, + "learning_rate": 1.9488835381955285e-06, + "loss": 1.77, + "step": 19404500 + }, + { + "epoch": 96.14, + "learning_rate": 1.9476449517694446e-06, + "loss": 1.7728, + "step": 19405000 + }, + { + "epoch": 96.14, + "learning_rate": 1.9464088425162134e-06, + "loss": 1.756, + "step": 19405500 + }, + { + "epoch": 96.14, + "learning_rate": 1.9451702560901295e-06, + "loss": 1.7455, + "step": 19406000 + }, + { + "epoch": 96.15, + "learning_rate": 1.943931669664046e-06, + "loss": 1.7821, + "step": 19406500 + }, + { + "epoch": 96.15, + "learning_rate": 1.942693083237962e-06, + "loss": 1.7711, + "step": 19407000 + }, + { + "epoch": 96.15, + "learning_rate": 1.9414544968118786e-06, + "loss": 1.7727, + "step": 19407500 + }, + { + "epoch": 96.15, + "learning_rate": 1.940215910385795e-06, + "loss": 1.7787, + "step": 19408000 + }, + { + "epoch": 96.16, + "learning_rate": 1.938977323959711e-06, + "loss": 1.7777, + "step": 19408500 + }, + { + "epoch": 96.16, + "learning_rate": 1.9377387375336277e-06, + "loss": 1.7313, + "step": 19409000 + }, + { + "epoch": 96.16, + "learning_rate": 1.936502628280396e-06, + "loss": 1.7619, + "step": 19409500 + }, + { + "epoch": 96.16, + "learning_rate": 1.9352640418543126e-06, + "loss": 1.7638, + "step": 19410000 + }, + { + "epoch": 96.17, + "learning_rate": 1.934025455428229e-06, + "loss": 1.7694, + "step": 19410500 + }, + { + "epoch": 96.17, + "learning_rate": 1.932786869002145e-06, + "loss": 1.7807, + "step": 19411000 + }, + { + "epoch": 96.17, + "learning_rate": 1.9315482825760617e-06, + "loss": 1.7888, + "step": 19411500 + }, + { + "epoch": 96.17, + "learning_rate": 1.9303096961499778e-06, + "loss": 1.7831, + "step": 19412000 + }, + { + "epoch": 96.18, + "learning_rate": 1.9290711097238947e-06, + "loss": 1.7615, + "step": 19412500 + }, + { + "epoch": 96.18, + "learning_rate": 1.9278325232978108e-06, + "loss": 1.7811, + "step": 19413000 + }, + { + "epoch": 96.18, + "learning_rate": 1.9265939368717273e-06, + "loss": 1.7717, + "step": 19413500 + }, + { + "epoch": 96.18, + "learning_rate": 1.9253578276184957e-06, + "loss": 1.7778, + "step": 19414000 + }, + { + "epoch": 96.19, + "learning_rate": 1.924121718365264e-06, + "loss": 1.7513, + "step": 19414500 + }, + { + "epoch": 96.19, + "learning_rate": 1.9228831319391805e-06, + "loss": 1.7684, + "step": 19415000 + }, + { + "epoch": 96.19, + "learning_rate": 1.921644545513097e-06, + "loss": 1.7681, + "step": 19415500 + }, + { + "epoch": 96.19, + "learning_rate": 1.9204059590870136e-06, + "loss": 1.7838, + "step": 19416000 + }, + { + "epoch": 96.2, + "learning_rate": 1.9191673726609296e-06, + "loss": 1.7716, + "step": 19416500 + }, + { + "epoch": 96.2, + "learning_rate": 1.917928786234846e-06, + "loss": 1.7642, + "step": 19417000 + }, + { + "epoch": 96.2, + "learning_rate": 1.9166901998087622e-06, + "loss": 1.7804, + "step": 19417500 + }, + { + "epoch": 96.2, + "learning_rate": 1.915454090555531e-06, + "loss": 1.7558, + "step": 19418000 + }, + { + "epoch": 96.21, + "learning_rate": 1.914215504129447e-06, + "loss": 1.7849, + "step": 19418500 + }, + { + "epoch": 96.21, + "learning_rate": 1.912979394876216e-06, + "loss": 1.7461, + "step": 19419000 + }, + { + "epoch": 96.21, + "learning_rate": 1.9117432856229843e-06, + "loss": 1.7468, + "step": 19419500 + }, + { + "epoch": 96.21, + "learning_rate": 1.9105046991969004e-06, + "loss": 1.7637, + "step": 19420000 + }, + { + "epoch": 96.22, + "learning_rate": 1.9092661127708173e-06, + "loss": 1.7726, + "step": 19420500 + }, + { + "epoch": 96.22, + "learning_rate": 1.9080275263447334e-06, + "loss": 1.772, + "step": 19421000 + }, + { + "epoch": 96.22, + "learning_rate": 1.90678893991865e-06, + "loss": 1.7556, + "step": 19421500 + }, + { + "epoch": 96.22, + "learning_rate": 1.9055503534925662e-06, + "loss": 1.7624, + "step": 19422000 + }, + { + "epoch": 96.23, + "learning_rate": 1.9043117670664825e-06, + "loss": 1.769, + "step": 19422500 + }, + { + "epoch": 96.23, + "learning_rate": 1.9030731806403988e-06, + "loss": 1.7401, + "step": 19423000 + }, + { + "epoch": 96.23, + "learning_rate": 1.901834594214315e-06, + "loss": 1.7865, + "step": 19423500 + }, + { + "epoch": 96.23, + "learning_rate": 1.9005960077882316e-06, + "loss": 1.7866, + "step": 19424000 + }, + { + "epoch": 96.24, + "learning_rate": 1.8993598985350002e-06, + "loss": 1.7742, + "step": 19424500 + }, + { + "epoch": 96.24, + "learning_rate": 1.8981213121089165e-06, + "loss": 1.758, + "step": 19425000 + }, + { + "epoch": 96.24, + "learning_rate": 1.8968827256828328e-06, + "loss": 1.7676, + "step": 19425500 + }, + { + "epoch": 96.24, + "learning_rate": 1.895644139256749e-06, + "loss": 1.7869, + "step": 19426000 + }, + { + "epoch": 96.25, + "learning_rate": 1.8944080300035177e-06, + "loss": 1.7515, + "step": 19426500 + }, + { + "epoch": 96.25, + "learning_rate": 1.893169443577434e-06, + "loss": 1.747, + "step": 19427000 + }, + { + "epoch": 96.25, + "learning_rate": 1.8919308571513502e-06, + "loss": 1.7568, + "step": 19427500 + }, + { + "epoch": 96.25, + "learning_rate": 1.8906922707252667e-06, + "loss": 1.7698, + "step": 19428000 + }, + { + "epoch": 96.26, + "learning_rate": 1.889453684299183e-06, + "loss": 1.7609, + "step": 19428500 + }, + { + "epoch": 96.26, + "learning_rate": 1.8882175750459516e-06, + "loss": 1.7738, + "step": 19429000 + }, + { + "epoch": 96.26, + "learning_rate": 1.886978988619868e-06, + "loss": 1.7792, + "step": 19429500 + }, + { + "epoch": 96.26, + "learning_rate": 1.8857404021937842e-06, + "loss": 1.7755, + "step": 19430000 + }, + { + "epoch": 96.27, + "learning_rate": 1.8845018157677005e-06, + "loss": 1.7561, + "step": 19430500 + }, + { + "epoch": 96.27, + "learning_rate": 1.883263229341617e-06, + "loss": 1.7788, + "step": 19431000 + }, + { + "epoch": 96.27, + "learning_rate": 1.8820246429155335e-06, + "loss": 1.7869, + "step": 19431500 + }, + { + "epoch": 96.27, + "learning_rate": 1.88078605648945e-06, + "loss": 1.7798, + "step": 19432000 + }, + { + "epoch": 96.28, + "learning_rate": 1.8795474700633663e-06, + "loss": 1.7906, + "step": 19432500 + }, + { + "epoch": 96.28, + "learning_rate": 1.8783088836372826e-06, + "loss": 1.7366, + "step": 19433000 + }, + { + "epoch": 96.28, + "learning_rate": 1.877070297211199e-06, + "loss": 1.7729, + "step": 19433500 + }, + { + "epoch": 96.28, + "learning_rate": 1.8758317107851152e-06, + "loss": 1.7999, + "step": 19434000 + }, + { + "epoch": 96.29, + "learning_rate": 1.8745931243590315e-06, + "loss": 1.7681, + "step": 19434500 + }, + { + "epoch": 96.29, + "learning_rate": 1.873354537932948e-06, + "loss": 1.7897, + "step": 19435000 + }, + { + "epoch": 96.29, + "learning_rate": 1.8721184286797166e-06, + "loss": 1.7846, + "step": 19435500 + }, + { + "epoch": 96.29, + "learning_rate": 1.8708798422536329e-06, + "loss": 1.7692, + "step": 19436000 + }, + { + "epoch": 96.3, + "learning_rate": 1.8696437330004015e-06, + "loss": 1.7834, + "step": 19436500 + }, + { + "epoch": 96.3, + "learning_rate": 1.8684051465743178e-06, + "loss": 1.7857, + "step": 19437000 + }, + { + "epoch": 96.3, + "learning_rate": 1.867166560148234e-06, + "loss": 1.777, + "step": 19437500 + }, + { + "epoch": 96.3, + "learning_rate": 1.8659279737221504e-06, + "loss": 1.7707, + "step": 19438000 + }, + { + "epoch": 96.31, + "learning_rate": 1.864691864468919e-06, + "loss": 1.7588, + "step": 19438500 + }, + { + "epoch": 96.31, + "learning_rate": 1.8634532780428353e-06, + "loss": 1.7778, + "step": 19439000 + }, + { + "epoch": 96.31, + "learning_rate": 1.8622146916167518e-06, + "loss": 1.7751, + "step": 19439500 + }, + { + "epoch": 96.31, + "learning_rate": 1.860976105190668e-06, + "loss": 1.77, + "step": 19440000 + }, + { + "epoch": 96.31, + "learning_rate": 1.8597375187645843e-06, + "loss": 1.7558, + "step": 19440500 + }, + { + "epoch": 96.32, + "learning_rate": 1.8584989323385006e-06, + "loss": 1.7792, + "step": 19441000 + }, + { + "epoch": 96.32, + "learning_rate": 1.857260345912417e-06, + "loss": 1.7606, + "step": 19441500 + }, + { + "epoch": 96.32, + "learning_rate": 1.8560217594863336e-06, + "loss": 1.785, + "step": 19442000 + }, + { + "epoch": 96.32, + "learning_rate": 1.854785650233102e-06, + "loss": 1.7502, + "step": 19442500 + }, + { + "epoch": 96.33, + "learning_rate": 1.8535495409798706e-06, + "loss": 1.7639, + "step": 19443000 + }, + { + "epoch": 96.33, + "learning_rate": 1.852310954553787e-06, + "loss": 1.7676, + "step": 19443500 + }, + { + "epoch": 96.33, + "learning_rate": 1.8510723681277032e-06, + "loss": 1.7683, + "step": 19444000 + }, + { + "epoch": 96.33, + "learning_rate": 1.8498337817016195e-06, + "loss": 1.7692, + "step": 19444500 + }, + { + "epoch": 96.34, + "learning_rate": 1.8485976724483881e-06, + "loss": 1.7961, + "step": 19445000 + }, + { + "epoch": 96.34, + "learning_rate": 1.8473590860223044e-06, + "loss": 1.7885, + "step": 19445500 + }, + { + "epoch": 96.34, + "learning_rate": 1.8461204995962207e-06, + "loss": 1.7633, + "step": 19446000 + }, + { + "epoch": 96.34, + "learning_rate": 1.8448819131701372e-06, + "loss": 1.8001, + "step": 19446500 + }, + { + "epoch": 96.35, + "learning_rate": 1.8436433267440537e-06, + "loss": 1.7694, + "step": 19447000 + }, + { + "epoch": 96.35, + "learning_rate": 1.8424047403179702e-06, + "loss": 1.7665, + "step": 19447500 + }, + { + "epoch": 96.35, + "learning_rate": 1.8411661538918865e-06, + "loss": 1.7793, + "step": 19448000 + }, + { + "epoch": 96.35, + "learning_rate": 1.8399275674658028e-06, + "loss": 1.7837, + "step": 19448500 + }, + { + "epoch": 96.36, + "learning_rate": 1.838688981039719e-06, + "loss": 1.7564, + "step": 19449000 + }, + { + "epoch": 96.36, + "learning_rate": 1.8374503946136354e-06, + "loss": 1.7553, + "step": 19449500 + }, + { + "epoch": 96.36, + "learning_rate": 1.8362118081875519e-06, + "loss": 1.7409, + "step": 19450000 + }, + { + "epoch": 96.36, + "learning_rate": 1.8349732217614682e-06, + "loss": 1.7487, + "step": 19450500 + }, + { + "epoch": 96.37, + "learning_rate": 1.8337395896810889e-06, + "loss": 1.756, + "step": 19451000 + }, + { + "epoch": 96.37, + "learning_rate": 1.8325034804278573e-06, + "loss": 1.7658, + "step": 19451500 + }, + { + "epoch": 96.37, + "learning_rate": 1.831264894001774e-06, + "loss": 1.77, + "step": 19452000 + }, + { + "epoch": 96.37, + "learning_rate": 1.8300263075756903e-06, + "loss": 1.8026, + "step": 19452500 + }, + { + "epoch": 96.38, + "learning_rate": 1.8287877211496066e-06, + "loss": 1.7447, + "step": 19453000 + }, + { + "epoch": 96.38, + "learning_rate": 1.8275491347235228e-06, + "loss": 1.7742, + "step": 19453500 + }, + { + "epoch": 96.38, + "learning_rate": 1.8263130254702914e-06, + "loss": 1.7909, + "step": 19454000 + }, + { + "epoch": 96.38, + "learning_rate": 1.8250744390442077e-06, + "loss": 1.7487, + "step": 19454500 + }, + { + "epoch": 96.39, + "learning_rate": 1.8238358526181242e-06, + "loss": 1.7742, + "step": 19455000 + }, + { + "epoch": 96.39, + "learning_rate": 1.8225972661920405e-06, + "loss": 1.7735, + "step": 19455500 + }, + { + "epoch": 96.39, + "learning_rate": 1.8213586797659568e-06, + "loss": 1.7729, + "step": 19456000 + }, + { + "epoch": 96.39, + "learning_rate": 1.8201200933398731e-06, + "loss": 1.7763, + "step": 19456500 + }, + { + "epoch": 96.4, + "learning_rate": 1.8188815069137894e-06, + "loss": 1.7633, + "step": 19457000 + }, + { + "epoch": 96.4, + "learning_rate": 1.817645397660558e-06, + "loss": 1.7945, + "step": 19457500 + }, + { + "epoch": 96.4, + "learning_rate": 1.8164068112344743e-06, + "loss": 1.7932, + "step": 19458000 + }, + { + "epoch": 96.4, + "learning_rate": 1.8151682248083908e-06, + "loss": 1.7745, + "step": 19458500 + }, + { + "epoch": 96.41, + "learning_rate": 1.813929638382307e-06, + "loss": 1.7511, + "step": 19459000 + }, + { + "epoch": 96.41, + "learning_rate": 1.8126910519562234e-06, + "loss": 1.774, + "step": 19459500 + }, + { + "epoch": 96.41, + "learning_rate": 1.8114524655301397e-06, + "loss": 1.7727, + "step": 19460000 + }, + { + "epoch": 96.41, + "learning_rate": 1.810213879104056e-06, + "loss": 1.784, + "step": 19460500 + }, + { + "epoch": 96.42, + "learning_rate": 1.8089752926779727e-06, + "loss": 1.7868, + "step": 19461000 + }, + { + "epoch": 96.42, + "learning_rate": 1.807736706251889e-06, + "loss": 1.7691, + "step": 19461500 + }, + { + "epoch": 96.42, + "learning_rate": 1.8064981198258055e-06, + "loss": 1.7785, + "step": 19462000 + }, + { + "epoch": 96.42, + "learning_rate": 1.8052595333997218e-06, + "loss": 1.7849, + "step": 19462500 + }, + { + "epoch": 96.43, + "learning_rate": 1.804020946973638e-06, + "loss": 1.7861, + "step": 19463000 + }, + { + "epoch": 96.43, + "learning_rate": 1.8027823605475544e-06, + "loss": 1.7854, + "step": 19463500 + }, + { + "epoch": 96.43, + "learning_rate": 1.801546251294323e-06, + "loss": 1.7604, + "step": 19464000 + }, + { + "epoch": 96.43, + "learning_rate": 1.8003076648682393e-06, + "loss": 1.7854, + "step": 19464500 + }, + { + "epoch": 96.44, + "learning_rate": 1.7990715556150079e-06, + "loss": 1.7571, + "step": 19465000 + }, + { + "epoch": 96.44, + "learning_rate": 1.7978329691889242e-06, + "loss": 1.7415, + "step": 19465500 + }, + { + "epoch": 96.44, + "learning_rate": 1.7965993371085448e-06, + "loss": 1.7779, + "step": 19466000 + }, + { + "epoch": 96.44, + "learning_rate": 1.7953607506824611e-06, + "loss": 1.77, + "step": 19466500 + }, + { + "epoch": 96.45, + "learning_rate": 1.7941221642563774e-06, + "loss": 1.7762, + "step": 19467000 + }, + { + "epoch": 96.45, + "learning_rate": 1.7928835778302941e-06, + "loss": 1.7579, + "step": 19467500 + }, + { + "epoch": 96.45, + "learning_rate": 1.7916449914042104e-06, + "loss": 1.7831, + "step": 19468000 + }, + { + "epoch": 96.45, + "learning_rate": 1.7904064049781267e-06, + "loss": 1.7733, + "step": 19468500 + }, + { + "epoch": 96.46, + "learning_rate": 1.789167818552043e-06, + "loss": 1.772, + "step": 19469000 + }, + { + "epoch": 96.46, + "learning_rate": 1.7879317092988116e-06, + "loss": 1.7711, + "step": 19469500 + }, + { + "epoch": 96.46, + "learning_rate": 1.786693122872728e-06, + "loss": 1.7819, + "step": 19470000 + }, + { + "epoch": 96.46, + "learning_rate": 1.7854545364466444e-06, + "loss": 1.7565, + "step": 19470500 + }, + { + "epoch": 96.47, + "learning_rate": 1.7842159500205607e-06, + "loss": 1.7655, + "step": 19471000 + }, + { + "epoch": 96.47, + "learning_rate": 1.782977363594477e-06, + "loss": 1.7633, + "step": 19471500 + }, + { + "epoch": 96.47, + "learning_rate": 1.7817387771683933e-06, + "loss": 1.7601, + "step": 19472000 + }, + { + "epoch": 96.47, + "learning_rate": 1.7805001907423096e-06, + "loss": 1.773, + "step": 19472500 + }, + { + "epoch": 96.48, + "learning_rate": 1.779261604316226e-06, + "loss": 1.7568, + "step": 19473000 + }, + { + "epoch": 96.48, + "learning_rate": 1.7780230178901424e-06, + "loss": 1.7783, + "step": 19473500 + }, + { + "epoch": 96.48, + "learning_rate": 1.7767844314640587e-06, + "loss": 1.754, + "step": 19474000 + }, + { + "epoch": 96.48, + "learning_rate": 1.775545845037975e-06, + "loss": 1.7819, + "step": 19474500 + }, + { + "epoch": 96.49, + "learning_rate": 1.7743072586118917e-06, + "loss": 1.7975, + "step": 19475000 + }, + { + "epoch": 96.49, + "learning_rate": 1.7730711493586599e-06, + "loss": 1.7487, + "step": 19475500 + }, + { + "epoch": 96.49, + "learning_rate": 1.7718325629325762e-06, + "loss": 1.7486, + "step": 19476000 + }, + { + "epoch": 96.49, + "learning_rate": 1.7705939765064927e-06, + "loss": 1.7754, + "step": 19476500 + }, + { + "epoch": 96.5, + "learning_rate": 1.7693553900804092e-06, + "loss": 1.775, + "step": 19477000 + }, + { + "epoch": 96.5, + "learning_rate": 1.7681168036543257e-06, + "loss": 1.8026, + "step": 19477500 + }, + { + "epoch": 96.5, + "learning_rate": 1.766878217228242e-06, + "loss": 1.7793, + "step": 19478000 + }, + { + "epoch": 96.5, + "learning_rate": 1.7656421079750106e-06, + "loss": 1.7813, + "step": 19478500 + }, + { + "epoch": 96.51, + "learning_rate": 1.7644035215489269e-06, + "loss": 1.7477, + "step": 19479000 + }, + { + "epoch": 96.51, + "learning_rate": 1.7631649351228432e-06, + "loss": 1.7596, + "step": 19479500 + }, + { + "epoch": 96.51, + "learning_rate": 1.7619263486967594e-06, + "loss": 1.7585, + "step": 19480000 + }, + { + "epoch": 96.51, + "learning_rate": 1.760687762270676e-06, + "loss": 1.8015, + "step": 19480500 + }, + { + "epoch": 96.52, + "learning_rate": 1.7594491758445922e-06, + "loss": 1.7809, + "step": 19481000 + }, + { + "epoch": 96.52, + "learning_rate": 1.7582105894185085e-06, + "loss": 1.7832, + "step": 19481500 + }, + { + "epoch": 96.52, + "learning_rate": 1.7569720029924248e-06, + "loss": 1.7846, + "step": 19482000 + }, + { + "epoch": 96.52, + "learning_rate": 1.7557358937391934e-06, + "loss": 1.786, + "step": 19482500 + }, + { + "epoch": 96.53, + "learning_rate": 1.7544973073131097e-06, + "loss": 1.7939, + "step": 19483000 + }, + { + "epoch": 96.53, + "learning_rate": 1.753258720887026e-06, + "loss": 1.7561, + "step": 19483500 + }, + { + "epoch": 96.53, + "learning_rate": 1.7520201344609425e-06, + "loss": 1.7863, + "step": 19484000 + }, + { + "epoch": 96.53, + "learning_rate": 1.7507840252077111e-06, + "loss": 1.7796, + "step": 19484500 + }, + { + "epoch": 96.54, + "learning_rate": 1.7495454387816274e-06, + "loss": 1.7723, + "step": 19485000 + }, + { + "epoch": 96.54, + "learning_rate": 1.7483068523555437e-06, + "loss": 1.762, + "step": 19485500 + }, + { + "epoch": 96.54, + "learning_rate": 1.74706826592946e-06, + "loss": 1.7802, + "step": 19486000 + }, + { + "epoch": 96.54, + "learning_rate": 1.7458296795033763e-06, + "loss": 1.7831, + "step": 19486500 + }, + { + "epoch": 96.55, + "learning_rate": 1.7445910930772926e-06, + "loss": 1.7912, + "step": 19487000 + }, + { + "epoch": 96.55, + "learning_rate": 1.7433525066512093e-06, + "loss": 1.7793, + "step": 19487500 + }, + { + "epoch": 96.55, + "learning_rate": 1.7421139202251256e-06, + "loss": 1.7749, + "step": 19488000 + }, + { + "epoch": 96.55, + "learning_rate": 1.740875333799042e-06, + "loss": 1.7621, + "step": 19488500 + }, + { + "epoch": 96.56, + "learning_rate": 1.7396367473729584e-06, + "loss": 1.7757, + "step": 19489000 + }, + { + "epoch": 96.56, + "learning_rate": 1.7383981609468747e-06, + "loss": 1.7612, + "step": 19489500 + }, + { + "epoch": 96.56, + "learning_rate": 1.7371620516936433e-06, + "loss": 1.7701, + "step": 19490000 + }, + { + "epoch": 96.56, + "learning_rate": 1.7359234652675596e-06, + "loss": 1.7683, + "step": 19490500 + }, + { + "epoch": 96.57, + "learning_rate": 1.7346873560143282e-06, + "loss": 1.7587, + "step": 19491000 + }, + { + "epoch": 96.57, + "learning_rate": 1.7334487695882445e-06, + "loss": 1.7667, + "step": 19491500 + }, + { + "epoch": 96.57, + "learning_rate": 1.732210183162161e-06, + "loss": 1.7667, + "step": 19492000 + }, + { + "epoch": 96.57, + "learning_rate": 1.7309715967360773e-06, + "loss": 1.7855, + "step": 19492500 + }, + { + "epoch": 96.58, + "learning_rate": 1.7297330103099935e-06, + "loss": 1.7779, + "step": 19493000 + }, + { + "epoch": 96.58, + "learning_rate": 1.7284944238839098e-06, + "loss": 1.7761, + "step": 19493500 + }, + { + "epoch": 96.58, + "learning_rate": 1.7272583146306784e-06, + "loss": 1.7894, + "step": 19494000 + }, + { + "epoch": 96.58, + "learning_rate": 1.7260197282045947e-06, + "loss": 1.7668, + "step": 19494500 + }, + { + "epoch": 96.58, + "learning_rate": 1.724781141778511e-06, + "loss": 1.7783, + "step": 19495000 + }, + { + "epoch": 96.59, + "learning_rate": 1.7235425553524275e-06, + "loss": 1.7897, + "step": 19495500 + }, + { + "epoch": 96.59, + "learning_rate": 1.7223039689263438e-06, + "loss": 1.7871, + "step": 19496000 + }, + { + "epoch": 96.59, + "learning_rate": 1.7210678596731124e-06, + "loss": 1.7633, + "step": 19496500 + }, + { + "epoch": 96.59, + "learning_rate": 1.7198292732470287e-06, + "loss": 1.7783, + "step": 19497000 + }, + { + "epoch": 96.6, + "learning_rate": 1.718590686820945e-06, + "loss": 1.7635, + "step": 19497500 + }, + { + "epoch": 96.6, + "learning_rate": 1.7173521003948613e-06, + "loss": 1.7797, + "step": 19498000 + }, + { + "epoch": 96.6, + "learning_rate": 1.7161135139687778e-06, + "loss": 1.7683, + "step": 19498500 + }, + { + "epoch": 96.6, + "learning_rate": 1.714874927542694e-06, + "loss": 1.7766, + "step": 19499000 + }, + { + "epoch": 96.61, + "learning_rate": 1.7136363411166104e-06, + "loss": 1.7482, + "step": 19499500 + }, + { + "epoch": 96.61, + "learning_rate": 1.7123977546905271e-06, + "loss": 1.7579, + "step": 19500000 + }, + { + "epoch": 96.61, + "learning_rate": 1.7111591682644434e-06, + "loss": 1.751, + "step": 19500500 + }, + { + "epoch": 96.61, + "learning_rate": 1.7099205818383597e-06, + "loss": 1.7741, + "step": 19501000 + }, + { + "epoch": 96.62, + "learning_rate": 1.708681995412276e-06, + "loss": 1.7752, + "step": 19501500 + }, + { + "epoch": 96.62, + "learning_rate": 1.7074434089861923e-06, + "loss": 1.7571, + "step": 19502000 + }, + { + "epoch": 96.62, + "learning_rate": 1.7062048225601088e-06, + "loss": 1.7799, + "step": 19502500 + }, + { + "epoch": 96.62, + "learning_rate": 1.704966236134025e-06, + "loss": 1.7797, + "step": 19503000 + }, + { + "epoch": 96.63, + "learning_rate": 1.7037276497079414e-06, + "loss": 1.8, + "step": 19503500 + }, + { + "epoch": 96.63, + "learning_rate": 1.7024890632818577e-06, + "loss": 1.7566, + "step": 19504000 + }, + { + "epoch": 96.63, + "learning_rate": 1.701250476855774e-06, + "loss": 1.7812, + "step": 19504500 + }, + { + "epoch": 96.63, + "learning_rate": 1.7000143676025426e-06, + "loss": 1.7622, + "step": 19505000 + }, + { + "epoch": 96.64, + "learning_rate": 1.698775781176459e-06, + "loss": 1.7617, + "step": 19505500 + }, + { + "epoch": 96.64, + "learning_rate": 1.6975371947503753e-06, + "loss": 1.7571, + "step": 19506000 + }, + { + "epoch": 96.64, + "learning_rate": 1.696301085497144e-06, + "loss": 1.7746, + "step": 19506500 + }, + { + "epoch": 96.64, + "learning_rate": 1.6950624990710602e-06, + "loss": 1.7793, + "step": 19507000 + }, + { + "epoch": 96.65, + "learning_rate": 1.6938239126449765e-06, + "loss": 1.768, + "step": 19507500 + }, + { + "epoch": 96.65, + "learning_rate": 1.6925853262188928e-06, + "loss": 1.7824, + "step": 19508000 + }, + { + "epoch": 96.65, + "learning_rate": 1.6913467397928091e-06, + "loss": 1.7817, + "step": 19508500 + }, + { + "epoch": 96.65, + "learning_rate": 1.6901081533667256e-06, + "loss": 1.7621, + "step": 19509000 + }, + { + "epoch": 96.66, + "learning_rate": 1.6888720441134942e-06, + "loss": 1.7791, + "step": 19509500 + }, + { + "epoch": 96.66, + "learning_rate": 1.6876334576874105e-06, + "loss": 1.7719, + "step": 19510000 + }, + { + "epoch": 96.66, + "learning_rate": 1.6863948712613268e-06, + "loss": 1.7583, + "step": 19510500 + }, + { + "epoch": 96.66, + "learning_rate": 1.6851562848352435e-06, + "loss": 1.7641, + "step": 19511000 + }, + { + "epoch": 96.67, + "learning_rate": 1.6839176984091598e-06, + "loss": 1.7687, + "step": 19511500 + }, + { + "epoch": 96.67, + "learning_rate": 1.6826791119830761e-06, + "loss": 1.771, + "step": 19512000 + }, + { + "epoch": 96.67, + "learning_rate": 1.6814430027298447e-06, + "loss": 1.7837, + "step": 19512500 + }, + { + "epoch": 96.67, + "learning_rate": 1.680204416303761e-06, + "loss": 1.7891, + "step": 19513000 + }, + { + "epoch": 96.68, + "learning_rate": 1.6789683070505294e-06, + "loss": 1.7869, + "step": 19513500 + }, + { + "epoch": 96.68, + "learning_rate": 1.6777297206244459e-06, + "loss": 1.7826, + "step": 19514000 + }, + { + "epoch": 96.68, + "learning_rate": 1.6764936113712143e-06, + "loss": 1.771, + "step": 19514500 + }, + { + "epoch": 96.68, + "learning_rate": 1.6752550249451306e-06, + "loss": 1.7739, + "step": 19515000 + }, + { + "epoch": 96.69, + "learning_rate": 1.6740164385190469e-06, + "loss": 1.7587, + "step": 19515500 + }, + { + "epoch": 96.69, + "learning_rate": 1.6727778520929636e-06, + "loss": 1.7664, + "step": 19516000 + }, + { + "epoch": 96.69, + "learning_rate": 1.6715392656668799e-06, + "loss": 1.7853, + "step": 19516500 + }, + { + "epoch": 96.69, + "learning_rate": 1.6703031564136483e-06, + "loss": 1.7426, + "step": 19517000 + }, + { + "epoch": 96.7, + "learning_rate": 1.6690645699875648e-06, + "loss": 1.7778, + "step": 19517500 + }, + { + "epoch": 96.7, + "learning_rate": 1.6678259835614813e-06, + "loss": 1.7772, + "step": 19518000 + }, + { + "epoch": 96.7, + "learning_rate": 1.6665873971353976e-06, + "loss": 1.7556, + "step": 19518500 + }, + { + "epoch": 96.7, + "learning_rate": 1.6653488107093139e-06, + "loss": 1.7885, + "step": 19519000 + }, + { + "epoch": 96.71, + "learning_rate": 1.6641102242832301e-06, + "loss": 1.769, + "step": 19519500 + }, + { + "epoch": 96.71, + "learning_rate": 1.6628716378571464e-06, + "loss": 1.7393, + "step": 19520000 + }, + { + "epoch": 96.71, + "learning_rate": 1.6616330514310627e-06, + "loss": 1.759, + "step": 19520500 + }, + { + "epoch": 96.71, + "learning_rate": 1.6603969421778313e-06, + "loss": 1.7882, + "step": 19521000 + }, + { + "epoch": 96.72, + "learning_rate": 1.6591583557517478e-06, + "loss": 1.7764, + "step": 19521500 + }, + { + "epoch": 96.72, + "learning_rate": 1.6579197693256641e-06, + "loss": 1.7591, + "step": 19522000 + }, + { + "epoch": 96.72, + "learning_rate": 1.6566811828995804e-06, + "loss": 1.7775, + "step": 19522500 + }, + { + "epoch": 96.72, + "learning_rate": 1.6554425964734967e-06, + "loss": 1.7543, + "step": 19523000 + }, + { + "epoch": 96.73, + "learning_rate": 1.654204010047413e-06, + "loss": 1.7792, + "step": 19523500 + }, + { + "epoch": 96.73, + "learning_rate": 1.6529654236213295e-06, + "loss": 1.77, + "step": 19524000 + }, + { + "epoch": 96.73, + "learning_rate": 1.6517268371952458e-06, + "loss": 1.7766, + "step": 19524500 + }, + { + "epoch": 96.73, + "learning_rate": 1.6504907279420144e-06, + "loss": 1.7763, + "step": 19525000 + }, + { + "epoch": 96.74, + "learning_rate": 1.6492521415159307e-06, + "loss": 1.8082, + "step": 19525500 + }, + { + "epoch": 96.74, + "learning_rate": 1.6480160322626993e-06, + "loss": 1.8005, + "step": 19526000 + }, + { + "epoch": 96.74, + "learning_rate": 1.6467774458366156e-06, + "loss": 1.7744, + "step": 19526500 + }, + { + "epoch": 96.74, + "learning_rate": 1.6455388594105319e-06, + "loss": 1.7949, + "step": 19527000 + }, + { + "epoch": 96.75, + "learning_rate": 1.6443002729844482e-06, + "loss": 1.7659, + "step": 19527500 + }, + { + "epoch": 96.75, + "learning_rate": 1.6430616865583647e-06, + "loss": 1.7715, + "step": 19528000 + }, + { + "epoch": 96.75, + "learning_rate": 1.6418231001322812e-06, + "loss": 1.7802, + "step": 19528500 + }, + { + "epoch": 96.75, + "learning_rate": 1.6405869908790496e-06, + "loss": 1.7755, + "step": 19529000 + }, + { + "epoch": 96.76, + "learning_rate": 1.6393484044529659e-06, + "loss": 1.77, + "step": 19529500 + }, + { + "epoch": 96.76, + "learning_rate": 1.6381098180268826e-06, + "loss": 1.7753, + "step": 19530000 + }, + { + "epoch": 96.76, + "learning_rate": 1.6368712316007989e-06, + "loss": 1.7573, + "step": 19530500 + }, + { + "epoch": 96.76, + "learning_rate": 1.6356326451747152e-06, + "loss": 1.7937, + "step": 19531000 + }, + { + "epoch": 96.77, + "learning_rate": 1.6343940587486315e-06, + "loss": 1.7662, + "step": 19531500 + }, + { + "epoch": 96.77, + "learning_rate": 1.6331579494954e-06, + "loss": 1.7663, + "step": 19532000 + }, + { + "epoch": 96.77, + "learning_rate": 1.6319193630693163e-06, + "loss": 1.7576, + "step": 19532500 + }, + { + "epoch": 96.77, + "learning_rate": 1.6306807766432328e-06, + "loss": 1.7541, + "step": 19533000 + }, + { + "epoch": 96.78, + "learning_rate": 1.6294421902171491e-06, + "loss": 1.7779, + "step": 19533500 + }, + { + "epoch": 96.78, + "learning_rate": 1.6282060809639177e-06, + "loss": 1.7563, + "step": 19534000 + }, + { + "epoch": 96.78, + "learning_rate": 1.626967494537834e-06, + "loss": 1.7726, + "step": 19534500 + }, + { + "epoch": 96.78, + "learning_rate": 1.6257289081117503e-06, + "loss": 1.78, + "step": 19535000 + }, + { + "epoch": 96.79, + "learning_rate": 1.6244903216856666e-06, + "loss": 1.7495, + "step": 19535500 + }, + { + "epoch": 96.79, + "learning_rate": 1.6232517352595831e-06, + "loss": 1.7837, + "step": 19536000 + }, + { + "epoch": 96.79, + "learning_rate": 1.6220131488334994e-06, + "loss": 1.7874, + "step": 19536500 + }, + { + "epoch": 96.79, + "learning_rate": 1.6207745624074157e-06, + "loss": 1.78, + "step": 19537000 + }, + { + "epoch": 96.8, + "learning_rate": 1.6195384531541843e-06, + "loss": 1.7783, + "step": 19537500 + }, + { + "epoch": 96.8, + "learning_rate": 1.6182998667281006e-06, + "loss": 1.7689, + "step": 19538000 + }, + { + "epoch": 96.8, + "learning_rate": 1.6170612803020169e-06, + "loss": 1.7706, + "step": 19538500 + }, + { + "epoch": 96.8, + "learning_rate": 1.6158226938759332e-06, + "loss": 1.7943, + "step": 19539000 + }, + { + "epoch": 96.81, + "learning_rate": 1.6145841074498497e-06, + "loss": 1.7748, + "step": 19539500 + }, + { + "epoch": 96.81, + "learning_rate": 1.613345521023766e-06, + "loss": 1.7902, + "step": 19540000 + }, + { + "epoch": 96.81, + "learning_rate": 1.6121069345976823e-06, + "loss": 1.7764, + "step": 19540500 + }, + { + "epoch": 96.81, + "learning_rate": 1.610868348171599e-06, + "loss": 1.7821, + "step": 19541000 + }, + { + "epoch": 96.82, + "learning_rate": 1.6096297617455153e-06, + "loss": 1.7594, + "step": 19541500 + }, + { + "epoch": 96.82, + "learning_rate": 1.6083911753194316e-06, + "loss": 1.7809, + "step": 19542000 + }, + { + "epoch": 96.82, + "learning_rate": 1.607157543239052e-06, + "loss": 1.7691, + "step": 19542500 + }, + { + "epoch": 96.82, + "learning_rate": 1.6059214339858207e-06, + "loss": 1.7642, + "step": 19543000 + }, + { + "epoch": 96.83, + "learning_rate": 1.604682847559737e-06, + "loss": 1.7991, + "step": 19543500 + }, + { + "epoch": 96.83, + "learning_rate": 1.6034442611336534e-06, + "loss": 1.781, + "step": 19544000 + }, + { + "epoch": 96.83, + "learning_rate": 1.6022056747075697e-06, + "loss": 1.7573, + "step": 19544500 + }, + { + "epoch": 96.83, + "learning_rate": 1.6009695654543383e-06, + "loss": 1.7686, + "step": 19545000 + }, + { + "epoch": 96.84, + "learning_rate": 1.5997309790282546e-06, + "loss": 1.7765, + "step": 19545500 + }, + { + "epoch": 96.84, + "learning_rate": 1.598492392602171e-06, + "loss": 1.7812, + "step": 19546000 + }, + { + "epoch": 96.84, + "learning_rate": 1.5972538061760872e-06, + "loss": 1.7667, + "step": 19546500 + }, + { + "epoch": 96.84, + "learning_rate": 1.5960152197500037e-06, + "loss": 1.7856, + "step": 19547000 + }, + { + "epoch": 96.85, + "learning_rate": 1.5947766333239202e-06, + "loss": 1.7714, + "step": 19547500 + }, + { + "epoch": 96.85, + "learning_rate": 1.5935380468978367e-06, + "loss": 1.7427, + "step": 19548000 + }, + { + "epoch": 96.85, + "learning_rate": 1.592299460471753e-06, + "loss": 1.7997, + "step": 19548500 + }, + { + "epoch": 96.85, + "learning_rate": 1.5910608740456693e-06, + "loss": 1.7525, + "step": 19549000 + }, + { + "epoch": 96.85, + "learning_rate": 1.589824764792438e-06, + "loss": 1.7727, + "step": 19549500 + }, + { + "epoch": 96.86, + "learning_rate": 1.5885861783663542e-06, + "loss": 1.764, + "step": 19550000 + }, + { + "epoch": 96.86, + "learning_rate": 1.5873475919402705e-06, + "loss": 1.7971, + "step": 19550500 + }, + { + "epoch": 96.86, + "learning_rate": 1.5861090055141868e-06, + "loss": 1.7743, + "step": 19551000 + }, + { + "epoch": 96.86, + "learning_rate": 1.5848704190881033e-06, + "loss": 1.7605, + "step": 19551500 + }, + { + "epoch": 96.87, + "learning_rate": 1.5836318326620196e-06, + "loss": 1.7656, + "step": 19552000 + }, + { + "epoch": 96.87, + "learning_rate": 1.5823932462359359e-06, + "loss": 1.7754, + "step": 19552500 + }, + { + "epoch": 96.87, + "learning_rate": 1.5811546598098522e-06, + "loss": 1.7737, + "step": 19553000 + }, + { + "epoch": 96.87, + "learning_rate": 1.5799160733837685e-06, + "loss": 1.7744, + "step": 19553500 + }, + { + "epoch": 96.88, + "learning_rate": 1.578677486957685e-06, + "loss": 1.7661, + "step": 19554000 + }, + { + "epoch": 96.88, + "learning_rate": 1.5774389005316013e-06, + "loss": 1.7731, + "step": 19554500 + }, + { + "epoch": 96.88, + "learning_rate": 1.5762027912783699e-06, + "loss": 1.7722, + "step": 19555000 + }, + { + "epoch": 96.88, + "learning_rate": 1.5749642048522862e-06, + "loss": 1.7812, + "step": 19555500 + }, + { + "epoch": 96.89, + "learning_rate": 1.5737256184262025e-06, + "loss": 1.7647, + "step": 19556000 + }, + { + "epoch": 96.89, + "learning_rate": 1.5724870320001192e-06, + "loss": 1.7568, + "step": 19556500 + }, + { + "epoch": 96.89, + "learning_rate": 1.5712509227468873e-06, + "loss": 1.7795, + "step": 19557000 + }, + { + "epoch": 96.89, + "learning_rate": 1.5700123363208036e-06, + "loss": 1.7727, + "step": 19557500 + }, + { + "epoch": 96.9, + "learning_rate": 1.5687762270675722e-06, + "loss": 1.7771, + "step": 19558000 + }, + { + "epoch": 96.9, + "learning_rate": 1.5675376406414887e-06, + "loss": 1.7857, + "step": 19558500 + }, + { + "epoch": 96.9, + "learning_rate": 1.566299054215405e-06, + "loss": 1.7725, + "step": 19559000 + }, + { + "epoch": 96.9, + "learning_rate": 1.5650629449621736e-06, + "loss": 1.7768, + "step": 19559500 + }, + { + "epoch": 96.91, + "learning_rate": 1.56382435853609e-06, + "loss": 1.782, + "step": 19560000 + }, + { + "epoch": 96.91, + "learning_rate": 1.5625857721100062e-06, + "loss": 1.7727, + "step": 19560500 + }, + { + "epoch": 96.91, + "learning_rate": 1.5613471856839227e-06, + "loss": 1.7636, + "step": 19561000 + }, + { + "epoch": 96.91, + "learning_rate": 1.560108599257839e-06, + "loss": 1.7679, + "step": 19561500 + }, + { + "epoch": 96.92, + "learning_rate": 1.5588700128317553e-06, + "loss": 1.7707, + "step": 19562000 + }, + { + "epoch": 96.92, + "learning_rate": 1.5576314264056718e-06, + "loss": 1.7689, + "step": 19562500 + }, + { + "epoch": 96.92, + "learning_rate": 1.5563928399795883e-06, + "loss": 1.7666, + "step": 19563000 + }, + { + "epoch": 96.92, + "learning_rate": 1.5551542535535046e-06, + "loss": 1.7815, + "step": 19563500 + }, + { + "epoch": 96.93, + "learning_rate": 1.553915667127421e-06, + "loss": 1.7636, + "step": 19564000 + }, + { + "epoch": 96.93, + "learning_rate": 1.5526770807013372e-06, + "loss": 1.7943, + "step": 19564500 + }, + { + "epoch": 96.93, + "learning_rate": 1.5514384942752535e-06, + "loss": 1.7625, + "step": 19565000 + }, + { + "epoch": 96.93, + "learning_rate": 1.55019990784917e-06, + "loss": 1.7836, + "step": 19565500 + }, + { + "epoch": 96.94, + "learning_rate": 1.5489613214230865e-06, + "loss": 1.7826, + "step": 19566000 + }, + { + "epoch": 96.94, + "learning_rate": 1.5477227349970028e-06, + "loss": 1.7827, + "step": 19566500 + }, + { + "epoch": 96.94, + "learning_rate": 1.546484148570919e-06, + "loss": 1.7787, + "step": 19567000 + }, + { + "epoch": 96.94, + "learning_rate": 1.5452455621448354e-06, + "loss": 1.7784, + "step": 19567500 + }, + { + "epoch": 96.95, + "learning_rate": 1.5440069757187517e-06, + "loss": 1.7933, + "step": 19568000 + }, + { + "epoch": 96.95, + "learning_rate": 1.542768389292668e-06, + "loss": 1.7818, + "step": 19568500 + }, + { + "epoch": 96.95, + "learning_rate": 1.5415298028665845e-06, + "loss": 1.7579, + "step": 19569000 + }, + { + "epoch": 96.95, + "learning_rate": 1.540291216440501e-06, + "loss": 1.7744, + "step": 19569500 + }, + { + "epoch": 96.96, + "learning_rate": 1.5390575843601217e-06, + "loss": 1.7896, + "step": 19570000 + }, + { + "epoch": 96.96, + "learning_rate": 1.537818997934038e-06, + "loss": 1.7505, + "step": 19570500 + }, + { + "epoch": 96.96, + "learning_rate": 1.5365804115079542e-06, + "loss": 1.806, + "step": 19571000 + }, + { + "epoch": 96.96, + "learning_rate": 1.5353443022547228e-06, + "loss": 1.7725, + "step": 19571500 + }, + { + "epoch": 96.97, + "learning_rate": 1.5341057158286391e-06, + "loss": 1.7695, + "step": 19572000 + }, + { + "epoch": 96.97, + "learning_rate": 1.5328671294025554e-06, + "loss": 1.7656, + "step": 19572500 + }, + { + "epoch": 96.97, + "learning_rate": 1.531628542976472e-06, + "loss": 1.7648, + "step": 19573000 + }, + { + "epoch": 96.97, + "learning_rate": 1.5303899565503884e-06, + "loss": 1.7489, + "step": 19573500 + }, + { + "epoch": 96.98, + "learning_rate": 1.5291513701243047e-06, + "loss": 1.7772, + "step": 19574000 + }, + { + "epoch": 96.98, + "learning_rate": 1.5279152608710731e-06, + "loss": 1.7718, + "step": 19574500 + }, + { + "epoch": 96.98, + "learning_rate": 1.5266766744449894e-06, + "loss": 1.7679, + "step": 19575000 + }, + { + "epoch": 96.98, + "learning_rate": 1.525438088018906e-06, + "loss": 1.785, + "step": 19575500 + }, + { + "epoch": 96.99, + "learning_rate": 1.5241995015928222e-06, + "loss": 1.7767, + "step": 19576000 + }, + { + "epoch": 96.99, + "learning_rate": 1.5229633923395906e-06, + "loss": 1.7805, + "step": 19576500 + }, + { + "epoch": 96.99, + "learning_rate": 1.521724805913507e-06, + "loss": 1.7484, + "step": 19577000 + }, + { + "epoch": 96.99, + "learning_rate": 1.5204862194874236e-06, + "loss": 1.755, + "step": 19577500 + }, + { + "epoch": 97.0, + "learning_rate": 1.51924763306134e-06, + "loss": 1.7707, + "step": 19578000 + }, + { + "epoch": 97.0, + "learning_rate": 1.5180090466352562e-06, + "loss": 1.7597, + "step": 19578500 + }, + { + "epoch": 97.0, + "eval_accuracy": 0.6871512097341254, + "eval_accuracy_mlm": 0.6487664185147802, + "eval_accuracy_nsp": 0.8682297938099851, + "eval_loss": 2.3128037452697754, + "eval_runtime": 147.1489, + "eval_samples_per_second": 1732.66, + "eval_steps_per_second": 72.199, + "step": 19578771 + }, + { + "epoch": 97.0, + "learning_rate": 1.5167704602091725e-06, + "loss": 1.7546, + "step": 19579000 + }, + { + "epoch": 97.0, + "learning_rate": 1.5155318737830888e-06, + "loss": 1.7672, + "step": 19579500 + }, + { + "epoch": 97.01, + "learning_rate": 1.5142932873570053e-06, + "loss": 1.7602, + "step": 19580000 + }, + { + "epoch": 97.01, + "learning_rate": 1.5130547009309216e-06, + "loss": 1.7658, + "step": 19580500 + }, + { + "epoch": 97.01, + "learning_rate": 1.511816114504838e-06, + "loss": 1.7547, + "step": 19581000 + }, + { + "epoch": 97.01, + "learning_rate": 1.5105800052516067e-06, + "loss": 1.7727, + "step": 19581500 + }, + { + "epoch": 97.02, + "learning_rate": 1.509343895998375e-06, + "loss": 1.7694, + "step": 19582000 + }, + { + "epoch": 97.02, + "learning_rate": 1.5081053095722914e-06, + "loss": 1.7775, + "step": 19582500 + }, + { + "epoch": 97.02, + "learning_rate": 1.5068667231462079e-06, + "loss": 1.7942, + "step": 19583000 + }, + { + "epoch": 97.02, + "learning_rate": 1.5056281367201241e-06, + "loss": 1.7681, + "step": 19583500 + }, + { + "epoch": 97.03, + "learning_rate": 1.5043920274668925e-06, + "loss": 1.756, + "step": 19584000 + }, + { + "epoch": 97.03, + "learning_rate": 1.503153441040809e-06, + "loss": 1.7519, + "step": 19584500 + }, + { + "epoch": 97.03, + "learning_rate": 1.5019148546147255e-06, + "loss": 1.779, + "step": 19585000 + }, + { + "epoch": 97.03, + "learning_rate": 1.5006762681886418e-06, + "loss": 1.7867, + "step": 19585500 + }, + { + "epoch": 97.04, + "learning_rate": 1.4994376817625581e-06, + "loss": 1.7509, + "step": 19586000 + }, + { + "epoch": 97.04, + "learning_rate": 1.4981990953364744e-06, + "loss": 1.7484, + "step": 19586500 + }, + { + "epoch": 97.04, + "learning_rate": 1.4969605089103907e-06, + "loss": 1.7661, + "step": 19587000 + }, + { + "epoch": 97.04, + "learning_rate": 1.495721922484307e-06, + "loss": 1.7693, + "step": 19587500 + }, + { + "epoch": 97.05, + "learning_rate": 1.4944833360582235e-06, + "loss": 1.7697, + "step": 19588000 + }, + { + "epoch": 97.05, + "learning_rate": 1.49324474963214e-06, + "loss": 1.7784, + "step": 19588500 + }, + { + "epoch": 97.05, + "learning_rate": 1.4920061632060563e-06, + "loss": 1.7567, + "step": 19589000 + }, + { + "epoch": 97.05, + "learning_rate": 1.4907675767799726e-06, + "loss": 1.7582, + "step": 19589500 + }, + { + "epoch": 97.06, + "learning_rate": 1.489528990353889e-06, + "loss": 1.7645, + "step": 19590000 + }, + { + "epoch": 97.06, + "learning_rate": 1.4882904039278052e-06, + "loss": 1.7531, + "step": 19590500 + }, + { + "epoch": 97.06, + "learning_rate": 1.4870518175017217e-06, + "loss": 1.7213, + "step": 19591000 + }, + { + "epoch": 97.06, + "learning_rate": 1.4858132310756382e-06, + "loss": 1.7696, + "step": 19591500 + }, + { + "epoch": 97.07, + "learning_rate": 1.4845771218224066e-06, + "loss": 1.779, + "step": 19592000 + }, + { + "epoch": 97.07, + "learning_rate": 1.483338535396323e-06, + "loss": 1.7905, + "step": 19592500 + }, + { + "epoch": 97.07, + "learning_rate": 1.4820999489702394e-06, + "loss": 1.7474, + "step": 19593000 + }, + { + "epoch": 97.07, + "learning_rate": 1.4808613625441557e-06, + "loss": 1.7516, + "step": 19593500 + }, + { + "epoch": 97.08, + "learning_rate": 1.479622776118072e-06, + "loss": 1.7688, + "step": 19594000 + }, + { + "epoch": 97.08, + "learning_rate": 1.4783841896919883e-06, + "loss": 1.7769, + "step": 19594500 + }, + { + "epoch": 97.08, + "learning_rate": 1.4771456032659048e-06, + "loss": 1.7458, + "step": 19595000 + }, + { + "epoch": 97.08, + "learning_rate": 1.4759070168398213e-06, + "loss": 1.7846, + "step": 19595500 + }, + { + "epoch": 97.09, + "learning_rate": 1.4746684304137376e-06, + "loss": 1.7676, + "step": 19596000 + }, + { + "epoch": 97.09, + "learning_rate": 1.473432321160506e-06, + "loss": 1.7624, + "step": 19596500 + }, + { + "epoch": 97.09, + "learning_rate": 1.4721937347344225e-06, + "loss": 1.7679, + "step": 19597000 + }, + { + "epoch": 97.09, + "learning_rate": 1.4709551483083387e-06, + "loss": 1.8029, + "step": 19597500 + }, + { + "epoch": 97.1, + "learning_rate": 1.4697190390551071e-06, + "loss": 1.7733, + "step": 19598000 + }, + { + "epoch": 97.1, + "learning_rate": 1.4684804526290236e-06, + "loss": 1.7623, + "step": 19598500 + }, + { + "epoch": 97.1, + "learning_rate": 1.46724186620294e-06, + "loss": 1.7845, + "step": 19599000 + }, + { + "epoch": 97.1, + "learning_rate": 1.4660032797768564e-06, + "loss": 1.768, + "step": 19599500 + }, + { + "epoch": 97.11, + "learning_rate": 1.4647646933507727e-06, + "loss": 1.762, + "step": 19600000 + }, + { + "epoch": 97.11, + "learning_rate": 1.463526106924689e-06, + "loss": 1.7689, + "step": 19600500 + }, + { + "epoch": 97.11, + "learning_rate": 1.4622899976714576e-06, + "loss": 1.7591, + "step": 19601000 + }, + { + "epoch": 97.11, + "learning_rate": 1.461051411245374e-06, + "loss": 1.7873, + "step": 19601500 + }, + { + "epoch": 97.12, + "learning_rate": 1.4598153019921425e-06, + "loss": 1.7869, + "step": 19602000 + }, + { + "epoch": 97.12, + "learning_rate": 1.4585767155660588e-06, + "loss": 1.7642, + "step": 19602500 + }, + { + "epoch": 97.12, + "learning_rate": 1.4573381291399753e-06, + "loss": 1.7792, + "step": 19603000 + }, + { + "epoch": 97.12, + "learning_rate": 1.4560995427138916e-06, + "loss": 1.7682, + "step": 19603500 + }, + { + "epoch": 97.12, + "learning_rate": 1.4548609562878079e-06, + "loss": 1.8008, + "step": 19604000 + }, + { + "epoch": 97.13, + "learning_rate": 1.4536223698617242e-06, + "loss": 1.755, + "step": 19604500 + }, + { + "epoch": 97.13, + "learning_rate": 1.4523837834356407e-06, + "loss": 1.7603, + "step": 19605000 + }, + { + "epoch": 97.13, + "learning_rate": 1.451145197009557e-06, + "loss": 1.7713, + "step": 19605500 + }, + { + "epoch": 97.13, + "learning_rate": 1.4499090877563254e-06, + "loss": 1.7513, + "step": 19606000 + }, + { + "epoch": 97.14, + "learning_rate": 1.4486705013302419e-06, + "loss": 1.7599, + "step": 19606500 + }, + { + "epoch": 97.14, + "learning_rate": 1.4474319149041584e-06, + "loss": 1.7784, + "step": 19607000 + }, + { + "epoch": 97.14, + "learning_rate": 1.4461958056509268e-06, + "loss": 1.7667, + "step": 19607500 + }, + { + "epoch": 97.14, + "learning_rate": 1.4449572192248433e-06, + "loss": 1.7528, + "step": 19608000 + }, + { + "epoch": 97.15, + "learning_rate": 1.4437186327987596e-06, + "loss": 1.7417, + "step": 19608500 + }, + { + "epoch": 97.15, + "learning_rate": 1.442482523545528e-06, + "loss": 1.7629, + "step": 19609000 + }, + { + "epoch": 97.15, + "learning_rate": 1.4412439371194442e-06, + "loss": 1.7657, + "step": 19609500 + }, + { + "epoch": 97.15, + "learning_rate": 1.4400053506933607e-06, + "loss": 1.7576, + "step": 19610000 + }, + { + "epoch": 97.16, + "learning_rate": 1.438766764267277e-06, + "loss": 1.7852, + "step": 19610500 + }, + { + "epoch": 97.16, + "learning_rate": 1.4375281778411935e-06, + "loss": 1.7767, + "step": 19611000 + }, + { + "epoch": 97.16, + "learning_rate": 1.4362895914151098e-06, + "loss": 1.7851, + "step": 19611500 + }, + { + "epoch": 97.16, + "learning_rate": 1.4350510049890261e-06, + "loss": 1.7677, + "step": 19612000 + }, + { + "epoch": 97.17, + "learning_rate": 1.4338124185629424e-06, + "loss": 1.7673, + "step": 19612500 + }, + { + "epoch": 97.17, + "learning_rate": 1.432573832136859e-06, + "loss": 1.7776, + "step": 19613000 + }, + { + "epoch": 97.17, + "learning_rate": 1.4313352457107752e-06, + "loss": 1.7592, + "step": 19613500 + }, + { + "epoch": 97.17, + "learning_rate": 1.4300991364575438e-06, + "loss": 1.765, + "step": 19614000 + }, + { + "epoch": 97.18, + "learning_rate": 1.4288605500314603e-06, + "loss": 1.7632, + "step": 19614500 + }, + { + "epoch": 97.18, + "learning_rate": 1.4276219636053766e-06, + "loss": 1.7646, + "step": 19615000 + }, + { + "epoch": 97.18, + "learning_rate": 1.426383377179293e-06, + "loss": 1.7751, + "step": 19615500 + }, + { + "epoch": 97.18, + "learning_rate": 1.4251447907532092e-06, + "loss": 1.7708, + "step": 19616000 + }, + { + "epoch": 97.19, + "learning_rate": 1.4239062043271255e-06, + "loss": 1.7755, + "step": 19616500 + }, + { + "epoch": 97.19, + "learning_rate": 1.4226676179010418e-06, + "loss": 1.7566, + "step": 19617000 + }, + { + "epoch": 97.19, + "learning_rate": 1.4214290314749583e-06, + "loss": 1.7606, + "step": 19617500 + }, + { + "epoch": 97.19, + "learning_rate": 1.4201904450488748e-06, + "loss": 1.7396, + "step": 19618000 + }, + { + "epoch": 97.2, + "learning_rate": 1.418951858622791e-06, + "loss": 1.7663, + "step": 19618500 + }, + { + "epoch": 97.2, + "learning_rate": 1.4177132721967074e-06, + "loss": 1.7751, + "step": 19619000 + }, + { + "epoch": 97.2, + "learning_rate": 1.416477162943476e-06, + "loss": 1.7858, + "step": 19619500 + }, + { + "epoch": 97.2, + "learning_rate": 1.4152385765173923e-06, + "loss": 1.7663, + "step": 19620000 + }, + { + "epoch": 97.21, + "learning_rate": 1.4139999900913086e-06, + "loss": 1.7891, + "step": 19620500 + }, + { + "epoch": 97.21, + "learning_rate": 1.412761403665225e-06, + "loss": 1.7552, + "step": 19621000 + }, + { + "epoch": 97.21, + "learning_rate": 1.4115228172391414e-06, + "loss": 1.7719, + "step": 19621500 + }, + { + "epoch": 97.21, + "learning_rate": 1.4102842308130579e-06, + "loss": 1.7584, + "step": 19622000 + }, + { + "epoch": 97.22, + "learning_rate": 1.4090456443869742e-06, + "loss": 1.7691, + "step": 19622500 + }, + { + "epoch": 97.22, + "learning_rate": 1.4078070579608905e-06, + "loss": 1.7712, + "step": 19623000 + }, + { + "epoch": 97.22, + "learning_rate": 1.406570948707659e-06, + "loss": 1.775, + "step": 19623500 + }, + { + "epoch": 97.22, + "learning_rate": 1.4053323622815753e-06, + "loss": 1.7804, + "step": 19624000 + }, + { + "epoch": 97.23, + "learning_rate": 1.4040962530283437e-06, + "loss": 1.7761, + "step": 19624500 + }, + { + "epoch": 97.23, + "learning_rate": 1.4028576666022602e-06, + "loss": 1.7656, + "step": 19625000 + }, + { + "epoch": 97.23, + "learning_rate": 1.4016190801761767e-06, + "loss": 1.7517, + "step": 19625500 + }, + { + "epoch": 97.23, + "learning_rate": 1.400380493750093e-06, + "loss": 1.7781, + "step": 19626000 + }, + { + "epoch": 97.24, + "learning_rate": 1.3991419073240093e-06, + "loss": 1.7756, + "step": 19626500 + }, + { + "epoch": 97.24, + "learning_rate": 1.3979033208979256e-06, + "loss": 1.7576, + "step": 19627000 + }, + { + "epoch": 97.24, + "learning_rate": 1.396664734471842e-06, + "loss": 1.7815, + "step": 19627500 + }, + { + "epoch": 97.24, + "learning_rate": 1.3954261480457584e-06, + "loss": 1.7735, + "step": 19628000 + }, + { + "epoch": 97.25, + "learning_rate": 1.394190038792527e-06, + "loss": 1.7687, + "step": 19628500 + }, + { + "epoch": 97.25, + "learning_rate": 1.3929514523664433e-06, + "loss": 1.7684, + "step": 19629000 + }, + { + "epoch": 97.25, + "learning_rate": 1.391715343113212e-06, + "loss": 1.7791, + "step": 19629500 + }, + { + "epoch": 97.25, + "learning_rate": 1.3904767566871282e-06, + "loss": 1.7797, + "step": 19630000 + }, + { + "epoch": 97.26, + "learning_rate": 1.3892406474338968e-06, + "loss": 1.7826, + "step": 19630500 + }, + { + "epoch": 97.26, + "learning_rate": 1.388002061007813e-06, + "loss": 1.7884, + "step": 19631000 + }, + { + "epoch": 97.26, + "learning_rate": 1.3867634745817294e-06, + "loss": 1.7643, + "step": 19631500 + }, + { + "epoch": 97.26, + "learning_rate": 1.3855248881556457e-06, + "loss": 1.7788, + "step": 19632000 + }, + { + "epoch": 97.27, + "learning_rate": 1.3842863017295622e-06, + "loss": 1.7674, + "step": 19632500 + }, + { + "epoch": 97.27, + "learning_rate": 1.3830477153034787e-06, + "loss": 1.7901, + "step": 19633000 + }, + { + "epoch": 97.27, + "learning_rate": 1.381809128877395e-06, + "loss": 1.778, + "step": 19633500 + }, + { + "epoch": 97.27, + "learning_rate": 1.3805705424513113e-06, + "loss": 1.7617, + "step": 19634000 + }, + { + "epoch": 97.28, + "learning_rate": 1.3793319560252276e-06, + "loss": 1.7737, + "step": 19634500 + }, + { + "epoch": 97.28, + "learning_rate": 1.3780933695991439e-06, + "loss": 1.7886, + "step": 19635000 + }, + { + "epoch": 97.28, + "learning_rate": 1.3768547831730601e-06, + "loss": 1.7649, + "step": 19635500 + }, + { + "epoch": 97.28, + "learning_rate": 1.3756161967469769e-06, + "loss": 1.7973, + "step": 19636000 + }, + { + "epoch": 97.29, + "learning_rate": 1.3743776103208932e-06, + "loss": 1.7982, + "step": 19636500 + }, + { + "epoch": 97.29, + "learning_rate": 1.3731415010676615e-06, + "loss": 1.7675, + "step": 19637000 + }, + { + "epoch": 97.29, + "learning_rate": 1.3719053918144301e-06, + "loss": 1.7869, + "step": 19637500 + }, + { + "epoch": 97.29, + "learning_rate": 1.3706692825611987e-06, + "loss": 1.782, + "step": 19638000 + }, + { + "epoch": 97.3, + "learning_rate": 1.369430696135115e-06, + "loss": 1.7823, + "step": 19638500 + }, + { + "epoch": 97.3, + "learning_rate": 1.3681921097090313e-06, + "loss": 1.7464, + "step": 19639000 + }, + { + "epoch": 97.3, + "learning_rate": 1.3669535232829476e-06, + "loss": 1.7815, + "step": 19639500 + }, + { + "epoch": 97.3, + "learning_rate": 1.3657149368568641e-06, + "loss": 1.7678, + "step": 19640000 + }, + { + "epoch": 97.31, + "learning_rate": 1.3644763504307804e-06, + "loss": 1.7916, + "step": 19640500 + }, + { + "epoch": 97.31, + "learning_rate": 1.363240241177549e-06, + "loss": 1.7716, + "step": 19641000 + }, + { + "epoch": 97.31, + "learning_rate": 1.3620016547514653e-06, + "loss": 1.7713, + "step": 19641500 + }, + { + "epoch": 97.31, + "learning_rate": 1.3607630683253816e-06, + "loss": 1.7461, + "step": 19642000 + }, + { + "epoch": 97.32, + "learning_rate": 1.359524481899298e-06, + "loss": 1.7682, + "step": 19642500 + }, + { + "epoch": 97.32, + "learning_rate": 1.3582858954732144e-06, + "loss": 1.7682, + "step": 19643000 + }, + { + "epoch": 97.32, + "learning_rate": 1.3570497862199828e-06, + "loss": 1.8012, + "step": 19643500 + }, + { + "epoch": 97.32, + "learning_rate": 1.3558111997938993e-06, + "loss": 1.7251, + "step": 19644000 + }, + { + "epoch": 97.33, + "learning_rate": 1.3545726133678158e-06, + "loss": 1.7824, + "step": 19644500 + }, + { + "epoch": 97.33, + "learning_rate": 1.353334026941732e-06, + "loss": 1.7649, + "step": 19645000 + }, + { + "epoch": 97.33, + "learning_rate": 1.3520954405156484e-06, + "loss": 1.7681, + "step": 19645500 + }, + { + "epoch": 97.33, + "learning_rate": 1.3508568540895647e-06, + "loss": 1.7781, + "step": 19646000 + }, + { + "epoch": 97.34, + "learning_rate": 1.349618267663481e-06, + "loss": 1.7733, + "step": 19646500 + }, + { + "epoch": 97.34, + "learning_rate": 1.3483796812373975e-06, + "loss": 1.7433, + "step": 19647000 + }, + { + "epoch": 97.34, + "learning_rate": 1.347141094811314e-06, + "loss": 1.7692, + "step": 19647500 + }, + { + "epoch": 97.34, + "learning_rate": 1.3459025083852303e-06, + "loss": 1.7837, + "step": 19648000 + }, + { + "epoch": 97.35, + "learning_rate": 1.3446639219591466e-06, + "loss": 1.7578, + "step": 19648500 + }, + { + "epoch": 97.35, + "learning_rate": 1.3434253355330628e-06, + "loss": 1.761, + "step": 19649000 + }, + { + "epoch": 97.35, + "learning_rate": 1.3421867491069791e-06, + "loss": 1.7524, + "step": 19649500 + }, + { + "epoch": 97.35, + "learning_rate": 1.3409481626808956e-06, + "loss": 1.7609, + "step": 19650000 + }, + { + "epoch": 97.36, + "learning_rate": 1.339712053427664e-06, + "loss": 1.7622, + "step": 19650500 + }, + { + "epoch": 97.36, + "learning_rate": 1.3384734670015805e-06, + "loss": 1.7524, + "step": 19651000 + }, + { + "epoch": 97.36, + "learning_rate": 1.3372348805754968e-06, + "loss": 1.7559, + "step": 19651500 + }, + { + "epoch": 97.36, + "learning_rate": 1.3359987713222654e-06, + "loss": 1.7725, + "step": 19652000 + }, + { + "epoch": 97.37, + "learning_rate": 1.3347601848961817e-06, + "loss": 1.763, + "step": 19652500 + }, + { + "epoch": 97.37, + "learning_rate": 1.333521598470098e-06, + "loss": 1.771, + "step": 19653000 + }, + { + "epoch": 97.37, + "learning_rate": 1.3322830120440145e-06, + "loss": 1.771, + "step": 19653500 + }, + { + "epoch": 97.37, + "learning_rate": 1.3310444256179308e-06, + "loss": 1.7806, + "step": 19654000 + }, + { + "epoch": 97.38, + "learning_rate": 1.329805839191847e-06, + "loss": 1.7713, + "step": 19654500 + }, + { + "epoch": 97.38, + "learning_rate": 1.3285672527657636e-06, + "loss": 1.7857, + "step": 19655000 + }, + { + "epoch": 97.38, + "learning_rate": 1.32732866633968e-06, + "loss": 1.7563, + "step": 19655500 + }, + { + "epoch": 97.38, + "learning_rate": 1.3260925570864485e-06, + "loss": 1.7745, + "step": 19656000 + }, + { + "epoch": 97.39, + "learning_rate": 1.3248539706603648e-06, + "loss": 1.7793, + "step": 19656500 + }, + { + "epoch": 97.39, + "learning_rate": 1.323615384234281e-06, + "loss": 1.7389, + "step": 19657000 + }, + { + "epoch": 97.39, + "learning_rate": 1.3223792749810497e-06, + "loss": 1.7671, + "step": 19657500 + }, + { + "epoch": 97.39, + "learning_rate": 1.321140688554966e-06, + "loss": 1.768, + "step": 19658000 + }, + { + "epoch": 97.4, + "learning_rate": 1.3199045793017346e-06, + "loss": 1.7535, + "step": 19658500 + }, + { + "epoch": 97.4, + "learning_rate": 1.318665992875651e-06, + "loss": 1.7656, + "step": 19659000 + }, + { + "epoch": 97.4, + "learning_rate": 1.3174274064495674e-06, + "loss": 1.7822, + "step": 19659500 + }, + { + "epoch": 97.4, + "learning_rate": 1.316191297196336e-06, + "loss": 1.7471, + "step": 19660000 + }, + { + "epoch": 97.4, + "learning_rate": 1.3149527107702523e-06, + "loss": 1.7435, + "step": 19660500 + }, + { + "epoch": 97.41, + "learning_rate": 1.3137141243441686e-06, + "loss": 1.7685, + "step": 19661000 + }, + { + "epoch": 97.41, + "learning_rate": 1.3124755379180848e-06, + "loss": 1.7598, + "step": 19661500 + }, + { + "epoch": 97.41, + "learning_rate": 1.3112369514920011e-06, + "loss": 1.7538, + "step": 19662000 + }, + { + "epoch": 97.41, + "learning_rate": 1.3099983650659176e-06, + "loss": 1.7734, + "step": 19662500 + }, + { + "epoch": 97.42, + "learning_rate": 1.3087597786398341e-06, + "loss": 1.774, + "step": 19663000 + }, + { + "epoch": 97.42, + "learning_rate": 1.3075211922137504e-06, + "loss": 1.7541, + "step": 19663500 + }, + { + "epoch": 97.42, + "learning_rate": 1.3062826057876667e-06, + "loss": 1.7711, + "step": 19664000 + }, + { + "epoch": 97.42, + "learning_rate": 1.305044019361583e-06, + "loss": 1.7749, + "step": 19664500 + }, + { + "epoch": 97.43, + "learning_rate": 1.3038054329354993e-06, + "loss": 1.792, + "step": 19665000 + }, + { + "epoch": 97.43, + "learning_rate": 1.3025668465094158e-06, + "loss": 1.7684, + "step": 19665500 + }, + { + "epoch": 97.43, + "learning_rate": 1.3013282600833323e-06, + "loss": 1.7717, + "step": 19666000 + }, + { + "epoch": 97.43, + "learning_rate": 1.3000896736572486e-06, + "loss": 1.7568, + "step": 19666500 + }, + { + "epoch": 97.44, + "learning_rate": 1.298853564404017e-06, + "loss": 1.7658, + "step": 19667000 + }, + { + "epoch": 97.44, + "learning_rate": 1.2976174551507856e-06, + "loss": 1.7915, + "step": 19667500 + }, + { + "epoch": 97.44, + "learning_rate": 1.296378868724702e-06, + "loss": 1.7448, + "step": 19668000 + }, + { + "epoch": 97.44, + "learning_rate": 1.2951402822986182e-06, + "loss": 1.7846, + "step": 19668500 + }, + { + "epoch": 97.45, + "learning_rate": 1.2939016958725347e-06, + "loss": 1.7807, + "step": 19669000 + }, + { + "epoch": 97.45, + "learning_rate": 1.292663109446451e-06, + "loss": 1.753, + "step": 19669500 + }, + { + "epoch": 97.45, + "learning_rate": 1.2914245230203675e-06, + "loss": 1.7772, + "step": 19670000 + }, + { + "epoch": 97.45, + "learning_rate": 1.2901859365942838e-06, + "loss": 1.7613, + "step": 19670500 + }, + { + "epoch": 97.46, + "learning_rate": 1.2889473501682e-06, + "loss": 1.7823, + "step": 19671000 + }, + { + "epoch": 97.46, + "learning_rate": 1.2877087637421164e-06, + "loss": 1.7761, + "step": 19671500 + }, + { + "epoch": 97.46, + "learning_rate": 1.2864701773160329e-06, + "loss": 1.7496, + "step": 19672000 + }, + { + "epoch": 97.46, + "learning_rate": 1.2852315908899492e-06, + "loss": 1.7696, + "step": 19672500 + }, + { + "epoch": 97.47, + "learning_rate": 1.2839930044638655e-06, + "loss": 1.7918, + "step": 19673000 + }, + { + "epoch": 97.47, + "learning_rate": 1.282754418037782e-06, + "loss": 1.7457, + "step": 19673500 + }, + { + "epoch": 97.47, + "learning_rate": 1.2815158316116983e-06, + "loss": 1.7561, + "step": 19674000 + }, + { + "epoch": 97.47, + "learning_rate": 1.2802797223584669e-06, + "loss": 1.7666, + "step": 19674500 + }, + { + "epoch": 97.48, + "learning_rate": 1.2790411359323831e-06, + "loss": 1.7688, + "step": 19675000 + }, + { + "epoch": 97.48, + "learning_rate": 1.2778050266791517e-06, + "loss": 1.7644, + "step": 19675500 + }, + { + "epoch": 97.48, + "learning_rate": 1.276566440253068e-06, + "loss": 1.7311, + "step": 19676000 + }, + { + "epoch": 97.48, + "learning_rate": 1.2753278538269843e-06, + "loss": 1.7753, + "step": 19676500 + }, + { + "epoch": 97.49, + "learning_rate": 1.2740892674009008e-06, + "loss": 1.7833, + "step": 19677000 + }, + { + "epoch": 97.49, + "learning_rate": 1.2728506809748171e-06, + "loss": 1.7539, + "step": 19677500 + }, + { + "epoch": 97.49, + "learning_rate": 1.2716120945487334e-06, + "loss": 1.7617, + "step": 19678000 + }, + { + "epoch": 97.49, + "learning_rate": 1.27037350812265e-06, + "loss": 1.7662, + "step": 19678500 + }, + { + "epoch": 97.5, + "learning_rate": 1.2691349216965662e-06, + "loss": 1.7592, + "step": 19679000 + }, + { + "epoch": 97.5, + "learning_rate": 1.2678963352704825e-06, + "loss": 1.759, + "step": 19679500 + }, + { + "epoch": 97.5, + "learning_rate": 1.2666602260172511e-06, + "loss": 1.7664, + "step": 19680000 + }, + { + "epoch": 97.5, + "learning_rate": 1.2654241167640195e-06, + "loss": 1.7703, + "step": 19680500 + }, + { + "epoch": 97.51, + "learning_rate": 1.264188007510788e-06, + "loss": 1.7597, + "step": 19681000 + }, + { + "epoch": 97.51, + "learning_rate": 1.2629494210847046e-06, + "loss": 1.7917, + "step": 19681500 + }, + { + "epoch": 97.51, + "learning_rate": 1.2617108346586209e-06, + "loss": 1.754, + "step": 19682000 + }, + { + "epoch": 97.51, + "learning_rate": 1.2604722482325372e-06, + "loss": 1.7945, + "step": 19682500 + }, + { + "epoch": 97.52, + "learning_rate": 1.2592336618064537e-06, + "loss": 1.7634, + "step": 19683000 + }, + { + "epoch": 97.52, + "learning_rate": 1.25799507538037e-06, + "loss": 1.7538, + "step": 19683500 + }, + { + "epoch": 97.52, + "learning_rate": 1.2567564889542863e-06, + "loss": 1.7468, + "step": 19684000 + }, + { + "epoch": 97.52, + "learning_rate": 1.2555179025282026e-06, + "loss": 1.7567, + "step": 19684500 + }, + { + "epoch": 97.53, + "learning_rate": 1.254279316102119e-06, + "loss": 1.7433, + "step": 19685000 + }, + { + "epoch": 97.53, + "learning_rate": 1.2530407296760354e-06, + "loss": 1.7718, + "step": 19685500 + }, + { + "epoch": 97.53, + "learning_rate": 1.2518021432499519e-06, + "loss": 1.7742, + "step": 19686000 + }, + { + "epoch": 97.53, + "learning_rate": 1.2505635568238682e-06, + "loss": 1.7599, + "step": 19686500 + }, + { + "epoch": 97.54, + "learning_rate": 1.2493274475706365e-06, + "loss": 1.7744, + "step": 19687000 + }, + { + "epoch": 97.54, + "learning_rate": 1.2480913383174051e-06, + "loss": 1.7805, + "step": 19687500 + }, + { + "epoch": 97.54, + "learning_rate": 1.2468527518913214e-06, + "loss": 1.7652, + "step": 19688000 + }, + { + "epoch": 97.54, + "learning_rate": 1.245614165465238e-06, + "loss": 1.7571, + "step": 19688500 + }, + { + "epoch": 97.55, + "learning_rate": 1.2443755790391542e-06, + "loss": 1.7606, + "step": 19689000 + }, + { + "epoch": 97.55, + "learning_rate": 1.2431369926130707e-06, + "loss": 1.767, + "step": 19689500 + }, + { + "epoch": 97.55, + "learning_rate": 1.241898406186987e-06, + "loss": 1.8022, + "step": 19690000 + }, + { + "epoch": 97.55, + "learning_rate": 1.2406622969337554e-06, + "loss": 1.7593, + "step": 19690500 + }, + { + "epoch": 97.56, + "learning_rate": 1.239423710507672e-06, + "loss": 1.7545, + "step": 19691000 + }, + { + "epoch": 97.56, + "learning_rate": 1.2381851240815882e-06, + "loss": 1.7776, + "step": 19691500 + }, + { + "epoch": 97.56, + "learning_rate": 1.2369465376555045e-06, + "loss": 1.7478, + "step": 19692000 + }, + { + "epoch": 97.56, + "learning_rate": 1.2357104284022731e-06, + "loss": 1.7958, + "step": 19692500 + }, + { + "epoch": 97.57, + "learning_rate": 1.2344718419761896e-06, + "loss": 1.7311, + "step": 19693000 + }, + { + "epoch": 97.57, + "learning_rate": 1.233233255550106e-06, + "loss": 1.7466, + "step": 19693500 + }, + { + "epoch": 97.57, + "learning_rate": 1.2319946691240222e-06, + "loss": 1.7474, + "step": 19694000 + }, + { + "epoch": 97.57, + "learning_rate": 1.2307560826979385e-06, + "loss": 1.752, + "step": 19694500 + }, + { + "epoch": 97.58, + "learning_rate": 1.2295174962718548e-06, + "loss": 1.7535, + "step": 19695000 + }, + { + "epoch": 97.58, + "learning_rate": 1.2282789098457713e-06, + "loss": 1.7494, + "step": 19695500 + }, + { + "epoch": 97.58, + "learning_rate": 1.2270403234196878e-06, + "loss": 1.7415, + "step": 19696000 + }, + { + "epoch": 97.58, + "learning_rate": 1.225801736993604e-06, + "loss": 1.7925, + "step": 19696500 + }, + { + "epoch": 97.59, + "learning_rate": 1.2245631505675204e-06, + "loss": 1.7631, + "step": 19697000 + }, + { + "epoch": 97.59, + "learning_rate": 1.2233245641414367e-06, + "loss": 1.7752, + "step": 19697500 + }, + { + "epoch": 97.59, + "learning_rate": 1.2220884548882053e-06, + "loss": 1.7872, + "step": 19698000 + }, + { + "epoch": 97.59, + "learning_rate": 1.2208498684621216e-06, + "loss": 1.7462, + "step": 19698500 + }, + { + "epoch": 97.6, + "learning_rate": 1.2196112820360379e-06, + "loss": 1.7791, + "step": 19699000 + }, + { + "epoch": 97.6, + "learning_rate": 1.2183726956099544e-06, + "loss": 1.7638, + "step": 19699500 + }, + { + "epoch": 97.6, + "learning_rate": 1.2171341091838707e-06, + "loss": 1.746, + "step": 19700000 + }, + { + "epoch": 97.6, + "learning_rate": 1.2158955227577872e-06, + "loss": 1.762, + "step": 19700500 + }, + { + "epoch": 97.61, + "learning_rate": 1.2146594135045555e-06, + "loss": 1.7906, + "step": 19701000 + }, + { + "epoch": 97.61, + "learning_rate": 1.2134208270784718e-06, + "loss": 1.769, + "step": 19701500 + }, + { + "epoch": 97.61, + "learning_rate": 1.2121822406523883e-06, + "loss": 1.7781, + "step": 19702000 + }, + { + "epoch": 97.61, + "learning_rate": 1.2109436542263046e-06, + "loss": 1.7548, + "step": 19702500 + }, + { + "epoch": 97.62, + "learning_rate": 1.209707544973073e-06, + "loss": 1.7679, + "step": 19703000 + }, + { + "epoch": 97.62, + "learning_rate": 1.2084689585469895e-06, + "loss": 1.7793, + "step": 19703500 + }, + { + "epoch": 97.62, + "learning_rate": 1.207230372120906e-06, + "loss": 1.7655, + "step": 19704000 + }, + { + "epoch": 97.62, + "learning_rate": 1.2059917856948223e-06, + "loss": 1.7691, + "step": 19704500 + }, + { + "epoch": 97.63, + "learning_rate": 1.2047531992687386e-06, + "loss": 1.7806, + "step": 19705000 + }, + { + "epoch": 97.63, + "learning_rate": 1.203514612842655e-06, + "loss": 1.7869, + "step": 19705500 + }, + { + "epoch": 97.63, + "learning_rate": 1.2022760264165712e-06, + "loss": 1.7528, + "step": 19706000 + }, + { + "epoch": 97.63, + "learning_rate": 1.2010399171633398e-06, + "loss": 1.7799, + "step": 19706500 + }, + { + "epoch": 97.64, + "learning_rate": 1.1998013307372563e-06, + "loss": 1.7762, + "step": 19707000 + }, + { + "epoch": 97.64, + "learning_rate": 1.1985627443111726e-06, + "loss": 1.789, + "step": 19707500 + }, + { + "epoch": 97.64, + "learning_rate": 1.197324157885089e-06, + "loss": 1.7784, + "step": 19708000 + }, + { + "epoch": 97.64, + "learning_rate": 1.1960855714590054e-06, + "loss": 1.7741, + "step": 19708500 + }, + { + "epoch": 97.65, + "learning_rate": 1.1948494622057738e-06, + "loss": 1.7642, + "step": 19709000 + }, + { + "epoch": 97.65, + "learning_rate": 1.19361087577969e-06, + "loss": 1.7523, + "step": 19709500 + }, + { + "epoch": 97.65, + "learning_rate": 1.1923722893536066e-06, + "loss": 1.7572, + "step": 19710000 + }, + { + "epoch": 97.65, + "learning_rate": 1.1911337029275229e-06, + "loss": 1.7739, + "step": 19710500 + }, + { + "epoch": 97.66, + "learning_rate": 1.1898951165014394e-06, + "loss": 1.7775, + "step": 19711000 + }, + { + "epoch": 97.66, + "learning_rate": 1.1886565300753557e-06, + "loss": 1.747, + "step": 19711500 + }, + { + "epoch": 97.66, + "learning_rate": 1.187417943649272e-06, + "loss": 1.7653, + "step": 19712000 + }, + { + "epoch": 97.66, + "learning_rate": 1.1861793572231883e-06, + "loss": 1.7619, + "step": 19712500 + }, + { + "epoch": 97.67, + "learning_rate": 1.1849407707971048e-06, + "loss": 1.7837, + "step": 19713000 + }, + { + "epoch": 97.67, + "learning_rate": 1.1837046615438731e-06, + "loss": 1.7601, + "step": 19713500 + }, + { + "epoch": 97.67, + "learning_rate": 1.1824660751177896e-06, + "loss": 1.7663, + "step": 19714000 + }, + { + "epoch": 97.67, + "learning_rate": 1.1812274886917062e-06, + "loss": 1.7622, + "step": 19714500 + }, + { + "epoch": 97.67, + "learning_rate": 1.1799889022656224e-06, + "loss": 1.7498, + "step": 19715000 + }, + { + "epoch": 97.68, + "learning_rate": 1.1787503158395387e-06, + "loss": 1.7819, + "step": 19715500 + }, + { + "epoch": 97.68, + "learning_rate": 1.1775142065863073e-06, + "loss": 1.7446, + "step": 19716000 + }, + { + "epoch": 97.68, + "learning_rate": 1.1762756201602236e-06, + "loss": 1.7676, + "step": 19716500 + }, + { + "epoch": 97.68, + "learning_rate": 1.175039510906992e-06, + "loss": 1.7614, + "step": 19717000 + }, + { + "epoch": 97.69, + "learning_rate": 1.1738009244809085e-06, + "loss": 1.784, + "step": 19717500 + }, + { + "epoch": 97.69, + "learning_rate": 1.1725623380548248e-06, + "loss": 1.7791, + "step": 19718000 + }, + { + "epoch": 97.69, + "learning_rate": 1.1713237516287413e-06, + "loss": 1.7531, + "step": 19718500 + }, + { + "epoch": 97.69, + "learning_rate": 1.1700851652026576e-06, + "loss": 1.7695, + "step": 19719000 + }, + { + "epoch": 97.7, + "learning_rate": 1.1688490559494262e-06, + "loss": 1.7614, + "step": 19719500 + }, + { + "epoch": 97.7, + "learning_rate": 1.1676129466961946e-06, + "loss": 1.7822, + "step": 19720000 + }, + { + "epoch": 97.7, + "learning_rate": 1.1663743602701109e-06, + "loss": 1.7676, + "step": 19720500 + }, + { + "epoch": 97.7, + "learning_rate": 1.1651357738440274e-06, + "loss": 1.7657, + "step": 19721000 + }, + { + "epoch": 97.71, + "learning_rate": 1.1638971874179437e-06, + "loss": 1.7669, + "step": 19721500 + }, + { + "epoch": 97.71, + "learning_rate": 1.16265860099186e-06, + "loss": 1.7787, + "step": 19722000 + }, + { + "epoch": 97.71, + "learning_rate": 1.1614200145657765e-06, + "loss": 1.7682, + "step": 19722500 + }, + { + "epoch": 97.71, + "learning_rate": 1.160183905312545e-06, + "loss": 1.7712, + "step": 19723000 + }, + { + "epoch": 97.72, + "learning_rate": 1.1589453188864614e-06, + "loss": 1.7657, + "step": 19723500 + }, + { + "epoch": 97.72, + "learning_rate": 1.15770920963323e-06, + "loss": 1.7565, + "step": 19724000 + }, + { + "epoch": 97.72, + "learning_rate": 1.1564706232071463e-06, + "loss": 1.7937, + "step": 19724500 + }, + { + "epoch": 97.72, + "learning_rate": 1.1552320367810626e-06, + "loss": 1.7822, + "step": 19725000 + }, + { + "epoch": 97.73, + "learning_rate": 1.1539934503549788e-06, + "loss": 1.7577, + "step": 19725500 + }, + { + "epoch": 97.73, + "learning_rate": 1.1527548639288954e-06, + "loss": 1.7604, + "step": 19726000 + }, + { + "epoch": 97.73, + "learning_rate": 1.1515162775028116e-06, + "loss": 1.7542, + "step": 19726500 + }, + { + "epoch": 97.73, + "learning_rate": 1.1502776910767282e-06, + "loss": 1.7883, + "step": 19727000 + }, + { + "epoch": 97.74, + "learning_rate": 1.1490391046506444e-06, + "loss": 1.7631, + "step": 19727500 + }, + { + "epoch": 97.74, + "learning_rate": 1.1478005182245607e-06, + "loss": 1.7772, + "step": 19728000 + }, + { + "epoch": 97.74, + "learning_rate": 1.146561931798477e-06, + "loss": 1.7695, + "step": 19728500 + }, + { + "epoch": 97.74, + "learning_rate": 1.1453233453723933e-06, + "loss": 1.7594, + "step": 19729000 + }, + { + "epoch": 97.75, + "learning_rate": 1.1440847589463098e-06, + "loss": 1.7847, + "step": 19729500 + }, + { + "epoch": 97.75, + "learning_rate": 1.1428461725202263e-06, + "loss": 1.7682, + "step": 19730000 + }, + { + "epoch": 97.75, + "learning_rate": 1.1416075860941426e-06, + "loss": 1.7534, + "step": 19730500 + }, + { + "epoch": 97.75, + "learning_rate": 1.140371476840911e-06, + "loss": 1.755, + "step": 19731000 + }, + { + "epoch": 97.76, + "learning_rate": 1.1391328904148275e-06, + "loss": 1.7571, + "step": 19731500 + }, + { + "epoch": 97.76, + "learning_rate": 1.1378943039887438e-06, + "loss": 1.7834, + "step": 19732000 + }, + { + "epoch": 97.76, + "learning_rate": 1.1366581947355122e-06, + "loss": 1.7811, + "step": 19732500 + }, + { + "epoch": 97.76, + "learning_rate": 1.1354196083094285e-06, + "loss": 1.7719, + "step": 19733000 + }, + { + "epoch": 97.77, + "learning_rate": 1.134181021883345e-06, + "loss": 1.7946, + "step": 19733500 + }, + { + "epoch": 97.77, + "learning_rate": 1.1329424354572615e-06, + "loss": 1.7884, + "step": 19734000 + }, + { + "epoch": 97.77, + "learning_rate": 1.1317038490311778e-06, + "loss": 1.7604, + "step": 19734500 + }, + { + "epoch": 97.77, + "learning_rate": 1.130465262605094e-06, + "loss": 1.7796, + "step": 19735000 + }, + { + "epoch": 97.78, + "learning_rate": 1.1292266761790104e-06, + "loss": 1.7702, + "step": 19735500 + }, + { + "epoch": 97.78, + "learning_rate": 1.127990566925779e-06, + "loss": 1.7932, + "step": 19736000 + }, + { + "epoch": 97.78, + "learning_rate": 1.1267519804996953e-06, + "loss": 1.7591, + "step": 19736500 + }, + { + "epoch": 97.78, + "learning_rate": 1.1255133940736118e-06, + "loss": 1.7742, + "step": 19737000 + }, + { + "epoch": 97.79, + "learning_rate": 1.124274807647528e-06, + "loss": 1.7724, + "step": 19737500 + }, + { + "epoch": 97.79, + "learning_rate": 1.1230362212214446e-06, + "loss": 1.7509, + "step": 19738000 + }, + { + "epoch": 97.79, + "learning_rate": 1.1217976347953609e-06, + "loss": 1.7613, + "step": 19738500 + }, + { + "epoch": 97.79, + "learning_rate": 1.1205590483692772e-06, + "loss": 1.7828, + "step": 19739000 + }, + { + "epoch": 97.8, + "learning_rate": 1.1193204619431934e-06, + "loss": 1.7732, + "step": 19739500 + }, + { + "epoch": 97.8, + "learning_rate": 1.1180818755171097e-06, + "loss": 1.7737, + "step": 19740000 + }, + { + "epoch": 97.8, + "learning_rate": 1.1168432890910262e-06, + "loss": 1.786, + "step": 19740500 + }, + { + "epoch": 97.8, + "learning_rate": 1.1156071798377948e-06, + "loss": 1.7374, + "step": 19741000 + }, + { + "epoch": 97.81, + "learning_rate": 1.1143685934117111e-06, + "loss": 1.788, + "step": 19741500 + }, + { + "epoch": 97.81, + "learning_rate": 1.1131324841584797e-06, + "loss": 1.7598, + "step": 19742000 + }, + { + "epoch": 97.81, + "learning_rate": 1.111893897732396e-06, + "loss": 1.7746, + "step": 19742500 + }, + { + "epoch": 97.81, + "learning_rate": 1.1106553113063123e-06, + "loss": 1.7589, + "step": 19743000 + }, + { + "epoch": 97.82, + "learning_rate": 1.1094167248802286e-06, + "loss": 1.7645, + "step": 19743500 + }, + { + "epoch": 97.82, + "learning_rate": 1.1081781384541451e-06, + "loss": 1.7569, + "step": 19744000 + }, + { + "epoch": 97.82, + "learning_rate": 1.1069395520280616e-06, + "loss": 1.7848, + "step": 19744500 + }, + { + "epoch": 97.82, + "learning_rate": 1.105700965601978e-06, + "loss": 1.7475, + "step": 19745000 + }, + { + "epoch": 97.83, + "learning_rate": 1.1044623791758942e-06, + "loss": 1.7384, + "step": 19745500 + }, + { + "epoch": 97.83, + "learning_rate": 1.1032237927498105e-06, + "loss": 1.767, + "step": 19746000 + }, + { + "epoch": 97.83, + "learning_rate": 1.1019852063237268e-06, + "loss": 1.7835, + "step": 19746500 + }, + { + "epoch": 97.83, + "learning_rate": 1.1007515742433475e-06, + "loss": 1.7716, + "step": 19747000 + }, + { + "epoch": 97.84, + "learning_rate": 1.099512987817264e-06, + "loss": 1.7766, + "step": 19747500 + }, + { + "epoch": 97.84, + "learning_rate": 1.0982744013911803e-06, + "loss": 1.761, + "step": 19748000 + }, + { + "epoch": 97.84, + "learning_rate": 1.0970358149650968e-06, + "loss": 1.7613, + "step": 19748500 + }, + { + "epoch": 97.84, + "learning_rate": 1.095797228539013e-06, + "loss": 1.7795, + "step": 19749000 + }, + { + "epoch": 97.85, + "learning_rate": 1.0945586421129294e-06, + "loss": 1.7898, + "step": 19749500 + }, + { + "epoch": 97.85, + "learning_rate": 1.0933200556868457e-06, + "loss": 1.7451, + "step": 19750000 + }, + { + "epoch": 97.85, + "learning_rate": 1.0920814692607622e-06, + "loss": 1.773, + "step": 19750500 + }, + { + "epoch": 97.85, + "learning_rate": 1.0908428828346785e-06, + "loss": 1.7571, + "step": 19751000 + }, + { + "epoch": 97.86, + "learning_rate": 1.0896067735814468e-06, + "loss": 1.7707, + "step": 19751500 + }, + { + "epoch": 97.86, + "learning_rate": 1.0883681871553634e-06, + "loss": 1.7527, + "step": 19752000 + }, + { + "epoch": 97.86, + "learning_rate": 1.0871296007292799e-06, + "loss": 1.7704, + "step": 19752500 + }, + { + "epoch": 97.86, + "learning_rate": 1.0858934914760482e-06, + "loss": 1.763, + "step": 19753000 + }, + { + "epoch": 97.87, + "learning_rate": 1.0846549050499647e-06, + "loss": 1.7554, + "step": 19753500 + }, + { + "epoch": 97.87, + "learning_rate": 1.083416318623881e-06, + "loss": 1.7598, + "step": 19754000 + }, + { + "epoch": 97.87, + "learning_rate": 1.0821777321977973e-06, + "loss": 1.7554, + "step": 19754500 + }, + { + "epoch": 97.87, + "learning_rate": 1.0809391457717136e-06, + "loss": 1.7897, + "step": 19755000 + }, + { + "epoch": 97.88, + "learning_rate": 1.0797005593456301e-06, + "loss": 1.7763, + "step": 19755500 + }, + { + "epoch": 97.88, + "learning_rate": 1.0784619729195464e-06, + "loss": 1.7729, + "step": 19756000 + }, + { + "epoch": 97.88, + "learning_rate": 1.077223386493463e-06, + "loss": 1.7708, + "step": 19756500 + }, + { + "epoch": 97.88, + "learning_rate": 1.0759848000673792e-06, + "loss": 1.7835, + "step": 19757000 + }, + { + "epoch": 97.89, + "learning_rate": 1.0747486908141476e-06, + "loss": 1.7729, + "step": 19757500 + }, + { + "epoch": 97.89, + "learning_rate": 1.073510104388064e-06, + "loss": 1.7566, + "step": 19758000 + }, + { + "epoch": 97.89, + "learning_rate": 1.0722715179619804e-06, + "loss": 1.7864, + "step": 19758500 + }, + { + "epoch": 97.89, + "learning_rate": 1.0710329315358967e-06, + "loss": 1.7725, + "step": 19759000 + }, + { + "epoch": 97.9, + "learning_rate": 1.0697943451098132e-06, + "loss": 1.7539, + "step": 19759500 + }, + { + "epoch": 97.9, + "learning_rate": 1.0685557586837295e-06, + "loss": 1.7797, + "step": 19760000 + }, + { + "epoch": 97.9, + "learning_rate": 1.0673171722576458e-06, + "loss": 1.7668, + "step": 19760500 + }, + { + "epoch": 97.9, + "learning_rate": 1.0660810630044144e-06, + "loss": 1.7602, + "step": 19761000 + }, + { + "epoch": 97.91, + "learning_rate": 1.064844953751183e-06, + "loss": 1.7635, + "step": 19761500 + }, + { + "epoch": 97.91, + "learning_rate": 1.0636063673250993e-06, + "loss": 1.7799, + "step": 19762000 + }, + { + "epoch": 97.91, + "learning_rate": 1.0623677808990156e-06, + "loss": 1.7903, + "step": 19762500 + }, + { + "epoch": 97.91, + "learning_rate": 1.061129194472932e-06, + "loss": 1.7749, + "step": 19763000 + }, + { + "epoch": 97.92, + "learning_rate": 1.0598906080468484e-06, + "loss": 1.7641, + "step": 19763500 + }, + { + "epoch": 97.92, + "learning_rate": 1.0586520216207647e-06, + "loss": 1.761, + "step": 19764000 + }, + { + "epoch": 97.92, + "learning_rate": 1.0574134351946812e-06, + "loss": 1.7756, + "step": 19764500 + }, + { + "epoch": 97.92, + "learning_rate": 1.0561748487685975e-06, + "loss": 1.7639, + "step": 19765000 + }, + { + "epoch": 97.93, + "learning_rate": 1.0549362623425137e-06, + "loss": 1.7746, + "step": 19765500 + }, + { + "epoch": 97.93, + "learning_rate": 1.05369767591643e-06, + "loss": 1.7728, + "step": 19766000 + }, + { + "epoch": 97.93, + "learning_rate": 1.0524615666631986e-06, + "loss": 1.7662, + "step": 19766500 + }, + { + "epoch": 97.93, + "learning_rate": 1.0512229802371151e-06, + "loss": 1.7825, + "step": 19767000 + }, + { + "epoch": 97.94, + "learning_rate": 1.0499843938110314e-06, + "loss": 1.7757, + "step": 19767500 + }, + { + "epoch": 97.94, + "learning_rate": 1.0487458073849477e-06, + "loss": 1.7639, + "step": 19768000 + }, + { + "epoch": 97.94, + "learning_rate": 1.047507220958864e-06, + "loss": 1.748, + "step": 19768500 + }, + { + "epoch": 97.94, + "learning_rate": 1.0462686345327805e-06, + "loss": 1.7769, + "step": 19769000 + }, + { + "epoch": 97.94, + "learning_rate": 1.0450300481066968e-06, + "loss": 1.7762, + "step": 19769500 + }, + { + "epoch": 97.95, + "learning_rate": 1.0437914616806133e-06, + "loss": 1.7631, + "step": 19770000 + }, + { + "epoch": 97.95, + "learning_rate": 1.0425528752545296e-06, + "loss": 1.7642, + "step": 19770500 + }, + { + "epoch": 97.95, + "learning_rate": 1.041314288828446e-06, + "loss": 1.7826, + "step": 19771000 + }, + { + "epoch": 97.95, + "learning_rate": 1.0400781795752145e-06, + "loss": 1.7514, + "step": 19771500 + }, + { + "epoch": 97.96, + "learning_rate": 1.0388395931491308e-06, + "loss": 1.8017, + "step": 19772000 + }, + { + "epoch": 97.96, + "learning_rate": 1.0376034838958994e-06, + "loss": 1.7718, + "step": 19772500 + }, + { + "epoch": 97.96, + "learning_rate": 1.0363648974698157e-06, + "loss": 1.7455, + "step": 19773000 + }, + { + "epoch": 97.96, + "learning_rate": 1.035126311043732e-06, + "loss": 1.7603, + "step": 19773500 + }, + { + "epoch": 97.97, + "learning_rate": 1.0338877246176485e-06, + "loss": 1.7651, + "step": 19774000 + }, + { + "epoch": 97.97, + "learning_rate": 1.0326491381915648e-06, + "loss": 1.7796, + "step": 19774500 + }, + { + "epoch": 97.97, + "learning_rate": 1.031410551765481e-06, + "loss": 1.7474, + "step": 19775000 + }, + { + "epoch": 97.97, + "learning_rate": 1.0301719653393976e-06, + "loss": 1.7553, + "step": 19775500 + }, + { + "epoch": 97.98, + "learning_rate": 1.0289333789133139e-06, + "loss": 1.769, + "step": 19776000 + }, + { + "epoch": 97.98, + "learning_rate": 1.0276947924872302e-06, + "loss": 1.7556, + "step": 19776500 + }, + { + "epoch": 97.98, + "learning_rate": 1.0264562060611467e-06, + "loss": 1.7668, + "step": 19777000 + }, + { + "epoch": 97.98, + "learning_rate": 1.025217619635063e-06, + "loss": 1.7572, + "step": 19777500 + }, + { + "epoch": 97.99, + "learning_rate": 1.0239815103818316e-06, + "loss": 1.7613, + "step": 19778000 + }, + { + "epoch": 97.99, + "learning_rate": 1.0227429239557479e-06, + "loss": 1.7745, + "step": 19778500 + }, + { + "epoch": 97.99, + "learning_rate": 1.0215043375296641e-06, + "loss": 1.7424, + "step": 19779000 + }, + { + "epoch": 97.99, + "learning_rate": 1.0202682282764327e-06, + "loss": 1.782, + "step": 19779500 + }, + { + "epoch": 98.0, + "learning_rate": 1.019029641850349e-06, + "loss": 1.7797, + "step": 19780000 + }, + { + "epoch": 98.0, + "learning_rate": 1.0177910554242653e-06, + "loss": 1.7705, + "step": 19780500 + }, + { + "epoch": 98.0, + "eval_accuracy": 0.6875432970204784, + "eval_accuracy_mlm": 0.6491573384378511, + "eval_accuracy_nsp": 0.8684455147690413, + "eval_loss": 2.312778949737549, + "eval_runtime": 147.1591, + "eval_samples_per_second": 1732.539, + "eval_steps_per_second": 72.194, + "step": 19780614 + }, + { + "epoch": 98.0, + "learning_rate": 1.0165524689981818e-06, + "loss": 1.7802, + "step": 19781000 + }, + { + "epoch": 98.0, + "learning_rate": 1.0153138825720983e-06, + "loss": 1.7516, + "step": 19781500 + }, + { + "epoch": 98.01, + "learning_rate": 1.0140752961460146e-06, + "loss": 1.7575, + "step": 19782000 + }, + { + "epoch": 98.01, + "learning_rate": 1.012836709719931e-06, + "loss": 1.7681, + "step": 19782500 + }, + { + "epoch": 98.01, + "learning_rate": 1.0116006004666993e-06, + "loss": 1.773, + "step": 19783000 + }, + { + "epoch": 98.01, + "learning_rate": 1.010364491213468e-06, + "loss": 1.7569, + "step": 19783500 + }, + { + "epoch": 98.02, + "learning_rate": 1.0091259047873842e-06, + "loss": 1.7638, + "step": 19784000 + }, + { + "epoch": 98.02, + "learning_rate": 1.0078873183613005e-06, + "loss": 1.7566, + "step": 19784500 + }, + { + "epoch": 98.02, + "learning_rate": 1.006648731935217e-06, + "loss": 1.7487, + "step": 19785000 + }, + { + "epoch": 98.02, + "learning_rate": 1.0054101455091335e-06, + "loss": 1.7702, + "step": 19785500 + }, + { + "epoch": 98.03, + "learning_rate": 1.0041715590830498e-06, + "loss": 1.7679, + "step": 19786000 + }, + { + "epoch": 98.03, + "learning_rate": 1.002932972656966e-06, + "loss": 1.7697, + "step": 19786500 + }, + { + "epoch": 98.03, + "learning_rate": 1.0016943862308824e-06, + "loss": 1.7545, + "step": 19787000 + }, + { + "epoch": 98.03, + "learning_rate": 1.0004557998047987e-06, + "loss": 1.7625, + "step": 19787500 + }, + { + "epoch": 98.04, + "learning_rate": 9.992196905515673e-07, + "loss": 1.7565, + "step": 19788000 + }, + { + "epoch": 98.04, + "learning_rate": 9.979811041254838e-07, + "loss": 1.7763, + "step": 19788500 + }, + { + "epoch": 98.04, + "learning_rate": 9.967425176994e-07, + "loss": 1.771, + "step": 19789000 + }, + { + "epoch": 98.04, + "learning_rate": 9.955039312733166e-07, + "loss": 1.7735, + "step": 19789500 + }, + { + "epoch": 98.05, + "learning_rate": 9.94267822020085e-07, + "loss": 1.7723, + "step": 19790000 + }, + { + "epoch": 98.05, + "learning_rate": 9.930292355940013e-07, + "loss": 1.7826, + "step": 19790500 + }, + { + "epoch": 98.05, + "learning_rate": 9.917906491679178e-07, + "loss": 1.7642, + "step": 19791000 + }, + { + "epoch": 98.05, + "learning_rate": 9.90552062741834e-07, + "loss": 1.765, + "step": 19791500 + }, + { + "epoch": 98.06, + "learning_rate": 9.893134763157503e-07, + "loss": 1.7722, + "step": 19792000 + }, + { + "epoch": 98.06, + "learning_rate": 9.880748898896668e-07, + "loss": 1.77, + "step": 19792500 + }, + { + "epoch": 98.06, + "learning_rate": 9.868363034635831e-07, + "loss": 1.7674, + "step": 19793000 + }, + { + "epoch": 98.06, + "learning_rate": 9.855977170374994e-07, + "loss": 1.757, + "step": 19793500 + }, + { + "epoch": 98.07, + "learning_rate": 9.84359130611416e-07, + "loss": 1.788, + "step": 19794000 + }, + { + "epoch": 98.07, + "learning_rate": 9.831205441853322e-07, + "loss": 1.7798, + "step": 19794500 + }, + { + "epoch": 98.07, + "learning_rate": 9.818819577592485e-07, + "loss": 1.7642, + "step": 19795000 + }, + { + "epoch": 98.07, + "learning_rate": 9.80643371333165e-07, + "loss": 1.7519, + "step": 19795500 + }, + { + "epoch": 98.08, + "learning_rate": 9.794047849070813e-07, + "loss": 1.7755, + "step": 19796000 + }, + { + "epoch": 98.08, + "learning_rate": 9.7816867565385e-07, + "loss": 1.7439, + "step": 19796500 + }, + { + "epoch": 98.08, + "learning_rate": 9.769300892277662e-07, + "loss": 1.7529, + "step": 19797000 + }, + { + "epoch": 98.08, + "learning_rate": 9.756939799745348e-07, + "loss": 1.7851, + "step": 19797500 + }, + { + "epoch": 98.09, + "learning_rate": 9.74455393548451e-07, + "loss": 1.7637, + "step": 19798000 + }, + { + "epoch": 98.09, + "learning_rate": 9.732168071223674e-07, + "loss": 1.7256, + "step": 19798500 + }, + { + "epoch": 98.09, + "learning_rate": 9.719782206962837e-07, + "loss": 1.739, + "step": 19799000 + }, + { + "epoch": 98.09, + "learning_rate": 9.707396342702002e-07, + "loss": 1.7437, + "step": 19799500 + }, + { + "epoch": 98.1, + "learning_rate": 9.695010478441165e-07, + "loss": 1.761, + "step": 19800000 + }, + { + "epoch": 98.1, + "learning_rate": 9.68262461418033e-07, + "loss": 1.7472, + "step": 19800500 + }, + { + "epoch": 98.1, + "learning_rate": 9.670238749919493e-07, + "loss": 1.7556, + "step": 19801000 + }, + { + "epoch": 98.1, + "learning_rate": 9.657877657387177e-07, + "loss": 1.7687, + "step": 19801500 + }, + { + "epoch": 98.11, + "learning_rate": 9.645491793126342e-07, + "loss": 1.7789, + "step": 19802000 + }, + { + "epoch": 98.11, + "learning_rate": 9.633105928865505e-07, + "loss": 1.7704, + "step": 19802500 + }, + { + "epoch": 98.11, + "learning_rate": 9.620720064604668e-07, + "loss": 1.7474, + "step": 19803000 + }, + { + "epoch": 98.11, + "learning_rate": 9.608334200343833e-07, + "loss": 1.7386, + "step": 19803500 + }, + { + "epoch": 98.12, + "learning_rate": 9.595948336082996e-07, + "loss": 1.7541, + "step": 19804000 + }, + { + "epoch": 98.12, + "learning_rate": 9.583562471822159e-07, + "loss": 1.7396, + "step": 19804500 + }, + { + "epoch": 98.12, + "learning_rate": 9.571201379289844e-07, + "loss": 1.7542, + "step": 19805000 + }, + { + "epoch": 98.12, + "learning_rate": 9.558815515029007e-07, + "loss": 1.7921, + "step": 19805500 + }, + { + "epoch": 98.13, + "learning_rate": 9.54642965076817e-07, + "loss": 1.7591, + "step": 19806000 + }, + { + "epoch": 98.13, + "learning_rate": 9.534043786507336e-07, + "loss": 1.7668, + "step": 19806500 + }, + { + "epoch": 98.13, + "learning_rate": 9.521657922246499e-07, + "loss": 1.7636, + "step": 19807000 + }, + { + "epoch": 98.13, + "learning_rate": 9.509272057985662e-07, + "loss": 1.7654, + "step": 19807500 + }, + { + "epoch": 98.14, + "learning_rate": 9.496886193724826e-07, + "loss": 1.7505, + "step": 19808000 + }, + { + "epoch": 98.14, + "learning_rate": 9.484500329463989e-07, + "loss": 1.7647, + "step": 19808500 + }, + { + "epoch": 98.14, + "learning_rate": 9.472139236931675e-07, + "loss": 1.7379, + "step": 19809000 + }, + { + "epoch": 98.14, + "learning_rate": 9.459753372670839e-07, + "loss": 1.7504, + "step": 19809500 + }, + { + "epoch": 98.15, + "learning_rate": 9.447367508410002e-07, + "loss": 1.7559, + "step": 19810000 + }, + { + "epoch": 98.15, + "learning_rate": 9.435006415877688e-07, + "loss": 1.7443, + "step": 19810500 + }, + { + "epoch": 98.15, + "learning_rate": 9.422620551616851e-07, + "loss": 1.7548, + "step": 19811000 + }, + { + "epoch": 98.15, + "learning_rate": 9.410234687356015e-07, + "loss": 1.7367, + "step": 19811500 + }, + { + "epoch": 98.16, + "learning_rate": 9.397848823095178e-07, + "loss": 1.7747, + "step": 19812000 + }, + { + "epoch": 98.16, + "learning_rate": 9.385462958834341e-07, + "loss": 1.7316, + "step": 19812500 + }, + { + "epoch": 98.16, + "learning_rate": 9.373077094573506e-07, + "loss": 1.7559, + "step": 19813000 + }, + { + "epoch": 98.16, + "learning_rate": 9.36069123031267e-07, + "loss": 1.7578, + "step": 19813500 + }, + { + "epoch": 98.17, + "learning_rate": 9.348305366051833e-07, + "loss": 1.7442, + "step": 19814000 + }, + { + "epoch": 98.17, + "learning_rate": 9.335919501790996e-07, + "loss": 1.7707, + "step": 19814500 + }, + { + "epoch": 98.17, + "learning_rate": 9.32353363753016e-07, + "loss": 1.758, + "step": 19815000 + }, + { + "epoch": 98.17, + "learning_rate": 9.311147773269323e-07, + "loss": 1.7589, + "step": 19815500 + }, + { + "epoch": 98.18, + "learning_rate": 9.298761909008488e-07, + "loss": 1.7761, + "step": 19816000 + }, + { + "epoch": 98.18, + "learning_rate": 9.286400816476172e-07, + "loss": 1.7828, + "step": 19816500 + }, + { + "epoch": 98.18, + "learning_rate": 9.274014952215336e-07, + "loss": 1.7531, + "step": 19817000 + }, + { + "epoch": 98.18, + "learning_rate": 9.261629087954501e-07, + "loss": 1.7362, + "step": 19817500 + }, + { + "epoch": 98.19, + "learning_rate": 9.249243223693664e-07, + "loss": 1.7804, + "step": 19818000 + }, + { + "epoch": 98.19, + "learning_rate": 9.236857359432828e-07, + "loss": 1.7474, + "step": 19818500 + }, + { + "epoch": 98.19, + "learning_rate": 9.22447149517199e-07, + "loss": 1.7407, + "step": 19819000 + }, + { + "epoch": 98.19, + "learning_rate": 9.212085630911153e-07, + "loss": 1.7506, + "step": 19819500 + }, + { + "epoch": 98.2, + "learning_rate": 9.199699766650317e-07, + "loss": 1.7572, + "step": 19820000 + }, + { + "epoch": 98.2, + "learning_rate": 9.187313902389482e-07, + "loss": 1.7541, + "step": 19820500 + }, + { + "epoch": 98.2, + "learning_rate": 9.174928038128645e-07, + "loss": 1.7984, + "step": 19821000 + }, + { + "epoch": 98.2, + "learning_rate": 9.162542173867808e-07, + "loss": 1.7555, + "step": 19821500 + }, + { + "epoch": 98.21, + "learning_rate": 9.150156309606972e-07, + "loss": 1.7673, + "step": 19822000 + }, + { + "epoch": 98.21, + "learning_rate": 9.137795217074658e-07, + "loss": 1.7601, + "step": 19822500 + }, + { + "epoch": 98.21, + "learning_rate": 9.125434124542342e-07, + "loss": 1.754, + "step": 19823000 + }, + { + "epoch": 98.21, + "learning_rate": 9.113048260281507e-07, + "loss": 1.7612, + "step": 19823500 + }, + { + "epoch": 98.21, + "learning_rate": 9.100711939477714e-07, + "loss": 1.7789, + "step": 19824000 + }, + { + "epoch": 98.22, + "learning_rate": 9.088326075216877e-07, + "loss": 1.7769, + "step": 19824500 + }, + { + "epoch": 98.22, + "learning_rate": 9.075940210956041e-07, + "loss": 1.7737, + "step": 19825000 + }, + { + "epoch": 98.22, + "learning_rate": 9.063554346695204e-07, + "loss": 1.7564, + "step": 19825500 + }, + { + "epoch": 98.22, + "learning_rate": 9.051168482434367e-07, + "loss": 1.751, + "step": 19826000 + }, + { + "epoch": 98.23, + "learning_rate": 9.038782618173531e-07, + "loss": 1.7562, + "step": 19826500 + }, + { + "epoch": 98.23, + "learning_rate": 9.026396753912696e-07, + "loss": 1.7619, + "step": 19827000 + }, + { + "epoch": 98.23, + "learning_rate": 9.014010889651859e-07, + "loss": 1.7527, + "step": 19827500 + }, + { + "epoch": 98.23, + "learning_rate": 9.001625025391023e-07, + "loss": 1.7428, + "step": 19828000 + }, + { + "epoch": 98.24, + "learning_rate": 8.989239161130186e-07, + "loss": 1.7683, + "step": 19828500 + }, + { + "epoch": 98.24, + "learning_rate": 8.976853296869349e-07, + "loss": 1.7528, + "step": 19829000 + }, + { + "epoch": 98.24, + "learning_rate": 8.964467432608513e-07, + "loss": 1.7577, + "step": 19829500 + }, + { + "epoch": 98.24, + "learning_rate": 8.952081568347678e-07, + "loss": 1.7656, + "step": 19830000 + }, + { + "epoch": 98.25, + "learning_rate": 8.939695704086841e-07, + "loss": 1.7471, + "step": 19830500 + }, + { + "epoch": 98.25, + "learning_rate": 8.927309839826004e-07, + "loss": 1.7794, + "step": 19831000 + }, + { + "epoch": 98.25, + "learning_rate": 8.91494874729369e-07, + "loss": 1.7637, + "step": 19831500 + }, + { + "epoch": 98.25, + "learning_rate": 8.902587654761374e-07, + "loss": 1.7511, + "step": 19832000 + }, + { + "epoch": 98.26, + "learning_rate": 8.890201790500537e-07, + "loss": 1.7753, + "step": 19832500 + }, + { + "epoch": 98.26, + "learning_rate": 8.877840697968223e-07, + "loss": 1.7634, + "step": 19833000 + }, + { + "epoch": 98.26, + "learning_rate": 8.865454833707386e-07, + "loss": 1.783, + "step": 19833500 + }, + { + "epoch": 98.26, + "learning_rate": 8.85306896944655e-07, + "loss": 1.7684, + "step": 19834000 + }, + { + "epoch": 98.27, + "learning_rate": 8.840683105185713e-07, + "loss": 1.7591, + "step": 19834500 + }, + { + "epoch": 98.27, + "learning_rate": 8.828297240924878e-07, + "loss": 1.764, + "step": 19835000 + }, + { + "epoch": 98.27, + "learning_rate": 8.815911376664041e-07, + "loss": 1.7483, + "step": 19835500 + }, + { + "epoch": 98.27, + "learning_rate": 8.803525512403205e-07, + "loss": 1.7387, + "step": 19836000 + }, + { + "epoch": 98.28, + "learning_rate": 8.791139648142368e-07, + "loss": 1.7695, + "step": 19836500 + }, + { + "epoch": 98.28, + "learning_rate": 8.778753783881532e-07, + "loss": 1.7549, + "step": 19837000 + }, + { + "epoch": 98.28, + "learning_rate": 8.766367919620695e-07, + "loss": 1.7789, + "step": 19837500 + }, + { + "epoch": 98.28, + "learning_rate": 8.754006827088381e-07, + "loss": 1.7495, + "step": 19838000 + }, + { + "epoch": 98.29, + "learning_rate": 8.741620962827544e-07, + "loss": 1.7448, + "step": 19838500 + }, + { + "epoch": 98.29, + "learning_rate": 8.729235098566708e-07, + "loss": 1.7539, + "step": 19839000 + }, + { + "epoch": 98.29, + "learning_rate": 8.716849234305873e-07, + "loss": 1.7722, + "step": 19839500 + }, + { + "epoch": 98.29, + "learning_rate": 8.704463370045036e-07, + "loss": 1.75, + "step": 19840000 + }, + { + "epoch": 98.3, + "learning_rate": 8.692077505784199e-07, + "loss": 1.746, + "step": 19840500 + }, + { + "epoch": 98.3, + "learning_rate": 8.679691641523363e-07, + "loss": 1.7555, + "step": 19841000 + }, + { + "epoch": 98.3, + "learning_rate": 8.667305777262526e-07, + "loss": 1.772, + "step": 19841500 + }, + { + "epoch": 98.3, + "learning_rate": 8.654919913001689e-07, + "loss": 1.767, + "step": 19842000 + }, + { + "epoch": 98.31, + "learning_rate": 8.642534048740854e-07, + "loss": 1.7683, + "step": 19842500 + }, + { + "epoch": 98.31, + "learning_rate": 8.630148184480018e-07, + "loss": 1.727, + "step": 19843000 + }, + { + "epoch": 98.31, + "learning_rate": 8.617762320219181e-07, + "loss": 1.7596, + "step": 19843500 + }, + { + "epoch": 98.31, + "learning_rate": 8.605376455958345e-07, + "loss": 1.7415, + "step": 19844000 + }, + { + "epoch": 98.32, + "learning_rate": 8.59301536342603e-07, + "loss": 1.7687, + "step": 19844500 + }, + { + "epoch": 98.32, + "learning_rate": 8.580654270893714e-07, + "loss": 1.778, + "step": 19845000 + }, + { + "epoch": 98.32, + "learning_rate": 8.5682931783614e-07, + "loss": 1.7664, + "step": 19845500 + }, + { + "epoch": 98.32, + "learning_rate": 8.555907314100563e-07, + "loss": 1.7437, + "step": 19846000 + }, + { + "epoch": 98.33, + "learning_rate": 8.543521449839726e-07, + "loss": 1.7531, + "step": 19846500 + }, + { + "epoch": 98.33, + "learning_rate": 8.531135585578891e-07, + "loss": 1.7711, + "step": 19847000 + }, + { + "epoch": 98.33, + "learning_rate": 8.518749721318055e-07, + "loss": 1.7628, + "step": 19847500 + }, + { + "epoch": 98.33, + "learning_rate": 8.506363857057218e-07, + "loss": 1.7677, + "step": 19848000 + }, + { + "epoch": 98.34, + "learning_rate": 8.493977992796382e-07, + "loss": 1.7678, + "step": 19848500 + }, + { + "epoch": 98.34, + "learning_rate": 8.481616900264068e-07, + "loss": 1.7668, + "step": 19849000 + }, + { + "epoch": 98.34, + "learning_rate": 8.469231036003231e-07, + "loss": 1.7717, + "step": 19849500 + }, + { + "epoch": 98.34, + "learning_rate": 8.456869943470915e-07, + "loss": 1.7698, + "step": 19850000 + }, + { + "epoch": 98.35, + "learning_rate": 8.44448407921008e-07, + "loss": 1.7801, + "step": 19850500 + }, + { + "epoch": 98.35, + "learning_rate": 8.432098214949244e-07, + "loss": 1.7777, + "step": 19851000 + }, + { + "epoch": 98.35, + "learning_rate": 8.419712350688407e-07, + "loss": 1.766, + "step": 19851500 + }, + { + "epoch": 98.35, + "learning_rate": 8.40732648642757e-07, + "loss": 1.7746, + "step": 19852000 + }, + { + "epoch": 98.36, + "learning_rate": 8.394940622166734e-07, + "loss": 1.7569, + "step": 19852500 + }, + { + "epoch": 98.36, + "learning_rate": 8.382554757905897e-07, + "loss": 1.7748, + "step": 19853000 + }, + { + "epoch": 98.36, + "learning_rate": 8.370168893645062e-07, + "loss": 1.7529, + "step": 19853500 + }, + { + "epoch": 98.36, + "learning_rate": 8.357783029384225e-07, + "loss": 1.7736, + "step": 19854000 + }, + { + "epoch": 98.37, + "learning_rate": 8.345397165123389e-07, + "loss": 1.7537, + "step": 19854500 + }, + { + "epoch": 98.37, + "learning_rate": 8.333011300862552e-07, + "loss": 1.7423, + "step": 19855000 + }, + { + "epoch": 98.37, + "learning_rate": 8.320625436601716e-07, + "loss": 1.7377, + "step": 19855500 + }, + { + "epoch": 98.37, + "learning_rate": 8.308239572340879e-07, + "loss": 1.758, + "step": 19856000 + }, + { + "epoch": 98.38, + "learning_rate": 8.295878479808565e-07, + "loss": 1.7618, + "step": 19856500 + }, + { + "epoch": 98.38, + "learning_rate": 8.283492615547727e-07, + "loss": 1.772, + "step": 19857000 + }, + { + "epoch": 98.38, + "learning_rate": 8.271106751286891e-07, + "loss": 1.7681, + "step": 19857500 + }, + { + "epoch": 98.38, + "learning_rate": 8.258720887026057e-07, + "loss": 1.743, + "step": 19858000 + }, + { + "epoch": 98.39, + "learning_rate": 8.24635979449374e-07, + "loss": 1.7709, + "step": 19858500 + }, + { + "epoch": 98.39, + "learning_rate": 8.233998701961426e-07, + "loss": 1.7771, + "step": 19859000 + }, + { + "epoch": 98.39, + "learning_rate": 8.221612837700589e-07, + "loss": 1.7787, + "step": 19859500 + }, + { + "epoch": 98.39, + "learning_rate": 8.209226973439753e-07, + "loss": 1.7447, + "step": 19860000 + }, + { + "epoch": 98.4, + "learning_rate": 8.196841109178916e-07, + "loss": 1.7661, + "step": 19860500 + }, + { + "epoch": 98.4, + "learning_rate": 8.184455244918079e-07, + "loss": 1.7668, + "step": 19861000 + }, + { + "epoch": 98.4, + "learning_rate": 8.172069380657244e-07, + "loss": 1.7754, + "step": 19861500 + }, + { + "epoch": 98.4, + "learning_rate": 8.159683516396408e-07, + "loss": 1.7673, + "step": 19862000 + }, + { + "epoch": 98.41, + "learning_rate": 8.147322423864092e-07, + "loss": 1.7533, + "step": 19862500 + }, + { + "epoch": 98.41, + "learning_rate": 8.134936559603257e-07, + "loss": 1.7664, + "step": 19863000 + }, + { + "epoch": 98.41, + "learning_rate": 8.12255069534242e-07, + "loss": 1.7806, + "step": 19863500 + }, + { + "epoch": 98.41, + "learning_rate": 8.110164831081584e-07, + "loss": 1.7439, + "step": 19864000 + }, + { + "epoch": 98.42, + "learning_rate": 8.097778966820747e-07, + "loss": 1.7396, + "step": 19864500 + }, + { + "epoch": 98.42, + "learning_rate": 8.08539310255991e-07, + "loss": 1.7486, + "step": 19865000 + }, + { + "epoch": 98.42, + "learning_rate": 8.073007238299074e-07, + "loss": 1.7563, + "step": 19865500 + }, + { + "epoch": 98.42, + "learning_rate": 8.060621374038239e-07, + "loss": 1.7816, + "step": 19866000 + }, + { + "epoch": 98.43, + "learning_rate": 8.048235509777402e-07, + "loss": 1.7581, + "step": 19866500 + }, + { + "epoch": 98.43, + "learning_rate": 8.035849645516566e-07, + "loss": 1.7542, + "step": 19867000 + }, + { + "epoch": 98.43, + "learning_rate": 8.023463781255729e-07, + "loss": 1.7493, + "step": 19867500 + }, + { + "epoch": 98.43, + "learning_rate": 8.011077916994892e-07, + "loss": 1.7595, + "step": 19868000 + }, + { + "epoch": 98.44, + "learning_rate": 7.998692052734056e-07, + "loss": 1.7722, + "step": 19868500 + }, + { + "epoch": 98.44, + "learning_rate": 7.986330960201742e-07, + "loss": 1.7789, + "step": 19869000 + }, + { + "epoch": 98.44, + "learning_rate": 7.973945095940905e-07, + "loss": 1.7632, + "step": 19869500 + }, + { + "epoch": 98.44, + "learning_rate": 7.96158400340859e-07, + "loss": 1.7494, + "step": 19870000 + }, + { + "epoch": 98.45, + "learning_rate": 7.949198139147753e-07, + "loss": 1.7418, + "step": 19870500 + }, + { + "epoch": 98.45, + "learning_rate": 7.936812274886917e-07, + "loss": 1.7656, + "step": 19871000 + }, + { + "epoch": 98.45, + "learning_rate": 7.92442641062608e-07, + "loss": 1.749, + "step": 19871500 + }, + { + "epoch": 98.45, + "learning_rate": 7.912040546365245e-07, + "loss": 1.7481, + "step": 19872000 + }, + { + "epoch": 98.46, + "learning_rate": 7.899679453832929e-07, + "loss": 1.7642, + "step": 19872500 + }, + { + "epoch": 98.46, + "learning_rate": 7.887293589572093e-07, + "loss": 1.7562, + "step": 19873000 + }, + { + "epoch": 98.46, + "learning_rate": 7.874907725311256e-07, + "loss": 1.731, + "step": 19873500 + }, + { + "epoch": 98.46, + "learning_rate": 7.862521861050421e-07, + "loss": 1.7488, + "step": 19874000 + }, + { + "epoch": 98.47, + "learning_rate": 7.850135996789585e-07, + "loss": 1.753, + "step": 19874500 + }, + { + "epoch": 98.47, + "learning_rate": 7.837750132528748e-07, + "loss": 1.7585, + "step": 19875000 + }, + { + "epoch": 98.47, + "learning_rate": 7.825364268267911e-07, + "loss": 1.7577, + "step": 19875500 + }, + { + "epoch": 98.47, + "learning_rate": 7.812978404007075e-07, + "loss": 1.7818, + "step": 19876000 + }, + { + "epoch": 98.48, + "learning_rate": 7.800617311474761e-07, + "loss": 1.7708, + "step": 19876500 + }, + { + "epoch": 98.48, + "learning_rate": 7.788231447213924e-07, + "loss": 1.7725, + "step": 19877000 + }, + { + "epoch": 98.48, + "learning_rate": 7.775845582953088e-07, + "loss": 1.7589, + "step": 19877500 + }, + { + "epoch": 98.48, + "learning_rate": 7.763459718692251e-07, + "loss": 1.783, + "step": 19878000 + }, + { + "epoch": 98.48, + "learning_rate": 7.751073854431415e-07, + "loss": 1.7505, + "step": 19878500 + }, + { + "epoch": 98.49, + "learning_rate": 7.7387127618991e-07, + "loss": 1.7474, + "step": 19879000 + }, + { + "epoch": 98.49, + "learning_rate": 7.726351669366786e-07, + "loss": 1.7412, + "step": 19879500 + }, + { + "epoch": 98.49, + "learning_rate": 7.713990576834471e-07, + "loss": 1.7456, + "step": 19880000 + }, + { + "epoch": 98.49, + "learning_rate": 7.701604712573634e-07, + "loss": 1.745, + "step": 19880500 + }, + { + "epoch": 98.5, + "learning_rate": 7.689218848312799e-07, + "loss": 1.765, + "step": 19881000 + }, + { + "epoch": 98.5, + "learning_rate": 7.676832984051962e-07, + "loss": 1.7472, + "step": 19881500 + }, + { + "epoch": 98.5, + "learning_rate": 7.664447119791125e-07, + "loss": 1.794, + "step": 19882000 + }, + { + "epoch": 98.5, + "learning_rate": 7.65206125553029e-07, + "loss": 1.7685, + "step": 19882500 + }, + { + "epoch": 98.51, + "learning_rate": 7.639675391269452e-07, + "loss": 1.7655, + "step": 19883000 + }, + { + "epoch": 98.51, + "learning_rate": 7.627289527008615e-07, + "loss": 1.7721, + "step": 19883500 + }, + { + "epoch": 98.51, + "learning_rate": 7.614903662747779e-07, + "loss": 1.7591, + "step": 19884000 + }, + { + "epoch": 98.51, + "learning_rate": 7.602517798486943e-07, + "loss": 1.7528, + "step": 19884500 + }, + { + "epoch": 98.52, + "learning_rate": 7.590131934226106e-07, + "loss": 1.7471, + "step": 19885000 + }, + { + "epoch": 98.52, + "learning_rate": 7.57774606996527e-07, + "loss": 1.7757, + "step": 19885500 + }, + { + "epoch": 98.52, + "learning_rate": 7.565360205704434e-07, + "loss": 1.7342, + "step": 19886000 + }, + { + "epoch": 98.52, + "learning_rate": 7.552974341443597e-07, + "loss": 1.7749, + "step": 19886500 + }, + { + "epoch": 98.53, + "learning_rate": 7.540613248911283e-07, + "loss": 1.7655, + "step": 19887000 + }, + { + "epoch": 98.53, + "learning_rate": 7.528252156378968e-07, + "loss": 1.7489, + "step": 19887500 + }, + { + "epoch": 98.53, + "learning_rate": 7.515866292118132e-07, + "loss": 1.7582, + "step": 19888000 + }, + { + "epoch": 98.53, + "learning_rate": 7.503480427857295e-07, + "loss": 1.7692, + "step": 19888500 + }, + { + "epoch": 98.54, + "learning_rate": 7.491094563596459e-07, + "loss": 1.769, + "step": 19889000 + }, + { + "epoch": 98.54, + "learning_rate": 7.478708699335622e-07, + "loss": 1.7609, + "step": 19889500 + }, + { + "epoch": 98.54, + "learning_rate": 7.466347606803308e-07, + "loss": 1.7562, + "step": 19890000 + }, + { + "epoch": 98.54, + "learning_rate": 7.453961742542472e-07, + "loss": 1.7585, + "step": 19890500 + }, + { + "epoch": 98.55, + "learning_rate": 7.441600650010157e-07, + "loss": 1.7718, + "step": 19891000 + }, + { + "epoch": 98.55, + "learning_rate": 7.42921478574932e-07, + "loss": 1.7594, + "step": 19891500 + }, + { + "epoch": 98.55, + "learning_rate": 7.416828921488484e-07, + "loss": 1.7583, + "step": 19892000 + }, + { + "epoch": 98.55, + "learning_rate": 7.404443057227648e-07, + "loss": 1.7754, + "step": 19892500 + }, + { + "epoch": 98.56, + "learning_rate": 7.392081964695333e-07, + "loss": 1.7598, + "step": 19893000 + }, + { + "epoch": 98.56, + "learning_rate": 7.379720872163018e-07, + "loss": 1.7743, + "step": 19893500 + }, + { + "epoch": 98.56, + "learning_rate": 7.367335007902182e-07, + "loss": 1.7669, + "step": 19894000 + }, + { + "epoch": 98.56, + "learning_rate": 7.354949143641346e-07, + "loss": 1.7569, + "step": 19894500 + }, + { + "epoch": 98.57, + "learning_rate": 7.34256327938051e-07, + "loss": 1.7755, + "step": 19895000 + }, + { + "epoch": 98.57, + "learning_rate": 7.330202186848194e-07, + "loss": 1.7562, + "step": 19895500 + }, + { + "epoch": 98.57, + "learning_rate": 7.317816322587357e-07, + "loss": 1.7859, + "step": 19896000 + }, + { + "epoch": 98.57, + "learning_rate": 7.305430458326521e-07, + "loss": 1.7473, + "step": 19896500 + }, + { + "epoch": 98.58, + "learning_rate": 7.293044594065685e-07, + "loss": 1.7524, + "step": 19897000 + }, + { + "epoch": 98.58, + "learning_rate": 7.280658729804848e-07, + "loss": 1.7527, + "step": 19897500 + }, + { + "epoch": 98.58, + "learning_rate": 7.268272865544012e-07, + "loss": 1.7752, + "step": 19898000 + }, + { + "epoch": 98.58, + "learning_rate": 7.255887001283176e-07, + "loss": 1.763, + "step": 19898500 + }, + { + "epoch": 98.59, + "learning_rate": 7.243525908750861e-07, + "loss": 1.7595, + "step": 19899000 + }, + { + "epoch": 98.59, + "learning_rate": 7.231140044490024e-07, + "loss": 1.7701, + "step": 19899500 + }, + { + "epoch": 98.59, + "learning_rate": 7.218754180229189e-07, + "loss": 1.7829, + "step": 19900000 + }, + { + "epoch": 98.59, + "learning_rate": 7.206368315968352e-07, + "loss": 1.7343, + "step": 19900500 + }, + { + "epoch": 98.6, + "learning_rate": 7.193982451707515e-07, + "loss": 1.7697, + "step": 19901000 + }, + { + "epoch": 98.6, + "learning_rate": 7.181596587446679e-07, + "loss": 1.773, + "step": 19901500 + }, + { + "epoch": 98.6, + "learning_rate": 7.169210723185843e-07, + "loss": 1.7916, + "step": 19902000 + }, + { + "epoch": 98.6, + "learning_rate": 7.156824858925006e-07, + "loss": 1.7715, + "step": 19902500 + }, + { + "epoch": 98.61, + "learning_rate": 7.14443899466417e-07, + "loss": 1.759, + "step": 19903000 + }, + { + "epoch": 98.61, + "learning_rate": 7.132053130403334e-07, + "loss": 1.7443, + "step": 19903500 + }, + { + "epoch": 98.61, + "learning_rate": 7.119667266142497e-07, + "loss": 1.7597, + "step": 19904000 + }, + { + "epoch": 98.61, + "learning_rate": 7.107281401881661e-07, + "loss": 1.7537, + "step": 19904500 + }, + { + "epoch": 98.62, + "learning_rate": 7.094895537620825e-07, + "loss": 1.763, + "step": 19905000 + }, + { + "epoch": 98.62, + "learning_rate": 7.08253444508851e-07, + "loss": 1.7714, + "step": 19905500 + }, + { + "epoch": 98.62, + "learning_rate": 7.070148580827674e-07, + "loss": 1.7592, + "step": 19906000 + }, + { + "epoch": 98.62, + "learning_rate": 7.057762716566837e-07, + "loss": 1.756, + "step": 19906500 + }, + { + "epoch": 98.63, + "learning_rate": 7.045376852306001e-07, + "loss": 1.7714, + "step": 19907000 + }, + { + "epoch": 98.63, + "learning_rate": 7.032990988045165e-07, + "loss": 1.771, + "step": 19907500 + }, + { + "epoch": 98.63, + "learning_rate": 7.02062989551285e-07, + "loss": 1.7527, + "step": 19908000 + }, + { + "epoch": 98.63, + "learning_rate": 7.008268802980534e-07, + "loss": 1.764, + "step": 19908500 + }, + { + "epoch": 98.64, + "learning_rate": 6.995882938719698e-07, + "loss": 1.7534, + "step": 19909000 + }, + { + "epoch": 98.64, + "learning_rate": 6.983497074458862e-07, + "loss": 1.7711, + "step": 19909500 + }, + { + "epoch": 98.64, + "learning_rate": 6.971111210198025e-07, + "loss": 1.7525, + "step": 19910000 + }, + { + "epoch": 98.64, + "learning_rate": 6.958725345937188e-07, + "loss": 1.7637, + "step": 19910500 + }, + { + "epoch": 98.65, + "learning_rate": 6.946339481676353e-07, + "loss": 1.7835, + "step": 19911000 + }, + { + "epoch": 98.65, + "learning_rate": 6.933953617415516e-07, + "loss": 1.7604, + "step": 19911500 + }, + { + "epoch": 98.65, + "learning_rate": 6.921592524883201e-07, + "loss": 1.76, + "step": 19912000 + }, + { + "epoch": 98.65, + "learning_rate": 6.909206660622365e-07, + "loss": 1.7501, + "step": 19912500 + }, + { + "epoch": 98.66, + "learning_rate": 6.896820796361529e-07, + "loss": 1.7673, + "step": 19913000 + }, + { + "epoch": 98.66, + "learning_rate": 6.884434932100692e-07, + "loss": 1.7479, + "step": 19913500 + }, + { + "epoch": 98.66, + "learning_rate": 6.872049067839856e-07, + "loss": 1.756, + "step": 19914000 + }, + { + "epoch": 98.66, + "learning_rate": 6.85966320357902e-07, + "loss": 1.7843, + "step": 19914500 + }, + { + "epoch": 98.67, + "learning_rate": 6.847277339318183e-07, + "loss": 1.7636, + "step": 19915000 + }, + { + "epoch": 98.67, + "learning_rate": 6.834891475057347e-07, + "loss": 1.7613, + "step": 19915500 + }, + { + "epoch": 98.67, + "learning_rate": 6.822505610796511e-07, + "loss": 1.7562, + "step": 19916000 + }, + { + "epoch": 98.67, + "learning_rate": 6.810144518264196e-07, + "loss": 1.7684, + "step": 19916500 + }, + { + "epoch": 98.68, + "learning_rate": 6.79775865400336e-07, + "loss": 1.768, + "step": 19917000 + }, + { + "epoch": 98.68, + "learning_rate": 6.785372789742523e-07, + "loss": 1.7716, + "step": 19917500 + }, + { + "epoch": 98.68, + "learning_rate": 6.773011697210208e-07, + "loss": 1.7659, + "step": 19918000 + }, + { + "epoch": 98.68, + "learning_rate": 6.760625832949373e-07, + "loss": 1.767, + "step": 19918500 + }, + { + "epoch": 98.69, + "learning_rate": 6.748239968688536e-07, + "loss": 1.7554, + "step": 19919000 + }, + { + "epoch": 98.69, + "learning_rate": 6.735854104427699e-07, + "loss": 1.774, + "step": 19919500 + }, + { + "epoch": 98.69, + "learning_rate": 6.723468240166863e-07, + "loss": 1.7621, + "step": 19920000 + }, + { + "epoch": 98.69, + "learning_rate": 6.71113191936307e-07, + "loss": 1.7607, + "step": 19920500 + }, + { + "epoch": 98.7, + "learning_rate": 6.698746055102233e-07, + "loss": 1.7459, + "step": 19921000 + }, + { + "epoch": 98.7, + "learning_rate": 6.686360190841396e-07, + "loss": 1.777, + "step": 19921500 + }, + { + "epoch": 98.7, + "learning_rate": 6.67397432658056e-07, + "loss": 1.7674, + "step": 19922000 + }, + { + "epoch": 98.7, + "learning_rate": 6.661588462319724e-07, + "loss": 1.7706, + "step": 19922500 + }, + { + "epoch": 98.71, + "learning_rate": 6.649202598058887e-07, + "loss": 1.7572, + "step": 19923000 + }, + { + "epoch": 98.71, + "learning_rate": 6.636816733798051e-07, + "loss": 1.754, + "step": 19923500 + }, + { + "epoch": 98.71, + "learning_rate": 6.624430869537215e-07, + "loss": 1.7912, + "step": 19924000 + }, + { + "epoch": 98.71, + "learning_rate": 6.612045005276378e-07, + "loss": 1.7453, + "step": 19924500 + }, + { + "epoch": 98.72, + "learning_rate": 6.599659141015542e-07, + "loss": 1.7433, + "step": 19925000 + }, + { + "epoch": 98.72, + "learning_rate": 6.587273276754705e-07, + "loss": 1.7627, + "step": 19925500 + }, + { + "epoch": 98.72, + "learning_rate": 6.574912184222391e-07, + "loss": 1.7769, + "step": 19926000 + }, + { + "epoch": 98.72, + "learning_rate": 6.562526319961555e-07, + "loss": 1.7326, + "step": 19926500 + }, + { + "epoch": 98.73, + "learning_rate": 6.550140455700718e-07, + "loss": 1.7575, + "step": 19927000 + }, + { + "epoch": 98.73, + "learning_rate": 6.537754591439882e-07, + "loss": 1.7729, + "step": 19927500 + }, + { + "epoch": 98.73, + "learning_rate": 6.525368727179046e-07, + "loss": 1.7656, + "step": 19928000 + }, + { + "epoch": 98.73, + "learning_rate": 6.512982862918209e-07, + "loss": 1.7689, + "step": 19928500 + }, + { + "epoch": 98.74, + "learning_rate": 6.500596998657372e-07, + "loss": 1.7773, + "step": 19929000 + }, + { + "epoch": 98.74, + "learning_rate": 6.488211134396537e-07, + "loss": 1.7602, + "step": 19929500 + }, + { + "epoch": 98.74, + "learning_rate": 6.4758252701357e-07, + "loss": 1.7582, + "step": 19930000 + }, + { + "epoch": 98.74, + "learning_rate": 6.463439405874863e-07, + "loss": 1.7863, + "step": 19930500 + }, + { + "epoch": 98.75, + "learning_rate": 6.451053541614028e-07, + "loss": 1.7684, + "step": 19931000 + }, + { + "epoch": 98.75, + "learning_rate": 6.438667677353191e-07, + "loss": 1.7703, + "step": 19931500 + }, + { + "epoch": 98.75, + "learning_rate": 6.426331356549398e-07, + "loss": 1.7741, + "step": 19932000 + }, + { + "epoch": 98.75, + "learning_rate": 6.413945492288561e-07, + "loss": 1.7631, + "step": 19932500 + }, + { + "epoch": 98.75, + "learning_rate": 6.401559628027725e-07, + "loss": 1.7686, + "step": 19933000 + }, + { + "epoch": 98.76, + "learning_rate": 6.389198535495411e-07, + "loss": 1.7607, + "step": 19933500 + }, + { + "epoch": 98.76, + "learning_rate": 6.376837442963095e-07, + "loss": 1.7537, + "step": 19934000 + }, + { + "epoch": 98.76, + "learning_rate": 6.364451578702259e-07, + "loss": 1.7738, + "step": 19934500 + }, + { + "epoch": 98.76, + "learning_rate": 6.352065714441422e-07, + "loss": 1.7772, + "step": 19935000 + }, + { + "epoch": 98.77, + "learning_rate": 6.339679850180586e-07, + "loss": 1.7415, + "step": 19935500 + }, + { + "epoch": 98.77, + "learning_rate": 6.32729398591975e-07, + "loss": 1.7646, + "step": 19936000 + }, + { + "epoch": 98.77, + "learning_rate": 6.314908121658913e-07, + "loss": 1.7591, + "step": 19936500 + }, + { + "epoch": 98.77, + "learning_rate": 6.302522257398076e-07, + "loss": 1.7587, + "step": 19937000 + }, + { + "epoch": 98.78, + "learning_rate": 6.290161164865762e-07, + "loss": 1.7769, + "step": 19937500 + }, + { + "epoch": 98.78, + "learning_rate": 6.277775300604926e-07, + "loss": 1.7386, + "step": 19938000 + }, + { + "epoch": 98.78, + "learning_rate": 6.265389436344089e-07, + "loss": 1.7738, + "step": 19938500 + }, + { + "epoch": 98.78, + "learning_rate": 6.253003572083253e-07, + "loss": 1.7834, + "step": 19939000 + }, + { + "epoch": 98.79, + "learning_rate": 6.240642479550939e-07, + "loss": 1.7553, + "step": 19939500 + }, + { + "epoch": 98.79, + "learning_rate": 6.228256615290102e-07, + "loss": 1.7858, + "step": 19940000 + }, + { + "epoch": 98.79, + "learning_rate": 6.215870751029265e-07, + "loss": 1.7736, + "step": 19940500 + }, + { + "epoch": 98.79, + "learning_rate": 6.203484886768429e-07, + "loss": 1.7616, + "step": 19941000 + }, + { + "epoch": 98.8, + "learning_rate": 6.191099022507593e-07, + "loss": 1.7615, + "step": 19941500 + }, + { + "epoch": 98.8, + "learning_rate": 6.178713158246756e-07, + "loss": 1.7848, + "step": 19942000 + }, + { + "epoch": 98.8, + "learning_rate": 6.16632729398592e-07, + "loss": 1.7685, + "step": 19942500 + }, + { + "epoch": 98.8, + "learning_rate": 6.153941429725084e-07, + "loss": 1.7641, + "step": 19943000 + }, + { + "epoch": 98.81, + "learning_rate": 6.141555565464247e-07, + "loss": 1.7567, + "step": 19943500 + }, + { + "epoch": 98.81, + "learning_rate": 6.129169701203411e-07, + "loss": 1.7338, + "step": 19944000 + }, + { + "epoch": 98.81, + "learning_rate": 6.116783836942575e-07, + "loss": 1.7559, + "step": 19944500 + }, + { + "epoch": 98.81, + "learning_rate": 6.104397972681738e-07, + "loss": 1.7743, + "step": 19945000 + }, + { + "epoch": 98.82, + "learning_rate": 6.092012108420902e-07, + "loss": 1.7761, + "step": 19945500 + }, + { + "epoch": 98.82, + "learning_rate": 6.079651015888587e-07, + "loss": 1.767, + "step": 19946000 + }, + { + "epoch": 98.82, + "learning_rate": 6.067265151627751e-07, + "loss": 1.774, + "step": 19946500 + }, + { + "epoch": 98.82, + "learning_rate": 6.054879287366915e-07, + "loss": 1.7439, + "step": 19947000 + }, + { + "epoch": 98.83, + "learning_rate": 6.042493423106077e-07, + "loss": 1.7601, + "step": 19947500 + }, + { + "epoch": 98.83, + "learning_rate": 6.030132330573762e-07, + "loss": 1.7519, + "step": 19948000 + }, + { + "epoch": 98.83, + "learning_rate": 6.017746466312927e-07, + "loss": 1.7706, + "step": 19948500 + }, + { + "epoch": 98.83, + "learning_rate": 6.00536060205209e-07, + "loss": 1.7587, + "step": 19949000 + }, + { + "epoch": 98.84, + "learning_rate": 5.992974737791253e-07, + "loss": 1.7624, + "step": 19949500 + }, + { + "epoch": 98.84, + "learning_rate": 5.980613645258939e-07, + "loss": 1.7623, + "step": 19950000 + }, + { + "epoch": 98.84, + "learning_rate": 5.968227780998103e-07, + "loss": 1.7582, + "step": 19950500 + }, + { + "epoch": 98.84, + "learning_rate": 5.955841916737266e-07, + "loss": 1.78, + "step": 19951000 + }, + { + "epoch": 98.85, + "learning_rate": 5.94345605247643e-07, + "loss": 1.7852, + "step": 19951500 + }, + { + "epoch": 98.85, + "learning_rate": 5.931070188215594e-07, + "loss": 1.7913, + "step": 19952000 + }, + { + "epoch": 98.85, + "learning_rate": 5.918684323954757e-07, + "loss": 1.747, + "step": 19952500 + }, + { + "epoch": 98.85, + "learning_rate": 5.906298459693921e-07, + "loss": 1.7544, + "step": 19953000 + }, + { + "epoch": 98.86, + "learning_rate": 5.893912595433084e-07, + "loss": 1.762, + "step": 19953500 + }, + { + "epoch": 98.86, + "learning_rate": 5.881526731172248e-07, + "loss": 1.7411, + "step": 19954000 + }, + { + "epoch": 98.86, + "learning_rate": 5.869165638639934e-07, + "loss": 1.7551, + "step": 19954500 + }, + { + "epoch": 98.86, + "learning_rate": 5.856779774379097e-07, + "loss": 1.7655, + "step": 19955000 + }, + { + "epoch": 98.87, + "learning_rate": 5.844393910118261e-07, + "loss": 1.7744, + "step": 19955500 + }, + { + "epoch": 98.87, + "learning_rate": 5.832008045857425e-07, + "loss": 1.7486, + "step": 19956000 + }, + { + "epoch": 98.87, + "learning_rate": 5.819622181596588e-07, + "loss": 1.773, + "step": 19956500 + }, + { + "epoch": 98.87, + "learning_rate": 5.807236317335751e-07, + "loss": 1.7752, + "step": 19957000 + }, + { + "epoch": 98.88, + "learning_rate": 5.794850453074916e-07, + "loss": 1.7395, + "step": 19957500 + }, + { + "epoch": 98.88, + "learning_rate": 5.782464588814079e-07, + "loss": 1.7629, + "step": 19958000 + }, + { + "epoch": 98.88, + "learning_rate": 5.770078724553242e-07, + "loss": 1.7464, + "step": 19958500 + }, + { + "epoch": 98.88, + "learning_rate": 5.757717632020927e-07, + "loss": 1.7574, + "step": 19959000 + }, + { + "epoch": 98.89, + "learning_rate": 5.745331767760092e-07, + "loss": 1.741, + "step": 19959500 + }, + { + "epoch": 98.89, + "learning_rate": 5.732945903499255e-07, + "loss": 1.781, + "step": 19960000 + }, + { + "epoch": 98.89, + "learning_rate": 5.720560039238417e-07, + "loss": 1.7535, + "step": 19960500 + }, + { + "epoch": 98.89, + "learning_rate": 5.708174174977582e-07, + "loss": 1.7668, + "step": 19961000 + }, + { + "epoch": 98.9, + "learning_rate": 5.695788310716745e-07, + "loss": 1.7651, + "step": 19961500 + }, + { + "epoch": 98.9, + "learning_rate": 5.683402446455908e-07, + "loss": 1.7678, + "step": 19962000 + }, + { + "epoch": 98.9, + "learning_rate": 5.671016582195073e-07, + "loss": 1.7579, + "step": 19962500 + }, + { + "epoch": 98.9, + "learning_rate": 5.658630717934236e-07, + "loss": 1.7715, + "step": 19963000 + }, + { + "epoch": 98.91, + "learning_rate": 5.646244853673399e-07, + "loss": 1.7803, + "step": 19963500 + }, + { + "epoch": 98.91, + "learning_rate": 5.633858989412563e-07, + "loss": 1.7687, + "step": 19964000 + }, + { + "epoch": 98.91, + "learning_rate": 5.621473125151727e-07, + "loss": 1.7868, + "step": 19964500 + }, + { + "epoch": 98.91, + "learning_rate": 5.609112032619412e-07, + "loss": 1.7646, + "step": 19965000 + }, + { + "epoch": 98.92, + "learning_rate": 5.596726168358576e-07, + "loss": 1.7605, + "step": 19965500 + }, + { + "epoch": 98.92, + "learning_rate": 5.584340304097739e-07, + "loss": 1.7478, + "step": 19966000 + }, + { + "epoch": 98.92, + "learning_rate": 5.571954439836903e-07, + "loss": 1.7787, + "step": 19966500 + }, + { + "epoch": 98.92, + "learning_rate": 5.559593347304589e-07, + "loss": 1.7529, + "step": 19967000 + }, + { + "epoch": 98.93, + "learning_rate": 5.547207483043752e-07, + "loss": 1.735, + "step": 19967500 + }, + { + "epoch": 98.93, + "learning_rate": 5.534871162239959e-07, + "loss": 1.7507, + "step": 19968000 + }, + { + "epoch": 98.93, + "learning_rate": 5.522485297979122e-07, + "loss": 1.7483, + "step": 19968500 + }, + { + "epoch": 98.93, + "learning_rate": 5.510099433718287e-07, + "loss": 1.7543, + "step": 19969000 + }, + { + "epoch": 98.94, + "learning_rate": 5.49771356945745e-07, + "loss": 1.7729, + "step": 19969500 + }, + { + "epoch": 98.94, + "learning_rate": 5.485327705196613e-07, + "loss": 1.7646, + "step": 19970000 + }, + { + "epoch": 98.94, + "learning_rate": 5.472941840935778e-07, + "loss": 1.7822, + "step": 19970500 + }, + { + "epoch": 98.94, + "learning_rate": 5.460580748403463e-07, + "loss": 1.7671, + "step": 19971000 + }, + { + "epoch": 98.95, + "learning_rate": 5.448194884142626e-07, + "loss": 1.7559, + "step": 19971500 + }, + { + "epoch": 98.95, + "learning_rate": 5.43580901988179e-07, + "loss": 1.7706, + "step": 19972000 + }, + { + "epoch": 98.95, + "learning_rate": 5.423447927349474e-07, + "loss": 1.7557, + "step": 19972500 + }, + { + "epoch": 98.95, + "learning_rate": 5.411062063088638e-07, + "loss": 1.7689, + "step": 19973000 + }, + { + "epoch": 98.96, + "learning_rate": 5.398700970556324e-07, + "loss": 1.7753, + "step": 19973500 + }, + { + "epoch": 98.96, + "learning_rate": 5.386315106295487e-07, + "loss": 1.77, + "step": 19974000 + }, + { + "epoch": 98.96, + "learning_rate": 5.37392924203465e-07, + "loss": 1.7785, + "step": 19974500 + }, + { + "epoch": 98.96, + "learning_rate": 5.361543377773815e-07, + "loss": 1.7781, + "step": 19975000 + }, + { + "epoch": 98.97, + "learning_rate": 5.349157513512978e-07, + "loss": 1.7744, + "step": 19975500 + }, + { + "epoch": 98.97, + "learning_rate": 5.336771649252141e-07, + "loss": 1.7619, + "step": 19976000 + }, + { + "epoch": 98.97, + "learning_rate": 5.324385784991306e-07, + "loss": 1.7535, + "step": 19976500 + }, + { + "epoch": 98.97, + "learning_rate": 5.311999920730469e-07, + "loss": 1.7915, + "step": 19977000 + }, + { + "epoch": 98.98, + "learning_rate": 5.299614056469632e-07, + "loss": 1.777, + "step": 19977500 + }, + { + "epoch": 98.98, + "learning_rate": 5.287228192208796e-07, + "loss": 1.7638, + "step": 19978000 + }, + { + "epoch": 98.98, + "learning_rate": 5.27484232794796e-07, + "loss": 1.7584, + "step": 19978500 + }, + { + "epoch": 98.98, + "learning_rate": 5.262456463687123e-07, + "loss": 1.7711, + "step": 19979000 + }, + { + "epoch": 98.99, + "learning_rate": 5.250070599426287e-07, + "loss": 1.7525, + "step": 19979500 + }, + { + "epoch": 98.99, + "learning_rate": 5.237709506893973e-07, + "loss": 1.7502, + "step": 19980000 + }, + { + "epoch": 98.99, + "learning_rate": 5.225323642633136e-07, + "loss": 1.7675, + "step": 19980500 + }, + { + "epoch": 98.99, + "learning_rate": 5.212937778372299e-07, + "loss": 1.7547, + "step": 19981000 + }, + { + "epoch": 99.0, + "learning_rate": 5.200551914111463e-07, + "loss": 1.763, + "step": 19981500 + }, + { + "epoch": 99.0, + "learning_rate": 5.188166049850627e-07, + "loss": 1.7384, + "step": 19982000 + }, + { + "epoch": 99.0, + "eval_accuracy": 0.6875362239222096, + "eval_accuracy_mlm": 0.6489239224448309, + "eval_accuracy_nsp": 0.869461364376233, + "eval_loss": 2.321455478668213, + "eval_runtime": 147.1717, + "eval_samples_per_second": 1732.392, + "eval_steps_per_second": 72.188, + "step": 19982457 + }, + { + "epoch": 99.0, + "learning_rate": 5.175804957318312e-07, + "loss": 1.7502, + "step": 19982500 + }, + { + "epoch": 99.0, + "learning_rate": 5.163419093057476e-07, + "loss": 1.7359, + "step": 19983000 + }, + { + "epoch": 99.01, + "learning_rate": 5.151033228796639e-07, + "loss": 1.7604, + "step": 19983500 + }, + { + "epoch": 99.01, + "learning_rate": 5.138647364535803e-07, + "loss": 1.7585, + "step": 19984000 + }, + { + "epoch": 99.01, + "learning_rate": 5.126261500274967e-07, + "loss": 1.76, + "step": 19984500 + }, + { + "epoch": 99.01, + "learning_rate": 5.11387563601413e-07, + "loss": 1.7339, + "step": 19985000 + }, + { + "epoch": 99.02, + "learning_rate": 5.101489771753294e-07, + "loss": 1.7593, + "step": 19985500 + }, + { + "epoch": 99.02, + "learning_rate": 5.089103907492458e-07, + "loss": 1.7356, + "step": 19986000 + }, + { + "epoch": 99.02, + "learning_rate": 5.076742814960142e-07, + "loss": 1.7545, + "step": 19986500 + }, + { + "epoch": 99.02, + "learning_rate": 5.064356950699305e-07, + "loss": 1.7492, + "step": 19987000 + }, + { + "epoch": 99.02, + "learning_rate": 5.05197108643847e-07, + "loss": 1.7506, + "step": 19987500 + }, + { + "epoch": 99.03, + "learning_rate": 5.039585222177633e-07, + "loss": 1.7505, + "step": 19988000 + }, + { + "epoch": 99.03, + "learning_rate": 5.027199357916796e-07, + "loss": 1.744, + "step": 19988500 + }, + { + "epoch": 99.03, + "learning_rate": 5.014813493655961e-07, + "loss": 1.7645, + "step": 19989000 + }, + { + "epoch": 99.03, + "learning_rate": 5.002427629395124e-07, + "loss": 1.7681, + "step": 19989500 + }, + { + "epoch": 99.04, + "learning_rate": 4.990041765134287e-07, + "loss": 1.7629, + "step": 19990000 + }, + { + "epoch": 99.04, + "learning_rate": 4.977655900873451e-07, + "loss": 1.7545, + "step": 19990500 + }, + { + "epoch": 99.04, + "learning_rate": 4.965270036612615e-07, + "loss": 1.7557, + "step": 19991000 + }, + { + "epoch": 99.04, + "learning_rate": 4.952884172351778e-07, + "loss": 1.762, + "step": 19991500 + }, + { + "epoch": 99.05, + "learning_rate": 4.940523079819464e-07, + "loss": 1.7716, + "step": 19992000 + }, + { + "epoch": 99.05, + "learning_rate": 4.928137215558628e-07, + "loss": 1.7834, + "step": 19992500 + }, + { + "epoch": 99.05, + "learning_rate": 4.915751351297791e-07, + "loss": 1.7628, + "step": 19993000 + }, + { + "epoch": 99.05, + "learning_rate": 4.903365487036955e-07, + "loss": 1.769, + "step": 19993500 + }, + { + "epoch": 99.06, + "learning_rate": 4.890979622776118e-07, + "loss": 1.7895, + "step": 19994000 + }, + { + "epoch": 99.06, + "learning_rate": 4.878593758515282e-07, + "loss": 1.751, + "step": 19994500 + }, + { + "epoch": 99.06, + "learning_rate": 4.866207894254446e-07, + "loss": 1.7813, + "step": 19995000 + }, + { + "epoch": 99.06, + "learning_rate": 4.853822029993609e-07, + "loss": 1.7295, + "step": 19995500 + }, + { + "epoch": 99.07, + "learning_rate": 4.841436165732773e-07, + "loss": 1.7715, + "step": 19996000 + }, + { + "epoch": 99.07, + "learning_rate": 4.829075073200458e-07, + "loss": 1.7592, + "step": 19996500 + }, + { + "epoch": 99.07, + "learning_rate": 4.816689208939622e-07, + "loss": 1.7537, + "step": 19997000 + }, + { + "epoch": 99.07, + "learning_rate": 4.804352888135829e-07, + "loss": 1.7537, + "step": 19997500 + }, + { + "epoch": 99.08, + "learning_rate": 4.791967023874992e-07, + "loss": 1.7841, + "step": 19998000 + }, + { + "epoch": 99.08, + "learning_rate": 4.779581159614157e-07, + "loss": 1.7582, + "step": 19998500 + }, + { + "epoch": 99.08, + "learning_rate": 4.7671952953533195e-07, + "loss": 1.777, + "step": 19999000 + }, + { + "epoch": 99.08, + "learning_rate": 4.7548342028210044e-07, + "loss": 1.7739, + "step": 19999500 + }, + { + "epoch": 99.09, + "learning_rate": 4.7424483385601684e-07, + "loss": 1.7789, + "step": 20000000 + }, + { + "epoch": 99.09, + "learning_rate": 4.730062474299332e-07, + "loss": 1.7705, + "step": 20000500 + }, + { + "epoch": 99.09, + "learning_rate": 4.7176766100384953e-07, + "loss": 1.7505, + "step": 20001000 + }, + { + "epoch": 99.09, + "learning_rate": 4.7052907457776593e-07, + "loss": 1.7712, + "step": 20001500 + }, + { + "epoch": 99.1, + "learning_rate": 4.692929653245345e-07, + "loss": 1.761, + "step": 20002000 + }, + { + "epoch": 99.1, + "learning_rate": 4.6805437889845077e-07, + "loss": 1.7761, + "step": 20002500 + }, + { + "epoch": 99.1, + "learning_rate": 4.668157924723671e-07, + "loss": 1.7707, + "step": 20003000 + }, + { + "epoch": 99.1, + "learning_rate": 4.655772060462835e-07, + "loss": 1.7418, + "step": 20003500 + }, + { + "epoch": 99.11, + "learning_rate": 4.6434109679305206e-07, + "loss": 1.7611, + "step": 20004000 + }, + { + "epoch": 99.11, + "learning_rate": 4.6310251036696835e-07, + "loss": 1.7658, + "step": 20004500 + }, + { + "epoch": 99.11, + "learning_rate": 4.618639239408848e-07, + "loss": 1.7399, + "step": 20005000 + }, + { + "epoch": 99.11, + "learning_rate": 4.606253375148011e-07, + "loss": 1.7433, + "step": 20005500 + }, + { + "epoch": 99.12, + "learning_rate": 4.5938675108871744e-07, + "loss": 1.7513, + "step": 20006000 + }, + { + "epoch": 99.12, + "learning_rate": 4.581481646626339e-07, + "loss": 1.7238, + "step": 20006500 + }, + { + "epoch": 99.12, + "learning_rate": 4.569095782365502e-07, + "loss": 1.778, + "step": 20007000 + }, + { + "epoch": 99.12, + "learning_rate": 4.5567099181046653e-07, + "loss": 1.7623, + "step": 20007500 + }, + { + "epoch": 99.13, + "learning_rate": 4.5443240538438293e-07, + "loss": 1.7499, + "step": 20008000 + }, + { + "epoch": 99.13, + "learning_rate": 4.531962961311515e-07, + "loss": 1.7473, + "step": 20008500 + }, + { + "epoch": 99.13, + "learning_rate": 4.5195770970506777e-07, + "loss": 1.7626, + "step": 20009000 + }, + { + "epoch": 99.13, + "learning_rate": 4.5072160045183637e-07, + "loss": 1.7558, + "step": 20009500 + }, + { + "epoch": 99.14, + "learning_rate": 4.494830140257527e-07, + "loss": 1.788, + "step": 20010000 + }, + { + "epoch": 99.14, + "learning_rate": 4.4824442759966906e-07, + "loss": 1.7517, + "step": 20010500 + }, + { + "epoch": 99.14, + "learning_rate": 4.4700584117358546e-07, + "loss": 1.7667, + "step": 20011000 + }, + { + "epoch": 99.14, + "learning_rate": 4.457672547475018e-07, + "loss": 1.7572, + "step": 20011500 + }, + { + "epoch": 99.15, + "learning_rate": 4.4452866832141815e-07, + "loss": 1.7448, + "step": 20012000 + }, + { + "epoch": 99.15, + "learning_rate": 4.4329008189533455e-07, + "loss": 1.7616, + "step": 20012500 + }, + { + "epoch": 99.15, + "learning_rate": 4.420514954692509e-07, + "loss": 1.763, + "step": 20013000 + }, + { + "epoch": 99.15, + "learning_rate": 4.408129090431672e-07, + "loss": 1.7487, + "step": 20013500 + }, + { + "epoch": 99.16, + "learning_rate": 4.3957432261708364e-07, + "loss": 1.7589, + "step": 20014000 + }, + { + "epoch": 99.16, + "learning_rate": 4.3833821336385213e-07, + "loss": 1.7659, + "step": 20014500 + }, + { + "epoch": 99.16, + "learning_rate": 4.371021041106206e-07, + "loss": 1.7691, + "step": 20015000 + }, + { + "epoch": 99.16, + "learning_rate": 4.3586351768453697e-07, + "loss": 1.7606, + "step": 20015500 + }, + { + "epoch": 99.17, + "learning_rate": 4.3462493125845337e-07, + "loss": 1.7476, + "step": 20016000 + }, + { + "epoch": 99.17, + "learning_rate": 4.333863448323697e-07, + "loss": 1.78, + "step": 20016500 + }, + { + "epoch": 99.17, + "learning_rate": 4.3214775840628606e-07, + "loss": 1.7635, + "step": 20017000 + }, + { + "epoch": 99.17, + "learning_rate": 4.3091164915305466e-07, + "loss": 1.753, + "step": 20017500 + }, + { + "epoch": 99.18, + "learning_rate": 4.29673062726971e-07, + "loss": 1.7885, + "step": 20018000 + }, + { + "epoch": 99.18, + "learning_rate": 4.284344763008873e-07, + "loss": 1.7733, + "step": 20018500 + }, + { + "epoch": 99.18, + "learning_rate": 4.2719588987480375e-07, + "loss": 1.7718, + "step": 20019000 + }, + { + "epoch": 99.18, + "learning_rate": 4.2595730344872004e-07, + "loss": 1.7541, + "step": 20019500 + }, + { + "epoch": 99.19, + "learning_rate": 4.247187170226364e-07, + "loss": 1.7578, + "step": 20020000 + }, + { + "epoch": 99.19, + "learning_rate": 4.2348013059655284e-07, + "loss": 1.7662, + "step": 20020500 + }, + { + "epoch": 99.19, + "learning_rate": 4.2224154417046913e-07, + "loss": 1.7545, + "step": 20021000 + }, + { + "epoch": 99.19, + "learning_rate": 4.210054349172377e-07, + "loss": 1.7658, + "step": 20021500 + }, + { + "epoch": 99.2, + "learning_rate": 4.1976684849115407e-07, + "loss": 1.7709, + "step": 20022000 + }, + { + "epoch": 99.2, + "learning_rate": 4.185282620650704e-07, + "loss": 1.7873, + "step": 20022500 + }, + { + "epoch": 99.2, + "learning_rate": 4.172896756389867e-07, + "loss": 1.7605, + "step": 20023000 + }, + { + "epoch": 99.2, + "learning_rate": 4.160535663857553e-07, + "loss": 1.7724, + "step": 20023500 + }, + { + "epoch": 99.21, + "learning_rate": 4.1481497995967166e-07, + "loss": 1.7591, + "step": 20024000 + }, + { + "epoch": 99.21, + "learning_rate": 4.13576393533588e-07, + "loss": 1.7519, + "step": 20024500 + }, + { + "epoch": 99.21, + "learning_rate": 4.123378071075044e-07, + "loss": 1.7457, + "step": 20025000 + }, + { + "epoch": 99.21, + "learning_rate": 4.1109922068142075e-07, + "loss": 1.7402, + "step": 20025500 + }, + { + "epoch": 99.22, + "learning_rate": 4.0986311142818924e-07, + "loss": 1.7733, + "step": 20026000 + }, + { + "epoch": 99.22, + "learning_rate": 4.086245250021056e-07, + "loss": 1.7563, + "step": 20026500 + }, + { + "epoch": 99.22, + "learning_rate": 4.073884157488742e-07, + "loss": 1.7647, + "step": 20027000 + }, + { + "epoch": 99.22, + "learning_rate": 4.0614982932279053e-07, + "loss": 1.7521, + "step": 20027500 + }, + { + "epoch": 99.23, + "learning_rate": 4.049112428967068e-07, + "loss": 1.7564, + "step": 20028000 + }, + { + "epoch": 99.23, + "learning_rate": 4.0367265647062327e-07, + "loss": 1.7449, + "step": 20028500 + }, + { + "epoch": 99.23, + "learning_rate": 4.0243407004453956e-07, + "loss": 1.7851, + "step": 20029000 + }, + { + "epoch": 99.23, + "learning_rate": 4.011979607913081e-07, + "loss": 1.7702, + "step": 20029500 + }, + { + "epoch": 99.24, + "learning_rate": 3.999618515380767e-07, + "loss": 1.7508, + "step": 20030000 + }, + { + "epoch": 99.24, + "learning_rate": 3.98723265111993e-07, + "loss": 1.7612, + "step": 20030500 + }, + { + "epoch": 99.24, + "learning_rate": 3.9748467868590935e-07, + "loss": 1.7614, + "step": 20031000 + }, + { + "epoch": 99.24, + "learning_rate": 3.9624609225982574e-07, + "loss": 1.7661, + "step": 20031500 + }, + { + "epoch": 99.25, + "learning_rate": 3.950075058337421e-07, + "loss": 1.774, + "step": 20032000 + }, + { + "epoch": 99.25, + "learning_rate": 3.9376891940765844e-07, + "loss": 1.7541, + "step": 20032500 + }, + { + "epoch": 99.25, + "learning_rate": 3.9253033298157484e-07, + "loss": 1.7802, + "step": 20033000 + }, + { + "epoch": 99.25, + "learning_rate": 3.9129422372834333e-07, + "loss": 1.7884, + "step": 20033500 + }, + { + "epoch": 99.26, + "learning_rate": 3.9005563730225967e-07, + "loss": 1.7513, + "step": 20034000 + }, + { + "epoch": 99.26, + "learning_rate": 3.8881705087617607e-07, + "loss": 1.7693, + "step": 20034500 + }, + { + "epoch": 99.26, + "learning_rate": 3.875784644500924e-07, + "loss": 1.7877, + "step": 20035000 + }, + { + "epoch": 99.26, + "learning_rate": 3.8633987802400876e-07, + "loss": 1.7771, + "step": 20035500 + }, + { + "epoch": 99.27, + "learning_rate": 3.8510129159792516e-07, + "loss": 1.7684, + "step": 20036000 + }, + { + "epoch": 99.27, + "learning_rate": 3.838627051718415e-07, + "loss": 1.7438, + "step": 20036500 + }, + { + "epoch": 99.27, + "learning_rate": 3.8262411874575785e-07, + "loss": 1.7421, + "step": 20037000 + }, + { + "epoch": 99.27, + "learning_rate": 3.813855323196742e-07, + "loss": 1.7691, + "step": 20037500 + }, + { + "epoch": 99.28, + "learning_rate": 3.801469458935906e-07, + "loss": 1.7866, + "step": 20038000 + }, + { + "epoch": 99.28, + "learning_rate": 3.7890835946750694e-07, + "loss": 1.7576, + "step": 20038500 + }, + { + "epoch": 99.28, + "learning_rate": 3.776697730414233e-07, + "loss": 1.7428, + "step": 20039000 + }, + { + "epoch": 99.28, + "learning_rate": 3.764311866153397e-07, + "loss": 1.7614, + "step": 20039500 + }, + { + "epoch": 99.29, + "learning_rate": 3.751950773621082e-07, + "loss": 1.7815, + "step": 20040000 + }, + { + "epoch": 99.29, + "learning_rate": 3.739564909360246e-07, + "loss": 1.7932, + "step": 20040500 + }, + { + "epoch": 99.29, + "learning_rate": 3.7271790450994087e-07, + "loss": 1.7593, + "step": 20041000 + }, + { + "epoch": 99.29, + "learning_rate": 3.714817952567094e-07, + "loss": 1.7716, + "step": 20041500 + }, + { + "epoch": 99.29, + "learning_rate": 3.702432088306258e-07, + "loss": 1.7567, + "step": 20042000 + }, + { + "epoch": 99.3, + "learning_rate": 3.6900462240454216e-07, + "loss": 1.7753, + "step": 20042500 + }, + { + "epoch": 99.3, + "learning_rate": 3.677660359784585e-07, + "loss": 1.7546, + "step": 20043000 + }, + { + "epoch": 99.3, + "learning_rate": 3.665274495523749e-07, + "loss": 1.7611, + "step": 20043500 + }, + { + "epoch": 99.3, + "learning_rate": 3.6528886312629125e-07, + "loss": 1.7545, + "step": 20044000 + }, + { + "epoch": 99.31, + "learning_rate": 3.640502767002076e-07, + "loss": 1.7596, + "step": 20044500 + }, + { + "epoch": 99.31, + "learning_rate": 3.62811690274124e-07, + "loss": 1.764, + "step": 20045000 + }, + { + "epoch": 99.31, + "learning_rate": 3.615755810208925e-07, + "loss": 1.753, + "step": 20045500 + }, + { + "epoch": 99.31, + "learning_rate": 3.6033947176766103e-07, + "loss": 1.7552, + "step": 20046000 + }, + { + "epoch": 99.32, + "learning_rate": 3.591008853415774e-07, + "loss": 1.7547, + "step": 20046500 + }, + { + "epoch": 99.32, + "learning_rate": 3.578622989154937e-07, + "loss": 1.7551, + "step": 20047000 + }, + { + "epoch": 99.32, + "learning_rate": 3.566237124894101e-07, + "loss": 1.7625, + "step": 20047500 + }, + { + "epoch": 99.32, + "learning_rate": 3.553876032361786e-07, + "loss": 1.7741, + "step": 20048000 + }, + { + "epoch": 99.33, + "learning_rate": 3.54149016810095e-07, + "loss": 1.7803, + "step": 20048500 + }, + { + "epoch": 99.33, + "learning_rate": 3.529104303840113e-07, + "loss": 1.7555, + "step": 20049000 + }, + { + "epoch": 99.33, + "learning_rate": 3.516718439579277e-07, + "loss": 1.7856, + "step": 20049500 + }, + { + "epoch": 99.33, + "learning_rate": 3.5043573470469625e-07, + "loss": 1.7469, + "step": 20050000 + }, + { + "epoch": 99.34, + "learning_rate": 3.491971482786126e-07, + "loss": 1.7675, + "step": 20050500 + }, + { + "epoch": 99.34, + "learning_rate": 3.4795856185252894e-07, + "loss": 1.768, + "step": 20051000 + }, + { + "epoch": 99.34, + "learning_rate": 3.4671997542644534e-07, + "loss": 1.7626, + "step": 20051500 + }, + { + "epoch": 99.34, + "learning_rate": 3.454813890003617e-07, + "loss": 1.7625, + "step": 20052000 + }, + { + "epoch": 99.35, + "learning_rate": 3.4424280257427803e-07, + "loss": 1.7512, + "step": 20052500 + }, + { + "epoch": 99.35, + "learning_rate": 3.430066933210466e-07, + "loss": 1.7495, + "step": 20053000 + }, + { + "epoch": 99.35, + "learning_rate": 3.417681068949629e-07, + "loss": 1.7496, + "step": 20053500 + }, + { + "epoch": 99.35, + "learning_rate": 3.405295204688793e-07, + "loss": 1.7495, + "step": 20054000 + }, + { + "epoch": 99.36, + "learning_rate": 3.392909340427956e-07, + "loss": 1.7537, + "step": 20054500 + }, + { + "epoch": 99.36, + "learning_rate": 3.38052347616712e-07, + "loss": 1.7431, + "step": 20055000 + }, + { + "epoch": 99.36, + "learning_rate": 3.3681376119062836e-07, + "loss": 1.7594, + "step": 20055500 + }, + { + "epoch": 99.36, + "learning_rate": 3.355776519373969e-07, + "loss": 1.7632, + "step": 20056000 + }, + { + "epoch": 99.37, + "learning_rate": 3.3433906551131325e-07, + "loss": 1.7621, + "step": 20056500 + }, + { + "epoch": 99.37, + "learning_rate": 3.3310047908522965e-07, + "loss": 1.7439, + "step": 20057000 + }, + { + "epoch": 99.37, + "learning_rate": 3.31861892659146e-07, + "loss": 1.7869, + "step": 20057500 + }, + { + "epoch": 99.37, + "learning_rate": 3.3062330623306234e-07, + "loss": 1.7646, + "step": 20058000 + }, + { + "epoch": 99.38, + "learning_rate": 3.293871969798309e-07, + "loss": 1.7497, + "step": 20058500 + }, + { + "epoch": 99.38, + "learning_rate": 3.281510877265994e-07, + "loss": 1.7577, + "step": 20059000 + }, + { + "epoch": 99.38, + "learning_rate": 3.269125013005158e-07, + "loss": 1.7738, + "step": 20059500 + }, + { + "epoch": 99.38, + "learning_rate": 3.256739148744321e-07, + "loss": 1.7513, + "step": 20060000 + }, + { + "epoch": 99.39, + "learning_rate": 3.2443532844834847e-07, + "loss": 1.7542, + "step": 20060500 + }, + { + "epoch": 99.39, + "learning_rate": 3.2319674202226487e-07, + "loss": 1.764, + "step": 20061000 + }, + { + "epoch": 99.39, + "learning_rate": 3.219581555961812e-07, + "loss": 1.7361, + "step": 20061500 + }, + { + "epoch": 99.39, + "learning_rate": 3.2071956917009756e-07, + "loss": 1.7692, + "step": 20062000 + }, + { + "epoch": 99.4, + "learning_rate": 3.1948098274401396e-07, + "loss": 1.7767, + "step": 20062500 + }, + { + "epoch": 99.4, + "learning_rate": 3.1824487349078245e-07, + "loss": 1.7569, + "step": 20063000 + }, + { + "epoch": 99.4, + "learning_rate": 3.1700628706469885e-07, + "loss": 1.7694, + "step": 20063500 + }, + { + "epoch": 99.4, + "learning_rate": 3.157677006386152e-07, + "loss": 1.7717, + "step": 20064000 + }, + { + "epoch": 99.41, + "learning_rate": 3.1452911421253154e-07, + "loss": 1.7699, + "step": 20064500 + }, + { + "epoch": 99.41, + "learning_rate": 3.132905277864479e-07, + "loss": 1.7529, + "step": 20065000 + }, + { + "epoch": 99.41, + "learning_rate": 3.120519413603643e-07, + "loss": 1.7645, + "step": 20065500 + }, + { + "epoch": 99.41, + "learning_rate": 3.1081335493428063e-07, + "loss": 1.7648, + "step": 20066000 + }, + { + "epoch": 99.42, + "learning_rate": 3.09574768508197e-07, + "loss": 1.766, + "step": 20066500 + }, + { + "epoch": 99.42, + "learning_rate": 3.0833865925496547e-07, + "loss": 1.7797, + "step": 20067000 + }, + { + "epoch": 99.42, + "learning_rate": 3.0710007282888187e-07, + "loss": 1.7671, + "step": 20067500 + }, + { + "epoch": 99.42, + "learning_rate": 3.0586148640279826e-07, + "loss": 1.796, + "step": 20068000 + }, + { + "epoch": 99.43, + "learning_rate": 3.0462289997671456e-07, + "loss": 1.7632, + "step": 20068500 + }, + { + "epoch": 99.43, + "learning_rate": 3.0338431355063096e-07, + "loss": 1.7642, + "step": 20069000 + }, + { + "epoch": 99.43, + "learning_rate": 3.021482042973995e-07, + "loss": 1.7517, + "step": 20069500 + }, + { + "epoch": 99.43, + "learning_rate": 3.0090961787131585e-07, + "loss": 1.7633, + "step": 20070000 + }, + { + "epoch": 99.44, + "learning_rate": 2.996710314452322e-07, + "loss": 1.7589, + "step": 20070500 + }, + { + "epoch": 99.44, + "learning_rate": 2.984324450191486e-07, + "loss": 1.7413, + "step": 20071000 + }, + { + "epoch": 99.44, + "learning_rate": 2.9719385859306494e-07, + "loss": 1.75, + "step": 20071500 + }, + { + "epoch": 99.44, + "learning_rate": 2.959552721669813e-07, + "loss": 1.7844, + "step": 20072000 + }, + { + "epoch": 99.45, + "learning_rate": 2.947166857408977e-07, + "loss": 1.7742, + "step": 20072500 + }, + { + "epoch": 99.45, + "learning_rate": 2.93478099314814e-07, + "loss": 1.7607, + "step": 20073000 + }, + { + "epoch": 99.45, + "learning_rate": 2.9223951288873037e-07, + "loss": 1.7645, + "step": 20073500 + }, + { + "epoch": 99.45, + "learning_rate": 2.9100340363549886e-07, + "loss": 1.771, + "step": 20074000 + }, + { + "epoch": 99.46, + "learning_rate": 2.8976481720941526e-07, + "loss": 1.7646, + "step": 20074500 + }, + { + "epoch": 99.46, + "learning_rate": 2.885262307833316e-07, + "loss": 1.7641, + "step": 20075000 + }, + { + "epoch": 99.46, + "learning_rate": 2.8728764435724795e-07, + "loss": 1.7665, + "step": 20075500 + }, + { + "epoch": 99.46, + "learning_rate": 2.8604905793116435e-07, + "loss": 1.7667, + "step": 20076000 + }, + { + "epoch": 99.47, + "learning_rate": 2.848129486779329e-07, + "loss": 1.7599, + "step": 20076500 + }, + { + "epoch": 99.47, + "learning_rate": 2.835743622518492e-07, + "loss": 1.7873, + "step": 20077000 + }, + { + "epoch": 99.47, + "learning_rate": 2.823357758257656e-07, + "loss": 1.7583, + "step": 20077500 + }, + { + "epoch": 99.47, + "learning_rate": 2.81097189399682e-07, + "loss": 1.7618, + "step": 20078000 + }, + { + "epoch": 99.48, + "learning_rate": 2.798586029735983e-07, + "loss": 1.7548, + "step": 20078500 + }, + { + "epoch": 99.48, + "learning_rate": 2.786200165475147e-07, + "loss": 1.7596, + "step": 20079000 + }, + { + "epoch": 99.48, + "learning_rate": 2.77381430121431e-07, + "loss": 1.754, + "step": 20079500 + }, + { + "epoch": 99.48, + "learning_rate": 2.7614284369534737e-07, + "loss": 1.7708, + "step": 20080000 + }, + { + "epoch": 99.49, + "learning_rate": 2.7490425726926377e-07, + "loss": 1.7676, + "step": 20080500 + }, + { + "epoch": 99.49, + "learning_rate": 2.736656708431801e-07, + "loss": 1.7794, + "step": 20081000 + }, + { + "epoch": 99.49, + "learning_rate": 2.7242708441709646e-07, + "loss": 1.7516, + "step": 20081500 + }, + { + "epoch": 99.49, + "learning_rate": 2.711884979910128e-07, + "loss": 1.7753, + "step": 20082000 + }, + { + "epoch": 99.5, + "learning_rate": 2.699548659106335e-07, + "loss": 1.7582, + "step": 20082500 + }, + { + "epoch": 99.5, + "learning_rate": 2.687162794845499e-07, + "loss": 1.7626, + "step": 20083000 + }, + { + "epoch": 99.5, + "learning_rate": 2.6747769305846624e-07, + "loss": 1.7579, + "step": 20083500 + }, + { + "epoch": 99.5, + "learning_rate": 2.662391066323826e-07, + "loss": 1.7674, + "step": 20084000 + }, + { + "epoch": 99.51, + "learning_rate": 2.65000520206299e-07, + "loss": 1.7805, + "step": 20084500 + }, + { + "epoch": 99.51, + "learning_rate": 2.6376193378021533e-07, + "loss": 1.7448, + "step": 20085000 + }, + { + "epoch": 99.51, + "learning_rate": 2.625233473541317e-07, + "loss": 1.79, + "step": 20085500 + }, + { + "epoch": 99.51, + "learning_rate": 2.612847609280481e-07, + "loss": 1.7547, + "step": 20086000 + }, + { + "epoch": 99.52, + "learning_rate": 2.6004865167481657e-07, + "loss": 1.7662, + "step": 20086500 + }, + { + "epoch": 99.52, + "learning_rate": 2.588100652487329e-07, + "loss": 1.781, + "step": 20087000 + }, + { + "epoch": 99.52, + "learning_rate": 2.575714788226493e-07, + "loss": 1.764, + "step": 20087500 + }, + { + "epoch": 99.52, + "learning_rate": 2.5633289239656566e-07, + "loss": 1.7545, + "step": 20088000 + }, + { + "epoch": 99.53, + "learning_rate": 2.550967831433342e-07, + "loss": 1.7659, + "step": 20088500 + }, + { + "epoch": 99.53, + "learning_rate": 2.5385819671725055e-07, + "loss": 1.7731, + "step": 20089000 + }, + { + "epoch": 99.53, + "learning_rate": 2.526196102911669e-07, + "loss": 1.7792, + "step": 20089500 + }, + { + "epoch": 99.53, + "learning_rate": 2.513810238650833e-07, + "loss": 1.7788, + "step": 20090000 + }, + { + "epoch": 99.54, + "learning_rate": 2.5014243743899964e-07, + "loss": 1.7857, + "step": 20090500 + }, + { + "epoch": 99.54, + "learning_rate": 2.48903851012916e-07, + "loss": 1.7738, + "step": 20091000 + }, + { + "epoch": 99.54, + "learning_rate": 2.4766774175968453e-07, + "loss": 1.7782, + "step": 20091500 + }, + { + "epoch": 99.54, + "learning_rate": 2.464291553336009e-07, + "loss": 1.7558, + "step": 20092000 + }, + { + "epoch": 99.55, + "learning_rate": 2.451905689075172e-07, + "loss": 1.7793, + "step": 20092500 + }, + { + "epoch": 99.55, + "learning_rate": 2.439519824814336e-07, + "loss": 1.7905, + "step": 20093000 + }, + { + "epoch": 99.55, + "learning_rate": 2.427158732282021e-07, + "loss": 1.7464, + "step": 20093500 + }, + { + "epoch": 99.55, + "learning_rate": 2.414772868021185e-07, + "loss": 1.7328, + "step": 20094000 + }, + { + "epoch": 99.56, + "learning_rate": 2.4023870037603486e-07, + "loss": 1.7717, + "step": 20094500 + }, + { + "epoch": 99.56, + "learning_rate": 2.390001139499512e-07, + "loss": 1.7595, + "step": 20095000 + }, + { + "epoch": 99.56, + "learning_rate": 2.3776152752386758e-07, + "loss": 1.7994, + "step": 20095500 + }, + { + "epoch": 99.56, + "learning_rate": 2.3652294109778395e-07, + "loss": 1.7726, + "step": 20096000 + }, + { + "epoch": 99.57, + "learning_rate": 2.3528435467170027e-07, + "loss": 1.7683, + "step": 20096500 + }, + { + "epoch": 99.57, + "learning_rate": 2.3404576824561667e-07, + "loss": 1.7662, + "step": 20097000 + }, + { + "epoch": 99.57, + "learning_rate": 2.3280718181953304e-07, + "loss": 1.7738, + "step": 20097500 + }, + { + "epoch": 99.57, + "learning_rate": 2.3156859539344936e-07, + "loss": 1.7796, + "step": 20098000 + }, + { + "epoch": 99.57, + "learning_rate": 2.303324861402179e-07, + "loss": 1.7583, + "step": 20098500 + }, + { + "epoch": 99.58, + "learning_rate": 2.2909389971413425e-07, + "loss": 1.779, + "step": 20099000 + }, + { + "epoch": 99.58, + "learning_rate": 2.2785531328805062e-07, + "loss": 1.737, + "step": 20099500 + }, + { + "epoch": 99.58, + "learning_rate": 2.26616726861967e-07, + "loss": 1.7509, + "step": 20100000 + }, + { + "epoch": 99.58, + "learning_rate": 2.2537814043588331e-07, + "loss": 1.747, + "step": 20100500 + }, + { + "epoch": 99.59, + "learning_rate": 2.2414203118265188e-07, + "loss": 1.7728, + "step": 20101000 + }, + { + "epoch": 99.59, + "learning_rate": 2.2290344475656826e-07, + "loss": 1.7587, + "step": 20101500 + }, + { + "epoch": 99.59, + "learning_rate": 2.2166485833048458e-07, + "loss": 1.751, + "step": 20102000 + }, + { + "epoch": 99.59, + "learning_rate": 2.2042627190440095e-07, + "loss": 1.7498, + "step": 20102500 + }, + { + "epoch": 99.6, + "learning_rate": 2.1918768547831732e-07, + "loss": 1.7849, + "step": 20103000 + }, + { + "epoch": 99.6, + "learning_rate": 2.1794909905223367e-07, + "loss": 1.79, + "step": 20103500 + }, + { + "epoch": 99.6, + "learning_rate": 2.1671051262615004e-07, + "loss": 1.759, + "step": 20104000 + }, + { + "epoch": 99.6, + "learning_rate": 2.1547688054577073e-07, + "loss": 1.7795, + "step": 20104500 + }, + { + "epoch": 99.61, + "learning_rate": 2.142382941196871e-07, + "loss": 1.7455, + "step": 20105000 + }, + { + "epoch": 99.61, + "learning_rate": 2.1299970769360347e-07, + "loss": 1.7395, + "step": 20105500 + }, + { + "epoch": 99.61, + "learning_rate": 2.117611212675198e-07, + "loss": 1.7665, + "step": 20106000 + }, + { + "epoch": 99.61, + "learning_rate": 2.1052253484143617e-07, + "loss": 1.783, + "step": 20106500 + }, + { + "epoch": 99.62, + "learning_rate": 2.0928394841535254e-07, + "loss": 1.7773, + "step": 20107000 + }, + { + "epoch": 99.62, + "learning_rate": 2.0804536198926888e-07, + "loss": 1.773, + "step": 20107500 + }, + { + "epoch": 99.62, + "learning_rate": 2.0680677556318526e-07, + "loss": 1.7648, + "step": 20108000 + }, + { + "epoch": 99.62, + "learning_rate": 2.0556818913710163e-07, + "loss": 1.7763, + "step": 20108500 + }, + { + "epoch": 99.63, + "learning_rate": 2.0433207988387015e-07, + "loss": 1.7559, + "step": 20109000 + }, + { + "epoch": 99.63, + "learning_rate": 2.0309349345778652e-07, + "loss": 1.762, + "step": 20109500 + }, + { + "epoch": 99.63, + "learning_rate": 2.01857384204555e-07, + "loss": 1.7631, + "step": 20110000 + }, + { + "epoch": 99.63, + "learning_rate": 2.0061879777847138e-07, + "loss": 1.7684, + "step": 20110500 + }, + { + "epoch": 99.64, + "learning_rate": 1.9938021135238778e-07, + "loss": 1.766, + "step": 20111000 + }, + { + "epoch": 99.64, + "learning_rate": 1.981416249263041e-07, + "loss": 1.7544, + "step": 20111500 + }, + { + "epoch": 99.64, + "learning_rate": 1.9690303850022047e-07, + "loss": 1.78, + "step": 20112000 + }, + { + "epoch": 99.64, + "learning_rate": 1.9566445207413685e-07, + "loss": 1.7712, + "step": 20112500 + }, + { + "epoch": 99.65, + "learning_rate": 1.9442586564805322e-07, + "loss": 1.7714, + "step": 20113000 + }, + { + "epoch": 99.65, + "learning_rate": 1.9318975639482174e-07, + "loss": 1.7633, + "step": 20113500 + }, + { + "epoch": 99.65, + "learning_rate": 1.9195116996873808e-07, + "loss": 1.7561, + "step": 20114000 + }, + { + "epoch": 99.65, + "learning_rate": 1.9071258354265443e-07, + "loss": 1.7692, + "step": 20114500 + }, + { + "epoch": 99.66, + "learning_rate": 1.8947399711657083e-07, + "loss": 1.7868, + "step": 20115000 + }, + { + "epoch": 99.66, + "learning_rate": 1.8823541069048717e-07, + "loss": 1.7764, + "step": 20115500 + }, + { + "epoch": 99.66, + "learning_rate": 1.8699682426440352e-07, + "loss": 1.7471, + "step": 20116000 + }, + { + "epoch": 99.66, + "learning_rate": 1.857582378383199e-07, + "loss": 1.7777, + "step": 20116500 + }, + { + "epoch": 99.67, + "learning_rate": 1.8451965141223626e-07, + "loss": 1.7683, + "step": 20117000 + }, + { + "epoch": 99.67, + "learning_rate": 1.832810649861526e-07, + "loss": 1.7457, + "step": 20117500 + }, + { + "epoch": 99.67, + "learning_rate": 1.8204247856006898e-07, + "loss": 1.7618, + "step": 20118000 + }, + { + "epoch": 99.67, + "learning_rate": 1.8080389213398533e-07, + "loss": 1.7755, + "step": 20118500 + }, + { + "epoch": 99.68, + "learning_rate": 1.795653057079017e-07, + "loss": 1.7782, + "step": 20119000 + }, + { + "epoch": 99.68, + "learning_rate": 1.7832671928181807e-07, + "loss": 1.7722, + "step": 20119500 + }, + { + "epoch": 99.68, + "learning_rate": 1.770906100285866e-07, + "loss": 1.7751, + "step": 20120000 + }, + { + "epoch": 99.68, + "learning_rate": 1.7585202360250294e-07, + "loss": 1.777, + "step": 20120500 + }, + { + "epoch": 99.69, + "learning_rate": 1.746134371764193e-07, + "loss": 1.776, + "step": 20121000 + }, + { + "epoch": 99.69, + "learning_rate": 1.7337732792318783e-07, + "loss": 1.7826, + "step": 20121500 + }, + { + "epoch": 99.69, + "learning_rate": 1.721387414971042e-07, + "loss": 1.7823, + "step": 20122000 + }, + { + "epoch": 99.69, + "learning_rate": 1.7090015507102054e-07, + "loss": 1.7658, + "step": 20122500 + }, + { + "epoch": 99.7, + "learning_rate": 1.6966156864493692e-07, + "loss": 1.8001, + "step": 20123000 + }, + { + "epoch": 99.7, + "learning_rate": 1.684229822188533e-07, + "loss": 1.7756, + "step": 20123500 + }, + { + "epoch": 99.7, + "learning_rate": 1.6718439579276963e-07, + "loss": 1.7707, + "step": 20124000 + }, + { + "epoch": 99.7, + "learning_rate": 1.6594580936668598e-07, + "loss": 1.7665, + "step": 20124500 + }, + { + "epoch": 99.71, + "learning_rate": 1.6470970011345453e-07, + "loss": 1.757, + "step": 20125000 + }, + { + "epoch": 99.71, + "learning_rate": 1.634711136873709e-07, + "loss": 1.7583, + "step": 20125500 + }, + { + "epoch": 99.71, + "learning_rate": 1.6223252726128724e-07, + "loss": 1.801, + "step": 20126000 + }, + { + "epoch": 99.71, + "learning_rate": 1.609939408352036e-07, + "loss": 1.7775, + "step": 20126500 + }, + { + "epoch": 99.72, + "learning_rate": 1.5975535440911996e-07, + "loss": 1.7803, + "step": 20127000 + }, + { + "epoch": 99.72, + "learning_rate": 1.5851676798303633e-07, + "loss": 1.7802, + "step": 20127500 + }, + { + "epoch": 99.72, + "learning_rate": 1.5727818155695268e-07, + "loss": 1.7662, + "step": 20128000 + }, + { + "epoch": 99.72, + "learning_rate": 1.5603959513086905e-07, + "loss": 1.7612, + "step": 20128500 + }, + { + "epoch": 99.73, + "learning_rate": 1.548010087047854e-07, + "loss": 1.7407, + "step": 20129000 + }, + { + "epoch": 99.73, + "learning_rate": 1.5356489945155394e-07, + "loss": 1.758, + "step": 20129500 + }, + { + "epoch": 99.73, + "learning_rate": 1.5232879019832246e-07, + "loss": 1.7757, + "step": 20130000 + }, + { + "epoch": 99.73, + "learning_rate": 1.510902037722388e-07, + "loss": 1.7679, + "step": 20130500 + }, + { + "epoch": 99.74, + "learning_rate": 1.4985161734615518e-07, + "loss": 1.79, + "step": 20131000 + }, + { + "epoch": 99.74, + "learning_rate": 1.4861303092007155e-07, + "loss": 1.7687, + "step": 20131500 + }, + { + "epoch": 99.74, + "learning_rate": 1.473744444939879e-07, + "loss": 1.758, + "step": 20132000 + }, + { + "epoch": 99.74, + "learning_rate": 1.4613585806790427e-07, + "loss": 1.7623, + "step": 20132500 + }, + { + "epoch": 99.75, + "learning_rate": 1.4490222598752496e-07, + "loss": 1.7405, + "step": 20133000 + }, + { + "epoch": 99.75, + "learning_rate": 1.4366363956144133e-07, + "loss": 1.757, + "step": 20133500 + }, + { + "epoch": 99.75, + "learning_rate": 1.4242505313535768e-07, + "loss": 1.7806, + "step": 20134000 + }, + { + "epoch": 99.75, + "learning_rate": 1.4118646670927402e-07, + "loss": 1.7711, + "step": 20134500 + }, + { + "epoch": 99.76, + "learning_rate": 1.3994788028319042e-07, + "loss": 1.7627, + "step": 20135000 + }, + { + "epoch": 99.76, + "learning_rate": 1.3870929385710677e-07, + "loss": 1.7867, + "step": 20135500 + }, + { + "epoch": 99.76, + "learning_rate": 1.3747070743102311e-07, + "loss": 1.7448, + "step": 20136000 + }, + { + "epoch": 99.76, + "learning_rate": 1.3623212100493949e-07, + "loss": 1.7571, + "step": 20136500 + }, + { + "epoch": 99.77, + "learning_rate": 1.3499353457885586e-07, + "loss": 1.7337, + "step": 20137000 + }, + { + "epoch": 99.77, + "learning_rate": 1.337549481527722e-07, + "loss": 1.7697, + "step": 20137500 + }, + { + "epoch": 99.77, + "learning_rate": 1.3251636172668858e-07, + "loss": 1.773, + "step": 20138000 + }, + { + "epoch": 99.77, + "learning_rate": 1.3127777530060492e-07, + "loss": 1.7551, + "step": 20138500 + }, + { + "epoch": 99.78, + "learning_rate": 1.300391888745213e-07, + "loss": 1.7702, + "step": 20139000 + }, + { + "epoch": 99.78, + "learning_rate": 1.2880307962128984e-07, + "loss": 1.7797, + "step": 20139500 + }, + { + "epoch": 99.78, + "learning_rate": 1.2756449319520619e-07, + "loss": 1.7749, + "step": 20140000 + }, + { + "epoch": 99.78, + "learning_rate": 1.2632590676912253e-07, + "loss": 1.7616, + "step": 20140500 + }, + { + "epoch": 99.79, + "learning_rate": 1.250873203430389e-07, + "loss": 1.7777, + "step": 20141000 + }, + { + "epoch": 99.79, + "learning_rate": 1.2384873391695528e-07, + "loss": 1.7897, + "step": 20141500 + }, + { + "epoch": 99.79, + "learning_rate": 1.2261014749087162e-07, + "loss": 1.7495, + "step": 20142000 + }, + { + "epoch": 99.79, + "learning_rate": 1.2137403823764014e-07, + "loss": 1.7902, + "step": 20142500 + }, + { + "epoch": 99.8, + "learning_rate": 1.2014040615726086e-07, + "loss": 1.7714, + "step": 20143000 + }, + { + "epoch": 99.8, + "learning_rate": 1.189018197311772e-07, + "loss": 1.7471, + "step": 20143500 + }, + { + "epoch": 99.8, + "learning_rate": 1.1766323330509358e-07, + "loss": 1.781, + "step": 20144000 + }, + { + "epoch": 99.8, + "learning_rate": 1.1642464687900993e-07, + "loss": 1.7569, + "step": 20144500 + }, + { + "epoch": 99.81, + "learning_rate": 1.1518606045292628e-07, + "loss": 1.7655, + "step": 20145000 + }, + { + "epoch": 99.81, + "learning_rate": 1.1394747402684267e-07, + "loss": 1.7711, + "step": 20145500 + }, + { + "epoch": 99.81, + "learning_rate": 1.1270888760075901e-07, + "loss": 1.7743, + "step": 20146000 + }, + { + "epoch": 99.81, + "learning_rate": 1.1147277834752754e-07, + "loss": 1.7382, + "step": 20146500 + }, + { + "epoch": 99.82, + "learning_rate": 1.1023419192144389e-07, + "loss": 1.7757, + "step": 20147000 + }, + { + "epoch": 99.82, + "learning_rate": 1.0899560549536027e-07, + "loss": 1.8014, + "step": 20147500 + }, + { + "epoch": 99.82, + "learning_rate": 1.0775701906927662e-07, + "loss": 1.7693, + "step": 20148000 + }, + { + "epoch": 99.82, + "learning_rate": 1.0651843264319298e-07, + "loss": 1.7625, + "step": 20148500 + }, + { + "epoch": 99.83, + "learning_rate": 1.0527984621710935e-07, + "loss": 1.7743, + "step": 20149000 + }, + { + "epoch": 99.83, + "learning_rate": 1.0404125979102571e-07, + "loss": 1.774, + "step": 20149500 + }, + { + "epoch": 99.83, + "learning_rate": 1.0280267336494206e-07, + "loss": 1.7929, + "step": 20150000 + }, + { + "epoch": 99.83, + "learning_rate": 1.0156408693885843e-07, + "loss": 1.7587, + "step": 20150500 + }, + { + "epoch": 99.84, + "learning_rate": 1.0032550051277479e-07, + "loss": 1.7666, + "step": 20151000 + }, + { + "epoch": 99.84, + "learning_rate": 9.908691408669115e-08, + "loss": 1.7434, + "step": 20151500 + }, + { + "epoch": 99.84, + "learning_rate": 9.784832766060752e-08, + "loss": 1.7647, + "step": 20152000 + }, + { + "epoch": 99.84, + "learning_rate": 9.660974123452386e-08, + "loss": 1.7685, + "step": 20152500 + }, + { + "epoch": 99.84, + "learning_rate": 9.537610915414456e-08, + "loss": 1.765, + "step": 20153000 + }, + { + "epoch": 99.85, + "learning_rate": 9.413752272806093e-08, + "loss": 1.7778, + "step": 20153500 + }, + { + "epoch": 99.85, + "learning_rate": 9.289893630197729e-08, + "loss": 1.7707, + "step": 20154000 + }, + { + "epoch": 99.85, + "learning_rate": 9.166034987589365e-08, + "loss": 1.7639, + "step": 20154500 + }, + { + "epoch": 99.85, + "learning_rate": 9.042176344981e-08, + "loss": 1.77, + "step": 20155000 + }, + { + "epoch": 99.86, + "learning_rate": 8.918317702372636e-08, + "loss": 1.7732, + "step": 20155500 + }, + { + "epoch": 99.86, + "learning_rate": 8.794459059764272e-08, + "loss": 1.7903, + "step": 20156000 + }, + { + "epoch": 99.86, + "learning_rate": 8.670848134441126e-08, + "loss": 1.7838, + "step": 20156500 + }, + { + "epoch": 99.86, + "learning_rate": 8.546989491832761e-08, + "loss": 1.7762, + "step": 20157000 + }, + { + "epoch": 99.87, + "learning_rate": 8.423130849224399e-08, + "loss": 1.7784, + "step": 20157500 + }, + { + "epoch": 99.87, + "learning_rate": 8.299272206616033e-08, + "loss": 1.7695, + "step": 20158000 + }, + { + "epoch": 99.87, + "learning_rate": 8.17541356400767e-08, + "loss": 1.7672, + "step": 20158500 + }, + { + "epoch": 99.87, + "learning_rate": 8.051554921399306e-08, + "loss": 1.7813, + "step": 20159000 + }, + { + "epoch": 99.88, + "learning_rate": 7.927696278790942e-08, + "loss": 1.7601, + "step": 20159500 + }, + { + "epoch": 99.88, + "learning_rate": 7.803837636182578e-08, + "loss": 1.7801, + "step": 20160000 + }, + { + "epoch": 99.88, + "learning_rate": 7.680226710859431e-08, + "loss": 1.7729, + "step": 20160500 + }, + { + "epoch": 99.88, + "learning_rate": 7.556368068251067e-08, + "loss": 1.7913, + "step": 20161000 + }, + { + "epoch": 99.89, + "learning_rate": 7.432509425642703e-08, + "loss": 1.7757, + "step": 20161500 + }, + { + "epoch": 99.89, + "learning_rate": 7.308898500319555e-08, + "loss": 1.7638, + "step": 20162000 + }, + { + "epoch": 99.89, + "learning_rate": 7.185287574996408e-08, + "loss": 1.7614, + "step": 20162500 + }, + { + "epoch": 99.89, + "learning_rate": 7.061428932388044e-08, + "loss": 1.7469, + "step": 20163000 + }, + { + "epoch": 99.9, + "learning_rate": 6.937570289779681e-08, + "loss": 1.7518, + "step": 20163500 + }, + { + "epoch": 99.9, + "learning_rate": 6.813711647171316e-08, + "loss": 1.7711, + "step": 20164000 + }, + { + "epoch": 99.9, + "learning_rate": 6.690100721848169e-08, + "loss": 1.7831, + "step": 20164500 + }, + { + "epoch": 99.9, + "learning_rate": 6.566489796525022e-08, + "loss": 1.8008, + "step": 20165000 + }, + { + "epoch": 99.91, + "learning_rate": 6.442631153916658e-08, + "loss": 1.754, + "step": 20165500 + }, + { + "epoch": 99.91, + "learning_rate": 6.318772511308295e-08, + "loss": 1.801, + "step": 20166000 + }, + { + "epoch": 99.91, + "learning_rate": 6.19491386869993e-08, + "loss": 1.7811, + "step": 20166500 + }, + { + "epoch": 99.91, + "learning_rate": 6.071055226091566e-08, + "loss": 1.7753, + "step": 20167000 + }, + { + "epoch": 99.92, + "learning_rate": 5.947196583483203e-08, + "loss": 1.7909, + "step": 20167500 + }, + { + "epoch": 99.92, + "learning_rate": 5.823337940874838e-08, + "loss": 1.752, + "step": 20168000 + }, + { + "epoch": 99.92, + "learning_rate": 5.699727015551691e-08, + "loss": 1.7863, + "step": 20168500 + }, + { + "epoch": 99.92, + "learning_rate": 5.575868372943327e-08, + "loss": 1.7745, + "step": 20169000 + }, + { + "epoch": 99.93, + "learning_rate": 5.452009730334964e-08, + "loss": 1.7339, + "step": 20169500 + }, + { + "epoch": 99.93, + "learning_rate": 5.328151087726599e-08, + "loss": 1.7639, + "step": 20170000 + }, + { + "epoch": 99.93, + "learning_rate": 5.204292445118236e-08, + "loss": 1.7692, + "step": 20170500 + }, + { + "epoch": 99.93, + "learning_rate": 5.080433802509872e-08, + "loss": 1.7791, + "step": 20171000 + }, + { + "epoch": 99.94, + "learning_rate": 4.9565751599015075e-08, + "loss": 1.7527, + "step": 20171500 + }, + { + "epoch": 99.94, + "learning_rate": 4.832716517293144e-08, + "loss": 1.7679, + "step": 20172000 + }, + { + "epoch": 99.94, + "learning_rate": 4.70885787468478e-08, + "loss": 1.7747, + "step": 20172500 + }, + { + "epoch": 99.94, + "learning_rate": 4.584999232076416e-08, + "loss": 1.762, + "step": 20173000 + }, + { + "epoch": 99.95, + "learning_rate": 4.461140589468052e-08, + "loss": 1.7717, + "step": 20173500 + }, + { + "epoch": 99.95, + "learning_rate": 4.337529664144905e-08, + "loss": 1.7703, + "step": 20174000 + }, + { + "epoch": 99.95, + "learning_rate": 4.2136710215365415e-08, + "loss": 1.7513, + "step": 20174500 + }, + { + "epoch": 99.95, + "learning_rate": 4.0898123789281774e-08, + "loss": 1.7769, + "step": 20175000 + }, + { + "epoch": 99.96, + "learning_rate": 3.965953736319813e-08, + "loss": 1.7845, + "step": 20175500 + }, + { + "epoch": 99.96, + "learning_rate": 3.842095093711449e-08, + "loss": 1.7709, + "step": 20176000 + }, + { + "epoch": 99.96, + "learning_rate": 3.718236451103085e-08, + "loss": 1.7733, + "step": 20176500 + }, + { + "epoch": 99.96, + "learning_rate": 3.594377808494721e-08, + "loss": 1.7737, + "step": 20177000 + }, + { + "epoch": 99.97, + "learning_rate": 3.4705191658863576e-08, + "loss": 1.7951, + "step": 20177500 + }, + { + "epoch": 99.97, + "learning_rate": 3.3466605232779935e-08, + "loss": 1.7738, + "step": 20178000 + }, + { + "epoch": 99.97, + "learning_rate": 3.223049597954846e-08, + "loss": 1.7832, + "step": 20178500 + }, + { + "epoch": 99.97, + "learning_rate": 3.099190955346482e-08, + "loss": 1.7636, + "step": 20179000 + }, + { + "epoch": 99.98, + "learning_rate": 2.9753323127381184e-08, + "loss": 1.7692, + "step": 20179500 + }, + { + "epoch": 99.98, + "learning_rate": 2.8514736701297543e-08, + "loss": 1.7525, + "step": 20180000 + }, + { + "epoch": 99.98, + "learning_rate": 2.7276150275213902e-08, + "loss": 1.7682, + "step": 20180500 + }, + { + "epoch": 99.98, + "learning_rate": 2.6037563849130268e-08, + "loss": 1.7772, + "step": 20181000 + }, + { + "epoch": 99.99, + "learning_rate": 2.4801454595898796e-08, + "loss": 1.7848, + "step": 20181500 + }, + { + "epoch": 99.99, + "learning_rate": 2.356534534266732e-08, + "loss": 1.7652, + "step": 20182000 + }, + { + "epoch": 99.99, + "learning_rate": 2.232675891658368e-08, + "loss": 1.758, + "step": 20182500 + }, + { + "epoch": 99.99, + "learning_rate": 2.1088172490500043e-08, + "loss": 1.7551, + "step": 20183000 + }, + { + "epoch": 100.0, + "learning_rate": 1.9849586064416405e-08, + "loss": 1.7859, + "step": 20183500 + }, + { + "epoch": 100.0, + "learning_rate": 1.8610999638332764e-08, + "loss": 1.785, + "step": 20184000 + }, + { + "epoch": 100.0, + "eval_accuracy": 0.6880450819643994, + "eval_accuracy_mlm": 0.6495895431712034, + "eval_accuracy_nsp": 0.869280943210477, + "eval_loss": 2.31811785697937, + "eval_runtime": 147.0596, + "eval_samples_per_second": 1733.712, + "eval_steps_per_second": 72.243, + "step": 20184300 } ], "max_steps": 20184300, "num_train_epochs": 100, - "total_flos": 2.3780546655767814e+19, + "total_flos": 2.6132554735491883e+19, "trial_name": null, "trial_params": null }