{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9701281377581955, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.9457994579945803e-05, "loss": 4.3763, "step": 5 }, { "epoch": 0.08, "learning_rate": 4.878048780487805e-05, "loss": 3.8394, "step": 10 }, { "epoch": 0.12, "learning_rate": 4.81029810298103e-05, "loss": 3.639, "step": 15 }, { "epoch": 0.16, "learning_rate": 4.7425474254742554e-05, "loss": 3.7962, "step": 20 }, { "epoch": 0.2, "learning_rate": 4.6747967479674795e-05, "loss": 3.5152, "step": 25 }, { "epoch": 0.24, "learning_rate": 4.607046070460705e-05, "loss": 3.5752, "step": 30 }, { "epoch": 0.28, "learning_rate": 4.53929539295393e-05, "loss": 3.4463, "step": 35 }, { "epoch": 0.32, "learning_rate": 4.4715447154471546e-05, "loss": 3.5218, "step": 40 }, { "epoch": 0.32, "eval_loss": 3.2419066429138184, "eval_rouge2_fmeasure": 0.0, "eval_rouge2_precision": 0.0, "eval_rouge2_recall": 0.0, "eval_runtime": 180.4791, "eval_samples_per_second": 0.15, "eval_steps_per_second": 0.078, "step": 40 }, { "epoch": 0.36, "learning_rate": 4.4037940379403794e-05, "loss": 3.5264, "step": 45 }, { "epoch": 0.4, "learning_rate": 4.336043360433605e-05, "loss": 3.6286, "step": 50 }, { "epoch": 0.44, "learning_rate": 4.26829268292683e-05, "loss": 3.4611, "step": 55 }, { "epoch": 0.48, "learning_rate": 4.2005420054200545e-05, "loss": 3.9319, "step": 60 }, { "epoch": 0.53, "learning_rate": 4.132791327913279e-05, "loss": 3.7751, "step": 65 }, { "epoch": 0.57, "learning_rate": 4.065040650406504e-05, "loss": 3.6241, "step": 70 }, { "epoch": 0.61, "learning_rate": 3.9972899728997295e-05, "loss": 3.3906, "step": 75 }, { "epoch": 0.65, "learning_rate": 3.9295392953929537e-05, "loss": 3.1815, "step": 80 }, { "epoch": 0.65, "eval_loss": 3.189028739929199, "eval_rouge2_fmeasure": 0.0, "eval_rouge2_precision": 0.0, "eval_rouge2_recall": 0.0, "eval_runtime": 265.9332, "eval_samples_per_second": 0.102, "eval_steps_per_second": 0.053, "step": 80 }, { "epoch": 0.69, "learning_rate": 3.861788617886179e-05, "loss": 3.3673, "step": 85 }, { "epoch": 0.73, "learning_rate": 3.794037940379404e-05, "loss": 3.3792, "step": 90 }, { "epoch": 0.77, "learning_rate": 3.726287262872629e-05, "loss": 3.5732, "step": 95 }, { "epoch": 0.81, "learning_rate": 3.6585365853658535e-05, "loss": 3.2772, "step": 100 }, { "epoch": 0.85, "learning_rate": 3.6043360433604336e-05, "loss": 3.3803, "step": 105 }, { "epoch": 0.89, "learning_rate": 3.5365853658536584e-05, "loss": 3.2597, "step": 110 }, { "epoch": 0.93, "learning_rate": 3.468834688346884e-05, "loss": 3.4085, "step": 115 }, { "epoch": 0.97, "learning_rate": 3.401084010840109e-05, "loss": 3.2319, "step": 120 }, { "epoch": 0.97, "eval_loss": 3.166618824005127, "eval_rouge2_fmeasure": 0.0, "eval_rouge2_precision": 0.0, "eval_rouge2_recall": 0.0, "eval_runtime": 433.3435, "eval_samples_per_second": 0.062, "eval_steps_per_second": 0.032, "step": 120 }, { "epoch": 1.01, "learning_rate": 3.3333333333333335e-05, "loss": 3.6139, "step": 125 }, { "epoch": 1.05, "learning_rate": 3.265582655826558e-05, "loss": 3.5873, "step": 130 }, { "epoch": 1.09, "learning_rate": 3.197831978319784e-05, "loss": 3.3622, "step": 135 }, { "epoch": 1.13, "learning_rate": 3.130081300813008e-05, "loss": 3.4854, "step": 140 }, { "epoch": 1.17, "learning_rate": 3.0623306233062334e-05, "loss": 3.4429, "step": 145 }, { "epoch": 1.21, "learning_rate": 2.9945799457994585e-05, "loss": 3.1618, "step": 150 }, { "epoch": 1.25, "learning_rate": 2.926829268292683e-05, "loss": 3.428, "step": 155 }, { "epoch": 1.29, "learning_rate": 2.859078590785908e-05, "loss": 3.2305, "step": 160 }, { "epoch": 1.29, "eval_loss": 3.152249336242676, "eval_rouge2_fmeasure": 0.0037, "eval_rouge2_precision": 0.0062, "eval_rouge2_recall": 0.0026, "eval_runtime": 372.2318, "eval_samples_per_second": 0.073, "eval_steps_per_second": 0.038, "step": 160 }, { "epoch": 1.33, "learning_rate": 2.7913279132791332e-05, "loss": 3.0798, "step": 165 }, { "epoch": 1.37, "learning_rate": 2.7235772357723577e-05, "loss": 3.2169, "step": 170 }, { "epoch": 1.41, "learning_rate": 2.6558265582655828e-05, "loss": 3.4364, "step": 175 }, { "epoch": 1.45, "learning_rate": 2.588075880758808e-05, "loss": 3.2353, "step": 180 }, { "epoch": 1.49, "learning_rate": 2.5203252032520324e-05, "loss": 3.4389, "step": 185 }, { "epoch": 1.54, "learning_rate": 2.4525745257452575e-05, "loss": 3.2847, "step": 190 }, { "epoch": 1.58, "learning_rate": 2.3848238482384823e-05, "loss": 3.0442, "step": 195 }, { "epoch": 1.62, "learning_rate": 2.3170731707317075e-05, "loss": 3.2043, "step": 200 }, { "epoch": 1.62, "eval_loss": 3.1420483589172363, "eval_rouge2_fmeasure": 0.0032, "eval_rouge2_precision": 0.0041, "eval_rouge2_recall": 0.0026, "eval_runtime": 417.3311, "eval_samples_per_second": 0.065, "eval_steps_per_second": 0.034, "step": 200 }, { "epoch": 1.66, "learning_rate": 2.2493224932249323e-05, "loss": 3.4208, "step": 205 }, { "epoch": 1.7, "learning_rate": 2.181571815718157e-05, "loss": 3.7157, "step": 210 }, { "epoch": 1.74, "learning_rate": 2.1138211382113822e-05, "loss": 3.0534, "step": 215 }, { "epoch": 1.78, "learning_rate": 2.046070460704607e-05, "loss": 3.2667, "step": 220 }, { "epoch": 1.82, "learning_rate": 1.978319783197832e-05, "loss": 3.3489, "step": 225 }, { "epoch": 1.86, "learning_rate": 1.9105691056910573e-05, "loss": 3.3139, "step": 230 }, { "epoch": 1.9, "learning_rate": 1.842818428184282e-05, "loss": 3.3155, "step": 235 }, { "epoch": 1.94, "learning_rate": 1.775067750677507e-05, "loss": 3.603, "step": 240 }, { "epoch": 1.94, "eval_loss": 3.135263681411743, "eval_rouge2_fmeasure": 0.0, "eval_rouge2_precision": 0.0, "eval_rouge2_recall": 0.0, "eval_runtime": 517.2838, "eval_samples_per_second": 0.052, "eval_steps_per_second": 0.027, "step": 240 }, { "epoch": 1.99, "learning_rate": 1.707317073170732e-05, "loss": 3.4609, "step": 245 }, { "epoch": 2.03, "learning_rate": 1.6395663956639568e-05, "loss": 3.2715, "step": 250 }, { "epoch": 2.07, "learning_rate": 1.5718157181571816e-05, "loss": 3.91, "step": 255 }, { "epoch": 2.11, "learning_rate": 1.5040650406504067e-05, "loss": 3.0947, "step": 260 }, { "epoch": 2.15, "learning_rate": 1.4498644986449867e-05, "loss": 3.2667, "step": 265 }, { "epoch": 2.19, "learning_rate": 1.3821138211382115e-05, "loss": 3.5289, "step": 270 }, { "epoch": 2.23, "learning_rate": 1.3143631436314363e-05, "loss": 3.3016, "step": 275 }, { "epoch": 2.27, "learning_rate": 1.2466124661246612e-05, "loss": 3.2669, "step": 280 }, { "epoch": 2.27, "eval_loss": 3.130876064300537, "eval_rouge2_fmeasure": 0.0, "eval_rouge2_precision": 0.0, "eval_rouge2_recall": 0.0, "eval_runtime": 441.1701, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.032, "step": 280 }, { "epoch": 2.31, "learning_rate": 1.1788617886178862e-05, "loss": 3.0182, "step": 285 }, { "epoch": 2.35, "learning_rate": 1.1111111111111112e-05, "loss": 3.4221, "step": 290 }, { "epoch": 2.39, "learning_rate": 1.0433604336043361e-05, "loss": 3.1717, "step": 295 }, { "epoch": 2.43, "learning_rate": 9.756097560975611e-06, "loss": 3.309, "step": 300 }, { "epoch": 2.47, "learning_rate": 9.078590785907859e-06, "loss": 3.215, "step": 305 }, { "epoch": 2.51, "learning_rate": 8.401084010840109e-06, "loss": 3.2693, "step": 310 }, { "epoch": 2.55, "learning_rate": 7.723577235772358e-06, "loss": 3.3973, "step": 315 }, { "epoch": 2.59, "learning_rate": 7.046070460704607e-06, "loss": 3.3138, "step": 320 }, { "epoch": 2.59, "eval_loss": 3.127232074737549, "eval_rouge2_fmeasure": 0.0, "eval_rouge2_precision": 0.0, "eval_rouge2_recall": 0.0, "eval_runtime": 495.8047, "eval_samples_per_second": 0.054, "eval_steps_per_second": 0.028, "step": 320 }, { "epoch": 2.63, "learning_rate": 6.368563685636857e-06, "loss": 3.5124, "step": 325 }, { "epoch": 2.67, "learning_rate": 5.6910569105691056e-06, "loss": 3.128, "step": 330 }, { "epoch": 2.71, "learning_rate": 5.013550135501355e-06, "loss": 3.6112, "step": 335 }, { "epoch": 2.75, "learning_rate": 4.336043360433605e-06, "loss": 3.0695, "step": 340 }, { "epoch": 2.79, "learning_rate": 3.6585365853658537e-06, "loss": 3.4197, "step": 345 }, { "epoch": 2.83, "learning_rate": 3.116531165311653e-06, "loss": 3.2216, "step": 350 }, { "epoch": 2.87, "learning_rate": 2.4390243902439027e-06, "loss": 3.1863, "step": 355 }, { "epoch": 2.92, "learning_rate": 1.7615176151761518e-06, "loss": 3.3628, "step": 360 }, { "epoch": 2.92, "eval_loss": 3.125962972640991, "eval_rouge2_fmeasure": 0.0, "eval_rouge2_precision": 0.0, "eval_rouge2_recall": 0.0, "eval_runtime": 439.7386, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.032, "step": 360 }, { "epoch": 0.03, "learning_rate": 4.951491632306573e-05, "loss": 3.6245, "step": 365 }, { "epoch": 0.03, "learning_rate": 4.950817904977498e-05, "loss": 3.3963, "step": 370 }, { "epoch": 0.03, "learning_rate": 4.950144177648422e-05, "loss": 3.1854, "step": 375 }, { "epoch": 0.03, "learning_rate": 4.9494704503193465e-05, "loss": 3.5305, "step": 380 }, { "epoch": 0.03, "learning_rate": 4.9487967229902713e-05, "loss": 3.3036, "step": 385 }, { "epoch": 0.03, "learning_rate": 4.948122995661196e-05, "loss": 3.8064, "step": 390 }, { "epoch": 0.03, "learning_rate": 4.947449268332121e-05, "loss": 3.3977, "step": 395 }, { "epoch": 0.03, "learning_rate": 4.946775541003045e-05, "loss": 3.453, "step": 400 }, { "epoch": 0.03, "learning_rate": 4.94610181367397e-05, "loss": 3.1789, "step": 405 }, { "epoch": 0.03, "learning_rate": 4.945428086344895e-05, "loss": 3.3437, "step": 410 }, { "epoch": 0.03, "learning_rate": 4.944754359015819e-05, "loss": 3.1314, "step": 415 }, { "epoch": 0.03, "learning_rate": 4.9440806316867435e-05, "loss": 3.5475, "step": 420 }, { "epoch": 0.03, "learning_rate": 4.9434069043576684e-05, "loss": 3.145, "step": 425 }, { "epoch": 0.03, "learning_rate": 4.942733177028593e-05, "loss": 3.3634, "step": 430 }, { "epoch": 0.04, "learning_rate": 4.9420594496995175e-05, "loss": 3.2653, "step": 435 }, { "epoch": 0.04, "learning_rate": 4.9413857223704424e-05, "loss": 3.4231, "step": 440 }, { "epoch": 0.04, "learning_rate": 4.940711995041367e-05, "loss": 3.4497, "step": 445 }, { "epoch": 0.04, "learning_rate": 4.940173013178107e-05, "loss": 3.6448, "step": 450 }, { "epoch": 0.04, "learning_rate": 4.939499285849031e-05, "loss": 3.1871, "step": 455 }, { "epoch": 0.04, "learning_rate": 4.938825558519956e-05, "loss": 3.0073, "step": 460 }, { "epoch": 0.04, "learning_rate": 4.938151831190881e-05, "loss": 3.2752, "step": 465 }, { "epoch": 0.04, "learning_rate": 4.937478103861805e-05, "loss": 3.3738, "step": 470 }, { "epoch": 0.04, "learning_rate": 4.93680437653273e-05, "loss": 3.1156, "step": 475 }, { "epoch": 0.04, "learning_rate": 4.936130649203655e-05, "loss": 3.089, "step": 480 }, { "epoch": 0.04, "learning_rate": 4.9354569218745796e-05, "loss": 3.2361, "step": 485 }, { "epoch": 0.04, "learning_rate": 4.934783194545504e-05, "loss": 3.0767, "step": 490 }, { "epoch": 0.04, "learning_rate": 4.934109467216429e-05, "loss": 3.4057, "step": 495 }, { "epoch": 0.04, "learning_rate": 4.933435739887353e-05, "loss": 2.9778, "step": 500 }, { "epoch": 0.04, "learning_rate": 4.932762012558277e-05, "loss": 3.1364, "step": 505 }, { "epoch": 0.04, "learning_rate": 4.932088285229202e-05, "loss": 3.5782, "step": 510 }, { "epoch": 0.04, "learning_rate": 4.931414557900127e-05, "loss": 3.056, "step": 515 }, { "epoch": 0.04, "learning_rate": 4.930740830571052e-05, "loss": 3.4149, "step": 520 }, { "epoch": 0.04, "learning_rate": 4.930067103241976e-05, "loss": 3.2642, "step": 525 }, { "epoch": 0.04, "learning_rate": 4.929393375912901e-05, "loss": 3.0618, "step": 530 }, { "epoch": 0.04, "learning_rate": 4.928719648583826e-05, "loss": 3.2814, "step": 535 }, { "epoch": 0.04, "learning_rate": 4.92804592125475e-05, "loss": 3.0362, "step": 540 }, { "epoch": 0.04, "learning_rate": 4.927372193925674e-05, "loss": 3.4842, "step": 545 }, { "epoch": 0.04, "learning_rate": 4.926698466596599e-05, "loss": 3.2595, "step": 550 }, { "epoch": 0.04, "learning_rate": 4.926024739267524e-05, "loss": 3.2634, "step": 555 }, { "epoch": 0.05, "learning_rate": 4.925351011938448e-05, "loss": 3.492, "step": 560 }, { "epoch": 0.05, "learning_rate": 4.924677284609373e-05, "loss": 3.5355, "step": 565 }, { "epoch": 0.05, "learning_rate": 4.924003557280298e-05, "loss": 3.3369, "step": 570 }, { "epoch": 0.05, "learning_rate": 4.923329829951223e-05, "loss": 3.3085, "step": 575 }, { "epoch": 0.05, "learning_rate": 4.922656102622147e-05, "loss": 3.0434, "step": 580 }, { "epoch": 0.05, "learning_rate": 4.921982375293071e-05, "loss": 3.1722, "step": 585 }, { "epoch": 0.05, "learning_rate": 4.921308647963996e-05, "loss": 3.2637, "step": 590 }, { "epoch": 0.05, "learning_rate": 4.9206349206349204e-05, "loss": 3.5246, "step": 595 }, { "epoch": 0.05, "learning_rate": 4.919961193305845e-05, "loss": 3.5051, "step": 600 }, { "epoch": 0.05, "learning_rate": 4.91928746597677e-05, "loss": 3.2346, "step": 605 }, { "epoch": 0.05, "learning_rate": 4.918613738647695e-05, "loss": 3.2653, "step": 610 }, { "epoch": 0.05, "learning_rate": 4.917940011318619e-05, "loss": 3.1633, "step": 615 }, { "epoch": 0.05, "learning_rate": 4.917266283989544e-05, "loss": 3.0785, "step": 620 }, { "epoch": 0.05, "learning_rate": 4.916592556660469e-05, "loss": 3.1973, "step": 625 }, { "epoch": 0.05, "learning_rate": 4.915918829331393e-05, "loss": 3.2443, "step": 630 }, { "epoch": 0.05, "learning_rate": 4.9152451020023175e-05, "loss": 3.3169, "step": 635 }, { "epoch": 0.05, "learning_rate": 4.9145713746732423e-05, "loss": 3.4131, "step": 640 }, { "epoch": 0.05, "learning_rate": 4.913897647344167e-05, "loss": 3.2112, "step": 645 }, { "epoch": 0.05, "learning_rate": 4.9132239200150914e-05, "loss": 2.9755, "step": 650 }, { "epoch": 0.05, "learning_rate": 4.912550192686016e-05, "loss": 3.5015, "step": 655 }, { "epoch": 0.05, "learning_rate": 4.911876465356941e-05, "loss": 3.2172, "step": 660 }, { "epoch": 0.05, "learning_rate": 4.911202738027866e-05, "loss": 3.0727, "step": 665 }, { "epoch": 0.05, "learning_rate": 4.91052901069879e-05, "loss": 3.1883, "step": 670 }, { "epoch": 0.05, "learning_rate": 4.9098552833697145e-05, "loss": 3.6434, "step": 675 }, { "epoch": 0.05, "learning_rate": 4.9091815560406394e-05, "loss": 3.582, "step": 680 }, { "epoch": 0.06, "learning_rate": 4.908507828711564e-05, "loss": 3.6279, "step": 685 }, { "epoch": 0.06, "learning_rate": 4.9078341013824885e-05, "loss": 3.4427, "step": 690 }, { "epoch": 0.06, "learning_rate": 4.9071603740534134e-05, "loss": 3.2696, "step": 695 }, { "epoch": 0.06, "learning_rate": 4.906486646724338e-05, "loss": 3.2136, "step": 700 }, { "epoch": 0.06, "learning_rate": 4.9058129193952625e-05, "loss": 3.4226, "step": 705 }, { "epoch": 0.06, "learning_rate": 4.9051391920661874e-05, "loss": 3.0063, "step": 710 }, { "epoch": 0.06, "learning_rate": 4.9044654647371116e-05, "loss": 3.2971, "step": 715 }, { "epoch": 0.06, "learning_rate": 4.9037917374080365e-05, "loss": 3.4248, "step": 720 }, { "epoch": 0.06, "learning_rate": 4.903118010078961e-05, "loss": 3.3834, "step": 725 }, { "epoch": 0.06, "learning_rate": 4.9024442827498856e-05, "loss": 3.3403, "step": 730 }, { "epoch": 0.06, "learning_rate": 4.9017705554208105e-05, "loss": 3.1636, "step": 735 }, { "epoch": 0.06, "learning_rate": 4.9010968280917353e-05, "loss": 3.2906, "step": 740 }, { "epoch": 0.06, "learning_rate": 4.9004231007626596e-05, "loss": 3.2874, "step": 745 }, { "epoch": 0.06, "learning_rate": 4.8997493734335844e-05, "loss": 3.1831, "step": 750 }, { "epoch": 0.06, "learning_rate": 4.8990756461045086e-05, "loss": 3.2486, "step": 755 }, { "epoch": 0.06, "learning_rate": 4.898401918775433e-05, "loss": 3.2148, "step": 760 }, { "epoch": 0.06, "learning_rate": 4.897728191446358e-05, "loss": 3.2995, "step": 765 }, { "epoch": 0.06, "learning_rate": 4.8970544641172826e-05, "loss": 3.3068, "step": 770 }, { "epoch": 0.06, "learning_rate": 4.8963807367882075e-05, "loss": 3.3789, "step": 775 }, { "epoch": 0.06, "learning_rate": 4.895707009459132e-05, "loss": 3.2501, "step": 780 }, { "epoch": 0.06, "learning_rate": 4.8950332821300566e-05, "loss": 3.3165, "step": 785 }, { "epoch": 0.06, "learning_rate": 4.8943595548009815e-05, "loss": 3.2548, "step": 790 }, { "epoch": 0.06, "learning_rate": 4.8936858274719064e-05, "loss": 3.0099, "step": 795 }, { "epoch": 0.06, "learning_rate": 4.8930121001428306e-05, "loss": 3.466, "step": 800 }, { "epoch": 0.07, "learning_rate": 4.892338372813755e-05, "loss": 3.106, "step": 805 }, { "epoch": 0.07, "learning_rate": 4.89166464548468e-05, "loss": 3.4483, "step": 810 }, { "epoch": 0.07, "learning_rate": 4.890990918155604e-05, "loss": 3.0073, "step": 815 }, { "epoch": 0.07, "learning_rate": 4.890317190826529e-05, "loss": 3.1426, "step": 820 }, { "epoch": 0.07, "learning_rate": 4.889643463497454e-05, "loss": 3.2155, "step": 825 }, { "epoch": 0.07, "learning_rate": 4.8889697361683786e-05, "loss": 3.2883, "step": 830 }, { "epoch": 0.07, "learning_rate": 4.888296008839303e-05, "loss": 3.3822, "step": 835 }, { "epoch": 0.07, "learning_rate": 4.887622281510228e-05, "loss": 3.1398, "step": 840 }, { "epoch": 0.07, "learning_rate": 4.886948554181152e-05, "loss": 3.252, "step": 845 }, { "epoch": 0.07, "learning_rate": 4.886274826852077e-05, "loss": 3.0845, "step": 850 }, { "epoch": 0.07, "learning_rate": 4.885601099523001e-05, "loss": 3.4627, "step": 855 }, { "epoch": 0.07, "learning_rate": 4.884927372193926e-05, "loss": 3.1775, "step": 860 }, { "epoch": 0.07, "learning_rate": 4.884253644864851e-05, "loss": 3.5839, "step": 865 }, { "epoch": 0.07, "learning_rate": 4.883579917535775e-05, "loss": 3.1501, "step": 870 }, { "epoch": 0.07, "learning_rate": 4.8829061902067e-05, "loss": 3.3487, "step": 875 }, { "epoch": 0.07, "learning_rate": 4.882232462877625e-05, "loss": 3.1447, "step": 880 }, { "epoch": 0.07, "learning_rate": 4.881558735548549e-05, "loss": 3.0964, "step": 885 }, { "epoch": 0.07, "learning_rate": 4.880885008219473e-05, "loss": 3.3593, "step": 890 }, { "epoch": 0.07, "learning_rate": 4.880211280890398e-05, "loss": 3.0965, "step": 895 }, { "epoch": 0.07, "learning_rate": 4.879537553561323e-05, "loss": 3.0485, "step": 900 }, { "epoch": 0.07, "learning_rate": 4.878863826232247e-05, "loss": 3.497, "step": 905 }, { "epoch": 0.07, "learning_rate": 4.878190098903172e-05, "loss": 3.2529, "step": 910 }, { "epoch": 0.07, "learning_rate": 4.877516371574097e-05, "loss": 3.2689, "step": 915 }, { "epoch": 0.07, "learning_rate": 4.876842644245022e-05, "loss": 3.2244, "step": 920 }, { "epoch": 0.07, "learning_rate": 4.876168916915946e-05, "loss": 3.3559, "step": 925 }, { "epoch": 0.08, "learning_rate": 4.875495189586871e-05, "loss": 3.1012, "step": 930 }, { "epoch": 0.08, "learning_rate": 4.8749562077236104e-05, "loss": 2.9742, "step": 935 }, { "epoch": 0.08, "learning_rate": 4.8742824803945346e-05, "loss": 3.3356, "step": 940 }, { "epoch": 0.08, "learning_rate": 4.8736087530654595e-05, "loss": 3.0625, "step": 945 }, { "epoch": 0.08, "learning_rate": 4.8729350257363844e-05, "loss": 3.8974, "step": 950 }, { "epoch": 0.08, "learning_rate": 4.872261298407309e-05, "loss": 3.2096, "step": 955 }, { "epoch": 0.08, "learning_rate": 4.8715875710782335e-05, "loss": 3.32, "step": 960 }, { "epoch": 0.08, "learning_rate": 4.8709138437491584e-05, "loss": 3.3169, "step": 965 }, { "epoch": 0.08, "learning_rate": 4.8702401164200826e-05, "loss": 3.2782, "step": 970 }, { "epoch": 0.08, "learning_rate": 4.8695663890910075e-05, "loss": 3.7628, "step": 975 }, { "epoch": 0.08, "learning_rate": 4.868892661761932e-05, "loss": 3.3816, "step": 980 }, { "epoch": 0.08, "learning_rate": 4.8682189344328566e-05, "loss": 3.3809, "step": 985 }, { "epoch": 0.08, "learning_rate": 4.8675452071037814e-05, "loss": 3.5458, "step": 990 }, { "epoch": 0.08, "learning_rate": 4.8668714797747057e-05, "loss": 3.3134, "step": 995 }, { "epoch": 0.08, "learning_rate": 4.8661977524456305e-05, "loss": 3.062, "step": 1000 }, { "epoch": 0.08, "learning_rate": 4.8655240251165554e-05, "loss": 3.0732, "step": 1005 }, { "epoch": 0.08, "learning_rate": 4.8648502977874796e-05, "loss": 3.325, "step": 1010 }, { "epoch": 0.08, "learning_rate": 4.864176570458404e-05, "loss": 3.1344, "step": 1015 }, { "epoch": 0.08, "learning_rate": 4.863502843129329e-05, "loss": 3.4551, "step": 1020 }, { "epoch": 0.08, "learning_rate": 4.8628291158002536e-05, "loss": 3.0637, "step": 1025 }, { "epoch": 0.08, "learning_rate": 4.862155388471178e-05, "loss": 3.3949, "step": 1030 }, { "epoch": 0.08, "learning_rate": 4.861481661142103e-05, "loss": 3.172, "step": 1035 }, { "epoch": 0.08, "learning_rate": 4.8608079338130276e-05, "loss": 3.1154, "step": 1040 }, { "epoch": 0.08, "learning_rate": 4.8601342064839525e-05, "loss": 3.0203, "step": 1045 }, { "epoch": 0.08, "learning_rate": 4.859460479154877e-05, "loss": 3.3186, "step": 1050 }, { "epoch": 0.09, "learning_rate": 4.858786751825801e-05, "loss": 3.2057, "step": 1055 }, { "epoch": 0.09, "learning_rate": 4.858113024496726e-05, "loss": 3.2119, "step": 1060 }, { "epoch": 0.09, "learning_rate": 4.857439297167651e-05, "loss": 3.0146, "step": 1065 }, { "epoch": 0.09, "learning_rate": 4.856765569838575e-05, "loss": 3.0737, "step": 1070 }, { "epoch": 0.09, "learning_rate": 4.8560918425095e-05, "loss": 3.118, "step": 1075 }, { "epoch": 0.09, "learning_rate": 4.855418115180425e-05, "loss": 3.2081, "step": 1080 }, { "epoch": 0.09, "learning_rate": 4.854744387851349e-05, "loss": 3.2234, "step": 1085 }, { "epoch": 0.09, "learning_rate": 4.854070660522274e-05, "loss": 3.217, "step": 1090 }, { "epoch": 0.09, "learning_rate": 4.8533969331931987e-05, "loss": 3.4233, "step": 1095 }, { "epoch": 0.09, "learning_rate": 4.852723205864123e-05, "loss": 3.3069, "step": 1100 }, { "epoch": 0.09, "learning_rate": 4.852049478535047e-05, "loss": 3.2344, "step": 1105 }, { "epoch": 0.09, "learning_rate": 4.851375751205972e-05, "loss": 3.0722, "step": 1110 }, { "epoch": 0.09, "learning_rate": 4.850702023876897e-05, "loss": 2.9557, "step": 1115 }, { "epoch": 0.09, "learning_rate": 4.850028296547822e-05, "loss": 3.132, "step": 1120 }, { "epoch": 0.09, "learning_rate": 4.849354569218746e-05, "loss": 3.2406, "step": 1125 }, { "epoch": 0.09, "learning_rate": 4.848680841889671e-05, "loss": 3.1061, "step": 1130 }, { "epoch": 0.09, "learning_rate": 4.848007114560596e-05, "loss": 3.2933, "step": 1135 }, { "epoch": 0.09, "learning_rate": 4.84733338723152e-05, "loss": 2.9373, "step": 1140 }, { "epoch": 0.09, "learning_rate": 4.846659659902444e-05, "loss": 3.5017, "step": 1145 }, { "epoch": 0.09, "learning_rate": 4.845985932573369e-05, "loss": 3.1143, "step": 1150 }, { "epoch": 0.09, "learning_rate": 4.845312205244294e-05, "loss": 3.3182, "step": 1155 }, { "epoch": 0.09, "learning_rate": 4.844638477915218e-05, "loss": 3.3364, "step": 1160 }, { "epoch": 0.09, "learning_rate": 4.843964750586143e-05, "loss": 2.8652, "step": 1165 }, { "epoch": 0.09, "learning_rate": 4.843291023257068e-05, "loss": 3.3397, "step": 1170 }, { "epoch": 0.09, "learning_rate": 4.842617295927993e-05, "loss": 3.2569, "step": 1175 }, { "epoch": 0.1, "learning_rate": 4.841943568598917e-05, "loss": 3.1305, "step": 1180 }, { "epoch": 0.1, "learning_rate": 4.841269841269841e-05, "loss": 3.3159, "step": 1185 }, { "epoch": 0.1, "learning_rate": 4.840596113940766e-05, "loss": 3.2887, "step": 1190 }, { "epoch": 0.1, "learning_rate": 4.83992238661169e-05, "loss": 3.4384, "step": 1195 }, { "epoch": 0.1, "learning_rate": 4.839248659282615e-05, "loss": 2.9754, "step": 1200 }, { "epoch": 0.1, "learning_rate": 4.83857493195354e-05, "loss": 3.5198, "step": 1205 }, { "epoch": 0.1, "learning_rate": 4.837901204624465e-05, "loss": 3.1371, "step": 1210 }, { "epoch": 0.1, "learning_rate": 4.837227477295389e-05, "loss": 3.2225, "step": 1215 }, { "epoch": 0.1, "learning_rate": 4.836553749966314e-05, "loss": 3.0097, "step": 1220 }, { "epoch": 0.1, "learning_rate": 4.835880022637238e-05, "loss": 3.2081, "step": 1225 }, { "epoch": 0.1, "learning_rate": 4.835206295308163e-05, "loss": 3.3162, "step": 1230 }, { "epoch": 0.1, "learning_rate": 4.8345325679790874e-05, "loss": 3.1359, "step": 1235 }, { "epoch": 0.1, "learning_rate": 4.833858840650012e-05, "loss": 3.0363, "step": 1240 }, { "epoch": 0.1, "learning_rate": 4.833185113320937e-05, "loss": 3.2238, "step": 1245 }, { "epoch": 0.1, "learning_rate": 4.8325113859918614e-05, "loss": 3.3966, "step": 1250 }, { "epoch": 0.1, "learning_rate": 4.831837658662786e-05, "loss": 3.4219, "step": 1255 }, { "epoch": 0.1, "learning_rate": 4.831163931333711e-05, "loss": 3.2468, "step": 1260 }, { "epoch": 0.1, "learning_rate": 4.830490204004636e-05, "loss": 3.2864, "step": 1265 }, { "epoch": 0.1, "learning_rate": 4.82981647667556e-05, "loss": 3.2543, "step": 1270 }, { "epoch": 0.1, "learning_rate": 4.8291427493464844e-05, "loss": 3.3271, "step": 1275 }, { "epoch": 0.1, "learning_rate": 4.828469022017409e-05, "loss": 3.3202, "step": 1280 }, { "epoch": 0.1, "learning_rate": 4.8277952946883335e-05, "loss": 3.3111, "step": 1285 }, { "epoch": 0.1, "learning_rate": 4.8271215673592584e-05, "loss": 3.2664, "step": 1290 }, { "epoch": 0.1, "learning_rate": 4.826447840030183e-05, "loss": 3.2784, "step": 1295 }, { "epoch": 0.11, "learning_rate": 4.825774112701108e-05, "loss": 3.2833, "step": 1300 }, { "epoch": 0.11, "learning_rate": 4.8251003853720324e-05, "loss": 3.1219, "step": 1305 }, { "epoch": 0.11, "learning_rate": 4.824426658042957e-05, "loss": 3.1704, "step": 1310 }, { "epoch": 0.11, "learning_rate": 4.8237529307138815e-05, "loss": 3.1486, "step": 1315 }, { "epoch": 0.11, "learning_rate": 4.8230792033848064e-05, "loss": 3.2648, "step": 1320 }, { "epoch": 0.11, "learning_rate": 4.8224054760557306e-05, "loss": 3.1059, "step": 1325 }, { "epoch": 0.11, "learning_rate": 4.8217317487266555e-05, "loss": 3.3623, "step": 1330 }, { "epoch": 0.11, "learning_rate": 4.8210580213975804e-05, "loss": 3.5866, "step": 1335 }, { "epoch": 0.11, "learning_rate": 4.8203842940685046e-05, "loss": 3.3165, "step": 1340 }, { "epoch": 0.11, "learning_rate": 4.8197105667394295e-05, "loss": 3.3361, "step": 1345 }, { "epoch": 0.11, "learning_rate": 4.8190368394103544e-05, "loss": 3.3784, "step": 1350 }, { "epoch": 0.11, "learning_rate": 4.8183631120812786e-05, "loss": 2.9877, "step": 1355 }, { "epoch": 0.11, "learning_rate": 4.817689384752203e-05, "loss": 3.5171, "step": 1360 }, { "epoch": 0.11, "learning_rate": 4.817015657423128e-05, "loss": 3.1608, "step": 1365 }, { "epoch": 0.11, "learning_rate": 4.8163419300940526e-05, "loss": 3.1863, "step": 1370 }, { "epoch": 0.11, "learning_rate": 4.8156682027649774e-05, "loss": 3.1035, "step": 1375 }, { "epoch": 0.11, "learning_rate": 4.8149944754359017e-05, "loss": 3.4208, "step": 1380 }, { "epoch": 0.11, "learning_rate": 4.8143207481068265e-05, "loss": 3.0785, "step": 1385 }, { "epoch": 0.11, "learning_rate": 4.8136470207777514e-05, "loss": 3.1869, "step": 1390 }, { "epoch": 0.11, "learning_rate": 4.8129732934486756e-05, "loss": 3.0122, "step": 1395 }, { "epoch": 0.11, "learning_rate": 4.8122995661196005e-05, "loss": 3.5176, "step": 1400 }, { "epoch": 0.11, "learning_rate": 4.811625838790525e-05, "loss": 3.3739, "step": 1405 }, { "epoch": 0.11, "learning_rate": 4.8109521114614496e-05, "loss": 3.1952, "step": 1410 }, { "epoch": 0.11, "learning_rate": 4.810278384132374e-05, "loss": 3.2811, "step": 1415 }, { "epoch": 0.11, "learning_rate": 4.809604656803299e-05, "loss": 3.4939, "step": 1420 }, { "epoch": 0.12, "learning_rate": 4.8089309294742236e-05, "loss": 3.3097, "step": 1425 }, { "epoch": 0.12, "learning_rate": 4.8082572021451485e-05, "loss": 3.1636, "step": 1430 }, { "epoch": 0.12, "learning_rate": 4.807583474816073e-05, "loss": 3.1452, "step": 1435 }, { "epoch": 0.12, "learning_rate": 4.8069097474869976e-05, "loss": 3.3203, "step": 1440 }, { "epoch": 0.12, "learning_rate": 4.806236020157922e-05, "loss": 3.1017, "step": 1445 }, { "epoch": 0.12, "learning_rate": 4.805562292828846e-05, "loss": 3.1715, "step": 1450 }, { "epoch": 0.12, "learning_rate": 4.804888565499771e-05, "loss": 2.8621, "step": 1455 }, { "epoch": 0.12, "learning_rate": 4.804214838170696e-05, "loss": 3.1176, "step": 1460 }, { "epoch": 0.12, "learning_rate": 4.803541110841621e-05, "loss": 3.5175, "step": 1465 }, { "epoch": 0.12, "learning_rate": 4.802867383512545e-05, "loss": 3.245, "step": 1470 }, { "epoch": 0.12, "learning_rate": 4.80219365618347e-05, "loss": 3.0326, "step": 1475 }, { "epoch": 0.12, "learning_rate": 4.8015199288543947e-05, "loss": 3.2505, "step": 1480 }, { "epoch": 0.12, "learning_rate": 4.800846201525319e-05, "loss": 3.36, "step": 1485 }, { "epoch": 0.12, "learning_rate": 4.800172474196243e-05, "loss": 3.3422, "step": 1490 }, { "epoch": 0.12, "learning_rate": 4.799498746867168e-05, "loss": 3.2268, "step": 1495 }, { "epoch": 0.12, "learning_rate": 4.798825019538093e-05, "loss": 3.1994, "step": 1500 }, { "epoch": 0.12, "learning_rate": 4.7982860376748324e-05, "loss": 3.1365, "step": 1505 }, { "epoch": 0.12, "learning_rate": 4.797612310345757e-05, "loss": 3.2696, "step": 1510 }, { "epoch": 0.12, "learning_rate": 4.796938583016682e-05, "loss": 2.9565, "step": 1515 }, { "epoch": 0.12, "learning_rate": 4.796264855687606e-05, "loss": 2.9312, "step": 1520 }, { "epoch": 0.12, "learning_rate": 4.7955911283585305e-05, "loss": 2.901, "step": 1525 }, { "epoch": 0.12, "learning_rate": 4.7949174010294554e-05, "loss": 3.3423, "step": 1530 }, { "epoch": 0.12, "learning_rate": 4.79424367370038e-05, "loss": 3.4224, "step": 1535 }, { "epoch": 0.12, "learning_rate": 4.7935699463713045e-05, "loss": 3.1498, "step": 1540 }, { "epoch": 0.12, "learning_rate": 4.7928962190422294e-05, "loss": 3.31, "step": 1545 }, { "epoch": 0.13, "learning_rate": 4.792222491713154e-05, "loss": 3.3928, "step": 1550 }, { "epoch": 0.13, "learning_rate": 4.791548764384079e-05, "loss": 3.3894, "step": 1555 }, { "epoch": 0.13, "learning_rate": 4.7908750370550034e-05, "loss": 3.056, "step": 1560 }, { "epoch": 0.13, "learning_rate": 4.790201309725928e-05, "loss": 3.1893, "step": 1565 }, { "epoch": 0.13, "learning_rate": 4.7895275823968525e-05, "loss": 3.1102, "step": 1570 }, { "epoch": 0.13, "learning_rate": 4.788853855067777e-05, "loss": 3.2477, "step": 1575 }, { "epoch": 0.13, "learning_rate": 4.7881801277387016e-05, "loss": 3.1685, "step": 1580 }, { "epoch": 0.13, "learning_rate": 4.7875064004096265e-05, "loss": 3.285, "step": 1585 }, { "epoch": 0.13, "learning_rate": 4.7868326730805514e-05, "loss": 3.3288, "step": 1590 }, { "epoch": 0.13, "learning_rate": 4.7861589457514756e-05, "loss": 2.9146, "step": 1595 }, { "epoch": 0.13, "learning_rate": 4.7854852184224005e-05, "loss": 3.3004, "step": 1600 }, { "epoch": 0.13, "learning_rate": 4.7848114910933254e-05, "loss": 3.3265, "step": 1605 }, { "epoch": 0.13, "learning_rate": 4.7841377637642496e-05, "loss": 3.2319, "step": 1610 }, { "epoch": 0.13, "learning_rate": 4.783464036435174e-05, "loss": 3.2694, "step": 1615 }, { "epoch": 0.13, "learning_rate": 4.782790309106099e-05, "loss": 3.3163, "step": 1620 }, { "epoch": 0.13, "learning_rate": 4.7821165817770235e-05, "loss": 2.9109, "step": 1625 }, { "epoch": 0.13, "learning_rate": 4.781442854447948e-05, "loss": 3.1788, "step": 1630 }, { "epoch": 0.13, "learning_rate": 4.7807691271188726e-05, "loss": 3.2638, "step": 1635 }, { "epoch": 0.13, "learning_rate": 4.7800953997897975e-05, "loss": 3.1715, "step": 1640 }, { "epoch": 0.13, "learning_rate": 4.7794216724607224e-05, "loss": 3.0204, "step": 1645 }, { "epoch": 0.13, "learning_rate": 4.7787479451316466e-05, "loss": 3.1442, "step": 1650 }, { "epoch": 0.13, "learning_rate": 4.778074217802571e-05, "loss": 3.1554, "step": 1655 }, { "epoch": 0.13, "learning_rate": 4.777400490473496e-05, "loss": 3.0041, "step": 1660 }, { "epoch": 0.13, "learning_rate": 4.7767267631444206e-05, "loss": 3.4228, "step": 1665 }, { "epoch": 0.14, "learning_rate": 4.776053035815345e-05, "loss": 3.1159, "step": 1670 }, { "epoch": 0.14, "learning_rate": 4.77537930848627e-05, "loss": 3.3559, "step": 1675 }, { "epoch": 0.14, "learning_rate": 4.7747055811571946e-05, "loss": 2.8674, "step": 1680 }, { "epoch": 0.14, "learning_rate": 4.774031853828119e-05, "loss": 3.1701, "step": 1685 }, { "epoch": 0.14, "learning_rate": 4.773358126499044e-05, "loss": 3.3984, "step": 1690 }, { "epoch": 0.14, "learning_rate": 4.772684399169968e-05, "loss": 3.2335, "step": 1695 }, { "epoch": 0.14, "learning_rate": 4.772010671840893e-05, "loss": 3.0153, "step": 1700 }, { "epoch": 0.14, "learning_rate": 4.771336944511817e-05, "loss": 3.2157, "step": 1705 }, { "epoch": 0.14, "learning_rate": 4.770663217182742e-05, "loss": 3.2443, "step": 1710 }, { "epoch": 0.14, "learning_rate": 4.769989489853667e-05, "loss": 3.2683, "step": 1715 }, { "epoch": 0.14, "learning_rate": 4.769315762524591e-05, "loss": 3.3998, "step": 1720 }, { "epoch": 0.14, "learning_rate": 4.768642035195516e-05, "loss": 3.2701, "step": 1725 }, { "epoch": 0.14, "learning_rate": 4.767968307866441e-05, "loss": 3.2304, "step": 1730 }, { "epoch": 0.14, "learning_rate": 4.7672945805373656e-05, "loss": 3.4887, "step": 1735 }, { "epoch": 0.14, "learning_rate": 4.76662085320829e-05, "loss": 3.1407, "step": 1740 }, { "epoch": 0.14, "learning_rate": 4.765947125879214e-05, "loss": 3.3806, "step": 1745 }, { "epoch": 0.14, "learning_rate": 4.765273398550139e-05, "loss": 2.9394, "step": 1750 }, { "epoch": 0.14, "learning_rate": 4.764599671221064e-05, "loss": 3.0697, "step": 1755 }, { "epoch": 0.14, "learning_rate": 4.763925943891988e-05, "loss": 3.1154, "step": 1760 }, { "epoch": 0.14, "learning_rate": 4.763252216562913e-05, "loss": 3.308, "step": 1765 }, { "epoch": 0.14, "learning_rate": 4.762578489233838e-05, "loss": 3.1236, "step": 1770 }, { "epoch": 0.14, "learning_rate": 4.761904761904762e-05, "loss": 3.2146, "step": 1775 }, { "epoch": 0.14, "learning_rate": 4.761231034575687e-05, "loss": 3.0922, "step": 1780 }, { "epoch": 0.14, "learning_rate": 4.760557307246611e-05, "loss": 3.2447, "step": 1785 }, { "epoch": 0.14, "learning_rate": 4.759883579917536e-05, "loss": 3.2431, "step": 1790 }, { "epoch": 0.15, "learning_rate": 4.75920985258846e-05, "loss": 3.2935, "step": 1795 }, { "epoch": 0.15, "learning_rate": 4.758536125259385e-05, "loss": 3.0225, "step": 1800 }, { "epoch": 0.15, "learning_rate": 4.75786239793031e-05, "loss": 3.1884, "step": 1805 }, { "epoch": 0.15, "learning_rate": 4.757188670601235e-05, "loss": 3.1911, "step": 1810 }, { "epoch": 0.15, "learning_rate": 4.756514943272159e-05, "loss": 3.1588, "step": 1815 }, { "epoch": 0.15, "learning_rate": 4.755841215943084e-05, "loss": 3.0782, "step": 1820 }, { "epoch": 0.15, "learning_rate": 4.755167488614008e-05, "loss": 3.0288, "step": 1825 }, { "epoch": 0.15, "learning_rate": 4.7544937612849324e-05, "loss": 3.0151, "step": 1830 }, { "epoch": 0.15, "learning_rate": 4.753820033955857e-05, "loss": 3.058, "step": 1835 }, { "epoch": 0.15, "learning_rate": 4.753146306626782e-05, "loss": 3.1809, "step": 1840 }, { "epoch": 0.15, "learning_rate": 4.752472579297707e-05, "loss": 3.1528, "step": 1845 }, { "epoch": 0.15, "learning_rate": 4.751798851968631e-05, "loss": 3.1677, "step": 1850 }, { "epoch": 0.15, "learning_rate": 4.751125124639556e-05, "loss": 2.9781, "step": 1855 }, { "epoch": 0.15, "learning_rate": 4.750451397310481e-05, "loss": 3.4727, "step": 1860 }, { "epoch": 0.15, "learning_rate": 4.749777669981406e-05, "loss": 3.0776, "step": 1865 }, { "epoch": 0.15, "learning_rate": 4.74910394265233e-05, "loss": 3.4074, "step": 1870 }, { "epoch": 0.15, "learning_rate": 4.7484302153232544e-05, "loss": 3.0213, "step": 1875 }, { "epoch": 0.15, "learning_rate": 4.747756487994179e-05, "loss": 3.2027, "step": 1880 }, { "epoch": 0.15, "learning_rate": 4.7470827606651035e-05, "loss": 3.1548, "step": 1885 }, { "epoch": 0.15, "learning_rate": 4.7464090333360283e-05, "loss": 3.3882, "step": 1890 }, { "epoch": 0.15, "learning_rate": 4.745735306006953e-05, "loss": 3.0727, "step": 1895 }, { "epoch": 0.15, "learning_rate": 4.745061578677878e-05, "loss": 2.8628, "step": 1900 }, { "epoch": 0.15, "learning_rate": 4.744387851348802e-05, "loss": 3.4395, "step": 1905 }, { "epoch": 0.15, "learning_rate": 4.743714124019727e-05, "loss": 3.1199, "step": 1910 }, { "epoch": 0.15, "learning_rate": 4.7430403966906514e-05, "loss": 3.2914, "step": 1915 }, { "epoch": 0.16, "learning_rate": 4.742366669361576e-05, "loss": 3.3878, "step": 1920 }, { "epoch": 0.16, "learning_rate": 4.7416929420325005e-05, "loss": 3.0218, "step": 1925 }, { "epoch": 0.16, "learning_rate": 4.7410192147034254e-05, "loss": 3.2209, "step": 1930 }, { "epoch": 0.16, "learning_rate": 4.74034548737435e-05, "loss": 3.049, "step": 1935 }, { "epoch": 0.16, "learning_rate": 4.7396717600452745e-05, "loss": 3.1507, "step": 1940 }, { "epoch": 0.16, "learning_rate": 4.7389980327161994e-05, "loss": 3.2019, "step": 1945 }, { "epoch": 0.16, "learning_rate": 4.738324305387124e-05, "loss": 3.1197, "step": 1950 }, { "epoch": 0.16, "learning_rate": 4.7376505780580485e-05, "loss": 3.4023, "step": 1955 }, { "epoch": 0.16, "learning_rate": 4.736976850728973e-05, "loss": 3.0877, "step": 1960 }, { "epoch": 0.16, "learning_rate": 4.7363031233998976e-05, "loss": 3.2648, "step": 1965 }, { "epoch": 0.16, "learning_rate": 4.7356293960708225e-05, "loss": 2.9336, "step": 1970 }, { "epoch": 0.16, "learning_rate": 4.7349556687417474e-05, "loss": 3.0737, "step": 1975 }, { "epoch": 0.16, "learning_rate": 4.7342819414126716e-05, "loss": 2.8234, "step": 1980 }, { "epoch": 0.16, "learning_rate": 4.7336082140835965e-05, "loss": 3.2141, "step": 1985 }, { "epoch": 0.16, "learning_rate": 4.7329344867545213e-05, "loss": 3.1565, "step": 1990 }, { "epoch": 0.16, "learning_rate": 4.7322607594254456e-05, "loss": 2.9412, "step": 1995 }, { "epoch": 0.16, "learning_rate": 4.7315870320963704e-05, "loss": 3.2878, "step": 2000 }, { "epoch": 0.16, "learning_rate": 4.7309133047672947e-05, "loss": 3.0794, "step": 2005 }, { "epoch": 0.16, "learning_rate": 4.7302395774382195e-05, "loss": 3.397, "step": 2010 }, { "epoch": 0.16, "learning_rate": 4.729565850109144e-05, "loss": 3.0795, "step": 2015 }, { "epoch": 0.16, "learning_rate": 4.7288921227800686e-05, "loss": 3.5331, "step": 2020 }, { "epoch": 0.16, "learning_rate": 4.7282183954509935e-05, "loss": 3.1282, "step": 2025 }, { "epoch": 0.16, "learning_rate": 4.727544668121918e-05, "loss": 3.1138, "step": 2030 }, { "epoch": 0.16, "learning_rate": 4.7268709407928426e-05, "loss": 3.1175, "step": 2035 }, { "epoch": 0.16, "learning_rate": 4.7261972134637675e-05, "loss": 3.0259, "step": 2040 }, { "epoch": 0.17, "learning_rate": 4.725523486134692e-05, "loss": 3.4783, "step": 2045 }, { "epoch": 0.17, "learning_rate": 4.724849758805616e-05, "loss": 3.1384, "step": 2050 }, { "epoch": 0.17, "learning_rate": 4.724176031476541e-05, "loss": 3.2739, "step": 2055 }, { "epoch": 0.17, "learning_rate": 4.723502304147466e-05, "loss": 3.0155, "step": 2060 }, { "epoch": 0.17, "learning_rate": 4.7228285768183906e-05, "loss": 3.1998, "step": 2065 }, { "epoch": 0.17, "learning_rate": 4.722154849489315e-05, "loss": 3.0975, "step": 2070 }, { "epoch": 0.17, "learning_rate": 4.72148112216024e-05, "loss": 3.2449, "step": 2075 }, { "epoch": 0.17, "learning_rate": 4.7208073948311646e-05, "loss": 2.9318, "step": 2080 }, { "epoch": 0.17, "learning_rate": 4.720133667502089e-05, "loss": 3.0997, "step": 2085 }, { "epoch": 0.17, "learning_rate": 4.719459940173013e-05, "loss": 2.9982, "step": 2090 }, { "epoch": 0.17, "learning_rate": 4.718786212843938e-05, "loss": 2.9443, "step": 2095 }, { "epoch": 0.17, "learning_rate": 4.718112485514863e-05, "loss": 2.889, "step": 2100 }, { "epoch": 0.17, "learning_rate": 4.717438758185787e-05, "loss": 3.4088, "step": 2105 }, { "epoch": 0.17, "learning_rate": 4.716765030856712e-05, "loss": 3.184, "step": 2110 }, { "epoch": 0.17, "learning_rate": 4.716091303527637e-05, "loss": 3.0915, "step": 2115 }, { "epoch": 0.17, "learning_rate": 4.7154175761985616e-05, "loss": 3.2379, "step": 2120 }, { "epoch": 0.17, "learning_rate": 4.714743848869486e-05, "loss": 3.1446, "step": 2125 }, { "epoch": 0.17, "learning_rate": 4.71407012154041e-05, "loss": 3.1694, "step": 2130 }, { "epoch": 0.17, "learning_rate": 4.713396394211335e-05, "loss": 3.8897, "step": 2135 }, { "epoch": 0.17, "learning_rate": 4.712722666882259e-05, "loss": 3.0934, "step": 2140 }, { "epoch": 0.17, "learning_rate": 4.712048939553184e-05, "loss": 3.1613, "step": 2145 }, { "epoch": 0.17, "learning_rate": 4.711375212224109e-05, "loss": 3.3189, "step": 2150 }, { "epoch": 0.17, "learning_rate": 4.710701484895034e-05, "loss": 3.218, "step": 2155 }, { "epoch": 0.17, "learning_rate": 4.710027757565958e-05, "loss": 3.1543, "step": 2160 }, { "epoch": 0.18, "learning_rate": 4.709354030236883e-05, "loss": 3.1454, "step": 2165 }, { "epoch": 0.18, "learning_rate": 4.708680302907808e-05, "loss": 2.9892, "step": 2170 }, { "epoch": 0.18, "learning_rate": 4.708006575578732e-05, "loss": 3.2591, "step": 2175 }, { "epoch": 0.18, "learning_rate": 4.707332848249656e-05, "loss": 2.9655, "step": 2180 }, { "epoch": 0.18, "learning_rate": 4.706659120920581e-05, "loss": 3.1065, "step": 2185 }, { "epoch": 0.18, "learning_rate": 4.705985393591506e-05, "loss": 2.8932, "step": 2190 }, { "epoch": 0.18, "learning_rate": 4.70531166626243e-05, "loss": 3.2734, "step": 2195 }, { "epoch": 0.18, "learning_rate": 4.704637938933355e-05, "loss": 3.4112, "step": 2200 }, { "epoch": 0.18, "learning_rate": 4.70396421160428e-05, "loss": 3.039, "step": 2205 }, { "epoch": 0.18, "learning_rate": 4.703290484275205e-05, "loss": 3.3834, "step": 2210 }, { "epoch": 0.18, "learning_rate": 4.702616756946129e-05, "loss": 3.2542, "step": 2215 }, { "epoch": 0.18, "learning_rate": 4.701943029617053e-05, "loss": 3.2943, "step": 2220 }, { "epoch": 0.18, "learning_rate": 4.701269302287978e-05, "loss": 3.1251, "step": 2225 }, { "epoch": 0.18, "learning_rate": 4.700595574958903e-05, "loss": 3.4983, "step": 2230 }, { "epoch": 0.18, "learning_rate": 4.699921847629827e-05, "loss": 3.0634, "step": 2235 }, { "epoch": 0.18, "learning_rate": 4.699248120300752e-05, "loss": 3.2068, "step": 2240 }, { "epoch": 0.18, "learning_rate": 4.698574392971677e-05, "loss": 3.0052, "step": 2245 }, { "epoch": 0.18, "learning_rate": 4.697900665642601e-05, "loss": 2.9226, "step": 2250 }, { "epoch": 0.18, "learning_rate": 4.697226938313526e-05, "loss": 3.0226, "step": 2255 }, { "epoch": 0.18, "learning_rate": 4.6965532109844504e-05, "loss": 3.5132, "step": 2260 }, { "epoch": 0.18, "learning_rate": 4.695879483655375e-05, "loss": 3.0079, "step": 2265 }, { "epoch": 0.18, "learning_rate": 4.6952057563262995e-05, "loss": 3.1758, "step": 2270 }, { "epoch": 0.18, "learning_rate": 4.6945320289972243e-05, "loss": 3.2862, "step": 2275 }, { "epoch": 0.18, "learning_rate": 4.693858301668149e-05, "loss": 3.2161, "step": 2280 }, { "epoch": 0.18, "learning_rate": 4.693184574339074e-05, "loss": 2.9678, "step": 2285 }, { "epoch": 0.19, "learning_rate": 4.692510847009998e-05, "loss": 3.1297, "step": 2290 }, { "epoch": 0.19, "learning_rate": 4.691837119680923e-05, "loss": 3.2068, "step": 2295 }, { "epoch": 0.19, "learning_rate": 4.691163392351848e-05, "loss": 2.9587, "step": 2300 }, { "epoch": 0.19, "learning_rate": 4.690489665022772e-05, "loss": 3.2795, "step": 2305 }, { "epoch": 0.19, "learning_rate": 4.6898159376936965e-05, "loss": 3.5198, "step": 2310 }, { "epoch": 0.19, "learning_rate": 4.6891422103646214e-05, "loss": 3.1278, "step": 2315 }, { "epoch": 0.19, "learning_rate": 4.688468483035546e-05, "loss": 3.1983, "step": 2320 }, { "epoch": 0.19, "learning_rate": 4.6877947557064705e-05, "loss": 2.9866, "step": 2325 }, { "epoch": 0.19, "learning_rate": 4.6871210283773954e-05, "loss": 3.1094, "step": 2330 }, { "epoch": 0.19, "learning_rate": 4.68644730104832e-05, "loss": 2.8821, "step": 2335 }, { "epoch": 0.19, "learning_rate": 4.6857735737192445e-05, "loss": 3.0871, "step": 2340 }, { "epoch": 0.19, "learning_rate": 4.6850998463901694e-05, "loss": 3.3086, "step": 2345 }, { "epoch": 0.19, "learning_rate": 4.6844261190610936e-05, "loss": 3.1617, "step": 2350 }, { "epoch": 0.19, "learning_rate": 4.6837523917320185e-05, "loss": 3.0705, "step": 2355 }, { "epoch": 0.19, "learning_rate": 4.683078664402943e-05, "loss": 3.3558, "step": 2360 }, { "epoch": 0.19, "learning_rate": 4.6824049370738676e-05, "loss": 3.2012, "step": 2365 }, { "epoch": 0.19, "learning_rate": 4.6817312097447925e-05, "loss": 3.1229, "step": 2370 }, { "epoch": 0.19, "learning_rate": 4.6810574824157173e-05, "loss": 3.1233, "step": 2375 }, { "epoch": 0.19, "learning_rate": 4.6803837550866416e-05, "loss": 3.4662, "step": 2380 }, { "epoch": 0.19, "learning_rate": 4.6797100277575664e-05, "loss": 3.5225, "step": 2385 }, { "epoch": 0.19, "learning_rate": 4.6790363004284906e-05, "loss": 3.1466, "step": 2390 }, { "epoch": 0.19, "learning_rate": 4.678362573099415e-05, "loss": 3.3767, "step": 2395 }, { "epoch": 0.19, "learning_rate": 4.67768884577034e-05, "loss": 3.1327, "step": 2400 }, { "epoch": 0.19, "learning_rate": 4.6770151184412646e-05, "loss": 3.0657, "step": 2405 }, { "epoch": 0.19, "learning_rate": 4.6763413911121895e-05, "loss": 3.238, "step": 2410 }, { "epoch": 0.2, "learning_rate": 4.675667663783114e-05, "loss": 3.0663, "step": 2415 }, { "epoch": 0.2, "learning_rate": 4.6749939364540386e-05, "loss": 3.2997, "step": 2420 }, { "epoch": 0.2, "learning_rate": 4.6743202091249635e-05, "loss": 3.1138, "step": 2425 }, { "epoch": 0.2, "learning_rate": 4.673646481795888e-05, "loss": 3.1481, "step": 2430 }, { "epoch": 0.2, "learning_rate": 4.6729727544668126e-05, "loss": 3.1374, "step": 2435 }, { "epoch": 0.2, "learning_rate": 4.672299027137737e-05, "loss": 3.137, "step": 2440 }, { "epoch": 0.2, "learning_rate": 4.671625299808662e-05, "loss": 3.0878, "step": 2445 }, { "epoch": 0.2, "learning_rate": 4.670951572479586e-05, "loss": 3.0358, "step": 2450 }, { "epoch": 0.2, "learning_rate": 4.670277845150511e-05, "loss": 3.0953, "step": 2455 }, { "epoch": 0.2, "learning_rate": 4.669604117821436e-05, "loss": 3.0776, "step": 2460 }, { "epoch": 0.2, "learning_rate": 4.6689303904923606e-05, "loss": 3.4543, "step": 2465 }, { "epoch": 0.2, "learning_rate": 4.668256663163285e-05, "loss": 3.1356, "step": 2470 }, { "epoch": 0.2, "learning_rate": 4.66758293583421e-05, "loss": 3.3752, "step": 2475 }, { "epoch": 0.2, "learning_rate": 4.666909208505134e-05, "loss": 3.0158, "step": 2480 }, { "epoch": 0.2, "learning_rate": 4.666235481176059e-05, "loss": 3.1832, "step": 2485 }, { "epoch": 0.2, "learning_rate": 4.665561753846983e-05, "loss": 3.2367, "step": 2490 }, { "epoch": 0.2, "learning_rate": 4.664888026517908e-05, "loss": 3.2553, "step": 2495 }, { "epoch": 0.2, "learning_rate": 4.664214299188833e-05, "loss": 3.3684, "step": 2500 }, { "epoch": 0.2, "learning_rate": 4.663540571859757e-05, "loss": 3.157, "step": 2505 }, { "epoch": 0.2, "learning_rate": 4.662866844530682e-05, "loss": 3.3339, "step": 2510 }, { "epoch": 0.2, "learning_rate": 4.662193117201607e-05, "loss": 3.142, "step": 2515 }, { "epoch": 0.2, "learning_rate": 4.661519389872531e-05, "loss": 3.0608, "step": 2520 }, { "epoch": 0.2, "learning_rate": 4.660845662543455e-05, "loss": 3.0232, "step": 2525 }, { "epoch": 0.2, "learning_rate": 4.66017193521438e-05, "loss": 3.0909, "step": 2530 }, { "epoch": 0.2, "learning_rate": 4.659498207885305e-05, "loss": 3.0394, "step": 2535 }, { "epoch": 0.21, "learning_rate": 4.65882448055623e-05, "loss": 3.3022, "step": 2540 }, { "epoch": 0.21, "learning_rate": 4.658150753227154e-05, "loss": 3.0824, "step": 2545 }, { "epoch": 0.21, "learning_rate": 4.657477025898079e-05, "loss": 3.0483, "step": 2550 }, { "epoch": 0.21, "learning_rate": 4.656803298569004e-05, "loss": 3.0755, "step": 2555 }, { "epoch": 0.21, "learning_rate": 4.656129571239928e-05, "loss": 3.4422, "step": 2560 }, { "epoch": 0.21, "learning_rate": 4.655455843910852e-05, "loss": 3.1593, "step": 2565 }, { "epoch": 0.21, "learning_rate": 4.654782116581777e-05, "loss": 3.2496, "step": 2570 }, { "epoch": 0.21, "learning_rate": 4.654108389252702e-05, "loss": 3.3281, "step": 2575 }, { "epoch": 0.21, "learning_rate": 4.653434661923626e-05, "loss": 2.9645, "step": 2580 }, { "epoch": 0.21, "learning_rate": 4.652760934594551e-05, "loss": 3.4181, "step": 2585 }, { "epoch": 0.21, "learning_rate": 4.652087207265476e-05, "loss": 3.1505, "step": 2590 }, { "epoch": 0.21, "learning_rate": 4.6514134799364e-05, "loss": 3.2713, "step": 2595 }, { "epoch": 0.21, "learning_rate": 4.650739752607325e-05, "loss": 3.0737, "step": 2600 }, { "epoch": 0.21, "learning_rate": 4.65006602527825e-05, "loss": 3.1046, "step": 2605 }, { "epoch": 0.21, "learning_rate": 4.649392297949174e-05, "loss": 3.0764, "step": 2610 }, { "epoch": 0.21, "learning_rate": 4.6487185706200984e-05, "loss": 3.1572, "step": 2615 }, { "epoch": 0.21, "learning_rate": 4.648044843291023e-05, "loss": 3.1946, "step": 2620 }, { "epoch": 0.21, "learning_rate": 4.647371115961948e-05, "loss": 3.0024, "step": 2625 }, { "epoch": 0.21, "learning_rate": 4.646697388632873e-05, "loss": 3.1511, "step": 2630 }, { "epoch": 0.21, "learning_rate": 4.646023661303797e-05, "loss": 3.333, "step": 2635 }, { "epoch": 0.21, "learning_rate": 4.645349933974722e-05, "loss": 2.9438, "step": 2640 }, { "epoch": 0.21, "learning_rate": 4.644676206645647e-05, "loss": 3.327, "step": 2645 }, { "epoch": 0.21, "learning_rate": 4.644002479316571e-05, "loss": 3.0695, "step": 2650 }, { "epoch": 0.21, "learning_rate": 4.6433287519874954e-05, "loss": 3.1144, "step": 2655 }, { "epoch": 0.22, "learning_rate": 4.64265502465842e-05, "loss": 3.4706, "step": 2660 }, { "epoch": 0.22, "learning_rate": 4.641981297329345e-05, "loss": 3.1941, "step": 2665 }, { "epoch": 0.22, "learning_rate": 4.6413075700002694e-05, "loss": 3.1067, "step": 2670 }, { "epoch": 0.22, "learning_rate": 4.640633842671194e-05, "loss": 3.1044, "step": 2675 }, { "epoch": 0.22, "learning_rate": 4.639960115342119e-05, "loss": 3.1814, "step": 2680 }, { "epoch": 0.22, "learning_rate": 4.639286388013044e-05, "loss": 3.2541, "step": 2685 }, { "epoch": 0.22, "learning_rate": 4.638612660683968e-05, "loss": 3.1815, "step": 2690 }, { "epoch": 0.22, "learning_rate": 4.6379389333548925e-05, "loss": 3.1444, "step": 2695 }, { "epoch": 0.22, "learning_rate": 4.6372652060258174e-05, "loss": 3.0728, "step": 2700 }, { "epoch": 0.22, "learning_rate": 4.6365914786967416e-05, "loss": 3.2032, "step": 2705 }, { "epoch": 0.22, "learning_rate": 4.6359177513676665e-05, "loss": 2.8255, "step": 2710 }, { "epoch": 0.22, "learning_rate": 4.6352440240385914e-05, "loss": 3.1325, "step": 2715 }, { "epoch": 0.22, "learning_rate": 4.634570296709516e-05, "loss": 3.2602, "step": 2720 }, { "epoch": 0.22, "learning_rate": 4.6338965693804405e-05, "loss": 2.9978, "step": 2725 }, { "epoch": 0.22, "learning_rate": 4.6332228420513654e-05, "loss": 3.3716, "step": 2730 }, { "epoch": 0.22, "learning_rate": 4.63254911472229e-05, "loss": 3.2488, "step": 2735 }, { "epoch": 0.22, "learning_rate": 4.6318753873932145e-05, "loss": 2.9851, "step": 2740 }, { "epoch": 0.22, "learning_rate": 4.631201660064139e-05, "loss": 2.9942, "step": 2745 }, { "epoch": 0.22, "learning_rate": 4.6305279327350636e-05, "loss": 3.2165, "step": 2750 }, { "epoch": 0.22, "learning_rate": 4.6298542054059884e-05, "loss": 3.0885, "step": 2755 }, { "epoch": 0.22, "learning_rate": 4.6291804780769127e-05, "loss": 3.0966, "step": 2760 }, { "epoch": 0.22, "learning_rate": 4.6285067507478375e-05, "loss": 3.2696, "step": 2765 }, { "epoch": 0.22, "learning_rate": 4.6278330234187624e-05, "loss": 3.0802, "step": 2770 }, { "epoch": 0.22, "learning_rate": 4.627159296089687e-05, "loss": 3.2361, "step": 2775 }, { "epoch": 0.22, "learning_rate": 4.6264855687606115e-05, "loss": 3.0949, "step": 2780 }, { "epoch": 0.23, "learning_rate": 4.625811841431536e-05, "loss": 3.3435, "step": 2785 }, { "epoch": 0.23, "learning_rate": 4.6251381141024606e-05, "loss": 3.3882, "step": 2790 }, { "epoch": 0.23, "learning_rate": 4.6244643867733855e-05, "loss": 2.8436, "step": 2795 }, { "epoch": 0.23, "learning_rate": 4.62379065944431e-05, "loss": 3.3431, "step": 2800 }, { "epoch": 0.23, "learning_rate": 4.6231169321152346e-05, "loss": 3.4118, "step": 2805 }, { "epoch": 0.23, "learning_rate": 4.6224432047861595e-05, "loss": 3.1914, "step": 2810 }, { "epoch": 0.23, "learning_rate": 4.621769477457084e-05, "loss": 3.061, "step": 2815 }, { "epoch": 0.23, "learning_rate": 4.6210957501280086e-05, "loss": 3.1028, "step": 2820 }, { "epoch": 0.23, "learning_rate": 4.620422022798933e-05, "loss": 3.0601, "step": 2825 }, { "epoch": 0.23, "learning_rate": 4.619748295469858e-05, "loss": 3.3313, "step": 2830 }, { "epoch": 0.23, "learning_rate": 4.619074568140782e-05, "loss": 3.1585, "step": 2835 }, { "epoch": 0.23, "learning_rate": 4.618400840811707e-05, "loss": 2.9804, "step": 2840 }, { "epoch": 0.23, "learning_rate": 4.617727113482632e-05, "loss": 3.197, "step": 2845 }, { "epoch": 0.23, "learning_rate": 4.6170533861535566e-05, "loss": 2.823, "step": 2850 }, { "epoch": 0.23, "learning_rate": 4.616379658824481e-05, "loss": 3.0995, "step": 2855 }, { "epoch": 0.23, "learning_rate": 4.6157059314954057e-05, "loss": 3.2473, "step": 2860 }, { "epoch": 0.23, "learning_rate": 4.61503220416633e-05, "loss": 3.3361, "step": 2865 }, { "epoch": 0.23, "learning_rate": 4.614358476837254e-05, "loss": 2.9229, "step": 2870 }, { "epoch": 0.23, "learning_rate": 4.613684749508179e-05, "loss": 3.2953, "step": 2875 }, { "epoch": 0.23, "learning_rate": 4.613011022179104e-05, "loss": 3.1187, "step": 2880 }, { "epoch": 0.23, "learning_rate": 4.612337294850029e-05, "loss": 3.009, "step": 2885 }, { "epoch": 0.23, "learning_rate": 4.611663567520953e-05, "loss": 3.5421, "step": 2890 }, { "epoch": 0.23, "learning_rate": 4.610989840191878e-05, "loss": 3.0521, "step": 2895 }, { "epoch": 0.23, "learning_rate": 4.610316112862803e-05, "loss": 2.9707, "step": 2900 }, { "epoch": 0.23, "learning_rate": 4.609642385533727e-05, "loss": 3.0522, "step": 2905 }, { "epoch": 0.24, "learning_rate": 4.608968658204652e-05, "loss": 3.1429, "step": 2910 }, { "epoch": 0.24, "learning_rate": 4.608294930875576e-05, "loss": 3.1433, "step": 2915 }, { "epoch": 0.24, "learning_rate": 4.607621203546501e-05, "loss": 3.0362, "step": 2920 }, { "epoch": 0.24, "learning_rate": 4.606947476217425e-05, "loss": 3.0698, "step": 2925 }, { "epoch": 0.24, "learning_rate": 4.60627374888835e-05, "loss": 2.9708, "step": 2930 }, { "epoch": 0.24, "learning_rate": 4.605600021559275e-05, "loss": 2.929, "step": 2935 }, { "epoch": 0.24, "learning_rate": 4.6049262942302e-05, "loss": 3.0652, "step": 2940 }, { "epoch": 0.24, "learning_rate": 4.604252566901124e-05, "loss": 3.0888, "step": 2945 }, { "epoch": 0.24, "learning_rate": 4.603578839572049e-05, "loss": 3.0333, "step": 2950 }, { "epoch": 0.24, "learning_rate": 4.602905112242973e-05, "loss": 3.1151, "step": 2955 }, { "epoch": 0.24, "learning_rate": 4.602231384913897e-05, "loss": 3.1866, "step": 2960 }, { "epoch": 0.24, "learning_rate": 4.601557657584822e-05, "loss": 3.0419, "step": 2965 }, { "epoch": 0.24, "learning_rate": 4.600883930255747e-05, "loss": 2.9358, "step": 2970 }, { "epoch": 0.24, "learning_rate": 4.600210202926672e-05, "loss": 3.3567, "step": 2975 }, { "epoch": 0.24, "learning_rate": 4.599536475597596e-05, "loss": 2.9169, "step": 2980 }, { "epoch": 0.24, "learning_rate": 4.598862748268521e-05, "loss": 3.0725, "step": 2985 }, { "epoch": 0.24, "learning_rate": 4.598189020939446e-05, "loss": 3.4152, "step": 2990 }, { "epoch": 0.24, "learning_rate": 4.59751529361037e-05, "loss": 3.0889, "step": 2995 }, { "epoch": 0.24, "learning_rate": 4.5968415662812944e-05, "loss": 3.3419, "step": 3000 }, { "epoch": 0.24, "learning_rate": 4.596167838952219e-05, "loss": 3.3641, "step": 3005 }, { "epoch": 0.24, "learning_rate": 4.595494111623144e-05, "loss": 3.0926, "step": 3010 }, { "epoch": 0.24, "learning_rate": 4.5948203842940684e-05, "loss": 3.0744, "step": 3015 }, { "epoch": 0.24, "learning_rate": 4.594146656964993e-05, "loss": 3.0896, "step": 3020 }, { "epoch": 0.24, "learning_rate": 4.593472929635918e-05, "loss": 3.2848, "step": 3025 }, { "epoch": 0.24, "learning_rate": 4.592799202306843e-05, "loss": 3.1748, "step": 3030 }, { "epoch": 0.25, "learning_rate": 4.592125474977767e-05, "loss": 3.3514, "step": 3035 }, { "epoch": 0.25, "learning_rate": 4.591451747648692e-05, "loss": 3.2087, "step": 3040 }, { "epoch": 0.25, "learning_rate": 4.590778020319616e-05, "loss": 3.1578, "step": 3045 }, { "epoch": 0.25, "learning_rate": 4.590104292990541e-05, "loss": 3.0495, "step": 3050 }, { "epoch": 0.25, "learning_rate": 4.5894305656614654e-05, "loss": 3.0536, "step": 3055 }, { "epoch": 0.25, "learning_rate": 4.58875683833239e-05, "loss": 3.1117, "step": 3060 }, { "epoch": 0.25, "learning_rate": 4.588083111003315e-05, "loss": 3.3505, "step": 3065 }, { "epoch": 0.25, "learning_rate": 4.5874093836742394e-05, "loss": 3.3008, "step": 3070 }, { "epoch": 0.25, "learning_rate": 4.586735656345164e-05, "loss": 2.9855, "step": 3075 }, { "epoch": 0.25, "learning_rate": 4.586061929016089e-05, "loss": 3.1741, "step": 3080 }, { "epoch": 0.25, "learning_rate": 4.5853882016870134e-05, "loss": 3.1377, "step": 3085 }, { "epoch": 0.25, "learning_rate": 4.5847144743579376e-05, "loss": 3.2383, "step": 3090 }, { "epoch": 0.25, "learning_rate": 4.5840407470288625e-05, "loss": 3.0129, "step": 3095 }, { "epoch": 0.25, "learning_rate": 4.5833670196997874e-05, "loss": 3.1036, "step": 3100 }, { "epoch": 0.25, "learning_rate": 4.582693292370712e-05, "loss": 3.1, "step": 3105 }, { "epoch": 0.25, "learning_rate": 4.5820195650416365e-05, "loss": 3.21, "step": 3110 }, { "epoch": 0.25, "learning_rate": 4.5813458377125614e-05, "loss": 3.1048, "step": 3115 }, { "epoch": 0.25, "learning_rate": 4.580672110383486e-05, "loss": 3.2457, "step": 3120 }, { "epoch": 0.25, "learning_rate": 4.5799983830544105e-05, "loss": 3.1279, "step": 3125 }, { "epoch": 0.25, "learning_rate": 4.579324655725335e-05, "loss": 3.247, "step": 3130 }, { "epoch": 0.25, "learning_rate": 4.5786509283962596e-05, "loss": 3.2605, "step": 3135 }, { "epoch": 0.25, "learning_rate": 4.5779772010671844e-05, "loss": 3.3872, "step": 3140 }, { "epoch": 0.25, "learning_rate": 4.5773034737381087e-05, "loss": 2.9347, "step": 3145 }, { "epoch": 0.25, "learning_rate": 4.5766297464090335e-05, "loss": 3.0918, "step": 3150 }, { "epoch": 0.26, "learning_rate": 4.5759560190799584e-05, "loss": 3.1318, "step": 3155 }, { "epoch": 0.26, "learning_rate": 4.575282291750883e-05, "loss": 2.9989, "step": 3160 }, { "epoch": 0.26, "learning_rate": 4.5746085644218075e-05, "loss": 3.2661, "step": 3165 }, { "epoch": 0.26, "learning_rate": 4.5739348370927324e-05, "loss": 3.2538, "step": 3170 }, { "epoch": 0.26, "learning_rate": 4.5732611097636566e-05, "loss": 3.2111, "step": 3175 }, { "epoch": 0.26, "learning_rate": 4.572587382434581e-05, "loss": 3.0219, "step": 3180 }, { "epoch": 0.26, "learning_rate": 4.571913655105506e-05, "loss": 3.2369, "step": 3185 }, { "epoch": 0.26, "learning_rate": 4.5712399277764306e-05, "loss": 3.1409, "step": 3190 }, { "epoch": 0.26, "learning_rate": 4.5705662004473555e-05, "loss": 3.1233, "step": 3195 }, { "epoch": 0.26, "learning_rate": 4.56989247311828e-05, "loss": 3.2719, "step": 3200 }, { "epoch": 0.26, "learning_rate": 4.5692187457892046e-05, "loss": 2.9358, "step": 3205 }, { "epoch": 0.26, "learning_rate": 4.5685450184601295e-05, "loss": 3.1265, "step": 3210 }, { "epoch": 0.26, "learning_rate": 4.567871291131054e-05, "loss": 3.0724, "step": 3215 }, { "epoch": 0.26, "learning_rate": 4.567197563801978e-05, "loss": 3.0934, "step": 3220 }, { "epoch": 0.26, "learning_rate": 4.566523836472903e-05, "loss": 3.3711, "step": 3225 }, { "epoch": 0.26, "learning_rate": 4.565850109143828e-05, "loss": 2.8675, "step": 3230 }, { "epoch": 0.26, "learning_rate": 4.565176381814752e-05, "loss": 3.0045, "step": 3235 }, { "epoch": 0.26, "learning_rate": 4.564502654485677e-05, "loss": 3.0661, "step": 3240 }, { "epoch": 0.26, "learning_rate": 4.5638289271566017e-05, "loss": 3.1129, "step": 3245 }, { "epoch": 0.26, "learning_rate": 4.5631551998275265e-05, "loss": 3.0902, "step": 3250 }, { "epoch": 0.26, "learning_rate": 4.562481472498451e-05, "loss": 3.1345, "step": 3255 }, { "epoch": 0.26, "learning_rate": 4.561807745169375e-05, "loss": 2.9902, "step": 3260 }, { "epoch": 0.26, "learning_rate": 4.5611340178403e-05, "loss": 3.191, "step": 3265 }, { "epoch": 0.26, "learning_rate": 4.560460290511224e-05, "loss": 3.1233, "step": 3270 }, { "epoch": 0.26, "learning_rate": 4.559786563182149e-05, "loss": 3.203, "step": 3275 }, { "epoch": 0.27, "learning_rate": 4.559112835853074e-05, "loss": 3.112, "step": 3280 }, { "epoch": 0.27, "learning_rate": 4.558439108523999e-05, "loss": 3.0281, "step": 3285 }, { "epoch": 0.27, "learning_rate": 4.557765381194923e-05, "loss": 3.3552, "step": 3290 }, { "epoch": 0.27, "learning_rate": 4.557091653865848e-05, "loss": 3.0418, "step": 3295 }, { "epoch": 0.27, "learning_rate": 4.556417926536772e-05, "loss": 3.0119, "step": 3300 }, { "epoch": 0.27, "learning_rate": 4.555744199207697e-05, "loss": 3.003, "step": 3305 }, { "epoch": 0.27, "learning_rate": 4.555070471878621e-05, "loss": 3.1365, "step": 3310 }, { "epoch": 0.27, "learning_rate": 4.554396744549546e-05, "loss": 3.0694, "step": 3315 }, { "epoch": 0.27, "learning_rate": 4.553723017220471e-05, "loss": 2.8513, "step": 3320 }, { "epoch": 0.27, "learning_rate": 4.553049289891395e-05, "loss": 3.1681, "step": 3325 }, { "epoch": 0.27, "learning_rate": 4.55237556256232e-05, "loss": 3.1393, "step": 3330 }, { "epoch": 0.27, "learning_rate": 4.551701835233245e-05, "loss": 3.0198, "step": 3335 }, { "epoch": 0.27, "learning_rate": 4.55102810790417e-05, "loss": 3.0511, "step": 3340 }, { "epoch": 0.27, "learning_rate": 4.550354380575094e-05, "loss": 3.1593, "step": 3345 }, { "epoch": 0.27, "learning_rate": 4.549680653246018e-05, "loss": 2.9654, "step": 3350 }, { "epoch": 0.27, "learning_rate": 4.549006925916943e-05, "loss": 3.1572, "step": 3355 }, { "epoch": 0.27, "learning_rate": 4.548333198587868e-05, "loss": 3.283, "step": 3360 }, { "epoch": 0.27, "learning_rate": 4.547659471258792e-05, "loss": 3.1824, "step": 3365 }, { "epoch": 0.27, "learning_rate": 4.546985743929717e-05, "loss": 3.086, "step": 3370 }, { "epoch": 0.27, "learning_rate": 4.546312016600642e-05, "loss": 3.1806, "step": 3375 }, { "epoch": 0.27, "learning_rate": 4.545638289271566e-05, "loss": 3.2051, "step": 3380 }, { "epoch": 0.27, "learning_rate": 4.544964561942491e-05, "loss": 3.0539, "step": 3385 }, { "epoch": 0.27, "learning_rate": 4.544290834613415e-05, "loss": 3.0205, "step": 3390 }, { "epoch": 0.27, "learning_rate": 4.54361710728434e-05, "loss": 3.2201, "step": 3395 }, { "epoch": 0.27, "learning_rate": 4.5429433799552644e-05, "loss": 3.2691, "step": 3400 }, { "epoch": 0.28, "learning_rate": 4.542269652626189e-05, "loss": 3.1167, "step": 3405 }, { "epoch": 0.28, "learning_rate": 4.541595925297114e-05, "loss": 2.9539, "step": 3410 }, { "epoch": 0.28, "learning_rate": 4.540922197968039e-05, "loss": 3.1993, "step": 3415 }, { "epoch": 0.28, "learning_rate": 4.540248470638963e-05, "loss": 3.1305, "step": 3420 }, { "epoch": 0.28, "learning_rate": 4.539574743309888e-05, "loss": 3.1626, "step": 3425 }, { "epoch": 0.28, "learning_rate": 4.538901015980812e-05, "loss": 3.2176, "step": 3430 }, { "epoch": 0.28, "learning_rate": 4.5382272886517365e-05, "loss": 3.0691, "step": 3435 }, { "epoch": 0.28, "learning_rate": 4.5375535613226614e-05, "loss": 3.0611, "step": 3440 }, { "epoch": 0.28, "learning_rate": 4.536879833993586e-05, "loss": 3.1824, "step": 3445 }, { "epoch": 0.28, "learning_rate": 4.536206106664511e-05, "loss": 3.1394, "step": 3450 }, { "epoch": 0.28, "learning_rate": 4.5355323793354354e-05, "loss": 3.1272, "step": 3455 }, { "epoch": 0.28, "learning_rate": 4.53485865200636e-05, "loss": 2.9905, "step": 3460 }, { "epoch": 0.28, "learning_rate": 4.534184924677285e-05, "loss": 2.8781, "step": 3465 }, { "epoch": 0.28, "learning_rate": 4.53351119734821e-05, "loss": 3.1255, "step": 3470 }, { "epoch": 0.28, "learning_rate": 4.532837470019134e-05, "loss": 3.0489, "step": 3475 }, { "epoch": 0.28, "learning_rate": 4.5321637426900585e-05, "loss": 3.1483, "step": 3480 }, { "epoch": 0.28, "learning_rate": 4.5314900153609834e-05, "loss": 2.923, "step": 3485 }, { "epoch": 0.28, "learning_rate": 4.5308162880319076e-05, "loss": 3.1935, "step": 3490 }, { "epoch": 0.28, "learning_rate": 4.5301425607028325e-05, "loss": 2.8445, "step": 3495 }, { "epoch": 0.28, "learning_rate": 4.5294688333737574e-05, "loss": 3.1171, "step": 3500 }, { "epoch": 0.28, "learning_rate": 4.528795106044682e-05, "loss": 3.2084, "step": 3505 }, { "epoch": 0.28, "learning_rate": 4.5281213787156065e-05, "loss": 3.1549, "step": 3510 }, { "epoch": 0.28, "learning_rate": 4.527447651386531e-05, "loss": 3.0279, "step": 3515 }, { "epoch": 0.28, "learning_rate": 4.5267739240574555e-05, "loss": 2.7719, "step": 3520 }, { "epoch": 0.28, "learning_rate": 4.52610019672838e-05, "loss": 3.1109, "step": 3525 }, { "epoch": 0.29, "learning_rate": 4.5254264693993046e-05, "loss": 2.9881, "step": 3530 }, { "epoch": 0.29, "learning_rate": 4.5247527420702295e-05, "loss": 3.2017, "step": 3535 }, { "epoch": 0.29, "learning_rate": 4.5240790147411544e-05, "loss": 2.9567, "step": 3540 }, { "epoch": 0.29, "learning_rate": 4.5234052874120786e-05, "loss": 3.2274, "step": 3545 }, { "epoch": 0.29, "learning_rate": 4.5227315600830035e-05, "loss": 3.1517, "step": 3550 }, { "epoch": 0.29, "learning_rate": 4.5220578327539284e-05, "loss": 3.0133, "step": 3555 }, { "epoch": 0.29, "learning_rate": 4.5213841054248526e-05, "loss": 3.2494, "step": 3560 }, { "epoch": 0.29, "learning_rate": 4.520710378095777e-05, "loss": 3.1607, "step": 3565 }, { "epoch": 0.29, "learning_rate": 4.520036650766702e-05, "loss": 3.1622, "step": 3570 }, { "epoch": 0.29, "learning_rate": 4.5193629234376266e-05, "loss": 3.0597, "step": 3575 }, { "epoch": 0.29, "learning_rate": 4.518689196108551e-05, "loss": 3.1157, "step": 3580 }, { "epoch": 0.29, "learning_rate": 4.518015468779476e-05, "loss": 3.1916, "step": 3585 }, { "epoch": 0.29, "learning_rate": 4.5173417414504006e-05, "loss": 3.3892, "step": 3590 }, { "epoch": 0.29, "learning_rate": 4.5166680141213255e-05, "loss": 3.2352, "step": 3595 }, { "epoch": 0.29, "learning_rate": 4.51599428679225e-05, "loss": 2.9455, "step": 3600 }, { "epoch": 0.29, "learning_rate": 4.515455304928989e-05, "loss": 3.2882, "step": 3605 }, { "epoch": 0.29, "learning_rate": 4.514781577599914e-05, "loss": 3.1644, "step": 3610 }, { "epoch": 0.29, "learning_rate": 4.514107850270838e-05, "loss": 3.1653, "step": 3615 }, { "epoch": 0.29, "learning_rate": 4.513434122941763e-05, "loss": 3.286, "step": 3620 }, { "epoch": 0.29, "learning_rate": 4.512760395612688e-05, "loss": 3.2771, "step": 3625 }, { "epoch": 0.29, "learning_rate": 4.512086668283613e-05, "loss": 3.4175, "step": 3630 }, { "epoch": 0.29, "learning_rate": 4.511412940954537e-05, "loss": 3.0911, "step": 3635 }, { "epoch": 0.29, "learning_rate": 4.510739213625462e-05, "loss": 3.0502, "step": 3640 }, { "epoch": 0.29, "learning_rate": 4.510065486296386e-05, "loss": 3.0543, "step": 3645 }, { "epoch": 0.3, "learning_rate": 4.5093917589673105e-05, "loss": 3.125, "step": 3650 }, { "epoch": 0.3, "learning_rate": 4.5087180316382353e-05, "loss": 3.1286, "step": 3655 }, { "epoch": 0.3, "learning_rate": 4.50804430430916e-05, "loss": 3.1895, "step": 3660 }, { "epoch": 0.3, "learning_rate": 4.507370576980085e-05, "loss": 3.0078, "step": 3665 }, { "epoch": 0.3, "learning_rate": 4.506696849651009e-05, "loss": 3.0766, "step": 3670 }, { "epoch": 0.3, "learning_rate": 4.506023122321934e-05, "loss": 3.4027, "step": 3675 }, { "epoch": 0.3, "learning_rate": 4.505349394992859e-05, "loss": 2.8891, "step": 3680 }, { "epoch": 0.3, "learning_rate": 4.504675667663783e-05, "loss": 3.0093, "step": 3685 }, { "epoch": 0.3, "learning_rate": 4.5040019403347075e-05, "loss": 3.1543, "step": 3690 }, { "epoch": 0.3, "learning_rate": 4.5033282130056324e-05, "loss": 3.0005, "step": 3695 }, { "epoch": 0.3, "learning_rate": 4.502654485676557e-05, "loss": 3.2393, "step": 3700 }, { "epoch": 0.3, "learning_rate": 4.5019807583474815e-05, "loss": 3.2052, "step": 3705 }, { "epoch": 0.3, "learning_rate": 4.5013070310184064e-05, "loss": 3.1979, "step": 3710 }, { "epoch": 0.3, "learning_rate": 4.500633303689331e-05, "loss": 3.1785, "step": 3715 }, { "epoch": 0.3, "learning_rate": 4.499959576360256e-05, "loss": 3.1174, "step": 3720 }, { "epoch": 0.3, "learning_rate": 4.4992858490311804e-05, "loss": 3.0281, "step": 3725 }, { "epoch": 0.3, "learning_rate": 4.4986121217021046e-05, "loss": 3.1682, "step": 3730 }, { "epoch": 0.3, "learning_rate": 4.4979383943730295e-05, "loss": 2.9703, "step": 3735 }, { "epoch": 0.3, "learning_rate": 4.4972646670439544e-05, "loss": 3.0364, "step": 3740 }, { "epoch": 0.3, "learning_rate": 4.4965909397148786e-05, "loss": 2.8145, "step": 3745 }, { "epoch": 0.3, "learning_rate": 4.4959172123858035e-05, "loss": 3.0781, "step": 3750 }, { "epoch": 0.3, "learning_rate": 4.4952434850567283e-05, "loss": 3.0378, "step": 3755 }, { "epoch": 0.3, "learning_rate": 4.4945697577276526e-05, "loss": 3.1284, "step": 3760 }, { "epoch": 0.3, "learning_rate": 4.4938960303985774e-05, "loss": 3.2207, "step": 3765 }, { "epoch": 0.3, "learning_rate": 4.4932223030695017e-05, "loss": 3.1658, "step": 3770 }, { "epoch": 0.31, "learning_rate": 4.4925485757404265e-05, "loss": 2.9948, "step": 3775 }, { "epoch": 0.31, "learning_rate": 4.491874848411351e-05, "loss": 3.1124, "step": 3780 }, { "epoch": 0.31, "learning_rate": 4.4912011210822756e-05, "loss": 2.9172, "step": 3785 }, { "epoch": 0.31, "learning_rate": 4.4905273937532005e-05, "loss": 3.22, "step": 3790 }, { "epoch": 0.31, "learning_rate": 4.4898536664241254e-05, "loss": 2.9467, "step": 3795 }, { "epoch": 0.31, "learning_rate": 4.4891799390950496e-05, "loss": 3.1856, "step": 3800 }, { "epoch": 0.31, "learning_rate": 4.4885062117659745e-05, "loss": 3.3563, "step": 3805 }, { "epoch": 0.31, "learning_rate": 4.4878324844368994e-05, "loss": 3.1518, "step": 3810 }, { "epoch": 0.31, "learning_rate": 4.4871587571078236e-05, "loss": 3.1046, "step": 3815 }, { "epoch": 0.31, "learning_rate": 4.486485029778748e-05, "loss": 3.1422, "step": 3820 }, { "epoch": 0.31, "learning_rate": 4.485811302449673e-05, "loss": 2.9912, "step": 3825 }, { "epoch": 0.31, "learning_rate": 4.4851375751205976e-05, "loss": 3.3937, "step": 3830 }, { "epoch": 0.31, "learning_rate": 4.484463847791522e-05, "loss": 2.772, "step": 3835 }, { "epoch": 0.31, "learning_rate": 4.483790120462447e-05, "loss": 3.1256, "step": 3840 }, { "epoch": 0.31, "learning_rate": 4.4831163931333716e-05, "loss": 3.2421, "step": 3845 }, { "epoch": 0.31, "learning_rate": 4.4824426658042965e-05, "loss": 2.8323, "step": 3850 }, { "epoch": 0.31, "learning_rate": 4.481768938475221e-05, "loss": 3.0066, "step": 3855 }, { "epoch": 0.31, "learning_rate": 4.481095211146145e-05, "loss": 3.2898, "step": 3860 }, { "epoch": 0.31, "learning_rate": 4.48042148381707e-05, "loss": 3.0331, "step": 3865 }, { "epoch": 0.31, "learning_rate": 4.479747756487994e-05, "loss": 3.0689, "step": 3870 }, { "epoch": 0.31, "learning_rate": 4.479074029158919e-05, "loss": 2.9034, "step": 3875 }, { "epoch": 0.31, "learning_rate": 4.478400301829844e-05, "loss": 2.8014, "step": 3880 }, { "epoch": 0.31, "learning_rate": 4.4777265745007686e-05, "loss": 2.984, "step": 3885 }, { "epoch": 0.31, "learning_rate": 4.477052847171693e-05, "loss": 3.0642, "step": 3890 }, { "epoch": 0.31, "learning_rate": 4.476379119842618e-05, "loss": 2.9044, "step": 3895 }, { "epoch": 0.32, "learning_rate": 4.475705392513542e-05, "loss": 3.2954, "step": 3900 }, { "epoch": 0.32, "learning_rate": 4.475031665184466e-05, "loss": 3.1445, "step": 3905 }, { "epoch": 0.32, "learning_rate": 4.474357937855391e-05, "loss": 3.3147, "step": 3910 }, { "epoch": 0.32, "learning_rate": 4.473684210526316e-05, "loss": 3.2616, "step": 3915 }, { "epoch": 0.32, "learning_rate": 4.473010483197241e-05, "loss": 3.3639, "step": 3920 }, { "epoch": 0.32, "learning_rate": 4.472336755868165e-05, "loss": 2.9422, "step": 3925 }, { "epoch": 0.32, "learning_rate": 4.47166302853909e-05, "loss": 3.193, "step": 3930 }, { "epoch": 0.32, "learning_rate": 4.470989301210015e-05, "loss": 3.0869, "step": 3935 }, { "epoch": 0.32, "learning_rate": 4.47031557388094e-05, "loss": 3.097, "step": 3940 }, { "epoch": 0.32, "learning_rate": 4.469641846551864e-05, "loss": 3.2279, "step": 3945 }, { "epoch": 0.32, "learning_rate": 4.468968119222788e-05, "loss": 3.4025, "step": 3950 }, { "epoch": 0.32, "learning_rate": 4.468294391893713e-05, "loss": 3.0516, "step": 3955 }, { "epoch": 0.32, "learning_rate": 4.467620664564637e-05, "loss": 3.0323, "step": 3960 }, { "epoch": 0.32, "learning_rate": 4.466946937235562e-05, "loss": 2.7716, "step": 3965 }, { "epoch": 0.32, "learning_rate": 4.466273209906487e-05, "loss": 3.1923, "step": 3970 }, { "epoch": 0.32, "learning_rate": 4.465599482577412e-05, "loss": 3.1534, "step": 3975 }, { "epoch": 0.32, "learning_rate": 4.464925755248336e-05, "loss": 2.8173, "step": 3980 }, { "epoch": 0.32, "learning_rate": 4.464252027919261e-05, "loss": 3.0176, "step": 3985 }, { "epoch": 0.32, "learning_rate": 4.463578300590185e-05, "loss": 3.2045, "step": 3990 }, { "epoch": 0.32, "learning_rate": 4.46290457326111e-05, "loss": 3.0714, "step": 3995 }, { "epoch": 0.32, "learning_rate": 4.462230845932034e-05, "loss": 3.0958, "step": 4000 }, { "epoch": 0.32, "eval_loss": 3.0753977298736572, "eval_rouge2_fmeasure": 0.0055, "eval_rouge2_precision": 0.0064, "eval_rouge2_recall": 0.0063, "eval_runtime": 2992.0589, "eval_samples_per_second": 0.092, "eval_steps_per_second": 0.046, "step": 4000 }, { "epoch": 0.32, "learning_rate": 4.461557118602959e-05, "loss": 2.9516, "step": 4005 }, { "epoch": 0.32, "learning_rate": 4.460883391273884e-05, "loss": 3.2431, "step": 4010 }, { "epoch": 0.32, "learning_rate": 4.460209663944808e-05, "loss": 3.0873, "step": 4015 }, { "epoch": 0.32, "learning_rate": 4.459535936615733e-05, "loss": 2.8684, "step": 4020 }, { "epoch": 0.33, "learning_rate": 4.458862209286658e-05, "loss": 3.1442, "step": 4025 }, { "epoch": 0.33, "learning_rate": 4.458188481957582e-05, "loss": 3.1723, "step": 4030 }, { "epoch": 0.33, "learning_rate": 4.4575147546285065e-05, "loss": 3.4121, "step": 4035 }, { "epoch": 0.33, "learning_rate": 4.4568410272994313e-05, "loss": 2.8923, "step": 4040 }, { "epoch": 0.33, "learning_rate": 4.456167299970356e-05, "loss": 3.3188, "step": 4045 }, { "epoch": 0.33, "learning_rate": 4.455493572641281e-05, "loss": 3.3509, "step": 4050 }, { "epoch": 0.33, "learning_rate": 4.454819845312205e-05, "loss": 2.9415, "step": 4055 }, { "epoch": 0.33, "learning_rate": 4.45414611798313e-05, "loss": 3.1391, "step": 4060 }, { "epoch": 0.33, "learning_rate": 4.453472390654055e-05, "loss": 3.2693, "step": 4065 }, { "epoch": 0.33, "learning_rate": 4.452798663324979e-05, "loss": 3.1826, "step": 4070 }, { "epoch": 0.33, "learning_rate": 4.452124935995904e-05, "loss": 2.9731, "step": 4075 }, { "epoch": 0.33, "learning_rate": 4.4514512086668284e-05, "loss": 3.0303, "step": 4080 }, { "epoch": 0.33, "learning_rate": 4.450777481337753e-05, "loss": 3.1995, "step": 4085 }, { "epoch": 0.33, "learning_rate": 4.4501037540086775e-05, "loss": 3.0824, "step": 4090 }, { "epoch": 0.33, "learning_rate": 4.4494300266796024e-05, "loss": 3.1416, "step": 4095 }, { "epoch": 0.33, "learning_rate": 4.448756299350527e-05, "loss": 3.0063, "step": 4100 }, { "epoch": 0.33, "learning_rate": 4.448082572021452e-05, "loss": 2.9765, "step": 4105 }, { "epoch": 0.33, "learning_rate": 4.4474088446923764e-05, "loss": 3.1823, "step": 4110 }, { "epoch": 0.33, "learning_rate": 4.446735117363301e-05, "loss": 3.255, "step": 4115 }, { "epoch": 0.33, "learning_rate": 4.4460613900342255e-05, "loss": 3.2149, "step": 4120 }, { "epoch": 0.33, "learning_rate": 4.44538766270515e-05, "loss": 3.0531, "step": 4125 }, { "epoch": 0.33, "learning_rate": 4.4447139353760746e-05, "loss": 3.0287, "step": 4130 }, { "epoch": 0.33, "learning_rate": 4.4440402080469995e-05, "loss": 2.8681, "step": 4135 }, { "epoch": 0.33, "learning_rate": 4.4433664807179243e-05, "loss": 3.2937, "step": 4140 }, { "epoch": 0.34, "learning_rate": 4.4426927533888486e-05, "loss": 3.1149, "step": 4145 }, { "epoch": 0.34, "learning_rate": 4.4420190260597734e-05, "loss": 3.1983, "step": 4150 }, { "epoch": 0.34, "learning_rate": 4.441345298730698e-05, "loss": 3.3325, "step": 4155 }, { "epoch": 0.34, "learning_rate": 4.4406715714016225e-05, "loss": 3.1252, "step": 4160 }, { "epoch": 0.34, "learning_rate": 4.439997844072547e-05, "loss": 3.2575, "step": 4165 }, { "epoch": 0.34, "learning_rate": 4.4393241167434716e-05, "loss": 3.1463, "step": 4170 }, { "epoch": 0.34, "learning_rate": 4.4386503894143965e-05, "loss": 3.002, "step": 4175 }, { "epoch": 0.34, "learning_rate": 4.437976662085321e-05, "loss": 3.3059, "step": 4180 }, { "epoch": 0.34, "learning_rate": 4.4373029347562456e-05, "loss": 3.2497, "step": 4185 }, { "epoch": 0.34, "learning_rate": 4.4366292074271705e-05, "loss": 2.9685, "step": 4190 }, { "epoch": 0.34, "learning_rate": 4.4359554800980954e-05, "loss": 2.9922, "step": 4195 }, { "epoch": 0.34, "learning_rate": 4.4352817527690196e-05, "loss": 3.261, "step": 4200 }, { "epoch": 0.34, "learning_rate": 4.434608025439944e-05, "loss": 3.0169, "step": 4205 }, { "epoch": 0.34, "learning_rate": 4.433934298110869e-05, "loss": 3.0149, "step": 4210 }, { "epoch": 0.34, "learning_rate": 4.433260570781793e-05, "loss": 3.0762, "step": 4215 }, { "epoch": 0.34, "learning_rate": 4.432586843452718e-05, "loss": 3.1659, "step": 4220 }, { "epoch": 0.34, "learning_rate": 4.431913116123643e-05, "loss": 3.1697, "step": 4225 }, { "epoch": 0.34, "learning_rate": 4.4312393887945676e-05, "loss": 3.2132, "step": 4230 }, { "epoch": 0.34, "learning_rate": 4.430565661465492e-05, "loss": 2.9254, "step": 4235 }, { "epoch": 0.34, "learning_rate": 4.429891934136417e-05, "loss": 3.1275, "step": 4240 }, { "epoch": 0.34, "learning_rate": 4.4292182068073416e-05, "loss": 3.298, "step": 4245 }, { "epoch": 0.34, "learning_rate": 4.428544479478266e-05, "loss": 2.9652, "step": 4250 }, { "epoch": 0.34, "learning_rate": 4.42787075214919e-05, "loss": 2.8456, "step": 4255 }, { "epoch": 0.34, "learning_rate": 4.427197024820115e-05, "loss": 2.9501, "step": 4260 }, { "epoch": 0.34, "learning_rate": 4.42652329749104e-05, "loss": 2.9993, "step": 4265 }, { "epoch": 0.35, "learning_rate": 4.425849570161964e-05, "loss": 3.1575, "step": 4270 }, { "epoch": 0.35, "learning_rate": 4.425175842832889e-05, "loss": 2.9604, "step": 4275 }, { "epoch": 0.35, "learning_rate": 4.424502115503814e-05, "loss": 3.1482, "step": 4280 }, { "epoch": 0.35, "learning_rate": 4.4238283881747386e-05, "loss": 3.2198, "step": 4285 }, { "epoch": 0.35, "learning_rate": 4.423154660845663e-05, "loss": 3.3901, "step": 4290 }, { "epoch": 0.35, "learning_rate": 4.422480933516587e-05, "loss": 3.1776, "step": 4295 }, { "epoch": 0.35, "learning_rate": 4.421807206187512e-05, "loss": 3.0075, "step": 4300 }, { "epoch": 0.35, "learning_rate": 4.421133478858437e-05, "loss": 2.9798, "step": 4305 }, { "epoch": 0.35, "learning_rate": 4.420459751529361e-05, "loss": 2.9517, "step": 4310 }, { "epoch": 0.35, "learning_rate": 4.419786024200286e-05, "loss": 3.1629, "step": 4315 }, { "epoch": 0.35, "learning_rate": 4.419112296871211e-05, "loss": 3.1076, "step": 4320 }, { "epoch": 0.35, "learning_rate": 4.418438569542135e-05, "loss": 2.9667, "step": 4325 }, { "epoch": 0.35, "learning_rate": 4.41776484221306e-05, "loss": 3.1091, "step": 4330 }, { "epoch": 0.35, "learning_rate": 4.417091114883984e-05, "loss": 2.9619, "step": 4335 }, { "epoch": 0.35, "learning_rate": 4.416417387554909e-05, "loss": 3.2804, "step": 4340 }, { "epoch": 0.35, "learning_rate": 4.415743660225833e-05, "loss": 2.9225, "step": 4345 }, { "epoch": 0.35, "learning_rate": 4.415069932896758e-05, "loss": 2.8438, "step": 4350 }, { "epoch": 0.35, "learning_rate": 4.414396205567683e-05, "loss": 3.2374, "step": 4355 }, { "epoch": 0.35, "learning_rate": 4.413722478238608e-05, "loss": 3.027, "step": 4360 }, { "epoch": 0.35, "learning_rate": 4.413048750909532e-05, "loss": 3.1018, "step": 4365 }, { "epoch": 0.35, "learning_rate": 4.412375023580457e-05, "loss": 3.0387, "step": 4370 }, { "epoch": 0.35, "learning_rate": 4.411701296251382e-05, "loss": 3.1002, "step": 4375 }, { "epoch": 0.35, "learning_rate": 4.411027568922306e-05, "loss": 3.0958, "step": 4380 }, { "epoch": 0.35, "learning_rate": 4.41035384159323e-05, "loss": 3.0154, "step": 4385 }, { "epoch": 0.35, "learning_rate": 4.409680114264155e-05, "loss": 2.8887, "step": 4390 }, { "epoch": 0.36, "learning_rate": 4.40900638693508e-05, "loss": 3.3108, "step": 4395 }, { "epoch": 0.36, "learning_rate": 4.408332659606004e-05, "loss": 3.1558, "step": 4400 }, { "epoch": 0.36, "learning_rate": 4.407658932276929e-05, "loss": 3.0019, "step": 4405 }, { "epoch": 0.36, "learning_rate": 4.406985204947854e-05, "loss": 3.1111, "step": 4410 }, { "epoch": 0.36, "learning_rate": 4.406311477618779e-05, "loss": 3.0919, "step": 4415 }, { "epoch": 0.36, "learning_rate": 4.405637750289703e-05, "loss": 3.1809, "step": 4420 }, { "epoch": 0.36, "learning_rate": 4.404964022960627e-05, "loss": 3.211, "step": 4425 }, { "epoch": 0.36, "learning_rate": 4.404290295631552e-05, "loss": 3.0161, "step": 4430 }, { "epoch": 0.36, "learning_rate": 4.4036165683024764e-05, "loss": 3.0918, "step": 4435 }, { "epoch": 0.36, "learning_rate": 4.402942840973401e-05, "loss": 3.1814, "step": 4440 }, { "epoch": 0.36, "learning_rate": 4.402269113644326e-05, "loss": 2.9785, "step": 4445 }, { "epoch": 0.36, "learning_rate": 4.401595386315251e-05, "loss": 3.1153, "step": 4450 }, { "epoch": 0.36, "learning_rate": 4.400921658986175e-05, "loss": 2.8357, "step": 4455 }, { "epoch": 0.36, "learning_rate": 4.4002479316571e-05, "loss": 3.0285, "step": 4460 }, { "epoch": 0.36, "learning_rate": 4.3995742043280244e-05, "loss": 3.2999, "step": 4465 }, { "epoch": 0.36, "learning_rate": 4.398900476998949e-05, "loss": 3.0133, "step": 4470 }, { "epoch": 0.36, "learning_rate": 4.3982267496698735e-05, "loss": 3.3555, "step": 4475 }, { "epoch": 0.36, "learning_rate": 4.3975530223407984e-05, "loss": 3.296, "step": 4480 }, { "epoch": 0.36, "learning_rate": 4.396879295011723e-05, "loss": 3.4159, "step": 4485 }, { "epoch": 0.36, "learning_rate": 4.3962055676826475e-05, "loss": 3.1615, "step": 4490 }, { "epoch": 0.36, "learning_rate": 4.3955318403535724e-05, "loss": 3.1965, "step": 4495 }, { "epoch": 0.36, "learning_rate": 4.394858113024497e-05, "loss": 3.0436, "step": 4500 }, { "epoch": 0.36, "learning_rate": 4.3941843856954215e-05, "loss": 3.1806, "step": 4505 }, { "epoch": 0.36, "learning_rate": 4.393510658366346e-05, "loss": 3.2201, "step": 4510 }, { "epoch": 0.37, "learning_rate": 4.3928369310372706e-05, "loss": 3.0641, "step": 4515 }, { "epoch": 0.37, "learning_rate": 4.3921632037081954e-05, "loss": 2.8647, "step": 4520 }, { "epoch": 0.37, "learning_rate": 4.3914894763791197e-05, "loss": 2.8806, "step": 4525 }, { "epoch": 0.37, "learning_rate": 4.3908157490500445e-05, "loss": 2.9292, "step": 4530 }, { "epoch": 0.37, "learning_rate": 4.3901420217209694e-05, "loss": 3.0328, "step": 4535 }, { "epoch": 0.37, "learning_rate": 4.389468294391894e-05, "loss": 3.1128, "step": 4540 }, { "epoch": 0.37, "learning_rate": 4.3887945670628185e-05, "loss": 3.0104, "step": 4545 }, { "epoch": 0.37, "learning_rate": 4.3881208397337434e-05, "loss": 2.7728, "step": 4550 }, { "epoch": 0.37, "learning_rate": 4.3874471124046676e-05, "loss": 3.2391, "step": 4555 }, { "epoch": 0.37, "learning_rate": 4.3867733850755925e-05, "loss": 2.8613, "step": 4560 }, { "epoch": 0.37, "learning_rate": 4.386099657746517e-05, "loss": 3.3067, "step": 4565 }, { "epoch": 0.37, "learning_rate": 4.3854259304174416e-05, "loss": 2.9091, "step": 4570 }, { "epoch": 0.37, "learning_rate": 4.3847522030883665e-05, "loss": 2.8817, "step": 4575 }, { "epoch": 0.37, "learning_rate": 4.384078475759291e-05, "loss": 2.9741, "step": 4580 }, { "epoch": 0.37, "learning_rate": 4.3834047484302156e-05, "loss": 3.0892, "step": 4585 }, { "epoch": 0.37, "learning_rate": 4.3827310211011405e-05, "loss": 3.2429, "step": 4590 }, { "epoch": 0.37, "learning_rate": 4.382057293772065e-05, "loss": 2.9408, "step": 4595 }, { "epoch": 0.37, "learning_rate": 4.381383566442989e-05, "loss": 3.0757, "step": 4600 }, { "epoch": 0.37, "learning_rate": 4.380709839113914e-05, "loss": 2.8738, "step": 4605 }, { "epoch": 0.37, "learning_rate": 4.380036111784839e-05, "loss": 3.2346, "step": 4610 }, { "epoch": 0.37, "learning_rate": 4.3793623844557636e-05, "loss": 2.9399, "step": 4615 }, { "epoch": 0.37, "learning_rate": 4.378688657126688e-05, "loss": 3.195, "step": 4620 }, { "epoch": 0.37, "learning_rate": 4.3780149297976127e-05, "loss": 2.7772, "step": 4625 }, { "epoch": 0.37, "learning_rate": 4.3773412024685375e-05, "loss": 3.4355, "step": 4630 }, { "epoch": 0.37, "learning_rate": 4.376667475139462e-05, "loss": 3.2062, "step": 4635 }, { "epoch": 0.38, "learning_rate": 4.375993747810386e-05, "loss": 3.1914, "step": 4640 }, { "epoch": 0.38, "learning_rate": 4.375320020481311e-05, "loss": 3.1476, "step": 4645 }, { "epoch": 0.38, "learning_rate": 4.374646293152236e-05, "loss": 3.0083, "step": 4650 }, { "epoch": 0.38, "learning_rate": 4.37397256582316e-05, "loss": 3.2261, "step": 4655 }, { "epoch": 0.38, "learning_rate": 4.373298838494085e-05, "loss": 3.2371, "step": 4660 }, { "epoch": 0.38, "learning_rate": 4.37262511116501e-05, "loss": 2.9125, "step": 4665 }, { "epoch": 0.38, "learning_rate": 4.3719513838359346e-05, "loss": 3.3789, "step": 4670 }, { "epoch": 0.38, "learning_rate": 4.371277656506859e-05, "loss": 3.1501, "step": 4675 }, { "epoch": 0.38, "learning_rate": 4.370603929177784e-05, "loss": 3.1306, "step": 4680 }, { "epoch": 0.38, "learning_rate": 4.369930201848708e-05, "loss": 3.0257, "step": 4685 }, { "epoch": 0.38, "learning_rate": 4.369256474519632e-05, "loss": 3.1348, "step": 4690 }, { "epoch": 0.38, "learning_rate": 4.368582747190557e-05, "loss": 3.1772, "step": 4695 }, { "epoch": 0.38, "learning_rate": 4.367909019861482e-05, "loss": 2.9573, "step": 4700 }, { "epoch": 0.38, "learning_rate": 4.367235292532407e-05, "loss": 3.0109, "step": 4705 }, { "epoch": 0.38, "learning_rate": 4.366561565203331e-05, "loss": 3.1603, "step": 4710 }, { "epoch": 0.38, "learning_rate": 4.365887837874256e-05, "loss": 3.0631, "step": 4715 }, { "epoch": 0.38, "learning_rate": 4.365214110545181e-05, "loss": 3.2663, "step": 4720 }, { "epoch": 0.38, "learning_rate": 4.364540383216105e-05, "loss": 3.216, "step": 4725 }, { "epoch": 0.38, "learning_rate": 4.363866655887029e-05, "loss": 3.1248, "step": 4730 }, { "epoch": 0.38, "learning_rate": 4.363192928557954e-05, "loss": 3.0795, "step": 4735 }, { "epoch": 0.38, "learning_rate": 4.362519201228879e-05, "loss": 3.2579, "step": 4740 }, { "epoch": 0.38, "learning_rate": 4.361845473899803e-05, "loss": 2.9879, "step": 4745 }, { "epoch": 0.38, "learning_rate": 4.361171746570728e-05, "loss": 3.0311, "step": 4750 }, { "epoch": 0.38, "learning_rate": 4.360498019241653e-05, "loss": 2.9413, "step": 4755 }, { "epoch": 0.38, "learning_rate": 4.359824291912578e-05, "loss": 2.9917, "step": 4760 }, { "epoch": 0.39, "learning_rate": 4.359150564583502e-05, "loss": 2.9656, "step": 4765 }, { "epoch": 0.39, "learning_rate": 4.358476837254426e-05, "loss": 2.833, "step": 4770 }, { "epoch": 0.39, "learning_rate": 4.357803109925351e-05, "loss": 3.1525, "step": 4775 }, { "epoch": 0.39, "learning_rate": 4.357129382596276e-05, "loss": 3.1786, "step": 4780 }, { "epoch": 0.39, "learning_rate": 4.3564556552672e-05, "loss": 3.0506, "step": 4785 }, { "epoch": 0.39, "learning_rate": 4.355781927938125e-05, "loss": 3.3234, "step": 4790 }, { "epoch": 0.39, "learning_rate": 4.35510820060905e-05, "loss": 3.3562, "step": 4795 }, { "epoch": 0.39, "learning_rate": 4.354434473279974e-05, "loss": 3.1701, "step": 4800 }, { "epoch": 0.39, "learning_rate": 4.353760745950899e-05, "loss": 2.8417, "step": 4805 }, { "epoch": 0.39, "learning_rate": 4.353087018621823e-05, "loss": 3.1097, "step": 4810 }, { "epoch": 0.39, "learning_rate": 4.352413291292748e-05, "loss": 3.3805, "step": 4815 }, { "epoch": 0.39, "learning_rate": 4.3517395639636724e-05, "loss": 2.9212, "step": 4820 }, { "epoch": 0.39, "learning_rate": 4.351065836634597e-05, "loss": 2.9774, "step": 4825 }, { "epoch": 0.39, "learning_rate": 4.350392109305522e-05, "loss": 3.2141, "step": 4830 }, { "epoch": 0.39, "learning_rate": 4.3497183819764464e-05, "loss": 3.2205, "step": 4835 }, { "epoch": 0.39, "learning_rate": 4.349044654647371e-05, "loss": 3.0818, "step": 4840 }, { "epoch": 0.39, "learning_rate": 4.348370927318296e-05, "loss": 3.0723, "step": 4845 }, { "epoch": 0.39, "learning_rate": 4.347697199989221e-05, "loss": 3.3671, "step": 4850 }, { "epoch": 0.39, "learning_rate": 4.347023472660145e-05, "loss": 3.2076, "step": 4855 }, { "epoch": 0.39, "learning_rate": 4.3463497453310695e-05, "loss": 3.2495, "step": 4860 }, { "epoch": 0.39, "learning_rate": 4.3456760180019944e-05, "loss": 2.7959, "step": 4865 }, { "epoch": 0.39, "learning_rate": 4.345002290672919e-05, "loss": 3.1347, "step": 4870 }, { "epoch": 0.39, "learning_rate": 4.3443285633438435e-05, "loss": 2.9321, "step": 4875 }, { "epoch": 0.39, "learning_rate": 4.3436548360147684e-05, "loss": 2.9697, "step": 4880 }, { "epoch": 0.39, "learning_rate": 4.342981108685693e-05, "loss": 3.0605, "step": 4885 }, { "epoch": 0.4, "learning_rate": 4.3423073813566175e-05, "loss": 3.1212, "step": 4890 }, { "epoch": 0.4, "learning_rate": 4.3416336540275423e-05, "loss": 2.9201, "step": 4895 }, { "epoch": 0.4, "learning_rate": 4.3409599266984666e-05, "loss": 2.9829, "step": 4900 }, { "epoch": 0.4, "learning_rate": 4.3402861993693914e-05, "loss": 3.3439, "step": 4905 }, { "epoch": 0.4, "learning_rate": 4.3396124720403157e-05, "loss": 3.0339, "step": 4910 }, { "epoch": 0.4, "learning_rate": 4.3389387447112405e-05, "loss": 3.0685, "step": 4915 }, { "epoch": 0.4, "learning_rate": 4.3382650173821654e-05, "loss": 3.0576, "step": 4920 }, { "epoch": 0.4, "learning_rate": 4.33759129005309e-05, "loss": 3.0614, "step": 4925 }, { "epoch": 0.4, "learning_rate": 4.3369175627240145e-05, "loss": 3.1312, "step": 4930 }, { "epoch": 0.4, "learning_rate": 4.3362438353949394e-05, "loss": 3.1098, "step": 4935 }, { "epoch": 0.4, "learning_rate": 4.3355701080658636e-05, "loss": 3.2032, "step": 4940 }, { "epoch": 0.4, "learning_rate": 4.334896380736788e-05, "loss": 3.0584, "step": 4945 }, { "epoch": 0.4, "learning_rate": 4.334222653407713e-05, "loss": 2.87, "step": 4950 }, { "epoch": 0.4, "learning_rate": 4.3335489260786376e-05, "loss": 2.872, "step": 4955 }, { "epoch": 0.4, "learning_rate": 4.3328751987495625e-05, "loss": 3.0233, "step": 4960 }, { "epoch": 0.4, "learning_rate": 4.332201471420487e-05, "loss": 3.1497, "step": 4965 }, { "epoch": 0.4, "learning_rate": 4.3315277440914116e-05, "loss": 3.3478, "step": 4970 }, { "epoch": 0.4, "learning_rate": 4.3308540167623365e-05, "loss": 2.6513, "step": 4975 }, { "epoch": 0.4, "learning_rate": 4.3301802894332614e-05, "loss": 3.1054, "step": 4980 }, { "epoch": 0.4, "learning_rate": 4.3295065621041856e-05, "loss": 3.1679, "step": 4985 }, { "epoch": 0.4, "learning_rate": 4.32883283477511e-05, "loss": 2.9424, "step": 4990 }, { "epoch": 0.4, "learning_rate": 4.328159107446035e-05, "loss": 2.9678, "step": 4995 }, { "epoch": 0.4, "learning_rate": 4.327485380116959e-05, "loss": 2.8093, "step": 5000 }, { "epoch": 0.4, "learning_rate": 4.326811652787884e-05, "loss": 3.1868, "step": 5005 }, { "epoch": 0.41, "learning_rate": 4.3261379254588087e-05, "loss": 3.1751, "step": 5010 }, { "epoch": 0.41, "learning_rate": 4.3254641981297335e-05, "loss": 3.1772, "step": 5015 }, { "epoch": 0.41, "learning_rate": 4.324790470800658e-05, "loss": 2.9964, "step": 5020 }, { "epoch": 0.41, "learning_rate": 4.3241167434715826e-05, "loss": 2.8733, "step": 5025 }, { "epoch": 0.41, "learning_rate": 4.323443016142507e-05, "loss": 2.9992, "step": 5030 }, { "epoch": 0.41, "learning_rate": 4.322769288813432e-05, "loss": 3.0003, "step": 5035 }, { "epoch": 0.41, "learning_rate": 4.322095561484356e-05, "loss": 3.0774, "step": 5040 }, { "epoch": 0.41, "learning_rate": 4.321421834155281e-05, "loss": 2.9182, "step": 5045 }, { "epoch": 0.41, "learning_rate": 4.320748106826206e-05, "loss": 3.325, "step": 5050 }, { "epoch": 0.41, "learning_rate": 4.32007437949713e-05, "loss": 2.7606, "step": 5055 }, { "epoch": 0.41, "learning_rate": 4.319400652168055e-05, "loss": 2.9947, "step": 5060 }, { "epoch": 0.41, "learning_rate": 4.31872692483898e-05, "loss": 3.024, "step": 5065 }, { "epoch": 0.41, "learning_rate": 4.318053197509904e-05, "loss": 3.0752, "step": 5070 }, { "epoch": 0.41, "learning_rate": 4.317379470180828e-05, "loss": 3.1484, "step": 5075 }, { "epoch": 0.41, "learning_rate": 4.316705742851753e-05, "loss": 3.0559, "step": 5080 }, { "epoch": 0.41, "learning_rate": 4.316032015522678e-05, "loss": 3.1059, "step": 5085 }, { "epoch": 0.41, "learning_rate": 4.315358288193602e-05, "loss": 3.062, "step": 5090 }, { "epoch": 0.41, "learning_rate": 4.314684560864527e-05, "loss": 2.8981, "step": 5095 }, { "epoch": 0.41, "learning_rate": 4.314010833535452e-05, "loss": 3.0228, "step": 5100 }, { "epoch": 0.41, "learning_rate": 4.313337106206377e-05, "loss": 3.1474, "step": 5105 }, { "epoch": 0.41, "learning_rate": 4.312663378877301e-05, "loss": 3.053, "step": 5110 }, { "epoch": 0.41, "learning_rate": 4.311989651548226e-05, "loss": 2.8585, "step": 5115 }, { "epoch": 0.41, "learning_rate": 4.31131592421915e-05, "loss": 2.927, "step": 5120 }, { "epoch": 0.41, "learning_rate": 4.310642196890075e-05, "loss": 2.9696, "step": 5125 }, { "epoch": 0.41, "learning_rate": 4.309968469560999e-05, "loss": 3.0903, "step": 5130 }, { "epoch": 0.42, "learning_rate": 4.309294742231924e-05, "loss": 3.1462, "step": 5135 }, { "epoch": 0.42, "learning_rate": 4.308621014902849e-05, "loss": 3.0195, "step": 5140 }, { "epoch": 0.42, "learning_rate": 4.307947287573773e-05, "loss": 2.931, "step": 5145 }, { "epoch": 0.42, "learning_rate": 4.307273560244698e-05, "loss": 3.1945, "step": 5150 }, { "epoch": 0.42, "learning_rate": 4.306599832915623e-05, "loss": 2.9829, "step": 5155 }, { "epoch": 0.42, "learning_rate": 4.305926105586547e-05, "loss": 3.0799, "step": 5160 }, { "epoch": 0.42, "learning_rate": 4.3052523782574714e-05, "loss": 2.9349, "step": 5165 }, { "epoch": 0.42, "learning_rate": 4.304578650928396e-05, "loss": 3.1081, "step": 5170 }, { "epoch": 0.42, "learning_rate": 4.303904923599321e-05, "loss": 2.9171, "step": 5175 }, { "epoch": 0.42, "learning_rate": 4.303231196270246e-05, "loss": 3.1681, "step": 5180 }, { "epoch": 0.42, "learning_rate": 4.30255746894117e-05, "loss": 3.1256, "step": 5185 }, { "epoch": 0.42, "learning_rate": 4.301883741612095e-05, "loss": 3.0517, "step": 5190 }, { "epoch": 0.42, "learning_rate": 4.30121001428302e-05, "loss": 3.1481, "step": 5195 }, { "epoch": 0.42, "learning_rate": 4.300536286953944e-05, "loss": 3.1047, "step": 5200 }, { "epoch": 0.42, "learning_rate": 4.2998625596248684e-05, "loss": 3.0044, "step": 5205 }, { "epoch": 0.42, "learning_rate": 4.299188832295793e-05, "loss": 3.2102, "step": 5210 }, { "epoch": 0.42, "learning_rate": 4.298515104966718e-05, "loss": 3.383, "step": 5215 }, { "epoch": 0.42, "learning_rate": 4.2978413776376424e-05, "loss": 3.2182, "step": 5220 }, { "epoch": 0.42, "learning_rate": 4.297167650308567e-05, "loss": 3.0297, "step": 5225 }, { "epoch": 0.42, "learning_rate": 4.296493922979492e-05, "loss": 3.2297, "step": 5230 }, { "epoch": 0.42, "learning_rate": 4.295820195650417e-05, "loss": 3.2441, "step": 5235 }, { "epoch": 0.42, "learning_rate": 4.295146468321341e-05, "loss": 3.0492, "step": 5240 }, { "epoch": 0.42, "learning_rate": 4.2944727409922655e-05, "loss": 2.9498, "step": 5245 }, { "epoch": 0.42, "learning_rate": 4.2937990136631904e-05, "loss": 2.8979, "step": 5250 }, { "epoch": 0.42, "learning_rate": 4.2931252863341146e-05, "loss": 3.0763, "step": 5255 }, { "epoch": 0.43, "learning_rate": 4.2924515590050395e-05, "loss": 3.071, "step": 5260 }, { "epoch": 0.43, "learning_rate": 4.2917778316759644e-05, "loss": 3.1906, "step": 5265 }, { "epoch": 0.43, "learning_rate": 4.291238849812704e-05, "loss": 3.9365, "step": 5270 }, { "epoch": 0.43, "learning_rate": 4.290565122483629e-05, "loss": 3.0687, "step": 5275 }, { "epoch": 0.43, "learning_rate": 4.2898913951545536e-05, "loss": 2.8605, "step": 5280 }, { "epoch": 0.43, "learning_rate": 4.289217667825478e-05, "loss": 3.0541, "step": 5285 }, { "epoch": 0.43, "learning_rate": 4.288543940496402e-05, "loss": 3.1296, "step": 5290 }, { "epoch": 0.43, "learning_rate": 4.287870213167327e-05, "loss": 3.1929, "step": 5295 }, { "epoch": 0.43, "learning_rate": 4.287196485838252e-05, "loss": 3.0888, "step": 5300 }, { "epoch": 0.43, "learning_rate": 4.286522758509177e-05, "loss": 3.1341, "step": 5305 }, { "epoch": 0.43, "learning_rate": 4.285849031180101e-05, "loss": 3.2743, "step": 5310 }, { "epoch": 0.43, "learning_rate": 4.285175303851026e-05, "loss": 2.9379, "step": 5315 }, { "epoch": 0.43, "learning_rate": 4.284501576521951e-05, "loss": 2.93, "step": 5320 }, { "epoch": 0.43, "learning_rate": 4.283827849192875e-05, "loss": 2.9956, "step": 5325 }, { "epoch": 0.43, "learning_rate": 4.283154121863799e-05, "loss": 2.9685, "step": 5330 }, { "epoch": 0.43, "learning_rate": 4.282480394534724e-05, "loss": 3.1619, "step": 5335 }, { "epoch": 0.43, "learning_rate": 4.281806667205649e-05, "loss": 3.085, "step": 5340 }, { "epoch": 0.43, "learning_rate": 4.281132939876573e-05, "loss": 2.9773, "step": 5345 }, { "epoch": 0.43, "learning_rate": 4.280459212547498e-05, "loss": 3.2456, "step": 5350 }, { "epoch": 0.43, "learning_rate": 4.279785485218423e-05, "loss": 2.9965, "step": 5355 }, { "epoch": 0.43, "learning_rate": 4.279111757889348e-05, "loss": 3.0508, "step": 5360 }, { "epoch": 0.43, "learning_rate": 4.278438030560272e-05, "loss": 3.123, "step": 5365 }, { "epoch": 0.43, "learning_rate": 4.277764303231196e-05, "loss": 3.0023, "step": 5370 }, { "epoch": 0.43, "learning_rate": 4.277090575902121e-05, "loss": 3.3, "step": 5375 }, { "epoch": 0.43, "learning_rate": 4.276416848573045e-05, "loss": 3.139, "step": 5380 }, { "epoch": 0.44, "learning_rate": 4.27574312124397e-05, "loss": 3.1068, "step": 5385 }, { "epoch": 0.44, "learning_rate": 4.275069393914895e-05, "loss": 3.07, "step": 5390 }, { "epoch": 0.44, "learning_rate": 4.27439566658582e-05, "loss": 3.1072, "step": 5395 }, { "epoch": 0.44, "learning_rate": 4.273721939256744e-05, "loss": 3.0112, "step": 5400 }, { "epoch": 0.44, "learning_rate": 4.273048211927669e-05, "loss": 3.1176, "step": 5405 }, { "epoch": 0.44, "learning_rate": 4.272374484598593e-05, "loss": 2.7726, "step": 5410 }, { "epoch": 0.44, "learning_rate": 4.271700757269518e-05, "loss": 2.8491, "step": 5415 }, { "epoch": 0.44, "learning_rate": 4.2710270299404423e-05, "loss": 2.8852, "step": 5420 }, { "epoch": 0.44, "learning_rate": 4.270353302611367e-05, "loss": 2.7856, "step": 5425 }, { "epoch": 0.44, "learning_rate": 4.269679575282292e-05, "loss": 3.1395, "step": 5430 }, { "epoch": 0.44, "learning_rate": 4.269005847953216e-05, "loss": 3.2047, "step": 5435 }, { "epoch": 0.44, "learning_rate": 4.268332120624141e-05, "loss": 3.033, "step": 5440 }, { "epoch": 0.44, "learning_rate": 4.267658393295066e-05, "loss": 2.987, "step": 5445 }, { "epoch": 0.44, "learning_rate": 4.266984665965991e-05, "loss": 3.0016, "step": 5450 }, { "epoch": 0.44, "learning_rate": 4.266310938636915e-05, "loss": 3.1097, "step": 5455 }, { "epoch": 0.44, "learning_rate": 4.2656372113078394e-05, "loss": 3.0633, "step": 5460 }, { "epoch": 0.44, "learning_rate": 4.264963483978764e-05, "loss": 2.8894, "step": 5465 }, { "epoch": 0.44, "learning_rate": 4.264289756649689e-05, "loss": 3.026, "step": 5470 }, { "epoch": 0.44, "learning_rate": 4.2636160293206134e-05, "loss": 2.9712, "step": 5475 }, { "epoch": 0.44, "learning_rate": 4.262942301991538e-05, "loss": 3.2726, "step": 5480 }, { "epoch": 0.44, "learning_rate": 4.262268574662463e-05, "loss": 3.1535, "step": 5485 }, { "epoch": 0.44, "learning_rate": 4.2615948473333874e-05, "loss": 3.0379, "step": 5490 }, { "epoch": 0.44, "learning_rate": 4.260921120004312e-05, "loss": 3.3582, "step": 5495 }, { "epoch": 0.44, "learning_rate": 4.2602473926752365e-05, "loss": 2.9169, "step": 5500 }, { "epoch": 0.45, "learning_rate": 4.2595736653461614e-05, "loss": 3.0502, "step": 5505 }, { "epoch": 0.45, "learning_rate": 4.2588999380170856e-05, "loss": 3.2374, "step": 5510 }, { "epoch": 0.45, "learning_rate": 4.2582262106880105e-05, "loss": 3.0465, "step": 5515 }, { "epoch": 0.45, "learning_rate": 4.2575524833589353e-05, "loss": 3.4409, "step": 5520 }, { "epoch": 0.45, "learning_rate": 4.2568787560298596e-05, "loss": 2.8746, "step": 5525 }, { "epoch": 0.45, "learning_rate": 4.2562050287007844e-05, "loss": 2.9985, "step": 5530 }, { "epoch": 0.45, "learning_rate": 4.255531301371709e-05, "loss": 3.2513, "step": 5535 }, { "epoch": 0.45, "learning_rate": 4.2548575740426335e-05, "loss": 3.142, "step": 5540 }, { "epoch": 0.45, "learning_rate": 4.254183846713558e-05, "loss": 2.9062, "step": 5545 }, { "epoch": 0.45, "learning_rate": 4.2535101193844826e-05, "loss": 2.9976, "step": 5550 }, { "epoch": 0.45, "learning_rate": 4.2528363920554075e-05, "loss": 3.2293, "step": 5555 }, { "epoch": 0.45, "learning_rate": 4.2521626647263324e-05, "loss": 2.7799, "step": 5560 }, { "epoch": 0.45, "learning_rate": 4.2514889373972566e-05, "loss": 2.9451, "step": 5565 }, { "epoch": 0.45, "learning_rate": 4.2508152100681815e-05, "loss": 2.9368, "step": 5570 }, { "epoch": 0.45, "learning_rate": 4.2501414827391064e-05, "loss": 2.7972, "step": 5575 }, { "epoch": 0.45, "learning_rate": 4.2494677554100306e-05, "loss": 2.9851, "step": 5580 }, { "epoch": 0.45, "learning_rate": 4.2487940280809555e-05, "loss": 2.9721, "step": 5585 }, { "epoch": 0.45, "learning_rate": 4.24812030075188e-05, "loss": 3.0178, "step": 5590 }, { "epoch": 0.45, "learning_rate": 4.2474465734228046e-05, "loss": 3.0758, "step": 5595 }, { "epoch": 0.45, "learning_rate": 4.246772846093729e-05, "loss": 3.1676, "step": 5600 }, { "epoch": 0.45, "learning_rate": 4.246099118764654e-05, "loss": 3.232, "step": 5605 }, { "epoch": 0.45, "learning_rate": 4.2454253914355786e-05, "loss": 3.1686, "step": 5610 }, { "epoch": 0.45, "learning_rate": 4.2447516641065035e-05, "loss": 3.1155, "step": 5615 }, { "epoch": 0.45, "learning_rate": 4.244077936777428e-05, "loss": 3.328, "step": 5620 }, { "epoch": 0.45, "learning_rate": 4.2434042094483526e-05, "loss": 2.904, "step": 5625 }, { "epoch": 0.46, "learning_rate": 4.242730482119277e-05, "loss": 2.9753, "step": 5630 }, { "epoch": 0.46, "learning_rate": 4.242056754790201e-05, "loss": 2.9243, "step": 5635 }, { "epoch": 0.46, "learning_rate": 4.241383027461126e-05, "loss": 3.1304, "step": 5640 }, { "epoch": 0.46, "learning_rate": 4.240709300132051e-05, "loss": 3.0722, "step": 5645 }, { "epoch": 0.46, "learning_rate": 4.2400355728029756e-05, "loss": 3.0274, "step": 5650 }, { "epoch": 0.46, "learning_rate": 4.2393618454739e-05, "loss": 2.9222, "step": 5655 }, { "epoch": 0.46, "learning_rate": 4.238688118144825e-05, "loss": 3.0571, "step": 5660 }, { "epoch": 0.46, "learning_rate": 4.2380143908157496e-05, "loss": 2.8809, "step": 5665 }, { "epoch": 0.46, "learning_rate": 4.237340663486674e-05, "loss": 3.025, "step": 5670 }, { "epoch": 0.46, "learning_rate": 4.236666936157598e-05, "loss": 3.1147, "step": 5675 }, { "epoch": 0.46, "learning_rate": 4.235993208828523e-05, "loss": 3.1962, "step": 5680 }, { "epoch": 0.46, "learning_rate": 4.235319481499448e-05, "loss": 3.2954, "step": 5685 }, { "epoch": 0.46, "learning_rate": 4.234645754170372e-05, "loss": 3.0492, "step": 5690 }, { "epoch": 0.46, "learning_rate": 4.233972026841297e-05, "loss": 2.9564, "step": 5695 }, { "epoch": 0.46, "learning_rate": 4.233298299512222e-05, "loss": 2.8588, "step": 5700 }, { "epoch": 0.46, "learning_rate": 4.232624572183147e-05, "loss": 2.9818, "step": 5705 }, { "epoch": 0.46, "learning_rate": 4.231950844854071e-05, "loss": 3.3102, "step": 5710 }, { "epoch": 0.46, "learning_rate": 4.231277117524995e-05, "loss": 3.0969, "step": 5715 }, { "epoch": 0.46, "learning_rate": 4.23060339019592e-05, "loss": 3.1605, "step": 5720 }, { "epoch": 0.46, "learning_rate": 4.229929662866845e-05, "loss": 3.1074, "step": 5725 }, { "epoch": 0.46, "learning_rate": 4.229255935537769e-05, "loss": 3.5156, "step": 5730 }, { "epoch": 0.46, "learning_rate": 4.228582208208694e-05, "loss": 3.0901, "step": 5735 }, { "epoch": 0.46, "learning_rate": 4.227908480879619e-05, "loss": 3.1218, "step": 5740 }, { "epoch": 0.46, "learning_rate": 4.227234753550543e-05, "loss": 2.7888, "step": 5745 }, { "epoch": 0.46, "learning_rate": 4.226561026221468e-05, "loss": 2.8804, "step": 5750 }, { "epoch": 0.47, "learning_rate": 4.225887298892393e-05, "loss": 3.1825, "step": 5755 }, { "epoch": 0.47, "learning_rate": 4.225213571563317e-05, "loss": 3.0061, "step": 5760 }, { "epoch": 0.47, "learning_rate": 4.224539844234241e-05, "loss": 3.0918, "step": 5765 }, { "epoch": 0.47, "learning_rate": 4.223866116905166e-05, "loss": 2.8022, "step": 5770 }, { "epoch": 0.47, "learning_rate": 4.223192389576091e-05, "loss": 3.1484, "step": 5775 }, { "epoch": 0.47, "learning_rate": 4.222518662247016e-05, "loss": 2.9748, "step": 5780 }, { "epoch": 0.47, "learning_rate": 4.22184493491794e-05, "loss": 3.053, "step": 5785 }, { "epoch": 0.47, "learning_rate": 4.221171207588865e-05, "loss": 3.1931, "step": 5790 }, { "epoch": 0.47, "learning_rate": 4.22049748025979e-05, "loss": 3.5065, "step": 5795 }, { "epoch": 0.47, "learning_rate": 4.219823752930714e-05, "loss": 2.9901, "step": 5800 }, { "epoch": 0.47, "learning_rate": 4.219150025601638e-05, "loss": 3.3565, "step": 5805 }, { "epoch": 0.47, "learning_rate": 4.218476298272563e-05, "loss": 2.9501, "step": 5810 }, { "epoch": 0.47, "learning_rate": 4.217802570943488e-05, "loss": 2.9877, "step": 5815 }, { "epoch": 0.47, "learning_rate": 4.217128843614412e-05, "loss": 3.5543, "step": 5820 }, { "epoch": 0.47, "learning_rate": 4.216455116285337e-05, "loss": 3.1097, "step": 5825 }, { "epoch": 0.47, "learning_rate": 4.215781388956262e-05, "loss": 2.9987, "step": 5830 }, { "epoch": 0.47, "learning_rate": 4.215107661627186e-05, "loss": 3.0965, "step": 5835 }, { "epoch": 0.47, "learning_rate": 4.214433934298111e-05, "loss": 3.0131, "step": 5840 }, { "epoch": 0.47, "learning_rate": 4.2137602069690354e-05, "loss": 3.4088, "step": 5845 }, { "epoch": 0.47, "learning_rate": 4.21308647963996e-05, "loss": 2.932, "step": 5850 }, { "epoch": 0.47, "learning_rate": 4.2124127523108845e-05, "loss": 3.1906, "step": 5855 }, { "epoch": 0.47, "learning_rate": 4.2117390249818094e-05, "loss": 3.3301, "step": 5860 }, { "epoch": 0.47, "learning_rate": 4.211065297652734e-05, "loss": 3.0456, "step": 5865 }, { "epoch": 0.47, "learning_rate": 4.210391570323659e-05, "loss": 2.8092, "step": 5870 }, { "epoch": 0.47, "learning_rate": 4.2097178429945834e-05, "loss": 3.1395, "step": 5875 }, { "epoch": 0.48, "learning_rate": 4.209044115665508e-05, "loss": 2.8979, "step": 5880 }, { "epoch": 0.48, "learning_rate": 4.208370388336433e-05, "loss": 3.1059, "step": 5885 }, { "epoch": 0.48, "learning_rate": 4.2076966610073574e-05, "loss": 3.2633, "step": 5890 }, { "epoch": 0.48, "learning_rate": 4.2070229336782816e-05, "loss": 2.9585, "step": 5895 }, { "epoch": 0.48, "learning_rate": 4.2063492063492065e-05, "loss": 3.2317, "step": 5900 }, { "epoch": 0.48, "learning_rate": 4.2056754790201313e-05, "loss": 2.9057, "step": 5905 }, { "epoch": 0.48, "learning_rate": 4.2050017516910556e-05, "loss": 2.8944, "step": 5910 }, { "epoch": 0.48, "learning_rate": 4.2043280243619804e-05, "loss": 3.0617, "step": 5915 }, { "epoch": 0.48, "learning_rate": 4.203654297032905e-05, "loss": 3.1542, "step": 5920 }, { "epoch": 0.48, "learning_rate": 4.20298056970383e-05, "loss": 3.0791, "step": 5925 }, { "epoch": 0.48, "learning_rate": 4.2023068423747544e-05, "loss": 3.2551, "step": 5930 }, { "epoch": 0.48, "learning_rate": 4.2016331150456786e-05, "loss": 3.2556, "step": 5935 }, { "epoch": 0.48, "learning_rate": 4.2009593877166035e-05, "loss": 2.9848, "step": 5940 }, { "epoch": 0.48, "learning_rate": 4.200285660387528e-05, "loss": 3.2682, "step": 5945 }, { "epoch": 0.48, "learning_rate": 4.1996119330584526e-05, "loss": 2.9622, "step": 5950 }, { "epoch": 0.48, "learning_rate": 4.1989382057293775e-05, "loss": 3.4463, "step": 5955 }, { "epoch": 0.48, "learning_rate": 4.1982644784003024e-05, "loss": 3.197, "step": 5960 }, { "epoch": 0.48, "learning_rate": 4.1975907510712266e-05, "loss": 2.9737, "step": 5965 }, { "epoch": 0.48, "learning_rate": 4.1969170237421515e-05, "loss": 3.1124, "step": 5970 }, { "epoch": 0.48, "learning_rate": 4.196243296413076e-05, "loss": 3.067, "step": 5975 }, { "epoch": 0.48, "learning_rate": 4.1955695690840006e-05, "loss": 3.224, "step": 5980 }, { "epoch": 0.48, "learning_rate": 4.194895841754925e-05, "loss": 3.2847, "step": 5985 }, { "epoch": 0.48, "learning_rate": 4.19422211442585e-05, "loss": 3.0231, "step": 5990 }, { "epoch": 0.48, "learning_rate": 4.1935483870967746e-05, "loss": 2.8844, "step": 5995 }, { "epoch": 0.49, "learning_rate": 4.192874659767699e-05, "loss": 3.0196, "step": 6000 }, { "epoch": 0.49, "learning_rate": 4.192200932438624e-05, "loss": 2.9583, "step": 6005 }, { "epoch": 0.49, "learning_rate": 4.1915272051095486e-05, "loss": 3.0375, "step": 6010 }, { "epoch": 0.49, "learning_rate": 4.1908534777804734e-05, "loss": 2.9743, "step": 6015 }, { "epoch": 0.49, "learning_rate": 4.1901797504513977e-05, "loss": 2.8241, "step": 6020 }, { "epoch": 0.49, "learning_rate": 4.189506023122322e-05, "loss": 2.836, "step": 6025 }, { "epoch": 0.49, "learning_rate": 4.188832295793247e-05, "loss": 3.1118, "step": 6030 }, { "epoch": 0.49, "learning_rate": 4.1881585684641716e-05, "loss": 2.8837, "step": 6035 }, { "epoch": 0.49, "learning_rate": 4.187484841135096e-05, "loss": 3.093, "step": 6040 }, { "epoch": 0.49, "learning_rate": 4.186811113806021e-05, "loss": 2.9789, "step": 6045 }, { "epoch": 0.49, "learning_rate": 4.1861373864769456e-05, "loss": 2.9944, "step": 6050 }, { "epoch": 0.49, "learning_rate": 4.18546365914787e-05, "loss": 3.1287, "step": 6055 }, { "epoch": 0.49, "learning_rate": 4.184789931818795e-05, "loss": 3.1213, "step": 6060 }, { "epoch": 0.49, "learning_rate": 4.184116204489719e-05, "loss": 3.2943, "step": 6065 }, { "epoch": 0.49, "learning_rate": 4.183442477160644e-05, "loss": 3.1028, "step": 6070 }, { "epoch": 0.49, "learning_rate": 4.182768749831568e-05, "loss": 3.243, "step": 6075 }, { "epoch": 0.49, "learning_rate": 4.182095022502493e-05, "loss": 2.8142, "step": 6080 }, { "epoch": 0.49, "learning_rate": 4.181421295173418e-05, "loss": 3.1794, "step": 6085 }, { "epoch": 0.49, "learning_rate": 4.180747567844342e-05, "loss": 3.3118, "step": 6090 }, { "epoch": 0.49, "learning_rate": 4.180073840515267e-05, "loss": 3.1506, "step": 6095 }, { "epoch": 0.49, "learning_rate": 4.179400113186192e-05, "loss": 3.177, "step": 6100 }, { "epoch": 0.49, "learning_rate": 4.178726385857116e-05, "loss": 3.0311, "step": 6105 }, { "epoch": 0.49, "learning_rate": 4.17805265852804e-05, "loss": 3.0866, "step": 6110 }, { "epoch": 0.49, "learning_rate": 4.177378931198965e-05, "loss": 3.5669, "step": 6115 }, { "epoch": 0.49, "learning_rate": 4.17670520386989e-05, "loss": 2.942, "step": 6120 }, { "epoch": 0.5, "learning_rate": 4.176031476540815e-05, "loss": 3.1499, "step": 6125 }, { "epoch": 0.5, "learning_rate": 4.175357749211739e-05, "loss": 3.0345, "step": 6130 }, { "epoch": 0.5, "learning_rate": 4.174684021882664e-05, "loss": 2.9467, "step": 6135 }, { "epoch": 0.5, "learning_rate": 4.174010294553589e-05, "loss": 3.0161, "step": 6140 }, { "epoch": 0.5, "learning_rate": 4.173336567224513e-05, "loss": 3.2259, "step": 6145 }, { "epoch": 0.5, "learning_rate": 4.172662839895437e-05, "loss": 2.9918, "step": 6150 }, { "epoch": 0.5, "learning_rate": 4.171989112566362e-05, "loss": 2.7739, "step": 6155 }, { "epoch": 0.5, "learning_rate": 4.171315385237287e-05, "loss": 3.0321, "step": 6160 }, { "epoch": 0.5, "learning_rate": 4.170641657908211e-05, "loss": 3.378, "step": 6165 }, { "epoch": 0.5, "learning_rate": 4.169967930579136e-05, "loss": 3.0588, "step": 6170 }, { "epoch": 0.5, "learning_rate": 4.169294203250061e-05, "loss": 2.8973, "step": 6175 }, { "epoch": 0.5, "learning_rate": 4.168620475920986e-05, "loss": 3.0668, "step": 6180 }, { "epoch": 0.5, "learning_rate": 4.16794674859191e-05, "loss": 3.1018, "step": 6185 }, { "epoch": 0.5, "learning_rate": 4.167273021262835e-05, "loss": 2.729, "step": 6190 }, { "epoch": 0.5, "learning_rate": 4.166599293933759e-05, "loss": 3.2127, "step": 6195 }, { "epoch": 0.5, "learning_rate": 4.1659255666046834e-05, "loss": 3.0852, "step": 6200 }, { "epoch": 0.5, "learning_rate": 4.165251839275608e-05, "loss": 2.9025, "step": 6205 }, { "epoch": 0.5, "learning_rate": 4.164578111946533e-05, "loss": 3.0034, "step": 6210 }, { "epoch": 0.5, "learning_rate": 4.163904384617458e-05, "loss": 2.9844, "step": 6215 }, { "epoch": 0.5, "learning_rate": 4.163230657288382e-05, "loss": 2.8532, "step": 6220 }, { "epoch": 0.5, "learning_rate": 4.162556929959307e-05, "loss": 3.0747, "step": 6225 }, { "epoch": 0.5, "learning_rate": 4.161883202630232e-05, "loss": 2.9875, "step": 6230 }, { "epoch": 0.5, "learning_rate": 4.161209475301156e-05, "loss": 3.0558, "step": 6235 }, { "epoch": 0.5, "learning_rate": 4.1605357479720805e-05, "loss": 2.968, "step": 6240 }, { "epoch": 0.5, "learning_rate": 4.1598620206430054e-05, "loss": 2.9673, "step": 6245 }, { "epoch": 0.51, "learning_rate": 4.15918829331393e-05, "loss": 2.948, "step": 6250 }, { "epoch": 0.51, "learning_rate": 4.1585145659848545e-05, "loss": 3.173, "step": 6255 }, { "epoch": 0.51, "learning_rate": 4.1578408386557794e-05, "loss": 3.1657, "step": 6260 }, { "epoch": 0.51, "learning_rate": 4.157167111326704e-05, "loss": 3.011, "step": 6265 }, { "epoch": 0.51, "learning_rate": 4.156493383997629e-05, "loss": 3.2625, "step": 6270 }, { "epoch": 0.51, "learning_rate": 4.1558196566685534e-05, "loss": 3.1201, "step": 6275 }, { "epoch": 0.51, "learning_rate": 4.1551459293394776e-05, "loss": 3.258, "step": 6280 }, { "epoch": 0.51, "learning_rate": 4.1544722020104024e-05, "loss": 3.428, "step": 6285 }, { "epoch": 0.51, "learning_rate": 4.153798474681327e-05, "loss": 3.257, "step": 6290 }, { "epoch": 0.51, "learning_rate": 4.1531247473522515e-05, "loss": 3.2086, "step": 6295 }, { "epoch": 0.51, "learning_rate": 4.1524510200231764e-05, "loss": 3.2408, "step": 6300 }, { "epoch": 0.51, "learning_rate": 4.151777292694101e-05, "loss": 3.0592, "step": 6305 }, { "epoch": 0.51, "learning_rate": 4.1511035653650255e-05, "loss": 3.2017, "step": 6310 }, { "epoch": 0.51, "learning_rate": 4.1504298380359504e-05, "loss": 2.9562, "step": 6315 }, { "epoch": 0.51, "learning_rate": 4.149756110706875e-05, "loss": 3.2331, "step": 6320 }, { "epoch": 0.51, "learning_rate": 4.1490823833777995e-05, "loss": 3.1174, "step": 6325 }, { "epoch": 0.51, "learning_rate": 4.148408656048724e-05, "loss": 3.0513, "step": 6330 }, { "epoch": 0.51, "learning_rate": 4.1477349287196486e-05, "loss": 3.0752, "step": 6335 }, { "epoch": 0.51, "learning_rate": 4.1470612013905735e-05, "loss": 2.9025, "step": 6340 }, { "epoch": 0.51, "learning_rate": 4.1463874740614984e-05, "loss": 3.0994, "step": 6345 }, { "epoch": 0.51, "learning_rate": 4.1457137467324226e-05, "loss": 3.1386, "step": 6350 }, { "epoch": 0.51, "learning_rate": 4.1450400194033475e-05, "loss": 3.0328, "step": 6355 }, { "epoch": 0.51, "learning_rate": 4.1443662920742724e-05, "loss": 2.7842, "step": 6360 }, { "epoch": 0.51, "learning_rate": 4.1436925647451966e-05, "loss": 2.8813, "step": 6365 }, { "epoch": 0.51, "learning_rate": 4.143018837416121e-05, "loss": 3.3991, "step": 6370 }, { "epoch": 0.52, "learning_rate": 4.142345110087046e-05, "loss": 3.0849, "step": 6375 }, { "epoch": 0.52, "learning_rate": 4.1416713827579706e-05, "loss": 3.5411, "step": 6380 }, { "epoch": 0.52, "learning_rate": 4.140997655428895e-05, "loss": 2.8933, "step": 6385 }, { "epoch": 0.52, "learning_rate": 4.1403239280998197e-05, "loss": 2.9984, "step": 6390 }, { "epoch": 0.52, "learning_rate": 4.1396502007707445e-05, "loss": 3.1318, "step": 6395 }, { "epoch": 0.52, "learning_rate": 4.138976473441669e-05, "loss": 3.2986, "step": 6400 }, { "epoch": 0.52, "learning_rate": 4.1383027461125936e-05, "loss": 2.9299, "step": 6405 }, { "epoch": 0.52, "learning_rate": 4.137629018783518e-05, "loss": 3.0908, "step": 6410 }, { "epoch": 0.52, "learning_rate": 4.136955291454443e-05, "loss": 3.0864, "step": 6415 }, { "epoch": 0.52, "learning_rate": 4.136281564125367e-05, "loss": 2.9794, "step": 6420 }, { "epoch": 0.52, "learning_rate": 4.135607836796292e-05, "loss": 3.1128, "step": 6425 }, { "epoch": 0.52, "learning_rate": 4.134934109467217e-05, "loss": 2.9951, "step": 6430 }, { "epoch": 0.52, "learning_rate": 4.1342603821381416e-05, "loss": 3.187, "step": 6435 }, { "epoch": 0.52, "learning_rate": 4.133586654809066e-05, "loss": 3.2286, "step": 6440 }, { "epoch": 0.52, "learning_rate": 4.132912927479991e-05, "loss": 2.9073, "step": 6445 }, { "epoch": 0.52, "learning_rate": 4.132239200150915e-05, "loss": 2.9606, "step": 6450 }, { "epoch": 0.52, "learning_rate": 4.13156547282184e-05, "loss": 3.3927, "step": 6455 }, { "epoch": 0.52, "learning_rate": 4.130891745492764e-05, "loss": 2.9814, "step": 6460 }, { "epoch": 0.52, "learning_rate": 4.130218018163689e-05, "loss": 3.134, "step": 6465 }, { "epoch": 0.52, "learning_rate": 4.129544290834614e-05, "loss": 3.1801, "step": 6470 }, { "epoch": 0.52, "learning_rate": 4.128870563505538e-05, "loss": 3.3085, "step": 6475 }, { "epoch": 0.52, "learning_rate": 4.128196836176463e-05, "loss": 3.1814, "step": 6480 }, { "epoch": 0.52, "learning_rate": 4.127523108847388e-05, "loss": 3.2744, "step": 6485 }, { "epoch": 0.52, "learning_rate": 4.126849381518313e-05, "loss": 2.9528, "step": 6490 }, { "epoch": 0.53, "learning_rate": 4.126175654189237e-05, "loss": 3.1222, "step": 6495 }, { "epoch": 0.53, "learning_rate": 4.125501926860161e-05, "loss": 2.8767, "step": 6500 }, { "epoch": 0.53, "learning_rate": 4.124828199531086e-05, "loss": 2.9187, "step": 6505 }, { "epoch": 0.53, "learning_rate": 4.12415447220201e-05, "loss": 3.1005, "step": 6510 }, { "epoch": 0.53, "learning_rate": 4.123480744872935e-05, "loss": 3.0222, "step": 6515 }, { "epoch": 0.53, "learning_rate": 4.12280701754386e-05, "loss": 3.0137, "step": 6520 }, { "epoch": 0.53, "learning_rate": 4.122133290214785e-05, "loss": 3.1305, "step": 6525 }, { "epoch": 0.53, "learning_rate": 4.121459562885709e-05, "loss": 2.9975, "step": 6530 }, { "epoch": 0.53, "learning_rate": 4.120785835556634e-05, "loss": 2.8521, "step": 6535 }, { "epoch": 0.53, "learning_rate": 4.120112108227558e-05, "loss": 2.8654, "step": 6540 }, { "epoch": 0.53, "learning_rate": 4.119438380898483e-05, "loss": 3.0614, "step": 6545 }, { "epoch": 0.53, "learning_rate": 4.118764653569407e-05, "loss": 2.9413, "step": 6550 }, { "epoch": 0.53, "learning_rate": 4.118090926240332e-05, "loss": 2.9269, "step": 6555 }, { "epoch": 0.53, "learning_rate": 4.117417198911257e-05, "loss": 3.1327, "step": 6560 }, { "epoch": 0.53, "learning_rate": 4.116743471582181e-05, "loss": 3.1815, "step": 6565 }, { "epoch": 0.53, "learning_rate": 4.116069744253106e-05, "loss": 3.3745, "step": 6570 }, { "epoch": 0.53, "learning_rate": 4.115396016924031e-05, "loss": 3.0903, "step": 6575 }, { "epoch": 0.53, "learning_rate": 4.114722289594955e-05, "loss": 3.0028, "step": 6580 }, { "epoch": 0.53, "learning_rate": 4.1140485622658794e-05, "loss": 3.1658, "step": 6585 }, { "epoch": 0.53, "learning_rate": 4.113374834936804e-05, "loss": 3.0588, "step": 6590 }, { "epoch": 0.53, "learning_rate": 4.112701107607729e-05, "loss": 2.9141, "step": 6595 }, { "epoch": 0.53, "learning_rate": 4.112027380278654e-05, "loss": 3.0221, "step": 6600 }, { "epoch": 0.53, "learning_rate": 4.111353652949578e-05, "loss": 3.0241, "step": 6605 }, { "epoch": 0.53, "learning_rate": 4.110679925620503e-05, "loss": 3.1218, "step": 6610 }, { "epoch": 0.53, "learning_rate": 4.110006198291428e-05, "loss": 3.1217, "step": 6615 }, { "epoch": 0.54, "learning_rate": 4.109332470962352e-05, "loss": 2.8752, "step": 6620 }, { "epoch": 0.54, "learning_rate": 4.108658743633277e-05, "loss": 3.3187, "step": 6625 }, { "epoch": 0.54, "learning_rate": 4.1079850163042014e-05, "loss": 3.1058, "step": 6630 }, { "epoch": 0.54, "learning_rate": 4.107311288975126e-05, "loss": 3.142, "step": 6635 }, { "epoch": 0.54, "learning_rate": 4.1066375616460505e-05, "loss": 3.048, "step": 6640 }, { "epoch": 0.54, "learning_rate": 4.1059638343169754e-05, "loss": 2.75, "step": 6645 }, { "epoch": 0.54, "learning_rate": 4.1052901069879e-05, "loss": 3.0339, "step": 6650 }, { "epoch": 0.54, "learning_rate": 4.104616379658825e-05, "loss": 3.113, "step": 6655 }, { "epoch": 0.54, "learning_rate": 4.1039426523297493e-05, "loss": 2.8537, "step": 6660 }, { "epoch": 0.54, "learning_rate": 4.103268925000674e-05, "loss": 3.1454, "step": 6665 }, { "epoch": 0.54, "learning_rate": 4.1025951976715984e-05, "loss": 2.9773, "step": 6670 }, { "epoch": 0.54, "learning_rate": 4.1019214703425227e-05, "loss": 2.959, "step": 6675 }, { "epoch": 0.54, "learning_rate": 4.1012477430134475e-05, "loss": 3.2781, "step": 6680 }, { "epoch": 0.54, "learning_rate": 4.1005740156843724e-05, "loss": 3.3036, "step": 6685 }, { "epoch": 0.54, "learning_rate": 4.099900288355297e-05, "loss": 3.0932, "step": 6690 }, { "epoch": 0.54, "learning_rate": 4.0992265610262215e-05, "loss": 2.9254, "step": 6695 }, { "epoch": 0.54, "learning_rate": 4.0985528336971464e-05, "loss": 2.9804, "step": 6700 }, { "epoch": 0.54, "learning_rate": 4.097879106368071e-05, "loss": 2.8821, "step": 6705 }, { "epoch": 0.54, "learning_rate": 4.0972053790389955e-05, "loss": 3.245, "step": 6710 }, { "epoch": 0.54, "learning_rate": 4.09653165170992e-05, "loss": 3.04, "step": 6715 }, { "epoch": 0.54, "learning_rate": 4.0958579243808446e-05, "loss": 3.2381, "step": 6720 }, { "epoch": 0.54, "learning_rate": 4.0951841970517695e-05, "loss": 3.176, "step": 6725 }, { "epoch": 0.54, "learning_rate": 4.094510469722694e-05, "loss": 3.1522, "step": 6730 }, { "epoch": 0.54, "learning_rate": 4.0938367423936186e-05, "loss": 3.071, "step": 6735 }, { "epoch": 0.54, "learning_rate": 4.0931630150645435e-05, "loss": 3.195, "step": 6740 }, { "epoch": 0.55, "learning_rate": 4.0924892877354684e-05, "loss": 3.317, "step": 6745 }, { "epoch": 0.55, "learning_rate": 4.0918155604063926e-05, "loss": 3.0664, "step": 6750 }, { "epoch": 0.55, "learning_rate": 4.0911418330773175e-05, "loss": 3.04, "step": 6755 }, { "epoch": 0.55, "learning_rate": 4.090468105748242e-05, "loss": 2.9613, "step": 6760 }, { "epoch": 0.55, "learning_rate": 4.089794378419166e-05, "loss": 3.2902, "step": 6765 }, { "epoch": 0.55, "learning_rate": 4.089120651090091e-05, "loss": 2.9059, "step": 6770 }, { "epoch": 0.55, "learning_rate": 4.0884469237610157e-05, "loss": 3.0708, "step": 6775 }, { "epoch": 0.55, "learning_rate": 4.0877731964319405e-05, "loss": 3.2298, "step": 6780 }, { "epoch": 0.55, "learning_rate": 4.08723421456868e-05, "loss": 3.3366, "step": 6785 }, { "epoch": 0.55, "learning_rate": 4.086560487239605e-05, "loss": 2.985, "step": 6790 }, { "epoch": 0.55, "learning_rate": 4.085886759910529e-05, "loss": 2.9827, "step": 6795 }, { "epoch": 0.55, "learning_rate": 4.0852130325814534e-05, "loss": 3.1584, "step": 6800 }, { "epoch": 0.55, "learning_rate": 4.084539305252378e-05, "loss": 2.8316, "step": 6805 }, { "epoch": 0.55, "learning_rate": 4.083865577923303e-05, "loss": 3.1549, "step": 6810 }, { "epoch": 0.55, "learning_rate": 4.083191850594228e-05, "loss": 3.2837, "step": 6815 }, { "epoch": 0.55, "learning_rate": 4.082518123265152e-05, "loss": 2.9758, "step": 6820 }, { "epoch": 0.55, "learning_rate": 4.081844395936077e-05, "loss": 3.2667, "step": 6825 }, { "epoch": 0.55, "learning_rate": 4.081170668607002e-05, "loss": 3.1173, "step": 6830 }, { "epoch": 0.55, "learning_rate": 4.080496941277926e-05, "loss": 3.0736, "step": 6835 }, { "epoch": 0.55, "learning_rate": 4.0798232139488504e-05, "loss": 2.9801, "step": 6840 }, { "epoch": 0.55, "learning_rate": 4.079149486619775e-05, "loss": 2.9754, "step": 6845 }, { "epoch": 0.55, "learning_rate": 4.0784757592907e-05, "loss": 2.8494, "step": 6850 }, { "epoch": 0.55, "learning_rate": 4.0778020319616244e-05, "loss": 3.2473, "step": 6855 }, { "epoch": 0.55, "learning_rate": 4.077128304632549e-05, "loss": 2.8755, "step": 6860 }, { "epoch": 0.55, "learning_rate": 4.076454577303474e-05, "loss": 3.0215, "step": 6865 }, { "epoch": 0.56, "learning_rate": 4.075780849974399e-05, "loss": 2.9058, "step": 6870 }, { "epoch": 0.56, "learning_rate": 4.075107122645323e-05, "loss": 3.2889, "step": 6875 }, { "epoch": 0.56, "learning_rate": 4.0744333953162475e-05, "loss": 2.8474, "step": 6880 }, { "epoch": 0.56, "learning_rate": 4.0737596679871724e-05, "loss": 2.9836, "step": 6885 }, { "epoch": 0.56, "learning_rate": 4.0730859406580966e-05, "loss": 2.9131, "step": 6890 }, { "epoch": 0.56, "learning_rate": 4.0724122133290215e-05, "loss": 2.9475, "step": 6895 }, { "epoch": 0.56, "learning_rate": 4.0717384859999464e-05, "loss": 3.168, "step": 6900 }, { "epoch": 0.56, "learning_rate": 4.071064758670871e-05, "loss": 3.2896, "step": 6905 }, { "epoch": 0.56, "learning_rate": 4.0703910313417955e-05, "loss": 2.8471, "step": 6910 }, { "epoch": 0.56, "learning_rate": 4.06971730401272e-05, "loss": 2.9173, "step": 6915 }, { "epoch": 0.56, "learning_rate": 4.069043576683645e-05, "loss": 3.1674, "step": 6920 }, { "epoch": 0.56, "learning_rate": 4.0683698493545694e-05, "loss": 3.1038, "step": 6925 }, { "epoch": 0.56, "learning_rate": 4.0676961220254936e-05, "loss": 3.0604, "step": 6930 }, { "epoch": 0.56, "learning_rate": 4.0670223946964185e-05, "loss": 2.9545, "step": 6935 }, { "epoch": 0.56, "learning_rate": 4.0663486673673434e-05, "loss": 3.1475, "step": 6940 }, { "epoch": 0.56, "learning_rate": 4.0656749400382676e-05, "loss": 2.9257, "step": 6945 }, { "epoch": 0.56, "learning_rate": 4.0650012127091925e-05, "loss": 2.6838, "step": 6950 }, { "epoch": 0.56, "learning_rate": 4.0643274853801174e-05, "loss": 3.1874, "step": 6955 }, { "epoch": 0.56, "learning_rate": 4.063653758051042e-05, "loss": 3.2852, "step": 6960 }, { "epoch": 0.56, "learning_rate": 4.0629800307219665e-05, "loss": 2.8721, "step": 6965 }, { "epoch": 0.56, "learning_rate": 4.062306303392891e-05, "loss": 3.1328, "step": 6970 }, { "epoch": 0.56, "learning_rate": 4.0616325760638156e-05, "loss": 3.207, "step": 6975 }, { "epoch": 0.56, "learning_rate": 4.0609588487347405e-05, "loss": 3.2351, "step": 6980 }, { "epoch": 0.56, "learning_rate": 4.060285121405665e-05, "loss": 3.0004, "step": 6985 }, { "epoch": 0.57, "learning_rate": 4.0596113940765896e-05, "loss": 3.0114, "step": 6990 }, { "epoch": 0.57, "learning_rate": 4.0589376667475145e-05, "loss": 3.3165, "step": 6995 }, { "epoch": 0.57, "learning_rate": 4.058263939418439e-05, "loss": 3.212, "step": 7000 }, { "epoch": 0.57, "learning_rate": 4.0575902120893636e-05, "loss": 2.9537, "step": 7005 }, { "epoch": 0.57, "learning_rate": 4.056916484760288e-05, "loss": 3.0058, "step": 7010 }, { "epoch": 0.57, "learning_rate": 4.056242757431213e-05, "loss": 2.9632, "step": 7015 }, { "epoch": 0.57, "learning_rate": 4.055569030102137e-05, "loss": 3.0076, "step": 7020 }, { "epoch": 0.57, "learning_rate": 4.054895302773062e-05, "loss": 3.118, "step": 7025 }, { "epoch": 0.57, "learning_rate": 4.0542215754439866e-05, "loss": 3.0387, "step": 7030 }, { "epoch": 0.57, "learning_rate": 4.0535478481149115e-05, "loss": 3.0755, "step": 7035 }, { "epoch": 0.57, "learning_rate": 4.053008866251651e-05, "loss": 3.5368, "step": 7040 }, { "epoch": 0.57, "learning_rate": 4.052335138922575e-05, "loss": 2.8257, "step": 7045 }, { "epoch": 0.57, "learning_rate": 4.0516614115935e-05, "loss": 3.0709, "step": 7050 }, { "epoch": 0.57, "learning_rate": 4.0509876842644243e-05, "loss": 2.8406, "step": 7055 }, { "epoch": 0.57, "learning_rate": 4.050313956935349e-05, "loss": 3.1349, "step": 7060 }, { "epoch": 0.57, "learning_rate": 4.049640229606274e-05, "loss": 2.9948, "step": 7065 }, { "epoch": 0.57, "learning_rate": 4.048966502277198e-05, "loss": 2.9537, "step": 7070 }, { "epoch": 0.57, "learning_rate": 4.048292774948123e-05, "loss": 2.7327, "step": 7075 }, { "epoch": 0.57, "learning_rate": 4.047619047619048e-05, "loss": 2.9098, "step": 7080 }, { "epoch": 0.57, "learning_rate": 4.046945320289972e-05, "loss": 3.1846, "step": 7085 }, { "epoch": 0.57, "learning_rate": 4.046271592960897e-05, "loss": 3.046, "step": 7090 }, { "epoch": 0.57, "learning_rate": 4.0455978656318214e-05, "loss": 3.0464, "step": 7095 }, { "epoch": 0.57, "learning_rate": 4.044924138302746e-05, "loss": 3.3597, "step": 7100 }, { "epoch": 0.57, "learning_rate": 4.044250410973671e-05, "loss": 3.1111, "step": 7105 }, { "epoch": 0.57, "learning_rate": 4.0435766836445954e-05, "loss": 3.187, "step": 7110 }, { "epoch": 0.58, "learning_rate": 4.04290295631552e-05, "loss": 2.8657, "step": 7115 }, { "epoch": 0.58, "learning_rate": 4.042229228986445e-05, "loss": 3.3056, "step": 7120 }, { "epoch": 0.58, "learning_rate": 4.0415555016573694e-05, "loss": 2.8097, "step": 7125 }, { "epoch": 0.58, "learning_rate": 4.040881774328294e-05, "loss": 3.078, "step": 7130 }, { "epoch": 0.58, "learning_rate": 4.0402080469992185e-05, "loss": 3.1158, "step": 7135 }, { "epoch": 0.58, "learning_rate": 4.0395343196701434e-05, "loss": 2.904, "step": 7140 }, { "epoch": 0.58, "learning_rate": 4.0388605923410676e-05, "loss": 3.0502, "step": 7145 }, { "epoch": 0.58, "learning_rate": 4.0381868650119925e-05, "loss": 3.0258, "step": 7150 }, { "epoch": 0.58, "learning_rate": 4.0375131376829173e-05, "loss": 3.079, "step": 7155 }, { "epoch": 0.58, "learning_rate": 4.036839410353842e-05, "loss": 3.3828, "step": 7160 }, { "epoch": 0.58, "learning_rate": 4.0361656830247664e-05, "loss": 3.3249, "step": 7165 }, { "epoch": 0.58, "learning_rate": 4.035491955695691e-05, "loss": 3.1637, "step": 7170 }, { "epoch": 0.58, "learning_rate": 4.0348182283666155e-05, "loss": 3.3418, "step": 7175 }, { "epoch": 0.58, "learning_rate": 4.03414450103754e-05, "loss": 3.0859, "step": 7180 }, { "epoch": 0.58, "learning_rate": 4.0334707737084646e-05, "loss": 3.1996, "step": 7185 }, { "epoch": 0.58, "learning_rate": 4.0327970463793895e-05, "loss": 3.1428, "step": 7190 }, { "epoch": 0.58, "learning_rate": 4.0321233190503144e-05, "loss": 3.2682, "step": 7195 }, { "epoch": 0.58, "learning_rate": 4.0314495917212386e-05, "loss": 2.856, "step": 7200 }, { "epoch": 0.58, "learning_rate": 4.0307758643921635e-05, "loss": 3.2575, "step": 7205 }, { "epoch": 0.58, "learning_rate": 4.0301021370630884e-05, "loss": 3.1416, "step": 7210 }, { "epoch": 0.58, "learning_rate": 4.0294284097340126e-05, "loss": 2.8951, "step": 7215 }, { "epoch": 0.58, "learning_rate": 4.028754682404937e-05, "loss": 3.0688, "step": 7220 }, { "epoch": 0.58, "learning_rate": 4.028080955075862e-05, "loss": 2.9216, "step": 7225 }, { "epoch": 0.58, "learning_rate": 4.0274072277467866e-05, "loss": 3.0731, "step": 7230 }, { "epoch": 0.58, "learning_rate": 4.026733500417711e-05, "loss": 2.8017, "step": 7235 }, { "epoch": 0.59, "learning_rate": 4.026059773088636e-05, "loss": 3.1149, "step": 7240 }, { "epoch": 0.59, "learning_rate": 4.0253860457595606e-05, "loss": 2.9981, "step": 7245 }, { "epoch": 0.59, "learning_rate": 4.0247123184304855e-05, "loss": 3.094, "step": 7250 }, { "epoch": 0.59, "learning_rate": 4.02403859110141e-05, "loss": 3.0985, "step": 7255 }, { "epoch": 0.59, "learning_rate": 4.0233648637723346e-05, "loss": 3.1172, "step": 7260 }, { "epoch": 0.59, "learning_rate": 4.022691136443259e-05, "loss": 3.1349, "step": 7265 }, { "epoch": 0.59, "learning_rate": 4.022017409114183e-05, "loss": 2.9795, "step": 7270 }, { "epoch": 0.59, "learning_rate": 4.021343681785108e-05, "loss": 3.1452, "step": 7275 }, { "epoch": 0.59, "learning_rate": 4.020669954456033e-05, "loss": 3.0341, "step": 7280 }, { "epoch": 0.59, "learning_rate": 4.0199962271269576e-05, "loss": 3.3302, "step": 7285 }, { "epoch": 0.59, "learning_rate": 4.019322499797882e-05, "loss": 3.282, "step": 7290 }, { "epoch": 0.59, "learning_rate": 4.018648772468807e-05, "loss": 3.1143, "step": 7295 }, { "epoch": 0.59, "learning_rate": 4.0179750451397316e-05, "loss": 3.047, "step": 7300 }, { "epoch": 0.59, "learning_rate": 4.017301317810656e-05, "loss": 2.9338, "step": 7305 }, { "epoch": 0.59, "learning_rate": 4.01662759048158e-05, "loss": 3.0096, "step": 7310 }, { "epoch": 0.59, "learning_rate": 4.015953863152505e-05, "loss": 2.8297, "step": 7315 }, { "epoch": 0.59, "learning_rate": 4.01528013582343e-05, "loss": 3.3948, "step": 7320 }, { "epoch": 0.59, "learning_rate": 4.014606408494354e-05, "loss": 3.1253, "step": 7325 }, { "epoch": 0.59, "learning_rate": 4.013932681165279e-05, "loss": 3.2223, "step": 7330 }, { "epoch": 0.59, "learning_rate": 4.013258953836204e-05, "loss": 2.956, "step": 7335 }, { "epoch": 0.59, "learning_rate": 4.012585226507129e-05, "loss": 3.335, "step": 7340 }, { "epoch": 0.59, "learning_rate": 4.011911499178053e-05, "loss": 3.1048, "step": 7345 }, { "epoch": 0.59, "learning_rate": 4.011237771848977e-05, "loss": 3.0714, "step": 7350 }, { "epoch": 0.59, "learning_rate": 4.010564044519902e-05, "loss": 3.0881, "step": 7355 }, { "epoch": 0.6, "learning_rate": 4.009890317190827e-05, "loss": 2.8853, "step": 7360 }, { "epoch": 0.6, "learning_rate": 4.009216589861751e-05, "loss": 2.93, "step": 7365 }, { "epoch": 0.6, "learning_rate": 4.008542862532676e-05, "loss": 3.0098, "step": 7370 }, { "epoch": 0.6, "learning_rate": 4.007869135203601e-05, "loss": 2.8795, "step": 7375 }, { "epoch": 0.6, "learning_rate": 4.007195407874525e-05, "loss": 3.1682, "step": 7380 }, { "epoch": 0.6, "learning_rate": 4.00652168054545e-05, "loss": 2.9576, "step": 7385 }, { "epoch": 0.6, "learning_rate": 4.005847953216375e-05, "loss": 3.1748, "step": 7390 }, { "epoch": 0.6, "learning_rate": 4.005174225887299e-05, "loss": 2.9084, "step": 7395 }, { "epoch": 0.6, "learning_rate": 4.004500498558223e-05, "loss": 3.3471, "step": 7400 }, { "epoch": 0.6, "learning_rate": 4.003826771229148e-05, "loss": 3.0671, "step": 7405 }, { "epoch": 0.6, "learning_rate": 4.003153043900073e-05, "loss": 2.7733, "step": 7410 }, { "epoch": 0.6, "learning_rate": 4.002479316570998e-05, "loss": 2.9595, "step": 7415 }, { "epoch": 0.6, "learning_rate": 4.001805589241922e-05, "loss": 3.3316, "step": 7420 }, { "epoch": 0.6, "learning_rate": 4.001131861912847e-05, "loss": 2.9916, "step": 7425 }, { "epoch": 0.6, "learning_rate": 4.000458134583772e-05, "loss": 3.11, "step": 7430 }, { "epoch": 0.6, "learning_rate": 3.999784407254696e-05, "loss": 2.9916, "step": 7435 }, { "epoch": 0.6, "learning_rate": 3.99911067992562e-05, "loss": 2.9348, "step": 7440 }, { "epoch": 0.6, "learning_rate": 3.998436952596545e-05, "loss": 3.1977, "step": 7445 }, { "epoch": 0.6, "learning_rate": 3.99776322526747e-05, "loss": 3.1294, "step": 7450 }, { "epoch": 0.6, "learning_rate": 3.997089497938394e-05, "loss": 2.9127, "step": 7455 }, { "epoch": 0.6, "learning_rate": 3.996415770609319e-05, "loss": 2.996, "step": 7460 }, { "epoch": 0.6, "learning_rate": 3.995742043280244e-05, "loss": 3.0576, "step": 7465 }, { "epoch": 0.6, "learning_rate": 3.995068315951169e-05, "loss": 3.1013, "step": 7470 }, { "epoch": 0.6, "learning_rate": 3.994394588622093e-05, "loss": 2.8199, "step": 7475 }, { "epoch": 0.6, "learning_rate": 3.9937208612930174e-05, "loss": 3.0425, "step": 7480 }, { "epoch": 0.61, "learning_rate": 3.993047133963942e-05, "loss": 2.7993, "step": 7485 }, { "epoch": 0.61, "learning_rate": 3.9923734066348665e-05, "loss": 3.192, "step": 7490 }, { "epoch": 0.61, "learning_rate": 3.9916996793057914e-05, "loss": 2.9459, "step": 7495 }, { "epoch": 0.61, "learning_rate": 3.991025951976716e-05, "loss": 3.0381, "step": 7500 }, { "epoch": 0.61, "learning_rate": 3.990352224647641e-05, "loss": 3.3137, "step": 7505 }, { "epoch": 0.61, "learning_rate": 3.9896784973185654e-05, "loss": 2.9695, "step": 7510 }, { "epoch": 0.61, "learning_rate": 3.98900476998949e-05, "loss": 2.8945, "step": 7515 }, { "epoch": 0.61, "learning_rate": 3.9883310426604145e-05, "loss": 3.0771, "step": 7520 }, { "epoch": 0.61, "learning_rate": 3.987657315331339e-05, "loss": 2.8429, "step": 7525 }, { "epoch": 0.61, "learning_rate": 3.9869835880022636e-05, "loss": 3.35, "step": 7530 }, { "epoch": 0.61, "learning_rate": 3.986444606139004e-05, "loss": 3.2072, "step": 7535 }, { "epoch": 0.61, "learning_rate": 3.9857708788099286e-05, "loss": 3.4036, "step": 7540 }, { "epoch": 0.61, "learning_rate": 3.985097151480853e-05, "loss": 2.8693, "step": 7545 }, { "epoch": 0.61, "learning_rate": 3.984423424151778e-05, "loss": 3.1092, "step": 7550 }, { "epoch": 0.61, "learning_rate": 3.983749696822702e-05, "loss": 2.8871, "step": 7555 }, { "epoch": 0.61, "learning_rate": 3.983075969493627e-05, "loss": 2.8921, "step": 7560 }, { "epoch": 0.61, "learning_rate": 3.982402242164551e-05, "loss": 3.216, "step": 7565 }, { "epoch": 0.61, "learning_rate": 3.981728514835476e-05, "loss": 3.0464, "step": 7570 }, { "epoch": 0.61, "learning_rate": 3.981054787506401e-05, "loss": 2.952, "step": 7575 }, { "epoch": 0.61, "learning_rate": 3.980381060177325e-05, "loss": 2.8887, "step": 7580 }, { "epoch": 0.61, "learning_rate": 3.97970733284825e-05, "loss": 3.2435, "step": 7585 }, { "epoch": 0.61, "learning_rate": 3.979033605519175e-05, "loss": 3.0364, "step": 7590 }, { "epoch": 0.61, "learning_rate": 3.9783598781901e-05, "loss": 3.008, "step": 7595 }, { "epoch": 0.61, "learning_rate": 3.977686150861024e-05, "loss": 2.9321, "step": 7600 }, { "epoch": 0.61, "learning_rate": 3.977012423531948e-05, "loss": 3.0257, "step": 7605 }, { "epoch": 0.62, "learning_rate": 3.976338696202873e-05, "loss": 3.3022, "step": 7610 }, { "epoch": 0.62, "learning_rate": 3.975664968873797e-05, "loss": 3.286, "step": 7615 }, { "epoch": 0.62, "learning_rate": 3.974991241544722e-05, "loss": 3.1355, "step": 7620 }, { "epoch": 0.62, "learning_rate": 3.974317514215647e-05, "loss": 3.1771, "step": 7625 }, { "epoch": 0.62, "learning_rate": 3.973643786886572e-05, "loss": 3.1195, "step": 7630 }, { "epoch": 0.62, "learning_rate": 3.972970059557496e-05, "loss": 2.8496, "step": 7635 }, { "epoch": 0.62, "learning_rate": 3.972296332228421e-05, "loss": 3.3008, "step": 7640 }, { "epoch": 0.62, "learning_rate": 3.971622604899345e-05, "loss": 3.0312, "step": 7645 }, { "epoch": 0.62, "learning_rate": 3.9709488775702694e-05, "loss": 3.4856, "step": 7650 }, { "epoch": 0.62, "learning_rate": 3.970275150241194e-05, "loss": 3.1483, "step": 7655 }, { "epoch": 0.62, "learning_rate": 3.969601422912119e-05, "loss": 2.8638, "step": 7660 }, { "epoch": 0.62, "learning_rate": 3.968927695583044e-05, "loss": 3.1239, "step": 7665 }, { "epoch": 0.62, "learning_rate": 3.968253968253968e-05, "loss": 3.0082, "step": 7670 }, { "epoch": 0.62, "learning_rate": 3.967580240924893e-05, "loss": 2.9481, "step": 7675 }, { "epoch": 0.62, "learning_rate": 3.966906513595818e-05, "loss": 2.9585, "step": 7680 }, { "epoch": 0.62, "learning_rate": 3.966232786266742e-05, "loss": 2.8524, "step": 7685 }, { "epoch": 0.62, "learning_rate": 3.9655590589376664e-05, "loss": 3.0689, "step": 7690 }, { "epoch": 0.62, "learning_rate": 3.964885331608591e-05, "loss": 3.0235, "step": 7695 }, { "epoch": 0.62, "learning_rate": 3.964211604279516e-05, "loss": 3.1008, "step": 7700 }, { "epoch": 0.62, "learning_rate": 3.9635378769504404e-05, "loss": 3.1541, "step": 7705 }, { "epoch": 0.62, "learning_rate": 3.962864149621365e-05, "loss": 3.2348, "step": 7710 }, { "epoch": 0.62, "learning_rate": 3.96219042229229e-05, "loss": 3.0871, "step": 7715 }, { "epoch": 0.62, "learning_rate": 3.961516694963215e-05, "loss": 2.8276, "step": 7720 }, { "epoch": 0.62, "learning_rate": 3.960842967634139e-05, "loss": 2.9168, "step": 7725 }, { "epoch": 0.62, "learning_rate": 3.960169240305064e-05, "loss": 3.037, "step": 7730 }, { "epoch": 0.63, "learning_rate": 3.9594955129759884e-05, "loss": 3.1009, "step": 7735 }, { "epoch": 0.63, "learning_rate": 3.958821785646913e-05, "loss": 2.8545, "step": 7740 }, { "epoch": 0.63, "learning_rate": 3.9581480583178375e-05, "loss": 3.2073, "step": 7745 }, { "epoch": 0.63, "learning_rate": 3.9574743309887624e-05, "loss": 3.1771, "step": 7750 }, { "epoch": 0.63, "learning_rate": 3.956800603659687e-05, "loss": 3.0321, "step": 7755 }, { "epoch": 0.63, "learning_rate": 3.9561268763306115e-05, "loss": 2.9031, "step": 7760 }, { "epoch": 0.63, "learning_rate": 3.9554531490015364e-05, "loss": 2.9989, "step": 7765 }, { "epoch": 0.63, "learning_rate": 3.954779421672461e-05, "loss": 2.9791, "step": 7770 }, { "epoch": 0.63, "learning_rate": 3.9541056943433855e-05, "loss": 3.1055, "step": 7775 }, { "epoch": 0.63, "learning_rate": 3.95343196701431e-05, "loss": 3.2512, "step": 7780 }, { "epoch": 0.63, "learning_rate": 3.9527582396852346e-05, "loss": 2.939, "step": 7785 }, { "epoch": 0.63, "learning_rate": 3.9520845123561594e-05, "loss": 3.0405, "step": 7790 }, { "epoch": 0.63, "learning_rate": 3.951410785027084e-05, "loss": 3.4549, "step": 7795 }, { "epoch": 0.63, "learning_rate": 3.9507370576980085e-05, "loss": 3.0582, "step": 7800 }, { "epoch": 0.63, "learning_rate": 3.9500633303689334e-05, "loss": 2.8642, "step": 7805 }, { "epoch": 0.63, "learning_rate": 3.949389603039858e-05, "loss": 2.9274, "step": 7810 }, { "epoch": 0.63, "learning_rate": 3.9487158757107825e-05, "loss": 3.3588, "step": 7815 }, { "epoch": 0.63, "learning_rate": 3.948042148381707e-05, "loss": 3.177, "step": 7820 }, { "epoch": 0.63, "learning_rate": 3.9473684210526316e-05, "loss": 2.9691, "step": 7825 }, { "epoch": 0.63, "learning_rate": 3.9466946937235565e-05, "loss": 3.4174, "step": 7830 }, { "epoch": 0.63, "learning_rate": 3.946020966394481e-05, "loss": 2.9425, "step": 7835 }, { "epoch": 0.63, "learning_rate": 3.9453472390654056e-05, "loss": 2.8462, "step": 7840 }, { "epoch": 0.63, "learning_rate": 3.9446735117363305e-05, "loss": 3.307, "step": 7845 }, { "epoch": 0.63, "learning_rate": 3.9439997844072554e-05, "loss": 2.9172, "step": 7850 }, { "epoch": 0.64, "learning_rate": 3.9433260570781796e-05, "loss": 2.9545, "step": 7855 }, { "epoch": 0.64, "learning_rate": 3.9426523297491045e-05, "loss": 3.1286, "step": 7860 }, { "epoch": 0.64, "learning_rate": 3.941978602420029e-05, "loss": 2.924, "step": 7865 }, { "epoch": 0.64, "learning_rate": 3.941304875090953e-05, "loss": 2.8875, "step": 7870 }, { "epoch": 0.64, "learning_rate": 3.940631147761878e-05, "loss": 3.1005, "step": 7875 }, { "epoch": 0.64, "learning_rate": 3.939957420432803e-05, "loss": 2.9367, "step": 7880 }, { "epoch": 0.64, "learning_rate": 3.9392836931037276e-05, "loss": 2.9924, "step": 7885 }, { "epoch": 0.64, "learning_rate": 3.938609965774652e-05, "loss": 3.1627, "step": 7890 }, { "epoch": 0.64, "learning_rate": 3.9379362384455767e-05, "loss": 2.8434, "step": 7895 }, { "epoch": 0.64, "learning_rate": 3.9372625111165015e-05, "loss": 3.1066, "step": 7900 }, { "epoch": 0.64, "learning_rate": 3.936588783787426e-05, "loss": 3.3607, "step": 7905 }, { "epoch": 0.64, "learning_rate": 3.93591505645835e-05, "loss": 2.9483, "step": 7910 }, { "epoch": 0.64, "learning_rate": 3.935241329129275e-05, "loss": 3.1395, "step": 7915 }, { "epoch": 0.64, "learning_rate": 3.9345676018002e-05, "loss": 2.8039, "step": 7920 }, { "epoch": 0.64, "learning_rate": 3.933893874471124e-05, "loss": 2.9289, "step": 7925 }, { "epoch": 0.64, "learning_rate": 3.933220147142049e-05, "loss": 3.1207, "step": 7930 }, { "epoch": 0.64, "learning_rate": 3.932546419812974e-05, "loss": 2.956, "step": 7935 }, { "epoch": 0.64, "learning_rate": 3.9318726924838986e-05, "loss": 3.1368, "step": 7940 }, { "epoch": 0.64, "learning_rate": 3.931198965154823e-05, "loss": 3.0236, "step": 7945 }, { "epoch": 0.64, "learning_rate": 3.930525237825747e-05, "loss": 3.1166, "step": 7950 }, { "epoch": 0.64, "learning_rate": 3.929851510496672e-05, "loss": 2.8586, "step": 7955 }, { "epoch": 0.64, "learning_rate": 3.929177783167596e-05, "loss": 3.1254, "step": 7960 }, { "epoch": 0.64, "learning_rate": 3.928504055838521e-05, "loss": 2.9588, "step": 7965 }, { "epoch": 0.64, "learning_rate": 3.927830328509446e-05, "loss": 2.9815, "step": 7970 }, { "epoch": 0.64, "learning_rate": 3.927156601180371e-05, "loss": 3.0071, "step": 7975 }, { "epoch": 0.65, "learning_rate": 3.926482873851295e-05, "loss": 3.0211, "step": 7980 }, { "epoch": 0.65, "learning_rate": 3.92580914652222e-05, "loss": 2.8963, "step": 7985 }, { "epoch": 0.65, "learning_rate": 3.925135419193144e-05, "loss": 3.0395, "step": 7990 }, { "epoch": 0.65, "learning_rate": 3.924461691864069e-05, "loss": 2.8795, "step": 7995 }, { "epoch": 0.65, "learning_rate": 3.923787964534993e-05, "loss": 3.0102, "step": 8000 }, { "epoch": 0.65, "eval_loss": 3.0175957679748535, "eval_rouge2_fmeasure": 0.0061, "eval_rouge2_precision": 0.0093, "eval_rouge2_recall": 0.0066, "eval_runtime": 2672.3133, "eval_samples_per_second": 0.103, "eval_steps_per_second": 0.052, "step": 8000 }, { "epoch": 0.65, "learning_rate": 3.923114237205918e-05, "loss": 3.0919, "step": 8005 }, { "epoch": 0.65, "learning_rate": 3.922440509876843e-05, "loss": 3.1249, "step": 8010 }, { "epoch": 0.65, "learning_rate": 3.921766782547767e-05, "loss": 2.8888, "step": 8015 }, { "epoch": 0.65, "learning_rate": 3.921093055218692e-05, "loss": 3.0429, "step": 8020 }, { "epoch": 0.65, "learning_rate": 3.920419327889617e-05, "loss": 3.0775, "step": 8025 }, { "epoch": 0.65, "learning_rate": 3.919745600560542e-05, "loss": 2.9262, "step": 8030 }, { "epoch": 0.65, "learning_rate": 3.919071873231466e-05, "loss": 3.1079, "step": 8035 }, { "epoch": 0.65, "learning_rate": 3.91839814590239e-05, "loss": 2.9723, "step": 8040 }, { "epoch": 0.65, "learning_rate": 3.917724418573315e-05, "loss": 3.3847, "step": 8045 }, { "epoch": 0.65, "learning_rate": 3.91705069124424e-05, "loss": 3.084, "step": 8050 }, { "epoch": 0.65, "learning_rate": 3.916376963915164e-05, "loss": 2.9241, "step": 8055 }, { "epoch": 0.65, "learning_rate": 3.915703236586089e-05, "loss": 2.8274, "step": 8060 }, { "epoch": 0.65, "learning_rate": 3.915029509257014e-05, "loss": 3.0998, "step": 8065 }, { "epoch": 0.65, "learning_rate": 3.914355781927938e-05, "loss": 3.2271, "step": 8070 }, { "epoch": 0.65, "learning_rate": 3.913682054598863e-05, "loss": 3.0446, "step": 8075 }, { "epoch": 0.65, "learning_rate": 3.913008327269787e-05, "loss": 3.2226, "step": 8080 }, { "epoch": 0.65, "learning_rate": 3.912334599940712e-05, "loss": 2.9655, "step": 8085 }, { "epoch": 0.65, "learning_rate": 3.9116608726116364e-05, "loss": 3.2028, "step": 8090 }, { "epoch": 0.65, "learning_rate": 3.910987145282561e-05, "loss": 3.1591, "step": 8095 }, { "epoch": 0.65, "learning_rate": 3.910313417953486e-05, "loss": 2.9486, "step": 8100 }, { "epoch": 0.66, "learning_rate": 3.909639690624411e-05, "loss": 3.0678, "step": 8105 }, { "epoch": 0.66, "learning_rate": 3.908965963295335e-05, "loss": 2.7782, "step": 8110 }, { "epoch": 0.66, "learning_rate": 3.90829223596626e-05, "loss": 3.3781, "step": 8115 }, { "epoch": 0.66, "learning_rate": 3.9076185086371844e-05, "loss": 3.2015, "step": 8120 }, { "epoch": 0.66, "learning_rate": 3.9069447813081086e-05, "loss": 3.2439, "step": 8125 }, { "epoch": 0.66, "learning_rate": 3.9062710539790335e-05, "loss": 2.8638, "step": 8130 }, { "epoch": 0.66, "learning_rate": 3.9055973266499584e-05, "loss": 2.9865, "step": 8135 }, { "epoch": 0.66, "learning_rate": 3.904923599320883e-05, "loss": 3.0172, "step": 8140 }, { "epoch": 0.66, "learning_rate": 3.9042498719918075e-05, "loss": 2.7773, "step": 8145 }, { "epoch": 0.66, "learning_rate": 3.9035761446627324e-05, "loss": 3.3144, "step": 8150 }, { "epoch": 0.66, "learning_rate": 3.902902417333657e-05, "loss": 3.3635, "step": 8155 }, { "epoch": 0.66, "learning_rate": 3.902228690004582e-05, "loss": 2.9169, "step": 8160 }, { "epoch": 0.66, "learning_rate": 3.9015549626755063e-05, "loss": 2.8164, "step": 8165 }, { "epoch": 0.66, "learning_rate": 3.9008812353464306e-05, "loss": 2.8467, "step": 8170 }, { "epoch": 0.66, "learning_rate": 3.9002075080173554e-05, "loss": 3.0674, "step": 8175 }, { "epoch": 0.66, "learning_rate": 3.8995337806882797e-05, "loss": 2.9448, "step": 8180 }, { "epoch": 0.66, "learning_rate": 3.8988600533592045e-05, "loss": 3.3653, "step": 8185 }, { "epoch": 0.66, "learning_rate": 3.8981863260301294e-05, "loss": 2.9465, "step": 8190 }, { "epoch": 0.66, "learning_rate": 3.897512598701054e-05, "loss": 2.9197, "step": 8195 }, { "epoch": 0.66, "learning_rate": 3.8968388713719785e-05, "loss": 3.0495, "step": 8200 }, { "epoch": 0.66, "learning_rate": 3.8961651440429034e-05, "loss": 3.104, "step": 8205 }, { "epoch": 0.66, "learning_rate": 3.8954914167138276e-05, "loss": 3.0136, "step": 8210 }, { "epoch": 0.66, "learning_rate": 3.8948176893847525e-05, "loss": 2.9737, "step": 8215 }, { "epoch": 0.66, "learning_rate": 3.894143962055677e-05, "loss": 3.0539, "step": 8220 }, { "epoch": 0.66, "learning_rate": 3.8934702347266016e-05, "loss": 3.0518, "step": 8225 }, { "epoch": 0.67, "learning_rate": 3.8927965073975265e-05, "loss": 3.2109, "step": 8230 }, { "epoch": 0.67, "learning_rate": 3.892122780068451e-05, "loss": 3.0303, "step": 8235 }, { "epoch": 0.67, "learning_rate": 3.8914490527393756e-05, "loss": 3.2467, "step": 8240 }, { "epoch": 0.67, "learning_rate": 3.8907753254103005e-05, "loss": 3.0201, "step": 8245 }, { "epoch": 0.67, "learning_rate": 3.890101598081225e-05, "loss": 3.052, "step": 8250 }, { "epoch": 0.67, "learning_rate": 3.889427870752149e-05, "loss": 3.2192, "step": 8255 }, { "epoch": 0.67, "learning_rate": 3.888754143423074e-05, "loss": 2.8097, "step": 8260 }, { "epoch": 0.67, "learning_rate": 3.888080416093999e-05, "loss": 2.9116, "step": 8265 }, { "epoch": 0.67, "learning_rate": 3.887406688764923e-05, "loss": 3.1297, "step": 8270 }, { "epoch": 0.67, "learning_rate": 3.886732961435848e-05, "loss": 3.2419, "step": 8275 }, { "epoch": 0.67, "learning_rate": 3.8860592341067727e-05, "loss": 2.9156, "step": 8280 }, { "epoch": 0.67, "learning_rate": 3.8853855067776975e-05, "loss": 3.1024, "step": 8285 }, { "epoch": 0.67, "learning_rate": 3.884711779448622e-05, "loss": 2.9807, "step": 8290 }, { "epoch": 0.67, "learning_rate": 3.8840380521195466e-05, "loss": 3.2677, "step": 8295 }, { "epoch": 0.67, "learning_rate": 3.883364324790471e-05, "loss": 3.0497, "step": 8300 }, { "epoch": 0.67, "learning_rate": 3.882690597461396e-05, "loss": 3.1933, "step": 8305 }, { "epoch": 0.67, "learning_rate": 3.88201687013232e-05, "loss": 2.9817, "step": 8310 }, { "epoch": 0.67, "learning_rate": 3.881343142803245e-05, "loss": 2.9614, "step": 8315 }, { "epoch": 0.67, "learning_rate": 3.88066941547417e-05, "loss": 3.2148, "step": 8320 }, { "epoch": 0.67, "learning_rate": 3.879995688145094e-05, "loss": 3.4115, "step": 8325 }, { "epoch": 0.67, "learning_rate": 3.879321960816019e-05, "loss": 3.109, "step": 8330 }, { "epoch": 0.67, "learning_rate": 3.878648233486944e-05, "loss": 3.3976, "step": 8335 }, { "epoch": 0.67, "learning_rate": 3.877974506157868e-05, "loss": 2.9871, "step": 8340 }, { "epoch": 0.67, "learning_rate": 3.877300778828792e-05, "loss": 3.0941, "step": 8345 }, { "epoch": 0.68, "learning_rate": 3.876627051499717e-05, "loss": 3.0284, "step": 8350 }, { "epoch": 0.68, "learning_rate": 3.875953324170642e-05, "loss": 2.8833, "step": 8355 }, { "epoch": 0.68, "learning_rate": 3.875279596841567e-05, "loss": 3.0523, "step": 8360 }, { "epoch": 0.68, "learning_rate": 3.874605869512491e-05, "loss": 3.09, "step": 8365 }, { "epoch": 0.68, "learning_rate": 3.873932142183416e-05, "loss": 3.1666, "step": 8370 }, { "epoch": 0.68, "learning_rate": 3.873258414854341e-05, "loss": 3.1402, "step": 8375 }, { "epoch": 0.68, "learning_rate": 3.872584687525265e-05, "loss": 3.0485, "step": 8380 }, { "epoch": 0.68, "learning_rate": 3.871910960196189e-05, "loss": 3.1616, "step": 8385 }, { "epoch": 0.68, "learning_rate": 3.871237232867114e-05, "loss": 2.9211, "step": 8390 }, { "epoch": 0.68, "learning_rate": 3.870563505538039e-05, "loss": 2.9857, "step": 8395 }, { "epoch": 0.68, "learning_rate": 3.869889778208963e-05, "loss": 3.1664, "step": 8400 }, { "epoch": 0.68, "learning_rate": 3.869216050879888e-05, "loss": 3.0174, "step": 8405 }, { "epoch": 0.68, "learning_rate": 3.868542323550813e-05, "loss": 3.0245, "step": 8410 }, { "epoch": 0.68, "learning_rate": 3.867868596221738e-05, "loss": 3.1736, "step": 8415 }, { "epoch": 0.68, "learning_rate": 3.867194868892662e-05, "loss": 3.1161, "step": 8420 }, { "epoch": 0.68, "learning_rate": 3.866521141563586e-05, "loss": 3.0927, "step": 8425 }, { "epoch": 0.68, "learning_rate": 3.865847414234511e-05, "loss": 3.0474, "step": 8430 }, { "epoch": 0.68, "learning_rate": 3.8651736869054354e-05, "loss": 2.7999, "step": 8435 }, { "epoch": 0.68, "learning_rate": 3.86449995957636e-05, "loss": 2.9191, "step": 8440 }, { "epoch": 0.68, "learning_rate": 3.863826232247285e-05, "loss": 2.9211, "step": 8445 }, { "epoch": 0.68, "learning_rate": 3.86315250491821e-05, "loss": 3.1388, "step": 8450 }, { "epoch": 0.68, "learning_rate": 3.862478777589134e-05, "loss": 3.2235, "step": 8455 }, { "epoch": 0.68, "learning_rate": 3.861805050260059e-05, "loss": 3.0092, "step": 8460 }, { "epoch": 0.68, "learning_rate": 3.861131322930984e-05, "loss": 3.0099, "step": 8465 }, { "epoch": 0.68, "learning_rate": 3.860457595601908e-05, "loss": 3.1131, "step": 8470 }, { "epoch": 0.69, "learning_rate": 3.8597838682728324e-05, "loss": 3.0014, "step": 8475 }, { "epoch": 0.69, "learning_rate": 3.859110140943757e-05, "loss": 3.2398, "step": 8480 }, { "epoch": 0.69, "learning_rate": 3.858436413614682e-05, "loss": 3.0938, "step": 8485 }, { "epoch": 0.69, "learning_rate": 3.8577626862856064e-05, "loss": 2.9129, "step": 8490 }, { "epoch": 0.69, "learning_rate": 3.857088958956531e-05, "loss": 3.3385, "step": 8495 }, { "epoch": 0.69, "learning_rate": 3.856415231627456e-05, "loss": 2.9213, "step": 8500 }, { "epoch": 0.69, "learning_rate": 3.855741504298381e-05, "loss": 3.254, "step": 8505 }, { "epoch": 0.69, "learning_rate": 3.855067776969305e-05, "loss": 3.041, "step": 8510 }, { "epoch": 0.69, "learning_rate": 3.8543940496402295e-05, "loss": 3.0149, "step": 8515 }, { "epoch": 0.69, "learning_rate": 3.8537203223111544e-05, "loss": 2.9648, "step": 8520 }, { "epoch": 0.69, "learning_rate": 3.8530465949820786e-05, "loss": 2.8249, "step": 8525 }, { "epoch": 0.69, "learning_rate": 3.8523728676530035e-05, "loss": 3.4272, "step": 8530 }, { "epoch": 0.69, "learning_rate": 3.8516991403239284e-05, "loss": 3.2086, "step": 8535 }, { "epoch": 0.69, "learning_rate": 3.851025412994853e-05, "loss": 3.3207, "step": 8540 }, { "epoch": 0.69, "learning_rate": 3.8503516856657775e-05, "loss": 3.0051, "step": 8545 }, { "epoch": 0.69, "learning_rate": 3.849677958336702e-05, "loss": 3.0208, "step": 8550 }, { "epoch": 0.69, "learning_rate": 3.8490042310076265e-05, "loss": 3.1673, "step": 8555 }, { "epoch": 0.69, "learning_rate": 3.8483305036785514e-05, "loss": 3.1774, "step": 8560 }, { "epoch": 0.69, "learning_rate": 3.8476567763494756e-05, "loss": 2.9722, "step": 8565 }, { "epoch": 0.69, "learning_rate": 3.8469830490204005e-05, "loss": 2.7325, "step": 8570 }, { "epoch": 0.69, "learning_rate": 3.8463093216913254e-05, "loss": 3.0779, "step": 8575 }, { "epoch": 0.69, "learning_rate": 3.8456355943622496e-05, "loss": 3.1809, "step": 8580 }, { "epoch": 0.69, "learning_rate": 3.8449618670331745e-05, "loss": 2.9728, "step": 8585 }, { "epoch": 0.69, "learning_rate": 3.8442881397040994e-05, "loss": 2.8935, "step": 8590 }, { "epoch": 0.69, "learning_rate": 3.843614412375024e-05, "loss": 3.0019, "step": 8595 }, { "epoch": 0.7, "learning_rate": 3.8429406850459485e-05, "loss": 3.1546, "step": 8600 }, { "epoch": 0.7, "learning_rate": 3.842266957716873e-05, "loss": 2.9821, "step": 8605 }, { "epoch": 0.7, "learning_rate": 3.8415932303877976e-05, "loss": 2.9484, "step": 8610 }, { "epoch": 0.7, "learning_rate": 3.8409195030587225e-05, "loss": 3.1934, "step": 8615 }, { "epoch": 0.7, "learning_rate": 3.840245775729647e-05, "loss": 3.2961, "step": 8620 }, { "epoch": 0.7, "learning_rate": 3.8395720484005716e-05, "loss": 3.0485, "step": 8625 }, { "epoch": 0.7, "learning_rate": 3.8388983210714965e-05, "loss": 3.0174, "step": 8630 }, { "epoch": 0.7, "learning_rate": 3.838224593742421e-05, "loss": 3.1566, "step": 8635 }, { "epoch": 0.7, "learning_rate": 3.8375508664133456e-05, "loss": 2.893, "step": 8640 }, { "epoch": 0.7, "learning_rate": 3.83687713908427e-05, "loss": 3.1131, "step": 8645 }, { "epoch": 0.7, "learning_rate": 3.836203411755195e-05, "loss": 2.8845, "step": 8650 }, { "epoch": 0.7, "learning_rate": 3.835529684426119e-05, "loss": 2.8261, "step": 8655 }, { "epoch": 0.7, "learning_rate": 3.834855957097044e-05, "loss": 3.0335, "step": 8660 }, { "epoch": 0.7, "learning_rate": 3.8341822297679686e-05, "loss": 3.0395, "step": 8665 }, { "epoch": 0.7, "learning_rate": 3.8335085024388935e-05, "loss": 3.2093, "step": 8670 }, { "epoch": 0.7, "learning_rate": 3.832834775109818e-05, "loss": 3.0218, "step": 8675 }, { "epoch": 0.7, "learning_rate": 3.8321610477807426e-05, "loss": 3.0037, "step": 8680 }, { "epoch": 0.7, "learning_rate": 3.831487320451667e-05, "loss": 3.0408, "step": 8685 }, { "epoch": 0.7, "learning_rate": 3.830813593122591e-05, "loss": 2.9611, "step": 8690 }, { "epoch": 0.7, "learning_rate": 3.830139865793516e-05, "loss": 3.2521, "step": 8695 }, { "epoch": 0.7, "learning_rate": 3.829466138464441e-05, "loss": 2.9996, "step": 8700 }, { "epoch": 0.7, "learning_rate": 3.828792411135366e-05, "loss": 3.1156, "step": 8705 }, { "epoch": 0.7, "learning_rate": 3.82811868380629e-05, "loss": 2.7829, "step": 8710 }, { "epoch": 0.7, "learning_rate": 3.827444956477215e-05, "loss": 2.9055, "step": 8715 }, { "epoch": 0.7, "learning_rate": 3.82677122914814e-05, "loss": 2.969, "step": 8720 }, { "epoch": 0.71, "learning_rate": 3.826097501819064e-05, "loss": 2.7114, "step": 8725 }, { "epoch": 0.71, "learning_rate": 3.825423774489988e-05, "loss": 2.9136, "step": 8730 }, { "epoch": 0.71, "learning_rate": 3.824750047160913e-05, "loss": 2.8986, "step": 8735 }, { "epoch": 0.71, "learning_rate": 3.824076319831838e-05, "loss": 3.0897, "step": 8740 }, { "epoch": 0.71, "learning_rate": 3.823402592502762e-05, "loss": 3.1301, "step": 8745 }, { "epoch": 0.71, "learning_rate": 3.822728865173687e-05, "loss": 2.9386, "step": 8750 }, { "epoch": 0.71, "learning_rate": 3.822055137844612e-05, "loss": 3.1028, "step": 8755 }, { "epoch": 0.71, "learning_rate": 3.821381410515537e-05, "loss": 2.7394, "step": 8760 }, { "epoch": 0.71, "learning_rate": 3.820707683186461e-05, "loss": 3.0343, "step": 8765 }, { "epoch": 0.71, "learning_rate": 3.820033955857386e-05, "loss": 3.0186, "step": 8770 }, { "epoch": 0.71, "learning_rate": 3.81936022852831e-05, "loss": 3.1434, "step": 8775 }, { "epoch": 0.71, "learning_rate": 3.818686501199235e-05, "loss": 3.1984, "step": 8780 }, { "epoch": 0.71, "learning_rate": 3.818012773870159e-05, "loss": 2.9959, "step": 8785 }, { "epoch": 0.71, "learning_rate": 3.817339046541084e-05, "loss": 2.9381, "step": 8790 }, { "epoch": 0.71, "learning_rate": 3.816665319212009e-05, "loss": 3.2456, "step": 8795 }, { "epoch": 0.71, "learning_rate": 3.815991591882933e-05, "loss": 3.0803, "step": 8800 }, { "epoch": 0.71, "learning_rate": 3.815317864553858e-05, "loss": 2.755, "step": 8805 }, { "epoch": 0.71, "learning_rate": 3.814644137224783e-05, "loss": 3.3534, "step": 8810 }, { "epoch": 0.71, "learning_rate": 3.813970409895707e-05, "loss": 3.0963, "step": 8815 }, { "epoch": 0.71, "learning_rate": 3.8132966825666313e-05, "loss": 2.9195, "step": 8820 }, { "epoch": 0.71, "learning_rate": 3.812622955237556e-05, "loss": 2.8593, "step": 8825 }, { "epoch": 0.71, "learning_rate": 3.811949227908481e-05, "loss": 2.8676, "step": 8830 }, { "epoch": 0.71, "learning_rate": 3.811275500579405e-05, "loss": 2.7496, "step": 8835 }, { "epoch": 0.71, "learning_rate": 3.81060177325033e-05, "loss": 3.2668, "step": 8840 }, { "epoch": 0.72, "learning_rate": 3.809928045921255e-05, "loss": 3.1165, "step": 8845 }, { "epoch": 0.72, "learning_rate": 3.80925431859218e-05, "loss": 3.0659, "step": 8850 }, { "epoch": 0.72, "learning_rate": 3.808580591263104e-05, "loss": 2.8842, "step": 8855 }, { "epoch": 0.72, "learning_rate": 3.8079068639340284e-05, "loss": 3.003, "step": 8860 }, { "epoch": 0.72, "learning_rate": 3.807233136604953e-05, "loss": 3.0949, "step": 8865 }, { "epoch": 0.72, "learning_rate": 3.806559409275878e-05, "loss": 2.9894, "step": 8870 }, { "epoch": 0.72, "learning_rate": 3.8058856819468024e-05, "loss": 2.8607, "step": 8875 }, { "epoch": 0.72, "learning_rate": 3.805211954617727e-05, "loss": 3.3431, "step": 8880 }, { "epoch": 0.72, "learning_rate": 3.804538227288652e-05, "loss": 3.0648, "step": 8885 }, { "epoch": 0.72, "learning_rate": 3.8038644999595764e-05, "loss": 3.2921, "step": 8890 }, { "epoch": 0.72, "learning_rate": 3.803190772630501e-05, "loss": 2.9446, "step": 8895 }, { "epoch": 0.72, "learning_rate": 3.802517045301426e-05, "loss": 3.1472, "step": 8900 }, { "epoch": 0.72, "learning_rate": 3.8018433179723504e-05, "loss": 3.2051, "step": 8905 }, { "epoch": 0.72, "learning_rate": 3.8011695906432746e-05, "loss": 3.4834, "step": 8910 }, { "epoch": 0.72, "learning_rate": 3.8004958633141995e-05, "loss": 3.0165, "step": 8915 }, { "epoch": 0.72, "learning_rate": 3.7998221359851243e-05, "loss": 3.028, "step": 8920 }, { "epoch": 0.72, "learning_rate": 3.799148408656049e-05, "loss": 3.0388, "step": 8925 }, { "epoch": 0.72, "learning_rate": 3.7984746813269734e-05, "loss": 2.9662, "step": 8930 }, { "epoch": 0.72, "learning_rate": 3.797800953997898e-05, "loss": 3.0036, "step": 8935 }, { "epoch": 0.72, "learning_rate": 3.797127226668823e-05, "loss": 2.9179, "step": 8940 }, { "epoch": 0.72, "learning_rate": 3.7964534993397474e-05, "loss": 2.934, "step": 8945 }, { "epoch": 0.72, "learning_rate": 3.7957797720106716e-05, "loss": 3.2011, "step": 8950 }, { "epoch": 0.72, "learning_rate": 3.7951060446815965e-05, "loss": 3.1329, "step": 8955 }, { "epoch": 0.72, "learning_rate": 3.7944323173525214e-05, "loss": 3.2528, "step": 8960 }, { "epoch": 0.72, "learning_rate": 3.7937585900234456e-05, "loss": 3.3252, "step": 8965 }, { "epoch": 0.73, "learning_rate": 3.7930848626943705e-05, "loss": 3.2545, "step": 8970 }, { "epoch": 0.73, "learning_rate": 3.7924111353652954e-05, "loss": 3.2199, "step": 8975 }, { "epoch": 0.73, "learning_rate": 3.79173740803622e-05, "loss": 2.9862, "step": 8980 }, { "epoch": 0.73, "learning_rate": 3.7910636807071445e-05, "loss": 3.0251, "step": 8985 }, { "epoch": 0.73, "learning_rate": 3.790389953378069e-05, "loss": 2.9251, "step": 8990 }, { "epoch": 0.73, "learning_rate": 3.7897162260489936e-05, "loss": 3.0674, "step": 8995 }, { "epoch": 0.73, "learning_rate": 3.789042498719918e-05, "loss": 3.0134, "step": 9000 }, { "epoch": 0.73, "learning_rate": 3.788368771390843e-05, "loss": 3.4247, "step": 9005 }, { "epoch": 0.73, "learning_rate": 3.787829789527583e-05, "loss": 3.4266, "step": 9010 }, { "epoch": 0.73, "learning_rate": 3.787156062198507e-05, "loss": 3.334, "step": 9015 }, { "epoch": 0.73, "learning_rate": 3.786482334869432e-05, "loss": 2.6583, "step": 9020 }, { "epoch": 0.73, "learning_rate": 3.785808607540356e-05, "loss": 2.9451, "step": 9025 }, { "epoch": 0.73, "learning_rate": 3.785134880211281e-05, "loss": 2.9675, "step": 9030 }, { "epoch": 0.73, "learning_rate": 3.784461152882205e-05, "loss": 3.0311, "step": 9035 }, { "epoch": 0.73, "learning_rate": 3.78378742555313e-05, "loss": 3.0225, "step": 9040 }, { "epoch": 0.73, "learning_rate": 3.783113698224055e-05, "loss": 3.4978, "step": 9045 }, { "epoch": 0.73, "learning_rate": 3.78243997089498e-05, "loss": 2.8633, "step": 9050 }, { "epoch": 0.73, "learning_rate": 3.781766243565904e-05, "loss": 3.1776, "step": 9055 }, { "epoch": 0.73, "learning_rate": 3.781092516236829e-05, "loss": 2.9358, "step": 9060 }, { "epoch": 0.73, "learning_rate": 3.780418788907754e-05, "loss": 2.9577, "step": 9065 }, { "epoch": 0.73, "learning_rate": 3.779745061578678e-05, "loss": 2.8513, "step": 9070 }, { "epoch": 0.73, "learning_rate": 3.779071334249602e-05, "loss": 2.9536, "step": 9075 }, { "epoch": 0.73, "learning_rate": 3.778397606920527e-05, "loss": 3.3119, "step": 9080 }, { "epoch": 0.73, "learning_rate": 3.777723879591452e-05, "loss": 3.1788, "step": 9085 }, { "epoch": 0.73, "learning_rate": 3.777050152262376e-05, "loss": 3.2256, "step": 9090 }, { "epoch": 0.74, "learning_rate": 3.776376424933301e-05, "loss": 3.4169, "step": 9095 }, { "epoch": 0.74, "learning_rate": 3.775702697604226e-05, "loss": 3.2875, "step": 9100 }, { "epoch": 0.74, "learning_rate": 3.775028970275151e-05, "loss": 2.7868, "step": 9105 }, { "epoch": 0.74, "learning_rate": 3.774355242946075e-05, "loss": 3.1224, "step": 9110 }, { "epoch": 0.74, "learning_rate": 3.7736815156169994e-05, "loss": 2.9381, "step": 9115 }, { "epoch": 0.74, "learning_rate": 3.773007788287924e-05, "loss": 2.9636, "step": 9120 }, { "epoch": 0.74, "learning_rate": 3.7723340609588485e-05, "loss": 3.1651, "step": 9125 }, { "epoch": 0.74, "learning_rate": 3.7716603336297734e-05, "loss": 3.1566, "step": 9130 }, { "epoch": 0.74, "learning_rate": 3.770986606300698e-05, "loss": 3.0716, "step": 9135 }, { "epoch": 0.74, "learning_rate": 3.770312878971623e-05, "loss": 2.9066, "step": 9140 }, { "epoch": 0.74, "learning_rate": 3.7696391516425474e-05, "loss": 3.0514, "step": 9145 }, { "epoch": 0.74, "learning_rate": 3.768965424313472e-05, "loss": 2.9148, "step": 9150 }, { "epoch": 0.74, "learning_rate": 3.7682916969843965e-05, "loss": 2.9026, "step": 9155 }, { "epoch": 0.74, "learning_rate": 3.7676179696553214e-05, "loss": 3.373, "step": 9160 }, { "epoch": 0.74, "learning_rate": 3.7669442423262456e-05, "loss": 3.0228, "step": 9165 }, { "epoch": 0.74, "learning_rate": 3.7662705149971705e-05, "loss": 2.9815, "step": 9170 }, { "epoch": 0.74, "learning_rate": 3.7655967876680953e-05, "loss": 2.9908, "step": 9175 }, { "epoch": 0.74, "learning_rate": 3.7649230603390196e-05, "loss": 3.0224, "step": 9180 }, { "epoch": 0.74, "learning_rate": 3.7642493330099444e-05, "loss": 3.5886, "step": 9185 }, { "epoch": 0.74, "learning_rate": 3.763575605680869e-05, "loss": 2.9071, "step": 9190 }, { "epoch": 0.74, "learning_rate": 3.7629018783517935e-05, "loss": 2.9337, "step": 9195 }, { "epoch": 0.74, "learning_rate": 3.7622281510227184e-05, "loss": 3.4955, "step": 9200 }, { "epoch": 0.74, "learning_rate": 3.7615544236936426e-05, "loss": 2.9738, "step": 9205 }, { "epoch": 0.74, "learning_rate": 3.7608806963645675e-05, "loss": 2.9454, "step": 9210 }, { "epoch": 0.74, "learning_rate": 3.7602069690354924e-05, "loss": 3.086, "step": 9215 }, { "epoch": 0.75, "learning_rate": 3.7595332417064166e-05, "loss": 3.4329, "step": 9220 }, { "epoch": 0.75, "learning_rate": 3.7588595143773415e-05, "loss": 3.1704, "step": 9225 }, { "epoch": 0.75, "learning_rate": 3.7581857870482664e-05, "loss": 3.0783, "step": 9230 }, { "epoch": 0.75, "learning_rate": 3.7575120597191906e-05, "loss": 2.9674, "step": 9235 }, { "epoch": 0.75, "learning_rate": 3.7568383323901155e-05, "loss": 3.1008, "step": 9240 }, { "epoch": 0.75, "learning_rate": 3.75616460506104e-05, "loss": 3.07, "step": 9245 }, { "epoch": 0.75, "learning_rate": 3.7554908777319646e-05, "loss": 3.3072, "step": 9250 }, { "epoch": 0.75, "learning_rate": 3.754817150402889e-05, "loss": 3.1095, "step": 9255 }, { "epoch": 0.75, "learning_rate": 3.754143423073814e-05, "loss": 3.0179, "step": 9260 }, { "epoch": 0.75, "learning_rate": 3.7534696957447386e-05, "loss": 2.901, "step": 9265 }, { "epoch": 0.75, "learning_rate": 3.752795968415663e-05, "loss": 2.9, "step": 9270 }, { "epoch": 0.75, "learning_rate": 3.752122241086588e-05, "loss": 3.0123, "step": 9275 }, { "epoch": 0.75, "learning_rate": 3.7514485137575126e-05, "loss": 3.2678, "step": 9280 }, { "epoch": 0.75, "learning_rate": 3.750774786428437e-05, "loss": 2.8679, "step": 9285 }, { "epoch": 0.75, "learning_rate": 3.750101059099361e-05, "loss": 2.9353, "step": 9290 }, { "epoch": 0.75, "learning_rate": 3.749427331770286e-05, "loss": 2.8873, "step": 9295 }, { "epoch": 0.75, "learning_rate": 3.748753604441211e-05, "loss": 3.2057, "step": 9300 }, { "epoch": 0.75, "learning_rate": 3.7480798771121356e-05, "loss": 3.1522, "step": 9305 }, { "epoch": 0.75, "learning_rate": 3.74740614978306e-05, "loss": 3.1599, "step": 9310 }, { "epoch": 0.75, "learning_rate": 3.746732422453985e-05, "loss": 2.9306, "step": 9315 }, { "epoch": 0.75, "learning_rate": 3.7460586951249096e-05, "loss": 3.013, "step": 9320 }, { "epoch": 0.75, "learning_rate": 3.745384967795834e-05, "loss": 3.0665, "step": 9325 }, { "epoch": 0.75, "learning_rate": 3.744711240466758e-05, "loss": 3.2568, "step": 9330 }, { "epoch": 0.75, "learning_rate": 3.744037513137683e-05, "loss": 2.9778, "step": 9335 }, { "epoch": 0.76, "learning_rate": 3.743363785808608e-05, "loss": 3.2069, "step": 9340 }, { "epoch": 0.76, "learning_rate": 3.742690058479532e-05, "loss": 3.0228, "step": 9345 }, { "epoch": 0.76, "learning_rate": 3.742016331150457e-05, "loss": 2.8045, "step": 9350 }, { "epoch": 0.76, "learning_rate": 3.741342603821382e-05, "loss": 3.0413, "step": 9355 }, { "epoch": 0.76, "learning_rate": 3.740668876492307e-05, "loss": 2.8799, "step": 9360 }, { "epoch": 0.76, "learning_rate": 3.739995149163231e-05, "loss": 3.2395, "step": 9365 }, { "epoch": 0.76, "learning_rate": 3.739321421834156e-05, "loss": 3.055, "step": 9370 }, { "epoch": 0.76, "learning_rate": 3.73864769450508e-05, "loss": 3.046, "step": 9375 }, { "epoch": 0.76, "learning_rate": 3.737973967176004e-05, "loss": 2.7768, "step": 9380 }, { "epoch": 0.76, "learning_rate": 3.737300239846929e-05, "loss": 3.4218, "step": 9385 }, { "epoch": 0.76, "learning_rate": 3.736626512517854e-05, "loss": 2.961, "step": 9390 }, { "epoch": 0.76, "learning_rate": 3.735952785188779e-05, "loss": 3.1829, "step": 9395 }, { "epoch": 0.76, "learning_rate": 3.735279057859703e-05, "loss": 3.1464, "step": 9400 }, { "epoch": 0.76, "learning_rate": 3.734605330530628e-05, "loss": 3.0201, "step": 9405 }, { "epoch": 0.76, "learning_rate": 3.733931603201553e-05, "loss": 2.8433, "step": 9410 }, { "epoch": 0.76, "learning_rate": 3.733257875872477e-05, "loss": 2.9373, "step": 9415 }, { "epoch": 0.76, "learning_rate": 3.732584148543401e-05, "loss": 3.1014, "step": 9420 }, { "epoch": 0.76, "learning_rate": 3.731910421214326e-05, "loss": 2.9788, "step": 9425 }, { "epoch": 0.76, "learning_rate": 3.731236693885251e-05, "loss": 3.1346, "step": 9430 }, { "epoch": 0.76, "learning_rate": 3.730562966556175e-05, "loss": 3.0174, "step": 9435 }, { "epoch": 0.76, "learning_rate": 3.7298892392271e-05, "loss": 3.1674, "step": 9440 }, { "epoch": 0.76, "learning_rate": 3.729215511898025e-05, "loss": 2.9165, "step": 9445 }, { "epoch": 0.76, "learning_rate": 3.72854178456895e-05, "loss": 2.9635, "step": 9450 }, { "epoch": 0.76, "learning_rate": 3.727868057239874e-05, "loss": 3.3654, "step": 9455 }, { "epoch": 0.76, "learning_rate": 3.727194329910798e-05, "loss": 2.9462, "step": 9460 }, { "epoch": 0.77, "learning_rate": 3.726520602581723e-05, "loss": 3.0271, "step": 9465 }, { "epoch": 0.77, "learning_rate": 3.725846875252648e-05, "loss": 2.9914, "step": 9470 }, { "epoch": 0.77, "learning_rate": 3.725173147923572e-05, "loss": 3.0899, "step": 9475 }, { "epoch": 0.77, "learning_rate": 3.724499420594497e-05, "loss": 3.2328, "step": 9480 }, { "epoch": 0.77, "learning_rate": 3.723825693265422e-05, "loss": 2.9155, "step": 9485 }, { "epoch": 0.77, "learning_rate": 3.723151965936346e-05, "loss": 3.0138, "step": 9490 }, { "epoch": 0.77, "learning_rate": 3.722478238607271e-05, "loss": 3.0283, "step": 9495 }, { "epoch": 0.77, "learning_rate": 3.721804511278196e-05, "loss": 3.021, "step": 9500 }, { "epoch": 0.77, "learning_rate": 3.72113078394912e-05, "loss": 3.3555, "step": 9505 }, { "epoch": 0.77, "learning_rate": 3.7204570566200445e-05, "loss": 2.9653, "step": 9510 }, { "epoch": 0.77, "learning_rate": 3.7197833292909694e-05, "loss": 2.8215, "step": 9515 }, { "epoch": 0.77, "learning_rate": 3.719109601961894e-05, "loss": 2.9035, "step": 9520 }, { "epoch": 0.77, "learning_rate": 3.718435874632819e-05, "loss": 3.1791, "step": 9525 }, { "epoch": 0.77, "learning_rate": 3.7177621473037434e-05, "loss": 3.0717, "step": 9530 }, { "epoch": 0.77, "learning_rate": 3.717088419974668e-05, "loss": 3.1412, "step": 9535 }, { "epoch": 0.77, "learning_rate": 3.716414692645593e-05, "loss": 2.8527, "step": 9540 }, { "epoch": 0.77, "learning_rate": 3.7157409653165173e-05, "loss": 3.1082, "step": 9545 }, { "epoch": 0.77, "learning_rate": 3.7150672379874416e-05, "loss": 3.0959, "step": 9550 }, { "epoch": 0.77, "learning_rate": 3.7143935106583664e-05, "loss": 2.8961, "step": 9555 }, { "epoch": 0.77, "learning_rate": 3.713719783329291e-05, "loss": 3.0879, "step": 9560 }, { "epoch": 0.77, "learning_rate": 3.7130460560002155e-05, "loss": 3.0639, "step": 9565 }, { "epoch": 0.77, "learning_rate": 3.7123723286711404e-05, "loss": 2.9382, "step": 9570 }, { "epoch": 0.77, "learning_rate": 3.711698601342065e-05, "loss": 2.9074, "step": 9575 }, { "epoch": 0.77, "learning_rate": 3.7110248740129895e-05, "loss": 3.1172, "step": 9580 }, { "epoch": 0.77, "learning_rate": 3.7103511466839144e-05, "loss": 2.6656, "step": 9585 }, { "epoch": 0.78, "learning_rate": 3.7096774193548386e-05, "loss": 2.9794, "step": 9590 }, { "epoch": 0.78, "learning_rate": 3.7090036920257635e-05, "loss": 3.0121, "step": 9595 }, { "epoch": 0.78, "learning_rate": 3.708329964696688e-05, "loss": 2.9782, "step": 9600 }, { "epoch": 0.78, "learning_rate": 3.7076562373676126e-05, "loss": 3.3433, "step": 9605 }, { "epoch": 0.78, "learning_rate": 3.7069825100385375e-05, "loss": 2.7182, "step": 9610 }, { "epoch": 0.78, "learning_rate": 3.7063087827094624e-05, "loss": 2.9858, "step": 9615 }, { "epoch": 0.78, "learning_rate": 3.7056350553803866e-05, "loss": 3.4233, "step": 9620 }, { "epoch": 0.78, "learning_rate": 3.7049613280513115e-05, "loss": 3.2665, "step": 9625 }, { "epoch": 0.78, "learning_rate": 3.704287600722236e-05, "loss": 2.9364, "step": 9630 }, { "epoch": 0.78, "learning_rate": 3.70361387339316e-05, "loss": 3.0708, "step": 9635 }, { "epoch": 0.78, "learning_rate": 3.702940146064085e-05, "loss": 3.078, "step": 9640 }, { "epoch": 0.78, "learning_rate": 3.70226641873501e-05, "loss": 2.9575, "step": 9645 }, { "epoch": 0.78, "learning_rate": 3.7015926914059346e-05, "loss": 3.0181, "step": 9650 }, { "epoch": 0.78, "learning_rate": 3.700918964076859e-05, "loss": 2.7718, "step": 9655 }, { "epoch": 0.78, "learning_rate": 3.7002452367477837e-05, "loss": 3.2755, "step": 9660 }, { "epoch": 0.78, "learning_rate": 3.6995715094187085e-05, "loss": 3.126, "step": 9665 }, { "epoch": 0.78, "learning_rate": 3.6988977820896334e-05, "loss": 3.04, "step": 9670 }, { "epoch": 0.78, "learning_rate": 3.6982240547605576e-05, "loss": 3.1031, "step": 9675 }, { "epoch": 0.78, "learning_rate": 3.697550327431482e-05, "loss": 3.0017, "step": 9680 }, { "epoch": 0.78, "learning_rate": 3.697011345568222e-05, "loss": 3.3596, "step": 9685 }, { "epoch": 0.78, "learning_rate": 3.696337618239146e-05, "loss": 3.1731, "step": 9690 }, { "epoch": 0.78, "learning_rate": 3.695663890910071e-05, "loss": 2.9105, "step": 9695 }, { "epoch": 0.78, "learning_rate": 3.694990163580996e-05, "loss": 3.0292, "step": 9700 }, { "epoch": 0.78, "learning_rate": 3.69431643625192e-05, "loss": 3.1361, "step": 9705 }, { "epoch": 0.78, "learning_rate": 3.693642708922845e-05, "loss": 3.1294, "step": 9710 }, { "epoch": 0.79, "learning_rate": 3.692968981593769e-05, "loss": 3.1243, "step": 9715 }, { "epoch": 0.79, "learning_rate": 3.692295254264694e-05, "loss": 3.0418, "step": 9720 }, { "epoch": 0.79, "learning_rate": 3.6916215269356184e-05, "loss": 3.1548, "step": 9725 }, { "epoch": 0.79, "learning_rate": 3.690947799606543e-05, "loss": 2.8778, "step": 9730 }, { "epoch": 0.79, "learning_rate": 3.690274072277468e-05, "loss": 3.1158, "step": 9735 }, { "epoch": 0.79, "learning_rate": 3.689600344948393e-05, "loss": 2.9114, "step": 9740 }, { "epoch": 0.79, "learning_rate": 3.688926617619317e-05, "loss": 3.0476, "step": 9745 }, { "epoch": 0.79, "learning_rate": 3.688252890290242e-05, "loss": 3.1116, "step": 9750 }, { "epoch": 0.79, "learning_rate": 3.6875791629611664e-05, "loss": 3.1288, "step": 9755 }, { "epoch": 0.79, "learning_rate": 3.6869054356320906e-05, "loss": 3.2127, "step": 9760 }, { "epoch": 0.79, "learning_rate": 3.6862317083030155e-05, "loss": 3.1348, "step": 9765 }, { "epoch": 0.79, "learning_rate": 3.6855579809739404e-05, "loss": 3.189, "step": 9770 }, { "epoch": 0.79, "learning_rate": 3.684884253644865e-05, "loss": 2.9306, "step": 9775 }, { "epoch": 0.79, "learning_rate": 3.6842105263157895e-05, "loss": 3.0042, "step": 9780 }, { "epoch": 0.79, "learning_rate": 3.6835367989867144e-05, "loss": 2.8858, "step": 9785 }, { "epoch": 0.79, "learning_rate": 3.682863071657639e-05, "loss": 3.2717, "step": 9790 }, { "epoch": 0.79, "learning_rate": 3.6821893443285635e-05, "loss": 3.0997, "step": 9795 }, { "epoch": 0.79, "learning_rate": 3.681515616999488e-05, "loss": 3.1203, "step": 9800 }, { "epoch": 0.79, "learning_rate": 3.6808418896704126e-05, "loss": 3.0215, "step": 9805 }, { "epoch": 0.79, "learning_rate": 3.6801681623413374e-05, "loss": 3.1261, "step": 9810 }, { "epoch": 0.79, "learning_rate": 3.6794944350122617e-05, "loss": 3.4258, "step": 9815 }, { "epoch": 0.79, "learning_rate": 3.6788207076831865e-05, "loss": 2.8897, "step": 9820 }, { "epoch": 0.79, "learning_rate": 3.6781469803541114e-05, "loss": 2.9942, "step": 9825 }, { "epoch": 0.79, "learning_rate": 3.677473253025036e-05, "loss": 2.9707, "step": 9830 }, { "epoch": 0.8, "learning_rate": 3.6767995256959605e-05, "loss": 3.0694, "step": 9835 }, { "epoch": 0.8, "learning_rate": 3.6761257983668854e-05, "loss": 2.8014, "step": 9840 }, { "epoch": 0.8, "learning_rate": 3.6754520710378096e-05, "loss": 3.0555, "step": 9845 }, { "epoch": 0.8, "learning_rate": 3.6747783437087345e-05, "loss": 2.8841, "step": 9850 }, { "epoch": 0.8, "learning_rate": 3.674104616379659e-05, "loss": 3.1467, "step": 9855 }, { "epoch": 0.8, "learning_rate": 3.6734308890505836e-05, "loss": 2.8715, "step": 9860 }, { "epoch": 0.8, "learning_rate": 3.6727571617215085e-05, "loss": 3.08, "step": 9865 }, { "epoch": 0.8, "learning_rate": 3.672083434392433e-05, "loss": 3.231, "step": 9870 }, { "epoch": 0.8, "learning_rate": 3.6714097070633576e-05, "loss": 3.0236, "step": 9875 }, { "epoch": 0.8, "learning_rate": 3.6707359797342825e-05, "loss": 3.2113, "step": 9880 }, { "epoch": 0.8, "learning_rate": 3.670062252405207e-05, "loss": 3.0357, "step": 9885 }, { "epoch": 0.8, "learning_rate": 3.669388525076131e-05, "loss": 3.2755, "step": 9890 }, { "epoch": 0.8, "learning_rate": 3.668714797747056e-05, "loss": 2.7913, "step": 9895 }, { "epoch": 0.8, "learning_rate": 3.668041070417981e-05, "loss": 2.9929, "step": 9900 }, { "epoch": 0.8, "learning_rate": 3.6673673430889056e-05, "loss": 3.2487, "step": 9905 }, { "epoch": 0.8, "learning_rate": 3.66669361575983e-05, "loss": 3.0298, "step": 9910 }, { "epoch": 0.8, "learning_rate": 3.6660198884307547e-05, "loss": 2.9926, "step": 9915 }, { "epoch": 0.8, "learning_rate": 3.6653461611016795e-05, "loss": 3.0167, "step": 9920 }, { "epoch": 0.8, "learning_rate": 3.664672433772604e-05, "loss": 2.9078, "step": 9925 }, { "epoch": 0.8, "learning_rate": 3.663998706443528e-05, "loss": 2.8785, "step": 9930 }, { "epoch": 0.8, "learning_rate": 3.663324979114453e-05, "loss": 3.0155, "step": 9935 }, { "epoch": 0.8, "learning_rate": 3.662651251785378e-05, "loss": 2.9471, "step": 9940 }, { "epoch": 0.8, "learning_rate": 3.661977524456302e-05, "loss": 2.8238, "step": 9945 }, { "epoch": 0.8, "learning_rate": 3.661303797127227e-05, "loss": 2.7744, "step": 9950 }, { "epoch": 0.8, "learning_rate": 3.660630069798152e-05, "loss": 3.2174, "step": 9955 }, { "epoch": 0.81, "learning_rate": 3.659956342469076e-05, "loss": 3.0096, "step": 9960 }, { "epoch": 0.81, "learning_rate": 3.659282615140001e-05, "loss": 2.9241, "step": 9965 }, { "epoch": 0.81, "learning_rate": 3.658608887810926e-05, "loss": 2.8246, "step": 9970 }, { "epoch": 0.81, "learning_rate": 3.65793516048185e-05, "loss": 3.0677, "step": 9975 }, { "epoch": 0.81, "learning_rate": 3.657261433152774e-05, "loss": 3.3918, "step": 9980 }, { "epoch": 0.81, "learning_rate": 3.656587705823699e-05, "loss": 3.0951, "step": 9985 }, { "epoch": 0.81, "learning_rate": 3.655913978494624e-05, "loss": 3.2887, "step": 9990 }, { "epoch": 0.81, "learning_rate": 3.6553749966313634e-05, "loss": 3.2198, "step": 9995 }, { "epoch": 0.81, "learning_rate": 3.654701269302288e-05, "loss": 3.1275, "step": 10000 }, { "epoch": 0.81, "learning_rate": 3.654027541973213e-05, "loss": 2.8183, "step": 10005 }, { "epoch": 0.81, "learning_rate": 3.6533538146441374e-05, "loss": 3.2344, "step": 10010 }, { "epoch": 0.81, "learning_rate": 3.6526800873150616e-05, "loss": 3.0728, "step": 10015 }, { "epoch": 0.81, "learning_rate": 3.6520063599859865e-05, "loss": 3.1439, "step": 10020 }, { "epoch": 0.81, "learning_rate": 3.6513326326569114e-05, "loss": 2.9468, "step": 10025 }, { "epoch": 0.81, "learning_rate": 3.650658905327836e-05, "loss": 3.0711, "step": 10030 }, { "epoch": 0.81, "learning_rate": 3.6499851779987605e-05, "loss": 2.9164, "step": 10035 }, { "epoch": 0.81, "learning_rate": 3.6493114506696854e-05, "loss": 3.1862, "step": 10040 }, { "epoch": 0.81, "learning_rate": 3.64863772334061e-05, "loss": 2.7662, "step": 10045 }, { "epoch": 0.81, "learning_rate": 3.6479639960115344e-05, "loss": 2.7516, "step": 10050 }, { "epoch": 0.81, "learning_rate": 3.6472902686824587e-05, "loss": 3.0381, "step": 10055 }, { "epoch": 0.81, "learning_rate": 3.6466165413533835e-05, "loss": 2.9114, "step": 10060 }, { "epoch": 0.81, "learning_rate": 3.6459428140243084e-05, "loss": 3.1425, "step": 10065 }, { "epoch": 0.81, "learning_rate": 3.6452690866952326e-05, "loss": 3.1069, "step": 10070 }, { "epoch": 0.81, "learning_rate": 3.6445953593661575e-05, "loss": 3.1094, "step": 10075 }, { "epoch": 0.81, "learning_rate": 3.6439216320370824e-05, "loss": 3.1806, "step": 10080 }, { "epoch": 0.82, "learning_rate": 3.6432479047080066e-05, "loss": 3.2412, "step": 10085 }, { "epoch": 0.82, "learning_rate": 3.6425741773789315e-05, "loss": 2.6815, "step": 10090 }, { "epoch": 0.82, "learning_rate": 3.641900450049856e-05, "loss": 2.9824, "step": 10095 }, { "epoch": 0.82, "learning_rate": 3.6412267227207806e-05, "loss": 3.0131, "step": 10100 }, { "epoch": 0.82, "learning_rate": 3.640552995391705e-05, "loss": 3.2256, "step": 10105 }, { "epoch": 0.82, "learning_rate": 3.63987926806263e-05, "loss": 3.0638, "step": 10110 }, { "epoch": 0.82, "learning_rate": 3.6392055407335546e-05, "loss": 2.9276, "step": 10115 }, { "epoch": 0.82, "learning_rate": 3.6385318134044795e-05, "loss": 3.1751, "step": 10120 }, { "epoch": 0.82, "learning_rate": 3.637858086075404e-05, "loss": 3.1735, "step": 10125 }, { "epoch": 0.82, "learning_rate": 3.6371843587463286e-05, "loss": 2.7972, "step": 10130 }, { "epoch": 0.82, "learning_rate": 3.636510631417253e-05, "loss": 2.9994, "step": 10135 }, { "epoch": 0.82, "learning_rate": 3.635836904088178e-05, "loss": 2.9632, "step": 10140 }, { "epoch": 0.82, "learning_rate": 3.635163176759102e-05, "loss": 2.9621, "step": 10145 }, { "epoch": 0.82, "learning_rate": 3.634489449430027e-05, "loss": 2.826, "step": 10150 }, { "epoch": 0.82, "learning_rate": 3.633815722100952e-05, "loss": 3.0373, "step": 10155 }, { "epoch": 0.82, "learning_rate": 3.633141994771876e-05, "loss": 2.8925, "step": 10160 }, { "epoch": 0.82, "learning_rate": 3.632468267442801e-05, "loss": 3.0813, "step": 10165 }, { "epoch": 0.82, "learning_rate": 3.6317945401137256e-05, "loss": 2.8686, "step": 10170 }, { "epoch": 0.82, "learning_rate": 3.6311208127846505e-05, "loss": 3.0871, "step": 10175 }, { "epoch": 0.82, "learning_rate": 3.630447085455575e-05, "loss": 2.9655, "step": 10180 }, { "epoch": 0.82, "learning_rate": 3.629773358126499e-05, "loss": 2.9345, "step": 10185 }, { "epoch": 0.82, "learning_rate": 3.629099630797424e-05, "loss": 3.0776, "step": 10190 }, { "epoch": 0.82, "learning_rate": 3.628425903468348e-05, "loss": 3.0574, "step": 10195 }, { "epoch": 0.82, "learning_rate": 3.627752176139273e-05, "loss": 2.9316, "step": 10200 }, { "epoch": 0.83, "learning_rate": 3.627078448810198e-05, "loss": 2.8688, "step": 10205 }, { "epoch": 0.83, "learning_rate": 3.626404721481123e-05, "loss": 2.9406, "step": 10210 }, { "epoch": 0.83, "learning_rate": 3.625730994152047e-05, "loss": 3.0147, "step": 10215 }, { "epoch": 0.83, "learning_rate": 3.625057266822972e-05, "loss": 3.0162, "step": 10220 }, { "epoch": 0.83, "learning_rate": 3.624383539493896e-05, "loss": 2.9309, "step": 10225 }, { "epoch": 0.83, "learning_rate": 3.623709812164821e-05, "loss": 3.2362, "step": 10230 }, { "epoch": 0.83, "learning_rate": 3.623036084835745e-05, "loss": 3.1789, "step": 10235 }, { "epoch": 0.83, "learning_rate": 3.62236235750667e-05, "loss": 2.7308, "step": 10240 }, { "epoch": 0.83, "learning_rate": 3.621688630177595e-05, "loss": 2.8587, "step": 10245 }, { "epoch": 0.83, "learning_rate": 3.621014902848519e-05, "loss": 2.9517, "step": 10250 }, { "epoch": 0.83, "learning_rate": 3.620341175519444e-05, "loss": 2.9824, "step": 10255 }, { "epoch": 0.83, "learning_rate": 3.619667448190369e-05, "loss": 3.0932, "step": 10260 }, { "epoch": 0.83, "learning_rate": 3.618993720861293e-05, "loss": 3.3439, "step": 10265 }, { "epoch": 0.83, "learning_rate": 3.618319993532217e-05, "loss": 3.1795, "step": 10270 }, { "epoch": 0.83, "learning_rate": 3.617646266203142e-05, "loss": 3.0621, "step": 10275 }, { "epoch": 0.83, "learning_rate": 3.616972538874067e-05, "loss": 2.8596, "step": 10280 }, { "epoch": 0.83, "learning_rate": 3.616298811544992e-05, "loss": 3.0604, "step": 10285 }, { "epoch": 0.83, "learning_rate": 3.615625084215916e-05, "loss": 3.0342, "step": 10290 }, { "epoch": 0.83, "learning_rate": 3.614951356886841e-05, "loss": 2.9337, "step": 10295 }, { "epoch": 0.83, "learning_rate": 3.614277629557766e-05, "loss": 3.0446, "step": 10300 }, { "epoch": 0.83, "learning_rate": 3.61360390222869e-05, "loss": 2.8938, "step": 10305 }, { "epoch": 0.83, "learning_rate": 3.612930174899615e-05, "loss": 2.8491, "step": 10310 }, { "epoch": 0.83, "learning_rate": 3.612256447570539e-05, "loss": 2.9921, "step": 10315 }, { "epoch": 0.83, "learning_rate": 3.611582720241464e-05, "loss": 3.0225, "step": 10320 }, { "epoch": 0.83, "learning_rate": 3.6109089929123883e-05, "loss": 2.9918, "step": 10325 }, { "epoch": 0.84, "learning_rate": 3.610235265583313e-05, "loss": 3.1685, "step": 10330 }, { "epoch": 0.84, "learning_rate": 3.609561538254238e-05, "loss": 3.1366, "step": 10335 }, { "epoch": 0.84, "learning_rate": 3.608887810925163e-05, "loss": 3.1979, "step": 10340 }, { "epoch": 0.84, "learning_rate": 3.608214083596087e-05, "loss": 3.2472, "step": 10345 }, { "epoch": 0.84, "learning_rate": 3.607540356267012e-05, "loss": 3.0492, "step": 10350 }, { "epoch": 0.84, "learning_rate": 3.606866628937936e-05, "loss": 3.0889, "step": 10355 }, { "epoch": 0.84, "learning_rate": 3.6061929016088605e-05, "loss": 3.1712, "step": 10360 }, { "epoch": 0.84, "learning_rate": 3.6055191742797854e-05, "loss": 3.1994, "step": 10365 }, { "epoch": 0.84, "learning_rate": 3.60484544695071e-05, "loss": 2.9661, "step": 10370 }, { "epoch": 0.84, "learning_rate": 3.604171719621635e-05, "loss": 3.0717, "step": 10375 }, { "epoch": 0.84, "learning_rate": 3.6034979922925594e-05, "loss": 2.963, "step": 10380 }, { "epoch": 0.84, "learning_rate": 3.602824264963484e-05, "loss": 2.9531, "step": 10385 }, { "epoch": 0.84, "learning_rate": 3.602150537634409e-05, "loss": 2.7738, "step": 10390 }, { "epoch": 0.84, "learning_rate": 3.6014768103053334e-05, "loss": 3.2369, "step": 10395 }, { "epoch": 0.84, "learning_rate": 3.6008030829762576e-05, "loss": 3.0199, "step": 10400 }, { "epoch": 0.84, "learning_rate": 3.6001293556471825e-05, "loss": 2.9024, "step": 10405 }, { "epoch": 0.84, "learning_rate": 3.5994556283181074e-05, "loss": 3.4314, "step": 10410 }, { "epoch": 0.84, "learning_rate": 3.5987819009890316e-05, "loss": 2.9359, "step": 10415 }, { "epoch": 0.84, "learning_rate": 3.5981081736599565e-05, "loss": 3.0674, "step": 10420 }, { "epoch": 0.84, "learning_rate": 3.5974344463308813e-05, "loss": 2.8982, "step": 10425 }, { "epoch": 0.84, "learning_rate": 3.596760719001806e-05, "loss": 3.1686, "step": 10430 }, { "epoch": 0.84, "learning_rate": 3.5960869916727304e-05, "loss": 2.855, "step": 10435 }, { "epoch": 0.84, "learning_rate": 3.595413264343655e-05, "loss": 2.9604, "step": 10440 }, { "epoch": 0.84, "learning_rate": 3.5947395370145795e-05, "loss": 2.9602, "step": 10445 }, { "epoch": 0.84, "learning_rate": 3.594065809685504e-05, "loss": 3.1929, "step": 10450 }, { "epoch": 0.85, "learning_rate": 3.5933920823564286e-05, "loss": 3.1893, "step": 10455 }, { "epoch": 0.85, "learning_rate": 3.5927183550273535e-05, "loss": 3.0879, "step": 10460 }, { "epoch": 0.85, "learning_rate": 3.5920446276982784e-05, "loss": 2.8213, "step": 10465 }, { "epoch": 0.85, "learning_rate": 3.5913709003692026e-05, "loss": 3.1896, "step": 10470 }, { "epoch": 0.85, "learning_rate": 3.5906971730401275e-05, "loss": 2.8873, "step": 10475 }, { "epoch": 0.85, "learning_rate": 3.5900234457110524e-05, "loss": 2.7271, "step": 10480 }, { "epoch": 0.85, "learning_rate": 3.5893497183819766e-05, "loss": 2.9885, "step": 10485 }, { "epoch": 0.85, "learning_rate": 3.588675991052901e-05, "loss": 2.9931, "step": 10490 }, { "epoch": 0.85, "learning_rate": 3.588002263723826e-05, "loss": 3.0486, "step": 10495 }, { "epoch": 0.85, "learning_rate": 3.5873285363947506e-05, "loss": 2.7903, "step": 10500 }, { "epoch": 0.85, "learning_rate": 3.586654809065675e-05, "loss": 2.9754, "step": 10505 }, { "epoch": 0.85, "learning_rate": 3.5859810817366e-05, "loss": 3.2095, "step": 10510 }, { "epoch": 0.85, "learning_rate": 3.5853073544075246e-05, "loss": 3.1133, "step": 10515 }, { "epoch": 0.85, "learning_rate": 3.5846336270784495e-05, "loss": 2.7682, "step": 10520 }, { "epoch": 0.85, "learning_rate": 3.583959899749374e-05, "loss": 2.941, "step": 10525 }, { "epoch": 0.85, "learning_rate": 3.583286172420298e-05, "loss": 3.0385, "step": 10530 }, { "epoch": 0.85, "learning_rate": 3.582612445091223e-05, "loss": 2.8513, "step": 10535 }, { "epoch": 0.85, "learning_rate": 3.5819387177621477e-05, "loss": 3.1885, "step": 10540 }, { "epoch": 0.85, "learning_rate": 3.581264990433072e-05, "loss": 3.2157, "step": 10545 }, { "epoch": 0.85, "learning_rate": 3.580591263103997e-05, "loss": 2.9265, "step": 10550 }, { "epoch": 0.85, "learning_rate": 3.5799175357749216e-05, "loss": 2.6739, "step": 10555 }, { "epoch": 0.85, "learning_rate": 3.579243808445846e-05, "loss": 2.8451, "step": 10560 }, { "epoch": 0.85, "learning_rate": 3.578570081116771e-05, "loss": 2.9205, "step": 10565 }, { "epoch": 0.85, "learning_rate": 3.577896353787695e-05, "loss": 2.6902, "step": 10570 }, { "epoch": 0.85, "learning_rate": 3.57722262645862e-05, "loss": 2.9319, "step": 10575 }, { "epoch": 0.86, "learning_rate": 3.576548899129544e-05, "loss": 3.5273, "step": 10580 }, { "epoch": 0.86, "learning_rate": 3.575875171800469e-05, "loss": 3.2621, "step": 10585 }, { "epoch": 0.86, "learning_rate": 3.575201444471394e-05, "loss": 2.8811, "step": 10590 }, { "epoch": 0.86, "learning_rate": 3.574527717142319e-05, "loss": 3.0016, "step": 10595 }, { "epoch": 0.86, "learning_rate": 3.573853989813243e-05, "loss": 3.0808, "step": 10600 }, { "epoch": 0.86, "learning_rate": 3.573180262484168e-05, "loss": 3.0308, "step": 10605 }, { "epoch": 0.86, "learning_rate": 3.572506535155093e-05, "loss": 2.9164, "step": 10610 }, { "epoch": 0.86, "learning_rate": 3.571832807826017e-05, "loss": 2.8661, "step": 10615 }, { "epoch": 0.86, "learning_rate": 3.571159080496941e-05, "loss": 3.1764, "step": 10620 }, { "epoch": 0.86, "learning_rate": 3.570485353167866e-05, "loss": 2.9688, "step": 10625 }, { "epoch": 0.86, "learning_rate": 3.569811625838791e-05, "loss": 2.8292, "step": 10630 }, { "epoch": 0.86, "learning_rate": 3.569137898509715e-05, "loss": 3.1102, "step": 10635 }, { "epoch": 0.86, "learning_rate": 3.56846417118064e-05, "loss": 2.9934, "step": 10640 }, { "epoch": 0.86, "learning_rate": 3.567790443851565e-05, "loss": 3.2563, "step": 10645 }, { "epoch": 0.86, "learning_rate": 3.56711671652249e-05, "loss": 2.8752, "step": 10650 }, { "epoch": 0.86, "learning_rate": 3.566442989193414e-05, "loss": 2.9797, "step": 10655 }, { "epoch": 0.86, "learning_rate": 3.565769261864338e-05, "loss": 2.8909, "step": 10660 }, { "epoch": 0.86, "learning_rate": 3.565095534535263e-05, "loss": 3.0765, "step": 10665 }, { "epoch": 0.86, "learning_rate": 3.564421807206187e-05, "loss": 2.8784, "step": 10670 }, { "epoch": 0.86, "learning_rate": 3.563748079877112e-05, "loss": 3.0186, "step": 10675 }, { "epoch": 0.86, "learning_rate": 3.563074352548037e-05, "loss": 3.0764, "step": 10680 }, { "epoch": 0.86, "learning_rate": 3.562400625218962e-05, "loss": 3.015, "step": 10685 }, { "epoch": 0.86, "learning_rate": 3.561726897889886e-05, "loss": 2.9773, "step": 10690 }, { "epoch": 0.86, "learning_rate": 3.561053170560811e-05, "loss": 2.9843, "step": 10695 }, { "epoch": 0.87, "learning_rate": 3.560379443231735e-05, "loss": 2.8827, "step": 10700 }, { "epoch": 0.87, "learning_rate": 3.5597057159026595e-05, "loss": 3.1021, "step": 10705 }, { "epoch": 0.87, "learning_rate": 3.559031988573584e-05, "loss": 3.0825, "step": 10710 }, { "epoch": 0.87, "learning_rate": 3.558358261244509e-05, "loss": 2.9813, "step": 10715 }, { "epoch": 0.87, "learning_rate": 3.557684533915434e-05, "loss": 2.8546, "step": 10720 }, { "epoch": 0.87, "learning_rate": 3.557010806586358e-05, "loss": 3.0049, "step": 10725 }, { "epoch": 0.87, "learning_rate": 3.556337079257283e-05, "loss": 2.6026, "step": 10730 }, { "epoch": 0.87, "learning_rate": 3.555663351928208e-05, "loss": 3.0555, "step": 10735 }, { "epoch": 0.87, "learning_rate": 3.554989624599133e-05, "loss": 2.9656, "step": 10740 }, { "epoch": 0.87, "learning_rate": 3.554315897270057e-05, "loss": 2.9598, "step": 10745 }, { "epoch": 0.87, "learning_rate": 3.5536421699409814e-05, "loss": 2.9622, "step": 10750 }, { "epoch": 0.87, "learning_rate": 3.552968442611906e-05, "loss": 3.2513, "step": 10755 }, { "epoch": 0.87, "learning_rate": 3.5522947152828305e-05, "loss": 3.0568, "step": 10760 }, { "epoch": 0.87, "learning_rate": 3.5516209879537554e-05, "loss": 3.1684, "step": 10765 }, { "epoch": 0.87, "learning_rate": 3.55094726062468e-05, "loss": 3.0154, "step": 10770 }, { "epoch": 0.87, "learning_rate": 3.550273533295605e-05, "loss": 2.8607, "step": 10775 }, { "epoch": 0.87, "learning_rate": 3.5495998059665294e-05, "loss": 2.953, "step": 10780 }, { "epoch": 0.87, "learning_rate": 3.548926078637454e-05, "loss": 3.1728, "step": 10785 }, { "epoch": 0.87, "learning_rate": 3.5482523513083785e-05, "loss": 2.8006, "step": 10790 }, { "epoch": 0.87, "learning_rate": 3.5475786239793034e-05, "loss": 3.0427, "step": 10795 }, { "epoch": 0.87, "learning_rate": 3.5469048966502276e-05, "loss": 3.0487, "step": 10800 }, { "epoch": 0.87, "learning_rate": 3.5462311693211525e-05, "loss": 3.1628, "step": 10805 }, { "epoch": 0.87, "learning_rate": 3.5455574419920773e-05, "loss": 2.8268, "step": 10810 }, { "epoch": 0.87, "learning_rate": 3.5448837146630015e-05, "loss": 2.9314, "step": 10815 }, { "epoch": 0.87, "learning_rate": 3.5442099873339264e-05, "loss": 2.9674, "step": 10820 }, { "epoch": 0.88, "learning_rate": 3.543536260004851e-05, "loss": 3.027, "step": 10825 }, { "epoch": 0.88, "learning_rate": 3.5428625326757755e-05, "loss": 3.0957, "step": 10830 }, { "epoch": 0.88, "learning_rate": 3.5421888053467e-05, "loss": 2.9217, "step": 10835 }, { "epoch": 0.88, "learning_rate": 3.5415150780176246e-05, "loss": 2.8224, "step": 10840 }, { "epoch": 0.88, "learning_rate": 3.5408413506885495e-05, "loss": 2.9139, "step": 10845 }, { "epoch": 0.88, "learning_rate": 3.5401676233594744e-05, "loss": 2.8247, "step": 10850 }, { "epoch": 0.88, "learning_rate": 3.5394938960303986e-05, "loss": 2.9044, "step": 10855 }, { "epoch": 0.88, "learning_rate": 3.5388201687013235e-05, "loss": 2.8095, "step": 10860 }, { "epoch": 0.88, "learning_rate": 3.5381464413722484e-05, "loss": 3.1797, "step": 10865 }, { "epoch": 0.88, "learning_rate": 3.5374727140431726e-05, "loss": 2.7943, "step": 10870 }, { "epoch": 0.88, "learning_rate": 3.5367989867140975e-05, "loss": 2.8645, "step": 10875 }, { "epoch": 0.88, "learning_rate": 3.536125259385022e-05, "loss": 2.8684, "step": 10880 }, { "epoch": 0.88, "learning_rate": 3.5354515320559466e-05, "loss": 3.0959, "step": 10885 }, { "epoch": 0.88, "learning_rate": 3.534777804726871e-05, "loss": 3.2173, "step": 10890 }, { "epoch": 0.88, "learning_rate": 3.534104077397796e-05, "loss": 2.8572, "step": 10895 }, { "epoch": 0.88, "learning_rate": 3.5334303500687206e-05, "loss": 2.8573, "step": 10900 }, { "epoch": 0.88, "learning_rate": 3.5327566227396455e-05, "loss": 3.1226, "step": 10905 }, { "epoch": 0.88, "learning_rate": 3.53208289541057e-05, "loss": 2.8455, "step": 10910 }, { "epoch": 0.88, "learning_rate": 3.5314091680814946e-05, "loss": 3.0792, "step": 10915 }, { "epoch": 0.88, "learning_rate": 3.530735440752419e-05, "loss": 3.1342, "step": 10920 }, { "epoch": 0.88, "learning_rate": 3.530061713423343e-05, "loss": 2.9695, "step": 10925 }, { "epoch": 0.88, "learning_rate": 3.529387986094268e-05, "loss": 2.9094, "step": 10930 }, { "epoch": 0.88, "learning_rate": 3.528714258765193e-05, "loss": 2.8054, "step": 10935 }, { "epoch": 0.88, "learning_rate": 3.5280405314361176e-05, "loss": 3.2511, "step": 10940 }, { "epoch": 0.88, "learning_rate": 3.527366804107042e-05, "loss": 3.067, "step": 10945 }, { "epoch": 0.89, "learning_rate": 3.526693076777967e-05, "loss": 3.0682, "step": 10950 }, { "epoch": 0.89, "learning_rate": 3.5260193494488916e-05, "loss": 3.2405, "step": 10955 }, { "epoch": 0.89, "learning_rate": 3.525345622119816e-05, "loss": 2.8744, "step": 10960 }, { "epoch": 0.89, "learning_rate": 3.52467189479074e-05, "loss": 2.8158, "step": 10965 }, { "epoch": 0.89, "learning_rate": 3.523998167461665e-05, "loss": 3.1325, "step": 10970 }, { "epoch": 0.89, "learning_rate": 3.52332444013259e-05, "loss": 2.8495, "step": 10975 }, { "epoch": 0.89, "learning_rate": 3.522650712803514e-05, "loss": 2.9472, "step": 10980 }, { "epoch": 0.89, "learning_rate": 3.521976985474439e-05, "loss": 3.1621, "step": 10985 }, { "epoch": 0.89, "learning_rate": 3.521303258145364e-05, "loss": 2.998, "step": 10990 }, { "epoch": 0.89, "learning_rate": 3.520629530816289e-05, "loss": 2.9869, "step": 10995 }, { "epoch": 0.89, "learning_rate": 3.519955803487213e-05, "loss": 3.0895, "step": 11000 }, { "epoch": 0.89, "learning_rate": 3.519282076158137e-05, "loss": 2.8368, "step": 11005 }, { "epoch": 0.89, "learning_rate": 3.518608348829062e-05, "loss": 3.1186, "step": 11010 }, { "epoch": 0.89, "learning_rate": 3.517934621499986e-05, "loss": 2.9144, "step": 11015 }, { "epoch": 0.89, "learning_rate": 3.517260894170911e-05, "loss": 3.1524, "step": 11020 }, { "epoch": 0.89, "learning_rate": 3.516587166841836e-05, "loss": 2.7921, "step": 11025 }, { "epoch": 0.89, "learning_rate": 3.515913439512761e-05, "loss": 2.9172, "step": 11030 }, { "epoch": 0.89, "learning_rate": 3.515239712183685e-05, "loss": 3.1107, "step": 11035 }, { "epoch": 0.89, "learning_rate": 3.51456598485461e-05, "loss": 3.0204, "step": 11040 }, { "epoch": 0.89, "learning_rate": 3.513892257525535e-05, "loss": 2.9076, "step": 11045 }, { "epoch": 0.89, "learning_rate": 3.513218530196459e-05, "loss": 2.746, "step": 11050 }, { "epoch": 0.89, "learning_rate": 3.512544802867383e-05, "loss": 3.1643, "step": 11055 }, { "epoch": 0.89, "learning_rate": 3.511871075538308e-05, "loss": 2.9974, "step": 11060 }, { "epoch": 0.89, "learning_rate": 3.511197348209233e-05, "loss": 2.9475, "step": 11065 }, { "epoch": 0.89, "learning_rate": 3.510523620880157e-05, "loss": 2.8503, "step": 11070 }, { "epoch": 0.9, "learning_rate": 3.509849893551082e-05, "loss": 2.8255, "step": 11075 }, { "epoch": 0.9, "learning_rate": 3.509176166222007e-05, "loss": 3.0081, "step": 11080 }, { "epoch": 0.9, "learning_rate": 3.508502438892932e-05, "loss": 3.0882, "step": 11085 }, { "epoch": 0.9, "learning_rate": 3.507828711563856e-05, "loss": 3.3281, "step": 11090 }, { "epoch": 0.9, "learning_rate": 3.50715498423478e-05, "loss": 2.7693, "step": 11095 }, { "epoch": 0.9, "learning_rate": 3.506481256905705e-05, "loss": 2.9684, "step": 11100 }, { "epoch": 0.9, "learning_rate": 3.50580752957663e-05, "loss": 2.8099, "step": 11105 }, { "epoch": 0.9, "learning_rate": 3.505133802247554e-05, "loss": 2.9284, "step": 11110 }, { "epoch": 0.9, "learning_rate": 3.504460074918479e-05, "loss": 3.0644, "step": 11115 }, { "epoch": 0.9, "learning_rate": 3.503786347589404e-05, "loss": 3.0546, "step": 11120 }, { "epoch": 0.9, "learning_rate": 3.503112620260328e-05, "loss": 3.0909, "step": 11125 }, { "epoch": 0.9, "learning_rate": 3.502438892931253e-05, "loss": 3.0999, "step": 11130 }, { "epoch": 0.9, "learning_rate": 3.5017651656021774e-05, "loss": 2.9832, "step": 11135 }, { "epoch": 0.9, "learning_rate": 3.501091438273102e-05, "loss": 3.2219, "step": 11140 }, { "epoch": 0.9, "learning_rate": 3.5004177109440265e-05, "loss": 2.7767, "step": 11145 }, { "epoch": 0.9, "learning_rate": 3.4997439836149514e-05, "loss": 3.1374, "step": 11150 }, { "epoch": 0.9, "learning_rate": 3.499070256285876e-05, "loss": 2.6789, "step": 11155 }, { "epoch": 0.9, "learning_rate": 3.498396528956801e-05, "loss": 2.7915, "step": 11160 }, { "epoch": 0.9, "learning_rate": 3.4977228016277254e-05, "loss": 3.1062, "step": 11165 }, { "epoch": 0.9, "learning_rate": 3.49704907429865e-05, "loss": 2.7951, "step": 11170 }, { "epoch": 0.9, "learning_rate": 3.496375346969575e-05, "loss": 3.0343, "step": 11175 }, { "epoch": 0.9, "learning_rate": 3.4957016196404993e-05, "loss": 2.9418, "step": 11180 }, { "epoch": 0.9, "learning_rate": 3.4950278923114236e-05, "loss": 2.9457, "step": 11185 }, { "epoch": 0.9, "learning_rate": 3.4943541649823484e-05, "loss": 2.9444, "step": 11190 }, { "epoch": 0.91, "learning_rate": 3.493680437653273e-05, "loss": 2.9274, "step": 11195 }, { "epoch": 0.91, "learning_rate": 3.4930067103241975e-05, "loss": 2.9349, "step": 11200 }, { "epoch": 0.91, "learning_rate": 3.4923329829951224e-05, "loss": 2.8807, "step": 11205 }, { "epoch": 0.91, "learning_rate": 3.491659255666047e-05, "loss": 3.0897, "step": 11210 }, { "epoch": 0.91, "learning_rate": 3.490985528336972e-05, "loss": 3.0386, "step": 11215 }, { "epoch": 0.91, "learning_rate": 3.4903118010078964e-05, "loss": 3.0223, "step": 11220 }, { "epoch": 0.91, "learning_rate": 3.4896380736788206e-05, "loss": 2.9861, "step": 11225 }, { "epoch": 0.91, "learning_rate": 3.4889643463497455e-05, "loss": 3.4111, "step": 11230 }, { "epoch": 0.91, "learning_rate": 3.48829061902067e-05, "loss": 2.8458, "step": 11235 }, { "epoch": 0.91, "learning_rate": 3.4876168916915946e-05, "loss": 3.1452, "step": 11240 }, { "epoch": 0.91, "learning_rate": 3.4869431643625195e-05, "loss": 3.0766, "step": 11245 }, { "epoch": 0.91, "learning_rate": 3.4862694370334444e-05, "loss": 3.0981, "step": 11250 }, { "epoch": 0.91, "learning_rate": 3.4855957097043686e-05, "loss": 2.9809, "step": 11255 }, { "epoch": 0.91, "learning_rate": 3.4849219823752935e-05, "loss": 2.9177, "step": 11260 }, { "epoch": 0.91, "learning_rate": 3.484248255046218e-05, "loss": 2.9045, "step": 11265 }, { "epoch": 0.91, "learning_rate": 3.483574527717142e-05, "loss": 3.1389, "step": 11270 }, { "epoch": 0.91, "learning_rate": 3.482900800388067e-05, "loss": 3.273, "step": 11275 }, { "epoch": 0.91, "learning_rate": 3.482227073058992e-05, "loss": 3.0738, "step": 11280 }, { "epoch": 0.91, "learning_rate": 3.4815533457299166e-05, "loss": 3.0059, "step": 11285 }, { "epoch": 0.91, "learning_rate": 3.480879618400841e-05, "loss": 3.1052, "step": 11290 }, { "epoch": 0.91, "learning_rate": 3.4802058910717657e-05, "loss": 2.9186, "step": 11295 }, { "epoch": 0.91, "learning_rate": 3.4795321637426905e-05, "loss": 3.4463, "step": 11300 }, { "epoch": 0.91, "learning_rate": 3.478858436413615e-05, "loss": 2.9143, "step": 11305 }, { "epoch": 0.91, "learning_rate": 3.4781847090845396e-05, "loss": 3.0278, "step": 11310 }, { "epoch": 0.91, "learning_rate": 3.477510981755464e-05, "loss": 2.7317, "step": 11315 }, { "epoch": 0.92, "learning_rate": 3.476837254426389e-05, "loss": 3.2409, "step": 11320 }, { "epoch": 0.92, "learning_rate": 3.476163527097313e-05, "loss": 2.8277, "step": 11325 }, { "epoch": 0.92, "learning_rate": 3.475489799768238e-05, "loss": 2.7135, "step": 11330 }, { "epoch": 0.92, "learning_rate": 3.474816072439163e-05, "loss": 2.9345, "step": 11335 }, { "epoch": 0.92, "learning_rate": 3.4741423451100876e-05, "loss": 3.0272, "step": 11340 }, { "epoch": 0.92, "learning_rate": 3.473468617781012e-05, "loss": 3.1851, "step": 11345 }, { "epoch": 0.92, "learning_rate": 3.472794890451937e-05, "loss": 3.0905, "step": 11350 }, { "epoch": 0.92, "learning_rate": 3.472121163122861e-05, "loss": 3.0061, "step": 11355 }, { "epoch": 0.92, "learning_rate": 3.471447435793786e-05, "loss": 3.1029, "step": 11360 }, { "epoch": 0.92, "learning_rate": 3.47077370846471e-05, "loss": 3.0591, "step": 11365 }, { "epoch": 0.92, "learning_rate": 3.470099981135635e-05, "loss": 3.0617, "step": 11370 }, { "epoch": 0.92, "learning_rate": 3.46942625380656e-05, "loss": 3.0491, "step": 11375 }, { "epoch": 0.92, "learning_rate": 3.468752526477484e-05, "loss": 3.0714, "step": 11380 }, { "epoch": 0.92, "learning_rate": 3.468078799148409e-05, "loss": 2.9701, "step": 11385 }, { "epoch": 0.92, "learning_rate": 3.467405071819334e-05, "loss": 3.2235, "step": 11390 }, { "epoch": 0.92, "learning_rate": 3.466731344490258e-05, "loss": 2.8136, "step": 11395 }, { "epoch": 0.92, "learning_rate": 3.466057617161182e-05, "loss": 2.9353, "step": 11400 }, { "epoch": 0.92, "learning_rate": 3.465383889832107e-05, "loss": 3.0163, "step": 11405 }, { "epoch": 0.92, "learning_rate": 3.464710162503032e-05, "loss": 2.9738, "step": 11410 }, { "epoch": 0.92, "learning_rate": 3.464036435173957e-05, "loss": 3.0965, "step": 11415 }, { "epoch": 0.92, "learning_rate": 3.463362707844881e-05, "loss": 2.955, "step": 11420 }, { "epoch": 0.92, "learning_rate": 3.462688980515806e-05, "loss": 2.8734, "step": 11425 }, { "epoch": 0.92, "learning_rate": 3.462015253186731e-05, "loss": 3.0841, "step": 11430 }, { "epoch": 0.92, "learning_rate": 3.461341525857655e-05, "loss": 3.2559, "step": 11435 }, { "epoch": 0.92, "learning_rate": 3.460667798528579e-05, "loss": 2.9961, "step": 11440 }, { "epoch": 0.93, "learning_rate": 3.459994071199504e-05, "loss": 3.0601, "step": 11445 }, { "epoch": 0.93, "learning_rate": 3.459320343870429e-05, "loss": 3.4935, "step": 11450 }, { "epoch": 0.93, "learning_rate": 3.458646616541353e-05, "loss": 2.9135, "step": 11455 }, { "epoch": 0.93, "learning_rate": 3.457972889212278e-05, "loss": 2.9466, "step": 11460 }, { "epoch": 0.93, "learning_rate": 3.457299161883203e-05, "loss": 3.0844, "step": 11465 }, { "epoch": 0.93, "learning_rate": 3.456625434554128e-05, "loss": 3.1374, "step": 11470 }, { "epoch": 0.93, "learning_rate": 3.455951707225052e-05, "loss": 2.9251, "step": 11475 }, { "epoch": 0.93, "learning_rate": 3.455277979895977e-05, "loss": 3.0553, "step": 11480 }, { "epoch": 0.93, "learning_rate": 3.454604252566901e-05, "loss": 3.1865, "step": 11485 }, { "epoch": 0.93, "learning_rate": 3.4539305252378254e-05, "loss": 2.8396, "step": 11490 }, { "epoch": 0.93, "learning_rate": 3.4533915433745656e-05, "loss": 3.1356, "step": 11495 }, { "epoch": 0.93, "learning_rate": 3.4527178160454905e-05, "loss": 2.9385, "step": 11500 }, { "epoch": 0.93, "learning_rate": 3.452044088716415e-05, "loss": 2.909, "step": 11505 }, { "epoch": 0.93, "learning_rate": 3.4513703613873396e-05, "loss": 3.1132, "step": 11510 }, { "epoch": 0.93, "learning_rate": 3.4506966340582645e-05, "loss": 2.9435, "step": 11515 }, { "epoch": 0.93, "learning_rate": 3.450157652195003e-05, "loss": 3.6755, "step": 11520 }, { "epoch": 0.93, "learning_rate": 3.449483924865928e-05, "loss": 3.2154, "step": 11525 }, { "epoch": 0.93, "learning_rate": 3.448810197536853e-05, "loss": 3.0608, "step": 11530 }, { "epoch": 0.93, "learning_rate": 3.448136470207778e-05, "loss": 3.012, "step": 11535 }, { "epoch": 0.93, "learning_rate": 3.447462742878702e-05, "loss": 3.1076, "step": 11540 }, { "epoch": 0.93, "learning_rate": 3.446789015549627e-05, "loss": 2.9761, "step": 11545 }, { "epoch": 0.93, "learning_rate": 3.446115288220552e-05, "loss": 2.9443, "step": 11550 }, { "epoch": 0.93, "learning_rate": 3.445441560891476e-05, "loss": 2.9833, "step": 11555 }, { "epoch": 0.93, "learning_rate": 3.4447678335624004e-05, "loss": 2.9272, "step": 11560 }, { "epoch": 0.93, "learning_rate": 3.444094106233325e-05, "loss": 2.7781, "step": 11565 }, { "epoch": 0.94, "learning_rate": 3.44342037890425e-05, "loss": 2.8401, "step": 11570 }, { "epoch": 0.94, "learning_rate": 3.4427466515751743e-05, "loss": 3.3159, "step": 11575 }, { "epoch": 0.94, "learning_rate": 3.442072924246099e-05, "loss": 3.2537, "step": 11580 }, { "epoch": 0.94, "learning_rate": 3.441399196917024e-05, "loss": 2.7323, "step": 11585 }, { "epoch": 0.94, "learning_rate": 3.440725469587949e-05, "loss": 2.8754, "step": 11590 }, { "epoch": 0.94, "learning_rate": 3.440051742258873e-05, "loss": 2.7398, "step": 11595 }, { "epoch": 0.94, "learning_rate": 3.4393780149297974e-05, "loss": 3.2692, "step": 11600 }, { "epoch": 0.94, "learning_rate": 3.438704287600722e-05, "loss": 2.9441, "step": 11605 }, { "epoch": 0.94, "learning_rate": 3.438030560271647e-05, "loss": 3.0601, "step": 11610 }, { "epoch": 0.94, "learning_rate": 3.4373568329425714e-05, "loss": 2.9512, "step": 11615 }, { "epoch": 0.94, "learning_rate": 3.436683105613496e-05, "loss": 3.04, "step": 11620 }, { "epoch": 0.94, "learning_rate": 3.436009378284421e-05, "loss": 2.9893, "step": 11625 }, { "epoch": 0.94, "learning_rate": 3.4353356509553454e-05, "loss": 2.9824, "step": 11630 }, { "epoch": 0.94, "learning_rate": 3.43466192362627e-05, "loss": 2.9835, "step": 11635 }, { "epoch": 0.94, "learning_rate": 3.4339881962971945e-05, "loss": 2.8617, "step": 11640 }, { "epoch": 0.94, "learning_rate": 3.4333144689681194e-05, "loss": 2.8856, "step": 11645 }, { "epoch": 0.94, "learning_rate": 3.4326407416390436e-05, "loss": 3.131, "step": 11650 }, { "epoch": 0.94, "learning_rate": 3.4319670143099685e-05, "loss": 2.846, "step": 11655 }, { "epoch": 0.94, "learning_rate": 3.4312932869808934e-05, "loss": 3.2464, "step": 11660 }, { "epoch": 0.94, "learning_rate": 3.430619559651818e-05, "loss": 3.1764, "step": 11665 }, { "epoch": 0.94, "learning_rate": 3.4299458323227425e-05, "loss": 3.0205, "step": 11670 }, { "epoch": 0.94, "learning_rate": 3.4292721049936674e-05, "loss": 3.0125, "step": 11675 }, { "epoch": 0.94, "learning_rate": 3.428598377664592e-05, "loss": 3.0583, "step": 11680 }, { "epoch": 0.94, "learning_rate": 3.4279246503355164e-05, "loss": 3.2121, "step": 11685 }, { "epoch": 0.95, "learning_rate": 3.4272509230064407e-05, "loss": 3.0483, "step": 11690 }, { "epoch": 0.95, "learning_rate": 3.4265771956773655e-05, "loss": 2.9595, "step": 11695 }, { "epoch": 0.95, "learning_rate": 3.4259034683482904e-05, "loss": 2.8907, "step": 11700 }, { "epoch": 0.95, "learning_rate": 3.4252297410192146e-05, "loss": 3.2868, "step": 11705 }, { "epoch": 0.95, "learning_rate": 3.4245560136901395e-05, "loss": 2.9464, "step": 11710 }, { "epoch": 0.95, "learning_rate": 3.4238822863610644e-05, "loss": 3.4274, "step": 11715 }, { "epoch": 0.95, "learning_rate": 3.423208559031989e-05, "loss": 3.1743, "step": 11720 }, { "epoch": 0.95, "learning_rate": 3.4225348317029135e-05, "loss": 3.2586, "step": 11725 }, { "epoch": 0.95, "learning_rate": 3.421861104373838e-05, "loss": 2.7091, "step": 11730 }, { "epoch": 0.95, "learning_rate": 3.4211873770447626e-05, "loss": 2.7987, "step": 11735 }, { "epoch": 0.95, "learning_rate": 3.420513649715687e-05, "loss": 2.9936, "step": 11740 }, { "epoch": 0.95, "learning_rate": 3.419839922386612e-05, "loss": 3.0006, "step": 11745 }, { "epoch": 0.95, "learning_rate": 3.4191661950575366e-05, "loss": 3.2487, "step": 11750 }, { "epoch": 0.95, "learning_rate": 3.4184924677284615e-05, "loss": 2.9673, "step": 11755 }, { "epoch": 0.95, "learning_rate": 3.417818740399386e-05, "loss": 2.9303, "step": 11760 }, { "epoch": 0.95, "learning_rate": 3.4171450130703106e-05, "loss": 2.7594, "step": 11765 }, { "epoch": 0.95, "learning_rate": 3.416471285741235e-05, "loss": 2.9743, "step": 11770 }, { "epoch": 0.95, "learning_rate": 3.41579755841216e-05, "loss": 2.7895, "step": 11775 }, { "epoch": 0.95, "learning_rate": 3.415123831083084e-05, "loss": 3.0708, "step": 11780 }, { "epoch": 0.95, "learning_rate": 3.414450103754009e-05, "loss": 2.9679, "step": 11785 }, { "epoch": 0.95, "learning_rate": 3.413776376424934e-05, "loss": 2.998, "step": 11790 }, { "epoch": 0.95, "learning_rate": 3.413102649095858e-05, "loss": 2.9087, "step": 11795 }, { "epoch": 0.95, "learning_rate": 3.412428921766783e-05, "loss": 2.9266, "step": 11800 }, { "epoch": 0.95, "learning_rate": 3.4117551944377076e-05, "loss": 3.1942, "step": 11805 }, { "epoch": 0.95, "learning_rate": 3.411081467108632e-05, "loss": 3.1146, "step": 11810 }, { "epoch": 0.96, "learning_rate": 3.410407739779557e-05, "loss": 3.1195, "step": 11815 }, { "epoch": 0.96, "learning_rate": 3.409734012450481e-05, "loss": 2.8045, "step": 11820 }, { "epoch": 0.96, "learning_rate": 3.409060285121406e-05, "loss": 3.0467, "step": 11825 }, { "epoch": 0.96, "learning_rate": 3.40838655779233e-05, "loss": 2.8286, "step": 11830 }, { "epoch": 0.96, "learning_rate": 3.407712830463255e-05, "loss": 3.0586, "step": 11835 }, { "epoch": 0.96, "learning_rate": 3.40703910313418e-05, "loss": 2.9959, "step": 11840 }, { "epoch": 0.96, "learning_rate": 3.406365375805105e-05, "loss": 3.0457, "step": 11845 }, { "epoch": 0.96, "learning_rate": 3.405691648476029e-05, "loss": 2.9982, "step": 11850 }, { "epoch": 0.96, "learning_rate": 3.405017921146954e-05, "loss": 2.8829, "step": 11855 }, { "epoch": 0.96, "learning_rate": 3.404344193817878e-05, "loss": 3.3715, "step": 11860 }, { "epoch": 0.96, "learning_rate": 3.403670466488803e-05, "loss": 3.0459, "step": 11865 }, { "epoch": 0.96, "learning_rate": 3.402996739159727e-05, "loss": 2.8927, "step": 11870 }, { "epoch": 0.96, "learning_rate": 3.402323011830652e-05, "loss": 2.9431, "step": 11875 }, { "epoch": 0.96, "learning_rate": 3.401649284501577e-05, "loss": 2.9148, "step": 11880 }, { "epoch": 0.96, "learning_rate": 3.400975557172501e-05, "loss": 2.9739, "step": 11885 }, { "epoch": 0.96, "learning_rate": 3.400301829843426e-05, "loss": 2.8028, "step": 11890 }, { "epoch": 0.96, "learning_rate": 3.399628102514351e-05, "loss": 2.9279, "step": 11895 }, { "epoch": 0.96, "learning_rate": 3.398954375185275e-05, "loss": 3.0049, "step": 11900 }, { "epoch": 0.96, "learning_rate": 3.398280647856199e-05, "loss": 3.0606, "step": 11905 }, { "epoch": 0.96, "learning_rate": 3.397606920527124e-05, "loss": 2.9626, "step": 11910 }, { "epoch": 0.96, "learning_rate": 3.396933193198049e-05, "loss": 3.2141, "step": 11915 }, { "epoch": 0.96, "learning_rate": 3.396259465868974e-05, "loss": 2.9912, "step": 11920 }, { "epoch": 0.96, "learning_rate": 3.395585738539898e-05, "loss": 2.9204, "step": 11925 }, { "epoch": 0.96, "learning_rate": 3.394912011210823e-05, "loss": 2.7776, "step": 11930 }, { "epoch": 0.96, "learning_rate": 3.394238283881748e-05, "loss": 3.2191, "step": 11935 }, { "epoch": 0.97, "learning_rate": 3.393564556552672e-05, "loss": 2.8625, "step": 11940 }, { "epoch": 0.97, "learning_rate": 3.3928908292235964e-05, "loss": 3.0099, "step": 11945 }, { "epoch": 0.97, "learning_rate": 3.392217101894521e-05, "loss": 3.1315, "step": 11950 }, { "epoch": 0.97, "learning_rate": 3.391543374565446e-05, "loss": 3.0517, "step": 11955 }, { "epoch": 0.97, "learning_rate": 3.3908696472363703e-05, "loss": 3.1481, "step": 11960 }, { "epoch": 0.97, "learning_rate": 3.390195919907295e-05, "loss": 2.9481, "step": 11965 }, { "epoch": 0.97, "learning_rate": 3.38952219257822e-05, "loss": 3.0222, "step": 11970 }, { "epoch": 0.97, "learning_rate": 3.388848465249145e-05, "loss": 3.0445, "step": 11975 }, { "epoch": 0.97, "learning_rate": 3.388174737920069e-05, "loss": 3.1314, "step": 11980 }, { "epoch": 0.97, "learning_rate": 3.387501010590994e-05, "loss": 2.888, "step": 11985 }, { "epoch": 0.97, "learning_rate": 3.386827283261918e-05, "loss": 3.1614, "step": 11990 }, { "epoch": 0.97, "learning_rate": 3.3861535559328425e-05, "loss": 3.1802, "step": 11995 }, { "epoch": 0.97, "learning_rate": 3.3854798286037674e-05, "loss": 3.0269, "step": 12000 }, { "epoch": 0.97, "eval_loss": 2.9837048053741455, "eval_rouge2_fmeasure": 0.0042, "eval_rouge2_precision": 0.01, "eval_rouge2_recall": 0.0033, "eval_runtime": 2865.3591, "eval_samples_per_second": 0.096, "eval_steps_per_second": 0.048, "step": 12000 } ], "max_steps": 37107, "num_train_epochs": 3, "total_flos": 4.68270352171008e+17, "trial_name": null, "trial_params": null }