SISUSIK / trainer_state.json
exontidev's picture
Upload 12 files
9e30eee
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.17394329448599757,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.4999999999999999e-05,
"loss": 5.797,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 4.2e-05,
"loss": 4.6838,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 7.199999999999999e-05,
"loss": 5.6319,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 0.000102,
"loss": 5.0822,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 0.000129,
"loss": 4.2645,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 0.000159,
"loss": 4.4867,
"step": 60
},
{
"epoch": 0.0,
"learning_rate": 0.00018899999999999999,
"loss": 4.321,
"step": 70
},
{
"epoch": 0.0,
"learning_rate": 0.00021899999999999998,
"loss": 3.9697,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 0.000249,
"loss": 3.8873,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 0.000279,
"loss": 3.9087,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 0.0002999947786737831,
"loss": 3.7777,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 0.0002999773742530603,
"loss": 4.0537,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 0.00029995996983233736,
"loss": 3.8181,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 0.00029994256541161454,
"loss": 3.7261,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 0.00029992516099089166,
"loss": 4.0132,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 0.0002999077565701688,
"loss": 3.7437,
"step": 160
},
{
"epoch": 0.01,
"learning_rate": 0.0002998903521494459,
"loss": 3.5736,
"step": 170
},
{
"epoch": 0.01,
"learning_rate": 0.00029987294772872305,
"loss": 3.8578,
"step": 180
},
{
"epoch": 0.01,
"learning_rate": 0.00029985554330800023,
"loss": 3.5236,
"step": 190
},
{
"epoch": 0.01,
"learning_rate": 0.00029983813888727736,
"loss": 3.8339,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 0.0002998207344665545,
"loss": 3.8672,
"step": 210
},
{
"epoch": 0.01,
"learning_rate": 0.0002998033300458316,
"loss": 3.4447,
"step": 220
},
{
"epoch": 0.01,
"learning_rate": 0.00029978592562510874,
"loss": 3.3767,
"step": 230
},
{
"epoch": 0.01,
"learning_rate": 0.0002997685212043859,
"loss": 3.8753,
"step": 240
},
{
"epoch": 0.01,
"learning_rate": 0.000299751116783663,
"loss": 3.825,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 0.0002997337123629402,
"loss": 3.7691,
"step": 260
},
{
"epoch": 0.02,
"learning_rate": 0.0002997163079422173,
"loss": 3.7529,
"step": 270
},
{
"epoch": 0.02,
"learning_rate": 0.00029969890352149443,
"loss": 3.7102,
"step": 280
},
{
"epoch": 0.02,
"learning_rate": 0.00029968149910077155,
"loss": 3.6104,
"step": 290
},
{
"epoch": 0.02,
"learning_rate": 0.0002996640946800487,
"loss": 3.7396,
"step": 300
},
{
"epoch": 0.02,
"learning_rate": 0.00029964669025932586,
"loss": 3.5363,
"step": 310
},
{
"epoch": 0.02,
"learning_rate": 0.000299629285838603,
"loss": 3.7725,
"step": 320
},
{
"epoch": 0.02,
"learning_rate": 0.0002996118814178801,
"loss": 3.728,
"step": 330
},
{
"epoch": 0.02,
"learning_rate": 0.00029959447699715725,
"loss": 3.333,
"step": 340
},
{
"epoch": 0.02,
"learning_rate": 0.00029957707257643437,
"loss": 3.7651,
"step": 350
},
{
"epoch": 0.02,
"learning_rate": 0.00029955966815571155,
"loss": 3.6743,
"step": 360
},
{
"epoch": 0.02,
"learning_rate": 0.00029954226373498863,
"loss": 3.6236,
"step": 370
},
{
"epoch": 0.02,
"learning_rate": 0.0002995248593142658,
"loss": 3.837,
"step": 380
},
{
"epoch": 0.02,
"learning_rate": 0.00029950745489354294,
"loss": 3.8097,
"step": 390
},
{
"epoch": 0.02,
"learning_rate": 0.00029949005047282006,
"loss": 3.886,
"step": 400
},
{
"epoch": 0.02,
"learning_rate": 0.0002994726460520972,
"loss": 3.7826,
"step": 410
},
{
"epoch": 0.02,
"learning_rate": 0.0002994552416313743,
"loss": 3.6854,
"step": 420
},
{
"epoch": 0.02,
"learning_rate": 0.0002994378372106515,
"loss": 3.4106,
"step": 430
},
{
"epoch": 0.03,
"learning_rate": 0.00029942043278992857,
"loss": 3.7991,
"step": 440
},
{
"epoch": 0.03,
"learning_rate": 0.00029940302836920575,
"loss": 3.5353,
"step": 450
},
{
"epoch": 0.03,
"learning_rate": 0.0002993856239484829,
"loss": 3.7661,
"step": 460
},
{
"epoch": 0.03,
"learning_rate": 0.00029936821952776,
"loss": 3.43,
"step": 470
},
{
"epoch": 0.03,
"learning_rate": 0.00029935081510703714,
"loss": 3.5965,
"step": 480
},
{
"epoch": 0.03,
"learning_rate": 0.00029933341068631426,
"loss": 3.6576,
"step": 490
},
{
"epoch": 0.03,
"learning_rate": 0.00029931600626559144,
"loss": 3.6634,
"step": 500
},
{
"epoch": 0.03,
"learning_rate": 0.00029929860184486857,
"loss": 3.5374,
"step": 510
},
{
"epoch": 0.03,
"learning_rate": 0.0002992811974241457,
"loss": 3.6912,
"step": 520
},
{
"epoch": 0.03,
"learning_rate": 0.0002992637930034228,
"loss": 3.6375,
"step": 530
},
{
"epoch": 0.03,
"learning_rate": 0.00029924638858269995,
"loss": 3.3367,
"step": 540
},
{
"epoch": 0.03,
"learning_rate": 0.00029922898416197714,
"loss": 3.7179,
"step": 550
},
{
"epoch": 0.03,
"learning_rate": 0.00029921157974125426,
"loss": 3.5059,
"step": 560
},
{
"epoch": 0.03,
"learning_rate": 0.0002991941753205314,
"loss": 3.2627,
"step": 570
},
{
"epoch": 0.03,
"learning_rate": 0.0002991767708998085,
"loss": 3.5826,
"step": 580
},
{
"epoch": 0.03,
"learning_rate": 0.0002991593664790857,
"loss": 3.5854,
"step": 590
},
{
"epoch": 0.03,
"learning_rate": 0.00029914196205836277,
"loss": 3.6205,
"step": 600
},
{
"epoch": 0.04,
"learning_rate": 0.00029912455763763995,
"loss": 3.3627,
"step": 610
},
{
"epoch": 0.04,
"learning_rate": 0.0002991071532169171,
"loss": 3.7299,
"step": 620
},
{
"epoch": 0.04,
"learning_rate": 0.0002990897487961942,
"loss": 3.4119,
"step": 630
},
{
"epoch": 0.04,
"learning_rate": 0.00029907234437547133,
"loss": 3.6321,
"step": 640
},
{
"epoch": 0.04,
"learning_rate": 0.00029905493995474846,
"loss": 3.4665,
"step": 650
},
{
"epoch": 0.04,
"learning_rate": 0.00029903753553402564,
"loss": 3.3959,
"step": 660
},
{
"epoch": 0.04,
"learning_rate": 0.00029902013111330277,
"loss": 3.4348,
"step": 670
},
{
"epoch": 0.04,
"learning_rate": 0.0002990027266925799,
"loss": 3.6478,
"step": 680
},
{
"epoch": 0.04,
"learning_rate": 0.000298985322271857,
"loss": 3.5589,
"step": 690
},
{
"epoch": 0.04,
"learning_rate": 0.00029896791785113415,
"loss": 3.4357,
"step": 700
},
{
"epoch": 0.04,
"learning_rate": 0.00029895051343041133,
"loss": 3.5833,
"step": 710
},
{
"epoch": 0.04,
"learning_rate": 0.0002989331090096884,
"loss": 3.4633,
"step": 720
},
{
"epoch": 0.04,
"learning_rate": 0.0002989157045889656,
"loss": 3.4848,
"step": 730
},
{
"epoch": 0.04,
"learning_rate": 0.0002988983001682427,
"loss": 3.1688,
"step": 740
},
{
"epoch": 0.04,
"learning_rate": 0.00029888089574751984,
"loss": 3.7193,
"step": 750
},
{
"epoch": 0.04,
"learning_rate": 0.00029886349132679697,
"loss": 3.5895,
"step": 760
},
{
"epoch": 0.04,
"learning_rate": 0.0002988460869060741,
"loss": 3.5518,
"step": 770
},
{
"epoch": 0.05,
"learning_rate": 0.0002988286824853513,
"loss": 3.5631,
"step": 780
},
{
"epoch": 0.05,
"learning_rate": 0.0002988112780646284,
"loss": 3.5545,
"step": 790
},
{
"epoch": 0.05,
"learning_rate": 0.00029879387364390553,
"loss": 3.5699,
"step": 800
},
{
"epoch": 0.05,
"learning_rate": 0.00029877646922318266,
"loss": 3.5634,
"step": 810
},
{
"epoch": 0.05,
"learning_rate": 0.0002987590648024598,
"loss": 3.607,
"step": 820
},
{
"epoch": 0.05,
"learning_rate": 0.00029874166038173697,
"loss": 3.3944,
"step": 830
},
{
"epoch": 0.05,
"learning_rate": 0.00029872425596101404,
"loss": 3.5578,
"step": 840
},
{
"epoch": 0.05,
"learning_rate": 0.0002987068515402912,
"loss": 3.3392,
"step": 850
},
{
"epoch": 0.05,
"learning_rate": 0.00029868944711956835,
"loss": 3.5491,
"step": 860
},
{
"epoch": 0.05,
"learning_rate": 0.0002986720426988455,
"loss": 3.4634,
"step": 870
},
{
"epoch": 0.05,
"learning_rate": 0.0002986546382781226,
"loss": 3.481,
"step": 880
},
{
"epoch": 0.05,
"learning_rate": 0.00029863723385739973,
"loss": 3.4969,
"step": 890
},
{
"epoch": 0.05,
"learning_rate": 0.0002986198294366769,
"loss": 3.4256,
"step": 900
},
{
"epoch": 0.05,
"learning_rate": 0.00029860242501595404,
"loss": 3.3778,
"step": 910
},
{
"epoch": 0.05,
"learning_rate": 0.00029858502059523117,
"loss": 3.291,
"step": 920
},
{
"epoch": 0.05,
"learning_rate": 0.0002985676161745083,
"loss": 3.5129,
"step": 930
},
{
"epoch": 0.05,
"learning_rate": 0.0002985502117537854,
"loss": 3.5895,
"step": 940
},
{
"epoch": 0.06,
"learning_rate": 0.0002985328073330626,
"loss": 3.4668,
"step": 950
},
{
"epoch": 0.06,
"learning_rate": 0.0002985154029123397,
"loss": 3.4919,
"step": 960
},
{
"epoch": 0.06,
"learning_rate": 0.00029849799849161686,
"loss": 3.5387,
"step": 970
},
{
"epoch": 0.06,
"learning_rate": 0.000298480594070894,
"loss": 3.5237,
"step": 980
},
{
"epoch": 0.06,
"learning_rate": 0.0002984631896501711,
"loss": 3.5108,
"step": 990
},
{
"epoch": 0.06,
"learning_rate": 0.00029844578522944824,
"loss": 3.6527,
"step": 1000
},
{
"epoch": 0.06,
"learning_rate": 0.00029842838080872537,
"loss": 3.2056,
"step": 1010
},
{
"epoch": 0.06,
"learning_rate": 0.00029841097638800255,
"loss": 3.4018,
"step": 1020
},
{
"epoch": 0.06,
"learning_rate": 0.0002983935719672797,
"loss": 3.4554,
"step": 1030
},
{
"epoch": 0.06,
"learning_rate": 0.0002983761675465568,
"loss": 3.4236,
"step": 1040
},
{
"epoch": 0.06,
"learning_rate": 0.00029835876312583393,
"loss": 3.3244,
"step": 1050
},
{
"epoch": 0.06,
"learning_rate": 0.00029834135870511106,
"loss": 3.4164,
"step": 1060
},
{
"epoch": 0.06,
"learning_rate": 0.00029832395428438824,
"loss": 3.0626,
"step": 1070
},
{
"epoch": 0.06,
"learning_rate": 0.0002983065498636653,
"loss": 3.7244,
"step": 1080
},
{
"epoch": 0.06,
"learning_rate": 0.0002982891454429425,
"loss": 3.4565,
"step": 1090
},
{
"epoch": 0.06,
"learning_rate": 0.0002982717410222196,
"loss": 3.5781,
"step": 1100
},
{
"epoch": 0.06,
"learning_rate": 0.00029825433660149675,
"loss": 3.2126,
"step": 1110
},
{
"epoch": 0.06,
"learning_rate": 0.0002982369321807739,
"loss": 3.6434,
"step": 1120
},
{
"epoch": 0.07,
"learning_rate": 0.000298219527760051,
"loss": 3.4129,
"step": 1130
},
{
"epoch": 0.07,
"learning_rate": 0.0002982021233393282,
"loss": 3.2887,
"step": 1140
},
{
"epoch": 0.07,
"learning_rate": 0.0002981847189186053,
"loss": 3.5407,
"step": 1150
},
{
"epoch": 0.07,
"learning_rate": 0.00029816731449788244,
"loss": 3.5717,
"step": 1160
},
{
"epoch": 0.07,
"learning_rate": 0.00029814991007715957,
"loss": 3.6888,
"step": 1170
},
{
"epoch": 0.07,
"learning_rate": 0.0002981325056564367,
"loss": 3.6167,
"step": 1180
},
{
"epoch": 0.07,
"learning_rate": 0.0002981151012357139,
"loss": 3.3514,
"step": 1190
},
{
"epoch": 0.07,
"learning_rate": 0.00029809769681499095,
"loss": 3.4163,
"step": 1200
},
{
"epoch": 0.07,
"learning_rate": 0.00029808029239426813,
"loss": 3.5967,
"step": 1210
},
{
"epoch": 0.07,
"learning_rate": 0.00029806288797354526,
"loss": 3.4587,
"step": 1220
},
{
"epoch": 0.07,
"learning_rate": 0.0002980454835528224,
"loss": 3.3907,
"step": 1230
},
{
"epoch": 0.07,
"learning_rate": 0.0002980280791320995,
"loss": 3.6969,
"step": 1240
},
{
"epoch": 0.07,
"learning_rate": 0.00029801067471137664,
"loss": 3.2609,
"step": 1250
},
{
"epoch": 0.07,
"learning_rate": 0.0002979932702906538,
"loss": 3.5595,
"step": 1260
},
{
"epoch": 0.07,
"learning_rate": 0.00029797586586993095,
"loss": 3.3332,
"step": 1270
},
{
"epoch": 0.07,
"learning_rate": 0.0002979584614492081,
"loss": 3.4112,
"step": 1280
},
{
"epoch": 0.07,
"learning_rate": 0.0002979410570284852,
"loss": 3.4097,
"step": 1290
},
{
"epoch": 0.08,
"learning_rate": 0.00029792365260776233,
"loss": 3.619,
"step": 1300
},
{
"epoch": 0.08,
"learning_rate": 0.0002979062481870395,
"loss": 3.2694,
"step": 1310
},
{
"epoch": 0.08,
"learning_rate": 0.0002978888437663166,
"loss": 3.6746,
"step": 1320
},
{
"epoch": 0.08,
"learning_rate": 0.00029787143934559377,
"loss": 3.3979,
"step": 1330
},
{
"epoch": 0.08,
"learning_rate": 0.0002978540349248709,
"loss": 3.5019,
"step": 1340
},
{
"epoch": 0.08,
"learning_rate": 0.000297836630504148,
"loss": 3.2628,
"step": 1350
},
{
"epoch": 0.08,
"learning_rate": 0.00029781922608342515,
"loss": 3.2965,
"step": 1360
},
{
"epoch": 0.08,
"learning_rate": 0.0002978018216627023,
"loss": 3.6118,
"step": 1370
},
{
"epoch": 0.08,
"learning_rate": 0.00029778441724197946,
"loss": 3.5439,
"step": 1380
},
{
"epoch": 0.08,
"learning_rate": 0.00029776701282125653,
"loss": 3.1755,
"step": 1390
},
{
"epoch": 0.08,
"learning_rate": 0.0002977496084005337,
"loss": 3.6912,
"step": 1400
},
{
"epoch": 0.08,
"learning_rate": 0.00029773220397981084,
"loss": 3.2572,
"step": 1410
},
{
"epoch": 0.08,
"learning_rate": 0.00029771479955908797,
"loss": 3.364,
"step": 1420
},
{
"epoch": 0.08,
"learning_rate": 0.0002976973951383651,
"loss": 3.4011,
"step": 1430
},
{
"epoch": 0.08,
"learning_rate": 0.0002976799907176422,
"loss": 3.5433,
"step": 1440
},
{
"epoch": 0.08,
"learning_rate": 0.0002976625862969194,
"loss": 3.4217,
"step": 1450
},
{
"epoch": 0.08,
"learning_rate": 0.00029764518187619653,
"loss": 3.331,
"step": 1460
},
{
"epoch": 0.09,
"learning_rate": 0.00029762777745547366,
"loss": 3.1582,
"step": 1470
},
{
"epoch": 0.09,
"learning_rate": 0.0002976103730347508,
"loss": 3.3783,
"step": 1480
},
{
"epoch": 0.09,
"learning_rate": 0.0002975929686140279,
"loss": 3.5096,
"step": 1490
},
{
"epoch": 0.09,
"learning_rate": 0.0002975755641933051,
"loss": 3.3047,
"step": 1500
},
{
"epoch": 0.09,
"learning_rate": 0.00029755815977258217,
"loss": 3.3239,
"step": 1510
},
{
"epoch": 0.09,
"learning_rate": 0.00029754075535185935,
"loss": 3.3897,
"step": 1520
},
{
"epoch": 0.09,
"learning_rate": 0.0002975233509311365,
"loss": 2.9298,
"step": 1530
},
{
"epoch": 0.09,
"learning_rate": 0.0002975059465104136,
"loss": 3.2706,
"step": 1540
},
{
"epoch": 0.09,
"learning_rate": 0.00029748854208969073,
"loss": 3.6979,
"step": 1550
},
{
"epoch": 0.09,
"learning_rate": 0.00029747113766896786,
"loss": 3.3929,
"step": 1560
},
{
"epoch": 0.09,
"learning_rate": 0.00029745373324824504,
"loss": 3.3763,
"step": 1570
},
{
"epoch": 0.09,
"learning_rate": 0.00029743632882752217,
"loss": 3.4914,
"step": 1580
},
{
"epoch": 0.09,
"learning_rate": 0.0002974189244067993,
"loss": 3.4507,
"step": 1590
},
{
"epoch": 0.09,
"learning_rate": 0.0002974015199860764,
"loss": 3.4506,
"step": 1600
},
{
"epoch": 0.09,
"learning_rate": 0.00029738411556535355,
"loss": 3.2699,
"step": 1610
},
{
"epoch": 0.09,
"learning_rate": 0.00029736671114463073,
"loss": 3.5554,
"step": 1620
},
{
"epoch": 0.09,
"learning_rate": 0.00029734930672390786,
"loss": 3.677,
"step": 1630
},
{
"epoch": 0.1,
"learning_rate": 0.000297331902303185,
"loss": 3.3466,
"step": 1640
},
{
"epoch": 0.1,
"learning_rate": 0.0002973144978824621,
"loss": 3.4125,
"step": 1650
},
{
"epoch": 0.1,
"learning_rate": 0.00029729709346173924,
"loss": 3.4123,
"step": 1660
},
{
"epoch": 0.1,
"learning_rate": 0.00029727968904101637,
"loss": 3.1869,
"step": 1670
},
{
"epoch": 0.1,
"learning_rate": 0.00029726228462029355,
"loss": 3.5066,
"step": 1680
},
{
"epoch": 0.1,
"learning_rate": 0.0002972448801995707,
"loss": 3.68,
"step": 1690
},
{
"epoch": 0.1,
"learning_rate": 0.0002972274757788478,
"loss": 3.3881,
"step": 1700
},
{
"epoch": 0.1,
"learning_rate": 0.00029721007135812493,
"loss": 3.5452,
"step": 1710
},
{
"epoch": 0.1,
"learning_rate": 0.00029719266693740206,
"loss": 3.4605,
"step": 1720
},
{
"epoch": 0.1,
"learning_rate": 0.00029717526251667924,
"loss": 3.5601,
"step": 1730
},
{
"epoch": 0.1,
"learning_rate": 0.00029715785809595637,
"loss": 3.3588,
"step": 1740
},
{
"epoch": 0.1,
"learning_rate": 0.0002971404536752335,
"loss": 3.3235,
"step": 1750
},
{
"epoch": 0.1,
"learning_rate": 0.0002971230492545106,
"loss": 3.6218,
"step": 1760
},
{
"epoch": 0.1,
"learning_rate": 0.00029710564483378775,
"loss": 3.4837,
"step": 1770
},
{
"epoch": 0.1,
"learning_rate": 0.00029708824041306493,
"loss": 3.402,
"step": 1780
},
{
"epoch": 0.1,
"learning_rate": 0.000297070835992342,
"loss": 3.2912,
"step": 1790
},
{
"epoch": 0.1,
"learning_rate": 0.0002970534315716192,
"loss": 3.704,
"step": 1800
},
{
"epoch": 0.1,
"learning_rate": 0.0002970360271508963,
"loss": 3.6123,
"step": 1810
},
{
"epoch": 0.11,
"learning_rate": 0.00029701862273017344,
"loss": 3.3626,
"step": 1820
},
{
"epoch": 0.11,
"learning_rate": 0.00029700121830945057,
"loss": 3.2765,
"step": 1830
},
{
"epoch": 0.11,
"learning_rate": 0.0002969838138887277,
"loss": 3.3083,
"step": 1840
},
{
"epoch": 0.11,
"learning_rate": 0.0002969664094680049,
"loss": 3.8005,
"step": 1850
},
{
"epoch": 0.11,
"learning_rate": 0.000296949005047282,
"loss": 3.6119,
"step": 1860
},
{
"epoch": 0.11,
"learning_rate": 0.00029693160062655913,
"loss": 3.2547,
"step": 1870
},
{
"epoch": 0.11,
"learning_rate": 0.00029691419620583626,
"loss": 3.4915,
"step": 1880
},
{
"epoch": 0.11,
"learning_rate": 0.0002968967917851134,
"loss": 3.4441,
"step": 1890
},
{
"epoch": 0.11,
"learning_rate": 0.00029687938736439057,
"loss": 3.6537,
"step": 1900
},
{
"epoch": 0.11,
"learning_rate": 0.00029686198294366764,
"loss": 3.2625,
"step": 1910
},
{
"epoch": 0.11,
"learning_rate": 0.0002968445785229448,
"loss": 3.411,
"step": 1920
},
{
"epoch": 0.11,
"learning_rate": 0.00029682717410222195,
"loss": 2.997,
"step": 1930
},
{
"epoch": 0.11,
"learning_rate": 0.0002968097696814991,
"loss": 3.3158,
"step": 1940
},
{
"epoch": 0.11,
"learning_rate": 0.0002967923652607762,
"loss": 3.0668,
"step": 1950
},
{
"epoch": 0.11,
"learning_rate": 0.00029677496084005333,
"loss": 3.3956,
"step": 1960
},
{
"epoch": 0.11,
"learning_rate": 0.0002967575564193305,
"loss": 3.5991,
"step": 1970
},
{
"epoch": 0.11,
"learning_rate": 0.00029674015199860764,
"loss": 3.5781,
"step": 1980
},
{
"epoch": 0.12,
"learning_rate": 0.00029672274757788477,
"loss": 3.5981,
"step": 1990
},
{
"epoch": 0.12,
"learning_rate": 0.0002967053431571619,
"loss": 3.6671,
"step": 2000
},
{
"epoch": 0.12,
"learning_rate": 0.000296687938736439,
"loss": 2.9026,
"step": 2010
},
{
"epoch": 0.12,
"learning_rate": 0.0002966705343157162,
"loss": 3.2852,
"step": 2020
},
{
"epoch": 0.12,
"learning_rate": 0.0002966531298949933,
"loss": 3.439,
"step": 2030
},
{
"epoch": 0.12,
"learning_rate": 0.00029663572547427046,
"loss": 3.3326,
"step": 2040
},
{
"epoch": 0.12,
"learning_rate": 0.0002966183210535476,
"loss": 3.3541,
"step": 2050
},
{
"epoch": 0.12,
"learning_rate": 0.0002966009166328247,
"loss": 3.3194,
"step": 2060
},
{
"epoch": 0.12,
"learning_rate": 0.00029658351221210184,
"loss": 3.5999,
"step": 2070
},
{
"epoch": 0.12,
"learning_rate": 0.00029656610779137896,
"loss": 3.1402,
"step": 2080
},
{
"epoch": 0.12,
"learning_rate": 0.00029654870337065615,
"loss": 3.0779,
"step": 2090
},
{
"epoch": 0.12,
"learning_rate": 0.0002965312989499333,
"loss": 3.4571,
"step": 2100
},
{
"epoch": 0.12,
"learning_rate": 0.0002965138945292104,
"loss": 3.2815,
"step": 2110
},
{
"epoch": 0.12,
"learning_rate": 0.00029649649010848753,
"loss": 3.2003,
"step": 2120
},
{
"epoch": 0.12,
"learning_rate": 0.00029647908568776466,
"loss": 3.5262,
"step": 2130
},
{
"epoch": 0.12,
"learning_rate": 0.00029646168126704184,
"loss": 3.1835,
"step": 2140
},
{
"epoch": 0.12,
"learning_rate": 0.0002964442768463189,
"loss": 3.0947,
"step": 2150
},
{
"epoch": 0.13,
"learning_rate": 0.0002964268724255961,
"loss": 3.5398,
"step": 2160
},
{
"epoch": 0.13,
"learning_rate": 0.0002964094680048732,
"loss": 3.319,
"step": 2170
},
{
"epoch": 0.13,
"learning_rate": 0.00029639206358415035,
"loss": 3.3722,
"step": 2180
},
{
"epoch": 0.13,
"learning_rate": 0.0002963746591634275,
"loss": 3.3396,
"step": 2190
},
{
"epoch": 0.13,
"learning_rate": 0.0002963572547427046,
"loss": 3.2734,
"step": 2200
},
{
"epoch": 0.13,
"learning_rate": 0.0002963398503219818,
"loss": 3.3672,
"step": 2210
},
{
"epoch": 0.13,
"learning_rate": 0.0002963224459012589,
"loss": 3.3559,
"step": 2220
},
{
"epoch": 0.13,
"learning_rate": 0.00029630504148053604,
"loss": 3.3601,
"step": 2230
},
{
"epoch": 0.13,
"learning_rate": 0.00029628763705981316,
"loss": 3.6175,
"step": 2240
},
{
"epoch": 0.13,
"learning_rate": 0.0002962702326390903,
"loss": 3.2681,
"step": 2250
},
{
"epoch": 0.13,
"learning_rate": 0.00029625282821836747,
"loss": 3.3181,
"step": 2260
},
{
"epoch": 0.13,
"learning_rate": 0.00029623542379764455,
"loss": 3.5367,
"step": 2270
},
{
"epoch": 0.13,
"learning_rate": 0.00029621801937692173,
"loss": 3.2207,
"step": 2280
},
{
"epoch": 0.13,
"learning_rate": 0.00029620061495619885,
"loss": 3.2358,
"step": 2290
},
{
"epoch": 0.13,
"learning_rate": 0.000296183210535476,
"loss": 3.4455,
"step": 2300
},
{
"epoch": 0.13,
"learning_rate": 0.0002961658061147531,
"loss": 3.4017,
"step": 2310
},
{
"epoch": 0.13,
"learning_rate": 0.00029614840169403024,
"loss": 3.4288,
"step": 2320
},
{
"epoch": 0.14,
"learning_rate": 0.0002961309972733074,
"loss": 3.3411,
"step": 2330
},
{
"epoch": 0.14,
"learning_rate": 0.00029611359285258455,
"loss": 3.1286,
"step": 2340
},
{
"epoch": 0.14,
"learning_rate": 0.00029609618843186167,
"loss": 2.9427,
"step": 2350
},
{
"epoch": 0.14,
"learning_rate": 0.0002960787840111388,
"loss": 3.4088,
"step": 2360
},
{
"epoch": 0.14,
"learning_rate": 0.0002960613795904159,
"loss": 3.3628,
"step": 2370
},
{
"epoch": 0.14,
"learning_rate": 0.00029604397516969305,
"loss": 3.4993,
"step": 2380
},
{
"epoch": 0.14,
"learning_rate": 0.0002960265707489702,
"loss": 3.6364,
"step": 2390
},
{
"epoch": 0.14,
"learning_rate": 0.00029600916632824736,
"loss": 3.482,
"step": 2400
},
{
"epoch": 0.14,
"learning_rate": 0.0002959917619075245,
"loss": 3.4856,
"step": 2410
},
{
"epoch": 0.14,
"learning_rate": 0.0002959743574868016,
"loss": 3.4327,
"step": 2420
},
{
"epoch": 0.14,
"learning_rate": 0.00029595695306607874,
"loss": 3.4458,
"step": 2430
},
{
"epoch": 0.14,
"learning_rate": 0.00029593954864535587,
"loss": 3.3191,
"step": 2440
},
{
"epoch": 0.14,
"learning_rate": 0.00029592214422463305,
"loss": 3.4966,
"step": 2450
},
{
"epoch": 0.14,
"learning_rate": 0.0002959047398039101,
"loss": 3.1979,
"step": 2460
},
{
"epoch": 0.14,
"learning_rate": 0.0002958873353831873,
"loss": 3.3932,
"step": 2470
},
{
"epoch": 0.14,
"learning_rate": 0.00029586993096246444,
"loss": 3.1029,
"step": 2480
},
{
"epoch": 0.14,
"learning_rate": 0.00029585252654174156,
"loss": 3.2549,
"step": 2490
},
{
"epoch": 0.14,
"learning_rate": 0.0002958351221210187,
"loss": 3.3986,
"step": 2500
},
{
"epoch": 0.15,
"learning_rate": 0.0002958177177002958,
"loss": 3.22,
"step": 2510
},
{
"epoch": 0.15,
"learning_rate": 0.000295800313279573,
"loss": 3.0488,
"step": 2520
},
{
"epoch": 0.15,
"learning_rate": 0.0002957829088588501,
"loss": 3.5085,
"step": 2530
},
{
"epoch": 0.15,
"learning_rate": 0.00029576550443812725,
"loss": 3.2679,
"step": 2540
},
{
"epoch": 0.15,
"learning_rate": 0.0002957481000174044,
"loss": 3.4828,
"step": 2550
},
{
"epoch": 0.15,
"learning_rate": 0.0002957306955966815,
"loss": 3.4081,
"step": 2560
},
{
"epoch": 0.15,
"learning_rate": 0.0002957132911759587,
"loss": 3.3543,
"step": 2570
},
{
"epoch": 0.15,
"learning_rate": 0.00029569588675523576,
"loss": 3.6927,
"step": 2580
},
{
"epoch": 0.15,
"learning_rate": 0.00029567848233451294,
"loss": 3.215,
"step": 2590
},
{
"epoch": 0.15,
"learning_rate": 0.00029566107791379007,
"loss": 3.502,
"step": 2600
},
{
"epoch": 0.15,
"learning_rate": 0.0002956436734930672,
"loss": 3.4641,
"step": 2610
},
{
"epoch": 0.15,
"learning_rate": 0.0002956262690723443,
"loss": 3.2631,
"step": 2620
},
{
"epoch": 0.15,
"learning_rate": 0.00029560886465162145,
"loss": 3.3747,
"step": 2630
},
{
"epoch": 0.15,
"learning_rate": 0.00029559146023089863,
"loss": 3.3669,
"step": 2640
},
{
"epoch": 0.15,
"learning_rate": 0.00029557405581017576,
"loss": 2.8722,
"step": 2650
},
{
"epoch": 0.15,
"learning_rate": 0.0002955566513894529,
"loss": 3.4885,
"step": 2660
},
{
"epoch": 0.15,
"learning_rate": 0.00029553924696873,
"loss": 3.5238,
"step": 2670
},
{
"epoch": 0.16,
"learning_rate": 0.00029552184254800714,
"loss": 3.2318,
"step": 2680
},
{
"epoch": 0.16,
"learning_rate": 0.0002955044381272843,
"loss": 3.4054,
"step": 2690
},
{
"epoch": 0.16,
"learning_rate": 0.00029548703370656145,
"loss": 3.2475,
"step": 2700
},
{
"epoch": 0.16,
"learning_rate": 0.0002954696292858386,
"loss": 3.3495,
"step": 2710
},
{
"epoch": 0.16,
"learning_rate": 0.0002954522248651157,
"loss": 3.5605,
"step": 2720
},
{
"epoch": 0.16,
"learning_rate": 0.00029543482044439283,
"loss": 3.0016,
"step": 2730
},
{
"epoch": 0.16,
"learning_rate": 0.00029541741602366996,
"loss": 3.1471,
"step": 2740
},
{
"epoch": 0.16,
"learning_rate": 0.00029540001160294714,
"loss": 3.5481,
"step": 2750
},
{
"epoch": 0.16,
"learning_rate": 0.00029538260718222427,
"loss": 3.4854,
"step": 2760
},
{
"epoch": 0.16,
"learning_rate": 0.0002953652027615014,
"loss": 3.535,
"step": 2770
},
{
"epoch": 0.16,
"learning_rate": 0.0002953477983407785,
"loss": 3.2613,
"step": 2780
},
{
"epoch": 0.16,
"learning_rate": 0.00029533039392005565,
"loss": 3.3677,
"step": 2790
},
{
"epoch": 0.16,
"learning_rate": 0.00029531298949933283,
"loss": 3.2767,
"step": 2800
},
{
"epoch": 0.16,
"learning_rate": 0.00029529558507860996,
"loss": 3.4752,
"step": 2810
},
{
"epoch": 0.16,
"learning_rate": 0.0002952781806578871,
"loss": 3.6749,
"step": 2820
},
{
"epoch": 0.16,
"learning_rate": 0.0002952607762371642,
"loss": 3.2192,
"step": 2830
},
{
"epoch": 0.16,
"learning_rate": 0.00029524337181644134,
"loss": 3.2643,
"step": 2840
},
{
"epoch": 0.17,
"learning_rate": 0.0002952259673957185,
"loss": 3.5323,
"step": 2850
},
{
"epoch": 0.17,
"learning_rate": 0.0002952085629749956,
"loss": 3.1769,
"step": 2860
},
{
"epoch": 0.17,
"learning_rate": 0.0002951911585542728,
"loss": 3.4551,
"step": 2870
},
{
"epoch": 0.17,
"learning_rate": 0.0002951737541335499,
"loss": 3.387,
"step": 2880
},
{
"epoch": 0.17,
"learning_rate": 0.00029515634971282703,
"loss": 3.2243,
"step": 2890
},
{
"epoch": 0.17,
"learning_rate": 0.00029513894529210416,
"loss": 2.9179,
"step": 2900
},
{
"epoch": 0.17,
"learning_rate": 0.0002951215408713813,
"loss": 3.2905,
"step": 2910
},
{
"epoch": 0.17,
"learning_rate": 0.00029510413645065847,
"loss": 3.262,
"step": 2920
},
{
"epoch": 0.17,
"learning_rate": 0.0002950867320299356,
"loss": 3.2404,
"step": 2930
},
{
"epoch": 0.17,
"learning_rate": 0.0002950693276092127,
"loss": 3.577,
"step": 2940
},
{
"epoch": 0.17,
"learning_rate": 0.00029505192318848985,
"loss": 3.5627,
"step": 2950
},
{
"epoch": 0.17,
"learning_rate": 0.000295034518767767,
"loss": 3.5328,
"step": 2960
},
{
"epoch": 0.17,
"learning_rate": 0.00029501711434704416,
"loss": 3.5149,
"step": 2970
},
{
"epoch": 0.17,
"learning_rate": 0.00029499970992632123,
"loss": 3.2421,
"step": 2980
},
{
"epoch": 0.17,
"learning_rate": 0.0002949823055055984,
"loss": 3.0618,
"step": 2990
},
{
"epoch": 0.17,
"learning_rate": 0.00029496490108487554,
"loss": 3.2825,
"step": 3000
}
],
"max_steps": 172470,
"num_train_epochs": 10,
"total_flos": 6.54510518697984e+16,
"trial_name": null,
"trial_params": null
}