{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.4140012070006036,
"eval_steps": 500,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"grad_norm": 53.90464782714844,
"learning_rate": 9.946323134728933e-06,
"loss": 5.2914,
"step": 10
},
{
"epoch": 0.03,
"grad_norm": 53.05311965942383,
"learning_rate": 9.892646269457864e-06,
"loss": 4.9539,
"step": 20
},
{
"epoch": 0.05,
"grad_norm": 64.5956802368164,
"learning_rate": 9.838969404186796e-06,
"loss": 4.7374,
"step": 30
},
{
"epoch": 0.06,
"grad_norm": 54.43781280517578,
"learning_rate": 9.785292538915728e-06,
"loss": 4.5988,
"step": 40
},
{
"epoch": 0.08,
"grad_norm": 48.784854888916016,
"learning_rate": 9.73161567364466e-06,
"loss": 4.4899,
"step": 50
},
{
"epoch": 0.1,
"grad_norm": 56.800575256347656,
"learning_rate": 9.677938808373591e-06,
"loss": 4.4391,
"step": 60
},
{
"epoch": 0.11,
"grad_norm": 51.58082962036133,
"learning_rate": 9.624261943102525e-06,
"loss": 4.3347,
"step": 70
},
{
"epoch": 0.13,
"grad_norm": 50.625308990478516,
"learning_rate": 9.570585077831455e-06,
"loss": 4.3145,
"step": 80
},
{
"epoch": 0.14,
"grad_norm": 46.56022262573242,
"learning_rate": 9.516908212560388e-06,
"loss": 4.1742,
"step": 90
},
{
"epoch": 0.16,
"grad_norm": 51.267852783203125,
"learning_rate": 9.463231347289318e-06,
"loss": 4.1308,
"step": 100
},
{
"epoch": 0.18,
"grad_norm": 52.627098083496094,
"learning_rate": 9.40955448201825e-06,
"loss": 4.1251,
"step": 110
},
{
"epoch": 0.19,
"grad_norm": 51.49211120605469,
"learning_rate": 9.355877616747183e-06,
"loss": 4.073,
"step": 120
},
{
"epoch": 0.21,
"grad_norm": 50.88691329956055,
"learning_rate": 9.302200751476115e-06,
"loss": 3.9998,
"step": 130
},
{
"epoch": 0.23,
"grad_norm": 45.862796783447266,
"learning_rate": 9.248523886205046e-06,
"loss": 3.8666,
"step": 140
},
{
"epoch": 0.24,
"grad_norm": 51.27406692504883,
"learning_rate": 9.194847020933978e-06,
"loss": 3.9477,
"step": 150
},
{
"epoch": 0.26,
"grad_norm": 47.50687026977539,
"learning_rate": 9.14117015566291e-06,
"loss": 3.7899,
"step": 160
},
{
"epoch": 0.27,
"grad_norm": 48.58837127685547,
"learning_rate": 9.087493290391842e-06,
"loss": 3.7295,
"step": 170
},
{
"epoch": 0.29,
"grad_norm": 48.3990592956543,
"learning_rate": 9.033816425120775e-06,
"loss": 3.7023,
"step": 180
},
{
"epoch": 0.31,
"grad_norm": 46.30027770996094,
"learning_rate": 8.980139559849705e-06,
"loss": 3.6479,
"step": 190
},
{
"epoch": 0.32,
"grad_norm": 47.15484619140625,
"learning_rate": 8.926462694578637e-06,
"loss": 3.6161,
"step": 200
},
{
"epoch": 0.34,
"grad_norm": 49.22386932373047,
"learning_rate": 8.87278582930757e-06,
"loss": 3.5503,
"step": 210
},
{
"epoch": 0.35,
"grad_norm": 45.485557556152344,
"learning_rate": 8.819108964036502e-06,
"loss": 3.4459,
"step": 220
},
{
"epoch": 0.37,
"grad_norm": 50.84394454956055,
"learning_rate": 8.765432098765432e-06,
"loss": 3.4918,
"step": 230
},
{
"epoch": 0.39,
"grad_norm": 43.17815399169922,
"learning_rate": 8.711755233494365e-06,
"loss": 3.3527,
"step": 240
},
{
"epoch": 0.4,
"grad_norm": 41.90092849731445,
"learning_rate": 8.658078368223295e-06,
"loss": 3.347,
"step": 250
},
{
"epoch": 0.42,
"grad_norm": 43.23625564575195,
"learning_rate": 8.60440150295223e-06,
"loss": 3.3433,
"step": 260
},
{
"epoch": 0.43,
"grad_norm": 44.05680847167969,
"learning_rate": 8.55072463768116e-06,
"loss": 3.2831,
"step": 270
},
{
"epoch": 0.45,
"grad_norm": 46.9146842956543,
"learning_rate": 8.497047772410092e-06,
"loss": 3.2932,
"step": 280
},
{
"epoch": 0.47,
"grad_norm": 48.542701721191406,
"learning_rate": 8.443370907139024e-06,
"loss": 3.2108,
"step": 290
},
{
"epoch": 0.48,
"grad_norm": 39.1197509765625,
"learning_rate": 8.389694041867955e-06,
"loss": 3.1704,
"step": 300
},
{
"epoch": 0.5,
"grad_norm": 45.300697326660156,
"learning_rate": 8.336017176596887e-06,
"loss": 3.2239,
"step": 310
},
{
"epoch": 0.51,
"grad_norm": 47.439823150634766,
"learning_rate": 8.28234031132582e-06,
"loss": 3.1709,
"step": 320
},
{
"epoch": 0.53,
"grad_norm": 42.67869567871094,
"learning_rate": 8.228663446054752e-06,
"loss": 3.0708,
"step": 330
},
{
"epoch": 0.55,
"grad_norm": 46.0591926574707,
"learning_rate": 8.174986580783682e-06,
"loss": 3.0959,
"step": 340
},
{
"epoch": 0.56,
"grad_norm": 42.061805725097656,
"learning_rate": 8.121309715512614e-06,
"loss": 3.0442,
"step": 350
},
{
"epoch": 0.58,
"grad_norm": 44.487159729003906,
"learning_rate": 8.067632850241547e-06,
"loss": 2.8881,
"step": 360
},
{
"epoch": 0.6,
"grad_norm": 44.4419059753418,
"learning_rate": 8.013955984970479e-06,
"loss": 2.9994,
"step": 370
},
{
"epoch": 0.61,
"grad_norm": 40.00767135620117,
"learning_rate": 7.96027911969941e-06,
"loss": 2.8931,
"step": 380
},
{
"epoch": 0.63,
"grad_norm": 44.14014434814453,
"learning_rate": 7.906602254428342e-06,
"loss": 2.9288,
"step": 390
},
{
"epoch": 0.64,
"grad_norm": 42.418888092041016,
"learning_rate": 7.852925389157274e-06,
"loss": 2.7776,
"step": 400
},
{
"epoch": 0.66,
"grad_norm": 49.98628234863281,
"learning_rate": 7.799248523886206e-06,
"loss": 2.8408,
"step": 410
},
{
"epoch": 0.68,
"grad_norm": 39.240352630615234,
"learning_rate": 7.745571658615137e-06,
"loss": 2.7995,
"step": 420
},
{
"epoch": 0.69,
"grad_norm": 43.185569763183594,
"learning_rate": 7.691894793344069e-06,
"loss": 2.7486,
"step": 430
},
{
"epoch": 0.71,
"grad_norm": 37.41790008544922,
"learning_rate": 7.638217928073001e-06,
"loss": 2.7459,
"step": 440
},
{
"epoch": 0.72,
"grad_norm": 38.520973205566406,
"learning_rate": 7.584541062801934e-06,
"loss": 2.6785,
"step": 450
},
{
"epoch": 0.74,
"grad_norm": 42.01523971557617,
"learning_rate": 7.530864197530865e-06,
"loss": 2.7745,
"step": 460
},
{
"epoch": 0.76,
"grad_norm": 40.55296325683594,
"learning_rate": 7.477187332259796e-06,
"loss": 2.7046,
"step": 470
},
{
"epoch": 0.77,
"grad_norm": 38.31270217895508,
"learning_rate": 7.423510466988728e-06,
"loss": 2.6057,
"step": 480
},
{
"epoch": 0.79,
"grad_norm": 37.31733322143555,
"learning_rate": 7.369833601717661e-06,
"loss": 2.5229,
"step": 490
},
{
"epoch": 0.8,
"grad_norm": 37.55893325805664,
"learning_rate": 7.316156736446592e-06,
"loss": 2.5846,
"step": 500
},
{
"epoch": 0.82,
"grad_norm": 40.64212417602539,
"learning_rate": 7.262479871175524e-06,
"loss": 2.4992,
"step": 510
},
{
"epoch": 0.84,
"grad_norm": 41.34495544433594,
"learning_rate": 7.208803005904456e-06,
"loss": 2.5575,
"step": 520
},
{
"epoch": 0.85,
"grad_norm": 38.53882598876953,
"learning_rate": 7.155126140633387e-06,
"loss": 2.5468,
"step": 530
},
{
"epoch": 0.87,
"grad_norm": 39.23023986816406,
"learning_rate": 7.10144927536232e-06,
"loss": 2.5009,
"step": 540
},
{
"epoch": 0.89,
"grad_norm": 35.68672180175781,
"learning_rate": 7.047772410091251e-06,
"loss": 2.2929,
"step": 550
},
{
"epoch": 0.9,
"grad_norm": 38.0786247253418,
"learning_rate": 6.994095544820183e-06,
"loss": 2.3178,
"step": 560
},
{
"epoch": 0.92,
"grad_norm": 42.21394348144531,
"learning_rate": 6.940418679549115e-06,
"loss": 2.4645,
"step": 570
},
{
"epoch": 0.93,
"grad_norm": 37.197696685791016,
"learning_rate": 6.886741814278046e-06,
"loss": 2.4797,
"step": 580
},
{
"epoch": 0.95,
"grad_norm": 40.341514587402344,
"learning_rate": 6.833064949006979e-06,
"loss": 2.3048,
"step": 590
},
{
"epoch": 0.97,
"grad_norm": 36.51962661743164,
"learning_rate": 6.779388083735911e-06,
"loss": 2.4652,
"step": 600
},
{
"epoch": 0.98,
"grad_norm": 35.32337951660156,
"learning_rate": 6.725711218464842e-06,
"loss": 2.2979,
"step": 610
},
{
"epoch": 1.0,
"grad_norm": 40.75404739379883,
"learning_rate": 6.672034353193773e-06,
"loss": 2.354,
"step": 620
},
{
"epoch": 1.01,
"grad_norm": 30.498510360717773,
"learning_rate": 6.6183574879227065e-06,
"loss": 1.8592,
"step": 630
},
{
"epoch": 1.03,
"grad_norm": 31.13385772705078,
"learning_rate": 6.564680622651638e-06,
"loss": 1.7776,
"step": 640
},
{
"epoch": 1.05,
"grad_norm": 34.9401969909668,
"learning_rate": 6.511003757380569e-06,
"loss": 1.9329,
"step": 650
},
{
"epoch": 1.06,
"grad_norm": 32.90480041503906,
"learning_rate": 6.457326892109501e-06,
"loss": 1.8312,
"step": 660
},
{
"epoch": 1.08,
"grad_norm": 32.94902420043945,
"learning_rate": 6.403650026838433e-06,
"loss": 1.8556,
"step": 670
},
{
"epoch": 1.09,
"grad_norm": 32.02881622314453,
"learning_rate": 6.349973161567365e-06,
"loss": 1.8142,
"step": 680
},
{
"epoch": 1.11,
"grad_norm": 32.752323150634766,
"learning_rate": 6.296296296296297e-06,
"loss": 1.7429,
"step": 690
},
{
"epoch": 1.13,
"grad_norm": 31.938289642333984,
"learning_rate": 6.242619431025229e-06,
"loss": 1.821,
"step": 700
},
{
"epoch": 1.14,
"grad_norm": 32.64255142211914,
"learning_rate": 6.18894256575416e-06,
"loss": 1.7492,
"step": 710
},
{
"epoch": 1.16,
"grad_norm": 30.172483444213867,
"learning_rate": 6.135265700483092e-06,
"loss": 1.7661,
"step": 720
},
{
"epoch": 1.17,
"grad_norm": 32.1895637512207,
"learning_rate": 6.081588835212025e-06,
"loss": 1.6979,
"step": 730
},
{
"epoch": 1.19,
"grad_norm": 32.555870056152344,
"learning_rate": 6.027911969940956e-06,
"loss": 1.778,
"step": 740
},
{
"epoch": 1.21,
"grad_norm": 31.702539443969727,
"learning_rate": 5.974235104669888e-06,
"loss": 1.7263,
"step": 750
},
{
"epoch": 1.22,
"grad_norm": 32.07310104370117,
"learning_rate": 5.920558239398819e-06,
"loss": 1.5491,
"step": 760
},
{
"epoch": 1.24,
"grad_norm": 31.130224227905273,
"learning_rate": 5.866881374127752e-06,
"loss": 1.6249,
"step": 770
},
{
"epoch": 1.26,
"grad_norm": 39.838436126708984,
"learning_rate": 5.8132045088566835e-06,
"loss": 1.6721,
"step": 780
},
{
"epoch": 1.27,
"grad_norm": 33.75567626953125,
"learning_rate": 5.759527643585615e-06,
"loss": 1.6779,
"step": 790
},
{
"epoch": 1.29,
"grad_norm": 31.251935958862305,
"learning_rate": 5.705850778314546e-06,
"loss": 1.6401,
"step": 800
},
{
"epoch": 1.3,
"grad_norm": 31.644649505615234,
"learning_rate": 5.652173913043479e-06,
"loss": 1.6647,
"step": 810
},
{
"epoch": 1.32,
"grad_norm": 30.14424705505371,
"learning_rate": 5.598497047772411e-06,
"loss": 1.6887,
"step": 820
},
{
"epoch": 1.34,
"grad_norm": 29.70695686340332,
"learning_rate": 5.544820182501342e-06,
"loss": 1.5387,
"step": 830
},
{
"epoch": 1.35,
"grad_norm": 31.330068588256836,
"learning_rate": 5.4911433172302745e-06,
"loss": 1.6414,
"step": 840
},
{
"epoch": 1.37,
"grad_norm": 32.08658981323242,
"learning_rate": 5.437466451959206e-06,
"loss": 1.6158,
"step": 850
},
{
"epoch": 1.38,
"grad_norm": 33.42084503173828,
"learning_rate": 5.383789586688137e-06,
"loss": 1.6728,
"step": 860
},
{
"epoch": 1.4,
"grad_norm": 32.10792922973633,
"learning_rate": 5.3301127214170704e-06,
"loss": 1.5365,
"step": 870
},
{
"epoch": 1.42,
"grad_norm": 34.231239318847656,
"learning_rate": 5.276435856146002e-06,
"loss": 1.584,
"step": 880
},
{
"epoch": 1.43,
"grad_norm": 32.19587326049805,
"learning_rate": 5.222758990874933e-06,
"loss": 1.6233,
"step": 890
},
{
"epoch": 1.45,
"grad_norm": 30.36279296875,
"learning_rate": 5.169082125603865e-06,
"loss": 1.5246,
"step": 900
},
{
"epoch": 1.46,
"grad_norm": 33.34714889526367,
"learning_rate": 5.115405260332798e-06,
"loss": 1.5514,
"step": 910
},
{
"epoch": 1.48,
"grad_norm": 32.581424713134766,
"learning_rate": 5.061728395061729e-06,
"loss": 1.495,
"step": 920
},
{
"epoch": 1.5,
"grad_norm": 33.158203125,
"learning_rate": 5.0080515297906606e-06,
"loss": 1.5546,
"step": 930
},
{
"epoch": 1.51,
"grad_norm": 29.796606063842773,
"learning_rate": 4.954374664519592e-06,
"loss": 1.5141,
"step": 940
},
{
"epoch": 1.53,
"grad_norm": 31.936180114746094,
"learning_rate": 4.900697799248524e-06,
"loss": 1.5156,
"step": 950
},
{
"epoch": 1.54,
"grad_norm": 30.770095825195312,
"learning_rate": 4.847020933977456e-06,
"loss": 1.5262,
"step": 960
},
{
"epoch": 1.56,
"grad_norm": 32.497520446777344,
"learning_rate": 4.793344068706388e-06,
"loss": 1.5303,
"step": 970
},
{
"epoch": 1.58,
"grad_norm": 31.067218780517578,
"learning_rate": 4.739667203435319e-06,
"loss": 1.4994,
"step": 980
},
{
"epoch": 1.59,
"grad_norm": 27.720073699951172,
"learning_rate": 4.6859903381642516e-06,
"loss": 1.4268,
"step": 990
},
{
"epoch": 1.61,
"grad_norm": 30.310941696166992,
"learning_rate": 4.632313472893184e-06,
"loss": 1.4636,
"step": 1000
},
{
"epoch": 1.63,
"grad_norm": 33.62602996826172,
"learning_rate": 4.578636607622115e-06,
"loss": 1.4783,
"step": 1010
},
{
"epoch": 1.64,
"grad_norm": 28.9564266204834,
"learning_rate": 4.5249597423510475e-06,
"loss": 1.359,
"step": 1020
},
{
"epoch": 1.66,
"grad_norm": 29.886262893676758,
"learning_rate": 4.471282877079979e-06,
"loss": 1.4405,
"step": 1030
},
{
"epoch": 1.67,
"grad_norm": 26.291038513183594,
"learning_rate": 4.417606011808911e-06,
"loss": 1.3914,
"step": 1040
},
{
"epoch": 1.69,
"grad_norm": 30.628904342651367,
"learning_rate": 4.3639291465378425e-06,
"loss": 1.4335,
"step": 1050
},
{
"epoch": 1.71,
"grad_norm": 27.96939468383789,
"learning_rate": 4.310252281266775e-06,
"loss": 1.3577,
"step": 1060
},
{
"epoch": 1.72,
"grad_norm": 29.119224548339844,
"learning_rate": 4.256575415995706e-06,
"loss": 1.3808,
"step": 1070
},
{
"epoch": 1.74,
"grad_norm": 30.36097526550293,
"learning_rate": 4.202898550724638e-06,
"loss": 1.3545,
"step": 1080
},
{
"epoch": 1.75,
"grad_norm": 30.843242645263672,
"learning_rate": 4.14922168545357e-06,
"loss": 1.3751,
"step": 1090
},
{
"epoch": 1.77,
"grad_norm": 29.29217529296875,
"learning_rate": 4.095544820182501e-06,
"loss": 1.3649,
"step": 1100
},
{
"epoch": 1.79,
"grad_norm": 30.685625076293945,
"learning_rate": 4.0418679549114335e-06,
"loss": 1.4354,
"step": 1110
},
{
"epoch": 1.8,
"grad_norm": 26.101669311523438,
"learning_rate": 3.988191089640365e-06,
"loss": 1.3355,
"step": 1120
},
{
"epoch": 1.82,
"grad_norm": 29.12729835510254,
"learning_rate": 3.934514224369297e-06,
"loss": 1.3568,
"step": 1130
},
{
"epoch": 1.83,
"grad_norm": 27.82271957397461,
"learning_rate": 3.880837359098229e-06,
"loss": 1.3702,
"step": 1140
},
{
"epoch": 1.85,
"grad_norm": 26.432327270507812,
"learning_rate": 3.827160493827161e-06,
"loss": 1.3231,
"step": 1150
},
{
"epoch": 1.87,
"grad_norm": 30.632972717285156,
"learning_rate": 3.7734836285560927e-06,
"loss": 1.3283,
"step": 1160
},
{
"epoch": 1.88,
"grad_norm": 27.142309188842773,
"learning_rate": 3.7198067632850245e-06,
"loss": 1.3159,
"step": 1170
},
{
"epoch": 1.9,
"grad_norm": 27.63045310974121,
"learning_rate": 3.6661298980139563e-06,
"loss": 1.3777,
"step": 1180
},
{
"epoch": 1.92,
"grad_norm": 30.256242752075195,
"learning_rate": 3.612453032742888e-06,
"loss": 1.2845,
"step": 1190
},
{
"epoch": 1.93,
"grad_norm": 28.592174530029297,
"learning_rate": 3.5587761674718204e-06,
"loss": 1.3163,
"step": 1200
},
{
"epoch": 1.95,
"grad_norm": 29.088247299194336,
"learning_rate": 3.505099302200752e-06,
"loss": 1.3145,
"step": 1210
},
{
"epoch": 1.96,
"grad_norm": 27.801074981689453,
"learning_rate": 3.4514224369296832e-06,
"loss": 1.3675,
"step": 1220
},
{
"epoch": 1.98,
"grad_norm": 28.81484603881836,
"learning_rate": 3.3977455716586155e-06,
"loss": 1.2854,
"step": 1230
},
{
"epoch": 2.0,
"grad_norm": 28.966217041015625,
"learning_rate": 3.3440687063875473e-06,
"loss": 1.3431,
"step": 1240
},
{
"epoch": 2.01,
"grad_norm": 23.021453857421875,
"learning_rate": 3.290391841116479e-06,
"loss": 1.0516,
"step": 1250
},
{
"epoch": 2.03,
"grad_norm": 25.622419357299805,
"learning_rate": 3.236714975845411e-06,
"loss": 0.9667,
"step": 1260
},
{
"epoch": 2.04,
"grad_norm": 26.45795249938965,
"learning_rate": 3.1830381105743428e-06,
"loss": 0.9341,
"step": 1270
},
{
"epoch": 2.06,
"grad_norm": 26.28618812561035,
"learning_rate": 3.1293612453032746e-06,
"loss": 0.9024,
"step": 1280
},
{
"epoch": 2.08,
"grad_norm": 24.80799102783203,
"learning_rate": 3.075684380032206e-06,
"loss": 0.8784,
"step": 1290
},
{
"epoch": 2.09,
"grad_norm": 25.70199966430664,
"learning_rate": 3.0220075147611383e-06,
"loss": 0.9307,
"step": 1300
},
{
"epoch": 2.11,
"grad_norm": 24.88735580444336,
"learning_rate": 2.9683306494900697e-06,
"loss": 0.9037,
"step": 1310
},
{
"epoch": 2.12,
"grad_norm": 26.51141929626465,
"learning_rate": 2.914653784219002e-06,
"loss": 0.9884,
"step": 1320
},
{
"epoch": 2.14,
"grad_norm": 25.662946701049805,
"learning_rate": 2.8609769189479338e-06,
"loss": 0.8631,
"step": 1330
},
{
"epoch": 2.16,
"grad_norm": 22.733741760253906,
"learning_rate": 2.8073000536768656e-06,
"loss": 0.9273,
"step": 1340
},
{
"epoch": 2.17,
"grad_norm": 24.159793853759766,
"learning_rate": 2.7536231884057974e-06,
"loss": 0.9262,
"step": 1350
},
{
"epoch": 2.19,
"grad_norm": 23.92421531677246,
"learning_rate": 2.699946323134729e-06,
"loss": 0.9048,
"step": 1360
},
{
"epoch": 2.2,
"grad_norm": 28.564496994018555,
"learning_rate": 2.646269457863661e-06,
"loss": 0.9879,
"step": 1370
},
{
"epoch": 2.22,
"grad_norm": 25.430883407592773,
"learning_rate": 2.5925925925925925e-06,
"loss": 0.892,
"step": 1380
},
{
"epoch": 2.24,
"grad_norm": 23.307687759399414,
"learning_rate": 2.5389157273215247e-06,
"loss": 0.894,
"step": 1390
},
{
"epoch": 2.25,
"grad_norm": 25.83247184753418,
"learning_rate": 2.4852388620504566e-06,
"loss": 0.8817,
"step": 1400
},
{
"epoch": 2.27,
"grad_norm": 25.72507095336914,
"learning_rate": 2.4315619967793884e-06,
"loss": 0.9155,
"step": 1410
},
{
"epoch": 2.29,
"grad_norm": 26.67945098876953,
"learning_rate": 2.3778851315083202e-06,
"loss": 0.9325,
"step": 1420
},
{
"epoch": 2.3,
"grad_norm": 25.82522964477539,
"learning_rate": 2.324208266237252e-06,
"loss": 0.9055,
"step": 1430
},
{
"epoch": 2.32,
"grad_norm": 22.66315269470215,
"learning_rate": 2.270531400966184e-06,
"loss": 0.8631,
"step": 1440
},
{
"epoch": 2.33,
"grad_norm": 25.832313537597656,
"learning_rate": 2.2168545356951157e-06,
"loss": 0.9539,
"step": 1450
},
{
"epoch": 2.35,
"grad_norm": 23.865262985229492,
"learning_rate": 2.163177670424047e-06,
"loss": 0.8461,
"step": 1460
},
{
"epoch": 2.37,
"grad_norm": 23.32217025756836,
"learning_rate": 2.109500805152979e-06,
"loss": 0.8595,
"step": 1470
},
{
"epoch": 2.38,
"grad_norm": 24.299062728881836,
"learning_rate": 2.0558239398819112e-06,
"loss": 0.899,
"step": 1480
},
{
"epoch": 2.4,
"grad_norm": 25.582359313964844,
"learning_rate": 2.002147074610843e-06,
"loss": 0.898,
"step": 1490
},
{
"epoch": 2.41,
"grad_norm": 25.416006088256836,
"learning_rate": 1.948470209339775e-06,
"loss": 0.898,
"step": 1500
}
],
"logging_steps": 10,
"max_steps": 1863,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 5730542923874304.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}