Union-AI-OSS's picture
Upload folder using huggingface_hub
6a5b14e
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.969227041434456,
"eval_steps": 100,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.875e-05,
"loss": 2.5575,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 3.75e-05,
"loss": 2.5088,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 5.625e-05,
"loss": 2.5653,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 7.5e-05,
"loss": 2.5625,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 9.374999999999999e-05,
"loss": 2.4798,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 0.0001125,
"loss": 2.5331,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 0.00013125,
"loss": 2.6329,
"step": 7
},
{
"epoch": 0.02,
"learning_rate": 0.00015,
"loss": 2.5976,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 0.00016874999999999998,
"loss": 2.6081,
"step": 9
},
{
"epoch": 0.02,
"learning_rate": 0.00018749999999999998,
"loss": 2.6396,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 0.00020624999999999997,
"loss": 2.5221,
"step": 11
},
{
"epoch": 0.02,
"learning_rate": 0.000225,
"loss": 2.6252,
"step": 12
},
{
"epoch": 0.03,
"learning_rate": 0.00024375,
"loss": 2.5929,
"step": 13
},
{
"epoch": 0.03,
"learning_rate": 0.0002625,
"loss": 2.5922,
"step": 14
},
{
"epoch": 0.03,
"learning_rate": 0.00028125,
"loss": 2.4996,
"step": 15
},
{
"epoch": 0.03,
"learning_rate": 0.0003,
"loss": 2.5948,
"step": 16
},
{
"epoch": 0.03,
"learning_rate": 0.000299997027249348,
"loss": 2.5224,
"step": 17
},
{
"epoch": 0.03,
"learning_rate": 0.00029998810911522207,
"loss": 2.5954,
"step": 18
},
{
"epoch": 0.04,
"learning_rate": 0.0002999732459511074,
"loss": 2.5453,
"step": 19
},
{
"epoch": 0.04,
"learning_rate": 0.00029995243834613037,
"loss": 2.6684,
"step": 20
},
{
"epoch": 0.04,
"learning_rate": 0.0002999256871250353,
"loss": 2.5159,
"step": 21
},
{
"epoch": 0.04,
"learning_rate": 0.0002998929933481515,
"loss": 2.6213,
"step": 22
},
{
"epoch": 0.04,
"learning_rate": 0.0002998543583113518,
"loss": 2.6888,
"step": 23
},
{
"epoch": 0.05,
"learning_rate": 0.00029980978354600055,
"loss": 2.4422,
"step": 24
},
{
"epoch": 0.05,
"learning_rate": 0.00029975927081889316,
"loss": 2.5714,
"step": 25
},
{
"epoch": 0.05,
"learning_rate": 0.0002997028221321863,
"loss": 2.6195,
"step": 26
},
{
"epoch": 0.05,
"learning_rate": 0.0002996404397233182,
"loss": 2.6447,
"step": 27
},
{
"epoch": 0.05,
"learning_rate": 0.00029957212606492007,
"loss": 2.5583,
"step": 28
},
{
"epoch": 0.06,
"learning_rate": 0.00029949788386471836,
"loss": 2.5892,
"step": 29
},
{
"epoch": 0.06,
"learning_rate": 0.00029941771606542696,
"loss": 2.7688,
"step": 30
},
{
"epoch": 0.06,
"learning_rate": 0.00029933162584463096,
"loss": 2.6723,
"step": 31
},
{
"epoch": 0.06,
"learning_rate": 0.00029923961661466045,
"loss": 2.7775,
"step": 32
},
{
"epoch": 0.06,
"learning_rate": 0.0002991416920224554,
"loss": 2.6753,
"step": 33
},
{
"epoch": 0.07,
"learning_rate": 0.0002990378559494212,
"loss": 2.7772,
"step": 34
},
{
"epoch": 0.07,
"learning_rate": 0.0002989281125112744,
"loss": 2.8511,
"step": 35
},
{
"epoch": 0.07,
"learning_rate": 0.0002988124660578801,
"loss": 2.8939,
"step": 36
},
{
"epoch": 0.07,
"learning_rate": 0.0002986909211730792,
"loss": 2.8698,
"step": 37
},
{
"epoch": 0.07,
"learning_rate": 0.0002985634826745069,
"loss": 3.0635,
"step": 38
},
{
"epoch": 0.08,
"learning_rate": 0.0002984301556134016,
"loss": 3.0185,
"step": 39
},
{
"epoch": 0.08,
"learning_rate": 0.0002982909452744047,
"loss": 3.0233,
"step": 40
},
{
"epoch": 0.08,
"learning_rate": 0.0002981458571753512,
"loss": 3.1451,
"step": 41
},
{
"epoch": 0.08,
"learning_rate": 0.000297994897067051,
"loss": 3.2266,
"step": 42
},
{
"epoch": 0.08,
"learning_rate": 0.0002978380709330609,
"loss": 3.3143,
"step": 43
},
{
"epoch": 0.09,
"learning_rate": 0.0002976753849894475,
"loss": 3.3626,
"step": 44
},
{
"epoch": 0.09,
"learning_rate": 0.00029750684568454063,
"loss": 3.3557,
"step": 45
},
{
"epoch": 0.09,
"learning_rate": 0.00029733245969867814,
"loss": 3.486,
"step": 46
},
{
"epoch": 0.09,
"learning_rate": 0.0002971522339439407,
"loss": 3.4997,
"step": 47
},
{
"epoch": 0.09,
"learning_rate": 0.0002969661755638779,
"loss": 3.6059,
"step": 48
},
{
"epoch": 0.09,
"learning_rate": 0.00029677429193322556,
"loss": 3.7884,
"step": 49
},
{
"epoch": 0.1,
"learning_rate": 0.00029657659065761267,
"loss": 3.9539,
"step": 50
},
{
"epoch": 0.1,
"learning_rate": 0.0002963730795732607,
"loss": 4.0443,
"step": 51
},
{
"epoch": 0.1,
"learning_rate": 0.00029616376674667223,
"loss": 4.2825,
"step": 52
},
{
"epoch": 0.1,
"learning_rate": 0.000295948660474312,
"loss": 4.476,
"step": 53
},
{
"epoch": 0.1,
"learning_rate": 0.0002957277692822774,
"loss": 4.6866,
"step": 54
},
{
"epoch": 0.11,
"learning_rate": 0.0002955011019259608,
"loss": 5.0977,
"step": 55
},
{
"epoch": 0.11,
"learning_rate": 0.00029526866738970286,
"loss": 5.2591,
"step": 56
},
{
"epoch": 0.11,
"learning_rate": 0.0002950304748864356,
"loss": 5.6691,
"step": 57
},
{
"epoch": 0.11,
"learning_rate": 0.00029478653385731817,
"loss": 5.87,
"step": 58
},
{
"epoch": 0.11,
"learning_rate": 0.0002945368539713617,
"loss": 6.4257,
"step": 59
},
{
"epoch": 0.12,
"learning_rate": 0.000294281445125047,
"loss": 6.4306,
"step": 60
},
{
"epoch": 0.12,
"learning_rate": 0.0002940203174419314,
"loss": 6.9147,
"step": 61
},
{
"epoch": 0.12,
"learning_rate": 0.0002937534812722483,
"loss": 7.2439,
"step": 62
},
{
"epoch": 0.12,
"learning_rate": 0.00029348094719249614,
"loss": 7.2032,
"step": 63
},
{
"epoch": 0.12,
"learning_rate": 0.00029320272600501983,
"loss": 7.6343,
"step": 64
},
{
"epoch": 0.13,
"learning_rate": 0.0002929188287375824,
"loss": 7.7117,
"step": 65
},
{
"epoch": 0.13,
"learning_rate": 0.00029262926664292744,
"loss": 7.7917,
"step": 66
},
{
"epoch": 0.13,
"learning_rate": 0.00029233405119833387,
"loss": 8.1056,
"step": 67
},
{
"epoch": 0.13,
"learning_rate": 0.0002920331941051603,
"loss": 8.2865,
"step": 68
},
{
"epoch": 0.13,
"learning_rate": 0.0002917267072883818,
"loss": 8.6984,
"step": 69
},
{
"epoch": 0.14,
"learning_rate": 0.0002914146028961167,
"loss": 8.9353,
"step": 70
},
{
"epoch": 0.14,
"learning_rate": 0.0002910968932991455,
"loss": 8.8638,
"step": 71
},
{
"epoch": 0.14,
"learning_rate": 0.0002907735910904205,
"loss": 9.0658,
"step": 72
},
{
"epoch": 0.14,
"learning_rate": 0.0002904447090845662,
"loss": 9.4505,
"step": 73
},
{
"epoch": 0.14,
"learning_rate": 0.00029011026031737193,
"loss": 9.4891,
"step": 74
},
{
"epoch": 0.15,
"learning_rate": 0.000289770258045275,
"loss": 9.4665,
"step": 75
},
{
"epoch": 0.15,
"learning_rate": 0.00028942471574483497,
"loss": 10.0014,
"step": 76
},
{
"epoch": 0.15,
"learning_rate": 0.00028907364711219997,
"loss": 9.5228,
"step": 77
},
{
"epoch": 0.15,
"learning_rate": 0.00028871706606256326,
"loss": 9.6988,
"step": 78
},
{
"epoch": 0.15,
"learning_rate": 0.00028835498672961224,
"loss": 9.8295,
"step": 79
},
{
"epoch": 0.16,
"learning_rate": 0.0002879874234649679,
"loss": 9.302,
"step": 80
},
{
"epoch": 0.16,
"learning_rate": 0.00028761439083761596,
"loss": 8.9286,
"step": 81
},
{
"epoch": 0.16,
"learning_rate": 0.0002872359036333296,
"loss": 8.762,
"step": 82
},
{
"epoch": 0.16,
"learning_rate": 0.0002868519768540833,
"loss": 8.4371,
"step": 83
},
{
"epoch": 0.16,
"learning_rate": 0.0002864626257174581,
"loss": 8.2755,
"step": 84
},
{
"epoch": 0.16,
"learning_rate": 0.00028606786565603875,
"loss": 8.1114,
"step": 85
},
{
"epoch": 0.17,
"learning_rate": 0.0002856677123168015,
"loss": 7.8714,
"step": 86
},
{
"epoch": 0.17,
"learning_rate": 0.00028526218156049433,
"loss": 7.7255,
"step": 87
},
{
"epoch": 0.17,
"learning_rate": 0.0002848512894610083,
"loss": 7.6783,
"step": 88
},
{
"epoch": 0.17,
"learning_rate": 0.00028443505230474006,
"loss": 7.342,
"step": 89
},
{
"epoch": 0.17,
"learning_rate": 0.0002840134865899468,
"loss": 7.2997,
"step": 90
},
{
"epoch": 0.18,
"learning_rate": 0.00028358660902609166,
"loss": 7.2757,
"step": 91
},
{
"epoch": 0.18,
"learning_rate": 0.00028315443653318225,
"loss": 7.2005,
"step": 92
},
{
"epoch": 0.18,
"learning_rate": 0.00028271698624109933,
"loss": 7.0789,
"step": 93
},
{
"epoch": 0.18,
"learning_rate": 0.00028227427548891803,
"loss": 7.043,
"step": 94
},
{
"epoch": 0.18,
"learning_rate": 0.0002818263218242208,
"loss": 7.1089,
"step": 95
},
{
"epoch": 0.19,
"learning_rate": 0.00028137314300240166,
"loss": 6.9611,
"step": 96
},
{
"epoch": 0.19,
"learning_rate": 0.00028091475698596236,
"loss": 6.8566,
"step": 97
},
{
"epoch": 0.19,
"learning_rate": 0.0002804511819438006,
"loss": 7.0051,
"step": 98
},
{
"epoch": 0.19,
"learning_rate": 0.0002799824362504899,
"loss": 6.8284,
"step": 99
},
{
"epoch": 0.19,
"learning_rate": 0.00027950853848555116,
"loss": 6.7849,
"step": 100
},
{
"epoch": 0.19,
"eval_loss": 6.93548059463501,
"eval_runtime": 0.4634,
"eval_samples_per_second": 144.584,
"eval_steps_per_second": 10.79,
"step": 100
},
{
"epoch": 0.2,
"learning_rate": 0.0002790295074327162,
"loss": 6.9271,
"step": 101
},
{
"epoch": 0.2,
"learning_rate": 0.00027854536207918336,
"loss": 6.7996,
"step": 102
},
{
"epoch": 0.2,
"learning_rate": 0.00027805612161486477,
"loss": 6.7169,
"step": 103
},
{
"epoch": 0.2,
"learning_rate": 0.00027756180543162597,
"loss": 6.7617,
"step": 104
},
{
"epoch": 0.2,
"learning_rate": 0.000277062433122517,
"loss": 6.6488,
"step": 105
},
{
"epoch": 0.21,
"learning_rate": 0.000276558024480996,
"loss": 6.7194,
"step": 106
},
{
"epoch": 0.21,
"learning_rate": 0.00027604859950014455,
"loss": 6.5303,
"step": 107
},
{
"epoch": 0.21,
"learning_rate": 0.0002755341783718752,
"loss": 6.6088,
"step": 108
},
{
"epoch": 0.21,
"learning_rate": 0.00027501478148613114,
"loss": 6.5223,
"step": 109
},
{
"epoch": 0.21,
"learning_rate": 0.0002744904294300782,
"loss": 6.5884,
"step": 110
},
{
"epoch": 0.22,
"learning_rate": 0.00027396114298728865,
"loss": 6.4799,
"step": 111
},
{
"epoch": 0.22,
"learning_rate": 0.0002734269431369173,
"loss": 6.5031,
"step": 112
},
{
"epoch": 0.22,
"learning_rate": 0.00027288785105287024,
"loss": 6.483,
"step": 113
},
{
"epoch": 0.22,
"learning_rate": 0.0002723438881029654,
"loss": 6.517,
"step": 114
},
{
"epoch": 0.22,
"learning_rate": 0.00027179507584808554,
"loss": 6.4037,
"step": 115
},
{
"epoch": 0.22,
"learning_rate": 0.000271241436041324,
"loss": 6.3645,
"step": 116
},
{
"epoch": 0.23,
"learning_rate": 0.00027068299062712195,
"loss": 6.386,
"step": 117
},
{
"epoch": 0.23,
"learning_rate": 0.00027011976174039904,
"loss": 6.408,
"step": 118
},
{
"epoch": 0.23,
"learning_rate": 0.0002695517717056757,
"loss": 6.5254,
"step": 119
},
{
"epoch": 0.23,
"learning_rate": 0.0002689790430361887,
"loss": 6.2899,
"step": 120
},
{
"epoch": 0.23,
"learning_rate": 0.0002684015984329983,
"loss": 6.3701,
"step": 121
},
{
"epoch": 0.24,
"learning_rate": 0.00026781946078408876,
"loss": 6.3711,
"step": 122
},
{
"epoch": 0.24,
"learning_rate": 0.00026723265316346104,
"loss": 6.4739,
"step": 123
},
{
"epoch": 0.24,
"learning_rate": 0.00026664119883021843,
"loss": 6.4259,
"step": 124
},
{
"epoch": 0.24,
"learning_rate": 0.00026604512122764426,
"loss": 6.3035,
"step": 125
},
{
"epoch": 0.24,
"learning_rate": 0.0002654444439822729,
"loss": 6.26,
"step": 126
},
{
"epoch": 0.25,
"learning_rate": 0.0002648391909029534,
"loss": 6.3777,
"step": 127
},
{
"epoch": 0.25,
"learning_rate": 0.00026422938597990553,
"loss": 6.3888,
"step": 128
},
{
"epoch": 0.25,
"learning_rate": 0.0002636150533837691,
"loss": 6.1202,
"step": 129
},
{
"epoch": 0.25,
"learning_rate": 0.0002629962174646457,
"loss": 6.2316,
"step": 130
},
{
"epoch": 0.25,
"learning_rate": 0.00026237290275113386,
"loss": 6.1776,
"step": 131
},
{
"epoch": 0.26,
"learning_rate": 0.00026174513394935646,
"loss": 6.1823,
"step": 132
},
{
"epoch": 0.26,
"learning_rate": 0.0002611129359419817,
"loss": 6.1094,
"step": 133
},
{
"epoch": 0.26,
"learning_rate": 0.00026047633378723683,
"loss": 6.076,
"step": 134
},
{
"epoch": 0.26,
"learning_rate": 0.0002598353527179147,
"loss": 6.0366,
"step": 135
},
{
"epoch": 0.26,
"learning_rate": 0.00025919001814037393,
"loss": 6.0301,
"step": 136
},
{
"epoch": 0.27,
"learning_rate": 0.00025854035563353166,
"loss": 6.1283,
"step": 137
},
{
"epoch": 0.27,
"learning_rate": 0.0002578863909478497,
"loss": 5.8362,
"step": 138
},
{
"epoch": 0.27,
"learning_rate": 0.00025722815000431406,
"loss": 5.8631,
"step": 139
},
{
"epoch": 0.27,
"learning_rate": 0.0002565656588934073,
"loss": 5.8031,
"step": 140
},
{
"epoch": 0.27,
"learning_rate": 0.0002558989438740745,
"loss": 5.7674,
"step": 141
},
{
"epoch": 0.28,
"learning_rate": 0.00025522803137268253,
"loss": 5.7825,
"step": 142
},
{
"epoch": 0.28,
"learning_rate": 0.0002545529479819723,
"loss": 5.8328,
"step": 143
},
{
"epoch": 0.28,
"learning_rate": 0.000253873720460005,
"loss": 5.8554,
"step": 144
},
{
"epoch": 0.28,
"learning_rate": 0.0002531903757291015,
"loss": 5.8429,
"step": 145
},
{
"epoch": 0.28,
"learning_rate": 0.000252502940874775,
"loss": 5.7095,
"step": 146
},
{
"epoch": 0.28,
"learning_rate": 0.00025181144314465764,
"loss": 5.5604,
"step": 147
},
{
"epoch": 0.29,
"learning_rate": 0.0002511159099474205,
"loss": 5.5383,
"step": 148
},
{
"epoch": 0.29,
"learning_rate": 0.00025041636885168715,
"loss": 5.4359,
"step": 149
},
{
"epoch": 0.29,
"learning_rate": 0.0002497128475849408,
"loss": 5.2092,
"step": 150
},
{
"epoch": 0.29,
"learning_rate": 0.0002490053740324256,
"loss": 5.2694,
"step": 151
},
{
"epoch": 0.29,
"learning_rate": 0.000248293976236041,
"loss": 5.1733,
"step": 152
},
{
"epoch": 0.3,
"learning_rate": 0.0002475786823932306,
"loss": 4.9904,
"step": 153
},
{
"epoch": 0.3,
"learning_rate": 0.0002468595208558641,
"loss": 5.0725,
"step": 154
},
{
"epoch": 0.3,
"learning_rate": 0.00024613652012911403,
"loss": 5.1754,
"step": 155
},
{
"epoch": 0.3,
"learning_rate": 0.00024540970887032543,
"loss": 4.8482,
"step": 156
},
{
"epoch": 0.3,
"learning_rate": 0.00024467911588788016,
"loss": 5.0397,
"step": 157
},
{
"epoch": 0.31,
"learning_rate": 0.00024394477014005514,
"loss": 5.0625,
"step": 158
},
{
"epoch": 0.31,
"learning_rate": 0.0002432067007338744,
"loss": 5.1172,
"step": 159
},
{
"epoch": 0.31,
"learning_rate": 0.0002424649369239553,
"loss": 5.135,
"step": 160
},
{
"epoch": 0.31,
"learning_rate": 0.00024171950811134927,
"loss": 4.9951,
"step": 161
},
{
"epoch": 0.31,
"learning_rate": 0.00024097044384237607,
"loss": 4.9701,
"step": 162
},
{
"epoch": 0.32,
"learning_rate": 0.000240217773807453,
"loss": 4.9604,
"step": 163
},
{
"epoch": 0.32,
"learning_rate": 0.00023946152783991786,
"loss": 5.0689,
"step": 164
},
{
"epoch": 0.32,
"learning_rate": 0.0002387017359148466,
"loss": 4.9858,
"step": 165
},
{
"epoch": 0.32,
"learning_rate": 0.00023793842814786505,
"loss": 5.0683,
"step": 166
},
{
"epoch": 0.32,
"learning_rate": 0.00023717163479395538,
"loss": 5.0672,
"step": 167
},
{
"epoch": 0.33,
"learning_rate": 0.00023640138624625684,
"loss": 4.9967,
"step": 168
},
{
"epoch": 0.33,
"learning_rate": 0.00023562771303486108,
"loss": 5.1249,
"step": 169
},
{
"epoch": 0.33,
"learning_rate": 0.00023485064582560197,
"loss": 5.1674,
"step": 170
},
{
"epoch": 0.33,
"learning_rate": 0.00023407021541884025,
"loss": 4.9008,
"step": 171
},
{
"epoch": 0.33,
"learning_rate": 0.00023328645274824254,
"loss": 5.1389,
"step": 172
},
{
"epoch": 0.34,
"learning_rate": 0.00023249938887955543,
"loss": 4.9922,
"step": 173
},
{
"epoch": 0.34,
"learning_rate": 0.00023170905500937396,
"loss": 5.0586,
"step": 174
},
{
"epoch": 0.34,
"learning_rate": 0.0002309154824639052,
"loss": 5.1404,
"step": 175
},
{
"epoch": 0.34,
"learning_rate": 0.00023011870269772642,
"loss": 5.1627,
"step": 176
},
{
"epoch": 0.34,
"learning_rate": 0.00022931874729253856,
"loss": 5.0564,
"step": 177
},
{
"epoch": 0.35,
"learning_rate": 0.00022851564795591442,
"loss": 5.22,
"step": 178
},
{
"epoch": 0.35,
"learning_rate": 0.0002277094365200416,
"loss": 5.0221,
"step": 179
},
{
"epoch": 0.35,
"learning_rate": 0.00022690014494046104,
"loss": 5.0044,
"step": 180
},
{
"epoch": 0.35,
"learning_rate": 0.0002260878052948004,
"loss": 5.1381,
"step": 181
},
{
"epoch": 0.35,
"learning_rate": 0.00022527244978150248,
"loss": 4.931,
"step": 182
},
{
"epoch": 0.35,
"learning_rate": 0.0002244541107185491,
"loss": 5.0278,
"step": 183
},
{
"epoch": 0.36,
"learning_rate": 0.00022363282054217994,
"loss": 5.0771,
"step": 184
},
{
"epoch": 0.36,
"learning_rate": 0.0002228086118056072,
"loss": 5.1335,
"step": 185
},
{
"epoch": 0.36,
"learning_rate": 0.00022198151717772494,
"loss": 5.0262,
"step": 186
},
{
"epoch": 0.36,
"learning_rate": 0.00022115156944181442,
"loss": 5.1367,
"step": 187
},
{
"epoch": 0.36,
"learning_rate": 0.00022031880149424462,
"loss": 5.0092,
"step": 188
},
{
"epoch": 0.37,
"learning_rate": 0.00021948324634316833,
"loss": 4.9525,
"step": 189
},
{
"epoch": 0.37,
"learning_rate": 0.00021864493710721384,
"loss": 5.1732,
"step": 190
},
{
"epoch": 0.37,
"learning_rate": 0.00021780390701417216,
"loss": 4.944,
"step": 191
},
{
"epoch": 0.37,
"learning_rate": 0.00021696018939968,
"loss": 4.8821,
"step": 192
},
{
"epoch": 0.37,
"learning_rate": 0.00021611381770589866,
"loss": 4.9632,
"step": 193
},
{
"epoch": 0.38,
"learning_rate": 0.00021526482548018814,
"loss": 4.8977,
"step": 194
},
{
"epoch": 0.38,
"learning_rate": 0.00021441324637377768,
"loss": 4.9198,
"step": 195
},
{
"epoch": 0.38,
"learning_rate": 0.00021355911414043185,
"loss": 4.8276,
"step": 196
},
{
"epoch": 0.38,
"learning_rate": 0.00021270246263511273,
"loss": 4.8213,
"step": 197
},
{
"epoch": 0.38,
"learning_rate": 0.00021184332581263785,
"loss": 4.8753,
"step": 198
},
{
"epoch": 0.39,
"learning_rate": 0.00021098173772633462,
"loss": 4.9393,
"step": 199
},
{
"epoch": 0.39,
"learning_rate": 0.00021011773252669027,
"loss": 5.0001,
"step": 200
},
{
"epoch": 0.39,
"eval_loss": 5.125741481781006,
"eval_runtime": 0.4595,
"eval_samples_per_second": 145.823,
"eval_steps_per_second": 10.882,
"step": 200
},
{
"epoch": 0.39,
"learning_rate": 0.00020925134445999843,
"loss": 5.0326,
"step": 201
},
{
"epoch": 0.39,
"learning_rate": 0.0002083826078670016,
"loss": 4.9906,
"step": 202
},
{
"epoch": 0.39,
"learning_rate": 0.00020751155718153012,
"loss": 4.9179,
"step": 203
},
{
"epoch": 0.4,
"learning_rate": 0.00020663822692913722,
"loss": 5.0525,
"step": 204
},
{
"epoch": 0.4,
"learning_rate": 0.0002057626517257306,
"loss": 5.191,
"step": 205
},
{
"epoch": 0.4,
"learning_rate": 0.00020488486627620036,
"loss": 5.0969,
"step": 206
},
{
"epoch": 0.4,
"learning_rate": 0.00020400490537304336,
"loss": 5.1378,
"step": 207
},
{
"epoch": 0.4,
"learning_rate": 0.0002031228038949843,
"loss": 5.0901,
"step": 208
},
{
"epoch": 0.41,
"learning_rate": 0.00020223859680559305,
"loss": 5.2614,
"step": 209
},
{
"epoch": 0.41,
"learning_rate": 0.00020135231915189897,
"loss": 5.3052,
"step": 210
},
{
"epoch": 0.41,
"learning_rate": 0.00020046400606300177,
"loss": 5.2046,
"step": 211
},
{
"epoch": 0.41,
"learning_rate": 0.0001995736927486789,
"loss": 5.1249,
"step": 212
},
{
"epoch": 0.41,
"learning_rate": 0.00019868141449799016,
"loss": 5.3504,
"step": 213
},
{
"epoch": 0.41,
"learning_rate": 0.00019778720667787894,
"loss": 5.296,
"step": 214
},
{
"epoch": 0.42,
"learning_rate": 0.0001968911047317703,
"loss": 5.3431,
"step": 215
},
{
"epoch": 0.42,
"learning_rate": 0.00019599314417816617,
"loss": 5.3026,
"step": 216
},
{
"epoch": 0.42,
"learning_rate": 0.00019509336060923748,
"loss": 5.1208,
"step": 217
},
{
"epoch": 0.42,
"learning_rate": 0.00019419178968941344,
"loss": 5.2991,
"step": 218
},
{
"epoch": 0.42,
"learning_rate": 0.00019328846715396797,
"loss": 5.4355,
"step": 219
},
{
"epoch": 0.43,
"learning_rate": 0.00019238342880760305,
"loss": 5.3463,
"step": 220
},
{
"epoch": 0.43,
"learning_rate": 0.00019147671052302992,
"loss": 5.381,
"step": 221
},
{
"epoch": 0.43,
"learning_rate": 0.00019056834823954683,
"loss": 5.2602,
"step": 222
},
{
"epoch": 0.43,
"learning_rate": 0.00018965837796161464,
"loss": 5.6047,
"step": 223
},
{
"epoch": 0.43,
"learning_rate": 0.00018874683575742995,
"loss": 5.4004,
"step": 224
},
{
"epoch": 0.44,
"learning_rate": 0.0001878337577574951,
"loss": 5.3746,
"step": 225
},
{
"epoch": 0.44,
"learning_rate": 0.00018691918015318644,
"loss": 5.5042,
"step": 226
},
{
"epoch": 0.44,
"learning_rate": 0.0001860031391953195,
"loss": 5.6208,
"step": 227
},
{
"epoch": 0.44,
"learning_rate": 0.00018508567119271237,
"loss": 5.6673,
"step": 228
},
{
"epoch": 0.44,
"learning_rate": 0.00018416681251074633,
"loss": 5.6802,
"step": 229
},
{
"epoch": 0.45,
"learning_rate": 0.0001832465995699248,
"loss": 5.6571,
"step": 230
},
{
"epoch": 0.45,
"learning_rate": 0.00018232506884442932,
"loss": 5.5854,
"step": 231
},
{
"epoch": 0.45,
"learning_rate": 0.00018140225686067403,
"loss": 5.6822,
"step": 232
},
{
"epoch": 0.45,
"learning_rate": 0.00018047820019585805,
"loss": 5.6988,
"step": 233
},
{
"epoch": 0.45,
"learning_rate": 0.00017955293547651535,
"loss": 5.6732,
"step": 234
},
{
"epoch": 0.46,
"learning_rate": 0.00017862649937706323,
"loss": 5.628,
"step": 235
},
{
"epoch": 0.46,
"learning_rate": 0.00017769892861834867,
"loss": 5.6503,
"step": 236
},
{
"epoch": 0.46,
"learning_rate": 0.00017677025996619265,
"loss": 5.7701,
"step": 237
},
{
"epoch": 0.46,
"learning_rate": 0.000175840530229933,
"loss": 5.9086,
"step": 238
},
{
"epoch": 0.46,
"learning_rate": 0.00017490977626096558,
"loss": 5.8063,
"step": 239
},
{
"epoch": 0.47,
"learning_rate": 0.00017397803495128322,
"loss": 5.7099,
"step": 240
},
{
"epoch": 0.47,
"learning_rate": 0.0001730453432320137,
"loss": 5.8545,
"step": 241
},
{
"epoch": 0.47,
"learning_rate": 0.000172111738071956,
"loss": 5.7577,
"step": 242
},
{
"epoch": 0.47,
"learning_rate": 0.00017117725647611468,
"loss": 5.7155,
"step": 243
},
{
"epoch": 0.47,
"learning_rate": 0.0001702419354842334,
"loss": 5.614,
"step": 244
},
{
"epoch": 0.47,
"learning_rate": 0.0001693058121693267,
"loss": 5.8233,
"step": 245
},
{
"epoch": 0.48,
"learning_rate": 0.00016836892363621052,
"loss": 5.8977,
"step": 246
},
{
"epoch": 0.48,
"learning_rate": 0.00016743130702003147,
"loss": 5.7225,
"step": 247
},
{
"epoch": 0.48,
"learning_rate": 0.00016649299948479494,
"loss": 5.6069,
"step": 248
},
{
"epoch": 0.48,
"learning_rate": 0.00016555403822189214,
"loss": 5.7829,
"step": 249
},
{
"epoch": 0.48,
"learning_rate": 0.00016461446044862584,
"loss": 5.765,
"step": 250
},
{
"epoch": 0.49,
"learning_rate": 0.00016367430340673514,
"loss": 5.7193,
"step": 251
},
{
"epoch": 0.49,
"learning_rate": 0.0001627336043609196,
"loss": 5.5909,
"step": 252
},
{
"epoch": 0.49,
"learning_rate": 0.00016179240059736183,
"loss": 5.7002,
"step": 253
},
{
"epoch": 0.49,
"learning_rate": 0.00016085072942224985,
"loss": 5.7926,
"step": 254
},
{
"epoch": 0.49,
"learning_rate": 0.00015990862816029836,
"loss": 5.763,
"step": 255
},
{
"epoch": 0.5,
"learning_rate": 0.0001589661341532692,
"loss": 5.678,
"step": 256
},
{
"epoch": 0.5,
"learning_rate": 0.00015802328475849142,
"loss": 5.6742,
"step": 257
},
{
"epoch": 0.5,
"learning_rate": 0.00015708011734738033,
"loss": 5.7914,
"step": 258
},
{
"epoch": 0.5,
"learning_rate": 0.00015613666930395644,
"loss": 5.6897,
"step": 259
},
{
"epoch": 0.5,
"learning_rate": 0.00015519297802336354,
"loss": 5.7609,
"step": 260
},
{
"epoch": 0.51,
"learning_rate": 0.0001542490809103866,
"loss": 5.5896,
"step": 261
},
{
"epoch": 0.51,
"learning_rate": 0.00015330501537796906,
"loss": 5.7305,
"step": 262
},
{
"epoch": 0.51,
"learning_rate": 0.00015236081884572984,
"loss": 5.6916,
"step": 263
},
{
"epoch": 0.51,
"learning_rate": 0.00015141652873848054,
"loss": 5.8479,
"step": 264
},
{
"epoch": 0.51,
"learning_rate": 0.00015047218248474148,
"loss": 5.6922,
"step": 265
},
{
"epoch": 0.52,
"learning_rate": 0.00014952781751525855,
"loss": 5.7687,
"step": 266
},
{
"epoch": 0.52,
"learning_rate": 0.00014858347126151948,
"loss": 5.939,
"step": 267
},
{
"epoch": 0.52,
"learning_rate": 0.00014763918115427013,
"loss": 5.7656,
"step": 268
},
{
"epoch": 0.52,
"learning_rate": 0.000146694984622031,
"loss": 5.6402,
"step": 269
},
{
"epoch": 0.52,
"learning_rate": 0.0001457509190896134,
"loss": 5.745,
"step": 270
},
{
"epoch": 0.53,
"learning_rate": 0.0001448070219766365,
"loss": 5.7993,
"step": 271
},
{
"epoch": 0.53,
"learning_rate": 0.0001438633306960436,
"loss": 5.9042,
"step": 272
},
{
"epoch": 0.53,
"learning_rate": 0.0001429198826526197,
"loss": 5.9073,
"step": 273
},
{
"epoch": 0.53,
"learning_rate": 0.0001419767152415086,
"loss": 5.9313,
"step": 274
},
{
"epoch": 0.53,
"learning_rate": 0.00014103386584673078,
"loss": 5.9989,
"step": 275
},
{
"epoch": 0.54,
"learning_rate": 0.00014009137183970167,
"loss": 6.0108,
"step": 276
},
{
"epoch": 0.54,
"learning_rate": 0.00013914927057775018,
"loss": 5.9416,
"step": 277
},
{
"epoch": 0.54,
"learning_rate": 0.0001382075994026382,
"loss": 5.8839,
"step": 278
},
{
"epoch": 0.54,
"learning_rate": 0.0001372663956390804,
"loss": 6.0596,
"step": 279
},
{
"epoch": 0.54,
"learning_rate": 0.00013632569659326486,
"loss": 6.1034,
"step": 280
},
{
"epoch": 0.54,
"learning_rate": 0.00013538553955137414,
"loss": 6.1142,
"step": 281
},
{
"epoch": 0.55,
"learning_rate": 0.00013444596177810783,
"loss": 6.178,
"step": 282
},
{
"epoch": 0.55,
"learning_rate": 0.00013350700051520506,
"loss": 6.0406,
"step": 283
},
{
"epoch": 0.55,
"learning_rate": 0.00013256869297996853,
"loss": 6.4111,
"step": 284
},
{
"epoch": 0.55,
"learning_rate": 0.00013163107636378945,
"loss": 6.1454,
"step": 285
},
{
"epoch": 0.55,
"learning_rate": 0.00013069418783067326,
"loss": 6.4537,
"step": 286
},
{
"epoch": 0.56,
"learning_rate": 0.0001297580645157666,
"loss": 6.4003,
"step": 287
},
{
"epoch": 0.56,
"learning_rate": 0.0001288227435238853,
"loss": 6.3845,
"step": 288
},
{
"epoch": 0.56,
"learning_rate": 0.000127888261928044,
"loss": 6.4016,
"step": 289
},
{
"epoch": 0.56,
"learning_rate": 0.00012695465676798627,
"loss": 6.5177,
"step": 290
},
{
"epoch": 0.56,
"learning_rate": 0.00012602196504871678,
"loss": 6.4294,
"step": 291
},
{
"epoch": 0.57,
"learning_rate": 0.00012509022373903442,
"loss": 6.6357,
"step": 292
},
{
"epoch": 0.57,
"learning_rate": 0.00012415946977006696,
"loss": 6.7642,
"step": 293
},
{
"epoch": 0.57,
"learning_rate": 0.00012322974003380735,
"loss": 6.9096,
"step": 294
},
{
"epoch": 0.57,
"learning_rate": 0.00012230107138165133,
"loss": 6.7728,
"step": 295
},
{
"epoch": 0.57,
"learning_rate": 0.00012137350062293677,
"loss": 6.9269,
"step": 296
},
{
"epoch": 0.58,
"learning_rate": 0.00012044706452348465,
"loss": 6.9235,
"step": 297
},
{
"epoch": 0.58,
"learning_rate": 0.00011952179980414195,
"loss": 6.9825,
"step": 298
},
{
"epoch": 0.58,
"learning_rate": 0.00011859774313932597,
"loss": 7.2581,
"step": 299
},
{
"epoch": 0.58,
"learning_rate": 0.0001176749311555707,
"loss": 7.0916,
"step": 300
},
{
"epoch": 0.58,
"eval_loss": 7.345704078674316,
"eval_runtime": 0.4561,
"eval_samples_per_second": 146.905,
"eval_steps_per_second": 10.963,
"step": 300
},
{
"epoch": 0.58,
"learning_rate": 0.00011675340043007519,
"loss": 7.2218,
"step": 301
},
{
"epoch": 0.59,
"learning_rate": 0.00011583318748925367,
"loss": 7.1207,
"step": 302
},
{
"epoch": 0.59,
"learning_rate": 0.00011491432880728765,
"loss": 7.3706,
"step": 303
},
{
"epoch": 0.59,
"learning_rate": 0.0001139968608046805,
"loss": 7.3684,
"step": 304
},
{
"epoch": 0.59,
"learning_rate": 0.00011308081984681356,
"loss": 7.2527,
"step": 305
},
{
"epoch": 0.59,
"learning_rate": 0.00011216624224250487,
"loss": 7.281,
"step": 306
},
{
"epoch": 0.6,
"learning_rate": 0.00011125316424257002,
"loss": 7.3978,
"step": 307
},
{
"epoch": 0.6,
"learning_rate": 0.00011034162203838534,
"loss": 7.3763,
"step": 308
},
{
"epoch": 0.6,
"learning_rate": 0.00010943165176045317,
"loss": 7.3376,
"step": 309
},
{
"epoch": 0.6,
"learning_rate": 0.00010852328947697004,
"loss": 7.2407,
"step": 310
},
{
"epoch": 0.6,
"learning_rate": 0.0001076165711923969,
"loss": 7.2541,
"step": 311
},
{
"epoch": 0.6,
"learning_rate": 0.00010671153284603203,
"loss": 7.4064,
"step": 312
},
{
"epoch": 0.61,
"learning_rate": 0.0001058082103105865,
"loss": 7.2814,
"step": 313
},
{
"epoch": 0.61,
"learning_rate": 0.0001049066393907625,
"loss": 7.2034,
"step": 314
},
{
"epoch": 0.61,
"learning_rate": 0.00010400685582183382,
"loss": 7.2216,
"step": 315
},
{
"epoch": 0.61,
"learning_rate": 0.00010310889526822966,
"loss": 7.2839,
"step": 316
},
{
"epoch": 0.61,
"learning_rate": 0.00010221279332212101,
"loss": 7.151,
"step": 317
},
{
"epoch": 0.62,
"learning_rate": 0.00010131858550200983,
"loss": 7.0071,
"step": 318
},
{
"epoch": 0.62,
"learning_rate": 0.00010042630725132104,
"loss": 7.2225,
"step": 319
},
{
"epoch": 0.62,
"learning_rate": 9.953599393699819e-05,
"loss": 7.0026,
"step": 320
},
{
"epoch": 0.62,
"learning_rate": 9.8647680848101e-05,
"loss": 6.9343,
"step": 321
},
{
"epoch": 0.62,
"learning_rate": 9.776140319440695e-05,
"loss": 6.8403,
"step": 322
},
{
"epoch": 0.63,
"learning_rate": 9.687719610501572e-05,
"loss": 6.8452,
"step": 323
},
{
"epoch": 0.63,
"learning_rate": 9.599509462695665e-05,
"loss": 6.7219,
"step": 324
},
{
"epoch": 0.63,
"learning_rate": 9.511513372379965e-05,
"loss": 6.6855,
"step": 325
},
{
"epoch": 0.63,
"learning_rate": 9.423734827426941e-05,
"loss": 6.6364,
"step": 326
},
{
"epoch": 0.63,
"learning_rate": 9.336177307086277e-05,
"loss": 6.4916,
"step": 327
},
{
"epoch": 0.64,
"learning_rate": 9.24884428184699e-05,
"loss": 6.5035,
"step": 328
},
{
"epoch": 0.64,
"learning_rate": 9.161739213299841e-05,
"loss": 6.4218,
"step": 329
},
{
"epoch": 0.64,
"learning_rate": 9.074865554000161e-05,
"loss": 6.3434,
"step": 330
},
{
"epoch": 0.64,
"learning_rate": 8.988226747330973e-05,
"loss": 6.2743,
"step": 331
},
{
"epoch": 0.64,
"learning_rate": 8.90182622736654e-05,
"loss": 6.4393,
"step": 332
},
{
"epoch": 0.65,
"learning_rate": 8.815667418736217e-05,
"loss": 6.3851,
"step": 333
},
{
"epoch": 0.65,
"learning_rate": 8.729753736488734e-05,
"loss": 6.3961,
"step": 334
},
{
"epoch": 0.65,
"learning_rate": 8.644088585956816e-05,
"loss": 6.4392,
"step": 335
},
{
"epoch": 0.65,
"learning_rate": 8.558675362622229e-05,
"loss": 6.3019,
"step": 336
},
{
"epoch": 0.65,
"learning_rate": 8.473517451981186e-05,
"loss": 6.4448,
"step": 337
},
{
"epoch": 0.66,
"learning_rate": 8.38861822941013e-05,
"loss": 6.3658,
"step": 338
},
{
"epoch": 0.66,
"learning_rate": 8.303981060031993e-05,
"loss": 6.3605,
"step": 339
},
{
"epoch": 0.66,
"learning_rate": 8.219609298582788e-05,
"loss": 6.374,
"step": 340
},
{
"epoch": 0.66,
"learning_rate": 8.135506289278618e-05,
"loss": 6.4577,
"step": 341
},
{
"epoch": 0.66,
"learning_rate": 8.051675365683163e-05,
"loss": 6.3441,
"step": 342
},
{
"epoch": 0.66,
"learning_rate": 7.968119850575538e-05,
"loss": 6.2837,
"step": 343
},
{
"epoch": 0.67,
"learning_rate": 7.884843055818558e-05,
"loss": 6.4202,
"step": 344
},
{
"epoch": 0.67,
"learning_rate": 7.801848282227504e-05,
"loss": 6.3048,
"step": 345
},
{
"epoch": 0.67,
"learning_rate": 7.719138819439281e-05,
"loss": 6.2596,
"step": 346
},
{
"epoch": 0.67,
"learning_rate": 7.636717945782003e-05,
"loss": 6.396,
"step": 347
},
{
"epoch": 0.67,
"learning_rate": 7.554588928145088e-05,
"loss": 6.4539,
"step": 348
},
{
"epoch": 0.68,
"learning_rate": 7.47275502184975e-05,
"loss": 6.391,
"step": 349
},
{
"epoch": 0.68,
"learning_rate": 7.391219470519957e-05,
"loss": 6.3495,
"step": 350
},
{
"epoch": 0.68,
"learning_rate": 7.309985505953892e-05,
"loss": 6.5338,
"step": 351
},
{
"epoch": 0.68,
"learning_rate": 7.229056347995841e-05,
"loss": 6.5981,
"step": 352
},
{
"epoch": 0.68,
"learning_rate": 7.148435204408557e-05,
"loss": 6.4835,
"step": 353
},
{
"epoch": 0.69,
"learning_rate": 7.068125270746138e-05,
"loss": 6.5864,
"step": 354
},
{
"epoch": 0.69,
"learning_rate": 6.98812973022736e-05,
"loss": 6.6236,
"step": 355
},
{
"epoch": 0.69,
"learning_rate": 6.908451753609481e-05,
"loss": 6.6355,
"step": 356
},
{
"epoch": 0.69,
"learning_rate": 6.829094499062603e-05,
"loss": 6.7108,
"step": 357
},
{
"epoch": 0.69,
"learning_rate": 6.750061112044455e-05,
"loss": 6.6534,
"step": 358
},
{
"epoch": 0.7,
"learning_rate": 6.671354725175742e-05,
"loss": 6.7027,
"step": 359
},
{
"epoch": 0.7,
"learning_rate": 6.592978458115978e-05,
"loss": 6.7136,
"step": 360
},
{
"epoch": 0.7,
"learning_rate": 6.514935417439802e-05,
"loss": 6.6459,
"step": 361
},
{
"epoch": 0.7,
"learning_rate": 6.43722869651389e-05,
"loss": 6.7461,
"step": 362
},
{
"epoch": 0.7,
"learning_rate": 6.359861375374315e-05,
"loss": 6.697,
"step": 363
},
{
"epoch": 0.71,
"learning_rate": 6.28283652060446e-05,
"loss": 6.8183,
"step": 364
},
{
"epoch": 0.71,
"learning_rate": 6.206157185213493e-05,
"loss": 6.8525,
"step": 365
},
{
"epoch": 0.71,
"learning_rate": 6.12982640851534e-05,
"loss": 6.9362,
"step": 366
},
{
"epoch": 0.71,
"learning_rate": 6.0538472160082105e-05,
"loss": 6.7913,
"step": 367
},
{
"epoch": 0.71,
"learning_rate": 5.978222619254696e-05,
"loss": 6.6714,
"step": 368
},
{
"epoch": 0.72,
"learning_rate": 5.902955615762392e-05,
"loss": 6.8751,
"step": 369
},
{
"epoch": 0.72,
"learning_rate": 5.828049188865071e-05,
"loss": 6.718,
"step": 370
},
{
"epoch": 0.72,
"learning_rate": 5.753506307604464e-05,
"loss": 6.8452,
"step": 371
},
{
"epoch": 0.72,
"learning_rate": 5.67932992661256e-05,
"loss": 6.8406,
"step": 372
},
{
"epoch": 0.72,
"learning_rate": 5.605522985994481e-05,
"loss": 6.8587,
"step": 373
},
{
"epoch": 0.72,
"learning_rate": 5.5320884112119776e-05,
"loss": 6.9518,
"step": 374
},
{
"epoch": 0.73,
"learning_rate": 5.4590291129674564e-05,
"loss": 7.0429,
"step": 375
},
{
"epoch": 0.73,
"learning_rate": 5.386347987088592e-05,
"loss": 6.7883,
"step": 376
},
{
"epoch": 0.73,
"learning_rate": 5.314047914413587e-05,
"loss": 6.7102,
"step": 377
},
{
"epoch": 0.73,
"learning_rate": 5.2421317606769455e-05,
"loss": 6.678,
"step": 378
},
{
"epoch": 0.73,
"learning_rate": 5.1706023763959004e-05,
"loss": 6.9334,
"step": 379
},
{
"epoch": 0.74,
"learning_rate": 5.099462596757441e-05,
"loss": 6.817,
"step": 380
},
{
"epoch": 0.74,
"learning_rate": 5.0287152415059226e-05,
"loss": 6.8474,
"step": 381
},
{
"epoch": 0.74,
"learning_rate": 4.958363114831286e-05,
"loss": 6.7632,
"step": 382
},
{
"epoch": 0.74,
"learning_rate": 4.888409005257946e-05,
"loss": 6.6268,
"step": 383
},
{
"epoch": 0.74,
"learning_rate": 4.8188556855342355e-05,
"loss": 6.5044,
"step": 384
},
{
"epoch": 0.75,
"learning_rate": 4.749705912522501e-05,
"loss": 6.7882,
"step": 385
},
{
"epoch": 0.75,
"learning_rate": 4.680962427089849e-05,
"loss": 6.6334,
"step": 386
},
{
"epoch": 0.75,
"learning_rate": 4.6126279539995005e-05,
"loss": 6.8479,
"step": 387
},
{
"epoch": 0.75,
"learning_rate": 4.544705201802772e-05,
"loss": 6.6996,
"step": 388
},
{
"epoch": 0.75,
"learning_rate": 4.477196862731747e-05,
"loss": 6.6673,
"step": 389
},
{
"epoch": 0.76,
"learning_rate": 4.41010561259255e-05,
"loss": 6.6098,
"step": 390
},
{
"epoch": 0.76,
"learning_rate": 4.343434110659271e-05,
"loss": 6.8179,
"step": 391
},
{
"epoch": 0.76,
"learning_rate": 4.277184999568594e-05,
"loss": 6.7396,
"step": 392
},
{
"epoch": 0.76,
"learning_rate": 4.2113609052150335e-05,
"loss": 6.5957,
"step": 393
},
{
"epoch": 0.76,
"learning_rate": 4.145964436646837e-05,
"loss": 6.5761,
"step": 394
},
{
"epoch": 0.77,
"learning_rate": 4.080998185962606e-05,
"loss": 6.6821,
"step": 395
},
{
"epoch": 0.77,
"learning_rate": 4.0164647282085296e-05,
"loss": 6.6868,
"step": 396
},
{
"epoch": 0.77,
"learning_rate": 3.9523666212763166e-05,
"loss": 6.7086,
"step": 397
},
{
"epoch": 0.77,
"learning_rate": 3.8887064058018244e-05,
"loss": 6.6119,
"step": 398
},
{
"epoch": 0.77,
"learning_rate": 3.825486605064354e-05,
"loss": 6.5869,
"step": 399
},
{
"epoch": 0.78,
"learning_rate": 3.7627097248866136e-05,
"loss": 6.8201,
"step": 400
},
{
"epoch": 0.78,
"eval_loss": 6.829553127288818,
"eval_runtime": 0.4568,
"eval_samples_per_second": 146.658,
"eval_steps_per_second": 10.945,
"step": 400
},
{
"epoch": 0.78,
"learning_rate": 3.700378253535427e-05,
"loss": 6.584,
"step": 401
},
{
"epoch": 0.78,
"learning_rate": 3.6384946616230933e-05,
"loss": 6.6932,
"step": 402
},
{
"epoch": 0.78,
"learning_rate": 3.577061402009446e-05,
"loss": 6.6921,
"step": 403
},
{
"epoch": 0.78,
"learning_rate": 3.5160809097046586e-05,
"loss": 6.6652,
"step": 404
},
{
"epoch": 0.79,
"learning_rate": 3.4555556017727096e-05,
"loss": 6.5146,
"step": 405
},
{
"epoch": 0.79,
"learning_rate": 3.395487877235575e-05,
"loss": 6.5751,
"step": 406
},
{
"epoch": 0.79,
"learning_rate": 3.335880116978154e-05,
"loss": 6.4599,
"step": 407
},
{
"epoch": 0.79,
"learning_rate": 3.276734683653894e-05,
"loss": 6.5829,
"step": 408
},
{
"epoch": 0.79,
"learning_rate": 3.2180539215911254e-05,
"loss": 6.7763,
"step": 409
},
{
"epoch": 0.79,
"learning_rate": 3.15984015670017e-05,
"loss": 6.6207,
"step": 410
},
{
"epoch": 0.8,
"learning_rate": 3.1020956963811285e-05,
"loss": 6.6804,
"step": 411
},
{
"epoch": 0.8,
"learning_rate": 3.0448228294324255e-05,
"loss": 6.754,
"step": 412
},
{
"epoch": 0.8,
"learning_rate": 2.988023825960095e-05,
"loss": 6.6705,
"step": 413
},
{
"epoch": 0.8,
"learning_rate": 2.9317009372878037e-05,
"loss": 6.6933,
"step": 414
},
{
"epoch": 0.8,
"learning_rate": 2.8758563958675974e-05,
"loss": 6.7342,
"step": 415
},
{
"epoch": 0.81,
"learning_rate": 2.8204924151914428e-05,
"loss": 6.6217,
"step": 416
},
{
"epoch": 0.81,
"learning_rate": 2.765611189703461e-05,
"loss": 6.4585,
"step": 417
},
{
"epoch": 0.81,
"learning_rate": 2.7112148947129736e-05,
"loss": 6.5491,
"step": 418
},
{
"epoch": 0.81,
"learning_rate": 2.6573056863082698e-05,
"loss": 6.5655,
"step": 419
},
{
"epoch": 0.81,
"learning_rate": 2.603885701271133e-05,
"loss": 6.7607,
"step": 420
},
{
"epoch": 0.82,
"learning_rate": 2.550957056992174e-05,
"loss": 6.6543,
"step": 421
},
{
"epoch": 0.82,
"learning_rate": 2.498521851386886e-05,
"loss": 6.5007,
"step": 422
},
{
"epoch": 0.82,
"learning_rate": 2.4465821628124837e-05,
"loss": 6.6124,
"step": 423
},
{
"epoch": 0.82,
"learning_rate": 2.3951400499855446e-05,
"loss": 6.5528,
"step": 424
},
{
"epoch": 0.82,
"learning_rate": 2.344197551900398e-05,
"loss": 6.7489,
"step": 425
},
{
"epoch": 0.83,
"learning_rate": 2.293756687748297e-05,
"loss": 6.6461,
"step": 426
},
{
"epoch": 0.83,
"learning_rate": 2.2438194568374007e-05,
"loss": 6.6335,
"step": 427
},
{
"epoch": 0.83,
"learning_rate": 2.1943878385135227e-05,
"loss": 6.5654,
"step": 428
},
{
"epoch": 0.83,
"learning_rate": 2.1454637920816646e-05,
"loss": 6.5755,
"step": 429
},
{
"epoch": 0.83,
"learning_rate": 2.0970492567283765e-05,
"loss": 6.6072,
"step": 430
},
{
"epoch": 0.84,
"learning_rate": 2.0491461514448803e-05,
"loss": 6.578,
"step": 431
},
{
"epoch": 0.84,
"learning_rate": 2.001756374951006e-05,
"loss": 6.6676,
"step": 432
},
{
"epoch": 0.84,
"learning_rate": 1.9548818056199377e-05,
"loss": 6.4739,
"step": 433
},
{
"epoch": 0.84,
"learning_rate": 1.908524301403764e-05,
"loss": 6.6598,
"step": 434
},
{
"epoch": 0.84,
"learning_rate": 1.8626856997598355e-05,
"loss": 6.5202,
"step": 435
},
{
"epoch": 0.85,
"learning_rate": 1.817367817577915e-05,
"loss": 6.5515,
"step": 436
},
{
"epoch": 0.85,
"learning_rate": 1.7725724511081924e-05,
"loss": 6.4875,
"step": 437
},
{
"epoch": 0.85,
"learning_rate": 1.72830137589007e-05,
"loss": 6.6304,
"step": 438
},
{
"epoch": 0.85,
"learning_rate": 1.6845563466817745e-05,
"loss": 6.6853,
"step": 439
},
{
"epoch": 0.85,
"learning_rate": 1.6413390973908342e-05,
"loss": 6.6227,
"step": 440
},
{
"epoch": 0.85,
"learning_rate": 1.5986513410053247e-05,
"loss": 6.8159,
"step": 441
},
{
"epoch": 0.86,
"learning_rate": 1.556494769525991e-05,
"loss": 6.578,
"step": 442
},
{
"epoch": 0.86,
"learning_rate": 1.5148710538991727e-05,
"loss": 6.5347,
"step": 443
},
{
"epoch": 0.86,
"learning_rate": 1.4737818439505656e-05,
"loss": 6.8032,
"step": 444
},
{
"epoch": 0.86,
"learning_rate": 1.433228768319853e-05,
"loss": 6.6454,
"step": 445
},
{
"epoch": 0.86,
"learning_rate": 1.3932134343961265e-05,
"loss": 6.5615,
"step": 446
},
{
"epoch": 0.87,
"learning_rate": 1.3537374282541847e-05,
"loss": 6.5985,
"step": 447
},
{
"epoch": 0.87,
"learning_rate": 1.314802314591667e-05,
"loss": 6.5956,
"step": 448
},
{
"epoch": 0.87,
"learning_rate": 1.276409636667038e-05,
"loss": 6.6768,
"step": 449
},
{
"epoch": 0.87,
"learning_rate": 1.2385609162384019e-05,
"loss": 6.5845,
"step": 450
},
{
"epoch": 0.87,
"learning_rate": 1.2012576535032087e-05,
"loss": 6.513,
"step": 451
},
{
"epoch": 0.88,
"learning_rate": 1.1645013270387738e-05,
"loss": 6.6015,
"step": 452
},
{
"epoch": 0.88,
"learning_rate": 1.1282933937436721e-05,
"loss": 6.5111,
"step": 453
},
{
"epoch": 0.88,
"learning_rate": 1.0926352887800033e-05,
"loss": 6.5472,
"step": 454
},
{
"epoch": 0.88,
"learning_rate": 1.0575284255164989e-05,
"loss": 6.7356,
"step": 455
},
{
"epoch": 0.88,
"learning_rate": 1.022974195472499e-05,
"loss": 6.7244,
"step": 456
},
{
"epoch": 0.89,
"learning_rate": 9.889739682628034e-06,
"loss": 6.7113,
"step": 457
},
{
"epoch": 0.89,
"learning_rate": 9.555290915433821e-06,
"loss": 6.6869,
"step": 458
},
{
"epoch": 0.89,
"learning_rate": 9.226408909579519e-06,
"loss": 6.7583,
"step": 459
},
{
"epoch": 0.89,
"learning_rate": 8.903106700854423e-06,
"loss": 6.687,
"step": 460
},
{
"epoch": 0.89,
"learning_rate": 8.585397103883296e-06,
"loss": 6.6874,
"step": 461
},
{
"epoch": 0.9,
"learning_rate": 8.2732927116182e-06,
"loss": 6.5792,
"step": 462
},
{
"epoch": 0.9,
"learning_rate": 7.966805894839656e-06,
"loss": 6.7618,
"step": 463
},
{
"epoch": 0.9,
"learning_rate": 7.665948801666139e-06,
"loss": 6.9631,
"step": 464
},
{
"epoch": 0.9,
"learning_rate": 7.370733357072539e-06,
"loss": 6.6466,
"step": 465
},
{
"epoch": 0.9,
"learning_rate": 7.081171262417606e-06,
"loss": 6.5379,
"step": 466
},
{
"epoch": 0.91,
"learning_rate": 6.797273994980118e-06,
"loss": 6.6883,
"step": 467
},
{
"epoch": 0.91,
"learning_rate": 6.5190528075038436e-06,
"loss": 6.6152,
"step": 468
},
{
"epoch": 0.91,
"learning_rate": 6.246518727751704e-06,
"loss": 6.5607,
"step": 469
},
{
"epoch": 0.91,
"learning_rate": 5.979682558068566e-06,
"loss": 6.9042,
"step": 470
},
{
"epoch": 0.91,
"learning_rate": 5.718554874952991e-06,
"loss": 6.5923,
"step": 471
},
{
"epoch": 0.91,
"learning_rate": 5.463146028638249e-06,
"loss": 6.5547,
"step": 472
},
{
"epoch": 0.92,
"learning_rate": 5.213466142681832e-06,
"loss": 6.5457,
"step": 473
},
{
"epoch": 0.92,
"learning_rate": 4.969525113564327e-06,
"loss": 6.8205,
"step": 474
},
{
"epoch": 0.92,
"learning_rate": 4.7313326102971225e-06,
"loss": 6.5781,
"step": 475
},
{
"epoch": 0.92,
"learning_rate": 4.498898074039126e-06,
"loss": 6.6173,
"step": 476
},
{
"epoch": 0.92,
"learning_rate": 4.272230717722602e-06,
"loss": 6.6039,
"step": 477
},
{
"epoch": 0.93,
"learning_rate": 4.051339525687991e-06,
"loss": 6.7091,
"step": 478
},
{
"epoch": 0.93,
"learning_rate": 3.83623325332772e-06,
"loss": 6.7178,
"step": 479
},
{
"epoch": 0.93,
"learning_rate": 3.6269204267392825e-06,
"loss": 6.6408,
"step": 480
},
{
"epoch": 0.93,
"learning_rate": 3.4234093423872786e-06,
"loss": 6.548,
"step": 481
},
{
"epoch": 0.93,
"learning_rate": 3.2257080667744407e-06,
"loss": 6.8339,
"step": 482
},
{
"epoch": 0.94,
"learning_rate": 3.0338244361220564e-06,
"loss": 6.6542,
"step": 483
},
{
"epoch": 0.94,
"learning_rate": 2.8477660560593196e-06,
"loss": 6.4732,
"step": 484
},
{
"epoch": 0.94,
"learning_rate": 2.6675403013218355e-06,
"loss": 6.7991,
"step": 485
},
{
"epoch": 0.94,
"learning_rate": 2.4931543154593223e-06,
"loss": 6.7538,
"step": 486
},
{
"epoch": 0.94,
"learning_rate": 2.3246150105525054e-06,
"loss": 6.5082,
"step": 487
},
{
"epoch": 0.95,
"learning_rate": 2.161929066939083e-06,
"loss": 6.8893,
"step": 488
},
{
"epoch": 0.95,
"learning_rate": 2.005102932948993e-06,
"loss": 6.639,
"step": 489
},
{
"epoch": 0.95,
"learning_rate": 1.8541428246487966e-06,
"loss": 6.6773,
"step": 490
},
{
"epoch": 0.95,
"learning_rate": 1.7090547255952935e-06,
"loss": 6.6149,
"step": 491
},
{
"epoch": 0.95,
"learning_rate": 1.5698443865983789e-06,
"loss": 6.6495,
"step": 492
},
{
"epoch": 0.96,
"learning_rate": 1.4365173254930585e-06,
"loss": 6.7779,
"step": 493
},
{
"epoch": 0.96,
"learning_rate": 1.309078826920773e-06,
"loss": 6.7067,
"step": 494
},
{
"epoch": 0.96,
"learning_rate": 1.1875339421199004e-06,
"loss": 6.6096,
"step": 495
},
{
"epoch": 0.96,
"learning_rate": 1.0718874887256146e-06,
"loss": 6.4771,
"step": 496
},
{
"epoch": 0.96,
"learning_rate": 9.621440505788225e-07,
"loss": 6.7159,
"step": 497
},
{
"epoch": 0.97,
"learning_rate": 8.583079775445423e-07,
"loss": 6.5673,
"step": 498
},
{
"epoch": 0.97,
"learning_rate": 7.603833853395247e-07,
"loss": 6.6136,
"step": 499
},
{
"epoch": 0.97,
"learning_rate": 6.68374155369017e-07,
"loss": 6.6013,
"step": 500
},
{
"epoch": 0.97,
"eval_loss": 6.8933210372924805,
"eval_runtime": 0.4586,
"eval_samples_per_second": 146.102,
"eval_steps_per_second": 10.903,
"step": 500
}
],
"logging_steps": 1,
"max_steps": 515,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 4108329021603840.0,
"trial_name": null,
"trial_params": null
}