|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6788866259334692,
  "eval_steps": 125,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0013577732518669382,
      "eval_loss": 1.933146357536316,
      "eval_runtime": 7.8521,
      "eval_samples_per_second": 158.046,
      "eval_steps_per_second": 19.867,
      "step": 1
    },
    {
      "epoch": 0.006788866259334691,
      "grad_norm": 0.1681375950574875,
      "learning_rate": 5e-05,
      "loss": 1.7982,
      "step": 5
    },
    {
      "epoch": 0.013577732518669382,
      "grad_norm": 0.18828649818897247,
      "learning_rate": 0.0001,
      "loss": 1.7923,
      "step": 10
    },
    {
      "epoch": 0.020366598778004074,
      "grad_norm": 0.2104337513446808,
      "learning_rate": 9.99743108100344e-05,
      "loss": 1.8487,
      "step": 15
    },
    {
      "epoch": 0.027155465037338764,
      "grad_norm": 0.2100468873977661,
      "learning_rate": 9.989726963751682e-05,
      "loss": 1.7487,
      "step": 20
    },
    {
      "epoch": 0.03394433129667346,
      "grad_norm": 0.21163082122802734,
      "learning_rate": 9.976895564745991e-05,
      "loss": 1.8533,
      "step": 25
    },
    {
      "epoch": 0.04073319755600815,
      "grad_norm": 0.23047874867916107,
      "learning_rate": 9.95895006911623e-05,
      "loss": 1.7436,
      "step": 30
    },
    {
      "epoch": 0.04752206381534284,
      "grad_norm": 0.2215753197669983,
      "learning_rate": 9.935908917072252e-05,
      "loss": 1.7594,
      "step": 35
    },
    {
      "epoch": 0.05431093007467753,
      "grad_norm": 0.25571998953819275,
      "learning_rate": 9.907795784955327e-05,
      "loss": 1.6957,
      "step": 40
    },
    {
      "epoch": 0.06109979633401222,
      "grad_norm": 0.22950182855129242,
      "learning_rate": 9.874639560909117e-05,
      "loss": 1.6263,
      "step": 45
    },
    {
      "epoch": 0.06788866259334692,
      "grad_norm": 0.24856191873550415,
      "learning_rate": 9.836474315195147e-05,
      "loss": 1.6548,
      "step": 50
    },
    {
      "epoch": 0.0746775288526816,
      "grad_norm": 0.23954300582408905,
      "learning_rate": 9.793339265183303e-05,
      "loss": 1.6122,
      "step": 55
    },
    {
      "epoch": 0.0814663951120163,
      "grad_norm": 0.21579493582248688,
      "learning_rate": 9.745278735053343e-05,
      "loss": 1.6289,
      "step": 60
    },
    {
      "epoch": 0.08825526137135098,
      "grad_norm": 0.2218601107597351,
      "learning_rate": 9.692342110248802e-05,
      "loss": 1.6328,
      "step": 65
    },
    {
      "epoch": 0.09504412763068568,
      "grad_norm": 0.23692508041858673,
      "learning_rate": 9.63458378673011e-05,
      "loss": 1.5982,
      "step": 70
    },
    {
      "epoch": 0.10183299389002037,
      "grad_norm": 0.1912090927362442,
      "learning_rate": 9.572063115079063e-05,
      "loss": 1.6324,
      "step": 75
    },
    {
      "epoch": 0.10862186014935506,
      "grad_norm": 0.23047535121440887,
      "learning_rate": 9.504844339512095e-05,
      "loss": 1.4911,
      "step": 80
    },
    {
      "epoch": 0.11541072640868975,
      "grad_norm": 0.20515207946300507,
      "learning_rate": 9.432996531865002e-05,
      "loss": 1.5588,
      "step": 85
    },
    {
      "epoch": 0.12219959266802444,
      "grad_norm": 0.21330514550209045,
      "learning_rate": 9.356593520616948e-05,
      "loss": 1.4904,
      "step": 90
    },
    {
      "epoch": 0.12898845892735913,
      "grad_norm": 0.23363173007965088,
      "learning_rate": 9.275713815026731e-05,
      "loss": 1.6034,
      "step": 95
    },
    {
      "epoch": 0.13577732518669383,
      "grad_norm": 0.17454369366168976,
      "learning_rate": 9.190440524459203e-05,
      "loss": 1.5246,
      "step": 100
    },
    {
      "epoch": 0.1425661914460285,
      "grad_norm": 0.18648678064346313,
      "learning_rate": 9.10086127298478e-05,
      "loss": 1.5047,
      "step": 105
    },
    {
      "epoch": 0.1493550577053632,
      "grad_norm": 0.19136574864387512,
      "learning_rate": 9.007068109339784e-05,
      "loss": 1.5417,
      "step": 110
    },
    {
      "epoch": 0.1561439239646979,
      "grad_norm": 0.22987370193004608,
      "learning_rate": 8.90915741234015e-05,
      "loss": 1.5261,
      "step": 115
    },
    {
      "epoch": 0.1629327902240326,
      "grad_norm": 0.20558615028858185,
      "learning_rate": 8.807229791845673e-05,
      "loss": 1.5108,
      "step": 120
    },
    {
      "epoch": 0.1697216564833673,
      "grad_norm": 0.22430284321308136,
      "learning_rate": 8.701389985376578e-05,
      "loss": 1.533,
      "step": 125
    },
    {
      "epoch": 0.1697216564833673,
      "eval_loss": 1.5494850873947144,
      "eval_runtime": 7.7808,
      "eval_samples_per_second": 159.495,
      "eval_steps_per_second": 20.049,
      "step": 125
    },
    {
      "epoch": 0.17651052274270196,
      "grad_norm": 0.20106786489486694,
      "learning_rate": 8.591746750488639e-05,
      "loss": 1.538,
      "step": 130
    },
    {
      "epoch": 0.18329938900203666,
      "grad_norm": 0.21852199733257294,
      "learning_rate": 8.478412753017433e-05,
      "loss": 1.5041,
      "step": 135
    },
    {
      "epoch": 0.19008825526137135,
      "grad_norm": 0.21438615024089813,
      "learning_rate": 8.361504451306585e-05,
      "loss": 1.4955,
      "step": 140
    },
    {
      "epoch": 0.19687712152070605,
      "grad_norm": 0.26983126997947693,
      "learning_rate": 8.241141976538943e-05,
      "loss": 1.5475,
      "step": 145
    },
    {
      "epoch": 0.20366598778004075,
      "grad_norm": 0.21069645881652832,
      "learning_rate": 8.117449009293668e-05,
      "loss": 1.5053,
      "step": 150
    },
    {
      "epoch": 0.21045485403937542,
      "grad_norm": 0.19532069563865662,
      "learning_rate": 7.990552652456081e-05,
      "loss": 1.5054,
      "step": 155
    },
    {
      "epoch": 0.2172437202987101,
      "grad_norm": 0.24983985722064972,
      "learning_rate": 7.860583300610849e-05,
      "loss": 1.4672,
      "step": 160
    },
    {
      "epoch": 0.2240325865580448,
      "grad_norm": 0.23071178793907166,
      "learning_rate": 7.727674506052743e-05,
      "loss": 1.4741,
      "step": 165
    },
    {
      "epoch": 0.2308214528173795,
      "grad_norm": 0.22894631326198578,
      "learning_rate": 7.591962841552627e-05,
      "loss": 1.5716,
      "step": 170
    },
    {
      "epoch": 0.23761031907671418,
      "grad_norm": 0.2204715609550476,
      "learning_rate": 7.45358776001969e-05,
      "loss": 1.4912,
      "step": 175
    },
    {
      "epoch": 0.24439918533604887,
      "grad_norm": 0.20580367743968964,
      "learning_rate": 7.312691451204178e-05,
      "loss": 1.5067,
      "step": 180
    },
    {
      "epoch": 0.25118805159538354,
      "grad_norm": 0.22472696006298065,
      "learning_rate": 7.169418695587791e-05,
      "loss": 1.5726,
      "step": 185
    },
    {
      "epoch": 0.25797691785471827,
      "grad_norm": 0.27027320861816406,
      "learning_rate": 7.023916715611969e-05,
      "loss": 1.4581,
      "step": 190
    },
    {
      "epoch": 0.26476578411405294,
      "grad_norm": 0.23604366183280945,
      "learning_rate": 6.876335024396872e-05,
      "loss": 1.471,
      "step": 195
    },
    {
      "epoch": 0.27155465037338766,
      "grad_norm": 0.2352292388677597,
      "learning_rate": 6.726825272106538e-05,
      "loss": 1.4378,
      "step": 200
    },
    {
      "epoch": 0.27834351663272233,
      "grad_norm": 0.2463415563106537,
      "learning_rate": 6.575541090118105e-05,
      "loss": 1.4849,
      "step": 205
    },
    {
      "epoch": 0.285132382892057,
      "grad_norm": 0.2296827733516693,
      "learning_rate": 6.422637933155162e-05,
      "loss": 1.3912,
      "step": 210
    },
    {
      "epoch": 0.2919212491513917,
      "grad_norm": 0.2640281915664673,
      "learning_rate": 6.268272919547537e-05,
      "loss": 1.4078,
      "step": 215
    },
    {
      "epoch": 0.2987101154107264,
      "grad_norm": 0.31660768389701843,
      "learning_rate": 6.112604669781572e-05,
      "loss": 1.4462,
      "step": 220
    },
    {
      "epoch": 0.3054989816700611,
      "grad_norm": 0.22085396945476532,
      "learning_rate": 5.955793143506863e-05,
      "loss": 1.4574,
      "step": 225
    },
    {
      "epoch": 0.3122878479293958,
      "grad_norm": 0.24354016780853271,
      "learning_rate": 5.7979994751668964e-05,
      "loss": 1.5296,
      "step": 230
    },
    {
      "epoch": 0.31907671418873046,
      "grad_norm": 0.24986490607261658,
      "learning_rate": 5.6393858084225305e-05,
      "loss": 1.4725,
      "step": 235
    },
    {
      "epoch": 0.3258655804480652,
      "grad_norm": 0.2531109154224396,
      "learning_rate": 5.480115129538409e-05,
      "loss": 1.4542,
      "step": 240
    },
    {
      "epoch": 0.33265444670739985,
      "grad_norm": 0.19950221478939056,
      "learning_rate": 5.320351099903565e-05,
      "loss": 1.4582,
      "step": 245
    },
    {
      "epoch": 0.3394433129667346,
      "grad_norm": 0.2342836707830429,
      "learning_rate": 5.1602578878582776e-05,
      "loss": 1.4385,
      "step": 250
    },
    {
      "epoch": 0.3394433129667346,
      "eval_loss": 1.4789527654647827,
      "eval_runtime": 7.722,
      "eval_samples_per_second": 160.71,
      "eval_steps_per_second": 20.202,
      "step": 250
    },
    {
      "epoch": 0.34623217922606925,
      "grad_norm": 0.22694501280784607,
      "learning_rate": 5e-05,
      "loss": 1.4285,
      "step": 255
    },
    {
      "epoch": 0.3530210454854039,
      "grad_norm": 0.23925504088401794,
      "learning_rate": 4.839742112141724e-05,
      "loss": 1.4588,
      "step": 260
    },
    {
      "epoch": 0.35980991174473864,
      "grad_norm": 0.2735687494277954,
      "learning_rate": 4.679648900096436e-05,
      "loss": 1.4691,
      "step": 265
    },
    {
      "epoch": 0.3665987780040733,
      "grad_norm": 0.29210373759269714,
      "learning_rate": 4.5198848704615914e-05,
      "loss": 1.441,
      "step": 270
    },
    {
      "epoch": 0.37338764426340804,
      "grad_norm": 0.2414471060037613,
      "learning_rate": 4.3606141915774693e-05,
      "loss": 1.4356,
      "step": 275
    },
    {
      "epoch": 0.3801765105227427,
      "grad_norm": 0.23838582634925842,
      "learning_rate": 4.2020005248331054e-05,
      "loss": 1.4412,
      "step": 280
    },
    {
      "epoch": 0.3869653767820774,
      "grad_norm": 0.22352631390094757,
      "learning_rate": 4.04420685649314e-05,
      "loss": 1.459,
      "step": 285
    },
    {
      "epoch": 0.3937542430414121,
      "grad_norm": 0.25490501523017883,
      "learning_rate": 3.887395330218429e-05,
      "loss": 1.4208,
      "step": 290
    },
    {
      "epoch": 0.40054310930074677,
      "grad_norm": 0.26434803009033203,
      "learning_rate": 3.731727080452464e-05,
      "loss": 1.4264,
      "step": 295
    },
    {
      "epoch": 0.4073319755600815,
      "grad_norm": 0.23617196083068848,
      "learning_rate": 3.5773620668448384e-05,
      "loss": 1.3935,
      "step": 300
    },
    {
      "epoch": 0.41412084181941616,
      "grad_norm": 0.23081578314304352,
      "learning_rate": 3.424458909881897e-05,
      "loss": 1.4113,
      "step": 305
    },
    {
      "epoch": 0.42090970807875083,
      "grad_norm": 0.24254988133907318,
      "learning_rate": 3.273174727893463e-05,
      "loss": 1.4908,
      "step": 310
    },
    {
      "epoch": 0.42769857433808556,
      "grad_norm": 0.21009749174118042,
      "learning_rate": 3.12366497560313e-05,
      "loss": 1.4607,
      "step": 315
    },
    {
      "epoch": 0.4344874405974202,
      "grad_norm": 0.2565441131591797,
      "learning_rate": 2.976083284388031e-05,
      "loss": 1.4664,
      "step": 320
    },
    {
      "epoch": 0.4412763068567549,
      "grad_norm": 0.2448188215494156,
      "learning_rate": 2.8305813044122097e-05,
      "loss": 1.4369,
      "step": 325
    },
    {
      "epoch": 0.4480651731160896,
      "grad_norm": 0.19102215766906738,
      "learning_rate": 2.687308548795825e-05,
      "loss": 1.3941,
      "step": 330
    },
    {
      "epoch": 0.4548540393754243,
      "grad_norm": 0.23744595050811768,
      "learning_rate": 2.5464122399803125e-05,
      "loss": 1.4053,
      "step": 335
    },
    {
      "epoch": 0.461642905634759,
      "grad_norm": 0.25465700030326843,
      "learning_rate": 2.4080371584473748e-05,
      "loss": 1.4054,
      "step": 340
    },
    {
      "epoch": 0.4684317718940937,
      "grad_norm": 0.26104259490966797,
      "learning_rate": 2.272325493947257e-05,
      "loss": 1.4511,
      "step": 345
    },
    {
      "epoch": 0.47522063815342835,
      "grad_norm": 0.22542956471443176,
      "learning_rate": 2.139416699389153e-05,
      "loss": 1.4673,
      "step": 350
    },
    {
      "epoch": 0.4820095044127631,
      "grad_norm": 0.2605678141117096,
      "learning_rate": 2.0094473475439202e-05,
      "loss": 1.3932,
      "step": 355
    },
    {
      "epoch": 0.48879837067209775,
      "grad_norm": 0.24848726391792297,
      "learning_rate": 1.8825509907063327e-05,
      "loss": 1.4204,
      "step": 360
    },
    {
      "epoch": 0.4955872369314325,
      "grad_norm": 0.2906915843486786,
      "learning_rate": 1.758858023461059e-05,
      "loss": 1.3734,
      "step": 365
    },
    {
      "epoch": 0.5023761031907671,
      "grad_norm": 0.25691258907318115,
      "learning_rate": 1.6384955486934156e-05,
      "loss": 1.4086,
      "step": 370
    },
    {
      "epoch": 0.5091649694501018,
      "grad_norm": 0.2475077509880066,
      "learning_rate": 1.5215872469825682e-05,
      "loss": 1.4393,
      "step": 375
    },
    {
      "epoch": 0.5091649694501018,
      "eval_loss": 1.4588358402252197,
      "eval_runtime": 7.7286,
      "eval_samples_per_second": 160.573,
      "eval_steps_per_second": 20.185,
      "step": 375
    },
    {
      "epoch": 0.5159538357094365,
      "grad_norm": 0.3304094076156616,
      "learning_rate": 1.4082532495113626e-05,
      "loss": 1.4469,
      "step": 380
    },
    {
      "epoch": 0.5227427019687713,
      "grad_norm": 0.2766299545764923,
      "learning_rate": 1.2986100146234232e-05,
      "loss": 1.357,
      "step": 385
    },
    {
      "epoch": 0.5295315682281059,
      "grad_norm": 0.29647424817085266,
      "learning_rate": 1.1927702081543279e-05,
      "loss": 1.47,
      "step": 390
    },
    {
      "epoch": 0.5363204344874406,
      "grad_norm": 0.2745124399662018,
      "learning_rate": 1.090842587659851e-05,
      "loss": 1.398,
      "step": 395
    },
    {
      "epoch": 0.5431093007467753,
      "grad_norm": 0.2885884642601013,
      "learning_rate": 9.929318906602175e-06,
      "loss": 1.4597,
      "step": 400
    },
    {
      "epoch": 0.5498981670061099,
      "grad_norm": 0.24886402487754822,
      "learning_rate": 8.991387270152201e-06,
      "loss": 1.4669,
      "step": 405
    },
    {
      "epoch": 0.5566870332654447,
      "grad_norm": 0.2334742695093155,
      "learning_rate": 8.09559475540797e-06,
      "loss": 1.4107,
      "step": 410
    },
    {
      "epoch": 0.5634758995247794,
      "grad_norm": 0.27737849950790405,
      "learning_rate": 7.242861849732696e-06,
      "loss": 1.5048,
      "step": 415
    },
    {
      "epoch": 0.570264765784114,
      "grad_norm": 0.23328500986099243,
      "learning_rate": 6.43406479383053e-06,
      "loss": 1.4487,
      "step": 420
    },
    {
      "epoch": 0.5770536320434487,
      "grad_norm": 0.32235902547836304,
      "learning_rate": 5.670034681349995e-06,
      "loss": 1.4791,
      "step": 425
    },
    {
      "epoch": 0.5838424983027835,
      "grad_norm": 0.3034408688545227,
      "learning_rate": 4.951556604879048e-06,
      "loss": 1.4643,
      "step": 430
    },
    {
      "epoch": 0.5906313645621182,
      "grad_norm": 0.24768030643463135,
      "learning_rate": 4.279368849209381e-06,
      "loss": 1.4803,
      "step": 435
    },
    {
      "epoch": 0.5974202308214528,
      "grad_norm": 0.23609156906604767,
      "learning_rate": 3.654162132698918e-06,
      "loss": 1.3877,
      "step": 440
    },
    {
      "epoch": 0.6042090970807875,
      "grad_norm": 0.25662508606910706,
      "learning_rate": 3.076578897511978e-06,
      "loss": 1.4758,
      "step": 445
    },
    {
      "epoch": 0.6109979633401222,
      "grad_norm": 0.24194221198558807,
      "learning_rate": 2.547212649466568e-06,
      "loss": 1.4027,
      "step": 450
    },
    {
      "epoch": 0.6177868295994569,
      "grad_norm": 0.3545863628387451,
      "learning_rate": 2.066607348166971e-06,
      "loss": 1.4078,
      "step": 455
    },
    {
      "epoch": 0.6245756958587916,
      "grad_norm": 0.2740108072757721,
      "learning_rate": 1.6352568480485276e-06,
      "loss": 1.4463,
      "step": 460
    },
    {
      "epoch": 0.6313645621181263,
      "grad_norm": 0.2730073630809784,
      "learning_rate": 1.2536043909088191e-06,
      "loss": 1.4089,
      "step": 465
    },
    {
      "epoch": 0.6381534283774609,
      "grad_norm": 0.2379709631204605,
      "learning_rate": 9.220421504467281e-07,
      "loss": 1.4428,
      "step": 470
    },
    {
      "epoch": 0.6449422946367956,
      "grad_norm": 0.2579050362110138,
      "learning_rate": 6.409108292774913e-07,
      "loss": 1.4112,
      "step": 475
    },
    {
      "epoch": 0.6517311608961304,
      "grad_norm": 0.2765941917896271,
      "learning_rate": 4.104993088376974e-07,
      "loss": 1.4722,
      "step": 480
    },
    {
      "epoch": 0.658520027155465,
      "grad_norm": 0.27427244186401367,
      "learning_rate": 2.310443525400885e-07,
      "loss": 1.3907,
      "step": 485
    },
    {
      "epoch": 0.6653088934147997,
      "grad_norm": 0.2611384689807892,
      "learning_rate": 1.0273036248318324e-07,
      "loss": 1.4292,
      "step": 490
    },
    {
      "epoch": 0.6720977596741344,
      "grad_norm": 0.2796425223350525,
      "learning_rate": 2.568918996560532e-08,
      "loss": 1.4264,
      "step": 495
    },
    {
      "epoch": 0.6788866259334692,
      "grad_norm": 0.21907183527946472,
      "learning_rate": 0.0,
      "loss": 1.4451,
      "step": 500
    },
    {
      "epoch": 0.6788866259334692,
      "eval_loss": 1.4559390544891357,
      "eval_runtime": 7.7312,
      "eval_samples_per_second": 160.519,
      "eval_steps_per_second": 20.178,
      "step": 500
    }
  ],
  "logging_steps": 5,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 125,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0682976996163584e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}