{
  "best_metric": 5.203610897064209,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 2.030456852791878,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01015228426395939,
      "grad_norm": 0.46062880754470825,
      "learning_rate": 5e-06,
      "loss": 5.3841,
      "step": 1
    },
    {
      "epoch": 0.01015228426395939,
      "eval_loss": 5.5305962562561035,
      "eval_runtime": 8.7087,
      "eval_samples_per_second": 19.061,
      "eval_steps_per_second": 9.531,
      "step": 1
    },
    {
      "epoch": 0.02030456852791878,
      "grad_norm": 0.5978866815567017,
      "learning_rate": 1e-05,
      "loss": 5.4053,
      "step": 2
    },
    {
      "epoch": 0.030456852791878174,
      "grad_norm": 0.4448310434818268,
      "learning_rate": 1.5e-05,
      "loss": 5.3689,
      "step": 3
    },
    {
      "epoch": 0.04060913705583756,
      "grad_norm": 0.507191002368927,
      "learning_rate": 2e-05,
      "loss": 5.4424,
      "step": 4
    },
    {
      "epoch": 0.050761421319796954,
      "grad_norm": 0.4688023626804352,
      "learning_rate": 2.5e-05,
      "loss": 5.4252,
      "step": 5
    },
    {
      "epoch": 0.06091370558375635,
      "grad_norm": 0.4471573531627655,
      "learning_rate": 3e-05,
      "loss": 5.3927,
      "step": 6
    },
    {
      "epoch": 0.07106598984771574,
      "grad_norm": 0.45171481370925903,
      "learning_rate": 3.5e-05,
      "loss": 5.3751,
      "step": 7
    },
    {
      "epoch": 0.08121827411167512,
      "grad_norm": 0.6176170706748962,
      "learning_rate": 4e-05,
      "loss": 5.4215,
      "step": 8
    },
    {
      "epoch": 0.09137055837563451,
      "grad_norm": 0.5099933743476868,
      "learning_rate": 4.5e-05,
      "loss": 5.3422,
      "step": 9
    },
    {
      "epoch": 0.10152284263959391,
      "grad_norm": 0.48276761174201965,
      "learning_rate": 5e-05,
      "loss": 5.4263,
      "step": 10
    },
    {
      "epoch": 0.1116751269035533,
      "grad_norm": 0.5048362612724304,
      "learning_rate": 5.500000000000001e-05,
      "loss": 5.3427,
      "step": 11
    },
    {
      "epoch": 0.1218274111675127,
      "grad_norm": 0.5130631923675537,
      "learning_rate": 6e-05,
      "loss": 5.3817,
      "step": 12
    },
    {
      "epoch": 0.1319796954314721,
      "grad_norm": 0.5273154377937317,
      "learning_rate": 6.500000000000001e-05,
      "loss": 5.3019,
      "step": 13
    },
    {
      "epoch": 0.14213197969543148,
      "grad_norm": 0.523825466632843,
      "learning_rate": 7e-05,
      "loss": 5.2535,
      "step": 14
    },
    {
      "epoch": 0.15228426395939088,
      "grad_norm": 0.6113125681877136,
      "learning_rate": 7.500000000000001e-05,
      "loss": 5.3172,
      "step": 15
    },
    {
      "epoch": 0.16243654822335024,
      "grad_norm": 0.5640417337417603,
      "learning_rate": 8e-05,
      "loss": 5.2758,
      "step": 16
    },
    {
      "epoch": 0.17258883248730963,
      "grad_norm": 0.4186924397945404,
      "learning_rate": 8.5e-05,
      "loss": 5.2586,
      "step": 17
    },
    {
      "epoch": 0.18274111675126903,
      "grad_norm": 0.5193726420402527,
      "learning_rate": 9e-05,
      "loss": 5.2615,
      "step": 18
    },
    {
      "epoch": 0.19289340101522842,
      "grad_norm": 0.5647014379501343,
      "learning_rate": 9.5e-05,
      "loss": 5.2652,
      "step": 19
    },
    {
      "epoch": 0.20304568527918782,
      "grad_norm": 0.5719868540763855,
      "learning_rate": 0.0001,
      "loss": 5.1955,
      "step": 20
    },
    {
      "epoch": 0.2131979695431472,
      "grad_norm": 0.6991127133369446,
      "learning_rate": 9.999238475781957e-05,
      "loss": 5.1646,
      "step": 21
    },
    {
      "epoch": 0.2233502538071066,
      "grad_norm": 0.7082369327545166,
      "learning_rate": 9.99695413509548e-05,
      "loss": 5.1052,
      "step": 22
    },
    {
      "epoch": 0.233502538071066,
      "grad_norm": 0.7457136511802673,
      "learning_rate": 9.99314767377287e-05,
      "loss": 5.1863,
      "step": 23
    },
    {
      "epoch": 0.2436548223350254,
      "grad_norm": 1.6564850807189941,
      "learning_rate": 9.987820251299122e-05,
      "loss": 5.0987,
      "step": 24
    },
    {
      "epoch": 0.25380710659898476,
      "grad_norm": 5.510101318359375,
      "learning_rate": 9.980973490458728e-05,
      "loss": 5.5984,
      "step": 25
    },
    {
      "epoch": 0.2639593908629442,
      "grad_norm": 4.215838432312012,
      "learning_rate": 9.972609476841367e-05,
      "loss": 5.5124,
      "step": 26
    },
    {
      "epoch": 0.27411167512690354,
      "grad_norm": 1.8195075988769531,
      "learning_rate": 9.962730758206611e-05,
      "loss": 5.4004,
      "step": 27
    },
    {
      "epoch": 0.28426395939086296,
      "grad_norm": 1.5931973457336426,
      "learning_rate": 9.951340343707852e-05,
      "loss": 5.3794,
      "step": 28
    },
    {
      "epoch": 0.29441624365482233,
      "grad_norm": 0.6483675241470337,
      "learning_rate": 9.938441702975689e-05,
      "loss": 5.3566,
      "step": 29
    },
    {
      "epoch": 0.30456852791878175,
      "grad_norm": 0.3973766267299652,
      "learning_rate": 9.924038765061042e-05,
      "loss": 5.2972,
      "step": 30
    },
    {
      "epoch": 0.3147208121827411,
      "grad_norm": 0.3534790277481079,
      "learning_rate": 9.908135917238321e-05,
      "loss": 5.2902,
      "step": 31
    },
    {
      "epoch": 0.3248730964467005,
      "grad_norm": 0.3970736861228943,
      "learning_rate": 9.890738003669029e-05,
      "loss": 5.2949,
      "step": 32
    },
    {
      "epoch": 0.3350253807106599,
      "grad_norm": 0.37264251708984375,
      "learning_rate": 9.871850323926177e-05,
      "loss": 5.2944,
      "step": 33
    },
    {
      "epoch": 0.34517766497461927,
      "grad_norm": 0.3872509002685547,
      "learning_rate": 9.851478631379982e-05,
      "loss": 5.326,
      "step": 34
    },
    {
      "epoch": 0.3553299492385787,
      "grad_norm": 0.3853321969509125,
      "learning_rate": 9.829629131445342e-05,
      "loss": 5.2708,
      "step": 35
    },
    {
      "epoch": 0.36548223350253806,
      "grad_norm": 0.39588385820388794,
      "learning_rate": 9.806308479691595e-05,
      "loss": 5.3228,
      "step": 36
    },
    {
      "epoch": 0.3756345177664975,
      "grad_norm": 0.4093726575374603,
      "learning_rate": 9.781523779815179e-05,
      "loss": 5.2831,
      "step": 37
    },
    {
      "epoch": 0.38578680203045684,
      "grad_norm": 0.4420938491821289,
      "learning_rate": 9.755282581475769e-05,
      "loss": 5.2103,
      "step": 38
    },
    {
      "epoch": 0.39593908629441626,
      "grad_norm": 0.5512685775756836,
      "learning_rate": 9.727592877996585e-05,
      "loss": 5.1775,
      "step": 39
    },
    {
      "epoch": 0.40609137055837563,
      "grad_norm": 0.3994300067424774,
      "learning_rate": 9.698463103929542e-05,
      "loss": 5.2478,
      "step": 40
    },
    {
      "epoch": 0.41624365482233505,
      "grad_norm": 0.5147640705108643,
      "learning_rate": 9.667902132486009e-05,
      "loss": 5.1743,
      "step": 41
    },
    {
      "epoch": 0.4263959390862944,
      "grad_norm": 0.6237267851829529,
      "learning_rate": 9.635919272833938e-05,
      "loss": 5.2249,
      "step": 42
    },
    {
      "epoch": 0.4365482233502538,
      "grad_norm": 0.5305359363555908,
      "learning_rate": 9.602524267262203e-05,
      "loss": 5.2915,
      "step": 43
    },
    {
      "epoch": 0.4467005076142132,
      "grad_norm": 0.4987878203392029,
      "learning_rate": 9.567727288213005e-05,
      "loss": 5.253,
      "step": 44
    },
    {
      "epoch": 0.45685279187817257,
      "grad_norm": 0.6333546042442322,
      "learning_rate": 9.53153893518325e-05,
      "loss": 5.1612,
      "step": 45
    },
    {
      "epoch": 0.467005076142132,
      "grad_norm": 0.7043673396110535,
      "learning_rate": 9.493970231495835e-05,
      "loss": 5.1424,
      "step": 46
    },
    {
      "epoch": 0.47715736040609136,
      "grad_norm": 0.6457162499427795,
      "learning_rate": 9.45503262094184e-05,
      "loss": 5.1876,
      "step": 47
    },
    {
      "epoch": 0.4873096446700508,
      "grad_norm": 0.9820389151573181,
      "learning_rate": 9.414737964294636e-05,
      "loss": 5.0792,
      "step": 48
    },
    {
      "epoch": 0.49746192893401014,
      "grad_norm": 2.78731369972229,
      "learning_rate": 9.373098535696979e-05,
      "loss": 5.5062,
      "step": 49
    },
    {
      "epoch": 0.5076142131979695,
      "grad_norm": 2.0711777210235596,
      "learning_rate": 9.330127018922194e-05,
      "loss": 5.4469,
      "step": 50
    },
    {
      "epoch": 0.5076142131979695,
      "eval_loss": 5.311061859130859,
      "eval_runtime": 8.7447,
      "eval_samples_per_second": 18.983,
      "eval_steps_per_second": 9.491,
      "step": 50
    },
    {
      "epoch": 0.5177664974619289,
      "grad_norm": 0.99236661195755,
      "learning_rate": 9.285836503510562e-05,
      "loss": 5.3363,
      "step": 51
    },
    {
      "epoch": 0.5279187817258884,
      "grad_norm": 0.48146185278892517,
      "learning_rate": 9.24024048078213e-05,
      "loss": 5.3438,
      "step": 52
    },
    {
      "epoch": 0.5380710659898477,
      "grad_norm": 0.33013349771499634,
      "learning_rate": 9.193352839727121e-05,
      "loss": 5.3068,
      "step": 53
    },
    {
      "epoch": 0.5482233502538071,
      "grad_norm": 0.37118765711784363,
      "learning_rate": 9.145187862775209e-05,
      "loss": 5.306,
      "step": 54
    },
    {
      "epoch": 0.5583756345177665,
      "grad_norm": 0.4362952411174774,
      "learning_rate": 9.09576022144496e-05,
      "loss": 5.2849,
      "step": 55
    },
    {
      "epoch": 0.5685279187817259,
      "grad_norm": 0.3819132149219513,
      "learning_rate": 9.045084971874738e-05,
      "loss": 5.3258,
      "step": 56
    },
    {
      "epoch": 0.5786802030456852,
      "grad_norm": 0.3916621804237366,
      "learning_rate": 8.993177550236464e-05,
      "loss": 5.3383,
      "step": 57
    },
    {
      "epoch": 0.5888324873096447,
      "grad_norm": 0.36510735750198364,
      "learning_rate": 8.940053768033609e-05,
      "loss": 5.2607,
      "step": 58
    },
    {
      "epoch": 0.5989847715736041,
      "grad_norm": 0.40830719470977783,
      "learning_rate": 8.885729807284856e-05,
      "loss": 5.3212,
      "step": 59
    },
    {
      "epoch": 0.6091370558375635,
      "grad_norm": 0.4307074248790741,
      "learning_rate": 8.83022221559489e-05,
      "loss": 5.2596,
      "step": 60
    },
    {
      "epoch": 0.6192893401015228,
      "grad_norm": 0.3730439245700836,
      "learning_rate": 8.773547901113862e-05,
      "loss": 5.2363,
      "step": 61
    },
    {
      "epoch": 0.6294416243654822,
      "grad_norm": 0.4428587555885315,
      "learning_rate": 8.715724127386972e-05,
      "loss": 5.267,
      "step": 62
    },
    {
      "epoch": 0.6395939086294417,
      "grad_norm": 0.41558074951171875,
      "learning_rate": 8.656768508095853e-05,
      "loss": 5.1866,
      "step": 63
    },
    {
      "epoch": 0.649746192893401,
      "grad_norm": 0.4241006672382355,
      "learning_rate": 8.596699001693255e-05,
      "loss": 5.1834,
      "step": 64
    },
    {
      "epoch": 0.6598984771573604,
      "grad_norm": 0.3360666036605835,
      "learning_rate": 8.535533905932738e-05,
      "loss": 5.2194,
      "step": 65
    },
    {
      "epoch": 0.6700507614213198,
      "grad_norm": 0.38096800446510315,
      "learning_rate": 8.473291852294987e-05,
      "loss": 5.2324,
      "step": 66
    },
    {
      "epoch": 0.6802030456852792,
      "grad_norm": 0.437884122133255,
      "learning_rate": 8.409991800312493e-05,
      "loss": 5.2587,
      "step": 67
    },
    {
      "epoch": 0.6903553299492385,
      "grad_norm": 0.48057064414024353,
      "learning_rate": 8.345653031794292e-05,
      "loss": 5.2374,
      "step": 68
    },
    {
      "epoch": 0.700507614213198,
      "grad_norm": 0.5798227787017822,
      "learning_rate": 8.280295144952536e-05,
      "loss": 5.1645,
      "step": 69
    },
    {
      "epoch": 0.7106598984771574,
      "grad_norm": 0.5949220061302185,
      "learning_rate": 8.213938048432697e-05,
      "loss": 5.1196,
      "step": 70
    },
    {
      "epoch": 0.7208121827411168,
      "grad_norm": 0.6557019948959351,
      "learning_rate": 8.146601955249188e-05,
      "loss": 5.1316,
      "step": 71
    },
    {
      "epoch": 0.7309644670050761,
      "grad_norm": 0.7752095460891724,
      "learning_rate": 8.07830737662829e-05,
      "loss": 5.0944,
      "step": 72
    },
    {
      "epoch": 0.7411167512690355,
      "grad_norm": 0.8926941156387329,
      "learning_rate": 8.009075115760243e-05,
      "loss": 5.3464,
      "step": 73
    },
    {
      "epoch": 0.751269035532995,
      "grad_norm": 0.7516569495201111,
      "learning_rate": 7.938926261462366e-05,
      "loss": 5.3015,
      "step": 74
    },
    {
      "epoch": 0.7614213197969543,
      "grad_norm": 0.6225847005844116,
      "learning_rate": 7.86788218175523e-05,
      "loss": 5.3173,
      "step": 75
    },
    {
      "epoch": 0.7715736040609137,
      "grad_norm": 0.47938278317451477,
      "learning_rate": 7.795964517353735e-05,
      "loss": 5.3143,
      "step": 76
    },
    {
      "epoch": 0.7817258883248731,
      "grad_norm": 0.3764425814151764,
      "learning_rate": 7.723195175075136e-05,
      "loss": 5.3017,
      "step": 77
    },
    {
      "epoch": 0.7918781725888325,
      "grad_norm": 0.3587036430835724,
      "learning_rate": 7.649596321166024e-05,
      "loss": 5.2366,
      "step": 78
    },
    {
      "epoch": 0.8020304568527918,
      "grad_norm": 0.3250347673892975,
      "learning_rate": 7.575190374550272e-05,
      "loss": 5.2564,
      "step": 79
    },
    {
      "epoch": 0.8121827411167513,
      "grad_norm": 0.345753937959671,
      "learning_rate": 7.500000000000001e-05,
      "loss": 5.2704,
      "step": 80
    },
    {
      "epoch": 0.8223350253807107,
      "grad_norm": 0.3820163905620575,
      "learning_rate": 7.424048101231686e-05,
      "loss": 5.3212,
      "step": 81
    },
    {
      "epoch": 0.8324873096446701,
      "grad_norm": 0.40871894359588623,
      "learning_rate": 7.347357813929454e-05,
      "loss": 5.2279,
      "step": 82
    },
    {
      "epoch": 0.8426395939086294,
      "grad_norm": 0.36811622977256775,
      "learning_rate": 7.269952498697734e-05,
      "loss": 5.263,
      "step": 83
    },
    {
      "epoch": 0.8527918781725888,
      "grad_norm": 0.3945890963077545,
      "learning_rate": 7.191855733945387e-05,
      "loss": 5.309,
      "step": 84
    },
    {
      "epoch": 0.8629441624365483,
      "grad_norm": 0.37406954169273376,
      "learning_rate": 7.113091308703498e-05,
      "loss": 5.2704,
      "step": 85
    },
    {
      "epoch": 0.8730964467005076,
      "grad_norm": 0.39915502071380615,
      "learning_rate": 7.033683215379002e-05,
      "loss": 5.2693,
      "step": 86
    },
    {
      "epoch": 0.883248730964467,
      "grad_norm": 0.4481967091560364,
      "learning_rate": 6.953655642446368e-05,
      "loss": 5.2332,
      "step": 87
    },
    {
      "epoch": 0.8934010152284264,
      "grad_norm": 0.4243590235710144,
      "learning_rate": 6.873032967079561e-05,
      "loss": 5.2426,
      "step": 88
    },
    {
      "epoch": 0.9035532994923858,
      "grad_norm": 0.46772047877311707,
      "learning_rate": 6.7918397477265e-05,
      "loss": 5.1668,
      "step": 89
    },
    {
      "epoch": 0.9137055837563451,
      "grad_norm": 0.4974723756313324,
      "learning_rate": 6.710100716628344e-05,
      "loss": 5.17,
      "step": 90
    },
    {
      "epoch": 0.9238578680203046,
      "grad_norm": 0.4955981373786926,
      "learning_rate": 6.627840772285784e-05,
      "loss": 5.2168,
      "step": 91
    },
    {
      "epoch": 0.934010152284264,
      "grad_norm": 0.4544804096221924,
      "learning_rate": 6.545084971874738e-05,
      "loss": 5.218,
      "step": 92
    },
    {
      "epoch": 0.9441624365482234,
      "grad_norm": 0.5566396117210388,
      "learning_rate": 6.461858523613684e-05,
      "loss": 5.1191,
      "step": 93
    },
    {
      "epoch": 0.9543147208121827,
      "grad_norm": 0.5591332316398621,
      "learning_rate": 6.378186779084995e-05,
      "loss": 5.0513,
      "step": 94
    },
    {
      "epoch": 0.9644670050761421,
      "grad_norm": 0.6510249972343445,
      "learning_rate": 6.294095225512603e-05,
      "loss": 5.0533,
      "step": 95
    },
    {
      "epoch": 0.9746192893401016,
      "grad_norm": 0.8038920760154724,
      "learning_rate": 6.209609477998338e-05,
      "loss": 5.0682,
      "step": 96
    },
    {
      "epoch": 0.9847715736040609,
      "grad_norm": 0.5465517640113831,
      "learning_rate": 6.124755271719325e-05,
      "loss": 5.3038,
      "step": 97
    },
    {
      "epoch": 0.9949238578680203,
      "grad_norm": 0.4467344582080841,
      "learning_rate": 6.0395584540887963e-05,
      "loss": 5.2282,
      "step": 98
    },
    {
      "epoch": 1.0050761421319796,
      "grad_norm": 0.6803540587425232,
      "learning_rate": 5.9540449768827246e-05,
      "loss": 7.978,
      "step": 99
    },
    {
      "epoch": 1.015228426395939,
      "grad_norm": 0.3697003424167633,
      "learning_rate": 5.868240888334653e-05,
      "loss": 5.1768,
      "step": 100
    },
    {
      "epoch": 1.015228426395939,
      "eval_loss": 5.2339911460876465,
      "eval_runtime": 8.769,
      "eval_samples_per_second": 18.93,
      "eval_steps_per_second": 9.465,
      "step": 100
    },
    {
      "epoch": 1.0253807106598984,
      "grad_norm": 0.36876264214515686,
      "learning_rate": 5.782172325201155e-05,
      "loss": 5.3701,
      "step": 101
    },
    {
      "epoch": 1.0355329949238579,
      "grad_norm": 0.37881383299827576,
      "learning_rate": 5.695865504800327e-05,
      "loss": 5.1475,
      "step": 102
    },
    {
      "epoch": 1.0456852791878173,
      "grad_norm": 0.4124298691749573,
      "learning_rate": 5.6093467170257374e-05,
      "loss": 5.2639,
      "step": 103
    },
    {
      "epoch": 1.0558375634517767,
      "grad_norm": 0.4313642680644989,
      "learning_rate": 5.522642316338268e-05,
      "loss": 5.2658,
      "step": 104
    },
    {
      "epoch": 1.0659898477157361,
      "grad_norm": 0.3469761312007904,
      "learning_rate": 5.435778713738292e-05,
      "loss": 5.1731,
      "step": 105
    },
    {
      "epoch": 1.0761421319796955,
      "grad_norm": 0.34174656867980957,
      "learning_rate": 5.348782368720626e-05,
      "loss": 5.2532,
      "step": 106
    },
    {
      "epoch": 1.0862944162436547,
      "grad_norm": 0.37279707193374634,
      "learning_rate": 5.26167978121472e-05,
      "loss": 5.3232,
      "step": 107
    },
    {
      "epoch": 1.0964467005076142,
      "grad_norm": 0.34107351303100586,
      "learning_rate": 5.174497483512506e-05,
      "loss": 5.2014,
      "step": 108
    },
    {
      "epoch": 1.1065989847715736,
      "grad_norm": 0.44603461027145386,
      "learning_rate": 5.0872620321864185e-05,
      "loss": 5.274,
      "step": 109
    },
    {
      "epoch": 1.116751269035533,
      "grad_norm": 0.43816447257995605,
      "learning_rate": 5e-05,
      "loss": 5.2644,
      "step": 110
    },
    {
      "epoch": 1.1269035532994924,
      "grad_norm": 0.42787858843803406,
      "learning_rate": 4.912737967813583e-05,
      "loss": 5.182,
      "step": 111
    },
    {
      "epoch": 1.1370558375634519,
      "grad_norm": 0.4469396471977234,
      "learning_rate": 4.825502516487497e-05,
      "loss": 5.1906,
      "step": 112
    },
    {
      "epoch": 1.1472081218274113,
      "grad_norm": 0.47741401195526123,
      "learning_rate": 4.738320218785281e-05,
      "loss": 5.208,
      "step": 113
    },
    {
      "epoch": 1.1573604060913705,
      "grad_norm": 0.5933973789215088,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 5.056,
      "step": 114
    },
    {
      "epoch": 1.16751269035533,
      "grad_norm": 0.5861052870750427,
      "learning_rate": 4.564221286261709e-05,
      "loss": 5.1204,
      "step": 115
    },
    {
      "epoch": 1.1776649746192893,
      "grad_norm": 0.6024320125579834,
      "learning_rate": 4.477357683661734e-05,
      "loss": 5.1356,
      "step": 116
    },
    {
      "epoch": 1.1878172588832487,
      "grad_norm": 0.5369526147842407,
      "learning_rate": 4.390653282974264e-05,
      "loss": 5.2165,
      "step": 117
    },
    {
      "epoch": 1.1979695431472082,
      "grad_norm": 0.4957830309867859,
      "learning_rate": 4.3041344951996746e-05,
      "loss": 5.1409,
      "step": 118
    },
    {
      "epoch": 1.2081218274111676,
      "grad_norm": 0.5669883489608765,
      "learning_rate": 4.2178276747988446e-05,
      "loss": 5.1294,
      "step": 119
    },
    {
      "epoch": 1.218274111675127,
      "grad_norm": 0.6567976474761963,
      "learning_rate": 4.131759111665349e-05,
      "loss": 5.127,
      "step": 120
    },
    {
      "epoch": 1.2284263959390862,
      "grad_norm": 0.7601398825645447,
      "learning_rate": 4.045955023117276e-05,
      "loss": 4.9877,
      "step": 121
    },
    {
      "epoch": 1.2385786802030456,
      "grad_norm": 0.8807834982872009,
      "learning_rate": 3.960441545911204e-05,
      "loss": 5.2077,
      "step": 122
    },
    {
      "epoch": 1.248730964467005,
      "grad_norm": 0.9801356196403503,
      "learning_rate": 3.875244728280676e-05,
      "loss": 4.9937,
      "step": 123
    },
    {
      "epoch": 1.2588832487309645,
      "grad_norm": 0.7310280799865723,
      "learning_rate": 3.790390522001662e-05,
      "loss": 5.2336,
      "step": 124
    },
    {
      "epoch": 1.2690355329949239,
      "grad_norm": 0.560065507888794,
      "learning_rate": 3.705904774487396e-05,
      "loss": 5.4395,
      "step": 125
    },
    {
      "epoch": 1.2791878172588833,
      "grad_norm": 0.39355024695396423,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 5.0604,
      "step": 126
    },
    {
      "epoch": 1.2893401015228427,
      "grad_norm": 0.3939688503742218,
      "learning_rate": 3.5381414763863166e-05,
      "loss": 5.2652,
      "step": 127
    },
    {
      "epoch": 1.299492385786802,
      "grad_norm": 0.38597550988197327,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 5.1938,
      "step": 128
    },
    {
      "epoch": 1.3096446700507614,
      "grad_norm": 0.4252302050590515,
      "learning_rate": 3.372159227714218e-05,
      "loss": 5.1874,
      "step": 129
    },
    {
      "epoch": 1.3197969543147208,
      "grad_norm": 0.4855193793773651,
      "learning_rate": 3.289899283371657e-05,
      "loss": 5.273,
      "step": 130
    },
    {
      "epoch": 1.3299492385786802,
      "grad_norm": 0.5743886232376099,
      "learning_rate": 3.2081602522734986e-05,
      "loss": 5.2788,
      "step": 131
    },
    {
      "epoch": 1.3401015228426396,
      "grad_norm": 0.4098842740058899,
      "learning_rate": 3.12696703292044e-05,
      "loss": 5.1825,
      "step": 132
    },
    {
      "epoch": 1.350253807106599,
      "grad_norm": 0.3755365014076233,
      "learning_rate": 3.046344357553632e-05,
      "loss": 5.1837,
      "step": 133
    },
    {
      "epoch": 1.3604060913705585,
      "grad_norm": 0.4349859356880188,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 5.2206,
      "step": 134
    },
    {
      "epoch": 1.3705583756345177,
      "grad_norm": 0.48483696579933167,
      "learning_rate": 2.886908691296504e-05,
      "loss": 5.2736,
      "step": 135
    },
    {
      "epoch": 1.380710659898477,
      "grad_norm": 0.5594541430473328,
      "learning_rate": 2.8081442660546125e-05,
      "loss": 5.1473,
      "step": 136
    },
    {
      "epoch": 1.3908629441624365,
      "grad_norm": 0.478778213262558,
      "learning_rate": 2.7300475013022663e-05,
      "loss": 5.1136,
      "step": 137
    },
    {
      "epoch": 1.401015228426396,
      "grad_norm": 0.47473740577697754,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 5.1958,
      "step": 138
    },
    {
      "epoch": 1.4111675126903553,
      "grad_norm": 0.504795491695404,
      "learning_rate": 2.575951898768315e-05,
      "loss": 5.0851,
      "step": 139
    },
    {
      "epoch": 1.4213197969543148,
      "grad_norm": 0.4611382484436035,
      "learning_rate": 2.500000000000001e-05,
      "loss": 5.1284,
      "step": 140
    },
    {
      "epoch": 1.4314720812182742,
      "grad_norm": 0.49524998664855957,
      "learning_rate": 2.4248096254497288e-05,
      "loss": 5.2106,
      "step": 141
    },
    {
      "epoch": 1.4416243654822334,
      "grad_norm": 0.4873555302619934,
      "learning_rate": 2.350403678833976e-05,
      "loss": 5.1443,
      "step": 142
    },
    {
      "epoch": 1.451776649746193,
      "grad_norm": 0.5808872580528259,
      "learning_rate": 2.2768048249248648e-05,
      "loss": 5.0184,
      "step": 143
    },
    {
      "epoch": 1.4619289340101522,
      "grad_norm": 0.695599377155304,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 5.2417,
      "step": 144
    },
    {
      "epoch": 1.4720812182741116,
      "grad_norm": 0.7416836023330688,
      "learning_rate": 2.132117818244771e-05,
      "loss": 5.0167,
      "step": 145
    },
    {
      "epoch": 1.482233502538071,
      "grad_norm": 0.9077966213226318,
      "learning_rate": 2.061073738537635e-05,
      "loss": 5.3683,
      "step": 146
    },
    {
      "epoch": 1.4923857868020305,
      "grad_norm": 0.7044989466667175,
      "learning_rate": 1.9909248842397584e-05,
      "loss": 4.8825,
      "step": 147
    },
    {
      "epoch": 1.50253807106599,
      "grad_norm": 0.5296655893325806,
      "learning_rate": 1.9216926233717085e-05,
      "loss": 5.17,
      "step": 148
    },
    {
      "epoch": 1.512690355329949,
      "grad_norm": 0.47559142112731934,
      "learning_rate": 1.8533980447508137e-05,
      "loss": 5.4245,
      "step": 149
    },
    {
      "epoch": 1.5228426395939088,
      "grad_norm": 0.4813889265060425,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 5.1568,
      "step": 150
    },
    {
      "epoch": 1.5228426395939088,
      "eval_loss": 5.212484836578369,
      "eval_runtime": 8.7715,
      "eval_samples_per_second": 18.925,
      "eval_steps_per_second": 9.462,
      "step": 150
    },
    {
      "epoch": 1.532994923857868,
      "grad_norm": 0.4661964178085327,
      "learning_rate": 1.7197048550474643e-05,
      "loss": 5.3035,
      "step": 151
    },
    {
      "epoch": 1.5431472081218274,
      "grad_norm": 0.4161906838417053,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 5.249,
      "step": 152
    },
    {
      "epoch": 1.5532994923857868,
      "grad_norm": 0.3857448697090149,
      "learning_rate": 1.5900081996875083e-05,
      "loss": 5.1989,
      "step": 153
    },
    {
      "epoch": 1.5634517766497462,
      "grad_norm": 0.38317275047302246,
      "learning_rate": 1.526708147705013e-05,
      "loss": 5.2647,
      "step": 154
    },
    {
      "epoch": 1.5736040609137056,
      "grad_norm": 0.42127397656440735,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 5.3172,
      "step": 155
    },
    {
      "epoch": 1.5837563451776648,
      "grad_norm": 0.40184593200683594,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 5.1843,
      "step": 156
    },
    {
      "epoch": 1.5939086294416245,
      "grad_norm": 0.39526352286338806,
      "learning_rate": 1.3432314919041478e-05,
      "loss": 5.2035,
      "step": 157
    },
    {
      "epoch": 1.6040609137055837,
      "grad_norm": 0.36701011657714844,
      "learning_rate": 1.2842758726130283e-05,
      "loss": 5.259,
      "step": 158
    },
    {
      "epoch": 1.614213197969543,
      "grad_norm": 0.3989609181880951,
      "learning_rate": 1.22645209888614e-05,
      "loss": 5.1989,
      "step": 159
    },
    {
      "epoch": 1.6243654822335025,
      "grad_norm": 0.4475120007991791,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 5.1748,
      "step": 160
    },
    {
      "epoch": 1.634517766497462,
      "grad_norm": 0.4706570506095886,
      "learning_rate": 1.1142701927151456e-05,
      "loss": 5.1278,
      "step": 161
    },
    {
      "epoch": 1.6446700507614214,
      "grad_norm": 0.5085177421569824,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 5.1414,
      "step": 162
    },
    {
      "epoch": 1.6548223350253806,
      "grad_norm": 0.5082749128341675,
      "learning_rate": 1.006822449763537e-05,
      "loss": 5.2022,
      "step": 163
    },
    {
      "epoch": 1.6649746192893402,
      "grad_norm": 0.5005104541778564,
      "learning_rate": 9.549150281252633e-06,
      "loss": 5.0471,
      "step": 164
    },
    {
      "epoch": 1.6751269035532994,
      "grad_norm": 0.5254844427108765,
      "learning_rate": 9.042397785550405e-06,
      "loss": 5.2574,
      "step": 165
    },
    {
      "epoch": 1.6852791878172588,
      "grad_norm": 0.55852210521698,
      "learning_rate": 8.548121372247918e-06,
      "loss": 5.1745,
      "step": 166
    },
    {
      "epoch": 1.6954314720812182,
      "grad_norm": 0.6532874703407288,
      "learning_rate": 8.066471602728803e-06,
      "loss": 5.1302,
      "step": 167
    },
    {
      "epoch": 1.7055837563451777,
      "grad_norm": 0.6552545428276062,
      "learning_rate": 7.597595192178702e-06,
      "loss": 5.0333,
      "step": 168
    },
    {
      "epoch": 1.715736040609137,
      "grad_norm": 0.7609627842903137,
      "learning_rate": 7.1416349648943894e-06,
      "loss": 4.9455,
      "step": 169
    },
    {
      "epoch": 1.7258883248730963,
      "grad_norm": 0.9393100142478943,
      "learning_rate": 6.698729810778065e-06,
      "loss": 5.2311,
      "step": 170
    },
    {
      "epoch": 1.736040609137056,
      "grad_norm": 0.6443814635276794,
      "learning_rate": 6.269014643030213e-06,
      "loss": 4.9513,
      "step": 171
    },
    {
      "epoch": 1.7461928934010151,
      "grad_norm": 0.37428995966911316,
      "learning_rate": 5.852620357053651e-06,
      "loss": 5.1892,
      "step": 172
    },
    {
      "epoch": 1.7563451776649748,
      "grad_norm": 0.3807058036327362,
      "learning_rate": 5.449673790581611e-06,
      "loss": 5.4823,
      "step": 173
    },
    {
      "epoch": 1.766497461928934,
      "grad_norm": 0.3458186984062195,
      "learning_rate": 5.060297685041659e-06,
      "loss": 5.0147,
      "step": 174
    },
    {
      "epoch": 1.7766497461928934,
      "grad_norm": 0.3948209285736084,
      "learning_rate": 4.684610648167503e-06,
      "loss": 5.3152,
      "step": 175
    },
    {
      "epoch": 1.7868020304568528,
      "grad_norm": 0.3780762553215027,
      "learning_rate": 4.322727117869951e-06,
      "loss": 5.2797,
      "step": 176
    },
    {
      "epoch": 1.796954314720812,
      "grad_norm": 0.3970668911933899,
      "learning_rate": 3.974757327377981e-06,
      "loss": 5.153,
      "step": 177
    },
    {
      "epoch": 1.8071065989847717,
      "grad_norm": 0.42801252007484436,
      "learning_rate": 3.6408072716606346e-06,
      "loss": 5.2812,
      "step": 178
    },
    {
      "epoch": 1.8172588832487309,
      "grad_norm": 0.3600170314311981,
      "learning_rate": 3.3209786751399187e-06,
      "loss": 5.226,
      "step": 179
    },
    {
      "epoch": 1.8274111675126905,
      "grad_norm": 0.40839913487434387,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 5.2793,
      "step": 180
    },
    {
      "epoch": 1.8375634517766497,
      "grad_norm": 0.4590075612068176,
      "learning_rate": 2.724071220034158e-06,
      "loss": 5.2714,
      "step": 181
    },
    {
      "epoch": 1.8477157360406091,
      "grad_norm": 0.4016427993774414,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 5.1897,
      "step": 182
    },
    {
      "epoch": 1.8578680203045685,
      "grad_norm": 0.382449746131897,
      "learning_rate": 2.1847622018482283e-06,
      "loss": 5.2737,
      "step": 183
    },
    {
      "epoch": 1.868020304568528,
      "grad_norm": 0.44841504096984863,
      "learning_rate": 1.9369152030840556e-06,
      "loss": 5.1636,
      "step": 184
    },
    {
      "epoch": 1.8781725888324874,
      "grad_norm": 0.47819268703460693,
      "learning_rate": 1.70370868554659e-06,
      "loss": 5.1766,
      "step": 185
    },
    {
      "epoch": 1.8883248730964466,
      "grad_norm": 0.4346996545791626,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 5.1172,
      "step": 186
    },
    {
      "epoch": 1.8984771573604062,
      "grad_norm": 0.4988497495651245,
      "learning_rate": 1.2814967607382432e-06,
      "loss": 5.1769,
      "step": 187
    },
    {
      "epoch": 1.9086294416243654,
      "grad_norm": 0.47669705748558044,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 5.0482,
      "step": 188
    },
    {
      "epoch": 1.9187817258883249,
      "grad_norm": 0.505896270275116,
      "learning_rate": 9.186408276168013e-07,
      "loss": 5.2213,
      "step": 189
    },
    {
      "epoch": 1.9289340101522843,
      "grad_norm": 0.5310911536216736,
      "learning_rate": 7.596123493895991e-07,
      "loss": 5.1086,
      "step": 190
    },
    {
      "epoch": 1.9390862944162437,
      "grad_norm": 0.48757103085517883,
      "learning_rate": 6.15582970243117e-07,
      "loss": 5.039,
      "step": 191
    },
    {
      "epoch": 1.9492385786802031,
      "grad_norm": 0.6253727078437805,
      "learning_rate": 4.865965629214819e-07,
      "loss": 5.0935,
      "step": 192
    },
    {
      "epoch": 1.9593908629441623,
      "grad_norm": 0.7249897122383118,
      "learning_rate": 3.7269241793390085e-07,
      "loss": 5.0918,
      "step": 193
    },
    {
      "epoch": 1.969543147208122,
      "grad_norm": 0.9451597929000854,
      "learning_rate": 2.7390523158633554e-07,
      "loss": 5.0525,
      "step": 194
    },
    {
      "epoch": 1.9796954314720812,
      "grad_norm": 0.7079738974571228,
      "learning_rate": 1.9026509541272275e-07,
      "loss": 5.5103,
      "step": 195
    },
    {
      "epoch": 1.9898477157360406,
      "grad_norm": 0.3672516644001007,
      "learning_rate": 1.2179748700879012e-07,
      "loss": 5.1374,
      "step": 196
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.114559292793274,
      "learning_rate": 6.852326227130834e-08,
      "loss": 6.8999,
      "step": 197
    },
    {
      "epoch": 2.010152284263959,
      "grad_norm": 0.32475075125694275,
      "learning_rate": 3.04586490452119e-08,
      "loss": 5.2734,
      "step": 198
    },
    {
      "epoch": 2.020304568527919,
      "grad_norm": 0.2769867777824402,
      "learning_rate": 7.615242180436522e-09,
      "loss": 5.2687,
      "step": 199
    },
    {
      "epoch": 2.030456852791878,
      "grad_norm": 0.2693822979927063,
      "learning_rate": 0.0,
      "loss": 5.2514,
      "step": 200
    },
    {
      "epoch": 2.030456852791878,
      "eval_loss": 5.203610897064209,
      "eval_runtime": 8.7648,
      "eval_samples_per_second": 18.939,
      "eval_steps_per_second": 9.47,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1363884845760512e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}